diff --git a/.gitattributes b/.gitattributes index 3650f67793ae41dfc5a18bc0e0e7ac727dc6e558..f5ca58352bc0ec40d64d6aae44c17f59f9ad249e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -49,3 +49,7 @@ wandb/run-20220301_221232-283qa50u/run-283qa50u.wandb filter=lfs diff=lfs merge= wandb/run-20220302_000300-2ebwk6gp/run-2ebwk6gp.wandb filter=lfs diff=lfs merge=lfs -text wandb/run-20220302_021624-vszekdxg/run-vszekdxg.wandb filter=lfs diff=lfs merge=lfs -text wandb/run-20220302_041332-j5suzd56/run-j5suzd56.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20220302_055556-ymuc7hv0/run-ymuc7hv0.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20220302_063647-bmivw6vv/run-bmivw6vv.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20220302_074637-35y19oi2/run-35y19oi2.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20220302_085255-16llzpbl/run-16llzpbl.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/pytorch_model.bin b/pytorch_model.bin index 0705186a0b0c6ba82abc4878743d5ab947cde4af..9413b8724ac98d05209b1a30b100e540a69d5f6b 100644 --- a/pytorch_model.bin +++ b/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab594e46112a0b3b6c97a2155d0861551020f73b9f2d0f1ce90b232f7eda2e16 +oid sha256:0231e55487797a42f7a98c2d31b1c557e54ded159d58fb491feed454bb814029 size 3210531882 diff --git a/training_args.bin b/training_args.bin index cc0fad3e21f17f0899b4a752e1e11fbae0427def..063564cf5998011d4f8b8959d032c9864b08c38e 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5296214d28c5e8d2d279dbf83f78ee85351aa888d0629a8d317f53222766206 +oid sha256:2cda31eca71861bfef1fdefa9727c0c3604b9d1cb42ab0a0a66a5b41235b287f size 3119 diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log index 85d2290f1b870bfd29e326ac9b79e6f36f1291a3..b87ade128485e957f980be0fd3f05b75b9864dbe 120000 --- a/wandb/debug-internal.log +++ b/wandb/debug-internal.log @@ -1 +1 @@ -run-20220302_041332-j5suzd56/logs/debug-internal.log \ No newline at end of file +run-20220302_085255-16llzpbl/logs/debug-internal.log \ No newline at end of file diff --git a/wandb/debug.log b/wandb/debug.log index 8aca2483148e8b08b2d9f3620df40c3a4069aef7..a7a6863a8b661520f2515ee27961a6e3067d176f 120000 --- a/wandb/debug.log +++ b/wandb/debug.log @@ -1 +1 @@ -run-20220302_041332-j5suzd56/logs/debug.log \ No newline at end of file +run-20220302_085255-16llzpbl/logs/debug.log \ No newline at end of file diff --git a/wandb/latest-run b/wandb/latest-run index 0200080f5358ffe225964c2f8e88963837325193..c864015d68855cdab52d90be3d889a6e3bf9968a 120000 --- a/wandb/latest-run +++ b/wandb/latest-run @@ -1 +1 @@ -run-20220302_041332-j5suzd56 \ No newline at end of file +run-20220302_085255-16llzpbl \ No newline at end of file diff --git a/wandb/run-20220302_041332-j5suzd56/files/config.yaml b/wandb/run-20220302_041332-j5suzd56/files/config.yaml index 9a13fd4264ae439c3fa4f6e8a640878690101c6e..c5ddf4a4c526631eef82a3f250e88fc54c8ac116 100644 --- a/wandb/run-20220302_041332-j5suzd56/files/config.yaml +++ b/wandb/run-20220302_041332-j5suzd56/files/config.yaml @@ -10673,7 +10673,14 @@ _wandb: - 1 - 5 - 11 + 2: + - 1 + - 5 + - 11 + - 12 3: + - 1 + - 7 - 13 4: 3.9.5 5: 0.12.10 diff --git a/wandb/run-20220302_041332-j5suzd56/files/output.log b/wandb/run-20220302_041332-j5suzd56/files/output.log index 6358ad4c4cf0751fe6995620e00d2449aef1d4c8..32bf7b5718bf8fa5678277dc863485c45b4be07a 100644 --- a/wandb/run-20220302_041332-j5suzd56/files/output.log +++ b/wandb/run-20220302_041332-j5suzd56/files/output.log @@ -2787,3 +2787,10 @@ Upload file wandb/run-20220302_041332-j5suzd56/run-j5suzd56.wandb: 42%|██ eval_samples_per_second = 2.812 eval_steps_per_second = 0.235 [INFO|modeling_utils.py:1081] 2022-03-02 05:53:57,022 >> Model weights saved in ./pytorch_model.bin:06<08:59, 2.47s/it] argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +Upload file wandb/run-20220302_041332-j5suzd56/run-j5suzd56.wandb: 50%|█████▍ | 17.2M/34.7M [00:01<00:01, 18.0MB/s] argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +Upload file wandb/run-20220302_041332-j5suzd56/run-j5suzd56.wandb: 100%|███████████| 34.7M/34.7M [00:03<00:00, 12.1MB/s] argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +Upload file wandb/run-20220302_041332-j5suzd56/run-j5suzd56.wandb: 100%|███████████| 34.7M/34.7M [00:03<00:00, 12.1MB/s] argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +03/02/2022 05:54:26 - WARNING - huggingface_hub.repository - To https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search + return ModelInfo(**d)f.finetuned_from)formers/src/transformers/modelcard.py", line 611, in from_trainercard31, in mainule>ent in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + return ModelInfo(**d)f.finetuned_from)formers/src/transformers/modelcard.py", line 611, in from_trainercard31, in mainule>ent in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + return ModelInfo(**d)f.finetuned_from)formers/src/transformers/modelcard.py", line 611, in from_trainercard31, in mainule>ent in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. \ No newline at end of file diff --git a/wandb/run-20220302_041332-j5suzd56/files/wandb-summary.json b/wandb/run-20220302_041332-j5suzd56/files/wandb-summary.json index f398fc8411857c5d8b6afbde2ebd493325fd7379..c28fbb2ddaa8cfc21f9563e1b28ab718c85ed36d 100644 --- a/wandb/run-20220302_041332-j5suzd56/files/wandb-summary.json +++ b/wandb/run-20220302_041332-j5suzd56/files/wandb-summary.json @@ -1 +1 @@ -{"train/loss": 4.5069, "train/learning_rate": 5.92e-05, "train/epoch": 1.0, "train/global_step": 297, "_runtime": 6008, "_timestamp": 1646200420, "_step": 298, "gradients/decoder.transformer.ln_f.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 117.0, 651.0, 234.0, 12.0], "bins": [-389.90692138671875, -383.5785827636719, -377.250244140625, -370.92193603515625, -364.5935974121094, -358.2652587890625, -351.93695068359375, -345.6086120605469, -339.2802734375, -332.9519348144531, -326.62359619140625, -320.2952880859375, -313.9669494628906, -307.63861083984375, -301.310302734375, -294.9819641113281, -288.65362548828125, -282.3252868652344, -275.9969482421875, -269.66864013671875, -263.3403015136719, -257.011962890625, -250.6836395263672, -244.35531616210938, -238.0269775390625, -231.69863891601562, -225.3703155517578, -219.0419921875, -212.71365356445312, -206.38531494140625, -200.05699157714844, -193.72866821289062, -187.4003448486328, -181.072021484375, -174.74368286132812, -168.41534423828125, -162.08702087402344, -155.75869750976562, -149.43035888671875, -143.10202026367188, -136.77369689941406, -130.44537353515625, -124.11703491210938, -117.78870391845703, -111.46037292480469, -105.13204193115234, -98.8037109375, -92.47537994384766, -86.14704132080078, -79.81871032714844, -73.4903793334961, -67.16204833984375, -60.833717346191406, -54.50538635253906, -48.17705535888672, -41.848724365234375, -35.5203971862793, -29.192066192626953, -22.86373519897461, -16.535404205322266, -10.207073211669922, -3.878742218017578, 2.4495887756347656, 8.77791976928711, 15.10625171661377]}, "gradients/decoder.transformer.ln_f.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 2.0, 4.0, 2.0, 8.0, 9.0, 13.0, 8.0, 17.0, 21.0, 14.0, 26.0, 32.0, 17.0, 43.0, 55.0, 40.0, 43.0, 49.0, 46.0, 60.0, 54.0, 56.0, 54.0, 39.0, 34.0, 43.0, 28.0, 33.0, 20.0, 27.0, 17.0, 27.0, 9.0, 9.0, 14.0, 12.0, 4.0, 4.0, 5.0, 5.0, 0.0, 1.0, 1.0, 2.0, 2.0], "bins": [-62.19386291503906, -60.59320068359375, -58.99253845214844, -57.391876220703125, -55.79121398925781, -54.1905517578125, -52.58988571166992, -50.98922348022461, -49.3885612487793, -47.787899017333984, -46.18723678588867, -44.58657455444336, -42.98590850830078, -41.38524627685547, -39.784584045410156, -38.183921813964844, -36.58325958251953, -34.98259735107422, -33.381935119628906, -31.78127098083496, -30.18060874938965, -28.579946517944336, -26.97928237915039, -25.378620147705078, -23.777957916259766, -22.177295684814453, -20.57663345336914, -18.975969314575195, -17.375307083129883, -15.77464485168457, -14.173981666564941, -12.573318481445312, -10.972652435302734, -9.371990203857422, -7.771327018737793, -6.170664310455322, -4.570001602172852, -2.969339370727539, -1.3686761856079102, 0.23198699951171875, 1.8326492309570312, 3.433311939239502, 5.033974647521973, 6.634637355804443, 8.235300064086914, 9.835962295532227, 11.436625480651855, 13.037288665771484, 14.637950897216797, 16.23861312866211, 17.839275360107422, 19.439939498901367, 21.04060173034668, 22.641263961791992, 24.241928100585938, 25.84259033203125, 27.443252563476562, 29.043914794921875, 30.644577026367188, 32.2452392578125, 33.84590148925781, 35.446563720703125, 37.0472297668457, 38.647891998291016, 40.24855422973633]}, "gradients/decoder.transformer.h.23.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 3.0, 2.0, 1.0, 5.0, 3.0, 5.0, 10.0, 9.0, 10.0, 15.0, 19.0, 15.0, 15.0, 29.0, 28.0, 36.0, 52.0, 40.0, 49.0, 40.0, 49.0, 48.0, 70.0, 56.0, 36.0, 48.0, 37.0, 31.0, 37.0, 32.0, 34.0, 21.0, 22.0, 21.0, 23.0, 10.0, 15.0, 12.0, 4.0, 5.0, 3.0, 1.0, 4.0, 2.0, 5.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.328125, -2.24615478515625, -2.1641845703125, -2.08221435546875, -2.000244140625, -1.91827392578125, -1.8363037109375, -1.75433349609375, -1.67236328125, -1.59039306640625, -1.5084228515625, -1.42645263671875, -1.344482421875, -1.26251220703125, -1.1805419921875, -1.09857177734375, -1.0166015625, -0.93463134765625, -0.8526611328125, -0.77069091796875, -0.688720703125, -0.60675048828125, -0.5247802734375, -0.44281005859375, -0.36083984375, -0.27886962890625, -0.1968994140625, -0.11492919921875, -0.032958984375, 0.04901123046875, 0.1309814453125, 0.21295166015625, 0.294921875, 0.37689208984375, 0.4588623046875, 0.54083251953125, 0.622802734375, 0.70477294921875, 0.7867431640625, 0.86871337890625, 0.95068359375, 1.03265380859375, 1.1146240234375, 1.19659423828125, 1.278564453125, 1.36053466796875, 1.4425048828125, 1.52447509765625, 1.6064453125, 1.68841552734375, 1.7703857421875, 1.85235595703125, 1.934326171875, 2.01629638671875, 2.0982666015625, 2.18023681640625, 2.26220703125, 2.34417724609375, 2.4261474609375, 2.50811767578125, 2.590087890625, 2.67205810546875, 2.7540283203125, 2.83599853515625, 2.91796875]}, "gradients/decoder.transformer.h.23.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 0.0, 5.0, 3.0, 5.0, 9.0, 5.0, 15.0, 22.0, 37.0, 64.0, 90.0, 162.0, 276.0, 457.0, 891.0, 1726.0, 4247.0, 14065.0, 78326.0, 901183.0, 2831164.0, 309275.0, 37287.0, 8841.0, 3094.0, 1379.0, 698.0, 425.0, 240.0, 120.0, 61.0, 51.0, 23.0, 13.0, 17.0, 5.0, 7.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.171875, -10.84619140625, -10.5205078125, -10.19482421875, -9.869140625, -9.54345703125, -9.2177734375, -8.89208984375, -8.56640625, -8.24072265625, -7.9150390625, -7.58935546875, -7.263671875, -6.93798828125, -6.6123046875, -6.28662109375, -5.9609375, -5.63525390625, -5.3095703125, -4.98388671875, -4.658203125, -4.33251953125, -4.0068359375, -3.68115234375, -3.35546875, -3.02978515625, -2.7041015625, -2.37841796875, -2.052734375, -1.72705078125, -1.4013671875, -1.07568359375, -0.75, -0.42431640625, -0.0986328125, 0.22705078125, 0.552734375, 0.87841796875, 1.2041015625, 1.52978515625, 1.85546875, 2.18115234375, 2.5068359375, 2.83251953125, 3.158203125, 3.48388671875, 3.8095703125, 4.13525390625, 4.4609375, 4.78662109375, 5.1123046875, 5.43798828125, 5.763671875, 6.08935546875, 6.4150390625, 6.74072265625, 7.06640625, 7.39208984375, 7.7177734375, 8.04345703125, 8.369140625, 8.69482421875, 9.0205078125, 9.34619140625, 9.671875]}, "gradients/decoder.transformer.h.23.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 1.0, 3.0, 4.0, 2.0, 10.0, 6.0, 15.0, 22.0, 26.0, 41.0, 59.0, 84.0, 125.0, 149.0, 261.0, 360.0, 456.0, 540.0, 512.0, 435.0, 292.0, 202.0, 143.0, 122.0, 65.0, 45.0, 35.0, 21.0, 11.0, 12.0, 3.0, 8.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-12.609375, -12.2742919921875, -11.939208984375, -11.6041259765625, -11.26904296875, -10.9339599609375, -10.598876953125, -10.2637939453125, -9.9287109375, -9.5936279296875, -9.258544921875, -8.9234619140625, -8.58837890625, -8.2532958984375, -7.918212890625, -7.5831298828125, -7.248046875, -6.9129638671875, -6.577880859375, -6.2427978515625, -5.90771484375, -5.5726318359375, -5.237548828125, -4.9024658203125, -4.5673828125, -4.2322998046875, -3.897216796875, -3.5621337890625, -3.22705078125, -2.8919677734375, -2.556884765625, -2.2218017578125, -1.88671875, -1.5516357421875, -1.216552734375, -0.8814697265625, -0.54638671875, -0.2113037109375, 0.123779296875, 0.4588623046875, 0.7939453125, 1.1290283203125, 1.464111328125, 1.7991943359375, 2.13427734375, 2.4693603515625, 2.804443359375, 3.1395263671875, 3.474609375, 3.8096923828125, 4.144775390625, 4.4798583984375, 4.81494140625, 5.1500244140625, 5.485107421875, 5.8201904296875, 6.1552734375, 6.4903564453125, 6.825439453125, 7.1605224609375, 7.49560546875, 7.8306884765625, 8.165771484375, 8.5008544921875, 8.8359375]}, "gradients/decoder.transformer.h.23.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 4.0, 5.0, 3.0, 6.0, 13.0, 10.0, 14.0, 21.0, 49.0, 67.0, 100.0, 159.0, 269.0, 450.0, 858.0, 1879.0, 6225.0, 174344.0, 3887694.0, 112843.0, 5511.0, 1755.0, 846.0, 470.0, 240.0, 154.0, 107.0, 50.0, 35.0, 35.0, 17.0, 12.0, 6.0, 5.0, 5.0, 2.0, 3.0, 0.0, 1.0, 1.0, 4.0, 5.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.65625, -35.52685546875, -34.3974609375, -33.26806640625, -32.138671875, -31.00927734375, -29.8798828125, -28.75048828125, -27.62109375, -26.49169921875, -25.3623046875, -24.23291015625, -23.103515625, -21.97412109375, -20.8447265625, -19.71533203125, -18.5859375, -17.45654296875, -16.3271484375, -15.19775390625, -14.068359375, -12.93896484375, -11.8095703125, -10.68017578125, -9.55078125, -8.42138671875, -7.2919921875, -6.16259765625, -5.033203125, -3.90380859375, -2.7744140625, -1.64501953125, -0.515625, 0.61376953125, 1.7431640625, 2.87255859375, 4.001953125, 5.13134765625, 6.2607421875, 7.39013671875, 8.51953125, 9.64892578125, 10.7783203125, 11.90771484375, 13.037109375, 14.16650390625, 15.2958984375, 16.42529296875, 17.5546875, 18.68408203125, 19.8134765625, 20.94287109375, 22.072265625, 23.20166015625, 24.3310546875, 25.46044921875, 26.58984375, 27.71923828125, 28.8486328125, 29.97802734375, 31.107421875, 32.23681640625, 33.3662109375, 34.49560546875, 35.625]}, "gradients/decoder.transformer.h.23.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 21.0, 81.0, 209.0, 340.0, 227.0, 107.0, 21.0, 5.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-81.21906280517578, -78.64128112792969, -76.06349182128906, -73.48571014404297, -70.90792846679688, -68.33013916015625, -65.75235748291016, -63.17457580566406, -60.5967903137207, -58.019004821777344, -55.44122314453125, -52.86343765258789, -50.28565216064453, -47.70787048339844, -45.13008499145508, -42.55229949951172, -39.974517822265625, -37.396732330322266, -34.81895065307617, -32.24116516113281, -29.663381576538086, -27.08559799194336, -24.5078125, -21.930028915405273, -19.352245330810547, -16.77446174621582, -14.196677207946777, -11.618892669677734, -9.041109085083008, -6.463325500488281, -3.8855409622192383, -1.3077564239501953, 1.2700347900390625, 3.8478188514709473, 6.425602912902832, 9.003387451171875, 11.581171035766602, 14.158954620361328, 16.736740112304688, 19.314523696899414, 21.89230728149414, 24.470090866088867, 27.047874450683594, 29.625659942626953, 32.20344543457031, 34.781227111816406, 37.359012603759766, 39.936798095703125, 42.51457977294922, 45.09236526489258, 47.67014694213867, 50.24793243408203, 52.825714111328125, 55.403499603271484, 57.981285095214844, 60.55906677246094, 63.1368522644043, 65.71463775634766, 68.29241943359375, 70.87020111083984, 73.44799041748047, 76.02577209472656, 78.60355377197266, 81.18134307861328, 83.75912475585938]}, "gradients/decoder.transformer.h.23.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 2.0, 7.0, 4.0, 7.0, 8.0, 12.0, 14.0, 23.0, 19.0, 25.0, 25.0, 25.0, 32.0, 38.0, 33.0, 42.0, 40.0, 33.0, 42.0, 43.0, 46.0, 43.0, 58.0, 49.0, 42.0, 40.0, 35.0, 40.0, 29.0, 29.0, 24.0, 17.0, 18.0, 16.0, 6.0, 7.0, 13.0, 7.0, 3.0, 3.0, 4.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.706695556640625, -31.624561309814453, -30.54242515563965, -29.460290908813477, -28.378154754638672, -27.2960205078125, -26.213886260986328, -25.131752014160156, -24.04961585998535, -22.96748161315918, -21.885345458984375, -20.803211212158203, -19.72107696533203, -18.638940811157227, -17.556806564331055, -16.47467041015625, -15.392536163330078, -14.31040096282959, -13.228265762329102, -12.14613151550293, -11.063996315002441, -9.981861114501953, -8.899726867675781, -7.817591667175293, -6.735456466674805, -5.653321266174316, -4.571186542510986, -3.489051580429077, -2.406916618347168, -1.3247814178466797, -0.2426466941833496, 0.8394880294799805, 1.9216194152832031, 3.0037543773651123, 4.0858893394470215, 5.168024063110352, 6.25015926361084, 7.332294464111328, 8.4144287109375, 9.496563911437988, 10.578699111938477, 11.660834312438965, 12.742969512939453, 13.825103759765625, 14.907238960266113, 15.989374160766602, 17.071508407592773, 18.153644561767578, 19.23577880859375, 20.317913055419922, 21.400049209594727, 22.4821834564209, 23.564319610595703, 24.646453857421875, 25.728588104248047, 26.81072235107422, 27.892858505249023, 28.974992752075195, 30.05712890625, 31.139263153076172, 32.221397399902344, 33.30353546142578, 34.38566970825195, 35.467803955078125, 36.5499382019043]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 0.0, 3.0, 5.0, 3.0, 2.0, 7.0, 9.0, 13.0, 13.0, 15.0, 27.0, 22.0, 14.0, 29.0, 28.0, 34.0, 31.0, 36.0, 47.0, 52.0, 38.0, 50.0, 49.0, 50.0, 36.0, 37.0, 41.0, 44.0, 36.0, 35.0, 31.0, 22.0, 23.0, 24.0, 14.0, 19.0, 13.0, 15.0, 8.0, 8.0, 3.0, 7.0, 6.0, 2.0, 7.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.376953125, -2.296905517578125, -2.21685791015625, -2.136810302734375, -2.0567626953125, -1.976715087890625, -1.89666748046875, -1.816619873046875, -1.736572265625, -1.656524658203125, -1.57647705078125, -1.496429443359375, -1.4163818359375, -1.336334228515625, -1.25628662109375, -1.176239013671875, -1.09619140625, -1.016143798828125, -0.93609619140625, -0.856048583984375, -0.7760009765625, -0.695953369140625, -0.61590576171875, -0.535858154296875, -0.455810546875, -0.375762939453125, -0.29571533203125, -0.215667724609375, -0.1356201171875, -0.055572509765625, 0.02447509765625, 0.104522705078125, 0.1845703125, 0.264617919921875, 0.34466552734375, 0.424713134765625, 0.5047607421875, 0.584808349609375, 0.66485595703125, 0.744903564453125, 0.824951171875, 0.904998779296875, 0.98504638671875, 1.065093994140625, 1.1451416015625, 1.225189208984375, 1.30523681640625, 1.385284423828125, 1.46533203125, 1.545379638671875, 1.62542724609375, 1.705474853515625, 1.7855224609375, 1.865570068359375, 1.94561767578125, 2.025665283203125, 2.105712890625, 2.185760498046875, 2.26580810546875, 2.345855712890625, 2.4259033203125, 2.505950927734375, 2.58599853515625, 2.666046142578125, 2.74609375]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 3.0, 3.0, 8.0, 6.0, 7.0, 13.0, 19.0, 33.0, 37.0, 51.0, 82.0, 113.0, 177.0, 260.0, 377.0, 572.0, 924.0, 1476.0, 2330.0, 3580.0, 5862.0, 9807.0, 16274.0, 27015.0, 45356.0, 74976.0, 122388.0, 190473.0, 202971.0, 134488.0, 82778.0, 50038.0, 30008.0, 17856.0, 10728.0, 6481.0, 4057.0, 2418.0, 1547.0, 995.0, 715.0, 422.0, 249.0, 191.0, 128.0, 80.0, 59.0, 32.0, 37.0, 18.0, 17.0, 14.0, 9.0, 3.0, 2.0, 3.0, 1.0, 0.0, 1.0, 3.0], "bins": [-0.2022705078125, -0.1959686279296875, -0.189666748046875, -0.1833648681640625, -0.17706298828125, -0.1707611083984375, -0.164459228515625, -0.1581573486328125, -0.15185546875, -0.1455535888671875, -0.139251708984375, -0.1329498291015625, -0.12664794921875, -0.1203460693359375, -0.114044189453125, -0.1077423095703125, -0.1014404296875, -0.0951385498046875, -0.088836669921875, -0.0825347900390625, -0.07623291015625, -0.0699310302734375, -0.063629150390625, -0.0573272705078125, -0.051025390625, -0.0447235107421875, -0.038421630859375, -0.0321197509765625, -0.02581787109375, -0.0195159912109375, -0.013214111328125, -0.0069122314453125, -0.0006103515625, 0.0056915283203125, 0.011993408203125, 0.0182952880859375, 0.02459716796875, 0.0308990478515625, 0.037200927734375, 0.0435028076171875, 0.0498046875, 0.0561065673828125, 0.062408447265625, 0.0687103271484375, 0.07501220703125, 0.0813140869140625, 0.087615966796875, 0.0939178466796875, 0.1002197265625, 0.1065216064453125, 0.112823486328125, 0.1191253662109375, 0.12542724609375, 0.1317291259765625, 0.138031005859375, 0.1443328857421875, 0.150634765625, 0.1569366455078125, 0.163238525390625, 0.1695404052734375, 0.17584228515625, 0.1821441650390625, 0.188446044921875, 0.1947479248046875, 0.2010498046875]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.bias": {"_type": "histogram", "values": [4.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 6.0, 7.0, 10.0, 6.0, 7.0, 7.0, 14.0, 15.0, 24.0, 18.0, 29.0, 32.0, 32.0, 37.0, 36.0, 34.0, 48.0, 38.0, 49.0, 54.0, 1059.0, 52.0, 46.0, 38.0, 41.0, 42.0, 29.0, 26.0, 29.0, 17.0, 22.0, 30.0, 20.0, 18.0, 14.0, 12.0, 3.0, 6.0, 2.0, 4.0, 2.0, 2.0, 5.0, 3.0, 2.0, 3.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3583984375, -1.3083343505859375, -1.258270263671875, -1.2082061767578125, -1.15814208984375, -1.1080780029296875, -1.058013916015625, -1.0079498291015625, -0.9578857421875, -0.9078216552734375, -0.857757568359375, -0.8076934814453125, -0.75762939453125, -0.7075653076171875, -0.657501220703125, -0.6074371337890625, -0.557373046875, -0.5073089599609375, -0.457244873046875, -0.4071807861328125, -0.35711669921875, -0.3070526123046875, -0.256988525390625, -0.2069244384765625, -0.1568603515625, -0.1067962646484375, -0.056732177734375, -0.0066680908203125, 0.04339599609375, 0.0934600830078125, 0.143524169921875, 0.1935882568359375, 0.24365234375, 0.2937164306640625, 0.343780517578125, 0.3938446044921875, 0.44390869140625, 0.4939727783203125, 0.544036865234375, 0.5941009521484375, 0.6441650390625, 0.6942291259765625, 0.744293212890625, 0.7943572998046875, 0.84442138671875, 0.8944854736328125, 0.944549560546875, 0.9946136474609375, 1.044677734375, 1.0947418212890625, 1.144805908203125, 1.1948699951171875, 1.24493408203125, 1.2949981689453125, 1.345062255859375, 1.3951263427734375, 1.4451904296875, 1.4952545166015625, 1.545318603515625, 1.5953826904296875, 1.64544677734375, 1.6955108642578125, 1.745574951171875, 1.7956390380859375, 1.845703125]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 6.0, 5.0, 18.0, 18.0, 18.0, 40.0, 58.0, 89.0, 115.0, 195.0, 270.0, 404.0, 592.0, 859.0, 1228.0, 1790.0, 2628.0, 3764.0, 5535.0, 7963.0, 11863.0, 17369.0, 25599.0, 37847.0, 55897.0, 82904.0, 118622.0, 1117951.0, 238714.0, 115932.0, 80122.0, 53969.0, 36531.0, 24699.0, 16596.0, 11580.0, 7968.0, 5506.0, 3708.0, 2545.0, 1790.0, 1247.0, 830.0, 571.0, 381.0, 235.0, 174.0, 130.0, 97.0, 48.0, 47.0, 22.0, 20.0, 18.0, 5.0, 6.0, 5.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.1290283203125, -0.12486076354980469, -0.12069320678710938, -0.11652565002441406, -0.11235809326171875, -0.10819053649902344, -0.10402297973632812, -0.09985542297363281, -0.0956878662109375, -0.09152030944824219, -0.08735275268554688, -0.08318519592285156, -0.07901763916015625, -0.07485008239746094, -0.07068252563476562, -0.06651496887207031, -0.062347412109375, -0.05817985534667969, -0.054012298583984375, -0.04984474182128906, -0.04567718505859375, -0.04150962829589844, -0.037342071533203125, -0.03317451477050781, -0.0290069580078125, -0.024839401245117188, -0.020671844482421875, -0.016504287719726562, -0.01233673095703125, -0.008169174194335938, -0.004001617431640625, 0.0001659393310546875, 0.00433349609375, 0.008501052856445312, 0.012668609619140625, 0.016836166381835938, 0.02100372314453125, 0.025171279907226562, 0.029338836669921875, 0.03350639343261719, 0.0376739501953125, 0.04184150695800781, 0.046009063720703125, 0.05017662048339844, 0.05434417724609375, 0.05851173400878906, 0.06267929077148438, 0.06684684753417969, 0.071014404296875, 0.07518196105957031, 0.07934951782226562, 0.08351707458496094, 0.08768463134765625, 0.09185218811035156, 0.09601974487304688, 0.10018730163574219, 0.1043548583984375, 0.10852241516113281, 0.11268997192382812, 0.11685752868652344, 0.12102508544921875, 0.12519264221191406, 0.12936019897460938, 0.1335277557373047, 0.1376953125]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 7.0, 5.0, 8.0, 8.0, 6.0, 16.0, 3.0, 6.0, 14.0, 14.0, 21.0, 25.0, 36.0, 34.0, 47.0, 35.0, 55.0, 52.0, 50.0, 38.0, 48.0, 43.0, 56.0, 55.0, 36.0, 45.0, 45.0, 28.0, 27.0, 23.0, 12.0, 22.0, 18.0, 10.0, 11.0, 13.0, 6.0, 6.0, 7.0, 4.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0005435943603515625, -0.0005276650190353394, -0.0005117356777191162, -0.0004958063364028931, -0.0004798769950866699, -0.0004639476537704468, -0.00044801831245422363, -0.0004320889711380005, -0.00041615962982177734, -0.0004002302885055542, -0.00038430094718933105, -0.0003683716058731079, -0.00035244226455688477, -0.0003365129232406616, -0.0003205835819244385, -0.00030465424060821533, -0.0002887248992919922, -0.00027279555797576904, -0.0002568662166595459, -0.00024093687534332275, -0.0002250075340270996, -0.00020907819271087646, -0.00019314885139465332, -0.00017721951007843018, -0.00016129016876220703, -0.0001453608274459839, -0.00012943148612976074, -0.0001135021448135376, -9.757280349731445e-05, -8.164346218109131e-05, -6.571412086486816e-05, -4.978477954864502e-05, -3.3855438232421875e-05, -1.792609691619873e-05, -1.996755599975586e-06, 1.3932585716247559e-05, 2.9861927032470703e-05, 4.579126834869385e-05, 6.172060966491699e-05, 7.764995098114014e-05, 9.357929229736328e-05, 0.00010950863361358643, 0.00012543797492980957, 0.00014136731624603271, 0.00015729665756225586, 0.000173225998878479, 0.00018915534019470215, 0.0002050846815109253, 0.00022101402282714844, 0.00023694336414337158, 0.0002528727054595947, 0.00026880204677581787, 0.000284731388092041, 0.00030066072940826416, 0.0003165900707244873, 0.00033251941204071045, 0.0003484487533569336, 0.00036437809467315674, 0.0003803074359893799, 0.00039623677730560303, 0.00041216611862182617, 0.0004280954599380493, 0.00044402480125427246, 0.0004599541425704956, 0.00047588348388671875]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 2.0, 0.0, 5.0, 2.0, 6.0, 0.0, 7.0, 6.0, 7.0, 8.0, 12.0, 13.0, 24.0, 21.0, 22.0, 31.0, 37.0, 31.0, 46.0, 62.0, 72.0, 94.0, 118.0, 165.0, 196.0, 230.0, 385.0, 821.0, 23234.0, 1018160.0, 2672.0, 617.0, 326.0, 217.0, 186.0, 137.0, 112.0, 87.0, 58.0, 56.0, 50.0, 52.0, 36.0, 28.0, 26.0, 20.0, 17.0, 16.0, 10.0, 10.0, 7.0, 2.0, 1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0], "bins": [-0.00925445556640625, -0.008968234062194824, -0.008682012557983398, -0.008395791053771973, -0.008109569549560547, -0.007823348045349121, -0.007537126541137695, -0.0072509050369262695, -0.006964683532714844, -0.006678462028503418, -0.006392240524291992, -0.006106019020080566, -0.005819797515869141, -0.005533576011657715, -0.005247354507446289, -0.004961133003234863, -0.0046749114990234375, -0.004388689994812012, -0.004102468490600586, -0.00381624698638916, -0.0035300254821777344, -0.0032438039779663086, -0.002957582473754883, -0.002671360969543457, -0.0023851394653320312, -0.0020989179611206055, -0.0018126964569091797, -0.001526474952697754, -0.0012402534484863281, -0.0009540319442749023, -0.0006678104400634766, -0.0003815889358520508, -9.5367431640625e-05, 0.00019085407257080078, 0.00047707557678222656, 0.0007632970809936523, 0.0010495185852050781, 0.001335740089416504, 0.0016219615936279297, 0.0019081830978393555, 0.0021944046020507812, 0.002480626106262207, 0.002766847610473633, 0.0030530691146850586, 0.0033392906188964844, 0.00362551212310791, 0.003911733627319336, 0.004197955131530762, 0.0044841766357421875, 0.004770398139953613, 0.005056619644165039, 0.005342841148376465, 0.005629062652587891, 0.005915284156799316, 0.006201505661010742, 0.006487727165222168, 0.006773948669433594, 0.0070601701736450195, 0.007346391677856445, 0.007632613182067871, 0.007918834686279297, 0.008205056190490723, 0.008491277694702148, 0.008777499198913574, 0.009063720703125]}, "gradients/decoder.transformer.h.23.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 34.0, 844.0, 135.0, 1.0], "bins": [-0.0048851193860173225, -0.004805841017514467, -0.004726562183350325, -0.004647283814847469, -0.004568005446344614, -0.004488727077841759, -0.004409448243677616, -0.004330169875174761, -0.0042508915066719055, -0.00417161313816905, -0.004092334304004908, -0.004013055935502052, -0.003933777566999197, -0.003854498965665698, -0.0037752205971628428, -0.003695941995829344, -0.003616663394495845, -0.003537384793162346, -0.0034581064246594906, -0.0033788278233259916, -0.0032995494548231363, -0.0032202708534896374, -0.003140992484986782, -0.003061713883653283, -0.002982435282319784, -0.002903156680986285, -0.00282387831248343, -0.002744599711149931, -0.0026653213426470757, -0.0025860427413135767, -0.0025067643728107214, -0.0024274857714772224, -0.0023482071701437235, -0.0022689285688102245, -0.0021896502003073692, -0.0021103715989738703, -0.002031093230471015, -0.001951814629137516, -0.001872536144219339, -0.0017932576593011618, -0.0017139792907983065, -0.0016347008058801293, -0.0015554223209619522, -0.001476143836043775, -0.001396865351125598, -0.001317586749792099, -0.0012383082648739219, -0.0011590297799557447, -0.0010797512950375676, -0.0010004728101193905, -0.0009211943252012134, -0.0008419157820753753, -0.0007626372971571982, -0.0006833588122390211, -0.000604080269113183, -0.0005248017841950059, -0.00044552329927682877, -0.00036624481435865164, -0.00028696630033664405, -0.0002076878008665517, -0.00012840930139645934, -4.913081647828221e-05, 3.014769754372537e-05, 0.00010942621156573296, 0.0001887047110358253]}, "gradients/decoder.transformer.h.23.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 5.0, 4.0, 4.0, 9.0, 7.0, 3.0, 7.0, 16.0, 9.0, 16.0, 11.0, 11.0, 10.0, 18.0, 22.0, 30.0, 32.0, 43.0, 23.0, 35.0, 34.0, 43.0, 40.0, 41.0, 32.0, 36.0, 37.0, 28.0, 36.0, 29.0, 36.0, 40.0, 29.0, 29.0, 17.0, 20.0, 18.0, 32.0, 13.0, 25.0, 14.0, 9.0, 12.0, 5.0, 9.0, 8.0, 5.0, 9.0, 6.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00027430057525634766, -0.0002656802535057068, -0.0002570599317550659, -0.00024843961000442505, -0.00023981928825378418, -0.0002311989665031433, -0.00022257864475250244, -0.00021395832300186157, -0.0002053380012512207, -0.00019671767950057983, -0.00018809735774993896, -0.0001794770359992981, -0.00017085671424865723, -0.00016223639249801636, -0.0001536160707473755, -0.00014499574899673462, -0.00013637542724609375, -0.00012775510549545288, -0.00011913478374481201, -0.00011051446199417114, -0.00010189414024353027, -9.32738184928894e-05, -8.465349674224854e-05, -7.603317499160767e-05, -6.74128532409668e-05, -5.879253149032593e-05, -5.017220973968506e-05, -4.155188798904419e-05, -3.293156623840332e-05, -2.431124448776245e-05, -1.5690922737121582e-05, -7.070600986480713e-06, 1.5497207641601562e-06, 1.0170042514801025e-05, 1.8790364265441895e-05, 2.7410686016082764e-05, 3.603100776672363e-05, 4.46513295173645e-05, 5.327165126800537e-05, 6.189197301864624e-05, 7.051229476928711e-05, 7.913261651992798e-05, 8.775293827056885e-05, 9.637326002120972e-05, 0.00010499358177185059, 0.00011361390352249146, 0.00012223422527313232, 0.0001308545470237732, 0.00013947486877441406, 0.00014809519052505493, 0.0001567155122756958, 0.00016533583402633667, 0.00017395615577697754, 0.0001825764775276184, 0.00019119679927825928, 0.00019981712102890015, 0.00020843744277954102, 0.00021705776453018188, 0.00022567808628082275, 0.00023429840803146362, 0.0002429187297821045, 0.00025153905153274536, 0.00026015937328338623, 0.0002687796950340271, 0.00027740001678466797]}, "gradients/decoder.transformer.h.23.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 0.0, 3.0, 5.0, 3.0, 2.0, 7.0, 9.0, 13.0, 13.0, 15.0, 27.0, 22.0, 14.0, 29.0, 28.0, 34.0, 31.0, 36.0, 47.0, 52.0, 38.0, 50.0, 49.0, 50.0, 36.0, 37.0, 41.0, 44.0, 36.0, 35.0, 31.0, 22.0, 23.0, 24.0, 14.0, 19.0, 13.0, 15.0, 8.0, 8.0, 3.0, 7.0, 6.0, 2.0, 7.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.376953125, -2.296905517578125, -2.21685791015625, -2.136810302734375, -2.0567626953125, -1.976715087890625, -1.89666748046875, -1.816619873046875, -1.736572265625, -1.656524658203125, -1.57647705078125, -1.496429443359375, -1.4163818359375, -1.336334228515625, -1.25628662109375, -1.176239013671875, -1.09619140625, -1.016143798828125, -0.93609619140625, -0.856048583984375, -0.7760009765625, -0.695953369140625, -0.61590576171875, -0.535858154296875, -0.455810546875, -0.375762939453125, -0.29571533203125, -0.215667724609375, -0.1356201171875, -0.055572509765625, 0.02447509765625, 0.104522705078125, 0.1845703125, 0.264617919921875, 0.34466552734375, 0.424713134765625, 0.5047607421875, 0.584808349609375, 0.66485595703125, 0.744903564453125, 0.824951171875, 0.904998779296875, 0.98504638671875, 1.065093994140625, 1.1451416015625, 1.225189208984375, 1.30523681640625, 1.385284423828125, 1.46533203125, 1.545379638671875, 1.62542724609375, 1.705474853515625, 1.7855224609375, 1.865570068359375, 1.94561767578125, 2.025665283203125, 2.105712890625, 2.185760498046875, 2.26580810546875, 2.345855712890625, 2.4259033203125, 2.505950927734375, 2.58599853515625, 2.666046142578125, 2.74609375]}, "gradients/decoder.transformer.h.23.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 7.0, 2.0, 5.0, 9.0, 14.0, 30.0, 34.0, 44.0, 58.0, 82.0, 122.0, 151.0, 212.0, 290.0, 391.0, 591.0, 731.0, 1107.0, 1483.0, 2214.0, 3087.0, 4795.0, 7646.0, 13639.0, 29917.0, 92708.0, 331696.0, 374486.0, 109682.0, 34039.0, 15059.0, 8122.0, 4997.0, 3308.0, 2169.0, 1558.0, 1115.0, 827.0, 599.0, 420.0, 320.0, 245.0, 155.0, 112.0, 82.0, 63.0, 50.0, 28.0, 22.0, 17.0, 11.0, 6.0, 4.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0], "bins": [-6.78515625, -6.57354736328125, -6.3619384765625, -6.15032958984375, -5.938720703125, -5.72711181640625, -5.5155029296875, -5.30389404296875, -5.09228515625, -4.88067626953125, -4.6690673828125, -4.45745849609375, -4.245849609375, -4.03424072265625, -3.8226318359375, -3.61102294921875, -3.3994140625, -3.18780517578125, -2.9761962890625, -2.76458740234375, -2.552978515625, -2.34136962890625, -2.1297607421875, -1.91815185546875, -1.70654296875, -1.49493408203125, -1.2833251953125, -1.07171630859375, -0.860107421875, -0.64849853515625, -0.4368896484375, -0.22528076171875, -0.013671875, 0.19793701171875, 0.4095458984375, 0.62115478515625, 0.832763671875, 1.04437255859375, 1.2559814453125, 1.46759033203125, 1.67919921875, 1.89080810546875, 2.1024169921875, 2.31402587890625, 2.525634765625, 2.73724365234375, 2.9488525390625, 3.16046142578125, 3.3720703125, 3.58367919921875, 3.7952880859375, 4.00689697265625, 4.218505859375, 4.43011474609375, 4.6417236328125, 4.85333251953125, 5.06494140625, 5.27655029296875, 5.4881591796875, 5.69976806640625, 5.911376953125, 6.12298583984375, 6.3345947265625, 6.54620361328125, 6.7578125]}, "gradients/decoder.transformer.h.23.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 1.0, 6.0, 4.0, 7.0, 6.0, 6.0, 13.0, 10.0, 10.0, 4.0, 24.0, 18.0, 15.0, 24.0, 19.0, 30.0, 26.0, 31.0, 33.0, 53.0, 54.0, 84.0, 150.0, 223.0, 1314.0, 244.0, 143.0, 97.0, 51.0, 51.0, 40.0, 30.0, 37.0, 32.0, 23.0, 19.0, 19.0, 23.0, 18.0, 13.0, 15.0, 8.0, 7.0, 5.0, 6.0, 2.0, 4.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-7.49609375, -7.27294921875, -7.0498046875, -6.82666015625, -6.603515625, -6.38037109375, -6.1572265625, -5.93408203125, -5.7109375, -5.48779296875, -5.2646484375, -5.04150390625, -4.818359375, -4.59521484375, -4.3720703125, -4.14892578125, -3.92578125, -3.70263671875, -3.4794921875, -3.25634765625, -3.033203125, -2.81005859375, -2.5869140625, -2.36376953125, -2.140625, -1.91748046875, -1.6943359375, -1.47119140625, -1.248046875, -1.02490234375, -0.8017578125, -0.57861328125, -0.35546875, -0.13232421875, 0.0908203125, 0.31396484375, 0.537109375, 0.76025390625, 0.9833984375, 1.20654296875, 1.4296875, 1.65283203125, 1.8759765625, 2.09912109375, 2.322265625, 2.54541015625, 2.7685546875, 2.99169921875, 3.21484375, 3.43798828125, 3.6611328125, 3.88427734375, 4.107421875, 4.33056640625, 4.5537109375, 4.77685546875, 5.0, 5.22314453125, 5.4462890625, 5.66943359375, 5.892578125, 6.11572265625, 6.3388671875, 6.56201171875, 6.78515625]}, "gradients/decoder.transformer.h.23.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 5.0, 3.0, 3.0, 4.0, 6.0, 10.0, 10.0, 14.0, 14.0, 12.0, 23.0, 22.0, 30.0, 34.0, 34.0, 47.0, 71.0, 112.0, 218.0, 613.0, 2778.0, 358014.0, 2774986.0, 6927.0, 889.0, 288.0, 142.0, 100.0, 57.0, 37.0, 26.0, 30.0, 18.0, 24.0, 18.0, 17.0, 11.0, 7.0, 9.0, 15.0, 4.0, 6.0, 6.0, 6.0, 2.0, 4.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-26.25, -25.349853515625, -24.44970703125, -23.549560546875, -22.6494140625, -21.749267578125, -20.84912109375, -19.948974609375, -19.048828125, -18.148681640625, -17.24853515625, -16.348388671875, -15.4482421875, -14.548095703125, -13.64794921875, -12.747802734375, -11.84765625, -10.947509765625, -10.04736328125, -9.147216796875, -8.2470703125, -7.346923828125, -6.44677734375, -5.546630859375, -4.646484375, -3.746337890625, -2.84619140625, -1.946044921875, -1.0458984375, -0.145751953125, 0.75439453125, 1.654541015625, 2.5546875, 3.454833984375, 4.35498046875, 5.255126953125, 6.1552734375, 7.055419921875, 7.95556640625, 8.855712890625, 9.755859375, 10.656005859375, 11.55615234375, 12.456298828125, 13.3564453125, 14.256591796875, 15.15673828125, 16.056884765625, 16.95703125, 17.857177734375, 18.75732421875, 19.657470703125, 20.5576171875, 21.457763671875, 22.35791015625, 23.258056640625, 24.158203125, 25.058349609375, 25.95849609375, 26.858642578125, 27.7587890625, 28.658935546875, 29.55908203125, 30.459228515625, 31.359375]}, "gradients/decoder.transformer.h.23.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 11.0, 887.0, 118.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-79.46012878417969, -73.2244873046875, -66.98884582519531, -60.75320816040039, -54.5175666809082, -48.281925201416016, -42.046287536621094, -35.810646057128906, -29.57500457763672, -23.33936309814453, -17.103723526000977, -10.868083953857422, -4.632442474365234, 1.6031990051269531, 7.838836669921875, 14.074478149414062, 20.31011962890625, 26.545761108398438, 32.781402587890625, 39.01704025268555, 45.252681732177734, 51.48832321166992, 57.723960876464844, 63.95960235595703, 70.19524383544922, 76.4308853149414, 82.6665267944336, 88.90216064453125, 95.13780212402344, 101.37344360351562, 107.60908508300781, 113.8447265625, 120.08038330078125, 126.31602478027344, 132.55166625976562, 138.7873077392578, 145.02294921875, 151.2585906982422, 157.49423217773438, 163.7298583984375, 169.96551513671875, 176.20115661621094, 182.43679809570312, 188.6724395751953, 194.9080810546875, 201.1437225341797, 207.37936401367188, 213.614990234375, 219.8506317138672, 226.08627319335938, 232.32191467285156, 238.55755615234375, 244.79319763183594, 251.02883911132812, 257.26446533203125, 263.5001220703125, 269.7357482910156, 275.97137451171875, 282.20703125, 288.4426574707031, 294.6783142089844, 300.9139404296875, 307.14959716796875, 313.3852233886719, 319.6208801269531]}, "gradients/decoder.transformer.h.23.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 5.0, 2.0, 3.0, 3.0, 3.0, 10.0, 7.0, 11.0, 11.0, 16.0, 15.0, 13.0, 21.0, 22.0, 25.0, 18.0, 35.0, 31.0, 29.0, 37.0, 32.0, 40.0, 35.0, 39.0, 47.0, 36.0, 41.0, 25.0, 34.0, 49.0, 47.0, 28.0, 28.0, 19.0, 34.0, 24.0, 26.0, 14.0, 17.0, 11.0, 9.0, 13.0, 8.0, 12.0, 4.0, 7.0, 5.0, 4.0, 3.0, 1.0, 2.0, 0.0, 1.0], "bins": [-26.04438018798828, -25.22437286376953, -24.40436363220215, -23.5843563079834, -22.764347076416016, -21.944339752197266, -21.124332427978516, -20.304323196411133, -19.484315872192383, -18.664308547973633, -17.84429931640625, -17.0242919921875, -16.204282760620117, -15.384275436401367, -14.5642671585083, -13.744258880615234, -12.924250602722168, -12.104242324829102, -11.284234046936035, -10.464225769042969, -9.644218444824219, -8.824210166931152, -8.004201889038086, -7.184194087982178, -6.364185810089111, -5.544177532196045, -4.724169731140137, -3.9041614532470703, -3.084153413772583, -2.2641453742980957, -1.4441370964050293, -0.6241292953491211, 0.1958789825439453, 1.0158870220184326, 1.8358951807022095, 2.6559033393859863, 3.4759113788604736, 4.295919418334961, 5.115927696228027, 5.9359354972839355, 6.755943775177002, 7.575952053070068, 8.395959854125977, 9.215968132019043, 10.03597640991211, 10.85598373413086, 11.675992965698242, 12.496000289916992, 13.316008567810059, 14.136016845703125, 14.956025123596191, 15.776033401489258, 16.596040725708008, 17.41604995727539, 18.23605728149414, 19.05606460571289, 19.876073837280273, 20.696081161499023, 21.516090393066406, 22.336097717285156, 23.15610694885254, 23.97611427307129, 24.796123504638672, 25.616130828857422, 26.436138153076172]}, "gradients/decoder.transformer.h.22.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 4.0, 6.0, 2.0, 7.0, 12.0, 12.0, 15.0, 8.0, 18.0, 19.0, 21.0, 17.0, 35.0, 37.0, 34.0, 37.0, 46.0, 48.0, 44.0, 47.0, 46.0, 45.0, 37.0, 31.0, 50.0, 39.0, 35.0, 35.0, 31.0, 28.0, 27.0, 27.0, 16.0, 13.0, 19.0, 11.0, 11.0, 6.0, 6.0, 7.0, 7.0, 2.0, 2.0, 4.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.58203125, -2.4959716796875, -2.409912109375, -2.3238525390625, -2.23779296875, -2.1517333984375, -2.065673828125, -1.9796142578125, -1.8935546875, -1.8074951171875, -1.721435546875, -1.6353759765625, -1.54931640625, -1.4632568359375, -1.377197265625, -1.2911376953125, -1.205078125, -1.1190185546875, -1.032958984375, -0.9468994140625, -0.86083984375, -0.7747802734375, -0.688720703125, -0.6026611328125, -0.5166015625, -0.4305419921875, -0.344482421875, -0.2584228515625, -0.17236328125, -0.0863037109375, -0.000244140625, 0.0858154296875, 0.171875, 0.2579345703125, 0.343994140625, 0.4300537109375, 0.51611328125, 0.6021728515625, 0.688232421875, 0.7742919921875, 0.8603515625, 0.9464111328125, 1.032470703125, 1.1185302734375, 1.20458984375, 1.2906494140625, 1.376708984375, 1.4627685546875, 1.548828125, 1.6348876953125, 1.720947265625, 1.8070068359375, 1.89306640625, 1.9791259765625, 2.065185546875, 2.1512451171875, 2.2373046875, 2.3233642578125, 2.409423828125, 2.4954833984375, 2.58154296875, 2.6676025390625, 2.753662109375, 2.8397216796875, 2.92578125]}, "gradients/decoder.transformer.h.22.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 4.0, 3.0, 8.0, 13.0, 9.0, 12.0, 19.0, 32.0, 50.0, 53.0, 73.0, 108.0, 144.0, 190.0, 281.0, 486.0, 842.0, 1864.0, 4628.0, 15994.0, 74950.0, 590109.0, 2795175.0, 606658.0, 76989.0, 16401.0, 4995.0, 1791.0, 874.0, 461.0, 275.0, 202.0, 145.0, 133.0, 90.0, 59.0, 38.0, 28.0, 25.0, 18.0, 16.0, 14.0, 10.0, 8.0, 3.0, 2.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-9.4765625, -9.1553955078125, -8.834228515625, -8.5130615234375, -8.19189453125, -7.8707275390625, -7.549560546875, -7.2283935546875, -6.9072265625, -6.5860595703125, -6.264892578125, -5.9437255859375, -5.62255859375, -5.3013916015625, -4.980224609375, -4.6590576171875, -4.337890625, -4.0167236328125, -3.695556640625, -3.3743896484375, -3.05322265625, -2.7320556640625, -2.410888671875, -2.0897216796875, -1.7685546875, -1.4473876953125, -1.126220703125, -0.8050537109375, -0.48388671875, -0.1627197265625, 0.158447265625, 0.4796142578125, 0.80078125, 1.1219482421875, 1.443115234375, 1.7642822265625, 2.08544921875, 2.4066162109375, 2.727783203125, 3.0489501953125, 3.3701171875, 3.6912841796875, 4.012451171875, 4.3336181640625, 4.65478515625, 4.9759521484375, 5.297119140625, 5.6182861328125, 5.939453125, 6.2606201171875, 6.581787109375, 6.9029541015625, 7.22412109375, 7.5452880859375, 7.866455078125, 8.1876220703125, 8.5087890625, 8.8299560546875, 9.151123046875, 9.4722900390625, 9.79345703125, 10.1146240234375, 10.435791015625, 10.7569580078125, 11.078125]}, "gradients/decoder.transformer.h.22.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 5.0, 3.0, 4.0, 9.0, 7.0, 13.0, 24.0, 37.0, 30.0, 57.0, 68.0, 84.0, 111.0, 178.0, 208.0, 246.0, 279.0, 414.0, 404.0, 377.0, 350.0, 277.0, 243.0, 187.0, 123.0, 83.0, 72.0, 46.0, 47.0, 21.0, 28.0, 10.0, 10.0, 7.0, 6.0, 4.0, 3.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-10.828125, -10.54107666015625, -10.2540283203125, -9.96697998046875, -9.679931640625, -9.39288330078125, -9.1058349609375, -8.81878662109375, -8.53173828125, -8.24468994140625, -7.9576416015625, -7.67059326171875, -7.383544921875, -7.09649658203125, -6.8094482421875, -6.52239990234375, -6.2353515625, -5.94830322265625, -5.6612548828125, -5.37420654296875, -5.087158203125, -4.80010986328125, -4.5130615234375, -4.22601318359375, -3.93896484375, -3.65191650390625, -3.3648681640625, -3.07781982421875, -2.790771484375, -2.50372314453125, -2.2166748046875, -1.92962646484375, -1.642578125, -1.35552978515625, -1.0684814453125, -0.78143310546875, -0.494384765625, -0.20733642578125, 0.0797119140625, 0.36676025390625, 0.65380859375, 0.94085693359375, 1.2279052734375, 1.51495361328125, 1.802001953125, 2.08905029296875, 2.3760986328125, 2.66314697265625, 2.9501953125, 3.23724365234375, 3.5242919921875, 3.81134033203125, 4.098388671875, 4.38543701171875, 4.6724853515625, 4.95953369140625, 5.24658203125, 5.53363037109375, 5.8206787109375, 6.10772705078125, 6.394775390625, 6.68182373046875, 6.9688720703125, 7.25592041015625, 7.54296875]}, "gradients/decoder.transformer.h.22.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 4.0, 2.0, 7.0, 7.0, 4.0, 6.0, 9.0, 10.0, 15.0, 36.0, 40.0, 38.0, 66.0, 117.0, 187.0, 402.0, 887.0, 3103.0, 27988.0, 1003065.0, 3048841.0, 100878.0, 6116.0, 1352.0, 501.0, 230.0, 124.0, 73.0, 51.0, 47.0, 21.0, 21.0, 14.0, 8.0, 4.0, 3.0, 10.0, 2.0, 2.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.71875, -18.86865234375, -18.0185546875, -17.16845703125, -16.318359375, -15.46826171875, -14.6181640625, -13.76806640625, -12.91796875, -12.06787109375, -11.2177734375, -10.36767578125, -9.517578125, -8.66748046875, -7.8173828125, -6.96728515625, -6.1171875, -5.26708984375, -4.4169921875, -3.56689453125, -2.716796875, -1.86669921875, -1.0166015625, -0.16650390625, 0.68359375, 1.53369140625, 2.3837890625, 3.23388671875, 4.083984375, 4.93408203125, 5.7841796875, 6.63427734375, 7.484375, 8.33447265625, 9.1845703125, 10.03466796875, 10.884765625, 11.73486328125, 12.5849609375, 13.43505859375, 14.28515625, 15.13525390625, 15.9853515625, 16.83544921875, 17.685546875, 18.53564453125, 19.3857421875, 20.23583984375, 21.0859375, 21.93603515625, 22.7861328125, 23.63623046875, 24.486328125, 25.33642578125, 26.1865234375, 27.03662109375, 27.88671875, 28.73681640625, 29.5869140625, 30.43701171875, 31.287109375, 32.13720703125, 32.9873046875, 33.83740234375, 34.6875]}, "gradients/decoder.transformer.h.22.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 24.0, 178.0, 529.0, 251.0, 31.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.37868881225586, -55.20637512207031, -50.03406524658203, -44.861751556396484, -39.68943786621094, -34.517127990722656, -29.34481430053711, -24.172504425048828, -19.00019073486328, -13.827878952026367, -8.655566215515137, -3.4832534790039062, 1.6890583038330078, 6.861370086669922, 12.033683776855469, 17.20599365234375, 22.378307342529297, 27.55061912536621, 32.722930908203125, 37.89524459838867, 43.06755828857422, 48.2398681640625, 53.41218185424805, 58.58449172973633, 63.756805419921875, 68.92911529541016, 74.10143280029297, 79.27374267578125, 84.44605255126953, 89.61836242675781, 94.79067993164062, 99.9629898071289, 105.13529968261719, 110.30760955810547, 115.47992706298828, 120.65223693847656, 125.82454681396484, 130.99685668945312, 136.16917419433594, 141.34149169921875, 146.5137939453125, 151.6861114501953, 156.85841369628906, 162.03073120117188, 167.2030487060547, 172.37535095214844, 177.54766845703125, 182.719970703125, 187.89230346679688, 193.0646209716797, 198.23692321777344, 203.40924072265625, 208.58155822753906, 213.7538604736328, 218.92617797851562, 224.09848022460938, 229.2707977294922, 234.443115234375, 239.61541748046875, 244.78773498535156, 249.96005249023438, 255.13235473632812, 260.3046875, 265.47698974609375, 270.6492919921875]}, "gradients/decoder.transformer.h.22.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 4.0, 3.0, 4.0, 6.0, 8.0, 6.0, 12.0, 10.0, 18.0, 13.0, 25.0, 25.0, 31.0, 24.0, 29.0, 30.0, 33.0, 29.0, 24.0, 42.0, 43.0, 41.0, 53.0, 36.0, 38.0, 36.0, 45.0, 26.0, 45.0, 29.0, 26.0, 35.0, 25.0, 24.0, 23.0, 12.0, 24.0, 10.0, 10.0, 12.0, 10.0, 11.0, 5.0, 3.0, 2.0, 3.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.33553695678711, -28.34575843811035, -27.355981826782227, -26.36620330810547, -25.37642478942871, -24.386646270751953, -23.396869659423828, -22.40709114074707, -21.417312622070312, -20.427534103393555, -19.43775749206543, -18.447978973388672, -17.458200454711914, -16.468421936035156, -15.478645324707031, -14.488866806030273, -13.499089241027832, -12.50931167602539, -11.519533157348633, -10.529755592346191, -9.539977073669434, -8.550199508666992, -7.560421466827393, -6.570643424987793, -5.580865383148193, -4.591087341308594, -3.601309299468994, -2.6115314960479736, -1.621753454208374, -0.6319756507873535, 0.3578023910522461, 1.3475804328918457, 2.3373584747314453, 3.327136516571045, 4.3169145584106445, 5.306692123413086, 6.296470642089844, 7.286248207092285, 8.276025772094727, 9.265804290771484, 10.255582809448242, 11.245360374450684, 12.235138893127441, 13.224916458129883, 14.21469497680664, 15.204472541809082, 16.194250106811523, 17.18402862548828, 18.173805236816406, 19.163583755493164, 20.15336036682129, 21.143138885498047, 22.132917404174805, 23.122695922851562, 24.112472534179688, 25.102251052856445, 26.092029571533203, 27.08180809020996, 28.071584701538086, 29.061363220214844, 30.0511417388916, 31.04092025756836, 32.030696868896484, 33.020477294921875, 34.01025390625]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 5.0, 7.0, 3.0, 12.0, 9.0, 13.0, 9.0, 11.0, 12.0, 27.0, 31.0, 23.0, 16.0, 31.0, 32.0, 37.0, 29.0, 34.0, 27.0, 39.0, 25.0, 45.0, 47.0, 42.0, 43.0, 37.0, 41.0, 31.0, 22.0, 33.0, 26.0, 23.0, 28.0, 21.0, 11.0, 19.0, 10.0, 17.0, 12.0, 9.0, 13.0, 3.0, 8.0, 5.0, 5.0, 5.0, 5.0, 2.0, 2.0, 5.0, 1.0, 1.0, 2.0], "bins": [-2.578125, -2.500579833984375, -2.42303466796875, -2.345489501953125, -2.2679443359375, -2.190399169921875, -2.11285400390625, -2.035308837890625, -1.957763671875, -1.880218505859375, -1.80267333984375, -1.725128173828125, -1.6475830078125, -1.570037841796875, -1.49249267578125, -1.414947509765625, -1.33740234375, -1.259857177734375, -1.18231201171875, -1.104766845703125, -1.0272216796875, -0.949676513671875, -0.87213134765625, -0.794586181640625, -0.717041015625, -0.639495849609375, -0.56195068359375, -0.484405517578125, -0.4068603515625, -0.329315185546875, -0.25177001953125, -0.174224853515625, -0.0966796875, -0.019134521484375, 0.05841064453125, 0.135955810546875, 0.2135009765625, 0.291046142578125, 0.36859130859375, 0.446136474609375, 0.523681640625, 0.601226806640625, 0.67877197265625, 0.756317138671875, 0.8338623046875, 0.911407470703125, 0.98895263671875, 1.066497802734375, 1.14404296875, 1.221588134765625, 1.29913330078125, 1.376678466796875, 1.4542236328125, 1.531768798828125, 1.60931396484375, 1.686859130859375, 1.764404296875, 1.841949462890625, 1.91949462890625, 1.997039794921875, 2.0745849609375, 2.152130126953125, 2.22967529296875, 2.307220458984375, 2.384765625]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 6.0, 6.0, 12.0, 13.0, 26.0, 26.0, 36.0, 83.0, 96.0, 145.0, 209.0, 287.0, 453.0, 651.0, 985.0, 1444.0, 2042.0, 3081.0, 4474.0, 6737.0, 10196.0, 15216.0, 22400.0, 34304.0, 52072.0, 80125.0, 123075.0, 172778.0, 169600.0, 119417.0, 77822.0, 50672.0, 33179.0, 22225.0, 14690.0, 9617.0, 6713.0, 4373.0, 3033.0, 1977.0, 1364.0, 924.0, 628.0, 440.0, 290.0, 203.0, 127.0, 101.0, 68.0, 39.0, 33.0, 16.0, 9.0, 10.0, 9.0, 4.0, 5.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.1778564453125, -0.1719207763671875, -0.165985107421875, -0.1600494384765625, -0.15411376953125, -0.1481781005859375, -0.142242431640625, -0.1363067626953125, -0.13037109375, -0.1244354248046875, -0.118499755859375, -0.1125640869140625, -0.10662841796875, -0.1006927490234375, -0.094757080078125, -0.0888214111328125, -0.0828857421875, -0.0769500732421875, -0.071014404296875, -0.0650787353515625, -0.05914306640625, -0.0532073974609375, -0.047271728515625, -0.0413360595703125, -0.035400390625, -0.0294647216796875, -0.023529052734375, -0.0175933837890625, -0.01165771484375, -0.0057220458984375, 0.000213623046875, 0.0061492919921875, 0.0120849609375, 0.0180206298828125, 0.023956298828125, 0.0298919677734375, 0.03582763671875, 0.0417633056640625, 0.047698974609375, 0.0536346435546875, 0.0595703125, 0.0655059814453125, 0.071441650390625, 0.0773773193359375, 0.08331298828125, 0.0892486572265625, 0.095184326171875, 0.1011199951171875, 0.1070556640625, 0.1129913330078125, 0.118927001953125, 0.1248626708984375, 0.13079833984375, 0.1367340087890625, 0.142669677734375, 0.1486053466796875, 0.154541015625, 0.1604766845703125, 0.166412353515625, 0.1723480224609375, 0.17828369140625, 0.1842193603515625, 0.190155029296875, 0.1960906982421875, 0.2020263671875]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 5.0, 3.0, 5.0, 6.0, 13.0, 12.0, 8.0, 20.0, 22.0, 16.0, 24.0, 26.0, 30.0, 36.0, 40.0, 37.0, 42.0, 41.0, 43.0, 37.0, 1077.0, 43.0, 50.0, 36.0, 35.0, 44.0, 30.0, 48.0, 35.0, 37.0, 26.0, 28.0, 14.0, 10.0, 10.0, 16.0, 11.0, 5.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 1.0, 1.0], "bins": [-2.05859375, -1.99847412109375, -1.9383544921875, -1.87823486328125, -1.818115234375, -1.75799560546875, -1.6978759765625, -1.63775634765625, -1.57763671875, -1.51751708984375, -1.4573974609375, -1.39727783203125, -1.337158203125, -1.27703857421875, -1.2169189453125, -1.15679931640625, -1.0966796875, -1.03656005859375, -0.9764404296875, -0.91632080078125, -0.856201171875, -0.79608154296875, -0.7359619140625, -0.67584228515625, -0.61572265625, -0.55560302734375, -0.4954833984375, -0.43536376953125, -0.375244140625, -0.31512451171875, -0.2550048828125, -0.19488525390625, -0.134765625, -0.07464599609375, -0.0145263671875, 0.04559326171875, 0.105712890625, 0.16583251953125, 0.2259521484375, 0.28607177734375, 0.34619140625, 0.40631103515625, 0.4664306640625, 0.52655029296875, 0.586669921875, 0.64678955078125, 0.7069091796875, 0.76702880859375, 0.8271484375, 0.88726806640625, 0.9473876953125, 1.00750732421875, 1.067626953125, 1.12774658203125, 1.1878662109375, 1.24798583984375, 1.30810546875, 1.36822509765625, 1.4283447265625, 1.48846435546875, 1.548583984375, 1.60870361328125, 1.6688232421875, 1.72894287109375, 1.7890625]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 3.0, 8.0, 9.0, 12.0, 16.0, 20.0, 34.0, 49.0, 57.0, 75.0, 134.0, 204.0, 283.0, 418.0, 580.0, 940.0, 1322.0, 2040.0, 3087.0, 4834.0, 7391.0, 11676.0, 18656.0, 29371.0, 46772.0, 73751.0, 115592.0, 165012.0, 1228425.0, 137955.0, 90577.0, 57942.0, 36821.0, 22751.0, 14441.0, 8987.0, 5939.0, 3751.0, 2369.0, 1619.0, 1079.0, 650.0, 490.0, 328.0, 234.0, 136.0, 95.0, 73.0, 36.0, 31.0, 19.0, 19.0, 9.0, 7.0, 5.0, 5.0, 3.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.1707763671875, -0.16530799865722656, -0.15983963012695312, -0.1543712615966797, -0.14890289306640625, -0.1434345245361328, -0.13796615600585938, -0.13249778747558594, -0.1270294189453125, -0.12156105041503906, -0.11609268188476562, -0.11062431335449219, -0.10515594482421875, -0.09968757629394531, -0.09421920776367188, -0.08875083923339844, -0.083282470703125, -0.07781410217285156, -0.07234573364257812, -0.06687736511230469, -0.06140899658203125, -0.05594062805175781, -0.050472259521484375, -0.04500389099121094, -0.0395355224609375, -0.03406715393066406, -0.028598785400390625, -0.023130416870117188, -0.01766204833984375, -0.012193679809570312, -0.006725311279296875, -0.0012569427490234375, 0.00421142578125, 0.009679794311523438, 0.015148162841796875, 0.020616531372070312, 0.02608489990234375, 0.03155326843261719, 0.037021636962890625, 0.04249000549316406, 0.0479583740234375, 0.05342674255371094, 0.058895111083984375, 0.06436347961425781, 0.06983184814453125, 0.07530021667480469, 0.08076858520507812, 0.08623695373535156, 0.091705322265625, 0.09717369079589844, 0.10264205932617188, 0.10811042785644531, 0.11357879638671875, 0.11904716491699219, 0.12451553344726562, 0.12998390197753906, 0.1354522705078125, 0.14092063903808594, 0.14638900756835938, 0.1518573760986328, 0.15732574462890625, 0.1627941131591797, 0.16826248168945312, 0.17373085021972656, 0.17919921875]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 3.0, 1.0, 2.0, 4.0, 8.0, 9.0, 13.0, 5.0, 8.0, 9.0, 9.0, 24.0, 19.0, 26.0, 39.0, 32.0, 33.0, 51.0, 60.0, 51.0, 75.0, 71.0, 68.0, 51.0, 61.0, 43.0, 39.0, 39.0, 28.0, 23.0, 19.0, 19.0, 16.0, 9.0, 13.0, 9.0, 7.0, 2.0, 0.0, 3.0, 5.0, 1.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006823539733886719, -0.0006617531180381775, -0.0006411522626876831, -0.0006205514073371887, -0.0005999505519866943, -0.0005793496966362, -0.0005587488412857056, -0.0005381479859352112, -0.0005175471305847168, -0.0004969462752342224, -0.00047634541988372803, -0.00045574456453323364, -0.00043514370918273926, -0.0004145428538322449, -0.0003939419984817505, -0.0003733411431312561, -0.0003527402877807617, -0.00033213943243026733, -0.00031153857707977295, -0.00029093772172927856, -0.0002703368663787842, -0.0002497360110282898, -0.0002291351556777954, -0.00020853430032730103, -0.00018793344497680664, -0.00016733258962631226, -0.00014673173427581787, -0.00012613087892532349, -0.0001055300235748291, -8.492916822433472e-05, -6.432831287384033e-05, -4.372745752334595e-05, -2.3126602172851562e-05, -2.5257468223571777e-06, 1.8075108528137207e-05, 3.867596387863159e-05, 5.9276819229125977e-05, 7.987767457962036e-05, 0.00010047852993011475, 0.00012107938528060913, 0.00014168024063110352, 0.0001622810959815979, 0.00018288195133209229, 0.00020348280668258667, 0.00022408366203308105, 0.00024468451738357544, 0.0002652853727340698, 0.0002858862280845642, 0.0003064870834350586, 0.000327087938785553, 0.00034768879413604736, 0.00036828964948654175, 0.00038889050483703613, 0.0004094913601875305, 0.0004300922155380249, 0.0004506930708885193, 0.00047129392623901367, 0.0004918947815895081, 0.0005124956369400024, 0.0005330964922904968, 0.0005536973476409912, 0.0005742982029914856, 0.00059489905834198, 0.0006154999136924744, 0.0006361007690429688]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 5.0, 9.0, 2.0, 3.0, 4.0, 8.0, 4.0, 12.0, 16.0, 17.0, 28.0, 39.0, 32.0, 33.0, 39.0, 40.0, 60.0, 70.0, 94.0, 122.0, 142.0, 220.0, 388.0, 715.0, 12120.0, 1010937.0, 21079.0, 820.0, 411.0, 250.0, 165.0, 139.0, 109.0, 91.0, 56.0, 59.0, 45.0, 26.0, 45.0, 21.0, 15.0, 15.0, 12.0, 9.0, 9.0, 7.0, 4.0, 3.0, 4.0, 7.0, 4.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0], "bins": [-0.01049041748046875, -0.010168075561523438, -0.009845733642578125, -0.009523391723632812, -0.0092010498046875, -0.008878707885742188, -0.008556365966796875, -0.008234024047851562, -0.00791168212890625, -0.0075893402099609375, -0.007266998291015625, -0.0069446563720703125, -0.006622314453125, -0.0062999725341796875, -0.005977630615234375, -0.0056552886962890625, -0.00533294677734375, -0.0050106048583984375, -0.004688262939453125, -0.0043659210205078125, -0.0040435791015625, -0.0037212371826171875, -0.003398895263671875, -0.0030765533447265625, -0.00275421142578125, -0.0024318695068359375, -0.002109527587890625, -0.0017871856689453125, -0.00146484375, -0.0011425018310546875, -0.000820159912109375, -0.0004978179931640625, -0.00017547607421875, 0.0001468658447265625, 0.000469207763671875, 0.0007915496826171875, 0.0011138916015625, 0.0014362335205078125, 0.001758575439453125, 0.0020809173583984375, 0.00240325927734375, 0.0027256011962890625, 0.003047943115234375, 0.0033702850341796875, 0.003692626953125, 0.0040149688720703125, 0.004337310791015625, 0.0046596527099609375, 0.00498199462890625, 0.0053043365478515625, 0.005626678466796875, 0.0059490203857421875, 0.0062713623046875, 0.0065937042236328125, 0.006916046142578125, 0.0072383880615234375, 0.00756072998046875, 0.007883071899414062, 0.008205413818359375, 0.008527755737304688, 0.00885009765625, 0.009172439575195312, 0.009494781494140625, 0.009817123413085938, 0.01013946533203125]}, "gradients/decoder.transformer.h.22.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 5.0, 63.0, 516.0, 403.0, 28.0, 3.0], "bins": [-0.004223145544528961, -0.004153778776526451, -0.004084412008523941, -0.004015045240521431, -0.003945678472518921, -0.003876311471685767, -0.003806944703683257, -0.0037375777028501034, -0.0036682109348475933, -0.0035988441668450832, -0.003529477398842573, -0.0034601103980094194, -0.0033907436300069094, -0.0033213768620043993, -0.0032520100940018892, -0.0031826430931687355, -0.0031132763251662254, -0.0030439095571637154, -0.0029745427891612053, -0.0029051757883280516, -0.0028358090203255415, -0.0027664422523230314, -0.0026970754843205214, -0.0026277084834873676, -0.002558341948315501, -0.002488975180312991, -0.002419608412310481, -0.0023502414114773273, -0.0022808746434748173, -0.002211507875472307, -0.002142141107469797, -0.002072774339467287, -0.0020034073386341333, -0.0019340405706316233, -0.0018646736862137914, -0.0017953069182112813, -0.0017259400337934494, -0.0016565732657909393, -0.0015872064977884293, -0.0015178396133705974, -0.0014484727289527655, -0.0013791059609502554, -0.0013097390765324235, -0.0012403723085299134, -0.0011710054241120815, -0.0011016386561095715, -0.0010322718881070614, -0.0009629050036892295, -0.0008935381192713976, -0.0008241712930612266, -0.0007548044668510556, -0.0006854376988485456, -0.0006160708144307137, -0.0005467040464282036, -0.0004773372202180326, -0.0004079703940078616, -0.00033860356779769063, -0.00026923674158751965, -0.0001998699299292639, -0.00013050311827100813, -6.113629206083715e-05, 8.230534149333835e-06, 7.759733125567436e-05, 0.00014696415746584535, 0.0002163309691241011]}, "gradients/decoder.transformer.h.22.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 5.0, 3.0, 9.0, 19.0, 18.0, 25.0, 22.0, 17.0, 23.0, 28.0, 19.0, 28.0, 33.0, 26.0, 39.0, 44.0, 45.0, 44.0, 38.0, 43.0, 26.0, 48.0, 46.0, 36.0, 38.0, 31.0, 31.0, 26.0, 23.0, 26.0, 24.0, 17.0, 21.0, 17.0, 15.0, 11.0, 9.0, 8.0, 4.0, 7.0, 6.0, 2.0, 5.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-0.0003572702407836914, -0.0003469865769147873, -0.0003367029130458832, -0.00032641924917697906, -0.00031613558530807495, -0.00030585192143917084, -0.0002955682575702667, -0.0002852845937013626, -0.0002750009298324585, -0.0002647172659635544, -0.00025443360209465027, -0.00024414993822574615, -0.00023386627435684204, -0.00022358261048793793, -0.0002132989466190338, -0.0002030152827501297, -0.00019273161888122559, -0.00018244795501232147, -0.00017216429114341736, -0.00016188062727451324, -0.00015159696340560913, -0.00014131329953670502, -0.0001310296356678009, -0.00012074597179889679, -0.00011046230792999268, -0.00010017864406108856, -8.989498019218445e-05, -7.961131632328033e-05, -6.932765245437622e-05, -5.904398858547211e-05, -4.876032471656799e-05, -3.847666084766388e-05, -2.8192996978759766e-05, -1.7909333109855652e-05, -7.625669240951538e-06, 2.6579946279525757e-06, 1.294165849685669e-05, 2.3225322365760803e-05, 3.350898623466492e-05, 4.379265010356903e-05, 5.4076313972473145e-05, 6.435997784137726e-05, 7.464364171028137e-05, 8.492730557918549e-05, 9.52109694480896e-05, 0.00010549463331699371, 0.00011577829718589783, 0.00012606196105480194, 0.00013634562492370605, 0.00014662928879261017, 0.00015691295266151428, 0.0001671966165304184, 0.0001774802803993225, 0.00018776394426822662, 0.00019804760813713074, 0.00020833127200603485, 0.00021861493587493896, 0.00022889859974384308, 0.0002391822636127472, 0.0002494659274816513, 0.0002597495913505554, 0.00027003325521945953, 0.00028031691908836365, 0.00029060058295726776, 0.0003008842468261719]}, "gradients/decoder.transformer.h.22.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 5.0, 7.0, 3.0, 12.0, 9.0, 13.0, 9.0, 11.0, 12.0, 27.0, 31.0, 23.0, 16.0, 31.0, 32.0, 37.0, 29.0, 34.0, 27.0, 39.0, 25.0, 45.0, 47.0, 42.0, 43.0, 37.0, 41.0, 31.0, 22.0, 33.0, 26.0, 23.0, 28.0, 21.0, 11.0, 19.0, 10.0, 17.0, 12.0, 9.0, 13.0, 3.0, 8.0, 5.0, 5.0, 5.0, 5.0, 2.0, 2.0, 5.0, 1.0, 1.0, 2.0], "bins": [-2.578125, -2.500579833984375, -2.42303466796875, -2.345489501953125, -2.2679443359375, -2.190399169921875, -2.11285400390625, -2.035308837890625, -1.957763671875, -1.880218505859375, -1.80267333984375, -1.725128173828125, -1.6475830078125, -1.570037841796875, -1.49249267578125, -1.414947509765625, -1.33740234375, -1.259857177734375, -1.18231201171875, -1.104766845703125, -1.0272216796875, -0.949676513671875, -0.87213134765625, -0.794586181640625, -0.717041015625, -0.639495849609375, -0.56195068359375, -0.484405517578125, -0.4068603515625, -0.329315185546875, -0.25177001953125, -0.174224853515625, -0.0966796875, -0.019134521484375, 0.05841064453125, 0.135955810546875, 0.2135009765625, 0.291046142578125, 0.36859130859375, 0.446136474609375, 0.523681640625, 0.601226806640625, 0.67877197265625, 0.756317138671875, 0.8338623046875, 0.911407470703125, 0.98895263671875, 1.066497802734375, 1.14404296875, 1.221588134765625, 1.29913330078125, 1.376678466796875, 1.4542236328125, 1.531768798828125, 1.60931396484375, 1.686859130859375, 1.764404296875, 1.841949462890625, 1.91949462890625, 1.997039794921875, 2.0745849609375, 2.152130126953125, 2.22967529296875, 2.307220458984375, 2.384765625]}, "gradients/decoder.transformer.h.22.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 3.0, 5.0, 7.0, 15.0, 18.0, 24.0, 21.0, 30.0, 52.0, 70.0, 101.0, 131.0, 202.0, 269.0, 395.0, 617.0, 1141.0, 2240.0, 4461.0, 10154.0, 23990.0, 60426.0, 144013.0, 266518.0, 272482.0, 150818.0, 64065.0, 25538.0, 10519.0, 4709.0, 2251.0, 1172.0, 704.0, 418.0, 281.0, 201.0, 152.0, 95.0, 72.0, 51.0, 39.0, 27.0, 19.0, 13.0, 7.0, 7.0, 7.0, 7.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3.08203125, -2.985809326171875, -2.88958740234375, -2.793365478515625, -2.6971435546875, -2.600921630859375, -2.50469970703125, -2.408477783203125, -2.312255859375, -2.216033935546875, -2.11981201171875, -2.023590087890625, -1.9273681640625, -1.831146240234375, -1.73492431640625, -1.638702392578125, -1.54248046875, -1.446258544921875, -1.35003662109375, -1.253814697265625, -1.1575927734375, -1.061370849609375, -0.96514892578125, -0.868927001953125, -0.772705078125, -0.676483154296875, -0.58026123046875, -0.484039306640625, -0.3878173828125, -0.291595458984375, -0.19537353515625, -0.099151611328125, -0.0029296875, 0.093292236328125, 0.18951416015625, 0.285736083984375, 0.3819580078125, 0.478179931640625, 0.57440185546875, 0.670623779296875, 0.766845703125, 0.863067626953125, 0.95928955078125, 1.055511474609375, 1.1517333984375, 1.247955322265625, 1.34417724609375, 1.440399169921875, 1.53662109375, 1.632843017578125, 1.72906494140625, 1.825286865234375, 1.9215087890625, 2.017730712890625, 2.11395263671875, 2.210174560546875, 2.306396484375, 2.402618408203125, 2.49884033203125, 2.595062255859375, 2.6912841796875, 2.787506103515625, 2.88372802734375, 2.979949951171875, 3.076171875]}, "gradients/decoder.transformer.h.22.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 5.0, 5.0, 1.0, 15.0, 6.0, 8.0, 8.0, 14.0, 17.0, 23.0, 30.0, 22.0, 22.0, 26.0, 28.0, 35.0, 39.0, 57.0, 45.0, 108.0, 207.0, 1361.0, 286.0, 152.0, 112.0, 75.0, 49.0, 34.0, 36.0, 33.0, 26.0, 35.0, 16.0, 22.0, 14.0, 12.0, 19.0, 10.0, 7.0, 5.0, 6.0, 6.0, 9.0, 4.0, 6.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.6875, -9.3834228515625, -9.079345703125, -8.7752685546875, -8.47119140625, -8.1671142578125, -7.863037109375, -7.5589599609375, -7.2548828125, -6.9508056640625, -6.646728515625, -6.3426513671875, -6.03857421875, -5.7344970703125, -5.430419921875, -5.1263427734375, -4.822265625, -4.5181884765625, -4.214111328125, -3.9100341796875, -3.60595703125, -3.3018798828125, -2.997802734375, -2.6937255859375, -2.3896484375, -2.0855712890625, -1.781494140625, -1.4774169921875, -1.17333984375, -0.8692626953125, -0.565185546875, -0.2611083984375, 0.04296875, 0.3470458984375, 0.651123046875, 0.9552001953125, 1.25927734375, 1.5633544921875, 1.867431640625, 2.1715087890625, 2.4755859375, 2.7796630859375, 3.083740234375, 3.3878173828125, 3.69189453125, 3.9959716796875, 4.300048828125, 4.6041259765625, 4.908203125, 5.2122802734375, 5.516357421875, 5.8204345703125, 6.12451171875, 6.4285888671875, 6.732666015625, 7.0367431640625, 7.3408203125, 7.6448974609375, 7.948974609375, 8.2530517578125, 8.55712890625, 8.8612060546875, 9.165283203125, 9.4693603515625, 9.7734375]}, "gradients/decoder.transformer.h.22.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 9.0, 6.0, 8.0, 8.0, 6.0, 9.0, 13.0, 13.0, 19.0, 18.0, 26.0, 28.0, 43.0, 56.0, 80.0, 114.0, 180.0, 379.0, 994.0, 5638.0, 416284.0, 2701339.0, 17508.0, 1674.0, 517.0, 251.0, 135.0, 79.0, 50.0, 40.0, 21.0, 31.0, 22.0, 23.0, 20.0, 16.0, 14.0, 7.0, 6.0, 8.0, 9.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.546875, -19.88818359375, -19.2294921875, -18.57080078125, -17.912109375, -17.25341796875, -16.5947265625, -15.93603515625, -15.27734375, -14.61865234375, -13.9599609375, -13.30126953125, -12.642578125, -11.98388671875, -11.3251953125, -10.66650390625, -10.0078125, -9.34912109375, -8.6904296875, -8.03173828125, -7.373046875, -6.71435546875, -6.0556640625, -5.39697265625, -4.73828125, -4.07958984375, -3.4208984375, -2.76220703125, -2.103515625, -1.44482421875, -0.7861328125, -0.12744140625, 0.53125, 1.18994140625, 1.8486328125, 2.50732421875, 3.166015625, 3.82470703125, 4.4833984375, 5.14208984375, 5.80078125, 6.45947265625, 7.1181640625, 7.77685546875, 8.435546875, 9.09423828125, 9.7529296875, 10.41162109375, 11.0703125, 11.72900390625, 12.3876953125, 13.04638671875, 13.705078125, 14.36376953125, 15.0224609375, 15.68115234375, 16.33984375, 16.99853515625, 17.6572265625, 18.31591796875, 18.974609375, 19.63330078125, 20.2919921875, 20.95068359375, 21.609375]}, "gradients/decoder.transformer.h.22.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 6.0, 28.0, 187.0, 468.0, 259.0, 61.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-57.43870162963867, -55.869876861572266, -54.30105209350586, -52.73222732543945, -51.16339874267578, -49.594573974609375, -48.02574920654297, -46.45692443847656, -44.888099670410156, -43.31927490234375, -41.750450134277344, -40.18162536621094, -38.61280059814453, -37.04397201538086, -35.47514724731445, -33.90632247924805, -32.33749771118164, -30.768672943115234, -29.199848175048828, -27.63102149963379, -26.062196731567383, -24.493371963500977, -22.924545288085938, -21.35572052001953, -19.786895751953125, -18.21807098388672, -16.649246215820312, -15.080419540405273, -13.511594772338867, -11.942770004272461, -10.373944282531738, -8.805118560791016, -7.236293792724609, -5.667468547821045, -4.0986433029174805, -2.529818058013916, -0.9609928131103516, 0.6078324317932129, 2.1766576766967773, 3.7454833984375, 5.314308166503906, 6.883133411407471, 8.451958656311035, 10.020784378051758, 11.589609146118164, 13.15843391418457, 14.727259635925293, 16.296085357666016, 17.864910125732422, 19.433734893798828, 21.002559661865234, 22.571386337280273, 24.14021110534668, 25.709035873413086, 27.277862548828125, 28.84668731689453, 30.415512084960938, 31.984336853027344, 33.55316162109375, 35.121986389160156, 36.69081115722656, 38.259639739990234, 39.82846450805664, 41.39728927612305, 42.96611404418945]}, "gradients/decoder.transformer.h.22.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 6.0, 5.0, 8.0, 3.0, 7.0, 11.0, 10.0, 10.0, 7.0, 23.0, 26.0, 23.0, 23.0, 28.0, 32.0, 17.0, 23.0, 29.0, 33.0, 59.0, 42.0, 51.0, 31.0, 39.0, 29.0, 39.0, 24.0, 42.0, 28.0, 26.0, 35.0, 33.0, 37.0, 30.0, 19.0, 19.0, 19.0, 8.0, 15.0, 15.0, 10.0, 7.0, 7.0, 2.0, 4.0, 3.0, 1.0, 4.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0], "bins": [-30.044788360595703, -29.11012077331543, -28.175453186035156, -27.240785598754883, -26.30611801147461, -25.371450424194336, -24.436782836914062, -23.50211524963379, -22.567447662353516, -21.632780075073242, -20.69811248779297, -19.763444900512695, -18.828777313232422, -17.89410972595215, -16.959442138671875, -16.0247745513916, -15.090106964111328, -14.155439376831055, -13.220771789550781, -12.286104202270508, -11.351436614990234, -10.416769027709961, -9.482101440429688, -8.547433853149414, -7.612766265869141, -6.678098678588867, -5.743431091308594, -4.80876350402832, -3.874095916748047, -2.9394283294677734, -2.0047607421875, -1.0700931549072266, -0.13542556762695312, 0.7992420196533203, 1.7339096069335938, 2.668577194213867, 3.6032447814941406, 4.537912368774414, 5.4725799560546875, 6.407247543334961, 7.341915130615234, 8.276582717895508, 9.211250305175781, 10.145917892456055, 11.080585479736328, 12.015253067016602, 12.949920654296875, 13.884588241577148, 14.819255828857422, 15.753923416137695, 16.68859100341797, 17.623258590698242, 18.557926177978516, 19.49259376525879, 20.427261352539062, 21.361928939819336, 22.29659652709961, 23.231264114379883, 24.165931701660156, 25.10059928894043, 26.035266876220703, 26.969934463500977, 27.90460205078125, 28.839269638061523, 29.773937225341797]}, "gradients/decoder.transformer.h.21.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 4.0, 0.0, 2.0, 7.0, 7.0, 4.0, 9.0, 6.0, 14.0, 7.0, 19.0, 15.0, 17.0, 24.0, 19.0, 22.0, 28.0, 24.0, 31.0, 33.0, 22.0, 39.0, 30.0, 35.0, 35.0, 43.0, 31.0, 51.0, 36.0, 41.0, 38.0, 29.0, 30.0, 30.0, 29.0, 24.0, 24.0, 17.0, 16.0, 15.0, 22.0, 14.0, 12.0, 8.0, 6.0, 7.0, 4.0, 3.0, 5.0, 6.0, 7.0, 6.0, 1.0, 2.0, 4.0, 0.0, 2.0], "bins": [-2.63671875, -2.55706787109375, -2.4774169921875, -2.39776611328125, -2.318115234375, -2.23846435546875, -2.1588134765625, -2.07916259765625, -1.99951171875, -1.91986083984375, -1.8402099609375, -1.76055908203125, -1.680908203125, -1.60125732421875, -1.5216064453125, -1.44195556640625, -1.3623046875, -1.28265380859375, -1.2030029296875, -1.12335205078125, -1.043701171875, -0.96405029296875, -0.8843994140625, -0.80474853515625, -0.72509765625, -0.64544677734375, -0.5657958984375, -0.48614501953125, -0.406494140625, -0.32684326171875, -0.2471923828125, -0.16754150390625, -0.087890625, -0.00823974609375, 0.0714111328125, 0.15106201171875, 0.230712890625, 0.31036376953125, 0.3900146484375, 0.46966552734375, 0.54931640625, 0.62896728515625, 0.7086181640625, 0.78826904296875, 0.867919921875, 0.94757080078125, 1.0272216796875, 1.10687255859375, 1.1865234375, 1.26617431640625, 1.3458251953125, 1.42547607421875, 1.505126953125, 1.58477783203125, 1.6644287109375, 1.74407958984375, 1.82373046875, 1.90338134765625, 1.9830322265625, 2.06268310546875, 2.142333984375, 2.22198486328125, 2.3016357421875, 2.38128662109375, 2.4609375]}, "gradients/decoder.transformer.h.21.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 4.0, 0.0, 6.0, 9.0, 6.0, 7.0, 11.0, 16.0, 18.0, 22.0, 30.0, 34.0, 39.0, 56.0, 68.0, 97.0, 205.0, 355.0, 856.0, 2490.0, 9780.0, 58993.0, 555934.0, 2787277.0, 691813.0, 70241.0, 11276.0, 2668.0, 966.0, 378.0, 181.0, 114.0, 69.0, 46.0, 41.0, 41.0, 24.0, 19.0, 21.0, 22.0, 13.0, 11.0, 14.0, 8.0, 4.0, 3.0, 3.0, 1.0, 1.0, 1.0, 3.0, 3.0], "bins": [-10.8515625, -10.5462646484375, -10.240966796875, -9.9356689453125, -9.63037109375, -9.3250732421875, -9.019775390625, -8.7144775390625, -8.4091796875, -8.1038818359375, -7.798583984375, -7.4932861328125, -7.18798828125, -6.8826904296875, -6.577392578125, -6.2720947265625, -5.966796875, -5.6614990234375, -5.356201171875, -5.0509033203125, -4.74560546875, -4.4403076171875, -4.135009765625, -3.8297119140625, -3.5244140625, -3.2191162109375, -2.913818359375, -2.6085205078125, -2.30322265625, -1.9979248046875, -1.692626953125, -1.3873291015625, -1.08203125, -0.7767333984375, -0.471435546875, -0.1661376953125, 0.13916015625, 0.4444580078125, 0.749755859375, 1.0550537109375, 1.3603515625, 1.6656494140625, 1.970947265625, 2.2762451171875, 2.58154296875, 2.8868408203125, 3.192138671875, 3.4974365234375, 3.802734375, 4.1080322265625, 4.413330078125, 4.7186279296875, 5.02392578125, 5.3292236328125, 5.634521484375, 5.9398193359375, 6.2451171875, 6.5504150390625, 6.855712890625, 7.1610107421875, 7.46630859375, 7.7716064453125, 8.076904296875, 8.3822021484375, 8.6875]}, "gradients/decoder.transformer.h.21.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 3.0, 2.0, 6.0, 2.0, 1.0, 8.0, 10.0, 12.0, 20.0, 27.0, 44.0, 56.0, 77.0, 119.0, 138.0, 186.0, 271.0, 372.0, 459.0, 450.0, 458.0, 389.0, 267.0, 186.0, 136.0, 103.0, 89.0, 54.0, 46.0, 39.0, 21.0, 11.0, 7.0, 3.0, 5.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-12.375, -12.0531005859375, -11.731201171875, -11.4093017578125, -11.08740234375, -10.7655029296875, -10.443603515625, -10.1217041015625, -9.7998046875, -9.4779052734375, -9.156005859375, -8.8341064453125, -8.51220703125, -8.1903076171875, -7.868408203125, -7.5465087890625, -7.224609375, -6.9027099609375, -6.580810546875, -6.2589111328125, -5.93701171875, -5.6151123046875, -5.293212890625, -4.9713134765625, -4.6494140625, -4.3275146484375, -4.005615234375, -3.6837158203125, -3.36181640625, -3.0399169921875, -2.718017578125, -2.3961181640625, -2.07421875, -1.7523193359375, -1.430419921875, -1.1085205078125, -0.78662109375, -0.4647216796875, -0.142822265625, 0.1790771484375, 0.5009765625, 0.8228759765625, 1.144775390625, 1.4666748046875, 1.78857421875, 2.1104736328125, 2.432373046875, 2.7542724609375, 3.076171875, 3.3980712890625, 3.719970703125, 4.0418701171875, 4.36376953125, 4.6856689453125, 5.007568359375, 5.3294677734375, 5.6513671875, 5.9732666015625, 6.295166015625, 6.6170654296875, 6.93896484375, 7.2608642578125, 7.582763671875, 7.9046630859375, 8.2265625]}, "gradients/decoder.transformer.h.21.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 6.0, 16.0, 11.0, 26.0, 36.0, 63.0, 97.0, 221.0, 346.0, 787.0, 2109.0, 11850.0, 175439.0, 3002515.0, 950736.0, 43151.0, 4410.0, 1320.0, 536.0, 263.0, 131.0, 77.0, 63.0, 28.0, 14.0, 15.0, 6.0, 3.0, 6.0, 7.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.59375, -18.861572265625, -18.12939453125, -17.397216796875, -16.6650390625, -15.932861328125, -15.20068359375, -14.468505859375, -13.736328125, -13.004150390625, -12.27197265625, -11.539794921875, -10.8076171875, -10.075439453125, -9.34326171875, -8.611083984375, -7.87890625, -7.146728515625, -6.41455078125, -5.682373046875, -4.9501953125, -4.218017578125, -3.48583984375, -2.753662109375, -2.021484375, -1.289306640625, -0.55712890625, 0.175048828125, 0.9072265625, 1.639404296875, 2.37158203125, 3.103759765625, 3.8359375, 4.568115234375, 5.30029296875, 6.032470703125, 6.7646484375, 7.496826171875, 8.22900390625, 8.961181640625, 9.693359375, 10.425537109375, 11.15771484375, 11.889892578125, 12.6220703125, 13.354248046875, 14.08642578125, 14.818603515625, 15.55078125, 16.282958984375, 17.01513671875, 17.747314453125, 18.4794921875, 19.211669921875, 19.94384765625, 20.676025390625, 21.408203125, 22.140380859375, 22.87255859375, 23.604736328125, 24.3369140625, 25.069091796875, 25.80126953125, 26.533447265625, 27.265625]}, "gradients/decoder.transformer.h.21.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 5.0, 13.0, 21.0, 34.0, 74.0, 97.0, 139.0, 169.0, 143.0, 131.0, 76.0, 64.0, 28.0, 12.0, 6.0, 1.0, 1.0, 1.0, 1.0], "bins": [-94.88855743408203, -93.08930206298828, -91.29005432128906, -89.49079895019531, -87.69154357910156, -85.89228820800781, -84.0930404663086, -82.29378509521484, -80.49453735351562, -78.69528198242188, -76.89603424072266, -75.0967788696289, -73.29752349853516, -71.49827575683594, -69.69902038574219, -67.89976501464844, -66.10050964355469, -64.30125427246094, -62.50200271606445, -60.70275115966797, -58.90349578857422, -57.104244232177734, -55.30499267578125, -53.5057373046875, -51.706485748291016, -49.90723419189453, -48.10797882080078, -46.3087272644043, -44.50947570800781, -42.71022033691406, -40.91096878051758, -39.111717224121094, -37.312461853027344, -35.51321029663086, -33.71395492553711, -31.914703369140625, -30.115449905395508, -28.31619644165039, -26.516944885253906, -24.71769142150879, -22.91843605041504, -21.119182586669922, -19.319931030273438, -17.52067756652832, -15.721424102783203, -13.922170639038086, -12.122918128967285, -10.323665618896484, -8.524412155151367, -6.725159168243408, -4.925906181335449, -3.1266531944274902, -1.3274002075195312, 0.47185325622558594, 2.2711057662963867, 4.0703582763671875, 5.869611740112305, 7.668864727020264, 9.468117713928223, 11.267370223999023, 13.06662368774414, 14.865877151489258, 16.665130615234375, 18.46438217163086, 20.263635635375977]}, "gradients/decoder.transformer.h.21.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 4.0, 2.0, 4.0, 2.0, 2.0, 9.0, 11.0, 7.0, 10.0, 16.0, 21.0, 23.0, 14.0, 34.0, 31.0, 33.0, 33.0, 49.0, 42.0, 34.0, 40.0, 42.0, 44.0, 49.0, 48.0, 45.0, 61.0, 36.0, 45.0, 31.0, 34.0, 26.0, 25.0, 16.0, 13.0, 11.0, 12.0, 12.0, 7.0, 11.0, 6.0, 9.0, 5.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.22817611694336, -35.15205764770508, -34.07594299316406, -32.99982452392578, -31.923709869384766, -30.847591400146484, -29.771474838256836, -28.695358276367188, -27.61924171447754, -26.54312515258789, -25.467008590698242, -24.390892028808594, -23.314773559570312, -22.238658905029297, -21.162540435791016, -20.086423873901367, -19.01030731201172, -17.93419075012207, -16.858074188232422, -15.781956672668457, -14.705840110778809, -13.62972354888916, -12.553606033325195, -11.477489471435547, -10.401372909545898, -9.32525634765625, -8.249139785766602, -7.173022270202637, -6.096905708312988, -5.02078914642334, -3.944672107696533, -2.8685550689697266, -1.7924346923828125, -0.716317892074585, 0.3597989082336426, 1.4359157085418701, 2.5120325088500977, 3.588149070739746, 4.664266109466553, 5.740383148193359, 6.816499710083008, 7.892616271972656, 8.968732833862305, 10.04485034942627, 11.120966911315918, 12.197083473205566, 13.273200988769531, 14.34931755065918, 15.425434112548828, 16.501550674438477, 17.577667236328125, 18.653783798217773, 19.729900360107422, 20.806018829345703, 21.88213539123535, 22.958251953125, 24.03436851501465, 25.110485076904297, 26.186601638793945, 27.262718200683594, 28.338836669921875, 29.41495132446289, 30.491069793701172, 31.56718635559082, 32.64330291748047]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 3.0, 4.0, 5.0, 8.0, 7.0, 6.0, 7.0, 11.0, 11.0, 16.0, 10.0, 18.0, 20.0, 14.0, 26.0, 29.0, 38.0, 27.0, 33.0, 32.0, 36.0, 43.0, 40.0, 30.0, 48.0, 30.0, 51.0, 47.0, 30.0, 23.0, 35.0, 34.0, 25.0, 34.0, 20.0, 15.0, 17.0, 18.0, 10.0, 16.0, 15.0, 12.0, 6.0, 11.0, 15.0, 5.0, 2.0, 5.0, 2.0, 1.0, 2.0, 6.0, 3.0], "bins": [-3.033203125, -2.9478759765625, -2.862548828125, -2.7772216796875, -2.69189453125, -2.6065673828125, -2.521240234375, -2.4359130859375, -2.3505859375, -2.2652587890625, -2.179931640625, -2.0946044921875, -2.00927734375, -1.9239501953125, -1.838623046875, -1.7532958984375, -1.66796875, -1.5826416015625, -1.497314453125, -1.4119873046875, -1.32666015625, -1.2413330078125, -1.156005859375, -1.0706787109375, -0.9853515625, -0.9000244140625, -0.814697265625, -0.7293701171875, -0.64404296875, -0.5587158203125, -0.473388671875, -0.3880615234375, -0.302734375, -0.2174072265625, -0.132080078125, -0.0467529296875, 0.03857421875, 0.1239013671875, 0.209228515625, 0.2945556640625, 0.3798828125, 0.4652099609375, 0.550537109375, 0.6358642578125, 0.72119140625, 0.8065185546875, 0.891845703125, 0.9771728515625, 1.0625, 1.1478271484375, 1.233154296875, 1.3184814453125, 1.40380859375, 1.4891357421875, 1.574462890625, 1.6597900390625, 1.7451171875, 1.8304443359375, 1.915771484375, 2.0010986328125, 2.08642578125, 2.1717529296875, 2.257080078125, 2.3424072265625, 2.427734375]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 11.0, 6.0, 6.0, 13.0, 14.0, 22.0, 38.0, 50.0, 88.0, 126.0, 195.0, 293.0, 400.0, 561.0, 844.0, 1280.0, 1888.0, 2699.0, 4042.0, 5897.0, 8733.0, 13083.0, 19707.0, 30710.0, 46931.0, 72198.0, 110736.0, 162771.0, 180899.0, 132650.0, 87331.0, 56157.0, 36448.0, 24038.0, 15645.0, 10310.0, 7097.0, 4680.0, 3007.0, 2153.0, 1570.0, 1040.0, 679.0, 504.0, 299.0, 200.0, 170.0, 124.0, 75.0, 55.0, 35.0, 19.0, 14.0, 14.0, 8.0, 5.0, 1.0, 1.0, 2.0], "bins": [-0.220458984375, -0.2138214111328125, -0.207183837890625, -0.2005462646484375, -0.19390869140625, -0.1872711181640625, -0.180633544921875, -0.1739959716796875, -0.1673583984375, -0.1607208251953125, -0.154083251953125, -0.1474456787109375, -0.14080810546875, -0.1341705322265625, -0.127532958984375, -0.1208953857421875, -0.1142578125, -0.1076202392578125, -0.100982666015625, -0.0943450927734375, -0.08770751953125, -0.0810699462890625, -0.074432373046875, -0.0677947998046875, -0.0611572265625, -0.0545196533203125, -0.047882080078125, -0.0412445068359375, -0.03460693359375, -0.0279693603515625, -0.021331787109375, -0.0146942138671875, -0.008056640625, -0.0014190673828125, 0.005218505859375, 0.0118560791015625, 0.01849365234375, 0.0251312255859375, 0.031768798828125, 0.0384063720703125, 0.0450439453125, 0.0516815185546875, 0.058319091796875, 0.0649566650390625, 0.07159423828125, 0.0782318115234375, 0.084869384765625, 0.0915069580078125, 0.09814453125, 0.1047821044921875, 0.111419677734375, 0.1180572509765625, 0.12469482421875, 0.1313323974609375, 0.137969970703125, 0.1446075439453125, 0.1512451171875, 0.1578826904296875, 0.164520263671875, 0.1711578369140625, 0.17779541015625, 0.1844329833984375, 0.191070556640625, 0.1977081298828125, 0.204345703125]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 6.0, 7.0, 3.0, 5.0, 5.0, 7.0, 8.0, 11.0, 11.0, 19.0, 22.0, 27.0, 24.0, 26.0, 20.0, 34.0, 29.0, 35.0, 35.0, 46.0, 32.0, 50.0, 45.0, 1070.0, 52.0, 41.0, 35.0, 34.0, 35.0, 28.0, 28.0, 20.0, 29.0, 21.0, 24.0, 18.0, 18.0, 15.0, 8.0, 8.0, 7.0, 8.0, 11.0, 9.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.7548828125, -1.69659423828125, -1.6383056640625, -1.58001708984375, -1.521728515625, -1.46343994140625, -1.4051513671875, -1.34686279296875, -1.28857421875, -1.23028564453125, -1.1719970703125, -1.11370849609375, -1.055419921875, -0.99713134765625, -0.9388427734375, -0.88055419921875, -0.822265625, -0.76397705078125, -0.7056884765625, -0.64739990234375, -0.589111328125, -0.53082275390625, -0.4725341796875, -0.41424560546875, -0.35595703125, -0.29766845703125, -0.2393798828125, -0.18109130859375, -0.122802734375, -0.06451416015625, -0.0062255859375, 0.05206298828125, 0.1103515625, 0.16864013671875, 0.2269287109375, 0.28521728515625, 0.343505859375, 0.40179443359375, 0.4600830078125, 0.51837158203125, 0.57666015625, 0.63494873046875, 0.6932373046875, 0.75152587890625, 0.809814453125, 0.86810302734375, 0.9263916015625, 0.98468017578125, 1.04296875, 1.10125732421875, 1.1595458984375, 1.21783447265625, 1.276123046875, 1.33441162109375, 1.3927001953125, 1.45098876953125, 1.50927734375, 1.56756591796875, 1.6258544921875, 1.68414306640625, 1.742431640625, 1.80072021484375, 1.8590087890625, 1.91729736328125, 1.9755859375]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 6.0, 7.0, 19.0, 21.0, 19.0, 40.0, 63.0, 90.0, 153.0, 221.0, 402.0, 509.0, 908.0, 1292.0, 2249.0, 3454.0, 5686.0, 8675.0, 13959.0, 21910.0, 34166.0, 53562.0, 85984.0, 134467.0, 1235174.0, 174318.0, 117520.0, 74258.0, 46666.0, 29698.0, 18939.0, 11921.0, 7708.0, 4848.0, 3085.0, 1871.0, 1206.0, 763.0, 471.0, 306.0, 164.0, 124.0, 93.0, 38.0, 34.0, 29.0, 14.0, 11.0, 6.0, 3.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.1888427734375, -0.18272018432617188, -0.17659759521484375, -0.17047500610351562, -0.1643524169921875, -0.15822982788085938, -0.15210723876953125, -0.14598464965820312, -0.139862060546875, -0.13373947143554688, -0.12761688232421875, -0.12149429321289062, -0.1153717041015625, -0.10924911499023438, -0.10312652587890625, -0.09700393676757812, -0.09088134765625, -0.08475875854492188, -0.07863616943359375, -0.07251358032226562, -0.0663909912109375, -0.060268402099609375, -0.05414581298828125, -0.048023223876953125, -0.041900634765625, -0.035778045654296875, -0.02965545654296875, -0.023532867431640625, -0.0174102783203125, -0.011287689208984375, -0.00516510009765625, 0.000957489013671875, 0.007080078125, 0.013202667236328125, 0.01932525634765625, 0.025447845458984375, 0.0315704345703125, 0.037693023681640625, 0.04381561279296875, 0.049938201904296875, 0.056060791015625, 0.062183380126953125, 0.06830596923828125, 0.07442855834960938, 0.0805511474609375, 0.08667373657226562, 0.09279632568359375, 0.09891891479492188, 0.10504150390625, 0.11116409301757812, 0.11728668212890625, 0.12340927124023438, 0.1295318603515625, 0.13565444946289062, 0.14177703857421875, 0.14789962768554688, 0.154022216796875, 0.16014480590820312, 0.16626739501953125, 0.17238998413085938, 0.1785125732421875, 0.18463516235351562, 0.19075775146484375, 0.19688034057617188, 0.2030029296875]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 2.0, 5.0, 0.0, 2.0, 4.0, 2.0, 3.0, 2.0, 10.0, 7.0, 9.0, 12.0, 15.0, 17.0, 19.0, 21.0, 36.0, 44.0, 35.0, 45.0, 43.0, 34.0, 56.0, 59.0, 47.0, 68.0, 60.0, 55.0, 53.0, 47.0, 34.0, 33.0, 25.0, 21.0, 18.0, 17.0, 10.0, 5.0, 11.0, 7.0, 2.0, 3.0, 3.0, 4.0, 3.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006127357482910156, -0.0005902275443077087, -0.0005677193403244019, -0.000545211136341095, -0.0005227029323577881, -0.0005001947283744812, -0.0004776865243911743, -0.00045517832040786743, -0.00043267011642456055, -0.00041016191244125366, -0.0003876537084579468, -0.0003651455044746399, -0.000342637300491333, -0.0003201290965080261, -0.00029762089252471924, -0.00027511268854141235, -0.00025260448455810547, -0.00023009628057479858, -0.0002075880765914917, -0.00018507987260818481, -0.00016257166862487793, -0.00014006346464157104, -0.00011755526065826416, -9.504705667495728e-05, -7.253885269165039e-05, -5.0030648708343506e-05, -2.752244472503662e-05, -5.014240741729736e-06, 1.749396324157715e-05, 4.000216722488403e-05, 6.251037120819092e-05, 8.50185751914978e-05, 0.00010752677917480469, 0.00013003498315811157, 0.00015254318714141846, 0.00017505139112472534, 0.00019755959510803223, 0.0002200677990913391, 0.000242576003074646, 0.0002650842070579529, 0.00028759241104125977, 0.00031010061502456665, 0.00033260881900787354, 0.0003551170229911804, 0.0003776252269744873, 0.0004001334309577942, 0.0004226416349411011, 0.00044514983892440796, 0.00046765804290771484, 0.0004901662468910217, 0.0005126744508743286, 0.0005351826548576355, 0.0005576908588409424, 0.0005801990628242493, 0.0006027072668075562, 0.000625215470790863, 0.0006477236747741699, 0.0006702318787574768, 0.0006927400827407837, 0.0007152482867240906, 0.0007377564907073975, 0.0007602646946907043, 0.0007827728986740112, 0.0008052811026573181, 0.000827789306640625]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 4.0, 4.0, 6.0, 5.0, 6.0, 8.0, 14.0, 18.0, 13.0, 24.0, 41.0, 61.0, 60.0, 71.0, 89.0, 157.0, 186.0, 241.0, 440.0, 1230.0, 206226.0, 835366.0, 2643.0, 452.0, 324.0, 216.0, 145.0, 121.0, 97.0, 62.0, 45.0, 30.0, 34.0, 29.0, 17.0, 15.0, 16.0, 8.0, 7.0, 6.0, 9.0, 5.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.01371002197265625, -0.013308525085449219, -0.012907028198242188, -0.012505531311035156, -0.012104034423828125, -0.011702537536621094, -0.011301040649414062, -0.010899543762207031, -0.010498046875, -0.010096549987792969, -0.009695053100585938, -0.009293556213378906, -0.008892059326171875, -0.008490562438964844, -0.008089065551757812, -0.007687568664550781, -0.00728607177734375, -0.006884574890136719, -0.0064830780029296875, -0.006081581115722656, -0.005680084228515625, -0.005278587341308594, -0.0048770904541015625, -0.004475593566894531, -0.0040740966796875, -0.0036725997924804688, -0.0032711029052734375, -0.0028696060180664062, -0.002468109130859375, -0.0020666122436523438, -0.0016651153564453125, -0.0012636184692382812, -0.00086212158203125, -0.00046062469482421875, -5.91278076171875e-05, 0.00034236907958984375, 0.000743865966796875, 0.0011453628540039062, 0.0015468597412109375, 0.0019483566284179688, 0.002349853515625, 0.0027513504028320312, 0.0031528472900390625, 0.0035543441772460938, 0.003955841064453125, 0.004357337951660156, 0.0047588348388671875, 0.005160331726074219, 0.00556182861328125, 0.005963325500488281, 0.0063648223876953125, 0.006766319274902344, 0.007167816162109375, 0.007569313049316406, 0.007970809936523438, 0.008372306823730469, 0.0087738037109375, 0.009175300598144531, 0.009576797485351562, 0.009978294372558594, 0.010379791259765625, 0.010781288146972656, 0.011182785034179688, 0.011584281921386719, 0.01198577880859375]}, "gradients/decoder.transformer.h.21.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 11.0, 13.0, 21.0, 42.0, 78.0, 107.0, 139.0, 134.0, 152.0, 132.0, 80.0, 47.0, 23.0, 17.0, 8.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000258095795288682, -0.00024033985391724855, -0.0002225839125458151, -0.0002048279857262969, -0.00018707204435486346, -0.00016931610298343003, -0.00015156017616391182, -0.00013380423479247838, -0.00011604829342104495, -9.829235204961151e-05, -8.053641795413569e-05, -6.278048385865986e-05, -4.5024542487226427e-05, -2.726860111579299e-05, -9.512667020317167e-06, 8.243267075158656e-06, 2.5999208446592093e-05, 4.375514618004672e-05, 6.151108391350135e-05, 7.926701800897717e-05, 9.702295938041061e-05, 0.00011477890075184405, 0.00013253482757136226, 0.0001502907689427957, 0.00016804671031422913, 0.00018580265168566257, 0.000203558593057096, 0.0002213145198766142, 0.00023907046124804765, 0.0002568264026194811, 0.0002745823294389993, 0.0002923382562585175, 0.0003100942703895271, 0.0003278501972090453, 0.00034560615313239396, 0.00036336207995191216, 0.00038111803587526083, 0.00039887396269477904, 0.00041662988951429725, 0.0004343858454376459, 0.0004521417722571641, 0.00046989769907668233, 0.000487653655000031, 0.0005054096109233797, 0.0005231655086390674, 0.0005409214645624161, 0.0005586774204857647, 0.0005764333182014525, 0.0005941892741248012, 0.0006119452300481498, 0.0006297011277638376, 0.0006474570836871862, 0.0006652130396105349, 0.0006829689955338836, 0.0007007248932495713, 0.00071848084917292, 0.0007362368050962687, 0.0007539927610196173, 0.0007717486587353051, 0.0007895046146586537, 0.0008072605705820024, 0.0008250165265053511, 0.0008427724242210388, 0.0008605283801443875, 0.0008782842778600752]}, "gradients/decoder.transformer.h.21.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 5.0, 1.0, 6.0, 5.0, 4.0, 3.0, 10.0, 11.0, 14.0, 15.0, 20.0, 24.0, 27.0, 19.0, 28.0, 29.0, 35.0, 42.0, 44.0, 42.0, 45.0, 50.0, 42.0, 44.0, 39.0, 41.0, 43.0, 36.0, 43.0, 27.0, 38.0, 31.0, 17.0, 22.0, 18.0, 22.0, 19.0, 10.0, 9.0, 9.0, 8.0, 5.0, 5.0, 1.0, 1.0, 5.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004277229309082031, -0.00041467975825071335, -0.00040163658559322357, -0.0003885934129357338, -0.000375550240278244, -0.00036250706762075424, -0.00034946389496326447, -0.0003364207223057747, -0.0003233775496482849, -0.00031033437699079514, -0.00029729120433330536, -0.0002842480316758156, -0.0002712048590183258, -0.00025816168636083603, -0.00024511851370334625, -0.00023207534104585648, -0.0002190321683883667, -0.00020598899573087692, -0.00019294582307338715, -0.00017990265041589737, -0.0001668594777584076, -0.00015381630510091782, -0.00014077313244342804, -0.00012772995978593826, -0.00011468678712844849, -0.00010164361447095871, -8.860044181346893e-05, -7.555726915597916e-05, -6.251409649848938e-05, -4.94709238409996e-05, -3.642775118350983e-05, -2.338457852602005e-05, -1.0341405868530273e-05, 2.701766788959503e-06, 1.574493944644928e-05, 2.8788112103939056e-05, 4.183128476142883e-05, 5.487445741891861e-05, 6.791763007640839e-05, 8.096080273389816e-05, 9.400397539138794e-05, 0.00010704714804887772, 0.00012009032070636749, 0.00013313349336385727, 0.00014617666602134705, 0.00015921983867883682, 0.0001722630113363266, 0.00018530618399381638, 0.00019834935665130615, 0.00021139252930879593, 0.0002244357019662857, 0.00023747887462377548, 0.00025052204728126526, 0.00026356521993875504, 0.0002766083925962448, 0.0002896515652537346, 0.00030269473791122437, 0.00031573791056871414, 0.0003287810832262039, 0.0003418242558836937, 0.00035486742854118347, 0.00036791060119867325, 0.000380953773856163, 0.0003939969465136528, 0.0004070401191711426]}, "gradients/decoder.transformer.h.21.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 3.0, 4.0, 5.0, 8.0, 7.0, 6.0, 7.0, 11.0, 11.0, 16.0, 10.0, 18.0, 20.0, 14.0, 26.0, 29.0, 38.0, 27.0, 33.0, 32.0, 36.0, 43.0, 40.0, 30.0, 48.0, 30.0, 51.0, 47.0, 30.0, 23.0, 35.0, 34.0, 25.0, 34.0, 20.0, 15.0, 17.0, 18.0, 10.0, 16.0, 15.0, 12.0, 6.0, 11.0, 15.0, 5.0, 2.0, 5.0, 2.0, 1.0, 2.0, 6.0, 3.0], "bins": [-3.033203125, -2.9478759765625, -2.862548828125, -2.7772216796875, -2.69189453125, -2.6065673828125, -2.521240234375, -2.4359130859375, -2.3505859375, -2.2652587890625, -2.179931640625, -2.0946044921875, -2.00927734375, -1.9239501953125, -1.838623046875, -1.7532958984375, -1.66796875, -1.5826416015625, -1.497314453125, -1.4119873046875, -1.32666015625, -1.2413330078125, -1.156005859375, -1.0706787109375, -0.9853515625, -0.9000244140625, -0.814697265625, -0.7293701171875, -0.64404296875, -0.5587158203125, -0.473388671875, -0.3880615234375, -0.302734375, -0.2174072265625, -0.132080078125, -0.0467529296875, 0.03857421875, 0.1239013671875, 0.209228515625, 0.2945556640625, 0.3798828125, 0.4652099609375, 0.550537109375, 0.6358642578125, 0.72119140625, 0.8065185546875, 0.891845703125, 0.9771728515625, 1.0625, 1.1478271484375, 1.233154296875, 1.3184814453125, 1.40380859375, 1.4891357421875, 1.574462890625, 1.6597900390625, 1.7451171875, 1.8304443359375, 1.915771484375, 2.0010986328125, 2.08642578125, 2.1717529296875, 2.257080078125, 2.3424072265625, 2.427734375]}, "gradients/decoder.transformer.h.21.attn.c_proj.weight": {"_type": "histogram", "values": [4.0, 2.0, 1.0, 1.0, 4.0, 1.0, 4.0, 7.0, 17.0, 14.0, 27.0, 26.0, 49.0, 42.0, 64.0, 92.0, 137.0, 203.0, 255.0, 356.0, 560.0, 865.0, 1340.0, 2208.0, 3505.0, 5782.0, 9795.0, 18147.0, 37416.0, 85982.0, 202316.0, 336722.0, 187501.0, 79216.0, 34826.0, 17077.0, 9246.0, 5379.0, 3273.0, 2126.0, 1328.0, 805.0, 550.0, 387.0, 255.0, 178.0, 136.0, 77.0, 68.0, 50.0, 42.0, 39.0, 25.0, 11.0, 11.0, 5.0, 3.0, 2.0, 4.0, 3.0, 1.0, 3.0, 1.0, 4.0], "bins": [-2.75, -2.662567138671875, -2.57513427734375, -2.487701416015625, -2.4002685546875, -2.312835693359375, -2.22540283203125, -2.137969970703125, -2.050537109375, -1.963104248046875, -1.87567138671875, -1.788238525390625, -1.7008056640625, -1.613372802734375, -1.52593994140625, -1.438507080078125, -1.35107421875, -1.263641357421875, -1.17620849609375, -1.088775634765625, -1.0013427734375, -0.913909912109375, -0.82647705078125, -0.739044189453125, -0.651611328125, -0.564178466796875, -0.47674560546875, -0.389312744140625, -0.3018798828125, -0.214447021484375, -0.12701416015625, -0.039581298828125, 0.0478515625, 0.135284423828125, 0.22271728515625, 0.310150146484375, 0.3975830078125, 0.485015869140625, 0.57244873046875, 0.659881591796875, 0.747314453125, 0.834747314453125, 0.92218017578125, 1.009613037109375, 1.0970458984375, 1.184478759765625, 1.27191162109375, 1.359344482421875, 1.44677734375, 1.534210205078125, 1.62164306640625, 1.709075927734375, 1.7965087890625, 1.883941650390625, 1.97137451171875, 2.058807373046875, 2.146240234375, 2.233673095703125, 2.32110595703125, 2.408538818359375, 2.4959716796875, 2.583404541015625, 2.67083740234375, 2.758270263671875, 2.845703125]}, "gradients/decoder.transformer.h.21.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 5.0, 7.0, 2.0, 4.0, 8.0, 5.0, 10.0, 6.0, 11.0, 12.0, 18.0, 17.0, 22.0, 23.0, 24.0, 37.0, 36.0, 35.0, 35.0, 49.0, 47.0, 85.0, 137.0, 1427.0, 405.0, 114.0, 76.0, 46.0, 51.0, 40.0, 27.0, 30.0, 25.0, 20.0, 30.0, 26.0, 19.0, 7.0, 9.0, 14.0, 15.0, 14.0, 6.0, 5.0, 10.0, 5.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-9.796875, -9.4805908203125, -9.164306640625, -8.8480224609375, -8.53173828125, -8.2154541015625, -7.899169921875, -7.5828857421875, -7.2666015625, -6.9503173828125, -6.634033203125, -6.3177490234375, -6.00146484375, -5.6851806640625, -5.368896484375, -5.0526123046875, -4.736328125, -4.4200439453125, -4.103759765625, -3.7874755859375, -3.47119140625, -3.1549072265625, -2.838623046875, -2.5223388671875, -2.2060546875, -1.8897705078125, -1.573486328125, -1.2572021484375, -0.94091796875, -0.6246337890625, -0.308349609375, 0.0079345703125, 0.32421875, 0.6405029296875, 0.956787109375, 1.2730712890625, 1.58935546875, 1.9056396484375, 2.221923828125, 2.5382080078125, 2.8544921875, 3.1707763671875, 3.487060546875, 3.8033447265625, 4.11962890625, 4.4359130859375, 4.752197265625, 5.0684814453125, 5.384765625, 5.7010498046875, 6.017333984375, 6.3336181640625, 6.64990234375, 6.9661865234375, 7.282470703125, 7.5987548828125, 7.9150390625, 8.2313232421875, 8.547607421875, 8.8638916015625, 9.18017578125, 9.4964599609375, 9.812744140625, 10.1290283203125, 10.4453125]}, "gradients/decoder.transformer.h.21.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 1.0, 3.0, 4.0, 5.0, 11.0, 7.0, 10.0, 14.0, 24.0, 18.0, 21.0, 26.0, 63.0, 54.0, 63.0, 103.0, 149.0, 314.0, 756.0, 3734.0, 61550.0, 2975123.0, 97196.0, 4602.0, 879.0, 340.0, 210.0, 106.0, 85.0, 56.0, 40.0, 33.0, 30.0, 19.0, 15.0, 9.0, 10.0, 11.0, 4.0, 7.0, 7.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-22.109375, -21.487548828125, -20.86572265625, -20.243896484375, -19.6220703125, -19.000244140625, -18.37841796875, -17.756591796875, -17.134765625, -16.512939453125, -15.89111328125, -15.269287109375, -14.6474609375, -14.025634765625, -13.40380859375, -12.781982421875, -12.16015625, -11.538330078125, -10.91650390625, -10.294677734375, -9.6728515625, -9.051025390625, -8.42919921875, -7.807373046875, -7.185546875, -6.563720703125, -5.94189453125, -5.320068359375, -4.6982421875, -4.076416015625, -3.45458984375, -2.832763671875, -2.2109375, -1.589111328125, -0.96728515625, -0.345458984375, 0.2763671875, 0.898193359375, 1.52001953125, 2.141845703125, 2.763671875, 3.385498046875, 4.00732421875, 4.629150390625, 5.2509765625, 5.872802734375, 6.49462890625, 7.116455078125, 7.73828125, 8.360107421875, 8.98193359375, 9.603759765625, 10.2255859375, 10.847412109375, 11.46923828125, 12.091064453125, 12.712890625, 13.334716796875, 13.95654296875, 14.578369140625, 15.2001953125, 15.822021484375, 16.44384765625, 17.065673828125, 17.6875]}, "gradients/decoder.transformer.h.21.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 6.0, 10.0, 66.0, 258.0, 414.0, 201.0, 52.0, 8.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.334421157836914, -8.718422889709473, -7.102424621582031, -5.48642635345459, -3.8704280853271484, -2.254429817199707, -0.6384315490722656, 0.9775667190551758, 2.593564987182617, 4.209563255310059, 5.8255615234375, 7.441559791564941, 9.057558059692383, 10.673556327819824, 12.289554595947266, 13.905552864074707, 15.521551132202148, 17.137550354003906, 18.75354766845703, 20.369544982910156, 21.985544204711914, 23.601543426513672, 25.217540740966797, 26.833538055419922, 28.44953727722168, 30.065536499023438, 31.681533813476562, 33.29753112792969, 34.91352844238281, 36.5295295715332, 38.14552688598633, 39.76152420043945, 41.377525329589844, 42.99352264404297, 44.609519958496094, 46.225521087646484, 47.84151840209961, 49.457515716552734, 51.073516845703125, 52.68951416015625, 54.305511474609375, 55.9215087890625, 57.537506103515625, 59.153507232666016, 60.76950454711914, 62.385501861572266, 64.00150299072266, 65.61750030517578, 67.2334976196289, 68.84949493408203, 70.46549224853516, 72.08148956298828, 73.69749450683594, 75.31349182128906, 76.92948913574219, 78.54548645019531, 80.16148376464844, 81.77748107910156, 83.39347839355469, 85.00947570800781, 86.62547302246094, 88.2414779663086, 89.85747528076172, 91.47347259521484, 93.08946990966797]}, "gradients/decoder.transformer.h.21.ln_1.bias": {"_type": "histogram", "values": [4.0, 2.0, 1.0, 3.0, 2.0, 4.0, 1.0, 5.0, 6.0, 8.0, 7.0, 12.0, 11.0, 9.0, 10.0, 11.0, 16.0, 13.0, 16.0, 32.0, 31.0, 34.0, 35.0, 48.0, 35.0, 30.0, 39.0, 35.0, 27.0, 35.0, 36.0, 32.0, 52.0, 34.0, 32.0, 29.0, 29.0, 30.0, 42.0, 25.0, 25.0, 19.0, 18.0, 21.0, 11.0, 8.0, 11.0, 7.0, 13.0, 5.0, 9.0, 1.0, 1.0, 4.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-29.35616683959961, -28.36530113220215, -27.37443733215332, -26.38357162475586, -25.39270782470703, -24.40184211730957, -23.41097640991211, -22.42011260986328, -21.429248809814453, -20.438383102416992, -19.447519302368164, -18.456653594970703, -17.465789794921875, -16.474924087524414, -15.48405933380127, -14.493194580078125, -13.502328872680664, -12.51146411895752, -11.520599365234375, -10.529733657836914, -9.538869857788086, -8.548004150390625, -7.5571393966674805, -6.566274642944336, -5.575409889221191, -4.584545135498047, -3.5936801433563232, -2.6028151512145996, -1.611950397491455, -0.6210856437683105, 0.3697795867919922, 1.3606443405151367, 2.3515090942382812, 3.342373847961426, 4.33323860168457, 5.324103832244873, 6.314968585968018, 7.305833339691162, 8.296698570251465, 9.28756332397461, 10.278428077697754, 11.269292831420898, 12.260157585144043, 13.251022338867188, 14.241888046264648, 15.232751846313477, 16.223617553710938, 17.214481353759766, 18.205347061157227, 19.196212768554688, 20.187076568603516, 21.177942276000977, 22.168806076049805, 23.159671783447266, 24.150535583496094, 25.141401290893555, 26.132266998291016, 27.123132705688477, 28.113996505737305, 29.104862213134766, 30.095726013183594, 31.086591720581055, 32.077457427978516, 33.068321228027344, 34.05918502807617]}, "gradients/decoder.transformer.h.20.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0, 4.0, 3.0, 5.0, 7.0, 5.0, 9.0, 12.0, 11.0, 11.0, 8.0, 15.0, 18.0, 20.0, 25.0, 29.0, 28.0, 30.0, 24.0, 39.0, 39.0, 42.0, 37.0, 44.0, 50.0, 40.0, 31.0, 48.0, 42.0, 35.0, 34.0, 30.0, 26.0, 29.0, 19.0, 20.0, 16.0, 12.0, 21.0, 19.0, 10.0, 11.0, 12.0, 8.0, 9.0, 3.0, 4.0, 4.0, 1.0, 3.0, 4.0, 0.0, 2.0, 3.0], "bins": [-3.267578125, -3.173675537109375, -3.07977294921875, -2.985870361328125, -2.8919677734375, -2.798065185546875, -2.70416259765625, -2.610260009765625, -2.516357421875, -2.422454833984375, -2.32855224609375, -2.234649658203125, -2.1407470703125, -2.046844482421875, -1.95294189453125, -1.859039306640625, -1.76513671875, -1.671234130859375, -1.57733154296875, -1.483428955078125, -1.3895263671875, -1.295623779296875, -1.20172119140625, -1.107818603515625, -1.013916015625, -0.920013427734375, -0.82611083984375, -0.732208251953125, -0.6383056640625, -0.544403076171875, -0.45050048828125, -0.356597900390625, -0.2626953125, -0.168792724609375, -0.07489013671875, 0.019012451171875, 0.1129150390625, 0.206817626953125, 0.30072021484375, 0.394622802734375, 0.488525390625, 0.582427978515625, 0.67633056640625, 0.770233154296875, 0.8641357421875, 0.958038330078125, 1.05194091796875, 1.145843505859375, 1.23974609375, 1.333648681640625, 1.42755126953125, 1.521453857421875, 1.6153564453125, 1.709259033203125, 1.80316162109375, 1.897064208984375, 1.990966796875, 2.084869384765625, 2.17877197265625, 2.272674560546875, 2.3665771484375, 2.460479736328125, 2.55438232421875, 2.648284912109375, 2.7421875]}, "gradients/decoder.transformer.h.20.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 5.0, 5.0, 5.0, 6.0, 7.0, 8.0, 8.0, 10.0, 8.0, 11.0, 11.0, 18.0, 23.0, 25.0, 35.0, 37.0, 55.0, 104.0, 165.0, 503.0, 1977.0, 14027.0, 195908.0, 3007820.0, 923884.0, 43475.0, 4604.0, 869.0, 246.0, 126.0, 63.0, 49.0, 33.0, 25.0, 13.0, 21.0, 14.0, 16.0, 12.0, 11.0, 10.0, 10.0, 5.0, 5.0, 7.0, 1.0, 3.0, 5.0, 2.0, 4.0, 0.0, 3.0], "bins": [-15.25, -14.8232421875, -14.396484375, -13.9697265625, -13.54296875, -13.1162109375, -12.689453125, -12.2626953125, -11.8359375, -11.4091796875, -10.982421875, -10.5556640625, -10.12890625, -9.7021484375, -9.275390625, -8.8486328125, -8.421875, -7.9951171875, -7.568359375, -7.1416015625, -6.71484375, -6.2880859375, -5.861328125, -5.4345703125, -5.0078125, -4.5810546875, -4.154296875, -3.7275390625, -3.30078125, -2.8740234375, -2.447265625, -2.0205078125, -1.59375, -1.1669921875, -0.740234375, -0.3134765625, 0.11328125, 0.5400390625, 0.966796875, 1.3935546875, 1.8203125, 2.2470703125, 2.673828125, 3.1005859375, 3.52734375, 3.9541015625, 4.380859375, 4.8076171875, 5.234375, 5.6611328125, 6.087890625, 6.5146484375, 6.94140625, 7.3681640625, 7.794921875, 8.2216796875, 8.6484375, 9.0751953125, 9.501953125, 9.9287109375, 10.35546875, 10.7822265625, 11.208984375, 11.6357421875, 12.0625]}, "gradients/decoder.transformer.h.20.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 8.0, 10.0, 10.0, 8.0, 22.0, 35.0, 32.0, 57.0, 85.0, 120.0, 152.0, 246.0, 319.0, 436.0, 486.0, 510.0, 391.0, 349.0, 251.0, 162.0, 122.0, 81.0, 56.0, 43.0, 27.0, 28.0, 13.0, 8.0, 2.0, 4.0, 3.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.3046875, -11.9718017578125, -11.638916015625, -11.3060302734375, -10.97314453125, -10.6402587890625, -10.307373046875, -9.9744873046875, -9.6416015625, -9.3087158203125, -8.975830078125, -8.6429443359375, -8.31005859375, -7.9771728515625, -7.644287109375, -7.3114013671875, -6.978515625, -6.6456298828125, -6.312744140625, -5.9798583984375, -5.64697265625, -5.3140869140625, -4.981201171875, -4.6483154296875, -4.3154296875, -3.9825439453125, -3.649658203125, -3.3167724609375, -2.98388671875, -2.6510009765625, -2.318115234375, -1.9852294921875, -1.65234375, -1.3194580078125, -0.986572265625, -0.6536865234375, -0.32080078125, 0.0120849609375, 0.344970703125, 0.6778564453125, 1.0107421875, 1.3436279296875, 1.676513671875, 2.0093994140625, 2.34228515625, 2.6751708984375, 3.008056640625, 3.3409423828125, 3.673828125, 4.0067138671875, 4.339599609375, 4.6724853515625, 5.00537109375, 5.3382568359375, 5.671142578125, 6.0040283203125, 6.3369140625, 6.6697998046875, 7.002685546875, 7.3355712890625, 7.66845703125, 8.0013427734375, 8.334228515625, 8.6671142578125, 9.0]}, "gradients/decoder.transformer.h.20.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 6.0, 7.0, 11.0, 14.0, 17.0, 18.0, 36.0, 61.0, 77.0, 127.0, 206.0, 395.0, 904.0, 3292.0, 27319.0, 506738.0, 3282627.0, 347868.0, 20192.0, 2721.0, 795.0, 361.0, 168.0, 118.0, 78.0, 33.0, 32.0, 17.0, 12.0, 11.0, 8.0, 4.0, 5.0, 2.0, 4.0, 4.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-17.453125, -16.739013671875, -16.02490234375, -15.310791015625, -14.5966796875, -13.882568359375, -13.16845703125, -12.454345703125, -11.740234375, -11.026123046875, -10.31201171875, -9.597900390625, -8.8837890625, -8.169677734375, -7.45556640625, -6.741455078125, -6.02734375, -5.313232421875, -4.59912109375, -3.885009765625, -3.1708984375, -2.456787109375, -1.74267578125, -1.028564453125, -0.314453125, 0.399658203125, 1.11376953125, 1.827880859375, 2.5419921875, 3.256103515625, 3.97021484375, 4.684326171875, 5.3984375, 6.112548828125, 6.82666015625, 7.540771484375, 8.2548828125, 8.968994140625, 9.68310546875, 10.397216796875, 11.111328125, 11.825439453125, 12.53955078125, 13.253662109375, 13.9677734375, 14.681884765625, 15.39599609375, 16.110107421875, 16.82421875, 17.538330078125, 18.25244140625, 18.966552734375, 19.6806640625, 20.394775390625, 21.10888671875, 21.822998046875, 22.537109375, 23.251220703125, 23.96533203125, 24.679443359375, 25.3935546875, 26.107666015625, 26.82177734375, 27.535888671875, 28.25]}, "gradients/decoder.transformer.h.20.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 10.0, 53.0, 154.0, 264.0, 284.0, 172.0, 56.0, 18.0, 2.0, 2.0], "bins": [-215.17364501953125, -211.4653778076172, -207.75711059570312, -204.04884338378906, -200.340576171875, -196.63230895996094, -192.92404174804688, -189.21578979492188, -185.50750732421875, -181.7992401123047, -178.09097290039062, -174.38270568847656, -170.6744384765625, -166.96617126464844, -163.25790405273438, -159.54965209960938, -155.8413848876953, -152.13311767578125, -148.4248504638672, -144.71658325195312, -141.00831604003906, -137.300048828125, -133.59178161621094, -129.88351440429688, -126.17525482177734, -122.46698760986328, -118.75872039794922, -115.05045318603516, -111.34219360351562, -107.63392639160156, -103.9256591796875, -100.21739196777344, -96.50912475585938, -92.80085754394531, -89.09259033203125, -85.38432312011719, -81.67605590820312, -77.96778869628906, -74.25952911376953, -70.55126190185547, -66.8429946899414, -63.134727478027344, -59.42646026611328, -55.718196868896484, -52.00992965698242, -48.30166244506836, -44.59339904785156, -40.8851318359375, -37.17686462402344, -33.468597412109375, -29.760332107543945, -26.052066802978516, -22.343799591064453, -18.63553237915039, -14.927267074584961, -11.219001770019531, -7.510736465454102, -3.8024702072143555, -0.09420394897460938, 3.6140623092651367, 7.322328567504883, 11.030595779418945, 14.738861083984375, 18.447126388549805, 22.155393600463867]}, "gradients/decoder.transformer.h.20.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 3.0, 5.0, 3.0, 15.0, 12.0, 11.0, 11.0, 13.0, 20.0, 25.0, 31.0, 29.0, 29.0, 30.0, 31.0, 44.0, 44.0, 50.0, 52.0, 33.0, 50.0, 33.0, 38.0, 49.0, 40.0, 35.0, 50.0, 30.0, 19.0, 31.0, 21.0, 16.0, 27.0, 21.0, 8.0, 10.0, 14.0, 5.0, 6.0, 1.0, 5.0, 4.0, 4.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 1.0], "bins": [-35.61521530151367, -34.57433319091797, -33.533451080322266, -32.49257278442383, -31.451690673828125, -30.410808563232422, -29.36992645263672, -28.329044342041016, -27.288164138793945, -26.247282028198242, -25.206401824951172, -24.16551971435547, -23.124637603759766, -22.083757400512695, -21.042875289916992, -20.001995086669922, -18.96111297607422, -17.920230865478516, -16.879350662231445, -15.838468551635742, -14.797587394714355, -13.756706237792969, -12.715824127197266, -11.674942970275879, -10.634061813354492, -9.593180656433105, -8.552299499511719, -7.511417388916016, -6.470536231994629, -5.429655075073242, -4.388773441314697, -3.3478918075561523, -2.3070106506347656, -1.2661292552947998, -0.22524785995483398, 0.8156335353851318, 1.8565149307250977, 2.8973960876464844, 3.9382777214050293, 4.979159355163574, 6.020040512084961, 7.060921669006348, 8.101802825927734, 9.142684936523438, 10.183566093444824, 11.224447250366211, 12.265329360961914, 13.3062105178833, 14.347091674804688, 15.387972831726074, 16.42885398864746, 17.469736099243164, 18.510616302490234, 19.551498413085938, 20.59238052368164, 21.633262634277344, 22.674142837524414, 23.715024948120117, 24.755905151367188, 25.79678726196289, 26.837669372558594, 27.878549575805664, 28.919431686401367, 29.960311889648438, 31.00119400024414]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 3.0, 2.0, 3.0, 7.0, 5.0, 4.0, 10.0, 12.0, 13.0, 11.0, 20.0, 20.0, 19.0, 15.0, 30.0, 32.0, 39.0, 34.0, 37.0, 47.0, 46.0, 49.0, 52.0, 56.0, 50.0, 35.0, 35.0, 35.0, 30.0, 36.0, 37.0, 23.0, 21.0, 30.0, 27.0, 11.0, 12.0, 16.0, 5.0, 8.0, 6.0, 9.0, 6.0, 3.0, 6.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.501953125, -3.39263916015625, -3.2833251953125, -3.17401123046875, -3.064697265625, -2.95538330078125, -2.8460693359375, -2.73675537109375, -2.62744140625, -2.51812744140625, -2.4088134765625, -2.29949951171875, -2.190185546875, -2.08087158203125, -1.9715576171875, -1.86224365234375, -1.7529296875, -1.64361572265625, -1.5343017578125, -1.42498779296875, -1.315673828125, -1.20635986328125, -1.0970458984375, -0.98773193359375, -0.87841796875, -0.76910400390625, -0.6597900390625, -0.55047607421875, -0.441162109375, -0.33184814453125, -0.2225341796875, -0.11322021484375, -0.00390625, 0.10540771484375, 0.2147216796875, 0.32403564453125, 0.433349609375, 0.54266357421875, 0.6519775390625, 0.76129150390625, 0.87060546875, 0.97991943359375, 1.0892333984375, 1.19854736328125, 1.307861328125, 1.41717529296875, 1.5264892578125, 1.63580322265625, 1.7451171875, 1.85443115234375, 1.9637451171875, 2.07305908203125, 2.182373046875, 2.29168701171875, 2.4010009765625, 2.51031494140625, 2.61962890625, 2.72894287109375, 2.8382568359375, 2.94757080078125, 3.056884765625, 3.16619873046875, 3.2755126953125, 3.38482666015625, 3.494140625]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 6.0, 4.0, 5.0, 4.0, 9.0, 8.0, 41.0, 31.0, 46.0, 90.0, 121.0, 193.0, 279.0, 461.0, 813.0, 1230.0, 2053.0, 3447.0, 5543.0, 9546.0, 16394.0, 28446.0, 49717.0, 88671.0, 159010.0, 247651.0, 188434.0, 105725.0, 59559.0, 33763.0, 19333.0, 11296.0, 6481.0, 3911.0, 2304.0, 1420.0, 962.0, 549.0, 363.0, 253.0, 142.0, 93.0, 56.0, 48.0, 27.0, 18.0, 3.0, 3.0, 2.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.27783203125, -0.26813507080078125, -0.2584381103515625, -0.24874114990234375, -0.239044189453125, -0.22934722900390625, -0.2196502685546875, -0.20995330810546875, -0.20025634765625, -0.19055938720703125, -0.1808624267578125, -0.17116546630859375, -0.161468505859375, -0.15177154541015625, -0.1420745849609375, -0.13237762451171875, -0.1226806640625, -0.11298370361328125, -0.1032867431640625, -0.09358978271484375, -0.083892822265625, -0.07419586181640625, -0.0644989013671875, -0.05480194091796875, -0.04510498046875, -0.03540802001953125, -0.0257110595703125, -0.01601409912109375, -0.006317138671875, 0.00337982177734375, 0.0130767822265625, 0.02277374267578125, 0.032470703125, 0.04216766357421875, 0.0518646240234375, 0.06156158447265625, 0.071258544921875, 0.08095550537109375, 0.0906524658203125, 0.10034942626953125, 0.11004638671875, 0.11974334716796875, 0.1294403076171875, 0.13913726806640625, 0.148834228515625, 0.15853118896484375, 0.1682281494140625, 0.17792510986328125, 0.1876220703125, 0.19731903076171875, 0.2070159912109375, 0.21671295166015625, 0.226409912109375, 0.23610687255859375, 0.2458038330078125, 0.25550079345703125, 0.26519775390625, 0.27489471435546875, 0.2845916748046875, 0.29428863525390625, 0.303985595703125, 0.31368255615234375, 0.3233795166015625, 0.33307647705078125, 0.3427734375]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 2.0, 4.0, 15.0, 8.0, 10.0, 14.0, 17.0, 26.0, 20.0, 33.0, 31.0, 26.0, 28.0, 31.0, 41.0, 49.0, 40.0, 48.0, 51.0, 1058.0, 56.0, 42.0, 40.0, 40.0, 35.0, 38.0, 33.0, 26.0, 23.0, 33.0, 20.0, 17.0, 15.0, 8.0, 12.0, 4.0, 10.0, 4.0, 9.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0], "bins": [-2.490234375, -2.419677734375, -2.34912109375, -2.278564453125, -2.2080078125, -2.137451171875, -2.06689453125, -1.996337890625, -1.92578125, -1.855224609375, -1.78466796875, -1.714111328125, -1.6435546875, -1.572998046875, -1.50244140625, -1.431884765625, -1.361328125, -1.290771484375, -1.22021484375, -1.149658203125, -1.0791015625, -1.008544921875, -0.93798828125, -0.867431640625, -0.796875, -0.726318359375, -0.65576171875, -0.585205078125, -0.5146484375, -0.444091796875, -0.37353515625, -0.302978515625, -0.232421875, -0.161865234375, -0.09130859375, -0.020751953125, 0.0498046875, 0.120361328125, 0.19091796875, 0.261474609375, 0.33203125, 0.402587890625, 0.47314453125, 0.543701171875, 0.6142578125, 0.684814453125, 0.75537109375, 0.825927734375, 0.896484375, 0.967041015625, 1.03759765625, 1.108154296875, 1.1787109375, 1.249267578125, 1.31982421875, 1.390380859375, 1.4609375, 1.531494140625, 1.60205078125, 1.672607421875, 1.7431640625, 1.813720703125, 1.88427734375, 1.954833984375, 2.025390625]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 4.0, 1.0, 3.0, 8.0, 9.0, 17.0, 27.0, 31.0, 56.0, 78.0, 123.0, 193.0, 304.0, 452.0, 637.0, 982.0, 1443.0, 2249.0, 3413.0, 5239.0, 7943.0, 12191.0, 18992.0, 28634.0, 43944.0, 66515.0, 99723.0, 144830.0, 1220891.0, 144137.0, 100548.0, 66095.0, 43783.0, 29028.0, 18894.0, 12316.0, 8045.0, 5324.0, 3377.0, 2229.0, 1581.0, 961.0, 633.0, 423.0, 266.0, 186.0, 128.0, 92.0, 62.0, 33.0, 27.0, 13.0, 13.0, 11.0, 3.0, 2.0, 1.0, 0.0, 2.0, 2.0], "bins": [-0.202392578125, -0.1961650848388672, -0.18993759155273438, -0.18371009826660156, -0.17748260498046875, -0.17125511169433594, -0.16502761840820312, -0.1588001251220703, -0.1525726318359375, -0.1463451385498047, -0.14011764526367188, -0.13389015197753906, -0.12766265869140625, -0.12143516540527344, -0.11520767211914062, -0.10898017883300781, -0.102752685546875, -0.09652519226074219, -0.09029769897460938, -0.08407020568847656, -0.07784271240234375, -0.07161521911621094, -0.06538772583007812, -0.05916023254394531, -0.0529327392578125, -0.04670524597167969, -0.040477752685546875, -0.03425025939941406, -0.02802276611328125, -0.021795272827148438, -0.015567779541015625, -0.009340286254882812, -0.00311279296875, 0.0031147003173828125, 0.009342193603515625, 0.015569686889648438, 0.02179718017578125, 0.028024673461914062, 0.034252166748046875, 0.04047966003417969, 0.0467071533203125, 0.05293464660644531, 0.059162139892578125, 0.06538963317871094, 0.07161712646484375, 0.07784461975097656, 0.08407211303710938, 0.09029960632324219, 0.096527099609375, 0.10275459289550781, 0.10898208618164062, 0.11520957946777344, 0.12143707275390625, 0.12766456604003906, 0.13389205932617188, 0.1401195526123047, 0.1463470458984375, 0.1525745391845703, 0.15880203247070312, 0.16502952575683594, 0.17125701904296875, 0.17748451232910156, 0.18371200561523438, 0.1899394989013672, 0.1961669921875]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 3.0, 2.0, 7.0, 5.0, 12.0, 6.0, 11.0, 16.0, 21.0, 18.0, 35.0, 40.0, 42.0, 57.0, 65.0, 60.0, 71.0, 93.0, 66.0, 75.0, 67.0, 50.0, 30.0, 26.0, 24.0, 15.0, 16.0, 22.0, 11.0, 11.0, 4.0, 6.0, 7.0, 5.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0009241104125976562, -0.0008934289216995239, -0.0008627474308013916, -0.0008320659399032593, -0.000801384449005127, -0.0007707029581069946, -0.0007400214672088623, -0.00070933997631073, -0.0006786584854125977, -0.0006479769945144653, -0.000617295503616333, -0.0005866140127182007, -0.0005559325218200684, -0.000525251030921936, -0.0004945695400238037, -0.0004638880491256714, -0.00043320655822753906, -0.00040252506732940674, -0.0003718435764312744, -0.0003411620855331421, -0.00031048059463500977, -0.00027979910373687744, -0.0002491176128387451, -0.0002184361219406128, -0.00018775463104248047, -0.00015707314014434814, -0.00012639164924621582, -9.57101583480835e-05, -6.502866744995117e-05, -3.434717655181885e-05, -3.6656856536865234e-06, 2.70158052444458e-05, 5.7697296142578125e-05, 8.837878704071045e-05, 0.00011906027793884277, 0.0001497417688369751, 0.00018042325973510742, 0.00021110475063323975, 0.00024178624153137207, 0.0002724677324295044, 0.0003031492233276367, 0.00033383071422576904, 0.00036451220512390137, 0.0003951936960220337, 0.000425875186920166, 0.00045655667781829834, 0.00048723816871643066, 0.000517919659614563, 0.0005486011505126953, 0.0005792826414108276, 0.00060996413230896, 0.0006406456232070923, 0.0006713271141052246, 0.0007020086050033569, 0.0007326900959014893, 0.0007633715867996216, 0.0007940530776977539, 0.0008247345685958862, 0.0008554160594940186, 0.0008860975503921509, 0.0009167790412902832, 0.0009474605321884155, 0.0009781420230865479, 0.0010088235139846802, 0.0010395050048828125]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 2.0, 2.0, 6.0, 8.0, 6.0, 11.0, 9.0, 19.0, 16.0, 30.0, 34.0, 40.0, 64.0, 72.0, 99.0, 179.0, 257.0, 432.0, 1152.0, 251001.0, 791714.0, 2006.0, 479.0, 273.0, 187.0, 118.0, 76.0, 68.0, 65.0, 40.0, 18.0, 16.0, 14.0, 5.0, 7.0, 9.0, 10.0, 2.0, 1.0, 7.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0186767578125, -0.018144726753234863, -0.017612695693969727, -0.01708066463470459, -0.016548633575439453, -0.016016602516174316, -0.01548457145690918, -0.014952540397644043, -0.014420509338378906, -0.01388847827911377, -0.013356447219848633, -0.012824416160583496, -0.01229238510131836, -0.011760354042053223, -0.011228322982788086, -0.01069629192352295, -0.010164260864257812, -0.009632229804992676, -0.009100198745727539, -0.008568167686462402, -0.008036136627197266, -0.007504105567932129, -0.006972074508666992, -0.0064400434494018555, -0.005908012390136719, -0.005375981330871582, -0.004843950271606445, -0.004311919212341309, -0.003779888153076172, -0.003247857093811035, -0.0027158260345458984, -0.0021837949752807617, -0.001651763916015625, -0.0011197328567504883, -0.0005877017974853516, -5.5670738220214844e-05, 0.0004763603210449219, 0.0010083913803100586, 0.0015404224395751953, 0.002072453498840332, 0.0026044845581054688, 0.0031365156173706055, 0.003668546676635742, 0.004200577735900879, 0.004732608795166016, 0.005264639854431152, 0.005796670913696289, 0.006328701972961426, 0.0068607330322265625, 0.007392764091491699, 0.007924795150756836, 0.008456826210021973, 0.00898885726928711, 0.009520888328552246, 0.010052919387817383, 0.01058495044708252, 0.011116981506347656, 0.011649012565612793, 0.01218104362487793, 0.012713074684143066, 0.013245105743408203, 0.01377713680267334, 0.014309167861938477, 0.014841198921203613, 0.01537322998046875]}, "gradients/decoder.transformer.h.20.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 29.0, 263.0, 560.0, 153.0, 8.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0009381965501233935, -0.0008477285737171769, -0.0007572606555186212, -0.0006667927373200655, -0.0005763247609138489, -0.0004858568136114627, -0.00039538886630907655, -0.00030492094811052084, -0.00021445297170430422, -0.00012398502440191805, -3.351707709953189e-05, 5.6950870202854276e-05, 0.00014741881750524044, 0.0002378867648076266, 0.00032835471211001277, 0.0004188226303085685, 0.0005092906067147851, 0.0005997585831210017, 0.0006902265013195574, 0.0007806944195181131, 0.0008711623959243298, 0.0009616303723305464, 0.001052098348736763, 0.0011425662087276578, 0.0012330341851338744, 0.001323502161540091, 0.0014139700215309858, 0.0015044379979372025, 0.001594905974343419, 0.0016853739507496357, 0.0017758419271558523, 0.0018663097871467471, 0.001956777647137642, 0.0020472456235438585, 0.002137713599950075, 0.0022281815763562918, 0.0023186495527625084, 0.0024091172963380814, 0.002499585272744298, 0.0025900532491505146, 0.0026805212255567312, 0.002770989201962948, 0.0028614571783691645, 0.002951925154775381, 0.003042392898350954, 0.0031328608747571707, 0.0032233288511633873, 0.003313796827569604, 0.0034042648039758205, 0.003494732780382037, 0.003585200756788254, 0.0036756687331944704, 0.003766136709600687, 0.00385660445317626, 0.00394707266241312, 0.004037540405988693, 0.004128008149564266, 0.004218475893139839, 0.0043089441023766994, 0.004399411845952272, 0.004489880055189133, 0.004580347798764706, 0.004670816008001566, 0.004761283751577139, 0.004851751960813999]}, "gradients/decoder.transformer.h.20.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 1.0, 6.0, 4.0, 5.0, 5.0, 5.0, 10.0, 12.0, 10.0, 17.0, 20.0, 19.0, 22.0, 22.0, 26.0, 25.0, 23.0, 33.0, 36.0, 36.0, 43.0, 34.0, 30.0, 25.0, 35.0, 57.0, 32.0, 47.0, 42.0, 39.0, 35.0, 36.0, 36.0, 21.0, 27.0, 21.0, 16.0, 18.0, 12.0, 12.0, 12.0, 12.0, 6.0, 7.0, 5.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-0.0004127621650695801, -0.00039967894554138184, -0.0003865957260131836, -0.00037351250648498535, -0.0003604292869567871, -0.00034734606742858887, -0.0003342628479003906, -0.0003211796283721924, -0.00030809640884399414, -0.0002950131893157959, -0.00028192996978759766, -0.0002688467502593994, -0.00025576353073120117, -0.00024268031120300293, -0.0002295970916748047, -0.00021651387214660645, -0.0002034306526184082, -0.00019034743309020996, -0.00017726421356201172, -0.00016418099403381348, -0.00015109777450561523, -0.000138014554977417, -0.00012493133544921875, -0.00011184811592102051, -9.876489639282227e-05, -8.568167686462402e-05, -7.259845733642578e-05, -5.951523780822754e-05, -4.64320182800293e-05, -3.3348798751831055e-05, -2.0265579223632812e-05, -7.18235969543457e-06, 5.900859832763672e-06, 1.8984079360961914e-05, 3.2067298889160156e-05, 4.51505184173584e-05, 5.823373794555664e-05, 7.131695747375488e-05, 8.440017700195312e-05, 9.748339653015137e-05, 0.00011056661605834961, 0.00012364983558654785, 0.0001367330551147461, 0.00014981627464294434, 0.00016289949417114258, 0.00017598271369934082, 0.00018906593322753906, 0.0002021491527557373, 0.00021523237228393555, 0.0002283155918121338, 0.00024139881134033203, 0.0002544820308685303, 0.0002675652503967285, 0.00028064846992492676, 0.000293731689453125, 0.00030681490898132324, 0.0003198981285095215, 0.0003329813480377197, 0.00034606456756591797, 0.0003591477870941162, 0.00037223100662231445, 0.0003853142261505127, 0.00039839744567871094, 0.0004114806652069092, 0.0004245638847351074]}, "gradients/decoder.transformer.h.20.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 3.0, 2.0, 3.0, 7.0, 5.0, 4.0, 10.0, 12.0, 13.0, 11.0, 20.0, 20.0, 19.0, 15.0, 30.0, 32.0, 39.0, 34.0, 37.0, 47.0, 46.0, 49.0, 52.0, 56.0, 50.0, 35.0, 35.0, 35.0, 30.0, 36.0, 37.0, 23.0, 21.0, 30.0, 27.0, 11.0, 12.0, 16.0, 5.0, 8.0, 6.0, 9.0, 6.0, 3.0, 6.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.501953125, -3.39263916015625, -3.2833251953125, -3.17401123046875, -3.064697265625, -2.95538330078125, -2.8460693359375, -2.73675537109375, -2.62744140625, -2.51812744140625, -2.4088134765625, -2.29949951171875, -2.190185546875, -2.08087158203125, -1.9715576171875, -1.86224365234375, -1.7529296875, -1.64361572265625, -1.5343017578125, -1.42498779296875, -1.315673828125, -1.20635986328125, -1.0970458984375, -0.98773193359375, -0.87841796875, -0.76910400390625, -0.6597900390625, -0.55047607421875, -0.441162109375, -0.33184814453125, -0.2225341796875, -0.11322021484375, -0.00390625, 0.10540771484375, 0.2147216796875, 0.32403564453125, 0.433349609375, 0.54266357421875, 0.6519775390625, 0.76129150390625, 0.87060546875, 0.97991943359375, 1.0892333984375, 1.19854736328125, 1.307861328125, 1.41717529296875, 1.5264892578125, 1.63580322265625, 1.7451171875, 1.85443115234375, 1.9637451171875, 2.07305908203125, 2.182373046875, 2.29168701171875, 2.4010009765625, 2.51031494140625, 2.61962890625, 2.72894287109375, 2.8382568359375, 2.94757080078125, 3.056884765625, 3.16619873046875, 3.2755126953125, 3.38482666015625, 3.494140625]}, "gradients/decoder.transformer.h.20.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 5.0, 2.0, 3.0, 5.0, 10.0, 7.0, 7.0, 12.0, 14.0, 21.0, 32.0, 42.0, 68.0, 94.0, 116.0, 164.0, 237.0, 391.0, 601.0, 1097.0, 2213.0, 5040.0, 12610.0, 38251.0, 157383.0, 498071.0, 245884.0, 56383.0, 17243.0, 6437.0, 2741.0, 1295.0, 725.0, 436.0, 276.0, 165.0, 123.0, 100.0, 75.0, 56.0, 33.0, 22.0, 23.0, 12.0, 12.0, 10.0, 7.0, 6.0, 2.0, 2.0, 2.0, 0.0, 2.0, 0.0, 2.0], "bins": [-4.93359375, -4.79119873046875, -4.6488037109375, -4.50640869140625, -4.364013671875, -4.22161865234375, -4.0792236328125, -3.93682861328125, -3.79443359375, -3.65203857421875, -3.5096435546875, -3.36724853515625, -3.224853515625, -3.08245849609375, -2.9400634765625, -2.79766845703125, -2.6552734375, -2.51287841796875, -2.3704833984375, -2.22808837890625, -2.085693359375, -1.94329833984375, -1.8009033203125, -1.65850830078125, -1.51611328125, -1.37371826171875, -1.2313232421875, -1.08892822265625, -0.946533203125, -0.80413818359375, -0.6617431640625, -0.51934814453125, -0.376953125, -0.23455810546875, -0.0921630859375, 0.05023193359375, 0.192626953125, 0.33502197265625, 0.4774169921875, 0.61981201171875, 0.76220703125, 0.90460205078125, 1.0469970703125, 1.18939208984375, 1.331787109375, 1.47418212890625, 1.6165771484375, 1.75897216796875, 1.9013671875, 2.04376220703125, 2.1861572265625, 2.32855224609375, 2.470947265625, 2.61334228515625, 2.7557373046875, 2.89813232421875, 3.04052734375, 3.18292236328125, 3.3253173828125, 3.46771240234375, 3.610107421875, 3.75250244140625, 3.8948974609375, 4.03729248046875, 4.1796875]}, "gradients/decoder.transformer.h.20.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 7.0, 2.0, 14.0, 6.0, 8.0, 17.0, 18.0, 21.0, 17.0, 35.0, 29.0, 45.0, 48.0, 51.0, 60.0, 104.0, 298.0, 1667.0, 157.0, 68.0, 57.0, 56.0, 38.0, 46.0, 32.0, 23.0, 24.0, 27.0, 20.0, 12.0, 19.0, 9.0, 6.0, 7.0, 4.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.875, -14.4141845703125, -13.953369140625, -13.4925537109375, -13.03173828125, -12.5709228515625, -12.110107421875, -11.6492919921875, -11.1884765625, -10.7276611328125, -10.266845703125, -9.8060302734375, -9.34521484375, -8.8843994140625, -8.423583984375, -7.9627685546875, -7.501953125, -7.0411376953125, -6.580322265625, -6.1195068359375, -5.65869140625, -5.1978759765625, -4.737060546875, -4.2762451171875, -3.8154296875, -3.3546142578125, -2.893798828125, -2.4329833984375, -1.97216796875, -1.5113525390625, -1.050537109375, -0.5897216796875, -0.12890625, 0.3319091796875, 0.792724609375, 1.2535400390625, 1.71435546875, 2.1751708984375, 2.635986328125, 3.0968017578125, 3.5576171875, 4.0184326171875, 4.479248046875, 4.9400634765625, 5.40087890625, 5.8616943359375, 6.322509765625, 6.7833251953125, 7.244140625, 7.7049560546875, 8.165771484375, 8.6265869140625, 9.08740234375, 9.5482177734375, 10.009033203125, 10.4698486328125, 10.9306640625, 11.3914794921875, 11.852294921875, 12.3131103515625, 12.77392578125, 13.2347412109375, 13.695556640625, 14.1563720703125, 14.6171875]}, "gradients/decoder.transformer.h.20.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 0.0, 3.0, 7.0, 8.0, 8.0, 8.0, 14.0, 21.0, 11.0, 37.0, 31.0, 36.0, 41.0, 60.0, 101.0, 175.0, 333.0, 860.0, 4420.0, 134230.0, 2976153.0, 25836.0, 2106.0, 534.0, 234.0, 126.0, 81.0, 54.0, 34.0, 29.0, 26.0, 19.0, 21.0, 14.0, 12.0, 9.0, 6.0, 4.0, 4.0, 2.0, 4.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.0, -20.30712890625, -19.6142578125, -18.92138671875, -18.228515625, -17.53564453125, -16.8427734375, -16.14990234375, -15.45703125, -14.76416015625, -14.0712890625, -13.37841796875, -12.685546875, -11.99267578125, -11.2998046875, -10.60693359375, -9.9140625, -9.22119140625, -8.5283203125, -7.83544921875, -7.142578125, -6.44970703125, -5.7568359375, -5.06396484375, -4.37109375, -3.67822265625, -2.9853515625, -2.29248046875, -1.599609375, -0.90673828125, -0.2138671875, 0.47900390625, 1.171875, 1.86474609375, 2.5576171875, 3.25048828125, 3.943359375, 4.63623046875, 5.3291015625, 6.02197265625, 6.71484375, 7.40771484375, 8.1005859375, 8.79345703125, 9.486328125, 10.17919921875, 10.8720703125, 11.56494140625, 12.2578125, 12.95068359375, 13.6435546875, 14.33642578125, 15.029296875, 15.72216796875, 16.4150390625, 17.10791015625, 17.80078125, 18.49365234375, 19.1865234375, 19.87939453125, 20.572265625, 21.26513671875, 21.9580078125, 22.65087890625, 23.34375]}, "gradients/decoder.transformer.h.20.ln_1.weight": {"_type": "histogram", "values": [1.0, 3.0, 24.0, 128.0, 354.0, 360.0, 130.0, 16.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.339313507080078, -6.671306610107422, -5.003299713134766, -3.3352928161621094, -1.6672859191894531, 0.000720977783203125, 1.6687278747558594, 3.3367347717285156, 5.004741668701172, 6.672748565673828, 8.340755462646484, 10.00876235961914, 11.676769256591797, 13.344776153564453, 15.01278305053711, 16.680789947509766, 18.348796844482422, 20.016803741455078, 21.684810638427734, 23.35281753540039, 25.020824432373047, 26.688831329345703, 28.35683822631836, 30.024845123291016, 31.692852020263672, 33.36085891723633, 35.028865814208984, 36.69687271118164, 38.3648796081543, 40.03288650512695, 41.70089340209961, 43.368900299072266, 45.03691101074219, 46.704917907714844, 48.3729248046875, 50.040931701660156, 51.70893859863281, 53.37694549560547, 55.044952392578125, 56.71295928955078, 58.38096618652344, 60.048973083496094, 61.71697998046875, 63.384986877441406, 65.05299377441406, 66.72100067138672, 68.38900756835938, 70.05701446533203, 71.72502136230469, 73.39302825927734, 75.06103515625, 76.72904205322266, 78.39704895019531, 80.06505584716797, 81.73306274414062, 83.40106964111328, 85.06907653808594, 86.7370834350586, 88.40509033203125, 90.0730972290039, 91.74110412597656, 93.40911102294922, 95.07711791992188, 96.74512481689453, 98.41313171386719]}, "gradients/decoder.transformer.h.20.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 6.0, 2.0, 3.0, 6.0, 15.0, 11.0, 12.0, 14.0, 17.0, 15.0, 14.0, 24.0, 22.0, 23.0, 29.0, 35.0, 49.0, 49.0, 31.0, 34.0, 37.0, 40.0, 35.0, 41.0, 48.0, 38.0, 35.0, 36.0, 30.0, 32.0, 34.0, 20.0, 25.0, 21.0, 17.0, 17.0, 12.0, 11.0, 12.0, 12.0, 13.0, 12.0, 5.0, 6.0, 5.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.41712951660156, -36.2373046875, -35.05747604370117, -33.87765121459961, -32.69782257080078, -31.51799774169922, -30.338172912597656, -29.15834617614746, -27.978519439697266, -26.79869270324707, -25.618865966796875, -24.439041137695312, -23.259214401245117, -22.079387664794922, -20.89956283569336, -19.719736099243164, -18.53990936279297, -17.360082626342773, -16.180255889892578, -15.000431060791016, -13.82060432434082, -12.640777587890625, -11.460951805114746, -10.281126022338867, -9.101299285888672, -7.921473026275635, -6.741646766662598, -5.5618205070495605, -4.381994247436523, -3.2021679878234863, -2.022341728210449, -0.8425159454345703, 0.3373069763183594, 1.5171332359313965, 2.6969594955444336, 3.8767857551574707, 5.056612014770508, 6.236438274383545, 7.416264533996582, 8.596090316772461, 9.775917053222656, 10.955743789672852, 12.13556957244873, 13.31539535522461, 14.495222091674805, 15.675048828125, 16.854873657226562, 18.034700393676758, 19.214527130126953, 20.39435386657715, 21.574180603027344, 22.754005432128906, 23.9338321685791, 25.113658905029297, 26.29348373413086, 27.473310470581055, 28.65313720703125, 29.832963943481445, 31.01279067993164, 32.1926155090332, 33.37244415283203, 34.552268981933594, 35.732093811035156, 36.91191864013672, 38.09174728393555]}, "gradients/decoder.transformer.h.19.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 6.0, 3.0, 8.0, 5.0, 6.0, 12.0, 8.0, 20.0, 13.0, 18.0, 25.0, 19.0, 25.0, 33.0, 35.0, 30.0, 44.0, 42.0, 41.0, 55.0, 46.0, 54.0, 48.0, 51.0, 28.0, 35.0, 34.0, 30.0, 31.0, 36.0, 21.0, 22.0, 23.0, 15.0, 15.0, 11.0, 13.0, 4.0, 12.0, 9.0, 2.0, 2.0, 5.0, 5.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.591796875, -3.4798583984375, -3.367919921875, -3.2559814453125, -3.14404296875, -3.0321044921875, -2.920166015625, -2.8082275390625, -2.6962890625, -2.5843505859375, -2.472412109375, -2.3604736328125, -2.24853515625, -2.1365966796875, -2.024658203125, -1.9127197265625, -1.80078125, -1.6888427734375, -1.576904296875, -1.4649658203125, -1.35302734375, -1.2410888671875, -1.129150390625, -1.0172119140625, -0.9052734375, -0.7933349609375, -0.681396484375, -0.5694580078125, -0.45751953125, -0.3455810546875, -0.233642578125, -0.1217041015625, -0.009765625, 0.1021728515625, 0.214111328125, 0.3260498046875, 0.43798828125, 0.5499267578125, 0.661865234375, 0.7738037109375, 0.8857421875, 0.9976806640625, 1.109619140625, 1.2215576171875, 1.33349609375, 1.4454345703125, 1.557373046875, 1.6693115234375, 1.78125, 1.8931884765625, 2.005126953125, 2.1170654296875, 2.22900390625, 2.3409423828125, 2.452880859375, 2.5648193359375, 2.6767578125, 2.7886962890625, 2.900634765625, 3.0125732421875, 3.12451171875, 3.2364501953125, 3.348388671875, 3.4603271484375, 3.572265625]}, "gradients/decoder.transformer.h.19.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 0.0, 3.0, 2.0, 5.0, 6.0, 4.0, 10.0, 11.0, 10.0, 14.0, 19.0, 13.0, 35.0, 51.0, 45.0, 77.0, 131.0, 275.0, 614.0, 1533.0, 4248.0, 13649.0, 49486.0, 235269.0, 1249609.0, 1989961.0, 517880.0, 96544.0, 23778.0, 6924.0, 2316.0, 857.0, 387.0, 173.0, 94.0, 61.0, 44.0, 29.0, 23.0, 23.0, 17.0, 13.0, 7.0, 6.0, 7.0, 7.0, 5.0, 4.0, 3.0, 2.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0], "bins": [-7.41015625, -7.1871337890625, -6.964111328125, -6.7410888671875, -6.51806640625, -6.2950439453125, -6.072021484375, -5.8489990234375, -5.6259765625, -5.4029541015625, -5.179931640625, -4.9569091796875, -4.73388671875, -4.5108642578125, -4.287841796875, -4.0648193359375, -3.841796875, -3.6187744140625, -3.395751953125, -3.1727294921875, -2.94970703125, -2.7266845703125, -2.503662109375, -2.2806396484375, -2.0576171875, -1.8345947265625, -1.611572265625, -1.3885498046875, -1.16552734375, -0.9425048828125, -0.719482421875, -0.4964599609375, -0.2734375, -0.0504150390625, 0.172607421875, 0.3956298828125, 0.61865234375, 0.8416748046875, 1.064697265625, 1.2877197265625, 1.5107421875, 1.7337646484375, 1.956787109375, 2.1798095703125, 2.40283203125, 2.6258544921875, 2.848876953125, 3.0718994140625, 3.294921875, 3.5179443359375, 3.740966796875, 3.9639892578125, 4.18701171875, 4.4100341796875, 4.633056640625, 4.8560791015625, 5.0791015625, 5.3021240234375, 5.525146484375, 5.7481689453125, 5.97119140625, 6.1942138671875, 6.417236328125, 6.6402587890625, 6.86328125]}, "gradients/decoder.transformer.h.19.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 7.0, 10.0, 8.0, 14.0, 19.0, 34.0, 57.0, 99.0, 153.0, 263.0, 411.0, 573.0, 696.0, 594.0, 421.0, 277.0, 176.0, 107.0, 71.0, 36.0, 29.0, 16.0, 5.0, 2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.0078125, -13.5494384765625, -13.091064453125, -12.6326904296875, -12.17431640625, -11.7159423828125, -11.257568359375, -10.7991943359375, -10.3408203125, -9.8824462890625, -9.424072265625, -8.9656982421875, -8.50732421875, -8.0489501953125, -7.590576171875, -7.1322021484375, -6.673828125, -6.2154541015625, -5.757080078125, -5.2987060546875, -4.84033203125, -4.3819580078125, -3.923583984375, -3.4652099609375, -3.0068359375, -2.5484619140625, -2.090087890625, -1.6317138671875, -1.17333984375, -0.7149658203125, -0.256591796875, 0.2017822265625, 0.66015625, 1.1185302734375, 1.576904296875, 2.0352783203125, 2.49365234375, 2.9520263671875, 3.410400390625, 3.8687744140625, 4.3271484375, 4.7855224609375, 5.243896484375, 5.7022705078125, 6.16064453125, 6.6190185546875, 7.077392578125, 7.5357666015625, 7.994140625, 8.4525146484375, 8.910888671875, 9.3692626953125, 9.82763671875, 10.2860107421875, 10.744384765625, 11.2027587890625, 11.6611328125, 12.1195068359375, 12.577880859375, 13.0362548828125, 13.49462890625, 13.9530029296875, 14.411376953125, 14.8697509765625, 15.328125]}, "gradients/decoder.transformer.h.19.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 4.0, 0.0, 5.0, 6.0, 19.0, 28.0, 37.0, 80.0, 131.0, 314.0, 624.0, 2199.0, 17017.0, 427828.0, 3471873.0, 259122.0, 12025.0, 1840.0, 561.0, 271.0, 138.0, 73.0, 39.0, 30.0, 9.0, 5.0, 5.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-33.34375, -32.51904296875, -31.6943359375, -30.86962890625, -30.044921875, -29.22021484375, -28.3955078125, -27.57080078125, -26.74609375, -25.92138671875, -25.0966796875, -24.27197265625, -23.447265625, -22.62255859375, -21.7978515625, -20.97314453125, -20.1484375, -19.32373046875, -18.4990234375, -17.67431640625, -16.849609375, -16.02490234375, -15.2001953125, -14.37548828125, -13.55078125, -12.72607421875, -11.9013671875, -11.07666015625, -10.251953125, -9.42724609375, -8.6025390625, -7.77783203125, -6.953125, -6.12841796875, -5.3037109375, -4.47900390625, -3.654296875, -2.82958984375, -2.0048828125, -1.18017578125, -0.35546875, 0.46923828125, 1.2939453125, 2.11865234375, 2.943359375, 3.76806640625, 4.5927734375, 5.41748046875, 6.2421875, 7.06689453125, 7.8916015625, 8.71630859375, 9.541015625, 10.36572265625, 11.1904296875, 12.01513671875, 12.83984375, 13.66455078125, 14.4892578125, 15.31396484375, 16.138671875, 16.96337890625, 17.7880859375, 18.61279296875, 19.4375]}, "gradients/decoder.transformer.h.19.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 16.0, 80.0, 192.0, 285.0, 246.0, 144.0, 45.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-200.8099822998047, -196.77439880371094, -192.73883056640625, -188.7032470703125, -184.6676788330078, -180.63209533691406, -176.59652709960938, -172.56094360351562, -168.52536010742188, -164.48977661132812, -160.45420837402344, -156.4186248779297, -152.383056640625, -148.34747314453125, -144.31190490722656, -140.2763214111328, -136.24075317382812, -132.20516967773438, -128.1696014404297, -124.13402557373047, -120.09844970703125, -116.0628662109375, -112.02729034423828, -107.99171447753906, -103.95613861083984, -99.92056274414062, -95.8849868774414, -91.84941101074219, -87.81382751464844, -83.77825927734375, -79.74267578125, -75.70709991455078, -71.6715087890625, -67.63593292236328, -63.60035705566406, -59.56477737426758, -55.52920150756836, -51.49362564086914, -47.458045959472656, -43.42247009277344, -39.386898040771484, -35.351322174072266, -31.315744400024414, -27.280166625976562, -23.244590759277344, -19.209014892578125, -15.173437118530273, -11.137859344482422, -7.102283477783203, -3.066706657409668, 0.9688701629638672, 5.004446983337402, 9.040023803710938, 13.075599670410156, 17.111177444458008, 21.14675521850586, 25.182331085205078, 29.217906951904297, 33.25348663330078, 37.2890625, 41.32463836669922, 45.36021423339844, 49.395790100097656, 53.43136978149414, 57.46694564819336]}, "gradients/decoder.transformer.h.19.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0, 6.0, 6.0, 10.0, 7.0, 6.0, 11.0, 20.0, 16.0, 17.0, 18.0, 21.0, 23.0, 27.0, 34.0, 29.0, 23.0, 47.0, 42.0, 28.0, 36.0, 41.0, 41.0, 42.0, 39.0, 36.0, 38.0, 29.0, 35.0, 41.0, 31.0, 20.0, 23.0, 26.0, 21.0, 25.0, 15.0, 20.0, 11.0, 8.0, 5.0, 7.0, 5.0, 6.0, 8.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-31.257848739624023, -30.298168182373047, -29.338489532470703, -28.378808975219727, -27.41912841796875, -26.459447860717773, -25.499767303466797, -24.540088653564453, -23.580408096313477, -22.6207275390625, -21.661048889160156, -20.70136833190918, -19.741687774658203, -18.782007217407227, -17.82232666015625, -16.862648010253906, -15.90296745300293, -14.943286895751953, -13.983607292175293, -13.023927688598633, -12.064247131347656, -11.10456657409668, -10.14488697052002, -9.18520736694336, -8.225526809692383, -7.2658467292785645, -6.306166648864746, -5.346486568450928, -4.386806488037109, -3.427126407623291, -2.4674463272094727, -1.5077662467956543, -0.5480842590332031, 0.41159582138061523, 1.3712759017944336, 2.330955982208252, 3.2906360626220703, 4.250316143035889, 5.209996223449707, 6.169676303863525, 7.129356384277344, 8.08903694152832, 9.04871654510498, 10.00839614868164, 10.968076705932617, 11.927757263183594, 12.887436866760254, 13.847116470336914, 14.80679702758789, 15.766477584838867, 16.726158142089844, 17.685836791992188, 18.645517349243164, 19.60519790649414, 20.564876556396484, 21.52455711364746, 22.484237670898438, 23.443918228149414, 24.40359878540039, 25.363277435302734, 26.32295799255371, 27.282638549804688, 28.24231719970703, 29.201997756958008, 30.161678314208984]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 1.0, 6.0, 0.0, 3.0, 10.0, 7.0, 4.0, 7.0, 17.0, 9.0, 15.0, 8.0, 18.0, 21.0, 15.0, 29.0, 29.0, 33.0, 28.0, 40.0, 35.0, 48.0, 44.0, 44.0, 52.0, 54.0, 30.0, 40.0, 38.0, 32.0, 34.0, 26.0, 32.0, 26.0, 27.0, 28.0, 21.0, 14.0, 13.0, 13.0, 10.0, 12.0, 2.0, 8.0, 5.0, 4.0, 6.0, 3.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.501953125, -3.39495849609375, -3.2879638671875, -3.18096923828125, -3.073974609375, -2.96697998046875, -2.8599853515625, -2.75299072265625, -2.64599609375, -2.53900146484375, -2.4320068359375, -2.32501220703125, -2.218017578125, -2.11102294921875, -2.0040283203125, -1.89703369140625, -1.7900390625, -1.68304443359375, -1.5760498046875, -1.46905517578125, -1.362060546875, -1.25506591796875, -1.1480712890625, -1.04107666015625, -0.93408203125, -0.82708740234375, -0.7200927734375, -0.61309814453125, -0.506103515625, -0.39910888671875, -0.2921142578125, -0.18511962890625, -0.078125, 0.02886962890625, 0.1358642578125, 0.24285888671875, 0.349853515625, 0.45684814453125, 0.5638427734375, 0.67083740234375, 0.77783203125, 0.88482666015625, 0.9918212890625, 1.09881591796875, 1.205810546875, 1.31280517578125, 1.4197998046875, 1.52679443359375, 1.6337890625, 1.74078369140625, 1.8477783203125, 1.95477294921875, 2.061767578125, 2.16876220703125, 2.2757568359375, 2.38275146484375, 2.48974609375, 2.59674072265625, 2.7037353515625, 2.81072998046875, 2.917724609375, 3.02471923828125, 3.1317138671875, 3.23870849609375, 3.345703125]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 6.0, 5.0, 7.0, 20.0, 14.0, 29.0, 39.0, 52.0, 95.0, 148.0, 221.0, 404.0, 582.0, 895.0, 1463.0, 2526.0, 3995.0, 6577.0, 10588.0, 17263.0, 28523.0, 47270.0, 78990.0, 135476.0, 214812.0, 198055.0, 120544.0, 71397.0, 42419.0, 25533.0, 15619.0, 9640.0, 5898.0, 3635.0, 2195.0, 1405.0, 810.0, 516.0, 338.0, 193.0, 134.0, 67.0, 63.0, 36.0, 21.0, 11.0, 11.0, 7.0, 6.0, 7.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.309814453125, -0.3003959655761719, -0.29097747802734375, -0.2815589904785156, -0.2721405029296875, -0.2627220153808594, -0.25330352783203125, -0.24388504028320312, -0.234466552734375, -0.22504806518554688, -0.21562957763671875, -0.20621109008789062, -0.1967926025390625, -0.18737411499023438, -0.17795562744140625, -0.16853713989257812, -0.15911865234375, -0.14970016479492188, -0.14028167724609375, -0.13086318969726562, -0.1214447021484375, -0.11202621459960938, -0.10260772705078125, -0.09318923950195312, -0.083770751953125, -0.07435226440429688, -0.06493377685546875, -0.055515289306640625, -0.0460968017578125, -0.036678314208984375, -0.02725982666015625, -0.017841339111328125, -0.0084228515625, 0.000995635986328125, 0.01041412353515625, 0.019832611083984375, 0.0292510986328125, 0.038669586181640625, 0.04808807373046875, 0.057506561279296875, 0.066925048828125, 0.07634353637695312, 0.08576202392578125, 0.09518051147460938, 0.1045989990234375, 0.11401748657226562, 0.12343597412109375, 0.13285446166992188, 0.14227294921875, 0.15169143676757812, 0.16110992431640625, 0.17052841186523438, 0.1799468994140625, 0.18936538696289062, 0.19878387451171875, 0.20820236206054688, 0.217620849609375, 0.22703933715820312, 0.23645782470703125, 0.24587631225585938, 0.2552947998046875, 0.2647132873535156, 0.27413177490234375, 0.2835502624511719, 0.29296875]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 3.0, 3.0, 7.0, 6.0, 11.0, 7.0, 15.0, 9.0, 18.0, 27.0, 10.0, 22.0, 27.0, 24.0, 40.0, 40.0, 41.0, 54.0, 31.0, 50.0, 1073.0, 52.0, 47.0, 41.0, 48.0, 49.0, 35.0, 41.0, 34.0, 18.0, 28.0, 16.0, 16.0, 16.0, 10.0, 7.0, 15.0, 9.0, 9.0, 4.0, 3.0, 6.0, 6.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.001953125, -1.927703857421875, -1.85345458984375, -1.779205322265625, -1.7049560546875, -1.630706787109375, -1.55645751953125, -1.482208251953125, -1.407958984375, -1.333709716796875, -1.25946044921875, -1.185211181640625, -1.1109619140625, -1.036712646484375, -0.96246337890625, -0.888214111328125, -0.81396484375, -0.739715576171875, -0.66546630859375, -0.591217041015625, -0.5169677734375, -0.442718505859375, -0.36846923828125, -0.294219970703125, -0.219970703125, -0.145721435546875, -0.07147216796875, 0.002777099609375, 0.0770263671875, 0.151275634765625, 0.22552490234375, 0.299774169921875, 0.3740234375, 0.448272705078125, 0.52252197265625, 0.596771240234375, 0.6710205078125, 0.745269775390625, 0.81951904296875, 0.893768310546875, 0.968017578125, 1.042266845703125, 1.11651611328125, 1.190765380859375, 1.2650146484375, 1.339263916015625, 1.41351318359375, 1.487762451171875, 1.56201171875, 1.636260986328125, 1.71051025390625, 1.784759521484375, 1.8590087890625, 1.933258056640625, 2.00750732421875, 2.081756591796875, 2.156005859375, 2.230255126953125, 2.30450439453125, 2.378753662109375, 2.4530029296875, 2.527252197265625, 2.60150146484375, 2.675750732421875, 2.75]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 5.0, 6.0, 10.0, 15.0, 15.0, 37.0, 55.0, 81.0, 125.0, 182.0, 306.0, 446.0, 689.0, 1128.0, 1810.0, 2710.0, 4267.0, 6320.0, 10160.0, 15631.0, 25329.0, 40981.0, 68678.0, 114083.0, 176186.0, 1249880.0, 146901.0, 89901.0, 53705.0, 32499.0, 19953.0, 12596.0, 8060.0, 5108.0, 3280.0, 2214.0, 1350.0, 880.0, 533.0, 364.0, 226.0, 168.0, 99.0, 54.0, 36.0, 37.0, 15.0, 11.0, 7.0, 2.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.2325439453125, -0.22510528564453125, -0.2176666259765625, -0.21022796630859375, -0.202789306640625, -0.19535064697265625, -0.1879119873046875, -0.18047332763671875, -0.17303466796875, -0.16559600830078125, -0.1581573486328125, -0.15071868896484375, -0.143280029296875, -0.13584136962890625, -0.1284027099609375, -0.12096405029296875, -0.113525390625, -0.10608673095703125, -0.0986480712890625, -0.09120941162109375, -0.083770751953125, -0.07633209228515625, -0.0688934326171875, -0.06145477294921875, -0.05401611328125, -0.04657745361328125, -0.0391387939453125, -0.03170013427734375, -0.024261474609375, -0.01682281494140625, -0.0093841552734375, -0.00194549560546875, 0.0054931640625, 0.01293182373046875, 0.0203704833984375, 0.02780914306640625, 0.035247802734375, 0.04268646240234375, 0.0501251220703125, 0.05756378173828125, 0.06500244140625, 0.07244110107421875, 0.0798797607421875, 0.08731842041015625, 0.094757080078125, 0.10219573974609375, 0.1096343994140625, 0.11707305908203125, 0.12451171875, 0.13195037841796875, 0.1393890380859375, 0.14682769775390625, 0.154266357421875, 0.16170501708984375, 0.1691436767578125, 0.17658233642578125, 0.18402099609375, 0.19145965576171875, 0.1988983154296875, 0.20633697509765625, 0.213775634765625, 0.22121429443359375, 0.2286529541015625, 0.23609161376953125, 0.2435302734375]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 7.0, 5.0, 5.0, 10.0, 11.0, 10.0, 14.0, 20.0, 15.0, 25.0, 23.0, 32.0, 26.0, 34.0, 46.0, 63.0, 65.0, 43.0, 55.0, 55.0, 52.0, 47.0, 51.0, 46.0, 48.0, 25.0, 23.0, 31.0, 22.0, 18.0, 20.0, 13.0, 9.0, 7.0, 5.0, 4.0, 8.0, 5.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006999969482421875, -0.0006755143404006958, -0.0006510317325592041, -0.0006265491247177124, -0.0006020665168762207, -0.000577583909034729, -0.0005531013011932373, -0.0005286186933517456, -0.0005041360855102539, -0.0004796534776687622, -0.0004551708698272705, -0.0004306882619857788, -0.0004062056541442871, -0.0003817230463027954, -0.0003572404384613037, -0.000332757830619812, -0.0003082752227783203, -0.0002837926149368286, -0.0002593100070953369, -0.00023482739925384521, -0.00021034479141235352, -0.00018586218357086182, -0.00016137957572937012, -0.00013689696788787842, -0.00011241436004638672, -8.793175220489502e-05, -6.344914436340332e-05, -3.896653652191162e-05, -1.4483928680419922e-05, 9.998679161071777e-06, 3.4481287002563477e-05, 5.8963894844055176e-05, 8.344650268554688e-05, 0.00010792911052703857, 0.00013241171836853027, 0.00015689432621002197, 0.00018137693405151367, 0.00020585954189300537, 0.00023034214973449707, 0.00025482475757598877, 0.00027930736541748047, 0.00030378997325897217, 0.00032827258110046387, 0.00035275518894195557, 0.00037723779678344727, 0.00040172040462493896, 0.00042620301246643066, 0.00045068562030792236, 0.00047516822814941406, 0.0004996508359909058, 0.0005241334438323975, 0.0005486160516738892, 0.0005730986595153809, 0.0005975812673568726, 0.0006220638751983643, 0.000646546483039856, 0.0006710290908813477, 0.0006955116987228394, 0.0007199943065643311, 0.0007444769144058228, 0.0007689595222473145, 0.0007934421300888062, 0.0008179247379302979, 0.0008424073457717896, 0.0008668899536132812]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 2.0, 6.0, 5.0, 8.0, 4.0, 10.0, 22.0, 25.0, 29.0, 24.0, 43.0, 41.0, 70.0, 90.0, 103.0, 141.0, 201.0, 229.0, 348.0, 723.0, 7438.0, 782059.0, 252528.0, 2568.0, 562.0, 320.0, 216.0, 151.0, 109.0, 100.0, 101.0, 69.0, 47.0, 26.0, 35.0, 29.0, 15.0, 21.0, 9.0, 7.0, 5.0, 7.0, 4.0, 2.0, 6.0, 2.0, 0.0, 3.0, 1.0], "bins": [-0.01459503173828125, -0.014198899269104004, -0.013802766799926758, -0.013406634330749512, -0.013010501861572266, -0.01261436939239502, -0.012218236923217773, -0.011822104454040527, -0.011425971984863281, -0.011029839515686035, -0.010633707046508789, -0.010237574577331543, -0.009841442108154297, -0.00944530963897705, -0.009049177169799805, -0.008653044700622559, -0.008256912231445312, -0.007860779762268066, -0.00746464729309082, -0.007068514823913574, -0.006672382354736328, -0.006276249885559082, -0.005880117416381836, -0.00548398494720459, -0.005087852478027344, -0.004691720008850098, -0.0042955875396728516, -0.0038994550704956055, -0.0035033226013183594, -0.0031071901321411133, -0.002711057662963867, -0.002314925193786621, -0.001918792724609375, -0.001522660255432129, -0.0011265277862548828, -0.0007303953170776367, -0.0003342628479003906, 6.186962127685547e-05, 0.00045800209045410156, 0.0008541345596313477, 0.0012502670288085938, 0.0016463994979858398, 0.002042531967163086, 0.002438664436340332, 0.002834796905517578, 0.0032309293746948242, 0.0036270618438720703, 0.004023194313049316, 0.0044193267822265625, 0.004815459251403809, 0.005211591720581055, 0.005607724189758301, 0.006003856658935547, 0.006399989128112793, 0.006796121597290039, 0.007192254066467285, 0.007588386535644531, 0.007984519004821777, 0.008380651473999023, 0.00877678394317627, 0.009172916412353516, 0.009569048881530762, 0.009965181350708008, 0.010361313819885254, 0.0107574462890625]}, "gradients/decoder.transformer.h.19.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 14.0, 50.0, 178.0, 325.0, 307.0, 109.0, 25.0, 8.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0013764874311164021, -0.0013186584692448378, -0.0012608295073732734, -0.001203000545501709, -0.0011451717000454664, -0.0010873426217585802, -0.0010295137763023376, -0.0009716848144307733, -0.0009138558525592089, -0.0008560268906876445, -0.0007981979288160801, -0.0007403690251521766, -0.0006825400632806122, -0.0006247111014090478, -0.0005668821977451444, -0.00050905323587358, -0.0004512242740020156, -0.0003933953121304512, -0.00033556637936271727, -0.00027773744659498334, -0.00021990848472341895, -0.00016207952285185456, -0.00010425059008412063, -4.64216573163867e-05, 1.1407304555177689e-05, 6.923625187482685e-05, 0.000127065199194476, 0.00018489414651412517, 0.00024272309383377433, 0.0003005520557053387, 0.00035838098847307265, 0.0004162099212408066, 0.0004740389995276928, 0.0005318679613992572, 0.0005896969232708216, 0.000647525826934725, 0.0007053547888062894, 0.0007631837506778538, 0.0008210126543417573, 0.0008788416162133217, 0.0009366705780848861, 0.0009944995399564505, 0.0010523285018280149, 0.0011101574636995792, 0.0011679863091558218, 0.001225815387442708, 0.0012836442328989506, 0.001341473194770515, 0.0013993021566420794, 0.0014571311185136437, 0.0015149600803852081, 0.0015727890422567725, 0.001630618004128337, 0.0016884468495845795, 0.0017462758114561439, 0.0018041047733277082, 0.0018619337351992726, 0.001919762697070837, 0.0019775915425270796, 0.002035420620813966, 0.0020932494662702084, 0.0021510785445570946, 0.002208907390013337, 0.0022667362354695797, 0.002324565313756466]}, "gradients/decoder.transformer.h.19.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 3.0, 3.0, 6.0, 6.0, 9.0, 12.0, 10.0, 15.0, 21.0, 28.0, 22.0, 32.0, 44.0, 41.0, 39.0, 35.0, 36.0, 39.0, 42.0, 46.0, 44.0, 37.0, 43.0, 41.0, 42.0, 35.0, 32.0, 34.0, 29.0, 31.0, 28.0, 22.0, 22.0, 13.0, 13.0, 16.0, 16.0, 6.0, 7.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004877448081970215, -0.00047342386096715927, -0.00045910291373729706, -0.00044478196650743484, -0.00043046101927757263, -0.0004161400720477104, -0.0004018191248178482, -0.000387498177587986, -0.0003731772303581238, -0.00035885628312826157, -0.00034453533589839935, -0.00033021438866853714, -0.0003158934414386749, -0.0003015724942088127, -0.0002872515469789505, -0.0002729305997490883, -0.0002586096525192261, -0.00024428870528936386, -0.00022996775805950165, -0.00021564681082963943, -0.00020132586359977722, -0.000187004916369915, -0.0001726839691400528, -0.00015836302191019058, -0.00014404207468032837, -0.00012972112745046616, -0.00011540018022060394, -0.00010107923299074173, -8.675828576087952e-05, -7.24373385310173e-05, -5.811639130115509e-05, -4.379544407129288e-05, -2.9474496841430664e-05, -1.5153549611568451e-05, -8.326023817062378e-07, 1.3488344848155975e-05, 2.780929207801819e-05, 4.21302393078804e-05, 5.6451186537742615e-05, 7.077213376760483e-05, 8.509308099746704e-05, 9.941402822732925e-05, 0.00011373497545719147, 0.00012805592268705368, 0.0001423768699169159, 0.0001566978171467781, 0.00017101876437664032, 0.00018533971160650253, 0.00019966065883636475, 0.00021398160606622696, 0.00022830255329608917, 0.00024262350052595139, 0.0002569444477558136, 0.0002712653949856758, 0.000285586342215538, 0.00029990728944540024, 0.00031422823667526245, 0.00032854918390512466, 0.0003428701311349869, 0.0003571910783648491, 0.0003715120255947113, 0.0003858329728245735, 0.00040015392005443573, 0.00041447486728429794, 0.00042879581451416016]}, "gradients/decoder.transformer.h.19.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 1.0, 6.0, 0.0, 3.0, 10.0, 7.0, 4.0, 7.0, 17.0, 9.0, 15.0, 8.0, 18.0, 21.0, 15.0, 29.0, 29.0, 33.0, 28.0, 40.0, 35.0, 48.0, 44.0, 44.0, 52.0, 54.0, 30.0, 40.0, 38.0, 32.0, 34.0, 26.0, 32.0, 26.0, 27.0, 28.0, 21.0, 13.0, 14.0, 13.0, 10.0, 12.0, 2.0, 8.0, 5.0, 4.0, 6.0, 3.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.501953125, -3.39495849609375, -3.2879638671875, -3.18096923828125, -3.073974609375, -2.96697998046875, -2.8599853515625, -2.75299072265625, -2.64599609375, -2.53900146484375, -2.4320068359375, -2.32501220703125, -2.218017578125, -2.11102294921875, -2.0040283203125, -1.89703369140625, -1.7900390625, -1.68304443359375, -1.5760498046875, -1.46905517578125, -1.362060546875, -1.25506591796875, -1.1480712890625, -1.04107666015625, -0.93408203125, -0.82708740234375, -0.7200927734375, -0.61309814453125, -0.506103515625, -0.39910888671875, -0.2921142578125, -0.18511962890625, -0.078125, 0.02886962890625, 0.1358642578125, 0.24285888671875, 0.349853515625, 0.45684814453125, 0.5638427734375, 0.67083740234375, 0.77783203125, 0.88482666015625, 0.9918212890625, 1.09881591796875, 1.205810546875, 1.31280517578125, 1.4197998046875, 1.52679443359375, 1.6337890625, 1.74078369140625, 1.8477783203125, 1.95477294921875, 2.061767578125, 2.16876220703125, 2.2757568359375, 2.38275146484375, 2.48974609375, 2.59674072265625, 2.7037353515625, 2.81072998046875, 2.917724609375, 3.02471923828125, 3.1317138671875, 3.23870849609375, 3.345703125]}, "gradients/decoder.transformer.h.19.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 3.0, 4.0, 4.0, 7.0, 8.0, 7.0, 11.0, 15.0, 25.0, 35.0, 43.0, 66.0, 113.0, 119.0, 235.0, 351.0, 563.0, 960.0, 1601.0, 2644.0, 4752.0, 8418.0, 15738.0, 30462.0, 61101.0, 120977.0, 209728.0, 243148.0, 166556.0, 88055.0, 43728.0, 22154.0, 11604.0, 6351.0, 3558.0, 2003.0, 1290.0, 783.0, 473.0, 287.0, 198.0, 116.0, 78.0, 49.0, 38.0, 29.0, 23.0, 14.0, 9.0, 14.0, 7.0, 5.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.244140625, -2.16998291015625, -2.0958251953125, -2.02166748046875, -1.947509765625, -1.87335205078125, -1.7991943359375, -1.72503662109375, -1.65087890625, -1.57672119140625, -1.5025634765625, -1.42840576171875, -1.354248046875, -1.28009033203125, -1.2059326171875, -1.13177490234375, -1.0576171875, -0.98345947265625, -0.9093017578125, -0.83514404296875, -0.760986328125, -0.68682861328125, -0.6126708984375, -0.53851318359375, -0.46435546875, -0.39019775390625, -0.3160400390625, -0.24188232421875, -0.167724609375, -0.09356689453125, -0.0194091796875, 0.05474853515625, 0.12890625, 0.20306396484375, 0.2772216796875, 0.35137939453125, 0.425537109375, 0.49969482421875, 0.5738525390625, 0.64801025390625, 0.72216796875, 0.79632568359375, 0.8704833984375, 0.94464111328125, 1.018798828125, 1.09295654296875, 1.1671142578125, 1.24127197265625, 1.3154296875, 1.38958740234375, 1.4637451171875, 1.53790283203125, 1.612060546875, 1.68621826171875, 1.7603759765625, 1.83453369140625, 1.90869140625, 1.98284912109375, 2.0570068359375, 2.13116455078125, 2.205322265625, 2.27947998046875, 2.3536376953125, 2.42779541015625, 2.501953125]}, "gradients/decoder.transformer.h.19.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 6.0, 7.0, 16.0, 12.0, 7.0, 9.0, 17.0, 12.0, 15.0, 25.0, 16.0, 39.0, 26.0, 29.0, 44.0, 51.0, 44.0, 71.0, 169.0, 1517.0, 351.0, 132.0, 65.0, 40.0, 48.0, 42.0, 34.0, 30.0, 28.0, 21.0, 33.0, 24.0, 16.0, 19.0, 8.0, 6.0, 10.0, 4.0, 3.0, 1.0, 3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.921875, -13.49853515625, -13.0751953125, -12.65185546875, -12.228515625, -11.80517578125, -11.3818359375, -10.95849609375, -10.53515625, -10.11181640625, -9.6884765625, -9.26513671875, -8.841796875, -8.41845703125, -7.9951171875, -7.57177734375, -7.1484375, -6.72509765625, -6.3017578125, -5.87841796875, -5.455078125, -5.03173828125, -4.6083984375, -4.18505859375, -3.76171875, -3.33837890625, -2.9150390625, -2.49169921875, -2.068359375, -1.64501953125, -1.2216796875, -0.79833984375, -0.375, 0.04833984375, 0.4716796875, 0.89501953125, 1.318359375, 1.74169921875, 2.1650390625, 2.58837890625, 3.01171875, 3.43505859375, 3.8583984375, 4.28173828125, 4.705078125, 5.12841796875, 5.5517578125, 5.97509765625, 6.3984375, 6.82177734375, 7.2451171875, 7.66845703125, 8.091796875, 8.51513671875, 8.9384765625, 9.36181640625, 9.78515625, 10.20849609375, 10.6318359375, 11.05517578125, 11.478515625, 11.90185546875, 12.3251953125, 12.74853515625, 13.171875]}, "gradients/decoder.transformer.h.19.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 1.0, 4.0, 3.0, 7.0, 1.0, 5.0, 3.0, 7.0, 11.0, 12.0, 26.0, 20.0, 24.0, 36.0, 28.0, 43.0, 70.0, 64.0, 100.0, 149.0, 290.0, 546.0, 1880.0, 22509.0, 2461568.0, 646594.0, 9282.0, 1187.0, 451.0, 218.0, 148.0, 113.0, 62.0, 49.0, 42.0, 30.0, 20.0, 15.0, 11.0, 17.0, 10.0, 15.0, 14.0, 11.0, 5.0, 4.0, 5.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.21875, -18.595947265625, -17.97314453125, -17.350341796875, -16.7275390625, -16.104736328125, -15.48193359375, -14.859130859375, -14.236328125, -13.613525390625, -12.99072265625, -12.367919921875, -11.7451171875, -11.122314453125, -10.49951171875, -9.876708984375, -9.25390625, -8.631103515625, -8.00830078125, -7.385498046875, -6.7626953125, -6.139892578125, -5.51708984375, -4.894287109375, -4.271484375, -3.648681640625, -3.02587890625, -2.403076171875, -1.7802734375, -1.157470703125, -0.53466796875, 0.088134765625, 0.7109375, 1.333740234375, 1.95654296875, 2.579345703125, 3.2021484375, 3.824951171875, 4.44775390625, 5.070556640625, 5.693359375, 6.316162109375, 6.93896484375, 7.561767578125, 8.1845703125, 8.807373046875, 9.43017578125, 10.052978515625, 10.67578125, 11.298583984375, 11.92138671875, 12.544189453125, 13.1669921875, 13.789794921875, 14.41259765625, 15.035400390625, 15.658203125, 16.281005859375, 16.90380859375, 17.526611328125, 18.1494140625, 18.772216796875, 19.39501953125, 20.017822265625, 20.640625]}, "gradients/decoder.transformer.h.19.ln_1.weight": {"_type": "histogram", "values": [3.0, 25.0, 181.0, 464.0, 299.0, 39.0, 7.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.677033424377441, -6.325624465942383, -3.974215507507324, -1.6228065490722656, 0.728602409362793, 3.0800113677978516, 5.43142032623291, 7.782828330993652, 10.134238243103027, 12.485647201538086, 14.837056159973145, 17.188465118408203, 19.539875030517578, 21.89128303527832, 24.242691040039062, 26.594100952148438, 28.945510864257812, 31.296920776367188, 33.64833068847656, 35.99973678588867, 38.35114669799805, 40.70255661010742, 43.05396270751953, 45.405372619628906, 47.75678253173828, 50.108192443847656, 52.45960235595703, 54.81100845336914, 57.162418365478516, 59.51382827758789, 61.865234375, 64.21664428710938, 66.56805419921875, 68.91946411132812, 71.2708740234375, 73.62228393554688, 75.97369384765625, 78.3250961303711, 80.67650604248047, 83.02791595458984, 85.37932586669922, 87.7307357788086, 90.08214569091797, 92.43355560302734, 94.78495788574219, 97.13636779785156, 99.48777770996094, 101.83918762207031, 104.19059753417969, 106.54200744628906, 108.89341735839844, 111.24482727050781, 113.59623718261719, 115.94763946533203, 118.2990493774414, 120.65045928955078, 123.00186920166016, 125.35327911376953, 127.7046890258789, 130.05609130859375, 132.40750122070312, 134.7589111328125, 137.11032104492188, 139.46173095703125, 141.81314086914062]}, "gradients/decoder.transformer.h.19.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 4.0, 2.0, 1.0, 3.0, 2.0, 9.0, 13.0, 11.0, 11.0, 19.0, 20.0, 32.0, 22.0, 27.0, 41.0, 45.0, 39.0, 42.0, 47.0, 50.0, 44.0, 45.0, 35.0, 46.0, 39.0, 42.0, 36.0, 42.0, 36.0, 35.0, 23.0, 21.0, 24.0, 17.0, 19.0, 11.0, 11.0, 12.0, 13.0, 3.0, 5.0, 2.0, 3.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.44906234741211, -41.07564926147461, -39.70223617553711, -38.32882308959961, -36.955413818359375, -35.582000732421875, -34.208587646484375, -32.835174560546875, -31.461761474609375, -30.088348388671875, -28.714935302734375, -27.341524124145508, -25.968111038208008, -24.594697952270508, -23.22128677368164, -21.84787368774414, -20.47446060180664, -19.10104751586914, -17.72763442993164, -16.354223251342773, -14.980810165405273, -13.607397079467773, -12.23398494720459, -10.860572814941406, -9.487159729003906, -8.113746643066406, -6.740334510803223, -5.366921901702881, -3.993509292602539, -2.6200966835021973, -1.2466840744018555, 0.12672805786132812, 1.5001449584960938, 2.8735575675964355, 4.246970176696777, 5.620382785797119, 6.993795394897461, 8.367208480834961, 9.740620613098145, 11.114032745361328, 12.487445831298828, 13.860858917236328, 15.234271049499512, 16.607683181762695, 17.981096267700195, 19.354509353637695, 20.727920532226562, 22.101333618164062, 23.474746704101562, 24.848159790039062, 26.221572875976562, 27.59498405456543, 28.96839714050293, 30.34181022644043, 31.715221405029297, 33.0886344909668, 34.4620475769043, 35.8354606628418, 37.2088737487793, 38.5822868347168, 39.95569610595703, 41.32910919189453, 42.70252227783203, 44.07593536376953, 45.44934844970703]}, "gradients/decoder.transformer.h.18.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 8.0, 4.0, 6.0, 6.0, 6.0, 10.0, 6.0, 11.0, 6.0, 11.0, 12.0, 17.0, 12.0, 29.0, 25.0, 24.0, 26.0, 32.0, 30.0, 52.0, 36.0, 38.0, 46.0, 41.0, 47.0, 41.0, 47.0, 37.0, 34.0, 36.0, 28.0, 27.0, 35.0, 23.0, 23.0, 23.0, 22.0, 17.0, 15.0, 5.0, 7.0, 13.0, 7.0, 6.0, 5.0, 4.0, 6.0, 4.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-3.66015625, -3.550079345703125, -3.44000244140625, -3.329925537109375, -3.2198486328125, -3.109771728515625, -2.99969482421875, -2.889617919921875, -2.779541015625, -2.669464111328125, -2.55938720703125, -2.449310302734375, -2.3392333984375, -2.229156494140625, -2.11907958984375, -2.009002685546875, -1.89892578125, -1.788848876953125, -1.67877197265625, -1.568695068359375, -1.4586181640625, -1.348541259765625, -1.23846435546875, -1.128387451171875, -1.018310546875, -0.908233642578125, -0.79815673828125, -0.688079833984375, -0.5780029296875, -0.467926025390625, -0.35784912109375, -0.247772216796875, -0.1376953125, -0.027618408203125, 0.08245849609375, 0.192535400390625, 0.3026123046875, 0.412689208984375, 0.52276611328125, 0.632843017578125, 0.742919921875, 0.852996826171875, 0.96307373046875, 1.073150634765625, 1.1832275390625, 1.293304443359375, 1.40338134765625, 1.513458251953125, 1.62353515625, 1.733612060546875, 1.84368896484375, 1.953765869140625, 2.0638427734375, 2.173919677734375, 2.28399658203125, 2.394073486328125, 2.504150390625, 2.614227294921875, 2.72430419921875, 2.834381103515625, 2.9444580078125, 3.054534912109375, 3.16461181640625, 3.274688720703125, 3.384765625]}, "gradients/decoder.transformer.h.18.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 6.0, 3.0, 5.0, 7.0, 6.0, 6.0, 6.0, 12.0, 12.0, 12.0, 19.0, 18.0, 21.0, 33.0, 47.0, 77.0, 106.0, 191.0, 340.0, 744.0, 2228.0, 7932.0, 33319.0, 180995.0, 1151332.0, 2170498.0, 536932.0, 84929.0, 17521.0, 4367.0, 1386.0, 486.0, 235.0, 121.0, 93.0, 57.0, 41.0, 21.0, 18.0, 27.0, 7.0, 13.0, 13.0, 10.0, 5.0, 1.0, 14.0, 3.0, 4.0, 2.0, 2.0, 6.0, 1.0, 1.0, 2.0, 2.0], "bins": [-8.6484375, -8.38861083984375, -8.1287841796875, -7.86895751953125, -7.609130859375, -7.34930419921875, -7.0894775390625, -6.82965087890625, -6.56982421875, -6.30999755859375, -6.0501708984375, -5.79034423828125, -5.530517578125, -5.27069091796875, -5.0108642578125, -4.75103759765625, -4.4912109375, -4.23138427734375, -3.9715576171875, -3.71173095703125, -3.451904296875, -3.19207763671875, -2.9322509765625, -2.67242431640625, -2.41259765625, -2.15277099609375, -1.8929443359375, -1.63311767578125, -1.373291015625, -1.11346435546875, -0.8536376953125, -0.59381103515625, -0.333984375, -0.07415771484375, 0.1856689453125, 0.44549560546875, 0.705322265625, 0.96514892578125, 1.2249755859375, 1.48480224609375, 1.74462890625, 2.00445556640625, 2.2642822265625, 2.52410888671875, 2.783935546875, 3.04376220703125, 3.3035888671875, 3.56341552734375, 3.8232421875, 4.08306884765625, 4.3428955078125, 4.60272216796875, 4.862548828125, 5.12237548828125, 5.3822021484375, 5.64202880859375, 5.90185546875, 6.16168212890625, 6.4215087890625, 6.68133544921875, 6.941162109375, 7.20098876953125, 7.4608154296875, 7.72064208984375, 7.98046875]}, "gradients/decoder.transformer.h.18.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 3.0, 2.0, 5.0, 4.0, 9.0, 13.0, 18.0, 22.0, 22.0, 27.0, 52.0, 53.0, 77.0, 115.0, 152.0, 206.0, 252.0, 347.0, 444.0, 423.0, 409.0, 334.0, 286.0, 214.0, 155.0, 123.0, 98.0, 58.0, 54.0, 23.0, 23.0, 18.0, 7.0, 13.0, 4.0, 6.0, 6.0, 2.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.7734375, -8.4654541015625, -8.157470703125, -7.8494873046875, -7.54150390625, -7.2335205078125, -6.925537109375, -6.6175537109375, -6.3095703125, -6.0015869140625, -5.693603515625, -5.3856201171875, -5.07763671875, -4.7696533203125, -4.461669921875, -4.1536865234375, -3.845703125, -3.5377197265625, -3.229736328125, -2.9217529296875, -2.61376953125, -2.3057861328125, -1.997802734375, -1.6898193359375, -1.3818359375, -1.0738525390625, -0.765869140625, -0.4578857421875, -0.14990234375, 0.1580810546875, 0.466064453125, 0.7740478515625, 1.08203125, 1.3900146484375, 1.697998046875, 2.0059814453125, 2.31396484375, 2.6219482421875, 2.929931640625, 3.2379150390625, 3.5458984375, 3.8538818359375, 4.161865234375, 4.4698486328125, 4.77783203125, 5.0858154296875, 5.393798828125, 5.7017822265625, 6.009765625, 6.3177490234375, 6.625732421875, 6.9337158203125, 7.24169921875, 7.5496826171875, 7.857666015625, 8.1656494140625, 8.4736328125, 8.7816162109375, 9.089599609375, 9.3975830078125, 9.70556640625, 10.0135498046875, 10.321533203125, 10.6295166015625, 10.9375]}, "gradients/decoder.transformer.h.18.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 9.0, 12.0, 11.0, 15.0, 15.0, 35.0, 29.0, 51.0, 38.0, 69.0, 92.0, 139.0, 194.0, 357.0, 722.0, 1803.0, 6800.0, 33747.0, 219756.0, 1556559.0, 2007388.0, 307990.0, 45637.0, 8523.0, 2255.0, 834.0, 409.0, 206.0, 163.0, 112.0, 75.0, 64.0, 38.0, 32.0, 22.0, 19.0, 14.0, 17.0, 11.0, 7.0, 5.0, 4.0, 6.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-14.65625, -14.199462890625, -13.74267578125, -13.285888671875, -12.8291015625, -12.372314453125, -11.91552734375, -11.458740234375, -11.001953125, -10.545166015625, -10.08837890625, -9.631591796875, -9.1748046875, -8.718017578125, -8.26123046875, -7.804443359375, -7.34765625, -6.890869140625, -6.43408203125, -5.977294921875, -5.5205078125, -5.063720703125, -4.60693359375, -4.150146484375, -3.693359375, -3.236572265625, -2.77978515625, -2.322998046875, -1.8662109375, -1.409423828125, -0.95263671875, -0.495849609375, -0.0390625, 0.417724609375, 0.87451171875, 1.331298828125, 1.7880859375, 2.244873046875, 2.70166015625, 3.158447265625, 3.615234375, 4.072021484375, 4.52880859375, 4.985595703125, 5.4423828125, 5.899169921875, 6.35595703125, 6.812744140625, 7.26953125, 7.726318359375, 8.18310546875, 8.639892578125, 9.0966796875, 9.553466796875, 10.01025390625, 10.467041015625, 10.923828125, 11.380615234375, 11.83740234375, 12.294189453125, 12.7509765625, 13.207763671875, 13.66455078125, 14.121337890625, 14.578125]}, "gradients/decoder.transformer.h.18.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 14.0, 19.0, 29.0, 69.0, 64.0, 90.0, 90.0, 107.0, 101.0, 115.0, 89.0, 84.0, 57.0, 31.0, 24.0, 11.0, 9.0, 4.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.82568359375, -35.23188781738281, -33.63808822631836, -32.04429244995117, -30.45049476623535, -28.85669708251953, -27.26289939880371, -25.66910171508789, -24.075305938720703, -22.481508255004883, -20.887710571289062, -19.293914794921875, -17.700117111206055, -16.106319427490234, -14.512521743774414, -12.91872501373291, -11.324926376342773, -9.731128692626953, -8.13733196258545, -6.543534278869629, -4.949737071990967, -3.3559398651123047, -1.7621421813964844, -0.16834545135498047, 1.4254522323608398, 3.019249439239502, 4.613046646118164, 6.206844329833984, 7.8006415367126465, 9.394438743591309, 10.988236427307129, 12.582033157348633, 14.175830841064453, 15.769628524780273, 17.363426208496094, 18.95722198486328, 20.5510196685791, 22.144817352294922, 23.738615036010742, 25.332412719726562, 26.92620849609375, 28.52000617980957, 30.11380386352539, 31.707599639892578, 33.30139923095703, 34.89519500732422, 36.488990783691406, 38.08279037475586, 39.67658996582031, 41.2703857421875, 42.86418533325195, 44.45798110961914, 46.051780700683594, 47.64557647705078, 49.23937225341797, 50.83317184448242, 52.42696762084961, 54.0207633972168, 55.61456298828125, 57.20835876464844, 58.80215835571289, 60.39595413208008, 61.98975372314453, 63.58354949951172, 65.1773452758789]}, "gradients/decoder.transformer.h.18.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 1.0, 4.0, 7.0, 6.0, 3.0, 7.0, 8.0, 7.0, 11.0, 8.0, 14.0, 19.0, 26.0, 19.0, 27.0, 25.0, 24.0, 30.0, 30.0, 27.0, 33.0, 45.0, 29.0, 35.0, 30.0, 42.0, 42.0, 35.0, 34.0, 35.0, 40.0, 33.0, 36.0, 30.0, 28.0, 17.0, 17.0, 21.0, 22.0, 19.0, 13.0, 13.0, 11.0, 9.0, 9.0, 7.0, 6.0, 3.0, 5.0, 2.0, 5.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.311731338500977, -27.35302734375, -26.394325256347656, -25.435623168945312, -24.476919174194336, -23.51821517944336, -22.559513092041016, -21.600811004638672, -20.642107009887695, -19.68340301513672, -18.724700927734375, -17.76599884033203, -16.807294845581055, -15.848591804504395, -14.889888763427734, -13.931185722351074, -12.972482681274414, -12.013779640197754, -11.055076599121094, -10.096373558044434, -9.137670516967773, -8.178967475891113, -7.220264434814453, -6.261561393737793, -5.302858352661133, -4.344155311584473, -3.3854522705078125, -2.4267492294311523, -1.4680461883544922, -0.509343147277832, 0.4493598937988281, 1.4080629348754883, 2.3667678833007812, 3.3254709243774414, 4.284173965454102, 5.242877006530762, 6.201580047607422, 7.160283088684082, 8.118986129760742, 9.077689170837402, 10.036392211914062, 10.995095252990723, 11.953798294067383, 12.912501335144043, 13.871204376220703, 14.829907417297363, 15.788610458374023, 16.747314453125, 17.706016540527344, 18.664718627929688, 19.623422622680664, 20.58212661743164, 21.540828704833984, 22.499530792236328, 23.458234786987305, 24.41693878173828, 25.375640869140625, 26.33434295654297, 27.293046951293945, 28.251750946044922, 29.210453033447266, 30.16915512084961, 31.127859115600586, 32.08656311035156, 33.045265197753906]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 4.0, 4.0, 3.0, 4.0, 8.0, 8.0, 12.0, 10.0, 15.0, 14.0, 20.0, 22.0, 29.0, 26.0, 32.0, 44.0, 48.0, 52.0, 54.0, 56.0, 47.0, 46.0, 56.0, 49.0, 29.0, 46.0, 45.0, 34.0, 29.0, 21.0, 20.0, 23.0, 19.0, 15.0, 9.0, 16.0, 9.0, 13.0, 5.0, 4.0, 4.0, 2.0, 1.0, 1.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.75390625, -4.6187744140625, -4.483642578125, -4.3485107421875, -4.21337890625, -4.0782470703125, -3.943115234375, -3.8079833984375, -3.6728515625, -3.5377197265625, -3.402587890625, -3.2674560546875, -3.13232421875, -2.9971923828125, -2.862060546875, -2.7269287109375, -2.591796875, -2.4566650390625, -2.321533203125, -2.1864013671875, -2.05126953125, -1.9161376953125, -1.781005859375, -1.6458740234375, -1.5107421875, -1.3756103515625, -1.240478515625, -1.1053466796875, -0.97021484375, -0.8350830078125, -0.699951171875, -0.5648193359375, -0.4296875, -0.2945556640625, -0.159423828125, -0.0242919921875, 0.11083984375, 0.2459716796875, 0.381103515625, 0.5162353515625, 0.6513671875, 0.7864990234375, 0.921630859375, 1.0567626953125, 1.19189453125, 1.3270263671875, 1.462158203125, 1.5972900390625, 1.732421875, 1.8675537109375, 2.002685546875, 2.1378173828125, 2.27294921875, 2.4080810546875, 2.543212890625, 2.6783447265625, 2.8134765625, 2.9486083984375, 3.083740234375, 3.2188720703125, 3.35400390625, 3.4891357421875, 3.624267578125, 3.7593994140625, 3.89453125]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 3.0, 4.0, 5.0, 9.0, 8.0, 18.0, 33.0, 41.0, 52.0, 93.0, 123.0, 179.0, 292.0, 454.0, 663.0, 1034.0, 1627.0, 2598.0, 4202.0, 6646.0, 10970.0, 17758.0, 29074.0, 47054.0, 76689.0, 126432.0, 196821.0, 197553.0, 126835.0, 77551.0, 47597.0, 29014.0, 17772.0, 10956.0, 6868.0, 4188.0, 2640.0, 1600.0, 1050.0, 734.0, 453.0, 291.0, 198.0, 118.0, 85.0, 54.0, 48.0, 21.0, 26.0, 15.0, 6.0, 4.0, 7.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.292236328125, -0.2824974060058594, -0.27275848388671875, -0.2630195617675781, -0.2532806396484375, -0.24354171752929688, -0.23380279541015625, -0.22406387329101562, -0.214324951171875, -0.20458602905273438, -0.19484710693359375, -0.18510818481445312, -0.1753692626953125, -0.16563034057617188, -0.15589141845703125, -0.14615249633789062, -0.13641357421875, -0.12667465209960938, -0.11693572998046875, -0.10719680786132812, -0.0974578857421875, -0.08771896362304688, -0.07798004150390625, -0.06824111938476562, -0.058502197265625, -0.048763275146484375, -0.03902435302734375, -0.029285430908203125, -0.0195465087890625, -0.009807586669921875, -6.866455078125e-05, 0.009670257568359375, 0.0194091796875, 0.029148101806640625, 0.03888702392578125, 0.048625946044921875, 0.0583648681640625, 0.06810379028320312, 0.07784271240234375, 0.08758163452148438, 0.097320556640625, 0.10705947875976562, 0.11679840087890625, 0.12653732299804688, 0.1362762451171875, 0.14601516723632812, 0.15575408935546875, 0.16549301147460938, 0.17523193359375, 0.18497085571289062, 0.19470977783203125, 0.20444869995117188, 0.2141876220703125, 0.22392654418945312, 0.23366546630859375, 0.24340438842773438, 0.253143310546875, 0.2628822326660156, 0.27262115478515625, 0.2823600769042969, 0.2920989990234375, 0.3018379211425781, 0.31157684326171875, 0.3213157653808594, 0.3310546875]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 3.0, 10.0, 2.0, 4.0, 17.0, 12.0, 10.0, 24.0, 11.0, 22.0, 22.0, 32.0, 27.0, 37.0, 34.0, 38.0, 55.0, 46.0, 46.0, 1077.0, 56.0, 41.0, 59.0, 43.0, 42.0, 37.0, 56.0, 35.0, 25.0, 18.0, 19.0, 12.0, 13.0, 16.0, 6.0, 5.0, 4.0, 6.0, 2.0, 2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.853515625, -2.76409912109375, -2.6746826171875, -2.58526611328125, -2.495849609375, -2.40643310546875, -2.3170166015625, -2.22760009765625, -2.13818359375, -2.04876708984375, -1.9593505859375, -1.86993408203125, -1.780517578125, -1.69110107421875, -1.6016845703125, -1.51226806640625, -1.4228515625, -1.33343505859375, -1.2440185546875, -1.15460205078125, -1.065185546875, -0.97576904296875, -0.8863525390625, -0.79693603515625, -0.70751953125, -0.61810302734375, -0.5286865234375, -0.43927001953125, -0.349853515625, -0.26043701171875, -0.1710205078125, -0.08160400390625, 0.0078125, 0.09722900390625, 0.1866455078125, 0.27606201171875, 0.365478515625, 0.45489501953125, 0.5443115234375, 0.63372802734375, 0.72314453125, 0.81256103515625, 0.9019775390625, 0.99139404296875, 1.080810546875, 1.17022705078125, 1.2596435546875, 1.34906005859375, 1.4384765625, 1.52789306640625, 1.6173095703125, 1.70672607421875, 1.796142578125, 1.88555908203125, 1.9749755859375, 2.06439208984375, 2.15380859375, 2.24322509765625, 2.3326416015625, 2.42205810546875, 2.511474609375, 2.60089111328125, 2.6903076171875, 2.77972412109375, 2.869140625]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 7.0, 13.0, 14.0, 33.0, 42.0, 59.0, 90.0, 129.0, 246.0, 389.0, 695.0, 1117.0, 1959.0, 3285.0, 5361.0, 9378.0, 16020.0, 27574.0, 47281.0, 79982.0, 133710.0, 985607.0, 460114.0, 132258.0, 79660.0, 46303.0, 27102.0, 15781.0, 9369.0, 5610.0, 3230.0, 1861.0, 1114.0, 714.0, 378.0, 262.0, 157.0, 80.0, 59.0, 32.0, 25.0, 14.0, 6.0, 5.0, 5.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.302490234375, -0.2935905456542969, -0.28469085693359375, -0.2757911682128906, -0.2668914794921875, -0.2579917907714844, -0.24909210205078125, -0.24019241333007812, -0.231292724609375, -0.22239303588867188, -0.21349334716796875, -0.20459365844726562, -0.1956939697265625, -0.18679428100585938, -0.17789459228515625, -0.16899490356445312, -0.16009521484375, -0.15119552612304688, -0.14229583740234375, -0.13339614868164062, -0.1244964599609375, -0.11559677124023438, -0.10669708251953125, -0.09779739379882812, -0.088897705078125, -0.07999801635742188, -0.07109832763671875, -0.062198638916015625, -0.0532989501953125, -0.044399261474609375, -0.03549957275390625, -0.026599884033203125, -0.0177001953125, -0.008800506591796875, 9.918212890625e-05, 0.008998870849609375, 0.0178985595703125, 0.026798248291015625, 0.03569793701171875, 0.044597625732421875, 0.053497314453125, 0.062397003173828125, 0.07129669189453125, 0.08019638061523438, 0.0890960693359375, 0.09799575805664062, 0.10689544677734375, 0.11579513549804688, 0.12469482421875, 0.13359451293945312, 0.14249420166015625, 0.15139389038085938, 0.1602935791015625, 0.16919326782226562, 0.17809295654296875, 0.18699264526367188, 0.195892333984375, 0.20479202270507812, 0.21369171142578125, 0.22259140014648438, 0.2314910888671875, 0.24039077758789062, 0.24929046630859375, 0.2581901550292969, 0.26708984375]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 5.0, 3.0, 9.0, 4.0, 6.0, 10.0, 9.0, 16.0, 19.0, 20.0, 28.0, 45.0, 29.0, 53.0, 51.0, 79.0, 69.0, 64.0, 80.0, 70.0, 61.0, 58.0, 49.0, 38.0, 24.0, 20.0, 20.0, 13.0, 12.0, 8.0, 4.0, 5.0, 7.0, 3.0, 4.0, 3.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.0010089874267578125, -0.0009768903255462646, -0.0009447932243347168, -0.0009126961231231689, -0.0008805990219116211, -0.0008485019207000732, -0.0008164048194885254, -0.0007843077182769775, -0.0007522106170654297, -0.0007201135158538818, -0.000688016414642334, -0.0006559193134307861, -0.0006238222122192383, -0.0005917251110076904, -0.0005596280097961426, -0.0005275309085845947, -0.0004954338073730469, -0.000463336706161499, -0.00043123960494995117, -0.0003991425037384033, -0.00036704540252685547, -0.0003349483013153076, -0.00030285120010375977, -0.0002707540988922119, -0.00023865699768066406, -0.0002065598964691162, -0.00017446279525756836, -0.0001423656940460205, -0.00011026859283447266, -7.81714916229248e-05, -4.607439041137695e-05, -1.3977289199829102e-05, 1.811981201171875e-05, 5.02169132232666e-05, 8.231401443481445e-05, 0.0001144111156463623, 0.00014650821685791016, 0.000178605318069458, 0.00021070241928100586, 0.0002427995204925537, 0.00027489662170410156, 0.0003069937229156494, 0.00033909082412719727, 0.0003711879253387451, 0.00040328502655029297, 0.0004353821277618408, 0.00046747922897338867, 0.0004995763301849365, 0.0005316734313964844, 0.0005637705326080322, 0.0005958676338195801, 0.0006279647350311279, 0.0006600618362426758, 0.0006921589374542236, 0.0007242560386657715, 0.0007563531398773193, 0.0007884502410888672, 0.000820547342300415, 0.0008526444435119629, 0.0008847415447235107, 0.0009168386459350586, 0.0009489357471466064, 0.0009810328483581543, 0.0010131299495697021, 0.00104522705078125]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 4.0, 4.0, 10.0, 6.0, 9.0, 11.0, 13.0, 13.0, 17.0, 33.0, 48.0, 67.0, 84.0, 117.0, 230.0, 317.0, 576.0, 4681.0, 957314.0, 82663.0, 1146.0, 431.0, 244.0, 155.0, 102.0, 66.0, 48.0, 42.0, 22.0, 24.0, 16.0, 11.0, 6.0, 7.0, 5.0, 3.0, 5.0, 0.0, 3.0, 2.0, 0.0, 0.0, 5.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0192108154296875, -0.018624067306518555, -0.01803731918334961, -0.017450571060180664, -0.01686382293701172, -0.016277074813842773, -0.015690326690673828, -0.015103578567504883, -0.014516830444335938, -0.013930082321166992, -0.013343334197998047, -0.012756586074829102, -0.012169837951660156, -0.011583089828491211, -0.010996341705322266, -0.01040959358215332, -0.009822845458984375, -0.00923609733581543, -0.008649349212646484, -0.008062601089477539, -0.007475852966308594, -0.0068891048431396484, -0.006302356719970703, -0.005715608596801758, -0.0051288604736328125, -0.004542112350463867, -0.003955364227294922, -0.0033686161041259766, -0.0027818679809570312, -0.002195119857788086, -0.0016083717346191406, -0.0010216236114501953, -0.00043487548828125, 0.0001518726348876953, 0.0007386207580566406, 0.001325368881225586, 0.0019121170043945312, 0.0024988651275634766, 0.003085613250732422, 0.003672361373901367, 0.0042591094970703125, 0.004845857620239258, 0.005432605743408203, 0.0060193538665771484, 0.006606101989746094, 0.007192850112915039, 0.007779598236083984, 0.00836634635925293, 0.008953094482421875, 0.00953984260559082, 0.010126590728759766, 0.010713338851928711, 0.011300086975097656, 0.011886835098266602, 0.012473583221435547, 0.013060331344604492, 0.013647079467773438, 0.014233827590942383, 0.014820575714111328, 0.015407323837280273, 0.01599407196044922, 0.016580820083618164, 0.01716756820678711, 0.017754316329956055, 0.018341064453125]}, "gradients/decoder.transformer.h.18.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 9.0, 11.0, 84.0, 157.0, 278.0, 254.0, 150.0, 49.0, 20.0, 5.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00055880774743855, -0.0005075038061477244, -0.0004561998648568988, -0.0004048959235660732, -0.0003535919822752476, -0.0003022880700882524, -0.0002509841287974268, -0.00019968018750660121, -0.0001483762462157756, -9.707230492495e-05, -4.576837091008201e-05, 5.535563104785979e-06, 5.6839504395611584e-05, 0.00010814343113452196, 0.00015944737242534757, 0.00021075131371617317, 0.0002620552550069988, 0.0003133591962978244, 0.00036466313758865, 0.0004159670788794756, 0.0004672710201703012, 0.0005185749614611268, 0.0005698788445442915, 0.000621182844042778, 0.0006724867271259427, 0.0007237906684167683, 0.0007750946097075939, 0.0008263985509984195, 0.0008777024922892451, 0.0009290063753724098, 0.0009803103748708963, 0.001031614257954061, 0.0010829182574525476, 0.0011342221405357122, 0.0011855261400341988, 0.0012368300231173635, 0.00128813402261585, 0.0013394379056990147, 0.0013907419051975012, 0.0014420457882806659, 0.0014933497877791524, 0.001544653670862317, 0.0015959576703608036, 0.0016472615534439683, 0.0016985655529424548, 0.0017498694360256195, 0.001801173435524106, 0.0018524773186072707, 0.0019037812016904354, 0.001955085201188922, 0.002006388967856765, 0.0020576929673552513, 0.002108996966853738, 0.0021603009663522243, 0.002211604733020067, 0.0022629087325185537, 0.0023142127320170403, 0.0023655167315155268, 0.0024168204981833696, 0.002468124497681856, 0.0025194284971803427, 0.002570732496678829, 0.002622036263346672, 0.0026733402628451586, 0.002724644262343645]}, "gradients/decoder.transformer.h.18.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 5.0, 4.0, 9.0, 7.0, 8.0, 9.0, 9.0, 13.0, 10.0, 11.0, 16.0, 19.0, 29.0, 21.0, 28.0, 31.0, 34.0, 27.0, 26.0, 35.0, 35.0, 44.0, 49.0, 48.0, 38.0, 35.0, 38.0, 33.0, 27.0, 34.0, 28.0, 26.0, 27.0, 36.0, 25.0, 16.0, 11.0, 25.0, 13.0, 14.0, 11.0, 9.0, 6.0, 4.0, 3.0, 7.0, 5.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.00040781497955322266, -0.0003941114991903305, -0.00038040801882743835, -0.0003667045384645462, -0.00035300105810165405, -0.0003392975777387619, -0.00032559409737586975, -0.0003118906170129776, -0.00029818713665008545, -0.0002844836562871933, -0.00027078017592430115, -0.000257076695561409, -0.00024337321519851685, -0.0002296697348356247, -0.00021596625447273254, -0.0002022627741098404, -0.00018855929374694824, -0.0001748558133840561, -0.00016115233302116394, -0.0001474488526582718, -0.00013374537229537964, -0.00012004189193248749, -0.00010633841156959534, -9.263493120670319e-05, -7.893145084381104e-05, -6.522797048091888e-05, -5.1524490118026733e-05, -3.782100975513458e-05, -2.411752939224243e-05, -1.041404902935028e-05, 3.28943133354187e-06, 1.699291169643402e-05, 3.069639205932617e-05, 4.439987242221832e-05, 5.8103352785110474e-05, 7.180683314800262e-05, 8.551031351089478e-05, 9.921379387378693e-05, 0.00011291727423667908, 0.00012662075459957123, 0.00014032423496246338, 0.00015402771532535553, 0.00016773119568824768, 0.00018143467605113983, 0.00019513815641403198, 0.00020884163677692413, 0.00022254511713981628, 0.00023624859750270844, 0.0002499520778656006, 0.00026365555822849274, 0.0002773590385913849, 0.00029106251895427704, 0.0003047659993171692, 0.00031846947968006134, 0.0003321729600429535, 0.00034587644040584564, 0.0003595799207687378, 0.00037328340113162994, 0.0003869868814945221, 0.00040069036185741425, 0.0004143938422203064, 0.00042809732258319855, 0.0004418008029460907, 0.00045550428330898285, 0.000469207763671875]}, "gradients/decoder.transformer.h.18.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 4.0, 4.0, 3.0, 4.0, 8.0, 8.0, 12.0, 10.0, 15.0, 14.0, 20.0, 22.0, 29.0, 26.0, 32.0, 44.0, 48.0, 52.0, 54.0, 56.0, 47.0, 46.0, 56.0, 49.0, 29.0, 46.0, 45.0, 34.0, 29.0, 21.0, 20.0, 23.0, 19.0, 15.0, 9.0, 16.0, 9.0, 13.0, 5.0, 4.0, 4.0, 2.0, 1.0, 1.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.75390625, -4.6187744140625, -4.483642578125, -4.3485107421875, -4.21337890625, -4.0782470703125, -3.943115234375, -3.8079833984375, -3.6728515625, -3.5377197265625, -3.402587890625, -3.2674560546875, -3.13232421875, -2.9971923828125, -2.862060546875, -2.7269287109375, -2.591796875, -2.4566650390625, -2.321533203125, -2.1864013671875, -2.05126953125, -1.9161376953125, -1.781005859375, -1.6458740234375, -1.5107421875, -1.3756103515625, -1.240478515625, -1.1053466796875, -0.97021484375, -0.8350830078125, -0.699951171875, -0.5648193359375, -0.4296875, -0.2945556640625, -0.159423828125, -0.0242919921875, 0.11083984375, 0.2459716796875, 0.381103515625, 0.5162353515625, 0.6513671875, 0.7864990234375, 0.921630859375, 1.0567626953125, 1.19189453125, 1.3270263671875, 1.462158203125, 1.5972900390625, 1.732421875, 1.8675537109375, 2.002685546875, 2.1378173828125, 2.27294921875, 2.4080810546875, 2.543212890625, 2.6783447265625, 2.8134765625, 2.9486083984375, 3.083740234375, 3.2188720703125, 3.35400390625, 3.4891357421875, 3.624267578125, 3.7593994140625, 3.89453125]}, "gradients/decoder.transformer.h.18.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 4.0, 0.0, 3.0, 4.0, 7.0, 7.0, 10.0, 14.0, 23.0, 28.0, 31.0, 43.0, 82.0, 110.0, 170.0, 270.0, 347.0, 619.0, 1038.0, 1755.0, 2781.0, 5004.0, 9307.0, 19715.0, 52129.0, 175957.0, 482648.0, 196299.0, 56438.0, 21064.0, 9692.0, 5298.0, 2993.0, 1773.0, 1028.0, 640.0, 437.0, 230.0, 176.0, 115.0, 77.0, 60.0, 38.0, 36.0, 19.0, 15.0, 9.0, 4.0, 7.0, 5.0, 4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.1015625, -4.93450927734375, -4.7674560546875, -4.60040283203125, -4.433349609375, -4.26629638671875, -4.0992431640625, -3.93218994140625, -3.76513671875, -3.59808349609375, -3.4310302734375, -3.26397705078125, -3.096923828125, -2.92987060546875, -2.7628173828125, -2.59576416015625, -2.4287109375, -2.26165771484375, -2.0946044921875, -1.92755126953125, -1.760498046875, -1.59344482421875, -1.4263916015625, -1.25933837890625, -1.09228515625, -0.92523193359375, -0.7581787109375, -0.59112548828125, -0.424072265625, -0.25701904296875, -0.0899658203125, 0.07708740234375, 0.244140625, 0.41119384765625, 0.5782470703125, 0.74530029296875, 0.912353515625, 1.07940673828125, 1.2464599609375, 1.41351318359375, 1.58056640625, 1.74761962890625, 1.9146728515625, 2.08172607421875, 2.248779296875, 2.41583251953125, 2.5828857421875, 2.74993896484375, 2.9169921875, 3.08404541015625, 3.2510986328125, 3.41815185546875, 3.585205078125, 3.75225830078125, 3.9193115234375, 4.08636474609375, 4.25341796875, 4.42047119140625, 4.5875244140625, 4.75457763671875, 4.921630859375, 5.08868408203125, 5.2557373046875, 5.42279052734375, 5.58984375]}, "gradients/decoder.transformer.h.18.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 0.0, 4.0, 4.0, 1.0, 4.0, 4.0, 7.0, 6.0, 7.0, 15.0, 9.0, 14.0, 16.0, 29.0, 28.0, 30.0, 24.0, 35.0, 35.0, 39.0, 39.0, 41.0, 59.0, 117.0, 241.0, 1483.0, 236.0, 104.0, 60.0, 44.0, 44.0, 33.0, 36.0, 24.0, 25.0, 17.0, 22.0, 22.0, 16.0, 10.0, 13.0, 9.0, 6.0, 11.0, 10.0, 3.0, 5.0, 2.0, 6.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.4296875, -12.0352783203125, -11.640869140625, -11.2464599609375, -10.85205078125, -10.4576416015625, -10.063232421875, -9.6688232421875, -9.2744140625, -8.8800048828125, -8.485595703125, -8.0911865234375, -7.69677734375, -7.3023681640625, -6.907958984375, -6.5135498046875, -6.119140625, -5.7247314453125, -5.330322265625, -4.9359130859375, -4.54150390625, -4.1470947265625, -3.752685546875, -3.3582763671875, -2.9638671875, -2.5694580078125, -2.175048828125, -1.7806396484375, -1.38623046875, -0.9918212890625, -0.597412109375, -0.2030029296875, 0.19140625, 0.5858154296875, 0.980224609375, 1.3746337890625, 1.76904296875, 2.1634521484375, 2.557861328125, 2.9522705078125, 3.3466796875, 3.7410888671875, 4.135498046875, 4.5299072265625, 4.92431640625, 5.3187255859375, 5.713134765625, 6.1075439453125, 6.501953125, 6.8963623046875, 7.290771484375, 7.6851806640625, 8.07958984375, 8.4739990234375, 8.868408203125, 9.2628173828125, 9.6572265625, 10.0516357421875, 10.446044921875, 10.8404541015625, 11.23486328125, 11.6292724609375, 12.023681640625, 12.4180908203125, 12.8125]}, "gradients/decoder.transformer.h.18.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 6.0, 9.0, 9.0, 13.0, 11.0, 21.0, 29.0, 42.0, 84.0, 88.0, 187.0, 370.0, 1097.0, 21871.0, 3080128.0, 39310.0, 1415.0, 424.0, 224.0, 105.0, 91.0, 53.0, 23.0, 32.0, 22.0, 11.0, 10.0, 10.0, 4.0, 4.0, 2.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.65625, -40.33642578125, -39.0166015625, -37.69677734375, -36.376953125, -35.05712890625, -33.7373046875, -32.41748046875, -31.09765625, -29.77783203125, -28.4580078125, -27.13818359375, -25.818359375, -24.49853515625, -23.1787109375, -21.85888671875, -20.5390625, -19.21923828125, -17.8994140625, -16.57958984375, -15.259765625, -13.93994140625, -12.6201171875, -11.30029296875, -9.98046875, -8.66064453125, -7.3408203125, -6.02099609375, -4.701171875, -3.38134765625, -2.0615234375, -0.74169921875, 0.578125, 1.89794921875, 3.2177734375, 4.53759765625, 5.857421875, 7.17724609375, 8.4970703125, 9.81689453125, 11.13671875, 12.45654296875, 13.7763671875, 15.09619140625, 16.416015625, 17.73583984375, 19.0556640625, 20.37548828125, 21.6953125, 23.01513671875, 24.3349609375, 25.65478515625, 26.974609375, 28.29443359375, 29.6142578125, 30.93408203125, 32.25390625, 33.57373046875, 34.8935546875, 36.21337890625, 37.533203125, 38.85302734375, 40.1728515625, 41.49267578125, 42.8125]}, "gradients/decoder.transformer.h.18.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 34.0, 203.0, 483.0, 249.0, 38.0, 5.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.999847412109375, -27.16200828552246, -24.32417106628418, -21.486331939697266, -18.648494720458984, -15.81065559387207, -12.972816467285156, -10.134979248046875, -7.297140121459961, -4.459301948547363, -1.6214632987976074, 1.2163753509521484, 4.054213523864746, 6.892051696777344, 9.729890823364258, 12.567728042602539, 15.405567169189453, 18.243406295776367, 21.08124351501465, 23.919082641601562, 26.756919860839844, 29.594758987426758, 32.43259811401367, 35.27043533325195, 38.1082763671875, 40.94611358642578, 43.78395462036133, 46.62179183959961, 49.45962905883789, 52.29747009277344, 55.13530731201172, 57.97314453125, 60.81098175048828, 63.64881896972656, 66.48665618896484, 69.32449340820312, 72.16233825683594, 75.00017547607422, 77.8380126953125, 80.67584991455078, 83.51368713378906, 86.35152435302734, 89.18936157226562, 92.02720642089844, 94.86504364013672, 97.702880859375, 100.54071807861328, 103.37855529785156, 106.21640014648438, 109.05423736572266, 111.89207458496094, 114.72991943359375, 117.56775665283203, 120.40559387207031, 123.2434310913086, 126.08126831054688, 128.91909790039062, 131.75694274902344, 134.5947723388672, 137.4326171875, 140.27044677734375, 143.10829162597656, 145.94613647460938, 148.78396606445312, 151.62181091308594]}, "gradients/decoder.transformer.h.18.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 6.0, 7.0, 5.0, 5.0, 8.0, 8.0, 18.0, 17.0, 25.0, 29.0, 32.0, 22.0, 25.0, 30.0, 35.0, 45.0, 32.0, 40.0, 50.0, 51.0, 48.0, 38.0, 49.0, 35.0, 36.0, 45.0, 28.0, 27.0, 27.0, 32.0, 24.0, 18.0, 12.0, 17.0, 14.0, 16.0, 5.0, 15.0, 6.0, 11.0, 8.0, 6.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.61349105834961, -39.28224563598633, -37.95100021362305, -36.619754791259766, -35.28851318359375, -33.95726776123047, -32.62602233886719, -31.294776916503906, -29.963531494140625, -28.632286071777344, -27.301040649414062, -25.969797134399414, -24.638551712036133, -23.30730628967285, -21.976062774658203, -20.644817352294922, -19.31357192993164, -17.98232650756836, -16.651081085205078, -15.31983757019043, -13.988592147827148, -12.657346725463867, -11.326102256774902, -9.994857788085938, -8.663612365722656, -7.332367420196533, -6.00112247467041, -4.669877529144287, -3.338632583618164, -2.007387638092041, -0.676142692565918, 0.6551017761230469, 1.9863471984863281, 3.317592144012451, 4.648837089538574, 5.980082035064697, 7.31132698059082, 8.642572402954102, 9.973816871643066, 11.305061340332031, 12.636306762695312, 13.967552185058594, 15.298796653747559, 16.630041122436523, 17.961286544799805, 19.292531967163086, 20.623775482177734, 21.955020904541016, 23.286266326904297, 24.617511749267578, 25.94875717163086, 27.280000686645508, 28.61124610900879, 29.94249153137207, 31.27373504638672, 32.60498046875, 33.93622589111328, 35.26747131347656, 36.598716735839844, 37.929962158203125, 39.261207580566406, 40.59244918823242, 41.9236946105957, 43.254940032958984, 44.586185455322266]}, "gradients/decoder.transformer.h.17.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 6.0, 2.0, 4.0, 8.0, 13.0, 14.0, 10.0, 14.0, 8.0, 18.0, 23.0, 28.0, 29.0, 27.0, 43.0, 42.0, 40.0, 48.0, 58.0, 48.0, 65.0, 52.0, 39.0, 33.0, 43.0, 35.0, 36.0, 31.0, 35.0, 21.0, 28.0, 15.0, 7.0, 16.0, 15.0, 11.0, 11.0, 7.0, 5.0, 7.0, 3.0, 4.0, 3.0, 2.0, 3.0, 1.0, 0.0, 2.0], "bins": [-4.95703125, -4.821258544921875, -4.68548583984375, -4.549713134765625, -4.4139404296875, -4.278167724609375, -4.14239501953125, -4.006622314453125, -3.870849609375, -3.735076904296875, -3.59930419921875, -3.463531494140625, -3.3277587890625, -3.191986083984375, -3.05621337890625, -2.920440673828125, -2.78466796875, -2.648895263671875, -2.51312255859375, -2.377349853515625, -2.2415771484375, -2.105804443359375, -1.97003173828125, -1.834259033203125, -1.698486328125, -1.562713623046875, -1.42694091796875, -1.291168212890625, -1.1553955078125, -1.019622802734375, -0.88385009765625, -0.748077392578125, -0.6123046875, -0.476531982421875, -0.34075927734375, -0.204986572265625, -0.0692138671875, 0.066558837890625, 0.20233154296875, 0.338104248046875, 0.473876953125, 0.609649658203125, 0.74542236328125, 0.881195068359375, 1.0169677734375, 1.152740478515625, 1.28851318359375, 1.424285888671875, 1.56005859375, 1.695831298828125, 1.83160400390625, 1.967376708984375, 2.1031494140625, 2.238922119140625, 2.37469482421875, 2.510467529296875, 2.646240234375, 2.782012939453125, 2.91778564453125, 3.053558349609375, 3.1893310546875, 3.325103759765625, 3.46087646484375, 3.596649169921875, 3.732421875]}, "gradients/decoder.transformer.h.17.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 2.0, 6.0, 6.0, 6.0, 9.0, 7.0, 15.0, 12.0, 21.0, 29.0, 39.0, 78.0, 109.0, 202.0, 326.0, 494.0, 960.0, 1897.0, 4073.0, 8979.0, 21767.0, 56781.0, 162458.0, 479182.0, 1133850.0, 1328034.0, 642997.0, 224326.0, 77371.0, 28866.0, 11491.0, 4908.0, 2343.0, 1104.0, 586.0, 357.0, 180.0, 147.0, 85.0, 51.0, 38.0, 29.0, 18.0, 9.0, 12.0, 12.0, 8.0, 5.0, 4.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.27734375, -5.122802734375, -4.96826171875, -4.813720703125, -4.6591796875, -4.504638671875, -4.35009765625, -4.195556640625, -4.041015625, -3.886474609375, -3.73193359375, -3.577392578125, -3.4228515625, -3.268310546875, -3.11376953125, -2.959228515625, -2.8046875, -2.650146484375, -2.49560546875, -2.341064453125, -2.1865234375, -2.031982421875, -1.87744140625, -1.722900390625, -1.568359375, -1.413818359375, -1.25927734375, -1.104736328125, -0.9501953125, -0.795654296875, -0.64111328125, -0.486572265625, -0.33203125, -0.177490234375, -0.02294921875, 0.131591796875, 0.2861328125, 0.440673828125, 0.59521484375, 0.749755859375, 0.904296875, 1.058837890625, 1.21337890625, 1.367919921875, 1.5224609375, 1.677001953125, 1.83154296875, 1.986083984375, 2.140625, 2.295166015625, 2.44970703125, 2.604248046875, 2.7587890625, 2.913330078125, 3.06787109375, 3.222412109375, 3.376953125, 3.531494140625, 3.68603515625, 3.840576171875, 3.9951171875, 4.149658203125, 4.30419921875, 4.458740234375, 4.61328125]}, "gradients/decoder.transformer.h.17.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 7.0, 5.0, 2.0, 3.0, 14.0, 16.0, 14.0, 24.0, 35.0, 43.0, 85.0, 119.0, 188.0, 271.0, 336.0, 429.0, 527.0, 480.0, 418.0, 317.0, 218.0, 156.0, 134.0, 76.0, 59.0, 36.0, 27.0, 19.0, 12.0, 10.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.796875, -13.412353515625, -13.02783203125, -12.643310546875, -12.2587890625, -11.874267578125, -11.48974609375, -11.105224609375, -10.720703125, -10.336181640625, -9.95166015625, -9.567138671875, -9.1826171875, -8.798095703125, -8.41357421875, -8.029052734375, -7.64453125, -7.260009765625, -6.87548828125, -6.490966796875, -6.1064453125, -5.721923828125, -5.33740234375, -4.952880859375, -4.568359375, -4.183837890625, -3.79931640625, -3.414794921875, -3.0302734375, -2.645751953125, -2.26123046875, -1.876708984375, -1.4921875, -1.107666015625, -0.72314453125, -0.338623046875, 0.0458984375, 0.430419921875, 0.81494140625, 1.199462890625, 1.583984375, 1.968505859375, 2.35302734375, 2.737548828125, 3.1220703125, 3.506591796875, 3.89111328125, 4.275634765625, 4.66015625, 5.044677734375, 5.42919921875, 5.813720703125, 6.1982421875, 6.582763671875, 6.96728515625, 7.351806640625, 7.736328125, 8.120849609375, 8.50537109375, 8.889892578125, 9.2744140625, 9.658935546875, 10.04345703125, 10.427978515625, 10.8125]}, "gradients/decoder.transformer.h.17.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 6.0, 3.0, 8.0, 11.0, 12.0, 20.0, 31.0, 50.0, 85.0, 100.0, 189.0, 409.0, 910.0, 3111.0, 17972.0, 183782.0, 2107792.0, 1719529.0, 141299.0, 14619.0, 2642.0, 831.0, 351.0, 175.0, 97.0, 74.0, 48.0, 47.0, 22.0, 19.0, 14.0, 6.0, 7.0, 4.0, 4.0, 4.0, 3.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.125, -13.53515625, -12.9453125, -12.35546875, -11.765625, -11.17578125, -10.5859375, -9.99609375, -9.40625, -8.81640625, -8.2265625, -7.63671875, -7.046875, -6.45703125, -5.8671875, -5.27734375, -4.6875, -4.09765625, -3.5078125, -2.91796875, -2.328125, -1.73828125, -1.1484375, -0.55859375, 0.03125, 0.62109375, 1.2109375, 1.80078125, 2.390625, 2.98046875, 3.5703125, 4.16015625, 4.75, 5.33984375, 5.9296875, 6.51953125, 7.109375, 7.69921875, 8.2890625, 8.87890625, 9.46875, 10.05859375, 10.6484375, 11.23828125, 11.828125, 12.41796875, 13.0078125, 13.59765625, 14.1875, 14.77734375, 15.3671875, 15.95703125, 16.546875, 17.13671875, 17.7265625, 18.31640625, 18.90625, 19.49609375, 20.0859375, 20.67578125, 21.265625, 21.85546875, 22.4453125, 23.03515625, 23.625]}, "gradients/decoder.transformer.h.17.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 6.0, 20.0, 34.0, 69.0, 173.0, 202.0, 202.0, 143.0, 91.0, 48.0, 14.0, 10.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-142.42080688476562, -139.45489501953125, -136.48898315429688, -133.52308654785156, -130.5571746826172, -127.59126281738281, -124.62535095214844, -121.6594467163086, -118.69354248046875, -115.72763061523438, -112.76172637939453, -109.79581451416016, -106.82991027832031, -103.86399841308594, -100.89808654785156, -97.93218231201172, -94.96627044677734, -92.00035858154297, -89.03445434570312, -86.06854248046875, -83.1026382446289, -80.13672637939453, -77.17082214355469, -74.20491027832031, -71.23899841308594, -68.27308654785156, -65.30718231201172, -62.341270446777344, -59.3753662109375, -56.409454345703125, -53.443546295166016, -50.477638244628906, -47.51172637939453, -44.54581832885742, -41.57991027832031, -38.61399841308594, -35.648094177246094, -32.68218231201172, -29.71627426147461, -26.7503662109375, -23.78445816040039, -20.81855010986328, -17.852642059326172, -14.88673210144043, -11.92082405090332, -8.954916000366211, -5.989006042480469, -3.0230979919433594, -0.05718994140625, 2.9087185859680176, 5.874627113342285, 8.840536117553711, 11.80644416809082, 14.77235221862793, 17.738262176513672, 20.70417022705078, 23.67007827758789, 26.635986328125, 29.60189437866211, 32.56780242919922, 35.533714294433594, 38.49961853027344, 41.46553039550781, 44.43143844604492, 47.39734649658203]}, "gradients/decoder.transformer.h.17.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 2.0, 6.0, 9.0, 4.0, 8.0, 5.0, 13.0, 11.0, 10.0, 18.0, 17.0, 11.0, 26.0, 16.0, 26.0, 32.0, 37.0, 32.0, 37.0, 26.0, 36.0, 34.0, 47.0, 41.0, 42.0, 34.0, 39.0, 41.0, 40.0, 34.0, 36.0, 26.0, 25.0, 30.0, 21.0, 23.0, 20.0, 13.0, 16.0, 14.0, 11.0, 7.0, 9.0, 6.0, 6.0, 3.0, 5.0, 1.0, 4.0, 1.0, 0.0, 2.0, 2.0], "bins": [-35.398033142089844, -34.387638092041016, -33.37724685668945, -32.366851806640625, -31.35645866394043, -30.346065521240234, -29.335670471191406, -28.32527732849121, -27.314884185791016, -26.30449104309082, -25.294095993041992, -24.283702850341797, -23.2733097076416, -22.262916564941406, -21.252521514892578, -20.242128372192383, -19.231733322143555, -18.22134017944336, -17.21094512939453, -16.200551986694336, -15.19015884399414, -14.179764747619629, -13.169370651245117, -12.158977508544922, -11.14858341217041, -10.138189315795898, -9.127796173095703, -8.117402076721191, -7.107008457183838, -6.096614837646484, -5.086220741271973, -4.075827121734619, -3.065431594848633, -2.0550379753112793, -1.0446441173553467, -0.03425025939941406, 0.9761433601379395, 1.986536979675293, 2.9969310760498047, 4.007324695587158, 5.017718315124512, 6.028111934661865, 7.038505554199219, 8.04889965057373, 9.059293746948242, 10.069686889648438, 11.08008098602295, 12.090475082397461, 13.100868225097656, 14.111262321472168, 15.121655464172363, 16.132049560546875, 17.14244270324707, 18.152835845947266, 19.163230895996094, 20.17362403869629, 21.184017181396484, 22.19441032409668, 23.204805374145508, 24.215198516845703, 25.2255916595459, 26.235984802246094, 27.246379852294922, 28.256772994995117, 29.267168045043945]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 9.0, 4.0, 6.0, 8.0, 9.0, 9.0, 7.0, 12.0, 19.0, 18.0, 28.0, 21.0, 27.0, 32.0, 45.0, 52.0, 55.0, 44.0, 61.0, 53.0, 35.0, 46.0, 50.0, 44.0, 44.0, 37.0, 34.0, 26.0, 26.0, 34.0, 20.0, 18.0, 16.0, 17.0, 8.0, 11.0, 7.0, 5.0, 8.0, 3.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.91796875, -4.76983642578125, -4.6217041015625, -4.47357177734375, -4.325439453125, -4.17730712890625, -4.0291748046875, -3.88104248046875, -3.73291015625, -3.58477783203125, -3.4366455078125, -3.28851318359375, -3.140380859375, -2.99224853515625, -2.8441162109375, -2.69598388671875, -2.5478515625, -2.39971923828125, -2.2515869140625, -2.10345458984375, -1.955322265625, -1.80718994140625, -1.6590576171875, -1.51092529296875, -1.36279296875, -1.21466064453125, -1.0665283203125, -0.91839599609375, -0.770263671875, -0.62213134765625, -0.4739990234375, -0.32586669921875, -0.177734375, -0.02960205078125, 0.1185302734375, 0.26666259765625, 0.414794921875, 0.56292724609375, 0.7110595703125, 0.85919189453125, 1.00732421875, 1.15545654296875, 1.3035888671875, 1.45172119140625, 1.599853515625, 1.74798583984375, 1.8961181640625, 2.04425048828125, 2.1923828125, 2.34051513671875, 2.4886474609375, 2.63677978515625, 2.784912109375, 2.93304443359375, 3.0811767578125, 3.22930908203125, 3.37744140625, 3.52557373046875, 3.6737060546875, 3.82183837890625, 3.969970703125, 4.11810302734375, 4.2662353515625, 4.41436767578125, 4.5625]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 4.0, 11.0, 6.0, 19.0, 30.0, 27.0, 40.0, 98.0, 134.0, 179.0, 297.0, 352.0, 565.0, 852.0, 1328.0, 1994.0, 3006.0, 4775.0, 7013.0, 10985.0, 16761.0, 25519.0, 39749.0, 61459.0, 95757.0, 145301.0, 188228.0, 154372.0, 102282.0, 65464.0, 42605.0, 27700.0, 17924.0, 11650.0, 7446.0, 4963.0, 3210.0, 2096.0, 1437.0, 962.0, 611.0, 457.0, 314.0, 177.0, 130.0, 88.0, 51.0, 41.0, 27.0, 25.0, 16.0, 9.0, 7.0, 6.0, 5.0, 0.0, 2.0, 1.0], "bins": [-0.322998046875, -0.3130836486816406, -0.30316925048828125, -0.2932548522949219, -0.2833404541015625, -0.2734260559082031, -0.26351165771484375, -0.2535972595214844, -0.243682861328125, -0.23376846313476562, -0.22385406494140625, -0.21393966674804688, -0.2040252685546875, -0.19411087036132812, -0.18419647216796875, -0.17428207397460938, -0.16436767578125, -0.15445327758789062, -0.14453887939453125, -0.13462448120117188, -0.1247100830078125, -0.11479568481445312, -0.10488128662109375, -0.09496688842773438, -0.085052490234375, -0.07513809204101562, -0.06522369384765625, -0.055309295654296875, -0.0453948974609375, -0.035480499267578125, -0.02556610107421875, -0.015651702880859375, -0.0057373046875, 0.004177093505859375, 0.01409149169921875, 0.024005889892578125, 0.0339202880859375, 0.043834686279296875, 0.05374908447265625, 0.06366348266601562, 0.073577880859375, 0.08349227905273438, 0.09340667724609375, 0.10332107543945312, 0.1132354736328125, 0.12314987182617188, 0.13306427001953125, 0.14297866821289062, 0.15289306640625, 0.16280746459960938, 0.17272186279296875, 0.18263626098632812, 0.1925506591796875, 0.20246505737304688, 0.21237945556640625, 0.22229385375976562, 0.232208251953125, 0.24212265014648438, 0.25203704833984375, 0.2619514465332031, 0.2718658447265625, 0.2817802429199219, 0.29169464111328125, 0.3016090393066406, 0.3115234375]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 7.0, 3.0, 3.0, 1.0, 5.0, 2.0, 13.0, 4.0, 8.0, 8.0, 21.0, 14.0, 15.0, 22.0, 14.0, 20.0, 24.0, 29.0, 32.0, 32.0, 24.0, 33.0, 31.0, 42.0, 37.0, 30.0, 1052.0, 33.0, 35.0, 37.0, 30.0, 36.0, 39.0, 30.0, 36.0, 32.0, 29.0, 25.0, 20.0, 22.0, 13.0, 20.0, 11.0, 12.0, 4.0, 10.0, 12.0, 9.0, 3.0, 5.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.0859375, -2.01300048828125, -1.9400634765625, -1.86712646484375, -1.794189453125, -1.72125244140625, -1.6483154296875, -1.57537841796875, -1.50244140625, -1.42950439453125, -1.3565673828125, -1.28363037109375, -1.210693359375, -1.13775634765625, -1.0648193359375, -0.99188232421875, -0.9189453125, -0.84600830078125, -0.7730712890625, -0.70013427734375, -0.627197265625, -0.55426025390625, -0.4813232421875, -0.40838623046875, -0.33544921875, -0.26251220703125, -0.1895751953125, -0.11663818359375, -0.043701171875, 0.02923583984375, 0.1021728515625, 0.17510986328125, 0.248046875, 0.32098388671875, 0.3939208984375, 0.46685791015625, 0.539794921875, 0.61273193359375, 0.6856689453125, 0.75860595703125, 0.83154296875, 0.90447998046875, 0.9774169921875, 1.05035400390625, 1.123291015625, 1.19622802734375, 1.2691650390625, 1.34210205078125, 1.4150390625, 1.48797607421875, 1.5609130859375, 1.63385009765625, 1.706787109375, 1.77972412109375, 1.8526611328125, 1.92559814453125, 1.99853515625, 2.07147216796875, 2.1444091796875, 2.21734619140625, 2.290283203125, 2.36322021484375, 2.4361572265625, 2.50909423828125, 2.58203125]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 4.0, 2.0, 3.0, 1.0, 1.0, 3.0, 11.0, 7.0, 18.0, 15.0, 38.0, 67.0, 97.0, 172.0, 266.0, 458.0, 678.0, 1228.0, 2072.0, 3382.0, 5683.0, 9084.0, 14783.0, 24079.0, 39760.0, 63939.0, 102794.0, 157470.0, 1243385.0, 157992.0, 103248.0, 64586.0, 39316.0, 24139.0, 14876.0, 9296.0, 5633.0, 3398.0, 2110.0, 1242.0, 690.0, 419.0, 274.0, 168.0, 103.0, 55.0, 33.0, 29.0, 16.0, 11.0, 5.0, 5.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.269775390625, -0.2606315612792969, -0.25148773193359375, -0.24234390258789062, -0.2332000732421875, -0.22405624389648438, -0.21491241455078125, -0.20576858520507812, -0.196624755859375, -0.18748092651367188, -0.17833709716796875, -0.16919326782226562, -0.1600494384765625, -0.15090560913085938, -0.14176177978515625, -0.13261795043945312, -0.12347412109375, -0.11433029174804688, -0.10518646240234375, -0.09604263305664062, -0.0868988037109375, -0.07775497436523438, -0.06861114501953125, -0.059467315673828125, -0.050323486328125, -0.041179656982421875, -0.03203582763671875, -0.022891998291015625, -0.0137481689453125, -0.004604339599609375, 0.00453948974609375, 0.013683319091796875, 0.0228271484375, 0.031970977783203125, 0.04111480712890625, 0.050258636474609375, 0.0594024658203125, 0.06854629516601562, 0.07769012451171875, 0.08683395385742188, 0.095977783203125, 0.10512161254882812, 0.11426544189453125, 0.12340927124023438, 0.1325531005859375, 0.14169692993164062, 0.15084075927734375, 0.15998458862304688, 0.16912841796875, 0.17827224731445312, 0.18741607666015625, 0.19655990600585938, 0.2057037353515625, 0.21484756469726562, 0.22399139404296875, 0.23313522338867188, 0.242279052734375, 0.2514228820800781, 0.26056671142578125, 0.2697105407714844, 0.2788543701171875, 0.2879981994628906, 0.29714202880859375, 0.3062858581542969, 0.3154296875]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 4.0, 4.0, 2.0, 6.0, 5.0, 3.0, 7.0, 12.0, 23.0, 28.0, 26.0, 40.0, 37.0, 67.0, 50.0, 69.0, 75.0, 79.0, 75.0, 88.0, 60.0, 55.0, 47.0, 34.0, 25.0, 28.0, 13.0, 14.0, 9.0, 9.0, 4.0, 1.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-0.0014410018920898438, -0.0014044567942619324, -0.001367911696434021, -0.0013313665986061096, -0.0012948215007781982, -0.0012582764029502869, -0.0012217313051223755, -0.0011851862072944641, -0.0011486411094665527, -0.0011120960116386414, -0.00107555091381073, -0.0010390058159828186, -0.0010024607181549072, -0.0009659156203269958, -0.0009293705224990845, -0.0008928254246711731, -0.0008562803268432617, -0.0008197352290153503, -0.000783190131187439, -0.0007466450333595276, -0.0007100999355316162, -0.0006735548377037048, -0.0006370097398757935, -0.0006004646420478821, -0.0005639195442199707, -0.0005273744463920593, -0.000490829348564148, -0.00045428425073623657, -0.0004177391529083252, -0.0003811940550804138, -0.00034464895725250244, -0.00030810385942459106, -0.0002715587615966797, -0.0002350136637687683, -0.00019846856594085693, -0.00016192346811294556, -0.00012537837028503418, -8.88332724571228e-05, -5.2288174629211426e-05, -1.574307680130005e-05, 2.0802021026611328e-05, 5.7347118854522705e-05, 9.389221668243408e-05, 0.00013043731451034546, 0.00016698241233825684, 0.0002035275101661682, 0.0002400726079940796, 0.00027661770582199097, 0.00031316280364990234, 0.0003497079014778137, 0.0003862529993057251, 0.0004227980971336365, 0.00045934319496154785, 0.0004958882927894592, 0.0005324333906173706, 0.000568978488445282, 0.0006055235862731934, 0.0006420686841011047, 0.0006786137819290161, 0.0007151588797569275, 0.0007517039775848389, 0.0007882490754127502, 0.0008247941732406616, 0.000861339271068573, 0.0008978843688964844]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 0.0, 3.0, 3.0, 3.0, 1.0, 1.0, 7.0, 5.0, 11.0, 10.0, 11.0, 23.0, 33.0, 39.0, 59.0, 105.0, 135.0, 232.0, 387.0, 849.0, 19140.0, 1009116.0, 16534.0, 816.0, 376.0, 220.0, 146.0, 86.0, 60.0, 49.0, 27.0, 20.0, 10.0, 11.0, 5.0, 8.0, 4.0, 4.0, 5.0, 6.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.01535797119140625, -0.01473081111907959, -0.01410365104675293, -0.01347649097442627, -0.01284933090209961, -0.01222217082977295, -0.011595010757446289, -0.010967850685119629, -0.010340690612792969, -0.009713530540466309, -0.009086370468139648, -0.008459210395812988, -0.007832050323486328, -0.007204890251159668, -0.006577730178833008, -0.005950570106506348, -0.0053234100341796875, -0.004696249961853027, -0.004069089889526367, -0.003441929817199707, -0.002814769744873047, -0.0021876096725463867, -0.0015604496002197266, -0.0009332895278930664, -0.00030612945556640625, 0.0003210306167602539, 0.0009481906890869141, 0.0015753507614135742, 0.0022025108337402344, 0.0028296709060668945, 0.0034568309783935547, 0.004083991050720215, 0.004711151123046875, 0.005338311195373535, 0.005965471267700195, 0.0065926313400268555, 0.007219791412353516, 0.007846951484680176, 0.008474111557006836, 0.009101271629333496, 0.009728431701660156, 0.010355591773986816, 0.010982751846313477, 0.011609911918640137, 0.012237071990966797, 0.012864232063293457, 0.013491392135620117, 0.014118552207946777, 0.014745712280273438, 0.015372872352600098, 0.016000032424926758, 0.016627192497253418, 0.017254352569580078, 0.01788151264190674, 0.0185086727142334, 0.01913583278656006, 0.01976299285888672, 0.02039015293121338, 0.02101731300354004, 0.0216444730758667, 0.02227163314819336, 0.02289879322052002, 0.02352595329284668, 0.02415311336517334, 0.0247802734375]}, "gradients/decoder.transformer.h.17.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 110.0, 490.0, 368.0, 39.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004035771358758211, -0.0039128875359892845, -0.0037900032475590706, -0.0036671191919595003, -0.00354423513635993, -0.0034213513135910034, -0.003298467257991433, -0.003175583202391863, -0.0030526991467922926, -0.0029298150911927223, -0.002806931035593152, -0.0026840469799935818, -0.002561163157224655, -0.0024382788687944412, -0.0023153950460255146, -0.0021925109904259443, -0.002069626934826374, -0.0019467428792268038, -0.0018238588236272335, -0.001700974884442985, -0.0015780908288434148, -0.0014552067732438445, -0.001332322834059596, -0.0012094387784600258, -0.0010865547228604555, -0.0009636706672608852, -0.0008407866698689759, -0.0007179026724770665, -0.0005950186168774962, -0.00047213456127792597, -0.0003492505638860166, -0.00022636656649410725, -0.00010348204523324966, 1.9401981262490153e-05, 0.00014228600775822997, 0.0002651700342539698, 0.0003880540607497096, 0.0005109381163492799, 0.0006338221137411892, 0.0007567061111330986, 0.0008795901667326689, 0.0010024742223322392, 0.0011253582779318094, 0.0012482422171160579, 0.0013711262727156281, 0.0014940103283151984, 0.0016168942674994469, 0.0017397783230990171, 0.0018626623786985874, 0.0019855464342981577, 0.002108430489897728, 0.0022313145454972982, 0.0023541986010968685, 0.002477082423865795, 0.0025999664794653654, 0.0027228505350649357, 0.002845734590664506, 0.0029686186462640762, 0.0030915027018636465, 0.0032143867574632168, 0.0033372705802321434, 0.0034601548686623573, 0.003583038691431284, 0.0037059227470308542, 0.0038288068026304245]}, "gradients/decoder.transformer.h.17.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 7.0, 3.0, 5.0, 10.0, 11.0, 16.0, 19.0, 22.0, 23.0, 23.0, 38.0, 25.0, 32.0, 30.0, 31.0, 45.0, 37.0, 45.0, 53.0, 38.0, 51.0, 46.0, 35.0, 57.0, 31.0, 46.0, 38.0, 23.0, 34.0, 19.0, 20.0, 22.0, 22.0, 12.0, 5.0, 10.0, 8.0, 5.0, 5.0, 2.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005851984024047852, -0.0005677500739693642, -0.0005503017455339432, -0.0005328534170985222, -0.0005154050886631012, -0.0004979567602276802, -0.0004805084317922592, -0.0004630601033568382, -0.00044561177492141724, -0.00042816344648599625, -0.00041071511805057526, -0.00039326678961515427, -0.0003758184611797333, -0.0003583701327443123, -0.0003409218043088913, -0.0003234734758734703, -0.0003060251474380493, -0.0002885768190026283, -0.00027112849056720734, -0.00025368016213178635, -0.00023623183369636536, -0.00021878350526094437, -0.00020133517682552338, -0.0001838868483901024, -0.0001664385199546814, -0.0001489901915192604, -0.00013154186308383942, -0.00011409353464841843, -9.664520621299744e-05, -7.919687777757645e-05, -6.174854934215546e-05, -4.4300220906734467e-05, -2.6851892471313477e-05, -9.403564035892487e-06, 8.044764399528503e-06, 2.5493092834949493e-05, 4.2941421270370483e-05, 6.0389749705791473e-05, 7.783807814121246e-05, 9.528640657663345e-05, 0.00011273473501205444, 0.00013018306344747543, 0.00014763139188289642, 0.0001650797203183174, 0.0001825280487537384, 0.0001999763771891594, 0.00021742470562458038, 0.00023487303406000137, 0.00025232136249542236, 0.00026976969093084335, 0.00028721801936626434, 0.00030466634780168533, 0.0003221146762371063, 0.0003395630046725273, 0.0003570113331079483, 0.0003744596615433693, 0.0003919079899787903, 0.0004093563184142113, 0.00042680464684963226, 0.00044425297528505325, 0.00046170130372047424, 0.00047914963215589523, 0.0004965979605913162, 0.0005140462890267372, 0.0005314946174621582]}, "gradients/decoder.transformer.h.17.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 9.0, 4.0, 6.0, 8.0, 9.0, 9.0, 7.0, 12.0, 19.0, 18.0, 28.0, 21.0, 27.0, 32.0, 45.0, 52.0, 55.0, 44.0, 61.0, 53.0, 35.0, 46.0, 50.0, 45.0, 43.0, 37.0, 34.0, 26.0, 26.0, 34.0, 20.0, 18.0, 16.0, 17.0, 8.0, 11.0, 7.0, 5.0, 8.0, 3.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.91796875, -4.76983642578125, -4.6217041015625, -4.47357177734375, -4.325439453125, -4.17730712890625, -4.0291748046875, -3.88104248046875, -3.73291015625, -3.58477783203125, -3.4366455078125, -3.28851318359375, -3.140380859375, -2.99224853515625, -2.8441162109375, -2.69598388671875, -2.5478515625, -2.39971923828125, -2.2515869140625, -2.10345458984375, -1.955322265625, -1.80718994140625, -1.6590576171875, -1.51092529296875, -1.36279296875, -1.21466064453125, -1.0665283203125, -0.91839599609375, -0.770263671875, -0.62213134765625, -0.4739990234375, -0.32586669921875, -0.177734375, -0.02960205078125, 0.1185302734375, 0.26666259765625, 0.414794921875, 0.56292724609375, 0.7110595703125, 0.85919189453125, 1.00732421875, 1.15545654296875, 1.3035888671875, 1.45172119140625, 1.599853515625, 1.74798583984375, 1.8961181640625, 2.04425048828125, 2.1923828125, 2.34051513671875, 2.4886474609375, 2.63677978515625, 2.784912109375, 2.93304443359375, 3.0811767578125, 3.22930908203125, 3.37744140625, 3.52557373046875, 3.6737060546875, 3.82183837890625, 3.969970703125, 4.11810302734375, 4.2662353515625, 4.41436767578125, 4.5625]}, "gradients/decoder.transformer.h.17.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 3.0, 6.0, 11.0, 5.0, 10.0, 17.0, 18.0, 21.0, 31.0, 38.0, 53.0, 114.0, 145.0, 253.0, 441.0, 806.0, 1551.0, 3030.0, 5876.0, 11714.0, 25022.0, 63039.0, 204481.0, 423538.0, 198432.0, 61572.0, 24628.0, 11459.0, 5871.0, 2882.0, 1502.0, 857.0, 430.0, 251.0, 164.0, 70.0, 70.0, 48.0, 34.0, 18.0, 17.0, 13.0, 5.0, 6.0, 6.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.72265625, -4.5625, -4.40234375, -4.2421875, -4.08203125, -3.921875, -3.76171875, -3.6015625, -3.44140625, -3.28125, -3.12109375, -2.9609375, -2.80078125, -2.640625, -2.48046875, -2.3203125, -2.16015625, -2.0, -1.83984375, -1.6796875, -1.51953125, -1.359375, -1.19921875, -1.0390625, -0.87890625, -0.71875, -0.55859375, -0.3984375, -0.23828125, -0.078125, 0.08203125, 0.2421875, 0.40234375, 0.5625, 0.72265625, 0.8828125, 1.04296875, 1.203125, 1.36328125, 1.5234375, 1.68359375, 1.84375, 2.00390625, 2.1640625, 2.32421875, 2.484375, 2.64453125, 2.8046875, 2.96484375, 3.125, 3.28515625, 3.4453125, 3.60546875, 3.765625, 3.92578125, 4.0859375, 4.24609375, 4.40625, 4.56640625, 4.7265625, 4.88671875, 5.046875, 5.20703125, 5.3671875, 5.52734375]}, "gradients/decoder.transformer.h.17.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 6.0, 5.0, 6.0, 2.0, 6.0, 9.0, 13.0, 12.0, 13.0, 20.0, 25.0, 29.0, 34.0, 36.0, 40.0, 48.0, 68.0, 141.0, 251.0, 1511.0, 265.0, 110.0, 73.0, 59.0, 41.0, 27.0, 36.0, 33.0, 29.0, 20.0, 21.0, 17.0, 13.0, 8.0, 8.0, 8.0, 7.0, 1.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-19.75, -19.1932373046875, -18.636474609375, -18.0797119140625, -17.52294921875, -16.9661865234375, -16.409423828125, -15.8526611328125, -15.2958984375, -14.7391357421875, -14.182373046875, -13.6256103515625, -13.06884765625, -12.5120849609375, -11.955322265625, -11.3985595703125, -10.841796875, -10.2850341796875, -9.728271484375, -9.1715087890625, -8.61474609375, -8.0579833984375, -7.501220703125, -6.9444580078125, -6.3876953125, -5.8309326171875, -5.274169921875, -4.7174072265625, -4.16064453125, -3.6038818359375, -3.047119140625, -2.4903564453125, -1.93359375, -1.3768310546875, -0.820068359375, -0.2633056640625, 0.29345703125, 0.8502197265625, 1.406982421875, 1.9637451171875, 2.5205078125, 3.0772705078125, 3.634033203125, 4.1907958984375, 4.74755859375, 5.3043212890625, 5.861083984375, 6.4178466796875, 6.974609375, 7.5313720703125, 8.088134765625, 8.6448974609375, 9.20166015625, 9.7584228515625, 10.315185546875, 10.8719482421875, 11.4287109375, 11.9854736328125, 12.542236328125, 13.0989990234375, 13.65576171875, 14.2125244140625, 14.769287109375, 15.3260498046875, 15.8828125]}, "gradients/decoder.transformer.h.17.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 4.0, 4.0, 4.0, 2.0, 4.0, 6.0, 13.0, 14.0, 16.0, 21.0, 29.0, 36.0, 39.0, 77.0, 112.0, 186.0, 271.0, 562.0, 2331.0, 135086.0, 2992731.0, 12180.0, 952.0, 372.0, 226.0, 116.0, 81.0, 64.0, 49.0, 28.0, 24.0, 19.0, 12.0, 9.0, 9.0, 5.0, 5.0, 6.0, 7.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.65625, -39.31298828125, -37.9697265625, -36.62646484375, -35.283203125, -33.93994140625, -32.5966796875, -31.25341796875, -29.91015625, -28.56689453125, -27.2236328125, -25.88037109375, -24.537109375, -23.19384765625, -21.8505859375, -20.50732421875, -19.1640625, -17.82080078125, -16.4775390625, -15.13427734375, -13.791015625, -12.44775390625, -11.1044921875, -9.76123046875, -8.41796875, -7.07470703125, -5.7314453125, -4.38818359375, -3.044921875, -1.70166015625, -0.3583984375, 0.98486328125, 2.328125, 3.67138671875, 5.0146484375, 6.35791015625, 7.701171875, 9.04443359375, 10.3876953125, 11.73095703125, 13.07421875, 14.41748046875, 15.7607421875, 17.10400390625, 18.447265625, 19.79052734375, 21.1337890625, 22.47705078125, 23.8203125, 25.16357421875, 26.5068359375, 27.85009765625, 29.193359375, 30.53662109375, 31.8798828125, 33.22314453125, 34.56640625, 35.90966796875, 37.2529296875, 38.59619140625, 39.939453125, 41.28271484375, 42.6259765625, 43.96923828125, 45.3125]}, "gradients/decoder.transformer.h.17.ln_1.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 9.0, 34.0, 122.0, 303.0, 315.0, 181.0, 45.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.525617599487305, -26.355003356933594, -24.18438720703125, -22.01377296447754, -19.843158721923828, -17.672542572021484, -15.501928329467773, -13.331313133239746, -11.160697937011719, -8.990082740783691, -6.819468021392822, -4.648853302001953, -2.478238105773926, -0.30762290954589844, 1.8629913330078125, 4.03360652923584, 6.204221725463867, 8.374836921691895, 10.545452117919922, 12.716066360473633, 14.88668155670166, 17.057296752929688, 19.2279109954834, 21.39852523803711, 23.569141387939453, 25.739755630493164, 27.910371780395508, 30.08098602294922, 32.25160217285156, 34.422218322753906, 36.592830657958984, 38.76344680786133, 40.93406677246094, 43.10468292236328, 45.27529525756836, 47.4459114074707, 49.61652755737305, 51.787139892578125, 53.95775604248047, 56.12837219238281, 58.298988342285156, 60.4696044921875, 62.64021682739258, 64.81083679199219, 66.9814453125, 69.15206146240234, 71.32267761230469, 73.49329376220703, 75.66390991210938, 77.83452606201172, 80.00514221191406, 82.17575073242188, 84.34636688232422, 86.51698303222656, 88.6875991821289, 90.85821533203125, 93.02882385253906, 95.1994400024414, 97.37005615234375, 99.54066467285156, 101.7112808227539, 103.88189697265625, 106.0525131225586, 108.22312927246094, 110.39374542236328]}, "gradients/decoder.transformer.h.17.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 4.0, 5.0, 8.0, 8.0, 12.0, 16.0, 12.0, 14.0, 19.0, 26.0, 18.0, 29.0, 28.0, 36.0, 32.0, 35.0, 38.0, 44.0, 46.0, 39.0, 37.0, 37.0, 47.0, 45.0, 37.0, 42.0, 29.0, 33.0, 30.0, 31.0, 29.0, 25.0, 15.0, 18.0, 13.0, 15.0, 13.0, 12.0, 7.0, 4.0, 7.0, 2.0, 3.0, 2.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-39.598392486572266, -38.39763641357422, -37.19688034057617, -35.99612045288086, -34.79536437988281, -33.594608306884766, -32.39385223388672, -31.193096160888672, -29.992338180541992, -28.791582107543945, -27.590824127197266, -26.39006805419922, -25.189311981201172, -23.988554000854492, -22.787797927856445, -21.587039947509766, -20.38628387451172, -19.185527801513672, -17.984769821166992, -16.784013748168945, -15.583256721496582, -14.382499694824219, -13.181743621826172, -11.980986595153809, -10.780229568481445, -9.579472541809082, -8.378715515136719, -7.177959442138672, -5.977202415466309, -4.776445388793945, -3.5756888389587402, -2.374932289123535, -1.1741714477539062, 0.02658534049987793, 1.227342128753662, 2.4280989170074463, 3.6288557052612305, 4.829612731933594, 6.030369281768799, 7.231125831604004, 8.431882858276367, 9.63263988494873, 10.833396911621094, 12.03415298461914, 13.234910011291504, 14.435667037963867, 15.636423110961914, 16.837181091308594, 18.03793716430664, 19.238693237304688, 20.439451217651367, 21.640207290649414, 22.840965270996094, 24.04172134399414, 25.242477416992188, 26.443233489990234, 27.643991470336914, 28.84474754333496, 30.04550552368164, 31.246261596679688, 32.447017669677734, 33.64777374267578, 34.848533630371094, 36.04928970336914, 37.25004577636719]}, "gradients/decoder.transformer.h.16.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 6.0, 6.0, 10.0, 5.0, 11.0, 2.0, 7.0, 18.0, 18.0, 22.0, 40.0, 26.0, 31.0, 41.0, 40.0, 45.0, 52.0, 37.0, 51.0, 67.0, 44.0, 49.0, 42.0, 31.0, 41.0, 47.0, 31.0, 34.0, 22.0, 27.0, 18.0, 20.0, 13.0, 8.0, 15.0, 10.0, 9.0, 3.0, 6.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.17578125, -5.01776123046875, -4.8597412109375, -4.70172119140625, -4.543701171875, -4.38568115234375, -4.2276611328125, -4.06964111328125, -3.91162109375, -3.75360107421875, -3.5955810546875, -3.43756103515625, -3.279541015625, -3.12152099609375, -2.9635009765625, -2.80548095703125, -2.6474609375, -2.48944091796875, -2.3314208984375, -2.17340087890625, -2.015380859375, -1.85736083984375, -1.6993408203125, -1.54132080078125, -1.38330078125, -1.22528076171875, -1.0672607421875, -0.90924072265625, -0.751220703125, -0.59320068359375, -0.4351806640625, -0.27716064453125, -0.119140625, 0.03887939453125, 0.1968994140625, 0.35491943359375, 0.512939453125, 0.67095947265625, 0.8289794921875, 0.98699951171875, 1.14501953125, 1.30303955078125, 1.4610595703125, 1.61907958984375, 1.777099609375, 1.93511962890625, 2.0931396484375, 2.25115966796875, 2.4091796875, 2.56719970703125, 2.7252197265625, 2.88323974609375, 3.041259765625, 3.19927978515625, 3.3572998046875, 3.51531982421875, 3.67333984375, 3.83135986328125, 3.9893798828125, 4.14739990234375, 4.305419921875, 4.46343994140625, 4.6214599609375, 4.77947998046875, 4.9375]}, "gradients/decoder.transformer.h.16.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 4.0, 4.0, 8.0, 9.0, 17.0, 13.0, 14.0, 16.0, 26.0, 31.0, 37.0, 54.0, 46.0, 75.0, 109.0, 142.0, 343.0, 1062.0, 9354.0, 375798.0, 3552837.0, 245623.0, 6969.0, 860.0, 264.0, 125.0, 103.0, 63.0, 63.0, 33.0, 38.0, 35.0, 30.0, 22.0, 13.0, 8.0, 11.0, 7.0, 8.0, 3.0, 4.0, 4.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-21.4375, -20.796630859375, -20.15576171875, -19.514892578125, -18.8740234375, -18.233154296875, -17.59228515625, -16.951416015625, -16.310546875, -15.669677734375, -15.02880859375, -14.387939453125, -13.7470703125, -13.106201171875, -12.46533203125, -11.824462890625, -11.18359375, -10.542724609375, -9.90185546875, -9.260986328125, -8.6201171875, -7.979248046875, -7.33837890625, -6.697509765625, -6.056640625, -5.415771484375, -4.77490234375, -4.134033203125, -3.4931640625, -2.852294921875, -2.21142578125, -1.570556640625, -0.9296875, -0.288818359375, 0.35205078125, 0.992919921875, 1.6337890625, 2.274658203125, 2.91552734375, 3.556396484375, 4.197265625, 4.838134765625, 5.47900390625, 6.119873046875, 6.7607421875, 7.401611328125, 8.04248046875, 8.683349609375, 9.32421875, 9.965087890625, 10.60595703125, 11.246826171875, 11.8876953125, 12.528564453125, 13.16943359375, 13.810302734375, 14.451171875, 15.092041015625, 15.73291015625, 16.373779296875, 17.0146484375, 17.655517578125, 18.29638671875, 18.937255859375, 19.578125]}, "gradients/decoder.transformer.h.16.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 5.0, 7.0, 12.0, 18.0, 26.0, 51.0, 73.0, 128.0, 215.0, 385.0, 580.0, 738.0, 704.0, 430.0, 297.0, 174.0, 109.0, 59.0, 26.0, 16.0, 16.0, 6.0, 3.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.765625, -13.185302734375, -12.60498046875, -12.024658203125, -11.4443359375, -10.864013671875, -10.28369140625, -9.703369140625, -9.123046875, -8.542724609375, -7.96240234375, -7.382080078125, -6.8017578125, -6.221435546875, -5.64111328125, -5.060791015625, -4.48046875, -3.900146484375, -3.31982421875, -2.739501953125, -2.1591796875, -1.578857421875, -0.99853515625, -0.418212890625, 0.162109375, 0.742431640625, 1.32275390625, 1.903076171875, 2.4833984375, 3.063720703125, 3.64404296875, 4.224365234375, 4.8046875, 5.385009765625, 5.96533203125, 6.545654296875, 7.1259765625, 7.706298828125, 8.28662109375, 8.866943359375, 9.447265625, 10.027587890625, 10.60791015625, 11.188232421875, 11.7685546875, 12.348876953125, 12.92919921875, 13.509521484375, 14.08984375, 14.670166015625, 15.25048828125, 15.830810546875, 16.4111328125, 16.991455078125, 17.57177734375, 18.152099609375, 18.732421875, 19.312744140625, 19.89306640625, 20.473388671875, 21.0537109375, 21.634033203125, 22.21435546875, 22.794677734375, 23.375]}, "gradients/decoder.transformer.h.16.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 5.0, 5.0, 13.0, 16.0, 27.0, 52.0, 83.0, 193.0, 389.0, 867.0, 8311.0, 2760090.0, 1417664.0, 5061.0, 791.0, 330.0, 169.0, 102.0, 55.0, 21.0, 23.0, 11.0, 2.0, 8.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-65.4375, -63.8388671875, -62.240234375, -60.6416015625, -59.04296875, -57.4443359375, -55.845703125, -54.2470703125, -52.6484375, -51.0498046875, -49.451171875, -47.8525390625, -46.25390625, -44.6552734375, -43.056640625, -41.4580078125, -39.859375, -38.2607421875, -36.662109375, -35.0634765625, -33.46484375, -31.8662109375, -30.267578125, -28.6689453125, -27.0703125, -25.4716796875, -23.873046875, -22.2744140625, -20.67578125, -19.0771484375, -17.478515625, -15.8798828125, -14.28125, -12.6826171875, -11.083984375, -9.4853515625, -7.88671875, -6.2880859375, -4.689453125, -3.0908203125, -1.4921875, 0.1064453125, 1.705078125, 3.3037109375, 4.90234375, 6.5009765625, 8.099609375, 9.6982421875, 11.296875, 12.8955078125, 14.494140625, 16.0927734375, 17.69140625, 19.2900390625, 20.888671875, 22.4873046875, 24.0859375, 25.6845703125, 27.283203125, 28.8818359375, 30.48046875, 32.0791015625, 33.677734375, 35.2763671875, 36.875]}, "gradients/decoder.transformer.h.16.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 7.0, 6.0, 11.0, 13.0, 23.0, 25.0, 36.0, 50.0, 51.0, 64.0, 60.0, 82.0, 86.0, 70.0, 74.0, 77.0, 63.0, 51.0, 27.0, 33.0, 32.0, 20.0, 12.0, 13.0, 10.0, 7.0, 6.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.672157287597656, -34.4562873840332, -33.240421295166016, -32.02455139160156, -30.80868148803711, -29.59281349182129, -28.37694549560547, -27.161075592041016, -25.945207595825195, -24.729339599609375, -23.513469696044922, -22.2976016998291, -21.08173370361328, -19.865863800048828, -18.649995803833008, -17.434127807617188, -16.218257904052734, -15.002388954162598, -13.786520004272461, -12.57065200805664, -11.354783058166504, -10.138914108276367, -8.923046112060547, -7.70717716217041, -6.491308212280273, -5.275439262390137, -4.059570789337158, -2.8437020778656006, -1.627833366394043, -0.41196441650390625, 0.8039040565490723, 2.019772529602051, 3.235645294189453, 4.45151424407959, 5.667382717132568, 6.883251190185547, 8.099120140075684, 9.31498908996582, 10.53085708618164, 11.746726036071777, 12.962594985961914, 14.17846393585205, 15.394332885742188, 16.610200881958008, 17.826068878173828, 19.04193878173828, 20.2578067779541, 21.473674774169922, 22.689544677734375, 23.905412673950195, 25.12128257751465, 26.33715057373047, 27.553020477294922, 28.768888473510742, 29.984756469726562, 31.200626373291016, 32.41649627685547, 33.63236618041992, 34.84823226928711, 36.06410217285156, 37.279972076416016, 38.49584197998047, 39.711708068847656, 40.92757797241211, 42.1434440612793]}, "gradients/decoder.transformer.h.16.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 6.0, 6.0, 8.0, 11.0, 9.0, 12.0, 23.0, 14.0, 21.0, 17.0, 27.0, 18.0, 37.0, 33.0, 36.0, 26.0, 34.0, 51.0, 33.0, 46.0, 38.0, 37.0, 43.0, 49.0, 43.0, 28.0, 41.0, 35.0, 31.0, 22.0, 35.0, 23.0, 20.0, 23.0, 12.0, 12.0, 6.0, 9.0, 12.0, 4.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-37.93120574951172, -36.79109191894531, -35.650978088378906, -34.5108642578125, -33.370750427246094, -32.23063659667969, -31.090524673461914, -29.950410842895508, -28.8102970123291, -27.670183181762695, -26.53006935119629, -25.389955520629883, -24.24984359741211, -23.109729766845703, -21.969615936279297, -20.82950210571289, -19.689388275146484, -18.549274444580078, -17.409160614013672, -16.269046783447266, -15.128933906555176, -13.98882007598877, -12.84870719909668, -11.708593368530273, -10.568479537963867, -9.428365707397461, -8.288251876831055, -7.148138999938965, -6.008025169372559, -4.867911338806152, -3.7277979850769043, -2.5876846313476562, -1.4475746154785156, -0.3074610233306885, 0.8326525688171387, 1.9727661609649658, 3.112879753112793, 4.252993583679199, 5.393106937408447, 6.533220291137695, 7.673334121704102, 8.813447952270508, 9.953561782836914, 11.093674659729004, 12.23378849029541, 13.373902320861816, 14.514015197753906, 15.654129028320312, 16.79424285888672, 17.934356689453125, 19.07447052001953, 20.214584350585938, 21.354698181152344, 22.49481201171875, 23.634923934936523, 24.77503776550293, 25.915151596069336, 27.055265426635742, 28.19537925720215, 29.335493087768555, 30.475605010986328, 31.615718841552734, 32.75583267211914, 33.89594650268555, 35.03606033325195]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 6.0, 9.0, 8.0, 8.0, 9.0, 9.0, 14.0, 14.0, 25.0, 23.0, 34.0, 46.0, 46.0, 40.0, 46.0, 50.0, 55.0, 39.0, 36.0, 46.0, 54.0, 41.0, 44.0, 35.0, 32.0, 48.0, 36.0, 23.0, 20.0, 20.0, 18.0, 15.0, 12.0, 4.0, 9.0, 4.0, 12.0, 5.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.16796875, -5.00970458984375, -4.8514404296875, -4.69317626953125, -4.534912109375, -4.37664794921875, -4.2183837890625, -4.06011962890625, -3.90185546875, -3.74359130859375, -3.5853271484375, -3.42706298828125, -3.268798828125, -3.11053466796875, -2.9522705078125, -2.79400634765625, -2.6357421875, -2.47747802734375, -2.3192138671875, -2.16094970703125, -2.002685546875, -1.84442138671875, -1.6861572265625, -1.52789306640625, -1.36962890625, -1.21136474609375, -1.0531005859375, -0.89483642578125, -0.736572265625, -0.57830810546875, -0.4200439453125, -0.26177978515625, -0.103515625, 0.05474853515625, 0.2130126953125, 0.37127685546875, 0.529541015625, 0.68780517578125, 0.8460693359375, 1.00433349609375, 1.16259765625, 1.32086181640625, 1.4791259765625, 1.63739013671875, 1.795654296875, 1.95391845703125, 2.1121826171875, 2.27044677734375, 2.4287109375, 2.58697509765625, 2.7452392578125, 2.90350341796875, 3.061767578125, 3.22003173828125, 3.3782958984375, 3.53656005859375, 3.69482421875, 3.85308837890625, 4.0113525390625, 4.16961669921875, 4.327880859375, 4.48614501953125, 4.6444091796875, 4.80267333984375, 4.9609375]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 2.0, 4.0, 5.0, 10.0, 10.0, 15.0, 21.0, 45.0, 59.0, 86.0, 149.0, 220.0, 326.0, 541.0, 844.0, 1331.0, 2026.0, 3424.0, 5282.0, 8457.0, 13616.0, 22628.0, 36920.0, 59712.0, 95604.0, 147922.0, 196096.0, 164943.0, 108651.0, 68596.0, 42749.0, 26209.0, 16124.0, 9801.0, 6028.0, 3671.0, 2369.0, 1479.0, 919.0, 600.0, 388.0, 237.0, 153.0, 106.0, 68.0, 39.0, 23.0, 27.0, 14.0, 12.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.35888671875, -0.34717559814453125, -0.3354644775390625, -0.32375335693359375, -0.312042236328125, -0.30033111572265625, -0.2886199951171875, -0.27690887451171875, -0.26519775390625, -0.25348663330078125, -0.2417755126953125, -0.23006439208984375, -0.218353271484375, -0.20664215087890625, -0.1949310302734375, -0.18321990966796875, -0.1715087890625, -0.15979766845703125, -0.1480865478515625, -0.13637542724609375, -0.124664306640625, -0.11295318603515625, -0.1012420654296875, -0.08953094482421875, -0.07781982421875, -0.06610870361328125, -0.0543975830078125, -0.04268646240234375, -0.030975341796875, -0.01926422119140625, -0.0075531005859375, 0.00415802001953125, 0.015869140625, 0.02758026123046875, 0.0392913818359375, 0.05100250244140625, 0.062713623046875, 0.07442474365234375, 0.0861358642578125, 0.09784698486328125, 0.10955810546875, 0.12126922607421875, 0.1329803466796875, 0.14469146728515625, 0.156402587890625, 0.16811370849609375, 0.1798248291015625, 0.19153594970703125, 0.2032470703125, 0.21495819091796875, 0.2266693115234375, 0.23838043212890625, 0.250091552734375, 0.26180267333984375, 0.2735137939453125, 0.28522491455078125, 0.29693603515625, 0.30864715576171875, 0.3203582763671875, 0.33206939697265625, 0.343780517578125, 0.35549163818359375, 0.3672027587890625, 0.37891387939453125, 0.390625]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 3.0, 3.0, 4.0, 3.0, 8.0, 5.0, 9.0, 5.0, 9.0, 12.0, 17.0, 21.0, 22.0, 26.0, 33.0, 33.0, 29.0, 27.0, 31.0, 39.0, 40.0, 38.0, 40.0, 46.0, 1057.0, 43.0, 45.0, 31.0, 33.0, 47.0, 37.0, 27.0, 23.0, 22.0, 25.0, 21.0, 19.0, 15.0, 9.0, 11.0, 11.0, 13.0, 11.0, 6.0, 8.0, 6.0, 4.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.578125, -2.492950439453125, -2.40777587890625, -2.322601318359375, -2.2374267578125, -2.152252197265625, -2.06707763671875, -1.981903076171875, -1.896728515625, -1.811553955078125, -1.72637939453125, -1.641204833984375, -1.5560302734375, -1.470855712890625, -1.38568115234375, -1.300506591796875, -1.21533203125, -1.130157470703125, -1.04498291015625, -0.959808349609375, -0.8746337890625, -0.789459228515625, -0.70428466796875, -0.619110107421875, -0.533935546875, -0.448760986328125, -0.36358642578125, -0.278411865234375, -0.1932373046875, -0.108062744140625, -0.02288818359375, 0.062286376953125, 0.1474609375, 0.232635498046875, 0.31781005859375, 0.402984619140625, 0.4881591796875, 0.573333740234375, 0.65850830078125, 0.743682861328125, 0.828857421875, 0.914031982421875, 0.99920654296875, 1.084381103515625, 1.1695556640625, 1.254730224609375, 1.33990478515625, 1.425079345703125, 1.51025390625, 1.595428466796875, 1.68060302734375, 1.765777587890625, 1.8509521484375, 1.936126708984375, 2.02130126953125, 2.106475830078125, 2.191650390625, 2.276824951171875, 2.36199951171875, 2.447174072265625, 2.5323486328125, 2.617523193359375, 2.70269775390625, 2.787872314453125, 2.873046875]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 6.0, 4.0, 10.0, 13.0, 19.0, 34.0, 35.0, 60.0, 99.0, 123.0, 218.0, 298.0, 471.0, 633.0, 974.0, 1500.0, 2274.0, 3240.0, 4828.0, 7398.0, 10955.0, 16239.0, 24049.0, 35991.0, 53282.0, 79037.0, 114201.0, 151178.0, 1200487.0, 122739.0, 87554.0, 58947.0, 39726.0, 26355.0, 17846.0, 11899.0, 7986.0, 5326.0, 3643.0, 2457.0, 1623.0, 1055.0, 797.0, 552.0, 296.0, 234.0, 157.0, 96.0, 62.0, 35.0, 37.0, 19.0, 19.0, 8.0, 10.0, 6.0, 3.0, 2.0, 1.0], "bins": [-0.261962890625, -0.25405311584472656, -0.24614334106445312, -0.2382335662841797, -0.23032379150390625, -0.2224140167236328, -0.21450424194335938, -0.20659446716308594, -0.1986846923828125, -0.19077491760253906, -0.18286514282226562, -0.1749553680419922, -0.16704559326171875, -0.1591358184814453, -0.15122604370117188, -0.14331626892089844, -0.135406494140625, -0.12749671936035156, -0.11958694458007812, -0.11167716979980469, -0.10376739501953125, -0.09585762023925781, -0.08794784545898438, -0.08003807067871094, -0.0721282958984375, -0.06421852111816406, -0.056308746337890625, -0.04839897155761719, -0.04048919677734375, -0.03257942199707031, -0.024669647216796875, -0.016759872436523438, -0.00885009765625, -0.0009403228759765625, 0.006969451904296875, 0.014879226684570312, 0.02278900146484375, 0.030698776245117188, 0.038608551025390625, 0.04651832580566406, 0.0544281005859375, 0.06233787536621094, 0.07024765014648438, 0.07815742492675781, 0.08606719970703125, 0.09397697448730469, 0.10188674926757812, 0.10979652404785156, 0.117706298828125, 0.12561607360839844, 0.13352584838867188, 0.1414356231689453, 0.14934539794921875, 0.1572551727294922, 0.16516494750976562, 0.17307472229003906, 0.1809844970703125, 0.18889427185058594, 0.19680404663085938, 0.2047138214111328, 0.21262359619140625, 0.2205333709716797, 0.22844314575195312, 0.23635292053222656, 0.2442626953125]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 5.0, 9.0, 8.0, 6.0, 8.0, 6.0, 11.0, 17.0, 16.0, 22.0, 17.0, 27.0, 39.0, 26.0, 39.0, 35.0, 41.0, 41.0, 53.0, 53.0, 58.0, 42.0, 41.0, 48.0, 58.0, 38.0, 40.0, 37.0, 36.0, 21.0, 18.0, 13.0, 9.0, 13.0, 17.0, 10.0, 1.0, 4.0, 5.0, 3.0, 1.0, 1.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-0.000743865966796875, -0.0007175654172897339, -0.0006912648677825928, -0.0006649643182754517, -0.0006386637687683105, -0.0006123632192611694, -0.0005860626697540283, -0.0005597621202468872, -0.0005334615707397461, -0.000507161021232605, -0.00048086047172546387, -0.00045455992221832275, -0.00042825937271118164, -0.00040195882320404053, -0.0003756582736968994, -0.0003493577241897583, -0.0003230571746826172, -0.0002967566251754761, -0.00027045607566833496, -0.00024415552616119385, -0.00021785497665405273, -0.00019155442714691162, -0.0001652538776397705, -0.0001389533281326294, -0.00011265277862548828, -8.635222911834717e-05, -6.0051679611206055e-05, -3.375113010406494e-05, -7.450580596923828e-06, 1.8849968910217285e-05, 4.51505184173584e-05, 7.145106792449951e-05, 9.775161743164062e-05, 0.00012405216693878174, 0.00015035271644592285, 0.00017665326595306396, 0.00020295381546020508, 0.0002292543649673462, 0.0002555549144744873, 0.0002818554639816284, 0.00030815601348876953, 0.00033445656299591064, 0.00036075711250305176, 0.00038705766201019287, 0.000413358211517334, 0.0004396587610244751, 0.0004659593105316162, 0.0004922598600387573, 0.0005185604095458984, 0.0005448609590530396, 0.0005711615085601807, 0.0005974620580673218, 0.0006237626075744629, 0.000650063157081604, 0.0006763637065887451, 0.0007026642560958862, 0.0007289648056030273, 0.0007552653551101685, 0.0007815659046173096, 0.0008078664541244507, 0.0008341670036315918, 0.0008604675531387329, 0.000886768102645874, 0.0009130686521530151, 0.0009393692016601562]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 5.0, 3.0, 2.0, 5.0, 9.0, 11.0, 13.0, 14.0, 25.0, 31.0, 34.0, 40.0, 67.0, 106.0, 156.0, 202.0, 311.0, 511.0, 1421.0, 37662.0, 946986.0, 57628.0, 1739.0, 540.0, 318.0, 183.0, 125.0, 103.0, 68.0, 44.0, 45.0, 30.0, 21.0, 21.0, 26.0, 13.0, 6.0, 8.0, 11.0, 6.0, 0.0, 2.0, 6.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01715087890625, -0.016654372215270996, -0.016157865524291992, -0.01566135883331299, -0.015164852142333984, -0.01466834545135498, -0.014171838760375977, -0.013675332069396973, -0.013178825378417969, -0.012682318687438965, -0.012185811996459961, -0.011689305305480957, -0.011192798614501953, -0.01069629192352295, -0.010199785232543945, -0.009703278541564941, -0.009206771850585938, -0.008710265159606934, -0.00821375846862793, -0.007717251777648926, -0.007220745086669922, -0.006724238395690918, -0.006227731704711914, -0.00573122501373291, -0.005234718322753906, -0.004738211631774902, -0.0042417049407958984, -0.0037451982498168945, -0.0032486915588378906, -0.0027521848678588867, -0.002255678176879883, -0.001759171485900879, -0.001262664794921875, -0.0007661581039428711, -0.0002696514129638672, 0.00022685527801513672, 0.0007233619689941406, 0.0012198686599731445, 0.0017163753509521484, 0.0022128820419311523, 0.0027093887329101562, 0.00320589542388916, 0.003702402114868164, 0.004198908805847168, 0.004695415496826172, 0.005191922187805176, 0.00568842887878418, 0.006184935569763184, 0.0066814422607421875, 0.007177948951721191, 0.007674455642700195, 0.0081709623336792, 0.008667469024658203, 0.009163975715637207, 0.009660482406616211, 0.010156989097595215, 0.010653495788574219, 0.011150002479553223, 0.011646509170532227, 0.01214301586151123, 0.012639522552490234, 0.013136029243469238, 0.013632535934448242, 0.014129042625427246, 0.01462554931640625]}, "gradients/decoder.transformer.h.16.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 5.0, 6.0, 12.0, 21.0, 36.0, 73.0, 91.0, 151.0, 152.0, 155.0, 107.0, 87.0, 50.0, 25.0, 20.0, 8.0, 8.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0012561699841171503, -0.0012219747295603156, -0.0011877795914188027, -0.001153584336861968, -0.0011193891987204552, -0.0010851939441636205, -0.0010509986896067858, -0.001016803551465273, -0.0009826082969084382, -0.0009484131005592644, -0.0009142179042100906, -0.0008800226496532559, -0.0008458274533040822, -0.0008116322569549084, -0.0007774370606057346, -0.0007432418642565608, -0.000709046667907387, -0.0006748514715582132, -0.0006406562752090394, -0.0006064610788598657, -0.000572265824303031, -0.0005380706279538572, -0.0005038754316046834, -0.0004696802352555096, -0.0004354850098025054, -0.0004012898134533316, -0.00036709458800032735, -0.00033289939165115356, -0.0002987041953019798, -0.00026450896984897554, -0.00023031377349980175, -0.00019611856259871274, -0.00016192340990528464, -0.00012772819900419563, -9.353299537906423e-05, -5.9337791753932834e-05, -2.514258085284382e-05, 9.052630048245192e-06, 4.3247826397418976e-05, 7.744303729850799e-05, 0.000111638248199597, 0.00014583345910068601, 0.00018002867000177503, 0.0002142238663509488, 0.0002484190627001226, 0.00028261428815312684, 0.0003168094845023006, 0.00035100470995530486, 0.00038519990630447865, 0.00041939510265365243, 0.00045359032810665667, 0.00048778552445583045, 0.0005219807499088347, 0.0005561759462580085, 0.0005903711426071823, 0.000624566338956356, 0.0006587615935131907, 0.0006929567898623645, 0.0007271519862115383, 0.000761347240768373, 0.0007955424371175468, 0.0008297376334667206, 0.0008639328298158944, 0.0008981280261650681, 0.0009323232225142419]}, "gradients/decoder.transformer.h.16.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 2.0, 3.0, 4.0, 6.0, 5.0, 14.0, 6.0, 13.0, 16.0, 13.0, 14.0, 12.0, 24.0, 21.0, 36.0, 21.0, 26.0, 26.0, 36.0, 36.0, 38.0, 38.0, 51.0, 33.0, 49.0, 37.0, 38.0, 40.0, 39.0, 33.0, 33.0, 30.0, 27.0, 24.0, 29.0, 22.0, 26.0, 14.0, 15.0, 9.0, 12.0, 10.0, 3.0, 5.0, 3.0, 5.0, 8.0, 3.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.00047218799591064453, -0.0004574321210384369, -0.00044267624616622925, -0.0004279203712940216, -0.00041316449642181396, -0.0003984086215496063, -0.0003836527466773987, -0.00036889687180519104, -0.0003541409969329834, -0.00033938512206077576, -0.0003246292471885681, -0.0003098733723163605, -0.00029511749744415283, -0.0002803616225719452, -0.00026560574769973755, -0.0002508498728275299, -0.00023609399795532227, -0.00022133812308311462, -0.00020658224821090698, -0.00019182637333869934, -0.0001770704984664917, -0.00016231462359428406, -0.00014755874872207642, -0.00013280287384986877, -0.00011804699897766113, -0.00010329112410545349, -8.853524923324585e-05, -7.377937436103821e-05, -5.9023499488830566e-05, -4.4267624616622925e-05, -2.9511749744415283e-05, -1.4755874872207642e-05, 0.0, 1.4755874872207642e-05, 2.9511749744415283e-05, 4.4267624616622925e-05, 5.9023499488830566e-05, 7.377937436103821e-05, 8.853524923324585e-05, 0.00010329112410545349, 0.00011804699897766113, 0.00013280287384986877, 0.00014755874872207642, 0.00016231462359428406, 0.0001770704984664917, 0.00019182637333869934, 0.00020658224821090698, 0.00022133812308311462, 0.00023609399795532227, 0.0002508498728275299, 0.00026560574769973755, 0.0002803616225719452, 0.00029511749744415283, 0.0003098733723163605, 0.0003246292471885681, 0.00033938512206077576, 0.0003541409969329834, 0.00036889687180519104, 0.0003836527466773987, 0.0003984086215496063, 0.00041316449642181396, 0.0004279203712940216, 0.00044267624616622925, 0.0004574321210384369, 0.00047218799591064453]}, "gradients/decoder.transformer.h.16.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 6.0, 9.0, 8.0, 8.0, 9.0, 9.0, 14.0, 14.0, 25.0, 23.0, 34.0, 46.0, 46.0, 40.0, 46.0, 50.0, 55.0, 39.0, 36.0, 46.0, 54.0, 41.0, 44.0, 35.0, 32.0, 48.0, 36.0, 23.0, 20.0, 20.0, 18.0, 15.0, 12.0, 4.0, 9.0, 4.0, 12.0, 5.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.16796875, -5.00970458984375, -4.8514404296875, -4.69317626953125, -4.534912109375, -4.37664794921875, -4.2183837890625, -4.06011962890625, -3.90185546875, -3.74359130859375, -3.5853271484375, -3.42706298828125, -3.268798828125, -3.11053466796875, -2.9522705078125, -2.79400634765625, -2.6357421875, -2.47747802734375, -2.3192138671875, -2.16094970703125, -2.002685546875, -1.84442138671875, -1.6861572265625, -1.52789306640625, -1.36962890625, -1.21136474609375, -1.0531005859375, -0.89483642578125, -0.736572265625, -0.57830810546875, -0.4200439453125, -0.26177978515625, -0.103515625, 0.05474853515625, 0.2130126953125, 0.37127685546875, 0.529541015625, 0.68780517578125, 0.8460693359375, 1.00433349609375, 1.16259765625, 1.32086181640625, 1.4791259765625, 1.63739013671875, 1.795654296875, 1.95391845703125, 2.1121826171875, 2.27044677734375, 2.4287109375, 2.58697509765625, 2.7452392578125, 2.90350341796875, 3.061767578125, 3.22003173828125, 3.3782958984375, 3.53656005859375, 3.69482421875, 3.85308837890625, 4.0113525390625, 4.16961669921875, 4.327880859375, 4.48614501953125, 4.6444091796875, 4.80267333984375, 4.9609375]}, "gradients/decoder.transformer.h.16.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 4.0, 3.0, 3.0, 4.0, 2.0, 9.0, 7.0, 21.0, 30.0, 28.0, 38.0, 76.0, 90.0, 168.0, 240.0, 425.0, 739.0, 1494.0, 2794.0, 5419.0, 10302.0, 19674.0, 37582.0, 79369.0, 200567.0, 358969.0, 182696.0, 73334.0, 35331.0, 18432.0, 9734.0, 5196.0, 2689.0, 1357.0, 730.0, 406.0, 216.0, 114.0, 87.0, 57.0, 35.0, 22.0, 10.0, 16.0, 13.0, 6.0, 4.0, 3.0, 4.0, 4.0, 3.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.03125, -3.903076171875, -3.77490234375, -3.646728515625, -3.5185546875, -3.390380859375, -3.26220703125, -3.134033203125, -3.005859375, -2.877685546875, -2.74951171875, -2.621337890625, -2.4931640625, -2.364990234375, -2.23681640625, -2.108642578125, -1.98046875, -1.852294921875, -1.72412109375, -1.595947265625, -1.4677734375, -1.339599609375, -1.21142578125, -1.083251953125, -0.955078125, -0.826904296875, -0.69873046875, -0.570556640625, -0.4423828125, -0.314208984375, -0.18603515625, -0.057861328125, 0.0703125, 0.198486328125, 0.32666015625, 0.454833984375, 0.5830078125, 0.711181640625, 0.83935546875, 0.967529296875, 1.095703125, 1.223876953125, 1.35205078125, 1.480224609375, 1.6083984375, 1.736572265625, 1.86474609375, 1.992919921875, 2.12109375, 2.249267578125, 2.37744140625, 2.505615234375, 2.6337890625, 2.761962890625, 2.89013671875, 3.018310546875, 3.146484375, 3.274658203125, 3.40283203125, 3.531005859375, 3.6591796875, 3.787353515625, 3.91552734375, 4.043701171875, 4.171875]}, "gradients/decoder.transformer.h.16.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 2.0, 2.0, 7.0, 5.0, 9.0, 13.0, 14.0, 13.0, 14.0, 20.0, 23.0, 28.0, 33.0, 34.0, 40.0, 41.0, 53.0, 39.0, 64.0, 100.0, 166.0, 1510.0, 228.0, 127.0, 56.0, 53.0, 46.0, 43.0, 30.0, 41.0, 35.0, 29.0, 23.0, 22.0, 19.0, 19.0, 9.0, 10.0, 6.0, 9.0, 3.0, 7.0, 3.0, 4.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-18.1875, -17.6474609375, -17.107421875, -16.5673828125, -16.02734375, -15.4873046875, -14.947265625, -14.4072265625, -13.8671875, -13.3271484375, -12.787109375, -12.2470703125, -11.70703125, -11.1669921875, -10.626953125, -10.0869140625, -9.546875, -9.0068359375, -8.466796875, -7.9267578125, -7.38671875, -6.8466796875, -6.306640625, -5.7666015625, -5.2265625, -4.6865234375, -4.146484375, -3.6064453125, -3.06640625, -2.5263671875, -1.986328125, -1.4462890625, -0.90625, -0.3662109375, 0.173828125, 0.7138671875, 1.25390625, 1.7939453125, 2.333984375, 2.8740234375, 3.4140625, 3.9541015625, 4.494140625, 5.0341796875, 5.57421875, 6.1142578125, 6.654296875, 7.1943359375, 7.734375, 8.2744140625, 8.814453125, 9.3544921875, 9.89453125, 10.4345703125, 10.974609375, 11.5146484375, 12.0546875, 12.5947265625, 13.134765625, 13.6748046875, 14.21484375, 14.7548828125, 15.294921875, 15.8349609375, 16.375]}, "gradients/decoder.transformer.h.16.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 0.0, 3.0, 6.0, 7.0, 13.0, 18.0, 18.0, 59.0, 99.0, 211.0, 586.0, 8588.0, 3132816.0, 2477.0, 411.0, 170.0, 84.0, 46.0, 31.0, 27.0, 9.0, 11.0, 7.0, 2.0, 4.0, 4.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-117.3125, -113.1748046875, -109.037109375, -104.8994140625, -100.76171875, -96.6240234375, -92.486328125, -88.3486328125, -84.2109375, -80.0732421875, -75.935546875, -71.7978515625, -67.66015625, -63.5224609375, -59.384765625, -55.2470703125, -51.109375, -46.9716796875, -42.833984375, -38.6962890625, -34.55859375, -30.4208984375, -26.283203125, -22.1455078125, -18.0078125, -13.8701171875, -9.732421875, -5.5947265625, -1.45703125, 2.6806640625, 6.818359375, 10.9560546875, 15.09375, 19.2314453125, 23.369140625, 27.5068359375, 31.64453125, 35.7822265625, 39.919921875, 44.0576171875, 48.1953125, 52.3330078125, 56.470703125, 60.6083984375, 64.74609375, 68.8837890625, 73.021484375, 77.1591796875, 81.296875, 85.4345703125, 89.572265625, 93.7099609375, 97.84765625, 101.9853515625, 106.123046875, 110.2607421875, 114.3984375, 118.5361328125, 122.673828125, 126.8115234375, 130.94921875, 135.0869140625, 139.224609375, 143.3623046875, 147.5]}, "gradients/decoder.transformer.h.16.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 1010.0, 6.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-83.05943298339844, -64.73582458496094, -46.41221237182617, -28.088600158691406, -9.764991760253906, 8.558616638183594, 26.882232666015625, 45.205841064453125, 63.529449462890625, 81.85305786132812, 100.17666625976562, 118.50028228759766, 136.82388305664062, 155.14749145507812, 173.4711151123047, 191.7947235107422, 210.1183319091797, 228.4419403076172, 246.7655487060547, 265.08917236328125, 283.41278076171875, 301.73638916015625, 320.05999755859375, 338.38360595703125, 356.70721435546875, 375.03082275390625, 393.35443115234375, 411.67803955078125, 430.00164794921875, 448.32525634765625, 466.64886474609375, 484.97247314453125, 503.29608154296875, 521.6196899414062, 539.9432983398438, 558.2669067382812, 576.5905151367188, 594.9141235351562, 613.2377319335938, 631.5613403320312, 649.8849487304688, 668.2085571289062, 686.5321655273438, 704.8557739257812, 723.1793823242188, 741.5029907226562, 759.8265991210938, 778.1502075195312, 796.473876953125, 814.7974853515625, 833.12109375, 851.4447021484375, 869.768310546875, 888.0919189453125, 906.41552734375, 924.7391357421875, 943.062744140625, 961.3863525390625, 979.7099609375, 998.0335693359375, 1016.357177734375, 1034.6807861328125, 1053.00439453125, 1071.3280029296875, 1089.651611328125]}, "gradients/decoder.transformer.h.16.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 3.0, 2.0, 4.0, 7.0, 13.0, 6.0, 12.0, 17.0, 21.0, 17.0, 23.0, 25.0, 30.0, 30.0, 22.0, 26.0, 30.0, 42.0, 39.0, 44.0, 49.0, 28.0, 44.0, 45.0, 41.0, 42.0, 56.0, 34.0, 30.0, 29.0, 23.0, 34.0, 13.0, 21.0, 10.0, 16.0, 12.0, 12.0, 10.0, 5.0, 14.0, 9.0, 5.0, 6.0, 4.0, 0.0, 4.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.9068717956543, -41.4150390625, -39.92321014404297, -38.43137741088867, -36.939544677734375, -35.44771194458008, -33.95587921142578, -32.46405029296875, -30.972217559814453, -29.480384826660156, -27.988554000854492, -26.496723175048828, -25.00489044189453, -23.513057708740234, -22.02122688293457, -20.529396057128906, -19.03756332397461, -17.545730590820312, -16.05389976501465, -14.562067985534668, -13.070236206054688, -11.578404426574707, -10.086572647094727, -8.594740867614746, -7.102909088134766, -5.611077308654785, -4.119245529174805, -2.627413749694824, -1.1355819702148438, 0.3562498092651367, 1.8480815887451172, 3.3399133682250977, 4.8317413330078125, 6.323573112487793, 7.815404891967773, 9.307236671447754, 10.799068450927734, 12.290900230407715, 13.782732009887695, 15.274563789367676, 16.766395568847656, 18.258228302001953, 19.750059127807617, 21.24188995361328, 22.733722686767578, 24.225555419921875, 25.71738624572754, 27.209217071533203, 28.7010498046875, 30.192882537841797, 31.68471336364746, 33.176544189453125, 34.66837692260742, 36.16020965576172, 37.65203857421875, 39.14387130737305, 40.635704040527344, 42.12753677368164, 43.61936950683594, 45.11119842529297, 46.603031158447266, 48.09486389160156, 49.586692810058594, 51.07852554321289, 52.57035827636719]}, "gradients/decoder.transformer.h.15.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 1.0, 6.0, 5.0, 10.0, 9.0, 6.0, 10.0, 13.0, 19.0, 21.0, 15.0, 28.0, 39.0, 45.0, 40.0, 48.0, 43.0, 50.0, 37.0, 52.0, 48.0, 38.0, 57.0, 40.0, 41.0, 40.0, 30.0, 39.0, 31.0, 26.0, 23.0, 17.0, 19.0, 13.0, 12.0, 4.0, 9.0, 5.0, 5.0, 5.0, 6.0, 1.0, 1.0, 1.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.51953125, -5.3433837890625, -5.167236328125, -4.9910888671875, -4.81494140625, -4.6387939453125, -4.462646484375, -4.2864990234375, -4.1103515625, -3.9342041015625, -3.758056640625, -3.5819091796875, -3.40576171875, -3.2296142578125, -3.053466796875, -2.8773193359375, -2.701171875, -2.5250244140625, -2.348876953125, -2.1727294921875, -1.99658203125, -1.8204345703125, -1.644287109375, -1.4681396484375, -1.2919921875, -1.1158447265625, -0.939697265625, -0.7635498046875, -0.58740234375, -0.4112548828125, -0.235107421875, -0.0589599609375, 0.1171875, 0.2933349609375, 0.469482421875, 0.6456298828125, 0.82177734375, 0.9979248046875, 1.174072265625, 1.3502197265625, 1.5263671875, 1.7025146484375, 1.878662109375, 2.0548095703125, 2.23095703125, 2.4071044921875, 2.583251953125, 2.7593994140625, 2.935546875, 3.1116943359375, 3.287841796875, 3.4639892578125, 3.64013671875, 3.8162841796875, 3.992431640625, 4.1685791015625, 4.3447265625, 4.5208740234375, 4.697021484375, 4.8731689453125, 5.04931640625, 5.2254638671875, 5.401611328125, 5.5777587890625, 5.75390625]}, "gradients/decoder.transformer.h.15.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 4.0, 5.0, 7.0, 5.0, 6.0, 11.0, 16.0, 17.0, 24.0, 28.0, 34.0, 49.0, 72.0, 131.0, 224.0, 421.0, 872.0, 2667.0, 10212.0, 52793.0, 326256.0, 1440286.0, 1773679.0, 487036.0, 78908.0, 14649.0, 3577.0, 1108.0, 486.0, 232.0, 156.0, 92.0, 67.0, 45.0, 33.0, 18.0, 12.0, 11.0, 11.0, 7.0, 5.0, 8.0, 7.0, 4.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.0859375, -7.8082275390625, -7.530517578125, -7.2528076171875, -6.97509765625, -6.6973876953125, -6.419677734375, -6.1419677734375, -5.8642578125, -5.5865478515625, -5.308837890625, -5.0311279296875, -4.75341796875, -4.4757080078125, -4.197998046875, -3.9202880859375, -3.642578125, -3.3648681640625, -3.087158203125, -2.8094482421875, -2.53173828125, -2.2540283203125, -1.976318359375, -1.6986083984375, -1.4208984375, -1.1431884765625, -0.865478515625, -0.5877685546875, -0.31005859375, -0.0323486328125, 0.245361328125, 0.5230712890625, 0.80078125, 1.0784912109375, 1.356201171875, 1.6339111328125, 1.91162109375, 2.1893310546875, 2.467041015625, 2.7447509765625, 3.0224609375, 3.3001708984375, 3.577880859375, 3.8555908203125, 4.13330078125, 4.4110107421875, 4.688720703125, 4.9664306640625, 5.244140625, 5.5218505859375, 5.799560546875, 6.0772705078125, 6.35498046875, 6.6326904296875, 6.910400390625, 7.1881103515625, 7.4658203125, 7.7435302734375, 8.021240234375, 8.2989501953125, 8.57666015625, 8.8543701171875, 9.132080078125, 9.4097900390625, 9.6875]}, "gradients/decoder.transformer.h.15.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 3.0, 2.0, 3.0, 6.0, 15.0, 15.0, 40.0, 43.0, 74.0, 100.0, 117.0, 154.0, 226.0, 347.0, 436.0, 483.0, 473.0, 418.0, 289.0, 255.0, 174.0, 129.0, 79.0, 53.0, 45.0, 28.0, 22.0, 20.0, 9.0, 3.0, 2.0, 2.0, 3.0, 2.0, 2.0, 0.0, 3.0, 5.0, 2.0], "bins": [-16.671875, -16.2647705078125, -15.857666015625, -15.4505615234375, -15.04345703125, -14.6363525390625, -14.229248046875, -13.8221435546875, -13.4150390625, -13.0079345703125, -12.600830078125, -12.1937255859375, -11.78662109375, -11.3795166015625, -10.972412109375, -10.5653076171875, -10.158203125, -9.7510986328125, -9.343994140625, -8.9368896484375, -8.52978515625, -8.1226806640625, -7.715576171875, -7.3084716796875, -6.9013671875, -6.4942626953125, -6.087158203125, -5.6800537109375, -5.27294921875, -4.8658447265625, -4.458740234375, -4.0516357421875, -3.64453125, -3.2374267578125, -2.830322265625, -2.4232177734375, -2.01611328125, -1.6090087890625, -1.201904296875, -0.7947998046875, -0.3876953125, 0.0194091796875, 0.426513671875, 0.8336181640625, 1.24072265625, 1.6478271484375, 2.054931640625, 2.4620361328125, 2.869140625, 3.2762451171875, 3.683349609375, 4.0904541015625, 4.49755859375, 4.9046630859375, 5.311767578125, 5.7188720703125, 6.1259765625, 6.5330810546875, 6.940185546875, 7.3472900390625, 7.75439453125, 8.1614990234375, 8.568603515625, 8.9757080078125, 9.3828125]}, "gradients/decoder.transformer.h.15.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 5.0, 1.0, 0.0, 2.0, 2.0, 10.0, 6.0, 6.0, 14.0, 21.0, 37.0, 40.0, 50.0, 83.0, 114.0, 179.0, 308.0, 565.0, 1800.0, 11783.0, 259744.0, 3233560.0, 656456.0, 25142.0, 2624.0, 702.0, 351.0, 223.0, 146.0, 99.0, 72.0, 50.0, 35.0, 15.0, 17.0, 15.0, 5.0, 3.0, 2.0, 2.0, 1.0, 3.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.078125, -19.294921875, -18.51171875, -17.728515625, -16.9453125, -16.162109375, -15.37890625, -14.595703125, -13.8125, -13.029296875, -12.24609375, -11.462890625, -10.6796875, -9.896484375, -9.11328125, -8.330078125, -7.546875, -6.763671875, -5.98046875, -5.197265625, -4.4140625, -3.630859375, -2.84765625, -2.064453125, -1.28125, -0.498046875, 0.28515625, 1.068359375, 1.8515625, 2.634765625, 3.41796875, 4.201171875, 4.984375, 5.767578125, 6.55078125, 7.333984375, 8.1171875, 8.900390625, 9.68359375, 10.466796875, 11.25, 12.033203125, 12.81640625, 13.599609375, 14.3828125, 15.166015625, 15.94921875, 16.732421875, 17.515625, 18.298828125, 19.08203125, 19.865234375, 20.6484375, 21.431640625, 22.21484375, 22.998046875, 23.78125, 24.564453125, 25.34765625, 26.130859375, 26.9140625, 27.697265625, 28.48046875, 29.263671875, 30.046875]}, "gradients/decoder.transformer.h.15.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 5.0, 12.0, 29.0, 74.0, 167.0, 211.0, 225.0, 166.0, 70.0, 35.0, 13.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-133.85086059570312, -130.50595092773438, -127.16104888916016, -123.8161392211914, -120.47123718261719, -117.12632751464844, -113.78141784667969, -110.43650817871094, -107.09160614013672, -103.74669647216797, -100.40179443359375, -97.056884765625, -93.71197509765625, -90.36707305908203, -87.02216339111328, -83.67726135253906, -80.33235168457031, -76.98744201660156, -73.64253997802734, -70.2976303100586, -66.95272827148438, -63.607818603515625, -60.262908935546875, -56.91800308227539, -53.573097229003906, -50.22819137573242, -46.88328552246094, -43.53837585449219, -40.1934700012207, -36.84856414794922, -33.50365447998047, -30.158748626708984, -26.81383514404297, -23.468929290771484, -20.124021530151367, -16.77911376953125, -13.434207916259766, -10.089302062988281, -6.744394302368164, -3.399486541748047, -0.0545806884765625, 3.2903261184692383, 6.635232925415039, 9.98013973236084, 13.32504653930664, 16.669952392578125, 20.014860153198242, 23.35976791381836, 26.704673767089844, 30.049579620361328, 33.39448547363281, 36.73939514160156, 40.08430099487305, 43.42920684814453, 46.77411651611328, 50.119022369384766, 53.46392822265625, 56.808834075927734, 60.15373992919922, 63.49864959716797, 66.84355163574219, 70.18846130371094, 73.53337097167969, 76.87828063964844, 80.22318267822266]}, "gradients/decoder.transformer.h.15.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 7.0, 2.0, 5.0, 17.0, 10.0, 13.0, 22.0, 15.0, 20.0, 13.0, 26.0, 38.0, 47.0, 34.0, 44.0, 37.0, 50.0, 48.0, 50.0, 39.0, 30.0, 67.0, 44.0, 48.0, 26.0, 44.0, 31.0, 33.0, 24.0, 23.0, 21.0, 19.0, 12.0, 8.0, 7.0, 11.0, 7.0, 0.0, 6.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.96455383300781, -41.62192153930664, -40.27928924560547, -38.9366569519043, -37.594024658203125, -36.25139236450195, -34.90876007080078, -33.56612777709961, -32.22349548339844, -30.880863189697266, -29.538230895996094, -28.195598602294922, -26.85296630859375, -25.510334014892578, -24.167701721191406, -22.825069427490234, -21.48243522644043, -20.139802932739258, -18.797170639038086, -17.454538345336914, -16.111906051635742, -14.76927375793457, -13.426640510559082, -12.08400821685791, -10.741375923156738, -9.398743629455566, -8.056111335754395, -6.7134785652160645, -5.370846271514893, -4.028213977813721, -2.6855812072753906, -1.3429489135742188, -0.000316619873046875, 1.3423157930374146, 2.684948205947876, 4.027580738067627, 5.370213031768799, 6.712845325469971, 8.0554780960083, 9.398110389709473, 10.740742683410645, 12.083374977111816, 13.426007270812988, 14.768640518188477, 16.11127281188965, 17.45390510559082, 18.796537399291992, 20.139169692993164, 21.481801986694336, 22.824434280395508, 24.16706657409668, 25.50969886779785, 26.852331161499023, 28.194963455200195, 29.53759765625, 30.880229949951172, 32.222862243652344, 33.565494537353516, 34.90812683105469, 36.25075912475586, 37.59339141845703, 38.9360237121582, 40.278656005859375, 41.62128829956055, 42.96392059326172]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 5.0, 1.0, 10.0, 6.0, 11.0, 10.0, 15.0, 16.0, 17.0, 18.0, 31.0, 35.0, 43.0, 51.0, 44.0, 44.0, 61.0, 57.0, 51.0, 61.0, 47.0, 61.0, 53.0, 37.0, 41.0, 26.0, 32.0, 30.0, 17.0, 19.0, 9.0, 16.0, 10.0, 12.0, 6.0, 4.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.44140625, -5.2442626953125, -5.047119140625, -4.8499755859375, -4.65283203125, -4.4556884765625, -4.258544921875, -4.0614013671875, -3.8642578125, -3.6671142578125, -3.469970703125, -3.2728271484375, -3.07568359375, -2.8785400390625, -2.681396484375, -2.4842529296875, -2.287109375, -2.0899658203125, -1.892822265625, -1.6956787109375, -1.49853515625, -1.3013916015625, -1.104248046875, -0.9071044921875, -0.7099609375, -0.5128173828125, -0.315673828125, -0.1185302734375, 0.07861328125, 0.2757568359375, 0.472900390625, 0.6700439453125, 0.8671875, 1.0643310546875, 1.261474609375, 1.4586181640625, 1.65576171875, 1.8529052734375, 2.050048828125, 2.2471923828125, 2.4443359375, 2.6414794921875, 2.838623046875, 3.0357666015625, 3.23291015625, 3.4300537109375, 3.627197265625, 3.8243408203125, 4.021484375, 4.2186279296875, 4.415771484375, 4.6129150390625, 4.81005859375, 5.0072021484375, 5.204345703125, 5.4014892578125, 5.5986328125, 5.7957763671875, 5.992919921875, 6.1900634765625, 6.38720703125, 6.5843505859375, 6.781494140625, 6.9786376953125, 7.17578125]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 4.0, 10.0, 7.0, 23.0, 39.0, 37.0, 57.0, 94.0, 162.0, 267.0, 454.0, 773.0, 1185.0, 1935.0, 3150.0, 4946.0, 7893.0, 12423.0, 19887.0, 32261.0, 52527.0, 87862.0, 146994.0, 213330.0, 180149.0, 110635.0, 65979.0, 39724.0, 24679.0, 15202.0, 9581.0, 6207.0, 3875.0, 2363.0, 1516.0, 907.0, 569.0, 340.0, 189.0, 112.0, 85.0, 45.0, 34.0, 17.0, 13.0, 6.0, 11.0, 3.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.39794921875, -0.38455963134765625, -0.3711700439453125, -0.35778045654296875, -0.344390869140625, -0.33100128173828125, -0.3176116943359375, -0.30422210693359375, -0.29083251953125, -0.27744293212890625, -0.2640533447265625, -0.25066375732421875, -0.237274169921875, -0.22388458251953125, -0.2104949951171875, -0.19710540771484375, -0.1837158203125, -0.17032623291015625, -0.1569366455078125, -0.14354705810546875, -0.130157470703125, -0.11676788330078125, -0.1033782958984375, -0.08998870849609375, -0.07659912109375, -0.06320953369140625, -0.0498199462890625, -0.03643035888671875, -0.023040771484375, -0.00965118408203125, 0.0037384033203125, 0.01712799072265625, 0.030517578125, 0.04390716552734375, 0.0572967529296875, 0.07068634033203125, 0.084075927734375, 0.09746551513671875, 0.1108551025390625, 0.12424468994140625, 0.13763427734375, 0.15102386474609375, 0.1644134521484375, 0.17780303955078125, 0.191192626953125, 0.20458221435546875, 0.2179718017578125, 0.23136138916015625, 0.2447509765625, 0.25814056396484375, 0.2715301513671875, 0.28491973876953125, 0.298309326171875, 0.31169891357421875, 0.3250885009765625, 0.33847808837890625, 0.35186767578125, 0.36525726318359375, 0.3786468505859375, 0.39203643798828125, 0.405426025390625, 0.41881561279296875, 0.4322052001953125, 0.44559478759765625, 0.458984375]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 7.0, 9.0, 8.0, 5.0, 13.0, 14.0, 18.0, 13.0, 11.0, 30.0, 27.0, 19.0, 29.0, 28.0, 31.0, 30.0, 47.0, 44.0, 31.0, 29.0, 48.0, 1068.0, 31.0, 29.0, 40.0, 39.0, 28.0, 37.0, 33.0, 28.0, 29.0, 20.0, 26.0, 20.0, 18.0, 19.0, 9.0, 15.0, 8.0, 10.0, 8.0, 5.0, 2.0, 6.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 4.0, 1.0], "bins": [-3.005859375, -2.915618896484375, -2.82537841796875, -2.735137939453125, -2.6448974609375, -2.554656982421875, -2.46441650390625, -2.374176025390625, -2.283935546875, -2.193695068359375, -2.10345458984375, -2.013214111328125, -1.9229736328125, -1.832733154296875, -1.74249267578125, -1.652252197265625, -1.56201171875, -1.471771240234375, -1.38153076171875, -1.291290283203125, -1.2010498046875, -1.110809326171875, -1.02056884765625, -0.930328369140625, -0.840087890625, -0.749847412109375, -0.65960693359375, -0.569366455078125, -0.4791259765625, -0.388885498046875, -0.29864501953125, -0.208404541015625, -0.1181640625, -0.027923583984375, 0.06231689453125, 0.152557373046875, 0.2427978515625, 0.333038330078125, 0.42327880859375, 0.513519287109375, 0.603759765625, 0.694000244140625, 0.78424072265625, 0.874481201171875, 0.9647216796875, 1.054962158203125, 1.14520263671875, 1.235443115234375, 1.32568359375, 1.415924072265625, 1.50616455078125, 1.596405029296875, 1.6866455078125, 1.776885986328125, 1.86712646484375, 1.957366943359375, 2.047607421875, 2.137847900390625, 2.22808837890625, 2.318328857421875, 2.4085693359375, 2.498809814453125, 2.58905029296875, 2.679290771484375, 2.76953125]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 7.0, 8.0, 25.0, 26.0, 31.0, 48.0, 66.0, 121.0, 189.0, 254.0, 433.0, 580.0, 881.0, 1380.0, 2068.0, 3244.0, 4604.0, 7033.0, 10599.0, 16028.0, 24199.0, 36805.0, 56416.0, 85671.0, 125445.0, 1200836.0, 170210.0, 118124.0, 79045.0, 52341.0, 33959.0, 22361.0, 14738.0, 9994.0, 6523.0, 4244.0, 2894.0, 1924.0, 1267.0, 871.0, 562.0, 395.0, 247.0, 154.0, 106.0, 59.0, 44.0, 31.0, 12.0, 15.0, 11.0, 5.0, 5.0, 5.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.29150390625, -0.2823677062988281, -0.27323150634765625, -0.2640953063964844, -0.2549591064453125, -0.24582290649414062, -0.23668670654296875, -0.22755050659179688, -0.218414306640625, -0.20927810668945312, -0.20014190673828125, -0.19100570678710938, -0.1818695068359375, -0.17273330688476562, -0.16359710693359375, -0.15446090698242188, -0.14532470703125, -0.13618850708007812, -0.12705230712890625, -0.11791610717773438, -0.1087799072265625, -0.09964370727539062, -0.09050750732421875, -0.08137130737304688, -0.072235107421875, -0.06309890747070312, -0.05396270751953125, -0.044826507568359375, -0.0356903076171875, -0.026554107666015625, -0.01741790771484375, -0.008281707763671875, 0.0008544921875, 0.009990692138671875, 0.01912689208984375, 0.028263092041015625, 0.0373992919921875, 0.046535491943359375, 0.05567169189453125, 0.06480789184570312, 0.073944091796875, 0.08308029174804688, 0.09221649169921875, 0.10135269165039062, 0.1104888916015625, 0.11962509155273438, 0.12876129150390625, 0.13789749145507812, 0.14703369140625, 0.15616989135742188, 0.16530609130859375, 0.17444229125976562, 0.1835784912109375, 0.19271469116210938, 0.20185089111328125, 0.21098709106445312, 0.220123291015625, 0.22925949096679688, 0.23839569091796875, 0.24753189086914062, 0.2566680908203125, 0.2658042907714844, 0.27494049072265625, 0.2840766906738281, 0.293212890625]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 4.0, 3.0, 2.0, 1.0, 6.0, 5.0, 4.0, 5.0, 10.0, 13.0, 14.0, 18.0, 17.0, 29.0, 26.0, 32.0, 42.0, 60.0, 37.0, 42.0, 51.0, 55.0, 62.0, 48.0, 55.0, 57.0, 30.0, 40.0, 44.0, 34.0, 29.0, 20.0, 23.0, 22.0, 14.0, 9.0, 15.0, 5.0, 9.0, 6.0, 3.0, 2.0, 1.0, 2.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.001247406005859375, -0.0012080371379852295, -0.001168668270111084, -0.0011292994022369385, -0.001089930534362793, -0.0010505616664886475, -0.001011192798614502, -0.0009718239307403564, -0.0009324550628662109, -0.0008930861949920654, -0.0008537173271179199, -0.0008143484592437744, -0.0007749795913696289, -0.0007356107234954834, -0.0006962418556213379, -0.0006568729877471924, -0.0006175041198730469, -0.0005781352519989014, -0.0005387663841247559, -0.0004993975162506104, -0.00046002864837646484, -0.00042065978050231934, -0.00038129091262817383, -0.0003419220447540283, -0.0003025531768798828, -0.0002631843090057373, -0.0002238154411315918, -0.0001844465732574463, -0.00014507770538330078, -0.00010570883750915527, -6.633996963500977e-05, -2.6971101760864258e-05, 1.239776611328125e-05, 5.176663398742676e-05, 9.113550186157227e-05, 0.00013050436973571777, 0.00016987323760986328, 0.0002092421054840088, 0.0002486109733581543, 0.0002879798412322998, 0.0003273487091064453, 0.0003667175769805908, 0.00040608644485473633, 0.00044545531272888184, 0.00048482418060302734, 0.0005241930484771729, 0.0005635619163513184, 0.0006029307842254639, 0.0006422996520996094, 0.0006816685199737549, 0.0007210373878479004, 0.0007604062557220459, 0.0007997751235961914, 0.0008391439914703369, 0.0008785128593444824, 0.0009178817272186279, 0.0009572505950927734, 0.000996619462966919, 0.0010359883308410645, 0.00107535719871521, 0.0011147260665893555, 0.001154094934463501, 0.0011934638023376465, 0.001232832670211792, 0.0012722015380859375]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 5.0, 2.0, 2.0, 2.0, 1.0, 6.0, 7.0, 9.0, 8.0, 16.0, 20.0, 10.0, 30.0, 30.0, 39.0, 50.0, 78.0, 138.0, 213.0, 358.0, 790.0, 6090.0, 992427.0, 45939.0, 1045.0, 496.0, 232.0, 158.0, 99.0, 63.0, 48.0, 35.0, 31.0, 16.0, 16.0, 12.0, 8.0, 9.0, 5.0, 2.0, 5.0, 4.0, 5.0, 2.0, 1.0, 3.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.025909423828125, -0.025091171264648438, -0.024272918701171875, -0.023454666137695312, -0.02263641357421875, -0.021818161010742188, -0.020999908447265625, -0.020181655883789062, -0.0193634033203125, -0.018545150756835938, -0.017726898193359375, -0.016908645629882812, -0.01609039306640625, -0.015272140502929688, -0.014453887939453125, -0.013635635375976562, -0.0128173828125, -0.011999130249023438, -0.011180877685546875, -0.010362625122070312, -0.00954437255859375, -0.008726119995117188, -0.007907867431640625, -0.0070896148681640625, -0.0062713623046875, -0.0054531097412109375, -0.004634857177734375, -0.0038166046142578125, -0.00299835205078125, -0.0021800994873046875, -0.001361846923828125, -0.0005435943603515625, 0.000274658203125, 0.0010929107666015625, 0.001911163330078125, 0.0027294158935546875, 0.00354766845703125, 0.0043659210205078125, 0.005184173583984375, 0.0060024261474609375, 0.0068206787109375, 0.0076389312744140625, 0.008457183837890625, 0.009275436401367188, 0.01009368896484375, 0.010911941528320312, 0.011730194091796875, 0.012548446655273438, 0.01336669921875, 0.014184951782226562, 0.015003204345703125, 0.015821456909179688, 0.01663970947265625, 0.017457962036132812, 0.018276214599609375, 0.019094467163085938, 0.0199127197265625, 0.020730972290039062, 0.021549224853515625, 0.022367477416992188, 0.02318572998046875, 0.024003982543945312, 0.024822235107421875, 0.025640487670898438, 0.026458740234375]}, "gradients/decoder.transformer.h.15.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 7.0, 23.0, 110.0, 396.0, 343.0, 117.0, 17.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004974533338099718, -0.0048685637302696705, -0.004762594122439623, -0.004656624980270863, -0.004550655372440815, -0.004444685764610767, -0.00433871615678072, -0.0042327470146119595, -0.004126777406781912, -0.004020807798951864, -0.003914838191121817, -0.0038088688161224127, -0.0037028994411230087, -0.003596929833292961, -0.003490960458293557, -0.0033849908504635096, -0.0032790214754641056, -0.003173051867634058, -0.003067082492634654, -0.0029611128848046064, -0.0028551435098052025, -0.002749173901975155, -0.002643204526975751, -0.0025372349191457033, -0.0024312653113156557, -0.002325295703485608, -0.002219326328486204, -0.0021133567206561565, -0.0020073873456567526, -0.001901417737826705, -0.001795448362827301, -0.0016894787549972534, -0.0015835093799978495, -0.0014775398885831237, -0.001371570397168398, -0.0012656009057536721, -0.0011596314143389463, -0.0010536618065088987, -0.0009476923733018339, -0.0008417228818871081, -0.0007357533904723823, -0.0006297838990576565, -0.0005238144076429307, -0.0004178448871243745, -0.00031187539570964873, -0.0002059058751910925, -9.993638377636671e-05, 6.03310763835907e-06, 0.00011200259905308485, 0.00021797209046781063, 0.0003239415818825364, 0.00042991110240109265, 0.000535880564711988, 0.0006418501143343747, 0.0007478196057491004, 0.0008537890971638262, 0.000959758588578552, 0.0010657281382009387, 0.0011716976296156645, 0.0012776671210303903, 0.001383636612445116, 0.0014896061038598418, 0.0015955755952745676, 0.0017015450866892934, 0.0018075145781040192]}, "gradients/decoder.transformer.h.15.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 4.0, 3.0, 6.0, 6.0, 6.0, 9.0, 10.0, 7.0, 13.0, 11.0, 22.0, 23.0, 24.0, 30.0, 37.0, 32.0, 31.0, 41.0, 43.0, 49.0, 39.0, 28.0, 45.0, 46.0, 50.0, 41.0, 38.0, 39.0, 31.0, 35.0, 38.0, 26.0, 15.0, 18.0, 27.0, 18.0, 16.0, 11.0, 9.0, 5.0, 7.0, 6.0, 4.0, 9.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006800293922424316, -0.0006577242165803909, -0.0006354190409183502, -0.0006131138652563095, -0.0005908086895942688, -0.0005685035139322281, -0.0005461983382701874, -0.0005238931626081467, -0.000501587986946106, -0.00047928281128406525, -0.00045697763562202454, -0.0004346724599599838, -0.0004123672842979431, -0.0003900621086359024, -0.0003677569329738617, -0.000345451757311821, -0.0003231465816497803, -0.00030084140598773956, -0.00027853623032569885, -0.00025623105466365814, -0.00023392587900161743, -0.00021162070333957672, -0.000189315527677536, -0.0001670103520154953, -0.0001447051763534546, -0.00012240000069141388, -0.00010009482502937317, -7.778964936733246e-05, -5.548447370529175e-05, -3.317929804325104e-05, -1.0874122381210327e-05, 1.1431053280830383e-05, 3.3736228942871094e-05, 5.6041404604911804e-05, 7.834658026695251e-05, 0.00010065175592899323, 0.00012295693159103394, 0.00014526210725307465, 0.00016756728291511536, 0.00018987245857715607, 0.00021217763423919678, 0.0002344828099012375, 0.0002567879855632782, 0.0002790931612253189, 0.0003013983368873596, 0.00032370351254940033, 0.00034600868821144104, 0.00036831386387348175, 0.00039061903953552246, 0.00041292421519756317, 0.0004352293908596039, 0.0004575345665216446, 0.0004798397421836853, 0.000502144917845726, 0.0005244500935077667, 0.0005467552691698074, 0.0005690604448318481, 0.0005913656204938889, 0.0006136707961559296, 0.0006359759718179703, 0.000658281147480011, 0.0006805863231420517, 0.0007028914988040924, 0.0007251966744661331, 0.0007475018501281738]}, "gradients/decoder.transformer.h.15.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 5.0, 1.0, 10.0, 6.0, 11.0, 10.0, 15.0, 16.0, 17.0, 18.0, 31.0, 35.0, 43.0, 51.0, 44.0, 44.0, 61.0, 57.0, 51.0, 61.0, 47.0, 61.0, 53.0, 37.0, 41.0, 26.0, 32.0, 30.0, 17.0, 19.0, 9.0, 16.0, 10.0, 12.0, 6.0, 4.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.44140625, -5.2442626953125, -5.047119140625, -4.8499755859375, -4.65283203125, -4.4556884765625, -4.258544921875, -4.0614013671875, -3.8642578125, -3.6671142578125, -3.469970703125, -3.2728271484375, -3.07568359375, -2.8785400390625, -2.681396484375, -2.4842529296875, -2.287109375, -2.0899658203125, -1.892822265625, -1.6956787109375, -1.49853515625, -1.3013916015625, -1.104248046875, -0.9071044921875, -0.7099609375, -0.5128173828125, -0.315673828125, -0.1185302734375, 0.07861328125, 0.2757568359375, 0.472900390625, 0.6700439453125, 0.8671875, 1.0643310546875, 1.261474609375, 1.4586181640625, 1.65576171875, 1.8529052734375, 2.050048828125, 2.2471923828125, 2.4443359375, 2.6414794921875, 2.838623046875, 3.0357666015625, 3.23291015625, 3.4300537109375, 3.627197265625, 3.8243408203125, 4.021484375, 4.2186279296875, 4.415771484375, 4.6129150390625, 4.81005859375, 5.0072021484375, 5.204345703125, 5.4014892578125, 5.5986328125, 5.7957763671875, 5.992919921875, 6.1900634765625, 6.38720703125, 6.5843505859375, 6.781494140625, 6.9786376953125, 7.17578125]}, "gradients/decoder.transformer.h.15.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 4.0, 6.0, 11.0, 19.0, 15.0, 39.0, 37.0, 78.0, 129.0, 181.0, 348.0, 597.0, 1159.0, 2285.0, 4629.0, 9310.0, 18906.0, 38038.0, 76955.0, 156197.0, 272724.0, 230338.0, 119022.0, 59093.0, 29239.0, 14818.0, 7147.0, 3438.0, 1797.0, 822.0, 473.0, 262.0, 159.0, 97.0, 57.0, 46.0, 36.0, 20.0, 9.0, 14.0, 4.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.01171875, -4.87176513671875, -4.7318115234375, -4.59185791015625, -4.451904296875, -4.31195068359375, -4.1719970703125, -4.03204345703125, -3.89208984375, -3.75213623046875, -3.6121826171875, -3.47222900390625, -3.332275390625, -3.19232177734375, -3.0523681640625, -2.91241455078125, -2.7724609375, -2.63250732421875, -2.4925537109375, -2.35260009765625, -2.212646484375, -2.07269287109375, -1.9327392578125, -1.79278564453125, -1.65283203125, -1.51287841796875, -1.3729248046875, -1.23297119140625, -1.093017578125, -0.95306396484375, -0.8131103515625, -0.67315673828125, -0.533203125, -0.39324951171875, -0.2532958984375, -0.11334228515625, 0.026611328125, 0.16656494140625, 0.3065185546875, 0.44647216796875, 0.58642578125, 0.72637939453125, 0.8663330078125, 1.00628662109375, 1.146240234375, 1.28619384765625, 1.4261474609375, 1.56610107421875, 1.7060546875, 1.84600830078125, 1.9859619140625, 2.12591552734375, 2.265869140625, 2.40582275390625, 2.5457763671875, 2.68572998046875, 2.82568359375, 2.96563720703125, 3.1055908203125, 3.24554443359375, 3.385498046875, 3.52545166015625, 3.6654052734375, 3.80535888671875, 3.9453125]}, "gradients/decoder.transformer.h.15.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 2.0, 6.0, 7.0, 8.0, 10.0, 10.0, 11.0, 11.0, 23.0, 28.0, 25.0, 40.0, 36.0, 49.0, 56.0, 64.0, 76.0, 121.0, 243.0, 1464.0, 223.0, 127.0, 72.0, 61.0, 43.0, 41.0, 34.0, 26.0, 24.0, 20.0, 13.0, 17.0, 13.0, 8.0, 9.0, 9.0, 7.0, 4.0, 6.0, 2.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-24.03125, -23.353515625, -22.67578125, -21.998046875, -21.3203125, -20.642578125, -19.96484375, -19.287109375, -18.609375, -17.931640625, -17.25390625, -16.576171875, -15.8984375, -15.220703125, -14.54296875, -13.865234375, -13.1875, -12.509765625, -11.83203125, -11.154296875, -10.4765625, -9.798828125, -9.12109375, -8.443359375, -7.765625, -7.087890625, -6.41015625, -5.732421875, -5.0546875, -4.376953125, -3.69921875, -3.021484375, -2.34375, -1.666015625, -0.98828125, -0.310546875, 0.3671875, 1.044921875, 1.72265625, 2.400390625, 3.078125, 3.755859375, 4.43359375, 5.111328125, 5.7890625, 6.466796875, 7.14453125, 7.822265625, 8.5, 9.177734375, 9.85546875, 10.533203125, 11.2109375, 11.888671875, 12.56640625, 13.244140625, 13.921875, 14.599609375, 15.27734375, 15.955078125, 16.6328125, 17.310546875, 17.98828125, 18.666015625, 19.34375]}, "gradients/decoder.transformer.h.15.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 12.0, 14.0, 16.0, 30.0, 31.0, 47.0, 83.0, 124.0, 193.0, 342.0, 748.0, 7220.0, 3118618.0, 16294.0, 932.0, 398.0, 212.0, 131.0, 82.0, 55.0, 33.0, 25.0, 19.0, 16.0, 10.0, 8.0, 6.0, 3.0, 4.0, 0.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-66.4375, -63.9375, -61.4375, -58.9375, -56.4375, -53.9375, -51.4375, -48.9375, -46.4375, -43.9375, -41.4375, -38.9375, -36.4375, -33.9375, -31.4375, -28.9375, -26.4375, -23.9375, -21.4375, -18.9375, -16.4375, -13.9375, -11.4375, -8.9375, -6.4375, -3.9375, -1.4375, 1.0625, 3.5625, 6.0625, 8.5625, 11.0625, 13.5625, 16.0625, 18.5625, 21.0625, 23.5625, 26.0625, 28.5625, 31.0625, 33.5625, 36.0625, 38.5625, 41.0625, 43.5625, 46.0625, 48.5625, 51.0625, 53.5625, 56.0625, 58.5625, 61.0625, 63.5625, 66.0625, 68.5625, 71.0625, 73.5625, 76.0625, 78.5625, 81.0625, 83.5625, 86.0625, 88.5625, 91.0625, 93.5625]}, "gradients/decoder.transformer.h.15.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 6.0, 43.0, 200.0, 419.0, 278.0, 60.0, 9.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-152.42967224121094, -149.29757690429688, -146.16549682617188, -143.0334014892578, -139.9013214111328, -136.76922607421875, -133.63714599609375, -130.5050506591797, -127.37297058105469, -124.24088287353516, -121.10879516601562, -117.9767074584961, -114.84461975097656, -111.71253204345703, -108.5804443359375, -105.44834899902344, -102.3162612915039, -99.18417358398438, -96.05208587646484, -92.91999816894531, -89.78791046142578, -86.65582275390625, -83.52372741699219, -80.39164733886719, -77.25955200195312, -74.1274642944336, -70.99537658691406, -67.86328887939453, -64.731201171875, -61.59911346435547, -58.46702194213867, -55.33493423461914, -52.20285415649414, -49.07076644897461, -45.93867874145508, -42.80658721923828, -39.67449951171875, -36.54241180419922, -33.41032409667969, -30.278236389160156, -27.146148681640625, -24.014060974121094, -20.881973266601562, -17.7498836517334, -14.617795944213867, -11.485708236694336, -8.353618621826172, -5.221530914306641, -2.0894432067871094, 1.04264497756958, 4.1747331619262695, 7.306821823120117, 10.438909530639648, 13.57099723815918, 16.703086853027344, 19.835174560546875, 22.967262268066406, 26.099349975585938, 29.23143768310547, 32.363525390625, 35.49561309814453, 38.62770080566406, 41.75979232788086, 44.89188003540039, 48.02396774291992]}, "gradients/decoder.transformer.h.15.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 2.0, 4.0, 5.0, 9.0, 2.0, 7.0, 7.0, 13.0, 8.0, 11.0, 9.0, 13.0, 13.0, 22.0, 25.0, 18.0, 34.0, 21.0, 26.0, 19.0, 33.0, 32.0, 43.0, 32.0, 33.0, 32.0, 28.0, 40.0, 30.0, 41.0, 33.0, 41.0, 32.0, 30.0, 31.0, 22.0, 29.0, 23.0, 13.0, 20.0, 11.0, 21.0, 6.0, 6.0, 13.0, 10.0, 11.0, 9.0, 7.0, 7.0, 9.0, 3.0, 1.0, 5.0, 1.0, 1.0, 4.0, 3.0, 3.0, 3.0], "bins": [-36.1612663269043, -34.994667053222656, -33.82807159423828, -32.66147232055664, -31.494876861572266, -30.328277587890625, -29.161680221557617, -27.99508285522461, -26.8284854888916, -25.661888122558594, -24.495290756225586, -23.328693389892578, -22.162094116210938, -20.995498657226562, -19.828899383544922, -18.662302017211914, -17.495704650878906, -16.3291072845459, -15.16250991821289, -13.995911598205566, -12.829314231872559, -11.66271686553955, -10.496118545532227, -9.329521179199219, -8.162923812866211, -6.996326446533203, -5.829728603363037, -4.663130760192871, -3.4965333938598633, -2.3299360275268555, -1.1633381843566895, 0.0032596588134765625, 1.1698570251464844, 2.3364546298980713, 3.503052234649658, 4.669650077819824, 5.836247444152832, 7.00284481048584, 8.169443130493164, 9.336040496826172, 10.50263786315918, 11.669235229492188, 12.835832595825195, 14.00243091583252, 15.169028282165527, 16.33562469482422, 17.50222396850586, 18.668821334838867, 19.835418701171875, 21.002016067504883, 22.16861343383789, 23.3352108001709, 24.501808166503906, 25.668407440185547, 26.835004806518555, 28.001602172851562, 29.16819953918457, 30.334796905517578, 31.501394271850586, 32.667991638183594, 33.834590911865234, 35.00118637084961, 36.16778564453125, 37.334381103515625, 38.500980377197266]}, "gradients/decoder.transformer.h.14.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 3.0, 9.0, 11.0, 6.0, 9.0, 15.0, 12.0, 8.0, 23.0, 34.0, 22.0, 32.0, 34.0, 39.0, 49.0, 48.0, 55.0, 50.0, 58.0, 45.0, 59.0, 53.0, 56.0, 37.0, 38.0, 33.0, 32.0, 22.0, 24.0, 16.0, 16.0, 16.0, 9.0, 7.0, 13.0, 8.0, 6.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.5546875, -5.3482666015625, -5.141845703125, -4.9354248046875, -4.72900390625, -4.5225830078125, -4.316162109375, -4.1097412109375, -3.9033203125, -3.6968994140625, -3.490478515625, -3.2840576171875, -3.07763671875, -2.8712158203125, -2.664794921875, -2.4583740234375, -2.251953125, -2.0455322265625, -1.839111328125, -1.6326904296875, -1.42626953125, -1.2198486328125, -1.013427734375, -0.8070068359375, -0.6005859375, -0.3941650390625, -0.187744140625, 0.0186767578125, 0.22509765625, 0.4315185546875, 0.637939453125, 0.8443603515625, 1.05078125, 1.2572021484375, 1.463623046875, 1.6700439453125, 1.87646484375, 2.0828857421875, 2.289306640625, 2.4957275390625, 2.7021484375, 2.9085693359375, 3.114990234375, 3.3214111328125, 3.52783203125, 3.7342529296875, 3.940673828125, 4.1470947265625, 4.353515625, 4.5599365234375, 4.766357421875, 4.9727783203125, 5.17919921875, 5.3856201171875, 5.592041015625, 5.7984619140625, 6.0048828125, 6.2113037109375, 6.417724609375, 6.6241455078125, 6.83056640625, 7.0369873046875, 7.243408203125, 7.4498291015625, 7.65625]}, "gradients/decoder.transformer.h.14.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 3.0, 11.0, 6.0, 6.0, 10.0, 5.0, 10.0, 15.0, 15.0, 25.0, 22.0, 30.0, 27.0, 46.0, 69.0, 147.0, 377.0, 2019.0, 40141.0, 1761877.0, 2325088.0, 60943.0, 2506.0, 427.0, 146.0, 75.0, 49.0, 36.0, 18.0, 26.0, 22.0, 19.0, 19.0, 15.0, 7.0, 5.0, 9.0, 8.0, 3.0, 7.0, 2.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.96875, -17.3046875, -16.640625, -15.9765625, -15.3125, -14.6484375, -13.984375, -13.3203125, -12.65625, -11.9921875, -11.328125, -10.6640625, -10.0, -9.3359375, -8.671875, -8.0078125, -7.34375, -6.6796875, -6.015625, -5.3515625, -4.6875, -4.0234375, -3.359375, -2.6953125, -2.03125, -1.3671875, -0.703125, -0.0390625, 0.625, 1.2890625, 1.953125, 2.6171875, 3.28125, 3.9453125, 4.609375, 5.2734375, 5.9375, 6.6015625, 7.265625, 7.9296875, 8.59375, 9.2578125, 9.921875, 10.5859375, 11.25, 11.9140625, 12.578125, 13.2421875, 13.90625, 14.5703125, 15.234375, 15.8984375, 16.5625, 17.2265625, 17.890625, 18.5546875, 19.21875, 19.8828125, 20.546875, 21.2109375, 21.875, 22.5390625, 23.203125, 23.8671875, 24.53125]}, "gradients/decoder.transformer.h.14.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 4.0, 6.0, 11.0, 11.0, 19.0, 16.0, 24.0, 37.0, 43.0, 47.0, 98.0, 114.0, 169.0, 216.0, 262.0, 351.0, 412.0, 439.0, 399.0, 335.0, 258.0, 197.0, 169.0, 120.0, 89.0, 52.0, 54.0, 32.0, 20.0, 20.0, 12.0, 11.0, 9.0, 3.0, 4.0, 1.0, 1.0, 2.0, 5.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-11.140625, -10.78564453125, -10.4306640625, -10.07568359375, -9.720703125, -9.36572265625, -9.0107421875, -8.65576171875, -8.30078125, -7.94580078125, -7.5908203125, -7.23583984375, -6.880859375, -6.52587890625, -6.1708984375, -5.81591796875, -5.4609375, -5.10595703125, -4.7509765625, -4.39599609375, -4.041015625, -3.68603515625, -3.3310546875, -2.97607421875, -2.62109375, -2.26611328125, -1.9111328125, -1.55615234375, -1.201171875, -0.84619140625, -0.4912109375, -0.13623046875, 0.21875, 0.57373046875, 0.9287109375, 1.28369140625, 1.638671875, 1.99365234375, 2.3486328125, 2.70361328125, 3.05859375, 3.41357421875, 3.7685546875, 4.12353515625, 4.478515625, 4.83349609375, 5.1884765625, 5.54345703125, 5.8984375, 6.25341796875, 6.6083984375, 6.96337890625, 7.318359375, 7.67333984375, 8.0283203125, 8.38330078125, 8.73828125, 9.09326171875, 9.4482421875, 9.80322265625, 10.158203125, 10.51318359375, 10.8681640625, 11.22314453125, 11.578125]}, "gradients/decoder.transformer.h.14.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 2.0, 2.0, 5.0, 11.0, 18.0, 14.0, 28.0, 33.0, 48.0, 60.0, 89.0, 129.0, 203.0, 315.0, 503.0, 1567.0, 16376.0, 624180.0, 3353531.0, 188394.0, 6572.0, 957.0, 407.0, 242.0, 175.0, 101.0, 95.0, 61.0, 43.0, 37.0, 23.0, 18.0, 9.0, 8.0, 9.0, 7.0, 4.0, 2.0, 3.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-27.578125, -26.6982421875, -25.818359375, -24.9384765625, -24.05859375, -23.1787109375, -22.298828125, -21.4189453125, -20.5390625, -19.6591796875, -18.779296875, -17.8994140625, -17.01953125, -16.1396484375, -15.259765625, -14.3798828125, -13.5, -12.6201171875, -11.740234375, -10.8603515625, -9.98046875, -9.1005859375, -8.220703125, -7.3408203125, -6.4609375, -5.5810546875, -4.701171875, -3.8212890625, -2.94140625, -2.0615234375, -1.181640625, -0.3017578125, 0.578125, 1.4580078125, 2.337890625, 3.2177734375, 4.09765625, 4.9775390625, 5.857421875, 6.7373046875, 7.6171875, 8.4970703125, 9.376953125, 10.2568359375, 11.13671875, 12.0166015625, 12.896484375, 13.7763671875, 14.65625, 15.5361328125, 16.416015625, 17.2958984375, 18.17578125, 19.0556640625, 19.935546875, 20.8154296875, 21.6953125, 22.5751953125, 23.455078125, 24.3349609375, 25.21484375, 26.0947265625, 26.974609375, 27.8544921875, 28.734375]}, "gradients/decoder.transformer.h.14.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 5.0, 17.0, 45.0, 72.0, 140.0, 206.0, 231.0, 146.0, 80.0, 45.0, 20.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-119.32610321044922, -116.27906036376953, -113.23200988769531, -110.18496704101562, -107.13792419433594, -104.09087371826172, -101.04383087158203, -97.99678039550781, -94.94973754882812, -91.90269470214844, -88.85564422607422, -85.80860137939453, -82.76155090332031, -79.71450805664062, -76.66746520996094, -73.62042236328125, -70.57337188720703, -67.52632904052734, -64.47927856445312, -61.43223571777344, -58.385189056396484, -55.33814239501953, -52.291099548339844, -49.24405288696289, -46.19700622558594, -43.149959564208984, -40.10291290283203, -37.055870056152344, -34.00882339477539, -30.961776733398438, -27.914731979370117, -24.867687225341797, -21.820648193359375, -18.773601531982422, -15.726556777954102, -12.679511070251465, -9.632465362548828, -6.585419654846191, -3.5383739471435547, -0.4913291931152344, 2.5557174682617188, 5.6027631759643555, 8.649808883666992, 11.696854591369629, 14.743900299072266, 17.79094696044922, 20.83799171447754, 23.88503646850586, 26.932083129882812, 29.979129791259766, 33.02617645263672, 36.073219299316406, 39.12026596069336, 42.16731262207031, 45.21435546875, 48.26140213012695, 51.308448791503906, 54.35549545288086, 57.40254211425781, 60.4495849609375, 63.49663162231445, 66.5436782836914, 69.5907211303711, 72.63777160644531, 75.684814453125]}, "gradients/decoder.transformer.h.14.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 4.0, 1.0, 3.0, 8.0, 8.0, 6.0, 13.0, 8.0, 12.0, 13.0, 16.0, 19.0, 30.0, 18.0, 26.0, 32.0, 40.0, 38.0, 36.0, 33.0, 48.0, 41.0, 27.0, 27.0, 47.0, 42.0, 40.0, 26.0, 50.0, 44.0, 31.0, 33.0, 24.0, 16.0, 28.0, 22.0, 12.0, 17.0, 12.0, 8.0, 12.0, 8.0, 7.0, 6.0, 8.0, 4.0, 4.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-31.032493591308594, -29.87095069885254, -28.709407806396484, -27.547866821289062, -26.386323928833008, -25.224781036376953, -24.0632381439209, -22.901695251464844, -21.740154266357422, -20.578611373901367, -19.417068481445312, -18.25552749633789, -17.093984603881836, -15.932441711425781, -14.770898818969727, -13.609356880187988, -12.447813034057617, -11.286270141601562, -10.124728202819824, -8.96318531036377, -7.801642894744873, -6.640100479125977, -5.478557586669922, -4.317015647888184, -3.155472755432129, -1.9939302206039429, -0.8323876857757568, 0.32915496826171875, 1.4906973838806152, 2.6522397994995117, 3.8137826919555664, 4.975324630737305, 6.136867523193359, 7.298409938812256, 8.459952354431152, 9.621495246887207, 10.783037185668945, 11.944580078125, 13.106122970581055, 14.267664909362793, 15.429207801818848, 16.590749740600586, 17.75229263305664, 18.913835525512695, 20.07537841796875, 21.236919403076172, 22.39846420288086, 23.56000518798828, 24.721548080444336, 25.88309097290039, 27.044633865356445, 28.2061767578125, 29.367717742919922, 30.529260635375977, 31.69080352783203, 32.85234451293945, 34.01388931274414, 35.17543029785156, 36.33697509765625, 37.49851608276367, 38.66006088256836, 39.82160186767578, 40.98314666748047, 42.14468765258789, 43.30622863769531]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 9.0, 7.0, 10.0, 10.0, 17.0, 23.0, 22.0, 23.0, 22.0, 40.0, 49.0, 34.0, 45.0, 53.0, 53.0, 62.0, 38.0, 50.0, 55.0, 51.0, 41.0, 49.0, 43.0, 37.0, 26.0, 23.0, 20.0, 16.0, 21.0, 13.0, 16.0, 9.0, 5.0, 6.0, 3.0, 1.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.3984375, -5.19635009765625, -4.9942626953125, -4.79217529296875, -4.590087890625, -4.38800048828125, -4.1859130859375, -3.98382568359375, -3.78173828125, -3.57965087890625, -3.3775634765625, -3.17547607421875, -2.973388671875, -2.77130126953125, -2.5692138671875, -2.36712646484375, -2.1650390625, -1.96295166015625, -1.7608642578125, -1.55877685546875, -1.356689453125, -1.15460205078125, -0.9525146484375, -0.75042724609375, -0.54833984375, -0.34625244140625, -0.1441650390625, 0.05792236328125, 0.260009765625, 0.46209716796875, 0.6641845703125, 0.86627197265625, 1.068359375, 1.27044677734375, 1.4725341796875, 1.67462158203125, 1.876708984375, 2.07879638671875, 2.2808837890625, 2.48297119140625, 2.68505859375, 2.88714599609375, 3.0892333984375, 3.29132080078125, 3.493408203125, 3.69549560546875, 3.8975830078125, 4.09967041015625, 4.3017578125, 4.50384521484375, 4.7059326171875, 4.90802001953125, 5.110107421875, 5.31219482421875, 5.5142822265625, 5.71636962890625, 5.91845703125, 6.12054443359375, 6.3226318359375, 6.52471923828125, 6.726806640625, 6.92889404296875, 7.1309814453125, 7.33306884765625, 7.53515625]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 4.0, 6.0, 4.0, 12.0, 16.0, 23.0, 27.0, 49.0, 75.0, 107.0, 190.0, 289.0, 432.0, 610.0, 983.0, 1531.0, 2238.0, 3490.0, 4872.0, 7490.0, 11368.0, 16852.0, 25797.0, 39578.0, 61426.0, 95631.0, 147396.0, 189593.0, 153337.0, 100035.0, 63772.0, 41387.0, 26777.0, 17890.0, 11686.0, 7831.0, 5283.0, 3629.0, 2390.0, 1632.0, 1004.0, 617.0, 425.0, 277.0, 194.0, 115.0, 82.0, 52.0, 31.0, 10.0, 8.0, 8.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.385498046875, -0.3732795715332031, -0.36106109619140625, -0.3488426208496094, -0.3366241455078125, -0.3244056701660156, -0.31218719482421875, -0.2999687194824219, -0.287750244140625, -0.2755317687988281, -0.26331329345703125, -0.2510948181152344, -0.2388763427734375, -0.22665786743164062, -0.21443939208984375, -0.20222091674804688, -0.19000244140625, -0.17778396606445312, -0.16556549072265625, -0.15334701538085938, -0.1411285400390625, -0.12891006469726562, -0.11669158935546875, -0.10447311401367188, -0.092254638671875, -0.08003616333007812, -0.06781768798828125, -0.055599212646484375, -0.0433807373046875, -0.031162261962890625, -0.01894378662109375, -0.006725311279296875, 0.0054931640625, 0.017711639404296875, 0.02993011474609375, 0.042148590087890625, 0.0543670654296875, 0.06658554077148438, 0.07880401611328125, 0.09102249145507812, 0.103240966796875, 0.11545944213867188, 0.12767791748046875, 0.13989639282226562, 0.1521148681640625, 0.16433334350585938, 0.17655181884765625, 0.18877029418945312, 0.20098876953125, 0.21320724487304688, 0.22542572021484375, 0.23764419555664062, 0.2498626708984375, 0.2620811462402344, 0.27429962158203125, 0.2865180969238281, 0.298736572265625, 0.3109550476074219, 0.32317352294921875, 0.3353919982910156, 0.3476104736328125, 0.3598289489746094, 0.37204742431640625, 0.3842658996582031, 0.396484375]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 8.0, 4.0, 8.0, 11.0, 8.0, 14.0, 16.0, 17.0, 20.0, 21.0, 29.0, 30.0, 28.0, 27.0, 30.0, 28.0, 32.0, 39.0, 34.0, 40.0, 1070.0, 33.0, 52.0, 37.0, 38.0, 32.0, 36.0, 31.0, 40.0, 37.0, 22.0, 21.0, 19.0, 31.0, 16.0, 7.0, 11.0, 7.0, 10.0, 9.0, 5.0, 6.0, 3.0, 3.0, 7.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0], "bins": [-3.162109375, -3.0589599609375, -2.955810546875, -2.8526611328125, -2.74951171875, -2.6463623046875, -2.543212890625, -2.4400634765625, -2.3369140625, -2.2337646484375, -2.130615234375, -2.0274658203125, -1.92431640625, -1.8211669921875, -1.718017578125, -1.6148681640625, -1.51171875, -1.4085693359375, -1.305419921875, -1.2022705078125, -1.09912109375, -0.9959716796875, -0.892822265625, -0.7896728515625, -0.6865234375, -0.5833740234375, -0.480224609375, -0.3770751953125, -0.27392578125, -0.1707763671875, -0.067626953125, 0.0355224609375, 0.138671875, 0.2418212890625, 0.344970703125, 0.4481201171875, 0.55126953125, 0.6544189453125, 0.757568359375, 0.8607177734375, 0.9638671875, 1.0670166015625, 1.170166015625, 1.2733154296875, 1.37646484375, 1.4796142578125, 1.582763671875, 1.6859130859375, 1.7890625, 1.8922119140625, 1.995361328125, 2.0985107421875, 2.20166015625, 2.3048095703125, 2.407958984375, 2.5111083984375, 2.6142578125, 2.7174072265625, 2.820556640625, 2.9237060546875, 3.02685546875, 3.1300048828125, 3.233154296875, 3.3363037109375, 3.439453125]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 3.0, 0.0, 7.0, 10.0, 7.0, 12.0, 26.0, 36.0, 57.0, 62.0, 96.0, 150.0, 234.0, 333.0, 446.0, 734.0, 1195.0, 1797.0, 2792.0, 4224.0, 6683.0, 10502.0, 16838.0, 26446.0, 41941.0, 66105.0, 103386.0, 153481.0, 1232142.0, 150657.0, 100715.0, 64177.0, 40870.0, 25854.0, 16321.0, 10293.0, 6629.0, 4197.0, 2704.0, 1717.0, 1150.0, 690.0, 458.0, 310.0, 220.0, 159.0, 107.0, 55.0, 35.0, 25.0, 20.0, 11.0, 8.0, 6.0, 3.0, 4.0, 3.0, 2.0, 1.0, 1.0], "bins": [-0.34375, -0.3331642150878906, -0.32257843017578125, -0.3119926452636719, -0.3014068603515625, -0.2908210754394531, -0.28023529052734375, -0.2696495056152344, -0.259063720703125, -0.24847793579101562, -0.23789215087890625, -0.22730636596679688, -0.2167205810546875, -0.20613479614257812, -0.19554901123046875, -0.18496322631835938, -0.17437744140625, -0.16379165649414062, -0.15320587158203125, -0.14262008666992188, -0.1320343017578125, -0.12144851684570312, -0.11086273193359375, -0.10027694702148438, -0.089691162109375, -0.07910537719726562, -0.06851959228515625, -0.057933807373046875, -0.0473480224609375, -0.036762237548828125, -0.02617645263671875, -0.015590667724609375, -0.0050048828125, 0.005580902099609375, 0.01616668701171875, 0.026752471923828125, 0.0373382568359375, 0.047924041748046875, 0.05850982666015625, 0.06909561157226562, 0.079681396484375, 0.09026718139648438, 0.10085296630859375, 0.11143875122070312, 0.1220245361328125, 0.13261032104492188, 0.14319610595703125, 0.15378189086914062, 0.16436767578125, 0.17495346069335938, 0.18553924560546875, 0.19612503051757812, 0.2067108154296875, 0.21729660034179688, 0.22788238525390625, 0.23846817016601562, 0.249053955078125, 0.2596397399902344, 0.27022552490234375, 0.2808113098144531, 0.2913970947265625, 0.3019828796386719, 0.31256866455078125, 0.3231544494628906, 0.333740234375]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 4.0, 5.0, 4.0, 4.0, 5.0, 7.0, 20.0, 21.0, 16.0, 23.0, 36.0, 47.0, 57.0, 55.0, 59.0, 75.0, 70.0, 59.0, 66.0, 57.0, 54.0, 52.0, 48.0, 34.0, 27.0, 20.0, 24.0, 14.0, 12.0, 5.0, 5.0, 6.0, 4.0, 6.0, 3.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0011997222900390625, -0.0011571943759918213, -0.00111466646194458, -0.0010721385478973389, -0.0010296106338500977, -0.0009870827198028564, -0.0009445548057556152, -0.000902026891708374, -0.0008594989776611328, -0.0008169710636138916, -0.0007744431495666504, -0.0007319152355194092, -0.000689387321472168, -0.0006468594074249268, -0.0006043314933776855, -0.0005618035793304443, -0.0005192756652832031, -0.0004767477512359619, -0.0004342198371887207, -0.0003916919231414795, -0.0003491640090942383, -0.00030663609504699707, -0.00026410818099975586, -0.00022158026695251465, -0.00017905235290527344, -0.00013652443885803223, -9.399652481079102e-05, -5.1468610763549805e-05, -8.940696716308594e-06, 3.358721733093262e-05, 7.611513137817383e-05, 0.00011864304542541504, 0.00016117095947265625, 0.00020369887351989746, 0.00024622678756713867, 0.0002887547016143799, 0.0003312826156616211, 0.0003738105297088623, 0.0004163384437561035, 0.0004588663578033447, 0.0005013942718505859, 0.0005439221858978271, 0.0005864500999450684, 0.0006289780139923096, 0.0006715059280395508, 0.000714033842086792, 0.0007565617561340332, 0.0007990896701812744, 0.0008416175842285156, 0.0008841454982757568, 0.000926673412322998, 0.0009692013263702393, 0.0010117292404174805, 0.0010542571544647217, 0.0010967850685119629, 0.001139312982559204, 0.0011818408966064453, 0.0012243688106536865, 0.0012668967247009277, 0.001309424638748169, 0.0013519525527954102, 0.0013944804668426514, 0.0014370083808898926, 0.0014795362949371338, 0.001522064208984375]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 6.0, 6.0, 6.0, 9.0, 5.0, 15.0, 17.0, 35.0, 30.0, 43.0, 66.0, 100.0, 175.0, 335.0, 812.0, 19305.0, 1018849.0, 7211.0, 758.0, 274.0, 144.0, 116.0, 75.0, 40.0, 30.0, 33.0, 15.0, 18.0, 7.0, 5.0, 4.0, 4.0, 4.0, 1.0, 0.0, 0.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.031890869140625, -0.0309906005859375, -0.03009033203125, -0.0291900634765625, -0.028289794921875, -0.0273895263671875, -0.0264892578125, -0.0255889892578125, -0.024688720703125, -0.0237884521484375, -0.02288818359375, -0.0219879150390625, -0.021087646484375, -0.0201873779296875, -0.019287109375, -0.0183868408203125, -0.017486572265625, -0.0165863037109375, -0.01568603515625, -0.0147857666015625, -0.013885498046875, -0.0129852294921875, -0.0120849609375, -0.0111846923828125, -0.010284423828125, -0.0093841552734375, -0.00848388671875, -0.0075836181640625, -0.006683349609375, -0.0057830810546875, -0.0048828125, -0.0039825439453125, -0.003082275390625, -0.0021820068359375, -0.00128173828125, -0.0003814697265625, 0.000518798828125, 0.0014190673828125, 0.0023193359375, 0.0032196044921875, 0.004119873046875, 0.0050201416015625, 0.00592041015625, 0.0068206787109375, 0.007720947265625, 0.0086212158203125, 0.009521484375, 0.0104217529296875, 0.011322021484375, 0.0122222900390625, 0.01312255859375, 0.0140228271484375, 0.014923095703125, 0.0158233642578125, 0.0167236328125, 0.0176239013671875, 0.018524169921875, 0.0194244384765625, 0.02032470703125, 0.0212249755859375, 0.022125244140625, 0.0230255126953125, 0.02392578125, 0.0248260498046875, 0.025726318359375]}, "gradients/decoder.transformer.h.14.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 8.0, 12.0, 19.0, 54.0, 90.0, 120.0, 189.0, 155.0, 153.0, 109.0, 55.0, 22.0, 12.0, 10.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0015501226298511028, -0.0015071295201778412, -0.0014641364105045795, -0.001421143300831318, -0.0013781501911580563, -0.0013351570814847946, -0.001292163971811533, -0.0012491707457229495, -0.0012061776360496879, -0.0011631845263764262, -0.0011201914167031646, -0.001077198307029903, -0.0010342051973566413, -0.0009912119712680578, -0.0009482189198024571, -0.0009052257519215345, -0.0008622327004559338, -0.0008192395907826722, -0.0007762464811094105, -0.000733253313228488, -0.0006902602035552263, -0.0006472670938819647, -0.000604273984208703, -0.0005612808745354414, -0.0005182877648621798, -0.0004752946551889181, -0.000432301516411826, -0.00038930840673856437, -0.00034631526796147227, -0.00030332215828821063, -0.000260329048614949, -0.0002173359098378569, -0.0001743427710607648, -0.00013134964683558792, -8.835652988636866e-05, -4.5363412937149405e-05, -2.3702887119725347e-06, 4.0622835513204336e-05, 8.361594518646598e-05, 0.00012660908396355808, 0.00016960219363681972, 0.0002125953178619966, 0.00025558844208717346, 0.0002985815517604351, 0.00034157466143369675, 0.00038456780021078885, 0.0004275609098840505, 0.0004705540486611426, 0.0005135471583344042, 0.0005565402680076659, 0.0005995333776809275, 0.0006425265455618501, 0.0006855196552351117, 0.0007285127649083734, 0.000771505874581635, 0.0008144989842548966, 0.0008574920939281583, 0.0009004852036014199, 0.0009434783132746816, 0.0009864714229479432, 0.0010294645326212049, 0.0010724577587097883, 0.00111545086838305, 0.0011584439780563116, 0.0012014370877295732]}, "gradients/decoder.transformer.h.14.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 6.0, 2.0, 7.0, 3.0, 11.0, 7.0, 12.0, 8.0, 14.0, 11.0, 21.0, 19.0, 24.0, 29.0, 27.0, 27.0, 41.0, 28.0, 41.0, 48.0, 32.0, 36.0, 33.0, 46.0, 31.0, 49.0, 41.0, 37.0, 31.0, 39.0, 22.0, 29.0, 28.0, 20.0, 27.0, 25.0, 20.0, 13.0, 16.0, 4.0, 8.0, 11.0, 7.0, 7.0, 3.0, 4.0, 3.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0005868673324584961, -0.000568232499063015, -0.0005495976656675339, -0.0005309628322720528, -0.0005123279988765717, -0.0004936931654810905, -0.00047505833208560944, -0.0004564234986901283, -0.0004377886652946472, -0.0004191538318991661, -0.000400518998503685, -0.0003818841651082039, -0.0003632493317127228, -0.00034461449831724167, -0.00032597966492176056, -0.00030734483152627945, -0.00028870999813079834, -0.00027007516473531723, -0.0002514403313398361, -0.000232805497944355, -0.0002141706645488739, -0.0001955358311533928, -0.00017690099775791168, -0.00015826616436243057, -0.00013963133096694946, -0.00012099649757146835, -0.00010236166417598724, -8.372683078050613e-05, -6.509199738502502e-05, -4.6457163989543915e-05, -2.7822330594062805e-05, -9.187497198581696e-06, 9.447336196899414e-06, 2.8082169592380524e-05, 4.671700298786163e-05, 6.535183638334274e-05, 8.398666977882385e-05, 0.00010262150317430496, 0.00012125633656978607, 0.00013989116996526718, 0.0001585260033607483, 0.0001771608367562294, 0.0001957956701517105, 0.00021443050354719162, 0.00023306533694267273, 0.00025170017033815384, 0.00027033500373363495, 0.00028896983712911606, 0.00030760467052459717, 0.0003262395039200783, 0.0003448743373155594, 0.0003635091707110405, 0.0003821440041065216, 0.0004007788375020027, 0.0004194136708974838, 0.00043804850429296494, 0.00045668333768844604, 0.00047531817108392715, 0.0004939530044794083, 0.0005125878378748894, 0.0005312226712703705, 0.0005498575046658516, 0.0005684923380613327, 0.0005871271714568138, 0.0006057620048522949]}, "gradients/decoder.transformer.h.14.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 9.0, 7.0, 10.0, 10.0, 17.0, 23.0, 22.0, 23.0, 22.0, 40.0, 49.0, 34.0, 45.0, 52.0, 54.0, 62.0, 38.0, 50.0, 55.0, 51.0, 41.0, 49.0, 43.0, 37.0, 26.0, 23.0, 20.0, 16.0, 21.0, 13.0, 16.0, 9.0, 5.0, 6.0, 3.0, 1.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.3984375, -5.19635009765625, -4.9942626953125, -4.79217529296875, -4.590087890625, -4.38800048828125, -4.1859130859375, -3.98382568359375, -3.78173828125, -3.57965087890625, -3.3775634765625, -3.17547607421875, -2.973388671875, -2.77130126953125, -2.5692138671875, -2.36712646484375, -2.1650390625, -1.96295166015625, -1.7608642578125, -1.55877685546875, -1.356689453125, -1.15460205078125, -0.9525146484375, -0.75042724609375, -0.54833984375, -0.34625244140625, -0.1441650390625, 0.05792236328125, 0.260009765625, 0.46209716796875, 0.6641845703125, 0.86627197265625, 1.068359375, 1.27044677734375, 1.4725341796875, 1.67462158203125, 1.876708984375, 2.07879638671875, 2.2808837890625, 2.48297119140625, 2.68505859375, 2.88714599609375, 3.0892333984375, 3.29132080078125, 3.493408203125, 3.69549560546875, 3.8975830078125, 4.09967041015625, 4.3017578125, 4.50384521484375, 4.7059326171875, 4.90802001953125, 5.110107421875, 5.31219482421875, 5.5142822265625, 5.71636962890625, 5.91845703125, 6.12054443359375, 6.3226318359375, 6.52471923828125, 6.726806640625, 6.92889404296875, 7.1309814453125, 7.33306884765625, 7.53515625]}, "gradients/decoder.transformer.h.14.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 4.0, 6.0, 5.0, 12.0, 10.0, 16.0, 20.0, 19.0, 39.0, 47.0, 81.0, 123.0, 232.0, 339.0, 632.0, 1290.0, 2653.0, 5845.0, 12352.0, 25966.0, 52539.0, 105078.0, 215558.0, 309392.0, 161488.0, 78577.0, 39557.0, 19122.0, 9159.0, 4201.0, 1960.0, 960.0, 488.0, 287.0, 152.0, 118.0, 64.0, 51.0, 23.0, 27.0, 21.0, 9.0, 4.0, 10.0, 5.0, 5.0, 3.0, 6.0, 3.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-4.85546875, -4.7003173828125, -4.545166015625, -4.3900146484375, -4.23486328125, -4.0797119140625, -3.924560546875, -3.7694091796875, -3.6142578125, -3.4591064453125, -3.303955078125, -3.1488037109375, -2.99365234375, -2.8385009765625, -2.683349609375, -2.5281982421875, -2.373046875, -2.2178955078125, -2.062744140625, -1.9075927734375, -1.75244140625, -1.5972900390625, -1.442138671875, -1.2869873046875, -1.1318359375, -0.9766845703125, -0.821533203125, -0.6663818359375, -0.51123046875, -0.3560791015625, -0.200927734375, -0.0457763671875, 0.109375, 0.2645263671875, 0.419677734375, 0.5748291015625, 0.72998046875, 0.8851318359375, 1.040283203125, 1.1954345703125, 1.3505859375, 1.5057373046875, 1.660888671875, 1.8160400390625, 1.97119140625, 2.1263427734375, 2.281494140625, 2.4366455078125, 2.591796875, 2.7469482421875, 2.902099609375, 3.0572509765625, 3.21240234375, 3.3675537109375, 3.522705078125, 3.6778564453125, 3.8330078125, 3.9881591796875, 4.143310546875, 4.2984619140625, 4.45361328125, 4.6087646484375, 4.763916015625, 4.9190673828125, 5.07421875]}, "gradients/decoder.transformer.h.14.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 5.0, 2.0, 3.0, 6.0, 5.0, 3.0, 5.0, 7.0, 17.0, 12.0, 10.0, 23.0, 12.0, 13.0, 13.0, 21.0, 26.0, 31.0, 32.0, 27.0, 33.0, 38.0, 46.0, 65.0, 141.0, 314.0, 1400.0, 213.0, 84.0, 69.0, 49.0, 38.0, 30.0, 28.0, 18.0, 20.0, 24.0, 27.0, 24.0, 26.0, 18.0, 15.0, 13.0, 13.0, 7.0, 8.0, 5.0, 4.0, 5.0, 2.0, 4.0, 5.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0], "bins": [-16.015625, -15.517822265625, -15.02001953125, -14.522216796875, -14.0244140625, -13.526611328125, -13.02880859375, -12.531005859375, -12.033203125, -11.535400390625, -11.03759765625, -10.539794921875, -10.0419921875, -9.544189453125, -9.04638671875, -8.548583984375, -8.05078125, -7.552978515625, -7.05517578125, -6.557373046875, -6.0595703125, -5.561767578125, -5.06396484375, -4.566162109375, -4.068359375, -3.570556640625, -3.07275390625, -2.574951171875, -2.0771484375, -1.579345703125, -1.08154296875, -0.583740234375, -0.0859375, 0.411865234375, 0.90966796875, 1.407470703125, 1.9052734375, 2.403076171875, 2.90087890625, 3.398681640625, 3.896484375, 4.394287109375, 4.89208984375, 5.389892578125, 5.8876953125, 6.385498046875, 6.88330078125, 7.381103515625, 7.87890625, 8.376708984375, 8.87451171875, 9.372314453125, 9.8701171875, 10.367919921875, 10.86572265625, 11.363525390625, 11.861328125, 12.359130859375, 12.85693359375, 13.354736328125, 13.8525390625, 14.350341796875, 14.84814453125, 15.345947265625, 15.84375]}, "gradients/decoder.transformer.h.14.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 5.0, 4.0, 9.0, 10.0, 3.0, 9.0, 8.0, 24.0, 23.0, 25.0, 39.0, 51.0, 68.0, 94.0, 137.0, 163.0, 289.0, 550.0, 2160.0, 1708812.0, 1429722.0, 2092.0, 543.0, 273.0, 175.0, 113.0, 68.0, 57.0, 53.0, 36.0, 27.0, 18.0, 13.0, 16.0, 8.0, 5.0, 7.0, 6.0, 2.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-46.8125, -44.861328125, -42.91015625, -40.958984375, -39.0078125, -37.056640625, -35.10546875, -33.154296875, -31.203125, -29.251953125, -27.30078125, -25.349609375, -23.3984375, -21.447265625, -19.49609375, -17.544921875, -15.59375, -13.642578125, -11.69140625, -9.740234375, -7.7890625, -5.837890625, -3.88671875, -1.935546875, 0.015625, 1.966796875, 3.91796875, 5.869140625, 7.8203125, 9.771484375, 11.72265625, 13.673828125, 15.625, 17.576171875, 19.52734375, 21.478515625, 23.4296875, 25.380859375, 27.33203125, 29.283203125, 31.234375, 33.185546875, 35.13671875, 37.087890625, 39.0390625, 40.990234375, 42.94140625, 44.892578125, 46.84375, 48.794921875, 50.74609375, 52.697265625, 54.6484375, 56.599609375, 58.55078125, 60.501953125, 62.453125, 64.404296875, 66.35546875, 68.306640625, 70.2578125, 72.208984375, 74.16015625, 76.111328125, 78.0625]}, "gradients/decoder.transformer.h.14.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 99.0, 563.0, 328.0, 22.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-266.1383972167969, -261.0935363769531, -256.04864501953125, -251.00376892089844, -245.95889282226562, -240.9140167236328, -235.869140625, -230.82427978515625, -225.77938842773438, -220.73451232910156, -215.68963623046875, -210.64476013183594, -205.59988403320312, -200.5550079345703, -195.5101318359375, -190.46527099609375, -185.42039489746094, -180.37551879882812, -175.3306427001953, -170.2857666015625, -165.2408905029297, -160.19601440429688, -155.15113830566406, -150.10626220703125, -145.0614013671875, -140.0165252685547, -134.97164916992188, -129.92677307128906, -124.88189697265625, -119.83702087402344, -114.79215240478516, -109.74727630615234, -104.70240020751953, -99.65752410888672, -94.6126480102539, -89.5677719116211, -84.52290344238281, -79.47802734375, -74.43315124511719, -69.38827514648438, -64.34339904785156, -59.29852294921875, -54.25364685058594, -49.20877456665039, -44.16389846801758, -39.119022369384766, -34.07415008544922, -29.029273986816406, -23.984397888183594, -18.93952178955078, -13.894647598266602, -8.849772453308105, -3.8048973083496094, 1.2399787902832031, 6.284852981567383, 11.329727172851562, 16.374603271484375, 21.419479370117188, 26.464353561401367, 31.509227752685547, 36.55410385131836, 41.59897994995117, 46.64385223388672, 51.68872833251953, 56.733604431152344]}, "gradients/decoder.transformer.h.14.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 2.0, 2.0, 4.0, 4.0, 6.0, 4.0, 5.0, 11.0, 9.0, 11.0, 17.0, 13.0, 19.0, 16.0, 18.0, 22.0, 19.0, 23.0, 26.0, 34.0, 39.0, 53.0, 39.0, 35.0, 47.0, 36.0, 38.0, 36.0, 39.0, 53.0, 48.0, 23.0, 41.0, 31.0, 20.0, 19.0, 19.0, 25.0, 17.0, 14.0, 13.0, 11.0, 13.0, 8.0, 7.0, 3.0, 5.0, 5.0, 1.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-53.213050842285156, -51.61225891113281, -50.01146697998047, -48.41067886352539, -46.80988693237305, -45.2090950012207, -43.608306884765625, -42.00751495361328, -40.40672302246094, -38.805931091308594, -37.20513916015625, -35.60435104370117, -34.00355911254883, -32.402767181396484, -30.801977157592773, -29.201187133789062, -27.60039520263672, -25.999603271484375, -24.398813247680664, -22.798023223876953, -21.19723129272461, -19.596439361572266, -17.995649337768555, -16.394859313964844, -14.7940673828125, -13.193276405334473, -11.592485427856445, -9.991694450378418, -8.39090347290039, -6.790112495422363, -5.189321517944336, -3.5885305404663086, -1.9877395629882812, -0.3869485855102539, 1.2138423919677734, 2.814633369445801, 4.415424346923828, 6.0162153244018555, 7.617006301879883, 9.21779727935791, 10.818588256835938, 12.419379234313965, 14.020170211791992, 15.62096118927002, 17.221752166748047, 18.82254409790039, 20.4233341217041, 22.024124145507812, 23.624916076660156, 25.2257080078125, 26.82649803161621, 28.427288055419922, 30.028079986572266, 31.62887191772461, 33.22966003417969, 34.83045196533203, 36.431243896484375, 38.03203582763672, 39.63282775878906, 41.23361587524414, 42.834407806396484, 44.43519973754883, 46.035987854003906, 47.63677978515625, 49.237571716308594]}, "gradients/decoder.transformer.h.13.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 2.0, 1.0, 5.0, 8.0, 11.0, 14.0, 17.0, 17.0, 15.0, 26.0, 30.0, 28.0, 24.0, 40.0, 43.0, 43.0, 44.0, 42.0, 46.0, 58.0, 40.0, 46.0, 63.0, 47.0, 45.0, 33.0, 37.0, 31.0, 19.0, 20.0, 23.0, 21.0, 14.0, 14.0, 13.0, 7.0, 8.0, 4.0, 7.0, 3.0, 0.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.97265625, -5.75830078125, -5.5439453125, -5.32958984375, -5.115234375, -4.90087890625, -4.6865234375, -4.47216796875, -4.2578125, -4.04345703125, -3.8291015625, -3.61474609375, -3.400390625, -3.18603515625, -2.9716796875, -2.75732421875, -2.54296875, -2.32861328125, -2.1142578125, -1.89990234375, -1.685546875, -1.47119140625, -1.2568359375, -1.04248046875, -0.828125, -0.61376953125, -0.3994140625, -0.18505859375, 0.029296875, 0.24365234375, 0.4580078125, 0.67236328125, 0.88671875, 1.10107421875, 1.3154296875, 1.52978515625, 1.744140625, 1.95849609375, 2.1728515625, 2.38720703125, 2.6015625, 2.81591796875, 3.0302734375, 3.24462890625, 3.458984375, 3.67333984375, 3.8876953125, 4.10205078125, 4.31640625, 4.53076171875, 4.7451171875, 4.95947265625, 5.173828125, 5.38818359375, 5.6025390625, 5.81689453125, 6.03125, 6.24560546875, 6.4599609375, 6.67431640625, 6.888671875, 7.10302734375, 7.3173828125, 7.53173828125, 7.74609375]}, "gradients/decoder.transformer.h.13.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 2.0, 7.0, 3.0, 5.0, 8.0, 11.0, 15.0, 13.0, 17.0, 31.0, 29.0, 35.0, 53.0, 47.0, 105.0, 241.0, 580.0, 2709.0, 46087.0, 1527712.0, 2497631.0, 112881.0, 4543.0, 860.0, 264.0, 122.0, 61.0, 52.0, 18.0, 28.0, 23.0, 21.0, 16.0, 11.0, 11.0, 10.0, 8.0, 9.0, 5.0, 4.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.90625, -17.26953125, -16.6328125, -15.99609375, -15.359375, -14.72265625, -14.0859375, -13.44921875, -12.8125, -12.17578125, -11.5390625, -10.90234375, -10.265625, -9.62890625, -8.9921875, -8.35546875, -7.71875, -7.08203125, -6.4453125, -5.80859375, -5.171875, -4.53515625, -3.8984375, -3.26171875, -2.625, -1.98828125, -1.3515625, -0.71484375, -0.078125, 0.55859375, 1.1953125, 1.83203125, 2.46875, 3.10546875, 3.7421875, 4.37890625, 5.015625, 5.65234375, 6.2890625, 6.92578125, 7.5625, 8.19921875, 8.8359375, 9.47265625, 10.109375, 10.74609375, 11.3828125, 12.01953125, 12.65625, 13.29296875, 13.9296875, 14.56640625, 15.203125, 15.83984375, 16.4765625, 17.11328125, 17.75, 18.38671875, 19.0234375, 19.66015625, 20.296875, 20.93359375, 21.5703125, 22.20703125, 22.84375]}, "gradients/decoder.transformer.h.13.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 8.0, 6.0, 9.0, 15.0, 26.0, 27.0, 47.0, 67.0, 119.0, 191.0, 287.0, 382.0, 505.0, 599.0, 532.0, 411.0, 275.0, 209.0, 145.0, 93.0, 45.0, 34.0, 16.0, 19.0, 6.0, 4.0, 5.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.890625, -9.356689453125, -8.82275390625, -8.288818359375, -7.7548828125, -7.220947265625, -6.68701171875, -6.153076171875, -5.619140625, -5.085205078125, -4.55126953125, -4.017333984375, -3.4833984375, -2.949462890625, -2.41552734375, -1.881591796875, -1.34765625, -0.813720703125, -0.27978515625, 0.254150390625, 0.7880859375, 1.322021484375, 1.85595703125, 2.389892578125, 2.923828125, 3.457763671875, 3.99169921875, 4.525634765625, 5.0595703125, 5.593505859375, 6.12744140625, 6.661376953125, 7.1953125, 7.729248046875, 8.26318359375, 8.797119140625, 9.3310546875, 9.864990234375, 10.39892578125, 10.932861328125, 11.466796875, 12.000732421875, 12.53466796875, 13.068603515625, 13.6025390625, 14.136474609375, 14.67041015625, 15.204345703125, 15.73828125, 16.272216796875, 16.80615234375, 17.340087890625, 17.8740234375, 18.407958984375, 18.94189453125, 19.475830078125, 20.009765625, 20.543701171875, 21.07763671875, 21.611572265625, 22.1455078125, 22.679443359375, 23.21337890625, 23.747314453125, 24.28125]}, "gradients/decoder.transformer.h.13.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 4.0, 4.0, 5.0, 11.0, 25.0, 23.0, 42.0, 72.0, 127.0, 212.0, 285.0, 520.0, 1786.0, 320612.0, 3854938.0, 13749.0, 869.0, 373.0, 257.0, 148.0, 79.0, 51.0, 29.0, 29.0, 19.0, 13.0, 5.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-80.9375, -79.1494140625, -77.361328125, -75.5732421875, -73.78515625, -71.9970703125, -70.208984375, -68.4208984375, -66.6328125, -64.8447265625, -63.056640625, -61.2685546875, -59.48046875, -57.6923828125, -55.904296875, -54.1162109375, -52.328125, -50.5400390625, -48.751953125, -46.9638671875, -45.17578125, -43.3876953125, -41.599609375, -39.8115234375, -38.0234375, -36.2353515625, -34.447265625, -32.6591796875, -30.87109375, -29.0830078125, -27.294921875, -25.5068359375, -23.71875, -21.9306640625, -20.142578125, -18.3544921875, -16.56640625, -14.7783203125, -12.990234375, -11.2021484375, -9.4140625, -7.6259765625, -5.837890625, -4.0498046875, -2.26171875, -0.4736328125, 1.314453125, 3.1025390625, 4.890625, 6.6787109375, 8.466796875, 10.2548828125, 12.04296875, 13.8310546875, 15.619140625, 17.4072265625, 19.1953125, 20.9833984375, 22.771484375, 24.5595703125, 26.34765625, 28.1357421875, 29.923828125, 31.7119140625, 33.5]}, "gradients/decoder.transformer.h.13.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 11.0, 56.0, 237.0, 398.0, 237.0, 64.0, 12.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-129.09307861328125, -123.33033752441406, -117.5676040649414, -111.80486297607422, -106.04212951660156, -100.27938842773438, -94.51664733886719, -88.75391387939453, -82.99118041992188, -77.22843933105469, -71.46570587158203, -65.70296478271484, -59.94023132324219, -54.177490234375, -48.41475296020508, -42.652015686035156, -36.88927459716797, -31.126537322998047, -25.363800048828125, -19.60106086730957, -13.838323593139648, -8.075586318969727, -2.312847137451172, 3.44989013671875, 9.212627410888672, 14.975364685058594, 20.738101959228516, 26.50084114074707, 32.263580322265625, 38.02631378173828, 43.78905487060547, 49.55179214477539, 55.31452941894531, 61.077266693115234, 66.84000396728516, 72.60274505615234, 78.365478515625, 84.12821960449219, 89.89096069335938, 95.65369415283203, 101.41642761230469, 107.17916870117188, 112.94190216064453, 118.70464324951172, 124.46737670898438, 130.23011779785156, 135.99285888671875, 141.75558471679688, 147.51834106445312, 153.2810821533203, 159.0438232421875, 164.80654907226562, 170.5692901611328, 176.33203125, 182.0947723388672, 187.85751342773438, 193.6202392578125, 199.3829803466797, 205.14572143554688, 210.908447265625, 216.6711883544922, 222.43392944335938, 228.19667053222656, 233.95941162109375, 239.72213745117188]}, "gradients/decoder.transformer.h.13.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 6.0, 3.0, 4.0, 1.0, 8.0, 8.0, 9.0, 9.0, 9.0, 19.0, 12.0, 31.0, 32.0, 28.0, 32.0, 33.0, 51.0, 33.0, 39.0, 50.0, 41.0, 31.0, 37.0, 45.0, 38.0, 47.0, 34.0, 48.0, 36.0, 29.0, 33.0, 19.0, 31.0, 13.0, 16.0, 20.0, 18.0, 16.0, 6.0, 12.0, 4.0, 7.0, 2.0, 4.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.532196044921875, -39.180660247802734, -37.82912063598633, -36.47758483886719, -35.12604904174805, -33.774513244628906, -32.4229736328125, -31.07143783569336, -29.719900131225586, -28.368362426757812, -27.016826629638672, -25.6652889251709, -24.313751220703125, -22.962215423583984, -21.61067771911621, -20.259140014648438, -18.907604217529297, -17.556066513061523, -16.204530715942383, -14.85299301147461, -13.501456260681152, -12.149919509887695, -10.798381805419922, -9.446845054626465, -8.095308303833008, -6.743771553039551, -5.3922343254089355, -4.04069709777832, -2.6891603469848633, -1.3376235961914062, 0.013914108276367188, 1.3654508590698242, 2.7169837951660156, 4.068520545959473, 5.420057773590088, 6.771595001220703, 8.12313175201416, 9.474668502807617, 10.82620620727539, 12.177742958068848, 13.529279708862305, 14.880816459655762, 16.23235321044922, 17.583890914916992, 18.935428619384766, 20.286964416503906, 21.63850212097168, 22.990039825439453, 24.341575622558594, 25.693113327026367, 27.044649124145508, 28.39618682861328, 29.747722625732422, 31.099260330200195, 32.45079803466797, 33.80233383178711, 35.15386962890625, 36.50540542602539, 37.8569450378418, 39.20848083496094, 40.56001663208008, 41.91155242919922, 43.263092041015625, 44.614627838134766, 45.96616744995117]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 8.0, 8.0, 16.0, 9.0, 14.0, 28.0, 20.0, 25.0, 30.0, 32.0, 24.0, 45.0, 39.0, 44.0, 49.0, 44.0, 39.0, 54.0, 58.0, 56.0, 50.0, 51.0, 32.0, 29.0, 30.0, 19.0, 33.0, 21.0, 28.0, 13.0, 12.0, 10.0, 8.0, 9.0, 9.0, 6.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.390625, -5.1854248046875, -4.980224609375, -4.7750244140625, -4.56982421875, -4.3646240234375, -4.159423828125, -3.9542236328125, -3.7490234375, -3.5438232421875, -3.338623046875, -3.1334228515625, -2.92822265625, -2.7230224609375, -2.517822265625, -2.3126220703125, -2.107421875, -1.9022216796875, -1.697021484375, -1.4918212890625, -1.28662109375, -1.0814208984375, -0.876220703125, -0.6710205078125, -0.4658203125, -0.2606201171875, -0.055419921875, 0.1497802734375, 0.35498046875, 0.5601806640625, 0.765380859375, 0.9705810546875, 1.17578125, 1.3809814453125, 1.586181640625, 1.7913818359375, 1.99658203125, 2.2017822265625, 2.406982421875, 2.6121826171875, 2.8173828125, 3.0225830078125, 3.227783203125, 3.4329833984375, 3.63818359375, 3.8433837890625, 4.048583984375, 4.2537841796875, 4.458984375, 4.6641845703125, 4.869384765625, 5.0745849609375, 5.27978515625, 5.4849853515625, 5.690185546875, 5.8953857421875, 6.1005859375, 6.3057861328125, 6.510986328125, 6.7161865234375, 6.92138671875, 7.1265869140625, 7.331787109375, 7.5369873046875, 7.7421875]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 8.0, 18.0, 10.0, 17.0, 30.0, 42.0, 57.0, 94.0, 105.0, 183.0, 267.0, 386.0, 584.0, 902.0, 1338.0, 2002.0, 2987.0, 4740.0, 7309.0, 11482.0, 17703.0, 28207.0, 44224.0, 69633.0, 107740.0, 163500.0, 191305.0, 140936.0, 90954.0, 58243.0, 37265.0, 23647.0, 15017.0, 9546.0, 6231.0, 3957.0, 2648.0, 1691.0, 1227.0, 759.0, 509.0, 331.0, 237.0, 159.0, 102.0, 68.0, 60.0, 38.0, 20.0, 17.0, 13.0, 8.0, 4.0, 4.0, 1.0, 0.0, 2.0], "bins": [-0.43798828125, -0.4248390197753906, -0.41168975830078125, -0.3985404968261719, -0.3853912353515625, -0.3722419738769531, -0.35909271240234375, -0.3459434509277344, -0.332794189453125, -0.3196449279785156, -0.30649566650390625, -0.2933464050292969, -0.2801971435546875, -0.2670478820800781, -0.25389862060546875, -0.24074935913085938, -0.22760009765625, -0.21445083618164062, -0.20130157470703125, -0.18815231323242188, -0.1750030517578125, -0.16185379028320312, -0.14870452880859375, -0.13555526733398438, -0.122406005859375, -0.10925674438476562, -0.09610748291015625, -0.08295822143554688, -0.0698089599609375, -0.056659698486328125, -0.04351043701171875, -0.030361175537109375, -0.0172119140625, -0.004062652587890625, 0.00908660888671875, 0.022235870361328125, 0.0353851318359375, 0.048534393310546875, 0.06168365478515625, 0.07483291625976562, 0.087982177734375, 0.10113143920898438, 0.11428070068359375, 0.12742996215820312, 0.1405792236328125, 0.15372848510742188, 0.16687774658203125, 0.18002700805664062, 0.19317626953125, 0.20632553100585938, 0.21947479248046875, 0.23262405395507812, 0.2457733154296875, 0.2589225769042969, 0.27207183837890625, 0.2852210998535156, 0.298370361328125, 0.3115196228027344, 0.32466888427734375, 0.3378181457519531, 0.3509674072265625, 0.3641166687011719, 0.37726593017578125, 0.3904151916503906, 0.403564453125]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 3.0, 6.0, 4.0, 8.0, 12.0, 10.0, 16.0, 20.0, 27.0, 19.0, 26.0, 29.0, 38.0, 34.0, 32.0, 40.0, 30.0, 42.0, 48.0, 47.0, 1068.0, 33.0, 36.0, 46.0, 39.0, 44.0, 40.0, 26.0, 31.0, 26.0, 19.0, 23.0, 17.0, 16.0, 10.0, 13.0, 8.0, 4.0, 4.0, 13.0, 8.0, 5.0, 5.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.9140625, -3.798187255859375, -3.68231201171875, -3.566436767578125, -3.4505615234375, -3.334686279296875, -3.21881103515625, -3.102935791015625, -2.987060546875, -2.871185302734375, -2.75531005859375, -2.639434814453125, -2.5235595703125, -2.407684326171875, -2.29180908203125, -2.175933837890625, -2.06005859375, -1.944183349609375, -1.82830810546875, -1.712432861328125, -1.5965576171875, -1.480682373046875, -1.36480712890625, -1.248931884765625, -1.133056640625, -1.017181396484375, -0.90130615234375, -0.785430908203125, -0.6695556640625, -0.553680419921875, -0.43780517578125, -0.321929931640625, -0.2060546875, -0.090179443359375, 0.02569580078125, 0.141571044921875, 0.2574462890625, 0.373321533203125, 0.48919677734375, 0.605072021484375, 0.720947265625, 0.836822509765625, 0.95269775390625, 1.068572998046875, 1.1844482421875, 1.300323486328125, 1.41619873046875, 1.532073974609375, 1.64794921875, 1.763824462890625, 1.87969970703125, 1.995574951171875, 2.1114501953125, 2.227325439453125, 2.34320068359375, 2.459075927734375, 2.574951171875, 2.690826416015625, 2.80670166015625, 2.922576904296875, 3.0384521484375, 3.154327392578125, 3.27020263671875, 3.386077880859375, 3.501953125]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 6.0, 7.0, 8.0, 9.0, 23.0, 29.0, 59.0, 72.0, 114.0, 177.0, 253.0, 407.0, 647.0, 977.0, 1437.0, 2273.0, 3567.0, 5518.0, 8478.0, 12817.0, 19913.0, 30633.0, 46954.0, 71823.0, 108964.0, 151333.0, 1216983.0, 138771.0, 96053.0, 62833.0, 40628.0, 26457.0, 17178.0, 11107.0, 7210.0, 4696.0, 3120.0, 1983.0, 1235.0, 878.0, 546.0, 331.0, 224.0, 148.0, 82.0, 63.0, 52.0, 20.0, 14.0, 7.0, 8.0, 9.0, 2.0, 5.0, 1.0, 2.0], "bins": [-0.37451171875, -0.3636054992675781, -0.35269927978515625, -0.3417930603027344, -0.3308868408203125, -0.3199806213378906, -0.30907440185546875, -0.2981681823730469, -0.287261962890625, -0.2763557434082031, -0.26544952392578125, -0.2545433044433594, -0.2436370849609375, -0.23273086547851562, -0.22182464599609375, -0.21091842651367188, -0.20001220703125, -0.18910598754882812, -0.17819976806640625, -0.16729354858398438, -0.1563873291015625, -0.14548110961914062, -0.13457489013671875, -0.12366867065429688, -0.112762451171875, -0.10185623168945312, -0.09095001220703125, -0.08004379272460938, -0.0691375732421875, -0.058231353759765625, -0.04732513427734375, -0.036418914794921875, -0.0255126953125, -0.014606475830078125, -0.00370025634765625, 0.007205963134765625, 0.0181121826171875, 0.029018402099609375, 0.03992462158203125, 0.050830841064453125, 0.061737060546875, 0.07264328002929688, 0.08354949951171875, 0.09445571899414062, 0.1053619384765625, 0.11626815795898438, 0.12717437744140625, 0.13808059692382812, 0.14898681640625, 0.15989303588867188, 0.17079925537109375, 0.18170547485351562, 0.1926116943359375, 0.20351791381835938, 0.21442413330078125, 0.22533035278320312, 0.236236572265625, 0.24714279174804688, 0.25804901123046875, 0.2689552307128906, 0.2798614501953125, 0.2907676696777344, 0.30167388916015625, 0.3125801086425781, 0.323486328125]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 4.0, 7.0, 7.0, 10.0, 10.0, 20.0, 15.0, 26.0, 28.0, 32.0, 44.0, 46.0, 52.0, 58.0, 68.0, 60.0, 59.0, 61.0, 61.0, 59.0, 48.0, 53.0, 46.0, 30.0, 25.0, 16.0, 20.0, 13.0, 9.0, 8.0, 9.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0013570785522460938, -0.0013088881969451904, -0.0012606978416442871, -0.0012125074863433838, -0.0011643171310424805, -0.0011161267757415771, -0.0010679364204406738, -0.0010197460651397705, -0.0009715557098388672, -0.0009233653545379639, -0.0008751749992370605, -0.0008269846439361572, -0.0007787942886352539, -0.0007306039333343506, -0.0006824135780334473, -0.0006342232227325439, -0.0005860328674316406, -0.0005378425121307373, -0.000489652156829834, -0.00044146180152893066, -0.00039327144622802734, -0.000345081090927124, -0.0002968907356262207, -0.0002487003803253174, -0.00020051002502441406, -0.00015231966972351074, -0.00010412931442260742, -5.59389591217041e-05, -7.748603820800781e-06, 4.044175148010254e-05, 8.863210678100586e-05, 0.00013682246208190918, 0.0001850128173828125, 0.00023320317268371582, 0.00028139352798461914, 0.00032958388328552246, 0.0003777742385864258, 0.0004259645938873291, 0.0004741549491882324, 0.0005223453044891357, 0.0005705356597900391, 0.0006187260150909424, 0.0006669163703918457, 0.000715106725692749, 0.0007632970809936523, 0.0008114874362945557, 0.000859677791595459, 0.0009078681468963623, 0.0009560585021972656, 0.001004248857498169, 0.0010524392127990723, 0.0011006295680999756, 0.001148819923400879, 0.0011970102787017822, 0.0012452006340026855, 0.0012933909893035889, 0.0013415813446044922, 0.0013897716999053955, 0.0014379620552062988, 0.0014861524105072021, 0.0015343427658081055, 0.0015825331211090088, 0.0016307234764099121, 0.0016789138317108154, 0.0017271041870117188]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 2.0, 8.0, 16.0, 20.0, 17.0, 31.0, 32.0, 56.0, 85.0, 109.0, 171.0, 373.0, 1184.0, 462298.0, 581960.0, 1230.0, 416.0, 169.0, 102.0, 66.0, 63.0, 44.0, 27.0, 17.0, 18.0, 11.0, 6.0, 10.0, 4.0, 7.0, 1.0, 1.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.03790283203125, -0.036820411682128906, -0.03573799133300781, -0.03465557098388672, -0.033573150634765625, -0.03249073028564453, -0.03140830993652344, -0.030325889587402344, -0.02924346923828125, -0.028161048889160156, -0.027078628540039062, -0.02599620819091797, -0.024913787841796875, -0.02383136749267578, -0.022748947143554688, -0.021666526794433594, -0.0205841064453125, -0.019501686096191406, -0.018419265747070312, -0.01733684539794922, -0.016254425048828125, -0.015172004699707031, -0.014089584350585938, -0.013007164001464844, -0.01192474365234375, -0.010842323303222656, -0.009759902954101562, -0.008677482604980469, -0.007595062255859375, -0.006512641906738281, -0.0054302215576171875, -0.004347801208496094, -0.003265380859375, -0.0021829605102539062, -0.0011005401611328125, -1.811981201171875e-05, 0.001064300537109375, 0.0021467208862304688, 0.0032291412353515625, 0.004311561584472656, 0.00539398193359375, 0.006476402282714844, 0.0075588226318359375, 0.008641242980957031, 0.009723663330078125, 0.010806083679199219, 0.011888504028320312, 0.012970924377441406, 0.0140533447265625, 0.015135765075683594, 0.016218185424804688, 0.01730060577392578, 0.018383026123046875, 0.01946544647216797, 0.020547866821289062, 0.021630287170410156, 0.02271270751953125, 0.023795127868652344, 0.024877548217773438, 0.02595996856689453, 0.027042388916015625, 0.02812480926513672, 0.029207229614257812, 0.030289649963378906, 0.0313720703125]}, "gradients/decoder.transformer.h.13.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 12.0, 83.0, 316.0, 414.0, 163.0, 24.0, 4.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0012022546725347638, -0.0010945876128971577, -0.0009869205532595515, -0.0008792535518296063, -0.0007715865503996611, -0.0006639194907620549, -0.0005562524311244488, -0.00044858542969450355, -0.0003409183700568974, -0.00023325133952312171, -0.0001255842944374308, -1.7917249351739883e-05, 8.97497811820358e-05, 0.0001974168117158115, 0.00030508387135341763, 0.00041275087278336287, 0.000520417932420969, 0.0006280849920585752, 0.0007357519934885204, 0.0008434190531261265, 0.0009510860545560718, 0.001058753114193678, 0.001166420173831284, 0.0012740872334688902, 0.0013817541766911745, 0.0014894212363287807, 0.0015970882959663868, 0.0017047552391886711, 0.0018124222988262773, 0.0019200893584638834, 0.0020277565345168114, 0.0021354234777390957, 0.0022430906537920237, 0.002350757597014308, 0.002458424773067236, 0.0025660917162895203, 0.0026737588923424482, 0.0027814258355647326, 0.0028890930116176605, 0.002996759954839945, 0.003104426898062229, 0.0032120938412845135, 0.0033197610173374414, 0.0034274279605597258, 0.0035350951366126537, 0.003642762079834938, 0.0037504290230572224, 0.0038580961991101503, 0.003965763375163078, 0.004073430318385363, 0.004181097261607647, 0.004288764670491219, 0.004396431613713503, 0.004504098556935787, 0.0046117655001580715, 0.004719432443380356, 0.00482709938660264, 0.0049347663298249245, 0.005042433273047209, 0.00515010068193078, 0.005257767625153065, 0.005365434568375349, 0.005473101511597633, 0.005580768454819918, 0.005688435863703489]}, "gradients/decoder.transformer.h.13.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 2.0, 4.0, 4.0, 1.0, 8.0, 3.0, 15.0, 7.0, 10.0, 13.0, 15.0, 18.0, 24.0, 26.0, 33.0, 21.0, 38.0, 22.0, 35.0, 37.0, 36.0, 35.0, 42.0, 45.0, 44.0, 45.0, 32.0, 50.0, 38.0, 25.0, 40.0, 25.0, 40.0, 21.0, 24.0, 25.0, 8.0, 15.0, 13.0, 20.0, 19.0, 7.0, 8.0, 4.0, 0.0, 2.0, 6.0, 2.0, 6.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000577092170715332, -0.0005559744313359261, -0.0005348566919565201, -0.0005137389525771141, -0.0004926212131977081, -0.00047150347381830215, -0.0004503857344388962, -0.0004292679950594902, -0.00040815025568008423, -0.00038703251630067825, -0.0003659147769212723, -0.0003447970375418663, -0.00032367929816246033, -0.00030256155878305435, -0.0002814438194036484, -0.0002603260800242424, -0.00023920834064483643, -0.00021809060126543045, -0.00019697286188602448, -0.0001758551225066185, -0.00015473738312721252, -0.00013361964374780655, -0.00011250190436840057, -9.13841649889946e-05, -7.026642560958862e-05, -4.914868623018265e-05, -2.8030946850776672e-05, -6.913207471370697e-06, 1.4204531908035278e-05, 3.5322271287441254e-05, 5.644001066684723e-05, 7.75577500462532e-05, 9.867548942565918e-05, 0.00011979322880506516, 0.00014091096818447113, 0.0001620287075638771, 0.00018314644694328308, 0.00020426418632268906, 0.00022538192570209503, 0.000246499665081501, 0.000267617404460907, 0.00028873514384031296, 0.00030985288321971893, 0.0003309706225991249, 0.0003520883619785309, 0.00037320610135793686, 0.00039432384073734283, 0.0004154415801167488, 0.0004365593194961548, 0.00045767705887556076, 0.00047879479825496674, 0.0004999125376343727, 0.0005210302770137787, 0.0005421480163931847, 0.0005632657557725906, 0.0005843834951519966, 0.0006055012345314026, 0.0006266189739108086, 0.0006477367132902145, 0.0006688544526696205, 0.0006899721920490265, 0.0007110899314284325, 0.0007322076708078384, 0.0007533254101872444, 0.0007744431495666504]}, "gradients/decoder.transformer.h.13.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 8.0, 8.0, 16.0, 9.0, 14.0, 28.0, 20.0, 25.0, 30.0, 32.0, 24.0, 45.0, 39.0, 44.0, 49.0, 44.0, 39.0, 54.0, 58.0, 56.0, 50.0, 51.0, 32.0, 29.0, 30.0, 19.0, 33.0, 21.0, 28.0, 13.0, 12.0, 10.0, 8.0, 9.0, 9.0, 6.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.390625, -5.1854248046875, -4.980224609375, -4.7750244140625, -4.56982421875, -4.3646240234375, -4.159423828125, -3.9542236328125, -3.7490234375, -3.5438232421875, -3.338623046875, -3.1334228515625, -2.92822265625, -2.7230224609375, -2.517822265625, -2.3126220703125, -2.107421875, -1.9022216796875, -1.697021484375, -1.4918212890625, -1.28662109375, -1.0814208984375, -0.876220703125, -0.6710205078125, -0.4658203125, -0.2606201171875, -0.055419921875, 0.1497802734375, 0.35498046875, 0.5601806640625, 0.765380859375, 0.9705810546875, 1.17578125, 1.3809814453125, 1.586181640625, 1.7913818359375, 1.99658203125, 2.2017822265625, 2.406982421875, 2.6121826171875, 2.8173828125, 3.0225830078125, 3.227783203125, 3.4329833984375, 3.63818359375, 3.8433837890625, 4.048583984375, 4.2537841796875, 4.458984375, 4.6641845703125, 4.869384765625, 5.0745849609375, 5.27978515625, 5.4849853515625, 5.690185546875, 5.8953857421875, 6.1005859375, 6.3057861328125, 6.510986328125, 6.7161865234375, 6.92138671875, 7.1265869140625, 7.331787109375, 7.5369873046875, 7.7421875]}, "gradients/decoder.transformer.h.13.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 6.0, 4.0, 5.0, 13.0, 17.0, 15.0, 25.0, 48.0, 66.0, 116.0, 169.0, 225.0, 399.0, 689.0, 1142.0, 2262.0, 4514.0, 9722.0, 21865.0, 52206.0, 129725.0, 300145.0, 300907.0, 129755.0, 53081.0, 22018.0, 9714.0, 4535.0, 2275.0, 1189.0, 609.0, 398.0, 234.0, 135.0, 107.0, 82.0, 31.0, 33.0, 22.0, 9.0, 16.0, 8.0, 6.0, 3.0, 4.0, 10.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0], "bins": [-5.14453125, -4.9608154296875, -4.777099609375, -4.5933837890625, -4.40966796875, -4.2259521484375, -4.042236328125, -3.8585205078125, -3.6748046875, -3.4910888671875, -3.307373046875, -3.1236572265625, -2.93994140625, -2.7562255859375, -2.572509765625, -2.3887939453125, -2.205078125, -2.0213623046875, -1.837646484375, -1.6539306640625, -1.47021484375, -1.2864990234375, -1.102783203125, -0.9190673828125, -0.7353515625, -0.5516357421875, -0.367919921875, -0.1842041015625, -0.00048828125, 0.1832275390625, 0.366943359375, 0.5506591796875, 0.734375, 0.9180908203125, 1.101806640625, 1.2855224609375, 1.46923828125, 1.6529541015625, 1.836669921875, 2.0203857421875, 2.2041015625, 2.3878173828125, 2.571533203125, 2.7552490234375, 2.93896484375, 3.1226806640625, 3.306396484375, 3.4901123046875, 3.673828125, 3.8575439453125, 4.041259765625, 4.2249755859375, 4.40869140625, 4.5924072265625, 4.776123046875, 4.9598388671875, 5.1435546875, 5.3272705078125, 5.510986328125, 5.6947021484375, 5.87841796875, 6.0621337890625, 6.245849609375, 6.4295654296875, 6.61328125]}, "gradients/decoder.transformer.h.13.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 2.0, 1.0, 0.0, 6.0, 8.0, 3.0, 4.0, 4.0, 4.0, 8.0, 11.0, 7.0, 11.0, 13.0, 18.0, 25.0, 26.0, 28.0, 21.0, 33.0, 25.0, 40.0, 43.0, 50.0, 83.0, 131.0, 227.0, 1429.0, 234.0, 133.0, 70.0, 41.0, 39.0, 37.0, 38.0, 39.0, 21.0, 23.0, 18.0, 16.0, 14.0, 17.0, 8.0, 15.0, 5.0, 9.0, 6.0, 1.0, 5.0, 1.0, 4.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-17.328125, -16.759765625, -16.19140625, -15.623046875, -15.0546875, -14.486328125, -13.91796875, -13.349609375, -12.78125, -12.212890625, -11.64453125, -11.076171875, -10.5078125, -9.939453125, -9.37109375, -8.802734375, -8.234375, -7.666015625, -7.09765625, -6.529296875, -5.9609375, -5.392578125, -4.82421875, -4.255859375, -3.6875, -3.119140625, -2.55078125, -1.982421875, -1.4140625, -0.845703125, -0.27734375, 0.291015625, 0.859375, 1.427734375, 1.99609375, 2.564453125, 3.1328125, 3.701171875, 4.26953125, 4.837890625, 5.40625, 5.974609375, 6.54296875, 7.111328125, 7.6796875, 8.248046875, 8.81640625, 9.384765625, 9.953125, 10.521484375, 11.08984375, 11.658203125, 12.2265625, 12.794921875, 13.36328125, 13.931640625, 14.5, 15.068359375, 15.63671875, 16.205078125, 16.7734375, 17.341796875, 17.91015625, 18.478515625, 19.046875]}, "gradients/decoder.transformer.h.13.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 4.0, 0.0, 4.0, 5.0, 6.0, 9.0, 11.0, 18.0, 25.0, 20.0, 32.0, 43.0, 79.0, 119.0, 202.0, 400.0, 913.0, 18236.0, 3118910.0, 5061.0, 668.0, 340.0, 209.0, 128.0, 79.0, 48.0, 43.0, 35.0, 14.0, 13.0, 8.0, 8.0, 4.0, 8.0, 6.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-97.1875, -94.275390625, -91.36328125, -88.451171875, -85.5390625, -82.626953125, -79.71484375, -76.802734375, -73.890625, -70.978515625, -68.06640625, -65.154296875, -62.2421875, -59.330078125, -56.41796875, -53.505859375, -50.59375, -47.681640625, -44.76953125, -41.857421875, -38.9453125, -36.033203125, -33.12109375, -30.208984375, -27.296875, -24.384765625, -21.47265625, -18.560546875, -15.6484375, -12.736328125, -9.82421875, -6.912109375, -4.0, -1.087890625, 1.82421875, 4.736328125, 7.6484375, 10.560546875, 13.47265625, 16.384765625, 19.296875, 22.208984375, 25.12109375, 28.033203125, 30.9453125, 33.857421875, 36.76953125, 39.681640625, 42.59375, 45.505859375, 48.41796875, 51.330078125, 54.2421875, 57.154296875, 60.06640625, 62.978515625, 65.890625, 68.802734375, 71.71484375, 74.626953125, 77.5390625, 80.451171875, 83.36328125, 86.275390625, 89.1875]}, "gradients/decoder.transformer.h.13.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 9.0, 41.0, 75.0, 154.0, 236.0, 228.0, 157.0, 79.0, 23.0, 7.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.55328369140625, -45.788604736328125, -44.023921966552734, -42.25924301147461, -40.494564056396484, -38.729881286621094, -36.96520233154297, -35.200523376464844, -33.43584442138672, -31.67116355895996, -29.906484603881836, -28.141803741455078, -26.377124786376953, -24.612443923950195, -22.847763061523438, -21.083084106445312, -19.318401336669922, -17.553720474243164, -15.789041519165039, -14.024360656738281, -12.25968074798584, -10.495000839233398, -8.73031997680664, -6.965640068054199, -5.200960159301758, -3.4362800121307373, -1.6715998649597168, 0.09308052062988281, 1.8577604293823242, 3.6224403381347656, 5.387121200561523, 7.151801109313965, 8.916481018066406, 10.681160926818848, 12.445840835571289, 14.210521697998047, 15.975201606750488, 17.73988151550293, 19.504562377929688, 21.269241333007812, 23.03392219543457, 24.798603057861328, 26.563282012939453, 28.32796287536621, 30.09264373779297, 31.857322692871094, 33.62200164794922, 35.38668441772461, 37.151363372802734, 38.91604232788086, 40.68072509765625, 42.445404052734375, 44.2100830078125, 45.974761962890625, 47.739444732666016, 49.50412368774414, 51.26880645751953, 53.033485412597656, 54.79816818237305, 56.56284713745117, 58.3275260925293, 60.09220886230469, 61.85688781738281, 63.62156677246094, 65.38624572753906]}, "gradients/decoder.transformer.h.13.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 6.0, 2.0, 4.0, 5.0, 10.0, 8.0, 9.0, 17.0, 20.0, 14.0, 13.0, 18.0, 20.0, 16.0, 30.0, 27.0, 33.0, 27.0, 35.0, 38.0, 28.0, 37.0, 38.0, 34.0, 48.0, 37.0, 42.0, 43.0, 35.0, 29.0, 25.0, 34.0, 29.0, 25.0, 22.0, 22.0, 17.0, 19.0, 13.0, 8.0, 17.0, 13.0, 8.0, 4.0, 7.0, 8.0, 4.0, 3.0, 4.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-45.83510971069336, -44.48036575317383, -43.12561798095703, -41.7708740234375, -40.41613006591797, -39.06138229370117, -37.70663833618164, -36.351890563964844, -34.99714660644531, -33.64240264892578, -32.287654876708984, -30.932910919189453, -29.57816505432129, -28.223419189453125, -26.868675231933594, -25.51392936706543, -24.159183502197266, -22.8044376373291, -21.449691772460938, -20.094947814941406, -18.740201950073242, -17.385456085205078, -16.030712127685547, -14.675966262817383, -13.321220397949219, -11.966474533081055, -10.611729621887207, -9.25698471069336, -7.902238845825195, -6.5474934577941895, -5.192748069763184, -3.838003158569336, -2.4832534790039062, -1.1285080909729004, 0.22623729705810547, 1.5809826850891113, 2.935728073120117, 4.290473461151123, 5.645218849182129, 6.999963760375977, 8.35470962524414, 9.709455490112305, 11.064200401306152, 12.4189453125, 13.773691177368164, 15.128437042236328, 16.48318099975586, 17.837926864624023, 19.192672729492188, 20.54741859436035, 21.902164459228516, 23.256908416748047, 24.61165428161621, 25.966400146484375, 27.321144104003906, 28.67588996887207, 30.030635833740234, 31.3853816986084, 32.74012756347656, 34.094871520996094, 35.449615478515625, 36.80436325073242, 38.15910720825195, 39.51385498046875, 40.86859893798828]}, "gradients/decoder.transformer.h.12.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 3.0, 2.0, 6.0, 10.0, 9.0, 9.0, 20.0, 16.0, 17.0, 31.0, 25.0, 18.0, 33.0, 31.0, 33.0, 43.0, 39.0, 49.0, 40.0, 45.0, 51.0, 55.0, 61.0, 46.0, 42.0, 34.0, 33.0, 33.0, 31.0, 20.0, 21.0, 23.0, 15.0, 18.0, 19.0, 8.0, 2.0, 6.0, 7.0, 3.0, 0.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.74609375, -5.52716064453125, -5.3082275390625, -5.08929443359375, -4.870361328125, -4.65142822265625, -4.4324951171875, -4.21356201171875, -3.99462890625, -3.77569580078125, -3.5567626953125, -3.33782958984375, -3.118896484375, -2.89996337890625, -2.6810302734375, -2.46209716796875, -2.2431640625, -2.02423095703125, -1.8052978515625, -1.58636474609375, -1.367431640625, -1.14849853515625, -0.9295654296875, -0.71063232421875, -0.49169921875, -0.27276611328125, -0.0538330078125, 0.16510009765625, 0.384033203125, 0.60296630859375, 0.8218994140625, 1.04083251953125, 1.259765625, 1.47869873046875, 1.6976318359375, 1.91656494140625, 2.135498046875, 2.35443115234375, 2.5733642578125, 2.79229736328125, 3.01123046875, 3.23016357421875, 3.4490966796875, 3.66802978515625, 3.886962890625, 4.10589599609375, 4.3248291015625, 4.54376220703125, 4.7626953125, 4.98162841796875, 5.2005615234375, 5.41949462890625, 5.638427734375, 5.85736083984375, 6.0762939453125, 6.29522705078125, 6.51416015625, 6.73309326171875, 6.9520263671875, 7.17095947265625, 7.389892578125, 7.60882568359375, 7.8277587890625, 8.04669189453125, 8.265625]}, "gradients/decoder.transformer.h.12.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 4.0, 0.0, 0.0, 4.0, 4.0, 3.0, 7.0, 4.0, 8.0, 6.0, 11.0, 19.0, 21.0, 39.0, 38.0, 47.0, 53.0, 91.0, 146.0, 210.0, 415.0, 1053.0, 4270.0, 31103.0, 330693.0, 1988301.0, 1600767.0, 212096.0, 20046.0, 3004.0, 845.0, 364.0, 209.0, 103.0, 82.0, 45.0, 45.0, 34.0, 21.0, 23.0, 15.0, 8.0, 15.0, 7.0, 6.0, 4.0, 5.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.078125, -10.6663818359375, -10.254638671875, -9.8428955078125, -9.43115234375, -9.0194091796875, -8.607666015625, -8.1959228515625, -7.7841796875, -7.3724365234375, -6.960693359375, -6.5489501953125, -6.13720703125, -5.7254638671875, -5.313720703125, -4.9019775390625, -4.490234375, -4.0784912109375, -3.666748046875, -3.2550048828125, -2.84326171875, -2.4315185546875, -2.019775390625, -1.6080322265625, -1.1962890625, -0.7845458984375, -0.372802734375, 0.0389404296875, 0.45068359375, 0.8624267578125, 1.274169921875, 1.6859130859375, 2.09765625, 2.5093994140625, 2.921142578125, 3.3328857421875, 3.74462890625, 4.1563720703125, 4.568115234375, 4.9798583984375, 5.3916015625, 5.8033447265625, 6.215087890625, 6.6268310546875, 7.03857421875, 7.4503173828125, 7.862060546875, 8.2738037109375, 8.685546875, 9.0972900390625, 9.509033203125, 9.9207763671875, 10.33251953125, 10.7442626953125, 11.156005859375, 11.5677490234375, 11.9794921875, 12.3912353515625, 12.802978515625, 13.2147216796875, 13.62646484375, 14.0382080078125, 14.449951171875, 14.8616943359375, 15.2734375]}, "gradients/decoder.transformer.h.12.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 4.0, 2.0, 5.0, 4.0, 3.0, 9.0, 6.0, 14.0, 15.0, 14.0, 27.0, 41.0, 46.0, 75.0, 120.0, 152.0, 182.0, 253.0, 317.0, 409.0, 433.0, 422.0, 392.0, 309.0, 230.0, 174.0, 127.0, 85.0, 56.0, 37.0, 42.0, 27.0, 12.0, 10.0, 5.0, 8.0, 4.0, 5.0, 2.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-13.09375, -12.693603515625, -12.29345703125, -11.893310546875, -11.4931640625, -11.093017578125, -10.69287109375, -10.292724609375, -9.892578125, -9.492431640625, -9.09228515625, -8.692138671875, -8.2919921875, -7.891845703125, -7.49169921875, -7.091552734375, -6.69140625, -6.291259765625, -5.89111328125, -5.490966796875, -5.0908203125, -4.690673828125, -4.29052734375, -3.890380859375, -3.490234375, -3.090087890625, -2.68994140625, -2.289794921875, -1.8896484375, -1.489501953125, -1.08935546875, -0.689208984375, -0.2890625, 0.111083984375, 0.51123046875, 0.911376953125, 1.3115234375, 1.711669921875, 2.11181640625, 2.511962890625, 2.912109375, 3.312255859375, 3.71240234375, 4.112548828125, 4.5126953125, 4.912841796875, 5.31298828125, 5.713134765625, 6.11328125, 6.513427734375, 6.91357421875, 7.313720703125, 7.7138671875, 8.114013671875, 8.51416015625, 8.914306640625, 9.314453125, 9.714599609375, 10.11474609375, 10.514892578125, 10.9150390625, 11.315185546875, 11.71533203125, 12.115478515625, 12.515625]}, "gradients/decoder.transformer.h.12.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 4.0, 6.0, 4.0, 12.0, 8.0, 15.0, 24.0, 40.0, 55.0, 64.0, 101.0, 154.0, 226.0, 337.0, 526.0, 1643.0, 93952.0, 3881716.0, 211358.0, 2319.0, 581.0, 349.0, 231.0, 170.0, 115.0, 81.0, 59.0, 32.0, 29.0, 17.0, 20.0, 7.0, 2.0, 10.0, 3.0, 6.0, 2.0, 6.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.84375, -41.48681640625, -40.1298828125, -38.77294921875, -37.416015625, -36.05908203125, -34.7021484375, -33.34521484375, -31.98828125, -30.63134765625, -29.2744140625, -27.91748046875, -26.560546875, -25.20361328125, -23.8466796875, -22.48974609375, -21.1328125, -19.77587890625, -18.4189453125, -17.06201171875, -15.705078125, -14.34814453125, -12.9912109375, -11.63427734375, -10.27734375, -8.92041015625, -7.5634765625, -6.20654296875, -4.849609375, -3.49267578125, -2.1357421875, -0.77880859375, 0.578125, 1.93505859375, 3.2919921875, 4.64892578125, 6.005859375, 7.36279296875, 8.7197265625, 10.07666015625, 11.43359375, 12.79052734375, 14.1474609375, 15.50439453125, 16.861328125, 18.21826171875, 19.5751953125, 20.93212890625, 22.2890625, 23.64599609375, 25.0029296875, 26.35986328125, 27.716796875, 29.07373046875, 30.4306640625, 31.78759765625, 33.14453125, 34.50146484375, 35.8583984375, 37.21533203125, 38.572265625, 39.92919921875, 41.2861328125, 42.64306640625, 44.0]}, "gradients/decoder.transformer.h.12.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 4.0, 3.0, 8.0, 23.0, 25.0, 39.0, 55.0, 72.0, 95.0, 95.0, 112.0, 114.0, 108.0, 92.0, 63.0, 47.0, 22.0, 15.0, 10.0, 5.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-65.18644714355469, -63.49686050415039, -61.807273864746094, -60.1176872253418, -58.4281005859375, -56.7385139465332, -55.048927307128906, -53.359336853027344, -51.66975402832031, -49.980167388916016, -48.29058074951172, -46.60099411010742, -44.911407470703125, -43.22182083129883, -41.53223419189453, -39.84264373779297, -38.15305709838867, -36.463470458984375, -34.77388381958008, -33.08429718017578, -31.394710540771484, -29.705123901367188, -28.015535354614258, -26.32594871520996, -24.636362075805664, -22.946775436401367, -21.25718879699707, -19.56760025024414, -17.878013610839844, -16.188426971435547, -14.49884033203125, -12.809253692626953, -11.119670867919922, -9.430084228515625, -7.74049711227417, -6.050909996032715, -4.361323356628418, -2.671736717224121, -0.9821491241455078, 0.7074375152587891, 2.397024154663086, 4.086610794067383, 5.776197910308838, 7.465785026550293, 9.15537166595459, 10.844958305358887, 12.5345458984375, 14.224132537841797, 15.913719177246094, 17.60330581665039, 19.292892456054688, 20.982479095458984, 22.67206573486328, 24.361652374267578, 26.051240921020508, 27.740827560424805, 29.4304141998291, 31.1200008392334, 32.80958938598633, 34.499176025390625, 36.18876266479492, 37.87834930419922, 39.567935943603516, 41.25752258300781, 42.94710922241211]}, "gradients/decoder.transformer.h.12.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 6.0, 6.0, 7.0, 6.0, 6.0, 18.0, 14.0, 25.0, 17.0, 14.0, 18.0, 23.0, 20.0, 26.0, 21.0, 29.0, 39.0, 30.0, 39.0, 34.0, 28.0, 43.0, 34.0, 39.0, 43.0, 48.0, 33.0, 38.0, 33.0, 28.0, 35.0, 24.0, 24.0, 25.0, 23.0, 21.0, 17.0, 18.0, 14.0, 4.0, 9.0, 9.0, 6.0, 2.0, 1.0, 5.0, 2.0, 3.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0], "bins": [-39.19010925292969, -38.00028610229492, -36.810462951660156, -35.620635986328125, -34.43081283569336, -33.240989685058594, -32.05116271972656, -30.861339569091797, -29.67151641845703, -28.481693267822266, -27.291868209838867, -26.10204315185547, -24.912220001220703, -23.722396850585938, -22.53257179260254, -21.34274673461914, -20.152923583984375, -18.96310043334961, -17.77327537536621, -16.583450317382812, -15.393627166748047, -14.203803062438965, -13.013978958129883, -11.8241548538208, -10.634330749511719, -9.444506645202637, -8.254682540893555, -7.064858436584473, -5.875034332275391, -4.685210227966309, -3.4953861236572266, -2.3055620193481445, -1.1157341003417969, 0.07409000396728516, 1.2639141082763672, 2.453738212585449, 3.6435623168945312, 4.833386421203613, 6.023210525512695, 7.213034629821777, 8.40285873413086, 9.592682838439941, 10.782506942749023, 11.972331047058105, 13.162155151367188, 14.35197925567627, 15.541803359985352, 16.73162841796875, 17.921451568603516, 19.11127471923828, 20.30109977722168, 21.490924835205078, 22.680747985839844, 23.87057113647461, 25.060396194458008, 26.250221252441406, 27.440044403076172, 28.629867553710938, 29.819692611694336, 31.009517669677734, 32.1993408203125, 33.389163970947266, 34.57898712158203, 35.76881408691406, 36.95863723754883]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 0.0, 4.0, 3.0, 8.0, 10.0, 9.0, 9.0, 13.0, 18.0, 24.0, 19.0, 35.0, 25.0, 33.0, 37.0, 42.0, 38.0, 42.0, 48.0, 42.0, 47.0, 48.0, 60.0, 51.0, 56.0, 34.0, 49.0, 25.0, 28.0, 29.0, 23.0, 21.0, 15.0, 12.0, 9.0, 7.0, 10.0, 12.0, 8.0, 1.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.984375, -5.7679443359375, -5.551513671875, -5.3350830078125, -5.11865234375, -4.9022216796875, -4.685791015625, -4.4693603515625, -4.2529296875, -4.0364990234375, -3.820068359375, -3.6036376953125, -3.38720703125, -3.1707763671875, -2.954345703125, -2.7379150390625, -2.521484375, -2.3050537109375, -2.088623046875, -1.8721923828125, -1.65576171875, -1.4393310546875, -1.222900390625, -1.0064697265625, -0.7900390625, -0.5736083984375, -0.357177734375, -0.1407470703125, 0.07568359375, 0.2921142578125, 0.508544921875, 0.7249755859375, 0.94140625, 1.1578369140625, 1.374267578125, 1.5906982421875, 1.80712890625, 2.0235595703125, 2.239990234375, 2.4564208984375, 2.6728515625, 2.8892822265625, 3.105712890625, 3.3221435546875, 3.53857421875, 3.7550048828125, 3.971435546875, 4.1878662109375, 4.404296875, 4.6207275390625, 4.837158203125, 5.0535888671875, 5.27001953125, 5.4864501953125, 5.702880859375, 5.9193115234375, 6.1357421875, 6.3521728515625, 6.568603515625, 6.7850341796875, 7.00146484375, 7.2178955078125, 7.434326171875, 7.6507568359375, 7.8671875]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 9.0, 6.0, 12.0, 27.0, 33.0, 62.0, 80.0, 141.0, 233.0, 381.0, 671.0, 1134.0, 1977.0, 3454.0, 5992.0, 11054.0, 20442.0, 38108.0, 73151.0, 139500.0, 243169.0, 233142.0, 130202.0, 67498.0, 35176.0, 19024.0, 10433.0, 5643.0, 3243.0, 1854.0, 1126.0, 630.0, 363.0, 212.0, 141.0, 77.0, 47.0, 44.0, 20.0, 16.0, 10.0, 11.0, 6.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5830078125, -0.56353759765625, -0.5440673828125, -0.52459716796875, -0.505126953125, -0.48565673828125, -0.4661865234375, -0.44671630859375, -0.42724609375, -0.40777587890625, -0.3883056640625, -0.36883544921875, -0.349365234375, -0.32989501953125, -0.3104248046875, -0.29095458984375, -0.271484375, -0.25201416015625, -0.2325439453125, -0.21307373046875, -0.193603515625, -0.17413330078125, -0.1546630859375, -0.13519287109375, -0.11572265625, -0.09625244140625, -0.0767822265625, -0.05731201171875, -0.037841796875, -0.01837158203125, 0.0010986328125, 0.02056884765625, 0.0400390625, 0.05950927734375, 0.0789794921875, 0.09844970703125, 0.117919921875, 0.13739013671875, 0.1568603515625, 0.17633056640625, 0.19580078125, 0.21527099609375, 0.2347412109375, 0.25421142578125, 0.273681640625, 0.29315185546875, 0.3126220703125, 0.33209228515625, 0.3515625, 0.37103271484375, 0.3905029296875, 0.40997314453125, 0.429443359375, 0.44891357421875, 0.4683837890625, 0.48785400390625, 0.50732421875, 0.52679443359375, 0.5462646484375, 0.56573486328125, 0.585205078125, 0.60467529296875, 0.6241455078125, 0.64361572265625, 0.6630859375]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 2.0, 2.0, 6.0, 4.0, 8.0, 14.0, 21.0, 15.0, 28.0, 21.0, 19.0, 26.0, 23.0, 25.0, 43.0, 34.0, 48.0, 43.0, 37.0, 51.0, 1068.0, 35.0, 47.0, 38.0, 39.0, 40.0, 42.0, 43.0, 34.0, 19.0, 16.0, 22.0, 24.0, 15.0, 16.0, 15.0, 8.0, 7.0, 11.0, 9.0, 3.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.78125, -3.65509033203125, -3.5289306640625, -3.40277099609375, -3.276611328125, -3.15045166015625, -3.0242919921875, -2.89813232421875, -2.77197265625, -2.64581298828125, -2.5196533203125, -2.39349365234375, -2.267333984375, -2.14117431640625, -2.0150146484375, -1.88885498046875, -1.7626953125, -1.63653564453125, -1.5103759765625, -1.38421630859375, -1.258056640625, -1.13189697265625, -1.0057373046875, -0.87957763671875, -0.75341796875, -0.62725830078125, -0.5010986328125, -0.37493896484375, -0.248779296875, -0.12261962890625, 0.0035400390625, 0.12969970703125, 0.255859375, 0.38201904296875, 0.5081787109375, 0.63433837890625, 0.760498046875, 0.88665771484375, 1.0128173828125, 1.13897705078125, 1.26513671875, 1.39129638671875, 1.5174560546875, 1.64361572265625, 1.769775390625, 1.89593505859375, 2.0220947265625, 2.14825439453125, 2.2744140625, 2.40057373046875, 2.5267333984375, 2.65289306640625, 2.779052734375, 2.90521240234375, 3.0313720703125, 3.15753173828125, 3.28369140625, 3.40985107421875, 3.5360107421875, 3.66217041015625, 3.788330078125, 3.91448974609375, 4.0406494140625, 4.16680908203125, 4.29296875]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 7.0, 6.0, 13.0, 22.0, 40.0, 40.0, 70.0, 99.0, 144.0, 227.0, 358.0, 558.0, 781.0, 1124.0, 1766.0, 2714.0, 3942.0, 6076.0, 9029.0, 13464.0, 19794.0, 29967.0, 44785.0, 67038.0, 99963.0, 142231.0, 1215350.0, 140109.0, 98096.0, 66345.0, 44072.0, 29437.0, 19582.0, 13293.0, 9079.0, 6027.0, 3840.0, 2614.0, 1682.0, 1129.0, 743.0, 496.0, 341.0, 231.0, 148.0, 87.0, 58.0, 43.0, 27.0, 27.0, 9.0, 8.0, 4.0, 3.0, 3.0, 2.0, 2.0], "bins": [-0.359130859375, -0.3484039306640625, -0.337677001953125, -0.3269500732421875, -0.31622314453125, -0.3054962158203125, -0.294769287109375, -0.2840423583984375, -0.2733154296875, -0.2625885009765625, -0.251861572265625, -0.2411346435546875, -0.23040771484375, -0.2196807861328125, -0.208953857421875, -0.1982269287109375, -0.1875, -0.1767730712890625, -0.166046142578125, -0.1553192138671875, -0.14459228515625, -0.1338653564453125, -0.123138427734375, -0.1124114990234375, -0.1016845703125, -0.0909576416015625, -0.080230712890625, -0.0695037841796875, -0.05877685546875, -0.0480499267578125, -0.037322998046875, -0.0265960693359375, -0.015869140625, -0.0051422119140625, 0.005584716796875, 0.0163116455078125, 0.02703857421875, 0.0377655029296875, 0.048492431640625, 0.0592193603515625, 0.0699462890625, 0.0806732177734375, 0.091400146484375, 0.1021270751953125, 0.11285400390625, 0.1235809326171875, 0.134307861328125, 0.1450347900390625, 0.15576171875, 0.1664886474609375, 0.177215576171875, 0.1879425048828125, 0.19866943359375, 0.2093963623046875, 0.220123291015625, 0.2308502197265625, 0.2415771484375, 0.2523040771484375, 0.263031005859375, 0.2737579345703125, 0.28448486328125, 0.2952117919921875, 0.305938720703125, 0.3166656494140625, 0.327392578125]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 4.0, 4.0, 9.0, 13.0, 13.0, 12.0, 20.0, 25.0, 36.0, 31.0, 46.0, 45.0, 67.0, 67.0, 58.0, 67.0, 73.0, 60.0, 65.0, 41.0, 41.0, 38.0, 41.0, 21.0, 21.0, 17.0, 16.0, 16.0, 6.0, 10.0, 11.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0011816024780273438, -0.0011403709650039673, -0.0010991394519805908, -0.0010579079389572144, -0.0010166764259338379, -0.0009754449129104614, -0.000934213399887085, -0.0008929818868637085, -0.000851750373840332, -0.0008105188608169556, -0.0007692873477935791, -0.0007280558347702026, -0.0006868243217468262, -0.0006455928087234497, -0.0006043612957000732, -0.0005631297826766968, -0.0005218982696533203, -0.00048066675662994385, -0.0004394352436065674, -0.0003982037305831909, -0.00035697221755981445, -0.000315740704536438, -0.0002745091915130615, -0.00023327767848968506, -0.0001920461654663086, -0.00015081465244293213, -0.00010958313941955566, -6.83516263961792e-05, -2.7120113372802734e-05, 1.411139965057373e-05, 5.5342912673950195e-05, 9.657442569732666e-05, 0.00013780593872070312, 0.0001790374517440796, 0.00022026896476745605, 0.0002615004777908325, 0.000302731990814209, 0.00034396350383758545, 0.0003851950168609619, 0.0004264265298843384, 0.00046765804290771484, 0.0005088895559310913, 0.0005501210689544678, 0.0005913525819778442, 0.0006325840950012207, 0.0006738156080245972, 0.0007150471210479736, 0.0007562786340713501, 0.0007975101470947266, 0.000838741660118103, 0.0008799731731414795, 0.000921204686164856, 0.0009624361991882324, 0.0010036677122116089, 0.0010448992252349854, 0.0010861307382583618, 0.0011273622512817383, 0.0011685937643051147, 0.0012098252773284912, 0.0012510567903518677, 0.0012922883033752441, 0.0013335198163986206, 0.001374751329421997, 0.0014159828424453735, 0.00145721435546875]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 4.0, 12.0, 10.0, 6.0, 14.0, 23.0, 31.0, 29.0, 40.0, 64.0, 73.0, 86.0, 115.0, 238.0, 566.0, 2644.0, 947660.0, 94929.0, 994.0, 374.0, 205.0, 131.0, 76.0, 54.0, 51.0, 35.0, 19.0, 19.0, 12.0, 12.0, 14.0, 7.0, 3.0, 3.0, 0.0, 2.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0289764404296875, -0.02814316749572754, -0.027309894561767578, -0.026476621627807617, -0.025643348693847656, -0.024810075759887695, -0.023976802825927734, -0.023143529891967773, -0.022310256958007812, -0.02147698402404785, -0.02064371109008789, -0.01981043815612793, -0.01897716522216797, -0.018143892288208008, -0.017310619354248047, -0.016477346420288086, -0.015644073486328125, -0.014810800552368164, -0.013977527618408203, -0.013144254684448242, -0.012310981750488281, -0.01147770881652832, -0.01064443588256836, -0.009811162948608398, -0.008977890014648438, -0.008144617080688477, -0.007311344146728516, -0.006478071212768555, -0.005644798278808594, -0.004811525344848633, -0.003978252410888672, -0.003144979476928711, -0.00231170654296875, -0.001478433609008789, -0.0006451606750488281, 0.0001881122589111328, 0.0010213851928710938, 0.0018546581268310547, 0.0026879310607910156, 0.0035212039947509766, 0.0043544769287109375, 0.0051877498626708984, 0.006021022796630859, 0.00685429573059082, 0.007687568664550781, 0.008520841598510742, 0.009354114532470703, 0.010187387466430664, 0.011020660400390625, 0.011853933334350586, 0.012687206268310547, 0.013520479202270508, 0.014353752136230469, 0.01518702507019043, 0.01602029800415039, 0.01685357093811035, 0.017686843872070312, 0.018520116806030273, 0.019353389739990234, 0.020186662673950195, 0.021019935607910156, 0.021853208541870117, 0.022686481475830078, 0.02351975440979004, 0.02435302734375]}, "gradients/decoder.transformer.h.12.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 15.0, 159.0, 508.0, 291.0, 39.0, 4.0, 1.0], "bins": [-0.007772545330226421, -0.0076423571445047855, -0.00751216895878315, -0.007381980773061514, -0.007251792587339878, -0.007121603935956955, -0.006991415750235319, -0.006861227564513683, -0.0067310393787920475, -0.006600851193070412, -0.006470663007348776, -0.00634047482162714, -0.006210286170244217, -0.006080097984522581, -0.005949909798800945, -0.0058197216130793095, -0.005689533427357674, -0.005559345241636038, -0.005429157055914402, -0.005298968870192766, -0.00516878068447113, -0.005038592033088207, -0.004908403847366571, -0.004778215661644936, -0.0046480274759233, -0.004517839290201664, -0.004387651104480028, -0.004257462918758392, -0.004127274267375469, -0.003997086081653833, -0.0038668978959321976, -0.0037367097102105618, -0.0036065219901502132, -0.0034763338044285774, -0.0033461456187069416, -0.003215957200154662, -0.0030857690144330263, -0.0029555808287113905, -0.002825392410159111, -0.002695204224437475, -0.0025650160387158394, -0.0024348278529942036, -0.0023046396672725677, -0.0021744512487202883, -0.0020442630629986525, -0.0019140748772770166, -0.001783886575140059, -0.0016536982730031013, -0.0015235100872814655, -0.0013933219015598297, -0.001263133599422872, -0.0011329452972859144, -0.0010027571115642786, -0.0008725688676349819, -0.0007423806237056851, -0.0006121923797763884, -0.00048200407763943076, -0.00035181583371013403, -0.0002216275897808373, -9.143934585154057e-05, 3.8748898077756166e-05, 0.0001689371420070529, 0.00029912538593634963, 0.00042931362986564636, 0.0005595018737949431]}, "gradients/decoder.transformer.h.12.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 2.0, 1.0, 6.0, 5.0, 8.0, 6.0, 10.0, 5.0, 12.0, 16.0, 17.0, 15.0, 19.0, 22.0, 33.0, 19.0, 32.0, 34.0, 33.0, 25.0, 30.0, 41.0, 31.0, 37.0, 48.0, 30.0, 35.0, 30.0, 35.0, 36.0, 43.0, 42.0, 33.0, 26.0, 22.0, 24.0, 27.0, 18.0, 12.0, 16.0, 8.0, 11.0, 9.0, 11.0, 13.0, 4.0, 6.0, 5.0, 2.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.000517725944519043, -0.0005003409460186958, -0.0004829559475183487, -0.00046557094901800156, -0.0004481859505176544, -0.0004308009520173073, -0.00041341595351696014, -0.000396030955016613, -0.00037864595651626587, -0.00036126095801591873, -0.0003438759595155716, -0.00032649096101522446, -0.0003091059625148773, -0.0002917209640145302, -0.00027433596551418304, -0.0002569509670138359, -0.00023956596851348877, -0.00022218097001314163, -0.0002047959715127945, -0.00018741097301244736, -0.00017002597451210022, -0.00015264097601175308, -0.00013525597751140594, -0.00011787097901105881, -0.00010048598051071167, -8.310098201036453e-05, -6.57159835100174e-05, -4.833098500967026e-05, -3.094598650932312e-05, -1.3560988008975983e-05, 3.824010491371155e-06, 2.1209008991718292e-05, 3.859400749206543e-05, 5.597900599241257e-05, 7.33640044927597e-05, 9.074900299310684e-05, 0.00010813400149345398, 0.00012551899999380112, 0.00014290399849414825, 0.0001602889969944954, 0.00017767399549484253, 0.00019505899399518967, 0.0002124439924955368, 0.00022982899099588394, 0.0002472139894962311, 0.0002645989879965782, 0.00028198398649692535, 0.0002993689849972725, 0.00031675398349761963, 0.00033413898199796677, 0.0003515239804983139, 0.00036890897899866104, 0.0003862939774990082, 0.0004036789759993553, 0.00042106397449970245, 0.0004384489730000496, 0.00045583397150039673, 0.00047321897000074387, 0.000490603968501091, 0.0005079889670014381, 0.0005253739655017853, 0.0005427589640021324, 0.0005601439625024796, 0.0005775289610028267, 0.0005949139595031738]}, "gradients/decoder.transformer.h.12.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 0.0, 4.0, 3.0, 8.0, 10.0, 9.0, 9.0, 13.0, 18.0, 24.0, 19.0, 35.0, 25.0, 33.0, 37.0, 42.0, 37.0, 43.0, 48.0, 42.0, 47.0, 48.0, 60.0, 51.0, 56.0, 34.0, 49.0, 25.0, 28.0, 29.0, 23.0, 21.0, 15.0, 12.0, 9.0, 7.0, 10.0, 12.0, 8.0, 1.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.984375, -5.7679443359375, -5.551513671875, -5.3350830078125, -5.11865234375, -4.9022216796875, -4.685791015625, -4.4693603515625, -4.2529296875, -4.0364990234375, -3.820068359375, -3.6036376953125, -3.38720703125, -3.1707763671875, -2.954345703125, -2.7379150390625, -2.521484375, -2.3050537109375, -2.088623046875, -1.8721923828125, -1.65576171875, -1.4393310546875, -1.222900390625, -1.0064697265625, -0.7900390625, -0.5736083984375, -0.357177734375, -0.1407470703125, 0.07568359375, 0.2921142578125, 0.508544921875, 0.7249755859375, 0.94140625, 1.1578369140625, 1.374267578125, 1.5906982421875, 1.80712890625, 2.0235595703125, 2.239990234375, 2.4564208984375, 2.6728515625, 2.8892822265625, 3.105712890625, 3.3221435546875, 3.53857421875, 3.7550048828125, 3.971435546875, 4.1878662109375, 4.404296875, 4.6207275390625, 4.837158203125, 5.0535888671875, 5.27001953125, 5.4864501953125, 5.702880859375, 5.9193115234375, 6.1357421875, 6.3521728515625, 6.568603515625, 6.7850341796875, 7.00146484375, 7.2178955078125, 7.434326171875, 7.6507568359375, 7.8671875]}, "gradients/decoder.transformer.h.12.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 3.0, 7.0, 7.0, 13.0, 10.0, 21.0, 32.0, 39.0, 75.0, 116.0, 169.0, 353.0, 611.0, 1292.0, 3405.0, 9194.0, 28635.0, 94740.0, 306959.0, 405852.0, 135334.0, 40707.0, 12989.0, 4509.0, 1754.0, 729.0, 408.0, 226.0, 126.0, 79.0, 61.0, 34.0, 32.0, 13.0, 7.0, 13.0, 5.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.5546875, -8.296630859375, -8.03857421875, -7.780517578125, -7.5224609375, -7.264404296875, -7.00634765625, -6.748291015625, -6.490234375, -6.232177734375, -5.97412109375, -5.716064453125, -5.4580078125, -5.199951171875, -4.94189453125, -4.683837890625, -4.42578125, -4.167724609375, -3.90966796875, -3.651611328125, -3.3935546875, -3.135498046875, -2.87744140625, -2.619384765625, -2.361328125, -2.103271484375, -1.84521484375, -1.587158203125, -1.3291015625, -1.071044921875, -0.81298828125, -0.554931640625, -0.296875, -0.038818359375, 0.21923828125, 0.477294921875, 0.7353515625, 0.993408203125, 1.25146484375, 1.509521484375, 1.767578125, 2.025634765625, 2.28369140625, 2.541748046875, 2.7998046875, 3.057861328125, 3.31591796875, 3.573974609375, 3.83203125, 4.090087890625, 4.34814453125, 4.606201171875, 4.8642578125, 5.122314453125, 5.38037109375, 5.638427734375, 5.896484375, 6.154541015625, 6.41259765625, 6.670654296875, 6.9287109375, 7.186767578125, 7.44482421875, 7.702880859375, 7.9609375]}, "gradients/decoder.transformer.h.12.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 4.0, 0.0, 1.0, 6.0, 11.0, 5.0, 10.0, 18.0, 20.0, 23.0, 19.0, 29.0, 25.0, 27.0, 27.0, 39.0, 53.0, 43.0, 51.0, 84.0, 192.0, 1447.0, 310.0, 140.0, 65.0, 62.0, 45.0, 41.0, 36.0, 46.0, 37.0, 24.0, 15.0, 23.0, 13.0, 5.0, 11.0, 8.0, 18.0, 2.0, 9.0, 7.0, 2.0, 5.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-20.9375, -20.279296875, -19.62109375, -18.962890625, -18.3046875, -17.646484375, -16.98828125, -16.330078125, -15.671875, -15.013671875, -14.35546875, -13.697265625, -13.0390625, -12.380859375, -11.72265625, -11.064453125, -10.40625, -9.748046875, -9.08984375, -8.431640625, -7.7734375, -7.115234375, -6.45703125, -5.798828125, -5.140625, -4.482421875, -3.82421875, -3.166015625, -2.5078125, -1.849609375, -1.19140625, -0.533203125, 0.125, 0.783203125, 1.44140625, 2.099609375, 2.7578125, 3.416015625, 4.07421875, 4.732421875, 5.390625, 6.048828125, 6.70703125, 7.365234375, 8.0234375, 8.681640625, 9.33984375, 9.998046875, 10.65625, 11.314453125, 11.97265625, 12.630859375, 13.2890625, 13.947265625, 14.60546875, 15.263671875, 15.921875, 16.580078125, 17.23828125, 17.896484375, 18.5546875, 19.212890625, 19.87109375, 20.529296875, 21.1875]}, "gradients/decoder.transformer.h.12.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 3.0, 4.0, 9.0, 10.0, 11.0, 28.0, 23.0, 46.0, 65.0, 83.0, 111.0, 215.0, 420.0, 1224.0, 36865.0, 3101728.0, 3465.0, 642.0, 257.0, 182.0, 102.0, 69.0, 53.0, 23.0, 22.0, 17.0, 13.0, 6.0, 5.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-122.6875, -119.3935546875, -116.099609375, -112.8056640625, -109.51171875, -106.2177734375, -102.923828125, -99.6298828125, -96.3359375, -93.0419921875, -89.748046875, -86.4541015625, -83.16015625, -79.8662109375, -76.572265625, -73.2783203125, -69.984375, -66.6904296875, -63.396484375, -60.1025390625, -56.80859375, -53.5146484375, -50.220703125, -46.9267578125, -43.6328125, -40.3388671875, -37.044921875, -33.7509765625, -30.45703125, -27.1630859375, -23.869140625, -20.5751953125, -17.28125, -13.9873046875, -10.693359375, -7.3994140625, -4.10546875, -0.8115234375, 2.482421875, 5.7763671875, 9.0703125, 12.3642578125, 15.658203125, 18.9521484375, 22.24609375, 25.5400390625, 28.833984375, 32.1279296875, 35.421875, 38.7158203125, 42.009765625, 45.3037109375, 48.59765625, 51.8916015625, 55.185546875, 58.4794921875, 61.7734375, 65.0673828125, 68.361328125, 71.6552734375, 74.94921875, 78.2431640625, 81.537109375, 84.8310546875, 88.125]}, "gradients/decoder.transformer.h.12.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 7.0, 5.0, 17.0, 31.0, 39.0, 51.0, 88.0, 108.0, 129.0, 130.0, 99.0, 107.0, 70.0, 66.0, 27.0, 20.0, 6.0, 7.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.224462509155273, -24.33256721496582, -23.440671920776367, -22.548778533935547, -21.656883239746094, -20.76498794555664, -19.873092651367188, -18.981197357177734, -18.08930206298828, -17.197406768798828, -16.305511474609375, -15.413617134094238, -14.521722793579102, -13.629827499389648, -12.737932205200195, -11.846036911010742, -10.954143524169922, -10.062248229980469, -9.170353889465332, -8.278458595275879, -7.386563777923584, -6.494668960571289, -5.602773666381836, -4.710878849029541, -3.818984031677246, -2.927089214324951, -2.035194158554077, -1.1432991027832031, -0.2514042854309082, 0.6404905319213867, 1.5323858261108398, 2.4242806434631348, 3.316173553466797, 4.208068370819092, 5.099963188171387, 5.99185848236084, 6.883753299713135, 7.77564811706543, 8.667543411254883, 9.559438705444336, 10.451333045959473, 11.343228340148926, 12.235122680664062, 13.127017974853516, 14.018913269042969, 14.910807609558105, 15.802702903747559, 16.694597244262695, 17.58649253845215, 18.4783878326416, 19.370283126831055, 20.262176513671875, 21.154071807861328, 22.04596710205078, 22.937862396240234, 23.829757690429688, 24.72165298461914, 25.613548278808594, 26.505443572998047, 27.3973388671875, 28.28923225402832, 29.181127548217773, 30.073022842407227, 30.96491813659668, 31.8568115234375]}, "gradients/decoder.transformer.h.12.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 6.0, 5.0, 8.0, 10.0, 7.0, 10.0, 9.0, 13.0, 20.0, 17.0, 22.0, 15.0, 26.0, 22.0, 27.0, 44.0, 26.0, 42.0, 35.0, 38.0, 46.0, 28.0, 33.0, 51.0, 51.0, 30.0, 31.0, 44.0, 34.0, 25.0, 21.0, 28.0, 26.0, 24.0, 19.0, 21.0, 18.0, 19.0, 11.0, 9.0, 9.0, 7.0, 6.0, 3.0, 6.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-43.384952545166016, -41.89735794067383, -40.40976333618164, -38.92216873168945, -37.434574127197266, -35.94697952270508, -34.45938491821289, -32.9717903137207, -31.484195709228516, -29.996601104736328, -28.50900650024414, -27.021411895751953, -25.533817291259766, -24.046222686767578, -22.55862808227539, -21.071033477783203, -19.583438873291016, -18.095844268798828, -16.60824966430664, -15.120655059814453, -13.633060455322266, -12.145465850830078, -10.65787124633789, -9.170276641845703, -7.682682037353516, -6.195087432861328, -4.707492828369141, -3.219898223876953, -1.7323036193847656, -0.24470901489257812, 1.2428855895996094, 2.730480194091797, 4.218074798583984, 5.705669403076172, 7.193264007568359, 8.680858612060547, 10.168453216552734, 11.656047821044922, 13.14364242553711, 14.631237030029297, 16.118831634521484, 17.606426239013672, 19.09402084350586, 20.581615447998047, 22.069210052490234, 23.556804656982422, 25.04439926147461, 26.531993865966797, 28.019588470458984, 29.507183074951172, 30.99477767944336, 32.48237228393555, 33.969966888427734, 35.45756149291992, 36.94515609741211, 38.4327507019043, 39.920345306396484, 41.40793991088867, 42.89553451538086, 44.38312911987305, 45.870723724365234, 47.35831832885742, 48.84591293334961, 50.3335075378418, 51.821102142333984]}, "gradients/decoder.transformer.h.11.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 4.0, 5.0, 12.0, 10.0, 13.0, 22.0, 14.0, 19.0, 25.0, 29.0, 34.0, 41.0, 32.0, 30.0, 48.0, 33.0, 47.0, 51.0, 62.0, 58.0, 50.0, 42.0, 50.0, 34.0, 49.0, 31.0, 28.0, 28.0, 16.0, 19.0, 14.0, 11.0, 13.0, 7.0, 7.0, 7.0, 4.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.4609375, -6.223388671875, -5.98583984375, -5.748291015625, -5.5107421875, -5.273193359375, -5.03564453125, -4.798095703125, -4.560546875, -4.322998046875, -4.08544921875, -3.847900390625, -3.6103515625, -3.372802734375, -3.13525390625, -2.897705078125, -2.66015625, -2.422607421875, -2.18505859375, -1.947509765625, -1.7099609375, -1.472412109375, -1.23486328125, -0.997314453125, -0.759765625, -0.522216796875, -0.28466796875, -0.047119140625, 0.1904296875, 0.427978515625, 0.66552734375, 0.903076171875, 1.140625, 1.378173828125, 1.61572265625, 1.853271484375, 2.0908203125, 2.328369140625, 2.56591796875, 2.803466796875, 3.041015625, 3.278564453125, 3.51611328125, 3.753662109375, 3.9912109375, 4.228759765625, 4.46630859375, 4.703857421875, 4.94140625, 5.178955078125, 5.41650390625, 5.654052734375, 5.8916015625, 6.129150390625, 6.36669921875, 6.604248046875, 6.841796875, 7.079345703125, 7.31689453125, 7.554443359375, 7.7919921875, 8.029541015625, 8.26708984375, 8.504638671875, 8.7421875]}, "gradients/decoder.transformer.h.11.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 3.0, 0.0, 2.0, 5.0, 3.0, 4.0, 9.0, 8.0, 15.0, 10.0, 17.0, 22.0, 42.0, 39.0, 40.0, 45.0, 71.0, 81.0, 124.0, 167.0, 373.0, 977.0, 4206.0, 36560.0, 486669.0, 2438500.0, 1118283.0, 96809.0, 8442.0, 1478.0, 470.0, 250.0, 148.0, 85.0, 78.0, 47.0, 38.0, 35.0, 20.0, 30.0, 22.0, 17.0, 14.0, 8.0, 13.0, 2.0, 4.0, 4.0, 2.0, 2.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.328125, -12.863525390625, -12.39892578125, -11.934326171875, -11.4697265625, -11.005126953125, -10.54052734375, -10.075927734375, -9.611328125, -9.146728515625, -8.68212890625, -8.217529296875, -7.7529296875, -7.288330078125, -6.82373046875, -6.359130859375, -5.89453125, -5.429931640625, -4.96533203125, -4.500732421875, -4.0361328125, -3.571533203125, -3.10693359375, -2.642333984375, -2.177734375, -1.713134765625, -1.24853515625, -0.783935546875, -0.3193359375, 0.145263671875, 0.60986328125, 1.074462890625, 1.5390625, 2.003662109375, 2.46826171875, 2.932861328125, 3.3974609375, 3.862060546875, 4.32666015625, 4.791259765625, 5.255859375, 5.720458984375, 6.18505859375, 6.649658203125, 7.1142578125, 7.578857421875, 8.04345703125, 8.508056640625, 8.97265625, 9.437255859375, 9.90185546875, 10.366455078125, 10.8310546875, 11.295654296875, 11.76025390625, 12.224853515625, 12.689453125, 13.154052734375, 13.61865234375, 14.083251953125, 14.5478515625, 15.012451171875, 15.47705078125, 15.941650390625, 16.40625]}, "gradients/decoder.transformer.h.11.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 4.0, 3.0, 8.0, 12.0, 13.0, 17.0, 38.0, 38.0, 43.0, 65.0, 77.0, 143.0, 179.0, 294.0, 365.0, 484.0, 523.0, 462.0, 413.0, 269.0, 210.0, 128.0, 72.0, 52.0, 47.0, 38.0, 20.0, 21.0, 15.0, 6.0, 8.0, 6.0, 5.0, 6.0, 1.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.546875, -12.09130859375, -11.6357421875, -11.18017578125, -10.724609375, -10.26904296875, -9.8134765625, -9.35791015625, -8.90234375, -8.44677734375, -7.9912109375, -7.53564453125, -7.080078125, -6.62451171875, -6.1689453125, -5.71337890625, -5.2578125, -4.80224609375, -4.3466796875, -3.89111328125, -3.435546875, -2.97998046875, -2.5244140625, -2.06884765625, -1.61328125, -1.15771484375, -0.7021484375, -0.24658203125, 0.208984375, 0.66455078125, 1.1201171875, 1.57568359375, 2.03125, 2.48681640625, 2.9423828125, 3.39794921875, 3.853515625, 4.30908203125, 4.7646484375, 5.22021484375, 5.67578125, 6.13134765625, 6.5869140625, 7.04248046875, 7.498046875, 7.95361328125, 8.4091796875, 8.86474609375, 9.3203125, 9.77587890625, 10.2314453125, 10.68701171875, 11.142578125, 11.59814453125, 12.0537109375, 12.50927734375, 12.96484375, 13.42041015625, 13.8759765625, 14.33154296875, 14.787109375, 15.24267578125, 15.6982421875, 16.15380859375, 16.609375]}, "gradients/decoder.transformer.h.11.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 7.0, 5.0, 4.0, 8.0, 9.0, 12.0, 11.0, 28.0, 40.0, 50.0, 51.0, 93.0, 137.0, 245.0, 381.0, 825.0, 45158.0, 4115479.0, 29888.0, 745.0, 393.0, 249.0, 154.0, 81.0, 67.0, 45.0, 45.0, 27.0, 17.0, 15.0, 6.0, 10.0, 4.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-78.75, -76.6484375, -74.546875, -72.4453125, -70.34375, -68.2421875, -66.140625, -64.0390625, -61.9375, -59.8359375, -57.734375, -55.6328125, -53.53125, -51.4296875, -49.328125, -47.2265625, -45.125, -43.0234375, -40.921875, -38.8203125, -36.71875, -34.6171875, -32.515625, -30.4140625, -28.3125, -26.2109375, -24.109375, -22.0078125, -19.90625, -17.8046875, -15.703125, -13.6015625, -11.5, -9.3984375, -7.296875, -5.1953125, -3.09375, -0.9921875, 1.109375, 3.2109375, 5.3125, 7.4140625, 9.515625, 11.6171875, 13.71875, 15.8203125, 17.921875, 20.0234375, 22.125, 24.2265625, 26.328125, 28.4296875, 30.53125, 32.6328125, 34.734375, 36.8359375, 38.9375, 41.0390625, 43.140625, 45.2421875, 47.34375, 49.4453125, 51.546875, 53.6484375, 55.75]}, "gradients/decoder.transformer.h.11.ln_2.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 15.0, 107.0, 355.0, 382.0, 135.0, 21.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-83.03279876708984, -76.68462371826172, -70.33644104003906, -63.98826599121094, -57.64009094238281, -51.29191589355469, -44.9437370300293, -38.595558166503906, -32.24738311767578, -25.899206161499023, -19.551029205322266, -13.202852249145508, -6.85467529296875, -0.5064983367919922, 5.841678619384766, 12.189857482910156, 18.53803253173828, 24.88620948791504, 31.234386444091797, 37.58256530761719, 43.93074035644531, 50.27891540527344, 56.62709426879883, 62.97527313232422, 69.32344818115234, 75.67162322998047, 82.01980590820312, 88.36798095703125, 94.71615600585938, 101.0643310546875, 107.41250610351562, 113.76068878173828, 120.10885620117188, 126.45703125, 132.80520629882812, 139.15338134765625, 145.50155639648438, 151.84974670410156, 158.1979217529297, 164.5460968017578, 170.89427185058594, 177.24244689941406, 183.5906219482422, 189.9387969970703, 196.2869873046875, 202.63516235351562, 208.98333740234375, 215.33151245117188, 221.6796875, 228.02786254882812, 234.37603759765625, 240.72421264648438, 247.0723876953125, 253.4205780029297, 259.76873779296875, 266.116943359375, 272.465087890625, 278.8132629394531, 285.16143798828125, 291.5096130371094, 297.8577880859375, 304.2059631347656, 310.55413818359375, 316.90234375, 323.2505187988281]}, "gradients/decoder.transformer.h.11.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 3.0, 3.0, 4.0, 3.0, 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 10.0, 18.0, 23.0, 26.0, 23.0, 31.0, 22.0, 35.0, 38.0, 32.0, 38.0, 42.0, 28.0, 42.0, 34.0, 49.0, 38.0, 46.0, 30.0, 36.0, 39.0, 32.0, 32.0, 28.0, 25.0, 22.0, 31.0, 13.0, 20.0, 10.0, 13.0, 10.0, 11.0, 7.0, 2.0, 8.0, 2.0, 3.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-33.16272735595703, -31.94219970703125, -30.721670150756836, -29.501142501831055, -28.280614852905273, -27.06008529663086, -25.839557647705078, -24.619029998779297, -23.398502349853516, -22.177974700927734, -20.95744514465332, -19.73691749572754, -18.516389846801758, -17.295860290527344, -16.075332641601562, -14.854804992675781, -13.634275436401367, -12.41374683380127, -11.193219184875488, -9.97269058227539, -8.75216293334961, -7.531634330749512, -6.311105728149414, -5.090578079223633, -3.870049476623535, -2.6495213508605957, -1.4289929866790771, -0.2084646224975586, 1.0120635032653809, 2.2325916290283203, 3.453120231628418, 4.673647880554199, 5.894176483154297, 7.114704608917236, 8.335232734680176, 9.555761337280273, 10.776288986206055, 11.996817588806152, 13.21734619140625, 14.437873840332031, 15.658402442932129, 16.878931045532227, 18.099458694458008, 19.319988250732422, 20.540515899658203, 21.761043548583984, 22.981571197509766, 24.202098846435547, 25.42262840270996, 26.643156051635742, 27.863685607910156, 29.084213256835938, 30.30474090576172, 31.5252685546875, 32.74579620361328, 33.96632385253906, 35.18685531616211, 36.40738296508789, 37.62791061401367, 38.84844207763672, 40.0689697265625, 41.28949737548828, 42.51002502441406, 43.730552673339844, 44.951080322265625]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 5.0, 3.0, 7.0, 9.0, 10.0, 8.0, 11.0, 16.0, 20.0, 23.0, 22.0, 42.0, 27.0, 36.0, 51.0, 47.0, 40.0, 44.0, 44.0, 59.0, 42.0, 44.0, 43.0, 41.0, 30.0, 57.0, 31.0, 23.0, 27.0, 24.0, 19.0, 18.0, 18.0, 13.0, 13.0, 10.0, 7.0, 7.0, 5.0, 3.0, 1.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.33984375, -6.1270751953125, -5.914306640625, -5.7015380859375, -5.48876953125, -5.2760009765625, -5.063232421875, -4.8504638671875, -4.6376953125, -4.4249267578125, -4.212158203125, -3.9993896484375, -3.78662109375, -3.5738525390625, -3.361083984375, -3.1483154296875, -2.935546875, -2.7227783203125, -2.510009765625, -2.2972412109375, -2.08447265625, -1.8717041015625, -1.658935546875, -1.4461669921875, -1.2333984375, -1.0206298828125, -0.807861328125, -0.5950927734375, -0.38232421875, -0.1695556640625, 0.043212890625, 0.2559814453125, 0.46875, 0.6815185546875, 0.894287109375, 1.1070556640625, 1.31982421875, 1.5325927734375, 1.745361328125, 1.9581298828125, 2.1708984375, 2.3836669921875, 2.596435546875, 2.8092041015625, 3.02197265625, 3.2347412109375, 3.447509765625, 3.6602783203125, 3.873046875, 4.0858154296875, 4.298583984375, 4.5113525390625, 4.72412109375, 4.9368896484375, 5.149658203125, 5.3624267578125, 5.5751953125, 5.7879638671875, 6.000732421875, 6.2135009765625, 6.42626953125, 6.6390380859375, 6.851806640625, 7.0645751953125, 7.27734375]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.weight": {"_type": "histogram", "values": [5.0, 5.0, 8.0, 7.0, 6.0, 11.0, 20.0, 27.0, 34.0, 50.0, 96.0, 125.0, 152.0, 235.0, 320.0, 454.0, 667.0, 993.0, 1514.0, 2129.0, 3085.0, 4600.0, 6753.0, 10237.0, 15179.0, 22716.0, 34503.0, 53355.0, 83043.0, 128209.0, 177811.0, 168302.0, 116096.0, 74945.0, 48178.0, 31375.0, 20674.0, 13846.0, 9463.0, 6175.0, 4092.0, 2803.0, 1912.0, 1374.0, 954.0, 600.0, 419.0, 300.0, 206.0, 140.0, 110.0, 76.0, 58.0, 45.0, 20.0, 14.0, 21.0, 13.0, 1.0, 9.0, 0.0, 1.0, 2.0, 3.0], "bins": [-0.40185546875, -0.3888435363769531, -0.37583160400390625, -0.3628196716308594, -0.3498077392578125, -0.3367958068847656, -0.32378387451171875, -0.3107719421386719, -0.297760009765625, -0.2847480773925781, -0.27173614501953125, -0.2587242126464844, -0.2457122802734375, -0.23270034790039062, -0.21968841552734375, -0.20667648315429688, -0.19366455078125, -0.18065261840820312, -0.16764068603515625, -0.15462875366210938, -0.1416168212890625, -0.12860488891601562, -0.11559295654296875, -0.10258102416992188, -0.089569091796875, -0.07655715942382812, -0.06354522705078125, -0.050533294677734375, -0.0375213623046875, -0.024509429931640625, -0.01149749755859375, 0.001514434814453125, 0.0145263671875, 0.027538299560546875, 0.04055023193359375, 0.053562164306640625, 0.0665740966796875, 0.07958602905273438, 0.09259796142578125, 0.10560989379882812, 0.118621826171875, 0.13163375854492188, 0.14464569091796875, 0.15765762329101562, 0.1706695556640625, 0.18368148803710938, 0.19669342041015625, 0.20970535278320312, 0.22271728515625, 0.23572921752929688, 0.24874114990234375, 0.2617530822753906, 0.2747650146484375, 0.2877769470214844, 0.30078887939453125, 0.3138008117675781, 0.326812744140625, 0.3398246765136719, 0.35283660888671875, 0.3658485412597656, 0.3788604736328125, 0.3918724060058594, 0.40488433837890625, 0.4178962707519531, 0.430908203125]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 2.0, 5.0, 7.0, 12.0, 7.0, 12.0, 10.0, 10.0, 8.0, 14.0, 16.0, 20.0, 29.0, 18.0, 26.0, 21.0, 44.0, 35.0, 41.0, 38.0, 42.0, 39.0, 1066.0, 38.0, 41.0, 35.0, 34.0, 43.0, 37.0, 25.0, 41.0, 29.0, 30.0, 26.0, 22.0, 20.0, 21.0, 15.0, 8.0, 11.0, 5.0, 7.0, 2.0, 2.0, 2.0, 7.0, 3.0, 1.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.59375, -3.47332763671875, -3.3529052734375, -3.23248291015625, -3.112060546875, -2.99163818359375, -2.8712158203125, -2.75079345703125, -2.63037109375, -2.50994873046875, -2.3895263671875, -2.26910400390625, -2.148681640625, -2.02825927734375, -1.9078369140625, -1.78741455078125, -1.6669921875, -1.54656982421875, -1.4261474609375, -1.30572509765625, -1.185302734375, -1.06488037109375, -0.9444580078125, -0.82403564453125, -0.70361328125, -0.58319091796875, -0.4627685546875, -0.34234619140625, -0.221923828125, -0.10150146484375, 0.0189208984375, 0.13934326171875, 0.259765625, 0.38018798828125, 0.5006103515625, 0.62103271484375, 0.741455078125, 0.86187744140625, 0.9822998046875, 1.10272216796875, 1.22314453125, 1.34356689453125, 1.4639892578125, 1.58441162109375, 1.704833984375, 1.82525634765625, 1.9456787109375, 2.06610107421875, 2.1865234375, 2.30694580078125, 2.4273681640625, 2.54779052734375, 2.668212890625, 2.78863525390625, 2.9090576171875, 3.02947998046875, 3.14990234375, 3.27032470703125, 3.3907470703125, 3.51116943359375, 3.631591796875, 3.75201416015625, 3.8724365234375, 3.99285888671875, 4.11328125]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 6.0, 15.0, 11.0, 12.0, 17.0, 35.0, 52.0, 93.0, 117.0, 158.0, 239.0, 314.0, 493.0, 727.0, 1028.0, 1400.0, 2124.0, 2934.0, 4316.0, 6134.0, 8946.0, 12722.0, 17912.0, 26065.0, 37977.0, 55498.0, 80954.0, 114855.0, 440824.0, 901668.0, 116469.0, 81699.0, 55875.0, 38222.0, 26399.0, 18523.0, 12599.0, 8839.0, 6369.0, 4433.0, 3096.0, 2166.0, 1509.0, 1038.0, 682.0, 492.0, 366.0, 229.0, 152.0, 112.0, 65.0, 50.0, 38.0, 29.0, 16.0, 18.0, 5.0, 2.0, 4.0, 3.0, 2.0], "bins": [-0.323486328125, -0.3133811950683594, -0.30327606201171875, -0.2931709289550781, -0.2830657958984375, -0.2729606628417969, -0.26285552978515625, -0.2527503967285156, -0.242645263671875, -0.23254013061523438, -0.22243499755859375, -0.21232986450195312, -0.2022247314453125, -0.19211959838867188, -0.18201446533203125, -0.17190933227539062, -0.16180419921875, -0.15169906616210938, -0.14159393310546875, -0.13148880004882812, -0.1213836669921875, -0.11127853393554688, -0.10117340087890625, -0.09106826782226562, -0.080963134765625, -0.07085800170898438, -0.06075286865234375, -0.050647735595703125, -0.0405426025390625, -0.030437469482421875, -0.02033233642578125, -0.010227203369140625, -0.0001220703125, 0.009983062744140625, 0.02008819580078125, 0.030193328857421875, 0.0402984619140625, 0.050403594970703125, 0.06050872802734375, 0.07061386108398438, 0.080718994140625, 0.09082412719726562, 0.10092926025390625, 0.11103439331054688, 0.1211395263671875, 0.13124465942382812, 0.14134979248046875, 0.15145492553710938, 0.16156005859375, 0.17166519165039062, 0.18177032470703125, 0.19187545776367188, 0.2019805908203125, 0.21208572387695312, 0.22219085693359375, 0.23229598999023438, 0.242401123046875, 0.2525062561035156, 0.26261138916015625, 0.2727165222167969, 0.2828216552734375, 0.2929267883300781, 0.30303192138671875, 0.3131370544433594, 0.3232421875]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 2.0, 3.0, 7.0, 4.0, 7.0, 7.0, 12.0, 6.0, 7.0, 18.0, 19.0, 22.0, 30.0, 29.0, 43.0, 52.0, 51.0, 57.0, 60.0, 54.0, 56.0, 50.0, 62.0, 52.0, 44.0, 47.0, 34.0, 33.0, 36.0, 20.0, 13.0, 14.0, 14.0, 8.0, 8.0, 5.0, 2.0, 6.0, 5.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.001148223876953125, -0.001107737421989441, -0.0010672509670257568, -0.0010267645120620728, -0.0009862780570983887, -0.0009457916021347046, -0.0009053051471710205, -0.0008648186922073364, -0.0008243322372436523, -0.0007838457822799683, -0.0007433593273162842, -0.0007028728723526001, -0.000662386417388916, -0.0006218999624252319, -0.0005814135074615479, -0.0005409270524978638, -0.0005004405975341797, -0.0004599541425704956, -0.0004194676876068115, -0.00037898123264312744, -0.00033849477767944336, -0.0002980083227157593, -0.0002575218677520752, -0.0002170354127883911, -0.00017654895782470703, -0.00013606250286102295, -9.557604789733887e-05, -5.5089592933654785e-05, -1.4603137969970703e-05, 2.588331699371338e-05, 6.636977195739746e-05, 0.00010685622692108154, 0.00014734268188476562, 0.0001878291368484497, 0.0002283155918121338, 0.00026880204677581787, 0.00030928850173950195, 0.00034977495670318604, 0.0003902614116668701, 0.0004307478666305542, 0.0004712343215942383, 0.0005117207765579224, 0.0005522072315216064, 0.0005926936864852905, 0.0006331801414489746, 0.0006736665964126587, 0.0007141530513763428, 0.0007546395063400269, 0.0007951259613037109, 0.000835612416267395, 0.0008760988712310791, 0.0009165853261947632, 0.0009570717811584473, 0.0009975582361221313, 0.0010380446910858154, 0.0010785311460494995, 0.0011190176010131836, 0.0011595040559768677, 0.0011999905109405518, 0.0012404769659042358, 0.00128096342086792, 0.001321449875831604, 0.001361936330795288, 0.0014024227857589722, 0.0014429092407226562]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 7.0, 4.0, 8.0, 8.0, 8.0, 10.0, 19.0, 24.0, 31.0, 34.0, 56.0, 57.0, 98.0, 130.0, 204.0, 427.0, 1436.0, 422944.0, 620195.0, 1668.0, 435.0, 220.0, 145.0, 93.0, 56.0, 49.0, 39.0, 35.0, 28.0, 15.0, 10.0, 11.0, 12.0, 10.0, 6.0, 4.0, 3.0, 6.0, 0.0, 2.0, 5.0, 2.0, 3.0, 1.0, 2.0], "bins": [-0.032379150390625, -0.03150486946105957, -0.03063058853149414, -0.02975630760192871, -0.02888202667236328, -0.02800774574279785, -0.027133464813232422, -0.026259183883666992, -0.025384902954101562, -0.024510622024536133, -0.023636341094970703, -0.022762060165405273, -0.021887779235839844, -0.021013498306274414, -0.020139217376708984, -0.019264936447143555, -0.018390655517578125, -0.017516374588012695, -0.016642093658447266, -0.015767812728881836, -0.014893531799316406, -0.014019250869750977, -0.013144969940185547, -0.012270689010620117, -0.011396408081054688, -0.010522127151489258, -0.009647846221923828, -0.008773565292358398, -0.007899284362792969, -0.007025003433227539, -0.006150722503662109, -0.00527644157409668, -0.00440216064453125, -0.0035278797149658203, -0.0026535987854003906, -0.001779317855834961, -0.0009050369262695312, -3.075599670410156e-05, 0.0008435249328613281, 0.0017178058624267578, 0.0025920867919921875, 0.003466367721557617, 0.004340648651123047, 0.0052149295806884766, 0.006089210510253906, 0.006963491439819336, 0.007837772369384766, 0.008712053298950195, 0.009586334228515625, 0.010460615158081055, 0.011334896087646484, 0.012209177017211914, 0.013083457946777344, 0.013957738876342773, 0.014832019805908203, 0.015706300735473633, 0.016580581665039062, 0.017454862594604492, 0.018329143524169922, 0.01920342445373535, 0.02007770538330078, 0.02095198631286621, 0.02182626724243164, 0.02270054817199707, 0.0235748291015625]}, "gradients/decoder.transformer.h.11.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 356.0, 661.0], "bins": [-0.0279895830899477, -0.02754577435553074, -0.027101963758468628, -0.026658155024051666, -0.026214346289634705, -0.025770535692572594, -0.025326726958155632, -0.02488291636109352, -0.02443910762667656, -0.023995298892259598, -0.023551488295197487, -0.023107679560780525, -0.022663868963718414, -0.022220060229301453, -0.02177625149488449, -0.02133244089782238, -0.02088863216340542, -0.020444823428988457, -0.020001012831926346, -0.019557204097509384, -0.019113395363092422, -0.01866958476603031, -0.01822577603161335, -0.01778196543455124, -0.017338156700134277, -0.016894347965717316, -0.016450537368655205, -0.016006728634238243, -0.015562918968498707, -0.01511910930275917, -0.014675300568342209, -0.014231490902602673, -0.013787681236863136, -0.0133438715711236, -0.012900061905384064, -0.012456253170967102, -0.012012443505227566, -0.01156863383948803, -0.011124825105071068, -0.010681015439331532, -0.010237205773591995, -0.009793396107852459, -0.009349586442112923, -0.008905777707695961, -0.008461968041956425, -0.008018158376216888, -0.0075743491761386395, -0.0071305399760603905, -0.006686730310320854, -0.006242920644581318, -0.005799111444503069, -0.00535530224442482, -0.004911492578685284, -0.004467682912945747, -0.004023873712867498, -0.0035800642799586058, -0.003136254847049713, -0.0026924454141408205, -0.002248635981231928, -0.0018048265483230352, -0.0013610171154141426, -0.00091720768250525, -0.00047339824959635735, -2.9588816687464714e-05, 0.0004142206453252584]}, "gradients/decoder.transformer.h.11.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 3.0, 4.0, 7.0, 3.0, 6.0, 5.0, 12.0, 13.0, 25.0, 20.0, 25.0, 34.0, 31.0, 42.0, 56.0, 57.0, 55.0, 47.0, 60.0, 64.0, 60.0, 50.0, 36.0, 45.0, 37.0, 42.0, 27.0, 39.0, 33.0, 12.0, 16.0, 16.0, 11.0, 6.0, 4.0, 2.0, 1.0, 3.0, 1.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005998015403747559, -0.0005710385739803314, -0.000542275607585907, -0.0005135126411914825, -0.0004847496747970581, -0.00045598670840263367, -0.00042722374200820923, -0.0003984607756137848, -0.00036969780921936035, -0.0003409348428249359, -0.0003121718764305115, -0.00028340891003608704, -0.0002546459436416626, -0.00022588297724723816, -0.00019712001085281372, -0.00016835704445838928, -0.00013959407806396484, -0.0001108311116695404, -8.206814527511597e-05, -5.330517888069153e-05, -2.454221248626709e-05, 4.220753908157349e-06, 3.298372030258179e-05, 6.174668669700623e-05, 9.050965309143066e-05, 0.0001192726194858551, 0.00014803558588027954, 0.00017679855227470398, 0.00020556151866912842, 0.00023432448506355286, 0.0002630874514579773, 0.00029185041785240173, 0.00032061338424682617, 0.0003493763506412506, 0.00037813931703567505, 0.0004069022834300995, 0.0004356652498245239, 0.00046442821621894836, 0.0004931911826133728, 0.0005219541490077972, 0.0005507171154022217, 0.0005794800817966461, 0.0006082430481910706, 0.000637006014585495, 0.0006657689809799194, 0.0006945319473743439, 0.0007232949137687683, 0.0007520578801631927, 0.0007808208465576172, 0.0008095838129520416, 0.0008383467793464661, 0.0008671097457408905, 0.0008958727121353149, 0.0009246356785297394, 0.0009533986449241638, 0.0009821616113185883, 0.0010109245777130127, 0.0010396875441074371, 0.0010684505105018616, 0.001097213476896286, 0.0011259764432907104, 0.0011547394096851349, 0.0011835023760795593, 0.0012122653424739838, 0.0012410283088684082]}, "gradients/decoder.transformer.h.11.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 5.0, 3.0, 7.0, 9.0, 10.0, 8.0, 11.0, 16.0, 20.0, 23.0, 22.0, 42.0, 27.0, 36.0, 51.0, 47.0, 40.0, 44.0, 44.0, 59.0, 42.0, 44.0, 43.0, 41.0, 30.0, 57.0, 31.0, 23.0, 27.0, 24.0, 19.0, 18.0, 18.0, 13.0, 13.0, 10.0, 7.0, 7.0, 5.0, 3.0, 1.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.33984375, -6.1270751953125, -5.914306640625, -5.7015380859375, -5.48876953125, -5.2760009765625, -5.063232421875, -4.8504638671875, -4.6376953125, -4.4249267578125, -4.212158203125, -3.9993896484375, -3.78662109375, -3.5738525390625, -3.361083984375, -3.1483154296875, -2.935546875, -2.7227783203125, -2.510009765625, -2.2972412109375, -2.08447265625, -1.8717041015625, -1.658935546875, -1.4461669921875, -1.2333984375, -1.0206298828125, -0.807861328125, -0.5950927734375, -0.38232421875, -0.1695556640625, 0.043212890625, 0.2559814453125, 0.46875, 0.6815185546875, 0.894287109375, 1.1070556640625, 1.31982421875, 1.5325927734375, 1.745361328125, 1.9581298828125, 2.1708984375, 2.3836669921875, 2.596435546875, 2.8092041015625, 3.02197265625, 3.2347412109375, 3.447509765625, 3.6602783203125, 3.873046875, 4.0858154296875, 4.298583984375, 4.5113525390625, 4.72412109375, 4.9368896484375, 5.149658203125, 5.3624267578125, 5.5751953125, 5.7879638671875, 6.000732421875, 6.2135009765625, 6.42626953125, 6.6390380859375, 6.851806640625, 7.0645751953125, 7.27734375]}, "gradients/decoder.transformer.h.11.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 4.0, 5.0, 6.0, 11.0, 11.0, 14.0, 22.0, 39.0, 40.0, 60.0, 91.0, 137.0, 254.0, 482.0, 991.0, 2105.0, 4293.0, 8760.0, 18017.0, 39115.0, 98200.0, 263284.0, 354660.0, 152265.0, 57362.0, 24824.0, 11880.0, 5794.0, 2839.0, 1399.0, 684.0, 364.0, 188.0, 112.0, 74.0, 51.0, 29.0, 32.0, 19.0, 11.0, 12.0, 9.0, 3.0, 0.0, 4.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-6.2890625, -6.0809326171875, -5.872802734375, -5.6646728515625, -5.45654296875, -5.2484130859375, -5.040283203125, -4.8321533203125, -4.6240234375, -4.4158935546875, -4.207763671875, -3.9996337890625, -3.79150390625, -3.5833740234375, -3.375244140625, -3.1671142578125, -2.958984375, -2.7508544921875, -2.542724609375, -2.3345947265625, -2.12646484375, -1.9183349609375, -1.710205078125, -1.5020751953125, -1.2939453125, -1.0858154296875, -0.877685546875, -0.6695556640625, -0.46142578125, -0.2532958984375, -0.045166015625, 0.1629638671875, 0.37109375, 0.5792236328125, 0.787353515625, 0.9954833984375, 1.20361328125, 1.4117431640625, 1.619873046875, 1.8280029296875, 2.0361328125, 2.2442626953125, 2.452392578125, 2.6605224609375, 2.86865234375, 3.0767822265625, 3.284912109375, 3.4930419921875, 3.701171875, 3.9093017578125, 4.117431640625, 4.3255615234375, 4.53369140625, 4.7418212890625, 4.949951171875, 5.1580810546875, 5.3662109375, 5.5743408203125, 5.782470703125, 5.9906005859375, 6.19873046875, 6.4068603515625, 6.614990234375, 6.8231201171875, 7.03125]}, "gradients/decoder.transformer.h.11.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 5.0, 6.0, 8.0, 8.0, 13.0, 7.0, 8.0, 17.0, 28.0, 27.0, 28.0, 29.0, 34.0, 38.0, 49.0, 62.0, 99.0, 150.0, 397.0, 1425.0, 164.0, 92.0, 57.0, 59.0, 31.0, 34.0, 36.0, 23.0, 34.0, 21.0, 14.0, 9.0, 11.0, 11.0, 2.0, 7.0, 3.0, 5.0, 3.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.609375, -27.7919921875, -26.974609375, -26.1572265625, -25.33984375, -24.5224609375, -23.705078125, -22.8876953125, -22.0703125, -21.2529296875, -20.435546875, -19.6181640625, -18.80078125, -17.9833984375, -17.166015625, -16.3486328125, -15.53125, -14.7138671875, -13.896484375, -13.0791015625, -12.26171875, -11.4443359375, -10.626953125, -9.8095703125, -8.9921875, -8.1748046875, -7.357421875, -6.5400390625, -5.72265625, -4.9052734375, -4.087890625, -3.2705078125, -2.453125, -1.6357421875, -0.818359375, -0.0009765625, 0.81640625, 1.6337890625, 2.451171875, 3.2685546875, 4.0859375, 4.9033203125, 5.720703125, 6.5380859375, 7.35546875, 8.1728515625, 8.990234375, 9.8076171875, 10.625, 11.4423828125, 12.259765625, 13.0771484375, 13.89453125, 14.7119140625, 15.529296875, 16.3466796875, 17.1640625, 17.9814453125, 18.798828125, 19.6162109375, 20.43359375, 21.2509765625, 22.068359375, 22.8857421875, 23.703125]}, "gradients/decoder.transformer.h.11.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 6.0, 4.0, 9.0, 12.0, 11.0, 17.0, 20.0, 32.0, 38.0, 66.0, 84.0, 154.0, 215.0, 375.0, 890.0, 38517.0, 3099070.0, 4633.0, 645.0, 325.0, 174.0, 130.0, 78.0, 56.0, 45.0, 36.0, 20.0, 12.0, 14.0, 3.0, 6.0, 3.0, 5.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-85.4375, -82.7099609375, -79.982421875, -77.2548828125, -74.52734375, -71.7998046875, -69.072265625, -66.3447265625, -63.6171875, -60.8896484375, -58.162109375, -55.4345703125, -52.70703125, -49.9794921875, -47.251953125, -44.5244140625, -41.796875, -39.0693359375, -36.341796875, -33.6142578125, -30.88671875, -28.1591796875, -25.431640625, -22.7041015625, -19.9765625, -17.2490234375, -14.521484375, -11.7939453125, -9.06640625, -6.3388671875, -3.611328125, -0.8837890625, 1.84375, 4.5712890625, 7.298828125, 10.0263671875, 12.75390625, 15.4814453125, 18.208984375, 20.9365234375, 23.6640625, 26.3916015625, 29.119140625, 31.8466796875, 34.57421875, 37.3017578125, 40.029296875, 42.7568359375, 45.484375, 48.2119140625, 50.939453125, 53.6669921875, 56.39453125, 59.1220703125, 61.849609375, 64.5771484375, 67.3046875, 70.0322265625, 72.759765625, 75.4873046875, 78.21484375, 80.9423828125, 83.669921875, 86.3974609375, 89.125]}, "gradients/decoder.transformer.h.11.ln_1.weight": {"_type": "histogram", "values": [101.0, 853.0, 61.0, 3.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.375727653503418, -4.142389297485352, 4.090949058532715, 12.324288368225098, 20.55762481689453, 28.79096221923828, 37.0243034362793, 45.25764083862305, 53.4909782409668, 61.72431564331055, 69.95765686035156, 78.19099426269531, 86.42433166503906, 94.65766906738281, 102.89100646972656, 111.12434387207031, 119.3576889038086, 127.59102630615234, 135.82437133789062, 144.05770874023438, 152.29104614257812, 160.52438354492188, 168.75772094726562, 176.99105834960938, 185.22439575195312, 193.45773315429688, 201.69107055664062, 209.92440795898438, 218.15774536132812, 226.39108276367188, 234.62442016601562, 242.85775756835938, 251.09109497070312, 259.3244323730469, 267.5577697753906, 275.7911071777344, 284.0244445800781, 292.2577819824219, 300.4911193847656, 308.7244567871094, 316.9577941894531, 325.1911315917969, 333.4244689941406, 341.6578063964844, 349.8911437988281, 358.1244812011719, 366.3578186035156, 374.5911560058594, 382.82452392578125, 391.057861328125, 399.29119873046875, 407.5245361328125, 415.75787353515625, 423.9912109375, 432.22454833984375, 440.4578857421875, 448.69122314453125, 456.924560546875, 465.15789794921875, 473.3912353515625, 481.62457275390625, 489.85791015625, 498.09124755859375, 506.3245849609375, 514.5579223632812]}, "gradients/decoder.transformer.h.11.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 3.0, 4.0, 5.0, 10.0, 9.0, 8.0, 15.0, 18.0, 14.0, 22.0, 12.0, 18.0, 12.0, 25.0, 26.0, 29.0, 28.0, 37.0, 38.0, 39.0, 45.0, 38.0, 26.0, 46.0, 35.0, 28.0, 40.0, 34.0, 31.0, 26.0, 38.0, 31.0, 30.0, 35.0, 14.0, 29.0, 20.0, 15.0, 14.0, 9.0, 13.0, 4.0, 6.0, 6.0, 9.0, 7.0, 3.0, 4.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-48.27090835571289, -46.7557487487793, -45.2405891418457, -43.72542953491211, -42.21026611328125, -40.695106506347656, -39.17994689941406, -37.66478729248047, -36.149627685546875, -34.63446807861328, -33.11930847167969, -31.60414695739746, -30.088987350463867, -28.573827743530273, -27.058666229248047, -25.543506622314453, -24.02834701538086, -22.513187408447266, -20.998027801513672, -19.482866287231445, -17.96770668029785, -16.452547073364258, -14.937386512756348, -13.422225952148438, -11.907066345214844, -10.39190673828125, -8.87674617767334, -7.361586093902588, -5.846426010131836, -4.331265926361084, -2.816105842590332, -1.3009452819824219, 0.21421432495117188, 1.7293744087219238, 3.244534492492676, 4.759694576263428, 6.27485466003418, 7.790014743804932, 9.305174827575684, 10.820335388183594, 12.335494995117188, 13.850654602050781, 15.365815162658691, 16.8809757232666, 18.396135330200195, 19.91129493713379, 21.426456451416016, 22.94161605834961, 24.456775665283203, 25.971935272216797, 27.48709487915039, 29.002256393432617, 30.51741600036621, 32.03257751464844, 33.54773712158203, 35.062896728515625, 36.57805633544922, 38.09321594238281, 39.608375549316406, 41.12353515625, 42.638694763183594, 44.15385818481445, 45.66901779174805, 47.18417739868164, 48.699337005615234]}, "gradients/decoder.transformer.h.10.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 5.0, 2.0, 6.0, 7.0, 10.0, 10.0, 9.0, 14.0, 10.0, 19.0, 25.0, 21.0, 30.0, 33.0, 51.0, 38.0, 36.0, 24.0, 51.0, 42.0, 59.0, 38.0, 46.0, 38.0, 41.0, 39.0, 31.0, 44.0, 23.0, 29.0, 28.0, 26.0, 19.0, 15.0, 14.0, 17.0, 17.0, 7.0, 6.0, 12.0, 5.0, 6.0, 5.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.4375, -6.2113037109375, -5.985107421875, -5.7589111328125, -5.53271484375, -5.3065185546875, -5.080322265625, -4.8541259765625, -4.6279296875, -4.4017333984375, -4.175537109375, -3.9493408203125, -3.72314453125, -3.4969482421875, -3.270751953125, -3.0445556640625, -2.818359375, -2.5921630859375, -2.365966796875, -2.1397705078125, -1.91357421875, -1.6873779296875, -1.461181640625, -1.2349853515625, -1.0087890625, -0.7825927734375, -0.556396484375, -0.3302001953125, -0.10400390625, 0.1221923828125, 0.348388671875, 0.5745849609375, 0.80078125, 1.0269775390625, 1.253173828125, 1.4793701171875, 1.70556640625, 1.9317626953125, 2.157958984375, 2.3841552734375, 2.6103515625, 2.8365478515625, 3.062744140625, 3.2889404296875, 3.51513671875, 3.7413330078125, 3.967529296875, 4.1937255859375, 4.419921875, 4.6461181640625, 4.872314453125, 5.0985107421875, 5.32470703125, 5.5509033203125, 5.777099609375, 6.0032958984375, 6.2294921875, 6.4556884765625, 6.681884765625, 6.9080810546875, 7.13427734375, 7.3604736328125, 7.586669921875, 7.8128662109375, 8.0390625]}, "gradients/decoder.transformer.h.10.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 2.0, 3.0, 1.0, 4.0, 7.0, 15.0, 16.0, 13.0, 17.0, 34.0, 19.0, 36.0, 62.0, 69.0, 91.0, 126.0, 210.0, 342.0, 815.0, 3590.0, 49210.0, 1095016.0, 2735125.0, 293462.0, 13136.0, 1527.0, 483.0, 252.0, 161.0, 96.0, 73.0, 62.0, 44.0, 39.0, 26.0, 20.0, 22.0, 11.0, 7.0, 17.0, 9.0, 8.0, 3.0, 4.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.09375, -17.49609375, -16.8984375, -16.30078125, -15.703125, -15.10546875, -14.5078125, -13.91015625, -13.3125, -12.71484375, -12.1171875, -11.51953125, -10.921875, -10.32421875, -9.7265625, -9.12890625, -8.53125, -7.93359375, -7.3359375, -6.73828125, -6.140625, -5.54296875, -4.9453125, -4.34765625, -3.75, -3.15234375, -2.5546875, -1.95703125, -1.359375, -0.76171875, -0.1640625, 0.43359375, 1.03125, 1.62890625, 2.2265625, 2.82421875, 3.421875, 4.01953125, 4.6171875, 5.21484375, 5.8125, 6.41015625, 7.0078125, 7.60546875, 8.203125, 8.80078125, 9.3984375, 9.99609375, 10.59375, 11.19140625, 11.7890625, 12.38671875, 12.984375, 13.58203125, 14.1796875, 14.77734375, 15.375, 15.97265625, 16.5703125, 17.16796875, 17.765625, 18.36328125, 18.9609375, 19.55859375, 20.15625]}, "gradients/decoder.transformer.h.10.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 8.0, 7.0, 13.0, 11.0, 14.0, 26.0, 43.0, 54.0, 66.0, 87.0, 138.0, 222.0, 287.0, 406.0, 466.0, 518.0, 415.0, 357.0, 262.0, 195.0, 132.0, 89.0, 79.0, 56.0, 33.0, 29.0, 20.0, 14.0, 8.0, 4.0, 8.0, 4.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.0859375, -13.6221923828125, -13.158447265625, -12.6947021484375, -12.23095703125, -11.7672119140625, -11.303466796875, -10.8397216796875, -10.3759765625, -9.9122314453125, -9.448486328125, -8.9847412109375, -8.52099609375, -8.0572509765625, -7.593505859375, -7.1297607421875, -6.666015625, -6.2022705078125, -5.738525390625, -5.2747802734375, -4.81103515625, -4.3472900390625, -3.883544921875, -3.4197998046875, -2.9560546875, -2.4923095703125, -2.028564453125, -1.5648193359375, -1.10107421875, -0.6373291015625, -0.173583984375, 0.2901611328125, 0.75390625, 1.2176513671875, 1.681396484375, 2.1451416015625, 2.60888671875, 3.0726318359375, 3.536376953125, 4.0001220703125, 4.4638671875, 4.9276123046875, 5.391357421875, 5.8551025390625, 6.31884765625, 6.7825927734375, 7.246337890625, 7.7100830078125, 8.173828125, 8.6375732421875, 9.101318359375, 9.5650634765625, 10.02880859375, 10.4925537109375, 10.956298828125, 11.4200439453125, 11.8837890625, 12.3475341796875, 12.811279296875, 13.2750244140625, 13.73876953125, 14.2025146484375, 14.666259765625, 15.1300048828125, 15.59375]}, "gradients/decoder.transformer.h.10.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 5.0, 6.0, 18.0, 14.0, 29.0, 39.0, 56.0, 71.0, 99.0, 152.0, 210.0, 355.0, 624.0, 6151.0, 3625400.0, 557827.0, 1781.0, 541.0, 289.0, 212.0, 111.0, 76.0, 64.0, 43.0, 32.0, 19.0, 11.0, 12.0, 12.0, 6.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-69.9375, -67.8701171875, -65.802734375, -63.7353515625, -61.66796875, -59.6005859375, -57.533203125, -55.4658203125, -53.3984375, -51.3310546875, -49.263671875, -47.1962890625, -45.12890625, -43.0615234375, -40.994140625, -38.9267578125, -36.859375, -34.7919921875, -32.724609375, -30.6572265625, -28.58984375, -26.5224609375, -24.455078125, -22.3876953125, -20.3203125, -18.2529296875, -16.185546875, -14.1181640625, -12.05078125, -9.9833984375, -7.916015625, -5.8486328125, -3.78125, -1.7138671875, 0.353515625, 2.4208984375, 4.48828125, 6.5556640625, 8.623046875, 10.6904296875, 12.7578125, 14.8251953125, 16.892578125, 18.9599609375, 21.02734375, 23.0947265625, 25.162109375, 27.2294921875, 29.296875, 31.3642578125, 33.431640625, 35.4990234375, 37.56640625, 39.6337890625, 41.701171875, 43.7685546875, 45.8359375, 47.9033203125, 49.970703125, 52.0380859375, 54.10546875, 56.1728515625, 58.240234375, 60.3076171875, 62.375]}, "gradients/decoder.transformer.h.10.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 62.0, 367.0, 467.0, 106.0, 12.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-76.62886047363281, -69.0408706665039, -61.452880859375, -53.86488723754883, -46.27689743041992, -38.688907623291016, -31.100914001464844, -23.512924194335938, -15.924934387207031, -8.336943626403809, -0.7489528656005859, 6.839038848876953, 14.42702865600586, 22.015018463134766, 29.603012084960938, 37.191001892089844, 44.77899169921875, 52.366981506347656, 59.95497131347656, 67.54296875, 75.13095092773438, 82.71894836425781, 90.30693817138672, 97.89492797851562, 105.48291778564453, 113.07090759277344, 120.65889739990234, 128.24688720703125, 135.8348846435547, 143.42286682128906, 151.0108642578125, 158.59884643554688, 166.18682861328125, 173.7748260498047, 181.36280822753906, 188.9508056640625, 196.53878784179688, 204.1267852783203, 211.71478271484375, 219.30276489257812, 226.8907470703125, 234.47874450683594, 242.0667266845703, 249.65472412109375, 257.2427062988281, 264.8306884765625, 272.418701171875, 280.0066833496094, 287.59466552734375, 295.1826477050781, 302.7706604003906, 310.358642578125, 317.9466247558594, 325.53460693359375, 333.12261962890625, 340.7106018066406, 348.2986145019531, 355.8865966796875, 363.474609375, 371.0625915527344, 378.65057373046875, 386.2385559082031, 393.8265686035156, 401.41455078125, 409.0025329589844]}, "gradients/decoder.transformer.h.10.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 5.0, 2.0, 5.0, 5.0, 4.0, 7.0, 6.0, 8.0, 11.0, 8.0, 14.0, 18.0, 18.0, 23.0, 18.0, 33.0, 27.0, 38.0, 29.0, 39.0, 40.0, 39.0, 35.0, 32.0, 39.0, 55.0, 44.0, 33.0, 42.0, 37.0, 40.0, 32.0, 31.0, 26.0, 22.0, 23.0, 18.0, 19.0, 16.0, 13.0, 8.0, 7.0, 7.0, 10.0, 5.0, 5.0, 6.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-44.74001693725586, -43.415733337402344, -42.09144592285156, -40.76716232299805, -39.44287872314453, -38.11859130859375, -36.794307708740234, -35.47002410888672, -34.14573669433594, -32.82145309448242, -31.497167587280273, -30.172882080078125, -28.84859848022461, -27.52431297302246, -26.200027465820312, -24.875743865966797, -23.55146026611328, -22.227174758911133, -20.902891159057617, -19.57860565185547, -18.254322052001953, -16.930036544799805, -15.605751037597656, -14.281466484069824, -12.957181930541992, -11.63289737701416, -10.308612823486328, -8.98432731628418, -7.660042762756348, -6.335758209228516, -5.011473178863525, -3.687188148498535, -2.3628997802734375, -1.0386149883270264, 0.28566980361938477, 1.609954595565796, 2.934239387512207, 4.258523941040039, 5.582808971405029, 6.9070940017700195, 8.231378555297852, 9.555663108825684, 10.879947662353516, 12.204233169555664, 13.528517723083496, 14.852802276611328, 16.177087783813477, 17.501373291015625, 18.82565689086914, 20.14994239807129, 21.474225997924805, 22.798511505126953, 24.12279510498047, 25.447080612182617, 26.771366119384766, 28.09564971923828, 29.41993522644043, 30.744220733642578, 32.068504333496094, 33.39278793334961, 34.71707534790039, 36.041358947753906, 37.36564254760742, 38.6899299621582, 40.01421356201172]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 4.0, 6.0, 6.0, 11.0, 12.0, 16.0, 13.0, 12.0, 21.0, 29.0, 26.0, 32.0, 38.0, 33.0, 34.0, 42.0, 44.0, 48.0, 37.0, 47.0, 58.0, 49.0, 42.0, 43.0, 44.0, 37.0, 21.0, 26.0, 24.0, 24.0, 13.0, 22.0, 17.0, 17.0, 12.0, 7.0, 17.0, 9.0, 6.0, 4.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-7.45703125, -7.24188232421875, -7.0267333984375, -6.81158447265625, -6.596435546875, -6.38128662109375, -6.1661376953125, -5.95098876953125, -5.73583984375, -5.52069091796875, -5.3055419921875, -5.09039306640625, -4.875244140625, -4.66009521484375, -4.4449462890625, -4.22979736328125, -4.0146484375, -3.79949951171875, -3.5843505859375, -3.36920166015625, -3.154052734375, -2.93890380859375, -2.7237548828125, -2.50860595703125, -2.29345703125, -2.07830810546875, -1.8631591796875, -1.64801025390625, -1.432861328125, -1.21771240234375, -1.0025634765625, -0.78741455078125, -0.572265625, -0.35711669921875, -0.1419677734375, 0.07318115234375, 0.288330078125, 0.50347900390625, 0.7186279296875, 0.93377685546875, 1.14892578125, 1.36407470703125, 1.5792236328125, 1.79437255859375, 2.009521484375, 2.22467041015625, 2.4398193359375, 2.65496826171875, 2.8701171875, 3.08526611328125, 3.3004150390625, 3.51556396484375, 3.730712890625, 3.94586181640625, 4.1610107421875, 4.37615966796875, 4.59130859375, 4.80645751953125, 5.0216064453125, 5.23675537109375, 5.451904296875, 5.66705322265625, 5.8822021484375, 6.09735107421875, 6.3125]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 4.0, 2.0, 3.0, 3.0, 6.0, 8.0, 14.0, 11.0, 26.0, 38.0, 49.0, 74.0, 100.0, 149.0, 237.0, 325.0, 490.0, 711.0, 1076.0, 1552.0, 2193.0, 3509.0, 5023.0, 7573.0, 11442.0, 17476.0, 26631.0, 40192.0, 60999.0, 92166.0, 138853.0, 178494.0, 153339.0, 104069.0, 68644.0, 45024.0, 29419.0, 19762.0, 12790.0, 8563.0, 5677.0, 3885.0, 2552.0, 1731.0, 1188.0, 819.0, 530.0, 361.0, 241.0, 164.0, 128.0, 94.0, 54.0, 37.0, 21.0, 15.0, 11.0, 8.0, 7.0, 5.0, 4.0, 0.0, 2.0], "bins": [-0.4296875, -0.4165229797363281, -0.40335845947265625, -0.3901939392089844, -0.3770294189453125, -0.3638648986816406, -0.35070037841796875, -0.3375358581542969, -0.324371337890625, -0.3112068176269531, -0.29804229736328125, -0.2848777770996094, -0.2717132568359375, -0.2585487365722656, -0.24538421630859375, -0.23221969604492188, -0.21905517578125, -0.20589065551757812, -0.19272613525390625, -0.17956161499023438, -0.1663970947265625, -0.15323257446289062, -0.14006805419921875, -0.12690353393554688, -0.113739013671875, -0.10057449340820312, -0.08740997314453125, -0.07424545288085938, -0.0610809326171875, -0.047916412353515625, -0.03475189208984375, -0.021587371826171875, -0.0084228515625, 0.004741668701171875, 0.01790618896484375, 0.031070709228515625, 0.0442352294921875, 0.057399749755859375, 0.07056427001953125, 0.08372879028320312, 0.096893310546875, 0.11005783081054688, 0.12322235107421875, 0.13638687133789062, 0.1495513916015625, 0.16271591186523438, 0.17588043212890625, 0.18904495239257812, 0.20220947265625, 0.21537399291992188, 0.22853851318359375, 0.24170303344726562, 0.2548675537109375, 0.2680320739746094, 0.28119659423828125, 0.2943611145019531, 0.307525634765625, 0.3206901550292969, 0.33385467529296875, 0.3470191955566406, 0.3601837158203125, 0.3733482360839844, 0.38651275634765625, 0.3996772766113281, 0.412841796875]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 5.0, 2.0, 2.0, 3.0, 6.0, 7.0, 8.0, 8.0, 7.0, 11.0, 14.0, 18.0, 20.0, 25.0, 30.0, 17.0, 33.0, 34.0, 34.0, 37.0, 39.0, 42.0, 35.0, 38.0, 1062.0, 58.0, 40.0, 36.0, 29.0, 37.0, 33.0, 33.0, 30.0, 24.0, 23.0, 12.0, 24.0, 25.0, 22.0, 20.0, 10.0, 7.0, 9.0, 5.0, 3.0, 5.0, 5.0, 4.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0], "bins": [-4.296875, -4.171051025390625, -4.04522705078125, -3.919403076171875, -3.7935791015625, -3.667755126953125, -3.54193115234375, -3.416107177734375, -3.290283203125, -3.164459228515625, -3.03863525390625, -2.912811279296875, -2.7869873046875, -2.661163330078125, -2.53533935546875, -2.409515380859375, -2.28369140625, -2.157867431640625, -2.03204345703125, -1.906219482421875, -1.7803955078125, -1.654571533203125, -1.52874755859375, -1.402923583984375, -1.277099609375, -1.151275634765625, -1.02545166015625, -0.899627685546875, -0.7738037109375, -0.647979736328125, -0.52215576171875, -0.396331787109375, -0.2705078125, -0.144683837890625, -0.01885986328125, 0.106964111328125, 0.2327880859375, 0.358612060546875, 0.48443603515625, 0.610260009765625, 0.736083984375, 0.861907958984375, 0.98773193359375, 1.113555908203125, 1.2393798828125, 1.365203857421875, 1.49102783203125, 1.616851806640625, 1.74267578125, 1.868499755859375, 1.99432373046875, 2.120147705078125, 2.2459716796875, 2.371795654296875, 2.49761962890625, 2.623443603515625, 2.749267578125, 2.875091552734375, 3.00091552734375, 3.126739501953125, 3.2525634765625, 3.378387451171875, 3.50421142578125, 3.630035400390625, 3.755859375]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 6.0, 8.0, 13.0, 10.0, 14.0, 31.0, 30.0, 78.0, 96.0, 155.0, 198.0, 325.0, 504.0, 780.0, 1162.0, 1758.0, 2755.0, 4077.0, 6434.0, 10012.0, 15522.0, 24295.0, 37477.0, 58132.0, 90626.0, 137352.0, 1224580.0, 160375.0, 113310.0, 74219.0, 47532.0, 30391.0, 19555.0, 12564.0, 7990.0, 5104.0, 3338.0, 2193.0, 1453.0, 931.0, 614.0, 380.0, 261.0, 169.0, 111.0, 79.0, 50.0, 32.0, 25.0, 11.0, 12.0, 3.0, 7.0, 2.0, 2.0, 2.0, 3.0], "bins": [-0.421630859375, -0.40914154052734375, -0.3966522216796875, -0.38416290283203125, -0.371673583984375, -0.35918426513671875, -0.3466949462890625, -0.33420562744140625, -0.32171630859375, -0.30922698974609375, -0.2967376708984375, -0.28424835205078125, -0.271759033203125, -0.25926971435546875, -0.2467803955078125, -0.23429107666015625, -0.2218017578125, -0.20931243896484375, -0.1968231201171875, -0.18433380126953125, -0.171844482421875, -0.15935516357421875, -0.1468658447265625, -0.13437652587890625, -0.12188720703125, -0.10939788818359375, -0.0969085693359375, -0.08441925048828125, -0.071929931640625, -0.05944061279296875, -0.0469512939453125, -0.03446197509765625, -0.02197265625, -0.00948333740234375, 0.0030059814453125, 0.01549530029296875, 0.027984619140625, 0.04047393798828125, 0.0529632568359375, 0.06545257568359375, 0.07794189453125, 0.09043121337890625, 0.1029205322265625, 0.11540985107421875, 0.127899169921875, 0.14038848876953125, 0.1528778076171875, 0.16536712646484375, 0.1778564453125, 0.19034576416015625, 0.2028350830078125, 0.21532440185546875, 0.227813720703125, 0.24030303955078125, 0.2527923583984375, 0.26528167724609375, 0.27777099609375, 0.29026031494140625, 0.3027496337890625, 0.31523895263671875, 0.327728271484375, 0.34021759033203125, 0.3527069091796875, 0.36519622802734375, 0.377685546875]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 4.0, 0.0, 2.0, 1.0, 4.0, 7.0, 10.0, 14.0, 34.0, 37.0, 34.0, 35.0, 62.0, 85.0, 91.0, 105.0, 107.0, 99.0, 58.0, 65.0, 45.0, 35.0, 20.0, 16.0, 7.0, 3.0, 9.0, 6.0, 4.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00237274169921875, -0.002306237816810608, -0.002239733934402466, -0.0021732300519943237, -0.0021067261695861816, -0.0020402222871780396, -0.0019737184047698975, -0.0019072145223617554, -0.0018407106399536133, -0.0017742067575454712, -0.001707702875137329, -0.001641198992729187, -0.001574695110321045, -0.0015081912279129028, -0.0014416873455047607, -0.0013751834630966187, -0.0013086795806884766, -0.0012421756982803345, -0.0011756718158721924, -0.0011091679334640503, -0.0010426640510559082, -0.0009761601686477661, -0.000909656286239624, -0.0008431524038314819, -0.0007766485214233398, -0.0007101446390151978, -0.0006436407566070557, -0.0005771368741989136, -0.0005106329917907715, -0.0004441291093826294, -0.0003776252269744873, -0.0003111213445663452, -0.0002446174621582031, -0.00017811357975006104, -0.00011160969734191895, -4.5105814933776855e-05, 2.1398067474365234e-05, 8.790194988250732e-05, 0.00015440583229064941, 0.0002209097146987915, 0.0002874135971069336, 0.0003539174795150757, 0.0004204213619232178, 0.00048692524433135986, 0.000553429126739502, 0.000619933009147644, 0.0006864368915557861, 0.0007529407739639282, 0.0008194446563720703, 0.0008859485387802124, 0.0009524524211883545, 0.0010189563035964966, 0.0010854601860046387, 0.0011519640684127808, 0.0012184679508209229, 0.001284971833229065, 0.001351475715637207, 0.0014179795980453491, 0.0014844834804534912, 0.0015509873628616333, 0.0016174912452697754, 0.0016839951276779175, 0.0017504990100860596, 0.0018170028924942017, 0.0018835067749023438]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 5.0, 2.0, 7.0, 8.0, 11.0, 8.0, 20.0, 29.0, 41.0, 57.0, 84.0, 120.0, 270.0, 514.0, 13049.0, 1031862.0, 1562.0, 354.0, 164.0, 114.0, 88.0, 61.0, 35.0, 29.0, 13.0, 13.0, 4.0, 9.0, 3.0, 3.0, 6.0, 5.0, 3.0, 2.0, 1.0, 2.0, 4.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03778076171875, -0.036446571350097656, -0.03511238098144531, -0.03377819061279297, -0.032444000244140625, -0.03110980987548828, -0.029775619506835938, -0.028441429138183594, -0.02710723876953125, -0.025773048400878906, -0.024438858032226562, -0.02310466766357422, -0.021770477294921875, -0.02043628692626953, -0.019102096557617188, -0.017767906188964844, -0.0164337158203125, -0.015099525451660156, -0.013765335083007812, -0.012431144714355469, -0.011096954345703125, -0.009762763977050781, -0.008428573608398438, -0.007094383239746094, -0.00576019287109375, -0.004426002502441406, -0.0030918121337890625, -0.0017576217651367188, -0.000423431396484375, 0.0009107589721679688, 0.0022449493408203125, 0.0035791397094726562, 0.004913330078125, 0.006247520446777344, 0.0075817108154296875, 0.008915901184082031, 0.010250091552734375, 0.011584281921386719, 0.012918472290039062, 0.014252662658691406, 0.01558685302734375, 0.016921043395996094, 0.018255233764648438, 0.01958942413330078, 0.020923614501953125, 0.02225780487060547, 0.023591995239257812, 0.024926185607910156, 0.0262603759765625, 0.027594566345214844, 0.028928756713867188, 0.03026294708251953, 0.031597137451171875, 0.03293132781982422, 0.03426551818847656, 0.035599708557128906, 0.03693389892578125, 0.038268089294433594, 0.03960227966308594, 0.04093647003173828, 0.042270660400390625, 0.04360485076904297, 0.04493904113769531, 0.046273231506347656, 0.047607421875]}, "gradients/decoder.transformer.h.10.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 14.0, 61.0, 180.0, 303.0, 253.0, 130.0, 45.0, 18.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0028207041323184967, -0.0027498146519064903, -0.0026789249386638403, -0.002608035458251834, -0.002537145745009184, -0.0024662562645971775, -0.002395366784185171, -0.0023244773037731647, -0.0022535875905305147, -0.0021826981101185083, -0.0021118083968758583, -0.002040918916463852, -0.0019700294360518456, -0.0018991397228091955, -0.0018282502423971891, -0.001757360645569861, -0.0016864710487425327, -0.0016155814519152045, -0.0015446918550878763, -0.00147380237467587, -0.0014029127778485417, -0.0013320231810212135, -0.0012611337006092072, -0.001190244103781879, -0.0011193545069545507, -0.0010484649101272225, -0.0009775753132998943, -0.000906685832887888, -0.0008357962360605597, -0.0007649066392332315, -0.0006940171006135643, -0.000623127561993897, -0.0005522381979972124, -0.00048134863027371466, -0.0004104590625502169, -0.00033956949482671916, -0.0002686799271032214, -0.00019779035937972367, -0.00012690079165622592, -5.601125303655863e-05, 1.4878343790769577e-05, 8.576791151426733e-05, 0.00015665747923776507, 0.00022754704696126282, 0.00029843661468476057, 0.0003693261824082583, 0.00044021575013175607, 0.0005111052887514234, 0.0005819948855787516, 0.0006528844824060798, 0.0007237740210257471, 0.0007946635596454144, 0.0008655531564727426, 0.0009364427533000708, 0.0010073322337120771, 0.0010782218305394053, 0.0011491114273667336, 0.0012200010241940618, 0.00129089062102139, 0.0013617801014333963, 0.0014326696982607245, 0.0015035592950880527, 0.0015744487755000591, 0.0016453383723273873, 0.0017162279691547155]}, "gradients/decoder.transformer.h.10.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 1.0, 5.0, 1.0, 5.0, 4.0, 5.0, 3.0, 6.0, 11.0, 19.0, 13.0, 12.0, 19.0, 23.0, 17.0, 17.0, 21.0, 35.0, 29.0, 34.0, 45.0, 43.0, 43.0, 46.0, 47.0, 35.0, 38.0, 49.0, 43.0, 38.0, 34.0, 28.0, 31.0, 30.0, 26.0, 27.0, 15.0, 16.0, 15.0, 18.0, 14.0, 9.0, 8.0, 8.0, 6.0, 7.0, 3.0, 5.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0006355047225952148, -0.0006129816174507141, -0.0005904585123062134, -0.0005679354071617126, -0.0005454123020172119, -0.0005228891968727112, -0.0005003660917282104, -0.0004778429865837097, -0.000455319881439209, -0.00043279677629470825, -0.0004102736711502075, -0.0003877505660057068, -0.00036522746086120605, -0.0003427043557167053, -0.0003201812505722046, -0.00029765814542770386, -0.0002751350402832031, -0.0002526119351387024, -0.00023008882999420166, -0.00020756572484970093, -0.0001850426197052002, -0.00016251951456069946, -0.00013999640941619873, -0.000117473304271698, -9.495019912719727e-05, -7.242709398269653e-05, -4.99039888381958e-05, -2.738088369369507e-05, -4.857778549194336e-06, 1.7665326595306396e-05, 4.018843173980713e-05, 6.271153688430786e-05, 8.52346420288086e-05, 0.00010775774717330933, 0.00013028085231781006, 0.0001528039574623108, 0.00017532706260681152, 0.00019785016775131226, 0.000220373272895813, 0.00024289637804031372, 0.00026541948318481445, 0.0002879425883293152, 0.0003104656934738159, 0.00033298879861831665, 0.0003555119037628174, 0.0003780350089073181, 0.00040055811405181885, 0.0004230812191963196, 0.0004456043243408203, 0.00046812742948532104, 0.0004906505346298218, 0.0005131736397743225, 0.0005356967449188232, 0.000558219850063324, 0.0005807429552078247, 0.0006032660603523254, 0.0006257891654968262, 0.0006483122706413269, 0.0006708353757858276, 0.0006933584809303284, 0.0007158815860748291, 0.0007384046912193298, 0.0007609277963638306, 0.0007834509015083313, 0.000805974006652832]}, "gradients/decoder.transformer.h.10.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 4.0, 6.0, 6.0, 11.0, 12.0, 16.0, 13.0, 12.0, 21.0, 29.0, 26.0, 32.0, 38.0, 33.0, 33.0, 43.0, 44.0, 48.0, 37.0, 47.0, 58.0, 49.0, 42.0, 43.0, 44.0, 37.0, 21.0, 26.0, 24.0, 24.0, 13.0, 22.0, 17.0, 17.0, 12.0, 7.0, 17.0, 9.0, 6.0, 4.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-7.45703125, -7.24188232421875, -7.0267333984375, -6.81158447265625, -6.596435546875, -6.38128662109375, -6.1661376953125, -5.95098876953125, -5.73583984375, -5.52069091796875, -5.3055419921875, -5.09039306640625, -4.875244140625, -4.66009521484375, -4.4449462890625, -4.22979736328125, -4.0146484375, -3.79949951171875, -3.5843505859375, -3.36920166015625, -3.154052734375, -2.93890380859375, -2.7237548828125, -2.50860595703125, -2.29345703125, -2.07830810546875, -1.8631591796875, -1.64801025390625, -1.432861328125, -1.21771240234375, -1.0025634765625, -0.78741455078125, -0.572265625, -0.35711669921875, -0.1419677734375, 0.07318115234375, 0.288330078125, 0.50347900390625, 0.7186279296875, 0.93377685546875, 1.14892578125, 1.36407470703125, 1.5792236328125, 1.79437255859375, 2.009521484375, 2.22467041015625, 2.4398193359375, 2.65496826171875, 2.8701171875, 3.08526611328125, 3.3004150390625, 3.51556396484375, 3.730712890625, 3.94586181640625, 4.1610107421875, 4.37615966796875, 4.59130859375, 4.80645751953125, 5.0216064453125, 5.23675537109375, 5.451904296875, 5.66705322265625, 5.8822021484375, 6.09735107421875, 6.3125]}, "gradients/decoder.transformer.h.10.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 8.0, 5.0, 4.0, 5.0, 17.0, 14.0, 27.0, 26.0, 35.0, 65.0, 68.0, 75.0, 129.0, 229.0, 399.0, 760.0, 1716.0, 4287.0, 10444.0, 26957.0, 67759.0, 166872.0, 358929.0, 239890.0, 101983.0, 40402.0, 16079.0, 6467.0, 2503.0, 1058.0, 506.0, 258.0, 121.0, 115.0, 73.0, 64.0, 59.0, 32.0, 30.0, 21.0, 29.0, 6.0, 14.0, 4.0, 5.0, 5.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-7.65234375, -7.43194580078125, -7.2115478515625, -6.99114990234375, -6.770751953125, -6.55035400390625, -6.3299560546875, -6.10955810546875, -5.88916015625, -5.66876220703125, -5.4483642578125, -5.22796630859375, -5.007568359375, -4.78717041015625, -4.5667724609375, -4.34637451171875, -4.1259765625, -3.90557861328125, -3.6851806640625, -3.46478271484375, -3.244384765625, -3.02398681640625, -2.8035888671875, -2.58319091796875, -2.36279296875, -2.14239501953125, -1.9219970703125, -1.70159912109375, -1.481201171875, -1.26080322265625, -1.0404052734375, -0.82000732421875, -0.599609375, -0.37921142578125, -0.1588134765625, 0.06158447265625, 0.281982421875, 0.50238037109375, 0.7227783203125, 0.94317626953125, 1.16357421875, 1.38397216796875, 1.6043701171875, 1.82476806640625, 2.045166015625, 2.26556396484375, 2.4859619140625, 2.70635986328125, 2.9267578125, 3.14715576171875, 3.3675537109375, 3.58795166015625, 3.808349609375, 4.02874755859375, 4.2491455078125, 4.46954345703125, 4.68994140625, 4.91033935546875, 5.1307373046875, 5.35113525390625, 5.571533203125, 5.79193115234375, 6.0123291015625, 6.23272705078125, 6.453125]}, "gradients/decoder.transformer.h.10.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 4.0, 4.0, 4.0, 4.0, 7.0, 5.0, 7.0, 5.0, 13.0, 6.0, 10.0, 10.0, 24.0, 22.0, 20.0, 22.0, 30.0, 39.0, 35.0, 35.0, 32.0, 57.0, 63.0, 142.0, 350.0, 1453.0, 174.0, 84.0, 57.0, 46.0, 42.0, 35.0, 37.0, 24.0, 15.0, 25.0, 13.0, 21.0, 14.0, 18.0, 8.0, 17.0, 2.0, 10.0, 4.0, 3.0, 3.0, 4.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-22.6875, -22.0224609375, -21.357421875, -20.6923828125, -20.02734375, -19.3623046875, -18.697265625, -18.0322265625, -17.3671875, -16.7021484375, -16.037109375, -15.3720703125, -14.70703125, -14.0419921875, -13.376953125, -12.7119140625, -12.046875, -11.3818359375, -10.716796875, -10.0517578125, -9.38671875, -8.7216796875, -8.056640625, -7.3916015625, -6.7265625, -6.0615234375, -5.396484375, -4.7314453125, -4.06640625, -3.4013671875, -2.736328125, -2.0712890625, -1.40625, -0.7412109375, -0.076171875, 0.5888671875, 1.25390625, 1.9189453125, 2.583984375, 3.2490234375, 3.9140625, 4.5791015625, 5.244140625, 5.9091796875, 6.57421875, 7.2392578125, 7.904296875, 8.5693359375, 9.234375, 9.8994140625, 10.564453125, 11.2294921875, 11.89453125, 12.5595703125, 13.224609375, 13.8896484375, 14.5546875, 15.2197265625, 15.884765625, 16.5498046875, 17.21484375, 17.8798828125, 18.544921875, 19.2099609375, 19.875]}, "gradients/decoder.transformer.h.10.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 3.0, 1.0, 4.0, 5.0, 7.0, 10.0, 9.0, 12.0, 12.0, 28.0, 33.0, 31.0, 70.0, 77.0, 111.0, 190.0, 400.0, 1078.0, 7822.0, 3123815.0, 9844.0, 1067.0, 434.0, 206.0, 152.0, 76.0, 43.0, 35.0, 32.0, 32.0, 19.0, 17.0, 8.0, 8.0, 4.0, 3.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-104.5625, -101.443359375, -98.32421875, -95.205078125, -92.0859375, -88.966796875, -85.84765625, -82.728515625, -79.609375, -76.490234375, -73.37109375, -70.251953125, -67.1328125, -64.013671875, -60.89453125, -57.775390625, -54.65625, -51.537109375, -48.41796875, -45.298828125, -42.1796875, -39.060546875, -35.94140625, -32.822265625, -29.703125, -26.583984375, -23.46484375, -20.345703125, -17.2265625, -14.107421875, -10.98828125, -7.869140625, -4.75, -1.630859375, 1.48828125, 4.607421875, 7.7265625, 10.845703125, 13.96484375, 17.083984375, 20.203125, 23.322265625, 26.44140625, 29.560546875, 32.6796875, 35.798828125, 38.91796875, 42.037109375, 45.15625, 48.275390625, 51.39453125, 54.513671875, 57.6328125, 60.751953125, 63.87109375, 66.990234375, 70.109375, 73.228515625, 76.34765625, 79.466796875, 82.5859375, 85.705078125, 88.82421875, 91.943359375, 95.0625]}, "gradients/decoder.transformer.h.10.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 19.0, 165.0, 449.0, 316.0, 62.0, 4.0, 1.0, 0.0, 1.0], "bins": [-207.00746154785156, -203.4169464111328, -199.82643127441406, -196.2359161376953, -192.64540100097656, -189.0548858642578, -185.46437072753906, -181.8738555908203, -178.28334045410156, -174.6928253173828, -171.10231018066406, -167.5117950439453, -163.92127990722656, -160.3307647705078, -156.74024963378906, -153.1497344970703, -149.5592041015625, -145.96868896484375, -142.378173828125, -138.78765869140625, -135.1971435546875, -131.60662841796875, -128.01611328125, -124.42559814453125, -120.8350830078125, -117.24456787109375, -113.654052734375, -110.06353759765625, -106.4730224609375, -102.88250732421875, -99.2919921875, -95.70147705078125, -92.1109619140625, -88.52044677734375, -84.929931640625, -81.33941650390625, -77.7489013671875, -74.15838623046875, -70.56787109375, -66.97735595703125, -63.386837005615234, -59.796321868896484, -56.205806732177734, -52.61528778076172, -49.02477264404297, -45.43425750732422, -41.84374237060547, -38.25322723388672, -34.66271209716797, -31.07219696044922, -27.48168182373047, -23.891164779663086, -20.300649642944336, -16.710134506225586, -13.119617462158203, -9.529102325439453, -5.938587188720703, -2.348071575164795, 1.2424440383911133, 4.83296012878418, 8.42347526550293, 12.01399040222168, 15.604507446289062, 19.195022583007812, 22.785537719726562]}, "gradients/decoder.transformer.h.10.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 1.0, 7.0, 8.0, 9.0, 7.0, 9.0, 14.0, 13.0, 23.0, 18.0, 22.0, 19.0, 31.0, 27.0, 30.0, 31.0, 35.0, 43.0, 42.0, 46.0, 36.0, 38.0, 41.0, 53.0, 54.0, 37.0, 37.0, 36.0, 28.0, 32.0, 28.0, 22.0, 17.0, 16.0, 17.0, 17.0, 12.0, 15.0, 13.0, 7.0, 3.0, 3.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-59.79783630371094, -58.027889251708984, -56.25794219970703, -54.48799514770508, -52.718048095703125, -50.948097229003906, -49.17815017700195, -47.408203125, -45.63825607299805, -43.868309020996094, -42.09836196899414, -40.32841491699219, -38.55846405029297, -36.78852081298828, -35.01856994628906, -33.24862289428711, -31.478675842285156, -29.708728790283203, -27.93878173828125, -26.168832778930664, -24.39888572692871, -22.628938674926758, -20.858989715576172, -19.08904266357422, -17.319095611572266, -15.549148559570312, -13.779200553894043, -12.009252548217773, -10.23930549621582, -8.469358444213867, -6.699410438537598, -4.929462432861328, -3.159515380859375, -1.3895678520202637, 0.38037967681884766, 2.150327205657959, 3.9202747344970703, 5.690221786499023, 7.460169792175293, 9.230117797851562, 11.000064849853516, 12.770011901855469, 14.539959907531738, 16.309907913208008, 18.07985496520996, 19.849802017211914, 21.6197509765625, 23.389698028564453, 25.159645080566406, 26.92959213256836, 28.699539184570312, 30.4694881439209, 32.23943328857422, 34.00938415527344, 35.77933120727539, 37.549278259277344, 39.3192253112793, 41.08917236328125, 42.8591194152832, 44.629066467285156, 46.399017333984375, 48.16896057128906, 49.93891143798828, 51.708858489990234, 53.47880554199219]}, "gradients/decoder.transformer.h.9.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 6.0, 2.0, 7.0, 8.0, 7.0, 10.0, 8.0, 19.0, 19.0, 23.0, 30.0, 27.0, 25.0, 36.0, 37.0, 43.0, 33.0, 46.0, 38.0, 52.0, 51.0, 44.0, 43.0, 42.0, 53.0, 44.0, 39.0, 25.0, 22.0, 25.0, 20.0, 24.0, 19.0, 16.0, 13.0, 15.0, 11.0, 10.0, 5.0, 6.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.75, -7.5040283203125, -7.258056640625, -7.0120849609375, -6.76611328125, -6.5201416015625, -6.274169921875, -6.0281982421875, -5.7822265625, -5.5362548828125, -5.290283203125, -5.0443115234375, -4.79833984375, -4.5523681640625, -4.306396484375, -4.0604248046875, -3.814453125, -3.5684814453125, -3.322509765625, -3.0765380859375, -2.83056640625, -2.5845947265625, -2.338623046875, -2.0926513671875, -1.8466796875, -1.6007080078125, -1.354736328125, -1.1087646484375, -0.86279296875, -0.6168212890625, -0.370849609375, -0.1248779296875, 0.12109375, 0.3670654296875, 0.613037109375, 0.8590087890625, 1.10498046875, 1.3509521484375, 1.596923828125, 1.8428955078125, 2.0888671875, 2.3348388671875, 2.580810546875, 2.8267822265625, 3.07275390625, 3.3187255859375, 3.564697265625, 3.8106689453125, 4.056640625, 4.3026123046875, 4.548583984375, 4.7945556640625, 5.04052734375, 5.2864990234375, 5.532470703125, 5.7784423828125, 6.0244140625, 6.2703857421875, 6.516357421875, 6.7623291015625, 7.00830078125, 7.2542724609375, 7.500244140625, 7.7462158203125, 7.9921875]}, "gradients/decoder.transformer.h.9.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 2.0, 5.0, 4.0, 5.0, 8.0, 12.0, 11.0, 15.0, 25.0, 32.0, 31.0, 30.0, 47.0, 66.0, 106.0, 99.0, 180.0, 319.0, 995.0, 8152.0, 247897.0, 3100914.0, 809707.0, 22795.0, 1676.0, 410.0, 202.0, 112.0, 86.0, 67.0, 56.0, 47.0, 38.0, 39.0, 20.0, 14.0, 15.0, 15.0, 11.0, 11.0, 3.0, 5.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.984375, -22.28076171875, -21.5771484375, -20.87353515625, -20.169921875, -19.46630859375, -18.7626953125, -18.05908203125, -17.35546875, -16.65185546875, -15.9482421875, -15.24462890625, -14.541015625, -13.83740234375, -13.1337890625, -12.43017578125, -11.7265625, -11.02294921875, -10.3193359375, -9.61572265625, -8.912109375, -8.20849609375, -7.5048828125, -6.80126953125, -6.09765625, -5.39404296875, -4.6904296875, -3.98681640625, -3.283203125, -2.57958984375, -1.8759765625, -1.17236328125, -0.46875, 0.23486328125, 0.9384765625, 1.64208984375, 2.345703125, 3.04931640625, 3.7529296875, 4.45654296875, 5.16015625, 5.86376953125, 6.5673828125, 7.27099609375, 7.974609375, 8.67822265625, 9.3818359375, 10.08544921875, 10.7890625, 11.49267578125, 12.1962890625, 12.89990234375, 13.603515625, 14.30712890625, 15.0107421875, 15.71435546875, 16.41796875, 17.12158203125, 17.8251953125, 18.52880859375, 19.232421875, 19.93603515625, 20.6396484375, 21.34326171875, 22.046875]}, "gradients/decoder.transformer.h.9.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 12.0, 14.0, 24.0, 15.0, 30.0, 47.0, 58.0, 90.0, 139.0, 213.0, 321.0, 456.0, 568.0, 583.0, 444.0, 343.0, 229.0, 139.0, 100.0, 61.0, 67.0, 35.0, 31.0, 18.0, 15.0, 8.0, 6.0, 6.0, 1.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.9296875, -14.4166259765625, -13.903564453125, -13.3905029296875, -12.87744140625, -12.3643798828125, -11.851318359375, -11.3382568359375, -10.8251953125, -10.3121337890625, -9.799072265625, -9.2860107421875, -8.77294921875, -8.2598876953125, -7.746826171875, -7.2337646484375, -6.720703125, -6.2076416015625, -5.694580078125, -5.1815185546875, -4.66845703125, -4.1553955078125, -3.642333984375, -3.1292724609375, -2.6162109375, -2.1031494140625, -1.590087890625, -1.0770263671875, -0.56396484375, -0.0509033203125, 0.462158203125, 0.9752197265625, 1.48828125, 2.0013427734375, 2.514404296875, 3.0274658203125, 3.54052734375, 4.0535888671875, 4.566650390625, 5.0797119140625, 5.5927734375, 6.1058349609375, 6.618896484375, 7.1319580078125, 7.64501953125, 8.1580810546875, 8.671142578125, 9.1842041015625, 9.697265625, 10.2103271484375, 10.723388671875, 11.2364501953125, 11.74951171875, 12.2625732421875, 12.775634765625, 13.2886962890625, 13.8017578125, 14.3148193359375, 14.827880859375, 15.3409423828125, 15.85400390625, 16.3670654296875, 16.880126953125, 17.3931884765625, 17.90625]}, "gradients/decoder.transformer.h.9.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 5.0, 5.0, 8.0, 14.0, 17.0, 28.0, 35.0, 54.0, 68.0, 97.0, 165.0, 291.0, 479.0, 1477.0, 1449033.0, 2739366.0, 1869.0, 499.0, 289.0, 163.0, 97.0, 73.0, 49.0, 34.0, 24.0, 11.0, 17.0, 9.0, 3.0, 4.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-93.625, -90.953125, -88.28125, -85.609375, -82.9375, -80.265625, -77.59375, -74.921875, -72.25, -69.578125, -66.90625, -64.234375, -61.5625, -58.890625, -56.21875, -53.546875, -50.875, -48.203125, -45.53125, -42.859375, -40.1875, -37.515625, -34.84375, -32.171875, -29.5, -26.828125, -24.15625, -21.484375, -18.8125, -16.140625, -13.46875, -10.796875, -8.125, -5.453125, -2.78125, -0.109375, 2.5625, 5.234375, 7.90625, 10.578125, 13.25, 15.921875, 18.59375, 21.265625, 23.9375, 26.609375, 29.28125, 31.953125, 34.625, 37.296875, 39.96875, 42.640625, 45.3125, 47.984375, 50.65625, 53.328125, 56.0, 58.671875, 61.34375, 64.015625, 66.6875, 69.359375, 72.03125, 74.703125, 77.375]}, "gradients/decoder.transformer.h.9.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 41.0, 185.0, 395.0, 307.0, 76.0, 12.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-139.16845703125, -133.52442932128906, -127.88040924072266, -122.23638153076172, -116.59235382080078, -110.94833374023438, -105.30430603027344, -99.6602783203125, -94.01625061035156, -88.37222290039062, -82.72820281982422, -77.08417510986328, -71.44014739990234, -65.79612731933594, -60.152099609375, -54.50807189941406, -48.864051818847656, -43.220027923583984, -37.57600021362305, -31.931976318359375, -26.28795051574707, -20.643924713134766, -14.999900817871094, -9.355873107910156, -3.7118492126464844, 1.932176113128662, 7.576201438903809, 13.220226287841797, 18.8642520904541, 24.508277893066406, 30.152301788330078, 35.796329498291016, 41.44035339355469, 47.08437728881836, 52.7284049987793, 58.37242889404297, 64.0164566040039, 69.66047668457031, 75.30450439453125, 80.94853210449219, 86.59255981445312, 92.23658752441406, 97.88060760498047, 103.5246353149414, 109.16866302490234, 114.81268310546875, 120.45671081542969, 126.10073852539062, 131.7447509765625, 137.38877868652344, 143.03280639648438, 148.67681884765625, 154.3208465576172, 159.96487426757812, 165.60890197753906, 171.2529296875, 176.89695739746094, 182.54098510742188, 188.1850128173828, 193.82904052734375, 199.47305297851562, 205.11708068847656, 210.7611083984375, 216.40513610839844, 222.04916381835938]}, "gradients/decoder.transformer.h.9.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 2.0, 3.0, 5.0, 5.0, 1.0, 9.0, 9.0, 19.0, 14.0, 20.0, 23.0, 32.0, 33.0, 28.0, 33.0, 33.0, 37.0, 39.0, 44.0, 50.0, 43.0, 55.0, 37.0, 40.0, 38.0, 34.0, 37.0, 34.0, 27.0, 25.0, 25.0, 33.0, 18.0, 22.0, 19.0, 14.0, 19.0, 14.0, 10.0, 6.0, 2.0, 6.0, 8.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-45.55462646484375, -44.15840530395508, -42.762184143066406, -41.365966796875, -39.96974563598633, -38.573524475097656, -37.177303314208984, -35.78108215332031, -34.384864807128906, -32.988643646240234, -31.592424392700195, -30.196203231811523, -28.799983978271484, -27.403762817382812, -26.00754165649414, -24.6113224029541, -23.21510124206543, -21.818880081176758, -20.42266082763672, -19.026439666748047, -17.630220413208008, -16.233999252319336, -14.83777904510498, -13.441558837890625, -12.04533863067627, -10.649118423461914, -9.252898216247559, -7.856677532196045, -6.4604573249816895, -5.064237117767334, -3.6680164337158203, -2.271796226501465, -0.8755760192871094, 0.5206443071365356, 1.9168646335601807, 3.3130850791931152, 4.709305286407471, 6.105525493621826, 7.50174617767334, 8.897966384887695, 10.29418659210205, 11.690406799316406, 13.086627006530762, 14.482847213745117, 15.879068374633789, 17.275287628173828, 18.6715087890625, 20.067729949951172, 21.46394920349121, 22.860170364379883, 24.256389617919922, 25.652610778808594, 27.048830032348633, 28.445051193237305, 29.841270446777344, 31.237491607666016, 32.63371276855469, 34.02993392944336, 35.42615509033203, 36.82237243652344, 38.21859359741211, 39.61481475830078, 41.01103591918945, 42.407257080078125, 43.80347442626953]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 3.0, 1.0, 4.0, 6.0, 8.0, 9.0, 2.0, 12.0, 9.0, 14.0, 10.0, 19.0, 22.0, 25.0, 26.0, 33.0, 26.0, 35.0, 36.0, 38.0, 36.0, 38.0, 34.0, 38.0, 40.0, 41.0, 57.0, 44.0, 28.0, 43.0, 45.0, 22.0, 22.0, 32.0, 20.0, 19.0, 18.0, 17.0, 11.0, 18.0, 9.0, 11.0, 7.0, 9.0, 3.0, 4.0, 2.0, 1.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.37109375, -6.160888671875, -5.95068359375, -5.740478515625, -5.5302734375, -5.320068359375, -5.10986328125, -4.899658203125, -4.689453125, -4.479248046875, -4.26904296875, -4.058837890625, -3.8486328125, -3.638427734375, -3.42822265625, -3.218017578125, -3.0078125, -2.797607421875, -2.58740234375, -2.377197265625, -2.1669921875, -1.956787109375, -1.74658203125, -1.536376953125, -1.326171875, -1.115966796875, -0.90576171875, -0.695556640625, -0.4853515625, -0.275146484375, -0.06494140625, 0.145263671875, 0.35546875, 0.565673828125, 0.77587890625, 0.986083984375, 1.1962890625, 1.406494140625, 1.61669921875, 1.826904296875, 2.037109375, 2.247314453125, 2.45751953125, 2.667724609375, 2.8779296875, 3.088134765625, 3.29833984375, 3.508544921875, 3.71875, 3.928955078125, 4.13916015625, 4.349365234375, 4.5595703125, 4.769775390625, 4.97998046875, 5.190185546875, 5.400390625, 5.610595703125, 5.82080078125, 6.031005859375, 6.2412109375, 6.451416015625, 6.66162109375, 6.871826171875, 7.08203125]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 7.0, 11.0, 26.0, 26.0, 41.0, 66.0, 100.0, 125.0, 196.0, 358.0, 511.0, 716.0, 1011.0, 1584.0, 2476.0, 3780.0, 5815.0, 8438.0, 13235.0, 19889.0, 30221.0, 45207.0, 69180.0, 105217.0, 152964.0, 178592.0, 138199.0, 92022.0, 60778.0, 40173.0, 26217.0, 17337.0, 11697.0, 7589.0, 5025.0, 3294.0, 2212.0, 1435.0, 940.0, 634.0, 438.0, 262.0, 171.0, 120.0, 61.0, 54.0, 41.0, 20.0, 17.0, 12.0, 11.0, 8.0, 0.0, 1.0, 1.0, 0.0, 3.0], "bins": [-0.46923828125, -0.4547309875488281, -0.44022369384765625, -0.4257164001464844, -0.4112091064453125, -0.3967018127441406, -0.38219451904296875, -0.3676872253417969, -0.353179931640625, -0.3386726379394531, -0.32416534423828125, -0.3096580505371094, -0.2951507568359375, -0.2806434631347656, -0.26613616943359375, -0.2516288757324219, -0.23712158203125, -0.22261428833007812, -0.20810699462890625, -0.19359970092773438, -0.1790924072265625, -0.16458511352539062, -0.15007781982421875, -0.13557052612304688, -0.121063232421875, -0.10655593872070312, -0.09204864501953125, -0.07754135131835938, -0.0630340576171875, -0.048526763916015625, -0.03401947021484375, -0.019512176513671875, -0.0050048828125, 0.009502410888671875, 0.02400970458984375, 0.038516998291015625, 0.0530242919921875, 0.06753158569335938, 0.08203887939453125, 0.09654617309570312, 0.111053466796875, 0.12556076049804688, 0.14006805419921875, 0.15457534790039062, 0.1690826416015625, 0.18358993530273438, 0.19809722900390625, 0.21260452270507812, 0.22711181640625, 0.24161911010742188, 0.25612640380859375, 0.2706336975097656, 0.2851409912109375, 0.2996482849121094, 0.31415557861328125, 0.3286628723144531, 0.343170166015625, 0.3576774597167969, 0.37218475341796875, 0.3866920471191406, 0.4011993408203125, 0.4157066345214844, 0.43021392822265625, 0.4447212219238281, 0.459228515625]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 6.0, 4.0, 1.0, 5.0, 8.0, 7.0, 14.0, 17.0, 17.0, 20.0, 13.0, 18.0, 21.0, 28.0, 26.0, 25.0, 31.0, 28.0, 42.0, 29.0, 35.0, 53.0, 1064.0, 52.0, 29.0, 44.0, 46.0, 33.0, 35.0, 29.0, 34.0, 43.0, 31.0, 29.0, 20.0, 14.0, 17.0, 16.0, 11.0, 11.0, 11.0, 6.0, 1.0, 4.0, 1.0, 2.0, 3.0, 3.0, 3.0, 1.0, 1.0], "bins": [-4.84375, -4.707489013671875, -4.57122802734375, -4.434967041015625, -4.2987060546875, -4.162445068359375, -4.02618408203125, -3.889923095703125, -3.753662109375, -3.617401123046875, -3.48114013671875, -3.344879150390625, -3.2086181640625, -3.072357177734375, -2.93609619140625, -2.799835205078125, -2.66357421875, -2.527313232421875, -2.39105224609375, -2.254791259765625, -2.1185302734375, -1.982269287109375, -1.84600830078125, -1.709747314453125, -1.573486328125, -1.437225341796875, -1.30096435546875, -1.164703369140625, -1.0284423828125, -0.892181396484375, -0.75592041015625, -0.619659423828125, -0.4833984375, -0.347137451171875, -0.21087646484375, -0.074615478515625, 0.0616455078125, 0.197906494140625, 0.33416748046875, 0.470428466796875, 0.606689453125, 0.742950439453125, 0.87921142578125, 1.015472412109375, 1.1517333984375, 1.287994384765625, 1.42425537109375, 1.560516357421875, 1.69677734375, 1.833038330078125, 1.96929931640625, 2.105560302734375, 2.2418212890625, 2.378082275390625, 2.51434326171875, 2.650604248046875, 2.786865234375, 2.923126220703125, 3.05938720703125, 3.195648193359375, 3.3319091796875, 3.468170166015625, 3.60443115234375, 3.740692138671875, 3.876953125]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 4.0, 6.0, 8.0, 9.0, 14.0, 26.0, 37.0, 59.0, 101.0, 122.0, 215.0, 313.0, 480.0, 715.0, 1209.0, 1774.0, 2706.0, 4177.0, 6368.0, 10014.0, 15408.0, 23974.0, 36626.0, 57117.0, 88577.0, 134592.0, 1222597.0, 162208.0, 115555.0, 75332.0, 48445.0, 30949.0, 20477.0, 13167.0, 8365.0, 5315.0, 3512.0, 2287.0, 1538.0, 943.0, 623.0, 369.0, 289.0, 187.0, 102.0, 75.0, 66.0, 36.0, 15.0, 13.0, 4.0, 9.0, 9.0, 5.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.43994140625, -0.4265327453613281, -0.41312408447265625, -0.3997154235839844, -0.3863067626953125, -0.3728981018066406, -0.35948944091796875, -0.3460807800292969, -0.332672119140625, -0.3192634582519531, -0.30585479736328125, -0.2924461364746094, -0.2790374755859375, -0.2656288146972656, -0.25222015380859375, -0.23881149291992188, -0.22540283203125, -0.21199417114257812, -0.19858551025390625, -0.18517684936523438, -0.1717681884765625, -0.15835952758789062, -0.14495086669921875, -0.13154220581054688, -0.118133544921875, -0.10472488403320312, -0.09131622314453125, -0.07790756225585938, -0.0644989013671875, -0.051090240478515625, -0.03768157958984375, -0.024272918701171875, -0.0108642578125, 0.002544403076171875, 0.01595306396484375, 0.029361724853515625, 0.0427703857421875, 0.056179046630859375, 0.06958770751953125, 0.08299636840820312, 0.096405029296875, 0.10981369018554688, 0.12322235107421875, 0.13663101196289062, 0.1500396728515625, 0.16344833374023438, 0.17685699462890625, 0.19026565551757812, 0.20367431640625, 0.21708297729492188, 0.23049163818359375, 0.24390029907226562, 0.2573089599609375, 0.2707176208496094, 0.28412628173828125, 0.2975349426269531, 0.310943603515625, 0.3243522644042969, 0.33776092529296875, 0.3511695861816406, 0.3645782470703125, 0.3779869079589844, 0.39139556884765625, 0.4048042297363281, 0.418212890625]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 1.0, 3.0, 7.0, 8.0, 5.0, 8.0, 13.0, 10.0, 12.0, 18.0, 22.0, 17.0, 34.0, 26.0, 30.0, 48.0, 39.0, 49.0, 47.0, 44.0, 63.0, 49.0, 59.0, 38.0, 47.0, 46.0, 33.0, 32.0, 30.0, 32.0, 37.0, 20.0, 19.0, 10.0, 9.0, 9.0, 8.0, 3.0, 6.0, 7.0, 2.0, 4.0, 1.0, 4.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.001132965087890625, -0.00109119713306427, -0.001049429178237915, -0.00100766122341156, -0.0009658932685852051, -0.0009241253137588501, -0.0008823573589324951, -0.0008405894041061401, -0.0007988214492797852, -0.0007570534944534302, -0.0007152855396270752, -0.0006735175848007202, -0.0006317496299743652, -0.0005899816751480103, -0.0005482137203216553, -0.0005064457654953003, -0.0004646778106689453, -0.00042290985584259033, -0.00038114190101623535, -0.00033937394618988037, -0.0002976059913635254, -0.0002558380365371704, -0.00021407008171081543, -0.00017230212688446045, -0.00013053417205810547, -8.876621723175049e-05, -4.699826240539551e-05, -5.230307579040527e-06, 3.653764724731445e-05, 7.830560207366943e-05, 0.00012007355690002441, 0.0001618415117263794, 0.00020360946655273438, 0.00024537742137908936, 0.00028714537620544434, 0.0003289133310317993, 0.0003706812858581543, 0.0004124492406845093, 0.00045421719551086426, 0.0004959851503372192, 0.0005377531051635742, 0.0005795210599899292, 0.0006212890148162842, 0.0006630569696426392, 0.0007048249244689941, 0.0007465928792953491, 0.0007883608341217041, 0.0008301287889480591, 0.0008718967437744141, 0.000913664698600769, 0.000955432653427124, 0.000997200608253479, 0.001038968563079834, 0.001080736517906189, 0.001122504472732544, 0.001164272427558899, 0.001206040382385254, 0.0012478083372116089, 0.0012895762920379639, 0.0013313442468643188, 0.0013731122016906738, 0.0014148801565170288, 0.0014566481113433838, 0.0014984160661697388, 0.0015401840209960938]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 6.0, 2.0, 2.0, 1.0, 1.0, 7.0, 13.0, 6.0, 14.0, 20.0, 21.0, 28.0, 35.0, 52.0, 60.0, 89.0, 121.0, 180.0, 355.0, 970.0, 44058.0, 997890.0, 3198.0, 617.0, 266.0, 137.0, 99.0, 66.0, 57.0, 47.0, 36.0, 35.0, 16.0, 17.0, 10.0, 6.0, 11.0, 5.0, 3.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.036102294921875, -0.035079240798950195, -0.03405618667602539, -0.033033132553100586, -0.03201007843017578, -0.030987024307250977, -0.029963970184326172, -0.028940916061401367, -0.027917861938476562, -0.026894807815551758, -0.025871753692626953, -0.02484869956970215, -0.023825645446777344, -0.02280259132385254, -0.021779537200927734, -0.02075648307800293, -0.019733428955078125, -0.01871037483215332, -0.017687320709228516, -0.01666426658630371, -0.015641212463378906, -0.014618158340454102, -0.013595104217529297, -0.012572050094604492, -0.011548995971679688, -0.010525941848754883, -0.009502887725830078, -0.008479833602905273, -0.007456779479980469, -0.006433725357055664, -0.005410671234130859, -0.004387617111206055, -0.00336456298828125, -0.0023415088653564453, -0.0013184547424316406, -0.00029540061950683594, 0.0007276535034179688, 0.0017507076263427734, 0.002773761749267578, 0.003796815872192383, 0.0048198699951171875, 0.005842924118041992, 0.006865978240966797, 0.007889032363891602, 0.008912086486816406, 0.009935140609741211, 0.010958194732666016, 0.01198124885559082, 0.013004302978515625, 0.01402735710144043, 0.015050411224365234, 0.01607346534729004, 0.017096519470214844, 0.01811957359313965, 0.019142627716064453, 0.020165681838989258, 0.021188735961914062, 0.022211790084838867, 0.023234844207763672, 0.024257898330688477, 0.02528095245361328, 0.026304006576538086, 0.02732706069946289, 0.028350114822387695, 0.0293731689453125]}, "gradients/decoder.transformer.h.9.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 5.0, 180.0, 720.0, 110.0, 2.0], "bins": [-0.013621442019939423, -0.013399799354374409, -0.01317815575748682, -0.012956513091921806, -0.012734870426356792, -0.012513226829469204, -0.01229158416390419, -0.012069941498339176, -0.011848297901451588, -0.011626655235886574, -0.011405011638998985, -0.011183368973433971, -0.010961726307868958, -0.010740082710981369, -0.010518440045416355, -0.010296797379851341, -0.010075154714286327, -0.009853512048721313, -0.009631868451833725, -0.009410225786268711, -0.009188583120703697, -0.008966939523816109, -0.008745296858251095, -0.008523654192686081, -0.008302010595798492, -0.008080367930233479, -0.00785872433334589, -0.007637081667780876, -0.007415438536554575, -0.007193795870989561, -0.00697215273976326, -0.006750510074198246, -0.006528867408633232, -0.006307224277406931, -0.006085581611841917, -0.005863938480615616, -0.005642295349389315, -0.005420652683824301, -0.005199009552598, -0.004977366887032986, -0.004755723290145397, -0.004534080158919096, -0.004312437493354082, -0.004090794362127781, -0.0038691514637321234, -0.003647508565336466, -0.0034258654341101646, -0.003204222535714507, -0.0029825796373188496, -0.002760936738923192, -0.002539293607696891, -0.0023176507093012333, -0.0020960078109055758, -0.0018743647960945964, -0.001652721781283617, -0.0014310788828879595, -0.001209435984492302, -0.0009877929696813226, -0.000766150071285665, -0.0005445070564746857, -0.0003228640998713672, -0.00010122114326804876, 0.0001204218715429306, 0.00034206476993858814, 0.0005637077847495675]}, "gradients/decoder.transformer.h.9.ln_cross_attn.bias": {"_type": "histogram", "values": [4.0, 4.0, 5.0, 3.0, 8.0, 7.0, 10.0, 11.0, 11.0, 15.0, 19.0, 20.0, 19.0, 22.0, 28.0, 29.0, 37.0, 33.0, 46.0, 50.0, 46.0, 53.0, 43.0, 45.0, 53.0, 45.0, 40.0, 45.0, 31.0, 32.0, 29.0, 26.0, 35.0, 21.0, 22.0, 18.0, 15.0, 5.0, 4.0, 8.0, 10.0, 3.0, 2.0, 2.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000621795654296875, -0.0005951123312115669, -0.0005684290081262589, -0.0005417456850409508, -0.0005150623619556427, -0.0004883790388703346, -0.00046169571578502655, -0.0004350123926997185, -0.0004083290696144104, -0.0003816457465291023, -0.00035496242344379425, -0.0003282791003584862, -0.0003015957772731781, -0.00027491245418787, -0.00024822913110256195, -0.00022154580801725388, -0.0001948624849319458, -0.00016817916184663773, -0.00014149583876132965, -0.00011481251567602158, -8.81291925907135e-05, -6.144586950540543e-05, -3.476254642009735e-05, -8.079223334789276e-06, 1.86040997505188e-05, 4.5287422835826874e-05, 7.197074592113495e-05, 9.865406900644302e-05, 0.0001253373920917511, 0.00015202071517705917, 0.00017870403826236725, 0.00020538736134767532, 0.0002320706844329834, 0.0002587540075182915, 0.00028543733060359955, 0.0003121206536889076, 0.0003388039767742157, 0.0003654872998595238, 0.00039217062294483185, 0.0004188539460301399, 0.000445537269115448, 0.0004722205922007561, 0.0004989039152860641, 0.0005255872383713722, 0.0005522705614566803, 0.0005789538845419884, 0.0006056372076272964, 0.0006323205307126045, 0.0006590038537979126, 0.0006856871768832207, 0.0007123704999685287, 0.0007390538230538368, 0.0007657371461391449, 0.000792420469224453, 0.000819103792309761, 0.0008457871153950691, 0.0008724704384803772, 0.0008991537615656853, 0.0009258370846509933, 0.0009525204077363014, 0.0009792037308216095, 0.0010058870539069176, 0.0010325703769922256, 0.0010592537000775337, 0.0010859370231628418]}, "gradients/decoder.transformer.h.9.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 3.0, 1.0, 4.0, 6.0, 8.0, 9.0, 2.0, 12.0, 9.0, 14.0, 10.0, 19.0, 22.0, 25.0, 26.0, 33.0, 26.0, 35.0, 36.0, 38.0, 36.0, 38.0, 34.0, 38.0, 40.0, 41.0, 57.0, 44.0, 28.0, 43.0, 44.0, 23.0, 22.0, 32.0, 20.0, 19.0, 18.0, 17.0, 11.0, 18.0, 9.0, 11.0, 7.0, 9.0, 3.0, 4.0, 2.0, 1.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.37109375, -6.160888671875, -5.95068359375, -5.740478515625, -5.5302734375, -5.320068359375, -5.10986328125, -4.899658203125, -4.689453125, -4.479248046875, -4.26904296875, -4.058837890625, -3.8486328125, -3.638427734375, -3.42822265625, -3.218017578125, -3.0078125, -2.797607421875, -2.58740234375, -2.377197265625, -2.1669921875, -1.956787109375, -1.74658203125, -1.536376953125, -1.326171875, -1.115966796875, -0.90576171875, -0.695556640625, -0.4853515625, -0.275146484375, -0.06494140625, 0.145263671875, 0.35546875, 0.565673828125, 0.77587890625, 0.986083984375, 1.1962890625, 1.406494140625, 1.61669921875, 1.826904296875, 2.037109375, 2.247314453125, 2.45751953125, 2.667724609375, 2.8779296875, 3.088134765625, 3.29833984375, 3.508544921875, 3.71875, 3.928955078125, 4.13916015625, 4.349365234375, 4.5595703125, 4.769775390625, 4.97998046875, 5.190185546875, 5.400390625, 5.610595703125, 5.82080078125, 6.031005859375, 6.2412109375, 6.451416015625, 6.66162109375, 6.871826171875, 7.08203125]}, "gradients/decoder.transformer.h.9.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 9.0, 5.0, 9.0, 7.0, 15.0, 9.0, 23.0, 20.0, 27.0, 38.0, 58.0, 84.0, 92.0, 131.0, 212.0, 321.0, 617.0, 1752.0, 8739.0, 53814.0, 363762.0, 526061.0, 76350.0, 12134.0, 2375.0, 736.0, 369.0, 229.0, 141.0, 107.0, 67.0, 50.0, 47.0, 35.0, 24.0, 19.0, 18.0, 11.0, 8.0, 9.0, 4.0, 4.0, 3.0, 5.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-13.6796875, -13.23974609375, -12.7998046875, -12.35986328125, -11.919921875, -11.47998046875, -11.0400390625, -10.60009765625, -10.16015625, -9.72021484375, -9.2802734375, -8.84033203125, -8.400390625, -7.96044921875, -7.5205078125, -7.08056640625, -6.640625, -6.20068359375, -5.7607421875, -5.32080078125, -4.880859375, -4.44091796875, -4.0009765625, -3.56103515625, -3.12109375, -2.68115234375, -2.2412109375, -1.80126953125, -1.361328125, -0.92138671875, -0.4814453125, -0.04150390625, 0.3984375, 0.83837890625, 1.2783203125, 1.71826171875, 2.158203125, 2.59814453125, 3.0380859375, 3.47802734375, 3.91796875, 4.35791015625, 4.7978515625, 5.23779296875, 5.677734375, 6.11767578125, 6.5576171875, 6.99755859375, 7.4375, 7.87744140625, 8.3173828125, 8.75732421875, 9.197265625, 9.63720703125, 10.0771484375, 10.51708984375, 10.95703125, 11.39697265625, 11.8369140625, 12.27685546875, 12.716796875, 13.15673828125, 13.5966796875, 14.03662109375, 14.4765625]}, "gradients/decoder.transformer.h.9.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 5.0, 3.0, 4.0, 5.0, 2.0, 11.0, 4.0, 12.0, 4.0, 14.0, 16.0, 14.0, 16.0, 18.0, 24.0, 27.0, 35.0, 33.0, 46.0, 38.0, 49.0, 64.0, 110.0, 199.0, 1496.0, 212.0, 96.0, 80.0, 49.0, 54.0, 44.0, 27.0, 30.0, 31.0, 34.0, 24.0, 25.0, 18.0, 23.0, 12.0, 8.0, 4.0, 10.0, 6.0, 5.0, 8.0, 6.0, 2.0, 3.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-22.734375, -22.035888671875, -21.33740234375, -20.638916015625, -19.9404296875, -19.241943359375, -18.54345703125, -17.844970703125, -17.146484375, -16.447998046875, -15.74951171875, -15.051025390625, -14.3525390625, -13.654052734375, -12.95556640625, -12.257080078125, -11.55859375, -10.860107421875, -10.16162109375, -9.463134765625, -8.7646484375, -8.066162109375, -7.36767578125, -6.669189453125, -5.970703125, -5.272216796875, -4.57373046875, -3.875244140625, -3.1767578125, -2.478271484375, -1.77978515625, -1.081298828125, -0.3828125, 0.315673828125, 1.01416015625, 1.712646484375, 2.4111328125, 3.109619140625, 3.80810546875, 4.506591796875, 5.205078125, 5.903564453125, 6.60205078125, 7.300537109375, 7.9990234375, 8.697509765625, 9.39599609375, 10.094482421875, 10.79296875, 11.491455078125, 12.18994140625, 12.888427734375, 13.5869140625, 14.285400390625, 14.98388671875, 15.682373046875, 16.380859375, 17.079345703125, 17.77783203125, 18.476318359375, 19.1748046875, 19.873291015625, 20.57177734375, 21.270263671875, 21.96875]}, "gradients/decoder.transformer.h.9.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 5.0, 9.0, 15.0, 8.0, 18.0, 24.0, 26.0, 44.0, 51.0, 76.0, 98.0, 115.0, 232.0, 401.0, 1121.0, 25260.0, 3111692.0, 4859.0, 737.0, 296.0, 179.0, 127.0, 79.0, 61.0, 54.0, 32.0, 26.0, 28.0, 13.0, 4.0, 6.0, 5.0, 3.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-108.1875, -105.037109375, -101.88671875, -98.736328125, -95.5859375, -92.435546875, -89.28515625, -86.134765625, -82.984375, -79.833984375, -76.68359375, -73.533203125, -70.3828125, -67.232421875, -64.08203125, -60.931640625, -57.78125, -54.630859375, -51.48046875, -48.330078125, -45.1796875, -42.029296875, -38.87890625, -35.728515625, -32.578125, -29.427734375, -26.27734375, -23.126953125, -19.9765625, -16.826171875, -13.67578125, -10.525390625, -7.375, -4.224609375, -1.07421875, 2.076171875, 5.2265625, 8.376953125, 11.52734375, 14.677734375, 17.828125, 20.978515625, 24.12890625, 27.279296875, 30.4296875, 33.580078125, 36.73046875, 39.880859375, 43.03125, 46.181640625, 49.33203125, 52.482421875, 55.6328125, 58.783203125, 61.93359375, 65.083984375, 68.234375, 71.384765625, 74.53515625, 77.685546875, 80.8359375, 83.986328125, 87.13671875, 90.287109375, 93.4375]}, "gradients/decoder.transformer.h.9.ln_1.weight": {"_type": "histogram", "values": [4.0, 5.0, 59.0, 167.0, 295.0, 306.0, 144.0, 25.0, 8.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.430879592895508, -9.155984878540039, -6.88109016418457, -4.606195449829102, -2.331300735473633, -0.05640602111816406, 2.2184886932373047, 4.493383407592773, 6.768278121948242, 9.043172836303711, 11.31806755065918, 13.592962265014648, 15.867856979370117, 18.142751693725586, 20.417646408081055, 22.692541122436523, 24.967435836791992, 27.24233055114746, 29.51722526550293, 31.7921199798584, 34.0670166015625, 36.34191131591797, 38.61680603027344, 40.891700744628906, 43.166595458984375, 45.441490173339844, 47.71638488769531, 49.99127960205078, 52.26617431640625, 54.54106903076172, 56.81596374511719, 59.090858459472656, 61.365753173828125, 63.640647888183594, 65.91554260253906, 68.19043731689453, 70.46533203125, 72.74022674560547, 75.01512145996094, 77.2900161743164, 79.56491088867188, 81.83980560302734, 84.11470031738281, 86.38959503173828, 88.66448974609375, 90.93938446044922, 93.21427917480469, 95.48917388916016, 97.76406860351562, 100.0389633178711, 102.31385803222656, 104.58875274658203, 106.8636474609375, 109.13854217529297, 111.41343688964844, 113.6883316040039, 115.96322631835938, 118.23812103271484, 120.51301574707031, 122.78791046142578, 125.06280517578125, 127.33769989013672, 129.6125946044922, 131.88748168945312, 134.16238403320312]}, "gradients/decoder.transformer.h.9.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 6.0, 8.0, 3.0, 3.0, 6.0, 9.0, 13.0, 7.0, 14.0, 12.0, 15.0, 22.0, 27.0, 22.0, 28.0, 33.0, 26.0, 39.0, 29.0, 36.0, 38.0, 31.0, 32.0, 43.0, 41.0, 45.0, 46.0, 34.0, 25.0, 28.0, 40.0, 25.0, 21.0, 27.0, 17.0, 19.0, 12.0, 16.0, 16.0, 13.0, 10.0, 16.0, 8.0, 10.0, 7.0, 7.0, 3.0, 4.0, 4.0, 2.0, 3.0, 4.0, 4.0, 3.0], "bins": [-57.18463134765625, -55.519927978515625, -53.855224609375, -52.190521240234375, -50.52581787109375, -48.861114501953125, -47.196414947509766, -45.53171157836914, -43.867008209228516, -42.20230484008789, -40.537601470947266, -38.87289810180664, -37.20819854736328, -35.543495178222656, -33.87879180908203, -32.214088439941406, -30.54938507080078, -28.884681701660156, -27.21997833251953, -25.55527687072754, -23.890573501586914, -22.22587013244629, -20.561168670654297, -18.896465301513672, -17.231761932373047, -15.567058563232422, -13.902356147766113, -12.237653732299805, -10.57295036315918, -8.908246994018555, -7.243544578552246, -5.5788421630859375, -3.914142608642578, -2.2494397163391113, -0.5847368240356445, 1.0799660682678223, 2.744668960571289, 4.409372329711914, 6.074074745178223, 7.738777160644531, 9.403480529785156, 11.068183898925781, 12.73288631439209, 14.397588729858398, 16.062292098999023, 17.72699546813965, 19.39169692993164, 21.056400299072266, 22.72110366821289, 24.385807037353516, 26.05051040649414, 27.715211868286133, 29.379915237426758, 31.044618606567383, 32.709320068359375, 34.3740234375, 36.038726806640625, 37.70343017578125, 39.368133544921875, 41.0328369140625, 42.697540283203125, 44.36224365234375, 46.02694320678711, 47.691646575927734, 49.35634994506836]}, "gradients/decoder.transformer.h.8.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 2.0, 4.0, 3.0, 1.0, 3.0, 6.0, 9.0, 8.0, 9.0, 12.0, 9.0, 12.0, 16.0, 17.0, 14.0, 25.0, 28.0, 32.0, 39.0, 43.0, 41.0, 41.0, 48.0, 38.0, 41.0, 55.0, 37.0, 38.0, 45.0, 44.0, 33.0, 28.0, 33.0, 30.0, 26.0, 30.0, 12.0, 18.0, 15.0, 11.0, 10.0, 10.0, 10.0, 8.0, 4.0, 5.0, 2.0, 4.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.171875, -6.92578125, -6.6796875, -6.43359375, -6.1875, -5.94140625, -5.6953125, -5.44921875, -5.203125, -4.95703125, -4.7109375, -4.46484375, -4.21875, -3.97265625, -3.7265625, -3.48046875, -3.234375, -2.98828125, -2.7421875, -2.49609375, -2.25, -2.00390625, -1.7578125, -1.51171875, -1.265625, -1.01953125, -0.7734375, -0.52734375, -0.28125, -0.03515625, 0.2109375, 0.45703125, 0.703125, 0.94921875, 1.1953125, 1.44140625, 1.6875, 1.93359375, 2.1796875, 2.42578125, 2.671875, 2.91796875, 3.1640625, 3.41015625, 3.65625, 3.90234375, 4.1484375, 4.39453125, 4.640625, 4.88671875, 5.1328125, 5.37890625, 5.625, 5.87109375, 6.1171875, 6.36328125, 6.609375, 6.85546875, 7.1015625, 7.34765625, 7.59375, 7.83984375, 8.0859375, 8.33203125, 8.578125]}, "gradients/decoder.transformer.h.8.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 1.0, 4.0, 5.0, 2.0, 5.0, 8.0, 5.0, 6.0, 3.0, 15.0, 19.0, 18.0, 34.0, 44.0, 74.0, 78.0, 103.0, 167.0, 315.0, 605.0, 1651.0, 5105.0, 20197.0, 90556.0, 387350.0, 1147223.0, 1532506.0, 743585.0, 203765.0, 45047.0, 10727.0, 2866.0, 1040.0, 440.0, 237.0, 142.0, 86.0, 56.0, 57.0, 28.0, 25.0, 21.0, 13.0, 20.0, 11.0, 6.0, 8.0, 8.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.15625, -8.853759765625, -8.55126953125, -8.248779296875, -7.9462890625, -7.643798828125, -7.34130859375, -7.038818359375, -6.736328125, -6.433837890625, -6.13134765625, -5.828857421875, -5.5263671875, -5.223876953125, -4.92138671875, -4.618896484375, -4.31640625, -4.013916015625, -3.71142578125, -3.408935546875, -3.1064453125, -2.803955078125, -2.50146484375, -2.198974609375, -1.896484375, -1.593994140625, -1.29150390625, -0.989013671875, -0.6865234375, -0.384033203125, -0.08154296875, 0.220947265625, 0.5234375, 0.825927734375, 1.12841796875, 1.430908203125, 1.7333984375, 2.035888671875, 2.33837890625, 2.640869140625, 2.943359375, 3.245849609375, 3.54833984375, 3.850830078125, 4.1533203125, 4.455810546875, 4.75830078125, 5.060791015625, 5.36328125, 5.665771484375, 5.96826171875, 6.270751953125, 6.5732421875, 6.875732421875, 7.17822265625, 7.480712890625, 7.783203125, 8.085693359375, 8.38818359375, 8.690673828125, 8.9931640625, 9.295654296875, 9.59814453125, 9.900634765625, 10.203125]}, "gradients/decoder.transformer.h.8.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 2.0, 8.0, 13.0, 3.0, 17.0, 31.0, 44.0, 80.0, 100.0, 142.0, 201.0, 321.0, 473.0, 560.0, 594.0, 497.0, 333.0, 218.0, 143.0, 94.0, 59.0, 47.0, 37.0, 28.0, 14.0, 6.0, 8.0, 4.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.640625, -21.0562744140625, -20.471923828125, -19.8875732421875, -19.30322265625, -18.7188720703125, -18.134521484375, -17.5501708984375, -16.9658203125, -16.3814697265625, -15.797119140625, -15.2127685546875, -14.62841796875, -14.0440673828125, -13.459716796875, -12.8753662109375, -12.291015625, -11.7066650390625, -11.122314453125, -10.5379638671875, -9.95361328125, -9.3692626953125, -8.784912109375, -8.2005615234375, -7.6162109375, -7.0318603515625, -6.447509765625, -5.8631591796875, -5.27880859375, -4.6944580078125, -4.110107421875, -3.5257568359375, -2.94140625, -2.3570556640625, -1.772705078125, -1.1883544921875, -0.60400390625, -0.0196533203125, 0.564697265625, 1.1490478515625, 1.7333984375, 2.3177490234375, 2.902099609375, 3.4864501953125, 4.07080078125, 4.6551513671875, 5.239501953125, 5.8238525390625, 6.408203125, 6.9925537109375, 7.576904296875, 8.1612548828125, 8.74560546875, 9.3299560546875, 9.914306640625, 10.4986572265625, 11.0830078125, 11.6673583984375, 12.251708984375, 12.8360595703125, 13.42041015625, 14.0047607421875, 14.589111328125, 15.1734619140625, 15.7578125]}, "gradients/decoder.transformer.h.8.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 8.0, 9.0, 8.0, 24.0, 39.0, 49.0, 63.0, 118.0, 222.0, 407.0, 933.0, 3103.0, 342894.0, 3832103.0, 11570.0, 1413.0, 596.0, 309.0, 153.0, 110.0, 62.0, 43.0, 18.0, 8.0, 11.0, 6.0, 5.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-56.8125, -54.7265625, -52.640625, -50.5546875, -48.46875, -46.3828125, -44.296875, -42.2109375, -40.125, -38.0390625, -35.953125, -33.8671875, -31.78125, -29.6953125, -27.609375, -25.5234375, -23.4375, -21.3515625, -19.265625, -17.1796875, -15.09375, -13.0078125, -10.921875, -8.8359375, -6.75, -4.6640625, -2.578125, -0.4921875, 1.59375, 3.6796875, 5.765625, 7.8515625, 9.9375, 12.0234375, 14.109375, 16.1953125, 18.28125, 20.3671875, 22.453125, 24.5390625, 26.625, 28.7109375, 30.796875, 32.8828125, 34.96875, 37.0546875, 39.140625, 41.2265625, 43.3125, 45.3984375, 47.484375, 49.5703125, 51.65625, 53.7421875, 55.828125, 57.9140625, 60.0, 62.0859375, 64.171875, 66.2578125, 68.34375, 70.4296875, 72.515625, 74.6015625, 76.6875]}, "gradients/decoder.transformer.h.8.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 9.0, 139.0, 435.0, 356.0, 71.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-233.53562927246094, -226.1939239501953, -218.85220336914062, -211.510498046875, -204.16879272460938, -196.8270721435547, -189.48536682128906, -182.14364624023438, -174.80194091796875, -167.46023559570312, -160.11851501464844, -152.7768096923828, -145.4351043701172, -138.0933837890625, -130.75167846679688, -123.40996551513672, -116.0682601928711, -108.72654724121094, -101.38484191894531, -94.04312896728516, -86.701416015625, -79.35971069335938, -72.01799774169922, -64.67628479003906, -57.33457565307617, -49.99286651611328, -42.651153564453125, -35.309444427490234, -27.96773338317871, -20.626022338867188, -13.284313201904297, -5.942600250244141, 1.39910888671875, 8.740819931030273, 16.082530975341797, 23.424240112304688, 30.76595115661621, 38.107662200927734, 45.449371337890625, 52.79108428955078, 60.13279342651367, 67.47450256347656, 74.81621551513672, 82.15792846679688, 89.4996337890625, 96.84134674072266, 104.18305969238281, 111.52476501464844, 118.8664779663086, 126.20819091796875, 133.54989624023438, 140.8916015625, 148.2333221435547, 155.5750274658203, 162.916748046875, 170.25845336914062, 177.60015869140625, 184.94186401367188, 192.28358459472656, 199.6252899169922, 206.9669952392578, 214.3087158203125, 221.65042114257812, 228.99212646484375, 236.33384704589844]}, "gradients/decoder.transformer.h.8.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 2.0, 4.0, 11.0, 6.0, 8.0, 11.0, 10.0, 17.0, 18.0, 19.0, 24.0, 20.0, 17.0, 31.0, 34.0, 32.0, 34.0, 48.0, 40.0, 42.0, 39.0, 39.0, 49.0, 30.0, 38.0, 34.0, 28.0, 24.0, 43.0, 37.0, 30.0, 35.0, 27.0, 24.0, 18.0, 22.0, 10.0, 10.0, 10.0, 7.0, 6.0, 5.0, 4.0, 2.0, 5.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-46.094520568847656, -44.69280242919922, -43.29108428955078, -41.88936996459961, -40.48765182495117, -39.085933685302734, -37.6842155456543, -36.282501220703125, -34.88078308105469, -33.47906494140625, -32.07734680175781, -30.675630569458008, -29.273914337158203, -27.872196197509766, -26.470478057861328, -25.068761825561523, -23.667043685913086, -22.26532554626465, -20.863609313964844, -19.461891174316406, -18.0601749420166, -16.658456802368164, -15.256739616394043, -13.855022430419922, -12.4533052444458, -11.05158805847168, -9.649870872497559, -8.248153686523438, -6.846436023712158, -5.444718837738037, -4.043001174926758, -2.6412839889526367, -1.2395668029785156, 0.16215050220489502, 1.5638678073883057, 2.965585231781006, 4.367302417755127, 5.769019603729248, 7.170737266540527, 8.572454452514648, 9.97417163848877, 11.37588882446289, 12.777606010437012, 14.179323196411133, 15.58104133605957, 16.982757568359375, 18.384475708007812, 19.78619384765625, 21.187910079956055, 22.589628219604492, 23.991344451904297, 25.393062591552734, 26.79477882385254, 28.196496963500977, 29.59821319580078, 30.99993133544922, 32.401649475097656, 33.803367614746094, 35.20508575439453, 36.6068000793457, 38.00851821899414, 39.41023635864258, 40.811954498291016, 42.21366882324219, 43.615386962890625]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 7.0, 2.0, 6.0, 5.0, 6.0, 7.0, 12.0, 9.0, 9.0, 9.0, 23.0, 20.0, 17.0, 21.0, 25.0, 29.0, 38.0, 41.0, 45.0, 34.0, 31.0, 47.0, 47.0, 34.0, 30.0, 50.0, 48.0, 32.0, 27.0, 32.0, 24.0, 23.0, 21.0, 26.0, 34.0, 24.0, 17.0, 25.0, 9.0, 10.0, 9.0, 8.0, 10.0, 6.0, 3.0, 4.0, 4.0, 7.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-7.5234375, -7.30572509765625, -7.0880126953125, -6.87030029296875, -6.652587890625, -6.43487548828125, -6.2171630859375, -5.99945068359375, -5.78173828125, -5.56402587890625, -5.3463134765625, -5.12860107421875, -4.910888671875, -4.69317626953125, -4.4754638671875, -4.25775146484375, -4.0400390625, -3.82232666015625, -3.6046142578125, -3.38690185546875, -3.169189453125, -2.95147705078125, -2.7337646484375, -2.51605224609375, -2.29833984375, -2.08062744140625, -1.8629150390625, -1.64520263671875, -1.427490234375, -1.20977783203125, -0.9920654296875, -0.77435302734375, -0.556640625, -0.33892822265625, -0.1212158203125, 0.09649658203125, 0.314208984375, 0.53192138671875, 0.7496337890625, 0.96734619140625, 1.18505859375, 1.40277099609375, 1.6204833984375, 1.83819580078125, 2.055908203125, 2.27362060546875, 2.4913330078125, 2.70904541015625, 2.9267578125, 3.14447021484375, 3.3621826171875, 3.57989501953125, 3.797607421875, 4.01531982421875, 4.2330322265625, 4.45074462890625, 4.66845703125, 4.88616943359375, 5.1038818359375, 5.32159423828125, 5.539306640625, 5.75701904296875, 5.9747314453125, 6.19244384765625, 6.41015625]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 4.0, 6.0, 3.0, 16.0, 9.0, 16.0, 39.0, 40.0, 59.0, 111.0, 134.0, 240.0, 368.0, 474.0, 806.0, 1096.0, 1662.0, 2535.0, 4005.0, 5702.0, 8705.0, 13450.0, 20482.0, 31647.0, 48791.0, 75336.0, 116379.0, 172006.0, 182280.0, 126674.0, 81928.0, 53161.0, 34550.0, 22760.0, 14681.0, 9601.0, 6346.0, 4137.0, 2729.0, 1921.0, 1207.0, 819.0, 525.0, 391.0, 262.0, 149.0, 124.0, 68.0, 50.0, 34.0, 16.0, 10.0, 10.0, 5.0, 6.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.49951171875, -0.48345184326171875, -0.4673919677734375, -0.45133209228515625, -0.435272216796875, -0.41921234130859375, -0.4031524658203125, -0.38709259033203125, -0.37103271484375, -0.35497283935546875, -0.3389129638671875, -0.32285308837890625, -0.306793212890625, -0.29073333740234375, -0.2746734619140625, -0.25861358642578125, -0.2425537109375, -0.22649383544921875, -0.2104339599609375, -0.19437408447265625, -0.178314208984375, -0.16225433349609375, -0.1461944580078125, -0.13013458251953125, -0.11407470703125, -0.09801483154296875, -0.0819549560546875, -0.06589508056640625, -0.049835205078125, -0.03377532958984375, -0.0177154541015625, -0.00165557861328125, 0.014404296875, 0.03046417236328125, 0.0465240478515625, 0.06258392333984375, 0.078643798828125, 0.09470367431640625, 0.1107635498046875, 0.12682342529296875, 0.14288330078125, 0.15894317626953125, 0.1750030517578125, 0.19106292724609375, 0.207122802734375, 0.22318267822265625, 0.2392425537109375, 0.25530242919921875, 0.2713623046875, 0.28742218017578125, 0.3034820556640625, 0.31954193115234375, 0.335601806640625, 0.35166168212890625, 0.3677215576171875, 0.38378143310546875, 0.39984130859375, 0.41590118408203125, 0.4319610595703125, 0.44802093505859375, 0.464080810546875, 0.48014068603515625, 0.4962005615234375, 0.5122604370117188, 0.5283203125]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 5.0, 1.0, 2.0, 6.0, 10.0, 7.0, 13.0, 17.0, 6.0, 18.0, 12.0, 19.0, 29.0, 25.0, 30.0, 28.0, 39.0, 32.0, 32.0, 32.0, 47.0, 41.0, 1069.0, 44.0, 47.0, 41.0, 43.0, 50.0, 34.0, 40.0, 37.0, 22.0, 27.0, 23.0, 14.0, 19.0, 15.0, 15.0, 5.0, 9.0, 9.0, 2.0, 5.0, 4.0, 6.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.02734375, -4.874267578125, -4.72119140625, -4.568115234375, -4.4150390625, -4.261962890625, -4.10888671875, -3.955810546875, -3.802734375, -3.649658203125, -3.49658203125, -3.343505859375, -3.1904296875, -3.037353515625, -2.88427734375, -2.731201171875, -2.578125, -2.425048828125, -2.27197265625, -2.118896484375, -1.9658203125, -1.812744140625, -1.65966796875, -1.506591796875, -1.353515625, -1.200439453125, -1.04736328125, -0.894287109375, -0.7412109375, -0.588134765625, -0.43505859375, -0.281982421875, -0.12890625, 0.024169921875, 0.17724609375, 0.330322265625, 0.4833984375, 0.636474609375, 0.78955078125, 0.942626953125, 1.095703125, 1.248779296875, 1.40185546875, 1.554931640625, 1.7080078125, 1.861083984375, 2.01416015625, 2.167236328125, 2.3203125, 2.473388671875, 2.62646484375, 2.779541015625, 2.9326171875, 3.085693359375, 3.23876953125, 3.391845703125, 3.544921875, 3.697998046875, 3.85107421875, 4.004150390625, 4.1572265625, 4.310302734375, 4.46337890625, 4.616455078125, 4.76953125]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.weight": {"_type": "histogram", "values": [4.0, 5.0, 10.0, 5.0, 14.0, 16.0, 30.0, 36.0, 64.0, 52.0, 123.0, 155.0, 244.0, 367.0, 508.0, 780.0, 1103.0, 1627.0, 2264.0, 3328.0, 4737.0, 7113.0, 10266.0, 14912.0, 22071.0, 32274.0, 46914.0, 68790.0, 99836.0, 138226.0, 1207036.0, 132681.0, 94694.0, 65478.0, 44722.0, 30901.0, 20450.0, 14143.0, 9734.0, 6507.0, 4705.0, 3126.0, 2243.0, 1530.0, 990.0, 706.0, 518.0, 327.0, 253.0, 184.0, 102.0, 68.0, 47.0, 56.0, 30.0, 14.0, 12.0, 8.0, 7.0, 1.0, 0.0, 1.0, 2.0, 2.0], "bins": [-0.376220703125, -0.3638572692871094, -0.35149383544921875, -0.3391304016113281, -0.3267669677734375, -0.3144035339355469, -0.30204010009765625, -0.2896766662597656, -0.277313232421875, -0.2649497985839844, -0.25258636474609375, -0.24022293090820312, -0.2278594970703125, -0.21549606323242188, -0.20313262939453125, -0.19076919555664062, -0.17840576171875, -0.16604232788085938, -0.15367889404296875, -0.14131546020507812, -0.1289520263671875, -0.11658859252929688, -0.10422515869140625, -0.09186172485351562, -0.079498291015625, -0.06713485717773438, -0.05477142333984375, -0.042407989501953125, -0.0300445556640625, -0.017681121826171875, -0.00531768798828125, 0.007045745849609375, 0.0194091796875, 0.031772613525390625, 0.04413604736328125, 0.056499481201171875, 0.0688629150390625, 0.08122634887695312, 0.09358978271484375, 0.10595321655273438, 0.118316650390625, 0.13068008422851562, 0.14304351806640625, 0.15540695190429688, 0.1677703857421875, 0.18013381958007812, 0.19249725341796875, 0.20486068725585938, 0.21722412109375, 0.22958755493164062, 0.24195098876953125, 0.2543144226074219, 0.2666778564453125, 0.2790412902832031, 0.29140472412109375, 0.3037681579589844, 0.316131591796875, 0.3284950256347656, 0.34085845947265625, 0.3532218933105469, 0.3655853271484375, 0.3779487609863281, 0.39031219482421875, 0.4026756286621094, 0.4150390625]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 3.0, 1.0, 5.0, 3.0, 3.0, 5.0, 10.0, 13.0, 18.0, 13.0, 13.0, 24.0, 28.0, 51.0, 32.0, 54.0, 72.0, 55.0, 67.0, 59.0, 61.0, 63.0, 56.0, 54.0, 47.0, 40.0, 29.0, 19.0, 26.0, 12.0, 18.0, 12.0, 6.0, 10.0, 8.0, 8.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.0017175674438476562, -0.0016696304082870483, -0.0016216933727264404, -0.0015737563371658325, -0.0015258193016052246, -0.0014778822660446167, -0.0014299452304840088, -0.0013820081949234009, -0.001334071159362793, -0.001286134123802185, -0.0012381970882415771, -0.0011902600526809692, -0.0011423230171203613, -0.0010943859815597534, -0.0010464489459991455, -0.0009985119104385376, -0.0009505748748779297, -0.0009026378393173218, -0.0008547008037567139, -0.000806763768196106, -0.000758826732635498, -0.0007108896970748901, -0.0006629526615142822, -0.0006150156259536743, -0.0005670785903930664, -0.0005191415548324585, -0.0004712045192718506, -0.0004232674837112427, -0.00037533044815063477, -0.00032739341259002686, -0.00027945637702941895, -0.00023151934146881104, -0.00018358230590820312, -0.00013564527034759521, -8.77082347869873e-05, -3.9771199226379395e-05, 8.165836334228516e-06, 5.6102871894836426e-05, 0.00010403990745544434, 0.00015197694301605225, 0.00019991397857666016, 0.00024785101413726807, 0.000295788049697876, 0.0003437250852584839, 0.0003916621208190918, 0.0004395991563796997, 0.0004875361919403076, 0.0005354732275009155, 0.0005834102630615234, 0.0006313472986221313, 0.0006792843341827393, 0.0007272213697433472, 0.0007751584053039551, 0.000823095440864563, 0.0008710324764251709, 0.0009189695119857788, 0.0009669065475463867, 0.0010148435831069946, 0.0010627806186676025, 0.0011107176542282104, 0.0011586546897888184, 0.0012065917253494263, 0.0012545287609100342, 0.001302465796470642, 0.00135040283203125]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 5.0, 8.0, 4.0, 6.0, 13.0, 7.0, 20.0, 25.0, 27.0, 44.0, 55.0, 77.0, 94.0, 135.0, 264.0, 591.0, 1945.0, 909959.0, 133024.0, 1066.0, 441.0, 225.0, 148.0, 86.0, 84.0, 51.0, 42.0, 21.0, 18.0, 15.0, 11.0, 10.0, 11.0, 8.0, 3.0, 6.0, 3.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.028564453125, -0.027538299560546875, -0.02651214599609375, -0.025485992431640625, -0.0244598388671875, -0.023433685302734375, -0.02240753173828125, -0.021381378173828125, -0.020355224609375, -0.019329071044921875, -0.01830291748046875, -0.017276763916015625, -0.0162506103515625, -0.015224456787109375, -0.01419830322265625, -0.013172149658203125, -0.01214599609375, -0.011119842529296875, -0.01009368896484375, -0.009067535400390625, -0.0080413818359375, -0.007015228271484375, -0.00598907470703125, -0.004962921142578125, -0.003936767578125, -0.002910614013671875, -0.00188446044921875, -0.000858306884765625, 0.0001678466796875, 0.001194000244140625, 0.00222015380859375, 0.003246307373046875, 0.0042724609375, 0.005298614501953125, 0.00632476806640625, 0.007350921630859375, 0.0083770751953125, 0.009403228759765625, 0.01042938232421875, 0.011455535888671875, 0.012481689453125, 0.013507843017578125, 0.01453399658203125, 0.015560150146484375, 0.0165863037109375, 0.017612457275390625, 0.01863861083984375, 0.019664764404296875, 0.02069091796875, 0.021717071533203125, 0.02274322509765625, 0.023769378662109375, 0.0247955322265625, 0.025821685791015625, 0.02684783935546875, 0.027873992919921875, 0.028900146484375, 0.029926300048828125, 0.03095245361328125, 0.031978607177734375, 0.0330047607421875, 0.034030914306640625, 0.03505706787109375, 0.036083221435546875, 0.037109375]}, "gradients/decoder.transformer.h.8.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 20.0, 100.0, 345.0, 366.0, 148.0, 27.0, 9.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005234075710177422, -0.0051317536272108555, -0.005029431078583002, -0.004927108995616436, -0.00482478691264987, -0.0047224643640220165, -0.0046201422810554504, -0.004517819732427597, -0.004415497649461031, -0.004313175566494465, -0.0042108530178666115, -0.004108530934900045, -0.004006208851933479, -0.003903886303305626, -0.00380156422033906, -0.00369924190454185, -0.0035969195887446404, -0.0034945972729474306, -0.0033922751899808645, -0.003289952874183655, -0.003187630558386445, -0.0030853082425892353, -0.0029829861596226692, -0.0028806638438254595, -0.0027783417608588934, -0.0026760194450616837, -0.0025736973620951176, -0.002471375046297908, -0.002369052730500698, -0.002266730647534132, -0.0021644083317369223, -0.0020620860159397125, -0.001959763700142503, -0.0018574415007606149, -0.0017551191849634051, -0.0016527969855815172, -0.0015504746697843075, -0.0014481524704024196, -0.0013458302710205317, -0.001243507955223322, -0.0011411856394261122, -0.0010388634400442243, -0.0009365411242470145, -0.0008342189248651266, -0.0007318966090679169, -0.000629574409686029, -0.0005272521520964801, -0.0004249298945069313, -0.0003226076951250434, -0.00022028543753549457, -0.00011796319449786097, -1.564095146022737e-05, 8.668130612932146e-05, 0.00018900353461503983, 0.00029132579220458865, 0.0003936480497941375, 0.0004959703073836863, 0.0005982925649732351, 0.000700614822562784, 0.0008029370801523328, 0.0009052592795342207, 0.0010075815953314304, 0.0011099037947133183, 0.0012122259940952063, 0.001314548309892416]}, "gradients/decoder.transformer.h.8.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 3.0, 5.0, 3.0, 2.0, 3.0, 15.0, 15.0, 20.0, 15.0, 17.0, 34.0, 25.0, 31.0, 31.0, 40.0, 40.0, 32.0, 62.0, 41.0, 48.0, 56.0, 52.0, 46.0, 32.0, 43.0, 30.0, 28.0, 43.0, 30.0, 31.0, 20.0, 23.0, 17.0, 14.0, 14.0, 11.0, 10.0, 8.0, 4.0, 7.0, 6.0, 6.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006322264671325684, -0.0006057368591427803, -0.0005792472511529922, -0.0005527576431632042, -0.0005262680351734161, -0.0004997784271836281, -0.00047328881919384, -0.00044679921120405197, -0.0004203096032142639, -0.00039381999522447586, -0.0003673303872346878, -0.00034084077924489975, -0.0003143511712551117, -0.00028786156326532364, -0.0002613719552755356, -0.00023488234728574753, -0.00020839273929595947, -0.00018190313130617142, -0.00015541352331638336, -0.0001289239153265953, -0.00010243430733680725, -7.59446993470192e-05, -4.945509135723114e-05, -2.2965483367443085e-05, 3.5241246223449707e-06, 3.0013732612133026e-05, 5.650334060192108e-05, 8.299294859170914e-05, 0.00010948255658149719, 0.00013597216457128525, 0.0001624617725610733, 0.00018895138055086136, 0.00021544098854064941, 0.00024193059653043747, 0.0002684202045202255, 0.0002949098125100136, 0.00032139942049980164, 0.0003478890284895897, 0.00037437863647937775, 0.0004008682444691658, 0.00042735785245895386, 0.0004538474604487419, 0.00048033706843852997, 0.000506826676428318, 0.0005333162844181061, 0.0005598058924078941, 0.0005862955003976822, 0.0006127851083874702, 0.0006392747163772583, 0.0006657643243670464, 0.0006922539323568344, 0.0007187435403466225, 0.0007452331483364105, 0.0007717227563261986, 0.0007982123643159866, 0.0008247019723057747, 0.0008511915802955627, 0.0008776811882853508, 0.0009041707962751389, 0.0009306604042649269, 0.000957150012254715, 0.000983639620244503, 0.001010129228234291, 0.0010366188362240791, 0.0010631084442138672]}, "gradients/decoder.transformer.h.8.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 7.0, 2.0, 6.0, 5.0, 6.0, 7.0, 12.0, 9.0, 9.0, 9.0, 23.0, 20.0, 17.0, 21.0, 25.0, 29.0, 38.0, 41.0, 45.0, 34.0, 31.0, 47.0, 47.0, 34.0, 30.0, 50.0, 48.0, 32.0, 27.0, 32.0, 24.0, 23.0, 21.0, 26.0, 34.0, 24.0, 17.0, 25.0, 9.0, 10.0, 9.0, 8.0, 10.0, 6.0, 3.0, 4.0, 4.0, 7.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-7.5234375, -7.30572509765625, -7.0880126953125, -6.87030029296875, -6.652587890625, -6.43487548828125, -6.2171630859375, -5.99945068359375, -5.78173828125, -5.56402587890625, -5.3463134765625, -5.12860107421875, -4.910888671875, -4.69317626953125, -4.4754638671875, -4.25775146484375, -4.0400390625, -3.82232666015625, -3.6046142578125, -3.38690185546875, -3.169189453125, -2.95147705078125, -2.7337646484375, -2.51605224609375, -2.29833984375, -2.08062744140625, -1.8629150390625, -1.64520263671875, -1.427490234375, -1.20977783203125, -0.9920654296875, -0.77435302734375, -0.556640625, -0.33892822265625, -0.1212158203125, 0.09649658203125, 0.314208984375, 0.53192138671875, 0.7496337890625, 0.96734619140625, 1.18505859375, 1.40277099609375, 1.6204833984375, 1.83819580078125, 2.055908203125, 2.27362060546875, 2.4913330078125, 2.70904541015625, 2.9267578125, 3.14447021484375, 3.3621826171875, 3.57989501953125, 3.797607421875, 4.01531982421875, 4.2330322265625, 4.45074462890625, 4.66845703125, 4.88616943359375, 5.1038818359375, 5.32159423828125, 5.539306640625, 5.75701904296875, 5.9747314453125, 6.19244384765625, 6.41015625]}, "gradients/decoder.transformer.h.8.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 10.0, 3.0, 5.0, 6.0, 6.0, 6.0, 14.0, 11.0, 14.0, 30.0, 39.0, 74.0, 85.0, 236.0, 402.0, 904.0, 2080.0, 4901.0, 11473.0, 26692.0, 66531.0, 188736.0, 380918.0, 228999.0, 80651.0, 31916.0, 13320.0, 5843.0, 2500.0, 1093.0, 488.0, 235.0, 118.0, 66.0, 35.0, 34.0, 22.0, 15.0, 10.0, 9.0, 9.0, 6.0, 6.0, 7.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.2109375, -8.9447021484375, -8.678466796875, -8.4122314453125, -8.14599609375, -7.8797607421875, -7.613525390625, -7.3472900390625, -7.0810546875, -6.8148193359375, -6.548583984375, -6.2823486328125, -6.01611328125, -5.7498779296875, -5.483642578125, -5.2174072265625, -4.951171875, -4.6849365234375, -4.418701171875, -4.1524658203125, -3.88623046875, -3.6199951171875, -3.353759765625, -3.0875244140625, -2.8212890625, -2.5550537109375, -2.288818359375, -2.0225830078125, -1.75634765625, -1.4901123046875, -1.223876953125, -0.9576416015625, -0.69140625, -0.4251708984375, -0.158935546875, 0.1072998046875, 0.37353515625, 0.6397705078125, 0.906005859375, 1.1722412109375, 1.4384765625, 1.7047119140625, 1.970947265625, 2.2371826171875, 2.50341796875, 2.7696533203125, 3.035888671875, 3.3021240234375, 3.568359375, 3.8345947265625, 4.100830078125, 4.3670654296875, 4.63330078125, 4.8995361328125, 5.165771484375, 5.4320068359375, 5.6982421875, 5.9644775390625, 6.230712890625, 6.4969482421875, 6.76318359375, 7.0294189453125, 7.295654296875, 7.5618896484375, 7.828125]}, "gradients/decoder.transformer.h.8.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 2.0, 4.0, 2.0, 5.0, 2.0, 5.0, 12.0, 8.0, 11.0, 11.0, 8.0, 18.0, 36.0, 22.0, 39.0, 38.0, 49.0, 38.0, 55.0, 58.0, 92.0, 178.0, 1410.0, 345.0, 131.0, 85.0, 89.0, 47.0, 44.0, 24.0, 32.0, 22.0, 27.0, 22.0, 16.0, 16.0, 16.0, 7.0, 12.0, 9.0, 4.0, 5.0, 5.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.984375, -22.131103515625, -21.27783203125, -20.424560546875, -19.5712890625, -18.718017578125, -17.86474609375, -17.011474609375, -16.158203125, -15.304931640625, -14.45166015625, -13.598388671875, -12.7451171875, -11.891845703125, -11.03857421875, -10.185302734375, -9.33203125, -8.478759765625, -7.62548828125, -6.772216796875, -5.9189453125, -5.065673828125, -4.21240234375, -3.359130859375, -2.505859375, -1.652587890625, -0.79931640625, 0.053955078125, 0.9072265625, 1.760498046875, 2.61376953125, 3.467041015625, 4.3203125, 5.173583984375, 6.02685546875, 6.880126953125, 7.7333984375, 8.586669921875, 9.43994140625, 10.293212890625, 11.146484375, 11.999755859375, 12.85302734375, 13.706298828125, 14.5595703125, 15.412841796875, 16.26611328125, 17.119384765625, 17.97265625, 18.825927734375, 19.67919921875, 20.532470703125, 21.3857421875, 22.239013671875, 23.09228515625, 23.945556640625, 24.798828125, 25.652099609375, 26.50537109375, 27.358642578125, 28.2119140625, 29.065185546875, 29.91845703125, 30.771728515625, 31.625]}, "gradients/decoder.transformer.h.8.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 2.0, 3.0, 3.0, 5.0, 9.0, 8.0, 17.0, 27.0, 36.0, 55.0, 73.0, 113.0, 181.0, 277.0, 560.0, 1531.0, 47292.0, 3074774.0, 18222.0, 1175.0, 514.0, 309.0, 167.0, 113.0, 68.0, 36.0, 32.0, 23.0, 17.0, 17.0, 15.0, 8.0, 5.0, 4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.3125, -58.1123046875, -55.912109375, -53.7119140625, -51.51171875, -49.3115234375, -47.111328125, -44.9111328125, -42.7109375, -40.5107421875, -38.310546875, -36.1103515625, -33.91015625, -31.7099609375, -29.509765625, -27.3095703125, -25.109375, -22.9091796875, -20.708984375, -18.5087890625, -16.30859375, -14.1083984375, -11.908203125, -9.7080078125, -7.5078125, -5.3076171875, -3.107421875, -0.9072265625, 1.29296875, 3.4931640625, 5.693359375, 7.8935546875, 10.09375, 12.2939453125, 14.494140625, 16.6943359375, 18.89453125, 21.0947265625, 23.294921875, 25.4951171875, 27.6953125, 29.8955078125, 32.095703125, 34.2958984375, 36.49609375, 38.6962890625, 40.896484375, 43.0966796875, 45.296875, 47.4970703125, 49.697265625, 51.8974609375, 54.09765625, 56.2978515625, 58.498046875, 60.6982421875, 62.8984375, 65.0986328125, 67.298828125, 69.4990234375, 71.69921875, 73.8994140625, 76.099609375, 78.2998046875, 80.5]}, "gradients/decoder.transformer.h.8.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 7.0, 34.0, 66.0, 139.0, 200.0, 207.0, 165.0, 94.0, 72.0, 21.0, 5.0, 5.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.15580940246582, -18.501420974731445, -16.84703254699707, -15.192644119262695, -13.53825569152832, -11.883867263793945, -10.22947883605957, -8.575090408325195, -6.92070198059082, -5.266313552856445, -3.6119251251220703, -1.9575366973876953, -0.3031482696533203, 1.3512401580810547, 3.0056285858154297, 4.660017013549805, 6.31440544128418, 7.968793869018555, 9.62318229675293, 11.277570724487305, 12.93195915222168, 14.586347579956055, 16.24073600769043, 17.895124435424805, 19.54951286315918, 21.203901290893555, 22.85828971862793, 24.512678146362305, 26.16706657409668, 27.821455001831055, 29.47584342956543, 31.130231857299805, 32.78462219238281, 34.43901062011719, 36.09339904785156, 37.74778747558594, 39.40217590332031, 41.05656433105469, 42.71095275878906, 44.36534118652344, 46.01972961425781, 47.67411804199219, 49.32850646972656, 50.98289489746094, 52.63728332519531, 54.29167175292969, 55.94606018066406, 57.60044860839844, 59.25483703613281, 60.90922546386719, 62.56361389160156, 64.21800231933594, 65.87239074707031, 67.52677917480469, 69.18116760253906, 70.83555603027344, 72.48994445800781, 74.14433288574219, 75.79872131347656, 77.45310974121094, 79.10749816894531, 80.76188659667969, 82.41627502441406, 84.07066345214844, 85.72505187988281]}, "gradients/decoder.transformer.h.8.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 5.0, 2.0, 6.0, 8.0, 10.0, 5.0, 7.0, 20.0, 26.0, 8.0, 15.0, 27.0, 24.0, 28.0, 41.0, 35.0, 35.0, 42.0, 38.0, 45.0, 43.0, 42.0, 38.0, 36.0, 40.0, 56.0, 36.0, 34.0, 37.0, 35.0, 26.0, 24.0, 30.0, 10.0, 17.0, 13.0, 13.0, 11.0, 13.0, 9.0, 5.0, 6.0, 4.0, 7.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-54.48859786987305, -52.599021911621094, -50.70944595336914, -48.81986999511719, -46.93029022216797, -45.04071807861328, -43.15113830566406, -41.26156234741211, -39.371986389160156, -37.4824104309082, -35.59283447265625, -33.7032585144043, -31.81368064880371, -29.924104690551758, -28.034526824951172, -26.14495086669922, -24.255374908447266, -22.365798950195312, -20.47622299194336, -18.586645126342773, -16.69706916809082, -14.807493209838867, -12.917916297912598, -11.028339385986328, -9.138763427734375, -7.249186992645264, -5.359610557556152, -3.470034122467041, -1.5804576873779297, 0.30911827087402344, 2.198695182800293, 4.0882720947265625, 5.97784423828125, 7.867420673370361, 9.756997108459473, 11.646574020385742, 13.536149978637695, 15.425725936889648, 17.315303802490234, 19.204879760742188, 21.09445571899414, 22.984031677246094, 24.873607635498047, 26.763185501098633, 28.652761459350586, 30.54233741760254, 32.431915283203125, 34.32149124145508, 36.21106719970703, 38.100643157958984, 39.99021911621094, 41.87979507446289, 43.769371032714844, 45.65895080566406, 47.548526763916016, 49.43810272216797, 51.32767868041992, 53.217254638671875, 55.10683059692383, 56.99640655517578, 58.885986328125, 60.77555847167969, 62.665138244628906, 64.55471801757812, 66.44429016113281]}, "gradients/decoder.transformer.h.7.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 3.0, 4.0, 1.0, 4.0, 4.0, 7.0, 2.0, 7.0, 4.0, 6.0, 15.0, 12.0, 14.0, 18.0, 9.0, 19.0, 17.0, 21.0, 29.0, 35.0, 26.0, 33.0, 32.0, 47.0, 39.0, 38.0, 45.0, 38.0, 44.0, 29.0, 38.0, 42.0, 37.0, 21.0, 32.0, 24.0, 30.0, 30.0, 17.0, 18.0, 16.0, 16.0, 15.0, 14.0, 11.0, 10.0, 8.0, 7.0, 8.0, 6.0, 3.0, 5.0, 2.0, 0.0, 2.0, 2.0, 1.0], "bins": [-7.6640625, -7.4420166015625, -7.219970703125, -6.9979248046875, -6.77587890625, -6.5538330078125, -6.331787109375, -6.1097412109375, -5.8876953125, -5.6656494140625, -5.443603515625, -5.2215576171875, -4.99951171875, -4.7774658203125, -4.555419921875, -4.3333740234375, -4.111328125, -3.8892822265625, -3.667236328125, -3.4451904296875, -3.22314453125, -3.0010986328125, -2.779052734375, -2.5570068359375, -2.3349609375, -2.1129150390625, -1.890869140625, -1.6688232421875, -1.44677734375, -1.2247314453125, -1.002685546875, -0.7806396484375, -0.55859375, -0.3365478515625, -0.114501953125, 0.1075439453125, 0.32958984375, 0.5516357421875, 0.773681640625, 0.9957275390625, 1.2177734375, 1.4398193359375, 1.661865234375, 1.8839111328125, 2.10595703125, 2.3280029296875, 2.550048828125, 2.7720947265625, 2.994140625, 3.2161865234375, 3.438232421875, 3.6602783203125, 3.88232421875, 4.1043701171875, 4.326416015625, 4.5484619140625, 4.7705078125, 4.9925537109375, 5.214599609375, 5.4366455078125, 5.65869140625, 5.8807373046875, 6.102783203125, 6.3248291015625, 6.546875]}, "gradients/decoder.transformer.h.7.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 0.0, 1.0, 1.0, 4.0, 3.0, 6.0, 8.0, 4.0, 5.0, 9.0, 8.0, 7.0, 23.0, 18.0, 25.0, 22.0, 26.0, 36.0, 42.0, 70.0, 70.0, 124.0, 165.0, 501.0, 2334.0, 26609.0, 491150.0, 2798075.0, 821885.0, 48049.0, 3639.0, 629.0, 208.0, 119.0, 71.0, 60.0, 47.0, 45.0, 38.0, 22.0, 17.0, 20.0, 11.0, 15.0, 10.0, 16.0, 10.0, 7.0, 9.0, 8.0, 3.0, 3.0, 3.0, 2.0, 1.0, 0.0, 1.0], "bins": [-20.1875, -19.60400390625, -19.0205078125, -18.43701171875, -17.853515625, -17.27001953125, -16.6865234375, -16.10302734375, -15.51953125, -14.93603515625, -14.3525390625, -13.76904296875, -13.185546875, -12.60205078125, -12.0185546875, -11.43505859375, -10.8515625, -10.26806640625, -9.6845703125, -9.10107421875, -8.517578125, -7.93408203125, -7.3505859375, -6.76708984375, -6.18359375, -5.60009765625, -5.0166015625, -4.43310546875, -3.849609375, -3.26611328125, -2.6826171875, -2.09912109375, -1.515625, -0.93212890625, -0.3486328125, 0.23486328125, 0.818359375, 1.40185546875, 1.9853515625, 2.56884765625, 3.15234375, 3.73583984375, 4.3193359375, 4.90283203125, 5.486328125, 6.06982421875, 6.6533203125, 7.23681640625, 7.8203125, 8.40380859375, 8.9873046875, 9.57080078125, 10.154296875, 10.73779296875, 11.3212890625, 11.90478515625, 12.48828125, 13.07177734375, 13.6552734375, 14.23876953125, 14.822265625, 15.40576171875, 15.9892578125, 16.57275390625, 17.15625]}, "gradients/decoder.transformer.h.7.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 6.0, 5.0, 5.0, 16.0, 14.0, 16.0, 12.0, 31.0, 44.0, 59.0, 77.0, 84.0, 121.0, 207.0, 240.0, 357.0, 413.0, 423.0, 432.0, 398.0, 279.0, 218.0, 174.0, 118.0, 93.0, 48.0, 41.0, 44.0, 33.0, 25.0, 10.0, 9.0, 8.0, 6.0, 6.0, 1.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.734375, -12.334716796875, -11.93505859375, -11.535400390625, -11.1357421875, -10.736083984375, -10.33642578125, -9.936767578125, -9.537109375, -9.137451171875, -8.73779296875, -8.338134765625, -7.9384765625, -7.538818359375, -7.13916015625, -6.739501953125, -6.33984375, -5.940185546875, -5.54052734375, -5.140869140625, -4.7412109375, -4.341552734375, -3.94189453125, -3.542236328125, -3.142578125, -2.742919921875, -2.34326171875, -1.943603515625, -1.5439453125, -1.144287109375, -0.74462890625, -0.344970703125, 0.0546875, 0.454345703125, 0.85400390625, 1.253662109375, 1.6533203125, 2.052978515625, 2.45263671875, 2.852294921875, 3.251953125, 3.651611328125, 4.05126953125, 4.450927734375, 4.8505859375, 5.250244140625, 5.64990234375, 6.049560546875, 6.44921875, 6.848876953125, 7.24853515625, 7.648193359375, 8.0478515625, 8.447509765625, 8.84716796875, 9.246826171875, 9.646484375, 10.046142578125, 10.44580078125, 10.845458984375, 11.2451171875, 11.644775390625, 12.04443359375, 12.444091796875, 12.84375]}, "gradients/decoder.transformer.h.7.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 2.0, 6.0, 3.0, 5.0, 16.0, 10.0, 28.0, 34.0, 35.0, 59.0, 84.0, 115.0, 139.0, 216.0, 340.0, 586.0, 1554.0, 23994.0, 1886895.0, 2248074.0, 28478.0, 1857.0, 564.0, 389.0, 232.0, 143.0, 122.0, 90.0, 68.0, 41.0, 23.0, 29.0, 15.0, 13.0, 13.0, 4.0, 4.0, 4.0, 1.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-42.1875, -40.8701171875, -39.552734375, -38.2353515625, -36.91796875, -35.6005859375, -34.283203125, -32.9658203125, -31.6484375, -30.3310546875, -29.013671875, -27.6962890625, -26.37890625, -25.0615234375, -23.744140625, -22.4267578125, -21.109375, -19.7919921875, -18.474609375, -17.1572265625, -15.83984375, -14.5224609375, -13.205078125, -11.8876953125, -10.5703125, -9.2529296875, -7.935546875, -6.6181640625, -5.30078125, -3.9833984375, -2.666015625, -1.3486328125, -0.03125, 1.2861328125, 2.603515625, 3.9208984375, 5.23828125, 6.5556640625, 7.873046875, 9.1904296875, 10.5078125, 11.8251953125, 13.142578125, 14.4599609375, 15.77734375, 17.0947265625, 18.412109375, 19.7294921875, 21.046875, 22.3642578125, 23.681640625, 24.9990234375, 26.31640625, 27.6337890625, 28.951171875, 30.2685546875, 31.5859375, 32.9033203125, 34.220703125, 35.5380859375, 36.85546875, 38.1728515625, 39.490234375, 40.8076171875, 42.125]}, "gradients/decoder.transformer.h.7.ln_2.weight": {"_type": "histogram", "values": [4.0, 44.0, 311.0, 473.0, 172.0, 13.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.403104782104492, -17.038330078125, -9.673555374145508, -2.3087806701660156, 5.055994033813477, 12.420770645141602, 19.78554344177246, 27.15031623840332, 34.51509094238281, 41.87986755371094, 49.2446403503418, 56.609413146972656, 63.97418975830078, 71.3389663696289, 78.7037353515625, 86.06851196289062, 93.43328857421875, 100.79806518554688, 108.162841796875, 115.5276107788086, 122.89238739013672, 130.25717163085938, 137.62193298339844, 144.98670959472656, 152.3514862060547, 159.7162628173828, 167.08103942871094, 174.44581604003906, 181.81057739257812, 189.17535400390625, 196.54013061523438, 203.9049072265625, 211.26968383789062, 218.63446044921875, 225.99923706054688, 233.364013671875, 240.72879028320312, 248.09356689453125, 255.4583282470703, 262.8231201171875, 270.1878662109375, 277.5526428222656, 284.91741943359375, 292.2821960449219, 299.64697265625, 307.0117492675781, 314.37652587890625, 321.74127197265625, 329.1060791015625, 336.4708557128906, 343.83563232421875, 351.2004089355469, 358.565185546875, 365.9299621582031, 373.29473876953125, 380.65948486328125, 388.0242919921875, 395.3890686035156, 402.75384521484375, 410.1186218261719, 417.4833984375, 424.8481750488281, 432.21295166015625, 439.57769775390625, 446.9424743652344]}, "gradients/decoder.transformer.h.7.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 13.0, 13.0, 21.0, 21.0, 21.0, 22.0, 28.0, 30.0, 43.0, 45.0, 30.0, 40.0, 54.0, 34.0, 51.0, 41.0, 42.0, 41.0, 26.0, 32.0, 42.0, 39.0, 26.0, 41.0, 31.0, 23.0, 15.0, 13.0, 15.0, 19.0, 13.0, 11.0, 7.0, 4.0, 8.0, 3.0, 7.0, 4.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.55632019042969, -40.210723876953125, -38.86512756347656, -37.51953125, -36.17393493652344, -34.828338623046875, -33.48274230957031, -32.137149810791016, -30.791553497314453, -29.44595718383789, -28.100360870361328, -26.754764556884766, -25.409170150756836, -24.063573837280273, -22.71797752380371, -21.37238311767578, -20.026784896850586, -18.681188583374023, -17.33559226989746, -15.989996910095215, -14.644401550292969, -13.298805236816406, -11.953208923339844, -10.607613563537598, -9.262017250061035, -7.916421413421631, -6.570825576782227, -5.225229263305664, -3.8796334266662598, -2.5340375900268555, -1.188441276550293, 0.15715408325195312, 1.5027503967285156, 2.84834623336792, 4.193942070007324, 5.539538383483887, 6.885134220123291, 8.230730056762695, 9.576326370239258, 10.921921730041504, 12.267518043518066, 13.613114356994629, 14.958709716796875, 16.304306030273438, 17.64990234375, 18.995498657226562, 20.341094970703125, 21.686689376831055, 23.032285690307617, 24.37788200378418, 25.723478317260742, 27.069072723388672, 28.414669036865234, 29.760265350341797, 31.10586166381836, 32.45145797729492, 33.797054290771484, 35.14265060424805, 36.48824691772461, 37.83384323120117, 39.179439544677734, 40.52503204345703, 41.870628356933594, 43.216224670410156, 44.56182098388672]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 5.0, 2.0, 1.0, 4.0, 5.0, 8.0, 6.0, 7.0, 13.0, 6.0, 14.0, 14.0, 12.0, 14.0, 22.0, 19.0, 25.0, 36.0, 31.0, 32.0, 39.0, 35.0, 30.0, 32.0, 36.0, 42.0, 29.0, 42.0, 43.0, 37.0, 39.0, 35.0, 36.0, 35.0, 24.0, 28.0, 27.0, 21.0, 15.0, 23.0, 14.0, 15.0, 10.0, 9.0, 5.0, 5.0, 12.0, 14.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-7.10546875, -6.88580322265625, -6.6661376953125, -6.44647216796875, -6.226806640625, -6.00714111328125, -5.7874755859375, -5.56781005859375, -5.34814453125, -5.12847900390625, -4.9088134765625, -4.68914794921875, -4.469482421875, -4.24981689453125, -4.0301513671875, -3.81048583984375, -3.5908203125, -3.37115478515625, -3.1514892578125, -2.93182373046875, -2.712158203125, -2.49249267578125, -2.2728271484375, -2.05316162109375, -1.83349609375, -1.61383056640625, -1.3941650390625, -1.17449951171875, -0.954833984375, -0.73516845703125, -0.5155029296875, -0.29583740234375, -0.076171875, 0.14349365234375, 0.3631591796875, 0.58282470703125, 0.802490234375, 1.02215576171875, 1.2418212890625, 1.46148681640625, 1.68115234375, 1.90081787109375, 2.1204833984375, 2.34014892578125, 2.559814453125, 2.77947998046875, 2.9991455078125, 3.21881103515625, 3.4384765625, 3.65814208984375, 3.8778076171875, 4.09747314453125, 4.317138671875, 4.53680419921875, 4.7564697265625, 4.97613525390625, 5.19580078125, 5.41546630859375, 5.6351318359375, 5.85479736328125, 6.074462890625, 6.29412841796875, 6.5137939453125, 6.73345947265625, 6.953125]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 4.0, 10.0, 8.0, 7.0, 18.0, 23.0, 52.0, 67.0, 97.0, 126.0, 201.0, 254.0, 413.0, 655.0, 925.0, 1310.0, 1958.0, 2895.0, 4426.0, 6730.0, 10317.0, 16351.0, 25792.0, 41047.0, 67624.0, 108619.0, 166660.0, 195391.0, 146904.0, 93714.0, 57747.0, 35526.0, 22121.0, 14065.0, 8879.0, 5860.0, 3892.0, 2543.0, 1745.0, 1153.0, 805.0, 500.0, 367.0, 231.0, 165.0, 127.0, 72.0, 56.0, 42.0, 24.0, 16.0, 14.0, 7.0, 7.0, 3.0, 2.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.5439453125, -0.5265884399414062, -0.5092315673828125, -0.49187469482421875, -0.474517822265625, -0.45716094970703125, -0.4398040771484375, -0.42244720458984375, -0.40509033203125, -0.38773345947265625, -0.3703765869140625, -0.35301971435546875, -0.335662841796875, -0.31830596923828125, -0.3009490966796875, -0.28359222412109375, -0.2662353515625, -0.24887847900390625, -0.2315216064453125, -0.21416473388671875, -0.196807861328125, -0.17945098876953125, -0.1620941162109375, -0.14473724365234375, -0.12738037109375, -0.11002349853515625, -0.0926666259765625, -0.07530975341796875, -0.057952880859375, -0.04059600830078125, -0.0232391357421875, -0.00588226318359375, 0.011474609375, 0.02883148193359375, 0.0461883544921875, 0.06354522705078125, 0.080902099609375, 0.09825897216796875, 0.1156158447265625, 0.13297271728515625, 0.15032958984375, 0.16768646240234375, 0.1850433349609375, 0.20240020751953125, 0.219757080078125, 0.23711395263671875, 0.2544708251953125, 0.27182769775390625, 0.2891845703125, 0.30654144287109375, 0.3238983154296875, 0.34125518798828125, 0.358612060546875, 0.37596893310546875, 0.3933258056640625, 0.41068267822265625, 0.42803955078125, 0.44539642333984375, 0.4627532958984375, 0.48011016845703125, 0.497467041015625, 0.5148239135742188, 0.5321807861328125, 0.5495376586914062, 0.56689453125]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 6.0, 1.0, 4.0, 5.0, 12.0, 10.0, 13.0, 9.0, 15.0, 13.0, 25.0, 19.0, 26.0, 22.0, 25.0, 38.0, 33.0, 48.0, 28.0, 31.0, 46.0, 42.0, 55.0, 1068.0, 44.0, 41.0, 37.0, 42.0, 30.0, 33.0, 26.0, 24.0, 20.0, 27.0, 25.0, 15.0, 13.0, 10.0, 7.0, 9.0, 9.0, 5.0, 6.0, 6.0, 1.0, 3.0, 1.0, 2.0, 3.0, 3.0, 3.0], "bins": [-5.44140625, -5.2904052734375, -5.139404296875, -4.9884033203125, -4.83740234375, -4.6864013671875, -4.535400390625, -4.3843994140625, -4.2333984375, -4.0823974609375, -3.931396484375, -3.7803955078125, -3.62939453125, -3.4783935546875, -3.327392578125, -3.1763916015625, -3.025390625, -2.8743896484375, -2.723388671875, -2.5723876953125, -2.42138671875, -2.2703857421875, -2.119384765625, -1.9683837890625, -1.8173828125, -1.6663818359375, -1.515380859375, -1.3643798828125, -1.21337890625, -1.0623779296875, -0.911376953125, -0.7603759765625, -0.609375, -0.4583740234375, -0.307373046875, -0.1563720703125, -0.00537109375, 0.1456298828125, 0.296630859375, 0.4476318359375, 0.5986328125, 0.7496337890625, 0.900634765625, 1.0516357421875, 1.20263671875, 1.3536376953125, 1.504638671875, 1.6556396484375, 1.806640625, 1.9576416015625, 2.108642578125, 2.2596435546875, 2.41064453125, 2.5616455078125, 2.712646484375, 2.8636474609375, 3.0146484375, 3.1656494140625, 3.316650390625, 3.4676513671875, 3.61865234375, 3.7696533203125, 3.920654296875, 4.0716552734375, 4.22265625]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 5.0, 0.0, 6.0, 4.0, 20.0, 25.0, 44.0, 44.0, 98.0, 139.0, 245.0, 381.0, 661.0, 1042.0, 1657.0, 2786.0, 4407.0, 7318.0, 11551.0, 18437.0, 30564.0, 50245.0, 83272.0, 134693.0, 1226085.0, 198172.0, 126636.0, 77621.0, 46589.0, 28461.0, 17432.0, 10779.0, 6777.0, 4170.0, 2568.0, 1611.0, 1052.0, 601.0, 383.0, 219.0, 107.0, 90.0, 56.0, 33.0, 26.0, 15.0, 7.0, 4.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.5361328125, -0.5198516845703125, -0.503570556640625, -0.4872894287109375, -0.47100830078125, -0.4547271728515625, -0.438446044921875, -0.4221649169921875, -0.4058837890625, -0.3896026611328125, -0.373321533203125, -0.3570404052734375, -0.34075927734375, -0.3244781494140625, -0.308197021484375, -0.2919158935546875, -0.275634765625, -0.2593536376953125, -0.243072509765625, -0.2267913818359375, -0.21051025390625, -0.1942291259765625, -0.177947998046875, -0.1616668701171875, -0.1453857421875, -0.1291046142578125, -0.112823486328125, -0.0965423583984375, -0.08026123046875, -0.0639801025390625, -0.047698974609375, -0.0314178466796875, -0.01513671875, 0.0011444091796875, 0.017425537109375, 0.0337066650390625, 0.04998779296875, 0.0662689208984375, 0.082550048828125, 0.0988311767578125, 0.1151123046875, 0.1313934326171875, 0.147674560546875, 0.1639556884765625, 0.18023681640625, 0.1965179443359375, 0.212799072265625, 0.2290802001953125, 0.245361328125, 0.2616424560546875, 0.277923583984375, 0.2942047119140625, 0.31048583984375, 0.3267669677734375, 0.343048095703125, 0.3593292236328125, 0.3756103515625, 0.3918914794921875, 0.408172607421875, 0.4244537353515625, 0.44073486328125, 0.4570159912109375, 0.473297119140625, 0.4895782470703125, 0.505859375]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 5.0, 17.0, 19.0, 18.0, 28.0, 27.0, 32.0, 56.0, 75.0, 78.0, 93.0, 89.0, 87.0, 78.0, 80.0, 58.0, 34.0, 33.0, 20.0, 23.0, 13.0, 10.0, 12.0, 3.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.002216339111328125, -0.002132326364517212, -0.002048313617706299, -0.0019643008708953857, -0.0018802881240844727, -0.0017962753772735596, -0.0017122626304626465, -0.0016282498836517334, -0.0015442371368408203, -0.0014602243900299072, -0.0013762116432189941, -0.001292198896408081, -0.001208186149597168, -0.0011241734027862549, -0.0010401606559753418, -0.0009561479091644287, -0.0008721351623535156, -0.0007881224155426025, -0.0007041096687316895, -0.0006200969219207764, -0.0005360841751098633, -0.0004520714282989502, -0.0003680586814880371, -0.000284045934677124, -0.00020003318786621094, -0.00011602044105529785, -3.2007694244384766e-05, 5.200505256652832e-05, 0.0001360177993774414, 0.0002200305461883545, 0.0003040432929992676, 0.00038805603981018066, 0.00047206878662109375, 0.0005560815334320068, 0.0006400942802429199, 0.000724107027053833, 0.0008081197738647461, 0.0008921325206756592, 0.0009761452674865723, 0.0010601580142974854, 0.0011441707611083984, 0.0012281835079193115, 0.0013121962547302246, 0.0013962090015411377, 0.0014802217483520508, 0.0015642344951629639, 0.001648247241973877, 0.00173225998878479, 0.0018162727355957031, 0.0019002854824066162, 0.0019842982292175293, 0.0020683109760284424, 0.0021523237228393555, 0.0022363364696502686, 0.0023203492164611816, 0.0024043619632720947, 0.002488374710083008, 0.002572387456893921, 0.002656400203704834, 0.002740412950515747, 0.00282442569732666, 0.0029084384441375732, 0.0029924511909484863, 0.0030764639377593994, 0.0031604766845703125]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 6.0, 1.0, 2.0, 4.0, 5.0, 4.0, 5.0, 15.0, 15.0, 15.0, 37.0, 41.0, 62.0, 95.0, 148.0, 248.0, 556.0, 2601.0, 1040640.0, 2823.0, 572.0, 238.0, 147.0, 66.0, 62.0, 50.0, 44.0, 23.0, 15.0, 6.0, 5.0, 4.0, 3.0, 4.0, 0.0, 0.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0648193359375, -0.06309127807617188, -0.06136322021484375, -0.059635162353515625, -0.0579071044921875, -0.056179046630859375, -0.05445098876953125, -0.052722930908203125, -0.050994873046875, -0.049266815185546875, -0.04753875732421875, -0.045810699462890625, -0.0440826416015625, -0.042354583740234375, -0.04062652587890625, -0.038898468017578125, -0.03717041015625, -0.035442352294921875, -0.03371429443359375, -0.031986236572265625, -0.0302581787109375, -0.028530120849609375, -0.02680206298828125, -0.025074005126953125, -0.023345947265625, -0.021617889404296875, -0.01988983154296875, -0.018161773681640625, -0.0164337158203125, -0.014705657958984375, -0.01297760009765625, -0.011249542236328125, -0.009521484375, -0.007793426513671875, -0.00606536865234375, -0.004337310791015625, -0.0026092529296875, -0.000881195068359375, 0.00084686279296875, 0.002574920654296875, 0.004302978515625, 0.006031036376953125, 0.00775909423828125, 0.009487152099609375, 0.0112152099609375, 0.012943267822265625, 0.01467132568359375, 0.016399383544921875, 0.01812744140625, 0.019855499267578125, 0.02158355712890625, 0.023311614990234375, 0.0250396728515625, 0.026767730712890625, 0.02849578857421875, 0.030223846435546875, 0.031951904296875, 0.033679962158203125, 0.03540802001953125, 0.037136077880859375, 0.0388641357421875, 0.040592193603515625, 0.04232025146484375, 0.044048309326171875, 0.0457763671875]}, "gradients/decoder.transformer.h.7.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 18.0, 159.0, 428.0, 350.0, 48.0, 12.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0007464042864739895, -0.0006164053920656443, -0.00048640643944963813, -0.00035640751593746245, -0.00022640859242528677, -9.640969801694155e-05, 3.358925459906459e-05, 0.00016358820721507072, 0.00029358710162341595, 0.0004235860251355916, 0.0005535849486477673, 0.0006835839012637734, 0.0008135827956721187, 0.0009435816900804639, 0.001073580700904131, 0.0012035795953124762, 0.0013335784897208214, 0.0014635773841291666, 0.0015935762785375118, 0.0017235752893611789, 0.001853574183769524, 0.001983573194593191, 0.0021135720890015364, 0.0022435709834098816, 0.002373569877818227, 0.002503568772226572, 0.0026335676666349173, 0.0027635665610432625, 0.0028935656882822514, 0.003023564349859953, 0.003153563477098942, 0.003283562371507287, 0.0034135612659156322, 0.0035435601603239775, 0.0036735590547323227, 0.003803557949140668, 0.003933556843549013, 0.004063555970788002, 0.004193554632365704, 0.0043235537596046925, 0.004453552886843681, 0.00458355201408267, 0.004713550675660372, 0.004843549802899361, 0.004973548464477062, 0.005103547591716051, 0.005233546253293753, 0.0053635453805327415, 0.005493544042110443, 0.005623543169349432, 0.0057535418309271336, 0.0058835409581661224, 0.006013539619743824, 0.006143538746982813, 0.0062735374085605145, 0.006403536535799503, 0.006533535197377205, 0.006663534324616194, 0.006793532986193895, 0.006923532113432884, 0.007053530775010586, 0.007183529902249575, 0.007313528563827276, 0.007443527691066265, 0.007573526818305254]}, "gradients/decoder.transformer.h.7.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 0.0, 4.0, 2.0, 4.0, 1.0, 6.0, 5.0, 12.0, 9.0, 9.0, 7.0, 16.0, 16.0, 29.0, 18.0, 22.0, 26.0, 34.0, 28.0, 23.0, 38.0, 36.0, 43.0, 43.0, 42.0, 40.0, 35.0, 44.0, 41.0, 30.0, 41.0, 29.0, 33.0, 37.0, 28.0, 35.0, 25.0, 16.0, 16.0, 18.0, 10.0, 16.0, 11.0, 5.0, 9.0, 7.0, 9.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0009061694145202637, -0.0008759219199419022, -0.0008456744253635406, -0.0008154269307851791, -0.0007851794362068176, -0.0007549319416284561, -0.0007246844470500946, -0.0006944369524717331, -0.0006641894578933716, -0.0006339419633150101, -0.0006036944687366486, -0.000573446974158287, -0.0005431994795799255, -0.000512951985001564, -0.0004827044904232025, -0.000452456995844841, -0.0004222095012664795, -0.000391962006688118, -0.00036171451210975647, -0.00033146701753139496, -0.00030121952295303345, -0.00027097202837467194, -0.00024072453379631042, -0.00021047703921794891, -0.0001802295446395874, -0.0001499820500612259, -0.00011973455548286438, -8.948706090450287e-05, -5.923956632614136e-05, -2.8992071747779846e-05, 1.255422830581665e-06, 3.1502917408943176e-05, 6.175041198730469e-05, 9.19979065656662e-05, 0.0001222454011440277, 0.00015249289572238922, 0.00018274039030075073, 0.00021298788487911224, 0.00024323537945747375, 0.00027348287403583527, 0.0003037303686141968, 0.0003339778631925583, 0.0003642253577709198, 0.0003944728523492813, 0.0004247203469276428, 0.00045496784150600433, 0.00048521533608436584, 0.0005154628306627274, 0.0005457103252410889, 0.0005759578198194504, 0.0006062053143978119, 0.0006364528089761734, 0.0006667003035545349, 0.0006969477981328964, 0.0007271952927112579, 0.0007574427872896194, 0.000787690281867981, 0.0008179377764463425, 0.000848185271024704, 0.0008784327656030655, 0.000908680260181427, 0.0009389277547597885, 0.00096917524933815, 0.0009994227439165115, 0.001029670238494873]}, "gradients/decoder.transformer.h.7.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 5.0, 2.0, 1.0, 4.0, 5.0, 8.0, 6.0, 7.0, 13.0, 6.0, 14.0, 14.0, 12.0, 14.0, 22.0, 19.0, 25.0, 36.0, 30.0, 33.0, 39.0, 35.0, 30.0, 32.0, 37.0, 41.0, 29.0, 42.0, 43.0, 37.0, 39.0, 35.0, 36.0, 35.0, 24.0, 28.0, 27.0, 21.0, 15.0, 23.0, 14.0, 15.0, 10.0, 9.0, 5.0, 5.0, 12.0, 14.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-7.10546875, -6.88580322265625, -6.6661376953125, -6.44647216796875, -6.226806640625, -6.00714111328125, -5.7874755859375, -5.56781005859375, -5.34814453125, -5.12847900390625, -4.9088134765625, -4.68914794921875, -4.469482421875, -4.24981689453125, -4.0301513671875, -3.81048583984375, -3.5908203125, -3.37115478515625, -3.1514892578125, -2.93182373046875, -2.712158203125, -2.49249267578125, -2.2728271484375, -2.05316162109375, -1.83349609375, -1.61383056640625, -1.3941650390625, -1.17449951171875, -0.954833984375, -0.73516845703125, -0.5155029296875, -0.29583740234375, -0.076171875, 0.14349365234375, 0.3631591796875, 0.58282470703125, 0.802490234375, 1.02215576171875, 1.2418212890625, 1.46148681640625, 1.68115234375, 1.90081787109375, 2.1204833984375, 2.34014892578125, 2.559814453125, 2.77947998046875, 2.9991455078125, 3.21881103515625, 3.4384765625, 3.65814208984375, 3.8778076171875, 4.09747314453125, 4.317138671875, 4.53680419921875, 4.7564697265625, 4.97613525390625, 5.19580078125, 5.41546630859375, 5.6351318359375, 5.85479736328125, 6.074462890625, 6.29412841796875, 6.5137939453125, 6.73345947265625, 6.953125]}, "gradients/decoder.transformer.h.7.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 3.0, 1.0, 3.0, 1.0, 2.0, 6.0, 9.0, 4.0, 8.0, 14.0, 16.0, 19.0, 26.0, 38.0, 45.0, 58.0, 54.0, 99.0, 107.0, 137.0, 228.0, 376.0, 769.0, 2184.0, 7234.0, 25169.0, 105291.0, 489146.0, 326764.0, 65662.0, 16896.0, 4921.0, 1544.0, 583.0, 335.0, 209.0, 139.0, 98.0, 89.0, 61.0, 46.0, 42.0, 30.0, 20.0, 21.0, 19.0, 11.0, 10.0, 7.0, 6.0, 3.0, 3.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-12.421875, -12.0191650390625, -11.616455078125, -11.2137451171875, -10.81103515625, -10.4083251953125, -10.005615234375, -9.6029052734375, -9.2001953125, -8.7974853515625, -8.394775390625, -7.9920654296875, -7.58935546875, -7.1866455078125, -6.783935546875, -6.3812255859375, -5.978515625, -5.5758056640625, -5.173095703125, -4.7703857421875, -4.36767578125, -3.9649658203125, -3.562255859375, -3.1595458984375, -2.7568359375, -2.3541259765625, -1.951416015625, -1.5487060546875, -1.14599609375, -0.7432861328125, -0.340576171875, 0.0621337890625, 0.46484375, 0.8675537109375, 1.270263671875, 1.6729736328125, 2.07568359375, 2.4783935546875, 2.881103515625, 3.2838134765625, 3.6865234375, 4.0892333984375, 4.491943359375, 4.8946533203125, 5.29736328125, 5.7000732421875, 6.102783203125, 6.5054931640625, 6.908203125, 7.3109130859375, 7.713623046875, 8.1163330078125, 8.51904296875, 8.9217529296875, 9.324462890625, 9.7271728515625, 10.1298828125, 10.5325927734375, 10.935302734375, 11.3380126953125, 11.74072265625, 12.1434326171875, 12.546142578125, 12.9488525390625, 13.3515625]}, "gradients/decoder.transformer.h.7.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 2.0, 1.0, 8.0, 3.0, 2.0, 7.0, 5.0, 8.0, 7.0, 11.0, 14.0, 19.0, 21.0, 22.0, 16.0, 25.0, 34.0, 21.0, 28.0, 35.0, 48.0, 49.0, 70.0, 97.0, 254.0, 1449.0, 242.0, 114.0, 51.0, 52.0, 47.0, 35.0, 33.0, 30.0, 20.0, 25.0, 28.0, 23.0, 12.0, 21.0, 16.0, 11.0, 9.0, 8.0, 4.0, 4.0, 9.0, 4.0, 5.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0], "bins": [-23.78125, -23.072998046875, -22.36474609375, -21.656494140625, -20.9482421875, -20.239990234375, -19.53173828125, -18.823486328125, -18.115234375, -17.406982421875, -16.69873046875, -15.990478515625, -15.2822265625, -14.573974609375, -13.86572265625, -13.157470703125, -12.44921875, -11.740966796875, -11.03271484375, -10.324462890625, -9.6162109375, -8.907958984375, -8.19970703125, -7.491455078125, -6.783203125, -6.074951171875, -5.36669921875, -4.658447265625, -3.9501953125, -3.241943359375, -2.53369140625, -1.825439453125, -1.1171875, -0.408935546875, 0.29931640625, 1.007568359375, 1.7158203125, 2.424072265625, 3.13232421875, 3.840576171875, 4.548828125, 5.257080078125, 5.96533203125, 6.673583984375, 7.3818359375, 8.090087890625, 8.79833984375, 9.506591796875, 10.21484375, 10.923095703125, 11.63134765625, 12.339599609375, 13.0478515625, 13.756103515625, 14.46435546875, 15.172607421875, 15.880859375, 16.589111328125, 17.29736328125, 18.005615234375, 18.7138671875, 19.422119140625, 20.13037109375, 20.838623046875, 21.546875]}, "gradients/decoder.transformer.h.7.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 4.0, 3.0, 9.0, 7.0, 8.0, 12.0, 16.0, 15.0, 26.0, 24.0, 51.0, 51.0, 62.0, 89.0, 105.0, 173.0, 361.0, 762.0, 3685.0, 909502.0, 2223871.0, 4937.0, 873.0, 376.0, 188.0, 119.0, 101.0, 63.0, 47.0, 39.0, 44.0, 20.0, 19.0, 12.0, 8.0, 11.0, 9.0, 7.0, 4.0, 1.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.625, -59.57421875, -57.5234375, -55.47265625, -53.421875, -51.37109375, -49.3203125, -47.26953125, -45.21875, -43.16796875, -41.1171875, -39.06640625, -37.015625, -34.96484375, -32.9140625, -30.86328125, -28.8125, -26.76171875, -24.7109375, -22.66015625, -20.609375, -18.55859375, -16.5078125, -14.45703125, -12.40625, -10.35546875, -8.3046875, -6.25390625, -4.203125, -2.15234375, -0.1015625, 1.94921875, 4.0, 6.05078125, 8.1015625, 10.15234375, 12.203125, 14.25390625, 16.3046875, 18.35546875, 20.40625, 22.45703125, 24.5078125, 26.55859375, 28.609375, 30.66015625, 32.7109375, 34.76171875, 36.8125, 38.86328125, 40.9140625, 42.96484375, 45.015625, 47.06640625, 49.1171875, 51.16796875, 53.21875, 55.26953125, 57.3203125, 59.37109375, 61.421875, 63.47265625, 65.5234375, 67.57421875, 69.625]}, "gradients/decoder.transformer.h.7.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 11.0, 18.0, 57.0, 76.0, 121.0, 137.0, 157.0, 158.0, 126.0, 75.0, 40.0, 18.0, 15.0, 3.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.489408493041992, -11.192358016967773, -9.895307540893555, -8.59825611114502, -7.301205635070801, -6.004155158996582, -4.707104206085205, -3.410053253173828, -2.1130027770996094, -0.8159520626068115, 0.48109865188598633, 1.7781493663787842, 3.075200080871582, 4.372250556945801, 5.669301509857178, 6.966352462768555, 8.263402938842773, 9.560453414916992, 10.857503890991211, 12.154555320739746, 13.451605796813965, 14.748656272888184, 16.04570770263672, 17.342758178710938, 18.639808654785156, 19.936859130859375, 21.233909606933594, 22.530960083007812, 23.82801055908203, 25.12506103515625, 26.4221134185791, 27.71916389465332, 29.016212463378906, 30.313262939453125, 31.610313415527344, 32.90736389160156, 34.20441436767578, 35.50146484375, 36.79851531982422, 38.09556579589844, 39.392616271972656, 40.689666748046875, 41.986717224121094, 43.28376770019531, 44.58081817626953, 45.87786865234375, 47.17491912841797, 48.47196960449219, 49.76902389526367, 51.06607437133789, 52.36312484741211, 53.66017532348633, 54.95722579956055, 56.254276275634766, 57.551326751708984, 58.84838104248047, 60.14543151855469, 61.442481994628906, 62.739532470703125, 64.03658294677734, 65.33363342285156, 66.63068389892578, 67.927734375, 69.22478485107422, 70.52183532714844]}, "gradients/decoder.transformer.h.7.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 5.0, 4.0, 5.0, 2.0, 4.0, 8.0, 7.0, 8.0, 12.0, 9.0, 13.0, 15.0, 14.0, 22.0, 21.0, 22.0, 23.0, 29.0, 28.0, 21.0, 28.0, 33.0, 29.0, 41.0, 34.0, 38.0, 43.0, 39.0, 35.0, 48.0, 30.0, 40.0, 34.0, 32.0, 24.0, 32.0, 27.0, 20.0, 17.0, 19.0, 15.0, 12.0, 14.0, 5.0, 4.0, 7.0, 7.0, 12.0, 4.0, 4.0, 1.0, 4.0, 3.0, 5.0, 1.0, 2.0, 1.0, 1.0], "bins": [-54.7220344543457, -53.028076171875, -51.3341178894043, -49.640159606933594, -47.94620132446289, -46.25224304199219, -44.558284759521484, -42.86432647705078, -41.17036819458008, -39.476409912109375, -37.78245162963867, -36.08849334716797, -34.394535064697266, -32.70057678222656, -31.00661849975586, -29.312660217285156, -27.618703842163086, -25.924745559692383, -24.23078727722168, -22.536828994750977, -20.842870712280273, -19.148914337158203, -17.4549560546875, -15.76099681854248, -14.067038536071777, -12.373080253601074, -10.679121971130371, -8.985164642333984, -7.291205883026123, -5.597248077392578, -3.903289794921875, -2.209331512451172, -0.5153732299804688, 1.1785849332809448, 2.8725430965423584, 4.566501140594482, 6.2604594230651855, 7.9544172286987305, 9.648375511169434, 11.342333793640137, 13.03629207611084, 14.730250358581543, 16.42420768737793, 18.118165969848633, 19.812124252319336, 21.50608253479004, 23.200040817260742, 24.893999099731445, 26.58795738220215, 28.28191566467285, 29.975873947143555, 31.669832229614258, 33.36378860473633, 35.05774688720703, 36.751705169677734, 38.44566345214844, 40.13962173461914, 41.833580017089844, 43.52753829956055, 45.22149658203125, 46.91545486450195, 48.609413146972656, 50.30337142944336, 51.99732971191406, 53.691287994384766]}, "gradients/decoder.transformer.h.6.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 3.0, 3.0, 1.0, 3.0, 3.0, 10.0, 8.0, 8.0, 9.0, 10.0, 14.0, 11.0, 18.0, 28.0, 25.0, 24.0, 33.0, 44.0, 32.0, 32.0, 40.0, 35.0, 37.0, 31.0, 40.0, 36.0, 49.0, 38.0, 35.0, 33.0, 41.0, 24.0, 38.0, 27.0, 23.0, 25.0, 17.0, 24.0, 18.0, 17.0, 11.0, 10.0, 15.0, 5.0, 4.0, 5.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0], "bins": [-7.703125, -7.46136474609375, -7.2196044921875, -6.97784423828125, -6.736083984375, -6.49432373046875, -6.2525634765625, -6.01080322265625, -5.76904296875, -5.52728271484375, -5.2855224609375, -5.04376220703125, -4.802001953125, -4.56024169921875, -4.3184814453125, -4.07672119140625, -3.8349609375, -3.59320068359375, -3.3514404296875, -3.10968017578125, -2.867919921875, -2.62615966796875, -2.3843994140625, -2.14263916015625, -1.90087890625, -1.65911865234375, -1.4173583984375, -1.17559814453125, -0.933837890625, -0.69207763671875, -0.4503173828125, -0.20855712890625, 0.033203125, 0.27496337890625, 0.5167236328125, 0.75848388671875, 1.000244140625, 1.24200439453125, 1.4837646484375, 1.72552490234375, 1.96728515625, 2.20904541015625, 2.4508056640625, 2.69256591796875, 2.934326171875, 3.17608642578125, 3.4178466796875, 3.65960693359375, 3.9013671875, 4.14312744140625, 4.3848876953125, 4.62664794921875, 4.868408203125, 5.11016845703125, 5.3519287109375, 5.59368896484375, 5.83544921875, 6.07720947265625, 6.3189697265625, 6.56072998046875, 6.802490234375, 7.04425048828125, 7.2860107421875, 7.52777099609375, 7.76953125]}, "gradients/decoder.transformer.h.6.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 5.0, 3.0, 1.0, 6.0, 5.0, 4.0, 12.0, 13.0, 9.0, 20.0, 23.0, 33.0, 44.0, 51.0, 55.0, 104.0, 118.0, 281.0, 1000.0, 5888.0, 59876.0, 779520.0, 2615453.0, 673548.0, 51583.0, 5027.0, 851.0, 245.0, 138.0, 85.0, 51.0, 42.0, 50.0, 23.0, 20.0, 22.0, 18.0, 12.0, 16.0, 10.0, 10.0, 7.0, 2.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-17.125, -16.580810546875, -16.03662109375, -15.492431640625, -14.9482421875, -14.404052734375, -13.85986328125, -13.315673828125, -12.771484375, -12.227294921875, -11.68310546875, -11.138916015625, -10.5947265625, -10.050537109375, -9.50634765625, -8.962158203125, -8.41796875, -7.873779296875, -7.32958984375, -6.785400390625, -6.2412109375, -5.697021484375, -5.15283203125, -4.608642578125, -4.064453125, -3.520263671875, -2.97607421875, -2.431884765625, -1.8876953125, -1.343505859375, -0.79931640625, -0.255126953125, 0.2890625, 0.833251953125, 1.37744140625, 1.921630859375, 2.4658203125, 3.010009765625, 3.55419921875, 4.098388671875, 4.642578125, 5.186767578125, 5.73095703125, 6.275146484375, 6.8193359375, 7.363525390625, 7.90771484375, 8.451904296875, 8.99609375, 9.540283203125, 10.08447265625, 10.628662109375, 11.1728515625, 11.717041015625, 12.26123046875, 12.805419921875, 13.349609375, 13.893798828125, 14.43798828125, 14.982177734375, 15.5263671875, 16.070556640625, 16.61474609375, 17.158935546875, 17.703125]}, "gradients/decoder.transformer.h.6.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 4.0, 1.0, 4.0, 4.0, 9.0, 8.0, 12.0, 13.0, 17.0, 22.0, 25.0, 30.0, 42.0, 56.0, 90.0, 122.0, 183.0, 202.0, 302.0, 353.0, 402.0, 386.0, 403.0, 349.0, 253.0, 212.0, 151.0, 109.0, 82.0, 62.0, 45.0, 33.0, 23.0, 12.0, 11.0, 10.0, 10.0, 6.0, 5.0, 5.0, 0.0, 3.0, 2.0, 1.0, 3.0, 1.0, 3.0], "bins": [-14.3515625, -13.9771728515625, -13.602783203125, -13.2283935546875, -12.85400390625, -12.4796142578125, -12.105224609375, -11.7308349609375, -11.3564453125, -10.9820556640625, -10.607666015625, -10.2332763671875, -9.85888671875, -9.4844970703125, -9.110107421875, -8.7357177734375, -8.361328125, -7.9869384765625, -7.612548828125, -7.2381591796875, -6.86376953125, -6.4893798828125, -6.114990234375, -5.7406005859375, -5.3662109375, -4.9918212890625, -4.617431640625, -4.2430419921875, -3.86865234375, -3.4942626953125, -3.119873046875, -2.7454833984375, -2.37109375, -1.9967041015625, -1.622314453125, -1.2479248046875, -0.87353515625, -0.4991455078125, -0.124755859375, 0.2496337890625, 0.6240234375, 0.9984130859375, 1.372802734375, 1.7471923828125, 2.12158203125, 2.4959716796875, 2.870361328125, 3.2447509765625, 3.619140625, 3.9935302734375, 4.367919921875, 4.7423095703125, 5.11669921875, 5.4910888671875, 5.865478515625, 6.2398681640625, 6.6142578125, 6.9886474609375, 7.363037109375, 7.7374267578125, 8.11181640625, 8.4862060546875, 8.860595703125, 9.2349853515625, 9.609375]}, "gradients/decoder.transformer.h.6.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 1.0, 3.0, 2.0, 3.0, 4.0, 3.0, 11.0, 12.0, 20.0, 32.0, 24.0, 52.0, 51.0, 84.0, 115.0, 162.0, 219.0, 343.0, 566.0, 1360.0, 5952.0, 66461.0, 1757990.0, 2262799.0, 87553.0, 7194.0, 1443.0, 639.0, 354.0, 227.0, 158.0, 112.0, 90.0, 63.0, 47.0, 31.0, 31.0, 18.0, 17.0, 13.0, 8.0, 9.0, 7.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-27.0625, -26.130859375, -25.19921875, -24.267578125, -23.3359375, -22.404296875, -21.47265625, -20.541015625, -19.609375, -18.677734375, -17.74609375, -16.814453125, -15.8828125, -14.951171875, -14.01953125, -13.087890625, -12.15625, -11.224609375, -10.29296875, -9.361328125, -8.4296875, -7.498046875, -6.56640625, -5.634765625, -4.703125, -3.771484375, -2.83984375, -1.908203125, -0.9765625, -0.044921875, 0.88671875, 1.818359375, 2.75, 3.681640625, 4.61328125, 5.544921875, 6.4765625, 7.408203125, 8.33984375, 9.271484375, 10.203125, 11.134765625, 12.06640625, 12.998046875, 13.9296875, 14.861328125, 15.79296875, 16.724609375, 17.65625, 18.587890625, 19.51953125, 20.451171875, 21.3828125, 22.314453125, 23.24609375, 24.177734375, 25.109375, 26.041015625, 26.97265625, 27.904296875, 28.8359375, 29.767578125, 30.69921875, 31.630859375, 32.5625]}, "gradients/decoder.transformer.h.6.ln_2.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 54.0, 513.0, 419.0, 28.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.686092376708984, -50.46839141845703, -40.250694274902344, -30.032995223999023, -19.815296173095703, -9.597599029541016, 0.6201019287109375, 10.83780288696289, 21.055500030517578, 31.2731990814209, 41.49089813232422, 51.708595275878906, 61.92629623413086, 72.14399719238281, 82.3616943359375, 92.57939147949219, 102.79708862304688, 113.01478576660156, 123.23248291015625, 133.45018005371094, 143.66787719726562, 153.88558959960938, 164.10328674316406, 174.32098388671875, 184.53868103027344, 194.75637817382812, 204.9740753173828, 215.1917724609375, 225.40948486328125, 235.62716674804688, 245.84487915039062, 256.06256103515625, 266.2802734375, 276.49798583984375, 286.7156677246094, 296.9333801269531, 307.15106201171875, 317.3687744140625, 327.5864562988281, 337.8041687011719, 348.0218505859375, 358.23956298828125, 368.4572448730469, 378.6749572753906, 388.89263916015625, 399.1103515625, 409.3280334472656, 419.5457458496094, 429.7634582519531, 439.9811706542969, 450.1988525390625, 460.41656494140625, 470.6342468261719, 480.8519592285156, 491.06964111328125, 501.287353515625, 511.50506591796875, 521.7227783203125, 531.9404907226562, 542.1581420898438, 552.3758544921875, 562.5935668945312, 572.811279296875, 583.0289306640625, 593.2466430664062]}, "gradients/decoder.transformer.h.6.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 2.0, 1.0, 3.0, 2.0, 5.0, 3.0, 6.0, 5.0, 7.0, 6.0, 7.0, 8.0, 15.0, 17.0, 14.0, 22.0, 17.0, 26.0, 31.0, 30.0, 28.0, 47.0, 43.0, 31.0, 41.0, 35.0, 58.0, 32.0, 43.0, 38.0, 33.0, 35.0, 30.0, 29.0, 27.0, 35.0, 27.0, 27.0, 17.0, 26.0, 13.0, 21.0, 11.0, 13.0, 14.0, 8.0, 4.0, 5.0, 2.0, 2.0, 5.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-45.06510925292969, -43.67557907104492, -42.286048889160156, -40.89651870727539, -39.506988525390625, -38.11745834350586, -36.727928161621094, -35.33839797973633, -33.94886779785156, -32.5593376159668, -31.16980743408203, -29.780277252197266, -28.3907470703125, -27.001216888427734, -25.61168670654297, -24.222156524658203, -22.83262825012207, -21.443098068237305, -20.05356788635254, -18.664037704467773, -17.274507522583008, -15.884977340698242, -14.495448112487793, -13.105917930603027, -11.716387748718262, -10.326857566833496, -8.93732738494873, -7.547797679901123, -6.158267498016357, -4.768737316131592, -3.3792076110839844, -1.9896774291992188, -0.6001472473144531, 0.789382815361023, 2.178912878036499, 3.5684428215026855, 4.957973003387451, 6.347503185272217, 7.737032890319824, 9.12656307220459, 10.516093254089355, 11.905623435974121, 13.295153617858887, 14.684682846069336, 16.0742130279541, 17.463743209838867, 18.853273391723633, 20.2428035736084, 21.632333755493164, 23.02186393737793, 24.411394119262695, 25.80092430114746, 27.190454483032227, 28.579984664916992, 29.969512939453125, 31.35904312133789, 32.748573303222656, 34.13810348510742, 35.52763366699219, 36.91716384887695, 38.30669403076172, 39.696224212646484, 41.08575439453125, 42.475284576416016, 43.86481475830078]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 4.0, 3.0, 5.0, 6.0, 7.0, 7.0, 12.0, 11.0, 12.0, 12.0, 20.0, 22.0, 22.0, 28.0, 27.0, 32.0, 24.0, 35.0, 33.0, 26.0, 31.0, 35.0, 38.0, 50.0, 42.0, 36.0, 37.0, 34.0, 44.0, 34.0, 33.0, 37.0, 32.0, 24.0, 25.0, 17.0, 13.0, 20.0, 16.0, 12.0, 9.0, 11.0, 2.0, 10.0, 3.0, 3.0, 3.0, 5.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0], "bins": [-7.515625, -7.27972412109375, -7.0438232421875, -6.80792236328125, -6.572021484375, -6.33612060546875, -6.1002197265625, -5.86431884765625, -5.62841796875, -5.39251708984375, -5.1566162109375, -4.92071533203125, -4.684814453125, -4.44891357421875, -4.2130126953125, -3.97711181640625, -3.7412109375, -3.50531005859375, -3.2694091796875, -3.03350830078125, -2.797607421875, -2.56170654296875, -2.3258056640625, -2.08990478515625, -1.85400390625, -1.61810302734375, -1.3822021484375, -1.14630126953125, -0.910400390625, -0.67449951171875, -0.4385986328125, -0.20269775390625, 0.033203125, 0.26910400390625, 0.5050048828125, 0.74090576171875, 0.976806640625, 1.21270751953125, 1.4486083984375, 1.68450927734375, 1.92041015625, 2.15631103515625, 2.3922119140625, 2.62811279296875, 2.864013671875, 3.09991455078125, 3.3358154296875, 3.57171630859375, 3.8076171875, 4.04351806640625, 4.2794189453125, 4.51531982421875, 4.751220703125, 4.98712158203125, 5.2230224609375, 5.45892333984375, 5.69482421875, 5.93072509765625, 6.1666259765625, 6.40252685546875, 6.638427734375, 6.87432861328125, 7.1102294921875, 7.34613037109375, 7.58203125]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 6.0, 6.0, 11.0, 13.0, 19.0, 40.0, 60.0, 115.0, 180.0, 267.0, 434.0, 717.0, 1091.0, 1775.0, 2791.0, 4649.0, 7332.0, 12090.0, 19904.0, 32964.0, 54303.0, 91099.0, 150663.0, 218324.0, 176339.0, 108377.0, 64810.0, 39339.0, 23482.0, 14361.0, 8760.0, 5472.0, 3392.0, 2024.0, 1211.0, 830.0, 480.0, 298.0, 200.0, 132.0, 74.0, 48.0, 36.0, 14.0, 13.0, 4.0, 8.0, 6.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.60009765625, -0.5798721313476562, -0.5596466064453125, -0.5394210815429688, -0.519195556640625, -0.49897003173828125, -0.4787445068359375, -0.45851898193359375, -0.43829345703125, -0.41806793212890625, -0.3978424072265625, -0.37761688232421875, -0.357391357421875, -0.33716583251953125, -0.3169403076171875, -0.29671478271484375, -0.2764892578125, -0.25626373291015625, -0.2360382080078125, -0.21581268310546875, -0.195587158203125, -0.17536163330078125, -0.1551361083984375, -0.13491058349609375, -0.11468505859375, -0.09445953369140625, -0.0742340087890625, -0.05400848388671875, -0.033782958984375, -0.01355743408203125, 0.0066680908203125, 0.02689361572265625, 0.047119140625, 0.06734466552734375, 0.0875701904296875, 0.10779571533203125, 0.128021240234375, 0.14824676513671875, 0.1684722900390625, 0.18869781494140625, 0.20892333984375, 0.22914886474609375, 0.2493743896484375, 0.26959991455078125, 0.289825439453125, 0.31005096435546875, 0.3302764892578125, 0.35050201416015625, 0.3707275390625, 0.39095306396484375, 0.4111785888671875, 0.43140411376953125, 0.451629638671875, 0.47185516357421875, 0.4920806884765625, 0.5123062133789062, 0.53253173828125, 0.5527572631835938, 0.5729827880859375, 0.5932083129882812, 0.613433837890625, 0.6336593627929688, 0.6538848876953125, 0.6741104125976562, 0.6943359375]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 8.0, 6.0, 8.0, 6.0, 15.0, 5.0, 22.0, 8.0, 16.0, 15.0, 16.0, 16.0, 17.0, 26.0, 25.0, 22.0, 23.0, 23.0, 25.0, 30.0, 28.0, 34.0, 36.0, 1059.0, 51.0, 43.0, 38.0, 25.0, 37.0, 27.0, 33.0, 32.0, 31.0, 26.0, 33.0, 17.0, 20.0, 14.0, 19.0, 18.0, 14.0, 8.0, 13.0, 10.0, 6.0, 4.0, 7.0, 2.0, 4.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 3.0], "bins": [-4.546875, -4.404296875, -4.26171875, -4.119140625, -3.9765625, -3.833984375, -3.69140625, -3.548828125, -3.40625, -3.263671875, -3.12109375, -2.978515625, -2.8359375, -2.693359375, -2.55078125, -2.408203125, -2.265625, -2.123046875, -1.98046875, -1.837890625, -1.6953125, -1.552734375, -1.41015625, -1.267578125, -1.125, -0.982421875, -0.83984375, -0.697265625, -0.5546875, -0.412109375, -0.26953125, -0.126953125, 0.015625, 0.158203125, 0.30078125, 0.443359375, 0.5859375, 0.728515625, 0.87109375, 1.013671875, 1.15625, 1.298828125, 1.44140625, 1.583984375, 1.7265625, 1.869140625, 2.01171875, 2.154296875, 2.296875, 2.439453125, 2.58203125, 2.724609375, 2.8671875, 3.009765625, 3.15234375, 3.294921875, 3.4375, 3.580078125, 3.72265625, 3.865234375, 4.0078125, 4.150390625, 4.29296875, 4.435546875, 4.578125]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 10.0, 10.0, 16.0, 22.0, 22.0, 30.0, 79.0, 109.0, 135.0, 241.0, 343.0, 533.0, 811.0, 1209.0, 1848.0, 2676.0, 4091.0, 6297.0, 9261.0, 14092.0, 21283.0, 32121.0, 48358.0, 73198.0, 110175.0, 155831.0, 1217841.0, 132330.0, 89912.0, 58689.0, 38923.0, 25692.0, 17271.0, 11404.0, 7479.0, 4998.0, 3301.0, 2251.0, 1434.0, 987.0, 582.0, 430.0, 290.0, 177.0, 126.0, 75.0, 41.0, 38.0, 14.0, 20.0, 14.0, 8.0, 7.0, 0.0, 0.0, 3.0, 6.0], "bins": [-0.5009765625, -0.4859046936035156, -0.47083282470703125, -0.4557609558105469, -0.4406890869140625, -0.4256172180175781, -0.41054534912109375, -0.3954734802246094, -0.380401611328125, -0.3653297424316406, -0.35025787353515625, -0.3351860046386719, -0.3201141357421875, -0.3050422668457031, -0.28997039794921875, -0.2748985290527344, -0.25982666015625, -0.24475479125976562, -0.22968292236328125, -0.21461105346679688, -0.1995391845703125, -0.18446731567382812, -0.16939544677734375, -0.15432357788085938, -0.139251708984375, -0.12417984008789062, -0.10910797119140625, -0.09403610229492188, -0.0789642333984375, -0.06389236450195312, -0.04882049560546875, -0.033748626708984375, -0.0186767578125, -0.003604888916015625, 0.01146697998046875, 0.026538848876953125, 0.0416107177734375, 0.056682586669921875, 0.07175445556640625, 0.08682632446289062, 0.101898193359375, 0.11697006225585938, 0.13204193115234375, 0.14711380004882812, 0.1621856689453125, 0.17725753784179688, 0.19232940673828125, 0.20740127563476562, 0.22247314453125, 0.23754501342773438, 0.25261688232421875, 0.2676887512207031, 0.2827606201171875, 0.2978324890136719, 0.31290435791015625, 0.3279762268066406, 0.343048095703125, 0.3581199645996094, 0.37319183349609375, 0.3882637023925781, 0.4033355712890625, 0.4184074401855469, 0.43347930908203125, 0.4485511779785156, 0.463623046875]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 7.0, 2.0, 5.0, 6.0, 7.0, 9.0, 4.0, 8.0, 3.0, 18.0, 26.0, 24.0, 20.0, 18.0, 30.0, 37.0, 28.0, 37.0, 37.0, 51.0, 50.0, 49.0, 58.0, 55.0, 49.0, 52.0, 32.0, 48.0, 28.0, 34.0, 26.0, 25.0, 21.0, 15.0, 15.0, 17.0, 6.0, 8.0, 9.0, 3.0, 5.0, 4.0, 3.0, 2.0, 3.0, 3.0, 6.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0018224716186523438, -0.0017688721418380737, -0.0017152726650238037, -0.0016616731882095337, -0.0016080737113952637, -0.0015544742345809937, -0.0015008747577667236, -0.0014472752809524536, -0.0013936758041381836, -0.0013400763273239136, -0.0012864768505096436, -0.0012328773736953735, -0.0011792778968811035, -0.0011256784200668335, -0.0010720789432525635, -0.0010184794664382935, -0.0009648799896240234, -0.0009112805128097534, -0.0008576810359954834, -0.0008040815591812134, -0.0007504820823669434, -0.0006968826055526733, -0.0006432831287384033, -0.0005896836519241333, -0.0005360841751098633, -0.00048248469829559326, -0.00042888522148132324, -0.0003752857446670532, -0.0003216862678527832, -0.0002680867910385132, -0.00021448731422424316, -0.00016088783740997314, -0.00010728836059570312, -5.3688883781433105e-05, -8.940696716308594e-08, 5.3510069847106934e-05, 0.00010710954666137695, 0.00016070902347564697, 0.000214308500289917, 0.000267907977104187, 0.00032150745391845703, 0.00037510693073272705, 0.00042870640754699707, 0.0004823058843612671, 0.0005359053611755371, 0.0005895048379898071, 0.0006431043148040771, 0.0006967037916183472, 0.0007503032684326172, 0.0008039027452468872, 0.0008575022220611572, 0.0009111016988754272, 0.0009647011756896973, 0.0010183006525039673, 0.0010719001293182373, 0.0011254996061325073, 0.0011790990829467773, 0.0012326985597610474, 0.0012862980365753174, 0.0013398975133895874, 0.0013934969902038574, 0.0014470964670181274, 0.0015006959438323975, 0.0015542954206466675, 0.0016078948974609375]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 4.0, 6.0, 10.0, 5.0, 9.0, 11.0, 17.0, 32.0, 21.0, 33.0, 41.0, 46.0, 71.0, 94.0, 124.0, 193.0, 316.0, 617.0, 1793.0, 854170.0, 188401.0, 1115.0, 499.0, 273.0, 154.0, 104.0, 85.0, 63.0, 54.0, 47.0, 22.0, 25.0, 17.0, 12.0, 13.0, 13.0, 10.0, 6.0, 2.0, 6.0, 4.0, 5.0, 5.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.037353515625, -0.03614377975463867, -0.034934043884277344, -0.033724308013916016, -0.03251457214355469, -0.03130483627319336, -0.03009510040283203, -0.028885364532470703, -0.027675628662109375, -0.026465892791748047, -0.02525615692138672, -0.02404642105102539, -0.022836685180664062, -0.021626949310302734, -0.020417213439941406, -0.019207477569580078, -0.01799774169921875, -0.016788005828857422, -0.015578269958496094, -0.014368534088134766, -0.013158798217773438, -0.01194906234741211, -0.010739326477050781, -0.009529590606689453, -0.008319854736328125, -0.007110118865966797, -0.005900382995605469, -0.004690647125244141, -0.0034809112548828125, -0.0022711753845214844, -0.0010614395141601562, 0.00014829635620117188, 0.0013580322265625, 0.002567768096923828, 0.0037775039672851562, 0.004987239837646484, 0.0061969757080078125, 0.007406711578369141, 0.008616447448730469, 0.009826183319091797, 0.011035919189453125, 0.012245655059814453, 0.013455390930175781, 0.01466512680053711, 0.015874862670898438, 0.017084598541259766, 0.018294334411621094, 0.019504070281982422, 0.02071380615234375, 0.021923542022705078, 0.023133277893066406, 0.024343013763427734, 0.025552749633789062, 0.02676248550415039, 0.02797222137451172, 0.029181957244873047, 0.030391693115234375, 0.0316014289855957, 0.03281116485595703, 0.03402090072631836, 0.03523063659667969, 0.036440372467041016, 0.037650108337402344, 0.03885984420776367, 0.040069580078125]}, "gradients/decoder.transformer.h.6.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 6.0, 247.0, 738.0, 25.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01695728488266468, -0.016632722690701485, -0.01630816049873829, -0.015983598306775093, -0.015659036114811897, -0.015334473922848701, -0.015009911730885506, -0.01468534953892231, -0.014360787346959114, -0.014036225154995918, -0.013711662963032722, -0.013387100771069527, -0.013062538579106331, -0.012737976387143135, -0.01241341419517994, -0.012088852003216743, -0.011764289811253548, -0.011439727619290352, -0.011115165427327156, -0.01079060323536396, -0.010466041043400764, -0.010141478851437569, -0.009816916659474373, -0.009492354467511177, -0.009167792275547981, -0.008843230083584785, -0.00851866789162159, -0.008194105699658394, -0.007869543507695198, -0.007544981315732002, -0.0072204191237688065, -0.006895856931805611, -0.006571294739842415, -0.006246732547879219, -0.005922170355916023, -0.0055976081639528275, -0.005273045971989632, -0.004948483780026436, -0.00462392158806324, -0.004299359396100044, -0.0039747972041368484, -0.0036502350121736526, -0.003325672820210457, -0.003001110628247261, -0.0026765484362840652, -0.0023519862443208694, -0.0020274240523576736, -0.0017028618603944778, -0.0013782994356006384, -0.0010537372436374426, -0.0007291750516742468, -0.000404612859711051, -8.005066774785519e-05, 0.0002445115242153406, 0.0005690737161785364, 0.0008936359081417322, 0.001218198100104928, 0.0015427602920681238, 0.0018673224840313196, 0.0021918846759945154, 0.0025164468679577112, 0.002841009059920907, 0.003165571251884103, 0.0034901334438472986, 0.0038146956358104944]}, "gradients/decoder.transformer.h.6.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 5.0, 4.0, 4.0, 4.0, 7.0, 9.0, 9.0, 7.0, 15.0, 15.0, 20.0, 19.0, 22.0, 25.0, 30.0, 30.0, 38.0, 35.0, 36.0, 47.0, 32.0, 38.0, 50.0, 39.0, 38.0, 41.0, 45.0, 38.0, 36.0, 36.0, 29.0, 29.0, 30.0, 17.0, 21.0, 27.0, 15.0, 14.0, 11.0, 9.0, 9.0, 7.0, 5.0, 2.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0010389089584350586, -0.0010055126622319221, -0.0009721163660287857, -0.0009387200698256493, -0.0009053237736225128, -0.0008719274774193764, -0.0008385311812162399, -0.0008051348850131035, -0.000771738588809967, -0.0007383422926068306, -0.0007049459964036942, -0.0006715497002005577, -0.0006381534039974213, -0.0006047571077942848, -0.0005713608115911484, -0.0005379645153880119, -0.0005045682191848755, -0.00047117192298173904, -0.0004377756267786026, -0.00040437933057546616, -0.0003709830343723297, -0.00033758673816919327, -0.0003041904419660568, -0.0002707941457629204, -0.00023739784955978394, -0.0002040015533566475, -0.00017060525715351105, -0.0001372089609503746, -0.00010381266474723816, -7.041636854410172e-05, -3.702007234096527e-05, -3.623776137828827e-06, 2.9772520065307617e-05, 6.316881626844406e-05, 9.65651124715805e-05, 0.00012996140867471695, 0.0001633577048778534, 0.00019675400108098984, 0.00023015029728412628, 0.0002635465934872627, 0.00029694288969039917, 0.0003303391858935356, 0.00036373548209667206, 0.0003971317782998085, 0.00043052807450294495, 0.0004639243707060814, 0.0004973206669092178, 0.0005307169631123543, 0.0005641132593154907, 0.0005975095555186272, 0.0006309058517217636, 0.0006643021479249001, 0.0006976984441280365, 0.0007310947403311729, 0.0007644910365343094, 0.0007978873327374458, 0.0008312836289405823, 0.0008646799251437187, 0.0008980762213468552, 0.0009314725175499916, 0.000964868813753128, 0.0009982651099562645, 0.001031661406159401, 0.0010650577023625374, 0.0010984539985656738]}, "gradients/decoder.transformer.h.6.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 4.0, 3.0, 5.0, 6.0, 7.0, 7.0, 12.0, 11.0, 12.0, 12.0, 20.0, 22.0, 22.0, 28.0, 27.0, 33.0, 23.0, 35.0, 33.0, 26.0, 31.0, 35.0, 38.0, 50.0, 42.0, 36.0, 37.0, 34.0, 44.0, 34.0, 33.0, 37.0, 32.0, 24.0, 25.0, 17.0, 13.0, 20.0, 16.0, 12.0, 9.0, 11.0, 2.0, 10.0, 3.0, 3.0, 3.0, 5.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0], "bins": [-7.515625, -7.27972412109375, -7.0438232421875, -6.80792236328125, -6.572021484375, -6.33612060546875, -6.1002197265625, -5.86431884765625, -5.62841796875, -5.39251708984375, -5.1566162109375, -4.92071533203125, -4.684814453125, -4.44891357421875, -4.2130126953125, -3.97711181640625, -3.7412109375, -3.50531005859375, -3.2694091796875, -3.03350830078125, -2.797607421875, -2.56170654296875, -2.3258056640625, -2.08990478515625, -1.85400390625, -1.61810302734375, -1.3822021484375, -1.14630126953125, -0.910400390625, -0.67449951171875, -0.4385986328125, -0.20269775390625, 0.033203125, 0.26910400390625, 0.5050048828125, 0.74090576171875, 0.976806640625, 1.21270751953125, 1.4486083984375, 1.68450927734375, 1.92041015625, 2.15631103515625, 2.3922119140625, 2.62811279296875, 2.864013671875, 3.09991455078125, 3.3358154296875, 3.57171630859375, 3.8076171875, 4.04351806640625, 4.2794189453125, 4.51531982421875, 4.751220703125, 4.98712158203125, 5.2230224609375, 5.45892333984375, 5.69482421875, 5.93072509765625, 6.1666259765625, 6.40252685546875, 6.638427734375, 6.87432861328125, 7.1102294921875, 7.34613037109375, 7.58203125]}, "gradients/decoder.transformer.h.6.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 2.0, 8.0, 6.0, 10.0, 11.0, 16.0, 13.0, 26.0, 30.0, 41.0, 60.0, 68.0, 86.0, 100.0, 155.0, 193.0, 260.0, 331.0, 477.0, 749.0, 1622.0, 4693.0, 17705.0, 90345.0, 746660.0, 149004.0, 24717.0, 6243.0, 2020.0, 857.0, 548.0, 379.0, 258.0, 209.0, 157.0, 113.0, 83.0, 81.0, 49.0, 45.0, 29.0, 25.0, 15.0, 11.0, 14.0, 8.0, 8.0, 5.0, 8.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0], "bins": [-18.546875, -17.960205078125, -17.37353515625, -16.786865234375, -16.2001953125, -15.613525390625, -15.02685546875, -14.440185546875, -13.853515625, -13.266845703125, -12.68017578125, -12.093505859375, -11.5068359375, -10.920166015625, -10.33349609375, -9.746826171875, -9.16015625, -8.573486328125, -7.98681640625, -7.400146484375, -6.8134765625, -6.226806640625, -5.64013671875, -5.053466796875, -4.466796875, -3.880126953125, -3.29345703125, -2.706787109375, -2.1201171875, -1.533447265625, -0.94677734375, -0.360107421875, 0.2265625, 0.813232421875, 1.39990234375, 1.986572265625, 2.5732421875, 3.159912109375, 3.74658203125, 4.333251953125, 4.919921875, 5.506591796875, 6.09326171875, 6.679931640625, 7.2666015625, 7.853271484375, 8.43994140625, 9.026611328125, 9.61328125, 10.199951171875, 10.78662109375, 11.373291015625, 11.9599609375, 12.546630859375, 13.13330078125, 13.719970703125, 14.306640625, 14.893310546875, 15.47998046875, 16.066650390625, 16.6533203125, 17.239990234375, 17.82666015625, 18.413330078125, 19.0]}, "gradients/decoder.transformer.h.6.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 4.0, 5.0, 6.0, 6.0, 2.0, 2.0, 3.0, 7.0, 13.0, 11.0, 14.0, 20.0, 30.0, 30.0, 25.0, 24.0, 27.0, 30.0, 42.0, 36.0, 47.0, 63.0, 89.0, 167.0, 1471.0, 330.0, 112.0, 63.0, 49.0, 40.0, 39.0, 36.0, 25.0, 38.0, 21.0, 23.0, 17.0, 17.0, 11.0, 15.0, 13.0, 9.0, 6.0, 7.0, 4.0, 4.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0], "bins": [-22.265625, -21.564453125, -20.86328125, -20.162109375, -19.4609375, -18.759765625, -18.05859375, -17.357421875, -16.65625, -15.955078125, -15.25390625, -14.552734375, -13.8515625, -13.150390625, -12.44921875, -11.748046875, -11.046875, -10.345703125, -9.64453125, -8.943359375, -8.2421875, -7.541015625, -6.83984375, -6.138671875, -5.4375, -4.736328125, -4.03515625, -3.333984375, -2.6328125, -1.931640625, -1.23046875, -0.529296875, 0.171875, 0.873046875, 1.57421875, 2.275390625, 2.9765625, 3.677734375, 4.37890625, 5.080078125, 5.78125, 6.482421875, 7.18359375, 7.884765625, 8.5859375, 9.287109375, 9.98828125, 10.689453125, 11.390625, 12.091796875, 12.79296875, 13.494140625, 14.1953125, 14.896484375, 15.59765625, 16.298828125, 17.0, 17.701171875, 18.40234375, 19.103515625, 19.8046875, 20.505859375, 21.20703125, 21.908203125, 22.609375]}, "gradients/decoder.transformer.h.6.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 5.0, 1.0, 1.0, 5.0, 1.0, 2.0, 3.0, 10.0, 6.0, 17.0, 19.0, 15.0, 17.0, 21.0, 39.0, 31.0, 55.0, 58.0, 90.0, 108.0, 218.0, 387.0, 1007.0, 4757.0, 287507.0, 2836732.0, 11690.0, 1527.0, 508.0, 257.0, 144.0, 98.0, 68.0, 57.0, 56.0, 47.0, 35.0, 26.0, 16.0, 21.0, 12.0, 6.0, 8.0, 3.0, 6.0, 8.0, 2.0, 6.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-63.6875, -61.6435546875, -59.599609375, -57.5556640625, -55.51171875, -53.4677734375, -51.423828125, -49.3798828125, -47.3359375, -45.2919921875, -43.248046875, -41.2041015625, -39.16015625, -37.1162109375, -35.072265625, -33.0283203125, -30.984375, -28.9404296875, -26.896484375, -24.8525390625, -22.80859375, -20.7646484375, -18.720703125, -16.6767578125, -14.6328125, -12.5888671875, -10.544921875, -8.5009765625, -6.45703125, -4.4130859375, -2.369140625, -0.3251953125, 1.71875, 3.7626953125, 5.806640625, 7.8505859375, 9.89453125, 11.9384765625, 13.982421875, 16.0263671875, 18.0703125, 20.1142578125, 22.158203125, 24.2021484375, 26.24609375, 28.2900390625, 30.333984375, 32.3779296875, 34.421875, 36.4658203125, 38.509765625, 40.5537109375, 42.59765625, 44.6416015625, 46.685546875, 48.7294921875, 50.7734375, 52.8173828125, 54.861328125, 56.9052734375, 58.94921875, 60.9931640625, 63.037109375, 65.0810546875, 67.125]}, "gradients/decoder.transformer.h.6.ln_1.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 10.0, 123.0, 336.0, 375.0, 139.0, 26.0, 6.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.370925903320312, -17.832563400268555, -14.29420280456543, -10.755840301513672, -7.2174787521362305, -3.679117202758789, -0.14075469970703125, 3.3976058959960938, 6.935968399047852, 10.474329948425293, 14.012691497802734, 17.551054000854492, 21.08941650390625, 24.627777099609375, 28.166139602661133, 31.704500198364258, 35.242862701416016, 38.78122329711914, 42.31958770751953, 45.857948303222656, 49.39630889892578, 52.934669494628906, 56.4730339050293, 60.01139450073242, 63.54975891113281, 67.08811950683594, 70.62648010253906, 74.16484069824219, 77.70320892333984, 81.24156951904297, 84.7799301147461, 88.31829071044922, 91.85665130615234, 95.39501190185547, 98.9333724975586, 102.47174072265625, 106.01010131835938, 109.5484619140625, 113.08682250976562, 116.62518310546875, 120.16354370117188, 123.701904296875, 127.24026489257812, 130.77862548828125, 134.31698608398438, 137.8553466796875, 141.39370727539062, 144.93206787109375, 148.47044372558594, 152.00880432128906, 155.5471649169922, 159.0855255126953, 162.62388610839844, 166.16224670410156, 169.70062255859375, 173.23898315429688, 176.77732849121094, 180.31568908691406, 183.8540496826172, 187.3924102783203, 190.93077087402344, 194.46913146972656, 198.00750732421875, 201.54586791992188, 205.084228515625]}, "gradients/decoder.transformer.h.6.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 5.0, 5.0, 3.0, 2.0, 6.0, 13.0, 8.0, 14.0, 12.0, 13.0, 12.0, 17.0, 22.0, 14.0, 22.0, 25.0, 40.0, 37.0, 21.0, 26.0, 21.0, 39.0, 38.0, 40.0, 39.0, 35.0, 47.0, 47.0, 32.0, 33.0, 26.0, 34.0, 31.0, 36.0, 26.0, 31.0, 23.0, 10.0, 19.0, 15.0, 12.0, 13.0, 10.0, 7.0, 7.0, 4.0, 5.0, 4.0, 6.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-61.70460510253906, -59.826332092285156, -57.948062896728516, -56.06978988647461, -54.1915168762207, -52.31324768066406, -50.434974670410156, -48.55670166015625, -46.678428649902344, -44.80015563964844, -42.9218864440918, -41.04361343383789, -39.165340423583984, -37.287071228027344, -35.40879821777344, -33.53052520751953, -31.65225601196289, -29.773984909057617, -27.89571189880371, -26.017440795898438, -24.13916778564453, -22.260896682739258, -20.382625579833984, -18.504352569580078, -16.626081466674805, -14.747809410095215, -12.869537353515625, -10.991266250610352, -9.112994194030762, -7.234722137451172, -5.356451034545898, -3.4781789779663086, -1.5999031066894531, 0.2783687114715576, 2.1566405296325684, 4.034912109375, 5.91318416595459, 7.79145622253418, 9.669727325439453, 11.547999382019043, 13.426271438598633, 15.304543495178223, 17.182815551757812, 19.061086654663086, 20.93935775756836, 22.817630767822266, 24.69590187072754, 26.574172973632812, 28.45244598388672, 30.330717086791992, 32.208988189697266, 34.08726119995117, 35.96553421020508, 37.84380340576172, 39.722076416015625, 41.60034942626953, 43.47862243652344, 45.356895446777344, 47.235164642333984, 49.11343765258789, 50.9917106628418, 52.86997985839844, 54.748252868652344, 56.62652587890625, 58.50479507446289]}, "gradients/decoder.transformer.h.5.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 6.0, 7.0, 4.0, 3.0, 6.0, 10.0, 11.0, 10.0, 13.0, 13.0, 22.0, 19.0, 26.0, 30.0, 26.0, 25.0, 25.0, 27.0, 31.0, 31.0, 41.0, 52.0, 40.0, 36.0, 46.0, 40.0, 38.0, 39.0, 40.0, 40.0, 36.0, 19.0, 23.0, 20.0, 14.0, 27.0, 19.0, 15.0, 15.0, 10.0, 11.0, 5.0, 8.0, 7.0, 3.0, 7.0, 0.0, 1.0, 4.0, 3.0, 2.0, 0.0, 0.0, 2.0], "bins": [-8.15625, -7.9111328125, -7.666015625, -7.4208984375, -7.17578125, -6.9306640625, -6.685546875, -6.4404296875, -6.1953125, -5.9501953125, -5.705078125, -5.4599609375, -5.21484375, -4.9697265625, -4.724609375, -4.4794921875, -4.234375, -3.9892578125, -3.744140625, -3.4990234375, -3.25390625, -3.0087890625, -2.763671875, -2.5185546875, -2.2734375, -2.0283203125, -1.783203125, -1.5380859375, -1.29296875, -1.0478515625, -0.802734375, -0.5576171875, -0.3125, -0.0673828125, 0.177734375, 0.4228515625, 0.66796875, 0.9130859375, 1.158203125, 1.4033203125, 1.6484375, 1.8935546875, 2.138671875, 2.3837890625, 2.62890625, 2.8740234375, 3.119140625, 3.3642578125, 3.609375, 3.8544921875, 4.099609375, 4.3447265625, 4.58984375, 4.8349609375, 5.080078125, 5.3251953125, 5.5703125, 5.8154296875, 6.060546875, 6.3056640625, 6.55078125, 6.7958984375, 7.041015625, 7.2861328125, 7.53125]}, "gradients/decoder.transformer.h.5.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 6.0, 6.0, 6.0, 9.0, 9.0, 15.0, 18.0, 12.0, 24.0, 27.0, 38.0, 43.0, 59.0, 77.0, 108.0, 147.0, 201.0, 326.0, 588.0, 1486.0, 5421.0, 28072.0, 199086.0, 1166425.0, 2041192.0, 635678.0, 95540.0, 14250.0, 3019.0, 977.0, 441.0, 272.0, 185.0, 123.0, 93.0, 57.0, 48.0, 49.0, 43.0, 19.0, 25.0, 22.0, 9.0, 12.0, 10.0, 8.0, 4.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-13.9375, -13.506103515625, -13.07470703125, -12.643310546875, -12.2119140625, -11.780517578125, -11.34912109375, -10.917724609375, -10.486328125, -10.054931640625, -9.62353515625, -9.192138671875, -8.7607421875, -8.329345703125, -7.89794921875, -7.466552734375, -7.03515625, -6.603759765625, -6.17236328125, -5.740966796875, -5.3095703125, -4.878173828125, -4.44677734375, -4.015380859375, -3.583984375, -3.152587890625, -2.72119140625, -2.289794921875, -1.8583984375, -1.427001953125, -0.99560546875, -0.564208984375, -0.1328125, 0.298583984375, 0.72998046875, 1.161376953125, 1.5927734375, 2.024169921875, 2.45556640625, 2.886962890625, 3.318359375, 3.749755859375, 4.18115234375, 4.612548828125, 5.0439453125, 5.475341796875, 5.90673828125, 6.338134765625, 6.76953125, 7.200927734375, 7.63232421875, 8.063720703125, 8.4951171875, 8.926513671875, 9.35791015625, 9.789306640625, 10.220703125, 10.652099609375, 11.08349609375, 11.514892578125, 11.9462890625, 12.377685546875, 12.80908203125, 13.240478515625, 13.671875]}, "gradients/decoder.transformer.h.5.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 7.0, 1.0, 4.0, 2.0, 6.0, 6.0, 5.0, 12.0, 6.0, 12.0, 29.0, 33.0, 38.0, 56.0, 75.0, 102.0, 122.0, 176.0, 201.0, 283.0, 324.0, 379.0, 396.0, 379.0, 329.0, 261.0, 201.0, 164.0, 121.0, 79.0, 65.0, 49.0, 27.0, 31.0, 12.0, 17.0, 14.0, 11.0, 9.0, 15.0, 5.0, 3.0, 6.0, 4.0, 2.0, 4.0, 2.0, 2.0, 2.0], "bins": [-13.90625, -13.5343017578125, -13.162353515625, -12.7904052734375, -12.41845703125, -12.0465087890625, -11.674560546875, -11.3026123046875, -10.9306640625, -10.5587158203125, -10.186767578125, -9.8148193359375, -9.44287109375, -9.0709228515625, -8.698974609375, -8.3270263671875, -7.955078125, -7.5831298828125, -7.211181640625, -6.8392333984375, -6.46728515625, -6.0953369140625, -5.723388671875, -5.3514404296875, -4.9794921875, -4.6075439453125, -4.235595703125, -3.8636474609375, -3.49169921875, -3.1197509765625, -2.747802734375, -2.3758544921875, -2.00390625, -1.6319580078125, -1.260009765625, -0.8880615234375, -0.51611328125, -0.1441650390625, 0.227783203125, 0.5997314453125, 0.9716796875, 1.3436279296875, 1.715576171875, 2.0875244140625, 2.45947265625, 2.8314208984375, 3.203369140625, 3.5753173828125, 3.947265625, 4.3192138671875, 4.691162109375, 5.0631103515625, 5.43505859375, 5.8070068359375, 6.178955078125, 6.5509033203125, 6.9228515625, 7.2947998046875, 7.666748046875, 8.0386962890625, 8.41064453125, 8.7825927734375, 9.154541015625, 9.5264892578125, 9.8984375]}, "gradients/decoder.transformer.h.5.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 3.0, 6.0, 1.0, 3.0, 3.0, 2.0, 4.0, 8.0, 6.0, 14.0, 12.0, 24.0, 20.0, 28.0, 35.0, 46.0, 57.0, 96.0, 129.0, 236.0, 319.0, 488.0, 863.0, 2073.0, 7647.0, 61020.0, 1181013.0, 2760357.0, 158765.0, 15047.0, 3119.0, 1146.0, 561.0, 349.0, 204.0, 184.0, 126.0, 68.0, 52.0, 44.0, 24.0, 20.0, 17.0, 19.0, 12.0, 9.0, 6.0, 2.0, 7.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-25.296875, -24.399169921875, -23.50146484375, -22.603759765625, -21.7060546875, -20.808349609375, -19.91064453125, -19.012939453125, -18.115234375, -17.217529296875, -16.31982421875, -15.422119140625, -14.5244140625, -13.626708984375, -12.72900390625, -11.831298828125, -10.93359375, -10.035888671875, -9.13818359375, -8.240478515625, -7.3427734375, -6.445068359375, -5.54736328125, -4.649658203125, -3.751953125, -2.854248046875, -1.95654296875, -1.058837890625, -0.1611328125, 0.736572265625, 1.63427734375, 2.531982421875, 3.4296875, 4.327392578125, 5.22509765625, 6.122802734375, 7.0205078125, 7.918212890625, 8.81591796875, 9.713623046875, 10.611328125, 11.509033203125, 12.40673828125, 13.304443359375, 14.2021484375, 15.099853515625, 15.99755859375, 16.895263671875, 17.79296875, 18.690673828125, 19.58837890625, 20.486083984375, 21.3837890625, 22.281494140625, 23.17919921875, 24.076904296875, 24.974609375, 25.872314453125, 26.77001953125, 27.667724609375, 28.5654296875, 29.463134765625, 30.36083984375, 31.258544921875, 32.15625]}, "gradients/decoder.transformer.h.5.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 24.0, 431.0, 517.0, 42.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-195.6342010498047, -184.1607208251953, -172.68724060058594, -161.21376037597656, -149.7402801513672, -138.2667999267578, -126.79332733154297, -115.3198471069336, -103.84636688232422, -92.37288665771484, -80.89940643310547, -69.42593383789062, -57.952449798583984, -46.47896957397461, -35.0054931640625, -23.532012939453125, -12.05853271484375, -0.5850534439086914, 10.888425827026367, 22.36190414428711, 33.835384368896484, 45.30886459350586, 56.78234100341797, 68.25582122802734, 79.72930145263672, 91.2027816772461, 102.67626190185547, 114.14973449707031, 125.62321472167969, 137.09669494628906, 148.57017517089844, 160.0436553955078, 171.51712036132812, 182.9906005859375, 194.46408081054688, 205.93756103515625, 217.41104125976562, 228.884521484375, 240.35800170898438, 251.83148193359375, 263.3049621582031, 274.7784423828125, 286.2519226074219, 297.72540283203125, 309.1988830566406, 320.67236328125, 332.1458435058594, 343.61932373046875, 355.0927734375, 366.5662536621094, 378.03973388671875, 389.5132141113281, 400.9866943359375, 412.4601745605469, 423.93365478515625, 435.4071350097656, 446.880615234375, 458.3540954589844, 469.82757568359375, 481.3010559082031, 492.7745361328125, 504.2480163574219, 515.7214965820312, 527.1949462890625, 538.66845703125]}, "gradients/decoder.transformer.h.5.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 8.0, 9.0, 4.0, 11.0, 10.0, 11.0, 15.0, 18.0, 22.0, 22.0, 21.0, 38.0, 31.0, 25.0, 31.0, 40.0, 32.0, 45.0, 37.0, 32.0, 50.0, 52.0, 45.0, 48.0, 35.0, 40.0, 24.0, 23.0, 26.0, 23.0, 21.0, 25.0, 24.0, 18.0, 13.0, 16.0, 5.0, 9.0, 13.0, 6.0, 6.0, 9.0, 2.0, 2.0, 4.0, 5.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-44.03077697753906, -42.614967346191406, -41.199153900146484, -39.78334426879883, -38.367530822753906, -36.95172119140625, -35.535911560058594, -34.12009811401367, -32.704288482666016, -31.288476943969727, -29.872665405273438, -28.45685577392578, -27.041044235229492, -25.625232696533203, -24.209421157836914, -22.793609619140625, -21.377798080444336, -19.961986541748047, -18.546175003051758, -17.13036346435547, -15.714553833007812, -14.298742294311523, -12.882930755615234, -11.467120170593262, -10.051308631896973, -8.635497093200684, -7.219686508178711, -5.803874969482422, -4.388063907623291, -2.97225284576416, -1.556441307067871, -0.14063072204589844, 1.2751808166503906, 2.6909918785095215, 4.106802940368652, 5.522614479064941, 6.938425540924072, 8.354236602783203, 9.770048141479492, 11.185858726501465, 12.601670265197754, 14.017481803894043, 15.433292388916016, 16.849103927612305, 18.264915466308594, 19.68072509765625, 21.096538543701172, 22.512348175048828, 23.928159713745117, 25.343971252441406, 26.759782791137695, 28.175594329833984, 29.59140396118164, 31.00721549987793, 32.42302703857422, 33.838836669921875, 35.2546501159668, 36.67045974731445, 38.086273193359375, 39.50208282470703, 40.91789627075195, 42.33370590209961, 43.74951934814453, 45.16532897949219, 46.581138610839844]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 5.0, 11.0, 5.0, 11.0, 7.0, 8.0, 10.0, 20.0, 14.0, 17.0, 15.0, 21.0, 15.0, 33.0, 28.0, 32.0, 31.0, 40.0, 31.0, 34.0, 41.0, 34.0, 43.0, 56.0, 32.0, 51.0, 38.0, 32.0, 39.0, 37.0, 24.0, 24.0, 21.0, 19.0, 18.0, 21.0, 16.0, 13.0, 7.0, 9.0, 4.0, 11.0, 11.0, 8.0, 2.0, 3.0, 0.0, 3.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.21875, -7.9569091796875, -7.695068359375, -7.4332275390625, -7.17138671875, -6.9095458984375, -6.647705078125, -6.3858642578125, -6.1240234375, -5.8621826171875, -5.600341796875, -5.3385009765625, -5.07666015625, -4.8148193359375, -4.552978515625, -4.2911376953125, -4.029296875, -3.7674560546875, -3.505615234375, -3.2437744140625, -2.98193359375, -2.7200927734375, -2.458251953125, -2.1964111328125, -1.9345703125, -1.6727294921875, -1.410888671875, -1.1490478515625, -0.88720703125, -0.6253662109375, -0.363525390625, -0.1016845703125, 0.16015625, 0.4219970703125, 0.683837890625, 0.9456787109375, 1.20751953125, 1.4693603515625, 1.731201171875, 1.9930419921875, 2.2548828125, 2.5167236328125, 2.778564453125, 3.0404052734375, 3.30224609375, 3.5640869140625, 3.825927734375, 4.0877685546875, 4.349609375, 4.6114501953125, 4.873291015625, 5.1351318359375, 5.39697265625, 5.6588134765625, 5.920654296875, 6.1824951171875, 6.4443359375, 6.7061767578125, 6.968017578125, 7.2298583984375, 7.49169921875, 7.7535400390625, 8.015380859375, 8.2772216796875, 8.5390625]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 4.0, 6.0, 17.0, 11.0, 26.0, 27.0, 52.0, 67.0, 107.0, 147.0, 196.0, 293.0, 400.0, 587.0, 815.0, 1229.0, 1785.0, 2461.0, 3661.0, 5341.0, 7575.0, 11142.0, 16510.0, 24790.0, 36164.0, 54750.0, 83097.0, 122764.0, 164017.0, 159570.0, 115008.0, 78567.0, 51290.0, 33990.0, 23003.0, 15495.0, 10526.0, 7181.0, 4928.0, 3361.0, 2339.0, 1594.0, 1099.0, 708.0, 551.0, 397.0, 279.0, 193.0, 147.0, 84.0, 74.0, 43.0, 34.0, 25.0, 7.0, 14.0, 7.0, 9.0, 3.0, 3.0, 2.0], "bins": [-0.5625, -0.5448760986328125, -0.527252197265625, -0.5096282958984375, -0.49200439453125, -0.4743804931640625, -0.456756591796875, -0.4391326904296875, -0.4215087890625, -0.4038848876953125, -0.386260986328125, -0.3686370849609375, -0.35101318359375, -0.3333892822265625, -0.315765380859375, -0.2981414794921875, -0.280517578125, -0.2628936767578125, -0.245269775390625, -0.2276458740234375, -0.21002197265625, -0.1923980712890625, -0.174774169921875, -0.1571502685546875, -0.1395263671875, -0.1219024658203125, -0.104278564453125, -0.0866546630859375, -0.06903076171875, -0.0514068603515625, -0.033782958984375, -0.0161590576171875, 0.00146484375, 0.0190887451171875, 0.036712646484375, 0.0543365478515625, 0.07196044921875, 0.0895843505859375, 0.107208251953125, 0.1248321533203125, 0.1424560546875, 0.1600799560546875, 0.177703857421875, 0.1953277587890625, 0.21295166015625, 0.2305755615234375, 0.248199462890625, 0.2658233642578125, 0.283447265625, 0.3010711669921875, 0.318695068359375, 0.3363189697265625, 0.35394287109375, 0.3715667724609375, 0.389190673828125, 0.4068145751953125, 0.4244384765625, 0.4420623779296875, 0.459686279296875, 0.4773101806640625, 0.49493408203125, 0.5125579833984375, 0.530181884765625, 0.5478057861328125, 0.5654296875]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 3.0, 2.0, 6.0, 4.0, 7.0, 6.0, 8.0, 14.0, 8.0, 18.0, 14.0, 17.0, 25.0, 18.0, 26.0, 18.0, 28.0, 26.0, 32.0, 34.0, 30.0, 53.0, 38.0, 51.0, 1056.0, 26.0, 51.0, 42.0, 43.0, 41.0, 47.0, 24.0, 23.0, 25.0, 27.0, 22.0, 15.0, 16.0, 24.0, 11.0, 11.0, 10.0, 5.0, 4.0, 12.0, 4.0, 3.0, 1.0, 4.0, 1.0, 2.0, 1.0, 4.0, 0.0, 0.0, 1.0], "bins": [-5.6484375, -5.4747314453125, -5.301025390625, -5.1273193359375, -4.95361328125, -4.7799072265625, -4.606201171875, -4.4324951171875, -4.2587890625, -4.0850830078125, -3.911376953125, -3.7376708984375, -3.56396484375, -3.3902587890625, -3.216552734375, -3.0428466796875, -2.869140625, -2.6954345703125, -2.521728515625, -2.3480224609375, -2.17431640625, -2.0006103515625, -1.826904296875, -1.6531982421875, -1.4794921875, -1.3057861328125, -1.132080078125, -0.9583740234375, -0.78466796875, -0.6109619140625, -0.437255859375, -0.2635498046875, -0.08984375, 0.0838623046875, 0.257568359375, 0.4312744140625, 0.60498046875, 0.7786865234375, 0.952392578125, 1.1260986328125, 1.2998046875, 1.4735107421875, 1.647216796875, 1.8209228515625, 1.99462890625, 2.1683349609375, 2.342041015625, 2.5157470703125, 2.689453125, 2.8631591796875, 3.036865234375, 3.2105712890625, 3.38427734375, 3.5579833984375, 3.731689453125, 3.9053955078125, 4.0791015625, 4.2528076171875, 4.426513671875, 4.6002197265625, 4.77392578125, 4.9476318359375, 5.121337890625, 5.2950439453125, 5.46875]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 6.0, 2.0, 13.0, 5.0, 15.0, 28.0, 43.0, 73.0, 94.0, 169.0, 251.0, 376.0, 583.0, 857.0, 1381.0, 2198.0, 3439.0, 5049.0, 7791.0, 11650.0, 17835.0, 27090.0, 41204.0, 62391.0, 93849.0, 135115.0, 1214260.0, 150141.0, 108094.0, 72697.0, 47992.0, 31768.0, 20714.0, 14028.0, 8940.0, 6004.0, 3867.0, 2552.0, 1646.0, 1016.0, 721.0, 411.0, 277.0, 196.0, 122.0, 78.0, 43.0, 25.0, 14.0, 13.0, 3.0, 9.0, 3.0, 1.0, 1.0, 2.0, 2.0], "bins": [-0.56591796875, -0.5491180419921875, -0.532318115234375, -0.5155181884765625, -0.49871826171875, -0.4819183349609375, -0.465118408203125, -0.4483184814453125, -0.4315185546875, -0.4147186279296875, -0.397918701171875, -0.3811187744140625, -0.36431884765625, -0.3475189208984375, -0.330718994140625, -0.3139190673828125, -0.297119140625, -0.2803192138671875, -0.263519287109375, -0.2467193603515625, -0.22991943359375, -0.2131195068359375, -0.196319580078125, -0.1795196533203125, -0.1627197265625, -0.1459197998046875, -0.129119873046875, -0.1123199462890625, -0.09552001953125, -0.0787200927734375, -0.061920166015625, -0.0451202392578125, -0.0283203125, -0.0115203857421875, 0.005279541015625, 0.0220794677734375, 0.03887939453125, 0.0556793212890625, 0.072479248046875, 0.0892791748046875, 0.1060791015625, 0.1228790283203125, 0.139678955078125, 0.1564788818359375, 0.17327880859375, 0.1900787353515625, 0.206878662109375, 0.2236785888671875, 0.240478515625, 0.2572784423828125, 0.274078369140625, 0.2908782958984375, 0.30767822265625, 0.3244781494140625, 0.341278076171875, 0.3580780029296875, 0.3748779296875, 0.3916778564453125, 0.408477783203125, 0.4252777099609375, 0.44207763671875, 0.4588775634765625, 0.475677490234375, 0.4924774169921875, 0.50927734375]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 4.0, 3.0, 9.0, 3.0, 9.0, 17.0, 14.0, 24.0, 27.0, 36.0, 41.0, 49.0, 62.0, 65.0, 74.0, 73.0, 69.0, 67.0, 65.0, 65.0, 49.0, 40.0, 31.0, 25.0, 13.0, 19.0, 11.0, 8.0, 7.0, 6.0, 7.0, 7.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0025463104248046875, -0.0024728775024414062, -0.002399444580078125, -0.0023260116577148438, -0.0022525787353515625, -0.0021791458129882812, -0.002105712890625, -0.0020322799682617188, -0.0019588470458984375, -0.0018854141235351562, -0.001811981201171875, -0.0017385482788085938, -0.0016651153564453125, -0.0015916824340820312, -0.00151824951171875, -0.0014448165893554688, -0.0013713836669921875, -0.0012979507446289062, -0.001224517822265625, -0.0011510848999023438, -0.0010776519775390625, -0.0010042190551757812, -0.0009307861328125, -0.0008573532104492188, -0.0007839202880859375, -0.0007104873657226562, -0.000637054443359375, -0.0005636215209960938, -0.0004901885986328125, -0.00041675567626953125, -0.00034332275390625, -0.00026988983154296875, -0.0001964569091796875, -0.00012302398681640625, -4.9591064453125e-05, 2.384185791015625e-05, 9.72747802734375e-05, 0.00017070770263671875, 0.000244140625, 0.00031757354736328125, 0.0003910064697265625, 0.00046443939208984375, 0.000537872314453125, 0.0006113052368164062, 0.0006847381591796875, 0.0007581710815429688, 0.00083160400390625, 0.0009050369262695312, 0.0009784698486328125, 0.0010519027709960938, 0.001125335693359375, 0.0011987686157226562, 0.0012722015380859375, 0.0013456344604492188, 0.0014190673828125, 0.0014925003051757812, 0.0015659332275390625, 0.0016393661499023438, 0.001712799072265625, 0.0017862319946289062, 0.0018596649169921875, 0.0019330978393554688, 0.00200653076171875, 0.0020799636840820312, 0.0021533966064453125]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 6.0, 12.0, 15.0, 14.0, 16.0, 21.0, 42.0, 37.0, 55.0, 74.0, 123.0, 153.0, 325.0, 527.0, 1259.0, 524889.0, 518363.0, 1244.0, 527.0, 276.0, 156.0, 120.0, 84.0, 55.0, 41.0, 34.0, 17.0, 14.0, 13.0, 12.0, 7.0, 2.0, 5.0, 3.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.042083740234375, -0.040680885314941406, -0.03927803039550781, -0.03787517547607422, -0.036472320556640625, -0.03506946563720703, -0.03366661071777344, -0.032263755798339844, -0.03086090087890625, -0.029458045959472656, -0.028055191040039062, -0.02665233612060547, -0.025249481201171875, -0.02384662628173828, -0.022443771362304688, -0.021040916442871094, -0.0196380615234375, -0.018235206604003906, -0.016832351684570312, -0.015429496765136719, -0.014026641845703125, -0.012623786926269531, -0.011220932006835938, -0.009818077087402344, -0.00841522216796875, -0.007012367248535156, -0.0056095123291015625, -0.004206657409667969, -0.002803802490234375, -0.0014009475708007812, 1.9073486328125e-06, 0.0014047622680664062, 0.0028076171875, 0.004210472106933594, 0.0056133270263671875, 0.007016181945800781, 0.008419036865234375, 0.009821891784667969, 0.011224746704101562, 0.012627601623535156, 0.01403045654296875, 0.015433311462402344, 0.016836166381835938, 0.01823902130126953, 0.019641876220703125, 0.02104473114013672, 0.022447586059570312, 0.023850440979003906, 0.0252532958984375, 0.026656150817871094, 0.028059005737304688, 0.02946186065673828, 0.030864715576171875, 0.03226757049560547, 0.03367042541503906, 0.035073280334472656, 0.03647613525390625, 0.037878990173339844, 0.03928184509277344, 0.04068470001220703, 0.042087554931640625, 0.04349040985107422, 0.04489326477050781, 0.046296119689941406, 0.047698974609375]}, "gradients/decoder.transformer.h.5.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 19.0, 523.0, 454.0, 20.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.012449084781110287, -0.012172176502645016, -0.01189526915550232, -0.011618360877037048, -0.011341452598571777, -0.011064544320106506, -0.01078763697296381, -0.010510728694498539, -0.010233820416033268, -0.009956912137567997, -0.0096800047904253, -0.00940309651196003, -0.009126188233494759, -0.008849279955029488, -0.008572372607886791, -0.00829546432942152, -0.008018556982278824, -0.00774164916947484, -0.007464740891009569, -0.0071878330782055855, -0.0069109247997403145, -0.006634016986936331, -0.006357109174132347, -0.006080200895667076, -0.005803292617201805, -0.005526384804397821, -0.00524947652593255, -0.004972568713128567, -0.004695660434663296, -0.004418752621859312, -0.004141844809055328, -0.0038649365305900574, -0.0035880282521247864, -0.003311120206490159, -0.0030342121608555317, -0.002757304348051548, -0.002480396069586277, -0.0022034882567822933, -0.001926580211147666, -0.0016496721655130386, -0.0013727641198784113, -0.001095856074243784, -0.0008189480868168175, -0.0005420400993898511, -0.00026513205375522375, 1.1775991879403591e-05, 0.0002886839210987091, 0.0005655919667333364, 0.0008425000123679638, 0.0011194080580025911, 0.0013963161036372185, 0.001673224032856524, 0.0019501320784911513, 0.002227040007710457, 0.002503948053345084, 0.0027808560989797115, 0.003057764144614339, 0.0033346721902489662, 0.0036115802358835936, 0.003888488281518221, 0.004165396094322205, 0.004442304372787476, 0.004719212185591459, 0.004996119998395443, 0.005273028276860714]}, "gradients/decoder.transformer.h.5.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 6.0, 5.0, 9.0, 3.0, 7.0, 5.0, 19.0, 14.0, 17.0, 15.0, 21.0, 24.0, 21.0, 28.0, 27.0, 32.0, 33.0, 38.0, 40.0, 47.0, 33.0, 48.0, 38.0, 29.0, 47.0, 37.0, 27.0, 36.0, 30.0, 39.0, 29.0, 23.0, 23.0, 22.0, 21.0, 23.0, 20.0, 13.0, 9.0, 10.0, 8.0, 9.0, 8.0, 4.0, 3.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0008704066276550293, -0.0008399803191423416, -0.0008095540106296539, -0.0007791277021169662, -0.0007487013936042786, -0.0007182750850915909, -0.0006878487765789032, -0.0006574224680662155, -0.0006269961595535278, -0.0005965698510408401, -0.0005661435425281525, -0.0005357172340154648, -0.0005052909255027771, -0.0004748646169900894, -0.00044443830847740173, -0.00041401199996471405, -0.00038358569145202637, -0.0003531593829393387, -0.000322733074426651, -0.0002923067659139633, -0.00026188045740127563, -0.00023145414888858795, -0.00020102784037590027, -0.00017060153186321259, -0.0001401752233505249, -0.00010974891483783722, -7.932260632514954e-05, -4.889629781246185e-05, -1.846998929977417e-05, 1.1956319212913513e-05, 4.2382627725601196e-05, 7.280893623828888e-05, 0.00010323524475097656, 0.00013366155326366425, 0.00016408786177635193, 0.0001945141702890396, 0.0002249404788017273, 0.000255366787314415, 0.00028579309582710266, 0.00031621940433979034, 0.00034664571285247803, 0.0003770720213651657, 0.0004074983298778534, 0.0004379246383905411, 0.00046835094690322876, 0.0004987772554159164, 0.0005292035639286041, 0.0005596298724412918, 0.0005900561809539795, 0.0006204824894666672, 0.0006509087979793549, 0.0006813351064920425, 0.0007117614150047302, 0.0007421877235174179, 0.0007726140320301056, 0.0008030403405427933, 0.000833466649055481, 0.0008638929575681686, 0.0008943192660808563, 0.000924745574593544, 0.0009551718831062317, 0.0009855981916189194, 0.001016024500131607, 0.0010464508086442947, 0.0010768771171569824]}, "gradients/decoder.transformer.h.5.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 5.0, 11.0, 5.0, 11.0, 7.0, 8.0, 10.0, 20.0, 14.0, 17.0, 15.0, 21.0, 15.0, 33.0, 28.0, 32.0, 31.0, 40.0, 31.0, 34.0, 41.0, 34.0, 43.0, 56.0, 32.0, 51.0, 38.0, 32.0, 39.0, 37.0, 24.0, 24.0, 21.0, 19.0, 18.0, 21.0, 16.0, 13.0, 7.0, 9.0, 4.0, 11.0, 11.0, 8.0, 2.0, 3.0, 0.0, 3.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.21875, -7.9569091796875, -7.695068359375, -7.4332275390625, -7.17138671875, -6.9095458984375, -6.647705078125, -6.3858642578125, -6.1240234375, -5.8621826171875, -5.600341796875, -5.3385009765625, -5.07666015625, -4.8148193359375, -4.552978515625, -4.2911376953125, -4.029296875, -3.7674560546875, -3.505615234375, -3.2437744140625, -2.98193359375, -2.7200927734375, -2.458251953125, -2.1964111328125, -1.9345703125, -1.6727294921875, -1.410888671875, -1.1490478515625, -0.88720703125, -0.6253662109375, -0.363525390625, -0.1016845703125, 0.16015625, 0.4219970703125, 0.683837890625, 0.9456787109375, 1.20751953125, 1.4693603515625, 1.731201171875, 1.9930419921875, 2.2548828125, 2.5167236328125, 2.778564453125, 3.0404052734375, 3.30224609375, 3.5640869140625, 3.825927734375, 4.0877685546875, 4.349609375, 4.6114501953125, 4.873291015625, 5.1351318359375, 5.39697265625, 5.6588134765625, 5.920654296875, 6.1824951171875, 6.4443359375, 6.7061767578125, 6.968017578125, 7.2298583984375, 7.49169921875, 7.7535400390625, 8.015380859375, 8.2772216796875, 8.5390625]}, "gradients/decoder.transformer.h.5.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 7.0, 3.0, 3.0, 3.0, 9.0, 13.0, 22.0, 17.0, 21.0, 30.0, 39.0, 53.0, 78.0, 99.0, 114.0, 156.0, 219.0, 374.0, 556.0, 1022.0, 1895.0, 3654.0, 8033.0, 18170.0, 44583.0, 110293.0, 289500.0, 342203.0, 133107.0, 53510.0, 22070.0, 9090.0, 4304.0, 2187.0, 1121.0, 662.0, 370.0, 256.0, 187.0, 139.0, 115.0, 63.0, 50.0, 45.0, 40.0, 23.0, 16.0, 12.0, 9.0, 13.0, 4.0, 4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.2734375, -9.943115234375, -9.61279296875, -9.282470703125, -8.9521484375, -8.621826171875, -8.29150390625, -7.961181640625, -7.630859375, -7.300537109375, -6.97021484375, -6.639892578125, -6.3095703125, -5.979248046875, -5.64892578125, -5.318603515625, -4.98828125, -4.657958984375, -4.32763671875, -3.997314453125, -3.6669921875, -3.336669921875, -3.00634765625, -2.676025390625, -2.345703125, -2.015380859375, -1.68505859375, -1.354736328125, -1.0244140625, -0.694091796875, -0.36376953125, -0.033447265625, 0.296875, 0.627197265625, 0.95751953125, 1.287841796875, 1.6181640625, 1.948486328125, 2.27880859375, 2.609130859375, 2.939453125, 3.269775390625, 3.60009765625, 3.930419921875, 4.2607421875, 4.591064453125, 4.92138671875, 5.251708984375, 5.58203125, 5.912353515625, 6.24267578125, 6.572998046875, 6.9033203125, 7.233642578125, 7.56396484375, 7.894287109375, 8.224609375, 8.554931640625, 8.88525390625, 9.215576171875, 9.5458984375, 9.876220703125, 10.20654296875, 10.536865234375, 10.8671875]}, "gradients/decoder.transformer.h.5.attn.c_attn.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 6.0, 3.0, 5.0, 3.0, 6.0, 8.0, 7.0, 9.0, 12.0, 11.0, 8.0, 19.0, 11.0, 25.0, 29.0, 37.0, 30.0, 40.0, 40.0, 64.0, 80.0, 112.0, 181.0, 1427.0, 252.0, 126.0, 87.0, 56.0, 38.0, 55.0, 32.0, 27.0, 20.0, 33.0, 19.0, 21.0, 20.0, 21.0, 19.0, 6.0, 10.0, 4.0, 11.0, 2.0, 6.0, 3.0, 5.0, 6.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.703125, -22.931640625, -22.16015625, -21.388671875, -20.6171875, -19.845703125, -19.07421875, -18.302734375, -17.53125, -16.759765625, -15.98828125, -15.216796875, -14.4453125, -13.673828125, -12.90234375, -12.130859375, -11.359375, -10.587890625, -9.81640625, -9.044921875, -8.2734375, -7.501953125, -6.73046875, -5.958984375, -5.1875, -4.416015625, -3.64453125, -2.873046875, -2.1015625, -1.330078125, -0.55859375, 0.212890625, 0.984375, 1.755859375, 2.52734375, 3.298828125, 4.0703125, 4.841796875, 5.61328125, 6.384765625, 7.15625, 7.927734375, 8.69921875, 9.470703125, 10.2421875, 11.013671875, 11.78515625, 12.556640625, 13.328125, 14.099609375, 14.87109375, 15.642578125, 16.4140625, 17.185546875, 17.95703125, 18.728515625, 19.5, 20.271484375, 21.04296875, 21.814453125, 22.5859375, 23.357421875, 24.12890625, 24.900390625, 25.671875]}, "gradients/decoder.transformer.h.5.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 4.0, 4.0, 3.0, 4.0, 3.0, 9.0, 7.0, 13.0, 11.0, 20.0, 19.0, 28.0, 41.0, 62.0, 75.0, 141.0, 195.0, 308.0, 482.0, 1052.0, 7838.0, 466865.0, 2641512.0, 23829.0, 1635.0, 537.0, 333.0, 208.0, 127.0, 100.0, 67.0, 47.0, 39.0, 29.0, 15.0, 9.0, 13.0, 8.0, 5.0, 2.0, 3.0, 3.0, 4.0, 4.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-60.59375, -58.81982421875, -57.0458984375, -55.27197265625, -53.498046875, -51.72412109375, -49.9501953125, -48.17626953125, -46.40234375, -44.62841796875, -42.8544921875, -41.08056640625, -39.306640625, -37.53271484375, -35.7587890625, -33.98486328125, -32.2109375, -30.43701171875, -28.6630859375, -26.88916015625, -25.115234375, -23.34130859375, -21.5673828125, -19.79345703125, -18.01953125, -16.24560546875, -14.4716796875, -12.69775390625, -10.923828125, -9.14990234375, -7.3759765625, -5.60205078125, -3.828125, -2.05419921875, -0.2802734375, 1.49365234375, 3.267578125, 5.04150390625, 6.8154296875, 8.58935546875, 10.36328125, 12.13720703125, 13.9111328125, 15.68505859375, 17.458984375, 19.23291015625, 21.0068359375, 22.78076171875, 24.5546875, 26.32861328125, 28.1025390625, 29.87646484375, 31.650390625, 33.42431640625, 35.1982421875, 36.97216796875, 38.74609375, 40.52001953125, 42.2939453125, 44.06787109375, 45.841796875, 47.61572265625, 49.3896484375, 51.16357421875, 52.9375]}, "gradients/decoder.transformer.h.5.ln_1.weight": {"_type": "histogram", "values": [20.0, 179.0, 452.0, 308.0, 48.0, 7.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.707290649414062, -8.592103958129883, -3.476917266845703, 1.6382694244384766, 6.753456115722656, 11.868642807006836, 16.983829498291016, 22.099014282226562, 27.214202880859375, 32.32939147949219, 37.444576263427734, 42.55976104736328, 47.674949645996094, 52.790138244628906, 57.90532302856445, 63.0205078125, 68.13569641113281, 73.25088500976562, 78.36607360839844, 83.48125457763672, 88.59644317626953, 93.71163177490234, 98.82681274414062, 103.94200134277344, 109.05718994140625, 114.17237854003906, 119.28756713867188, 124.40274810791016, 129.5179443359375, 134.63311767578125, 139.74830627441406, 144.86349487304688, 149.97869873046875, 155.09388732910156, 160.20907592773438, 165.3242645263672, 170.439453125, 175.55462646484375, 180.66981506347656, 185.78500366210938, 190.9001922607422, 196.015380859375, 201.1305694580078, 206.24575805664062, 211.36093139648438, 216.4761199951172, 221.59130859375, 226.7064971923828, 231.82168579101562, 236.93687438964844, 242.05206298828125, 247.16725158691406, 252.28244018554688, 257.3976135253906, 262.5128173828125, 267.62799072265625, 272.7431640625, 277.85833740234375, 282.9735412597656, 288.0887145996094, 293.20391845703125, 298.319091796875, 303.4342956542969, 308.5494689941406, 313.6646728515625]}, "gradients/decoder.transformer.h.5.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 6.0, 2.0, 4.0, 8.0, 9.0, 5.0, 8.0, 19.0, 12.0, 26.0, 11.0, 19.0, 24.0, 20.0, 38.0, 24.0, 24.0, 32.0, 37.0, 39.0, 44.0, 56.0, 35.0, 39.0, 39.0, 54.0, 42.0, 40.0, 35.0, 30.0, 36.0, 28.0, 34.0, 18.0, 20.0, 15.0, 10.0, 9.0, 13.0, 13.0, 13.0, 5.0, 6.0, 4.0, 1.0, 3.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.21308135986328, -58.29692459106445, -56.380767822265625, -54.46461486816406, -52.548458099365234, -50.632301330566406, -48.71614456176758, -46.79998779296875, -44.88383483886719, -42.96767807006836, -41.05152130126953, -39.13536834716797, -37.21921157836914, -35.30305480957031, -33.386898040771484, -31.47074317932129, -29.55458641052246, -27.638429641723633, -25.722274780273438, -23.80611801147461, -21.889963150024414, -19.973806381225586, -18.05765151977539, -16.141494750976562, -14.22533893585205, -12.309183120727539, -10.393027305603027, -8.476871490478516, -6.560715198516846, -4.644558906555176, -2.728403091430664, -0.8122472763061523, 1.1039085388183594, 3.020064353942871, 4.936220169067383, 6.852376461029053, 8.768531799316406, 10.684688568115234, 12.600844383239746, 14.517000198364258, 16.433155059814453, 18.34931182861328, 20.265466690063477, 22.181623458862305, 24.0977783203125, 26.013935089111328, 27.930091857910156, 29.84624671936035, 31.76240348815918, 33.678558349609375, 35.5947151184082, 37.51087188720703, 39.42702865600586, 41.34318542480469, 43.25933837890625, 45.17549514770508, 47.091651916503906, 49.007808685302734, 50.92396545410156, 52.840118408203125, 54.75627517700195, 56.67243194580078, 58.58858871459961, 60.50474548339844, 62.4208984375]}, "gradients/decoder.transformer.h.4.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 8.0, 1.0, 9.0, 4.0, 5.0, 9.0, 16.0, 9.0, 10.0, 27.0, 14.0, 18.0, 27.0, 25.0, 23.0, 29.0, 37.0, 33.0, 41.0, 51.0, 32.0, 48.0, 43.0, 41.0, 31.0, 52.0, 47.0, 23.0, 38.0, 42.0, 31.0, 24.0, 35.0, 21.0, 18.0, 18.0, 10.0, 6.0, 12.0, 10.0, 9.0, 6.0, 5.0, 4.0, 3.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0], "bins": [-10.3828125, -10.0902099609375, -9.797607421875, -9.5050048828125, -9.21240234375, -8.9197998046875, -8.627197265625, -8.3345947265625, -8.0419921875, -7.7493896484375, -7.456787109375, -7.1641845703125, -6.87158203125, -6.5789794921875, -6.286376953125, -5.9937744140625, -5.701171875, -5.4085693359375, -5.115966796875, -4.8233642578125, -4.53076171875, -4.2381591796875, -3.945556640625, -3.6529541015625, -3.3603515625, -3.0677490234375, -2.775146484375, -2.4825439453125, -2.18994140625, -1.8973388671875, -1.604736328125, -1.3121337890625, -1.01953125, -0.7269287109375, -0.434326171875, -0.1417236328125, 0.15087890625, 0.4434814453125, 0.736083984375, 1.0286865234375, 1.3212890625, 1.6138916015625, 1.906494140625, 2.1990966796875, 2.49169921875, 2.7843017578125, 3.076904296875, 3.3695068359375, 3.662109375, 3.9547119140625, 4.247314453125, 4.5399169921875, 4.83251953125, 5.1251220703125, 5.417724609375, 5.7103271484375, 6.0029296875, 6.2955322265625, 6.588134765625, 6.8807373046875, 7.17333984375, 7.4659423828125, 7.758544921875, 8.0511474609375, 8.34375]}, "gradients/decoder.transformer.h.4.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 4.0, 6.0, 5.0, 9.0, 11.0, 16.0, 13.0, 25.0, 39.0, 54.0, 74.0, 95.0, 136.0, 222.0, 374.0, 714.0, 1530.0, 3854.0, 11812.0, 43088.0, 177190.0, 656773.0, 1511787.0, 1213300.0, 424470.0, 108388.0, 27026.0, 7782.0, 2793.0, 1123.0, 613.0, 331.0, 168.0, 145.0, 102.0, 51.0, 35.0, 36.0, 23.0, 16.0, 12.0, 14.0, 11.0, 8.0, 4.0, 4.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.46875, -10.1497802734375, -9.830810546875, -9.5118408203125, -9.19287109375, -8.8739013671875, -8.554931640625, -8.2359619140625, -7.9169921875, -7.5980224609375, -7.279052734375, -6.9600830078125, -6.64111328125, -6.3221435546875, -6.003173828125, -5.6842041015625, -5.365234375, -5.0462646484375, -4.727294921875, -4.4083251953125, -4.08935546875, -3.7703857421875, -3.451416015625, -3.1324462890625, -2.8134765625, -2.4945068359375, -2.175537109375, -1.8565673828125, -1.53759765625, -1.2186279296875, -0.899658203125, -0.5806884765625, -0.26171875, 0.0572509765625, 0.376220703125, 0.6951904296875, 1.01416015625, 1.3331298828125, 1.652099609375, 1.9710693359375, 2.2900390625, 2.6090087890625, 2.927978515625, 3.2469482421875, 3.56591796875, 3.8848876953125, 4.203857421875, 4.5228271484375, 4.841796875, 5.1607666015625, 5.479736328125, 5.7987060546875, 6.11767578125, 6.4366455078125, 6.755615234375, 7.0745849609375, 7.3935546875, 7.7125244140625, 8.031494140625, 8.3504638671875, 8.66943359375, 8.9884033203125, 9.307373046875, 9.6263427734375, 9.9453125]}, "gradients/decoder.transformer.h.4.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 5.0, 7.0, 10.0, 7.0, 15.0, 9.0, 25.0, 19.0, 42.0, 47.0, 77.0, 101.0, 144.0, 225.0, 290.0, 424.0, 479.0, 526.0, 415.0, 357.0, 245.0, 194.0, 127.0, 93.0, 54.0, 37.0, 27.0, 28.0, 15.0, 13.0, 7.0, 5.0, 3.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-14.0859375, -13.64892578125, -13.2119140625, -12.77490234375, -12.337890625, -11.90087890625, -11.4638671875, -11.02685546875, -10.58984375, -10.15283203125, -9.7158203125, -9.27880859375, -8.841796875, -8.40478515625, -7.9677734375, -7.53076171875, -7.09375, -6.65673828125, -6.2197265625, -5.78271484375, -5.345703125, -4.90869140625, -4.4716796875, -4.03466796875, -3.59765625, -3.16064453125, -2.7236328125, -2.28662109375, -1.849609375, -1.41259765625, -0.9755859375, -0.53857421875, -0.1015625, 0.33544921875, 0.7724609375, 1.20947265625, 1.646484375, 2.08349609375, 2.5205078125, 2.95751953125, 3.39453125, 3.83154296875, 4.2685546875, 4.70556640625, 5.142578125, 5.57958984375, 6.0166015625, 6.45361328125, 6.890625, 7.32763671875, 7.7646484375, 8.20166015625, 8.638671875, 9.07568359375, 9.5126953125, 9.94970703125, 10.38671875, 10.82373046875, 11.2607421875, 11.69775390625, 12.134765625, 12.57177734375, 13.0087890625, 13.44580078125, 13.8828125]}, "gradients/decoder.transformer.h.4.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 4.0, 4.0, 3.0, 5.0, 4.0, 8.0, 7.0, 10.0, 15.0, 31.0, 32.0, 42.0, 70.0, 78.0, 190.0, 253.0, 537.0, 1065.0, 2531.0, 7641.0, 27919.0, 148319.0, 1215655.0, 2372509.0, 344528.0, 53578.0, 12392.0, 3777.0, 1461.0, 663.0, 344.0, 206.0, 122.0, 79.0, 64.0, 40.0, 27.0, 28.0, 14.0, 6.0, 12.0, 4.0, 4.0, 5.0, 3.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-18.21875, -17.635009765625, -17.05126953125, -16.467529296875, -15.8837890625, -15.300048828125, -14.71630859375, -14.132568359375, -13.548828125, -12.965087890625, -12.38134765625, -11.797607421875, -11.2138671875, -10.630126953125, -10.04638671875, -9.462646484375, -8.87890625, -8.295166015625, -7.71142578125, -7.127685546875, -6.5439453125, -5.960205078125, -5.37646484375, -4.792724609375, -4.208984375, -3.625244140625, -3.04150390625, -2.457763671875, -1.8740234375, -1.290283203125, -0.70654296875, -0.122802734375, 0.4609375, 1.044677734375, 1.62841796875, 2.212158203125, 2.7958984375, 3.379638671875, 3.96337890625, 4.547119140625, 5.130859375, 5.714599609375, 6.29833984375, 6.882080078125, 7.4658203125, 8.049560546875, 8.63330078125, 9.217041015625, 9.80078125, 10.384521484375, 10.96826171875, 11.552001953125, 12.1357421875, 12.719482421875, 13.30322265625, 13.886962890625, 14.470703125, 15.054443359375, 15.63818359375, 16.221923828125, 16.8056640625, 17.389404296875, 17.97314453125, 18.556884765625, 19.140625]}, "gradients/decoder.transformer.h.4.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 32.0, 386.0, 547.0, 49.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-213.45945739746094, -203.35934448242188, -193.2592315673828, -183.15911865234375, -173.0590057373047, -162.95889282226562, -152.85877990722656, -142.7586669921875, -132.65855407714844, -122.55844116210938, -112.45832824707031, -102.35821533203125, -92.25810241699219, -82.15798950195312, -72.05787658691406, -61.957763671875, -51.85765075683594, -41.757537841796875, -31.657424926757812, -21.55731201171875, -11.457199096679688, -1.357086181640625, 8.743026733398438, 18.8431396484375, 28.943252563476562, 39.043365478515625, 49.14347839355469, 59.24359130859375, 69.34370422363281, 79.44381713867188, 89.54393005371094, 99.64404296875, 109.744140625, 119.84425354003906, 129.94436645507812, 140.0444793701172, 150.14459228515625, 160.2447052001953, 170.34481811523438, 180.44493103027344, 190.5450439453125, 200.64515686035156, 210.74526977539062, 220.8453826904297, 230.94549560546875, 241.0456085205078, 251.14572143554688, 261.245849609375, 271.345947265625, 281.446044921875, 291.5461730957031, 301.64630126953125, 311.74639892578125, 321.84649658203125, 331.9466247558594, 342.0467529296875, 352.1468505859375, 362.2469482421875, 372.3470764160156, 382.44720458984375, 392.54730224609375, 402.64739990234375, 412.7475280761719, 422.84765625, 432.94775390625]}, "gradients/decoder.transformer.h.4.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 6.0, 7.0, 6.0, 11.0, 11.0, 12.0, 8.0, 20.0, 14.0, 25.0, 20.0, 27.0, 28.0, 27.0, 31.0, 40.0, 37.0, 46.0, 48.0, 51.0, 38.0, 54.0, 44.0, 46.0, 40.0, 35.0, 29.0, 31.0, 30.0, 26.0, 34.0, 24.0, 21.0, 9.0, 16.0, 11.0, 2.0, 10.0, 4.0, 3.0, 6.0, 3.0, 4.0, 9.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-43.23390579223633, -41.83201217651367, -40.43012237548828, -39.028228759765625, -37.626338958740234, -36.22444534301758, -34.82255554199219, -33.42066192626953, -32.018768310546875, -30.61687660217285, -29.214984893798828, -27.813091278076172, -26.41119956970215, -25.009307861328125, -23.6074161529541, -22.205524444580078, -20.803632736206055, -19.40174102783203, -17.999849319458008, -16.597957611083984, -15.196063995361328, -13.794172286987305, -12.392280578613281, -10.990387916564941, -9.588496208190918, -8.186604499816895, -6.784711837768555, -5.382820129394531, -3.9809279441833496, -2.579035758972168, -1.1771440505981445, 0.2247486114501953, 1.6266403198242188, 3.0285325050354004, 4.430424690246582, 5.8323163986206055, 7.234208583831787, 8.636100769042969, 10.037992477416992, 11.439885139465332, 12.841776847839355, 14.243668556213379, 15.645561218261719, 17.047452926635742, 18.449344635009766, 19.851238250732422, 21.253128051757812, 22.65502166748047, 24.056913375854492, 25.458805084228516, 26.86069679260254, 28.262588500976562, 29.66448211669922, 31.066373825073242, 32.468265533447266, 33.87015914916992, 35.27204895019531, 36.67394256591797, 38.07583236694336, 39.477725982666016, 40.879615783691406, 42.28150939941406, 43.68340301513672, 45.08529281616211, 46.487186431884766]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 6.0, 5.0, 2.0, 6.0, 5.0, 4.0, 8.0, 8.0, 7.0, 15.0, 19.0, 17.0, 18.0, 19.0, 25.0, 26.0, 28.0, 42.0, 35.0, 35.0, 33.0, 40.0, 34.0, 35.0, 43.0, 47.0, 39.0, 50.0, 35.0, 34.0, 35.0, 38.0, 27.0, 28.0, 37.0, 25.0, 23.0, 10.0, 12.0, 9.0, 7.0, 2.0, 8.0, 6.0, 10.0, 4.0, 3.0, 5.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0], "bins": [-9.5234375, -9.239501953125, -8.95556640625, -8.671630859375, -8.3876953125, -8.103759765625, -7.81982421875, -7.535888671875, -7.251953125, -6.968017578125, -6.68408203125, -6.400146484375, -6.1162109375, -5.832275390625, -5.54833984375, -5.264404296875, -4.98046875, -4.696533203125, -4.41259765625, -4.128662109375, -3.8447265625, -3.560791015625, -3.27685546875, -2.992919921875, -2.708984375, -2.425048828125, -2.14111328125, -1.857177734375, -1.5732421875, -1.289306640625, -1.00537109375, -0.721435546875, -0.4375, -0.153564453125, 0.13037109375, 0.414306640625, 0.6982421875, 0.982177734375, 1.26611328125, 1.550048828125, 1.833984375, 2.117919921875, 2.40185546875, 2.685791015625, 2.9697265625, 3.253662109375, 3.53759765625, 3.821533203125, 4.10546875, 4.389404296875, 4.67333984375, 4.957275390625, 5.2412109375, 5.525146484375, 5.80908203125, 6.093017578125, 6.376953125, 6.660888671875, 6.94482421875, 7.228759765625, 7.5126953125, 7.796630859375, 8.08056640625, 8.364501953125, 8.6484375]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 7.0, 5.0, 16.0, 13.0, 19.0, 41.0, 36.0, 61.0, 94.0, 170.0, 240.0, 444.0, 674.0, 1132.0, 1815.0, 2968.0, 4890.0, 8190.0, 13206.0, 22279.0, 37130.0, 63880.0, 109407.0, 185858.0, 230015.0, 150827.0, 87976.0, 51386.0, 29993.0, 18043.0, 10741.0, 6587.0, 3971.0, 2476.0, 1501.0, 935.0, 522.0, 378.0, 237.0, 139.0, 102.0, 59.0, 39.0, 21.0, 19.0, 14.0, 6.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.8525390625, -0.826934814453125, -0.80133056640625, -0.775726318359375, -0.7501220703125, -0.724517822265625, -0.69891357421875, -0.673309326171875, -0.647705078125, -0.622100830078125, -0.59649658203125, -0.570892333984375, -0.5452880859375, -0.519683837890625, -0.49407958984375, -0.468475341796875, -0.44287109375, -0.417266845703125, -0.39166259765625, -0.366058349609375, -0.3404541015625, -0.314849853515625, -0.28924560546875, -0.263641357421875, -0.238037109375, -0.212432861328125, -0.18682861328125, -0.161224365234375, -0.1356201171875, -0.110015869140625, -0.08441162109375, -0.058807373046875, -0.033203125, -0.007598876953125, 0.01800537109375, 0.043609619140625, 0.0692138671875, 0.094818115234375, 0.12042236328125, 0.146026611328125, 0.171630859375, 0.197235107421875, 0.22283935546875, 0.248443603515625, 0.2740478515625, 0.299652099609375, 0.32525634765625, 0.350860595703125, 0.37646484375, 0.402069091796875, 0.42767333984375, 0.453277587890625, 0.4788818359375, 0.504486083984375, 0.53009033203125, 0.555694580078125, 0.581298828125, 0.606903076171875, 0.63250732421875, 0.658111572265625, 0.6837158203125, 0.709320068359375, 0.73492431640625, 0.760528564453125, 0.7861328125]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 3.0, 7.0, 8.0, 6.0, 14.0, 7.0, 12.0, 17.0, 28.0, 15.0, 23.0, 26.0, 34.0, 26.0, 44.0, 46.0, 35.0, 31.0, 54.0, 45.0, 1078.0, 46.0, 53.0, 35.0, 40.0, 37.0, 27.0, 35.0, 41.0, 22.0, 28.0, 19.0, 18.0, 20.0, 13.0, 12.0, 3.0, 10.0, 2.0, 5.0, 6.0, 3.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.86328125, -5.669189453125, -5.47509765625, -5.281005859375, -5.0869140625, -4.892822265625, -4.69873046875, -4.504638671875, -4.310546875, -4.116455078125, -3.92236328125, -3.728271484375, -3.5341796875, -3.340087890625, -3.14599609375, -2.951904296875, -2.7578125, -2.563720703125, -2.36962890625, -2.175537109375, -1.9814453125, -1.787353515625, -1.59326171875, -1.399169921875, -1.205078125, -1.010986328125, -0.81689453125, -0.622802734375, -0.4287109375, -0.234619140625, -0.04052734375, 0.153564453125, 0.34765625, 0.541748046875, 0.73583984375, 0.929931640625, 1.1240234375, 1.318115234375, 1.51220703125, 1.706298828125, 1.900390625, 2.094482421875, 2.28857421875, 2.482666015625, 2.6767578125, 2.870849609375, 3.06494140625, 3.259033203125, 3.453125, 3.647216796875, 3.84130859375, 4.035400390625, 4.2294921875, 4.423583984375, 4.61767578125, 4.811767578125, 5.005859375, 5.199951171875, 5.39404296875, 5.588134765625, 5.7822265625, 5.976318359375, 6.17041015625, 6.364501953125, 6.55859375]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 4.0, 5.0, 3.0, 7.0, 13.0, 18.0, 21.0, 41.0, 48.0, 84.0, 136.0, 174.0, 285.0, 429.0, 678.0, 1089.0, 1652.0, 2451.0, 3846.0, 5847.0, 9031.0, 13935.0, 21447.0, 33371.0, 50735.0, 78891.0, 121607.0, 281174.0, 1114926.0, 124755.0, 81000.0, 52282.0, 34108.0, 22327.0, 14111.0, 9252.0, 6098.0, 3881.0, 2557.0, 1657.0, 1127.0, 666.0, 476.0, 300.0, 230.0, 128.0, 84.0, 60.0, 37.0, 25.0, 17.0, 6.0, 5.0, 2.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.57177734375, -0.5533523559570312, -0.5349273681640625, -0.5165023803710938, -0.498077392578125, -0.47965240478515625, -0.4612274169921875, -0.44280242919921875, -0.42437744140625, -0.40595245361328125, -0.3875274658203125, -0.36910247802734375, -0.350677490234375, -0.33225250244140625, -0.3138275146484375, -0.29540252685546875, -0.2769775390625, -0.25855255126953125, -0.2401275634765625, -0.22170257568359375, -0.203277587890625, -0.18485260009765625, -0.1664276123046875, -0.14800262451171875, -0.12957763671875, -0.11115264892578125, -0.0927276611328125, -0.07430267333984375, -0.055877685546875, -0.03745269775390625, -0.0190277099609375, -0.00060272216796875, 0.017822265625, 0.03624725341796875, 0.0546722412109375, 0.07309722900390625, 0.091522216796875, 0.10994720458984375, 0.1283721923828125, 0.14679718017578125, 0.16522216796875, 0.18364715576171875, 0.2020721435546875, 0.22049713134765625, 0.238922119140625, 0.25734710693359375, 0.2757720947265625, 0.29419708251953125, 0.3126220703125, 0.33104705810546875, 0.3494720458984375, 0.36789703369140625, 0.386322021484375, 0.40474700927734375, 0.4231719970703125, 0.44159698486328125, 0.46002197265625, 0.47844696044921875, 0.4968719482421875, 0.5152969360351562, 0.533721923828125, 0.5521469116210938, 0.5705718994140625, 0.5889968872070312, 0.607421875]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 7.0, 5.0, 5.0, 9.0, 12.0, 15.0, 19.0, 22.0, 30.0, 21.0, 41.0, 39.0, 52.0, 55.0, 48.0, 52.0, 63.0, 68.0, 57.0, 47.0, 48.0, 37.0, 56.0, 47.0, 37.0, 28.0, 17.0, 25.0, 17.0, 9.0, 10.0, 3.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.002513885498046875, -0.0024206936359405518, -0.0023275017738342285, -0.0022343099117279053, -0.002141118049621582, -0.002047926187515259, -0.0019547343254089355, -0.0018615424633026123, -0.001768350601196289, -0.0016751587390899658, -0.0015819668769836426, -0.0014887750148773193, -0.001395583152770996, -0.0013023912906646729, -0.0012091994285583496, -0.0011160075664520264, -0.0010228157043457031, -0.0009296238422393799, -0.0008364319801330566, -0.0007432401180267334, -0.0006500482559204102, -0.0005568563938140869, -0.00046366453170776367, -0.00037047266960144043, -0.0002772808074951172, -0.00018408894538879395, -9.08970832824707e-05, 2.294778823852539e-06, 9.548664093017578e-05, 0.00018867850303649902, 0.00028187036514282227, 0.0003750622272491455, 0.00046825408935546875, 0.000561445951461792, 0.0006546378135681152, 0.0007478296756744385, 0.0008410215377807617, 0.000934213399887085, 0.0010274052619934082, 0.0011205971240997314, 0.0012137889862060547, 0.001306980848312378, 0.0014001727104187012, 0.0014933645725250244, 0.0015865564346313477, 0.001679748296737671, 0.0017729401588439941, 0.0018661320209503174, 0.0019593238830566406, 0.002052515745162964, 0.002145707607269287, 0.0022388994693756104, 0.0023320913314819336, 0.002425283193588257, 0.00251847505569458, 0.0026116669178009033, 0.0027048587799072266, 0.00279805064201355, 0.002891242504119873, 0.0029844343662261963, 0.0030776262283325195, 0.0031708180904388428, 0.003264009952545166, 0.0033572018146514893, 0.0034503936767578125]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 2.0, 5.0, 6.0, 4.0, 14.0, 24.0, 28.0, 43.0, 40.0, 57.0, 88.0, 98.0, 134.0, 176.0, 289.0, 549.0, 1040.0, 57291.0, 985205.0, 1641.0, 642.0, 357.0, 241.0, 165.0, 93.0, 58.0, 69.0, 47.0, 35.0, 37.0, 20.0, 17.0, 12.0, 10.0, 10.0, 3.0, 0.0, 2.0, 3.0, 1.0, 4.0, 1.0, 2.0, 2.0, 0.0, 2.0], "bins": [-0.06158447265625, -0.05988311767578125, -0.0581817626953125, -0.05648040771484375, -0.054779052734375, -0.05307769775390625, -0.0513763427734375, -0.04967498779296875, -0.0479736328125, -0.04627227783203125, -0.0445709228515625, -0.04286956787109375, -0.041168212890625, -0.03946685791015625, -0.0377655029296875, -0.03606414794921875, -0.03436279296875, -0.03266143798828125, -0.0309600830078125, -0.02925872802734375, -0.027557373046875, -0.02585601806640625, -0.0241546630859375, -0.02245330810546875, -0.020751953125, -0.01905059814453125, -0.0173492431640625, -0.01564788818359375, -0.013946533203125, -0.01224517822265625, -0.0105438232421875, -0.00884246826171875, -0.00714111328125, -0.00543975830078125, -0.0037384033203125, -0.00203704833984375, -0.000335693359375, 0.00136566162109375, 0.0030670166015625, 0.00476837158203125, 0.0064697265625, 0.00817108154296875, 0.0098724365234375, 0.01157379150390625, 0.013275146484375, 0.01497650146484375, 0.0166778564453125, 0.01837921142578125, 0.02008056640625, 0.02178192138671875, 0.0234832763671875, 0.02518463134765625, 0.026885986328125, 0.02858734130859375, 0.0302886962890625, 0.03199005126953125, 0.03369140625, 0.03539276123046875, 0.0370941162109375, 0.03879547119140625, 0.040496826171875, 0.04219818115234375, 0.0438995361328125, 0.04560089111328125, 0.04730224609375]}, "gradients/decoder.transformer.h.4.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 20.0, 97.0, 324.0, 405.0, 135.0, 25.0, 6.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.007933234795928001, -0.007791867479681969, -0.007650499697774649, -0.007509132381528616, -0.007367765065282583, -0.0072263977490365505, -0.0070850299671292305, -0.006943662650883198, -0.006802295334637165, -0.006660928018391132, -0.006519560236483812, -0.00637819292023778, -0.006236825603991747, -0.006095458287745714, -0.005954090505838394, -0.0058127231895923615, -0.005671355873346329, -0.005529988557100296, -0.005388620775192976, -0.005247253458946943, -0.005105886142700911, -0.004964518826454878, -0.004823151044547558, -0.004681783728301525, -0.004540415946394205, -0.004399048630148172, -0.004257680848240852, -0.00411631353199482, -0.003974946215748787, -0.0038335786666721106, -0.003692211117595434, -0.0035508438013494015, -0.003409476252272725, -0.0032681087031960487, -0.003126741386950016, -0.0029853738378733397, -0.002844006521627307, -0.0027026389725506306, -0.002561271656304598, -0.0024199041072279215, -0.002278536558151245, -0.0021371690090745687, -0.001995801692828536, -0.0018544341437518597, -0.001713066827505827, -0.0015716992784291506, -0.001430331845767796, -0.0012889644131064415, -0.0011475970968604088, -0.0010062296641990542, -0.0008648622315376997, -0.0007234947406686842, -0.0005821273080073297, -0.00044075987534597516, -0.0002993923844769597, -0.00015802495181560516, -1.6657519154250622e-05, 0.00012470992805901915, 0.0002660773752722889, 0.0004074448370374739, 0.0005488122696988285, 0.000690179702360183, 0.0008315471932291985, 0.000972914625890553, 0.0011142820585519075]}, "gradients/decoder.transformer.h.4.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 7.0, 5.0, 4.0, 8.0, 6.0, 5.0, 5.0, 3.0, 9.0, 12.0, 15.0, 14.0, 16.0, 20.0, 21.0, 30.0, 29.0, 33.0, 39.0, 42.0, 32.0, 31.0, 32.0, 36.0, 41.0, 41.0, 35.0, 36.0, 45.0, 35.0, 33.0, 33.0, 39.0, 25.0, 27.0, 23.0, 27.0, 16.0, 20.0, 13.0, 9.0, 13.0, 11.0, 4.0, 9.0, 5.0, 8.0, 1.0, 6.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0011650919914245605, -0.001124613918364048, -0.0010841358453035355, -0.001043657772243023, -0.0010031796991825104, -0.0009627016261219978, -0.0009222235530614853, -0.0008817454800009727, -0.0008412674069404602, -0.0008007893338799477, -0.0007603112608194351, -0.0007198331877589226, -0.00067935511469841, -0.0006388770416378975, -0.000598398968577385, -0.0005579208955168724, -0.0005174428224563599, -0.0004769647493958473, -0.0004364866763353348, -0.00039600860327482224, -0.0003555305302143097, -0.00031505245715379715, -0.0002745743840932846, -0.00023409631103277206, -0.00019361823797225952, -0.00015314016491174698, -0.00011266209185123444, -7.21840187907219e-05, -3.170594573020935e-05, 8.772127330303192e-06, 4.9250200390815735e-05, 8.972827345132828e-05, 0.00013020634651184082, 0.00017068441957235336, 0.0002111624926328659, 0.00025164056569337845, 0.000292118638753891, 0.00033259671181440353, 0.0003730747848749161, 0.0004135528579354286, 0.00045403093099594116, 0.0004945090040564537, 0.0005349870771169662, 0.0005754651501774788, 0.0006159432232379913, 0.0006564212962985039, 0.0006968993693590164, 0.000737377442419529, 0.0007778555154800415, 0.000818333588540554, 0.0008588116616010666, 0.0008992897346615791, 0.0009397678077220917, 0.0009802458807826042, 0.0010207239538431168, 0.0010612020269036293, 0.0011016800999641418, 0.0011421581730246544, 0.001182636246085167, 0.0012231143191456795, 0.001263592392206192, 0.0013040704652667046, 0.001344548538327217, 0.0013850266113877296, 0.0014255046844482422]}, "gradients/decoder.transformer.h.4.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 6.0, 5.0, 2.0, 6.0, 5.0, 4.0, 8.0, 8.0, 7.0, 15.0, 19.0, 17.0, 18.0, 19.0, 25.0, 26.0, 28.0, 42.0, 35.0, 35.0, 33.0, 40.0, 34.0, 35.0, 43.0, 47.0, 39.0, 50.0, 35.0, 34.0, 35.0, 38.0, 27.0, 28.0, 37.0, 25.0, 23.0, 10.0, 12.0, 9.0, 7.0, 2.0, 8.0, 6.0, 10.0, 4.0, 3.0, 5.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0], "bins": [-9.5234375, -9.239501953125, -8.95556640625, -8.671630859375, -8.3876953125, -8.103759765625, -7.81982421875, -7.535888671875, -7.251953125, -6.968017578125, -6.68408203125, -6.400146484375, -6.1162109375, -5.832275390625, -5.54833984375, -5.264404296875, -4.98046875, -4.696533203125, -4.41259765625, -4.128662109375, -3.8447265625, -3.560791015625, -3.27685546875, -2.992919921875, -2.708984375, -2.425048828125, -2.14111328125, -1.857177734375, -1.5732421875, -1.289306640625, -1.00537109375, -0.721435546875, -0.4375, -0.153564453125, 0.13037109375, 0.414306640625, 0.6982421875, 0.982177734375, 1.26611328125, 1.550048828125, 1.833984375, 2.117919921875, 2.40185546875, 2.685791015625, 2.9697265625, 3.253662109375, 3.53759765625, 3.821533203125, 4.10546875, 4.389404296875, 4.67333984375, 4.957275390625, 5.2412109375, 5.525146484375, 5.80908203125, 6.093017578125, 6.376953125, 6.660888671875, 6.94482421875, 7.228759765625, 7.5126953125, 7.796630859375, 8.08056640625, 8.364501953125, 8.6484375]}, "gradients/decoder.transformer.h.4.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 0.0, 3.0, 4.0, 4.0, 5.0, 6.0, 15.0, 18.0, 17.0, 26.0, 26.0, 43.0, 41.0, 44.0, 86.0, 102.0, 161.0, 174.0, 275.0, 381.0, 629.0, 1155.0, 2155.0, 4255.0, 10749.0, 29942.0, 85989.0, 243182.0, 385457.0, 182089.0, 63355.0, 21966.0, 8156.0, 3555.0, 1691.0, 938.0, 566.0, 316.0, 240.0, 192.0, 130.0, 113.0, 75.0, 53.0, 40.0, 39.0, 30.0, 16.0, 19.0, 11.0, 7.0, 12.0, 6.0, 6.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-12.1640625, -11.776123046875, -11.38818359375, -11.000244140625, -10.6123046875, -10.224365234375, -9.83642578125, -9.448486328125, -9.060546875, -8.672607421875, -8.28466796875, -7.896728515625, -7.5087890625, -7.120849609375, -6.73291015625, -6.344970703125, -5.95703125, -5.569091796875, -5.18115234375, -4.793212890625, -4.4052734375, -4.017333984375, -3.62939453125, -3.241455078125, -2.853515625, -2.465576171875, -2.07763671875, -1.689697265625, -1.3017578125, -0.913818359375, -0.52587890625, -0.137939453125, 0.25, 0.637939453125, 1.02587890625, 1.413818359375, 1.8017578125, 2.189697265625, 2.57763671875, 2.965576171875, 3.353515625, 3.741455078125, 4.12939453125, 4.517333984375, 4.9052734375, 5.293212890625, 5.68115234375, 6.069091796875, 6.45703125, 6.844970703125, 7.23291015625, 7.620849609375, 8.0087890625, 8.396728515625, 8.78466796875, 9.172607421875, 9.560546875, 9.948486328125, 10.33642578125, 10.724365234375, 11.1123046875, 11.500244140625, 11.88818359375, 12.276123046875, 12.6640625]}, "gradients/decoder.transformer.h.4.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 5.0, 6.0, 1.0, 9.0, 7.0, 8.0, 12.0, 15.0, 26.0, 21.0, 28.0, 31.0, 32.0, 37.0, 49.0, 59.0, 58.0, 116.0, 193.0, 315.0, 1315.0, 179.0, 102.0, 82.0, 64.0, 48.0, 36.0, 35.0, 32.0, 23.0, 19.0, 12.0, 17.0, 15.0, 16.0, 9.0, 5.0, 5.0, 5.0, 8.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.1875, -22.388427734375, -21.58935546875, -20.790283203125, -19.9912109375, -19.192138671875, -18.39306640625, -17.593994140625, -16.794921875, -15.995849609375, -15.19677734375, -14.397705078125, -13.5986328125, -12.799560546875, -12.00048828125, -11.201416015625, -10.40234375, -9.603271484375, -8.80419921875, -8.005126953125, -7.2060546875, -6.406982421875, -5.60791015625, -4.808837890625, -4.009765625, -3.210693359375, -2.41162109375, -1.612548828125, -0.8134765625, -0.014404296875, 0.78466796875, 1.583740234375, 2.3828125, 3.181884765625, 3.98095703125, 4.780029296875, 5.5791015625, 6.378173828125, 7.17724609375, 7.976318359375, 8.775390625, 9.574462890625, 10.37353515625, 11.172607421875, 11.9716796875, 12.770751953125, 13.56982421875, 14.368896484375, 15.16796875, 15.967041015625, 16.76611328125, 17.565185546875, 18.3642578125, 19.163330078125, 19.96240234375, 20.761474609375, 21.560546875, 22.359619140625, 23.15869140625, 23.957763671875, 24.7568359375, 25.555908203125, 26.35498046875, 27.154052734375, 27.953125]}, "gradients/decoder.transformer.h.4.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 4.0, 4.0, 4.0, 2.0, 5.0, 5.0, 14.0, 11.0, 18.0, 17.0, 37.0, 39.0, 76.0, 80.0, 133.0, 222.0, 290.0, 514.0, 1034.0, 5748.0, 507501.0, 2612138.0, 14714.0, 1380.0, 626.0, 375.0, 252.0, 133.0, 101.0, 56.0, 54.0, 29.0, 30.0, 21.0, 18.0, 8.0, 7.0, 4.0, 3.0, 2.0, 1.0, 0.0, 5.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.15625, -46.24072265625, -44.3251953125, -42.40966796875, -40.494140625, -38.57861328125, -36.6630859375, -34.74755859375, -32.83203125, -30.91650390625, -29.0009765625, -27.08544921875, -25.169921875, -23.25439453125, -21.3388671875, -19.42333984375, -17.5078125, -15.59228515625, -13.6767578125, -11.76123046875, -9.845703125, -7.93017578125, -6.0146484375, -4.09912109375, -2.18359375, -0.26806640625, 1.6474609375, 3.56298828125, 5.478515625, 7.39404296875, 9.3095703125, 11.22509765625, 13.140625, 15.05615234375, 16.9716796875, 18.88720703125, 20.802734375, 22.71826171875, 24.6337890625, 26.54931640625, 28.46484375, 30.38037109375, 32.2958984375, 34.21142578125, 36.126953125, 38.04248046875, 39.9580078125, 41.87353515625, 43.7890625, 45.70458984375, 47.6201171875, 49.53564453125, 51.451171875, 53.36669921875, 55.2822265625, 57.19775390625, 59.11328125, 61.02880859375, 62.9443359375, 64.85986328125, 66.775390625, 68.69091796875, 70.6064453125, 72.52197265625, 74.4375]}, "gradients/decoder.transformer.h.4.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 14.0, 96.0, 318.0, 412.0, 146.0, 23.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.877758026123047, -24.12710952758789, -19.376461029052734, -14.625810623168945, -9.875162124633789, -5.124513626098633, -0.37386322021484375, 4.3767852783203125, 9.127433776855469, 13.878082275390625, 18.62873077392578, 23.37938117980957, 28.130029678344727, 32.88067626953125, 37.63132858276367, 42.38197708129883, 47.132625579833984, 51.88327407836914, 56.6339225769043, 61.38457489013672, 66.13522338867188, 70.88587188720703, 75.63652038574219, 80.38716888427734, 85.1378173828125, 89.88846588134766, 94.63911437988281, 99.38976287841797, 104.14041137695312, 108.89105987548828, 113.64170837402344, 118.39236450195312, 123.14302062988281, 127.89366912841797, 132.64431762695312, 137.3949737548828, 142.14561462402344, 146.89627075195312, 151.64691162109375, 156.39756774902344, 161.14820861816406, 165.89886474609375, 170.64950561523438, 175.40016174316406, 180.1508026123047, 184.90145874023438, 189.652099609375, 194.4027557373047, 199.15341186523438, 203.90406799316406, 208.6547088623047, 213.40536499023438, 218.156005859375, 222.9066619873047, 227.6573028564453, 232.407958984375, 237.15859985351562, 241.9092559814453, 246.65989685058594, 251.41055297851562, 256.16119384765625, 260.9118347167969, 265.6625061035156, 270.41314697265625, 275.1637878417969]}, "gradients/decoder.transformer.h.4.ln_1.bias": {"_type": "histogram", "values": [3.0, 0.0, 6.0, 1.0, 3.0, 4.0, 3.0, 5.0, 5.0, 9.0, 9.0, 6.0, 11.0, 19.0, 19.0, 18.0, 18.0, 19.0, 24.0, 34.0, 34.0, 30.0, 26.0, 34.0, 29.0, 29.0, 31.0, 34.0, 38.0, 45.0, 50.0, 31.0, 25.0, 36.0, 22.0, 36.0, 41.0, 29.0, 25.0, 19.0, 18.0, 22.0, 19.0, 16.0, 23.0, 14.0, 5.0, 9.0, 11.0, 5.0, 7.0, 5.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-43.03937530517578, -41.484378814697266, -39.92938232421875, -38.374385833740234, -36.81938934326172, -35.2643928527832, -33.70939636230469, -32.15440368652344, -30.59940528869629, -29.044408798217773, -27.489412307739258, -25.934417724609375, -24.37942123413086, -22.824424743652344, -21.269428253173828, -19.714431762695312, -18.159435272216797, -16.60443878173828, -15.049442291259766, -13.494446754455566, -11.93945026397705, -10.384453773498535, -8.829458236694336, -7.27446174621582, -5.719465255737305, -4.164468765258789, -2.6094727516174316, -1.0544767379760742, 0.5005197525024414, 2.055516242980957, 3.6105117797851562, 5.165508270263672, 6.7205047607421875, 8.275501251220703, 9.830497741699219, 11.385493278503418, 12.940489768981934, 14.49548625946045, 16.05048179626465, 17.605478286743164, 19.16047477722168, 20.715471267700195, 22.27046775817871, 23.825462341308594, 25.38045883178711, 26.935455322265625, 28.49045181274414, 30.045448303222656, 31.600444793701172, 33.15544128417969, 34.7104377746582, 36.26543426513672, 37.820430755615234, 39.37542724609375, 40.930419921875, 42.48542022705078, 44.04041290283203, 45.59540939331055, 47.15040588378906, 48.70540237426758, 50.260398864746094, 51.81539535522461, 53.370391845703125, 54.925384521484375, 56.480384826660156]}, "gradients/decoder.transformer.h.3.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 2.0, 3.0, 5.0, 4.0, 10.0, 9.0, 8.0, 17.0, 14.0, 11.0, 15.0, 19.0, 15.0, 28.0, 24.0, 27.0, 36.0, 29.0, 30.0, 30.0, 47.0, 35.0, 39.0, 38.0, 39.0, 44.0, 42.0, 35.0, 42.0, 41.0, 40.0, 33.0, 31.0, 31.0, 25.0, 16.0, 8.0, 15.0, 11.0, 9.0, 10.0, 5.0, 6.0, 5.0, 9.0, 3.0, 2.0, 3.0, 5.0, 2.0, 0.0, 3.0, 2.0, 2.0], "bins": [-9.4453125, -9.166259765625, -8.88720703125, -8.608154296875, -8.3291015625, -8.050048828125, -7.77099609375, -7.491943359375, -7.212890625, -6.933837890625, -6.65478515625, -6.375732421875, -6.0966796875, -5.817626953125, -5.53857421875, -5.259521484375, -4.98046875, -4.701416015625, -4.42236328125, -4.143310546875, -3.8642578125, -3.585205078125, -3.30615234375, -3.027099609375, -2.748046875, -2.468994140625, -2.18994140625, -1.910888671875, -1.6318359375, -1.352783203125, -1.07373046875, -0.794677734375, -0.515625, -0.236572265625, 0.04248046875, 0.321533203125, 0.6005859375, 0.879638671875, 1.15869140625, 1.437744140625, 1.716796875, 1.995849609375, 2.27490234375, 2.553955078125, 2.8330078125, 3.112060546875, 3.39111328125, 3.670166015625, 3.94921875, 4.228271484375, 4.50732421875, 4.786376953125, 5.0654296875, 5.344482421875, 5.62353515625, 5.902587890625, 6.181640625, 6.460693359375, 6.73974609375, 7.018798828125, 7.2978515625, 7.576904296875, 7.85595703125, 8.135009765625, 8.4140625]}, "gradients/decoder.transformer.h.3.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, 7.0, 9.0, 9.0, 21.0, 26.0, 20.0, 25.0, 41.0, 37.0, 67.0, 77.0, 102.0, 140.0, 209.0, 274.0, 373.0, 551.0, 5431.0, 4016327.0, 167874.0, 991.0, 442.0, 288.0, 219.0, 186.0, 123.0, 87.0, 75.0, 47.0, 43.0, 27.0, 28.0, 18.0, 17.0, 17.0, 15.0, 5.0, 8.0, 4.0, 5.0, 7.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-90.6875, -87.9990234375, -85.310546875, -82.6220703125, -79.93359375, -77.2451171875, -74.556640625, -71.8681640625, -69.1796875, -66.4912109375, -63.802734375, -61.1142578125, -58.42578125, -55.7373046875, -53.048828125, -50.3603515625, -47.671875, -44.9833984375, -42.294921875, -39.6064453125, -36.91796875, -34.2294921875, -31.541015625, -28.8525390625, -26.1640625, -23.4755859375, -20.787109375, -18.0986328125, -15.41015625, -12.7216796875, -10.033203125, -7.3447265625, -4.65625, -1.9677734375, 0.720703125, 3.4091796875, 6.09765625, 8.7861328125, 11.474609375, 14.1630859375, 16.8515625, 19.5400390625, 22.228515625, 24.9169921875, 27.60546875, 30.2939453125, 32.982421875, 35.6708984375, 38.359375, 41.0478515625, 43.736328125, 46.4248046875, 49.11328125, 51.8017578125, 54.490234375, 57.1787109375, 59.8671875, 62.5556640625, 65.244140625, 67.9326171875, 70.62109375, 73.3095703125, 75.998046875, 78.6865234375, 81.375]}, "gradients/decoder.transformer.h.3.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 15.0, 8.0, 13.0, 21.0, 19.0, 31.0, 50.0, 54.0, 108.0, 123.0, 197.0, 305.0, 474.0, 602.0, 613.0, 439.0, 321.0, 207.0, 144.0, 99.0, 65.0, 35.0, 39.0, 24.0, 18.0, 16.0, 14.0, 9.0, 5.0, 2.0, 5.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-20.234375, -19.68798828125, -19.1416015625, -18.59521484375, -18.048828125, -17.50244140625, -16.9560546875, -16.40966796875, -15.86328125, -15.31689453125, -14.7705078125, -14.22412109375, -13.677734375, -13.13134765625, -12.5849609375, -12.03857421875, -11.4921875, -10.94580078125, -10.3994140625, -9.85302734375, -9.306640625, -8.76025390625, -8.2138671875, -7.66748046875, -7.12109375, -6.57470703125, -6.0283203125, -5.48193359375, -4.935546875, -4.38916015625, -3.8427734375, -3.29638671875, -2.75, -2.20361328125, -1.6572265625, -1.11083984375, -0.564453125, -0.01806640625, 0.5283203125, 1.07470703125, 1.62109375, 2.16748046875, 2.7138671875, 3.26025390625, 3.806640625, 4.35302734375, 4.8994140625, 5.44580078125, 5.9921875, 6.53857421875, 7.0849609375, 7.63134765625, 8.177734375, 8.72412109375, 9.2705078125, 9.81689453125, 10.36328125, 10.90966796875, 11.4560546875, 12.00244140625, 12.548828125, 13.09521484375, 13.6416015625, 14.18798828125, 14.734375]}, "gradients/decoder.transformer.h.3.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 5.0, 7.0, 10.0, 13.0, 22.0, 31.0, 36.0, 51.0, 83.0, 147.0, 289.0, 1161.0, 47756.0, 4042788.0, 99493.0, 1630.0, 322.0, 165.0, 82.0, 55.0, 33.0, 31.0, 26.0, 22.0, 11.0, 9.0, 5.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-50.6875, -48.44140625, -46.1953125, -43.94921875, -41.703125, -39.45703125, -37.2109375, -34.96484375, -32.71875, -30.47265625, -28.2265625, -25.98046875, -23.734375, -21.48828125, -19.2421875, -16.99609375, -14.75, -12.50390625, -10.2578125, -8.01171875, -5.765625, -3.51953125, -1.2734375, 0.97265625, 3.21875, 5.46484375, 7.7109375, 9.95703125, 12.203125, 14.44921875, 16.6953125, 18.94140625, 21.1875, 23.43359375, 25.6796875, 27.92578125, 30.171875, 32.41796875, 34.6640625, 36.91015625, 39.15625, 41.40234375, 43.6484375, 45.89453125, 48.140625, 50.38671875, 52.6328125, 54.87890625, 57.125, 59.37109375, 61.6171875, 63.86328125, 66.109375, 68.35546875, 70.6015625, 72.84765625, 75.09375, 77.33984375, 79.5859375, 81.83203125, 84.078125, 86.32421875, 88.5703125, 90.81640625, 93.0625]}, "gradients/decoder.transformer.h.3.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 28.0, 180.0, 483.0, 259.0, 57.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-216.067626953125, -209.0161895751953, -201.96475219726562, -194.91331481933594, -187.86187744140625, -180.8104248046875, -173.75900268554688, -166.70755004882812, -159.65611267089844, -152.60467529296875, -145.55323791503906, -138.50180053710938, -131.4503631591797, -124.39891815185547, -117.34748077392578, -110.29603576660156, -103.2446060180664, -96.19316864013672, -89.14173126220703, -82.09028625488281, -75.03884887695312, -67.98741149902344, -60.93597412109375, -53.8845329284668, -46.83309555053711, -39.78165817260742, -32.73021697998047, -25.67877960205078, -18.62734031677246, -11.57590103149414, -4.524463653564453, 2.5269775390625, 9.578414916992188, 16.629854202270508, 23.681293487548828, 30.732730865478516, 37.78417205810547, 44.835609436035156, 51.887046813964844, 58.9384880065918, 65.98992919921875, 73.04136657714844, 80.09280395507812, 87.14424133300781, 94.19568634033203, 101.24712371826172, 108.2985610961914, 115.35000610351562, 122.40143585205078, 129.452880859375, 136.5043182373047, 143.55575561523438, 150.60719299316406, 157.65863037109375, 164.71006774902344, 171.76150512695312, 178.8129425048828, 185.8643798828125, 192.9158172607422, 199.96725463867188, 207.01869201660156, 214.07012939453125, 221.12158203125, 228.1730194091797, 235.22445678710938]}, "gradients/decoder.transformer.h.3.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 3.0, 8.0, 4.0, 8.0, 5.0, 2.0, 12.0, 16.0, 17.0, 17.0, 16.0, 21.0, 16.0, 23.0, 31.0, 33.0, 37.0, 33.0, 33.0, 40.0, 43.0, 49.0, 29.0, 35.0, 40.0, 46.0, 31.0, 39.0, 37.0, 34.0, 34.0, 32.0, 25.0, 18.0, 20.0, 17.0, 18.0, 14.0, 11.0, 7.0, 10.0, 10.0, 12.0, 6.0, 9.0, 5.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-46.22955322265625, -44.726890563964844, -43.22422409057617, -41.721561431884766, -40.21889877319336, -38.71623229980469, -37.21356964111328, -35.710906982421875, -34.20824432373047, -32.70558166503906, -31.202917098999023, -29.700252532958984, -28.197589874267578, -26.69492530822754, -25.1922607421875, -23.689598083496094, -22.186931610107422, -20.684267044067383, -19.181604385375977, -17.678939819335938, -16.17627716064453, -14.673612594604492, -13.170948028564453, -11.66828441619873, -10.165620803833008, -8.662957191467285, -7.160293102264404, -5.657629013061523, -4.154965400695801, -2.652301788330078, -1.149637222290039, 0.3530263900756836, 1.8556861877441406, 3.3583500385284424, 4.861013889312744, 6.363677978515625, 7.866341590881348, 9.36900520324707, 10.87166976928711, 12.374333381652832, 13.876996994018555, 15.379660606384277, 16.88232421875, 18.38498878479004, 19.887653350830078, 21.390316009521484, 22.892980575561523, 24.395645141601562, 25.89830780029297, 27.400972366333008, 28.903635025024414, 30.406299591064453, 31.90896224975586, 33.41162872314453, 34.91429138183594, 36.416954040527344, 37.91961669921875, 39.422279357910156, 40.92494583129883, 42.427608489990234, 43.93027114868164, 45.43293762207031, 46.93560028076172, 48.438262939453125, 49.9409294128418]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 6.0, 4.0, 6.0, 7.0, 8.0, 8.0, 9.0, 13.0, 11.0, 18.0, 9.0, 14.0, 17.0, 20.0, 25.0, 21.0, 37.0, 32.0, 30.0, 29.0, 39.0, 48.0, 41.0, 48.0, 41.0, 39.0, 35.0, 40.0, 34.0, 39.0, 26.0, 30.0, 30.0, 26.0, 28.0, 21.0, 17.0, 13.0, 10.0, 15.0, 12.0, 11.0, 6.0, 5.0, 5.0, 4.0, 4.0, 5.0, 4.0, 5.0, 2.0, 1.0, 3.0, 0.0, 2.0], "bins": [-8.7734375, -8.5087890625, -8.244140625, -7.9794921875, -7.71484375, -7.4501953125, -7.185546875, -6.9208984375, -6.65625, -6.3916015625, -6.126953125, -5.8623046875, -5.59765625, -5.3330078125, -5.068359375, -4.8037109375, -4.5390625, -4.2744140625, -4.009765625, -3.7451171875, -3.48046875, -3.2158203125, -2.951171875, -2.6865234375, -2.421875, -2.1572265625, -1.892578125, -1.6279296875, -1.36328125, -1.0986328125, -0.833984375, -0.5693359375, -0.3046875, -0.0400390625, 0.224609375, 0.4892578125, 0.75390625, 1.0185546875, 1.283203125, 1.5478515625, 1.8125, 2.0771484375, 2.341796875, 2.6064453125, 2.87109375, 3.1357421875, 3.400390625, 3.6650390625, 3.9296875, 4.1943359375, 4.458984375, 4.7236328125, 4.98828125, 5.2529296875, 5.517578125, 5.7822265625, 6.046875, 6.3115234375, 6.576171875, 6.8408203125, 7.10546875, 7.3701171875, 7.634765625, 7.8994140625, 8.1640625]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 1.0, 3.0, 8.0, 4.0, 10.0, 12.0, 14.0, 15.0, 44.0, 44.0, 74.0, 111.0, 156.0, 260.0, 419.0, 622.0, 948.0, 1555.0, 2466.0, 3776.0, 5955.0, 9553.0, 15205.0, 24512.0, 39350.0, 64575.0, 105843.0, 170705.0, 212473.0, 149359.0, 91654.0, 56316.0, 34463.0, 21649.0, 13455.0, 8419.0, 5319.0, 3344.0, 2121.0, 1317.0, 869.0, 560.0, 340.0, 229.0, 165.0, 104.0, 53.0, 45.0, 23.0, 23.0, 8.0, 6.0, 4.0, 3.0, 2.0, 3.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.76708984375, -0.7433853149414062, -0.7196807861328125, -0.6959762573242188, -0.672271728515625, -0.6485671997070312, -0.6248626708984375, -0.6011581420898438, -0.57745361328125, -0.5537490844726562, -0.5300445556640625, -0.5063400268554688, -0.482635498046875, -0.45893096923828125, -0.4352264404296875, -0.41152191162109375, -0.3878173828125, -0.36411285400390625, -0.3404083251953125, -0.31670379638671875, -0.292999267578125, -0.26929473876953125, -0.2455902099609375, -0.22188568115234375, -0.19818115234375, -0.17447662353515625, -0.1507720947265625, -0.12706756591796875, -0.103363037109375, -0.07965850830078125, -0.0559539794921875, -0.03224945068359375, -0.008544921875, 0.01515960693359375, 0.0388641357421875, 0.06256866455078125, 0.086273193359375, 0.10997772216796875, 0.1336822509765625, 0.15738677978515625, 0.18109130859375, 0.20479583740234375, 0.2285003662109375, 0.25220489501953125, 0.275909423828125, 0.29961395263671875, 0.3233184814453125, 0.34702301025390625, 0.3707275390625, 0.39443206787109375, 0.4181365966796875, 0.44184112548828125, 0.465545654296875, 0.48925018310546875, 0.5129547119140625, 0.5366592407226562, 0.56036376953125, 0.5840682983398438, 0.6077728271484375, 0.6314773559570312, 0.655181884765625, 0.6788864135742188, 0.7025909423828125, 0.7262954711914062, 0.75]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 0.0, 5.0, 2.0, 4.0, 2.0, 6.0, 2.0, 7.0, 7.0, 14.0, 8.0, 10.0, 24.0, 13.0, 18.0, 23.0, 33.0, 30.0, 28.0, 30.0, 24.0, 36.0, 37.0, 28.0, 35.0, 35.0, 1072.0, 43.0, 42.0, 51.0, 39.0, 43.0, 28.0, 37.0, 22.0, 30.0, 25.0, 27.0, 25.0, 12.0, 14.0, 12.0, 12.0, 8.0, 3.0, 9.0, 5.0, 5.0, 3.0, 2.0, 4.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.3828125, -5.20819091796875, -5.0335693359375, -4.85894775390625, -4.684326171875, -4.50970458984375, -4.3350830078125, -4.16046142578125, -3.98583984375, -3.81121826171875, -3.6365966796875, -3.46197509765625, -3.287353515625, -3.11273193359375, -2.9381103515625, -2.76348876953125, -2.5888671875, -2.41424560546875, -2.2396240234375, -2.06500244140625, -1.890380859375, -1.71575927734375, -1.5411376953125, -1.36651611328125, -1.19189453125, -1.01727294921875, -0.8426513671875, -0.66802978515625, -0.493408203125, -0.31878662109375, -0.1441650390625, 0.03045654296875, 0.205078125, 0.37969970703125, 0.5543212890625, 0.72894287109375, 0.903564453125, 1.07818603515625, 1.2528076171875, 1.42742919921875, 1.60205078125, 1.77667236328125, 1.9512939453125, 2.12591552734375, 2.300537109375, 2.47515869140625, 2.6497802734375, 2.82440185546875, 2.9990234375, 3.17364501953125, 3.3482666015625, 3.52288818359375, 3.697509765625, 3.87213134765625, 4.0467529296875, 4.22137451171875, 4.39599609375, 4.57061767578125, 4.7452392578125, 4.91986083984375, 5.094482421875, 5.26910400390625, 5.4437255859375, 5.61834716796875, 5.79296875]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 3.0, 2.0, 7.0, 12.0, 19.0, 35.0, 50.0, 58.0, 105.0, 169.0, 274.0, 369.0, 580.0, 903.0, 1431.0, 2306.0, 3363.0, 5320.0, 8153.0, 12096.0, 18964.0, 29562.0, 45161.0, 70594.0, 108848.0, 156427.0, 1225780.0, 139834.0, 93794.0, 60715.0, 39320.0, 25464.0, 16421.0, 10900.0, 6989.0, 4616.0, 2940.0, 1894.0, 1318.0, 797.0, 548.0, 365.0, 215.0, 140.0, 92.0, 67.0, 42.0, 33.0, 20.0, 10.0, 4.0, 6.0, 4.0, 2.0, 0.0, 2.0], "bins": [-0.6181640625, -0.60015869140625, -0.5821533203125, -0.56414794921875, -0.546142578125, -0.52813720703125, -0.5101318359375, -0.49212646484375, -0.47412109375, -0.45611572265625, -0.4381103515625, -0.42010498046875, -0.402099609375, -0.38409423828125, -0.3660888671875, -0.34808349609375, -0.330078125, -0.31207275390625, -0.2940673828125, -0.27606201171875, -0.258056640625, -0.24005126953125, -0.2220458984375, -0.20404052734375, -0.18603515625, -0.16802978515625, -0.1500244140625, -0.13201904296875, -0.114013671875, -0.09600830078125, -0.0780029296875, -0.05999755859375, -0.0419921875, -0.02398681640625, -0.0059814453125, 0.01202392578125, 0.030029296875, 0.04803466796875, 0.0660400390625, 0.08404541015625, 0.10205078125, 0.12005615234375, 0.1380615234375, 0.15606689453125, 0.174072265625, 0.19207763671875, 0.2100830078125, 0.22808837890625, 0.24609375, 0.26409912109375, 0.2821044921875, 0.30010986328125, 0.318115234375, 0.33612060546875, 0.3541259765625, 0.37213134765625, 0.39013671875, 0.40814208984375, 0.4261474609375, 0.44415283203125, 0.462158203125, 0.48016357421875, 0.4981689453125, 0.51617431640625, 0.5341796875]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 1.0, 1.0, 2.0, 7.0, 12.0, 20.0, 14.0, 22.0, 25.0, 15.0, 24.0, 34.0, 44.0, 49.0, 68.0, 64.0, 61.0, 61.0, 74.0, 65.0, 67.0, 51.0, 36.0, 35.0, 34.0, 32.0, 20.0, 11.0, 14.0, 10.0, 6.0, 5.0, 6.0, 5.0, 2.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.0025310516357421875, -0.002453625202178955, -0.0023761987686157227, -0.0022987723350524902, -0.002221345901489258, -0.0021439194679260254, -0.002066493034362793, -0.0019890666007995605, -0.0019116401672363281, -0.0018342137336730957, -0.0017567873001098633, -0.0016793608665466309, -0.0016019344329833984, -0.001524507999420166, -0.0014470815658569336, -0.0013696551322937012, -0.0012922286987304688, -0.0012148022651672363, -0.001137375831604004, -0.0010599493980407715, -0.000982522964477539, -0.0009050965309143066, -0.0008276700973510742, -0.0007502436637878418, -0.0006728172302246094, -0.000595390796661377, -0.0005179643630981445, -0.0004405379295349121, -0.0003631114959716797, -0.00028568506240844727, -0.00020825862884521484, -0.00013083219528198242, -5.340576171875e-05, 2.4020671844482422e-05, 0.00010144710540771484, 0.00017887353897094727, 0.0002562999725341797, 0.0003337264060974121, 0.00041115283966064453, 0.000488579273223877, 0.0005660057067871094, 0.0006434321403503418, 0.0007208585739135742, 0.0007982850074768066, 0.0008757114410400391, 0.0009531378746032715, 0.001030564308166504, 0.0011079907417297363, 0.0011854171752929688, 0.0012628436088562012, 0.0013402700424194336, 0.001417696475982666, 0.0014951229095458984, 0.0015725493431091309, 0.0016499757766723633, 0.0017274022102355957, 0.0018048286437988281, 0.0018822550773620605, 0.001959681510925293, 0.0020371079444885254, 0.002114534378051758, 0.0021919608116149902, 0.0022693872451782227, 0.002346813678741455, 0.0024242401123046875]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 2.0, 0.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 3.0, 5.0, 3.0, 7.0, 10.0, 13.0, 15.0, 14.0, 25.0, 38.0, 40.0, 70.0, 82.0, 121.0, 173.0, 276.0, 565.0, 1134.0, 88656.0, 953666.0, 1864.0, 652.0, 386.0, 208.0, 132.0, 92.0, 58.0, 52.0, 46.0, 41.0, 38.0, 29.0, 13.0, 4.0, 2.0, 7.0, 3.0, 5.0, 4.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.046356201171875, -0.04486894607543945, -0.043381690979003906, -0.04189443588256836, -0.04040718078613281, -0.038919925689697266, -0.03743267059326172, -0.03594541549682617, -0.034458160400390625, -0.03297090530395508, -0.03148365020751953, -0.029996395111083984, -0.028509140014648438, -0.02702188491821289, -0.025534629821777344, -0.024047374725341797, -0.02256011962890625, -0.021072864532470703, -0.019585609436035156, -0.01809835433959961, -0.016611099243164062, -0.015123844146728516, -0.013636589050292969, -0.012149333953857422, -0.010662078857421875, -0.009174823760986328, -0.007687568664550781, -0.006200313568115234, -0.0047130584716796875, -0.0032258033752441406, -0.0017385482788085938, -0.0002512931823730469, 0.0012359619140625, 0.002723217010498047, 0.004210472106933594, 0.005697727203369141, 0.0071849822998046875, 0.008672237396240234, 0.010159492492675781, 0.011646747589111328, 0.013134002685546875, 0.014621257781982422, 0.01610851287841797, 0.017595767974853516, 0.019083023071289062, 0.02057027816772461, 0.022057533264160156, 0.023544788360595703, 0.02503204345703125, 0.026519298553466797, 0.028006553649902344, 0.02949380874633789, 0.030981063842773438, 0.032468318939208984, 0.03395557403564453, 0.03544282913208008, 0.036930084228515625, 0.03841733932495117, 0.03990459442138672, 0.041391849517822266, 0.04287910461425781, 0.04436635971069336, 0.045853614807128906, 0.04734086990356445, 0.048828125]}, "gradients/decoder.transformer.h.3.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 5.0, 4.0, 27.0, 71.0, 151.0, 275.0, 258.0, 139.0, 61.0, 16.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00186209287494421, -0.0017769852420315146, -0.001691877725534141, -0.0016067700926214457, -0.001521662576124072, -0.0014365549432113767, -0.0013514473102986813, -0.0012663397938013077, -0.0011812321608886123, -0.0010961245279759169, -0.0010110170114785433, -0.0009259093785658479, -0.0008408018038608134, -0.0007556942291557789, -0.0006705865962430835, -0.000585479021538049, -0.0005003714468330145, -0.00041526387212798, -0.00033015626831911504, -0.0002450486645102501, -0.0001599410898052156, -7.48335151001811e-05, 1.0274117812514305e-05, 9.53816925175488e-05, 0.0001804892672225833, 0.0002655968419276178, 0.00035070444573648274, 0.0004358120495453477, 0.0005209196242503822, 0.0006060271989554167, 0.0006911348318681121, 0.0007762424065731466, 0.0008613502141088247, 0.0009464577888138592, 0.0010315653635188937, 0.0011166729964315891, 0.0012017805129289627, 0.0012868881458416581, 0.0013719957787543535, 0.001457103295251727, 0.0015422109281644225, 0.001627318561077118, 0.0017124260775744915, 0.001797533710487187, 0.0018826413433998823, 0.001967748859897256, 0.002052856609225273, 0.0021379641257226467, 0.0022230716422200203, 0.002308179158717394, 0.002393286908045411, 0.0024783944245427847, 0.0025635019410401583, 0.0026486096903681755, 0.002733717206865549, 0.0028188247233629227, 0.0029039322398602962, 0.00298903975635767, 0.003074147505685687, 0.0031592550221830606, 0.0032443625386804342, 0.0033294702880084515, 0.003414577804505825, 0.0034996853210031986, 0.003584793070331216]}, "gradients/decoder.transformer.h.3.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 3.0, 8.0, 8.0, 3.0, 9.0, 6.0, 8.0, 14.0, 8.0, 23.0, 14.0, 18.0, 28.0, 33.0, 27.0, 37.0, 44.0, 36.0, 42.0, 37.0, 51.0, 45.0, 36.0, 35.0, 42.0, 41.0, 51.0, 40.0, 31.0, 43.0, 22.0, 28.0, 22.0, 19.0, 17.0, 20.0, 12.0, 9.0, 9.0, 9.0, 10.0, 2.0, 5.0, 3.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.001218259334564209, -0.0011798962950706482, -0.0011415332555770874, -0.0011031702160835266, -0.0010648071765899658, -0.001026444137096405, -0.0009880810976028442, -0.0009497180581092834, -0.0009113550186157227, -0.0008729919791221619, -0.0008346289396286011, -0.0007962659001350403, -0.0007579028606414795, -0.0007195398211479187, -0.0006811767816543579, -0.0006428137421607971, -0.0006044507026672363, -0.0005660876631736755, -0.0005277246236801147, -0.000489361584186554, -0.00045099854469299316, -0.0004126355051994324, -0.0003742724657058716, -0.0003359094262123108, -0.00029754638671875, -0.0002591833472251892, -0.00022082030773162842, -0.00018245726823806763, -0.00014409422874450684, -0.00010573118925094604, -6.736814975738525e-05, -2.9005110263824463e-05, 9.357929229736328e-06, 4.772096872329712e-05, 8.608400821685791e-05, 0.0001244470477104187, 0.0001628100872039795, 0.00020117312669754028, 0.00023953616619110107, 0.00027789920568466187, 0.00031626224517822266, 0.00035462528467178345, 0.00039298832416534424, 0.00043135136365890503, 0.0004697144031524658, 0.0005080774426460266, 0.0005464404821395874, 0.0005848035216331482, 0.000623166561126709, 0.0006615296006202698, 0.0006998926401138306, 0.0007382556796073914, 0.0007766187191009521, 0.0008149817585945129, 0.0008533447980880737, 0.0008917078375816345, 0.0009300708770751953, 0.0009684339165687561, 0.001006796956062317, 0.0010451599955558777, 0.0010835230350494385, 0.0011218860745429993, 0.00116024911403656, 0.0011986121535301208, 0.0012369751930236816]}, "gradients/decoder.transformer.h.3.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 6.0, 4.0, 6.0, 7.0, 8.0, 8.0, 9.0, 13.0, 11.0, 18.0, 9.0, 14.0, 17.0, 20.0, 26.0, 20.0, 37.0, 32.0, 30.0, 29.0, 39.0, 48.0, 41.0, 48.0, 41.0, 39.0, 35.0, 40.0, 34.0, 39.0, 26.0, 30.0, 30.0, 26.0, 29.0, 20.0, 17.0, 13.0, 10.0, 15.0, 12.0, 11.0, 6.0, 5.0, 5.0, 4.0, 4.0, 5.0, 4.0, 5.0, 2.0, 1.0, 3.0, 0.0, 2.0], "bins": [-8.7734375, -8.5087890625, -8.244140625, -7.9794921875, -7.71484375, -7.4501953125, -7.185546875, -6.9208984375, -6.65625, -6.3916015625, -6.126953125, -5.8623046875, -5.59765625, -5.3330078125, -5.068359375, -4.8037109375, -4.5390625, -4.2744140625, -4.009765625, -3.7451171875, -3.48046875, -3.2158203125, -2.951171875, -2.6865234375, -2.421875, -2.1572265625, -1.892578125, -1.6279296875, -1.36328125, -1.0986328125, -0.833984375, -0.5693359375, -0.3046875, -0.0400390625, 0.224609375, 0.4892578125, 0.75390625, 1.0185546875, 1.283203125, 1.5478515625, 1.8125, 2.0771484375, 2.341796875, 2.6064453125, 2.87109375, 3.1357421875, 3.400390625, 3.6650390625, 3.9296875, 4.1943359375, 4.458984375, 4.7236328125, 4.98828125, 5.2529296875, 5.517578125, 5.7822265625, 6.046875, 6.3115234375, 6.576171875, 6.8408203125, 7.10546875, 7.3701171875, 7.634765625, 7.8994140625, 8.1640625]}, "gradients/decoder.transformer.h.3.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 1.0, 3.0, 3.0, 3.0, 3.0, 10.0, 12.0, 21.0, 8.0, 13.0, 24.0, 27.0, 36.0, 52.0, 67.0, 104.0, 101.0, 130.0, 162.0, 223.0, 301.0, 353.0, 438.0, 606.0, 833.0, 1188.0, 1819.0, 3931.0, 13343.0, 61184.0, 267144.0, 484489.0, 159775.0, 35424.0, 8145.0, 2838.0, 1573.0, 998.0, 710.0, 513.0, 411.0, 345.0, 285.0, 201.0, 150.0, 133.0, 106.0, 63.0, 51.0, 42.0, 44.0, 28.0, 15.0, 21.0, 14.0, 13.0, 12.0, 11.0, 8.0, 3.0, 4.0, 3.0], "bins": [-17.359375, -16.83837890625, -16.3173828125, -15.79638671875, -15.275390625, -14.75439453125, -14.2333984375, -13.71240234375, -13.19140625, -12.67041015625, -12.1494140625, -11.62841796875, -11.107421875, -10.58642578125, -10.0654296875, -9.54443359375, -9.0234375, -8.50244140625, -7.9814453125, -7.46044921875, -6.939453125, -6.41845703125, -5.8974609375, -5.37646484375, -4.85546875, -4.33447265625, -3.8134765625, -3.29248046875, -2.771484375, -2.25048828125, -1.7294921875, -1.20849609375, -0.6875, -0.16650390625, 0.3544921875, 0.87548828125, 1.396484375, 1.91748046875, 2.4384765625, 2.95947265625, 3.48046875, 4.00146484375, 4.5224609375, 5.04345703125, 5.564453125, 6.08544921875, 6.6064453125, 7.12744140625, 7.6484375, 8.16943359375, 8.6904296875, 9.21142578125, 9.732421875, 10.25341796875, 10.7744140625, 11.29541015625, 11.81640625, 12.33740234375, 12.8583984375, 13.37939453125, 13.900390625, 14.42138671875, 14.9423828125, 15.46337890625, 15.984375]}, "gradients/decoder.transformer.h.3.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 6.0, 4.0, 5.0, 5.0, 7.0, 9.0, 11.0, 12.0, 10.0, 12.0, 17.0, 25.0, 21.0, 33.0, 35.0, 38.0, 45.0, 59.0, 84.0, 105.0, 200.0, 1403.0, 295.0, 144.0, 86.0, 50.0, 36.0, 41.0, 37.0, 35.0, 30.0, 18.0, 22.0, 22.0, 20.0, 15.0, 10.0, 12.0, 10.0, 7.0, 10.0, 5.0, 1.0, 2.0, 2.0, 2.0, 4.0], "bins": [-27.84375, -27.125244140625, -26.40673828125, -25.688232421875, -24.9697265625, -24.251220703125, -23.53271484375, -22.814208984375, -22.095703125, -21.377197265625, -20.65869140625, -19.940185546875, -19.2216796875, -18.503173828125, -17.78466796875, -17.066162109375, -16.34765625, -15.629150390625, -14.91064453125, -14.192138671875, -13.4736328125, -12.755126953125, -12.03662109375, -11.318115234375, -10.599609375, -9.881103515625, -9.16259765625, -8.444091796875, -7.7255859375, -7.007080078125, -6.28857421875, -5.570068359375, -4.8515625, -4.133056640625, -3.41455078125, -2.696044921875, -1.9775390625, -1.259033203125, -0.54052734375, 0.177978515625, 0.896484375, 1.614990234375, 2.33349609375, 3.052001953125, 3.7705078125, 4.489013671875, 5.20751953125, 5.926025390625, 6.64453125, 7.363037109375, 8.08154296875, 8.800048828125, 9.5185546875, 10.237060546875, 10.95556640625, 11.674072265625, 12.392578125, 13.111083984375, 13.82958984375, 14.548095703125, 15.2666015625, 15.985107421875, 16.70361328125, 17.422119140625, 18.140625]}, "gradients/decoder.transformer.h.3.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 4.0, 11.0, 7.0, 10.0, 9.0, 11.0, 12.0, 22.0, 14.0, 29.0, 28.0, 37.0, 58.0, 89.0, 118.0, 158.0, 198.0, 293.0, 597.0, 1167.0, 6187.0, 3126867.0, 6800.0, 1299.0, 545.0, 323.0, 195.0, 157.0, 126.0, 72.0, 63.0, 46.0, 36.0, 28.0, 13.0, 19.0, 9.0, 7.0, 8.0, 8.0, 7.0, 4.0, 5.0, 3.0, 4.0, 5.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-111.25, -107.828125, -104.40625, -100.984375, -97.5625, -94.140625, -90.71875, -87.296875, -83.875, -80.453125, -77.03125, -73.609375, -70.1875, -66.765625, -63.34375, -59.921875, -56.5, -53.078125, -49.65625, -46.234375, -42.8125, -39.390625, -35.96875, -32.546875, -29.125, -25.703125, -22.28125, -18.859375, -15.4375, -12.015625, -8.59375, -5.171875, -1.75, 1.671875, 5.09375, 8.515625, 11.9375, 15.359375, 18.78125, 22.203125, 25.625, 29.046875, 32.46875, 35.890625, 39.3125, 42.734375, 46.15625, 49.578125, 53.0, 56.421875, 59.84375, 63.265625, 66.6875, 70.109375, 73.53125, 76.953125, 80.375, 83.796875, 87.21875, 90.640625, 94.0625, 97.484375, 100.90625, 104.328125, 107.75]}, "gradients/decoder.transformer.h.3.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 11.0, 51.0, 139.0, 250.0, 271.0, 186.0, 78.0, 17.0, 8.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-147.3551483154297, -143.85218811035156, -140.3492431640625, -136.84628295898438, -133.3433380126953, -129.8403778076172, -126.33743286132812, -122.83447265625, -119.33152770996094, -115.82857513427734, -112.32562255859375, -108.82266998291016, -105.31971740722656, -101.81676483154297, -98.31381225585938, -94.81085205078125, -91.30789947509766, -87.80494689941406, -84.30199432373047, -80.79904174804688, -77.29608917236328, -73.79313659667969, -70.29017639160156, -66.7872314453125, -63.28427505493164, -59.78132247924805, -56.27836990356445, -52.775413513183594, -49.2724609375, -45.769508361816406, -42.26655578613281, -38.76360321044922, -35.260650634765625, -31.75769805908203, -28.254745483398438, -24.75179100036621, -21.248838424682617, -17.745885848999023, -14.242931365966797, -10.739978790283203, -7.237026214599609, -3.7340731620788574, -0.23112010955810547, 3.2718334197998047, 6.774785995483398, 10.277738571166992, 13.780693054199219, 17.283645629882812, 20.786598205566406, 24.28955078125, 27.792503356933594, 31.29545783996582, 34.79840850830078, 38.301361083984375, 41.804317474365234, 45.30727005004883, 48.81022262573242, 52.313175201416016, 55.81612777709961, 59.31908416748047, 62.82203674316406, 66.32498931884766, 69.82794189453125, 73.33089447021484, 76.83384704589844]}, "gradients/decoder.transformer.h.3.ln_1.bias": {"_type": "histogram", "values": [4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 4.0, 3.0, 1.0, 2.0, 5.0, 9.0, 10.0, 14.0, 18.0, 19.0, 24.0, 14.0, 18.0, 34.0, 29.0, 41.0, 37.0, 37.0, 47.0, 48.0, 49.0, 47.0, 50.0, 39.0, 39.0, 55.0, 42.0, 38.0, 15.0, 35.0, 28.0, 25.0, 17.0, 22.0, 22.0, 9.0, 5.0, 8.0, 10.0, 9.0, 9.0, 6.0, 5.0, 3.0, 4.0, 2.0, 0.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-57.86651611328125, -55.76604080200195, -53.665565490722656, -51.56509017944336, -49.46461486816406, -47.36414337158203, -45.263668060302734, -43.16319274902344, -41.06271743774414, -38.962242126464844, -36.86176681518555, -34.76129150390625, -32.66082000732422, -30.56034278869629, -28.459869384765625, -26.359394073486328, -24.25891876220703, -22.158443450927734, -20.057968139648438, -17.957494735717773, -15.857019424438477, -13.75654411315918, -11.6560697555542, -9.555595397949219, -7.455120086669922, -5.354645252227783, -3.2541704177856445, -1.1536955833435059, 0.9467792510986328, 3.0472545623779297, 5.14772891998291, 7.248203277587891, 9.348678588867188, 11.449153900146484, 13.549628257751465, 15.650102615356445, 17.750577926635742, 19.85105323791504, 21.951526641845703, 24.052001953125, 26.152477264404297, 28.252952575683594, 30.35342788696289, 32.45390319824219, 34.55437469482422, 36.65485382080078, 38.75532531738281, 40.85580062866211, 42.956275939941406, 45.0567512512207, 47.1572265625, 49.2577018737793, 51.358177185058594, 53.458648681640625, 55.55912399291992, 57.65959930419922, 59.760074615478516, 61.86054992675781, 63.96102523803711, 66.0615005493164, 68.16197204589844, 70.262451171875, 72.36292266845703, 74.46339416503906, 76.56387329101562]}, "gradients/decoder.transformer.h.2.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 6.0, 3.0, 3.0, 7.0, 5.0, 6.0, 8.0, 6.0, 9.0, 10.0, 6.0, 9.0, 10.0, 12.0, 18.0, 13.0, 27.0, 27.0, 32.0, 28.0, 38.0, 30.0, 38.0, 43.0, 53.0, 36.0, 37.0, 41.0, 40.0, 37.0, 36.0, 30.0, 36.0, 25.0, 28.0, 28.0, 29.0, 28.0, 17.0, 18.0, 13.0, 11.0, 23.0, 10.0, 7.0, 5.0, 6.0, 2.0, 5.0, 7.0, 3.0, 4.0, 4.0, 3.0, 1.0, 0.0, 1.0], "bins": [-8.8671875, -8.6043701171875, -8.341552734375, -8.0787353515625, -7.81591796875, -7.5531005859375, -7.290283203125, -7.0274658203125, -6.7646484375, -6.5018310546875, -6.239013671875, -5.9761962890625, -5.71337890625, -5.4505615234375, -5.187744140625, -4.9249267578125, -4.662109375, -4.3992919921875, -4.136474609375, -3.8736572265625, -3.61083984375, -3.3480224609375, -3.085205078125, -2.8223876953125, -2.5595703125, -2.2967529296875, -2.033935546875, -1.7711181640625, -1.50830078125, -1.2454833984375, -0.982666015625, -0.7198486328125, -0.45703125, -0.1942138671875, 0.068603515625, 0.3314208984375, 0.59423828125, 0.8570556640625, 1.119873046875, 1.3826904296875, 1.6455078125, 1.9083251953125, 2.171142578125, 2.4339599609375, 2.69677734375, 2.9595947265625, 3.222412109375, 3.4852294921875, 3.748046875, 4.0108642578125, 4.273681640625, 4.5364990234375, 4.79931640625, 5.0621337890625, 5.324951171875, 5.5877685546875, 5.8505859375, 6.1134033203125, 6.376220703125, 6.6390380859375, 6.90185546875, 7.1646728515625, 7.427490234375, 7.6903076171875, 7.953125]}, "gradients/decoder.transformer.h.2.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 3.0, 5.0, 5.0, 6.0, 12.0, 21.0, 38.0, 64.0, 112.0, 225.0, 421.0, 982.0, 2540.0, 9752.0, 78476.0, 1185451.0, 2583370.0, 302907.0, 22939.0, 4408.0, 1363.0, 595.0, 272.0, 151.0, 71.0, 36.0, 23.0, 17.0, 3.0, 8.0, 6.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.96875, -19.235595703125, -18.50244140625, -17.769287109375, -17.0361328125, -16.302978515625, -15.56982421875, -14.836669921875, -14.103515625, -13.370361328125, -12.63720703125, -11.904052734375, -11.1708984375, -10.437744140625, -9.70458984375, -8.971435546875, -8.23828125, -7.505126953125, -6.77197265625, -6.038818359375, -5.3056640625, -4.572509765625, -3.83935546875, -3.106201171875, -2.373046875, -1.639892578125, -0.90673828125, -0.173583984375, 0.5595703125, 1.292724609375, 2.02587890625, 2.759033203125, 3.4921875, 4.225341796875, 4.95849609375, 5.691650390625, 6.4248046875, 7.157958984375, 7.89111328125, 8.624267578125, 9.357421875, 10.090576171875, 10.82373046875, 11.556884765625, 12.2900390625, 13.023193359375, 13.75634765625, 14.489501953125, 15.22265625, 15.955810546875, 16.68896484375, 17.422119140625, 18.1552734375, 18.888427734375, 19.62158203125, 20.354736328125, 21.087890625, 21.821044921875, 22.55419921875, 23.287353515625, 24.0205078125, 24.753662109375, 25.48681640625, 26.219970703125, 26.953125]}, "gradients/decoder.transformer.h.2.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 4.0, 1.0, 3.0, 3.0, 8.0, 2.0, 7.0, 11.0, 16.0, 22.0, 29.0, 40.0, 50.0, 72.0, 92.0, 105.0, 138.0, 187.0, 284.0, 331.0, 485.0, 487.0, 411.0, 338.0, 217.0, 193.0, 141.0, 109.0, 71.0, 65.0, 44.0, 28.0, 23.0, 16.0, 19.0, 7.0, 10.0, 7.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-16.3125, -15.80908203125, -15.3056640625, -14.80224609375, -14.298828125, -13.79541015625, -13.2919921875, -12.78857421875, -12.28515625, -11.78173828125, -11.2783203125, -10.77490234375, -10.271484375, -9.76806640625, -9.2646484375, -8.76123046875, -8.2578125, -7.75439453125, -7.2509765625, -6.74755859375, -6.244140625, -5.74072265625, -5.2373046875, -4.73388671875, -4.23046875, -3.72705078125, -3.2236328125, -2.72021484375, -2.216796875, -1.71337890625, -1.2099609375, -0.70654296875, -0.203125, 0.30029296875, 0.8037109375, 1.30712890625, 1.810546875, 2.31396484375, 2.8173828125, 3.32080078125, 3.82421875, 4.32763671875, 4.8310546875, 5.33447265625, 5.837890625, 6.34130859375, 6.8447265625, 7.34814453125, 7.8515625, 8.35498046875, 8.8583984375, 9.36181640625, 9.865234375, 10.36865234375, 10.8720703125, 11.37548828125, 11.87890625, 12.38232421875, 12.8857421875, 13.38916015625, 13.892578125, 14.39599609375, 14.8994140625, 15.40283203125, 15.90625]}, "gradients/decoder.transformer.h.2.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 1.0, 4.0, 5.0, 7.0, 10.0, 20.0, 21.0, 29.0, 32.0, 60.0, 78.0, 131.0, 176.0, 257.0, 405.0, 653.0, 1194.0, 2283.0, 6404.0, 41260.0, 509163.0, 3123225.0, 459833.0, 37578.0, 6117.0, 2252.0, 1112.0, 729.0, 419.0, 276.0, 140.0, 134.0, 81.0, 54.0, 43.0, 23.0, 22.0, 12.0, 13.0, 8.0, 8.0, 4.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0], "bins": [-25.328125, -24.54833984375, -23.7685546875, -22.98876953125, -22.208984375, -21.42919921875, -20.6494140625, -19.86962890625, -19.08984375, -18.31005859375, -17.5302734375, -16.75048828125, -15.970703125, -15.19091796875, -14.4111328125, -13.63134765625, -12.8515625, -12.07177734375, -11.2919921875, -10.51220703125, -9.732421875, -8.95263671875, -8.1728515625, -7.39306640625, -6.61328125, -5.83349609375, -5.0537109375, -4.27392578125, -3.494140625, -2.71435546875, -1.9345703125, -1.15478515625, -0.375, 0.40478515625, 1.1845703125, 1.96435546875, 2.744140625, 3.52392578125, 4.3037109375, 5.08349609375, 5.86328125, 6.64306640625, 7.4228515625, 8.20263671875, 8.982421875, 9.76220703125, 10.5419921875, 11.32177734375, 12.1015625, 12.88134765625, 13.6611328125, 14.44091796875, 15.220703125, 16.00048828125, 16.7802734375, 17.56005859375, 18.33984375, 19.11962890625, 19.8994140625, 20.67919921875, 21.458984375, 22.23876953125, 23.0185546875, 23.79833984375, 24.578125]}, "gradients/decoder.transformer.h.2.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 6.0, 58.0, 195.0, 344.0, 268.0, 111.0, 25.0, 6.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-89.53214263916016, -83.48588562011719, -77.43962097167969, -71.39336395263672, -65.34710693359375, -59.300846099853516, -53.25458526611328, -47.20832824707031, -41.16206741333008, -35.115806579589844, -29.069549560546875, -23.02328872680664, -16.97702980041504, -10.930770874023438, -4.884510040283203, 1.1617469787597656, 7.2080078125, 13.254266738891602, 19.300525665283203, 25.346786499023438, 31.39304542541504, 37.43930435180664, 43.485565185546875, 49.531822204589844, 55.57808303833008, 61.62434387207031, 67.67060089111328, 73.71685791015625, 79.76312255859375, 85.80937957763672, 91.85563659667969, 97.90190124511719, 103.94816589355469, 109.99442291259766, 116.04068756103516, 122.08694458007812, 128.13320922851562, 134.17945861816406, 140.22572326660156, 146.27197265625, 152.3182373046875, 158.364501953125, 164.41075134277344, 170.45701599121094, 176.50328063964844, 182.54953002929688, 188.59579467773438, 194.64205932617188, 200.68832397460938, 206.73458862304688, 212.7808380126953, 218.8271026611328, 224.8733673095703, 230.91961669921875, 236.96588134765625, 243.01214599609375, 249.0583953857422, 255.1046600341797, 261.1509094238281, 267.1971740722656, 273.2434387207031, 279.2897033691406, 285.3359375, 291.3822021484375, 297.428466796875]}, "gradients/decoder.transformer.h.2.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 4.0, 7.0, 7.0, 3.0, 4.0, 3.0, 12.0, 7.0, 10.0, 11.0, 16.0, 12.0, 15.0, 25.0, 27.0, 20.0, 16.0, 28.0, 29.0, 39.0, 25.0, 40.0, 49.0, 49.0, 40.0, 38.0, 52.0, 38.0, 44.0, 30.0, 37.0, 35.0, 31.0, 30.0, 24.0, 25.0, 21.0, 17.0, 11.0, 17.0, 19.0, 14.0, 5.0, 5.0, 9.0, 6.0, 3.0, 3.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.56891632080078, -59.69052505493164, -57.8121337890625, -55.93374252319336, -54.05535125732422, -52.17695999145508, -50.29856872558594, -48.42017364501953, -46.541786193847656, -44.663394927978516, -42.785003662109375, -40.906612396240234, -39.028221130371094, -37.14982986450195, -35.27143859863281, -33.393043518066406, -31.514652252197266, -29.636260986328125, -27.757869720458984, -25.879478454589844, -24.001087188720703, -22.122695922851562, -20.24430274963379, -18.36591148376465, -16.487520217895508, -14.609128952026367, -12.730737686157227, -10.85234546661377, -8.973954200744629, -7.095562934875488, -5.217170715332031, -3.3387794494628906, -1.46038818359375, 0.4180033206939697, 2.2963948249816895, 4.174786567687988, 6.053177833557129, 7.9315690994262695, 9.809961318969727, 11.688352584838867, 13.566743850708008, 15.445135116577148, 17.32352638244629, 19.201919555664062, 21.080310821533203, 22.958702087402344, 24.837093353271484, 26.715484619140625, 28.593875885009766, 30.472267150878906, 32.35065841674805, 34.22904968261719, 36.10744094848633, 37.98583221435547, 39.864227294921875, 41.74261474609375, 43.621009826660156, 45.4994010925293, 47.37779235839844, 49.25618362426758, 51.13457489013672, 53.01296615600586, 54.891357421875, 56.769752502441406, 58.64813995361328]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 0.0, 2.0, 4.0, 5.0, 11.0, 4.0, 8.0, 11.0, 13.0, 12.0, 19.0, 11.0, 14.0, 19.0, 21.0, 28.0, 27.0, 30.0, 30.0, 36.0, 42.0, 45.0, 34.0, 31.0, 31.0, 44.0, 37.0, 34.0, 36.0, 30.0, 41.0, 25.0, 34.0, 31.0, 27.0, 23.0, 25.0, 25.0, 25.0, 20.0, 10.0, 7.0, 7.0, 6.0, 8.0, 5.0, 6.0, 3.0, 6.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.18359375, -5.98553466796875, -5.7874755859375, -5.58941650390625, -5.391357421875, -5.19329833984375, -4.9952392578125, -4.79718017578125, -4.59912109375, -4.40106201171875, -4.2030029296875, -4.00494384765625, -3.806884765625, -3.60882568359375, -3.4107666015625, -3.21270751953125, -3.0146484375, -2.81658935546875, -2.6185302734375, -2.42047119140625, -2.222412109375, -2.02435302734375, -1.8262939453125, -1.62823486328125, -1.43017578125, -1.23211669921875, -1.0340576171875, -0.83599853515625, -0.637939453125, -0.43988037109375, -0.2418212890625, -0.04376220703125, 0.154296875, 0.35235595703125, 0.5504150390625, 0.74847412109375, 0.946533203125, 1.14459228515625, 1.3426513671875, 1.54071044921875, 1.73876953125, 1.93682861328125, 2.1348876953125, 2.33294677734375, 2.531005859375, 2.72906494140625, 2.9271240234375, 3.12518310546875, 3.3232421875, 3.52130126953125, 3.7193603515625, 3.91741943359375, 4.115478515625, 4.31353759765625, 4.5115966796875, 4.70965576171875, 4.90771484375, 5.10577392578125, 5.3038330078125, 5.50189208984375, 5.699951171875, 5.89801025390625, 6.0960693359375, 6.29412841796875, 6.4921875]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 4.0, 3.0, 8.0, 10.0, 13.0, 29.0, 37.0, 55.0, 80.0, 109.0, 179.0, 260.0, 380.0, 560.0, 840.0, 1293.0, 1916.0, 2795.0, 4474.0, 6692.0, 10641.0, 16623.0, 26038.0, 40787.0, 64357.0, 100521.0, 156352.0, 196660.0, 149996.0, 96402.0, 61561.0, 39152.0, 24912.0, 15766.0, 9932.0, 6509.0, 4316.0, 2786.0, 1839.0, 1248.0, 771.0, 544.0, 356.0, 250.0, 171.0, 120.0, 75.0, 52.0, 31.0, 19.0, 18.0, 9.0, 7.0, 4.0, 2.0, 4.0, 3.0], "bins": [-0.56298828125, -0.5466461181640625, -0.530303955078125, -0.5139617919921875, -0.49761962890625, -0.4812774658203125, -0.464935302734375, -0.4485931396484375, -0.4322509765625, -0.4159088134765625, -0.399566650390625, -0.3832244873046875, -0.36688232421875, -0.3505401611328125, -0.334197998046875, -0.3178558349609375, -0.301513671875, -0.2851715087890625, -0.268829345703125, -0.2524871826171875, -0.23614501953125, -0.2198028564453125, -0.203460693359375, -0.1871185302734375, -0.1707763671875, -0.1544342041015625, -0.138092041015625, -0.1217498779296875, -0.10540771484375, -0.0890655517578125, -0.072723388671875, -0.0563812255859375, -0.0400390625, -0.0236968994140625, -0.007354736328125, 0.0089874267578125, 0.02532958984375, 0.0416717529296875, 0.058013916015625, 0.0743560791015625, 0.0906982421875, 0.1070404052734375, 0.123382568359375, 0.1397247314453125, 0.15606689453125, 0.1724090576171875, 0.188751220703125, 0.2050933837890625, 0.221435546875, 0.2377777099609375, 0.254119873046875, 0.2704620361328125, 0.28680419921875, 0.3031463623046875, 0.319488525390625, 0.3358306884765625, 0.3521728515625, 0.3685150146484375, 0.384857177734375, 0.4011993408203125, 0.41754150390625, 0.4338836669921875, 0.450225830078125, 0.4665679931640625, 0.48291015625]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 4.0, 6.0, 4.0, 11.0, 7.0, 5.0, 12.0, 24.0, 19.0, 24.0, 28.0, 26.0, 24.0, 32.0, 28.0, 35.0, 47.0, 35.0, 37.0, 61.0, 1074.0, 41.0, 45.0, 39.0, 41.0, 30.0, 37.0, 31.0, 37.0, 36.0, 26.0, 8.0, 13.0, 22.0, 11.0, 16.0, 5.0, 10.0, 14.0, 4.0, 6.0, 9.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.25, -4.11053466796875, -3.9710693359375, -3.83160400390625, -3.692138671875, -3.55267333984375, -3.4132080078125, -3.27374267578125, -3.13427734375, -2.99481201171875, -2.8553466796875, -2.71588134765625, -2.576416015625, -2.43695068359375, -2.2974853515625, -2.15802001953125, -2.0185546875, -1.87908935546875, -1.7396240234375, -1.60015869140625, -1.460693359375, -1.32122802734375, -1.1817626953125, -1.04229736328125, -0.90283203125, -0.76336669921875, -0.6239013671875, -0.48443603515625, -0.344970703125, -0.20550537109375, -0.0660400390625, 0.07342529296875, 0.212890625, 0.35235595703125, 0.4918212890625, 0.63128662109375, 0.770751953125, 0.91021728515625, 1.0496826171875, 1.18914794921875, 1.32861328125, 1.46807861328125, 1.6075439453125, 1.74700927734375, 1.886474609375, 2.02593994140625, 2.1654052734375, 2.30487060546875, 2.4443359375, 2.58380126953125, 2.7232666015625, 2.86273193359375, 3.002197265625, 3.14166259765625, 3.2811279296875, 3.42059326171875, 3.56005859375, 3.69952392578125, 3.8389892578125, 3.97845458984375, 4.117919921875, 4.25738525390625, 4.3968505859375, 4.53631591796875, 4.67578125]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 4.0, 7.0, 7.0, 14.0, 22.0, 26.0, 47.0, 67.0, 91.0, 151.0, 188.0, 279.0, 434.0, 617.0, 880.0, 1399.0, 1990.0, 2922.0, 4452.0, 6413.0, 9420.0, 14091.0, 21004.0, 31705.0, 48097.0, 72919.0, 110752.0, 157423.0, 1219555.0, 131620.0, 88109.0, 57745.0, 37732.0, 25343.0, 16898.0, 11207.0, 7770.0, 5167.0, 3405.0, 2399.0, 1575.0, 1019.0, 716.0, 463.0, 334.0, 235.0, 136.0, 101.0, 73.0, 42.0, 29.0, 17.0, 16.0, 7.0, 8.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.418701171875, -0.4052886962890625, -0.391876220703125, -0.3784637451171875, -0.36505126953125, -0.3516387939453125, -0.338226318359375, -0.3248138427734375, -0.3114013671875, -0.2979888916015625, -0.284576416015625, -0.2711639404296875, -0.25775146484375, -0.2443389892578125, -0.230926513671875, -0.2175140380859375, -0.2041015625, -0.1906890869140625, -0.177276611328125, -0.1638641357421875, -0.15045166015625, -0.1370391845703125, -0.123626708984375, -0.1102142333984375, -0.0968017578125, -0.0833892822265625, -0.069976806640625, -0.0565643310546875, -0.04315185546875, -0.0297393798828125, -0.016326904296875, -0.0029144287109375, 0.010498046875, 0.0239105224609375, 0.037322998046875, 0.0507354736328125, 0.06414794921875, 0.0775604248046875, 0.090972900390625, 0.1043853759765625, 0.1177978515625, 0.1312103271484375, 0.144622802734375, 0.1580352783203125, 0.17144775390625, 0.1848602294921875, 0.198272705078125, 0.2116851806640625, 0.22509765625, 0.2385101318359375, 0.251922607421875, 0.2653350830078125, 0.27874755859375, 0.2921600341796875, 0.305572509765625, 0.3189849853515625, 0.3323974609375, 0.3458099365234375, 0.359222412109375, 0.3726348876953125, 0.38604736328125, 0.3994598388671875, 0.412872314453125, 0.4262847900390625, 0.439697265625]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 4.0, 0.0, 6.0, 6.0, 3.0, 8.0, 2.0, 15.0, 14.0, 14.0, 11.0, 23.0, 18.0, 34.0, 29.0, 22.0, 34.0, 35.0, 35.0, 37.0, 39.0, 52.0, 39.0, 53.0, 36.0, 30.0, 43.0, 44.0, 30.0, 33.0, 34.0, 37.0, 25.0, 20.0, 17.0, 25.0, 23.0, 13.0, 9.0, 14.0, 13.0, 8.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0009455680847167969, -0.0009166374802589417, -0.0008877068758010864, -0.0008587762713432312, -0.000829845666885376, -0.0008009150624275208, -0.0007719844579696655, -0.0007430538535118103, -0.0007141232490539551, -0.0006851926445960999, -0.0006562620401382446, -0.0006273314356803894, -0.0005984008312225342, -0.000569470226764679, -0.0005405396223068237, -0.0005116090178489685, -0.0004826784133911133, -0.00045374780893325806, -0.00042481720447540283, -0.0003958866000175476, -0.0003669559955596924, -0.00033802539110183716, -0.00030909478664398193, -0.0002801641821861267, -0.0002512335777282715, -0.00022230297327041626, -0.00019337236881256104, -0.0001644417643547058, -0.00013551115989685059, -0.00010658055543899536, -7.764995098114014e-05, -4.871934652328491e-05, -1.9788742065429688e-05, 9.141862392425537e-06, 3.807246685028076e-05, 6.700307130813599e-05, 9.593367576599121e-05, 0.00012486428022384644, 0.00015379488468170166, 0.00018272548913955688, 0.0002116560935974121, 0.00024058669805526733, 0.00026951730251312256, 0.0002984479069709778, 0.000327378511428833, 0.00035630911588668823, 0.00038523972034454346, 0.0004141703248023987, 0.0004431009292602539, 0.00047203153371810913, 0.0005009621381759644, 0.0005298927426338196, 0.0005588233470916748, 0.00058775395154953, 0.0006166845560073853, 0.0006456151604652405, 0.0006745457649230957, 0.0007034763693809509, 0.0007324069738388062, 0.0007613375782966614, 0.0007902681827545166, 0.0008191987872123718, 0.000848129391670227, 0.0008770599961280823, 0.0009059906005859375]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 4.0, 7.0, 9.0, 10.0, 12.0, 20.0, 19.0, 21.0, 26.0, 43.0, 48.0, 40.0, 58.0, 73.0, 95.0, 150.0, 199.0, 339.0, 469.0, 809.0, 3201.0, 374313.0, 660854.0, 5094.0, 908.0, 503.0, 317.0, 228.0, 136.0, 119.0, 92.0, 74.0, 61.0, 37.0, 36.0, 28.0, 22.0, 19.0, 20.0, 8.0, 9.0, 8.0, 5.0, 7.0, 4.0, 1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.019439697265625, -0.01885199546813965, -0.018264293670654297, -0.017676591873168945, -0.017088890075683594, -0.016501188278198242, -0.01591348648071289, -0.015325784683227539, -0.014738082885742188, -0.014150381088256836, -0.013562679290771484, -0.012974977493286133, -0.012387275695800781, -0.01179957389831543, -0.011211872100830078, -0.010624170303344727, -0.010036468505859375, -0.009448766708374023, -0.008861064910888672, -0.00827336311340332, -0.007685661315917969, -0.007097959518432617, -0.006510257720947266, -0.005922555923461914, -0.0053348541259765625, -0.004747152328491211, -0.004159450531005859, -0.003571748733520508, -0.0029840469360351562, -0.0023963451385498047, -0.0018086433410644531, -0.0012209415435791016, -0.00063323974609375, -4.553794860839844e-05, 0.0005421638488769531, 0.0011298656463623047, 0.0017175674438476562, 0.002305269241333008, 0.0028929710388183594, 0.003480672836303711, 0.0040683746337890625, 0.004656076431274414, 0.005243778228759766, 0.005831480026245117, 0.006419181823730469, 0.00700688362121582, 0.007594585418701172, 0.008182287216186523, 0.008769989013671875, 0.009357690811157227, 0.009945392608642578, 0.01053309440612793, 0.011120796203613281, 0.011708498001098633, 0.012296199798583984, 0.012883901596069336, 0.013471603393554688, 0.014059305191040039, 0.01464700698852539, 0.015234708786010742, 0.015822410583496094, 0.016410112380981445, 0.016997814178466797, 0.01758551597595215, 0.0181732177734375]}, "gradients/decoder.transformer.h.2.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 44.0, 737.0, 235.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.006766138132661581, -0.006522041745483875, -0.006277945823967457, -0.006033849436789751, -0.005789753515273333, -0.005545657128095627, -0.005301561206579208, -0.005057464819401503, -0.004813368432223797, -0.004569272045046091, -0.004325176123529673, -0.004081079736351967, -0.0038369838148355484, -0.0035928874276578426, -0.0033487912733107805, -0.0031046951189637184, -0.0028605991974473, -0.002616503043100238, -0.0023724068887531757, -0.00212831050157547, -0.0018842144636437297, -0.0016401183092966676, -0.0013960220385342836, -0.0011519258841872215, -0.0009078297298401594, -0.0006637335754930973, -0.0004196373629383743, -0.00017554115038365126, 6.855500396341085e-05, 0.00031265115831047297, 0.0005567474290728569, 0.000800843583419919, 0.0010449392721056938, 0.001289035426452756, 0.001533131580799818, 0.001777227851562202, 0.0020213238894939423, 0.002265420276671648, 0.00250951643101871, 0.0027536125853657722, 0.0029977087397128344, 0.0032418048940598965, 0.0034859010484069586, 0.0037299972027540207, 0.0039740935899317265, 0.004218189511448145, 0.004462285898625851, 0.004706381820142269, 0.004950478207319975, 0.005194574594497681, 0.005438670516014099, 0.005682766903191805, 0.005926862824708223, 0.006170959211885929, 0.0064150551334023476, 0.006659151520580053, 0.006903247907757759, 0.007147344294935465, 0.007391440216451883, 0.007635536603629589, 0.007879632525146008, 0.008123728446662426, 0.008367825299501419, 0.008611921221017838, 0.008856017142534256]}, "gradients/decoder.transformer.h.2.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 1.0, 4.0, 6.0, 8.0, 9.0, 4.0, 6.0, 6.0, 14.0, 10.0, 9.0, 16.0, 12.0, 21.0, 16.0, 26.0, 24.0, 34.0, 32.0, 29.0, 26.0, 30.0, 35.0, 36.0, 39.0, 32.0, 29.0, 40.0, 33.0, 35.0, 33.0, 37.0, 26.0, 30.0, 18.0, 28.0, 31.0, 24.0, 19.0, 17.0, 21.0, 12.0, 12.0, 14.0, 10.0, 13.0, 9.0, 4.0, 6.0, 7.0, 6.0, 7.0, 1.0, 5.0, 1.0, 0.0, 3.0], "bins": [-0.0005356669425964355, -0.0005198512226343155, -0.0005040355026721954, -0.0004882197827100754, -0.0004724040627479553, -0.00045658834278583527, -0.0004407726228237152, -0.00042495690286159515, -0.0004091411828994751, -0.00039332546293735504, -0.000377509742975235, -0.00036169402301311493, -0.0003458783030509949, -0.0003300625830888748, -0.00031424686312675476, -0.0002984311431646347, -0.00028261542320251465, -0.0002667997032403946, -0.00025098398327827454, -0.00023516826331615448, -0.00021935254335403442, -0.00020353682339191437, -0.0001877211034297943, -0.00017190538346767426, -0.0001560896635055542, -0.00014027394354343414, -0.0001244582235813141, -0.00010864250361919403, -9.282678365707397e-05, -7.701106369495392e-05, -6.119534373283386e-05, -4.5379623770713806e-05, -2.956390380859375e-05, -1.3748183846473694e-05, 2.0675361156463623e-06, 1.788325607776642e-05, 3.3698976039886475e-05, 4.951469600200653e-05, 6.533041596412659e-05, 8.114613592624664e-05, 9.69618558883667e-05, 0.00011277757585048676, 0.0001285932958126068, 0.00014440901577472687, 0.00016022473573684692, 0.00017604045569896698, 0.00019185617566108704, 0.0002076718956232071, 0.00022348761558532715, 0.0002393033355474472, 0.00025511905550956726, 0.0002709347754716873, 0.0002867504954338074, 0.00030256621539592743, 0.0003183819353580475, 0.00033419765532016754, 0.0003500133752822876, 0.00036582909524440765, 0.0003816448152065277, 0.00039746053516864777, 0.0004132762551307678, 0.0004290919750928879, 0.00044490769505500793, 0.000460723415017128, 0.00047653913497924805]}, "gradients/decoder.transformer.h.2.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 0.0, 2.0, 4.0, 5.0, 11.0, 4.0, 8.0, 11.0, 13.0, 12.0, 19.0, 11.0, 14.0, 19.0, 21.0, 28.0, 27.0, 30.0, 30.0, 36.0, 42.0, 45.0, 34.0, 31.0, 31.0, 44.0, 37.0, 34.0, 36.0, 30.0, 41.0, 25.0, 33.0, 32.0, 27.0, 23.0, 25.0, 25.0, 25.0, 20.0, 10.0, 7.0, 7.0, 6.0, 8.0, 5.0, 6.0, 3.0, 6.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.18359375, -5.98553466796875, -5.7874755859375, -5.58941650390625, -5.391357421875, -5.19329833984375, -4.9952392578125, -4.79718017578125, -4.59912109375, -4.40106201171875, -4.2030029296875, -4.00494384765625, -3.806884765625, -3.60882568359375, -3.4107666015625, -3.21270751953125, -3.0146484375, -2.81658935546875, -2.6185302734375, -2.42047119140625, -2.222412109375, -2.02435302734375, -1.8262939453125, -1.62823486328125, -1.43017578125, -1.23211669921875, -1.0340576171875, -0.83599853515625, -0.637939453125, -0.43988037109375, -0.2418212890625, -0.04376220703125, 0.154296875, 0.35235595703125, 0.5504150390625, 0.74847412109375, 0.946533203125, 1.14459228515625, 1.3426513671875, 1.54071044921875, 1.73876953125, 1.93682861328125, 2.1348876953125, 2.33294677734375, 2.531005859375, 2.72906494140625, 2.9271240234375, 3.12518310546875, 3.3232421875, 3.52130126953125, 3.7193603515625, 3.91741943359375, 4.115478515625, 4.31353759765625, 4.5115966796875, 4.70965576171875, 4.90771484375, 5.10577392578125, 5.3038330078125, 5.50189208984375, 5.699951171875, 5.89801025390625, 6.0960693359375, 6.29412841796875, 6.4921875]}, "gradients/decoder.transformer.h.2.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 7.0, 6.0, 6.0, 13.0, 9.0, 19.0, 24.0, 36.0, 47.0, 50.0, 68.0, 78.0, 120.0, 141.0, 185.0, 236.0, 307.0, 377.0, 522.0, 692.0, 883.0, 1114.0, 1417.0, 2095.0, 3114.0, 5982.0, 20127.0, 109253.0, 579720.0, 255513.0, 42727.0, 9911.0, 3974.0, 2446.0, 1749.0, 1313.0, 999.0, 692.0, 588.0, 467.0, 346.0, 279.0, 219.0, 152.0, 123.0, 99.0, 79.0, 56.0, 50.0, 38.0, 28.0, 19.0, 17.0, 9.0, 11.0, 6.0, 5.0, 2.0, 1.0, 2.0, 4.0], "bins": [-15.3984375, -14.9132080078125, -14.427978515625, -13.9427490234375, -13.45751953125, -12.9722900390625, -12.487060546875, -12.0018310546875, -11.5166015625, -11.0313720703125, -10.546142578125, -10.0609130859375, -9.57568359375, -9.0904541015625, -8.605224609375, -8.1199951171875, -7.634765625, -7.1495361328125, -6.664306640625, -6.1790771484375, -5.69384765625, -5.2086181640625, -4.723388671875, -4.2381591796875, -3.7529296875, -3.2677001953125, -2.782470703125, -2.2972412109375, -1.81201171875, -1.3267822265625, -0.841552734375, -0.3563232421875, 0.12890625, 0.6141357421875, 1.099365234375, 1.5845947265625, 2.06982421875, 2.5550537109375, 3.040283203125, 3.5255126953125, 4.0107421875, 4.4959716796875, 4.981201171875, 5.4664306640625, 5.95166015625, 6.4368896484375, 6.922119140625, 7.4073486328125, 7.892578125, 8.3778076171875, 8.863037109375, 9.3482666015625, 9.83349609375, 10.3187255859375, 10.803955078125, 11.2891845703125, 11.7744140625, 12.2596435546875, 12.744873046875, 13.2301025390625, 13.71533203125, 14.2005615234375, 14.685791015625, 15.1710205078125, 15.65625]}, "gradients/decoder.transformer.h.2.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 2.0, 5.0, 4.0, 5.0, 5.0, 7.0, 4.0, 9.0, 14.0, 9.0, 8.0, 17.0, 15.0, 16.0, 21.0, 22.0, 32.0, 23.0, 43.0, 44.0, 55.0, 81.0, 138.0, 217.0, 1417.0, 220.0, 164.0, 85.0, 63.0, 46.0, 33.0, 30.0, 26.0, 21.0, 27.0, 14.0, 20.0, 15.0, 16.0, 11.0, 7.0, 9.0, 10.0, 6.0, 2.0, 6.0, 7.0, 2.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0], "bins": [-19.46875, -18.905029296875, -18.34130859375, -17.777587890625, -17.2138671875, -16.650146484375, -16.08642578125, -15.522705078125, -14.958984375, -14.395263671875, -13.83154296875, -13.267822265625, -12.7041015625, -12.140380859375, -11.57666015625, -11.012939453125, -10.44921875, -9.885498046875, -9.32177734375, -8.758056640625, -8.1943359375, -7.630615234375, -7.06689453125, -6.503173828125, -5.939453125, -5.375732421875, -4.81201171875, -4.248291015625, -3.6845703125, -3.120849609375, -2.55712890625, -1.993408203125, -1.4296875, -0.865966796875, -0.30224609375, 0.261474609375, 0.8251953125, 1.388916015625, 1.95263671875, 2.516357421875, 3.080078125, 3.643798828125, 4.20751953125, 4.771240234375, 5.3349609375, 5.898681640625, 6.46240234375, 7.026123046875, 7.58984375, 8.153564453125, 8.71728515625, 9.281005859375, 9.8447265625, 10.408447265625, 10.97216796875, 11.535888671875, 12.099609375, 12.663330078125, 13.22705078125, 13.790771484375, 14.3544921875, 14.918212890625, 15.48193359375, 16.045654296875, 16.609375]}, "gradients/decoder.transformer.h.2.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 5.0, 1.0, 2.0, 1.0, 3.0, 1.0, 5.0, 7.0, 8.0, 12.0, 9.0, 16.0, 16.0, 21.0, 28.0, 30.0, 49.0, 51.0, 68.0, 95.0, 154.0, 210.0, 370.0, 708.0, 1535.0, 5277.0, 3071567.0, 60447.0, 2543.0, 1038.0, 494.0, 282.0, 177.0, 112.0, 82.0, 71.0, 48.0, 30.0, 29.0, 18.0, 19.0, 17.0, 18.0, 13.0, 9.0, 6.0, 7.0, 2.0, 6.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-67.875, -65.4306640625, -62.986328125, -60.5419921875, -58.09765625, -55.6533203125, -53.208984375, -50.7646484375, -48.3203125, -45.8759765625, -43.431640625, -40.9873046875, -38.54296875, -36.0986328125, -33.654296875, -31.2099609375, -28.765625, -26.3212890625, -23.876953125, -21.4326171875, -18.98828125, -16.5439453125, -14.099609375, -11.6552734375, -9.2109375, -6.7666015625, -4.322265625, -1.8779296875, 0.56640625, 3.0107421875, 5.455078125, 7.8994140625, 10.34375, 12.7880859375, 15.232421875, 17.6767578125, 20.12109375, 22.5654296875, 25.009765625, 27.4541015625, 29.8984375, 32.3427734375, 34.787109375, 37.2314453125, 39.67578125, 42.1201171875, 44.564453125, 47.0087890625, 49.453125, 51.8974609375, 54.341796875, 56.7861328125, 59.23046875, 61.6748046875, 64.119140625, 66.5634765625, 69.0078125, 71.4521484375, 73.896484375, 76.3408203125, 78.78515625, 81.2294921875, 83.673828125, 86.1181640625, 88.5625]}, "gradients/decoder.transformer.h.2.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 16.0, 33.0, 77.0, 178.0, 218.0, 232.0, 150.0, 71.0, 23.0, 10.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-158.3274383544922, -155.07431030273438, -151.82119750976562, -148.5680694580078, -145.31495666503906, -142.06182861328125, -138.8087158203125, -135.5555877685547, -132.30247497558594, -129.04934692382812, -125.79623413085938, -122.5431137084961, -119.28999328613281, -116.03687286376953, -112.78375244140625, -109.53062438964844, -106.27750396728516, -103.02438354492188, -99.7712631225586, -96.51814270019531, -93.26502227783203, -90.01190185546875, -86.75877380371094, -83.50566101074219, -80.25253295898438, -76.9994125366211, -73.74629211425781, -70.49317169189453, -67.24005126953125, -63.98693084716797, -60.73380661010742, -57.48068618774414, -54.227569580078125, -50.974449157714844, -47.72132873535156, -44.46820831298828, -41.215087890625, -37.96196746826172, -34.70884323120117, -31.45572280883789, -28.20260238647461, -24.949481964111328, -21.696361541748047, -18.443239212036133, -15.190118789672852, -11.93699836730957, -8.683876037597656, -5.430755615234375, -2.1776351928710938, 1.0754857063293457, 4.328606605529785, 7.581727981567383, 10.834848403930664, 14.087968826293945, 17.34109115600586, 20.59421157836914, 23.847332000732422, 27.100452423095703, 30.353572845458984, 33.60669708251953, 36.85981750488281, 40.112937927246094, 43.366058349609375, 46.619178771972656, 49.87229919433594]}, "gradients/decoder.transformer.h.2.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 6.0, 4.0, 0.0, 3.0, 8.0, 8.0, 9.0, 12.0, 5.0, 13.0, 17.0, 22.0, 24.0, 31.0, 27.0, 35.0, 34.0, 35.0, 31.0, 29.0, 52.0, 41.0, 59.0, 47.0, 40.0, 36.0, 33.0, 45.0, 27.0, 37.0, 25.0, 39.0, 22.0, 16.0, 24.0, 14.0, 24.0, 15.0, 12.0, 12.0, 13.0, 7.0, 4.0, 2.0, 5.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-54.20210266113281, -52.51921081542969, -50.83631896972656, -49.15342712402344, -47.47053909301758, -45.78764724731445, -44.10475540161133, -42.4218635559082, -40.73897171020508, -39.05607986450195, -37.37318801879883, -35.69029998779297, -34.007408142089844, -32.32451629638672, -30.641624450683594, -28.95873260498047, -27.275842666625977, -25.59295082092285, -23.91006088256836, -22.227169036865234, -20.54427719116211, -18.861385345458984, -17.178495407104492, -15.495603561401367, -13.812712669372559, -12.12982177734375, -10.446929931640625, -8.764039039611816, -7.08114767074585, -5.398256301879883, -3.715365409851074, -2.032473564147949, -0.3495826721191406, 1.3333085775375366, 3.016199827194214, 4.699090957641602, 6.381982326507568, 8.064873695373535, 9.747764587402344, 11.430656433105469, 13.113547325134277, 14.796438217163086, 16.47933006286621, 18.162220001220703, 19.845111846923828, 21.528003692626953, 23.210895538330078, 24.893787384033203, 26.576677322387695, 28.25956916809082, 29.942459106445312, 31.625350952148438, 33.30824279785156, 34.99113464355469, 36.67402648925781, 38.35691833496094, 40.0398063659668, 41.72269821166992, 43.40559005737305, 45.088478088378906, 46.77136993408203, 48.454261779785156, 50.13715362548828, 51.820045471191406, 53.50293731689453]}, "gradients/decoder.transformer.h.1.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 2.0, 1.0, 4.0, 4.0, 8.0, 7.0, 7.0, 11.0, 17.0, 13.0, 12.0, 23.0, 15.0, 20.0, 24.0, 31.0, 24.0, 45.0, 34.0, 48.0, 43.0, 48.0, 44.0, 40.0, 51.0, 34.0, 43.0, 34.0, 31.0, 44.0, 27.0, 26.0, 30.0, 30.0, 19.0, 16.0, 9.0, 10.0, 14.0, 16.0, 10.0, 9.0, 6.0, 4.0, 5.0, 3.0, 1.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0], "bins": [-7.59375, -7.3668212890625, -7.139892578125, -6.9129638671875, -6.68603515625, -6.4591064453125, -6.232177734375, -6.0052490234375, -5.7783203125, -5.5513916015625, -5.324462890625, -5.0975341796875, -4.87060546875, -4.6436767578125, -4.416748046875, -4.1898193359375, -3.962890625, -3.7359619140625, -3.509033203125, -3.2821044921875, -3.05517578125, -2.8282470703125, -2.601318359375, -2.3743896484375, -2.1474609375, -1.9205322265625, -1.693603515625, -1.4666748046875, -1.23974609375, -1.0128173828125, -0.785888671875, -0.5589599609375, -0.33203125, -0.1051025390625, 0.121826171875, 0.3487548828125, 0.57568359375, 0.8026123046875, 1.029541015625, 1.2564697265625, 1.4833984375, 1.7103271484375, 1.937255859375, 2.1641845703125, 2.39111328125, 2.6180419921875, 2.844970703125, 3.0718994140625, 3.298828125, 3.5257568359375, 3.752685546875, 3.9796142578125, 4.20654296875, 4.4334716796875, 4.660400390625, 4.8873291015625, 5.1142578125, 5.3411865234375, 5.568115234375, 5.7950439453125, 6.02197265625, 6.2489013671875, 6.475830078125, 6.7027587890625, 6.9296875]}, "gradients/decoder.transformer.h.1.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 5.0, 8.0, 7.0, 15.0, 21.0, 32.0, 41.0, 73.0, 141.0, 233.0, 500.0, 1093.0, 2816.0, 9157.0, 51339.0, 700770.0, 2783335.0, 587718.0, 44147.0, 8206.0, 2580.0, 1023.0, 453.0, 249.0, 119.0, 78.0, 41.0, 31.0, 31.0, 8.0, 6.0, 5.0, 3.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.4375, -15.879638671875, -15.32177734375, -14.763916015625, -14.2060546875, -13.648193359375, -13.09033203125, -12.532470703125, -11.974609375, -11.416748046875, -10.85888671875, -10.301025390625, -9.7431640625, -9.185302734375, -8.62744140625, -8.069580078125, -7.51171875, -6.953857421875, -6.39599609375, -5.838134765625, -5.2802734375, -4.722412109375, -4.16455078125, -3.606689453125, -3.048828125, -2.490966796875, -1.93310546875, -1.375244140625, -0.8173828125, -0.259521484375, 0.29833984375, 0.856201171875, 1.4140625, 1.971923828125, 2.52978515625, 3.087646484375, 3.6455078125, 4.203369140625, 4.76123046875, 5.319091796875, 5.876953125, 6.434814453125, 6.99267578125, 7.550537109375, 8.1083984375, 8.666259765625, 9.22412109375, 9.781982421875, 10.33984375, 10.897705078125, 11.45556640625, 12.013427734375, 12.5712890625, 13.129150390625, 13.68701171875, 14.244873046875, 14.802734375, 15.360595703125, 15.91845703125, 16.476318359375, 17.0341796875, 17.592041015625, 18.14990234375, 18.707763671875, 19.265625]}, "gradients/decoder.transformer.h.1.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0, 6.0, 26.0, 95.0, 340.0, 948.0, 1662.0, 713.0, 216.0, 60.0, 13.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.0, -59.41064453125, -57.8212890625, -56.23193359375, -54.642578125, -53.05322265625, -51.4638671875, -49.87451171875, -48.28515625, -46.69580078125, -45.1064453125, -43.51708984375, -41.927734375, -40.33837890625, -38.7490234375, -37.15966796875, -35.5703125, -33.98095703125, -32.3916015625, -30.80224609375, -29.212890625, -27.62353515625, -26.0341796875, -24.44482421875, -22.85546875, -21.26611328125, -19.6767578125, -18.08740234375, -16.498046875, -14.90869140625, -13.3193359375, -11.72998046875, -10.140625, -8.55126953125, -6.9619140625, -5.37255859375, -3.783203125, -2.19384765625, -0.6044921875, 0.98486328125, 2.57421875, 4.16357421875, 5.7529296875, 7.34228515625, 8.931640625, 10.52099609375, 12.1103515625, 13.69970703125, 15.2890625, 16.87841796875, 18.4677734375, 20.05712890625, 21.646484375, 23.23583984375, 24.8251953125, 26.41455078125, 28.00390625, 29.59326171875, 31.1826171875, 32.77197265625, 34.361328125, 35.95068359375, 37.5400390625, 39.12939453125, 40.71875]}, "gradients/decoder.transformer.h.1.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 3.0, 13.0, 18.0, 78.0, 350.0, 2839.0, 4106209.0, 83254.0, 1249.0, 192.0, 58.0, 11.0, 6.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-90.625, -87.11328125, -83.6015625, -80.08984375, -76.578125, -73.06640625, -69.5546875, -66.04296875, -62.53125, -59.01953125, -55.5078125, -51.99609375, -48.484375, -44.97265625, -41.4609375, -37.94921875, -34.4375, -30.92578125, -27.4140625, -23.90234375, -20.390625, -16.87890625, -13.3671875, -9.85546875, -6.34375, -2.83203125, 0.6796875, 4.19140625, 7.703125, 11.21484375, 14.7265625, 18.23828125, 21.75, 25.26171875, 28.7734375, 32.28515625, 35.796875, 39.30859375, 42.8203125, 46.33203125, 49.84375, 53.35546875, 56.8671875, 60.37890625, 63.890625, 67.40234375, 70.9140625, 74.42578125, 77.9375, 81.44921875, 84.9609375, 88.47265625, 91.984375, 95.49609375, 99.0078125, 102.51953125, 106.03125, 109.54296875, 113.0546875, 116.56640625, 120.078125, 123.58984375, 127.1015625, 130.61328125, 134.125]}, "gradients/decoder.transformer.h.1.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 46.0, 461.0, 457.0, 45.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-289.13006591796875, -279.4825134277344, -269.8349609375, -260.1874084472656, -250.53985595703125, -240.89230346679688, -231.2447509765625, -221.59719848632812, -211.94964599609375, -202.30209350585938, -192.654541015625, -183.00698852539062, -173.35943603515625, -163.71188354492188, -154.0643310546875, -144.41677856445312, -134.7692413330078, -125.12168884277344, -115.47413635253906, -105.82658386230469, -96.17903137207031, -86.53147888183594, -76.8839340209961, -67.23638153076172, -57.588829040527344, -47.94127655029297, -38.293724060058594, -28.646175384521484, -18.99862289428711, -9.351070404052734, 0.296478271484375, 9.94403076171875, 19.591583251953125, 29.2391357421875, 38.886688232421875, 48.534236907958984, 58.18178939819336, 67.829345703125, 77.47689056396484, 87.12444305419922, 96.7719955444336, 106.41954803466797, 116.06710052490234, 125.71464538574219, 135.36219787597656, 145.00975036621094, 154.6573028564453, 164.3048553466797, 173.95240783691406, 183.59996032714844, 193.2475128173828, 202.8950653076172, 212.54261779785156, 222.19017028808594, 231.83770751953125, 241.48526000976562, 251.1328125, 260.7803649902344, 270.42791748046875, 280.0754699707031, 289.7230224609375, 299.3705749511719, 309.01812744140625, 318.6656799316406, 328.313232421875]}, "gradients/decoder.transformer.h.1.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 3.0, 2.0, 5.0, 3.0, 8.0, 10.0, 10.0, 18.0, 16.0, 11.0, 26.0, 19.0, 32.0, 31.0, 39.0, 22.0, 45.0, 47.0, 35.0, 45.0, 51.0, 45.0, 37.0, 39.0, 41.0, 40.0, 50.0, 35.0, 27.0, 41.0, 30.0, 26.0, 24.0, 25.0, 12.0, 19.0, 9.0, 11.0, 9.0, 2.0, 4.0, 2.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.79084396362305, -45.97328567504883, -44.15572738647461, -42.33816909790039, -40.52061080932617, -38.70305252075195, -36.885494232177734, -35.067935943603516, -33.2503776550293, -31.432819366455078, -29.61526107788086, -27.79770278930664, -25.980144500732422, -24.162586212158203, -22.345027923583984, -20.527469635009766, -18.709911346435547, -16.892353057861328, -15.07479476928711, -13.25723648071289, -11.439678192138672, -9.622119903564453, -7.804561614990234, -5.987003326416016, -4.169445037841797, -2.351886749267578, -0.5343284606933594, 1.2832298278808594, 3.100788116455078, 4.918346405029297, 6.735904693603516, 8.553462982177734, 10.371017456054688, 12.188575744628906, 14.006134033203125, 15.823692321777344, 17.641250610351562, 19.45880889892578, 21.2763671875, 23.09392547607422, 24.911483764648438, 26.729042053222656, 28.546600341796875, 30.364158630371094, 32.18171691894531, 33.99927520751953, 35.81683349609375, 37.63439178466797, 39.45195007324219, 41.269508361816406, 43.087066650390625, 44.904624938964844, 46.72218322753906, 48.53974151611328, 50.3572998046875, 52.17485809326172, 53.99241638183594, 55.809974670410156, 57.627532958984375, 59.445091247558594, 61.26264953613281, 63.08020782470703, 64.89776611328125, 66.71532440185547, 68.53288269042969]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 4.0, 1.0, 1.0, 2.0, 0.0, 7.0, 2.0, 2.0, 7.0, 5.0, 2.0, 10.0, 11.0, 16.0, 11.0, 21.0, 20.0, 33.0, 25.0, 31.0, 31.0, 29.0, 49.0, 54.0, 50.0, 43.0, 56.0, 44.0, 39.0, 47.0, 51.0, 48.0, 33.0, 37.0, 30.0, 25.0, 27.0, 14.0, 16.0, 21.0, 6.0, 15.0, 10.0, 10.0, 6.0, 3.0, 5.0, 3.0, 5.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.5234375, -5.325439453125, -5.12744140625, -4.929443359375, -4.7314453125, -4.533447265625, -4.33544921875, -4.137451171875, -3.939453125, -3.741455078125, -3.54345703125, -3.345458984375, -3.1474609375, -2.949462890625, -2.75146484375, -2.553466796875, -2.35546875, -2.157470703125, -1.95947265625, -1.761474609375, -1.5634765625, -1.365478515625, -1.16748046875, -0.969482421875, -0.771484375, -0.573486328125, -0.37548828125, -0.177490234375, 0.0205078125, 0.218505859375, 0.41650390625, 0.614501953125, 0.8125, 1.010498046875, 1.20849609375, 1.406494140625, 1.6044921875, 1.802490234375, 2.00048828125, 2.198486328125, 2.396484375, 2.594482421875, 2.79248046875, 2.990478515625, 3.1884765625, 3.386474609375, 3.58447265625, 3.782470703125, 3.98046875, 4.178466796875, 4.37646484375, 4.574462890625, 4.7724609375, 4.970458984375, 5.16845703125, 5.366455078125, 5.564453125, 5.762451171875, 5.96044921875, 6.158447265625, 6.3564453125, 6.554443359375, 6.75244140625, 6.950439453125, 7.1484375]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 4.0, 5.0, 9.0, 13.0, 16.0, 23.0, 38.0, 37.0, 54.0, 93.0, 154.0, 239.0, 352.0, 498.0, 744.0, 1133.0, 1758.0, 2609.0, 3940.0, 5992.0, 8986.0, 13844.0, 21539.0, 33967.0, 54452.0, 87990.0, 142932.0, 204429.0, 172586.0, 108240.0, 65649.0, 41390.0, 26180.0, 17120.0, 10896.0, 6850.0, 4706.0, 3110.0, 1970.0, 1401.0, 865.0, 552.0, 398.0, 251.0, 214.0, 103.0, 67.0, 53.0, 34.0, 24.0, 30.0, 10.0, 4.0, 5.0, 4.0, 3.0, 3.0, 1.0, 1.0, 1.0], "bins": [-0.4384765625, -0.4246482849121094, -0.41082000732421875, -0.3969917297363281, -0.3831634521484375, -0.3693351745605469, -0.35550689697265625, -0.3416786193847656, -0.327850341796875, -0.3140220642089844, -0.30019378662109375, -0.2863655090332031, -0.2725372314453125, -0.2587089538574219, -0.24488067626953125, -0.23105239868164062, -0.21722412109375, -0.20339584350585938, -0.18956756591796875, -0.17573928833007812, -0.1619110107421875, -0.14808273315429688, -0.13425445556640625, -0.12042617797851562, -0.106597900390625, -0.09276962280273438, -0.07894134521484375, -0.06511306762695312, -0.0512847900390625, -0.037456512451171875, -0.02362823486328125, -0.009799957275390625, 0.0040283203125, 0.017856597900390625, 0.03168487548828125, 0.045513153076171875, 0.0593414306640625, 0.07316970825195312, 0.08699798583984375, 0.10082626342773438, 0.114654541015625, 0.12848281860351562, 0.14231109619140625, 0.15613937377929688, 0.1699676513671875, 0.18379592895507812, 0.19762420654296875, 0.21145248413085938, 0.22528076171875, 0.23910903930664062, 0.25293731689453125, 0.2667655944824219, 0.2805938720703125, 0.2944221496582031, 0.30825042724609375, 0.3220787048339844, 0.335906982421875, 0.3497352600097656, 0.36356353759765625, 0.3773918151855469, 0.3912200927734375, 0.4050483703613281, 0.41887664794921875, 0.4327049255371094, 0.446533203125]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 7.0, 7.0, 5.0, 6.0, 9.0, 7.0, 18.0, 14.0, 13.0, 26.0, 21.0, 28.0, 25.0, 19.0, 13.0, 31.0, 26.0, 32.0, 27.0, 38.0, 36.0, 36.0, 1077.0, 34.0, 33.0, 54.0, 38.0, 37.0, 44.0, 32.0, 31.0, 23.0, 19.0, 20.0, 18.0, 16.0, 25.0, 7.0, 14.0, 12.0, 15.0, 9.0, 7.0, 1.0, 7.0, 3.0, 5.0, 3.0, 2.0, 0.0, 2.0, 3.0], "bins": [-3.6015625, -3.49749755859375, -3.3934326171875, -3.28936767578125, -3.185302734375, -3.08123779296875, -2.9771728515625, -2.87310791015625, -2.76904296875, -2.66497802734375, -2.5609130859375, -2.45684814453125, -2.352783203125, -2.24871826171875, -2.1446533203125, -2.04058837890625, -1.9365234375, -1.83245849609375, -1.7283935546875, -1.62432861328125, -1.520263671875, -1.41619873046875, -1.3121337890625, -1.20806884765625, -1.10400390625, -0.99993896484375, -0.8958740234375, -0.79180908203125, -0.687744140625, -0.58367919921875, -0.4796142578125, -0.37554931640625, -0.271484375, -0.16741943359375, -0.0633544921875, 0.04071044921875, 0.144775390625, 0.24884033203125, 0.3529052734375, 0.45697021484375, 0.56103515625, 0.66510009765625, 0.7691650390625, 0.87322998046875, 0.977294921875, 1.08135986328125, 1.1854248046875, 1.28948974609375, 1.3935546875, 1.49761962890625, 1.6016845703125, 1.70574951171875, 1.809814453125, 1.91387939453125, 2.0179443359375, 2.12200927734375, 2.22607421875, 2.33013916015625, 2.4342041015625, 2.53826904296875, 2.642333984375, 2.74639892578125, 2.8504638671875, 2.95452880859375, 3.05859375]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 2.0, 7.0, 13.0, 7.0, 25.0, 24.0, 40.0, 75.0, 83.0, 128.0, 184.0, 307.0, 514.0, 726.0, 1156.0, 1791.0, 2699.0, 4138.0, 6513.0, 10119.0, 15890.0, 24559.0, 37890.0, 58809.0, 90498.0, 137906.0, 1227445.0, 160922.0, 111102.0, 72130.0, 47028.0, 30124.0, 19176.0, 12516.0, 8033.0, 5080.0, 3383.0, 2133.0, 1396.0, 843.0, 609.0, 414.0, 249.0, 153.0, 103.0, 74.0, 38.0, 33.0, 21.0, 9.0, 8.0, 5.0, 8.0, 4.0, 2.0, 0.0, 1.0], "bins": [-0.38671875, -0.3752555847167969, -0.36379241943359375, -0.3523292541503906, -0.3408660888671875, -0.3294029235839844, -0.31793975830078125, -0.3064765930175781, -0.295013427734375, -0.2835502624511719, -0.27208709716796875, -0.2606239318847656, -0.2491607666015625, -0.23769760131835938, -0.22623443603515625, -0.21477127075195312, -0.20330810546875, -0.19184494018554688, -0.18038177490234375, -0.16891860961914062, -0.1574554443359375, -0.14599227905273438, -0.13452911376953125, -0.12306594848632812, -0.111602783203125, -0.10013961791992188, -0.08867645263671875, -0.07721328735351562, -0.0657501220703125, -0.054286956787109375, -0.04282379150390625, -0.031360626220703125, -0.0198974609375, -0.008434295654296875, 0.00302886962890625, 0.014492034912109375, 0.0259552001953125, 0.037418365478515625, 0.04888153076171875, 0.060344696044921875, 0.071807861328125, 0.08327102661132812, 0.09473419189453125, 0.10619735717773438, 0.1176605224609375, 0.12912368774414062, 0.14058685302734375, 0.15205001831054688, 0.16351318359375, 0.17497634887695312, 0.18643951416015625, 0.19790267944335938, 0.2093658447265625, 0.22082901000976562, 0.23229217529296875, 0.24375534057617188, 0.255218505859375, 0.2666816711425781, 0.27814483642578125, 0.2896080017089844, 0.3010711669921875, 0.3125343322753906, 0.32399749755859375, 0.3354606628417969, 0.346923828125]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 5.0, 1.0, 6.0, 6.0, 5.0, 4.0, 8.0, 8.0, 10.0, 24.0, 27.0, 25.0, 35.0, 32.0, 33.0, 45.0, 37.0, 50.0, 53.0, 76.0, 52.0, 55.0, 43.0, 52.0, 40.0, 44.0, 39.0, 32.0, 24.0, 27.0, 27.0, 26.0, 8.0, 12.0, 10.0, 9.0, 6.0, 3.0, 3.0, 4.0, 2.0, 1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00109100341796875, -0.0010564029216766357, -0.0010218024253845215, -0.0009872019290924072, -0.000952601432800293, -0.0009180009365081787, -0.0008834004402160645, -0.0008487999439239502, -0.0008141994476318359, -0.0007795989513397217, -0.0007449984550476074, -0.0007103979587554932, -0.0006757974624633789, -0.0006411969661712646, -0.0006065964698791504, -0.0005719959735870361, -0.0005373954772949219, -0.0005027949810028076, -0.00046819448471069336, -0.0004335939884185791, -0.00039899349212646484, -0.0003643929958343506, -0.00032979249954223633, -0.00029519200325012207, -0.0002605915069580078, -0.00022599101066589355, -0.0001913905143737793, -0.00015679001808166504, -0.00012218952178955078, -8.758902549743652e-05, -5.2988529205322266e-05, -1.8388032913208008e-05, 1.621246337890625e-05, 5.081295967102051e-05, 8.541345596313477e-05, 0.00012001395225524902, 0.00015461444854736328, 0.00018921494483947754, 0.0002238154411315918, 0.00025841593742370605, 0.0002930164337158203, 0.00032761693000793457, 0.00036221742630004883, 0.0003968179225921631, 0.00043141841888427734, 0.0004660189151763916, 0.0005006194114685059, 0.0005352199077606201, 0.0005698204040527344, 0.0006044209003448486, 0.0006390213966369629, 0.0006736218929290771, 0.0007082223892211914, 0.0007428228855133057, 0.0007774233818054199, 0.0008120238780975342, 0.0008466243743896484, 0.0008812248706817627, 0.000915825366973877, 0.0009504258632659912, 0.0009850263595581055, 0.0010196268558502197, 0.001054227352142334, 0.0010888278484344482, 0.0011234283447265625]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 10.0, 7.0, 10.0, 11.0, 12.0, 17.0, 17.0, 44.0, 32.0, 47.0, 70.0, 88.0, 114.0, 156.0, 250.0, 345.0, 588.0, 1007.0, 9320.0, 1020753.0, 12771.0, 1049.0, 595.0, 360.0, 244.0, 139.0, 104.0, 103.0, 83.0, 54.0, 25.0, 30.0, 18.0, 20.0, 17.0, 9.0, 10.0, 10.0, 6.0, 4.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0196990966796875, -0.019074201583862305, -0.01844930648803711, -0.017824411392211914, -0.01719951629638672, -0.016574621200561523, -0.015949726104736328, -0.015324831008911133, -0.014699935913085938, -0.014075040817260742, -0.013450145721435547, -0.012825250625610352, -0.012200355529785156, -0.011575460433959961, -0.010950565338134766, -0.01032567024230957, -0.009700775146484375, -0.00907588005065918, -0.008450984954833984, -0.007826089859008789, -0.007201194763183594, -0.0065762996673583984, -0.005951404571533203, -0.005326509475708008, -0.0047016143798828125, -0.004076719284057617, -0.003451824188232422, -0.0028269290924072266, -0.0022020339965820312, -0.001577138900756836, -0.0009522438049316406, -0.0003273487091064453, 0.00029754638671875, 0.0009224414825439453, 0.0015473365783691406, 0.002172231674194336, 0.0027971267700195312, 0.0034220218658447266, 0.004046916961669922, 0.004671812057495117, 0.0052967071533203125, 0.005921602249145508, 0.006546497344970703, 0.0071713924407958984, 0.007796287536621094, 0.008421182632446289, 0.009046077728271484, 0.00967097282409668, 0.010295867919921875, 0.01092076301574707, 0.011545658111572266, 0.012170553207397461, 0.012795448303222656, 0.013420343399047852, 0.014045238494873047, 0.014670133590698242, 0.015295028686523438, 0.015919923782348633, 0.016544818878173828, 0.017169713973999023, 0.01779460906982422, 0.018419504165649414, 0.01904439926147461, 0.019669294357299805, 0.020294189453125]}, "gradients/decoder.transformer.h.1.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 14.0, 35.0, 95.0, 179.0, 251.0, 219.0, 143.0, 57.0, 13.0, 6.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00028665721765719354, -0.00024328375002369285, -0.00019991029694210738, -0.0001565368438605219, -0.00011316337622702122, -6.978990859352052e-05, -2.6416470063850284e-05, 1.695699756965041e-05, 6.033046520315111e-05, 0.00010370392556069419, 0.00014707738591823727, 0.00019045083899982274, 0.00023382430663332343, 0.0002771977742668241, 0.00032057121279649436, 0.00036394468042999506, 0.00040731814806349576, 0.00045069161569699645, 0.0004940650542266667, 0.0005374385509639978, 0.0005808119894936681, 0.0006241854280233383, 0.0006675588665530086, 0.0007109323632903397, 0.0007543058600276709, 0.0007976792985573411, 0.0008410527952946723, 0.0008844262338243425, 0.0009277997305616736, 0.0009711731690913439, 0.0010145466076210141, 0.0010579200461506844, 0.0011012936010956764, 0.0011446670396253467, 0.001188040478155017, 0.001231414033100009, 0.0012747874716296792, 0.0013181609101593494, 0.0013615343486890197, 0.00140490778721869, 0.001448281342163682, 0.0014916547806933522, 0.0015350282192230225, 0.0015784017741680145, 0.0016217752126976848, 0.001665148651227355, 0.0017085220897570252, 0.0017518955282866955, 0.0017952689668163657, 0.001838642405346036, 0.0018820158438757062, 0.0019253892824053764, 0.0019687628373503685, 0.0020121363922953606, 0.002055509714409709, 0.002098883269354701, 0.0021422565914690495, 0.0021856301464140415, 0.00222900346852839, 0.002272377023473382, 0.0023157503455877304, 0.0023591239005327225, 0.0024024974554777145, 0.002445870777592063, 0.002489244332537055]}, "gradients/decoder.transformer.h.1.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 3.0, 0.0, 8.0, 3.0, 8.0, 9.0, 5.0, 6.0, 7.0, 12.0, 16.0, 15.0, 24.0, 14.0, 31.0, 18.0, 24.0, 32.0, 27.0, 38.0, 36.0, 30.0, 49.0, 44.0, 48.0, 43.0, 35.0, 36.0, 42.0, 31.0, 25.0, 25.0, 32.0, 30.0, 26.0, 22.0, 21.0, 15.0, 15.0, 20.0, 12.0, 9.0, 8.0, 13.0, 12.0, 6.0, 7.0, 7.0, 5.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0], "bins": [-0.0005502700805664062, -0.0005331402644515038, -0.0005160104483366013, -0.0004988806322216988, -0.00048175081610679626, -0.00046462099999189377, -0.00044749118387699127, -0.0004303613677620888, -0.0004132315516471863, -0.0003961017355322838, -0.0003789719194173813, -0.0003618421033024788, -0.0003447122871875763, -0.0003275824710726738, -0.0003104526549577713, -0.0002933228388428688, -0.0002761930227279663, -0.0002590632066130638, -0.00024193339049816132, -0.00022480357438325882, -0.00020767375826835632, -0.00019054394215345383, -0.00017341412603855133, -0.00015628430992364883, -0.00013915449380874634, -0.00012202467769384384, -0.00010489486157894135, -8.776504546403885e-05, -7.063522934913635e-05, -5.3505413234233856e-05, -3.637559711933136e-05, -1.9245781004428864e-05, -2.115964889526367e-06, 1.5013851225376129e-05, 3.2143667340278625e-05, 4.927348345518112e-05, 6.640329957008362e-05, 8.353311568498611e-05, 0.00010066293179988861, 0.00011779274791479111, 0.0001349225640296936, 0.0001520523801445961, 0.0001691821962594986, 0.0001863120123744011, 0.0002034418284893036, 0.00022057164460420609, 0.00023770146071910858, 0.0002548312768340111, 0.0002719610929489136, 0.00028909090906381607, 0.00030622072517871857, 0.00032335054129362106, 0.00034048035740852356, 0.00035761017352342606, 0.00037473998963832855, 0.00039186980575323105, 0.00040899962186813354, 0.00042612943798303604, 0.00044325925409793854, 0.00046038907021284103, 0.00047751888632774353, 0.000494648702442646, 0.0005117785185575485, 0.000528908334672451, 0.0005460381507873535]}, "gradients/decoder.transformer.h.1.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 4.0, 1.0, 1.0, 2.0, 0.0, 7.0, 2.0, 2.0, 7.0, 5.0, 2.0, 10.0, 11.0, 16.0, 11.0, 21.0, 20.0, 33.0, 25.0, 31.0, 31.0, 29.0, 49.0, 54.0, 50.0, 43.0, 56.0, 44.0, 39.0, 47.0, 51.0, 48.0, 33.0, 37.0, 30.0, 25.0, 27.0, 14.0, 16.0, 21.0, 6.0, 15.0, 10.0, 10.0, 6.0, 3.0, 5.0, 3.0, 5.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.5234375, -5.325439453125, -5.12744140625, -4.929443359375, -4.7314453125, -4.533447265625, -4.33544921875, -4.137451171875, -3.939453125, -3.741455078125, -3.54345703125, -3.345458984375, -3.1474609375, -2.949462890625, -2.75146484375, -2.553466796875, -2.35546875, -2.157470703125, -1.95947265625, -1.761474609375, -1.5634765625, -1.365478515625, -1.16748046875, -0.969482421875, -0.771484375, -0.573486328125, -0.37548828125, -0.177490234375, 0.0205078125, 0.218505859375, 0.41650390625, 0.614501953125, 0.8125, 1.010498046875, 1.20849609375, 1.406494140625, 1.6044921875, 1.802490234375, 2.00048828125, 2.198486328125, 2.396484375, 2.594482421875, 2.79248046875, 2.990478515625, 3.1884765625, 3.386474609375, 3.58447265625, 3.782470703125, 3.98046875, 4.178466796875, 4.37646484375, 4.574462890625, 4.7724609375, 4.970458984375, 5.16845703125, 5.366455078125, 5.564453125, 5.762451171875, 5.96044921875, 6.158447265625, 6.3564453125, 6.554443359375, 6.75244140625, 6.950439453125, 7.1484375]}, "gradients/decoder.transformer.h.1.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 6.0, 5.0, 5.0, 7.0, 9.0, 13.0, 2.0, 18.0, 24.0, 29.0, 42.0, 57.0, 99.0, 112.0, 178.0, 231.0, 339.0, 509.0, 764.0, 1354.0, 2557.0, 6795.0, 27940.0, 245810.0, 657200.0, 81862.0, 13352.0, 4299.0, 1897.0, 1027.0, 602.0, 450.0, 263.0, 166.0, 155.0, 83.0, 89.0, 53.0, 53.0, 28.0, 26.0, 14.0, 17.0, 5.0, 6.0, 4.0, 2.0, 5.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-12.3046875, -11.898193359375, -11.49169921875, -11.085205078125, -10.6787109375, -10.272216796875, -9.86572265625, -9.459228515625, -9.052734375, -8.646240234375, -8.23974609375, -7.833251953125, -7.4267578125, -7.020263671875, -6.61376953125, -6.207275390625, -5.80078125, -5.394287109375, -4.98779296875, -4.581298828125, -4.1748046875, -3.768310546875, -3.36181640625, -2.955322265625, -2.548828125, -2.142333984375, -1.73583984375, -1.329345703125, -0.9228515625, -0.516357421875, -0.10986328125, 0.296630859375, 0.703125, 1.109619140625, 1.51611328125, 1.922607421875, 2.3291015625, 2.735595703125, 3.14208984375, 3.548583984375, 3.955078125, 4.361572265625, 4.76806640625, 5.174560546875, 5.5810546875, 5.987548828125, 6.39404296875, 6.800537109375, 7.20703125, 7.613525390625, 8.02001953125, 8.426513671875, 8.8330078125, 9.239501953125, 9.64599609375, 10.052490234375, 10.458984375, 10.865478515625, 11.27197265625, 11.678466796875, 12.0849609375, 12.491455078125, 12.89794921875, 13.304443359375, 13.7109375]}, "gradients/decoder.transformer.h.1.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 7.0, 3.0, 8.0, 17.0, 15.0, 18.0, 26.0, 38.0, 51.0, 60.0, 105.0, 144.0, 427.0, 1603.0, 179.0, 90.0, 70.0, 52.0, 31.0, 26.0, 20.0, 10.0, 15.0, 9.0, 5.0, 12.0, 9.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.6875, -26.703125, -25.71875, -24.734375, -23.75, -22.765625, -21.78125, -20.796875, -19.8125, -18.828125, -17.84375, -16.859375, -15.875, -14.890625, -13.90625, -12.921875, -11.9375, -10.953125, -9.96875, -8.984375, -8.0, -7.015625, -6.03125, -5.046875, -4.0625, -3.078125, -2.09375, -1.109375, -0.125, 0.859375, 1.84375, 2.828125, 3.8125, 4.796875, 5.78125, 6.765625, 7.75, 8.734375, 9.71875, 10.703125, 11.6875, 12.671875, 13.65625, 14.640625, 15.625, 16.609375, 17.59375, 18.578125, 19.5625, 20.546875, 21.53125, 22.515625, 23.5, 24.484375, 25.46875, 26.453125, 27.4375, 28.421875, 29.40625, 30.390625, 31.375, 32.359375, 33.34375, 34.328125, 35.3125]}, "gradients/decoder.transformer.h.1.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 5.0, 1.0, 4.0, 7.0, 8.0, 11.0, 21.0, 28.0, 40.0, 72.0, 116.0, 168.0, 266.0, 738.0, 5074.0, 3126082.0, 11230.0, 1005.0, 355.0, 190.0, 101.0, 62.0, 45.0, 27.0, 25.0, 9.0, 11.0, 1.0, 5.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-75.4375, -73.318359375, -71.19921875, -69.080078125, -66.9609375, -64.841796875, -62.72265625, -60.603515625, -58.484375, -56.365234375, -54.24609375, -52.126953125, -50.0078125, -47.888671875, -45.76953125, -43.650390625, -41.53125, -39.412109375, -37.29296875, -35.173828125, -33.0546875, -30.935546875, -28.81640625, -26.697265625, -24.578125, -22.458984375, -20.33984375, -18.220703125, -16.1015625, -13.982421875, -11.86328125, -9.744140625, -7.625, -5.505859375, -3.38671875, -1.267578125, 0.8515625, 2.970703125, 5.08984375, 7.208984375, 9.328125, 11.447265625, 13.56640625, 15.685546875, 17.8046875, 19.923828125, 22.04296875, 24.162109375, 26.28125, 28.400390625, 30.51953125, 32.638671875, 34.7578125, 36.876953125, 38.99609375, 41.115234375, 43.234375, 45.353515625, 47.47265625, 49.591796875, 51.7109375, 53.830078125, 55.94921875, 58.068359375, 60.1875]}, "gradients/decoder.transformer.h.1.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 78.0, 825.0, 103.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-62.83654022216797, -55.443214416503906, -48.049888610839844, -40.65656280517578, -33.26323699951172, -25.869911193847656, -18.476585388183594, -11.083259582519531, -3.6899337768554688, 3.7033920288085938, 11.096717834472656, 18.49004364013672, 25.88336944580078, 33.276695251464844, 40.670021057128906, 48.06334686279297, 55.45667266845703, 62.849998474121094, 70.24332427978516, 77.63665008544922, 85.02997589111328, 92.42330169677734, 99.8166275024414, 107.20995330810547, 114.60327911376953, 121.9966049194336, 129.38992309570312, 136.78326416015625, 144.17657470703125, 151.56991577148438, 158.96322631835938, 166.3565673828125, 173.7498779296875, 181.14320373535156, 188.53652954101562, 195.9298553466797, 203.32318115234375, 210.7165069580078, 218.10983276367188, 225.50315856933594, 232.896484375, 240.28981018066406, 247.68313598632812, 255.0764617919922, 262.46978759765625, 269.86309814453125, 277.2564392089844, 284.6497802734375, 292.0430908203125, 299.4364013671875, 306.8297424316406, 314.22308349609375, 321.61639404296875, 329.00970458984375, 336.4030456542969, 343.79638671875, 351.189697265625, 358.5830078125, 365.9763488769531, 373.36968994140625, 380.76300048828125, 388.15631103515625, 395.5496520996094, 402.9429931640625, 410.3363037109375]}, "gradients/decoder.transformer.h.1.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 6.0, 5.0, 7.0, 9.0, 7.0, 8.0, 8.0, 10.0, 15.0, 19.0, 25.0, 23.0, 21.0, 30.0, 36.0, 23.0, 30.0, 36.0, 29.0, 47.0, 43.0, 34.0, 42.0, 42.0, 35.0, 36.0, 43.0, 36.0, 38.0, 30.0, 25.0, 28.0, 30.0, 21.0, 16.0, 14.0, 16.0, 15.0, 8.0, 10.0, 10.0, 9.0, 8.0, 5.0, 5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 3.0, 2.0, 0.0, 2.0], "bins": [-49.26926803588867, -47.708824157714844, -46.148380279541016, -44.58793640136719, -43.027496337890625, -41.4670524597168, -39.90660858154297, -38.34616470336914, -36.78572082519531, -35.225276947021484, -33.664833068847656, -32.10438919067383, -30.543947219848633, -28.983503341674805, -27.42306137084961, -25.86261749267578, -24.302173614501953, -22.741729736328125, -21.181285858154297, -19.6208438873291, -18.060400009155273, -16.499956130981445, -14.939513206481934, -13.379070281982422, -11.818626403808594, -10.258182525634766, -8.697739601135254, -7.137296199798584, -5.576852798461914, -4.016409397125244, -2.455965995788574, -0.8955230712890625, 0.6649246215820312, 2.225368022918701, 3.785811424255371, 5.346254825592041, 6.906698226928711, 8.467142105102539, 10.02758502960205, 11.588027954101562, 13.14847183227539, 14.708915710449219, 16.269359588623047, 17.829801559448242, 19.39024543762207, 20.9506893157959, 22.511131286621094, 24.071575164794922, 25.63201904296875, 27.192462921142578, 28.752906799316406, 30.3133487701416, 31.87379264831543, 33.434234619140625, 34.99467849731445, 36.55512237548828, 38.11556625366211, 39.67601013183594, 41.236454010009766, 42.796897888183594, 44.357337951660156, 45.917781829833984, 47.47822570800781, 49.03866958618164, 50.59911346435547]}, "gradients/decoder.transformer.h.0.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 3.0, 2.0, 3.0, 1.0, 2.0, 4.0, 4.0, 8.0, 14.0, 11.0, 13.0, 18.0, 10.0, 17.0, 18.0, 31.0, 34.0, 32.0, 41.0, 38.0, 47.0, 35.0, 62.0, 37.0, 57.0, 39.0, 49.0, 41.0, 43.0, 37.0, 27.0, 39.0, 32.0, 25.0, 24.0, 22.0, 19.0, 19.0, 8.0, 8.0, 10.0, 10.0, 4.0, 4.0, 7.0, 2.0, 0.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.671875, -8.4202880859375, -8.168701171875, -7.9171142578125, -7.66552734375, -7.4139404296875, -7.162353515625, -6.9107666015625, -6.6591796875, -6.4075927734375, -6.156005859375, -5.9044189453125, -5.65283203125, -5.4012451171875, -5.149658203125, -4.8980712890625, -4.646484375, -4.3948974609375, -4.143310546875, -3.8917236328125, -3.64013671875, -3.3885498046875, -3.136962890625, -2.8853759765625, -2.6337890625, -2.3822021484375, -2.130615234375, -1.8790283203125, -1.62744140625, -1.3758544921875, -1.124267578125, -0.8726806640625, -0.62109375, -0.3695068359375, -0.117919921875, 0.1336669921875, 0.38525390625, 0.6368408203125, 0.888427734375, 1.1400146484375, 1.3916015625, 1.6431884765625, 1.894775390625, 2.1463623046875, 2.39794921875, 2.6495361328125, 2.901123046875, 3.1527099609375, 3.404296875, 3.6558837890625, 3.907470703125, 4.1590576171875, 4.41064453125, 4.6622314453125, 4.913818359375, 5.1654052734375, 5.4169921875, 5.6685791015625, 5.920166015625, 6.1717529296875, 6.42333984375, 6.6749267578125, 6.926513671875, 7.1781005859375, 7.4296875]}, "gradients/decoder.transformer.h.0.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 6.0, 2.0, 2.0, 5.0, 7.0, 10.0, 17.0, 21.0, 26.0, 27.0, 38.0, 52.0, 88.0, 128.0, 159.0, 189.0, 275.0, 442.0, 640.0, 1103.0, 2953.0, 34420.0, 3327362.0, 812960.0, 8800.0, 1860.0, 833.0, 560.0, 371.0, 246.0, 186.0, 124.0, 88.0, 78.0, 55.0, 41.0, 27.0, 21.0, 14.0, 9.0, 12.0, 6.0, 8.0, 6.0, 2.0, 4.0, 3.0, 2.0, 3.0, 3.0], "bins": [-47.96875, -46.6650390625, -45.361328125, -44.0576171875, -42.75390625, -41.4501953125, -40.146484375, -38.8427734375, -37.5390625, -36.2353515625, -34.931640625, -33.6279296875, -32.32421875, -31.0205078125, -29.716796875, -28.4130859375, -27.109375, -25.8056640625, -24.501953125, -23.1982421875, -21.89453125, -20.5908203125, -19.287109375, -17.9833984375, -16.6796875, -15.3759765625, -14.072265625, -12.7685546875, -11.46484375, -10.1611328125, -8.857421875, -7.5537109375, -6.25, -4.9462890625, -3.642578125, -2.3388671875, -1.03515625, 0.2685546875, 1.572265625, 2.8759765625, 4.1796875, 5.4833984375, 6.787109375, 8.0908203125, 9.39453125, 10.6982421875, 12.001953125, 13.3056640625, 14.609375, 15.9130859375, 17.216796875, 18.5205078125, 19.82421875, 21.1279296875, 22.431640625, 23.7353515625, 25.0390625, 26.3427734375, 27.646484375, 28.9501953125, 30.25390625, 31.5576171875, 32.861328125, 34.1650390625, 35.46875]}, "gradients/decoder.transformer.h.0.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 8.0, 6.0, 22.0, 88.0, 368.0, 1253.0, 1512.0, 594.0, 153.0, 44.0, 17.0, 3.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.65625, -40.36279296875, -39.0693359375, -37.77587890625, -36.482421875, -35.18896484375, -33.8955078125, -32.60205078125, -31.30859375, -30.01513671875, -28.7216796875, -27.42822265625, -26.134765625, -24.84130859375, -23.5478515625, -22.25439453125, -20.9609375, -19.66748046875, -18.3740234375, -17.08056640625, -15.787109375, -14.49365234375, -13.2001953125, -11.90673828125, -10.61328125, -9.31982421875, -8.0263671875, -6.73291015625, -5.439453125, -4.14599609375, -2.8525390625, -1.55908203125, -0.265625, 1.02783203125, 2.3212890625, 3.61474609375, 4.908203125, 6.20166015625, 7.4951171875, 8.78857421875, 10.08203125, 11.37548828125, 12.6689453125, 13.96240234375, 15.255859375, 16.54931640625, 17.8427734375, 19.13623046875, 20.4296875, 21.72314453125, 23.0166015625, 24.31005859375, 25.603515625, 26.89697265625, 28.1904296875, 29.48388671875, 30.77734375, 32.07080078125, 33.3642578125, 34.65771484375, 35.951171875, 37.24462890625, 38.5380859375, 39.83154296875, 41.125]}, "gradients/decoder.transformer.h.0.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 7.0, 2.0, 3.0, 3.0, 15.0, 21.0, 38.0, 91.0, 217.0, 442.0, 1231.0, 6943.0, 1344939.0, 2828211.0, 9755.0, 1459.0, 496.0, 199.0, 104.0, 39.0, 21.0, 10.0, 11.0, 7.0, 3.0, 1.0, 4.0, 2.0, 3.0, 5.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.78125, -39.4248046875, -38.068359375, -36.7119140625, -35.35546875, -33.9990234375, -32.642578125, -31.2861328125, -29.9296875, -28.5732421875, -27.216796875, -25.8603515625, -24.50390625, -23.1474609375, -21.791015625, -20.4345703125, -19.078125, -17.7216796875, -16.365234375, -15.0087890625, -13.65234375, -12.2958984375, -10.939453125, -9.5830078125, -8.2265625, -6.8701171875, -5.513671875, -4.1572265625, -2.80078125, -1.4443359375, -0.087890625, 1.2685546875, 2.625, 3.9814453125, 5.337890625, 6.6943359375, 8.05078125, 9.4072265625, 10.763671875, 12.1201171875, 13.4765625, 14.8330078125, 16.189453125, 17.5458984375, 18.90234375, 20.2587890625, 21.615234375, 22.9716796875, 24.328125, 25.6845703125, 27.041015625, 28.3974609375, 29.75390625, 31.1103515625, 32.466796875, 33.8232421875, 35.1796875, 36.5361328125, 37.892578125, 39.2490234375, 40.60546875, 41.9619140625, 43.318359375, 44.6748046875, 46.03125]}, "gradients/decoder.transformer.h.0.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 8.0, 9.0, 11.0, 23.0, 27.0, 51.0, 57.0, 75.0, 83.0, 102.0, 115.0, 110.0, 90.0, 76.0, 51.0, 41.0, 29.0, 25.0, 10.0, 6.0, 4.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-100.0187759399414, -97.2203140258789, -94.42184448242188, -91.62338256835938, -88.82491302490234, -86.02645111083984, -83.22798156738281, -80.42951965332031, -77.63105773925781, -74.83259582519531, -72.03412628173828, -69.23566436767578, -66.43719482421875, -63.63873291015625, -60.840267181396484, -58.04180145263672, -55.24333572387695, -52.44486999511719, -49.64640426635742, -46.847938537597656, -44.049476623535156, -41.25101089477539, -38.452545166015625, -35.654083251953125, -32.855613708496094, -30.057147979736328, -27.258684158325195, -24.46021842956543, -21.661754608154297, -18.86328887939453, -16.064823150634766, -13.266359329223633, -10.4678955078125, -7.669430732727051, -4.870965480804443, -2.072500228881836, 0.7259645462036133, 3.5244293212890625, 6.322895050048828, 9.121358871459961, 11.919824600219727, 14.718289375305176, 17.516754150390625, 20.31521987915039, 23.113685607910156, 25.91214942932129, 28.710615158081055, 31.509078979492188, 34.30754470825195, 37.10601043701172, 39.904476165771484, 42.70294189453125, 45.50140380859375, 48.299869537353516, 51.09833526611328, 53.89679718017578, 56.69526672363281, 59.49373245239258, 62.292198181152344, 65.09066009521484, 67.88912963867188, 70.68759155273438, 73.48605346679688, 76.2845230102539, 79.0829849243164]}, "gradients/decoder.transformer.h.0.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 3.0, 11.0, 6.0, 6.0, 7.0, 8.0, 9.0, 17.0, 12.0, 22.0, 22.0, 35.0, 28.0, 33.0, 33.0, 38.0, 31.0, 34.0, 43.0, 43.0, 43.0, 44.0, 41.0, 49.0, 42.0, 45.0, 22.0, 40.0, 23.0, 33.0, 30.0, 20.0, 23.0, 19.0, 14.0, 11.0, 10.0, 11.0, 5.0, 11.0, 5.0, 9.0, 4.0, 8.0, 4.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-39.16655349731445, -37.79261016845703, -36.418663024902344, -35.04471969604492, -33.670772552490234, -32.29682922363281, -30.922883987426758, -29.548938751220703, -28.17499351501465, -26.801048278808594, -25.42710304260254, -24.053157806396484, -22.679214477539062, -21.305267333984375, -19.931324005126953, -18.5573787689209, -17.183433532714844, -15.809488296508789, -14.435543060302734, -13.061598777770996, -11.687653541564941, -10.313708305358887, -8.939764022827148, -7.565818786621094, -6.191873550415039, -4.817928314208984, -3.443983554840088, -2.0700387954711914, -0.6960935592651367, 0.677851676940918, 2.0517959594726562, 3.425741195678711, 4.799690246582031, 6.173635482788086, 7.547580242156982, 8.921525001525879, 10.295470237731934, 11.669415473937988, 13.043359756469727, 14.417304992675781, 15.791250228881836, 17.16519546508789, 18.539140701293945, 19.9130859375, 21.287029266357422, 22.66097640991211, 24.03491973876953, 25.408864974975586, 26.78281021118164, 28.156755447387695, 29.53070068359375, 30.904644012451172, 32.27859115600586, 33.65253448486328, 35.02648162841797, 36.40042495727539, 37.77436828613281, 39.148311614990234, 40.52225875854492, 41.896202087402344, 43.27014923095703, 44.64409255981445, 46.018035888671875, 47.39198303222656, 48.76593017578125]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 5.0, 5.0, 7.0, 5.0, 7.0, 10.0, 11.0, 16.0, 16.0, 24.0, 15.0, 29.0, 30.0, 44.0, 31.0, 30.0, 38.0, 43.0, 41.0, 40.0, 46.0, 43.0, 49.0, 47.0, 42.0, 51.0, 24.0, 34.0, 25.0, 31.0, 29.0, 20.0, 14.0, 16.0, 17.0, 13.0, 11.0, 10.0, 6.0, 9.0, 9.0, 8.0, 4.0, 2.0, 2.0, 5.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-78.6875, -76.0126953125, -73.337890625, -70.6630859375, -67.98828125, -65.3134765625, -62.638671875, -59.9638671875, -57.2890625, -54.6142578125, -51.939453125, -49.2646484375, -46.58984375, -43.9150390625, -41.240234375, -38.5654296875, -35.890625, -33.2158203125, -30.541015625, -27.8662109375, -25.19140625, -22.5166015625, -19.841796875, -17.1669921875, -14.4921875, -11.8173828125, -9.142578125, -6.4677734375, -3.79296875, -1.1181640625, 1.556640625, 4.2314453125, 6.90625, 9.5810546875, 12.255859375, 14.9306640625, 17.60546875, 20.2802734375, 22.955078125, 25.6298828125, 28.3046875, 30.9794921875, 33.654296875, 36.3291015625, 39.00390625, 41.6787109375, 44.353515625, 47.0283203125, 49.703125, 52.3779296875, 55.052734375, 57.7275390625, 60.40234375, 63.0771484375, 65.751953125, 68.4267578125, 71.1015625, 73.7763671875, 76.451171875, 79.1259765625, 81.80078125, 84.4755859375, 87.150390625, 89.8251953125, 92.5]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0, 5.0, 7.0, 13.0, 15.0, 36.0, 36.0, 67.0, 108.0, 176.0, 251.0, 445.0, 620.0, 955.0, 1421.0, 2157.0, 3130.0, 4821.0, 7104.0, 10613.0, 15360.0, 23306.0, 34972.0, 52678.0, 80170.0, 122507.0, 174126.0, 169161.0, 116686.0, 76830.0, 50292.0, 33468.0, 22085.0, 14756.0, 9894.0, 6636.0, 4510.0, 3163.0, 2005.0, 1313.0, 911.0, 593.0, 429.0, 271.0, 157.0, 114.0, 82.0, 43.0, 22.0, 16.0, 11.0, 4.0, 4.0, 4.0, 1.0, 2.0, 3.0], "bins": [-5.83984375, -5.66778564453125, -5.4957275390625, -5.32366943359375, -5.151611328125, -4.97955322265625, -4.8074951171875, -4.63543701171875, -4.46337890625, -4.29132080078125, -4.1192626953125, -3.94720458984375, -3.775146484375, -3.60308837890625, -3.4310302734375, -3.25897216796875, -3.0869140625, -2.91485595703125, -2.7427978515625, -2.57073974609375, -2.398681640625, -2.22662353515625, -2.0545654296875, -1.88250732421875, -1.71044921875, -1.53839111328125, -1.3663330078125, -1.19427490234375, -1.022216796875, -0.85015869140625, -0.6781005859375, -0.50604248046875, -0.333984375, -0.16192626953125, 0.0101318359375, 0.18218994140625, 0.354248046875, 0.52630615234375, 0.6983642578125, 0.87042236328125, 1.04248046875, 1.21453857421875, 1.3865966796875, 1.55865478515625, 1.730712890625, 1.90277099609375, 2.0748291015625, 2.24688720703125, 2.4189453125, 2.59100341796875, 2.7630615234375, 2.93511962890625, 3.107177734375, 3.27923583984375, 3.4512939453125, 3.62335205078125, 3.79541015625, 3.96746826171875, 4.1395263671875, 4.31158447265625, 4.483642578125, 4.65570068359375, 4.8277587890625, 4.99981689453125, 5.171875]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 6.0, 0.0, 1.0, 3.0, 10.0, 13.0, 13.0, 15.0, 13.0, 21.0, 19.0, 27.0, 21.0, 36.0, 45.0, 39.0, 38.0, 39.0, 50.0, 49.0, 52.0, 1075.0, 48.0, 52.0, 48.0, 42.0, 40.0, 30.0, 38.0, 27.0, 26.0, 19.0, 11.0, 13.0, 17.0, 10.0, 9.0, 6.0, 5.0, 3.0, 5.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-54.15625, -52.23291015625, -50.3095703125, -48.38623046875, -46.462890625, -44.53955078125, -42.6162109375, -40.69287109375, -38.76953125, -36.84619140625, -34.9228515625, -32.99951171875, -31.076171875, -29.15283203125, -27.2294921875, -25.30615234375, -23.3828125, -21.45947265625, -19.5361328125, -17.61279296875, -15.689453125, -13.76611328125, -11.8427734375, -9.91943359375, -7.99609375, -6.07275390625, -4.1494140625, -2.22607421875, -0.302734375, 1.62060546875, 3.5439453125, 5.46728515625, 7.390625, 9.31396484375, 11.2373046875, 13.16064453125, 15.083984375, 17.00732421875, 18.9306640625, 20.85400390625, 22.77734375, 24.70068359375, 26.6240234375, 28.54736328125, 30.470703125, 32.39404296875, 34.3173828125, 36.24072265625, 38.1640625, 40.08740234375, 42.0107421875, 43.93408203125, 45.857421875, 47.78076171875, 49.7041015625, 51.62744140625, 53.55078125, 55.47412109375, 57.3974609375, 59.32080078125, 61.244140625, 63.16748046875, 65.0908203125, 67.01416015625, 68.9375]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 5.0, 8.0, 18.0, 14.0, 36.0, 67.0, 76.0, 100.0, 173.0, 250.0, 396.0, 606.0, 951.0, 1519.0, 2356.0, 3729.0, 5719.0, 9149.0, 14579.0, 22695.0, 36381.0, 57790.0, 92529.0, 146000.0, 1241888.0, 165937.0, 110278.0, 68265.0, 42413.0, 26499.0, 16829.0, 10809.0, 6843.0, 4324.0, 2832.0, 1712.0, 1162.0, 788.0, 487.0, 307.0, 217.0, 132.0, 102.0, 46.0, 45.0, 31.0, 18.0, 15.0, 6.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.42578125, -5.2489013671875, -5.072021484375, -4.8951416015625, -4.71826171875, -4.5413818359375, -4.364501953125, -4.1876220703125, -4.0107421875, -3.8338623046875, -3.656982421875, -3.4801025390625, -3.30322265625, -3.1263427734375, -2.949462890625, -2.7725830078125, -2.595703125, -2.4188232421875, -2.241943359375, -2.0650634765625, -1.88818359375, -1.7113037109375, -1.534423828125, -1.3575439453125, -1.1806640625, -1.0037841796875, -0.826904296875, -0.6500244140625, -0.47314453125, -0.2962646484375, -0.119384765625, 0.0574951171875, 0.234375, 0.4112548828125, 0.588134765625, 0.7650146484375, 0.94189453125, 1.1187744140625, 1.295654296875, 1.4725341796875, 1.6494140625, 1.8262939453125, 2.003173828125, 2.1800537109375, 2.35693359375, 2.5338134765625, 2.710693359375, 2.8875732421875, 3.064453125, 3.2413330078125, 3.418212890625, 3.5950927734375, 3.77197265625, 3.9488525390625, 4.125732421875, 4.3026123046875, 4.4794921875, 4.6563720703125, 4.833251953125, 5.0101318359375, 5.18701171875, 5.3638916015625, 5.540771484375, 5.7176513671875, 5.89453125]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 3.0, 6.0, 4.0, 5.0, 8.0, 10.0, 14.0, 9.0, 19.0, 23.0, 25.0, 24.0, 38.0, 43.0, 52.0, 66.0, 56.0, 66.0, 75.0, 82.0, 58.0, 55.0, 45.0, 42.0, 30.0, 37.0, 19.0, 25.0, 17.0, 8.0, 10.0, 9.0, 7.0, 4.0, 2.0, 1.0, 2.0, 6.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.027374267578125, -0.02653050422668457, -0.02568674087524414, -0.02484297752380371, -0.02399921417236328, -0.02315545082092285, -0.022311687469482422, -0.021467924118041992, -0.020624160766601562, -0.019780397415161133, -0.018936634063720703, -0.018092870712280273, -0.017249107360839844, -0.016405344009399414, -0.015561580657958984, -0.014717817306518555, -0.013874053955078125, -0.013030290603637695, -0.012186527252197266, -0.011342763900756836, -0.010499000549316406, -0.009655237197875977, -0.008811473846435547, -0.007967710494995117, -0.0071239471435546875, -0.006280183792114258, -0.005436420440673828, -0.0045926570892333984, -0.0037488937377929688, -0.002905130386352539, -0.0020613670349121094, -0.0012176036834716797, -0.00037384033203125, 0.0004699230194091797, 0.0013136863708496094, 0.002157449722290039, 0.0030012130737304688, 0.0038449764251708984, 0.004688739776611328, 0.005532503128051758, 0.0063762664794921875, 0.007220029830932617, 0.008063793182373047, 0.008907556533813477, 0.009751319885253906, 0.010595083236694336, 0.011438846588134766, 0.012282609939575195, 0.013126373291015625, 0.013970136642456055, 0.014813899993896484, 0.015657663345336914, 0.016501426696777344, 0.017345190048217773, 0.018188953399658203, 0.019032716751098633, 0.019876480102539062, 0.020720243453979492, 0.021564006805419922, 0.02240777015686035, 0.02325153350830078, 0.02409529685974121, 0.02493906021118164, 0.02578282356262207, 0.0266265869140625]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 4.0, 5.0, 3.0, 4.0, 7.0, 13.0, 8.0, 22.0, 16.0, 16.0, 38.0, 41.0, 63.0, 90.0, 143.0, 245.0, 367.0, 716.0, 1430.0, 3374.0, 9344.0, 33075.0, 151756.0, 510965.0, 260345.0, 53758.0, 14073.0, 4667.0, 1855.0, 805.0, 455.0, 277.0, 190.0, 96.0, 89.0, 40.0, 44.0, 32.0, 32.0, 15.0, 11.0, 11.0, 4.0, 4.0, 6.0, 1.0, 3.0, 4.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.1533203125, -0.14832115173339844, -0.14332199096679688, -0.1383228302001953, -0.13332366943359375, -0.1283245086669922, -0.12332534790039062, -0.11832618713378906, -0.1133270263671875, -0.10832786560058594, -0.10332870483398438, -0.09832954406738281, -0.09333038330078125, -0.08833122253417969, -0.08333206176757812, -0.07833290100097656, -0.073333740234375, -0.06833457946777344, -0.06333541870117188, -0.05833625793457031, -0.05333709716796875, -0.04833793640136719, -0.043338775634765625, -0.03833961486816406, -0.0333404541015625, -0.028341293334960938, -0.023342132568359375, -0.018342971801757812, -0.01334381103515625, -0.008344650268554688, -0.003345489501953125, 0.0016536712646484375, 0.00665283203125, 0.011651992797851562, 0.016651153564453125, 0.021650314331054688, 0.02664947509765625, 0.03164863586425781, 0.036647796630859375, 0.04164695739746094, 0.0466461181640625, 0.05164527893066406, 0.056644439697265625, 0.06164360046386719, 0.06664276123046875, 0.07164192199707031, 0.07664108276367188, 0.08164024353027344, 0.086639404296875, 0.09163856506347656, 0.09663772583007812, 0.10163688659667969, 0.10663604736328125, 0.11163520812988281, 0.11663436889648438, 0.12163352966308594, 0.1266326904296875, 0.13163185119628906, 0.13663101196289062, 0.1416301727294922, 0.14662933349609375, 0.1516284942626953, 0.15662765502929688, 0.16162681579589844, 0.1666259765625]}, "gradients/decoder.transformer.h.0.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 4.0, 2.0, 6.0, 5.0, 13.0, 10.0, 18.0, 38.0, 40.0, 53.0, 89.0, 95.0, 120.0, 98.0, 79.0, 93.0, 66.0, 56.0, 39.0, 25.0, 17.0, 15.0, 9.0, 5.0, 4.0, 2.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.030228953808546066, -0.029400598257780075, -0.028572242707014084, -0.027743887156248093, -0.0269155316054821, -0.02608717605471611, -0.02525882050395012, -0.024430466815829277, -0.023602111265063286, -0.022773755714297295, -0.021945400163531303, -0.021117044612765312, -0.02028868906199932, -0.01946033537387848, -0.018631979823112488, -0.017803624272346497, -0.016975268721580505, -0.016146913170814514, -0.015318557620048523, -0.014490202069282532, -0.013661847449839115, -0.012833491899073124, -0.012005136348307133, -0.011176781728863716, -0.010348424315452576, -0.009520068764686584, -0.008691713213920593, -0.007863357663154602, -0.0070350030437111855, -0.006206647492945194, -0.005378291942179203, -0.004549936857074499, -0.0037215817719697952, -0.0028932264540344477, -0.0020648711360991, -0.001236515585333109, -0.00040816026739776134, 0.0004201950505375862, 0.0012485506013035774, 0.0020769056864082813, 0.0029052612371742725, 0.00373361655510962, 0.004561971873044968, 0.005390327423810959, 0.00621868297457695, 0.007047038059681654, 0.007875394076108932, 0.008703748695552349, 0.00953210424631834, 0.010360459797084332, 0.011188815347850323, 0.01201716996729374, 0.01284552551805973, 0.013673881068825722, 0.014502236619591713, 0.015330592170357704, 0.016158947721123695, 0.016987303271889687, 0.017815658822655678, 0.01864401437342167, 0.01947236992418766, 0.020300723612308502, 0.021129079163074493, 0.021957434713840485, 0.022785790264606476]}, "gradients/decoder.transformer.h.0.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 2.0, 5.0, 5.0, 8.0, 5.0, 8.0, 10.0, 14.0, 19.0, 26.0, 22.0, 26.0, 24.0, 35.0, 45.0, 39.0, 50.0, 37.0, 49.0, 57.0, 46.0, 45.0, 38.0, 37.0, 29.0, 43.0, 28.0, 50.0, 30.0, 22.0, 31.0, 22.0, 19.0, 16.0, 13.0, 12.0, 8.0, 2.0, 10.0, 9.0, 0.0, 4.0, 4.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.015018641948699951, -0.014564094133675098, -0.014109546318650246, -0.013654998503625393, -0.01320045068860054, -0.012745902873575687, -0.012291355058550835, -0.011836807243525982, -0.01138225942850113, -0.010927711613476276, -0.010473163798451424, -0.010018615983426571, -0.009564068168401718, -0.009109520353376865, -0.008654972538352013, -0.00820042472332716, -0.007745876908302307, -0.007291329093277454, -0.006836781278252602, -0.006382233463227749, -0.005927685648202896, -0.005473137833178043, -0.005018590018153191, -0.004564042203128338, -0.004109494388103485, -0.0036549465730786324, -0.0032003987580537796, -0.002745850943028927, -0.002291303128004074, -0.0018367553129792213, -0.0013822074979543686, -0.0009276596829295158, -0.0004731118679046631, -1.8564052879810333e-05, 0.0004359837621450424, 0.0008905315771698952, 0.001345079392194748, 0.0017996272072196007, 0.0022541750222444534, 0.002708722837269306, 0.003163270652294159, 0.0036178184673190117, 0.0040723662823438644, 0.004526914097368717, 0.00498146191239357, 0.005436009727418423, 0.0058905575424432755, 0.006345105357468128, 0.006799653172492981, 0.007254200987517834, 0.0077087488025426865, 0.00816329661756754, 0.008617844432592392, 0.009072392247617245, 0.009526940062642097, 0.00998148787766695, 0.010436035692691803, 0.010890583507716656, 0.011345131322741508, 0.011799679137766361, 0.012254226952791214, 0.012708774767816067, 0.01316332258284092, 0.013617870397865772, 0.014072418212890625]}, "gradients/decoder.transformer.h.0.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 5.0, 4.0, 8.0, 5.0, 7.0, 10.0, 11.0, 16.0, 15.0, 23.0, 16.0, 29.0, 30.0, 45.0, 30.0, 31.0, 37.0, 43.0, 40.0, 42.0, 45.0, 44.0, 45.0, 49.0, 43.0, 51.0, 25.0, 32.0, 24.0, 33.0, 30.0, 19.0, 14.0, 17.0, 16.0, 14.0, 11.0, 10.0, 6.0, 9.0, 9.0, 7.0, 5.0, 2.0, 2.0, 5.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-78.75, -76.0751953125, -73.400390625, -70.7255859375, -68.05078125, -65.3759765625, -62.701171875, -60.0263671875, -57.3515625, -54.6767578125, -52.001953125, -49.3271484375, -46.65234375, -43.9775390625, -41.302734375, -38.6279296875, -35.953125, -33.2783203125, -30.603515625, -27.9287109375, -25.25390625, -22.5791015625, -19.904296875, -17.2294921875, -14.5546875, -11.8798828125, -9.205078125, -6.5302734375, -3.85546875, -1.1806640625, 1.494140625, 4.1689453125, 6.84375, 9.5185546875, 12.193359375, 14.8681640625, 17.54296875, 20.2177734375, 22.892578125, 25.5673828125, 28.2421875, 30.9169921875, 33.591796875, 36.2666015625, 38.94140625, 41.6162109375, 44.291015625, 46.9658203125, 49.640625, 52.3154296875, 54.990234375, 57.6650390625, 60.33984375, 63.0146484375, 65.689453125, 68.3642578125, 71.0390625, 73.7138671875, 76.388671875, 79.0634765625, 81.73828125, 84.4130859375, 87.087890625, 89.7626953125, 92.4375]}, "gradients/decoder.transformer.h.0.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 6.0, 5.0, 10.0, 15.0, 11.0, 22.0, 18.0, 34.0, 52.0, 82.0, 113.0, 159.0, 307.0, 410.0, 751.0, 1397.0, 2874.0, 7447.0, 26316.0, 137648.0, 538870.0, 264177.0, 48254.0, 11526.0, 3884.0, 1807.0, 853.0, 530.0, 310.0, 210.0, 124.0, 99.0, 76.0, 47.0, 23.0, 17.0, 27.0, 16.0, 11.0, 7.0, 6.0, 6.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-15.265625, -14.75146484375, -14.2373046875, -13.72314453125, -13.208984375, -12.69482421875, -12.1806640625, -11.66650390625, -11.15234375, -10.63818359375, -10.1240234375, -9.60986328125, -9.095703125, -8.58154296875, -8.0673828125, -7.55322265625, -7.0390625, -6.52490234375, -6.0107421875, -5.49658203125, -4.982421875, -4.46826171875, -3.9541015625, -3.43994140625, -2.92578125, -2.41162109375, -1.8974609375, -1.38330078125, -0.869140625, -0.35498046875, 0.1591796875, 0.67333984375, 1.1875, 1.70166015625, 2.2158203125, 2.72998046875, 3.244140625, 3.75830078125, 4.2724609375, 4.78662109375, 5.30078125, 5.81494140625, 6.3291015625, 6.84326171875, 7.357421875, 7.87158203125, 8.3857421875, 8.89990234375, 9.4140625, 9.92822265625, 10.4423828125, 10.95654296875, 11.470703125, 11.98486328125, 12.4990234375, 13.01318359375, 13.52734375, 14.04150390625, 14.5556640625, 15.06982421875, 15.583984375, 16.09814453125, 16.6123046875, 17.12646484375, 17.640625]}, "gradients/decoder.transformer.h.0.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 5.0, 4.0, 2.0, 3.0, 1.0, 2.0, 3.0, 4.0, 9.0, 12.0, 10.0, 9.0, 11.0, 23.0, 25.0, 25.0, 42.0, 41.0, 31.0, 47.0, 63.0, 56.0, 49.0, 586.0, 1552.0, 58.0, 62.0, 48.0, 41.0, 37.0, 31.0, 32.0, 26.0, 16.0, 27.0, 13.0, 13.0, 7.0, 11.0, 7.0, 5.0, 1.0, 4.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-172.0, -167.0859375, -162.171875, -157.2578125, -152.34375, -147.4296875, -142.515625, -137.6015625, -132.6875, -127.7734375, -122.859375, -117.9453125, -113.03125, -108.1171875, -103.203125, -98.2890625, -93.375, -88.4609375, -83.546875, -78.6328125, -73.71875, -68.8046875, -63.890625, -58.9765625, -54.0625, -49.1484375, -44.234375, -39.3203125, -34.40625, -29.4921875, -24.578125, -19.6640625, -14.75, -9.8359375, -4.921875, -0.0078125, 4.90625, 9.8203125, 14.734375, 19.6484375, 24.5625, 29.4765625, 34.390625, 39.3046875, 44.21875, 49.1328125, 54.046875, 58.9609375, 63.875, 68.7890625, 73.703125, 78.6171875, 83.53125, 88.4453125, 93.359375, 98.2734375, 103.1875, 108.1015625, 113.015625, 117.9296875, 122.84375, 127.7578125, 132.671875, 137.5859375, 142.5]}, "gradients/decoder.transformer.h.0.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 9.0, 5.0, 14.0, 17.0, 15.0, 31.0, 50.0, 113.0, 153.0, 266.0, 447.0, 953.0, 2624.0, 16056.0, 2694155.0, 415267.0, 11268.0, 2246.0, 873.0, 446.0, 277.0, 146.0, 86.0, 67.0, 31.0, 26.0, 19.0, 13.0, 14.0, 7.0, 6.0, 4.0, 4.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-39.21875, -37.90869140625, -36.5986328125, -35.28857421875, -33.978515625, -32.66845703125, -31.3583984375, -30.04833984375, -28.73828125, -27.42822265625, -26.1181640625, -24.80810546875, -23.498046875, -22.18798828125, -20.8779296875, -19.56787109375, -18.2578125, -16.94775390625, -15.6376953125, -14.32763671875, -13.017578125, -11.70751953125, -10.3974609375, -9.08740234375, -7.77734375, -6.46728515625, -5.1572265625, -3.84716796875, -2.537109375, -1.22705078125, 0.0830078125, 1.39306640625, 2.703125, 4.01318359375, 5.3232421875, 6.63330078125, 7.943359375, 9.25341796875, 10.5634765625, 11.87353515625, 13.18359375, 14.49365234375, 15.8037109375, 17.11376953125, 18.423828125, 19.73388671875, 21.0439453125, 22.35400390625, 23.6640625, 24.97412109375, 26.2841796875, 27.59423828125, 28.904296875, 30.21435546875, 31.5244140625, 32.83447265625, 34.14453125, 35.45458984375, 36.7646484375, 38.07470703125, 39.384765625, 40.69482421875, 42.0048828125, 43.31494140625, 44.625]}, "gradients/decoder.transformer.h.0.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 5.0, 1.0, 17.0, 19.0, 65.0, 161.0, 297.0, 254.0, 90.0, 44.0, 14.0, 16.0, 5.0, 12.0, 7.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-120.0509262084961, -107.7363510131836, -95.42178344726562, -83.10720825195312, -70.79263305664062, -58.478065490722656, -46.163490295410156, -33.84892272949219, -21.534347534179688, -9.219775199890137, 3.094797134399414, 15.409370422363281, 27.723941802978516, 40.03851318359375, 52.35308837890625, 64.66765594482422, 76.98223114013672, 89.29680633544922, 101.61137390136719, 113.92594909667969, 126.24052429199219, 138.55508422851562, 150.86965942382812, 163.18423461914062, 175.49880981445312, 187.81338500976562, 200.12796020507812, 212.44253540039062, 224.75709533691406, 237.07167053222656, 249.38624572753906, 261.7008056640625, 274.0154113769531, 286.3299865722656, 298.6445617675781, 310.9591369628906, 323.2737121582031, 335.5882568359375, 347.90283203125, 360.2174072265625, 372.531982421875, 384.8465576171875, 397.1611328125, 409.4757080078125, 421.790283203125, 434.1048583984375, 446.41943359375, 458.7339782714844, 471.048583984375, 483.3631591796875, 495.677734375, 507.9923095703125, 520.306884765625, 532.6214599609375, 544.93603515625, 557.2506103515625, 569.5651245117188, 581.8796997070312, 594.1942749023438, 606.5088500976562, 618.8234252929688, 631.1380004882812, 643.4525756835938, 655.7671508789062, 668.0817260742188]}, "gradients/decoder.transformer.h.0.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 1.0, 8.0, 5.0, 4.0, 8.0, 8.0, 13.0, 11.0, 12.0, 14.0, 23.0, 20.0, 24.0, 26.0, 23.0, 43.0, 44.0, 41.0, 40.0, 46.0, 59.0, 48.0, 49.0, 52.0, 43.0, 44.0, 52.0, 36.0, 25.0, 31.0, 33.0, 22.0, 17.0, 21.0, 16.0, 11.0, 8.0, 7.0, 6.0, 5.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-210.40274047851562, -204.1172332763672, -197.8317108154297, -191.54620361328125, -185.2606964111328, -178.97518920898438, -172.68966674804688, -166.40415954589844, -160.11865234375, -153.83314514160156, -147.54762268066406, -141.26211547851562, -134.9766082763672, -128.69110107421875, -122.40557861328125, -116.12007141113281, -109.83454895019531, -103.54903411865234, -97.2635269165039, -90.97801208496094, -84.6925048828125, -78.40699005126953, -72.12147521972656, -65.83596801757812, -59.550453186035156, -53.26494216918945, -46.97943115234375, -40.69391632080078, -34.40840530395508, -28.122894287109375, -21.837379455566406, -15.551868438720703, -9.266357421875, -2.9808454513549805, 3.304666519165039, 9.590179443359375, 15.875690460205078, 22.16120147705078, 28.44671630859375, 34.73222732543945, 41.017738342285156, 47.30324935913086, 53.58876037597656, 59.87427520751953, 66.1597900390625, 72.44529724121094, 78.7308120727539, 85.01632690429688, 91.30183410644531, 97.58734893798828, 103.87285614013672, 110.15837097167969, 116.44387817382812, 122.7293930053711, 129.01490783691406, 135.3004150390625, 141.5859375, 147.87144470214844, 154.15696716308594, 160.44247436523438, 166.7279815673828, 173.01348876953125, 179.29901123046875, 185.5845184326172, 191.87002563476562]}, "gradients/decoder.transformer.wpe.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 1.0, 6.0, 3.0, 1.0, 3.0, 8.0, 5.0, 14.0, 27.0, 42.0, 53.0, 68.0, 150.0, 167.0, 287.0, 464.0, 729.0, 952.0, 1486.0, 2229.0, 3194.0, 4806.0, 1016280.0, 6003.0, 3606.0, 2561.0, 1784.0, 1219.0, 851.0, 535.0, 376.0, 222.0, 141.0, 102.0, 75.0, 37.0, 21.0, 18.0, 12.0, 9.0, 6.0, 1.0, 2.0, 6.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0], "bins": [-37.234500885009766, -36.1920051574707, -35.14950942993164, -34.10701370239258, -33.06451416015625, -32.02201843261719, -30.979522705078125, -29.937026977539062, -28.89453125, -27.852035522460938, -26.809539794921875, -25.76704216003418, -24.724546432495117, -23.682050704956055, -22.63955307006836, -21.597057342529297, -20.554561614990234, -19.512065887451172, -18.46957015991211, -17.427072525024414, -16.38457679748535, -15.342081069946289, -14.29958438873291, -13.257087707519531, -12.214591979980469, -11.172096252441406, -10.129599571228027, -9.087102890014648, -8.044607162475586, -7.002110958099365, -5.9596147537231445, -4.917118549346924, -3.8746185302734375, -2.832122325897217, -1.789626121520996, -0.7471299171447754, 0.2953662872314453, 1.337862491607666, 2.3803586959838867, 3.4228549003601074, 4.465351104736328, 5.507847309112549, 6.5503435134887695, 7.59283971786499, 8.635335922241211, 9.677831649780273, 10.720328330993652, 11.762825012207031, 12.805320739746094, 13.847816467285156, 14.890313148498535, 15.932809829711914, 16.975305557250977, 18.01780128479004, 19.060298919677734, 20.102794647216797, 21.14529037475586, 22.187786102294922, 23.230281829833984, 24.27277946472168, 25.315275192260742, 26.357770919799805, 27.4002685546875, 28.442764282226562, 29.485260009765625]}, "gradients/decoder.transformer.wte.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 2.0, 2.0, 5.0, 6.0, 9.0, 21.0, 50.0, 81.0, 92.0, 640.0, 51461832.0, 245.0, 70.0, 44.0, 24.0, 15.0, 10.0, 7.0, 4.0, 6.0, 3.0, 4.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5684.64111328125, -5508.79443359375, -5332.94775390625, -5157.10107421875, -4981.25439453125, -4805.40771484375, -4629.5615234375, -4453.71484375, -4277.8681640625, -4102.021484375, -3926.1748046875, -3750.328125, -3574.4814453125, -3398.634765625, -3222.788330078125, -3046.941650390625, -2871.0947265625, -2695.248046875, -2519.4013671875, -2343.5546875, -2167.7080078125, -1991.8614501953125, -1816.014892578125, -1640.168212890625, -1464.321533203125, -1288.474853515625, -1112.628173828125, -936.7816162109375, -760.9349365234375, -585.0882568359375, -409.24169921875, -233.39501953125, -57.548828125, 118.29782104492188, 294.14447021484375, 469.9910888671875, 645.8377685546875, 821.6844482421875, 997.531005859375, 1173.377685546875, 1349.224365234375, 1525.071044921875, 1700.917724609375, 1876.7642822265625, 2052.61083984375, 2228.45751953125, 2404.30419921875, 2580.15087890625, 2755.99755859375, 2931.84423828125, 3107.69091796875, 3283.53759765625, 3459.38427734375, 3635.23095703125, 3811.077392578125, 3986.924072265625, 4162.7705078125, 4338.6171875, 4514.4638671875, 4690.310546875, 4866.1572265625, 5042.00390625, 5217.8505859375, 5393.697265625, 5569.5439453125]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 7.0, 8.0, 4.0, 20.0, 19.0, 25.0, 37.0, 62.0, 81.0, 119.0, 185.0, 333.0, 509.0, 816.0, 1390.0, 2227.0, 3288.0, 5395.0, 8653.0, 14019.0, 22324.0, 36217.0, 57725.0, 92325.0, 149768.0, 243521.0, 395920.0, 2914489.0, 1355507.0, 377133.0, 232158.0, 142141.0, 88373.0, 55325.0, 34506.0, 21444.0, 13351.0, 8328.0, 5169.0, 3166.0, 1928.0, 1233.0, 727.0, 523.0, 330.0, 201.0, 131.0, 94.0, 68.0, 41.0, 31.0, 31.0, 9.0, 6.0, 7.0, 2.0, 1.0, 1.0], "bins": [-2.349609375, -2.2803955078125, -2.211181640625, -2.1419677734375, -2.07275390625, -2.0035400390625, -1.934326171875, -1.8651123046875, -1.7958984375, -1.7266845703125, -1.657470703125, -1.5882568359375, -1.51904296875, -1.4498291015625, -1.380615234375, -1.3114013671875, -1.2421875, -1.1729736328125, -1.103759765625, -1.0345458984375, -0.96533203125, -0.8961181640625, -0.826904296875, -0.7576904296875, -0.6884765625, -0.6192626953125, -0.550048828125, -0.4808349609375, -0.41162109375, -0.3424072265625, -0.273193359375, -0.2039794921875, -0.134765625, -0.0655517578125, 0.003662109375, 0.0728759765625, 0.14208984375, 0.2113037109375, 0.280517578125, 0.3497314453125, 0.4189453125, 0.4881591796875, 0.557373046875, 0.6265869140625, 0.69580078125, 0.7650146484375, 0.834228515625, 0.9034423828125, 0.97265625, 1.0418701171875, 1.111083984375, 1.1802978515625, 1.24951171875, 1.3187255859375, 1.387939453125, 1.4571533203125, 1.5263671875, 1.5955810546875, 1.664794921875, 1.7340087890625, 1.80322265625, 1.8724365234375, 1.941650390625, 2.0108642578125, 2.080078125]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 8.0, 9.0, 6.0, 10.0, 19.0, 12.0, 12.0, 22.0, 23.0, 22.0, 23.0, 29.0, 33.0, 36.0, 32.0, 36.0, 37.0, 41.0, 44.0, 312.0, 775.0, 59.0, 34.0, 36.0, 36.0, 45.0, 34.0, 38.0, 37.0, 32.0, 19.0, 13.0, 25.0, 10.0, 19.0, 9.0, 18.0, 7.0, 3.0, 5.0, 5.0, 5.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0], "bins": [-19.109375, -18.566162109375, -18.02294921875, -17.479736328125, -16.9365234375, -16.393310546875, -15.85009765625, -15.306884765625, -14.763671875, -14.220458984375, -13.67724609375, -13.134033203125, -12.5908203125, -12.047607421875, -11.50439453125, -10.961181640625, -10.41796875, -9.874755859375, -9.33154296875, -8.788330078125, -8.2451171875, -7.701904296875, -7.15869140625, -6.615478515625, -6.072265625, -5.529052734375, -4.98583984375, -4.442626953125, -3.8994140625, -3.356201171875, -2.81298828125, -2.269775390625, -1.7265625, -1.183349609375, -0.64013671875, -0.096923828125, 0.4462890625, 0.989501953125, 1.53271484375, 2.075927734375, 2.619140625, 3.162353515625, 3.70556640625, 4.248779296875, 4.7919921875, 5.335205078125, 5.87841796875, 6.421630859375, 6.96484375, 7.508056640625, 8.05126953125, 8.594482421875, 9.1376953125, 9.680908203125, 10.22412109375, 10.767333984375, 11.310546875, 11.853759765625, 12.39697265625, 12.940185546875, 13.4833984375, 14.026611328125, 14.56982421875, 15.113037109375, 15.65625]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 3.0, 5.0, 5.0, 15.0, 33.0, 20.0, 36.0, 44.0, 78.0, 101.0, 153.0, 248.0, 313.0, 526.0, 776.0, 1091.0, 1634.0, 2398.0, 3865.0, 5695.0, 8769.0, 13756.0, 21510.0, 34239.0, 54895.0, 89018.0, 147162.0, 261902.0, 520578.0, 3860624.0, 574244.0, 279260.0, 156713.0, 93887.0, 57868.0, 35918.0, 22733.0, 14482.0, 9223.0, 5883.0, 4014.0, 2454.0, 1679.0, 1137.0, 784.0, 536.0, 362.0, 235.0, 166.0, 115.0, 67.0, 65.0, 53.0, 27.0, 16.0, 18.0, 8.0, 3.0, 0.0, 6.0, 3.0], "bins": [-3.21484375, -3.1160888671875, -3.017333984375, -2.9185791015625, -2.81982421875, -2.7210693359375, -2.622314453125, -2.5235595703125, -2.4248046875, -2.3260498046875, -2.227294921875, -2.1285400390625, -2.02978515625, -1.9310302734375, -1.832275390625, -1.7335205078125, -1.634765625, -1.5360107421875, -1.437255859375, -1.3385009765625, -1.23974609375, -1.1409912109375, -1.042236328125, -0.9434814453125, -0.8447265625, -0.7459716796875, -0.647216796875, -0.5484619140625, -0.44970703125, -0.3509521484375, -0.252197265625, -0.1534423828125, -0.0546875, 0.0440673828125, 0.142822265625, 0.2415771484375, 0.34033203125, 0.4390869140625, 0.537841796875, 0.6365966796875, 0.7353515625, 0.8341064453125, 0.932861328125, 1.0316162109375, 1.13037109375, 1.2291259765625, 1.327880859375, 1.4266357421875, 1.525390625, 1.6241455078125, 1.722900390625, 1.8216552734375, 1.92041015625, 2.0191650390625, 2.117919921875, 2.2166748046875, 2.3154296875, 2.4141845703125, 2.512939453125, 2.6116943359375, 2.71044921875, 2.8092041015625, 2.907958984375, 3.0067138671875, 3.10546875]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 6.0, 2.0, 2.0, 7.0, 3.0, 6.0, 6.0, 13.0, 12.0, 12.0, 13.0, 14.0, 22.0, 24.0, 23.0, 17.0, 23.0, 28.0, 35.0, 26.0, 26.0, 37.0, 39.0, 53.0, 426.0, 630.0, 75.0, 41.0, 40.0, 37.0, 40.0, 34.0, 20.0, 33.0, 18.0, 33.0, 23.0, 22.0, 18.0, 16.0, 11.0, 16.0, 11.0, 3.0, 8.0, 8.0, 9.0, 1.0, 3.0, 2.0, 4.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 2.0], "bins": [-9.90625, -9.5882568359375, -9.270263671875, -8.9522705078125, -8.63427734375, -8.3162841796875, -7.998291015625, -7.6802978515625, -7.3623046875, -7.0443115234375, -6.726318359375, -6.4083251953125, -6.09033203125, -5.7723388671875, -5.454345703125, -5.1363525390625, -4.818359375, -4.5003662109375, -4.182373046875, -3.8643798828125, -3.54638671875, -3.2283935546875, -2.910400390625, -2.5924072265625, -2.2744140625, -1.9564208984375, -1.638427734375, -1.3204345703125, -1.00244140625, -0.6844482421875, -0.366455078125, -0.0484619140625, 0.26953125, 0.5875244140625, 0.905517578125, 1.2235107421875, 1.54150390625, 1.8594970703125, 2.177490234375, 2.4954833984375, 2.8134765625, 3.1314697265625, 3.449462890625, 3.7674560546875, 4.08544921875, 4.4034423828125, 4.721435546875, 5.0394287109375, 5.357421875, 5.6754150390625, 5.993408203125, 6.3114013671875, 6.62939453125, 6.9473876953125, 7.265380859375, 7.5833740234375, 7.9013671875, 8.2193603515625, 8.537353515625, 8.8553466796875, 9.17333984375, 9.4913330078125, 9.809326171875, 10.1273193359375, 10.4453125]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [5.0, 4.0, 7.0, 4.0, 3.0, 10.0, 11.0, 14.0, 21.0, 21.0, 29.0, 43.0, 53.0, 59.0, 108.0, 126.0, 150.0, 225.0, 313.0, 364.0, 488.0, 615.0, 969.0, 1255.0, 1807.0, 2703.0, 4477.0, 8403.0, 17109.0, 45463.0, 196810.0, 5861639.0, 89870.0, 28205.0, 12130.0, 6302.0, 3592.0, 2342.0, 1650.0, 1069.0, 773.0, 529.0, 404.0, 325.0, 228.0, 185.0, 130.0, 115.0, 73.0, 66.0, 42.0, 37.0, 24.0, 13.0, 8.0, 4.0, 8.0, 6.0, 2.0, 6.0, 1.0, 2.0, 3.0, 4.0], "bins": [-12.4921875, -12.0927734375, -11.693359375, -11.2939453125, -10.89453125, -10.4951171875, -10.095703125, -9.6962890625, -9.296875, -8.8974609375, -8.498046875, -8.0986328125, -7.69921875, -7.2998046875, -6.900390625, -6.5009765625, -6.1015625, -5.7021484375, -5.302734375, -4.9033203125, -4.50390625, -4.1044921875, -3.705078125, -3.3056640625, -2.90625, -2.5068359375, -2.107421875, -1.7080078125, -1.30859375, -0.9091796875, -0.509765625, -0.1103515625, 0.2890625, 0.6884765625, 1.087890625, 1.4873046875, 1.88671875, 2.2861328125, 2.685546875, 3.0849609375, 3.484375, 3.8837890625, 4.283203125, 4.6826171875, 5.08203125, 5.4814453125, 5.880859375, 6.2802734375, 6.6796875, 7.0791015625, 7.478515625, 7.8779296875, 8.27734375, 8.6767578125, 9.076171875, 9.4755859375, 9.875, 10.2744140625, 10.673828125, 11.0732421875, 11.47265625, 11.8720703125, 12.271484375, 12.6708984375, 13.0703125]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 3.0, 6.0, 13.0, 9.0, 14.0, 13.0, 15.0, 26.0, 22.0, 18.0, 21.0, 28.0, 37.0, 47.0, 56.0, 54.0, 75.0, 174.0, 646.0, 246.0, 77.0, 48.0, 44.0, 40.0, 39.0, 29.0, 37.0, 37.0, 36.0, 16.0, 15.0, 23.0, 20.0, 9.0, 7.0, 6.0, 5.0, 4.0, 7.0, 1.0, 0.0, 2.0, 3.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.3125, -8.05816650390625, -7.8038330078125, -7.54949951171875, -7.295166015625, -7.04083251953125, -6.7864990234375, -6.53216552734375, -6.27783203125, -6.02349853515625, -5.7691650390625, -5.51483154296875, -5.260498046875, -5.00616455078125, -4.7518310546875, -4.49749755859375, -4.2431640625, -3.98883056640625, -3.7344970703125, -3.48016357421875, -3.225830078125, -2.97149658203125, -2.7171630859375, -2.46282958984375, -2.20849609375, -1.95416259765625, -1.6998291015625, -1.44549560546875, -1.191162109375, -0.93682861328125, -0.6824951171875, -0.42816162109375, -0.173828125, 0.08050537109375, 0.3348388671875, 0.58917236328125, 0.843505859375, 1.09783935546875, 1.3521728515625, 1.60650634765625, 1.86083984375, 2.11517333984375, 2.3695068359375, 2.62384033203125, 2.878173828125, 3.13250732421875, 3.3868408203125, 3.64117431640625, 3.8955078125, 4.14984130859375, 4.4041748046875, 4.65850830078125, 4.912841796875, 5.16717529296875, 5.4215087890625, 5.67584228515625, 5.93017578125, 6.18450927734375, 6.4388427734375, 6.69317626953125, 6.947509765625, 7.20184326171875, 7.4561767578125, 7.71051025390625, 7.96484375]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 4.0, 8.0, 10.0, 18.0, 49.0, 97.0, 244.0, 339.0, 143.0, 46.0, 24.0, 7.0, 7.0, 9.0, 1.0, 2.0, 0.0, 3.0, 1.0, 0.0, 1.0], "bins": [-47.907508850097656, -46.95437240600586, -46.00123977661133, -45.04810333251953, -44.094966888427734, -43.14183044433594, -42.188697814941406, -41.23556137084961, -40.28242492675781, -39.329288482666016, -38.376155853271484, -37.42301940917969, -36.46988296508789, -35.516746520996094, -34.56361389160156, -33.610477447509766, -32.657344818115234, -31.70421028137207, -30.751073837280273, -29.79793930053711, -28.844802856445312, -27.89166831970215, -26.938533782958984, -25.985397338867188, -25.03226089477539, -24.079126358032227, -23.12598991394043, -22.172855377197266, -21.21971893310547, -20.266584396362305, -19.31344985961914, -18.360313415527344, -17.40717887878418, -16.454044342041016, -15.500907897949219, -14.547773361206055, -13.594637870788574, -12.641502380371094, -11.688366889953613, -10.735231399536133, -9.782096862792969, -8.828961372375488, -7.875826358795166, -6.9226908683776855, -5.969555854797363, -5.016420364379883, -4.063284873962402, -3.11014986038208, -2.1570138931274414, -1.20387864112854, -0.2507432699203491, 0.7023921012878418, 1.6555273532867432, 2.6086626052856445, 3.561798095703125, 4.514933109283447, 5.468068599700928, 6.421204090118408, 7.3743391036987305, 8.327474594116211, 9.280610084533691, 10.233745574951172, 11.186880111694336, 12.140015602111816, 13.093151092529297]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 1.0, 0.0, 1.0, 3.0, 7.0, 3.0, 4.0, 2.0, 9.0, 10.0, 8.0, 9.0, 8.0, 14.0, 20.0, 14.0, 23.0, 17.0, 37.0, 31.0, 38.0, 39.0, 38.0, 38.0, 28.0, 45.0, 48.0, 40.0, 42.0, 48.0, 39.0, 30.0, 31.0, 30.0, 36.0, 30.0, 19.0, 22.0, 27.0, 13.0, 24.0, 10.0, 14.0, 14.0, 10.0, 10.0, 5.0, 3.0, 5.0, 2.0, 6.0, 5.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-9.6207275390625, -9.30551815032959, -8.990309715270996, -8.675100326538086, -8.359890937805176, -8.044682502746582, -7.729473114013672, -7.41426420211792, -7.099055290222168, -6.783846378326416, -6.468636989593506, -6.153428077697754, -5.838219165802002, -5.52301025390625, -5.20780086517334, -4.892591953277588, -4.577382564544678, -4.262173652648926, -3.9469645023345947, -3.6317553520202637, -3.3165464401245117, -3.0013372898101807, -2.6861281394958496, -2.3709192276000977, -2.0557100772857666, -1.740501046180725, -1.4252920150756836, -1.1100828647613525, -0.794873833656311, -0.47966480255126953, -0.16445565223693848, 0.15075325965881348, 0.46596240997314453, 0.781171441078186, 1.0963804721832275, 1.4115896224975586, 1.7267986536026, 2.0420076847076416, 2.3572168350219727, 2.6724257469177246, 2.9876348972320557, 3.3028440475463867, 3.6180529594421387, 3.9332621097564697, 4.248471260070801, 4.563680171966553, 4.878889083862305, 5.194098472595215, 5.509307384490967, 5.824516296386719, 6.139725685119629, 6.454934597015381, 6.770143508911133, 7.085352897644043, 7.400561809539795, 7.715770721435547, 8.030980110168457, 8.346189498901367, 8.661397933959961, 8.976607322692871, 9.291816711425781, 9.607025146484375, 9.922234535217285, 10.237443923950195, 10.552652359008789]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 5.0, 2.0, 5.0, 13.0, 27.0, 28.0, 49.0, 68.0, 151.0, 271.0, 541.0, 1061.0, 2240.0, 5626.0, 17872.0, 120686.0, 3978636.0, 45145.0, 12147.0, 4844.0, 2146.0, 1077.0, 556.0, 319.0, 212.0, 140.0, 110.0, 46.0, 64.0, 39.0, 38.0, 23.0, 16.0, 12.0, 8.0, 16.0, 8.0, 11.0, 11.0, 6.0, 3.0, 6.0, 4.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.20654296875, -0.19584274291992188, -0.18514251708984375, -0.17444229125976562, -0.1637420654296875, -0.15304183959960938, -0.14234161376953125, -0.13164138793945312, -0.120941162109375, -0.11024093627929688, -0.09954071044921875, -0.08884048461914062, -0.0781402587890625, -0.06744003295898438, -0.05673980712890625, -0.046039581298828125, -0.03533935546875, -0.024639129638671875, -0.01393890380859375, -0.003238677978515625, 0.0074615478515625, 0.018161773681640625, 0.02886199951171875, 0.039562225341796875, 0.050262451171875, 0.060962677001953125, 0.07166290283203125, 0.08236312866210938, 0.0930633544921875, 0.10376358032226562, 0.11446380615234375, 0.12516403198242188, 0.1358642578125, 0.14656448364257812, 0.15726470947265625, 0.16796493530273438, 0.1786651611328125, 0.18936538696289062, 0.20006561279296875, 0.21076583862304688, 0.221466064453125, 0.23216629028320312, 0.24286651611328125, 0.2535667419433594, 0.2642669677734375, 0.2749671936035156, 0.28566741943359375, 0.2963676452636719, 0.30706787109375, 0.3177680969238281, 0.32846832275390625, 0.3391685485839844, 0.3498687744140625, 0.3605690002441406, 0.37126922607421875, 0.3819694519042969, 0.392669677734375, 0.4033699035644531, 0.41407012939453125, 0.4247703552246094, 0.4354705810546875, 0.4461708068847656, 0.45687103271484375, 0.4675712585449219, 0.478271484375]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 4.0, 1.0, 7.0, 9.0, 6.0, 7.0, 12.0, 18.0, 15.0, 24.0, 403.0, 408.0, 17.0, 9.0, 13.0, 8.0, 11.0, 6.0, 1.0, 7.0, 4.0, 3.0, 4.0, 2.0, 3.0, 1.0, 1.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.042510986328125, -0.04099559783935547, -0.03948020935058594, -0.037964820861816406, -0.036449432373046875, -0.034934043884277344, -0.03341865539550781, -0.03190326690673828, -0.03038787841796875, -0.02887248992919922, -0.027357101440429688, -0.025841712951660156, -0.024326324462890625, -0.022810935974121094, -0.021295547485351562, -0.01978015899658203, -0.0182647705078125, -0.01674938201904297, -0.015233993530273438, -0.013718605041503906, -0.012203216552734375, -0.010687828063964844, -0.009172439575195312, -0.007657051086425781, -0.00614166259765625, -0.004626274108886719, -0.0031108856201171875, -0.0015954971313476562, -8.0108642578125e-05, 0.0014352798461914062, 0.0029506683349609375, 0.004466056823730469, 0.0059814453125, 0.007496833801269531, 0.009012222290039062, 0.010527610778808594, 0.012042999267578125, 0.013558387756347656, 0.015073776245117188, 0.01658916473388672, 0.01810455322265625, 0.01961994171142578, 0.021135330200195312, 0.022650718688964844, 0.024166107177734375, 0.025681495666503906, 0.027196884155273438, 0.02871227264404297, 0.0302276611328125, 0.03174304962158203, 0.03325843811035156, 0.034773826599121094, 0.036289215087890625, 0.037804603576660156, 0.03931999206542969, 0.04083538055419922, 0.04235076904296875, 0.04386615753173828, 0.04538154602050781, 0.046896934509277344, 0.048412322998046875, 0.049927711486816406, 0.05144309997558594, 0.05295848846435547, 0.054473876953125]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 7.0, 9.0, 10.0, 10.0, 25.0, 25.0, 31.0, 60.0, 100.0, 141.0, 226.0, 422.0, 995.0, 2859.0, 10734.0, 58370.0, 747966.0, 3235994.0, 111936.0, 17419.0, 4184.0, 1400.0, 611.0, 341.0, 177.0, 105.0, 58.0, 26.0, 19.0, 6.0, 7.0, 6.0, 2.0, 1.0, 0.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.33349609375, -0.3242759704589844, -0.31505584716796875, -0.3058357238769531, -0.2966156005859375, -0.2873954772949219, -0.27817535400390625, -0.2689552307128906, -0.259735107421875, -0.2505149841308594, -0.24129486083984375, -0.23207473754882812, -0.2228546142578125, -0.21363449096679688, -0.20441436767578125, -0.19519424438476562, -0.18597412109375, -0.17675399780273438, -0.16753387451171875, -0.15831375122070312, -0.1490936279296875, -0.13987350463867188, -0.13065338134765625, -0.12143325805664062, -0.112213134765625, -0.10299301147460938, -0.09377288818359375, -0.08455276489257812, -0.0753326416015625, -0.06611251831054688, -0.05689239501953125, -0.047672271728515625, -0.0384521484375, -0.029232025146484375, -0.02001190185546875, -0.010791778564453125, -0.0015716552734375, 0.007648468017578125, 0.01686859130859375, 0.026088714599609375, 0.035308837890625, 0.044528961181640625, 0.05374908447265625, 0.06296920776367188, 0.0721893310546875, 0.08140945434570312, 0.09062957763671875, 0.09984970092773438, 0.10906982421875, 0.11828994750976562, 0.12751007080078125, 0.13673019409179688, 0.1459503173828125, 0.15517044067382812, 0.16439056396484375, 0.17361068725585938, 0.182830810546875, 0.19205093383789062, 0.20127105712890625, 0.21049118041992188, 0.2197113037109375, 0.22893142700195312, 0.23815155029296875, 0.24737167358398438, 0.256591796875]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 1.0, 6.0, 7.0, 11.0, 11.0, 14.0, 19.0, 15.0, 28.0, 35.0, 53.0, 70.0, 91.0, 121.0, 135.0, 186.0, 332.0, 920.0, 935.0, 352.0, 227.0, 137.0, 109.0, 65.0, 50.0, 38.0, 25.0, 23.0, 19.0, 14.0, 8.0, 8.0, 4.0, 6.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.050018310546875, -0.04816579818725586, -0.04631328582763672, -0.04446077346801758, -0.04260826110839844, -0.0407557487487793, -0.038903236389160156, -0.037050724029541016, -0.035198211669921875, -0.033345699310302734, -0.031493186950683594, -0.029640674591064453, -0.027788162231445312, -0.025935649871826172, -0.02408313751220703, -0.02223062515258789, -0.02037811279296875, -0.01852560043334961, -0.01667308807373047, -0.014820575714111328, -0.012968063354492188, -0.011115550994873047, -0.009263038635253906, -0.007410526275634766, -0.005558013916015625, -0.0037055015563964844, -0.0018529891967773438, -4.76837158203125e-07, 0.0018520355224609375, 0.003704547882080078, 0.005557060241699219, 0.007409572601318359, 0.0092620849609375, 0.01111459732055664, 0.012967109680175781, 0.014819622039794922, 0.016672134399414062, 0.018524646759033203, 0.020377159118652344, 0.022229671478271484, 0.024082183837890625, 0.025934696197509766, 0.027787208557128906, 0.029639720916748047, 0.03149223327636719, 0.03334474563598633, 0.03519725799560547, 0.03704977035522461, 0.03890228271484375, 0.04075479507446289, 0.04260730743408203, 0.04445981979370117, 0.04631233215332031, 0.04816484451293945, 0.050017356872558594, 0.051869869232177734, 0.053722381591796875, 0.055574893951416016, 0.057427406311035156, 0.0592799186706543, 0.06113243103027344, 0.06298494338989258, 0.06483745574951172, 0.06668996810913086, 0.06854248046875]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 6.0, 10.0, 30.0, 63.0, 273.0, 464.0, 133.0, 22.0, 8.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.265873432159424, -2.2208850383758545, -2.175896644592285, -2.130908250808716, -2.0859198570251465, -2.040931463241577, -1.9959430694580078, -1.9509546756744385, -1.9059662818908691, -1.8609778881072998, -1.8159894943237305, -1.7710011005401611, -1.7260127067565918, -1.6810243129730225, -1.6360359191894531, -1.5910475254058838, -1.5460591316223145, -1.5010707378387451, -1.4560823440551758, -1.4110939502716064, -1.366105556488037, -1.3211171627044678, -1.2761287689208984, -1.231140375137329, -1.1861518621444702, -1.1411634683609009, -1.0961750745773315, -1.0511866807937622, -1.0061982870101929, -0.9612098932266235, -0.9162214994430542, -0.8712331056594849, -0.8262446522712708, -0.7812562584877014, -0.7362678647041321, -0.6912794709205627, -0.6462910771369934, -0.6013026833534241, -0.55631422996521, -0.5113258361816406, -0.4663374722003937, -0.42134907841682434, -0.376360684633255, -0.3313722610473633, -0.28638386726379395, -0.2413954883813858, -0.19640707969665527, -0.15141868591308594, -0.1064302921295166, -0.06144189462065697, -0.016453497111797333, 0.0285349041223526, 0.07352329790592194, 0.11851169168949127, 0.1635001003742218, 0.20848849415779114, 0.2534768879413605, 0.2984652817249298, 0.34345367550849915, 0.38844209909439087, 0.4334304928779602, 0.47841888666152954, 0.5234072804450989, 0.5683956742286682, 0.6133840680122375]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 5.0, 4.0, 4.0, 6.0, 7.0, 5.0, 10.0, 4.0, 17.0, 15.0, 16.0, 18.0, 23.0, 30.0, 32.0, 34.0, 44.0, 49.0, 58.0, 33.0, 50.0, 55.0, 47.0, 51.0, 50.0, 42.0, 41.0, 42.0, 35.0, 27.0, 26.0, 29.0, 25.0, 16.0, 14.0, 13.0, 10.0, 10.0, 4.0, 3.0, 2.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.2884256839752197, -0.27962765097618103, -0.27082961797714233, -0.26203158497810364, -0.25323355197906494, -0.24443553388118744, -0.23563751578330994, -0.22683948278427124, -0.21804144978523254, -0.20924341678619385, -0.20044538378715515, -0.19164736568927765, -0.18284933269023895, -0.17405129969120026, -0.16525328159332275, -0.15645524859428406, -0.14765721559524536, -0.13885918259620667, -0.13006114959716797, -0.12126313149929047, -0.11246509850025177, -0.10366706550121307, -0.09486903995275497, -0.08607101440429688, -0.07727298140525818, -0.06847494840621948, -0.05967692285776138, -0.050878893584012985, -0.04208086431026459, -0.03328283503651619, -0.024484805762767792, -0.015686776489019394, -0.006888747215270996, 0.0019092820584774017, 0.0107073113322258, 0.019505340605974197, 0.028303369879722595, 0.03710139915347099, 0.04589942842721939, 0.05469745770096779, 0.06349548697471619, 0.07229351997375488, 0.08109154552221298, 0.08988957107067108, 0.09868760406970978, 0.10748563706874847, 0.11628366261720657, 0.12508168816566467, 0.13387972116470337, 0.14267775416374207, 0.15147578716278076, 0.16027380526065826, 0.16907183825969696, 0.17786987125873566, 0.18666788935661316, 0.19546592235565186, 0.20426395535469055, 0.21306198835372925, 0.22186002135276794, 0.23065803945064545, 0.23945607244968414, 0.24825410544872284, 0.25705212354660034, 0.26585015654563904, 0.27464818954467773]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 4.0, 2.0, 3.0, 6.0, 9.0, 16.0, 20.0, 24.0, 28.0, 41.0, 61.0, 94.0, 143.0, 182.0, 276.0, 430.0, 718.0, 1127.0, 1895.0, 3165.0, 5632.0, 10712.0, 24717.0, 239072.0, 698539.0, 32921.0, 12953.0, 6490.0, 3557.0, 2062.0, 1282.0, 809.0, 495.0, 333.0, 238.0, 148.0, 102.0, 73.0, 49.0, 47.0, 33.0, 13.0, 15.0, 6.0, 9.0, 3.0, 3.0, 2.0, 5.0, 1.0, 0.0, 2.0, 1.0, 1.0], "bins": [-0.235595703125, -0.22869110107421875, -0.2217864990234375, -0.21488189697265625, -0.207977294921875, -0.20107269287109375, -0.1941680908203125, -0.18726348876953125, -0.18035888671875, -0.17345428466796875, -0.1665496826171875, -0.15964508056640625, -0.152740478515625, -0.14583587646484375, -0.1389312744140625, -0.13202667236328125, -0.1251220703125, -0.11821746826171875, -0.1113128662109375, -0.10440826416015625, -0.097503662109375, -0.09059906005859375, -0.0836944580078125, -0.07678985595703125, -0.06988525390625, -0.06298065185546875, -0.0560760498046875, -0.04917144775390625, -0.042266845703125, -0.03536224365234375, -0.0284576416015625, -0.02155303955078125, -0.0146484375, -0.00774383544921875, -0.0008392333984375, 0.00606536865234375, 0.012969970703125, 0.01987457275390625, 0.0267791748046875, 0.03368377685546875, 0.04058837890625, 0.04749298095703125, 0.0543975830078125, 0.06130218505859375, 0.068206787109375, 0.07511138916015625, 0.0820159912109375, 0.08892059326171875, 0.0958251953125, 0.10272979736328125, 0.1096343994140625, 0.11653900146484375, 0.123443603515625, 0.13034820556640625, 0.1372528076171875, 0.14415740966796875, 0.15106201171875, 0.15796661376953125, 0.1648712158203125, 0.17177581787109375, 0.178680419921875, 0.18558502197265625, 0.1924896240234375, 0.19939422607421875, 0.206298828125]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 3.0, 4.0, 2.0, 4.0, 3.0, 3.0, 7.0, 7.0, 13.0, 4.0, 7.0, 21.0, 36.0, 94.0, 259.0, 301.0, 126.0, 43.0, 12.0, 11.0, 5.0, 5.0, 4.0, 5.0, 4.0, 6.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0372314453125, -0.036036014556884766, -0.03484058380126953, -0.0336451530456543, -0.03244972229003906, -0.03125429153442383, -0.030058860778808594, -0.02886343002319336, -0.027667999267578125, -0.02647256851196289, -0.025277137756347656, -0.024081707000732422, -0.022886276245117188, -0.021690845489501953, -0.02049541473388672, -0.019299983978271484, -0.01810455322265625, -0.016909122467041016, -0.01571369171142578, -0.014518260955810547, -0.013322830200195312, -0.012127399444580078, -0.010931968688964844, -0.00973653793334961, -0.008541107177734375, -0.007345676422119141, -0.006150245666503906, -0.004954814910888672, -0.0037593841552734375, -0.002563953399658203, -0.0013685226440429688, -0.00017309188842773438, 0.0010223388671875, 0.0022177696228027344, 0.0034132003784179688, 0.004608631134033203, 0.0058040618896484375, 0.006999492645263672, 0.008194923400878906, 0.00939035415649414, 0.010585784912109375, 0.01178121566772461, 0.012976646423339844, 0.014172077178955078, 0.015367507934570312, 0.016562938690185547, 0.01775836944580078, 0.018953800201416016, 0.02014923095703125, 0.021344661712646484, 0.02254009246826172, 0.023735523223876953, 0.024930953979492188, 0.026126384735107422, 0.027321815490722656, 0.02851724624633789, 0.029712677001953125, 0.03090810775756836, 0.032103538513183594, 0.03329896926879883, 0.03449440002441406, 0.0356898307800293, 0.03688526153564453, 0.038080692291259766, 0.039276123046875]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 4.0, 6.0, 8.0, 18.0, 15.0, 26.0, 35.0, 65.0, 117.0, 187.0, 409.0, 1077.0, 3971.0, 37585.0, 923216.0, 73859.0, 5615.0, 1309.0, 500.0, 225.0, 120.0, 72.0, 40.0, 19.0, 14.0, 14.0, 9.0, 5.0, 9.0, 3.0, 2.0, 1.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.58642578125, -0.56591796875, -0.54541015625, -0.52490234375, -0.50439453125, -0.48388671875, -0.46337890625, -0.44287109375, -0.42236328125, -0.40185546875, -0.38134765625, -0.36083984375, -0.34033203125, -0.31982421875, -0.29931640625, -0.27880859375, -0.25830078125, -0.23779296875, -0.21728515625, -0.19677734375, -0.17626953125, -0.15576171875, -0.13525390625, -0.11474609375, -0.09423828125, -0.07373046875, -0.05322265625, -0.03271484375, -0.01220703125, 0.00830078125, 0.02880859375, 0.04931640625, 0.06982421875, 0.09033203125, 0.11083984375, 0.13134765625, 0.15185546875, 0.17236328125, 0.19287109375, 0.21337890625, 0.23388671875, 0.25439453125, 0.27490234375, 0.29541015625, 0.31591796875, 0.33642578125, 0.35693359375, 0.37744140625, 0.39794921875, 0.41845703125, 0.43896484375, 0.45947265625, 0.47998046875, 0.50048828125, 0.52099609375, 0.54150390625, 0.56201171875, 0.58251953125, 0.60302734375, 0.62353515625, 0.64404296875, 0.66455078125, 0.68505859375, 0.70556640625, 0.72607421875]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 2.0, 3.0, 5.0, 5.0, 6.0, 11.0, 15.0, 12.0, 12.0, 24.0, 27.0, 19.0, 29.0, 32.0, 30.0, 39.0, 34.0, 41.0, 44.0, 52.0, 44.0, 34.0, 52.0, 45.0, 39.0, 35.0, 27.0, 35.0, 34.0, 26.0, 29.0, 26.0, 21.0, 19.0, 17.0, 16.0, 12.0, 8.0, 8.0, 8.0, 3.0, 7.0, 1.0, 8.0, 3.0, 5.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.1605224609375, -0.15567398071289062, -0.15082550048828125, -0.14597702026367188, -0.1411285400390625, -0.13628005981445312, -0.13143157958984375, -0.12658309936523438, -0.121734619140625, -0.11688613891601562, -0.11203765869140625, -0.10718917846679688, -0.1023406982421875, -0.09749221801757812, -0.09264373779296875, -0.08779525756835938, -0.08294677734375, -0.07809829711914062, -0.07324981689453125, -0.06840133666992188, -0.0635528564453125, -0.058704376220703125, -0.05385589599609375, -0.049007415771484375, -0.044158935546875, -0.039310455322265625, -0.03446197509765625, -0.029613494873046875, -0.0247650146484375, -0.019916534423828125, -0.01506805419921875, -0.010219573974609375, -0.00537109375, -0.000522613525390625, 0.00432586669921875, 0.009174346923828125, 0.0140228271484375, 0.018871307373046875, 0.02371978759765625, 0.028568267822265625, 0.033416748046875, 0.038265228271484375, 0.04311370849609375, 0.047962188720703125, 0.0528106689453125, 0.057659149169921875, 0.06250762939453125, 0.06735610961914062, 0.07220458984375, 0.07705307006835938, 0.08190155029296875, 0.08675003051757812, 0.0915985107421875, 0.09644699096679688, 0.10129547119140625, 0.10614395141601562, 0.110992431640625, 0.11584091186523438, 0.12068939208984375, 0.12553787231445312, 0.1303863525390625, 0.13523483276367188, 0.14008331298828125, 0.14493179321289062, 0.1497802734375]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 4.0, 2.0, 5.0, 3.0, 4.0, 9.0, 9.0, 19.0, 21.0, 33.0, 39.0, 68.0, 118.0, 150.0, 275.0, 580.0, 1204.0, 3240.0, 13415.0, 339894.0, 666043.0, 16954.0, 3655.0, 1346.0, 610.0, 329.0, 197.0, 123.0, 67.0, 31.0, 30.0, 18.0, 13.0, 15.0, 10.0, 7.0, 4.0, 4.0, 5.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.404296875, -0.3912811279296875, -0.378265380859375, -0.3652496337890625, -0.35223388671875, -0.3392181396484375, -0.326202392578125, -0.3131866455078125, -0.3001708984375, -0.2871551513671875, -0.274139404296875, -0.2611236572265625, -0.24810791015625, -0.2350921630859375, -0.222076416015625, -0.2090606689453125, -0.196044921875, -0.1830291748046875, -0.170013427734375, -0.1569976806640625, -0.14398193359375, -0.1309661865234375, -0.117950439453125, -0.1049346923828125, -0.0919189453125, -0.0789031982421875, -0.065887451171875, -0.0528717041015625, -0.03985595703125, -0.0268402099609375, -0.013824462890625, -0.0008087158203125, 0.01220703125, 0.0252227783203125, 0.038238525390625, 0.0512542724609375, 0.06427001953125, 0.0772857666015625, 0.090301513671875, 0.1033172607421875, 0.1163330078125, 0.1293487548828125, 0.142364501953125, 0.1553802490234375, 0.16839599609375, 0.1814117431640625, 0.194427490234375, 0.2074432373046875, 0.220458984375, 0.2334747314453125, 0.246490478515625, 0.2595062255859375, 0.27252197265625, 0.2855377197265625, 0.298553466796875, 0.3115692138671875, 0.3245849609375, 0.3376007080078125, 0.350616455078125, 0.3636322021484375, 0.37664794921875, 0.3896636962890625, 0.402679443359375, 0.4156951904296875, 0.4287109375]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 0.0, 2.0, 2.0, 4.0, 6.0, 7.0, 5.0, 4.0, 8.0, 7.0, 15.0, 27.0, 38.0, 39.0, 67.0, 84.0, 141.0, 137.0, 113.0, 79.0, 58.0, 49.0, 21.0, 18.0, 22.0, 11.0, 10.0, 6.0, 4.0, 4.0, 4.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0005664825439453125, -0.000547848641872406, -0.0005292147397994995, -0.000510580837726593, -0.0004919469356536865, -0.00047331303358078003, -0.00045467913150787354, -0.00043604522943496704, -0.00041741132736206055, -0.00039877742528915405, -0.00038014352321624756, -0.00036150962114334106, -0.00034287571907043457, -0.0003242418169975281, -0.0003056079149246216, -0.0002869740128517151, -0.0002683401107788086, -0.0002497062087059021, -0.0002310723066329956, -0.0002124384045600891, -0.00019380450248718262, -0.00017517060041427612, -0.00015653669834136963, -0.00013790279626846313, -0.00011926889419555664, -0.00010063499212265015, -8.200109004974365e-05, -6.336718797683716e-05, -4.4733285903930664e-05, -2.609938383102417e-05, -7.465481758117676e-06, 1.1168420314788818e-05, 2.9802322387695312e-05, 4.843622446060181e-05, 6.70701265335083e-05, 8.57040286064148e-05, 0.00010433793067932129, 0.00012297183275222778, 0.00014160573482513428, 0.00016023963689804077, 0.00017887353897094727, 0.00019750744104385376, 0.00021614134311676025, 0.00023477524518966675, 0.00025340914726257324, 0.00027204304933547974, 0.00029067695140838623, 0.0003093108534812927, 0.0003279447555541992, 0.0003465786576271057, 0.0003652125597000122, 0.0003838464617729187, 0.0004024803638458252, 0.0004211142659187317, 0.0004397481679916382, 0.0004583820700645447, 0.00047701597213745117, 0.0004956498742103577, 0.0005142837762832642, 0.0005329176783561707, 0.0005515515804290771, 0.0005701854825019836, 0.0005888193845748901, 0.0006074532866477966, 0.0006260871887207031]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [4.0, 2.0, 2.0, 3.0, 4.0, 3.0, 3.0, 10.0, 14.0, 19.0, 38.0, 56.0, 89.0, 168.0, 364.0, 1101.0, 3993.0, 26082.0, 671323.0, 323454.0, 17249.0, 3019.0, 849.0, 360.0, 158.0, 84.0, 46.0, 33.0, 22.0, 9.0, 7.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2352294921875, -0.22278785705566406, -0.21034622192382812, -0.1979045867919922, -0.18546295166015625, -0.1730213165283203, -0.16057968139648438, -0.14813804626464844, -0.1356964111328125, -0.12325477600097656, -0.11081314086914062, -0.09837150573730469, -0.08592987060546875, -0.07348823547363281, -0.061046600341796875, -0.04860496520996094, -0.036163330078125, -0.023721694946289062, -0.011280059814453125, 0.0011615753173828125, 0.01360321044921875, 0.026044845581054688, 0.038486480712890625, 0.05092811584472656, 0.0633697509765625, 0.07581138610839844, 0.08825302124023438, 0.10069465637207031, 0.11313629150390625, 0.1255779266357422, 0.13801956176757812, 0.15046119689941406, 0.16290283203125, 0.17534446716308594, 0.18778610229492188, 0.2002277374267578, 0.21266937255859375, 0.2251110076904297, 0.23755264282226562, 0.24999427795410156, 0.2624359130859375, 0.27487754821777344, 0.2873191833496094, 0.2997608184814453, 0.31220245361328125, 0.3246440887451172, 0.3370857238769531, 0.34952735900878906, 0.361968994140625, 0.37441062927246094, 0.3868522644042969, 0.3992938995361328, 0.41173553466796875, 0.4241771697998047, 0.4366188049316406, 0.44906044006347656, 0.4615020751953125, 0.47394371032714844, 0.4863853454589844, 0.4988269805908203, 0.5112686157226562, 0.5237102508544922, 0.5361518859863281, 0.5485935211181641, 0.56103515625]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 5.0, 5.0, 7.0, 9.0, 11.0, 20.0, 26.0, 39.0, 54.0, 97.0, 156.0, 186.0, 131.0, 91.0, 51.0, 45.0, 21.0, 16.0, 15.0, 4.0, 4.0, 5.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1812744140625, -0.1738414764404297, -0.16640853881835938, -0.15897560119628906, -0.15154266357421875, -0.14410972595214844, -0.13667678833007812, -0.1292438507080078, -0.1218109130859375, -0.11437797546386719, -0.10694503784179688, -0.09951210021972656, -0.09207916259765625, -0.08464622497558594, -0.07721328735351562, -0.06978034973144531, -0.062347412109375, -0.05491447448730469, -0.047481536865234375, -0.04004859924316406, -0.03261566162109375, -0.025182723999023438, -0.017749786376953125, -0.010316848754882812, -0.0028839111328125, 0.0045490264892578125, 0.011981964111328125, 0.019414901733398438, 0.02684783935546875, 0.03428077697753906, 0.041713714599609375, 0.04914665222167969, 0.05657958984375, 0.06401252746582031, 0.07144546508789062, 0.07887840270996094, 0.08631134033203125, 0.09374427795410156, 0.10117721557617188, 0.10861015319824219, 0.1160430908203125, 0.12347602844238281, 0.13090896606445312, 0.13834190368652344, 0.14577484130859375, 0.15320777893066406, 0.16064071655273438, 0.1680736541748047, 0.175506591796875, 0.1829395294189453, 0.19037246704101562, 0.19780540466308594, 0.20523834228515625, 0.21267127990722656, 0.22010421752929688, 0.2275371551513672, 0.2349700927734375, 0.2424030303955078, 0.24983596801757812, 0.25726890563964844, 0.26470184326171875, 0.27213478088378906, 0.2795677185058594, 0.2870006561279297, 0.29443359375]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 3.0, 3.0, 9.0, 5.0, 10.0, 29.0, 73.0, 301.0, 395.0, 105.0, 32.0, 16.0, 8.0, 6.0, 3.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.940474033355713, -5.80599308013916, -5.671512126922607, -5.5370306968688965, -5.402549743652344, -5.268068790435791, -5.133587837219238, -4.9991068840026855, -4.864625930786133, -4.73014497756958, -4.595664024353027, -4.461182594299316, -4.326701641082764, -4.192220687866211, -4.057739734649658, -3.9232587814331055, -3.7887773513793945, -3.654296398162842, -3.51981520652771, -3.3853342533111572, -3.2508530616760254, -3.1163721084594727, -2.98189115524292, -2.847410202026367, -2.7129290103912354, -2.5784480571746826, -2.443966865539551, -2.309485912322998, -2.1750049591064453, -2.0405237674713135, -1.9060428142547607, -1.7715617418289185, -1.6370806694030762, -1.5025995969772339, -1.3681185245513916, -1.2336375713348389, -1.0991564989089966, -0.9646754264831543, -0.8301944136619568, -0.6957134008407593, -0.561232328414917, -0.4267512857913971, -0.2922702431678772, -0.1577892005443573, -0.023308157920837402, 0.11117291450500488, 0.2456539273262024, 0.3801349401473999, 0.5146160125732422, 0.6490970849990845, 0.783578097820282, 0.9180591106414795, 1.0525401830673218, 1.187021255493164, 1.3215022087097168, 1.455983281135559, 1.5904643535614014, 1.7249454259872437, 1.859426498413086, 1.9939074516296387, 2.1283884048461914, 2.2628695964813232, 2.397350549697876, 2.531831741333008, 2.6663126945495605]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 3.0, 1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 4.0, 6.0, 9.0, 9.0, 10.0, 17.0, 23.0, 35.0, 58.0, 103.0, 132.0, 171.0, 144.0, 94.0, 53.0, 43.0, 17.0, 12.0, 9.0, 5.0, 9.0, 4.0, 3.0, 3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8885433673858643, -2.7938334941864014, -2.6991233825683594, -2.6044135093688965, -2.5097036361694336, -2.4149937629699707, -2.320283889770508, -2.225573778152466, -2.130863904953003, -2.03615403175354, -1.9414440393447876, -1.8467340469360352, -1.7520241737365723, -1.6573143005371094, -1.562604308128357, -1.4678943157196045, -1.3731844425201416, -1.2784745693206787, -1.1837645769119263, -1.0890545845031738, -0.9943447113037109, -0.8996347784996033, -0.8049248456954956, -0.7102149128913879, -0.6155049800872803, -0.5207950472831726, -0.42608511447906494, -0.3313751816749573, -0.2366652488708496, -0.14195531606674194, -0.04724538326263428, 0.04746454954147339, 0.14217472076416016, 0.23688465356826782, 0.3315945863723755, 0.42630451917648315, 0.5210144519805908, 0.6157243847846985, 0.7104343175888062, 0.8051442503929138, 0.8998541831970215, 0.9945641160011292, 1.0892740488052368, 1.1839840412139893, 1.2786939144134521, 1.373403787612915, 1.4681137800216675, 1.56282377243042, 1.6575336456298828, 1.7522435188293457, 1.8469535112380981, 1.9416635036468506, 2.0363733768463135, 2.1310832500457764, 2.2257933616638184, 2.3205032348632812, 2.415213108062744, 2.509922981262207, 2.60463285446167, 2.699342966079712, 2.794052839279175, 2.8887627124786377, 2.9834728240966797, 3.0781826972961426, 3.1728925704956055]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 3.0, 2.0, 1.0, 4.0, 3.0, 3.0, 9.0, 7.0, 13.0, 12.0, 15.0, 20.0, 31.0, 23.0, 80.0, 393.0, 7879.0, 4106897.0, 76392.0, 2050.0, 260.0, 71.0, 34.0, 16.0, 22.0, 10.0, 9.0, 8.0, 2.0, 2.0, 8.0, 2.0, 1.0, 2.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.93359375, -1.86639404296875, -1.7991943359375, -1.73199462890625, -1.664794921875, -1.59759521484375, -1.5303955078125, -1.46319580078125, -1.39599609375, -1.32879638671875, -1.2615966796875, -1.19439697265625, -1.127197265625, -1.05999755859375, -0.9927978515625, -0.92559814453125, -0.8583984375, -0.79119873046875, -0.7239990234375, -0.65679931640625, -0.589599609375, -0.52239990234375, -0.4552001953125, -0.38800048828125, -0.32080078125, -0.25360107421875, -0.1864013671875, -0.11920166015625, -0.052001953125, 0.01519775390625, 0.0823974609375, 0.14959716796875, 0.216796875, 0.28399658203125, 0.3511962890625, 0.41839599609375, 0.485595703125, 0.55279541015625, 0.6199951171875, 0.68719482421875, 0.75439453125, 0.82159423828125, 0.8887939453125, 0.95599365234375, 1.023193359375, 1.09039306640625, 1.1575927734375, 1.22479248046875, 1.2919921875, 1.35919189453125, 1.4263916015625, 1.49359130859375, 1.560791015625, 1.62799072265625, 1.6951904296875, 1.76239013671875, 1.82958984375, 1.89678955078125, 1.9639892578125, 2.03118896484375, 2.098388671875, 2.16558837890625, 2.2327880859375, 2.29998779296875, 2.3671875]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 4.0, 4.0, 2.0, 3.0, 4.0, 2.0, 6.0, 4.0, 10.0, 9.0, 5.0, 11.0, 13.0, 25.0, 35.0, 59.0, 122.0, 129.0, 149.0, 126.0, 102.0, 59.0, 31.0, 21.0, 8.0, 10.0, 7.0, 6.0, 13.0, 2.0, 4.0, 6.0, 6.0, 6.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0214691162109375, -0.020688533782958984, -0.01990795135498047, -0.019127368927001953, -0.018346786499023438, -0.017566204071044922, -0.016785621643066406, -0.01600503921508789, -0.015224456787109375, -0.01444387435913086, -0.013663291931152344, -0.012882709503173828, -0.012102127075195312, -0.011321544647216797, -0.010540962219238281, -0.009760379791259766, -0.00897979736328125, -0.008199214935302734, -0.007418632507324219, -0.006638050079345703, -0.0058574676513671875, -0.005076885223388672, -0.004296302795410156, -0.0035157203674316406, -0.002735137939453125, -0.0019545555114746094, -0.0011739730834960938, -0.0003933906555175781, 0.0003871917724609375, 0.0011677742004394531, 0.0019483566284179688, 0.0027289390563964844, 0.003509521484375, 0.004290103912353516, 0.005070686340332031, 0.005851268768310547, 0.0066318511962890625, 0.007412433624267578, 0.008193016052246094, 0.00897359848022461, 0.009754180908203125, 0.01053476333618164, 0.011315345764160156, 0.012095928192138672, 0.012876510620117188, 0.013657093048095703, 0.014437675476074219, 0.015218257904052734, 0.01599884033203125, 0.016779422760009766, 0.01756000518798828, 0.018340587615966797, 0.019121170043945312, 0.019901752471923828, 0.020682334899902344, 0.02146291732788086, 0.022243499755859375, 0.02302408218383789, 0.023804664611816406, 0.024585247039794922, 0.025365829467773438, 0.026146411895751953, 0.02692699432373047, 0.027707576751708984, 0.0284881591796875]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 4.0, 3.0, 12.0, 16.0, 37.0, 50.0, 82.0, 209.0, 428.0, 1089.0, 3107.0, 12992.0, 115218.0, 3926190.0, 117190.0, 12690.0, 3022.0, 990.0, 440.0, 216.0, 139.0, 68.0, 39.0, 22.0, 18.0, 9.0, 7.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.29833984375, -0.2812957763671875, -0.264251708984375, -0.2472076416015625, -0.23016357421875, -0.2131195068359375, -0.196075439453125, -0.1790313720703125, -0.1619873046875, -0.1449432373046875, -0.127899169921875, -0.1108551025390625, -0.09381103515625, -0.0767669677734375, -0.059722900390625, -0.0426788330078125, -0.025634765625, -0.0085906982421875, 0.008453369140625, 0.0254974365234375, 0.04254150390625, 0.0595855712890625, 0.076629638671875, 0.0936737060546875, 0.1107177734375, 0.1277618408203125, 0.144805908203125, 0.1618499755859375, 0.17889404296875, 0.1959381103515625, 0.212982177734375, 0.2300262451171875, 0.2470703125, 0.2641143798828125, 0.281158447265625, 0.2982025146484375, 0.31524658203125, 0.3322906494140625, 0.349334716796875, 0.3663787841796875, 0.3834228515625, 0.4004669189453125, 0.417510986328125, 0.4345550537109375, 0.45159912109375, 0.4686431884765625, 0.485687255859375, 0.5027313232421875, 0.519775390625, 0.5368194580078125, 0.553863525390625, 0.5709075927734375, 0.58795166015625, 0.6049957275390625, 0.622039794921875, 0.6390838623046875, 0.6561279296875, 0.6731719970703125, 0.690216064453125, 0.7072601318359375, 0.72430419921875, 0.7413482666015625, 0.758392333984375, 0.7754364013671875, 0.79248046875]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 3.0, 2.0, 6.0, 4.0, 8.0, 5.0, 8.0, 11.0, 18.0, 21.0, 36.0, 56.0, 76.0, 135.0, 297.0, 1446.0, 1337.0, 308.0, 101.0, 79.0, 32.0, 26.0, 16.0, 13.0, 10.0, 5.0, 3.0, 1.0, 3.0, 4.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.052032470703125, -0.050295352935791016, -0.04855823516845703, -0.04682111740112305, -0.04508399963378906, -0.04334688186645508, -0.041609764099121094, -0.03987264633178711, -0.038135528564453125, -0.03639841079711914, -0.034661293029785156, -0.03292417526245117, -0.031187057495117188, -0.029449939727783203, -0.02771282196044922, -0.025975704193115234, -0.02423858642578125, -0.022501468658447266, -0.02076435089111328, -0.019027233123779297, -0.017290115356445312, -0.015552997589111328, -0.013815879821777344, -0.01207876205444336, -0.010341644287109375, -0.00860452651977539, -0.006867408752441406, -0.005130290985107422, -0.0033931732177734375, -0.0016560554504394531, 8.106231689453125e-05, 0.0018181800842285156, 0.0035552978515625, 0.005292415618896484, 0.007029533386230469, 0.008766651153564453, 0.010503768920898438, 0.012240886688232422, 0.013978004455566406, 0.01571512222290039, 0.017452239990234375, 0.01918935775756836, 0.020926475524902344, 0.022663593292236328, 0.024400711059570312, 0.026137828826904297, 0.02787494659423828, 0.029612064361572266, 0.03134918212890625, 0.033086299896240234, 0.03482341766357422, 0.0365605354309082, 0.03829765319824219, 0.04003477096557617, 0.041771888732910156, 0.04350900650024414, 0.045246124267578125, 0.04698324203491211, 0.048720359802246094, 0.05045747756958008, 0.05219459533691406, 0.05393171310424805, 0.05566883087158203, 0.057405948638916016, 0.05914306640625]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [15.0, 279.0, 681.0, 33.0, 8.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12057021260261536, -0.06381553411483765, -0.0070608556270599365, 0.04969382286071777, 0.10644850134849548, 0.1632031798362732, 0.2199578583240509, 0.2767125368118286, 0.3334672152996063, 0.39022189378738403, 0.44697657227516174, 0.5037312507629395, 0.5604859590530396, 0.6172406077384949, 0.6739952564239502, 0.7307499647140503, 0.7875046730041504, 0.8442593812942505, 0.9010140299797058, 0.9577686786651611, 1.0145233869552612, 1.0712780952453613, 1.1280326843261719, 1.184787392616272, 1.241542100906372, 1.2982968091964722, 1.3550515174865723, 1.4118061065673828, 1.468560814857483, 1.525315523147583, 1.5820701122283936, 1.6388248205184937, 1.6955795288085938, 1.7523342370986938, 1.809088945388794, 1.8658435344696045, 1.9225982427597046, 1.9793529510498047, 2.0361075401306152, 2.092862367630005, 2.1496169567108154, 2.206371545791626, 2.2631263732910156, 2.319880962371826, 2.3766355514526367, 2.4333903789520264, 2.490144968032837, 2.5468997955322266, 2.603654384613037, 2.6604089736938477, 2.7171638011932373, 2.773918390274048, 2.8306732177734375, 2.887427806854248, 2.9441823959350586, 3.0009372234344482, 3.057691812515259, 3.1144464015960693, 3.171201229095459, 3.2279558181762695, 3.28471040725708, 3.3414652347564697, 3.3982198238372803, 3.45497465133667, 3.5117292404174805]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 5.0, 5.0, 4.0, 4.0, 1.0, 1.0, 3.0, 7.0, 8.0, 11.0, 13.0, 27.0, 22.0, 47.0, 58.0, 85.0, 92.0, 106.0, 82.0, 95.0, 93.0, 72.0, 44.0, 20.0, 20.0, 20.0, 14.0, 10.0, 5.0, 6.0, 3.0, 4.0, 3.0, 4.0, 3.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0], "bins": [-0.28923821449279785, -0.2812667489051819, -0.27329525351524353, -0.26532378792762756, -0.2573522925376892, -0.24938082695007324, -0.24140936136245728, -0.23343788087368011, -0.22546640038490295, -0.2174949198961258, -0.20952343940734863, -0.20155197381973267, -0.1935804933309555, -0.18560901284217834, -0.17763754725456238, -0.16966606676578522, -0.16169458627700806, -0.1537231057882309, -0.14575162529945374, -0.13778015971183777, -0.1298086792230606, -0.12183719873428345, -0.11386572569608688, -0.10589425265789032, -0.09792277216911316, -0.089951291680336, -0.08197981864213943, -0.07400834560394287, -0.06603686511516571, -0.05806538835167885, -0.050093911588191986, -0.042122434824705124, -0.03415095806121826, -0.0261794812977314, -0.018208004534244537, -0.010236527770757675, -0.002265051007270813, 0.005706425756216049, 0.013677902519702911, 0.021649379283189774, 0.029620856046676636, 0.0375923328101635, 0.04556380957365036, 0.05353528633713722, 0.061506763100624084, 0.06947824358940125, 0.07744971662759781, 0.08542118966579437, 0.09339267015457153, 0.1013641506433487, 0.10933562368154526, 0.11730709671974182, 0.12527857720851898, 0.13325005769729614, 0.1412215232849121, 0.14919300377368927, 0.15716448426246643, 0.1651359647512436, 0.17310744524002075, 0.18107891082763672, 0.18905039131641388, 0.19702187180519104, 0.204993337392807, 0.21296481788158417, 0.22093629837036133]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 3.0, 6.0, 7.0, 17.0, 18.0, 47.0, 79.0, 191.0, 376.0, 999.0, 2362.0, 6714.0, 21573.0, 125753.0, 753310.0, 107041.0, 19969.0, 6178.0, 2276.0, 901.0, 355.0, 176.0, 99.0, 35.0, 30.0, 14.0, 8.0, 5.0, 6.0, 2.0, 3.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.225341796875, -0.216827392578125, -0.20831298828125, -0.199798583984375, -0.1912841796875, -0.182769775390625, -0.17425537109375, -0.165740966796875, -0.1572265625, -0.148712158203125, -0.14019775390625, -0.131683349609375, -0.1231689453125, -0.114654541015625, -0.10614013671875, -0.097625732421875, -0.089111328125, -0.080596923828125, -0.07208251953125, -0.063568115234375, -0.0550537109375, -0.046539306640625, -0.03802490234375, -0.029510498046875, -0.02099609375, -0.012481689453125, -0.00396728515625, 0.004547119140625, 0.0130615234375, 0.021575927734375, 0.03009033203125, 0.038604736328125, 0.047119140625, 0.055633544921875, 0.06414794921875, 0.072662353515625, 0.0811767578125, 0.089691162109375, 0.09820556640625, 0.106719970703125, 0.115234375, 0.123748779296875, 0.13226318359375, 0.140777587890625, 0.1492919921875, 0.157806396484375, 0.16632080078125, 0.174835205078125, 0.183349609375, 0.191864013671875, 0.20037841796875, 0.208892822265625, 0.2174072265625, 0.225921630859375, 0.23443603515625, 0.242950439453125, 0.25146484375, 0.259979248046875, 0.26849365234375, 0.277008056640625, 0.2855224609375, 0.294036865234375, 0.30255126953125, 0.311065673828125, 0.319580078125]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 2.0, 3.0, 3.0, 3.0, 2.0, 11.0, 6.0, 5.0, 5.0, 6.0, 13.0, 8.0, 17.0, 26.0, 47.0, 57.0, 74.0, 90.0, 94.0, 106.0, 106.0, 64.0, 71.0, 44.0, 39.0, 25.0, 14.0, 15.0, 7.0, 6.0, 5.0, 7.0, 7.0, 2.0, 3.0, 4.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.016998291015625, -0.016397953033447266, -0.01579761505126953, -0.015197277069091797, -0.014596939086914062, -0.013996601104736328, -0.013396263122558594, -0.01279592514038086, -0.012195587158203125, -0.01159524917602539, -0.010994911193847656, -0.010394573211669922, -0.009794235229492188, -0.009193897247314453, -0.008593559265136719, -0.007993221282958984, -0.00739288330078125, -0.006792545318603516, -0.006192207336425781, -0.005591869354248047, -0.0049915313720703125, -0.004391193389892578, -0.0037908554077148438, -0.0031905174255371094, -0.002590179443359375, -0.0019898414611816406, -0.0013895034790039062, -0.0007891654968261719, -0.0001888275146484375, 0.0004115104675292969, 0.0010118484497070312, 0.0016121864318847656, 0.0022125244140625, 0.0028128623962402344, 0.0034132003784179688, 0.004013538360595703, 0.0046138763427734375, 0.005214214324951172, 0.005814552307128906, 0.006414890289306641, 0.007015228271484375, 0.007615566253662109, 0.008215904235839844, 0.008816242218017578, 0.009416580200195312, 0.010016918182373047, 0.010617256164550781, 0.011217594146728516, 0.01181793212890625, 0.012418270111083984, 0.013018608093261719, 0.013618946075439453, 0.014219284057617188, 0.014819622039794922, 0.015419960021972656, 0.01602029800415039, 0.016620635986328125, 0.01722097396850586, 0.017821311950683594, 0.018421649932861328, 0.019021987915039062, 0.019622325897216797, 0.02022266387939453, 0.020823001861572266, 0.02142333984375]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 3.0, 4.0, 1.0, 2.0, 2.0, 2.0, 4.0, 2.0, 3.0, 7.0, 6.0, 8.0, 10.0, 13.0, 22.0, 47.0, 143.0, 728.0, 7941.0, 646939.0, 386767.0, 5091.0, 556.0, 127.0, 28.0, 16.0, 9.0, 8.0, 8.0, 5.0, 4.0, 8.0, 9.0, 3.0, 6.0, 7.0, 5.0, 3.0, 3.0, 3.0, 2.0, 4.0, 0.0, 0.0, 2.0], "bins": [-0.7470703125, -0.7278823852539062, -0.7086944580078125, -0.6895065307617188, -0.670318603515625, -0.6511306762695312, -0.6319427490234375, -0.6127548217773438, -0.59356689453125, -0.5743789672851562, -0.5551910400390625, -0.5360031127929688, -0.516815185546875, -0.49762725830078125, -0.4784393310546875, -0.45925140380859375, -0.4400634765625, -0.42087554931640625, -0.4016876220703125, -0.38249969482421875, -0.363311767578125, -0.34412384033203125, -0.3249359130859375, -0.30574798583984375, -0.28656005859375, -0.26737213134765625, -0.2481842041015625, -0.22899627685546875, -0.209808349609375, -0.19062042236328125, -0.1714324951171875, -0.15224456787109375, -0.133056640625, -0.11386871337890625, -0.0946807861328125, -0.07549285888671875, -0.056304931640625, -0.03711700439453125, -0.0179290771484375, 0.00125885009765625, 0.02044677734375, 0.03963470458984375, 0.0588226318359375, 0.07801055908203125, 0.097198486328125, 0.11638641357421875, 0.1355743408203125, 0.15476226806640625, 0.1739501953125, 0.19313812255859375, 0.2123260498046875, 0.23151397705078125, 0.250701904296875, 0.26988983154296875, 0.2890777587890625, 0.30826568603515625, 0.32745361328125, 0.34664154052734375, 0.3658294677734375, 0.38501739501953125, 0.404205322265625, 0.42339324951171875, 0.4425811767578125, 0.46176910400390625, 0.48095703125]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 4.0, 9.0, 2.0, 5.0, 4.0, 6.0, 8.0, 9.0, 13.0, 13.0, 7.0, 13.0, 19.0, 24.0, 22.0, 22.0, 23.0, 35.0, 38.0, 38.0, 33.0, 36.0, 47.0, 53.0, 34.0, 45.0, 34.0, 45.0, 44.0, 31.0, 36.0, 30.0, 31.0, 29.0, 34.0, 23.0, 17.0, 20.0, 18.0, 9.0, 11.0, 9.0, 7.0, 2.0, 4.0, 3.0, 3.0, 7.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0850830078125, -0.0824737548828125, -0.079864501953125, -0.0772552490234375, -0.07464599609375, -0.0720367431640625, -0.069427490234375, -0.0668182373046875, -0.064208984375, -0.0615997314453125, -0.058990478515625, -0.0563812255859375, -0.05377197265625, -0.0511627197265625, -0.048553466796875, -0.0459442138671875, -0.0433349609375, -0.0407257080078125, -0.038116455078125, -0.0355072021484375, -0.03289794921875, -0.0302886962890625, -0.027679443359375, -0.0250701904296875, -0.0224609375, -0.0198516845703125, -0.017242431640625, -0.0146331787109375, -0.01202392578125, -0.0094146728515625, -0.006805419921875, -0.0041961669921875, -0.0015869140625, 0.0010223388671875, 0.003631591796875, 0.0062408447265625, 0.00885009765625, 0.0114593505859375, 0.014068603515625, 0.0166778564453125, 0.019287109375, 0.0218963623046875, 0.024505615234375, 0.0271148681640625, 0.02972412109375, 0.0323333740234375, 0.034942626953125, 0.0375518798828125, 0.0401611328125, 0.0427703857421875, 0.045379638671875, 0.0479888916015625, 0.05059814453125, 0.0532073974609375, 0.055816650390625, 0.0584259033203125, 0.06103515625, 0.0636444091796875, 0.066253662109375, 0.0688629150390625, 0.07147216796875, 0.0740814208984375, 0.076690673828125, 0.0792999267578125, 0.0819091796875]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 3.0, 9.0, 8.0, 7.0, 22.0, 29.0, 56.0, 127.0, 468.0, 3414.0, 825038.0, 216589.0, 2222.0, 328.0, 91.0, 40.0, 31.0, 30.0, 6.0, 10.0, 2.0, 4.0, 8.0, 3.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.0126953125, -0.9828338623046875, -0.952972412109375, -0.9231109619140625, -0.89324951171875, -0.8633880615234375, -0.833526611328125, -0.8036651611328125, -0.7738037109375, -0.7439422607421875, -0.714080810546875, -0.6842193603515625, -0.65435791015625, -0.6244964599609375, -0.594635009765625, -0.5647735595703125, -0.534912109375, -0.5050506591796875, -0.475189208984375, -0.4453277587890625, -0.41546630859375, -0.3856048583984375, -0.355743408203125, -0.3258819580078125, -0.2960205078125, -0.2661590576171875, -0.236297607421875, -0.2064361572265625, -0.17657470703125, -0.1467132568359375, -0.116851806640625, -0.0869903564453125, -0.05712890625, -0.0272674560546875, 0.002593994140625, 0.0324554443359375, 0.06231689453125, 0.0921783447265625, 0.122039794921875, 0.1519012451171875, 0.1817626953125, 0.2116241455078125, 0.241485595703125, 0.2713470458984375, 0.30120849609375, 0.3310699462890625, 0.360931396484375, 0.3907928466796875, 0.420654296875, 0.4505157470703125, 0.480377197265625, 0.5102386474609375, 0.54010009765625, 0.5699615478515625, 0.599822998046875, 0.6296844482421875, 0.6595458984375, 0.6894073486328125, 0.719268798828125, 0.7491302490234375, 0.77899169921875, 0.8088531494140625, 0.838714599609375, 0.8685760498046875, 0.8984375]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 4.0, 5.0, 5.0, 2.0, 3.0, 5.0, 5.0, 10.0, 16.0, 9.0, 27.0, 31.0, 61.0, 181.0, 323.0, 138.0, 58.0, 30.0, 20.0, 20.0, 9.0, 6.0, 11.0, 8.0, 4.0, 5.0, 5.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0010747909545898438, -0.0010394752025604248, -0.0010041594505310059, -0.0009688436985015869, -0.000933527946472168, -0.000898212194442749, -0.0008628964424133301, -0.0008275806903839111, -0.0007922649383544922, -0.0007569491863250732, -0.0007216334342956543, -0.0006863176822662354, -0.0006510019302368164, -0.0006156861782073975, -0.0005803704261779785, -0.0005450546741485596, -0.0005097389221191406, -0.0004744231700897217, -0.00043910741806030273, -0.0004037916660308838, -0.00036847591400146484, -0.0003331601619720459, -0.00029784440994262695, -0.000262528657913208, -0.00022721290588378906, -0.00019189715385437012, -0.00015658140182495117, -0.00012126564979553223, -8.594989776611328e-05, -5.0634145736694336e-05, -1.531839370727539e-05, 1.9997358322143555e-05, 5.53131103515625e-05, 9.062886238098145e-05, 0.0001259446144104004, 0.00016126036643981934, 0.00019657611846923828, 0.00023189187049865723, 0.00026720762252807617, 0.0003025233745574951, 0.00033783912658691406, 0.000373154878616333, 0.00040847063064575195, 0.0004437863826751709, 0.00047910213470458984, 0.0005144178867340088, 0.0005497336387634277, 0.0005850493907928467, 0.0006203651428222656, 0.0006556808948516846, 0.0006909966468811035, 0.0007263123989105225, 0.0007616281509399414, 0.0007969439029693604, 0.0008322596549987793, 0.0008675754070281982, 0.0009028911590576172, 0.0009382069110870361, 0.0009735226631164551, 0.001008838415145874, 0.001044154167175293, 0.001079469919204712, 0.0011147856712341309, 0.0011501014232635498, 0.0011854171752929688]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 6.0, 3.0, 4.0, 12.0, 18.0, 39.0, 86.0, 225.0, 573.0, 2195.0, 12693.0, 437269.0, 576920.0, 15092.0, 2360.0, 660.0, 220.0, 84.0, 42.0, 20.0, 16.0, 6.0, 3.0, 4.0, 4.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.619140625, -0.6036758422851562, -0.5882110595703125, -0.5727462768554688, -0.557281494140625, -0.5418167114257812, -0.5263519287109375, -0.5108871459960938, -0.49542236328125, -0.47995758056640625, -0.4644927978515625, -0.44902801513671875, -0.433563232421875, -0.41809844970703125, -0.4026336669921875, -0.38716888427734375, -0.3717041015625, -0.35623931884765625, -0.3407745361328125, -0.32530975341796875, -0.309844970703125, -0.29438018798828125, -0.2789154052734375, -0.26345062255859375, -0.24798583984375, -0.23252105712890625, -0.2170562744140625, -0.20159149169921875, -0.186126708984375, -0.17066192626953125, -0.1551971435546875, -0.13973236083984375, -0.124267578125, -0.10880279541015625, -0.0933380126953125, -0.07787322998046875, -0.062408447265625, -0.04694366455078125, -0.0314788818359375, -0.01601409912109375, -0.00054931640625, 0.01491546630859375, 0.0303802490234375, 0.04584503173828125, 0.061309814453125, 0.07677459716796875, 0.0922393798828125, 0.10770416259765625, 0.1231689453125, 0.13863372802734375, 0.1540985107421875, 0.16956329345703125, 0.185028076171875, 0.20049285888671875, 0.2159576416015625, 0.23142242431640625, 0.24688720703125, 0.26235198974609375, 0.2778167724609375, 0.29328155517578125, 0.308746337890625, 0.32421112060546875, 0.3396759033203125, 0.35514068603515625, 0.37060546875]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 4.0, 2.0, 3.0, 6.0, 11.0, 18.0, 19.0, 18.0, 29.0, 44.0, 50.0, 73.0, 72.0, 91.0, 85.0, 84.0, 82.0, 66.0, 42.0, 34.0, 36.0, 34.0, 15.0, 12.0, 13.0, 16.0, 4.0, 7.0, 5.0, 9.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0], "bins": [-0.1033935546875, -0.09981727600097656, -0.09624099731445312, -0.09266471862792969, -0.08908843994140625, -0.08551216125488281, -0.08193588256835938, -0.07835960388183594, -0.0747833251953125, -0.07120704650878906, -0.06763076782226562, -0.06405448913574219, -0.06047821044921875, -0.05690193176269531, -0.053325653076171875, -0.04974937438964844, -0.046173095703125, -0.04259681701660156, -0.039020538330078125, -0.03544425964355469, -0.03186798095703125, -0.028291702270507812, -0.024715423583984375, -0.021139144897460938, -0.0175628662109375, -0.013986587524414062, -0.010410308837890625, -0.0068340301513671875, -0.00325775146484375, 0.0003185272216796875, 0.003894805908203125, 0.0074710845947265625, 0.01104736328125, 0.014623641967773438, 0.018199920654296875, 0.021776199340820312, 0.02535247802734375, 0.028928756713867188, 0.032505035400390625, 0.03608131408691406, 0.0396575927734375, 0.04323387145996094, 0.046810150146484375, 0.05038642883300781, 0.05396270751953125, 0.05753898620605469, 0.061115264892578125, 0.06469154357910156, 0.068267822265625, 0.07184410095214844, 0.07542037963867188, 0.07899665832519531, 0.08257293701171875, 0.08614921569824219, 0.08972549438476562, 0.09330177307128906, 0.0968780517578125, 0.10045433044433594, 0.10403060913085938, 0.10760688781738281, 0.11118316650390625, 0.11475944519042969, 0.11833572387695312, 0.12191200256347656, 0.12548828125]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 7.0, 8.0, 17.0, 39.0, 87.0, 261.0, 335.0, 163.0, 51.0, 19.0, 9.0, 8.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9462106823921204, -0.887650728225708, -0.8290907740592957, -0.7705308198928833, -0.7119709253311157, -0.6534109115600586, -0.594851016998291, -0.5362910628318787, -0.4777311086654663, -0.41917115449905396, -0.3606112003326416, -0.30205127596855164, -0.24349132180213928, -0.18493136763572693, -0.12637144327163696, -0.06781148910522461, -0.009251534938812256, 0.0493084117770195, 0.10786835849285126, 0.16642829775810242, 0.22498825192451477, 0.2835482060909271, 0.3421081304550171, 0.40066808462142944, 0.4592280387878418, 0.5177879929542542, 0.5763479471206665, 0.6349078416824341, 0.6934678554534912, 0.7520277500152588, 0.8105877041816711, 0.8691476583480835, 0.9277076721191406, 0.986267626285553, 1.0448275804519653, 1.103387475013733, 1.16194748878479, 1.2205073833465576, 1.2790672779083252, 1.3376272916793823, 1.3961873054504395, 1.454747200012207, 1.5133072137832642, 1.5718671083450317, 1.6304271221160889, 1.6889870166778564, 1.747546911239624, 1.8061069250106812, 1.8646668195724487, 1.9232267141342163, 1.9817867279052734, 2.040346622467041, 2.0989065170288086, 2.1574666500091553, 2.216026544570923, 2.2745864391326904, 2.333146333694458, 2.3917062282562256, 2.450266122817993, 2.50882625579834, 2.5673861503601074, 2.625946044921875, 2.6845059394836426, 2.74306583404541, 2.801625967025757]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 4.0, 5.0, 12.0, 14.0, 21.0, 28.0, 32.0, 63.0, 86.0, 87.0, 110.0, 140.0, 124.0, 85.0, 59.0, 45.0, 23.0, 20.0, 9.0, 8.0, 8.0, 6.0, 8.0, 3.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1661746501922607, -1.1209747791290283, -1.0757747888565063, -1.030574917793274, -0.9853750467300415, -0.9401751160621643, -0.8949751853942871, -0.8497753143310547, -0.8045753836631775, -0.7593754529953003, -0.7141755819320679, -0.6689756512641907, -0.6237757205963135, -0.578575849533081, -0.5333759188652039, -0.48817601799964905, -0.44297611713409424, -0.39777621626853943, -0.3525763154029846, -0.3073763847351074, -0.2621764838695526, -0.2169765830039978, -0.1717766523361206, -0.1265767514705658, -0.08137685060501099, -0.03617694228887558, 0.009022966027259827, 0.05422288179397583, 0.09942278265953064, 0.14462268352508545, 0.18982261419296265, 0.23502251505851746, 0.2802225351333618, 0.3254224359989166, 0.37062233686447144, 0.41582226753234863, 0.46102216839790344, 0.5062220692634583, 0.5514219999313354, 0.5966218709945679, 0.6418218016624451, 0.6870217323303223, 0.7322216033935547, 0.7774215340614319, 0.8226214647293091, 0.8678213357925415, 0.9130212664604187, 0.9582211971282959, 1.0034210681915283, 1.0486209392547607, 1.0938209295272827, 1.1390208005905151, 1.1842206716537476, 1.2294206619262695, 1.274620532989502, 1.3198204040527344, 1.3650202751159668, 1.4102201461791992, 1.4554201364517212, 1.5006200075149536, 1.545819878578186, 1.591019868850708, 1.6362197399139404, 1.6814196109771729, 1.7266196012496948]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 8.0, 8.0, 8.0, 4.0, 8.0, 12.0, 0.0, 7.0, 13.0, 14.0, 10.0, 28.0, 51.0, 124.0, 287.0, 962.0, 4988.0, 123970.0, 4044390.0, 16579.0, 1914.0, 510.0, 154.0, 63.0, 50.0, 37.0, 28.0, 7.0, 5.0, 9.0, 1.0, 1.0, 4.0, 5.0, 13.0, 2.0, 4.0, 4.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-1.2109375, -1.1765670776367188, -1.1421966552734375, -1.1078262329101562, -1.073455810546875, -1.0390853881835938, -1.0047149658203125, -0.9703445434570312, -0.93597412109375, -0.9016036987304688, -0.8672332763671875, -0.8328628540039062, -0.798492431640625, -0.7641220092773438, -0.7297515869140625, -0.6953811645507812, -0.6610107421875, -0.6266403198242188, -0.5922698974609375, -0.5578994750976562, -0.523529052734375, -0.48915863037109375, -0.4547882080078125, -0.42041778564453125, -0.38604736328125, -0.35167694091796875, -0.3173065185546875, -0.28293609619140625, -0.248565673828125, -0.21419525146484375, -0.1798248291015625, -0.14545440673828125, -0.111083984375, -0.07671356201171875, -0.0423431396484375, -0.00797271728515625, 0.026397705078125, 0.06076812744140625, 0.0951385498046875, 0.12950897216796875, 0.16387939453125, 0.19824981689453125, 0.2326202392578125, 0.26699066162109375, 0.301361083984375, 0.33573150634765625, 0.3701019287109375, 0.40447235107421875, 0.4388427734375, 0.47321319580078125, 0.5075836181640625, 0.5419540405273438, 0.576324462890625, 0.6106948852539062, 0.6450653076171875, 0.6794357299804688, 0.71380615234375, 0.7481765747070312, 0.7825469970703125, 0.8169174194335938, 0.851287841796875, 0.8856582641601562, 0.9200286865234375, 0.9543991088867188, 0.98876953125]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 0.0, 4.0, 2.0, 4.0, 3.0, 2.0, 4.0, 4.0, 11.0, 10.0, 14.0, 23.0, 33.0, 48.0, 55.0, 61.0, 76.0, 85.0, 84.0, 94.0, 79.0, 75.0, 44.0, 51.0, 45.0, 34.0, 18.0, 6.0, 7.0, 4.0, 7.0, 7.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0194091796875, -0.01886439323425293, -0.01831960678100586, -0.01777482032775879, -0.01723003387451172, -0.01668524742126465, -0.016140460968017578, -0.015595674514770508, -0.015050888061523438, -0.014506101608276367, -0.013961315155029297, -0.013416528701782227, -0.012871742248535156, -0.012326955795288086, -0.011782169342041016, -0.011237382888793945, -0.010692596435546875, -0.010147809982299805, -0.009603023529052734, -0.009058237075805664, -0.008513450622558594, -0.007968664169311523, -0.007423877716064453, -0.006879091262817383, -0.0063343048095703125, -0.005789518356323242, -0.005244731903076172, -0.0046999454498291016, -0.004155158996582031, -0.003610372543334961, -0.0030655860900878906, -0.0025207996368408203, -0.00197601318359375, -0.0014312267303466797, -0.0008864402770996094, -0.00034165382385253906, 0.00020313262939453125, 0.0007479190826416016, 0.0012927055358886719, 0.0018374919891357422, 0.0023822784423828125, 0.002927064895629883, 0.003471851348876953, 0.0040166378021240234, 0.004561424255371094, 0.005106210708618164, 0.005650997161865234, 0.006195783615112305, 0.006740570068359375, 0.007285356521606445, 0.007830142974853516, 0.008374929428100586, 0.008919715881347656, 0.009464502334594727, 0.010009288787841797, 0.010554075241088867, 0.011098861694335938, 0.011643648147583008, 0.012188434600830078, 0.012733221054077148, 0.013278007507324219, 0.013822793960571289, 0.01436758041381836, 0.01491236686706543, 0.0154571533203125]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 1.0, 6.0, 12.0, 49.0, 125.0, 575.0, 8043.0, 4137203.0, 46768.0, 1203.0, 198.0, 71.0, 23.0, 7.0, 10.0, 1.0, 1.0], "bins": [-2.3828125, -2.3392333984375, -2.295654296875, -2.2520751953125, -2.20849609375, -2.1649169921875, -2.121337890625, -2.0777587890625, -2.0341796875, -1.9906005859375, -1.947021484375, -1.9034423828125, -1.85986328125, -1.8162841796875, -1.772705078125, -1.7291259765625, -1.685546875, -1.6419677734375, -1.598388671875, -1.5548095703125, -1.51123046875, -1.4676513671875, -1.424072265625, -1.3804931640625, -1.3369140625, -1.2933349609375, -1.249755859375, -1.2061767578125, -1.16259765625, -1.1190185546875, -1.075439453125, -1.0318603515625, -0.98828125, -0.9447021484375, -0.901123046875, -0.8575439453125, -0.81396484375, -0.7703857421875, -0.726806640625, -0.6832275390625, -0.6396484375, -0.5960693359375, -0.552490234375, -0.5089111328125, -0.46533203125, -0.4217529296875, -0.378173828125, -0.3345947265625, -0.291015625, -0.2474365234375, -0.203857421875, -0.1602783203125, -0.11669921875, -0.0731201171875, -0.029541015625, 0.0140380859375, 0.0576171875, 0.1011962890625, 0.144775390625, 0.1883544921875, 0.23193359375, 0.2755126953125, 0.319091796875, 0.3626708984375, 0.40625]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 2.0, 4.0, 4.0, 7.0, 13.0, 13.0, 13.0, 25.0, 24.0, 44.0, 51.0, 97.0, 153.0, 284.0, 777.0, 1516.0, 498.0, 195.0, 118.0, 63.0, 62.0, 34.0, 24.0, 10.0, 9.0, 8.0, 4.0, 7.0, 6.0, 3.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.043365478515625, -0.04202413558959961, -0.04068279266357422, -0.03934144973754883, -0.03800010681152344, -0.03665876388549805, -0.035317420959472656, -0.033976078033447266, -0.032634735107421875, -0.031293392181396484, -0.029952049255371094, -0.028610706329345703, -0.027269363403320312, -0.025928020477294922, -0.02458667755126953, -0.02324533462524414, -0.02190399169921875, -0.02056264877319336, -0.01922130584716797, -0.017879962921142578, -0.016538619995117188, -0.015197277069091797, -0.013855934143066406, -0.012514591217041016, -0.011173248291015625, -0.009831905364990234, -0.008490562438964844, -0.007149219512939453, -0.0058078765869140625, -0.004466533660888672, -0.0031251907348632812, -0.0017838478088378906, -0.0004425048828125, 0.0008988380432128906, 0.0022401809692382812, 0.003581523895263672, 0.0049228668212890625, 0.006264209747314453, 0.007605552673339844, 0.008946895599365234, 0.010288238525390625, 0.011629581451416016, 0.012970924377441406, 0.014312267303466797, 0.015653610229492188, 0.016994953155517578, 0.01833629608154297, 0.01967763900756836, 0.02101898193359375, 0.02236032485961914, 0.02370166778564453, 0.025043010711669922, 0.026384353637695312, 0.027725696563720703, 0.029067039489746094, 0.030408382415771484, 0.031749725341796875, 0.033091068267822266, 0.034432411193847656, 0.03577375411987305, 0.03711509704589844, 0.03845643997192383, 0.03979778289794922, 0.04113912582397461, 0.04248046875]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 5.0, 42.0, 804.0, 153.0, 13.0, 1.0], "bins": [-5.712839603424072, -5.6185622215271, -5.524285316467285, -5.4300079345703125, -5.33573055267334, -5.241453170776367, -5.147176265716553, -5.05289888381958, -4.958621501922607, -4.864344120025635, -4.77006721496582, -4.675789833068848, -4.581512451171875, -4.487235069274902, -4.392958164215088, -4.298680782318115, -4.204403400421143, -4.11012601852417, -4.0158491134643555, -3.921571731567383, -3.82729434967041, -3.7330172061920166, -3.638739824295044, -3.5444626808166504, -3.450185537338257, -3.3559083938598633, -3.2616310119628906, -3.167353868484497, -3.0730764865875244, -2.978799343109131, -2.884521961212158, -2.7902448177337646, -2.695967435836792, -2.6016902923583984, -2.507412910461426, -2.4131357669830322, -2.3188583850860596, -2.224581241607666, -2.1303038597106934, -2.0360267162323, -1.9417493343353271, -1.847472071647644, -1.753194808959961, -1.6589175462722778, -1.5646402835845947, -1.4703630208969116, -1.3760857582092285, -1.281808614730835, -1.1875313520431519, -1.0932540893554688, -0.9989768266677856, -0.9046995639801025, -0.8104223012924194, -0.7161450386047363, -0.621867835521698, -0.5275905728340149, -0.4333133101463318, -0.3390360474586487, -0.24475879967212677, -0.15048155188560486, -0.05620428919792175, 0.03807297348976135, 0.13235020637512207, 0.22662746906280518, 0.3209047317504883]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 3.0, 13.0, 9.0, 32.0, 67.0, 76.0, 103.0, 157.0, 165.0, 148.0, 97.0, 69.0, 30.0, 20.0, 13.0, 7.0, 1.0, 1.0, 1.0, 1.0, 3.0], "bins": [-0.9175159335136414, -0.8995136618614197, -0.881511390209198, -0.8635091185569763, -0.8455069065093994, -0.8275046348571777, -0.809502363204956, -0.7915000915527344, -0.7734978199005127, -0.755495548248291, -0.7374932765960693, -0.7194910049438477, -0.701488733291626, -0.6834865212440491, -0.6654842495918274, -0.6474819779396057, -0.629479706287384, -0.6114774346351624, -0.5934751629829407, -0.575472891330719, -0.5574706792831421, -0.5394684076309204, -0.5214661359786987, -0.503463864326477, -0.48546159267425537, -0.4674593210220337, -0.449457049369812, -0.4314548075199127, -0.41345253586769104, -0.39545026421546936, -0.37744802236557007, -0.3594457507133484, -0.3414434790611267, -0.32344120740890503, -0.30543893575668335, -0.28743669390678406, -0.2694344222545624, -0.2514321506023407, -0.2334298938512802, -0.21542763710021973, -0.19742536544799805, -0.17942309379577637, -0.16142083704471588, -0.1434185802936554, -0.12541630864143372, -0.10741404443979263, -0.08941178023815155, -0.07140952348709106, -0.053407251834869385, -0.0354049876332283, -0.01740272343158722, 0.0005995407700538635, 0.018601804971694946, 0.03660406917333603, 0.05460633337497711, 0.0726085901260376, 0.09061086177825928, 0.10861312597990036, 0.12661539018154144, 0.14461764693260193, 0.1626199185848236, 0.1806221902370453, 0.19862444698810577, 0.21662670373916626, 0.23462897539138794]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 0.0, 4.0, 3.0, 3.0, 3.0, 7.0, 4.0, 11.0, 4.0, 7.0, 12.0, 21.0, 16.0, 32.0, 32.0, 25.0, 27.0, 34.0, 41.0, 65.0, 52.0, 838.0, 1045748.0, 1137.0, 61.0, 57.0, 50.0, 45.0, 32.0, 39.0, 27.0, 34.0, 11.0, 26.0, 9.0, 8.0, 11.0, 7.0, 3.0, 4.0, 5.0, 4.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8740234375, -1.8005828857421875, -1.727142333984375, -1.6537017822265625, -1.58026123046875, -1.5068206787109375, -1.433380126953125, -1.3599395751953125, -1.2864990234375, -1.2130584716796875, -1.139617919921875, -1.0661773681640625, -0.99273681640625, -0.9192962646484375, -0.845855712890625, -0.7724151611328125, -0.698974609375, -0.6255340576171875, -0.552093505859375, -0.4786529541015625, -0.40521240234375, -0.3317718505859375, -0.258331298828125, -0.1848907470703125, -0.1114501953125, -0.0380096435546875, 0.035430908203125, 0.1088714599609375, 0.18231201171875, 0.2557525634765625, 0.329193115234375, 0.4026336669921875, 0.47607421875, 0.5495147705078125, 0.622955322265625, 0.6963958740234375, 0.76983642578125, 0.8432769775390625, 0.916717529296875, 0.9901580810546875, 1.0635986328125, 1.1370391845703125, 1.210479736328125, 1.2839202880859375, 1.35736083984375, 1.4308013916015625, 1.504241943359375, 1.5776824951171875, 1.651123046875, 1.7245635986328125, 1.798004150390625, 1.8714447021484375, 1.94488525390625, 2.0183258056640625, 2.091766357421875, 2.1652069091796875, 2.2386474609375, 2.3120880126953125, 2.385528564453125, 2.4589691162109375, 2.53240966796875, 2.6058502197265625, 2.679290771484375, 2.7527313232421875, 2.826171875]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 5.0, 34.0, 191.0, 440.0, 277.0, 58.0, 13.0, 1.0], "bins": [-0.2120361328125, -0.20847773551940918, -0.20491933822631836, -0.20136094093322754, -0.19780254364013672, -0.1942441463470459, -0.19068574905395508, -0.18712735176086426, -0.18356895446777344, -0.18001055717468262, -0.1764521598815918, -0.17289376258850098, -0.16933536529541016, -0.16577696800231934, -0.16221857070922852, -0.1586601734161377, -0.15510177612304688, -0.15154337882995605, -0.14798498153686523, -0.14442658424377441, -0.1408681869506836, -0.13730978965759277, -0.13375139236450195, -0.13019299507141113, -0.1266345977783203, -0.12307620048522949, -0.11951780319213867, -0.11595940589904785, -0.11240100860595703, -0.10884261131286621, -0.10528421401977539, -0.10172581672668457, -0.09816741943359375, -0.09460902214050293, -0.09105062484741211, -0.08749222755432129, -0.08393383026123047, -0.08037543296813965, -0.07681703567504883, -0.07325863838195801, -0.06970024108886719, -0.06614184379577637, -0.06258344650268555, -0.05902504920959473, -0.055466651916503906, -0.051908254623413086, -0.048349857330322266, -0.044791460037231445, -0.041233062744140625, -0.037674665451049805, -0.034116268157958984, -0.030557870864868164, -0.026999473571777344, -0.023441076278686523, -0.019882678985595703, -0.016324281692504883, -0.012765884399414062, -0.009207487106323242, -0.005649089813232422, -0.0020906925201416016, 0.0014677047729492188, 0.005026102066040039, 0.00858449935913086, 0.01214289665222168, 0.0157012939453125]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 2.0, 2.0, 5.0, 8.0, 20.0, 20.0, 25.0, 32.0, 43.0, 72.0, 93.0, 123.0, 209.0, 359.0, 648.0, 1131.0, 2263.0, 4743.0, 11802.0, 36218.0, 164355.0, 560586.0, 199781.0, 42227.0, 13404.0, 5109.0, 2314.0, 1134.0, 679.0, 402.0, 234.0, 168.0, 118.0, 76.0, 37.0, 29.0, 22.0, 14.0, 14.0, 10.0, 8.0, 7.0, 6.0, 1.0, 3.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.41162109375, -0.39935302734375, -0.3870849609375, -0.37481689453125, -0.362548828125, -0.35028076171875, -0.3380126953125, -0.32574462890625, -0.3134765625, -0.30120849609375, -0.2889404296875, -0.27667236328125, -0.264404296875, -0.25213623046875, -0.2398681640625, -0.22760009765625, -0.21533203125, -0.20306396484375, -0.1907958984375, -0.17852783203125, -0.166259765625, -0.15399169921875, -0.1417236328125, -0.12945556640625, -0.1171875, -0.10491943359375, -0.0926513671875, -0.08038330078125, -0.068115234375, -0.05584716796875, -0.0435791015625, -0.03131103515625, -0.01904296875, -0.00677490234375, 0.0054931640625, 0.01776123046875, 0.030029296875, 0.04229736328125, 0.0545654296875, 0.06683349609375, 0.0791015625, 0.09136962890625, 0.1036376953125, 0.11590576171875, 0.128173828125, 0.14044189453125, 0.1527099609375, 0.16497802734375, 0.17724609375, 0.18951416015625, 0.2017822265625, 0.21405029296875, 0.226318359375, 0.23858642578125, 0.2508544921875, 0.26312255859375, 0.275390625, 0.28765869140625, 0.2999267578125, 0.31219482421875, 0.324462890625, 0.33673095703125, 0.3489990234375, 0.36126708984375, 0.37353515625]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 4.0, 1.0, 1.0, 4.0, 2.0, 3.0, 4.0, 3.0, 7.0, 13.0, 13.0, 20.0, 21.0, 27.0, 38.0, 39.0, 49.0, 48.0, 74.0, 56.0, 71.0, 62.0, 75.0, 65.0, 57.0, 48.0, 42.0, 39.0, 37.0, 21.0, 15.0, 10.0, 12.0, 7.0, 11.0, 7.0, 4.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1796875, -0.1738605499267578, -0.16803359985351562, -0.16220664978027344, -0.15637969970703125, -0.15055274963378906, -0.14472579956054688, -0.1388988494873047, -0.1330718994140625, -0.1272449493408203, -0.12141799926757812, -0.11559104919433594, -0.10976409912109375, -0.10393714904785156, -0.09811019897460938, -0.09228324890136719, -0.086456298828125, -0.08062934875488281, -0.07480239868164062, -0.06897544860839844, -0.06314849853515625, -0.05732154846191406, -0.051494598388671875, -0.04566764831542969, -0.0398406982421875, -0.03401374816894531, -0.028186798095703125, -0.022359848022460938, -0.01653289794921875, -0.010705947875976562, -0.004878997802734375, 0.0009479522705078125, 0.00677490234375, 0.012601852416992188, 0.018428802490234375, 0.024255752563476562, 0.03008270263671875, 0.03590965270996094, 0.041736602783203125, 0.04756355285644531, 0.0533905029296875, 0.05921745300292969, 0.06504440307617188, 0.07087135314941406, 0.07669830322265625, 0.08252525329589844, 0.08835220336914062, 0.09417915344238281, 0.100006103515625, 0.10583305358886719, 0.11166000366210938, 0.11748695373535156, 0.12331390380859375, 0.12914085388183594, 0.13496780395507812, 0.1407947540283203, 0.1466217041015625, 0.1524486541748047, 0.15827560424804688, 0.16410255432128906, 0.16992950439453125, 0.17575645446777344, 0.18158340454101562, 0.1874103546142578, 0.1932373046875]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 5.0, 1.0, 6.0, 1.0, 8.0, 9.0, 6.0, 7.0, 13.0, 16.0, 29.0, 35.0, 44.0, 65.0, 98.0, 137.0, 278.0, 446.0, 1080.0, 2888.0, 9695.0, 45474.0, 309479.0, 556002.0, 97495.0, 17523.0, 4579.0, 1590.0, 672.0, 314.0, 187.0, 117.0, 70.0, 44.0, 32.0, 29.0, 18.0, 22.0, 8.0, 13.0, 6.0, 5.0, 5.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.39697265625, -0.3838310241699219, -0.37068939208984375, -0.3575477600097656, -0.3444061279296875, -0.3312644958496094, -0.31812286376953125, -0.3049812316894531, -0.291839599609375, -0.2786979675292969, -0.26555633544921875, -0.2524147033691406, -0.2392730712890625, -0.22613143920898438, -0.21298980712890625, -0.19984817504882812, -0.18670654296875, -0.17356491088867188, -0.16042327880859375, -0.14728164672851562, -0.1341400146484375, -0.12099838256835938, -0.10785675048828125, -0.09471511840820312, -0.081573486328125, -0.06843185424804688, -0.05529022216796875, -0.042148590087890625, -0.0290069580078125, -0.015865325927734375, -0.00272369384765625, 0.010417938232421875, 0.0235595703125, 0.036701202392578125, 0.04984283447265625, 0.06298446655273438, 0.0761260986328125, 0.08926773071289062, 0.10240936279296875, 0.11555099487304688, 0.128692626953125, 0.14183425903320312, 0.15497589111328125, 0.16811752319335938, 0.1812591552734375, 0.19440078735351562, 0.20754241943359375, 0.22068405151367188, 0.23382568359375, 0.24696731567382812, 0.26010894775390625, 0.2732505798339844, 0.2863922119140625, 0.2995338439941406, 0.31267547607421875, 0.3258171081542969, 0.338958740234375, 0.3521003723144531, 0.36524200439453125, 0.3783836364746094, 0.3915252685546875, 0.4046669006347656, 0.41780853271484375, 0.4309501647949219, 0.444091796875]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 6.0, 4.0, 5.0, 7.0, 9.0, 6.0, 12.0, 11.0, 10.0, 20.0, 29.0, 21.0, 30.0, 35.0, 38.0, 60.0, 74.0, 85.0, 92.0, 68.0, 76.0, 65.0, 45.0, 33.0, 26.0, 24.0, 26.0, 22.0, 13.0, 11.0, 7.0, 6.0, 11.0, 3.0, 2.0, 3.0, 6.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00013589859008789062, -0.00013161450624465942, -0.00012733042240142822, -0.00012304633855819702, -0.00011876225471496582, -0.00011447817087173462, -0.00011019408702850342, -0.00010591000318527222, -0.00010162591934204102, -9.734183549880981e-05, -9.305775165557861e-05, -8.877366781234741e-05, -8.448958396911621e-05, -8.020550012588501e-05, -7.592141628265381e-05, -7.163733243942261e-05, -6.73532485961914e-05, -6.30691647529602e-05, -5.8785080909729004e-05, -5.45009970664978e-05, -5.02169132232666e-05, -4.59328293800354e-05, -4.16487455368042e-05, -3.7364661693573e-05, -3.30805778503418e-05, -2.8796494007110596e-05, -2.4512410163879395e-05, -2.0228326320648193e-05, -1.5944242477416992e-05, -1.1660158634185791e-05, -7.37607479095459e-06, -3.0919909477233887e-06, 1.1920928955078125e-06, 5.476176738739014e-06, 9.760260581970215e-06, 1.4044344425201416e-05, 1.8328428268432617e-05, 2.261251211166382e-05, 2.689659595489502e-05, 3.118067979812622e-05, 3.546476364135742e-05, 3.974884748458862e-05, 4.4032931327819824e-05, 4.8317015171051025e-05, 5.2601099014282227e-05, 5.688518285751343e-05, 6.116926670074463e-05, 6.545335054397583e-05, 6.973743438720703e-05, 7.402151823043823e-05, 7.830560207366943e-05, 8.258968591690063e-05, 8.687376976013184e-05, 9.115785360336304e-05, 9.544193744659424e-05, 9.972602128982544e-05, 0.00010401010513305664, 0.00010829418897628784, 0.00011257827281951904, 0.00011686235666275024, 0.00012114644050598145, 0.00012543052434921265, 0.00012971460819244385, 0.00013399869203567505, 0.00013828277587890625]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 5.0, 6.0, 17.0, 7.0, 14.0, 31.0, 58.0, 71.0, 142.0, 253.0, 449.0, 1072.0, 3330.0, 15729.0, 136055.0, 755469.0, 117040.0, 13837.0, 2961.0, 1024.0, 461.0, 214.0, 121.0, 69.0, 42.0, 25.0, 23.0, 15.0, 6.0, 3.0, 8.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.40771484375, -0.3895721435546875, -0.371429443359375, -0.3532867431640625, -0.33514404296875, -0.3170013427734375, -0.298858642578125, -0.2807159423828125, -0.2625732421875, -0.2444305419921875, -0.226287841796875, -0.2081451416015625, -0.19000244140625, -0.1718597412109375, -0.153717041015625, -0.1355743408203125, -0.117431640625, -0.0992889404296875, -0.081146240234375, -0.0630035400390625, -0.04486083984375, -0.0267181396484375, -0.008575439453125, 0.0095672607421875, 0.0277099609375, 0.0458526611328125, 0.063995361328125, 0.0821380615234375, 0.10028076171875, 0.1184234619140625, 0.136566162109375, 0.1547088623046875, 0.1728515625, 0.1909942626953125, 0.209136962890625, 0.2272796630859375, 0.24542236328125, 0.2635650634765625, 0.281707763671875, 0.2998504638671875, 0.3179931640625, 0.3361358642578125, 0.354278564453125, 0.3724212646484375, 0.39056396484375, 0.4087066650390625, 0.426849365234375, 0.4449920654296875, 0.463134765625, 0.4812774658203125, 0.499420166015625, 0.5175628662109375, 0.53570556640625, 0.5538482666015625, 0.571990966796875, 0.5901336669921875, 0.6082763671875, 0.6264190673828125, 0.644561767578125, 0.6627044677734375, 0.68084716796875, 0.6989898681640625, 0.717132568359375, 0.7352752685546875, 0.75341796875]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 3.0, 2.0, 2.0, 4.0, 5.0, 4.0, 7.0, 16.0, 20.0, 23.0, 39.0, 41.0, 73.0, 84.0, 103.0, 122.0, 115.0, 97.0, 75.0, 62.0, 40.0, 22.0, 13.0, 12.0, 9.0, 5.0, 4.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2310791015625, -0.2242145538330078, -0.21735000610351562, -0.21048545837402344, -0.20362091064453125, -0.19675636291503906, -0.18989181518554688, -0.1830272674560547, -0.1761627197265625, -0.1692981719970703, -0.16243362426757812, -0.15556907653808594, -0.14870452880859375, -0.14183998107910156, -0.13497543334960938, -0.1281108856201172, -0.121246337890625, -0.11438179016113281, -0.10751724243164062, -0.10065269470214844, -0.09378814697265625, -0.08692359924316406, -0.08005905151367188, -0.07319450378417969, -0.0663299560546875, -0.05946540832519531, -0.052600860595703125, -0.04573631286621094, -0.03887176513671875, -0.03200721740722656, -0.025142669677734375, -0.018278121948242188, -0.01141357421875, -0.0045490264892578125, 0.002315521240234375, 0.009180068969726562, 0.01604461669921875, 0.022909164428710938, 0.029773712158203125, 0.03663825988769531, 0.0435028076171875, 0.05036735534667969, 0.057231903076171875, 0.06409645080566406, 0.07096099853515625, 0.07782554626464844, 0.08469009399414062, 0.09155464172363281, 0.098419189453125, 0.10528373718261719, 0.11214828491210938, 0.11901283264160156, 0.12587738037109375, 0.13274192810058594, 0.13960647583007812, 0.1464710235595703, 0.1533355712890625, 0.1602001190185547, 0.16706466674804688, 0.17392921447753906, 0.18079376220703125, 0.18765830993652344, 0.19452285766601562, 0.2013874053955078, 0.208251953125]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 7.0, 173.0, 708.0, 109.0, 10.0, 6.0, 1.0, 0.0, 1.0], "bins": [-22.180301666259766, -21.79336929321289, -21.406436920166016, -21.01950454711914, -20.632572174072266, -20.24563980102539, -19.858707427978516, -19.47177505493164, -19.084842681884766, -18.69791030883789, -18.310977935791016, -17.92404556274414, -17.537113189697266, -17.15018081665039, -16.763248443603516, -16.37631607055664, -15.989385604858398, -15.602453231811523, -15.215520858764648, -14.828588485717773, -14.441656112670898, -14.05472469329834, -13.667792320251465, -13.28085994720459, -12.893927574157715, -12.50699520111084, -12.120062828063965, -11.73313045501709, -11.346199035644531, -10.959266662597656, -10.572334289550781, -10.185401916503906, -9.798469543457031, -9.411537170410156, -9.024604797363281, -8.637672424316406, -8.250740051269531, -7.8638081550598145, -7.476876258850098, -7.089943885803223, -6.7030110359191895, -6.3160786628723145, -5.929146766662598, -5.542214393615723, -5.155282020568848, -4.768349647521973, -4.381417274475098, -3.994485378265381, -3.607553005218506, -3.220620632171631, -2.833688497543335, -2.446756362915039, -2.059823989868164, -1.6728917360305786, -1.2859594821929932, -0.8990273475646973, -0.5120949745178223, -0.12516272068023682, 0.26176953315734863, 0.6487017869949341, 1.0356340408325195, 1.422566294670105, 1.8094985485076904, 2.1964306831359863, 2.5833630561828613]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 9.0, 12.0, 10.0, 21.0, 22.0, 29.0, 29.0, 49.0, 46.0, 71.0, 89.0, 87.0, 92.0, 74.0, 70.0, 73.0, 49.0, 49.0, 38.0, 29.0, 20.0, 15.0, 8.0, 6.0, 4.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9082056283950806, -1.8585504293441772, -1.8088951110839844, -1.759239912033081, -1.7095847129821777, -1.6599295139312744, -1.6102741956710815, -1.5606189966201782, -1.5109636783599854, -1.461308479309082, -1.4116531610488892, -1.3619979619979858, -1.3123427629470825, -1.2626874446868896, -1.2130322456359863, -1.163377046585083, -1.1137218475341797, -1.0640666484832764, -1.0144113302230835, -0.9647561311721802, -0.9151009321212769, -0.8654456734657288, -0.8157904148101807, -0.7661352157592773, -0.7164799571037292, -0.6668246984481812, -0.6171694993972778, -0.5675142407417297, -0.5178589820861816, -0.4682037830352783, -0.4185485243797302, -0.3688932955265045, -0.3192380666732788, -0.2695828378200531, -0.2199275940656662, -0.1702723503112793, -0.12061712145805359, -0.07096189260482788, -0.021306633949279785, 0.028348594903945923, 0.07800382375717163, 0.12765905261039734, 0.17731429636478424, 0.22696954011917114, 0.27662476897239685, 0.32627999782562256, 0.37593525648117065, 0.42559048533439636, 0.47524571418762207, 0.5249009728431702, 0.5745561718940735, 0.6242114305496216, 0.6738666296005249, 0.723521888256073, 0.7731771469116211, 0.8228323459625244, 0.8724876046180725, 0.9221428632736206, 0.9717980623245239, 1.0214533805847168, 1.0711085796356201, 1.1207637786865234, 1.1704189777374268, 1.2200742959976196, 1.269729495048523]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 4.0, 1.0, 5.0, 6.0, 10.0, 14.0, 15.0, 16.0, 38.0, 30.0, 34.0, 54.0, 61.0, 82.0, 104.0, 143.0, 236.0, 514.0, 1536068.0, 2655931.0, 492.0, 179.0, 88.0, 67.0, 36.0, 20.0, 11.0, 8.0, 8.0, 5.0, 6.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.171875, -6.0316162109375, -5.891357421875, -5.7510986328125, -5.61083984375, -5.4705810546875, -5.330322265625, -5.1900634765625, -5.0498046875, -4.9095458984375, -4.769287109375, -4.6290283203125, -4.48876953125, -4.3485107421875, -4.208251953125, -4.0679931640625, -3.927734375, -3.7874755859375, -3.647216796875, -3.5069580078125, -3.36669921875, -3.2264404296875, -3.086181640625, -2.9459228515625, -2.8056640625, -2.6654052734375, -2.525146484375, -2.3848876953125, -2.24462890625, -2.1043701171875, -1.964111328125, -1.8238525390625, -1.68359375, -1.5433349609375, -1.403076171875, -1.2628173828125, -1.12255859375, -0.9822998046875, -0.842041015625, -0.7017822265625, -0.5615234375, -0.4212646484375, -0.281005859375, -0.1407470703125, -0.00048828125, 0.1397705078125, 0.280029296875, 0.4202880859375, 0.560546875, 0.7008056640625, 0.841064453125, 0.9813232421875, 1.12158203125, 1.2618408203125, 1.402099609375, 1.5423583984375, 1.6826171875, 1.8228759765625, 1.963134765625, 2.1033935546875, 2.24365234375, 2.3839111328125, 2.524169921875, 2.6644287109375, 2.8046875]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 11.0, 33.0, 109.0, 223.0, 331.0, 199.0, 75.0, 24.0, 7.0, 4.0, 0.0, 0.0, 1.0], "bins": [-0.2305908203125, -0.22642183303833008, -0.22225284576416016, -0.21808385848999023, -0.2139148712158203, -0.2097458839416504, -0.20557689666748047, -0.20140790939331055, -0.19723892211914062, -0.1930699348449707, -0.18890094757080078, -0.18473196029663086, -0.18056297302246094, -0.17639398574829102, -0.1722249984741211, -0.16805601119995117, -0.16388702392578125, -0.15971803665161133, -0.1555490493774414, -0.15138006210327148, -0.14721107482910156, -0.14304208755493164, -0.13887310028076172, -0.1347041130065918, -0.13053512573242188, -0.12636613845825195, -0.12219715118408203, -0.11802816390991211, -0.11385917663574219, -0.10969018936157227, -0.10552120208740234, -0.10135221481323242, -0.0971832275390625, -0.09301424026489258, -0.08884525299072266, -0.08467626571655273, -0.08050727844238281, -0.07633829116821289, -0.07216930389404297, -0.06800031661987305, -0.06383132934570312, -0.0596623420715332, -0.05549335479736328, -0.05132436752319336, -0.04715538024902344, -0.042986392974853516, -0.038817405700683594, -0.03464841842651367, -0.03047943115234375, -0.026310443878173828, -0.022141456604003906, -0.017972469329833984, -0.013803482055664062, -0.00963449478149414, -0.005465507507324219, -0.0012965202331542969, 0.002872467041015625, 0.007041454315185547, 0.011210441589355469, 0.01537942886352539, 0.019548416137695312, 0.023717403411865234, 0.027886390686035156, 0.03205537796020508, 0.036224365234375]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 7.0, 19.0, 27.0, 42.0, 68.0, 111.0, 225.0, 903.0, 4157383.0, 34683.0, 466.0, 169.0, 85.0, 39.0, 25.0, 20.0, 11.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.0546875, -6.7314453125, -6.408203125, -6.0849609375, -5.76171875, -5.4384765625, -5.115234375, -4.7919921875, -4.46875, -4.1455078125, -3.822265625, -3.4990234375, -3.17578125, -2.8525390625, -2.529296875, -2.2060546875, -1.8828125, -1.5595703125, -1.236328125, -0.9130859375, -0.58984375, -0.2666015625, 0.056640625, 0.3798828125, 0.703125, 1.0263671875, 1.349609375, 1.6728515625, 1.99609375, 2.3193359375, 2.642578125, 2.9658203125, 3.2890625, 3.6123046875, 3.935546875, 4.2587890625, 4.58203125, 4.9052734375, 5.228515625, 5.5517578125, 5.875, 6.1982421875, 6.521484375, 6.8447265625, 7.16796875, 7.4912109375, 7.814453125, 8.1376953125, 8.4609375, 8.7841796875, 9.107421875, 9.4306640625, 9.75390625, 10.0771484375, 10.400390625, 10.7236328125, 11.046875, 11.3701171875, 11.693359375, 12.0166015625, 12.33984375, 12.6630859375, 12.986328125, 13.3095703125, 13.6328125]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 17.0, 76.0, 597.0, 2998.0, 271.0, 78.0, 29.0, 12.0, 4.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.67822265625, -0.6613922119140625, -0.644561767578125, -0.6277313232421875, -0.61090087890625, -0.5940704345703125, -0.577239990234375, -0.5604095458984375, -0.5435791015625, -0.5267486572265625, -0.509918212890625, -0.4930877685546875, -0.47625732421875, -0.4594268798828125, -0.442596435546875, -0.4257659912109375, -0.408935546875, -0.3921051025390625, -0.375274658203125, -0.3584442138671875, -0.34161376953125, -0.3247833251953125, -0.307952880859375, -0.2911224365234375, -0.2742919921875, -0.2574615478515625, -0.240631103515625, -0.2238006591796875, -0.20697021484375, -0.1901397705078125, -0.173309326171875, -0.1564788818359375, -0.1396484375, -0.1228179931640625, -0.105987548828125, -0.0891571044921875, -0.07232666015625, -0.0554962158203125, -0.038665771484375, -0.0218353271484375, -0.0050048828125, 0.0118255615234375, 0.028656005859375, 0.0454864501953125, 0.06231689453125, 0.0791473388671875, 0.095977783203125, 0.1128082275390625, 0.129638671875, 0.1464691162109375, 0.163299560546875, 0.1801300048828125, 0.19696044921875, 0.2137908935546875, 0.230621337890625, 0.2474517822265625, 0.2642822265625, 0.2811126708984375, 0.297943115234375, 0.3147735595703125, 0.33160400390625, 0.3484344482421875, 0.365264892578125, 0.3820953369140625, 0.39892578125]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 6.0, 16.0, 41.0, 222.0, 510.0, 149.0, 34.0, 15.0, 10.0, 5.0, 1.0, 2.0, 1.0, 0.0, 3.0], "bins": [-10.385074615478516, -10.190502166748047, -9.995928764343262, -9.801355361938477, -9.606782913208008, -9.412210464477539, -9.217637062072754, -9.023063659667969, -8.8284912109375, -8.633918762207031, -8.439345359802246, -8.244771957397461, -8.050199508666992, -7.855626583099365, -7.661053657531738, -7.466480731964111, -7.271907806396484, -7.077334880828857, -6.8827619552612305, -6.6881890296936035, -6.493616104125977, -6.29904317855835, -6.104470252990723, -5.909897327423096, -5.715324401855469, -5.520751476287842, -5.326178550720215, -5.131605625152588, -4.937032699584961, -4.742459774017334, -4.547886848449707, -4.35331392288208, -4.158741474151611, -3.9641685485839844, -3.7695956230163574, -3.5750226974487305, -3.3804497718811035, -3.1858768463134766, -2.9913039207458496, -2.7967309951782227, -2.6021580696105957, -2.4075851440429688, -2.213012218475342, -2.018439292907715, -1.823866367340088, -1.629293441772461, -1.434720516204834, -1.240147590637207, -1.04557466506958, -0.8510017395019531, -0.6564288139343262, -0.4618558883666992, -0.26728296279907227, -0.07271003723144531, 0.12186288833618164, 0.3164358139038086, 0.5110087394714355, 0.7055816650390625, 0.9001545906066895, 1.0947275161743164, 1.2893004417419434, 1.4838733673095703, 1.6784462928771973, 1.8730192184448242, 2.067592144012451]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 6.0, 4.0, 11.0, 12.0, 24.0, 47.0, 50.0, 90.0, 94.0, 96.0, 131.0, 119.0, 98.0, 72.0, 55.0, 34.0, 27.0, 17.0, 9.0, 5.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.4142887592315674, -1.3648885488510132, -1.315488338470459, -1.2660882472991943, -1.2166880369186401, -1.167287826538086, -1.1178877353668213, -1.068487524986267, -1.019087314605713, -0.9696871042251587, -0.9202869534492493, -0.8708868026733398, -0.8214865922927856, -0.7720863819122314, -0.722686231136322, -0.6732860803604126, -0.6238858699798584, -0.5744856595993042, -0.5250855088233948, -0.47568532824516296, -0.42628514766693115, -0.37688496708869934, -0.32748478651046753, -0.2780846059322357, -0.2286844253540039, -0.1792842447757721, -0.12988406419754028, -0.08048388361930847, -0.03108370304107666, 0.01831647753715515, 0.06771665811538696, 0.11711683869361877, 0.16651701927185059, 0.2159171998500824, 0.2653173804283142, 0.314717561006546, 0.36411774158477783, 0.41351792216300964, 0.46291810274124146, 0.5123182535171509, 0.5617184638977051, 0.6111186742782593, 0.6605188250541687, 0.7099189758300781, 0.7593191862106323, 0.8087193965911865, 0.858119547367096, 0.9075196981430054, 0.9569199085235596, 1.0063201189041138, 1.055720329284668, 1.1051204204559326, 1.1545206308364868, 1.203920841217041, 1.2533209323883057, 1.3027211427688599, 1.352121353149414, 1.4015215635299683, 1.4509217739105225, 1.500321865081787, 1.5497220754623413, 1.5991222858428955, 1.6485223770141602, 1.6979225873947144, 1.7473227977752686]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 5.0, 3.0, 9.0, 12.0, 17.0, 11.0, 16.0, 29.0, 37.0, 50.0, 54.0, 91.0, 183.0, 302.0, 781.0, 3049.0, 26968.0, 784521.0, 220657.0, 9049.0, 1562.0, 513.0, 219.0, 119.0, 75.0, 45.0, 37.0, 18.0, 20.0, 22.0, 19.0, 13.0, 6.0, 8.0, 6.0, 7.0, 3.0, 2.0, 4.0, 2.0, 3.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8076171875, -1.7469329833984375, -1.686248779296875, -1.6255645751953125, -1.56488037109375, -1.5041961669921875, -1.443511962890625, -1.3828277587890625, -1.3221435546875, -1.2614593505859375, -1.200775146484375, -1.1400909423828125, -1.07940673828125, -1.0187225341796875, -0.958038330078125, -0.8973541259765625, -0.836669921875, -0.7759857177734375, -0.715301513671875, -0.6546173095703125, -0.59393310546875, -0.5332489013671875, -0.472564697265625, -0.4118804931640625, -0.3511962890625, -0.2905120849609375, -0.229827880859375, -0.1691436767578125, -0.10845947265625, -0.0477752685546875, 0.012908935546875, 0.0735931396484375, 0.13427734375, 0.1949615478515625, 0.255645751953125, 0.3163299560546875, 0.37701416015625, 0.4376983642578125, 0.498382568359375, 0.5590667724609375, 0.6197509765625, 0.6804351806640625, 0.741119384765625, 0.8018035888671875, 0.86248779296875, 0.9231719970703125, 0.983856201171875, 1.0445404052734375, 1.105224609375, 1.1659088134765625, 1.226593017578125, 1.2872772216796875, 1.34796142578125, 1.4086456298828125, 1.469329833984375, 1.5300140380859375, 1.5906982421875, 1.6513824462890625, 1.712066650390625, 1.7727508544921875, 1.83343505859375, 1.8941192626953125, 1.954803466796875, 2.0154876708984375, 2.076171875]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 12.0, 38.0, 64.0, 138.0, 178.0, 232.0, 165.0, 105.0, 46.0, 16.0, 6.0, 5.0, 1.0, 1.0, 1.0, 2.0, 1.0], "bins": [-0.261962890625, -0.2569441795349121, -0.2519254684448242, -0.24690675735473633, -0.24188804626464844, -0.23686933517456055, -0.23185062408447266, -0.22683191299438477, -0.22181320190429688, -0.21679449081420898, -0.2117757797241211, -0.2067570686340332, -0.2017383575439453, -0.19671964645385742, -0.19170093536376953, -0.18668222427368164, -0.18166351318359375, -0.17664480209350586, -0.17162609100341797, -0.16660737991333008, -0.1615886688232422, -0.1565699577331543, -0.1515512466430664, -0.14653253555297852, -0.14151382446289062, -0.13649511337280273, -0.13147640228271484, -0.12645769119262695, -0.12143898010253906, -0.11642026901245117, -0.11140155792236328, -0.10638284683227539, -0.1013641357421875, -0.09634542465209961, -0.09132671356201172, -0.08630800247192383, -0.08128929138183594, -0.07627058029174805, -0.07125186920166016, -0.06623315811157227, -0.061214447021484375, -0.056195735931396484, -0.051177024841308594, -0.0461583137512207, -0.04113960266113281, -0.03612089157104492, -0.03110218048095703, -0.02608346939086914, -0.02106475830078125, -0.01604604721069336, -0.011027336120605469, -0.006008625030517578, -0.0009899139404296875, 0.004028797149658203, 0.009047508239746094, 0.014066219329833984, 0.019084930419921875, 0.024103641510009766, 0.029122352600097656, 0.03414106369018555, 0.03915977478027344, 0.04417848587036133, 0.04919719696044922, 0.05421590805053711, 0.059234619140625]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 2.0, 7.0, 5.0, 5.0, 6.0, 11.0, 16.0, 15.0, 24.0, 32.0, 29.0, 52.0, 81.0, 112.0, 150.0, 242.0, 360.0, 545.0, 1062.0, 2020.0, 4595.0, 13991.0, 59591.0, 318282.0, 495579.0, 115676.0, 23076.0, 6842.0, 2812.0, 1307.0, 733.0, 437.0, 256.0, 174.0, 134.0, 88.0, 59.0, 46.0, 24.0, 20.0, 17.0, 13.0, 8.0, 4.0, 6.0, 8.0, 5.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.51025390625, -0.492156982421875, -0.47406005859375, -0.455963134765625, -0.4378662109375, -0.419769287109375, -0.40167236328125, -0.383575439453125, -0.365478515625, -0.347381591796875, -0.32928466796875, -0.311187744140625, -0.2930908203125, -0.274993896484375, -0.25689697265625, -0.238800048828125, -0.220703125, -0.202606201171875, -0.18450927734375, -0.166412353515625, -0.1483154296875, -0.130218505859375, -0.11212158203125, -0.094024658203125, -0.075927734375, -0.057830810546875, -0.03973388671875, -0.021636962890625, -0.0035400390625, 0.014556884765625, 0.03265380859375, 0.050750732421875, 0.06884765625, 0.086944580078125, 0.10504150390625, 0.123138427734375, 0.1412353515625, 0.159332275390625, 0.17742919921875, 0.195526123046875, 0.213623046875, 0.231719970703125, 0.24981689453125, 0.267913818359375, 0.2860107421875, 0.304107666015625, 0.32220458984375, 0.340301513671875, 0.3583984375, 0.376495361328125, 0.39459228515625, 0.412689208984375, 0.4307861328125, 0.448883056640625, 0.46697998046875, 0.485076904296875, 0.503173828125, 0.521270751953125, 0.53936767578125, 0.557464599609375, 0.5755615234375, 0.593658447265625, 0.61175537109375, 0.629852294921875, 0.64794921875]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 1.0, 2.0, 2.0, 5.0, 10.0, 9.0, 8.0, 20.0, 13.0, 20.0, 22.0, 25.0, 36.0, 40.0, 46.0, 51.0, 49.0, 51.0, 48.0, 43.0, 63.0, 66.0, 60.0, 64.0, 48.0, 42.0, 32.0, 26.0, 25.0, 25.0, 14.0, 10.0, 9.0, 5.0, 3.0, 6.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.29833984375, -0.28994178771972656, -0.2815437316894531, -0.2731456756591797, -0.26474761962890625, -0.2563495635986328, -0.24795150756835938, -0.23955345153808594, -0.2311553955078125, -0.22275733947753906, -0.21435928344726562, -0.2059612274169922, -0.19756317138671875, -0.1891651153564453, -0.18076705932617188, -0.17236900329589844, -0.163970947265625, -0.15557289123535156, -0.14717483520507812, -0.1387767791748047, -0.13037872314453125, -0.12198066711425781, -0.11358261108398438, -0.10518455505371094, -0.0967864990234375, -0.08838844299316406, -0.07999038696289062, -0.07159233093261719, -0.06319427490234375, -0.05479621887207031, -0.046398162841796875, -0.03800010681152344, -0.02960205078125, -0.021203994750976562, -0.012805938720703125, -0.0044078826904296875, 0.00399017333984375, 0.012388229370117188, 0.020786285400390625, 0.029184341430664062, 0.0375823974609375, 0.04598045349121094, 0.054378509521484375, 0.06277656555175781, 0.07117462158203125, 0.07957267761230469, 0.08797073364257812, 0.09636878967285156, 0.104766845703125, 0.11316490173339844, 0.12156295776367188, 0.1299610137939453, 0.13835906982421875, 0.1467571258544922, 0.15515518188476562, 0.16355323791503906, 0.1719512939453125, 0.18034934997558594, 0.18874740600585938, 0.1971454620361328, 0.20554351806640625, 0.2139415740966797, 0.22233963012695312, 0.23073768615722656, 0.2391357421875]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 8.0, 6.0, 19.0, 27.0, 43.0, 80.0, 154.0, 317.0, 698.0, 3330.0, 128839.0, 904774.0, 8252.0, 1181.0, 400.0, 211.0, 109.0, 42.0, 25.0, 12.0, 9.0, 6.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.072265625, -2.0149993896484375, -1.957733154296875, -1.9004669189453125, -1.84320068359375, -1.7859344482421875, -1.728668212890625, -1.6714019775390625, -1.6141357421875, -1.5568695068359375, -1.499603271484375, -1.4423370361328125, -1.38507080078125, -1.3278045654296875, -1.270538330078125, -1.2132720947265625, -1.156005859375, -1.0987396240234375, -1.041473388671875, -0.9842071533203125, -0.92694091796875, -0.8696746826171875, -0.812408447265625, -0.7551422119140625, -0.6978759765625, -0.6406097412109375, -0.583343505859375, -0.5260772705078125, -0.46881103515625, -0.4115447998046875, -0.354278564453125, -0.2970123291015625, -0.23974609375, -0.1824798583984375, -0.125213623046875, -0.0679473876953125, -0.01068115234375, 0.0465850830078125, 0.103851318359375, 0.1611175537109375, 0.2183837890625, 0.2756500244140625, 0.332916259765625, 0.3901824951171875, 0.44744873046875, 0.5047149658203125, 0.561981201171875, 0.6192474365234375, 0.676513671875, 0.7337799072265625, 0.791046142578125, 0.8483123779296875, 0.90557861328125, 0.9628448486328125, 1.020111083984375, 1.0773773193359375, 1.1346435546875, 1.1919097900390625, 1.249176025390625, 1.3064422607421875, 1.36370849609375, 1.4209747314453125, 1.478240966796875, 1.5355072021484375, 1.5927734375]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 4.0, 7.0, 2.0, 9.0, 9.0, 6.0, 11.0, 6.0, 16.0, 24.0, 17.0, 38.0, 34.0, 38.0, 57.0, 71.0, 81.0, 86.0, 87.0, 75.0, 54.0, 52.0, 44.0, 33.0, 25.0, 18.0, 15.0, 12.0, 17.0, 7.0, 10.0, 10.0, 4.0, 7.0, 6.0, 0.0, 3.0, 4.0, 2.0, 1.0, 5.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00011813640594482422, -0.00011442694813013077, -0.00011071749031543732, -0.00010700803250074387, -0.00010329857468605042, -9.958911687135696e-05, -9.587965905666351e-05, -9.217020124197006e-05, -8.846074342727661e-05, -8.475128561258316e-05, -8.104182779788971e-05, -7.733236998319626e-05, -7.362291216850281e-05, -6.991345435380936e-05, -6.62039965391159e-05, -6.249453872442245e-05, -5.8785080909729004e-05, -5.507562309503555e-05, -5.13661652803421e-05, -4.765670746564865e-05, -4.39472496509552e-05, -4.023779183626175e-05, -3.65283340215683e-05, -3.281887620687485e-05, -2.9109418392181396e-05, -2.5399960577487946e-05, -2.1690502762794495e-05, -1.7981044948101044e-05, -1.4271587133407593e-05, -1.0562129318714142e-05, -6.852671504020691e-06, -3.14321368932724e-06, 5.662441253662109e-07, 4.275701940059662e-06, 7.985159754753113e-06, 1.1694617569446564e-05, 1.5404075384140015e-05, 1.9113533198833466e-05, 2.2822991013526917e-05, 2.6532448828220367e-05, 3.024190664291382e-05, 3.395136445760727e-05, 3.766082227230072e-05, 4.137028008699417e-05, 4.507973790168762e-05, 4.878919571638107e-05, 5.2498653531074524e-05, 5.6208111345767975e-05, 5.9917569160461426e-05, 6.362702697515488e-05, 6.733648478984833e-05, 7.104594260454178e-05, 7.475540041923523e-05, 7.846485823392868e-05, 8.217431604862213e-05, 8.588377386331558e-05, 8.959323167800903e-05, 9.330268949270248e-05, 9.701214730739594e-05, 0.00010072160512208939, 0.00010443106293678284, 0.00010814052075147629, 0.00011184997856616974, 0.00011555943638086319, 0.00011926889419555664]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 0.0, 2.0, 1.0, 2.0, 4.0, 3.0, 9.0, 6.0, 3.0, 12.0, 16.0, 28.0, 51.0, 78.0, 131.0, 223.0, 426.0, 876.0, 2669.0, 10718.0, 87232.0, 831652.0, 98520.0, 11342.0, 2775.0, 923.0, 376.0, 179.0, 112.0, 62.0, 39.0, 25.0, 15.0, 12.0, 9.0, 7.0, 6.0, 5.0, 3.0, 2.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.77294921875, -0.74676513671875, -0.7205810546875, -0.69439697265625, -0.668212890625, -0.64202880859375, -0.6158447265625, -0.58966064453125, -0.5634765625, -0.53729248046875, -0.5111083984375, -0.48492431640625, -0.458740234375, -0.43255615234375, -0.4063720703125, -0.38018798828125, -0.35400390625, -0.32781982421875, -0.3016357421875, -0.27545166015625, -0.249267578125, -0.22308349609375, -0.1968994140625, -0.17071533203125, -0.14453125, -0.11834716796875, -0.0921630859375, -0.06597900390625, -0.039794921875, -0.01361083984375, 0.0125732421875, 0.03875732421875, 0.06494140625, 0.09112548828125, 0.1173095703125, 0.14349365234375, 0.169677734375, 0.19586181640625, 0.2220458984375, 0.24822998046875, 0.2744140625, 0.30059814453125, 0.3267822265625, 0.35296630859375, 0.379150390625, 0.40533447265625, 0.4315185546875, 0.45770263671875, 0.48388671875, 0.51007080078125, 0.5362548828125, 0.56243896484375, 0.588623046875, 0.61480712890625, 0.6409912109375, 0.66717529296875, 0.693359375, 0.71954345703125, 0.7457275390625, 0.77191162109375, 0.798095703125, 0.82427978515625, 0.8504638671875, 0.87664794921875, 0.90283203125]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 2.0, 3.0, 5.0, 5.0, 7.0, 10.0, 14.0, 28.0, 54.0, 81.0, 109.0, 154.0, 147.0, 114.0, 100.0, 67.0, 43.0, 15.0, 15.0, 7.0, 8.0, 3.0, 6.0, 3.0, 4.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.445068359375, -0.4340171813964844, -0.42296600341796875, -0.4119148254394531, -0.4008636474609375, -0.3898124694824219, -0.37876129150390625, -0.3677101135253906, -0.356658935546875, -0.3456077575683594, -0.33455657958984375, -0.3235054016113281, -0.3124542236328125, -0.3014030456542969, -0.29035186767578125, -0.2793006896972656, -0.26824951171875, -0.2571983337402344, -0.24614715576171875, -0.23509597778320312, -0.2240447998046875, -0.21299362182617188, -0.20194244384765625, -0.19089126586914062, -0.179840087890625, -0.16878890991210938, -0.15773773193359375, -0.14668655395507812, -0.1356353759765625, -0.12458419799804688, -0.11353302001953125, -0.10248184204101562, -0.0914306640625, -0.08037948608398438, -0.06932830810546875, -0.058277130126953125, -0.0472259521484375, -0.036174774169921875, -0.02512359619140625, -0.014072418212890625, -0.003021240234375, 0.008029937744140625, 0.01908111572265625, 0.030132293701171875, 0.0411834716796875, 0.052234649658203125, 0.06328582763671875, 0.07433700561523438, 0.08538818359375, 0.09643936157226562, 0.10749053955078125, 0.11854171752929688, 0.1295928955078125, 0.14064407348632812, 0.15169525146484375, 0.16274642944335938, 0.173797607421875, 0.18484878540039062, 0.19589996337890625, 0.20695114135742188, 0.2180023193359375, 0.22905349731445312, 0.24010467529296875, 0.2511558532714844, 0.26220703125]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 2.0, 6.0, 10.0, 13.0, 39.0, 97.0, 198.0, 272.0, 179.0, 117.0, 47.0, 14.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-7.4218430519104, -7.24770975112915, -7.0735764503479, -6.899442672729492, -6.725309371948242, -6.551176071166992, -6.377042770385742, -6.202909469604492, -6.028775691986084, -5.854642391204834, -5.680509090423584, -5.506375312805176, -5.332242012023926, -5.158108711242676, -4.983975410461426, -4.809842109680176, -4.635708808898926, -4.461575508117676, -4.287442207336426, -4.113308429718018, -3.9391751289367676, -3.7650418281555176, -3.5909085273742676, -3.4167749881744385, -3.2426414489746094, -3.0685081481933594, -2.8943746089935303, -2.7202413082122803, -2.546107769012451, -2.371974468231201, -2.197841167449951, -2.023707628250122, -1.8495738506317139, -1.6754404306411743, -1.5013070106506348, -1.3271737098693848, -1.1530401706695557, -0.9789068102836609, -0.8047734498977661, -0.6306400299072266, -0.456506609916687, -0.28237318992614746, -0.1082397997379303, 0.06589359045028687, 0.24002701044082642, 0.41416043043136597, 0.5882937908172607, 0.7624272108078003, 0.9365606307983398, 1.1106940507888794, 1.284827470779419, 1.458960771560669, 1.633094310760498, 1.807227611541748, 1.9813610315322876, 2.155494451522827, 2.3296279907226562, 2.5037612915039062, 2.6778948307037354, 2.8520281314849854, 3.0261616706848145, 3.2002949714660645, 3.3744282722473145, 3.5485618114471436, 3.7226951122283936]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 4.0, 2.0, 7.0, 6.0, 8.0, 14.0, 21.0, 13.0, 32.0, 29.0, 24.0, 44.0, 48.0, 54.0, 49.0, 45.0, 64.0, 57.0, 53.0, 61.0, 40.0, 54.0, 48.0, 39.0, 38.0, 26.0, 27.0, 17.0, 13.0, 24.0, 11.0, 11.0, 7.0, 8.0, 1.0, 4.0, 2.0, 3.0, 0.0, 1.0, 2.0, 4.0], "bins": [-2.2807202339172363, -2.2223715782165527, -2.164022922515869, -2.1056742668151855, -2.047325611114502, -1.9889768362045288, -1.9306280612945557, -1.872279405593872, -1.8139307498931885, -1.7555820941925049, -1.6972334384918213, -1.6388846635818481, -1.5805360078811646, -1.522187352180481, -1.4638385772705078, -1.4054899215698242, -1.3471412658691406, -1.288792610168457, -1.2304439544677734, -1.1720951795578003, -1.1137465238571167, -1.055397868156433, -0.9970491528511047, -0.9387004375457764, -0.8803517818450928, -0.8220031261444092, -0.7636544108390808, -0.7053056955337524, -0.6469570398330688, -0.5886083841323853, -0.5302596688270569, -0.4719109833240509, -0.4135622978210449, -0.35521361231803894, -0.29686492681503296, -0.23851624131202698, -0.180167555809021, -0.12181887030601501, -0.06347018480300903, -0.005121499300003052, 0.05322718620300293, 0.11157587170600891, 0.1699245572090149, 0.22827324271202087, 0.28662192821502686, 0.34497061371803284, 0.4033192992210388, 0.4616679847240448, 0.5200166702270508, 0.5783653259277344, 0.6367140412330627, 0.6950627565383911, 0.7534114122390747, 0.8117600679397583, 0.8701087832450867, 0.928457498550415, 0.9868061542510986, 1.0451548099517822, 1.1035034656524658, 1.161852240562439, 1.2202008962631226, 1.2785495519638062, 1.3368983268737793, 1.395246982574463, 1.4535956382751465]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 2.0, 5.0, 3.0, 2.0, 7.0, 5.0, 13.0, 13.0, 15.0, 23.0, 17.0, 18.0, 24.0, 35.0, 33.0, 61.0, 63.0, 98.0, 144.0, 218.0, 589.0, 1993.0, 16505.0, 3264047.0, 893623.0, 14081.0, 1825.0, 425.0, 167.0, 72.0, 48.0, 34.0, 21.0, 18.0, 14.0, 6.0, 6.0, 1.0, 4.0, 3.0, 4.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.751953125, -2.6732177734375, -2.594482421875, -2.5157470703125, -2.43701171875, -2.3582763671875, -2.279541015625, -2.2008056640625, -2.1220703125, -2.0433349609375, -1.964599609375, -1.8858642578125, -1.80712890625, -1.7283935546875, -1.649658203125, -1.5709228515625, -1.4921875, -1.4134521484375, -1.334716796875, -1.2559814453125, -1.17724609375, -1.0985107421875, -1.019775390625, -0.9410400390625, -0.8623046875, -0.7835693359375, -0.704833984375, -0.6260986328125, -0.54736328125, -0.4686279296875, -0.389892578125, -0.3111572265625, -0.232421875, -0.1536865234375, -0.074951171875, 0.0037841796875, 0.08251953125, 0.1612548828125, 0.239990234375, 0.3187255859375, 0.3974609375, 0.4761962890625, 0.554931640625, 0.6336669921875, 0.71240234375, 0.7911376953125, 0.869873046875, 0.9486083984375, 1.02734375, 1.1060791015625, 1.184814453125, 1.2635498046875, 1.34228515625, 1.4210205078125, 1.499755859375, 1.5784912109375, 1.6572265625, 1.7359619140625, 1.814697265625, 1.8934326171875, 1.97216796875, 2.0509033203125, 2.129638671875, 2.2083740234375, 2.287109375]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 11.0, 33.0, 78.0, 126.0, 142.0, 193.0, 159.0, 117.0, 76.0, 37.0, 18.0, 11.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.250244140625, -0.24529647827148438, -0.24034881591796875, -0.23540115356445312, -0.2304534912109375, -0.22550582885742188, -0.22055816650390625, -0.21561050415039062, -0.210662841796875, -0.20571517944335938, -0.20076751708984375, -0.19581985473632812, -0.1908721923828125, -0.18592453002929688, -0.18097686767578125, -0.17602920532226562, -0.17108154296875, -0.16613388061523438, -0.16118621826171875, -0.15623855590820312, -0.1512908935546875, -0.14634323120117188, -0.14139556884765625, -0.13644790649414062, -0.131500244140625, -0.12655258178710938, -0.12160491943359375, -0.11665725708007812, -0.1117095947265625, -0.10676193237304688, -0.10181427001953125, -0.09686660766601562, -0.0919189453125, -0.08697128295898438, -0.08202362060546875, -0.07707595825195312, -0.0721282958984375, -0.06718063354492188, -0.06223297119140625, -0.057285308837890625, -0.052337646484375, -0.047389984130859375, -0.04244232177734375, -0.037494659423828125, -0.0325469970703125, -0.027599334716796875, -0.02265167236328125, -0.017704010009765625, -0.01275634765625, -0.007808685302734375, -0.00286102294921875, 0.002086639404296875, 0.0070343017578125, 0.011981964111328125, 0.01692962646484375, 0.021877288818359375, 0.026824951171875, 0.031772613525390625, 0.03672027587890625, 0.041667938232421875, 0.0466156005859375, 0.051563262939453125, 0.05651092529296875, 0.061458587646484375, 0.06640625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 6.0, 4.0, 10.0, 13.0, 12.0, 20.0, 30.0, 49.0, 47.0, 92.0, 202.0, 609.0, 2918.0, 3919008.0, 268346.0, 2044.0, 491.0, 160.0, 64.0, 46.0, 39.0, 22.0, 19.0, 9.0, 10.0, 8.0, 3.0, 3.0, 5.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.3046875, -7.08935546875, -6.8740234375, -6.65869140625, -6.443359375, -6.22802734375, -6.0126953125, -5.79736328125, -5.58203125, -5.36669921875, -5.1513671875, -4.93603515625, -4.720703125, -4.50537109375, -4.2900390625, -4.07470703125, -3.859375, -3.64404296875, -3.4287109375, -3.21337890625, -2.998046875, -2.78271484375, -2.5673828125, -2.35205078125, -2.13671875, -1.92138671875, -1.7060546875, -1.49072265625, -1.275390625, -1.06005859375, -0.8447265625, -0.62939453125, -0.4140625, -0.19873046875, 0.0166015625, 0.23193359375, 0.447265625, 0.66259765625, 0.8779296875, 1.09326171875, 1.30859375, 1.52392578125, 1.7392578125, 1.95458984375, 2.169921875, 2.38525390625, 2.6005859375, 2.81591796875, 3.03125, 3.24658203125, 3.4619140625, 3.67724609375, 3.892578125, 4.10791015625, 4.3232421875, 4.53857421875, 4.75390625, 4.96923828125, 5.1845703125, 5.39990234375, 5.615234375, 5.83056640625, 6.0458984375, 6.26123046875, 6.4765625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 12.0, 15.0, 33.0, 109.0, 468.0, 2817.0, 453.0, 124.0, 27.0, 16.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.73828125, -0.7204818725585938, -0.7026824951171875, -0.6848831176757812, -0.667083740234375, -0.6492843627929688, -0.6314849853515625, -0.6136856079101562, -0.59588623046875, -0.5780868530273438, -0.5602874755859375, -0.5424880981445312, -0.524688720703125, -0.5068893432617188, -0.4890899658203125, -0.47129058837890625, -0.4534912109375, -0.43569183349609375, -0.4178924560546875, -0.40009307861328125, -0.382293701171875, -0.36449432373046875, -0.3466949462890625, -0.32889556884765625, -0.31109619140625, -0.29329681396484375, -0.2754974365234375, -0.25769805908203125, -0.239898681640625, -0.22209930419921875, -0.2042999267578125, -0.18650054931640625, -0.168701171875, -0.15090179443359375, -0.1331024169921875, -0.11530303955078125, -0.097503662109375, -0.07970428466796875, -0.0619049072265625, -0.04410552978515625, -0.02630615234375, -0.00850677490234375, 0.0092926025390625, 0.02709197998046875, 0.044891357421875, 0.06269073486328125, 0.0804901123046875, 0.09828948974609375, 0.1160888671875, 0.13388824462890625, 0.1516876220703125, 0.16948699951171875, 0.187286376953125, 0.20508575439453125, 0.2228851318359375, 0.24068450927734375, 0.25848388671875, 0.27628326416015625, 0.2940826416015625, 0.31188201904296875, 0.329681396484375, 0.34748077392578125, 0.3652801513671875, 0.38307952880859375, 0.40087890625]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 4.0, 3.0, 3.0, 12.0, 11.0, 24.0, 55.0, 106.0, 164.0, 215.0, 174.0, 109.0, 46.0, 29.0, 18.0, 9.0, 7.0, 5.0, 7.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.0067596435546875, -3.910369396209717, -3.813978910446167, -3.7175886631011963, -3.6211984157562256, -3.524807929992676, -3.428417682647705, -3.3320274353027344, -3.2356371879577637, -3.139246940612793, -3.042856454849243, -2.9464662075042725, -2.8500759601593018, -2.753685474395752, -2.6572952270507812, -2.5609049797058105, -2.4645144939422607, -2.36812424659729, -2.2717337608337402, -2.1753435134887695, -2.078953266143799, -1.9825628995895386, -1.8861725330352783, -1.7897822856903076, -1.6933919191360474, -1.597001552581787, -1.5006113052368164, -1.4042209386825562, -1.307830572128296, -1.2114403247833252, -1.115049958229065, -1.0186595916748047, -0.9222695827484131, -0.8258792757987976, -0.7294889688491821, -0.6330986022949219, -0.5367082953453064, -0.4403179883956909, -0.34392762184143066, -0.24753731489181519, -0.1511470079421997, -0.054756686091423035, 0.04163363575935364, 0.1380239725112915, 0.23441427946090698, 0.33080458641052246, 0.4271949529647827, 0.5235852599143982, 0.6199755668640137, 0.7163658738136292, 0.8127561807632446, 0.9091465473175049, 1.0055367946624756, 1.1019271612167358, 1.198317527770996, 1.2947077751159668, 1.391098141670227, 1.4874885082244873, 1.583878755569458, 1.6802691221237183, 1.7766594886779785, 1.8730497360229492, 1.9694401025772095, 2.0658304691314697, 2.1622207164764404]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 5.0, 2.0, 3.0, 7.0, 9.0, 11.0, 16.0, 19.0, 27.0, 38.0, 41.0, 35.0, 68.0, 64.0, 72.0, 53.0, 60.0, 82.0, 62.0, 51.0, 62.0, 42.0, 32.0, 35.0, 25.0, 28.0, 13.0, 12.0, 12.0, 10.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.0176641941070557, -0.9888597130775452, -0.9600552916526794, -0.931250810623169, -0.9024463295936584, -0.873641848564148, -0.8448374271392822, -0.8160329461097717, -0.7872284650802612, -0.7584239840507507, -0.729619562625885, -0.7008150815963745, -0.672010600566864, -0.6432061195373535, -0.6144016981124878, -0.5855972170829773, -0.5567927360534668, -0.5279882550239563, -0.4991838037967682, -0.4703793525695801, -0.4415748715400696, -0.41277042031288147, -0.38396596908569336, -0.35516148805618286, -0.32635706663131714, -0.29755261540412903, -0.26874813437461853, -0.23994368314743042, -0.21113920211791992, -0.1823347508907318, -0.1535302847623825, -0.1247258186340332, -0.0959213376045227, -0.0671168714761734, -0.038312409073114395, -0.00950794667005539, 0.019296519458293915, 0.04810097813606262, 0.07690544426441193, 0.10570991039276123, 0.13451437652111053, 0.16331884264945984, 0.19212330877780914, 0.22092777490615845, 0.24973222613334656, 0.27853667736053467, 0.30734115839004517, 0.33614563941955566, 0.3649500906467438, 0.3937545418739319, 0.4225590229034424, 0.4513634741306305, 0.480167955160141, 0.5089724063873291, 0.5377768874168396, 0.5665813684463501, 0.5953857898712158, 0.6241902709007263, 0.652994692325592, 0.6817991733551025, 0.710603654384613, 0.7394081354141235, 0.7682125568389893, 0.7970170378684998, 0.8258215188980103]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 4.0, 4.0, 7.0, 4.0, 11.0, 21.0, 12.0, 36.0, 35.0, 82.0, 96.0, 214.0, 449.0, 1202.0, 5922.0, 76843.0, 871069.0, 84084.0, 6085.0, 1280.0, 502.0, 242.0, 122.0, 81.0, 48.0, 29.0, 18.0, 20.0, 12.0, 13.0, 6.0, 3.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.958984375, -1.892608642578125, -1.82623291015625, -1.759857177734375, -1.6934814453125, -1.627105712890625, -1.56072998046875, -1.494354248046875, -1.427978515625, -1.361602783203125, -1.29522705078125, -1.228851318359375, -1.1624755859375, -1.096099853515625, -1.02972412109375, -0.963348388671875, -0.89697265625, -0.830596923828125, -0.76422119140625, -0.697845458984375, -0.6314697265625, -0.565093994140625, -0.49871826171875, -0.432342529296875, -0.365966796875, -0.299591064453125, -0.23321533203125, -0.166839599609375, -0.1004638671875, -0.034088134765625, 0.03228759765625, 0.098663330078125, 0.1650390625, 0.231414794921875, 0.29779052734375, 0.364166259765625, 0.4305419921875, 0.496917724609375, 0.56329345703125, 0.629669189453125, 0.696044921875, 0.762420654296875, 0.82879638671875, 0.895172119140625, 0.9615478515625, 1.027923583984375, 1.09429931640625, 1.160675048828125, 1.22705078125, 1.293426513671875, 1.35980224609375, 1.426177978515625, 1.4925537109375, 1.558929443359375, 1.62530517578125, 1.691680908203125, 1.758056640625, 1.824432373046875, 1.89080810546875, 1.957183837890625, 2.0235595703125, 2.089935302734375, 2.15631103515625, 2.222686767578125, 2.2890625]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 6.0, 15.0, 35.0, 62.0, 76.0, 110.0, 139.0, 170.0, 140.0, 95.0, 66.0, 30.0, 22.0, 23.0, 13.0, 6.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.230224609375, -0.22546005249023438, -0.22069549560546875, -0.21593093872070312, -0.2111663818359375, -0.20640182495117188, -0.20163726806640625, -0.19687271118164062, -0.192108154296875, -0.18734359741210938, -0.18257904052734375, -0.17781448364257812, -0.1730499267578125, -0.16828536987304688, -0.16352081298828125, -0.15875625610351562, -0.15399169921875, -0.14922714233398438, -0.14446258544921875, -0.13969802856445312, -0.1349334716796875, -0.13016891479492188, -0.12540435791015625, -0.12063980102539062, -0.115875244140625, -0.11111068725585938, -0.10634613037109375, -0.10158157348632812, -0.0968170166015625, -0.09205245971679688, -0.08728790283203125, -0.08252334594726562, -0.0777587890625, -0.07299423217773438, -0.06822967529296875, -0.06346511840820312, -0.0587005615234375, -0.053936004638671875, -0.04917144775390625, -0.044406890869140625, -0.039642333984375, -0.034877777099609375, -0.03011322021484375, -0.025348663330078125, -0.0205841064453125, -0.015819549560546875, -0.01105499267578125, -0.006290435791015625, -0.00152587890625, 0.003238677978515625, 0.00800323486328125, 0.012767791748046875, 0.0175323486328125, 0.022296905517578125, 0.02706146240234375, 0.031826019287109375, 0.036590576171875, 0.041355133056640625, 0.04611968994140625, 0.050884246826171875, 0.0556488037109375, 0.060413360595703125, 0.06517791748046875, 0.06994247436523438, 0.07470703125]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 3.0, 8.0, 4.0, 8.0, 10.0, 24.0, 16.0, 15.0, 29.0, 33.0, 54.0, 74.0, 110.0, 158.0, 253.0, 384.0, 696.0, 1320.0, 2645.0, 6691.0, 20248.0, 95108.0, 499867.0, 340031.0, 56742.0, 14190.0, 5029.0, 2193.0, 1024.0, 592.0, 325.0, 207.0, 127.0, 98.0, 68.0, 50.0, 41.0, 20.0, 17.0, 12.0, 8.0, 5.0, 7.0, 4.0, 6.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.8017578125, -0.7773513793945312, -0.7529449462890625, -0.7285385131835938, -0.704132080078125, -0.6797256469726562, -0.6553192138671875, -0.6309127807617188, -0.60650634765625, -0.5820999145507812, -0.5576934814453125, -0.5332870483398438, -0.508880615234375, -0.48447418212890625, -0.4600677490234375, -0.43566131591796875, -0.4112548828125, -0.38684844970703125, -0.3624420166015625, -0.33803558349609375, -0.313629150390625, -0.28922271728515625, -0.2648162841796875, -0.24040985107421875, -0.21600341796875, -0.19159698486328125, -0.1671905517578125, -0.14278411865234375, -0.118377685546875, -0.09397125244140625, -0.0695648193359375, -0.04515838623046875, -0.020751953125, 0.00365447998046875, 0.0280609130859375, 0.05246734619140625, 0.076873779296875, 0.10128021240234375, 0.1256866455078125, 0.15009307861328125, 0.17449951171875, 0.19890594482421875, 0.2233123779296875, 0.24771881103515625, 0.272125244140625, 0.29653167724609375, 0.3209381103515625, 0.34534454345703125, 0.3697509765625, 0.39415740966796875, 0.4185638427734375, 0.44297027587890625, 0.467376708984375, 0.49178314208984375, 0.5161895751953125, 0.5405960083007812, 0.56500244140625, 0.5894088745117188, 0.6138153076171875, 0.6382217407226562, 0.662628173828125, 0.6870346069335938, 0.7114410400390625, 0.7358474731445312, 0.76025390625]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 1.0, 4.0, 2.0, 2.0, 4.0, 3.0, 4.0, 7.0, 5.0, 8.0, 9.0, 8.0, 8.0, 24.0, 17.0, 21.0, 25.0, 30.0, 28.0, 34.0, 31.0, 43.0, 34.0, 40.0, 50.0, 46.0, 36.0, 50.0, 47.0, 44.0, 43.0, 38.0, 39.0, 30.0, 38.0, 27.0, 17.0, 15.0, 18.0, 11.0, 7.0, 9.0, 9.0, 7.0, 6.0, 5.0, 7.0, 6.0, 5.0, 2.0, 2.0, 5.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.2264404296875, -0.21875572204589844, -0.21107101440429688, -0.2033863067626953, -0.19570159912109375, -0.1880168914794922, -0.18033218383789062, -0.17264747619628906, -0.1649627685546875, -0.15727806091308594, -0.14959335327148438, -0.1419086456298828, -0.13422393798828125, -0.1265392303466797, -0.11885452270507812, -0.11116981506347656, -0.103485107421875, -0.09580039978027344, -0.08811569213867188, -0.08043098449707031, -0.07274627685546875, -0.06506156921386719, -0.057376861572265625, -0.04969215393066406, -0.0420074462890625, -0.03432273864746094, -0.026638031005859375, -0.018953323364257812, -0.01126861572265625, -0.0035839080810546875, 0.004100799560546875, 0.011785507202148438, 0.01947021484375, 0.027154922485351562, 0.034839630126953125, 0.04252433776855469, 0.05020904541015625, 0.05789375305175781, 0.06557846069335938, 0.07326316833496094, 0.0809478759765625, 0.08863258361816406, 0.09631729125976562, 0.10400199890136719, 0.11168670654296875, 0.11937141418457031, 0.12705612182617188, 0.13474082946777344, 0.142425537109375, 0.15011024475097656, 0.15779495239257812, 0.1654796600341797, 0.17316436767578125, 0.1808490753173828, 0.18853378295898438, 0.19621849060058594, 0.2039031982421875, 0.21158790588378906, 0.21927261352539062, 0.2269573211669922, 0.23464202880859375, 0.2423267364501953, 0.2500114440917969, 0.25769615173339844, 0.265380859375]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 6.0, 8.0, 6.0, 13.0, 21.0, 24.0, 33.0, 44.0, 78.0, 123.0, 193.0, 457.0, 942.0, 2254.0, 6448.0, 22241.0, 113931.0, 566597.0, 274424.0, 43709.0, 10749.0, 3563.0, 1351.0, 622.0, 286.0, 156.0, 94.0, 54.0, 34.0, 34.0, 18.0, 17.0, 9.0, 6.0, 5.0, 4.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.43798828125, -0.4241828918457031, -0.41037750244140625, -0.3965721130371094, -0.3827667236328125, -0.3689613342285156, -0.35515594482421875, -0.3413505554199219, -0.327545166015625, -0.3137397766113281, -0.29993438720703125, -0.2861289978027344, -0.2723236083984375, -0.2585182189941406, -0.24471282958984375, -0.23090744018554688, -0.21710205078125, -0.20329666137695312, -0.18949127197265625, -0.17568588256835938, -0.1618804931640625, -0.14807510375976562, -0.13426971435546875, -0.12046432495117188, -0.106658935546875, -0.09285354614257812, -0.07904815673828125, -0.06524276733398438, -0.0514373779296875, -0.037631988525390625, -0.02382659912109375, -0.010021209716796875, 0.0037841796875, 0.017589569091796875, 0.03139495849609375, 0.045200347900390625, 0.0590057373046875, 0.07281112670898438, 0.08661651611328125, 0.10042190551757812, 0.114227294921875, 0.12803268432617188, 0.14183807373046875, 0.15564346313476562, 0.1694488525390625, 0.18325424194335938, 0.19705963134765625, 0.21086502075195312, 0.22467041015625, 0.23847579956054688, 0.25228118896484375, 0.2660865783691406, 0.2798919677734375, 0.2936973571777344, 0.30750274658203125, 0.3213081359863281, 0.335113525390625, 0.3489189147949219, 0.36272430419921875, 0.3765296936035156, 0.3903350830078125, 0.4041404724121094, 0.41794586181640625, 0.4317512512207031, 0.445556640625]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 8.0, 8.0, 9.0, 5.0, 11.0, 10.0, 12.0, 12.0, 24.0, 19.0, 34.0, 24.0, 42.0, 44.0, 63.0, 50.0, 63.0, 64.0, 78.0, 62.0, 52.0, 43.0, 41.0, 41.0, 37.0, 24.0, 27.0, 23.0, 16.0, 12.0, 8.0, 9.0, 4.0, 6.0, 7.0, 1.0, 1.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 3.0, 3.0], "bins": [-9.018182754516602e-05, -8.776038885116577e-05, -8.533895015716553e-05, -8.291751146316528e-05, -8.049607276916504e-05, -7.80746340751648e-05, -7.565319538116455e-05, -7.32317566871643e-05, -7.081031799316406e-05, -6.838887929916382e-05, -6.596744060516357e-05, -6.354600191116333e-05, -6.112456321716309e-05, -5.870312452316284e-05, -5.62816858291626e-05, -5.3860247135162354e-05, -5.143880844116211e-05, -4.9017369747161865e-05, -4.659593105316162e-05, -4.417449235916138e-05, -4.175305366516113e-05, -3.933161497116089e-05, -3.6910176277160645e-05, -3.44887375831604e-05, -3.2067298889160156e-05, -2.9645860195159912e-05, -2.7224421501159668e-05, -2.4802982807159424e-05, -2.238154411315918e-05, -1.9960105419158936e-05, -1.753866672515869e-05, -1.5117228031158447e-05, -1.2695789337158203e-05, -1.0274350643157959e-05, -7.852911949157715e-06, -5.431473255157471e-06, -3.0100345611572266e-06, -5.885958671569824e-07, 1.8328428268432617e-06, 4.254281520843506e-06, 6.67572021484375e-06, 9.097158908843994e-06, 1.1518597602844238e-05, 1.3940036296844482e-05, 1.6361474990844727e-05, 1.878291368484497e-05, 2.1204352378845215e-05, 2.362579107284546e-05, 2.6047229766845703e-05, 2.8468668460845947e-05, 3.089010715484619e-05, 3.3311545848846436e-05, 3.573298454284668e-05, 3.8154423236846924e-05, 4.057586193084717e-05, 4.299730062484741e-05, 4.5418739318847656e-05, 4.78401780128479e-05, 5.0261616706848145e-05, 5.268305540084839e-05, 5.510449409484863e-05, 5.752593278884888e-05, 5.994737148284912e-05, 6.236881017684937e-05, 6.479024887084961e-05]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 6.0, 8.0, 8.0, 12.0, 6.0, 28.0, 33.0, 48.0, 120.0, 195.0, 427.0, 1096.0, 3248.0, 13414.0, 105775.0, 747449.0, 153504.0, 17184.0, 3776.0, 1264.0, 470.0, 219.0, 112.0, 57.0, 43.0, 17.0, 10.0, 12.0, 3.0, 6.0, 3.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.446044921875, -0.4271202087402344, -0.40819549560546875, -0.3892707824707031, -0.3703460693359375, -0.3514213562011719, -0.33249664306640625, -0.3135719299316406, -0.294647216796875, -0.2757225036621094, -0.25679779052734375, -0.23787307739257812, -0.2189483642578125, -0.20002365112304688, -0.18109893798828125, -0.16217422485351562, -0.14324951171875, -0.12432479858398438, -0.10540008544921875, -0.08647537231445312, -0.0675506591796875, -0.048625946044921875, -0.02970123291015625, -0.010776519775390625, 0.008148193359375, 0.027072906494140625, 0.04599761962890625, 0.06492233276367188, 0.0838470458984375, 0.10277175903320312, 0.12169647216796875, 0.14062118530273438, 0.1595458984375, 0.17847061157226562, 0.19739532470703125, 0.21632003784179688, 0.2352447509765625, 0.2541694641113281, 0.27309417724609375, 0.2920188903808594, 0.310943603515625, 0.3298683166503906, 0.34879302978515625, 0.3677177429199219, 0.3866424560546875, 0.4055671691894531, 0.42449188232421875, 0.4434165954589844, 0.46234130859375, 0.4812660217285156, 0.5001907348632812, 0.5191154479980469, 0.5380401611328125, 0.5569648742675781, 0.5758895874023438, 0.5948143005371094, 0.613739013671875, 0.6326637268066406, 0.6515884399414062, 0.6705131530761719, 0.6894378662109375, 0.7083625793457031, 0.7272872924804688, 0.7462120056152344, 0.76513671875]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 2.0, 5.0, 2.0, 0.0, 6.0, 4.0, 6.0, 3.0, 15.0, 16.0, 27.0, 25.0, 52.0, 47.0, 85.0, 81.0, 115.0, 93.0, 100.0, 81.0, 64.0, 53.0, 31.0, 30.0, 19.0, 20.0, 7.0, 5.0, 4.0, 2.0, 4.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.25244140625, -0.24381637573242188, -0.23519134521484375, -0.22656631469726562, -0.2179412841796875, -0.20931625366210938, -0.20069122314453125, -0.19206619262695312, -0.183441162109375, -0.17481613159179688, -0.16619110107421875, -0.15756607055664062, -0.1489410400390625, -0.14031600952148438, -0.13169097900390625, -0.12306594848632812, -0.11444091796875, -0.10581588745117188, -0.09719085693359375, -0.08856582641601562, -0.0799407958984375, -0.07131576538085938, -0.06269073486328125, -0.054065704345703125, -0.045440673828125, -0.036815643310546875, -0.02819061279296875, -0.019565582275390625, -0.0109405517578125, -0.002315521240234375, 0.00630950927734375, 0.014934539794921875, 0.0235595703125, 0.032184600830078125, 0.04080963134765625, 0.049434661865234375, 0.0580596923828125, 0.06668472290039062, 0.07530975341796875, 0.08393478393554688, 0.092559814453125, 0.10118484497070312, 0.10980987548828125, 0.11843490600585938, 0.1270599365234375, 0.13568496704101562, 0.14430999755859375, 0.15293502807617188, 0.16156005859375, 0.17018508911132812, 0.17881011962890625, 0.18743515014648438, 0.1960601806640625, 0.20468521118164062, 0.21331024169921875, 0.22193527221679688, 0.230560302734375, 0.23918533325195312, 0.24781036376953125, 0.2564353942871094, 0.2650604248046875, 0.2736854553222656, 0.28231048583984375, 0.2909355163574219, 0.299560546875]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 14.0, 15.0, 20.0, 43.0, 80.0, 126.0, 137.0, 139.0, 128.0, 123.0, 67.0, 41.0, 25.0, 16.0, 6.0, 5.0, 5.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-5.451085090637207, -5.325107097625732, -5.199129581451416, -5.073151588439941, -4.947173595428467, -4.821195602416992, -4.695218086242676, -4.569240093231201, -4.443262100219727, -4.317284107208252, -4.1913065910339355, -4.065328598022461, -3.9393506050109863, -3.813372850418091, -3.6873950958251953, -3.5614171028137207, -3.435439348220825, -3.3094615936279297, -3.183483600616455, -3.0575058460235596, -2.931527853012085, -2.8055500984191895, -2.679572105407715, -2.5535943508148193, -2.427616596221924, -2.3016388416290283, -2.1756608486175537, -2.049683094024658, -1.9237051010131836, -1.797727346420288, -1.671749472618103, -1.545771598815918, -1.4197933673858643, -1.2938154935836792, -1.1678376197814941, -1.0418598651885986, -0.9158819317817688, -0.7899040579795837, -0.6639262437820435, -0.5379483699798584, -0.41197049617767334, -0.2859926223754883, -0.1600147783756256, -0.03403693437576294, 0.09194093942642212, 0.21791881322860718, 0.34389662742614746, 0.4698745012283325, 0.5958523750305176, 0.7218302488327026, 0.8478081226348877, 0.973785936832428, 1.0997638702392578, 1.2257416248321533, 1.3517194986343384, 1.4776973724365234, 1.6036752462387085, 1.7296531200408936, 1.8556309938430786, 1.9816088676452637, 2.107586622238159, 2.233564615249634, 2.3595423698425293, 2.485520362854004, 2.6114981174468994]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 4.0, 5.0, 4.0, 5.0, 2.0, 5.0, 10.0, 11.0, 10.0, 15.0, 22.0, 19.0, 15.0, 22.0, 33.0, 40.0, 41.0, 42.0, 46.0, 40.0, 37.0, 56.0, 40.0, 45.0, 42.0, 51.0, 40.0, 43.0, 40.0, 23.0, 28.0, 19.0, 25.0, 23.0, 21.0, 10.0, 13.0, 17.0, 9.0, 8.0, 6.0, 5.0, 8.0, 6.0, 3.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.6530956029891968, -1.6028468608856201, -1.552598237991333, -1.5023494958877563, -1.4521007537841797, -1.401852011680603, -1.3516032695770264, -1.3013546466827393, -1.2511059045791626, -1.200857162475586, -1.1506085395812988, -1.1003597974777222, -1.0501110553741455, -0.9998623132705688, -0.949613630771637, -0.8993649482727051, -0.8491162061691284, -0.7988674640655518, -0.7486187815666199, -0.698370099067688, -0.6481213569641113, -0.5978726148605347, -0.5476239323616028, -0.4973752200603485, -0.44712650775909424, -0.39687779545783997, -0.3466290831565857, -0.2963803708553314, -0.24613165855407715, -0.19588294625282288, -0.1456342339515686, -0.09538552165031433, -0.04513680934906006, 0.005111902952194214, 0.055360615253448486, 0.10560932755470276, 0.15585803985595703, 0.2061067521572113, 0.2563554644584656, 0.30660417675971985, 0.3568528890609741, 0.4071016013622284, 0.45735031366348267, 0.5075989961624146, 0.5578477382659912, 0.6080964803695679, 0.6583451628684998, 0.7085938453674316, 0.7588425874710083, 0.809091329574585, 0.8593400120735168, 0.9095886945724487, 0.9598374366760254, 1.010086178779602, 1.0603349208831787, 1.1105835437774658, 1.1608322858810425, 1.2110810279846191, 1.2613296508789062, 1.311578392982483, 1.3618271350860596, 1.4120758771896362, 1.462324619293213, 1.5125732421875, 1.5628219842910767]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 4.0, 4.0, 6.0, 3.0, 9.0, 5.0, 7.0, 11.0, 9.0, 18.0, 17.0, 34.0, 32.0, 35.0, 43.0, 72.0, 89.0, 105.0, 195.0, 324.0, 639.0, 1497.0, 5714.0, 41968.0, 3572025.0, 540538.0, 24178.0, 4333.0, 1240.0, 532.0, 247.0, 140.0, 80.0, 49.0, 29.0, 20.0, 15.0, 5.0, 7.0, 9.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.71484375, -2.6500091552734375, -2.585174560546875, -2.5203399658203125, -2.45550537109375, -2.3906707763671875, -2.325836181640625, -2.2610015869140625, -2.1961669921875, -2.1313323974609375, -2.066497802734375, -2.0016632080078125, -1.93682861328125, -1.8719940185546875, -1.807159423828125, -1.7423248291015625, -1.677490234375, -1.6126556396484375, -1.547821044921875, -1.4829864501953125, -1.41815185546875, -1.3533172607421875, -1.288482666015625, -1.2236480712890625, -1.1588134765625, -1.0939788818359375, -1.029144287109375, -0.9643096923828125, -0.89947509765625, -0.8346405029296875, -0.769805908203125, -0.7049713134765625, -0.64013671875, -0.5753021240234375, -0.510467529296875, -0.4456329345703125, -0.38079833984375, -0.3159637451171875, -0.251129150390625, -0.1862945556640625, -0.1214599609375, -0.0566253662109375, 0.008209228515625, 0.0730438232421875, 0.13787841796875, 0.2027130126953125, 0.267547607421875, 0.3323822021484375, 0.397216796875, 0.4620513916015625, 0.526885986328125, 0.5917205810546875, 0.65655517578125, 0.7213897705078125, 0.786224365234375, 0.8510589599609375, 0.9158935546875, 0.9807281494140625, 1.045562744140625, 1.1103973388671875, 1.17523193359375, 1.2400665283203125, 1.304901123046875, 1.3697357177734375, 1.4345703125]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 11.0, 23.0, 43.0, 58.0, 87.0, 115.0, 145.0, 144.0, 123.0, 90.0, 67.0, 41.0, 26.0, 17.0, 8.0, 9.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.240966796875, -0.23606109619140625, -0.2311553955078125, -0.22624969482421875, -0.221343994140625, -0.21643829345703125, -0.2115325927734375, -0.20662689208984375, -0.20172119140625, -0.19681549072265625, -0.1919097900390625, -0.18700408935546875, -0.182098388671875, -0.17719268798828125, -0.1722869873046875, -0.16738128662109375, -0.1624755859375, -0.15756988525390625, -0.1526641845703125, -0.14775848388671875, -0.142852783203125, -0.13794708251953125, -0.1330413818359375, -0.12813568115234375, -0.12322998046875, -0.11832427978515625, -0.1134185791015625, -0.10851287841796875, -0.103607177734375, -0.09870147705078125, -0.0937957763671875, -0.08889007568359375, -0.083984375, -0.07907867431640625, -0.0741729736328125, -0.06926727294921875, -0.064361572265625, -0.05945587158203125, -0.0545501708984375, -0.04964447021484375, -0.04473876953125, -0.03983306884765625, -0.0349273681640625, -0.03002166748046875, -0.025115966796875, -0.02021026611328125, -0.0153045654296875, -0.01039886474609375, -0.0054931640625, -0.00058746337890625, 0.0043182373046875, 0.00922393798828125, 0.014129638671875, 0.01903533935546875, 0.0239410400390625, 0.02884674072265625, 0.03375244140625, 0.03865814208984375, 0.0435638427734375, 0.04846954345703125, 0.053375244140625, 0.05828094482421875, 0.0631866455078125, 0.06809234619140625, 0.072998046875]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 7.0, 6.0, 11.0, 27.0, 44.0, 97.0, 120.0, 255.0, 851.0, 23273.0, 4155876.0, 12668.0, 659.0, 196.0, 102.0, 51.0, 22.0, 15.0, 7.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.85546875, -4.705810546875, -4.55615234375, -4.406494140625, -4.2568359375, -4.107177734375, -3.95751953125, -3.807861328125, -3.658203125, -3.508544921875, -3.35888671875, -3.209228515625, -3.0595703125, -2.909912109375, -2.76025390625, -2.610595703125, -2.4609375, -2.311279296875, -2.16162109375, -2.011962890625, -1.8623046875, -1.712646484375, -1.56298828125, -1.413330078125, -1.263671875, -1.114013671875, -0.96435546875, -0.814697265625, -0.6650390625, -0.515380859375, -0.36572265625, -0.216064453125, -0.06640625, 0.083251953125, 0.23291015625, 0.382568359375, 0.5322265625, 0.681884765625, 0.83154296875, 0.981201171875, 1.130859375, 1.280517578125, 1.43017578125, 1.579833984375, 1.7294921875, 1.879150390625, 2.02880859375, 2.178466796875, 2.328125, 2.477783203125, 2.62744140625, 2.777099609375, 2.9267578125, 3.076416015625, 3.22607421875, 3.375732421875, 3.525390625, 3.675048828125, 3.82470703125, 3.974365234375, 4.1240234375, 4.273681640625, 4.42333984375, 4.572998046875, 4.72265625]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 7.0, 8.0, 18.0, 36.0, 135.0, 434.0, 2560.0, 640.0, 147.0, 67.0, 27.0, 5.0, 2.0, 1.0, 4.0], "bins": [-1.0185546875, -1.0002384185791016, -0.9819221496582031, -0.9636058807373047, -0.9452896118164062, -0.9269733428955078, -0.9086570739746094, -0.8903408050537109, -0.8720245361328125, -0.8537082672119141, -0.8353919982910156, -0.8170757293701172, -0.7987594604492188, -0.7804431915283203, -0.7621269226074219, -0.7438106536865234, -0.725494384765625, -0.7071781158447266, -0.6888618469238281, -0.6705455780029297, -0.6522293090820312, -0.6339130401611328, -0.6155967712402344, -0.5972805023193359, -0.5789642333984375, -0.5606479644775391, -0.5423316955566406, -0.5240154266357422, -0.5056991577148438, -0.4873828887939453, -0.4690666198730469, -0.45075035095214844, -0.43243408203125, -0.41411781311035156, -0.3958015441894531, -0.3774852752685547, -0.35916900634765625, -0.3408527374267578, -0.3225364685058594, -0.30422019958496094, -0.2859039306640625, -0.26758766174316406, -0.24927139282226562, -0.2309551239013672, -0.21263885498046875, -0.1943225860595703, -0.17600631713867188, -0.15769004821777344, -0.139373779296875, -0.12105751037597656, -0.10274124145507812, -0.08442497253417969, -0.06610870361328125, -0.04779243469238281, -0.029476165771484375, -0.011159896850585938, 0.0071563720703125, 0.025472640991210938, 0.043788909912109375, 0.06210517883300781, 0.08042144775390625, 0.09873771667480469, 0.11705398559570312, 0.13537025451660156, 0.1536865234375]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0, 4.0, 7.0, 8.0, 9.0, 35.0, 64.0, 108.0, 189.0, 215.0, 159.0, 82.0, 56.0, 22.0, 16.0, 10.0, 3.0, 4.0, 4.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.8358864784240723, -2.7601702213287354, -2.6844539642333984, -2.6087377071380615, -2.5330214500427246, -2.457305431365967, -2.381588935852051, -2.305872917175293, -2.230156660079956, -2.154440402984619, -2.0787241458892822, -2.0030078887939453, -1.927291750907898, -1.851575493812561, -1.7758592367172241, -1.7001430988311768, -1.6244267225265503, -1.5487104654312134, -1.4729942083358765, -1.397278070449829, -1.3215618133544922, -1.2458455562591553, -1.1701292991638184, -1.0944130420684814, -1.0186967849731445, -0.9429805278778076, -0.8672643303871155, -0.7915480732917786, -0.7158318758010864, -0.6401156187057495, -0.5643993616104126, -0.48868316411972046, -0.4129669666290283, -0.3372507393360138, -0.26153451204299927, -0.18581825494766235, -0.11010202765464783, -0.0343858003616333, 0.04133045673370361, 0.11704665422439575, 0.19276291131973267, 0.2684791386127472, 0.3441953659057617, 0.41991162300109863, 0.49562785029411316, 0.5713440775871277, 0.6470603346824646, 0.7227765321731567, 0.7984927892684937, 0.8742090463638306, 0.9499252438545227, 1.0256414413452148, 1.1013576984405518, 1.1770739555358887, 1.2527902126312256, 1.3285064697265625, 1.4042227268218994, 1.4799389839172363, 1.5556552410125732, 1.6313714981079102, 1.7070876359939575, 1.7828038930892944, 1.8585201501846313, 1.9342362880706787, 2.0099525451660156]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 5.0, 11.0, 9.0, 3.0, 11.0, 21.0, 18.0, 44.0, 46.0, 54.0, 58.0, 74.0, 83.0, 73.0, 73.0, 87.0, 68.0, 67.0, 49.0, 38.0, 31.0, 25.0, 18.0, 20.0, 10.0, 3.0, 6.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.430869221687317, -1.3889741897583008, -1.3470792770385742, -1.305184245109558, -1.263289213180542, -1.2213941812515259, -1.1794991493225098, -1.1376042366027832, -1.095709204673767, -1.053814172744751, -1.0119192600250244, -0.9700242280960083, -0.9281291961669922, -0.8862341642379761, -0.8443391919136047, -0.8024442195892334, -0.7605491876602173, -0.7186541557312012, -0.6767591834068298, -0.6348642110824585, -0.5929691791534424, -0.5510741472244263, -0.5091791749000549, -0.4672841727733612, -0.4253891706466675, -0.38349416851997375, -0.34159916639328003, -0.2997041642665863, -0.2578091621398926, -0.21591416001319885, -0.17401915788650513, -0.1321241557598114, -0.09022927284240723, -0.0483342707157135, -0.006439268589019775, 0.03545573353767395, 0.07735073566436768, 0.1192457377910614, 0.16114073991775513, 0.20303574204444885, 0.24493074417114258, 0.2868257462978363, 0.32872074842453003, 0.37061575055122375, 0.4125107526779175, 0.4544057548046112, 0.49630075693130493, 0.5381957292556763, 0.5800907611846924, 0.6219857931137085, 0.6638807654380798, 0.7057757377624512, 0.7476707696914673, 0.7895658016204834, 0.8314607739448547, 0.8733557462692261, 0.9152507781982422, 0.9571458101272583, 0.9990407824516296, 1.040935754776001, 1.082830786705017, 1.1247258186340332, 1.1666207313537598, 1.2085157632827759, 1.250410795211792]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 2.0, 2.0, 10.0, 5.0, 8.0, 16.0, 16.0, 25.0, 29.0, 61.0, 92.0, 150.0, 243.0, 459.0, 1071.0, 3452.0, 19132.0, 262649.0, 692292.0, 58431.0, 7126.0, 1778.0, 715.0, 329.0, 179.0, 95.0, 58.0, 37.0, 31.0, 22.0, 11.0, 10.0, 10.0, 4.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.9306640625, -1.878814697265625, -1.82696533203125, -1.775115966796875, -1.7232666015625, -1.671417236328125, -1.61956787109375, -1.567718505859375, -1.515869140625, -1.464019775390625, -1.41217041015625, -1.360321044921875, -1.3084716796875, -1.256622314453125, -1.20477294921875, -1.152923583984375, -1.10107421875, -1.049224853515625, -0.99737548828125, -0.945526123046875, -0.8936767578125, -0.841827392578125, -0.78997802734375, -0.738128662109375, -0.686279296875, -0.634429931640625, -0.58258056640625, -0.530731201171875, -0.4788818359375, -0.427032470703125, -0.37518310546875, -0.323333740234375, -0.271484375, -0.219635009765625, -0.16778564453125, -0.115936279296875, -0.0640869140625, -0.012237548828125, 0.03961181640625, 0.091461181640625, 0.143310546875, 0.195159912109375, 0.24700927734375, 0.298858642578125, 0.3507080078125, 0.402557373046875, 0.45440673828125, 0.506256103515625, 0.55810546875, 0.609954833984375, 0.66180419921875, 0.713653564453125, 0.7655029296875, 0.817352294921875, 0.86920166015625, 0.921051025390625, 0.972900390625, 1.024749755859375, 1.07659912109375, 1.128448486328125, 1.1802978515625, 1.232147216796875, 1.28399658203125, 1.335845947265625, 1.3876953125]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 5.0, 20.0, 19.0, 34.0, 64.0, 66.0, 102.0, 111.0, 125.0, 111.0, 109.0, 70.0, 49.0, 44.0, 27.0, 17.0, 10.0, 9.0, 6.0, 3.0, 1.0, 4.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.2457275390625, -0.2404613494873047, -0.23519515991210938, -0.22992897033691406, -0.22466278076171875, -0.21939659118652344, -0.21413040161132812, -0.2088642120361328, -0.2035980224609375, -0.1983318328857422, -0.19306564331054688, -0.18779945373535156, -0.18253326416015625, -0.17726707458496094, -0.17200088500976562, -0.1667346954345703, -0.161468505859375, -0.1562023162841797, -0.15093612670898438, -0.14566993713378906, -0.14040374755859375, -0.13513755798339844, -0.12987136840820312, -0.12460517883300781, -0.1193389892578125, -0.11407279968261719, -0.10880661010742188, -0.10354042053222656, -0.09827423095703125, -0.09300804138183594, -0.08774185180664062, -0.08247566223144531, -0.07720947265625, -0.07194328308105469, -0.06667709350585938, -0.06141090393066406, -0.05614471435546875, -0.05087852478027344, -0.045612335205078125, -0.04034614562988281, -0.0350799560546875, -0.029813766479492188, -0.024547576904296875, -0.019281387329101562, -0.01401519775390625, -0.008749008178710938, -0.003482818603515625, 0.0017833709716796875, 0.007049560546875, 0.012315750122070312, 0.017581939697265625, 0.022848129272460938, 0.02811431884765625, 0.03338050842285156, 0.038646697998046875, 0.04391288757324219, 0.0491790771484375, 0.05444526672363281, 0.059711456298828125, 0.06497764587402344, 0.07024383544921875, 0.07551002502441406, 0.08077621459960938, 0.08604240417480469, 0.09130859375]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 8.0, 3.0, 3.0, 3.0, 2.0, 6.0, 6.0, 10.0, 16.0, 21.0, 37.0, 45.0, 59.0, 87.0, 185.0, 310.0, 554.0, 1219.0, 2695.0, 7270.0, 33650.0, 334587.0, 584753.0, 65189.0, 11146.0, 3501.0, 1508.0, 734.0, 391.0, 189.0, 142.0, 57.0, 46.0, 33.0, 22.0, 17.0, 12.0, 7.0, 7.0, 6.0, 4.0, 7.0, 1.0, 6.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0810546875, -1.0452117919921875, -1.009368896484375, -0.9735260009765625, -0.93768310546875, -0.9018402099609375, -0.865997314453125, -0.8301544189453125, -0.7943115234375, -0.7584686279296875, -0.722625732421875, -0.6867828369140625, -0.65093994140625, -0.6150970458984375, -0.579254150390625, -0.5434112548828125, -0.507568359375, -0.4717254638671875, -0.435882568359375, -0.4000396728515625, -0.36419677734375, -0.3283538818359375, -0.292510986328125, -0.2566680908203125, -0.2208251953125, -0.1849822998046875, -0.149139404296875, -0.1132965087890625, -0.07745361328125, -0.0416107177734375, -0.005767822265625, 0.0300750732421875, 0.06591796875, 0.1017608642578125, 0.137603759765625, 0.1734466552734375, 0.20928955078125, 0.2451324462890625, 0.280975341796875, 0.3168182373046875, 0.3526611328125, 0.3885040283203125, 0.424346923828125, 0.4601898193359375, 0.49603271484375, 0.5318756103515625, 0.567718505859375, 0.6035614013671875, 0.639404296875, 0.6752471923828125, 0.711090087890625, 0.7469329833984375, 0.78277587890625, 0.8186187744140625, 0.854461669921875, 0.8903045654296875, 0.9261474609375, 0.9619903564453125, 0.997833251953125, 1.0336761474609375, 1.06951904296875, 1.1053619384765625, 1.141204833984375, 1.1770477294921875, 1.212890625]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 9.0, 4.0, 9.0, 12.0, 16.0, 25.0, 33.0, 47.0, 38.0, 52.0, 60.0, 71.0, 64.0, 87.0, 71.0, 72.0, 48.0, 73.0, 49.0, 40.0, 34.0, 16.0, 18.0, 17.0, 11.0, 8.0, 9.0, 3.0, 6.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.63818359375, -0.62091064453125, -0.6036376953125, -0.58636474609375, -0.569091796875, -0.55181884765625, -0.5345458984375, -0.51727294921875, -0.5, -0.48272705078125, -0.4654541015625, -0.44818115234375, -0.430908203125, -0.41363525390625, -0.3963623046875, -0.37908935546875, -0.36181640625, -0.34454345703125, -0.3272705078125, -0.30999755859375, -0.292724609375, -0.27545166015625, -0.2581787109375, -0.24090576171875, -0.2236328125, -0.20635986328125, -0.1890869140625, -0.17181396484375, -0.154541015625, -0.13726806640625, -0.1199951171875, -0.10272216796875, -0.08544921875, -0.06817626953125, -0.0509033203125, -0.03363037109375, -0.016357421875, 0.00091552734375, 0.0181884765625, 0.03546142578125, 0.052734375, 0.07000732421875, 0.0872802734375, 0.10455322265625, 0.121826171875, 0.13909912109375, 0.1563720703125, 0.17364501953125, 0.19091796875, 0.20819091796875, 0.2254638671875, 0.24273681640625, 0.260009765625, 0.27728271484375, 0.2945556640625, 0.31182861328125, 0.3291015625, 0.34637451171875, 0.3636474609375, 0.38092041015625, 0.398193359375, 0.41546630859375, 0.4327392578125, 0.45001220703125, 0.46728515625]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 8.0, 11.0, 27.0, 31.0, 56.0, 91.0, 190.0, 454.0, 1105.0, 3388.0, 17654.0, 260446.0, 702641.0, 52721.0, 6663.0, 1791.0, 670.0, 279.0, 149.0, 73.0, 37.0, 31.0, 20.0, 3.0, 3.0, 7.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.818359375, -0.7963790893554688, -0.7743988037109375, -0.7524185180664062, -0.730438232421875, -0.7084579467773438, -0.6864776611328125, -0.6644973754882812, -0.64251708984375, -0.6205368041992188, -0.5985565185546875, -0.5765762329101562, -0.554595947265625, -0.5326156616210938, -0.5106353759765625, -0.48865509033203125, -0.4666748046875, -0.44469451904296875, -0.4227142333984375, -0.40073394775390625, -0.378753662109375, -0.35677337646484375, -0.3347930908203125, -0.31281280517578125, -0.29083251953125, -0.26885223388671875, -0.2468719482421875, -0.22489166259765625, -0.202911376953125, -0.18093109130859375, -0.1589508056640625, -0.13697052001953125, -0.114990234375, -0.09300994873046875, -0.0710296630859375, -0.04904937744140625, -0.027069091796875, -0.00508880615234375, 0.0168914794921875, 0.03887176513671875, 0.06085205078125, 0.08283233642578125, 0.1048126220703125, 0.12679290771484375, 0.148773193359375, 0.17075347900390625, 0.1927337646484375, 0.21471405029296875, 0.2366943359375, 0.25867462158203125, 0.2806549072265625, 0.30263519287109375, 0.324615478515625, 0.34659576416015625, 0.3685760498046875, 0.39055633544921875, 0.41253662109375, 0.43451690673828125, 0.4564971923828125, 0.47847747802734375, 0.500457763671875, 0.5224380493164062, 0.5444183349609375, 0.5663986206054688, 0.58837890625]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 5.0, 8.0, 7.0, 6.0, 15.0, 14.0, 20.0, 20.0, 30.0, 48.0, 60.0, 67.0, 87.0, 96.0, 106.0, 96.0, 67.0, 68.0, 43.0, 30.0, 25.0, 21.0, 25.0, 7.0, 8.0, 10.0, 7.0, 5.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.00011032819747924805, -0.00010719709098339081, -0.00010406598448753357, -0.00010093487799167633, -9.780377149581909e-05, -9.467266499996185e-05, -9.154155850410461e-05, -8.841045200824738e-05, -8.527934551239014e-05, -8.21482390165329e-05, -7.901713252067566e-05, -7.588602602481842e-05, -7.275491952896118e-05, -6.962381303310394e-05, -6.64927065372467e-05, -6.336160004138947e-05, -6.0230493545532227e-05, -5.709938704967499e-05, -5.396828055381775e-05, -5.083717405796051e-05, -4.770606756210327e-05, -4.457496106624603e-05, -4.1443854570388794e-05, -3.8312748074531555e-05, -3.5181641578674316e-05, -3.205053508281708e-05, -2.891942858695984e-05, -2.57883220911026e-05, -2.265721559524536e-05, -1.9526109099388123e-05, -1.6395002603530884e-05, -1.3263896107673645e-05, -1.0132789611816406e-05, -7.0016831159591675e-06, -3.870576620101929e-06, -7.394701242446899e-07, 2.391636371612549e-06, 5.522742867469788e-06, 8.653849363327026e-06, 1.1784955859184265e-05, 1.4916062355041504e-05, 1.8047168850898743e-05, 2.117827534675598e-05, 2.430938184261322e-05, 2.744048833847046e-05, 3.05715948343277e-05, 3.3702701330184937e-05, 3.6833807826042175e-05, 3.9964914321899414e-05, 4.309602081775665e-05, 4.622712731361389e-05, 4.935823380947113e-05, 5.248934030532837e-05, 5.562044680118561e-05, 5.875155329704285e-05, 6.188265979290009e-05, 6.501376628875732e-05, 6.814487278461456e-05, 7.12759792804718e-05, 7.440708577632904e-05, 7.753819227218628e-05, 8.066929876804352e-05, 8.380040526390076e-05, 8.6931511759758e-05, 9.006261825561523e-05]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 7.0, 4.0, 13.0, 19.0, 26.0, 49.0, 100.0, 176.0, 460.0, 1316.0, 5657.0, 71942.0, 896783.0, 64539.0, 5358.0, 1273.0, 427.0, 186.0, 85.0, 60.0, 26.0, 16.0, 20.0, 10.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.69287109375, -0.6633682250976562, -0.6338653564453125, -0.6043624877929688, -0.574859619140625, -0.5453567504882812, -0.5158538818359375, -0.48635101318359375, -0.45684814453125, -0.42734527587890625, -0.3978424072265625, -0.36833953857421875, -0.338836669921875, -0.30933380126953125, -0.2798309326171875, -0.25032806396484375, -0.2208251953125, -0.19132232666015625, -0.1618194580078125, -0.13231658935546875, -0.102813720703125, -0.07331085205078125, -0.0438079833984375, -0.01430511474609375, 0.01519775390625, 0.04470062255859375, 0.0742034912109375, 0.10370635986328125, 0.133209228515625, 0.16271209716796875, 0.1922149658203125, 0.22171783447265625, 0.251220703125, 0.28072357177734375, 0.3102264404296875, 0.33972930908203125, 0.369232177734375, 0.39873504638671875, 0.4282379150390625, 0.45774078369140625, 0.48724365234375, 0.5167465209960938, 0.5462493896484375, 0.5757522583007812, 0.605255126953125, 0.6347579956054688, 0.6642608642578125, 0.6937637329101562, 0.7232666015625, 0.7527694702148438, 0.7822723388671875, 0.8117752075195312, 0.841278076171875, 0.8707809448242188, 0.9002838134765625, 0.9297866821289062, 0.95928955078125, 0.9887924194335938, 1.0182952880859375, 1.0477981567382812, 1.077301025390625, 1.1068038940429688, 1.1363067626953125, 1.1658096313476562, 1.1953125]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 3.0, 4.0, 7.0, 17.0, 31.0, 32.0, 36.0, 77.0, 108.0, 114.0, 134.0, 120.0, 97.0, 77.0, 50.0, 23.0, 24.0, 21.0, 9.0, 6.0, 7.0, 2.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.29296875, -0.28058624267578125, -0.2682037353515625, -0.25582122802734375, -0.243438720703125, -0.23105621337890625, -0.2186737060546875, -0.20629119873046875, -0.19390869140625, -0.18152618408203125, -0.1691436767578125, -0.15676116943359375, -0.144378662109375, -0.13199615478515625, -0.1196136474609375, -0.10723114013671875, -0.0948486328125, -0.08246612548828125, -0.0700836181640625, -0.05770111083984375, -0.045318603515625, -0.03293609619140625, -0.0205535888671875, -0.00817108154296875, 0.00421142578125, 0.01659393310546875, 0.0289764404296875, 0.04135894775390625, 0.053741455078125, 0.06612396240234375, 0.0785064697265625, 0.09088897705078125, 0.103271484375, 0.11565399169921875, 0.1280364990234375, 0.14041900634765625, 0.152801513671875, 0.16518402099609375, 0.1775665283203125, 0.18994903564453125, 0.20233154296875, 0.21471405029296875, 0.2270965576171875, 0.23947906494140625, 0.251861572265625, 0.26424407958984375, 0.2766265869140625, 0.28900909423828125, 0.3013916015625, 0.31377410888671875, 0.3261566162109375, 0.33853912353515625, 0.350921630859375, 0.36330413818359375, 0.3756866455078125, 0.38806915283203125, 0.40045166015625, 0.41283416748046875, 0.4252166748046875, 0.43759918212890625, 0.449981689453125, 0.46236419677734375, 0.4747467041015625, 0.48712921142578125, 0.49951171875]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 2.0, 4.0, 5.0, 10.0, 18.0, 47.0, 64.0, 118.0, 173.0, 214.0, 174.0, 87.0, 49.0, 22.0, 9.0, 2.0, 3.0, 2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.256625175476074, -6.062235355377197, -5.86784553527832, -5.673455238342285, -5.479065418243408, -5.284675598144531, -5.090285778045654, -4.895895957946777, -4.701505661010742, -4.507115840911865, -4.312726020812988, -4.118335723876953, -3.923945903778076, -3.729556083679199, -3.5351662635803223, -3.3407764434814453, -3.1463866233825684, -2.9519968032836914, -2.7576067447662354, -2.5632169246673584, -2.3688268661499023, -2.1744370460510254, -1.9800472259521484, -1.785657286643982, -1.5912673473358154, -1.396877408027649, -1.2024874687194824, -1.0080976486206055, -0.813707709312439, -0.6193177700042725, -0.4249279499053955, -0.230538010597229, -0.0361475944519043, 0.15824231505393982, 0.35263222455978394, 0.5470221042633057, 0.7414120435714722, 0.9358019828796387, 1.1301918029785156, 1.3245817422866821, 1.5189716815948486, 1.7133616209030151, 1.9077515602111816, 2.1021413803100586, 2.2965312004089355, 2.4909212589263916, 2.6853110790252686, 2.8797011375427246, 3.0740909576416016, 3.2684807777404785, 3.4628708362579346, 3.6572606563568115, 3.8516507148742676, 4.0460405349731445, 4.2404303550720215, 4.434820175170898, 4.629210472106934, 4.8236002922058105, 5.0179901123046875, 5.212380409240723, 5.4067702293396, 5.601160049438477, 5.7955498695373535, 5.9899396896362305, 6.184329509735107]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 6.0, 7.0, 3.0, 9.0, 12.0, 17.0, 10.0, 18.0, 30.0, 41.0, 39.0, 55.0, 57.0, 42.0, 75.0, 72.0, 72.0, 74.0, 69.0, 54.0, 38.0, 38.0, 42.0, 33.0, 26.0, 19.0, 12.0, 9.0, 11.0, 6.0, 5.0, 4.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.0189208984375, -3.9207749366760254, -3.82262921333313, -3.7244832515716553, -3.6263375282287598, -3.528191566467285, -3.4300458431243896, -3.331899881362915, -3.2337541580200195, -3.135608196258545, -3.0374624729156494, -2.939316511154175, -2.8411707878112793, -2.7430248260498047, -2.644879102706909, -2.5467331409454346, -2.448587417602539, -2.3504414558410645, -2.252295732498169, -2.1541497707366943, -2.056004047393799, -1.9578582048416138, -1.8597123622894287, -1.761566400527954, -1.6634204387664795, -1.5652745962142944, -1.4671287536621094, -1.3689829111099243, -1.2708370685577393, -1.1726912260055542, -1.0745453834533691, -0.9763994812965393, -0.878253698348999, -0.780107855796814, -0.6819620132446289, -0.5838161706924438, -0.4856702983379364, -0.38752445578575134, -0.2893785834312439, -0.19123274087905884, -0.09308689832687378, 0.005058951675891876, 0.10320480167865753, 0.20135065913200378, 0.29949650168418884, 0.3976423442363739, 0.49578821659088135, 0.5939340591430664, 0.6920799016952515, 0.7902257442474365, 0.8883715867996216, 0.9865174293518066, 1.0846632719039917, 1.1828091144561768, 1.2809550762176514, 1.3791007995605469, 1.4772467613220215, 1.5753926038742065, 1.6735384464263916, 1.7716842889785767, 1.8698301315307617, 1.9679759740829468, 2.066121816635132, 2.1642677783966064, 2.262413501739502]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 1.0, 3.0, 3.0, 8.0, 6.0, 3.0, 13.0, 11.0, 26.0, 33.0, 40.0, 45.0, 75.0, 111.0, 123.0, 204.0, 292.0, 456.0, 1123.0, 3066.0, 15026.0, 175363.0, 3830082.0, 149012.0, 14101.0, 3065.0, 1079.0, 449.0, 201.0, 95.0, 73.0, 38.0, 18.0, 19.0, 10.0, 6.0, 5.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-2.59765625, -2.5318450927734375, -2.466033935546875, -2.4002227783203125, -2.33441162109375, -2.2686004638671875, -2.202789306640625, -2.1369781494140625, -2.0711669921875, -2.0053558349609375, -1.939544677734375, -1.8737335205078125, -1.80792236328125, -1.7421112060546875, -1.676300048828125, -1.6104888916015625, -1.544677734375, -1.4788665771484375, -1.413055419921875, -1.3472442626953125, -1.28143310546875, -1.2156219482421875, -1.149810791015625, -1.0839996337890625, -1.0181884765625, -0.9523773193359375, -0.886566162109375, -0.8207550048828125, -0.75494384765625, -0.6891326904296875, -0.623321533203125, -0.5575103759765625, -0.49169921875, -0.4258880615234375, -0.360076904296875, -0.2942657470703125, -0.22845458984375, -0.1626434326171875, -0.096832275390625, -0.0310211181640625, 0.0347900390625, 0.1006011962890625, 0.166412353515625, 0.2322235107421875, 0.29803466796875, 0.3638458251953125, 0.429656982421875, 0.4954681396484375, 0.561279296875, 0.6270904541015625, 0.692901611328125, 0.7587127685546875, 0.82452392578125, 0.8903350830078125, 0.956146240234375, 1.0219573974609375, 1.0877685546875, 1.1535797119140625, 1.219390869140625, 1.2852020263671875, 1.35101318359375, 1.4168243408203125, 1.482635498046875, 1.5484466552734375, 1.6142578125]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 7.0, 15.0, 17.0, 25.0, 49.0, 61.0, 78.0, 99.0, 116.0, 123.0, 103.0, 82.0, 71.0, 56.0, 33.0, 24.0, 15.0, 8.0, 9.0, 7.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.237060546875, -0.23192787170410156, -0.22679519653320312, -0.2216625213623047, -0.21652984619140625, -0.2113971710205078, -0.20626449584960938, -0.20113182067871094, -0.1959991455078125, -0.19086647033691406, -0.18573379516601562, -0.1806011199951172, -0.17546844482421875, -0.1703357696533203, -0.16520309448242188, -0.16007041931152344, -0.154937744140625, -0.14980506896972656, -0.14467239379882812, -0.1395397186279297, -0.13440704345703125, -0.1292743682861328, -0.12414169311523438, -0.11900901794433594, -0.1138763427734375, -0.10874366760253906, -0.10361099243164062, -0.09847831726074219, -0.09334564208984375, -0.08821296691894531, -0.08308029174804688, -0.07794761657714844, -0.07281494140625, -0.06768226623535156, -0.06254959106445312, -0.05741691589355469, -0.05228424072265625, -0.04715156555175781, -0.042018890380859375, -0.03688621520996094, -0.0317535400390625, -0.026620864868164062, -0.021488189697265625, -0.016355514526367188, -0.01122283935546875, -0.0060901641845703125, -0.000957489013671875, 0.0041751861572265625, 0.009307861328125, 0.014440536499023438, 0.019573211669921875, 0.024705886840820312, 0.02983856201171875, 0.03497123718261719, 0.040103912353515625, 0.04523658752441406, 0.0503692626953125, 0.05550193786621094, 0.060634613037109375, 0.06576728820800781, 0.07089996337890625, 0.07603263854980469, 0.08116531372070312, 0.08629798889160156, 0.0914306640625]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 3.0, 8.0, 3.0, 11.0, 18.0, 16.0, 25.0, 37.0, 40.0, 57.0, 116.0, 225.0, 652.0, 2064.0, 9118.0, 409306.0, 3754673.0, 13925.0, 2632.0, 716.0, 275.0, 127.0, 72.0, 54.0, 31.0, 31.0, 14.0, 16.0, 6.0, 4.0, 4.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.28515625, -4.137939453125, -3.99072265625, -3.843505859375, -3.6962890625, -3.549072265625, -3.40185546875, -3.254638671875, -3.107421875, -2.960205078125, -2.81298828125, -2.665771484375, -2.5185546875, -2.371337890625, -2.22412109375, -2.076904296875, -1.9296875, -1.782470703125, -1.63525390625, -1.488037109375, -1.3408203125, -1.193603515625, -1.04638671875, -0.899169921875, -0.751953125, -0.604736328125, -0.45751953125, -0.310302734375, -0.1630859375, -0.015869140625, 0.13134765625, 0.278564453125, 0.42578125, 0.572998046875, 0.72021484375, 0.867431640625, 1.0146484375, 1.161865234375, 1.30908203125, 1.456298828125, 1.603515625, 1.750732421875, 1.89794921875, 2.045166015625, 2.1923828125, 2.339599609375, 2.48681640625, 2.634033203125, 2.78125, 2.928466796875, 3.07568359375, 3.222900390625, 3.3701171875, 3.517333984375, 3.66455078125, 3.811767578125, 3.958984375, 4.106201171875, 4.25341796875, 4.400634765625, 4.5478515625, 4.695068359375, 4.84228515625, 4.989501953125, 5.13671875]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 0.0, 4.0, 4.0, 6.0, 10.0, 16.0, 23.0, 35.0, 98.0, 247.0, 761.0, 1922.0, 576.0, 197.0, 81.0, 32.0, 29.0, 7.0, 9.0, 9.0, 3.0, 2.0, 2.0, 1.0, 4.0, 1.0, 2.0, 0.0, 2.0, 1.0], "bins": [-1.0048828125, -0.9827499389648438, -0.9606170654296875, -0.9384841918945312, -0.916351318359375, -0.8942184448242188, -0.8720855712890625, -0.8499526977539062, -0.82781982421875, -0.8056869506835938, -0.7835540771484375, -0.7614212036132812, -0.739288330078125, -0.7171554565429688, -0.6950225830078125, -0.6728897094726562, -0.6507568359375, -0.6286239624023438, -0.6064910888671875, -0.5843582153320312, -0.562225341796875, -0.5400924682617188, -0.5179595947265625, -0.49582672119140625, -0.47369384765625, -0.45156097412109375, -0.4294281005859375, -0.40729522705078125, -0.385162353515625, -0.36302947998046875, -0.3408966064453125, -0.31876373291015625, -0.296630859375, -0.27449798583984375, -0.2523651123046875, -0.23023223876953125, -0.208099365234375, -0.18596649169921875, -0.1638336181640625, -0.14170074462890625, -0.11956787109375, -0.09743499755859375, -0.0753021240234375, -0.05316925048828125, -0.031036376953125, -0.00890350341796875, 0.0132293701171875, 0.03536224365234375, 0.0574951171875, 0.07962799072265625, 0.1017608642578125, 0.12389373779296875, 0.146026611328125, 0.16815948486328125, 0.1902923583984375, 0.21242523193359375, 0.23455810546875, 0.25669097900390625, 0.2788238525390625, 0.30095672607421875, 0.323089599609375, 0.34522247314453125, 0.3673553466796875, 0.38948822021484375, 0.41162109375]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 3.0, 5.0, 9.0, 17.0, 23.0, 82.0, 192.0, 264.0, 209.0, 120.0, 45.0, 19.0, 7.0, 3.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-9.52221965789795, -9.303872108459473, -9.085525512695312, -8.867177963256836, -8.648831367492676, -8.4304838180542, -8.212137222290039, -7.9937896728515625, -7.775442600250244, -7.557095527648926, -7.338748455047607, -7.120401382446289, -6.9020538330078125, -6.683706760406494, -6.465359687805176, -6.247012615203857, -6.028665542602539, -5.810318470001221, -5.591971397399902, -5.373623847961426, -5.155276775360107, -4.936929702758789, -4.718582630157471, -4.500235557556152, -4.281888008117676, -4.063540935516357, -3.84519362449646, -3.6268465518951416, -3.4084994792938232, -3.190152168273926, -2.9718050956726074, -2.753458023071289, -2.5351109504699707, -2.3167638778686523, -2.098416566848755, -1.8800694942474365, -1.6617224216461182, -1.4433752298355103, -1.2250280380249023, -1.006680965423584, -0.7883337736129761, -0.5699866414070129, -0.3516394793987274, -0.1332923173904419, 0.08505481481552124, 0.3034019470214844, 0.5217491388320923, 0.7400962114334106, 0.9584434032440186, 1.1767905950546265, 1.3951376676559448, 1.6134848594665527, 1.831831932067871, 2.0501790046691895, 2.268526315689087, 2.4868733882904053, 2.7052206993103027, 2.923567771911621, 3.1419150829315186, 3.360262155532837, 3.5786092281341553, 3.7969565391540527, 4.015303611755371, 4.2336506843566895, 4.451997756958008]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 4.0, 6.0, 10.0, 15.0, 20.0, 32.0, 45.0, 48.0, 60.0, 73.0, 87.0, 84.0, 75.0, 102.0, 75.0, 65.0, 65.0, 45.0, 20.0, 24.0, 19.0, 7.0, 10.0, 10.0, 2.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4079484939575195, -2.3296139240264893, -2.251279354095459, -2.1729447841644287, -2.0946102142333984, -2.016275644302368, -1.937941074371338, -1.8596065044403076, -1.7812719345092773, -1.702937364578247, -1.6246027946472168, -1.5462682247161865, -1.4679336547851562, -1.389599084854126, -1.3112645149230957, -1.2329299449920654, -1.1545952558517456, -1.0762606859207153, -0.9979261159896851, -0.9195915460586548, -0.8412569761276245, -0.7629224061965942, -0.6845877766609192, -0.6062532067298889, -0.5279186367988586, -0.44958406686782837, -0.3712494969367981, -0.29291489720344543, -0.21458032727241516, -0.1362457573413849, -0.05791115760803223, 0.020423412322998047, 0.09875798225402832, 0.1770925521850586, 0.25542712211608887, 0.33376172184944153, 0.4120962917804718, 0.4904308617115021, 0.5687654614448547, 0.647100031375885, 0.7254346013069153, 0.8037691712379456, 0.8821037411689758, 0.9604383707046509, 1.0387729406356812, 1.1171075105667114, 1.1954420804977417, 1.273776650428772, 1.3521112203598022, 1.4304457902908325, 1.5087803602218628, 1.587114930152893, 1.6654495000839233, 1.7437840700149536, 1.8221187591552734, 1.9004533290863037, 1.978787899017334, 2.0571224689483643, 2.1354570388793945, 2.213791608810425, 2.292126178741455, 2.3704607486724854, 2.4487953186035156, 2.527129888534546, 2.605464458465576]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 3.0, 6.0, 4.0, 5.0, 12.0, 16.0, 11.0, 15.0, 31.0, 33.0, 54.0, 83.0, 104.0, 146.0, 225.0, 329.0, 581.0, 1036.0, 2288.0, 8143.0, 49820.0, 496934.0, 434351.0, 42488.0, 7073.0, 2126.0, 1004.0, 531.0, 344.0, 208.0, 146.0, 100.0, 91.0, 48.0, 39.0, 30.0, 21.0, 28.0, 15.0, 12.0, 5.0, 5.0, 2.0, 3.0, 0.0, 2.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.37890625, -1.3328857421875, -1.286865234375, -1.2408447265625, -1.19482421875, -1.1488037109375, -1.102783203125, -1.0567626953125, -1.0107421875, -0.9647216796875, -0.918701171875, -0.8726806640625, -0.82666015625, -0.7806396484375, -0.734619140625, -0.6885986328125, -0.642578125, -0.5965576171875, -0.550537109375, -0.5045166015625, -0.45849609375, -0.4124755859375, -0.366455078125, -0.3204345703125, -0.2744140625, -0.2283935546875, -0.182373046875, -0.1363525390625, -0.09033203125, -0.0443115234375, 0.001708984375, 0.0477294921875, 0.09375, 0.1397705078125, 0.185791015625, 0.2318115234375, 0.27783203125, 0.3238525390625, 0.369873046875, 0.4158935546875, 0.4619140625, 0.5079345703125, 0.553955078125, 0.5999755859375, 0.64599609375, 0.6920166015625, 0.738037109375, 0.7840576171875, 0.830078125, 0.8760986328125, 0.922119140625, 0.9681396484375, 1.01416015625, 1.0601806640625, 1.106201171875, 1.1522216796875, 1.1982421875, 1.2442626953125, 1.290283203125, 1.3363037109375, 1.38232421875, 1.4283447265625, 1.474365234375, 1.5203857421875, 1.56640625]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 9.0, 7.0, 13.0, 31.0, 52.0, 66.0, 96.0, 110.0, 132.0, 129.0, 126.0, 71.0, 69.0, 32.0, 22.0, 15.0, 10.0, 6.0, 1.0, 2.0, 2.0, 1.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.284912109375, -0.27809715270996094, -0.2712821960449219, -0.2644672393798828, -0.25765228271484375, -0.2508373260498047, -0.24402236938476562, -0.23720741271972656, -0.2303924560546875, -0.22357749938964844, -0.21676254272460938, -0.2099475860595703, -0.20313262939453125, -0.1963176727294922, -0.18950271606445312, -0.18268775939941406, -0.175872802734375, -0.16905784606933594, -0.16224288940429688, -0.1554279327392578, -0.14861297607421875, -0.1417980194091797, -0.13498306274414062, -0.12816810607910156, -0.1213531494140625, -0.11453819274902344, -0.10772323608398438, -0.10090827941894531, -0.09409332275390625, -0.08727836608886719, -0.08046340942382812, -0.07364845275878906, -0.06683349609375, -0.06001853942871094, -0.053203582763671875, -0.04638862609863281, -0.03957366943359375, -0.03275871276855469, -0.025943756103515625, -0.019128799438476562, -0.0123138427734375, -0.0054988861083984375, 0.001316070556640625, 0.008131027221679688, 0.01494598388671875, 0.021760940551757812, 0.028575897216796875, 0.03539085388183594, 0.042205810546875, 0.04902076721191406, 0.055835723876953125, 0.06265068054199219, 0.06946563720703125, 0.07628059387207031, 0.08309555053710938, 0.08991050720214844, 0.0967254638671875, 0.10354042053222656, 0.11035537719726562, 0.11717033386230469, 0.12398529052734375, 0.1308002471923828, 0.13761520385742188, 0.14443016052246094, 0.1512451171875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 6.0, 4.0, 7.0, 12.0, 8.0, 10.0, 17.0, 26.0, 55.0, 49.0, 79.0, 131.0, 206.0, 282.0, 527.0, 1090.0, 2384.0, 6030.0, 18975.0, 67774.0, 230811.0, 417944.0, 213078.0, 61514.0, 17313.0, 5682.0, 2170.0, 970.0, 504.0, 302.0, 189.0, 111.0, 106.0, 67.0, 38.0, 28.0, 21.0, 18.0, 9.0, 5.0, 4.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5419921875, -0.524200439453125, -0.50640869140625, -0.488616943359375, -0.4708251953125, -0.453033447265625, -0.43524169921875, -0.417449951171875, -0.399658203125, -0.381866455078125, -0.36407470703125, -0.346282958984375, -0.3284912109375, -0.310699462890625, -0.29290771484375, -0.275115966796875, -0.25732421875, -0.239532470703125, -0.22174072265625, -0.203948974609375, -0.1861572265625, -0.168365478515625, -0.15057373046875, -0.132781982421875, -0.114990234375, -0.097198486328125, -0.07940673828125, -0.061614990234375, -0.0438232421875, -0.026031494140625, -0.00823974609375, 0.009552001953125, 0.02734375, 0.045135498046875, 0.06292724609375, 0.080718994140625, 0.0985107421875, 0.116302490234375, 0.13409423828125, 0.151885986328125, 0.169677734375, 0.187469482421875, 0.20526123046875, 0.223052978515625, 0.2408447265625, 0.258636474609375, 0.27642822265625, 0.294219970703125, 0.31201171875, 0.329803466796875, 0.34759521484375, 0.365386962890625, 0.3831787109375, 0.400970458984375, 0.41876220703125, 0.436553955078125, 0.454345703125, 0.472137451171875, 0.48992919921875, 0.507720947265625, 0.5255126953125, 0.543304443359375, 0.56109619140625, 0.578887939453125, 0.5966796875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 1.0, 8.0, 7.0, 5.0, 6.0, 15.0, 11.0, 16.0, 20.0, 16.0, 19.0, 22.0, 21.0, 34.0, 34.0, 43.0, 34.0, 44.0, 56.0, 48.0, 49.0, 42.0, 44.0, 43.0, 46.0, 47.0, 41.0, 23.0, 36.0, 24.0, 39.0, 12.0, 22.0, 10.0, 8.0, 19.0, 6.0, 11.0, 5.0, 9.0, 4.0, 3.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.375, -0.36243438720703125, -0.3498687744140625, -0.33730316162109375, -0.324737548828125, -0.31217193603515625, -0.2996063232421875, -0.28704071044921875, -0.27447509765625, -0.26190948486328125, -0.2493438720703125, -0.23677825927734375, -0.224212646484375, -0.21164703369140625, -0.1990814208984375, -0.18651580810546875, -0.1739501953125, -0.16138458251953125, -0.1488189697265625, -0.13625335693359375, -0.123687744140625, -0.11112213134765625, -0.0985565185546875, -0.08599090576171875, -0.07342529296875, -0.06085968017578125, -0.0482940673828125, -0.03572845458984375, -0.023162841796875, -0.01059722900390625, 0.0019683837890625, 0.01453399658203125, 0.027099609375, 0.03966522216796875, 0.0522308349609375, 0.06479644775390625, 0.077362060546875, 0.08992767333984375, 0.1024932861328125, 0.11505889892578125, 0.12762451171875, 0.14019012451171875, 0.1527557373046875, 0.16532135009765625, 0.177886962890625, 0.19045257568359375, 0.2030181884765625, 0.21558380126953125, 0.2281494140625, 0.24071502685546875, 0.2532806396484375, 0.26584625244140625, 0.278411865234375, 0.29097747802734375, 0.3035430908203125, 0.31610870361328125, 0.32867431640625, 0.34123992919921875, 0.3538055419921875, 0.36637115478515625, 0.378936767578125, 0.39150238037109375, 0.4040679931640625, 0.41663360595703125, 0.42919921875]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 2.0, 7.0, 4.0, 8.0, 8.0, 11.0, 22.0, 20.0, 32.0, 40.0, 89.0, 137.0, 189.0, 367.0, 689.0, 1485.0, 3825.0, 12736.0, 53562.0, 243581.0, 473372.0, 199019.0, 42895.0, 10389.0, 3252.0, 1288.0, 654.0, 300.0, 200.0, 119.0, 77.0, 52.0, 38.0, 26.0, 15.0, 15.0, 14.0, 8.0, 6.0, 3.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.332763671875, -0.3218269348144531, -0.31089019775390625, -0.2999534606933594, -0.2890167236328125, -0.2780799865722656, -0.26714324951171875, -0.2562065124511719, -0.245269775390625, -0.23433303833007812, -0.22339630126953125, -0.21245956420898438, -0.2015228271484375, -0.19058609008789062, -0.17964935302734375, -0.16871261596679688, -0.15777587890625, -0.14683914184570312, -0.13590240478515625, -0.12496566772460938, -0.1140289306640625, -0.10309219360351562, -0.09215545654296875, -0.08121871948242188, -0.070281982421875, -0.059345245361328125, -0.04840850830078125, -0.037471771240234375, -0.0265350341796875, -0.015598297119140625, -0.00466156005859375, 0.006275177001953125, 0.0172119140625, 0.028148651123046875, 0.03908538818359375, 0.050022125244140625, 0.0609588623046875, 0.07189559936523438, 0.08283233642578125, 0.09376907348632812, 0.104705810546875, 0.11564254760742188, 0.12657928466796875, 0.13751602172851562, 0.1484527587890625, 0.15938949584960938, 0.17032623291015625, 0.18126296997070312, 0.19219970703125, 0.20313644409179688, 0.21407318115234375, 0.22500991821289062, 0.2359466552734375, 0.24688339233398438, 0.25782012939453125, 0.2687568664550781, 0.279693603515625, 0.2906303405761719, 0.30156707763671875, 0.3125038146972656, 0.3234405517578125, 0.3343772888183594, 0.34531402587890625, 0.3562507629394531, 0.3671875]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 1.0, 3.0, 12.0, 13.0, 15.0, 15.0, 30.0, 52.0, 38.0, 72.0, 91.0, 91.0, 97.0, 87.0, 92.0, 85.0, 59.0, 40.0, 35.0, 18.0, 18.0, 13.0, 11.0, 5.0, 10.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001112222671508789, -0.00010801013559103012, -0.00010479800403118134, -0.00010158587247133255, -9.837374091148376e-05, -9.516160935163498e-05, -9.19494777917862e-05, -8.873734623193741e-05, -8.552521467208862e-05, -8.231308311223984e-05, -7.910095155239105e-05, -7.588881999254227e-05, -7.267668843269348e-05, -6.94645568728447e-05, -6.625242531299591e-05, -6.304029375314713e-05, -5.982816219329834e-05, -5.6616030633449554e-05, -5.340389907360077e-05, -5.0191767513751984e-05, -4.69796359539032e-05, -4.376750439405441e-05, -4.055537283420563e-05, -3.734324127435684e-05, -3.413110971450806e-05, -3.091897815465927e-05, -2.7706846594810486e-05, -2.44947150349617e-05, -2.1282583475112915e-05, -1.807045191526413e-05, -1.4858320355415344e-05, -1.1646188795566559e-05, -8.434057235717773e-06, -5.221925675868988e-06, -2.0097941160202026e-06, 1.2023374438285828e-06, 4.414469003677368e-06, 7.6266005635261536e-06, 1.0838732123374939e-05, 1.4050863683223724e-05, 1.726299524307251e-05, 2.0475126802921295e-05, 2.368725836277008e-05, 2.6899389922618866e-05, 3.011152148246765e-05, 3.332365304231644e-05, 3.653578460216522e-05, 3.974791616201401e-05, 4.296004772186279e-05, 4.617217928171158e-05, 4.9384310841560364e-05, 5.259644240140915e-05, 5.5808573961257935e-05, 5.902070552110672e-05, 6.22328370809555e-05, 6.544496864080429e-05, 6.865710020065308e-05, 7.186923176050186e-05, 7.508136332035065e-05, 7.829349488019943e-05, 8.150562644004822e-05, 8.4717757999897e-05, 8.792988955974579e-05, 9.114202111959457e-05, 9.435415267944336e-05]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 6.0, 3.0, 11.0, 28.0, 34.0, 53.0, 79.0, 172.0, 397.0, 1009.0, 3482.0, 21691.0, 311556.0, 637844.0, 62870.0, 6701.0, 1582.0, 561.0, 218.0, 123.0, 72.0, 32.0, 13.0, 12.0, 6.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.79931640625, -0.7799186706542969, -0.7605209350585938, -0.7411231994628906, -0.7217254638671875, -0.7023277282714844, -0.6829299926757812, -0.6635322570800781, -0.644134521484375, -0.6247367858886719, -0.6053390502929688, -0.5859413146972656, -0.5665435791015625, -0.5471458435058594, -0.5277481079101562, -0.5083503723144531, -0.48895263671875, -0.4695549011230469, -0.45015716552734375, -0.4307594299316406, -0.4113616943359375, -0.3919639587402344, -0.37256622314453125, -0.3531684875488281, -0.333770751953125, -0.3143730163574219, -0.29497528076171875, -0.2755775451660156, -0.2561798095703125, -0.23678207397460938, -0.21738433837890625, -0.19798660278320312, -0.1785888671875, -0.15919113159179688, -0.13979339599609375, -0.12039566040039062, -0.1009979248046875, -0.08160018920898438, -0.06220245361328125, -0.042804718017578125, -0.023406982421875, -0.004009246826171875, 0.01538848876953125, 0.034786224365234375, 0.0541839599609375, 0.07358169555664062, 0.09297943115234375, 0.11237716674804688, 0.13177490234375, 0.15117263793945312, 0.17057037353515625, 0.18996810913085938, 0.2093658447265625, 0.22876358032226562, 0.24816131591796875, 0.2675590515136719, 0.286956787109375, 0.3063545227050781, 0.32575225830078125, 0.3451499938964844, 0.3645477294921875, 0.3839454650878906, 0.40334320068359375, 0.4227409362792969, 0.442138671875]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 6.0, 5.0, 6.0, 8.0, 24.0, 35.0, 32.0, 59.0, 91.0, 114.0, 112.0, 139.0, 112.0, 102.0, 69.0, 35.0, 24.0, 21.0, 12.0, 5.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.19970703125, -0.18547821044921875, -0.1712493896484375, -0.15702056884765625, -0.142791748046875, -0.12856292724609375, -0.1143341064453125, -0.10010528564453125, -0.08587646484375, -0.07164764404296875, -0.0574188232421875, -0.04319000244140625, -0.028961181640625, -0.01473236083984375, -0.0005035400390625, 0.01372528076171875, 0.0279541015625, 0.04218292236328125, 0.0564117431640625, 0.07064056396484375, 0.084869384765625, 0.09909820556640625, 0.1133270263671875, 0.12755584716796875, 0.14178466796875, 0.15601348876953125, 0.1702423095703125, 0.18447113037109375, 0.198699951171875, 0.21292877197265625, 0.2271575927734375, 0.24138641357421875, 0.255615234375, 0.26984405517578125, 0.2840728759765625, 0.29830169677734375, 0.312530517578125, 0.32675933837890625, 0.3409881591796875, 0.35521697998046875, 0.36944580078125, 0.38367462158203125, 0.3979034423828125, 0.41213226318359375, 0.426361083984375, 0.44058990478515625, 0.4548187255859375, 0.46904754638671875, 0.4832763671875, 0.49750518798828125, 0.5117340087890625, 0.5259628295898438, 0.540191650390625, 0.5544204711914062, 0.5686492919921875, 0.5828781127929688, 0.59710693359375, 0.6113357543945312, 0.6255645751953125, 0.6397933959960938, 0.654022216796875, 0.6682510375976562, 0.6824798583984375, 0.6967086791992188, 0.7109375]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 13.0, 20.0, 73.0, 175.0, 242.0, 237.0, 163.0, 50.0, 21.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.493064880371094, -10.223384857177734, -9.953704833984375, -9.684024810791016, -9.414344787597656, -9.144664764404297, -8.874984741210938, -8.605304718017578, -8.335624694824219, -8.06594467163086, -7.7962646484375, -7.526584625244141, -7.256904602050781, -6.987224578857422, -6.7175445556640625, -6.447864532470703, -6.178184509277344, -5.908504486083984, -5.638824462890625, -5.369144439697266, -5.099464416503906, -4.829784393310547, -4.5601043701171875, -4.290424346923828, -4.020744323730469, -3.7510643005371094, -3.48138427734375, -3.2117042541503906, -2.9420242309570312, -2.672344207763672, -2.4026641845703125, -2.132984161376953, -1.8633041381835938, -1.5936241149902344, -1.323944091796875, -1.0542640686035156, -0.7845840454101562, -0.5149040222167969, -0.2452239990234375, 0.024456024169921875, 0.29413604736328125, 0.5638160705566406, 0.83349609375, 1.1031761169433594, 1.3728561401367188, 1.6425361633300781, 1.9122161865234375, 2.181896209716797, 2.4515762329101562, 2.7212562561035156, 2.990936279296875, 3.2606163024902344, 3.5302963256835938, 3.799976348876953, 4.0696563720703125, 4.339336395263672, 4.609016418457031, 4.878696441650391, 5.14837646484375, 5.418056488037109, 5.687736511230469, 5.957416534423828, 6.2270965576171875, 6.496776580810547, 6.766456604003906]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 7.0, 8.0, 6.0, 13.0, 11.0, 19.0, 13.0, 27.0, 29.0, 30.0, 46.0, 50.0, 44.0, 57.0, 62.0, 69.0, 50.0, 59.0, 57.0, 68.0, 51.0, 42.0, 38.0, 35.0, 26.0, 27.0, 13.0, 12.0, 11.0, 5.0, 4.0, 9.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0462570190429688, -2.949765920639038, -2.8532748222351074, -2.7567834854125977, -2.660292387008667, -2.5638012886047363, -2.4673101902008057, -2.370819091796875, -2.2743279933929443, -2.1778368949890137, -2.081345796585083, -1.9848545789718628, -1.8883634805679321, -1.791872262954712, -1.6953811645507812, -1.5988900661468506, -1.5023988485336304, -1.4059077501296997, -1.3094165325164795, -1.2129254341125488, -1.1164343357086182, -1.0199432373046875, -0.9234520196914673, -0.8269609212875366, -0.7304697632789612, -0.6339786052703857, -0.5374875068664551, -0.44099634885787964, -0.3445052206516266, -0.24801409244537354, -0.1515229344367981, -0.05503183603286743, 0.04145932197570801, 0.13795045018196106, 0.2344415932893753, 0.33093273639678955, 0.4274238646030426, 0.5239149928092957, 0.6204061508178711, 0.7168972492218018, 0.8133884072303772, 0.9098795652389526, 1.0063706636428833, 1.1028618812561035, 1.1993529796600342, 1.2958440780639648, 1.3923351764678955, 1.4888262748718262, 1.5853174924850464, 1.681808590888977, 1.7782998085021973, 1.874790906906128, 1.9712820053100586, 2.0677731037139893, 2.16426420211792, 2.2607555389404297, 2.3572466373443604, 2.453737735748291, 2.5502288341522217, 2.6467199325561523, 2.743211269378662, 2.8397023677825928, 2.9361934661865234, 3.032684564590454, 3.1291756629943848]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 6.0, 6.0, 7.0, 5.0, 11.0, 7.0, 14.0, 18.0, 12.0, 27.0, 32.0, 46.0, 50.0, 77.0, 100.0, 145.0, 200.0, 326.0, 447.0, 798.0, 1893.0, 6126.0, 34675.0, 1125523.0, 2970950.0, 41162.0, 7091.0, 2151.0, 945.0, 485.0, 315.0, 191.0, 133.0, 69.0, 70.0, 53.0, 37.0, 17.0, 19.0, 11.0, 6.0, 8.0, 5.0, 5.0, 6.0, 0.0, 1.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-2.078125, -2.0170745849609375, -1.956024169921875, -1.8949737548828125, -1.83392333984375, -1.7728729248046875, -1.711822509765625, -1.6507720947265625, -1.5897216796875, -1.5286712646484375, -1.467620849609375, -1.4065704345703125, -1.34552001953125, -1.2844696044921875, -1.223419189453125, -1.1623687744140625, -1.101318359375, -1.0402679443359375, -0.979217529296875, -0.9181671142578125, -0.85711669921875, -0.7960662841796875, -0.735015869140625, -0.6739654541015625, -0.6129150390625, -0.5518646240234375, -0.490814208984375, -0.4297637939453125, -0.36871337890625, -0.3076629638671875, -0.246612548828125, -0.1855621337890625, -0.12451171875, -0.0634613037109375, -0.002410888671875, 0.0586395263671875, 0.11968994140625, 0.1807403564453125, 0.241790771484375, 0.3028411865234375, 0.3638916015625, 0.4249420166015625, 0.485992431640625, 0.5470428466796875, 0.60809326171875, 0.6691436767578125, 0.730194091796875, 0.7912445068359375, 0.852294921875, 0.9133453369140625, 0.974395751953125, 1.0354461669921875, 1.09649658203125, 1.1575469970703125, 1.218597412109375, 1.2796478271484375, 1.3406982421875, 1.4017486572265625, 1.462799072265625, 1.5238494873046875, 1.58489990234375, 1.6459503173828125, 1.707000732421875, 1.7680511474609375, 1.8291015625]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 7.0, 12.0, 23.0, 29.0, 45.0, 59.0, 75.0, 102.0, 108.0, 112.0, 116.0, 97.0, 62.0, 50.0, 34.0, 33.0, 19.0, 7.0, 5.0, 4.0, 3.0, 0.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.269287109375, -0.2627716064453125, -0.256256103515625, -0.2497406005859375, -0.24322509765625, -0.2367095947265625, -0.230194091796875, -0.2236785888671875, -0.2171630859375, -0.2106475830078125, -0.204132080078125, -0.1976165771484375, -0.19110107421875, -0.1845855712890625, -0.178070068359375, -0.1715545654296875, -0.1650390625, -0.1585235595703125, -0.152008056640625, -0.1454925537109375, -0.13897705078125, -0.1324615478515625, -0.125946044921875, -0.1194305419921875, -0.1129150390625, -0.1063995361328125, -0.099884033203125, -0.0933685302734375, -0.08685302734375, -0.0803375244140625, -0.073822021484375, -0.0673065185546875, -0.060791015625, -0.0542755126953125, -0.047760009765625, -0.0412445068359375, -0.03472900390625, -0.0282135009765625, -0.021697998046875, -0.0151824951171875, -0.0086669921875, -0.0021514892578125, 0.004364013671875, 0.0108795166015625, 0.01739501953125, 0.0239105224609375, 0.030426025390625, 0.0369415283203125, 0.04345703125, 0.0499725341796875, 0.056488037109375, 0.0630035400390625, 0.06951904296875, 0.0760345458984375, 0.082550048828125, 0.0890655517578125, 0.0955810546875, 0.1020965576171875, 0.108612060546875, 0.1151275634765625, 0.12164306640625, 0.1281585693359375, 0.134674072265625, 0.1411895751953125, 0.147705078125]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 4.0, 3.0, 3.0, 10.0, 15.0, 14.0, 19.0, 30.0, 57.0, 99.0, 138.0, 242.0, 419.0, 903.0, 2599.0, 9205.0, 44810.0, 512200.0, 3480231.0, 116132.0, 19412.0, 4903.0, 1457.0, 603.0, 278.0, 178.0, 99.0, 79.0, 37.0, 32.0, 28.0, 13.0, 12.0, 8.0, 3.0, 5.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.2001953125, -1.1629638671875, -1.125732421875, -1.0885009765625, -1.05126953125, -1.0140380859375, -0.976806640625, -0.9395751953125, -0.90234375, -0.8651123046875, -0.827880859375, -0.7906494140625, -0.75341796875, -0.7161865234375, -0.678955078125, -0.6417236328125, -0.6044921875, -0.5672607421875, -0.530029296875, -0.4927978515625, -0.45556640625, -0.4183349609375, -0.381103515625, -0.3438720703125, -0.306640625, -0.2694091796875, -0.232177734375, -0.1949462890625, -0.15771484375, -0.1204833984375, -0.083251953125, -0.0460205078125, -0.0087890625, 0.0284423828125, 0.065673828125, 0.1029052734375, 0.14013671875, 0.1773681640625, 0.214599609375, 0.2518310546875, 0.2890625, 0.3262939453125, 0.363525390625, 0.4007568359375, 0.43798828125, 0.4752197265625, 0.512451171875, 0.5496826171875, 0.5869140625, 0.6241455078125, 0.661376953125, 0.6986083984375, 0.73583984375, 0.7730712890625, 0.810302734375, 0.8475341796875, 0.884765625, 0.9219970703125, 0.959228515625, 0.9964599609375, 1.03369140625, 1.0709228515625, 1.108154296875, 1.1453857421875, 1.1826171875]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 7.0, 9.0, 14.0, 11.0, 30.0, 32.0, 74.0, 136.0, 271.0, 683.0, 1763.0, 508.0, 215.0, 110.0, 77.0, 53.0, 23.0, 25.0, 14.0, 9.0, 7.0, 7.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.333984375, -0.32131195068359375, -0.3086395263671875, -0.29596710205078125, -0.283294677734375, -0.27062225341796875, -0.2579498291015625, -0.24527740478515625, -0.23260498046875, -0.21993255615234375, -0.2072601318359375, -0.19458770751953125, -0.181915283203125, -0.16924285888671875, -0.1565704345703125, -0.14389801025390625, -0.1312255859375, -0.11855316162109375, -0.1058807373046875, -0.09320831298828125, -0.080535888671875, -0.06786346435546875, -0.0551910400390625, -0.04251861572265625, -0.02984619140625, -0.01717376708984375, -0.0045013427734375, 0.00817108154296875, 0.020843505859375, 0.03351593017578125, 0.0461883544921875, 0.05886077880859375, 0.071533203125, 0.08420562744140625, 0.0968780517578125, 0.10955047607421875, 0.122222900390625, 0.13489532470703125, 0.1475677490234375, 0.16024017333984375, 0.17291259765625, 0.18558502197265625, 0.1982574462890625, 0.21092987060546875, 0.223602294921875, 0.23627471923828125, 0.2489471435546875, 0.26161956787109375, 0.2742919921875, 0.28696441650390625, 0.2996368408203125, 0.31230926513671875, 0.324981689453125, 0.33765411376953125, 0.3503265380859375, 0.36299896240234375, 0.37567138671875, 0.38834381103515625, 0.4010162353515625, 0.41368865966796875, 0.426361083984375, 0.43903350830078125, 0.4517059326171875, 0.46437835693359375, 0.47705078125]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 4.0, 7.0, 14.0, 25.0, 51.0, 86.0, 166.0, 191.0, 153.0, 124.0, 83.0, 46.0, 28.0, 6.0, 8.0, 2.0, 10.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.3724277019500732, -2.2828047275543213, -2.1931819915771484, -2.1035590171813965, -2.0139360427856445, -1.9243131875991821, -1.8346903324127197, -1.7450673580169678, -1.6554445028305054, -1.565821647644043, -1.476198673248291, -1.3865758180618286, -1.2969529628753662, -1.2073299884796143, -1.1177071332931519, -1.0280842781066895, -0.9384613037109375, -0.8488383889198303, -0.7592154741287231, -0.6695926189422607, -0.5799697041511536, -0.4903467893600464, -0.400723934173584, -0.3111010193824768, -0.22147810459136963, -0.13185520470142365, -0.04223230481147766, 0.04739058017730713, 0.1370134949684143, 0.22663640975952148, 0.3162592649459839, 0.40588217973709106, 0.49550509452819824, 0.5851280093193054, 0.6747509241104126, 0.764373779296875, 0.8539966940879822, 0.9436196088790894, 1.0332424640655518, 1.1228654384613037, 1.2124882936477661, 1.3021111488342285, 1.3917341232299805, 1.4813569784164429, 1.5709798336029053, 1.6606028079986572, 1.7502256631851196, 1.839848518371582, 1.929471492767334, 2.019094467163086, 2.108717203140259, 2.1983401775360107, 2.2879631519317627, 2.3775858879089355, 2.4672088623046875, 2.5568318367004395, 2.6464548110961914, 2.7360777854919434, 2.825700521469116, 2.915323495864868, 3.00494647026062, 3.094569206237793, 3.184192180633545, 3.273815155029297, 3.3634378910064697]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 9.0, 9.0, 9.0, 17.0, 17.0, 25.0, 22.0, 30.0, 35.0, 35.0, 43.0, 50.0, 56.0, 57.0, 54.0, 61.0, 69.0, 59.0, 51.0, 56.0, 39.0, 41.0, 38.0, 24.0, 33.0, 16.0, 15.0, 11.0, 8.0, 10.0, 2.0, 3.0, 4.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2504863739013672, -1.208534598350525, -1.166582703590393, -1.1246309280395508, -1.082679033279419, -1.0407272577285767, -0.9987754225730896, -0.9568235874176025, -0.9148717522621155, -0.8729199171066284, -0.8309680819511414, -0.7890162467956543, -0.747064471244812, -0.7051125764846802, -0.6631608009338379, -0.6212089657783508, -0.5792571306228638, -0.5373052954673767, -0.49535346031188965, -0.453401654958725, -0.4114498198032379, -0.36949798464775085, -0.3275461792945862, -0.2855943441390991, -0.24364250898361206, -0.201690673828125, -0.15973885357379913, -0.11778703331947327, -0.0758351981639862, -0.033883363008499146, 0.008068442344665527, 0.05002027750015259, 0.09197211265563965, 0.1339239478111267, 0.17587576806545258, 0.21782758831977844, 0.2597794234752655, 0.30173125863075256, 0.34368306398391724, 0.3856348991394043, 0.42758673429489136, 0.4695385694503784, 0.5114904046058655, 0.5534422397613525, 0.5953940153121948, 0.6373459100723267, 0.679297685623169, 0.721249520778656, 0.7632013559341431, 0.8051531910896301, 0.8471050262451172, 0.8890568017959595, 0.9310086965560913, 0.9729604721069336, 1.0149123668670654, 1.0568641424179077, 1.09881591796875, 1.1407676935195923, 1.1827195882797241, 1.2246713638305664, 1.2666232585906982, 1.3085750341415405, 1.3505268096923828, 1.3924787044525146, 1.4344305992126465]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 2.0, 5.0, 9.0, 6.0, 10.0, 12.0, 15.0, 32.0, 29.0, 43.0, 66.0, 103.0, 130.0, 229.0, 365.0, 608.0, 1338.0, 3888.0, 20911.0, 260750.0, 678350.0, 69107.0, 8355.0, 2150.0, 815.0, 441.0, 264.0, 138.0, 130.0, 74.0, 57.0, 30.0, 18.0, 23.0, 15.0, 10.0, 4.0, 11.0, 4.0, 3.0, 2.0, 1.0, 3.0, 2.0, 2.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.763671875, -1.71063232421875, -1.6575927734375, -1.60455322265625, -1.551513671875, -1.49847412109375, -1.4454345703125, -1.39239501953125, -1.33935546875, -1.28631591796875, -1.2332763671875, -1.18023681640625, -1.127197265625, -1.07415771484375, -1.0211181640625, -0.96807861328125, -0.9150390625, -0.86199951171875, -0.8089599609375, -0.75592041015625, -0.702880859375, -0.64984130859375, -0.5968017578125, -0.54376220703125, -0.49072265625, -0.43768310546875, -0.3846435546875, -0.33160400390625, -0.278564453125, -0.22552490234375, -0.1724853515625, -0.11944580078125, -0.06640625, -0.01336669921875, 0.0396728515625, 0.09271240234375, 0.145751953125, 0.19879150390625, 0.2518310546875, 0.30487060546875, 0.35791015625, 0.41094970703125, 0.4639892578125, 0.51702880859375, 0.570068359375, 0.62310791015625, 0.6761474609375, 0.72918701171875, 0.7822265625, 0.83526611328125, 0.8883056640625, 0.94134521484375, 0.994384765625, 1.04742431640625, 1.1004638671875, 1.15350341796875, 1.20654296875, 1.25958251953125, 1.3126220703125, 1.36566162109375, 1.418701171875, 1.47174072265625, 1.5247802734375, 1.57781982421875, 1.630859375]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 4.0, 2.0, 4.0, 8.0, 13.0, 16.0, 39.0, 55.0, 69.0, 67.0, 93.0, 114.0, 97.0, 118.0, 75.0, 69.0, 47.0, 37.0, 39.0, 14.0, 11.0, 8.0, 6.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.264404296875, -0.25766563415527344, -0.2509269714355469, -0.2441883087158203, -0.23744964599609375, -0.2307109832763672, -0.22397232055664062, -0.21723365783691406, -0.2104949951171875, -0.20375633239746094, -0.19701766967773438, -0.1902790069580078, -0.18354034423828125, -0.1768016815185547, -0.17006301879882812, -0.16332435607910156, -0.156585693359375, -0.14984703063964844, -0.14310836791992188, -0.1363697052001953, -0.12963104248046875, -0.12289237976074219, -0.11615371704101562, -0.10941505432128906, -0.1026763916015625, -0.09593772888183594, -0.08919906616210938, -0.08246040344238281, -0.07572174072265625, -0.06898307800292969, -0.062244415283203125, -0.05550575256347656, -0.04876708984375, -0.04202842712402344, -0.035289764404296875, -0.028551101684570312, -0.02181243896484375, -0.015073776245117188, -0.008335113525390625, -0.0015964508056640625, 0.0051422119140625, 0.011880874633789062, 0.018619537353515625, 0.025358200073242188, 0.03209686279296875, 0.03883552551269531, 0.045574188232421875, 0.05231285095214844, 0.059051513671875, 0.06579017639160156, 0.07252883911132812, 0.07926750183105469, 0.08600616455078125, 0.09274482727050781, 0.09948348999023438, 0.10622215270996094, 0.1129608154296875, 0.11969947814941406, 0.12643814086914062, 0.1331768035888672, 0.13991546630859375, 0.1466541290283203, 0.15339279174804688, 0.16013145446777344, 0.1668701171875]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 6.0, 2.0, 6.0, 4.0, 9.0, 15.0, 14.0, 11.0, 19.0, 34.0, 35.0, 46.0, 70.0, 110.0, 146.0, 208.0, 355.0, 598.0, 1084.0, 2602.0, 7035.0, 23461.0, 89649.0, 324858.0, 411001.0, 135891.0, 34574.0, 9980.0, 3304.0, 1447.0, 681.0, 404.0, 257.0, 196.0, 126.0, 94.0, 65.0, 37.0, 47.0, 23.0, 13.0, 11.0, 9.0, 7.0, 8.0, 3.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.6533203125, -0.6323471069335938, -0.6113739013671875, -0.5904006958007812, -0.569427490234375, -0.5484542846679688, -0.5274810791015625, -0.5065078735351562, -0.48553466796875, -0.46456146240234375, -0.4435882568359375, -0.42261505126953125, -0.401641845703125, -0.38066864013671875, -0.3596954345703125, -0.33872222900390625, -0.3177490234375, -0.29677581787109375, -0.2758026123046875, -0.25482940673828125, -0.233856201171875, -0.21288299560546875, -0.1919097900390625, -0.17093658447265625, -0.14996337890625, -0.12899017333984375, -0.1080169677734375, -0.08704376220703125, -0.066070556640625, -0.04509735107421875, -0.0241241455078125, -0.00315093994140625, 0.017822265625, 0.03879547119140625, 0.0597686767578125, 0.08074188232421875, 0.101715087890625, 0.12268829345703125, 0.1436614990234375, 0.16463470458984375, 0.18560791015625, 0.20658111572265625, 0.2275543212890625, 0.24852752685546875, 0.269500732421875, 0.29047393798828125, 0.3114471435546875, 0.33242034912109375, 0.3533935546875, 0.37436676025390625, 0.3953399658203125, 0.41631317138671875, 0.437286376953125, 0.45825958251953125, 0.4792327880859375, 0.5002059936523438, 0.52117919921875, 0.5421524047851562, 0.5631256103515625, 0.5840988159179688, 0.605072021484375, 0.6260452270507812, 0.6470184326171875, 0.6679916381835938, 0.68896484375]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 3.0, 7.0, 9.0, 10.0, 14.0, 25.0, 24.0, 26.0, 28.0, 37.0, 39.0, 45.0, 35.0, 51.0, 44.0, 44.0, 64.0, 45.0, 64.0, 47.0, 37.0, 43.0, 29.0, 48.0, 28.0, 32.0, 23.0, 22.0, 16.0, 19.0, 12.0, 8.0, 8.0, 4.0, 2.0, 4.0, 1.0, 1.0, 3.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.474365234375, -0.4579505920410156, -0.44153594970703125, -0.4251213073730469, -0.4087066650390625, -0.3922920227050781, -0.37587738037109375, -0.3594627380371094, -0.343048095703125, -0.3266334533691406, -0.31021881103515625, -0.2938041687011719, -0.2773895263671875, -0.2609748840332031, -0.24456024169921875, -0.22814559936523438, -0.21173095703125, -0.19531631469726562, -0.17890167236328125, -0.16248703002929688, -0.1460723876953125, -0.12965774536132812, -0.11324310302734375, -0.09682846069335938, -0.080413818359375, -0.06399917602539062, -0.04758453369140625, -0.031169891357421875, -0.0147552490234375, 0.001659393310546875, 0.01807403564453125, 0.034488677978515625, 0.0509033203125, 0.06731796264648438, 0.08373260498046875, 0.10014724731445312, 0.1165618896484375, 0.13297653198242188, 0.14939117431640625, 0.16580581665039062, 0.182220458984375, 0.19863510131835938, 0.21504974365234375, 0.23146438598632812, 0.2478790283203125, 0.2642936706542969, 0.28070831298828125, 0.2971229553222656, 0.31353759765625, 0.3299522399902344, 0.34636688232421875, 0.3627815246582031, 0.3791961669921875, 0.3956108093261719, 0.41202545166015625, 0.4284400939941406, 0.444854736328125, 0.4612693786621094, 0.47768402099609375, 0.4940986633300781, 0.5105133056640625, 0.5269279479980469, 0.5433425903320312, 0.5597572326660156, 0.576171875]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 5.0, 3.0, 1.0, 5.0, 6.0, 10.0, 11.0, 17.0, 17.0, 53.0, 59.0, 100.0, 189.0, 264.0, 495.0, 956.0, 2007.0, 4832.0, 13599.0, 46829.0, 181553.0, 429887.0, 264842.0, 71769.0, 19240.0, 6529.0, 2555.0, 1259.0, 596.0, 357.0, 195.0, 102.0, 77.0, 48.0, 27.0, 18.0, 14.0, 7.0, 5.0, 7.0, 5.0, 1.0, 1.0, 3.0, 3.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.27099609375, -0.2615394592285156, -0.25208282470703125, -0.24262619018554688, -0.2331695556640625, -0.22371292114257812, -0.21425628662109375, -0.20479965209960938, -0.195343017578125, -0.18588638305664062, -0.17642974853515625, -0.16697311401367188, -0.1575164794921875, -0.14805984497070312, -0.13860321044921875, -0.12914657592773438, -0.11968994140625, -0.11023330688476562, -0.10077667236328125, -0.09132003784179688, -0.0818634033203125, -0.07240676879882812, -0.06295013427734375, -0.053493499755859375, -0.044036865234375, -0.034580230712890625, -0.02512359619140625, -0.015666961669921875, -0.0062103271484375, 0.003246307373046875, 0.01270294189453125, 0.022159576416015625, 0.0316162109375, 0.041072845458984375, 0.05052947998046875, 0.059986114501953125, 0.0694427490234375, 0.07889938354492188, 0.08835601806640625, 0.09781265258789062, 0.107269287109375, 0.11672592163085938, 0.12618255615234375, 0.13563919067382812, 0.1450958251953125, 0.15455245971679688, 0.16400909423828125, 0.17346572875976562, 0.18292236328125, 0.19237899780273438, 0.20183563232421875, 0.21129226684570312, 0.2207489013671875, 0.23020553588867188, 0.23966217041015625, 0.24911880493164062, 0.258575439453125, 0.2680320739746094, 0.27748870849609375, 0.2869453430175781, 0.2964019775390625, 0.3058586120605469, 0.31531524658203125, 0.3247718811035156, 0.334228515625]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 8.0, 2.0, 1.0, 1.0, 3.0, 9.0, 7.0, 8.0, 10.0, 4.0, 7.0, 13.0, 8.0, 12.0, 15.0, 20.0, 30.0, 32.0, 53.0, 55.0, 66.0, 61.0, 68.0, 79.0, 91.0, 70.0, 46.0, 35.0, 31.0, 24.0, 23.0, 17.0, 24.0, 10.0, 10.0, 5.0, 7.0, 7.0, 8.0, 7.0, 5.0, 3.0, 4.0, 4.0, 5.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.547306060791016e-05, -8.297152817249298e-05, -8.04699957370758e-05, -7.796846330165863e-05, -7.546693086624146e-05, -7.296539843082428e-05, -7.04638659954071e-05, -6.796233355998993e-05, -6.546080112457275e-05, -6.295926868915558e-05, -6.04577362537384e-05, -5.795620381832123e-05, -5.545467138290405e-05, -5.295313894748688e-05, -5.04516065120697e-05, -4.795007407665253e-05, -4.544854164123535e-05, -4.2947009205818176e-05, -4.0445476770401e-05, -3.7943944334983826e-05, -3.544241189956665e-05, -3.2940879464149475e-05, -3.04393470287323e-05, -2.7937814593315125e-05, -2.543628215789795e-05, -2.2934749722480774e-05, -2.04332172870636e-05, -1.7931684851646423e-05, -1.5430152416229248e-05, -1.2928619980812073e-05, -1.0427087545394897e-05, -7.925555109977722e-06, -5.424022674560547e-06, -2.9224902391433716e-06, -4.209578037261963e-07, 2.080574631690979e-06, 4.582107067108154e-06, 7.08363950252533e-06, 9.585171937942505e-06, 1.208670437335968e-05, 1.4588236808776855e-05, 1.708976924419403e-05, 1.9591301679611206e-05, 2.209283411502838e-05, 2.4594366550445557e-05, 2.7095898985862732e-05, 2.9597431421279907e-05, 3.209896385669708e-05, 3.460049629211426e-05, 3.710202872753143e-05, 3.960356116294861e-05, 4.2105093598365784e-05, 4.460662603378296e-05, 4.7108158469200134e-05, 4.960969090461731e-05, 5.2111223340034485e-05, 5.461275577545166e-05, 5.7114288210868835e-05, 5.961582064628601e-05, 6.211735308170319e-05, 6.461888551712036e-05, 6.712041795253754e-05, 6.962195038795471e-05, 7.212348282337189e-05, 7.462501525878906e-05]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 4.0, 1.0, 1.0, 1.0, 2.0, 3.0, 6.0, 4.0, 2.0, 6.0, 11.0, 11.0, 18.0, 29.0, 32.0, 60.0, 75.0, 130.0, 180.0, 335.0, 558.0, 1227.0, 2825.0, 9017.0, 38178.0, 204187.0, 519927.0, 215845.0, 40807.0, 9389.0, 2976.0, 1254.0, 595.0, 300.0, 208.0, 109.0, 76.0, 48.0, 45.0, 27.0, 13.0, 13.0, 7.0, 10.0, 2.0, 4.0, 5.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.341064453125, -0.3295097351074219, -0.31795501708984375, -0.3064002990722656, -0.2948455810546875, -0.2832908630371094, -0.27173614501953125, -0.2601814270019531, -0.248626708984375, -0.23707199096679688, -0.22551727294921875, -0.21396255493164062, -0.2024078369140625, -0.19085311889648438, -0.17929840087890625, -0.16774368286132812, -0.15618896484375, -0.14463424682617188, -0.13307952880859375, -0.12152481079101562, -0.1099700927734375, -0.09841537475585938, -0.08686065673828125, -0.07530593872070312, -0.063751220703125, -0.052196502685546875, -0.04064178466796875, -0.029087066650390625, -0.0175323486328125, -0.005977630615234375, 0.00557708740234375, 0.017131805419921875, 0.0286865234375, 0.040241241455078125, 0.05179595947265625, 0.06335067749023438, 0.0749053955078125, 0.08646011352539062, 0.09801483154296875, 0.10956954956054688, 0.121124267578125, 0.13267898559570312, 0.14423370361328125, 0.15578842163085938, 0.1673431396484375, 0.17889785766601562, 0.19045257568359375, 0.20200729370117188, 0.21356201171875, 0.22511672973632812, 0.23667144775390625, 0.24822616577148438, 0.2597808837890625, 0.2713356018066406, 0.28289031982421875, 0.2944450378417969, 0.305999755859375, 0.3175544738769531, 0.32910919189453125, 0.3406639099121094, 0.3522186279296875, 0.3637733459472656, 0.37532806396484375, 0.3868827819824219, 0.3984375]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 0.0, 1.0, 2.0, 3.0, 10.0, 9.0, 17.0, 18.0, 22.0, 35.0, 62.0, 47.0, 69.0, 80.0, 92.0, 101.0, 82.0, 72.0, 71.0, 62.0, 46.0, 31.0, 23.0, 18.0, 8.0, 12.0, 11.0, 5.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.46923828125, -0.4588775634765625, -0.448516845703125, -0.4381561279296875, -0.42779541015625, -0.4174346923828125, -0.407073974609375, -0.3967132568359375, -0.3863525390625, -0.3759918212890625, -0.365631103515625, -0.3552703857421875, -0.34490966796875, -0.3345489501953125, -0.324188232421875, -0.3138275146484375, -0.303466796875, -0.2931060791015625, -0.282745361328125, -0.2723846435546875, -0.26202392578125, -0.2516632080078125, -0.241302490234375, -0.2309417724609375, -0.2205810546875, -0.2102203369140625, -0.199859619140625, -0.1894989013671875, -0.17913818359375, -0.1687774658203125, -0.158416748046875, -0.1480560302734375, -0.1376953125, -0.1273345947265625, -0.116973876953125, -0.1066131591796875, -0.09625244140625, -0.0858917236328125, -0.075531005859375, -0.0651702880859375, -0.0548095703125, -0.0444488525390625, -0.034088134765625, -0.0237274169921875, -0.01336669921875, -0.0030059814453125, 0.007354736328125, 0.0177154541015625, 0.028076171875, 0.0384368896484375, 0.048797607421875, 0.0591583251953125, 0.06951904296875, 0.0798797607421875, 0.090240478515625, 0.1006011962890625, 0.1109619140625, 0.1213226318359375, 0.131683349609375, 0.1420440673828125, 0.15240478515625, 0.1627655029296875, 0.173126220703125, 0.1834869384765625, 0.19384765625]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 3.0, 2.0, 7.0, 11.0, 23.0, 30.0, 61.0, 97.0, 140.0, 148.0, 163.0, 108.0, 96.0, 60.0, 25.0, 16.0, 10.0, 5.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.731440544128418, -4.548333168029785, -4.3652262687683105, -4.182119369506836, -3.999011993408203, -3.8159048557281494, -3.6327977180480957, -3.449690580368042, -3.2665834426879883, -3.0834763050079346, -2.900369167327881, -2.717262029647827, -2.5341548919677734, -2.3510477542877197, -2.167940616607666, -1.9848334789276123, -1.8017263412475586, -1.6186192035675049, -1.4355120658874512, -1.2524049282073975, -1.0692977905273438, -0.88619065284729, -0.7030835151672363, -0.5199763774871826, -0.3368692398071289, -0.1537621021270752, 0.029345035552978516, 0.21245217323303223, 0.39555931091308594, 0.5786664485931396, 0.7617735862731934, 0.9448807239532471, 1.127988338470459, 1.3110954761505127, 1.4942026138305664, 1.6773097515106201, 1.8604168891906738, 2.0435240268707275, 2.2266311645507812, 2.409738302230835, 2.5928454399108887, 2.7759525775909424, 2.959059715270996, 3.14216685295105, 3.3252739906311035, 3.5083811283111572, 3.691488265991211, 3.8745954036712646, 4.057702541351318, 4.240809440612793, 4.423916816711426, 4.607024192810059, 4.790131092071533, 4.973237991333008, 5.156345367431641, 5.339452743530273, 5.522559642791748, 5.705666542053223, 5.8887739181518555, 6.071881294250488, 6.254988193511963, 6.4380950927734375, 6.62120246887207, 6.804309844970703, 6.987416744232178]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 4.0, 6.0, 2.0, 6.0, 4.0, 10.0, 9.0, 14.0, 16.0, 22.0, 19.0, 14.0, 23.0, 27.0, 28.0, 36.0, 55.0, 46.0, 39.0, 48.0, 35.0, 49.0, 47.0, 50.0, 56.0, 39.0, 61.0, 38.0, 28.0, 28.0, 28.0, 28.0, 17.0, 14.0, 13.0, 12.0, 3.0, 9.0, 8.0, 6.0, 3.0, 5.0, 4.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8268818855285645, -2.734602451324463, -2.6423230171203613, -2.5500435829162598, -2.457764148712158, -2.3654849529266357, -2.273205518722534, -2.1809260845184326, -2.088646650314331, -1.9963672161102295, -1.904087781906128, -1.811808466911316, -1.7195290327072144, -1.6272495985031128, -1.5349702835083008, -1.4426908493041992, -1.3504114151000977, -1.258131980895996, -1.1658525466918945, -1.0735732316970825, -0.981293797492981, -0.8890143632888794, -0.7967349886894226, -0.7044556140899658, -0.6121761798858643, -0.5198967456817627, -0.4276173710823059, -0.33533796668052673, -0.24305856227874756, -0.15077915787696838, -0.05849975347518921, 0.03377962112426758, 0.12605905532836914, 0.21833845973014832, 0.3106178641319275, 0.40289726853370667, 0.49517667293548584, 0.5874561071395874, 0.6797354817390442, 0.772014856338501, 0.8642942905426025, 0.9565737247467041, 1.0488531589508057, 1.1411324739456177, 1.2334119081497192, 1.3256913423538208, 1.4179706573486328, 1.5102500915527344, 1.602529525756836, 1.6948089599609375, 1.787088394165039, 1.879367709159851, 1.9716471433639526, 2.0639264583587646, 2.156205892562866, 2.2484853267669678, 2.3407647609710693, 2.433044195175171, 2.5253236293792725, 2.617603063583374, 2.7098822593688965, 2.802161693572998, 2.8944411277770996, 2.986720561981201, 3.0789999961853027]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 4.0, 9.0, 12.0, 11.0, 12.0, 22.0, 23.0, 35.0, 34.0, 50.0, 81.0, 94.0, 172.0, 222.0, 379.0, 703.0, 1564.0, 5188.0, 35809.0, 2444908.0, 1662321.0, 33675.0, 5259.0, 1711.0, 783.0, 438.0, 238.0, 177.0, 102.0, 73.0, 43.0, 41.0, 21.0, 20.0, 14.0, 10.0, 8.0, 8.0, 3.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-2.1875, -2.123138427734375, -2.05877685546875, -1.994415283203125, -1.9300537109375, -1.865692138671875, -1.80133056640625, -1.736968994140625, -1.672607421875, -1.608245849609375, -1.54388427734375, -1.479522705078125, -1.4151611328125, -1.350799560546875, -1.28643798828125, -1.222076416015625, -1.15771484375, -1.093353271484375, -1.02899169921875, -0.964630126953125, -0.9002685546875, -0.835906982421875, -0.77154541015625, -0.707183837890625, -0.642822265625, -0.578460693359375, -0.51409912109375, -0.449737548828125, -0.3853759765625, -0.321014404296875, -0.25665283203125, -0.192291259765625, -0.1279296875, -0.063568115234375, 0.00079345703125, 0.065155029296875, 0.1295166015625, 0.193878173828125, 0.25823974609375, 0.322601318359375, 0.386962890625, 0.451324462890625, 0.51568603515625, 0.580047607421875, 0.6444091796875, 0.708770751953125, 0.77313232421875, 0.837493896484375, 0.90185546875, 0.966217041015625, 1.03057861328125, 1.094940185546875, 1.1593017578125, 1.223663330078125, 1.28802490234375, 1.352386474609375, 1.416748046875, 1.481109619140625, 1.54547119140625, 1.609832763671875, 1.6741943359375, 1.738555908203125, 1.80291748046875, 1.867279052734375, 1.931640625]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 5.0, 7.0, 7.0, 12.0, 22.0, 43.0, 58.0, 64.0, 72.0, 80.0, 98.0, 110.0, 96.0, 84.0, 60.0, 65.0, 38.0, 26.0, 22.0, 11.0, 6.0, 7.0, 3.0, 5.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.27734375, -0.27045631408691406, -0.2635688781738281, -0.2566814422607422, -0.24979400634765625, -0.2429065704345703, -0.23601913452148438, -0.22913169860839844, -0.2222442626953125, -0.21535682678222656, -0.20846939086914062, -0.2015819549560547, -0.19469451904296875, -0.1878070831298828, -0.18091964721679688, -0.17403221130371094, -0.167144775390625, -0.16025733947753906, -0.15336990356445312, -0.1464824676513672, -0.13959503173828125, -0.1327075958251953, -0.12582015991210938, -0.11893272399902344, -0.1120452880859375, -0.10515785217285156, -0.09827041625976562, -0.09138298034667969, -0.08449554443359375, -0.07760810852050781, -0.07072067260742188, -0.06383323669433594, -0.05694580078125, -0.05005836486816406, -0.043170928955078125, -0.03628349304199219, -0.02939605712890625, -0.022508621215820312, -0.015621185302734375, -0.008733749389648438, -0.0018463134765625, 0.0050411224365234375, 0.011928558349609375, 0.018815994262695312, 0.02570343017578125, 0.03259086608886719, 0.039478302001953125, 0.04636573791503906, 0.053253173828125, 0.06014060974121094, 0.06702804565429688, 0.07391548156738281, 0.08080291748046875, 0.08769035339355469, 0.09457778930664062, 0.10146522521972656, 0.1083526611328125, 0.11524009704589844, 0.12212753295898438, 0.1290149688720703, 0.13590240478515625, 0.1427898406982422, 0.14967727661132812, 0.15656471252441406, 0.1634521484375]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 7.0, 7.0, 13.0, 18.0, 21.0, 32.0, 55.0, 112.0, 232.0, 518.0, 1344.0, 3755.0, 15786.0, 118680.0, 3552209.0, 455872.0, 35402.0, 6814.0, 2017.0, 747.0, 317.0, 154.0, 81.0, 39.0, 22.0, 9.0, 9.0, 8.0, 3.0, 2.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.45703125, -1.415069580078125, -1.37310791015625, -1.331146240234375, -1.2891845703125, -1.247222900390625, -1.20526123046875, -1.163299560546875, -1.121337890625, -1.079376220703125, -1.03741455078125, -0.995452880859375, -0.9534912109375, -0.911529541015625, -0.86956787109375, -0.827606201171875, -0.78564453125, -0.743682861328125, -0.70172119140625, -0.659759521484375, -0.6177978515625, -0.575836181640625, -0.53387451171875, -0.491912841796875, -0.449951171875, -0.407989501953125, -0.36602783203125, -0.324066162109375, -0.2821044921875, -0.240142822265625, -0.19818115234375, -0.156219482421875, -0.1142578125, -0.072296142578125, -0.03033447265625, 0.011627197265625, 0.0535888671875, 0.095550537109375, 0.13751220703125, 0.179473876953125, 0.221435546875, 0.263397216796875, 0.30535888671875, 0.347320556640625, 0.3892822265625, 0.431243896484375, 0.47320556640625, 0.515167236328125, 0.55712890625, 0.599090576171875, 0.64105224609375, 0.683013916015625, 0.7249755859375, 0.766937255859375, 0.80889892578125, 0.850860595703125, 0.892822265625, 0.934783935546875, 0.97674560546875, 1.018707275390625, 1.0606689453125, 1.102630615234375, 1.14459228515625, 1.186553955078125, 1.228515625]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 5.0, 7.0, 5.0, 7.0, 18.0, 21.0, 23.0, 28.0, 47.0, 55.0, 80.0, 155.0, 250.0, 509.0, 1282.0, 699.0, 328.0, 176.0, 133.0, 77.0, 58.0, 35.0, 26.0, 13.0, 8.0, 12.0, 8.0, 4.0, 3.0, 4.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.381591796875, -0.3699188232421875, -0.358245849609375, -0.3465728759765625, -0.33489990234375, -0.3232269287109375, -0.311553955078125, -0.2998809814453125, -0.2882080078125, -0.2765350341796875, -0.264862060546875, -0.2531890869140625, -0.24151611328125, -0.2298431396484375, -0.218170166015625, -0.2064971923828125, -0.19482421875, -0.1831512451171875, -0.171478271484375, -0.1598052978515625, -0.14813232421875, -0.1364593505859375, -0.124786376953125, -0.1131134033203125, -0.1014404296875, -0.0897674560546875, -0.078094482421875, -0.0664215087890625, -0.05474853515625, -0.0430755615234375, -0.031402587890625, -0.0197296142578125, -0.008056640625, 0.0036163330078125, 0.015289306640625, 0.0269622802734375, 0.03863525390625, 0.0503082275390625, 0.061981201171875, 0.0736541748046875, 0.0853271484375, 0.0970001220703125, 0.108673095703125, 0.1203460693359375, 0.13201904296875, 0.1436920166015625, 0.155364990234375, 0.1670379638671875, 0.1787109375, 0.1903839111328125, 0.202056884765625, 0.2137298583984375, 0.22540283203125, 0.2370758056640625, 0.248748779296875, 0.2604217529296875, 0.2720947265625, 0.2837677001953125, 0.295440673828125, 0.3071136474609375, 0.31878662109375, 0.3304595947265625, 0.342132568359375, 0.3538055419921875, 0.365478515625]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 4.0, 1.0, 2.0, 12.0, 19.0, 27.0, 68.0, 125.0, 200.0, 195.0, 174.0, 87.0, 50.0, 22.0, 11.0, 4.0, 4.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.545332670211792, -1.4198428392410278, -1.2943530082702637, -1.1688631772994995, -1.0433733463287354, -0.9178835153579712, -0.792393684387207, -0.6669038534164429, -0.5414140224456787, -0.41592419147491455, -0.2904343605041504, -0.16494452953338623, -0.03945469856262207, 0.08603513240814209, 0.21152496337890625, 0.3370147943496704, 0.46250462532043457, 0.5879944562911987, 0.7134842872619629, 0.838974118232727, 0.9644639492034912, 1.0899537801742554, 1.2154436111450195, 1.3409334421157837, 1.4664232730865479, 1.591913104057312, 1.7174029350280762, 1.8428927659988403, 1.9683825969696045, 2.093872547149658, 2.219362258911133, 2.3448519706726074, 2.4703421592712402, 2.595831871032715, 2.7213218212127686, 2.8468117713928223, 2.972301483154297, 3.0977911949157715, 3.223281145095825, 3.348771095275879, 3.4742608070373535, 3.599750518798828, 3.725240468978882, 3.8507304191589355, 3.97622013092041, 4.101709842681885, 4.227199554443359, 4.352689743041992, 4.478179454803467, 4.603669166564941, 4.729159355163574, 4.854649066925049, 4.980138778686523, 5.105628490447998, 5.231118202209473, 5.3566083908081055, 5.48209810256958, 5.607587814331055, 5.7330780029296875, 5.858567714691162, 5.984057426452637, 6.109547138214111, 6.235036849975586, 6.360527038574219, 6.486016750335693]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 6.0, 5.0, 5.0, 6.0, 6.0, 7.0, 11.0, 8.0, 11.0, 18.0, 27.0, 16.0, 28.0, 28.0, 33.0, 37.0, 52.0, 49.0, 34.0, 36.0, 49.0, 38.0, 54.0, 41.0, 40.0, 44.0, 42.0, 43.0, 26.0, 39.0, 28.0, 23.0, 17.0, 23.0, 20.0, 11.0, 11.0, 6.0, 5.0, 8.0, 2.0, 2.0, 1.0, 3.0, 2.0, 5.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.2867462635040283, -1.2483692169189453, -1.2099921703338623, -1.1716152429580688, -1.1332381963729858, -1.0948611497879028, -1.0564841032028198, -1.0181070566177368, -0.9797300696372986, -0.9413530230522156, -0.9029760360717773, -0.8645989894866943, -0.8262219429016113, -0.7878449559211731, -0.7494679093360901, -0.7110909223556519, -0.6727138757705688, -0.6343368291854858, -0.5959598422050476, -0.5575827956199646, -0.5192058086395264, -0.48082876205444336, -0.44245171546936035, -0.40407469868659973, -0.3656976819038391, -0.3273206651210785, -0.28894364833831787, -0.25056660175323486, -0.21218958497047424, -0.17381256818771362, -0.1354355365037918, -0.09705850481987, -0.058681488037109375, -0.020304463803768158, 0.01807256042957306, 0.056449584662914276, 0.0948266088962555, 0.1332036256790161, 0.17158065736293793, 0.20995768904685974, 0.24833470582962036, 0.286711722612381, 0.3250887393951416, 0.3634657859802246, 0.40184280276298523, 0.44021981954574585, 0.47859686613082886, 0.5169738531112671, 0.5553508996963501, 0.5937279462814331, 0.6321049332618713, 0.6704819798469543, 0.7088589668273926, 0.7472360134124756, 0.7856130599975586, 0.8239901065826416, 0.8623670935630798, 0.9007441401481628, 0.9391211271286011, 0.9774981737136841, 1.015875220298767, 1.0542521476745605, 1.0926291942596436, 1.1310062408447266, 1.1693832874298096]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 4.0, 1.0, 3.0, 1.0, 5.0, 5.0, 7.0, 10.0, 13.0, 11.0, 25.0, 23.0, 48.0, 63.0, 83.0, 181.0, 294.0, 643.0, 1666.0, 6687.0, 55763.0, 726125.0, 235078.0, 16691.0, 3144.0, 1002.0, 386.0, 232.0, 133.0, 64.0, 51.0, 37.0, 29.0, 11.0, 13.0, 14.0, 4.0, 4.0, 4.0, 2.0, 1.0, 3.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.458984375, -1.39483642578125, -1.3306884765625, -1.26654052734375, -1.202392578125, -1.13824462890625, -1.0740966796875, -1.00994873046875, -0.94580078125, -0.88165283203125, -0.8175048828125, -0.75335693359375, -0.689208984375, -0.62506103515625, -0.5609130859375, -0.49676513671875, -0.4326171875, -0.36846923828125, -0.3043212890625, -0.24017333984375, -0.176025390625, -0.11187744140625, -0.0477294921875, 0.01641845703125, 0.08056640625, 0.14471435546875, 0.2088623046875, 0.27301025390625, 0.337158203125, 0.40130615234375, 0.4654541015625, 0.52960205078125, 0.59375, 0.65789794921875, 0.7220458984375, 0.78619384765625, 0.850341796875, 0.91448974609375, 0.9786376953125, 1.04278564453125, 1.10693359375, 1.17108154296875, 1.2352294921875, 1.29937744140625, 1.363525390625, 1.42767333984375, 1.4918212890625, 1.55596923828125, 1.6201171875, 1.68426513671875, 1.7484130859375, 1.81256103515625, 1.876708984375, 1.94085693359375, 2.0050048828125, 2.06915283203125, 2.13330078125, 2.19744873046875, 2.2615966796875, 2.32574462890625, 2.389892578125, 2.45404052734375, 2.5181884765625, 2.58233642578125, 2.646484375]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 3.0, 6.0, 17.0, 18.0, 23.0, 39.0, 49.0, 58.0, 65.0, 110.0, 90.0, 92.0, 76.0, 95.0, 65.0, 55.0, 44.0, 31.0, 17.0, 17.0, 13.0, 12.0, 5.0, 4.0, 4.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.281982421875, -0.27482032775878906, -0.2676582336425781, -0.2604961395263672, -0.25333404541015625, -0.2461719512939453, -0.23900985717773438, -0.23184776306152344, -0.2246856689453125, -0.21752357482910156, -0.21036148071289062, -0.2031993865966797, -0.19603729248046875, -0.1888751983642578, -0.18171310424804688, -0.17455101013183594, -0.167388916015625, -0.16022682189941406, -0.15306472778320312, -0.1459026336669922, -0.13874053955078125, -0.1315784454345703, -0.12441635131835938, -0.11725425720214844, -0.1100921630859375, -0.10293006896972656, -0.09576797485351562, -0.08860588073730469, -0.08144378662109375, -0.07428169250488281, -0.06711959838867188, -0.05995750427246094, -0.05279541015625, -0.04563331604003906, -0.038471221923828125, -0.03130912780761719, -0.02414703369140625, -0.016984939575195312, -0.009822845458984375, -0.0026607513427734375, 0.0045013427734375, 0.011663436889648438, 0.018825531005859375, 0.025987625122070312, 0.03314971923828125, 0.04031181335449219, 0.047473907470703125, 0.05463600158691406, 0.061798095703125, 0.06896018981933594, 0.07612228393554688, 0.08328437805175781, 0.09044647216796875, 0.09760856628417969, 0.10477066040039062, 0.11193275451660156, 0.1190948486328125, 0.12625694274902344, 0.13341903686523438, 0.1405811309814453, 0.14774322509765625, 0.1549053192138672, 0.16206741333007812, 0.16922950744628906, 0.1763916015625]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [4.0, 1.0, 1.0, 3.0, 4.0, 3.0, 5.0, 5.0, 5.0, 6.0, 5.0, 11.0, 10.0, 19.0, 27.0, 16.0, 36.0, 58.0, 53.0, 62.0, 100.0, 142.0, 182.0, 353.0, 657.0, 1232.0, 2822.0, 8303.0, 29914.0, 125870.0, 433548.0, 332706.0, 81751.0, 19893.0, 6043.0, 2190.0, 943.0, 529.0, 319.0, 218.0, 109.0, 95.0, 85.0, 47.0, 43.0, 36.0, 23.0, 13.0, 18.0, 11.0, 12.0, 7.0, 10.0, 1.0, 1.0, 3.0, 0.0, 0.0, 5.0, 1.0, 2.0, 0.0, 1.0, 4.0], "bins": [-0.76123046875, -0.7365570068359375, -0.711883544921875, -0.6872100830078125, -0.66253662109375, -0.6378631591796875, -0.613189697265625, -0.5885162353515625, -0.5638427734375, -0.5391693115234375, -0.514495849609375, -0.4898223876953125, -0.46514892578125, -0.4404754638671875, -0.415802001953125, -0.3911285400390625, -0.366455078125, -0.3417816162109375, -0.317108154296875, -0.2924346923828125, -0.26776123046875, -0.2430877685546875, -0.218414306640625, -0.1937408447265625, -0.1690673828125, -0.1443939208984375, -0.119720458984375, -0.0950469970703125, -0.07037353515625, -0.0457000732421875, -0.021026611328125, 0.0036468505859375, 0.0283203125, 0.0529937744140625, 0.077667236328125, 0.1023406982421875, 0.12701416015625, 0.1516876220703125, 0.176361083984375, 0.2010345458984375, 0.2257080078125, 0.2503814697265625, 0.275054931640625, 0.2997283935546875, 0.32440185546875, 0.3490753173828125, 0.373748779296875, 0.3984222412109375, 0.423095703125, 0.4477691650390625, 0.472442626953125, 0.4971160888671875, 0.52178955078125, 0.5464630126953125, 0.571136474609375, 0.5958099365234375, 0.6204833984375, 0.6451568603515625, 0.669830322265625, 0.6945037841796875, 0.71917724609375, 0.7438507080078125, 0.768524169921875, 0.7931976318359375, 0.81787109375]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 6.0, 5.0, 9.0, 12.0, 16.0, 10.0, 8.0, 15.0, 20.0, 30.0, 32.0, 27.0, 42.0, 43.0, 42.0, 52.0, 34.0, 41.0, 47.0, 40.0, 49.0, 38.0, 45.0, 45.0, 30.0, 36.0, 37.0, 42.0, 22.0, 18.0, 22.0, 13.0, 20.0, 12.0, 12.0, 11.0, 9.0, 4.0, 5.0, 1.0, 5.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.476806640625, -0.4592247009277344, -0.44164276123046875, -0.4240608215332031, -0.4064788818359375, -0.3888969421386719, -0.37131500244140625, -0.3537330627441406, -0.336151123046875, -0.3185691833496094, -0.30098724365234375, -0.2834053039550781, -0.2658233642578125, -0.24824142456054688, -0.23065948486328125, -0.21307754516601562, -0.19549560546875, -0.17791366577148438, -0.16033172607421875, -0.14274978637695312, -0.1251678466796875, -0.10758590698242188, -0.09000396728515625, -0.07242202758789062, -0.054840087890625, -0.037258148193359375, -0.01967620849609375, -0.002094268798828125, 0.0154876708984375, 0.033069610595703125, 0.05065155029296875, 0.06823348999023438, 0.0858154296875, 0.10339736938476562, 0.12097930908203125, 0.13856124877929688, 0.1561431884765625, 0.17372512817382812, 0.19130706787109375, 0.20888900756835938, 0.226470947265625, 0.24405288696289062, 0.26163482666015625, 0.2792167663574219, 0.2967987060546875, 0.3143806457519531, 0.33196258544921875, 0.3495445251464844, 0.36712646484375, 0.3847084045410156, 0.40229034423828125, 0.4198722839355469, 0.4374542236328125, 0.4550361633300781, 0.47261810302734375, 0.4902000427246094, 0.507781982421875, 0.5253639221191406, 0.5429458618164062, 0.5605278015136719, 0.5781097412109375, 0.5956916809082031, 0.6132736206054688, 0.6308555603027344, 0.6484375]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [4.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0, 3.0, 3.0, 7.0, 10.0, 8.0, 12.0, 32.0, 25.0, 59.0, 92.0, 187.0, 334.0, 659.0, 2046.0, 9882.0, 142499.0, 815237.0, 68274.0, 6363.0, 1599.0, 551.0, 277.0, 138.0, 98.0, 59.0, 28.0, 23.0, 10.0, 8.0, 9.0, 4.0, 3.0, 3.0, 4.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.732421875, -0.7046661376953125, -0.676910400390625, -0.6491546630859375, -0.62139892578125, -0.5936431884765625, -0.565887451171875, -0.5381317138671875, -0.5103759765625, -0.4826202392578125, -0.454864501953125, -0.4271087646484375, -0.39935302734375, -0.3715972900390625, -0.343841552734375, -0.3160858154296875, -0.288330078125, -0.2605743408203125, -0.232818603515625, -0.2050628662109375, -0.17730712890625, -0.1495513916015625, -0.121795654296875, -0.0940399169921875, -0.0662841796875, -0.0385284423828125, -0.010772705078125, 0.0169830322265625, 0.04473876953125, 0.0724945068359375, 0.100250244140625, 0.1280059814453125, 0.15576171875, 0.1835174560546875, 0.211273193359375, 0.2390289306640625, 0.26678466796875, 0.2945404052734375, 0.322296142578125, 0.3500518798828125, 0.3778076171875, 0.4055633544921875, 0.433319091796875, 0.4610748291015625, 0.48883056640625, 0.5165863037109375, 0.544342041015625, 0.5720977783203125, 0.599853515625, 0.6276092529296875, 0.655364990234375, 0.6831207275390625, 0.71087646484375, 0.7386322021484375, 0.766387939453125, 0.7941436767578125, 0.8218994140625, 0.8496551513671875, 0.877410888671875, 0.9051666259765625, 0.93292236328125, 0.9606781005859375, 0.988433837890625, 1.0161895751953125, 1.0439453125]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 13.0, 13.0, 16.0, 38.0, 75.0, 194.0, 333.0, 180.0, 90.0, 29.0, 17.0, 5.0, 3.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.00063323974609375, -0.0006211623549461365, -0.000609084963798523, -0.0005970075726509094, -0.0005849301815032959, -0.0005728527903556824, -0.0005607753992080688, -0.0005486980080604553, -0.0005366206169128418, -0.0005245432257652283, -0.0005124658346176147, -0.0005003884434700012, -0.0004883110523223877, -0.00047623366117477417, -0.00046415627002716064, -0.0004520788788795471, -0.0004400014877319336, -0.00042792409658432007, -0.00041584670543670654, -0.000403769314289093, -0.0003916919231414795, -0.00037961453199386597, -0.00036753714084625244, -0.0003554597496986389, -0.0003433823585510254, -0.00033130496740341187, -0.00031922757625579834, -0.0003071501851081848, -0.0002950727939605713, -0.00028299540281295776, -0.00027091801166534424, -0.0002588406205177307, -0.0002467632293701172, -0.00023468583822250366, -0.00022260844707489014, -0.0002105310559272766, -0.00019845366477966309, -0.00018637627363204956, -0.00017429888248443604, -0.0001622214913368225, -0.00015014410018920898, -0.00013806670904159546, -0.00012598931789398193, -0.00011391192674636841, -0.00010183453559875488, -8.975714445114136e-05, -7.767975330352783e-05, -6.56023621559143e-05, -5.352497100830078e-05, -4.1447579860687256e-05, -2.937018871307373e-05, -1.7292797565460205e-05, -5.21540641784668e-06, 6.861984729766846e-06, 1.893937587738037e-05, 3.1016767024993896e-05, 4.309415817260742e-05, 5.517154932022095e-05, 6.724894046783447e-05, 7.9326331615448e-05, 9.140372276306152e-05, 0.00010348111391067505, 0.00011555850505828857, 0.0001276358962059021, 0.00013971328735351562]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 0.0, 2.0, 7.0, 8.0, 14.0, 20.0, 31.0, 56.0, 80.0, 149.0, 295.0, 1002.0, 9550.0, 914717.0, 118698.0, 2842.0, 578.0, 221.0, 124.0, 63.0, 38.0, 26.0, 11.0, 8.0, 10.0, 0.0, 5.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0], "bins": [-1.95703125, -1.9052276611328125, -1.853424072265625, -1.8016204833984375, -1.74981689453125, -1.6980133056640625, -1.646209716796875, -1.5944061279296875, -1.5426025390625, -1.4907989501953125, -1.438995361328125, -1.3871917724609375, -1.33538818359375, -1.2835845947265625, -1.231781005859375, -1.1799774169921875, -1.128173828125, -1.0763702392578125, -1.024566650390625, -0.9727630615234375, -0.92095947265625, -0.8691558837890625, -0.817352294921875, -0.7655487060546875, -0.7137451171875, -0.6619415283203125, -0.610137939453125, -0.5583343505859375, -0.50653076171875, -0.4547271728515625, -0.402923583984375, -0.3511199951171875, -0.29931640625, -0.2475128173828125, -0.195709228515625, -0.1439056396484375, -0.09210205078125, -0.0402984619140625, 0.011505126953125, 0.0633087158203125, 0.1151123046875, 0.1669158935546875, 0.218719482421875, 0.2705230712890625, 0.32232666015625, 0.3741302490234375, 0.425933837890625, 0.4777374267578125, 0.529541015625, 0.5813446044921875, 0.633148193359375, 0.6849517822265625, 0.73675537109375, 0.7885589599609375, 0.840362548828125, 0.8921661376953125, 0.9439697265625, 0.9957733154296875, 1.047576904296875, 1.0993804931640625, 1.15118408203125, 1.2029876708984375, 1.254791259765625, 1.3065948486328125, 1.3583984375]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 5.0, 3.0, 11.0, 19.0, 9.0, 20.0, 28.0, 65.0, 79.0, 100.0, 97.0, 109.0, 107.0, 100.0, 89.0, 48.0, 37.0, 17.0, 16.0, 16.0, 14.0, 4.0, 3.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.57568359375, -0.5599098205566406, -0.5441360473632812, -0.5283622741699219, -0.5125885009765625, -0.4968147277832031, -0.48104095458984375, -0.4652671813964844, -0.449493408203125, -0.4337196350097656, -0.41794586181640625, -0.4021720886230469, -0.3863983154296875, -0.3706245422363281, -0.35485076904296875, -0.3390769958496094, -0.32330322265625, -0.3075294494628906, -0.29175567626953125, -0.2759819030761719, -0.2602081298828125, -0.24443435668945312, -0.22866058349609375, -0.21288681030273438, -0.197113037109375, -0.18133926391601562, -0.16556549072265625, -0.14979171752929688, -0.1340179443359375, -0.11824417114257812, -0.10247039794921875, -0.08669662475585938, -0.0709228515625, -0.055149078369140625, -0.03937530517578125, -0.023601531982421875, -0.0078277587890625, 0.007946014404296875, 0.02371978759765625, 0.039493560791015625, 0.055267333984375, 0.07104110717773438, 0.08681488037109375, 0.10258865356445312, 0.1183624267578125, 0.13413619995117188, 0.14990997314453125, 0.16568374633789062, 0.18145751953125, 0.19723129272460938, 0.21300506591796875, 0.22877883911132812, 0.2445526123046875, 0.2603263854980469, 0.27610015869140625, 0.2918739318847656, 0.307647705078125, 0.3234214782714844, 0.33919525146484375, 0.3549690246582031, 0.3707427978515625, 0.3865165710449219, 0.40229034423828125, 0.4180641174316406, 0.433837890625]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 5.0, 12.0, 21.0, 33.0, 67.0, 87.0, 143.0, 190.0, 162.0, 128.0, 64.0, 43.0, 24.0, 14.0, 3.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.981949806213379, -7.737807750701904, -7.493666172027588, -7.249524116516113, -7.005382537841797, -6.761240482330322, -6.517098426818848, -6.272956848144531, -6.028814792633057, -5.784672737121582, -5.540531158447266, -5.296389102935791, -5.052247524261475, -4.80810546875, -4.563963890075684, -4.319821834564209, -4.075679779052734, -3.831537961959839, -3.5873961448669434, -3.3432540893554688, -3.0991122722625732, -2.8549704551696777, -2.6108286380767822, -2.3666868209838867, -2.1225452423095703, -1.8784034252166748, -1.6342614889144897, -1.3901196718215942, -1.1459777355194092, -0.9018359184265137, -0.6576941013336182, -0.4135521650314331, -0.16941022872924805, 0.07473163306713104, 0.31887349486351013, 0.563015341758728, 0.8071572184562683, 1.0512990951538086, 1.295440912246704, 1.5395828485488892, 1.7837246656417847, 2.0278666019439697, 2.2720084190368652, 2.5161502361297607, 2.7602920532226562, 3.004434108734131, 3.2485756874084473, 3.492717742919922, 3.7368595600128174, 3.981001377105713, 4.2251434326171875, 4.469285011291504, 4.7134270668029785, 4.957569122314453, 5.2017107009887695, 5.445852756500244, 5.6899943351745605, 5.934136390686035, 6.178277969360352, 6.422420024871826, 6.666561603546143, 6.910703659057617, 7.154845237731934, 7.398987293243408, 7.643129348754883]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 6.0, 5.0, 11.0, 8.0, 11.0, 14.0, 15.0, 13.0, 22.0, 26.0, 25.0, 36.0, 35.0, 47.0, 51.0, 42.0, 51.0, 52.0, 40.0, 51.0, 52.0, 53.0, 44.0, 40.0, 39.0, 35.0, 23.0, 24.0, 28.0, 24.0, 17.0, 14.0, 12.0, 11.0, 8.0, 5.0, 7.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9368813037872314, -2.835259437561035, -2.733637809753418, -2.6320159435272217, -2.5303940773010254, -2.428772449493408, -2.327150583267212, -2.2255287170410156, -2.1239070892333984, -2.022285223007202, -1.9206634759902954, -1.8190417289733887, -1.7174198627471924, -1.6157981157302856, -1.514176368713379, -1.4125545024871826, -1.3109326362609863, -1.2093108892440796, -1.1076890230178833, -1.0060672760009766, -0.904445469379425, -0.8028236627578735, -0.7012019157409668, -0.5995801091194153, -0.49795830249786377, -0.39633649587631226, -0.29471471905708313, -0.193092942237854, -0.09147113561630249, 0.010150671005249023, 0.11177241802215576, 0.21339422464370728, 0.3150162696838379, 0.4166380763053894, 0.5182598829269409, 0.6198816299438477, 0.7215034365653992, 0.8231252431869507, 0.9247469902038574, 1.0263688564300537, 1.1279906034469604, 1.2296123504638672, 1.3312342166900635, 1.4328559637069702, 1.534477710723877, 1.6360995769500732, 1.73772132396698, 1.8393430709838867, 1.940964937210083, 2.0425868034362793, 2.1442084312438965, 2.2458302974700928, 2.347452163696289, 2.4490737915039062, 2.5506956577301025, 2.652317523956299, 2.753939151763916, 2.8555610179901123, 2.9571826457977295, 3.058804512023926, 3.160426378250122, 3.2620482444763184, 3.3636698722839355, 3.465291738510132, 3.566913604736328]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 7.0, 4.0, 8.0, 6.0, 6.0, 10.0, 10.0, 31.0, 26.0, 58.0, 63.0, 95.0, 158.0, 388.0, 959.0, 3720.0, 36620.0, 3913386.0, 225013.0, 10465.0, 1897.0, 631.0, 309.0, 143.0, 100.0, 60.0, 39.0, 23.0, 19.0, 10.0, 7.0, 7.0, 5.0, 5.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.5703125, -2.486724853515625, -2.40313720703125, -2.319549560546875, -2.2359619140625, -2.152374267578125, -2.06878662109375, -1.985198974609375, -1.901611328125, -1.818023681640625, -1.73443603515625, -1.650848388671875, -1.5672607421875, -1.483673095703125, -1.40008544921875, -1.316497802734375, -1.23291015625, -1.149322509765625, -1.06573486328125, -0.982147216796875, -0.8985595703125, -0.814971923828125, -0.73138427734375, -0.647796630859375, -0.564208984375, -0.480621337890625, -0.39703369140625, -0.313446044921875, -0.2298583984375, -0.146270751953125, -0.06268310546875, 0.020904541015625, 0.1044921875, 0.188079833984375, 0.27166748046875, 0.355255126953125, 0.4388427734375, 0.522430419921875, 0.60601806640625, 0.689605712890625, 0.773193359375, 0.856781005859375, 0.94036865234375, 1.023956298828125, 1.1075439453125, 1.191131591796875, 1.27471923828125, 1.358306884765625, 1.44189453125, 1.525482177734375, 1.60906982421875, 1.692657470703125, 1.7762451171875, 1.859832763671875, 1.94342041015625, 2.027008056640625, 2.110595703125, 2.194183349609375, 2.27777099609375, 2.361358642578125, 2.4449462890625, 2.528533935546875, 2.61212158203125, 2.695709228515625, 2.779296875]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 7.0, 9.0, 12.0, 16.0, 26.0, 41.0, 53.0, 63.0, 73.0, 70.0, 82.0, 81.0, 82.0, 69.0, 77.0, 57.0, 38.0, 43.0, 36.0, 16.0, 16.0, 16.0, 6.0, 6.0, 6.0, 4.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.275390625, -0.268463134765625, -0.26153564453125, -0.254608154296875, -0.2476806640625, -0.240753173828125, -0.23382568359375, -0.226898193359375, -0.219970703125, -0.213043212890625, -0.20611572265625, -0.199188232421875, -0.1922607421875, -0.185333251953125, -0.17840576171875, -0.171478271484375, -0.16455078125, -0.157623291015625, -0.15069580078125, -0.143768310546875, -0.1368408203125, -0.129913330078125, -0.12298583984375, -0.116058349609375, -0.109130859375, -0.102203369140625, -0.09527587890625, -0.088348388671875, -0.0814208984375, -0.074493408203125, -0.06756591796875, -0.060638427734375, -0.0537109375, -0.046783447265625, -0.03985595703125, -0.032928466796875, -0.0260009765625, -0.019073486328125, -0.01214599609375, -0.005218505859375, 0.001708984375, 0.008636474609375, 0.01556396484375, 0.022491455078125, 0.0294189453125, 0.036346435546875, 0.04327392578125, 0.050201416015625, 0.05712890625, 0.064056396484375, 0.07098388671875, 0.077911376953125, 0.0848388671875, 0.091766357421875, 0.09869384765625, 0.105621337890625, 0.112548828125, 0.119476318359375, 0.12640380859375, 0.133331298828125, 0.1402587890625, 0.147186279296875, 0.15411376953125, 0.161041259765625, 0.16796875]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 1.0, 10.0, 12.0, 19.0, 29.0, 49.0, 89.0, 114.0, 228.0, 477.0, 1644.0, 12462.0, 2526764.0, 1638130.0, 11591.0, 1576.0, 550.0, 230.0, 136.0, 57.0, 36.0, 30.0, 22.0, 14.0, 7.0, 6.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.05078125, -2.955413818359375, -2.86004638671875, -2.764678955078125, -2.6693115234375, -2.573944091796875, -2.47857666015625, -2.383209228515625, -2.287841796875, -2.192474365234375, -2.09710693359375, -2.001739501953125, -1.9063720703125, -1.811004638671875, -1.71563720703125, -1.620269775390625, -1.52490234375, -1.429534912109375, -1.33416748046875, -1.238800048828125, -1.1434326171875, -1.048065185546875, -0.95269775390625, -0.857330322265625, -0.761962890625, -0.666595458984375, -0.57122802734375, -0.475860595703125, -0.3804931640625, -0.285125732421875, -0.18975830078125, -0.094390869140625, 0.0009765625, 0.096343994140625, 0.19171142578125, 0.287078857421875, 0.3824462890625, 0.477813720703125, 0.57318115234375, 0.668548583984375, 0.763916015625, 0.859283447265625, 0.95465087890625, 1.050018310546875, 1.1453857421875, 1.240753173828125, 1.33612060546875, 1.431488037109375, 1.52685546875, 1.622222900390625, 1.71759033203125, 1.812957763671875, 1.9083251953125, 2.003692626953125, 2.09906005859375, 2.194427490234375, 2.289794921875, 2.385162353515625, 2.48052978515625, 2.575897216796875, 2.6712646484375, 2.766632080078125, 2.86199951171875, 2.957366943359375, 3.052734375]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 5.0, 4.0, 6.0, 5.0, 14.0, 17.0, 27.0, 48.0, 89.0, 164.0, 377.0, 1757.0, 1028.0, 265.0, 125.0, 69.0, 31.0, 20.0, 9.0, 10.0, 6.0, 4.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.44140625, -0.4229278564453125, -0.404449462890625, -0.3859710693359375, -0.36749267578125, -0.3490142822265625, -0.330535888671875, -0.3120574951171875, -0.2935791015625, -0.2751007080078125, -0.256622314453125, -0.2381439208984375, -0.21966552734375, -0.2011871337890625, -0.182708740234375, -0.1642303466796875, -0.145751953125, -0.1272735595703125, -0.108795166015625, -0.0903167724609375, -0.07183837890625, -0.0533599853515625, -0.034881591796875, -0.0164031982421875, 0.0020751953125, 0.0205535888671875, 0.039031982421875, 0.0575103759765625, 0.07598876953125, 0.0944671630859375, 0.112945556640625, 0.1314239501953125, 0.14990234375, 0.1683807373046875, 0.186859130859375, 0.2053375244140625, 0.22381591796875, 0.2422943115234375, 0.260772705078125, 0.2792510986328125, 0.2977294921875, 0.3162078857421875, 0.334686279296875, 0.3531646728515625, 0.37164306640625, 0.3901214599609375, 0.408599853515625, 0.4270782470703125, 0.445556640625, 0.4640350341796875, 0.482513427734375, 0.5009918212890625, 0.51947021484375, 0.5379486083984375, 0.556427001953125, 0.5749053955078125, 0.5933837890625, 0.6118621826171875, 0.630340576171875, 0.6488189697265625, 0.66729736328125, 0.6857757568359375, 0.704254150390625, 0.7227325439453125, 0.7412109375]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 5.0, 4.0, 6.0, 16.0, 56.0, 105.0, 218.0, 298.0, 182.0, 77.0, 19.0, 10.0, 5.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.7136688232421875, -7.544812202453613, -7.375955581665039, -7.207098960876465, -7.038242340087891, -6.869385719299316, -6.700529098510742, -6.53167200088501, -6.3628153800964355, -6.193958759307861, -6.025102138519287, -5.856245517730713, -5.687388896942139, -5.518531799316406, -5.349675178527832, -5.180818557739258, -5.011961936950684, -4.843105316162109, -4.674248695373535, -4.505392074584961, -4.336535453796387, -4.1676788330078125, -3.998821973800659, -3.829965114593506, -3.66110897064209, -3.4922523498535156, -3.3233957290649414, -3.154539108276367, -2.985682249069214, -2.8168256282806396, -2.6479690074920654, -2.479112148284912, -2.310255527496338, -2.1413989067077637, -1.9725421667099, -1.8036855459213257, -1.634828805923462, -1.4659721851348877, -1.2971155643463135, -1.1282588243484497, -0.9594022035598755, -0.7905455231666565, -0.6216888427734375, -0.4528322219848633, -0.2839755415916443, -0.11511886119842529, 0.053737759590148926, 0.2225944995880127, 0.3914511203765869, 0.5603078007698059, 0.7291644811630249, 0.8980211019515991, 1.066877841949463, 1.235734462738037, 1.4045910835266113, 1.573447823524475, 1.7423044443130493, 1.9111610651016235, 2.0800178050994873, 2.2488744258880615, 2.4177310466766357, 2.586587905883789, 2.7554445266723633, 2.9243011474609375, 3.0931577682495117]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 5.0, 2.0, 2.0, 5.0, 8.0, 9.0, 10.0, 15.0, 17.0, 14.0, 25.0, 28.0, 30.0, 36.0, 43.0, 48.0, 60.0, 61.0, 54.0, 62.0, 54.0, 56.0, 60.0, 63.0, 50.0, 41.0, 34.0, 20.0, 23.0, 20.0, 18.0, 7.0, 10.0, 5.0, 5.0, 7.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.6444146633148193, -1.5893096923828125, -1.5342048406600952, -1.4790998697280884, -1.423995018005371, -1.3688900470733643, -1.3137850761413574, -1.2586801052093506, -1.2035752534866333, -1.1484702825546265, -1.0933654308319092, -1.0382604598999023, -0.9831555485725403, -0.9280506372451782, -0.8729456663131714, -0.8178407549858093, -0.7627358436584473, -0.7076309323310852, -0.6525260210037231, -0.5974210500717163, -0.5423161387443542, -0.4872112274169922, -0.43210628628730774, -0.3770013451576233, -0.32189643383026123, -0.26679152250289917, -0.21168658137321472, -0.15658165514469147, -0.10147672891616821, -0.04637181758880615, 0.008733123540878296, 0.06383806467056274, 0.1189429759979248, 0.17404790222644806, 0.2291528284549713, 0.28425776958465576, 0.3393626809120178, 0.3944675922393799, 0.44957253336906433, 0.5046774744987488, 0.5597823858261108, 0.6148872971534729, 0.669992208480835, 0.7250971794128418, 0.7802020907402039, 0.8353070020675659, 0.8904119729995728, 0.9455168843269348, 1.0006217956542969, 1.0557267665863037, 1.110831618309021, 1.1659365892410278, 1.2210414409637451, 1.276146411895752, 1.3312513828277588, 1.3863563537597656, 1.441461205482483, 1.4965661764144897, 1.551671028137207, 1.6067759990692139, 1.6618809700012207, 1.716985821723938, 1.7720907926559448, 1.827195644378662, 1.882300615310669]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 4.0, 4.0, 1.0, 13.0, 3.0, 7.0, 14.0, 14.0, 22.0, 22.0, 31.0, 48.0, 92.0, 131.0, 207.0, 441.0, 1009.0, 3575.0, 21835.0, 329025.0, 633819.0, 49643.0, 5835.0, 1488.0, 597.0, 243.0, 152.0, 89.0, 56.0, 37.0, 27.0, 23.0, 18.0, 8.0, 5.0, 4.0, 3.0, 4.0, 5.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0], "bins": [-1.6923828125, -1.6343536376953125, -1.576324462890625, -1.5182952880859375, -1.46026611328125, -1.4022369384765625, -1.344207763671875, -1.2861785888671875, -1.2281494140625, -1.1701202392578125, -1.112091064453125, -1.0540618896484375, -0.99603271484375, -0.9380035400390625, -0.879974365234375, -0.8219451904296875, -0.763916015625, -0.7058868408203125, -0.647857666015625, -0.5898284912109375, -0.53179931640625, -0.4737701416015625, -0.415740966796875, -0.3577117919921875, -0.2996826171875, -0.2416534423828125, -0.183624267578125, -0.1255950927734375, -0.06756591796875, -0.0095367431640625, 0.048492431640625, 0.1065216064453125, 0.16455078125, 0.2225799560546875, 0.280609130859375, 0.3386383056640625, 0.39666748046875, 0.4546966552734375, 0.512725830078125, 0.5707550048828125, 0.6287841796875, 0.6868133544921875, 0.744842529296875, 0.8028717041015625, 0.86090087890625, 0.9189300537109375, 0.976959228515625, 1.0349884033203125, 1.093017578125, 1.1510467529296875, 1.209075927734375, 1.2671051025390625, 1.32513427734375, 1.3831634521484375, 1.441192626953125, 1.4992218017578125, 1.5572509765625, 1.6152801513671875, 1.673309326171875, 1.7313385009765625, 1.78936767578125, 1.8473968505859375, 1.905426025390625, 1.9634552001953125, 2.021484375]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 2.0, 7.0, 6.0, 16.0, 12.0, 18.0, 27.0, 34.0, 34.0, 45.0, 51.0, 66.0, 70.0, 66.0, 71.0, 67.0, 73.0, 58.0, 51.0, 49.0, 35.0, 29.0, 36.0, 23.0, 14.0, 13.0, 12.0, 8.0, 2.0, 6.0, 1.0, 4.0, 0.0, 2.0, 1.0, 2.0, 1.0], "bins": [-0.270751953125, -0.26425933837890625, -0.2577667236328125, -0.25127410888671875, -0.244781494140625, -0.23828887939453125, -0.2317962646484375, -0.22530364990234375, -0.21881103515625, -0.21231842041015625, -0.2058258056640625, -0.19933319091796875, -0.192840576171875, -0.18634796142578125, -0.1798553466796875, -0.17336273193359375, -0.1668701171875, -0.16037750244140625, -0.1538848876953125, -0.14739227294921875, -0.140899658203125, -0.13440704345703125, -0.1279144287109375, -0.12142181396484375, -0.11492919921875, -0.10843658447265625, -0.1019439697265625, -0.09545135498046875, -0.088958740234375, -0.08246612548828125, -0.0759735107421875, -0.06948089599609375, -0.06298828125, -0.05649566650390625, -0.0500030517578125, -0.04351043701171875, -0.037017822265625, -0.03052520751953125, -0.0240325927734375, -0.01753997802734375, -0.01104736328125, -0.00455474853515625, 0.0019378662109375, 0.00843048095703125, 0.014923095703125, 0.02141571044921875, 0.0279083251953125, 0.03440093994140625, 0.0408935546875, 0.04738616943359375, 0.0538787841796875, 0.06037139892578125, 0.066864013671875, 0.07335662841796875, 0.0798492431640625, 0.08634185791015625, 0.09283447265625, 0.09932708740234375, 0.1058197021484375, 0.11231231689453125, 0.118804931640625, 0.12529754638671875, 0.1317901611328125, 0.13828277587890625, 0.144775390625]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 3.0, 4.0, 4.0, 3.0, 7.0, 12.0, 23.0, 9.0, 24.0, 37.0, 48.0, 51.0, 100.0, 209.0, 419.0, 1239.0, 3881.0, 16084.0, 80325.0, 368982.0, 441297.0, 107432.0, 20841.0, 4962.0, 1468.0, 518.0, 222.0, 113.0, 80.0, 36.0, 34.0, 28.0, 13.0, 15.0, 13.0, 6.0, 6.0, 4.0, 0.0, 2.0, 1.0, 5.0, 1.0, 1.0, 1.0], "bins": [-1.0595703125, -1.0324630737304688, -1.0053558349609375, -0.9782485961914062, -0.951141357421875, -0.9240341186523438, -0.8969268798828125, -0.8698196411132812, -0.84271240234375, -0.8156051635742188, -0.7884979248046875, -0.7613906860351562, -0.734283447265625, -0.7071762084960938, -0.6800689697265625, -0.6529617309570312, -0.6258544921875, -0.5987472534179688, -0.5716400146484375, -0.5445327758789062, -0.517425537109375, -0.49031829833984375, -0.4632110595703125, -0.43610382080078125, -0.40899658203125, -0.38188934326171875, -0.3547821044921875, -0.32767486572265625, -0.300567626953125, -0.27346038818359375, -0.2463531494140625, -0.21924591064453125, -0.192138671875, -0.16503143310546875, -0.1379241943359375, -0.11081695556640625, -0.083709716796875, -0.05660247802734375, -0.0294952392578125, -0.00238800048828125, 0.02471923828125, 0.05182647705078125, 0.0789337158203125, 0.10604095458984375, 0.133148193359375, 0.16025543212890625, 0.1873626708984375, 0.21446990966796875, 0.2415771484375, 0.26868438720703125, 0.2957916259765625, 0.32289886474609375, 0.350006103515625, 0.37711334228515625, 0.4042205810546875, 0.43132781982421875, 0.45843505859375, 0.48554229736328125, 0.5126495361328125, 0.5397567749023438, 0.566864013671875, 0.5939712524414062, 0.6210784912109375, 0.6481857299804688, 0.67529296875]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 2.0, 3.0, 7.0, 12.0, 12.0, 11.0, 15.0, 19.0, 19.0, 21.0, 35.0, 40.0, 42.0, 51.0, 56.0, 66.0, 68.0, 57.0, 50.0, 72.0, 66.0, 51.0, 52.0, 34.0, 29.0, 35.0, 17.0, 24.0, 12.0, 7.0, 7.0, 9.0, 9.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.0595703125, -1.03350830078125, -1.0074462890625, -0.98138427734375, -0.955322265625, -0.92926025390625, -0.9031982421875, -0.87713623046875, -0.85107421875, -0.82501220703125, -0.7989501953125, -0.77288818359375, -0.746826171875, -0.72076416015625, -0.6947021484375, -0.66864013671875, -0.642578125, -0.61651611328125, -0.5904541015625, -0.56439208984375, -0.538330078125, -0.51226806640625, -0.4862060546875, -0.46014404296875, -0.43408203125, -0.40802001953125, -0.3819580078125, -0.35589599609375, -0.329833984375, -0.30377197265625, -0.2777099609375, -0.25164794921875, -0.2255859375, -0.19952392578125, -0.1734619140625, -0.14739990234375, -0.121337890625, -0.09527587890625, -0.0692138671875, -0.04315185546875, -0.01708984375, 0.00897216796875, 0.0350341796875, 0.06109619140625, 0.087158203125, 0.11322021484375, 0.1392822265625, 0.16534423828125, 0.19140625, 0.21746826171875, 0.2435302734375, 0.26959228515625, 0.295654296875, 0.32171630859375, 0.3477783203125, 0.37384033203125, 0.39990234375, 0.42596435546875, 0.4520263671875, 0.47808837890625, 0.504150390625, 0.53021240234375, 0.5562744140625, 0.58233642578125, 0.6083984375]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 5.0, 2.0, 1.0, 3.0, 10.0, 9.0, 15.0, 13.0, 23.0, 33.0, 44.0, 72.0, 125.0, 231.0, 521.0, 1740.0, 7855.0, 68324.0, 552323.0, 373617.0, 36398.0, 4912.0, 1323.0, 440.0, 183.0, 104.0, 60.0, 54.0, 41.0, 23.0, 14.0, 14.0, 9.0, 6.0, 6.0, 1.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.533203125, -0.51593017578125, -0.4986572265625, -0.48138427734375, -0.464111328125, -0.44683837890625, -0.4295654296875, -0.41229248046875, -0.39501953125, -0.37774658203125, -0.3604736328125, -0.34320068359375, -0.325927734375, -0.30865478515625, -0.2913818359375, -0.27410888671875, -0.2568359375, -0.23956298828125, -0.2222900390625, -0.20501708984375, -0.187744140625, -0.17047119140625, -0.1531982421875, -0.13592529296875, -0.11865234375, -0.10137939453125, -0.0841064453125, -0.06683349609375, -0.049560546875, -0.03228759765625, -0.0150146484375, 0.00225830078125, 0.01953125, 0.03680419921875, 0.0540771484375, 0.07135009765625, 0.088623046875, 0.10589599609375, 0.1231689453125, 0.14044189453125, 0.15771484375, 0.17498779296875, 0.1922607421875, 0.20953369140625, 0.226806640625, 0.24407958984375, 0.2613525390625, 0.27862548828125, 0.2958984375, 0.31317138671875, 0.3304443359375, 0.34771728515625, 0.364990234375, 0.38226318359375, 0.3995361328125, 0.41680908203125, 0.43408203125, 0.45135498046875, 0.4686279296875, 0.48590087890625, 0.503173828125, 0.52044677734375, 0.5377197265625, 0.55499267578125, 0.572265625]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 2.0, 8.0, 14.0, 17.0, 21.0, 48.0, 74.0, 90.0, 141.0, 151.0, 136.0, 111.0, 72.0, 44.0, 23.0, 22.0, 16.0, 7.0, 5.0, 4.0, 1.0, 3.0, 0.0, 2.0, 3.0], "bins": [-0.0001881122589111328, -0.00018424354493618011, -0.00018037483096122742, -0.00017650611698627472, -0.00017263740301132202, -0.00016876868903636932, -0.00016489997506141663, -0.00016103126108646393, -0.00015716254711151123, -0.00015329383313655853, -0.00014942511916160583, -0.00014555640518665314, -0.00014168769121170044, -0.00013781897723674774, -0.00013395026326179504, -0.00013008154928684235, -0.00012621283531188965, -0.00012234412133693695, -0.00011847540736198425, -0.00011460669338703156, -0.00011073797941207886, -0.00010686926543712616, -0.00010300055146217346, -9.913183748722076e-05, -9.526312351226807e-05, -9.139440953731537e-05, -8.752569556236267e-05, -8.365698158740997e-05, -7.978826761245728e-05, -7.591955363750458e-05, -7.205083966255188e-05, -6.818212568759918e-05, -6.431341171264648e-05, -6.044469773769379e-05, -5.657598376274109e-05, -5.270726978778839e-05, -4.883855581283569e-05, -4.4969841837882996e-05, -4.11011278629303e-05, -3.72324138879776e-05, -3.33636999130249e-05, -2.9494985938072205e-05, -2.5626271963119507e-05, -2.175755798816681e-05, -1.788884401321411e-05, -1.4020130038261414e-05, -1.0151416063308716e-05, -6.282702088356018e-06, -2.4139881134033203e-06, 1.4547258615493774e-06, 5.323439836502075e-06, 9.192153811454773e-06, 1.306086778640747e-05, 1.692958176136017e-05, 2.0798295736312866e-05, 2.4667009711265564e-05, 2.8535723686218262e-05, 3.240443766117096e-05, 3.627315163612366e-05, 4.0141865611076355e-05, 4.401057958602905e-05, 4.787929356098175e-05, 5.174800753593445e-05, 5.5616721510887146e-05, 5.9485435485839844e-05]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 1.0, 2.0, 1.0, 3.0, 4.0, 8.0, 14.0, 29.0, 35.0, 72.0, 134.0, 260.0, 696.0, 2208.0, 13769.0, 380423.0, 620703.0, 25742.0, 2989.0, 838.0, 319.0, 144.0, 61.0, 36.0, 25.0, 16.0, 12.0, 6.0, 2.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.59765625, -0.5738525390625, -0.550048828125, -0.5262451171875, -0.50244140625, -0.4786376953125, -0.454833984375, -0.4310302734375, -0.4072265625, -0.3834228515625, -0.359619140625, -0.3358154296875, -0.31201171875, -0.2882080078125, -0.264404296875, -0.2406005859375, -0.216796875, -0.1929931640625, -0.169189453125, -0.1453857421875, -0.12158203125, -0.0977783203125, -0.073974609375, -0.0501708984375, -0.0263671875, -0.0025634765625, 0.021240234375, 0.0450439453125, 0.06884765625, 0.0926513671875, 0.116455078125, 0.1402587890625, 0.1640625, 0.1878662109375, 0.211669921875, 0.2354736328125, 0.25927734375, 0.2830810546875, 0.306884765625, 0.3306884765625, 0.3544921875, 0.3782958984375, 0.402099609375, 0.4259033203125, 0.44970703125, 0.4735107421875, 0.497314453125, 0.5211181640625, 0.544921875, 0.5687255859375, 0.592529296875, 0.6163330078125, 0.64013671875, 0.6639404296875, 0.687744140625, 0.7115478515625, 0.7353515625, 0.7591552734375, 0.782958984375, 0.8067626953125, 0.83056640625, 0.8543701171875, 0.878173828125, 0.9019775390625, 0.92578125]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 9.0, 9.0, 5.0, 17.0, 21.0, 36.0, 30.0, 50.0, 66.0, 82.0, 87.0, 90.0, 116.0, 86.0, 80.0, 51.0, 49.0, 34.0, 30.0, 21.0, 12.0, 7.0, 5.0, 3.0, 4.0, 2.0, 0.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.3115234375, -0.2994384765625, -0.287353515625, -0.2752685546875, -0.26318359375, -0.2510986328125, -0.239013671875, -0.2269287109375, -0.21484375, -0.2027587890625, -0.190673828125, -0.1785888671875, -0.16650390625, -0.1544189453125, -0.142333984375, -0.1302490234375, -0.1181640625, -0.1060791015625, -0.093994140625, -0.0819091796875, -0.06982421875, -0.0577392578125, -0.045654296875, -0.0335693359375, -0.021484375, -0.0093994140625, 0.002685546875, 0.0147705078125, 0.02685546875, 0.0389404296875, 0.051025390625, 0.0631103515625, 0.0751953125, 0.0872802734375, 0.099365234375, 0.1114501953125, 0.12353515625, 0.1356201171875, 0.147705078125, 0.1597900390625, 0.171875, 0.1839599609375, 0.196044921875, 0.2081298828125, 0.22021484375, 0.2322998046875, 0.244384765625, 0.2564697265625, 0.2685546875, 0.2806396484375, 0.292724609375, 0.3048095703125, 0.31689453125, 0.3289794921875, 0.341064453125, 0.3531494140625, 0.365234375, 0.3773193359375, 0.389404296875, 0.4014892578125, 0.41357421875, 0.4256591796875, 0.437744140625, 0.4498291015625, 0.4619140625]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 5.0, 17.0, 38.0, 98.0, 195.0, 307.0, 183.0, 110.0, 40.0, 9.0, 7.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.664787292480469, -9.301289558410645, -8.937792778015137, -8.574295043945312, -8.210797309875488, -7.847300052642822, -7.483802795410156, -7.120305061340332, -6.756807804107666, -6.393310546875, -6.029812812805176, -5.66631555557251, -5.302818298339844, -4.9393205642700195, -4.5758233070373535, -4.2123260498046875, -3.8488283157348633, -3.485330820083618, -3.121833324432373, -2.758336067199707, -2.394838571548462, -2.031341075897217, -1.6678438186645508, -1.3043463230133057, -0.9408488273620605, -0.5773513913154602, -0.21385395526885986, 0.1496434211730957, 0.5131409168243408, 0.8766384124755859, 1.240135669708252, 1.603633165359497, 1.9671306610107422, 2.3306281566619873, 2.6941256523132324, 3.0576229095458984, 3.4211204051971436, 3.7846179008483887, 4.148115158081055, 4.511612892150879, 4.875110149383545, 5.238607406616211, 5.602105140686035, 5.965602397918701, 6.329099655151367, 6.692597389221191, 7.056094646453857, 7.419591903686523, 7.783089637756348, 8.146587371826172, 8.51008415222168, 8.873581886291504, 9.237079620361328, 9.600576400756836, 9.96407413482666, 10.327571868896484, 10.691068649291992, 11.054566383361816, 11.418063163757324, 11.781560897827148, 12.145058631896973, 12.508556365966797, 12.872053146362305, 13.235550880432129, 13.599048614501953]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 3.0, 3.0, 10.0, 4.0, 13.0, 5.0, 8.0, 17.0, 14.0, 16.0, 24.0, 17.0, 27.0, 25.0, 31.0, 26.0, 25.0, 27.0, 32.0, 29.0, 39.0, 31.0, 36.0, 50.0, 39.0, 26.0, 30.0, 53.0, 36.0, 35.0, 25.0, 33.0, 24.0, 27.0, 18.0, 23.0, 22.0, 15.0, 17.0, 16.0, 10.0, 8.0, 10.0, 3.0, 7.0, 2.0, 7.0, 6.0, 1.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-2.555950164794922, -2.475146770477295, -2.394343376159668, -2.313539743423462, -2.232736349105835, -2.151932954788208, -2.071129560470581, -1.990326166152954, -1.9095226526260376, -1.8287192583084106, -1.7479157447814941, -1.6671123504638672, -1.5863089561462402, -1.5055054426193237, -1.4247020483016968, -1.3438985347747803, -1.2630951404571533, -1.1822917461395264, -1.1014882326126099, -1.020684838294983, -0.9398813843727112, -0.8590779304504395, -0.7782745361328125, -0.6974710822105408, -0.616667628288269, -0.5358641743659973, -0.455060750246048, -0.37425732612609863, -0.2934538722038269, -0.21265041828155518, -0.13184699416160583, -0.051043570041656494, 0.029759883880615234, 0.11056332290172577, 0.1913667619228363, 0.27217018604278564, 0.3529736399650574, 0.4337770938873291, 0.514580488204956, 0.5953839421272278, 0.6761873960494995, 0.7569908499717712, 0.837794303894043, 0.9185976982116699, 0.9994011521339417, 1.0802046060562134, 1.1610080003738403, 1.2418115139007568, 1.3226149082183838, 1.4034183025360107, 1.4842218160629272, 1.5650252103805542, 1.6458287239074707, 1.7266321182250977, 1.8074355125427246, 1.8882389068603516, 1.969042420387268, 2.0498459339141846, 2.1306493282318115, 2.2114527225494385, 2.2922561168670654, 2.3730597496032715, 2.4538631439208984, 2.5346665382385254, 2.6154699325561523]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [3.0, 3.0, 5.0, 4.0, 4.0, 4.0, 7.0, 8.0, 10.0, 12.0, 23.0, 28.0, 31.0, 50.0, 84.0, 129.0, 246.0, 519.0, 1502.0, 6358.0, 51297.0, 3955624.0, 162089.0, 12086.0, 2387.0, 897.0, 342.0, 209.0, 104.0, 72.0, 59.0, 21.0, 17.0, 20.0, 19.0, 6.0, 1.0, 6.0, 8.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.796875, -1.714019775390625, -1.63116455078125, -1.548309326171875, -1.4654541015625, -1.382598876953125, -1.29974365234375, -1.216888427734375, -1.134033203125, -1.051177978515625, -0.96832275390625, -0.885467529296875, -0.8026123046875, -0.719757080078125, -0.63690185546875, -0.554046630859375, -0.47119140625, -0.388336181640625, -0.30548095703125, -0.222625732421875, -0.1397705078125, -0.056915283203125, 0.02593994140625, 0.108795166015625, 0.191650390625, 0.274505615234375, 0.35736083984375, 0.440216064453125, 0.5230712890625, 0.605926513671875, 0.68878173828125, 0.771636962890625, 0.8544921875, 0.937347412109375, 1.02020263671875, 1.103057861328125, 1.1859130859375, 1.268768310546875, 1.35162353515625, 1.434478759765625, 1.517333984375, 1.600189208984375, 1.68304443359375, 1.765899658203125, 1.8487548828125, 1.931610107421875, 2.01446533203125, 2.097320556640625, 2.18017578125, 2.263031005859375, 2.34588623046875, 2.428741455078125, 2.5115966796875, 2.594451904296875, 2.67730712890625, 2.760162353515625, 2.843017578125, 2.925872802734375, 3.00872802734375, 3.091583251953125, 3.1744384765625, 3.257293701171875, 3.34014892578125, 3.423004150390625, 3.505859375]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 5.0, 2.0, 7.0, 8.0, 7.0, 16.0, 22.0, 21.0, 27.0, 25.0, 49.0, 48.0, 57.0, 58.0, 57.0, 72.0, 71.0, 67.0, 63.0, 55.0, 44.0, 42.0, 41.0, 40.0, 26.0, 16.0, 19.0, 11.0, 10.0, 6.0, 8.0, 4.0, 3.0, 2.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.257568359375, -0.2508506774902344, -0.24413299560546875, -0.23741531372070312, -0.2306976318359375, -0.22397994995117188, -0.21726226806640625, -0.21054458618164062, -0.203826904296875, -0.19710922241210938, -0.19039154052734375, -0.18367385864257812, -0.1769561767578125, -0.17023849487304688, -0.16352081298828125, -0.15680313110351562, -0.15008544921875, -0.14336776733398438, -0.13665008544921875, -0.12993240356445312, -0.1232147216796875, -0.11649703979492188, -0.10977935791015625, -0.10306167602539062, -0.096343994140625, -0.08962631225585938, -0.08290863037109375, -0.07619094848632812, -0.0694732666015625, -0.06275558471679688, -0.05603790283203125, -0.049320220947265625, -0.0426025390625, -0.035884857177734375, -0.02916717529296875, -0.022449493408203125, -0.0157318115234375, -0.009014129638671875, -0.00229644775390625, 0.004421234130859375, 0.011138916015625, 0.017856597900390625, 0.02457427978515625, 0.031291961669921875, 0.0380096435546875, 0.044727325439453125, 0.05144500732421875, 0.058162689208984375, 0.06488037109375, 0.07159805297851562, 0.07831573486328125, 0.08503341674804688, 0.0917510986328125, 0.09846878051757812, 0.10518646240234375, 0.11190414428710938, 0.118621826171875, 0.12533950805664062, 0.13205718994140625, 0.13877487182617188, 0.1454925537109375, 0.15221023559570312, 0.15892791748046875, 0.16564559936523438, 0.17236328125]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 9.0, 7.0, 4.0, 4.0, 8.0, 6.0, 11.0, 20.0, 21.0, 30.0, 46.0, 58.0, 96.0, 149.0, 236.0, 436.0, 866.0, 1884.0, 4965.0, 17088.0, 88733.0, 3479543.0, 535310.0, 47397.0, 10815.0, 3540.0, 1446.0, 666.0, 329.0, 172.0, 109.0, 68.0, 63.0, 36.0, 26.0, 17.0, 17.0, 11.0, 5.0, 10.0, 8.0, 4.0, 8.0, 4.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.32421875, -1.2812652587890625, -1.238311767578125, -1.1953582763671875, -1.15240478515625, -1.1094512939453125, -1.066497802734375, -1.0235443115234375, -0.9805908203125, -0.9376373291015625, -0.894683837890625, -0.8517303466796875, -0.80877685546875, -0.7658233642578125, -0.722869873046875, -0.6799163818359375, -0.636962890625, -0.5940093994140625, -0.551055908203125, -0.5081024169921875, -0.46514892578125, -0.4221954345703125, -0.379241943359375, -0.3362884521484375, -0.2933349609375, -0.2503814697265625, -0.207427978515625, -0.1644744873046875, -0.12152099609375, -0.0785675048828125, -0.035614013671875, 0.0073394775390625, 0.05029296875, 0.0932464599609375, 0.136199951171875, 0.1791534423828125, 0.22210693359375, 0.2650604248046875, 0.308013916015625, 0.3509674072265625, 0.3939208984375, 0.4368743896484375, 0.479827880859375, 0.5227813720703125, 0.56573486328125, 0.6086883544921875, 0.651641845703125, 0.6945953369140625, 0.737548828125, 0.7805023193359375, 0.823455810546875, 0.8664093017578125, 0.90936279296875, 0.9523162841796875, 0.995269775390625, 1.0382232666015625, 1.0811767578125, 1.1241302490234375, 1.167083740234375, 1.2100372314453125, 1.25299072265625, 1.2959442138671875, 1.338897705078125, 1.3818511962890625, 1.4248046875]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 6.0, 2.0, 6.0, 12.0, 3.0, 13.0, 21.0, 23.0, 51.0, 79.0, 126.0, 263.0, 1483.0, 1294.0, 293.0, 130.0, 90.0, 49.0, 44.0, 29.0, 15.0, 23.0, 4.0, 7.0, 3.0, 3.0, 1.0, 0.0, 2.0, 3.0, 2.0, 1.0], "bins": [-0.64794921875, -0.633544921875, -0.619140625, -0.604736328125, -0.59033203125, -0.575927734375, -0.5615234375, -0.547119140625, -0.53271484375, -0.518310546875, -0.50390625, -0.489501953125, -0.47509765625, -0.460693359375, -0.4462890625, -0.431884765625, -0.41748046875, -0.403076171875, -0.388671875, -0.374267578125, -0.35986328125, -0.345458984375, -0.3310546875, -0.316650390625, -0.30224609375, -0.287841796875, -0.2734375, -0.259033203125, -0.24462890625, -0.230224609375, -0.2158203125, -0.201416015625, -0.18701171875, -0.172607421875, -0.158203125, -0.143798828125, -0.12939453125, -0.114990234375, -0.1005859375, -0.086181640625, -0.07177734375, -0.057373046875, -0.04296875, -0.028564453125, -0.01416015625, 0.000244140625, 0.0146484375, 0.029052734375, 0.04345703125, 0.057861328125, 0.072265625, 0.086669921875, 0.10107421875, 0.115478515625, 0.1298828125, 0.144287109375, 0.15869140625, 0.173095703125, 0.1875, 0.201904296875, 0.21630859375, 0.230712890625, 0.2451171875, 0.259521484375, 0.27392578125]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 4.0, 6.0, 55.0, 205.0, 406.0, 243.0, 65.0, 9.0, 5.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-7.044795513153076, -6.840387344360352, -6.635979175567627, -6.431571006774902, -6.2271623611450195, -6.022754192352295, -5.81834602355957, -5.613937854766846, -5.409529685974121, -5.2051215171813965, -5.000713348388672, -4.796304702758789, -4.5918965339660645, -4.38748836517334, -4.183080196380615, -3.9786720275878906, -3.774263381958008, -3.569855213165283, -3.3654468059539795, -3.161038637161255, -2.956630229949951, -2.7522220611572266, -2.547813892364502, -2.3434057235717773, -2.1389973163604736, -1.9345890283584595, -1.7301807403564453, -1.5257725715637207, -1.3213642835617065, -1.1169559955596924, -0.9125478267669678, -0.7081395387649536, -0.5037307739257812, -0.2993225157260895, -0.0949142575263977, 0.10949397087097168, 0.31390225887298584, 0.518310546875, 0.7227187156677246, 0.9271270036697388, 1.131535291671753, 1.335943579673767, 1.5403518676757812, 1.7447600364685059, 1.94916832447052, 2.153576612472534, 2.357984781265259, 2.5623931884765625, 2.766801357269287, 2.9712095260620117, 3.1756179332733154, 3.38002610206604, 3.5844345092773438, 3.7888426780700684, 3.993250846862793, 4.197659015655518, 4.402067184448242, 4.606475353240967, 4.810883522033691, 5.015292167663574, 5.219700336456299, 5.424108505249023, 5.628516674041748, 5.832924842834473, 6.0373334884643555]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 7.0, 9.0, 5.0, 12.0, 15.0, 18.0, 18.0, 23.0, 27.0, 35.0, 41.0, 57.0, 51.0, 46.0, 64.0, 52.0, 59.0, 52.0, 60.0, 56.0, 42.0, 63.0, 37.0, 34.0, 28.0, 27.0, 20.0, 12.0, 7.0, 12.0, 6.0, 4.0, 4.0, 6.0, 1.0, 3.0, 0.0, 1.0], "bins": [-1.8850996494293213, -1.8407809734344482, -1.7964622974395752, -1.7521436214447021, -1.707824945449829, -1.6635061502456665, -1.6191874742507935, -1.5748687982559204, -1.5305501222610474, -1.4862314462661743, -1.4419127702713013, -1.3975940942764282, -1.3532752990722656, -1.3089566230773926, -1.2646379470825195, -1.2203192710876465, -1.1760005950927734, -1.1316819190979004, -1.0873632431030273, -1.0430445671081543, -0.9987258315086365, -0.9544071555137634, -0.9100884199142456, -0.8657697439193726, -0.8214510679244995, -0.7771323919296265, -0.7328137159347534, -0.6884949803352356, -0.6441763043403625, -0.5998576283454895, -0.5555388927459717, -0.5112202167510986, -0.46690165996551514, -0.4225829839706421, -0.37826427817344666, -0.3339455723762512, -0.2896268963813782, -0.24530820548534393, -0.2009895145893097, -0.15667080879211426, -0.11235213279724121, -0.06803344190120697, -0.02371475100517273, 0.02060393989086151, 0.06492263078689575, 0.10924132168292999, 0.15356001257896423, 0.19787871837615967, 0.24219739437103271, 0.28651607036590576, 0.3308347761631012, 0.37515348196029663, 0.4194721579551697, 0.4637908339500427, 0.5081095695495605, 0.5524282455444336, 0.5967469215393066, 0.6410655975341797, 0.6853842735290527, 0.7297030091285706, 0.7740216851234436, 0.8183403611183167, 0.8626590967178345, 0.9069777727127075, 0.9512964487075806]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 1.0, 10.0, 4.0, 4.0, 10.0, 17.0, 17.0, 30.0, 41.0, 64.0, 122.0, 198.0, 361.0, 811.0, 2608.0, 13573.0, 167036.0, 728369.0, 120701.0, 10729.0, 2232.0, 769.0, 344.0, 193.0, 105.0, 63.0, 42.0, 34.0, 18.0, 12.0, 16.0, 10.0, 6.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.80859375, -1.747161865234375, -1.68572998046875, -1.624298095703125, -1.5628662109375, -1.501434326171875, -1.44000244140625, -1.378570556640625, -1.317138671875, -1.255706787109375, -1.19427490234375, -1.132843017578125, -1.0714111328125, -1.009979248046875, -0.94854736328125, -0.887115478515625, -0.82568359375, -0.764251708984375, -0.70281982421875, -0.641387939453125, -0.5799560546875, -0.518524169921875, -0.45709228515625, -0.395660400390625, -0.334228515625, -0.272796630859375, -0.21136474609375, -0.149932861328125, -0.0885009765625, -0.027069091796875, 0.03436279296875, 0.095794677734375, 0.1572265625, 0.218658447265625, 0.28009033203125, 0.341522216796875, 0.4029541015625, 0.464385986328125, 0.52581787109375, 0.587249755859375, 0.648681640625, 0.710113525390625, 0.77154541015625, 0.832977294921875, 0.8944091796875, 0.955841064453125, 1.01727294921875, 1.078704833984375, 1.14013671875, 1.201568603515625, 1.26300048828125, 1.324432373046875, 1.3858642578125, 1.447296142578125, 1.50872802734375, 1.570159912109375, 1.631591796875, 1.693023681640625, 1.75445556640625, 1.815887451171875, 1.8773193359375, 1.938751220703125, 2.00018310546875, 2.061614990234375, 2.123046875]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0, 3.0, 4.0, 5.0, 8.0, 11.0, 19.0, 21.0, 14.0, 35.0, 48.0, 42.0, 56.0, 47.0, 54.0, 62.0, 61.0, 70.0, 59.0, 77.0, 60.0, 46.0, 37.0, 37.0, 23.0, 29.0, 13.0, 19.0, 13.0, 11.0, 9.0, 2.0, 4.0, 5.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.2744140625, -0.2673912048339844, -0.26036834716796875, -0.2533454895019531, -0.2463226318359375, -0.23929977416992188, -0.23227691650390625, -0.22525405883789062, -0.218231201171875, -0.21120834350585938, -0.20418548583984375, -0.19716262817382812, -0.1901397705078125, -0.18311691284179688, -0.17609405517578125, -0.16907119750976562, -0.16204833984375, -0.15502548217773438, -0.14800262451171875, -0.14097976684570312, -0.1339569091796875, -0.12693405151367188, -0.11991119384765625, -0.11288833618164062, -0.105865478515625, -0.09884262084960938, -0.09181976318359375, -0.08479690551757812, -0.0777740478515625, -0.07075119018554688, -0.06372833251953125, -0.056705474853515625, -0.0496826171875, -0.042659759521484375, -0.03563690185546875, -0.028614044189453125, -0.0215911865234375, -0.014568328857421875, -0.00754547119140625, -0.000522613525390625, 0.006500244140625, 0.013523101806640625, 0.02054595947265625, 0.027568817138671875, 0.0345916748046875, 0.041614532470703125, 0.04863739013671875, 0.055660247802734375, 0.06268310546875, 0.06970596313476562, 0.07672882080078125, 0.08375167846679688, 0.0907745361328125, 0.09779739379882812, 0.10482025146484375, 0.11184310913085938, 0.118865966796875, 0.12588882446289062, 0.13291168212890625, 0.13993453979492188, 0.1469573974609375, 0.15398025512695312, 0.16100311279296875, 0.16802597045898438, 0.175048828125]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 3.0, 3.0, 3.0, 4.0, 1.0, 8.0, 11.0, 10.0, 14.0, 18.0, 32.0, 34.0, 38.0, 92.0, 74.0, 126.0, 149.0, 234.0, 333.0, 509.0, 996.0, 2424.0, 6659.0, 23159.0, 93412.0, 323846.0, 405042.0, 140982.0, 34531.0, 9527.0, 3110.0, 1298.0, 623.0, 361.0, 246.0, 176.0, 120.0, 89.0, 82.0, 39.0, 51.0, 30.0, 17.0, 14.0, 11.0, 13.0, 4.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.6611328125, -0.63677978515625, -0.6124267578125, -0.58807373046875, -0.563720703125, -0.53936767578125, -0.5150146484375, -0.49066162109375, -0.46630859375, -0.44195556640625, -0.4176025390625, -0.39324951171875, -0.368896484375, -0.34454345703125, -0.3201904296875, -0.29583740234375, -0.271484375, -0.24713134765625, -0.2227783203125, -0.19842529296875, -0.174072265625, -0.14971923828125, -0.1253662109375, -0.10101318359375, -0.07666015625, -0.05230712890625, -0.0279541015625, -0.00360107421875, 0.020751953125, 0.04510498046875, 0.0694580078125, 0.09381103515625, 0.1181640625, 0.14251708984375, 0.1668701171875, 0.19122314453125, 0.215576171875, 0.23992919921875, 0.2642822265625, 0.28863525390625, 0.31298828125, 0.33734130859375, 0.3616943359375, 0.38604736328125, 0.410400390625, 0.43475341796875, 0.4591064453125, 0.48345947265625, 0.5078125, 0.53216552734375, 0.5565185546875, 0.58087158203125, 0.605224609375, 0.62957763671875, 0.6539306640625, 0.67828369140625, 0.70263671875, 0.72698974609375, 0.7513427734375, 0.77569580078125, 0.800048828125, 0.82440185546875, 0.8487548828125, 0.87310791015625, 0.8974609375]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 7.0, 5.0, 4.0, 14.0, 9.0, 5.0, 11.0, 10.0, 25.0, 38.0, 31.0, 33.0, 30.0, 38.0, 48.0, 49.0, 42.0, 66.0, 63.0, 39.0, 40.0, 45.0, 40.0, 42.0, 32.0, 40.0, 41.0, 38.0, 24.0, 17.0, 18.0, 15.0, 17.0, 6.0, 6.0, 5.0, 8.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.69091796875, -0.6640548706054688, -0.6371917724609375, -0.6103286743164062, -0.583465576171875, -0.5566024780273438, -0.5297393798828125, -0.5028762817382812, -0.47601318359375, -0.44915008544921875, -0.4222869873046875, -0.39542388916015625, -0.368560791015625, -0.34169769287109375, -0.3148345947265625, -0.28797149658203125, -0.2611083984375, -0.23424530029296875, -0.2073822021484375, -0.18051910400390625, -0.153656005859375, -0.12679290771484375, -0.0999298095703125, -0.07306671142578125, -0.04620361328125, -0.01934051513671875, 0.0075225830078125, 0.03438568115234375, 0.061248779296875, 0.08811187744140625, 0.1149749755859375, 0.14183807373046875, 0.168701171875, 0.19556427001953125, 0.2224273681640625, 0.24929046630859375, 0.276153564453125, 0.30301666259765625, 0.3298797607421875, 0.35674285888671875, 0.38360595703125, 0.41046905517578125, 0.4373321533203125, 0.46419525146484375, 0.491058349609375, 0.5179214477539062, 0.5447845458984375, 0.5716476440429688, 0.5985107421875, 0.6253738403320312, 0.6522369384765625, 0.6791000366210938, 0.705963134765625, 0.7328262329101562, 0.7596893310546875, 0.7865524291992188, 0.81341552734375, 0.8402786254882812, 0.8671417236328125, 0.8940048217773438, 0.920867919921875, 0.9477310180664062, 0.9745941162109375, 1.0014572143554688, 1.0283203125]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 1.0, 4.0, 15.0, 10.0, 15.0, 19.0, 29.0, 53.0, 97.0, 207.0, 374.0, 974.0, 3120.0, 16426.0, 168183.0, 723187.0, 118795.0, 12899.0, 2554.0, 844.0, 380.0, 167.0, 91.0, 55.0, 23.0, 8.0, 8.0, 3.0, 7.0, 5.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.68994140625, -0.6679916381835938, -0.6460418701171875, -0.6240921020507812, -0.602142333984375, -0.5801925659179688, -0.5582427978515625, -0.5362930297851562, -0.51434326171875, -0.49239349365234375, -0.4704437255859375, -0.44849395751953125, -0.426544189453125, -0.40459442138671875, -0.3826446533203125, -0.36069488525390625, -0.3387451171875, -0.31679534912109375, -0.2948455810546875, -0.27289581298828125, -0.250946044921875, -0.22899627685546875, -0.2070465087890625, -0.18509674072265625, -0.16314697265625, -0.14119720458984375, -0.1192474365234375, -0.09729766845703125, -0.075347900390625, -0.05339813232421875, -0.0314483642578125, -0.00949859619140625, 0.012451171875, 0.03440093994140625, 0.0563507080078125, 0.07830047607421875, 0.100250244140625, 0.12220001220703125, 0.1441497802734375, 0.16609954833984375, 0.18804931640625, 0.20999908447265625, 0.2319488525390625, 0.25389862060546875, 0.275848388671875, 0.29779815673828125, 0.3197479248046875, 0.34169769287109375, 0.3636474609375, 0.38559722900390625, 0.4075469970703125, 0.42949676513671875, 0.451446533203125, 0.47339630126953125, 0.4953460693359375, 0.5172958374023438, 0.53924560546875, 0.5611953735351562, 0.5831451416015625, 0.6050949096679688, 0.627044677734375, 0.6489944458007812, 0.6709442138671875, 0.6928939819335938, 0.71484375]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 3.0, 6.0, 9.0, 6.0, 10.0, 16.0, 16.0, 22.0, 37.0, 45.0, 44.0, 72.0, 90.0, 99.0, 104.0, 90.0, 62.0, 55.0, 59.0, 27.0, 24.0, 23.0, 17.0, 15.0, 13.0, 9.0, 5.0, 3.0, 5.0, 7.0, 2.0, 5.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.00010752677917480469, -0.00010478124022483826, -0.00010203570127487183, -9.92901623249054e-05, -9.654462337493896e-05, -9.379908442497253e-05, -9.10535454750061e-05, -8.830800652503967e-05, -8.556246757507324e-05, -8.281692862510681e-05, -8.007138967514038e-05, -7.732585072517395e-05, -7.458031177520752e-05, -7.183477282524109e-05, -6.908923387527466e-05, -6.634369492530823e-05, -6.35981559753418e-05, -6.0852617025375366e-05, -5.8107078075408936e-05, -5.5361539125442505e-05, -5.2616000175476074e-05, -4.9870461225509644e-05, -4.712492227554321e-05, -4.437938332557678e-05, -4.163384437561035e-05, -3.888830542564392e-05, -3.614276647567749e-05, -3.339722752571106e-05, -3.065168857574463e-05, -2.7906149625778198e-05, -2.5160610675811768e-05, -2.2415071725845337e-05, -1.9669532775878906e-05, -1.6923993825912476e-05, -1.4178454875946045e-05, -1.1432915925979614e-05, -8.687376976013184e-06, -5.941838026046753e-06, -3.1962990760803223e-06, -4.507601261138916e-07, 2.294778823852539e-06, 5.04031777381897e-06, 7.7858567237854e-06, 1.0531395673751831e-05, 1.3276934623718262e-05, 1.6022473573684692e-05, 1.8768012523651123e-05, 2.1513551473617554e-05, 2.4259090423583984e-05, 2.7004629373550415e-05, 2.9750168323516846e-05, 3.2495707273483276e-05, 3.524124622344971e-05, 3.798678517341614e-05, 4.073232412338257e-05, 4.3477863073349e-05, 4.622340202331543e-05, 4.896894097328186e-05, 5.171447992324829e-05, 5.446001887321472e-05, 5.720555782318115e-05, 5.995109677314758e-05, 6.269663572311401e-05, 6.544217467308044e-05, 6.818771362304688e-05]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 1.0, 1.0, 11.0, 12.0, 35.0, 52.0, 123.0, 180.0, 530.0, 1973.0, 15414.0, 380342.0, 618642.0, 27328.0, 2744.0, 650.0, 268.0, 123.0, 59.0, 27.0, 16.0, 12.0, 5.0, 2.0, 6.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.2646484375, -1.2338790893554688, -1.2031097412109375, -1.1723403930664062, -1.141571044921875, -1.1108016967773438, -1.0800323486328125, -1.0492630004882812, -1.01849365234375, -0.9877243041992188, -0.9569549560546875, -0.9261856079101562, -0.895416259765625, -0.8646469116210938, -0.8338775634765625, -0.8031082153320312, -0.7723388671875, -0.7415695190429688, -0.7108001708984375, -0.6800308227539062, -0.649261474609375, -0.6184921264648438, -0.5877227783203125, -0.5569534301757812, -0.52618408203125, -0.49541473388671875, -0.4646453857421875, -0.43387603759765625, -0.403106689453125, -0.37233734130859375, -0.3415679931640625, -0.31079864501953125, -0.280029296875, -0.24925994873046875, -0.2184906005859375, -0.18772125244140625, -0.156951904296875, -0.12618255615234375, -0.0954132080078125, -0.06464385986328125, -0.03387451171875, -0.00310516357421875, 0.0276641845703125, 0.05843353271484375, 0.089202880859375, 0.11997222900390625, 0.1507415771484375, 0.18151092529296875, 0.2122802734375, 0.24304962158203125, 0.2738189697265625, 0.30458831787109375, 0.335357666015625, 0.36612701416015625, 0.3968963623046875, 0.42766571044921875, 0.45843505859375, 0.48920440673828125, 0.5199737548828125, 0.5507431030273438, 0.581512451171875, 0.6122817993164062, 0.6430511474609375, 0.6738204956054688, 0.70458984375]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 3.0, 2.0, 4.0, 4.0, 6.0, 3.0, 14.0, 13.0, 27.0, 32.0, 40.0, 49.0, 68.0, 89.0, 93.0, 117.0, 102.0, 80.0, 69.0, 53.0, 44.0, 35.0, 19.0, 12.0, 10.0, 5.0, 7.0, 5.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5556640625, -0.53912353515625, -0.5225830078125, -0.50604248046875, -0.489501953125, -0.47296142578125, -0.4564208984375, -0.43988037109375, -0.42333984375, -0.40679931640625, -0.3902587890625, -0.37371826171875, -0.357177734375, -0.34063720703125, -0.3240966796875, -0.30755615234375, -0.291015625, -0.27447509765625, -0.2579345703125, -0.24139404296875, -0.224853515625, -0.20831298828125, -0.1917724609375, -0.17523193359375, -0.15869140625, -0.14215087890625, -0.1256103515625, -0.10906982421875, -0.092529296875, -0.07598876953125, -0.0594482421875, -0.04290771484375, -0.0263671875, -0.00982666015625, 0.0067138671875, 0.02325439453125, 0.039794921875, 0.05633544921875, 0.0728759765625, 0.08941650390625, 0.10595703125, 0.12249755859375, 0.1390380859375, 0.15557861328125, 0.172119140625, 0.18865966796875, 0.2052001953125, 0.22174072265625, 0.23828125, 0.25482177734375, 0.2713623046875, 0.28790283203125, 0.304443359375, 0.32098388671875, 0.3375244140625, 0.35406494140625, 0.37060546875, 0.38714599609375, 0.4036865234375, 0.42022705078125, 0.436767578125, 0.45330810546875, 0.4698486328125, 0.48638916015625, 0.5029296875]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 3.0, 5.0, 13.0, 28.0, 69.0, 89.0, 131.0, 165.0, 172.0, 145.0, 91.0, 46.0, 36.0, 8.0, 5.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-13.52730941772461, -13.234052658081055, -12.940794944763184, -12.647537231445312, -12.354280471801758, -12.061023712158203, -11.767765998840332, -11.474508285522461, -11.181251525878906, -10.887994766235352, -10.59473705291748, -10.30147933959961, -10.008222579956055, -9.7149658203125, -9.421708106994629, -9.128450393676758, -8.835193634033203, -8.541936874389648, -8.248679161071777, -7.9554219245910645, -7.662164688110352, -7.368907451629639, -7.075650215148926, -6.782392978668213, -6.4891357421875, -6.195878505706787, -5.902621269226074, -5.609364032745361, -5.316106796264648, -5.0228495597839355, -4.729592323303223, -4.43633508682251, -4.1430768966674805, -3.8498196601867676, -3.5565624237060547, -3.263305187225342, -2.970047950744629, -2.676790714263916, -2.383533477783203, -2.0902762413024902, -1.7970190048217773, -1.5037617683410645, -1.2105045318603516, -0.9172472953796387, -0.6239900588989258, -0.3307328224182129, -0.0374755859375, 0.2557816505432129, 0.5490388870239258, 0.8422961235046387, 1.1355533599853516, 1.4288105964660645, 1.7220678329467773, 2.0153250694274902, 2.308582305908203, 2.601839542388916, 2.895096778869629, 3.188354015350342, 3.4816112518310547, 3.7748684883117676, 4.0681257247924805, 4.361382961273193, 4.654640197753906, 4.947897434234619, 5.241154670715332]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 5.0, 4.0, 8.0, 9.0, 8.0, 14.0, 17.0, 25.0, 24.0, 41.0, 25.0, 31.0, 30.0, 54.0, 39.0, 34.0, 38.0, 42.0, 42.0, 50.0, 51.0, 36.0, 41.0, 36.0, 45.0, 38.0, 27.0, 35.0, 22.0, 22.0, 19.0, 20.0, 12.0, 18.0, 13.0, 10.0, 8.0, 3.0, 5.0, 4.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.6102945804595947, -3.4762771129608154, -3.3422598838806152, -3.208242416381836, -3.0742249488830566, -2.9402074813842773, -2.806190013885498, -2.672172784805298, -2.5381553173065186, -2.4041378498077393, -2.270120620727539, -2.1361031532287598, -2.0020856857299805, -1.8680682182312012, -1.7340508699417114, -1.6000335216522217, -1.4660160541534424, -1.331998586654663, -1.1979812383651733, -1.0639638900756836, -0.9299464225769043, -0.7959290146827698, -0.6619116067886353, -0.5278941988945007, -0.3938767910003662, -0.2598593831062317, -0.12584197521209717, 0.008175432682037354, 0.14219284057617188, 0.2762102484703064, 0.4102276563644409, 0.5442450642585754, 0.6782627105712891, 0.8122801184654236, 0.9462975263595581, 1.0803148746490479, 1.2143323421478271, 1.3483498096466064, 1.4823671579360962, 1.616384506225586, 1.7504019737243652, 1.8844194412231445, 2.018436908721924, 2.152454137802124, 2.2864716053009033, 2.4204890727996826, 2.554506301879883, 2.688523769378662, 2.8225412368774414, 2.9565587043762207, 3.090576171875, 3.2245934009552, 3.3586108684539795, 3.492628335952759, 3.626645565032959, 3.7606630325317383, 3.8946805000305176, 4.028697967529297, 4.162715435028076, 4.2967329025268555, 4.430749893188477, 4.564767360687256, 4.698784828186035, 4.8328022956848145, 4.966819763183594]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [4.0, 3.0, 3.0, 1.0, 3.0, 8.0, 13.0, 9.0, 15.0, 29.0, 27.0, 59.0, 99.0, 127.0, 305.0, 772.0, 3422.0, 33518.0, 4007929.0, 137681.0, 7688.0, 1531.0, 540.0, 234.0, 115.0, 62.0, 36.0, 23.0, 11.0, 12.0, 4.0, 8.0, 2.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.908203125, -1.806182861328125, -1.70416259765625, -1.602142333984375, -1.5001220703125, -1.398101806640625, -1.29608154296875, -1.194061279296875, -1.092041015625, -0.990020751953125, -0.88800048828125, -0.785980224609375, -0.6839599609375, -0.581939697265625, -0.47991943359375, -0.377899169921875, -0.27587890625, -0.173858642578125, -0.07183837890625, 0.030181884765625, 0.1322021484375, 0.234222412109375, 0.33624267578125, 0.438262939453125, 0.540283203125, 0.642303466796875, 0.74432373046875, 0.846343994140625, 0.9483642578125, 1.050384521484375, 1.15240478515625, 1.254425048828125, 1.3564453125, 1.458465576171875, 1.56048583984375, 1.662506103515625, 1.7645263671875, 1.866546630859375, 1.96856689453125, 2.070587158203125, 2.172607421875, 2.274627685546875, 2.37664794921875, 2.478668212890625, 2.5806884765625, 2.682708740234375, 2.78472900390625, 2.886749267578125, 2.98876953125, 3.090789794921875, 3.19281005859375, 3.294830322265625, 3.3968505859375, 3.498870849609375, 3.60089111328125, 3.702911376953125, 3.804931640625, 3.906951904296875, 4.00897216796875, 4.110992431640625, 4.2130126953125, 4.315032958984375, 4.41705322265625, 4.519073486328125, 4.62109375]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 5.0, 3.0, 13.0, 15.0, 13.0, 21.0, 30.0, 46.0, 41.0, 42.0, 43.0, 50.0, 67.0, 53.0, 62.0, 73.0, 53.0, 61.0, 55.0, 36.0, 37.0, 41.0, 31.0, 26.0, 22.0, 14.0, 14.0, 11.0, 3.0, 8.0, 7.0, 1.0, 3.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 2.0], "bins": [-0.284912109375, -0.2776031494140625, -0.270294189453125, -0.2629852294921875, -0.25567626953125, -0.2483673095703125, -0.241058349609375, -0.2337493896484375, -0.2264404296875, -0.2191314697265625, -0.211822509765625, -0.2045135498046875, -0.19720458984375, -0.1898956298828125, -0.182586669921875, -0.1752777099609375, -0.16796875, -0.1606597900390625, -0.153350830078125, -0.1460418701171875, -0.13873291015625, -0.1314239501953125, -0.124114990234375, -0.1168060302734375, -0.1094970703125, -0.1021881103515625, -0.094879150390625, -0.0875701904296875, -0.08026123046875, -0.0729522705078125, -0.065643310546875, -0.0583343505859375, -0.051025390625, -0.0437164306640625, -0.036407470703125, -0.0290985107421875, -0.02178955078125, -0.0144805908203125, -0.007171630859375, 0.0001373291015625, 0.0074462890625, 0.0147552490234375, 0.022064208984375, 0.0293731689453125, 0.03668212890625, 0.0439910888671875, 0.051300048828125, 0.0586090087890625, 0.06591796875, 0.0732269287109375, 0.080535888671875, 0.0878448486328125, 0.09515380859375, 0.1024627685546875, 0.109771728515625, 0.1170806884765625, 0.1243896484375, 0.1316986083984375, 0.139007568359375, 0.1463165283203125, 0.15362548828125, 0.1609344482421875, 0.168243408203125, 0.1755523681640625, 0.182861328125]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 4.0, 9.0, 18.0, 29.0, 52.0, 94.0, 151.0, 282.0, 485.0, 792.0, 1529.0, 3131.0, 7602.0, 26910.0, 174221.0, 3700974.0, 230766.0, 31712.0, 8424.0, 3334.0, 1727.0, 873.0, 513.0, 268.0, 163.0, 97.0, 50.0, 34.0, 16.0, 9.0, 6.0, 7.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.27734375, -1.2368621826171875, -1.196380615234375, -1.1558990478515625, -1.11541748046875, -1.0749359130859375, -1.034454345703125, -0.9939727783203125, -0.9534912109375, -0.9130096435546875, -0.872528076171875, -0.8320465087890625, -0.79156494140625, -0.7510833740234375, -0.710601806640625, -0.6701202392578125, -0.629638671875, -0.5891571044921875, -0.548675537109375, -0.5081939697265625, -0.46771240234375, -0.4272308349609375, -0.386749267578125, -0.3462677001953125, -0.3057861328125, -0.2653045654296875, -0.224822998046875, -0.1843414306640625, -0.14385986328125, -0.1033782958984375, -0.062896728515625, -0.0224151611328125, 0.01806640625, 0.0585479736328125, 0.099029541015625, 0.1395111083984375, 0.17999267578125, 0.2204742431640625, 0.260955810546875, 0.3014373779296875, 0.3419189453125, 0.3824005126953125, 0.422882080078125, 0.4633636474609375, 0.50384521484375, 0.5443267822265625, 0.584808349609375, 0.6252899169921875, 0.665771484375, 0.7062530517578125, 0.746734619140625, 0.7872161865234375, 0.82769775390625, 0.8681793212890625, 0.908660888671875, 0.9491424560546875, 0.9896240234375, 1.0301055908203125, 1.070587158203125, 1.1110687255859375, 1.15155029296875, 1.1920318603515625, 1.232513427734375, 1.2729949951171875, 1.3134765625]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 4.0, 1.0, 3.0, 1.0, 4.0, 7.0, 5.0, 4.0, 9.0, 11.0, 19.0, 29.0, 37.0, 96.0, 167.0, 463.0, 2191.0, 587.0, 194.0, 82.0, 57.0, 28.0, 29.0, 12.0, 16.0, 8.0, 7.0, 3.0, 2.0, 1.0, 2.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.66748046875, -0.6463394165039062, -0.6251983642578125, -0.6040573120117188, -0.582916259765625, -0.5617752075195312, -0.5406341552734375, -0.5194931030273438, -0.49835205078125, -0.47721099853515625, -0.4560699462890625, -0.43492889404296875, -0.413787841796875, -0.39264678955078125, -0.3715057373046875, -0.35036468505859375, -0.3292236328125, -0.30808258056640625, -0.2869415283203125, -0.26580047607421875, -0.244659423828125, -0.22351837158203125, -0.2023773193359375, -0.18123626708984375, -0.16009521484375, -0.13895416259765625, -0.1178131103515625, -0.09667205810546875, -0.075531005859375, -0.05438995361328125, -0.0332489013671875, -0.01210784912109375, 0.009033203125, 0.03017425537109375, 0.0513153076171875, 0.07245635986328125, 0.093597412109375, 0.11473846435546875, 0.1358795166015625, 0.15702056884765625, 0.17816162109375, 0.19930267333984375, 0.2204437255859375, 0.24158477783203125, 0.262725830078125, 0.28386688232421875, 0.3050079345703125, 0.32614898681640625, 0.3472900390625, 0.36843109130859375, 0.3895721435546875, 0.41071319580078125, 0.431854248046875, 0.45299530029296875, 0.4741363525390625, 0.49527740478515625, 0.51641845703125, 0.5375595092773438, 0.5587005615234375, 0.5798416137695312, 0.600982666015625, 0.6221237182617188, 0.6432647705078125, 0.6644058227539062, 0.685546875]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 4.0, 7.0, 6.0, 20.0, 42.0, 119.0, 241.0, 247.0, 174.0, 80.0, 32.0, 18.0, 6.0, 4.0, 3.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0390124320983887, -2.8628294467926025, -2.6866466999053955, -2.5104637145996094, -2.3342809677124023, -2.158097982406616, -1.98191499710083, -1.8057321310043335, -1.629549264907837, -1.4533663988113403, -1.2771835327148438, -1.1010005474090576, -0.924817681312561, -0.7486348152160645, -0.5724518299102783, -0.39626896381378174, -0.22008609771728516, -0.043903201818466187, 0.13227969408035278, 0.30846261978149414, 0.4846454858779907, 0.6608283519744873, 0.8370113372802734, 1.01319420337677, 1.1893770694732666, 1.3655599355697632, 1.5417428016662598, 1.717925786972046, 1.8941086530685425, 2.070291519165039, 2.246474504470825, 2.4226574897766113, 2.59883975982666, 2.7750227451324463, 2.9512054920196533, 3.1273884773254395, 3.3035712242126465, 3.4797542095184326, 3.6559371948242188, 3.832119941711426, 4.008302688598633, 4.18448543548584, 4.360668659210205, 4.536851406097412, 4.713034152984619, 4.889217376708984, 5.065400123596191, 5.241582870483398, 5.417766094207764, 5.593948841094971, 5.770132064819336, 5.946314811706543, 6.12249755859375, 6.298680305480957, 6.474863529205322, 6.651046276092529, 6.8272294998168945, 7.003412246704102, 7.179595470428467, 7.355778217315674, 7.531960964202881, 7.708144187927246, 7.884326934814453, 8.06050968170166, 8.236692428588867]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 8.0, 10.0, 11.0, 6.0, 10.0, 15.0, 20.0, 27.0, 32.0, 37.0, 31.0, 37.0, 41.0, 45.0, 53.0, 57.0, 61.0, 56.0, 36.0, 50.0, 49.0, 62.0, 38.0, 46.0, 25.0, 20.0, 17.0, 24.0, 14.0, 9.0, 10.0, 11.0, 8.0, 4.0, 7.0, 6.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0], "bins": [-2.198601007461548, -2.1419484615325928, -2.0852956771850586, -2.0286431312561035, -1.9719904661178589, -1.9153378009796143, -1.8586851358413696, -1.802032470703125, -1.74537992477417, -1.6887272596359253, -1.6320745944976807, -1.5754220485687256, -1.518769383430481, -1.4621167182922363, -1.4054640531539917, -1.348811388015747, -1.2921587228775024, -1.2355060577392578, -1.1788533926010132, -1.1222007274627686, -1.0655481815338135, -1.0088955163955688, -0.9522428512573242, -0.8955901861190796, -0.8389375805854797, -0.7822849154472351, -0.7256323099136353, -0.6689796447753906, -0.612326979637146, -0.5556743741035461, -0.4990217089653015, -0.4423690736293793, -0.38571643829345703, -0.3290638029575348, -0.27241116762161255, -0.21575850248336792, -0.15910586714744568, -0.10245323181152344, -0.04580056667327881, 0.010852068662643433, 0.06750470399856567, 0.12415734678506851, 0.18080998957157135, 0.23746263980865479, 0.294115275144577, 0.35076791048049927, 0.4074205756187439, 0.46407321095466614, 0.5207258462905884, 0.577378511428833, 0.6340311169624329, 0.6906837821006775, 0.7473363876342773, 0.803989052772522, 0.8606417179107666, 0.9172943830490112, 0.9739469885826111, 1.030599594116211, 1.0872522592544556, 1.1439049243927002, 1.2005575895309448, 1.2572102546691895, 1.3138628005981445, 1.3705154657363892, 1.4271681308746338]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 5.0, 0.0, 3.0, 3.0, 6.0, 6.0, 6.0, 11.0, 12.0, 44.0, 41.0, 73.0, 149.0, 284.0, 931.0, 4438.0, 60018.0, 784877.0, 185853.0, 9443.0, 1491.0, 446.0, 176.0, 96.0, 49.0, 37.0, 21.0, 16.0, 11.0, 8.0, 4.0, 0.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.681640625, -1.604034423828125, -1.52642822265625, -1.448822021484375, -1.3712158203125, -1.293609619140625, -1.21600341796875, -1.138397216796875, -1.060791015625, -0.983184814453125, -0.90557861328125, -0.827972412109375, -0.7503662109375, -0.672760009765625, -0.59515380859375, -0.517547607421875, -0.43994140625, -0.362335205078125, -0.28472900390625, -0.207122802734375, -0.1295166015625, -0.051910400390625, 0.02569580078125, 0.103302001953125, 0.180908203125, 0.258514404296875, 0.33612060546875, 0.413726806640625, 0.4913330078125, 0.568939208984375, 0.64654541015625, 0.724151611328125, 0.8017578125, 0.879364013671875, 0.95697021484375, 1.034576416015625, 1.1121826171875, 1.189788818359375, 1.26739501953125, 1.345001220703125, 1.422607421875, 1.500213623046875, 1.57781982421875, 1.655426025390625, 1.7330322265625, 1.810638427734375, 1.88824462890625, 1.965850830078125, 2.04345703125, 2.121063232421875, 2.19866943359375, 2.276275634765625, 2.3538818359375, 2.431488037109375, 2.50909423828125, 2.586700439453125, 2.664306640625, 2.741912841796875, 2.81951904296875, 2.897125244140625, 2.9747314453125, 3.052337646484375, 3.12994384765625, 3.207550048828125, 3.28515625]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 4.0, 3.0, 5.0, 6.0, 6.0, 9.0, 19.0, 15.0, 26.0, 24.0, 27.0, 53.0, 49.0, 44.0, 50.0, 57.0, 58.0, 66.0, 55.0, 50.0, 48.0, 40.0, 47.0, 45.0, 41.0, 43.0, 23.0, 20.0, 14.0, 13.0, 11.0, 10.0, 10.0, 8.0, 1.0, 2.0, 3.0, 6.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.324462890625, -0.3163471221923828, -0.3082313537597656, -0.30011558532714844, -0.29199981689453125, -0.28388404846191406, -0.2757682800292969, -0.2676525115966797, -0.2595367431640625, -0.2514209747314453, -0.24330520629882812, -0.23518943786621094, -0.22707366943359375, -0.21895790100097656, -0.21084213256835938, -0.2027263641357422, -0.194610595703125, -0.1864948272705078, -0.17837905883789062, -0.17026329040527344, -0.16214752197265625, -0.15403175354003906, -0.14591598510742188, -0.1378002166748047, -0.1296844482421875, -0.12156867980957031, -0.11345291137695312, -0.10533714294433594, -0.09722137451171875, -0.08910560607910156, -0.08098983764648438, -0.07287406921386719, -0.06475830078125, -0.05664253234863281, -0.048526763916015625, -0.04041099548339844, -0.03229522705078125, -0.024179458618164062, -0.016063690185546875, -0.007947921752929688, 0.0001678466796875, 0.008283615112304688, 0.016399383544921875, 0.024515151977539062, 0.03263092041015625, 0.04074668884277344, 0.048862457275390625, 0.05697822570800781, 0.065093994140625, 0.07320976257324219, 0.08132553100585938, 0.08944129943847656, 0.09755706787109375, 0.10567283630371094, 0.11378860473632812, 0.12190437316894531, 0.1300201416015625, 0.1381359100341797, 0.14625167846679688, 0.15436744689941406, 0.16248321533203125, 0.17059898376464844, 0.17871475219726562, 0.1868305206298828, 0.1949462890625]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 4.0, 5.0, 12.0, 5.0, 5.0, 13.0, 13.0, 19.0, 28.0, 45.0, 51.0, 93.0, 137.0, 167.0, 279.0, 431.0, 740.0, 1432.0, 3328.0, 8900.0, 31183.0, 127677.0, 440195.0, 323096.0, 78818.0, 20176.0, 6400.0, 2446.0, 1151.0, 587.0, 387.0, 214.0, 137.0, 107.0, 91.0, 55.0, 46.0, 26.0, 13.0, 13.0, 11.0, 6.0, 3.0, 8.0, 3.0, 1.0, 3.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 3.0], "bins": [-0.9423828125, -0.9136810302734375, -0.884979248046875, -0.8562774658203125, -0.82757568359375, -0.7988739013671875, -0.770172119140625, -0.7414703369140625, -0.7127685546875, -0.6840667724609375, -0.655364990234375, -0.6266632080078125, -0.59796142578125, -0.5692596435546875, -0.540557861328125, -0.5118560791015625, -0.483154296875, -0.4544525146484375, -0.425750732421875, -0.3970489501953125, -0.36834716796875, -0.3396453857421875, -0.310943603515625, -0.2822418212890625, -0.2535400390625, -0.2248382568359375, -0.196136474609375, -0.1674346923828125, -0.13873291015625, -0.1100311279296875, -0.081329345703125, -0.0526275634765625, -0.02392578125, 0.0047760009765625, 0.033477783203125, 0.0621795654296875, 0.09088134765625, 0.1195831298828125, 0.148284912109375, 0.1769866943359375, 0.2056884765625, 0.2343902587890625, 0.263092041015625, 0.2917938232421875, 0.32049560546875, 0.3491973876953125, 0.377899169921875, 0.4066009521484375, 0.435302734375, 0.4640045166015625, 0.492706298828125, 0.5214080810546875, 0.55010986328125, 0.5788116455078125, 0.607513427734375, 0.6362152099609375, 0.6649169921875, 0.6936187744140625, 0.722320556640625, 0.7510223388671875, 0.77972412109375, 0.8084259033203125, 0.837127685546875, 0.8658294677734375, 0.89453125]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 9.0, 9.0, 4.0, 6.0, 13.0, 24.0, 21.0, 22.0, 32.0, 29.0, 43.0, 44.0, 62.0, 56.0, 64.0, 60.0, 51.0, 58.0, 40.0, 64.0, 41.0, 38.0, 25.0, 35.0, 28.0, 28.0, 21.0, 15.0, 11.0, 18.0, 9.0, 5.0, 6.0, 2.0, 4.0, 2.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.056640625, -1.0169525146484375, -0.977264404296875, -0.9375762939453125, -0.89788818359375, -0.8582000732421875, -0.818511962890625, -0.7788238525390625, -0.7391357421875, -0.6994476318359375, -0.659759521484375, -0.6200714111328125, -0.58038330078125, -0.5406951904296875, -0.501007080078125, -0.4613189697265625, -0.421630859375, -0.3819427490234375, -0.342254638671875, -0.3025665283203125, -0.26287841796875, -0.2231903076171875, -0.183502197265625, -0.1438140869140625, -0.1041259765625, -0.0644378662109375, -0.024749755859375, 0.0149383544921875, 0.05462646484375, 0.0943145751953125, 0.134002685546875, 0.1736907958984375, 0.21337890625, 0.2530670166015625, 0.292755126953125, 0.3324432373046875, 0.37213134765625, 0.4118194580078125, 0.451507568359375, 0.4911956787109375, 0.5308837890625, 0.5705718994140625, 0.610260009765625, 0.6499481201171875, 0.68963623046875, 0.7293243408203125, 0.769012451171875, 0.8087005615234375, 0.848388671875, 0.8880767822265625, 0.927764892578125, 0.9674530029296875, 1.00714111328125, 1.0468292236328125, 1.086517333984375, 1.1262054443359375, 1.1658935546875, 1.2055816650390625, 1.245269775390625, 1.2849578857421875, 1.32464599609375, 1.3643341064453125, 1.404022216796875, 1.4437103271484375, 1.4833984375]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 4.0, 5.0, 7.0, 8.0, 16.0, 11.0, 31.0, 26.0, 50.0, 66.0, 105.0, 183.0, 339.0, 707.0, 1606.0, 4135.0, 15294.0, 98310.0, 635416.0, 251044.0, 30160.0, 6753.0, 2284.0, 914.0, 475.0, 238.0, 120.0, 83.0, 46.0, 31.0, 19.0, 14.0, 18.0, 8.0, 8.0, 7.0, 6.0, 4.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0], "bins": [-0.6474609375, -0.6256637573242188, -0.6038665771484375, -0.5820693969726562, -0.560272216796875, -0.5384750366210938, -0.5166778564453125, -0.49488067626953125, -0.47308349609375, -0.45128631591796875, -0.4294891357421875, -0.40769195556640625, -0.385894775390625, -0.36409759521484375, -0.3423004150390625, -0.32050323486328125, -0.2987060546875, -0.27690887451171875, -0.2551116943359375, -0.23331451416015625, -0.211517333984375, -0.18972015380859375, -0.1679229736328125, -0.14612579345703125, -0.12432861328125, -0.10253143310546875, -0.0807342529296875, -0.05893707275390625, -0.037139892578125, -0.01534271240234375, 0.0064544677734375, 0.02825164794921875, 0.050048828125, 0.07184600830078125, 0.0936431884765625, 0.11544036865234375, 0.137237548828125, 0.15903472900390625, 0.1808319091796875, 0.20262908935546875, 0.22442626953125, 0.24622344970703125, 0.2680206298828125, 0.28981781005859375, 0.311614990234375, 0.33341217041015625, 0.3552093505859375, 0.37700653076171875, 0.3988037109375, 0.42060089111328125, 0.4423980712890625, 0.46419525146484375, 0.485992431640625, 0.5077896118164062, 0.5295867919921875, 0.5513839721679688, 0.57318115234375, 0.5949783325195312, 0.6167755126953125, 0.6385726928710938, 0.660369873046875, 0.6821670532226562, 0.7039642333984375, 0.7257614135742188, 0.74755859375]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 5.0, 7.0, 5.0, 10.0, 7.0, 10.0, 22.0, 29.0, 28.0, 37.0, 35.0, 43.0, 73.0, 62.0, 86.0, 103.0, 74.0, 73.0, 60.0, 55.0, 47.0, 33.0, 26.0, 23.0, 12.0, 5.0, 12.0, 6.0, 4.0, 6.0, 3.0, 3.0, 1.0, 1.0, 0.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-8.279085159301758e-05, -8.021295070648193e-05, -7.763504981994629e-05, -7.505714893341064e-05, -7.2479248046875e-05, -6.990134716033936e-05, -6.732344627380371e-05, -6.474554538726807e-05, -6.216764450073242e-05, -5.958974361419678e-05, -5.701184272766113e-05, -5.443394184112549e-05, -5.1856040954589844e-05, -4.92781400680542e-05, -4.6700239181518555e-05, -4.412233829498291e-05, -4.1544437408447266e-05, -3.896653652191162e-05, -3.6388635635375977e-05, -3.381073474884033e-05, -3.123283386230469e-05, -2.8654932975769043e-05, -2.60770320892334e-05, -2.3499131202697754e-05, -2.092123031616211e-05, -1.8343329429626465e-05, -1.576542854309082e-05, -1.3187527656555176e-05, -1.0609626770019531e-05, -8.031725883483887e-06, -5.453824996948242e-06, -2.8759241104125977e-06, -2.980232238769531e-07, 2.2798776626586914e-06, 4.857778549194336e-06, 7.4356794357299805e-06, 1.0013580322265625e-05, 1.259148120880127e-05, 1.5169382095336914e-05, 1.774728298187256e-05, 2.0325183868408203e-05, 2.2903084754943848e-05, 2.5480985641479492e-05, 2.8058886528015137e-05, 3.063678741455078e-05, 3.3214688301086426e-05, 3.579258918762207e-05, 3.8370490074157715e-05, 4.094839096069336e-05, 4.3526291847229004e-05, 4.610419273376465e-05, 4.868209362030029e-05, 5.125999450683594e-05, 5.383789539337158e-05, 5.6415796279907227e-05, 5.899369716644287e-05, 6.157159805297852e-05, 6.414949893951416e-05, 6.67273998260498e-05, 6.930530071258545e-05, 7.18832015991211e-05, 7.446110248565674e-05, 7.703900337219238e-05, 7.961690425872803e-05, 8.219480514526367e-05]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 1.0, 7.0, 14.0, 20.0, 34.0, 44.0, 109.0, 185.0, 461.0, 1311.0, 5401.0, 40717.0, 587404.0, 382345.0, 24939.0, 3840.0, 978.0, 385.0, 163.0, 77.0, 42.0, 33.0, 23.0, 8.0, 12.0, 1.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.89013671875, -0.8622283935546875, -0.834320068359375, -0.8064117431640625, -0.77850341796875, -0.7505950927734375, -0.722686767578125, -0.6947784423828125, -0.6668701171875, -0.6389617919921875, -0.611053466796875, -0.5831451416015625, -0.55523681640625, -0.5273284912109375, -0.499420166015625, -0.4715118408203125, -0.443603515625, -0.4156951904296875, -0.387786865234375, -0.3598785400390625, -0.33197021484375, -0.3040618896484375, -0.276153564453125, -0.2482452392578125, -0.2203369140625, -0.1924285888671875, -0.164520263671875, -0.1366119384765625, -0.10870361328125, -0.0807952880859375, -0.052886962890625, -0.0249786376953125, 0.0029296875, 0.0308380126953125, 0.058746337890625, 0.0866546630859375, 0.11456298828125, 0.1424713134765625, 0.170379638671875, 0.1982879638671875, 0.2261962890625, 0.2541046142578125, 0.282012939453125, 0.3099212646484375, 0.33782958984375, 0.3657379150390625, 0.393646240234375, 0.4215545654296875, 0.449462890625, 0.4773712158203125, 0.505279541015625, 0.5331878662109375, 0.56109619140625, 0.5890045166015625, 0.616912841796875, 0.6448211669921875, 0.6727294921875, 0.7006378173828125, 0.728546142578125, 0.7564544677734375, 0.78436279296875, 0.8122711181640625, 0.840179443359375, 0.8680877685546875, 0.89599609375]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 3.0, 2.0, 3.0, 2.0, 6.0, 4.0, 5.0, 6.0, 9.0, 19.0, 18.0, 27.0, 27.0, 36.0, 41.0, 52.0, 62.0, 78.0, 96.0, 87.0, 70.0, 67.0, 55.0, 42.0, 43.0, 28.0, 26.0, 12.0, 18.0, 16.0, 11.0, 7.0, 7.0, 4.0, 5.0, 1.0, 2.0, 2.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.51904296875, -0.5043144226074219, -0.48958587646484375, -0.4748573303222656, -0.4601287841796875, -0.4454002380371094, -0.43067169189453125, -0.4159431457519531, -0.401214599609375, -0.3864860534667969, -0.37175750732421875, -0.3570289611816406, -0.3423004150390625, -0.3275718688964844, -0.31284332275390625, -0.2981147766113281, -0.28338623046875, -0.2686576843261719, -0.25392913818359375, -0.23920059204101562, -0.2244720458984375, -0.20974349975585938, -0.19501495361328125, -0.18028640747070312, -0.165557861328125, -0.15082931518554688, -0.13610076904296875, -0.12137222290039062, -0.1066436767578125, -0.09191513061523438, -0.07718658447265625, -0.062458038330078125, -0.0477294921875, -0.033000946044921875, -0.01827239990234375, -0.003543853759765625, 0.0111846923828125, 0.025913238525390625, 0.04064178466796875, 0.055370330810546875, 0.070098876953125, 0.08482742309570312, 0.09955596923828125, 0.11428451538085938, 0.1290130615234375, 0.14374160766601562, 0.15847015380859375, 0.17319869995117188, 0.18792724609375, 0.20265579223632812, 0.21738433837890625, 0.23211288452148438, 0.2468414306640625, 0.2615699768066406, 0.27629852294921875, 0.2910270690917969, 0.305755615234375, 0.3204841613769531, 0.33521270751953125, 0.3499412536621094, 0.3646697998046875, 0.3793983459472656, 0.39412689208984375, 0.4088554382324219, 0.423583984375]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 10.0, 19.0, 58.0, 179.0, 285.0, 288.0, 120.0, 37.0, 15.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.48061752319336, -26.85401725769043, -26.227418899536133, -25.600818634033203, -24.974220275878906, -24.347620010375977, -23.72102165222168, -23.09442138671875, -22.467823028564453, -21.841222763061523, -21.214624404907227, -20.588024139404297, -19.96142578125, -19.33482551574707, -18.708227157592773, -18.081626892089844, -17.455026626586914, -16.828426361083984, -16.201828002929688, -15.575228691101074, -14.948629379272461, -14.322029113769531, -13.695429801940918, -13.068830490112305, -12.442231178283691, -11.815631866455078, -11.189032554626465, -10.562433242797852, -9.935832977294922, -9.309234619140625, -8.682634353637695, -8.056035041809082, -7.429435729980469, -6.8028364181518555, -6.176237106323242, -5.549637317657471, -4.923038005828857, -4.296438694000244, -3.6698391437530518, -3.0432395935058594, -2.416640281677246, -1.7900408506393433, -1.1634414196014404, -0.5368419885635376, 0.08975744247436523, 0.7163567543029785, 1.342956304550171, 1.9695558547973633, 2.5961551666259766, 3.22275447845459, 3.8493540287017822, 4.475953578948975, 5.102552890777588, 5.729152202606201, 6.355751991271973, 6.982351303100586, 7.608950614929199, 8.235549926757812, 8.862149238586426, 9.488748550415039, 10.115348815917969, 10.741947174072266, 11.368547439575195, 11.995146751403809, 12.621746063232422]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 6.0, 5.0, 2.0, 5.0, 12.0, 9.0, 14.0, 18.0, 23.0, 13.0, 13.0, 21.0, 34.0, 26.0, 41.0, 36.0, 38.0, 43.0, 48.0, 41.0, 47.0, 55.0, 46.0, 52.0, 43.0, 37.0, 41.0, 31.0, 31.0, 24.0, 18.0, 23.0, 18.0, 22.0, 8.0, 12.0, 12.0, 6.0, 10.0, 6.0, 6.0, 7.0, 4.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.342444896697998, -5.167723178863525, -4.993001461029053, -4.81827974319458, -4.643557548522949, -4.468835830688477, -4.294114112854004, -4.119392395019531, -3.9446706771850586, -3.769948959350586, -3.5952272415161133, -3.4205052852630615, -3.245783567428589, -3.071061849594116, -2.8963398933410645, -2.721618175506592, -2.546896457672119, -2.3721747398376465, -2.197453022003174, -2.022731065750122, -1.8480093479156494, -1.6732876300811768, -1.4985657930374146, -1.3238439559936523, -1.1491222381591797, -0.9744004607200623, -0.7996786832809448, -0.6249569058418274, -0.45023512840270996, -0.27551335096359253, -0.1007915735244751, 0.07393026351928711, 0.24865198135375977, 0.4233737587928772, 0.5980955362319946, 0.7728173136711121, 0.9475390911102295, 1.1222608089447021, 1.2969826459884644, 1.4717044830322266, 1.6464262008666992, 1.8211479187011719, 1.995869755744934, 2.1705915927886963, 2.345313310623169, 2.5200350284576416, 2.6947569847106934, 2.869478702545166, 3.0442004203796387, 3.2189221382141113, 3.393643856048584, 3.5683658123016357, 3.7430875301361084, 3.917809247970581, 4.092531204223633, 4.2672529220581055, 4.441974639892578, 4.616696357727051, 4.791418075561523, 4.966139793395996, 5.140861511230469, 5.3155837059021, 5.490305423736572, 5.665027141571045, 5.839748859405518]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 2.0, 1.0, 6.0, 8.0, 2.0, 12.0, 13.0, 15.0, 15.0, 45.0, 78.0, 184.0, 549.0, 2347.0, 23426.0, 4143215.0, 21234.0, 2245.0, 535.0, 165.0, 70.0, 41.0, 27.0, 15.0, 8.0, 9.0, 7.0, 5.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.02734375, -3.916961669921875, -3.80657958984375, -3.696197509765625, -3.5858154296875, -3.475433349609375, -3.36505126953125, -3.254669189453125, -3.144287109375, -3.033905029296875, -2.92352294921875, -2.813140869140625, -2.7027587890625, -2.592376708984375, -2.48199462890625, -2.371612548828125, -2.26123046875, -2.150848388671875, -2.04046630859375, -1.930084228515625, -1.8197021484375, -1.709320068359375, -1.59893798828125, -1.488555908203125, -1.378173828125, -1.267791748046875, -1.15740966796875, -1.047027587890625, -0.9366455078125, -0.826263427734375, -0.71588134765625, -0.605499267578125, -0.4951171875, -0.384735107421875, -0.27435302734375, -0.163970947265625, -0.0535888671875, 0.056793212890625, 0.16717529296875, 0.277557373046875, 0.387939453125, 0.498321533203125, 0.60870361328125, 0.719085693359375, 0.8294677734375, 0.939849853515625, 1.05023193359375, 1.160614013671875, 1.27099609375, 1.381378173828125, 1.49176025390625, 1.602142333984375, 1.7125244140625, 1.822906494140625, 1.93328857421875, 2.043670654296875, 2.154052734375, 2.264434814453125, 2.37481689453125, 2.485198974609375, 2.5955810546875, 2.705963134765625, 2.81634521484375, 2.926727294921875, 3.037109375]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 4.0, 5.0, 3.0, 3.0, 12.0, 7.0, 11.0, 17.0, 31.0, 23.0, 43.0, 43.0, 40.0, 44.0, 56.0, 63.0, 78.0, 47.0, 51.0, 56.0, 40.0, 58.0, 45.0, 51.0, 28.0, 29.0, 21.0, 19.0, 20.0, 23.0, 13.0, 4.0, 6.0, 5.0, 1.0, 4.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.324462890625, -0.3150978088378906, -0.30573272705078125, -0.2963676452636719, -0.2870025634765625, -0.2776374816894531, -0.26827239990234375, -0.2589073181152344, -0.249542236328125, -0.24017715454101562, -0.23081207275390625, -0.22144699096679688, -0.2120819091796875, -0.20271682739257812, -0.19335174560546875, -0.18398666381835938, -0.17462158203125, -0.16525650024414062, -0.15589141845703125, -0.14652633666992188, -0.1371612548828125, -0.12779617309570312, -0.11843109130859375, -0.10906600952148438, -0.099700927734375, -0.09033584594726562, -0.08097076416015625, -0.07160568237304688, -0.0622406005859375, -0.052875518798828125, -0.04351043701171875, -0.034145355224609375, -0.0247802734375, -0.015415191650390625, -0.00605010986328125, 0.003314971923828125, 0.0126800537109375, 0.022045135498046875, 0.03141021728515625, 0.040775299072265625, 0.050140380859375, 0.059505462646484375, 0.06887054443359375, 0.07823562622070312, 0.0876007080078125, 0.09696578979492188, 0.10633087158203125, 0.11569595336914062, 0.12506103515625, 0.13442611694335938, 0.14379119873046875, 0.15315628051757812, 0.1625213623046875, 0.17188644409179688, 0.18125152587890625, 0.19061660766601562, 0.199981689453125, 0.20934677124023438, 0.21871185302734375, 0.22807693481445312, 0.2374420166015625, 0.24680709838867188, 0.25617218017578125, 0.2655372619628906, 0.27490234375]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 2.0, 2.0, 4.0, 9.0, 2.0, 8.0, 11.0, 14.0, 18.0, 14.0, 35.0, 38.0, 73.0, 91.0, 145.0, 245.0, 424.0, 737.0, 1322.0, 2755.0, 5640.0, 13345.0, 45302.0, 3716111.0, 355705.0, 31970.0, 10579.0, 4707.0, 2249.0, 1134.0, 634.0, 358.0, 221.0, 122.0, 64.0, 35.0, 46.0, 40.0, 15.0, 19.0, 9.0, 9.0, 7.0, 3.0, 8.0, 1.0, 3.0, 1.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.8994140625, -0.8712005615234375, -0.842987060546875, -0.8147735595703125, -0.78656005859375, -0.7583465576171875, -0.730133056640625, -0.7019195556640625, -0.6737060546875, -0.6454925537109375, -0.617279052734375, -0.5890655517578125, -0.56085205078125, -0.5326385498046875, -0.504425048828125, -0.4762115478515625, -0.447998046875, -0.4197845458984375, -0.391571044921875, -0.3633575439453125, -0.33514404296875, -0.3069305419921875, -0.278717041015625, -0.2505035400390625, -0.2222900390625, -0.1940765380859375, -0.165863037109375, -0.1376495361328125, -0.10943603515625, -0.0812225341796875, -0.053009033203125, -0.0247955322265625, 0.00341796875, 0.0316314697265625, 0.059844970703125, 0.0880584716796875, 0.11627197265625, 0.1444854736328125, 0.172698974609375, 0.2009124755859375, 0.2291259765625, 0.2573394775390625, 0.285552978515625, 0.3137664794921875, 0.34197998046875, 0.3701934814453125, 0.398406982421875, 0.4266204833984375, 0.454833984375, 0.4830474853515625, 0.511260986328125, 0.5394744873046875, 0.56768798828125, 0.5959014892578125, 0.624114990234375, 0.6523284912109375, 0.6805419921875, 0.7087554931640625, 0.736968994140625, 0.7651824951171875, 0.79339599609375, 0.8216094970703125, 0.849822998046875, 0.8780364990234375, 0.90625]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 7.0, 3.0, 11.0, 10.0, 16.0, 17.0, 56.0, 227.0, 3362.0, 223.0, 62.0, 28.0, 18.0, 12.0, 6.0, 5.0, 6.0, 4.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.428955078125, -0.4195270538330078, -0.4100990295410156, -0.40067100524902344, -0.39124298095703125, -0.38181495666503906, -0.3723869323730469, -0.3629589080810547, -0.3535308837890625, -0.3441028594970703, -0.3346748352050781, -0.32524681091308594, -0.31581878662109375, -0.30639076232910156, -0.2969627380371094, -0.2875347137451172, -0.278106689453125, -0.2686786651611328, -0.2592506408691406, -0.24982261657714844, -0.24039459228515625, -0.23096656799316406, -0.22153854370117188, -0.2121105194091797, -0.2026824951171875, -0.1932544708251953, -0.18382644653320312, -0.17439842224121094, -0.16497039794921875, -0.15554237365722656, -0.14611434936523438, -0.1366863250732422, -0.12725830078125, -0.11783027648925781, -0.10840225219726562, -0.09897422790527344, -0.08954620361328125, -0.08011817932128906, -0.07069015502929688, -0.06126213073730469, -0.0518341064453125, -0.04240608215332031, -0.032978057861328125, -0.023550033569335938, -0.01412200927734375, -0.0046939849853515625, 0.004734039306640625, 0.014162063598632812, 0.023590087890625, 0.03301811218261719, 0.042446136474609375, 0.05187416076660156, 0.06130218505859375, 0.07073020935058594, 0.08015823364257812, 0.08958625793457031, 0.0990142822265625, 0.10844230651855469, 0.11787033081054688, 0.12729835510253906, 0.13672637939453125, 0.14615440368652344, 0.15558242797851562, 0.1650104522705078, 0.1744384765625]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 7.0, 17.0, 77.0, 220.0, 305.0, 227.0, 113.0, 26.0, 11.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7380779385566711, -0.6724780201911926, -0.6068781018257141, -0.5412781834602356, -0.4756782650947571, -0.41007834672927856, -0.34447842836380005, -0.27887850999832153, -0.21327859163284302, -0.1476786732673645, -0.08207875490188599, -0.01647883653640747, 0.049121081829071045, 0.11472100019454956, 0.18032091856002808, 0.2459208369255066, 0.3115207552909851, 0.3771206736564636, 0.44272059202194214, 0.5083205103874207, 0.5739204287528992, 0.6395203471183777, 0.7051202654838562, 0.7707201838493347, 0.8363201022148132, 0.9019200205802917, 0.9675199389457703, 1.0331199169158936, 1.098719835281372, 1.1643197536468506, 1.229919672012329, 1.2955195903778076, 1.3611195087432861, 1.4267194271087646, 1.4923193454742432, 1.5579192638397217, 1.6235191822052002, 1.6891191005706787, 1.7547190189361572, 1.8203189373016357, 1.8859188556671143, 1.9515187740325928, 2.0171186923980713, 2.08271861076355, 2.1483185291290283, 2.213918447494507, 2.2795183658599854, 2.345118284225464, 2.4107182025909424, 2.476318120956421, 2.5419180393218994, 2.607517957687378, 2.6731178760528564, 2.738717794418335, 2.8043177127838135, 2.869917631149292, 2.9355175495147705, 3.001117467880249, 3.0667173862457275, 3.132317304611206, 3.1979172229766846, 3.263517141342163, 3.3291170597076416, 3.39471697807312, 3.4603168964385986]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 7.0, 1.0, 5.0, 4.0, 8.0, 8.0, 7.0, 12.0, 10.0, 22.0, 22.0, 25.0, 25.0, 32.0, 33.0, 36.0, 31.0, 39.0, 41.0, 35.0, 52.0, 50.0, 41.0, 39.0, 37.0, 40.0, 31.0, 42.0, 42.0, 29.0, 34.0, 29.0, 19.0, 20.0, 13.0, 15.0, 14.0, 12.0, 9.0, 11.0, 12.0, 4.0, 2.0, 2.0, 2.0, 4.0, 3.0, 1.0, 3.0, 1.0], "bins": [-0.5773084759712219, -0.5618777275085449, -0.5464469790458679, -0.5310162305831909, -0.5155854225158691, -0.5001546740531921, -0.48472392559051514, -0.46929317712783813, -0.45386242866516113, -0.43843168020248413, -0.42300093173980713, -0.40757015347480774, -0.39213940501213074, -0.37670865654945374, -0.36127787828445435, -0.34584712982177734, -0.33041638135910034, -0.31498563289642334, -0.29955488443374634, -0.28412410616874695, -0.26869335770606995, -0.25326260924339294, -0.23783184587955475, -0.22240108251571655, -0.20697033405303955, -0.19153958559036255, -0.17610882222652435, -0.16067805886268616, -0.14524731040000916, -0.12981656193733215, -0.11438579857349396, -0.09895504266023636, -0.08352428674697876, -0.06809353083372116, -0.05266277492046356, -0.03723201900720596, -0.021801263093948364, -0.006370507180690765, 0.009060248732566833, 0.024491004645824432, 0.03992176055908203, 0.05535251647233963, 0.07078327238559723, 0.08621402829885483, 0.10164478421211243, 0.11707554012537003, 0.13250629603862762, 0.14793705940246582, 0.16336780786514282, 0.17879855632781982, 0.19422931969165802, 0.20966008305549622, 0.22509083151817322, 0.24052157998085022, 0.2559523582458496, 0.2713831067085266, 0.2868138551712036, 0.3022446036338806, 0.3176753520965576, 0.333106130361557, 0.348536878824234, 0.363967627286911, 0.3793984055519104, 0.3948291540145874, 0.4102599024772644]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 7.0, 5.0, 8.0, 6.0, 10.0, 10.0, 21.0, 31.0, 53.0, 68.0, 104.0, 222.0, 461.0, 1440.0, 5779.0, 37468.0, 322714.0, 567646.0, 96527.0, 12112.0, 2440.0, 731.0, 340.0, 131.0, 84.0, 41.0, 39.0, 24.0, 8.0, 15.0, 7.0, 5.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.84375, -1.788238525390625, -1.73272705078125, -1.677215576171875, -1.6217041015625, -1.566192626953125, -1.51068115234375, -1.455169677734375, -1.399658203125, -1.344146728515625, -1.28863525390625, -1.233123779296875, -1.1776123046875, -1.122100830078125, -1.06658935546875, -1.011077880859375, -0.95556640625, -0.900054931640625, -0.84454345703125, -0.789031982421875, -0.7335205078125, -0.678009033203125, -0.62249755859375, -0.566986083984375, -0.511474609375, -0.455963134765625, -0.40045166015625, -0.344940185546875, -0.2894287109375, -0.233917236328125, -0.17840576171875, -0.122894287109375, -0.0673828125, -0.011871337890625, 0.04364013671875, 0.099151611328125, 0.1546630859375, 0.210174560546875, 0.26568603515625, 0.321197509765625, 0.376708984375, 0.432220458984375, 0.48773193359375, 0.543243408203125, 0.5987548828125, 0.654266357421875, 0.70977783203125, 0.765289306640625, 0.82080078125, 0.876312255859375, 0.93182373046875, 0.987335205078125, 1.0428466796875, 1.098358154296875, 1.15386962890625, 1.209381103515625, 1.264892578125, 1.320404052734375, 1.37591552734375, 1.431427001953125, 1.4869384765625, 1.542449951171875, 1.59796142578125, 1.653472900390625, 1.708984375]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 6.0, 8.0, 4.0, 6.0, 12.0, 11.0, 16.0, 23.0, 25.0, 39.0, 33.0, 31.0, 33.0, 40.0, 50.0, 49.0, 48.0, 72.0, 40.0, 54.0, 44.0, 49.0, 40.0, 36.0, 38.0, 37.0, 28.0, 25.0, 23.0, 18.0, 19.0, 10.0, 16.0, 6.0, 8.0, 3.0, 0.0, 3.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.319091796875, -0.3104724884033203, -0.3018531799316406, -0.29323387145996094, -0.28461456298828125, -0.27599525451660156, -0.2673759460449219, -0.2587566375732422, -0.2501373291015625, -0.2415180206298828, -0.23289871215820312, -0.22427940368652344, -0.21566009521484375, -0.20704078674316406, -0.19842147827148438, -0.1898021697998047, -0.181182861328125, -0.1725635528564453, -0.16394424438476562, -0.15532493591308594, -0.14670562744140625, -0.13808631896972656, -0.12946701049804688, -0.12084770202636719, -0.1122283935546875, -0.10360908508300781, -0.09498977661132812, -0.08637046813964844, -0.07775115966796875, -0.06913185119628906, -0.060512542724609375, -0.05189323425292969, -0.04327392578125, -0.03465461730957031, -0.026035308837890625, -0.017416000366210938, -0.00879669189453125, -0.0001773834228515625, 0.008441925048828125, 0.017061233520507812, 0.0256805419921875, 0.03429985046386719, 0.042919158935546875, 0.05153846740722656, 0.06015777587890625, 0.06877708435058594, 0.07739639282226562, 0.08601570129394531, 0.094635009765625, 0.10325431823730469, 0.11187362670898438, 0.12049293518066406, 0.12911224365234375, 0.13773155212402344, 0.14635086059570312, 0.1549701690673828, 0.1635894775390625, 0.1722087860107422, 0.18082809448242188, 0.18944740295410156, 0.19806671142578125, 0.20668601989746094, 0.21530532836914062, 0.2239246368408203, 0.2325439453125]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 5.0, 1.0, 5.0, 5.0, 6.0, 10.0, 9.0, 20.0, 32.0, 36.0, 57.0, 73.0, 130.0, 207.0, 320.0, 697.0, 1553.0, 4919.0, 21906.0, 149015.0, 631015.0, 200586.0, 28424.0, 5897.0, 1837.0, 796.0, 378.0, 213.0, 129.0, 101.0, 77.0, 35.0, 23.0, 21.0, 9.0, 5.0, 3.0, 3.0, 5.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.2294921875, -1.18646240234375, -1.1434326171875, -1.10040283203125, -1.057373046875, -1.01434326171875, -0.9713134765625, -0.92828369140625, -0.88525390625, -0.84222412109375, -0.7991943359375, -0.75616455078125, -0.713134765625, -0.67010498046875, -0.6270751953125, -0.58404541015625, -0.541015625, -0.49798583984375, -0.4549560546875, -0.41192626953125, -0.368896484375, -0.32586669921875, -0.2828369140625, -0.23980712890625, -0.19677734375, -0.15374755859375, -0.1107177734375, -0.06768798828125, -0.024658203125, 0.01837158203125, 0.0614013671875, 0.10443115234375, 0.1474609375, 0.19049072265625, 0.2335205078125, 0.27655029296875, 0.319580078125, 0.36260986328125, 0.4056396484375, 0.44866943359375, 0.49169921875, 0.53472900390625, 0.5777587890625, 0.62078857421875, 0.663818359375, 0.70684814453125, 0.7498779296875, 0.79290771484375, 0.8359375, 0.87896728515625, 0.9219970703125, 0.96502685546875, 1.008056640625, 1.05108642578125, 1.0941162109375, 1.13714599609375, 1.18017578125, 1.22320556640625, 1.2662353515625, 1.30926513671875, 1.352294921875, 1.39532470703125, 1.4383544921875, 1.48138427734375, 1.5244140625]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 6.0, 8.0, 4.0, 9.0, 6.0, 10.0, 12.0, 14.0, 18.0, 18.0, 31.0, 19.0, 21.0, 27.0, 34.0, 42.0, 36.0, 34.0, 45.0, 56.0, 52.0, 43.0, 47.0, 39.0, 37.0, 29.0, 48.0, 30.0, 31.0, 25.0, 32.0, 29.0, 20.0, 16.0, 20.0, 16.0, 9.0, 6.0, 6.0, 4.0, 7.0, 3.0, 4.0, 2.0, 3.0, 0.0, 2.0, 1.0, 2.0], "bins": [-1.2685546875, -1.2338943481445312, -1.1992340087890625, -1.1645736694335938, -1.129913330078125, -1.0952529907226562, -1.0605926513671875, -1.0259323120117188, -0.99127197265625, -0.9566116333007812, -0.9219512939453125, -0.8872909545898438, -0.852630615234375, -0.8179702758789062, -0.7833099365234375, -0.7486495971679688, -0.7139892578125, -0.6793289184570312, -0.6446685791015625, -0.6100082397460938, -0.575347900390625, -0.5406875610351562, -0.5060272216796875, -0.47136688232421875, -0.43670654296875, -0.40204620361328125, -0.3673858642578125, -0.33272552490234375, -0.298065185546875, -0.26340484619140625, -0.2287445068359375, -0.19408416748046875, -0.159423828125, -0.12476348876953125, -0.0901031494140625, -0.05544281005859375, -0.020782470703125, 0.01387786865234375, 0.0485382080078125, 0.08319854736328125, 0.11785888671875, 0.15251922607421875, 0.1871795654296875, 0.22183990478515625, 0.256500244140625, 0.29116058349609375, 0.3258209228515625, 0.36048126220703125, 0.3951416015625, 0.42980194091796875, 0.4644622802734375, 0.49912261962890625, 0.533782958984375, 0.5684432983398438, 0.6031036376953125, 0.6377639770507812, 0.67242431640625, 0.7070846557617188, 0.7417449951171875, 0.7764053344726562, 0.811065673828125, 0.8457260131835938, 0.8803863525390625, 0.9150466918945312, 0.94970703125]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 5.0, 2.0, 6.0, 5.0, 8.0, 12.0, 5.0, 12.0, 14.0, 24.0, 27.0, 40.0, 53.0, 59.0, 93.0, 140.0, 221.0, 354.0, 656.0, 1548.0, 5648.0, 61164.0, 922030.0, 48167.0, 5045.0, 1551.0, 638.0, 365.0, 182.0, 128.0, 93.0, 61.0, 56.0, 32.0, 26.0, 25.0, 21.0, 7.0, 8.0, 10.0, 5.0, 4.0, 2.0, 4.0, 3.0, 3.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.6806640625, -1.6304473876953125, -1.580230712890625, -1.5300140380859375, -1.47979736328125, -1.4295806884765625, -1.379364013671875, -1.3291473388671875, -1.2789306640625, -1.2287139892578125, -1.178497314453125, -1.1282806396484375, -1.07806396484375, -1.0278472900390625, -0.977630615234375, -0.9274139404296875, -0.877197265625, -0.8269805908203125, -0.776763916015625, -0.7265472412109375, -0.67633056640625, -0.6261138916015625, -0.575897216796875, -0.5256805419921875, -0.4754638671875, -0.4252471923828125, -0.375030517578125, -0.3248138427734375, -0.27459716796875, -0.2243804931640625, -0.174163818359375, -0.1239471435546875, -0.07373046875, -0.0235137939453125, 0.026702880859375, 0.0769195556640625, 0.12713623046875, 0.1773529052734375, 0.227569580078125, 0.2777862548828125, 0.3280029296875, 0.3782196044921875, 0.428436279296875, 0.4786529541015625, 0.52886962890625, 0.5790863037109375, 0.629302978515625, 0.6795196533203125, 0.729736328125, 0.7799530029296875, 0.830169677734375, 0.8803863525390625, 0.93060302734375, 0.9808197021484375, 1.031036376953125, 1.0812530517578125, 1.1314697265625, 1.1816864013671875, 1.231903076171875, 1.2821197509765625, 1.33233642578125, 1.3825531005859375, 1.432769775390625, 1.4829864501953125, 1.533203125]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 6.0, 4.0, 13.0, 10.0, 17.0, 47.0, 84.0, 86.0, 115.0, 171.0, 143.0, 114.0, 70.0, 54.0, 29.0, 24.0, 14.0, 5.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00011461973190307617, -0.0001095188781619072, -0.00010441802442073822, -9.931717067956924e-05, -9.421631693840027e-05, -8.911546319723129e-05, -8.401460945606232e-05, -7.891375571489334e-05, -7.381290197372437e-05, -6.871204823255539e-05, -6.361119449138641e-05, -5.851034075021744e-05, -5.340948700904846e-05, -4.8308633267879486e-05, -4.320777952671051e-05, -3.8106925785541534e-05, -3.300607204437256e-05, -2.7905218303203583e-05, -2.2804364562034607e-05, -1.770351082086563e-05, -1.2602657079696655e-05, -7.5018033385276794e-06, -2.4009495973587036e-06, 2.6999041438102722e-06, 7.800757884979248e-06, 1.2901611626148224e-05, 1.80024653673172e-05, 2.3103319108486176e-05, 2.820417284965515e-05, 3.330502659082413e-05, 3.84058803319931e-05, 4.350673407316208e-05, 4.8607587814331055e-05, 5.370844155550003e-05, 5.8809295296669006e-05, 6.391014903783798e-05, 6.901100277900696e-05, 7.411185652017593e-05, 7.921271026134491e-05, 8.431356400251389e-05, 8.941441774368286e-05, 9.451527148485184e-05, 9.961612522602081e-05, 0.00010471697896718979, 0.00010981783270835876, 0.00011491868644952774, 0.00012001954019069672, 0.0001251203939318657, 0.00013022124767303467, 0.00013532210141420364, 0.00014042295515537262, 0.0001455238088965416, 0.00015062466263771057, 0.00015572551637887955, 0.00016082637012004852, 0.0001659272238612175, 0.00017102807760238647, 0.00017612893134355545, 0.00018122978508472443, 0.0001863306388258934, 0.00019143149256706238, 0.00019653234630823135, 0.00020163320004940033, 0.0002067340537905693, 0.00021183490753173828]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 3.0, 1.0, 9.0, 9.0, 16.0, 23.0, 28.0, 47.0, 70.0, 108.0, 238.0, 436.0, 1182.0, 3956.0, 42088.0, 948907.0, 45061.0, 4161.0, 1154.0, 460.0, 253.0, 129.0, 75.0, 44.0, 25.0, 22.0, 15.0, 11.0, 9.0, 5.0, 4.0, 3.0, 7.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.73828125, -1.681304931640625, -1.62432861328125, -1.567352294921875, -1.5103759765625, -1.453399658203125, -1.39642333984375, -1.339447021484375, -1.282470703125, -1.225494384765625, -1.16851806640625, -1.111541748046875, -1.0545654296875, -0.997589111328125, -0.94061279296875, -0.883636474609375, -0.82666015625, -0.769683837890625, -0.71270751953125, -0.655731201171875, -0.5987548828125, -0.541778564453125, -0.48480224609375, -0.427825927734375, -0.370849609375, -0.313873291015625, -0.25689697265625, -0.199920654296875, -0.1429443359375, -0.085968017578125, -0.02899169921875, 0.027984619140625, 0.0849609375, 0.141937255859375, 0.19891357421875, 0.255889892578125, 0.3128662109375, 0.369842529296875, 0.42681884765625, 0.483795166015625, 0.540771484375, 0.597747802734375, 0.65472412109375, 0.711700439453125, 0.7686767578125, 0.825653076171875, 0.88262939453125, 0.939605712890625, 0.99658203125, 1.053558349609375, 1.11053466796875, 1.167510986328125, 1.2244873046875, 1.281463623046875, 1.33843994140625, 1.395416259765625, 1.452392578125, 1.509368896484375, 1.56634521484375, 1.623321533203125, 1.6802978515625, 1.737274169921875, 1.79425048828125, 1.851226806640625, 1.908203125]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 4.0, 10.0, 22.0, 58.0, 173.0, 296.0, 248.0, 111.0, 37.0, 13.0, 10.0, 3.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.96875, -2.8922882080078125, -2.815826416015625, -2.7393646240234375, -2.66290283203125, -2.5864410400390625, -2.509979248046875, -2.4335174560546875, -2.3570556640625, -2.2805938720703125, -2.204132080078125, -2.1276702880859375, -2.05120849609375, -1.9747467041015625, -1.898284912109375, -1.8218231201171875, -1.745361328125, -1.6688995361328125, -1.592437744140625, -1.5159759521484375, -1.43951416015625, -1.3630523681640625, -1.286590576171875, -1.2101287841796875, -1.1336669921875, -1.0572052001953125, -0.980743408203125, -0.9042816162109375, -0.82781982421875, -0.7513580322265625, -0.674896240234375, -0.5984344482421875, -0.52197265625, -0.4455108642578125, -0.369049072265625, -0.2925872802734375, -0.21612548828125, -0.1396636962890625, -0.063201904296875, 0.0132598876953125, 0.0897216796875, 0.1661834716796875, 0.242645263671875, 0.3191070556640625, 0.39556884765625, 0.4720306396484375, 0.548492431640625, 0.6249542236328125, 0.701416015625, 0.7778778076171875, 0.854339599609375, 0.9308013916015625, 1.00726318359375, 1.0837249755859375, 1.160186767578125, 1.2366485595703125, 1.3131103515625, 1.3895721435546875, 1.466033935546875, 1.5424957275390625, 1.61895751953125, 1.6954193115234375, 1.771881103515625, 1.8483428955078125, 1.9248046875]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 4.0, 3.0, 5.0, 7.0, 17.0, 20.0, 18.0, 34.0, 44.0, 56.0, 78.0, 83.0, 96.0, 89.0, 72.0, 75.0, 79.0, 68.0, 38.0, 27.0, 24.0, 18.0, 13.0, 13.0, 9.0, 2.0, 4.0, 4.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.188946723937988, -5.946016311645508, -5.703085899353027, -5.460155010223389, -5.217224597930908, -4.974294185638428, -4.731363296508789, -4.488432884216309, -4.245502471923828, -4.002572059631348, -3.759641408920288, -3.5167107582092285, -3.273780345916748, -3.0308499336242676, -2.787919282913208, -2.5449886322021484, -2.302058219909668, -2.0591278076171875, -1.816197156906128, -1.573266625404358, -1.330336093902588, -1.0874055624008179, -0.8444750308990479, -0.6015444993972778, -0.3586139678955078, -0.11568343639373779, 0.12724709510803223, 0.37017762660980225, 0.6131081581115723, 0.8560386896133423, 1.0989692211151123, 1.3418997526168823, 1.5848302841186523, 1.8277608156204224, 2.0706913471221924, 2.313621997833252, 2.5565524101257324, 2.799482822418213, 3.0424134731292725, 3.285344123840332, 3.5282745361328125, 3.771204948425293, 4.014135360717773, 4.257066249847412, 4.499996662139893, 4.742927074432373, 4.985857963562012, 5.228788375854492, 5.471718788146973, 5.714649200439453, 5.957579612731934, 6.200510501861572, 6.443440914154053, 6.686371326446533, 6.929302215576172, 7.172232627868652, 7.415163040161133, 7.658093452453613, 7.901023864746094, 8.143954277038574, 8.386884689331055, 8.629816055297852, 8.872746467590332, 9.115676879882812, 9.358607292175293]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 3.0, 5.0, 2.0, 3.0, 4.0, 11.0, 6.0, 7.0, 12.0, 7.0, 14.0, 10.0, 11.0, 15.0, 15.0, 22.0, 19.0, 32.0, 25.0, 34.0, 20.0, 30.0, 34.0, 44.0, 54.0, 49.0, 33.0, 47.0, 34.0, 41.0, 35.0, 40.0, 37.0, 37.0, 22.0, 27.0, 27.0, 25.0, 13.0, 21.0, 19.0, 12.0, 9.0, 8.0, 6.0, 6.0, 5.0, 5.0, 5.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0], "bins": [-6.115711212158203, -5.9148054122924805, -5.713899612426758, -5.512993812561035, -5.312088489532471, -5.111182689666748, -4.910276889801025, -4.709371089935303, -4.50846529006958, -4.307559490203857, -4.106653690338135, -3.905748128890991, -3.7048423290252686, -3.503936767578125, -3.3030309677124023, -3.1021251678466797, -2.901219606399536, -2.7003138065338135, -2.49940824508667, -2.2985024452209473, -2.0975966453552246, -1.8966909646987915, -1.6957852840423584, -1.4948794841766357, -1.2939738035202026, -1.0930681228637695, -0.8921623229980469, -0.6912566423416138, -0.4903509020805359, -0.289445161819458, -0.0885394811630249, 0.11236631870269775, 0.31327199935913086, 0.5141777396202087, 0.7150834798812866, 0.9159891605377197, 1.1168949604034424, 1.3178006410598755, 1.5187063217163086, 1.7196121215820312, 1.9205178022384644, 2.1214234828948975, 2.32232928276062, 2.5232348442077637, 2.7241406440734863, 2.925046443939209, 3.1259522438049316, 3.3268580436706543, 3.527763605117798, 3.7286694049835205, 3.929574966430664, 4.130480766296387, 4.331386566162109, 4.532292366027832, 4.733198165893555, 4.934103965759277, 5.135009288787842, 5.3359150886535645, 5.536820888519287, 5.737726211547852, 5.938632011413574, 6.139537811279297, 6.3404436111450195, 6.541349411010742, 6.742255210876465]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 7.0, 2.0, 5.0, 8.0, 18.0, 20.0, 66.0, 120.0, 332.0, 1494.0, 18217.0, 4154248.0, 17811.0, 1415.0, 320.0, 104.0, 40.0, 26.0, 16.0, 8.0, 2.0, 6.0, 0.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.86328125, -4.713623046875, -4.56396484375, -4.414306640625, -4.2646484375, -4.114990234375, -3.96533203125, -3.815673828125, -3.666015625, -3.516357421875, -3.36669921875, -3.217041015625, -3.0673828125, -2.917724609375, -2.76806640625, -2.618408203125, -2.46875, -2.319091796875, -2.16943359375, -2.019775390625, -1.8701171875, -1.720458984375, -1.57080078125, -1.421142578125, -1.271484375, -1.121826171875, -0.97216796875, -0.822509765625, -0.6728515625, -0.523193359375, -0.37353515625, -0.223876953125, -0.07421875, 0.075439453125, 0.22509765625, 0.374755859375, 0.5244140625, 0.674072265625, 0.82373046875, 0.973388671875, 1.123046875, 1.272705078125, 1.42236328125, 1.572021484375, 1.7216796875, 1.871337890625, 2.02099609375, 2.170654296875, 2.3203125, 2.469970703125, 2.61962890625, 2.769287109375, 2.9189453125, 3.068603515625, 3.21826171875, 3.367919921875, 3.517578125, 3.667236328125, 3.81689453125, 3.966552734375, 4.1162109375, 4.265869140625, 4.41552734375, 4.565185546875, 4.71484375]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 1.0, 5.0, 3.0, 12.0, 8.0, 10.0, 15.0, 22.0, 20.0, 22.0, 27.0, 39.0, 42.0, 46.0, 57.0, 53.0, 54.0, 57.0, 68.0, 57.0, 54.0, 44.0, 40.0, 60.0, 28.0, 30.0, 31.0, 21.0, 22.0, 6.0, 10.0, 9.0, 15.0, 1.0, 6.0, 5.0, 0.0, 4.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.32373046875, -0.3129768371582031, -0.30222320556640625, -0.2914695739746094, -0.2807159423828125, -0.2699623107910156, -0.25920867919921875, -0.24845504760742188, -0.237701416015625, -0.22694778442382812, -0.21619415283203125, -0.20544052124023438, -0.1946868896484375, -0.18393325805664062, -0.17317962646484375, -0.16242599487304688, -0.15167236328125, -0.14091873168945312, -0.13016510009765625, -0.11941146850585938, -0.1086578369140625, -0.09790420532226562, -0.08715057373046875, -0.07639694213867188, -0.065643310546875, -0.054889678955078125, -0.04413604736328125, -0.033382415771484375, -0.0226287841796875, -0.011875152587890625, -0.00112152099609375, 0.009632110595703125, 0.0203857421875, 0.031139373779296875, 0.04189300537109375, 0.052646636962890625, 0.0634002685546875, 0.07415390014648438, 0.08490753173828125, 0.09566116333007812, 0.106414794921875, 0.11716842651367188, 0.12792205810546875, 0.13867568969726562, 0.1494293212890625, 0.16018295288085938, 0.17093658447265625, 0.18169021606445312, 0.19244384765625, 0.20319747924804688, 0.21395111083984375, 0.22470474243164062, 0.2354583740234375, 0.24621200561523438, 0.25696563720703125, 0.2677192687988281, 0.278472900390625, 0.2892265319824219, 0.29998016357421875, 0.3107337951660156, 0.3214874267578125, 0.3322410583496094, 0.34299468994140625, 0.3537483215332031, 0.364501953125]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 6.0, 5.0, 6.0, 9.0, 11.0, 24.0, 34.0, 46.0, 62.0, 80.0, 93.0, 185.0, 264.0, 412.0, 541.0, 834.0, 1339.0, 2209.0, 3597.0, 7278.0, 16084.0, 47382.0, 324438.0, 3642325.0, 99036.0, 25084.0, 10224.0, 4990.0, 2809.0, 1690.0, 1075.0, 661.0, 460.0, 310.0, 211.0, 153.0, 90.0, 58.0, 51.0, 36.0, 24.0, 21.0, 18.0, 6.0, 11.0, 4.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.8447265625, -0.8167877197265625, -0.788848876953125, -0.7609100341796875, -0.73297119140625, -0.7050323486328125, -0.677093505859375, -0.6491546630859375, -0.6212158203125, -0.5932769775390625, -0.565338134765625, -0.5373992919921875, -0.50946044921875, -0.4815216064453125, -0.453582763671875, -0.4256439208984375, -0.397705078125, -0.3697662353515625, -0.341827392578125, -0.3138885498046875, -0.28594970703125, -0.2580108642578125, -0.230072021484375, -0.2021331787109375, -0.1741943359375, -0.1462554931640625, -0.118316650390625, -0.0903778076171875, -0.06243896484375, -0.0345001220703125, -0.006561279296875, 0.0213775634765625, 0.04931640625, 0.0772552490234375, 0.105194091796875, 0.1331329345703125, 0.16107177734375, 0.1890106201171875, 0.216949462890625, 0.2448883056640625, 0.2728271484375, 0.3007659912109375, 0.328704833984375, 0.3566436767578125, 0.38458251953125, 0.4125213623046875, 0.440460205078125, 0.4683990478515625, 0.496337890625, 0.5242767333984375, 0.552215576171875, 0.5801544189453125, 0.60809326171875, 0.6360321044921875, 0.663970947265625, 0.6919097900390625, 0.7198486328125, 0.7477874755859375, 0.775726318359375, 0.8036651611328125, 0.83160400390625, 0.8595428466796875, 0.887481689453125, 0.9154205322265625, 0.943359375]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 4.0, 7.0, 3.0, 4.0, 5.0, 13.0, 13.0, 30.0, 52.0, 116.0, 319.0, 3034.0, 250.0, 94.0, 46.0, 25.0, 14.0, 9.0, 18.0, 3.0, 3.0, 5.0, 3.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.375732421875, -0.364166259765625, -0.35260009765625, -0.341033935546875, -0.3294677734375, -0.317901611328125, -0.30633544921875, -0.294769287109375, -0.283203125, -0.271636962890625, -0.26007080078125, -0.248504638671875, -0.2369384765625, -0.225372314453125, -0.21380615234375, -0.202239990234375, -0.190673828125, -0.179107666015625, -0.16754150390625, -0.155975341796875, -0.1444091796875, -0.132843017578125, -0.12127685546875, -0.109710693359375, -0.09814453125, -0.086578369140625, -0.07501220703125, -0.063446044921875, -0.0518798828125, -0.040313720703125, -0.02874755859375, -0.017181396484375, -0.005615234375, 0.005950927734375, 0.01751708984375, 0.029083251953125, 0.0406494140625, 0.052215576171875, 0.06378173828125, 0.075347900390625, 0.0869140625, 0.098480224609375, 0.11004638671875, 0.121612548828125, 0.1331787109375, 0.144744873046875, 0.15631103515625, 0.167877197265625, 0.179443359375, 0.191009521484375, 0.20257568359375, 0.214141845703125, 0.2257080078125, 0.237274169921875, 0.24884033203125, 0.260406494140625, 0.27197265625, 0.283538818359375, 0.29510498046875, 0.306671142578125, 0.3182373046875, 0.329803466796875, 0.34136962890625, 0.352935791015625, 0.364501953125]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 3.0, 15.0, 74.0, 280.0, 433.0, 172.0, 27.0, 9.0, 4.0], "bins": [-9.228479385375977, -9.073238372802734, -8.917998313903809, -8.762757301330566, -8.607516288757324, -8.452276229858398, -8.297035217285156, -8.141794204711914, -7.98655366897583, -7.831313133239746, -7.676072120666504, -7.52083158493042, -7.365591049194336, -7.210350036621094, -7.05510950088501, -6.899868965148926, -6.744627952575684, -6.5893874168396, -6.434146404266357, -6.278905868530273, -6.1236653327941895, -5.968424320220947, -5.813183784484863, -5.657942771911621, -5.502702713012695, -5.347462177276611, -5.192221164703369, -5.036980628967285, -4.881740093231201, -4.726499080657959, -4.571258544921875, -4.416017532348633, -4.260776519775391, -4.105535984039307, -3.9502952098846436, -3.7950544357299805, -3.6398136615753174, -3.4845728874206543, -3.3293323516845703, -3.1740915775299072, -3.0188510417938232, -2.86361026763916, -2.708369731903076, -2.553128957748413, -2.39788818359375, -2.242647647857666, -2.087406873703003, -1.9321660995483398, -1.7769255638122559, -1.6216849088668823, -1.4664441347122192, -1.3112034797668457, -1.1559627056121826, -1.000722050666809, -0.8454813957214355, -0.6902406215667725, -0.5349999070167542, -0.37975919246673584, -0.22451850771903992, -0.069277822971344, 0.08596289157867432, 0.24120360612869263, 0.39644426107406616, 0.5516850352287292, 0.7069256901741028]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 4.0, 5.0, 5.0, 2.0, 8.0, 6.0, 9.0, 9.0, 13.0, 16.0, 15.0, 11.0, 14.0, 33.0, 24.0, 29.0, 22.0, 36.0, 35.0, 29.0, 38.0, 40.0, 37.0, 43.0, 36.0, 36.0, 39.0, 48.0, 42.0, 23.0, 33.0, 36.0, 27.0, 34.0, 22.0, 19.0, 19.0, 22.0, 15.0, 12.0, 19.0, 12.0, 8.0, 8.0, 8.0, 3.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.6587540507316589, -0.6377313137054443, -0.6167086362838745, -0.5956858992576599, -0.5746631622314453, -0.5536404848098755, -0.5326177477836609, -0.5115950107574463, -0.4905723035335541, -0.46954959630966187, -0.44852685928344727, -0.42750415205955505, -0.40648144483566284, -0.38545870780944824, -0.36443600058555603, -0.3434132933616638, -0.3223905563354492, -0.301367849111557, -0.2803451120853424, -0.2593224048614502, -0.2382996827363968, -0.21727696061134338, -0.19625425338745117, -0.17523153126239777, -0.15420880913734436, -0.13318608701229095, -0.11216337233781815, -0.09114065766334534, -0.07011793553829193, -0.049095213413238525, -0.028072506189346313, -0.007049784064292908, 0.013972878456115723, 0.03499559685587883, 0.05601831525564194, 0.07704102993011475, 0.09806375205516815, 0.11908647418022156, 0.14010918140411377, 0.16113190352916718, 0.18215462565422058, 0.203177347779274, 0.2242000699043274, 0.2452227771282196, 0.2662454843521118, 0.2872682213783264, 0.30829092860221863, 0.32931363582611084, 0.35033637285232544, 0.37135908007621765, 0.39238181710243225, 0.41340452432632446, 0.43442726135253906, 0.4554499685764313, 0.4764726758003235, 0.4974954128265381, 0.5185180902481079, 0.5395408272743225, 0.5605635046958923, 0.5815862417221069, 0.6026089787483215, 0.6236317157745361, 0.644654393196106, 0.6656771302223206, 0.6866998672485352]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 5.0, 4.0, 2.0, 6.0, 4.0, 9.0, 10.0, 6.0, 18.0, 20.0, 17.0, 29.0, 41.0, 61.0, 84.0, 156.0, 220.0, 402.0, 733.0, 1519.0, 3526.0, 9054.0, 25921.0, 81428.0, 239638.0, 376133.0, 206335.0, 67336.0, 21712.0, 7838.0, 3155.0, 1415.0, 681.0, 398.0, 217.0, 125.0, 77.0, 64.0, 52.0, 31.0, 23.0, 10.0, 9.0, 7.0, 7.0, 6.0, 6.0, 5.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0], "bins": [-0.99609375, -0.9671554565429688, -0.9382171630859375, -0.9092788696289062, -0.880340576171875, -0.8514022827148438, -0.8224639892578125, -0.7935256958007812, -0.76458740234375, -0.7356491088867188, -0.7067108154296875, -0.6777725219726562, -0.648834228515625, -0.6198959350585938, -0.5909576416015625, -0.5620193481445312, -0.5330810546875, -0.5041427612304688, -0.4752044677734375, -0.44626617431640625, -0.417327880859375, -0.38838958740234375, -0.3594512939453125, -0.33051300048828125, -0.30157470703125, -0.27263641357421875, -0.2436981201171875, -0.21475982666015625, -0.185821533203125, -0.15688323974609375, -0.1279449462890625, -0.09900665283203125, -0.070068359375, -0.04113006591796875, -0.0121917724609375, 0.01674652099609375, 0.045684814453125, 0.07462310791015625, 0.1035614013671875, 0.13249969482421875, 0.16143798828125, 0.19037628173828125, 0.2193145751953125, 0.24825286865234375, 0.277191162109375, 0.30612945556640625, 0.3350677490234375, 0.36400604248046875, 0.3929443359375, 0.42188262939453125, 0.4508209228515625, 0.47975921630859375, 0.508697509765625, 0.5376358032226562, 0.5665740966796875, 0.5955123901367188, 0.62445068359375, 0.6533889770507812, 0.6823272705078125, 0.7112655639648438, 0.740203857421875, 0.7691421508789062, 0.7980804443359375, 0.8270187377929688, 0.85595703125]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 2.0, 3.0, 5.0, 6.0, 8.0, 10.0, 8.0, 14.0, 19.0, 22.0, 24.0, 22.0, 26.0, 45.0, 39.0, 43.0, 57.0, 51.0, 52.0, 51.0, 54.0, 54.0, 43.0, 37.0, 44.0, 35.0, 44.0, 32.0, 33.0, 32.0, 13.0, 10.0, 10.0, 11.0, 9.0, 9.0, 5.0, 4.0, 4.0, 5.0, 2.0, 7.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.3359375, -0.32544708251953125, -0.3149566650390625, -0.30446624755859375, -0.293975830078125, -0.28348541259765625, -0.2729949951171875, -0.26250457763671875, -0.25201416015625, -0.24152374267578125, -0.2310333251953125, -0.22054290771484375, -0.210052490234375, -0.19956207275390625, -0.1890716552734375, -0.17858123779296875, -0.1680908203125, -0.15760040283203125, -0.1471099853515625, -0.13661956787109375, -0.126129150390625, -0.11563873291015625, -0.1051483154296875, -0.09465789794921875, -0.08416748046875, -0.07367706298828125, -0.0631866455078125, -0.05269622802734375, -0.042205810546875, -0.03171539306640625, -0.0212249755859375, -0.01073455810546875, -0.000244140625, 0.01024627685546875, 0.0207366943359375, 0.03122711181640625, 0.041717529296875, 0.05220794677734375, 0.0626983642578125, 0.07318878173828125, 0.08367919921875, 0.09416961669921875, 0.1046600341796875, 0.11515045166015625, 0.125640869140625, 0.13613128662109375, 0.1466217041015625, 0.15711212158203125, 0.1676025390625, 0.17809295654296875, 0.1885833740234375, 0.19907379150390625, 0.209564208984375, 0.22005462646484375, 0.2305450439453125, 0.24103546142578125, 0.25152587890625, 0.26201629638671875, 0.2725067138671875, 0.28299713134765625, 0.293487548828125, 0.30397796630859375, 0.3144683837890625, 0.32495880126953125, 0.33544921875]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 5.0, 6.0, 3.0, 9.0, 18.0, 15.0, 31.0, 26.0, 45.0, 76.0, 91.0, 184.0, 257.0, 459.0, 808.0, 1749.0, 3962.0, 11173.0, 44029.0, 266752.0, 562989.0, 120254.0, 23109.0, 6898.0, 2714.0, 1290.0, 620.0, 376.0, 222.0, 136.0, 83.0, 51.0, 43.0, 22.0, 24.0, 9.0, 10.0, 4.0, 2.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.306640625, -1.2648773193359375, -1.223114013671875, -1.1813507080078125, -1.13958740234375, -1.0978240966796875, -1.056060791015625, -1.0142974853515625, -0.9725341796875, -0.9307708740234375, -0.889007568359375, -0.8472442626953125, -0.80548095703125, -0.7637176513671875, -0.721954345703125, -0.6801910400390625, -0.638427734375, -0.5966644287109375, -0.554901123046875, -0.5131378173828125, -0.47137451171875, -0.4296112060546875, -0.387847900390625, -0.3460845947265625, -0.3043212890625, -0.2625579833984375, -0.220794677734375, -0.1790313720703125, -0.13726806640625, -0.0955047607421875, -0.053741455078125, -0.0119781494140625, 0.02978515625, 0.0715484619140625, 0.113311767578125, 0.1550750732421875, 0.19683837890625, 0.2386016845703125, 0.280364990234375, 0.3221282958984375, 0.3638916015625, 0.4056549072265625, 0.447418212890625, 0.4891815185546875, 0.53094482421875, 0.5727081298828125, 0.614471435546875, 0.6562347412109375, 0.697998046875, 0.7397613525390625, 0.781524658203125, 0.8232879638671875, 0.86505126953125, 0.9068145751953125, 0.948577880859375, 0.9903411865234375, 1.0321044921875, 1.0738677978515625, 1.115631103515625, 1.1573944091796875, 1.19915771484375, 1.2409210205078125, 1.282684326171875, 1.3244476318359375, 1.3662109375]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 5.0, 1.0, 6.0, 5.0, 8.0, 5.0, 14.0, 19.0, 20.0, 30.0, 28.0, 34.0, 40.0, 44.0, 40.0, 59.0, 45.0, 63.0, 54.0, 61.0, 65.0, 43.0, 49.0, 49.0, 30.0, 39.0, 32.0, 27.0, 19.0, 15.0, 16.0, 6.0, 10.0, 11.0, 2.0, 4.0, 5.0, 2.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.4951171875, -1.441497802734375, -1.38787841796875, -1.334259033203125, -1.2806396484375, -1.227020263671875, -1.17340087890625, -1.119781494140625, -1.066162109375, -1.012542724609375, -0.95892333984375, -0.905303955078125, -0.8516845703125, -0.798065185546875, -0.74444580078125, -0.690826416015625, -0.63720703125, -0.583587646484375, -0.52996826171875, -0.476348876953125, -0.4227294921875, -0.369110107421875, -0.31549072265625, -0.261871337890625, -0.208251953125, -0.154632568359375, -0.10101318359375, -0.047393798828125, 0.0062255859375, 0.059844970703125, 0.11346435546875, 0.167083740234375, 0.220703125, 0.274322509765625, 0.32794189453125, 0.381561279296875, 0.4351806640625, 0.488800048828125, 0.54241943359375, 0.596038818359375, 0.649658203125, 0.703277587890625, 0.75689697265625, 0.810516357421875, 0.8641357421875, 0.917755126953125, 0.97137451171875, 1.024993896484375, 1.07861328125, 1.132232666015625, 1.18585205078125, 1.239471435546875, 1.2930908203125, 1.346710205078125, 1.40032958984375, 1.453948974609375, 1.507568359375, 1.561187744140625, 1.61480712890625, 1.668426513671875, 1.7220458984375, 1.775665283203125, 1.82928466796875, 1.882904052734375, 1.9365234375]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 2.0, 6.0, 10.0, 6.0, 39.0, 71.0, 146.0, 342.0, 944.0, 3860.0, 72160.0, 948272.0, 19481.0, 2055.0, 649.0, 264.0, 134.0, 52.0, 34.0, 11.0, 8.0, 9.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.373046875, -2.29730224609375, -2.2215576171875, -2.14581298828125, -2.070068359375, -1.99432373046875, -1.9185791015625, -1.84283447265625, -1.76708984375, -1.69134521484375, -1.6156005859375, -1.53985595703125, -1.464111328125, -1.38836669921875, -1.3126220703125, -1.23687744140625, -1.1611328125, -1.08538818359375, -1.0096435546875, -0.93389892578125, -0.858154296875, -0.78240966796875, -0.7066650390625, -0.63092041015625, -0.55517578125, -0.47943115234375, -0.4036865234375, -0.32794189453125, -0.252197265625, -0.17645263671875, -0.1007080078125, -0.02496337890625, 0.05078125, 0.12652587890625, 0.2022705078125, 0.27801513671875, 0.353759765625, 0.42950439453125, 0.5052490234375, 0.58099365234375, 0.65673828125, 0.73248291015625, 0.8082275390625, 0.88397216796875, 0.959716796875, 1.03546142578125, 1.1112060546875, 1.18695068359375, 1.2626953125, 1.33843994140625, 1.4141845703125, 1.48992919921875, 1.565673828125, 1.64141845703125, 1.7171630859375, 1.79290771484375, 1.86865234375, 1.94439697265625, 2.0201416015625, 2.09588623046875, 2.171630859375, 2.24737548828125, 2.3231201171875, 2.39886474609375, 2.474609375]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 13.0, 18.0, 38.0, 76.0, 160.0, 193.0, 211.0, 149.0, 78.0, 42.0, 18.0, 7.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003209114074707031, -0.0003134477883577347, -0.00030598416924476624, -0.0002985205501317978, -0.00029105693101882935, -0.0002835933119058609, -0.00027612969279289246, -0.000268666073679924, -0.00026120245456695557, -0.0002537388354539871, -0.0002462752163410187, -0.00023881159722805023, -0.0002313479781150818, -0.00022388435900211334, -0.0002164207398891449, -0.00020895712077617645, -0.000201493501663208, -0.00019402988255023956, -0.00018656626343727112, -0.00017910264432430267, -0.00017163902521133423, -0.00016417540609836578, -0.00015671178698539734, -0.0001492481678724289, -0.00014178454875946045, -0.000134320929646492, -0.00012685731053352356, -0.00011939369142055511, -0.00011193007230758667, -0.00010446645319461823, -9.700283408164978e-05, -8.953921496868134e-05, -8.207559585571289e-05, -7.461197674274445e-05, -6.7148357629776e-05, -5.9684738516807556e-05, -5.222111940383911e-05, -4.4757500290870667e-05, -3.729388117790222e-05, -2.9830262064933777e-05, -2.2366642951965332e-05, -1.4903023838996887e-05, -7.439404726028442e-06, 2.421438694000244e-08, 7.487833499908447e-06, 1.4951452612876892e-05, 2.2415071725845337e-05, 2.9878690838813782e-05, 3.7342309951782227e-05, 4.480592906475067e-05, 5.2269548177719116e-05, 5.973316729068756e-05, 6.7196786403656e-05, 7.466040551662445e-05, 8.21240246295929e-05, 8.958764374256134e-05, 9.705126285552979e-05, 0.00010451488196849823, 0.00011197850108146667, 0.00011944212019443512, 0.00012690573930740356, 0.000134369358420372, 0.00014183297753334045, 0.0001492965966463089, 0.00015676021575927734]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 4.0, 3.0, 5.0, 9.0, 17.0, 27.0, 40.0, 73.0, 159.0, 341.0, 925.0, 4243.0, 55987.0, 947725.0, 34236.0, 3315.0, 806.0, 319.0, 135.0, 88.0, 43.0, 14.0, 21.0, 12.0, 6.0, 4.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.125, -2.065032958984375, -2.00506591796875, -1.945098876953125, -1.8851318359375, -1.825164794921875, -1.76519775390625, -1.705230712890625, -1.645263671875, -1.585296630859375, -1.52532958984375, -1.465362548828125, -1.4053955078125, -1.345428466796875, -1.28546142578125, -1.225494384765625, -1.16552734375, -1.105560302734375, -1.04559326171875, -0.985626220703125, -0.9256591796875, -0.865692138671875, -0.80572509765625, -0.745758056640625, -0.685791015625, -0.625823974609375, -0.56585693359375, -0.505889892578125, -0.4459228515625, -0.385955810546875, -0.32598876953125, -0.266021728515625, -0.2060546875, -0.146087646484375, -0.08612060546875, -0.026153564453125, 0.0338134765625, 0.093780517578125, 0.15374755859375, 0.213714599609375, 0.273681640625, 0.333648681640625, 0.39361572265625, 0.453582763671875, 0.5135498046875, 0.573516845703125, 0.63348388671875, 0.693450927734375, 0.75341796875, 0.813385009765625, 0.87335205078125, 0.933319091796875, 0.9932861328125, 1.053253173828125, 1.11322021484375, 1.173187255859375, 1.233154296875, 1.293121337890625, 1.35308837890625, 1.413055419921875, 1.4730224609375, 1.532989501953125, 1.59295654296875, 1.652923583984375, 1.712890625]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 0.0, 5.0, 2.0, 2.0, 5.0, 11.0, 12.0, 16.0, 24.0, 42.0, 74.0, 109.0, 124.0, 163.0, 159.0, 108.0, 57.0, 37.0, 21.0, 7.0, 7.0, 11.0, 3.0, 1.0, 3.0, 3.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.86328125, -1.8203811645507812, -1.7774810791015625, -1.7345809936523438, -1.691680908203125, -1.6487808227539062, -1.6058807373046875, -1.5629806518554688, -1.52008056640625, -1.4771804809570312, -1.4342803955078125, -1.3913803100585938, -1.348480224609375, -1.3055801391601562, -1.2626800537109375, -1.2197799682617188, -1.1768798828125, -1.1339797973632812, -1.0910797119140625, -1.0481796264648438, -1.005279541015625, -0.9623794555664062, -0.9194793701171875, -0.8765792846679688, -0.83367919921875, -0.7907791137695312, -0.7478790283203125, -0.7049789428710938, -0.662078857421875, -0.6191787719726562, -0.5762786865234375, -0.5333786010742188, -0.490478515625, -0.44757843017578125, -0.4046783447265625, -0.36177825927734375, -0.318878173828125, -0.27597808837890625, -0.2330780029296875, -0.19017791748046875, -0.14727783203125, -0.10437774658203125, -0.0614776611328125, -0.01857757568359375, 0.024322509765625, 0.06722259521484375, 0.1101226806640625, 0.15302276611328125, 0.1959228515625, 0.23882293701171875, 0.2817230224609375, 0.32462310791015625, 0.367523193359375, 0.41042327880859375, 0.4533233642578125, 0.49622344970703125, 0.53912353515625, 0.5820236206054688, 0.6249237060546875, 0.6678237915039062, 0.710723876953125, 0.7536239624023438, 0.7965240478515625, 0.8394241333007812, 0.88232421875]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 8.0, 4.0, 6.0, 9.0, 9.0, 18.0, 21.0, 37.0, 46.0, 66.0, 89.0, 105.0, 116.0, 130.0, 88.0, 87.0, 53.0, 41.0, 25.0, 18.0, 14.0, 7.0, 3.0, 6.0, 1.0, 2.0, 0.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.19670581817627, -12.809350967407227, -12.421996116638184, -12.03464126586914, -11.647287368774414, -11.259932518005371, -10.872577667236328, -10.485222816467285, -10.097867965698242, -9.7105131149292, -9.323158264160156, -8.935803413391113, -8.54844856262207, -8.161094665527344, -7.773739814758301, -7.386384963989258, -6.999030113220215, -6.611675262451172, -6.224320411682129, -5.836966037750244, -5.449611186981201, -5.062256336212158, -4.674901962280273, -4.2875471115112305, -3.9001922607421875, -3.5128374099731445, -3.1254827976226807, -2.738128185272217, -2.350773334503174, -1.9634186029434204, -1.576063871383667, -1.1887092590332031, -0.8013553619384766, -0.41400063037872314, -0.026645898818969727, 0.3607088327407837, 0.7480635643005371, 1.1354182958602905, 1.522773027420044, 1.9101276397705078, 2.297482490539551, 2.6848373413085938, 3.0721919536590576, 3.4595465660095215, 3.8469014167785645, 4.234256267547607, 4.621610641479492, 5.008965492248535, 5.396320343017578, 5.783675193786621, 6.171030044555664, 6.558384418487549, 6.945739269256592, 7.333094120025635, 7.7204484939575195, 8.107803344726562, 8.495158195495605, 8.882513046264648, 9.269867897033691, 9.657222747802734, 10.044576644897461, 10.431931495666504, 10.819286346435547, 11.20664119720459, 11.593996047973633]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 1.0, 5.0, 1.0, 0.0, 3.0, 2.0, 5.0, 2.0, 7.0, 6.0, 6.0, 10.0, 6.0, 15.0, 12.0, 19.0, 16.0, 24.0, 35.0, 18.0, 34.0, 27.0, 42.0, 29.0, 49.0, 47.0, 51.0, 42.0, 43.0, 47.0, 43.0, 43.0, 41.0, 39.0, 29.0, 28.0, 33.0, 27.0, 18.0, 22.0, 24.0, 14.0, 7.0, 8.0, 5.0, 7.0, 3.0, 2.0, 3.0, 3.0, 4.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-7.183813095092773, -6.948230743408203, -6.712648391723633, -6.4770660400390625, -6.241483688354492, -6.005901336669922, -5.770318984985352, -5.534736633300781, -5.299154281616211, -5.063571929931641, -4.82798957824707, -4.5924072265625, -4.35682487487793, -4.121242523193359, -3.88565993309021, -3.6500775814056396, -3.4144949913024902, -3.17891263961792, -2.9433302879333496, -2.7077479362487793, -2.472165584564209, -2.2365832328796387, -2.0010006427764893, -1.765418291091919, -1.5298359394073486, -1.2942535877227783, -1.058671236038208, -0.8230887651443481, -0.5875064134597778, -0.3519240617752075, -0.11634159088134766, 0.11924076080322266, 0.35482358932495117, 0.5904059410095215, 0.8259883522987366, 1.0615707635879517, 1.297153115272522, 1.5327354669570923, 1.7683179378509521, 2.0039002895355225, 2.2394826412200928, 2.475064992904663, 2.7106473445892334, 2.946229934692383, 3.181812286376953, 3.4173946380615234, 3.6529769897460938, 3.888559341430664, 4.124141693115234, 4.359724044799805, 4.595306396484375, 4.830888748168945, 5.066471099853516, 5.302053451538086, 5.537635803222656, 5.773218154907227, 6.008800506591797, 6.244382858276367, 6.4799652099609375, 6.715547561645508, 6.951129913330078, 7.186712265014648, 7.422294616699219, 7.657876968383789, 7.893459796905518]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 2.0, 1.0, 9.0, 7.0, 10.0, 10.0, 20.0, 41.0, 85.0, 141.0, 249.0, 628.0, 2472.0, 25277.0, 4145818.0, 16380.0, 2017.0, 570.0, 225.0, 128.0, 79.0, 47.0, 20.0, 11.0, 18.0, 6.0, 5.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.24609375, -4.1065673828125, -3.967041015625, -3.8275146484375, -3.68798828125, -3.5484619140625, -3.408935546875, -3.2694091796875, -3.1298828125, -2.9903564453125, -2.850830078125, -2.7113037109375, -2.57177734375, -2.4322509765625, -2.292724609375, -2.1531982421875, -2.013671875, -1.8741455078125, -1.734619140625, -1.5950927734375, -1.45556640625, -1.3160400390625, -1.176513671875, -1.0369873046875, -0.8974609375, -0.7579345703125, -0.618408203125, -0.4788818359375, -0.33935546875, -0.1998291015625, -0.060302734375, 0.0792236328125, 0.21875, 0.3582763671875, 0.497802734375, 0.6373291015625, 0.77685546875, 0.9163818359375, 1.055908203125, 1.1954345703125, 1.3349609375, 1.4744873046875, 1.614013671875, 1.7535400390625, 1.89306640625, 2.0325927734375, 2.172119140625, 2.3116455078125, 2.451171875, 2.5906982421875, 2.730224609375, 2.8697509765625, 3.00927734375, 3.1488037109375, 3.288330078125, 3.4278564453125, 3.5673828125, 3.7069091796875, 3.846435546875, 3.9859619140625, 4.12548828125, 4.2650146484375, 4.404541015625, 4.5440673828125, 4.68359375]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 2.0, 5.0, 8.0, 7.0, 14.0, 18.0, 17.0, 14.0, 24.0, 35.0, 38.0, 39.0, 35.0, 40.0, 40.0, 44.0, 55.0, 51.0, 49.0, 57.0, 57.0, 42.0, 45.0, 33.0, 32.0, 31.0, 29.0, 25.0, 18.0, 22.0, 15.0, 12.0, 5.0, 10.0, 7.0, 7.0, 5.0, 2.0, 5.0, 4.0, 3.0, 1.0, 2.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.343994140625, -0.3327140808105469, -0.32143402099609375, -0.3101539611816406, -0.2988739013671875, -0.2875938415527344, -0.27631378173828125, -0.2650337219238281, -0.253753662109375, -0.24247360229492188, -0.23119354248046875, -0.21991348266601562, -0.2086334228515625, -0.19735336303710938, -0.18607330322265625, -0.17479324340820312, -0.16351318359375, -0.15223312377929688, -0.14095306396484375, -0.12967300415039062, -0.1183929443359375, -0.10711288452148438, -0.09583282470703125, -0.08455276489257812, -0.073272705078125, -0.061992645263671875, -0.05071258544921875, -0.039432525634765625, -0.0281524658203125, -0.016872406005859375, -0.00559234619140625, 0.005687713623046875, 0.0169677734375, 0.028247833251953125, 0.03952789306640625, 0.050807952880859375, 0.0620880126953125, 0.07336807250976562, 0.08464813232421875, 0.09592819213867188, 0.107208251953125, 0.11848831176757812, 0.12976837158203125, 0.14104843139648438, 0.1523284912109375, 0.16360855102539062, 0.17488861083984375, 0.18616867065429688, 0.19744873046875, 0.20872879028320312, 0.22000885009765625, 0.23128890991210938, 0.2425689697265625, 0.2538490295410156, 0.26512908935546875, 0.2764091491699219, 0.287689208984375, 0.2989692687988281, 0.31024932861328125, 0.3215293884277344, 0.3328094482421875, 0.3440895080566406, 0.35536956787109375, 0.3666496276855469, 0.3779296875]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 7.0, 7.0, 9.0, 12.0, 13.0, 32.0, 34.0, 47.0, 52.0, 132.0, 153.0, 245.0, 411.0, 735.0, 1429.0, 3799.0, 15956.0, 307409.0, 3835657.0, 20162.0, 4373.0, 1617.0, 785.0, 429.0, 241.0, 177.0, 118.0, 98.0, 51.0, 33.0, 20.0, 8.0, 16.0, 8.0, 2.0, 5.0, 2.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.439453125, -2.358306884765625, -2.27716064453125, -2.196014404296875, -2.1148681640625, -2.033721923828125, -1.95257568359375, -1.871429443359375, -1.790283203125, -1.709136962890625, -1.62799072265625, -1.546844482421875, -1.4656982421875, -1.384552001953125, -1.30340576171875, -1.222259521484375, -1.14111328125, -1.059967041015625, -0.97882080078125, -0.897674560546875, -0.8165283203125, -0.735382080078125, -0.65423583984375, -0.573089599609375, -0.491943359375, -0.410797119140625, -0.32965087890625, -0.248504638671875, -0.1673583984375, -0.086212158203125, -0.00506591796875, 0.076080322265625, 0.1572265625, 0.238372802734375, 0.31951904296875, 0.400665283203125, 0.4818115234375, 0.562957763671875, 0.64410400390625, 0.725250244140625, 0.806396484375, 0.887542724609375, 0.96868896484375, 1.049835205078125, 1.1309814453125, 1.212127685546875, 1.29327392578125, 1.374420166015625, 1.45556640625, 1.536712646484375, 1.61785888671875, 1.699005126953125, 1.7801513671875, 1.861297607421875, 1.94244384765625, 2.023590087890625, 2.104736328125, 2.185882568359375, 2.26702880859375, 2.348175048828125, 2.4293212890625, 2.510467529296875, 2.59161376953125, 2.672760009765625, 2.75390625]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 5.0, 15.0, 18.0, 41.0, 94.0, 2972.0, 764.0, 76.0, 31.0, 20.0, 9.0, 11.0, 8.0, 8.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.80810546875, -0.78839111328125, -0.7686767578125, -0.74896240234375, -0.729248046875, -0.70953369140625, -0.6898193359375, -0.67010498046875, -0.650390625, -0.63067626953125, -0.6109619140625, -0.59124755859375, -0.571533203125, -0.55181884765625, -0.5321044921875, -0.51239013671875, -0.49267578125, -0.47296142578125, -0.4532470703125, -0.43353271484375, -0.413818359375, -0.39410400390625, -0.3743896484375, -0.35467529296875, -0.3349609375, -0.31524658203125, -0.2955322265625, -0.27581787109375, -0.256103515625, -0.23638916015625, -0.2166748046875, -0.19696044921875, -0.17724609375, -0.15753173828125, -0.1378173828125, -0.11810302734375, -0.098388671875, -0.07867431640625, -0.0589599609375, -0.03924560546875, -0.01953125, 0.00018310546875, 0.0198974609375, 0.03961181640625, 0.059326171875, 0.07904052734375, 0.0987548828125, 0.11846923828125, 0.13818359375, 0.15789794921875, 0.1776123046875, 0.19732666015625, 0.217041015625, 0.23675537109375, 0.2564697265625, 0.27618408203125, 0.2958984375, 0.31561279296875, 0.3353271484375, 0.35504150390625, 0.374755859375, 0.39447021484375, 0.4141845703125, 0.43389892578125, 0.45361328125]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 3.0, 4.0, 7.0, 18.0, 19.0, 30.0, 48.0, 82.0, 106.0, 115.0, 124.0, 122.0, 89.0, 75.0, 59.0, 41.0, 24.0, 13.0, 12.0, 7.0, 5.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0], "bins": [-2.144212484359741, -2.0932729244232178, -2.0423333644866943, -1.9913936853408813, -1.940454125404358, -1.8895145654678345, -1.838575005531311, -1.787635326385498, -1.7366957664489746, -1.6857562065124512, -1.6348166465759277, -1.5838769674301147, -1.5329374074935913, -1.4819978475570679, -1.4310582876205444, -1.3801186084747314, -1.329179048538208, -1.2782394886016846, -1.2272999286651611, -1.1763602495193481, -1.1254206895828247, -1.0744811296463013, -1.0235415697097778, -0.9726019501686096, -0.921662449836731, -0.8707228899002075, -0.8197832703590393, -0.7688437104225159, -0.7179040908813477, -0.6669645309448242, -0.6160249710083008, -0.5650853514671326, -0.5141457319259644, -0.46320614218711853, -0.4122665524482727, -0.36132699251174927, -0.31038737297058105, -0.2594478130340576, -0.2085082232952118, -0.15756863355636597, -0.10662904381752014, -0.055689457803964615, -0.004749871790409088, 0.04618971049785614, 0.09712930023670197, 0.1480688750743866, 0.19900846481323242, 0.24994805455207825, 0.3008876442909241, 0.3518272340297699, 0.4027668237686157, 0.45370638370513916, 0.5046460032463074, 0.5555855631828308, 0.606525182723999, 0.6574647426605225, 0.7084043025970459, 0.7593438625335693, 0.8102834820747375, 0.861223042011261, 0.9121626615524292, 0.9631022214889526, 1.014041781425476, 1.064981460571289, 1.1159210205078125]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 4.0, 5.0, 5.0, 0.0, 4.0, 3.0, 3.0, 3.0, 7.0, 5.0, 7.0, 13.0, 20.0, 24.0, 23.0, 25.0, 22.0, 29.0, 33.0, 31.0, 39.0, 38.0, 46.0, 44.0, 44.0, 45.0, 54.0, 38.0, 44.0, 36.0, 36.0, 34.0, 29.0, 32.0, 29.0, 17.0, 11.0, 19.0, 20.0, 13.0, 26.0, 11.0, 9.0, 6.0, 7.0, 3.0, 6.0, 4.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 3.0], "bins": [-0.8593247532844543, -0.8347761034965515, -0.8102275133132935, -0.7856788635253906, -0.7611302137374878, -0.7365816235542297, -0.7120329737663269, -0.6874843835830688, -0.662935733795166, -0.6383870840072632, -0.6138384938240051, -0.5892898440361023, -0.5647412538528442, -0.5401926040649414, -0.5156439542770386, -0.49109533429145813, -0.4665467143058777, -0.44199809432029724, -0.4174494743347168, -0.39290082454681396, -0.3683522045612335, -0.3438035845756531, -0.31925493478775024, -0.2947063148021698, -0.27015769481658936, -0.2456090748310089, -0.22106043994426727, -0.19651180505752563, -0.1719631850719452, -0.14741456508636475, -0.12286593019962311, -0.09831729531288147, -0.07376861572265625, -0.04921998828649521, -0.024671360850334167, -0.00012273341417312622, 0.024425894021987915, 0.048974521458148956, 0.07352314889431, 0.09807178378105164, 0.12262040376663208, 0.14716902375221252, 0.17171765863895416, 0.1962662935256958, 0.22081491351127625, 0.2453635334968567, 0.2699121832847595, 0.29446080327033997, 0.3190094232559204, 0.34355804324150085, 0.3681066632270813, 0.39265531301498413, 0.4172039330005646, 0.441752552986145, 0.46630120277404785, 0.4908498227596283, 0.5153984427452087, 0.5399470925331116, 0.5644956827163696, 0.5890443325042725, 0.6135929822921753, 0.6381415724754333, 0.6626902222633362, 0.6872388124465942, 0.7117874622344971]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 0.0, 4.0, 5.0, 6.0, 12.0, 9.0, 10.0, 20.0, 24.0, 25.0, 46.0, 49.0, 73.0, 97.0, 144.0, 250.0, 354.0, 561.0, 1043.0, 1829.0, 3875.0, 9421.0, 27491.0, 93302.0, 294836.0, 389129.0, 155724.0, 44975.0, 14076.0, 5415.0, 2492.0, 1260.0, 673.0, 418.0, 270.0, 170.0, 145.0, 93.0, 69.0, 37.0, 33.0, 24.0, 22.0, 15.0, 13.0, 8.0, 4.0, 5.0, 7.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.88720703125, -0.8557510375976562, -0.8242950439453125, -0.7928390502929688, -0.761383056640625, -0.7299270629882812, -0.6984710693359375, -0.6670150756835938, -0.63555908203125, -0.6041030883789062, -0.5726470947265625, -0.5411911010742188, -0.509735107421875, -0.47827911376953125, -0.4468231201171875, -0.41536712646484375, -0.3839111328125, -0.35245513916015625, -0.3209991455078125, -0.28954315185546875, -0.258087158203125, -0.22663116455078125, -0.1951751708984375, -0.16371917724609375, -0.13226318359375, -0.10080718994140625, -0.0693511962890625, -0.03789520263671875, -0.006439208984375, 0.02501678466796875, 0.0564727783203125, 0.08792877197265625, 0.119384765625, 0.15084075927734375, 0.1822967529296875, 0.21375274658203125, 0.245208740234375, 0.27666473388671875, 0.3081207275390625, 0.33957672119140625, 0.37103271484375, 0.40248870849609375, 0.4339447021484375, 0.46540069580078125, 0.496856689453125, 0.5283126831054688, 0.5597686767578125, 0.5912246704101562, 0.6226806640625, 0.6541366577148438, 0.6855926513671875, 0.7170486450195312, 0.748504638671875, 0.7799606323242188, 0.8114166259765625, 0.8428726196289062, 0.87432861328125, 0.9057846069335938, 0.9372406005859375, 0.9686965942382812, 1.000152587890625, 1.0316085815429688, 1.0630645751953125, 1.0945205688476562, 1.1259765625]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 3.0, 6.0, 7.0, 6.0, 7.0, 10.0, 9.0, 11.0, 20.0, 22.0, 25.0, 25.0, 29.0, 37.0, 30.0, 33.0, 43.0, 39.0, 51.0, 40.0, 62.0, 44.0, 51.0, 41.0, 50.0, 47.0, 31.0, 29.0, 25.0, 35.0, 25.0, 15.0, 15.0, 16.0, 12.0, 7.0, 8.0, 10.0, 5.0, 6.0, 6.0, 8.0, 3.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.3427734375, -0.3313484191894531, -0.31992340087890625, -0.3084983825683594, -0.2970733642578125, -0.2856483459472656, -0.27422332763671875, -0.2627983093261719, -0.251373291015625, -0.23994827270507812, -0.22852325439453125, -0.21709823608398438, -0.2056732177734375, -0.19424819946289062, -0.18282318115234375, -0.17139816284179688, -0.15997314453125, -0.14854812622070312, -0.13712310791015625, -0.12569808959960938, -0.1142730712890625, -0.10284805297851562, -0.09142303466796875, -0.07999801635742188, -0.068572998046875, -0.057147979736328125, -0.04572296142578125, -0.034297943115234375, -0.0228729248046875, -0.011447906494140625, -2.288818359375e-05, 0.011402130126953125, 0.0228271484375, 0.034252166748046875, 0.04567718505859375, 0.057102203369140625, 0.0685272216796875, 0.07995223999023438, 0.09137725830078125, 0.10280227661132812, 0.114227294921875, 0.12565231323242188, 0.13707733154296875, 0.14850234985351562, 0.1599273681640625, 0.17135238647460938, 0.18277740478515625, 0.19420242309570312, 0.20562744140625, 0.21705245971679688, 0.22847747802734375, 0.23990249633789062, 0.2513275146484375, 0.2627525329589844, 0.27417755126953125, 0.2856025695800781, 0.297027587890625, 0.3084526062011719, 0.31987762451171875, 0.3313026428222656, 0.3427276611328125, 0.3541526794433594, 0.36557769775390625, 0.3770027160644531, 0.388427734375]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 5.0, 7.0, 6.0, 5.0, 17.0, 18.0, 21.0, 33.0, 42.0, 64.0, 93.0, 135.0, 182.0, 299.0, 468.0, 778.0, 1337.0, 2629.0, 5981.0, 18485.0, 82802.0, 400384.0, 416557.0, 86633.0, 19166.0, 6062.0, 2755.0, 1383.0, 738.0, 479.0, 329.0, 209.0, 128.0, 110.0, 65.0, 37.0, 32.0, 23.0, 20.0, 13.0, 10.0, 8.0, 4.0, 6.0, 4.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.04296875, -1.0057373046875, -0.968505859375, -0.9312744140625, -0.89404296875, -0.8568115234375, -0.819580078125, -0.7823486328125, -0.7451171875, -0.7078857421875, -0.670654296875, -0.6334228515625, -0.59619140625, -0.5589599609375, -0.521728515625, -0.4844970703125, -0.447265625, -0.4100341796875, -0.372802734375, -0.3355712890625, -0.29833984375, -0.2611083984375, -0.223876953125, -0.1866455078125, -0.1494140625, -0.1121826171875, -0.074951171875, -0.0377197265625, -0.00048828125, 0.0367431640625, 0.073974609375, 0.1112060546875, 0.1484375, 0.1856689453125, 0.222900390625, 0.2601318359375, 0.29736328125, 0.3345947265625, 0.371826171875, 0.4090576171875, 0.4462890625, 0.4835205078125, 0.520751953125, 0.5579833984375, 0.59521484375, 0.6324462890625, 0.669677734375, 0.7069091796875, 0.744140625, 0.7813720703125, 0.818603515625, 0.8558349609375, 0.89306640625, 0.9302978515625, 0.967529296875, 1.0047607421875, 1.0419921875, 1.0792236328125, 1.116455078125, 1.1536865234375, 1.19091796875, 1.2281494140625, 1.265380859375, 1.3026123046875, 1.33984375]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 0.0, 3.0, 2.0, 5.0, 9.0, 6.0, 15.0, 14.0, 14.0, 19.0, 25.0, 17.0, 31.0, 20.0, 27.0, 36.0, 39.0, 35.0, 37.0, 59.0, 43.0, 40.0, 54.0, 40.0, 44.0, 42.0, 35.0, 40.0, 25.0, 35.0, 30.0, 28.0, 20.0, 17.0, 22.0, 14.0, 15.0, 9.0, 6.0, 6.0, 10.0, 4.0, 3.0, 3.0, 5.0, 2.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.646484375, -1.5935821533203125, -1.540679931640625, -1.4877777099609375, -1.43487548828125, -1.3819732666015625, -1.329071044921875, -1.2761688232421875, -1.2232666015625, -1.1703643798828125, -1.117462158203125, -1.0645599365234375, -1.01165771484375, -0.9587554931640625, -0.905853271484375, -0.8529510498046875, -0.800048828125, -0.7471466064453125, -0.694244384765625, -0.6413421630859375, -0.58843994140625, -0.5355377197265625, -0.482635498046875, -0.4297332763671875, -0.3768310546875, -0.3239288330078125, -0.271026611328125, -0.2181243896484375, -0.16522216796875, -0.1123199462890625, -0.059417724609375, -0.0065155029296875, 0.04638671875, 0.0992889404296875, 0.152191162109375, 0.2050933837890625, 0.25799560546875, 0.3108978271484375, 0.363800048828125, 0.4167022705078125, 0.4696044921875, 0.5225067138671875, 0.575408935546875, 0.6283111572265625, 0.68121337890625, 0.7341156005859375, 0.787017822265625, 0.8399200439453125, 0.892822265625, 0.9457244873046875, 0.998626708984375, 1.0515289306640625, 1.10443115234375, 1.1573333740234375, 1.210235595703125, 1.2631378173828125, 1.3160400390625, 1.3689422607421875, 1.421844482421875, 1.4747467041015625, 1.52764892578125, 1.5805511474609375, 1.633453369140625, 1.6863555908203125, 1.7392578125]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 4.0, 9.0, 12.0, 17.0, 24.0, 39.0, 98.0, 159.0, 351.0, 1131.0, 6785.0, 515700.0, 515519.0, 6939.0, 1116.0, 344.0, 146.0, 73.0, 33.0, 21.0, 17.0, 6.0, 3.0, 4.0, 3.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.58203125, -1.51611328125, -1.4501953125, -1.38427734375, -1.318359375, -1.25244140625, -1.1865234375, -1.12060546875, -1.0546875, -0.98876953125, -0.9228515625, -0.85693359375, -0.791015625, -0.72509765625, -0.6591796875, -0.59326171875, -0.52734375, -0.46142578125, -0.3955078125, -0.32958984375, -0.263671875, -0.19775390625, -0.1318359375, -0.06591796875, 0.0, 0.06591796875, 0.1318359375, 0.19775390625, 0.263671875, 0.32958984375, 0.3955078125, 0.46142578125, 0.52734375, 0.59326171875, 0.6591796875, 0.72509765625, 0.791015625, 0.85693359375, 0.9228515625, 0.98876953125, 1.0546875, 1.12060546875, 1.1865234375, 1.25244140625, 1.318359375, 1.38427734375, 1.4501953125, 1.51611328125, 1.58203125, 1.64794921875, 1.7138671875, 1.77978515625, 1.845703125, 1.91162109375, 1.9775390625, 2.04345703125, 2.109375, 2.17529296875, 2.2412109375, 2.30712890625, 2.373046875, 2.43896484375, 2.5048828125, 2.57080078125, 2.63671875]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 10.0, 8.0, 7.0, 8.0, 17.0, 14.0, 17.0, 34.0, 25.0, 47.0, 54.0, 62.0, 66.0, 72.0, 94.0, 71.0, 65.0, 70.0, 44.0, 51.0, 38.0, 31.0, 15.0, 17.0, 18.0, 11.0, 9.0, 7.0, 4.0, 7.0, 3.0, 6.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-8.302927017211914e-05, -8.033681660890579e-05, -7.764436304569244e-05, -7.49519094824791e-05, -7.225945591926575e-05, -6.95670023560524e-05, -6.687454879283905e-05, -6.41820952296257e-05, -6.148964166641235e-05, -5.8797188103199005e-05, -5.610473453998566e-05, -5.341228097677231e-05, -5.071982741355896e-05, -4.802737385034561e-05, -4.533492028713226e-05, -4.2642466723918915e-05, -3.9950013160705566e-05, -3.725755959749222e-05, -3.456510603427887e-05, -3.187265247106552e-05, -2.9180198907852173e-05, -2.6487745344638824e-05, -2.3795291781425476e-05, -2.1102838218212128e-05, -1.841038465499878e-05, -1.571793109178543e-05, -1.3025477528572083e-05, -1.0333023965358734e-05, -7.640570402145386e-06, -4.948116838932037e-06, -2.255663275718689e-06, 4.367902874946594e-07, 3.129243850708008e-06, 5.821697413921356e-06, 8.514150977134705e-06, 1.1206604540348053e-05, 1.3899058103561401e-05, 1.659151166677475e-05, 1.9283965229988098e-05, 2.1976418793201447e-05, 2.4668872356414795e-05, 2.7361325919628143e-05, 3.0053779482841492e-05, 3.274623304605484e-05, 3.543868660926819e-05, 3.813114017248154e-05, 4.0823593735694885e-05, 4.3516047298908234e-05, 4.620850086212158e-05, 4.890095442533493e-05, 5.159340798854828e-05, 5.428586155176163e-05, 5.6978315114974976e-05, 5.9670768678188324e-05, 6.236322224140167e-05, 6.505567580461502e-05, 6.774812936782837e-05, 7.044058293104172e-05, 7.313303649425507e-05, 7.582549005746841e-05, 7.851794362068176e-05, 8.121039718389511e-05, 8.390285074710846e-05, 8.659530431032181e-05, 8.928775787353516e-05]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 5.0, 11.0, 25.0, 34.0, 66.0, 154.0, 350.0, 1091.0, 7040.0, 341367.0, 685280.0, 10977.0, 1403.0, 423.0, 144.0, 73.0, 38.0, 29.0, 18.0, 8.0, 3.0, 7.0, 2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.328125, -1.272979736328125, -1.21783447265625, -1.162689208984375, -1.1075439453125, -1.052398681640625, -0.99725341796875, -0.942108154296875, -0.886962890625, -0.831817626953125, -0.77667236328125, -0.721527099609375, -0.6663818359375, -0.611236572265625, -0.55609130859375, -0.500946044921875, -0.44580078125, -0.390655517578125, -0.33551025390625, -0.280364990234375, -0.2252197265625, -0.170074462890625, -0.11492919921875, -0.059783935546875, -0.004638671875, 0.050506591796875, 0.10565185546875, 0.160797119140625, 0.2159423828125, 0.271087646484375, 0.32623291015625, 0.381378173828125, 0.4365234375, 0.491668701171875, 0.54681396484375, 0.601959228515625, 0.6571044921875, 0.712249755859375, 0.76739501953125, 0.822540283203125, 0.877685546875, 0.932830810546875, 0.98797607421875, 1.043121337890625, 1.0982666015625, 1.153411865234375, 1.20855712890625, 1.263702392578125, 1.31884765625, 1.373992919921875, 1.42913818359375, 1.484283447265625, 1.5394287109375, 1.594573974609375, 1.64971923828125, 1.704864501953125, 1.760009765625, 1.815155029296875, 1.87030029296875, 1.925445556640625, 1.9805908203125, 2.035736083984375, 2.09088134765625, 2.146026611328125, 2.201171875]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 1.0, 5.0, 4.0, 12.0, 15.0, 28.0, 30.0, 50.0, 94.0, 116.0, 152.0, 149.0, 127.0, 88.0, 43.0, 33.0, 28.0, 15.0, 7.0, 9.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.75146484375, -0.7096939086914062, -0.6679229736328125, -0.6261520385742188, -0.584381103515625, -0.5426101684570312, -0.5008392333984375, -0.45906829833984375, -0.41729736328125, -0.37552642822265625, -0.3337554931640625, -0.29198455810546875, -0.250213623046875, -0.20844268798828125, -0.1666717529296875, -0.12490081787109375, -0.0831298828125, -0.04135894775390625, 0.0004119873046875, 0.04218292236328125, 0.083953857421875, 0.12572479248046875, 0.1674957275390625, 0.20926666259765625, 0.25103759765625, 0.29280853271484375, 0.3345794677734375, 0.37635040283203125, 0.418121337890625, 0.45989227294921875, 0.5016632080078125, 0.5434341430664062, 0.585205078125, 0.6269760131835938, 0.6687469482421875, 0.7105178833007812, 0.752288818359375, 0.7940597534179688, 0.8358306884765625, 0.8776016235351562, 0.91937255859375, 0.9611434936523438, 1.0029144287109375, 1.0446853637695312, 1.086456298828125, 1.1282272338867188, 1.1699981689453125, 1.2117691040039062, 1.2535400390625, 1.2953109741210938, 1.3370819091796875, 1.3788528442382812, 1.420623779296875, 1.4623947143554688, 1.5041656494140625, 1.5459365844726562, 1.58770751953125, 1.6294784545898438, 1.6712493896484375, 1.7130203247070312, 1.754791259765625, 1.7965621948242188, 1.8383331298828125, 1.8801040649414062, 1.921875]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 8.0, 10.0, 94.0, 414.0, 396.0, 72.0, 15.0, 5.0, 3.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.544710159301758, -6.038434982299805, -4.532159328460693, -3.025883674621582, -1.519608497619629, -0.013333320617675781, 1.4929428100585938, 2.999217987060547, 4.5054931640625, 6.011768341064453, 7.5180439949035645, 9.024319648742676, 10.530594825744629, 12.036870002746582, 13.543146133422852, 15.049421310424805, 16.555696487426758, 18.06197166442871, 19.568246841430664, 21.07452392578125, 22.580799102783203, 24.087074279785156, 25.59334945678711, 27.099624633789062, 28.605899810791016, 30.11217498779297, 31.618450164794922, 33.124725341796875, 34.63100051879883, 36.13727569580078, 37.6435546875, 39.14982604980469, 40.656105041503906, 42.16238021850586, 43.66865539550781, 45.174930572509766, 46.68120574951172, 48.18748092651367, 49.693756103515625, 51.200035095214844, 52.70630645751953, 54.212581634521484, 55.71885681152344, 57.22513198852539, 58.731407165527344, 60.2376823425293, 61.74395751953125, 63.25023651123047, 64.75651550292969, 66.2627944946289, 67.7690658569336, 69.27534484863281, 70.7816162109375, 72.28789520263672, 73.7941665649414, 75.30044555664062, 76.80671691894531, 78.31299591064453, 79.81926727294922, 81.32554626464844, 82.83181762695312, 84.33809661865234, 85.84436798095703, 87.35064697265625, 88.85691833496094]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 4.0, 1.0, 2.0, 2.0, 3.0, 2.0, 1.0, 5.0, 9.0, 6.0, 5.0, 8.0, 12.0, 12.0, 20.0, 15.0, 23.0, 16.0, 20.0, 38.0, 28.0, 44.0, 31.0, 48.0, 37.0, 46.0, 52.0, 50.0, 63.0, 50.0, 46.0, 38.0, 47.0, 33.0, 31.0, 28.0, 21.0, 12.0, 23.0, 16.0, 15.0, 12.0, 12.0, 9.0, 7.0, 1.0, 4.0, 2.0, 0.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.179851531982422, -7.908729076385498, -7.637606143951416, -7.366483688354492, -7.09536075592041, -6.824238300323486, -6.5531158447265625, -6.2819929122924805, -6.010870456695557, -5.739748001098633, -5.468625068664551, -5.197502613067627, -4.926380157470703, -4.655257225036621, -4.384134769439697, -4.113012313842773, -3.8418893814086914, -3.5707666873931885, -3.2996439933776855, -3.0285215377807617, -2.757398843765259, -2.486276149749756, -2.215153694152832, -1.944031000137329, -1.6729083061218262, -1.4017856121063232, -1.1306630373001099, -0.8595404028892517, -0.5884177684783936, -0.3172950744628906, -0.046172499656677246, 0.22495007514953613, 0.49607372283935547, 0.7671963572502136, 1.0383189916610718, 1.3094415664672852, 1.580564260482788, 1.851686954498291, 2.122809410095215, 2.3939321041107178, 2.6650547981262207, 2.9361774921417236, 3.2073001861572266, 3.4784226417541504, 3.7495453357696533, 4.020668029785156, 4.29179048538208, 4.562912940979004, 4.834035873413086, 5.10515832901001, 5.376281261444092, 5.647403717041016, 5.918526649475098, 6.1896491050720215, 6.460771560668945, 6.731894493103027, 7.003016948699951, 7.274139404296875, 7.545262336730957, 7.816384792327881, 8.087507247924805, 8.358630180358887, 8.629753112792969, 8.900875091552734, 9.171998023986816]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 4.0, 4.0, 6.0, 6.0, 9.0, 15.0, 16.0, 28.0, 48.0, 70.0, 108.0, 137.0, 252.0, 458.0, 953.0, 2457.0, 8546.0, 58228.0, 4016910.0, 89962.0, 10814.0, 2899.0, 1081.0, 471.0, 282.0, 180.0, 109.0, 68.0, 43.0, 38.0, 25.0, 19.0, 12.0, 6.0, 6.0, 2.0, 4.0, 4.0, 4.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-2.2734375, -2.2076873779296875, -2.141937255859375, -2.0761871337890625, -2.01043701171875, -1.9446868896484375, -1.878936767578125, -1.8131866455078125, -1.7474365234375, -1.6816864013671875, -1.615936279296875, -1.5501861572265625, -1.48443603515625, -1.4186859130859375, -1.352935791015625, -1.2871856689453125, -1.221435546875, -1.1556854248046875, -1.089935302734375, -1.0241851806640625, -0.95843505859375, -0.8926849365234375, -0.826934814453125, -0.7611846923828125, -0.6954345703125, -0.6296844482421875, -0.563934326171875, -0.4981842041015625, -0.43243408203125, -0.3666839599609375, -0.300933837890625, -0.2351837158203125, -0.16943359375, -0.1036834716796875, -0.037933349609375, 0.0278167724609375, 0.09356689453125, 0.1593170166015625, 0.225067138671875, 0.2908172607421875, 0.3565673828125, 0.4223175048828125, 0.488067626953125, 0.5538177490234375, 0.61956787109375, 0.6853179931640625, 0.751068115234375, 0.8168182373046875, 0.882568359375, 0.9483184814453125, 1.014068603515625, 1.0798187255859375, 1.14556884765625, 1.2113189697265625, 1.277069091796875, 1.3428192138671875, 1.4085693359375, 1.4743194580078125, 1.540069580078125, 1.6058197021484375, 1.67156982421875, 1.7373199462890625, 1.803070068359375, 1.8688201904296875, 1.9345703125]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 4.0, 3.0, 1.0, 2.0, 9.0, 5.0, 16.0, 18.0, 21.0, 23.0, 50.0, 65.0, 78.0, 91.0, 98.0, 95.0, 99.0, 69.0, 61.0, 67.0, 45.0, 32.0, 17.0, 11.0, 12.0, 2.0, 5.0, 6.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.86865234375, -0.8459091186523438, -0.8231658935546875, -0.8004226684570312, -0.777679443359375, -0.7549362182617188, -0.7321929931640625, -0.7094497680664062, -0.68670654296875, -0.6639633178710938, -0.6412200927734375, -0.6184768676757812, -0.595733642578125, -0.5729904174804688, -0.5502471923828125, -0.5275039672851562, -0.5047607421875, -0.48201751708984375, -0.4592742919921875, -0.43653106689453125, -0.413787841796875, -0.39104461669921875, -0.3683013916015625, -0.34555816650390625, -0.32281494140625, -0.30007171630859375, -0.2773284912109375, -0.25458526611328125, -0.231842041015625, -0.20909881591796875, -0.1863555908203125, -0.16361236572265625, -0.140869140625, -0.11812591552734375, -0.0953826904296875, -0.07263946533203125, -0.049896240234375, -0.02715301513671875, -0.0044097900390625, 0.01833343505859375, 0.04107666015625, 0.06381988525390625, 0.0865631103515625, 0.10930633544921875, 0.132049560546875, 0.15479278564453125, 0.1775360107421875, 0.20027923583984375, 0.2230224609375, 0.24576568603515625, 0.2685089111328125, 0.29125213623046875, 0.313995361328125, 0.33673858642578125, 0.3594818115234375, 0.38222503662109375, 0.40496826171875, 0.42771148681640625, 0.4504547119140625, 0.47319793701171875, 0.495941162109375, 0.5186843872070312, 0.5414276123046875, 0.5641708374023438, 0.5869140625]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 8.0, 9.0, 14.0, 18.0, 32.0, 39.0, 71.0, 111.0, 178.0, 299.0, 498.0, 801.0, 1714.0, 3754.0, 11169.0, 51464.0, 3687395.0, 391648.0, 31221.0, 7870.0, 2814.0, 1329.0, 705.0, 439.0, 239.0, 147.0, 95.0, 65.0, 47.0, 35.0, 23.0, 12.0, 8.0, 7.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.494140625, -1.4405059814453125, -1.386871337890625, -1.3332366943359375, -1.27960205078125, -1.2259674072265625, -1.172332763671875, -1.1186981201171875, -1.0650634765625, -1.0114288330078125, -0.957794189453125, -0.9041595458984375, -0.85052490234375, -0.7968902587890625, -0.743255615234375, -0.6896209716796875, -0.635986328125, -0.5823516845703125, -0.528717041015625, -0.4750823974609375, -0.42144775390625, -0.3678131103515625, -0.314178466796875, -0.2605438232421875, -0.2069091796875, -0.1532745361328125, -0.099639892578125, -0.0460052490234375, 0.00762939453125, 0.0612640380859375, 0.114898681640625, 0.1685333251953125, 0.22216796875, 0.2758026123046875, 0.329437255859375, 0.3830718994140625, 0.43670654296875, 0.4903411865234375, 0.543975830078125, 0.5976104736328125, 0.6512451171875, 0.7048797607421875, 0.758514404296875, 0.8121490478515625, 0.86578369140625, 0.9194183349609375, 0.973052978515625, 1.0266876220703125, 1.080322265625, 1.1339569091796875, 1.187591552734375, 1.2412261962890625, 1.29486083984375, 1.3484954833984375, 1.402130126953125, 1.4557647705078125, 1.5093994140625, 1.5630340576171875, 1.616668701171875, 1.6703033447265625, 1.72393798828125, 1.7775726318359375, 1.831207275390625, 1.8848419189453125, 1.9384765625]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 4.0, 4.0, 10.0, 6.0, 18.0, 20.0, 31.0, 79.0, 145.0, 662.0, 2697.0, 197.0, 90.0, 48.0, 24.0, 16.0, 11.0, 4.0, 6.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.97412109375, -0.9471664428710938, -0.9202117919921875, -0.8932571411132812, -0.866302490234375, -0.8393478393554688, -0.8123931884765625, -0.7854385375976562, -0.75848388671875, -0.7315292358398438, -0.7045745849609375, -0.6776199340820312, -0.650665283203125, -0.6237106323242188, -0.5967559814453125, -0.5698013305664062, -0.5428466796875, -0.5158920288085938, -0.4889373779296875, -0.46198272705078125, -0.435028076171875, -0.40807342529296875, -0.3811187744140625, -0.35416412353515625, -0.32720947265625, -0.30025482177734375, -0.2733001708984375, -0.24634552001953125, -0.219390869140625, -0.19243621826171875, -0.1654815673828125, -0.13852691650390625, -0.111572265625, -0.08461761474609375, -0.0576629638671875, -0.03070831298828125, -0.003753662109375, 0.02320098876953125, 0.0501556396484375, 0.07711029052734375, 0.10406494140625, 0.13101959228515625, 0.1579742431640625, 0.18492889404296875, 0.211883544921875, 0.23883819580078125, 0.2657928466796875, 0.29274749755859375, 0.3197021484375, 0.34665679931640625, 0.3736114501953125, 0.40056610107421875, 0.427520751953125, 0.45447540283203125, 0.4814300537109375, 0.5083847045898438, 0.53533935546875, 0.5622940063476562, 0.5892486572265625, 0.6162033081054688, 0.643157958984375, 0.6701126098632812, 0.6970672607421875, 0.7240219116210938, 0.7509765625]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0, 5.0, 9.0, 14.0, 44.0, 54.0, 120.0, 195.0, 200.0, 173.0, 99.0, 49.0, 29.0, 8.0, 5.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.250251531600952, -3.105616331100464, -2.9609808921813965, -2.816345691680908, -2.671710252761841, -2.5270750522613525, -2.382439613342285, -2.237804412841797, -2.0931692123413086, -1.9485338926315308, -1.803898572921753, -1.6592633724212646, -1.5146279335021973, -1.369992733001709, -1.2253574132919312, -1.0807220935821533, -0.9360866546630859, -0.7914513349533081, -0.6468160152435303, -0.5021807551383972, -0.3575454354286194, -0.21291011571884155, -0.0682748556137085, 0.07636046409606934, 0.22099578380584717, 0.365631103515625, 0.5102664232254028, 0.6549016833305359, 0.7995370030403137, 0.9441723227500916, 1.0888075828552246, 1.2334429025650024, 1.3780779838562012, 1.522713303565979, 1.6673486232757568, 1.8119838237762451, 1.9566192626953125, 2.101254463195801, 2.245889663696289, 2.3905251026153564, 2.535160541534424, 2.679795742034912, 2.8244311809539795, 2.9690663814544678, 3.113701820373535, 3.2583370208740234, 3.4029722213745117, 3.547607660293579, 3.6922428607940674, 3.8368780612945557, 3.981513500213623, 4.126148700714111, 4.2707839012146, 4.415419578552246, 4.560054779052734, 4.704689979553223, 4.849325180053711, 4.993960380554199, 5.1385955810546875, 5.283231258392334, 5.427866458892822, 5.5725016593933105, 5.717136859893799, 5.861772537231445, 6.006407737731934]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 4.0, 5.0, 6.0, 6.0, 5.0, 7.0, 11.0, 13.0, 21.0, 17.0, 27.0, 19.0, 18.0, 31.0, 27.0, 41.0, 40.0, 36.0, 45.0, 53.0, 50.0, 40.0, 52.0, 40.0, 33.0, 51.0, 47.0, 41.0, 28.0, 30.0, 29.0, 21.0, 23.0, 22.0, 13.0, 13.0, 11.0, 9.0, 5.0, 6.0, 2.0, 4.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.692967176437378, -1.6411323547363281, -1.5892975330352783, -1.5374627113342285, -1.4856278896331787, -1.433793067932129, -1.381958246231079, -1.3301235437393188, -1.278288722038269, -1.2264539003372192, -1.1746190786361694, -1.1227842569351196, -1.0709494352340698, -1.0191147327423096, -0.967279851436615, -0.91544508934021, -0.8636102080345154, -0.8117753863334656, -0.7599405646324158, -0.7081058025360107, -0.6562709808349609, -0.6044361591339111, -0.5526013374328613, -0.5007665157318115, -0.4489317238330841, -0.3970969021320343, -0.3452621102333069, -0.2934272885322571, -0.24159248173236847, -0.18975767493247986, -0.13792285323143005, -0.08608806133270264, -0.03425323963165283, 0.017581570893526077, 0.06941638141870499, 0.1212511956691742, 0.1730860024690628, 0.22492080926895142, 0.2767556309700012, 0.32859042286872864, 0.38042524456977844, 0.43226006627082825, 0.48409485816955566, 0.5359296798706055, 0.5877645015716553, 0.6395993232727051, 0.6914341449737549, 0.7432689070701599, 0.7951037287712097, 0.8469385504722595, 0.8987733721733093, 0.9506081342697144, 1.0024429559707642, 1.054277777671814, 1.1061125993728638, 1.1579474210739136, 1.2097822427749634, 1.2616170644760132, 1.313451886177063, 1.3652867078781128, 1.4171215295791626, 1.4689562320709229, 1.5207910537719727, 1.5726258754730225, 1.6244606971740723]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 4.0, 9.0, 15.0, 12.0, 27.0, 51.0, 73.0, 132.0, 270.0, 575.0, 1369.0, 4109.0, 16229.0, 92156.0, 474471.0, 376450.0, 64743.0, 12451.0, 3299.0, 1123.0, 483.0, 212.0, 100.0, 73.0, 37.0, 32.0, 16.0, 11.0, 10.0, 4.0, 3.0, 5.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.9892578125, -1.9381256103515625, -1.886993408203125, -1.8358612060546875, -1.78472900390625, -1.7335968017578125, -1.682464599609375, -1.6313323974609375, -1.5802001953125, -1.5290679931640625, -1.477935791015625, -1.4268035888671875, -1.37567138671875, -1.3245391845703125, -1.273406982421875, -1.2222747802734375, -1.171142578125, -1.1200103759765625, -1.068878173828125, -1.0177459716796875, -0.96661376953125, -0.9154815673828125, -0.864349365234375, -0.8132171630859375, -0.7620849609375, -0.7109527587890625, -0.659820556640625, -0.6086883544921875, -0.55755615234375, -0.5064239501953125, -0.455291748046875, -0.4041595458984375, -0.35302734375, -0.3018951416015625, -0.250762939453125, -0.1996307373046875, -0.14849853515625, -0.0973663330078125, -0.046234130859375, 0.0048980712890625, 0.0560302734375, 0.1071624755859375, 0.158294677734375, 0.2094268798828125, 0.26055908203125, 0.3116912841796875, 0.362823486328125, 0.4139556884765625, 0.465087890625, 0.5162200927734375, 0.567352294921875, 0.6184844970703125, 0.66961669921875, 0.7207489013671875, 0.771881103515625, 0.8230133056640625, 0.8741455078125, 0.9252777099609375, 0.976409912109375, 1.0275421142578125, 1.07867431640625, 1.1298065185546875, 1.180938720703125, 1.2320709228515625, 1.283203125]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 5.0, 2.0, 0.0, 1.0, 4.0, 4.0, 7.0, 8.0, 11.0, 15.0, 19.0, 27.0, 27.0, 51.0, 61.0, 58.0, 83.0, 57.0, 84.0, 83.0, 64.0, 64.0, 64.0, 46.0, 49.0, 27.0, 27.0, 21.0, 9.0, 6.0, 8.0, 5.0, 2.0, 6.0, 2.0, 2.0, 0.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.69921875, -0.679656982421875, -0.66009521484375, -0.640533447265625, -0.6209716796875, -0.601409912109375, -0.58184814453125, -0.562286376953125, -0.542724609375, -0.523162841796875, -0.50360107421875, -0.484039306640625, -0.4644775390625, -0.444915771484375, -0.42535400390625, -0.405792236328125, -0.38623046875, -0.366668701171875, -0.34710693359375, -0.327545166015625, -0.3079833984375, -0.288421630859375, -0.26885986328125, -0.249298095703125, -0.229736328125, -0.210174560546875, -0.19061279296875, -0.171051025390625, -0.1514892578125, -0.131927490234375, -0.11236572265625, -0.092803955078125, -0.0732421875, -0.053680419921875, -0.03411865234375, -0.014556884765625, 0.0050048828125, 0.024566650390625, 0.04412841796875, 0.063690185546875, 0.083251953125, 0.102813720703125, 0.12237548828125, 0.141937255859375, 0.1614990234375, 0.181060791015625, 0.20062255859375, 0.220184326171875, 0.23974609375, 0.259307861328125, 0.27886962890625, 0.298431396484375, 0.3179931640625, 0.337554931640625, 0.35711669921875, 0.376678466796875, 0.396240234375, 0.415802001953125, 0.43536376953125, 0.454925537109375, 0.4744873046875, 0.494049072265625, 0.51361083984375, 0.533172607421875, 0.552734375]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 5.0, 3.0, 2.0, 5.0, 6.0, 3.0, 8.0, 19.0, 19.0, 32.0, 52.0, 62.0, 124.0, 188.0, 295.0, 626.0, 1189.0, 3109.0, 10738.0, 79137.0, 650900.0, 267179.0, 25946.0, 5216.0, 1800.0, 794.0, 420.0, 254.0, 132.0, 101.0, 69.0, 41.0, 27.0, 27.0, 15.0, 4.0, 10.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.44921875, -1.390655517578125, -1.33209228515625, -1.273529052734375, -1.2149658203125, -1.156402587890625, -1.09783935546875, -1.039276123046875, -0.980712890625, -0.922149658203125, -0.86358642578125, -0.805023193359375, -0.7464599609375, -0.687896728515625, -0.62933349609375, -0.570770263671875, -0.51220703125, -0.453643798828125, -0.39508056640625, -0.336517333984375, -0.2779541015625, -0.219390869140625, -0.16082763671875, -0.102264404296875, -0.043701171875, 0.014862060546875, 0.07342529296875, 0.131988525390625, 0.1905517578125, 0.249114990234375, 0.30767822265625, 0.366241455078125, 0.4248046875, 0.483367919921875, 0.54193115234375, 0.600494384765625, 0.6590576171875, 0.717620849609375, 0.77618408203125, 0.834747314453125, 0.893310546875, 0.951873779296875, 1.01043701171875, 1.069000244140625, 1.1275634765625, 1.186126708984375, 1.24468994140625, 1.303253173828125, 1.36181640625, 1.420379638671875, 1.47894287109375, 1.537506103515625, 1.5960693359375, 1.654632568359375, 1.71319580078125, 1.771759033203125, 1.830322265625, 1.888885498046875, 1.94744873046875, 2.006011962890625, 2.0645751953125, 2.123138427734375, 2.18170166015625, 2.240264892578125, 2.298828125]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 5.0, 3.0, 5.0, 5.0, 7.0, 1.0, 7.0, 15.0, 10.0, 17.0, 23.0, 28.0, 35.0, 27.0, 31.0, 34.0, 38.0, 42.0, 47.0, 52.0, 60.0, 42.0, 49.0, 69.0, 51.0, 39.0, 48.0, 41.0, 31.0, 21.0, 26.0, 26.0, 16.0, 13.0, 9.0, 7.0, 11.0, 6.0, 5.0, 3.0, 5.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8125, -1.7547454833984375, -1.696990966796875, -1.6392364501953125, -1.58148193359375, -1.5237274169921875, -1.465972900390625, -1.4082183837890625, -1.3504638671875, -1.2927093505859375, -1.234954833984375, -1.1772003173828125, -1.11944580078125, -1.0616912841796875, -1.003936767578125, -0.9461822509765625, -0.888427734375, -0.8306732177734375, -0.772918701171875, -0.7151641845703125, -0.65740966796875, -0.5996551513671875, -0.541900634765625, -0.4841461181640625, -0.4263916015625, -0.3686370849609375, -0.310882568359375, -0.2531280517578125, -0.19537353515625, -0.1376190185546875, -0.079864501953125, -0.0221099853515625, 0.03564453125, 0.0933990478515625, 0.151153564453125, 0.2089080810546875, 0.26666259765625, 0.3244171142578125, 0.382171630859375, 0.4399261474609375, 0.4976806640625, 0.5554351806640625, 0.613189697265625, 0.6709442138671875, 0.72869873046875, 0.7864532470703125, 0.844207763671875, 0.9019622802734375, 0.959716796875, 1.0174713134765625, 1.075225830078125, 1.1329803466796875, 1.19073486328125, 1.2484893798828125, 1.306243896484375, 1.3639984130859375, 1.4217529296875, 1.4795074462890625, 1.537261962890625, 1.5950164794921875, 1.65277099609375, 1.7105255126953125, 1.768280029296875, 1.8260345458984375, 1.8837890625]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 2.0, 0.0, 4.0, 3.0, 5.0, 10.0, 12.0, 9.0, 35.0, 39.0, 62.0, 77.0, 143.0, 299.0, 728.0, 1919.0, 8126.0, 90707.0, 794700.0, 136975.0, 10796.0, 2300.0, 796.0, 357.0, 184.0, 91.0, 49.0, 35.0, 15.0, 19.0, 16.0, 19.0, 6.0, 6.0, 7.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.95361328125, -0.9275360107421875, -0.901458740234375, -0.8753814697265625, -0.84930419921875, -0.8232269287109375, -0.797149658203125, -0.7710723876953125, -0.7449951171875, -0.7189178466796875, -0.692840576171875, -0.6667633056640625, -0.64068603515625, -0.6146087646484375, -0.588531494140625, -0.5624542236328125, -0.536376953125, -0.5102996826171875, -0.484222412109375, -0.4581451416015625, -0.43206787109375, -0.4059906005859375, -0.379913330078125, -0.3538360595703125, -0.3277587890625, -0.3016815185546875, -0.275604248046875, -0.2495269775390625, -0.22344970703125, -0.1973724365234375, -0.171295166015625, -0.1452178955078125, -0.119140625, -0.0930633544921875, -0.066986083984375, -0.0409088134765625, -0.01483154296875, 0.0112457275390625, 0.037322998046875, 0.0634002685546875, 0.0894775390625, 0.1155548095703125, 0.141632080078125, 0.1677093505859375, 0.19378662109375, 0.2198638916015625, 0.245941162109375, 0.2720184326171875, 0.298095703125, 0.3241729736328125, 0.350250244140625, 0.3763275146484375, 0.40240478515625, 0.4284820556640625, 0.454559326171875, 0.4806365966796875, 0.5067138671875, 0.5327911376953125, 0.558868408203125, 0.5849456787109375, 0.61102294921875, 0.6371002197265625, 0.663177490234375, 0.6892547607421875, 0.71533203125]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 0.0, 3.0, 3.0, 4.0, 2.0, 1.0, 5.0, 10.0, 10.0, 16.0, 13.0, 23.0, 34.0, 42.0, 56.0, 64.0, 90.0, 89.0, 83.0, 94.0, 75.0, 68.0, 53.0, 47.0, 40.0, 24.0, 18.0, 15.0, 5.0, 3.0, 5.0, 5.0, 5.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.893013000488281e-05, -8.591823279857635e-05, -8.29063355922699e-05, -7.989443838596344e-05, -7.688254117965698e-05, -7.387064397335052e-05, -7.085874676704407e-05, -6.784684956073761e-05, -6.483495235443115e-05, -6.18230551481247e-05, -5.881115794181824e-05, -5.579926073551178e-05, -5.278736352920532e-05, -4.9775466322898865e-05, -4.676356911659241e-05, -4.375167191028595e-05, -4.073977470397949e-05, -3.7727877497673035e-05, -3.471598029136658e-05, -3.170408308506012e-05, -2.8692185878753662e-05, -2.5680288672447205e-05, -2.2668391466140747e-05, -1.965649425983429e-05, -1.6644597053527832e-05, -1.3632699847221375e-05, -1.0620802640914917e-05, -7.6089054346084595e-06, -4.597008228302002e-06, -1.5851110219955444e-06, 1.426786184310913e-06, 4.438683390617371e-06, 7.450580596923828e-06, 1.0462477803230286e-05, 1.3474375009536743e-05, 1.64862722158432e-05, 1.9498169422149658e-05, 2.2510066628456116e-05, 2.5521963834762573e-05, 2.853386104106903e-05, 3.154575824737549e-05, 3.4557655453681946e-05, 3.75695526599884e-05, 4.058144986629486e-05, 4.359334707260132e-05, 4.6605244278907776e-05, 4.9617141485214233e-05, 5.262903869152069e-05, 5.564093589782715e-05, 5.8652833104133606e-05, 6.166473031044006e-05, 6.467662751674652e-05, 6.768852472305298e-05, 7.070042192935944e-05, 7.37123191356659e-05, 7.672421634197235e-05, 7.973611354827881e-05, 8.274801075458527e-05, 8.575990796089172e-05, 8.877180516719818e-05, 9.178370237350464e-05, 9.47955995798111e-05, 9.780749678611755e-05, 0.00010081939399242401, 0.00010383129119873047]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 5.0, 5.0, 14.0, 21.0, 21.0, 27.0, 63.0, 128.0, 229.0, 539.0, 1616.0, 6302.0, 74121.0, 838598.0, 115823.0, 8044.0, 1763.0, 669.0, 259.0, 129.0, 71.0, 44.0, 29.0, 14.0, 8.0, 6.0, 4.0, 7.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.8349609375, -0.8046722412109375, -0.774383544921875, -0.7440948486328125, -0.71380615234375, -0.6835174560546875, -0.653228759765625, -0.6229400634765625, -0.5926513671875, -0.5623626708984375, -0.532073974609375, -0.5017852783203125, -0.47149658203125, -0.4412078857421875, -0.410919189453125, -0.3806304931640625, -0.350341796875, -0.3200531005859375, -0.289764404296875, -0.2594757080078125, -0.22918701171875, -0.1988983154296875, -0.168609619140625, -0.1383209228515625, -0.1080322265625, -0.0777435302734375, -0.047454833984375, -0.0171661376953125, 0.01312255859375, 0.0434112548828125, 0.073699951171875, 0.1039886474609375, 0.13427734375, 0.1645660400390625, 0.194854736328125, 0.2251434326171875, 0.25543212890625, 0.2857208251953125, 0.316009521484375, 0.3462982177734375, 0.3765869140625, 0.4068756103515625, 0.437164306640625, 0.4674530029296875, 0.49774169921875, 0.5280303955078125, 0.558319091796875, 0.5886077880859375, 0.618896484375, 0.6491851806640625, 0.679473876953125, 0.7097625732421875, 0.74005126953125, 0.7703399658203125, 0.800628662109375, 0.8309173583984375, 0.8612060546875, 0.8914947509765625, 0.921783447265625, 0.9520721435546875, 0.98236083984375, 1.0126495361328125, 1.042938232421875, 1.0732269287109375, 1.103515625]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 5.0, 7.0, 5.0, 8.0, 14.0, 16.0, 26.0, 24.0, 36.0, 47.0, 52.0, 58.0, 69.0, 77.0, 84.0, 87.0, 86.0, 66.0, 67.0, 38.0, 32.0, 37.0, 23.0, 15.0, 10.0, 5.0, 4.0, 2.0, 3.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.78466796875, -0.7642059326171875, -0.743743896484375, -0.7232818603515625, -0.70281982421875, -0.6823577880859375, -0.661895751953125, -0.6414337158203125, -0.6209716796875, -0.6005096435546875, -0.580047607421875, -0.5595855712890625, -0.53912353515625, -0.5186614990234375, -0.498199462890625, -0.4777374267578125, -0.457275390625, -0.4368133544921875, -0.416351318359375, -0.3958892822265625, -0.37542724609375, -0.3549652099609375, -0.334503173828125, -0.3140411376953125, -0.2935791015625, -0.2731170654296875, -0.252655029296875, -0.2321929931640625, -0.21173095703125, -0.1912689208984375, -0.170806884765625, -0.1503448486328125, -0.1298828125, -0.1094207763671875, -0.088958740234375, -0.0684967041015625, -0.04803466796875, -0.0275726318359375, -0.007110595703125, 0.0133514404296875, 0.0338134765625, 0.0542755126953125, 0.074737548828125, 0.0951995849609375, 0.11566162109375, 0.1361236572265625, 0.156585693359375, 0.1770477294921875, 0.197509765625, 0.2179718017578125, 0.238433837890625, 0.2588958740234375, 0.27935791015625, 0.2998199462890625, 0.320281982421875, 0.3407440185546875, 0.3612060546875, 0.3816680908203125, 0.402130126953125, 0.4225921630859375, 0.44305419921875, 0.4635162353515625, 0.483978271484375, 0.5044403076171875, 0.52490234375]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 0.0, 8.0, 8.0, 15.0, 31.0, 43.0, 87.0, 112.0, 166.0, 153.0, 145.0, 102.0, 60.0, 33.0, 13.0, 11.0, 6.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 2.0], "bins": [-21.54706573486328, -21.114044189453125, -20.6810245513916, -20.248003005981445, -19.81498146057129, -19.381959915161133, -18.94894027709961, -18.515918731689453, -18.082897186279297, -17.64987564086914, -17.216856002807617, -16.78383445739746, -16.350812911987305, -15.917792320251465, -15.484770774841309, -15.051750183105469, -14.618728637695312, -14.185708045959473, -13.752686500549316, -13.319665908813477, -12.88664436340332, -12.45362377166748, -12.020602226257324, -11.587581634521484, -11.154561042785645, -10.721540451049805, -10.288518905639648, -9.855498313903809, -9.422476768493652, -8.989456176757812, -8.556434631347656, -8.123414039611816, -7.690392971038818, -7.25737190246582, -6.824350833892822, -6.391329765319824, -5.958308696746826, -5.525287628173828, -5.092267036437988, -4.659245491027832, -4.226224899291992, -3.793203830718994, -3.360182762145996, -2.927161693572998, -2.494140625, -2.061119794845581, -1.628098726272583, -1.195077657699585, -0.7620563507080078, -0.32903531193733215, 0.1039857268333435, 0.5370067358016968, 0.9700278043746948, 1.4030487537384033, 1.8360698223114014, 2.2690908908843994, 2.7021119594573975, 3.1351330280303955, 3.5681540966033936, 4.0011749267578125, 4.4341959953308105, 4.867217063903809, 5.300238132476807, 5.733259201049805, 6.166280269622803]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 6.0, 5.0, 2.0, 3.0, 3.0, 5.0, 6.0, 11.0, 9.0, 18.0, 21.0, 15.0, 16.0, 24.0, 28.0, 38.0, 32.0, 35.0, 26.0, 40.0, 44.0, 46.0, 54.0, 43.0, 53.0, 40.0, 46.0, 34.0, 46.0, 44.0, 24.0, 39.0, 22.0, 25.0, 16.0, 12.0, 16.0, 9.0, 12.0, 9.0, 5.0, 8.0, 5.0, 7.0, 5.0, 1.0, 3.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-6.958752155303955, -6.720723628997803, -6.482695579528809, -6.244667053222656, -6.006638526916504, -5.768610000610352, -5.530581474304199, -5.292553424835205, -5.054524898529053, -4.8164963722229, -4.578468322753906, -4.340439796447754, -4.102411270141602, -3.864382743835449, -3.626354455947876, -3.3883261680603027, -3.1502976417541504, -2.912269115447998, -2.674240827560425, -2.4362125396728516, -2.198184013366699, -1.9601556062698364, -1.7221271991729736, -1.4840987920761108, -1.246070384979248, -1.0080419778823853, -0.7700135707855225, -0.5319851636886597, -0.2939567565917969, -0.05592834949493408, 0.1821000576019287, 0.4201284646987915, 0.6581573486328125, 0.8961857557296753, 1.134214162826538, 1.3722425699234009, 1.6102709770202637, 1.8482993841171265, 2.0863277912139893, 2.3243560791015625, 2.562384605407715, 2.800413131713867, 3.0384414196014404, 3.2764697074890137, 3.514498233795166, 3.7525267601013184, 3.9905550479888916, 4.228583335876465, 4.466611862182617, 4.7046403884887695, 4.942668914794922, 5.180696964263916, 5.418725490570068, 5.656754016876221, 5.894782066345215, 6.132810592651367, 6.3708391189575195, 6.608867645263672, 6.846896171569824, 7.084924221038818, 7.322952747344971, 7.560981273651123, 7.799009323120117, 8.03703784942627, 8.275066375732422]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 5.0, 2.0, 1.0, 5.0, 4.0, 6.0, 10.0, 14.0, 23.0, 32.0, 45.0, 71.0, 94.0, 174.0, 353.0, 856.0, 2098.0, 5815.0, 19371.0, 91700.0, 1724204.0, 2216325.0, 101744.0, 20630.0, 6255.0, 2293.0, 1011.0, 501.0, 245.0, 129.0, 86.0, 51.0, 35.0, 28.0, 25.0, 8.0, 7.0, 6.0, 10.0, 1.0, 2.0, 4.0, 3.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-1.24609375, -1.2115097045898438, -1.1769256591796875, -1.1423416137695312, -1.107757568359375, -1.0731735229492188, -1.0385894775390625, -1.0040054321289062, -0.96942138671875, -0.9348373413085938, -0.9002532958984375, -0.8656692504882812, -0.831085205078125, -0.7965011596679688, -0.7619171142578125, -0.7273330688476562, -0.6927490234375, -0.6581649780273438, -0.6235809326171875, -0.5889968872070312, -0.554412841796875, -0.5198287963867188, -0.4852447509765625, -0.45066070556640625, -0.41607666015625, -0.38149261474609375, -0.3469085693359375, -0.31232452392578125, -0.277740478515625, -0.24315643310546875, -0.2085723876953125, -0.17398834228515625, -0.139404296875, -0.10482025146484375, -0.0702362060546875, -0.03565216064453125, -0.001068115234375, 0.03351593017578125, 0.0680999755859375, 0.10268402099609375, 0.13726806640625, 0.17185211181640625, 0.2064361572265625, 0.24102020263671875, 0.275604248046875, 0.31018829345703125, 0.3447723388671875, 0.37935638427734375, 0.4139404296875, 0.44852447509765625, 0.4831085205078125, 0.5176925659179688, 0.552276611328125, 0.5868606567382812, 0.6214447021484375, 0.6560287475585938, 0.69061279296875, 0.7251968383789062, 0.7597808837890625, 0.7943649291992188, 0.828948974609375, 0.8635330200195312, 0.8981170654296875, 0.9327011108398438, 0.96728515625]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 5.0, 5.0, 6.0, 11.0, 8.0, 17.0, 10.0, 9.0, 26.0, 27.0, 27.0, 35.0, 43.0, 46.0, 60.0, 56.0, 61.0, 68.0, 57.0, 66.0, 55.0, 54.0, 33.0, 46.0, 37.0, 31.0, 25.0, 22.0, 15.0, 15.0, 7.0, 6.0, 6.0, 8.0, 1.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.379150390625, -0.3655204772949219, -0.35189056396484375, -0.3382606506347656, -0.3246307373046875, -0.3110008239746094, -0.29737091064453125, -0.2837409973144531, -0.270111083984375, -0.2564811706542969, -0.24285125732421875, -0.22922134399414062, -0.2155914306640625, -0.20196151733398438, -0.18833160400390625, -0.17470169067382812, -0.16107177734375, -0.14744186401367188, -0.13381195068359375, -0.12018203735351562, -0.1065521240234375, -0.09292221069335938, -0.07929229736328125, -0.06566238403320312, -0.052032470703125, -0.038402557373046875, -0.02477264404296875, -0.011142730712890625, 0.0024871826171875, 0.016117095947265625, 0.02974700927734375, 0.043376922607421875, 0.0570068359375, 0.07063674926757812, 0.08426666259765625, 0.09789657592773438, 0.1115264892578125, 0.12515640258789062, 0.13878631591796875, 0.15241622924804688, 0.166046142578125, 0.17967605590820312, 0.19330596923828125, 0.20693588256835938, 0.2205657958984375, 0.23419570922851562, 0.24782562255859375, 0.2614555358886719, 0.27508544921875, 0.2887153625488281, 0.30234527587890625, 0.3159751892089844, 0.3296051025390625, 0.3432350158691406, 0.35686492919921875, 0.3704948425292969, 0.384124755859375, 0.3977546691894531, 0.41138458251953125, 0.4250144958496094, 0.4386444091796875, 0.4522743225097656, 0.46590423583984375, 0.4795341491699219, 0.4931640625]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 2.0, 9.0, 20.0, 24.0, 58.0, 119.0, 262.0, 834.0, 3716.0, 33525.0, 3267053.0, 864748.0, 19970.0, 2709.0, 741.0, 272.0, 110.0, 42.0, 17.0, 17.0, 10.0, 11.0, 7.0, 8.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.08203125, -2.009979248046875, -1.93792724609375, -1.865875244140625, -1.7938232421875, -1.721771240234375, -1.64971923828125, -1.577667236328125, -1.505615234375, -1.433563232421875, -1.36151123046875, -1.289459228515625, -1.2174072265625, -1.145355224609375, -1.07330322265625, -1.001251220703125, -0.92919921875, -0.857147216796875, -0.78509521484375, -0.713043212890625, -0.6409912109375, -0.568939208984375, -0.49688720703125, -0.424835205078125, -0.352783203125, -0.280731201171875, -0.20867919921875, -0.136627197265625, -0.0645751953125, 0.007476806640625, 0.07952880859375, 0.151580810546875, 0.2236328125, 0.295684814453125, 0.36773681640625, 0.439788818359375, 0.5118408203125, 0.583892822265625, 0.65594482421875, 0.727996826171875, 0.800048828125, 0.872100830078125, 0.94415283203125, 1.016204833984375, 1.0882568359375, 1.160308837890625, 1.23236083984375, 1.304412841796875, 1.37646484375, 1.448516845703125, 1.52056884765625, 1.592620849609375, 1.6646728515625, 1.736724853515625, 1.80877685546875, 1.880828857421875, 1.952880859375, 2.024932861328125, 2.09698486328125, 2.169036865234375, 2.2410888671875, 2.313140869140625, 2.38519287109375, 2.457244873046875, 2.529296875]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 8.0, 13.0, 23.0, 37.0, 84.0, 164.0, 424.0, 1120.0, 1330.0, 481.0, 174.0, 91.0, 50.0, 24.0, 15.0, 13.0, 8.0, 2.0, 4.0, 1.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4052734375, -1.3443756103515625, -1.283477783203125, -1.2225799560546875, -1.16168212890625, -1.1007843017578125, -1.039886474609375, -0.9789886474609375, -0.9180908203125, -0.8571929931640625, -0.796295166015625, -0.7353973388671875, -0.67449951171875, -0.6136016845703125, -0.552703857421875, -0.4918060302734375, -0.430908203125, -0.3700103759765625, -0.309112548828125, -0.2482147216796875, -0.18731689453125, -0.1264190673828125, -0.065521240234375, -0.0046234130859375, 0.0562744140625, 0.1171722412109375, 0.178070068359375, 0.2389678955078125, 0.29986572265625, 0.3607635498046875, 0.421661376953125, 0.4825592041015625, 0.54345703125, 0.6043548583984375, 0.665252685546875, 0.7261505126953125, 0.78704833984375, 0.8479461669921875, 0.908843994140625, 0.9697418212890625, 1.0306396484375, 1.0915374755859375, 1.152435302734375, 1.2133331298828125, 1.27423095703125, 1.3351287841796875, 1.396026611328125, 1.4569244384765625, 1.517822265625, 1.5787200927734375, 1.639617919921875, 1.7005157470703125, 1.76141357421875, 1.8223114013671875, 1.883209228515625, 1.9441070556640625, 2.0050048828125, 2.0659027099609375, 2.126800537109375, 2.1876983642578125, 2.24859619140625, 2.3094940185546875, 2.370391845703125, 2.4312896728515625, 2.4921875]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 3.0, 5.0, 7.0, 17.0, 39.0, 90.0, 195.0, 192.0, 199.0, 131.0, 55.0, 27.0, 13.0, 17.0, 7.0, 5.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.360735893249512, -8.938557624816895, -8.516378402709961, -8.094200134277344, -7.672021865844727, -7.249843120574951, -6.827664375305176, -6.405486106872559, -5.983307361602783, -5.561128616333008, -5.138950347900391, -4.716771602630615, -4.29459285736084, -3.8724145889282227, -3.4502358436584473, -3.028057336807251, -2.6058788299560547, -2.1837003231048584, -1.7615216970443726, -1.3393430709838867, -0.9171645641326904, -0.49498605728149414, -0.07280731201171875, 0.34937119483947754, 0.7715497016906738, 1.1937282085418701, 1.615906834602356, 2.038085460662842, 2.460263967514038, 2.8824424743652344, 3.3046212196350098, 3.726799726486206, 4.148977279663086, 4.571156024932861, 4.9933342933654785, 5.415513038635254, 5.837691307067871, 6.2598700523376465, 6.682048797607422, 7.104227066040039, 7.5264058113098145, 7.94858455657959, 8.370762825012207, 8.79294204711914, 9.215120315551758, 9.637298583984375, 10.059476852416992, 10.481656074523926, 10.903834342956543, 11.32601261138916, 11.748191833496094, 12.170370101928711, 12.592548370361328, 13.014726638793945, 13.436905860900879, 13.859084129333496, 14.28126335144043, 14.703441619873047, 15.12562084197998, 15.547799110412598, 15.969977378845215, 16.39215660095215, 16.814334869384766, 17.236513137817383, 17.65869140625]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 2.0, 6.0, 5.0, 4.0, 8.0, 13.0, 27.0, 20.0, 31.0, 24.0, 33.0, 29.0, 33.0, 30.0, 46.0, 37.0, 48.0, 38.0, 50.0, 42.0, 38.0, 31.0, 44.0, 42.0, 41.0, 45.0, 24.0, 35.0, 30.0, 28.0, 18.0, 13.0, 14.0, 15.0, 9.0, 7.0, 8.0, 7.0, 12.0, 5.0, 7.0, 2.0, 1.0, 3.0, 1.0, 1.0], "bins": [-5.8065924644470215, -5.6538519859313965, -5.5011115074157715, -5.348371505737305, -5.19563102722168, -5.042890548706055, -4.89015007019043, -4.737409591674805, -4.58466911315918, -4.431928634643555, -4.27918815612793, -4.126448154449463, -3.973707675933838, -3.820967197418213, -3.668226718902588, -3.515486240386963, -3.362746238708496, -3.210005760192871, -3.057265520095825, -2.9045250415802, -2.7517848014831543, -2.5990443229675293, -2.4463038444519043, -2.2935633659362793, -2.1408231258392334, -1.988082766532898, -1.8353424072265625, -1.6826019287109375, -1.529861569404602, -1.3771212100982666, -1.2243807315826416, -1.0716403722763062, -0.9188995361328125, -0.766159176826477, -0.6134187579154968, -0.460678368806839, -0.30793797969818115, -0.1551976203918457, -0.0024572014808654785, 0.15028321743011475, 0.3030235767364502, 0.45576396584510803, 0.6085043549537659, 0.7612447738647461, 0.9139851331710815, 1.066725492477417, 1.219465970993042, 1.3722063302993774, 1.524946689605713, 1.6776870489120483, 1.8304274082183838, 1.9831678867340088, 2.1359081268310547, 2.2886486053466797, 2.4413890838623047, 2.5941295623779297, 2.7468698024749756, 2.8996102809906006, 3.0523505210876465, 3.2050909996032715, 3.3578314781188965, 3.5105717182159424, 3.6633121967315674, 3.8160524368286133, 3.9687929153442383]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 9.0, 3.0, 2.0, 8.0, 16.0, 18.0, 30.0, 49.0, 68.0, 122.0, 248.0, 578.0, 1361.0, 3380.0, 10370.0, 37686.0, 159048.0, 477311.0, 268950.0, 64244.0, 16483.0, 5107.0, 1922.0, 768.0, 355.0, 180.0, 87.0, 51.0, 36.0, 25.0, 7.0, 14.0, 7.0, 4.0, 3.0, 2.0, 1.0, 4.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-1.2978515625, -1.2643051147460938, -1.2307586669921875, -1.1972122192382812, -1.163665771484375, -1.1301193237304688, -1.0965728759765625, -1.0630264282226562, -1.02947998046875, -0.9959335327148438, -0.9623870849609375, -0.9288406372070312, -0.895294189453125, -0.8617477416992188, -0.8282012939453125, -0.7946548461914062, -0.7611083984375, -0.7275619506835938, -0.6940155029296875, -0.6604690551757812, -0.626922607421875, -0.5933761596679688, -0.5598297119140625, -0.5262832641601562, -0.49273681640625, -0.45919036865234375, -0.4256439208984375, -0.39209747314453125, -0.358551025390625, -0.32500457763671875, -0.2914581298828125, -0.25791168212890625, -0.224365234375, -0.19081878662109375, -0.1572723388671875, -0.12372589111328125, -0.090179443359375, -0.05663299560546875, -0.0230865478515625, 0.01045989990234375, 0.04400634765625, 0.07755279541015625, 0.1110992431640625, 0.14464569091796875, 0.178192138671875, 0.21173858642578125, 0.2452850341796875, 0.27883148193359375, 0.3123779296875, 0.34592437744140625, 0.3794708251953125, 0.41301727294921875, 0.446563720703125, 0.48011016845703125, 0.5136566162109375, 0.5472030639648438, 0.58074951171875, 0.6142959594726562, 0.6478424072265625, 0.6813888549804688, 0.714935302734375, 0.7484817504882812, 0.7820281982421875, 0.8155746459960938, 0.84912109375]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 2.0, 3.0, 8.0, 3.0, 6.0, 7.0, 6.0, 11.0, 14.0, 15.0, 14.0, 27.0, 27.0, 20.0, 20.0, 42.0, 36.0, 41.0, 52.0, 49.0, 54.0, 48.0, 55.0, 53.0, 41.0, 50.0, 39.0, 24.0, 35.0, 34.0, 31.0, 26.0, 25.0, 23.0, 14.0, 13.0, 12.0, 4.0, 8.0, 2.0, 4.0, 5.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.33349609375, -0.32195281982421875, -0.3104095458984375, -0.29886627197265625, -0.287322998046875, -0.27577972412109375, -0.2642364501953125, -0.25269317626953125, -0.24114990234375, -0.22960662841796875, -0.2180633544921875, -0.20652008056640625, -0.194976806640625, -0.18343353271484375, -0.1718902587890625, -0.16034698486328125, -0.1488037109375, -0.13726043701171875, -0.1257171630859375, -0.11417388916015625, -0.102630615234375, -0.09108734130859375, -0.0795440673828125, -0.06800079345703125, -0.05645751953125, -0.04491424560546875, -0.0333709716796875, -0.02182769775390625, -0.010284423828125, 0.00125885009765625, 0.0128021240234375, 0.02434539794921875, 0.035888671875, 0.04743194580078125, 0.0589752197265625, 0.07051849365234375, 0.082061767578125, 0.09360504150390625, 0.1051483154296875, 0.11669158935546875, 0.12823486328125, 0.13977813720703125, 0.1513214111328125, 0.16286468505859375, 0.174407958984375, 0.18595123291015625, 0.1974945068359375, 0.20903778076171875, 0.2205810546875, 0.23212432861328125, 0.2436676025390625, 0.25521087646484375, 0.266754150390625, 0.27829742431640625, 0.2898406982421875, 0.30138397216796875, 0.31292724609375, 0.32447052001953125, 0.3360137939453125, 0.34755706787109375, 0.359100341796875, 0.37064361572265625, 0.3821868896484375, 0.39373016357421875, 0.4052734375]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 3.0, 5.0, 4.0, 7.0, 12.0, 9.0, 23.0, 31.0, 37.0, 61.0, 106.0, 138.0, 237.0, 349.0, 557.0, 870.0, 1783.0, 4456.0, 21167.0, 229345.0, 697755.0, 75811.0, 9566.0, 2887.0, 1325.0, 769.0, 438.0, 264.0, 186.0, 106.0, 78.0, 43.0, 38.0, 32.0, 24.0, 13.0, 14.0, 3.0, 3.0, 3.0, 3.0, 5.0, 1.0, 1.0], "bins": [-1.8408203125, -1.7951202392578125, -1.749420166015625, -1.7037200927734375, -1.65802001953125, -1.6123199462890625, -1.566619873046875, -1.5209197998046875, -1.4752197265625, -1.4295196533203125, -1.383819580078125, -1.3381195068359375, -1.29241943359375, -1.2467193603515625, -1.201019287109375, -1.1553192138671875, -1.109619140625, -1.0639190673828125, -1.018218994140625, -0.9725189208984375, -0.92681884765625, -0.8811187744140625, -0.835418701171875, -0.7897186279296875, -0.7440185546875, -0.6983184814453125, -0.652618408203125, -0.6069183349609375, -0.56121826171875, -0.5155181884765625, -0.469818115234375, -0.4241180419921875, -0.37841796875, -0.3327178955078125, -0.287017822265625, -0.2413177490234375, -0.19561767578125, -0.1499176025390625, -0.104217529296875, -0.0585174560546875, -0.0128173828125, 0.0328826904296875, 0.078582763671875, 0.1242828369140625, 0.16998291015625, 0.2156829833984375, 0.261383056640625, 0.3070831298828125, 0.352783203125, 0.3984832763671875, 0.444183349609375, 0.4898834228515625, 0.53558349609375, 0.5812835693359375, 0.626983642578125, 0.6726837158203125, 0.7183837890625, 0.7640838623046875, 0.809783935546875, 0.8554840087890625, 0.90118408203125, 0.9468841552734375, 0.992584228515625, 1.0382843017578125, 1.083984375]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 4.0, 7.0, 9.0, 14.0, 14.0, 9.0, 16.0, 24.0, 25.0, 32.0, 37.0, 38.0, 55.0, 48.0, 41.0, 43.0, 51.0, 52.0, 56.0, 60.0, 48.0, 50.0, 45.0, 36.0, 44.0, 26.0, 24.0, 21.0, 12.0, 15.0, 8.0, 11.0, 5.0, 2.0, 6.0, 4.0, 1.0, 4.0, 5.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.56640625, -1.514678955078125, -1.46295166015625, -1.411224365234375, -1.3594970703125, -1.307769775390625, -1.25604248046875, -1.204315185546875, -1.152587890625, -1.100860595703125, -1.04913330078125, -0.997406005859375, -0.9456787109375, -0.893951416015625, -0.84222412109375, -0.790496826171875, -0.73876953125, -0.687042236328125, -0.63531494140625, -0.583587646484375, -0.5318603515625, -0.480133056640625, -0.42840576171875, -0.376678466796875, -0.324951171875, -0.273223876953125, -0.22149658203125, -0.169769287109375, -0.1180419921875, -0.066314697265625, -0.01458740234375, 0.037139892578125, 0.0888671875, 0.140594482421875, 0.19232177734375, 0.244049072265625, 0.2957763671875, 0.347503662109375, 0.39923095703125, 0.450958251953125, 0.502685546875, 0.554412841796875, 0.60614013671875, 0.657867431640625, 0.7095947265625, 0.761322021484375, 0.81304931640625, 0.864776611328125, 0.91650390625, 0.968231201171875, 1.01995849609375, 1.071685791015625, 1.1234130859375, 1.175140380859375, 1.22686767578125, 1.278594970703125, 1.330322265625, 1.382049560546875, 1.43377685546875, 1.485504150390625, 1.5372314453125, 1.588958740234375, 1.64068603515625, 1.692413330078125, 1.744140625]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 5.0, 5.0, 5.0, 4.0, 9.0, 12.0, 7.0, 13.0, 17.0, 19.0, 25.0, 51.0, 64.0, 121.0, 193.0, 370.0, 824.0, 2243.0, 8800.0, 67347.0, 726345.0, 215849.0, 19929.0, 3870.0, 1248.0, 504.0, 265.0, 137.0, 89.0, 35.0, 44.0, 22.0, 21.0, 20.0, 10.0, 7.0, 3.0, 4.0, 5.0, 4.0, 4.0, 6.0, 2.0, 2.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.55517578125, -0.5364990234375, -0.517822265625, -0.4991455078125, -0.48046875, -0.4617919921875, -0.443115234375, -0.4244384765625, -0.40576171875, -0.3870849609375, -0.368408203125, -0.3497314453125, -0.3310546875, -0.3123779296875, -0.293701171875, -0.2750244140625, -0.25634765625, -0.2376708984375, -0.218994140625, -0.2003173828125, -0.181640625, -0.1629638671875, -0.144287109375, -0.1256103515625, -0.10693359375, -0.0882568359375, -0.069580078125, -0.0509033203125, -0.0322265625, -0.0135498046875, 0.005126953125, 0.0238037109375, 0.04248046875, 0.0611572265625, 0.079833984375, 0.0985107421875, 0.1171875, 0.1358642578125, 0.154541015625, 0.1732177734375, 0.19189453125, 0.2105712890625, 0.229248046875, 0.2479248046875, 0.2666015625, 0.2852783203125, 0.303955078125, 0.3226318359375, 0.34130859375, 0.3599853515625, 0.378662109375, 0.3973388671875, 0.416015625, 0.4346923828125, 0.453369140625, 0.4720458984375, 0.49072265625, 0.5093994140625, 0.528076171875, 0.5467529296875, 0.5654296875, 0.5841064453125, 0.602783203125, 0.6214599609375, 0.64013671875]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 4.0, 1.0, 1.0, 5.0, 3.0, 1.0, 3.0, 8.0, 9.0, 14.0, 10.0, 12.0, 13.0, 19.0, 29.0, 38.0, 41.0, 69.0, 80.0, 78.0, 118.0, 91.0, 105.0, 48.0, 50.0, 34.0, 21.0, 33.0, 14.0, 17.0, 9.0, 9.0, 7.0, 4.0, 9.0, 2.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00010442733764648438, -0.00010148249566555023, -9.853765368461609e-05, -9.559281170368195e-05, -9.26479697227478e-05, -8.970312774181366e-05, -8.675828576087952e-05, -8.381344377994537e-05, -8.086860179901123e-05, -7.792375981807709e-05, -7.497891783714294e-05, -7.20340758562088e-05, -6.908923387527466e-05, -6.614439189434052e-05, -6.319954991340637e-05, -6.025470793247223e-05, -5.7309865951538086e-05, -5.436502397060394e-05, -5.14201819896698e-05, -4.847534000873566e-05, -4.5530498027801514e-05, -4.258565604686737e-05, -3.964081406593323e-05, -3.6695972084999084e-05, -3.375113010406494e-05, -3.08062881231308e-05, -2.7861446142196655e-05, -2.4916604161262512e-05, -2.197176218032837e-05, -1.9026920199394226e-05, -1.6082078218460083e-05, -1.313723623752594e-05, -1.0192394256591797e-05, -7.247552275657654e-06, -4.302710294723511e-06, -1.3578683137893677e-06, 1.5869736671447754e-06, 4.5318156480789185e-06, 7.4766576290130615e-06, 1.0421499609947205e-05, 1.3366341590881348e-05, 1.631118357181549e-05, 1.9256025552749634e-05, 2.2200867533683777e-05, 2.514570951461792e-05, 2.8090551495552063e-05, 3.1035393476486206e-05, 3.398023545742035e-05, 3.692507743835449e-05, 3.9869919419288635e-05, 4.281476140022278e-05, 4.575960338115692e-05, 4.8704445362091064e-05, 5.164928734302521e-05, 5.459412932395935e-05, 5.7538971304893494e-05, 6.048381328582764e-05, 6.342865526676178e-05, 6.637349724769592e-05, 6.931833922863007e-05, 7.226318120956421e-05, 7.520802319049835e-05, 7.81528651714325e-05, 8.109770715236664e-05, 8.404254913330078e-05]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 6.0, 6.0, 16.0, 22.0, 32.0, 44.0, 83.0, 127.0, 248.0, 471.0, 1060.0, 3048.0, 14583.0, 194610.0, 767026.0, 56962.0, 6808.0, 1878.0, 740.0, 355.0, 184.0, 83.0, 63.0, 38.0, 23.0, 10.0, 11.0, 7.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.68310546875, -0.6619491577148438, -0.6407928466796875, -0.6196365356445312, -0.598480224609375, -0.5773239135742188, -0.5561676025390625, -0.5350112915039062, -0.51385498046875, -0.49269866943359375, -0.4715423583984375, -0.45038604736328125, -0.429229736328125, -0.40807342529296875, -0.3869171142578125, -0.36576080322265625, -0.3446044921875, -0.32344818115234375, -0.3022918701171875, -0.28113555908203125, -0.259979248046875, -0.23882293701171875, -0.2176666259765625, -0.19651031494140625, -0.17535400390625, -0.15419769287109375, -0.1330413818359375, -0.11188507080078125, -0.090728759765625, -0.06957244873046875, -0.0484161376953125, -0.02725982666015625, -0.006103515625, 0.01505279541015625, 0.0362091064453125, 0.05736541748046875, 0.078521728515625, 0.09967803955078125, 0.1208343505859375, 0.14199066162109375, 0.16314697265625, 0.18430328369140625, 0.2054595947265625, 0.22661590576171875, 0.247772216796875, 0.26892852783203125, 0.2900848388671875, 0.31124114990234375, 0.3323974609375, 0.35355377197265625, 0.3747100830078125, 0.39586639404296875, 0.417022705078125, 0.43817901611328125, 0.4593353271484375, 0.48049163818359375, 0.50164794921875, 0.5228042602539062, 0.5439605712890625, 0.5651168823242188, 0.586273193359375, 0.6074295043945312, 0.6285858154296875, 0.6497421264648438, 0.6708984375]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 3.0, 0.0, 4.0, 3.0, 7.0, 2.0, 1.0, 7.0, 7.0, 11.0, 18.0, 14.0, 17.0, 30.0, 24.0, 35.0, 36.0, 44.0, 47.0, 56.0, 55.0, 73.0, 56.0, 50.0, 54.0, 57.0, 56.0, 38.0, 31.0, 37.0, 26.0, 26.0, 15.0, 12.0, 15.0, 10.0, 9.0, 4.0, 2.0, 5.0, 4.0, 2.0, 6.0, 0.0, 1.0, 1.0, 2.0], "bins": [-0.46337890625, -0.4516754150390625, -0.439971923828125, -0.4282684326171875, -0.41656494140625, -0.4048614501953125, -0.393157958984375, -0.3814544677734375, -0.3697509765625, -0.3580474853515625, -0.346343994140625, -0.3346405029296875, -0.32293701171875, -0.3112335205078125, -0.299530029296875, -0.2878265380859375, -0.276123046875, -0.2644195556640625, -0.252716064453125, -0.2410125732421875, -0.22930908203125, -0.2176055908203125, -0.205902099609375, -0.1941986083984375, -0.1824951171875, -0.1707916259765625, -0.159088134765625, -0.1473846435546875, -0.13568115234375, -0.1239776611328125, -0.112274169921875, -0.1005706787109375, -0.0888671875, -0.0771636962890625, -0.065460205078125, -0.0537567138671875, -0.04205322265625, -0.0303497314453125, -0.018646240234375, -0.0069427490234375, 0.0047607421875, 0.0164642333984375, 0.028167724609375, 0.0398712158203125, 0.05157470703125, 0.0632781982421875, 0.074981689453125, 0.0866851806640625, 0.098388671875, 0.1100921630859375, 0.121795654296875, 0.1334991455078125, 0.14520263671875, 0.1569061279296875, 0.168609619140625, 0.1803131103515625, 0.1920166015625, 0.2037200927734375, 0.215423583984375, 0.2271270751953125, 0.23883056640625, 0.2505340576171875, 0.262237548828125, 0.2739410400390625, 0.28564453125]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 6.0, 7.0, 28.0, 20.0, 52.0, 106.0, 143.0, 178.0, 168.0, 139.0, 89.0, 34.0, 10.0, 13.0, 2.0, 2.0, 1.0, 4.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-13.555559158325195, -13.2539644241333, -12.952369689941406, -12.650774955749512, -12.349180221557617, -12.047586441040039, -11.745991706848145, -11.44439697265625, -11.142802238464355, -10.841207504272461, -10.539612770080566, -10.238018035888672, -9.936424255371094, -9.6348295211792, -9.333234786987305, -9.03164005279541, -8.730045318603516, -8.428450584411621, -8.126855850219727, -7.82526159286499, -7.523666858673096, -7.222072124481201, -6.920477867126465, -6.61888313293457, -6.317288398742676, -6.015693664550781, -5.714098930358887, -5.41250467300415, -5.110909938812256, -4.809315204620361, -4.507720947265625, -4.2061262130737305, -3.904531955718994, -3.6029372215270996, -3.301342725753784, -2.9997482299804688, -2.698153495788574, -2.3965587615966797, -2.0949642658233643, -1.7933697700500488, -1.4917750358581543, -1.1901804208755493, -0.8885858058929443, -0.5869911909103394, -0.2853965759277344, 0.016198039054870605, 0.3177926540374756, 0.619387149810791, 0.9209818840026855, 1.2225764989852905, 1.5241711139678955, 1.8257657289505005, 2.1273603439331055, 2.428955078125, 2.7305495738983154, 3.032144069671631, 3.3337388038635254, 3.63533353805542, 3.9369280338287354, 4.238522529602051, 4.540117263793945, 4.84171199798584, 5.143306732177734, 5.444900989532471, 5.746495723724365]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 4.0, 10.0, 5.0, 5.0, 7.0, 7.0, 17.0, 13.0, 21.0, 19.0, 29.0, 29.0, 36.0, 47.0, 39.0, 60.0, 81.0, 75.0, 83.0, 66.0, 44.0, 56.0, 44.0, 47.0, 44.0, 20.0, 22.0, 11.0, 17.0, 15.0, 9.0, 9.0, 6.0, 2.0, 1.0, 3.0, 6.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-9.460855484008789, -9.210884094238281, -8.960912704467773, -8.710941314697266, -8.460969924926758, -8.21099853515625, -7.961026668548584, -7.711055278778076, -7.461083889007568, -7.2111124992370605, -6.961141109466553, -6.711169719696045, -6.461197853088379, -6.211226463317871, -5.961255073547363, -5.7112836837768555, -5.461312294006348, -5.21134090423584, -4.961369514465332, -4.711398124694824, -4.461426734924316, -4.211455345153809, -3.9614834785461426, -3.7115120887756348, -3.461540699005127, -3.211569309234619, -2.9615979194641113, -2.7116262912750244, -2.4616549015045166, -2.211683511734009, -1.9617120027542114, -1.711740493774414, -1.4617695808410645, -1.2117981910705566, -0.9618266820907593, -0.7118552327156067, -0.4618837833404541, -0.2119123935699463, 0.038059115409851074, 0.28803062438964844, 0.5380020141601562, 0.7879734635353088, 1.0379449129104614, 1.2879164218902588, 1.5378878116607666, 1.7878592014312744, 2.0378308296203613, 2.287802219390869, 2.537773609161377, 2.7877449989318848, 3.0377163887023926, 3.2876880168914795, 3.5376594066619873, 3.787630796432495, 4.037602424621582, 4.28757381439209, 4.537545204162598, 4.7875165939331055, 5.037487983703613, 5.287459373474121, 5.537430763244629, 5.787402153015137, 6.037374019622803, 6.2873454093933105, 6.537316799163818]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 4.0, 4.0, 6.0, 7.0, 8.0, 19.0, 23.0, 26.0, 31.0, 42.0, 68.0, 65.0, 108.0, 146.0, 221.0, 474.0, 1182.0, 4962.0, 43933.0, 3606888.0, 514819.0, 16885.0, 2639.0, 736.0, 329.0, 191.0, 124.0, 91.0, 54.0, 53.0, 38.0, 30.0, 13.0, 11.0, 15.0, 7.0, 11.0, 6.0, 3.0, 5.0, 2.0, 5.0, 2.0, 2.0, 2.0, 0.0, 2.0, 2.0], "bins": [-1.884765625, -1.832122802734375, -1.77947998046875, -1.726837158203125, -1.6741943359375, -1.621551513671875, -1.56890869140625, -1.516265869140625, -1.463623046875, -1.410980224609375, -1.35833740234375, -1.305694580078125, -1.2530517578125, -1.200408935546875, -1.14776611328125, -1.095123291015625, -1.04248046875, -0.989837646484375, -0.93719482421875, -0.884552001953125, -0.8319091796875, -0.779266357421875, -0.72662353515625, -0.673980712890625, -0.621337890625, -0.568695068359375, -0.51605224609375, -0.463409423828125, -0.4107666015625, -0.358123779296875, -0.30548095703125, -0.252838134765625, -0.2001953125, -0.147552490234375, -0.09490966796875, -0.042266845703125, 0.0103759765625, 0.063018798828125, 0.11566162109375, 0.168304443359375, 0.220947265625, 0.273590087890625, 0.32623291015625, 0.378875732421875, 0.4315185546875, 0.484161376953125, 0.53680419921875, 0.589447021484375, 0.64208984375, 0.694732666015625, 0.74737548828125, 0.800018310546875, 0.8526611328125, 0.905303955078125, 0.95794677734375, 1.010589599609375, 1.063232421875, 1.115875244140625, 1.16851806640625, 1.221160888671875, 1.2738037109375, 1.326446533203125, 1.37908935546875, 1.431732177734375, 1.484375]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 5.0, 3.0, 4.0, 8.0, 12.0, 15.0, 17.0, 21.0, 21.0, 42.0, 47.0, 62.0, 80.0, 69.0, 69.0, 84.0, 86.0, 86.0, 62.0, 53.0, 39.0, 46.0, 27.0, 20.0, 10.0, 2.0, 8.0, 4.0, 1.0, 4.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.50927734375, -0.4910888671875, -0.472900390625, -0.4547119140625, -0.4365234375, -0.4183349609375, -0.400146484375, -0.3819580078125, -0.36376953125, -0.3455810546875, -0.327392578125, -0.3092041015625, -0.291015625, -0.2728271484375, -0.254638671875, -0.2364501953125, -0.21826171875, -0.2000732421875, -0.181884765625, -0.1636962890625, -0.1455078125, -0.1273193359375, -0.109130859375, -0.0909423828125, -0.07275390625, -0.0545654296875, -0.036376953125, -0.0181884765625, 0.0, 0.0181884765625, 0.036376953125, 0.0545654296875, 0.07275390625, 0.0909423828125, 0.109130859375, 0.1273193359375, 0.1455078125, 0.1636962890625, 0.181884765625, 0.2000732421875, 0.21826171875, 0.2364501953125, 0.254638671875, 0.2728271484375, 0.291015625, 0.3092041015625, 0.327392578125, 0.3455810546875, 0.36376953125, 0.3819580078125, 0.400146484375, 0.4183349609375, 0.4365234375, 0.4547119140625, 0.472900390625, 0.4910888671875, 0.50927734375, 0.5274658203125, 0.545654296875, 0.5638427734375, 0.58203125, 0.6002197265625, 0.618408203125, 0.6365966796875, 0.65478515625]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 3.0, 5.0, 8.0, 13.0, 14.0, 18.0, 24.0, 42.0, 72.0, 108.0, 152.0, 280.0, 571.0, 1101.0, 2353.0, 5288.0, 13897.0, 46963.0, 308845.0, 3205780.0, 512751.0, 64021.0, 18409.0, 7136.0, 3131.0, 1520.0, 781.0, 422.0, 216.0, 127.0, 75.0, 40.0, 39.0, 19.0, 13.0, 11.0, 11.0, 7.0, 4.0, 3.0, 4.0, 4.0, 3.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.84375, -0.8193588256835938, -0.7949676513671875, -0.7705764770507812, -0.746185302734375, -0.7217941284179688, -0.6974029541015625, -0.6730117797851562, -0.64862060546875, -0.6242294311523438, -0.5998382568359375, -0.5754470825195312, -0.551055908203125, -0.5266647338867188, -0.5022735595703125, -0.47788238525390625, -0.4534912109375, -0.42910003662109375, -0.4047088623046875, -0.38031768798828125, -0.355926513671875, -0.33153533935546875, -0.3071441650390625, -0.28275299072265625, -0.25836181640625, -0.23397064208984375, -0.2095794677734375, -0.18518829345703125, -0.160797119140625, -0.13640594482421875, -0.1120147705078125, -0.08762359619140625, -0.063232421875, -0.03884124755859375, -0.0144500732421875, 0.00994110107421875, 0.034332275390625, 0.05872344970703125, 0.0831146240234375, 0.10750579833984375, 0.13189697265625, 0.15628814697265625, 0.1806793212890625, 0.20507049560546875, 0.229461669921875, 0.25385284423828125, 0.2782440185546875, 0.30263519287109375, 0.3270263671875, 0.35141754150390625, 0.3758087158203125, 0.40019989013671875, 0.424591064453125, 0.44898223876953125, 0.4733734130859375, 0.49776458740234375, 0.52215576171875, 0.5465469360351562, 0.5709381103515625, 0.5953292846679688, 0.619720458984375, 0.6441116333007812, 0.6685028076171875, 0.6928939819335938, 0.71728515625]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 2.0, 5.0, 7.0, 14.0, 17.0, 25.0, 43.0, 73.0, 97.0, 176.0, 339.0, 570.0, 860.0, 741.0, 455.0, 226.0, 137.0, 74.0, 48.0, 36.0, 21.0, 16.0, 13.0, 18.0, 9.0, 11.0, 6.0, 1.0, 4.0, 5.0, 2.0, 2.0, 3.0, 1.0, 3.0, 2.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.8662109375, -0.83892822265625, -0.8116455078125, -0.78436279296875, -0.757080078125, -0.72979736328125, -0.7025146484375, -0.67523193359375, -0.64794921875, -0.62066650390625, -0.5933837890625, -0.56610107421875, -0.538818359375, -0.51153564453125, -0.4842529296875, -0.45697021484375, -0.4296875, -0.40240478515625, -0.3751220703125, -0.34783935546875, -0.320556640625, -0.29327392578125, -0.2659912109375, -0.23870849609375, -0.21142578125, -0.18414306640625, -0.1568603515625, -0.12957763671875, -0.102294921875, -0.07501220703125, -0.0477294921875, -0.02044677734375, 0.0068359375, 0.03411865234375, 0.0614013671875, 0.08868408203125, 0.115966796875, 0.14324951171875, 0.1705322265625, 0.19781494140625, 0.22509765625, 0.25238037109375, 0.2796630859375, 0.30694580078125, 0.334228515625, 0.36151123046875, 0.3887939453125, 0.41607666015625, 0.443359375, 0.47064208984375, 0.4979248046875, 0.52520751953125, 0.552490234375, 0.57977294921875, 0.6070556640625, 0.63433837890625, 0.66162109375, 0.68890380859375, 0.7161865234375, 0.74346923828125, 0.770751953125, 0.79803466796875, 0.8253173828125, 0.85260009765625, 0.8798828125]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 3.0, 4.0, 5.0, 5.0, 8.0, 34.0, 127.0, 341.0, 311.0, 121.0, 28.0, 12.0, 6.0, 4.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.42849349975586, -17.928373336791992, -17.428253173828125, -16.92813491821289, -16.428014755249023, -15.927894592285156, -15.427775382995605, -14.927656173706055, -14.427536010742188, -13.92741584777832, -13.42729663848877, -12.927177429199219, -12.427057266235352, -11.926937103271484, -11.426817893981934, -10.926698684692383, -10.426578521728516, -9.926458358764648, -9.426339149475098, -8.926219940185547, -8.42609977722168, -7.925980091094971, -7.425860404968262, -6.925740718841553, -6.425621032714844, -5.925501346588135, -5.425381660461426, -4.925261974334717, -4.425142288208008, -3.925022602081299, -3.42490291595459, -2.924783229827881, -2.424661636352539, -1.92454195022583, -1.424422264099121, -0.9243025779724121, -0.4241828918457031, 0.07593679428100586, 0.5760564804077148, 1.0761761665344238, 1.5762958526611328, 2.076415538787842, 2.576535224914551, 3.0766549110412598, 3.5767745971679688, 4.076894283294678, 4.577013969421387, 5.077133655548096, 5.577253341674805, 6.077373027801514, 6.577492713928223, 7.077612400054932, 7.577732086181641, 8.077852249145508, 8.577971458435059, 9.07809066772461, 9.578210830688477, 10.078330993652344, 10.578450202941895, 11.078569412231445, 11.578689575195312, 12.07880973815918, 12.57892894744873, 13.079048156738281, 13.579168319702148]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 0.0, 3.0, 4.0, 4.0, 12.0, 17.0, 18.0, 27.0, 40.0, 48.0, 33.0, 43.0, 50.0, 64.0, 50.0, 69.0, 61.0, 72.0, 48.0, 51.0, 47.0, 35.0, 47.0, 31.0, 27.0, 28.0, 16.0, 9.0, 9.0, 7.0, 9.0, 8.0, 5.0, 2.0, 2.0, 5.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.135585784912109, -5.9689154624938965, -5.802245616912842, -5.635575294494629, -5.468905448913574, -5.302235126495361, -5.135564804077148, -4.968894958496094, -4.802224636077881, -4.635554313659668, -4.468884468078613, -4.3022141456604, -4.135544300079346, -3.968873977661133, -3.802203893661499, -3.6355338096618652, -3.4688637256622314, -3.3021936416625977, -3.135523557662964, -2.96885347366333, -2.802183151245117, -2.6355130672454834, -2.4688429832458496, -2.3021726608276367, -2.135502815246582, -1.9688327312469482, -1.802162528038025, -1.6354924440383911, -1.4688222408294678, -1.302152156829834, -1.1354820728302002, -0.9688118696212769, -0.8021416664123535, -0.635471522808075, -0.4688014090061188, -0.3021312952041626, -0.13546115159988403, 0.03120899200439453, 0.19787907600402832, 0.36454927921295166, 0.5312193632125854, 0.697889506816864, 0.8645596504211426, 1.0312297344207764, 1.1978998184204102, 1.3645700216293335, 1.5312401056289673, 1.6979103088378906, 1.8645803928375244, 2.031250476837158, 2.197920560836792, 2.364590644836426, 2.5312609672546387, 2.6979310512542725, 2.8646011352539062, 3.031271457672119, 3.197941303253174, 3.3646113872528076, 3.5312814712524414, 3.6979517936706543, 3.864621877670288, 4.031291961669922, 4.197961807250977, 4.3646321296691895, 4.531302452087402]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 2.0, 4.0, 4.0, 2.0, 18.0, 13.0, 23.0, 45.0, 72.0, 132.0, 263.0, 607.0, 1273.0, 3343.0, 9624.0, 33429.0, 137254.0, 458068.0, 303430.0, 72172.0, 18940.0, 5952.0, 2152.0, 892.0, 352.0, 216.0, 100.0, 65.0, 29.0, 20.0, 18.0, 9.0, 11.0, 4.0, 6.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.93994140625, -0.9083786010742188, -0.8768157958984375, -0.8452529907226562, -0.813690185546875, -0.7821273803710938, -0.7505645751953125, -0.7190017700195312, -0.68743896484375, -0.6558761596679688, -0.6243133544921875, -0.5927505493164062, -0.561187744140625, -0.5296249389648438, -0.4980621337890625, -0.46649932861328125, -0.4349365234375, -0.40337371826171875, -0.3718109130859375, -0.34024810791015625, -0.308685302734375, -0.27712249755859375, -0.2455596923828125, -0.21399688720703125, -0.18243408203125, -0.15087127685546875, -0.1193084716796875, -0.08774566650390625, -0.056182861328125, -0.02462005615234375, 0.0069427490234375, 0.03850555419921875, 0.070068359375, 0.10163116455078125, 0.1331939697265625, 0.16475677490234375, 0.196319580078125, 0.22788238525390625, 0.2594451904296875, 0.29100799560546875, 0.32257080078125, 0.35413360595703125, 0.3856964111328125, 0.41725921630859375, 0.448822021484375, 0.48038482666015625, 0.5119476318359375, 0.5435104370117188, 0.5750732421875, 0.6066360473632812, 0.6381988525390625, 0.6697616577148438, 0.701324462890625, 0.7328872680664062, 0.7644500732421875, 0.7960128784179688, 0.82757568359375, 0.8591384887695312, 0.8907012939453125, 0.9222640991210938, 0.953826904296875, 0.9853897094726562, 1.0169525146484375, 1.0485153198242188, 1.080078125]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 0.0, 1.0, 4.0, 1.0, 10.0, 8.0, 10.0, 13.0, 30.0, 22.0, 25.0, 30.0, 42.0, 45.0, 65.0, 69.0, 85.0, 83.0, 64.0, 75.0, 57.0, 59.0, 58.0, 42.0, 30.0, 32.0, 17.0, 14.0, 9.0, 3.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.44189453125, -0.42455291748046875, -0.4072113037109375, -0.38986968994140625, -0.372528076171875, -0.35518646240234375, -0.3378448486328125, -0.32050323486328125, -0.30316162109375, -0.28582000732421875, -0.2684783935546875, -0.25113677978515625, -0.233795166015625, -0.21645355224609375, -0.1991119384765625, -0.18177032470703125, -0.1644287109375, -0.14708709716796875, -0.1297454833984375, -0.11240386962890625, -0.095062255859375, -0.07772064208984375, -0.0603790283203125, -0.04303741455078125, -0.02569580078125, -0.00835418701171875, 0.0089874267578125, 0.02632904052734375, 0.043670654296875, 0.06101226806640625, 0.0783538818359375, 0.09569549560546875, 0.113037109375, 0.13037872314453125, 0.1477203369140625, 0.16506195068359375, 0.182403564453125, 0.19974517822265625, 0.2170867919921875, 0.23442840576171875, 0.25177001953125, 0.26911163330078125, 0.2864532470703125, 0.30379486083984375, 0.321136474609375, 0.33847808837890625, 0.3558197021484375, 0.37316131591796875, 0.3905029296875, 0.40784454345703125, 0.4251861572265625, 0.44252777099609375, 0.459869384765625, 0.47721099853515625, 0.4945526123046875, 0.5118942260742188, 0.52923583984375, 0.5465774536132812, 0.5639190673828125, 0.5812606811523438, 0.598602294921875, 0.6159439086914062, 0.6332855224609375, 0.6506271362304688, 0.66796875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 5.0, 4.0, 6.0, 4.0, 13.0, 9.0, 17.0, 19.0, 47.0, 75.0, 90.0, 136.0, 228.0, 398.0, 727.0, 1636.0, 4645.0, 24028.0, 234389.0, 687622.0, 78835.0, 10362.0, 2737.0, 1164.0, 528.0, 289.0, 177.0, 124.0, 68.0, 50.0, 38.0, 19.0, 21.0, 16.0, 11.0, 3.0, 6.0, 4.0, 5.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4287109375, -1.383026123046875, -1.33734130859375, -1.291656494140625, -1.2459716796875, -1.200286865234375, -1.15460205078125, -1.108917236328125, -1.063232421875, -1.017547607421875, -0.97186279296875, -0.926177978515625, -0.8804931640625, -0.834808349609375, -0.78912353515625, -0.743438720703125, -0.69775390625, -0.652069091796875, -0.60638427734375, -0.560699462890625, -0.5150146484375, -0.469329833984375, -0.42364501953125, -0.377960205078125, -0.332275390625, -0.286590576171875, -0.24090576171875, -0.195220947265625, -0.1495361328125, -0.103851318359375, -0.05816650390625, -0.012481689453125, 0.033203125, 0.078887939453125, 0.12457275390625, 0.170257568359375, 0.2159423828125, 0.261627197265625, 0.30731201171875, 0.352996826171875, 0.398681640625, 0.444366455078125, 0.49005126953125, 0.535736083984375, 0.5814208984375, 0.627105712890625, 0.67279052734375, 0.718475341796875, 0.76416015625, 0.809844970703125, 0.85552978515625, 0.901214599609375, 0.9468994140625, 0.992584228515625, 1.03826904296875, 1.083953857421875, 1.129638671875, 1.175323486328125, 1.22100830078125, 1.266693115234375, 1.3123779296875, 1.358062744140625, 1.40374755859375, 1.449432373046875, 1.4951171875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 2.0, 2.0, 5.0, 8.0, 15.0, 19.0, 16.0, 29.0, 32.0, 29.0, 37.0, 35.0, 40.0, 48.0, 57.0, 64.0, 62.0, 64.0, 43.0, 52.0, 58.0, 50.0, 43.0, 35.0, 31.0, 30.0, 22.0, 16.0, 12.0, 14.0, 11.0, 2.0, 6.0, 3.0, 3.0, 3.0, 1.0, 2.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.599609375, -1.542816162109375, -1.48602294921875, -1.429229736328125, -1.3724365234375, -1.315643310546875, -1.25885009765625, -1.202056884765625, -1.145263671875, -1.088470458984375, -1.03167724609375, -0.974884033203125, -0.9180908203125, -0.861297607421875, -0.80450439453125, -0.747711181640625, -0.69091796875, -0.634124755859375, -0.57733154296875, -0.520538330078125, -0.4637451171875, -0.406951904296875, -0.35015869140625, -0.293365478515625, -0.236572265625, -0.179779052734375, -0.12298583984375, -0.066192626953125, -0.0093994140625, 0.047393798828125, 0.10418701171875, 0.160980224609375, 0.2177734375, 0.274566650390625, 0.33135986328125, 0.388153076171875, 0.4449462890625, 0.501739501953125, 0.55853271484375, 0.615325927734375, 0.672119140625, 0.728912353515625, 0.78570556640625, 0.842498779296875, 0.8992919921875, 0.956085205078125, 1.01287841796875, 1.069671630859375, 1.12646484375, 1.183258056640625, 1.24005126953125, 1.296844482421875, 1.3536376953125, 1.410430908203125, 1.46722412109375, 1.524017333984375, 1.580810546875, 1.637603759765625, 1.69439697265625, 1.751190185546875, 1.8079833984375, 1.864776611328125, 1.92156982421875, 1.978363037109375, 2.03515625]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 7.0, 4.0, 5.0, 6.0, 6.0, 15.0, 31.0, 64.0, 115.0, 291.0, 936.0, 5584.0, 220089.0, 804829.0, 14237.0, 1586.0, 408.0, 162.0, 75.0, 33.0, 22.0, 16.0, 9.0, 6.0, 8.0, 2.0, 1.0, 0.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0439453125, -1.00933837890625, -0.9747314453125, -0.94012451171875, -0.905517578125, -0.87091064453125, -0.8363037109375, -0.80169677734375, -0.76708984375, -0.73248291015625, -0.6978759765625, -0.66326904296875, -0.628662109375, -0.59405517578125, -0.5594482421875, -0.52484130859375, -0.490234375, -0.45562744140625, -0.4210205078125, -0.38641357421875, -0.351806640625, -0.31719970703125, -0.2825927734375, -0.24798583984375, -0.21337890625, -0.17877197265625, -0.1441650390625, -0.10955810546875, -0.074951171875, -0.04034423828125, -0.0057373046875, 0.02886962890625, 0.0634765625, 0.09808349609375, 0.1326904296875, 0.16729736328125, 0.201904296875, 0.23651123046875, 0.2711181640625, 0.30572509765625, 0.34033203125, 0.37493896484375, 0.4095458984375, 0.44415283203125, 0.478759765625, 0.51336669921875, 0.5479736328125, 0.58258056640625, 0.6171875, 0.65179443359375, 0.6864013671875, 0.72100830078125, 0.755615234375, 0.79022216796875, 0.8248291015625, 0.85943603515625, 0.89404296875, 0.92864990234375, 0.9632568359375, 0.99786376953125, 1.032470703125, 1.06707763671875, 1.1016845703125, 1.13629150390625, 1.1708984375]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 7.0, 5.0, 7.0, 13.0, 18.0, 30.0, 45.0, 64.0, 108.0, 121.0, 139.0, 122.0, 110.0, 78.0, 44.0, 29.0, 26.0, 14.0, 9.0, 5.0, 5.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00012564659118652344, -0.00012168660759925842, -0.00011772662401199341, -0.0001137666404247284, -0.00010980665683746338, -0.00010584667325019836, -0.00010188668966293335, -9.792670607566833e-05, -9.396672248840332e-05, -9.00067389011383e-05, -8.604675531387329e-05, -8.208677172660828e-05, -7.812678813934326e-05, -7.416680455207825e-05, -7.020682096481323e-05, -6.624683737754822e-05, -6.22868537902832e-05, -5.832687020301819e-05, -5.4366886615753174e-05, -5.040690302848816e-05, -4.6446919441223145e-05, -4.248693585395813e-05, -3.8526952266693115e-05, -3.45669686794281e-05, -3.0606985092163086e-05, -2.664700150489807e-05, -2.2687017917633057e-05, -1.8727034330368042e-05, -1.4767050743103027e-05, -1.0807067155838013e-05, -6.847083568572998e-06, -2.8870999813079834e-06, 1.0728836059570312e-06, 5.032867193222046e-06, 8.99285078048706e-06, 1.2952834367752075e-05, 1.691281795501709e-05, 2.0872801542282104e-05, 2.483278512954712e-05, 2.8792768716812134e-05, 3.275275230407715e-05, 3.671273589134216e-05, 4.067271947860718e-05, 4.463270306587219e-05, 4.859268665313721e-05, 5.255267024040222e-05, 5.6512653827667236e-05, 6.047263741493225e-05, 6.443262100219727e-05, 6.839260458946228e-05, 7.23525881767273e-05, 7.631257176399231e-05, 8.027255535125732e-05, 8.423253893852234e-05, 8.819252252578735e-05, 9.215250611305237e-05, 9.611248970031738e-05, 0.0001000724732875824, 0.00010403245687484741, 0.00010799244046211243, 0.00011195242404937744, 0.00011591240763664246, 0.00011987239122390747, 0.00012383237481117249, 0.0001277923583984375]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 2.0, 1.0, 1.0, 1.0, 7.0, 9.0, 9.0, 12.0, 17.0, 29.0, 46.0, 68.0, 101.0, 150.0, 284.0, 596.0, 1228.0, 3278.0, 12226.0, 91897.0, 683452.0, 223140.0, 23736.0, 4915.0, 1719.0, 734.0, 372.0, 210.0, 117.0, 64.0, 50.0, 23.0, 14.0, 15.0, 15.0, 8.0, 3.0, 4.0, 4.0, 3.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.56103515625, -0.54486083984375, -0.5286865234375, -0.51251220703125, -0.496337890625, -0.48016357421875, -0.4639892578125, -0.44781494140625, -0.431640625, -0.41546630859375, -0.3992919921875, -0.38311767578125, -0.366943359375, -0.35076904296875, -0.3345947265625, -0.31842041015625, -0.30224609375, -0.28607177734375, -0.2698974609375, -0.25372314453125, -0.237548828125, -0.22137451171875, -0.2052001953125, -0.18902587890625, -0.1728515625, -0.15667724609375, -0.1405029296875, -0.12432861328125, -0.108154296875, -0.09197998046875, -0.0758056640625, -0.05963134765625, -0.04345703125, -0.02728271484375, -0.0111083984375, 0.00506591796875, 0.021240234375, 0.03741455078125, 0.0535888671875, 0.06976318359375, 0.0859375, 0.10211181640625, 0.1182861328125, 0.13446044921875, 0.150634765625, 0.16680908203125, 0.1829833984375, 0.19915771484375, 0.21533203125, 0.23150634765625, 0.2476806640625, 0.26385498046875, 0.280029296875, 0.29620361328125, 0.3123779296875, 0.32855224609375, 0.3447265625, 0.36090087890625, 0.3770751953125, 0.39324951171875, 0.409423828125, 0.42559814453125, 0.4417724609375, 0.45794677734375, 0.47412109375]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 2.0, 5.0, 4.0, 10.0, 10.0, 10.0, 24.0, 25.0, 46.0, 40.0, 70.0, 90.0, 94.0, 105.0, 85.0, 91.0, 84.0, 59.0, 45.0, 37.0, 13.0, 21.0, 13.0, 9.0, 3.0, 6.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.66748046875, -0.6488418579101562, -0.6302032470703125, -0.6115646362304688, -0.592926025390625, -0.5742874145507812, -0.5556488037109375, -0.5370101928710938, -0.51837158203125, -0.49973297119140625, -0.4810943603515625, -0.46245574951171875, -0.443817138671875, -0.42517852783203125, -0.4065399169921875, -0.38790130615234375, -0.3692626953125, -0.35062408447265625, -0.3319854736328125, -0.31334686279296875, -0.294708251953125, -0.27606964111328125, -0.2574310302734375, -0.23879241943359375, -0.22015380859375, -0.20151519775390625, -0.1828765869140625, -0.16423797607421875, -0.145599365234375, -0.12696075439453125, -0.1083221435546875, -0.08968353271484375, -0.071044921875, -0.05240631103515625, -0.0337677001953125, -0.01512908935546875, 0.003509521484375, 0.02214813232421875, 0.0407867431640625, 0.05942535400390625, 0.07806396484375, 0.09670257568359375, 0.1153411865234375, 0.13397979736328125, 0.152618408203125, 0.17125701904296875, 0.1898956298828125, 0.20853424072265625, 0.2271728515625, 0.24581146240234375, 0.2644500732421875, 0.28308868408203125, 0.301727294921875, 0.32036590576171875, 0.3390045166015625, 0.35764312744140625, 0.37628173828125, 0.39492034912109375, 0.4135589599609375, 0.43219757080078125, 0.450836181640625, 0.46947479248046875, 0.4881134033203125, 0.5067520141601562, 0.525390625]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 2.0, 6.0, 2.0, 6.0, 12.0, 40.0, 108.0, 223.0, 300.0, 182.0, 79.0, 23.0, 10.0, 6.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-19.266334533691406, -18.833110809326172, -18.399887084960938, -17.966663360595703, -17.53343963623047, -17.100215911865234, -16.6669921875, -16.233768463134766, -15.800543785095215, -15.36732006072998, -14.934096336364746, -14.500872611999512, -14.067647933959961, -13.634424209594727, -13.201200485229492, -12.767976760864258, -12.334753036499023, -11.901529312133789, -11.468305587768555, -11.03508186340332, -10.601858139038086, -10.168634414672852, -9.7354097366333, -9.302186012268066, -8.868962287902832, -8.435738563537598, -8.002514839172363, -7.569290637969971, -7.136066913604736, -6.702843189239502, -6.269618988037109, -5.836395263671875, -5.403171539306641, -4.969947814941406, -4.536724090576172, -4.103499889373779, -3.670276165008545, -3.2370524406433105, -2.803828477859497, -2.3706045150756836, -1.9373807907104492, -1.5041569471359253, -1.0709331035614014, -0.6377092599868774, -0.20448541641235352, 0.22873830795288086, 0.6619622707366943, 1.0951862335205078, 1.5284099578857422, 1.9616338014602661, 2.39485764503479, 2.8280816078186035, 3.261305332183838, 3.6945290565490723, 4.127753257751465, 4.560976982116699, 4.994200706481934, 5.427424430847168, 5.860648155212402, 6.293872356414795, 6.727096080780029, 7.160319805145264, 7.593544006347656, 8.02676773071289, 8.459991455078125]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 5.0, 3.0, 10.0, 13.0, 22.0, 22.0, 31.0, 45.0, 55.0, 81.0, 76.0, 93.0, 107.0, 98.0, 74.0, 70.0, 53.0, 42.0, 33.0, 17.0, 16.0, 10.0, 16.0, 6.0, 7.0, 2.0, 3.0, 4.0], "bins": [-18.6641902923584, -18.279586791992188, -17.894981384277344, -17.510377883911133, -17.12577247619629, -16.741168975830078, -16.356563568115234, -15.971959114074707, -15.58735466003418, -15.202750205993652, -14.818145751953125, -14.433541297912598, -14.04893684387207, -13.664332389831543, -13.279727935791016, -12.895123481750488, -12.510519027709961, -12.125914573669434, -11.741310119628906, -11.356705665588379, -10.972101211547852, -10.587496757507324, -10.202892303466797, -9.81828784942627, -9.433684349060059, -9.049079895019531, -8.664475440979004, -8.279870986938477, -7.895266532897949, -7.510662078857422, -7.1260576248168945, -6.741453170776367, -6.35684871673584, -5.9722442626953125, -5.587639808654785, -5.203035354614258, -4.8184309005737305, -4.433826446533203, -4.049221992492676, -3.6646177768707275, -3.2800133228302, -2.895408868789673, -2.5108044147491455, -2.1262001991271973, -1.7415956258773804, -1.356991171836853, -0.9723868370056152, -0.5877823829650879, -0.20317792892456055, 0.1814264953136444, 0.5660309195518494, 0.9506353139877319, 1.3352397680282593, 1.7198442220687866, 2.1044485569000244, 2.4890530109405518, 2.873657464981079, 3.2582619190216064, 3.642866373062134, 4.027470588684082, 4.412075042724609, 4.796679496765137, 5.181283950805664, 5.565888404846191, 5.950492858886719]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 7.0, 2.0, 6.0, 6.0, 9.0, 7.0, 9.0, 16.0, 20.0, 18.0, 36.0, 38.0, 67.0, 90.0, 162.0, 283.0, 720.0, 2045.0, 7367.0, 37705.0, 400046.0, 3366171.0, 331779.0, 35427.0, 7881.0, 2441.0, 1028.0, 422.0, 224.0, 104.0, 57.0, 39.0, 14.0, 17.0, 8.0, 6.0, 4.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-1.08984375, -1.0622329711914062, -1.0346221923828125, -1.0070114135742188, -0.979400634765625, -0.9517898559570312, -0.9241790771484375, -0.8965682983398438, -0.86895751953125, -0.8413467407226562, -0.8137359619140625, -0.7861251831054688, -0.758514404296875, -0.7309036254882812, -0.7032928466796875, -0.6756820678710938, -0.6480712890625, -0.6204605102539062, -0.5928497314453125, -0.5652389526367188, -0.537628173828125, -0.5100173950195312, -0.4824066162109375, -0.45479583740234375, -0.42718505859375, -0.39957427978515625, -0.3719635009765625, -0.34435272216796875, -0.316741943359375, -0.28913116455078125, -0.2615203857421875, -0.23390960693359375, -0.206298828125, -0.17868804931640625, -0.1510772705078125, -0.12346649169921875, -0.095855712890625, -0.06824493408203125, -0.0406341552734375, -0.01302337646484375, 0.01458740234375, 0.04219818115234375, 0.0698089599609375, 0.09741973876953125, 0.125030517578125, 0.15264129638671875, 0.1802520751953125, 0.20786285400390625, 0.2354736328125, 0.26308441162109375, 0.2906951904296875, 0.31830596923828125, 0.345916748046875, 0.37352752685546875, 0.4011383056640625, 0.42874908447265625, 0.45635986328125, 0.48397064208984375, 0.5115814208984375, 0.5391921997070312, 0.566802978515625, 0.5944137573242188, 0.6220245361328125, 0.6496353149414062, 0.67724609375]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 5.0, 19.0, 22.0, 33.0, 56.0, 94.0, 125.0, 155.0, 131.0, 135.0, 94.0, 64.0, 34.0, 28.0, 7.0, 8.0, 3.0, 3.0], "bins": [-1.650390625, -1.6197929382324219, -1.5891952514648438, -1.5585975646972656, -1.5279998779296875, -1.4974021911621094, -1.4668045043945312, -1.4362068176269531, -1.405609130859375, -1.3750114440917969, -1.3444137573242188, -1.3138160705566406, -1.2832183837890625, -1.2526206970214844, -1.2220230102539062, -1.1914253234863281, -1.16082763671875, -1.1302299499511719, -1.0996322631835938, -1.0690345764160156, -1.0384368896484375, -1.0078392028808594, -0.9772415161132812, -0.9466438293457031, -0.916046142578125, -0.8854484558105469, -0.8548507690429688, -0.8242530822753906, -0.7936553955078125, -0.7630577087402344, -0.7324600219726562, -0.7018623352050781, -0.6712646484375, -0.6406669616699219, -0.6100692749023438, -0.5794715881347656, -0.5488739013671875, -0.5182762145996094, -0.48767852783203125, -0.4570808410644531, -0.426483154296875, -0.3958854675292969, -0.36528778076171875, -0.3346900939941406, -0.3040924072265625, -0.2734947204589844, -0.24289703369140625, -0.21229934692382812, -0.18170166015625, -0.15110397338867188, -0.12050628662109375, -0.08990859985351562, -0.0593109130859375, -0.028713226318359375, 0.00188446044921875, 0.032482147216796875, 0.063079833984375, 0.09367752075195312, 0.12427520751953125, 0.15487289428710938, 0.1854705810546875, 0.21606826782226562, 0.24666595458984375, 0.2772636413574219, 0.307861328125]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 7.0, 7.0, 11.0, 17.0, 33.0, 24.0, 24.0, 36.0, 76.0, 130.0, 185.0, 325.0, 723.0, 1841.0, 5041.0, 16946.0, 77204.0, 1001704.0, 2875080.0, 170930.0, 30809.0, 8128.0, 2672.0, 1074.0, 546.0, 255.0, 147.0, 99.0, 73.0, 47.0, 30.0, 24.0, 8.0, 9.0, 8.0, 5.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.7705078125, -0.7431640625, -0.7158203125, -0.6884765625, -0.6611328125, -0.6337890625, -0.6064453125, -0.5791015625, -0.5517578125, -0.5244140625, -0.4970703125, -0.4697265625, -0.4423828125, -0.4150390625, -0.3876953125, -0.3603515625, -0.3330078125, -0.3056640625, -0.2783203125, -0.2509765625, -0.2236328125, -0.1962890625, -0.1689453125, -0.1416015625, -0.1142578125, -0.0869140625, -0.0595703125, -0.0322265625, -0.0048828125, 0.0224609375, 0.0498046875, 0.0771484375, 0.1044921875, 0.1318359375, 0.1591796875, 0.1865234375, 0.2138671875, 0.2412109375, 0.2685546875, 0.2958984375, 0.3232421875, 0.3505859375, 0.3779296875, 0.4052734375, 0.4326171875, 0.4599609375, 0.4873046875, 0.5146484375, 0.5419921875, 0.5693359375, 0.5966796875, 0.6240234375, 0.6513671875, 0.6787109375, 0.7060546875, 0.7333984375, 0.7607421875, 0.7880859375, 0.8154296875, 0.8427734375, 0.8701171875, 0.8974609375, 0.9248046875, 0.9521484375, 0.9794921875]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 2.0, 1.0, 3.0, 4.0, 6.0, 2.0, 6.0, 7.0, 12.0, 14.0, 24.0, 39.0, 46.0, 60.0, 103.0, 199.0, 295.0, 566.0, 780.0, 702.0, 430.0, 236.0, 180.0, 90.0, 84.0, 43.0, 30.0, 30.0, 21.0, 17.0, 10.0, 12.0, 9.0, 6.0, 2.0, 4.0, 1.0, 0.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.91796875, -0.8881607055664062, -0.8583526611328125, -0.8285446166992188, -0.798736572265625, -0.7689285278320312, -0.7391204833984375, -0.7093124389648438, -0.67950439453125, -0.6496963500976562, -0.6198883056640625, -0.5900802612304688, -0.560272216796875, -0.5304641723632812, -0.5006561279296875, -0.47084808349609375, -0.4410400390625, -0.41123199462890625, -0.3814239501953125, -0.35161590576171875, -0.321807861328125, -0.29199981689453125, -0.2621917724609375, -0.23238372802734375, -0.20257568359375, -0.17276763916015625, -0.1429595947265625, -0.11315155029296875, -0.083343505859375, -0.05353546142578125, -0.0237274169921875, 0.00608062744140625, 0.035888671875, 0.06569671630859375, 0.0955047607421875, 0.12531280517578125, 0.155120849609375, 0.18492889404296875, 0.2147369384765625, 0.24454498291015625, 0.27435302734375, 0.30416107177734375, 0.3339691162109375, 0.36377716064453125, 0.393585205078125, 0.42339324951171875, 0.4532012939453125, 0.48300933837890625, 0.5128173828125, 0.5426254272460938, 0.5724334716796875, 0.6022415161132812, 0.632049560546875, 0.6618576049804688, 0.6916656494140625, 0.7214736938476562, 0.75128173828125, 0.7810897827148438, 0.8108978271484375, 0.8407058715820312, 0.870513916015625, 0.9003219604492188, 0.9301300048828125, 0.9599380493164062, 0.98974609375]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 7.0, 11.0, 30.0, 60.0, 165.0, 250.0, 208.0, 153.0, 54.0, 33.0, 11.0, 4.0, 5.0, 5.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.791822910308838, -7.416045188903809, -7.040267467498779, -6.66448974609375, -6.2887115478515625, -5.912933826446533, -5.537156105041504, -5.161377906799316, -4.785600662231445, -4.409822940826416, -4.034045219421387, -3.6582672595977783, -3.28248929977417, -2.9067115783691406, -2.5309338569641113, -2.155155897140503, -1.7793779373168945, -1.4036000967025757, -1.0278222560882568, -0.6520445346832275, -0.2762666940689087, 0.09951114654541016, 0.47528886795043945, 0.8510668277740479, 1.2268445491790771, 1.602622389793396, 1.9784002304077148, 2.354177951812744, 2.7299556732177734, 3.105733633041382, 3.481511354446411, 3.8572893142700195, 4.233067512512207, 4.608845233917236, 4.984622955322266, 5.360401153564453, 5.736178874969482, 6.111956596374512, 6.487734317779541, 6.86351203918457, 7.239290237426758, 7.615067958831787, 7.990845680236816, 8.366623878479004, 8.742401123046875, 9.118179321289062, 9.49395751953125, 9.869734764099121, 10.245512008666992, 10.62129020690918, 10.99706745147705, 11.372845649719238, 11.74862289428711, 12.124401092529297, 12.500179290771484, 12.875956535339355, 13.251734733581543, 13.62751293182373, 14.003290176391602, 14.379068374633789, 14.75484561920166, 15.130623817443848, 15.506401062011719, 15.882179260253906, 16.257957458496094]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 8.0, 9.0, 9.0, 19.0, 21.0, 47.0, 53.0, 46.0, 71.0, 92.0, 73.0, 98.0, 88.0, 83.0, 77.0, 56.0, 51.0, 29.0, 26.0, 25.0, 9.0, 9.0, 6.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.828957557678223, -5.5522847175598145, -5.275611877441406, -4.998939037322998, -4.72226619720459, -4.445592880249023, -4.168920040130615, -3.892247200012207, -3.615574359893799, -3.3389015197753906, -3.0622286796569824, -2.785555601119995, -2.508882761001587, -2.2322099208831787, -1.955536961555481, -1.6788640022277832, -1.402191162109375, -1.1255183219909668, -0.848845362663269, -0.5721724629402161, -0.2954995632171631, -0.018826723098754883, 0.25784623622894287, 0.5345191955566406, 0.8111920356750488, 1.087864875793457, 1.3645378351211548, 1.6412107944488525, 1.9178836345672607, 2.194556474685669, 2.4712295532226562, 2.7479023933410645, 3.024576187133789, 3.3012490272521973, 3.5779218673706055, 3.8545949459075928, 4.131267547607422, 4.407940864562988, 4.6846137046813965, 4.961286544799805, 5.237959384918213, 5.514632225036621, 5.791305065155029, 6.0679779052734375, 6.344651222229004, 6.621323585510254, 6.89799690246582, 7.1746697425842285, 7.451342582702637, 7.728015422821045, 8.004688262939453, 8.28136157989502, 8.55803394317627, 8.834707260131836, 9.111379623413086, 9.388052940368652, 9.664726257324219, 9.941399574279785, 10.218071937561035, 10.494745254516602, 10.771417617797852, 11.048090934753418, 11.324763298034668, 11.601436614990234, 11.878108978271484]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 7.0, 4.0, 9.0, 10.0, 14.0, 15.0, 34.0, 50.0, 60.0, 113.0, 262.0, 511.0, 1236.0, 3310.0, 10908.0, 43911.0, 185602.0, 460690.0, 256569.0, 62853.0, 15083.0, 4465.0, 1485.0, 644.0, 283.0, 158.0, 98.0, 64.0, 38.0, 23.0, 17.0, 9.0, 7.0, 4.0, 5.0, 3.0, 4.0, 0.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.9814453125, -0.9538955688476562, -0.9263458251953125, -0.8987960815429688, -0.871246337890625, -0.8436965942382812, -0.8161468505859375, -0.7885971069335938, -0.76104736328125, -0.7334976196289062, -0.7059478759765625, -0.6783981323242188, -0.650848388671875, -0.6232986450195312, -0.5957489013671875, -0.5681991577148438, -0.5406494140625, -0.5130996704101562, -0.4855499267578125, -0.45800018310546875, -0.430450439453125, -0.40290069580078125, -0.3753509521484375, -0.34780120849609375, -0.32025146484375, -0.29270172119140625, -0.2651519775390625, -0.23760223388671875, -0.210052490234375, -0.18250274658203125, -0.1549530029296875, -0.12740325927734375, -0.099853515625, -0.07230377197265625, -0.0447540283203125, -0.01720428466796875, 0.010345458984375, 0.03789520263671875, 0.0654449462890625, 0.09299468994140625, 0.12054443359375, 0.14809417724609375, 0.1756439208984375, 0.20319366455078125, 0.230743408203125, 0.25829315185546875, 0.2858428955078125, 0.31339263916015625, 0.3409423828125, 0.36849212646484375, 0.3960418701171875, 0.42359161376953125, 0.451141357421875, 0.47869110107421875, 0.5062408447265625, 0.5337905883789062, 0.56134033203125, 0.5888900756835938, 0.6164398193359375, 0.6439895629882812, 0.671539306640625, 0.6990890502929688, 0.7266387939453125, 0.7541885375976562, 0.78173828125]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 2.0, 2.0, 5.0, 2.0, 3.0, 14.0, 11.0, 19.0, 16.0, 23.0, 21.0, 32.0, 31.0, 45.0, 39.0, 55.0, 60.0, 56.0, 76.0, 50.0, 66.0, 57.0, 62.0, 47.0, 39.0, 29.0, 37.0, 23.0, 19.0, 19.0, 17.0, 15.0, 3.0, 4.0, 5.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.52392578125, -0.5107879638671875, -0.497650146484375, -0.4845123291015625, -0.47137451171875, -0.4582366943359375, -0.445098876953125, -0.4319610595703125, -0.4188232421875, -0.4056854248046875, -0.392547607421875, -0.3794097900390625, -0.36627197265625, -0.3531341552734375, -0.339996337890625, -0.3268585205078125, -0.313720703125, -0.3005828857421875, -0.287445068359375, -0.2743072509765625, -0.26116943359375, -0.2480316162109375, -0.234893798828125, -0.2217559814453125, -0.2086181640625, -0.1954803466796875, -0.182342529296875, -0.1692047119140625, -0.15606689453125, -0.1429290771484375, -0.129791259765625, -0.1166534423828125, -0.103515625, -0.0903778076171875, -0.077239990234375, -0.0641021728515625, -0.05096435546875, -0.0378265380859375, -0.024688720703125, -0.0115509033203125, 0.0015869140625, 0.0147247314453125, 0.027862548828125, 0.0410003662109375, 0.05413818359375, 0.0672760009765625, 0.080413818359375, 0.0935516357421875, 0.106689453125, 0.1198272705078125, 0.132965087890625, 0.1461029052734375, 0.15924072265625, 0.1723785400390625, 0.185516357421875, 0.1986541748046875, 0.2117919921875, 0.2249298095703125, 0.238067626953125, 0.2512054443359375, 0.26434326171875, 0.2774810791015625, 0.290618896484375, 0.3037567138671875, 0.31689453125]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 2.0, 4.0, 5.0, 3.0, 8.0, 13.0, 17.0, 27.0, 20.0, 46.0, 41.0, 80.0, 100.0, 142.0, 169.0, 275.0, 359.0, 518.0, 813.0, 1315.0, 2586.0, 7046.0, 37800.0, 332249.0, 589506.0, 58574.0, 9443.0, 3062.0, 1432.0, 921.0, 555.0, 383.0, 249.0, 221.0, 164.0, 101.0, 84.0, 61.0, 47.0, 37.0, 20.0, 15.0, 15.0, 9.0, 9.0, 5.0, 3.0, 6.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-1.2529296875, -1.2172393798828125, -1.181549072265625, -1.1458587646484375, -1.11016845703125, -1.0744781494140625, -1.038787841796875, -1.0030975341796875, -0.9674072265625, -0.9317169189453125, -0.896026611328125, -0.8603363037109375, -0.82464599609375, -0.7889556884765625, -0.753265380859375, -0.7175750732421875, -0.681884765625, -0.6461944580078125, -0.610504150390625, -0.5748138427734375, -0.53912353515625, -0.5034332275390625, -0.467742919921875, -0.4320526123046875, -0.3963623046875, -0.3606719970703125, -0.324981689453125, -0.2892913818359375, -0.25360107421875, -0.2179107666015625, -0.182220458984375, -0.1465301513671875, -0.11083984375, -0.0751495361328125, -0.039459228515625, -0.0037689208984375, 0.03192138671875, 0.0676116943359375, 0.103302001953125, 0.1389923095703125, 0.1746826171875, 0.2103729248046875, 0.246063232421875, 0.2817535400390625, 0.31744384765625, 0.3531341552734375, 0.388824462890625, 0.4245147705078125, 0.460205078125, 0.4958953857421875, 0.531585693359375, 0.5672760009765625, 0.60296630859375, 0.6386566162109375, 0.674346923828125, 0.7100372314453125, 0.7457275390625, 0.7814178466796875, 0.817108154296875, 0.8527984619140625, 0.88848876953125, 0.9241790771484375, 0.959869384765625, 0.9955596923828125, 1.03125]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 0.0, 4.0, 3.0, 4.0, 7.0, 5.0, 16.0, 9.0, 16.0, 11.0, 24.0, 29.0, 34.0, 36.0, 35.0, 44.0, 58.0, 46.0, 43.0, 47.0, 71.0, 41.0, 39.0, 48.0, 44.0, 45.0, 38.0, 27.0, 29.0, 30.0, 16.0, 16.0, 19.0, 12.0, 17.0, 9.0, 11.0, 12.0, 5.0, 1.0, 2.0, 5.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.705078125, -1.6544189453125, -1.603759765625, -1.5531005859375, -1.50244140625, -1.4517822265625, -1.401123046875, -1.3504638671875, -1.2998046875, -1.2491455078125, -1.198486328125, -1.1478271484375, -1.09716796875, -1.0465087890625, -0.995849609375, -0.9451904296875, -0.89453125, -0.8438720703125, -0.793212890625, -0.7425537109375, -0.69189453125, -0.6412353515625, -0.590576171875, -0.5399169921875, -0.4892578125, -0.4385986328125, -0.387939453125, -0.3372802734375, -0.28662109375, -0.2359619140625, -0.185302734375, -0.1346435546875, -0.083984375, -0.0333251953125, 0.017333984375, 0.0679931640625, 0.11865234375, 0.1693115234375, 0.219970703125, 0.2706298828125, 0.3212890625, 0.3719482421875, 0.422607421875, 0.4732666015625, 0.52392578125, 0.5745849609375, 0.625244140625, 0.6759033203125, 0.7265625, 0.7772216796875, 0.827880859375, 0.8785400390625, 0.92919921875, 0.9798583984375, 1.030517578125, 1.0811767578125, 1.1318359375, 1.1824951171875, 1.233154296875, 1.2838134765625, 1.33447265625, 1.3851318359375, 1.435791015625, 1.4864501953125, 1.537109375]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 6.0, 7.0, 14.0, 13.0, 28.0, 31.0, 72.0, 135.0, 250.0, 651.0, 2031.0, 10794.0, 145599.0, 827217.0, 53713.0, 5759.0, 1334.0, 450.0, 186.0, 89.0, 84.0, 34.0, 22.0, 18.0, 7.0, 9.0, 1.0, 2.0, 3.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.47314453125, -0.4544677734375, -0.435791015625, -0.4171142578125, -0.3984375, -0.3797607421875, -0.361083984375, -0.3424072265625, -0.32373046875, -0.3050537109375, -0.286376953125, -0.2677001953125, -0.2490234375, -0.2303466796875, -0.211669921875, -0.1929931640625, -0.17431640625, -0.1556396484375, -0.136962890625, -0.1182861328125, -0.099609375, -0.0809326171875, -0.062255859375, -0.0435791015625, -0.02490234375, -0.0062255859375, 0.012451171875, 0.0311279296875, 0.0498046875, 0.0684814453125, 0.087158203125, 0.1058349609375, 0.12451171875, 0.1431884765625, 0.161865234375, 0.1805419921875, 0.19921875, 0.2178955078125, 0.236572265625, 0.2552490234375, 0.27392578125, 0.2926025390625, 0.311279296875, 0.3299560546875, 0.3486328125, 0.3673095703125, 0.385986328125, 0.4046630859375, 0.42333984375, 0.4420166015625, 0.460693359375, 0.4793701171875, 0.498046875, 0.5167236328125, 0.535400390625, 0.5540771484375, 0.57275390625, 0.5914306640625, 0.610107421875, 0.6287841796875, 0.6474609375, 0.6661376953125, 0.684814453125, 0.7034912109375, 0.72216796875]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 1.0, 2.0, 3.0, 5.0, 10.0, 1.0, 6.0, 10.0, 15.0, 15.0, 19.0, 38.0, 43.0, 46.0, 63.0, 61.0, 76.0, 79.0, 92.0, 94.0, 57.0, 44.0, 47.0, 34.0, 26.0, 18.0, 15.0, 18.0, 19.0, 9.0, 4.0, 8.0, 2.0, 7.0, 5.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.867813110351562e-05, -7.628742605447769e-05, -7.389672100543976e-05, -7.150601595640182e-05, -6.911531090736389e-05, -6.672460585832596e-05, -6.433390080928802e-05, -6.194319576025009e-05, -5.955249071121216e-05, -5.7161785662174225e-05, -5.477108061313629e-05, -5.238037556409836e-05, -4.9989670515060425e-05, -4.759896546602249e-05, -4.520826041698456e-05, -4.2817555367946625e-05, -4.042685031890869e-05, -3.803614526987076e-05, -3.5645440220832825e-05, -3.325473517179489e-05, -3.086403012275696e-05, -2.8473325073719025e-05, -2.608262002468109e-05, -2.3691914975643158e-05, -2.1301209926605225e-05, -1.891050487756729e-05, -1.6519799828529358e-05, -1.4129094779491425e-05, -1.1738389730453491e-05, -9.347684681415558e-06, -6.9569796323776245e-06, -4.566274583339691e-06, -2.175569534301758e-06, 2.1513551473617554e-07, 2.605840563774109e-06, 4.996545612812042e-06, 7.387250661849976e-06, 9.777955710887909e-06, 1.2168660759925842e-05, 1.4559365808963776e-05, 1.695007085800171e-05, 1.9340775907039642e-05, 2.1731480956077576e-05, 2.412218600511551e-05, 2.6512891054153442e-05, 2.8903596103191376e-05, 3.129430115222931e-05, 3.368500620126724e-05, 3.6075711250305176e-05, 3.846641629934311e-05, 4.085712134838104e-05, 4.3247826397418976e-05, 4.563853144645691e-05, 4.802923649549484e-05, 5.0419941544532776e-05, 5.281064659357071e-05, 5.520135164260864e-05, 5.7592056691646576e-05, 5.998276174068451e-05, 6.237346678972244e-05, 6.476417183876038e-05, 6.715487688779831e-05, 6.954558193683624e-05, 7.193628698587418e-05, 7.432699203491211e-05]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 5.0, 13.0, 11.0, 15.0, 30.0, 36.0, 58.0, 83.0, 165.0, 254.0, 491.0, 1054.0, 2689.0, 8412.0, 40940.0, 502482.0, 438362.0, 40301.0, 8251.0, 2626.0, 1007.0, 551.0, 260.0, 165.0, 101.0, 60.0, 44.0, 28.0, 21.0, 18.0, 8.0, 5.0, 5.0, 1.0, 1.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.47021484375, -0.45638275146484375, -0.4425506591796875, -0.42871856689453125, -0.414886474609375, -0.40105438232421875, -0.3872222900390625, -0.37339019775390625, -0.35955810546875, -0.34572601318359375, -0.3318939208984375, -0.31806182861328125, -0.304229736328125, -0.29039764404296875, -0.2765655517578125, -0.26273345947265625, -0.2489013671875, -0.23506927490234375, -0.2212371826171875, -0.20740509033203125, -0.193572998046875, -0.17974090576171875, -0.1659088134765625, -0.15207672119140625, -0.13824462890625, -0.12441253662109375, -0.1105804443359375, -0.09674835205078125, -0.082916259765625, -0.06908416748046875, -0.0552520751953125, -0.04141998291015625, -0.027587890625, -0.01375579833984375, 7.62939453125e-05, 0.01390838623046875, 0.027740478515625, 0.04157257080078125, 0.0554046630859375, 0.06923675537109375, 0.08306884765625, 0.09690093994140625, 0.1107330322265625, 0.12456512451171875, 0.138397216796875, 0.15222930908203125, 0.1660614013671875, 0.17989349365234375, 0.1937255859375, 0.20755767822265625, 0.2213897705078125, 0.23522186279296875, 0.249053955078125, 0.26288604736328125, 0.2767181396484375, 0.29055023193359375, 0.30438232421875, 0.31821441650390625, 0.3320465087890625, 0.34587860107421875, 0.359710693359375, 0.37354278564453125, 0.3873748779296875, 0.40120697021484375, 0.4150390625]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 4.0, 7.0, 3.0, 5.0, 14.0, 15.0, 12.0, 12.0, 17.0, 23.0, 31.0, 31.0, 37.0, 35.0, 40.0, 32.0, 47.0, 62.0, 72.0, 69.0, 53.0, 59.0, 48.0, 34.0, 49.0, 22.0, 27.0, 29.0, 25.0, 20.0, 8.0, 10.0, 8.0, 10.0, 5.0, 4.0, 3.0, 4.0, 1.0, 3.0, 4.0, 4.0, 1.0, 4.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0], "bins": [-0.28466796875, -0.27577972412109375, -0.2668914794921875, -0.25800323486328125, -0.249114990234375, -0.24022674560546875, -0.2313385009765625, -0.22245025634765625, -0.21356201171875, -0.20467376708984375, -0.1957855224609375, -0.18689727783203125, -0.178009033203125, -0.16912078857421875, -0.1602325439453125, -0.15134429931640625, -0.1424560546875, -0.13356781005859375, -0.1246795654296875, -0.11579132080078125, -0.106903076171875, -0.09801483154296875, -0.0891265869140625, -0.08023834228515625, -0.07135009765625, -0.06246185302734375, -0.0535736083984375, -0.04468536376953125, -0.035797119140625, -0.02690887451171875, -0.0180206298828125, -0.00913238525390625, -0.000244140625, 0.00864410400390625, 0.0175323486328125, 0.02642059326171875, 0.035308837890625, 0.04419708251953125, 0.0530853271484375, 0.06197357177734375, 0.07086181640625, 0.07975006103515625, 0.0886383056640625, 0.09752655029296875, 0.106414794921875, 0.11530303955078125, 0.1241912841796875, 0.13307952880859375, 0.1419677734375, 0.15085601806640625, 0.1597442626953125, 0.16863250732421875, 0.177520751953125, 0.18640899658203125, 0.1952972412109375, 0.20418548583984375, 0.21307373046875, 0.22196197509765625, 0.2308502197265625, 0.23973846435546875, 0.248626708984375, 0.25751495361328125, 0.2664031982421875, 0.27529144287109375, 0.2841796875]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 8.0, 6.0, 11.0, 49.0, 122.0, 293.0, 299.0, 131.0, 47.0, 18.0, 5.0, 5.0, 3.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.055168151855469, -11.624258041381836, -11.193347930908203, -10.76243782043457, -10.331526756286621, -9.900616645812988, -9.469706535339355, -9.038796424865723, -8.60788631439209, -8.176976203918457, -7.746065616607666, -7.315155506134033, -6.8842453956604, -6.453334808349609, -6.022424697875977, -5.591514587402344, -5.160604000091553, -4.72969388961792, -4.298783302307129, -3.867873191833496, -3.4369630813598633, -3.0060527324676514, -2.5751423835754395, -2.1442322731018066, -1.7133219242095947, -1.2824116945266724, -0.8515014052391052, -0.4205911159515381, 0.010319113731384277, 0.44122934341430664, 0.8721396923065186, 1.3030498027801514, 1.7339601516723633, 2.164870500564575, 2.595780611038208, 3.02669095993042, 3.4576010704040527, 3.8885114192962646, 4.319421768188477, 4.750331878662109, 5.181241989135742, 5.612152099609375, 6.043062686920166, 6.473972797393799, 6.904882907867432, 7.335793495178223, 7.7667036056518555, 8.197613716125488, 8.628524780273438, 9.05943489074707, 9.490345001220703, 9.921255111694336, 10.352166175842285, 10.783076286315918, 11.21398639678955, 11.644896507263184, 12.075806617736816, 12.50671672821045, 12.937626838684082, 13.368537902832031, 13.799448013305664, 14.230358123779297, 14.66126823425293, 15.092178344726562, 15.523088455200195]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 8.0, 6.0, 11.0, 9.0, 11.0, 11.0, 21.0, 29.0, 30.0, 35.0, 36.0, 50.0, 52.0, 68.0, 88.0, 97.0, 78.0, 53.0, 57.0, 43.0, 39.0, 33.0, 35.0, 28.0, 20.0, 14.0, 18.0, 7.0, 6.0, 3.0, 6.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.397326469421387, -6.133326053619385, -5.869325637817383, -5.605325222015381, -5.341324806213379, -5.077323913574219, -4.813323497772217, -4.549323081970215, -4.285322666168213, -4.021322250366211, -3.757321834564209, -3.493321180343628, -3.229320764541626, -2.965320348739624, -2.701319694519043, -2.437319278717041, -2.173318862915039, -1.909318447113037, -1.6453179121017456, -1.381317377090454, -1.1173169612884521, -0.8533165454864502, -0.5893160104751587, -0.3253154754638672, -0.061315059661865234, 0.2026854157447815, 0.4666858911514282, 0.730686366558075, 0.9946868419647217, 1.2586872577667236, 1.5226877927780151, 1.7866883277893066, 2.0506887435913086, 2.3146891593933105, 2.5786895751953125, 2.8426902294158936, 3.1066906452178955, 3.3706910610198975, 3.6346917152404785, 3.8986921310424805, 4.162692546844482, 4.426692962646484, 4.690693378448486, 4.954693794250488, 5.218694686889648, 5.482694625854492, 5.746695518493652, 6.010695934295654, 6.274696350097656, 6.538696765899658, 6.80269718170166, 7.066697597503662, 7.330698013305664, 7.594698905944824, 7.858699321746826, 8.122699737548828, 8.386699676513672, 8.650700569152832, 8.914700508117676, 9.178701400756836, 9.44270133972168, 9.70670223236084, 9.970702171325684, 10.234703063964844, 10.498703956604004]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 1.0, 2.0, 11.0, 6.0, 8.0, 13.0, 10.0, 17.0, 32.0, 53.0, 57.0, 111.0, 238.0, 573.0, 1710.0, 6501.0, 34786.0, 409029.0, 3316914.0, 381750.0, 33058.0, 6379.0, 1670.0, 650.0, 308.0, 151.0, 81.0, 55.0, 41.0, 19.0, 19.0, 12.0, 3.0, 2.0, 7.0, 2.0, 2.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.9287109375, -0.90325927734375, -0.8778076171875, -0.85235595703125, -0.826904296875, -0.80145263671875, -0.7760009765625, -0.75054931640625, -0.72509765625, -0.69964599609375, -0.6741943359375, -0.64874267578125, -0.623291015625, -0.59783935546875, -0.5723876953125, -0.54693603515625, -0.521484375, -0.49603271484375, -0.4705810546875, -0.44512939453125, -0.419677734375, -0.39422607421875, -0.3687744140625, -0.34332275390625, -0.31787109375, -0.29241943359375, -0.2669677734375, -0.24151611328125, -0.216064453125, -0.19061279296875, -0.1651611328125, -0.13970947265625, -0.1142578125, -0.08880615234375, -0.0633544921875, -0.03790283203125, -0.012451171875, 0.01300048828125, 0.0384521484375, 0.06390380859375, 0.08935546875, 0.11480712890625, 0.1402587890625, 0.16571044921875, 0.191162109375, 0.21661376953125, 0.2420654296875, 0.26751708984375, 0.29296875, 0.31842041015625, 0.3438720703125, 0.36932373046875, 0.394775390625, 0.42022705078125, 0.4456787109375, 0.47113037109375, 0.49658203125, 0.52203369140625, 0.5474853515625, 0.57293701171875, 0.598388671875, 0.62384033203125, 0.6492919921875, 0.67474365234375, 0.7001953125]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 6.0, 6.0, 8.0, 14.0, 11.0, 25.0, 19.0, 47.0, 47.0, 59.0, 72.0, 73.0, 108.0, 76.0, 79.0, 90.0, 56.0, 49.0, 40.0, 43.0, 34.0, 15.0, 11.0, 10.0, 8.0, 4.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3408203125, -0.32404327392578125, -0.3072662353515625, -0.29048919677734375, -0.273712158203125, -0.25693511962890625, -0.2401580810546875, -0.22338104248046875, -0.20660400390625, -0.18982696533203125, -0.1730499267578125, -0.15627288818359375, -0.139495849609375, -0.12271881103515625, -0.1059417724609375, -0.08916473388671875, -0.0723876953125, -0.05561065673828125, -0.0388336181640625, -0.02205657958984375, -0.005279541015625, 0.01149749755859375, 0.0282745361328125, 0.04505157470703125, 0.06182861328125, 0.07860565185546875, 0.0953826904296875, 0.11215972900390625, 0.128936767578125, 0.14571380615234375, 0.1624908447265625, 0.17926788330078125, 0.196044921875, 0.21282196044921875, 0.2295989990234375, 0.24637603759765625, 0.263153076171875, 0.27993011474609375, 0.2967071533203125, 0.31348419189453125, 0.33026123046875, 0.34703826904296875, 0.3638153076171875, 0.38059234619140625, 0.397369384765625, 0.41414642333984375, 0.4309234619140625, 0.44770050048828125, 0.4644775390625, 0.48125457763671875, 0.4980316162109375, 0.5148086547851562, 0.531585693359375, 0.5483627319335938, 0.5651397705078125, 0.5819168090820312, 0.59869384765625, 0.6154708862304688, 0.6322479248046875, 0.6490249633789062, 0.665802001953125, 0.6825790405273438, 0.6993560791015625, 0.7161331176757812, 0.73291015625]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 7.0, 12.0, 10.0, 13.0, 25.0, 24.0, 40.0, 72.0, 104.0, 187.0, 314.0, 575.0, 1175.0, 3163.0, 9716.0, 38475.0, 243415.0, 2991290.0, 796814.0, 80377.0, 18413.0, 5558.0, 2096.0, 1010.0, 556.0, 331.0, 178.0, 110.0, 81.0, 46.0, 33.0, 15.0, 12.0, 11.0, 4.0, 10.0, 6.0, 4.0, 0.0, 1.0, 3.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5908203125, -0.5694961547851562, -0.5481719970703125, -0.5268478393554688, -0.505523681640625, -0.48419952392578125, -0.4628753662109375, -0.44155120849609375, -0.42022705078125, -0.39890289306640625, -0.3775787353515625, -0.35625457763671875, -0.334930419921875, -0.31360626220703125, -0.2922821044921875, -0.27095794677734375, -0.2496337890625, -0.22830963134765625, -0.2069854736328125, -0.18566131591796875, -0.164337158203125, -0.14301300048828125, -0.1216888427734375, -0.10036468505859375, -0.07904052734375, -0.05771636962890625, -0.0363922119140625, -0.01506805419921875, 0.006256103515625, 0.02758026123046875, 0.0489044189453125, 0.07022857666015625, 0.091552734375, 0.11287689208984375, 0.1342010498046875, 0.15552520751953125, 0.176849365234375, 0.19817352294921875, 0.2194976806640625, 0.24082183837890625, 0.26214599609375, 0.28347015380859375, 0.3047943115234375, 0.32611846923828125, 0.347442626953125, 0.36876678466796875, 0.3900909423828125, 0.41141510009765625, 0.4327392578125, 0.45406341552734375, 0.4753875732421875, 0.49671173095703125, 0.518035888671875, 0.5393600463867188, 0.5606842041015625, 0.5820083618164062, 0.60333251953125, 0.6246566772460938, 0.6459808349609375, 0.6673049926757812, 0.688629150390625, 0.7099533081054688, 0.7312774658203125, 0.7526016235351562, 0.77392578125]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 8.0, 6.0, 7.0, 15.0, 28.0, 33.0, 58.0, 104.0, 245.0, 386.0, 782.0, 902.0, 673.0, 348.0, 204.0, 96.0, 55.0, 40.0, 21.0, 24.0, 15.0, 9.0, 7.0, 7.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.82763671875, -0.7949752807617188, -0.7623138427734375, -0.7296524047851562, -0.696990966796875, -0.6643295288085938, -0.6316680908203125, -0.5990066528320312, -0.56634521484375, -0.5336837768554688, -0.5010223388671875, -0.46836090087890625, -0.435699462890625, -0.40303802490234375, -0.3703765869140625, -0.33771514892578125, -0.3050537109375, -0.27239227294921875, -0.2397308349609375, -0.20706939697265625, -0.174407958984375, -0.14174652099609375, -0.1090850830078125, -0.07642364501953125, -0.04376220703125, -0.01110076904296875, 0.0215606689453125, 0.05422210693359375, 0.086883544921875, 0.11954498291015625, 0.1522064208984375, 0.18486785888671875, 0.217529296875, 0.25019073486328125, 0.2828521728515625, 0.31551361083984375, 0.348175048828125, 0.38083648681640625, 0.4134979248046875, 0.44615936279296875, 0.47882080078125, 0.5114822387695312, 0.5441436767578125, 0.5768051147460938, 0.609466552734375, 0.6421279907226562, 0.6747894287109375, 0.7074508666992188, 0.7401123046875, 0.7727737426757812, 0.8054351806640625, 0.8380966186523438, 0.870758056640625, 0.9034194946289062, 0.9360809326171875, 0.9687423706054688, 1.00140380859375, 1.0340652465820312, 1.0667266845703125, 1.0993881225585938, 1.132049560546875, 1.1647109985351562, 1.1973724365234375, 1.2300338745117188, 1.2626953125]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 3.0, 13.0, 22.0, 105.0, 233.0, 305.0, 174.0, 76.0, 27.0, 16.0, 6.0, 4.0, 4.0, 3.0, 2.0, 0.0, 1.0, 4.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.742885589599609, -5.364243507385254, -4.985601902008057, -4.606959819793701, -4.228318214416504, -3.8496761322021484, -3.471034049987793, -3.0923922061920166, -2.7137503623962402, -2.335108518600464, -1.956466555595398, -1.577824592590332, -1.1991827487945557, -0.8205409049987793, -0.44189882278442383, -0.06325697898864746, 0.3153848648071289, 0.69402676820755, 1.0726686716079712, 1.451310634613037, 1.8299524784088135, 2.20859432220459, 2.5872364044189453, 2.9658782482147217, 3.344520092010498, 3.7231619358062744, 4.101803779602051, 4.480445861816406, 4.859087944030762, 5.237729549407959, 5.6163716316223145, 5.995013236999512, 6.373655319213867, 6.752297401428223, 7.13093900680542, 7.509581089019775, 7.888222694396973, 8.266864776611328, 8.645506858825684, 9.024148941040039, 9.402790069580078, 9.781432151794434, 10.160074234008789, 10.538715362548828, 10.917357444763184, 11.295999526977539, 11.674641609191895, 12.05328369140625, 12.431925773620605, 12.810567855834961, 13.189209938049316, 13.567852020263672, 13.946493148803711, 14.325135231018066, 14.703777313232422, 15.082419395446777, 15.461061477661133, 15.839703559875488, 16.218345642089844, 16.596986770629883, 16.975629806518555, 17.354270935058594, 17.732913970947266, 18.111555099487305, 18.490196228027344]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 4.0, 2.0, 3.0, 4.0, 5.0, 8.0, 15.0, 15.0, 28.0, 41.0, 58.0, 44.0, 61.0, 74.0, 56.0, 69.0, 69.0, 71.0, 68.0, 48.0, 57.0, 45.0, 33.0, 39.0, 17.0, 25.0, 15.0, 11.0, 9.0, 5.0, 7.0, 4.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.110647678375244, -5.93239164352417, -5.754136085510254, -5.57588005065918, -5.3976240158081055, -5.219367980957031, -5.041112422943115, -4.862856388092041, -4.684600830078125, -4.506344795227051, -4.328089237213135, -4.1498332023620605, -3.9715771675109863, -3.793321371078491, -3.615065574645996, -3.436809539794922, -3.2585535049438477, -3.0802977085113525, -2.9020416736602783, -2.723785877227783, -2.545529842376709, -2.367274045944214, -2.1890182495117188, -2.0107622146606445, -1.8325064182281494, -1.6542505025863647, -1.47599458694458, -1.297738790512085, -1.1194828748703003, -0.9412269592285156, -0.7629711627960205, -0.5847152471542358, -0.4064598083496094, -0.2282039225101471, -0.049948036670684814, 0.12830781936645508, 0.30656373500823975, 0.4848196506500244, 0.6630754470825195, 0.8413313627243042, 1.0195872783660889, 1.1978431940078735, 1.3760991096496582, 1.5543549060821533, 1.732610821723938, 1.9108667373657227, 2.0891225337982178, 2.267378330230713, 2.445634365081787, 2.6238901615142822, 2.8021461963653564, 2.9804019927978516, 3.158658027648926, 3.336913824081421, 3.515169620513916, 3.6934256553649902, 3.8716814517974854, 4.0499372482299805, 4.228193283081055, 4.406449317932129, 4.584704875946045, 4.762960910797119, 4.941216468811035, 5.119472503662109, 5.297728538513184]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 8.0, 11.0, 10.0, 17.0, 24.0, 28.0, 43.0, 87.0, 140.0, 271.0, 554.0, 1215.0, 2678.0, 7009.0, 21716.0, 73315.0, 248842.0, 424743.0, 187575.0, 54362.0, 16230.0, 5448.0, 2194.0, 971.0, 489.0, 238.0, 143.0, 60.0, 49.0, 29.0, 21.0, 10.0, 10.0, 8.0, 1.0, 2.0, 6.0, 1.0, 3.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.59228515625, -0.5689468383789062, -0.5456085205078125, -0.5222702026367188, -0.498931884765625, -0.47559356689453125, -0.4522552490234375, -0.42891693115234375, -0.40557861328125, -0.38224029541015625, -0.3589019775390625, -0.33556365966796875, -0.312225341796875, -0.28888702392578125, -0.2655487060546875, -0.24221038818359375, -0.2188720703125, -0.19553375244140625, -0.1721954345703125, -0.14885711669921875, -0.125518798828125, -0.10218048095703125, -0.0788421630859375, -0.05550384521484375, -0.03216552734375, -0.00882720947265625, 0.0145111083984375, 0.03784942626953125, 0.061187744140625, 0.08452606201171875, 0.1078643798828125, 0.13120269775390625, 0.154541015625, 0.17787933349609375, 0.2012176513671875, 0.22455596923828125, 0.247894287109375, 0.27123260498046875, 0.2945709228515625, 0.31790924072265625, 0.34124755859375, 0.36458587646484375, 0.3879241943359375, 0.41126251220703125, 0.434600830078125, 0.45793914794921875, 0.4812774658203125, 0.5046157836914062, 0.5279541015625, 0.5512924194335938, 0.5746307373046875, 0.5979690551757812, 0.621307373046875, 0.6446456909179688, 0.6679840087890625, 0.6913223266601562, 0.71466064453125, 0.7379989624023438, 0.7613372802734375, 0.7846755981445312, 0.808013916015625, 0.8313522338867188, 0.8546905517578125, 0.8780288696289062, 0.9013671875]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 8.0, 3.0, 7.0, 6.0, 9.0, 12.0, 14.0, 13.0, 16.0, 25.0, 43.0, 28.0, 35.0, 46.0, 53.0, 56.0, 56.0, 68.0, 51.0, 53.0, 52.0, 43.0, 45.0, 43.0, 47.0, 30.0, 26.0, 23.0, 30.0, 14.0, 8.0, 8.0, 16.0, 7.0, 5.0, 2.0, 2.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.32666015625, -0.3155059814453125, -0.304351806640625, -0.2931976318359375, -0.28204345703125, -0.2708892822265625, -0.259735107421875, -0.2485809326171875, -0.2374267578125, -0.2262725830078125, -0.215118408203125, -0.2039642333984375, -0.19281005859375, -0.1816558837890625, -0.170501708984375, -0.1593475341796875, -0.148193359375, -0.1370391845703125, -0.125885009765625, -0.1147308349609375, -0.10357666015625, -0.0924224853515625, -0.081268310546875, -0.0701141357421875, -0.0589599609375, -0.0478057861328125, -0.036651611328125, -0.0254974365234375, -0.01434326171875, -0.0031890869140625, 0.007965087890625, 0.0191192626953125, 0.0302734375, 0.0414276123046875, 0.052581787109375, 0.0637359619140625, 0.07489013671875, 0.0860443115234375, 0.097198486328125, 0.1083526611328125, 0.1195068359375, 0.1306610107421875, 0.141815185546875, 0.1529693603515625, 0.16412353515625, 0.1752777099609375, 0.186431884765625, 0.1975860595703125, 0.208740234375, 0.2198944091796875, 0.231048583984375, 0.2422027587890625, 0.25335693359375, 0.2645111083984375, 0.275665283203125, 0.2868194580078125, 0.2979736328125, 0.3091278076171875, 0.320281982421875, 0.3314361572265625, 0.34259033203125, 0.3537445068359375, 0.364898681640625, 0.3760528564453125, 0.38720703125]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 10.0, 3.0, 7.0, 15.0, 13.0, 19.0, 32.0, 38.0, 74.0, 69.0, 116.0, 197.0, 267.0, 398.0, 639.0, 1189.0, 2453.0, 7884.0, 46130.0, 530862.0, 402582.0, 42908.0, 7272.0, 2390.0, 1125.0, 644.0, 395.0, 246.0, 180.0, 121.0, 82.0, 52.0, 44.0, 28.0, 20.0, 15.0, 13.0, 8.0, 0.0, 3.0, 10.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.28125, -1.2435455322265625, -1.205841064453125, -1.1681365966796875, -1.13043212890625, -1.0927276611328125, -1.055023193359375, -1.0173187255859375, -0.9796142578125, -0.9419097900390625, -0.904205322265625, -0.8665008544921875, -0.82879638671875, -0.7910919189453125, -0.753387451171875, -0.7156829833984375, -0.677978515625, -0.6402740478515625, -0.602569580078125, -0.5648651123046875, -0.52716064453125, -0.4894561767578125, -0.451751708984375, -0.4140472412109375, -0.3763427734375, -0.3386383056640625, -0.300933837890625, -0.2632293701171875, -0.22552490234375, -0.1878204345703125, -0.150115966796875, -0.1124114990234375, -0.07470703125, -0.0370025634765625, 0.000701904296875, 0.0384063720703125, 0.07611083984375, 0.1138153076171875, 0.151519775390625, 0.1892242431640625, 0.2269287109375, 0.2646331787109375, 0.302337646484375, 0.3400421142578125, 0.37774658203125, 0.4154510498046875, 0.453155517578125, 0.4908599853515625, 0.528564453125, 0.5662689208984375, 0.603973388671875, 0.6416778564453125, 0.67938232421875, 0.7170867919921875, 0.754791259765625, 0.7924957275390625, 0.8302001953125, 0.8679046630859375, 0.905609130859375, 0.9433135986328125, 0.98101806640625, 1.0187225341796875, 1.056427001953125, 1.0941314697265625, 1.1318359375]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 6.0, 5.0, 12.0, 5.0, 9.0, 13.0, 19.0, 27.0, 24.0, 30.0, 30.0, 51.0, 44.0, 55.0, 72.0, 67.0, 70.0, 67.0, 75.0, 52.0, 50.0, 34.0, 30.0, 35.0, 28.0, 22.0, 15.0, 8.0, 11.0, 4.0, 15.0, 6.0, 4.0, 1.0, 5.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.5771484375, -1.531494140625, -1.48583984375, -1.440185546875, -1.39453125, -1.348876953125, -1.30322265625, -1.257568359375, -1.2119140625, -1.166259765625, -1.12060546875, -1.074951171875, -1.029296875, -0.983642578125, -0.93798828125, -0.892333984375, -0.8466796875, -0.801025390625, -0.75537109375, -0.709716796875, -0.6640625, -0.618408203125, -0.57275390625, -0.527099609375, -0.4814453125, -0.435791015625, -0.39013671875, -0.344482421875, -0.298828125, -0.253173828125, -0.20751953125, -0.161865234375, -0.1162109375, -0.070556640625, -0.02490234375, 0.020751953125, 0.06640625, 0.112060546875, 0.15771484375, 0.203369140625, 0.2490234375, 0.294677734375, 0.34033203125, 0.385986328125, 0.431640625, 0.477294921875, 0.52294921875, 0.568603515625, 0.6142578125, 0.659912109375, 0.70556640625, 0.751220703125, 0.796875, 0.842529296875, 0.88818359375, 0.933837890625, 0.9794921875, 1.025146484375, 1.07080078125, 1.116455078125, 1.162109375, 1.207763671875, 1.25341796875, 1.299072265625, 1.3447265625]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 0.0, 2.0, 2.0, 7.0, 9.0, 8.0, 13.0, 18.0, 18.0, 43.0, 58.0, 69.0, 123.0, 147.0, 279.0, 428.0, 932.0, 2250.0, 6936.0, 28160.0, 132295.0, 603332.0, 212058.0, 45099.0, 10645.0, 3041.0, 1205.0, 560.0, 295.0, 156.0, 118.0, 73.0, 53.0, 32.0, 14.0, 25.0, 14.0, 9.0, 12.0, 2.0, 6.0, 6.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.274658203125, -0.2662467956542969, -0.25783538818359375, -0.24942398071289062, -0.2410125732421875, -0.23260116577148438, -0.22418975830078125, -0.21577835083007812, -0.207366943359375, -0.19895553588867188, -0.19054412841796875, -0.18213272094726562, -0.1737213134765625, -0.16530990600585938, -0.15689849853515625, -0.14848709106445312, -0.14007568359375, -0.13166427612304688, -0.12325286865234375, -0.11484146118164062, -0.1064300537109375, -0.09801864624023438, -0.08960723876953125, -0.08119583129882812, -0.072784423828125, -0.06437301635742188, -0.05596160888671875, -0.047550201416015625, -0.0391387939453125, -0.030727386474609375, -0.02231597900390625, -0.013904571533203125, -0.0054931640625, 0.002918243408203125, 0.01132965087890625, 0.019741058349609375, 0.0281524658203125, 0.036563873291015625, 0.04497528076171875, 0.053386688232421875, 0.061798095703125, 0.07020950317382812, 0.07862091064453125, 0.08703231811523438, 0.0954437255859375, 0.10385513305664062, 0.11226654052734375, 0.12067794799804688, 0.12908935546875, 0.13750076293945312, 0.14591217041015625, 0.15432357788085938, 0.1627349853515625, 0.17114639282226562, 0.17955780029296875, 0.18796920776367188, 0.196380615234375, 0.20479202270507812, 0.21320343017578125, 0.22161483764648438, 0.2300262451171875, 0.23843765258789062, 0.24684906005859375, 0.2552604675292969, 0.263671875]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 2.0, 2.0, 5.0, 2.0, 1.0, 3.0, 3.0, 7.0, 6.0, 8.0, 9.0, 16.0, 11.0, 20.0, 26.0, 29.0, 44.0, 44.0, 51.0, 60.0, 59.0, 72.0, 73.0, 69.0, 75.0, 41.0, 62.0, 33.0, 36.0, 16.0, 22.0, 20.0, 15.0, 8.0, 11.0, 5.0, 11.0, 5.0, 4.0, 11.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.7756900787353516e-05, -5.570054054260254e-05, -5.364418029785156e-05, -5.1587820053100586e-05, -4.953145980834961e-05, -4.747509956359863e-05, -4.5418739318847656e-05, -4.336237907409668e-05, -4.13060188293457e-05, -3.9249658584594727e-05, -3.719329833984375e-05, -3.5136938095092773e-05, -3.30805778503418e-05, -3.102421760559082e-05, -2.8967857360839844e-05, -2.6911497116088867e-05, -2.485513687133789e-05, -2.2798776626586914e-05, -2.0742416381835938e-05, -1.868605613708496e-05, -1.6629695892333984e-05, -1.4573335647583008e-05, -1.2516975402832031e-05, -1.0460615158081055e-05, -8.404254913330078e-06, -6.3478946685791016e-06, -4.291534423828125e-06, -2.2351741790771484e-06, -1.7881393432617188e-07, 1.8775463104248047e-06, 3.933906555175781e-06, 5.990266799926758e-06, 8.046627044677734e-06, 1.0102987289428711e-05, 1.2159347534179688e-05, 1.4215707778930664e-05, 1.627206802368164e-05, 1.8328428268432617e-05, 2.0384788513183594e-05, 2.244114875793457e-05, 2.4497509002685547e-05, 2.6553869247436523e-05, 2.86102294921875e-05, 3.0666589736938477e-05, 3.272294998168945e-05, 3.477931022644043e-05, 3.6835670471191406e-05, 3.889203071594238e-05, 4.094839096069336e-05, 4.3004751205444336e-05, 4.506111145019531e-05, 4.711747169494629e-05, 4.9173831939697266e-05, 5.123019218444824e-05, 5.328655242919922e-05, 5.5342912673950195e-05, 5.739927291870117e-05, 5.945563316345215e-05, 6.151199340820312e-05, 6.35683536529541e-05, 6.562471389770508e-05, 6.768107414245605e-05, 6.973743438720703e-05, 7.179379463195801e-05, 7.385015487670898e-05]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 4.0, 5.0, 0.0, 4.0, 9.0, 5.0, 9.0, 9.0, 12.0, 30.0, 45.0, 53.0, 91.0, 135.0, 237.0, 364.0, 731.0, 1394.0, 3183.0, 9380.0, 37882.0, 184714.0, 613579.0, 151388.0, 31429.0, 8094.0, 2869.0, 1233.0, 683.0, 392.0, 197.0, 124.0, 87.0, 57.0, 40.0, 29.0, 22.0, 13.0, 12.0, 6.0, 5.0, 2.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.310791015625, -0.3022613525390625, -0.293731689453125, -0.2852020263671875, -0.27667236328125, -0.2681427001953125, -0.259613037109375, -0.2510833740234375, -0.2425537109375, -0.2340240478515625, -0.225494384765625, -0.2169647216796875, -0.20843505859375, -0.1999053955078125, -0.191375732421875, -0.1828460693359375, -0.17431640625, -0.1657867431640625, -0.157257080078125, -0.1487274169921875, -0.14019775390625, -0.1316680908203125, -0.123138427734375, -0.1146087646484375, -0.1060791015625, -0.0975494384765625, -0.089019775390625, -0.0804901123046875, -0.07196044921875, -0.0634307861328125, -0.054901123046875, -0.0463714599609375, -0.037841796875, -0.0293121337890625, -0.020782470703125, -0.0122528076171875, -0.00372314453125, 0.0048065185546875, 0.013336181640625, 0.0218658447265625, 0.0303955078125, 0.0389251708984375, 0.047454833984375, 0.0559844970703125, 0.06451416015625, 0.0730438232421875, 0.081573486328125, 0.0901031494140625, 0.0986328125, 0.1071624755859375, 0.115692138671875, 0.1242218017578125, 0.13275146484375, 0.1412811279296875, 0.149810791015625, 0.1583404541015625, 0.1668701171875, 0.1753997802734375, 0.183929443359375, 0.1924591064453125, 0.20098876953125, 0.2095184326171875, 0.218048095703125, 0.2265777587890625, 0.235107421875]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 5.0, 5.0, 8.0, 6.0, 11.0, 8.0, 16.0, 12.0, 19.0, 20.0, 26.0, 31.0, 39.0, 36.0, 56.0, 52.0, 61.0, 62.0, 68.0, 55.0, 62.0, 51.0, 41.0, 48.0, 41.0, 28.0, 30.0, 20.0, 17.0, 16.0, 4.0, 7.0, 14.0, 9.0, 4.0, 10.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.273681640625, -0.26654815673828125, -0.2594146728515625, -0.25228118896484375, -0.245147705078125, -0.23801422119140625, -0.2308807373046875, -0.22374725341796875, -0.21661376953125, -0.20948028564453125, -0.2023468017578125, -0.19521331787109375, -0.188079833984375, -0.18094635009765625, -0.1738128662109375, -0.16667938232421875, -0.1595458984375, -0.15241241455078125, -0.1452789306640625, -0.13814544677734375, -0.131011962890625, -0.12387847900390625, -0.1167449951171875, -0.10961151123046875, -0.10247802734375, -0.09534454345703125, -0.0882110595703125, -0.08107757568359375, -0.073944091796875, -0.06681060791015625, -0.0596771240234375, -0.05254364013671875, -0.04541015625, -0.03827667236328125, -0.0311431884765625, -0.02400970458984375, -0.016876220703125, -0.00974273681640625, -0.0026092529296875, 0.00452423095703125, 0.01165771484375, 0.01879119873046875, 0.0259246826171875, 0.03305816650390625, 0.040191650390625, 0.04732513427734375, 0.0544586181640625, 0.06159210205078125, 0.0687255859375, 0.07585906982421875, 0.0829925537109375, 0.09012603759765625, 0.097259521484375, 0.10439300537109375, 0.1115264892578125, 0.11865997314453125, 0.12579345703125, 0.13292694091796875, 0.1400604248046875, 0.14719390869140625, 0.154327392578125, 0.16146087646484375, 0.1685943603515625, 0.17572784423828125, 0.182861328125]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 3.0, 4.0, 4.0, 1.0, 3.0, 18.0, 14.0, 49.0, 128.0, 243.0, 320.0, 143.0, 55.0, 15.0, 4.0, 4.0, 2.0, 0.0, 3.0], "bins": [-20.081356048583984, -19.719009399414062, -19.356664657592773, -18.99431800842285, -18.63197135925293, -18.26962661743164, -17.90727996826172, -17.544933319091797, -17.182586669921875, -16.820240020751953, -16.457895278930664, -16.095548629760742, -15.73320198059082, -15.370856285095215, -15.00851058959961, -14.646163940429688, -14.283819198608398, -13.921473503112793, -13.559126853942871, -13.196781158447266, -12.834434509277344, -12.472088813781738, -12.109743118286133, -11.747396469116211, -11.385049819946289, -11.022704124450684, -10.660357475280762, -10.298011779785156, -9.935665130615234, -9.573319435119629, -9.210973739624023, -8.848627090454102, -8.486282348632812, -8.123936653137207, -7.761590003967285, -7.39924430847168, -7.036898136138916, -6.674551963806152, -6.312205791473389, -5.949859619140625, -5.587512969970703, -5.2251667976379395, -4.862820625305176, -4.50047492980957, -4.138128757476807, -3.775782585144043, -3.4134364128112793, -3.0510904788970947, -2.68874454498291, -2.3263983726501465, -1.964052438735962, -1.6017062664031982, -1.2393602132797241, -0.87701416015625, -0.5146679878234863, -0.15232205390930176, 0.21002411842346191, 0.572370171546936, 0.9347162842750549, 1.2970623970031738, 1.659408450126648, 2.021754503250122, 2.3841006755828857, 2.7464466094970703, 3.108792781829834]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 2.0, 5.0, 2.0, 6.0, 9.0, 8.0, 11.0, 12.0, 16.0, 14.0, 28.0, 24.0, 27.0, 48.0, 34.0, 43.0, 48.0, 57.0, 71.0, 89.0, 74.0, 51.0, 52.0, 49.0, 27.0, 41.0, 17.0, 20.0, 17.0, 21.0, 14.0, 11.0, 15.0, 9.0, 4.0, 11.0, 5.0, 5.0, 1.0, 3.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.043880939483643, -4.883065223693848, -4.7222490310668945, -4.5614333152771, -4.400617599487305, -4.23980188369751, -4.078986167907715, -3.9181699752807617, -3.757354259490967, -3.596538543701172, -3.435722589492798, -3.274906635284424, -3.114090919494629, -2.953275203704834, -2.79245924949646, -2.631643295288086, -2.470827579498291, -2.310011863708496, -2.149195909500122, -1.9883800745010376, -1.8275642395019531, -1.6667484045028687, -1.5059325695037842, -1.3451167345046997, -1.1843008995056152, -1.0234850645065308, -0.8626692295074463, -0.7018533945083618, -0.5410375595092773, -0.38022172451019287, -0.2194058895111084, -0.058590054512023926, 0.10222625732421875, 0.2630420923233032, 0.4238579273223877, 0.5846737623214722, 0.7454895973205566, 0.9063054323196411, 1.0671212673187256, 1.22793710231781, 1.3887529373168945, 1.549568772315979, 1.7103846073150635, 1.871200442314148, 2.0320162773132324, 2.1928319931030273, 2.3536479473114014, 2.5144639015197754, 2.6752796173095703, 2.8360953330993652, 2.9969112873077393, 3.1577272415161133, 3.318542957305908, 3.479358673095703, 3.640174627304077, 3.800990581512451, 3.961806297302246, 4.122622013092041, 4.283437728881836, 4.444253921508789, 4.605069637298584, 4.765885353088379, 4.926701545715332, 5.087517261505127, 5.248332977294922]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 5.0, 8.0, 4.0, 9.0, 10.0, 21.0, 35.0, 48.0, 81.0, 156.0, 254.0, 544.0, 1508.0, 4665.0, 22190.0, 232088.0, 3267693.0, 614942.0, 38892.0, 7448.0, 2128.0, 776.0, 321.0, 172.0, 100.0, 71.0, 37.0, 25.0, 15.0, 7.0, 14.0, 5.0, 6.0, 2.0, 4.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.58642578125, -0.5626296997070312, -0.5388336181640625, -0.5150375366210938, -0.491241455078125, -0.46744537353515625, -0.4436492919921875, -0.41985321044921875, -0.39605712890625, -0.37226104736328125, -0.3484649658203125, -0.32466888427734375, -0.300872802734375, -0.27707672119140625, -0.2532806396484375, -0.22948455810546875, -0.2056884765625, -0.18189239501953125, -0.1580963134765625, -0.13430023193359375, -0.110504150390625, -0.08670806884765625, -0.0629119873046875, -0.03911590576171875, -0.01531982421875, 0.00847625732421875, 0.0322723388671875, 0.05606842041015625, 0.079864501953125, 0.10366058349609375, 0.1274566650390625, 0.15125274658203125, 0.175048828125, 0.19884490966796875, 0.2226409912109375, 0.24643707275390625, 0.270233154296875, 0.29402923583984375, 0.3178253173828125, 0.34162139892578125, 0.36541748046875, 0.38921356201171875, 0.4130096435546875, 0.43680572509765625, 0.460601806640625, 0.48439788818359375, 0.5081939697265625, 0.5319900512695312, 0.5557861328125, 0.5795822143554688, 0.6033782958984375, 0.6271743774414062, 0.650970458984375, 0.6747665405273438, 0.6985626220703125, 0.7223587036132812, 0.74615478515625, 0.7699508666992188, 0.7937469482421875, 0.8175430297851562, 0.841339111328125, 0.8651351928710938, 0.8889312744140625, 0.9127273559570312, 0.9365234375]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 3.0, 2.0, 6.0, 4.0, 11.0, 11.0, 11.0, 15.0, 15.0, 22.0, 23.0, 21.0, 31.0, 21.0, 36.0, 46.0, 42.0, 65.0, 61.0, 49.0, 51.0, 45.0, 46.0, 36.0, 60.0, 38.0, 45.0, 39.0, 24.0, 24.0, 21.0, 14.0, 11.0, 14.0, 12.0, 11.0, 5.0, 1.0, 4.0, 3.0, 7.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.333984375, -0.3247489929199219, -0.31551361083984375, -0.3062782287597656, -0.2970428466796875, -0.2878074645996094, -0.27857208251953125, -0.2693367004394531, -0.260101318359375, -0.2508659362792969, -0.24163055419921875, -0.23239517211914062, -0.2231597900390625, -0.21392440795898438, -0.20468902587890625, -0.19545364379882812, -0.18621826171875, -0.17698287963867188, -0.16774749755859375, -0.15851211547851562, -0.1492767333984375, -0.14004135131835938, -0.13080596923828125, -0.12157058715820312, -0.112335205078125, -0.10309982299804688, -0.09386444091796875, -0.08462905883789062, -0.0753936767578125, -0.06615829467773438, -0.05692291259765625, -0.047687530517578125, -0.0384521484375, -0.029216766357421875, -0.01998138427734375, -0.010746002197265625, -0.0015106201171875, 0.007724761962890625, 0.01696014404296875, 0.026195526123046875, 0.035430908203125, 0.044666290283203125, 0.05390167236328125, 0.06313705444335938, 0.0723724365234375, 0.08160781860351562, 0.09084320068359375, 0.10007858276367188, 0.10931396484375, 0.11854934692382812, 0.12778472900390625, 0.13702011108398438, 0.1462554931640625, 0.15549087524414062, 0.16472625732421875, 0.17396163940429688, 0.183197021484375, 0.19243240356445312, 0.20166778564453125, 0.21090316772460938, 0.2201385498046875, 0.22937393188476562, 0.23860931396484375, 0.24784469604492188, 0.257080078125]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 5.0, 12.0, 11.0, 17.0, 32.0, 49.0, 72.0, 155.0, 323.0, 560.0, 1533.0, 5149.0, 24859.0, 316449.0, 3642266.0, 179366.0, 17410.0, 3765.0, 1150.0, 527.0, 258.0, 135.0, 68.0, 34.0, 32.0, 14.0, 14.0, 8.0, 4.0, 3.0, 5.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.052734375, -1.0221481323242188, -0.9915618896484375, -0.9609756469726562, -0.930389404296875, -0.8998031616210938, -0.8692169189453125, -0.8386306762695312, -0.80804443359375, -0.7774581909179688, -0.7468719482421875, -0.7162857055664062, -0.685699462890625, -0.6551132202148438, -0.6245269775390625, -0.5939407348632812, -0.5633544921875, -0.5327682495117188, -0.5021820068359375, -0.47159576416015625, -0.441009521484375, -0.41042327880859375, -0.3798370361328125, -0.34925079345703125, -0.31866455078125, -0.28807830810546875, -0.2574920654296875, -0.22690582275390625, -0.196319580078125, -0.16573333740234375, -0.1351470947265625, -0.10456085205078125, -0.073974609375, -0.04338836669921875, -0.0128021240234375, 0.01778411865234375, 0.048370361328125, 0.07895660400390625, 0.1095428466796875, 0.14012908935546875, 0.17071533203125, 0.20130157470703125, 0.2318878173828125, 0.26247406005859375, 0.293060302734375, 0.32364654541015625, 0.3542327880859375, 0.38481903076171875, 0.4154052734375, 0.44599151611328125, 0.4765777587890625, 0.5071640014648438, 0.537750244140625, 0.5683364868164062, 0.5989227294921875, 0.6295089721679688, 0.66009521484375, 0.6906814575195312, 0.7212677001953125, 0.7518539428710938, 0.782440185546875, 0.8130264282226562, 0.8436126708984375, 0.8741989135742188, 0.90478515625]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 5.0, 8.0, 4.0, 12.0, 7.0, 10.0, 7.0, 17.0, 19.0, 32.0, 45.0, 74.0, 129.0, 182.0, 349.0, 526.0, 688.0, 722.0, 497.0, 292.0, 132.0, 116.0, 62.0, 44.0, 32.0, 20.0, 12.0, 13.0, 7.0, 3.0, 4.0, 0.0, 3.0, 3.0, 1.0, 0.0, 2.0, 4.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.92236328125, -0.8986053466796875, -0.874847412109375, -0.8510894775390625, -0.82733154296875, -0.8035736083984375, -0.779815673828125, -0.7560577392578125, -0.7322998046875, -0.7085418701171875, -0.684783935546875, -0.6610260009765625, -0.63726806640625, -0.6135101318359375, -0.589752197265625, -0.5659942626953125, -0.542236328125, -0.5184783935546875, -0.494720458984375, -0.4709625244140625, -0.44720458984375, -0.4234466552734375, -0.399688720703125, -0.3759307861328125, -0.3521728515625, -0.3284149169921875, -0.304656982421875, -0.2808990478515625, -0.25714111328125, -0.2333831787109375, -0.209625244140625, -0.1858673095703125, -0.162109375, -0.1383514404296875, -0.114593505859375, -0.0908355712890625, -0.06707763671875, -0.0433197021484375, -0.019561767578125, 0.0041961669921875, 0.0279541015625, 0.0517120361328125, 0.075469970703125, 0.0992279052734375, 0.12298583984375, 0.1467437744140625, 0.170501708984375, 0.1942596435546875, 0.218017578125, 0.2417755126953125, 0.265533447265625, 0.2892913818359375, 0.31304931640625, 0.3368072509765625, 0.360565185546875, 0.3843231201171875, 0.4080810546875, 0.4318389892578125, 0.455596923828125, 0.4793548583984375, 0.50311279296875, 0.5268707275390625, 0.550628662109375, 0.5743865966796875, 0.59814453125]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 3.0, 3.0, 6.0, 14.0, 31.0, 68.0, 120.0, 167.0, 187.0, 183.0, 100.0, 52.0, 30.0, 16.0, 5.0, 8.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.0898027420043945, -4.85304594039917, -4.616289138793945, -4.379532337188721, -4.142775535583496, -3.9060187339782715, -3.669261932373047, -3.4325051307678223, -3.1957483291625977, -2.958991527557373, -2.7222347259521484, -2.485477924346924, -2.248721122741699, -2.0119643211364746, -1.77520751953125, -1.5384507179260254, -1.3016939163208008, -1.0649371147155762, -0.8281803131103516, -0.591423511505127, -0.35466670989990234, -0.11790990829467773, 0.11884689331054688, 0.3556036949157715, 0.5923604965209961, 0.8291172981262207, 1.0658740997314453, 1.30263090133667, 1.5393877029418945, 1.7761445045471191, 2.0129013061523438, 2.2496581077575684, 2.486414909362793, 2.7231717109680176, 2.959928512573242, 3.196685314178467, 3.4334421157836914, 3.670198917388916, 3.9069557189941406, 4.143712520599365, 4.38046932220459, 4.6172261238098145, 4.853982925415039, 5.090739727020264, 5.327496528625488, 5.564253330230713, 5.8010101318359375, 6.037766933441162, 6.274523735046387, 6.511280536651611, 6.748037338256836, 6.9847941398620605, 7.221550941467285, 7.45830774307251, 7.695064544677734, 7.931821346282959, 8.168578147888184, 8.40533447265625, 8.642091751098633, 8.878849029541016, 9.115605354309082, 9.352361679077148, 9.589118957519531, 9.825876235961914, 10.06263256072998]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 4.0, 6.0, 4.0, 6.0, 14.0, 13.0, 16.0, 21.0, 27.0, 39.0, 35.0, 41.0, 44.0, 51.0, 55.0, 91.0, 50.0, 62.0, 55.0, 48.0, 54.0, 57.0, 40.0, 30.0, 34.0, 28.0, 21.0, 15.0, 10.0, 12.0, 6.0, 6.0, 1.0, 3.0, 5.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.12051248550415, -3.9956350326538086, -3.870757818222046, -3.745880603790283, -3.6210031509399414, -3.4961256980895996, -3.371248483657837, -3.246371269226074, -3.1214938163757324, -2.9966163635253906, -2.871739149093628, -2.7468619346618652, -2.6219844818115234, -2.4971070289611816, -2.372229814529419, -2.2473526000976562, -2.1224751472473145, -1.9975978136062622, -1.87272047996521, -1.7478431463241577, -1.6229658126831055, -1.4980884790420532, -1.373211145401001, -1.2483338117599487, -1.1234564781188965, -0.9985791444778442, -0.873701810836792, -0.7488244771957397, -0.6239471435546875, -0.49906980991363525, -0.374192476272583, -0.24931514263153076, -0.12443804740905762, 0.0004392862319946289, 0.12531661987304688, 0.2501939535140991, 0.37507128715515137, 0.4999486207962036, 0.6248259544372559, 0.7497032880783081, 0.8745806217193604, 0.9994579553604126, 1.1243352890014648, 1.249212622642517, 1.3740899562835693, 1.4989672899246216, 1.6238446235656738, 1.748721957206726, 1.8735992908477783, 1.9984766244888306, 2.123353958129883, 2.2482314109802246, 2.3731086254119873, 2.49798583984375, 2.622863292694092, 2.7477407455444336, 2.8726179599761963, 2.997495174407959, 3.122372627258301, 3.2472500801086426, 3.3721272945404053, 3.497004508972168, 3.6218819618225098, 3.7467594146728516, 3.8716366291046143]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 2.0, 6.0, 5.0, 3.0, 9.0, 9.0, 15.0, 19.0, 24.0, 36.0, 72.0, 97.0, 198.0, 367.0, 700.0, 1501.0, 3333.0, 9044.0, 28027.0, 97554.0, 293362.0, 382472.0, 161451.0, 46940.0, 13984.0, 5029.0, 2169.0, 972.0, 521.0, 262.0, 131.0, 97.0, 51.0, 29.0, 16.0, 13.0, 8.0, 8.0, 9.0, 3.0, 2.0, 4.0, 2.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.7607421875, -0.7391357421875, -0.717529296875, -0.6959228515625, -0.67431640625, -0.6527099609375, -0.631103515625, -0.6094970703125, -0.587890625, -0.5662841796875, -0.544677734375, -0.5230712890625, -0.50146484375, -0.4798583984375, -0.458251953125, -0.4366455078125, -0.4150390625, -0.3934326171875, -0.371826171875, -0.3502197265625, -0.32861328125, -0.3070068359375, -0.285400390625, -0.2637939453125, -0.2421875, -0.2205810546875, -0.198974609375, -0.1773681640625, -0.15576171875, -0.1341552734375, -0.112548828125, -0.0909423828125, -0.0693359375, -0.0477294921875, -0.026123046875, -0.0045166015625, 0.01708984375, 0.0386962890625, 0.060302734375, 0.0819091796875, 0.103515625, 0.1251220703125, 0.146728515625, 0.1683349609375, 0.18994140625, 0.2115478515625, 0.233154296875, 0.2547607421875, 0.2763671875, 0.2979736328125, 0.319580078125, 0.3411865234375, 0.36279296875, 0.3843994140625, 0.406005859375, 0.4276123046875, 0.44921875, 0.4708251953125, 0.492431640625, 0.5140380859375, 0.53564453125, 0.5572509765625, 0.578857421875, 0.6004638671875, 0.6220703125]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 6.0, 2.0, 6.0, 2.0, 6.0, 10.0, 14.0, 11.0, 17.0, 23.0, 27.0, 35.0, 30.0, 38.0, 41.0, 41.0, 51.0, 60.0, 53.0, 71.0, 41.0, 64.0, 38.0, 45.0, 43.0, 39.0, 35.0, 28.0, 31.0, 21.0, 15.0, 13.0, 10.0, 11.0, 12.0, 9.0, 4.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.344482421875, -0.33420562744140625, -0.3239288330078125, -0.31365203857421875, -0.303375244140625, -0.29309844970703125, -0.2828216552734375, -0.27254486083984375, -0.26226806640625, -0.25199127197265625, -0.2417144775390625, -0.23143768310546875, -0.221160888671875, -0.21088409423828125, -0.2006072998046875, -0.19033050537109375, -0.1800537109375, -0.16977691650390625, -0.1595001220703125, -0.14922332763671875, -0.138946533203125, -0.12866973876953125, -0.1183929443359375, -0.10811614990234375, -0.09783935546875, -0.08756256103515625, -0.0772857666015625, -0.06700897216796875, -0.056732177734375, -0.04645538330078125, -0.0361785888671875, -0.02590179443359375, -0.015625, -0.00534820556640625, 0.0049285888671875, 0.01520538330078125, 0.025482177734375, 0.03575897216796875, 0.0460357666015625, 0.05631256103515625, 0.06658935546875, 0.07686614990234375, 0.0871429443359375, 0.09741973876953125, 0.107696533203125, 0.11797332763671875, 0.1282501220703125, 0.13852691650390625, 0.1488037109375, 0.15908050537109375, 0.1693572998046875, 0.17963409423828125, 0.189910888671875, 0.20018768310546875, 0.2104644775390625, 0.22074127197265625, 0.23101806640625, 0.24129486083984375, 0.2515716552734375, 0.26184844970703125, 0.272125244140625, 0.28240203857421875, 0.2926788330078125, 0.30295562744140625, 0.313232421875]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 2.0, 5.0, 2.0, 6.0, 5.0, 5.0, 10.0, 7.0, 11.0, 11.0, 23.0, 26.0, 25.0, 44.0, 63.0, 91.0, 118.0, 252.0, 465.0, 984.0, 2740.0, 10557.0, 52131.0, 284272.0, 580017.0, 92336.0, 17556.0, 4143.0, 1324.0, 540.0, 246.0, 159.0, 112.0, 72.0, 41.0, 44.0, 33.0, 24.0, 12.0, 14.0, 6.0, 6.0, 6.0, 6.0, 4.0, 4.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.7841796875, -0.75531005859375, -0.7264404296875, -0.69757080078125, -0.668701171875, -0.63983154296875, -0.6109619140625, -0.58209228515625, -0.55322265625, -0.52435302734375, -0.4954833984375, -0.46661376953125, -0.437744140625, -0.40887451171875, -0.3800048828125, -0.35113525390625, -0.322265625, -0.29339599609375, -0.2645263671875, -0.23565673828125, -0.206787109375, -0.17791748046875, -0.1490478515625, -0.12017822265625, -0.09130859375, -0.06243896484375, -0.0335693359375, -0.00469970703125, 0.024169921875, 0.05303955078125, 0.0819091796875, 0.11077880859375, 0.1396484375, 0.16851806640625, 0.1973876953125, 0.22625732421875, 0.255126953125, 0.28399658203125, 0.3128662109375, 0.34173583984375, 0.37060546875, 0.39947509765625, 0.4283447265625, 0.45721435546875, 0.486083984375, 0.51495361328125, 0.5438232421875, 0.57269287109375, 0.6015625, 0.63043212890625, 0.6593017578125, 0.68817138671875, 0.717041015625, 0.74591064453125, 0.7747802734375, 0.80364990234375, 0.83251953125, 0.86138916015625, 0.8902587890625, 0.91912841796875, 0.947998046875, 0.97686767578125, 1.0057373046875, 1.03460693359375, 1.0634765625]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [5.0, 2.0, 0.0, 2.0, 2.0, 7.0, 5.0, 5.0, 2.0, 2.0, 7.0, 15.0, 7.0, 14.0, 12.0, 16.0, 23.0, 18.0, 22.0, 21.0, 21.0, 33.0, 27.0, 22.0, 44.0, 37.0, 41.0, 41.0, 31.0, 45.0, 53.0, 48.0, 28.0, 36.0, 42.0, 23.0, 43.0, 22.0, 25.0, 20.0, 25.0, 22.0, 18.0, 13.0, 9.0, 8.0, 7.0, 9.0, 8.0, 8.0, 4.0, 2.0, 4.0, 1.0, 3.0, 1.0, 3.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.82568359375, -0.797943115234375, -0.77020263671875, -0.742462158203125, -0.7147216796875, -0.686981201171875, -0.65924072265625, -0.631500244140625, -0.603759765625, -0.576019287109375, -0.54827880859375, -0.520538330078125, -0.4927978515625, -0.465057373046875, -0.43731689453125, -0.409576416015625, -0.3818359375, -0.354095458984375, -0.32635498046875, -0.298614501953125, -0.2708740234375, -0.243133544921875, -0.21539306640625, -0.187652587890625, -0.159912109375, -0.132171630859375, -0.10443115234375, -0.076690673828125, -0.0489501953125, -0.021209716796875, 0.00653076171875, 0.034271240234375, 0.06201171875, 0.089752197265625, 0.11749267578125, 0.145233154296875, 0.1729736328125, 0.200714111328125, 0.22845458984375, 0.256195068359375, 0.283935546875, 0.311676025390625, 0.33941650390625, 0.367156982421875, 0.3948974609375, 0.422637939453125, 0.45037841796875, 0.478118896484375, 0.505859375, 0.533599853515625, 0.56134033203125, 0.589080810546875, 0.6168212890625, 0.644561767578125, 0.67230224609375, 0.700042724609375, 0.727783203125, 0.755523681640625, 0.78326416015625, 0.811004638671875, 0.8387451171875, 0.866485595703125, 0.89422607421875, 0.921966552734375, 0.94970703125]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 5.0, 4.0, 7.0, 12.0, 11.0, 23.0, 51.0, 54.0, 107.0, 199.0, 513.0, 1148.0, 3274.0, 9840.0, 34891.0, 138254.0, 585450.0, 203091.0, 50680.0, 13853.0, 4364.0, 1496.0, 609.0, 274.0, 131.0, 78.0, 55.0, 32.0, 16.0, 10.0, 13.0, 6.0, 4.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.2978515625, -0.2897224426269531, -0.28159332275390625, -0.2734642028808594, -0.2653350830078125, -0.2572059631347656, -0.24907684326171875, -0.24094772338867188, -0.232818603515625, -0.22468948364257812, -0.21656036376953125, -0.20843124389648438, -0.2003021240234375, -0.19217300415039062, -0.18404388427734375, -0.17591476440429688, -0.16778564453125, -0.15965652465820312, -0.15152740478515625, -0.14339828491210938, -0.1352691650390625, -0.12714004516601562, -0.11901092529296875, -0.11088180541992188, -0.102752685546875, -0.09462356567382812, -0.08649444580078125, -0.07836532592773438, -0.0702362060546875, -0.062107086181640625, -0.05397796630859375, -0.045848846435546875, -0.0377197265625, -0.029590606689453125, -0.02146148681640625, -0.013332366943359375, -0.0052032470703125, 0.002925872802734375, 0.01105499267578125, 0.019184112548828125, 0.027313232421875, 0.035442352294921875, 0.04357147216796875, 0.051700592041015625, 0.0598297119140625, 0.06795883178710938, 0.07608795166015625, 0.08421707153320312, 0.09234619140625, 0.10047531127929688, 0.10860443115234375, 0.11673355102539062, 0.1248626708984375, 0.13299179077148438, 0.14112091064453125, 0.14925003051757812, 0.157379150390625, 0.16550827026367188, 0.17363739013671875, 0.18176651000976562, 0.1898956298828125, 0.19802474975585938, 0.20615386962890625, 0.21428298950195312, 0.222412109375]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 12.0, 8.0, 13.0, 11.0, 18.0, 22.0, 26.0, 53.0, 60.0, 80.0, 112.0, 91.0, 88.0, 92.0, 71.0, 59.0, 60.0, 24.0, 34.0, 24.0, 11.0, 13.0, 8.0, 6.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.104873657226562e-05, -6.851367652416229e-05, -6.597861647605896e-05, -6.344355642795563e-05, -6.0908496379852295e-05, -5.837343633174896e-05, -5.583837628364563e-05, -5.33033162355423e-05, -5.0768256187438965e-05, -4.823319613933563e-05, -4.56981360912323e-05, -4.316307604312897e-05, -4.0628015995025635e-05, -3.80929559469223e-05, -3.555789589881897e-05, -3.302283585071564e-05, -3.0487775802612305e-05, -2.7952715754508972e-05, -2.541765570640564e-05, -2.2882595658302307e-05, -2.0347535610198975e-05, -1.7812475562095642e-05, -1.527741551399231e-05, -1.2742355465888977e-05, -1.0207295417785645e-05, -7.672235369682312e-06, -5.1371753215789795e-06, -2.602115273475647e-06, -6.705522537231445e-08, 2.468004822731018e-06, 5.003064870834351e-06, 7.538124918937683e-06, 1.0073184967041016e-05, 1.2608245015144348e-05, 1.514330506324768e-05, 1.7678365111351013e-05, 2.0213425159454346e-05, 2.2748485207557678e-05, 2.528354525566101e-05, 2.7818605303764343e-05, 3.0353665351867676e-05, 3.288872539997101e-05, 3.542378544807434e-05, 3.795884549617767e-05, 4.0493905544281006e-05, 4.302896559238434e-05, 4.556402564048767e-05, 4.8099085688591003e-05, 5.0634145736694336e-05, 5.316920578479767e-05, 5.5704265832901e-05, 5.8239325881004333e-05, 6.0774385929107666e-05, 6.3309445977211e-05, 6.584450602531433e-05, 6.837956607341766e-05, 7.0914626121521e-05, 7.344968616962433e-05, 7.598474621772766e-05, 7.8519806265831e-05, 8.105486631393433e-05, 8.358992636203766e-05, 8.612498641014099e-05, 8.866004645824432e-05, 9.119510650634766e-05]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 2.0, 1.0, 3.0, 10.0, 5.0, 18.0, 15.0, 20.0, 34.0, 70.0, 142.0, 355.0, 880.0, 3080.0, 16347.0, 124212.0, 708727.0, 167404.0, 21554.0, 3868.0, 1019.0, 401.0, 156.0, 90.0, 58.0, 25.0, 16.0, 20.0, 8.0, 9.0, 3.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4814453125, -0.468963623046875, -0.45648193359375, -0.444000244140625, -0.4315185546875, -0.419036865234375, -0.40655517578125, -0.394073486328125, -0.381591796875, -0.369110107421875, -0.35662841796875, -0.344146728515625, -0.3316650390625, -0.319183349609375, -0.30670166015625, -0.294219970703125, -0.28173828125, -0.269256591796875, -0.25677490234375, -0.244293212890625, -0.2318115234375, -0.219329833984375, -0.20684814453125, -0.194366455078125, -0.181884765625, -0.169403076171875, -0.15692138671875, -0.144439697265625, -0.1319580078125, -0.119476318359375, -0.10699462890625, -0.094512939453125, -0.08203125, -0.069549560546875, -0.05706787109375, -0.044586181640625, -0.0321044921875, -0.019622802734375, -0.00714111328125, 0.005340576171875, 0.017822265625, 0.030303955078125, 0.04278564453125, 0.055267333984375, 0.0677490234375, 0.080230712890625, 0.09271240234375, 0.105194091796875, 0.11767578125, 0.130157470703125, 0.14263916015625, 0.155120849609375, 0.1676025390625, 0.180084228515625, 0.19256591796875, 0.205047607421875, 0.217529296875, 0.230010986328125, 0.24249267578125, 0.254974365234375, 0.2674560546875, 0.279937744140625, 0.29241943359375, 0.304901123046875, 0.3173828125]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 4.0, 2.0, 6.0, 4.0, 11.0, 8.0, 20.0, 19.0, 28.0, 32.0, 37.0, 38.0, 51.0, 60.0, 56.0, 65.0, 70.0, 82.0, 60.0, 62.0, 62.0, 44.0, 38.0, 27.0, 17.0, 22.0, 23.0, 14.0, 8.0, 13.0, 8.0, 5.0, 4.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.23779296875, -0.22983932495117188, -0.22188568115234375, -0.21393203735351562, -0.2059783935546875, -0.19802474975585938, -0.19007110595703125, -0.18211746215820312, -0.174163818359375, -0.16621017456054688, -0.15825653076171875, -0.15030288696289062, -0.1423492431640625, -0.13439559936523438, -0.12644195556640625, -0.11848831176757812, -0.11053466796875, -0.10258102416992188, -0.09462738037109375, -0.08667373657226562, -0.0787200927734375, -0.07076644897460938, -0.06281280517578125, -0.054859161376953125, -0.046905517578125, -0.038951873779296875, -0.03099822998046875, -0.023044586181640625, -0.0150909423828125, -0.007137298583984375, 0.00081634521484375, 0.008769989013671875, 0.0167236328125, 0.024677276611328125, 0.03263092041015625, 0.040584564208984375, 0.0485382080078125, 0.056491851806640625, 0.06444549560546875, 0.07239913940429688, 0.080352783203125, 0.08830642700195312, 0.09626007080078125, 0.10421371459960938, 0.1121673583984375, 0.12012100219726562, 0.12807464599609375, 0.13602828979492188, 0.14398193359375, 0.15193557739257812, 0.15988922119140625, 0.16784286499023438, 0.1757965087890625, 0.18375015258789062, 0.19170379638671875, 0.19965744018554688, 0.207611083984375, 0.21556472778320312, 0.22351837158203125, 0.23147201538085938, 0.2394256591796875, 0.24737930297851562, 0.25533294677734375, 0.2632865905761719, 0.271240234375]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 4.0, 3.0, 10.0, 12.0, 22.0, 75.0, 130.0, 308.0, 255.0, 120.0, 43.0, 13.0, 2.0, 2.0, 4.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.501377105712891, -7.187282085418701, -6.873187065124512, -6.559092044830322, -6.244997024536133, -5.930902004241943, -5.616806983947754, -5.302712440490723, -4.988616943359375, -4.6745219230651855, -4.360426902770996, -4.046331882476807, -3.732236862182617, -3.4181418418884277, -3.1040470600128174, -2.789952039718628, -2.4758572578430176, -2.161762237548828, -1.8476672172546387, -1.5335723161697388, -1.2194772958755493, -0.9053822755813599, -0.59128737449646, -0.2771923542022705, 0.036902666091918945, 0.350997656583786, 0.6650926470756531, 0.9791876077651978, 1.2932826280593872, 1.6073776483535767, 1.9214725494384766, 2.235567569732666, 2.5496625900268555, 2.863757610321045, 3.1778526306152344, 3.491947650909424, 3.8060426712036133, 4.120137691497803, 4.434232711791992, 4.748327255249023, 5.062422752380371, 5.3765177726745605, 5.69061279296875, 6.0047078132629395, 6.318802833557129, 6.632897853851318, 6.946992874145508, 7.261087417602539, 7.5751824378967285, 7.889277458190918, 8.20337200164795, 8.517467498779297, 8.831562042236328, 9.145657539367676, 9.459752082824707, 9.773847579956055, 10.087942123413086, 10.402036666870117, 10.716132164001465, 11.030226707458496, 11.344322204589844, 11.658416748046875, 11.972512245178223, 12.286606788635254, 12.600702285766602]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 5.0, 2.0, 6.0, 3.0, 4.0, 13.0, 5.0, 8.0, 9.0, 14.0, 16.0, 7.0, 23.0, 12.0, 14.0, 25.0, 28.0, 24.0, 30.0, 30.0, 46.0, 48.0, 70.0, 75.0, 70.0, 56.0, 49.0, 28.0, 42.0, 32.0, 35.0, 27.0, 25.0, 20.0, 16.0, 23.0, 12.0, 13.0, 11.0, 7.0, 4.0, 6.0, 4.0, 4.0, 3.0, 3.0, 2.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-3.901181221008301, -3.781344413757324, -3.6615076065063477, -3.541670799255371, -3.4218339920043945, -3.301997184753418, -3.1821601390838623, -3.0623233318328857, -2.942486524581909, -2.8226497173309326, -2.702812910079956, -2.5829761028289795, -2.463139057159424, -2.3433022499084473, -2.2234654426574707, -2.103628635406494, -1.9837918281555176, -1.863955020904541, -1.7441182136535645, -1.6242812871932983, -1.5044444799423218, -1.3846076726913452, -1.264770746231079, -1.1449339389801025, -1.025097131729126, -0.9052603244781494, -0.7854234576225281, -0.6655865907669067, -0.5457497835159302, -0.4259129762649536, -0.3060761094093323, -0.18623924255371094, -0.06640267372131348, 0.053434163331985474, 0.17327100038528442, 0.2931078374385834, 0.4129446744918823, 0.5327814817428589, 0.6526183485984802, 0.7724552154541016, 0.8922920227050781, 1.0121288299560547, 1.1319656372070312, 1.2518025636672974, 1.371639370918274, 1.4914761781692505, 1.6113131046295166, 1.7311499118804932, 1.8509867191314697, 1.9708235263824463, 2.090660333633423, 2.2104971408843994, 2.330334186553955, 2.4501709938049316, 2.570007801055908, 2.6898446083068848, 2.8096814155578613, 2.929518222808838, 3.0493550300598145, 3.169191837310791, 3.2890286445617676, 3.408865451812744, 3.5287024974823, 3.6485393047332764, 3.768376111984253]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 5.0, 6.0, 6.0, 3.0, 7.0, 9.0, 21.0, 32.0, 39.0, 64.0, 170.0, 263.0, 520.0, 1288.0, 4660.0, 27286.0, 464856.0, 3397993.0, 270234.0, 20425.0, 4023.0, 1270.0, 505.0, 247.0, 169.0, 68.0, 46.0, 30.0, 15.0, 11.0, 6.0, 3.0, 3.0, 4.0, 5.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7236328125, -0.6981735229492188, -0.6727142333984375, -0.6472549438476562, -0.621795654296875, -0.5963363647460938, -0.5708770751953125, -0.5454177856445312, -0.51995849609375, -0.49449920654296875, -0.4690399169921875, -0.44358062744140625, -0.418121337890625, -0.39266204833984375, -0.3672027587890625, -0.34174346923828125, -0.3162841796875, -0.29082489013671875, -0.2653656005859375, -0.23990631103515625, -0.214447021484375, -0.18898773193359375, -0.1635284423828125, -0.13806915283203125, -0.11260986328125, -0.08715057373046875, -0.0616912841796875, -0.03623199462890625, -0.010772705078125, 0.01468658447265625, 0.0401458740234375, 0.06560516357421875, 0.091064453125, 0.11652374267578125, 0.1419830322265625, 0.16744232177734375, 0.192901611328125, 0.21836090087890625, 0.2438201904296875, 0.26927947998046875, 0.29473876953125, 0.32019805908203125, 0.3456573486328125, 0.37111663818359375, 0.396575927734375, 0.42203521728515625, 0.4474945068359375, 0.47295379638671875, 0.4984130859375, 0.5238723754882812, 0.5493316650390625, 0.5747909545898438, 0.600250244140625, 0.6257095336914062, 0.6511688232421875, 0.6766281127929688, 0.70208740234375, 0.7275466918945312, 0.7530059814453125, 0.7784652709960938, 0.803924560546875, 0.8293838500976562, 0.8548431396484375, 0.8803024291992188, 0.90576171875]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 6.0, 4.0, 7.0, 10.0, 14.0, 12.0, 14.0, 25.0, 14.0, 33.0, 25.0, 36.0, 41.0, 38.0, 58.0, 51.0, 50.0, 64.0, 50.0, 52.0, 49.0, 51.0, 38.0, 39.0, 42.0, 29.0, 38.0, 18.0, 20.0, 14.0, 17.0, 12.0, 13.0, 6.0, 6.0, 3.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 3.0, 1.0, 2.0], "bins": [-0.354736328125, -0.34516143798828125, -0.3355865478515625, -0.32601165771484375, -0.316436767578125, -0.30686187744140625, -0.2972869873046875, -0.28771209716796875, -0.27813720703125, -0.26856231689453125, -0.2589874267578125, -0.24941253662109375, -0.239837646484375, -0.23026275634765625, -0.2206878662109375, -0.21111297607421875, -0.2015380859375, -0.19196319580078125, -0.1823883056640625, -0.17281341552734375, -0.163238525390625, -0.15366363525390625, -0.1440887451171875, -0.13451385498046875, -0.12493896484375, -0.11536407470703125, -0.1057891845703125, -0.09621429443359375, -0.086639404296875, -0.07706451416015625, -0.0674896240234375, -0.05791473388671875, -0.04833984375, -0.03876495361328125, -0.0291900634765625, -0.01961517333984375, -0.010040283203125, -0.00046539306640625, 0.0091094970703125, 0.01868438720703125, 0.02825927734375, 0.03783416748046875, 0.0474090576171875, 0.05698394775390625, 0.066558837890625, 0.07613372802734375, 0.0857086181640625, 0.09528350830078125, 0.1048583984375, 0.11443328857421875, 0.1240081787109375, 0.13358306884765625, 0.143157958984375, 0.15273284912109375, 0.1623077392578125, 0.17188262939453125, 0.18145751953125, 0.19103240966796875, 0.2006072998046875, 0.21018218994140625, 0.219757080078125, 0.22933197021484375, 0.2389068603515625, 0.24848175048828125, 0.258056640625]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 3.0, 1.0, 5.0, 8.0, 7.0, 14.0, 27.0, 19.0, 40.0, 62.0, 101.0, 260.0, 680.0, 2533.0, 16629.0, 340358.0, 3715270.0, 107181.0, 8685.0, 1505.0, 443.0, 192.0, 109.0, 45.0, 43.0, 20.0, 17.0, 16.0, 5.0, 6.0, 3.0, 1.0, 0.0, 5.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.060546875, -1.0244140625, -0.98828125, -0.9521484375, -0.916015625, -0.8798828125, -0.84375, -0.8076171875, -0.771484375, -0.7353515625, -0.69921875, -0.6630859375, -0.626953125, -0.5908203125, -0.5546875, -0.5185546875, -0.482421875, -0.4462890625, -0.41015625, -0.3740234375, -0.337890625, -0.3017578125, -0.265625, -0.2294921875, -0.193359375, -0.1572265625, -0.12109375, -0.0849609375, -0.048828125, -0.0126953125, 0.0234375, 0.0595703125, 0.095703125, 0.1318359375, 0.16796875, 0.2041015625, 0.240234375, 0.2763671875, 0.3125, 0.3486328125, 0.384765625, 0.4208984375, 0.45703125, 0.4931640625, 0.529296875, 0.5654296875, 0.6015625, 0.6376953125, 0.673828125, 0.7099609375, 0.74609375, 0.7822265625, 0.818359375, 0.8544921875, 0.890625, 0.9267578125, 0.962890625, 0.9990234375, 1.03515625, 1.0712890625, 1.107421875, 1.1435546875, 1.1796875, 1.2158203125, 1.251953125]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 2.0, 4.0, 3.0, 7.0, 11.0, 9.0, 21.0, 22.0, 22.0, 44.0, 53.0, 87.0, 123.0, 212.0, 364.0, 559.0, 681.0, 634.0, 461.0, 297.0, 156.0, 100.0, 72.0, 33.0, 33.0, 24.0, 11.0, 10.0, 9.0, 8.0, 1.0, 2.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.666015625, -0.642852783203125, -0.61968994140625, -0.596527099609375, -0.5733642578125, -0.550201416015625, -0.52703857421875, -0.503875732421875, -0.480712890625, -0.457550048828125, -0.43438720703125, -0.411224365234375, -0.3880615234375, -0.364898681640625, -0.34173583984375, -0.318572998046875, -0.29541015625, -0.272247314453125, -0.24908447265625, -0.225921630859375, -0.2027587890625, -0.179595947265625, -0.15643310546875, -0.133270263671875, -0.110107421875, -0.086944580078125, -0.06378173828125, -0.040618896484375, -0.0174560546875, 0.005706787109375, 0.02886962890625, 0.052032470703125, 0.0751953125, 0.098358154296875, 0.12152099609375, 0.144683837890625, 0.1678466796875, 0.191009521484375, 0.21417236328125, 0.237335205078125, 0.260498046875, 0.283660888671875, 0.30682373046875, 0.329986572265625, 0.3531494140625, 0.376312255859375, 0.39947509765625, 0.422637939453125, 0.44580078125, 0.468963623046875, 0.49212646484375, 0.515289306640625, 0.5384521484375, 0.561614990234375, 0.58477783203125, 0.607940673828125, 0.631103515625, 0.654266357421875, 0.67742919921875, 0.700592041015625, 0.7237548828125, 0.746917724609375, 0.77008056640625, 0.793243408203125, 0.81640625]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 6.0, 2.0, 5.0, 3.0, 11.0, 18.0, 24.0, 73.0, 124.0, 208.0, 187.0, 151.0, 102.0, 59.0, 20.0, 8.0, 8.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-11.71927261352539, -11.491153717041016, -11.263035774230957, -11.034916877746582, -10.806797981262207, -10.578680038452148, -10.350561141967773, -10.122442245483398, -9.894323348999023, -9.666204452514648, -9.43808650970459, -9.209967613220215, -8.98184871673584, -8.753730773925781, -8.525611877441406, -8.297492980957031, -8.069375038146973, -7.841256618499756, -7.613137722015381, -7.385019302368164, -7.156900405883789, -6.928781986236572, -6.7006635665893555, -6.4725446701049805, -6.244426250457764, -6.016307830810547, -5.788188934326172, -5.560070514678955, -5.331952095031738, -5.103833198547363, -4.8757147789001465, -4.64759635925293, -4.419477462768555, -4.191359043121338, -3.963240146636963, -3.735121726989746, -3.50700306892395, -3.2788844108581543, -3.0507659912109375, -2.8226473331451416, -2.5945284366607666, -2.3664097785949707, -2.138291358947754, -1.910172700881958, -1.682054042816162, -1.4539353847503662, -1.2258168458938599, -0.9976983070373535, -0.7695796489715576, -0.5414610505104065, -0.31334245204925537, -0.08522385358810425, 0.14289474487304688, 0.3710134029388428, 0.5991319417953491, 0.8272504806518555, 1.0553691387176514, 1.2834877967834473, 1.5116063356399536, 1.73972487449646, 1.9678435325622559, 2.1959621906280518, 2.4240808486938477, 2.6521992683410645, 2.8803179264068604]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 1.0, 0.0, 3.0, 4.0, 7.0, 3.0, 2.0, 8.0, 5.0, 8.0, 11.0, 9.0, 14.0, 21.0, 13.0, 18.0, 20.0, 24.0, 20.0, 28.0, 44.0, 30.0, 49.0, 33.0, 47.0, 38.0, 38.0, 42.0, 43.0, 37.0, 36.0, 31.0, 40.0, 27.0, 29.0, 35.0, 25.0, 23.0, 17.0, 17.0, 14.0, 15.0, 13.0, 16.0, 7.0, 9.0, 9.0, 5.0, 4.0, 5.0, 2.0, 6.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0], "bins": [-2.3750357627868652, -2.2939507961273193, -2.2128658294677734, -2.1317811012268066, -2.0506961345672607, -1.9696111679077148, -1.888526201248169, -1.807441234588623, -1.7263563871383667, -1.6452714204788208, -1.5641865730285645, -1.4831016063690186, -1.4020166397094727, -1.3209317922592163, -1.2398468255996704, -1.158761978149414, -1.0776770114898682, -0.996592104434967, -0.9155071973800659, -0.83442223072052, -0.7533373236656189, -0.6722524166107178, -0.5911674499511719, -0.5100825428962708, -0.42899763584136963, -0.3479127287864685, -0.266827791929245, -0.18574286997318268, -0.10465794801712036, -0.02357304096221924, 0.05751189589500427, 0.13859683275222778, 0.2196817398071289, 0.30076664686203003, 0.38185158371925354, 0.46293652057647705, 0.5440214276313782, 0.6251063346862793, 0.7061913013458252, 0.7872762084007263, 0.8683611154556274, 0.9494460225105286, 1.0305309295654297, 1.1116158962249756, 1.1927008628845215, 1.2737857103347778, 1.3548706769943237, 1.43595552444458, 1.517040491104126, 1.5981254577636719, 1.6792103052139282, 1.7602952718734741, 1.8413801193237305, 1.9224650859832764, 2.0035500526428223, 2.084635019302368, 2.165719985961914, 2.24680495262146, 2.327889919281006, 2.4089746475219727, 2.4900596141815186, 2.5711445808410645, 2.6522295475006104, 2.7333145141601562, 2.814399242401123]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 7.0, 5.0, 14.0, 26.0, 33.0, 41.0, 58.0, 110.0, 183.0, 325.0, 675.0, 1429.0, 3384.0, 8822.0, 24978.0, 76902.0, 241540.0, 409023.0, 188995.0, 59803.0, 19926.0, 7028.0, 2795.0, 1183.0, 537.0, 333.0, 151.0, 75.0, 63.0, 46.0, 17.0, 14.0, 16.0, 10.0, 3.0, 7.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0], "bins": [-0.6279296875, -0.6091232299804688, -0.5903167724609375, -0.5715103149414062, -0.552703857421875, -0.5338973999023438, -0.5150909423828125, -0.49628448486328125, -0.47747802734375, -0.45867156982421875, -0.4398651123046875, -0.42105865478515625, -0.402252197265625, -0.38344573974609375, -0.3646392822265625, -0.34583282470703125, -0.3270263671875, -0.30821990966796875, -0.2894134521484375, -0.27060699462890625, -0.251800537109375, -0.23299407958984375, -0.2141876220703125, -0.19538116455078125, -0.17657470703125, -0.15776824951171875, -0.1389617919921875, -0.12015533447265625, -0.101348876953125, -0.08254241943359375, -0.0637359619140625, -0.04492950439453125, -0.026123046875, -0.00731658935546875, 0.0114898681640625, 0.03029632568359375, 0.049102783203125, 0.06790924072265625, 0.0867156982421875, 0.10552215576171875, 0.12432861328125, 0.14313507080078125, 0.1619415283203125, 0.18074798583984375, 0.199554443359375, 0.21836090087890625, 0.2371673583984375, 0.25597381591796875, 0.2747802734375, 0.29358673095703125, 0.3123931884765625, 0.33119964599609375, 0.350006103515625, 0.36881256103515625, 0.3876190185546875, 0.40642547607421875, 0.42523193359375, 0.44403839111328125, 0.4628448486328125, 0.48165130615234375, 0.500457763671875, 0.5192642211914062, 0.5380706787109375, 0.5568771362304688, 0.57568359375]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 2.0, 3.0, 4.0, 1.0, 3.0, 5.0, 2.0, 6.0, 10.0, 19.0, 13.0, 17.0, 22.0, 25.0, 34.0, 33.0, 33.0, 41.0, 44.0, 53.0, 52.0, 53.0, 58.0, 58.0, 52.0, 53.0, 54.0, 36.0, 33.0, 34.0, 30.0, 23.0, 18.0, 18.0, 14.0, 17.0, 10.0, 9.0, 3.0, 8.0, 2.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.361572265625, -0.3513336181640625, -0.341094970703125, -0.3308563232421875, -0.32061767578125, -0.3103790283203125, -0.300140380859375, -0.2899017333984375, -0.2796630859375, -0.2694244384765625, -0.259185791015625, -0.2489471435546875, -0.23870849609375, -0.2284698486328125, -0.218231201171875, -0.2079925537109375, -0.19775390625, -0.1875152587890625, -0.177276611328125, -0.1670379638671875, -0.15679931640625, -0.1465606689453125, -0.136322021484375, -0.1260833740234375, -0.1158447265625, -0.1056060791015625, -0.095367431640625, -0.0851287841796875, -0.07489013671875, -0.0646514892578125, -0.054412841796875, -0.0441741943359375, -0.033935546875, -0.0236968994140625, -0.013458251953125, -0.0032196044921875, 0.00701904296875, 0.0172576904296875, 0.027496337890625, 0.0377349853515625, 0.0479736328125, 0.0582122802734375, 0.068450927734375, 0.0786895751953125, 0.08892822265625, 0.0991668701171875, 0.109405517578125, 0.1196441650390625, 0.1298828125, 0.1401214599609375, 0.150360107421875, 0.1605987548828125, 0.17083740234375, 0.1810760498046875, 0.191314697265625, 0.2015533447265625, 0.2117919921875, 0.2220306396484375, 0.232269287109375, 0.2425079345703125, 0.25274658203125, 0.2629852294921875, 0.273223876953125, 0.2834625244140625, 0.293701171875]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 3.0, 5.0, 7.0, 6.0, 13.0, 18.0, 29.0, 49.0, 66.0, 112.0, 151.0, 292.0, 472.0, 972.0, 2669.0, 13201.0, 129919.0, 791153.0, 94442.0, 10557.0, 2349.0, 852.0, 454.0, 282.0, 181.0, 111.0, 79.0, 44.0, 29.0, 18.0, 10.0, 9.0, 2.0, 6.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.98291015625, -0.9457321166992188, -0.9085540771484375, -0.8713760375976562, -0.834197998046875, -0.7970199584960938, -0.7598419189453125, -0.7226638793945312, -0.68548583984375, -0.6483078002929688, -0.6111297607421875, -0.5739517211914062, -0.536773681640625, -0.49959564208984375, -0.4624176025390625, -0.42523956298828125, -0.3880615234375, -0.35088348388671875, -0.3137054443359375, -0.27652740478515625, -0.239349365234375, -0.20217132568359375, -0.1649932861328125, -0.12781524658203125, -0.09063720703125, -0.05345916748046875, -0.0162811279296875, 0.02089691162109375, 0.058074951171875, 0.09525299072265625, 0.1324310302734375, 0.16960906982421875, 0.206787109375, 0.24396514892578125, 0.2811431884765625, 0.31832122802734375, 0.355499267578125, 0.39267730712890625, 0.4298553466796875, 0.46703338623046875, 0.50421142578125, 0.5413894653320312, 0.5785675048828125, 0.6157455444335938, 0.652923583984375, 0.6901016235351562, 0.7272796630859375, 0.7644577026367188, 0.8016357421875, 0.8388137817382812, 0.8759918212890625, 0.9131698608398438, 0.950347900390625, 0.9875259399414062, 1.0247039794921875, 1.0618820190429688, 1.09906005859375, 1.1362380981445312, 1.1734161376953125, 1.2105941772460938, 1.247772216796875, 1.2849502563476562, 1.3221282958984375, 1.3593063354492188, 1.396484375]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 6.0, 3.0, 7.0, 5.0, 3.0, 9.0, 13.0, 10.0, 15.0, 23.0, 21.0, 30.0, 42.0, 55.0, 56.0, 52.0, 53.0, 50.0, 64.0, 70.0, 68.0, 53.0, 34.0, 50.0, 42.0, 29.0, 29.0, 19.0, 15.0, 19.0, 13.0, 12.0, 12.0, 5.0, 7.0, 2.0, 3.0, 4.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.4384765625, -1.4010162353515625, -1.363555908203125, -1.3260955810546875, -1.28863525390625, -1.2511749267578125, -1.213714599609375, -1.1762542724609375, -1.1387939453125, -1.1013336181640625, -1.063873291015625, -1.0264129638671875, -0.98895263671875, -0.9514923095703125, -0.914031982421875, -0.8765716552734375, -0.839111328125, -0.8016510009765625, -0.764190673828125, -0.7267303466796875, -0.68927001953125, -0.6518096923828125, -0.614349365234375, -0.5768890380859375, -0.5394287109375, -0.5019683837890625, -0.464508056640625, -0.4270477294921875, -0.38958740234375, -0.3521270751953125, -0.314666748046875, -0.2772064208984375, -0.23974609375, -0.2022857666015625, -0.164825439453125, -0.1273651123046875, -0.08990478515625, -0.0524444580078125, -0.014984130859375, 0.0224761962890625, 0.0599365234375, 0.0973968505859375, 0.134857177734375, 0.1723175048828125, 0.20977783203125, 0.2472381591796875, 0.284698486328125, 0.3221588134765625, 0.359619140625, 0.3970794677734375, 0.434539794921875, 0.4720001220703125, 0.50946044921875, 0.5469207763671875, 0.584381103515625, 0.6218414306640625, 0.6593017578125, 0.6967620849609375, 0.734222412109375, 0.7716827392578125, 0.80914306640625, 0.8466033935546875, 0.884063720703125, 0.9215240478515625, 0.958984375]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 2.0, 4.0, 5.0, 3.0, 5.0, 3.0, 8.0, 12.0, 15.0, 18.0, 36.0, 71.0, 113.0, 193.0, 334.0, 648.0, 1411.0, 3080.0, 7769.0, 22140.0, 73021.0, 521404.0, 315494.0, 68660.0, 21082.0, 7400.0, 2969.0, 1237.0, 624.0, 315.0, 185.0, 103.0, 73.0, 39.0, 25.0, 20.0, 10.0, 4.0, 8.0, 6.0, 4.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.241455078125, -0.2339038848876953, -0.22635269165039062, -0.21880149841308594, -0.21125030517578125, -0.20369911193847656, -0.19614791870117188, -0.1885967254638672, -0.1810455322265625, -0.1734943389892578, -0.16594314575195312, -0.15839195251464844, -0.15084075927734375, -0.14328956604003906, -0.13573837280273438, -0.1281871795654297, -0.120635986328125, -0.11308479309082031, -0.10553359985351562, -0.09798240661621094, -0.09043121337890625, -0.08288002014160156, -0.07532882690429688, -0.06777763366699219, -0.0602264404296875, -0.05267524719238281, -0.045124053955078125, -0.03757286071777344, -0.03002166748046875, -0.022470474243164062, -0.014919281005859375, -0.0073680877685546875, 0.00018310546875, 0.0077342987060546875, 0.015285491943359375, 0.022836685180664062, 0.03038787841796875, 0.03793907165527344, 0.045490264892578125, 0.05304145812988281, 0.0605926513671875, 0.06814384460449219, 0.07569503784179688, 0.08324623107910156, 0.09079742431640625, 0.09834861755371094, 0.10589981079101562, 0.11345100402832031, 0.121002197265625, 0.1285533905029297, 0.13610458374023438, 0.14365577697753906, 0.15120697021484375, 0.15875816345214844, 0.16630935668945312, 0.1738605499267578, 0.1814117431640625, 0.1889629364013672, 0.19651412963867188, 0.20406532287597656, 0.21161651611328125, 0.21916770935058594, 0.22671890258789062, 0.2342700958251953, 0.2418212890625]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 6.0, 3.0, 3.0, 8.0, 14.0, 27.0, 34.0, 28.0, 61.0, 67.0, 131.0, 128.0, 117.0, 93.0, 72.0, 62.0, 26.0, 35.0, 26.0, 15.0, 9.0, 11.0, 5.0, 2.0, 5.0, 3.0, 1.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00011235475540161133, -0.00010947417467832565, -0.00010659359395503998, -0.0001037130132317543, -0.00010083243250846863, -9.795185178518295e-05, -9.507127106189728e-05, -9.21906903386116e-05, -8.931010961532593e-05, -8.642952889204025e-05, -8.354894816875458e-05, -8.06683674454689e-05, -7.778778672218323e-05, -7.490720599889755e-05, -7.202662527561188e-05, -6.91460445523262e-05, -6.626546382904053e-05, -6.338488310575485e-05, -6.050430238246918e-05, -5.76237216591835e-05, -5.474314093589783e-05, -5.186256021261215e-05, -4.898197948932648e-05, -4.61013987660408e-05, -4.322081804275513e-05, -4.034023731946945e-05, -3.745965659618378e-05, -3.45790758728981e-05, -3.169849514961243e-05, -2.8817914426326752e-05, -2.5937333703041077e-05, -2.30567529797554e-05, -2.0176172256469727e-05, -1.729559153318405e-05, -1.4415010809898376e-05, -1.1534430086612701e-05, -8.653849363327026e-06, -5.773268640041351e-06, -2.8926879167556763e-06, -1.210719347000122e-08, 2.868473529815674e-06, 5.749054253101349e-06, 8.629634976387024e-06, 1.1510215699672699e-05, 1.4390796422958374e-05, 1.727137714624405e-05, 2.0151957869529724e-05, 2.30325385928154e-05, 2.5913119316101074e-05, 2.879370003938675e-05, 3.1674280762672424e-05, 3.45548614859581e-05, 3.7435442209243774e-05, 4.031602293252945e-05, 4.3196603655815125e-05, 4.60771843791008e-05, 4.8957765102386475e-05, 5.183834582567215e-05, 5.4718926548957825e-05, 5.75995072722435e-05, 6.0480087995529175e-05, 6.336066871881485e-05, 6.624124944210052e-05, 6.91218301653862e-05, 7.200241088867188e-05]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 7.0, 5.0, 8.0, 15.0, 23.0, 17.0, 28.0, 54.0, 60.0, 107.0, 156.0, 255.0, 420.0, 749.0, 1487.0, 3322.0, 8438.0, 23998.0, 79742.0, 544925.0, 284143.0, 66698.0, 20406.0, 7379.0, 2941.0, 1341.0, 721.0, 425.0, 245.0, 141.0, 108.0, 61.0, 39.0, 26.0, 25.0, 12.0, 11.0, 6.0, 6.0, 4.0, 2.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.27197265625, -0.26460838317871094, -0.2572441101074219, -0.2498798370361328, -0.24251556396484375, -0.2351512908935547, -0.22778701782226562, -0.22042274475097656, -0.2130584716796875, -0.20569419860839844, -0.19832992553710938, -0.1909656524658203, -0.18360137939453125, -0.1762371063232422, -0.16887283325195312, -0.16150856018066406, -0.154144287109375, -0.14678001403808594, -0.13941574096679688, -0.1320514678955078, -0.12468719482421875, -0.11732292175292969, -0.10995864868164062, -0.10259437561035156, -0.0952301025390625, -0.08786582946777344, -0.08050155639648438, -0.07313728332519531, -0.06577301025390625, -0.05840873718261719, -0.051044464111328125, -0.04368019104003906, -0.03631591796875, -0.028951644897460938, -0.021587371826171875, -0.014223098754882812, -0.00685882568359375, 0.0005054473876953125, 0.007869720458984375, 0.015233993530273438, 0.0225982666015625, 0.029962539672851562, 0.037326812744140625, 0.04469108581542969, 0.05205535888671875, 0.05941963195800781, 0.06678390502929688, 0.07414817810058594, 0.081512451171875, 0.08887672424316406, 0.09624099731445312, 0.10360527038574219, 0.11096954345703125, 0.11833381652832031, 0.12569808959960938, 0.13306236267089844, 0.1404266357421875, 0.14779090881347656, 0.15515518188476562, 0.1625194549560547, 0.16988372802734375, 0.1772480010986328, 0.18461227416992188, 0.19197654724121094, 0.1993408203125]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 7.0, 1.0, 4.0, 6.0, 4.0, 6.0, 6.0, 6.0, 13.0, 18.0, 16.0, 24.0, 40.0, 45.0, 54.0, 71.0, 96.0, 103.0, 96.0, 80.0, 67.0, 66.0, 39.0, 46.0, 23.0, 17.0, 10.0, 13.0, 8.0, 7.0, 10.0, 0.0, 5.0, 0.0, 5.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3251953125, -0.3161773681640625, -0.307159423828125, -0.2981414794921875, -0.28912353515625, -0.2801055908203125, -0.271087646484375, -0.2620697021484375, -0.2530517578125, -0.2440338134765625, -0.235015869140625, -0.2259979248046875, -0.21697998046875, -0.2079620361328125, -0.198944091796875, -0.1899261474609375, -0.180908203125, -0.1718902587890625, -0.162872314453125, -0.1538543701171875, -0.14483642578125, -0.1358184814453125, -0.126800537109375, -0.1177825927734375, -0.1087646484375, -0.0997467041015625, -0.090728759765625, -0.0817108154296875, -0.07269287109375, -0.0636749267578125, -0.054656982421875, -0.0456390380859375, -0.03662109375, -0.0276031494140625, -0.018585205078125, -0.0095672607421875, -0.00054931640625, 0.0084686279296875, 0.017486572265625, 0.0265045166015625, 0.0355224609375, 0.0445404052734375, 0.053558349609375, 0.0625762939453125, 0.07159423828125, 0.0806121826171875, 0.089630126953125, 0.0986480712890625, 0.107666015625, 0.1166839599609375, 0.125701904296875, 0.1347198486328125, 0.14373779296875, 0.1527557373046875, 0.161773681640625, 0.1707916259765625, 0.1798095703125, 0.1888275146484375, 0.197845458984375, 0.2068634033203125, 0.21588134765625, 0.2248992919921875, 0.233917236328125, 0.2429351806640625, 0.251953125]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 6.0, 1.0, 9.0, 24.0, 34.0, 82.0, 148.0, 302.0, 211.0, 90.0, 41.0, 25.0, 10.0, 6.0, 6.0, 5.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.440393447875977, -6.190920829772949, -5.941448211669922, -5.6919755935668945, -5.442502975463867, -5.19303035736084, -4.9435577392578125, -4.694085121154785, -4.444612503051758, -4.1951398849487305, -3.945667266845703, -3.696194648742676, -3.4467220306396484, -3.197249412536621, -2.9477767944335938, -2.6983041763305664, -2.448831558227539, -2.1993589401245117, -1.9498863220214844, -1.700413703918457, -1.4509410858154297, -1.2014684677124023, -0.951995849609375, -0.7025232315063477, -0.4530506134033203, -0.20357799530029297, 0.045894622802734375, 0.2953672409057617, 0.5448398590087891, 0.7943124771118164, 1.0437850952148438, 1.293257713317871, 1.5427303314208984, 1.7922029495239258, 2.041675567626953, 2.2911481857299805, 2.540620803833008, 2.790093421936035, 3.0395660400390625, 3.28903865814209, 3.538511276245117, 3.7879838943481445, 4.037456512451172, 4.286929130554199, 4.536401748657227, 4.785874366760254, 5.035346984863281, 5.284819602966309, 5.534292221069336, 5.783764839172363, 6.033237457275391, 6.282710075378418, 6.532182693481445, 6.781655311584473, 7.0311279296875, 7.280600547790527, 7.530073165893555, 7.779545783996582, 8.02901840209961, 8.278491020202637, 8.527963638305664, 8.777436256408691, 9.026908874511719, 9.276381492614746, 9.525854110717773]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 2.0, 1.0, 2.0, 6.0, 2.0, 6.0, 5.0, 6.0, 11.0, 11.0, 13.0, 18.0, 22.0, 23.0, 22.0, 23.0, 21.0, 19.0, 39.0, 52.0, 87.0, 95.0, 124.0, 66.0, 43.0, 34.0, 32.0, 31.0, 22.0, 23.0, 25.0, 22.0, 14.0, 21.0, 11.0, 12.0, 7.0, 12.0, 7.0, 3.0, 4.0, 1.0, 1.0, 4.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.863215923309326, -3.7298285961151123, -3.5964412689208984, -3.4630539417266846, -3.3296666145324707, -3.1962790489196777, -3.062891721725464, -2.92950439453125, -2.796117067337036, -2.6627297401428223, -2.5293424129486084, -2.3959550857543945, -2.2625675201416016, -2.129180431365967, -1.9957928657531738, -1.86240553855896, -1.729018211364746, -1.5956308841705322, -1.4622435569763184, -1.328856110572815, -1.195468783378601, -1.0620814561843872, -0.9286940693855286, -0.7953066825866699, -0.661919355392456, -0.5285320281982422, -0.39514464139938354, -0.2617572844028473, -0.12836992740631104, 0.005017399787902832, 0.13840478658676147, 0.2717921733856201, 0.4051799774169922, 0.538567304611206, 0.6719546914100647, 0.8053420782089233, 0.9387294054031372, 1.072116732597351, 1.2055041790008545, 1.3388915061950684, 1.4722788333892822, 1.605666160583496, 1.73905348777771, 1.8724409341812134, 2.005828380584717, 2.1392154693603516, 2.2726030349731445, 2.4059903621673584, 2.5393776893615723, 2.672765016555786, 2.80615234375, 2.939539670944214, 3.0729269981384277, 3.2063145637512207, 3.3397018909454346, 3.4730892181396484, 3.6064765453338623, 3.739863872528076, 3.87325119972229, 4.006638526916504, 4.140026092529297, 4.273413181304932, 4.406800746917725, 4.540187835693359, 4.673575401306152]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 10.0, 8.0, 5.0, 16.0, 17.0, 15.0, 43.0, 47.0, 75.0, 99.0, 120.0, 221.0, 393.0, 701.0, 1649.0, 4536.0, 20237.0, 186845.0, 2503499.0, 1370087.0, 86796.0, 12657.0, 3371.0, 1311.0, 585.0, 279.0, 194.0, 118.0, 78.0, 70.0, 54.0, 32.0, 35.0, 19.0, 14.0, 14.0, 4.0, 8.0, 8.0, 2.0, 3.0, 2.0, 6.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.69482421875, -0.67230224609375, -0.6497802734375, -0.62725830078125, -0.604736328125, -0.58221435546875, -0.5596923828125, -0.53717041015625, -0.5146484375, -0.49212646484375, -0.4696044921875, -0.44708251953125, -0.424560546875, -0.40203857421875, -0.3795166015625, -0.35699462890625, -0.33447265625, -0.31195068359375, -0.2894287109375, -0.26690673828125, -0.244384765625, -0.22186279296875, -0.1993408203125, -0.17681884765625, -0.154296875, -0.13177490234375, -0.1092529296875, -0.08673095703125, -0.064208984375, -0.04168701171875, -0.0191650390625, 0.00335693359375, 0.02587890625, 0.04840087890625, 0.0709228515625, 0.09344482421875, 0.115966796875, 0.13848876953125, 0.1610107421875, 0.18353271484375, 0.2060546875, 0.22857666015625, 0.2510986328125, 0.27362060546875, 0.296142578125, 0.31866455078125, 0.3411865234375, 0.36370849609375, 0.38623046875, 0.40875244140625, 0.4312744140625, 0.45379638671875, 0.476318359375, 0.49884033203125, 0.5213623046875, 0.54388427734375, 0.56640625, 0.58892822265625, 0.6114501953125, 0.63397216796875, 0.656494140625, 0.67901611328125, 0.7015380859375, 0.72406005859375, 0.74658203125]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 4.0, 4.0, 4.0, 6.0, 5.0, 8.0, 17.0, 23.0, 15.0, 22.0, 34.0, 30.0, 43.0, 35.0, 46.0, 61.0, 60.0, 52.0, 77.0, 57.0, 68.0, 41.0, 46.0, 33.0, 37.0, 31.0, 35.0, 28.0, 23.0, 20.0, 10.0, 7.0, 5.0, 8.0, 7.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.37158203125, -0.36011505126953125, -0.3486480712890625, -0.33718109130859375, -0.325714111328125, -0.31424713134765625, -0.3027801513671875, -0.29131317138671875, -0.27984619140625, -0.26837921142578125, -0.2569122314453125, -0.24544525146484375, -0.233978271484375, -0.22251129150390625, -0.2110443115234375, -0.19957733154296875, -0.1881103515625, -0.17664337158203125, -0.1651763916015625, -0.15370941162109375, -0.142242431640625, -0.13077545166015625, -0.1193084716796875, -0.10784149169921875, -0.09637451171875, -0.08490753173828125, -0.0734405517578125, -0.06197357177734375, -0.050506591796875, -0.03903961181640625, -0.0275726318359375, -0.01610565185546875, -0.004638671875, 0.00682830810546875, 0.0182952880859375, 0.02976226806640625, 0.041229248046875, 0.05269622802734375, 0.0641632080078125, 0.07563018798828125, 0.08709716796875, 0.09856414794921875, 0.1100311279296875, 0.12149810791015625, 0.132965087890625, 0.14443206787109375, 0.1558990478515625, 0.16736602783203125, 0.1788330078125, 0.19029998779296875, 0.2017669677734375, 0.21323394775390625, 0.224700927734375, 0.23616790771484375, 0.2476348876953125, 0.25910186767578125, 0.27056884765625, 0.28203582763671875, 0.2935028076171875, 0.30496978759765625, 0.316436767578125, 0.32790374755859375, 0.3393707275390625, 0.35083770751953125, 0.3623046875]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 9.0, 5.0, 18.0, 12.0, 35.0, 39.0, 98.0, 185.0, 362.0, 952.0, 4239.0, 59308.0, 3882484.0, 236301.0, 8056.0, 1333.0, 435.0, 190.0, 101.0, 49.0, 23.0, 16.0, 14.0, 7.0, 6.0, 6.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.373046875, -1.32513427734375, -1.2772216796875, -1.22930908203125, -1.181396484375, -1.13348388671875, -1.0855712890625, -1.03765869140625, -0.98974609375, -0.94183349609375, -0.8939208984375, -0.84600830078125, -0.798095703125, -0.75018310546875, -0.7022705078125, -0.65435791015625, -0.6064453125, -0.55853271484375, -0.5106201171875, -0.46270751953125, -0.414794921875, -0.36688232421875, -0.3189697265625, -0.27105712890625, -0.22314453125, -0.17523193359375, -0.1273193359375, -0.07940673828125, -0.031494140625, 0.01641845703125, 0.0643310546875, 0.11224365234375, 0.16015625, 0.20806884765625, 0.2559814453125, 0.30389404296875, 0.351806640625, 0.39971923828125, 0.4476318359375, 0.49554443359375, 0.54345703125, 0.59136962890625, 0.6392822265625, 0.68719482421875, 0.735107421875, 0.78302001953125, 0.8309326171875, 0.87884521484375, 0.9267578125, 0.97467041015625, 1.0225830078125, 1.07049560546875, 1.118408203125, 1.16632080078125, 1.2142333984375, 1.26214599609375, 1.31005859375, 1.35797119140625, 1.4058837890625, 1.45379638671875, 1.501708984375, 1.54962158203125, 1.5975341796875, 1.64544677734375, 1.693359375]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 4.0, 7.0, 6.0, 6.0, 13.0, 16.0, 27.0, 42.0, 70.0, 144.0, 259.0, 490.0, 798.0, 920.0, 620.0, 303.0, 158.0, 84.0, 52.0, 22.0, 15.0, 9.0, 5.0, 4.0, 3.0, 3.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.591796875, -1.5557022094726562, -1.5196075439453125, -1.4835128784179688, -1.447418212890625, -1.4113235473632812, -1.3752288818359375, -1.3391342163085938, -1.30303955078125, -1.2669448852539062, -1.2308502197265625, -1.1947555541992188, -1.158660888671875, -1.1225662231445312, -1.0864715576171875, -1.0503768920898438, -1.0142822265625, -0.9781875610351562, -0.9420928955078125, -0.9059982299804688, -0.869903564453125, -0.8338088989257812, -0.7977142333984375, -0.7616195678710938, -0.72552490234375, -0.6894302368164062, -0.6533355712890625, -0.6172409057617188, -0.581146240234375, -0.5450515747070312, -0.5089569091796875, -0.47286224365234375, -0.436767578125, -0.40067291259765625, -0.3645782470703125, -0.32848358154296875, -0.292388916015625, -0.25629425048828125, -0.2201995849609375, -0.18410491943359375, -0.14801025390625, -0.11191558837890625, -0.0758209228515625, -0.03972625732421875, -0.003631591796875, 0.03246307373046875, 0.0685577392578125, 0.10465240478515625, 0.1407470703125, 0.17684173583984375, 0.2129364013671875, 0.24903106689453125, 0.285125732421875, 0.32122039794921875, 0.3573150634765625, 0.39340972900390625, 0.42950439453125, 0.46559906005859375, 0.5016937255859375, 0.5377883911132812, 0.573883056640625, 0.6099777221679688, 0.6460723876953125, 0.6821670532226562, 0.71826171875]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 3.0, 8.0, 13.0, 7.0, 24.0, 33.0, 59.0, 91.0, 133.0, 163.0, 166.0, 124.0, 81.0, 48.0, 21.0, 12.0, 7.0, 5.0, 4.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-10.421880722045898, -10.205196380615234, -9.988511085510254, -9.77182674407959, -9.55514144897461, -9.338457107543945, -9.121771812438965, -8.9050874710083, -8.68840217590332, -8.471717834472656, -8.255032539367676, -8.038348197937012, -7.821662902832031, -7.604978084564209, -7.388293266296387, -7.1716084480285645, -6.954923629760742, -6.73823881149292, -6.521553993225098, -6.304869174957275, -6.088184356689453, -5.871499538421631, -5.654814720153809, -5.438129901885986, -5.221445560455322, -5.0047607421875, -4.788075923919678, -4.5713911056518555, -4.354706287384033, -4.138021469116211, -3.9213366508483887, -3.7046518325805664, -3.487967014312744, -3.271282196044922, -3.0545973777770996, -2.8379125595092773, -2.621227741241455, -2.404542922973633, -2.1878581047058105, -1.9711734056472778, -1.7544885873794556, -1.5378037691116333, -1.321118950843811, -1.1044342517852783, -0.8877493739128113, -0.6710646152496338, -0.4543797969818115, -0.23769497871398926, -0.021010160446166992, 0.19567464292049408, 0.41235944628715515, 0.629044234752655, 0.8457290530204773, 1.0624138116836548, 1.279098629951477, 1.4957834482192993, 1.7124682664871216, 1.9291530847549438, 2.1458377838134766, 2.362522602081299, 2.579207420349121, 2.7958922386169434, 3.0125770568847656, 3.229261875152588, 3.44594669342041]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 4.0, 1.0, 3.0, 1.0, 5.0, 3.0, 5.0, 5.0, 8.0, 14.0, 17.0, 14.0, 12.0, 21.0, 24.0, 27.0, 31.0, 31.0, 30.0, 38.0, 49.0, 43.0, 55.0, 30.0, 44.0, 50.0, 45.0, 48.0, 49.0, 44.0, 31.0, 27.0, 28.0, 25.0, 27.0, 20.0, 16.0, 16.0, 13.0, 10.0, 13.0, 8.0, 7.0, 12.0, 4.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.2860822677612305, -3.170498847961426, -3.054915189743042, -2.9393317699432373, -2.8237481117248535, -2.708164691925049, -2.592581033706665, -2.4769976139068604, -2.3614139556884766, -2.245830535888672, -2.130246877670288, -2.0146634578704834, -1.8990797996520996, -1.783496379852295, -1.6679127216339111, -1.5523293018341064, -1.4367457628250122, -1.321162223815918, -1.2055786848068237, -1.0899951457977295, -0.9744116067886353, -0.8588281273841858, -0.7432445883750916, -0.6276610493659973, -0.5120775103569031, -0.39649397134780884, -0.2809104323387146, -0.16532692313194275, -0.04974338412284851, 0.06584012508392334, 0.18142366409301758, 0.2970072031021118, 0.41259074211120605, 0.5281742811203003, 0.6437578201293945, 0.7593413591384888, 0.874924898147583, 0.9905083775520325, 1.1060919761657715, 1.2216753959655762, 1.33725905418396, 1.4528425931930542, 1.5684261322021484, 1.6840096712112427, 1.799593210220337, 1.9151766300201416, 2.0307602882385254, 2.14634370803833, 2.2619271278381348, 2.3775105476379395, 2.4930942058563232, 2.608677625656128, 2.7242612838745117, 2.8398447036743164, 2.9554283618927, 3.071011781692505, 3.1865954399108887, 3.3021788597106934, 3.417762517929077, 3.533345937728882, 3.6489295959472656, 3.7645130157470703, 3.880096673965454, 3.995680093765259, 4.111263751983643]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 12.0, 9.0, 29.0, 33.0, 61.0, 88.0, 147.0, 235.0, 425.0, 724.0, 1229.0, 2125.0, 3859.0, 7358.0, 14876.0, 31952.0, 75326.0, 188256.0, 350697.0, 215737.0, 85345.0, 35673.0, 16345.0, 8180.0, 4284.0, 2364.0, 1308.0, 747.0, 440.0, 243.0, 151.0, 97.0, 85.0, 39.0, 25.0, 16.0, 8.0, 7.0, 6.0, 2.0, 3.0, 1.0, 4.0, 1.0, 0.0, 1.0], "bins": [-0.5830078125, -0.5670661926269531, -0.5511245727539062, -0.5351829528808594, -0.5192413330078125, -0.5032997131347656, -0.48735809326171875, -0.4714164733886719, -0.455474853515625, -0.4395332336425781, -0.42359161376953125, -0.4076499938964844, -0.3917083740234375, -0.3757667541503906, -0.35982513427734375, -0.3438835144042969, -0.32794189453125, -0.3120002746582031, -0.29605865478515625, -0.2801170349121094, -0.2641754150390625, -0.24823379516601562, -0.23229217529296875, -0.21635055541992188, -0.200408935546875, -0.18446731567382812, -0.16852569580078125, -0.15258407592773438, -0.1366424560546875, -0.12070083618164062, -0.10475921630859375, -0.08881759643554688, -0.0728759765625, -0.056934356689453125, -0.04099273681640625, -0.025051116943359375, -0.0091094970703125, 0.006832122802734375, 0.02277374267578125, 0.038715362548828125, 0.054656982421875, 0.07059860229492188, 0.08654022216796875, 0.10248184204101562, 0.1184234619140625, 0.13436508178710938, 0.15030670166015625, 0.16624832153320312, 0.18218994140625, 0.19813156127929688, 0.21407318115234375, 0.23001480102539062, 0.2459564208984375, 0.2618980407714844, 0.27783966064453125, 0.2937812805175781, 0.309722900390625, 0.3256645202636719, 0.34160614013671875, 0.3575477600097656, 0.3734893798828125, 0.3894309997558594, 0.40537261962890625, 0.4213142395019531, 0.437255859375]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 2.0, 3.0, 3.0, 7.0, 4.0, 9.0, 7.0, 11.0, 9.0, 11.0, 18.0, 28.0, 41.0, 26.0, 42.0, 33.0, 51.0, 42.0, 43.0, 57.0, 55.0, 76.0, 65.0, 50.0, 41.0, 39.0, 38.0, 34.0, 28.0, 33.0, 24.0, 11.0, 14.0, 13.0, 7.0, 14.0, 9.0, 1.0, 3.0, 2.0, 4.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.373779296875, -0.3620338439941406, -0.35028839111328125, -0.3385429382324219, -0.3267974853515625, -0.3150520324707031, -0.30330657958984375, -0.2915611267089844, -0.279815673828125, -0.2680702209472656, -0.25632476806640625, -0.24457931518554688, -0.2328338623046875, -0.22108840942382812, -0.20934295654296875, -0.19759750366210938, -0.18585205078125, -0.17410659790039062, -0.16236114501953125, -0.15061569213867188, -0.1388702392578125, -0.12712478637695312, -0.11537933349609375, -0.10363388061523438, -0.091888427734375, -0.08014297485351562, -0.06839752197265625, -0.056652069091796875, -0.0449066162109375, -0.033161163330078125, -0.02141571044921875, -0.009670257568359375, 0.0020751953125, 0.013820648193359375, 0.02556610107421875, 0.037311553955078125, 0.0490570068359375, 0.060802459716796875, 0.07254791259765625, 0.08429336547851562, 0.096038818359375, 0.10778427124023438, 0.11952972412109375, 0.13127517700195312, 0.1430206298828125, 0.15476608276367188, 0.16651153564453125, 0.17825698852539062, 0.19000244140625, 0.20174789428710938, 0.21349334716796875, 0.22523880004882812, 0.2369842529296875, 0.24872970581054688, 0.26047515869140625, 0.2722206115722656, 0.283966064453125, 0.2957115173339844, 0.30745697021484375, 0.3192024230957031, 0.3309478759765625, 0.3426933288574219, 0.35443878173828125, 0.3661842346191406, 0.3779296875]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 2.0, 2.0, 3.0, 6.0, 5.0, 6.0, 4.0, 5.0, 14.0, 13.0, 16.0, 25.0, 32.0, 41.0, 55.0, 83.0, 107.0, 148.0, 216.0, 287.0, 471.0, 765.0, 1352.0, 3118.0, 12097.0, 106648.0, 822981.0, 83297.0, 10430.0, 2788.0, 1300.0, 747.0, 455.0, 277.0, 210.0, 132.0, 119.0, 73.0, 55.0, 44.0, 30.0, 29.0, 24.0, 9.0, 12.0, 6.0, 8.0, 5.0, 2.0, 4.0, 0.0, 6.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.123046875, -1.0849151611328125, -1.046783447265625, -1.0086517333984375, -0.97052001953125, -0.9323883056640625, -0.894256591796875, -0.8561248779296875, -0.8179931640625, -0.7798614501953125, -0.741729736328125, -0.7035980224609375, -0.66546630859375, -0.6273345947265625, -0.589202880859375, -0.5510711669921875, -0.512939453125, -0.4748077392578125, -0.436676025390625, -0.3985443115234375, -0.36041259765625, -0.3222808837890625, -0.284149169921875, -0.2460174560546875, -0.2078857421875, -0.1697540283203125, -0.131622314453125, -0.0934906005859375, -0.05535888671875, -0.0172271728515625, 0.020904541015625, 0.0590362548828125, 0.09716796875, 0.1352996826171875, 0.173431396484375, 0.2115631103515625, 0.24969482421875, 0.2878265380859375, 0.325958251953125, 0.3640899658203125, 0.4022216796875, 0.4403533935546875, 0.478485107421875, 0.5166168212890625, 0.55474853515625, 0.5928802490234375, 0.631011962890625, 0.6691436767578125, 0.707275390625, 0.7454071044921875, 0.783538818359375, 0.8216705322265625, 0.85980224609375, 0.8979339599609375, 0.936065673828125, 0.9741973876953125, 1.0123291015625, 1.0504608154296875, 1.088592529296875, 1.1267242431640625, 1.16485595703125, 1.2029876708984375, 1.241119384765625, 1.2792510986328125, 1.3173828125]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 6.0, 3.0, 5.0, 7.0, 8.0, 20.0, 15.0, 17.0, 17.0, 22.0, 30.0, 29.0, 41.0, 33.0, 44.0, 68.0, 75.0, 72.0, 65.0, 53.0, 54.0, 46.0, 47.0, 38.0, 42.0, 25.0, 26.0, 15.0, 14.0, 16.0, 8.0, 5.0, 5.0, 7.0, 7.0, 5.0, 2.0, 2.0, 4.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.5498046875, -1.5025177001953125, -1.455230712890625, -1.4079437255859375, -1.36065673828125, -1.3133697509765625, -1.266082763671875, -1.2187957763671875, -1.1715087890625, -1.1242218017578125, -1.076934814453125, -1.0296478271484375, -0.98236083984375, -0.9350738525390625, -0.887786865234375, -0.8404998779296875, -0.793212890625, -0.7459259033203125, -0.698638916015625, -0.6513519287109375, -0.60406494140625, -0.5567779541015625, -0.509490966796875, -0.4622039794921875, -0.4149169921875, -0.3676300048828125, -0.320343017578125, -0.2730560302734375, -0.22576904296875, -0.1784820556640625, -0.131195068359375, -0.0839080810546875, -0.03662109375, 0.0106658935546875, 0.057952880859375, 0.1052398681640625, 0.15252685546875, 0.1998138427734375, 0.247100830078125, 0.2943878173828125, 0.3416748046875, 0.3889617919921875, 0.436248779296875, 0.4835357666015625, 0.53082275390625, 0.5781097412109375, 0.625396728515625, 0.6726837158203125, 0.719970703125, 0.7672576904296875, 0.814544677734375, 0.8618316650390625, 0.90911865234375, 0.9564056396484375, 1.003692626953125, 1.0509796142578125, 1.0982666015625, 1.1455535888671875, 1.192840576171875, 1.2401275634765625, 1.28741455078125, 1.3347015380859375, 1.381988525390625, 1.4292755126953125, 1.4765625]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 6.0, 4.0, 11.0, 16.0, 16.0, 22.0, 38.0, 58.0, 95.0, 185.0, 443.0, 1019.0, 3183.0, 13945.0, 104359.0, 826193.0, 82502.0, 11945.0, 2707.0, 931.0, 351.0, 194.0, 117.0, 80.0, 48.0, 22.0, 19.0, 18.0, 13.0, 6.0, 5.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.346435546875, -0.3360786437988281, -0.32572174072265625, -0.3153648376464844, -0.3050079345703125, -0.2946510314941406, -0.28429412841796875, -0.2739372253417969, -0.263580322265625, -0.2532234191894531, -0.24286651611328125, -0.23250961303710938, -0.2221527099609375, -0.21179580688476562, -0.20143890380859375, -0.19108200073242188, -0.18072509765625, -0.17036819458007812, -0.16001129150390625, -0.14965438842773438, -0.1392974853515625, -0.12894058227539062, -0.11858367919921875, -0.10822677612304688, -0.097869873046875, -0.08751296997070312, -0.07715606689453125, -0.06679916381835938, -0.0564422607421875, -0.046085357666015625, -0.03572845458984375, -0.025371551513671875, -0.0150146484375, -0.004657745361328125, 0.00569915771484375, 0.016056060791015625, 0.0264129638671875, 0.036769866943359375, 0.04712677001953125, 0.057483673095703125, 0.067840576171875, 0.07819747924804688, 0.08855438232421875, 0.09891128540039062, 0.1092681884765625, 0.11962509155273438, 0.12998199462890625, 0.14033889770507812, 0.15069580078125, 0.16105270385742188, 0.17140960693359375, 0.18176651000976562, 0.1921234130859375, 0.20248031616210938, 0.21283721923828125, 0.22319412231445312, 0.233551025390625, 0.24390792846679688, 0.25426483154296875, 0.2646217346191406, 0.2749786376953125, 0.2853355407714844, 0.29569244384765625, 0.3060493469238281, 0.31640625]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 3.0, 12.0, 13.0, 11.0, 15.0, 21.0, 23.0, 20.0, 25.0, 28.0, 46.0, 47.0, 69.0, 61.0, 94.0, 80.0, 66.0, 49.0, 56.0, 42.0, 34.0, 26.0, 30.0, 19.0, 17.0, 13.0, 10.0, 10.0, 10.0, 7.0, 6.0, 6.0, 1.0, 3.0, 4.0, 4.0, 0.0, 5.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.125999450683594e-05, -4.9598515033721924e-05, -4.793703556060791e-05, -4.6275556087493896e-05, -4.461407661437988e-05, -4.295259714126587e-05, -4.1291117668151855e-05, -3.962963819503784e-05, -3.796815872192383e-05, -3.6306679248809814e-05, -3.46451997756958e-05, -3.298372030258179e-05, -3.1322240829467773e-05, -2.966076135635376e-05, -2.7999281883239746e-05, -2.6337802410125732e-05, -2.467632293701172e-05, -2.3014843463897705e-05, -2.135336399078369e-05, -1.9691884517669678e-05, -1.8030405044555664e-05, -1.636892557144165e-05, -1.4707446098327637e-05, -1.3045966625213623e-05, -1.138448715209961e-05, -9.723007678985596e-06, -8.061528205871582e-06, -6.400048732757568e-06, -4.738569259643555e-06, -3.077089786529541e-06, -1.4156103134155273e-06, 2.4586915969848633e-07, 1.9073486328125e-06, 3.5688281059265137e-06, 5.230307579040527e-06, 6.891787052154541e-06, 8.553266525268555e-06, 1.0214745998382568e-05, 1.1876225471496582e-05, 1.3537704944610596e-05, 1.519918441772461e-05, 1.6860663890838623e-05, 1.8522143363952637e-05, 2.018362283706665e-05, 2.1845102310180664e-05, 2.3506581783294678e-05, 2.516806125640869e-05, 2.6829540729522705e-05, 2.849102020263672e-05, 3.0152499675750732e-05, 3.1813979148864746e-05, 3.347545862197876e-05, 3.5136938095092773e-05, 3.679841756820679e-05, 3.84598970413208e-05, 4.0121376514434814e-05, 4.178285598754883e-05, 4.344433546066284e-05, 4.5105814933776855e-05, 4.676729440689087e-05, 4.842877388000488e-05, 5.0090253353118896e-05, 5.175173282623291e-05, 5.3413212299346924e-05, 5.507469177246094e-05]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 7.0, 9.0, 25.0, 32.0, 63.0, 93.0, 213.0, 483.0, 1031.0, 2874.0, 12381.0, 114285.0, 833485.0, 70880.0, 8844.0, 2262.0, 779.0, 376.0, 203.0, 83.0, 55.0, 38.0, 23.0, 12.0, 3.0, 6.0, 6.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.376220703125, -0.3658866882324219, -0.35555267333984375, -0.3452186584472656, -0.3348846435546875, -0.3245506286621094, -0.31421661376953125, -0.3038825988769531, -0.293548583984375, -0.2832145690917969, -0.27288055419921875, -0.2625465393066406, -0.2522125244140625, -0.24187850952148438, -0.23154449462890625, -0.22121047973632812, -0.21087646484375, -0.20054244995117188, -0.19020843505859375, -0.17987442016601562, -0.1695404052734375, -0.15920639038085938, -0.14887237548828125, -0.13853836059570312, -0.128204345703125, -0.11787033081054688, -0.10753631591796875, -0.09720230102539062, -0.0868682861328125, -0.07653427124023438, -0.06620025634765625, -0.055866241455078125, -0.0455322265625, -0.035198211669921875, -0.02486419677734375, -0.014530181884765625, -0.0041961669921875, 0.006137847900390625, 0.01647186279296875, 0.026805877685546875, 0.037139892578125, 0.047473907470703125, 0.05780792236328125, 0.06814193725585938, 0.0784759521484375, 0.08880996704101562, 0.09914398193359375, 0.10947799682617188, 0.11981201171875, 0.13014602661132812, 0.14048004150390625, 0.15081405639648438, 0.1611480712890625, 0.17148208618164062, 0.18181610107421875, 0.19215011596679688, 0.202484130859375, 0.21281814575195312, 0.22315216064453125, 0.23348617553710938, 0.2438201904296875, 0.2541542053222656, 0.26448822021484375, 0.2748222351074219, 0.28515625]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 7.0, 8.0, 15.0, 21.0, 20.0, 28.0, 44.0, 38.0, 58.0, 82.0, 90.0, 98.0, 88.0, 86.0, 70.0, 63.0, 56.0, 39.0, 23.0, 23.0, 10.0, 12.0, 5.0, 6.0, 2.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.146240234375, -0.13813400268554688, -0.13002777099609375, -0.12192153930664062, -0.1138153076171875, -0.10570907592773438, -0.09760284423828125, -0.08949661254882812, -0.081390380859375, -0.07328414916992188, -0.06517791748046875, -0.057071685791015625, -0.0489654541015625, -0.040859222412109375, -0.03275299072265625, -0.024646759033203125, -0.01654052734375, -0.008434295654296875, -0.00032806396484375, 0.007778167724609375, 0.0158843994140625, 0.023990631103515625, 0.03209686279296875, 0.040203094482421875, 0.048309326171875, 0.056415557861328125, 0.06452178955078125, 0.07262802124023438, 0.0807342529296875, 0.08884048461914062, 0.09694671630859375, 0.10505294799804688, 0.1131591796875, 0.12126541137695312, 0.12937164306640625, 0.13747787475585938, 0.1455841064453125, 0.15369033813476562, 0.16179656982421875, 0.16990280151367188, 0.178009033203125, 0.18611526489257812, 0.19422149658203125, 0.20232772827148438, 0.2104339599609375, 0.21854019165039062, 0.22664642333984375, 0.23475265502929688, 0.24285888671875, 0.2509651184082031, 0.25907135009765625, 0.2671775817871094, 0.2752838134765625, 0.2833900451660156, 0.29149627685546875, 0.2996025085449219, 0.307708740234375, 0.3158149719238281, 0.32392120361328125, 0.3320274353027344, 0.3401336669921875, 0.3482398986816406, 0.35634613037109375, 0.3644523620605469, 0.37255859375]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 0.0, 3.0, 4.0, 5.0, 10.0, 20.0, 49.0, 116.0, 336.0, 269.0, 91.0, 50.0, 24.0, 16.0, 7.0, 3.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.845046997070312, -8.539398193359375, -8.233749389648438, -7.928099632263184, -7.622450828552246, -7.316802024841309, -7.011152744293213, -6.705503463745117, -6.39985466003418, -6.094205856323242, -5.7885565757751465, -5.482907295227051, -5.177258491516113, -4.871609687805176, -4.56596040725708, -4.260311126708984, -3.954662322998047, -3.6490132808685303, -3.3433642387390137, -3.037715196609497, -2.7320661544799805, -2.426417112350464, -2.1207680702209473, -1.8151190280914307, -1.509469985961914, -1.2038209438323975, -0.8981719017028809, -0.5925228595733643, -0.28687381744384766, 0.018775224685668945, 0.32442426681518555, 0.6300733089447021, 0.9357233047485352, 1.2413723468780518, 1.5470213890075684, 1.852670431137085, 2.1583194732666016, 2.463968515396118, 2.7696175575256348, 3.0752665996551514, 3.380915641784668, 3.6865646839141846, 3.992213726043701, 4.297863006591797, 4.603511810302734, 4.909160614013672, 5.214809894561768, 5.520459175109863, 5.826107978820801, 6.131756782531738, 6.437406063079834, 6.74305534362793, 7.048704147338867, 7.354352951049805, 7.6600022315979, 7.965651512145996, 8.271300315856934, 8.576949119567871, 8.882598876953125, 9.188247680664062, 9.493896484375, 9.799545288085938, 10.105194091796875, 10.410843849182129, 10.716492652893066]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [2.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 0.0, 6.0, 4.0, 2.0, 5.0, 8.0, 5.0, 11.0, 13.0, 18.0, 12.0, 12.0, 23.0, 19.0, 22.0, 31.0, 26.0, 37.0, 25.0, 50.0, 99.0, 166.0, 89.0, 47.0, 39.0, 26.0, 22.0, 21.0, 22.0, 18.0, 19.0, 21.0, 14.0, 7.0, 6.0, 8.0, 8.0, 8.0, 7.0, 2.0, 4.0, 3.0, 2.0, 9.0, 3.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.150975227355957, -4.969811916351318, -4.788649082183838, -4.607485771179199, -4.426322937011719, -4.24515962600708, -4.063996315002441, -3.882833480834961, -3.7016704082489014, -3.520507335662842, -3.3393442630767822, -3.1581811904907227, -2.977017879486084, -2.7958550453186035, -2.614691734313965, -2.4335286617279053, -2.2523655891418457, -2.071202516555786, -1.8900394439697266, -1.7088762521743774, -1.5277131795883179, -1.3465501070022583, -1.1653869152069092, -0.9842238426208496, -0.80306077003479, -0.6218976974487305, -0.4407345652580261, -0.2595714330673218, -0.07840836048126221, 0.10275471210479736, 0.2839179039001465, 0.46508097648620605, 0.6462440490722656, 0.8274071216583252, 1.0085701942443848, 1.1897333860397339, 1.3708964586257935, 1.552059531211853, 1.7332227230072021, 1.9143857955932617, 2.0955488681793213, 2.276711940765381, 2.4578750133514404, 2.6390380859375, 2.8202013969421387, 3.001364231109619, 3.182527542114258, 3.3636906147003174, 3.544853687286377, 3.7260167598724365, 3.907179832458496, 4.088343143463135, 4.269505977630615, 4.450669288635254, 4.631832122802734, 4.812995433807373, 4.994158744812012, 5.17532205581665, 5.356484889984131, 5.5376482009887695, 5.71881103515625, 5.899974346160889, 6.081137657165527, 6.262300491333008, 6.443463325500488]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 6.0, 4.0, 5.0, 7.0, 12.0, 10.0, 12.0, 26.0, 47.0, 71.0, 145.0, 244.0, 512.0, 943.0, 2315.0, 6326.0, 26968.0, 222531.0, 1947236.0, 1757586.0, 193331.0, 25206.0, 6272.0, 2332.0, 1036.0, 520.0, 257.0, 129.0, 80.0, 47.0, 26.0, 13.0, 10.0, 5.0, 7.0, 3.0, 2.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.85986328125, -0.8352737426757812, -0.8106842041015625, -0.7860946655273438, -0.761505126953125, -0.7369155883789062, -0.7123260498046875, -0.6877365112304688, -0.66314697265625, -0.6385574340820312, -0.6139678955078125, -0.5893783569335938, -0.564788818359375, -0.5401992797851562, -0.5156097412109375, -0.49102020263671875, -0.4664306640625, -0.44184112548828125, -0.4172515869140625, -0.39266204833984375, -0.368072509765625, -0.34348297119140625, -0.3188934326171875, -0.29430389404296875, -0.26971435546875, -0.24512481689453125, -0.2205352783203125, -0.19594573974609375, -0.171356201171875, -0.14676666259765625, -0.1221771240234375, -0.09758758544921875, -0.072998046875, -0.04840850830078125, -0.0238189697265625, 0.00077056884765625, 0.025360107421875, 0.04994964599609375, 0.0745391845703125, 0.09912872314453125, 0.12371826171875, 0.14830780029296875, 0.1728973388671875, 0.19748687744140625, 0.222076416015625, 0.24666595458984375, 0.2712554931640625, 0.29584503173828125, 0.3204345703125, 0.34502410888671875, 0.3696136474609375, 0.39420318603515625, 0.418792724609375, 0.44338226318359375, 0.4679718017578125, 0.49256134033203125, 0.51715087890625, 0.5417404174804688, 0.5663299560546875, 0.5909194946289062, 0.615509033203125, 0.6400985717773438, 0.6646881103515625, 0.6892776489257812, 0.7138671875]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 1.0, 2.0, 5.0, 1.0, 6.0, 7.0, 7.0, 9.0, 11.0, 18.0, 19.0, 20.0, 23.0, 31.0, 42.0, 37.0, 42.0, 71.0, 64.0, 60.0, 58.0, 66.0, 60.0, 49.0, 45.0, 35.0, 34.0, 24.0, 36.0, 24.0, 26.0, 19.0, 15.0, 12.0, 6.0, 3.0, 7.0, 5.0, 5.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.361572265625, -0.3504638671875, -0.33935546875, -0.3282470703125, -0.317138671875, -0.3060302734375, -0.294921875, -0.2838134765625, -0.272705078125, -0.2615966796875, -0.25048828125, -0.2393798828125, -0.228271484375, -0.2171630859375, -0.2060546875, -0.1949462890625, -0.183837890625, -0.1727294921875, -0.16162109375, -0.1505126953125, -0.139404296875, -0.1282958984375, -0.1171875, -0.1060791015625, -0.094970703125, -0.0838623046875, -0.07275390625, -0.0616455078125, -0.050537109375, -0.0394287109375, -0.0283203125, -0.0172119140625, -0.006103515625, 0.0050048828125, 0.01611328125, 0.0272216796875, 0.038330078125, 0.0494384765625, 0.060546875, 0.0716552734375, 0.082763671875, 0.0938720703125, 0.10498046875, 0.1160888671875, 0.127197265625, 0.1383056640625, 0.1494140625, 0.1605224609375, 0.171630859375, 0.1827392578125, 0.19384765625, 0.2049560546875, 0.216064453125, 0.2271728515625, 0.23828125, 0.2493896484375, 0.260498046875, 0.2716064453125, 0.28271484375, 0.2938232421875, 0.304931640625, 0.3160400390625, 0.3271484375, 0.3382568359375, 0.349365234375]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 6.0, 6.0, 6.0, 10.0, 8.0, 22.0, 20.0, 40.0, 47.0, 66.0, 83.0, 131.0, 211.0, 350.0, 604.0, 1140.0, 2553.0, 7780.0, 47250.0, 2319179.0, 1760310.0, 42195.0, 7201.0, 2410.0, 1094.0, 580.0, 318.0, 229.0, 150.0, 93.0, 51.0, 36.0, 30.0, 19.0, 17.0, 10.0, 8.0, 8.0, 8.0, 2.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.6005859375, -1.5534820556640625, -1.506378173828125, -1.4592742919921875, -1.41217041015625, -1.3650665283203125, -1.317962646484375, -1.2708587646484375, -1.2237548828125, -1.1766510009765625, -1.129547119140625, -1.0824432373046875, -1.03533935546875, -0.9882354736328125, -0.941131591796875, -0.8940277099609375, -0.846923828125, -0.7998199462890625, -0.752716064453125, -0.7056121826171875, -0.65850830078125, -0.6114044189453125, -0.564300537109375, -0.5171966552734375, -0.4700927734375, -0.4229888916015625, -0.375885009765625, -0.3287811279296875, -0.28167724609375, -0.2345733642578125, -0.187469482421875, -0.1403656005859375, -0.09326171875, -0.0461578369140625, 0.000946044921875, 0.0480499267578125, 0.09515380859375, 0.1422576904296875, 0.189361572265625, 0.2364654541015625, 0.2835693359375, 0.3306732177734375, 0.377777099609375, 0.4248809814453125, 0.47198486328125, 0.5190887451171875, 0.566192626953125, 0.6132965087890625, 0.660400390625, 0.7075042724609375, 0.754608154296875, 0.8017120361328125, 0.84881591796875, 0.8959197998046875, 0.943023681640625, 0.9901275634765625, 1.0372314453125, 1.0843353271484375, 1.131439208984375, 1.1785430908203125, 1.22564697265625, 1.2727508544921875, 1.319854736328125, 1.3669586181640625, 1.4140625]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 8.0, 4.0, 5.0, 9.0, 12.0, 15.0, 13.0, 33.0, 48.0, 61.0, 80.0, 120.0, 179.0, 229.0, 337.0, 468.0, 558.0, 558.0, 407.0, 276.0, 209.0, 149.0, 92.0, 67.0, 44.0, 34.0, 21.0, 16.0, 11.0, 10.0, 2.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.4072265625, -1.3709259033203125, -1.334625244140625, -1.2983245849609375, -1.26202392578125, -1.2257232666015625, -1.189422607421875, -1.1531219482421875, -1.1168212890625, -1.0805206298828125, -1.044219970703125, -1.0079193115234375, -0.97161865234375, -0.9353179931640625, -0.899017333984375, -0.8627166748046875, -0.826416015625, -0.7901153564453125, -0.753814697265625, -0.7175140380859375, -0.68121337890625, -0.6449127197265625, -0.608612060546875, -0.5723114013671875, -0.5360107421875, -0.4997100830078125, -0.463409423828125, -0.4271087646484375, -0.39080810546875, -0.3545074462890625, -0.318206787109375, -0.2819061279296875, -0.24560546875, -0.2093048095703125, -0.173004150390625, -0.1367034912109375, -0.10040283203125, -0.0641021728515625, -0.027801513671875, 0.0084991455078125, 0.0447998046875, 0.0811004638671875, 0.117401123046875, 0.1537017822265625, 0.19000244140625, 0.2263031005859375, 0.262603759765625, 0.2989044189453125, 0.335205078125, 0.3715057373046875, 0.407806396484375, 0.4441070556640625, 0.48040771484375, 0.5167083740234375, 0.553009033203125, 0.5893096923828125, 0.6256103515625, 0.6619110107421875, 0.698211669921875, 0.7345123291015625, 0.77081298828125, 0.8071136474609375, 0.843414306640625, 0.8797149658203125, 0.916015625]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 3.0, 4.0, 6.0, 8.0, 14.0, 30.0, 56.0, 112.0, 232.0, 262.0, 143.0, 76.0, 26.0, 15.0, 7.0, 2.0, 5.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.248767852783203, -19.638607025146484, -19.028446197509766, -18.418283462524414, -17.808122634887695, -17.197961807250977, -16.587799072265625, -15.977638244628906, -15.367477416992188, -14.757316589355469, -14.147154808044434, -13.536993026733398, -12.92683219909668, -12.316671371459961, -11.706509590148926, -11.09634780883789, -10.486186981201172, -9.876026153564453, -9.265864372253418, -8.655702590942383, -8.045541763305664, -7.435380458831787, -6.82521915435791, -6.215057849884033, -5.604896545410156, -4.994735240936279, -4.384573936462402, -3.7744126319885254, -3.1642513275146484, -2.5540900230407715, -1.9439287185668945, -1.3337674140930176, -0.7236080169677734, -0.11344671249389648, 0.49671459197998047, 1.1068758964538574, 1.7170372009277344, 2.3271985054016113, 2.9373598098754883, 3.5475211143493652, 4.157682418823242, 4.767843723297119, 5.378005027770996, 5.988166332244873, 6.59832763671875, 7.208488941192627, 7.818650245666504, 8.428812026977539, 9.038972854614258, 9.649133682250977, 10.259295463562012, 10.869457244873047, 11.479618072509766, 12.089778900146484, 12.69994068145752, 13.310102462768555, 13.920263290405273, 14.530424118041992, 15.140585899353027, 15.750747680664062, 16.36090850830078, 16.9710693359375, 17.58123016357422, 18.19139289855957, 18.80155372619629]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 3.0, 4.0, 1.0, 5.0, 5.0, 13.0, 11.0, 14.0, 14.0, 20.0, 27.0, 20.0, 22.0, 26.0, 28.0, 31.0, 26.0, 32.0, 32.0, 34.0, 57.0, 64.0, 70.0, 43.0, 42.0, 50.0, 32.0, 32.0, 30.0, 34.0, 25.0, 34.0, 25.0, 14.0, 15.0, 19.0, 8.0, 8.0, 8.0, 5.0, 4.0, 6.0, 3.0, 4.0, 2.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-6.119375228881836, -5.936121940612793, -5.752869129180908, -5.569616317749023, -5.3863630294799805, -5.2031097412109375, -5.019856929779053, -4.836604118347168, -4.653350830078125, -4.470097541809082, -4.286844730377197, -4.1035919189453125, -3.9203386306762695, -3.7370855808258057, -3.553832530975342, -3.370579481124878, -3.187326431274414, -3.00407338142395, -2.8208203315734863, -2.6375672817230225, -2.4543142318725586, -2.2710611820220947, -2.087808132171631, -1.904555082321167, -1.7213020324707031, -1.5380489826202393, -1.3547959327697754, -1.1715428829193115, -0.9882898330688477, -0.8050367832183838, -0.6217837333679199, -0.43853068351745605, -0.2552781105041504, -0.07202506065368652, 0.11122798919677734, 0.2944810390472412, 0.4777340888977051, 0.660987138748169, 0.8442401885986328, 1.0274932384490967, 1.2107462882995605, 1.3939993381500244, 1.5772523880004883, 1.7605054378509521, 1.943758487701416, 2.12701153755188, 2.3102645874023438, 2.4935176372528076, 2.6767706871032715, 2.8600237369537354, 3.043276786804199, 3.226529836654663, 3.409782886505127, 3.593035936355591, 3.7762889862060547, 3.9595420360565186, 4.142795085906982, 4.326047897338867, 4.50930118560791, 4.692554473876953, 4.875807285308838, 5.059060096740723, 5.242313385009766, 5.425566673278809, 5.608819484710693]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 2.0, 2.0, 6.0, 8.0, 10.0, 14.0, 13.0, 9.0, 20.0, 35.0, 52.0, 86.0, 142.0, 243.0, 395.0, 704.0, 1478.0, 3029.0, 7000.0, 18730.0, 63785.0, 359880.0, 481678.0, 75037.0, 21450.0, 7862.0, 3395.0, 1578.0, 795.0, 425.0, 254.0, 145.0, 99.0, 53.0, 26.0, 27.0, 21.0, 19.0, 6.0, 9.0, 15.0, 3.0, 5.0, 6.0, 2.0, 3.0, 2.0, 0.0, 2.0, 2.0, 1.0, 2.0], "bins": [-0.72216796875, -0.7009658813476562, -0.6797637939453125, -0.6585617065429688, -0.637359619140625, -0.6161575317382812, -0.5949554443359375, -0.5737533569335938, -0.55255126953125, -0.5313491821289062, -0.5101470947265625, -0.48894500732421875, -0.467742919921875, -0.44654083251953125, -0.4253387451171875, -0.40413665771484375, -0.3829345703125, -0.36173248291015625, -0.3405303955078125, -0.31932830810546875, -0.298126220703125, -0.27692413330078125, -0.2557220458984375, -0.23451995849609375, -0.21331787109375, -0.19211578369140625, -0.1709136962890625, -0.14971160888671875, -0.128509521484375, -0.10730743408203125, -0.0861053466796875, -0.06490325927734375, -0.043701171875, -0.02249908447265625, -0.0012969970703125, 0.01990509033203125, 0.041107177734375, 0.06230926513671875, 0.0835113525390625, 0.10471343994140625, 0.12591552734375, 0.14711761474609375, 0.1683197021484375, 0.18952178955078125, 0.210723876953125, 0.23192596435546875, 0.2531280517578125, 0.27433013916015625, 0.2955322265625, 0.31673431396484375, 0.3379364013671875, 0.35913848876953125, 0.380340576171875, 0.40154266357421875, 0.4227447509765625, 0.44394683837890625, 0.46514892578125, 0.48635101318359375, 0.5075531005859375, 0.5287551879882812, 0.549957275390625, 0.5711593627929688, 0.5923614501953125, 0.6135635375976562, 0.634765625]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 5.0, 5.0, 5.0, 13.0, 9.0, 13.0, 19.0, 18.0, 21.0, 28.0, 33.0, 41.0, 45.0, 57.0, 56.0, 64.0, 55.0, 65.0, 71.0, 54.0, 42.0, 49.0, 38.0, 37.0, 30.0, 35.0, 18.0, 13.0, 13.0, 12.0, 8.0, 9.0, 5.0, 9.0, 3.0, 3.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.462158203125, -0.4486351013183594, -0.43511199951171875, -0.4215888977050781, -0.4080657958984375, -0.3945426940917969, -0.38101959228515625, -0.3674964904785156, -0.353973388671875, -0.3404502868652344, -0.32692718505859375, -0.3134040832519531, -0.2998809814453125, -0.2863578796386719, -0.27283477783203125, -0.2593116760253906, -0.24578857421875, -0.23226547241210938, -0.21874237060546875, -0.20521926879882812, -0.1916961669921875, -0.17817306518554688, -0.16464996337890625, -0.15112686157226562, -0.137603759765625, -0.12408065795898438, -0.11055755615234375, -0.09703445434570312, -0.0835113525390625, -0.06998825073242188, -0.05646514892578125, -0.042942047119140625, -0.0294189453125, -0.015895843505859375, -0.00237274169921875, 0.011150360107421875, 0.0246734619140625, 0.038196563720703125, 0.05171966552734375, 0.06524276733398438, 0.078765869140625, 0.09228897094726562, 0.10581207275390625, 0.11933517456054688, 0.1328582763671875, 0.14638137817382812, 0.15990447998046875, 0.17342758178710938, 0.18695068359375, 0.20047378540039062, 0.21399688720703125, 0.22751998901367188, 0.2410430908203125, 0.2545661926269531, 0.26808929443359375, 0.2816123962402344, 0.295135498046875, 0.3086585998535156, 0.32218170166015625, 0.3357048034667969, 0.3492279052734375, 0.3627510070800781, 0.37627410888671875, 0.3897972106933594, 0.4033203125]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 4.0, 1.0, 8.0, 5.0, 3.0, 8.0, 14.0, 19.0, 21.0, 29.0, 54.0, 81.0, 134.0, 192.0, 310.0, 609.0, 1378.0, 4693.0, 31372.0, 882322.0, 112405.0, 10606.0, 2242.0, 838.0, 444.0, 253.0, 162.0, 110.0, 85.0, 46.0, 29.0, 33.0, 21.0, 10.0, 5.0, 6.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 4.0], "bins": [-1.2197265625, -1.187408447265625, -1.15509033203125, -1.122772216796875, -1.0904541015625, -1.058135986328125, -1.02581787109375, -0.993499755859375, -0.961181640625, -0.928863525390625, -0.89654541015625, -0.864227294921875, -0.8319091796875, -0.799591064453125, -0.76727294921875, -0.734954833984375, -0.70263671875, -0.670318603515625, -0.63800048828125, -0.605682373046875, -0.5733642578125, -0.541046142578125, -0.50872802734375, -0.476409912109375, -0.444091796875, -0.411773681640625, -0.37945556640625, -0.347137451171875, -0.3148193359375, -0.282501220703125, -0.25018310546875, -0.217864990234375, -0.185546875, -0.153228759765625, -0.12091064453125, -0.088592529296875, -0.0562744140625, -0.023956298828125, 0.00836181640625, 0.040679931640625, 0.072998046875, 0.105316162109375, 0.13763427734375, 0.169952392578125, 0.2022705078125, 0.234588623046875, 0.26690673828125, 0.299224853515625, 0.33154296875, 0.363861083984375, 0.39617919921875, 0.428497314453125, 0.4608154296875, 0.493133544921875, 0.52545166015625, 0.557769775390625, 0.590087890625, 0.622406005859375, 0.65472412109375, 0.687042236328125, 0.7193603515625, 0.751678466796875, 0.78399658203125, 0.816314697265625, 0.8486328125]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 5.0, 4.0, 8.0, 7.0, 11.0, 7.0, 13.0, 14.0, 17.0, 20.0, 36.0, 29.0, 44.0, 42.0, 53.0, 84.0, 82.0, 98.0, 83.0, 68.0, 42.0, 38.0, 28.0, 26.0, 24.0, 19.0, 18.0, 14.0, 12.0, 10.0, 12.0, 5.0, 10.0, 7.0, 5.0, 5.0, 1.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.8505859375, -1.7966461181640625, -1.742706298828125, -1.6887664794921875, -1.63482666015625, -1.5808868408203125, -1.526947021484375, -1.4730072021484375, -1.4190673828125, -1.3651275634765625, -1.311187744140625, -1.2572479248046875, -1.20330810546875, -1.1493682861328125, -1.095428466796875, -1.0414886474609375, -0.987548828125, -0.9336090087890625, -0.879669189453125, -0.8257293701171875, -0.77178955078125, -0.7178497314453125, -0.663909912109375, -0.6099700927734375, -0.5560302734375, -0.5020904541015625, -0.448150634765625, -0.3942108154296875, -0.34027099609375, -0.2863311767578125, -0.232391357421875, -0.1784515380859375, -0.12451171875, -0.0705718994140625, -0.016632080078125, 0.0373077392578125, 0.09124755859375, 0.1451873779296875, 0.199127197265625, 0.2530670166015625, 0.3070068359375, 0.3609466552734375, 0.414886474609375, 0.4688262939453125, 0.52276611328125, 0.5767059326171875, 0.630645751953125, 0.6845855712890625, 0.738525390625, 0.7924652099609375, 0.846405029296875, 0.9003448486328125, 0.95428466796875, 1.0082244873046875, 1.062164306640625, 1.1161041259765625, 1.1700439453125, 1.2239837646484375, 1.277923583984375, 1.3318634033203125, 1.38580322265625, 1.4397430419921875, 1.493682861328125, 1.5476226806640625, 1.6015625]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 2.0, 2.0, 3.0, 6.0, 7.0, 7.0, 20.0, 18.0, 30.0, 40.0, 78.0, 142.0, 287.0, 682.0, 1837.0, 6790.0, 41730.0, 905192.0, 77254.0, 10406.0, 2546.0, 788.0, 299.0, 154.0, 86.0, 54.0, 26.0, 17.0, 17.0, 10.0, 11.0, 9.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.187744140625, -0.1821613311767578, -0.17657852172851562, -0.17099571228027344, -0.16541290283203125, -0.15983009338378906, -0.15424728393554688, -0.1486644744873047, -0.1430816650390625, -0.1374988555908203, -0.13191604614257812, -0.12633323669433594, -0.12075042724609375, -0.11516761779785156, -0.10958480834960938, -0.10400199890136719, -0.098419189453125, -0.09283638000488281, -0.08725357055664062, -0.08167076110839844, -0.07608795166015625, -0.07050514221191406, -0.06492233276367188, -0.05933952331542969, -0.0537567138671875, -0.04817390441894531, -0.042591094970703125, -0.03700828552246094, -0.03142547607421875, -0.025842666625976562, -0.020259857177734375, -0.014677047729492188, -0.00909423828125, -0.0035114288330078125, 0.002071380615234375, 0.0076541900634765625, 0.01323699951171875, 0.018819808959960938, 0.024402618408203125, 0.029985427856445312, 0.0355682373046875, 0.04115104675292969, 0.046733856201171875, 0.05231666564941406, 0.05789947509765625, 0.06348228454589844, 0.06906509399414062, 0.07464790344238281, 0.080230712890625, 0.08581352233886719, 0.09139633178710938, 0.09697914123535156, 0.10256195068359375, 0.10814476013183594, 0.11372756958007812, 0.11931037902832031, 0.1248931884765625, 0.1304759979248047, 0.13605880737304688, 0.14164161682128906, 0.14722442626953125, 0.15280723571777344, 0.15839004516601562, 0.1639728546142578, 0.1695556640625]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 3.0, 4.0, 6.0, 3.0, 8.0, 11.0, 10.0, 10.0, 15.0, 18.0, 28.0, 26.0, 29.0, 49.0, 72.0, 88.0, 120.0, 109.0, 99.0, 71.0, 57.0, 43.0, 32.0, 27.0, 15.0, 16.0, 9.0, 9.0, 7.0, 4.0, 3.0, 3.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.93986701965332e-05, -3.812834620475769e-05, -3.685802221298218e-05, -3.5587698221206665e-05, -3.431737422943115e-05, -3.304705023765564e-05, -3.177672624588013e-05, -3.0506402254104614e-05, -2.92360782623291e-05, -2.796575427055359e-05, -2.6695430278778076e-05, -2.5425106287002563e-05, -2.415478229522705e-05, -2.2884458303451538e-05, -2.1614134311676025e-05, -2.0343810319900513e-05, -1.9073486328125e-05, -1.7803162336349487e-05, -1.6532838344573975e-05, -1.5262514352798462e-05, -1.399219036102295e-05, -1.2721866369247437e-05, -1.1451542377471924e-05, -1.0181218385696411e-05, -8.910894393920898e-06, -7.640570402145386e-06, -6.370246410369873e-06, -5.09992241859436e-06, -3.829598426818848e-06, -2.559274435043335e-06, -1.2889504432678223e-06, -1.862645149230957e-08, 1.2516975402832031e-06, 2.522021532058716e-06, 3.7923455238342285e-06, 5.062669515609741e-06, 6.332993507385254e-06, 7.603317499160767e-06, 8.87364149093628e-06, 1.0143965482711792e-05, 1.1414289474487305e-05, 1.2684613466262817e-05, 1.395493745803833e-05, 1.5225261449813843e-05, 1.6495585441589355e-05, 1.7765909433364868e-05, 1.903623342514038e-05, 2.0306557416915894e-05, 2.1576881408691406e-05, 2.284720540046692e-05, 2.411752939224243e-05, 2.5387853384017944e-05, 2.6658177375793457e-05, 2.792850136756897e-05, 2.9198825359344482e-05, 3.0469149351119995e-05, 3.173947334289551e-05, 3.300979733467102e-05, 3.428012132644653e-05, 3.5550445318222046e-05, 3.682076930999756e-05, 3.809109330177307e-05, 3.9361417293548584e-05, 4.06317412853241e-05, 4.190206527709961e-05]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 4.0, 0.0, 2.0, 1.0, 3.0, 6.0, 9.0, 9.0, 16.0, 11.0, 24.0, 28.0, 48.0, 46.0, 78.0, 129.0, 195.0, 307.0, 474.0, 739.0, 1350.0, 2488.0, 5260.0, 12045.0, 35909.0, 198063.0, 717550.0, 46552.0, 14540.0, 5928.0, 2897.0, 1513.0, 874.0, 502.0, 303.0, 211.0, 143.0, 104.0, 56.0, 37.0, 21.0, 28.0, 21.0, 15.0, 7.0, 9.0, 3.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0936279296875, -0.09051704406738281, -0.08740615844726562, -0.08429527282714844, -0.08118438720703125, -0.07807350158691406, -0.07496261596679688, -0.07185173034667969, -0.0687408447265625, -0.06562995910644531, -0.06251907348632812, -0.05940818786621094, -0.05629730224609375, -0.05318641662597656, -0.050075531005859375, -0.04696464538574219, -0.043853759765625, -0.04074287414550781, -0.037631988525390625, -0.03452110290527344, -0.03141021728515625, -0.028299331665039062, -0.025188446044921875, -0.022077560424804688, -0.0189666748046875, -0.015855789184570312, -0.012744903564453125, -0.009634017944335938, -0.00652313232421875, -0.0034122467041015625, -0.000301361083984375, 0.0028095245361328125, 0.00592041015625, 0.009031295776367188, 0.012142181396484375, 0.015253067016601562, 0.01836395263671875, 0.021474838256835938, 0.024585723876953125, 0.027696609497070312, 0.0308074951171875, 0.03391838073730469, 0.037029266357421875, 0.04014015197753906, 0.04325103759765625, 0.04636192321777344, 0.049472808837890625, 0.05258369445800781, 0.055694580078125, 0.05880546569824219, 0.061916351318359375, 0.06502723693847656, 0.06813812255859375, 0.07124900817871094, 0.07435989379882812, 0.07747077941894531, 0.0805816650390625, 0.08369255065917969, 0.08680343627929688, 0.08991432189941406, 0.09302520751953125, 0.09613609313964844, 0.09924697875976562, 0.10235786437988281, 0.10546875]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 3.0, 1.0, 3.0, 4.0, 7.0, 2.0, 8.0, 5.0, 13.0, 18.0, 14.0, 21.0, 40.0, 34.0, 63.0, 69.0, 81.0, 101.0, 91.0, 77.0, 72.0, 55.0, 40.0, 34.0, 26.0, 29.0, 21.0, 12.0, 9.0, 13.0, 9.0, 5.0, 4.0, 5.0, 4.0, 5.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1219482421875, -0.11815738677978516, -0.11436653137207031, -0.11057567596435547, -0.10678482055664062, -0.10299396514892578, -0.09920310974121094, -0.0954122543334961, -0.09162139892578125, -0.0878305435180664, -0.08403968811035156, -0.08024883270263672, -0.07645797729492188, -0.07266712188720703, -0.06887626647949219, -0.06508541107177734, -0.0612945556640625, -0.057503700256347656, -0.05371284484863281, -0.04992198944091797, -0.046131134033203125, -0.04234027862548828, -0.03854942321777344, -0.034758567810058594, -0.03096771240234375, -0.027176856994628906, -0.023386001586914062, -0.01959514617919922, -0.015804290771484375, -0.012013435363769531, -0.008222579956054688, -0.004431724548339844, -0.000640869140625, 0.0031499862670898438, 0.0069408416748046875, 0.010731697082519531, 0.014522552490234375, 0.01831340789794922, 0.022104263305664062, 0.025895118713378906, 0.02968597412109375, 0.033476829528808594, 0.03726768493652344, 0.04105854034423828, 0.044849395751953125, 0.04864025115966797, 0.05243110656738281, 0.056221961975097656, 0.0600128173828125, 0.06380367279052734, 0.06759452819824219, 0.07138538360595703, 0.07517623901367188, 0.07896709442138672, 0.08275794982910156, 0.0865488052368164, 0.09033966064453125, 0.0941305160522461, 0.09792137145996094, 0.10171222686767578, 0.10550308227539062, 0.10929393768310547, 0.11308479309082031, 0.11687564849853516, 0.12066650390625]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 4.0, 1.0, 11.0, 13.0, 22.0, 39.0, 53.0, 88.0, 519.0, 100.0, 67.0, 27.0, 17.0, 21.0, 10.0, 4.0, 3.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.364222526550293, -5.18243932723999, -5.0006561279296875, -4.818872451782227, -4.637089252471924, -4.455306053161621, -4.27352237701416, -4.091739177703857, -3.9099559783935547, -3.728172779083252, -3.54638934135437, -3.3646059036254883, -3.1828227043151855, -3.001039505004883, -2.819256067276001, -2.637472629547119, -2.4556894302368164, -2.2739062309265137, -2.092122793197632, -1.9103394746780396, -1.7285561561584473, -1.546772837638855, -1.3649895191192627, -1.1832062005996704, -1.0014228820800781, -0.8196395635604858, -0.6378562450408936, -0.45607292652130127, -0.274289608001709, -0.0925062894821167, 0.08927702903747559, 0.27106034755706787, 0.45284414291381836, 0.6346274614334106, 0.8164107799530029, 0.9981940984725952, 1.1799774169921875, 1.3617607355117798, 1.543544054031372, 1.7253273725509644, 1.9071106910705566, 2.0888938903808594, 2.270677328109741, 2.452460765838623, 2.634243965148926, 2.8160271644592285, 2.9978106021881104, 3.179594039916992, 3.361377239227295, 3.5431604385375977, 3.7249438762664795, 3.9067273139953613, 4.088510513305664, 4.270293712615967, 4.4520769119262695, 4.6338605880737305, 4.815643787384033, 4.997426986694336, 5.179210662841797, 5.3609938621521, 5.542777061462402, 5.724560260772705, 5.906343460083008, 6.088127136230469, 6.2699103355407715]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 3.0, 8.0, 6.0, 6.0, 4.0, 7.0, 8.0, 4.0, 8.0, 12.0, 19.0, 18.0, 21.0, 23.0, 26.0, 27.0, 25.0, 38.0, 180.0, 260.0, 83.0, 32.0, 25.0, 14.0, 15.0, 15.0, 12.0, 9.0, 10.0, 19.0, 8.0, 8.0, 10.0, 8.0, 6.0, 8.0, 4.0, 5.0, 3.0, 3.0, 2.0, 0.0, 0.0, 3.0, 3.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-4.015302658081055, -3.894341230392456, -3.7733800411224365, -3.652418613433838, -3.5314571857452393, -3.4104957580566406, -3.289534568786621, -3.1685731410980225, -3.047611713409424, -2.926650285720825, -2.8056890964508057, -2.684727668762207, -2.5637662410736084, -2.4428048133850098, -2.3218436241149902, -2.2008821964263916, -2.079921007156372, -1.958959698677063, -1.8379982709884644, -1.7170369625091553, -1.5960755348205566, -1.4751142263412476, -1.3541529178619385, -1.2331914901733398, -1.1122301816940308, -0.9912688136100769, -0.870307445526123, -0.749346137046814, -0.6283847689628601, -0.5074234008789062, -0.38646209239959717, -0.2655007243156433, -0.14453959465026855, -0.02357824146747589, 0.09738311171531677, 0.21834444999694824, 0.3393058180809021, 0.46026718616485596, 0.581228494644165, 0.7021898627281189, 0.8231512308120728, 0.9441125988960266, 1.0650739669799805, 1.1860352754592896, 1.3069965839385986, 1.4279580116271973, 1.5489193201065063, 1.6698806285858154, 1.790842056274414, 1.9118033647537231, 2.0327646732330322, 2.153726100921631, 2.2746875286102295, 2.395648956298828, 2.5166101455688477, 2.6375715732574463, 2.758533000946045, 2.8794944286346436, 3.000455617904663, 3.1214170455932617, 3.2423784732818604, 3.363339900970459, 3.4843010902404785, 3.605262517929077, 3.7262237071990967]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 5.0, 5.0, 4.0, 4.0, 8.0, 10.0, 11.0, 9.0, 11.0, 11.0, 18.0, 21.0, 34.0, 22.0, 27.0, 39.0, 33.0, 37.0, 109.0, 212.0, 85.0, 41.0, 27.0, 14.0, 32.0, 15.0, 25.0, 14.0, 21.0, 12.0, 15.0, 12.0, 13.0, 12.0, 3.0, 4.0, 7.0, 9.0, 2.0, 1.0, 2.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.28076171875, -0.2715034484863281, -0.26224517822265625, -0.2529869079589844, -0.2437286376953125, -0.23447036743164062, -0.22521209716796875, -0.21595382690429688, -0.206695556640625, -0.19743728637695312, -0.18817901611328125, -0.17892074584960938, -0.1696624755859375, -0.16040420532226562, -0.15114593505859375, -0.14188766479492188, -0.13262939453125, -0.12337112426757812, -0.11411285400390625, -0.10485458374023438, -0.0955963134765625, -0.08633804321289062, -0.07707977294921875, -0.06782150268554688, -0.058563232421875, -0.049304962158203125, -0.04004669189453125, -0.030788421630859375, -0.0215301513671875, -0.012271881103515625, -0.00301361083984375, 0.006244659423828125, 0.0155029296875, 0.024761199951171875, 0.03401947021484375, 0.043277740478515625, 0.0525360107421875, 0.061794281005859375, 0.07105255126953125, 0.08031082153320312, 0.089569091796875, 0.09882736206054688, 0.10808563232421875, 0.11734390258789062, 0.1266021728515625, 0.13586044311523438, 0.14511871337890625, 0.15437698364257812, 0.16363525390625, 0.17289352416992188, 0.18215179443359375, 0.19141006469726562, 0.2006683349609375, 0.20992660522460938, 0.21918487548828125, 0.22844314575195312, 0.237701416015625, 0.24695968627929688, 0.25621795654296875, 0.2654762268066406, 0.2747344970703125, 0.2839927673339844, 0.29325103759765625, 0.3025093078613281, 0.311767578125]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 4.0, 6.0, 12.0, 15.0, 21.0, 27.0, 70.0, 214.0, 1587.0, 91547.0, 8292478.0, 2144.0, 262.0, 79.0, 56.0, 33.0, 15.0, 5.0, 1.0, 4.0, 2.0, 0.0, 2.0, 4.0, 1.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.132739067077637, -7.870877742767334, -7.609016418457031, -7.34715461730957, -7.085293292999268, -6.823431968688965, -6.561570167541504, -6.299708843231201, -6.037847518920898, -5.775986194610596, -5.514124870300293, -5.252263069152832, -4.990401744842529, -4.728540420532227, -4.466678619384766, -4.204817295074463, -3.94295597076416, -3.6810946464538574, -3.4192330837249756, -3.1573715209960938, -2.895510196685791, -2.6336488723754883, -2.3717873096466064, -2.1099257469177246, -1.8480644226074219, -1.5862029790878296, -1.3243415355682373, -1.062480092048645, -0.8006186485290527, -0.5387572050094604, -0.27689576148986816, -0.015034317970275879, 0.2468271255493164, 0.5086885690689087, 0.770550012588501, 1.0324114561080933, 1.2942728996276855, 1.5561343431472778, 1.8179957866668701, 2.079857349395752, 2.3417186737060547, 2.6035799980163574, 2.8654415607452393, 3.127303123474121, 3.389164447784424, 3.6510257720947266, 3.9128873348236084, 4.17474889755249, 4.436610221862793, 4.698471546173096, 4.960332870483398, 5.222194671630859, 5.484055995941162, 5.745917320251465, 6.007779121398926, 6.2696404457092285, 6.531501770019531, 6.793363094329834, 7.055224418640137, 7.317086219787598, 7.5789475440979, 7.840808868408203, 8.102670669555664, 8.364531517028809, 8.62639331817627]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 2.0, 0.0, 2.0, 3.0, 2.0, 1.0, 2.0, 5.0, 2.0, 2.0, 2.0, 7.0, 3.0, 7.0, 4.0, 5.0, 7.0, 3.0, 3.0, 6.0, 4.0, 4.0, 7.0, 7.0, 4.0, 3.0, 2.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.435853958129883, -6.282285213470459, -6.128715991973877, -5.975147247314453, -5.821578025817871, -5.668009281158447, -5.514440536499023, -5.360871315002441, -5.207302570343018, -5.053733825683594, -4.900164604187012, -4.746595859527588, -4.593026638031006, -4.439457893371582, -4.285888671875, -4.132319927215576, -3.9787509441375732, -3.8251819610595703, -3.6716129779815674, -3.5180439949035645, -3.3644752502441406, -3.2109062671661377, -3.0573372840881348, -2.903768539428711, -2.750199317932129, -2.596630334854126, -2.443061351776123, -2.289492607116699, -2.1359236240386963, -1.9823546409606934, -1.8287856578826904, -1.675216794013977, -1.5216476917266846, -1.3680787086486816, -1.2145098447799683, -1.0609408617019653, -0.9073719382286072, -0.753803014755249, -0.6002340316772461, -0.4466651678085327, -0.2930961847305298, -0.13952724635601044, 0.014041692018508911, 0.16761064529418945, 0.3211795687675476, 0.47474849224090576, 0.6283174753189087, 0.7818863391876221, 0.935455322265625, 1.089024305343628, 1.2425931692123413, 1.3961621522903442, 1.5497310161590576, 1.7032999992370605, 1.8568689823150635, 2.0104379653930664, 2.1640067100524902, 2.317575693130493, 2.471144676208496, 2.62471342086792, 2.778282403945923, 2.931851387023926, 3.0854203701019287, 3.2389893531799316, 3.3925583362579346]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 6.0, 1.0, 1.0, 1.0, 3.0, 6.0, 6.0, 11.0, 17.0, 15.0, 24.0, 31.0, 60.0, 72.0, 108.0, 164.0, 298.0, 611.0, 2132.0, 10046.0, 60171.0, 295552.0, 129081.0, 19904.0, 3834.0, 1032.0, 447.0, 179.0, 134.0, 96.0, 54.0, 56.0, 28.0, 23.0, 22.0, 16.0, 9.0, 6.0, 6.0, 5.0, 1.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.69921875, -5.5194091796875, -5.339599609375, -5.1597900390625, -4.97998046875, -4.8001708984375, -4.620361328125, -4.4405517578125, -4.2607421875, -4.0809326171875, -3.901123046875, -3.7213134765625, -3.54150390625, -3.3616943359375, -3.181884765625, -3.0020751953125, -2.822265625, -2.6424560546875, -2.462646484375, -2.2828369140625, -2.10302734375, -1.9232177734375, -1.743408203125, -1.5635986328125, -1.3837890625, -1.2039794921875, -1.024169921875, -0.8443603515625, -0.66455078125, -0.4847412109375, -0.304931640625, -0.1251220703125, 0.0546875, 0.2344970703125, 0.414306640625, 0.5941162109375, 0.77392578125, 0.9537353515625, 1.133544921875, 1.3133544921875, 1.4931640625, 1.6729736328125, 1.852783203125, 2.0325927734375, 2.21240234375, 2.3922119140625, 2.572021484375, 2.7518310546875, 2.931640625, 3.1114501953125, 3.291259765625, 3.4710693359375, 3.65087890625, 3.8306884765625, 4.010498046875, 4.1903076171875, 4.3701171875, 4.5499267578125, 4.729736328125, 4.9095458984375, 5.08935546875, 5.2691650390625, 5.448974609375, 5.6287841796875, 5.80859375]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 6.0, 6.0, 4.0, 10.0, 7.0, 13.0, 12.0, 10.0, 22.0, 20.0, 34.0, 45.0, 45.0, 42.0, 61.0, 74.0, 65.0, 85.0, 63.0, 54.0, 59.0, 45.0, 41.0, 46.0, 31.0, 24.0, 16.0, 14.0, 14.0, 7.0, 8.0, 6.0, 5.0, 6.0, 2.0, 2.0, 3.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.470458984375, -0.4566993713378906, -0.44293975830078125, -0.4291801452636719, -0.4154205322265625, -0.4016609191894531, -0.38790130615234375, -0.3741416931152344, -0.360382080078125, -0.3466224670410156, -0.33286285400390625, -0.3191032409667969, -0.3053436279296875, -0.2915840148925781, -0.27782440185546875, -0.2640647888183594, -0.25030517578125, -0.23654556274414062, -0.22278594970703125, -0.20902633666992188, -0.1952667236328125, -0.18150711059570312, -0.16774749755859375, -0.15398788452148438, -0.140228271484375, -0.12646865844726562, -0.11270904541015625, -0.09894943237304688, -0.0851898193359375, -0.07143020629882812, -0.05767059326171875, -0.043910980224609375, -0.0301513671875, -0.016391754150390625, -0.00263214111328125, 0.011127471923828125, 0.0248870849609375, 0.038646697998046875, 0.05240631103515625, 0.06616592407226562, 0.079925537109375, 0.09368515014648438, 0.10744476318359375, 0.12120437622070312, 0.1349639892578125, 0.14872360229492188, 0.16248321533203125, 0.17624282836914062, 0.19000244140625, 0.20376205444335938, 0.21752166748046875, 0.23128128051757812, 0.2450408935546875, 0.2588005065917969, 0.27256011962890625, 0.2863197326660156, 0.300079345703125, 0.3138389587402344, 0.32759857177734375, 0.3413581848144531, 0.3551177978515625, 0.3688774108886719, 0.38263702392578125, 0.3963966369628906, 0.41015625]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 2.0, 5.0, 6.0, 2.0, 11.0, 10.0, 30.0, 62.0, 91.0, 93.0, 69.0, 46.0, 21.0, 10.0, 9.0, 3.0, 5.0, 4.0, 1.0, 1.0, 3.0, 2.0, 1.0, 4.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9703562259674072, -2.837705612182617, -2.7050552368164062, -2.5724048614501953, -2.4397542476654053, -2.3071036338806152, -2.1744532585144043, -2.0418028831481934, -1.9091522693634033, -1.7765017747879028, -1.6438512802124023, -1.5112007856369019, -1.3785502910614014, -1.2458997964859009, -1.1132493019104004, -0.9805988073348999, -0.8479483127593994, -0.7152978181838989, -0.5826473236083984, -0.44999682903289795, -0.31734633445739746, -0.18469583988189697, -0.052045345306396484, 0.080605149269104, 0.2132556438446045, 0.345906138420105, 0.47855663299560547, 0.611207127571106, 0.7438576221466064, 0.8765081167221069, 1.0091586112976074, 1.141809105873108, 1.2744593620300293, 1.4071098566055298, 1.5397603511810303, 1.6724108457565308, 1.8050613403320312, 1.9377118349075317, 2.0703623294830322, 2.2030129432678223, 2.335663318634033, 2.468313694000244, 2.600964307785034, 2.733614921569824, 2.866265296936035, 2.998915672302246, 3.131566286087036, 3.264216899871826, 3.396867275238037, 3.529517650604248, 3.662168264389038, 3.794818878173828, 3.927469253540039, 4.06011962890625, 4.192770004272461, 4.32542085647583, 4.458071231842041, 4.590721607208252, 4.723372459411621, 4.856022834777832, 4.988673210144043, 5.121323585510254, 5.253973960876465, 5.386624813079834, 5.519275188446045]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 5.0, 1.0, 3.0, 1.0, 1.0, 3.0, 5.0, 1.0, 3.0, 4.0, 2.0, 4.0, 6.0, 4.0, 7.0, 6.0, 16.0, 35.0, 68.0, 69.0, 82.0, 61.0, 37.0, 15.0, 12.0, 7.0, 5.0, 5.0, 8.0, 2.0, 3.0, 3.0, 4.0, 0.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.7188446521759033, -1.6588523387908936, -1.5988600254058838, -1.5388678312301636, -1.4788755178451538, -1.418883204460144, -1.3588908910751343, -1.298898696899414, -1.2389063835144043, -1.1789140701293945, -1.1189217567443848, -1.0589295625686646, -0.9989372491836548, -0.938944935798645, -0.8789526224136353, -0.8189603686332703, -0.7589680552482605, -0.6989757418632507, -0.6389834880828857, -0.578991174697876, -0.518998920917511, -0.4590066075325012, -0.39901432394981384, -0.33902204036712646, -0.2790297567844391, -0.2190374732017517, -0.15904518961906433, -0.09905289113521576, -0.03906060755252838, 0.02093169093132019, 0.08092397451400757, 0.14091625809669495, 0.20090854167938232, 0.2609008252620697, 0.3208931088447571, 0.38088542222976685, 0.44087767601013184, 0.5008699893951416, 0.5608623027801514, 0.6208545565605164, 0.6808468103408813, 0.7408391237258911, 0.8008313775062561, 0.8608236908912659, 0.9208159446716309, 0.9808082580566406, 1.0408005714416504, 1.1007928848266602, 1.16078519821167, 1.2207775115966797, 1.2807698249816895, 1.3407620191574097, 1.4007543325424194, 1.4607466459274292, 1.520738959312439, 1.5807311534881592, 1.640723466873169, 1.7007157802581787, 1.7607080936431885, 1.8207002878189087, 1.8806926012039185, 1.9406849145889282, 2.0006771087646484, 2.060669422149658, 2.120661735534668]}, "train/train_runtime": 4741.9429, "train/train_samples_per_second": 6.018, "train/train_steps_per_second": 0.063, "train/total_flos": 0.0, "train/train_loss": 4.266032724669485, "eval/loss": 4.213485240936279, "eval/wer": 2.3599563665212218, "eval/runtime": 939.6426, "eval/samples_per_second": 2.812, "eval/steps_per_second": 0.235} \ No newline at end of file +{"train/loss": 4.5069, "train/learning_rate": 5.92e-05, "train/epoch": 1.0, "train/global_step": 297, "_runtime": 6008, "_timestamp": 1646200420, "_step": 298, "gradients/decoder.transformer.ln_f.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 117.0, 651.0, 234.0, 12.0], "bins": [-389.90692138671875, -383.5785827636719, -377.250244140625, -370.92193603515625, -364.5935974121094, -358.2652587890625, -351.93695068359375, -345.6086120605469, -339.2802734375, -332.9519348144531, -326.62359619140625, -320.2952880859375, -313.9669494628906, -307.63861083984375, -301.310302734375, -294.9819641113281, -288.65362548828125, -282.3252868652344, -275.9969482421875, -269.66864013671875, -263.3403015136719, -257.011962890625, -250.6836395263672, -244.35531616210938, -238.0269775390625, -231.69863891601562, -225.3703155517578, -219.0419921875, -212.71365356445312, -206.38531494140625, -200.05699157714844, -193.72866821289062, -187.4003448486328, -181.072021484375, -174.74368286132812, -168.41534423828125, -162.08702087402344, -155.75869750976562, -149.43035888671875, -143.10202026367188, -136.77369689941406, -130.44537353515625, -124.11703491210938, -117.78870391845703, -111.46037292480469, -105.13204193115234, -98.8037109375, -92.47537994384766, -86.14704132080078, -79.81871032714844, -73.4903793334961, -67.16204833984375, -60.833717346191406, -54.50538635253906, -48.17705535888672, -41.848724365234375, -35.5203971862793, -29.192066192626953, -22.86373519897461, -16.535404205322266, -10.207073211669922, -3.878742218017578, 2.4495887756347656, 8.77791976928711, 15.10625171661377]}, "gradients/decoder.transformer.ln_f.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 2.0, 4.0, 2.0, 8.0, 9.0, 13.0, 8.0, 17.0, 21.0, 14.0, 26.0, 32.0, 17.0, 43.0, 55.0, 40.0, 43.0, 49.0, 46.0, 60.0, 54.0, 56.0, 54.0, 39.0, 34.0, 43.0, 28.0, 33.0, 20.0, 27.0, 17.0, 27.0, 9.0, 9.0, 14.0, 12.0, 4.0, 4.0, 5.0, 5.0, 0.0, 1.0, 1.0, 2.0, 2.0], "bins": [-62.19386291503906, -60.59320068359375, -58.99253845214844, -57.391876220703125, -55.79121398925781, -54.1905517578125, -52.58988571166992, -50.98922348022461, -49.3885612487793, -47.787899017333984, -46.18723678588867, -44.58657455444336, -42.98590850830078, -41.38524627685547, -39.784584045410156, -38.183921813964844, -36.58325958251953, -34.98259735107422, -33.381935119628906, -31.78127098083496, -30.18060874938965, -28.579946517944336, -26.97928237915039, -25.378620147705078, -23.777957916259766, -22.177295684814453, -20.57663345336914, -18.975969314575195, -17.375307083129883, -15.77464485168457, -14.173981666564941, -12.573318481445312, -10.972652435302734, -9.371990203857422, -7.771327018737793, -6.170664310455322, -4.570001602172852, -2.969339370727539, -1.3686761856079102, 0.23198699951171875, 1.8326492309570312, 3.433311939239502, 5.033974647521973, 6.634637355804443, 8.235300064086914, 9.835962295532227, 11.436625480651855, 13.037288665771484, 14.637950897216797, 16.23861312866211, 17.839275360107422, 19.439939498901367, 21.04060173034668, 22.641263961791992, 24.241928100585938, 25.84259033203125, 27.443252563476562, 29.043914794921875, 30.644577026367188, 32.2452392578125, 33.84590148925781, 35.446563720703125, 37.0472297668457, 38.647891998291016, 40.24855422973633]}, "gradients/decoder.transformer.h.23.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 3.0, 2.0, 1.0, 5.0, 3.0, 5.0, 10.0, 9.0, 10.0, 15.0, 19.0, 15.0, 15.0, 29.0, 28.0, 36.0, 52.0, 40.0, 49.0, 40.0, 49.0, 48.0, 70.0, 56.0, 36.0, 48.0, 37.0, 31.0, 37.0, 32.0, 34.0, 21.0, 22.0, 21.0, 23.0, 10.0, 15.0, 12.0, 4.0, 5.0, 3.0, 1.0, 4.0, 2.0, 5.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.328125, -2.24615478515625, -2.1641845703125, -2.08221435546875, -2.000244140625, -1.91827392578125, -1.8363037109375, -1.75433349609375, -1.67236328125, -1.59039306640625, -1.5084228515625, -1.42645263671875, -1.344482421875, -1.26251220703125, -1.1805419921875, -1.09857177734375, -1.0166015625, -0.93463134765625, -0.8526611328125, -0.77069091796875, -0.688720703125, -0.60675048828125, -0.5247802734375, -0.44281005859375, -0.36083984375, -0.27886962890625, -0.1968994140625, -0.11492919921875, -0.032958984375, 0.04901123046875, 0.1309814453125, 0.21295166015625, 0.294921875, 0.37689208984375, 0.4588623046875, 0.54083251953125, 0.622802734375, 0.70477294921875, 0.7867431640625, 0.86871337890625, 0.95068359375, 1.03265380859375, 1.1146240234375, 1.19659423828125, 1.278564453125, 1.36053466796875, 1.4425048828125, 1.52447509765625, 1.6064453125, 1.68841552734375, 1.7703857421875, 1.85235595703125, 1.934326171875, 2.01629638671875, 2.0982666015625, 2.18023681640625, 2.26220703125, 2.34417724609375, 2.4261474609375, 2.50811767578125, 2.590087890625, 2.67205810546875, 2.7540283203125, 2.83599853515625, 2.91796875]}, "gradients/decoder.transformer.h.23.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 0.0, 5.0, 3.0, 5.0, 9.0, 5.0, 15.0, 22.0, 37.0, 64.0, 90.0, 162.0, 276.0, 457.0, 891.0, 1726.0, 4247.0, 14065.0, 78326.0, 901183.0, 2831164.0, 309275.0, 37287.0, 8841.0, 3094.0, 1379.0, 698.0, 425.0, 240.0, 120.0, 61.0, 51.0, 23.0, 13.0, 17.0, 5.0, 7.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.171875, -10.84619140625, -10.5205078125, -10.19482421875, -9.869140625, -9.54345703125, -9.2177734375, -8.89208984375, -8.56640625, -8.24072265625, -7.9150390625, -7.58935546875, -7.263671875, -6.93798828125, -6.6123046875, -6.28662109375, -5.9609375, -5.63525390625, -5.3095703125, -4.98388671875, -4.658203125, -4.33251953125, -4.0068359375, -3.68115234375, -3.35546875, -3.02978515625, -2.7041015625, -2.37841796875, -2.052734375, -1.72705078125, -1.4013671875, -1.07568359375, -0.75, -0.42431640625, -0.0986328125, 0.22705078125, 0.552734375, 0.87841796875, 1.2041015625, 1.52978515625, 1.85546875, 2.18115234375, 2.5068359375, 2.83251953125, 3.158203125, 3.48388671875, 3.8095703125, 4.13525390625, 4.4609375, 4.78662109375, 5.1123046875, 5.43798828125, 5.763671875, 6.08935546875, 6.4150390625, 6.74072265625, 7.06640625, 7.39208984375, 7.7177734375, 8.04345703125, 8.369140625, 8.69482421875, 9.0205078125, 9.34619140625, 9.671875]}, "gradients/decoder.transformer.h.23.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 1.0, 3.0, 4.0, 2.0, 10.0, 6.0, 15.0, 22.0, 26.0, 41.0, 59.0, 84.0, 125.0, 149.0, 261.0, 360.0, 456.0, 540.0, 512.0, 435.0, 292.0, 202.0, 143.0, 122.0, 65.0, 45.0, 35.0, 21.0, 11.0, 12.0, 3.0, 8.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-12.609375, -12.2742919921875, -11.939208984375, -11.6041259765625, -11.26904296875, -10.9339599609375, -10.598876953125, -10.2637939453125, -9.9287109375, -9.5936279296875, -9.258544921875, -8.9234619140625, -8.58837890625, -8.2532958984375, -7.918212890625, -7.5831298828125, -7.248046875, -6.9129638671875, -6.577880859375, -6.2427978515625, -5.90771484375, -5.5726318359375, -5.237548828125, -4.9024658203125, -4.5673828125, -4.2322998046875, -3.897216796875, -3.5621337890625, -3.22705078125, -2.8919677734375, -2.556884765625, -2.2218017578125, -1.88671875, -1.5516357421875, -1.216552734375, -0.8814697265625, -0.54638671875, -0.2113037109375, 0.123779296875, 0.4588623046875, 0.7939453125, 1.1290283203125, 1.464111328125, 1.7991943359375, 2.13427734375, 2.4693603515625, 2.804443359375, 3.1395263671875, 3.474609375, 3.8096923828125, 4.144775390625, 4.4798583984375, 4.81494140625, 5.1500244140625, 5.485107421875, 5.8201904296875, 6.1552734375, 6.4903564453125, 6.825439453125, 7.1605224609375, 7.49560546875, 7.8306884765625, 8.165771484375, 8.5008544921875, 8.8359375]}, "gradients/decoder.transformer.h.23.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 4.0, 5.0, 3.0, 6.0, 13.0, 10.0, 14.0, 21.0, 49.0, 67.0, 100.0, 159.0, 269.0, 450.0, 858.0, 1879.0, 6225.0, 174344.0, 3887694.0, 112843.0, 5511.0, 1755.0, 846.0, 470.0, 240.0, 154.0, 107.0, 50.0, 35.0, 35.0, 17.0, 12.0, 6.0, 5.0, 5.0, 2.0, 3.0, 0.0, 1.0, 1.0, 4.0, 5.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.65625, -35.52685546875, -34.3974609375, -33.26806640625, -32.138671875, -31.00927734375, -29.8798828125, -28.75048828125, -27.62109375, -26.49169921875, -25.3623046875, -24.23291015625, -23.103515625, -21.97412109375, -20.8447265625, -19.71533203125, -18.5859375, -17.45654296875, -16.3271484375, -15.19775390625, -14.068359375, -12.93896484375, -11.8095703125, -10.68017578125, -9.55078125, -8.42138671875, -7.2919921875, -6.16259765625, -5.033203125, -3.90380859375, -2.7744140625, -1.64501953125, -0.515625, 0.61376953125, 1.7431640625, 2.87255859375, 4.001953125, 5.13134765625, 6.2607421875, 7.39013671875, 8.51953125, 9.64892578125, 10.7783203125, 11.90771484375, 13.037109375, 14.16650390625, 15.2958984375, 16.42529296875, 17.5546875, 18.68408203125, 19.8134765625, 20.94287109375, 22.072265625, 23.20166015625, 24.3310546875, 25.46044921875, 26.58984375, 27.71923828125, 28.8486328125, 29.97802734375, 31.107421875, 32.23681640625, 33.3662109375, 34.49560546875, 35.625]}, "gradients/decoder.transformer.h.23.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 21.0, 81.0, 209.0, 340.0, 227.0, 107.0, 21.0, 5.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-81.21906280517578, -78.64128112792969, -76.06349182128906, -73.48571014404297, -70.90792846679688, -68.33013916015625, -65.75235748291016, -63.17457580566406, -60.5967903137207, -58.019004821777344, -55.44122314453125, -52.86343765258789, -50.28565216064453, -47.70787048339844, -45.13008499145508, -42.55229949951172, -39.974517822265625, -37.396732330322266, -34.81895065307617, -32.24116516113281, -29.663381576538086, -27.08559799194336, -24.5078125, -21.930028915405273, -19.352245330810547, -16.77446174621582, -14.196677207946777, -11.618892669677734, -9.041109085083008, -6.463325500488281, -3.8855409622192383, -1.3077564239501953, 1.2700347900390625, 3.8478188514709473, 6.425602912902832, 9.003387451171875, 11.581171035766602, 14.158954620361328, 16.736740112304688, 19.314523696899414, 21.89230728149414, 24.470090866088867, 27.047874450683594, 29.625659942626953, 32.20344543457031, 34.781227111816406, 37.359012603759766, 39.936798095703125, 42.51457977294922, 45.09236526489258, 47.67014694213867, 50.24793243408203, 52.825714111328125, 55.403499603271484, 57.981285095214844, 60.55906677246094, 63.1368522644043, 65.71463775634766, 68.29241943359375, 70.87020111083984, 73.44799041748047, 76.02577209472656, 78.60355377197266, 81.18134307861328, 83.75912475585938]}, "gradients/decoder.transformer.h.23.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 2.0, 7.0, 4.0, 7.0, 8.0, 12.0, 14.0, 23.0, 19.0, 25.0, 25.0, 25.0, 32.0, 38.0, 33.0, 42.0, 40.0, 33.0, 42.0, 43.0, 46.0, 43.0, 58.0, 49.0, 42.0, 40.0, 35.0, 40.0, 29.0, 29.0, 24.0, 17.0, 18.0, 16.0, 6.0, 7.0, 13.0, 7.0, 3.0, 3.0, 4.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.706695556640625, -31.624561309814453, -30.54242515563965, -29.460290908813477, -28.378154754638672, -27.2960205078125, -26.213886260986328, -25.131752014160156, -24.04961585998535, -22.96748161315918, -21.885345458984375, -20.803211212158203, -19.72107696533203, -18.638940811157227, -17.556806564331055, -16.47467041015625, -15.392536163330078, -14.31040096282959, -13.228265762329102, -12.14613151550293, -11.063996315002441, -9.981861114501953, -8.899726867675781, -7.817591667175293, -6.735456466674805, -5.653321266174316, -4.571186542510986, -3.489051580429077, -2.406916618347168, -1.3247814178466797, -0.2426466941833496, 0.8394880294799805, 1.9216194152832031, 3.0037543773651123, 4.0858893394470215, 5.168024063110352, 6.25015926361084, 7.332294464111328, 8.4144287109375, 9.496563911437988, 10.578699111938477, 11.660834312438965, 12.742969512939453, 13.825103759765625, 14.907238960266113, 15.989374160766602, 17.071508407592773, 18.153644561767578, 19.23577880859375, 20.317913055419922, 21.400049209594727, 22.4821834564209, 23.564319610595703, 24.646453857421875, 25.728588104248047, 26.81072235107422, 27.892858505249023, 28.974992752075195, 30.05712890625, 31.139263153076172, 32.221397399902344, 33.30353546142578, 34.38566970825195, 35.467803955078125, 36.5499382019043]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 0.0, 3.0, 5.0, 3.0, 2.0, 7.0, 9.0, 13.0, 13.0, 15.0, 27.0, 22.0, 14.0, 29.0, 28.0, 34.0, 31.0, 36.0, 47.0, 52.0, 38.0, 50.0, 49.0, 50.0, 36.0, 37.0, 41.0, 44.0, 36.0, 35.0, 31.0, 22.0, 23.0, 24.0, 14.0, 19.0, 13.0, 15.0, 8.0, 8.0, 3.0, 7.0, 6.0, 2.0, 7.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.376953125, -2.296905517578125, -2.21685791015625, -2.136810302734375, -2.0567626953125, -1.976715087890625, -1.89666748046875, -1.816619873046875, -1.736572265625, -1.656524658203125, -1.57647705078125, -1.496429443359375, -1.4163818359375, -1.336334228515625, -1.25628662109375, -1.176239013671875, -1.09619140625, -1.016143798828125, -0.93609619140625, -0.856048583984375, -0.7760009765625, -0.695953369140625, -0.61590576171875, -0.535858154296875, -0.455810546875, -0.375762939453125, -0.29571533203125, -0.215667724609375, -0.1356201171875, -0.055572509765625, 0.02447509765625, 0.104522705078125, 0.1845703125, 0.264617919921875, 0.34466552734375, 0.424713134765625, 0.5047607421875, 0.584808349609375, 0.66485595703125, 0.744903564453125, 0.824951171875, 0.904998779296875, 0.98504638671875, 1.065093994140625, 1.1451416015625, 1.225189208984375, 1.30523681640625, 1.385284423828125, 1.46533203125, 1.545379638671875, 1.62542724609375, 1.705474853515625, 1.7855224609375, 1.865570068359375, 1.94561767578125, 2.025665283203125, 2.105712890625, 2.185760498046875, 2.26580810546875, 2.345855712890625, 2.4259033203125, 2.505950927734375, 2.58599853515625, 2.666046142578125, 2.74609375]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 3.0, 3.0, 8.0, 6.0, 7.0, 13.0, 19.0, 33.0, 37.0, 51.0, 82.0, 113.0, 177.0, 260.0, 377.0, 572.0, 924.0, 1476.0, 2330.0, 3580.0, 5862.0, 9807.0, 16274.0, 27015.0, 45356.0, 74976.0, 122388.0, 190473.0, 202971.0, 134488.0, 82778.0, 50038.0, 30008.0, 17856.0, 10728.0, 6481.0, 4057.0, 2418.0, 1547.0, 995.0, 715.0, 422.0, 249.0, 191.0, 128.0, 80.0, 59.0, 32.0, 37.0, 18.0, 17.0, 14.0, 9.0, 3.0, 2.0, 3.0, 1.0, 0.0, 1.0, 3.0], "bins": [-0.2022705078125, -0.1959686279296875, -0.189666748046875, -0.1833648681640625, -0.17706298828125, -0.1707611083984375, -0.164459228515625, -0.1581573486328125, -0.15185546875, -0.1455535888671875, -0.139251708984375, -0.1329498291015625, -0.12664794921875, -0.1203460693359375, -0.114044189453125, -0.1077423095703125, -0.1014404296875, -0.0951385498046875, -0.088836669921875, -0.0825347900390625, -0.07623291015625, -0.0699310302734375, -0.063629150390625, -0.0573272705078125, -0.051025390625, -0.0447235107421875, -0.038421630859375, -0.0321197509765625, -0.02581787109375, -0.0195159912109375, -0.013214111328125, -0.0069122314453125, -0.0006103515625, 0.0056915283203125, 0.011993408203125, 0.0182952880859375, 0.02459716796875, 0.0308990478515625, 0.037200927734375, 0.0435028076171875, 0.0498046875, 0.0561065673828125, 0.062408447265625, 0.0687103271484375, 0.07501220703125, 0.0813140869140625, 0.087615966796875, 0.0939178466796875, 0.1002197265625, 0.1065216064453125, 0.112823486328125, 0.1191253662109375, 0.12542724609375, 0.1317291259765625, 0.138031005859375, 0.1443328857421875, 0.150634765625, 0.1569366455078125, 0.163238525390625, 0.1695404052734375, 0.17584228515625, 0.1821441650390625, 0.188446044921875, 0.1947479248046875, 0.2010498046875]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.bias": {"_type": "histogram", "values": [4.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 6.0, 7.0, 10.0, 6.0, 7.0, 7.0, 14.0, 15.0, 24.0, 18.0, 29.0, 32.0, 32.0, 37.0, 36.0, 34.0, 48.0, 38.0, 49.0, 54.0, 1059.0, 52.0, 46.0, 38.0, 41.0, 42.0, 29.0, 26.0, 29.0, 17.0, 22.0, 30.0, 20.0, 18.0, 14.0, 12.0, 3.0, 6.0, 2.0, 4.0, 2.0, 2.0, 5.0, 3.0, 2.0, 3.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3583984375, -1.3083343505859375, -1.258270263671875, -1.2082061767578125, -1.15814208984375, -1.1080780029296875, -1.058013916015625, -1.0079498291015625, -0.9578857421875, -0.9078216552734375, -0.857757568359375, -0.8076934814453125, -0.75762939453125, -0.7075653076171875, -0.657501220703125, -0.6074371337890625, -0.557373046875, -0.5073089599609375, -0.457244873046875, -0.4071807861328125, -0.35711669921875, -0.3070526123046875, -0.256988525390625, -0.2069244384765625, -0.1568603515625, -0.1067962646484375, -0.056732177734375, -0.0066680908203125, 0.04339599609375, 0.0934600830078125, 0.143524169921875, 0.1935882568359375, 0.24365234375, 0.2937164306640625, 0.343780517578125, 0.3938446044921875, 0.44390869140625, 0.4939727783203125, 0.544036865234375, 0.5941009521484375, 0.6441650390625, 0.6942291259765625, 0.744293212890625, 0.7943572998046875, 0.84442138671875, 0.8944854736328125, 0.944549560546875, 0.9946136474609375, 1.044677734375, 1.0947418212890625, 1.144805908203125, 1.1948699951171875, 1.24493408203125, 1.2949981689453125, 1.345062255859375, 1.3951263427734375, 1.4451904296875, 1.4952545166015625, 1.545318603515625, 1.5953826904296875, 1.64544677734375, 1.6955108642578125, 1.745574951171875, 1.7956390380859375, 1.845703125]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 6.0, 5.0, 18.0, 18.0, 18.0, 40.0, 58.0, 89.0, 115.0, 195.0, 270.0, 404.0, 592.0, 859.0, 1228.0, 1790.0, 2628.0, 3764.0, 5535.0, 7963.0, 11863.0, 17369.0, 25599.0, 37847.0, 55897.0, 82904.0, 118622.0, 1117951.0, 238714.0, 115932.0, 80122.0, 53969.0, 36531.0, 24699.0, 16596.0, 11580.0, 7968.0, 5506.0, 3708.0, 2545.0, 1790.0, 1247.0, 830.0, 571.0, 381.0, 235.0, 174.0, 130.0, 97.0, 48.0, 47.0, 22.0, 20.0, 18.0, 5.0, 6.0, 5.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.1290283203125, -0.12486076354980469, -0.12069320678710938, -0.11652565002441406, -0.11235809326171875, -0.10819053649902344, -0.10402297973632812, -0.09985542297363281, -0.0956878662109375, -0.09152030944824219, -0.08735275268554688, -0.08318519592285156, -0.07901763916015625, -0.07485008239746094, -0.07068252563476562, -0.06651496887207031, -0.062347412109375, -0.05817985534667969, -0.054012298583984375, -0.04984474182128906, -0.04567718505859375, -0.04150962829589844, -0.037342071533203125, -0.03317451477050781, -0.0290069580078125, -0.024839401245117188, -0.020671844482421875, -0.016504287719726562, -0.01233673095703125, -0.008169174194335938, -0.004001617431640625, 0.0001659393310546875, 0.00433349609375, 0.008501052856445312, 0.012668609619140625, 0.016836166381835938, 0.02100372314453125, 0.025171279907226562, 0.029338836669921875, 0.03350639343261719, 0.0376739501953125, 0.04184150695800781, 0.046009063720703125, 0.05017662048339844, 0.05434417724609375, 0.05851173400878906, 0.06267929077148438, 0.06684684753417969, 0.071014404296875, 0.07518196105957031, 0.07934951782226562, 0.08351707458496094, 0.08768463134765625, 0.09185218811035156, 0.09601974487304688, 0.10018730163574219, 0.1043548583984375, 0.10852241516113281, 0.11268997192382812, 0.11685752868652344, 0.12102508544921875, 0.12519264221191406, 0.12936019897460938, 0.1335277557373047, 0.1376953125]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 7.0, 5.0, 8.0, 8.0, 6.0, 16.0, 3.0, 6.0, 14.0, 14.0, 21.0, 25.0, 36.0, 34.0, 47.0, 35.0, 55.0, 52.0, 50.0, 38.0, 48.0, 43.0, 56.0, 55.0, 36.0, 45.0, 45.0, 28.0, 27.0, 23.0, 12.0, 22.0, 18.0, 10.0, 11.0, 13.0, 6.0, 6.0, 7.0, 4.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0005435943603515625, -0.0005276650190353394, -0.0005117356777191162, -0.0004958063364028931, -0.0004798769950866699, -0.0004639476537704468, -0.00044801831245422363, -0.0004320889711380005, -0.00041615962982177734, -0.0004002302885055542, -0.00038430094718933105, -0.0003683716058731079, -0.00035244226455688477, -0.0003365129232406616, -0.0003205835819244385, -0.00030465424060821533, -0.0002887248992919922, -0.00027279555797576904, -0.0002568662166595459, -0.00024093687534332275, -0.0002250075340270996, -0.00020907819271087646, -0.00019314885139465332, -0.00017721951007843018, -0.00016129016876220703, -0.0001453608274459839, -0.00012943148612976074, -0.0001135021448135376, -9.757280349731445e-05, -8.164346218109131e-05, -6.571412086486816e-05, -4.978477954864502e-05, -3.3855438232421875e-05, -1.792609691619873e-05, -1.996755599975586e-06, 1.3932585716247559e-05, 2.9861927032470703e-05, 4.579126834869385e-05, 6.172060966491699e-05, 7.764995098114014e-05, 9.357929229736328e-05, 0.00010950863361358643, 0.00012543797492980957, 0.00014136731624603271, 0.00015729665756225586, 0.000173225998878479, 0.00018915534019470215, 0.0002050846815109253, 0.00022101402282714844, 0.00023694336414337158, 0.0002528727054595947, 0.00026880204677581787, 0.000284731388092041, 0.00030066072940826416, 0.0003165900707244873, 0.00033251941204071045, 0.0003484487533569336, 0.00036437809467315674, 0.0003803074359893799, 0.00039623677730560303, 0.00041216611862182617, 0.0004280954599380493, 0.00044402480125427246, 0.0004599541425704956, 0.00047588348388671875]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 2.0, 0.0, 5.0, 2.0, 6.0, 0.0, 7.0, 6.0, 7.0, 8.0, 12.0, 13.0, 24.0, 21.0, 22.0, 31.0, 37.0, 31.0, 46.0, 62.0, 72.0, 94.0, 118.0, 165.0, 196.0, 230.0, 385.0, 821.0, 23234.0, 1018160.0, 2672.0, 617.0, 326.0, 217.0, 186.0, 137.0, 112.0, 87.0, 58.0, 56.0, 50.0, 52.0, 36.0, 28.0, 26.0, 20.0, 17.0, 16.0, 10.0, 10.0, 7.0, 2.0, 1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0], "bins": [-0.00925445556640625, -0.008968234062194824, -0.008682012557983398, -0.008395791053771973, -0.008109569549560547, -0.007823348045349121, -0.007537126541137695, -0.0072509050369262695, -0.006964683532714844, -0.006678462028503418, -0.006392240524291992, -0.006106019020080566, -0.005819797515869141, -0.005533576011657715, -0.005247354507446289, -0.004961133003234863, -0.0046749114990234375, -0.004388689994812012, -0.004102468490600586, -0.00381624698638916, -0.0035300254821777344, -0.0032438039779663086, -0.002957582473754883, -0.002671360969543457, -0.0023851394653320312, -0.0020989179611206055, -0.0018126964569091797, -0.001526474952697754, -0.0012402534484863281, -0.0009540319442749023, -0.0006678104400634766, -0.0003815889358520508, -9.5367431640625e-05, 0.00019085407257080078, 0.00047707557678222656, 0.0007632970809936523, 0.0010495185852050781, 0.001335740089416504, 0.0016219615936279297, 0.0019081830978393555, 0.0021944046020507812, 0.002480626106262207, 0.002766847610473633, 0.0030530691146850586, 0.0033392906188964844, 0.00362551212310791, 0.003911733627319336, 0.004197955131530762, 0.0044841766357421875, 0.004770398139953613, 0.005056619644165039, 0.005342841148376465, 0.005629062652587891, 0.005915284156799316, 0.006201505661010742, 0.006487727165222168, 0.006773948669433594, 0.0070601701736450195, 0.007346391677856445, 0.007632613182067871, 0.007918834686279297, 0.008205056190490723, 0.008491277694702148, 0.008777499198913574, 0.009063720703125]}, "gradients/decoder.transformer.h.23.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 34.0, 844.0, 135.0, 1.0], "bins": [-0.0048851193860173225, -0.004805841017514467, -0.004726562183350325, -0.004647283814847469, -0.004568005446344614, -0.004488727077841759, -0.004409448243677616, -0.004330169875174761, -0.0042508915066719055, -0.00417161313816905, -0.004092334304004908, -0.004013055935502052, -0.003933777566999197, -0.003854498965665698, -0.0037752205971628428, -0.003695941995829344, -0.003616663394495845, -0.003537384793162346, -0.0034581064246594906, -0.0033788278233259916, -0.0032995494548231363, -0.0032202708534896374, -0.003140992484986782, -0.003061713883653283, -0.002982435282319784, -0.002903156680986285, -0.00282387831248343, -0.002744599711149931, -0.0026653213426470757, -0.0025860427413135767, -0.0025067643728107214, -0.0024274857714772224, -0.0023482071701437235, -0.0022689285688102245, -0.0021896502003073692, -0.0021103715989738703, -0.002031093230471015, -0.001951814629137516, -0.001872536144219339, -0.0017932576593011618, -0.0017139792907983065, -0.0016347008058801293, -0.0015554223209619522, -0.001476143836043775, -0.001396865351125598, -0.001317586749792099, -0.0012383082648739219, -0.0011590297799557447, -0.0010797512950375676, -0.0010004728101193905, -0.0009211943252012134, -0.0008419157820753753, -0.0007626372971571982, -0.0006833588122390211, -0.000604080269113183, -0.0005248017841950059, -0.00044552329927682877, -0.00036624481435865164, -0.00028696630033664405, -0.0002076878008665517, -0.00012840930139645934, -4.913081647828221e-05, 3.014769754372537e-05, 0.00010942621156573296, 0.0001887047110358253]}, "gradients/decoder.transformer.h.23.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 5.0, 4.0, 4.0, 9.0, 7.0, 3.0, 7.0, 16.0, 9.0, 16.0, 11.0, 11.0, 10.0, 18.0, 22.0, 30.0, 32.0, 43.0, 23.0, 35.0, 34.0, 43.0, 40.0, 41.0, 32.0, 36.0, 37.0, 28.0, 36.0, 29.0, 36.0, 40.0, 29.0, 29.0, 17.0, 20.0, 18.0, 32.0, 13.0, 25.0, 14.0, 9.0, 12.0, 5.0, 9.0, 8.0, 5.0, 9.0, 6.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00027430057525634766, -0.0002656802535057068, -0.0002570599317550659, -0.00024843961000442505, -0.00023981928825378418, -0.0002311989665031433, -0.00022257864475250244, -0.00021395832300186157, -0.0002053380012512207, -0.00019671767950057983, -0.00018809735774993896, -0.0001794770359992981, -0.00017085671424865723, -0.00016223639249801636, -0.0001536160707473755, -0.00014499574899673462, -0.00013637542724609375, -0.00012775510549545288, -0.00011913478374481201, -0.00011051446199417114, -0.00010189414024353027, -9.32738184928894e-05, -8.465349674224854e-05, -7.603317499160767e-05, -6.74128532409668e-05, -5.879253149032593e-05, -5.017220973968506e-05, -4.155188798904419e-05, -3.293156623840332e-05, -2.431124448776245e-05, -1.5690922737121582e-05, -7.070600986480713e-06, 1.5497207641601562e-06, 1.0170042514801025e-05, 1.8790364265441895e-05, 2.7410686016082764e-05, 3.603100776672363e-05, 4.46513295173645e-05, 5.327165126800537e-05, 6.189197301864624e-05, 7.051229476928711e-05, 7.913261651992798e-05, 8.775293827056885e-05, 9.637326002120972e-05, 0.00010499358177185059, 0.00011361390352249146, 0.00012223422527313232, 0.0001308545470237732, 0.00013947486877441406, 0.00014809519052505493, 0.0001567155122756958, 0.00016533583402633667, 0.00017395615577697754, 0.0001825764775276184, 0.00019119679927825928, 0.00019981712102890015, 0.00020843744277954102, 0.00021705776453018188, 0.00022567808628082275, 0.00023429840803146362, 0.0002429187297821045, 0.00025153905153274536, 0.00026015937328338623, 0.0002687796950340271, 0.00027740001678466797]}, "gradients/decoder.transformer.h.23.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 0.0, 3.0, 5.0, 3.0, 2.0, 7.0, 9.0, 13.0, 13.0, 15.0, 27.0, 22.0, 14.0, 29.0, 28.0, 34.0, 31.0, 36.0, 47.0, 52.0, 38.0, 50.0, 49.0, 50.0, 36.0, 37.0, 41.0, 44.0, 36.0, 35.0, 31.0, 22.0, 23.0, 24.0, 14.0, 19.0, 13.0, 15.0, 8.0, 8.0, 3.0, 7.0, 6.0, 2.0, 7.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.376953125, -2.296905517578125, -2.21685791015625, -2.136810302734375, -2.0567626953125, -1.976715087890625, -1.89666748046875, -1.816619873046875, -1.736572265625, -1.656524658203125, -1.57647705078125, -1.496429443359375, -1.4163818359375, -1.336334228515625, -1.25628662109375, -1.176239013671875, -1.09619140625, -1.016143798828125, -0.93609619140625, -0.856048583984375, -0.7760009765625, -0.695953369140625, -0.61590576171875, -0.535858154296875, -0.455810546875, -0.375762939453125, -0.29571533203125, -0.215667724609375, -0.1356201171875, -0.055572509765625, 0.02447509765625, 0.104522705078125, 0.1845703125, 0.264617919921875, 0.34466552734375, 0.424713134765625, 0.5047607421875, 0.584808349609375, 0.66485595703125, 0.744903564453125, 0.824951171875, 0.904998779296875, 0.98504638671875, 1.065093994140625, 1.1451416015625, 1.225189208984375, 1.30523681640625, 1.385284423828125, 1.46533203125, 1.545379638671875, 1.62542724609375, 1.705474853515625, 1.7855224609375, 1.865570068359375, 1.94561767578125, 2.025665283203125, 2.105712890625, 2.185760498046875, 2.26580810546875, 2.345855712890625, 2.4259033203125, 2.505950927734375, 2.58599853515625, 2.666046142578125, 2.74609375]}, "gradients/decoder.transformer.h.23.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 7.0, 2.0, 5.0, 9.0, 14.0, 30.0, 34.0, 44.0, 58.0, 82.0, 122.0, 151.0, 212.0, 290.0, 391.0, 591.0, 731.0, 1107.0, 1483.0, 2214.0, 3087.0, 4795.0, 7646.0, 13639.0, 29917.0, 92708.0, 331696.0, 374486.0, 109682.0, 34039.0, 15059.0, 8122.0, 4997.0, 3308.0, 2169.0, 1558.0, 1115.0, 827.0, 599.0, 420.0, 320.0, 245.0, 155.0, 112.0, 82.0, 63.0, 50.0, 28.0, 22.0, 17.0, 11.0, 6.0, 4.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0], "bins": [-6.78515625, -6.57354736328125, -6.3619384765625, -6.15032958984375, -5.938720703125, -5.72711181640625, -5.5155029296875, -5.30389404296875, -5.09228515625, -4.88067626953125, -4.6690673828125, -4.45745849609375, -4.245849609375, -4.03424072265625, -3.8226318359375, -3.61102294921875, -3.3994140625, -3.18780517578125, -2.9761962890625, -2.76458740234375, -2.552978515625, -2.34136962890625, -2.1297607421875, -1.91815185546875, -1.70654296875, -1.49493408203125, -1.2833251953125, -1.07171630859375, -0.860107421875, -0.64849853515625, -0.4368896484375, -0.22528076171875, -0.013671875, 0.19793701171875, 0.4095458984375, 0.62115478515625, 0.832763671875, 1.04437255859375, 1.2559814453125, 1.46759033203125, 1.67919921875, 1.89080810546875, 2.1024169921875, 2.31402587890625, 2.525634765625, 2.73724365234375, 2.9488525390625, 3.16046142578125, 3.3720703125, 3.58367919921875, 3.7952880859375, 4.00689697265625, 4.218505859375, 4.43011474609375, 4.6417236328125, 4.85333251953125, 5.06494140625, 5.27655029296875, 5.4881591796875, 5.69976806640625, 5.911376953125, 6.12298583984375, 6.3345947265625, 6.54620361328125, 6.7578125]}, "gradients/decoder.transformer.h.23.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 1.0, 6.0, 4.0, 7.0, 6.0, 6.0, 13.0, 10.0, 10.0, 4.0, 24.0, 18.0, 15.0, 24.0, 19.0, 30.0, 26.0, 31.0, 33.0, 53.0, 54.0, 84.0, 150.0, 223.0, 1314.0, 244.0, 143.0, 97.0, 51.0, 51.0, 40.0, 30.0, 37.0, 32.0, 23.0, 19.0, 19.0, 23.0, 18.0, 13.0, 15.0, 8.0, 7.0, 5.0, 6.0, 2.0, 4.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-7.49609375, -7.27294921875, -7.0498046875, -6.82666015625, -6.603515625, -6.38037109375, -6.1572265625, -5.93408203125, -5.7109375, -5.48779296875, -5.2646484375, -5.04150390625, -4.818359375, -4.59521484375, -4.3720703125, -4.14892578125, -3.92578125, -3.70263671875, -3.4794921875, -3.25634765625, -3.033203125, -2.81005859375, -2.5869140625, -2.36376953125, -2.140625, -1.91748046875, -1.6943359375, -1.47119140625, -1.248046875, -1.02490234375, -0.8017578125, -0.57861328125, -0.35546875, -0.13232421875, 0.0908203125, 0.31396484375, 0.537109375, 0.76025390625, 0.9833984375, 1.20654296875, 1.4296875, 1.65283203125, 1.8759765625, 2.09912109375, 2.322265625, 2.54541015625, 2.7685546875, 2.99169921875, 3.21484375, 3.43798828125, 3.6611328125, 3.88427734375, 4.107421875, 4.33056640625, 4.5537109375, 4.77685546875, 5.0, 5.22314453125, 5.4462890625, 5.66943359375, 5.892578125, 6.11572265625, 6.3388671875, 6.56201171875, 6.78515625]}, "gradients/decoder.transformer.h.23.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 5.0, 3.0, 3.0, 4.0, 6.0, 10.0, 10.0, 14.0, 14.0, 12.0, 23.0, 22.0, 30.0, 34.0, 34.0, 47.0, 71.0, 112.0, 218.0, 613.0, 2778.0, 358014.0, 2774986.0, 6927.0, 889.0, 288.0, 142.0, 100.0, 57.0, 37.0, 26.0, 30.0, 18.0, 24.0, 18.0, 17.0, 11.0, 7.0, 9.0, 15.0, 4.0, 6.0, 6.0, 6.0, 2.0, 4.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-26.25, -25.349853515625, -24.44970703125, -23.549560546875, -22.6494140625, -21.749267578125, -20.84912109375, -19.948974609375, -19.048828125, -18.148681640625, -17.24853515625, -16.348388671875, -15.4482421875, -14.548095703125, -13.64794921875, -12.747802734375, -11.84765625, -10.947509765625, -10.04736328125, -9.147216796875, -8.2470703125, -7.346923828125, -6.44677734375, -5.546630859375, -4.646484375, -3.746337890625, -2.84619140625, -1.946044921875, -1.0458984375, -0.145751953125, 0.75439453125, 1.654541015625, 2.5546875, 3.454833984375, 4.35498046875, 5.255126953125, 6.1552734375, 7.055419921875, 7.95556640625, 8.855712890625, 9.755859375, 10.656005859375, 11.55615234375, 12.456298828125, 13.3564453125, 14.256591796875, 15.15673828125, 16.056884765625, 16.95703125, 17.857177734375, 18.75732421875, 19.657470703125, 20.5576171875, 21.457763671875, 22.35791015625, 23.258056640625, 24.158203125, 25.058349609375, 25.95849609375, 26.858642578125, 27.7587890625, 28.658935546875, 29.55908203125, 30.459228515625, 31.359375]}, "gradients/decoder.transformer.h.23.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 11.0, 887.0, 118.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-79.46012878417969, -73.2244873046875, -66.98884582519531, -60.75320816040039, -54.5175666809082, -48.281925201416016, -42.046287536621094, -35.810646057128906, -29.57500457763672, -23.33936309814453, -17.103723526000977, -10.868083953857422, -4.632442474365234, 1.6031990051269531, 7.838836669921875, 14.074478149414062, 20.31011962890625, 26.545761108398438, 32.781402587890625, 39.01704025268555, 45.252681732177734, 51.48832321166992, 57.723960876464844, 63.95960235595703, 70.19524383544922, 76.4308853149414, 82.6665267944336, 88.90216064453125, 95.13780212402344, 101.37344360351562, 107.60908508300781, 113.8447265625, 120.08038330078125, 126.31602478027344, 132.55166625976562, 138.7873077392578, 145.02294921875, 151.2585906982422, 157.49423217773438, 163.7298583984375, 169.96551513671875, 176.20115661621094, 182.43679809570312, 188.6724395751953, 194.9080810546875, 201.1437225341797, 207.37936401367188, 213.614990234375, 219.8506317138672, 226.08627319335938, 232.32191467285156, 238.55755615234375, 244.79319763183594, 251.02883911132812, 257.26446533203125, 263.5001220703125, 269.7357482910156, 275.97137451171875, 282.20703125, 288.4426574707031, 294.6783142089844, 300.9139404296875, 307.14959716796875, 313.3852233886719, 319.6208801269531]}, "gradients/decoder.transformer.h.23.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 5.0, 2.0, 3.0, 3.0, 3.0, 10.0, 7.0, 11.0, 11.0, 16.0, 15.0, 13.0, 21.0, 22.0, 25.0, 18.0, 35.0, 31.0, 29.0, 37.0, 32.0, 40.0, 35.0, 39.0, 47.0, 36.0, 41.0, 25.0, 34.0, 49.0, 47.0, 28.0, 28.0, 19.0, 34.0, 24.0, 26.0, 14.0, 17.0, 11.0, 9.0, 13.0, 8.0, 12.0, 4.0, 7.0, 5.0, 4.0, 3.0, 1.0, 2.0, 0.0, 1.0], "bins": [-26.04438018798828, -25.22437286376953, -24.40436363220215, -23.5843563079834, -22.764347076416016, -21.944339752197266, -21.124332427978516, -20.304323196411133, -19.484315872192383, -18.664308547973633, -17.84429931640625, -17.0242919921875, -16.204282760620117, -15.384275436401367, -14.5642671585083, -13.744258880615234, -12.924250602722168, -12.104242324829102, -11.284234046936035, -10.464225769042969, -9.644218444824219, -8.824210166931152, -8.004201889038086, -7.184194087982178, -6.364185810089111, -5.544177532196045, -4.724169731140137, -3.9041614532470703, -3.084153413772583, -2.2641453742980957, -1.4441370964050293, -0.6241292953491211, 0.1958789825439453, 1.0158870220184326, 1.8358951807022095, 2.6559033393859863, 3.4759113788604736, 4.295919418334961, 5.115927696228027, 5.9359354972839355, 6.755943775177002, 7.575952053070068, 8.395959854125977, 9.215968132019043, 10.03597640991211, 10.85598373413086, 11.675992965698242, 12.496000289916992, 13.316008567810059, 14.136016845703125, 14.956025123596191, 15.776033401489258, 16.596040725708008, 17.41604995727539, 18.23605728149414, 19.05606460571289, 19.876073837280273, 20.696081161499023, 21.516090393066406, 22.336097717285156, 23.15610694885254, 23.97611427307129, 24.796123504638672, 25.616130828857422, 26.436138153076172]}, "gradients/decoder.transformer.h.22.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 4.0, 6.0, 2.0, 7.0, 12.0, 12.0, 15.0, 8.0, 18.0, 19.0, 21.0, 17.0, 35.0, 37.0, 34.0, 37.0, 46.0, 48.0, 44.0, 47.0, 46.0, 45.0, 37.0, 31.0, 50.0, 39.0, 35.0, 35.0, 31.0, 28.0, 27.0, 27.0, 16.0, 13.0, 19.0, 11.0, 11.0, 6.0, 6.0, 7.0, 7.0, 2.0, 2.0, 4.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.58203125, -2.4959716796875, -2.409912109375, -2.3238525390625, -2.23779296875, -2.1517333984375, -2.065673828125, -1.9796142578125, -1.8935546875, -1.8074951171875, -1.721435546875, -1.6353759765625, -1.54931640625, -1.4632568359375, -1.377197265625, -1.2911376953125, -1.205078125, -1.1190185546875, -1.032958984375, -0.9468994140625, -0.86083984375, -0.7747802734375, -0.688720703125, -0.6026611328125, -0.5166015625, -0.4305419921875, -0.344482421875, -0.2584228515625, -0.17236328125, -0.0863037109375, -0.000244140625, 0.0858154296875, 0.171875, 0.2579345703125, 0.343994140625, 0.4300537109375, 0.51611328125, 0.6021728515625, 0.688232421875, 0.7742919921875, 0.8603515625, 0.9464111328125, 1.032470703125, 1.1185302734375, 1.20458984375, 1.2906494140625, 1.376708984375, 1.4627685546875, 1.548828125, 1.6348876953125, 1.720947265625, 1.8070068359375, 1.89306640625, 1.9791259765625, 2.065185546875, 2.1512451171875, 2.2373046875, 2.3233642578125, 2.409423828125, 2.4954833984375, 2.58154296875, 2.6676025390625, 2.753662109375, 2.8397216796875, 2.92578125]}, "gradients/decoder.transformer.h.22.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 4.0, 3.0, 8.0, 13.0, 9.0, 12.0, 19.0, 32.0, 50.0, 53.0, 73.0, 108.0, 144.0, 190.0, 281.0, 486.0, 842.0, 1864.0, 4628.0, 15994.0, 74950.0, 590109.0, 2795175.0, 606658.0, 76989.0, 16401.0, 4995.0, 1791.0, 874.0, 461.0, 275.0, 202.0, 145.0, 133.0, 90.0, 59.0, 38.0, 28.0, 25.0, 18.0, 16.0, 14.0, 10.0, 8.0, 3.0, 2.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-9.4765625, -9.1553955078125, -8.834228515625, -8.5130615234375, -8.19189453125, -7.8707275390625, -7.549560546875, -7.2283935546875, -6.9072265625, -6.5860595703125, -6.264892578125, -5.9437255859375, -5.62255859375, -5.3013916015625, -4.980224609375, -4.6590576171875, -4.337890625, -4.0167236328125, -3.695556640625, -3.3743896484375, -3.05322265625, -2.7320556640625, -2.410888671875, -2.0897216796875, -1.7685546875, -1.4473876953125, -1.126220703125, -0.8050537109375, -0.48388671875, -0.1627197265625, 0.158447265625, 0.4796142578125, 0.80078125, 1.1219482421875, 1.443115234375, 1.7642822265625, 2.08544921875, 2.4066162109375, 2.727783203125, 3.0489501953125, 3.3701171875, 3.6912841796875, 4.012451171875, 4.3336181640625, 4.65478515625, 4.9759521484375, 5.297119140625, 5.6182861328125, 5.939453125, 6.2606201171875, 6.581787109375, 6.9029541015625, 7.22412109375, 7.5452880859375, 7.866455078125, 8.1876220703125, 8.5087890625, 8.8299560546875, 9.151123046875, 9.4722900390625, 9.79345703125, 10.1146240234375, 10.435791015625, 10.7569580078125, 11.078125]}, "gradients/decoder.transformer.h.22.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 5.0, 3.0, 4.0, 9.0, 7.0, 13.0, 24.0, 37.0, 30.0, 57.0, 68.0, 84.0, 111.0, 178.0, 208.0, 246.0, 279.0, 414.0, 404.0, 377.0, 350.0, 277.0, 243.0, 187.0, 123.0, 83.0, 72.0, 46.0, 47.0, 21.0, 28.0, 10.0, 10.0, 7.0, 6.0, 4.0, 3.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-10.828125, -10.54107666015625, -10.2540283203125, -9.96697998046875, -9.679931640625, -9.39288330078125, -9.1058349609375, -8.81878662109375, -8.53173828125, -8.24468994140625, -7.9576416015625, -7.67059326171875, -7.383544921875, -7.09649658203125, -6.8094482421875, -6.52239990234375, -6.2353515625, -5.94830322265625, -5.6612548828125, -5.37420654296875, -5.087158203125, -4.80010986328125, -4.5130615234375, -4.22601318359375, -3.93896484375, -3.65191650390625, -3.3648681640625, -3.07781982421875, -2.790771484375, -2.50372314453125, -2.2166748046875, -1.92962646484375, -1.642578125, -1.35552978515625, -1.0684814453125, -0.78143310546875, -0.494384765625, -0.20733642578125, 0.0797119140625, 0.36676025390625, 0.65380859375, 0.94085693359375, 1.2279052734375, 1.51495361328125, 1.802001953125, 2.08905029296875, 2.3760986328125, 2.66314697265625, 2.9501953125, 3.23724365234375, 3.5242919921875, 3.81134033203125, 4.098388671875, 4.38543701171875, 4.6724853515625, 4.95953369140625, 5.24658203125, 5.53363037109375, 5.8206787109375, 6.10772705078125, 6.394775390625, 6.68182373046875, 6.9688720703125, 7.25592041015625, 7.54296875]}, "gradients/decoder.transformer.h.22.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 4.0, 2.0, 7.0, 7.0, 4.0, 6.0, 9.0, 10.0, 15.0, 36.0, 40.0, 38.0, 66.0, 117.0, 187.0, 402.0, 887.0, 3103.0, 27988.0, 1003065.0, 3048841.0, 100878.0, 6116.0, 1352.0, 501.0, 230.0, 124.0, 73.0, 51.0, 47.0, 21.0, 21.0, 14.0, 8.0, 4.0, 3.0, 10.0, 2.0, 2.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.71875, -18.86865234375, -18.0185546875, -17.16845703125, -16.318359375, -15.46826171875, -14.6181640625, -13.76806640625, -12.91796875, -12.06787109375, -11.2177734375, -10.36767578125, -9.517578125, -8.66748046875, -7.8173828125, -6.96728515625, -6.1171875, -5.26708984375, -4.4169921875, -3.56689453125, -2.716796875, -1.86669921875, -1.0166015625, -0.16650390625, 0.68359375, 1.53369140625, 2.3837890625, 3.23388671875, 4.083984375, 4.93408203125, 5.7841796875, 6.63427734375, 7.484375, 8.33447265625, 9.1845703125, 10.03466796875, 10.884765625, 11.73486328125, 12.5849609375, 13.43505859375, 14.28515625, 15.13525390625, 15.9853515625, 16.83544921875, 17.685546875, 18.53564453125, 19.3857421875, 20.23583984375, 21.0859375, 21.93603515625, 22.7861328125, 23.63623046875, 24.486328125, 25.33642578125, 26.1865234375, 27.03662109375, 27.88671875, 28.73681640625, 29.5869140625, 30.43701171875, 31.287109375, 32.13720703125, 32.9873046875, 33.83740234375, 34.6875]}, "gradients/decoder.transformer.h.22.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 24.0, 178.0, 529.0, 251.0, 31.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.37868881225586, -55.20637512207031, -50.03406524658203, -44.861751556396484, -39.68943786621094, -34.517127990722656, -29.34481430053711, -24.172504425048828, -19.00019073486328, -13.827878952026367, -8.655566215515137, -3.4832534790039062, 1.6890583038330078, 6.861370086669922, 12.033683776855469, 17.20599365234375, 22.378307342529297, 27.55061912536621, 32.722930908203125, 37.89524459838867, 43.06755828857422, 48.2398681640625, 53.41218185424805, 58.58449172973633, 63.756805419921875, 68.92911529541016, 74.10143280029297, 79.27374267578125, 84.44605255126953, 89.61836242675781, 94.79067993164062, 99.9629898071289, 105.13529968261719, 110.30760955810547, 115.47992706298828, 120.65223693847656, 125.82454681396484, 130.99685668945312, 136.16917419433594, 141.34149169921875, 146.5137939453125, 151.6861114501953, 156.85841369628906, 162.03073120117188, 167.2030487060547, 172.37535095214844, 177.54766845703125, 182.719970703125, 187.89230346679688, 193.0646209716797, 198.23692321777344, 203.40924072265625, 208.58155822753906, 213.7538604736328, 218.92617797851562, 224.09848022460938, 229.2707977294922, 234.443115234375, 239.61541748046875, 244.78773498535156, 249.96005249023438, 255.13235473632812, 260.3046875, 265.47698974609375, 270.6492919921875]}, "gradients/decoder.transformer.h.22.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 4.0, 3.0, 4.0, 6.0, 8.0, 6.0, 12.0, 10.0, 18.0, 13.0, 25.0, 25.0, 31.0, 24.0, 29.0, 30.0, 33.0, 29.0, 24.0, 42.0, 43.0, 41.0, 53.0, 36.0, 38.0, 36.0, 45.0, 26.0, 45.0, 29.0, 26.0, 35.0, 25.0, 24.0, 23.0, 12.0, 24.0, 10.0, 10.0, 12.0, 10.0, 11.0, 5.0, 3.0, 2.0, 3.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.33553695678711, -28.34575843811035, -27.355981826782227, -26.36620330810547, -25.37642478942871, -24.386646270751953, -23.396869659423828, -22.40709114074707, -21.417312622070312, -20.427534103393555, -19.43775749206543, -18.447978973388672, -17.458200454711914, -16.468421936035156, -15.478645324707031, -14.488866806030273, -13.499089241027832, -12.50931167602539, -11.519533157348633, -10.529755592346191, -9.539977073669434, -8.550199508666992, -7.560421466827393, -6.570643424987793, -5.580865383148193, -4.591087341308594, -3.601309299468994, -2.6115314960479736, -1.621753454208374, -0.6319756507873535, 0.3578023910522461, 1.3475804328918457, 2.3373584747314453, 3.327136516571045, 4.3169145584106445, 5.306692123413086, 6.296470642089844, 7.286248207092285, 8.276025772094727, 9.265804290771484, 10.255582809448242, 11.245360374450684, 12.235138893127441, 13.224916458129883, 14.21469497680664, 15.204472541809082, 16.194250106811523, 17.18402862548828, 18.173805236816406, 19.163583755493164, 20.15336036682129, 21.143138885498047, 22.132917404174805, 23.122695922851562, 24.112472534179688, 25.102251052856445, 26.092029571533203, 27.08180809020996, 28.071584701538086, 29.061363220214844, 30.0511417388916, 31.04092025756836, 32.030696868896484, 33.020477294921875, 34.01025390625]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 5.0, 7.0, 3.0, 12.0, 9.0, 13.0, 9.0, 11.0, 12.0, 27.0, 31.0, 23.0, 16.0, 31.0, 32.0, 37.0, 29.0, 34.0, 27.0, 39.0, 25.0, 45.0, 47.0, 42.0, 43.0, 37.0, 41.0, 31.0, 22.0, 33.0, 26.0, 23.0, 28.0, 21.0, 11.0, 19.0, 10.0, 17.0, 12.0, 9.0, 13.0, 3.0, 8.0, 5.0, 5.0, 5.0, 5.0, 2.0, 2.0, 5.0, 1.0, 1.0, 2.0], "bins": [-2.578125, -2.500579833984375, -2.42303466796875, -2.345489501953125, -2.2679443359375, -2.190399169921875, -2.11285400390625, -2.035308837890625, -1.957763671875, -1.880218505859375, -1.80267333984375, -1.725128173828125, -1.6475830078125, -1.570037841796875, -1.49249267578125, -1.414947509765625, -1.33740234375, -1.259857177734375, -1.18231201171875, -1.104766845703125, -1.0272216796875, -0.949676513671875, -0.87213134765625, -0.794586181640625, -0.717041015625, -0.639495849609375, -0.56195068359375, -0.484405517578125, -0.4068603515625, -0.329315185546875, -0.25177001953125, -0.174224853515625, -0.0966796875, -0.019134521484375, 0.05841064453125, 0.135955810546875, 0.2135009765625, 0.291046142578125, 0.36859130859375, 0.446136474609375, 0.523681640625, 0.601226806640625, 0.67877197265625, 0.756317138671875, 0.8338623046875, 0.911407470703125, 0.98895263671875, 1.066497802734375, 1.14404296875, 1.221588134765625, 1.29913330078125, 1.376678466796875, 1.4542236328125, 1.531768798828125, 1.60931396484375, 1.686859130859375, 1.764404296875, 1.841949462890625, 1.91949462890625, 1.997039794921875, 2.0745849609375, 2.152130126953125, 2.22967529296875, 2.307220458984375, 2.384765625]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 6.0, 6.0, 12.0, 13.0, 26.0, 26.0, 36.0, 83.0, 96.0, 145.0, 209.0, 287.0, 453.0, 651.0, 985.0, 1444.0, 2042.0, 3081.0, 4474.0, 6737.0, 10196.0, 15216.0, 22400.0, 34304.0, 52072.0, 80125.0, 123075.0, 172778.0, 169600.0, 119417.0, 77822.0, 50672.0, 33179.0, 22225.0, 14690.0, 9617.0, 6713.0, 4373.0, 3033.0, 1977.0, 1364.0, 924.0, 628.0, 440.0, 290.0, 203.0, 127.0, 101.0, 68.0, 39.0, 33.0, 16.0, 9.0, 10.0, 9.0, 4.0, 5.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.1778564453125, -0.1719207763671875, -0.165985107421875, -0.1600494384765625, -0.15411376953125, -0.1481781005859375, -0.142242431640625, -0.1363067626953125, -0.13037109375, -0.1244354248046875, -0.118499755859375, -0.1125640869140625, -0.10662841796875, -0.1006927490234375, -0.094757080078125, -0.0888214111328125, -0.0828857421875, -0.0769500732421875, -0.071014404296875, -0.0650787353515625, -0.05914306640625, -0.0532073974609375, -0.047271728515625, -0.0413360595703125, -0.035400390625, -0.0294647216796875, -0.023529052734375, -0.0175933837890625, -0.01165771484375, -0.0057220458984375, 0.000213623046875, 0.0061492919921875, 0.0120849609375, 0.0180206298828125, 0.023956298828125, 0.0298919677734375, 0.03582763671875, 0.0417633056640625, 0.047698974609375, 0.0536346435546875, 0.0595703125, 0.0655059814453125, 0.071441650390625, 0.0773773193359375, 0.08331298828125, 0.0892486572265625, 0.095184326171875, 0.1011199951171875, 0.1070556640625, 0.1129913330078125, 0.118927001953125, 0.1248626708984375, 0.13079833984375, 0.1367340087890625, 0.142669677734375, 0.1486053466796875, 0.154541015625, 0.1604766845703125, 0.166412353515625, 0.1723480224609375, 0.17828369140625, 0.1842193603515625, 0.190155029296875, 0.1960906982421875, 0.2020263671875]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 5.0, 3.0, 5.0, 6.0, 13.0, 12.0, 8.0, 20.0, 22.0, 16.0, 24.0, 26.0, 30.0, 36.0, 40.0, 37.0, 42.0, 41.0, 43.0, 37.0, 1077.0, 43.0, 50.0, 36.0, 35.0, 44.0, 30.0, 48.0, 35.0, 37.0, 26.0, 28.0, 14.0, 10.0, 10.0, 16.0, 11.0, 5.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 1.0, 1.0], "bins": [-2.05859375, -1.99847412109375, -1.9383544921875, -1.87823486328125, -1.818115234375, -1.75799560546875, -1.6978759765625, -1.63775634765625, -1.57763671875, -1.51751708984375, -1.4573974609375, -1.39727783203125, -1.337158203125, -1.27703857421875, -1.2169189453125, -1.15679931640625, -1.0966796875, -1.03656005859375, -0.9764404296875, -0.91632080078125, -0.856201171875, -0.79608154296875, -0.7359619140625, -0.67584228515625, -0.61572265625, -0.55560302734375, -0.4954833984375, -0.43536376953125, -0.375244140625, -0.31512451171875, -0.2550048828125, -0.19488525390625, -0.134765625, -0.07464599609375, -0.0145263671875, 0.04559326171875, 0.105712890625, 0.16583251953125, 0.2259521484375, 0.28607177734375, 0.34619140625, 0.40631103515625, 0.4664306640625, 0.52655029296875, 0.586669921875, 0.64678955078125, 0.7069091796875, 0.76702880859375, 0.8271484375, 0.88726806640625, 0.9473876953125, 1.00750732421875, 1.067626953125, 1.12774658203125, 1.1878662109375, 1.24798583984375, 1.30810546875, 1.36822509765625, 1.4283447265625, 1.48846435546875, 1.548583984375, 1.60870361328125, 1.6688232421875, 1.72894287109375, 1.7890625]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 3.0, 8.0, 9.0, 12.0, 16.0, 20.0, 34.0, 49.0, 57.0, 75.0, 134.0, 204.0, 283.0, 418.0, 580.0, 940.0, 1322.0, 2040.0, 3087.0, 4834.0, 7391.0, 11676.0, 18656.0, 29371.0, 46772.0, 73751.0, 115592.0, 165012.0, 1228425.0, 137955.0, 90577.0, 57942.0, 36821.0, 22751.0, 14441.0, 8987.0, 5939.0, 3751.0, 2369.0, 1619.0, 1079.0, 650.0, 490.0, 328.0, 234.0, 136.0, 95.0, 73.0, 36.0, 31.0, 19.0, 19.0, 9.0, 7.0, 5.0, 5.0, 3.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.1707763671875, -0.16530799865722656, -0.15983963012695312, -0.1543712615966797, -0.14890289306640625, -0.1434345245361328, -0.13796615600585938, -0.13249778747558594, -0.1270294189453125, -0.12156105041503906, -0.11609268188476562, -0.11062431335449219, -0.10515594482421875, -0.09968757629394531, -0.09421920776367188, -0.08875083923339844, -0.083282470703125, -0.07781410217285156, -0.07234573364257812, -0.06687736511230469, -0.06140899658203125, -0.05594062805175781, -0.050472259521484375, -0.04500389099121094, -0.0395355224609375, -0.03406715393066406, -0.028598785400390625, -0.023130416870117188, -0.01766204833984375, -0.012193679809570312, -0.006725311279296875, -0.0012569427490234375, 0.00421142578125, 0.009679794311523438, 0.015148162841796875, 0.020616531372070312, 0.02608489990234375, 0.03155326843261719, 0.037021636962890625, 0.04249000549316406, 0.0479583740234375, 0.05342674255371094, 0.058895111083984375, 0.06436347961425781, 0.06983184814453125, 0.07530021667480469, 0.08076858520507812, 0.08623695373535156, 0.091705322265625, 0.09717369079589844, 0.10264205932617188, 0.10811042785644531, 0.11357879638671875, 0.11904716491699219, 0.12451553344726562, 0.12998390197753906, 0.1354522705078125, 0.14092063903808594, 0.14638900756835938, 0.1518573760986328, 0.15732574462890625, 0.1627941131591797, 0.16826248168945312, 0.17373085021972656, 0.17919921875]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 3.0, 1.0, 2.0, 4.0, 8.0, 9.0, 13.0, 5.0, 8.0, 9.0, 9.0, 24.0, 19.0, 26.0, 39.0, 32.0, 33.0, 51.0, 60.0, 51.0, 75.0, 71.0, 68.0, 51.0, 61.0, 43.0, 39.0, 39.0, 28.0, 23.0, 19.0, 19.0, 16.0, 9.0, 13.0, 9.0, 7.0, 2.0, 0.0, 3.0, 5.0, 1.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006823539733886719, -0.0006617531180381775, -0.0006411522626876831, -0.0006205514073371887, -0.0005999505519866943, -0.0005793496966362, -0.0005587488412857056, -0.0005381479859352112, -0.0005175471305847168, -0.0004969462752342224, -0.00047634541988372803, -0.00045574456453323364, -0.00043514370918273926, -0.0004145428538322449, -0.0003939419984817505, -0.0003733411431312561, -0.0003527402877807617, -0.00033213943243026733, -0.00031153857707977295, -0.00029093772172927856, -0.0002703368663787842, -0.0002497360110282898, -0.0002291351556777954, -0.00020853430032730103, -0.00018793344497680664, -0.00016733258962631226, -0.00014673173427581787, -0.00012613087892532349, -0.0001055300235748291, -8.492916822433472e-05, -6.432831287384033e-05, -4.372745752334595e-05, -2.3126602172851562e-05, -2.5257468223571777e-06, 1.8075108528137207e-05, 3.867596387863159e-05, 5.9276819229125977e-05, 7.987767457962036e-05, 0.00010047852993011475, 0.00012107938528060913, 0.00014168024063110352, 0.0001622810959815979, 0.00018288195133209229, 0.00020348280668258667, 0.00022408366203308105, 0.00024468451738357544, 0.0002652853727340698, 0.0002858862280845642, 0.0003064870834350586, 0.000327087938785553, 0.00034768879413604736, 0.00036828964948654175, 0.00038889050483703613, 0.0004094913601875305, 0.0004300922155380249, 0.0004506930708885193, 0.00047129392623901367, 0.0004918947815895081, 0.0005124956369400024, 0.0005330964922904968, 0.0005536973476409912, 0.0005742982029914856, 0.00059489905834198, 0.0006154999136924744, 0.0006361007690429688]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 5.0, 9.0, 2.0, 3.0, 4.0, 8.0, 4.0, 12.0, 16.0, 17.0, 28.0, 39.0, 32.0, 33.0, 39.0, 40.0, 60.0, 70.0, 94.0, 122.0, 142.0, 220.0, 388.0, 715.0, 12120.0, 1010937.0, 21079.0, 820.0, 411.0, 250.0, 165.0, 139.0, 109.0, 91.0, 56.0, 59.0, 45.0, 26.0, 45.0, 21.0, 15.0, 15.0, 12.0, 9.0, 9.0, 7.0, 4.0, 3.0, 4.0, 7.0, 4.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0], "bins": [-0.01049041748046875, -0.010168075561523438, -0.009845733642578125, -0.009523391723632812, -0.0092010498046875, -0.008878707885742188, -0.008556365966796875, -0.008234024047851562, -0.00791168212890625, -0.0075893402099609375, -0.007266998291015625, -0.0069446563720703125, -0.006622314453125, -0.0062999725341796875, -0.005977630615234375, -0.0056552886962890625, -0.00533294677734375, -0.0050106048583984375, -0.004688262939453125, -0.0043659210205078125, -0.0040435791015625, -0.0037212371826171875, -0.003398895263671875, -0.0030765533447265625, -0.00275421142578125, -0.0024318695068359375, -0.002109527587890625, -0.0017871856689453125, -0.00146484375, -0.0011425018310546875, -0.000820159912109375, -0.0004978179931640625, -0.00017547607421875, 0.0001468658447265625, 0.000469207763671875, 0.0007915496826171875, 0.0011138916015625, 0.0014362335205078125, 0.001758575439453125, 0.0020809173583984375, 0.00240325927734375, 0.0027256011962890625, 0.003047943115234375, 0.0033702850341796875, 0.003692626953125, 0.0040149688720703125, 0.004337310791015625, 0.0046596527099609375, 0.00498199462890625, 0.0053043365478515625, 0.005626678466796875, 0.0059490203857421875, 0.0062713623046875, 0.0065937042236328125, 0.006916046142578125, 0.0072383880615234375, 0.00756072998046875, 0.007883071899414062, 0.008205413818359375, 0.008527755737304688, 0.00885009765625, 0.009172439575195312, 0.009494781494140625, 0.009817123413085938, 0.01013946533203125]}, "gradients/decoder.transformer.h.22.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 5.0, 63.0, 516.0, 403.0, 28.0, 3.0], "bins": [-0.004223145544528961, -0.004153778776526451, -0.004084412008523941, -0.004015045240521431, -0.003945678472518921, -0.003876311471685767, -0.003806944703683257, -0.0037375777028501034, -0.0036682109348475933, -0.0035988441668450832, -0.003529477398842573, -0.0034601103980094194, -0.0033907436300069094, -0.0033213768620043993, -0.0032520100940018892, -0.0031826430931687355, -0.0031132763251662254, -0.0030439095571637154, -0.0029745427891612053, -0.0029051757883280516, -0.0028358090203255415, -0.0027664422523230314, -0.0026970754843205214, -0.0026277084834873676, -0.002558341948315501, -0.002488975180312991, -0.002419608412310481, -0.0023502414114773273, -0.0022808746434748173, -0.002211507875472307, -0.002142141107469797, -0.002072774339467287, -0.0020034073386341333, -0.0019340405706316233, -0.0018646736862137914, -0.0017953069182112813, -0.0017259400337934494, -0.0016565732657909393, -0.0015872064977884293, -0.0015178396133705974, -0.0014484727289527655, -0.0013791059609502554, -0.0013097390765324235, -0.0012403723085299134, -0.0011710054241120815, -0.0011016386561095715, -0.0010322718881070614, -0.0009629050036892295, -0.0008935381192713976, -0.0008241712930612266, -0.0007548044668510556, -0.0006854376988485456, -0.0006160708144307137, -0.0005467040464282036, -0.0004773372202180326, -0.0004079703940078616, -0.00033860356779769063, -0.00026923674158751965, -0.0001998699299292639, -0.00013050311827100813, -6.113629206083715e-05, 8.230534149333835e-06, 7.759733125567436e-05, 0.00014696415746584535, 0.0002163309691241011]}, "gradients/decoder.transformer.h.22.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 5.0, 3.0, 9.0, 19.0, 18.0, 25.0, 22.0, 17.0, 23.0, 28.0, 19.0, 28.0, 33.0, 26.0, 39.0, 44.0, 45.0, 44.0, 38.0, 43.0, 26.0, 48.0, 46.0, 36.0, 38.0, 31.0, 31.0, 26.0, 23.0, 26.0, 24.0, 17.0, 21.0, 17.0, 15.0, 11.0, 9.0, 8.0, 4.0, 7.0, 6.0, 2.0, 5.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-0.0003572702407836914, -0.0003469865769147873, -0.0003367029130458832, -0.00032641924917697906, -0.00031613558530807495, -0.00030585192143917084, -0.0002955682575702667, -0.0002852845937013626, -0.0002750009298324585, -0.0002647172659635544, -0.00025443360209465027, -0.00024414993822574615, -0.00023386627435684204, -0.00022358261048793793, -0.0002132989466190338, -0.0002030152827501297, -0.00019273161888122559, -0.00018244795501232147, -0.00017216429114341736, -0.00016188062727451324, -0.00015159696340560913, -0.00014131329953670502, -0.0001310296356678009, -0.00012074597179889679, -0.00011046230792999268, -0.00010017864406108856, -8.989498019218445e-05, -7.961131632328033e-05, -6.932765245437622e-05, -5.904398858547211e-05, -4.876032471656799e-05, -3.847666084766388e-05, -2.8192996978759766e-05, -1.7909333109855652e-05, -7.625669240951538e-06, 2.6579946279525757e-06, 1.294165849685669e-05, 2.3225322365760803e-05, 3.350898623466492e-05, 4.379265010356903e-05, 5.4076313972473145e-05, 6.435997784137726e-05, 7.464364171028137e-05, 8.492730557918549e-05, 9.52109694480896e-05, 0.00010549463331699371, 0.00011577829718589783, 0.00012606196105480194, 0.00013634562492370605, 0.00014662928879261017, 0.00015691295266151428, 0.0001671966165304184, 0.0001774802803993225, 0.00018776394426822662, 0.00019804760813713074, 0.00020833127200603485, 0.00021861493587493896, 0.00022889859974384308, 0.0002391822636127472, 0.0002494659274816513, 0.0002597495913505554, 0.00027003325521945953, 0.00028031691908836365, 0.00029060058295726776, 0.0003008842468261719]}, "gradients/decoder.transformer.h.22.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 5.0, 7.0, 3.0, 12.0, 9.0, 13.0, 9.0, 11.0, 12.0, 27.0, 31.0, 23.0, 16.0, 31.0, 32.0, 37.0, 29.0, 34.0, 27.0, 39.0, 25.0, 45.0, 47.0, 42.0, 43.0, 37.0, 41.0, 31.0, 22.0, 33.0, 26.0, 23.0, 28.0, 21.0, 11.0, 19.0, 10.0, 17.0, 12.0, 9.0, 13.0, 3.0, 8.0, 5.0, 5.0, 5.0, 5.0, 2.0, 2.0, 5.0, 1.0, 1.0, 2.0], "bins": [-2.578125, -2.500579833984375, -2.42303466796875, -2.345489501953125, -2.2679443359375, -2.190399169921875, -2.11285400390625, -2.035308837890625, -1.957763671875, -1.880218505859375, -1.80267333984375, -1.725128173828125, -1.6475830078125, -1.570037841796875, -1.49249267578125, -1.414947509765625, -1.33740234375, -1.259857177734375, -1.18231201171875, -1.104766845703125, -1.0272216796875, -0.949676513671875, -0.87213134765625, -0.794586181640625, -0.717041015625, -0.639495849609375, -0.56195068359375, -0.484405517578125, -0.4068603515625, -0.329315185546875, -0.25177001953125, -0.174224853515625, -0.0966796875, -0.019134521484375, 0.05841064453125, 0.135955810546875, 0.2135009765625, 0.291046142578125, 0.36859130859375, 0.446136474609375, 0.523681640625, 0.601226806640625, 0.67877197265625, 0.756317138671875, 0.8338623046875, 0.911407470703125, 0.98895263671875, 1.066497802734375, 1.14404296875, 1.221588134765625, 1.29913330078125, 1.376678466796875, 1.4542236328125, 1.531768798828125, 1.60931396484375, 1.686859130859375, 1.764404296875, 1.841949462890625, 1.91949462890625, 1.997039794921875, 2.0745849609375, 2.152130126953125, 2.22967529296875, 2.307220458984375, 2.384765625]}, "gradients/decoder.transformer.h.22.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 3.0, 5.0, 7.0, 15.0, 18.0, 24.0, 21.0, 30.0, 52.0, 70.0, 101.0, 131.0, 202.0, 269.0, 395.0, 617.0, 1141.0, 2240.0, 4461.0, 10154.0, 23990.0, 60426.0, 144013.0, 266518.0, 272482.0, 150818.0, 64065.0, 25538.0, 10519.0, 4709.0, 2251.0, 1172.0, 704.0, 418.0, 281.0, 201.0, 152.0, 95.0, 72.0, 51.0, 39.0, 27.0, 19.0, 13.0, 7.0, 7.0, 7.0, 7.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3.08203125, -2.985809326171875, -2.88958740234375, -2.793365478515625, -2.6971435546875, -2.600921630859375, -2.50469970703125, -2.408477783203125, -2.312255859375, -2.216033935546875, -2.11981201171875, -2.023590087890625, -1.9273681640625, -1.831146240234375, -1.73492431640625, -1.638702392578125, -1.54248046875, -1.446258544921875, -1.35003662109375, -1.253814697265625, -1.1575927734375, -1.061370849609375, -0.96514892578125, -0.868927001953125, -0.772705078125, -0.676483154296875, -0.58026123046875, -0.484039306640625, -0.3878173828125, -0.291595458984375, -0.19537353515625, -0.099151611328125, -0.0029296875, 0.093292236328125, 0.18951416015625, 0.285736083984375, 0.3819580078125, 0.478179931640625, 0.57440185546875, 0.670623779296875, 0.766845703125, 0.863067626953125, 0.95928955078125, 1.055511474609375, 1.1517333984375, 1.247955322265625, 1.34417724609375, 1.440399169921875, 1.53662109375, 1.632843017578125, 1.72906494140625, 1.825286865234375, 1.9215087890625, 2.017730712890625, 2.11395263671875, 2.210174560546875, 2.306396484375, 2.402618408203125, 2.49884033203125, 2.595062255859375, 2.6912841796875, 2.787506103515625, 2.88372802734375, 2.979949951171875, 3.076171875]}, "gradients/decoder.transformer.h.22.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 5.0, 5.0, 1.0, 15.0, 6.0, 8.0, 8.0, 14.0, 17.0, 23.0, 30.0, 22.0, 22.0, 26.0, 28.0, 35.0, 39.0, 57.0, 45.0, 108.0, 207.0, 1361.0, 286.0, 152.0, 112.0, 75.0, 49.0, 34.0, 36.0, 33.0, 26.0, 35.0, 16.0, 22.0, 14.0, 12.0, 19.0, 10.0, 7.0, 5.0, 6.0, 6.0, 9.0, 4.0, 6.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.6875, -9.3834228515625, -9.079345703125, -8.7752685546875, -8.47119140625, -8.1671142578125, -7.863037109375, -7.5589599609375, -7.2548828125, -6.9508056640625, -6.646728515625, -6.3426513671875, -6.03857421875, -5.7344970703125, -5.430419921875, -5.1263427734375, -4.822265625, -4.5181884765625, -4.214111328125, -3.9100341796875, -3.60595703125, -3.3018798828125, -2.997802734375, -2.6937255859375, -2.3896484375, -2.0855712890625, -1.781494140625, -1.4774169921875, -1.17333984375, -0.8692626953125, -0.565185546875, -0.2611083984375, 0.04296875, 0.3470458984375, 0.651123046875, 0.9552001953125, 1.25927734375, 1.5633544921875, 1.867431640625, 2.1715087890625, 2.4755859375, 2.7796630859375, 3.083740234375, 3.3878173828125, 3.69189453125, 3.9959716796875, 4.300048828125, 4.6041259765625, 4.908203125, 5.2122802734375, 5.516357421875, 5.8204345703125, 6.12451171875, 6.4285888671875, 6.732666015625, 7.0367431640625, 7.3408203125, 7.6448974609375, 7.948974609375, 8.2530517578125, 8.55712890625, 8.8612060546875, 9.165283203125, 9.4693603515625, 9.7734375]}, "gradients/decoder.transformer.h.22.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 9.0, 6.0, 8.0, 8.0, 6.0, 9.0, 13.0, 13.0, 19.0, 18.0, 26.0, 28.0, 43.0, 56.0, 80.0, 114.0, 180.0, 379.0, 994.0, 5638.0, 416284.0, 2701339.0, 17508.0, 1674.0, 517.0, 251.0, 135.0, 79.0, 50.0, 40.0, 21.0, 31.0, 22.0, 23.0, 20.0, 16.0, 14.0, 7.0, 6.0, 8.0, 9.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.546875, -19.88818359375, -19.2294921875, -18.57080078125, -17.912109375, -17.25341796875, -16.5947265625, -15.93603515625, -15.27734375, -14.61865234375, -13.9599609375, -13.30126953125, -12.642578125, -11.98388671875, -11.3251953125, -10.66650390625, -10.0078125, -9.34912109375, -8.6904296875, -8.03173828125, -7.373046875, -6.71435546875, -6.0556640625, -5.39697265625, -4.73828125, -4.07958984375, -3.4208984375, -2.76220703125, -2.103515625, -1.44482421875, -0.7861328125, -0.12744140625, 0.53125, 1.18994140625, 1.8486328125, 2.50732421875, 3.166015625, 3.82470703125, 4.4833984375, 5.14208984375, 5.80078125, 6.45947265625, 7.1181640625, 7.77685546875, 8.435546875, 9.09423828125, 9.7529296875, 10.41162109375, 11.0703125, 11.72900390625, 12.3876953125, 13.04638671875, 13.705078125, 14.36376953125, 15.0224609375, 15.68115234375, 16.33984375, 16.99853515625, 17.6572265625, 18.31591796875, 18.974609375, 19.63330078125, 20.2919921875, 20.95068359375, 21.609375]}, "gradients/decoder.transformer.h.22.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 6.0, 28.0, 187.0, 468.0, 259.0, 61.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-57.43870162963867, -55.869876861572266, -54.30105209350586, -52.73222732543945, -51.16339874267578, -49.594573974609375, -48.02574920654297, -46.45692443847656, -44.888099670410156, -43.31927490234375, -41.750450134277344, -40.18162536621094, -38.61280059814453, -37.04397201538086, -35.47514724731445, -33.90632247924805, -32.33749771118164, -30.768672943115234, -29.199848175048828, -27.63102149963379, -26.062196731567383, -24.493371963500977, -22.924545288085938, -21.35572052001953, -19.786895751953125, -18.21807098388672, -16.649246215820312, -15.080419540405273, -13.511594772338867, -11.942770004272461, -10.373944282531738, -8.805118560791016, -7.236293792724609, -5.667468547821045, -4.0986433029174805, -2.529818058013916, -0.9609928131103516, 0.6078324317932129, 2.1766576766967773, 3.7454833984375, 5.314308166503906, 6.883133411407471, 8.451958656311035, 10.020784378051758, 11.589609146118164, 13.15843391418457, 14.727259635925293, 16.296085357666016, 17.864910125732422, 19.433734893798828, 21.002559661865234, 22.571386337280273, 24.14021110534668, 25.709035873413086, 27.277862548828125, 28.84668731689453, 30.415512084960938, 31.984336853027344, 33.55316162109375, 35.121986389160156, 36.69081115722656, 38.259639739990234, 39.82846450805664, 41.39728927612305, 42.96611404418945]}, "gradients/decoder.transformer.h.22.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 6.0, 5.0, 8.0, 3.0, 7.0, 11.0, 10.0, 10.0, 7.0, 23.0, 26.0, 23.0, 23.0, 28.0, 32.0, 17.0, 23.0, 29.0, 33.0, 59.0, 42.0, 51.0, 31.0, 39.0, 29.0, 39.0, 24.0, 42.0, 28.0, 26.0, 35.0, 33.0, 37.0, 30.0, 19.0, 19.0, 19.0, 8.0, 15.0, 15.0, 10.0, 7.0, 7.0, 2.0, 4.0, 3.0, 1.0, 4.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0], "bins": [-30.044788360595703, -29.11012077331543, -28.175453186035156, -27.240785598754883, -26.30611801147461, -25.371450424194336, -24.436782836914062, -23.50211524963379, -22.567447662353516, -21.632780075073242, -20.69811248779297, -19.763444900512695, -18.828777313232422, -17.89410972595215, -16.959442138671875, -16.0247745513916, -15.090106964111328, -14.155439376831055, -13.220771789550781, -12.286104202270508, -11.351436614990234, -10.416769027709961, -9.482101440429688, -8.547433853149414, -7.612766265869141, -6.678098678588867, -5.743431091308594, -4.80876350402832, -3.874095916748047, -2.9394283294677734, -2.0047607421875, -1.0700931549072266, -0.13542556762695312, 0.7992420196533203, 1.7339096069335938, 2.668577194213867, 3.6032447814941406, 4.537912368774414, 5.4725799560546875, 6.407247543334961, 7.341915130615234, 8.276582717895508, 9.211250305175781, 10.145917892456055, 11.080585479736328, 12.015253067016602, 12.949920654296875, 13.884588241577148, 14.819255828857422, 15.753923416137695, 16.68859100341797, 17.623258590698242, 18.557926177978516, 19.49259376525879, 20.427261352539062, 21.361928939819336, 22.29659652709961, 23.231264114379883, 24.165931701660156, 25.10059928894043, 26.035266876220703, 26.969934463500977, 27.90460205078125, 28.839269638061523, 29.773937225341797]}, "gradients/decoder.transformer.h.21.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 4.0, 0.0, 2.0, 7.0, 7.0, 4.0, 9.0, 6.0, 14.0, 7.0, 19.0, 15.0, 17.0, 24.0, 19.0, 22.0, 28.0, 24.0, 31.0, 33.0, 22.0, 39.0, 30.0, 35.0, 35.0, 43.0, 31.0, 51.0, 36.0, 41.0, 38.0, 29.0, 30.0, 30.0, 29.0, 24.0, 24.0, 17.0, 16.0, 15.0, 22.0, 14.0, 12.0, 8.0, 6.0, 7.0, 4.0, 3.0, 5.0, 6.0, 7.0, 6.0, 1.0, 2.0, 4.0, 0.0, 2.0], "bins": [-2.63671875, -2.55706787109375, -2.4774169921875, -2.39776611328125, -2.318115234375, -2.23846435546875, -2.1588134765625, -2.07916259765625, -1.99951171875, -1.91986083984375, -1.8402099609375, -1.76055908203125, -1.680908203125, -1.60125732421875, -1.5216064453125, -1.44195556640625, -1.3623046875, -1.28265380859375, -1.2030029296875, -1.12335205078125, -1.043701171875, -0.96405029296875, -0.8843994140625, -0.80474853515625, -0.72509765625, -0.64544677734375, -0.5657958984375, -0.48614501953125, -0.406494140625, -0.32684326171875, -0.2471923828125, -0.16754150390625, -0.087890625, -0.00823974609375, 0.0714111328125, 0.15106201171875, 0.230712890625, 0.31036376953125, 0.3900146484375, 0.46966552734375, 0.54931640625, 0.62896728515625, 0.7086181640625, 0.78826904296875, 0.867919921875, 0.94757080078125, 1.0272216796875, 1.10687255859375, 1.1865234375, 1.26617431640625, 1.3458251953125, 1.42547607421875, 1.505126953125, 1.58477783203125, 1.6644287109375, 1.74407958984375, 1.82373046875, 1.90338134765625, 1.9830322265625, 2.06268310546875, 2.142333984375, 2.22198486328125, 2.3016357421875, 2.38128662109375, 2.4609375]}, "gradients/decoder.transformer.h.21.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 4.0, 0.0, 6.0, 9.0, 6.0, 7.0, 11.0, 16.0, 18.0, 22.0, 30.0, 34.0, 39.0, 56.0, 68.0, 97.0, 205.0, 355.0, 856.0, 2490.0, 9780.0, 58993.0, 555934.0, 2787277.0, 691813.0, 70241.0, 11276.0, 2668.0, 966.0, 378.0, 181.0, 114.0, 69.0, 46.0, 41.0, 41.0, 24.0, 19.0, 21.0, 22.0, 13.0, 11.0, 14.0, 8.0, 4.0, 3.0, 3.0, 1.0, 1.0, 1.0, 3.0, 3.0], "bins": [-10.8515625, -10.5462646484375, -10.240966796875, -9.9356689453125, -9.63037109375, -9.3250732421875, -9.019775390625, -8.7144775390625, -8.4091796875, -8.1038818359375, -7.798583984375, -7.4932861328125, -7.18798828125, -6.8826904296875, -6.577392578125, -6.2720947265625, -5.966796875, -5.6614990234375, -5.356201171875, -5.0509033203125, -4.74560546875, -4.4403076171875, -4.135009765625, -3.8297119140625, -3.5244140625, -3.2191162109375, -2.913818359375, -2.6085205078125, -2.30322265625, -1.9979248046875, -1.692626953125, -1.3873291015625, -1.08203125, -0.7767333984375, -0.471435546875, -0.1661376953125, 0.13916015625, 0.4444580078125, 0.749755859375, 1.0550537109375, 1.3603515625, 1.6656494140625, 1.970947265625, 2.2762451171875, 2.58154296875, 2.8868408203125, 3.192138671875, 3.4974365234375, 3.802734375, 4.1080322265625, 4.413330078125, 4.7186279296875, 5.02392578125, 5.3292236328125, 5.634521484375, 5.9398193359375, 6.2451171875, 6.5504150390625, 6.855712890625, 7.1610107421875, 7.46630859375, 7.7716064453125, 8.076904296875, 8.3822021484375, 8.6875]}, "gradients/decoder.transformer.h.21.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 3.0, 2.0, 6.0, 2.0, 1.0, 8.0, 10.0, 12.0, 20.0, 27.0, 44.0, 56.0, 77.0, 119.0, 138.0, 186.0, 271.0, 372.0, 459.0, 450.0, 458.0, 389.0, 267.0, 186.0, 136.0, 103.0, 89.0, 54.0, 46.0, 39.0, 21.0, 11.0, 7.0, 3.0, 5.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-12.375, -12.0531005859375, -11.731201171875, -11.4093017578125, -11.08740234375, -10.7655029296875, -10.443603515625, -10.1217041015625, -9.7998046875, -9.4779052734375, -9.156005859375, -8.8341064453125, -8.51220703125, -8.1903076171875, -7.868408203125, -7.5465087890625, -7.224609375, -6.9027099609375, -6.580810546875, -6.2589111328125, -5.93701171875, -5.6151123046875, -5.293212890625, -4.9713134765625, -4.6494140625, -4.3275146484375, -4.005615234375, -3.6837158203125, -3.36181640625, -3.0399169921875, -2.718017578125, -2.3961181640625, -2.07421875, -1.7523193359375, -1.430419921875, -1.1085205078125, -0.78662109375, -0.4647216796875, -0.142822265625, 0.1790771484375, 0.5009765625, 0.8228759765625, 1.144775390625, 1.4666748046875, 1.78857421875, 2.1104736328125, 2.432373046875, 2.7542724609375, 3.076171875, 3.3980712890625, 3.719970703125, 4.0418701171875, 4.36376953125, 4.6856689453125, 5.007568359375, 5.3294677734375, 5.6513671875, 5.9732666015625, 6.295166015625, 6.6170654296875, 6.93896484375, 7.2608642578125, 7.582763671875, 7.9046630859375, 8.2265625]}, "gradients/decoder.transformer.h.21.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 6.0, 16.0, 11.0, 26.0, 36.0, 63.0, 97.0, 221.0, 346.0, 787.0, 2109.0, 11850.0, 175439.0, 3002515.0, 950736.0, 43151.0, 4410.0, 1320.0, 536.0, 263.0, 131.0, 77.0, 63.0, 28.0, 14.0, 15.0, 6.0, 3.0, 6.0, 7.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.59375, -18.861572265625, -18.12939453125, -17.397216796875, -16.6650390625, -15.932861328125, -15.20068359375, -14.468505859375, -13.736328125, -13.004150390625, -12.27197265625, -11.539794921875, -10.8076171875, -10.075439453125, -9.34326171875, -8.611083984375, -7.87890625, -7.146728515625, -6.41455078125, -5.682373046875, -4.9501953125, -4.218017578125, -3.48583984375, -2.753662109375, -2.021484375, -1.289306640625, -0.55712890625, 0.175048828125, 0.9072265625, 1.639404296875, 2.37158203125, 3.103759765625, 3.8359375, 4.568115234375, 5.30029296875, 6.032470703125, 6.7646484375, 7.496826171875, 8.22900390625, 8.961181640625, 9.693359375, 10.425537109375, 11.15771484375, 11.889892578125, 12.6220703125, 13.354248046875, 14.08642578125, 14.818603515625, 15.55078125, 16.282958984375, 17.01513671875, 17.747314453125, 18.4794921875, 19.211669921875, 19.94384765625, 20.676025390625, 21.408203125, 22.140380859375, 22.87255859375, 23.604736328125, 24.3369140625, 25.069091796875, 25.80126953125, 26.533447265625, 27.265625]}, "gradients/decoder.transformer.h.21.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 5.0, 13.0, 21.0, 34.0, 74.0, 97.0, 139.0, 169.0, 143.0, 131.0, 76.0, 64.0, 28.0, 12.0, 6.0, 1.0, 1.0, 1.0, 1.0], "bins": [-94.88855743408203, -93.08930206298828, -91.29005432128906, -89.49079895019531, -87.69154357910156, -85.89228820800781, -84.0930404663086, -82.29378509521484, -80.49453735351562, -78.69528198242188, -76.89603424072266, -75.0967788696289, -73.29752349853516, -71.49827575683594, -69.69902038574219, -67.89976501464844, -66.10050964355469, -64.30125427246094, -62.50200271606445, -60.70275115966797, -58.90349578857422, -57.104244232177734, -55.30499267578125, -53.5057373046875, -51.706485748291016, -49.90723419189453, -48.10797882080078, -46.3087272644043, -44.50947570800781, -42.71022033691406, -40.91096878051758, -39.111717224121094, -37.312461853027344, -35.51321029663086, -33.71395492553711, -31.914703369140625, -30.115449905395508, -28.31619644165039, -26.516944885253906, -24.71769142150879, -22.91843605041504, -21.119182586669922, -19.319931030273438, -17.52067756652832, -15.721424102783203, -13.922170639038086, -12.122918128967285, -10.323665618896484, -8.524412155151367, -6.725159168243408, -4.925906181335449, -3.1266531944274902, -1.3274002075195312, 0.47185325622558594, 2.2711057662963867, 4.0703582763671875, 5.869611740112305, 7.668864727020264, 9.468117713928223, 11.267370223999023, 13.06662368774414, 14.865877151489258, 16.665130615234375, 18.46438217163086, 20.263635635375977]}, "gradients/decoder.transformer.h.21.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 4.0, 2.0, 4.0, 2.0, 2.0, 9.0, 11.0, 7.0, 10.0, 16.0, 21.0, 23.0, 14.0, 34.0, 31.0, 33.0, 33.0, 49.0, 42.0, 34.0, 40.0, 42.0, 44.0, 49.0, 48.0, 45.0, 61.0, 36.0, 45.0, 31.0, 34.0, 26.0, 25.0, 16.0, 13.0, 11.0, 12.0, 12.0, 7.0, 11.0, 6.0, 9.0, 5.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.22817611694336, -35.15205764770508, -34.07594299316406, -32.99982452392578, -31.923709869384766, -30.847591400146484, -29.771474838256836, -28.695358276367188, -27.61924171447754, -26.54312515258789, -25.467008590698242, -24.390892028808594, -23.314773559570312, -22.238658905029297, -21.162540435791016, -20.086423873901367, -19.01030731201172, -17.93419075012207, -16.858074188232422, -15.781956672668457, -14.705840110778809, -13.62972354888916, -12.553606033325195, -11.477489471435547, -10.401372909545898, -9.32525634765625, -8.249139785766602, -7.173022270202637, -6.096905708312988, -5.02078914642334, -3.944672107696533, -2.8685550689697266, -1.7924346923828125, -0.716317892074585, 0.3597989082336426, 1.4359157085418701, 2.5120325088500977, 3.588149070739746, 4.664266109466553, 5.740383148193359, 6.816499710083008, 7.892616271972656, 8.968732833862305, 10.04485034942627, 11.120966911315918, 12.197083473205566, 13.273200988769531, 14.34931755065918, 15.425434112548828, 16.501550674438477, 17.577667236328125, 18.653783798217773, 19.729900360107422, 20.806018829345703, 21.88213539123535, 22.958251953125, 24.03436851501465, 25.110485076904297, 26.186601638793945, 27.262718200683594, 28.338836669921875, 29.41495132446289, 30.491069793701172, 31.56718635559082, 32.64330291748047]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 3.0, 4.0, 5.0, 8.0, 7.0, 6.0, 7.0, 11.0, 11.0, 16.0, 10.0, 18.0, 20.0, 14.0, 26.0, 29.0, 38.0, 27.0, 33.0, 32.0, 36.0, 43.0, 40.0, 30.0, 48.0, 30.0, 51.0, 47.0, 30.0, 23.0, 35.0, 34.0, 25.0, 34.0, 20.0, 15.0, 17.0, 18.0, 10.0, 16.0, 15.0, 12.0, 6.0, 11.0, 15.0, 5.0, 2.0, 5.0, 2.0, 1.0, 2.0, 6.0, 3.0], "bins": [-3.033203125, -2.9478759765625, -2.862548828125, -2.7772216796875, -2.69189453125, -2.6065673828125, -2.521240234375, -2.4359130859375, -2.3505859375, -2.2652587890625, -2.179931640625, -2.0946044921875, -2.00927734375, -1.9239501953125, -1.838623046875, -1.7532958984375, -1.66796875, -1.5826416015625, -1.497314453125, -1.4119873046875, -1.32666015625, -1.2413330078125, -1.156005859375, -1.0706787109375, -0.9853515625, -0.9000244140625, -0.814697265625, -0.7293701171875, -0.64404296875, -0.5587158203125, -0.473388671875, -0.3880615234375, -0.302734375, -0.2174072265625, -0.132080078125, -0.0467529296875, 0.03857421875, 0.1239013671875, 0.209228515625, 0.2945556640625, 0.3798828125, 0.4652099609375, 0.550537109375, 0.6358642578125, 0.72119140625, 0.8065185546875, 0.891845703125, 0.9771728515625, 1.0625, 1.1478271484375, 1.233154296875, 1.3184814453125, 1.40380859375, 1.4891357421875, 1.574462890625, 1.6597900390625, 1.7451171875, 1.8304443359375, 1.915771484375, 2.0010986328125, 2.08642578125, 2.1717529296875, 2.257080078125, 2.3424072265625, 2.427734375]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 11.0, 6.0, 6.0, 13.0, 14.0, 22.0, 38.0, 50.0, 88.0, 126.0, 195.0, 293.0, 400.0, 561.0, 844.0, 1280.0, 1888.0, 2699.0, 4042.0, 5897.0, 8733.0, 13083.0, 19707.0, 30710.0, 46931.0, 72198.0, 110736.0, 162771.0, 180899.0, 132650.0, 87331.0, 56157.0, 36448.0, 24038.0, 15645.0, 10310.0, 7097.0, 4680.0, 3007.0, 2153.0, 1570.0, 1040.0, 679.0, 504.0, 299.0, 200.0, 170.0, 124.0, 75.0, 55.0, 35.0, 19.0, 14.0, 14.0, 8.0, 5.0, 1.0, 1.0, 2.0], "bins": [-0.220458984375, -0.2138214111328125, -0.207183837890625, -0.2005462646484375, -0.19390869140625, -0.1872711181640625, -0.180633544921875, -0.1739959716796875, -0.1673583984375, -0.1607208251953125, -0.154083251953125, -0.1474456787109375, -0.14080810546875, -0.1341705322265625, -0.127532958984375, -0.1208953857421875, -0.1142578125, -0.1076202392578125, -0.100982666015625, -0.0943450927734375, -0.08770751953125, -0.0810699462890625, -0.074432373046875, -0.0677947998046875, -0.0611572265625, -0.0545196533203125, -0.047882080078125, -0.0412445068359375, -0.03460693359375, -0.0279693603515625, -0.021331787109375, -0.0146942138671875, -0.008056640625, -0.0014190673828125, 0.005218505859375, 0.0118560791015625, 0.01849365234375, 0.0251312255859375, 0.031768798828125, 0.0384063720703125, 0.0450439453125, 0.0516815185546875, 0.058319091796875, 0.0649566650390625, 0.07159423828125, 0.0782318115234375, 0.084869384765625, 0.0915069580078125, 0.09814453125, 0.1047821044921875, 0.111419677734375, 0.1180572509765625, 0.12469482421875, 0.1313323974609375, 0.137969970703125, 0.1446075439453125, 0.1512451171875, 0.1578826904296875, 0.164520263671875, 0.1711578369140625, 0.17779541015625, 0.1844329833984375, 0.191070556640625, 0.1977081298828125, 0.204345703125]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 6.0, 7.0, 3.0, 5.0, 5.0, 7.0, 8.0, 11.0, 11.0, 19.0, 22.0, 27.0, 24.0, 26.0, 20.0, 34.0, 29.0, 35.0, 35.0, 46.0, 32.0, 50.0, 45.0, 1070.0, 52.0, 41.0, 35.0, 34.0, 35.0, 28.0, 28.0, 20.0, 29.0, 21.0, 24.0, 18.0, 18.0, 15.0, 8.0, 8.0, 7.0, 8.0, 11.0, 9.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.7548828125, -1.69659423828125, -1.6383056640625, -1.58001708984375, -1.521728515625, -1.46343994140625, -1.4051513671875, -1.34686279296875, -1.28857421875, -1.23028564453125, -1.1719970703125, -1.11370849609375, -1.055419921875, -0.99713134765625, -0.9388427734375, -0.88055419921875, -0.822265625, -0.76397705078125, -0.7056884765625, -0.64739990234375, -0.589111328125, -0.53082275390625, -0.4725341796875, -0.41424560546875, -0.35595703125, -0.29766845703125, -0.2393798828125, -0.18109130859375, -0.122802734375, -0.06451416015625, -0.0062255859375, 0.05206298828125, 0.1103515625, 0.16864013671875, 0.2269287109375, 0.28521728515625, 0.343505859375, 0.40179443359375, 0.4600830078125, 0.51837158203125, 0.57666015625, 0.63494873046875, 0.6932373046875, 0.75152587890625, 0.809814453125, 0.86810302734375, 0.9263916015625, 0.98468017578125, 1.04296875, 1.10125732421875, 1.1595458984375, 1.21783447265625, 1.276123046875, 1.33441162109375, 1.3927001953125, 1.45098876953125, 1.50927734375, 1.56756591796875, 1.6258544921875, 1.68414306640625, 1.742431640625, 1.80072021484375, 1.8590087890625, 1.91729736328125, 1.9755859375]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 6.0, 7.0, 19.0, 21.0, 19.0, 40.0, 63.0, 90.0, 153.0, 221.0, 402.0, 509.0, 908.0, 1292.0, 2249.0, 3454.0, 5686.0, 8675.0, 13959.0, 21910.0, 34166.0, 53562.0, 85984.0, 134467.0, 1235174.0, 174318.0, 117520.0, 74258.0, 46666.0, 29698.0, 18939.0, 11921.0, 7708.0, 4848.0, 3085.0, 1871.0, 1206.0, 763.0, 471.0, 306.0, 164.0, 124.0, 93.0, 38.0, 34.0, 29.0, 14.0, 11.0, 6.0, 3.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.1888427734375, -0.18272018432617188, -0.17659759521484375, -0.17047500610351562, -0.1643524169921875, -0.15822982788085938, -0.15210723876953125, -0.14598464965820312, -0.139862060546875, -0.13373947143554688, -0.12761688232421875, -0.12149429321289062, -0.1153717041015625, -0.10924911499023438, -0.10312652587890625, -0.09700393676757812, -0.09088134765625, -0.08475875854492188, -0.07863616943359375, -0.07251358032226562, -0.0663909912109375, -0.060268402099609375, -0.05414581298828125, -0.048023223876953125, -0.041900634765625, -0.035778045654296875, -0.02965545654296875, -0.023532867431640625, -0.0174102783203125, -0.011287689208984375, -0.00516510009765625, 0.000957489013671875, 0.007080078125, 0.013202667236328125, 0.01932525634765625, 0.025447845458984375, 0.0315704345703125, 0.037693023681640625, 0.04381561279296875, 0.049938201904296875, 0.056060791015625, 0.062183380126953125, 0.06830596923828125, 0.07442855834960938, 0.0805511474609375, 0.08667373657226562, 0.09279632568359375, 0.09891891479492188, 0.10504150390625, 0.11116409301757812, 0.11728668212890625, 0.12340927124023438, 0.1295318603515625, 0.13565444946289062, 0.14177703857421875, 0.14789962768554688, 0.154022216796875, 0.16014480590820312, 0.16626739501953125, 0.17238998413085938, 0.1785125732421875, 0.18463516235351562, 0.19075775146484375, 0.19688034057617188, 0.2030029296875]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 2.0, 5.0, 0.0, 2.0, 4.0, 2.0, 3.0, 2.0, 10.0, 7.0, 9.0, 12.0, 15.0, 17.0, 19.0, 21.0, 36.0, 44.0, 35.0, 45.0, 43.0, 34.0, 56.0, 59.0, 47.0, 68.0, 60.0, 55.0, 53.0, 47.0, 34.0, 33.0, 25.0, 21.0, 18.0, 17.0, 10.0, 5.0, 11.0, 7.0, 2.0, 3.0, 3.0, 4.0, 3.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006127357482910156, -0.0005902275443077087, -0.0005677193403244019, -0.000545211136341095, -0.0005227029323577881, -0.0005001947283744812, -0.0004776865243911743, -0.00045517832040786743, -0.00043267011642456055, -0.00041016191244125366, -0.0003876537084579468, -0.0003651455044746399, -0.000342637300491333, -0.0003201290965080261, -0.00029762089252471924, -0.00027511268854141235, -0.00025260448455810547, -0.00023009628057479858, -0.0002075880765914917, -0.00018507987260818481, -0.00016257166862487793, -0.00014006346464157104, -0.00011755526065826416, -9.504705667495728e-05, -7.253885269165039e-05, -5.0030648708343506e-05, -2.752244472503662e-05, -5.014240741729736e-06, 1.749396324157715e-05, 4.000216722488403e-05, 6.251037120819092e-05, 8.50185751914978e-05, 0.00010752677917480469, 0.00013003498315811157, 0.00015254318714141846, 0.00017505139112472534, 0.00019755959510803223, 0.0002200677990913391, 0.000242576003074646, 0.0002650842070579529, 0.00028759241104125977, 0.00031010061502456665, 0.00033260881900787354, 0.0003551170229911804, 0.0003776252269744873, 0.0004001334309577942, 0.0004226416349411011, 0.00044514983892440796, 0.00046765804290771484, 0.0004901662468910217, 0.0005126744508743286, 0.0005351826548576355, 0.0005576908588409424, 0.0005801990628242493, 0.0006027072668075562, 0.000625215470790863, 0.0006477236747741699, 0.0006702318787574768, 0.0006927400827407837, 0.0007152482867240906, 0.0007377564907073975, 0.0007602646946907043, 0.0007827728986740112, 0.0008052811026573181, 0.000827789306640625]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 4.0, 4.0, 6.0, 5.0, 6.0, 8.0, 14.0, 18.0, 13.0, 24.0, 41.0, 61.0, 60.0, 71.0, 89.0, 157.0, 186.0, 241.0, 440.0, 1230.0, 206226.0, 835366.0, 2643.0, 452.0, 324.0, 216.0, 145.0, 121.0, 97.0, 62.0, 45.0, 30.0, 34.0, 29.0, 17.0, 15.0, 16.0, 8.0, 7.0, 6.0, 9.0, 5.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.01371002197265625, -0.013308525085449219, -0.012907028198242188, -0.012505531311035156, -0.012104034423828125, -0.011702537536621094, -0.011301040649414062, -0.010899543762207031, -0.010498046875, -0.010096549987792969, -0.009695053100585938, -0.009293556213378906, -0.008892059326171875, -0.008490562438964844, -0.008089065551757812, -0.007687568664550781, -0.00728607177734375, -0.006884574890136719, -0.0064830780029296875, -0.006081581115722656, -0.005680084228515625, -0.005278587341308594, -0.0048770904541015625, -0.004475593566894531, -0.0040740966796875, -0.0036725997924804688, -0.0032711029052734375, -0.0028696060180664062, -0.002468109130859375, -0.0020666122436523438, -0.0016651153564453125, -0.0012636184692382812, -0.00086212158203125, -0.00046062469482421875, -5.91278076171875e-05, 0.00034236907958984375, 0.000743865966796875, 0.0011453628540039062, 0.0015468597412109375, 0.0019483566284179688, 0.002349853515625, 0.0027513504028320312, 0.0031528472900390625, 0.0035543441772460938, 0.003955841064453125, 0.004357337951660156, 0.0047588348388671875, 0.005160331726074219, 0.00556182861328125, 0.005963325500488281, 0.0063648223876953125, 0.006766319274902344, 0.007167816162109375, 0.007569313049316406, 0.007970809936523438, 0.008372306823730469, 0.0087738037109375, 0.009175300598144531, 0.009576797485351562, 0.009978294372558594, 0.010379791259765625, 0.010781288146972656, 0.011182785034179688, 0.011584281921386719, 0.01198577880859375]}, "gradients/decoder.transformer.h.21.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 11.0, 13.0, 21.0, 42.0, 78.0, 107.0, 139.0, 134.0, 152.0, 132.0, 80.0, 47.0, 23.0, 17.0, 8.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000258095795288682, -0.00024033985391724855, -0.0002225839125458151, -0.0002048279857262969, -0.00018707204435486346, -0.00016931610298343003, -0.00015156017616391182, -0.00013380423479247838, -0.00011604829342104495, -9.829235204961151e-05, -8.053641795413569e-05, -6.278048385865986e-05, -4.5024542487226427e-05, -2.726860111579299e-05, -9.512667020317167e-06, 8.243267075158656e-06, 2.5999208446592093e-05, 4.375514618004672e-05, 6.151108391350135e-05, 7.926701800897717e-05, 9.702295938041061e-05, 0.00011477890075184405, 0.00013253482757136226, 0.0001502907689427957, 0.00016804671031422913, 0.00018580265168566257, 0.000203558593057096, 0.0002213145198766142, 0.00023907046124804765, 0.0002568264026194811, 0.0002745823294389993, 0.0002923382562585175, 0.0003100942703895271, 0.0003278501972090453, 0.00034560615313239396, 0.00036336207995191216, 0.00038111803587526083, 0.00039887396269477904, 0.00041662988951429725, 0.0004343858454376459, 0.0004521417722571641, 0.00046989769907668233, 0.000487653655000031, 0.0005054096109233797, 0.0005231655086390674, 0.0005409214645624161, 0.0005586774204857647, 0.0005764333182014525, 0.0005941892741248012, 0.0006119452300481498, 0.0006297011277638376, 0.0006474570836871862, 0.0006652130396105349, 0.0006829689955338836, 0.0007007248932495713, 0.00071848084917292, 0.0007362368050962687, 0.0007539927610196173, 0.0007717486587353051, 0.0007895046146586537, 0.0008072605705820024, 0.0008250165265053511, 0.0008427724242210388, 0.0008605283801443875, 0.0008782842778600752]}, "gradients/decoder.transformer.h.21.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 5.0, 1.0, 6.0, 5.0, 4.0, 3.0, 10.0, 11.0, 14.0, 15.0, 20.0, 24.0, 27.0, 19.0, 28.0, 29.0, 35.0, 42.0, 44.0, 42.0, 45.0, 50.0, 42.0, 44.0, 39.0, 41.0, 43.0, 36.0, 43.0, 27.0, 38.0, 31.0, 17.0, 22.0, 18.0, 22.0, 19.0, 10.0, 9.0, 9.0, 8.0, 5.0, 5.0, 1.0, 1.0, 5.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004277229309082031, -0.00041467975825071335, -0.00040163658559322357, -0.0003885934129357338, -0.000375550240278244, -0.00036250706762075424, -0.00034946389496326447, -0.0003364207223057747, -0.0003233775496482849, -0.00031033437699079514, -0.00029729120433330536, -0.0002842480316758156, -0.0002712048590183258, -0.00025816168636083603, -0.00024511851370334625, -0.00023207534104585648, -0.0002190321683883667, -0.00020598899573087692, -0.00019294582307338715, -0.00017990265041589737, -0.0001668594777584076, -0.00015381630510091782, -0.00014077313244342804, -0.00012772995978593826, -0.00011468678712844849, -0.00010164361447095871, -8.860044181346893e-05, -7.555726915597916e-05, -6.251409649848938e-05, -4.94709238409996e-05, -3.642775118350983e-05, -2.338457852602005e-05, -1.0341405868530273e-05, 2.701766788959503e-06, 1.574493944644928e-05, 2.8788112103939056e-05, 4.183128476142883e-05, 5.487445741891861e-05, 6.791763007640839e-05, 8.096080273389816e-05, 9.400397539138794e-05, 0.00010704714804887772, 0.00012009032070636749, 0.00013313349336385727, 0.00014617666602134705, 0.00015921983867883682, 0.0001722630113363266, 0.00018530618399381638, 0.00019834935665130615, 0.00021139252930879593, 0.0002244357019662857, 0.00023747887462377548, 0.00025052204728126526, 0.00026356521993875504, 0.0002766083925962448, 0.0002896515652537346, 0.00030269473791122437, 0.00031573791056871414, 0.0003287810832262039, 0.0003418242558836937, 0.00035486742854118347, 0.00036791060119867325, 0.000380953773856163, 0.0003939969465136528, 0.0004070401191711426]}, "gradients/decoder.transformer.h.21.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 3.0, 4.0, 5.0, 8.0, 7.0, 6.0, 7.0, 11.0, 11.0, 16.0, 10.0, 18.0, 20.0, 14.0, 26.0, 29.0, 38.0, 27.0, 33.0, 32.0, 36.0, 43.0, 40.0, 30.0, 48.0, 30.0, 51.0, 47.0, 30.0, 23.0, 35.0, 34.0, 25.0, 34.0, 20.0, 15.0, 17.0, 18.0, 10.0, 16.0, 15.0, 12.0, 6.0, 11.0, 15.0, 5.0, 2.0, 5.0, 2.0, 1.0, 2.0, 6.0, 3.0], "bins": [-3.033203125, -2.9478759765625, -2.862548828125, -2.7772216796875, -2.69189453125, -2.6065673828125, -2.521240234375, -2.4359130859375, -2.3505859375, -2.2652587890625, -2.179931640625, -2.0946044921875, -2.00927734375, -1.9239501953125, -1.838623046875, -1.7532958984375, -1.66796875, -1.5826416015625, -1.497314453125, -1.4119873046875, -1.32666015625, -1.2413330078125, -1.156005859375, -1.0706787109375, -0.9853515625, -0.9000244140625, -0.814697265625, -0.7293701171875, -0.64404296875, -0.5587158203125, -0.473388671875, -0.3880615234375, -0.302734375, -0.2174072265625, -0.132080078125, -0.0467529296875, 0.03857421875, 0.1239013671875, 0.209228515625, 0.2945556640625, 0.3798828125, 0.4652099609375, 0.550537109375, 0.6358642578125, 0.72119140625, 0.8065185546875, 0.891845703125, 0.9771728515625, 1.0625, 1.1478271484375, 1.233154296875, 1.3184814453125, 1.40380859375, 1.4891357421875, 1.574462890625, 1.6597900390625, 1.7451171875, 1.8304443359375, 1.915771484375, 2.0010986328125, 2.08642578125, 2.1717529296875, 2.257080078125, 2.3424072265625, 2.427734375]}, "gradients/decoder.transformer.h.21.attn.c_proj.weight": {"_type": "histogram", "values": [4.0, 2.0, 1.0, 1.0, 4.0, 1.0, 4.0, 7.0, 17.0, 14.0, 27.0, 26.0, 49.0, 42.0, 64.0, 92.0, 137.0, 203.0, 255.0, 356.0, 560.0, 865.0, 1340.0, 2208.0, 3505.0, 5782.0, 9795.0, 18147.0, 37416.0, 85982.0, 202316.0, 336722.0, 187501.0, 79216.0, 34826.0, 17077.0, 9246.0, 5379.0, 3273.0, 2126.0, 1328.0, 805.0, 550.0, 387.0, 255.0, 178.0, 136.0, 77.0, 68.0, 50.0, 42.0, 39.0, 25.0, 11.0, 11.0, 5.0, 3.0, 2.0, 4.0, 3.0, 1.0, 3.0, 1.0, 4.0], "bins": [-2.75, -2.662567138671875, -2.57513427734375, -2.487701416015625, -2.4002685546875, -2.312835693359375, -2.22540283203125, -2.137969970703125, -2.050537109375, -1.963104248046875, -1.87567138671875, -1.788238525390625, -1.7008056640625, -1.613372802734375, -1.52593994140625, -1.438507080078125, -1.35107421875, -1.263641357421875, -1.17620849609375, -1.088775634765625, -1.0013427734375, -0.913909912109375, -0.82647705078125, -0.739044189453125, -0.651611328125, -0.564178466796875, -0.47674560546875, -0.389312744140625, -0.3018798828125, -0.214447021484375, -0.12701416015625, -0.039581298828125, 0.0478515625, 0.135284423828125, 0.22271728515625, 0.310150146484375, 0.3975830078125, 0.485015869140625, 0.57244873046875, 0.659881591796875, 0.747314453125, 0.834747314453125, 0.92218017578125, 1.009613037109375, 1.0970458984375, 1.184478759765625, 1.27191162109375, 1.359344482421875, 1.44677734375, 1.534210205078125, 1.62164306640625, 1.709075927734375, 1.7965087890625, 1.883941650390625, 1.97137451171875, 2.058807373046875, 2.146240234375, 2.233673095703125, 2.32110595703125, 2.408538818359375, 2.4959716796875, 2.583404541015625, 2.67083740234375, 2.758270263671875, 2.845703125]}, "gradients/decoder.transformer.h.21.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 5.0, 7.0, 2.0, 4.0, 8.0, 5.0, 10.0, 6.0, 11.0, 12.0, 18.0, 17.0, 22.0, 23.0, 24.0, 37.0, 36.0, 35.0, 35.0, 49.0, 47.0, 85.0, 137.0, 1427.0, 405.0, 114.0, 76.0, 46.0, 51.0, 40.0, 27.0, 30.0, 25.0, 20.0, 30.0, 26.0, 19.0, 7.0, 9.0, 14.0, 15.0, 14.0, 6.0, 5.0, 10.0, 5.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-9.796875, -9.4805908203125, -9.164306640625, -8.8480224609375, -8.53173828125, -8.2154541015625, -7.899169921875, -7.5828857421875, -7.2666015625, -6.9503173828125, -6.634033203125, -6.3177490234375, -6.00146484375, -5.6851806640625, -5.368896484375, -5.0526123046875, -4.736328125, -4.4200439453125, -4.103759765625, -3.7874755859375, -3.47119140625, -3.1549072265625, -2.838623046875, -2.5223388671875, -2.2060546875, -1.8897705078125, -1.573486328125, -1.2572021484375, -0.94091796875, -0.6246337890625, -0.308349609375, 0.0079345703125, 0.32421875, 0.6405029296875, 0.956787109375, 1.2730712890625, 1.58935546875, 1.9056396484375, 2.221923828125, 2.5382080078125, 2.8544921875, 3.1707763671875, 3.487060546875, 3.8033447265625, 4.11962890625, 4.4359130859375, 4.752197265625, 5.0684814453125, 5.384765625, 5.7010498046875, 6.017333984375, 6.3336181640625, 6.64990234375, 6.9661865234375, 7.282470703125, 7.5987548828125, 7.9150390625, 8.2313232421875, 8.547607421875, 8.8638916015625, 9.18017578125, 9.4964599609375, 9.812744140625, 10.1290283203125, 10.4453125]}, "gradients/decoder.transformer.h.21.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 1.0, 3.0, 4.0, 5.0, 11.0, 7.0, 10.0, 14.0, 24.0, 18.0, 21.0, 26.0, 63.0, 54.0, 63.0, 103.0, 149.0, 314.0, 756.0, 3734.0, 61550.0, 2975123.0, 97196.0, 4602.0, 879.0, 340.0, 210.0, 106.0, 85.0, 56.0, 40.0, 33.0, 30.0, 19.0, 15.0, 9.0, 10.0, 11.0, 4.0, 7.0, 7.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-22.109375, -21.487548828125, -20.86572265625, -20.243896484375, -19.6220703125, -19.000244140625, -18.37841796875, -17.756591796875, -17.134765625, -16.512939453125, -15.89111328125, -15.269287109375, -14.6474609375, -14.025634765625, -13.40380859375, -12.781982421875, -12.16015625, -11.538330078125, -10.91650390625, -10.294677734375, -9.6728515625, -9.051025390625, -8.42919921875, -7.807373046875, -7.185546875, -6.563720703125, -5.94189453125, -5.320068359375, -4.6982421875, -4.076416015625, -3.45458984375, -2.832763671875, -2.2109375, -1.589111328125, -0.96728515625, -0.345458984375, 0.2763671875, 0.898193359375, 1.52001953125, 2.141845703125, 2.763671875, 3.385498046875, 4.00732421875, 4.629150390625, 5.2509765625, 5.872802734375, 6.49462890625, 7.116455078125, 7.73828125, 8.360107421875, 8.98193359375, 9.603759765625, 10.2255859375, 10.847412109375, 11.46923828125, 12.091064453125, 12.712890625, 13.334716796875, 13.95654296875, 14.578369140625, 15.2001953125, 15.822021484375, 16.44384765625, 17.065673828125, 17.6875]}, "gradients/decoder.transformer.h.21.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 6.0, 10.0, 66.0, 258.0, 414.0, 201.0, 52.0, 8.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.334421157836914, -8.718422889709473, -7.102424621582031, -5.48642635345459, -3.8704280853271484, -2.254429817199707, -0.6384315490722656, 0.9775667190551758, 2.593564987182617, 4.209563255310059, 5.8255615234375, 7.441559791564941, 9.057558059692383, 10.673556327819824, 12.289554595947266, 13.905552864074707, 15.521551132202148, 17.137550354003906, 18.75354766845703, 20.369544982910156, 21.985544204711914, 23.601543426513672, 25.217540740966797, 26.833538055419922, 28.44953727722168, 30.065536499023438, 31.681533813476562, 33.29753112792969, 34.91352844238281, 36.5295295715332, 38.14552688598633, 39.76152420043945, 41.377525329589844, 42.99352264404297, 44.609519958496094, 46.225521087646484, 47.84151840209961, 49.457515716552734, 51.073516845703125, 52.68951416015625, 54.305511474609375, 55.9215087890625, 57.537506103515625, 59.153507232666016, 60.76950454711914, 62.385501861572266, 64.00150299072266, 65.61750030517578, 67.2334976196289, 68.84949493408203, 70.46549224853516, 72.08148956298828, 73.69749450683594, 75.31349182128906, 76.92948913574219, 78.54548645019531, 80.16148376464844, 81.77748107910156, 83.39347839355469, 85.00947570800781, 86.62547302246094, 88.2414779663086, 89.85747528076172, 91.47347259521484, 93.08946990966797]}, "gradients/decoder.transformer.h.21.ln_1.bias": {"_type": "histogram", "values": [4.0, 2.0, 1.0, 3.0, 2.0, 4.0, 1.0, 5.0, 6.0, 8.0, 7.0, 12.0, 11.0, 9.0, 10.0, 11.0, 16.0, 13.0, 16.0, 32.0, 31.0, 34.0, 35.0, 48.0, 35.0, 30.0, 39.0, 35.0, 27.0, 35.0, 36.0, 32.0, 52.0, 34.0, 32.0, 29.0, 29.0, 30.0, 42.0, 25.0, 25.0, 19.0, 18.0, 21.0, 11.0, 8.0, 11.0, 7.0, 13.0, 5.0, 9.0, 1.0, 1.0, 4.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-29.35616683959961, -28.36530113220215, -27.37443733215332, -26.38357162475586, -25.39270782470703, -24.40184211730957, -23.41097640991211, -22.42011260986328, -21.429248809814453, -20.438383102416992, -19.447519302368164, -18.456653594970703, -17.465789794921875, -16.474924087524414, -15.48405933380127, -14.493194580078125, -13.502328872680664, -12.51146411895752, -11.520599365234375, -10.529733657836914, -9.538869857788086, -8.548004150390625, -7.5571393966674805, -6.566274642944336, -5.575409889221191, -4.584545135498047, -3.5936801433563232, -2.6028151512145996, -1.611950397491455, -0.6210856437683105, 0.3697795867919922, 1.3606443405151367, 2.3515090942382812, 3.342373847961426, 4.33323860168457, 5.324103832244873, 6.314968585968018, 7.305833339691162, 8.296698570251465, 9.28756332397461, 10.278428077697754, 11.269292831420898, 12.260157585144043, 13.251022338867188, 14.241888046264648, 15.232751846313477, 16.223617553710938, 17.214481353759766, 18.205347061157227, 19.196212768554688, 20.187076568603516, 21.177942276000977, 22.168806076049805, 23.159671783447266, 24.150535583496094, 25.141401290893555, 26.132266998291016, 27.123132705688477, 28.113996505737305, 29.104862213134766, 30.095726013183594, 31.086591720581055, 32.077457427978516, 33.068321228027344, 34.05918502807617]}, "gradients/decoder.transformer.h.20.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0, 4.0, 3.0, 5.0, 7.0, 5.0, 9.0, 12.0, 11.0, 11.0, 8.0, 15.0, 18.0, 20.0, 25.0, 29.0, 28.0, 30.0, 24.0, 39.0, 39.0, 42.0, 37.0, 44.0, 50.0, 40.0, 31.0, 48.0, 42.0, 35.0, 34.0, 30.0, 26.0, 29.0, 19.0, 20.0, 16.0, 12.0, 21.0, 19.0, 10.0, 11.0, 12.0, 8.0, 9.0, 3.0, 4.0, 4.0, 1.0, 3.0, 4.0, 0.0, 2.0, 3.0], "bins": [-3.267578125, -3.173675537109375, -3.07977294921875, -2.985870361328125, -2.8919677734375, -2.798065185546875, -2.70416259765625, -2.610260009765625, -2.516357421875, -2.422454833984375, -2.32855224609375, -2.234649658203125, -2.1407470703125, -2.046844482421875, -1.95294189453125, -1.859039306640625, -1.76513671875, -1.671234130859375, -1.57733154296875, -1.483428955078125, -1.3895263671875, -1.295623779296875, -1.20172119140625, -1.107818603515625, -1.013916015625, -0.920013427734375, -0.82611083984375, -0.732208251953125, -0.6383056640625, -0.544403076171875, -0.45050048828125, -0.356597900390625, -0.2626953125, -0.168792724609375, -0.07489013671875, 0.019012451171875, 0.1129150390625, 0.206817626953125, 0.30072021484375, 0.394622802734375, 0.488525390625, 0.582427978515625, 0.67633056640625, 0.770233154296875, 0.8641357421875, 0.958038330078125, 1.05194091796875, 1.145843505859375, 1.23974609375, 1.333648681640625, 1.42755126953125, 1.521453857421875, 1.6153564453125, 1.709259033203125, 1.80316162109375, 1.897064208984375, 1.990966796875, 2.084869384765625, 2.17877197265625, 2.272674560546875, 2.3665771484375, 2.460479736328125, 2.55438232421875, 2.648284912109375, 2.7421875]}, "gradients/decoder.transformer.h.20.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 5.0, 5.0, 5.0, 6.0, 7.0, 8.0, 8.0, 10.0, 8.0, 11.0, 11.0, 18.0, 23.0, 25.0, 35.0, 37.0, 55.0, 104.0, 165.0, 503.0, 1977.0, 14027.0, 195908.0, 3007820.0, 923884.0, 43475.0, 4604.0, 869.0, 246.0, 126.0, 63.0, 49.0, 33.0, 25.0, 13.0, 21.0, 14.0, 16.0, 12.0, 11.0, 10.0, 10.0, 5.0, 5.0, 7.0, 1.0, 3.0, 5.0, 2.0, 4.0, 0.0, 3.0], "bins": [-15.25, -14.8232421875, -14.396484375, -13.9697265625, -13.54296875, -13.1162109375, -12.689453125, -12.2626953125, -11.8359375, -11.4091796875, -10.982421875, -10.5556640625, -10.12890625, -9.7021484375, -9.275390625, -8.8486328125, -8.421875, -7.9951171875, -7.568359375, -7.1416015625, -6.71484375, -6.2880859375, -5.861328125, -5.4345703125, -5.0078125, -4.5810546875, -4.154296875, -3.7275390625, -3.30078125, -2.8740234375, -2.447265625, -2.0205078125, -1.59375, -1.1669921875, -0.740234375, -0.3134765625, 0.11328125, 0.5400390625, 0.966796875, 1.3935546875, 1.8203125, 2.2470703125, 2.673828125, 3.1005859375, 3.52734375, 3.9541015625, 4.380859375, 4.8076171875, 5.234375, 5.6611328125, 6.087890625, 6.5146484375, 6.94140625, 7.3681640625, 7.794921875, 8.2216796875, 8.6484375, 9.0751953125, 9.501953125, 9.9287109375, 10.35546875, 10.7822265625, 11.208984375, 11.6357421875, 12.0625]}, "gradients/decoder.transformer.h.20.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 8.0, 10.0, 10.0, 8.0, 22.0, 35.0, 32.0, 57.0, 85.0, 120.0, 152.0, 246.0, 319.0, 436.0, 486.0, 510.0, 391.0, 349.0, 251.0, 162.0, 122.0, 81.0, 56.0, 43.0, 27.0, 28.0, 13.0, 8.0, 2.0, 4.0, 3.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.3046875, -11.9718017578125, -11.638916015625, -11.3060302734375, -10.97314453125, -10.6402587890625, -10.307373046875, -9.9744873046875, -9.6416015625, -9.3087158203125, -8.975830078125, -8.6429443359375, -8.31005859375, -7.9771728515625, -7.644287109375, -7.3114013671875, -6.978515625, -6.6456298828125, -6.312744140625, -5.9798583984375, -5.64697265625, -5.3140869140625, -4.981201171875, -4.6483154296875, -4.3154296875, -3.9825439453125, -3.649658203125, -3.3167724609375, -2.98388671875, -2.6510009765625, -2.318115234375, -1.9852294921875, -1.65234375, -1.3194580078125, -0.986572265625, -0.6536865234375, -0.32080078125, 0.0120849609375, 0.344970703125, 0.6778564453125, 1.0107421875, 1.3436279296875, 1.676513671875, 2.0093994140625, 2.34228515625, 2.6751708984375, 3.008056640625, 3.3409423828125, 3.673828125, 4.0067138671875, 4.339599609375, 4.6724853515625, 5.00537109375, 5.3382568359375, 5.671142578125, 6.0040283203125, 6.3369140625, 6.6697998046875, 7.002685546875, 7.3355712890625, 7.66845703125, 8.0013427734375, 8.334228515625, 8.6671142578125, 9.0]}, "gradients/decoder.transformer.h.20.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 6.0, 7.0, 11.0, 14.0, 17.0, 18.0, 36.0, 61.0, 77.0, 127.0, 206.0, 395.0, 904.0, 3292.0, 27319.0, 506738.0, 3282627.0, 347868.0, 20192.0, 2721.0, 795.0, 361.0, 168.0, 118.0, 78.0, 33.0, 32.0, 17.0, 12.0, 11.0, 8.0, 4.0, 5.0, 2.0, 4.0, 4.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-17.453125, -16.739013671875, -16.02490234375, -15.310791015625, -14.5966796875, -13.882568359375, -13.16845703125, -12.454345703125, -11.740234375, -11.026123046875, -10.31201171875, -9.597900390625, -8.8837890625, -8.169677734375, -7.45556640625, -6.741455078125, -6.02734375, -5.313232421875, -4.59912109375, -3.885009765625, -3.1708984375, -2.456787109375, -1.74267578125, -1.028564453125, -0.314453125, 0.399658203125, 1.11376953125, 1.827880859375, 2.5419921875, 3.256103515625, 3.97021484375, 4.684326171875, 5.3984375, 6.112548828125, 6.82666015625, 7.540771484375, 8.2548828125, 8.968994140625, 9.68310546875, 10.397216796875, 11.111328125, 11.825439453125, 12.53955078125, 13.253662109375, 13.9677734375, 14.681884765625, 15.39599609375, 16.110107421875, 16.82421875, 17.538330078125, 18.25244140625, 18.966552734375, 19.6806640625, 20.394775390625, 21.10888671875, 21.822998046875, 22.537109375, 23.251220703125, 23.96533203125, 24.679443359375, 25.3935546875, 26.107666015625, 26.82177734375, 27.535888671875, 28.25]}, "gradients/decoder.transformer.h.20.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 10.0, 53.0, 154.0, 264.0, 284.0, 172.0, 56.0, 18.0, 2.0, 2.0], "bins": [-215.17364501953125, -211.4653778076172, -207.75711059570312, -204.04884338378906, -200.340576171875, -196.63230895996094, -192.92404174804688, -189.21578979492188, -185.50750732421875, -181.7992401123047, -178.09097290039062, -174.38270568847656, -170.6744384765625, -166.96617126464844, -163.25790405273438, -159.54965209960938, -155.8413848876953, -152.13311767578125, -148.4248504638672, -144.71658325195312, -141.00831604003906, -137.300048828125, -133.59178161621094, -129.88351440429688, -126.17525482177734, -122.46698760986328, -118.75872039794922, -115.05045318603516, -111.34219360351562, -107.63392639160156, -103.9256591796875, -100.21739196777344, -96.50912475585938, -92.80085754394531, -89.09259033203125, -85.38432312011719, -81.67605590820312, -77.96778869628906, -74.25952911376953, -70.55126190185547, -66.8429946899414, -63.134727478027344, -59.42646026611328, -55.718196868896484, -52.00992965698242, -48.30166244506836, -44.59339904785156, -40.8851318359375, -37.17686462402344, -33.468597412109375, -29.760332107543945, -26.052066802978516, -22.343799591064453, -18.63553237915039, -14.927267074584961, -11.219001770019531, -7.510736465454102, -3.8024702072143555, -0.09420394897460938, 3.6140623092651367, 7.322328567504883, 11.030595779418945, 14.738861083984375, 18.447126388549805, 22.155393600463867]}, "gradients/decoder.transformer.h.20.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 3.0, 5.0, 3.0, 15.0, 12.0, 11.0, 11.0, 13.0, 20.0, 25.0, 31.0, 29.0, 29.0, 30.0, 31.0, 44.0, 44.0, 50.0, 52.0, 33.0, 50.0, 33.0, 38.0, 49.0, 40.0, 35.0, 50.0, 30.0, 19.0, 31.0, 21.0, 16.0, 27.0, 21.0, 8.0, 10.0, 14.0, 5.0, 6.0, 1.0, 5.0, 4.0, 4.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 1.0], "bins": [-35.61521530151367, -34.57433319091797, -33.533451080322266, -32.49257278442383, -31.451690673828125, -30.410808563232422, -29.36992645263672, -28.329044342041016, -27.288164138793945, -26.247282028198242, -25.206401824951172, -24.16551971435547, -23.124637603759766, -22.083757400512695, -21.042875289916992, -20.001995086669922, -18.96111297607422, -17.920230865478516, -16.879350662231445, -15.838468551635742, -14.797587394714355, -13.756706237792969, -12.715824127197266, -11.674942970275879, -10.634061813354492, -9.593180656433105, -8.552299499511719, -7.511417388916016, -6.470536231994629, -5.429655075073242, -4.388773441314697, -3.3478918075561523, -2.3070106506347656, -1.2661292552947998, -0.22524785995483398, 0.8156335353851318, 1.8565149307250977, 2.8973960876464844, 3.9382777214050293, 4.979159355163574, 6.020040512084961, 7.060921669006348, 8.101802825927734, 9.142684936523438, 10.183566093444824, 11.224447250366211, 12.265329360961914, 13.3062105178833, 14.347091674804688, 15.387972831726074, 16.42885398864746, 17.469736099243164, 18.510616302490234, 19.551498413085938, 20.59238052368164, 21.633262634277344, 22.674142837524414, 23.715024948120117, 24.755905151367188, 25.79678726196289, 26.837669372558594, 27.878549575805664, 28.919431686401367, 29.960311889648438, 31.00119400024414]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 3.0, 2.0, 3.0, 7.0, 5.0, 4.0, 10.0, 12.0, 13.0, 11.0, 20.0, 20.0, 19.0, 15.0, 30.0, 32.0, 39.0, 34.0, 37.0, 47.0, 46.0, 49.0, 52.0, 56.0, 50.0, 35.0, 35.0, 35.0, 30.0, 36.0, 37.0, 23.0, 21.0, 30.0, 27.0, 11.0, 12.0, 16.0, 5.0, 8.0, 6.0, 9.0, 6.0, 3.0, 6.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.501953125, -3.39263916015625, -3.2833251953125, -3.17401123046875, -3.064697265625, -2.95538330078125, -2.8460693359375, -2.73675537109375, -2.62744140625, -2.51812744140625, -2.4088134765625, -2.29949951171875, -2.190185546875, -2.08087158203125, -1.9715576171875, -1.86224365234375, -1.7529296875, -1.64361572265625, -1.5343017578125, -1.42498779296875, -1.315673828125, -1.20635986328125, -1.0970458984375, -0.98773193359375, -0.87841796875, -0.76910400390625, -0.6597900390625, -0.55047607421875, -0.441162109375, -0.33184814453125, -0.2225341796875, -0.11322021484375, -0.00390625, 0.10540771484375, 0.2147216796875, 0.32403564453125, 0.433349609375, 0.54266357421875, 0.6519775390625, 0.76129150390625, 0.87060546875, 0.97991943359375, 1.0892333984375, 1.19854736328125, 1.307861328125, 1.41717529296875, 1.5264892578125, 1.63580322265625, 1.7451171875, 1.85443115234375, 1.9637451171875, 2.07305908203125, 2.182373046875, 2.29168701171875, 2.4010009765625, 2.51031494140625, 2.61962890625, 2.72894287109375, 2.8382568359375, 2.94757080078125, 3.056884765625, 3.16619873046875, 3.2755126953125, 3.38482666015625, 3.494140625]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 6.0, 4.0, 5.0, 4.0, 9.0, 8.0, 41.0, 31.0, 46.0, 90.0, 121.0, 193.0, 279.0, 461.0, 813.0, 1230.0, 2053.0, 3447.0, 5543.0, 9546.0, 16394.0, 28446.0, 49717.0, 88671.0, 159010.0, 247651.0, 188434.0, 105725.0, 59559.0, 33763.0, 19333.0, 11296.0, 6481.0, 3911.0, 2304.0, 1420.0, 962.0, 549.0, 363.0, 253.0, 142.0, 93.0, 56.0, 48.0, 27.0, 18.0, 3.0, 3.0, 2.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.27783203125, -0.26813507080078125, -0.2584381103515625, -0.24874114990234375, -0.239044189453125, -0.22934722900390625, -0.2196502685546875, -0.20995330810546875, -0.20025634765625, -0.19055938720703125, -0.1808624267578125, -0.17116546630859375, -0.161468505859375, -0.15177154541015625, -0.1420745849609375, -0.13237762451171875, -0.1226806640625, -0.11298370361328125, -0.1032867431640625, -0.09358978271484375, -0.083892822265625, -0.07419586181640625, -0.0644989013671875, -0.05480194091796875, -0.04510498046875, -0.03540802001953125, -0.0257110595703125, -0.01601409912109375, -0.006317138671875, 0.00337982177734375, 0.0130767822265625, 0.02277374267578125, 0.032470703125, 0.04216766357421875, 0.0518646240234375, 0.06156158447265625, 0.071258544921875, 0.08095550537109375, 0.0906524658203125, 0.10034942626953125, 0.11004638671875, 0.11974334716796875, 0.1294403076171875, 0.13913726806640625, 0.148834228515625, 0.15853118896484375, 0.1682281494140625, 0.17792510986328125, 0.1876220703125, 0.19731903076171875, 0.2070159912109375, 0.21671295166015625, 0.226409912109375, 0.23610687255859375, 0.2458038330078125, 0.25550079345703125, 0.26519775390625, 0.27489471435546875, 0.2845916748046875, 0.29428863525390625, 0.303985595703125, 0.31368255615234375, 0.3233795166015625, 0.33307647705078125, 0.3427734375]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 2.0, 4.0, 15.0, 8.0, 10.0, 14.0, 17.0, 26.0, 20.0, 33.0, 31.0, 26.0, 28.0, 31.0, 41.0, 49.0, 40.0, 48.0, 51.0, 1058.0, 56.0, 42.0, 40.0, 40.0, 35.0, 38.0, 33.0, 26.0, 23.0, 33.0, 20.0, 17.0, 15.0, 8.0, 12.0, 4.0, 10.0, 4.0, 9.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0], "bins": [-2.490234375, -2.419677734375, -2.34912109375, -2.278564453125, -2.2080078125, -2.137451171875, -2.06689453125, -1.996337890625, -1.92578125, -1.855224609375, -1.78466796875, -1.714111328125, -1.6435546875, -1.572998046875, -1.50244140625, -1.431884765625, -1.361328125, -1.290771484375, -1.22021484375, -1.149658203125, -1.0791015625, -1.008544921875, -0.93798828125, -0.867431640625, -0.796875, -0.726318359375, -0.65576171875, -0.585205078125, -0.5146484375, -0.444091796875, -0.37353515625, -0.302978515625, -0.232421875, -0.161865234375, -0.09130859375, -0.020751953125, 0.0498046875, 0.120361328125, 0.19091796875, 0.261474609375, 0.33203125, 0.402587890625, 0.47314453125, 0.543701171875, 0.6142578125, 0.684814453125, 0.75537109375, 0.825927734375, 0.896484375, 0.967041015625, 1.03759765625, 1.108154296875, 1.1787109375, 1.249267578125, 1.31982421875, 1.390380859375, 1.4609375, 1.531494140625, 1.60205078125, 1.672607421875, 1.7431640625, 1.813720703125, 1.88427734375, 1.954833984375, 2.025390625]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 4.0, 1.0, 3.0, 8.0, 9.0, 17.0, 27.0, 31.0, 56.0, 78.0, 123.0, 193.0, 304.0, 452.0, 637.0, 982.0, 1443.0, 2249.0, 3413.0, 5239.0, 7943.0, 12191.0, 18992.0, 28634.0, 43944.0, 66515.0, 99723.0, 144830.0, 1220891.0, 144137.0, 100548.0, 66095.0, 43783.0, 29028.0, 18894.0, 12316.0, 8045.0, 5324.0, 3377.0, 2229.0, 1581.0, 961.0, 633.0, 423.0, 266.0, 186.0, 128.0, 92.0, 62.0, 33.0, 27.0, 13.0, 13.0, 11.0, 3.0, 2.0, 1.0, 0.0, 2.0, 2.0], "bins": [-0.202392578125, -0.1961650848388672, -0.18993759155273438, -0.18371009826660156, -0.17748260498046875, -0.17125511169433594, -0.16502761840820312, -0.1588001251220703, -0.1525726318359375, -0.1463451385498047, -0.14011764526367188, -0.13389015197753906, -0.12766265869140625, -0.12143516540527344, -0.11520767211914062, -0.10898017883300781, -0.102752685546875, -0.09652519226074219, -0.09029769897460938, -0.08407020568847656, -0.07784271240234375, -0.07161521911621094, -0.06538772583007812, -0.05916023254394531, -0.0529327392578125, -0.04670524597167969, -0.040477752685546875, -0.03425025939941406, -0.02802276611328125, -0.021795272827148438, -0.015567779541015625, -0.009340286254882812, -0.00311279296875, 0.0031147003173828125, 0.009342193603515625, 0.015569686889648438, 0.02179718017578125, 0.028024673461914062, 0.034252166748046875, 0.04047966003417969, 0.0467071533203125, 0.05293464660644531, 0.059162139892578125, 0.06538963317871094, 0.07161712646484375, 0.07784461975097656, 0.08407211303710938, 0.09029960632324219, 0.096527099609375, 0.10275459289550781, 0.10898208618164062, 0.11520957946777344, 0.12143707275390625, 0.12766456604003906, 0.13389205932617188, 0.1401195526123047, 0.1463470458984375, 0.1525745391845703, 0.15880203247070312, 0.16502952575683594, 0.17125701904296875, 0.17748451232910156, 0.18371200561523438, 0.1899394989013672, 0.1961669921875]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 3.0, 2.0, 7.0, 5.0, 12.0, 6.0, 11.0, 16.0, 21.0, 18.0, 35.0, 40.0, 42.0, 57.0, 65.0, 60.0, 71.0, 93.0, 66.0, 75.0, 67.0, 50.0, 30.0, 26.0, 24.0, 15.0, 16.0, 22.0, 11.0, 11.0, 4.0, 6.0, 7.0, 5.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0009241104125976562, -0.0008934289216995239, -0.0008627474308013916, -0.0008320659399032593, -0.000801384449005127, -0.0007707029581069946, -0.0007400214672088623, -0.00070933997631073, -0.0006786584854125977, -0.0006479769945144653, -0.000617295503616333, -0.0005866140127182007, -0.0005559325218200684, -0.000525251030921936, -0.0004945695400238037, -0.0004638880491256714, -0.00043320655822753906, -0.00040252506732940674, -0.0003718435764312744, -0.0003411620855331421, -0.00031048059463500977, -0.00027979910373687744, -0.0002491176128387451, -0.0002184361219406128, -0.00018775463104248047, -0.00015707314014434814, -0.00012639164924621582, -9.57101583480835e-05, -6.502866744995117e-05, -3.434717655181885e-05, -3.6656856536865234e-06, 2.70158052444458e-05, 5.7697296142578125e-05, 8.837878704071045e-05, 0.00011906027793884277, 0.0001497417688369751, 0.00018042325973510742, 0.00021110475063323975, 0.00024178624153137207, 0.0002724677324295044, 0.0003031492233276367, 0.00033383071422576904, 0.00036451220512390137, 0.0003951936960220337, 0.000425875186920166, 0.00045655667781829834, 0.00048723816871643066, 0.000517919659614563, 0.0005486011505126953, 0.0005792826414108276, 0.00060996413230896, 0.0006406456232070923, 0.0006713271141052246, 0.0007020086050033569, 0.0007326900959014893, 0.0007633715867996216, 0.0007940530776977539, 0.0008247345685958862, 0.0008554160594940186, 0.0008860975503921509, 0.0009167790412902832, 0.0009474605321884155, 0.0009781420230865479, 0.0010088235139846802, 0.0010395050048828125]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 2.0, 2.0, 6.0, 8.0, 6.0, 11.0, 9.0, 19.0, 16.0, 30.0, 34.0, 40.0, 64.0, 72.0, 99.0, 179.0, 257.0, 432.0, 1152.0, 251001.0, 791714.0, 2006.0, 479.0, 273.0, 187.0, 118.0, 76.0, 68.0, 65.0, 40.0, 18.0, 16.0, 14.0, 5.0, 7.0, 9.0, 10.0, 2.0, 1.0, 7.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0186767578125, -0.018144726753234863, -0.017612695693969727, -0.01708066463470459, -0.016548633575439453, -0.016016602516174316, -0.01548457145690918, -0.014952540397644043, -0.014420509338378906, -0.01388847827911377, -0.013356447219848633, -0.012824416160583496, -0.01229238510131836, -0.011760354042053223, -0.011228322982788086, -0.01069629192352295, -0.010164260864257812, -0.009632229804992676, -0.009100198745727539, -0.008568167686462402, -0.008036136627197266, -0.007504105567932129, -0.006972074508666992, -0.0064400434494018555, -0.005908012390136719, -0.005375981330871582, -0.004843950271606445, -0.004311919212341309, -0.003779888153076172, -0.003247857093811035, -0.0027158260345458984, -0.0021837949752807617, -0.001651763916015625, -0.0011197328567504883, -0.0005877017974853516, -5.5670738220214844e-05, 0.0004763603210449219, 0.0010083913803100586, 0.0015404224395751953, 0.002072453498840332, 0.0026044845581054688, 0.0031365156173706055, 0.003668546676635742, 0.004200577735900879, 0.004732608795166016, 0.005264639854431152, 0.005796670913696289, 0.006328701972961426, 0.0068607330322265625, 0.007392764091491699, 0.007924795150756836, 0.008456826210021973, 0.00898885726928711, 0.009520888328552246, 0.010052919387817383, 0.01058495044708252, 0.011116981506347656, 0.011649012565612793, 0.01218104362487793, 0.012713074684143066, 0.013245105743408203, 0.01377713680267334, 0.014309167861938477, 0.014841198921203613, 0.01537322998046875]}, "gradients/decoder.transformer.h.20.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 29.0, 263.0, 560.0, 153.0, 8.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0009381965501233935, -0.0008477285737171769, -0.0007572606555186212, -0.0006667927373200655, -0.0005763247609138489, -0.0004858568136114627, -0.00039538886630907655, -0.00030492094811052084, -0.00021445297170430422, -0.00012398502440191805, -3.351707709953189e-05, 5.6950870202854276e-05, 0.00014741881750524044, 0.0002378867648076266, 0.00032835471211001277, 0.0004188226303085685, 0.0005092906067147851, 0.0005997585831210017, 0.0006902265013195574, 0.0007806944195181131, 0.0008711623959243298, 0.0009616303723305464, 0.001052098348736763, 0.0011425662087276578, 0.0012330341851338744, 0.001323502161540091, 0.0014139700215309858, 0.0015044379979372025, 0.001594905974343419, 0.0016853739507496357, 0.0017758419271558523, 0.0018663097871467471, 0.001956777647137642, 0.0020472456235438585, 0.002137713599950075, 0.0022281815763562918, 0.0023186495527625084, 0.0024091172963380814, 0.002499585272744298, 0.0025900532491505146, 0.0026805212255567312, 0.002770989201962948, 0.0028614571783691645, 0.002951925154775381, 0.003042392898350954, 0.0031328608747571707, 0.0032233288511633873, 0.003313796827569604, 0.0034042648039758205, 0.003494732780382037, 0.003585200756788254, 0.0036756687331944704, 0.003766136709600687, 0.00385660445317626, 0.00394707266241312, 0.004037540405988693, 0.004128008149564266, 0.004218475893139839, 0.0043089441023766994, 0.004399411845952272, 0.004489880055189133, 0.004580347798764706, 0.004670816008001566, 0.004761283751577139, 0.004851751960813999]}, "gradients/decoder.transformer.h.20.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 1.0, 6.0, 4.0, 5.0, 5.0, 5.0, 10.0, 12.0, 10.0, 17.0, 20.0, 19.0, 22.0, 22.0, 26.0, 25.0, 23.0, 33.0, 36.0, 36.0, 43.0, 34.0, 30.0, 25.0, 35.0, 57.0, 32.0, 47.0, 42.0, 39.0, 35.0, 36.0, 36.0, 21.0, 27.0, 21.0, 16.0, 18.0, 12.0, 12.0, 12.0, 12.0, 6.0, 7.0, 5.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-0.0004127621650695801, -0.00039967894554138184, -0.0003865957260131836, -0.00037351250648498535, -0.0003604292869567871, -0.00034734606742858887, -0.0003342628479003906, -0.0003211796283721924, -0.00030809640884399414, -0.0002950131893157959, -0.00028192996978759766, -0.0002688467502593994, -0.00025576353073120117, -0.00024268031120300293, -0.0002295970916748047, -0.00021651387214660645, -0.0002034306526184082, -0.00019034743309020996, -0.00017726421356201172, -0.00016418099403381348, -0.00015109777450561523, -0.000138014554977417, -0.00012493133544921875, -0.00011184811592102051, -9.876489639282227e-05, -8.568167686462402e-05, -7.259845733642578e-05, -5.951523780822754e-05, -4.64320182800293e-05, -3.3348798751831055e-05, -2.0265579223632812e-05, -7.18235969543457e-06, 5.900859832763672e-06, 1.8984079360961914e-05, 3.2067298889160156e-05, 4.51505184173584e-05, 5.823373794555664e-05, 7.131695747375488e-05, 8.440017700195312e-05, 9.748339653015137e-05, 0.00011056661605834961, 0.00012364983558654785, 0.0001367330551147461, 0.00014981627464294434, 0.00016289949417114258, 0.00017598271369934082, 0.00018906593322753906, 0.0002021491527557373, 0.00021523237228393555, 0.0002283155918121338, 0.00024139881134033203, 0.0002544820308685303, 0.0002675652503967285, 0.00028064846992492676, 0.000293731689453125, 0.00030681490898132324, 0.0003198981285095215, 0.0003329813480377197, 0.00034606456756591797, 0.0003591477870941162, 0.00037223100662231445, 0.0003853142261505127, 0.00039839744567871094, 0.0004114806652069092, 0.0004245638847351074]}, "gradients/decoder.transformer.h.20.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 3.0, 2.0, 3.0, 7.0, 5.0, 4.0, 10.0, 12.0, 13.0, 11.0, 20.0, 20.0, 19.0, 15.0, 30.0, 32.0, 39.0, 34.0, 37.0, 47.0, 46.0, 49.0, 52.0, 56.0, 50.0, 35.0, 35.0, 35.0, 30.0, 36.0, 37.0, 23.0, 21.0, 30.0, 27.0, 11.0, 12.0, 16.0, 5.0, 8.0, 6.0, 9.0, 6.0, 3.0, 6.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.501953125, -3.39263916015625, -3.2833251953125, -3.17401123046875, -3.064697265625, -2.95538330078125, -2.8460693359375, -2.73675537109375, -2.62744140625, -2.51812744140625, -2.4088134765625, -2.29949951171875, -2.190185546875, -2.08087158203125, -1.9715576171875, -1.86224365234375, -1.7529296875, -1.64361572265625, -1.5343017578125, -1.42498779296875, -1.315673828125, -1.20635986328125, -1.0970458984375, -0.98773193359375, -0.87841796875, -0.76910400390625, -0.6597900390625, -0.55047607421875, -0.441162109375, -0.33184814453125, -0.2225341796875, -0.11322021484375, -0.00390625, 0.10540771484375, 0.2147216796875, 0.32403564453125, 0.433349609375, 0.54266357421875, 0.6519775390625, 0.76129150390625, 0.87060546875, 0.97991943359375, 1.0892333984375, 1.19854736328125, 1.307861328125, 1.41717529296875, 1.5264892578125, 1.63580322265625, 1.7451171875, 1.85443115234375, 1.9637451171875, 2.07305908203125, 2.182373046875, 2.29168701171875, 2.4010009765625, 2.51031494140625, 2.61962890625, 2.72894287109375, 2.8382568359375, 2.94757080078125, 3.056884765625, 3.16619873046875, 3.2755126953125, 3.38482666015625, 3.494140625]}, "gradients/decoder.transformer.h.20.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 5.0, 2.0, 3.0, 5.0, 10.0, 7.0, 7.0, 12.0, 14.0, 21.0, 32.0, 42.0, 68.0, 94.0, 116.0, 164.0, 237.0, 391.0, 601.0, 1097.0, 2213.0, 5040.0, 12610.0, 38251.0, 157383.0, 498071.0, 245884.0, 56383.0, 17243.0, 6437.0, 2741.0, 1295.0, 725.0, 436.0, 276.0, 165.0, 123.0, 100.0, 75.0, 56.0, 33.0, 22.0, 23.0, 12.0, 12.0, 10.0, 7.0, 6.0, 2.0, 2.0, 2.0, 0.0, 2.0, 0.0, 2.0], "bins": [-4.93359375, -4.79119873046875, -4.6488037109375, -4.50640869140625, -4.364013671875, -4.22161865234375, -4.0792236328125, -3.93682861328125, -3.79443359375, -3.65203857421875, -3.5096435546875, -3.36724853515625, -3.224853515625, -3.08245849609375, -2.9400634765625, -2.79766845703125, -2.6552734375, -2.51287841796875, -2.3704833984375, -2.22808837890625, -2.085693359375, -1.94329833984375, -1.8009033203125, -1.65850830078125, -1.51611328125, -1.37371826171875, -1.2313232421875, -1.08892822265625, -0.946533203125, -0.80413818359375, -0.6617431640625, -0.51934814453125, -0.376953125, -0.23455810546875, -0.0921630859375, 0.05023193359375, 0.192626953125, 0.33502197265625, 0.4774169921875, 0.61981201171875, 0.76220703125, 0.90460205078125, 1.0469970703125, 1.18939208984375, 1.331787109375, 1.47418212890625, 1.6165771484375, 1.75897216796875, 1.9013671875, 2.04376220703125, 2.1861572265625, 2.32855224609375, 2.470947265625, 2.61334228515625, 2.7557373046875, 2.89813232421875, 3.04052734375, 3.18292236328125, 3.3253173828125, 3.46771240234375, 3.610107421875, 3.75250244140625, 3.8948974609375, 4.03729248046875, 4.1796875]}, "gradients/decoder.transformer.h.20.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 7.0, 2.0, 14.0, 6.0, 8.0, 17.0, 18.0, 21.0, 17.0, 35.0, 29.0, 45.0, 48.0, 51.0, 60.0, 104.0, 298.0, 1667.0, 157.0, 68.0, 57.0, 56.0, 38.0, 46.0, 32.0, 23.0, 24.0, 27.0, 20.0, 12.0, 19.0, 9.0, 6.0, 7.0, 4.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.875, -14.4141845703125, -13.953369140625, -13.4925537109375, -13.03173828125, -12.5709228515625, -12.110107421875, -11.6492919921875, -11.1884765625, -10.7276611328125, -10.266845703125, -9.8060302734375, -9.34521484375, -8.8843994140625, -8.423583984375, -7.9627685546875, -7.501953125, -7.0411376953125, -6.580322265625, -6.1195068359375, -5.65869140625, -5.1978759765625, -4.737060546875, -4.2762451171875, -3.8154296875, -3.3546142578125, -2.893798828125, -2.4329833984375, -1.97216796875, -1.5113525390625, -1.050537109375, -0.5897216796875, -0.12890625, 0.3319091796875, 0.792724609375, 1.2535400390625, 1.71435546875, 2.1751708984375, 2.635986328125, 3.0968017578125, 3.5576171875, 4.0184326171875, 4.479248046875, 4.9400634765625, 5.40087890625, 5.8616943359375, 6.322509765625, 6.7833251953125, 7.244140625, 7.7049560546875, 8.165771484375, 8.6265869140625, 9.08740234375, 9.5482177734375, 10.009033203125, 10.4698486328125, 10.9306640625, 11.3914794921875, 11.852294921875, 12.3131103515625, 12.77392578125, 13.2347412109375, 13.695556640625, 14.1563720703125, 14.6171875]}, "gradients/decoder.transformer.h.20.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 0.0, 3.0, 7.0, 8.0, 8.0, 8.0, 14.0, 21.0, 11.0, 37.0, 31.0, 36.0, 41.0, 60.0, 101.0, 175.0, 333.0, 860.0, 4420.0, 134230.0, 2976153.0, 25836.0, 2106.0, 534.0, 234.0, 126.0, 81.0, 54.0, 34.0, 29.0, 26.0, 19.0, 21.0, 14.0, 12.0, 9.0, 6.0, 4.0, 4.0, 2.0, 4.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.0, -20.30712890625, -19.6142578125, -18.92138671875, -18.228515625, -17.53564453125, -16.8427734375, -16.14990234375, -15.45703125, -14.76416015625, -14.0712890625, -13.37841796875, -12.685546875, -11.99267578125, -11.2998046875, -10.60693359375, -9.9140625, -9.22119140625, -8.5283203125, -7.83544921875, -7.142578125, -6.44970703125, -5.7568359375, -5.06396484375, -4.37109375, -3.67822265625, -2.9853515625, -2.29248046875, -1.599609375, -0.90673828125, -0.2138671875, 0.47900390625, 1.171875, 1.86474609375, 2.5576171875, 3.25048828125, 3.943359375, 4.63623046875, 5.3291015625, 6.02197265625, 6.71484375, 7.40771484375, 8.1005859375, 8.79345703125, 9.486328125, 10.17919921875, 10.8720703125, 11.56494140625, 12.2578125, 12.95068359375, 13.6435546875, 14.33642578125, 15.029296875, 15.72216796875, 16.4150390625, 17.10791015625, 17.80078125, 18.49365234375, 19.1865234375, 19.87939453125, 20.572265625, 21.26513671875, 21.9580078125, 22.65087890625, 23.34375]}, "gradients/decoder.transformer.h.20.ln_1.weight": {"_type": "histogram", "values": [1.0, 3.0, 24.0, 128.0, 354.0, 360.0, 130.0, 16.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.339313507080078, -6.671306610107422, -5.003299713134766, -3.3352928161621094, -1.6672859191894531, 0.000720977783203125, 1.6687278747558594, 3.3367347717285156, 5.004741668701172, 6.672748565673828, 8.340755462646484, 10.00876235961914, 11.676769256591797, 13.344776153564453, 15.01278305053711, 16.680789947509766, 18.348796844482422, 20.016803741455078, 21.684810638427734, 23.35281753540039, 25.020824432373047, 26.688831329345703, 28.35683822631836, 30.024845123291016, 31.692852020263672, 33.36085891723633, 35.028865814208984, 36.69687271118164, 38.3648796081543, 40.03288650512695, 41.70089340209961, 43.368900299072266, 45.03691101074219, 46.704917907714844, 48.3729248046875, 50.040931701660156, 51.70893859863281, 53.37694549560547, 55.044952392578125, 56.71295928955078, 58.38096618652344, 60.048973083496094, 61.71697998046875, 63.384986877441406, 65.05299377441406, 66.72100067138672, 68.38900756835938, 70.05701446533203, 71.72502136230469, 73.39302825927734, 75.06103515625, 76.72904205322266, 78.39704895019531, 80.06505584716797, 81.73306274414062, 83.40106964111328, 85.06907653808594, 86.7370834350586, 88.40509033203125, 90.0730972290039, 91.74110412597656, 93.40911102294922, 95.07711791992188, 96.74512481689453, 98.41313171386719]}, "gradients/decoder.transformer.h.20.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 6.0, 2.0, 3.0, 6.0, 15.0, 11.0, 12.0, 14.0, 17.0, 15.0, 14.0, 24.0, 22.0, 23.0, 29.0, 35.0, 49.0, 49.0, 31.0, 34.0, 37.0, 40.0, 35.0, 41.0, 48.0, 38.0, 35.0, 36.0, 30.0, 32.0, 34.0, 20.0, 25.0, 21.0, 17.0, 17.0, 12.0, 11.0, 12.0, 12.0, 13.0, 12.0, 5.0, 6.0, 5.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.41712951660156, -36.2373046875, -35.05747604370117, -33.87765121459961, -32.69782257080078, -31.51799774169922, -30.338172912597656, -29.15834617614746, -27.978519439697266, -26.79869270324707, -25.618865966796875, -24.439041137695312, -23.259214401245117, -22.079387664794922, -20.89956283569336, -19.719736099243164, -18.53990936279297, -17.360082626342773, -16.180255889892578, -15.000431060791016, -13.82060432434082, -12.640777587890625, -11.460951805114746, -10.281126022338867, -9.101299285888672, -7.921473026275635, -6.741646766662598, -5.5618205070495605, -4.381994247436523, -3.2021679878234863, -2.022341728210449, -0.8425159454345703, 0.3373069763183594, 1.5171332359313965, 2.6969594955444336, 3.8767857551574707, 5.056612014770508, 6.236438274383545, 7.416264533996582, 8.596090316772461, 9.775917053222656, 10.955743789672852, 12.13556957244873, 13.31539535522461, 14.495222091674805, 15.675048828125, 16.854873657226562, 18.034700393676758, 19.214527130126953, 20.39435386657715, 21.574180603027344, 22.754005432128906, 23.9338321685791, 25.113658905029297, 26.29348373413086, 27.473310470581055, 28.65313720703125, 29.832963943481445, 31.01279067993164, 32.1926155090332, 33.37244415283203, 34.552268981933594, 35.732093811035156, 36.91191864013672, 38.09174728393555]}, "gradients/decoder.transformer.h.19.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 6.0, 3.0, 8.0, 5.0, 6.0, 12.0, 8.0, 20.0, 13.0, 18.0, 25.0, 19.0, 25.0, 33.0, 35.0, 30.0, 44.0, 42.0, 41.0, 55.0, 46.0, 54.0, 48.0, 51.0, 28.0, 35.0, 34.0, 30.0, 31.0, 36.0, 21.0, 22.0, 23.0, 15.0, 15.0, 11.0, 13.0, 4.0, 12.0, 9.0, 2.0, 2.0, 5.0, 5.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.591796875, -3.4798583984375, -3.367919921875, -3.2559814453125, -3.14404296875, -3.0321044921875, -2.920166015625, -2.8082275390625, -2.6962890625, -2.5843505859375, -2.472412109375, -2.3604736328125, -2.24853515625, -2.1365966796875, -2.024658203125, -1.9127197265625, -1.80078125, -1.6888427734375, -1.576904296875, -1.4649658203125, -1.35302734375, -1.2410888671875, -1.129150390625, -1.0172119140625, -0.9052734375, -0.7933349609375, -0.681396484375, -0.5694580078125, -0.45751953125, -0.3455810546875, -0.233642578125, -0.1217041015625, -0.009765625, 0.1021728515625, 0.214111328125, 0.3260498046875, 0.43798828125, 0.5499267578125, 0.661865234375, 0.7738037109375, 0.8857421875, 0.9976806640625, 1.109619140625, 1.2215576171875, 1.33349609375, 1.4454345703125, 1.557373046875, 1.6693115234375, 1.78125, 1.8931884765625, 2.005126953125, 2.1170654296875, 2.22900390625, 2.3409423828125, 2.452880859375, 2.5648193359375, 2.6767578125, 2.7886962890625, 2.900634765625, 3.0125732421875, 3.12451171875, 3.2364501953125, 3.348388671875, 3.4603271484375, 3.572265625]}, "gradients/decoder.transformer.h.19.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 0.0, 3.0, 2.0, 5.0, 6.0, 4.0, 10.0, 11.0, 10.0, 14.0, 19.0, 13.0, 35.0, 51.0, 45.0, 77.0, 131.0, 275.0, 614.0, 1533.0, 4248.0, 13649.0, 49486.0, 235269.0, 1249609.0, 1989961.0, 517880.0, 96544.0, 23778.0, 6924.0, 2316.0, 857.0, 387.0, 173.0, 94.0, 61.0, 44.0, 29.0, 23.0, 23.0, 17.0, 13.0, 7.0, 6.0, 7.0, 7.0, 5.0, 4.0, 3.0, 2.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0], "bins": [-7.41015625, -7.1871337890625, -6.964111328125, -6.7410888671875, -6.51806640625, -6.2950439453125, -6.072021484375, -5.8489990234375, -5.6259765625, -5.4029541015625, -5.179931640625, -4.9569091796875, -4.73388671875, -4.5108642578125, -4.287841796875, -4.0648193359375, -3.841796875, -3.6187744140625, -3.395751953125, -3.1727294921875, -2.94970703125, -2.7266845703125, -2.503662109375, -2.2806396484375, -2.0576171875, -1.8345947265625, -1.611572265625, -1.3885498046875, -1.16552734375, -0.9425048828125, -0.719482421875, -0.4964599609375, -0.2734375, -0.0504150390625, 0.172607421875, 0.3956298828125, 0.61865234375, 0.8416748046875, 1.064697265625, 1.2877197265625, 1.5107421875, 1.7337646484375, 1.956787109375, 2.1798095703125, 2.40283203125, 2.6258544921875, 2.848876953125, 3.0718994140625, 3.294921875, 3.5179443359375, 3.740966796875, 3.9639892578125, 4.18701171875, 4.4100341796875, 4.633056640625, 4.8560791015625, 5.0791015625, 5.3021240234375, 5.525146484375, 5.7481689453125, 5.97119140625, 6.1942138671875, 6.417236328125, 6.6402587890625, 6.86328125]}, "gradients/decoder.transformer.h.19.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 7.0, 10.0, 8.0, 14.0, 19.0, 34.0, 57.0, 99.0, 153.0, 263.0, 411.0, 573.0, 696.0, 594.0, 421.0, 277.0, 176.0, 107.0, 71.0, 36.0, 29.0, 16.0, 5.0, 2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.0078125, -13.5494384765625, -13.091064453125, -12.6326904296875, -12.17431640625, -11.7159423828125, -11.257568359375, -10.7991943359375, -10.3408203125, -9.8824462890625, -9.424072265625, -8.9656982421875, -8.50732421875, -8.0489501953125, -7.590576171875, -7.1322021484375, -6.673828125, -6.2154541015625, -5.757080078125, -5.2987060546875, -4.84033203125, -4.3819580078125, -3.923583984375, -3.4652099609375, -3.0068359375, -2.5484619140625, -2.090087890625, -1.6317138671875, -1.17333984375, -0.7149658203125, -0.256591796875, 0.2017822265625, 0.66015625, 1.1185302734375, 1.576904296875, 2.0352783203125, 2.49365234375, 2.9520263671875, 3.410400390625, 3.8687744140625, 4.3271484375, 4.7855224609375, 5.243896484375, 5.7022705078125, 6.16064453125, 6.6190185546875, 7.077392578125, 7.5357666015625, 7.994140625, 8.4525146484375, 8.910888671875, 9.3692626953125, 9.82763671875, 10.2860107421875, 10.744384765625, 11.2027587890625, 11.6611328125, 12.1195068359375, 12.577880859375, 13.0362548828125, 13.49462890625, 13.9530029296875, 14.411376953125, 14.8697509765625, 15.328125]}, "gradients/decoder.transformer.h.19.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 4.0, 0.0, 5.0, 6.0, 19.0, 28.0, 37.0, 80.0, 131.0, 314.0, 624.0, 2199.0, 17017.0, 427828.0, 3471873.0, 259122.0, 12025.0, 1840.0, 561.0, 271.0, 138.0, 73.0, 39.0, 30.0, 9.0, 5.0, 5.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-33.34375, -32.51904296875, -31.6943359375, -30.86962890625, -30.044921875, -29.22021484375, -28.3955078125, -27.57080078125, -26.74609375, -25.92138671875, -25.0966796875, -24.27197265625, -23.447265625, -22.62255859375, -21.7978515625, -20.97314453125, -20.1484375, -19.32373046875, -18.4990234375, -17.67431640625, -16.849609375, -16.02490234375, -15.2001953125, -14.37548828125, -13.55078125, -12.72607421875, -11.9013671875, -11.07666015625, -10.251953125, -9.42724609375, -8.6025390625, -7.77783203125, -6.953125, -6.12841796875, -5.3037109375, -4.47900390625, -3.654296875, -2.82958984375, -2.0048828125, -1.18017578125, -0.35546875, 0.46923828125, 1.2939453125, 2.11865234375, 2.943359375, 3.76806640625, 4.5927734375, 5.41748046875, 6.2421875, 7.06689453125, 7.8916015625, 8.71630859375, 9.541015625, 10.36572265625, 11.1904296875, 12.01513671875, 12.83984375, 13.66455078125, 14.4892578125, 15.31396484375, 16.138671875, 16.96337890625, 17.7880859375, 18.61279296875, 19.4375]}, "gradients/decoder.transformer.h.19.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 16.0, 80.0, 192.0, 285.0, 246.0, 144.0, 45.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-200.8099822998047, -196.77439880371094, -192.73883056640625, -188.7032470703125, -184.6676788330078, -180.63209533691406, -176.59652709960938, -172.56094360351562, -168.52536010742188, -164.48977661132812, -160.45420837402344, -156.4186248779297, -152.383056640625, -148.34747314453125, -144.31190490722656, -140.2763214111328, -136.24075317382812, -132.20516967773438, -128.1696014404297, -124.13402557373047, -120.09844970703125, -116.0628662109375, -112.02729034423828, -107.99171447753906, -103.95613861083984, -99.92056274414062, -95.8849868774414, -91.84941101074219, -87.81382751464844, -83.77825927734375, -79.74267578125, -75.70709991455078, -71.6715087890625, -67.63593292236328, -63.60035705566406, -59.56477737426758, -55.52920150756836, -51.49362564086914, -47.458045959472656, -43.42247009277344, -39.386898040771484, -35.351322174072266, -31.315744400024414, -27.280166625976562, -23.244590759277344, -19.209014892578125, -15.173437118530273, -11.137859344482422, -7.102283477783203, -3.066706657409668, 0.9688701629638672, 5.004446983337402, 9.040023803710938, 13.075599670410156, 17.111177444458008, 21.14675521850586, 25.182331085205078, 29.217906951904297, 33.25348663330078, 37.2890625, 41.32463836669922, 45.36021423339844, 49.395790100097656, 53.43136978149414, 57.46694564819336]}, "gradients/decoder.transformer.h.19.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0, 6.0, 6.0, 10.0, 7.0, 6.0, 11.0, 20.0, 16.0, 17.0, 18.0, 21.0, 23.0, 27.0, 34.0, 29.0, 23.0, 47.0, 42.0, 28.0, 36.0, 41.0, 41.0, 42.0, 39.0, 36.0, 38.0, 29.0, 35.0, 41.0, 31.0, 20.0, 23.0, 26.0, 21.0, 25.0, 15.0, 20.0, 11.0, 8.0, 5.0, 7.0, 5.0, 6.0, 8.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-31.257848739624023, -30.298168182373047, -29.338489532470703, -28.378808975219727, -27.41912841796875, -26.459447860717773, -25.499767303466797, -24.540088653564453, -23.580408096313477, -22.6207275390625, -21.661048889160156, -20.70136833190918, -19.741687774658203, -18.782007217407227, -17.82232666015625, -16.862648010253906, -15.90296745300293, -14.943286895751953, -13.983607292175293, -13.023927688598633, -12.064247131347656, -11.10456657409668, -10.14488697052002, -9.18520736694336, -8.225526809692383, -7.2658467292785645, -6.306166648864746, -5.346486568450928, -4.386806488037109, -3.427126407623291, -2.4674463272094727, -1.5077662467956543, -0.5480842590332031, 0.41159582138061523, 1.3712759017944336, 2.330955982208252, 3.2906360626220703, 4.250316143035889, 5.209996223449707, 6.169676303863525, 7.129356384277344, 8.08903694152832, 9.04871654510498, 10.00839614868164, 10.968076705932617, 11.927757263183594, 12.887436866760254, 13.847116470336914, 14.80679702758789, 15.766477584838867, 16.726158142089844, 17.685836791992188, 18.645517349243164, 19.60519790649414, 20.564876556396484, 21.52455711364746, 22.484237670898438, 23.443918228149414, 24.40359878540039, 25.363277435302734, 26.32295799255371, 27.282638549804688, 28.24231719970703, 29.201997756958008, 30.161678314208984]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 1.0, 6.0, 0.0, 3.0, 10.0, 7.0, 4.0, 7.0, 17.0, 9.0, 15.0, 8.0, 18.0, 21.0, 15.0, 29.0, 29.0, 33.0, 28.0, 40.0, 35.0, 48.0, 44.0, 44.0, 52.0, 54.0, 30.0, 40.0, 38.0, 32.0, 34.0, 26.0, 32.0, 26.0, 27.0, 28.0, 21.0, 14.0, 13.0, 13.0, 10.0, 12.0, 2.0, 8.0, 5.0, 4.0, 6.0, 3.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.501953125, -3.39495849609375, -3.2879638671875, -3.18096923828125, -3.073974609375, -2.96697998046875, -2.8599853515625, -2.75299072265625, -2.64599609375, -2.53900146484375, -2.4320068359375, -2.32501220703125, -2.218017578125, -2.11102294921875, -2.0040283203125, -1.89703369140625, -1.7900390625, -1.68304443359375, -1.5760498046875, -1.46905517578125, -1.362060546875, -1.25506591796875, -1.1480712890625, -1.04107666015625, -0.93408203125, -0.82708740234375, -0.7200927734375, -0.61309814453125, -0.506103515625, -0.39910888671875, -0.2921142578125, -0.18511962890625, -0.078125, 0.02886962890625, 0.1358642578125, 0.24285888671875, 0.349853515625, 0.45684814453125, 0.5638427734375, 0.67083740234375, 0.77783203125, 0.88482666015625, 0.9918212890625, 1.09881591796875, 1.205810546875, 1.31280517578125, 1.4197998046875, 1.52679443359375, 1.6337890625, 1.74078369140625, 1.8477783203125, 1.95477294921875, 2.061767578125, 2.16876220703125, 2.2757568359375, 2.38275146484375, 2.48974609375, 2.59674072265625, 2.7037353515625, 2.81072998046875, 2.917724609375, 3.02471923828125, 3.1317138671875, 3.23870849609375, 3.345703125]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 6.0, 5.0, 7.0, 20.0, 14.0, 29.0, 39.0, 52.0, 95.0, 148.0, 221.0, 404.0, 582.0, 895.0, 1463.0, 2526.0, 3995.0, 6577.0, 10588.0, 17263.0, 28523.0, 47270.0, 78990.0, 135476.0, 214812.0, 198055.0, 120544.0, 71397.0, 42419.0, 25533.0, 15619.0, 9640.0, 5898.0, 3635.0, 2195.0, 1405.0, 810.0, 516.0, 338.0, 193.0, 134.0, 67.0, 63.0, 36.0, 21.0, 11.0, 11.0, 7.0, 6.0, 7.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.309814453125, -0.3003959655761719, -0.29097747802734375, -0.2815589904785156, -0.2721405029296875, -0.2627220153808594, -0.25330352783203125, -0.24388504028320312, -0.234466552734375, -0.22504806518554688, -0.21562957763671875, -0.20621109008789062, -0.1967926025390625, -0.18737411499023438, -0.17795562744140625, -0.16853713989257812, -0.15911865234375, -0.14970016479492188, -0.14028167724609375, -0.13086318969726562, -0.1214447021484375, -0.11202621459960938, -0.10260772705078125, -0.09318923950195312, -0.083770751953125, -0.07435226440429688, -0.06493377685546875, -0.055515289306640625, -0.0460968017578125, -0.036678314208984375, -0.02725982666015625, -0.017841339111328125, -0.0084228515625, 0.000995635986328125, 0.01041412353515625, 0.019832611083984375, 0.0292510986328125, 0.038669586181640625, 0.04808807373046875, 0.057506561279296875, 0.066925048828125, 0.07634353637695312, 0.08576202392578125, 0.09518051147460938, 0.1045989990234375, 0.11401748657226562, 0.12343597412109375, 0.13285446166992188, 0.14227294921875, 0.15169143676757812, 0.16110992431640625, 0.17052841186523438, 0.1799468994140625, 0.18936538696289062, 0.19878387451171875, 0.20820236206054688, 0.217620849609375, 0.22703933715820312, 0.23645782470703125, 0.24587631225585938, 0.2552947998046875, 0.2647132873535156, 0.27413177490234375, 0.2835502624511719, 0.29296875]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 3.0, 3.0, 7.0, 6.0, 11.0, 7.0, 15.0, 9.0, 18.0, 27.0, 10.0, 22.0, 27.0, 24.0, 40.0, 40.0, 41.0, 54.0, 31.0, 50.0, 1073.0, 52.0, 47.0, 41.0, 48.0, 49.0, 35.0, 41.0, 34.0, 18.0, 28.0, 16.0, 16.0, 16.0, 10.0, 7.0, 15.0, 9.0, 9.0, 4.0, 3.0, 6.0, 6.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.001953125, -1.927703857421875, -1.85345458984375, -1.779205322265625, -1.7049560546875, -1.630706787109375, -1.55645751953125, -1.482208251953125, -1.407958984375, -1.333709716796875, -1.25946044921875, -1.185211181640625, -1.1109619140625, -1.036712646484375, -0.96246337890625, -0.888214111328125, -0.81396484375, -0.739715576171875, -0.66546630859375, -0.591217041015625, -0.5169677734375, -0.442718505859375, -0.36846923828125, -0.294219970703125, -0.219970703125, -0.145721435546875, -0.07147216796875, 0.002777099609375, 0.0770263671875, 0.151275634765625, 0.22552490234375, 0.299774169921875, 0.3740234375, 0.448272705078125, 0.52252197265625, 0.596771240234375, 0.6710205078125, 0.745269775390625, 0.81951904296875, 0.893768310546875, 0.968017578125, 1.042266845703125, 1.11651611328125, 1.190765380859375, 1.2650146484375, 1.339263916015625, 1.41351318359375, 1.487762451171875, 1.56201171875, 1.636260986328125, 1.71051025390625, 1.784759521484375, 1.8590087890625, 1.933258056640625, 2.00750732421875, 2.081756591796875, 2.156005859375, 2.230255126953125, 2.30450439453125, 2.378753662109375, 2.4530029296875, 2.527252197265625, 2.60150146484375, 2.675750732421875, 2.75]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 5.0, 6.0, 10.0, 15.0, 15.0, 37.0, 55.0, 81.0, 125.0, 182.0, 306.0, 446.0, 689.0, 1128.0, 1810.0, 2710.0, 4267.0, 6320.0, 10160.0, 15631.0, 25329.0, 40981.0, 68678.0, 114083.0, 176186.0, 1249880.0, 146901.0, 89901.0, 53705.0, 32499.0, 19953.0, 12596.0, 8060.0, 5108.0, 3280.0, 2214.0, 1350.0, 880.0, 533.0, 364.0, 226.0, 168.0, 99.0, 54.0, 36.0, 37.0, 15.0, 11.0, 7.0, 2.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.2325439453125, -0.22510528564453125, -0.2176666259765625, -0.21022796630859375, -0.202789306640625, -0.19535064697265625, -0.1879119873046875, -0.18047332763671875, -0.17303466796875, -0.16559600830078125, -0.1581573486328125, -0.15071868896484375, -0.143280029296875, -0.13584136962890625, -0.1284027099609375, -0.12096405029296875, -0.113525390625, -0.10608673095703125, -0.0986480712890625, -0.09120941162109375, -0.083770751953125, -0.07633209228515625, -0.0688934326171875, -0.06145477294921875, -0.05401611328125, -0.04657745361328125, -0.0391387939453125, -0.03170013427734375, -0.024261474609375, -0.01682281494140625, -0.0093841552734375, -0.00194549560546875, 0.0054931640625, 0.01293182373046875, 0.0203704833984375, 0.02780914306640625, 0.035247802734375, 0.04268646240234375, 0.0501251220703125, 0.05756378173828125, 0.06500244140625, 0.07244110107421875, 0.0798797607421875, 0.08731842041015625, 0.094757080078125, 0.10219573974609375, 0.1096343994140625, 0.11707305908203125, 0.12451171875, 0.13195037841796875, 0.1393890380859375, 0.14682769775390625, 0.154266357421875, 0.16170501708984375, 0.1691436767578125, 0.17658233642578125, 0.18402099609375, 0.19145965576171875, 0.1988983154296875, 0.20633697509765625, 0.213775634765625, 0.22121429443359375, 0.2286529541015625, 0.23609161376953125, 0.2435302734375]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 7.0, 5.0, 5.0, 10.0, 11.0, 10.0, 14.0, 20.0, 15.0, 25.0, 23.0, 32.0, 26.0, 34.0, 46.0, 63.0, 65.0, 43.0, 55.0, 55.0, 52.0, 47.0, 51.0, 46.0, 48.0, 25.0, 23.0, 31.0, 22.0, 18.0, 20.0, 13.0, 9.0, 7.0, 5.0, 4.0, 8.0, 5.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006999969482421875, -0.0006755143404006958, -0.0006510317325592041, -0.0006265491247177124, -0.0006020665168762207, -0.000577583909034729, -0.0005531013011932373, -0.0005286186933517456, -0.0005041360855102539, -0.0004796534776687622, -0.0004551708698272705, -0.0004306882619857788, -0.0004062056541442871, -0.0003817230463027954, -0.0003572404384613037, -0.000332757830619812, -0.0003082752227783203, -0.0002837926149368286, -0.0002593100070953369, -0.00023482739925384521, -0.00021034479141235352, -0.00018586218357086182, -0.00016137957572937012, -0.00013689696788787842, -0.00011241436004638672, -8.793175220489502e-05, -6.344914436340332e-05, -3.896653652191162e-05, -1.4483928680419922e-05, 9.998679161071777e-06, 3.4481287002563477e-05, 5.8963894844055176e-05, 8.344650268554688e-05, 0.00010792911052703857, 0.00013241171836853027, 0.00015689432621002197, 0.00018137693405151367, 0.00020585954189300537, 0.00023034214973449707, 0.00025482475757598877, 0.00027930736541748047, 0.00030378997325897217, 0.00032827258110046387, 0.00035275518894195557, 0.00037723779678344727, 0.00040172040462493896, 0.00042620301246643066, 0.00045068562030792236, 0.00047516822814941406, 0.0004996508359909058, 0.0005241334438323975, 0.0005486160516738892, 0.0005730986595153809, 0.0005975812673568726, 0.0006220638751983643, 0.000646546483039856, 0.0006710290908813477, 0.0006955116987228394, 0.0007199943065643311, 0.0007444769144058228, 0.0007689595222473145, 0.0007934421300888062, 0.0008179247379302979, 0.0008424073457717896, 0.0008668899536132812]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 2.0, 6.0, 5.0, 8.0, 4.0, 10.0, 22.0, 25.0, 29.0, 24.0, 43.0, 41.0, 70.0, 90.0, 103.0, 141.0, 201.0, 229.0, 348.0, 723.0, 7438.0, 782059.0, 252528.0, 2568.0, 562.0, 320.0, 216.0, 151.0, 109.0, 100.0, 101.0, 69.0, 47.0, 26.0, 35.0, 29.0, 15.0, 21.0, 9.0, 7.0, 5.0, 7.0, 4.0, 2.0, 6.0, 2.0, 0.0, 3.0, 1.0], "bins": [-0.01459503173828125, -0.014198899269104004, -0.013802766799926758, -0.013406634330749512, -0.013010501861572266, -0.01261436939239502, -0.012218236923217773, -0.011822104454040527, -0.011425971984863281, -0.011029839515686035, -0.010633707046508789, -0.010237574577331543, -0.009841442108154297, -0.00944530963897705, -0.009049177169799805, -0.008653044700622559, -0.008256912231445312, -0.007860779762268066, -0.00746464729309082, -0.007068514823913574, -0.006672382354736328, -0.006276249885559082, -0.005880117416381836, -0.00548398494720459, -0.005087852478027344, -0.004691720008850098, -0.0042955875396728516, -0.0038994550704956055, -0.0035033226013183594, -0.0031071901321411133, -0.002711057662963867, -0.002314925193786621, -0.001918792724609375, -0.001522660255432129, -0.0011265277862548828, -0.0007303953170776367, -0.0003342628479003906, 6.186962127685547e-05, 0.00045800209045410156, 0.0008541345596313477, 0.0012502670288085938, 0.0016463994979858398, 0.002042531967163086, 0.002438664436340332, 0.002834796905517578, 0.0032309293746948242, 0.0036270618438720703, 0.004023194313049316, 0.0044193267822265625, 0.004815459251403809, 0.005211591720581055, 0.005607724189758301, 0.006003856658935547, 0.006399989128112793, 0.006796121597290039, 0.007192254066467285, 0.007588386535644531, 0.007984519004821777, 0.008380651473999023, 0.00877678394317627, 0.009172916412353516, 0.009569048881530762, 0.009965181350708008, 0.010361313819885254, 0.0107574462890625]}, "gradients/decoder.transformer.h.19.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 14.0, 50.0, 178.0, 325.0, 307.0, 109.0, 25.0, 8.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0013764874311164021, -0.0013186584692448378, -0.0012608295073732734, -0.001203000545501709, -0.0011451717000454664, -0.0010873426217585802, -0.0010295137763023376, -0.0009716848144307733, -0.0009138558525592089, -0.0008560268906876445, -0.0007981979288160801, -0.0007403690251521766, -0.0006825400632806122, -0.0006247111014090478, -0.0005668821977451444, -0.00050905323587358, -0.0004512242740020156, -0.0003933953121304512, -0.00033556637936271727, -0.00027773744659498334, -0.00021990848472341895, -0.00016207952285185456, -0.00010425059008412063, -4.64216573163867e-05, 1.1407304555177689e-05, 6.923625187482685e-05, 0.000127065199194476, 0.00018489414651412517, 0.00024272309383377433, 0.0003005520557053387, 0.00035838098847307265, 0.0004162099212408066, 0.0004740389995276928, 0.0005318679613992572, 0.0005896969232708216, 0.000647525826934725, 0.0007053547888062894, 0.0007631837506778538, 0.0008210126543417573, 0.0008788416162133217, 0.0009366705780848861, 0.0009944995399564505, 0.0010523285018280149, 0.0011101574636995792, 0.0011679863091558218, 0.001225815387442708, 0.0012836442328989506, 0.001341473194770515, 0.0013993021566420794, 0.0014571311185136437, 0.0015149600803852081, 0.0015727890422567725, 0.001630618004128337, 0.0016884468495845795, 0.0017462758114561439, 0.0018041047733277082, 0.0018619337351992726, 0.001919762697070837, 0.0019775915425270796, 0.002035420620813966, 0.0020932494662702084, 0.0021510785445570946, 0.002208907390013337, 0.0022667362354695797, 0.002324565313756466]}, "gradients/decoder.transformer.h.19.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 3.0, 3.0, 6.0, 6.0, 9.0, 12.0, 10.0, 15.0, 21.0, 28.0, 22.0, 32.0, 44.0, 41.0, 39.0, 35.0, 36.0, 39.0, 42.0, 46.0, 44.0, 37.0, 43.0, 41.0, 42.0, 35.0, 32.0, 34.0, 29.0, 31.0, 28.0, 22.0, 22.0, 13.0, 13.0, 16.0, 16.0, 6.0, 7.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004877448081970215, -0.00047342386096715927, -0.00045910291373729706, -0.00044478196650743484, -0.00043046101927757263, -0.0004161400720477104, -0.0004018191248178482, -0.000387498177587986, -0.0003731772303581238, -0.00035885628312826157, -0.00034453533589839935, -0.00033021438866853714, -0.0003158934414386749, -0.0003015724942088127, -0.0002872515469789505, -0.0002729305997490883, -0.0002586096525192261, -0.00024428870528936386, -0.00022996775805950165, -0.00021564681082963943, -0.00020132586359977722, -0.000187004916369915, -0.0001726839691400528, -0.00015836302191019058, -0.00014404207468032837, -0.00012972112745046616, -0.00011540018022060394, -0.00010107923299074173, -8.675828576087952e-05, -7.24373385310173e-05, -5.811639130115509e-05, -4.379544407129288e-05, -2.9474496841430664e-05, -1.5153549611568451e-05, -8.326023817062378e-07, 1.3488344848155975e-05, 2.780929207801819e-05, 4.21302393078804e-05, 5.6451186537742615e-05, 7.077213376760483e-05, 8.509308099746704e-05, 9.941402822732925e-05, 0.00011373497545719147, 0.00012805592268705368, 0.0001423768699169159, 0.0001566978171467781, 0.00017101876437664032, 0.00018533971160650253, 0.00019966065883636475, 0.00021398160606622696, 0.00022830255329608917, 0.00024262350052595139, 0.0002569444477558136, 0.0002712653949856758, 0.000285586342215538, 0.00029990728944540024, 0.00031422823667526245, 0.00032854918390512466, 0.0003428701311349869, 0.0003571910783648491, 0.0003715120255947113, 0.0003858329728245735, 0.00040015392005443573, 0.00041447486728429794, 0.00042879581451416016]}, "gradients/decoder.transformer.h.19.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 1.0, 6.0, 0.0, 3.0, 10.0, 7.0, 4.0, 7.0, 17.0, 9.0, 15.0, 8.0, 18.0, 21.0, 15.0, 29.0, 29.0, 33.0, 28.0, 40.0, 35.0, 48.0, 44.0, 44.0, 52.0, 54.0, 30.0, 40.0, 38.0, 32.0, 34.0, 26.0, 32.0, 26.0, 27.0, 28.0, 21.0, 13.0, 14.0, 13.0, 10.0, 12.0, 2.0, 8.0, 5.0, 4.0, 6.0, 3.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.501953125, -3.39495849609375, -3.2879638671875, -3.18096923828125, -3.073974609375, -2.96697998046875, -2.8599853515625, -2.75299072265625, -2.64599609375, -2.53900146484375, -2.4320068359375, -2.32501220703125, -2.218017578125, -2.11102294921875, -2.0040283203125, -1.89703369140625, -1.7900390625, -1.68304443359375, -1.5760498046875, -1.46905517578125, -1.362060546875, -1.25506591796875, -1.1480712890625, -1.04107666015625, -0.93408203125, -0.82708740234375, -0.7200927734375, -0.61309814453125, -0.506103515625, -0.39910888671875, -0.2921142578125, -0.18511962890625, -0.078125, 0.02886962890625, 0.1358642578125, 0.24285888671875, 0.349853515625, 0.45684814453125, 0.5638427734375, 0.67083740234375, 0.77783203125, 0.88482666015625, 0.9918212890625, 1.09881591796875, 1.205810546875, 1.31280517578125, 1.4197998046875, 1.52679443359375, 1.6337890625, 1.74078369140625, 1.8477783203125, 1.95477294921875, 2.061767578125, 2.16876220703125, 2.2757568359375, 2.38275146484375, 2.48974609375, 2.59674072265625, 2.7037353515625, 2.81072998046875, 2.917724609375, 3.02471923828125, 3.1317138671875, 3.23870849609375, 3.345703125]}, "gradients/decoder.transformer.h.19.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 3.0, 4.0, 4.0, 7.0, 8.0, 7.0, 11.0, 15.0, 25.0, 35.0, 43.0, 66.0, 113.0, 119.0, 235.0, 351.0, 563.0, 960.0, 1601.0, 2644.0, 4752.0, 8418.0, 15738.0, 30462.0, 61101.0, 120977.0, 209728.0, 243148.0, 166556.0, 88055.0, 43728.0, 22154.0, 11604.0, 6351.0, 3558.0, 2003.0, 1290.0, 783.0, 473.0, 287.0, 198.0, 116.0, 78.0, 49.0, 38.0, 29.0, 23.0, 14.0, 9.0, 14.0, 7.0, 5.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.244140625, -2.16998291015625, -2.0958251953125, -2.02166748046875, -1.947509765625, -1.87335205078125, -1.7991943359375, -1.72503662109375, -1.65087890625, -1.57672119140625, -1.5025634765625, -1.42840576171875, -1.354248046875, -1.28009033203125, -1.2059326171875, -1.13177490234375, -1.0576171875, -0.98345947265625, -0.9093017578125, -0.83514404296875, -0.760986328125, -0.68682861328125, -0.6126708984375, -0.53851318359375, -0.46435546875, -0.39019775390625, -0.3160400390625, -0.24188232421875, -0.167724609375, -0.09356689453125, -0.0194091796875, 0.05474853515625, 0.12890625, 0.20306396484375, 0.2772216796875, 0.35137939453125, 0.425537109375, 0.49969482421875, 0.5738525390625, 0.64801025390625, 0.72216796875, 0.79632568359375, 0.8704833984375, 0.94464111328125, 1.018798828125, 1.09295654296875, 1.1671142578125, 1.24127197265625, 1.3154296875, 1.38958740234375, 1.4637451171875, 1.53790283203125, 1.612060546875, 1.68621826171875, 1.7603759765625, 1.83453369140625, 1.90869140625, 1.98284912109375, 2.0570068359375, 2.13116455078125, 2.205322265625, 2.27947998046875, 2.3536376953125, 2.42779541015625, 2.501953125]}, "gradients/decoder.transformer.h.19.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 6.0, 7.0, 16.0, 12.0, 7.0, 9.0, 17.0, 12.0, 15.0, 25.0, 16.0, 39.0, 26.0, 29.0, 44.0, 51.0, 44.0, 71.0, 169.0, 1517.0, 351.0, 132.0, 65.0, 40.0, 48.0, 42.0, 34.0, 30.0, 28.0, 21.0, 33.0, 24.0, 16.0, 19.0, 8.0, 6.0, 10.0, 4.0, 3.0, 1.0, 3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.921875, -13.49853515625, -13.0751953125, -12.65185546875, -12.228515625, -11.80517578125, -11.3818359375, -10.95849609375, -10.53515625, -10.11181640625, -9.6884765625, -9.26513671875, -8.841796875, -8.41845703125, -7.9951171875, -7.57177734375, -7.1484375, -6.72509765625, -6.3017578125, -5.87841796875, -5.455078125, -5.03173828125, -4.6083984375, -4.18505859375, -3.76171875, -3.33837890625, -2.9150390625, -2.49169921875, -2.068359375, -1.64501953125, -1.2216796875, -0.79833984375, -0.375, 0.04833984375, 0.4716796875, 0.89501953125, 1.318359375, 1.74169921875, 2.1650390625, 2.58837890625, 3.01171875, 3.43505859375, 3.8583984375, 4.28173828125, 4.705078125, 5.12841796875, 5.5517578125, 5.97509765625, 6.3984375, 6.82177734375, 7.2451171875, 7.66845703125, 8.091796875, 8.51513671875, 8.9384765625, 9.36181640625, 9.78515625, 10.20849609375, 10.6318359375, 11.05517578125, 11.478515625, 11.90185546875, 12.3251953125, 12.74853515625, 13.171875]}, "gradients/decoder.transformer.h.19.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 1.0, 4.0, 3.0, 7.0, 1.0, 5.0, 3.0, 7.0, 11.0, 12.0, 26.0, 20.0, 24.0, 36.0, 28.0, 43.0, 70.0, 64.0, 100.0, 149.0, 290.0, 546.0, 1880.0, 22509.0, 2461568.0, 646594.0, 9282.0, 1187.0, 451.0, 218.0, 148.0, 113.0, 62.0, 49.0, 42.0, 30.0, 20.0, 15.0, 11.0, 17.0, 10.0, 15.0, 14.0, 11.0, 5.0, 4.0, 5.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.21875, -18.595947265625, -17.97314453125, -17.350341796875, -16.7275390625, -16.104736328125, -15.48193359375, -14.859130859375, -14.236328125, -13.613525390625, -12.99072265625, -12.367919921875, -11.7451171875, -11.122314453125, -10.49951171875, -9.876708984375, -9.25390625, -8.631103515625, -8.00830078125, -7.385498046875, -6.7626953125, -6.139892578125, -5.51708984375, -4.894287109375, -4.271484375, -3.648681640625, -3.02587890625, -2.403076171875, -1.7802734375, -1.157470703125, -0.53466796875, 0.088134765625, 0.7109375, 1.333740234375, 1.95654296875, 2.579345703125, 3.2021484375, 3.824951171875, 4.44775390625, 5.070556640625, 5.693359375, 6.316162109375, 6.93896484375, 7.561767578125, 8.1845703125, 8.807373046875, 9.43017578125, 10.052978515625, 10.67578125, 11.298583984375, 11.92138671875, 12.544189453125, 13.1669921875, 13.789794921875, 14.41259765625, 15.035400390625, 15.658203125, 16.281005859375, 16.90380859375, 17.526611328125, 18.1494140625, 18.772216796875, 19.39501953125, 20.017822265625, 20.640625]}, "gradients/decoder.transformer.h.19.ln_1.weight": {"_type": "histogram", "values": [3.0, 25.0, 181.0, 464.0, 299.0, 39.0, 7.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.677033424377441, -6.325624465942383, -3.974215507507324, -1.6228065490722656, 0.728602409362793, 3.0800113677978516, 5.43142032623291, 7.782828330993652, 10.134238243103027, 12.485647201538086, 14.837056159973145, 17.188465118408203, 19.539875030517578, 21.89128303527832, 24.242691040039062, 26.594100952148438, 28.945510864257812, 31.296920776367188, 33.64833068847656, 35.99973678588867, 38.35114669799805, 40.70255661010742, 43.05396270751953, 45.405372619628906, 47.75678253173828, 50.108192443847656, 52.45960235595703, 54.81100845336914, 57.162418365478516, 59.51382827758789, 61.865234375, 64.21664428710938, 66.56805419921875, 68.91946411132812, 71.2708740234375, 73.62228393554688, 75.97369384765625, 78.3250961303711, 80.67650604248047, 83.02791595458984, 85.37932586669922, 87.7307357788086, 90.08214569091797, 92.43355560302734, 94.78495788574219, 97.13636779785156, 99.48777770996094, 101.83918762207031, 104.19059753417969, 106.54200744628906, 108.89341735839844, 111.24482727050781, 113.59623718261719, 115.94763946533203, 118.2990493774414, 120.65045928955078, 123.00186920166016, 125.35327911376953, 127.7046890258789, 130.05609130859375, 132.40750122070312, 134.7589111328125, 137.11032104492188, 139.46173095703125, 141.81314086914062]}, "gradients/decoder.transformer.h.19.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 4.0, 2.0, 1.0, 3.0, 2.0, 9.0, 13.0, 11.0, 11.0, 19.0, 20.0, 32.0, 22.0, 27.0, 41.0, 45.0, 39.0, 42.0, 47.0, 50.0, 44.0, 45.0, 35.0, 46.0, 39.0, 42.0, 36.0, 42.0, 36.0, 35.0, 23.0, 21.0, 24.0, 17.0, 19.0, 11.0, 11.0, 12.0, 13.0, 3.0, 5.0, 2.0, 3.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.44906234741211, -41.07564926147461, -39.70223617553711, -38.32882308959961, -36.955413818359375, -35.582000732421875, -34.208587646484375, -32.835174560546875, -31.461761474609375, -30.088348388671875, -28.714935302734375, -27.341524124145508, -25.968111038208008, -24.594697952270508, -23.22128677368164, -21.84787368774414, -20.47446060180664, -19.10104751586914, -17.72763442993164, -16.354223251342773, -14.980810165405273, -13.607397079467773, -12.23398494720459, -10.860572814941406, -9.487159729003906, -8.113746643066406, -6.740334510803223, -5.366921901702881, -3.993509292602539, -2.6200966835021973, -1.2466840744018555, 0.12672805786132812, 1.5001449584960938, 2.8735575675964355, 4.246970176696777, 5.620382785797119, 6.993795394897461, 8.367208480834961, 9.740620613098145, 11.114032745361328, 12.487445831298828, 13.860858917236328, 15.234271049499512, 16.607683181762695, 17.981096267700195, 19.354509353637695, 20.727920532226562, 22.101333618164062, 23.474746704101562, 24.848159790039062, 26.221572875976562, 27.59498405456543, 28.96839714050293, 30.34181022644043, 31.715221405029297, 33.0886344909668, 34.4620475769043, 35.8354606628418, 37.2088737487793, 38.5822868347168, 39.95569610595703, 41.32910919189453, 42.70252227783203, 44.07593536376953, 45.44934844970703]}, "gradients/decoder.transformer.h.18.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 8.0, 4.0, 6.0, 6.0, 6.0, 10.0, 6.0, 11.0, 6.0, 11.0, 12.0, 17.0, 12.0, 29.0, 25.0, 24.0, 26.0, 32.0, 30.0, 52.0, 36.0, 38.0, 46.0, 41.0, 47.0, 41.0, 47.0, 37.0, 34.0, 36.0, 28.0, 27.0, 35.0, 23.0, 23.0, 23.0, 22.0, 17.0, 15.0, 5.0, 7.0, 13.0, 7.0, 6.0, 5.0, 4.0, 6.0, 4.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-3.66015625, -3.550079345703125, -3.44000244140625, -3.329925537109375, -3.2198486328125, -3.109771728515625, -2.99969482421875, -2.889617919921875, -2.779541015625, -2.669464111328125, -2.55938720703125, -2.449310302734375, -2.3392333984375, -2.229156494140625, -2.11907958984375, -2.009002685546875, -1.89892578125, -1.788848876953125, -1.67877197265625, -1.568695068359375, -1.4586181640625, -1.348541259765625, -1.23846435546875, -1.128387451171875, -1.018310546875, -0.908233642578125, -0.79815673828125, -0.688079833984375, -0.5780029296875, -0.467926025390625, -0.35784912109375, -0.247772216796875, -0.1376953125, -0.027618408203125, 0.08245849609375, 0.192535400390625, 0.3026123046875, 0.412689208984375, 0.52276611328125, 0.632843017578125, 0.742919921875, 0.852996826171875, 0.96307373046875, 1.073150634765625, 1.1832275390625, 1.293304443359375, 1.40338134765625, 1.513458251953125, 1.62353515625, 1.733612060546875, 1.84368896484375, 1.953765869140625, 2.0638427734375, 2.173919677734375, 2.28399658203125, 2.394073486328125, 2.504150390625, 2.614227294921875, 2.72430419921875, 2.834381103515625, 2.9444580078125, 3.054534912109375, 3.16461181640625, 3.274688720703125, 3.384765625]}, "gradients/decoder.transformer.h.18.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 6.0, 3.0, 5.0, 7.0, 6.0, 6.0, 6.0, 12.0, 12.0, 12.0, 19.0, 18.0, 21.0, 33.0, 47.0, 77.0, 106.0, 191.0, 340.0, 744.0, 2228.0, 7932.0, 33319.0, 180995.0, 1151332.0, 2170498.0, 536932.0, 84929.0, 17521.0, 4367.0, 1386.0, 486.0, 235.0, 121.0, 93.0, 57.0, 41.0, 21.0, 18.0, 27.0, 7.0, 13.0, 13.0, 10.0, 5.0, 1.0, 14.0, 3.0, 4.0, 2.0, 2.0, 6.0, 1.0, 1.0, 2.0, 2.0], "bins": [-8.6484375, -8.38861083984375, -8.1287841796875, -7.86895751953125, -7.609130859375, -7.34930419921875, -7.0894775390625, -6.82965087890625, -6.56982421875, -6.30999755859375, -6.0501708984375, -5.79034423828125, -5.530517578125, -5.27069091796875, -5.0108642578125, -4.75103759765625, -4.4912109375, -4.23138427734375, -3.9715576171875, -3.71173095703125, -3.451904296875, -3.19207763671875, -2.9322509765625, -2.67242431640625, -2.41259765625, -2.15277099609375, -1.8929443359375, -1.63311767578125, -1.373291015625, -1.11346435546875, -0.8536376953125, -0.59381103515625, -0.333984375, -0.07415771484375, 0.1856689453125, 0.44549560546875, 0.705322265625, 0.96514892578125, 1.2249755859375, 1.48480224609375, 1.74462890625, 2.00445556640625, 2.2642822265625, 2.52410888671875, 2.783935546875, 3.04376220703125, 3.3035888671875, 3.56341552734375, 3.8232421875, 4.08306884765625, 4.3428955078125, 4.60272216796875, 4.862548828125, 5.12237548828125, 5.3822021484375, 5.64202880859375, 5.90185546875, 6.16168212890625, 6.4215087890625, 6.68133544921875, 6.941162109375, 7.20098876953125, 7.4608154296875, 7.72064208984375, 7.98046875]}, "gradients/decoder.transformer.h.18.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 3.0, 2.0, 5.0, 4.0, 9.0, 13.0, 18.0, 22.0, 22.0, 27.0, 52.0, 53.0, 77.0, 115.0, 152.0, 206.0, 252.0, 347.0, 444.0, 423.0, 409.0, 334.0, 286.0, 214.0, 155.0, 123.0, 98.0, 58.0, 54.0, 23.0, 23.0, 18.0, 7.0, 13.0, 4.0, 6.0, 6.0, 2.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.7734375, -8.4654541015625, -8.157470703125, -7.8494873046875, -7.54150390625, -7.2335205078125, -6.925537109375, -6.6175537109375, -6.3095703125, -6.0015869140625, -5.693603515625, -5.3856201171875, -5.07763671875, -4.7696533203125, -4.461669921875, -4.1536865234375, -3.845703125, -3.5377197265625, -3.229736328125, -2.9217529296875, -2.61376953125, -2.3057861328125, -1.997802734375, -1.6898193359375, -1.3818359375, -1.0738525390625, -0.765869140625, -0.4578857421875, -0.14990234375, 0.1580810546875, 0.466064453125, 0.7740478515625, 1.08203125, 1.3900146484375, 1.697998046875, 2.0059814453125, 2.31396484375, 2.6219482421875, 2.929931640625, 3.2379150390625, 3.5458984375, 3.8538818359375, 4.161865234375, 4.4698486328125, 4.77783203125, 5.0858154296875, 5.393798828125, 5.7017822265625, 6.009765625, 6.3177490234375, 6.625732421875, 6.9337158203125, 7.24169921875, 7.5496826171875, 7.857666015625, 8.1656494140625, 8.4736328125, 8.7816162109375, 9.089599609375, 9.3975830078125, 9.70556640625, 10.0135498046875, 10.321533203125, 10.6295166015625, 10.9375]}, "gradients/decoder.transformer.h.18.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 9.0, 12.0, 11.0, 15.0, 15.0, 35.0, 29.0, 51.0, 38.0, 69.0, 92.0, 139.0, 194.0, 357.0, 722.0, 1803.0, 6800.0, 33747.0, 219756.0, 1556559.0, 2007388.0, 307990.0, 45637.0, 8523.0, 2255.0, 834.0, 409.0, 206.0, 163.0, 112.0, 75.0, 64.0, 38.0, 32.0, 22.0, 19.0, 14.0, 17.0, 11.0, 7.0, 5.0, 4.0, 6.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-14.65625, -14.199462890625, -13.74267578125, -13.285888671875, -12.8291015625, -12.372314453125, -11.91552734375, -11.458740234375, -11.001953125, -10.545166015625, -10.08837890625, -9.631591796875, -9.1748046875, -8.718017578125, -8.26123046875, -7.804443359375, -7.34765625, -6.890869140625, -6.43408203125, -5.977294921875, -5.5205078125, -5.063720703125, -4.60693359375, -4.150146484375, -3.693359375, -3.236572265625, -2.77978515625, -2.322998046875, -1.8662109375, -1.409423828125, -0.95263671875, -0.495849609375, -0.0390625, 0.417724609375, 0.87451171875, 1.331298828125, 1.7880859375, 2.244873046875, 2.70166015625, 3.158447265625, 3.615234375, 4.072021484375, 4.52880859375, 4.985595703125, 5.4423828125, 5.899169921875, 6.35595703125, 6.812744140625, 7.26953125, 7.726318359375, 8.18310546875, 8.639892578125, 9.0966796875, 9.553466796875, 10.01025390625, 10.467041015625, 10.923828125, 11.380615234375, 11.83740234375, 12.294189453125, 12.7509765625, 13.207763671875, 13.66455078125, 14.121337890625, 14.578125]}, "gradients/decoder.transformer.h.18.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 14.0, 19.0, 29.0, 69.0, 64.0, 90.0, 90.0, 107.0, 101.0, 115.0, 89.0, 84.0, 57.0, 31.0, 24.0, 11.0, 9.0, 4.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.82568359375, -35.23188781738281, -33.63808822631836, -32.04429244995117, -30.45049476623535, -28.85669708251953, -27.26289939880371, -25.66910171508789, -24.075305938720703, -22.481508255004883, -20.887710571289062, -19.293914794921875, -17.700117111206055, -16.106319427490234, -14.512521743774414, -12.91872501373291, -11.324926376342773, -9.731128692626953, -8.13733196258545, -6.543534278869629, -4.949737071990967, -3.3559398651123047, -1.7621421813964844, -0.16834545135498047, 1.4254522323608398, 3.019249439239502, 4.613046646118164, 6.206844329833984, 7.8006415367126465, 9.394438743591309, 10.988236427307129, 12.582033157348633, 14.175830841064453, 15.769628524780273, 17.363426208496094, 18.95722198486328, 20.5510196685791, 22.144817352294922, 23.738615036010742, 25.332412719726562, 26.92620849609375, 28.52000617980957, 30.11380386352539, 31.707599639892578, 33.30139923095703, 34.89519500732422, 36.488990783691406, 38.08279037475586, 39.67658996582031, 41.2703857421875, 42.86418533325195, 44.45798110961914, 46.051780700683594, 47.64557647705078, 49.23937225341797, 50.83317184448242, 52.42696762084961, 54.0207633972168, 55.61456298828125, 57.20835876464844, 58.80215835571289, 60.39595413208008, 61.98975372314453, 63.58354949951172, 65.1773452758789]}, "gradients/decoder.transformer.h.18.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 1.0, 4.0, 7.0, 6.0, 3.0, 7.0, 8.0, 7.0, 11.0, 8.0, 14.0, 19.0, 26.0, 19.0, 27.0, 25.0, 24.0, 30.0, 30.0, 27.0, 33.0, 45.0, 29.0, 35.0, 30.0, 42.0, 42.0, 35.0, 34.0, 35.0, 40.0, 33.0, 36.0, 30.0, 28.0, 17.0, 17.0, 21.0, 22.0, 19.0, 13.0, 13.0, 11.0, 9.0, 9.0, 7.0, 6.0, 3.0, 5.0, 2.0, 5.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.311731338500977, -27.35302734375, -26.394325256347656, -25.435623168945312, -24.476919174194336, -23.51821517944336, -22.559513092041016, -21.600811004638672, -20.642107009887695, -19.68340301513672, -18.724700927734375, -17.76599884033203, -16.807294845581055, -15.848591804504395, -14.889888763427734, -13.931185722351074, -12.972482681274414, -12.013779640197754, -11.055076599121094, -10.096373558044434, -9.137670516967773, -8.178967475891113, -7.220264434814453, -6.261561393737793, -5.302858352661133, -4.344155311584473, -3.3854522705078125, -2.4267492294311523, -1.4680461883544922, -0.509343147277832, 0.4493598937988281, 1.4080629348754883, 2.3667678833007812, 3.3254709243774414, 4.284173965454102, 5.242877006530762, 6.201580047607422, 7.160283088684082, 8.118986129760742, 9.077689170837402, 10.036392211914062, 10.995095252990723, 11.953798294067383, 12.912501335144043, 13.871204376220703, 14.829907417297363, 15.788610458374023, 16.747314453125, 17.706016540527344, 18.664718627929688, 19.623422622680664, 20.58212661743164, 21.540828704833984, 22.499530792236328, 23.458234786987305, 24.41693878173828, 25.375640869140625, 26.33434295654297, 27.293046951293945, 28.251750946044922, 29.210453033447266, 30.16915512084961, 31.127859115600586, 32.08656311035156, 33.045265197753906]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 4.0, 4.0, 3.0, 4.0, 8.0, 8.0, 12.0, 10.0, 15.0, 14.0, 20.0, 22.0, 29.0, 26.0, 32.0, 44.0, 48.0, 52.0, 54.0, 56.0, 47.0, 46.0, 56.0, 49.0, 29.0, 46.0, 45.0, 34.0, 29.0, 21.0, 20.0, 23.0, 19.0, 15.0, 9.0, 16.0, 9.0, 13.0, 5.0, 4.0, 4.0, 2.0, 1.0, 1.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.75390625, -4.6187744140625, -4.483642578125, -4.3485107421875, -4.21337890625, -4.0782470703125, -3.943115234375, -3.8079833984375, -3.6728515625, -3.5377197265625, -3.402587890625, -3.2674560546875, -3.13232421875, -2.9971923828125, -2.862060546875, -2.7269287109375, -2.591796875, -2.4566650390625, -2.321533203125, -2.1864013671875, -2.05126953125, -1.9161376953125, -1.781005859375, -1.6458740234375, -1.5107421875, -1.3756103515625, -1.240478515625, -1.1053466796875, -0.97021484375, -0.8350830078125, -0.699951171875, -0.5648193359375, -0.4296875, -0.2945556640625, -0.159423828125, -0.0242919921875, 0.11083984375, 0.2459716796875, 0.381103515625, 0.5162353515625, 0.6513671875, 0.7864990234375, 0.921630859375, 1.0567626953125, 1.19189453125, 1.3270263671875, 1.462158203125, 1.5972900390625, 1.732421875, 1.8675537109375, 2.002685546875, 2.1378173828125, 2.27294921875, 2.4080810546875, 2.543212890625, 2.6783447265625, 2.8134765625, 2.9486083984375, 3.083740234375, 3.2188720703125, 3.35400390625, 3.4891357421875, 3.624267578125, 3.7593994140625, 3.89453125]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 3.0, 4.0, 5.0, 9.0, 8.0, 18.0, 33.0, 41.0, 52.0, 93.0, 123.0, 179.0, 292.0, 454.0, 663.0, 1034.0, 1627.0, 2598.0, 4202.0, 6646.0, 10970.0, 17758.0, 29074.0, 47054.0, 76689.0, 126432.0, 196821.0, 197553.0, 126835.0, 77551.0, 47597.0, 29014.0, 17772.0, 10956.0, 6868.0, 4188.0, 2640.0, 1600.0, 1050.0, 734.0, 453.0, 291.0, 198.0, 118.0, 85.0, 54.0, 48.0, 21.0, 26.0, 15.0, 6.0, 4.0, 7.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.292236328125, -0.2824974060058594, -0.27275848388671875, -0.2630195617675781, -0.2532806396484375, -0.24354171752929688, -0.23380279541015625, -0.22406387329101562, -0.214324951171875, -0.20458602905273438, -0.19484710693359375, -0.18510818481445312, -0.1753692626953125, -0.16563034057617188, -0.15589141845703125, -0.14615249633789062, -0.13641357421875, -0.12667465209960938, -0.11693572998046875, -0.10719680786132812, -0.0974578857421875, -0.08771896362304688, -0.07798004150390625, -0.06824111938476562, -0.058502197265625, -0.048763275146484375, -0.03902435302734375, -0.029285430908203125, -0.0195465087890625, -0.009807586669921875, -6.866455078125e-05, 0.009670257568359375, 0.0194091796875, 0.029148101806640625, 0.03888702392578125, 0.048625946044921875, 0.0583648681640625, 0.06810379028320312, 0.07784271240234375, 0.08758163452148438, 0.097320556640625, 0.10705947875976562, 0.11679840087890625, 0.12653732299804688, 0.1362762451171875, 0.14601516723632812, 0.15575408935546875, 0.16549301147460938, 0.17523193359375, 0.18497085571289062, 0.19470977783203125, 0.20444869995117188, 0.2141876220703125, 0.22392654418945312, 0.23366546630859375, 0.24340438842773438, 0.253143310546875, 0.2628822326660156, 0.27262115478515625, 0.2823600769042969, 0.2920989990234375, 0.3018379211425781, 0.31157684326171875, 0.3213157653808594, 0.3310546875]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 3.0, 10.0, 2.0, 4.0, 17.0, 12.0, 10.0, 24.0, 11.0, 22.0, 22.0, 32.0, 27.0, 37.0, 34.0, 38.0, 55.0, 46.0, 46.0, 1077.0, 56.0, 41.0, 59.0, 43.0, 42.0, 37.0, 56.0, 35.0, 25.0, 18.0, 19.0, 12.0, 13.0, 16.0, 6.0, 5.0, 4.0, 6.0, 2.0, 2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.853515625, -2.76409912109375, -2.6746826171875, -2.58526611328125, -2.495849609375, -2.40643310546875, -2.3170166015625, -2.22760009765625, -2.13818359375, -2.04876708984375, -1.9593505859375, -1.86993408203125, -1.780517578125, -1.69110107421875, -1.6016845703125, -1.51226806640625, -1.4228515625, -1.33343505859375, -1.2440185546875, -1.15460205078125, -1.065185546875, -0.97576904296875, -0.8863525390625, -0.79693603515625, -0.70751953125, -0.61810302734375, -0.5286865234375, -0.43927001953125, -0.349853515625, -0.26043701171875, -0.1710205078125, -0.08160400390625, 0.0078125, 0.09722900390625, 0.1866455078125, 0.27606201171875, 0.365478515625, 0.45489501953125, 0.5443115234375, 0.63372802734375, 0.72314453125, 0.81256103515625, 0.9019775390625, 0.99139404296875, 1.080810546875, 1.17022705078125, 1.2596435546875, 1.34906005859375, 1.4384765625, 1.52789306640625, 1.6173095703125, 1.70672607421875, 1.796142578125, 1.88555908203125, 1.9749755859375, 2.06439208984375, 2.15380859375, 2.24322509765625, 2.3326416015625, 2.42205810546875, 2.511474609375, 2.60089111328125, 2.6903076171875, 2.77972412109375, 2.869140625]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 7.0, 13.0, 14.0, 33.0, 42.0, 59.0, 90.0, 129.0, 246.0, 389.0, 695.0, 1117.0, 1959.0, 3285.0, 5361.0, 9378.0, 16020.0, 27574.0, 47281.0, 79982.0, 133710.0, 985607.0, 460114.0, 132258.0, 79660.0, 46303.0, 27102.0, 15781.0, 9369.0, 5610.0, 3230.0, 1861.0, 1114.0, 714.0, 378.0, 262.0, 157.0, 80.0, 59.0, 32.0, 25.0, 14.0, 6.0, 5.0, 5.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.302490234375, -0.2935905456542969, -0.28469085693359375, -0.2757911682128906, -0.2668914794921875, -0.2579917907714844, -0.24909210205078125, -0.24019241333007812, -0.231292724609375, -0.22239303588867188, -0.21349334716796875, -0.20459365844726562, -0.1956939697265625, -0.18679428100585938, -0.17789459228515625, -0.16899490356445312, -0.16009521484375, -0.15119552612304688, -0.14229583740234375, -0.13339614868164062, -0.1244964599609375, -0.11559677124023438, -0.10669708251953125, -0.09779739379882812, -0.088897705078125, -0.07999801635742188, -0.07109832763671875, -0.062198638916015625, -0.0532989501953125, -0.044399261474609375, -0.03549957275390625, -0.026599884033203125, -0.0177001953125, -0.008800506591796875, 9.918212890625e-05, 0.008998870849609375, 0.0178985595703125, 0.026798248291015625, 0.03569793701171875, 0.044597625732421875, 0.053497314453125, 0.062397003173828125, 0.07129669189453125, 0.08019638061523438, 0.0890960693359375, 0.09799575805664062, 0.10689544677734375, 0.11579513549804688, 0.12469482421875, 0.13359451293945312, 0.14249420166015625, 0.15139389038085938, 0.1602935791015625, 0.16919326782226562, 0.17809295654296875, 0.18699264526367188, 0.195892333984375, 0.20479202270507812, 0.21369171142578125, 0.22259140014648438, 0.2314910888671875, 0.24039077758789062, 0.24929046630859375, 0.2581901550292969, 0.26708984375]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 5.0, 3.0, 9.0, 4.0, 6.0, 10.0, 9.0, 16.0, 19.0, 20.0, 28.0, 45.0, 29.0, 53.0, 51.0, 79.0, 69.0, 64.0, 80.0, 70.0, 61.0, 58.0, 49.0, 38.0, 24.0, 20.0, 20.0, 13.0, 12.0, 8.0, 4.0, 5.0, 7.0, 3.0, 4.0, 3.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.0010089874267578125, -0.0009768903255462646, -0.0009447932243347168, -0.0009126961231231689, -0.0008805990219116211, -0.0008485019207000732, -0.0008164048194885254, -0.0007843077182769775, -0.0007522106170654297, -0.0007201135158538818, -0.000688016414642334, -0.0006559193134307861, -0.0006238222122192383, -0.0005917251110076904, -0.0005596280097961426, -0.0005275309085845947, -0.0004954338073730469, -0.000463336706161499, -0.00043123960494995117, -0.0003991425037384033, -0.00036704540252685547, -0.0003349483013153076, -0.00030285120010375977, -0.0002707540988922119, -0.00023865699768066406, -0.0002065598964691162, -0.00017446279525756836, -0.0001423656940460205, -0.00011026859283447266, -7.81714916229248e-05, -4.607439041137695e-05, -1.3977289199829102e-05, 1.811981201171875e-05, 5.02169132232666e-05, 8.231401443481445e-05, 0.0001144111156463623, 0.00014650821685791016, 0.000178605318069458, 0.00021070241928100586, 0.0002427995204925537, 0.00027489662170410156, 0.0003069937229156494, 0.00033909082412719727, 0.0003711879253387451, 0.00040328502655029297, 0.0004353821277618408, 0.00046747922897338867, 0.0004995763301849365, 0.0005316734313964844, 0.0005637705326080322, 0.0005958676338195801, 0.0006279647350311279, 0.0006600618362426758, 0.0006921589374542236, 0.0007242560386657715, 0.0007563531398773193, 0.0007884502410888672, 0.000820547342300415, 0.0008526444435119629, 0.0008847415447235107, 0.0009168386459350586, 0.0009489357471466064, 0.0009810328483581543, 0.0010131299495697021, 0.00104522705078125]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 4.0, 4.0, 10.0, 6.0, 9.0, 11.0, 13.0, 13.0, 17.0, 33.0, 48.0, 67.0, 84.0, 117.0, 230.0, 317.0, 576.0, 4681.0, 957314.0, 82663.0, 1146.0, 431.0, 244.0, 155.0, 102.0, 66.0, 48.0, 42.0, 22.0, 24.0, 16.0, 11.0, 6.0, 7.0, 5.0, 3.0, 5.0, 0.0, 3.0, 2.0, 0.0, 0.0, 5.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0192108154296875, -0.018624067306518555, -0.01803731918334961, -0.017450571060180664, -0.01686382293701172, -0.016277074813842773, -0.015690326690673828, -0.015103578567504883, -0.014516830444335938, -0.013930082321166992, -0.013343334197998047, -0.012756586074829102, -0.012169837951660156, -0.011583089828491211, -0.010996341705322266, -0.01040959358215332, -0.009822845458984375, -0.00923609733581543, -0.008649349212646484, -0.008062601089477539, -0.007475852966308594, -0.0068891048431396484, -0.006302356719970703, -0.005715608596801758, -0.0051288604736328125, -0.004542112350463867, -0.003955364227294922, -0.0033686161041259766, -0.0027818679809570312, -0.002195119857788086, -0.0016083717346191406, -0.0010216236114501953, -0.00043487548828125, 0.0001518726348876953, 0.0007386207580566406, 0.001325368881225586, 0.0019121170043945312, 0.0024988651275634766, 0.003085613250732422, 0.003672361373901367, 0.0042591094970703125, 0.004845857620239258, 0.005432605743408203, 0.0060193538665771484, 0.006606101989746094, 0.007192850112915039, 0.007779598236083984, 0.00836634635925293, 0.008953094482421875, 0.00953984260559082, 0.010126590728759766, 0.010713338851928711, 0.011300086975097656, 0.011886835098266602, 0.012473583221435547, 0.013060331344604492, 0.013647079467773438, 0.014233827590942383, 0.014820575714111328, 0.015407323837280273, 0.01599407196044922, 0.016580820083618164, 0.01716756820678711, 0.017754316329956055, 0.018341064453125]}, "gradients/decoder.transformer.h.18.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 9.0, 11.0, 84.0, 157.0, 278.0, 254.0, 150.0, 49.0, 20.0, 5.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00055880774743855, -0.0005075038061477244, -0.0004561998648568988, -0.0004048959235660732, -0.0003535919822752476, -0.0003022880700882524, -0.0002509841287974268, -0.00019968018750660121, -0.0001483762462157756, -9.707230492495e-05, -4.576837091008201e-05, 5.535563104785979e-06, 5.6839504395611584e-05, 0.00010814343113452196, 0.00015944737242534757, 0.00021075131371617317, 0.0002620552550069988, 0.0003133591962978244, 0.00036466313758865, 0.0004159670788794756, 0.0004672710201703012, 0.0005185749614611268, 0.0005698788445442915, 0.000621182844042778, 0.0006724867271259427, 0.0007237906684167683, 0.0007750946097075939, 0.0008263985509984195, 0.0008777024922892451, 0.0009290063753724098, 0.0009803103748708963, 0.001031614257954061, 0.0010829182574525476, 0.0011342221405357122, 0.0011855261400341988, 0.0012368300231173635, 0.00128813402261585, 0.0013394379056990147, 0.0013907419051975012, 0.0014420457882806659, 0.0014933497877791524, 0.001544653670862317, 0.0015959576703608036, 0.0016472615534439683, 0.0016985655529424548, 0.0017498694360256195, 0.001801173435524106, 0.0018524773186072707, 0.0019037812016904354, 0.001955085201188922, 0.002006388967856765, 0.0020576929673552513, 0.002108996966853738, 0.0021603009663522243, 0.002211604733020067, 0.0022629087325185537, 0.0023142127320170403, 0.0023655167315155268, 0.0024168204981833696, 0.002468124497681856, 0.0025194284971803427, 0.002570732496678829, 0.002622036263346672, 0.0026733402628451586, 0.002724644262343645]}, "gradients/decoder.transformer.h.18.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 5.0, 4.0, 9.0, 7.0, 8.0, 9.0, 9.0, 13.0, 10.0, 11.0, 16.0, 19.0, 29.0, 21.0, 28.0, 31.0, 34.0, 27.0, 26.0, 35.0, 35.0, 44.0, 49.0, 48.0, 38.0, 35.0, 38.0, 33.0, 27.0, 34.0, 28.0, 26.0, 27.0, 36.0, 25.0, 16.0, 11.0, 25.0, 13.0, 14.0, 11.0, 9.0, 6.0, 4.0, 3.0, 7.0, 5.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.00040781497955322266, -0.0003941114991903305, -0.00038040801882743835, -0.0003667045384645462, -0.00035300105810165405, -0.0003392975777387619, -0.00032559409737586975, -0.0003118906170129776, -0.00029818713665008545, -0.0002844836562871933, -0.00027078017592430115, -0.000257076695561409, -0.00024337321519851685, -0.0002296697348356247, -0.00021596625447273254, -0.0002022627741098404, -0.00018855929374694824, -0.0001748558133840561, -0.00016115233302116394, -0.0001474488526582718, -0.00013374537229537964, -0.00012004189193248749, -0.00010633841156959534, -9.263493120670319e-05, -7.893145084381104e-05, -6.522797048091888e-05, -5.1524490118026733e-05, -3.782100975513458e-05, -2.411752939224243e-05, -1.041404902935028e-05, 3.28943133354187e-06, 1.699291169643402e-05, 3.069639205932617e-05, 4.439987242221832e-05, 5.8103352785110474e-05, 7.180683314800262e-05, 8.551031351089478e-05, 9.921379387378693e-05, 0.00011291727423667908, 0.00012662075459957123, 0.00014032423496246338, 0.00015402771532535553, 0.00016773119568824768, 0.00018143467605113983, 0.00019513815641403198, 0.00020884163677692413, 0.00022254511713981628, 0.00023624859750270844, 0.0002499520778656006, 0.00026365555822849274, 0.0002773590385913849, 0.00029106251895427704, 0.0003047659993171692, 0.00031846947968006134, 0.0003321729600429535, 0.00034587644040584564, 0.0003595799207687378, 0.00037328340113162994, 0.0003869868814945221, 0.00040069036185741425, 0.0004143938422203064, 0.00042809732258319855, 0.0004418008029460907, 0.00045550428330898285, 0.000469207763671875]}, "gradients/decoder.transformer.h.18.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 4.0, 4.0, 3.0, 4.0, 8.0, 8.0, 12.0, 10.0, 15.0, 14.0, 20.0, 22.0, 29.0, 26.0, 32.0, 44.0, 48.0, 52.0, 54.0, 56.0, 47.0, 46.0, 56.0, 49.0, 29.0, 46.0, 45.0, 34.0, 29.0, 21.0, 20.0, 23.0, 19.0, 15.0, 9.0, 16.0, 9.0, 13.0, 5.0, 4.0, 4.0, 2.0, 1.0, 1.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.75390625, -4.6187744140625, -4.483642578125, -4.3485107421875, -4.21337890625, -4.0782470703125, -3.943115234375, -3.8079833984375, -3.6728515625, -3.5377197265625, -3.402587890625, -3.2674560546875, -3.13232421875, -2.9971923828125, -2.862060546875, -2.7269287109375, -2.591796875, -2.4566650390625, -2.321533203125, -2.1864013671875, -2.05126953125, -1.9161376953125, -1.781005859375, -1.6458740234375, -1.5107421875, -1.3756103515625, -1.240478515625, -1.1053466796875, -0.97021484375, -0.8350830078125, -0.699951171875, -0.5648193359375, -0.4296875, -0.2945556640625, -0.159423828125, -0.0242919921875, 0.11083984375, 0.2459716796875, 0.381103515625, 0.5162353515625, 0.6513671875, 0.7864990234375, 0.921630859375, 1.0567626953125, 1.19189453125, 1.3270263671875, 1.462158203125, 1.5972900390625, 1.732421875, 1.8675537109375, 2.002685546875, 2.1378173828125, 2.27294921875, 2.4080810546875, 2.543212890625, 2.6783447265625, 2.8134765625, 2.9486083984375, 3.083740234375, 3.2188720703125, 3.35400390625, 3.4891357421875, 3.624267578125, 3.7593994140625, 3.89453125]}, "gradients/decoder.transformer.h.18.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 4.0, 0.0, 3.0, 4.0, 7.0, 7.0, 10.0, 14.0, 23.0, 28.0, 31.0, 43.0, 82.0, 110.0, 170.0, 270.0, 347.0, 619.0, 1038.0, 1755.0, 2781.0, 5004.0, 9307.0, 19715.0, 52129.0, 175957.0, 482648.0, 196299.0, 56438.0, 21064.0, 9692.0, 5298.0, 2993.0, 1773.0, 1028.0, 640.0, 437.0, 230.0, 176.0, 115.0, 77.0, 60.0, 38.0, 36.0, 19.0, 15.0, 9.0, 4.0, 7.0, 5.0, 4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.1015625, -4.93450927734375, -4.7674560546875, -4.60040283203125, -4.433349609375, -4.26629638671875, -4.0992431640625, -3.93218994140625, -3.76513671875, -3.59808349609375, -3.4310302734375, -3.26397705078125, -3.096923828125, -2.92987060546875, -2.7628173828125, -2.59576416015625, -2.4287109375, -2.26165771484375, -2.0946044921875, -1.92755126953125, -1.760498046875, -1.59344482421875, -1.4263916015625, -1.25933837890625, -1.09228515625, -0.92523193359375, -0.7581787109375, -0.59112548828125, -0.424072265625, -0.25701904296875, -0.0899658203125, 0.07708740234375, 0.244140625, 0.41119384765625, 0.5782470703125, 0.74530029296875, 0.912353515625, 1.07940673828125, 1.2464599609375, 1.41351318359375, 1.58056640625, 1.74761962890625, 1.9146728515625, 2.08172607421875, 2.248779296875, 2.41583251953125, 2.5828857421875, 2.74993896484375, 2.9169921875, 3.08404541015625, 3.2510986328125, 3.41815185546875, 3.585205078125, 3.75225830078125, 3.9193115234375, 4.08636474609375, 4.25341796875, 4.42047119140625, 4.5875244140625, 4.75457763671875, 4.921630859375, 5.08868408203125, 5.2557373046875, 5.42279052734375, 5.58984375]}, "gradients/decoder.transformer.h.18.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 0.0, 4.0, 4.0, 1.0, 4.0, 4.0, 7.0, 6.0, 7.0, 15.0, 9.0, 14.0, 16.0, 29.0, 28.0, 30.0, 24.0, 35.0, 35.0, 39.0, 39.0, 41.0, 59.0, 117.0, 241.0, 1483.0, 236.0, 104.0, 60.0, 44.0, 44.0, 33.0, 36.0, 24.0, 25.0, 17.0, 22.0, 22.0, 16.0, 10.0, 13.0, 9.0, 6.0, 11.0, 10.0, 3.0, 5.0, 2.0, 6.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.4296875, -12.0352783203125, -11.640869140625, -11.2464599609375, -10.85205078125, -10.4576416015625, -10.063232421875, -9.6688232421875, -9.2744140625, -8.8800048828125, -8.485595703125, -8.0911865234375, -7.69677734375, -7.3023681640625, -6.907958984375, -6.5135498046875, -6.119140625, -5.7247314453125, -5.330322265625, -4.9359130859375, -4.54150390625, -4.1470947265625, -3.752685546875, -3.3582763671875, -2.9638671875, -2.5694580078125, -2.175048828125, -1.7806396484375, -1.38623046875, -0.9918212890625, -0.597412109375, -0.2030029296875, 0.19140625, 0.5858154296875, 0.980224609375, 1.3746337890625, 1.76904296875, 2.1634521484375, 2.557861328125, 2.9522705078125, 3.3466796875, 3.7410888671875, 4.135498046875, 4.5299072265625, 4.92431640625, 5.3187255859375, 5.713134765625, 6.1075439453125, 6.501953125, 6.8963623046875, 7.290771484375, 7.6851806640625, 8.07958984375, 8.4739990234375, 8.868408203125, 9.2628173828125, 9.6572265625, 10.0516357421875, 10.446044921875, 10.8404541015625, 11.23486328125, 11.6292724609375, 12.023681640625, 12.4180908203125, 12.8125]}, "gradients/decoder.transformer.h.18.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 6.0, 9.0, 9.0, 13.0, 11.0, 21.0, 29.0, 42.0, 84.0, 88.0, 187.0, 370.0, 1097.0, 21871.0, 3080128.0, 39310.0, 1415.0, 424.0, 224.0, 105.0, 91.0, 53.0, 23.0, 32.0, 22.0, 11.0, 10.0, 10.0, 4.0, 4.0, 2.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.65625, -40.33642578125, -39.0166015625, -37.69677734375, -36.376953125, -35.05712890625, -33.7373046875, -32.41748046875, -31.09765625, -29.77783203125, -28.4580078125, -27.13818359375, -25.818359375, -24.49853515625, -23.1787109375, -21.85888671875, -20.5390625, -19.21923828125, -17.8994140625, -16.57958984375, -15.259765625, -13.93994140625, -12.6201171875, -11.30029296875, -9.98046875, -8.66064453125, -7.3408203125, -6.02099609375, -4.701171875, -3.38134765625, -2.0615234375, -0.74169921875, 0.578125, 1.89794921875, 3.2177734375, 4.53759765625, 5.857421875, 7.17724609375, 8.4970703125, 9.81689453125, 11.13671875, 12.45654296875, 13.7763671875, 15.09619140625, 16.416015625, 17.73583984375, 19.0556640625, 20.37548828125, 21.6953125, 23.01513671875, 24.3349609375, 25.65478515625, 26.974609375, 28.29443359375, 29.6142578125, 30.93408203125, 32.25390625, 33.57373046875, 34.8935546875, 36.21337890625, 37.533203125, 38.85302734375, 40.1728515625, 41.49267578125, 42.8125]}, "gradients/decoder.transformer.h.18.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 34.0, 203.0, 483.0, 249.0, 38.0, 5.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.999847412109375, -27.16200828552246, -24.32417106628418, -21.486331939697266, -18.648494720458984, -15.81065559387207, -12.972816467285156, -10.134979248046875, -7.297140121459961, -4.459301948547363, -1.6214632987976074, 1.2163753509521484, 4.054213523864746, 6.892051696777344, 9.729890823364258, 12.567728042602539, 15.405567169189453, 18.243406295776367, 21.08124351501465, 23.919082641601562, 26.756919860839844, 29.594758987426758, 32.43259811401367, 35.27043533325195, 38.1082763671875, 40.94611358642578, 43.78395462036133, 46.62179183959961, 49.45962905883789, 52.29747009277344, 55.13530731201172, 57.97314453125, 60.81098175048828, 63.64881896972656, 66.48665618896484, 69.32449340820312, 72.16233825683594, 75.00017547607422, 77.8380126953125, 80.67584991455078, 83.51368713378906, 86.35152435302734, 89.18936157226562, 92.02720642089844, 94.86504364013672, 97.702880859375, 100.54071807861328, 103.37855529785156, 106.21640014648438, 109.05423736572266, 111.89207458496094, 114.72991943359375, 117.56775665283203, 120.40559387207031, 123.2434310913086, 126.08126831054688, 128.91909790039062, 131.75694274902344, 134.5947723388672, 137.4326171875, 140.27044677734375, 143.10829162597656, 145.94613647460938, 148.78396606445312, 151.62181091308594]}, "gradients/decoder.transformer.h.18.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 6.0, 7.0, 5.0, 5.0, 8.0, 8.0, 18.0, 17.0, 25.0, 29.0, 32.0, 22.0, 25.0, 30.0, 35.0, 45.0, 32.0, 40.0, 50.0, 51.0, 48.0, 38.0, 49.0, 35.0, 36.0, 45.0, 28.0, 27.0, 27.0, 32.0, 24.0, 18.0, 12.0, 17.0, 14.0, 16.0, 5.0, 15.0, 6.0, 11.0, 8.0, 6.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.61349105834961, -39.28224563598633, -37.95100021362305, -36.619754791259766, -35.28851318359375, -33.95726776123047, -32.62602233886719, -31.294776916503906, -29.963531494140625, -28.632286071777344, -27.301040649414062, -25.969797134399414, -24.638551712036133, -23.30730628967285, -21.976062774658203, -20.644817352294922, -19.31357192993164, -17.98232650756836, -16.651081085205078, -15.31983757019043, -13.988592147827148, -12.657346725463867, -11.326102256774902, -9.994857788085938, -8.663612365722656, -7.332367420196533, -6.00112247467041, -4.669877529144287, -3.338632583618164, -2.007387638092041, -0.676142692565918, 0.6551017761230469, 1.9863471984863281, 3.317592144012451, 4.648837089538574, 5.980082035064697, 7.31132698059082, 8.642572402954102, 9.973816871643066, 11.305061340332031, 12.636306762695312, 13.967552185058594, 15.298796653747559, 16.630041122436523, 17.961286544799805, 19.292531967163086, 20.623775482177734, 21.955020904541016, 23.286266326904297, 24.617511749267578, 25.94875717163086, 27.280000686645508, 28.61124610900879, 29.94249153137207, 31.27373504638672, 32.60498046875, 33.93622589111328, 35.26747131347656, 36.598716735839844, 37.929962158203125, 39.261207580566406, 40.59244918823242, 41.9236946105957, 43.254940032958984, 44.586185455322266]}, "gradients/decoder.transformer.h.17.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 6.0, 2.0, 4.0, 8.0, 13.0, 14.0, 10.0, 14.0, 8.0, 18.0, 23.0, 28.0, 29.0, 27.0, 43.0, 42.0, 40.0, 48.0, 58.0, 48.0, 65.0, 52.0, 39.0, 33.0, 43.0, 35.0, 36.0, 31.0, 35.0, 21.0, 28.0, 15.0, 7.0, 16.0, 15.0, 11.0, 11.0, 7.0, 5.0, 7.0, 3.0, 4.0, 3.0, 2.0, 3.0, 1.0, 0.0, 2.0], "bins": [-4.95703125, -4.821258544921875, -4.68548583984375, -4.549713134765625, -4.4139404296875, -4.278167724609375, -4.14239501953125, -4.006622314453125, -3.870849609375, -3.735076904296875, -3.59930419921875, -3.463531494140625, -3.3277587890625, -3.191986083984375, -3.05621337890625, -2.920440673828125, -2.78466796875, -2.648895263671875, -2.51312255859375, -2.377349853515625, -2.2415771484375, -2.105804443359375, -1.97003173828125, -1.834259033203125, -1.698486328125, -1.562713623046875, -1.42694091796875, -1.291168212890625, -1.1553955078125, -1.019622802734375, -0.88385009765625, -0.748077392578125, -0.6123046875, -0.476531982421875, -0.34075927734375, -0.204986572265625, -0.0692138671875, 0.066558837890625, 0.20233154296875, 0.338104248046875, 0.473876953125, 0.609649658203125, 0.74542236328125, 0.881195068359375, 1.0169677734375, 1.152740478515625, 1.28851318359375, 1.424285888671875, 1.56005859375, 1.695831298828125, 1.83160400390625, 1.967376708984375, 2.1031494140625, 2.238922119140625, 2.37469482421875, 2.510467529296875, 2.646240234375, 2.782012939453125, 2.91778564453125, 3.053558349609375, 3.1893310546875, 3.325103759765625, 3.46087646484375, 3.596649169921875, 3.732421875]}, "gradients/decoder.transformer.h.17.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 2.0, 6.0, 6.0, 6.0, 9.0, 7.0, 15.0, 12.0, 21.0, 29.0, 39.0, 78.0, 109.0, 202.0, 326.0, 494.0, 960.0, 1897.0, 4073.0, 8979.0, 21767.0, 56781.0, 162458.0, 479182.0, 1133850.0, 1328034.0, 642997.0, 224326.0, 77371.0, 28866.0, 11491.0, 4908.0, 2343.0, 1104.0, 586.0, 357.0, 180.0, 147.0, 85.0, 51.0, 38.0, 29.0, 18.0, 9.0, 12.0, 12.0, 8.0, 5.0, 4.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.27734375, -5.122802734375, -4.96826171875, -4.813720703125, -4.6591796875, -4.504638671875, -4.35009765625, -4.195556640625, -4.041015625, -3.886474609375, -3.73193359375, -3.577392578125, -3.4228515625, -3.268310546875, -3.11376953125, -2.959228515625, -2.8046875, -2.650146484375, -2.49560546875, -2.341064453125, -2.1865234375, -2.031982421875, -1.87744140625, -1.722900390625, -1.568359375, -1.413818359375, -1.25927734375, -1.104736328125, -0.9501953125, -0.795654296875, -0.64111328125, -0.486572265625, -0.33203125, -0.177490234375, -0.02294921875, 0.131591796875, 0.2861328125, 0.440673828125, 0.59521484375, 0.749755859375, 0.904296875, 1.058837890625, 1.21337890625, 1.367919921875, 1.5224609375, 1.677001953125, 1.83154296875, 1.986083984375, 2.140625, 2.295166015625, 2.44970703125, 2.604248046875, 2.7587890625, 2.913330078125, 3.06787109375, 3.222412109375, 3.376953125, 3.531494140625, 3.68603515625, 3.840576171875, 3.9951171875, 4.149658203125, 4.30419921875, 4.458740234375, 4.61328125]}, "gradients/decoder.transformer.h.17.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 7.0, 5.0, 2.0, 3.0, 14.0, 16.0, 14.0, 24.0, 35.0, 43.0, 85.0, 119.0, 188.0, 271.0, 336.0, 429.0, 527.0, 480.0, 418.0, 317.0, 218.0, 156.0, 134.0, 76.0, 59.0, 36.0, 27.0, 19.0, 12.0, 10.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.796875, -13.412353515625, -13.02783203125, -12.643310546875, -12.2587890625, -11.874267578125, -11.48974609375, -11.105224609375, -10.720703125, -10.336181640625, -9.95166015625, -9.567138671875, -9.1826171875, -8.798095703125, -8.41357421875, -8.029052734375, -7.64453125, -7.260009765625, -6.87548828125, -6.490966796875, -6.1064453125, -5.721923828125, -5.33740234375, -4.952880859375, -4.568359375, -4.183837890625, -3.79931640625, -3.414794921875, -3.0302734375, -2.645751953125, -2.26123046875, -1.876708984375, -1.4921875, -1.107666015625, -0.72314453125, -0.338623046875, 0.0458984375, 0.430419921875, 0.81494140625, 1.199462890625, 1.583984375, 1.968505859375, 2.35302734375, 2.737548828125, 3.1220703125, 3.506591796875, 3.89111328125, 4.275634765625, 4.66015625, 5.044677734375, 5.42919921875, 5.813720703125, 6.1982421875, 6.582763671875, 6.96728515625, 7.351806640625, 7.736328125, 8.120849609375, 8.50537109375, 8.889892578125, 9.2744140625, 9.658935546875, 10.04345703125, 10.427978515625, 10.8125]}, "gradients/decoder.transformer.h.17.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 6.0, 3.0, 8.0, 11.0, 12.0, 20.0, 31.0, 50.0, 85.0, 100.0, 189.0, 409.0, 910.0, 3111.0, 17972.0, 183782.0, 2107792.0, 1719529.0, 141299.0, 14619.0, 2642.0, 831.0, 351.0, 175.0, 97.0, 74.0, 48.0, 47.0, 22.0, 19.0, 14.0, 6.0, 7.0, 4.0, 4.0, 4.0, 3.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.125, -13.53515625, -12.9453125, -12.35546875, -11.765625, -11.17578125, -10.5859375, -9.99609375, -9.40625, -8.81640625, -8.2265625, -7.63671875, -7.046875, -6.45703125, -5.8671875, -5.27734375, -4.6875, -4.09765625, -3.5078125, -2.91796875, -2.328125, -1.73828125, -1.1484375, -0.55859375, 0.03125, 0.62109375, 1.2109375, 1.80078125, 2.390625, 2.98046875, 3.5703125, 4.16015625, 4.75, 5.33984375, 5.9296875, 6.51953125, 7.109375, 7.69921875, 8.2890625, 8.87890625, 9.46875, 10.05859375, 10.6484375, 11.23828125, 11.828125, 12.41796875, 13.0078125, 13.59765625, 14.1875, 14.77734375, 15.3671875, 15.95703125, 16.546875, 17.13671875, 17.7265625, 18.31640625, 18.90625, 19.49609375, 20.0859375, 20.67578125, 21.265625, 21.85546875, 22.4453125, 23.03515625, 23.625]}, "gradients/decoder.transformer.h.17.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 6.0, 20.0, 34.0, 69.0, 173.0, 202.0, 202.0, 143.0, 91.0, 48.0, 14.0, 10.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-142.42080688476562, -139.45489501953125, -136.48898315429688, -133.52308654785156, -130.5571746826172, -127.59126281738281, -124.62535095214844, -121.6594467163086, -118.69354248046875, -115.72763061523438, -112.76172637939453, -109.79581451416016, -106.82991027832031, -103.86399841308594, -100.89808654785156, -97.93218231201172, -94.96627044677734, -92.00035858154297, -89.03445434570312, -86.06854248046875, -83.1026382446289, -80.13672637939453, -77.17082214355469, -74.20491027832031, -71.23899841308594, -68.27308654785156, -65.30718231201172, -62.341270446777344, -59.3753662109375, -56.409454345703125, -53.443546295166016, -50.477638244628906, -47.51172637939453, -44.54581832885742, -41.57991027832031, -38.61399841308594, -35.648094177246094, -32.68218231201172, -29.71627426147461, -26.7503662109375, -23.78445816040039, -20.81855010986328, -17.852642059326172, -14.88673210144043, -11.92082405090332, -8.954916000366211, -5.989006042480469, -3.0230979919433594, -0.05718994140625, 2.9087185859680176, 5.874627113342285, 8.840536117553711, 11.80644416809082, 14.77235221862793, 17.738262176513672, 20.70417022705078, 23.67007827758789, 26.635986328125, 29.60189437866211, 32.56780242919922, 35.533714294433594, 38.49961853027344, 41.46553039550781, 44.43143844604492, 47.39734649658203]}, "gradients/decoder.transformer.h.17.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 2.0, 6.0, 9.0, 4.0, 8.0, 5.0, 13.0, 11.0, 10.0, 18.0, 17.0, 11.0, 26.0, 16.0, 26.0, 32.0, 37.0, 32.0, 37.0, 26.0, 36.0, 34.0, 47.0, 41.0, 42.0, 34.0, 39.0, 41.0, 40.0, 34.0, 36.0, 26.0, 25.0, 30.0, 21.0, 23.0, 20.0, 13.0, 16.0, 14.0, 11.0, 7.0, 9.0, 6.0, 6.0, 3.0, 5.0, 1.0, 4.0, 1.0, 0.0, 2.0, 2.0], "bins": [-35.398033142089844, -34.387638092041016, -33.37724685668945, -32.366851806640625, -31.35645866394043, -30.346065521240234, -29.335670471191406, -28.32527732849121, -27.314884185791016, -26.30449104309082, -25.294095993041992, -24.283702850341797, -23.2733097076416, -22.262916564941406, -21.252521514892578, -20.242128372192383, -19.231733322143555, -18.22134017944336, -17.21094512939453, -16.200551986694336, -15.19015884399414, -14.179764747619629, -13.169370651245117, -12.158977508544922, -11.14858341217041, -10.138189315795898, -9.127796173095703, -8.117402076721191, -7.107008457183838, -6.096614837646484, -5.086220741271973, -4.075827121734619, -3.065431594848633, -2.0550379753112793, -1.0446441173553467, -0.03425025939941406, 0.9761433601379395, 1.986536979675293, 2.9969310760498047, 4.007324695587158, 5.017718315124512, 6.028111934661865, 7.038505554199219, 8.04889965057373, 9.059293746948242, 10.069686889648438, 11.08008098602295, 12.090475082397461, 13.100868225097656, 14.111262321472168, 15.121655464172363, 16.132049560546875, 17.14244270324707, 18.152835845947266, 19.163230895996094, 20.17362403869629, 21.184017181396484, 22.19441032409668, 23.204805374145508, 24.215198516845703, 25.2255916595459, 26.235984802246094, 27.246379852294922, 28.256772994995117, 29.267168045043945]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 9.0, 4.0, 6.0, 8.0, 9.0, 9.0, 7.0, 12.0, 19.0, 18.0, 28.0, 21.0, 27.0, 32.0, 45.0, 52.0, 55.0, 44.0, 61.0, 53.0, 35.0, 46.0, 50.0, 44.0, 44.0, 37.0, 34.0, 26.0, 26.0, 34.0, 20.0, 18.0, 16.0, 17.0, 8.0, 11.0, 7.0, 5.0, 8.0, 3.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.91796875, -4.76983642578125, -4.6217041015625, -4.47357177734375, -4.325439453125, -4.17730712890625, -4.0291748046875, -3.88104248046875, -3.73291015625, -3.58477783203125, -3.4366455078125, -3.28851318359375, -3.140380859375, -2.99224853515625, -2.8441162109375, -2.69598388671875, -2.5478515625, -2.39971923828125, -2.2515869140625, -2.10345458984375, -1.955322265625, -1.80718994140625, -1.6590576171875, -1.51092529296875, -1.36279296875, -1.21466064453125, -1.0665283203125, -0.91839599609375, -0.770263671875, -0.62213134765625, -0.4739990234375, -0.32586669921875, -0.177734375, -0.02960205078125, 0.1185302734375, 0.26666259765625, 0.414794921875, 0.56292724609375, 0.7110595703125, 0.85919189453125, 1.00732421875, 1.15545654296875, 1.3035888671875, 1.45172119140625, 1.599853515625, 1.74798583984375, 1.8961181640625, 2.04425048828125, 2.1923828125, 2.34051513671875, 2.4886474609375, 2.63677978515625, 2.784912109375, 2.93304443359375, 3.0811767578125, 3.22930908203125, 3.37744140625, 3.52557373046875, 3.6737060546875, 3.82183837890625, 3.969970703125, 4.11810302734375, 4.2662353515625, 4.41436767578125, 4.5625]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 4.0, 11.0, 6.0, 19.0, 30.0, 27.0, 40.0, 98.0, 134.0, 179.0, 297.0, 352.0, 565.0, 852.0, 1328.0, 1994.0, 3006.0, 4775.0, 7013.0, 10985.0, 16761.0, 25519.0, 39749.0, 61459.0, 95757.0, 145301.0, 188228.0, 154372.0, 102282.0, 65464.0, 42605.0, 27700.0, 17924.0, 11650.0, 7446.0, 4963.0, 3210.0, 2096.0, 1437.0, 962.0, 611.0, 457.0, 314.0, 177.0, 130.0, 88.0, 51.0, 41.0, 27.0, 25.0, 16.0, 9.0, 7.0, 6.0, 5.0, 0.0, 2.0, 1.0], "bins": [-0.322998046875, -0.3130836486816406, -0.30316925048828125, -0.2932548522949219, -0.2833404541015625, -0.2734260559082031, -0.26351165771484375, -0.2535972595214844, -0.243682861328125, -0.23376846313476562, -0.22385406494140625, -0.21393966674804688, -0.2040252685546875, -0.19411087036132812, -0.18419647216796875, -0.17428207397460938, -0.16436767578125, -0.15445327758789062, -0.14453887939453125, -0.13462448120117188, -0.1247100830078125, -0.11479568481445312, -0.10488128662109375, -0.09496688842773438, -0.085052490234375, -0.07513809204101562, -0.06522369384765625, -0.055309295654296875, -0.0453948974609375, -0.035480499267578125, -0.02556610107421875, -0.015651702880859375, -0.0057373046875, 0.004177093505859375, 0.01409149169921875, 0.024005889892578125, 0.0339202880859375, 0.043834686279296875, 0.05374908447265625, 0.06366348266601562, 0.073577880859375, 0.08349227905273438, 0.09340667724609375, 0.10332107543945312, 0.1132354736328125, 0.12314987182617188, 0.13306427001953125, 0.14297866821289062, 0.15289306640625, 0.16280746459960938, 0.17272186279296875, 0.18263626098632812, 0.1925506591796875, 0.20246505737304688, 0.21237945556640625, 0.22229385375976562, 0.232208251953125, 0.24212265014648438, 0.25203704833984375, 0.2619514465332031, 0.2718658447265625, 0.2817802429199219, 0.29169464111328125, 0.3016090393066406, 0.3115234375]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 7.0, 3.0, 3.0, 1.0, 5.0, 2.0, 13.0, 4.0, 8.0, 8.0, 21.0, 14.0, 15.0, 22.0, 14.0, 20.0, 24.0, 29.0, 32.0, 32.0, 24.0, 33.0, 31.0, 42.0, 37.0, 30.0, 1052.0, 33.0, 35.0, 37.0, 30.0, 36.0, 39.0, 30.0, 36.0, 32.0, 29.0, 25.0, 20.0, 22.0, 13.0, 20.0, 11.0, 12.0, 4.0, 10.0, 12.0, 9.0, 3.0, 5.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.0859375, -2.01300048828125, -1.9400634765625, -1.86712646484375, -1.794189453125, -1.72125244140625, -1.6483154296875, -1.57537841796875, -1.50244140625, -1.42950439453125, -1.3565673828125, -1.28363037109375, -1.210693359375, -1.13775634765625, -1.0648193359375, -0.99188232421875, -0.9189453125, -0.84600830078125, -0.7730712890625, -0.70013427734375, -0.627197265625, -0.55426025390625, -0.4813232421875, -0.40838623046875, -0.33544921875, -0.26251220703125, -0.1895751953125, -0.11663818359375, -0.043701171875, 0.02923583984375, 0.1021728515625, 0.17510986328125, 0.248046875, 0.32098388671875, 0.3939208984375, 0.46685791015625, 0.539794921875, 0.61273193359375, 0.6856689453125, 0.75860595703125, 0.83154296875, 0.90447998046875, 0.9774169921875, 1.05035400390625, 1.123291015625, 1.19622802734375, 1.2691650390625, 1.34210205078125, 1.4150390625, 1.48797607421875, 1.5609130859375, 1.63385009765625, 1.706787109375, 1.77972412109375, 1.8526611328125, 1.92559814453125, 1.99853515625, 2.07147216796875, 2.1444091796875, 2.21734619140625, 2.290283203125, 2.36322021484375, 2.4361572265625, 2.50909423828125, 2.58203125]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 4.0, 2.0, 3.0, 1.0, 1.0, 3.0, 11.0, 7.0, 18.0, 15.0, 38.0, 67.0, 97.0, 172.0, 266.0, 458.0, 678.0, 1228.0, 2072.0, 3382.0, 5683.0, 9084.0, 14783.0, 24079.0, 39760.0, 63939.0, 102794.0, 157470.0, 1243385.0, 157992.0, 103248.0, 64586.0, 39316.0, 24139.0, 14876.0, 9296.0, 5633.0, 3398.0, 2110.0, 1242.0, 690.0, 419.0, 274.0, 168.0, 103.0, 55.0, 33.0, 29.0, 16.0, 11.0, 5.0, 5.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.269775390625, -0.2606315612792969, -0.25148773193359375, -0.24234390258789062, -0.2332000732421875, -0.22405624389648438, -0.21491241455078125, -0.20576858520507812, -0.196624755859375, -0.18748092651367188, -0.17833709716796875, -0.16919326782226562, -0.1600494384765625, -0.15090560913085938, -0.14176177978515625, -0.13261795043945312, -0.12347412109375, -0.11433029174804688, -0.10518646240234375, -0.09604263305664062, -0.0868988037109375, -0.07775497436523438, -0.06861114501953125, -0.059467315673828125, -0.050323486328125, -0.041179656982421875, -0.03203582763671875, -0.022891998291015625, -0.0137481689453125, -0.004604339599609375, 0.00453948974609375, 0.013683319091796875, 0.0228271484375, 0.031970977783203125, 0.04111480712890625, 0.050258636474609375, 0.0594024658203125, 0.06854629516601562, 0.07769012451171875, 0.08683395385742188, 0.095977783203125, 0.10512161254882812, 0.11426544189453125, 0.12340927124023438, 0.1325531005859375, 0.14169692993164062, 0.15084075927734375, 0.15998458862304688, 0.16912841796875, 0.17827224731445312, 0.18741607666015625, 0.19655990600585938, 0.2057037353515625, 0.21484756469726562, 0.22399139404296875, 0.23313522338867188, 0.242279052734375, 0.2514228820800781, 0.26056671142578125, 0.2697105407714844, 0.2788543701171875, 0.2879981994628906, 0.29714202880859375, 0.3062858581542969, 0.3154296875]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 4.0, 4.0, 2.0, 6.0, 5.0, 3.0, 7.0, 12.0, 23.0, 28.0, 26.0, 40.0, 37.0, 67.0, 50.0, 69.0, 75.0, 79.0, 75.0, 88.0, 60.0, 55.0, 47.0, 34.0, 25.0, 28.0, 13.0, 14.0, 9.0, 9.0, 4.0, 1.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-0.0014410018920898438, -0.0014044567942619324, -0.001367911696434021, -0.0013313665986061096, -0.0012948215007781982, -0.0012582764029502869, -0.0012217313051223755, -0.0011851862072944641, -0.0011486411094665527, -0.0011120960116386414, -0.00107555091381073, -0.0010390058159828186, -0.0010024607181549072, -0.0009659156203269958, -0.0009293705224990845, -0.0008928254246711731, -0.0008562803268432617, -0.0008197352290153503, -0.000783190131187439, -0.0007466450333595276, -0.0007100999355316162, -0.0006735548377037048, -0.0006370097398757935, -0.0006004646420478821, -0.0005639195442199707, -0.0005273744463920593, -0.000490829348564148, -0.00045428425073623657, -0.0004177391529083252, -0.0003811940550804138, -0.00034464895725250244, -0.00030810385942459106, -0.0002715587615966797, -0.0002350136637687683, -0.00019846856594085693, -0.00016192346811294556, -0.00012537837028503418, -8.88332724571228e-05, -5.2288174629211426e-05, -1.574307680130005e-05, 2.0802021026611328e-05, 5.7347118854522705e-05, 9.389221668243408e-05, 0.00013043731451034546, 0.00016698241233825684, 0.0002035275101661682, 0.0002400726079940796, 0.00027661770582199097, 0.00031316280364990234, 0.0003497079014778137, 0.0003862529993057251, 0.0004227980971336365, 0.00045934319496154785, 0.0004958882927894592, 0.0005324333906173706, 0.000568978488445282, 0.0006055235862731934, 0.0006420686841011047, 0.0006786137819290161, 0.0007151588797569275, 0.0007517039775848389, 0.0007882490754127502, 0.0008247941732406616, 0.000861339271068573, 0.0008978843688964844]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 0.0, 3.0, 3.0, 3.0, 1.0, 1.0, 7.0, 5.0, 11.0, 10.0, 11.0, 23.0, 33.0, 39.0, 59.0, 105.0, 135.0, 232.0, 387.0, 849.0, 19140.0, 1009116.0, 16534.0, 816.0, 376.0, 220.0, 146.0, 86.0, 60.0, 49.0, 27.0, 20.0, 10.0, 11.0, 5.0, 8.0, 4.0, 4.0, 5.0, 6.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.01535797119140625, -0.01473081111907959, -0.01410365104675293, -0.01347649097442627, -0.01284933090209961, -0.01222217082977295, -0.011595010757446289, -0.010967850685119629, -0.010340690612792969, -0.009713530540466309, -0.009086370468139648, -0.008459210395812988, -0.007832050323486328, -0.007204890251159668, -0.006577730178833008, -0.005950570106506348, -0.0053234100341796875, -0.004696249961853027, -0.004069089889526367, -0.003441929817199707, -0.002814769744873047, -0.0021876096725463867, -0.0015604496002197266, -0.0009332895278930664, -0.00030612945556640625, 0.0003210306167602539, 0.0009481906890869141, 0.0015753507614135742, 0.0022025108337402344, 0.0028296709060668945, 0.0034568309783935547, 0.004083991050720215, 0.004711151123046875, 0.005338311195373535, 0.005965471267700195, 0.0065926313400268555, 0.007219791412353516, 0.007846951484680176, 0.008474111557006836, 0.009101271629333496, 0.009728431701660156, 0.010355591773986816, 0.010982751846313477, 0.011609911918640137, 0.012237071990966797, 0.012864232063293457, 0.013491392135620117, 0.014118552207946777, 0.014745712280273438, 0.015372872352600098, 0.016000032424926758, 0.016627192497253418, 0.017254352569580078, 0.01788151264190674, 0.0185086727142334, 0.01913583278656006, 0.01976299285888672, 0.02039015293121338, 0.02101731300354004, 0.0216444730758667, 0.02227163314819336, 0.02289879322052002, 0.02352595329284668, 0.02415311336517334, 0.0247802734375]}, "gradients/decoder.transformer.h.17.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 110.0, 490.0, 368.0, 39.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004035771358758211, -0.0039128875359892845, -0.0037900032475590706, -0.0036671191919595003, -0.00354423513635993, -0.0034213513135910034, -0.003298467257991433, -0.003175583202391863, -0.0030526991467922926, -0.0029298150911927223, -0.002806931035593152, -0.0026840469799935818, -0.002561163157224655, -0.0024382788687944412, -0.0023153950460255146, -0.0021925109904259443, -0.002069626934826374, -0.0019467428792268038, -0.0018238588236272335, -0.001700974884442985, -0.0015780908288434148, -0.0014552067732438445, -0.001332322834059596, -0.0012094387784600258, -0.0010865547228604555, -0.0009636706672608852, -0.0008407866698689759, -0.0007179026724770665, -0.0005950186168774962, -0.00047213456127792597, -0.0003492505638860166, -0.00022636656649410725, -0.00010348204523324966, 1.9401981262490153e-05, 0.00014228600775822997, 0.0002651700342539698, 0.0003880540607497096, 0.0005109381163492799, 0.0006338221137411892, 0.0007567061111330986, 0.0008795901667326689, 0.0010024742223322392, 0.0011253582779318094, 0.0012482422171160579, 0.0013711262727156281, 0.0014940103283151984, 0.0016168942674994469, 0.0017397783230990171, 0.0018626623786985874, 0.0019855464342981577, 0.002108430489897728, 0.0022313145454972982, 0.0023541986010968685, 0.002477082423865795, 0.0025999664794653654, 0.0027228505350649357, 0.002845734590664506, 0.0029686186462640762, 0.0030915027018636465, 0.0032143867574632168, 0.0033372705802321434, 0.0034601548686623573, 0.003583038691431284, 0.0037059227470308542, 0.0038288068026304245]}, "gradients/decoder.transformer.h.17.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 7.0, 3.0, 5.0, 10.0, 11.0, 16.0, 19.0, 22.0, 23.0, 23.0, 38.0, 25.0, 32.0, 30.0, 31.0, 45.0, 37.0, 45.0, 53.0, 38.0, 51.0, 46.0, 35.0, 57.0, 31.0, 46.0, 38.0, 23.0, 34.0, 19.0, 20.0, 22.0, 22.0, 12.0, 5.0, 10.0, 8.0, 5.0, 5.0, 2.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005851984024047852, -0.0005677500739693642, -0.0005503017455339432, -0.0005328534170985222, -0.0005154050886631012, -0.0004979567602276802, -0.0004805084317922592, -0.0004630601033568382, -0.00044561177492141724, -0.00042816344648599625, -0.00041071511805057526, -0.00039326678961515427, -0.0003758184611797333, -0.0003583701327443123, -0.0003409218043088913, -0.0003234734758734703, -0.0003060251474380493, -0.0002885768190026283, -0.00027112849056720734, -0.00025368016213178635, -0.00023623183369636536, -0.00021878350526094437, -0.00020133517682552338, -0.0001838868483901024, -0.0001664385199546814, -0.0001489901915192604, -0.00013154186308383942, -0.00011409353464841843, -9.664520621299744e-05, -7.919687777757645e-05, -6.174854934215546e-05, -4.4300220906734467e-05, -2.6851892471313477e-05, -9.403564035892487e-06, 8.044764399528503e-06, 2.5493092834949493e-05, 4.2941421270370483e-05, 6.0389749705791473e-05, 7.783807814121246e-05, 9.528640657663345e-05, 0.00011273473501205444, 0.00013018306344747543, 0.00014763139188289642, 0.0001650797203183174, 0.0001825280487537384, 0.0001999763771891594, 0.00021742470562458038, 0.00023487303406000137, 0.00025232136249542236, 0.00026976969093084335, 0.00028721801936626434, 0.00030466634780168533, 0.0003221146762371063, 0.0003395630046725273, 0.0003570113331079483, 0.0003744596615433693, 0.0003919079899787903, 0.0004093563184142113, 0.00042680464684963226, 0.00044425297528505325, 0.00046170130372047424, 0.00047914963215589523, 0.0004965979605913162, 0.0005140462890267372, 0.0005314946174621582]}, "gradients/decoder.transformer.h.17.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 9.0, 4.0, 6.0, 8.0, 9.0, 9.0, 7.0, 12.0, 19.0, 18.0, 28.0, 21.0, 27.0, 32.0, 45.0, 52.0, 55.0, 44.0, 61.0, 53.0, 35.0, 46.0, 50.0, 45.0, 43.0, 37.0, 34.0, 26.0, 26.0, 34.0, 20.0, 18.0, 16.0, 17.0, 8.0, 11.0, 7.0, 5.0, 8.0, 3.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.91796875, -4.76983642578125, -4.6217041015625, -4.47357177734375, -4.325439453125, -4.17730712890625, -4.0291748046875, -3.88104248046875, -3.73291015625, -3.58477783203125, -3.4366455078125, -3.28851318359375, -3.140380859375, -2.99224853515625, -2.8441162109375, -2.69598388671875, -2.5478515625, -2.39971923828125, -2.2515869140625, -2.10345458984375, -1.955322265625, -1.80718994140625, -1.6590576171875, -1.51092529296875, -1.36279296875, -1.21466064453125, -1.0665283203125, -0.91839599609375, -0.770263671875, -0.62213134765625, -0.4739990234375, -0.32586669921875, -0.177734375, -0.02960205078125, 0.1185302734375, 0.26666259765625, 0.414794921875, 0.56292724609375, 0.7110595703125, 0.85919189453125, 1.00732421875, 1.15545654296875, 1.3035888671875, 1.45172119140625, 1.599853515625, 1.74798583984375, 1.8961181640625, 2.04425048828125, 2.1923828125, 2.34051513671875, 2.4886474609375, 2.63677978515625, 2.784912109375, 2.93304443359375, 3.0811767578125, 3.22930908203125, 3.37744140625, 3.52557373046875, 3.6737060546875, 3.82183837890625, 3.969970703125, 4.11810302734375, 4.2662353515625, 4.41436767578125, 4.5625]}, "gradients/decoder.transformer.h.17.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 3.0, 6.0, 11.0, 5.0, 10.0, 17.0, 18.0, 21.0, 31.0, 38.0, 53.0, 114.0, 145.0, 253.0, 441.0, 806.0, 1551.0, 3030.0, 5876.0, 11714.0, 25022.0, 63039.0, 204481.0, 423538.0, 198432.0, 61572.0, 24628.0, 11459.0, 5871.0, 2882.0, 1502.0, 857.0, 430.0, 251.0, 164.0, 70.0, 70.0, 48.0, 34.0, 18.0, 17.0, 13.0, 5.0, 6.0, 6.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.72265625, -4.5625, -4.40234375, -4.2421875, -4.08203125, -3.921875, -3.76171875, -3.6015625, -3.44140625, -3.28125, -3.12109375, -2.9609375, -2.80078125, -2.640625, -2.48046875, -2.3203125, -2.16015625, -2.0, -1.83984375, -1.6796875, -1.51953125, -1.359375, -1.19921875, -1.0390625, -0.87890625, -0.71875, -0.55859375, -0.3984375, -0.23828125, -0.078125, 0.08203125, 0.2421875, 0.40234375, 0.5625, 0.72265625, 0.8828125, 1.04296875, 1.203125, 1.36328125, 1.5234375, 1.68359375, 1.84375, 2.00390625, 2.1640625, 2.32421875, 2.484375, 2.64453125, 2.8046875, 2.96484375, 3.125, 3.28515625, 3.4453125, 3.60546875, 3.765625, 3.92578125, 4.0859375, 4.24609375, 4.40625, 4.56640625, 4.7265625, 4.88671875, 5.046875, 5.20703125, 5.3671875, 5.52734375]}, "gradients/decoder.transformer.h.17.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 6.0, 5.0, 6.0, 2.0, 6.0, 9.0, 13.0, 12.0, 13.0, 20.0, 25.0, 29.0, 34.0, 36.0, 40.0, 48.0, 68.0, 141.0, 251.0, 1511.0, 265.0, 110.0, 73.0, 59.0, 41.0, 27.0, 36.0, 33.0, 29.0, 20.0, 21.0, 17.0, 13.0, 8.0, 8.0, 8.0, 7.0, 1.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-19.75, -19.1932373046875, -18.636474609375, -18.0797119140625, -17.52294921875, -16.9661865234375, -16.409423828125, -15.8526611328125, -15.2958984375, -14.7391357421875, -14.182373046875, -13.6256103515625, -13.06884765625, -12.5120849609375, -11.955322265625, -11.3985595703125, -10.841796875, -10.2850341796875, -9.728271484375, -9.1715087890625, -8.61474609375, -8.0579833984375, -7.501220703125, -6.9444580078125, -6.3876953125, -5.8309326171875, -5.274169921875, -4.7174072265625, -4.16064453125, -3.6038818359375, -3.047119140625, -2.4903564453125, -1.93359375, -1.3768310546875, -0.820068359375, -0.2633056640625, 0.29345703125, 0.8502197265625, 1.406982421875, 1.9637451171875, 2.5205078125, 3.0772705078125, 3.634033203125, 4.1907958984375, 4.74755859375, 5.3043212890625, 5.861083984375, 6.4178466796875, 6.974609375, 7.5313720703125, 8.088134765625, 8.6448974609375, 9.20166015625, 9.7584228515625, 10.315185546875, 10.8719482421875, 11.4287109375, 11.9854736328125, 12.542236328125, 13.0989990234375, 13.65576171875, 14.2125244140625, 14.769287109375, 15.3260498046875, 15.8828125]}, "gradients/decoder.transformer.h.17.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 4.0, 4.0, 4.0, 2.0, 4.0, 6.0, 13.0, 14.0, 16.0, 21.0, 29.0, 36.0, 39.0, 77.0, 112.0, 186.0, 271.0, 562.0, 2331.0, 135086.0, 2992731.0, 12180.0, 952.0, 372.0, 226.0, 116.0, 81.0, 64.0, 49.0, 28.0, 24.0, 19.0, 12.0, 9.0, 9.0, 5.0, 5.0, 6.0, 7.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.65625, -39.31298828125, -37.9697265625, -36.62646484375, -35.283203125, -33.93994140625, -32.5966796875, -31.25341796875, -29.91015625, -28.56689453125, -27.2236328125, -25.88037109375, -24.537109375, -23.19384765625, -21.8505859375, -20.50732421875, -19.1640625, -17.82080078125, -16.4775390625, -15.13427734375, -13.791015625, -12.44775390625, -11.1044921875, -9.76123046875, -8.41796875, -7.07470703125, -5.7314453125, -4.38818359375, -3.044921875, -1.70166015625, -0.3583984375, 0.98486328125, 2.328125, 3.67138671875, 5.0146484375, 6.35791015625, 7.701171875, 9.04443359375, 10.3876953125, 11.73095703125, 13.07421875, 14.41748046875, 15.7607421875, 17.10400390625, 18.447265625, 19.79052734375, 21.1337890625, 22.47705078125, 23.8203125, 25.16357421875, 26.5068359375, 27.85009765625, 29.193359375, 30.53662109375, 31.8798828125, 33.22314453125, 34.56640625, 35.90966796875, 37.2529296875, 38.59619140625, 39.939453125, 41.28271484375, 42.6259765625, 43.96923828125, 45.3125]}, "gradients/decoder.transformer.h.17.ln_1.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 9.0, 34.0, 122.0, 303.0, 315.0, 181.0, 45.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.525617599487305, -26.355003356933594, -24.18438720703125, -22.01377296447754, -19.843158721923828, -17.672542572021484, -15.501928329467773, -13.331313133239746, -11.160697937011719, -8.990082740783691, -6.819468021392822, -4.648853302001953, -2.478238105773926, -0.30762290954589844, 1.8629913330078125, 4.03360652923584, 6.204221725463867, 8.374836921691895, 10.545452117919922, 12.716066360473633, 14.88668155670166, 17.057296752929688, 19.2279109954834, 21.39852523803711, 23.569141387939453, 25.739755630493164, 27.910371780395508, 30.08098602294922, 32.25160217285156, 34.422218322753906, 36.592830657958984, 38.76344680786133, 40.93406677246094, 43.10468292236328, 45.27529525756836, 47.4459114074707, 49.61652755737305, 51.787139892578125, 53.95775604248047, 56.12837219238281, 58.298988342285156, 60.4696044921875, 62.64021682739258, 64.81083679199219, 66.9814453125, 69.15206146240234, 71.32267761230469, 73.49329376220703, 75.66390991210938, 77.83452606201172, 80.00514221191406, 82.17575073242188, 84.34636688232422, 86.51698303222656, 88.6875991821289, 90.85821533203125, 93.02882385253906, 95.1994400024414, 97.37005615234375, 99.54066467285156, 101.7112808227539, 103.88189697265625, 106.0525131225586, 108.22312927246094, 110.39374542236328]}, "gradients/decoder.transformer.h.17.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 4.0, 5.0, 8.0, 8.0, 12.0, 16.0, 12.0, 14.0, 19.0, 26.0, 18.0, 29.0, 28.0, 36.0, 32.0, 35.0, 38.0, 44.0, 46.0, 39.0, 37.0, 37.0, 47.0, 45.0, 37.0, 42.0, 29.0, 33.0, 30.0, 31.0, 29.0, 25.0, 15.0, 18.0, 13.0, 15.0, 13.0, 12.0, 7.0, 4.0, 7.0, 2.0, 3.0, 2.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-39.598392486572266, -38.39763641357422, -37.19688034057617, -35.99612045288086, -34.79536437988281, -33.594608306884766, -32.39385223388672, -31.193096160888672, -29.992338180541992, -28.791582107543945, -27.590824127197266, -26.39006805419922, -25.189311981201172, -23.988554000854492, -22.787797927856445, -21.587039947509766, -20.38628387451172, -19.185527801513672, -17.984769821166992, -16.784013748168945, -15.583256721496582, -14.382499694824219, -13.181743621826172, -11.980986595153809, -10.780229568481445, -9.579472541809082, -8.378715515136719, -7.177959442138672, -5.977202415466309, -4.776445388793945, -3.5756888389587402, -2.374932289123535, -1.1741714477539062, 0.02658534049987793, 1.227342128753662, 2.4280989170074463, 3.6288557052612305, 4.829612731933594, 6.030369281768799, 7.231125831604004, 8.431882858276367, 9.63263988494873, 10.833396911621094, 12.03415298461914, 13.234910011291504, 14.435667037963867, 15.636423110961914, 16.837181091308594, 18.03793716430664, 19.238693237304688, 20.439451217651367, 21.640207290649414, 22.840965270996094, 24.04172134399414, 25.242477416992188, 26.443233489990234, 27.643991470336914, 28.84474754333496, 30.04550552368164, 31.246261596679688, 32.447017669677734, 33.64777374267578, 34.848533630371094, 36.04928970336914, 37.25004577636719]}, "gradients/decoder.transformer.h.16.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 6.0, 6.0, 10.0, 5.0, 11.0, 2.0, 7.0, 18.0, 18.0, 22.0, 40.0, 26.0, 31.0, 41.0, 40.0, 45.0, 52.0, 37.0, 51.0, 67.0, 44.0, 49.0, 42.0, 31.0, 41.0, 47.0, 31.0, 34.0, 22.0, 27.0, 18.0, 20.0, 13.0, 8.0, 15.0, 10.0, 9.0, 3.0, 6.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.17578125, -5.01776123046875, -4.8597412109375, -4.70172119140625, -4.543701171875, -4.38568115234375, -4.2276611328125, -4.06964111328125, -3.91162109375, -3.75360107421875, -3.5955810546875, -3.43756103515625, -3.279541015625, -3.12152099609375, -2.9635009765625, -2.80548095703125, -2.6474609375, -2.48944091796875, -2.3314208984375, -2.17340087890625, -2.015380859375, -1.85736083984375, -1.6993408203125, -1.54132080078125, -1.38330078125, -1.22528076171875, -1.0672607421875, -0.90924072265625, -0.751220703125, -0.59320068359375, -0.4351806640625, -0.27716064453125, -0.119140625, 0.03887939453125, 0.1968994140625, 0.35491943359375, 0.512939453125, 0.67095947265625, 0.8289794921875, 0.98699951171875, 1.14501953125, 1.30303955078125, 1.4610595703125, 1.61907958984375, 1.777099609375, 1.93511962890625, 2.0931396484375, 2.25115966796875, 2.4091796875, 2.56719970703125, 2.7252197265625, 2.88323974609375, 3.041259765625, 3.19927978515625, 3.3572998046875, 3.51531982421875, 3.67333984375, 3.83135986328125, 3.9893798828125, 4.14739990234375, 4.305419921875, 4.46343994140625, 4.6214599609375, 4.77947998046875, 4.9375]}, "gradients/decoder.transformer.h.16.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 4.0, 4.0, 8.0, 9.0, 17.0, 13.0, 14.0, 16.0, 26.0, 31.0, 37.0, 54.0, 46.0, 75.0, 109.0, 142.0, 343.0, 1062.0, 9354.0, 375798.0, 3552837.0, 245623.0, 6969.0, 860.0, 264.0, 125.0, 103.0, 63.0, 63.0, 33.0, 38.0, 35.0, 30.0, 22.0, 13.0, 8.0, 11.0, 7.0, 8.0, 3.0, 4.0, 4.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-21.4375, -20.796630859375, -20.15576171875, -19.514892578125, -18.8740234375, -18.233154296875, -17.59228515625, -16.951416015625, -16.310546875, -15.669677734375, -15.02880859375, -14.387939453125, -13.7470703125, -13.106201171875, -12.46533203125, -11.824462890625, -11.18359375, -10.542724609375, -9.90185546875, -9.260986328125, -8.6201171875, -7.979248046875, -7.33837890625, -6.697509765625, -6.056640625, -5.415771484375, -4.77490234375, -4.134033203125, -3.4931640625, -2.852294921875, -2.21142578125, -1.570556640625, -0.9296875, -0.288818359375, 0.35205078125, 0.992919921875, 1.6337890625, 2.274658203125, 2.91552734375, 3.556396484375, 4.197265625, 4.838134765625, 5.47900390625, 6.119873046875, 6.7607421875, 7.401611328125, 8.04248046875, 8.683349609375, 9.32421875, 9.965087890625, 10.60595703125, 11.246826171875, 11.8876953125, 12.528564453125, 13.16943359375, 13.810302734375, 14.451171875, 15.092041015625, 15.73291015625, 16.373779296875, 17.0146484375, 17.655517578125, 18.29638671875, 18.937255859375, 19.578125]}, "gradients/decoder.transformer.h.16.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 5.0, 7.0, 12.0, 18.0, 26.0, 51.0, 73.0, 128.0, 215.0, 385.0, 580.0, 738.0, 704.0, 430.0, 297.0, 174.0, 109.0, 59.0, 26.0, 16.0, 16.0, 6.0, 3.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.765625, -13.185302734375, -12.60498046875, -12.024658203125, -11.4443359375, -10.864013671875, -10.28369140625, -9.703369140625, -9.123046875, -8.542724609375, -7.96240234375, -7.382080078125, -6.8017578125, -6.221435546875, -5.64111328125, -5.060791015625, -4.48046875, -3.900146484375, -3.31982421875, -2.739501953125, -2.1591796875, -1.578857421875, -0.99853515625, -0.418212890625, 0.162109375, 0.742431640625, 1.32275390625, 1.903076171875, 2.4833984375, 3.063720703125, 3.64404296875, 4.224365234375, 4.8046875, 5.385009765625, 5.96533203125, 6.545654296875, 7.1259765625, 7.706298828125, 8.28662109375, 8.866943359375, 9.447265625, 10.027587890625, 10.60791015625, 11.188232421875, 11.7685546875, 12.348876953125, 12.92919921875, 13.509521484375, 14.08984375, 14.670166015625, 15.25048828125, 15.830810546875, 16.4111328125, 16.991455078125, 17.57177734375, 18.152099609375, 18.732421875, 19.312744140625, 19.89306640625, 20.473388671875, 21.0537109375, 21.634033203125, 22.21435546875, 22.794677734375, 23.375]}, "gradients/decoder.transformer.h.16.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 5.0, 5.0, 13.0, 16.0, 27.0, 52.0, 83.0, 193.0, 389.0, 867.0, 8311.0, 2760090.0, 1417664.0, 5061.0, 791.0, 330.0, 169.0, 102.0, 55.0, 21.0, 23.0, 11.0, 2.0, 8.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-65.4375, -63.8388671875, -62.240234375, -60.6416015625, -59.04296875, -57.4443359375, -55.845703125, -54.2470703125, -52.6484375, -51.0498046875, -49.451171875, -47.8525390625, -46.25390625, -44.6552734375, -43.056640625, -41.4580078125, -39.859375, -38.2607421875, -36.662109375, -35.0634765625, -33.46484375, -31.8662109375, -30.267578125, -28.6689453125, -27.0703125, -25.4716796875, -23.873046875, -22.2744140625, -20.67578125, -19.0771484375, -17.478515625, -15.8798828125, -14.28125, -12.6826171875, -11.083984375, -9.4853515625, -7.88671875, -6.2880859375, -4.689453125, -3.0908203125, -1.4921875, 0.1064453125, 1.705078125, 3.3037109375, 4.90234375, 6.5009765625, 8.099609375, 9.6982421875, 11.296875, 12.8955078125, 14.494140625, 16.0927734375, 17.69140625, 19.2900390625, 20.888671875, 22.4873046875, 24.0859375, 25.6845703125, 27.283203125, 28.8818359375, 30.48046875, 32.0791015625, 33.677734375, 35.2763671875, 36.875]}, "gradients/decoder.transformer.h.16.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 7.0, 6.0, 11.0, 13.0, 23.0, 25.0, 36.0, 50.0, 51.0, 64.0, 60.0, 82.0, 86.0, 70.0, 74.0, 77.0, 63.0, 51.0, 27.0, 33.0, 32.0, 20.0, 12.0, 13.0, 10.0, 7.0, 6.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.672157287597656, -34.4562873840332, -33.240421295166016, -32.02455139160156, -30.80868148803711, -29.59281349182129, -28.37694549560547, -27.161075592041016, -25.945207595825195, -24.729339599609375, -23.513469696044922, -22.2976016998291, -21.08173370361328, -19.865863800048828, -18.649995803833008, -17.434127807617188, -16.218257904052734, -15.002388954162598, -13.786520004272461, -12.57065200805664, -11.354783058166504, -10.138914108276367, -8.923046112060547, -7.70717716217041, -6.491308212280273, -5.275439262390137, -4.059570789337158, -2.8437020778656006, -1.627833366394043, -0.41196441650390625, 0.8039040565490723, 2.019772529602051, 3.235645294189453, 4.45151424407959, 5.667382717132568, 6.883251190185547, 8.099120140075684, 9.31498908996582, 10.53085708618164, 11.746726036071777, 12.962594985961914, 14.17846393585205, 15.394332885742188, 16.610200881958008, 17.826068878173828, 19.04193878173828, 20.2578067779541, 21.473674774169922, 22.689544677734375, 23.905412673950195, 25.12128257751465, 26.33715057373047, 27.553020477294922, 28.768888473510742, 29.984756469726562, 31.200626373291016, 32.41649627685547, 33.63236618041992, 34.84823226928711, 36.06410217285156, 37.279972076416016, 38.49584197998047, 39.711708068847656, 40.92757797241211, 42.1434440612793]}, "gradients/decoder.transformer.h.16.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 6.0, 6.0, 8.0, 11.0, 9.0, 12.0, 23.0, 14.0, 21.0, 17.0, 27.0, 18.0, 37.0, 33.0, 36.0, 26.0, 34.0, 51.0, 33.0, 46.0, 38.0, 37.0, 43.0, 49.0, 43.0, 28.0, 41.0, 35.0, 31.0, 22.0, 35.0, 23.0, 20.0, 23.0, 12.0, 12.0, 6.0, 9.0, 12.0, 4.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-37.93120574951172, -36.79109191894531, -35.650978088378906, -34.5108642578125, -33.370750427246094, -32.23063659667969, -31.090524673461914, -29.950410842895508, -28.8102970123291, -27.670183181762695, -26.53006935119629, -25.389955520629883, -24.24984359741211, -23.109729766845703, -21.969615936279297, -20.82950210571289, -19.689388275146484, -18.549274444580078, -17.409160614013672, -16.269046783447266, -15.128933906555176, -13.98882007598877, -12.84870719909668, -11.708593368530273, -10.568479537963867, -9.428365707397461, -8.288251876831055, -7.148138999938965, -6.008025169372559, -4.867911338806152, -3.7277979850769043, -2.5876846313476562, -1.4475746154785156, -0.3074610233306885, 0.8326525688171387, 1.9727661609649658, 3.112879753112793, 4.252993583679199, 5.393106937408447, 6.533220291137695, 7.673334121704102, 8.813447952270508, 9.953561782836914, 11.093674659729004, 12.23378849029541, 13.373902320861816, 14.514015197753906, 15.654129028320312, 16.79424285888672, 17.934356689453125, 19.07447052001953, 20.214584350585938, 21.354698181152344, 22.49481201171875, 23.634923934936523, 24.77503776550293, 25.915151596069336, 27.055265426635742, 28.19537925720215, 29.335493087768555, 30.475605010986328, 31.615718841552734, 32.75583267211914, 33.89594650268555, 35.03606033325195]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 6.0, 9.0, 8.0, 8.0, 9.0, 9.0, 14.0, 14.0, 25.0, 23.0, 34.0, 46.0, 46.0, 40.0, 46.0, 50.0, 55.0, 39.0, 36.0, 46.0, 54.0, 41.0, 44.0, 35.0, 32.0, 48.0, 36.0, 23.0, 20.0, 20.0, 18.0, 15.0, 12.0, 4.0, 9.0, 4.0, 12.0, 5.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.16796875, -5.00970458984375, -4.8514404296875, -4.69317626953125, -4.534912109375, -4.37664794921875, -4.2183837890625, -4.06011962890625, -3.90185546875, -3.74359130859375, -3.5853271484375, -3.42706298828125, -3.268798828125, -3.11053466796875, -2.9522705078125, -2.79400634765625, -2.6357421875, -2.47747802734375, -2.3192138671875, -2.16094970703125, -2.002685546875, -1.84442138671875, -1.6861572265625, -1.52789306640625, -1.36962890625, -1.21136474609375, -1.0531005859375, -0.89483642578125, -0.736572265625, -0.57830810546875, -0.4200439453125, -0.26177978515625, -0.103515625, 0.05474853515625, 0.2130126953125, 0.37127685546875, 0.529541015625, 0.68780517578125, 0.8460693359375, 1.00433349609375, 1.16259765625, 1.32086181640625, 1.4791259765625, 1.63739013671875, 1.795654296875, 1.95391845703125, 2.1121826171875, 2.27044677734375, 2.4287109375, 2.58697509765625, 2.7452392578125, 2.90350341796875, 3.061767578125, 3.22003173828125, 3.3782958984375, 3.53656005859375, 3.69482421875, 3.85308837890625, 4.0113525390625, 4.16961669921875, 4.327880859375, 4.48614501953125, 4.6444091796875, 4.80267333984375, 4.9609375]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 2.0, 4.0, 5.0, 10.0, 10.0, 15.0, 21.0, 45.0, 59.0, 86.0, 149.0, 220.0, 326.0, 541.0, 844.0, 1331.0, 2026.0, 3424.0, 5282.0, 8457.0, 13616.0, 22628.0, 36920.0, 59712.0, 95604.0, 147922.0, 196096.0, 164943.0, 108651.0, 68596.0, 42749.0, 26209.0, 16124.0, 9801.0, 6028.0, 3671.0, 2369.0, 1479.0, 919.0, 600.0, 388.0, 237.0, 153.0, 106.0, 68.0, 39.0, 23.0, 27.0, 14.0, 12.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.35888671875, -0.34717559814453125, -0.3354644775390625, -0.32375335693359375, -0.312042236328125, -0.30033111572265625, -0.2886199951171875, -0.27690887451171875, -0.26519775390625, -0.25348663330078125, -0.2417755126953125, -0.23006439208984375, -0.218353271484375, -0.20664215087890625, -0.1949310302734375, -0.18321990966796875, -0.1715087890625, -0.15979766845703125, -0.1480865478515625, -0.13637542724609375, -0.124664306640625, -0.11295318603515625, -0.1012420654296875, -0.08953094482421875, -0.07781982421875, -0.06610870361328125, -0.0543975830078125, -0.04268646240234375, -0.030975341796875, -0.01926422119140625, -0.0075531005859375, 0.00415802001953125, 0.015869140625, 0.02758026123046875, 0.0392913818359375, 0.05100250244140625, 0.062713623046875, 0.07442474365234375, 0.0861358642578125, 0.09784698486328125, 0.10955810546875, 0.12126922607421875, 0.1329803466796875, 0.14469146728515625, 0.156402587890625, 0.16811370849609375, 0.1798248291015625, 0.19153594970703125, 0.2032470703125, 0.21495819091796875, 0.2266693115234375, 0.23838043212890625, 0.250091552734375, 0.26180267333984375, 0.2735137939453125, 0.28522491455078125, 0.29693603515625, 0.30864715576171875, 0.3203582763671875, 0.33206939697265625, 0.343780517578125, 0.35549163818359375, 0.3672027587890625, 0.37891387939453125, 0.390625]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 3.0, 3.0, 4.0, 3.0, 8.0, 5.0, 9.0, 5.0, 9.0, 12.0, 17.0, 21.0, 22.0, 26.0, 33.0, 33.0, 29.0, 27.0, 31.0, 39.0, 40.0, 38.0, 40.0, 46.0, 1057.0, 43.0, 45.0, 31.0, 33.0, 47.0, 37.0, 27.0, 23.0, 22.0, 25.0, 21.0, 19.0, 15.0, 9.0, 11.0, 11.0, 13.0, 11.0, 6.0, 8.0, 6.0, 4.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.578125, -2.492950439453125, -2.40777587890625, -2.322601318359375, -2.2374267578125, -2.152252197265625, -2.06707763671875, -1.981903076171875, -1.896728515625, -1.811553955078125, -1.72637939453125, -1.641204833984375, -1.5560302734375, -1.470855712890625, -1.38568115234375, -1.300506591796875, -1.21533203125, -1.130157470703125, -1.04498291015625, -0.959808349609375, -0.8746337890625, -0.789459228515625, -0.70428466796875, -0.619110107421875, -0.533935546875, -0.448760986328125, -0.36358642578125, -0.278411865234375, -0.1932373046875, -0.108062744140625, -0.02288818359375, 0.062286376953125, 0.1474609375, 0.232635498046875, 0.31781005859375, 0.402984619140625, 0.4881591796875, 0.573333740234375, 0.65850830078125, 0.743682861328125, 0.828857421875, 0.914031982421875, 0.99920654296875, 1.084381103515625, 1.1695556640625, 1.254730224609375, 1.33990478515625, 1.425079345703125, 1.51025390625, 1.595428466796875, 1.68060302734375, 1.765777587890625, 1.8509521484375, 1.936126708984375, 2.02130126953125, 2.106475830078125, 2.191650390625, 2.276824951171875, 2.36199951171875, 2.447174072265625, 2.5323486328125, 2.617523193359375, 2.70269775390625, 2.787872314453125, 2.873046875]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 6.0, 4.0, 10.0, 13.0, 19.0, 34.0, 35.0, 60.0, 99.0, 123.0, 218.0, 298.0, 471.0, 633.0, 974.0, 1500.0, 2274.0, 3240.0, 4828.0, 7398.0, 10955.0, 16239.0, 24049.0, 35991.0, 53282.0, 79037.0, 114201.0, 151178.0, 1200487.0, 122739.0, 87554.0, 58947.0, 39726.0, 26355.0, 17846.0, 11899.0, 7986.0, 5326.0, 3643.0, 2457.0, 1623.0, 1055.0, 797.0, 552.0, 296.0, 234.0, 157.0, 96.0, 62.0, 35.0, 37.0, 19.0, 19.0, 8.0, 10.0, 6.0, 3.0, 2.0, 1.0], "bins": [-0.261962890625, -0.25405311584472656, -0.24614334106445312, -0.2382335662841797, -0.23032379150390625, -0.2224140167236328, -0.21450424194335938, -0.20659446716308594, -0.1986846923828125, -0.19077491760253906, -0.18286514282226562, -0.1749553680419922, -0.16704559326171875, -0.1591358184814453, -0.15122604370117188, -0.14331626892089844, -0.135406494140625, -0.12749671936035156, -0.11958694458007812, -0.11167716979980469, -0.10376739501953125, -0.09585762023925781, -0.08794784545898438, -0.08003807067871094, -0.0721282958984375, -0.06421852111816406, -0.056308746337890625, -0.04839897155761719, -0.04048919677734375, -0.03257942199707031, -0.024669647216796875, -0.016759872436523438, -0.00885009765625, -0.0009403228759765625, 0.006969451904296875, 0.014879226684570312, 0.02278900146484375, 0.030698776245117188, 0.038608551025390625, 0.04651832580566406, 0.0544281005859375, 0.06233787536621094, 0.07024765014648438, 0.07815742492675781, 0.08606719970703125, 0.09397697448730469, 0.10188674926757812, 0.10979652404785156, 0.117706298828125, 0.12561607360839844, 0.13352584838867188, 0.1414356231689453, 0.14934539794921875, 0.1572551727294922, 0.16516494750976562, 0.17307472229003906, 0.1809844970703125, 0.18889427185058594, 0.19680404663085938, 0.2047138214111328, 0.21262359619140625, 0.2205333709716797, 0.22844314575195312, 0.23635292053222656, 0.2442626953125]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 5.0, 9.0, 8.0, 6.0, 8.0, 6.0, 11.0, 17.0, 16.0, 22.0, 17.0, 27.0, 39.0, 26.0, 39.0, 35.0, 41.0, 41.0, 53.0, 53.0, 58.0, 42.0, 41.0, 48.0, 58.0, 38.0, 40.0, 37.0, 36.0, 21.0, 18.0, 13.0, 9.0, 13.0, 17.0, 10.0, 1.0, 4.0, 5.0, 3.0, 1.0, 1.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-0.000743865966796875, -0.0007175654172897339, -0.0006912648677825928, -0.0006649643182754517, -0.0006386637687683105, -0.0006123632192611694, -0.0005860626697540283, -0.0005597621202468872, -0.0005334615707397461, -0.000507161021232605, -0.00048086047172546387, -0.00045455992221832275, -0.00042825937271118164, -0.00040195882320404053, -0.0003756582736968994, -0.0003493577241897583, -0.0003230571746826172, -0.0002967566251754761, -0.00027045607566833496, -0.00024415552616119385, -0.00021785497665405273, -0.00019155442714691162, -0.0001652538776397705, -0.0001389533281326294, -0.00011265277862548828, -8.635222911834717e-05, -6.0051679611206055e-05, -3.375113010406494e-05, -7.450580596923828e-06, 1.8849968910217285e-05, 4.51505184173584e-05, 7.145106792449951e-05, 9.775161743164062e-05, 0.00012405216693878174, 0.00015035271644592285, 0.00017665326595306396, 0.00020295381546020508, 0.0002292543649673462, 0.0002555549144744873, 0.0002818554639816284, 0.00030815601348876953, 0.00033445656299591064, 0.00036075711250305176, 0.00038705766201019287, 0.000413358211517334, 0.0004396587610244751, 0.0004659593105316162, 0.0004922598600387573, 0.0005185604095458984, 0.0005448609590530396, 0.0005711615085601807, 0.0005974620580673218, 0.0006237626075744629, 0.000650063157081604, 0.0006763637065887451, 0.0007026642560958862, 0.0007289648056030273, 0.0007552653551101685, 0.0007815659046173096, 0.0008078664541244507, 0.0008341670036315918, 0.0008604675531387329, 0.000886768102645874, 0.0009130686521530151, 0.0009393692016601562]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 5.0, 3.0, 2.0, 5.0, 9.0, 11.0, 13.0, 14.0, 25.0, 31.0, 34.0, 40.0, 67.0, 106.0, 156.0, 202.0, 311.0, 511.0, 1421.0, 37662.0, 946986.0, 57628.0, 1739.0, 540.0, 318.0, 183.0, 125.0, 103.0, 68.0, 44.0, 45.0, 30.0, 21.0, 21.0, 26.0, 13.0, 6.0, 8.0, 11.0, 6.0, 0.0, 2.0, 6.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01715087890625, -0.016654372215270996, -0.016157865524291992, -0.01566135883331299, -0.015164852142333984, -0.01466834545135498, -0.014171838760375977, -0.013675332069396973, -0.013178825378417969, -0.012682318687438965, -0.012185811996459961, -0.011689305305480957, -0.011192798614501953, -0.01069629192352295, -0.010199785232543945, -0.009703278541564941, -0.009206771850585938, -0.008710265159606934, -0.00821375846862793, -0.007717251777648926, -0.007220745086669922, -0.006724238395690918, -0.006227731704711914, -0.00573122501373291, -0.005234718322753906, -0.004738211631774902, -0.0042417049407958984, -0.0037451982498168945, -0.0032486915588378906, -0.0027521848678588867, -0.002255678176879883, -0.001759171485900879, -0.001262664794921875, -0.0007661581039428711, -0.0002696514129638672, 0.00022685527801513672, 0.0007233619689941406, 0.0012198686599731445, 0.0017163753509521484, 0.0022128820419311523, 0.0027093887329101562, 0.00320589542388916, 0.003702402114868164, 0.004198908805847168, 0.004695415496826172, 0.005191922187805176, 0.00568842887878418, 0.006184935569763184, 0.0066814422607421875, 0.007177948951721191, 0.007674455642700195, 0.0081709623336792, 0.008667469024658203, 0.009163975715637207, 0.009660482406616211, 0.010156989097595215, 0.010653495788574219, 0.011150002479553223, 0.011646509170532227, 0.01214301586151123, 0.012639522552490234, 0.013136029243469238, 0.013632535934448242, 0.014129042625427246, 0.01462554931640625]}, "gradients/decoder.transformer.h.16.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 5.0, 6.0, 12.0, 21.0, 36.0, 73.0, 91.0, 151.0, 152.0, 155.0, 107.0, 87.0, 50.0, 25.0, 20.0, 8.0, 8.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0012561699841171503, -0.0012219747295603156, -0.0011877795914188027, -0.001153584336861968, -0.0011193891987204552, -0.0010851939441636205, -0.0010509986896067858, -0.001016803551465273, -0.0009826082969084382, -0.0009484131005592644, -0.0009142179042100906, -0.0008800226496532559, -0.0008458274533040822, -0.0008116322569549084, -0.0007774370606057346, -0.0007432418642565608, -0.000709046667907387, -0.0006748514715582132, -0.0006406562752090394, -0.0006064610788598657, -0.000572265824303031, -0.0005380706279538572, -0.0005038754316046834, -0.0004696802352555096, -0.0004354850098025054, -0.0004012898134533316, -0.00036709458800032735, -0.00033289939165115356, -0.0002987041953019798, -0.00026450896984897554, -0.00023031377349980175, -0.00019611856259871274, -0.00016192340990528464, -0.00012772819900419563, -9.353299537906423e-05, -5.9337791753932834e-05, -2.514258085284382e-05, 9.052630048245192e-06, 4.3247826397418976e-05, 7.744303729850799e-05, 0.000111638248199597, 0.00014583345910068601, 0.00018002867000177503, 0.0002142238663509488, 0.0002484190627001226, 0.00028261428815312684, 0.0003168094845023006, 0.00035100470995530486, 0.00038519990630447865, 0.00041939510265365243, 0.00045359032810665667, 0.00048778552445583045, 0.0005219807499088347, 0.0005561759462580085, 0.0005903711426071823, 0.000624566338956356, 0.0006587615935131907, 0.0006929567898623645, 0.0007271519862115383, 0.000761347240768373, 0.0007955424371175468, 0.0008297376334667206, 0.0008639328298158944, 0.0008981280261650681, 0.0009323232225142419]}, "gradients/decoder.transformer.h.16.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 2.0, 3.0, 4.0, 6.0, 5.0, 14.0, 6.0, 13.0, 16.0, 13.0, 14.0, 12.0, 24.0, 21.0, 36.0, 21.0, 26.0, 26.0, 36.0, 36.0, 38.0, 38.0, 51.0, 33.0, 49.0, 37.0, 38.0, 40.0, 39.0, 33.0, 33.0, 30.0, 27.0, 24.0, 29.0, 22.0, 26.0, 14.0, 15.0, 9.0, 12.0, 10.0, 3.0, 5.0, 3.0, 5.0, 8.0, 3.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.00047218799591064453, -0.0004574321210384369, -0.00044267624616622925, -0.0004279203712940216, -0.00041316449642181396, -0.0003984086215496063, -0.0003836527466773987, -0.00036889687180519104, -0.0003541409969329834, -0.00033938512206077576, -0.0003246292471885681, -0.0003098733723163605, -0.00029511749744415283, -0.0002803616225719452, -0.00026560574769973755, -0.0002508498728275299, -0.00023609399795532227, -0.00022133812308311462, -0.00020658224821090698, -0.00019182637333869934, -0.0001770704984664917, -0.00016231462359428406, -0.00014755874872207642, -0.00013280287384986877, -0.00011804699897766113, -0.00010329112410545349, -8.853524923324585e-05, -7.377937436103821e-05, -5.9023499488830566e-05, -4.4267624616622925e-05, -2.9511749744415283e-05, -1.4755874872207642e-05, 0.0, 1.4755874872207642e-05, 2.9511749744415283e-05, 4.4267624616622925e-05, 5.9023499488830566e-05, 7.377937436103821e-05, 8.853524923324585e-05, 0.00010329112410545349, 0.00011804699897766113, 0.00013280287384986877, 0.00014755874872207642, 0.00016231462359428406, 0.0001770704984664917, 0.00019182637333869934, 0.00020658224821090698, 0.00022133812308311462, 0.00023609399795532227, 0.0002508498728275299, 0.00026560574769973755, 0.0002803616225719452, 0.00029511749744415283, 0.0003098733723163605, 0.0003246292471885681, 0.00033938512206077576, 0.0003541409969329834, 0.00036889687180519104, 0.0003836527466773987, 0.0003984086215496063, 0.00041316449642181396, 0.0004279203712940216, 0.00044267624616622925, 0.0004574321210384369, 0.00047218799591064453]}, "gradients/decoder.transformer.h.16.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 6.0, 9.0, 8.0, 8.0, 9.0, 9.0, 14.0, 14.0, 25.0, 23.0, 34.0, 46.0, 46.0, 40.0, 46.0, 50.0, 55.0, 39.0, 36.0, 46.0, 54.0, 41.0, 44.0, 35.0, 32.0, 48.0, 36.0, 23.0, 20.0, 20.0, 18.0, 15.0, 12.0, 4.0, 9.0, 4.0, 12.0, 5.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.16796875, -5.00970458984375, -4.8514404296875, -4.69317626953125, -4.534912109375, -4.37664794921875, -4.2183837890625, -4.06011962890625, -3.90185546875, -3.74359130859375, -3.5853271484375, -3.42706298828125, -3.268798828125, -3.11053466796875, -2.9522705078125, -2.79400634765625, -2.6357421875, -2.47747802734375, -2.3192138671875, -2.16094970703125, -2.002685546875, -1.84442138671875, -1.6861572265625, -1.52789306640625, -1.36962890625, -1.21136474609375, -1.0531005859375, -0.89483642578125, -0.736572265625, -0.57830810546875, -0.4200439453125, -0.26177978515625, -0.103515625, 0.05474853515625, 0.2130126953125, 0.37127685546875, 0.529541015625, 0.68780517578125, 0.8460693359375, 1.00433349609375, 1.16259765625, 1.32086181640625, 1.4791259765625, 1.63739013671875, 1.795654296875, 1.95391845703125, 2.1121826171875, 2.27044677734375, 2.4287109375, 2.58697509765625, 2.7452392578125, 2.90350341796875, 3.061767578125, 3.22003173828125, 3.3782958984375, 3.53656005859375, 3.69482421875, 3.85308837890625, 4.0113525390625, 4.16961669921875, 4.327880859375, 4.48614501953125, 4.6444091796875, 4.80267333984375, 4.9609375]}, "gradients/decoder.transformer.h.16.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 4.0, 3.0, 3.0, 4.0, 2.0, 9.0, 7.0, 21.0, 30.0, 28.0, 38.0, 76.0, 90.0, 168.0, 240.0, 425.0, 739.0, 1494.0, 2794.0, 5419.0, 10302.0, 19674.0, 37582.0, 79369.0, 200567.0, 358969.0, 182696.0, 73334.0, 35331.0, 18432.0, 9734.0, 5196.0, 2689.0, 1357.0, 730.0, 406.0, 216.0, 114.0, 87.0, 57.0, 35.0, 22.0, 10.0, 16.0, 13.0, 6.0, 4.0, 3.0, 4.0, 4.0, 3.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.03125, -3.903076171875, -3.77490234375, -3.646728515625, -3.5185546875, -3.390380859375, -3.26220703125, -3.134033203125, -3.005859375, -2.877685546875, -2.74951171875, -2.621337890625, -2.4931640625, -2.364990234375, -2.23681640625, -2.108642578125, -1.98046875, -1.852294921875, -1.72412109375, -1.595947265625, -1.4677734375, -1.339599609375, -1.21142578125, -1.083251953125, -0.955078125, -0.826904296875, -0.69873046875, -0.570556640625, -0.4423828125, -0.314208984375, -0.18603515625, -0.057861328125, 0.0703125, 0.198486328125, 0.32666015625, 0.454833984375, 0.5830078125, 0.711181640625, 0.83935546875, 0.967529296875, 1.095703125, 1.223876953125, 1.35205078125, 1.480224609375, 1.6083984375, 1.736572265625, 1.86474609375, 1.992919921875, 2.12109375, 2.249267578125, 2.37744140625, 2.505615234375, 2.6337890625, 2.761962890625, 2.89013671875, 3.018310546875, 3.146484375, 3.274658203125, 3.40283203125, 3.531005859375, 3.6591796875, 3.787353515625, 3.91552734375, 4.043701171875, 4.171875]}, "gradients/decoder.transformer.h.16.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 2.0, 2.0, 7.0, 5.0, 9.0, 13.0, 14.0, 13.0, 14.0, 20.0, 23.0, 28.0, 33.0, 34.0, 40.0, 41.0, 53.0, 39.0, 64.0, 100.0, 166.0, 1510.0, 228.0, 127.0, 56.0, 53.0, 46.0, 43.0, 30.0, 41.0, 35.0, 29.0, 23.0, 22.0, 19.0, 19.0, 9.0, 10.0, 6.0, 9.0, 3.0, 7.0, 3.0, 4.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-18.1875, -17.6474609375, -17.107421875, -16.5673828125, -16.02734375, -15.4873046875, -14.947265625, -14.4072265625, -13.8671875, -13.3271484375, -12.787109375, -12.2470703125, -11.70703125, -11.1669921875, -10.626953125, -10.0869140625, -9.546875, -9.0068359375, -8.466796875, -7.9267578125, -7.38671875, -6.8466796875, -6.306640625, -5.7666015625, -5.2265625, -4.6865234375, -4.146484375, -3.6064453125, -3.06640625, -2.5263671875, -1.986328125, -1.4462890625, -0.90625, -0.3662109375, 0.173828125, 0.7138671875, 1.25390625, 1.7939453125, 2.333984375, 2.8740234375, 3.4140625, 3.9541015625, 4.494140625, 5.0341796875, 5.57421875, 6.1142578125, 6.654296875, 7.1943359375, 7.734375, 8.2744140625, 8.814453125, 9.3544921875, 9.89453125, 10.4345703125, 10.974609375, 11.5146484375, 12.0546875, 12.5947265625, 13.134765625, 13.6748046875, 14.21484375, 14.7548828125, 15.294921875, 15.8349609375, 16.375]}, "gradients/decoder.transformer.h.16.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 0.0, 3.0, 6.0, 7.0, 13.0, 18.0, 18.0, 59.0, 99.0, 211.0, 586.0, 8588.0, 3132816.0, 2477.0, 411.0, 170.0, 84.0, 46.0, 31.0, 27.0, 9.0, 11.0, 7.0, 2.0, 4.0, 4.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-117.3125, -113.1748046875, -109.037109375, -104.8994140625, -100.76171875, -96.6240234375, -92.486328125, -88.3486328125, -84.2109375, -80.0732421875, -75.935546875, -71.7978515625, -67.66015625, -63.5224609375, -59.384765625, -55.2470703125, -51.109375, -46.9716796875, -42.833984375, -38.6962890625, -34.55859375, -30.4208984375, -26.283203125, -22.1455078125, -18.0078125, -13.8701171875, -9.732421875, -5.5947265625, -1.45703125, 2.6806640625, 6.818359375, 10.9560546875, 15.09375, 19.2314453125, 23.369140625, 27.5068359375, 31.64453125, 35.7822265625, 39.919921875, 44.0576171875, 48.1953125, 52.3330078125, 56.470703125, 60.6083984375, 64.74609375, 68.8837890625, 73.021484375, 77.1591796875, 81.296875, 85.4345703125, 89.572265625, 93.7099609375, 97.84765625, 101.9853515625, 106.123046875, 110.2607421875, 114.3984375, 118.5361328125, 122.673828125, 126.8115234375, 130.94921875, 135.0869140625, 139.224609375, 143.3623046875, 147.5]}, "gradients/decoder.transformer.h.16.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 1010.0, 6.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-83.05943298339844, -64.73582458496094, -46.41221237182617, -28.088600158691406, -9.764991760253906, 8.558616638183594, 26.882232666015625, 45.205841064453125, 63.529449462890625, 81.85305786132812, 100.17666625976562, 118.50028228759766, 136.82388305664062, 155.14749145507812, 173.4711151123047, 191.7947235107422, 210.1183319091797, 228.4419403076172, 246.7655487060547, 265.08917236328125, 283.41278076171875, 301.73638916015625, 320.05999755859375, 338.38360595703125, 356.70721435546875, 375.03082275390625, 393.35443115234375, 411.67803955078125, 430.00164794921875, 448.32525634765625, 466.64886474609375, 484.97247314453125, 503.29608154296875, 521.6196899414062, 539.9432983398438, 558.2669067382812, 576.5905151367188, 594.9141235351562, 613.2377319335938, 631.5613403320312, 649.8849487304688, 668.2085571289062, 686.5321655273438, 704.8557739257812, 723.1793823242188, 741.5029907226562, 759.8265991210938, 778.1502075195312, 796.473876953125, 814.7974853515625, 833.12109375, 851.4447021484375, 869.768310546875, 888.0919189453125, 906.41552734375, 924.7391357421875, 943.062744140625, 961.3863525390625, 979.7099609375, 998.0335693359375, 1016.357177734375, 1034.6807861328125, 1053.00439453125, 1071.3280029296875, 1089.651611328125]}, "gradients/decoder.transformer.h.16.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 3.0, 2.0, 4.0, 7.0, 13.0, 6.0, 12.0, 17.0, 21.0, 17.0, 23.0, 25.0, 30.0, 30.0, 22.0, 26.0, 30.0, 42.0, 39.0, 44.0, 49.0, 28.0, 44.0, 45.0, 41.0, 42.0, 56.0, 34.0, 30.0, 29.0, 23.0, 34.0, 13.0, 21.0, 10.0, 16.0, 12.0, 12.0, 10.0, 5.0, 14.0, 9.0, 5.0, 6.0, 4.0, 0.0, 4.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.9068717956543, -41.4150390625, -39.92321014404297, -38.43137741088867, -36.939544677734375, -35.44771194458008, -33.95587921142578, -32.46405029296875, -30.972217559814453, -29.480384826660156, -27.988554000854492, -26.496723175048828, -25.00489044189453, -23.513057708740234, -22.02122688293457, -20.529396057128906, -19.03756332397461, -17.545730590820312, -16.05389976501465, -14.562067985534668, -13.070236206054688, -11.578404426574707, -10.086572647094727, -8.594740867614746, -7.102909088134766, -5.611077308654785, -4.119245529174805, -2.627413749694824, -1.1355819702148438, 0.3562498092651367, 1.8480815887451172, 3.3399133682250977, 4.8317413330078125, 6.323573112487793, 7.815404891967773, 9.307236671447754, 10.799068450927734, 12.290900230407715, 13.782732009887695, 15.274563789367676, 16.766395568847656, 18.258228302001953, 19.750059127807617, 21.24188995361328, 22.733722686767578, 24.225555419921875, 25.71738624572754, 27.209217071533203, 28.7010498046875, 30.192882537841797, 31.68471336364746, 33.176544189453125, 34.66837692260742, 36.16020965576172, 37.65203857421875, 39.14387130737305, 40.635704040527344, 42.12753677368164, 43.61936950683594, 45.11119842529297, 46.603031158447266, 48.09486389160156, 49.586692810058594, 51.07852554321289, 52.57035827636719]}, "gradients/decoder.transformer.h.15.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 1.0, 6.0, 5.0, 10.0, 9.0, 6.0, 10.0, 13.0, 19.0, 21.0, 15.0, 28.0, 39.0, 45.0, 40.0, 48.0, 43.0, 50.0, 37.0, 52.0, 48.0, 38.0, 57.0, 40.0, 41.0, 40.0, 30.0, 39.0, 31.0, 26.0, 23.0, 17.0, 19.0, 13.0, 12.0, 4.0, 9.0, 5.0, 5.0, 5.0, 6.0, 1.0, 1.0, 1.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.51953125, -5.3433837890625, -5.167236328125, -4.9910888671875, -4.81494140625, -4.6387939453125, -4.462646484375, -4.2864990234375, -4.1103515625, -3.9342041015625, -3.758056640625, -3.5819091796875, -3.40576171875, -3.2296142578125, -3.053466796875, -2.8773193359375, -2.701171875, -2.5250244140625, -2.348876953125, -2.1727294921875, -1.99658203125, -1.8204345703125, -1.644287109375, -1.4681396484375, -1.2919921875, -1.1158447265625, -0.939697265625, -0.7635498046875, -0.58740234375, -0.4112548828125, -0.235107421875, -0.0589599609375, 0.1171875, 0.2933349609375, 0.469482421875, 0.6456298828125, 0.82177734375, 0.9979248046875, 1.174072265625, 1.3502197265625, 1.5263671875, 1.7025146484375, 1.878662109375, 2.0548095703125, 2.23095703125, 2.4071044921875, 2.583251953125, 2.7593994140625, 2.935546875, 3.1116943359375, 3.287841796875, 3.4639892578125, 3.64013671875, 3.8162841796875, 3.992431640625, 4.1685791015625, 4.3447265625, 4.5208740234375, 4.697021484375, 4.8731689453125, 5.04931640625, 5.2254638671875, 5.401611328125, 5.5777587890625, 5.75390625]}, "gradients/decoder.transformer.h.15.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 4.0, 5.0, 7.0, 5.0, 6.0, 11.0, 16.0, 17.0, 24.0, 28.0, 34.0, 49.0, 72.0, 131.0, 224.0, 421.0, 872.0, 2667.0, 10212.0, 52793.0, 326256.0, 1440286.0, 1773679.0, 487036.0, 78908.0, 14649.0, 3577.0, 1108.0, 486.0, 232.0, 156.0, 92.0, 67.0, 45.0, 33.0, 18.0, 12.0, 11.0, 11.0, 7.0, 5.0, 8.0, 7.0, 4.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.0859375, -7.8082275390625, -7.530517578125, -7.2528076171875, -6.97509765625, -6.6973876953125, -6.419677734375, -6.1419677734375, -5.8642578125, -5.5865478515625, -5.308837890625, -5.0311279296875, -4.75341796875, -4.4757080078125, -4.197998046875, -3.9202880859375, -3.642578125, -3.3648681640625, -3.087158203125, -2.8094482421875, -2.53173828125, -2.2540283203125, -1.976318359375, -1.6986083984375, -1.4208984375, -1.1431884765625, -0.865478515625, -0.5877685546875, -0.31005859375, -0.0323486328125, 0.245361328125, 0.5230712890625, 0.80078125, 1.0784912109375, 1.356201171875, 1.6339111328125, 1.91162109375, 2.1893310546875, 2.467041015625, 2.7447509765625, 3.0224609375, 3.3001708984375, 3.577880859375, 3.8555908203125, 4.13330078125, 4.4110107421875, 4.688720703125, 4.9664306640625, 5.244140625, 5.5218505859375, 5.799560546875, 6.0772705078125, 6.35498046875, 6.6326904296875, 6.910400390625, 7.1881103515625, 7.4658203125, 7.7435302734375, 8.021240234375, 8.2989501953125, 8.57666015625, 8.8543701171875, 9.132080078125, 9.4097900390625, 9.6875]}, "gradients/decoder.transformer.h.15.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 3.0, 2.0, 3.0, 6.0, 15.0, 15.0, 40.0, 43.0, 74.0, 100.0, 117.0, 154.0, 226.0, 347.0, 436.0, 483.0, 473.0, 418.0, 289.0, 255.0, 174.0, 129.0, 79.0, 53.0, 45.0, 28.0, 22.0, 20.0, 9.0, 3.0, 2.0, 2.0, 3.0, 2.0, 2.0, 0.0, 3.0, 5.0, 2.0], "bins": [-16.671875, -16.2647705078125, -15.857666015625, -15.4505615234375, -15.04345703125, -14.6363525390625, -14.229248046875, -13.8221435546875, -13.4150390625, -13.0079345703125, -12.600830078125, -12.1937255859375, -11.78662109375, -11.3795166015625, -10.972412109375, -10.5653076171875, -10.158203125, -9.7510986328125, -9.343994140625, -8.9368896484375, -8.52978515625, -8.1226806640625, -7.715576171875, -7.3084716796875, -6.9013671875, -6.4942626953125, -6.087158203125, -5.6800537109375, -5.27294921875, -4.8658447265625, -4.458740234375, -4.0516357421875, -3.64453125, -3.2374267578125, -2.830322265625, -2.4232177734375, -2.01611328125, -1.6090087890625, -1.201904296875, -0.7947998046875, -0.3876953125, 0.0194091796875, 0.426513671875, 0.8336181640625, 1.24072265625, 1.6478271484375, 2.054931640625, 2.4620361328125, 2.869140625, 3.2762451171875, 3.683349609375, 4.0904541015625, 4.49755859375, 4.9046630859375, 5.311767578125, 5.7188720703125, 6.1259765625, 6.5330810546875, 6.940185546875, 7.3472900390625, 7.75439453125, 8.1614990234375, 8.568603515625, 8.9757080078125, 9.3828125]}, "gradients/decoder.transformer.h.15.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 5.0, 1.0, 0.0, 2.0, 2.0, 10.0, 6.0, 6.0, 14.0, 21.0, 37.0, 40.0, 50.0, 83.0, 114.0, 179.0, 308.0, 565.0, 1800.0, 11783.0, 259744.0, 3233560.0, 656456.0, 25142.0, 2624.0, 702.0, 351.0, 223.0, 146.0, 99.0, 72.0, 50.0, 35.0, 15.0, 17.0, 15.0, 5.0, 3.0, 2.0, 2.0, 1.0, 3.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.078125, -19.294921875, -18.51171875, -17.728515625, -16.9453125, -16.162109375, -15.37890625, -14.595703125, -13.8125, -13.029296875, -12.24609375, -11.462890625, -10.6796875, -9.896484375, -9.11328125, -8.330078125, -7.546875, -6.763671875, -5.98046875, -5.197265625, -4.4140625, -3.630859375, -2.84765625, -2.064453125, -1.28125, -0.498046875, 0.28515625, 1.068359375, 1.8515625, 2.634765625, 3.41796875, 4.201171875, 4.984375, 5.767578125, 6.55078125, 7.333984375, 8.1171875, 8.900390625, 9.68359375, 10.466796875, 11.25, 12.033203125, 12.81640625, 13.599609375, 14.3828125, 15.166015625, 15.94921875, 16.732421875, 17.515625, 18.298828125, 19.08203125, 19.865234375, 20.6484375, 21.431640625, 22.21484375, 22.998046875, 23.78125, 24.564453125, 25.34765625, 26.130859375, 26.9140625, 27.697265625, 28.48046875, 29.263671875, 30.046875]}, "gradients/decoder.transformer.h.15.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 5.0, 12.0, 29.0, 74.0, 167.0, 211.0, 225.0, 166.0, 70.0, 35.0, 13.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-133.85086059570312, -130.50595092773438, -127.16104888916016, -123.8161392211914, -120.47123718261719, -117.12632751464844, -113.78141784667969, -110.43650817871094, -107.09160614013672, -103.74669647216797, -100.40179443359375, -97.056884765625, -93.71197509765625, -90.36707305908203, -87.02216339111328, -83.67726135253906, -80.33235168457031, -76.98744201660156, -73.64253997802734, -70.2976303100586, -66.95272827148438, -63.607818603515625, -60.262908935546875, -56.91800308227539, -53.573097229003906, -50.22819137573242, -46.88328552246094, -43.53837585449219, -40.1934700012207, -36.84856414794922, -33.50365447998047, -30.158748626708984, -26.81383514404297, -23.468929290771484, -20.124021530151367, -16.77911376953125, -13.434207916259766, -10.089302062988281, -6.744394302368164, -3.399486541748047, -0.0545806884765625, 3.2903261184692383, 6.635232925415039, 9.98013973236084, 13.32504653930664, 16.669952392578125, 20.014860153198242, 23.35976791381836, 26.704673767089844, 30.049579620361328, 33.39448547363281, 36.73939514160156, 40.08430099487305, 43.42920684814453, 46.77411651611328, 50.119022369384766, 53.46392822265625, 56.808834075927734, 60.15373992919922, 63.49864959716797, 66.84355163574219, 70.18846130371094, 73.53337097167969, 76.87828063964844, 80.22318267822266]}, "gradients/decoder.transformer.h.15.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 7.0, 2.0, 5.0, 17.0, 10.0, 13.0, 22.0, 15.0, 20.0, 13.0, 26.0, 38.0, 47.0, 34.0, 44.0, 37.0, 50.0, 48.0, 50.0, 39.0, 30.0, 67.0, 44.0, 48.0, 26.0, 44.0, 31.0, 33.0, 24.0, 23.0, 21.0, 19.0, 12.0, 8.0, 7.0, 11.0, 7.0, 0.0, 6.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.96455383300781, -41.62192153930664, -40.27928924560547, -38.9366569519043, -37.594024658203125, -36.25139236450195, -34.90876007080078, -33.56612777709961, -32.22349548339844, -30.880863189697266, -29.538230895996094, -28.195598602294922, -26.85296630859375, -25.510334014892578, -24.167701721191406, -22.825069427490234, -21.48243522644043, -20.139802932739258, -18.797170639038086, -17.454538345336914, -16.111906051635742, -14.76927375793457, -13.426640510559082, -12.08400821685791, -10.741375923156738, -9.398743629455566, -8.056111335754395, -6.7134785652160645, -5.370846271514893, -4.028213977813721, -2.6855812072753906, -1.3429489135742188, -0.000316619873046875, 1.3423157930374146, 2.684948205947876, 4.027580738067627, 5.370213031768799, 6.712845325469971, 8.0554780960083, 9.398110389709473, 10.740742683410645, 12.083374977111816, 13.426007270812988, 14.768640518188477, 16.11127281188965, 17.45390510559082, 18.796537399291992, 20.139169692993164, 21.481801986694336, 22.824434280395508, 24.16706657409668, 25.50969886779785, 26.852331161499023, 28.194963455200195, 29.53759765625, 30.880229949951172, 32.222862243652344, 33.565494537353516, 34.90812683105469, 36.25075912475586, 37.59339141845703, 38.9360237121582, 40.278656005859375, 41.62128829956055, 42.96392059326172]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 5.0, 1.0, 10.0, 6.0, 11.0, 10.0, 15.0, 16.0, 17.0, 18.0, 31.0, 35.0, 43.0, 51.0, 44.0, 44.0, 61.0, 57.0, 51.0, 61.0, 47.0, 61.0, 53.0, 37.0, 41.0, 26.0, 32.0, 30.0, 17.0, 19.0, 9.0, 16.0, 10.0, 12.0, 6.0, 4.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.44140625, -5.2442626953125, -5.047119140625, -4.8499755859375, -4.65283203125, -4.4556884765625, -4.258544921875, -4.0614013671875, -3.8642578125, -3.6671142578125, -3.469970703125, -3.2728271484375, -3.07568359375, -2.8785400390625, -2.681396484375, -2.4842529296875, -2.287109375, -2.0899658203125, -1.892822265625, -1.6956787109375, -1.49853515625, -1.3013916015625, -1.104248046875, -0.9071044921875, -0.7099609375, -0.5128173828125, -0.315673828125, -0.1185302734375, 0.07861328125, 0.2757568359375, 0.472900390625, 0.6700439453125, 0.8671875, 1.0643310546875, 1.261474609375, 1.4586181640625, 1.65576171875, 1.8529052734375, 2.050048828125, 2.2471923828125, 2.4443359375, 2.6414794921875, 2.838623046875, 3.0357666015625, 3.23291015625, 3.4300537109375, 3.627197265625, 3.8243408203125, 4.021484375, 4.2186279296875, 4.415771484375, 4.6129150390625, 4.81005859375, 5.0072021484375, 5.204345703125, 5.4014892578125, 5.5986328125, 5.7957763671875, 5.992919921875, 6.1900634765625, 6.38720703125, 6.5843505859375, 6.781494140625, 6.9786376953125, 7.17578125]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 4.0, 10.0, 7.0, 23.0, 39.0, 37.0, 57.0, 94.0, 162.0, 267.0, 454.0, 773.0, 1185.0, 1935.0, 3150.0, 4946.0, 7893.0, 12423.0, 19887.0, 32261.0, 52527.0, 87862.0, 146994.0, 213330.0, 180149.0, 110635.0, 65979.0, 39724.0, 24679.0, 15202.0, 9581.0, 6207.0, 3875.0, 2363.0, 1516.0, 907.0, 569.0, 340.0, 189.0, 112.0, 85.0, 45.0, 34.0, 17.0, 13.0, 6.0, 11.0, 3.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.39794921875, -0.38455963134765625, -0.3711700439453125, -0.35778045654296875, -0.344390869140625, -0.33100128173828125, -0.3176116943359375, -0.30422210693359375, -0.29083251953125, -0.27744293212890625, -0.2640533447265625, -0.25066375732421875, -0.237274169921875, -0.22388458251953125, -0.2104949951171875, -0.19710540771484375, -0.1837158203125, -0.17032623291015625, -0.1569366455078125, -0.14354705810546875, -0.130157470703125, -0.11676788330078125, -0.1033782958984375, -0.08998870849609375, -0.07659912109375, -0.06320953369140625, -0.0498199462890625, -0.03643035888671875, -0.023040771484375, -0.00965118408203125, 0.0037384033203125, 0.01712799072265625, 0.030517578125, 0.04390716552734375, 0.0572967529296875, 0.07068634033203125, 0.084075927734375, 0.09746551513671875, 0.1108551025390625, 0.12424468994140625, 0.13763427734375, 0.15102386474609375, 0.1644134521484375, 0.17780303955078125, 0.191192626953125, 0.20458221435546875, 0.2179718017578125, 0.23136138916015625, 0.2447509765625, 0.25814056396484375, 0.2715301513671875, 0.28491973876953125, 0.298309326171875, 0.31169891357421875, 0.3250885009765625, 0.33847808837890625, 0.35186767578125, 0.36525726318359375, 0.3786468505859375, 0.39203643798828125, 0.405426025390625, 0.41881561279296875, 0.4322052001953125, 0.44559478759765625, 0.458984375]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 7.0, 9.0, 8.0, 5.0, 13.0, 14.0, 18.0, 13.0, 11.0, 30.0, 27.0, 19.0, 29.0, 28.0, 31.0, 30.0, 47.0, 44.0, 31.0, 29.0, 48.0, 1068.0, 31.0, 29.0, 40.0, 39.0, 28.0, 37.0, 33.0, 28.0, 29.0, 20.0, 26.0, 20.0, 18.0, 19.0, 9.0, 15.0, 8.0, 10.0, 8.0, 5.0, 2.0, 6.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 4.0, 1.0], "bins": [-3.005859375, -2.915618896484375, -2.82537841796875, -2.735137939453125, -2.6448974609375, -2.554656982421875, -2.46441650390625, -2.374176025390625, -2.283935546875, -2.193695068359375, -2.10345458984375, -2.013214111328125, -1.9229736328125, -1.832733154296875, -1.74249267578125, -1.652252197265625, -1.56201171875, -1.471771240234375, -1.38153076171875, -1.291290283203125, -1.2010498046875, -1.110809326171875, -1.02056884765625, -0.930328369140625, -0.840087890625, -0.749847412109375, -0.65960693359375, -0.569366455078125, -0.4791259765625, -0.388885498046875, -0.29864501953125, -0.208404541015625, -0.1181640625, -0.027923583984375, 0.06231689453125, 0.152557373046875, 0.2427978515625, 0.333038330078125, 0.42327880859375, 0.513519287109375, 0.603759765625, 0.694000244140625, 0.78424072265625, 0.874481201171875, 0.9647216796875, 1.054962158203125, 1.14520263671875, 1.235443115234375, 1.32568359375, 1.415924072265625, 1.50616455078125, 1.596405029296875, 1.6866455078125, 1.776885986328125, 1.86712646484375, 1.957366943359375, 2.047607421875, 2.137847900390625, 2.22808837890625, 2.318328857421875, 2.4085693359375, 2.498809814453125, 2.58905029296875, 2.679290771484375, 2.76953125]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 7.0, 8.0, 25.0, 26.0, 31.0, 48.0, 66.0, 121.0, 189.0, 254.0, 433.0, 580.0, 881.0, 1380.0, 2068.0, 3244.0, 4604.0, 7033.0, 10599.0, 16028.0, 24199.0, 36805.0, 56416.0, 85671.0, 125445.0, 1200836.0, 170210.0, 118124.0, 79045.0, 52341.0, 33959.0, 22361.0, 14738.0, 9994.0, 6523.0, 4244.0, 2894.0, 1924.0, 1267.0, 871.0, 562.0, 395.0, 247.0, 154.0, 106.0, 59.0, 44.0, 31.0, 12.0, 15.0, 11.0, 5.0, 5.0, 5.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.29150390625, -0.2823677062988281, -0.27323150634765625, -0.2640953063964844, -0.2549591064453125, -0.24582290649414062, -0.23668670654296875, -0.22755050659179688, -0.218414306640625, -0.20927810668945312, -0.20014190673828125, -0.19100570678710938, -0.1818695068359375, -0.17273330688476562, -0.16359710693359375, -0.15446090698242188, -0.14532470703125, -0.13618850708007812, -0.12705230712890625, -0.11791610717773438, -0.1087799072265625, -0.09964370727539062, -0.09050750732421875, -0.08137130737304688, -0.072235107421875, -0.06309890747070312, -0.05396270751953125, -0.044826507568359375, -0.0356903076171875, -0.026554107666015625, -0.01741790771484375, -0.008281707763671875, 0.0008544921875, 0.009990692138671875, 0.01912689208984375, 0.028263092041015625, 0.0373992919921875, 0.046535491943359375, 0.05567169189453125, 0.06480789184570312, 0.073944091796875, 0.08308029174804688, 0.09221649169921875, 0.10135269165039062, 0.1104888916015625, 0.11962509155273438, 0.12876129150390625, 0.13789749145507812, 0.14703369140625, 0.15616989135742188, 0.16530609130859375, 0.17444229125976562, 0.1835784912109375, 0.19271469116210938, 0.20185089111328125, 0.21098709106445312, 0.220123291015625, 0.22925949096679688, 0.23839569091796875, 0.24753189086914062, 0.2566680908203125, 0.2658042907714844, 0.27494049072265625, 0.2840766906738281, 0.293212890625]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 4.0, 3.0, 2.0, 1.0, 6.0, 5.0, 4.0, 5.0, 10.0, 13.0, 14.0, 18.0, 17.0, 29.0, 26.0, 32.0, 42.0, 60.0, 37.0, 42.0, 51.0, 55.0, 62.0, 48.0, 55.0, 57.0, 30.0, 40.0, 44.0, 34.0, 29.0, 20.0, 23.0, 22.0, 14.0, 9.0, 15.0, 5.0, 9.0, 6.0, 3.0, 2.0, 1.0, 2.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.001247406005859375, -0.0012080371379852295, -0.001168668270111084, -0.0011292994022369385, -0.001089930534362793, -0.0010505616664886475, -0.001011192798614502, -0.0009718239307403564, -0.0009324550628662109, -0.0008930861949920654, -0.0008537173271179199, -0.0008143484592437744, -0.0007749795913696289, -0.0007356107234954834, -0.0006962418556213379, -0.0006568729877471924, -0.0006175041198730469, -0.0005781352519989014, -0.0005387663841247559, -0.0004993975162506104, -0.00046002864837646484, -0.00042065978050231934, -0.00038129091262817383, -0.0003419220447540283, -0.0003025531768798828, -0.0002631843090057373, -0.0002238154411315918, -0.0001844465732574463, -0.00014507770538330078, -0.00010570883750915527, -6.633996963500977e-05, -2.6971101760864258e-05, 1.239776611328125e-05, 5.176663398742676e-05, 9.113550186157227e-05, 0.00013050436973571777, 0.00016987323760986328, 0.0002092421054840088, 0.0002486109733581543, 0.0002879798412322998, 0.0003273487091064453, 0.0003667175769805908, 0.00040608644485473633, 0.00044545531272888184, 0.00048482418060302734, 0.0005241930484771729, 0.0005635619163513184, 0.0006029307842254639, 0.0006422996520996094, 0.0006816685199737549, 0.0007210373878479004, 0.0007604062557220459, 0.0007997751235961914, 0.0008391439914703369, 0.0008785128593444824, 0.0009178817272186279, 0.0009572505950927734, 0.000996619462966919, 0.0010359883308410645, 0.00107535719871521, 0.0011147260665893555, 0.001154094934463501, 0.0011934638023376465, 0.001232832670211792, 0.0012722015380859375]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 5.0, 2.0, 2.0, 2.0, 1.0, 6.0, 7.0, 9.0, 8.0, 16.0, 20.0, 10.0, 30.0, 30.0, 39.0, 50.0, 78.0, 138.0, 213.0, 358.0, 790.0, 6090.0, 992427.0, 45939.0, 1045.0, 496.0, 232.0, 158.0, 99.0, 63.0, 48.0, 35.0, 31.0, 16.0, 16.0, 12.0, 8.0, 9.0, 5.0, 2.0, 5.0, 4.0, 5.0, 2.0, 1.0, 3.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.025909423828125, -0.025091171264648438, -0.024272918701171875, -0.023454666137695312, -0.02263641357421875, -0.021818161010742188, -0.020999908447265625, -0.020181655883789062, -0.0193634033203125, -0.018545150756835938, -0.017726898193359375, -0.016908645629882812, -0.01609039306640625, -0.015272140502929688, -0.014453887939453125, -0.013635635375976562, -0.0128173828125, -0.011999130249023438, -0.011180877685546875, -0.010362625122070312, -0.00954437255859375, -0.008726119995117188, -0.007907867431640625, -0.0070896148681640625, -0.0062713623046875, -0.0054531097412109375, -0.004634857177734375, -0.0038166046142578125, -0.00299835205078125, -0.0021800994873046875, -0.001361846923828125, -0.0005435943603515625, 0.000274658203125, 0.0010929107666015625, 0.001911163330078125, 0.0027294158935546875, 0.00354766845703125, 0.0043659210205078125, 0.005184173583984375, 0.0060024261474609375, 0.0068206787109375, 0.0076389312744140625, 0.008457183837890625, 0.009275436401367188, 0.01009368896484375, 0.010911941528320312, 0.011730194091796875, 0.012548446655273438, 0.01336669921875, 0.014184951782226562, 0.015003204345703125, 0.015821456909179688, 0.01663970947265625, 0.017457962036132812, 0.018276214599609375, 0.019094467163085938, 0.0199127197265625, 0.020730972290039062, 0.021549224853515625, 0.022367477416992188, 0.02318572998046875, 0.024003982543945312, 0.024822235107421875, 0.025640487670898438, 0.026458740234375]}, "gradients/decoder.transformer.h.15.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 7.0, 23.0, 110.0, 396.0, 343.0, 117.0, 17.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004974533338099718, -0.0048685637302696705, -0.004762594122439623, -0.004656624980270863, -0.004550655372440815, -0.004444685764610767, -0.00433871615678072, -0.0042327470146119595, -0.004126777406781912, -0.004020807798951864, -0.003914838191121817, -0.0038088688161224127, -0.0037028994411230087, -0.003596929833292961, -0.003490960458293557, -0.0033849908504635096, -0.0032790214754641056, -0.003173051867634058, -0.003067082492634654, -0.0029611128848046064, -0.0028551435098052025, -0.002749173901975155, -0.002643204526975751, -0.0025372349191457033, -0.0024312653113156557, -0.002325295703485608, -0.002219326328486204, -0.0021133567206561565, -0.0020073873456567526, -0.001901417737826705, -0.001795448362827301, -0.0016894787549972534, -0.0015835093799978495, -0.0014775398885831237, -0.001371570397168398, -0.0012656009057536721, -0.0011596314143389463, -0.0010536618065088987, -0.0009476923733018339, -0.0008417228818871081, -0.0007357533904723823, -0.0006297838990576565, -0.0005238144076429307, -0.0004178448871243745, -0.00031187539570964873, -0.0002059058751910925, -9.993638377636671e-05, 6.03310763835907e-06, 0.00011200259905308485, 0.00021797209046781063, 0.0003239415818825364, 0.00042991110240109265, 0.000535880564711988, 0.0006418501143343747, 0.0007478196057491004, 0.0008537890971638262, 0.000959758588578552, 0.0010657281382009387, 0.0011716976296156645, 0.0012776671210303903, 0.001383636612445116, 0.0014896061038598418, 0.0015955755952745676, 0.0017015450866892934, 0.0018075145781040192]}, "gradients/decoder.transformer.h.15.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 4.0, 3.0, 6.0, 6.0, 6.0, 9.0, 10.0, 7.0, 13.0, 11.0, 22.0, 23.0, 24.0, 30.0, 37.0, 32.0, 31.0, 41.0, 43.0, 49.0, 39.0, 28.0, 45.0, 46.0, 50.0, 41.0, 38.0, 39.0, 31.0, 35.0, 38.0, 26.0, 15.0, 18.0, 27.0, 18.0, 16.0, 11.0, 9.0, 5.0, 7.0, 6.0, 4.0, 9.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006800293922424316, -0.0006577242165803909, -0.0006354190409183502, -0.0006131138652563095, -0.0005908086895942688, -0.0005685035139322281, -0.0005461983382701874, -0.0005238931626081467, -0.000501587986946106, -0.00047928281128406525, -0.00045697763562202454, -0.0004346724599599838, -0.0004123672842979431, -0.0003900621086359024, -0.0003677569329738617, -0.000345451757311821, -0.0003231465816497803, -0.00030084140598773956, -0.00027853623032569885, -0.00025623105466365814, -0.00023392587900161743, -0.00021162070333957672, -0.000189315527677536, -0.0001670103520154953, -0.0001447051763534546, -0.00012240000069141388, -0.00010009482502937317, -7.778964936733246e-05, -5.548447370529175e-05, -3.317929804325104e-05, -1.0874122381210327e-05, 1.1431053280830383e-05, 3.3736228942871094e-05, 5.6041404604911804e-05, 7.834658026695251e-05, 0.00010065175592899323, 0.00012295693159103394, 0.00014526210725307465, 0.00016756728291511536, 0.00018987245857715607, 0.00021217763423919678, 0.0002344828099012375, 0.0002567879855632782, 0.0002790931612253189, 0.0003013983368873596, 0.00032370351254940033, 0.00034600868821144104, 0.00036831386387348175, 0.00039061903953552246, 0.00041292421519756317, 0.0004352293908596039, 0.0004575345665216446, 0.0004798397421836853, 0.000502144917845726, 0.0005244500935077667, 0.0005467552691698074, 0.0005690604448318481, 0.0005913656204938889, 0.0006136707961559296, 0.0006359759718179703, 0.000658281147480011, 0.0006805863231420517, 0.0007028914988040924, 0.0007251966744661331, 0.0007475018501281738]}, "gradients/decoder.transformer.h.15.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 5.0, 1.0, 10.0, 6.0, 11.0, 10.0, 15.0, 16.0, 17.0, 18.0, 31.0, 35.0, 43.0, 51.0, 44.0, 44.0, 61.0, 57.0, 51.0, 61.0, 47.0, 61.0, 53.0, 37.0, 41.0, 26.0, 32.0, 30.0, 17.0, 19.0, 9.0, 16.0, 10.0, 12.0, 6.0, 4.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.44140625, -5.2442626953125, -5.047119140625, -4.8499755859375, -4.65283203125, -4.4556884765625, -4.258544921875, -4.0614013671875, -3.8642578125, -3.6671142578125, -3.469970703125, -3.2728271484375, -3.07568359375, -2.8785400390625, -2.681396484375, -2.4842529296875, -2.287109375, -2.0899658203125, -1.892822265625, -1.6956787109375, -1.49853515625, -1.3013916015625, -1.104248046875, -0.9071044921875, -0.7099609375, -0.5128173828125, -0.315673828125, -0.1185302734375, 0.07861328125, 0.2757568359375, 0.472900390625, 0.6700439453125, 0.8671875, 1.0643310546875, 1.261474609375, 1.4586181640625, 1.65576171875, 1.8529052734375, 2.050048828125, 2.2471923828125, 2.4443359375, 2.6414794921875, 2.838623046875, 3.0357666015625, 3.23291015625, 3.4300537109375, 3.627197265625, 3.8243408203125, 4.021484375, 4.2186279296875, 4.415771484375, 4.6129150390625, 4.81005859375, 5.0072021484375, 5.204345703125, 5.4014892578125, 5.5986328125, 5.7957763671875, 5.992919921875, 6.1900634765625, 6.38720703125, 6.5843505859375, 6.781494140625, 6.9786376953125, 7.17578125]}, "gradients/decoder.transformer.h.15.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 4.0, 6.0, 11.0, 19.0, 15.0, 39.0, 37.0, 78.0, 129.0, 181.0, 348.0, 597.0, 1159.0, 2285.0, 4629.0, 9310.0, 18906.0, 38038.0, 76955.0, 156197.0, 272724.0, 230338.0, 119022.0, 59093.0, 29239.0, 14818.0, 7147.0, 3438.0, 1797.0, 822.0, 473.0, 262.0, 159.0, 97.0, 57.0, 46.0, 36.0, 20.0, 9.0, 14.0, 4.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.01171875, -4.87176513671875, -4.7318115234375, -4.59185791015625, -4.451904296875, -4.31195068359375, -4.1719970703125, -4.03204345703125, -3.89208984375, -3.75213623046875, -3.6121826171875, -3.47222900390625, -3.332275390625, -3.19232177734375, -3.0523681640625, -2.91241455078125, -2.7724609375, -2.63250732421875, -2.4925537109375, -2.35260009765625, -2.212646484375, -2.07269287109375, -1.9327392578125, -1.79278564453125, -1.65283203125, -1.51287841796875, -1.3729248046875, -1.23297119140625, -1.093017578125, -0.95306396484375, -0.8131103515625, -0.67315673828125, -0.533203125, -0.39324951171875, -0.2532958984375, -0.11334228515625, 0.026611328125, 0.16656494140625, 0.3065185546875, 0.44647216796875, 0.58642578125, 0.72637939453125, 0.8663330078125, 1.00628662109375, 1.146240234375, 1.28619384765625, 1.4261474609375, 1.56610107421875, 1.7060546875, 1.84600830078125, 1.9859619140625, 2.12591552734375, 2.265869140625, 2.40582275390625, 2.5457763671875, 2.68572998046875, 2.82568359375, 2.96563720703125, 3.1055908203125, 3.24554443359375, 3.385498046875, 3.52545166015625, 3.6654052734375, 3.80535888671875, 3.9453125]}, "gradients/decoder.transformer.h.15.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 2.0, 6.0, 7.0, 8.0, 10.0, 10.0, 11.0, 11.0, 23.0, 28.0, 25.0, 40.0, 36.0, 49.0, 56.0, 64.0, 76.0, 121.0, 243.0, 1464.0, 223.0, 127.0, 72.0, 61.0, 43.0, 41.0, 34.0, 26.0, 24.0, 20.0, 13.0, 17.0, 13.0, 8.0, 9.0, 9.0, 7.0, 4.0, 6.0, 2.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-24.03125, -23.353515625, -22.67578125, -21.998046875, -21.3203125, -20.642578125, -19.96484375, -19.287109375, -18.609375, -17.931640625, -17.25390625, -16.576171875, -15.8984375, -15.220703125, -14.54296875, -13.865234375, -13.1875, -12.509765625, -11.83203125, -11.154296875, -10.4765625, -9.798828125, -9.12109375, -8.443359375, -7.765625, -7.087890625, -6.41015625, -5.732421875, -5.0546875, -4.376953125, -3.69921875, -3.021484375, -2.34375, -1.666015625, -0.98828125, -0.310546875, 0.3671875, 1.044921875, 1.72265625, 2.400390625, 3.078125, 3.755859375, 4.43359375, 5.111328125, 5.7890625, 6.466796875, 7.14453125, 7.822265625, 8.5, 9.177734375, 9.85546875, 10.533203125, 11.2109375, 11.888671875, 12.56640625, 13.244140625, 13.921875, 14.599609375, 15.27734375, 15.955078125, 16.6328125, 17.310546875, 17.98828125, 18.666015625, 19.34375]}, "gradients/decoder.transformer.h.15.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 12.0, 14.0, 16.0, 30.0, 31.0, 47.0, 83.0, 124.0, 193.0, 342.0, 748.0, 7220.0, 3118618.0, 16294.0, 932.0, 398.0, 212.0, 131.0, 82.0, 55.0, 33.0, 25.0, 19.0, 16.0, 10.0, 8.0, 6.0, 3.0, 4.0, 0.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-66.4375, -63.9375, -61.4375, -58.9375, -56.4375, -53.9375, -51.4375, -48.9375, -46.4375, -43.9375, -41.4375, -38.9375, -36.4375, -33.9375, -31.4375, -28.9375, -26.4375, -23.9375, -21.4375, -18.9375, -16.4375, -13.9375, -11.4375, -8.9375, -6.4375, -3.9375, -1.4375, 1.0625, 3.5625, 6.0625, 8.5625, 11.0625, 13.5625, 16.0625, 18.5625, 21.0625, 23.5625, 26.0625, 28.5625, 31.0625, 33.5625, 36.0625, 38.5625, 41.0625, 43.5625, 46.0625, 48.5625, 51.0625, 53.5625, 56.0625, 58.5625, 61.0625, 63.5625, 66.0625, 68.5625, 71.0625, 73.5625, 76.0625, 78.5625, 81.0625, 83.5625, 86.0625, 88.5625, 91.0625, 93.5625]}, "gradients/decoder.transformer.h.15.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 6.0, 43.0, 200.0, 419.0, 278.0, 60.0, 9.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-152.42967224121094, -149.29757690429688, -146.16549682617188, -143.0334014892578, -139.9013214111328, -136.76922607421875, -133.63714599609375, -130.5050506591797, -127.37297058105469, -124.24088287353516, -121.10879516601562, -117.9767074584961, -114.84461975097656, -111.71253204345703, -108.5804443359375, -105.44834899902344, -102.3162612915039, -99.18417358398438, -96.05208587646484, -92.91999816894531, -89.78791046142578, -86.65582275390625, -83.52372741699219, -80.39164733886719, -77.25955200195312, -74.1274642944336, -70.99537658691406, -67.86328887939453, -64.731201171875, -61.59911346435547, -58.46702194213867, -55.33493423461914, -52.20285415649414, -49.07076644897461, -45.93867874145508, -42.80658721923828, -39.67449951171875, -36.54241180419922, -33.41032409667969, -30.278236389160156, -27.146148681640625, -24.014060974121094, -20.881973266601562, -17.7498836517334, -14.617795944213867, -11.485708236694336, -8.353618621826172, -5.221530914306641, -2.0894432067871094, 1.04264497756958, 4.1747331619262695, 7.306821823120117, 10.438909530639648, 13.57099723815918, 16.703086853027344, 19.835174560546875, 22.967262268066406, 26.099349975585938, 29.23143768310547, 32.363525390625, 35.49561309814453, 38.62770080566406, 41.75979232788086, 44.89188003540039, 48.02396774291992]}, "gradients/decoder.transformer.h.15.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 2.0, 4.0, 5.0, 9.0, 2.0, 7.0, 7.0, 13.0, 8.0, 11.0, 9.0, 13.0, 13.0, 22.0, 25.0, 18.0, 34.0, 21.0, 26.0, 19.0, 33.0, 32.0, 43.0, 32.0, 33.0, 32.0, 28.0, 40.0, 30.0, 41.0, 33.0, 41.0, 32.0, 30.0, 31.0, 22.0, 29.0, 23.0, 13.0, 20.0, 11.0, 21.0, 6.0, 6.0, 13.0, 10.0, 11.0, 9.0, 7.0, 7.0, 9.0, 3.0, 1.0, 5.0, 1.0, 1.0, 4.0, 3.0, 3.0, 3.0], "bins": [-36.1612663269043, -34.994667053222656, -33.82807159423828, -32.66147232055664, -31.494876861572266, -30.328277587890625, -29.161680221557617, -27.99508285522461, -26.8284854888916, -25.661888122558594, -24.495290756225586, -23.328693389892578, -22.162094116210938, -20.995498657226562, -19.828899383544922, -18.662302017211914, -17.495704650878906, -16.3291072845459, -15.16250991821289, -13.995911598205566, -12.829314231872559, -11.66271686553955, -10.496118545532227, -9.329521179199219, -8.162923812866211, -6.996326446533203, -5.829728603363037, -4.663130760192871, -3.4965333938598633, -2.3299360275268555, -1.1633381843566895, 0.0032596588134765625, 1.1698570251464844, 2.3364546298980713, 3.503052234649658, 4.669650077819824, 5.836247444152832, 7.00284481048584, 8.169443130493164, 9.336040496826172, 10.50263786315918, 11.669235229492188, 12.835832595825195, 14.00243091583252, 15.169028282165527, 16.33562469482422, 17.50222396850586, 18.668821334838867, 19.835418701171875, 21.002016067504883, 22.16861343383789, 23.3352108001709, 24.501808166503906, 25.668407440185547, 26.835004806518555, 28.001602172851562, 29.16819953918457, 30.334796905517578, 31.501394271850586, 32.667991638183594, 33.834590911865234, 35.00118637084961, 36.16778564453125, 37.334381103515625, 38.500980377197266]}, "gradients/decoder.transformer.h.14.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 3.0, 9.0, 11.0, 6.0, 9.0, 15.0, 12.0, 8.0, 23.0, 34.0, 22.0, 32.0, 34.0, 39.0, 49.0, 48.0, 55.0, 50.0, 58.0, 45.0, 59.0, 53.0, 56.0, 37.0, 38.0, 33.0, 32.0, 22.0, 24.0, 16.0, 16.0, 16.0, 9.0, 7.0, 13.0, 8.0, 6.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.5546875, -5.3482666015625, -5.141845703125, -4.9354248046875, -4.72900390625, -4.5225830078125, -4.316162109375, -4.1097412109375, -3.9033203125, -3.6968994140625, -3.490478515625, -3.2840576171875, -3.07763671875, -2.8712158203125, -2.664794921875, -2.4583740234375, -2.251953125, -2.0455322265625, -1.839111328125, -1.6326904296875, -1.42626953125, -1.2198486328125, -1.013427734375, -0.8070068359375, -0.6005859375, -0.3941650390625, -0.187744140625, 0.0186767578125, 0.22509765625, 0.4315185546875, 0.637939453125, 0.8443603515625, 1.05078125, 1.2572021484375, 1.463623046875, 1.6700439453125, 1.87646484375, 2.0828857421875, 2.289306640625, 2.4957275390625, 2.7021484375, 2.9085693359375, 3.114990234375, 3.3214111328125, 3.52783203125, 3.7342529296875, 3.940673828125, 4.1470947265625, 4.353515625, 4.5599365234375, 4.766357421875, 4.9727783203125, 5.17919921875, 5.3856201171875, 5.592041015625, 5.7984619140625, 6.0048828125, 6.2113037109375, 6.417724609375, 6.6241455078125, 6.83056640625, 7.0369873046875, 7.243408203125, 7.4498291015625, 7.65625]}, "gradients/decoder.transformer.h.14.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 3.0, 11.0, 6.0, 6.0, 10.0, 5.0, 10.0, 15.0, 15.0, 25.0, 22.0, 30.0, 27.0, 46.0, 69.0, 147.0, 377.0, 2019.0, 40141.0, 1761877.0, 2325088.0, 60943.0, 2506.0, 427.0, 146.0, 75.0, 49.0, 36.0, 18.0, 26.0, 22.0, 19.0, 19.0, 15.0, 7.0, 5.0, 9.0, 8.0, 3.0, 7.0, 2.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.96875, -17.3046875, -16.640625, -15.9765625, -15.3125, -14.6484375, -13.984375, -13.3203125, -12.65625, -11.9921875, -11.328125, -10.6640625, -10.0, -9.3359375, -8.671875, -8.0078125, -7.34375, -6.6796875, -6.015625, -5.3515625, -4.6875, -4.0234375, -3.359375, -2.6953125, -2.03125, -1.3671875, -0.703125, -0.0390625, 0.625, 1.2890625, 1.953125, 2.6171875, 3.28125, 3.9453125, 4.609375, 5.2734375, 5.9375, 6.6015625, 7.265625, 7.9296875, 8.59375, 9.2578125, 9.921875, 10.5859375, 11.25, 11.9140625, 12.578125, 13.2421875, 13.90625, 14.5703125, 15.234375, 15.8984375, 16.5625, 17.2265625, 17.890625, 18.5546875, 19.21875, 19.8828125, 20.546875, 21.2109375, 21.875, 22.5390625, 23.203125, 23.8671875, 24.53125]}, "gradients/decoder.transformer.h.14.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 4.0, 6.0, 11.0, 11.0, 19.0, 16.0, 24.0, 37.0, 43.0, 47.0, 98.0, 114.0, 169.0, 216.0, 262.0, 351.0, 412.0, 439.0, 399.0, 335.0, 258.0, 197.0, 169.0, 120.0, 89.0, 52.0, 54.0, 32.0, 20.0, 20.0, 12.0, 11.0, 9.0, 3.0, 4.0, 1.0, 1.0, 2.0, 5.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-11.140625, -10.78564453125, -10.4306640625, -10.07568359375, -9.720703125, -9.36572265625, -9.0107421875, -8.65576171875, -8.30078125, -7.94580078125, -7.5908203125, -7.23583984375, -6.880859375, -6.52587890625, -6.1708984375, -5.81591796875, -5.4609375, -5.10595703125, -4.7509765625, -4.39599609375, -4.041015625, -3.68603515625, -3.3310546875, -2.97607421875, -2.62109375, -2.26611328125, -1.9111328125, -1.55615234375, -1.201171875, -0.84619140625, -0.4912109375, -0.13623046875, 0.21875, 0.57373046875, 0.9287109375, 1.28369140625, 1.638671875, 1.99365234375, 2.3486328125, 2.70361328125, 3.05859375, 3.41357421875, 3.7685546875, 4.12353515625, 4.478515625, 4.83349609375, 5.1884765625, 5.54345703125, 5.8984375, 6.25341796875, 6.6083984375, 6.96337890625, 7.318359375, 7.67333984375, 8.0283203125, 8.38330078125, 8.73828125, 9.09326171875, 9.4482421875, 9.80322265625, 10.158203125, 10.51318359375, 10.8681640625, 11.22314453125, 11.578125]}, "gradients/decoder.transformer.h.14.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 2.0, 2.0, 5.0, 11.0, 18.0, 14.0, 28.0, 33.0, 48.0, 60.0, 89.0, 129.0, 203.0, 315.0, 503.0, 1567.0, 16376.0, 624180.0, 3353531.0, 188394.0, 6572.0, 957.0, 407.0, 242.0, 175.0, 101.0, 95.0, 61.0, 43.0, 37.0, 23.0, 18.0, 9.0, 8.0, 9.0, 7.0, 4.0, 2.0, 3.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-27.578125, -26.6982421875, -25.818359375, -24.9384765625, -24.05859375, -23.1787109375, -22.298828125, -21.4189453125, -20.5390625, -19.6591796875, -18.779296875, -17.8994140625, -17.01953125, -16.1396484375, -15.259765625, -14.3798828125, -13.5, -12.6201171875, -11.740234375, -10.8603515625, -9.98046875, -9.1005859375, -8.220703125, -7.3408203125, -6.4609375, -5.5810546875, -4.701171875, -3.8212890625, -2.94140625, -2.0615234375, -1.181640625, -0.3017578125, 0.578125, 1.4580078125, 2.337890625, 3.2177734375, 4.09765625, 4.9775390625, 5.857421875, 6.7373046875, 7.6171875, 8.4970703125, 9.376953125, 10.2568359375, 11.13671875, 12.0166015625, 12.896484375, 13.7763671875, 14.65625, 15.5361328125, 16.416015625, 17.2958984375, 18.17578125, 19.0556640625, 19.935546875, 20.8154296875, 21.6953125, 22.5751953125, 23.455078125, 24.3349609375, 25.21484375, 26.0947265625, 26.974609375, 27.8544921875, 28.734375]}, "gradients/decoder.transformer.h.14.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 5.0, 17.0, 45.0, 72.0, 140.0, 206.0, 231.0, 146.0, 80.0, 45.0, 20.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-119.32610321044922, -116.27906036376953, -113.23200988769531, -110.18496704101562, -107.13792419433594, -104.09087371826172, -101.04383087158203, -97.99678039550781, -94.94973754882812, -91.90269470214844, -88.85564422607422, -85.80860137939453, -82.76155090332031, -79.71450805664062, -76.66746520996094, -73.62042236328125, -70.57337188720703, -67.52632904052734, -64.47927856445312, -61.43223571777344, -58.385189056396484, -55.33814239501953, -52.291099548339844, -49.24405288696289, -46.19700622558594, -43.149959564208984, -40.10291290283203, -37.055870056152344, -34.00882339477539, -30.961776733398438, -27.914731979370117, -24.867687225341797, -21.820648193359375, -18.773601531982422, -15.726556777954102, -12.679511070251465, -9.632465362548828, -6.585419654846191, -3.5383739471435547, -0.4913291931152344, 2.5557174682617188, 5.6027631759643555, 8.649808883666992, 11.696854591369629, 14.743900299072266, 17.79094696044922, 20.83799171447754, 23.88503646850586, 26.932083129882812, 29.979129791259766, 33.02617645263672, 36.073219299316406, 39.12026596069336, 42.16731262207031, 45.21435546875, 48.26140213012695, 51.308448791503906, 54.35549545288086, 57.40254211425781, 60.4495849609375, 63.49663162231445, 66.5436782836914, 69.5907211303711, 72.63777160644531, 75.684814453125]}, "gradients/decoder.transformer.h.14.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 4.0, 1.0, 3.0, 8.0, 8.0, 6.0, 13.0, 8.0, 12.0, 13.0, 16.0, 19.0, 30.0, 18.0, 26.0, 32.0, 40.0, 38.0, 36.0, 33.0, 48.0, 41.0, 27.0, 27.0, 47.0, 42.0, 40.0, 26.0, 50.0, 44.0, 31.0, 33.0, 24.0, 16.0, 28.0, 22.0, 12.0, 17.0, 12.0, 8.0, 12.0, 8.0, 7.0, 6.0, 8.0, 4.0, 4.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-31.032493591308594, -29.87095069885254, -28.709407806396484, -27.547866821289062, -26.386323928833008, -25.224781036376953, -24.0632381439209, -22.901695251464844, -21.740154266357422, -20.578611373901367, -19.417068481445312, -18.25552749633789, -17.093984603881836, -15.932441711425781, -14.770898818969727, -13.609356880187988, -12.447813034057617, -11.286270141601562, -10.124728202819824, -8.96318531036377, -7.801642894744873, -6.640100479125977, -5.478557586669922, -4.317015647888184, -3.155472755432129, -1.9939302206039429, -0.8323876857757568, 0.32915496826171875, 1.4906973838806152, 2.6522397994995117, 3.8137826919555664, 4.975324630737305, 6.136867523193359, 7.298409938812256, 8.459952354431152, 9.621495246887207, 10.783037185668945, 11.944580078125, 13.106122970581055, 14.267664909362793, 15.429207801818848, 16.590749740600586, 17.75229263305664, 18.913835525512695, 20.07537841796875, 21.236919403076172, 22.39846420288086, 23.56000518798828, 24.721548080444336, 25.88309097290039, 27.044633865356445, 28.2061767578125, 29.367717742919922, 30.529260635375977, 31.69080352783203, 32.85234451293945, 34.01388931274414, 35.17543029785156, 36.33697509765625, 37.49851608276367, 38.66006088256836, 39.82160186767578, 40.98314666748047, 42.14468765258789, 43.30622863769531]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 9.0, 7.0, 10.0, 10.0, 17.0, 23.0, 22.0, 23.0, 22.0, 40.0, 49.0, 34.0, 45.0, 53.0, 53.0, 62.0, 38.0, 50.0, 55.0, 51.0, 41.0, 49.0, 43.0, 37.0, 26.0, 23.0, 20.0, 16.0, 21.0, 13.0, 16.0, 9.0, 5.0, 6.0, 3.0, 1.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.3984375, -5.19635009765625, -4.9942626953125, -4.79217529296875, -4.590087890625, -4.38800048828125, -4.1859130859375, -3.98382568359375, -3.78173828125, -3.57965087890625, -3.3775634765625, -3.17547607421875, -2.973388671875, -2.77130126953125, -2.5692138671875, -2.36712646484375, -2.1650390625, -1.96295166015625, -1.7608642578125, -1.55877685546875, -1.356689453125, -1.15460205078125, -0.9525146484375, -0.75042724609375, -0.54833984375, -0.34625244140625, -0.1441650390625, 0.05792236328125, 0.260009765625, 0.46209716796875, 0.6641845703125, 0.86627197265625, 1.068359375, 1.27044677734375, 1.4725341796875, 1.67462158203125, 1.876708984375, 2.07879638671875, 2.2808837890625, 2.48297119140625, 2.68505859375, 2.88714599609375, 3.0892333984375, 3.29132080078125, 3.493408203125, 3.69549560546875, 3.8975830078125, 4.09967041015625, 4.3017578125, 4.50384521484375, 4.7059326171875, 4.90802001953125, 5.110107421875, 5.31219482421875, 5.5142822265625, 5.71636962890625, 5.91845703125, 6.12054443359375, 6.3226318359375, 6.52471923828125, 6.726806640625, 6.92889404296875, 7.1309814453125, 7.33306884765625, 7.53515625]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 4.0, 6.0, 4.0, 12.0, 16.0, 23.0, 27.0, 49.0, 75.0, 107.0, 190.0, 289.0, 432.0, 610.0, 983.0, 1531.0, 2238.0, 3490.0, 4872.0, 7490.0, 11368.0, 16852.0, 25797.0, 39578.0, 61426.0, 95631.0, 147396.0, 189593.0, 153337.0, 100035.0, 63772.0, 41387.0, 26777.0, 17890.0, 11686.0, 7831.0, 5283.0, 3629.0, 2390.0, 1632.0, 1004.0, 617.0, 425.0, 277.0, 194.0, 115.0, 82.0, 52.0, 31.0, 10.0, 8.0, 8.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.385498046875, -0.3732795715332031, -0.36106109619140625, -0.3488426208496094, -0.3366241455078125, -0.3244056701660156, -0.31218719482421875, -0.2999687194824219, -0.287750244140625, -0.2755317687988281, -0.26331329345703125, -0.2510948181152344, -0.2388763427734375, -0.22665786743164062, -0.21443939208984375, -0.20222091674804688, -0.19000244140625, -0.17778396606445312, -0.16556549072265625, -0.15334701538085938, -0.1411285400390625, -0.12891006469726562, -0.11669158935546875, -0.10447311401367188, -0.092254638671875, -0.08003616333007812, -0.06781768798828125, -0.055599212646484375, -0.0433807373046875, -0.031162261962890625, -0.01894378662109375, -0.006725311279296875, 0.0054931640625, 0.017711639404296875, 0.02993011474609375, 0.042148590087890625, 0.0543670654296875, 0.06658554077148438, 0.07880401611328125, 0.09102249145507812, 0.103240966796875, 0.11545944213867188, 0.12767791748046875, 0.13989639282226562, 0.1521148681640625, 0.16433334350585938, 0.17655181884765625, 0.18877029418945312, 0.20098876953125, 0.21320724487304688, 0.22542572021484375, 0.23764419555664062, 0.2498626708984375, 0.2620811462402344, 0.27429962158203125, 0.2865180969238281, 0.298736572265625, 0.3109550476074219, 0.32317352294921875, 0.3353919982910156, 0.3476104736328125, 0.3598289489746094, 0.37204742431640625, 0.3842658996582031, 0.396484375]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 8.0, 4.0, 8.0, 11.0, 8.0, 14.0, 16.0, 17.0, 20.0, 21.0, 29.0, 30.0, 28.0, 27.0, 30.0, 28.0, 32.0, 39.0, 34.0, 40.0, 1070.0, 33.0, 52.0, 37.0, 38.0, 32.0, 36.0, 31.0, 40.0, 37.0, 22.0, 21.0, 19.0, 31.0, 16.0, 7.0, 11.0, 7.0, 10.0, 9.0, 5.0, 6.0, 3.0, 3.0, 7.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0], "bins": [-3.162109375, -3.0589599609375, -2.955810546875, -2.8526611328125, -2.74951171875, -2.6463623046875, -2.543212890625, -2.4400634765625, -2.3369140625, -2.2337646484375, -2.130615234375, -2.0274658203125, -1.92431640625, -1.8211669921875, -1.718017578125, -1.6148681640625, -1.51171875, -1.4085693359375, -1.305419921875, -1.2022705078125, -1.09912109375, -0.9959716796875, -0.892822265625, -0.7896728515625, -0.6865234375, -0.5833740234375, -0.480224609375, -0.3770751953125, -0.27392578125, -0.1707763671875, -0.067626953125, 0.0355224609375, 0.138671875, 0.2418212890625, 0.344970703125, 0.4481201171875, 0.55126953125, 0.6544189453125, 0.757568359375, 0.8607177734375, 0.9638671875, 1.0670166015625, 1.170166015625, 1.2733154296875, 1.37646484375, 1.4796142578125, 1.582763671875, 1.6859130859375, 1.7890625, 1.8922119140625, 1.995361328125, 2.0985107421875, 2.20166015625, 2.3048095703125, 2.407958984375, 2.5111083984375, 2.6142578125, 2.7174072265625, 2.820556640625, 2.9237060546875, 3.02685546875, 3.1300048828125, 3.233154296875, 3.3363037109375, 3.439453125]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 3.0, 0.0, 7.0, 10.0, 7.0, 12.0, 26.0, 36.0, 57.0, 62.0, 96.0, 150.0, 234.0, 333.0, 446.0, 734.0, 1195.0, 1797.0, 2792.0, 4224.0, 6683.0, 10502.0, 16838.0, 26446.0, 41941.0, 66105.0, 103386.0, 153481.0, 1232142.0, 150657.0, 100715.0, 64177.0, 40870.0, 25854.0, 16321.0, 10293.0, 6629.0, 4197.0, 2704.0, 1717.0, 1150.0, 690.0, 458.0, 310.0, 220.0, 159.0, 107.0, 55.0, 35.0, 25.0, 20.0, 11.0, 8.0, 6.0, 3.0, 4.0, 3.0, 2.0, 1.0, 1.0], "bins": [-0.34375, -0.3331642150878906, -0.32257843017578125, -0.3119926452636719, -0.3014068603515625, -0.2908210754394531, -0.28023529052734375, -0.2696495056152344, -0.259063720703125, -0.24847793579101562, -0.23789215087890625, -0.22730636596679688, -0.2167205810546875, -0.20613479614257812, -0.19554901123046875, -0.18496322631835938, -0.17437744140625, -0.16379165649414062, -0.15320587158203125, -0.14262008666992188, -0.1320343017578125, -0.12144851684570312, -0.11086273193359375, -0.10027694702148438, -0.089691162109375, -0.07910537719726562, -0.06851959228515625, -0.057933807373046875, -0.0473480224609375, -0.036762237548828125, -0.02617645263671875, -0.015590667724609375, -0.0050048828125, 0.005580902099609375, 0.01616668701171875, 0.026752471923828125, 0.0373382568359375, 0.047924041748046875, 0.05850982666015625, 0.06909561157226562, 0.079681396484375, 0.09026718139648438, 0.10085296630859375, 0.11143875122070312, 0.1220245361328125, 0.13261032104492188, 0.14319610595703125, 0.15378189086914062, 0.16436767578125, 0.17495346069335938, 0.18553924560546875, 0.19612503051757812, 0.2067108154296875, 0.21729660034179688, 0.22788238525390625, 0.23846817016601562, 0.249053955078125, 0.2596397399902344, 0.27022552490234375, 0.2808113098144531, 0.2913970947265625, 0.3019828796386719, 0.31256866455078125, 0.3231544494628906, 0.333740234375]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 4.0, 5.0, 4.0, 4.0, 5.0, 7.0, 20.0, 21.0, 16.0, 23.0, 36.0, 47.0, 57.0, 55.0, 59.0, 75.0, 70.0, 59.0, 66.0, 57.0, 54.0, 52.0, 48.0, 34.0, 27.0, 20.0, 24.0, 14.0, 12.0, 5.0, 5.0, 6.0, 4.0, 6.0, 3.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0011997222900390625, -0.0011571943759918213, -0.00111466646194458, -0.0010721385478973389, -0.0010296106338500977, -0.0009870827198028564, -0.0009445548057556152, -0.000902026891708374, -0.0008594989776611328, -0.0008169710636138916, -0.0007744431495666504, -0.0007319152355194092, -0.000689387321472168, -0.0006468594074249268, -0.0006043314933776855, -0.0005618035793304443, -0.0005192756652832031, -0.0004767477512359619, -0.0004342198371887207, -0.0003916919231414795, -0.0003491640090942383, -0.00030663609504699707, -0.00026410818099975586, -0.00022158026695251465, -0.00017905235290527344, -0.00013652443885803223, -9.399652481079102e-05, -5.1468610763549805e-05, -8.940696716308594e-06, 3.358721733093262e-05, 7.611513137817383e-05, 0.00011864304542541504, 0.00016117095947265625, 0.00020369887351989746, 0.00024622678756713867, 0.0002887547016143799, 0.0003312826156616211, 0.0003738105297088623, 0.0004163384437561035, 0.0004588663578033447, 0.0005013942718505859, 0.0005439221858978271, 0.0005864500999450684, 0.0006289780139923096, 0.0006715059280395508, 0.000714033842086792, 0.0007565617561340332, 0.0007990896701812744, 0.0008416175842285156, 0.0008841454982757568, 0.000926673412322998, 0.0009692013263702393, 0.0010117292404174805, 0.0010542571544647217, 0.0010967850685119629, 0.001139312982559204, 0.0011818408966064453, 0.0012243688106536865, 0.0012668967247009277, 0.001309424638748169, 0.0013519525527954102, 0.0013944804668426514, 0.0014370083808898926, 0.0014795362949371338, 0.001522064208984375]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 6.0, 6.0, 6.0, 9.0, 5.0, 15.0, 17.0, 35.0, 30.0, 43.0, 66.0, 100.0, 175.0, 335.0, 812.0, 19305.0, 1018849.0, 7211.0, 758.0, 274.0, 144.0, 116.0, 75.0, 40.0, 30.0, 33.0, 15.0, 18.0, 7.0, 5.0, 4.0, 4.0, 4.0, 1.0, 0.0, 0.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.031890869140625, -0.0309906005859375, -0.03009033203125, -0.0291900634765625, -0.028289794921875, -0.0273895263671875, -0.0264892578125, -0.0255889892578125, -0.024688720703125, -0.0237884521484375, -0.02288818359375, -0.0219879150390625, -0.021087646484375, -0.0201873779296875, -0.019287109375, -0.0183868408203125, -0.017486572265625, -0.0165863037109375, -0.01568603515625, -0.0147857666015625, -0.013885498046875, -0.0129852294921875, -0.0120849609375, -0.0111846923828125, -0.010284423828125, -0.0093841552734375, -0.00848388671875, -0.0075836181640625, -0.006683349609375, -0.0057830810546875, -0.0048828125, -0.0039825439453125, -0.003082275390625, -0.0021820068359375, -0.00128173828125, -0.0003814697265625, 0.000518798828125, 0.0014190673828125, 0.0023193359375, 0.0032196044921875, 0.004119873046875, 0.0050201416015625, 0.00592041015625, 0.0068206787109375, 0.007720947265625, 0.0086212158203125, 0.009521484375, 0.0104217529296875, 0.011322021484375, 0.0122222900390625, 0.01312255859375, 0.0140228271484375, 0.014923095703125, 0.0158233642578125, 0.0167236328125, 0.0176239013671875, 0.018524169921875, 0.0194244384765625, 0.02032470703125, 0.0212249755859375, 0.022125244140625, 0.0230255126953125, 0.02392578125, 0.0248260498046875, 0.025726318359375]}, "gradients/decoder.transformer.h.14.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 8.0, 12.0, 19.0, 54.0, 90.0, 120.0, 189.0, 155.0, 153.0, 109.0, 55.0, 22.0, 12.0, 10.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0015501226298511028, -0.0015071295201778412, -0.0014641364105045795, -0.001421143300831318, -0.0013781501911580563, -0.0013351570814847946, -0.001292163971811533, -0.0012491707457229495, -0.0012061776360496879, -0.0011631845263764262, -0.0011201914167031646, -0.001077198307029903, -0.0010342051973566413, -0.0009912119712680578, -0.0009482189198024571, -0.0009052257519215345, -0.0008622327004559338, -0.0008192395907826722, -0.0007762464811094105, -0.000733253313228488, -0.0006902602035552263, -0.0006472670938819647, -0.000604273984208703, -0.0005612808745354414, -0.0005182877648621798, -0.0004752946551889181, -0.000432301516411826, -0.00038930840673856437, -0.00034631526796147227, -0.00030332215828821063, -0.000260329048614949, -0.0002173359098378569, -0.0001743427710607648, -0.00013134964683558792, -8.835652988636866e-05, -4.5363412937149405e-05, -2.3702887119725347e-06, 4.0622835513204336e-05, 8.361594518646598e-05, 0.00012660908396355808, 0.00016960219363681972, 0.0002125953178619966, 0.00025558844208717346, 0.0002985815517604351, 0.00034157466143369675, 0.00038456780021078885, 0.0004275609098840505, 0.0004705540486611426, 0.0005135471583344042, 0.0005565402680076659, 0.0005995333776809275, 0.0006425265455618501, 0.0006855196552351117, 0.0007285127649083734, 0.000771505874581635, 0.0008144989842548966, 0.0008574920939281583, 0.0009004852036014199, 0.0009434783132746816, 0.0009864714229479432, 0.0010294645326212049, 0.0010724577587097883, 0.00111545086838305, 0.0011584439780563116, 0.0012014370877295732]}, "gradients/decoder.transformer.h.14.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 6.0, 2.0, 7.0, 3.0, 11.0, 7.0, 12.0, 8.0, 14.0, 11.0, 21.0, 19.0, 24.0, 29.0, 27.0, 27.0, 41.0, 28.0, 41.0, 48.0, 32.0, 36.0, 33.0, 46.0, 31.0, 49.0, 41.0, 37.0, 31.0, 39.0, 22.0, 29.0, 28.0, 20.0, 27.0, 25.0, 20.0, 13.0, 16.0, 4.0, 8.0, 11.0, 7.0, 7.0, 3.0, 4.0, 3.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0005868673324584961, -0.000568232499063015, -0.0005495976656675339, -0.0005309628322720528, -0.0005123279988765717, -0.0004936931654810905, -0.00047505833208560944, -0.0004564234986901283, -0.0004377886652946472, -0.0004191538318991661, -0.000400518998503685, -0.0003818841651082039, -0.0003632493317127228, -0.00034461449831724167, -0.00032597966492176056, -0.00030734483152627945, -0.00028870999813079834, -0.00027007516473531723, -0.0002514403313398361, -0.000232805497944355, -0.0002141706645488739, -0.0001955358311533928, -0.00017690099775791168, -0.00015826616436243057, -0.00013963133096694946, -0.00012099649757146835, -0.00010236166417598724, -8.372683078050613e-05, -6.509199738502502e-05, -4.6457163989543915e-05, -2.7822330594062805e-05, -9.187497198581696e-06, 9.447336196899414e-06, 2.8082169592380524e-05, 4.671700298786163e-05, 6.535183638334274e-05, 8.398666977882385e-05, 0.00010262150317430496, 0.00012125633656978607, 0.00013989116996526718, 0.0001585260033607483, 0.0001771608367562294, 0.0001957956701517105, 0.00021443050354719162, 0.00023306533694267273, 0.00025170017033815384, 0.00027033500373363495, 0.00028896983712911606, 0.00030760467052459717, 0.0003262395039200783, 0.0003448743373155594, 0.0003635091707110405, 0.0003821440041065216, 0.0004007788375020027, 0.0004194136708974838, 0.00043804850429296494, 0.00045668333768844604, 0.00047531817108392715, 0.0004939530044794083, 0.0005125878378748894, 0.0005312226712703705, 0.0005498575046658516, 0.0005684923380613327, 0.0005871271714568138, 0.0006057620048522949]}, "gradients/decoder.transformer.h.14.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 9.0, 7.0, 10.0, 10.0, 17.0, 23.0, 22.0, 23.0, 22.0, 40.0, 49.0, 34.0, 45.0, 52.0, 54.0, 62.0, 38.0, 50.0, 55.0, 51.0, 41.0, 49.0, 43.0, 37.0, 26.0, 23.0, 20.0, 16.0, 21.0, 13.0, 16.0, 9.0, 5.0, 6.0, 3.0, 1.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.3984375, -5.19635009765625, -4.9942626953125, -4.79217529296875, -4.590087890625, -4.38800048828125, -4.1859130859375, -3.98382568359375, -3.78173828125, -3.57965087890625, -3.3775634765625, -3.17547607421875, -2.973388671875, -2.77130126953125, -2.5692138671875, -2.36712646484375, -2.1650390625, -1.96295166015625, -1.7608642578125, -1.55877685546875, -1.356689453125, -1.15460205078125, -0.9525146484375, -0.75042724609375, -0.54833984375, -0.34625244140625, -0.1441650390625, 0.05792236328125, 0.260009765625, 0.46209716796875, 0.6641845703125, 0.86627197265625, 1.068359375, 1.27044677734375, 1.4725341796875, 1.67462158203125, 1.876708984375, 2.07879638671875, 2.2808837890625, 2.48297119140625, 2.68505859375, 2.88714599609375, 3.0892333984375, 3.29132080078125, 3.493408203125, 3.69549560546875, 3.8975830078125, 4.09967041015625, 4.3017578125, 4.50384521484375, 4.7059326171875, 4.90802001953125, 5.110107421875, 5.31219482421875, 5.5142822265625, 5.71636962890625, 5.91845703125, 6.12054443359375, 6.3226318359375, 6.52471923828125, 6.726806640625, 6.92889404296875, 7.1309814453125, 7.33306884765625, 7.53515625]}, "gradients/decoder.transformer.h.14.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 4.0, 6.0, 5.0, 12.0, 10.0, 16.0, 20.0, 19.0, 39.0, 47.0, 81.0, 123.0, 232.0, 339.0, 632.0, 1290.0, 2653.0, 5845.0, 12352.0, 25966.0, 52539.0, 105078.0, 215558.0, 309392.0, 161488.0, 78577.0, 39557.0, 19122.0, 9159.0, 4201.0, 1960.0, 960.0, 488.0, 287.0, 152.0, 118.0, 64.0, 51.0, 23.0, 27.0, 21.0, 9.0, 4.0, 10.0, 5.0, 5.0, 3.0, 6.0, 3.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-4.85546875, -4.7003173828125, -4.545166015625, -4.3900146484375, -4.23486328125, -4.0797119140625, -3.924560546875, -3.7694091796875, -3.6142578125, -3.4591064453125, -3.303955078125, -3.1488037109375, -2.99365234375, -2.8385009765625, -2.683349609375, -2.5281982421875, -2.373046875, -2.2178955078125, -2.062744140625, -1.9075927734375, -1.75244140625, -1.5972900390625, -1.442138671875, -1.2869873046875, -1.1318359375, -0.9766845703125, -0.821533203125, -0.6663818359375, -0.51123046875, -0.3560791015625, -0.200927734375, -0.0457763671875, 0.109375, 0.2645263671875, 0.419677734375, 0.5748291015625, 0.72998046875, 0.8851318359375, 1.040283203125, 1.1954345703125, 1.3505859375, 1.5057373046875, 1.660888671875, 1.8160400390625, 1.97119140625, 2.1263427734375, 2.281494140625, 2.4366455078125, 2.591796875, 2.7469482421875, 2.902099609375, 3.0572509765625, 3.21240234375, 3.3675537109375, 3.522705078125, 3.6778564453125, 3.8330078125, 3.9881591796875, 4.143310546875, 4.2984619140625, 4.45361328125, 4.6087646484375, 4.763916015625, 4.9190673828125, 5.07421875]}, "gradients/decoder.transformer.h.14.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 5.0, 2.0, 3.0, 6.0, 5.0, 3.0, 5.0, 7.0, 17.0, 12.0, 10.0, 23.0, 12.0, 13.0, 13.0, 21.0, 26.0, 31.0, 32.0, 27.0, 33.0, 38.0, 46.0, 65.0, 141.0, 314.0, 1400.0, 213.0, 84.0, 69.0, 49.0, 38.0, 30.0, 28.0, 18.0, 20.0, 24.0, 27.0, 24.0, 26.0, 18.0, 15.0, 13.0, 13.0, 7.0, 8.0, 5.0, 4.0, 5.0, 2.0, 4.0, 5.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0], "bins": [-16.015625, -15.517822265625, -15.02001953125, -14.522216796875, -14.0244140625, -13.526611328125, -13.02880859375, -12.531005859375, -12.033203125, -11.535400390625, -11.03759765625, -10.539794921875, -10.0419921875, -9.544189453125, -9.04638671875, -8.548583984375, -8.05078125, -7.552978515625, -7.05517578125, -6.557373046875, -6.0595703125, -5.561767578125, -5.06396484375, -4.566162109375, -4.068359375, -3.570556640625, -3.07275390625, -2.574951171875, -2.0771484375, -1.579345703125, -1.08154296875, -0.583740234375, -0.0859375, 0.411865234375, 0.90966796875, 1.407470703125, 1.9052734375, 2.403076171875, 2.90087890625, 3.398681640625, 3.896484375, 4.394287109375, 4.89208984375, 5.389892578125, 5.8876953125, 6.385498046875, 6.88330078125, 7.381103515625, 7.87890625, 8.376708984375, 8.87451171875, 9.372314453125, 9.8701171875, 10.367919921875, 10.86572265625, 11.363525390625, 11.861328125, 12.359130859375, 12.85693359375, 13.354736328125, 13.8525390625, 14.350341796875, 14.84814453125, 15.345947265625, 15.84375]}, "gradients/decoder.transformer.h.14.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 5.0, 4.0, 9.0, 10.0, 3.0, 9.0, 8.0, 24.0, 23.0, 25.0, 39.0, 51.0, 68.0, 94.0, 137.0, 163.0, 289.0, 550.0, 2160.0, 1708812.0, 1429722.0, 2092.0, 543.0, 273.0, 175.0, 113.0, 68.0, 57.0, 53.0, 36.0, 27.0, 18.0, 13.0, 16.0, 8.0, 5.0, 7.0, 6.0, 2.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-46.8125, -44.861328125, -42.91015625, -40.958984375, -39.0078125, -37.056640625, -35.10546875, -33.154296875, -31.203125, -29.251953125, -27.30078125, -25.349609375, -23.3984375, -21.447265625, -19.49609375, -17.544921875, -15.59375, -13.642578125, -11.69140625, -9.740234375, -7.7890625, -5.837890625, -3.88671875, -1.935546875, 0.015625, 1.966796875, 3.91796875, 5.869140625, 7.8203125, 9.771484375, 11.72265625, 13.673828125, 15.625, 17.576171875, 19.52734375, 21.478515625, 23.4296875, 25.380859375, 27.33203125, 29.283203125, 31.234375, 33.185546875, 35.13671875, 37.087890625, 39.0390625, 40.990234375, 42.94140625, 44.892578125, 46.84375, 48.794921875, 50.74609375, 52.697265625, 54.6484375, 56.599609375, 58.55078125, 60.501953125, 62.453125, 64.404296875, 66.35546875, 68.306640625, 70.2578125, 72.208984375, 74.16015625, 76.111328125, 78.0625]}, "gradients/decoder.transformer.h.14.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 99.0, 563.0, 328.0, 22.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-266.1383972167969, -261.0935363769531, -256.04864501953125, -251.00376892089844, -245.95889282226562, -240.9140167236328, -235.869140625, -230.82427978515625, -225.77938842773438, -220.73451232910156, -215.68963623046875, -210.64476013183594, -205.59988403320312, -200.5550079345703, -195.5101318359375, -190.46527099609375, -185.42039489746094, -180.37551879882812, -175.3306427001953, -170.2857666015625, -165.2408905029297, -160.19601440429688, -155.15113830566406, -150.10626220703125, -145.0614013671875, -140.0165252685547, -134.97164916992188, -129.92677307128906, -124.88189697265625, -119.83702087402344, -114.79215240478516, -109.74727630615234, -104.70240020751953, -99.65752410888672, -94.6126480102539, -89.5677719116211, -84.52290344238281, -79.47802734375, -74.43315124511719, -69.38827514648438, -64.34339904785156, -59.29852294921875, -54.25364685058594, -49.20877456665039, -44.16389846801758, -39.119022369384766, -34.07415008544922, -29.029273986816406, -23.984397888183594, -18.93952178955078, -13.894647598266602, -8.849772453308105, -3.8048973083496094, 1.2399787902832031, 6.284852981567383, 11.329727172851562, 16.374603271484375, 21.419479370117188, 26.464353561401367, 31.509227752685547, 36.55410385131836, 41.59897994995117, 46.64385223388672, 51.68872833251953, 56.733604431152344]}, "gradients/decoder.transformer.h.14.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 2.0, 2.0, 4.0, 4.0, 6.0, 4.0, 5.0, 11.0, 9.0, 11.0, 17.0, 13.0, 19.0, 16.0, 18.0, 22.0, 19.0, 23.0, 26.0, 34.0, 39.0, 53.0, 39.0, 35.0, 47.0, 36.0, 38.0, 36.0, 39.0, 53.0, 48.0, 23.0, 41.0, 31.0, 20.0, 19.0, 19.0, 25.0, 17.0, 14.0, 13.0, 11.0, 13.0, 8.0, 7.0, 3.0, 5.0, 5.0, 1.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-53.213050842285156, -51.61225891113281, -50.01146697998047, -48.41067886352539, -46.80988693237305, -45.2090950012207, -43.608306884765625, -42.00751495361328, -40.40672302246094, -38.805931091308594, -37.20513916015625, -35.60435104370117, -34.00355911254883, -32.402767181396484, -30.801977157592773, -29.201187133789062, -27.60039520263672, -25.999603271484375, -24.398813247680664, -22.798023223876953, -21.19723129272461, -19.596439361572266, -17.995649337768555, -16.394859313964844, -14.7940673828125, -13.193276405334473, -11.592485427856445, -9.991694450378418, -8.39090347290039, -6.790112495422363, -5.189321517944336, -3.5885305404663086, -1.9877395629882812, -0.3869485855102539, 1.2138423919677734, 2.814633369445801, 4.415424346923828, 6.0162153244018555, 7.617006301879883, 9.21779727935791, 10.818588256835938, 12.419379234313965, 14.020170211791992, 15.62096118927002, 17.221752166748047, 18.82254409790039, 20.4233341217041, 22.024124145507812, 23.624916076660156, 25.2257080078125, 26.82649803161621, 28.427288055419922, 30.028079986572266, 31.62887191772461, 33.22966003417969, 34.83045196533203, 36.431243896484375, 38.03203582763672, 39.63282775878906, 41.23361587524414, 42.834407806396484, 44.43519973754883, 46.035987854003906, 47.63677978515625, 49.237571716308594]}, "gradients/decoder.transformer.h.13.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 2.0, 1.0, 5.0, 8.0, 11.0, 14.0, 17.0, 17.0, 15.0, 26.0, 30.0, 28.0, 24.0, 40.0, 43.0, 43.0, 44.0, 42.0, 46.0, 58.0, 40.0, 46.0, 63.0, 47.0, 45.0, 33.0, 37.0, 31.0, 19.0, 20.0, 23.0, 21.0, 14.0, 14.0, 13.0, 7.0, 8.0, 4.0, 7.0, 3.0, 0.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.97265625, -5.75830078125, -5.5439453125, -5.32958984375, -5.115234375, -4.90087890625, -4.6865234375, -4.47216796875, -4.2578125, -4.04345703125, -3.8291015625, -3.61474609375, -3.400390625, -3.18603515625, -2.9716796875, -2.75732421875, -2.54296875, -2.32861328125, -2.1142578125, -1.89990234375, -1.685546875, -1.47119140625, -1.2568359375, -1.04248046875, -0.828125, -0.61376953125, -0.3994140625, -0.18505859375, 0.029296875, 0.24365234375, 0.4580078125, 0.67236328125, 0.88671875, 1.10107421875, 1.3154296875, 1.52978515625, 1.744140625, 1.95849609375, 2.1728515625, 2.38720703125, 2.6015625, 2.81591796875, 3.0302734375, 3.24462890625, 3.458984375, 3.67333984375, 3.8876953125, 4.10205078125, 4.31640625, 4.53076171875, 4.7451171875, 4.95947265625, 5.173828125, 5.38818359375, 5.6025390625, 5.81689453125, 6.03125, 6.24560546875, 6.4599609375, 6.67431640625, 6.888671875, 7.10302734375, 7.3173828125, 7.53173828125, 7.74609375]}, "gradients/decoder.transformer.h.13.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 2.0, 7.0, 3.0, 5.0, 8.0, 11.0, 15.0, 13.0, 17.0, 31.0, 29.0, 35.0, 53.0, 47.0, 105.0, 241.0, 580.0, 2709.0, 46087.0, 1527712.0, 2497631.0, 112881.0, 4543.0, 860.0, 264.0, 122.0, 61.0, 52.0, 18.0, 28.0, 23.0, 21.0, 16.0, 11.0, 11.0, 10.0, 8.0, 9.0, 5.0, 4.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.90625, -17.26953125, -16.6328125, -15.99609375, -15.359375, -14.72265625, -14.0859375, -13.44921875, -12.8125, -12.17578125, -11.5390625, -10.90234375, -10.265625, -9.62890625, -8.9921875, -8.35546875, -7.71875, -7.08203125, -6.4453125, -5.80859375, -5.171875, -4.53515625, -3.8984375, -3.26171875, -2.625, -1.98828125, -1.3515625, -0.71484375, -0.078125, 0.55859375, 1.1953125, 1.83203125, 2.46875, 3.10546875, 3.7421875, 4.37890625, 5.015625, 5.65234375, 6.2890625, 6.92578125, 7.5625, 8.19921875, 8.8359375, 9.47265625, 10.109375, 10.74609375, 11.3828125, 12.01953125, 12.65625, 13.29296875, 13.9296875, 14.56640625, 15.203125, 15.83984375, 16.4765625, 17.11328125, 17.75, 18.38671875, 19.0234375, 19.66015625, 20.296875, 20.93359375, 21.5703125, 22.20703125, 22.84375]}, "gradients/decoder.transformer.h.13.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 8.0, 6.0, 9.0, 15.0, 26.0, 27.0, 47.0, 67.0, 119.0, 191.0, 287.0, 382.0, 505.0, 599.0, 532.0, 411.0, 275.0, 209.0, 145.0, 93.0, 45.0, 34.0, 16.0, 19.0, 6.0, 4.0, 5.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.890625, -9.356689453125, -8.82275390625, -8.288818359375, -7.7548828125, -7.220947265625, -6.68701171875, -6.153076171875, -5.619140625, -5.085205078125, -4.55126953125, -4.017333984375, -3.4833984375, -2.949462890625, -2.41552734375, -1.881591796875, -1.34765625, -0.813720703125, -0.27978515625, 0.254150390625, 0.7880859375, 1.322021484375, 1.85595703125, 2.389892578125, 2.923828125, 3.457763671875, 3.99169921875, 4.525634765625, 5.0595703125, 5.593505859375, 6.12744140625, 6.661376953125, 7.1953125, 7.729248046875, 8.26318359375, 8.797119140625, 9.3310546875, 9.864990234375, 10.39892578125, 10.932861328125, 11.466796875, 12.000732421875, 12.53466796875, 13.068603515625, 13.6025390625, 14.136474609375, 14.67041015625, 15.204345703125, 15.73828125, 16.272216796875, 16.80615234375, 17.340087890625, 17.8740234375, 18.407958984375, 18.94189453125, 19.475830078125, 20.009765625, 20.543701171875, 21.07763671875, 21.611572265625, 22.1455078125, 22.679443359375, 23.21337890625, 23.747314453125, 24.28125]}, "gradients/decoder.transformer.h.13.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 4.0, 4.0, 5.0, 11.0, 25.0, 23.0, 42.0, 72.0, 127.0, 212.0, 285.0, 520.0, 1786.0, 320612.0, 3854938.0, 13749.0, 869.0, 373.0, 257.0, 148.0, 79.0, 51.0, 29.0, 29.0, 19.0, 13.0, 5.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-80.9375, -79.1494140625, -77.361328125, -75.5732421875, -73.78515625, -71.9970703125, -70.208984375, -68.4208984375, -66.6328125, -64.8447265625, -63.056640625, -61.2685546875, -59.48046875, -57.6923828125, -55.904296875, -54.1162109375, -52.328125, -50.5400390625, -48.751953125, -46.9638671875, -45.17578125, -43.3876953125, -41.599609375, -39.8115234375, -38.0234375, -36.2353515625, -34.447265625, -32.6591796875, -30.87109375, -29.0830078125, -27.294921875, -25.5068359375, -23.71875, -21.9306640625, -20.142578125, -18.3544921875, -16.56640625, -14.7783203125, -12.990234375, -11.2021484375, -9.4140625, -7.6259765625, -5.837890625, -4.0498046875, -2.26171875, -0.4736328125, 1.314453125, 3.1025390625, 4.890625, 6.6787109375, 8.466796875, 10.2548828125, 12.04296875, 13.8310546875, 15.619140625, 17.4072265625, 19.1953125, 20.9833984375, 22.771484375, 24.5595703125, 26.34765625, 28.1357421875, 29.923828125, 31.7119140625, 33.5]}, "gradients/decoder.transformer.h.13.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 11.0, 56.0, 237.0, 398.0, 237.0, 64.0, 12.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-129.09307861328125, -123.33033752441406, -117.5676040649414, -111.80486297607422, -106.04212951660156, -100.27938842773438, -94.51664733886719, -88.75391387939453, -82.99118041992188, -77.22843933105469, -71.46570587158203, -65.70296478271484, -59.94023132324219, -54.177490234375, -48.41475296020508, -42.652015686035156, -36.88927459716797, -31.126537322998047, -25.363800048828125, -19.60106086730957, -13.838323593139648, -8.075586318969727, -2.312847137451172, 3.44989013671875, 9.212627410888672, 14.975364685058594, 20.738101959228516, 26.50084114074707, 32.263580322265625, 38.02631378173828, 43.78905487060547, 49.55179214477539, 55.31452941894531, 61.077266693115234, 66.84000396728516, 72.60274505615234, 78.365478515625, 84.12821960449219, 89.89096069335938, 95.65369415283203, 101.41642761230469, 107.17916870117188, 112.94190216064453, 118.70464324951172, 124.46737670898438, 130.23011779785156, 135.99285888671875, 141.75558471679688, 147.51834106445312, 153.2810821533203, 159.0438232421875, 164.80654907226562, 170.5692901611328, 176.33203125, 182.0947723388672, 187.85751342773438, 193.6202392578125, 199.3829803466797, 205.14572143554688, 210.908447265625, 216.6711883544922, 222.43392944335938, 228.19667053222656, 233.95941162109375, 239.72213745117188]}, "gradients/decoder.transformer.h.13.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 6.0, 3.0, 4.0, 1.0, 8.0, 8.0, 9.0, 9.0, 9.0, 19.0, 12.0, 31.0, 32.0, 28.0, 32.0, 33.0, 51.0, 33.0, 39.0, 50.0, 41.0, 31.0, 37.0, 45.0, 38.0, 47.0, 34.0, 48.0, 36.0, 29.0, 33.0, 19.0, 31.0, 13.0, 16.0, 20.0, 18.0, 16.0, 6.0, 12.0, 4.0, 7.0, 2.0, 4.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.532196044921875, -39.180660247802734, -37.82912063598633, -36.47758483886719, -35.12604904174805, -33.774513244628906, -32.4229736328125, -31.07143783569336, -29.719900131225586, -28.368362426757812, -27.016826629638672, -25.6652889251709, -24.313751220703125, -22.962215423583984, -21.61067771911621, -20.259140014648438, -18.907604217529297, -17.556066513061523, -16.204530715942383, -14.85299301147461, -13.501456260681152, -12.149919509887695, -10.798381805419922, -9.446845054626465, -8.095308303833008, -6.743771553039551, -5.3922343254089355, -4.04069709777832, -2.6891603469848633, -1.3376235961914062, 0.013914108276367188, 1.3654508590698242, 2.7169837951660156, 4.068520545959473, 5.420057773590088, 6.771595001220703, 8.12313175201416, 9.474668502807617, 10.82620620727539, 12.177742958068848, 13.529279708862305, 14.880816459655762, 16.23235321044922, 17.583890914916992, 18.935428619384766, 20.286964416503906, 21.63850212097168, 22.990039825439453, 24.341575622558594, 25.693113327026367, 27.044649124145508, 28.39618682861328, 29.747722625732422, 31.099260330200195, 32.45079803466797, 33.80233383178711, 35.15386962890625, 36.50540542602539, 37.8569450378418, 39.20848083496094, 40.56001663208008, 41.91155242919922, 43.263092041015625, 44.614627838134766, 45.96616744995117]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 8.0, 8.0, 16.0, 9.0, 14.0, 28.0, 20.0, 25.0, 30.0, 32.0, 24.0, 45.0, 39.0, 44.0, 49.0, 44.0, 39.0, 54.0, 58.0, 56.0, 50.0, 51.0, 32.0, 29.0, 30.0, 19.0, 33.0, 21.0, 28.0, 13.0, 12.0, 10.0, 8.0, 9.0, 9.0, 6.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.390625, -5.1854248046875, -4.980224609375, -4.7750244140625, -4.56982421875, -4.3646240234375, -4.159423828125, -3.9542236328125, -3.7490234375, -3.5438232421875, -3.338623046875, -3.1334228515625, -2.92822265625, -2.7230224609375, -2.517822265625, -2.3126220703125, -2.107421875, -1.9022216796875, -1.697021484375, -1.4918212890625, -1.28662109375, -1.0814208984375, -0.876220703125, -0.6710205078125, -0.4658203125, -0.2606201171875, -0.055419921875, 0.1497802734375, 0.35498046875, 0.5601806640625, 0.765380859375, 0.9705810546875, 1.17578125, 1.3809814453125, 1.586181640625, 1.7913818359375, 1.99658203125, 2.2017822265625, 2.406982421875, 2.6121826171875, 2.8173828125, 3.0225830078125, 3.227783203125, 3.4329833984375, 3.63818359375, 3.8433837890625, 4.048583984375, 4.2537841796875, 4.458984375, 4.6641845703125, 4.869384765625, 5.0745849609375, 5.27978515625, 5.4849853515625, 5.690185546875, 5.8953857421875, 6.1005859375, 6.3057861328125, 6.510986328125, 6.7161865234375, 6.92138671875, 7.1265869140625, 7.331787109375, 7.5369873046875, 7.7421875]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 8.0, 18.0, 10.0, 17.0, 30.0, 42.0, 57.0, 94.0, 105.0, 183.0, 267.0, 386.0, 584.0, 902.0, 1338.0, 2002.0, 2987.0, 4740.0, 7309.0, 11482.0, 17703.0, 28207.0, 44224.0, 69633.0, 107740.0, 163500.0, 191305.0, 140936.0, 90954.0, 58243.0, 37265.0, 23647.0, 15017.0, 9546.0, 6231.0, 3957.0, 2648.0, 1691.0, 1227.0, 759.0, 509.0, 331.0, 237.0, 159.0, 102.0, 68.0, 60.0, 38.0, 20.0, 17.0, 13.0, 8.0, 4.0, 4.0, 1.0, 0.0, 2.0], "bins": [-0.43798828125, -0.4248390197753906, -0.41168975830078125, -0.3985404968261719, -0.3853912353515625, -0.3722419738769531, -0.35909271240234375, -0.3459434509277344, -0.332794189453125, -0.3196449279785156, -0.30649566650390625, -0.2933464050292969, -0.2801971435546875, -0.2670478820800781, -0.25389862060546875, -0.24074935913085938, -0.22760009765625, -0.21445083618164062, -0.20130157470703125, -0.18815231323242188, -0.1750030517578125, -0.16185379028320312, -0.14870452880859375, -0.13555526733398438, -0.122406005859375, -0.10925674438476562, -0.09610748291015625, -0.08295822143554688, -0.0698089599609375, -0.056659698486328125, -0.04351043701171875, -0.030361175537109375, -0.0172119140625, -0.004062652587890625, 0.00908660888671875, 0.022235870361328125, 0.0353851318359375, 0.048534393310546875, 0.06168365478515625, 0.07483291625976562, 0.087982177734375, 0.10113143920898438, 0.11428070068359375, 0.12742996215820312, 0.1405792236328125, 0.15372848510742188, 0.16687774658203125, 0.18002700805664062, 0.19317626953125, 0.20632553100585938, 0.21947479248046875, 0.23262405395507812, 0.2457733154296875, 0.2589225769042969, 0.27207183837890625, 0.2852210998535156, 0.298370361328125, 0.3115196228027344, 0.32466888427734375, 0.3378181457519531, 0.3509674072265625, 0.3641166687011719, 0.37726593017578125, 0.3904151916503906, 0.403564453125]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 3.0, 6.0, 4.0, 8.0, 12.0, 10.0, 16.0, 20.0, 27.0, 19.0, 26.0, 29.0, 38.0, 34.0, 32.0, 40.0, 30.0, 42.0, 48.0, 47.0, 1068.0, 33.0, 36.0, 46.0, 39.0, 44.0, 40.0, 26.0, 31.0, 26.0, 19.0, 23.0, 17.0, 16.0, 10.0, 13.0, 8.0, 4.0, 4.0, 13.0, 8.0, 5.0, 5.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.9140625, -3.798187255859375, -3.68231201171875, -3.566436767578125, -3.4505615234375, -3.334686279296875, -3.21881103515625, -3.102935791015625, -2.987060546875, -2.871185302734375, -2.75531005859375, -2.639434814453125, -2.5235595703125, -2.407684326171875, -2.29180908203125, -2.175933837890625, -2.06005859375, -1.944183349609375, -1.82830810546875, -1.712432861328125, -1.5965576171875, -1.480682373046875, -1.36480712890625, -1.248931884765625, -1.133056640625, -1.017181396484375, -0.90130615234375, -0.785430908203125, -0.6695556640625, -0.553680419921875, -0.43780517578125, -0.321929931640625, -0.2060546875, -0.090179443359375, 0.02569580078125, 0.141571044921875, 0.2574462890625, 0.373321533203125, 0.48919677734375, 0.605072021484375, 0.720947265625, 0.836822509765625, 0.95269775390625, 1.068572998046875, 1.1844482421875, 1.300323486328125, 1.41619873046875, 1.532073974609375, 1.64794921875, 1.763824462890625, 1.87969970703125, 1.995574951171875, 2.1114501953125, 2.227325439453125, 2.34320068359375, 2.459075927734375, 2.574951171875, 2.690826416015625, 2.80670166015625, 2.922576904296875, 3.0384521484375, 3.154327392578125, 3.27020263671875, 3.386077880859375, 3.501953125]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 6.0, 7.0, 8.0, 9.0, 23.0, 29.0, 59.0, 72.0, 114.0, 177.0, 253.0, 407.0, 647.0, 977.0, 1437.0, 2273.0, 3567.0, 5518.0, 8478.0, 12817.0, 19913.0, 30633.0, 46954.0, 71823.0, 108964.0, 151333.0, 1216983.0, 138771.0, 96053.0, 62833.0, 40628.0, 26457.0, 17178.0, 11107.0, 7210.0, 4696.0, 3120.0, 1983.0, 1235.0, 878.0, 546.0, 331.0, 224.0, 148.0, 82.0, 63.0, 52.0, 20.0, 14.0, 7.0, 8.0, 9.0, 2.0, 5.0, 1.0, 2.0], "bins": [-0.37451171875, -0.3636054992675781, -0.35269927978515625, -0.3417930603027344, -0.3308868408203125, -0.3199806213378906, -0.30907440185546875, -0.2981681823730469, -0.287261962890625, -0.2763557434082031, -0.26544952392578125, -0.2545433044433594, -0.2436370849609375, -0.23273086547851562, -0.22182464599609375, -0.21091842651367188, -0.20001220703125, -0.18910598754882812, -0.17819976806640625, -0.16729354858398438, -0.1563873291015625, -0.14548110961914062, -0.13457489013671875, -0.12366867065429688, -0.112762451171875, -0.10185623168945312, -0.09095001220703125, -0.08004379272460938, -0.0691375732421875, -0.058231353759765625, -0.04732513427734375, -0.036418914794921875, -0.0255126953125, -0.014606475830078125, -0.00370025634765625, 0.007205963134765625, 0.0181121826171875, 0.029018402099609375, 0.03992462158203125, 0.050830841064453125, 0.061737060546875, 0.07264328002929688, 0.08354949951171875, 0.09445571899414062, 0.1053619384765625, 0.11626815795898438, 0.12717437744140625, 0.13808059692382812, 0.14898681640625, 0.15989303588867188, 0.17079925537109375, 0.18170547485351562, 0.1926116943359375, 0.20351791381835938, 0.21442413330078125, 0.22533035278320312, 0.236236572265625, 0.24714279174804688, 0.25804901123046875, 0.2689552307128906, 0.2798614501953125, 0.2907676696777344, 0.30167388916015625, 0.3125801086425781, 0.323486328125]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 4.0, 7.0, 7.0, 10.0, 10.0, 20.0, 15.0, 26.0, 28.0, 32.0, 44.0, 46.0, 52.0, 58.0, 68.0, 60.0, 59.0, 61.0, 61.0, 59.0, 48.0, 53.0, 46.0, 30.0, 25.0, 16.0, 20.0, 13.0, 9.0, 8.0, 9.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0013570785522460938, -0.0013088881969451904, -0.0012606978416442871, -0.0012125074863433838, -0.0011643171310424805, -0.0011161267757415771, -0.0010679364204406738, -0.0010197460651397705, -0.0009715557098388672, -0.0009233653545379639, -0.0008751749992370605, -0.0008269846439361572, -0.0007787942886352539, -0.0007306039333343506, -0.0006824135780334473, -0.0006342232227325439, -0.0005860328674316406, -0.0005378425121307373, -0.000489652156829834, -0.00044146180152893066, -0.00039327144622802734, -0.000345081090927124, -0.0002968907356262207, -0.0002487003803253174, -0.00020051002502441406, -0.00015231966972351074, -0.00010412931442260742, -5.59389591217041e-05, -7.748603820800781e-06, 4.044175148010254e-05, 8.863210678100586e-05, 0.00013682246208190918, 0.0001850128173828125, 0.00023320317268371582, 0.00028139352798461914, 0.00032958388328552246, 0.0003777742385864258, 0.0004259645938873291, 0.0004741549491882324, 0.0005223453044891357, 0.0005705356597900391, 0.0006187260150909424, 0.0006669163703918457, 0.000715106725692749, 0.0007632970809936523, 0.0008114874362945557, 0.000859677791595459, 0.0009078681468963623, 0.0009560585021972656, 0.001004248857498169, 0.0010524392127990723, 0.0011006295680999756, 0.001148819923400879, 0.0011970102787017822, 0.0012452006340026855, 0.0012933909893035889, 0.0013415813446044922, 0.0013897716999053955, 0.0014379620552062988, 0.0014861524105072021, 0.0015343427658081055, 0.0015825331211090088, 0.0016307234764099121, 0.0016789138317108154, 0.0017271041870117188]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 2.0, 8.0, 16.0, 20.0, 17.0, 31.0, 32.0, 56.0, 85.0, 109.0, 171.0, 373.0, 1184.0, 462298.0, 581960.0, 1230.0, 416.0, 169.0, 102.0, 66.0, 63.0, 44.0, 27.0, 17.0, 18.0, 11.0, 6.0, 10.0, 4.0, 7.0, 1.0, 1.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.03790283203125, -0.036820411682128906, -0.03573799133300781, -0.03465557098388672, -0.033573150634765625, -0.03249073028564453, -0.03140830993652344, -0.030325889587402344, -0.02924346923828125, -0.028161048889160156, -0.027078628540039062, -0.02599620819091797, -0.024913787841796875, -0.02383136749267578, -0.022748947143554688, -0.021666526794433594, -0.0205841064453125, -0.019501686096191406, -0.018419265747070312, -0.01733684539794922, -0.016254425048828125, -0.015172004699707031, -0.014089584350585938, -0.013007164001464844, -0.01192474365234375, -0.010842323303222656, -0.009759902954101562, -0.008677482604980469, -0.007595062255859375, -0.006512641906738281, -0.0054302215576171875, -0.004347801208496094, -0.003265380859375, -0.0021829605102539062, -0.0011005401611328125, -1.811981201171875e-05, 0.001064300537109375, 0.0021467208862304688, 0.0032291412353515625, 0.004311561584472656, 0.00539398193359375, 0.006476402282714844, 0.0075588226318359375, 0.008641242980957031, 0.009723663330078125, 0.010806083679199219, 0.011888504028320312, 0.012970924377441406, 0.0140533447265625, 0.015135765075683594, 0.016218185424804688, 0.01730060577392578, 0.018383026123046875, 0.01946544647216797, 0.020547866821289062, 0.021630287170410156, 0.02271270751953125, 0.023795127868652344, 0.024877548217773438, 0.02595996856689453, 0.027042388916015625, 0.02812480926513672, 0.029207229614257812, 0.030289649963378906, 0.0313720703125]}, "gradients/decoder.transformer.h.13.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 12.0, 83.0, 316.0, 414.0, 163.0, 24.0, 4.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0012022546725347638, -0.0010945876128971577, -0.0009869205532595515, -0.0008792535518296063, -0.0007715865503996611, -0.0006639194907620549, -0.0005562524311244488, -0.00044858542969450355, -0.0003409183700568974, -0.00023325133952312171, -0.0001255842944374308, -1.7917249351739883e-05, 8.97497811820358e-05, 0.0001974168117158115, 0.00030508387135341763, 0.00041275087278336287, 0.000520417932420969, 0.0006280849920585752, 0.0007357519934885204, 0.0008434190531261265, 0.0009510860545560718, 0.001058753114193678, 0.001166420173831284, 0.0012740872334688902, 0.0013817541766911745, 0.0014894212363287807, 0.0015970882959663868, 0.0017047552391886711, 0.0018124222988262773, 0.0019200893584638834, 0.0020277565345168114, 0.0021354234777390957, 0.0022430906537920237, 0.002350757597014308, 0.002458424773067236, 0.0025660917162895203, 0.0026737588923424482, 0.0027814258355647326, 0.0028890930116176605, 0.002996759954839945, 0.003104426898062229, 0.0032120938412845135, 0.0033197610173374414, 0.0034274279605597258, 0.0035350951366126537, 0.003642762079834938, 0.0037504290230572224, 0.0038580961991101503, 0.003965763375163078, 0.004073430318385363, 0.004181097261607647, 0.004288764670491219, 0.004396431613713503, 0.004504098556935787, 0.0046117655001580715, 0.004719432443380356, 0.00482709938660264, 0.0049347663298249245, 0.005042433273047209, 0.00515010068193078, 0.005257767625153065, 0.005365434568375349, 0.005473101511597633, 0.005580768454819918, 0.005688435863703489]}, "gradients/decoder.transformer.h.13.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 2.0, 4.0, 4.0, 1.0, 8.0, 3.0, 15.0, 7.0, 10.0, 13.0, 15.0, 18.0, 24.0, 26.0, 33.0, 21.0, 38.0, 22.0, 35.0, 37.0, 36.0, 35.0, 42.0, 45.0, 44.0, 45.0, 32.0, 50.0, 38.0, 25.0, 40.0, 25.0, 40.0, 21.0, 24.0, 25.0, 8.0, 15.0, 13.0, 20.0, 19.0, 7.0, 8.0, 4.0, 0.0, 2.0, 6.0, 2.0, 6.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000577092170715332, -0.0005559744313359261, -0.0005348566919565201, -0.0005137389525771141, -0.0004926212131977081, -0.00047150347381830215, -0.0004503857344388962, -0.0004292679950594902, -0.00040815025568008423, -0.00038703251630067825, -0.0003659147769212723, -0.0003447970375418663, -0.00032367929816246033, -0.00030256155878305435, -0.0002814438194036484, -0.0002603260800242424, -0.00023920834064483643, -0.00021809060126543045, -0.00019697286188602448, -0.0001758551225066185, -0.00015473738312721252, -0.00013361964374780655, -0.00011250190436840057, -9.13841649889946e-05, -7.026642560958862e-05, -4.914868623018265e-05, -2.8030946850776672e-05, -6.913207471370697e-06, 1.4204531908035278e-05, 3.5322271287441254e-05, 5.644001066684723e-05, 7.75577500462532e-05, 9.867548942565918e-05, 0.00011979322880506516, 0.00014091096818447113, 0.0001620287075638771, 0.00018314644694328308, 0.00020426418632268906, 0.00022538192570209503, 0.000246499665081501, 0.000267617404460907, 0.00028873514384031296, 0.00030985288321971893, 0.0003309706225991249, 0.0003520883619785309, 0.00037320610135793686, 0.00039432384073734283, 0.0004154415801167488, 0.0004365593194961548, 0.00045767705887556076, 0.00047879479825496674, 0.0004999125376343727, 0.0005210302770137787, 0.0005421480163931847, 0.0005632657557725906, 0.0005843834951519966, 0.0006055012345314026, 0.0006266189739108086, 0.0006477367132902145, 0.0006688544526696205, 0.0006899721920490265, 0.0007110899314284325, 0.0007322076708078384, 0.0007533254101872444, 0.0007744431495666504]}, "gradients/decoder.transformer.h.13.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 8.0, 8.0, 16.0, 9.0, 14.0, 28.0, 20.0, 25.0, 30.0, 32.0, 24.0, 45.0, 39.0, 44.0, 49.0, 44.0, 39.0, 54.0, 58.0, 56.0, 50.0, 51.0, 32.0, 29.0, 30.0, 19.0, 33.0, 21.0, 28.0, 13.0, 12.0, 10.0, 8.0, 9.0, 9.0, 6.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.390625, -5.1854248046875, -4.980224609375, -4.7750244140625, -4.56982421875, -4.3646240234375, -4.159423828125, -3.9542236328125, -3.7490234375, -3.5438232421875, -3.338623046875, -3.1334228515625, -2.92822265625, -2.7230224609375, -2.517822265625, -2.3126220703125, -2.107421875, -1.9022216796875, -1.697021484375, -1.4918212890625, -1.28662109375, -1.0814208984375, -0.876220703125, -0.6710205078125, -0.4658203125, -0.2606201171875, -0.055419921875, 0.1497802734375, 0.35498046875, 0.5601806640625, 0.765380859375, 0.9705810546875, 1.17578125, 1.3809814453125, 1.586181640625, 1.7913818359375, 1.99658203125, 2.2017822265625, 2.406982421875, 2.6121826171875, 2.8173828125, 3.0225830078125, 3.227783203125, 3.4329833984375, 3.63818359375, 3.8433837890625, 4.048583984375, 4.2537841796875, 4.458984375, 4.6641845703125, 4.869384765625, 5.0745849609375, 5.27978515625, 5.4849853515625, 5.690185546875, 5.8953857421875, 6.1005859375, 6.3057861328125, 6.510986328125, 6.7161865234375, 6.92138671875, 7.1265869140625, 7.331787109375, 7.5369873046875, 7.7421875]}, "gradients/decoder.transformer.h.13.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 6.0, 4.0, 5.0, 13.0, 17.0, 15.0, 25.0, 48.0, 66.0, 116.0, 169.0, 225.0, 399.0, 689.0, 1142.0, 2262.0, 4514.0, 9722.0, 21865.0, 52206.0, 129725.0, 300145.0, 300907.0, 129755.0, 53081.0, 22018.0, 9714.0, 4535.0, 2275.0, 1189.0, 609.0, 398.0, 234.0, 135.0, 107.0, 82.0, 31.0, 33.0, 22.0, 9.0, 16.0, 8.0, 6.0, 3.0, 4.0, 10.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0], "bins": [-5.14453125, -4.9608154296875, -4.777099609375, -4.5933837890625, -4.40966796875, -4.2259521484375, -4.042236328125, -3.8585205078125, -3.6748046875, -3.4910888671875, -3.307373046875, -3.1236572265625, -2.93994140625, -2.7562255859375, -2.572509765625, -2.3887939453125, -2.205078125, -2.0213623046875, -1.837646484375, -1.6539306640625, -1.47021484375, -1.2864990234375, -1.102783203125, -0.9190673828125, -0.7353515625, -0.5516357421875, -0.367919921875, -0.1842041015625, -0.00048828125, 0.1832275390625, 0.366943359375, 0.5506591796875, 0.734375, 0.9180908203125, 1.101806640625, 1.2855224609375, 1.46923828125, 1.6529541015625, 1.836669921875, 2.0203857421875, 2.2041015625, 2.3878173828125, 2.571533203125, 2.7552490234375, 2.93896484375, 3.1226806640625, 3.306396484375, 3.4901123046875, 3.673828125, 3.8575439453125, 4.041259765625, 4.2249755859375, 4.40869140625, 4.5924072265625, 4.776123046875, 4.9598388671875, 5.1435546875, 5.3272705078125, 5.510986328125, 5.6947021484375, 5.87841796875, 6.0621337890625, 6.245849609375, 6.4295654296875, 6.61328125]}, "gradients/decoder.transformer.h.13.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 2.0, 1.0, 0.0, 6.0, 8.0, 3.0, 4.0, 4.0, 4.0, 8.0, 11.0, 7.0, 11.0, 13.0, 18.0, 25.0, 26.0, 28.0, 21.0, 33.0, 25.0, 40.0, 43.0, 50.0, 83.0, 131.0, 227.0, 1429.0, 234.0, 133.0, 70.0, 41.0, 39.0, 37.0, 38.0, 39.0, 21.0, 23.0, 18.0, 16.0, 14.0, 17.0, 8.0, 15.0, 5.0, 9.0, 6.0, 1.0, 5.0, 1.0, 4.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-17.328125, -16.759765625, -16.19140625, -15.623046875, -15.0546875, -14.486328125, -13.91796875, -13.349609375, -12.78125, -12.212890625, -11.64453125, -11.076171875, -10.5078125, -9.939453125, -9.37109375, -8.802734375, -8.234375, -7.666015625, -7.09765625, -6.529296875, -5.9609375, -5.392578125, -4.82421875, -4.255859375, -3.6875, -3.119140625, -2.55078125, -1.982421875, -1.4140625, -0.845703125, -0.27734375, 0.291015625, 0.859375, 1.427734375, 1.99609375, 2.564453125, 3.1328125, 3.701171875, 4.26953125, 4.837890625, 5.40625, 5.974609375, 6.54296875, 7.111328125, 7.6796875, 8.248046875, 8.81640625, 9.384765625, 9.953125, 10.521484375, 11.08984375, 11.658203125, 12.2265625, 12.794921875, 13.36328125, 13.931640625, 14.5, 15.068359375, 15.63671875, 16.205078125, 16.7734375, 17.341796875, 17.91015625, 18.478515625, 19.046875]}, "gradients/decoder.transformer.h.13.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 4.0, 0.0, 4.0, 5.0, 6.0, 9.0, 11.0, 18.0, 25.0, 20.0, 32.0, 43.0, 79.0, 119.0, 202.0, 400.0, 913.0, 18236.0, 3118910.0, 5061.0, 668.0, 340.0, 209.0, 128.0, 79.0, 48.0, 43.0, 35.0, 14.0, 13.0, 8.0, 8.0, 4.0, 8.0, 6.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-97.1875, -94.275390625, -91.36328125, -88.451171875, -85.5390625, -82.626953125, -79.71484375, -76.802734375, -73.890625, -70.978515625, -68.06640625, -65.154296875, -62.2421875, -59.330078125, -56.41796875, -53.505859375, -50.59375, -47.681640625, -44.76953125, -41.857421875, -38.9453125, -36.033203125, -33.12109375, -30.208984375, -27.296875, -24.384765625, -21.47265625, -18.560546875, -15.6484375, -12.736328125, -9.82421875, -6.912109375, -4.0, -1.087890625, 1.82421875, 4.736328125, 7.6484375, 10.560546875, 13.47265625, 16.384765625, 19.296875, 22.208984375, 25.12109375, 28.033203125, 30.9453125, 33.857421875, 36.76953125, 39.681640625, 42.59375, 45.505859375, 48.41796875, 51.330078125, 54.2421875, 57.154296875, 60.06640625, 62.978515625, 65.890625, 68.802734375, 71.71484375, 74.626953125, 77.5390625, 80.451171875, 83.36328125, 86.275390625, 89.1875]}, "gradients/decoder.transformer.h.13.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 9.0, 41.0, 75.0, 154.0, 236.0, 228.0, 157.0, 79.0, 23.0, 7.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.55328369140625, -45.788604736328125, -44.023921966552734, -42.25924301147461, -40.494564056396484, -38.729881286621094, -36.96520233154297, -35.200523376464844, -33.43584442138672, -31.67116355895996, -29.906484603881836, -28.141803741455078, -26.377124786376953, -24.612443923950195, -22.847763061523438, -21.083084106445312, -19.318401336669922, -17.553720474243164, -15.789041519165039, -14.024360656738281, -12.25968074798584, -10.495000839233398, -8.73031997680664, -6.965640068054199, -5.200960159301758, -3.4362800121307373, -1.6715998649597168, 0.09308052062988281, 1.8577604293823242, 3.6224403381347656, 5.387121200561523, 7.151801109313965, 8.916481018066406, 10.681160926818848, 12.445840835571289, 14.210521697998047, 15.975201606750488, 17.73988151550293, 19.504562377929688, 21.269241333007812, 23.03392219543457, 24.798603057861328, 26.563282012939453, 28.32796287536621, 30.09264373779297, 31.857322692871094, 33.62200164794922, 35.38668441772461, 37.151363372802734, 38.91604232788086, 40.68072509765625, 42.445404052734375, 44.2100830078125, 45.974761962890625, 47.739444732666016, 49.50412368774414, 51.26880645751953, 53.033485412597656, 54.79816818237305, 56.56284713745117, 58.3275260925293, 60.09220886230469, 61.85688781738281, 63.62156677246094, 65.38624572753906]}, "gradients/decoder.transformer.h.13.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 6.0, 2.0, 4.0, 5.0, 10.0, 8.0, 9.0, 17.0, 20.0, 14.0, 13.0, 18.0, 20.0, 16.0, 30.0, 27.0, 33.0, 27.0, 35.0, 38.0, 28.0, 37.0, 38.0, 34.0, 48.0, 37.0, 42.0, 43.0, 35.0, 29.0, 25.0, 34.0, 29.0, 25.0, 22.0, 22.0, 17.0, 19.0, 13.0, 8.0, 17.0, 13.0, 8.0, 4.0, 7.0, 8.0, 4.0, 3.0, 4.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-45.83510971069336, -44.48036575317383, -43.12561798095703, -41.7708740234375, -40.41613006591797, -39.06138229370117, -37.70663833618164, -36.351890563964844, -34.99714660644531, -33.64240264892578, -32.287654876708984, -30.932910919189453, -29.57816505432129, -28.223419189453125, -26.868675231933594, -25.51392936706543, -24.159183502197266, -22.8044376373291, -21.449691772460938, -20.094947814941406, -18.740201950073242, -17.385456085205078, -16.030712127685547, -14.675966262817383, -13.321220397949219, -11.966474533081055, -10.611729621887207, -9.25698471069336, -7.902238845825195, -6.5474934577941895, -5.192748069763184, -3.838003158569336, -2.4832534790039062, -1.1285080909729004, 0.22623729705810547, 1.5809826850891113, 2.935728073120117, 4.290473461151123, 5.645218849182129, 6.999963760375977, 8.35470962524414, 9.709455490112305, 11.064200401306152, 12.4189453125, 13.773691177368164, 15.128437042236328, 16.48318099975586, 17.837926864624023, 19.192672729492188, 20.54741859436035, 21.902164459228516, 23.256908416748047, 24.61165428161621, 25.966400146484375, 27.321144104003906, 28.67588996887207, 30.030635833740234, 31.3853816986084, 32.74012756347656, 34.094871520996094, 35.449615478515625, 36.80436325073242, 38.15910720825195, 39.51385498046875, 40.86859893798828]}, "gradients/decoder.transformer.h.12.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 3.0, 2.0, 6.0, 10.0, 9.0, 9.0, 20.0, 16.0, 17.0, 31.0, 25.0, 18.0, 33.0, 31.0, 33.0, 43.0, 39.0, 49.0, 40.0, 45.0, 51.0, 55.0, 61.0, 46.0, 42.0, 34.0, 33.0, 33.0, 31.0, 20.0, 21.0, 23.0, 15.0, 18.0, 19.0, 8.0, 2.0, 6.0, 7.0, 3.0, 0.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.74609375, -5.52716064453125, -5.3082275390625, -5.08929443359375, -4.870361328125, -4.65142822265625, -4.4324951171875, -4.21356201171875, -3.99462890625, -3.77569580078125, -3.5567626953125, -3.33782958984375, -3.118896484375, -2.89996337890625, -2.6810302734375, -2.46209716796875, -2.2431640625, -2.02423095703125, -1.8052978515625, -1.58636474609375, -1.367431640625, -1.14849853515625, -0.9295654296875, -0.71063232421875, -0.49169921875, -0.27276611328125, -0.0538330078125, 0.16510009765625, 0.384033203125, 0.60296630859375, 0.8218994140625, 1.04083251953125, 1.259765625, 1.47869873046875, 1.6976318359375, 1.91656494140625, 2.135498046875, 2.35443115234375, 2.5733642578125, 2.79229736328125, 3.01123046875, 3.23016357421875, 3.4490966796875, 3.66802978515625, 3.886962890625, 4.10589599609375, 4.3248291015625, 4.54376220703125, 4.7626953125, 4.98162841796875, 5.2005615234375, 5.41949462890625, 5.638427734375, 5.85736083984375, 6.0762939453125, 6.29522705078125, 6.51416015625, 6.73309326171875, 6.9520263671875, 7.17095947265625, 7.389892578125, 7.60882568359375, 7.8277587890625, 8.04669189453125, 8.265625]}, "gradients/decoder.transformer.h.12.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 4.0, 0.0, 0.0, 4.0, 4.0, 3.0, 7.0, 4.0, 8.0, 6.0, 11.0, 19.0, 21.0, 39.0, 38.0, 47.0, 53.0, 91.0, 146.0, 210.0, 415.0, 1053.0, 4270.0, 31103.0, 330693.0, 1988301.0, 1600767.0, 212096.0, 20046.0, 3004.0, 845.0, 364.0, 209.0, 103.0, 82.0, 45.0, 45.0, 34.0, 21.0, 23.0, 15.0, 8.0, 15.0, 7.0, 6.0, 4.0, 5.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.078125, -10.6663818359375, -10.254638671875, -9.8428955078125, -9.43115234375, -9.0194091796875, -8.607666015625, -8.1959228515625, -7.7841796875, -7.3724365234375, -6.960693359375, -6.5489501953125, -6.13720703125, -5.7254638671875, -5.313720703125, -4.9019775390625, -4.490234375, -4.0784912109375, -3.666748046875, -3.2550048828125, -2.84326171875, -2.4315185546875, -2.019775390625, -1.6080322265625, -1.1962890625, -0.7845458984375, -0.372802734375, 0.0389404296875, 0.45068359375, 0.8624267578125, 1.274169921875, 1.6859130859375, 2.09765625, 2.5093994140625, 2.921142578125, 3.3328857421875, 3.74462890625, 4.1563720703125, 4.568115234375, 4.9798583984375, 5.3916015625, 5.8033447265625, 6.215087890625, 6.6268310546875, 7.03857421875, 7.4503173828125, 7.862060546875, 8.2738037109375, 8.685546875, 9.0972900390625, 9.509033203125, 9.9207763671875, 10.33251953125, 10.7442626953125, 11.156005859375, 11.5677490234375, 11.9794921875, 12.3912353515625, 12.802978515625, 13.2147216796875, 13.62646484375, 14.0382080078125, 14.449951171875, 14.8616943359375, 15.2734375]}, "gradients/decoder.transformer.h.12.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 4.0, 2.0, 5.0, 4.0, 3.0, 9.0, 6.0, 14.0, 15.0, 14.0, 27.0, 41.0, 46.0, 75.0, 120.0, 152.0, 182.0, 253.0, 317.0, 409.0, 433.0, 422.0, 392.0, 309.0, 230.0, 174.0, 127.0, 85.0, 56.0, 37.0, 42.0, 27.0, 12.0, 10.0, 5.0, 8.0, 4.0, 5.0, 2.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-13.09375, -12.693603515625, -12.29345703125, -11.893310546875, -11.4931640625, -11.093017578125, -10.69287109375, -10.292724609375, -9.892578125, -9.492431640625, -9.09228515625, -8.692138671875, -8.2919921875, -7.891845703125, -7.49169921875, -7.091552734375, -6.69140625, -6.291259765625, -5.89111328125, -5.490966796875, -5.0908203125, -4.690673828125, -4.29052734375, -3.890380859375, -3.490234375, -3.090087890625, -2.68994140625, -2.289794921875, -1.8896484375, -1.489501953125, -1.08935546875, -0.689208984375, -0.2890625, 0.111083984375, 0.51123046875, 0.911376953125, 1.3115234375, 1.711669921875, 2.11181640625, 2.511962890625, 2.912109375, 3.312255859375, 3.71240234375, 4.112548828125, 4.5126953125, 4.912841796875, 5.31298828125, 5.713134765625, 6.11328125, 6.513427734375, 6.91357421875, 7.313720703125, 7.7138671875, 8.114013671875, 8.51416015625, 8.914306640625, 9.314453125, 9.714599609375, 10.11474609375, 10.514892578125, 10.9150390625, 11.315185546875, 11.71533203125, 12.115478515625, 12.515625]}, "gradients/decoder.transformer.h.12.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 4.0, 6.0, 4.0, 12.0, 8.0, 15.0, 24.0, 40.0, 55.0, 64.0, 101.0, 154.0, 226.0, 337.0, 526.0, 1643.0, 93952.0, 3881716.0, 211358.0, 2319.0, 581.0, 349.0, 231.0, 170.0, 115.0, 81.0, 59.0, 32.0, 29.0, 17.0, 20.0, 7.0, 2.0, 10.0, 3.0, 6.0, 2.0, 6.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.84375, -41.48681640625, -40.1298828125, -38.77294921875, -37.416015625, -36.05908203125, -34.7021484375, -33.34521484375, -31.98828125, -30.63134765625, -29.2744140625, -27.91748046875, -26.560546875, -25.20361328125, -23.8466796875, -22.48974609375, -21.1328125, -19.77587890625, -18.4189453125, -17.06201171875, -15.705078125, -14.34814453125, -12.9912109375, -11.63427734375, -10.27734375, -8.92041015625, -7.5634765625, -6.20654296875, -4.849609375, -3.49267578125, -2.1357421875, -0.77880859375, 0.578125, 1.93505859375, 3.2919921875, 4.64892578125, 6.005859375, 7.36279296875, 8.7197265625, 10.07666015625, 11.43359375, 12.79052734375, 14.1474609375, 15.50439453125, 16.861328125, 18.21826171875, 19.5751953125, 20.93212890625, 22.2890625, 23.64599609375, 25.0029296875, 26.35986328125, 27.716796875, 29.07373046875, 30.4306640625, 31.78759765625, 33.14453125, 34.50146484375, 35.8583984375, 37.21533203125, 38.572265625, 39.92919921875, 41.2861328125, 42.64306640625, 44.0]}, "gradients/decoder.transformer.h.12.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 4.0, 3.0, 8.0, 23.0, 25.0, 39.0, 55.0, 72.0, 95.0, 95.0, 112.0, 114.0, 108.0, 92.0, 63.0, 47.0, 22.0, 15.0, 10.0, 5.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-65.18644714355469, -63.49686050415039, -61.807273864746094, -60.1176872253418, -58.4281005859375, -56.7385139465332, -55.048927307128906, -53.359336853027344, -51.66975402832031, -49.980167388916016, -48.29058074951172, -46.60099411010742, -44.911407470703125, -43.22182083129883, -41.53223419189453, -39.84264373779297, -38.15305709838867, -36.463470458984375, -34.77388381958008, -33.08429718017578, -31.394710540771484, -29.705123901367188, -28.015535354614258, -26.32594871520996, -24.636362075805664, -22.946775436401367, -21.25718879699707, -19.56760025024414, -17.878013610839844, -16.188426971435547, -14.49884033203125, -12.809253692626953, -11.119670867919922, -9.430084228515625, -7.74049711227417, -6.050909996032715, -4.361323356628418, -2.671736717224121, -0.9821491241455078, 0.7074375152587891, 2.397024154663086, 4.086610794067383, 5.776197910308838, 7.465785026550293, 9.15537166595459, 10.844958305358887, 12.5345458984375, 14.224132537841797, 15.913719177246094, 17.60330581665039, 19.292892456054688, 20.982479095458984, 22.67206573486328, 24.361652374267578, 26.051240921020508, 27.740827560424805, 29.4304141998291, 31.1200008392334, 32.80958938598633, 34.499176025390625, 36.18876266479492, 37.87834930419922, 39.567935943603516, 41.25752258300781, 42.94710922241211]}, "gradients/decoder.transformer.h.12.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 6.0, 6.0, 7.0, 6.0, 6.0, 18.0, 14.0, 25.0, 17.0, 14.0, 18.0, 23.0, 20.0, 26.0, 21.0, 29.0, 39.0, 30.0, 39.0, 34.0, 28.0, 43.0, 34.0, 39.0, 43.0, 48.0, 33.0, 38.0, 33.0, 28.0, 35.0, 24.0, 24.0, 25.0, 23.0, 21.0, 17.0, 18.0, 14.0, 4.0, 9.0, 9.0, 6.0, 2.0, 1.0, 5.0, 2.0, 3.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0], "bins": [-39.19010925292969, -38.00028610229492, -36.810462951660156, -35.620635986328125, -34.43081283569336, -33.240989685058594, -32.05116271972656, -30.861339569091797, -29.67151641845703, -28.481693267822266, -27.291868209838867, -26.10204315185547, -24.912220001220703, -23.722396850585938, -22.53257179260254, -21.34274673461914, -20.152923583984375, -18.96310043334961, -17.77327537536621, -16.583450317382812, -15.393627166748047, -14.203803062438965, -13.013978958129883, -11.8241548538208, -10.634330749511719, -9.444506645202637, -8.254682540893555, -7.064858436584473, -5.875034332275391, -4.685210227966309, -3.4953861236572266, -2.3055620193481445, -1.1157341003417969, 0.07409000396728516, 1.2639141082763672, 2.453738212585449, 3.6435623168945312, 4.833386421203613, 6.023210525512695, 7.213034629821777, 8.40285873413086, 9.592682838439941, 10.782506942749023, 11.972331047058105, 13.162155151367188, 14.35197925567627, 15.541803359985352, 16.73162841796875, 17.921451568603516, 19.11127471923828, 20.30109977722168, 21.490924835205078, 22.680747985839844, 23.87057113647461, 25.060396194458008, 26.250221252441406, 27.440044403076172, 28.629867553710938, 29.819692611694336, 31.009517669677734, 32.1993408203125, 33.389163970947266, 34.57898712158203, 35.76881408691406, 36.95863723754883]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 0.0, 4.0, 3.0, 8.0, 10.0, 9.0, 9.0, 13.0, 18.0, 24.0, 19.0, 35.0, 25.0, 33.0, 37.0, 42.0, 38.0, 42.0, 48.0, 42.0, 47.0, 48.0, 60.0, 51.0, 56.0, 34.0, 49.0, 25.0, 28.0, 29.0, 23.0, 21.0, 15.0, 12.0, 9.0, 7.0, 10.0, 12.0, 8.0, 1.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.984375, -5.7679443359375, -5.551513671875, -5.3350830078125, -5.11865234375, -4.9022216796875, -4.685791015625, -4.4693603515625, -4.2529296875, -4.0364990234375, -3.820068359375, -3.6036376953125, -3.38720703125, -3.1707763671875, -2.954345703125, -2.7379150390625, -2.521484375, -2.3050537109375, -2.088623046875, -1.8721923828125, -1.65576171875, -1.4393310546875, -1.222900390625, -1.0064697265625, -0.7900390625, -0.5736083984375, -0.357177734375, -0.1407470703125, 0.07568359375, 0.2921142578125, 0.508544921875, 0.7249755859375, 0.94140625, 1.1578369140625, 1.374267578125, 1.5906982421875, 1.80712890625, 2.0235595703125, 2.239990234375, 2.4564208984375, 2.6728515625, 2.8892822265625, 3.105712890625, 3.3221435546875, 3.53857421875, 3.7550048828125, 3.971435546875, 4.1878662109375, 4.404296875, 4.6207275390625, 4.837158203125, 5.0535888671875, 5.27001953125, 5.4864501953125, 5.702880859375, 5.9193115234375, 6.1357421875, 6.3521728515625, 6.568603515625, 6.7850341796875, 7.00146484375, 7.2178955078125, 7.434326171875, 7.6507568359375, 7.8671875]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 9.0, 6.0, 12.0, 27.0, 33.0, 62.0, 80.0, 141.0, 233.0, 381.0, 671.0, 1134.0, 1977.0, 3454.0, 5992.0, 11054.0, 20442.0, 38108.0, 73151.0, 139500.0, 243169.0, 233142.0, 130202.0, 67498.0, 35176.0, 19024.0, 10433.0, 5643.0, 3243.0, 1854.0, 1126.0, 630.0, 363.0, 212.0, 141.0, 77.0, 47.0, 44.0, 20.0, 16.0, 10.0, 11.0, 6.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5830078125, -0.56353759765625, -0.5440673828125, -0.52459716796875, -0.505126953125, -0.48565673828125, -0.4661865234375, -0.44671630859375, -0.42724609375, -0.40777587890625, -0.3883056640625, -0.36883544921875, -0.349365234375, -0.32989501953125, -0.3104248046875, -0.29095458984375, -0.271484375, -0.25201416015625, -0.2325439453125, -0.21307373046875, -0.193603515625, -0.17413330078125, -0.1546630859375, -0.13519287109375, -0.11572265625, -0.09625244140625, -0.0767822265625, -0.05731201171875, -0.037841796875, -0.01837158203125, 0.0010986328125, 0.02056884765625, 0.0400390625, 0.05950927734375, 0.0789794921875, 0.09844970703125, 0.117919921875, 0.13739013671875, 0.1568603515625, 0.17633056640625, 0.19580078125, 0.21527099609375, 0.2347412109375, 0.25421142578125, 0.273681640625, 0.29315185546875, 0.3126220703125, 0.33209228515625, 0.3515625, 0.37103271484375, 0.3905029296875, 0.40997314453125, 0.429443359375, 0.44891357421875, 0.4683837890625, 0.48785400390625, 0.50732421875, 0.52679443359375, 0.5462646484375, 0.56573486328125, 0.585205078125, 0.60467529296875, 0.6241455078125, 0.64361572265625, 0.6630859375]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 2.0, 2.0, 6.0, 4.0, 8.0, 14.0, 21.0, 15.0, 28.0, 21.0, 19.0, 26.0, 23.0, 25.0, 43.0, 34.0, 48.0, 43.0, 37.0, 51.0, 1068.0, 35.0, 47.0, 38.0, 39.0, 40.0, 42.0, 43.0, 34.0, 19.0, 16.0, 22.0, 24.0, 15.0, 16.0, 15.0, 8.0, 7.0, 11.0, 9.0, 3.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.78125, -3.65509033203125, -3.5289306640625, -3.40277099609375, -3.276611328125, -3.15045166015625, -3.0242919921875, -2.89813232421875, -2.77197265625, -2.64581298828125, -2.5196533203125, -2.39349365234375, -2.267333984375, -2.14117431640625, -2.0150146484375, -1.88885498046875, -1.7626953125, -1.63653564453125, -1.5103759765625, -1.38421630859375, -1.258056640625, -1.13189697265625, -1.0057373046875, -0.87957763671875, -0.75341796875, -0.62725830078125, -0.5010986328125, -0.37493896484375, -0.248779296875, -0.12261962890625, 0.0035400390625, 0.12969970703125, 0.255859375, 0.38201904296875, 0.5081787109375, 0.63433837890625, 0.760498046875, 0.88665771484375, 1.0128173828125, 1.13897705078125, 1.26513671875, 1.39129638671875, 1.5174560546875, 1.64361572265625, 1.769775390625, 1.89593505859375, 2.0220947265625, 2.14825439453125, 2.2744140625, 2.40057373046875, 2.5267333984375, 2.65289306640625, 2.779052734375, 2.90521240234375, 3.0313720703125, 3.15753173828125, 3.28369140625, 3.40985107421875, 3.5360107421875, 3.66217041015625, 3.788330078125, 3.91448974609375, 4.0406494140625, 4.16680908203125, 4.29296875]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 7.0, 6.0, 13.0, 22.0, 40.0, 40.0, 70.0, 99.0, 144.0, 227.0, 358.0, 558.0, 781.0, 1124.0, 1766.0, 2714.0, 3942.0, 6076.0, 9029.0, 13464.0, 19794.0, 29967.0, 44785.0, 67038.0, 99963.0, 142231.0, 1215350.0, 140109.0, 98096.0, 66345.0, 44072.0, 29437.0, 19582.0, 13293.0, 9079.0, 6027.0, 3840.0, 2614.0, 1682.0, 1129.0, 743.0, 496.0, 341.0, 231.0, 148.0, 87.0, 58.0, 43.0, 27.0, 27.0, 9.0, 8.0, 4.0, 3.0, 3.0, 2.0, 2.0], "bins": [-0.359130859375, -0.3484039306640625, -0.337677001953125, -0.3269500732421875, -0.31622314453125, -0.3054962158203125, -0.294769287109375, -0.2840423583984375, -0.2733154296875, -0.2625885009765625, -0.251861572265625, -0.2411346435546875, -0.23040771484375, -0.2196807861328125, -0.208953857421875, -0.1982269287109375, -0.1875, -0.1767730712890625, -0.166046142578125, -0.1553192138671875, -0.14459228515625, -0.1338653564453125, -0.123138427734375, -0.1124114990234375, -0.1016845703125, -0.0909576416015625, -0.080230712890625, -0.0695037841796875, -0.05877685546875, -0.0480499267578125, -0.037322998046875, -0.0265960693359375, -0.015869140625, -0.0051422119140625, 0.005584716796875, 0.0163116455078125, 0.02703857421875, 0.0377655029296875, 0.048492431640625, 0.0592193603515625, 0.0699462890625, 0.0806732177734375, 0.091400146484375, 0.1021270751953125, 0.11285400390625, 0.1235809326171875, 0.134307861328125, 0.1450347900390625, 0.15576171875, 0.1664886474609375, 0.177215576171875, 0.1879425048828125, 0.19866943359375, 0.2093963623046875, 0.220123291015625, 0.2308502197265625, 0.2415771484375, 0.2523040771484375, 0.263031005859375, 0.2737579345703125, 0.28448486328125, 0.2952117919921875, 0.305938720703125, 0.3166656494140625, 0.327392578125]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 4.0, 4.0, 9.0, 13.0, 13.0, 12.0, 20.0, 25.0, 36.0, 31.0, 46.0, 45.0, 67.0, 67.0, 58.0, 67.0, 73.0, 60.0, 65.0, 41.0, 41.0, 38.0, 41.0, 21.0, 21.0, 17.0, 16.0, 16.0, 6.0, 10.0, 11.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0011816024780273438, -0.0011403709650039673, -0.0010991394519805908, -0.0010579079389572144, -0.0010166764259338379, -0.0009754449129104614, -0.000934213399887085, -0.0008929818868637085, -0.000851750373840332, -0.0008105188608169556, -0.0007692873477935791, -0.0007280558347702026, -0.0006868243217468262, -0.0006455928087234497, -0.0006043612957000732, -0.0005631297826766968, -0.0005218982696533203, -0.00048066675662994385, -0.0004394352436065674, -0.0003982037305831909, -0.00035697221755981445, -0.000315740704536438, -0.0002745091915130615, -0.00023327767848968506, -0.0001920461654663086, -0.00015081465244293213, -0.00010958313941955566, -6.83516263961792e-05, -2.7120113372802734e-05, 1.411139965057373e-05, 5.5342912673950195e-05, 9.657442569732666e-05, 0.00013780593872070312, 0.0001790374517440796, 0.00022026896476745605, 0.0002615004777908325, 0.000302731990814209, 0.00034396350383758545, 0.0003851950168609619, 0.0004264265298843384, 0.00046765804290771484, 0.0005088895559310913, 0.0005501210689544678, 0.0005913525819778442, 0.0006325840950012207, 0.0006738156080245972, 0.0007150471210479736, 0.0007562786340713501, 0.0007975101470947266, 0.000838741660118103, 0.0008799731731414795, 0.000921204686164856, 0.0009624361991882324, 0.0010036677122116089, 0.0010448992252349854, 0.0010861307382583618, 0.0011273622512817383, 0.0011685937643051147, 0.0012098252773284912, 0.0012510567903518677, 0.0012922883033752441, 0.0013335198163986206, 0.001374751329421997, 0.0014159828424453735, 0.00145721435546875]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 4.0, 12.0, 10.0, 6.0, 14.0, 23.0, 31.0, 29.0, 40.0, 64.0, 73.0, 86.0, 115.0, 238.0, 566.0, 2644.0, 947660.0, 94929.0, 994.0, 374.0, 205.0, 131.0, 76.0, 54.0, 51.0, 35.0, 19.0, 19.0, 12.0, 12.0, 14.0, 7.0, 3.0, 3.0, 0.0, 2.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0289764404296875, -0.02814316749572754, -0.027309894561767578, -0.026476621627807617, -0.025643348693847656, -0.024810075759887695, -0.023976802825927734, -0.023143529891967773, -0.022310256958007812, -0.02147698402404785, -0.02064371109008789, -0.01981043815612793, -0.01897716522216797, -0.018143892288208008, -0.017310619354248047, -0.016477346420288086, -0.015644073486328125, -0.014810800552368164, -0.013977527618408203, -0.013144254684448242, -0.012310981750488281, -0.01147770881652832, -0.01064443588256836, -0.009811162948608398, -0.008977890014648438, -0.008144617080688477, -0.007311344146728516, -0.006478071212768555, -0.005644798278808594, -0.004811525344848633, -0.003978252410888672, -0.003144979476928711, -0.00231170654296875, -0.001478433609008789, -0.0006451606750488281, 0.0001881122589111328, 0.0010213851928710938, 0.0018546581268310547, 0.0026879310607910156, 0.0035212039947509766, 0.0043544769287109375, 0.0051877498626708984, 0.006021022796630859, 0.00685429573059082, 0.007687568664550781, 0.008520841598510742, 0.009354114532470703, 0.010187387466430664, 0.011020660400390625, 0.011853933334350586, 0.012687206268310547, 0.013520479202270508, 0.014353752136230469, 0.01518702507019043, 0.01602029800415039, 0.01685357093811035, 0.017686843872070312, 0.018520116806030273, 0.019353389739990234, 0.020186662673950195, 0.021019935607910156, 0.021853208541870117, 0.022686481475830078, 0.02351975440979004, 0.02435302734375]}, "gradients/decoder.transformer.h.12.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 15.0, 159.0, 508.0, 291.0, 39.0, 4.0, 1.0], "bins": [-0.007772545330226421, -0.0076423571445047855, -0.00751216895878315, -0.007381980773061514, -0.007251792587339878, -0.007121603935956955, -0.006991415750235319, -0.006861227564513683, -0.0067310393787920475, -0.006600851193070412, -0.006470663007348776, -0.00634047482162714, -0.006210286170244217, -0.006080097984522581, -0.005949909798800945, -0.0058197216130793095, -0.005689533427357674, -0.005559345241636038, -0.005429157055914402, -0.005298968870192766, -0.00516878068447113, -0.005038592033088207, -0.004908403847366571, -0.004778215661644936, -0.0046480274759233, -0.004517839290201664, -0.004387651104480028, -0.004257462918758392, -0.004127274267375469, -0.003997086081653833, -0.0038668978959321976, -0.0037367097102105618, -0.0036065219901502132, -0.0034763338044285774, -0.0033461456187069416, -0.003215957200154662, -0.0030857690144330263, -0.0029555808287113905, -0.002825392410159111, -0.002695204224437475, -0.0025650160387158394, -0.0024348278529942036, -0.0023046396672725677, -0.0021744512487202883, -0.0020442630629986525, -0.0019140748772770166, -0.001783886575140059, -0.0016536982730031013, -0.0015235100872814655, -0.0013933219015598297, -0.001263133599422872, -0.0011329452972859144, -0.0010027571115642786, -0.0008725688676349819, -0.0007423806237056851, -0.0006121923797763884, -0.00048200407763943076, -0.00035181583371013403, -0.0002216275897808373, -9.143934585154057e-05, 3.8748898077756166e-05, 0.0001689371420070529, 0.00029912538593634963, 0.00042931362986564636, 0.0005595018737949431]}, "gradients/decoder.transformer.h.12.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 2.0, 1.0, 6.0, 5.0, 8.0, 6.0, 10.0, 5.0, 12.0, 16.0, 17.0, 15.0, 19.0, 22.0, 33.0, 19.0, 32.0, 34.0, 33.0, 25.0, 30.0, 41.0, 31.0, 37.0, 48.0, 30.0, 35.0, 30.0, 35.0, 36.0, 43.0, 42.0, 33.0, 26.0, 22.0, 24.0, 27.0, 18.0, 12.0, 16.0, 8.0, 11.0, 9.0, 11.0, 13.0, 4.0, 6.0, 5.0, 2.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.000517725944519043, -0.0005003409460186958, -0.0004829559475183487, -0.00046557094901800156, -0.0004481859505176544, -0.0004308009520173073, -0.00041341595351696014, -0.000396030955016613, -0.00037864595651626587, -0.00036126095801591873, -0.0003438759595155716, -0.00032649096101522446, -0.0003091059625148773, -0.0002917209640145302, -0.00027433596551418304, -0.0002569509670138359, -0.00023956596851348877, -0.00022218097001314163, -0.0002047959715127945, -0.00018741097301244736, -0.00017002597451210022, -0.00015264097601175308, -0.00013525597751140594, -0.00011787097901105881, -0.00010048598051071167, -8.310098201036453e-05, -6.57159835100174e-05, -4.833098500967026e-05, -3.094598650932312e-05, -1.3560988008975983e-05, 3.824010491371155e-06, 2.1209008991718292e-05, 3.859400749206543e-05, 5.597900599241257e-05, 7.33640044927597e-05, 9.074900299310684e-05, 0.00010813400149345398, 0.00012551899999380112, 0.00014290399849414825, 0.0001602889969944954, 0.00017767399549484253, 0.00019505899399518967, 0.0002124439924955368, 0.00022982899099588394, 0.0002472139894962311, 0.0002645989879965782, 0.00028198398649692535, 0.0002993689849972725, 0.00031675398349761963, 0.00033413898199796677, 0.0003515239804983139, 0.00036890897899866104, 0.0003862939774990082, 0.0004036789759993553, 0.00042106397449970245, 0.0004384489730000496, 0.00045583397150039673, 0.00047321897000074387, 0.000490603968501091, 0.0005079889670014381, 0.0005253739655017853, 0.0005427589640021324, 0.0005601439625024796, 0.0005775289610028267, 0.0005949139595031738]}, "gradients/decoder.transformer.h.12.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 0.0, 4.0, 3.0, 8.0, 10.0, 9.0, 9.0, 13.0, 18.0, 24.0, 19.0, 35.0, 25.0, 33.0, 37.0, 42.0, 37.0, 43.0, 48.0, 42.0, 47.0, 48.0, 60.0, 51.0, 56.0, 34.0, 49.0, 25.0, 28.0, 29.0, 23.0, 21.0, 15.0, 12.0, 9.0, 7.0, 10.0, 12.0, 8.0, 1.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.984375, -5.7679443359375, -5.551513671875, -5.3350830078125, -5.11865234375, -4.9022216796875, -4.685791015625, -4.4693603515625, -4.2529296875, -4.0364990234375, -3.820068359375, -3.6036376953125, -3.38720703125, -3.1707763671875, -2.954345703125, -2.7379150390625, -2.521484375, -2.3050537109375, -2.088623046875, -1.8721923828125, -1.65576171875, -1.4393310546875, -1.222900390625, -1.0064697265625, -0.7900390625, -0.5736083984375, -0.357177734375, -0.1407470703125, 0.07568359375, 0.2921142578125, 0.508544921875, 0.7249755859375, 0.94140625, 1.1578369140625, 1.374267578125, 1.5906982421875, 1.80712890625, 2.0235595703125, 2.239990234375, 2.4564208984375, 2.6728515625, 2.8892822265625, 3.105712890625, 3.3221435546875, 3.53857421875, 3.7550048828125, 3.971435546875, 4.1878662109375, 4.404296875, 4.6207275390625, 4.837158203125, 5.0535888671875, 5.27001953125, 5.4864501953125, 5.702880859375, 5.9193115234375, 6.1357421875, 6.3521728515625, 6.568603515625, 6.7850341796875, 7.00146484375, 7.2178955078125, 7.434326171875, 7.6507568359375, 7.8671875]}, "gradients/decoder.transformer.h.12.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 3.0, 7.0, 7.0, 13.0, 10.0, 21.0, 32.0, 39.0, 75.0, 116.0, 169.0, 353.0, 611.0, 1292.0, 3405.0, 9194.0, 28635.0, 94740.0, 306959.0, 405852.0, 135334.0, 40707.0, 12989.0, 4509.0, 1754.0, 729.0, 408.0, 226.0, 126.0, 79.0, 61.0, 34.0, 32.0, 13.0, 7.0, 13.0, 5.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.5546875, -8.296630859375, -8.03857421875, -7.780517578125, -7.5224609375, -7.264404296875, -7.00634765625, -6.748291015625, -6.490234375, -6.232177734375, -5.97412109375, -5.716064453125, -5.4580078125, -5.199951171875, -4.94189453125, -4.683837890625, -4.42578125, -4.167724609375, -3.90966796875, -3.651611328125, -3.3935546875, -3.135498046875, -2.87744140625, -2.619384765625, -2.361328125, -2.103271484375, -1.84521484375, -1.587158203125, -1.3291015625, -1.071044921875, -0.81298828125, -0.554931640625, -0.296875, -0.038818359375, 0.21923828125, 0.477294921875, 0.7353515625, 0.993408203125, 1.25146484375, 1.509521484375, 1.767578125, 2.025634765625, 2.28369140625, 2.541748046875, 2.7998046875, 3.057861328125, 3.31591796875, 3.573974609375, 3.83203125, 4.090087890625, 4.34814453125, 4.606201171875, 4.8642578125, 5.122314453125, 5.38037109375, 5.638427734375, 5.896484375, 6.154541015625, 6.41259765625, 6.670654296875, 6.9287109375, 7.186767578125, 7.44482421875, 7.702880859375, 7.9609375]}, "gradients/decoder.transformer.h.12.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 4.0, 0.0, 1.0, 6.0, 11.0, 5.0, 10.0, 18.0, 20.0, 23.0, 19.0, 29.0, 25.0, 27.0, 27.0, 39.0, 53.0, 43.0, 51.0, 84.0, 192.0, 1447.0, 310.0, 140.0, 65.0, 62.0, 45.0, 41.0, 36.0, 46.0, 37.0, 24.0, 15.0, 23.0, 13.0, 5.0, 11.0, 8.0, 18.0, 2.0, 9.0, 7.0, 2.0, 5.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-20.9375, -20.279296875, -19.62109375, -18.962890625, -18.3046875, -17.646484375, -16.98828125, -16.330078125, -15.671875, -15.013671875, -14.35546875, -13.697265625, -13.0390625, -12.380859375, -11.72265625, -11.064453125, -10.40625, -9.748046875, -9.08984375, -8.431640625, -7.7734375, -7.115234375, -6.45703125, -5.798828125, -5.140625, -4.482421875, -3.82421875, -3.166015625, -2.5078125, -1.849609375, -1.19140625, -0.533203125, 0.125, 0.783203125, 1.44140625, 2.099609375, 2.7578125, 3.416015625, 4.07421875, 4.732421875, 5.390625, 6.048828125, 6.70703125, 7.365234375, 8.0234375, 8.681640625, 9.33984375, 9.998046875, 10.65625, 11.314453125, 11.97265625, 12.630859375, 13.2890625, 13.947265625, 14.60546875, 15.263671875, 15.921875, 16.580078125, 17.23828125, 17.896484375, 18.5546875, 19.212890625, 19.87109375, 20.529296875, 21.1875]}, "gradients/decoder.transformer.h.12.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 3.0, 4.0, 9.0, 10.0, 11.0, 28.0, 23.0, 46.0, 65.0, 83.0, 111.0, 215.0, 420.0, 1224.0, 36865.0, 3101728.0, 3465.0, 642.0, 257.0, 182.0, 102.0, 69.0, 53.0, 23.0, 22.0, 17.0, 13.0, 6.0, 5.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-122.6875, -119.3935546875, -116.099609375, -112.8056640625, -109.51171875, -106.2177734375, -102.923828125, -99.6298828125, -96.3359375, -93.0419921875, -89.748046875, -86.4541015625, -83.16015625, -79.8662109375, -76.572265625, -73.2783203125, -69.984375, -66.6904296875, -63.396484375, -60.1025390625, -56.80859375, -53.5146484375, -50.220703125, -46.9267578125, -43.6328125, -40.3388671875, -37.044921875, -33.7509765625, -30.45703125, -27.1630859375, -23.869140625, -20.5751953125, -17.28125, -13.9873046875, -10.693359375, -7.3994140625, -4.10546875, -0.8115234375, 2.482421875, 5.7763671875, 9.0703125, 12.3642578125, 15.658203125, 18.9521484375, 22.24609375, 25.5400390625, 28.833984375, 32.1279296875, 35.421875, 38.7158203125, 42.009765625, 45.3037109375, 48.59765625, 51.8916015625, 55.185546875, 58.4794921875, 61.7734375, 65.0673828125, 68.361328125, 71.6552734375, 74.94921875, 78.2431640625, 81.537109375, 84.8310546875, 88.125]}, "gradients/decoder.transformer.h.12.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 7.0, 5.0, 17.0, 31.0, 39.0, 51.0, 88.0, 108.0, 129.0, 130.0, 99.0, 107.0, 70.0, 66.0, 27.0, 20.0, 6.0, 7.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.224462509155273, -24.33256721496582, -23.440671920776367, -22.548778533935547, -21.656883239746094, -20.76498794555664, -19.873092651367188, -18.981197357177734, -18.08930206298828, -17.197406768798828, -16.305511474609375, -15.413617134094238, -14.521722793579102, -13.629827499389648, -12.737932205200195, -11.846036911010742, -10.954143524169922, -10.062248229980469, -9.170353889465332, -8.278458595275879, -7.386563777923584, -6.494668960571289, -5.602773666381836, -4.710878849029541, -3.818984031677246, -2.927089214324951, -2.035194158554077, -1.1432991027832031, -0.2514042854309082, 0.6404905319213867, 1.5323858261108398, 2.4242806434631348, 3.316173553466797, 4.208068370819092, 5.099963188171387, 5.99185848236084, 6.883753299713135, 7.77564811706543, 8.667543411254883, 9.559438705444336, 10.451333045959473, 11.343228340148926, 12.235122680664062, 13.127017974853516, 14.018913269042969, 14.910807609558105, 15.802702903747559, 16.694597244262695, 17.58649253845215, 18.4783878326416, 19.370283126831055, 20.262176513671875, 21.154071807861328, 22.04596710205078, 22.937862396240234, 23.829757690429688, 24.72165298461914, 25.613548278808594, 26.505443572998047, 27.3973388671875, 28.28923225402832, 29.181127548217773, 30.073022842407227, 30.96491813659668, 31.8568115234375]}, "gradients/decoder.transformer.h.12.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 6.0, 5.0, 8.0, 10.0, 7.0, 10.0, 9.0, 13.0, 20.0, 17.0, 22.0, 15.0, 26.0, 22.0, 27.0, 44.0, 26.0, 42.0, 35.0, 38.0, 46.0, 28.0, 33.0, 51.0, 51.0, 30.0, 31.0, 44.0, 34.0, 25.0, 21.0, 28.0, 26.0, 24.0, 19.0, 21.0, 18.0, 19.0, 11.0, 9.0, 9.0, 7.0, 6.0, 3.0, 6.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-43.384952545166016, -41.89735794067383, -40.40976333618164, -38.92216873168945, -37.434574127197266, -35.94697952270508, -34.45938491821289, -32.9717903137207, -31.484195709228516, -29.996601104736328, -28.50900650024414, -27.021411895751953, -25.533817291259766, -24.046222686767578, -22.55862808227539, -21.071033477783203, -19.583438873291016, -18.095844268798828, -16.60824966430664, -15.120655059814453, -13.633060455322266, -12.145465850830078, -10.65787124633789, -9.170276641845703, -7.682682037353516, -6.195087432861328, -4.707492828369141, -3.219898223876953, -1.7323036193847656, -0.24470901489257812, 1.2428855895996094, 2.730480194091797, 4.218074798583984, 5.705669403076172, 7.193264007568359, 8.680858612060547, 10.168453216552734, 11.656047821044922, 13.14364242553711, 14.631237030029297, 16.118831634521484, 17.606426239013672, 19.09402084350586, 20.581615447998047, 22.069210052490234, 23.556804656982422, 25.04439926147461, 26.531993865966797, 28.019588470458984, 29.507183074951172, 30.99477767944336, 32.48237228393555, 33.969966888427734, 35.45756149291992, 36.94515609741211, 38.4327507019043, 39.920345306396484, 41.40793991088867, 42.89553451538086, 44.38312911987305, 45.870723724365234, 47.35831832885742, 48.84591293334961, 50.3335075378418, 51.821102142333984]}, "gradients/decoder.transformer.h.11.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 4.0, 5.0, 12.0, 10.0, 13.0, 22.0, 14.0, 19.0, 25.0, 29.0, 34.0, 41.0, 32.0, 30.0, 48.0, 33.0, 47.0, 51.0, 62.0, 58.0, 50.0, 42.0, 50.0, 34.0, 49.0, 31.0, 28.0, 28.0, 16.0, 19.0, 14.0, 11.0, 13.0, 7.0, 7.0, 7.0, 4.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.4609375, -6.223388671875, -5.98583984375, -5.748291015625, -5.5107421875, -5.273193359375, -5.03564453125, -4.798095703125, -4.560546875, -4.322998046875, -4.08544921875, -3.847900390625, -3.6103515625, -3.372802734375, -3.13525390625, -2.897705078125, -2.66015625, -2.422607421875, -2.18505859375, -1.947509765625, -1.7099609375, -1.472412109375, -1.23486328125, -0.997314453125, -0.759765625, -0.522216796875, -0.28466796875, -0.047119140625, 0.1904296875, 0.427978515625, 0.66552734375, 0.903076171875, 1.140625, 1.378173828125, 1.61572265625, 1.853271484375, 2.0908203125, 2.328369140625, 2.56591796875, 2.803466796875, 3.041015625, 3.278564453125, 3.51611328125, 3.753662109375, 3.9912109375, 4.228759765625, 4.46630859375, 4.703857421875, 4.94140625, 5.178955078125, 5.41650390625, 5.654052734375, 5.8916015625, 6.129150390625, 6.36669921875, 6.604248046875, 6.841796875, 7.079345703125, 7.31689453125, 7.554443359375, 7.7919921875, 8.029541015625, 8.26708984375, 8.504638671875, 8.7421875]}, "gradients/decoder.transformer.h.11.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 3.0, 0.0, 2.0, 5.0, 3.0, 4.0, 9.0, 8.0, 15.0, 10.0, 17.0, 22.0, 42.0, 39.0, 40.0, 45.0, 71.0, 81.0, 124.0, 167.0, 373.0, 977.0, 4206.0, 36560.0, 486669.0, 2438500.0, 1118283.0, 96809.0, 8442.0, 1478.0, 470.0, 250.0, 148.0, 85.0, 78.0, 47.0, 38.0, 35.0, 20.0, 30.0, 22.0, 17.0, 14.0, 8.0, 13.0, 2.0, 4.0, 4.0, 2.0, 2.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.328125, -12.863525390625, -12.39892578125, -11.934326171875, -11.4697265625, -11.005126953125, -10.54052734375, -10.075927734375, -9.611328125, -9.146728515625, -8.68212890625, -8.217529296875, -7.7529296875, -7.288330078125, -6.82373046875, -6.359130859375, -5.89453125, -5.429931640625, -4.96533203125, -4.500732421875, -4.0361328125, -3.571533203125, -3.10693359375, -2.642333984375, -2.177734375, -1.713134765625, -1.24853515625, -0.783935546875, -0.3193359375, 0.145263671875, 0.60986328125, 1.074462890625, 1.5390625, 2.003662109375, 2.46826171875, 2.932861328125, 3.3974609375, 3.862060546875, 4.32666015625, 4.791259765625, 5.255859375, 5.720458984375, 6.18505859375, 6.649658203125, 7.1142578125, 7.578857421875, 8.04345703125, 8.508056640625, 8.97265625, 9.437255859375, 9.90185546875, 10.366455078125, 10.8310546875, 11.295654296875, 11.76025390625, 12.224853515625, 12.689453125, 13.154052734375, 13.61865234375, 14.083251953125, 14.5478515625, 15.012451171875, 15.47705078125, 15.941650390625, 16.40625]}, "gradients/decoder.transformer.h.11.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 4.0, 3.0, 8.0, 12.0, 13.0, 17.0, 38.0, 38.0, 43.0, 65.0, 77.0, 143.0, 179.0, 294.0, 365.0, 484.0, 523.0, 462.0, 413.0, 269.0, 210.0, 128.0, 72.0, 52.0, 47.0, 38.0, 20.0, 21.0, 15.0, 6.0, 8.0, 6.0, 5.0, 6.0, 1.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.546875, -12.09130859375, -11.6357421875, -11.18017578125, -10.724609375, -10.26904296875, -9.8134765625, -9.35791015625, -8.90234375, -8.44677734375, -7.9912109375, -7.53564453125, -7.080078125, -6.62451171875, -6.1689453125, -5.71337890625, -5.2578125, -4.80224609375, -4.3466796875, -3.89111328125, -3.435546875, -2.97998046875, -2.5244140625, -2.06884765625, -1.61328125, -1.15771484375, -0.7021484375, -0.24658203125, 0.208984375, 0.66455078125, 1.1201171875, 1.57568359375, 2.03125, 2.48681640625, 2.9423828125, 3.39794921875, 3.853515625, 4.30908203125, 4.7646484375, 5.22021484375, 5.67578125, 6.13134765625, 6.5869140625, 7.04248046875, 7.498046875, 7.95361328125, 8.4091796875, 8.86474609375, 9.3203125, 9.77587890625, 10.2314453125, 10.68701171875, 11.142578125, 11.59814453125, 12.0537109375, 12.50927734375, 12.96484375, 13.42041015625, 13.8759765625, 14.33154296875, 14.787109375, 15.24267578125, 15.6982421875, 16.15380859375, 16.609375]}, "gradients/decoder.transformer.h.11.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 7.0, 5.0, 4.0, 8.0, 9.0, 12.0, 11.0, 28.0, 40.0, 50.0, 51.0, 93.0, 137.0, 245.0, 381.0, 825.0, 45158.0, 4115479.0, 29888.0, 745.0, 393.0, 249.0, 154.0, 81.0, 67.0, 45.0, 45.0, 27.0, 17.0, 15.0, 6.0, 10.0, 4.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-78.75, -76.6484375, -74.546875, -72.4453125, -70.34375, -68.2421875, -66.140625, -64.0390625, -61.9375, -59.8359375, -57.734375, -55.6328125, -53.53125, -51.4296875, -49.328125, -47.2265625, -45.125, -43.0234375, -40.921875, -38.8203125, -36.71875, -34.6171875, -32.515625, -30.4140625, -28.3125, -26.2109375, -24.109375, -22.0078125, -19.90625, -17.8046875, -15.703125, -13.6015625, -11.5, -9.3984375, -7.296875, -5.1953125, -3.09375, -0.9921875, 1.109375, 3.2109375, 5.3125, 7.4140625, 9.515625, 11.6171875, 13.71875, 15.8203125, 17.921875, 20.0234375, 22.125, 24.2265625, 26.328125, 28.4296875, 30.53125, 32.6328125, 34.734375, 36.8359375, 38.9375, 41.0390625, 43.140625, 45.2421875, 47.34375, 49.4453125, 51.546875, 53.6484375, 55.75]}, "gradients/decoder.transformer.h.11.ln_2.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 15.0, 107.0, 355.0, 382.0, 135.0, 21.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-83.03279876708984, -76.68462371826172, -70.33644104003906, -63.98826599121094, -57.64009094238281, -51.29191589355469, -44.9437370300293, -38.595558166503906, -32.24738311767578, -25.899206161499023, -19.551029205322266, -13.202852249145508, -6.85467529296875, -0.5064983367919922, 5.841678619384766, 12.189857482910156, 18.53803253173828, 24.88620948791504, 31.234386444091797, 37.58256530761719, 43.93074035644531, 50.27891540527344, 56.62709426879883, 62.97527313232422, 69.32344818115234, 75.67162322998047, 82.01980590820312, 88.36798095703125, 94.71615600585938, 101.0643310546875, 107.41250610351562, 113.76068878173828, 120.10885620117188, 126.45703125, 132.80520629882812, 139.15338134765625, 145.50155639648438, 151.84974670410156, 158.1979217529297, 164.5460968017578, 170.89427185058594, 177.24244689941406, 183.5906219482422, 189.9387969970703, 196.2869873046875, 202.63516235351562, 208.98333740234375, 215.33151245117188, 221.6796875, 228.02786254882812, 234.37603759765625, 240.72421264648438, 247.0723876953125, 253.4205780029297, 259.76873779296875, 266.116943359375, 272.465087890625, 278.8132629394531, 285.16143798828125, 291.5096130371094, 297.8577880859375, 304.2059631347656, 310.55413818359375, 316.90234375, 323.2505187988281]}, "gradients/decoder.transformer.h.11.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 3.0, 3.0, 4.0, 3.0, 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 10.0, 18.0, 23.0, 26.0, 23.0, 31.0, 22.0, 35.0, 38.0, 32.0, 38.0, 42.0, 28.0, 42.0, 34.0, 49.0, 38.0, 46.0, 30.0, 36.0, 39.0, 32.0, 32.0, 28.0, 25.0, 22.0, 31.0, 13.0, 20.0, 10.0, 13.0, 10.0, 11.0, 7.0, 2.0, 8.0, 2.0, 3.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-33.16272735595703, -31.94219970703125, -30.721670150756836, -29.501142501831055, -28.280614852905273, -27.06008529663086, -25.839557647705078, -24.619029998779297, -23.398502349853516, -22.177974700927734, -20.95744514465332, -19.73691749572754, -18.516389846801758, -17.295860290527344, -16.075332641601562, -14.854804992675781, -13.634275436401367, -12.41374683380127, -11.193219184875488, -9.97269058227539, -8.75216293334961, -7.531634330749512, -6.311105728149414, -5.090578079223633, -3.870049476623535, -2.6495213508605957, -1.4289929866790771, -0.2084646224975586, 1.0120635032653809, 2.2325916290283203, 3.453120231628418, 4.673647880554199, 5.894176483154297, 7.114704608917236, 8.335232734680176, 9.555761337280273, 10.776288986206055, 11.996817588806152, 13.21734619140625, 14.437873840332031, 15.658402442932129, 16.878931045532227, 18.099458694458008, 19.319988250732422, 20.540515899658203, 21.761043548583984, 22.981571197509766, 24.202098846435547, 25.42262840270996, 26.643156051635742, 27.863685607910156, 29.084213256835938, 30.30474090576172, 31.5252685546875, 32.74579620361328, 33.96632385253906, 35.18685531616211, 36.40738296508789, 37.62791061401367, 38.84844207763672, 40.0689697265625, 41.28949737548828, 42.51002502441406, 43.730552673339844, 44.951080322265625]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 5.0, 3.0, 7.0, 9.0, 10.0, 8.0, 11.0, 16.0, 20.0, 23.0, 22.0, 42.0, 27.0, 36.0, 51.0, 47.0, 40.0, 44.0, 44.0, 59.0, 42.0, 44.0, 43.0, 41.0, 30.0, 57.0, 31.0, 23.0, 27.0, 24.0, 19.0, 18.0, 18.0, 13.0, 13.0, 10.0, 7.0, 7.0, 5.0, 3.0, 1.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.33984375, -6.1270751953125, -5.914306640625, -5.7015380859375, -5.48876953125, -5.2760009765625, -5.063232421875, -4.8504638671875, -4.6376953125, -4.4249267578125, -4.212158203125, -3.9993896484375, -3.78662109375, -3.5738525390625, -3.361083984375, -3.1483154296875, -2.935546875, -2.7227783203125, -2.510009765625, -2.2972412109375, -2.08447265625, -1.8717041015625, -1.658935546875, -1.4461669921875, -1.2333984375, -1.0206298828125, -0.807861328125, -0.5950927734375, -0.38232421875, -0.1695556640625, 0.043212890625, 0.2559814453125, 0.46875, 0.6815185546875, 0.894287109375, 1.1070556640625, 1.31982421875, 1.5325927734375, 1.745361328125, 1.9581298828125, 2.1708984375, 2.3836669921875, 2.596435546875, 2.8092041015625, 3.02197265625, 3.2347412109375, 3.447509765625, 3.6602783203125, 3.873046875, 4.0858154296875, 4.298583984375, 4.5113525390625, 4.72412109375, 4.9368896484375, 5.149658203125, 5.3624267578125, 5.5751953125, 5.7879638671875, 6.000732421875, 6.2135009765625, 6.42626953125, 6.6390380859375, 6.851806640625, 7.0645751953125, 7.27734375]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.weight": {"_type": "histogram", "values": [5.0, 5.0, 8.0, 7.0, 6.0, 11.0, 20.0, 27.0, 34.0, 50.0, 96.0, 125.0, 152.0, 235.0, 320.0, 454.0, 667.0, 993.0, 1514.0, 2129.0, 3085.0, 4600.0, 6753.0, 10237.0, 15179.0, 22716.0, 34503.0, 53355.0, 83043.0, 128209.0, 177811.0, 168302.0, 116096.0, 74945.0, 48178.0, 31375.0, 20674.0, 13846.0, 9463.0, 6175.0, 4092.0, 2803.0, 1912.0, 1374.0, 954.0, 600.0, 419.0, 300.0, 206.0, 140.0, 110.0, 76.0, 58.0, 45.0, 20.0, 14.0, 21.0, 13.0, 1.0, 9.0, 0.0, 1.0, 2.0, 3.0], "bins": [-0.40185546875, -0.3888435363769531, -0.37583160400390625, -0.3628196716308594, -0.3498077392578125, -0.3367958068847656, -0.32378387451171875, -0.3107719421386719, -0.297760009765625, -0.2847480773925781, -0.27173614501953125, -0.2587242126464844, -0.2457122802734375, -0.23270034790039062, -0.21968841552734375, -0.20667648315429688, -0.19366455078125, -0.18065261840820312, -0.16764068603515625, -0.15462875366210938, -0.1416168212890625, -0.12860488891601562, -0.11559295654296875, -0.10258102416992188, -0.089569091796875, -0.07655715942382812, -0.06354522705078125, -0.050533294677734375, -0.0375213623046875, -0.024509429931640625, -0.01149749755859375, 0.001514434814453125, 0.0145263671875, 0.027538299560546875, 0.04055023193359375, 0.053562164306640625, 0.0665740966796875, 0.07958602905273438, 0.09259796142578125, 0.10560989379882812, 0.118621826171875, 0.13163375854492188, 0.14464569091796875, 0.15765762329101562, 0.1706695556640625, 0.18368148803710938, 0.19669342041015625, 0.20970535278320312, 0.22271728515625, 0.23572921752929688, 0.24874114990234375, 0.2617530822753906, 0.2747650146484375, 0.2877769470214844, 0.30078887939453125, 0.3138008117675781, 0.326812744140625, 0.3398246765136719, 0.35283660888671875, 0.3658485412597656, 0.3788604736328125, 0.3918724060058594, 0.40488433837890625, 0.4178962707519531, 0.430908203125]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 2.0, 5.0, 7.0, 12.0, 7.0, 12.0, 10.0, 10.0, 8.0, 14.0, 16.0, 20.0, 29.0, 18.0, 26.0, 21.0, 44.0, 35.0, 41.0, 38.0, 42.0, 39.0, 1066.0, 38.0, 41.0, 35.0, 34.0, 43.0, 37.0, 25.0, 41.0, 29.0, 30.0, 26.0, 22.0, 20.0, 21.0, 15.0, 8.0, 11.0, 5.0, 7.0, 2.0, 2.0, 2.0, 7.0, 3.0, 1.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.59375, -3.47332763671875, -3.3529052734375, -3.23248291015625, -3.112060546875, -2.99163818359375, -2.8712158203125, -2.75079345703125, -2.63037109375, -2.50994873046875, -2.3895263671875, -2.26910400390625, -2.148681640625, -2.02825927734375, -1.9078369140625, -1.78741455078125, -1.6669921875, -1.54656982421875, -1.4261474609375, -1.30572509765625, -1.185302734375, -1.06488037109375, -0.9444580078125, -0.82403564453125, -0.70361328125, -0.58319091796875, -0.4627685546875, -0.34234619140625, -0.221923828125, -0.10150146484375, 0.0189208984375, 0.13934326171875, 0.259765625, 0.38018798828125, 0.5006103515625, 0.62103271484375, 0.741455078125, 0.86187744140625, 0.9822998046875, 1.10272216796875, 1.22314453125, 1.34356689453125, 1.4639892578125, 1.58441162109375, 1.704833984375, 1.82525634765625, 1.9456787109375, 2.06610107421875, 2.1865234375, 2.30694580078125, 2.4273681640625, 2.54779052734375, 2.668212890625, 2.78863525390625, 2.9090576171875, 3.02947998046875, 3.14990234375, 3.27032470703125, 3.3907470703125, 3.51116943359375, 3.631591796875, 3.75201416015625, 3.8724365234375, 3.99285888671875, 4.11328125]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 6.0, 15.0, 11.0, 12.0, 17.0, 35.0, 52.0, 93.0, 117.0, 158.0, 239.0, 314.0, 493.0, 727.0, 1028.0, 1400.0, 2124.0, 2934.0, 4316.0, 6134.0, 8946.0, 12722.0, 17912.0, 26065.0, 37977.0, 55498.0, 80954.0, 114855.0, 440824.0, 901668.0, 116469.0, 81699.0, 55875.0, 38222.0, 26399.0, 18523.0, 12599.0, 8839.0, 6369.0, 4433.0, 3096.0, 2166.0, 1509.0, 1038.0, 682.0, 492.0, 366.0, 229.0, 152.0, 112.0, 65.0, 50.0, 38.0, 29.0, 16.0, 18.0, 5.0, 2.0, 4.0, 3.0, 2.0], "bins": [-0.323486328125, -0.3133811950683594, -0.30327606201171875, -0.2931709289550781, -0.2830657958984375, -0.2729606628417969, -0.26285552978515625, -0.2527503967285156, -0.242645263671875, -0.23254013061523438, -0.22243499755859375, -0.21232986450195312, -0.2022247314453125, -0.19211959838867188, -0.18201446533203125, -0.17190933227539062, -0.16180419921875, -0.15169906616210938, -0.14159393310546875, -0.13148880004882812, -0.1213836669921875, -0.11127853393554688, -0.10117340087890625, -0.09106826782226562, -0.080963134765625, -0.07085800170898438, -0.06075286865234375, -0.050647735595703125, -0.0405426025390625, -0.030437469482421875, -0.02033233642578125, -0.010227203369140625, -0.0001220703125, 0.009983062744140625, 0.02008819580078125, 0.030193328857421875, 0.0402984619140625, 0.050403594970703125, 0.06050872802734375, 0.07061386108398438, 0.080718994140625, 0.09082412719726562, 0.10092926025390625, 0.11103439331054688, 0.1211395263671875, 0.13124465942382812, 0.14134979248046875, 0.15145492553710938, 0.16156005859375, 0.17166519165039062, 0.18177032470703125, 0.19187545776367188, 0.2019805908203125, 0.21208572387695312, 0.22219085693359375, 0.23229598999023438, 0.242401123046875, 0.2525062561035156, 0.26261138916015625, 0.2727165222167969, 0.2828216552734375, 0.2929267883300781, 0.30303192138671875, 0.3131370544433594, 0.3232421875]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 2.0, 3.0, 7.0, 4.0, 7.0, 7.0, 12.0, 6.0, 7.0, 18.0, 19.0, 22.0, 30.0, 29.0, 43.0, 52.0, 51.0, 57.0, 60.0, 54.0, 56.0, 50.0, 62.0, 52.0, 44.0, 47.0, 34.0, 33.0, 36.0, 20.0, 13.0, 14.0, 14.0, 8.0, 8.0, 5.0, 2.0, 6.0, 5.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.001148223876953125, -0.001107737421989441, -0.0010672509670257568, -0.0010267645120620728, -0.0009862780570983887, -0.0009457916021347046, -0.0009053051471710205, -0.0008648186922073364, -0.0008243322372436523, -0.0007838457822799683, -0.0007433593273162842, -0.0007028728723526001, -0.000662386417388916, -0.0006218999624252319, -0.0005814135074615479, -0.0005409270524978638, -0.0005004405975341797, -0.0004599541425704956, -0.0004194676876068115, -0.00037898123264312744, -0.00033849477767944336, -0.0002980083227157593, -0.0002575218677520752, -0.0002170354127883911, -0.00017654895782470703, -0.00013606250286102295, -9.557604789733887e-05, -5.5089592933654785e-05, -1.4603137969970703e-05, 2.588331699371338e-05, 6.636977195739746e-05, 0.00010685622692108154, 0.00014734268188476562, 0.0001878291368484497, 0.0002283155918121338, 0.00026880204677581787, 0.00030928850173950195, 0.00034977495670318604, 0.0003902614116668701, 0.0004307478666305542, 0.0004712343215942383, 0.0005117207765579224, 0.0005522072315216064, 0.0005926936864852905, 0.0006331801414489746, 0.0006736665964126587, 0.0007141530513763428, 0.0007546395063400269, 0.0007951259613037109, 0.000835612416267395, 0.0008760988712310791, 0.0009165853261947632, 0.0009570717811584473, 0.0009975582361221313, 0.0010380446910858154, 0.0010785311460494995, 0.0011190176010131836, 0.0011595040559768677, 0.0011999905109405518, 0.0012404769659042358, 0.00128096342086792, 0.001321449875831604, 0.001361936330795288, 0.0014024227857589722, 0.0014429092407226562]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 7.0, 4.0, 8.0, 8.0, 8.0, 10.0, 19.0, 24.0, 31.0, 34.0, 56.0, 57.0, 98.0, 130.0, 204.0, 427.0, 1436.0, 422944.0, 620195.0, 1668.0, 435.0, 220.0, 145.0, 93.0, 56.0, 49.0, 39.0, 35.0, 28.0, 15.0, 10.0, 11.0, 12.0, 10.0, 6.0, 4.0, 3.0, 6.0, 0.0, 2.0, 5.0, 2.0, 3.0, 1.0, 2.0], "bins": [-0.032379150390625, -0.03150486946105957, -0.03063058853149414, -0.02975630760192871, -0.02888202667236328, -0.02800774574279785, -0.027133464813232422, -0.026259183883666992, -0.025384902954101562, -0.024510622024536133, -0.023636341094970703, -0.022762060165405273, -0.021887779235839844, -0.021013498306274414, -0.020139217376708984, -0.019264936447143555, -0.018390655517578125, -0.017516374588012695, -0.016642093658447266, -0.015767812728881836, -0.014893531799316406, -0.014019250869750977, -0.013144969940185547, -0.012270689010620117, -0.011396408081054688, -0.010522127151489258, -0.009647846221923828, -0.008773565292358398, -0.007899284362792969, -0.007025003433227539, -0.006150722503662109, -0.00527644157409668, -0.00440216064453125, -0.0035278797149658203, -0.0026535987854003906, -0.001779317855834961, -0.0009050369262695312, -3.075599670410156e-05, 0.0008435249328613281, 0.0017178058624267578, 0.0025920867919921875, 0.003466367721557617, 0.004340648651123047, 0.0052149295806884766, 0.006089210510253906, 0.006963491439819336, 0.007837772369384766, 0.008712053298950195, 0.009586334228515625, 0.010460615158081055, 0.011334896087646484, 0.012209177017211914, 0.013083457946777344, 0.013957738876342773, 0.014832019805908203, 0.015706300735473633, 0.016580581665039062, 0.017454862594604492, 0.018329143524169922, 0.01920342445373535, 0.02007770538330078, 0.02095198631286621, 0.02182626724243164, 0.02270054817199707, 0.0235748291015625]}, "gradients/decoder.transformer.h.11.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 356.0, 661.0], "bins": [-0.0279895830899477, -0.02754577435553074, -0.027101963758468628, -0.026658155024051666, -0.026214346289634705, -0.025770535692572594, -0.025326726958155632, -0.02488291636109352, -0.02443910762667656, -0.023995298892259598, -0.023551488295197487, -0.023107679560780525, -0.022663868963718414, -0.022220060229301453, -0.02177625149488449, -0.02133244089782238, -0.02088863216340542, -0.020444823428988457, -0.020001012831926346, -0.019557204097509384, -0.019113395363092422, -0.01866958476603031, -0.01822577603161335, -0.01778196543455124, -0.017338156700134277, -0.016894347965717316, -0.016450537368655205, -0.016006728634238243, -0.015562918968498707, -0.01511910930275917, -0.014675300568342209, -0.014231490902602673, -0.013787681236863136, -0.0133438715711236, -0.012900061905384064, -0.012456253170967102, -0.012012443505227566, -0.01156863383948803, -0.011124825105071068, -0.010681015439331532, -0.010237205773591995, -0.009793396107852459, -0.009349586442112923, -0.008905777707695961, -0.008461968041956425, -0.008018158376216888, -0.0075743491761386395, -0.0071305399760603905, -0.006686730310320854, -0.006242920644581318, -0.005799111444503069, -0.00535530224442482, -0.004911492578685284, -0.004467682912945747, -0.004023873712867498, -0.0035800642799586058, -0.003136254847049713, -0.0026924454141408205, -0.002248635981231928, -0.0018048265483230352, -0.0013610171154141426, -0.00091720768250525, -0.00047339824959635735, -2.9588816687464714e-05, 0.0004142206453252584]}, "gradients/decoder.transformer.h.11.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 3.0, 4.0, 7.0, 3.0, 6.0, 5.0, 12.0, 13.0, 25.0, 20.0, 25.0, 34.0, 31.0, 42.0, 56.0, 57.0, 55.0, 47.0, 60.0, 64.0, 60.0, 50.0, 36.0, 45.0, 37.0, 42.0, 27.0, 39.0, 33.0, 12.0, 16.0, 16.0, 11.0, 6.0, 4.0, 2.0, 1.0, 3.0, 1.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005998015403747559, -0.0005710385739803314, -0.000542275607585907, -0.0005135126411914825, -0.0004847496747970581, -0.00045598670840263367, -0.00042722374200820923, -0.0003984607756137848, -0.00036969780921936035, -0.0003409348428249359, -0.0003121718764305115, -0.00028340891003608704, -0.0002546459436416626, -0.00022588297724723816, -0.00019712001085281372, -0.00016835704445838928, -0.00013959407806396484, -0.0001108311116695404, -8.206814527511597e-05, -5.330517888069153e-05, -2.454221248626709e-05, 4.220753908157349e-06, 3.298372030258179e-05, 6.174668669700623e-05, 9.050965309143066e-05, 0.0001192726194858551, 0.00014803558588027954, 0.00017679855227470398, 0.00020556151866912842, 0.00023432448506355286, 0.0002630874514579773, 0.00029185041785240173, 0.00032061338424682617, 0.0003493763506412506, 0.00037813931703567505, 0.0004069022834300995, 0.0004356652498245239, 0.00046442821621894836, 0.0004931911826133728, 0.0005219541490077972, 0.0005507171154022217, 0.0005794800817966461, 0.0006082430481910706, 0.000637006014585495, 0.0006657689809799194, 0.0006945319473743439, 0.0007232949137687683, 0.0007520578801631927, 0.0007808208465576172, 0.0008095838129520416, 0.0008383467793464661, 0.0008671097457408905, 0.0008958727121353149, 0.0009246356785297394, 0.0009533986449241638, 0.0009821616113185883, 0.0010109245777130127, 0.0010396875441074371, 0.0010684505105018616, 0.001097213476896286, 0.0011259764432907104, 0.0011547394096851349, 0.0011835023760795593, 0.0012122653424739838, 0.0012410283088684082]}, "gradients/decoder.transformer.h.11.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 5.0, 3.0, 7.0, 9.0, 10.0, 8.0, 11.0, 16.0, 20.0, 23.0, 22.0, 42.0, 27.0, 36.0, 51.0, 47.0, 40.0, 44.0, 44.0, 59.0, 42.0, 44.0, 43.0, 41.0, 30.0, 57.0, 31.0, 23.0, 27.0, 24.0, 19.0, 18.0, 18.0, 13.0, 13.0, 10.0, 7.0, 7.0, 5.0, 3.0, 1.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.33984375, -6.1270751953125, -5.914306640625, -5.7015380859375, -5.48876953125, -5.2760009765625, -5.063232421875, -4.8504638671875, -4.6376953125, -4.4249267578125, -4.212158203125, -3.9993896484375, -3.78662109375, -3.5738525390625, -3.361083984375, -3.1483154296875, -2.935546875, -2.7227783203125, -2.510009765625, -2.2972412109375, -2.08447265625, -1.8717041015625, -1.658935546875, -1.4461669921875, -1.2333984375, -1.0206298828125, -0.807861328125, -0.5950927734375, -0.38232421875, -0.1695556640625, 0.043212890625, 0.2559814453125, 0.46875, 0.6815185546875, 0.894287109375, 1.1070556640625, 1.31982421875, 1.5325927734375, 1.745361328125, 1.9581298828125, 2.1708984375, 2.3836669921875, 2.596435546875, 2.8092041015625, 3.02197265625, 3.2347412109375, 3.447509765625, 3.6602783203125, 3.873046875, 4.0858154296875, 4.298583984375, 4.5113525390625, 4.72412109375, 4.9368896484375, 5.149658203125, 5.3624267578125, 5.5751953125, 5.7879638671875, 6.000732421875, 6.2135009765625, 6.42626953125, 6.6390380859375, 6.851806640625, 7.0645751953125, 7.27734375]}, "gradients/decoder.transformer.h.11.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 4.0, 5.0, 6.0, 11.0, 11.0, 14.0, 22.0, 39.0, 40.0, 60.0, 91.0, 137.0, 254.0, 482.0, 991.0, 2105.0, 4293.0, 8760.0, 18017.0, 39115.0, 98200.0, 263284.0, 354660.0, 152265.0, 57362.0, 24824.0, 11880.0, 5794.0, 2839.0, 1399.0, 684.0, 364.0, 188.0, 112.0, 74.0, 51.0, 29.0, 32.0, 19.0, 11.0, 12.0, 9.0, 3.0, 0.0, 4.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-6.2890625, -6.0809326171875, -5.872802734375, -5.6646728515625, -5.45654296875, -5.2484130859375, -5.040283203125, -4.8321533203125, -4.6240234375, -4.4158935546875, -4.207763671875, -3.9996337890625, -3.79150390625, -3.5833740234375, -3.375244140625, -3.1671142578125, -2.958984375, -2.7508544921875, -2.542724609375, -2.3345947265625, -2.12646484375, -1.9183349609375, -1.710205078125, -1.5020751953125, -1.2939453125, -1.0858154296875, -0.877685546875, -0.6695556640625, -0.46142578125, -0.2532958984375, -0.045166015625, 0.1629638671875, 0.37109375, 0.5792236328125, 0.787353515625, 0.9954833984375, 1.20361328125, 1.4117431640625, 1.619873046875, 1.8280029296875, 2.0361328125, 2.2442626953125, 2.452392578125, 2.6605224609375, 2.86865234375, 3.0767822265625, 3.284912109375, 3.4930419921875, 3.701171875, 3.9093017578125, 4.117431640625, 4.3255615234375, 4.53369140625, 4.7418212890625, 4.949951171875, 5.1580810546875, 5.3662109375, 5.5743408203125, 5.782470703125, 5.9906005859375, 6.19873046875, 6.4068603515625, 6.614990234375, 6.8231201171875, 7.03125]}, "gradients/decoder.transformer.h.11.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 5.0, 6.0, 8.0, 8.0, 13.0, 7.0, 8.0, 17.0, 28.0, 27.0, 28.0, 29.0, 34.0, 38.0, 49.0, 62.0, 99.0, 150.0, 397.0, 1425.0, 164.0, 92.0, 57.0, 59.0, 31.0, 34.0, 36.0, 23.0, 34.0, 21.0, 14.0, 9.0, 11.0, 11.0, 2.0, 7.0, 3.0, 5.0, 3.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.609375, -27.7919921875, -26.974609375, -26.1572265625, -25.33984375, -24.5224609375, -23.705078125, -22.8876953125, -22.0703125, -21.2529296875, -20.435546875, -19.6181640625, -18.80078125, -17.9833984375, -17.166015625, -16.3486328125, -15.53125, -14.7138671875, -13.896484375, -13.0791015625, -12.26171875, -11.4443359375, -10.626953125, -9.8095703125, -8.9921875, -8.1748046875, -7.357421875, -6.5400390625, -5.72265625, -4.9052734375, -4.087890625, -3.2705078125, -2.453125, -1.6357421875, -0.818359375, -0.0009765625, 0.81640625, 1.6337890625, 2.451171875, 3.2685546875, 4.0859375, 4.9033203125, 5.720703125, 6.5380859375, 7.35546875, 8.1728515625, 8.990234375, 9.8076171875, 10.625, 11.4423828125, 12.259765625, 13.0771484375, 13.89453125, 14.7119140625, 15.529296875, 16.3466796875, 17.1640625, 17.9814453125, 18.798828125, 19.6162109375, 20.43359375, 21.2509765625, 22.068359375, 22.8857421875, 23.703125]}, "gradients/decoder.transformer.h.11.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 6.0, 4.0, 9.0, 12.0, 11.0, 17.0, 20.0, 32.0, 38.0, 66.0, 84.0, 154.0, 215.0, 375.0, 890.0, 38517.0, 3099070.0, 4633.0, 645.0, 325.0, 174.0, 130.0, 78.0, 56.0, 45.0, 36.0, 20.0, 12.0, 14.0, 3.0, 6.0, 3.0, 5.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-85.4375, -82.7099609375, -79.982421875, -77.2548828125, -74.52734375, -71.7998046875, -69.072265625, -66.3447265625, -63.6171875, -60.8896484375, -58.162109375, -55.4345703125, -52.70703125, -49.9794921875, -47.251953125, -44.5244140625, -41.796875, -39.0693359375, -36.341796875, -33.6142578125, -30.88671875, -28.1591796875, -25.431640625, -22.7041015625, -19.9765625, -17.2490234375, -14.521484375, -11.7939453125, -9.06640625, -6.3388671875, -3.611328125, -0.8837890625, 1.84375, 4.5712890625, 7.298828125, 10.0263671875, 12.75390625, 15.4814453125, 18.208984375, 20.9365234375, 23.6640625, 26.3916015625, 29.119140625, 31.8466796875, 34.57421875, 37.3017578125, 40.029296875, 42.7568359375, 45.484375, 48.2119140625, 50.939453125, 53.6669921875, 56.39453125, 59.1220703125, 61.849609375, 64.5771484375, 67.3046875, 70.0322265625, 72.759765625, 75.4873046875, 78.21484375, 80.9423828125, 83.669921875, 86.3974609375, 89.125]}, "gradients/decoder.transformer.h.11.ln_1.weight": {"_type": "histogram", "values": [101.0, 853.0, 61.0, 3.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.375727653503418, -4.142389297485352, 4.090949058532715, 12.324288368225098, 20.55762481689453, 28.79096221923828, 37.0243034362793, 45.25764083862305, 53.4909782409668, 61.72431564331055, 69.95765686035156, 78.19099426269531, 86.42433166503906, 94.65766906738281, 102.89100646972656, 111.12434387207031, 119.3576889038086, 127.59102630615234, 135.82437133789062, 144.05770874023438, 152.29104614257812, 160.52438354492188, 168.75772094726562, 176.99105834960938, 185.22439575195312, 193.45773315429688, 201.69107055664062, 209.92440795898438, 218.15774536132812, 226.39108276367188, 234.62442016601562, 242.85775756835938, 251.09109497070312, 259.3244323730469, 267.5577697753906, 275.7911071777344, 284.0244445800781, 292.2577819824219, 300.4911193847656, 308.7244567871094, 316.9577941894531, 325.1911315917969, 333.4244689941406, 341.6578063964844, 349.8911437988281, 358.1244812011719, 366.3578186035156, 374.5911560058594, 382.82452392578125, 391.057861328125, 399.29119873046875, 407.5245361328125, 415.75787353515625, 423.9912109375, 432.22454833984375, 440.4578857421875, 448.69122314453125, 456.924560546875, 465.15789794921875, 473.3912353515625, 481.62457275390625, 489.85791015625, 498.09124755859375, 506.3245849609375, 514.5579223632812]}, "gradients/decoder.transformer.h.11.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 3.0, 4.0, 5.0, 10.0, 9.0, 8.0, 15.0, 18.0, 14.0, 22.0, 12.0, 18.0, 12.0, 25.0, 26.0, 29.0, 28.0, 37.0, 38.0, 39.0, 45.0, 38.0, 26.0, 46.0, 35.0, 28.0, 40.0, 34.0, 31.0, 26.0, 38.0, 31.0, 30.0, 35.0, 14.0, 29.0, 20.0, 15.0, 14.0, 9.0, 13.0, 4.0, 6.0, 6.0, 9.0, 7.0, 3.0, 4.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-48.27090835571289, -46.7557487487793, -45.2405891418457, -43.72542953491211, -42.21026611328125, -40.695106506347656, -39.17994689941406, -37.66478729248047, -36.149627685546875, -34.63446807861328, -33.11930847167969, -31.60414695739746, -30.088987350463867, -28.573827743530273, -27.058666229248047, -25.543506622314453, -24.02834701538086, -22.513187408447266, -20.998027801513672, -19.482866287231445, -17.96770668029785, -16.452547073364258, -14.937386512756348, -13.422225952148438, -11.907066345214844, -10.39190673828125, -8.87674617767334, -7.361586093902588, -5.846426010131836, -4.331265926361084, -2.816105842590332, -1.3009452819824219, 0.21421432495117188, 1.7293744087219238, 3.244534492492676, 4.759694576263428, 6.27485466003418, 7.790014743804932, 9.305174827575684, 10.820335388183594, 12.335494995117188, 13.850654602050781, 15.365815162658691, 16.8809757232666, 18.396135330200195, 19.91129493713379, 21.426456451416016, 22.94161605834961, 24.456775665283203, 25.971935272216797, 27.48709487915039, 29.002256393432617, 30.51741600036621, 32.03257751464844, 33.54773712158203, 35.062896728515625, 36.57805633544922, 38.09321594238281, 39.608375549316406, 41.12353515625, 42.638694763183594, 44.15385818481445, 45.66901779174805, 47.18417739868164, 48.699337005615234]}, "gradients/decoder.transformer.h.10.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 5.0, 2.0, 6.0, 7.0, 10.0, 10.0, 9.0, 14.0, 10.0, 19.0, 25.0, 21.0, 30.0, 33.0, 51.0, 38.0, 36.0, 24.0, 51.0, 42.0, 59.0, 38.0, 46.0, 38.0, 41.0, 39.0, 31.0, 44.0, 23.0, 29.0, 28.0, 26.0, 19.0, 15.0, 14.0, 17.0, 17.0, 7.0, 6.0, 12.0, 5.0, 6.0, 5.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.4375, -6.2113037109375, -5.985107421875, -5.7589111328125, -5.53271484375, -5.3065185546875, -5.080322265625, -4.8541259765625, -4.6279296875, -4.4017333984375, -4.175537109375, -3.9493408203125, -3.72314453125, -3.4969482421875, -3.270751953125, -3.0445556640625, -2.818359375, -2.5921630859375, -2.365966796875, -2.1397705078125, -1.91357421875, -1.6873779296875, -1.461181640625, -1.2349853515625, -1.0087890625, -0.7825927734375, -0.556396484375, -0.3302001953125, -0.10400390625, 0.1221923828125, 0.348388671875, 0.5745849609375, 0.80078125, 1.0269775390625, 1.253173828125, 1.4793701171875, 1.70556640625, 1.9317626953125, 2.157958984375, 2.3841552734375, 2.6103515625, 2.8365478515625, 3.062744140625, 3.2889404296875, 3.51513671875, 3.7413330078125, 3.967529296875, 4.1937255859375, 4.419921875, 4.6461181640625, 4.872314453125, 5.0985107421875, 5.32470703125, 5.5509033203125, 5.777099609375, 6.0032958984375, 6.2294921875, 6.4556884765625, 6.681884765625, 6.9080810546875, 7.13427734375, 7.3604736328125, 7.586669921875, 7.8128662109375, 8.0390625]}, "gradients/decoder.transformer.h.10.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 2.0, 3.0, 1.0, 4.0, 7.0, 15.0, 16.0, 13.0, 17.0, 34.0, 19.0, 36.0, 62.0, 69.0, 91.0, 126.0, 210.0, 342.0, 815.0, 3590.0, 49210.0, 1095016.0, 2735125.0, 293462.0, 13136.0, 1527.0, 483.0, 252.0, 161.0, 96.0, 73.0, 62.0, 44.0, 39.0, 26.0, 20.0, 22.0, 11.0, 7.0, 17.0, 9.0, 8.0, 3.0, 4.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.09375, -17.49609375, -16.8984375, -16.30078125, -15.703125, -15.10546875, -14.5078125, -13.91015625, -13.3125, -12.71484375, -12.1171875, -11.51953125, -10.921875, -10.32421875, -9.7265625, -9.12890625, -8.53125, -7.93359375, -7.3359375, -6.73828125, -6.140625, -5.54296875, -4.9453125, -4.34765625, -3.75, -3.15234375, -2.5546875, -1.95703125, -1.359375, -0.76171875, -0.1640625, 0.43359375, 1.03125, 1.62890625, 2.2265625, 2.82421875, 3.421875, 4.01953125, 4.6171875, 5.21484375, 5.8125, 6.41015625, 7.0078125, 7.60546875, 8.203125, 8.80078125, 9.3984375, 9.99609375, 10.59375, 11.19140625, 11.7890625, 12.38671875, 12.984375, 13.58203125, 14.1796875, 14.77734375, 15.375, 15.97265625, 16.5703125, 17.16796875, 17.765625, 18.36328125, 18.9609375, 19.55859375, 20.15625]}, "gradients/decoder.transformer.h.10.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 8.0, 7.0, 13.0, 11.0, 14.0, 26.0, 43.0, 54.0, 66.0, 87.0, 138.0, 222.0, 287.0, 406.0, 466.0, 518.0, 415.0, 357.0, 262.0, 195.0, 132.0, 89.0, 79.0, 56.0, 33.0, 29.0, 20.0, 14.0, 8.0, 4.0, 8.0, 4.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.0859375, -13.6221923828125, -13.158447265625, -12.6947021484375, -12.23095703125, -11.7672119140625, -11.303466796875, -10.8397216796875, -10.3759765625, -9.9122314453125, -9.448486328125, -8.9847412109375, -8.52099609375, -8.0572509765625, -7.593505859375, -7.1297607421875, -6.666015625, -6.2022705078125, -5.738525390625, -5.2747802734375, -4.81103515625, -4.3472900390625, -3.883544921875, -3.4197998046875, -2.9560546875, -2.4923095703125, -2.028564453125, -1.5648193359375, -1.10107421875, -0.6373291015625, -0.173583984375, 0.2901611328125, 0.75390625, 1.2176513671875, 1.681396484375, 2.1451416015625, 2.60888671875, 3.0726318359375, 3.536376953125, 4.0001220703125, 4.4638671875, 4.9276123046875, 5.391357421875, 5.8551025390625, 6.31884765625, 6.7825927734375, 7.246337890625, 7.7100830078125, 8.173828125, 8.6375732421875, 9.101318359375, 9.5650634765625, 10.02880859375, 10.4925537109375, 10.956298828125, 11.4200439453125, 11.8837890625, 12.3475341796875, 12.811279296875, 13.2750244140625, 13.73876953125, 14.2025146484375, 14.666259765625, 15.1300048828125, 15.59375]}, "gradients/decoder.transformer.h.10.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 5.0, 6.0, 18.0, 14.0, 29.0, 39.0, 56.0, 71.0, 99.0, 152.0, 210.0, 355.0, 624.0, 6151.0, 3625400.0, 557827.0, 1781.0, 541.0, 289.0, 212.0, 111.0, 76.0, 64.0, 43.0, 32.0, 19.0, 11.0, 12.0, 12.0, 6.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-69.9375, -67.8701171875, -65.802734375, -63.7353515625, -61.66796875, -59.6005859375, -57.533203125, -55.4658203125, -53.3984375, -51.3310546875, -49.263671875, -47.1962890625, -45.12890625, -43.0615234375, -40.994140625, -38.9267578125, -36.859375, -34.7919921875, -32.724609375, -30.6572265625, -28.58984375, -26.5224609375, -24.455078125, -22.3876953125, -20.3203125, -18.2529296875, -16.185546875, -14.1181640625, -12.05078125, -9.9833984375, -7.916015625, -5.8486328125, -3.78125, -1.7138671875, 0.353515625, 2.4208984375, 4.48828125, 6.5556640625, 8.623046875, 10.6904296875, 12.7578125, 14.8251953125, 16.892578125, 18.9599609375, 21.02734375, 23.0947265625, 25.162109375, 27.2294921875, 29.296875, 31.3642578125, 33.431640625, 35.4990234375, 37.56640625, 39.6337890625, 41.701171875, 43.7685546875, 45.8359375, 47.9033203125, 49.970703125, 52.0380859375, 54.10546875, 56.1728515625, 58.240234375, 60.3076171875, 62.375]}, "gradients/decoder.transformer.h.10.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 62.0, 367.0, 467.0, 106.0, 12.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-76.62886047363281, -69.0408706665039, -61.452880859375, -53.86488723754883, -46.27689743041992, -38.688907623291016, -31.100914001464844, -23.512924194335938, -15.924934387207031, -8.336943626403809, -0.7489528656005859, 6.839038848876953, 14.42702865600586, 22.015018463134766, 29.603012084960938, 37.191001892089844, 44.77899169921875, 52.366981506347656, 59.95497131347656, 67.54296875, 75.13095092773438, 82.71894836425781, 90.30693817138672, 97.89492797851562, 105.48291778564453, 113.07090759277344, 120.65889739990234, 128.24688720703125, 135.8348846435547, 143.42286682128906, 151.0108642578125, 158.59884643554688, 166.18682861328125, 173.7748260498047, 181.36280822753906, 188.9508056640625, 196.53878784179688, 204.1267852783203, 211.71478271484375, 219.30276489257812, 226.8907470703125, 234.47874450683594, 242.0667266845703, 249.65472412109375, 257.2427062988281, 264.8306884765625, 272.418701171875, 280.0066833496094, 287.59466552734375, 295.1826477050781, 302.7706604003906, 310.358642578125, 317.9466247558594, 325.53460693359375, 333.12261962890625, 340.7106018066406, 348.2986145019531, 355.8865966796875, 363.474609375, 371.0625915527344, 378.65057373046875, 386.2385559082031, 393.8265686035156, 401.41455078125, 409.0025329589844]}, "gradients/decoder.transformer.h.10.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 5.0, 2.0, 5.0, 5.0, 4.0, 7.0, 6.0, 8.0, 11.0, 8.0, 14.0, 18.0, 18.0, 23.0, 18.0, 33.0, 27.0, 38.0, 29.0, 39.0, 40.0, 39.0, 35.0, 32.0, 39.0, 55.0, 44.0, 33.0, 42.0, 37.0, 40.0, 32.0, 31.0, 26.0, 22.0, 23.0, 18.0, 19.0, 16.0, 13.0, 8.0, 7.0, 7.0, 10.0, 5.0, 5.0, 6.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-44.74001693725586, -43.415733337402344, -42.09144592285156, -40.76716232299805, -39.44287872314453, -38.11859130859375, -36.794307708740234, -35.47002410888672, -34.14573669433594, -32.82145309448242, -31.497167587280273, -30.172882080078125, -28.84859848022461, -27.52431297302246, -26.200027465820312, -24.875743865966797, -23.55146026611328, -22.227174758911133, -20.902891159057617, -19.57860565185547, -18.254322052001953, -16.930036544799805, -15.605751037597656, -14.281466484069824, -12.957181930541992, -11.63289737701416, -10.308612823486328, -8.98432731628418, -7.660042762756348, -6.335758209228516, -5.011473178863525, -3.687188148498535, -2.3628997802734375, -1.0386149883270264, 0.28566980361938477, 1.609954595565796, 2.934239387512207, 4.258523941040039, 5.582808971405029, 6.9070940017700195, 8.231378555297852, 9.555663108825684, 10.879947662353516, 12.204233169555664, 13.528517723083496, 14.852802276611328, 16.177087783813477, 17.501373291015625, 18.82565689086914, 20.14994239807129, 21.474225997924805, 22.798511505126953, 24.12279510498047, 25.447080612182617, 26.771366119384766, 28.09564971923828, 29.41993522644043, 30.744220733642578, 32.068504333496094, 33.39278793334961, 34.71707534790039, 36.041358947753906, 37.36564254760742, 38.6899299621582, 40.01421356201172]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 4.0, 6.0, 6.0, 11.0, 12.0, 16.0, 13.0, 12.0, 21.0, 29.0, 26.0, 32.0, 38.0, 33.0, 34.0, 42.0, 44.0, 48.0, 37.0, 47.0, 58.0, 49.0, 42.0, 43.0, 44.0, 37.0, 21.0, 26.0, 24.0, 24.0, 13.0, 22.0, 17.0, 17.0, 12.0, 7.0, 17.0, 9.0, 6.0, 4.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-7.45703125, -7.24188232421875, -7.0267333984375, -6.81158447265625, -6.596435546875, -6.38128662109375, -6.1661376953125, -5.95098876953125, -5.73583984375, -5.52069091796875, -5.3055419921875, -5.09039306640625, -4.875244140625, -4.66009521484375, -4.4449462890625, -4.22979736328125, -4.0146484375, -3.79949951171875, -3.5843505859375, -3.36920166015625, -3.154052734375, -2.93890380859375, -2.7237548828125, -2.50860595703125, -2.29345703125, -2.07830810546875, -1.8631591796875, -1.64801025390625, -1.432861328125, -1.21771240234375, -1.0025634765625, -0.78741455078125, -0.572265625, -0.35711669921875, -0.1419677734375, 0.07318115234375, 0.288330078125, 0.50347900390625, 0.7186279296875, 0.93377685546875, 1.14892578125, 1.36407470703125, 1.5792236328125, 1.79437255859375, 2.009521484375, 2.22467041015625, 2.4398193359375, 2.65496826171875, 2.8701171875, 3.08526611328125, 3.3004150390625, 3.51556396484375, 3.730712890625, 3.94586181640625, 4.1610107421875, 4.37615966796875, 4.59130859375, 4.80645751953125, 5.0216064453125, 5.23675537109375, 5.451904296875, 5.66705322265625, 5.8822021484375, 6.09735107421875, 6.3125]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 4.0, 2.0, 3.0, 3.0, 6.0, 8.0, 14.0, 11.0, 26.0, 38.0, 49.0, 74.0, 100.0, 149.0, 237.0, 325.0, 490.0, 711.0, 1076.0, 1552.0, 2193.0, 3509.0, 5023.0, 7573.0, 11442.0, 17476.0, 26631.0, 40192.0, 60999.0, 92166.0, 138853.0, 178494.0, 153339.0, 104069.0, 68644.0, 45024.0, 29419.0, 19762.0, 12790.0, 8563.0, 5677.0, 3885.0, 2552.0, 1731.0, 1188.0, 819.0, 530.0, 361.0, 241.0, 164.0, 128.0, 94.0, 54.0, 37.0, 21.0, 15.0, 11.0, 8.0, 7.0, 5.0, 4.0, 0.0, 2.0], "bins": [-0.4296875, -0.4165229797363281, -0.40335845947265625, -0.3901939392089844, -0.3770294189453125, -0.3638648986816406, -0.35070037841796875, -0.3375358581542969, -0.324371337890625, -0.3112068176269531, -0.29804229736328125, -0.2848777770996094, -0.2717132568359375, -0.2585487365722656, -0.24538421630859375, -0.23221969604492188, -0.21905517578125, -0.20589065551757812, -0.19272613525390625, -0.17956161499023438, -0.1663970947265625, -0.15323257446289062, -0.14006805419921875, -0.12690353393554688, -0.113739013671875, -0.10057449340820312, -0.08740997314453125, -0.07424545288085938, -0.0610809326171875, -0.047916412353515625, -0.03475189208984375, -0.021587371826171875, -0.0084228515625, 0.004741668701171875, 0.01790618896484375, 0.031070709228515625, 0.0442352294921875, 0.057399749755859375, 0.07056427001953125, 0.08372879028320312, 0.096893310546875, 0.11005783081054688, 0.12322235107421875, 0.13638687133789062, 0.1495513916015625, 0.16271591186523438, 0.17588043212890625, 0.18904495239257812, 0.20220947265625, 0.21537399291992188, 0.22853851318359375, 0.24170303344726562, 0.2548675537109375, 0.2680320739746094, 0.28119659423828125, 0.2943611145019531, 0.307525634765625, 0.3206901550292969, 0.33385467529296875, 0.3470191955566406, 0.3601837158203125, 0.3733482360839844, 0.38651275634765625, 0.3996772766113281, 0.412841796875]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 5.0, 2.0, 2.0, 3.0, 6.0, 7.0, 8.0, 8.0, 7.0, 11.0, 14.0, 18.0, 20.0, 25.0, 30.0, 17.0, 33.0, 34.0, 34.0, 37.0, 39.0, 42.0, 35.0, 38.0, 1062.0, 58.0, 40.0, 36.0, 29.0, 37.0, 33.0, 33.0, 30.0, 24.0, 23.0, 12.0, 24.0, 25.0, 22.0, 20.0, 10.0, 7.0, 9.0, 5.0, 3.0, 5.0, 5.0, 4.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0], "bins": [-4.296875, -4.171051025390625, -4.04522705078125, -3.919403076171875, -3.7935791015625, -3.667755126953125, -3.54193115234375, -3.416107177734375, -3.290283203125, -3.164459228515625, -3.03863525390625, -2.912811279296875, -2.7869873046875, -2.661163330078125, -2.53533935546875, -2.409515380859375, -2.28369140625, -2.157867431640625, -2.03204345703125, -1.906219482421875, -1.7803955078125, -1.654571533203125, -1.52874755859375, -1.402923583984375, -1.277099609375, -1.151275634765625, -1.02545166015625, -0.899627685546875, -0.7738037109375, -0.647979736328125, -0.52215576171875, -0.396331787109375, -0.2705078125, -0.144683837890625, -0.01885986328125, 0.106964111328125, 0.2327880859375, 0.358612060546875, 0.48443603515625, 0.610260009765625, 0.736083984375, 0.861907958984375, 0.98773193359375, 1.113555908203125, 1.2393798828125, 1.365203857421875, 1.49102783203125, 1.616851806640625, 1.74267578125, 1.868499755859375, 1.99432373046875, 2.120147705078125, 2.2459716796875, 2.371795654296875, 2.49761962890625, 2.623443603515625, 2.749267578125, 2.875091552734375, 3.00091552734375, 3.126739501953125, 3.2525634765625, 3.378387451171875, 3.50421142578125, 3.630035400390625, 3.755859375]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 6.0, 8.0, 13.0, 10.0, 14.0, 31.0, 30.0, 78.0, 96.0, 155.0, 198.0, 325.0, 504.0, 780.0, 1162.0, 1758.0, 2755.0, 4077.0, 6434.0, 10012.0, 15522.0, 24295.0, 37477.0, 58132.0, 90626.0, 137352.0, 1224580.0, 160375.0, 113310.0, 74219.0, 47532.0, 30391.0, 19555.0, 12564.0, 7990.0, 5104.0, 3338.0, 2193.0, 1453.0, 931.0, 614.0, 380.0, 261.0, 169.0, 111.0, 79.0, 50.0, 32.0, 25.0, 11.0, 12.0, 3.0, 7.0, 2.0, 2.0, 2.0, 3.0], "bins": [-0.421630859375, -0.40914154052734375, -0.3966522216796875, -0.38416290283203125, -0.371673583984375, -0.35918426513671875, -0.3466949462890625, -0.33420562744140625, -0.32171630859375, -0.30922698974609375, -0.2967376708984375, -0.28424835205078125, -0.271759033203125, -0.25926971435546875, -0.2467803955078125, -0.23429107666015625, -0.2218017578125, -0.20931243896484375, -0.1968231201171875, -0.18433380126953125, -0.171844482421875, -0.15935516357421875, -0.1468658447265625, -0.13437652587890625, -0.12188720703125, -0.10939788818359375, -0.0969085693359375, -0.08441925048828125, -0.071929931640625, -0.05944061279296875, -0.0469512939453125, -0.03446197509765625, -0.02197265625, -0.00948333740234375, 0.0030059814453125, 0.01549530029296875, 0.027984619140625, 0.04047393798828125, 0.0529632568359375, 0.06545257568359375, 0.07794189453125, 0.09043121337890625, 0.1029205322265625, 0.11540985107421875, 0.127899169921875, 0.14038848876953125, 0.1528778076171875, 0.16536712646484375, 0.1778564453125, 0.19034576416015625, 0.2028350830078125, 0.21532440185546875, 0.227813720703125, 0.24030303955078125, 0.2527923583984375, 0.26528167724609375, 0.27777099609375, 0.29026031494140625, 0.3027496337890625, 0.31523895263671875, 0.327728271484375, 0.34021759033203125, 0.3527069091796875, 0.36519622802734375, 0.377685546875]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 4.0, 0.0, 2.0, 1.0, 4.0, 7.0, 10.0, 14.0, 34.0, 37.0, 34.0, 35.0, 62.0, 85.0, 91.0, 105.0, 107.0, 99.0, 58.0, 65.0, 45.0, 35.0, 20.0, 16.0, 7.0, 3.0, 9.0, 6.0, 4.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00237274169921875, -0.002306237816810608, -0.002239733934402466, -0.0021732300519943237, -0.0021067261695861816, -0.0020402222871780396, -0.0019737184047698975, -0.0019072145223617554, -0.0018407106399536133, -0.0017742067575454712, -0.001707702875137329, -0.001641198992729187, -0.001574695110321045, -0.0015081912279129028, -0.0014416873455047607, -0.0013751834630966187, -0.0013086795806884766, -0.0012421756982803345, -0.0011756718158721924, -0.0011091679334640503, -0.0010426640510559082, -0.0009761601686477661, -0.000909656286239624, -0.0008431524038314819, -0.0007766485214233398, -0.0007101446390151978, -0.0006436407566070557, -0.0005771368741989136, -0.0005106329917907715, -0.0004441291093826294, -0.0003776252269744873, -0.0003111213445663452, -0.0002446174621582031, -0.00017811357975006104, -0.00011160969734191895, -4.5105814933776855e-05, 2.1398067474365234e-05, 8.790194988250732e-05, 0.00015440583229064941, 0.0002209097146987915, 0.0002874135971069336, 0.0003539174795150757, 0.0004204213619232178, 0.00048692524433135986, 0.000553429126739502, 0.000619933009147644, 0.0006864368915557861, 0.0007529407739639282, 0.0008194446563720703, 0.0008859485387802124, 0.0009524524211883545, 0.0010189563035964966, 0.0010854601860046387, 0.0011519640684127808, 0.0012184679508209229, 0.001284971833229065, 0.001351475715637207, 0.0014179795980453491, 0.0014844834804534912, 0.0015509873628616333, 0.0016174912452697754, 0.0016839951276779175, 0.0017504990100860596, 0.0018170028924942017, 0.0018835067749023438]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 5.0, 2.0, 7.0, 8.0, 11.0, 8.0, 20.0, 29.0, 41.0, 57.0, 84.0, 120.0, 270.0, 514.0, 13049.0, 1031862.0, 1562.0, 354.0, 164.0, 114.0, 88.0, 61.0, 35.0, 29.0, 13.0, 13.0, 4.0, 9.0, 3.0, 3.0, 6.0, 5.0, 3.0, 2.0, 1.0, 2.0, 4.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03778076171875, -0.036446571350097656, -0.03511238098144531, -0.03377819061279297, -0.032444000244140625, -0.03110980987548828, -0.029775619506835938, -0.028441429138183594, -0.02710723876953125, -0.025773048400878906, -0.024438858032226562, -0.02310466766357422, -0.021770477294921875, -0.02043628692626953, -0.019102096557617188, -0.017767906188964844, -0.0164337158203125, -0.015099525451660156, -0.013765335083007812, -0.012431144714355469, -0.011096954345703125, -0.009762763977050781, -0.008428573608398438, -0.007094383239746094, -0.00576019287109375, -0.004426002502441406, -0.0030918121337890625, -0.0017576217651367188, -0.000423431396484375, 0.0009107589721679688, 0.0022449493408203125, 0.0035791397094726562, 0.004913330078125, 0.006247520446777344, 0.0075817108154296875, 0.008915901184082031, 0.010250091552734375, 0.011584281921386719, 0.012918472290039062, 0.014252662658691406, 0.01558685302734375, 0.016921043395996094, 0.018255233764648438, 0.01958942413330078, 0.020923614501953125, 0.02225780487060547, 0.023591995239257812, 0.024926185607910156, 0.0262603759765625, 0.027594566345214844, 0.028928756713867188, 0.03026294708251953, 0.031597137451171875, 0.03293132781982422, 0.03426551818847656, 0.035599708557128906, 0.03693389892578125, 0.038268089294433594, 0.03960227966308594, 0.04093647003173828, 0.042270660400390625, 0.04360485076904297, 0.04493904113769531, 0.046273231506347656, 0.047607421875]}, "gradients/decoder.transformer.h.10.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 14.0, 61.0, 180.0, 303.0, 253.0, 130.0, 45.0, 18.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0028207041323184967, -0.0027498146519064903, -0.0026789249386638403, -0.002608035458251834, -0.002537145745009184, -0.0024662562645971775, -0.002395366784185171, -0.0023244773037731647, -0.0022535875905305147, -0.0021826981101185083, -0.0021118083968758583, -0.002040918916463852, -0.0019700294360518456, -0.0018991397228091955, -0.0018282502423971891, -0.001757360645569861, -0.0016864710487425327, -0.0016155814519152045, -0.0015446918550878763, -0.00147380237467587, -0.0014029127778485417, -0.0013320231810212135, -0.0012611337006092072, -0.001190244103781879, -0.0011193545069545507, -0.0010484649101272225, -0.0009775753132998943, -0.000906685832887888, -0.0008357962360605597, -0.0007649066392332315, -0.0006940171006135643, -0.000623127561993897, -0.0005522381979972124, -0.00048134863027371466, -0.0004104590625502169, -0.00033956949482671916, -0.0002686799271032214, -0.00019779035937972367, -0.00012690079165622592, -5.601125303655863e-05, 1.4878343790769577e-05, 8.576791151426733e-05, 0.00015665747923776507, 0.00022754704696126282, 0.00029843661468476057, 0.0003693261824082583, 0.00044021575013175607, 0.0005111052887514234, 0.0005819948855787516, 0.0006528844824060798, 0.0007237740210257471, 0.0007946635596454144, 0.0008655531564727426, 0.0009364427533000708, 0.0010073322337120771, 0.0010782218305394053, 0.0011491114273667336, 0.0012200010241940618, 0.00129089062102139, 0.0013617801014333963, 0.0014326696982607245, 0.0015035592950880527, 0.0015744487755000591, 0.0016453383723273873, 0.0017162279691547155]}, "gradients/decoder.transformer.h.10.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 1.0, 5.0, 1.0, 5.0, 4.0, 5.0, 3.0, 6.0, 11.0, 19.0, 13.0, 12.0, 19.0, 23.0, 17.0, 17.0, 21.0, 35.0, 29.0, 34.0, 45.0, 43.0, 43.0, 46.0, 47.0, 35.0, 38.0, 49.0, 43.0, 38.0, 34.0, 28.0, 31.0, 30.0, 26.0, 27.0, 15.0, 16.0, 15.0, 18.0, 14.0, 9.0, 8.0, 8.0, 6.0, 7.0, 3.0, 5.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0006355047225952148, -0.0006129816174507141, -0.0005904585123062134, -0.0005679354071617126, -0.0005454123020172119, -0.0005228891968727112, -0.0005003660917282104, -0.0004778429865837097, -0.000455319881439209, -0.00043279677629470825, -0.0004102736711502075, -0.0003877505660057068, -0.00036522746086120605, -0.0003427043557167053, -0.0003201812505722046, -0.00029765814542770386, -0.0002751350402832031, -0.0002526119351387024, -0.00023008882999420166, -0.00020756572484970093, -0.0001850426197052002, -0.00016251951456069946, -0.00013999640941619873, -0.000117473304271698, -9.495019912719727e-05, -7.242709398269653e-05, -4.99039888381958e-05, -2.738088369369507e-05, -4.857778549194336e-06, 1.7665326595306396e-05, 4.018843173980713e-05, 6.271153688430786e-05, 8.52346420288086e-05, 0.00010775774717330933, 0.00013028085231781006, 0.0001528039574623108, 0.00017532706260681152, 0.00019785016775131226, 0.000220373272895813, 0.00024289637804031372, 0.00026541948318481445, 0.0002879425883293152, 0.0003104656934738159, 0.00033298879861831665, 0.0003555119037628174, 0.0003780350089073181, 0.00040055811405181885, 0.0004230812191963196, 0.0004456043243408203, 0.00046812742948532104, 0.0004906505346298218, 0.0005131736397743225, 0.0005356967449188232, 0.000558219850063324, 0.0005807429552078247, 0.0006032660603523254, 0.0006257891654968262, 0.0006483122706413269, 0.0006708353757858276, 0.0006933584809303284, 0.0007158815860748291, 0.0007384046912193298, 0.0007609277963638306, 0.0007834509015083313, 0.000805974006652832]}, "gradients/decoder.transformer.h.10.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 4.0, 6.0, 6.0, 11.0, 12.0, 16.0, 13.0, 12.0, 21.0, 29.0, 26.0, 32.0, 38.0, 33.0, 33.0, 43.0, 44.0, 48.0, 37.0, 47.0, 58.0, 49.0, 42.0, 43.0, 44.0, 37.0, 21.0, 26.0, 24.0, 24.0, 13.0, 22.0, 17.0, 17.0, 12.0, 7.0, 17.0, 9.0, 6.0, 4.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-7.45703125, -7.24188232421875, -7.0267333984375, -6.81158447265625, -6.596435546875, -6.38128662109375, -6.1661376953125, -5.95098876953125, -5.73583984375, -5.52069091796875, -5.3055419921875, -5.09039306640625, -4.875244140625, -4.66009521484375, -4.4449462890625, -4.22979736328125, -4.0146484375, -3.79949951171875, -3.5843505859375, -3.36920166015625, -3.154052734375, -2.93890380859375, -2.7237548828125, -2.50860595703125, -2.29345703125, -2.07830810546875, -1.8631591796875, -1.64801025390625, -1.432861328125, -1.21771240234375, -1.0025634765625, -0.78741455078125, -0.572265625, -0.35711669921875, -0.1419677734375, 0.07318115234375, 0.288330078125, 0.50347900390625, 0.7186279296875, 0.93377685546875, 1.14892578125, 1.36407470703125, 1.5792236328125, 1.79437255859375, 2.009521484375, 2.22467041015625, 2.4398193359375, 2.65496826171875, 2.8701171875, 3.08526611328125, 3.3004150390625, 3.51556396484375, 3.730712890625, 3.94586181640625, 4.1610107421875, 4.37615966796875, 4.59130859375, 4.80645751953125, 5.0216064453125, 5.23675537109375, 5.451904296875, 5.66705322265625, 5.8822021484375, 6.09735107421875, 6.3125]}, "gradients/decoder.transformer.h.10.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 8.0, 5.0, 4.0, 5.0, 17.0, 14.0, 27.0, 26.0, 35.0, 65.0, 68.0, 75.0, 129.0, 229.0, 399.0, 760.0, 1716.0, 4287.0, 10444.0, 26957.0, 67759.0, 166872.0, 358929.0, 239890.0, 101983.0, 40402.0, 16079.0, 6467.0, 2503.0, 1058.0, 506.0, 258.0, 121.0, 115.0, 73.0, 64.0, 59.0, 32.0, 30.0, 21.0, 29.0, 6.0, 14.0, 4.0, 5.0, 5.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-7.65234375, -7.43194580078125, -7.2115478515625, -6.99114990234375, -6.770751953125, -6.55035400390625, -6.3299560546875, -6.10955810546875, -5.88916015625, -5.66876220703125, -5.4483642578125, -5.22796630859375, -5.007568359375, -4.78717041015625, -4.5667724609375, -4.34637451171875, -4.1259765625, -3.90557861328125, -3.6851806640625, -3.46478271484375, -3.244384765625, -3.02398681640625, -2.8035888671875, -2.58319091796875, -2.36279296875, -2.14239501953125, -1.9219970703125, -1.70159912109375, -1.481201171875, -1.26080322265625, -1.0404052734375, -0.82000732421875, -0.599609375, -0.37921142578125, -0.1588134765625, 0.06158447265625, 0.281982421875, 0.50238037109375, 0.7227783203125, 0.94317626953125, 1.16357421875, 1.38397216796875, 1.6043701171875, 1.82476806640625, 2.045166015625, 2.26556396484375, 2.4859619140625, 2.70635986328125, 2.9267578125, 3.14715576171875, 3.3675537109375, 3.58795166015625, 3.808349609375, 4.02874755859375, 4.2491455078125, 4.46954345703125, 4.68994140625, 4.91033935546875, 5.1307373046875, 5.35113525390625, 5.571533203125, 5.79193115234375, 6.0123291015625, 6.23272705078125, 6.453125]}, "gradients/decoder.transformer.h.10.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 4.0, 4.0, 4.0, 4.0, 7.0, 5.0, 7.0, 5.0, 13.0, 6.0, 10.0, 10.0, 24.0, 22.0, 20.0, 22.0, 30.0, 39.0, 35.0, 35.0, 32.0, 57.0, 63.0, 142.0, 350.0, 1453.0, 174.0, 84.0, 57.0, 46.0, 42.0, 35.0, 37.0, 24.0, 15.0, 25.0, 13.0, 21.0, 14.0, 18.0, 8.0, 17.0, 2.0, 10.0, 4.0, 3.0, 3.0, 4.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-22.6875, -22.0224609375, -21.357421875, -20.6923828125, -20.02734375, -19.3623046875, -18.697265625, -18.0322265625, -17.3671875, -16.7021484375, -16.037109375, -15.3720703125, -14.70703125, -14.0419921875, -13.376953125, -12.7119140625, -12.046875, -11.3818359375, -10.716796875, -10.0517578125, -9.38671875, -8.7216796875, -8.056640625, -7.3916015625, -6.7265625, -6.0615234375, -5.396484375, -4.7314453125, -4.06640625, -3.4013671875, -2.736328125, -2.0712890625, -1.40625, -0.7412109375, -0.076171875, 0.5888671875, 1.25390625, 1.9189453125, 2.583984375, 3.2490234375, 3.9140625, 4.5791015625, 5.244140625, 5.9091796875, 6.57421875, 7.2392578125, 7.904296875, 8.5693359375, 9.234375, 9.8994140625, 10.564453125, 11.2294921875, 11.89453125, 12.5595703125, 13.224609375, 13.8896484375, 14.5546875, 15.2197265625, 15.884765625, 16.5498046875, 17.21484375, 17.8798828125, 18.544921875, 19.2099609375, 19.875]}, "gradients/decoder.transformer.h.10.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 3.0, 1.0, 4.0, 5.0, 7.0, 10.0, 9.0, 12.0, 12.0, 28.0, 33.0, 31.0, 70.0, 77.0, 111.0, 190.0, 400.0, 1078.0, 7822.0, 3123815.0, 9844.0, 1067.0, 434.0, 206.0, 152.0, 76.0, 43.0, 35.0, 32.0, 32.0, 19.0, 17.0, 8.0, 8.0, 4.0, 3.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-104.5625, -101.443359375, -98.32421875, -95.205078125, -92.0859375, -88.966796875, -85.84765625, -82.728515625, -79.609375, -76.490234375, -73.37109375, -70.251953125, -67.1328125, -64.013671875, -60.89453125, -57.775390625, -54.65625, -51.537109375, -48.41796875, -45.298828125, -42.1796875, -39.060546875, -35.94140625, -32.822265625, -29.703125, -26.583984375, -23.46484375, -20.345703125, -17.2265625, -14.107421875, -10.98828125, -7.869140625, -4.75, -1.630859375, 1.48828125, 4.607421875, 7.7265625, 10.845703125, 13.96484375, 17.083984375, 20.203125, 23.322265625, 26.44140625, 29.560546875, 32.6796875, 35.798828125, 38.91796875, 42.037109375, 45.15625, 48.275390625, 51.39453125, 54.513671875, 57.6328125, 60.751953125, 63.87109375, 66.990234375, 70.109375, 73.228515625, 76.34765625, 79.466796875, 82.5859375, 85.705078125, 88.82421875, 91.943359375, 95.0625]}, "gradients/decoder.transformer.h.10.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 19.0, 165.0, 449.0, 316.0, 62.0, 4.0, 1.0, 0.0, 1.0], "bins": [-207.00746154785156, -203.4169464111328, -199.82643127441406, -196.2359161376953, -192.64540100097656, -189.0548858642578, -185.46437072753906, -181.8738555908203, -178.28334045410156, -174.6928253173828, -171.10231018066406, -167.5117950439453, -163.92127990722656, -160.3307647705078, -156.74024963378906, -153.1497344970703, -149.5592041015625, -145.96868896484375, -142.378173828125, -138.78765869140625, -135.1971435546875, -131.60662841796875, -128.01611328125, -124.42559814453125, -120.8350830078125, -117.24456787109375, -113.654052734375, -110.06353759765625, -106.4730224609375, -102.88250732421875, -99.2919921875, -95.70147705078125, -92.1109619140625, -88.52044677734375, -84.929931640625, -81.33941650390625, -77.7489013671875, -74.15838623046875, -70.56787109375, -66.97735595703125, -63.386837005615234, -59.796321868896484, -56.205806732177734, -52.61528778076172, -49.02477264404297, -45.43425750732422, -41.84374237060547, -38.25322723388672, -34.66271209716797, -31.07219696044922, -27.48168182373047, -23.891164779663086, -20.300649642944336, -16.710134506225586, -13.119617462158203, -9.529102325439453, -5.938587188720703, -2.348071575164795, 1.2424440383911133, 4.83296012878418, 8.42347526550293, 12.01399040222168, 15.604507446289062, 19.195022583007812, 22.785537719726562]}, "gradients/decoder.transformer.h.10.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 1.0, 7.0, 8.0, 9.0, 7.0, 9.0, 14.0, 13.0, 23.0, 18.0, 22.0, 19.0, 31.0, 27.0, 30.0, 31.0, 35.0, 43.0, 42.0, 46.0, 36.0, 38.0, 41.0, 53.0, 54.0, 37.0, 37.0, 36.0, 28.0, 32.0, 28.0, 22.0, 17.0, 16.0, 17.0, 17.0, 12.0, 15.0, 13.0, 7.0, 3.0, 3.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-59.79783630371094, -58.027889251708984, -56.25794219970703, -54.48799514770508, -52.718048095703125, -50.948097229003906, -49.17815017700195, -47.408203125, -45.63825607299805, -43.868309020996094, -42.09836196899414, -40.32841491699219, -38.55846405029297, -36.78852081298828, -35.01856994628906, -33.24862289428711, -31.478675842285156, -29.708728790283203, -27.93878173828125, -26.168832778930664, -24.39888572692871, -22.628938674926758, -20.858989715576172, -19.08904266357422, -17.319095611572266, -15.549148559570312, -13.779200553894043, -12.009252548217773, -10.23930549621582, -8.469358444213867, -6.699410438537598, -4.929462432861328, -3.159515380859375, -1.3895678520202637, 0.38037967681884766, 2.150327205657959, 3.9202747344970703, 5.690221786499023, 7.460169792175293, 9.230117797851562, 11.000064849853516, 12.770011901855469, 14.539959907531738, 16.309907913208008, 18.07985496520996, 19.849802017211914, 21.6197509765625, 23.389698028564453, 25.159645080566406, 26.92959213256836, 28.699539184570312, 30.4694881439209, 32.23943328857422, 34.00938415527344, 35.77933120727539, 37.549278259277344, 39.3192253112793, 41.08917236328125, 42.8591194152832, 44.629066467285156, 46.399017333984375, 48.16896057128906, 49.93891143798828, 51.708858489990234, 53.47880554199219]}, "gradients/decoder.transformer.h.9.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 6.0, 2.0, 7.0, 8.0, 7.0, 10.0, 8.0, 19.0, 19.0, 23.0, 30.0, 27.0, 25.0, 36.0, 37.0, 43.0, 33.0, 46.0, 38.0, 52.0, 51.0, 44.0, 43.0, 42.0, 53.0, 44.0, 39.0, 25.0, 22.0, 25.0, 20.0, 24.0, 19.0, 16.0, 13.0, 15.0, 11.0, 10.0, 5.0, 6.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.75, -7.5040283203125, -7.258056640625, -7.0120849609375, -6.76611328125, -6.5201416015625, -6.274169921875, -6.0281982421875, -5.7822265625, -5.5362548828125, -5.290283203125, -5.0443115234375, -4.79833984375, -4.5523681640625, -4.306396484375, -4.0604248046875, -3.814453125, -3.5684814453125, -3.322509765625, -3.0765380859375, -2.83056640625, -2.5845947265625, -2.338623046875, -2.0926513671875, -1.8466796875, -1.6007080078125, -1.354736328125, -1.1087646484375, -0.86279296875, -0.6168212890625, -0.370849609375, -0.1248779296875, 0.12109375, 0.3670654296875, 0.613037109375, 0.8590087890625, 1.10498046875, 1.3509521484375, 1.596923828125, 1.8428955078125, 2.0888671875, 2.3348388671875, 2.580810546875, 2.8267822265625, 3.07275390625, 3.3187255859375, 3.564697265625, 3.8106689453125, 4.056640625, 4.3026123046875, 4.548583984375, 4.7945556640625, 5.04052734375, 5.2864990234375, 5.532470703125, 5.7784423828125, 6.0244140625, 6.2703857421875, 6.516357421875, 6.7623291015625, 7.00830078125, 7.2542724609375, 7.500244140625, 7.7462158203125, 7.9921875]}, "gradients/decoder.transformer.h.9.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 2.0, 5.0, 4.0, 5.0, 8.0, 12.0, 11.0, 15.0, 25.0, 32.0, 31.0, 30.0, 47.0, 66.0, 106.0, 99.0, 180.0, 319.0, 995.0, 8152.0, 247897.0, 3100914.0, 809707.0, 22795.0, 1676.0, 410.0, 202.0, 112.0, 86.0, 67.0, 56.0, 47.0, 38.0, 39.0, 20.0, 14.0, 15.0, 15.0, 11.0, 11.0, 3.0, 5.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.984375, -22.28076171875, -21.5771484375, -20.87353515625, -20.169921875, -19.46630859375, -18.7626953125, -18.05908203125, -17.35546875, -16.65185546875, -15.9482421875, -15.24462890625, -14.541015625, -13.83740234375, -13.1337890625, -12.43017578125, -11.7265625, -11.02294921875, -10.3193359375, -9.61572265625, -8.912109375, -8.20849609375, -7.5048828125, -6.80126953125, -6.09765625, -5.39404296875, -4.6904296875, -3.98681640625, -3.283203125, -2.57958984375, -1.8759765625, -1.17236328125, -0.46875, 0.23486328125, 0.9384765625, 1.64208984375, 2.345703125, 3.04931640625, 3.7529296875, 4.45654296875, 5.16015625, 5.86376953125, 6.5673828125, 7.27099609375, 7.974609375, 8.67822265625, 9.3818359375, 10.08544921875, 10.7890625, 11.49267578125, 12.1962890625, 12.89990234375, 13.603515625, 14.30712890625, 15.0107421875, 15.71435546875, 16.41796875, 17.12158203125, 17.8251953125, 18.52880859375, 19.232421875, 19.93603515625, 20.6396484375, 21.34326171875, 22.046875]}, "gradients/decoder.transformer.h.9.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 12.0, 14.0, 24.0, 15.0, 30.0, 47.0, 58.0, 90.0, 139.0, 213.0, 321.0, 456.0, 568.0, 583.0, 444.0, 343.0, 229.0, 139.0, 100.0, 61.0, 67.0, 35.0, 31.0, 18.0, 15.0, 8.0, 6.0, 6.0, 1.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.9296875, -14.4166259765625, -13.903564453125, -13.3905029296875, -12.87744140625, -12.3643798828125, -11.851318359375, -11.3382568359375, -10.8251953125, -10.3121337890625, -9.799072265625, -9.2860107421875, -8.77294921875, -8.2598876953125, -7.746826171875, -7.2337646484375, -6.720703125, -6.2076416015625, -5.694580078125, -5.1815185546875, -4.66845703125, -4.1553955078125, -3.642333984375, -3.1292724609375, -2.6162109375, -2.1031494140625, -1.590087890625, -1.0770263671875, -0.56396484375, -0.0509033203125, 0.462158203125, 0.9752197265625, 1.48828125, 2.0013427734375, 2.514404296875, 3.0274658203125, 3.54052734375, 4.0535888671875, 4.566650390625, 5.0797119140625, 5.5927734375, 6.1058349609375, 6.618896484375, 7.1319580078125, 7.64501953125, 8.1580810546875, 8.671142578125, 9.1842041015625, 9.697265625, 10.2103271484375, 10.723388671875, 11.2364501953125, 11.74951171875, 12.2625732421875, 12.775634765625, 13.2886962890625, 13.8017578125, 14.3148193359375, 14.827880859375, 15.3409423828125, 15.85400390625, 16.3670654296875, 16.880126953125, 17.3931884765625, 17.90625]}, "gradients/decoder.transformer.h.9.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 5.0, 5.0, 8.0, 14.0, 17.0, 28.0, 35.0, 54.0, 68.0, 97.0, 165.0, 291.0, 479.0, 1477.0, 1449033.0, 2739366.0, 1869.0, 499.0, 289.0, 163.0, 97.0, 73.0, 49.0, 34.0, 24.0, 11.0, 17.0, 9.0, 3.0, 4.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-93.625, -90.953125, -88.28125, -85.609375, -82.9375, -80.265625, -77.59375, -74.921875, -72.25, -69.578125, -66.90625, -64.234375, -61.5625, -58.890625, -56.21875, -53.546875, -50.875, -48.203125, -45.53125, -42.859375, -40.1875, -37.515625, -34.84375, -32.171875, -29.5, -26.828125, -24.15625, -21.484375, -18.8125, -16.140625, -13.46875, -10.796875, -8.125, -5.453125, -2.78125, -0.109375, 2.5625, 5.234375, 7.90625, 10.578125, 13.25, 15.921875, 18.59375, 21.265625, 23.9375, 26.609375, 29.28125, 31.953125, 34.625, 37.296875, 39.96875, 42.640625, 45.3125, 47.984375, 50.65625, 53.328125, 56.0, 58.671875, 61.34375, 64.015625, 66.6875, 69.359375, 72.03125, 74.703125, 77.375]}, "gradients/decoder.transformer.h.9.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 41.0, 185.0, 395.0, 307.0, 76.0, 12.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-139.16845703125, -133.52442932128906, -127.88040924072266, -122.23638153076172, -116.59235382080078, -110.94833374023438, -105.30430603027344, -99.6602783203125, -94.01625061035156, -88.37222290039062, -82.72820281982422, -77.08417510986328, -71.44014739990234, -65.79612731933594, -60.152099609375, -54.50807189941406, -48.864051818847656, -43.220027923583984, -37.57600021362305, -31.931976318359375, -26.28795051574707, -20.643924713134766, -14.999900817871094, -9.355873107910156, -3.7118492126464844, 1.932176113128662, 7.576201438903809, 13.220226287841797, 18.8642520904541, 24.508277893066406, 30.152301788330078, 35.796329498291016, 41.44035339355469, 47.08437728881836, 52.7284049987793, 58.37242889404297, 64.0164566040039, 69.66047668457031, 75.30450439453125, 80.94853210449219, 86.59255981445312, 92.23658752441406, 97.88060760498047, 103.5246353149414, 109.16866302490234, 114.81268310546875, 120.45671081542969, 126.10073852539062, 131.7447509765625, 137.38877868652344, 143.03280639648438, 148.67681884765625, 154.3208465576172, 159.96487426757812, 165.60890197753906, 171.2529296875, 176.89695739746094, 182.54098510742188, 188.1850128173828, 193.82904052734375, 199.47305297851562, 205.11708068847656, 210.7611083984375, 216.40513610839844, 222.04916381835938]}, "gradients/decoder.transformer.h.9.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 2.0, 3.0, 5.0, 5.0, 1.0, 9.0, 9.0, 19.0, 14.0, 20.0, 23.0, 32.0, 33.0, 28.0, 33.0, 33.0, 37.0, 39.0, 44.0, 50.0, 43.0, 55.0, 37.0, 40.0, 38.0, 34.0, 37.0, 34.0, 27.0, 25.0, 25.0, 33.0, 18.0, 22.0, 19.0, 14.0, 19.0, 14.0, 10.0, 6.0, 2.0, 6.0, 8.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-45.55462646484375, -44.15840530395508, -42.762184143066406, -41.365966796875, -39.96974563598633, -38.573524475097656, -37.177303314208984, -35.78108215332031, -34.384864807128906, -32.988643646240234, -31.592424392700195, -30.196203231811523, -28.799983978271484, -27.403762817382812, -26.00754165649414, -24.6113224029541, -23.21510124206543, -21.818880081176758, -20.42266082763672, -19.026439666748047, -17.630220413208008, -16.233999252319336, -14.83777904510498, -13.441558837890625, -12.04533863067627, -10.649118423461914, -9.252898216247559, -7.856677532196045, -6.4604573249816895, -5.064237117767334, -3.6680164337158203, -2.271796226501465, -0.8755760192871094, 0.5206443071365356, 1.9168646335601807, 3.3130850791931152, 4.709305286407471, 6.105525493621826, 7.50174617767334, 8.897966384887695, 10.29418659210205, 11.690406799316406, 13.086627006530762, 14.482847213745117, 15.879068374633789, 17.275287628173828, 18.6715087890625, 20.067729949951172, 21.46394920349121, 22.860170364379883, 24.256389617919922, 25.652610778808594, 27.048830032348633, 28.445051193237305, 29.841270446777344, 31.237491607666016, 32.63371276855469, 34.02993392944336, 35.42615509033203, 36.82237243652344, 38.21859359741211, 39.61481475830078, 41.01103591918945, 42.407257080078125, 43.80347442626953]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 3.0, 1.0, 4.0, 6.0, 8.0, 9.0, 2.0, 12.0, 9.0, 14.0, 10.0, 19.0, 22.0, 25.0, 26.0, 33.0, 26.0, 35.0, 36.0, 38.0, 36.0, 38.0, 34.0, 38.0, 40.0, 41.0, 57.0, 44.0, 28.0, 43.0, 45.0, 22.0, 22.0, 32.0, 20.0, 19.0, 18.0, 17.0, 11.0, 18.0, 9.0, 11.0, 7.0, 9.0, 3.0, 4.0, 2.0, 1.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.37109375, -6.160888671875, -5.95068359375, -5.740478515625, -5.5302734375, -5.320068359375, -5.10986328125, -4.899658203125, -4.689453125, -4.479248046875, -4.26904296875, -4.058837890625, -3.8486328125, -3.638427734375, -3.42822265625, -3.218017578125, -3.0078125, -2.797607421875, -2.58740234375, -2.377197265625, -2.1669921875, -1.956787109375, -1.74658203125, -1.536376953125, -1.326171875, -1.115966796875, -0.90576171875, -0.695556640625, -0.4853515625, -0.275146484375, -0.06494140625, 0.145263671875, 0.35546875, 0.565673828125, 0.77587890625, 0.986083984375, 1.1962890625, 1.406494140625, 1.61669921875, 1.826904296875, 2.037109375, 2.247314453125, 2.45751953125, 2.667724609375, 2.8779296875, 3.088134765625, 3.29833984375, 3.508544921875, 3.71875, 3.928955078125, 4.13916015625, 4.349365234375, 4.5595703125, 4.769775390625, 4.97998046875, 5.190185546875, 5.400390625, 5.610595703125, 5.82080078125, 6.031005859375, 6.2412109375, 6.451416015625, 6.66162109375, 6.871826171875, 7.08203125]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 7.0, 11.0, 26.0, 26.0, 41.0, 66.0, 100.0, 125.0, 196.0, 358.0, 511.0, 716.0, 1011.0, 1584.0, 2476.0, 3780.0, 5815.0, 8438.0, 13235.0, 19889.0, 30221.0, 45207.0, 69180.0, 105217.0, 152964.0, 178592.0, 138199.0, 92022.0, 60778.0, 40173.0, 26217.0, 17337.0, 11697.0, 7589.0, 5025.0, 3294.0, 2212.0, 1435.0, 940.0, 634.0, 438.0, 262.0, 171.0, 120.0, 61.0, 54.0, 41.0, 20.0, 17.0, 12.0, 11.0, 8.0, 0.0, 1.0, 1.0, 0.0, 3.0], "bins": [-0.46923828125, -0.4547309875488281, -0.44022369384765625, -0.4257164001464844, -0.4112091064453125, -0.3967018127441406, -0.38219451904296875, -0.3676872253417969, -0.353179931640625, -0.3386726379394531, -0.32416534423828125, -0.3096580505371094, -0.2951507568359375, -0.2806434631347656, -0.26613616943359375, -0.2516288757324219, -0.23712158203125, -0.22261428833007812, -0.20810699462890625, -0.19359970092773438, -0.1790924072265625, -0.16458511352539062, -0.15007781982421875, -0.13557052612304688, -0.121063232421875, -0.10655593872070312, -0.09204864501953125, -0.07754135131835938, -0.0630340576171875, -0.048526763916015625, -0.03401947021484375, -0.019512176513671875, -0.0050048828125, 0.009502410888671875, 0.02400970458984375, 0.038516998291015625, 0.0530242919921875, 0.06753158569335938, 0.08203887939453125, 0.09654617309570312, 0.111053466796875, 0.12556076049804688, 0.14006805419921875, 0.15457534790039062, 0.1690826416015625, 0.18358993530273438, 0.19809722900390625, 0.21260452270507812, 0.22711181640625, 0.24161911010742188, 0.25612640380859375, 0.2706336975097656, 0.2851409912109375, 0.2996482849121094, 0.31415557861328125, 0.3286628723144531, 0.343170166015625, 0.3576774597167969, 0.37218475341796875, 0.3866920471191406, 0.4011993408203125, 0.4157066345214844, 0.43021392822265625, 0.4447212219238281, 0.459228515625]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 6.0, 4.0, 1.0, 5.0, 8.0, 7.0, 14.0, 17.0, 17.0, 20.0, 13.0, 18.0, 21.0, 28.0, 26.0, 25.0, 31.0, 28.0, 42.0, 29.0, 35.0, 53.0, 1064.0, 52.0, 29.0, 44.0, 46.0, 33.0, 35.0, 29.0, 34.0, 43.0, 31.0, 29.0, 20.0, 14.0, 17.0, 16.0, 11.0, 11.0, 11.0, 6.0, 1.0, 4.0, 1.0, 2.0, 3.0, 3.0, 3.0, 1.0, 1.0], "bins": [-4.84375, -4.707489013671875, -4.57122802734375, -4.434967041015625, -4.2987060546875, -4.162445068359375, -4.02618408203125, -3.889923095703125, -3.753662109375, -3.617401123046875, -3.48114013671875, -3.344879150390625, -3.2086181640625, -3.072357177734375, -2.93609619140625, -2.799835205078125, -2.66357421875, -2.527313232421875, -2.39105224609375, -2.254791259765625, -2.1185302734375, -1.982269287109375, -1.84600830078125, -1.709747314453125, -1.573486328125, -1.437225341796875, -1.30096435546875, -1.164703369140625, -1.0284423828125, -0.892181396484375, -0.75592041015625, -0.619659423828125, -0.4833984375, -0.347137451171875, -0.21087646484375, -0.074615478515625, 0.0616455078125, 0.197906494140625, 0.33416748046875, 0.470428466796875, 0.606689453125, 0.742950439453125, 0.87921142578125, 1.015472412109375, 1.1517333984375, 1.287994384765625, 1.42425537109375, 1.560516357421875, 1.69677734375, 1.833038330078125, 1.96929931640625, 2.105560302734375, 2.2418212890625, 2.378082275390625, 2.51434326171875, 2.650604248046875, 2.786865234375, 2.923126220703125, 3.05938720703125, 3.195648193359375, 3.3319091796875, 3.468170166015625, 3.60443115234375, 3.740692138671875, 3.876953125]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 4.0, 6.0, 8.0, 9.0, 14.0, 26.0, 37.0, 59.0, 101.0, 122.0, 215.0, 313.0, 480.0, 715.0, 1209.0, 1774.0, 2706.0, 4177.0, 6368.0, 10014.0, 15408.0, 23974.0, 36626.0, 57117.0, 88577.0, 134592.0, 1222597.0, 162208.0, 115555.0, 75332.0, 48445.0, 30949.0, 20477.0, 13167.0, 8365.0, 5315.0, 3512.0, 2287.0, 1538.0, 943.0, 623.0, 369.0, 289.0, 187.0, 102.0, 75.0, 66.0, 36.0, 15.0, 13.0, 4.0, 9.0, 9.0, 5.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.43994140625, -0.4265327453613281, -0.41312408447265625, -0.3997154235839844, -0.3863067626953125, -0.3728981018066406, -0.35948944091796875, -0.3460807800292969, -0.332672119140625, -0.3192634582519531, -0.30585479736328125, -0.2924461364746094, -0.2790374755859375, -0.2656288146972656, -0.25222015380859375, -0.23881149291992188, -0.22540283203125, -0.21199417114257812, -0.19858551025390625, -0.18517684936523438, -0.1717681884765625, -0.15835952758789062, -0.14495086669921875, -0.13154220581054688, -0.118133544921875, -0.10472488403320312, -0.09131622314453125, -0.07790756225585938, -0.0644989013671875, -0.051090240478515625, -0.03768157958984375, -0.024272918701171875, -0.0108642578125, 0.002544403076171875, 0.01595306396484375, 0.029361724853515625, 0.0427703857421875, 0.056179046630859375, 0.06958770751953125, 0.08299636840820312, 0.096405029296875, 0.10981369018554688, 0.12322235107421875, 0.13663101196289062, 0.1500396728515625, 0.16344833374023438, 0.17685699462890625, 0.19026565551757812, 0.20367431640625, 0.21708297729492188, 0.23049163818359375, 0.24390029907226562, 0.2573089599609375, 0.2707176208496094, 0.28412628173828125, 0.2975349426269531, 0.310943603515625, 0.3243522644042969, 0.33776092529296875, 0.3511695861816406, 0.3645782470703125, 0.3779869079589844, 0.39139556884765625, 0.4048042297363281, 0.418212890625]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 1.0, 3.0, 7.0, 8.0, 5.0, 8.0, 13.0, 10.0, 12.0, 18.0, 22.0, 17.0, 34.0, 26.0, 30.0, 48.0, 39.0, 49.0, 47.0, 44.0, 63.0, 49.0, 59.0, 38.0, 47.0, 46.0, 33.0, 32.0, 30.0, 32.0, 37.0, 20.0, 19.0, 10.0, 9.0, 9.0, 8.0, 3.0, 6.0, 7.0, 2.0, 4.0, 1.0, 4.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.001132965087890625, -0.00109119713306427, -0.001049429178237915, -0.00100766122341156, -0.0009658932685852051, -0.0009241253137588501, -0.0008823573589324951, -0.0008405894041061401, -0.0007988214492797852, -0.0007570534944534302, -0.0007152855396270752, -0.0006735175848007202, -0.0006317496299743652, -0.0005899816751480103, -0.0005482137203216553, -0.0005064457654953003, -0.0004646778106689453, -0.00042290985584259033, -0.00038114190101623535, -0.00033937394618988037, -0.0002976059913635254, -0.0002558380365371704, -0.00021407008171081543, -0.00017230212688446045, -0.00013053417205810547, -8.876621723175049e-05, -4.699826240539551e-05, -5.230307579040527e-06, 3.653764724731445e-05, 7.830560207366943e-05, 0.00012007355690002441, 0.0001618415117263794, 0.00020360946655273438, 0.00024537742137908936, 0.00028714537620544434, 0.0003289133310317993, 0.0003706812858581543, 0.0004124492406845093, 0.00045421719551086426, 0.0004959851503372192, 0.0005377531051635742, 0.0005795210599899292, 0.0006212890148162842, 0.0006630569696426392, 0.0007048249244689941, 0.0007465928792953491, 0.0007883608341217041, 0.0008301287889480591, 0.0008718967437744141, 0.000913664698600769, 0.000955432653427124, 0.000997200608253479, 0.001038968563079834, 0.001080736517906189, 0.001122504472732544, 0.001164272427558899, 0.001206040382385254, 0.0012478083372116089, 0.0012895762920379639, 0.0013313442468643188, 0.0013731122016906738, 0.0014148801565170288, 0.0014566481113433838, 0.0014984160661697388, 0.0015401840209960938]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 6.0, 2.0, 2.0, 1.0, 1.0, 7.0, 13.0, 6.0, 14.0, 20.0, 21.0, 28.0, 35.0, 52.0, 60.0, 89.0, 121.0, 180.0, 355.0, 970.0, 44058.0, 997890.0, 3198.0, 617.0, 266.0, 137.0, 99.0, 66.0, 57.0, 47.0, 36.0, 35.0, 16.0, 17.0, 10.0, 6.0, 11.0, 5.0, 3.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.036102294921875, -0.035079240798950195, -0.03405618667602539, -0.033033132553100586, -0.03201007843017578, -0.030987024307250977, -0.029963970184326172, -0.028940916061401367, -0.027917861938476562, -0.026894807815551758, -0.025871753692626953, -0.02484869956970215, -0.023825645446777344, -0.02280259132385254, -0.021779537200927734, -0.02075648307800293, -0.019733428955078125, -0.01871037483215332, -0.017687320709228516, -0.01666426658630371, -0.015641212463378906, -0.014618158340454102, -0.013595104217529297, -0.012572050094604492, -0.011548995971679688, -0.010525941848754883, -0.009502887725830078, -0.008479833602905273, -0.007456779479980469, -0.006433725357055664, -0.005410671234130859, -0.004387617111206055, -0.00336456298828125, -0.0023415088653564453, -0.0013184547424316406, -0.00029540061950683594, 0.0007276535034179688, 0.0017507076263427734, 0.002773761749267578, 0.003796815872192383, 0.0048198699951171875, 0.005842924118041992, 0.006865978240966797, 0.007889032363891602, 0.008912086486816406, 0.009935140609741211, 0.010958194732666016, 0.01198124885559082, 0.013004302978515625, 0.01402735710144043, 0.015050411224365234, 0.01607346534729004, 0.017096519470214844, 0.01811957359313965, 0.019142627716064453, 0.020165681838989258, 0.021188735961914062, 0.022211790084838867, 0.023234844207763672, 0.024257898330688477, 0.02528095245361328, 0.026304006576538086, 0.02732706069946289, 0.028350114822387695, 0.0293731689453125]}, "gradients/decoder.transformer.h.9.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 5.0, 180.0, 720.0, 110.0, 2.0], "bins": [-0.013621442019939423, -0.013399799354374409, -0.01317815575748682, -0.012956513091921806, -0.012734870426356792, -0.012513226829469204, -0.01229158416390419, -0.012069941498339176, -0.011848297901451588, -0.011626655235886574, -0.011405011638998985, -0.011183368973433971, -0.010961726307868958, -0.010740082710981369, -0.010518440045416355, -0.010296797379851341, -0.010075154714286327, -0.009853512048721313, -0.009631868451833725, -0.009410225786268711, -0.009188583120703697, -0.008966939523816109, -0.008745296858251095, -0.008523654192686081, -0.008302010595798492, -0.008080367930233479, -0.00785872433334589, -0.007637081667780876, -0.007415438536554575, -0.007193795870989561, -0.00697215273976326, -0.006750510074198246, -0.006528867408633232, -0.006307224277406931, -0.006085581611841917, -0.005863938480615616, -0.005642295349389315, -0.005420652683824301, -0.005199009552598, -0.004977366887032986, -0.004755723290145397, -0.004534080158919096, -0.004312437493354082, -0.004090794362127781, -0.0038691514637321234, -0.003647508565336466, -0.0034258654341101646, -0.003204222535714507, -0.0029825796373188496, -0.002760936738923192, -0.002539293607696891, -0.0023176507093012333, -0.0020960078109055758, -0.0018743647960945964, -0.001652721781283617, -0.0014310788828879595, -0.001209435984492302, -0.0009877929696813226, -0.000766150071285665, -0.0005445070564746857, -0.0003228640998713672, -0.00010122114326804876, 0.0001204218715429306, 0.00034206476993858814, 0.0005637077847495675]}, "gradients/decoder.transformer.h.9.ln_cross_attn.bias": {"_type": "histogram", "values": [4.0, 4.0, 5.0, 3.0, 8.0, 7.0, 10.0, 11.0, 11.0, 15.0, 19.0, 20.0, 19.0, 22.0, 28.0, 29.0, 37.0, 33.0, 46.0, 50.0, 46.0, 53.0, 43.0, 45.0, 53.0, 45.0, 40.0, 45.0, 31.0, 32.0, 29.0, 26.0, 35.0, 21.0, 22.0, 18.0, 15.0, 5.0, 4.0, 8.0, 10.0, 3.0, 2.0, 2.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000621795654296875, -0.0005951123312115669, -0.0005684290081262589, -0.0005417456850409508, -0.0005150623619556427, -0.0004883790388703346, -0.00046169571578502655, -0.0004350123926997185, -0.0004083290696144104, -0.0003816457465291023, -0.00035496242344379425, -0.0003282791003584862, -0.0003015957772731781, -0.00027491245418787, -0.00024822913110256195, -0.00022154580801725388, -0.0001948624849319458, -0.00016817916184663773, -0.00014149583876132965, -0.00011481251567602158, -8.81291925907135e-05, -6.144586950540543e-05, -3.476254642009735e-05, -8.079223334789276e-06, 1.86040997505188e-05, 4.5287422835826874e-05, 7.197074592113495e-05, 9.865406900644302e-05, 0.0001253373920917511, 0.00015202071517705917, 0.00017870403826236725, 0.00020538736134767532, 0.0002320706844329834, 0.0002587540075182915, 0.00028543733060359955, 0.0003121206536889076, 0.0003388039767742157, 0.0003654872998595238, 0.00039217062294483185, 0.0004188539460301399, 0.000445537269115448, 0.0004722205922007561, 0.0004989039152860641, 0.0005255872383713722, 0.0005522705614566803, 0.0005789538845419884, 0.0006056372076272964, 0.0006323205307126045, 0.0006590038537979126, 0.0006856871768832207, 0.0007123704999685287, 0.0007390538230538368, 0.0007657371461391449, 0.000792420469224453, 0.000819103792309761, 0.0008457871153950691, 0.0008724704384803772, 0.0008991537615656853, 0.0009258370846509933, 0.0009525204077363014, 0.0009792037308216095, 0.0010058870539069176, 0.0010325703769922256, 0.0010592537000775337, 0.0010859370231628418]}, "gradients/decoder.transformer.h.9.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 3.0, 1.0, 4.0, 6.0, 8.0, 9.0, 2.0, 12.0, 9.0, 14.0, 10.0, 19.0, 22.0, 25.0, 26.0, 33.0, 26.0, 35.0, 36.0, 38.0, 36.0, 38.0, 34.0, 38.0, 40.0, 41.0, 57.0, 44.0, 28.0, 43.0, 44.0, 23.0, 22.0, 32.0, 20.0, 19.0, 18.0, 17.0, 11.0, 18.0, 9.0, 11.0, 7.0, 9.0, 3.0, 4.0, 2.0, 1.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.37109375, -6.160888671875, -5.95068359375, -5.740478515625, -5.5302734375, -5.320068359375, -5.10986328125, -4.899658203125, -4.689453125, -4.479248046875, -4.26904296875, -4.058837890625, -3.8486328125, -3.638427734375, -3.42822265625, -3.218017578125, -3.0078125, -2.797607421875, -2.58740234375, -2.377197265625, -2.1669921875, -1.956787109375, -1.74658203125, -1.536376953125, -1.326171875, -1.115966796875, -0.90576171875, -0.695556640625, -0.4853515625, -0.275146484375, -0.06494140625, 0.145263671875, 0.35546875, 0.565673828125, 0.77587890625, 0.986083984375, 1.1962890625, 1.406494140625, 1.61669921875, 1.826904296875, 2.037109375, 2.247314453125, 2.45751953125, 2.667724609375, 2.8779296875, 3.088134765625, 3.29833984375, 3.508544921875, 3.71875, 3.928955078125, 4.13916015625, 4.349365234375, 4.5595703125, 4.769775390625, 4.97998046875, 5.190185546875, 5.400390625, 5.610595703125, 5.82080078125, 6.031005859375, 6.2412109375, 6.451416015625, 6.66162109375, 6.871826171875, 7.08203125]}, "gradients/decoder.transformer.h.9.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 9.0, 5.0, 9.0, 7.0, 15.0, 9.0, 23.0, 20.0, 27.0, 38.0, 58.0, 84.0, 92.0, 131.0, 212.0, 321.0, 617.0, 1752.0, 8739.0, 53814.0, 363762.0, 526061.0, 76350.0, 12134.0, 2375.0, 736.0, 369.0, 229.0, 141.0, 107.0, 67.0, 50.0, 47.0, 35.0, 24.0, 19.0, 18.0, 11.0, 8.0, 9.0, 4.0, 4.0, 3.0, 5.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-13.6796875, -13.23974609375, -12.7998046875, -12.35986328125, -11.919921875, -11.47998046875, -11.0400390625, -10.60009765625, -10.16015625, -9.72021484375, -9.2802734375, -8.84033203125, -8.400390625, -7.96044921875, -7.5205078125, -7.08056640625, -6.640625, -6.20068359375, -5.7607421875, -5.32080078125, -4.880859375, -4.44091796875, -4.0009765625, -3.56103515625, -3.12109375, -2.68115234375, -2.2412109375, -1.80126953125, -1.361328125, -0.92138671875, -0.4814453125, -0.04150390625, 0.3984375, 0.83837890625, 1.2783203125, 1.71826171875, 2.158203125, 2.59814453125, 3.0380859375, 3.47802734375, 3.91796875, 4.35791015625, 4.7978515625, 5.23779296875, 5.677734375, 6.11767578125, 6.5576171875, 6.99755859375, 7.4375, 7.87744140625, 8.3173828125, 8.75732421875, 9.197265625, 9.63720703125, 10.0771484375, 10.51708984375, 10.95703125, 11.39697265625, 11.8369140625, 12.27685546875, 12.716796875, 13.15673828125, 13.5966796875, 14.03662109375, 14.4765625]}, "gradients/decoder.transformer.h.9.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 5.0, 3.0, 4.0, 5.0, 2.0, 11.0, 4.0, 12.0, 4.0, 14.0, 16.0, 14.0, 16.0, 18.0, 24.0, 27.0, 35.0, 33.0, 46.0, 38.0, 49.0, 64.0, 110.0, 199.0, 1496.0, 212.0, 96.0, 80.0, 49.0, 54.0, 44.0, 27.0, 30.0, 31.0, 34.0, 24.0, 25.0, 18.0, 23.0, 12.0, 8.0, 4.0, 10.0, 6.0, 5.0, 8.0, 6.0, 2.0, 3.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-22.734375, -22.035888671875, -21.33740234375, -20.638916015625, -19.9404296875, -19.241943359375, -18.54345703125, -17.844970703125, -17.146484375, -16.447998046875, -15.74951171875, -15.051025390625, -14.3525390625, -13.654052734375, -12.95556640625, -12.257080078125, -11.55859375, -10.860107421875, -10.16162109375, -9.463134765625, -8.7646484375, -8.066162109375, -7.36767578125, -6.669189453125, -5.970703125, -5.272216796875, -4.57373046875, -3.875244140625, -3.1767578125, -2.478271484375, -1.77978515625, -1.081298828125, -0.3828125, 0.315673828125, 1.01416015625, 1.712646484375, 2.4111328125, 3.109619140625, 3.80810546875, 4.506591796875, 5.205078125, 5.903564453125, 6.60205078125, 7.300537109375, 7.9990234375, 8.697509765625, 9.39599609375, 10.094482421875, 10.79296875, 11.491455078125, 12.18994140625, 12.888427734375, 13.5869140625, 14.285400390625, 14.98388671875, 15.682373046875, 16.380859375, 17.079345703125, 17.77783203125, 18.476318359375, 19.1748046875, 19.873291015625, 20.57177734375, 21.270263671875, 21.96875]}, "gradients/decoder.transformer.h.9.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 5.0, 9.0, 15.0, 8.0, 18.0, 24.0, 26.0, 44.0, 51.0, 76.0, 98.0, 115.0, 232.0, 401.0, 1121.0, 25260.0, 3111692.0, 4859.0, 737.0, 296.0, 179.0, 127.0, 79.0, 61.0, 54.0, 32.0, 26.0, 28.0, 13.0, 4.0, 6.0, 5.0, 3.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-108.1875, -105.037109375, -101.88671875, -98.736328125, -95.5859375, -92.435546875, -89.28515625, -86.134765625, -82.984375, -79.833984375, -76.68359375, -73.533203125, -70.3828125, -67.232421875, -64.08203125, -60.931640625, -57.78125, -54.630859375, -51.48046875, -48.330078125, -45.1796875, -42.029296875, -38.87890625, -35.728515625, -32.578125, -29.427734375, -26.27734375, -23.126953125, -19.9765625, -16.826171875, -13.67578125, -10.525390625, -7.375, -4.224609375, -1.07421875, 2.076171875, 5.2265625, 8.376953125, 11.52734375, 14.677734375, 17.828125, 20.978515625, 24.12890625, 27.279296875, 30.4296875, 33.580078125, 36.73046875, 39.880859375, 43.03125, 46.181640625, 49.33203125, 52.482421875, 55.6328125, 58.783203125, 61.93359375, 65.083984375, 68.234375, 71.384765625, 74.53515625, 77.685546875, 80.8359375, 83.986328125, 87.13671875, 90.287109375, 93.4375]}, "gradients/decoder.transformer.h.9.ln_1.weight": {"_type": "histogram", "values": [4.0, 5.0, 59.0, 167.0, 295.0, 306.0, 144.0, 25.0, 8.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.430879592895508, -9.155984878540039, -6.88109016418457, -4.606195449829102, -2.331300735473633, -0.05640602111816406, 2.2184886932373047, 4.493383407592773, 6.768278121948242, 9.043172836303711, 11.31806755065918, 13.592962265014648, 15.867856979370117, 18.142751693725586, 20.417646408081055, 22.692541122436523, 24.967435836791992, 27.24233055114746, 29.51722526550293, 31.7921199798584, 34.0670166015625, 36.34191131591797, 38.61680603027344, 40.891700744628906, 43.166595458984375, 45.441490173339844, 47.71638488769531, 49.99127960205078, 52.26617431640625, 54.54106903076172, 56.81596374511719, 59.090858459472656, 61.365753173828125, 63.640647888183594, 65.91554260253906, 68.19043731689453, 70.46533203125, 72.74022674560547, 75.01512145996094, 77.2900161743164, 79.56491088867188, 81.83980560302734, 84.11470031738281, 86.38959503173828, 88.66448974609375, 90.93938446044922, 93.21427917480469, 95.48917388916016, 97.76406860351562, 100.0389633178711, 102.31385803222656, 104.58875274658203, 106.8636474609375, 109.13854217529297, 111.41343688964844, 113.6883316040039, 115.96322631835938, 118.23812103271484, 120.51301574707031, 122.78791046142578, 125.06280517578125, 127.33769989013672, 129.6125946044922, 131.88748168945312, 134.16238403320312]}, "gradients/decoder.transformer.h.9.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 6.0, 8.0, 3.0, 3.0, 6.0, 9.0, 13.0, 7.0, 14.0, 12.0, 15.0, 22.0, 27.0, 22.0, 28.0, 33.0, 26.0, 39.0, 29.0, 36.0, 38.0, 31.0, 32.0, 43.0, 41.0, 45.0, 46.0, 34.0, 25.0, 28.0, 40.0, 25.0, 21.0, 27.0, 17.0, 19.0, 12.0, 16.0, 16.0, 13.0, 10.0, 16.0, 8.0, 10.0, 7.0, 7.0, 3.0, 4.0, 4.0, 2.0, 3.0, 4.0, 4.0, 3.0], "bins": [-57.18463134765625, -55.519927978515625, -53.855224609375, -52.190521240234375, -50.52581787109375, -48.861114501953125, -47.196414947509766, -45.53171157836914, -43.867008209228516, -42.20230484008789, -40.537601470947266, -38.87289810180664, -37.20819854736328, -35.543495178222656, -33.87879180908203, -32.214088439941406, -30.54938507080078, -28.884681701660156, -27.21997833251953, -25.55527687072754, -23.890573501586914, -22.22587013244629, -20.561168670654297, -18.896465301513672, -17.231761932373047, -15.567058563232422, -13.902356147766113, -12.237653732299805, -10.57295036315918, -8.908246994018555, -7.243544578552246, -5.5788421630859375, -3.914142608642578, -2.2494397163391113, -0.5847368240356445, 1.0799660682678223, 2.744668960571289, 4.409372329711914, 6.074074745178223, 7.738777160644531, 9.403480529785156, 11.068183898925781, 12.73288631439209, 14.397588729858398, 16.062292098999023, 17.72699546813965, 19.39169692993164, 21.056400299072266, 22.72110366821289, 24.385807037353516, 26.05051040649414, 27.715211868286133, 29.379915237426758, 31.044618606567383, 32.709320068359375, 34.3740234375, 36.038726806640625, 37.70343017578125, 39.368133544921875, 41.0328369140625, 42.697540283203125, 44.36224365234375, 46.02694320678711, 47.691646575927734, 49.35634994506836]}, "gradients/decoder.transformer.h.8.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 2.0, 4.0, 3.0, 1.0, 3.0, 6.0, 9.0, 8.0, 9.0, 12.0, 9.0, 12.0, 16.0, 17.0, 14.0, 25.0, 28.0, 32.0, 39.0, 43.0, 41.0, 41.0, 48.0, 38.0, 41.0, 55.0, 37.0, 38.0, 45.0, 44.0, 33.0, 28.0, 33.0, 30.0, 26.0, 30.0, 12.0, 18.0, 15.0, 11.0, 10.0, 10.0, 10.0, 8.0, 4.0, 5.0, 2.0, 4.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.171875, -6.92578125, -6.6796875, -6.43359375, -6.1875, -5.94140625, -5.6953125, -5.44921875, -5.203125, -4.95703125, -4.7109375, -4.46484375, -4.21875, -3.97265625, -3.7265625, -3.48046875, -3.234375, -2.98828125, -2.7421875, -2.49609375, -2.25, -2.00390625, -1.7578125, -1.51171875, -1.265625, -1.01953125, -0.7734375, -0.52734375, -0.28125, -0.03515625, 0.2109375, 0.45703125, 0.703125, 0.94921875, 1.1953125, 1.44140625, 1.6875, 1.93359375, 2.1796875, 2.42578125, 2.671875, 2.91796875, 3.1640625, 3.41015625, 3.65625, 3.90234375, 4.1484375, 4.39453125, 4.640625, 4.88671875, 5.1328125, 5.37890625, 5.625, 5.87109375, 6.1171875, 6.36328125, 6.609375, 6.85546875, 7.1015625, 7.34765625, 7.59375, 7.83984375, 8.0859375, 8.33203125, 8.578125]}, "gradients/decoder.transformer.h.8.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 1.0, 4.0, 5.0, 2.0, 5.0, 8.0, 5.0, 6.0, 3.0, 15.0, 19.0, 18.0, 34.0, 44.0, 74.0, 78.0, 103.0, 167.0, 315.0, 605.0, 1651.0, 5105.0, 20197.0, 90556.0, 387350.0, 1147223.0, 1532506.0, 743585.0, 203765.0, 45047.0, 10727.0, 2866.0, 1040.0, 440.0, 237.0, 142.0, 86.0, 56.0, 57.0, 28.0, 25.0, 21.0, 13.0, 20.0, 11.0, 6.0, 8.0, 8.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.15625, -8.853759765625, -8.55126953125, -8.248779296875, -7.9462890625, -7.643798828125, -7.34130859375, -7.038818359375, -6.736328125, -6.433837890625, -6.13134765625, -5.828857421875, -5.5263671875, -5.223876953125, -4.92138671875, -4.618896484375, -4.31640625, -4.013916015625, -3.71142578125, -3.408935546875, -3.1064453125, -2.803955078125, -2.50146484375, -2.198974609375, -1.896484375, -1.593994140625, -1.29150390625, -0.989013671875, -0.6865234375, -0.384033203125, -0.08154296875, 0.220947265625, 0.5234375, 0.825927734375, 1.12841796875, 1.430908203125, 1.7333984375, 2.035888671875, 2.33837890625, 2.640869140625, 2.943359375, 3.245849609375, 3.54833984375, 3.850830078125, 4.1533203125, 4.455810546875, 4.75830078125, 5.060791015625, 5.36328125, 5.665771484375, 5.96826171875, 6.270751953125, 6.5732421875, 6.875732421875, 7.17822265625, 7.480712890625, 7.783203125, 8.085693359375, 8.38818359375, 8.690673828125, 8.9931640625, 9.295654296875, 9.59814453125, 9.900634765625, 10.203125]}, "gradients/decoder.transformer.h.8.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 2.0, 8.0, 13.0, 3.0, 17.0, 31.0, 44.0, 80.0, 100.0, 142.0, 201.0, 321.0, 473.0, 560.0, 594.0, 497.0, 333.0, 218.0, 143.0, 94.0, 59.0, 47.0, 37.0, 28.0, 14.0, 6.0, 8.0, 4.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.640625, -21.0562744140625, -20.471923828125, -19.8875732421875, -19.30322265625, -18.7188720703125, -18.134521484375, -17.5501708984375, -16.9658203125, -16.3814697265625, -15.797119140625, -15.2127685546875, -14.62841796875, -14.0440673828125, -13.459716796875, -12.8753662109375, -12.291015625, -11.7066650390625, -11.122314453125, -10.5379638671875, -9.95361328125, -9.3692626953125, -8.784912109375, -8.2005615234375, -7.6162109375, -7.0318603515625, -6.447509765625, -5.8631591796875, -5.27880859375, -4.6944580078125, -4.110107421875, -3.5257568359375, -2.94140625, -2.3570556640625, -1.772705078125, -1.1883544921875, -0.60400390625, -0.0196533203125, 0.564697265625, 1.1490478515625, 1.7333984375, 2.3177490234375, 2.902099609375, 3.4864501953125, 4.07080078125, 4.6551513671875, 5.239501953125, 5.8238525390625, 6.408203125, 6.9925537109375, 7.576904296875, 8.1612548828125, 8.74560546875, 9.3299560546875, 9.914306640625, 10.4986572265625, 11.0830078125, 11.6673583984375, 12.251708984375, 12.8360595703125, 13.42041015625, 14.0047607421875, 14.589111328125, 15.1734619140625, 15.7578125]}, "gradients/decoder.transformer.h.8.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 8.0, 9.0, 8.0, 24.0, 39.0, 49.0, 63.0, 118.0, 222.0, 407.0, 933.0, 3103.0, 342894.0, 3832103.0, 11570.0, 1413.0, 596.0, 309.0, 153.0, 110.0, 62.0, 43.0, 18.0, 8.0, 11.0, 6.0, 5.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-56.8125, -54.7265625, -52.640625, -50.5546875, -48.46875, -46.3828125, -44.296875, -42.2109375, -40.125, -38.0390625, -35.953125, -33.8671875, -31.78125, -29.6953125, -27.609375, -25.5234375, -23.4375, -21.3515625, -19.265625, -17.1796875, -15.09375, -13.0078125, -10.921875, -8.8359375, -6.75, -4.6640625, -2.578125, -0.4921875, 1.59375, 3.6796875, 5.765625, 7.8515625, 9.9375, 12.0234375, 14.109375, 16.1953125, 18.28125, 20.3671875, 22.453125, 24.5390625, 26.625, 28.7109375, 30.796875, 32.8828125, 34.96875, 37.0546875, 39.140625, 41.2265625, 43.3125, 45.3984375, 47.484375, 49.5703125, 51.65625, 53.7421875, 55.828125, 57.9140625, 60.0, 62.0859375, 64.171875, 66.2578125, 68.34375, 70.4296875, 72.515625, 74.6015625, 76.6875]}, "gradients/decoder.transformer.h.8.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 9.0, 139.0, 435.0, 356.0, 71.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-233.53562927246094, -226.1939239501953, -218.85220336914062, -211.510498046875, -204.16879272460938, -196.8270721435547, -189.48536682128906, -182.14364624023438, -174.80194091796875, -167.46023559570312, -160.11851501464844, -152.7768096923828, -145.4351043701172, -138.0933837890625, -130.75167846679688, -123.40996551513672, -116.0682601928711, -108.72654724121094, -101.38484191894531, -94.04312896728516, -86.701416015625, -79.35971069335938, -72.01799774169922, -64.67628479003906, -57.33457565307617, -49.99286651611328, -42.651153564453125, -35.309444427490234, -27.96773338317871, -20.626022338867188, -13.284313201904297, -5.942600250244141, 1.39910888671875, 8.740819931030273, 16.082530975341797, 23.424240112304688, 30.76595115661621, 38.107662200927734, 45.449371337890625, 52.79108428955078, 60.13279342651367, 67.47450256347656, 74.81621551513672, 82.15792846679688, 89.4996337890625, 96.84134674072266, 104.18305969238281, 111.52476501464844, 118.8664779663086, 126.20819091796875, 133.54989624023438, 140.8916015625, 148.2333221435547, 155.5750274658203, 162.916748046875, 170.25845336914062, 177.60015869140625, 184.94186401367188, 192.28358459472656, 199.6252899169922, 206.9669952392578, 214.3087158203125, 221.65042114257812, 228.99212646484375, 236.33384704589844]}, "gradients/decoder.transformer.h.8.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 2.0, 4.0, 11.0, 6.0, 8.0, 11.0, 10.0, 17.0, 18.0, 19.0, 24.0, 20.0, 17.0, 31.0, 34.0, 32.0, 34.0, 48.0, 40.0, 42.0, 39.0, 39.0, 49.0, 30.0, 38.0, 34.0, 28.0, 24.0, 43.0, 37.0, 30.0, 35.0, 27.0, 24.0, 18.0, 22.0, 10.0, 10.0, 10.0, 7.0, 6.0, 5.0, 4.0, 2.0, 5.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-46.094520568847656, -44.69280242919922, -43.29108428955078, -41.88936996459961, -40.48765182495117, -39.085933685302734, -37.6842155456543, -36.282501220703125, -34.88078308105469, -33.47906494140625, -32.07734680175781, -30.675630569458008, -29.273914337158203, -27.872196197509766, -26.470478057861328, -25.068761825561523, -23.667043685913086, -22.26532554626465, -20.863609313964844, -19.461891174316406, -18.0601749420166, -16.658456802368164, -15.256739616394043, -13.855022430419922, -12.4533052444458, -11.05158805847168, -9.649870872497559, -8.248153686523438, -6.846436023712158, -5.444718837738037, -4.043001174926758, -2.6412839889526367, -1.2395668029785156, 0.16215050220489502, 1.5638678073883057, 2.965585231781006, 4.367302417755127, 5.769019603729248, 7.170737266540527, 8.572454452514648, 9.97417163848877, 11.37588882446289, 12.777606010437012, 14.179323196411133, 15.58104133605957, 16.982757568359375, 18.384475708007812, 19.78619384765625, 21.187910079956055, 22.589628219604492, 23.991344451904297, 25.393062591552734, 26.79477882385254, 28.196496963500977, 29.59821319580078, 30.99993133544922, 32.401649475097656, 33.803367614746094, 35.20508575439453, 36.6068000793457, 38.00851821899414, 39.41023635864258, 40.811954498291016, 42.21366882324219, 43.615386962890625]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 7.0, 2.0, 6.0, 5.0, 6.0, 7.0, 12.0, 9.0, 9.0, 9.0, 23.0, 20.0, 17.0, 21.0, 25.0, 29.0, 38.0, 41.0, 45.0, 34.0, 31.0, 47.0, 47.0, 34.0, 30.0, 50.0, 48.0, 32.0, 27.0, 32.0, 24.0, 23.0, 21.0, 26.0, 34.0, 24.0, 17.0, 25.0, 9.0, 10.0, 9.0, 8.0, 10.0, 6.0, 3.0, 4.0, 4.0, 7.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-7.5234375, -7.30572509765625, -7.0880126953125, -6.87030029296875, -6.652587890625, -6.43487548828125, -6.2171630859375, -5.99945068359375, -5.78173828125, -5.56402587890625, -5.3463134765625, -5.12860107421875, -4.910888671875, -4.69317626953125, -4.4754638671875, -4.25775146484375, -4.0400390625, -3.82232666015625, -3.6046142578125, -3.38690185546875, -3.169189453125, -2.95147705078125, -2.7337646484375, -2.51605224609375, -2.29833984375, -2.08062744140625, -1.8629150390625, -1.64520263671875, -1.427490234375, -1.20977783203125, -0.9920654296875, -0.77435302734375, -0.556640625, -0.33892822265625, -0.1212158203125, 0.09649658203125, 0.314208984375, 0.53192138671875, 0.7496337890625, 0.96734619140625, 1.18505859375, 1.40277099609375, 1.6204833984375, 1.83819580078125, 2.055908203125, 2.27362060546875, 2.4913330078125, 2.70904541015625, 2.9267578125, 3.14447021484375, 3.3621826171875, 3.57989501953125, 3.797607421875, 4.01531982421875, 4.2330322265625, 4.45074462890625, 4.66845703125, 4.88616943359375, 5.1038818359375, 5.32159423828125, 5.539306640625, 5.75701904296875, 5.9747314453125, 6.19244384765625, 6.41015625]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 4.0, 6.0, 3.0, 16.0, 9.0, 16.0, 39.0, 40.0, 59.0, 111.0, 134.0, 240.0, 368.0, 474.0, 806.0, 1096.0, 1662.0, 2535.0, 4005.0, 5702.0, 8705.0, 13450.0, 20482.0, 31647.0, 48791.0, 75336.0, 116379.0, 172006.0, 182280.0, 126674.0, 81928.0, 53161.0, 34550.0, 22760.0, 14681.0, 9601.0, 6346.0, 4137.0, 2729.0, 1921.0, 1207.0, 819.0, 525.0, 391.0, 262.0, 149.0, 124.0, 68.0, 50.0, 34.0, 16.0, 10.0, 10.0, 5.0, 6.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.49951171875, -0.48345184326171875, -0.4673919677734375, -0.45133209228515625, -0.435272216796875, -0.41921234130859375, -0.4031524658203125, -0.38709259033203125, -0.37103271484375, -0.35497283935546875, -0.3389129638671875, -0.32285308837890625, -0.306793212890625, -0.29073333740234375, -0.2746734619140625, -0.25861358642578125, -0.2425537109375, -0.22649383544921875, -0.2104339599609375, -0.19437408447265625, -0.178314208984375, -0.16225433349609375, -0.1461944580078125, -0.13013458251953125, -0.11407470703125, -0.09801483154296875, -0.0819549560546875, -0.06589508056640625, -0.049835205078125, -0.03377532958984375, -0.0177154541015625, -0.00165557861328125, 0.014404296875, 0.03046417236328125, 0.0465240478515625, 0.06258392333984375, 0.078643798828125, 0.09470367431640625, 0.1107635498046875, 0.12682342529296875, 0.14288330078125, 0.15894317626953125, 0.1750030517578125, 0.19106292724609375, 0.207122802734375, 0.22318267822265625, 0.2392425537109375, 0.25530242919921875, 0.2713623046875, 0.28742218017578125, 0.3034820556640625, 0.31954193115234375, 0.335601806640625, 0.35166168212890625, 0.3677215576171875, 0.38378143310546875, 0.39984130859375, 0.41590118408203125, 0.4319610595703125, 0.44802093505859375, 0.464080810546875, 0.48014068603515625, 0.4962005615234375, 0.5122604370117188, 0.5283203125]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 5.0, 1.0, 2.0, 6.0, 10.0, 7.0, 13.0, 17.0, 6.0, 18.0, 12.0, 19.0, 29.0, 25.0, 30.0, 28.0, 39.0, 32.0, 32.0, 32.0, 47.0, 41.0, 1069.0, 44.0, 47.0, 41.0, 43.0, 50.0, 34.0, 40.0, 37.0, 22.0, 27.0, 23.0, 14.0, 19.0, 15.0, 15.0, 5.0, 9.0, 9.0, 2.0, 5.0, 4.0, 6.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.02734375, -4.874267578125, -4.72119140625, -4.568115234375, -4.4150390625, -4.261962890625, -4.10888671875, -3.955810546875, -3.802734375, -3.649658203125, -3.49658203125, -3.343505859375, -3.1904296875, -3.037353515625, -2.88427734375, -2.731201171875, -2.578125, -2.425048828125, -2.27197265625, -2.118896484375, -1.9658203125, -1.812744140625, -1.65966796875, -1.506591796875, -1.353515625, -1.200439453125, -1.04736328125, -0.894287109375, -0.7412109375, -0.588134765625, -0.43505859375, -0.281982421875, -0.12890625, 0.024169921875, 0.17724609375, 0.330322265625, 0.4833984375, 0.636474609375, 0.78955078125, 0.942626953125, 1.095703125, 1.248779296875, 1.40185546875, 1.554931640625, 1.7080078125, 1.861083984375, 2.01416015625, 2.167236328125, 2.3203125, 2.473388671875, 2.62646484375, 2.779541015625, 2.9326171875, 3.085693359375, 3.23876953125, 3.391845703125, 3.544921875, 3.697998046875, 3.85107421875, 4.004150390625, 4.1572265625, 4.310302734375, 4.46337890625, 4.616455078125, 4.76953125]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.weight": {"_type": "histogram", "values": [4.0, 5.0, 10.0, 5.0, 14.0, 16.0, 30.0, 36.0, 64.0, 52.0, 123.0, 155.0, 244.0, 367.0, 508.0, 780.0, 1103.0, 1627.0, 2264.0, 3328.0, 4737.0, 7113.0, 10266.0, 14912.0, 22071.0, 32274.0, 46914.0, 68790.0, 99836.0, 138226.0, 1207036.0, 132681.0, 94694.0, 65478.0, 44722.0, 30901.0, 20450.0, 14143.0, 9734.0, 6507.0, 4705.0, 3126.0, 2243.0, 1530.0, 990.0, 706.0, 518.0, 327.0, 253.0, 184.0, 102.0, 68.0, 47.0, 56.0, 30.0, 14.0, 12.0, 8.0, 7.0, 1.0, 0.0, 1.0, 2.0, 2.0], "bins": [-0.376220703125, -0.3638572692871094, -0.35149383544921875, -0.3391304016113281, -0.3267669677734375, -0.3144035339355469, -0.30204010009765625, -0.2896766662597656, -0.277313232421875, -0.2649497985839844, -0.25258636474609375, -0.24022293090820312, -0.2278594970703125, -0.21549606323242188, -0.20313262939453125, -0.19076919555664062, -0.17840576171875, -0.16604232788085938, -0.15367889404296875, -0.14131546020507812, -0.1289520263671875, -0.11658859252929688, -0.10422515869140625, -0.09186172485351562, -0.079498291015625, -0.06713485717773438, -0.05477142333984375, -0.042407989501953125, -0.0300445556640625, -0.017681121826171875, -0.00531768798828125, 0.007045745849609375, 0.0194091796875, 0.031772613525390625, 0.04413604736328125, 0.056499481201171875, 0.0688629150390625, 0.08122634887695312, 0.09358978271484375, 0.10595321655273438, 0.118316650390625, 0.13068008422851562, 0.14304351806640625, 0.15540695190429688, 0.1677703857421875, 0.18013381958007812, 0.19249725341796875, 0.20486068725585938, 0.21722412109375, 0.22958755493164062, 0.24195098876953125, 0.2543144226074219, 0.2666778564453125, 0.2790412902832031, 0.29140472412109375, 0.3037681579589844, 0.316131591796875, 0.3284950256347656, 0.34085845947265625, 0.3532218933105469, 0.3655853271484375, 0.3779487609863281, 0.39031219482421875, 0.4026756286621094, 0.4150390625]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 3.0, 1.0, 5.0, 3.0, 3.0, 5.0, 10.0, 13.0, 18.0, 13.0, 13.0, 24.0, 28.0, 51.0, 32.0, 54.0, 72.0, 55.0, 67.0, 59.0, 61.0, 63.0, 56.0, 54.0, 47.0, 40.0, 29.0, 19.0, 26.0, 12.0, 18.0, 12.0, 6.0, 10.0, 8.0, 8.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.0017175674438476562, -0.0016696304082870483, -0.0016216933727264404, -0.0015737563371658325, -0.0015258193016052246, -0.0014778822660446167, -0.0014299452304840088, -0.0013820081949234009, -0.001334071159362793, -0.001286134123802185, -0.0012381970882415771, -0.0011902600526809692, -0.0011423230171203613, -0.0010943859815597534, -0.0010464489459991455, -0.0009985119104385376, -0.0009505748748779297, -0.0009026378393173218, -0.0008547008037567139, -0.000806763768196106, -0.000758826732635498, -0.0007108896970748901, -0.0006629526615142822, -0.0006150156259536743, -0.0005670785903930664, -0.0005191415548324585, -0.0004712045192718506, -0.0004232674837112427, -0.00037533044815063477, -0.00032739341259002686, -0.00027945637702941895, -0.00023151934146881104, -0.00018358230590820312, -0.00013564527034759521, -8.77082347869873e-05, -3.9771199226379395e-05, 8.165836334228516e-06, 5.6102871894836426e-05, 0.00010403990745544434, 0.00015197694301605225, 0.00019991397857666016, 0.00024785101413726807, 0.000295788049697876, 0.0003437250852584839, 0.0003916621208190918, 0.0004395991563796997, 0.0004875361919403076, 0.0005354732275009155, 0.0005834102630615234, 0.0006313472986221313, 0.0006792843341827393, 0.0007272213697433472, 0.0007751584053039551, 0.000823095440864563, 0.0008710324764251709, 0.0009189695119857788, 0.0009669065475463867, 0.0010148435831069946, 0.0010627806186676025, 0.0011107176542282104, 0.0011586546897888184, 0.0012065917253494263, 0.0012545287609100342, 0.001302465796470642, 0.00135040283203125]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 5.0, 8.0, 4.0, 6.0, 13.0, 7.0, 20.0, 25.0, 27.0, 44.0, 55.0, 77.0, 94.0, 135.0, 264.0, 591.0, 1945.0, 909959.0, 133024.0, 1066.0, 441.0, 225.0, 148.0, 86.0, 84.0, 51.0, 42.0, 21.0, 18.0, 15.0, 11.0, 10.0, 11.0, 8.0, 3.0, 6.0, 3.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.028564453125, -0.027538299560546875, -0.02651214599609375, -0.025485992431640625, -0.0244598388671875, -0.023433685302734375, -0.02240753173828125, -0.021381378173828125, -0.020355224609375, -0.019329071044921875, -0.01830291748046875, -0.017276763916015625, -0.0162506103515625, -0.015224456787109375, -0.01419830322265625, -0.013172149658203125, -0.01214599609375, -0.011119842529296875, -0.01009368896484375, -0.009067535400390625, -0.0080413818359375, -0.007015228271484375, -0.00598907470703125, -0.004962921142578125, -0.003936767578125, -0.002910614013671875, -0.00188446044921875, -0.000858306884765625, 0.0001678466796875, 0.001194000244140625, 0.00222015380859375, 0.003246307373046875, 0.0042724609375, 0.005298614501953125, 0.00632476806640625, 0.007350921630859375, 0.0083770751953125, 0.009403228759765625, 0.01042938232421875, 0.011455535888671875, 0.012481689453125, 0.013507843017578125, 0.01453399658203125, 0.015560150146484375, 0.0165863037109375, 0.017612457275390625, 0.01863861083984375, 0.019664764404296875, 0.02069091796875, 0.021717071533203125, 0.02274322509765625, 0.023769378662109375, 0.0247955322265625, 0.025821685791015625, 0.02684783935546875, 0.027873992919921875, 0.028900146484375, 0.029926300048828125, 0.03095245361328125, 0.031978607177734375, 0.0330047607421875, 0.034030914306640625, 0.03505706787109375, 0.036083221435546875, 0.037109375]}, "gradients/decoder.transformer.h.8.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 20.0, 100.0, 345.0, 366.0, 148.0, 27.0, 9.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005234075710177422, -0.0051317536272108555, -0.005029431078583002, -0.004927108995616436, -0.00482478691264987, -0.0047224643640220165, -0.0046201422810554504, -0.004517819732427597, -0.004415497649461031, -0.004313175566494465, -0.0042108530178666115, -0.004108530934900045, -0.004006208851933479, -0.003903886303305626, -0.00380156422033906, -0.00369924190454185, -0.0035969195887446404, -0.0034945972729474306, -0.0033922751899808645, -0.003289952874183655, -0.003187630558386445, -0.0030853082425892353, -0.0029829861596226692, -0.0028806638438254595, -0.0027783417608588934, -0.0026760194450616837, -0.0025736973620951176, -0.002471375046297908, -0.002369052730500698, -0.002266730647534132, -0.0021644083317369223, -0.0020620860159397125, -0.001959763700142503, -0.0018574415007606149, -0.0017551191849634051, -0.0016527969855815172, -0.0015504746697843075, -0.0014481524704024196, -0.0013458302710205317, -0.001243507955223322, -0.0011411856394261122, -0.0010388634400442243, -0.0009365411242470145, -0.0008342189248651266, -0.0007318966090679169, -0.000629574409686029, -0.0005272521520964801, -0.0004249298945069313, -0.0003226076951250434, -0.00022028543753549457, -0.00011796319449786097, -1.564095146022737e-05, 8.668130612932146e-05, 0.00018900353461503983, 0.00029132579220458865, 0.0003936480497941375, 0.0004959703073836863, 0.0005982925649732351, 0.000700614822562784, 0.0008029370801523328, 0.0009052592795342207, 0.0010075815953314304, 0.0011099037947133183, 0.0012122259940952063, 0.001314548309892416]}, "gradients/decoder.transformer.h.8.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 3.0, 5.0, 3.0, 2.0, 3.0, 15.0, 15.0, 20.0, 15.0, 17.0, 34.0, 25.0, 31.0, 31.0, 40.0, 40.0, 32.0, 62.0, 41.0, 48.0, 56.0, 52.0, 46.0, 32.0, 43.0, 30.0, 28.0, 43.0, 30.0, 31.0, 20.0, 23.0, 17.0, 14.0, 14.0, 11.0, 10.0, 8.0, 4.0, 7.0, 6.0, 6.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006322264671325684, -0.0006057368591427803, -0.0005792472511529922, -0.0005527576431632042, -0.0005262680351734161, -0.0004997784271836281, -0.00047328881919384, -0.00044679921120405197, -0.0004203096032142639, -0.00039381999522447586, -0.0003673303872346878, -0.00034084077924489975, -0.0003143511712551117, -0.00028786156326532364, -0.0002613719552755356, -0.00023488234728574753, -0.00020839273929595947, -0.00018190313130617142, -0.00015541352331638336, -0.0001289239153265953, -0.00010243430733680725, -7.59446993470192e-05, -4.945509135723114e-05, -2.2965483367443085e-05, 3.5241246223449707e-06, 3.0013732612133026e-05, 5.650334060192108e-05, 8.299294859170914e-05, 0.00010948255658149719, 0.00013597216457128525, 0.0001624617725610733, 0.00018895138055086136, 0.00021544098854064941, 0.00024193059653043747, 0.0002684202045202255, 0.0002949098125100136, 0.00032139942049980164, 0.0003478890284895897, 0.00037437863647937775, 0.0004008682444691658, 0.00042735785245895386, 0.0004538474604487419, 0.00048033706843852997, 0.000506826676428318, 0.0005333162844181061, 0.0005598058924078941, 0.0005862955003976822, 0.0006127851083874702, 0.0006392747163772583, 0.0006657643243670464, 0.0006922539323568344, 0.0007187435403466225, 0.0007452331483364105, 0.0007717227563261986, 0.0007982123643159866, 0.0008247019723057747, 0.0008511915802955627, 0.0008776811882853508, 0.0009041707962751389, 0.0009306604042649269, 0.000957150012254715, 0.000983639620244503, 0.001010129228234291, 0.0010366188362240791, 0.0010631084442138672]}, "gradients/decoder.transformer.h.8.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 7.0, 2.0, 6.0, 5.0, 6.0, 7.0, 12.0, 9.0, 9.0, 9.0, 23.0, 20.0, 17.0, 21.0, 25.0, 29.0, 38.0, 41.0, 45.0, 34.0, 31.0, 47.0, 47.0, 34.0, 30.0, 50.0, 48.0, 32.0, 27.0, 32.0, 24.0, 23.0, 21.0, 26.0, 34.0, 24.0, 17.0, 25.0, 9.0, 10.0, 9.0, 8.0, 10.0, 6.0, 3.0, 4.0, 4.0, 7.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-7.5234375, -7.30572509765625, -7.0880126953125, -6.87030029296875, -6.652587890625, -6.43487548828125, -6.2171630859375, -5.99945068359375, -5.78173828125, -5.56402587890625, -5.3463134765625, -5.12860107421875, -4.910888671875, -4.69317626953125, -4.4754638671875, -4.25775146484375, -4.0400390625, -3.82232666015625, -3.6046142578125, -3.38690185546875, -3.169189453125, -2.95147705078125, -2.7337646484375, -2.51605224609375, -2.29833984375, -2.08062744140625, -1.8629150390625, -1.64520263671875, -1.427490234375, -1.20977783203125, -0.9920654296875, -0.77435302734375, -0.556640625, -0.33892822265625, -0.1212158203125, 0.09649658203125, 0.314208984375, 0.53192138671875, 0.7496337890625, 0.96734619140625, 1.18505859375, 1.40277099609375, 1.6204833984375, 1.83819580078125, 2.055908203125, 2.27362060546875, 2.4913330078125, 2.70904541015625, 2.9267578125, 3.14447021484375, 3.3621826171875, 3.57989501953125, 3.797607421875, 4.01531982421875, 4.2330322265625, 4.45074462890625, 4.66845703125, 4.88616943359375, 5.1038818359375, 5.32159423828125, 5.539306640625, 5.75701904296875, 5.9747314453125, 6.19244384765625, 6.41015625]}, "gradients/decoder.transformer.h.8.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 10.0, 3.0, 5.0, 6.0, 6.0, 6.0, 14.0, 11.0, 14.0, 30.0, 39.0, 74.0, 85.0, 236.0, 402.0, 904.0, 2080.0, 4901.0, 11473.0, 26692.0, 66531.0, 188736.0, 380918.0, 228999.0, 80651.0, 31916.0, 13320.0, 5843.0, 2500.0, 1093.0, 488.0, 235.0, 118.0, 66.0, 35.0, 34.0, 22.0, 15.0, 10.0, 9.0, 9.0, 6.0, 6.0, 7.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.2109375, -8.9447021484375, -8.678466796875, -8.4122314453125, -8.14599609375, -7.8797607421875, -7.613525390625, -7.3472900390625, -7.0810546875, -6.8148193359375, -6.548583984375, -6.2823486328125, -6.01611328125, -5.7498779296875, -5.483642578125, -5.2174072265625, -4.951171875, -4.6849365234375, -4.418701171875, -4.1524658203125, -3.88623046875, -3.6199951171875, -3.353759765625, -3.0875244140625, -2.8212890625, -2.5550537109375, -2.288818359375, -2.0225830078125, -1.75634765625, -1.4901123046875, -1.223876953125, -0.9576416015625, -0.69140625, -0.4251708984375, -0.158935546875, 0.1072998046875, 0.37353515625, 0.6397705078125, 0.906005859375, 1.1722412109375, 1.4384765625, 1.7047119140625, 1.970947265625, 2.2371826171875, 2.50341796875, 2.7696533203125, 3.035888671875, 3.3021240234375, 3.568359375, 3.8345947265625, 4.100830078125, 4.3670654296875, 4.63330078125, 4.8995361328125, 5.165771484375, 5.4320068359375, 5.6982421875, 5.9644775390625, 6.230712890625, 6.4969482421875, 6.76318359375, 7.0294189453125, 7.295654296875, 7.5618896484375, 7.828125]}, "gradients/decoder.transformer.h.8.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 2.0, 4.0, 2.0, 5.0, 2.0, 5.0, 12.0, 8.0, 11.0, 11.0, 8.0, 18.0, 36.0, 22.0, 39.0, 38.0, 49.0, 38.0, 55.0, 58.0, 92.0, 178.0, 1410.0, 345.0, 131.0, 85.0, 89.0, 47.0, 44.0, 24.0, 32.0, 22.0, 27.0, 22.0, 16.0, 16.0, 16.0, 7.0, 12.0, 9.0, 4.0, 5.0, 5.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.984375, -22.131103515625, -21.27783203125, -20.424560546875, -19.5712890625, -18.718017578125, -17.86474609375, -17.011474609375, -16.158203125, -15.304931640625, -14.45166015625, -13.598388671875, -12.7451171875, -11.891845703125, -11.03857421875, -10.185302734375, -9.33203125, -8.478759765625, -7.62548828125, -6.772216796875, -5.9189453125, -5.065673828125, -4.21240234375, -3.359130859375, -2.505859375, -1.652587890625, -0.79931640625, 0.053955078125, 0.9072265625, 1.760498046875, 2.61376953125, 3.467041015625, 4.3203125, 5.173583984375, 6.02685546875, 6.880126953125, 7.7333984375, 8.586669921875, 9.43994140625, 10.293212890625, 11.146484375, 11.999755859375, 12.85302734375, 13.706298828125, 14.5595703125, 15.412841796875, 16.26611328125, 17.119384765625, 17.97265625, 18.825927734375, 19.67919921875, 20.532470703125, 21.3857421875, 22.239013671875, 23.09228515625, 23.945556640625, 24.798828125, 25.652099609375, 26.50537109375, 27.358642578125, 28.2119140625, 29.065185546875, 29.91845703125, 30.771728515625, 31.625]}, "gradients/decoder.transformer.h.8.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 2.0, 3.0, 3.0, 5.0, 9.0, 8.0, 17.0, 27.0, 36.0, 55.0, 73.0, 113.0, 181.0, 277.0, 560.0, 1531.0, 47292.0, 3074774.0, 18222.0, 1175.0, 514.0, 309.0, 167.0, 113.0, 68.0, 36.0, 32.0, 23.0, 17.0, 17.0, 15.0, 8.0, 5.0, 4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.3125, -58.1123046875, -55.912109375, -53.7119140625, -51.51171875, -49.3115234375, -47.111328125, -44.9111328125, -42.7109375, -40.5107421875, -38.310546875, -36.1103515625, -33.91015625, -31.7099609375, -29.509765625, -27.3095703125, -25.109375, -22.9091796875, -20.708984375, -18.5087890625, -16.30859375, -14.1083984375, -11.908203125, -9.7080078125, -7.5078125, -5.3076171875, -3.107421875, -0.9072265625, 1.29296875, 3.4931640625, 5.693359375, 7.8935546875, 10.09375, 12.2939453125, 14.494140625, 16.6943359375, 18.89453125, 21.0947265625, 23.294921875, 25.4951171875, 27.6953125, 29.8955078125, 32.095703125, 34.2958984375, 36.49609375, 38.6962890625, 40.896484375, 43.0966796875, 45.296875, 47.4970703125, 49.697265625, 51.8974609375, 54.09765625, 56.2978515625, 58.498046875, 60.6982421875, 62.8984375, 65.0986328125, 67.298828125, 69.4990234375, 71.69921875, 73.8994140625, 76.099609375, 78.2998046875, 80.5]}, "gradients/decoder.transformer.h.8.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 7.0, 34.0, 66.0, 139.0, 200.0, 207.0, 165.0, 94.0, 72.0, 21.0, 5.0, 5.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.15580940246582, -18.501420974731445, -16.84703254699707, -15.192644119262695, -13.53825569152832, -11.883867263793945, -10.22947883605957, -8.575090408325195, -6.92070198059082, -5.266313552856445, -3.6119251251220703, -1.9575366973876953, -0.3031482696533203, 1.3512401580810547, 3.0056285858154297, 4.660017013549805, 6.31440544128418, 7.968793869018555, 9.62318229675293, 11.277570724487305, 12.93195915222168, 14.586347579956055, 16.24073600769043, 17.895124435424805, 19.54951286315918, 21.203901290893555, 22.85828971862793, 24.512678146362305, 26.16706657409668, 27.821455001831055, 29.47584342956543, 31.130231857299805, 32.78462219238281, 34.43901062011719, 36.09339904785156, 37.74778747558594, 39.40217590332031, 41.05656433105469, 42.71095275878906, 44.36534118652344, 46.01972961425781, 47.67411804199219, 49.32850646972656, 50.98289489746094, 52.63728332519531, 54.29167175292969, 55.94606018066406, 57.60044860839844, 59.25483703613281, 60.90922546386719, 62.56361389160156, 64.21800231933594, 65.87239074707031, 67.52677917480469, 69.18116760253906, 70.83555603027344, 72.48994445800781, 74.14433288574219, 75.79872131347656, 77.45310974121094, 79.10749816894531, 80.76188659667969, 82.41627502441406, 84.07066345214844, 85.72505187988281]}, "gradients/decoder.transformer.h.8.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 5.0, 2.0, 6.0, 8.0, 10.0, 5.0, 7.0, 20.0, 26.0, 8.0, 15.0, 27.0, 24.0, 28.0, 41.0, 35.0, 35.0, 42.0, 38.0, 45.0, 43.0, 42.0, 38.0, 36.0, 40.0, 56.0, 36.0, 34.0, 37.0, 35.0, 26.0, 24.0, 30.0, 10.0, 17.0, 13.0, 13.0, 11.0, 13.0, 9.0, 5.0, 6.0, 4.0, 7.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-54.48859786987305, -52.599021911621094, -50.70944595336914, -48.81986999511719, -46.93029022216797, -45.04071807861328, -43.15113830566406, -41.26156234741211, -39.371986389160156, -37.4824104309082, -35.59283447265625, -33.7032585144043, -31.81368064880371, -29.924104690551758, -28.034526824951172, -26.14495086669922, -24.255374908447266, -22.365798950195312, -20.47622299194336, -18.586645126342773, -16.69706916809082, -14.807493209838867, -12.917916297912598, -11.028339385986328, -9.138763427734375, -7.249186992645264, -5.359610557556152, -3.470034122467041, -1.5804576873779297, 0.30911827087402344, 2.198695182800293, 4.0882720947265625, 5.97784423828125, 7.867420673370361, 9.756997108459473, 11.646574020385742, 13.536149978637695, 15.425725936889648, 17.315303802490234, 19.204879760742188, 21.09445571899414, 22.984031677246094, 24.873607635498047, 26.763185501098633, 28.652761459350586, 30.54233741760254, 32.431915283203125, 34.32149124145508, 36.21106719970703, 38.100643157958984, 39.99021911621094, 41.87979507446289, 43.769371032714844, 45.65895080566406, 47.548526763916016, 49.43810272216797, 51.32767868041992, 53.217254638671875, 55.10683059692383, 56.99640655517578, 58.885986328125, 60.77555847167969, 62.665138244628906, 64.55471801757812, 66.44429016113281]}, "gradients/decoder.transformer.h.7.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 3.0, 4.0, 1.0, 4.0, 4.0, 7.0, 2.0, 7.0, 4.0, 6.0, 15.0, 12.0, 14.0, 18.0, 9.0, 19.0, 17.0, 21.0, 29.0, 35.0, 26.0, 33.0, 32.0, 47.0, 39.0, 38.0, 45.0, 38.0, 44.0, 29.0, 38.0, 42.0, 37.0, 21.0, 32.0, 24.0, 30.0, 30.0, 17.0, 18.0, 16.0, 16.0, 15.0, 14.0, 11.0, 10.0, 8.0, 7.0, 8.0, 6.0, 3.0, 5.0, 2.0, 0.0, 2.0, 2.0, 1.0], "bins": [-7.6640625, -7.4420166015625, -7.219970703125, -6.9979248046875, -6.77587890625, -6.5538330078125, -6.331787109375, -6.1097412109375, -5.8876953125, -5.6656494140625, -5.443603515625, -5.2215576171875, -4.99951171875, -4.7774658203125, -4.555419921875, -4.3333740234375, -4.111328125, -3.8892822265625, -3.667236328125, -3.4451904296875, -3.22314453125, -3.0010986328125, -2.779052734375, -2.5570068359375, -2.3349609375, -2.1129150390625, -1.890869140625, -1.6688232421875, -1.44677734375, -1.2247314453125, -1.002685546875, -0.7806396484375, -0.55859375, -0.3365478515625, -0.114501953125, 0.1075439453125, 0.32958984375, 0.5516357421875, 0.773681640625, 0.9957275390625, 1.2177734375, 1.4398193359375, 1.661865234375, 1.8839111328125, 2.10595703125, 2.3280029296875, 2.550048828125, 2.7720947265625, 2.994140625, 3.2161865234375, 3.438232421875, 3.6602783203125, 3.88232421875, 4.1043701171875, 4.326416015625, 4.5484619140625, 4.7705078125, 4.9925537109375, 5.214599609375, 5.4366455078125, 5.65869140625, 5.8807373046875, 6.102783203125, 6.3248291015625, 6.546875]}, "gradients/decoder.transformer.h.7.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 0.0, 1.0, 1.0, 4.0, 3.0, 6.0, 8.0, 4.0, 5.0, 9.0, 8.0, 7.0, 23.0, 18.0, 25.0, 22.0, 26.0, 36.0, 42.0, 70.0, 70.0, 124.0, 165.0, 501.0, 2334.0, 26609.0, 491150.0, 2798075.0, 821885.0, 48049.0, 3639.0, 629.0, 208.0, 119.0, 71.0, 60.0, 47.0, 45.0, 38.0, 22.0, 17.0, 20.0, 11.0, 15.0, 10.0, 16.0, 10.0, 7.0, 9.0, 8.0, 3.0, 3.0, 3.0, 2.0, 1.0, 0.0, 1.0], "bins": [-20.1875, -19.60400390625, -19.0205078125, -18.43701171875, -17.853515625, -17.27001953125, -16.6865234375, -16.10302734375, -15.51953125, -14.93603515625, -14.3525390625, -13.76904296875, -13.185546875, -12.60205078125, -12.0185546875, -11.43505859375, -10.8515625, -10.26806640625, -9.6845703125, -9.10107421875, -8.517578125, -7.93408203125, -7.3505859375, -6.76708984375, -6.18359375, -5.60009765625, -5.0166015625, -4.43310546875, -3.849609375, -3.26611328125, -2.6826171875, -2.09912109375, -1.515625, -0.93212890625, -0.3486328125, 0.23486328125, 0.818359375, 1.40185546875, 1.9853515625, 2.56884765625, 3.15234375, 3.73583984375, 4.3193359375, 4.90283203125, 5.486328125, 6.06982421875, 6.6533203125, 7.23681640625, 7.8203125, 8.40380859375, 8.9873046875, 9.57080078125, 10.154296875, 10.73779296875, 11.3212890625, 11.90478515625, 12.48828125, 13.07177734375, 13.6552734375, 14.23876953125, 14.822265625, 15.40576171875, 15.9892578125, 16.57275390625, 17.15625]}, "gradients/decoder.transformer.h.7.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 6.0, 5.0, 5.0, 16.0, 14.0, 16.0, 12.0, 31.0, 44.0, 59.0, 77.0, 84.0, 121.0, 207.0, 240.0, 357.0, 413.0, 423.0, 432.0, 398.0, 279.0, 218.0, 174.0, 118.0, 93.0, 48.0, 41.0, 44.0, 33.0, 25.0, 10.0, 9.0, 8.0, 6.0, 6.0, 1.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.734375, -12.334716796875, -11.93505859375, -11.535400390625, -11.1357421875, -10.736083984375, -10.33642578125, -9.936767578125, -9.537109375, -9.137451171875, -8.73779296875, -8.338134765625, -7.9384765625, -7.538818359375, -7.13916015625, -6.739501953125, -6.33984375, -5.940185546875, -5.54052734375, -5.140869140625, -4.7412109375, -4.341552734375, -3.94189453125, -3.542236328125, -3.142578125, -2.742919921875, -2.34326171875, -1.943603515625, -1.5439453125, -1.144287109375, -0.74462890625, -0.344970703125, 0.0546875, 0.454345703125, 0.85400390625, 1.253662109375, 1.6533203125, 2.052978515625, 2.45263671875, 2.852294921875, 3.251953125, 3.651611328125, 4.05126953125, 4.450927734375, 4.8505859375, 5.250244140625, 5.64990234375, 6.049560546875, 6.44921875, 6.848876953125, 7.24853515625, 7.648193359375, 8.0478515625, 8.447509765625, 8.84716796875, 9.246826171875, 9.646484375, 10.046142578125, 10.44580078125, 10.845458984375, 11.2451171875, 11.644775390625, 12.04443359375, 12.444091796875, 12.84375]}, "gradients/decoder.transformer.h.7.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 2.0, 6.0, 3.0, 5.0, 16.0, 10.0, 28.0, 34.0, 35.0, 59.0, 84.0, 115.0, 139.0, 216.0, 340.0, 586.0, 1554.0, 23994.0, 1886895.0, 2248074.0, 28478.0, 1857.0, 564.0, 389.0, 232.0, 143.0, 122.0, 90.0, 68.0, 41.0, 23.0, 29.0, 15.0, 13.0, 13.0, 4.0, 4.0, 4.0, 1.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-42.1875, -40.8701171875, -39.552734375, -38.2353515625, -36.91796875, -35.6005859375, -34.283203125, -32.9658203125, -31.6484375, -30.3310546875, -29.013671875, -27.6962890625, -26.37890625, -25.0615234375, -23.744140625, -22.4267578125, -21.109375, -19.7919921875, -18.474609375, -17.1572265625, -15.83984375, -14.5224609375, -13.205078125, -11.8876953125, -10.5703125, -9.2529296875, -7.935546875, -6.6181640625, -5.30078125, -3.9833984375, -2.666015625, -1.3486328125, -0.03125, 1.2861328125, 2.603515625, 3.9208984375, 5.23828125, 6.5556640625, 7.873046875, 9.1904296875, 10.5078125, 11.8251953125, 13.142578125, 14.4599609375, 15.77734375, 17.0947265625, 18.412109375, 19.7294921875, 21.046875, 22.3642578125, 23.681640625, 24.9990234375, 26.31640625, 27.6337890625, 28.951171875, 30.2685546875, 31.5859375, 32.9033203125, 34.220703125, 35.5380859375, 36.85546875, 38.1728515625, 39.490234375, 40.8076171875, 42.125]}, "gradients/decoder.transformer.h.7.ln_2.weight": {"_type": "histogram", "values": [4.0, 44.0, 311.0, 473.0, 172.0, 13.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.403104782104492, -17.038330078125, -9.673555374145508, -2.3087806701660156, 5.055994033813477, 12.420770645141602, 19.78554344177246, 27.15031623840332, 34.51509094238281, 41.87986755371094, 49.2446403503418, 56.609413146972656, 63.97418975830078, 71.3389663696289, 78.7037353515625, 86.06851196289062, 93.43328857421875, 100.79806518554688, 108.162841796875, 115.5276107788086, 122.89238739013672, 130.25717163085938, 137.62193298339844, 144.98670959472656, 152.3514862060547, 159.7162628173828, 167.08103942871094, 174.44581604003906, 181.81057739257812, 189.17535400390625, 196.54013061523438, 203.9049072265625, 211.26968383789062, 218.63446044921875, 225.99923706054688, 233.364013671875, 240.72879028320312, 248.09356689453125, 255.4583282470703, 262.8231201171875, 270.1878662109375, 277.5526428222656, 284.91741943359375, 292.2821960449219, 299.64697265625, 307.0117492675781, 314.37652587890625, 321.74127197265625, 329.1060791015625, 336.4708557128906, 343.83563232421875, 351.2004089355469, 358.565185546875, 365.9299621582031, 373.29473876953125, 380.65948486328125, 388.0242919921875, 395.3890686035156, 402.75384521484375, 410.1186218261719, 417.4833984375, 424.8481750488281, 432.21295166015625, 439.57769775390625, 446.9424743652344]}, "gradients/decoder.transformer.h.7.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 13.0, 13.0, 21.0, 21.0, 21.0, 22.0, 28.0, 30.0, 43.0, 45.0, 30.0, 40.0, 54.0, 34.0, 51.0, 41.0, 42.0, 41.0, 26.0, 32.0, 42.0, 39.0, 26.0, 41.0, 31.0, 23.0, 15.0, 13.0, 15.0, 19.0, 13.0, 11.0, 7.0, 4.0, 8.0, 3.0, 7.0, 4.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.55632019042969, -40.210723876953125, -38.86512756347656, -37.51953125, -36.17393493652344, -34.828338623046875, -33.48274230957031, -32.137149810791016, -30.791553497314453, -29.44595718383789, -28.100360870361328, -26.754764556884766, -25.409170150756836, -24.063573837280273, -22.71797752380371, -21.37238311767578, -20.026784896850586, -18.681188583374023, -17.33559226989746, -15.989996910095215, -14.644401550292969, -13.298805236816406, -11.953208923339844, -10.607613563537598, -9.262017250061035, -7.916421413421631, -6.570825576782227, -5.225229263305664, -3.8796334266662598, -2.5340375900268555, -1.188441276550293, 0.15715408325195312, 1.5027503967285156, 2.84834623336792, 4.193942070007324, 5.539538383483887, 6.885134220123291, 8.230730056762695, 9.576326370239258, 10.921921730041504, 12.267518043518066, 13.613114356994629, 14.958709716796875, 16.304306030273438, 17.64990234375, 18.995498657226562, 20.341094970703125, 21.686689376831055, 23.032285690307617, 24.37788200378418, 25.723478317260742, 27.069072723388672, 28.414669036865234, 29.760265350341797, 31.10586166381836, 32.45145797729492, 33.797054290771484, 35.14265060424805, 36.48824691772461, 37.83384323120117, 39.179439544677734, 40.52503204345703, 41.870628356933594, 43.216224670410156, 44.56182098388672]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 5.0, 2.0, 1.0, 4.0, 5.0, 8.0, 6.0, 7.0, 13.0, 6.0, 14.0, 14.0, 12.0, 14.0, 22.0, 19.0, 25.0, 36.0, 31.0, 32.0, 39.0, 35.0, 30.0, 32.0, 36.0, 42.0, 29.0, 42.0, 43.0, 37.0, 39.0, 35.0, 36.0, 35.0, 24.0, 28.0, 27.0, 21.0, 15.0, 23.0, 14.0, 15.0, 10.0, 9.0, 5.0, 5.0, 12.0, 14.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-7.10546875, -6.88580322265625, -6.6661376953125, -6.44647216796875, -6.226806640625, -6.00714111328125, -5.7874755859375, -5.56781005859375, -5.34814453125, -5.12847900390625, -4.9088134765625, -4.68914794921875, -4.469482421875, -4.24981689453125, -4.0301513671875, -3.81048583984375, -3.5908203125, -3.37115478515625, -3.1514892578125, -2.93182373046875, -2.712158203125, -2.49249267578125, -2.2728271484375, -2.05316162109375, -1.83349609375, -1.61383056640625, -1.3941650390625, -1.17449951171875, -0.954833984375, -0.73516845703125, -0.5155029296875, -0.29583740234375, -0.076171875, 0.14349365234375, 0.3631591796875, 0.58282470703125, 0.802490234375, 1.02215576171875, 1.2418212890625, 1.46148681640625, 1.68115234375, 1.90081787109375, 2.1204833984375, 2.34014892578125, 2.559814453125, 2.77947998046875, 2.9991455078125, 3.21881103515625, 3.4384765625, 3.65814208984375, 3.8778076171875, 4.09747314453125, 4.317138671875, 4.53680419921875, 4.7564697265625, 4.97613525390625, 5.19580078125, 5.41546630859375, 5.6351318359375, 5.85479736328125, 6.074462890625, 6.29412841796875, 6.5137939453125, 6.73345947265625, 6.953125]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 4.0, 10.0, 8.0, 7.0, 18.0, 23.0, 52.0, 67.0, 97.0, 126.0, 201.0, 254.0, 413.0, 655.0, 925.0, 1310.0, 1958.0, 2895.0, 4426.0, 6730.0, 10317.0, 16351.0, 25792.0, 41047.0, 67624.0, 108619.0, 166660.0, 195391.0, 146904.0, 93714.0, 57747.0, 35526.0, 22121.0, 14065.0, 8879.0, 5860.0, 3892.0, 2543.0, 1745.0, 1153.0, 805.0, 500.0, 367.0, 231.0, 165.0, 127.0, 72.0, 56.0, 42.0, 24.0, 16.0, 14.0, 7.0, 7.0, 3.0, 2.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.5439453125, -0.5265884399414062, -0.5092315673828125, -0.49187469482421875, -0.474517822265625, -0.45716094970703125, -0.4398040771484375, -0.42244720458984375, -0.40509033203125, -0.38773345947265625, -0.3703765869140625, -0.35301971435546875, -0.335662841796875, -0.31830596923828125, -0.3009490966796875, -0.28359222412109375, -0.2662353515625, -0.24887847900390625, -0.2315216064453125, -0.21416473388671875, -0.196807861328125, -0.17945098876953125, -0.1620941162109375, -0.14473724365234375, -0.12738037109375, -0.11002349853515625, -0.0926666259765625, -0.07530975341796875, -0.057952880859375, -0.04059600830078125, -0.0232391357421875, -0.00588226318359375, 0.011474609375, 0.02883148193359375, 0.0461883544921875, 0.06354522705078125, 0.080902099609375, 0.09825897216796875, 0.1156158447265625, 0.13297271728515625, 0.15032958984375, 0.16768646240234375, 0.1850433349609375, 0.20240020751953125, 0.219757080078125, 0.23711395263671875, 0.2544708251953125, 0.27182769775390625, 0.2891845703125, 0.30654144287109375, 0.3238983154296875, 0.34125518798828125, 0.358612060546875, 0.37596893310546875, 0.3933258056640625, 0.41068267822265625, 0.42803955078125, 0.44539642333984375, 0.4627532958984375, 0.48011016845703125, 0.497467041015625, 0.5148239135742188, 0.5321807861328125, 0.5495376586914062, 0.56689453125]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 6.0, 1.0, 4.0, 5.0, 12.0, 10.0, 13.0, 9.0, 15.0, 13.0, 25.0, 19.0, 26.0, 22.0, 25.0, 38.0, 33.0, 48.0, 28.0, 31.0, 46.0, 42.0, 55.0, 1068.0, 44.0, 41.0, 37.0, 42.0, 30.0, 33.0, 26.0, 24.0, 20.0, 27.0, 25.0, 15.0, 13.0, 10.0, 7.0, 9.0, 9.0, 5.0, 6.0, 6.0, 1.0, 3.0, 1.0, 2.0, 3.0, 3.0, 3.0], "bins": [-5.44140625, -5.2904052734375, -5.139404296875, -4.9884033203125, -4.83740234375, -4.6864013671875, -4.535400390625, -4.3843994140625, -4.2333984375, -4.0823974609375, -3.931396484375, -3.7803955078125, -3.62939453125, -3.4783935546875, -3.327392578125, -3.1763916015625, -3.025390625, -2.8743896484375, -2.723388671875, -2.5723876953125, -2.42138671875, -2.2703857421875, -2.119384765625, -1.9683837890625, -1.8173828125, -1.6663818359375, -1.515380859375, -1.3643798828125, -1.21337890625, -1.0623779296875, -0.911376953125, -0.7603759765625, -0.609375, -0.4583740234375, -0.307373046875, -0.1563720703125, -0.00537109375, 0.1456298828125, 0.296630859375, 0.4476318359375, 0.5986328125, 0.7496337890625, 0.900634765625, 1.0516357421875, 1.20263671875, 1.3536376953125, 1.504638671875, 1.6556396484375, 1.806640625, 1.9576416015625, 2.108642578125, 2.2596435546875, 2.41064453125, 2.5616455078125, 2.712646484375, 2.8636474609375, 3.0146484375, 3.1656494140625, 3.316650390625, 3.4676513671875, 3.61865234375, 3.7696533203125, 3.920654296875, 4.0716552734375, 4.22265625]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 5.0, 0.0, 6.0, 4.0, 20.0, 25.0, 44.0, 44.0, 98.0, 139.0, 245.0, 381.0, 661.0, 1042.0, 1657.0, 2786.0, 4407.0, 7318.0, 11551.0, 18437.0, 30564.0, 50245.0, 83272.0, 134693.0, 1226085.0, 198172.0, 126636.0, 77621.0, 46589.0, 28461.0, 17432.0, 10779.0, 6777.0, 4170.0, 2568.0, 1611.0, 1052.0, 601.0, 383.0, 219.0, 107.0, 90.0, 56.0, 33.0, 26.0, 15.0, 7.0, 4.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.5361328125, -0.5198516845703125, -0.503570556640625, -0.4872894287109375, -0.47100830078125, -0.4547271728515625, -0.438446044921875, -0.4221649169921875, -0.4058837890625, -0.3896026611328125, -0.373321533203125, -0.3570404052734375, -0.34075927734375, -0.3244781494140625, -0.308197021484375, -0.2919158935546875, -0.275634765625, -0.2593536376953125, -0.243072509765625, -0.2267913818359375, -0.21051025390625, -0.1942291259765625, -0.177947998046875, -0.1616668701171875, -0.1453857421875, -0.1291046142578125, -0.112823486328125, -0.0965423583984375, -0.08026123046875, -0.0639801025390625, -0.047698974609375, -0.0314178466796875, -0.01513671875, 0.0011444091796875, 0.017425537109375, 0.0337066650390625, 0.04998779296875, 0.0662689208984375, 0.082550048828125, 0.0988311767578125, 0.1151123046875, 0.1313934326171875, 0.147674560546875, 0.1639556884765625, 0.18023681640625, 0.1965179443359375, 0.212799072265625, 0.2290802001953125, 0.245361328125, 0.2616424560546875, 0.277923583984375, 0.2942047119140625, 0.31048583984375, 0.3267669677734375, 0.343048095703125, 0.3593292236328125, 0.3756103515625, 0.3918914794921875, 0.408172607421875, 0.4244537353515625, 0.44073486328125, 0.4570159912109375, 0.473297119140625, 0.4895782470703125, 0.505859375]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 5.0, 17.0, 19.0, 18.0, 28.0, 27.0, 32.0, 56.0, 75.0, 78.0, 93.0, 89.0, 87.0, 78.0, 80.0, 58.0, 34.0, 33.0, 20.0, 23.0, 13.0, 10.0, 12.0, 3.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.002216339111328125, -0.002132326364517212, -0.002048313617706299, -0.0019643008708953857, -0.0018802881240844727, -0.0017962753772735596, -0.0017122626304626465, -0.0016282498836517334, -0.0015442371368408203, -0.0014602243900299072, -0.0013762116432189941, -0.001292198896408081, -0.001208186149597168, -0.0011241734027862549, -0.0010401606559753418, -0.0009561479091644287, -0.0008721351623535156, -0.0007881224155426025, -0.0007041096687316895, -0.0006200969219207764, -0.0005360841751098633, -0.0004520714282989502, -0.0003680586814880371, -0.000284045934677124, -0.00020003318786621094, -0.00011602044105529785, -3.2007694244384766e-05, 5.200505256652832e-05, 0.0001360177993774414, 0.0002200305461883545, 0.0003040432929992676, 0.00038805603981018066, 0.00047206878662109375, 0.0005560815334320068, 0.0006400942802429199, 0.000724107027053833, 0.0008081197738647461, 0.0008921325206756592, 0.0009761452674865723, 0.0010601580142974854, 0.0011441707611083984, 0.0012281835079193115, 0.0013121962547302246, 0.0013962090015411377, 0.0014802217483520508, 0.0015642344951629639, 0.001648247241973877, 0.00173225998878479, 0.0018162727355957031, 0.0019002854824066162, 0.0019842982292175293, 0.0020683109760284424, 0.0021523237228393555, 0.0022363364696502686, 0.0023203492164611816, 0.0024043619632720947, 0.002488374710083008, 0.002572387456893921, 0.002656400203704834, 0.002740412950515747, 0.00282442569732666, 0.0029084384441375732, 0.0029924511909484863, 0.0030764639377593994, 0.0031604766845703125]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 6.0, 1.0, 2.0, 4.0, 5.0, 4.0, 5.0, 15.0, 15.0, 15.0, 37.0, 41.0, 62.0, 95.0, 148.0, 248.0, 556.0, 2601.0, 1040640.0, 2823.0, 572.0, 238.0, 147.0, 66.0, 62.0, 50.0, 44.0, 23.0, 15.0, 6.0, 5.0, 4.0, 3.0, 4.0, 0.0, 0.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0648193359375, -0.06309127807617188, -0.06136322021484375, -0.059635162353515625, -0.0579071044921875, -0.056179046630859375, -0.05445098876953125, -0.052722930908203125, -0.050994873046875, -0.049266815185546875, -0.04753875732421875, -0.045810699462890625, -0.0440826416015625, -0.042354583740234375, -0.04062652587890625, -0.038898468017578125, -0.03717041015625, -0.035442352294921875, -0.03371429443359375, -0.031986236572265625, -0.0302581787109375, -0.028530120849609375, -0.02680206298828125, -0.025074005126953125, -0.023345947265625, -0.021617889404296875, -0.01988983154296875, -0.018161773681640625, -0.0164337158203125, -0.014705657958984375, -0.01297760009765625, -0.011249542236328125, -0.009521484375, -0.007793426513671875, -0.00606536865234375, -0.004337310791015625, -0.0026092529296875, -0.000881195068359375, 0.00084686279296875, 0.002574920654296875, 0.004302978515625, 0.006031036376953125, 0.00775909423828125, 0.009487152099609375, 0.0112152099609375, 0.012943267822265625, 0.01467132568359375, 0.016399383544921875, 0.01812744140625, 0.019855499267578125, 0.02158355712890625, 0.023311614990234375, 0.0250396728515625, 0.026767730712890625, 0.02849578857421875, 0.030223846435546875, 0.031951904296875, 0.033679962158203125, 0.03540802001953125, 0.037136077880859375, 0.0388641357421875, 0.040592193603515625, 0.04232025146484375, 0.044048309326171875, 0.0457763671875]}, "gradients/decoder.transformer.h.7.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 18.0, 159.0, 428.0, 350.0, 48.0, 12.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0007464042864739895, -0.0006164053920656443, -0.00048640643944963813, -0.00035640751593746245, -0.00022640859242528677, -9.640969801694155e-05, 3.358925459906459e-05, 0.00016358820721507072, 0.00029358710162341595, 0.0004235860251355916, 0.0005535849486477673, 0.0006835839012637734, 0.0008135827956721187, 0.0009435816900804639, 0.001073580700904131, 0.0012035795953124762, 0.0013335784897208214, 0.0014635773841291666, 0.0015935762785375118, 0.0017235752893611789, 0.001853574183769524, 0.001983573194593191, 0.0021135720890015364, 0.0022435709834098816, 0.002373569877818227, 0.002503568772226572, 0.0026335676666349173, 0.0027635665610432625, 0.0028935656882822514, 0.003023564349859953, 0.003153563477098942, 0.003283562371507287, 0.0034135612659156322, 0.0035435601603239775, 0.0036735590547323227, 0.003803557949140668, 0.003933556843549013, 0.004063555970788002, 0.004193554632365704, 0.0043235537596046925, 0.004453552886843681, 0.00458355201408267, 0.004713550675660372, 0.004843549802899361, 0.004973548464477062, 0.005103547591716051, 0.005233546253293753, 0.0053635453805327415, 0.005493544042110443, 0.005623543169349432, 0.0057535418309271336, 0.0058835409581661224, 0.006013539619743824, 0.006143538746982813, 0.0062735374085605145, 0.006403536535799503, 0.006533535197377205, 0.006663534324616194, 0.006793532986193895, 0.006923532113432884, 0.007053530775010586, 0.007183529902249575, 0.007313528563827276, 0.007443527691066265, 0.007573526818305254]}, "gradients/decoder.transformer.h.7.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 0.0, 4.0, 2.0, 4.0, 1.0, 6.0, 5.0, 12.0, 9.0, 9.0, 7.0, 16.0, 16.0, 29.0, 18.0, 22.0, 26.0, 34.0, 28.0, 23.0, 38.0, 36.0, 43.0, 43.0, 42.0, 40.0, 35.0, 44.0, 41.0, 30.0, 41.0, 29.0, 33.0, 37.0, 28.0, 35.0, 25.0, 16.0, 16.0, 18.0, 10.0, 16.0, 11.0, 5.0, 9.0, 7.0, 9.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0009061694145202637, -0.0008759219199419022, -0.0008456744253635406, -0.0008154269307851791, -0.0007851794362068176, -0.0007549319416284561, -0.0007246844470500946, -0.0006944369524717331, -0.0006641894578933716, -0.0006339419633150101, -0.0006036944687366486, -0.000573446974158287, -0.0005431994795799255, -0.000512951985001564, -0.0004827044904232025, -0.000452456995844841, -0.0004222095012664795, -0.000391962006688118, -0.00036171451210975647, -0.00033146701753139496, -0.00030121952295303345, -0.00027097202837467194, -0.00024072453379631042, -0.00021047703921794891, -0.0001802295446395874, -0.0001499820500612259, -0.00011973455548286438, -8.948706090450287e-05, -5.923956632614136e-05, -2.8992071747779846e-05, 1.255422830581665e-06, 3.1502917408943176e-05, 6.175041198730469e-05, 9.19979065656662e-05, 0.0001222454011440277, 0.00015249289572238922, 0.00018274039030075073, 0.00021298788487911224, 0.00024323537945747375, 0.00027348287403583527, 0.0003037303686141968, 0.0003339778631925583, 0.0003642253577709198, 0.0003944728523492813, 0.0004247203469276428, 0.00045496784150600433, 0.00048521533608436584, 0.0005154628306627274, 0.0005457103252410889, 0.0005759578198194504, 0.0006062053143978119, 0.0006364528089761734, 0.0006667003035545349, 0.0006969477981328964, 0.0007271952927112579, 0.0007574427872896194, 0.000787690281867981, 0.0008179377764463425, 0.000848185271024704, 0.0008784327656030655, 0.000908680260181427, 0.0009389277547597885, 0.00096917524933815, 0.0009994227439165115, 0.001029670238494873]}, "gradients/decoder.transformer.h.7.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 5.0, 2.0, 1.0, 4.0, 5.0, 8.0, 6.0, 7.0, 13.0, 6.0, 14.0, 14.0, 12.0, 14.0, 22.0, 19.0, 25.0, 36.0, 30.0, 33.0, 39.0, 35.0, 30.0, 32.0, 37.0, 41.0, 29.0, 42.0, 43.0, 37.0, 39.0, 35.0, 36.0, 35.0, 24.0, 28.0, 27.0, 21.0, 15.0, 23.0, 14.0, 15.0, 10.0, 9.0, 5.0, 5.0, 12.0, 14.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-7.10546875, -6.88580322265625, -6.6661376953125, -6.44647216796875, -6.226806640625, -6.00714111328125, -5.7874755859375, -5.56781005859375, -5.34814453125, -5.12847900390625, -4.9088134765625, -4.68914794921875, -4.469482421875, -4.24981689453125, -4.0301513671875, -3.81048583984375, -3.5908203125, -3.37115478515625, -3.1514892578125, -2.93182373046875, -2.712158203125, -2.49249267578125, -2.2728271484375, -2.05316162109375, -1.83349609375, -1.61383056640625, -1.3941650390625, -1.17449951171875, -0.954833984375, -0.73516845703125, -0.5155029296875, -0.29583740234375, -0.076171875, 0.14349365234375, 0.3631591796875, 0.58282470703125, 0.802490234375, 1.02215576171875, 1.2418212890625, 1.46148681640625, 1.68115234375, 1.90081787109375, 2.1204833984375, 2.34014892578125, 2.559814453125, 2.77947998046875, 2.9991455078125, 3.21881103515625, 3.4384765625, 3.65814208984375, 3.8778076171875, 4.09747314453125, 4.317138671875, 4.53680419921875, 4.7564697265625, 4.97613525390625, 5.19580078125, 5.41546630859375, 5.6351318359375, 5.85479736328125, 6.074462890625, 6.29412841796875, 6.5137939453125, 6.73345947265625, 6.953125]}, "gradients/decoder.transformer.h.7.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 3.0, 1.0, 3.0, 1.0, 2.0, 6.0, 9.0, 4.0, 8.0, 14.0, 16.0, 19.0, 26.0, 38.0, 45.0, 58.0, 54.0, 99.0, 107.0, 137.0, 228.0, 376.0, 769.0, 2184.0, 7234.0, 25169.0, 105291.0, 489146.0, 326764.0, 65662.0, 16896.0, 4921.0, 1544.0, 583.0, 335.0, 209.0, 139.0, 98.0, 89.0, 61.0, 46.0, 42.0, 30.0, 20.0, 21.0, 19.0, 11.0, 10.0, 7.0, 6.0, 3.0, 3.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-12.421875, -12.0191650390625, -11.616455078125, -11.2137451171875, -10.81103515625, -10.4083251953125, -10.005615234375, -9.6029052734375, -9.2001953125, -8.7974853515625, -8.394775390625, -7.9920654296875, -7.58935546875, -7.1866455078125, -6.783935546875, -6.3812255859375, -5.978515625, -5.5758056640625, -5.173095703125, -4.7703857421875, -4.36767578125, -3.9649658203125, -3.562255859375, -3.1595458984375, -2.7568359375, -2.3541259765625, -1.951416015625, -1.5487060546875, -1.14599609375, -0.7432861328125, -0.340576171875, 0.0621337890625, 0.46484375, 0.8675537109375, 1.270263671875, 1.6729736328125, 2.07568359375, 2.4783935546875, 2.881103515625, 3.2838134765625, 3.6865234375, 4.0892333984375, 4.491943359375, 4.8946533203125, 5.29736328125, 5.7000732421875, 6.102783203125, 6.5054931640625, 6.908203125, 7.3109130859375, 7.713623046875, 8.1163330078125, 8.51904296875, 8.9217529296875, 9.324462890625, 9.7271728515625, 10.1298828125, 10.5325927734375, 10.935302734375, 11.3380126953125, 11.74072265625, 12.1434326171875, 12.546142578125, 12.9488525390625, 13.3515625]}, "gradients/decoder.transformer.h.7.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 2.0, 1.0, 8.0, 3.0, 2.0, 7.0, 5.0, 8.0, 7.0, 11.0, 14.0, 19.0, 21.0, 22.0, 16.0, 25.0, 34.0, 21.0, 28.0, 35.0, 48.0, 49.0, 70.0, 97.0, 254.0, 1449.0, 242.0, 114.0, 51.0, 52.0, 47.0, 35.0, 33.0, 30.0, 20.0, 25.0, 28.0, 23.0, 12.0, 21.0, 16.0, 11.0, 9.0, 8.0, 4.0, 4.0, 9.0, 4.0, 5.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0], "bins": [-23.78125, -23.072998046875, -22.36474609375, -21.656494140625, -20.9482421875, -20.239990234375, -19.53173828125, -18.823486328125, -18.115234375, -17.406982421875, -16.69873046875, -15.990478515625, -15.2822265625, -14.573974609375, -13.86572265625, -13.157470703125, -12.44921875, -11.740966796875, -11.03271484375, -10.324462890625, -9.6162109375, -8.907958984375, -8.19970703125, -7.491455078125, -6.783203125, -6.074951171875, -5.36669921875, -4.658447265625, -3.9501953125, -3.241943359375, -2.53369140625, -1.825439453125, -1.1171875, -0.408935546875, 0.29931640625, 1.007568359375, 1.7158203125, 2.424072265625, 3.13232421875, 3.840576171875, 4.548828125, 5.257080078125, 5.96533203125, 6.673583984375, 7.3818359375, 8.090087890625, 8.79833984375, 9.506591796875, 10.21484375, 10.923095703125, 11.63134765625, 12.339599609375, 13.0478515625, 13.756103515625, 14.46435546875, 15.172607421875, 15.880859375, 16.589111328125, 17.29736328125, 18.005615234375, 18.7138671875, 19.422119140625, 20.13037109375, 20.838623046875, 21.546875]}, "gradients/decoder.transformer.h.7.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 4.0, 3.0, 9.0, 7.0, 8.0, 12.0, 16.0, 15.0, 26.0, 24.0, 51.0, 51.0, 62.0, 89.0, 105.0, 173.0, 361.0, 762.0, 3685.0, 909502.0, 2223871.0, 4937.0, 873.0, 376.0, 188.0, 119.0, 101.0, 63.0, 47.0, 39.0, 44.0, 20.0, 19.0, 12.0, 8.0, 11.0, 9.0, 7.0, 4.0, 1.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.625, -59.57421875, -57.5234375, -55.47265625, -53.421875, -51.37109375, -49.3203125, -47.26953125, -45.21875, -43.16796875, -41.1171875, -39.06640625, -37.015625, -34.96484375, -32.9140625, -30.86328125, -28.8125, -26.76171875, -24.7109375, -22.66015625, -20.609375, -18.55859375, -16.5078125, -14.45703125, -12.40625, -10.35546875, -8.3046875, -6.25390625, -4.203125, -2.15234375, -0.1015625, 1.94921875, 4.0, 6.05078125, 8.1015625, 10.15234375, 12.203125, 14.25390625, 16.3046875, 18.35546875, 20.40625, 22.45703125, 24.5078125, 26.55859375, 28.609375, 30.66015625, 32.7109375, 34.76171875, 36.8125, 38.86328125, 40.9140625, 42.96484375, 45.015625, 47.06640625, 49.1171875, 51.16796875, 53.21875, 55.26953125, 57.3203125, 59.37109375, 61.421875, 63.47265625, 65.5234375, 67.57421875, 69.625]}, "gradients/decoder.transformer.h.7.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 11.0, 18.0, 57.0, 76.0, 121.0, 137.0, 157.0, 158.0, 126.0, 75.0, 40.0, 18.0, 15.0, 3.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.489408493041992, -11.192358016967773, -9.895307540893555, -8.59825611114502, -7.301205635070801, -6.004155158996582, -4.707104206085205, -3.410053253173828, -2.1130027770996094, -0.8159520626068115, 0.48109865188598633, 1.7781493663787842, 3.075200080871582, 4.372250556945801, 5.669301509857178, 6.966352462768555, 8.263402938842773, 9.560453414916992, 10.857503890991211, 12.154555320739746, 13.451605796813965, 14.748656272888184, 16.04570770263672, 17.342758178710938, 18.639808654785156, 19.936859130859375, 21.233909606933594, 22.530960083007812, 23.82801055908203, 25.12506103515625, 26.4221134185791, 27.71916389465332, 29.016212463378906, 30.313262939453125, 31.610313415527344, 32.90736389160156, 34.20441436767578, 35.50146484375, 36.79851531982422, 38.09556579589844, 39.392616271972656, 40.689666748046875, 41.986717224121094, 43.28376770019531, 44.58081817626953, 45.87786865234375, 47.17491912841797, 48.47196960449219, 49.76902389526367, 51.06607437133789, 52.36312484741211, 53.66017532348633, 54.95722579956055, 56.254276275634766, 57.551326751708984, 58.84838104248047, 60.14543151855469, 61.442481994628906, 62.739532470703125, 64.03658294677734, 65.33363342285156, 66.63068389892578, 67.927734375, 69.22478485107422, 70.52183532714844]}, "gradients/decoder.transformer.h.7.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 5.0, 4.0, 5.0, 2.0, 4.0, 8.0, 7.0, 8.0, 12.0, 9.0, 13.0, 15.0, 14.0, 22.0, 21.0, 22.0, 23.0, 29.0, 28.0, 21.0, 28.0, 33.0, 29.0, 41.0, 34.0, 38.0, 43.0, 39.0, 35.0, 48.0, 30.0, 40.0, 34.0, 32.0, 24.0, 32.0, 27.0, 20.0, 17.0, 19.0, 15.0, 12.0, 14.0, 5.0, 4.0, 7.0, 7.0, 12.0, 4.0, 4.0, 1.0, 4.0, 3.0, 5.0, 1.0, 2.0, 1.0, 1.0], "bins": [-54.7220344543457, -53.028076171875, -51.3341178894043, -49.640159606933594, -47.94620132446289, -46.25224304199219, -44.558284759521484, -42.86432647705078, -41.17036819458008, -39.476409912109375, -37.78245162963867, -36.08849334716797, -34.394535064697266, -32.70057678222656, -31.00661849975586, -29.312660217285156, -27.618703842163086, -25.924745559692383, -24.23078727722168, -22.536828994750977, -20.842870712280273, -19.148914337158203, -17.4549560546875, -15.76099681854248, -14.067038536071777, -12.373080253601074, -10.679121971130371, -8.985164642333984, -7.291205883026123, -5.597248077392578, -3.903289794921875, -2.209331512451172, -0.5153732299804688, 1.1785849332809448, 2.8725430965423584, 4.566501140594482, 6.2604594230651855, 7.9544172286987305, 9.648375511169434, 11.342333793640137, 13.03629207611084, 14.730250358581543, 16.42420768737793, 18.118165969848633, 19.812124252319336, 21.50608253479004, 23.200040817260742, 24.893999099731445, 26.58795738220215, 28.28191566467285, 29.975873947143555, 31.669832229614258, 33.36378860473633, 35.05774688720703, 36.751705169677734, 38.44566345214844, 40.13962173461914, 41.833580017089844, 43.52753829956055, 45.22149658203125, 46.91545486450195, 48.609413146972656, 50.30337142944336, 51.99732971191406, 53.691287994384766]}, "gradients/decoder.transformer.h.6.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 3.0, 3.0, 1.0, 3.0, 3.0, 10.0, 8.0, 8.0, 9.0, 10.0, 14.0, 11.0, 18.0, 28.0, 25.0, 24.0, 33.0, 44.0, 32.0, 32.0, 40.0, 35.0, 37.0, 31.0, 40.0, 36.0, 49.0, 38.0, 35.0, 33.0, 41.0, 24.0, 38.0, 27.0, 23.0, 25.0, 17.0, 24.0, 18.0, 17.0, 11.0, 10.0, 15.0, 5.0, 4.0, 5.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0], "bins": [-7.703125, -7.46136474609375, -7.2196044921875, -6.97784423828125, -6.736083984375, -6.49432373046875, -6.2525634765625, -6.01080322265625, -5.76904296875, -5.52728271484375, -5.2855224609375, -5.04376220703125, -4.802001953125, -4.56024169921875, -4.3184814453125, -4.07672119140625, -3.8349609375, -3.59320068359375, -3.3514404296875, -3.10968017578125, -2.867919921875, -2.62615966796875, -2.3843994140625, -2.14263916015625, -1.90087890625, -1.65911865234375, -1.4173583984375, -1.17559814453125, -0.933837890625, -0.69207763671875, -0.4503173828125, -0.20855712890625, 0.033203125, 0.27496337890625, 0.5167236328125, 0.75848388671875, 1.000244140625, 1.24200439453125, 1.4837646484375, 1.72552490234375, 1.96728515625, 2.20904541015625, 2.4508056640625, 2.69256591796875, 2.934326171875, 3.17608642578125, 3.4178466796875, 3.65960693359375, 3.9013671875, 4.14312744140625, 4.3848876953125, 4.62664794921875, 4.868408203125, 5.11016845703125, 5.3519287109375, 5.59368896484375, 5.83544921875, 6.07720947265625, 6.3189697265625, 6.56072998046875, 6.802490234375, 7.04425048828125, 7.2860107421875, 7.52777099609375, 7.76953125]}, "gradients/decoder.transformer.h.6.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 5.0, 3.0, 1.0, 6.0, 5.0, 4.0, 12.0, 13.0, 9.0, 20.0, 23.0, 33.0, 44.0, 51.0, 55.0, 104.0, 118.0, 281.0, 1000.0, 5888.0, 59876.0, 779520.0, 2615453.0, 673548.0, 51583.0, 5027.0, 851.0, 245.0, 138.0, 85.0, 51.0, 42.0, 50.0, 23.0, 20.0, 22.0, 18.0, 12.0, 16.0, 10.0, 10.0, 7.0, 2.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-17.125, -16.580810546875, -16.03662109375, -15.492431640625, -14.9482421875, -14.404052734375, -13.85986328125, -13.315673828125, -12.771484375, -12.227294921875, -11.68310546875, -11.138916015625, -10.5947265625, -10.050537109375, -9.50634765625, -8.962158203125, -8.41796875, -7.873779296875, -7.32958984375, -6.785400390625, -6.2412109375, -5.697021484375, -5.15283203125, -4.608642578125, -4.064453125, -3.520263671875, -2.97607421875, -2.431884765625, -1.8876953125, -1.343505859375, -0.79931640625, -0.255126953125, 0.2890625, 0.833251953125, 1.37744140625, 1.921630859375, 2.4658203125, 3.010009765625, 3.55419921875, 4.098388671875, 4.642578125, 5.186767578125, 5.73095703125, 6.275146484375, 6.8193359375, 7.363525390625, 7.90771484375, 8.451904296875, 8.99609375, 9.540283203125, 10.08447265625, 10.628662109375, 11.1728515625, 11.717041015625, 12.26123046875, 12.805419921875, 13.349609375, 13.893798828125, 14.43798828125, 14.982177734375, 15.5263671875, 16.070556640625, 16.61474609375, 17.158935546875, 17.703125]}, "gradients/decoder.transformer.h.6.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 4.0, 1.0, 4.0, 4.0, 9.0, 8.0, 12.0, 13.0, 17.0, 22.0, 25.0, 30.0, 42.0, 56.0, 90.0, 122.0, 183.0, 202.0, 302.0, 353.0, 402.0, 386.0, 403.0, 349.0, 253.0, 212.0, 151.0, 109.0, 82.0, 62.0, 45.0, 33.0, 23.0, 12.0, 11.0, 10.0, 10.0, 6.0, 5.0, 5.0, 0.0, 3.0, 2.0, 1.0, 3.0, 1.0, 3.0], "bins": [-14.3515625, -13.9771728515625, -13.602783203125, -13.2283935546875, -12.85400390625, -12.4796142578125, -12.105224609375, -11.7308349609375, -11.3564453125, -10.9820556640625, -10.607666015625, -10.2332763671875, -9.85888671875, -9.4844970703125, -9.110107421875, -8.7357177734375, -8.361328125, -7.9869384765625, -7.612548828125, -7.2381591796875, -6.86376953125, -6.4893798828125, -6.114990234375, -5.7406005859375, -5.3662109375, -4.9918212890625, -4.617431640625, -4.2430419921875, -3.86865234375, -3.4942626953125, -3.119873046875, -2.7454833984375, -2.37109375, -1.9967041015625, -1.622314453125, -1.2479248046875, -0.87353515625, -0.4991455078125, -0.124755859375, 0.2496337890625, 0.6240234375, 0.9984130859375, 1.372802734375, 1.7471923828125, 2.12158203125, 2.4959716796875, 2.870361328125, 3.2447509765625, 3.619140625, 3.9935302734375, 4.367919921875, 4.7423095703125, 5.11669921875, 5.4910888671875, 5.865478515625, 6.2398681640625, 6.6142578125, 6.9886474609375, 7.363037109375, 7.7374267578125, 8.11181640625, 8.4862060546875, 8.860595703125, 9.2349853515625, 9.609375]}, "gradients/decoder.transformer.h.6.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 1.0, 3.0, 2.0, 3.0, 4.0, 3.0, 11.0, 12.0, 20.0, 32.0, 24.0, 52.0, 51.0, 84.0, 115.0, 162.0, 219.0, 343.0, 566.0, 1360.0, 5952.0, 66461.0, 1757990.0, 2262799.0, 87553.0, 7194.0, 1443.0, 639.0, 354.0, 227.0, 158.0, 112.0, 90.0, 63.0, 47.0, 31.0, 31.0, 18.0, 17.0, 13.0, 8.0, 9.0, 7.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-27.0625, -26.130859375, -25.19921875, -24.267578125, -23.3359375, -22.404296875, -21.47265625, -20.541015625, -19.609375, -18.677734375, -17.74609375, -16.814453125, -15.8828125, -14.951171875, -14.01953125, -13.087890625, -12.15625, -11.224609375, -10.29296875, -9.361328125, -8.4296875, -7.498046875, -6.56640625, -5.634765625, -4.703125, -3.771484375, -2.83984375, -1.908203125, -0.9765625, -0.044921875, 0.88671875, 1.818359375, 2.75, 3.681640625, 4.61328125, 5.544921875, 6.4765625, 7.408203125, 8.33984375, 9.271484375, 10.203125, 11.134765625, 12.06640625, 12.998046875, 13.9296875, 14.861328125, 15.79296875, 16.724609375, 17.65625, 18.587890625, 19.51953125, 20.451171875, 21.3828125, 22.314453125, 23.24609375, 24.177734375, 25.109375, 26.041015625, 26.97265625, 27.904296875, 28.8359375, 29.767578125, 30.69921875, 31.630859375, 32.5625]}, "gradients/decoder.transformer.h.6.ln_2.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 54.0, 513.0, 419.0, 28.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.686092376708984, -50.46839141845703, -40.250694274902344, -30.032995223999023, -19.815296173095703, -9.597599029541016, 0.6201019287109375, 10.83780288696289, 21.055500030517578, 31.2731990814209, 41.49089813232422, 51.708595275878906, 61.92629623413086, 72.14399719238281, 82.3616943359375, 92.57939147949219, 102.79708862304688, 113.01478576660156, 123.23248291015625, 133.45018005371094, 143.66787719726562, 153.88558959960938, 164.10328674316406, 174.32098388671875, 184.53868103027344, 194.75637817382812, 204.9740753173828, 215.1917724609375, 225.40948486328125, 235.62716674804688, 245.84487915039062, 256.06256103515625, 266.2802734375, 276.49798583984375, 286.7156677246094, 296.9333801269531, 307.15106201171875, 317.3687744140625, 327.5864562988281, 337.8041687011719, 348.0218505859375, 358.23956298828125, 368.4572448730469, 378.6749572753906, 388.89263916015625, 399.1103515625, 409.3280334472656, 419.5457458496094, 429.7634582519531, 439.9811706542969, 450.1988525390625, 460.41656494140625, 470.6342468261719, 480.8519592285156, 491.06964111328125, 501.287353515625, 511.50506591796875, 521.7227783203125, 531.9404907226562, 542.1581420898438, 552.3758544921875, 562.5935668945312, 572.811279296875, 583.0289306640625, 593.2466430664062]}, "gradients/decoder.transformer.h.6.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 2.0, 1.0, 3.0, 2.0, 5.0, 3.0, 6.0, 5.0, 7.0, 6.0, 7.0, 8.0, 15.0, 17.0, 14.0, 22.0, 17.0, 26.0, 31.0, 30.0, 28.0, 47.0, 43.0, 31.0, 41.0, 35.0, 58.0, 32.0, 43.0, 38.0, 33.0, 35.0, 30.0, 29.0, 27.0, 35.0, 27.0, 27.0, 17.0, 26.0, 13.0, 21.0, 11.0, 13.0, 14.0, 8.0, 4.0, 5.0, 2.0, 2.0, 5.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-45.06510925292969, -43.67557907104492, -42.286048889160156, -40.89651870727539, -39.506988525390625, -38.11745834350586, -36.727928161621094, -35.33839797973633, -33.94886779785156, -32.5593376159668, -31.16980743408203, -29.780277252197266, -28.3907470703125, -27.001216888427734, -25.61168670654297, -24.222156524658203, -22.83262825012207, -21.443098068237305, -20.05356788635254, -18.664037704467773, -17.274507522583008, -15.884977340698242, -14.495448112487793, -13.105917930603027, -11.716387748718262, -10.326857566833496, -8.93732738494873, -7.547797679901123, -6.158267498016357, -4.768737316131592, -3.3792076110839844, -1.9896774291992188, -0.6001472473144531, 0.789382815361023, 2.178912878036499, 3.5684428215026855, 4.957973003387451, 6.347503185272217, 7.737032890319824, 9.12656307220459, 10.516093254089355, 11.905623435974121, 13.295153617858887, 14.684682846069336, 16.0742130279541, 17.463743209838867, 18.853273391723633, 20.2428035736084, 21.632333755493164, 23.02186393737793, 24.411394119262695, 25.80092430114746, 27.190454483032227, 28.579984664916992, 29.969512939453125, 31.35904312133789, 32.748573303222656, 34.13810348510742, 35.52763366699219, 36.91716384887695, 38.30669403076172, 39.696224212646484, 41.08575439453125, 42.475284576416016, 43.86481475830078]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 4.0, 3.0, 5.0, 6.0, 7.0, 7.0, 12.0, 11.0, 12.0, 12.0, 20.0, 22.0, 22.0, 28.0, 27.0, 32.0, 24.0, 35.0, 33.0, 26.0, 31.0, 35.0, 38.0, 50.0, 42.0, 36.0, 37.0, 34.0, 44.0, 34.0, 33.0, 37.0, 32.0, 24.0, 25.0, 17.0, 13.0, 20.0, 16.0, 12.0, 9.0, 11.0, 2.0, 10.0, 3.0, 3.0, 3.0, 5.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0], "bins": [-7.515625, -7.27972412109375, -7.0438232421875, -6.80792236328125, -6.572021484375, -6.33612060546875, -6.1002197265625, -5.86431884765625, -5.62841796875, -5.39251708984375, -5.1566162109375, -4.92071533203125, -4.684814453125, -4.44891357421875, -4.2130126953125, -3.97711181640625, -3.7412109375, -3.50531005859375, -3.2694091796875, -3.03350830078125, -2.797607421875, -2.56170654296875, -2.3258056640625, -2.08990478515625, -1.85400390625, -1.61810302734375, -1.3822021484375, -1.14630126953125, -0.910400390625, -0.67449951171875, -0.4385986328125, -0.20269775390625, 0.033203125, 0.26910400390625, 0.5050048828125, 0.74090576171875, 0.976806640625, 1.21270751953125, 1.4486083984375, 1.68450927734375, 1.92041015625, 2.15631103515625, 2.3922119140625, 2.62811279296875, 2.864013671875, 3.09991455078125, 3.3358154296875, 3.57171630859375, 3.8076171875, 4.04351806640625, 4.2794189453125, 4.51531982421875, 4.751220703125, 4.98712158203125, 5.2230224609375, 5.45892333984375, 5.69482421875, 5.93072509765625, 6.1666259765625, 6.40252685546875, 6.638427734375, 6.87432861328125, 7.1102294921875, 7.34613037109375, 7.58203125]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 6.0, 6.0, 11.0, 13.0, 19.0, 40.0, 60.0, 115.0, 180.0, 267.0, 434.0, 717.0, 1091.0, 1775.0, 2791.0, 4649.0, 7332.0, 12090.0, 19904.0, 32964.0, 54303.0, 91099.0, 150663.0, 218324.0, 176339.0, 108377.0, 64810.0, 39339.0, 23482.0, 14361.0, 8760.0, 5472.0, 3392.0, 2024.0, 1211.0, 830.0, 480.0, 298.0, 200.0, 132.0, 74.0, 48.0, 36.0, 14.0, 13.0, 4.0, 8.0, 6.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.60009765625, -0.5798721313476562, -0.5596466064453125, -0.5394210815429688, -0.519195556640625, -0.49897003173828125, -0.4787445068359375, -0.45851898193359375, -0.43829345703125, -0.41806793212890625, -0.3978424072265625, -0.37761688232421875, -0.357391357421875, -0.33716583251953125, -0.3169403076171875, -0.29671478271484375, -0.2764892578125, -0.25626373291015625, -0.2360382080078125, -0.21581268310546875, -0.195587158203125, -0.17536163330078125, -0.1551361083984375, -0.13491058349609375, -0.11468505859375, -0.09445953369140625, -0.0742340087890625, -0.05400848388671875, -0.033782958984375, -0.01355743408203125, 0.0066680908203125, 0.02689361572265625, 0.047119140625, 0.06734466552734375, 0.0875701904296875, 0.10779571533203125, 0.128021240234375, 0.14824676513671875, 0.1684722900390625, 0.18869781494140625, 0.20892333984375, 0.22914886474609375, 0.2493743896484375, 0.26959991455078125, 0.289825439453125, 0.31005096435546875, 0.3302764892578125, 0.35050201416015625, 0.3707275390625, 0.39095306396484375, 0.4111785888671875, 0.43140411376953125, 0.451629638671875, 0.47185516357421875, 0.4920806884765625, 0.5123062133789062, 0.53253173828125, 0.5527572631835938, 0.5729827880859375, 0.5932083129882812, 0.613433837890625, 0.6336593627929688, 0.6538848876953125, 0.6741104125976562, 0.6943359375]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 8.0, 6.0, 8.0, 6.0, 15.0, 5.0, 22.0, 8.0, 16.0, 15.0, 16.0, 16.0, 17.0, 26.0, 25.0, 22.0, 23.0, 23.0, 25.0, 30.0, 28.0, 34.0, 36.0, 1059.0, 51.0, 43.0, 38.0, 25.0, 37.0, 27.0, 33.0, 32.0, 31.0, 26.0, 33.0, 17.0, 20.0, 14.0, 19.0, 18.0, 14.0, 8.0, 13.0, 10.0, 6.0, 4.0, 7.0, 2.0, 4.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 3.0], "bins": [-4.546875, -4.404296875, -4.26171875, -4.119140625, -3.9765625, -3.833984375, -3.69140625, -3.548828125, -3.40625, -3.263671875, -3.12109375, -2.978515625, -2.8359375, -2.693359375, -2.55078125, -2.408203125, -2.265625, -2.123046875, -1.98046875, -1.837890625, -1.6953125, -1.552734375, -1.41015625, -1.267578125, -1.125, -0.982421875, -0.83984375, -0.697265625, -0.5546875, -0.412109375, -0.26953125, -0.126953125, 0.015625, 0.158203125, 0.30078125, 0.443359375, 0.5859375, 0.728515625, 0.87109375, 1.013671875, 1.15625, 1.298828125, 1.44140625, 1.583984375, 1.7265625, 1.869140625, 2.01171875, 2.154296875, 2.296875, 2.439453125, 2.58203125, 2.724609375, 2.8671875, 3.009765625, 3.15234375, 3.294921875, 3.4375, 3.580078125, 3.72265625, 3.865234375, 4.0078125, 4.150390625, 4.29296875, 4.435546875, 4.578125]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 10.0, 10.0, 16.0, 22.0, 22.0, 30.0, 79.0, 109.0, 135.0, 241.0, 343.0, 533.0, 811.0, 1209.0, 1848.0, 2676.0, 4091.0, 6297.0, 9261.0, 14092.0, 21283.0, 32121.0, 48358.0, 73198.0, 110175.0, 155831.0, 1217841.0, 132330.0, 89912.0, 58689.0, 38923.0, 25692.0, 17271.0, 11404.0, 7479.0, 4998.0, 3301.0, 2251.0, 1434.0, 987.0, 582.0, 430.0, 290.0, 177.0, 126.0, 75.0, 41.0, 38.0, 14.0, 20.0, 14.0, 8.0, 7.0, 0.0, 0.0, 3.0, 6.0], "bins": [-0.5009765625, -0.4859046936035156, -0.47083282470703125, -0.4557609558105469, -0.4406890869140625, -0.4256172180175781, -0.41054534912109375, -0.3954734802246094, -0.380401611328125, -0.3653297424316406, -0.35025787353515625, -0.3351860046386719, -0.3201141357421875, -0.3050422668457031, -0.28997039794921875, -0.2748985290527344, -0.25982666015625, -0.24475479125976562, -0.22968292236328125, -0.21461105346679688, -0.1995391845703125, -0.18446731567382812, -0.16939544677734375, -0.15432357788085938, -0.139251708984375, -0.12417984008789062, -0.10910797119140625, -0.09403610229492188, -0.0789642333984375, -0.06389236450195312, -0.04882049560546875, -0.033748626708984375, -0.0186767578125, -0.003604888916015625, 0.01146697998046875, 0.026538848876953125, 0.0416107177734375, 0.056682586669921875, 0.07175445556640625, 0.08682632446289062, 0.101898193359375, 0.11697006225585938, 0.13204193115234375, 0.14711380004882812, 0.1621856689453125, 0.17725753784179688, 0.19232940673828125, 0.20740127563476562, 0.22247314453125, 0.23754501342773438, 0.25261688232421875, 0.2676887512207031, 0.2827606201171875, 0.2978324890136719, 0.31290435791015625, 0.3279762268066406, 0.343048095703125, 0.3581199645996094, 0.37319183349609375, 0.3882637023925781, 0.4033355712890625, 0.4184074401855469, 0.43347930908203125, 0.4485511779785156, 0.463623046875]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 7.0, 2.0, 5.0, 6.0, 7.0, 9.0, 4.0, 8.0, 3.0, 18.0, 26.0, 24.0, 20.0, 18.0, 30.0, 37.0, 28.0, 37.0, 37.0, 51.0, 50.0, 49.0, 58.0, 55.0, 49.0, 52.0, 32.0, 48.0, 28.0, 34.0, 26.0, 25.0, 21.0, 15.0, 15.0, 17.0, 6.0, 8.0, 9.0, 3.0, 5.0, 4.0, 3.0, 2.0, 3.0, 3.0, 6.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0018224716186523438, -0.0017688721418380737, -0.0017152726650238037, -0.0016616731882095337, -0.0016080737113952637, -0.0015544742345809937, -0.0015008747577667236, -0.0014472752809524536, -0.0013936758041381836, -0.0013400763273239136, -0.0012864768505096436, -0.0012328773736953735, -0.0011792778968811035, -0.0011256784200668335, -0.0010720789432525635, -0.0010184794664382935, -0.0009648799896240234, -0.0009112805128097534, -0.0008576810359954834, -0.0008040815591812134, -0.0007504820823669434, -0.0006968826055526733, -0.0006432831287384033, -0.0005896836519241333, -0.0005360841751098633, -0.00048248469829559326, -0.00042888522148132324, -0.0003752857446670532, -0.0003216862678527832, -0.0002680867910385132, -0.00021448731422424316, -0.00016088783740997314, -0.00010728836059570312, -5.3688883781433105e-05, -8.940696716308594e-08, 5.3510069847106934e-05, 0.00010710954666137695, 0.00016070902347564697, 0.000214308500289917, 0.000267907977104187, 0.00032150745391845703, 0.00037510693073272705, 0.00042870640754699707, 0.0004823058843612671, 0.0005359053611755371, 0.0005895048379898071, 0.0006431043148040771, 0.0006967037916183472, 0.0007503032684326172, 0.0008039027452468872, 0.0008575022220611572, 0.0009111016988754272, 0.0009647011756896973, 0.0010183006525039673, 0.0010719001293182373, 0.0011254996061325073, 0.0011790990829467773, 0.0012326985597610474, 0.0012862980365753174, 0.0013398975133895874, 0.0013934969902038574, 0.0014470964670181274, 0.0015006959438323975, 0.0015542954206466675, 0.0016078948974609375]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 4.0, 6.0, 10.0, 5.0, 9.0, 11.0, 17.0, 32.0, 21.0, 33.0, 41.0, 46.0, 71.0, 94.0, 124.0, 193.0, 316.0, 617.0, 1793.0, 854170.0, 188401.0, 1115.0, 499.0, 273.0, 154.0, 104.0, 85.0, 63.0, 54.0, 47.0, 22.0, 25.0, 17.0, 12.0, 13.0, 13.0, 10.0, 6.0, 2.0, 6.0, 4.0, 5.0, 5.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.037353515625, -0.03614377975463867, -0.034934043884277344, -0.033724308013916016, -0.03251457214355469, -0.03130483627319336, -0.03009510040283203, -0.028885364532470703, -0.027675628662109375, -0.026465892791748047, -0.02525615692138672, -0.02404642105102539, -0.022836685180664062, -0.021626949310302734, -0.020417213439941406, -0.019207477569580078, -0.01799774169921875, -0.016788005828857422, -0.015578269958496094, -0.014368534088134766, -0.013158798217773438, -0.01194906234741211, -0.010739326477050781, -0.009529590606689453, -0.008319854736328125, -0.007110118865966797, -0.005900382995605469, -0.004690647125244141, -0.0034809112548828125, -0.0022711753845214844, -0.0010614395141601562, 0.00014829635620117188, 0.0013580322265625, 0.002567768096923828, 0.0037775039672851562, 0.004987239837646484, 0.0061969757080078125, 0.007406711578369141, 0.008616447448730469, 0.009826183319091797, 0.011035919189453125, 0.012245655059814453, 0.013455390930175781, 0.01466512680053711, 0.015874862670898438, 0.017084598541259766, 0.018294334411621094, 0.019504070281982422, 0.02071380615234375, 0.021923542022705078, 0.023133277893066406, 0.024343013763427734, 0.025552749633789062, 0.02676248550415039, 0.02797222137451172, 0.029181957244873047, 0.030391693115234375, 0.0316014289855957, 0.03281116485595703, 0.03402090072631836, 0.03523063659667969, 0.036440372467041016, 0.037650108337402344, 0.03885984420776367, 0.040069580078125]}, "gradients/decoder.transformer.h.6.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 6.0, 247.0, 738.0, 25.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01695728488266468, -0.016632722690701485, -0.01630816049873829, -0.015983598306775093, -0.015659036114811897, -0.015334473922848701, -0.015009911730885506, -0.01468534953892231, -0.014360787346959114, -0.014036225154995918, -0.013711662963032722, -0.013387100771069527, -0.013062538579106331, -0.012737976387143135, -0.01241341419517994, -0.012088852003216743, -0.011764289811253548, -0.011439727619290352, -0.011115165427327156, -0.01079060323536396, -0.010466041043400764, -0.010141478851437569, -0.009816916659474373, -0.009492354467511177, -0.009167792275547981, -0.008843230083584785, -0.00851866789162159, -0.008194105699658394, -0.007869543507695198, -0.007544981315732002, -0.0072204191237688065, -0.006895856931805611, -0.006571294739842415, -0.006246732547879219, -0.005922170355916023, -0.0055976081639528275, -0.005273045971989632, -0.004948483780026436, -0.00462392158806324, -0.004299359396100044, -0.0039747972041368484, -0.0036502350121736526, -0.003325672820210457, -0.003001110628247261, -0.0026765484362840652, -0.0023519862443208694, -0.0020274240523576736, -0.0017028618603944778, -0.0013782994356006384, -0.0010537372436374426, -0.0007291750516742468, -0.000404612859711051, -8.005066774785519e-05, 0.0002445115242153406, 0.0005690737161785364, 0.0008936359081417322, 0.001218198100104928, 0.0015427602920681238, 0.0018673224840313196, 0.0021918846759945154, 0.0025164468679577112, 0.002841009059920907, 0.003165571251884103, 0.0034901334438472986, 0.0038146956358104944]}, "gradients/decoder.transformer.h.6.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 5.0, 4.0, 4.0, 4.0, 7.0, 9.0, 9.0, 7.0, 15.0, 15.0, 20.0, 19.0, 22.0, 25.0, 30.0, 30.0, 38.0, 35.0, 36.0, 47.0, 32.0, 38.0, 50.0, 39.0, 38.0, 41.0, 45.0, 38.0, 36.0, 36.0, 29.0, 29.0, 30.0, 17.0, 21.0, 27.0, 15.0, 14.0, 11.0, 9.0, 9.0, 7.0, 5.0, 2.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0010389089584350586, -0.0010055126622319221, -0.0009721163660287857, -0.0009387200698256493, -0.0009053237736225128, -0.0008719274774193764, -0.0008385311812162399, -0.0008051348850131035, -0.000771738588809967, -0.0007383422926068306, -0.0007049459964036942, -0.0006715497002005577, -0.0006381534039974213, -0.0006047571077942848, -0.0005713608115911484, -0.0005379645153880119, -0.0005045682191848755, -0.00047117192298173904, -0.0004377756267786026, -0.00040437933057546616, -0.0003709830343723297, -0.00033758673816919327, -0.0003041904419660568, -0.0002707941457629204, -0.00023739784955978394, -0.0002040015533566475, -0.00017060525715351105, -0.0001372089609503746, -0.00010381266474723816, -7.041636854410172e-05, -3.702007234096527e-05, -3.623776137828827e-06, 2.9772520065307617e-05, 6.316881626844406e-05, 9.65651124715805e-05, 0.00012996140867471695, 0.0001633577048778534, 0.00019675400108098984, 0.00023015029728412628, 0.0002635465934872627, 0.00029694288969039917, 0.0003303391858935356, 0.00036373548209667206, 0.0003971317782998085, 0.00043052807450294495, 0.0004639243707060814, 0.0004973206669092178, 0.0005307169631123543, 0.0005641132593154907, 0.0005975095555186272, 0.0006309058517217636, 0.0006643021479249001, 0.0006976984441280365, 0.0007310947403311729, 0.0007644910365343094, 0.0007978873327374458, 0.0008312836289405823, 0.0008646799251437187, 0.0008980762213468552, 0.0009314725175499916, 0.000964868813753128, 0.0009982651099562645, 0.001031661406159401, 0.0010650577023625374, 0.0010984539985656738]}, "gradients/decoder.transformer.h.6.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 4.0, 3.0, 5.0, 6.0, 7.0, 7.0, 12.0, 11.0, 12.0, 12.0, 20.0, 22.0, 22.0, 28.0, 27.0, 33.0, 23.0, 35.0, 33.0, 26.0, 31.0, 35.0, 38.0, 50.0, 42.0, 36.0, 37.0, 34.0, 44.0, 34.0, 33.0, 37.0, 32.0, 24.0, 25.0, 17.0, 13.0, 20.0, 16.0, 12.0, 9.0, 11.0, 2.0, 10.0, 3.0, 3.0, 3.0, 5.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0], "bins": [-7.515625, -7.27972412109375, -7.0438232421875, -6.80792236328125, -6.572021484375, -6.33612060546875, -6.1002197265625, -5.86431884765625, -5.62841796875, -5.39251708984375, -5.1566162109375, -4.92071533203125, -4.684814453125, -4.44891357421875, -4.2130126953125, -3.97711181640625, -3.7412109375, -3.50531005859375, -3.2694091796875, -3.03350830078125, -2.797607421875, -2.56170654296875, -2.3258056640625, -2.08990478515625, -1.85400390625, -1.61810302734375, -1.3822021484375, -1.14630126953125, -0.910400390625, -0.67449951171875, -0.4385986328125, -0.20269775390625, 0.033203125, 0.26910400390625, 0.5050048828125, 0.74090576171875, 0.976806640625, 1.21270751953125, 1.4486083984375, 1.68450927734375, 1.92041015625, 2.15631103515625, 2.3922119140625, 2.62811279296875, 2.864013671875, 3.09991455078125, 3.3358154296875, 3.57171630859375, 3.8076171875, 4.04351806640625, 4.2794189453125, 4.51531982421875, 4.751220703125, 4.98712158203125, 5.2230224609375, 5.45892333984375, 5.69482421875, 5.93072509765625, 6.1666259765625, 6.40252685546875, 6.638427734375, 6.87432861328125, 7.1102294921875, 7.34613037109375, 7.58203125]}, "gradients/decoder.transformer.h.6.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 2.0, 8.0, 6.0, 10.0, 11.0, 16.0, 13.0, 26.0, 30.0, 41.0, 60.0, 68.0, 86.0, 100.0, 155.0, 193.0, 260.0, 331.0, 477.0, 749.0, 1622.0, 4693.0, 17705.0, 90345.0, 746660.0, 149004.0, 24717.0, 6243.0, 2020.0, 857.0, 548.0, 379.0, 258.0, 209.0, 157.0, 113.0, 83.0, 81.0, 49.0, 45.0, 29.0, 25.0, 15.0, 11.0, 14.0, 8.0, 8.0, 5.0, 8.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0], "bins": [-18.546875, -17.960205078125, -17.37353515625, -16.786865234375, -16.2001953125, -15.613525390625, -15.02685546875, -14.440185546875, -13.853515625, -13.266845703125, -12.68017578125, -12.093505859375, -11.5068359375, -10.920166015625, -10.33349609375, -9.746826171875, -9.16015625, -8.573486328125, -7.98681640625, -7.400146484375, -6.8134765625, -6.226806640625, -5.64013671875, -5.053466796875, -4.466796875, -3.880126953125, -3.29345703125, -2.706787109375, -2.1201171875, -1.533447265625, -0.94677734375, -0.360107421875, 0.2265625, 0.813232421875, 1.39990234375, 1.986572265625, 2.5732421875, 3.159912109375, 3.74658203125, 4.333251953125, 4.919921875, 5.506591796875, 6.09326171875, 6.679931640625, 7.2666015625, 7.853271484375, 8.43994140625, 9.026611328125, 9.61328125, 10.199951171875, 10.78662109375, 11.373291015625, 11.9599609375, 12.546630859375, 13.13330078125, 13.719970703125, 14.306640625, 14.893310546875, 15.47998046875, 16.066650390625, 16.6533203125, 17.239990234375, 17.82666015625, 18.413330078125, 19.0]}, "gradients/decoder.transformer.h.6.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 4.0, 5.0, 6.0, 6.0, 2.0, 2.0, 3.0, 7.0, 13.0, 11.0, 14.0, 20.0, 30.0, 30.0, 25.0, 24.0, 27.0, 30.0, 42.0, 36.0, 47.0, 63.0, 89.0, 167.0, 1471.0, 330.0, 112.0, 63.0, 49.0, 40.0, 39.0, 36.0, 25.0, 38.0, 21.0, 23.0, 17.0, 17.0, 11.0, 15.0, 13.0, 9.0, 6.0, 7.0, 4.0, 4.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0], "bins": [-22.265625, -21.564453125, -20.86328125, -20.162109375, -19.4609375, -18.759765625, -18.05859375, -17.357421875, -16.65625, -15.955078125, -15.25390625, -14.552734375, -13.8515625, -13.150390625, -12.44921875, -11.748046875, -11.046875, -10.345703125, -9.64453125, -8.943359375, -8.2421875, -7.541015625, -6.83984375, -6.138671875, -5.4375, -4.736328125, -4.03515625, -3.333984375, -2.6328125, -1.931640625, -1.23046875, -0.529296875, 0.171875, 0.873046875, 1.57421875, 2.275390625, 2.9765625, 3.677734375, 4.37890625, 5.080078125, 5.78125, 6.482421875, 7.18359375, 7.884765625, 8.5859375, 9.287109375, 9.98828125, 10.689453125, 11.390625, 12.091796875, 12.79296875, 13.494140625, 14.1953125, 14.896484375, 15.59765625, 16.298828125, 17.0, 17.701171875, 18.40234375, 19.103515625, 19.8046875, 20.505859375, 21.20703125, 21.908203125, 22.609375]}, "gradients/decoder.transformer.h.6.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 5.0, 1.0, 1.0, 5.0, 1.0, 2.0, 3.0, 10.0, 6.0, 17.0, 19.0, 15.0, 17.0, 21.0, 39.0, 31.0, 55.0, 58.0, 90.0, 108.0, 218.0, 387.0, 1007.0, 4757.0, 287507.0, 2836732.0, 11690.0, 1527.0, 508.0, 257.0, 144.0, 98.0, 68.0, 57.0, 56.0, 47.0, 35.0, 26.0, 16.0, 21.0, 12.0, 6.0, 8.0, 3.0, 6.0, 8.0, 2.0, 6.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-63.6875, -61.6435546875, -59.599609375, -57.5556640625, -55.51171875, -53.4677734375, -51.423828125, -49.3798828125, -47.3359375, -45.2919921875, -43.248046875, -41.2041015625, -39.16015625, -37.1162109375, -35.072265625, -33.0283203125, -30.984375, -28.9404296875, -26.896484375, -24.8525390625, -22.80859375, -20.7646484375, -18.720703125, -16.6767578125, -14.6328125, -12.5888671875, -10.544921875, -8.5009765625, -6.45703125, -4.4130859375, -2.369140625, -0.3251953125, 1.71875, 3.7626953125, 5.806640625, 7.8505859375, 9.89453125, 11.9384765625, 13.982421875, 16.0263671875, 18.0703125, 20.1142578125, 22.158203125, 24.2021484375, 26.24609375, 28.2900390625, 30.333984375, 32.3779296875, 34.421875, 36.4658203125, 38.509765625, 40.5537109375, 42.59765625, 44.6416015625, 46.685546875, 48.7294921875, 50.7734375, 52.8173828125, 54.861328125, 56.9052734375, 58.94921875, 60.9931640625, 63.037109375, 65.0810546875, 67.125]}, "gradients/decoder.transformer.h.6.ln_1.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 10.0, 123.0, 336.0, 375.0, 139.0, 26.0, 6.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.370925903320312, -17.832563400268555, -14.29420280456543, -10.755840301513672, -7.2174787521362305, -3.679117202758789, -0.14075469970703125, 3.3976058959960938, 6.935968399047852, 10.474329948425293, 14.012691497802734, 17.551054000854492, 21.08941650390625, 24.627777099609375, 28.166139602661133, 31.704500198364258, 35.242862701416016, 38.78122329711914, 42.31958770751953, 45.857948303222656, 49.39630889892578, 52.934669494628906, 56.4730339050293, 60.01139450073242, 63.54975891113281, 67.08811950683594, 70.62648010253906, 74.16484069824219, 77.70320892333984, 81.24156951904297, 84.7799301147461, 88.31829071044922, 91.85665130615234, 95.39501190185547, 98.9333724975586, 102.47174072265625, 106.01010131835938, 109.5484619140625, 113.08682250976562, 116.62518310546875, 120.16354370117188, 123.701904296875, 127.24026489257812, 130.77862548828125, 134.31698608398438, 137.8553466796875, 141.39370727539062, 144.93206787109375, 148.47044372558594, 152.00880432128906, 155.5471649169922, 159.0855255126953, 162.62388610839844, 166.16224670410156, 169.70062255859375, 173.23898315429688, 176.77732849121094, 180.31568908691406, 183.8540496826172, 187.3924102783203, 190.93077087402344, 194.46913146972656, 198.00750732421875, 201.54586791992188, 205.084228515625]}, "gradients/decoder.transformer.h.6.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 5.0, 5.0, 3.0, 2.0, 6.0, 13.0, 8.0, 14.0, 12.0, 13.0, 12.0, 17.0, 22.0, 14.0, 22.0, 25.0, 40.0, 37.0, 21.0, 26.0, 21.0, 39.0, 38.0, 40.0, 39.0, 35.0, 47.0, 47.0, 32.0, 33.0, 26.0, 34.0, 31.0, 36.0, 26.0, 31.0, 23.0, 10.0, 19.0, 15.0, 12.0, 13.0, 10.0, 7.0, 7.0, 4.0, 5.0, 4.0, 6.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-61.70460510253906, -59.826332092285156, -57.948062896728516, -56.06978988647461, -54.1915168762207, -52.31324768066406, -50.434974670410156, -48.55670166015625, -46.678428649902344, -44.80015563964844, -42.9218864440918, -41.04361343383789, -39.165340423583984, -37.287071228027344, -35.40879821777344, -33.53052520751953, -31.65225601196289, -29.773984909057617, -27.89571189880371, -26.017440795898438, -24.13916778564453, -22.260896682739258, -20.382625579833984, -18.504352569580078, -16.626081466674805, -14.747809410095215, -12.869537353515625, -10.991266250610352, -9.112994194030762, -7.234722137451172, -5.356451034545898, -3.4781789779663086, -1.5999031066894531, 0.2783687114715576, 2.1566405296325684, 4.034912109375, 5.91318416595459, 7.79145622253418, 9.669727325439453, 11.547999382019043, 13.426271438598633, 15.304543495178223, 17.182815551757812, 19.061086654663086, 20.93935775756836, 22.817630767822266, 24.69590187072754, 26.574172973632812, 28.45244598388672, 30.330717086791992, 32.208988189697266, 34.08726119995117, 35.96553421020508, 37.84380340576172, 39.722076416015625, 41.60034942626953, 43.47862243652344, 45.356895446777344, 47.235164642333984, 49.11343765258789, 50.9917106628418, 52.86997985839844, 54.748252868652344, 56.62652587890625, 58.50479507446289]}, "gradients/decoder.transformer.h.5.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 6.0, 7.0, 4.0, 3.0, 6.0, 10.0, 11.0, 10.0, 13.0, 13.0, 22.0, 19.0, 26.0, 30.0, 26.0, 25.0, 25.0, 27.0, 31.0, 31.0, 41.0, 52.0, 40.0, 36.0, 46.0, 40.0, 38.0, 39.0, 40.0, 40.0, 36.0, 19.0, 23.0, 20.0, 14.0, 27.0, 19.0, 15.0, 15.0, 10.0, 11.0, 5.0, 8.0, 7.0, 3.0, 7.0, 0.0, 1.0, 4.0, 3.0, 2.0, 0.0, 0.0, 2.0], "bins": [-8.15625, -7.9111328125, -7.666015625, -7.4208984375, -7.17578125, -6.9306640625, -6.685546875, -6.4404296875, -6.1953125, -5.9501953125, -5.705078125, -5.4599609375, -5.21484375, -4.9697265625, -4.724609375, -4.4794921875, -4.234375, -3.9892578125, -3.744140625, -3.4990234375, -3.25390625, -3.0087890625, -2.763671875, -2.5185546875, -2.2734375, -2.0283203125, -1.783203125, -1.5380859375, -1.29296875, -1.0478515625, -0.802734375, -0.5576171875, -0.3125, -0.0673828125, 0.177734375, 0.4228515625, 0.66796875, 0.9130859375, 1.158203125, 1.4033203125, 1.6484375, 1.8935546875, 2.138671875, 2.3837890625, 2.62890625, 2.8740234375, 3.119140625, 3.3642578125, 3.609375, 3.8544921875, 4.099609375, 4.3447265625, 4.58984375, 4.8349609375, 5.080078125, 5.3251953125, 5.5703125, 5.8154296875, 6.060546875, 6.3056640625, 6.55078125, 6.7958984375, 7.041015625, 7.2861328125, 7.53125]}, "gradients/decoder.transformer.h.5.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 6.0, 6.0, 6.0, 9.0, 9.0, 15.0, 18.0, 12.0, 24.0, 27.0, 38.0, 43.0, 59.0, 77.0, 108.0, 147.0, 201.0, 326.0, 588.0, 1486.0, 5421.0, 28072.0, 199086.0, 1166425.0, 2041192.0, 635678.0, 95540.0, 14250.0, 3019.0, 977.0, 441.0, 272.0, 185.0, 123.0, 93.0, 57.0, 48.0, 49.0, 43.0, 19.0, 25.0, 22.0, 9.0, 12.0, 10.0, 8.0, 4.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-13.9375, -13.506103515625, -13.07470703125, -12.643310546875, -12.2119140625, -11.780517578125, -11.34912109375, -10.917724609375, -10.486328125, -10.054931640625, -9.62353515625, -9.192138671875, -8.7607421875, -8.329345703125, -7.89794921875, -7.466552734375, -7.03515625, -6.603759765625, -6.17236328125, -5.740966796875, -5.3095703125, -4.878173828125, -4.44677734375, -4.015380859375, -3.583984375, -3.152587890625, -2.72119140625, -2.289794921875, -1.8583984375, -1.427001953125, -0.99560546875, -0.564208984375, -0.1328125, 0.298583984375, 0.72998046875, 1.161376953125, 1.5927734375, 2.024169921875, 2.45556640625, 2.886962890625, 3.318359375, 3.749755859375, 4.18115234375, 4.612548828125, 5.0439453125, 5.475341796875, 5.90673828125, 6.338134765625, 6.76953125, 7.200927734375, 7.63232421875, 8.063720703125, 8.4951171875, 8.926513671875, 9.35791015625, 9.789306640625, 10.220703125, 10.652099609375, 11.08349609375, 11.514892578125, 11.9462890625, 12.377685546875, 12.80908203125, 13.240478515625, 13.671875]}, "gradients/decoder.transformer.h.5.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 7.0, 1.0, 4.0, 2.0, 6.0, 6.0, 5.0, 12.0, 6.0, 12.0, 29.0, 33.0, 38.0, 56.0, 75.0, 102.0, 122.0, 176.0, 201.0, 283.0, 324.0, 379.0, 396.0, 379.0, 329.0, 261.0, 201.0, 164.0, 121.0, 79.0, 65.0, 49.0, 27.0, 31.0, 12.0, 17.0, 14.0, 11.0, 9.0, 15.0, 5.0, 3.0, 6.0, 4.0, 2.0, 4.0, 2.0, 2.0, 2.0], "bins": [-13.90625, -13.5343017578125, -13.162353515625, -12.7904052734375, -12.41845703125, -12.0465087890625, -11.674560546875, -11.3026123046875, -10.9306640625, -10.5587158203125, -10.186767578125, -9.8148193359375, -9.44287109375, -9.0709228515625, -8.698974609375, -8.3270263671875, -7.955078125, -7.5831298828125, -7.211181640625, -6.8392333984375, -6.46728515625, -6.0953369140625, -5.723388671875, -5.3514404296875, -4.9794921875, -4.6075439453125, -4.235595703125, -3.8636474609375, -3.49169921875, -3.1197509765625, -2.747802734375, -2.3758544921875, -2.00390625, -1.6319580078125, -1.260009765625, -0.8880615234375, -0.51611328125, -0.1441650390625, 0.227783203125, 0.5997314453125, 0.9716796875, 1.3436279296875, 1.715576171875, 2.0875244140625, 2.45947265625, 2.8314208984375, 3.203369140625, 3.5753173828125, 3.947265625, 4.3192138671875, 4.691162109375, 5.0631103515625, 5.43505859375, 5.8070068359375, 6.178955078125, 6.5509033203125, 6.9228515625, 7.2947998046875, 7.666748046875, 8.0386962890625, 8.41064453125, 8.7825927734375, 9.154541015625, 9.5264892578125, 9.8984375]}, "gradients/decoder.transformer.h.5.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 3.0, 6.0, 1.0, 3.0, 3.0, 2.0, 4.0, 8.0, 6.0, 14.0, 12.0, 24.0, 20.0, 28.0, 35.0, 46.0, 57.0, 96.0, 129.0, 236.0, 319.0, 488.0, 863.0, 2073.0, 7647.0, 61020.0, 1181013.0, 2760357.0, 158765.0, 15047.0, 3119.0, 1146.0, 561.0, 349.0, 204.0, 184.0, 126.0, 68.0, 52.0, 44.0, 24.0, 20.0, 17.0, 19.0, 12.0, 9.0, 6.0, 2.0, 7.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-25.296875, -24.399169921875, -23.50146484375, -22.603759765625, -21.7060546875, -20.808349609375, -19.91064453125, -19.012939453125, -18.115234375, -17.217529296875, -16.31982421875, -15.422119140625, -14.5244140625, -13.626708984375, -12.72900390625, -11.831298828125, -10.93359375, -10.035888671875, -9.13818359375, -8.240478515625, -7.3427734375, -6.445068359375, -5.54736328125, -4.649658203125, -3.751953125, -2.854248046875, -1.95654296875, -1.058837890625, -0.1611328125, 0.736572265625, 1.63427734375, 2.531982421875, 3.4296875, 4.327392578125, 5.22509765625, 6.122802734375, 7.0205078125, 7.918212890625, 8.81591796875, 9.713623046875, 10.611328125, 11.509033203125, 12.40673828125, 13.304443359375, 14.2021484375, 15.099853515625, 15.99755859375, 16.895263671875, 17.79296875, 18.690673828125, 19.58837890625, 20.486083984375, 21.3837890625, 22.281494140625, 23.17919921875, 24.076904296875, 24.974609375, 25.872314453125, 26.77001953125, 27.667724609375, 28.5654296875, 29.463134765625, 30.36083984375, 31.258544921875, 32.15625]}, "gradients/decoder.transformer.h.5.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 24.0, 431.0, 517.0, 42.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-195.6342010498047, -184.1607208251953, -172.68724060058594, -161.21376037597656, -149.7402801513672, -138.2667999267578, -126.79332733154297, -115.3198471069336, -103.84636688232422, -92.37288665771484, -80.89940643310547, -69.42593383789062, -57.952449798583984, -46.47896957397461, -35.0054931640625, -23.532012939453125, -12.05853271484375, -0.5850534439086914, 10.888425827026367, 22.36190414428711, 33.835384368896484, 45.30886459350586, 56.78234100341797, 68.25582122802734, 79.72930145263672, 91.2027816772461, 102.67626190185547, 114.14973449707031, 125.62321472167969, 137.09669494628906, 148.57017517089844, 160.0436553955078, 171.51712036132812, 182.9906005859375, 194.46408081054688, 205.93756103515625, 217.41104125976562, 228.884521484375, 240.35800170898438, 251.83148193359375, 263.3049621582031, 274.7784423828125, 286.2519226074219, 297.72540283203125, 309.1988830566406, 320.67236328125, 332.1458435058594, 343.61932373046875, 355.0927734375, 366.5662536621094, 378.03973388671875, 389.5132141113281, 400.9866943359375, 412.4601745605469, 423.93365478515625, 435.4071350097656, 446.880615234375, 458.3540954589844, 469.82757568359375, 481.3010559082031, 492.7745361328125, 504.2480163574219, 515.7214965820312, 527.1949462890625, 538.66845703125]}, "gradients/decoder.transformer.h.5.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 8.0, 9.0, 4.0, 11.0, 10.0, 11.0, 15.0, 18.0, 22.0, 22.0, 21.0, 38.0, 31.0, 25.0, 31.0, 40.0, 32.0, 45.0, 37.0, 32.0, 50.0, 52.0, 45.0, 48.0, 35.0, 40.0, 24.0, 23.0, 26.0, 23.0, 21.0, 25.0, 24.0, 18.0, 13.0, 16.0, 5.0, 9.0, 13.0, 6.0, 6.0, 9.0, 2.0, 2.0, 4.0, 5.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-44.03077697753906, -42.614967346191406, -41.199153900146484, -39.78334426879883, -38.367530822753906, -36.95172119140625, -35.535911560058594, -34.12009811401367, -32.704288482666016, -31.288476943969727, -29.872665405273438, -28.45685577392578, -27.041044235229492, -25.625232696533203, -24.209421157836914, -22.793609619140625, -21.377798080444336, -19.961986541748047, -18.546175003051758, -17.13036346435547, -15.714553833007812, -14.298742294311523, -12.882930755615234, -11.467120170593262, -10.051308631896973, -8.635497093200684, -7.219686508178711, -5.803874969482422, -4.388063907623291, -2.97225284576416, -1.556441307067871, -0.14063072204589844, 1.2751808166503906, 2.6909918785095215, 4.106802940368652, 5.522614479064941, 6.938425540924072, 8.354236602783203, 9.770048141479492, 11.185858726501465, 12.601670265197754, 14.017481803894043, 15.433292388916016, 16.849103927612305, 18.264915466308594, 19.68072509765625, 21.096538543701172, 22.512348175048828, 23.928159713745117, 25.343971252441406, 26.759782791137695, 28.175594329833984, 29.59140396118164, 31.00721549987793, 32.42302703857422, 33.838836669921875, 35.2546501159668, 36.67045974731445, 38.086273193359375, 39.50208282470703, 40.91789627075195, 42.33370590209961, 43.74951934814453, 45.16532897949219, 46.581138610839844]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 5.0, 11.0, 5.0, 11.0, 7.0, 8.0, 10.0, 20.0, 14.0, 17.0, 15.0, 21.0, 15.0, 33.0, 28.0, 32.0, 31.0, 40.0, 31.0, 34.0, 41.0, 34.0, 43.0, 56.0, 32.0, 51.0, 38.0, 32.0, 39.0, 37.0, 24.0, 24.0, 21.0, 19.0, 18.0, 21.0, 16.0, 13.0, 7.0, 9.0, 4.0, 11.0, 11.0, 8.0, 2.0, 3.0, 0.0, 3.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.21875, -7.9569091796875, -7.695068359375, -7.4332275390625, -7.17138671875, -6.9095458984375, -6.647705078125, -6.3858642578125, -6.1240234375, -5.8621826171875, -5.600341796875, -5.3385009765625, -5.07666015625, -4.8148193359375, -4.552978515625, -4.2911376953125, -4.029296875, -3.7674560546875, -3.505615234375, -3.2437744140625, -2.98193359375, -2.7200927734375, -2.458251953125, -2.1964111328125, -1.9345703125, -1.6727294921875, -1.410888671875, -1.1490478515625, -0.88720703125, -0.6253662109375, -0.363525390625, -0.1016845703125, 0.16015625, 0.4219970703125, 0.683837890625, 0.9456787109375, 1.20751953125, 1.4693603515625, 1.731201171875, 1.9930419921875, 2.2548828125, 2.5167236328125, 2.778564453125, 3.0404052734375, 3.30224609375, 3.5640869140625, 3.825927734375, 4.0877685546875, 4.349609375, 4.6114501953125, 4.873291015625, 5.1351318359375, 5.39697265625, 5.6588134765625, 5.920654296875, 6.1824951171875, 6.4443359375, 6.7061767578125, 6.968017578125, 7.2298583984375, 7.49169921875, 7.7535400390625, 8.015380859375, 8.2772216796875, 8.5390625]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 4.0, 6.0, 17.0, 11.0, 26.0, 27.0, 52.0, 67.0, 107.0, 147.0, 196.0, 293.0, 400.0, 587.0, 815.0, 1229.0, 1785.0, 2461.0, 3661.0, 5341.0, 7575.0, 11142.0, 16510.0, 24790.0, 36164.0, 54750.0, 83097.0, 122764.0, 164017.0, 159570.0, 115008.0, 78567.0, 51290.0, 33990.0, 23003.0, 15495.0, 10526.0, 7181.0, 4928.0, 3361.0, 2339.0, 1594.0, 1099.0, 708.0, 551.0, 397.0, 279.0, 193.0, 147.0, 84.0, 74.0, 43.0, 34.0, 25.0, 7.0, 14.0, 7.0, 9.0, 3.0, 3.0, 2.0], "bins": [-0.5625, -0.5448760986328125, -0.527252197265625, -0.5096282958984375, -0.49200439453125, -0.4743804931640625, -0.456756591796875, -0.4391326904296875, -0.4215087890625, -0.4038848876953125, -0.386260986328125, -0.3686370849609375, -0.35101318359375, -0.3333892822265625, -0.315765380859375, -0.2981414794921875, -0.280517578125, -0.2628936767578125, -0.245269775390625, -0.2276458740234375, -0.21002197265625, -0.1923980712890625, -0.174774169921875, -0.1571502685546875, -0.1395263671875, -0.1219024658203125, -0.104278564453125, -0.0866546630859375, -0.06903076171875, -0.0514068603515625, -0.033782958984375, -0.0161590576171875, 0.00146484375, 0.0190887451171875, 0.036712646484375, 0.0543365478515625, 0.07196044921875, 0.0895843505859375, 0.107208251953125, 0.1248321533203125, 0.1424560546875, 0.1600799560546875, 0.177703857421875, 0.1953277587890625, 0.21295166015625, 0.2305755615234375, 0.248199462890625, 0.2658233642578125, 0.283447265625, 0.3010711669921875, 0.318695068359375, 0.3363189697265625, 0.35394287109375, 0.3715667724609375, 0.389190673828125, 0.4068145751953125, 0.4244384765625, 0.4420623779296875, 0.459686279296875, 0.4773101806640625, 0.49493408203125, 0.5125579833984375, 0.530181884765625, 0.5478057861328125, 0.5654296875]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 3.0, 2.0, 6.0, 4.0, 7.0, 6.0, 8.0, 14.0, 8.0, 18.0, 14.0, 17.0, 25.0, 18.0, 26.0, 18.0, 28.0, 26.0, 32.0, 34.0, 30.0, 53.0, 38.0, 51.0, 1056.0, 26.0, 51.0, 42.0, 43.0, 41.0, 47.0, 24.0, 23.0, 25.0, 27.0, 22.0, 15.0, 16.0, 24.0, 11.0, 11.0, 10.0, 5.0, 4.0, 12.0, 4.0, 3.0, 1.0, 4.0, 1.0, 2.0, 1.0, 4.0, 0.0, 0.0, 1.0], "bins": [-5.6484375, -5.4747314453125, -5.301025390625, -5.1273193359375, -4.95361328125, -4.7799072265625, -4.606201171875, -4.4324951171875, -4.2587890625, -4.0850830078125, -3.911376953125, -3.7376708984375, -3.56396484375, -3.3902587890625, -3.216552734375, -3.0428466796875, -2.869140625, -2.6954345703125, -2.521728515625, -2.3480224609375, -2.17431640625, -2.0006103515625, -1.826904296875, -1.6531982421875, -1.4794921875, -1.3057861328125, -1.132080078125, -0.9583740234375, -0.78466796875, -0.6109619140625, -0.437255859375, -0.2635498046875, -0.08984375, 0.0838623046875, 0.257568359375, 0.4312744140625, 0.60498046875, 0.7786865234375, 0.952392578125, 1.1260986328125, 1.2998046875, 1.4735107421875, 1.647216796875, 1.8209228515625, 1.99462890625, 2.1683349609375, 2.342041015625, 2.5157470703125, 2.689453125, 2.8631591796875, 3.036865234375, 3.2105712890625, 3.38427734375, 3.5579833984375, 3.731689453125, 3.9053955078125, 4.0791015625, 4.2528076171875, 4.426513671875, 4.6002197265625, 4.77392578125, 4.9476318359375, 5.121337890625, 5.2950439453125, 5.46875]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 6.0, 2.0, 13.0, 5.0, 15.0, 28.0, 43.0, 73.0, 94.0, 169.0, 251.0, 376.0, 583.0, 857.0, 1381.0, 2198.0, 3439.0, 5049.0, 7791.0, 11650.0, 17835.0, 27090.0, 41204.0, 62391.0, 93849.0, 135115.0, 1214260.0, 150141.0, 108094.0, 72697.0, 47992.0, 31768.0, 20714.0, 14028.0, 8940.0, 6004.0, 3867.0, 2552.0, 1646.0, 1016.0, 721.0, 411.0, 277.0, 196.0, 122.0, 78.0, 43.0, 25.0, 14.0, 13.0, 3.0, 9.0, 3.0, 1.0, 1.0, 2.0, 2.0], "bins": [-0.56591796875, -0.5491180419921875, -0.532318115234375, -0.5155181884765625, -0.49871826171875, -0.4819183349609375, -0.465118408203125, -0.4483184814453125, -0.4315185546875, -0.4147186279296875, -0.397918701171875, -0.3811187744140625, -0.36431884765625, -0.3475189208984375, -0.330718994140625, -0.3139190673828125, -0.297119140625, -0.2803192138671875, -0.263519287109375, -0.2467193603515625, -0.22991943359375, -0.2131195068359375, -0.196319580078125, -0.1795196533203125, -0.1627197265625, -0.1459197998046875, -0.129119873046875, -0.1123199462890625, -0.09552001953125, -0.0787200927734375, -0.061920166015625, -0.0451202392578125, -0.0283203125, -0.0115203857421875, 0.005279541015625, 0.0220794677734375, 0.03887939453125, 0.0556793212890625, 0.072479248046875, 0.0892791748046875, 0.1060791015625, 0.1228790283203125, 0.139678955078125, 0.1564788818359375, 0.17327880859375, 0.1900787353515625, 0.206878662109375, 0.2236785888671875, 0.240478515625, 0.2572784423828125, 0.274078369140625, 0.2908782958984375, 0.30767822265625, 0.3244781494140625, 0.341278076171875, 0.3580780029296875, 0.3748779296875, 0.3916778564453125, 0.408477783203125, 0.4252777099609375, 0.44207763671875, 0.4588775634765625, 0.475677490234375, 0.4924774169921875, 0.50927734375]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 4.0, 3.0, 9.0, 3.0, 9.0, 17.0, 14.0, 24.0, 27.0, 36.0, 41.0, 49.0, 62.0, 65.0, 74.0, 73.0, 69.0, 67.0, 65.0, 65.0, 49.0, 40.0, 31.0, 25.0, 13.0, 19.0, 11.0, 8.0, 7.0, 6.0, 7.0, 7.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0025463104248046875, -0.0024728775024414062, -0.002399444580078125, -0.0023260116577148438, -0.0022525787353515625, -0.0021791458129882812, -0.002105712890625, -0.0020322799682617188, -0.0019588470458984375, -0.0018854141235351562, -0.001811981201171875, -0.0017385482788085938, -0.0016651153564453125, -0.0015916824340820312, -0.00151824951171875, -0.0014448165893554688, -0.0013713836669921875, -0.0012979507446289062, -0.001224517822265625, -0.0011510848999023438, -0.0010776519775390625, -0.0010042190551757812, -0.0009307861328125, -0.0008573532104492188, -0.0007839202880859375, -0.0007104873657226562, -0.000637054443359375, -0.0005636215209960938, -0.0004901885986328125, -0.00041675567626953125, -0.00034332275390625, -0.00026988983154296875, -0.0001964569091796875, -0.00012302398681640625, -4.9591064453125e-05, 2.384185791015625e-05, 9.72747802734375e-05, 0.00017070770263671875, 0.000244140625, 0.00031757354736328125, 0.0003910064697265625, 0.00046443939208984375, 0.000537872314453125, 0.0006113052368164062, 0.0006847381591796875, 0.0007581710815429688, 0.00083160400390625, 0.0009050369262695312, 0.0009784698486328125, 0.0010519027709960938, 0.001125335693359375, 0.0011987686157226562, 0.0012722015380859375, 0.0013456344604492188, 0.0014190673828125, 0.0014925003051757812, 0.0015659332275390625, 0.0016393661499023438, 0.001712799072265625, 0.0017862319946289062, 0.0018596649169921875, 0.0019330978393554688, 0.00200653076171875, 0.0020799636840820312, 0.0021533966064453125]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 6.0, 12.0, 15.0, 14.0, 16.0, 21.0, 42.0, 37.0, 55.0, 74.0, 123.0, 153.0, 325.0, 527.0, 1259.0, 524889.0, 518363.0, 1244.0, 527.0, 276.0, 156.0, 120.0, 84.0, 55.0, 41.0, 34.0, 17.0, 14.0, 13.0, 12.0, 7.0, 2.0, 5.0, 3.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.042083740234375, -0.040680885314941406, -0.03927803039550781, -0.03787517547607422, -0.036472320556640625, -0.03506946563720703, -0.03366661071777344, -0.032263755798339844, -0.03086090087890625, -0.029458045959472656, -0.028055191040039062, -0.02665233612060547, -0.025249481201171875, -0.02384662628173828, -0.022443771362304688, -0.021040916442871094, -0.0196380615234375, -0.018235206604003906, -0.016832351684570312, -0.015429496765136719, -0.014026641845703125, -0.012623786926269531, -0.011220932006835938, -0.009818077087402344, -0.00841522216796875, -0.007012367248535156, -0.0056095123291015625, -0.004206657409667969, -0.002803802490234375, -0.0014009475708007812, 1.9073486328125e-06, 0.0014047622680664062, 0.0028076171875, 0.004210472106933594, 0.0056133270263671875, 0.007016181945800781, 0.008419036865234375, 0.009821891784667969, 0.011224746704101562, 0.012627601623535156, 0.01403045654296875, 0.015433311462402344, 0.016836166381835938, 0.01823902130126953, 0.019641876220703125, 0.02104473114013672, 0.022447586059570312, 0.023850440979003906, 0.0252532958984375, 0.026656150817871094, 0.028059005737304688, 0.02946186065673828, 0.030864715576171875, 0.03226757049560547, 0.03367042541503906, 0.035073280334472656, 0.03647613525390625, 0.037878990173339844, 0.03928184509277344, 0.04068470001220703, 0.042087554931640625, 0.04349040985107422, 0.04489326477050781, 0.046296119689941406, 0.047698974609375]}, "gradients/decoder.transformer.h.5.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 19.0, 523.0, 454.0, 20.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.012449084781110287, -0.012172176502645016, -0.01189526915550232, -0.011618360877037048, -0.011341452598571777, -0.011064544320106506, -0.01078763697296381, -0.010510728694498539, -0.010233820416033268, -0.009956912137567997, -0.0096800047904253, -0.00940309651196003, -0.009126188233494759, -0.008849279955029488, -0.008572372607886791, -0.00829546432942152, -0.008018556982278824, -0.00774164916947484, -0.007464740891009569, -0.0071878330782055855, -0.0069109247997403145, -0.006634016986936331, -0.006357109174132347, -0.006080200895667076, -0.005803292617201805, -0.005526384804397821, -0.00524947652593255, -0.004972568713128567, -0.004695660434663296, -0.004418752621859312, -0.004141844809055328, -0.0038649365305900574, -0.0035880282521247864, -0.003311120206490159, -0.0030342121608555317, -0.002757304348051548, -0.002480396069586277, -0.0022034882567822933, -0.001926580211147666, -0.0016496721655130386, -0.0013727641198784113, -0.001095856074243784, -0.0008189480868168175, -0.0005420400993898511, -0.00026513205375522375, 1.1775991879403591e-05, 0.0002886839210987091, 0.0005655919667333364, 0.0008425000123679638, 0.0011194080580025911, 0.0013963161036372185, 0.001673224032856524, 0.0019501320784911513, 0.002227040007710457, 0.002503948053345084, 0.0027808560989797115, 0.003057764144614339, 0.0033346721902489662, 0.0036115802358835936, 0.003888488281518221, 0.004165396094322205, 0.004442304372787476, 0.004719212185591459, 0.004996119998395443, 0.005273028276860714]}, "gradients/decoder.transformer.h.5.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 6.0, 5.0, 9.0, 3.0, 7.0, 5.0, 19.0, 14.0, 17.0, 15.0, 21.0, 24.0, 21.0, 28.0, 27.0, 32.0, 33.0, 38.0, 40.0, 47.0, 33.0, 48.0, 38.0, 29.0, 47.0, 37.0, 27.0, 36.0, 30.0, 39.0, 29.0, 23.0, 23.0, 22.0, 21.0, 23.0, 20.0, 13.0, 9.0, 10.0, 8.0, 9.0, 8.0, 4.0, 3.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0008704066276550293, -0.0008399803191423416, -0.0008095540106296539, -0.0007791277021169662, -0.0007487013936042786, -0.0007182750850915909, -0.0006878487765789032, -0.0006574224680662155, -0.0006269961595535278, -0.0005965698510408401, -0.0005661435425281525, -0.0005357172340154648, -0.0005052909255027771, -0.0004748646169900894, -0.00044443830847740173, -0.00041401199996471405, -0.00038358569145202637, -0.0003531593829393387, -0.000322733074426651, -0.0002923067659139633, -0.00026188045740127563, -0.00023145414888858795, -0.00020102784037590027, -0.00017060153186321259, -0.0001401752233505249, -0.00010974891483783722, -7.932260632514954e-05, -4.889629781246185e-05, -1.846998929977417e-05, 1.1956319212913513e-05, 4.2382627725601196e-05, 7.280893623828888e-05, 0.00010323524475097656, 0.00013366155326366425, 0.00016408786177635193, 0.0001945141702890396, 0.0002249404788017273, 0.000255366787314415, 0.00028579309582710266, 0.00031621940433979034, 0.00034664571285247803, 0.0003770720213651657, 0.0004074983298778534, 0.0004379246383905411, 0.00046835094690322876, 0.0004987772554159164, 0.0005292035639286041, 0.0005596298724412918, 0.0005900561809539795, 0.0006204824894666672, 0.0006509087979793549, 0.0006813351064920425, 0.0007117614150047302, 0.0007421877235174179, 0.0007726140320301056, 0.0008030403405427933, 0.000833466649055481, 0.0008638929575681686, 0.0008943192660808563, 0.000924745574593544, 0.0009551718831062317, 0.0009855981916189194, 0.001016024500131607, 0.0010464508086442947, 0.0010768771171569824]}, "gradients/decoder.transformer.h.5.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 5.0, 11.0, 5.0, 11.0, 7.0, 8.0, 10.0, 20.0, 14.0, 17.0, 15.0, 21.0, 15.0, 33.0, 28.0, 32.0, 31.0, 40.0, 31.0, 34.0, 41.0, 34.0, 43.0, 56.0, 32.0, 51.0, 38.0, 32.0, 39.0, 37.0, 24.0, 24.0, 21.0, 19.0, 18.0, 21.0, 16.0, 13.0, 7.0, 9.0, 4.0, 11.0, 11.0, 8.0, 2.0, 3.0, 0.0, 3.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.21875, -7.9569091796875, -7.695068359375, -7.4332275390625, -7.17138671875, -6.9095458984375, -6.647705078125, -6.3858642578125, -6.1240234375, -5.8621826171875, -5.600341796875, -5.3385009765625, -5.07666015625, -4.8148193359375, -4.552978515625, -4.2911376953125, -4.029296875, -3.7674560546875, -3.505615234375, -3.2437744140625, -2.98193359375, -2.7200927734375, -2.458251953125, -2.1964111328125, -1.9345703125, -1.6727294921875, -1.410888671875, -1.1490478515625, -0.88720703125, -0.6253662109375, -0.363525390625, -0.1016845703125, 0.16015625, 0.4219970703125, 0.683837890625, 0.9456787109375, 1.20751953125, 1.4693603515625, 1.731201171875, 1.9930419921875, 2.2548828125, 2.5167236328125, 2.778564453125, 3.0404052734375, 3.30224609375, 3.5640869140625, 3.825927734375, 4.0877685546875, 4.349609375, 4.6114501953125, 4.873291015625, 5.1351318359375, 5.39697265625, 5.6588134765625, 5.920654296875, 6.1824951171875, 6.4443359375, 6.7061767578125, 6.968017578125, 7.2298583984375, 7.49169921875, 7.7535400390625, 8.015380859375, 8.2772216796875, 8.5390625]}, "gradients/decoder.transformer.h.5.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 7.0, 3.0, 3.0, 3.0, 9.0, 13.0, 22.0, 17.0, 21.0, 30.0, 39.0, 53.0, 78.0, 99.0, 114.0, 156.0, 219.0, 374.0, 556.0, 1022.0, 1895.0, 3654.0, 8033.0, 18170.0, 44583.0, 110293.0, 289500.0, 342203.0, 133107.0, 53510.0, 22070.0, 9090.0, 4304.0, 2187.0, 1121.0, 662.0, 370.0, 256.0, 187.0, 139.0, 115.0, 63.0, 50.0, 45.0, 40.0, 23.0, 16.0, 12.0, 9.0, 13.0, 4.0, 4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.2734375, -9.943115234375, -9.61279296875, -9.282470703125, -8.9521484375, -8.621826171875, -8.29150390625, -7.961181640625, -7.630859375, -7.300537109375, -6.97021484375, -6.639892578125, -6.3095703125, -5.979248046875, -5.64892578125, -5.318603515625, -4.98828125, -4.657958984375, -4.32763671875, -3.997314453125, -3.6669921875, -3.336669921875, -3.00634765625, -2.676025390625, -2.345703125, -2.015380859375, -1.68505859375, -1.354736328125, -1.0244140625, -0.694091796875, -0.36376953125, -0.033447265625, 0.296875, 0.627197265625, 0.95751953125, 1.287841796875, 1.6181640625, 1.948486328125, 2.27880859375, 2.609130859375, 2.939453125, 3.269775390625, 3.60009765625, 3.930419921875, 4.2607421875, 4.591064453125, 4.92138671875, 5.251708984375, 5.58203125, 5.912353515625, 6.24267578125, 6.572998046875, 6.9033203125, 7.233642578125, 7.56396484375, 7.894287109375, 8.224609375, 8.554931640625, 8.88525390625, 9.215576171875, 9.5458984375, 9.876220703125, 10.20654296875, 10.536865234375, 10.8671875]}, "gradients/decoder.transformer.h.5.attn.c_attn.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 6.0, 3.0, 5.0, 3.0, 6.0, 8.0, 7.0, 9.0, 12.0, 11.0, 8.0, 19.0, 11.0, 25.0, 29.0, 37.0, 30.0, 40.0, 40.0, 64.0, 80.0, 112.0, 181.0, 1427.0, 252.0, 126.0, 87.0, 56.0, 38.0, 55.0, 32.0, 27.0, 20.0, 33.0, 19.0, 21.0, 20.0, 21.0, 19.0, 6.0, 10.0, 4.0, 11.0, 2.0, 6.0, 3.0, 5.0, 6.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.703125, -22.931640625, -22.16015625, -21.388671875, -20.6171875, -19.845703125, -19.07421875, -18.302734375, -17.53125, -16.759765625, -15.98828125, -15.216796875, -14.4453125, -13.673828125, -12.90234375, -12.130859375, -11.359375, -10.587890625, -9.81640625, -9.044921875, -8.2734375, -7.501953125, -6.73046875, -5.958984375, -5.1875, -4.416015625, -3.64453125, -2.873046875, -2.1015625, -1.330078125, -0.55859375, 0.212890625, 0.984375, 1.755859375, 2.52734375, 3.298828125, 4.0703125, 4.841796875, 5.61328125, 6.384765625, 7.15625, 7.927734375, 8.69921875, 9.470703125, 10.2421875, 11.013671875, 11.78515625, 12.556640625, 13.328125, 14.099609375, 14.87109375, 15.642578125, 16.4140625, 17.185546875, 17.95703125, 18.728515625, 19.5, 20.271484375, 21.04296875, 21.814453125, 22.5859375, 23.357421875, 24.12890625, 24.900390625, 25.671875]}, "gradients/decoder.transformer.h.5.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 4.0, 4.0, 3.0, 4.0, 3.0, 9.0, 7.0, 13.0, 11.0, 20.0, 19.0, 28.0, 41.0, 62.0, 75.0, 141.0, 195.0, 308.0, 482.0, 1052.0, 7838.0, 466865.0, 2641512.0, 23829.0, 1635.0, 537.0, 333.0, 208.0, 127.0, 100.0, 67.0, 47.0, 39.0, 29.0, 15.0, 9.0, 13.0, 8.0, 5.0, 2.0, 3.0, 3.0, 4.0, 4.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-60.59375, -58.81982421875, -57.0458984375, -55.27197265625, -53.498046875, -51.72412109375, -49.9501953125, -48.17626953125, -46.40234375, -44.62841796875, -42.8544921875, -41.08056640625, -39.306640625, -37.53271484375, -35.7587890625, -33.98486328125, -32.2109375, -30.43701171875, -28.6630859375, -26.88916015625, -25.115234375, -23.34130859375, -21.5673828125, -19.79345703125, -18.01953125, -16.24560546875, -14.4716796875, -12.69775390625, -10.923828125, -9.14990234375, -7.3759765625, -5.60205078125, -3.828125, -2.05419921875, -0.2802734375, 1.49365234375, 3.267578125, 5.04150390625, 6.8154296875, 8.58935546875, 10.36328125, 12.13720703125, 13.9111328125, 15.68505859375, 17.458984375, 19.23291015625, 21.0068359375, 22.78076171875, 24.5546875, 26.32861328125, 28.1025390625, 29.87646484375, 31.650390625, 33.42431640625, 35.1982421875, 36.97216796875, 38.74609375, 40.52001953125, 42.2939453125, 44.06787109375, 45.841796875, 47.61572265625, 49.3896484375, 51.16357421875, 52.9375]}, "gradients/decoder.transformer.h.5.ln_1.weight": {"_type": "histogram", "values": [20.0, 179.0, 452.0, 308.0, 48.0, 7.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.707290649414062, -8.592103958129883, -3.476917266845703, 1.6382694244384766, 6.753456115722656, 11.868642807006836, 16.983829498291016, 22.099014282226562, 27.214202880859375, 32.32939147949219, 37.444576263427734, 42.55976104736328, 47.674949645996094, 52.790138244628906, 57.90532302856445, 63.0205078125, 68.13569641113281, 73.25088500976562, 78.36607360839844, 83.48125457763672, 88.59644317626953, 93.71163177490234, 98.82681274414062, 103.94200134277344, 109.05718994140625, 114.17237854003906, 119.28756713867188, 124.40274810791016, 129.5179443359375, 134.63311767578125, 139.74830627441406, 144.86349487304688, 149.97869873046875, 155.09388732910156, 160.20907592773438, 165.3242645263672, 170.439453125, 175.55462646484375, 180.66981506347656, 185.78500366210938, 190.9001922607422, 196.015380859375, 201.1305694580078, 206.24575805664062, 211.36093139648438, 216.4761199951172, 221.59130859375, 226.7064971923828, 231.82168579101562, 236.93687438964844, 242.05206298828125, 247.16725158691406, 252.28244018554688, 257.3976135253906, 262.5128173828125, 267.62799072265625, 272.7431640625, 277.85833740234375, 282.9735412597656, 288.0887145996094, 293.20391845703125, 298.319091796875, 303.4342956542969, 308.5494689941406, 313.6646728515625]}, "gradients/decoder.transformer.h.5.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 6.0, 2.0, 4.0, 8.0, 9.0, 5.0, 8.0, 19.0, 12.0, 26.0, 11.0, 19.0, 24.0, 20.0, 38.0, 24.0, 24.0, 32.0, 37.0, 39.0, 44.0, 56.0, 35.0, 39.0, 39.0, 54.0, 42.0, 40.0, 35.0, 30.0, 36.0, 28.0, 34.0, 18.0, 20.0, 15.0, 10.0, 9.0, 13.0, 13.0, 13.0, 5.0, 6.0, 4.0, 1.0, 3.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.21308135986328, -58.29692459106445, -56.380767822265625, -54.46461486816406, -52.548458099365234, -50.632301330566406, -48.71614456176758, -46.79998779296875, -44.88383483886719, -42.96767807006836, -41.05152130126953, -39.13536834716797, -37.21921157836914, -35.30305480957031, -33.386898040771484, -31.47074317932129, -29.55458641052246, -27.638429641723633, -25.722274780273438, -23.80611801147461, -21.889963150024414, -19.973806381225586, -18.05765151977539, -16.141494750976562, -14.22533893585205, -12.309183120727539, -10.393027305603027, -8.476871490478516, -6.560715198516846, -4.644558906555176, -2.728403091430664, -0.8122472763061523, 1.1039085388183594, 3.020064353942871, 4.936220169067383, 6.852376461029053, 8.768531799316406, 10.684688568115234, 12.600844383239746, 14.517000198364258, 16.433155059814453, 18.34931182861328, 20.265466690063477, 22.181623458862305, 24.0977783203125, 26.013935089111328, 27.930091857910156, 29.84624671936035, 31.76240348815918, 33.678558349609375, 35.5947151184082, 37.51087188720703, 39.42702865600586, 41.34318542480469, 43.25933837890625, 45.17549514770508, 47.091651916503906, 49.007808685302734, 50.92396545410156, 52.840118408203125, 54.75627517700195, 56.67243194580078, 58.58858871459961, 60.50474548339844, 62.4208984375]}, "gradients/decoder.transformer.h.4.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 8.0, 1.0, 9.0, 4.0, 5.0, 9.0, 16.0, 9.0, 10.0, 27.0, 14.0, 18.0, 27.0, 25.0, 23.0, 29.0, 37.0, 33.0, 41.0, 51.0, 32.0, 48.0, 43.0, 41.0, 31.0, 52.0, 47.0, 23.0, 38.0, 42.0, 31.0, 24.0, 35.0, 21.0, 18.0, 18.0, 10.0, 6.0, 12.0, 10.0, 9.0, 6.0, 5.0, 4.0, 3.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0], "bins": [-10.3828125, -10.0902099609375, -9.797607421875, -9.5050048828125, -9.21240234375, -8.9197998046875, -8.627197265625, -8.3345947265625, -8.0419921875, -7.7493896484375, -7.456787109375, -7.1641845703125, -6.87158203125, -6.5789794921875, -6.286376953125, -5.9937744140625, -5.701171875, -5.4085693359375, -5.115966796875, -4.8233642578125, -4.53076171875, -4.2381591796875, -3.945556640625, -3.6529541015625, -3.3603515625, -3.0677490234375, -2.775146484375, -2.4825439453125, -2.18994140625, -1.8973388671875, -1.604736328125, -1.3121337890625, -1.01953125, -0.7269287109375, -0.434326171875, -0.1417236328125, 0.15087890625, 0.4434814453125, 0.736083984375, 1.0286865234375, 1.3212890625, 1.6138916015625, 1.906494140625, 2.1990966796875, 2.49169921875, 2.7843017578125, 3.076904296875, 3.3695068359375, 3.662109375, 3.9547119140625, 4.247314453125, 4.5399169921875, 4.83251953125, 5.1251220703125, 5.417724609375, 5.7103271484375, 6.0029296875, 6.2955322265625, 6.588134765625, 6.8807373046875, 7.17333984375, 7.4659423828125, 7.758544921875, 8.0511474609375, 8.34375]}, "gradients/decoder.transformer.h.4.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 4.0, 6.0, 5.0, 9.0, 11.0, 16.0, 13.0, 25.0, 39.0, 54.0, 74.0, 95.0, 136.0, 222.0, 374.0, 714.0, 1530.0, 3854.0, 11812.0, 43088.0, 177190.0, 656773.0, 1511787.0, 1213300.0, 424470.0, 108388.0, 27026.0, 7782.0, 2793.0, 1123.0, 613.0, 331.0, 168.0, 145.0, 102.0, 51.0, 35.0, 36.0, 23.0, 16.0, 12.0, 14.0, 11.0, 8.0, 4.0, 4.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.46875, -10.1497802734375, -9.830810546875, -9.5118408203125, -9.19287109375, -8.8739013671875, -8.554931640625, -8.2359619140625, -7.9169921875, -7.5980224609375, -7.279052734375, -6.9600830078125, -6.64111328125, -6.3221435546875, -6.003173828125, -5.6842041015625, -5.365234375, -5.0462646484375, -4.727294921875, -4.4083251953125, -4.08935546875, -3.7703857421875, -3.451416015625, -3.1324462890625, -2.8134765625, -2.4945068359375, -2.175537109375, -1.8565673828125, -1.53759765625, -1.2186279296875, -0.899658203125, -0.5806884765625, -0.26171875, 0.0572509765625, 0.376220703125, 0.6951904296875, 1.01416015625, 1.3331298828125, 1.652099609375, 1.9710693359375, 2.2900390625, 2.6090087890625, 2.927978515625, 3.2469482421875, 3.56591796875, 3.8848876953125, 4.203857421875, 4.5228271484375, 4.841796875, 5.1607666015625, 5.479736328125, 5.7987060546875, 6.11767578125, 6.4366455078125, 6.755615234375, 7.0745849609375, 7.3935546875, 7.7125244140625, 8.031494140625, 8.3504638671875, 8.66943359375, 8.9884033203125, 9.307373046875, 9.6263427734375, 9.9453125]}, "gradients/decoder.transformer.h.4.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 5.0, 7.0, 10.0, 7.0, 15.0, 9.0, 25.0, 19.0, 42.0, 47.0, 77.0, 101.0, 144.0, 225.0, 290.0, 424.0, 479.0, 526.0, 415.0, 357.0, 245.0, 194.0, 127.0, 93.0, 54.0, 37.0, 27.0, 28.0, 15.0, 13.0, 7.0, 5.0, 3.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-14.0859375, -13.64892578125, -13.2119140625, -12.77490234375, -12.337890625, -11.90087890625, -11.4638671875, -11.02685546875, -10.58984375, -10.15283203125, -9.7158203125, -9.27880859375, -8.841796875, -8.40478515625, -7.9677734375, -7.53076171875, -7.09375, -6.65673828125, -6.2197265625, -5.78271484375, -5.345703125, -4.90869140625, -4.4716796875, -4.03466796875, -3.59765625, -3.16064453125, -2.7236328125, -2.28662109375, -1.849609375, -1.41259765625, -0.9755859375, -0.53857421875, -0.1015625, 0.33544921875, 0.7724609375, 1.20947265625, 1.646484375, 2.08349609375, 2.5205078125, 2.95751953125, 3.39453125, 3.83154296875, 4.2685546875, 4.70556640625, 5.142578125, 5.57958984375, 6.0166015625, 6.45361328125, 6.890625, 7.32763671875, 7.7646484375, 8.20166015625, 8.638671875, 9.07568359375, 9.5126953125, 9.94970703125, 10.38671875, 10.82373046875, 11.2607421875, 11.69775390625, 12.134765625, 12.57177734375, 13.0087890625, 13.44580078125, 13.8828125]}, "gradients/decoder.transformer.h.4.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 4.0, 4.0, 3.0, 5.0, 4.0, 8.0, 7.0, 10.0, 15.0, 31.0, 32.0, 42.0, 70.0, 78.0, 190.0, 253.0, 537.0, 1065.0, 2531.0, 7641.0, 27919.0, 148319.0, 1215655.0, 2372509.0, 344528.0, 53578.0, 12392.0, 3777.0, 1461.0, 663.0, 344.0, 206.0, 122.0, 79.0, 64.0, 40.0, 27.0, 28.0, 14.0, 6.0, 12.0, 4.0, 4.0, 5.0, 3.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-18.21875, -17.635009765625, -17.05126953125, -16.467529296875, -15.8837890625, -15.300048828125, -14.71630859375, -14.132568359375, -13.548828125, -12.965087890625, -12.38134765625, -11.797607421875, -11.2138671875, -10.630126953125, -10.04638671875, -9.462646484375, -8.87890625, -8.295166015625, -7.71142578125, -7.127685546875, -6.5439453125, -5.960205078125, -5.37646484375, -4.792724609375, -4.208984375, -3.625244140625, -3.04150390625, -2.457763671875, -1.8740234375, -1.290283203125, -0.70654296875, -0.122802734375, 0.4609375, 1.044677734375, 1.62841796875, 2.212158203125, 2.7958984375, 3.379638671875, 3.96337890625, 4.547119140625, 5.130859375, 5.714599609375, 6.29833984375, 6.882080078125, 7.4658203125, 8.049560546875, 8.63330078125, 9.217041015625, 9.80078125, 10.384521484375, 10.96826171875, 11.552001953125, 12.1357421875, 12.719482421875, 13.30322265625, 13.886962890625, 14.470703125, 15.054443359375, 15.63818359375, 16.221923828125, 16.8056640625, 17.389404296875, 17.97314453125, 18.556884765625, 19.140625]}, "gradients/decoder.transformer.h.4.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 32.0, 386.0, 547.0, 49.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-213.45945739746094, -203.35934448242188, -193.2592315673828, -183.15911865234375, -173.0590057373047, -162.95889282226562, -152.85877990722656, -142.7586669921875, -132.65855407714844, -122.55844116210938, -112.45832824707031, -102.35821533203125, -92.25810241699219, -82.15798950195312, -72.05787658691406, -61.957763671875, -51.85765075683594, -41.757537841796875, -31.657424926757812, -21.55731201171875, -11.457199096679688, -1.357086181640625, 8.743026733398438, 18.8431396484375, 28.943252563476562, 39.043365478515625, 49.14347839355469, 59.24359130859375, 69.34370422363281, 79.44381713867188, 89.54393005371094, 99.64404296875, 109.744140625, 119.84425354003906, 129.94436645507812, 140.0444793701172, 150.14459228515625, 160.2447052001953, 170.34481811523438, 180.44493103027344, 190.5450439453125, 200.64515686035156, 210.74526977539062, 220.8453826904297, 230.94549560546875, 241.0456085205078, 251.14572143554688, 261.245849609375, 271.345947265625, 281.446044921875, 291.5461730957031, 301.64630126953125, 311.74639892578125, 321.84649658203125, 331.9466247558594, 342.0467529296875, 352.1468505859375, 362.2469482421875, 372.3470764160156, 382.44720458984375, 392.54730224609375, 402.64739990234375, 412.7475280761719, 422.84765625, 432.94775390625]}, "gradients/decoder.transformer.h.4.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 6.0, 7.0, 6.0, 11.0, 11.0, 12.0, 8.0, 20.0, 14.0, 25.0, 20.0, 27.0, 28.0, 27.0, 31.0, 40.0, 37.0, 46.0, 48.0, 51.0, 38.0, 54.0, 44.0, 46.0, 40.0, 35.0, 29.0, 31.0, 30.0, 26.0, 34.0, 24.0, 21.0, 9.0, 16.0, 11.0, 2.0, 10.0, 4.0, 3.0, 6.0, 3.0, 4.0, 9.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-43.23390579223633, -41.83201217651367, -40.43012237548828, -39.028228759765625, -37.626338958740234, -36.22444534301758, -34.82255554199219, -33.42066192626953, -32.018768310546875, -30.61687660217285, -29.214984893798828, -27.813091278076172, -26.41119956970215, -25.009307861328125, -23.6074161529541, -22.205524444580078, -20.803632736206055, -19.40174102783203, -17.999849319458008, -16.597957611083984, -15.196063995361328, -13.794172286987305, -12.392280578613281, -10.990387916564941, -9.588496208190918, -8.186604499816895, -6.784711837768555, -5.382820129394531, -3.9809279441833496, -2.579035758972168, -1.1771440505981445, 0.2247486114501953, 1.6266403198242188, 3.0285325050354004, 4.430424690246582, 5.8323163986206055, 7.234208583831787, 8.636100769042969, 10.037992477416992, 11.439885139465332, 12.841776847839355, 14.243668556213379, 15.645561218261719, 17.047452926635742, 18.449344635009766, 19.851238250732422, 21.253128051757812, 22.65502166748047, 24.056913375854492, 25.458805084228516, 26.86069679260254, 28.262588500976562, 29.66448211669922, 31.066373825073242, 32.468265533447266, 33.87015914916992, 35.27204895019531, 36.67394256591797, 38.07583236694336, 39.477725982666016, 40.879615783691406, 42.28150939941406, 43.68340301513672, 45.08529281616211, 46.487186431884766]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 6.0, 5.0, 2.0, 6.0, 5.0, 4.0, 8.0, 8.0, 7.0, 15.0, 19.0, 17.0, 18.0, 19.0, 25.0, 26.0, 28.0, 42.0, 35.0, 35.0, 33.0, 40.0, 34.0, 35.0, 43.0, 47.0, 39.0, 50.0, 35.0, 34.0, 35.0, 38.0, 27.0, 28.0, 37.0, 25.0, 23.0, 10.0, 12.0, 9.0, 7.0, 2.0, 8.0, 6.0, 10.0, 4.0, 3.0, 5.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0], "bins": [-9.5234375, -9.239501953125, -8.95556640625, -8.671630859375, -8.3876953125, -8.103759765625, -7.81982421875, -7.535888671875, -7.251953125, -6.968017578125, -6.68408203125, -6.400146484375, -6.1162109375, -5.832275390625, -5.54833984375, -5.264404296875, -4.98046875, -4.696533203125, -4.41259765625, -4.128662109375, -3.8447265625, -3.560791015625, -3.27685546875, -2.992919921875, -2.708984375, -2.425048828125, -2.14111328125, -1.857177734375, -1.5732421875, -1.289306640625, -1.00537109375, -0.721435546875, -0.4375, -0.153564453125, 0.13037109375, 0.414306640625, 0.6982421875, 0.982177734375, 1.26611328125, 1.550048828125, 1.833984375, 2.117919921875, 2.40185546875, 2.685791015625, 2.9697265625, 3.253662109375, 3.53759765625, 3.821533203125, 4.10546875, 4.389404296875, 4.67333984375, 4.957275390625, 5.2412109375, 5.525146484375, 5.80908203125, 6.093017578125, 6.376953125, 6.660888671875, 6.94482421875, 7.228759765625, 7.5126953125, 7.796630859375, 8.08056640625, 8.364501953125, 8.6484375]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 7.0, 5.0, 16.0, 13.0, 19.0, 41.0, 36.0, 61.0, 94.0, 170.0, 240.0, 444.0, 674.0, 1132.0, 1815.0, 2968.0, 4890.0, 8190.0, 13206.0, 22279.0, 37130.0, 63880.0, 109407.0, 185858.0, 230015.0, 150827.0, 87976.0, 51386.0, 29993.0, 18043.0, 10741.0, 6587.0, 3971.0, 2476.0, 1501.0, 935.0, 522.0, 378.0, 237.0, 139.0, 102.0, 59.0, 39.0, 21.0, 19.0, 14.0, 6.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.8525390625, -0.826934814453125, -0.80133056640625, -0.775726318359375, -0.7501220703125, -0.724517822265625, -0.69891357421875, -0.673309326171875, -0.647705078125, -0.622100830078125, -0.59649658203125, -0.570892333984375, -0.5452880859375, -0.519683837890625, -0.49407958984375, -0.468475341796875, -0.44287109375, -0.417266845703125, -0.39166259765625, -0.366058349609375, -0.3404541015625, -0.314849853515625, -0.28924560546875, -0.263641357421875, -0.238037109375, -0.212432861328125, -0.18682861328125, -0.161224365234375, -0.1356201171875, -0.110015869140625, -0.08441162109375, -0.058807373046875, -0.033203125, -0.007598876953125, 0.01800537109375, 0.043609619140625, 0.0692138671875, 0.094818115234375, 0.12042236328125, 0.146026611328125, 0.171630859375, 0.197235107421875, 0.22283935546875, 0.248443603515625, 0.2740478515625, 0.299652099609375, 0.32525634765625, 0.350860595703125, 0.37646484375, 0.402069091796875, 0.42767333984375, 0.453277587890625, 0.4788818359375, 0.504486083984375, 0.53009033203125, 0.555694580078125, 0.581298828125, 0.606903076171875, 0.63250732421875, 0.658111572265625, 0.6837158203125, 0.709320068359375, 0.73492431640625, 0.760528564453125, 0.7861328125]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 3.0, 7.0, 8.0, 6.0, 14.0, 7.0, 12.0, 17.0, 28.0, 15.0, 23.0, 26.0, 34.0, 26.0, 44.0, 46.0, 35.0, 31.0, 54.0, 45.0, 1078.0, 46.0, 53.0, 35.0, 40.0, 37.0, 27.0, 35.0, 41.0, 22.0, 28.0, 19.0, 18.0, 20.0, 13.0, 12.0, 3.0, 10.0, 2.0, 5.0, 6.0, 3.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.86328125, -5.669189453125, -5.47509765625, -5.281005859375, -5.0869140625, -4.892822265625, -4.69873046875, -4.504638671875, -4.310546875, -4.116455078125, -3.92236328125, -3.728271484375, -3.5341796875, -3.340087890625, -3.14599609375, -2.951904296875, -2.7578125, -2.563720703125, -2.36962890625, -2.175537109375, -1.9814453125, -1.787353515625, -1.59326171875, -1.399169921875, -1.205078125, -1.010986328125, -0.81689453125, -0.622802734375, -0.4287109375, -0.234619140625, -0.04052734375, 0.153564453125, 0.34765625, 0.541748046875, 0.73583984375, 0.929931640625, 1.1240234375, 1.318115234375, 1.51220703125, 1.706298828125, 1.900390625, 2.094482421875, 2.28857421875, 2.482666015625, 2.6767578125, 2.870849609375, 3.06494140625, 3.259033203125, 3.453125, 3.647216796875, 3.84130859375, 4.035400390625, 4.2294921875, 4.423583984375, 4.61767578125, 4.811767578125, 5.005859375, 5.199951171875, 5.39404296875, 5.588134765625, 5.7822265625, 5.976318359375, 6.17041015625, 6.364501953125, 6.55859375]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 4.0, 5.0, 3.0, 7.0, 13.0, 18.0, 21.0, 41.0, 48.0, 84.0, 136.0, 174.0, 285.0, 429.0, 678.0, 1089.0, 1652.0, 2451.0, 3846.0, 5847.0, 9031.0, 13935.0, 21447.0, 33371.0, 50735.0, 78891.0, 121607.0, 281174.0, 1114926.0, 124755.0, 81000.0, 52282.0, 34108.0, 22327.0, 14111.0, 9252.0, 6098.0, 3881.0, 2557.0, 1657.0, 1127.0, 666.0, 476.0, 300.0, 230.0, 128.0, 84.0, 60.0, 37.0, 25.0, 17.0, 6.0, 5.0, 2.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.57177734375, -0.5533523559570312, -0.5349273681640625, -0.5165023803710938, -0.498077392578125, -0.47965240478515625, -0.4612274169921875, -0.44280242919921875, -0.42437744140625, -0.40595245361328125, -0.3875274658203125, -0.36910247802734375, -0.350677490234375, -0.33225250244140625, -0.3138275146484375, -0.29540252685546875, -0.2769775390625, -0.25855255126953125, -0.2401275634765625, -0.22170257568359375, -0.203277587890625, -0.18485260009765625, -0.1664276123046875, -0.14800262451171875, -0.12957763671875, -0.11115264892578125, -0.0927276611328125, -0.07430267333984375, -0.055877685546875, -0.03745269775390625, -0.0190277099609375, -0.00060272216796875, 0.017822265625, 0.03624725341796875, 0.0546722412109375, 0.07309722900390625, 0.091522216796875, 0.10994720458984375, 0.1283721923828125, 0.14679718017578125, 0.16522216796875, 0.18364715576171875, 0.2020721435546875, 0.22049713134765625, 0.238922119140625, 0.25734710693359375, 0.2757720947265625, 0.29419708251953125, 0.3126220703125, 0.33104705810546875, 0.3494720458984375, 0.36789703369140625, 0.386322021484375, 0.40474700927734375, 0.4231719970703125, 0.44159698486328125, 0.46002197265625, 0.47844696044921875, 0.4968719482421875, 0.5152969360351562, 0.533721923828125, 0.5521469116210938, 0.5705718994140625, 0.5889968872070312, 0.607421875]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 7.0, 5.0, 5.0, 9.0, 12.0, 15.0, 19.0, 22.0, 30.0, 21.0, 41.0, 39.0, 52.0, 55.0, 48.0, 52.0, 63.0, 68.0, 57.0, 47.0, 48.0, 37.0, 56.0, 47.0, 37.0, 28.0, 17.0, 25.0, 17.0, 9.0, 10.0, 3.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.002513885498046875, -0.0024206936359405518, -0.0023275017738342285, -0.0022343099117279053, -0.002141118049621582, -0.002047926187515259, -0.0019547343254089355, -0.0018615424633026123, -0.001768350601196289, -0.0016751587390899658, -0.0015819668769836426, -0.0014887750148773193, -0.001395583152770996, -0.0013023912906646729, -0.0012091994285583496, -0.0011160075664520264, -0.0010228157043457031, -0.0009296238422393799, -0.0008364319801330566, -0.0007432401180267334, -0.0006500482559204102, -0.0005568563938140869, -0.00046366453170776367, -0.00037047266960144043, -0.0002772808074951172, -0.00018408894538879395, -9.08970832824707e-05, 2.294778823852539e-06, 9.548664093017578e-05, 0.00018867850303649902, 0.00028187036514282227, 0.0003750622272491455, 0.00046825408935546875, 0.000561445951461792, 0.0006546378135681152, 0.0007478296756744385, 0.0008410215377807617, 0.000934213399887085, 0.0010274052619934082, 0.0011205971240997314, 0.0012137889862060547, 0.001306980848312378, 0.0014001727104187012, 0.0014933645725250244, 0.0015865564346313477, 0.001679748296737671, 0.0017729401588439941, 0.0018661320209503174, 0.0019593238830566406, 0.002052515745162964, 0.002145707607269287, 0.0022388994693756104, 0.0023320913314819336, 0.002425283193588257, 0.00251847505569458, 0.0026116669178009033, 0.0027048587799072266, 0.00279805064201355, 0.002891242504119873, 0.0029844343662261963, 0.0030776262283325195, 0.0031708180904388428, 0.003264009952545166, 0.0033572018146514893, 0.0034503936767578125]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 2.0, 5.0, 6.0, 4.0, 14.0, 24.0, 28.0, 43.0, 40.0, 57.0, 88.0, 98.0, 134.0, 176.0, 289.0, 549.0, 1040.0, 57291.0, 985205.0, 1641.0, 642.0, 357.0, 241.0, 165.0, 93.0, 58.0, 69.0, 47.0, 35.0, 37.0, 20.0, 17.0, 12.0, 10.0, 10.0, 3.0, 0.0, 2.0, 3.0, 1.0, 4.0, 1.0, 2.0, 2.0, 0.0, 2.0], "bins": [-0.06158447265625, -0.05988311767578125, -0.0581817626953125, -0.05648040771484375, -0.054779052734375, -0.05307769775390625, -0.0513763427734375, -0.04967498779296875, -0.0479736328125, -0.04627227783203125, -0.0445709228515625, -0.04286956787109375, -0.041168212890625, -0.03946685791015625, -0.0377655029296875, -0.03606414794921875, -0.03436279296875, -0.03266143798828125, -0.0309600830078125, -0.02925872802734375, -0.027557373046875, -0.02585601806640625, -0.0241546630859375, -0.02245330810546875, -0.020751953125, -0.01905059814453125, -0.0173492431640625, -0.01564788818359375, -0.013946533203125, -0.01224517822265625, -0.0105438232421875, -0.00884246826171875, -0.00714111328125, -0.00543975830078125, -0.0037384033203125, -0.00203704833984375, -0.000335693359375, 0.00136566162109375, 0.0030670166015625, 0.00476837158203125, 0.0064697265625, 0.00817108154296875, 0.0098724365234375, 0.01157379150390625, 0.013275146484375, 0.01497650146484375, 0.0166778564453125, 0.01837921142578125, 0.02008056640625, 0.02178192138671875, 0.0234832763671875, 0.02518463134765625, 0.026885986328125, 0.02858734130859375, 0.0302886962890625, 0.03199005126953125, 0.03369140625, 0.03539276123046875, 0.0370941162109375, 0.03879547119140625, 0.040496826171875, 0.04219818115234375, 0.0438995361328125, 0.04560089111328125, 0.04730224609375]}, "gradients/decoder.transformer.h.4.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 20.0, 97.0, 324.0, 405.0, 135.0, 25.0, 6.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.007933234795928001, -0.007791867479681969, -0.007650499697774649, -0.007509132381528616, -0.007367765065282583, -0.0072263977490365505, -0.0070850299671292305, -0.006943662650883198, -0.006802295334637165, -0.006660928018391132, -0.006519560236483812, -0.00637819292023778, -0.006236825603991747, -0.006095458287745714, -0.005954090505838394, -0.0058127231895923615, -0.005671355873346329, -0.005529988557100296, -0.005388620775192976, -0.005247253458946943, -0.005105886142700911, -0.004964518826454878, -0.004823151044547558, -0.004681783728301525, -0.004540415946394205, -0.004399048630148172, -0.004257680848240852, -0.00411631353199482, -0.003974946215748787, -0.0038335786666721106, -0.003692211117595434, -0.0035508438013494015, -0.003409476252272725, -0.0032681087031960487, -0.003126741386950016, -0.0029853738378733397, -0.002844006521627307, -0.0027026389725506306, -0.002561271656304598, -0.0024199041072279215, -0.002278536558151245, -0.0021371690090745687, -0.001995801692828536, -0.0018544341437518597, -0.001713066827505827, -0.0015716992784291506, -0.001430331845767796, -0.0012889644131064415, -0.0011475970968604088, -0.0010062296641990542, -0.0008648622315376997, -0.0007234947406686842, -0.0005821273080073297, -0.00044075987534597516, -0.0002993923844769597, -0.00015802495181560516, -1.6657519154250622e-05, 0.00012470992805901915, 0.0002660773752722889, 0.0004074448370374739, 0.0005488122696988285, 0.000690179702360183, 0.0008315471932291985, 0.000972914625890553, 0.0011142820585519075]}, "gradients/decoder.transformer.h.4.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 7.0, 5.0, 4.0, 8.0, 6.0, 5.0, 5.0, 3.0, 9.0, 12.0, 15.0, 14.0, 16.0, 20.0, 21.0, 30.0, 29.0, 33.0, 39.0, 42.0, 32.0, 31.0, 32.0, 36.0, 41.0, 41.0, 35.0, 36.0, 45.0, 35.0, 33.0, 33.0, 39.0, 25.0, 27.0, 23.0, 27.0, 16.0, 20.0, 13.0, 9.0, 13.0, 11.0, 4.0, 9.0, 5.0, 8.0, 1.0, 6.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0011650919914245605, -0.001124613918364048, -0.0010841358453035355, -0.001043657772243023, -0.0010031796991825104, -0.0009627016261219978, -0.0009222235530614853, -0.0008817454800009727, -0.0008412674069404602, -0.0008007893338799477, -0.0007603112608194351, -0.0007198331877589226, -0.00067935511469841, -0.0006388770416378975, -0.000598398968577385, -0.0005579208955168724, -0.0005174428224563599, -0.0004769647493958473, -0.0004364866763353348, -0.00039600860327482224, -0.0003555305302143097, -0.00031505245715379715, -0.0002745743840932846, -0.00023409631103277206, -0.00019361823797225952, -0.00015314016491174698, -0.00011266209185123444, -7.21840187907219e-05, -3.170594573020935e-05, 8.772127330303192e-06, 4.9250200390815735e-05, 8.972827345132828e-05, 0.00013020634651184082, 0.00017068441957235336, 0.0002111624926328659, 0.00025164056569337845, 0.000292118638753891, 0.00033259671181440353, 0.0003730747848749161, 0.0004135528579354286, 0.00045403093099594116, 0.0004945090040564537, 0.0005349870771169662, 0.0005754651501774788, 0.0006159432232379913, 0.0006564212962985039, 0.0006968993693590164, 0.000737377442419529, 0.0007778555154800415, 0.000818333588540554, 0.0008588116616010666, 0.0008992897346615791, 0.0009397678077220917, 0.0009802458807826042, 0.0010207239538431168, 0.0010612020269036293, 0.0011016800999641418, 0.0011421581730246544, 0.001182636246085167, 0.0012231143191456795, 0.001263592392206192, 0.0013040704652667046, 0.001344548538327217, 0.0013850266113877296, 0.0014255046844482422]}, "gradients/decoder.transformer.h.4.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 6.0, 5.0, 2.0, 6.0, 5.0, 4.0, 8.0, 8.0, 7.0, 15.0, 19.0, 17.0, 18.0, 19.0, 25.0, 26.0, 28.0, 42.0, 35.0, 35.0, 33.0, 40.0, 34.0, 35.0, 43.0, 47.0, 39.0, 50.0, 35.0, 34.0, 35.0, 38.0, 27.0, 28.0, 37.0, 25.0, 23.0, 10.0, 12.0, 9.0, 7.0, 2.0, 8.0, 6.0, 10.0, 4.0, 3.0, 5.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0], "bins": [-9.5234375, -9.239501953125, -8.95556640625, -8.671630859375, -8.3876953125, -8.103759765625, -7.81982421875, -7.535888671875, -7.251953125, -6.968017578125, -6.68408203125, -6.400146484375, -6.1162109375, -5.832275390625, -5.54833984375, -5.264404296875, -4.98046875, -4.696533203125, -4.41259765625, -4.128662109375, -3.8447265625, -3.560791015625, -3.27685546875, -2.992919921875, -2.708984375, -2.425048828125, -2.14111328125, -1.857177734375, -1.5732421875, -1.289306640625, -1.00537109375, -0.721435546875, -0.4375, -0.153564453125, 0.13037109375, 0.414306640625, 0.6982421875, 0.982177734375, 1.26611328125, 1.550048828125, 1.833984375, 2.117919921875, 2.40185546875, 2.685791015625, 2.9697265625, 3.253662109375, 3.53759765625, 3.821533203125, 4.10546875, 4.389404296875, 4.67333984375, 4.957275390625, 5.2412109375, 5.525146484375, 5.80908203125, 6.093017578125, 6.376953125, 6.660888671875, 6.94482421875, 7.228759765625, 7.5126953125, 7.796630859375, 8.08056640625, 8.364501953125, 8.6484375]}, "gradients/decoder.transformer.h.4.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 0.0, 3.0, 4.0, 4.0, 5.0, 6.0, 15.0, 18.0, 17.0, 26.0, 26.0, 43.0, 41.0, 44.0, 86.0, 102.0, 161.0, 174.0, 275.0, 381.0, 629.0, 1155.0, 2155.0, 4255.0, 10749.0, 29942.0, 85989.0, 243182.0, 385457.0, 182089.0, 63355.0, 21966.0, 8156.0, 3555.0, 1691.0, 938.0, 566.0, 316.0, 240.0, 192.0, 130.0, 113.0, 75.0, 53.0, 40.0, 39.0, 30.0, 16.0, 19.0, 11.0, 7.0, 12.0, 6.0, 6.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-12.1640625, -11.776123046875, -11.38818359375, -11.000244140625, -10.6123046875, -10.224365234375, -9.83642578125, -9.448486328125, -9.060546875, -8.672607421875, -8.28466796875, -7.896728515625, -7.5087890625, -7.120849609375, -6.73291015625, -6.344970703125, -5.95703125, -5.569091796875, -5.18115234375, -4.793212890625, -4.4052734375, -4.017333984375, -3.62939453125, -3.241455078125, -2.853515625, -2.465576171875, -2.07763671875, -1.689697265625, -1.3017578125, -0.913818359375, -0.52587890625, -0.137939453125, 0.25, 0.637939453125, 1.02587890625, 1.413818359375, 1.8017578125, 2.189697265625, 2.57763671875, 2.965576171875, 3.353515625, 3.741455078125, 4.12939453125, 4.517333984375, 4.9052734375, 5.293212890625, 5.68115234375, 6.069091796875, 6.45703125, 6.844970703125, 7.23291015625, 7.620849609375, 8.0087890625, 8.396728515625, 8.78466796875, 9.172607421875, 9.560546875, 9.948486328125, 10.33642578125, 10.724365234375, 11.1123046875, 11.500244140625, 11.88818359375, 12.276123046875, 12.6640625]}, "gradients/decoder.transformer.h.4.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 5.0, 6.0, 1.0, 9.0, 7.0, 8.0, 12.0, 15.0, 26.0, 21.0, 28.0, 31.0, 32.0, 37.0, 49.0, 59.0, 58.0, 116.0, 193.0, 315.0, 1315.0, 179.0, 102.0, 82.0, 64.0, 48.0, 36.0, 35.0, 32.0, 23.0, 19.0, 12.0, 17.0, 15.0, 16.0, 9.0, 5.0, 5.0, 5.0, 8.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.1875, -22.388427734375, -21.58935546875, -20.790283203125, -19.9912109375, -19.192138671875, -18.39306640625, -17.593994140625, -16.794921875, -15.995849609375, -15.19677734375, -14.397705078125, -13.5986328125, -12.799560546875, -12.00048828125, -11.201416015625, -10.40234375, -9.603271484375, -8.80419921875, -8.005126953125, -7.2060546875, -6.406982421875, -5.60791015625, -4.808837890625, -4.009765625, -3.210693359375, -2.41162109375, -1.612548828125, -0.8134765625, -0.014404296875, 0.78466796875, 1.583740234375, 2.3828125, 3.181884765625, 3.98095703125, 4.780029296875, 5.5791015625, 6.378173828125, 7.17724609375, 7.976318359375, 8.775390625, 9.574462890625, 10.37353515625, 11.172607421875, 11.9716796875, 12.770751953125, 13.56982421875, 14.368896484375, 15.16796875, 15.967041015625, 16.76611328125, 17.565185546875, 18.3642578125, 19.163330078125, 19.96240234375, 20.761474609375, 21.560546875, 22.359619140625, 23.15869140625, 23.957763671875, 24.7568359375, 25.555908203125, 26.35498046875, 27.154052734375, 27.953125]}, "gradients/decoder.transformer.h.4.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 4.0, 4.0, 4.0, 2.0, 5.0, 5.0, 14.0, 11.0, 18.0, 17.0, 37.0, 39.0, 76.0, 80.0, 133.0, 222.0, 290.0, 514.0, 1034.0, 5748.0, 507501.0, 2612138.0, 14714.0, 1380.0, 626.0, 375.0, 252.0, 133.0, 101.0, 56.0, 54.0, 29.0, 30.0, 21.0, 18.0, 8.0, 7.0, 4.0, 3.0, 2.0, 1.0, 0.0, 5.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.15625, -46.24072265625, -44.3251953125, -42.40966796875, -40.494140625, -38.57861328125, -36.6630859375, -34.74755859375, -32.83203125, -30.91650390625, -29.0009765625, -27.08544921875, -25.169921875, -23.25439453125, -21.3388671875, -19.42333984375, -17.5078125, -15.59228515625, -13.6767578125, -11.76123046875, -9.845703125, -7.93017578125, -6.0146484375, -4.09912109375, -2.18359375, -0.26806640625, 1.6474609375, 3.56298828125, 5.478515625, 7.39404296875, 9.3095703125, 11.22509765625, 13.140625, 15.05615234375, 16.9716796875, 18.88720703125, 20.802734375, 22.71826171875, 24.6337890625, 26.54931640625, 28.46484375, 30.38037109375, 32.2958984375, 34.21142578125, 36.126953125, 38.04248046875, 39.9580078125, 41.87353515625, 43.7890625, 45.70458984375, 47.6201171875, 49.53564453125, 51.451171875, 53.36669921875, 55.2822265625, 57.19775390625, 59.11328125, 61.02880859375, 62.9443359375, 64.85986328125, 66.775390625, 68.69091796875, 70.6064453125, 72.52197265625, 74.4375]}, "gradients/decoder.transformer.h.4.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 14.0, 96.0, 318.0, 412.0, 146.0, 23.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.877758026123047, -24.12710952758789, -19.376461029052734, -14.625810623168945, -9.875162124633789, -5.124513626098633, -0.37386322021484375, 4.3767852783203125, 9.127433776855469, 13.878082275390625, 18.62873077392578, 23.37938117980957, 28.130029678344727, 32.88067626953125, 37.63132858276367, 42.38197708129883, 47.132625579833984, 51.88327407836914, 56.6339225769043, 61.38457489013672, 66.13522338867188, 70.88587188720703, 75.63652038574219, 80.38716888427734, 85.1378173828125, 89.88846588134766, 94.63911437988281, 99.38976287841797, 104.14041137695312, 108.89105987548828, 113.64170837402344, 118.39236450195312, 123.14302062988281, 127.89366912841797, 132.64431762695312, 137.3949737548828, 142.14561462402344, 146.89627075195312, 151.64691162109375, 156.39756774902344, 161.14820861816406, 165.89886474609375, 170.64950561523438, 175.40016174316406, 180.1508026123047, 184.90145874023438, 189.652099609375, 194.4027557373047, 199.15341186523438, 203.90406799316406, 208.6547088623047, 213.40536499023438, 218.156005859375, 222.9066619873047, 227.6573028564453, 232.407958984375, 237.15859985351562, 241.9092559814453, 246.65989685058594, 251.41055297851562, 256.16119384765625, 260.9118347167969, 265.6625061035156, 270.41314697265625, 275.1637878417969]}, "gradients/decoder.transformer.h.4.ln_1.bias": {"_type": "histogram", "values": [3.0, 0.0, 6.0, 1.0, 3.0, 4.0, 3.0, 5.0, 5.0, 9.0, 9.0, 6.0, 11.0, 19.0, 19.0, 18.0, 18.0, 19.0, 24.0, 34.0, 34.0, 30.0, 26.0, 34.0, 29.0, 29.0, 31.0, 34.0, 38.0, 45.0, 50.0, 31.0, 25.0, 36.0, 22.0, 36.0, 41.0, 29.0, 25.0, 19.0, 18.0, 22.0, 19.0, 16.0, 23.0, 14.0, 5.0, 9.0, 11.0, 5.0, 7.0, 5.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-43.03937530517578, -41.484378814697266, -39.92938232421875, -38.374385833740234, -36.81938934326172, -35.2643928527832, -33.70939636230469, -32.15440368652344, -30.59940528869629, -29.044408798217773, -27.489412307739258, -25.934417724609375, -24.37942123413086, -22.824424743652344, -21.269428253173828, -19.714431762695312, -18.159435272216797, -16.60443878173828, -15.049442291259766, -13.494446754455566, -11.93945026397705, -10.384453773498535, -8.829458236694336, -7.27446174621582, -5.719465255737305, -4.164468765258789, -2.6094727516174316, -1.0544767379760742, 0.5005197525024414, 2.055516242980957, 3.6105117797851562, 5.165508270263672, 6.7205047607421875, 8.275501251220703, 9.830497741699219, 11.385493278503418, 12.940489768981934, 14.49548625946045, 16.05048179626465, 17.605478286743164, 19.16047477722168, 20.715471267700195, 22.27046775817871, 23.825462341308594, 25.38045883178711, 26.935455322265625, 28.49045181274414, 30.045448303222656, 31.600444793701172, 33.15544128417969, 34.7104377746582, 36.26543426513672, 37.820430755615234, 39.37542724609375, 40.930419921875, 42.48542022705078, 44.04041290283203, 45.59540939331055, 47.15040588378906, 48.70540237426758, 50.260398864746094, 51.81539535522461, 53.370391845703125, 54.925384521484375, 56.480384826660156]}, "gradients/decoder.transformer.h.3.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 2.0, 3.0, 5.0, 4.0, 10.0, 9.0, 8.0, 17.0, 14.0, 11.0, 15.0, 19.0, 15.0, 28.0, 24.0, 27.0, 36.0, 29.0, 30.0, 30.0, 47.0, 35.0, 39.0, 38.0, 39.0, 44.0, 42.0, 35.0, 42.0, 41.0, 40.0, 33.0, 31.0, 31.0, 25.0, 16.0, 8.0, 15.0, 11.0, 9.0, 10.0, 5.0, 6.0, 5.0, 9.0, 3.0, 2.0, 3.0, 5.0, 2.0, 0.0, 3.0, 2.0, 2.0], "bins": [-9.4453125, -9.166259765625, -8.88720703125, -8.608154296875, -8.3291015625, -8.050048828125, -7.77099609375, -7.491943359375, -7.212890625, -6.933837890625, -6.65478515625, -6.375732421875, -6.0966796875, -5.817626953125, -5.53857421875, -5.259521484375, -4.98046875, -4.701416015625, -4.42236328125, -4.143310546875, -3.8642578125, -3.585205078125, -3.30615234375, -3.027099609375, -2.748046875, -2.468994140625, -2.18994140625, -1.910888671875, -1.6318359375, -1.352783203125, -1.07373046875, -0.794677734375, -0.515625, -0.236572265625, 0.04248046875, 0.321533203125, 0.6005859375, 0.879638671875, 1.15869140625, 1.437744140625, 1.716796875, 1.995849609375, 2.27490234375, 2.553955078125, 2.8330078125, 3.112060546875, 3.39111328125, 3.670166015625, 3.94921875, 4.228271484375, 4.50732421875, 4.786376953125, 5.0654296875, 5.344482421875, 5.62353515625, 5.902587890625, 6.181640625, 6.460693359375, 6.73974609375, 7.018798828125, 7.2978515625, 7.576904296875, 7.85595703125, 8.135009765625, 8.4140625]}, "gradients/decoder.transformer.h.3.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, 7.0, 9.0, 9.0, 21.0, 26.0, 20.0, 25.0, 41.0, 37.0, 67.0, 77.0, 102.0, 140.0, 209.0, 274.0, 373.0, 551.0, 5431.0, 4016327.0, 167874.0, 991.0, 442.0, 288.0, 219.0, 186.0, 123.0, 87.0, 75.0, 47.0, 43.0, 27.0, 28.0, 18.0, 17.0, 17.0, 15.0, 5.0, 8.0, 4.0, 5.0, 7.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-90.6875, -87.9990234375, -85.310546875, -82.6220703125, -79.93359375, -77.2451171875, -74.556640625, -71.8681640625, -69.1796875, -66.4912109375, -63.802734375, -61.1142578125, -58.42578125, -55.7373046875, -53.048828125, -50.3603515625, -47.671875, -44.9833984375, -42.294921875, -39.6064453125, -36.91796875, -34.2294921875, -31.541015625, -28.8525390625, -26.1640625, -23.4755859375, -20.787109375, -18.0986328125, -15.41015625, -12.7216796875, -10.033203125, -7.3447265625, -4.65625, -1.9677734375, 0.720703125, 3.4091796875, 6.09765625, 8.7861328125, 11.474609375, 14.1630859375, 16.8515625, 19.5400390625, 22.228515625, 24.9169921875, 27.60546875, 30.2939453125, 32.982421875, 35.6708984375, 38.359375, 41.0478515625, 43.736328125, 46.4248046875, 49.11328125, 51.8017578125, 54.490234375, 57.1787109375, 59.8671875, 62.5556640625, 65.244140625, 67.9326171875, 70.62109375, 73.3095703125, 75.998046875, 78.6865234375, 81.375]}, "gradients/decoder.transformer.h.3.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 15.0, 8.0, 13.0, 21.0, 19.0, 31.0, 50.0, 54.0, 108.0, 123.0, 197.0, 305.0, 474.0, 602.0, 613.0, 439.0, 321.0, 207.0, 144.0, 99.0, 65.0, 35.0, 39.0, 24.0, 18.0, 16.0, 14.0, 9.0, 5.0, 2.0, 5.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-20.234375, -19.68798828125, -19.1416015625, -18.59521484375, -18.048828125, -17.50244140625, -16.9560546875, -16.40966796875, -15.86328125, -15.31689453125, -14.7705078125, -14.22412109375, -13.677734375, -13.13134765625, -12.5849609375, -12.03857421875, -11.4921875, -10.94580078125, -10.3994140625, -9.85302734375, -9.306640625, -8.76025390625, -8.2138671875, -7.66748046875, -7.12109375, -6.57470703125, -6.0283203125, -5.48193359375, -4.935546875, -4.38916015625, -3.8427734375, -3.29638671875, -2.75, -2.20361328125, -1.6572265625, -1.11083984375, -0.564453125, -0.01806640625, 0.5283203125, 1.07470703125, 1.62109375, 2.16748046875, 2.7138671875, 3.26025390625, 3.806640625, 4.35302734375, 4.8994140625, 5.44580078125, 5.9921875, 6.53857421875, 7.0849609375, 7.63134765625, 8.177734375, 8.72412109375, 9.2705078125, 9.81689453125, 10.36328125, 10.90966796875, 11.4560546875, 12.00244140625, 12.548828125, 13.09521484375, 13.6416015625, 14.18798828125, 14.734375]}, "gradients/decoder.transformer.h.3.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 5.0, 7.0, 10.0, 13.0, 22.0, 31.0, 36.0, 51.0, 83.0, 147.0, 289.0, 1161.0, 47756.0, 4042788.0, 99493.0, 1630.0, 322.0, 165.0, 82.0, 55.0, 33.0, 31.0, 26.0, 22.0, 11.0, 9.0, 5.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-50.6875, -48.44140625, -46.1953125, -43.94921875, -41.703125, -39.45703125, -37.2109375, -34.96484375, -32.71875, -30.47265625, -28.2265625, -25.98046875, -23.734375, -21.48828125, -19.2421875, -16.99609375, -14.75, -12.50390625, -10.2578125, -8.01171875, -5.765625, -3.51953125, -1.2734375, 0.97265625, 3.21875, 5.46484375, 7.7109375, 9.95703125, 12.203125, 14.44921875, 16.6953125, 18.94140625, 21.1875, 23.43359375, 25.6796875, 27.92578125, 30.171875, 32.41796875, 34.6640625, 36.91015625, 39.15625, 41.40234375, 43.6484375, 45.89453125, 48.140625, 50.38671875, 52.6328125, 54.87890625, 57.125, 59.37109375, 61.6171875, 63.86328125, 66.109375, 68.35546875, 70.6015625, 72.84765625, 75.09375, 77.33984375, 79.5859375, 81.83203125, 84.078125, 86.32421875, 88.5703125, 90.81640625, 93.0625]}, "gradients/decoder.transformer.h.3.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 28.0, 180.0, 483.0, 259.0, 57.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-216.067626953125, -209.0161895751953, -201.96475219726562, -194.91331481933594, -187.86187744140625, -180.8104248046875, -173.75900268554688, -166.70755004882812, -159.65611267089844, -152.60467529296875, -145.55323791503906, -138.50180053710938, -131.4503631591797, -124.39891815185547, -117.34748077392578, -110.29603576660156, -103.2446060180664, -96.19316864013672, -89.14173126220703, -82.09028625488281, -75.03884887695312, -67.98741149902344, -60.93597412109375, -53.8845329284668, -46.83309555053711, -39.78165817260742, -32.73021697998047, -25.67877960205078, -18.62734031677246, -11.57590103149414, -4.524463653564453, 2.5269775390625, 9.578414916992188, 16.629854202270508, 23.681293487548828, 30.732730865478516, 37.78417205810547, 44.835609436035156, 51.887046813964844, 58.9384880065918, 65.98992919921875, 73.04136657714844, 80.09280395507812, 87.14424133300781, 94.19568634033203, 101.24712371826172, 108.2985610961914, 115.35000610351562, 122.40143585205078, 129.452880859375, 136.5043182373047, 143.55575561523438, 150.60719299316406, 157.65863037109375, 164.71006774902344, 171.76150512695312, 178.8129425048828, 185.8643798828125, 192.9158172607422, 199.96725463867188, 207.01869201660156, 214.07012939453125, 221.12158203125, 228.1730194091797, 235.22445678710938]}, "gradients/decoder.transformer.h.3.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 3.0, 8.0, 4.0, 8.0, 5.0, 2.0, 12.0, 16.0, 17.0, 17.0, 16.0, 21.0, 16.0, 23.0, 31.0, 33.0, 37.0, 33.0, 33.0, 40.0, 43.0, 49.0, 29.0, 35.0, 40.0, 46.0, 31.0, 39.0, 37.0, 34.0, 34.0, 32.0, 25.0, 18.0, 20.0, 17.0, 18.0, 14.0, 11.0, 7.0, 10.0, 10.0, 12.0, 6.0, 9.0, 5.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-46.22955322265625, -44.726890563964844, -43.22422409057617, -41.721561431884766, -40.21889877319336, -38.71623229980469, -37.21356964111328, -35.710906982421875, -34.20824432373047, -32.70558166503906, -31.202917098999023, -29.700252532958984, -28.197589874267578, -26.69492530822754, -25.1922607421875, -23.689598083496094, -22.186931610107422, -20.684267044067383, -19.181604385375977, -17.678939819335938, -16.17627716064453, -14.673612594604492, -13.170948028564453, -11.66828441619873, -10.165620803833008, -8.662957191467285, -7.160293102264404, -5.657629013061523, -4.154965400695801, -2.652301788330078, -1.149637222290039, 0.3530263900756836, 1.8556861877441406, 3.3583500385284424, 4.861013889312744, 6.363677978515625, 7.866341590881348, 9.36900520324707, 10.87166976928711, 12.374333381652832, 13.876996994018555, 15.379660606384277, 16.88232421875, 18.38498878479004, 19.887653350830078, 21.390316009521484, 22.892980575561523, 24.395645141601562, 25.89830780029297, 27.400972366333008, 28.903635025024414, 30.406299591064453, 31.90896224975586, 33.41162872314453, 34.91429138183594, 36.416954040527344, 37.91961669921875, 39.422279357910156, 40.92494583129883, 42.427608489990234, 43.93027114868164, 45.43293762207031, 46.93560028076172, 48.438262939453125, 49.9409294128418]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 6.0, 4.0, 6.0, 7.0, 8.0, 8.0, 9.0, 13.0, 11.0, 18.0, 9.0, 14.0, 17.0, 20.0, 25.0, 21.0, 37.0, 32.0, 30.0, 29.0, 39.0, 48.0, 41.0, 48.0, 41.0, 39.0, 35.0, 40.0, 34.0, 39.0, 26.0, 30.0, 30.0, 26.0, 28.0, 21.0, 17.0, 13.0, 10.0, 15.0, 12.0, 11.0, 6.0, 5.0, 5.0, 4.0, 4.0, 5.0, 4.0, 5.0, 2.0, 1.0, 3.0, 0.0, 2.0], "bins": [-8.7734375, -8.5087890625, -8.244140625, -7.9794921875, -7.71484375, -7.4501953125, -7.185546875, -6.9208984375, -6.65625, -6.3916015625, -6.126953125, -5.8623046875, -5.59765625, -5.3330078125, -5.068359375, -4.8037109375, -4.5390625, -4.2744140625, -4.009765625, -3.7451171875, -3.48046875, -3.2158203125, -2.951171875, -2.6865234375, -2.421875, -2.1572265625, -1.892578125, -1.6279296875, -1.36328125, -1.0986328125, -0.833984375, -0.5693359375, -0.3046875, -0.0400390625, 0.224609375, 0.4892578125, 0.75390625, 1.0185546875, 1.283203125, 1.5478515625, 1.8125, 2.0771484375, 2.341796875, 2.6064453125, 2.87109375, 3.1357421875, 3.400390625, 3.6650390625, 3.9296875, 4.1943359375, 4.458984375, 4.7236328125, 4.98828125, 5.2529296875, 5.517578125, 5.7822265625, 6.046875, 6.3115234375, 6.576171875, 6.8408203125, 7.10546875, 7.3701171875, 7.634765625, 7.8994140625, 8.1640625]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 1.0, 3.0, 8.0, 4.0, 10.0, 12.0, 14.0, 15.0, 44.0, 44.0, 74.0, 111.0, 156.0, 260.0, 419.0, 622.0, 948.0, 1555.0, 2466.0, 3776.0, 5955.0, 9553.0, 15205.0, 24512.0, 39350.0, 64575.0, 105843.0, 170705.0, 212473.0, 149359.0, 91654.0, 56316.0, 34463.0, 21649.0, 13455.0, 8419.0, 5319.0, 3344.0, 2121.0, 1317.0, 869.0, 560.0, 340.0, 229.0, 165.0, 104.0, 53.0, 45.0, 23.0, 23.0, 8.0, 6.0, 4.0, 3.0, 2.0, 3.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.76708984375, -0.7433853149414062, -0.7196807861328125, -0.6959762573242188, -0.672271728515625, -0.6485671997070312, -0.6248626708984375, -0.6011581420898438, -0.57745361328125, -0.5537490844726562, -0.5300445556640625, -0.5063400268554688, -0.482635498046875, -0.45893096923828125, -0.4352264404296875, -0.41152191162109375, -0.3878173828125, -0.36411285400390625, -0.3404083251953125, -0.31670379638671875, -0.292999267578125, -0.26929473876953125, -0.2455902099609375, -0.22188568115234375, -0.19818115234375, -0.17447662353515625, -0.1507720947265625, -0.12706756591796875, -0.103363037109375, -0.07965850830078125, -0.0559539794921875, -0.03224945068359375, -0.008544921875, 0.01515960693359375, 0.0388641357421875, 0.06256866455078125, 0.086273193359375, 0.10997772216796875, 0.1336822509765625, 0.15738677978515625, 0.18109130859375, 0.20479583740234375, 0.2285003662109375, 0.25220489501953125, 0.275909423828125, 0.29961395263671875, 0.3233184814453125, 0.34702301025390625, 0.3707275390625, 0.39443206787109375, 0.4181365966796875, 0.44184112548828125, 0.465545654296875, 0.48925018310546875, 0.5129547119140625, 0.5366592407226562, 0.56036376953125, 0.5840682983398438, 0.6077728271484375, 0.6314773559570312, 0.655181884765625, 0.6788864135742188, 0.7025909423828125, 0.7262954711914062, 0.75]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 0.0, 5.0, 2.0, 4.0, 2.0, 6.0, 2.0, 7.0, 7.0, 14.0, 8.0, 10.0, 24.0, 13.0, 18.0, 23.0, 33.0, 30.0, 28.0, 30.0, 24.0, 36.0, 37.0, 28.0, 35.0, 35.0, 1072.0, 43.0, 42.0, 51.0, 39.0, 43.0, 28.0, 37.0, 22.0, 30.0, 25.0, 27.0, 25.0, 12.0, 14.0, 12.0, 12.0, 8.0, 3.0, 9.0, 5.0, 5.0, 3.0, 2.0, 4.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.3828125, -5.20819091796875, -5.0335693359375, -4.85894775390625, -4.684326171875, -4.50970458984375, -4.3350830078125, -4.16046142578125, -3.98583984375, -3.81121826171875, -3.6365966796875, -3.46197509765625, -3.287353515625, -3.11273193359375, -2.9381103515625, -2.76348876953125, -2.5888671875, -2.41424560546875, -2.2396240234375, -2.06500244140625, -1.890380859375, -1.71575927734375, -1.5411376953125, -1.36651611328125, -1.19189453125, -1.01727294921875, -0.8426513671875, -0.66802978515625, -0.493408203125, -0.31878662109375, -0.1441650390625, 0.03045654296875, 0.205078125, 0.37969970703125, 0.5543212890625, 0.72894287109375, 0.903564453125, 1.07818603515625, 1.2528076171875, 1.42742919921875, 1.60205078125, 1.77667236328125, 1.9512939453125, 2.12591552734375, 2.300537109375, 2.47515869140625, 2.6497802734375, 2.82440185546875, 2.9990234375, 3.17364501953125, 3.3482666015625, 3.52288818359375, 3.697509765625, 3.87213134765625, 4.0467529296875, 4.22137451171875, 4.39599609375, 4.57061767578125, 4.7452392578125, 4.91986083984375, 5.094482421875, 5.26910400390625, 5.4437255859375, 5.61834716796875, 5.79296875]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 3.0, 2.0, 7.0, 12.0, 19.0, 35.0, 50.0, 58.0, 105.0, 169.0, 274.0, 369.0, 580.0, 903.0, 1431.0, 2306.0, 3363.0, 5320.0, 8153.0, 12096.0, 18964.0, 29562.0, 45161.0, 70594.0, 108848.0, 156427.0, 1225780.0, 139834.0, 93794.0, 60715.0, 39320.0, 25464.0, 16421.0, 10900.0, 6989.0, 4616.0, 2940.0, 1894.0, 1318.0, 797.0, 548.0, 365.0, 215.0, 140.0, 92.0, 67.0, 42.0, 33.0, 20.0, 10.0, 4.0, 6.0, 4.0, 2.0, 0.0, 2.0], "bins": [-0.6181640625, -0.60015869140625, -0.5821533203125, -0.56414794921875, -0.546142578125, -0.52813720703125, -0.5101318359375, -0.49212646484375, -0.47412109375, -0.45611572265625, -0.4381103515625, -0.42010498046875, -0.402099609375, -0.38409423828125, -0.3660888671875, -0.34808349609375, -0.330078125, -0.31207275390625, -0.2940673828125, -0.27606201171875, -0.258056640625, -0.24005126953125, -0.2220458984375, -0.20404052734375, -0.18603515625, -0.16802978515625, -0.1500244140625, -0.13201904296875, -0.114013671875, -0.09600830078125, -0.0780029296875, -0.05999755859375, -0.0419921875, -0.02398681640625, -0.0059814453125, 0.01202392578125, 0.030029296875, 0.04803466796875, 0.0660400390625, 0.08404541015625, 0.10205078125, 0.12005615234375, 0.1380615234375, 0.15606689453125, 0.174072265625, 0.19207763671875, 0.2100830078125, 0.22808837890625, 0.24609375, 0.26409912109375, 0.2821044921875, 0.30010986328125, 0.318115234375, 0.33612060546875, 0.3541259765625, 0.37213134765625, 0.39013671875, 0.40814208984375, 0.4261474609375, 0.44415283203125, 0.462158203125, 0.48016357421875, 0.4981689453125, 0.51617431640625, 0.5341796875]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 1.0, 1.0, 2.0, 7.0, 12.0, 20.0, 14.0, 22.0, 25.0, 15.0, 24.0, 34.0, 44.0, 49.0, 68.0, 64.0, 61.0, 61.0, 74.0, 65.0, 67.0, 51.0, 36.0, 35.0, 34.0, 32.0, 20.0, 11.0, 14.0, 10.0, 6.0, 5.0, 6.0, 5.0, 2.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.0025310516357421875, -0.002453625202178955, -0.0023761987686157227, -0.0022987723350524902, -0.002221345901489258, -0.0021439194679260254, -0.002066493034362793, -0.0019890666007995605, -0.0019116401672363281, -0.0018342137336730957, -0.0017567873001098633, -0.0016793608665466309, -0.0016019344329833984, -0.001524507999420166, -0.0014470815658569336, -0.0013696551322937012, -0.0012922286987304688, -0.0012148022651672363, -0.001137375831604004, -0.0010599493980407715, -0.000982522964477539, -0.0009050965309143066, -0.0008276700973510742, -0.0007502436637878418, -0.0006728172302246094, -0.000595390796661377, -0.0005179643630981445, -0.0004405379295349121, -0.0003631114959716797, -0.00028568506240844727, -0.00020825862884521484, -0.00013083219528198242, -5.340576171875e-05, 2.4020671844482422e-05, 0.00010144710540771484, 0.00017887353897094727, 0.0002562999725341797, 0.0003337264060974121, 0.00041115283966064453, 0.000488579273223877, 0.0005660057067871094, 0.0006434321403503418, 0.0007208585739135742, 0.0007982850074768066, 0.0008757114410400391, 0.0009531378746032715, 0.001030564308166504, 0.0011079907417297363, 0.0011854171752929688, 0.0012628436088562012, 0.0013402700424194336, 0.001417696475982666, 0.0014951229095458984, 0.0015725493431091309, 0.0016499757766723633, 0.0017274022102355957, 0.0018048286437988281, 0.0018822550773620605, 0.001959681510925293, 0.0020371079444885254, 0.002114534378051758, 0.0021919608116149902, 0.0022693872451782227, 0.002346813678741455, 0.0024242401123046875]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 2.0, 0.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 3.0, 5.0, 3.0, 7.0, 10.0, 13.0, 15.0, 14.0, 25.0, 38.0, 40.0, 70.0, 82.0, 121.0, 173.0, 276.0, 565.0, 1134.0, 88656.0, 953666.0, 1864.0, 652.0, 386.0, 208.0, 132.0, 92.0, 58.0, 52.0, 46.0, 41.0, 38.0, 29.0, 13.0, 4.0, 2.0, 7.0, 3.0, 5.0, 4.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.046356201171875, -0.04486894607543945, -0.043381690979003906, -0.04189443588256836, -0.04040718078613281, -0.038919925689697266, -0.03743267059326172, -0.03594541549682617, -0.034458160400390625, -0.03297090530395508, -0.03148365020751953, -0.029996395111083984, -0.028509140014648438, -0.02702188491821289, -0.025534629821777344, -0.024047374725341797, -0.02256011962890625, -0.021072864532470703, -0.019585609436035156, -0.01809835433959961, -0.016611099243164062, -0.015123844146728516, -0.013636589050292969, -0.012149333953857422, -0.010662078857421875, -0.009174823760986328, -0.007687568664550781, -0.006200313568115234, -0.0047130584716796875, -0.0032258033752441406, -0.0017385482788085938, -0.0002512931823730469, 0.0012359619140625, 0.002723217010498047, 0.004210472106933594, 0.005697727203369141, 0.0071849822998046875, 0.008672237396240234, 0.010159492492675781, 0.011646747589111328, 0.013134002685546875, 0.014621257781982422, 0.01610851287841797, 0.017595767974853516, 0.019083023071289062, 0.02057027816772461, 0.022057533264160156, 0.023544788360595703, 0.02503204345703125, 0.026519298553466797, 0.028006553649902344, 0.02949380874633789, 0.030981063842773438, 0.032468318939208984, 0.03395557403564453, 0.03544282913208008, 0.036930084228515625, 0.03841733932495117, 0.03990459442138672, 0.041391849517822266, 0.04287910461425781, 0.04436635971069336, 0.045853614807128906, 0.04734086990356445, 0.048828125]}, "gradients/decoder.transformer.h.3.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 5.0, 4.0, 27.0, 71.0, 151.0, 275.0, 258.0, 139.0, 61.0, 16.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00186209287494421, -0.0017769852420315146, -0.001691877725534141, -0.0016067700926214457, -0.001521662576124072, -0.0014365549432113767, -0.0013514473102986813, -0.0012663397938013077, -0.0011812321608886123, -0.0010961245279759169, -0.0010110170114785433, -0.0009259093785658479, -0.0008408018038608134, -0.0007556942291557789, -0.0006705865962430835, -0.000585479021538049, -0.0005003714468330145, -0.00041526387212798, -0.00033015626831911504, -0.0002450486645102501, -0.0001599410898052156, -7.48335151001811e-05, 1.0274117812514305e-05, 9.53816925175488e-05, 0.0001804892672225833, 0.0002655968419276178, 0.00035070444573648274, 0.0004358120495453477, 0.0005209196242503822, 0.0006060271989554167, 0.0006911348318681121, 0.0007762424065731466, 0.0008613502141088247, 0.0009464577888138592, 0.0010315653635188937, 0.0011166729964315891, 0.0012017805129289627, 0.0012868881458416581, 0.0013719957787543535, 0.001457103295251727, 0.0015422109281644225, 0.001627318561077118, 0.0017124260775744915, 0.001797533710487187, 0.0018826413433998823, 0.001967748859897256, 0.002052856609225273, 0.0021379641257226467, 0.0022230716422200203, 0.002308179158717394, 0.002393286908045411, 0.0024783944245427847, 0.0025635019410401583, 0.0026486096903681755, 0.002733717206865549, 0.0028188247233629227, 0.0029039322398602962, 0.00298903975635767, 0.003074147505685687, 0.0031592550221830606, 0.0032443625386804342, 0.0033294702880084515, 0.003414577804505825, 0.0034996853210031986, 0.003584793070331216]}, "gradients/decoder.transformer.h.3.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 3.0, 8.0, 8.0, 3.0, 9.0, 6.0, 8.0, 14.0, 8.0, 23.0, 14.0, 18.0, 28.0, 33.0, 27.0, 37.0, 44.0, 36.0, 42.0, 37.0, 51.0, 45.0, 36.0, 35.0, 42.0, 41.0, 51.0, 40.0, 31.0, 43.0, 22.0, 28.0, 22.0, 19.0, 17.0, 20.0, 12.0, 9.0, 9.0, 9.0, 10.0, 2.0, 5.0, 3.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.001218259334564209, -0.0011798962950706482, -0.0011415332555770874, -0.0011031702160835266, -0.0010648071765899658, -0.001026444137096405, -0.0009880810976028442, -0.0009497180581092834, -0.0009113550186157227, -0.0008729919791221619, -0.0008346289396286011, -0.0007962659001350403, -0.0007579028606414795, -0.0007195398211479187, -0.0006811767816543579, -0.0006428137421607971, -0.0006044507026672363, -0.0005660876631736755, -0.0005277246236801147, -0.000489361584186554, -0.00045099854469299316, -0.0004126355051994324, -0.0003742724657058716, -0.0003359094262123108, -0.00029754638671875, -0.0002591833472251892, -0.00022082030773162842, -0.00018245726823806763, -0.00014409422874450684, -0.00010573118925094604, -6.736814975738525e-05, -2.9005110263824463e-05, 9.357929229736328e-06, 4.772096872329712e-05, 8.608400821685791e-05, 0.0001244470477104187, 0.0001628100872039795, 0.00020117312669754028, 0.00023953616619110107, 0.00027789920568466187, 0.00031626224517822266, 0.00035462528467178345, 0.00039298832416534424, 0.00043135136365890503, 0.0004697144031524658, 0.0005080774426460266, 0.0005464404821395874, 0.0005848035216331482, 0.000623166561126709, 0.0006615296006202698, 0.0006998926401138306, 0.0007382556796073914, 0.0007766187191009521, 0.0008149817585945129, 0.0008533447980880737, 0.0008917078375816345, 0.0009300708770751953, 0.0009684339165687561, 0.001006796956062317, 0.0010451599955558777, 0.0010835230350494385, 0.0011218860745429993, 0.00116024911403656, 0.0011986121535301208, 0.0012369751930236816]}, "gradients/decoder.transformer.h.3.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 6.0, 4.0, 6.0, 7.0, 8.0, 8.0, 9.0, 13.0, 11.0, 18.0, 9.0, 14.0, 17.0, 20.0, 26.0, 20.0, 37.0, 32.0, 30.0, 29.0, 39.0, 48.0, 41.0, 48.0, 41.0, 39.0, 35.0, 40.0, 34.0, 39.0, 26.0, 30.0, 30.0, 26.0, 29.0, 20.0, 17.0, 13.0, 10.0, 15.0, 12.0, 11.0, 6.0, 5.0, 5.0, 4.0, 4.0, 5.0, 4.0, 5.0, 2.0, 1.0, 3.0, 0.0, 2.0], "bins": [-8.7734375, -8.5087890625, -8.244140625, -7.9794921875, -7.71484375, -7.4501953125, -7.185546875, -6.9208984375, -6.65625, -6.3916015625, -6.126953125, -5.8623046875, -5.59765625, -5.3330078125, -5.068359375, -4.8037109375, -4.5390625, -4.2744140625, -4.009765625, -3.7451171875, -3.48046875, -3.2158203125, -2.951171875, -2.6865234375, -2.421875, -2.1572265625, -1.892578125, -1.6279296875, -1.36328125, -1.0986328125, -0.833984375, -0.5693359375, -0.3046875, -0.0400390625, 0.224609375, 0.4892578125, 0.75390625, 1.0185546875, 1.283203125, 1.5478515625, 1.8125, 2.0771484375, 2.341796875, 2.6064453125, 2.87109375, 3.1357421875, 3.400390625, 3.6650390625, 3.9296875, 4.1943359375, 4.458984375, 4.7236328125, 4.98828125, 5.2529296875, 5.517578125, 5.7822265625, 6.046875, 6.3115234375, 6.576171875, 6.8408203125, 7.10546875, 7.3701171875, 7.634765625, 7.8994140625, 8.1640625]}, "gradients/decoder.transformer.h.3.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 1.0, 3.0, 3.0, 3.0, 3.0, 10.0, 12.0, 21.0, 8.0, 13.0, 24.0, 27.0, 36.0, 52.0, 67.0, 104.0, 101.0, 130.0, 162.0, 223.0, 301.0, 353.0, 438.0, 606.0, 833.0, 1188.0, 1819.0, 3931.0, 13343.0, 61184.0, 267144.0, 484489.0, 159775.0, 35424.0, 8145.0, 2838.0, 1573.0, 998.0, 710.0, 513.0, 411.0, 345.0, 285.0, 201.0, 150.0, 133.0, 106.0, 63.0, 51.0, 42.0, 44.0, 28.0, 15.0, 21.0, 14.0, 13.0, 12.0, 11.0, 8.0, 3.0, 4.0, 3.0], "bins": [-17.359375, -16.83837890625, -16.3173828125, -15.79638671875, -15.275390625, -14.75439453125, -14.2333984375, -13.71240234375, -13.19140625, -12.67041015625, -12.1494140625, -11.62841796875, -11.107421875, -10.58642578125, -10.0654296875, -9.54443359375, -9.0234375, -8.50244140625, -7.9814453125, -7.46044921875, -6.939453125, -6.41845703125, -5.8974609375, -5.37646484375, -4.85546875, -4.33447265625, -3.8134765625, -3.29248046875, -2.771484375, -2.25048828125, -1.7294921875, -1.20849609375, -0.6875, -0.16650390625, 0.3544921875, 0.87548828125, 1.396484375, 1.91748046875, 2.4384765625, 2.95947265625, 3.48046875, 4.00146484375, 4.5224609375, 5.04345703125, 5.564453125, 6.08544921875, 6.6064453125, 7.12744140625, 7.6484375, 8.16943359375, 8.6904296875, 9.21142578125, 9.732421875, 10.25341796875, 10.7744140625, 11.29541015625, 11.81640625, 12.33740234375, 12.8583984375, 13.37939453125, 13.900390625, 14.42138671875, 14.9423828125, 15.46337890625, 15.984375]}, "gradients/decoder.transformer.h.3.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 6.0, 4.0, 5.0, 5.0, 7.0, 9.0, 11.0, 12.0, 10.0, 12.0, 17.0, 25.0, 21.0, 33.0, 35.0, 38.0, 45.0, 59.0, 84.0, 105.0, 200.0, 1403.0, 295.0, 144.0, 86.0, 50.0, 36.0, 41.0, 37.0, 35.0, 30.0, 18.0, 22.0, 22.0, 20.0, 15.0, 10.0, 12.0, 10.0, 7.0, 10.0, 5.0, 1.0, 2.0, 2.0, 2.0, 4.0], "bins": [-27.84375, -27.125244140625, -26.40673828125, -25.688232421875, -24.9697265625, -24.251220703125, -23.53271484375, -22.814208984375, -22.095703125, -21.377197265625, -20.65869140625, -19.940185546875, -19.2216796875, -18.503173828125, -17.78466796875, -17.066162109375, -16.34765625, -15.629150390625, -14.91064453125, -14.192138671875, -13.4736328125, -12.755126953125, -12.03662109375, -11.318115234375, -10.599609375, -9.881103515625, -9.16259765625, -8.444091796875, -7.7255859375, -7.007080078125, -6.28857421875, -5.570068359375, -4.8515625, -4.133056640625, -3.41455078125, -2.696044921875, -1.9775390625, -1.259033203125, -0.54052734375, 0.177978515625, 0.896484375, 1.614990234375, 2.33349609375, 3.052001953125, 3.7705078125, 4.489013671875, 5.20751953125, 5.926025390625, 6.64453125, 7.363037109375, 8.08154296875, 8.800048828125, 9.5185546875, 10.237060546875, 10.95556640625, 11.674072265625, 12.392578125, 13.111083984375, 13.82958984375, 14.548095703125, 15.2666015625, 15.985107421875, 16.70361328125, 17.422119140625, 18.140625]}, "gradients/decoder.transformer.h.3.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 4.0, 11.0, 7.0, 10.0, 9.0, 11.0, 12.0, 22.0, 14.0, 29.0, 28.0, 37.0, 58.0, 89.0, 118.0, 158.0, 198.0, 293.0, 597.0, 1167.0, 6187.0, 3126867.0, 6800.0, 1299.0, 545.0, 323.0, 195.0, 157.0, 126.0, 72.0, 63.0, 46.0, 36.0, 28.0, 13.0, 19.0, 9.0, 7.0, 8.0, 8.0, 7.0, 4.0, 5.0, 3.0, 4.0, 5.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-111.25, -107.828125, -104.40625, -100.984375, -97.5625, -94.140625, -90.71875, -87.296875, -83.875, -80.453125, -77.03125, -73.609375, -70.1875, -66.765625, -63.34375, -59.921875, -56.5, -53.078125, -49.65625, -46.234375, -42.8125, -39.390625, -35.96875, -32.546875, -29.125, -25.703125, -22.28125, -18.859375, -15.4375, -12.015625, -8.59375, -5.171875, -1.75, 1.671875, 5.09375, 8.515625, 11.9375, 15.359375, 18.78125, 22.203125, 25.625, 29.046875, 32.46875, 35.890625, 39.3125, 42.734375, 46.15625, 49.578125, 53.0, 56.421875, 59.84375, 63.265625, 66.6875, 70.109375, 73.53125, 76.953125, 80.375, 83.796875, 87.21875, 90.640625, 94.0625, 97.484375, 100.90625, 104.328125, 107.75]}, "gradients/decoder.transformer.h.3.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 11.0, 51.0, 139.0, 250.0, 271.0, 186.0, 78.0, 17.0, 8.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-147.3551483154297, -143.85218811035156, -140.3492431640625, -136.84628295898438, -133.3433380126953, -129.8403778076172, -126.33743286132812, -122.83447265625, -119.33152770996094, -115.82857513427734, -112.32562255859375, -108.82266998291016, -105.31971740722656, -101.81676483154297, -98.31381225585938, -94.81085205078125, -91.30789947509766, -87.80494689941406, -84.30199432373047, -80.79904174804688, -77.29608917236328, -73.79313659667969, -70.29017639160156, -66.7872314453125, -63.28427505493164, -59.78132247924805, -56.27836990356445, -52.775413513183594, -49.2724609375, -45.769508361816406, -42.26655578613281, -38.76360321044922, -35.260650634765625, -31.75769805908203, -28.254745483398438, -24.75179100036621, -21.248838424682617, -17.745885848999023, -14.242931365966797, -10.739978790283203, -7.237026214599609, -3.7340731620788574, -0.23112010955810547, 3.2718334197998047, 6.774785995483398, 10.277738571166992, 13.780693054199219, 17.283645629882812, 20.786598205566406, 24.28955078125, 27.792503356933594, 31.29545783996582, 34.79840850830078, 38.301361083984375, 41.804317474365234, 45.30727005004883, 48.81022262573242, 52.313175201416016, 55.81612777709961, 59.31908416748047, 62.82203674316406, 66.32498931884766, 69.82794189453125, 73.33089447021484, 76.83384704589844]}, "gradients/decoder.transformer.h.3.ln_1.bias": {"_type": "histogram", "values": [4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 4.0, 3.0, 1.0, 2.0, 5.0, 9.0, 10.0, 14.0, 18.0, 19.0, 24.0, 14.0, 18.0, 34.0, 29.0, 41.0, 37.0, 37.0, 47.0, 48.0, 49.0, 47.0, 50.0, 39.0, 39.0, 55.0, 42.0, 38.0, 15.0, 35.0, 28.0, 25.0, 17.0, 22.0, 22.0, 9.0, 5.0, 8.0, 10.0, 9.0, 9.0, 6.0, 5.0, 3.0, 4.0, 2.0, 0.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-57.86651611328125, -55.76604080200195, -53.665565490722656, -51.56509017944336, -49.46461486816406, -47.36414337158203, -45.263668060302734, -43.16319274902344, -41.06271743774414, -38.962242126464844, -36.86176681518555, -34.76129150390625, -32.66082000732422, -30.56034278869629, -28.459869384765625, -26.359394073486328, -24.25891876220703, -22.158443450927734, -20.057968139648438, -17.957494735717773, -15.857019424438477, -13.75654411315918, -11.6560697555542, -9.555595397949219, -7.455120086669922, -5.354645252227783, -3.2541704177856445, -1.1536955833435059, 0.9467792510986328, 3.0472545623779297, 5.14772891998291, 7.248203277587891, 9.348678588867188, 11.449153900146484, 13.549628257751465, 15.650102615356445, 17.750577926635742, 19.85105323791504, 21.951526641845703, 24.052001953125, 26.152477264404297, 28.252952575683594, 30.35342788696289, 32.45390319824219, 34.55437469482422, 36.65485382080078, 38.75532531738281, 40.85580062866211, 42.956275939941406, 45.0567512512207, 47.1572265625, 49.2577018737793, 51.358177185058594, 53.458648681640625, 55.55912399291992, 57.65959930419922, 59.760074615478516, 61.86054992675781, 63.96102523803711, 66.0615005493164, 68.16197204589844, 70.262451171875, 72.36292266845703, 74.46339416503906, 76.56387329101562]}, "gradients/decoder.transformer.h.2.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 6.0, 3.0, 3.0, 7.0, 5.0, 6.0, 8.0, 6.0, 9.0, 10.0, 6.0, 9.0, 10.0, 12.0, 18.0, 13.0, 27.0, 27.0, 32.0, 28.0, 38.0, 30.0, 38.0, 43.0, 53.0, 36.0, 37.0, 41.0, 40.0, 37.0, 36.0, 30.0, 36.0, 25.0, 28.0, 28.0, 29.0, 28.0, 17.0, 18.0, 13.0, 11.0, 23.0, 10.0, 7.0, 5.0, 6.0, 2.0, 5.0, 7.0, 3.0, 4.0, 4.0, 3.0, 1.0, 0.0, 1.0], "bins": [-8.8671875, -8.6043701171875, -8.341552734375, -8.0787353515625, -7.81591796875, -7.5531005859375, -7.290283203125, -7.0274658203125, -6.7646484375, -6.5018310546875, -6.239013671875, -5.9761962890625, -5.71337890625, -5.4505615234375, -5.187744140625, -4.9249267578125, -4.662109375, -4.3992919921875, -4.136474609375, -3.8736572265625, -3.61083984375, -3.3480224609375, -3.085205078125, -2.8223876953125, -2.5595703125, -2.2967529296875, -2.033935546875, -1.7711181640625, -1.50830078125, -1.2454833984375, -0.982666015625, -0.7198486328125, -0.45703125, -0.1942138671875, 0.068603515625, 0.3314208984375, 0.59423828125, 0.8570556640625, 1.119873046875, 1.3826904296875, 1.6455078125, 1.9083251953125, 2.171142578125, 2.4339599609375, 2.69677734375, 2.9595947265625, 3.222412109375, 3.4852294921875, 3.748046875, 4.0108642578125, 4.273681640625, 4.5364990234375, 4.79931640625, 5.0621337890625, 5.324951171875, 5.5877685546875, 5.8505859375, 6.1134033203125, 6.376220703125, 6.6390380859375, 6.90185546875, 7.1646728515625, 7.427490234375, 7.6903076171875, 7.953125]}, "gradients/decoder.transformer.h.2.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 3.0, 5.0, 5.0, 6.0, 12.0, 21.0, 38.0, 64.0, 112.0, 225.0, 421.0, 982.0, 2540.0, 9752.0, 78476.0, 1185451.0, 2583370.0, 302907.0, 22939.0, 4408.0, 1363.0, 595.0, 272.0, 151.0, 71.0, 36.0, 23.0, 17.0, 3.0, 8.0, 6.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.96875, -19.235595703125, -18.50244140625, -17.769287109375, -17.0361328125, -16.302978515625, -15.56982421875, -14.836669921875, -14.103515625, -13.370361328125, -12.63720703125, -11.904052734375, -11.1708984375, -10.437744140625, -9.70458984375, -8.971435546875, -8.23828125, -7.505126953125, -6.77197265625, -6.038818359375, -5.3056640625, -4.572509765625, -3.83935546875, -3.106201171875, -2.373046875, -1.639892578125, -0.90673828125, -0.173583984375, 0.5595703125, 1.292724609375, 2.02587890625, 2.759033203125, 3.4921875, 4.225341796875, 4.95849609375, 5.691650390625, 6.4248046875, 7.157958984375, 7.89111328125, 8.624267578125, 9.357421875, 10.090576171875, 10.82373046875, 11.556884765625, 12.2900390625, 13.023193359375, 13.75634765625, 14.489501953125, 15.22265625, 15.955810546875, 16.68896484375, 17.422119140625, 18.1552734375, 18.888427734375, 19.62158203125, 20.354736328125, 21.087890625, 21.821044921875, 22.55419921875, 23.287353515625, 24.0205078125, 24.753662109375, 25.48681640625, 26.219970703125, 26.953125]}, "gradients/decoder.transformer.h.2.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 4.0, 1.0, 3.0, 3.0, 8.0, 2.0, 7.0, 11.0, 16.0, 22.0, 29.0, 40.0, 50.0, 72.0, 92.0, 105.0, 138.0, 187.0, 284.0, 331.0, 485.0, 487.0, 411.0, 338.0, 217.0, 193.0, 141.0, 109.0, 71.0, 65.0, 44.0, 28.0, 23.0, 16.0, 19.0, 7.0, 10.0, 7.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-16.3125, -15.80908203125, -15.3056640625, -14.80224609375, -14.298828125, -13.79541015625, -13.2919921875, -12.78857421875, -12.28515625, -11.78173828125, -11.2783203125, -10.77490234375, -10.271484375, -9.76806640625, -9.2646484375, -8.76123046875, -8.2578125, -7.75439453125, -7.2509765625, -6.74755859375, -6.244140625, -5.74072265625, -5.2373046875, -4.73388671875, -4.23046875, -3.72705078125, -3.2236328125, -2.72021484375, -2.216796875, -1.71337890625, -1.2099609375, -0.70654296875, -0.203125, 0.30029296875, 0.8037109375, 1.30712890625, 1.810546875, 2.31396484375, 2.8173828125, 3.32080078125, 3.82421875, 4.32763671875, 4.8310546875, 5.33447265625, 5.837890625, 6.34130859375, 6.8447265625, 7.34814453125, 7.8515625, 8.35498046875, 8.8583984375, 9.36181640625, 9.865234375, 10.36865234375, 10.8720703125, 11.37548828125, 11.87890625, 12.38232421875, 12.8857421875, 13.38916015625, 13.892578125, 14.39599609375, 14.8994140625, 15.40283203125, 15.90625]}, "gradients/decoder.transformer.h.2.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 1.0, 4.0, 5.0, 7.0, 10.0, 20.0, 21.0, 29.0, 32.0, 60.0, 78.0, 131.0, 176.0, 257.0, 405.0, 653.0, 1194.0, 2283.0, 6404.0, 41260.0, 509163.0, 3123225.0, 459833.0, 37578.0, 6117.0, 2252.0, 1112.0, 729.0, 419.0, 276.0, 140.0, 134.0, 81.0, 54.0, 43.0, 23.0, 22.0, 12.0, 13.0, 8.0, 8.0, 4.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0], "bins": [-25.328125, -24.54833984375, -23.7685546875, -22.98876953125, -22.208984375, -21.42919921875, -20.6494140625, -19.86962890625, -19.08984375, -18.31005859375, -17.5302734375, -16.75048828125, -15.970703125, -15.19091796875, -14.4111328125, -13.63134765625, -12.8515625, -12.07177734375, -11.2919921875, -10.51220703125, -9.732421875, -8.95263671875, -8.1728515625, -7.39306640625, -6.61328125, -5.83349609375, -5.0537109375, -4.27392578125, -3.494140625, -2.71435546875, -1.9345703125, -1.15478515625, -0.375, 0.40478515625, 1.1845703125, 1.96435546875, 2.744140625, 3.52392578125, 4.3037109375, 5.08349609375, 5.86328125, 6.64306640625, 7.4228515625, 8.20263671875, 8.982421875, 9.76220703125, 10.5419921875, 11.32177734375, 12.1015625, 12.88134765625, 13.6611328125, 14.44091796875, 15.220703125, 16.00048828125, 16.7802734375, 17.56005859375, 18.33984375, 19.11962890625, 19.8994140625, 20.67919921875, 21.458984375, 22.23876953125, 23.0185546875, 23.79833984375, 24.578125]}, "gradients/decoder.transformer.h.2.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 6.0, 58.0, 195.0, 344.0, 268.0, 111.0, 25.0, 6.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-89.53214263916016, -83.48588562011719, -77.43962097167969, -71.39336395263672, -65.34710693359375, -59.300846099853516, -53.25458526611328, -47.20832824707031, -41.16206741333008, -35.115806579589844, -29.069549560546875, -23.02328872680664, -16.97702980041504, -10.930770874023438, -4.884510040283203, 1.1617469787597656, 7.2080078125, 13.254266738891602, 19.300525665283203, 25.346786499023438, 31.39304542541504, 37.43930435180664, 43.485565185546875, 49.531822204589844, 55.57808303833008, 61.62434387207031, 67.67060089111328, 73.71685791015625, 79.76312255859375, 85.80937957763672, 91.85563659667969, 97.90190124511719, 103.94816589355469, 109.99442291259766, 116.04068756103516, 122.08694458007812, 128.13320922851562, 134.17945861816406, 140.22572326660156, 146.27197265625, 152.3182373046875, 158.364501953125, 164.41075134277344, 170.45701599121094, 176.50328063964844, 182.54953002929688, 188.59579467773438, 194.64205932617188, 200.68832397460938, 206.73458862304688, 212.7808380126953, 218.8271026611328, 224.8733673095703, 230.91961669921875, 236.96588134765625, 243.01214599609375, 249.0583953857422, 255.1046600341797, 261.1509094238281, 267.1971740722656, 273.2434387207031, 279.2897033691406, 285.3359375, 291.3822021484375, 297.428466796875]}, "gradients/decoder.transformer.h.2.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 4.0, 7.0, 7.0, 3.0, 4.0, 3.0, 12.0, 7.0, 10.0, 11.0, 16.0, 12.0, 15.0, 25.0, 27.0, 20.0, 16.0, 28.0, 29.0, 39.0, 25.0, 40.0, 49.0, 49.0, 40.0, 38.0, 52.0, 38.0, 44.0, 30.0, 37.0, 35.0, 31.0, 30.0, 24.0, 25.0, 21.0, 17.0, 11.0, 17.0, 19.0, 14.0, 5.0, 5.0, 9.0, 6.0, 3.0, 3.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.56891632080078, -59.69052505493164, -57.8121337890625, -55.93374252319336, -54.05535125732422, -52.17695999145508, -50.29856872558594, -48.42017364501953, -46.541786193847656, -44.663394927978516, -42.785003662109375, -40.906612396240234, -39.028221130371094, -37.14982986450195, -35.27143859863281, -33.393043518066406, -31.514652252197266, -29.636260986328125, -27.757869720458984, -25.879478454589844, -24.001087188720703, -22.122695922851562, -20.24430274963379, -18.36591148376465, -16.487520217895508, -14.609128952026367, -12.730737686157227, -10.85234546661377, -8.973954200744629, -7.095562934875488, -5.217170715332031, -3.3387794494628906, -1.46038818359375, 0.4180033206939697, 2.2963948249816895, 4.174786567687988, 6.053177833557129, 7.9315690994262695, 9.809961318969727, 11.688352584838867, 13.566743850708008, 15.445135116577148, 17.32352638244629, 19.201919555664062, 21.080310821533203, 22.958702087402344, 24.837093353271484, 26.715484619140625, 28.593875885009766, 30.472267150878906, 32.35065841674805, 34.22904968261719, 36.10744094848633, 37.98583221435547, 39.864227294921875, 41.74261474609375, 43.621009826660156, 45.4994010925293, 47.37779235839844, 49.25618362426758, 51.13457489013672, 53.01296615600586, 54.891357421875, 56.769752502441406, 58.64813995361328]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 0.0, 2.0, 4.0, 5.0, 11.0, 4.0, 8.0, 11.0, 13.0, 12.0, 19.0, 11.0, 14.0, 19.0, 21.0, 28.0, 27.0, 30.0, 30.0, 36.0, 42.0, 45.0, 34.0, 31.0, 31.0, 44.0, 37.0, 34.0, 36.0, 30.0, 41.0, 25.0, 34.0, 31.0, 27.0, 23.0, 25.0, 25.0, 25.0, 20.0, 10.0, 7.0, 7.0, 6.0, 8.0, 5.0, 6.0, 3.0, 6.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.18359375, -5.98553466796875, -5.7874755859375, -5.58941650390625, -5.391357421875, -5.19329833984375, -4.9952392578125, -4.79718017578125, -4.59912109375, -4.40106201171875, -4.2030029296875, -4.00494384765625, -3.806884765625, -3.60882568359375, -3.4107666015625, -3.21270751953125, -3.0146484375, -2.81658935546875, -2.6185302734375, -2.42047119140625, -2.222412109375, -2.02435302734375, -1.8262939453125, -1.62823486328125, -1.43017578125, -1.23211669921875, -1.0340576171875, -0.83599853515625, -0.637939453125, -0.43988037109375, -0.2418212890625, -0.04376220703125, 0.154296875, 0.35235595703125, 0.5504150390625, 0.74847412109375, 0.946533203125, 1.14459228515625, 1.3426513671875, 1.54071044921875, 1.73876953125, 1.93682861328125, 2.1348876953125, 2.33294677734375, 2.531005859375, 2.72906494140625, 2.9271240234375, 3.12518310546875, 3.3232421875, 3.52130126953125, 3.7193603515625, 3.91741943359375, 4.115478515625, 4.31353759765625, 4.5115966796875, 4.70965576171875, 4.90771484375, 5.10577392578125, 5.3038330078125, 5.50189208984375, 5.699951171875, 5.89801025390625, 6.0960693359375, 6.29412841796875, 6.4921875]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 4.0, 3.0, 8.0, 10.0, 13.0, 29.0, 37.0, 55.0, 80.0, 109.0, 179.0, 260.0, 380.0, 560.0, 840.0, 1293.0, 1916.0, 2795.0, 4474.0, 6692.0, 10641.0, 16623.0, 26038.0, 40787.0, 64357.0, 100521.0, 156352.0, 196660.0, 149996.0, 96402.0, 61561.0, 39152.0, 24912.0, 15766.0, 9932.0, 6509.0, 4316.0, 2786.0, 1839.0, 1248.0, 771.0, 544.0, 356.0, 250.0, 171.0, 120.0, 75.0, 52.0, 31.0, 19.0, 18.0, 9.0, 7.0, 4.0, 2.0, 4.0, 3.0], "bins": [-0.56298828125, -0.5466461181640625, -0.530303955078125, -0.5139617919921875, -0.49761962890625, -0.4812774658203125, -0.464935302734375, -0.4485931396484375, -0.4322509765625, -0.4159088134765625, -0.399566650390625, -0.3832244873046875, -0.36688232421875, -0.3505401611328125, -0.334197998046875, -0.3178558349609375, -0.301513671875, -0.2851715087890625, -0.268829345703125, -0.2524871826171875, -0.23614501953125, -0.2198028564453125, -0.203460693359375, -0.1871185302734375, -0.1707763671875, -0.1544342041015625, -0.138092041015625, -0.1217498779296875, -0.10540771484375, -0.0890655517578125, -0.072723388671875, -0.0563812255859375, -0.0400390625, -0.0236968994140625, -0.007354736328125, 0.0089874267578125, 0.02532958984375, 0.0416717529296875, 0.058013916015625, 0.0743560791015625, 0.0906982421875, 0.1070404052734375, 0.123382568359375, 0.1397247314453125, 0.15606689453125, 0.1724090576171875, 0.188751220703125, 0.2050933837890625, 0.221435546875, 0.2377777099609375, 0.254119873046875, 0.2704620361328125, 0.28680419921875, 0.3031463623046875, 0.319488525390625, 0.3358306884765625, 0.3521728515625, 0.3685150146484375, 0.384857177734375, 0.4011993408203125, 0.41754150390625, 0.4338836669921875, 0.450225830078125, 0.4665679931640625, 0.48291015625]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 4.0, 6.0, 4.0, 11.0, 7.0, 5.0, 12.0, 24.0, 19.0, 24.0, 28.0, 26.0, 24.0, 32.0, 28.0, 35.0, 47.0, 35.0, 37.0, 61.0, 1074.0, 41.0, 45.0, 39.0, 41.0, 30.0, 37.0, 31.0, 37.0, 36.0, 26.0, 8.0, 13.0, 22.0, 11.0, 16.0, 5.0, 10.0, 14.0, 4.0, 6.0, 9.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.25, -4.11053466796875, -3.9710693359375, -3.83160400390625, -3.692138671875, -3.55267333984375, -3.4132080078125, -3.27374267578125, -3.13427734375, -2.99481201171875, -2.8553466796875, -2.71588134765625, -2.576416015625, -2.43695068359375, -2.2974853515625, -2.15802001953125, -2.0185546875, -1.87908935546875, -1.7396240234375, -1.60015869140625, -1.460693359375, -1.32122802734375, -1.1817626953125, -1.04229736328125, -0.90283203125, -0.76336669921875, -0.6239013671875, -0.48443603515625, -0.344970703125, -0.20550537109375, -0.0660400390625, 0.07342529296875, 0.212890625, 0.35235595703125, 0.4918212890625, 0.63128662109375, 0.770751953125, 0.91021728515625, 1.0496826171875, 1.18914794921875, 1.32861328125, 1.46807861328125, 1.6075439453125, 1.74700927734375, 1.886474609375, 2.02593994140625, 2.1654052734375, 2.30487060546875, 2.4443359375, 2.58380126953125, 2.7232666015625, 2.86273193359375, 3.002197265625, 3.14166259765625, 3.2811279296875, 3.42059326171875, 3.56005859375, 3.69952392578125, 3.8389892578125, 3.97845458984375, 4.117919921875, 4.25738525390625, 4.3968505859375, 4.53631591796875, 4.67578125]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 4.0, 7.0, 7.0, 14.0, 22.0, 26.0, 47.0, 67.0, 91.0, 151.0, 188.0, 279.0, 434.0, 617.0, 880.0, 1399.0, 1990.0, 2922.0, 4452.0, 6413.0, 9420.0, 14091.0, 21004.0, 31705.0, 48097.0, 72919.0, 110752.0, 157423.0, 1219555.0, 131620.0, 88109.0, 57745.0, 37732.0, 25343.0, 16898.0, 11207.0, 7770.0, 5167.0, 3405.0, 2399.0, 1575.0, 1019.0, 716.0, 463.0, 334.0, 235.0, 136.0, 101.0, 73.0, 42.0, 29.0, 17.0, 16.0, 7.0, 8.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.418701171875, -0.4052886962890625, -0.391876220703125, -0.3784637451171875, -0.36505126953125, -0.3516387939453125, -0.338226318359375, -0.3248138427734375, -0.3114013671875, -0.2979888916015625, -0.284576416015625, -0.2711639404296875, -0.25775146484375, -0.2443389892578125, -0.230926513671875, -0.2175140380859375, -0.2041015625, -0.1906890869140625, -0.177276611328125, -0.1638641357421875, -0.15045166015625, -0.1370391845703125, -0.123626708984375, -0.1102142333984375, -0.0968017578125, -0.0833892822265625, -0.069976806640625, -0.0565643310546875, -0.04315185546875, -0.0297393798828125, -0.016326904296875, -0.0029144287109375, 0.010498046875, 0.0239105224609375, 0.037322998046875, 0.0507354736328125, 0.06414794921875, 0.0775604248046875, 0.090972900390625, 0.1043853759765625, 0.1177978515625, 0.1312103271484375, 0.144622802734375, 0.1580352783203125, 0.17144775390625, 0.1848602294921875, 0.198272705078125, 0.2116851806640625, 0.22509765625, 0.2385101318359375, 0.251922607421875, 0.2653350830078125, 0.27874755859375, 0.2921600341796875, 0.305572509765625, 0.3189849853515625, 0.3323974609375, 0.3458099365234375, 0.359222412109375, 0.3726348876953125, 0.38604736328125, 0.3994598388671875, 0.412872314453125, 0.4262847900390625, 0.439697265625]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 4.0, 0.0, 6.0, 6.0, 3.0, 8.0, 2.0, 15.0, 14.0, 14.0, 11.0, 23.0, 18.0, 34.0, 29.0, 22.0, 34.0, 35.0, 35.0, 37.0, 39.0, 52.0, 39.0, 53.0, 36.0, 30.0, 43.0, 44.0, 30.0, 33.0, 34.0, 37.0, 25.0, 20.0, 17.0, 25.0, 23.0, 13.0, 9.0, 14.0, 13.0, 8.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0009455680847167969, -0.0009166374802589417, -0.0008877068758010864, -0.0008587762713432312, -0.000829845666885376, -0.0008009150624275208, -0.0007719844579696655, -0.0007430538535118103, -0.0007141232490539551, -0.0006851926445960999, -0.0006562620401382446, -0.0006273314356803894, -0.0005984008312225342, -0.000569470226764679, -0.0005405396223068237, -0.0005116090178489685, -0.0004826784133911133, -0.00045374780893325806, -0.00042481720447540283, -0.0003958866000175476, -0.0003669559955596924, -0.00033802539110183716, -0.00030909478664398193, -0.0002801641821861267, -0.0002512335777282715, -0.00022230297327041626, -0.00019337236881256104, -0.0001644417643547058, -0.00013551115989685059, -0.00010658055543899536, -7.764995098114014e-05, -4.871934652328491e-05, -1.9788742065429688e-05, 9.141862392425537e-06, 3.807246685028076e-05, 6.700307130813599e-05, 9.593367576599121e-05, 0.00012486428022384644, 0.00015379488468170166, 0.00018272548913955688, 0.0002116560935974121, 0.00024058669805526733, 0.00026951730251312256, 0.0002984479069709778, 0.000327378511428833, 0.00035630911588668823, 0.00038523972034454346, 0.0004141703248023987, 0.0004431009292602539, 0.00047203153371810913, 0.0005009621381759644, 0.0005298927426338196, 0.0005588233470916748, 0.00058775395154953, 0.0006166845560073853, 0.0006456151604652405, 0.0006745457649230957, 0.0007034763693809509, 0.0007324069738388062, 0.0007613375782966614, 0.0007902681827545166, 0.0008191987872123718, 0.000848129391670227, 0.0008770599961280823, 0.0009059906005859375]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 4.0, 7.0, 9.0, 10.0, 12.0, 20.0, 19.0, 21.0, 26.0, 43.0, 48.0, 40.0, 58.0, 73.0, 95.0, 150.0, 199.0, 339.0, 469.0, 809.0, 3201.0, 374313.0, 660854.0, 5094.0, 908.0, 503.0, 317.0, 228.0, 136.0, 119.0, 92.0, 74.0, 61.0, 37.0, 36.0, 28.0, 22.0, 19.0, 20.0, 8.0, 9.0, 8.0, 5.0, 7.0, 4.0, 1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.019439697265625, -0.01885199546813965, -0.018264293670654297, -0.017676591873168945, -0.017088890075683594, -0.016501188278198242, -0.01591348648071289, -0.015325784683227539, -0.014738082885742188, -0.014150381088256836, -0.013562679290771484, -0.012974977493286133, -0.012387275695800781, -0.01179957389831543, -0.011211872100830078, -0.010624170303344727, -0.010036468505859375, -0.009448766708374023, -0.008861064910888672, -0.00827336311340332, -0.007685661315917969, -0.007097959518432617, -0.006510257720947266, -0.005922555923461914, -0.0053348541259765625, -0.004747152328491211, -0.004159450531005859, -0.003571748733520508, -0.0029840469360351562, -0.0023963451385498047, -0.0018086433410644531, -0.0012209415435791016, -0.00063323974609375, -4.553794860839844e-05, 0.0005421638488769531, 0.0011298656463623047, 0.0017175674438476562, 0.002305269241333008, 0.0028929710388183594, 0.003480672836303711, 0.0040683746337890625, 0.004656076431274414, 0.005243778228759766, 0.005831480026245117, 0.006419181823730469, 0.00700688362121582, 0.007594585418701172, 0.008182287216186523, 0.008769989013671875, 0.009357690811157227, 0.009945392608642578, 0.01053309440612793, 0.011120796203613281, 0.011708498001098633, 0.012296199798583984, 0.012883901596069336, 0.013471603393554688, 0.014059305191040039, 0.01464700698852539, 0.015234708786010742, 0.015822410583496094, 0.016410112380981445, 0.016997814178466797, 0.01758551597595215, 0.0181732177734375]}, "gradients/decoder.transformer.h.2.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 44.0, 737.0, 235.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.006766138132661581, -0.006522041745483875, -0.006277945823967457, -0.006033849436789751, -0.005789753515273333, -0.005545657128095627, -0.005301561206579208, -0.005057464819401503, -0.004813368432223797, -0.004569272045046091, -0.004325176123529673, -0.004081079736351967, -0.0038369838148355484, -0.0035928874276578426, -0.0033487912733107805, -0.0031046951189637184, -0.0028605991974473, -0.002616503043100238, -0.0023724068887531757, -0.00212831050157547, -0.0018842144636437297, -0.0016401183092966676, -0.0013960220385342836, -0.0011519258841872215, -0.0009078297298401594, -0.0006637335754930973, -0.0004196373629383743, -0.00017554115038365126, 6.855500396341085e-05, 0.00031265115831047297, 0.0005567474290728569, 0.000800843583419919, 0.0010449392721056938, 0.001289035426452756, 0.001533131580799818, 0.001777227851562202, 0.0020213238894939423, 0.002265420276671648, 0.00250951643101871, 0.0027536125853657722, 0.0029977087397128344, 0.0032418048940598965, 0.0034859010484069586, 0.0037299972027540207, 0.0039740935899317265, 0.004218189511448145, 0.004462285898625851, 0.004706381820142269, 0.004950478207319975, 0.005194574594497681, 0.005438670516014099, 0.005682766903191805, 0.005926862824708223, 0.006170959211885929, 0.0064150551334023476, 0.006659151520580053, 0.006903247907757759, 0.007147344294935465, 0.007391440216451883, 0.007635536603629589, 0.007879632525146008, 0.008123728446662426, 0.008367825299501419, 0.008611921221017838, 0.008856017142534256]}, "gradients/decoder.transformer.h.2.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 1.0, 4.0, 6.0, 8.0, 9.0, 4.0, 6.0, 6.0, 14.0, 10.0, 9.0, 16.0, 12.0, 21.0, 16.0, 26.0, 24.0, 34.0, 32.0, 29.0, 26.0, 30.0, 35.0, 36.0, 39.0, 32.0, 29.0, 40.0, 33.0, 35.0, 33.0, 37.0, 26.0, 30.0, 18.0, 28.0, 31.0, 24.0, 19.0, 17.0, 21.0, 12.0, 12.0, 14.0, 10.0, 13.0, 9.0, 4.0, 6.0, 7.0, 6.0, 7.0, 1.0, 5.0, 1.0, 0.0, 3.0], "bins": [-0.0005356669425964355, -0.0005198512226343155, -0.0005040355026721954, -0.0004882197827100754, -0.0004724040627479553, -0.00045658834278583527, -0.0004407726228237152, -0.00042495690286159515, -0.0004091411828994751, -0.00039332546293735504, -0.000377509742975235, -0.00036169402301311493, -0.0003458783030509949, -0.0003300625830888748, -0.00031424686312675476, -0.0002984311431646347, -0.00028261542320251465, -0.0002667997032403946, -0.00025098398327827454, -0.00023516826331615448, -0.00021935254335403442, -0.00020353682339191437, -0.0001877211034297943, -0.00017190538346767426, -0.0001560896635055542, -0.00014027394354343414, -0.0001244582235813141, -0.00010864250361919403, -9.282678365707397e-05, -7.701106369495392e-05, -6.119534373283386e-05, -4.5379623770713806e-05, -2.956390380859375e-05, -1.3748183846473694e-05, 2.0675361156463623e-06, 1.788325607776642e-05, 3.3698976039886475e-05, 4.951469600200653e-05, 6.533041596412659e-05, 8.114613592624664e-05, 9.69618558883667e-05, 0.00011277757585048676, 0.0001285932958126068, 0.00014440901577472687, 0.00016022473573684692, 0.00017604045569896698, 0.00019185617566108704, 0.0002076718956232071, 0.00022348761558532715, 0.0002393033355474472, 0.00025511905550956726, 0.0002709347754716873, 0.0002867504954338074, 0.00030256621539592743, 0.0003183819353580475, 0.00033419765532016754, 0.0003500133752822876, 0.00036582909524440765, 0.0003816448152065277, 0.00039746053516864777, 0.0004132762551307678, 0.0004290919750928879, 0.00044490769505500793, 0.000460723415017128, 0.00047653913497924805]}, "gradients/decoder.transformer.h.2.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 0.0, 2.0, 4.0, 5.0, 11.0, 4.0, 8.0, 11.0, 13.0, 12.0, 19.0, 11.0, 14.0, 19.0, 21.0, 28.0, 27.0, 30.0, 30.0, 36.0, 42.0, 45.0, 34.0, 31.0, 31.0, 44.0, 37.0, 34.0, 36.0, 30.0, 41.0, 25.0, 33.0, 32.0, 27.0, 23.0, 25.0, 25.0, 25.0, 20.0, 10.0, 7.0, 7.0, 6.0, 8.0, 5.0, 6.0, 3.0, 6.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.18359375, -5.98553466796875, -5.7874755859375, -5.58941650390625, -5.391357421875, -5.19329833984375, -4.9952392578125, -4.79718017578125, -4.59912109375, -4.40106201171875, -4.2030029296875, -4.00494384765625, -3.806884765625, -3.60882568359375, -3.4107666015625, -3.21270751953125, -3.0146484375, -2.81658935546875, -2.6185302734375, -2.42047119140625, -2.222412109375, -2.02435302734375, -1.8262939453125, -1.62823486328125, -1.43017578125, -1.23211669921875, -1.0340576171875, -0.83599853515625, -0.637939453125, -0.43988037109375, -0.2418212890625, -0.04376220703125, 0.154296875, 0.35235595703125, 0.5504150390625, 0.74847412109375, 0.946533203125, 1.14459228515625, 1.3426513671875, 1.54071044921875, 1.73876953125, 1.93682861328125, 2.1348876953125, 2.33294677734375, 2.531005859375, 2.72906494140625, 2.9271240234375, 3.12518310546875, 3.3232421875, 3.52130126953125, 3.7193603515625, 3.91741943359375, 4.115478515625, 4.31353759765625, 4.5115966796875, 4.70965576171875, 4.90771484375, 5.10577392578125, 5.3038330078125, 5.50189208984375, 5.699951171875, 5.89801025390625, 6.0960693359375, 6.29412841796875, 6.4921875]}, "gradients/decoder.transformer.h.2.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 7.0, 6.0, 6.0, 13.0, 9.0, 19.0, 24.0, 36.0, 47.0, 50.0, 68.0, 78.0, 120.0, 141.0, 185.0, 236.0, 307.0, 377.0, 522.0, 692.0, 883.0, 1114.0, 1417.0, 2095.0, 3114.0, 5982.0, 20127.0, 109253.0, 579720.0, 255513.0, 42727.0, 9911.0, 3974.0, 2446.0, 1749.0, 1313.0, 999.0, 692.0, 588.0, 467.0, 346.0, 279.0, 219.0, 152.0, 123.0, 99.0, 79.0, 56.0, 50.0, 38.0, 28.0, 19.0, 17.0, 9.0, 11.0, 6.0, 5.0, 2.0, 1.0, 2.0, 4.0], "bins": [-15.3984375, -14.9132080078125, -14.427978515625, -13.9427490234375, -13.45751953125, -12.9722900390625, -12.487060546875, -12.0018310546875, -11.5166015625, -11.0313720703125, -10.546142578125, -10.0609130859375, -9.57568359375, -9.0904541015625, -8.605224609375, -8.1199951171875, -7.634765625, -7.1495361328125, -6.664306640625, -6.1790771484375, -5.69384765625, -5.2086181640625, -4.723388671875, -4.2381591796875, -3.7529296875, -3.2677001953125, -2.782470703125, -2.2972412109375, -1.81201171875, -1.3267822265625, -0.841552734375, -0.3563232421875, 0.12890625, 0.6141357421875, 1.099365234375, 1.5845947265625, 2.06982421875, 2.5550537109375, 3.040283203125, 3.5255126953125, 4.0107421875, 4.4959716796875, 4.981201171875, 5.4664306640625, 5.95166015625, 6.4368896484375, 6.922119140625, 7.4073486328125, 7.892578125, 8.3778076171875, 8.863037109375, 9.3482666015625, 9.83349609375, 10.3187255859375, 10.803955078125, 11.2891845703125, 11.7744140625, 12.2596435546875, 12.744873046875, 13.2301025390625, 13.71533203125, 14.2005615234375, 14.685791015625, 15.1710205078125, 15.65625]}, "gradients/decoder.transformer.h.2.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 2.0, 5.0, 4.0, 5.0, 5.0, 7.0, 4.0, 9.0, 14.0, 9.0, 8.0, 17.0, 15.0, 16.0, 21.0, 22.0, 32.0, 23.0, 43.0, 44.0, 55.0, 81.0, 138.0, 217.0, 1417.0, 220.0, 164.0, 85.0, 63.0, 46.0, 33.0, 30.0, 26.0, 21.0, 27.0, 14.0, 20.0, 15.0, 16.0, 11.0, 7.0, 9.0, 10.0, 6.0, 2.0, 6.0, 7.0, 2.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0], "bins": [-19.46875, -18.905029296875, -18.34130859375, -17.777587890625, -17.2138671875, -16.650146484375, -16.08642578125, -15.522705078125, -14.958984375, -14.395263671875, -13.83154296875, -13.267822265625, -12.7041015625, -12.140380859375, -11.57666015625, -11.012939453125, -10.44921875, -9.885498046875, -9.32177734375, -8.758056640625, -8.1943359375, -7.630615234375, -7.06689453125, -6.503173828125, -5.939453125, -5.375732421875, -4.81201171875, -4.248291015625, -3.6845703125, -3.120849609375, -2.55712890625, -1.993408203125, -1.4296875, -0.865966796875, -0.30224609375, 0.261474609375, 0.8251953125, 1.388916015625, 1.95263671875, 2.516357421875, 3.080078125, 3.643798828125, 4.20751953125, 4.771240234375, 5.3349609375, 5.898681640625, 6.46240234375, 7.026123046875, 7.58984375, 8.153564453125, 8.71728515625, 9.281005859375, 9.8447265625, 10.408447265625, 10.97216796875, 11.535888671875, 12.099609375, 12.663330078125, 13.22705078125, 13.790771484375, 14.3544921875, 14.918212890625, 15.48193359375, 16.045654296875, 16.609375]}, "gradients/decoder.transformer.h.2.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 5.0, 1.0, 2.0, 1.0, 3.0, 1.0, 5.0, 7.0, 8.0, 12.0, 9.0, 16.0, 16.0, 21.0, 28.0, 30.0, 49.0, 51.0, 68.0, 95.0, 154.0, 210.0, 370.0, 708.0, 1535.0, 5277.0, 3071567.0, 60447.0, 2543.0, 1038.0, 494.0, 282.0, 177.0, 112.0, 82.0, 71.0, 48.0, 30.0, 29.0, 18.0, 19.0, 17.0, 18.0, 13.0, 9.0, 6.0, 7.0, 2.0, 6.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-67.875, -65.4306640625, -62.986328125, -60.5419921875, -58.09765625, -55.6533203125, -53.208984375, -50.7646484375, -48.3203125, -45.8759765625, -43.431640625, -40.9873046875, -38.54296875, -36.0986328125, -33.654296875, -31.2099609375, -28.765625, -26.3212890625, -23.876953125, -21.4326171875, -18.98828125, -16.5439453125, -14.099609375, -11.6552734375, -9.2109375, -6.7666015625, -4.322265625, -1.8779296875, 0.56640625, 3.0107421875, 5.455078125, 7.8994140625, 10.34375, 12.7880859375, 15.232421875, 17.6767578125, 20.12109375, 22.5654296875, 25.009765625, 27.4541015625, 29.8984375, 32.3427734375, 34.787109375, 37.2314453125, 39.67578125, 42.1201171875, 44.564453125, 47.0087890625, 49.453125, 51.8974609375, 54.341796875, 56.7861328125, 59.23046875, 61.6748046875, 64.119140625, 66.5634765625, 69.0078125, 71.4521484375, 73.896484375, 76.3408203125, 78.78515625, 81.2294921875, 83.673828125, 86.1181640625, 88.5625]}, "gradients/decoder.transformer.h.2.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 16.0, 33.0, 77.0, 178.0, 218.0, 232.0, 150.0, 71.0, 23.0, 10.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-158.3274383544922, -155.07431030273438, -151.82119750976562, -148.5680694580078, -145.31495666503906, -142.06182861328125, -138.8087158203125, -135.5555877685547, -132.30247497558594, -129.04934692382812, -125.79623413085938, -122.5431137084961, -119.28999328613281, -116.03687286376953, -112.78375244140625, -109.53062438964844, -106.27750396728516, -103.02438354492188, -99.7712631225586, -96.51814270019531, -93.26502227783203, -90.01190185546875, -86.75877380371094, -83.50566101074219, -80.25253295898438, -76.9994125366211, -73.74629211425781, -70.49317169189453, -67.24005126953125, -63.98693084716797, -60.73380661010742, -57.48068618774414, -54.227569580078125, -50.974449157714844, -47.72132873535156, -44.46820831298828, -41.215087890625, -37.96196746826172, -34.70884323120117, -31.45572280883789, -28.20260238647461, -24.949481964111328, -21.696361541748047, -18.443239212036133, -15.190118789672852, -11.93699836730957, -8.683876037597656, -5.430755615234375, -2.1776351928710938, 1.0754857063293457, 4.328606605529785, 7.581727981567383, 10.834848403930664, 14.087968826293945, 17.34109115600586, 20.59421157836914, 23.847332000732422, 27.100452423095703, 30.353572845458984, 33.60669708251953, 36.85981750488281, 40.112937927246094, 43.366058349609375, 46.619178771972656, 49.87229919433594]}, "gradients/decoder.transformer.h.2.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 6.0, 4.0, 0.0, 3.0, 8.0, 8.0, 9.0, 12.0, 5.0, 13.0, 17.0, 22.0, 24.0, 31.0, 27.0, 35.0, 34.0, 35.0, 31.0, 29.0, 52.0, 41.0, 59.0, 47.0, 40.0, 36.0, 33.0, 45.0, 27.0, 37.0, 25.0, 39.0, 22.0, 16.0, 24.0, 14.0, 24.0, 15.0, 12.0, 12.0, 13.0, 7.0, 4.0, 2.0, 5.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-54.20210266113281, -52.51921081542969, -50.83631896972656, -49.15342712402344, -47.47053909301758, -45.78764724731445, -44.10475540161133, -42.4218635559082, -40.73897171020508, -39.05607986450195, -37.37318801879883, -35.69029998779297, -34.007408142089844, -32.32451629638672, -30.641624450683594, -28.95873260498047, -27.275842666625977, -25.59295082092285, -23.91006088256836, -22.227169036865234, -20.54427719116211, -18.861385345458984, -17.178495407104492, -15.495603561401367, -13.812712669372559, -12.12982177734375, -10.446929931640625, -8.764039039611816, -7.08114767074585, -5.398256301879883, -3.715365409851074, -2.032473564147949, -0.3495826721191406, 1.3333085775375366, 3.016199827194214, 4.699090957641602, 6.381982326507568, 8.064873695373535, 9.747764587402344, 11.430656433105469, 13.113547325134277, 14.796438217163086, 16.47933006286621, 18.162220001220703, 19.845111846923828, 21.528003692626953, 23.210895538330078, 24.893787384033203, 26.576677322387695, 28.25956916809082, 29.942459106445312, 31.625350952148438, 33.30824279785156, 34.99113464355469, 36.67402648925781, 38.35691833496094, 40.0398063659668, 41.72269821166992, 43.40559005737305, 45.088478088378906, 46.77136993408203, 48.454261779785156, 50.13715362548828, 51.820045471191406, 53.50293731689453]}, "gradients/decoder.transformer.h.1.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 2.0, 1.0, 4.0, 4.0, 8.0, 7.0, 7.0, 11.0, 17.0, 13.0, 12.0, 23.0, 15.0, 20.0, 24.0, 31.0, 24.0, 45.0, 34.0, 48.0, 43.0, 48.0, 44.0, 40.0, 51.0, 34.0, 43.0, 34.0, 31.0, 44.0, 27.0, 26.0, 30.0, 30.0, 19.0, 16.0, 9.0, 10.0, 14.0, 16.0, 10.0, 9.0, 6.0, 4.0, 5.0, 3.0, 1.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0], "bins": [-7.59375, -7.3668212890625, -7.139892578125, -6.9129638671875, -6.68603515625, -6.4591064453125, -6.232177734375, -6.0052490234375, -5.7783203125, -5.5513916015625, -5.324462890625, -5.0975341796875, -4.87060546875, -4.6436767578125, -4.416748046875, -4.1898193359375, -3.962890625, -3.7359619140625, -3.509033203125, -3.2821044921875, -3.05517578125, -2.8282470703125, -2.601318359375, -2.3743896484375, -2.1474609375, -1.9205322265625, -1.693603515625, -1.4666748046875, -1.23974609375, -1.0128173828125, -0.785888671875, -0.5589599609375, -0.33203125, -0.1051025390625, 0.121826171875, 0.3487548828125, 0.57568359375, 0.8026123046875, 1.029541015625, 1.2564697265625, 1.4833984375, 1.7103271484375, 1.937255859375, 2.1641845703125, 2.39111328125, 2.6180419921875, 2.844970703125, 3.0718994140625, 3.298828125, 3.5257568359375, 3.752685546875, 3.9796142578125, 4.20654296875, 4.4334716796875, 4.660400390625, 4.8873291015625, 5.1142578125, 5.3411865234375, 5.568115234375, 5.7950439453125, 6.02197265625, 6.2489013671875, 6.475830078125, 6.7027587890625, 6.9296875]}, "gradients/decoder.transformer.h.1.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 5.0, 8.0, 7.0, 15.0, 21.0, 32.0, 41.0, 73.0, 141.0, 233.0, 500.0, 1093.0, 2816.0, 9157.0, 51339.0, 700770.0, 2783335.0, 587718.0, 44147.0, 8206.0, 2580.0, 1023.0, 453.0, 249.0, 119.0, 78.0, 41.0, 31.0, 31.0, 8.0, 6.0, 5.0, 3.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.4375, -15.879638671875, -15.32177734375, -14.763916015625, -14.2060546875, -13.648193359375, -13.09033203125, -12.532470703125, -11.974609375, -11.416748046875, -10.85888671875, -10.301025390625, -9.7431640625, -9.185302734375, -8.62744140625, -8.069580078125, -7.51171875, -6.953857421875, -6.39599609375, -5.838134765625, -5.2802734375, -4.722412109375, -4.16455078125, -3.606689453125, -3.048828125, -2.490966796875, -1.93310546875, -1.375244140625, -0.8173828125, -0.259521484375, 0.29833984375, 0.856201171875, 1.4140625, 1.971923828125, 2.52978515625, 3.087646484375, 3.6455078125, 4.203369140625, 4.76123046875, 5.319091796875, 5.876953125, 6.434814453125, 6.99267578125, 7.550537109375, 8.1083984375, 8.666259765625, 9.22412109375, 9.781982421875, 10.33984375, 10.897705078125, 11.45556640625, 12.013427734375, 12.5712890625, 13.129150390625, 13.68701171875, 14.244873046875, 14.802734375, 15.360595703125, 15.91845703125, 16.476318359375, 17.0341796875, 17.592041015625, 18.14990234375, 18.707763671875, 19.265625]}, "gradients/decoder.transformer.h.1.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0, 6.0, 26.0, 95.0, 340.0, 948.0, 1662.0, 713.0, 216.0, 60.0, 13.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.0, -59.41064453125, -57.8212890625, -56.23193359375, -54.642578125, -53.05322265625, -51.4638671875, -49.87451171875, -48.28515625, -46.69580078125, -45.1064453125, -43.51708984375, -41.927734375, -40.33837890625, -38.7490234375, -37.15966796875, -35.5703125, -33.98095703125, -32.3916015625, -30.80224609375, -29.212890625, -27.62353515625, -26.0341796875, -24.44482421875, -22.85546875, -21.26611328125, -19.6767578125, -18.08740234375, -16.498046875, -14.90869140625, -13.3193359375, -11.72998046875, -10.140625, -8.55126953125, -6.9619140625, -5.37255859375, -3.783203125, -2.19384765625, -0.6044921875, 0.98486328125, 2.57421875, 4.16357421875, 5.7529296875, 7.34228515625, 8.931640625, 10.52099609375, 12.1103515625, 13.69970703125, 15.2890625, 16.87841796875, 18.4677734375, 20.05712890625, 21.646484375, 23.23583984375, 24.8251953125, 26.41455078125, 28.00390625, 29.59326171875, 31.1826171875, 32.77197265625, 34.361328125, 35.95068359375, 37.5400390625, 39.12939453125, 40.71875]}, "gradients/decoder.transformer.h.1.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 3.0, 13.0, 18.0, 78.0, 350.0, 2839.0, 4106209.0, 83254.0, 1249.0, 192.0, 58.0, 11.0, 6.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-90.625, -87.11328125, -83.6015625, -80.08984375, -76.578125, -73.06640625, -69.5546875, -66.04296875, -62.53125, -59.01953125, -55.5078125, -51.99609375, -48.484375, -44.97265625, -41.4609375, -37.94921875, -34.4375, -30.92578125, -27.4140625, -23.90234375, -20.390625, -16.87890625, -13.3671875, -9.85546875, -6.34375, -2.83203125, 0.6796875, 4.19140625, 7.703125, 11.21484375, 14.7265625, 18.23828125, 21.75, 25.26171875, 28.7734375, 32.28515625, 35.796875, 39.30859375, 42.8203125, 46.33203125, 49.84375, 53.35546875, 56.8671875, 60.37890625, 63.890625, 67.40234375, 70.9140625, 74.42578125, 77.9375, 81.44921875, 84.9609375, 88.47265625, 91.984375, 95.49609375, 99.0078125, 102.51953125, 106.03125, 109.54296875, 113.0546875, 116.56640625, 120.078125, 123.58984375, 127.1015625, 130.61328125, 134.125]}, "gradients/decoder.transformer.h.1.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 46.0, 461.0, 457.0, 45.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-289.13006591796875, -279.4825134277344, -269.8349609375, -260.1874084472656, -250.53985595703125, -240.89230346679688, -231.2447509765625, -221.59719848632812, -211.94964599609375, -202.30209350585938, -192.654541015625, -183.00698852539062, -173.35943603515625, -163.71188354492188, -154.0643310546875, -144.41677856445312, -134.7692413330078, -125.12168884277344, -115.47413635253906, -105.82658386230469, -96.17903137207031, -86.53147888183594, -76.8839340209961, -67.23638153076172, -57.588829040527344, -47.94127655029297, -38.293724060058594, -28.646175384521484, -18.99862289428711, -9.351070404052734, 0.296478271484375, 9.94403076171875, 19.591583251953125, 29.2391357421875, 38.886688232421875, 48.534236907958984, 58.18178939819336, 67.829345703125, 77.47689056396484, 87.12444305419922, 96.7719955444336, 106.41954803466797, 116.06710052490234, 125.71464538574219, 135.36219787597656, 145.00975036621094, 154.6573028564453, 164.3048553466797, 173.95240783691406, 183.59996032714844, 193.2475128173828, 202.8950653076172, 212.54261779785156, 222.19017028808594, 231.83770751953125, 241.48526000976562, 251.1328125, 260.7803649902344, 270.42791748046875, 280.0754699707031, 289.7230224609375, 299.3705749511719, 309.01812744140625, 318.6656799316406, 328.313232421875]}, "gradients/decoder.transformer.h.1.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 3.0, 2.0, 5.0, 3.0, 8.0, 10.0, 10.0, 18.0, 16.0, 11.0, 26.0, 19.0, 32.0, 31.0, 39.0, 22.0, 45.0, 47.0, 35.0, 45.0, 51.0, 45.0, 37.0, 39.0, 41.0, 40.0, 50.0, 35.0, 27.0, 41.0, 30.0, 26.0, 24.0, 25.0, 12.0, 19.0, 9.0, 11.0, 9.0, 2.0, 4.0, 2.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.79084396362305, -45.97328567504883, -44.15572738647461, -42.33816909790039, -40.52061080932617, -38.70305252075195, -36.885494232177734, -35.067935943603516, -33.2503776550293, -31.432819366455078, -29.61526107788086, -27.79770278930664, -25.980144500732422, -24.162586212158203, -22.345027923583984, -20.527469635009766, -18.709911346435547, -16.892353057861328, -15.07479476928711, -13.25723648071289, -11.439678192138672, -9.622119903564453, -7.804561614990234, -5.987003326416016, -4.169445037841797, -2.351886749267578, -0.5343284606933594, 1.2832298278808594, 3.100788116455078, 4.918346405029297, 6.735904693603516, 8.553462982177734, 10.371017456054688, 12.188575744628906, 14.006134033203125, 15.823692321777344, 17.641250610351562, 19.45880889892578, 21.2763671875, 23.09392547607422, 24.911483764648438, 26.729042053222656, 28.546600341796875, 30.364158630371094, 32.18171691894531, 33.99927520751953, 35.81683349609375, 37.63439178466797, 39.45195007324219, 41.269508361816406, 43.087066650390625, 44.904624938964844, 46.72218322753906, 48.53974151611328, 50.3572998046875, 52.17485809326172, 53.99241638183594, 55.809974670410156, 57.627532958984375, 59.445091247558594, 61.26264953613281, 63.08020782470703, 64.89776611328125, 66.71532440185547, 68.53288269042969]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 4.0, 1.0, 1.0, 2.0, 0.0, 7.0, 2.0, 2.0, 7.0, 5.0, 2.0, 10.0, 11.0, 16.0, 11.0, 21.0, 20.0, 33.0, 25.0, 31.0, 31.0, 29.0, 49.0, 54.0, 50.0, 43.0, 56.0, 44.0, 39.0, 47.0, 51.0, 48.0, 33.0, 37.0, 30.0, 25.0, 27.0, 14.0, 16.0, 21.0, 6.0, 15.0, 10.0, 10.0, 6.0, 3.0, 5.0, 3.0, 5.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.5234375, -5.325439453125, -5.12744140625, -4.929443359375, -4.7314453125, -4.533447265625, -4.33544921875, -4.137451171875, -3.939453125, -3.741455078125, -3.54345703125, -3.345458984375, -3.1474609375, -2.949462890625, -2.75146484375, -2.553466796875, -2.35546875, -2.157470703125, -1.95947265625, -1.761474609375, -1.5634765625, -1.365478515625, -1.16748046875, -0.969482421875, -0.771484375, -0.573486328125, -0.37548828125, -0.177490234375, 0.0205078125, 0.218505859375, 0.41650390625, 0.614501953125, 0.8125, 1.010498046875, 1.20849609375, 1.406494140625, 1.6044921875, 1.802490234375, 2.00048828125, 2.198486328125, 2.396484375, 2.594482421875, 2.79248046875, 2.990478515625, 3.1884765625, 3.386474609375, 3.58447265625, 3.782470703125, 3.98046875, 4.178466796875, 4.37646484375, 4.574462890625, 4.7724609375, 4.970458984375, 5.16845703125, 5.366455078125, 5.564453125, 5.762451171875, 5.96044921875, 6.158447265625, 6.3564453125, 6.554443359375, 6.75244140625, 6.950439453125, 7.1484375]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 4.0, 5.0, 9.0, 13.0, 16.0, 23.0, 38.0, 37.0, 54.0, 93.0, 154.0, 239.0, 352.0, 498.0, 744.0, 1133.0, 1758.0, 2609.0, 3940.0, 5992.0, 8986.0, 13844.0, 21539.0, 33967.0, 54452.0, 87990.0, 142932.0, 204429.0, 172586.0, 108240.0, 65649.0, 41390.0, 26180.0, 17120.0, 10896.0, 6850.0, 4706.0, 3110.0, 1970.0, 1401.0, 865.0, 552.0, 398.0, 251.0, 214.0, 103.0, 67.0, 53.0, 34.0, 24.0, 30.0, 10.0, 4.0, 5.0, 4.0, 3.0, 3.0, 1.0, 1.0, 1.0], "bins": [-0.4384765625, -0.4246482849121094, -0.41082000732421875, -0.3969917297363281, -0.3831634521484375, -0.3693351745605469, -0.35550689697265625, -0.3416786193847656, -0.327850341796875, -0.3140220642089844, -0.30019378662109375, -0.2863655090332031, -0.2725372314453125, -0.2587089538574219, -0.24488067626953125, -0.23105239868164062, -0.21722412109375, -0.20339584350585938, -0.18956756591796875, -0.17573928833007812, -0.1619110107421875, -0.14808273315429688, -0.13425445556640625, -0.12042617797851562, -0.106597900390625, -0.09276962280273438, -0.07894134521484375, -0.06511306762695312, -0.0512847900390625, -0.037456512451171875, -0.02362823486328125, -0.009799957275390625, 0.0040283203125, 0.017856597900390625, 0.03168487548828125, 0.045513153076171875, 0.0593414306640625, 0.07316970825195312, 0.08699798583984375, 0.10082626342773438, 0.114654541015625, 0.12848281860351562, 0.14231109619140625, 0.15613937377929688, 0.1699676513671875, 0.18379592895507812, 0.19762420654296875, 0.21145248413085938, 0.22528076171875, 0.23910903930664062, 0.25293731689453125, 0.2667655944824219, 0.2805938720703125, 0.2944221496582031, 0.30825042724609375, 0.3220787048339844, 0.335906982421875, 0.3497352600097656, 0.36356353759765625, 0.3773918151855469, 0.3912200927734375, 0.4050483703613281, 0.41887664794921875, 0.4327049255371094, 0.446533203125]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 7.0, 7.0, 5.0, 6.0, 9.0, 7.0, 18.0, 14.0, 13.0, 26.0, 21.0, 28.0, 25.0, 19.0, 13.0, 31.0, 26.0, 32.0, 27.0, 38.0, 36.0, 36.0, 1077.0, 34.0, 33.0, 54.0, 38.0, 37.0, 44.0, 32.0, 31.0, 23.0, 19.0, 20.0, 18.0, 16.0, 25.0, 7.0, 14.0, 12.0, 15.0, 9.0, 7.0, 1.0, 7.0, 3.0, 5.0, 3.0, 2.0, 0.0, 2.0, 3.0], "bins": [-3.6015625, -3.49749755859375, -3.3934326171875, -3.28936767578125, -3.185302734375, -3.08123779296875, -2.9771728515625, -2.87310791015625, -2.76904296875, -2.66497802734375, -2.5609130859375, -2.45684814453125, -2.352783203125, -2.24871826171875, -2.1446533203125, -2.04058837890625, -1.9365234375, -1.83245849609375, -1.7283935546875, -1.62432861328125, -1.520263671875, -1.41619873046875, -1.3121337890625, -1.20806884765625, -1.10400390625, -0.99993896484375, -0.8958740234375, -0.79180908203125, -0.687744140625, -0.58367919921875, -0.4796142578125, -0.37554931640625, -0.271484375, -0.16741943359375, -0.0633544921875, 0.04071044921875, 0.144775390625, 0.24884033203125, 0.3529052734375, 0.45697021484375, 0.56103515625, 0.66510009765625, 0.7691650390625, 0.87322998046875, 0.977294921875, 1.08135986328125, 1.1854248046875, 1.28948974609375, 1.3935546875, 1.49761962890625, 1.6016845703125, 1.70574951171875, 1.809814453125, 1.91387939453125, 2.0179443359375, 2.12200927734375, 2.22607421875, 2.33013916015625, 2.4342041015625, 2.53826904296875, 2.642333984375, 2.74639892578125, 2.8504638671875, 2.95452880859375, 3.05859375]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 2.0, 7.0, 13.0, 7.0, 25.0, 24.0, 40.0, 75.0, 83.0, 128.0, 184.0, 307.0, 514.0, 726.0, 1156.0, 1791.0, 2699.0, 4138.0, 6513.0, 10119.0, 15890.0, 24559.0, 37890.0, 58809.0, 90498.0, 137906.0, 1227445.0, 160922.0, 111102.0, 72130.0, 47028.0, 30124.0, 19176.0, 12516.0, 8033.0, 5080.0, 3383.0, 2133.0, 1396.0, 843.0, 609.0, 414.0, 249.0, 153.0, 103.0, 74.0, 38.0, 33.0, 21.0, 9.0, 8.0, 5.0, 8.0, 4.0, 2.0, 0.0, 1.0], "bins": [-0.38671875, -0.3752555847167969, -0.36379241943359375, -0.3523292541503906, -0.3408660888671875, -0.3294029235839844, -0.31793975830078125, -0.3064765930175781, -0.295013427734375, -0.2835502624511719, -0.27208709716796875, -0.2606239318847656, -0.2491607666015625, -0.23769760131835938, -0.22623443603515625, -0.21477127075195312, -0.20330810546875, -0.19184494018554688, -0.18038177490234375, -0.16891860961914062, -0.1574554443359375, -0.14599227905273438, -0.13452911376953125, -0.12306594848632812, -0.111602783203125, -0.10013961791992188, -0.08867645263671875, -0.07721328735351562, -0.0657501220703125, -0.054286956787109375, -0.04282379150390625, -0.031360626220703125, -0.0198974609375, -0.008434295654296875, 0.00302886962890625, 0.014492034912109375, 0.0259552001953125, 0.037418365478515625, 0.04888153076171875, 0.060344696044921875, 0.071807861328125, 0.08327102661132812, 0.09473419189453125, 0.10619735717773438, 0.1176605224609375, 0.12912368774414062, 0.14058685302734375, 0.15205001831054688, 0.16351318359375, 0.17497634887695312, 0.18643951416015625, 0.19790267944335938, 0.2093658447265625, 0.22082901000976562, 0.23229217529296875, 0.24375534057617188, 0.255218505859375, 0.2666816711425781, 0.27814483642578125, 0.2896080017089844, 0.3010711669921875, 0.3125343322753906, 0.32399749755859375, 0.3354606628417969, 0.346923828125]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 5.0, 1.0, 6.0, 6.0, 5.0, 4.0, 8.0, 8.0, 10.0, 24.0, 27.0, 25.0, 35.0, 32.0, 33.0, 45.0, 37.0, 50.0, 53.0, 76.0, 52.0, 55.0, 43.0, 52.0, 40.0, 44.0, 39.0, 32.0, 24.0, 27.0, 27.0, 26.0, 8.0, 12.0, 10.0, 9.0, 6.0, 3.0, 3.0, 4.0, 2.0, 1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00109100341796875, -0.0010564029216766357, -0.0010218024253845215, -0.0009872019290924072, -0.000952601432800293, -0.0009180009365081787, -0.0008834004402160645, -0.0008487999439239502, -0.0008141994476318359, -0.0007795989513397217, -0.0007449984550476074, -0.0007103979587554932, -0.0006757974624633789, -0.0006411969661712646, -0.0006065964698791504, -0.0005719959735870361, -0.0005373954772949219, -0.0005027949810028076, -0.00046819448471069336, -0.0004335939884185791, -0.00039899349212646484, -0.0003643929958343506, -0.00032979249954223633, -0.00029519200325012207, -0.0002605915069580078, -0.00022599101066589355, -0.0001913905143737793, -0.00015679001808166504, -0.00012218952178955078, -8.758902549743652e-05, -5.2988529205322266e-05, -1.8388032913208008e-05, 1.621246337890625e-05, 5.081295967102051e-05, 8.541345596313477e-05, 0.00012001395225524902, 0.00015461444854736328, 0.00018921494483947754, 0.0002238154411315918, 0.00025841593742370605, 0.0002930164337158203, 0.00032761693000793457, 0.00036221742630004883, 0.0003968179225921631, 0.00043141841888427734, 0.0004660189151763916, 0.0005006194114685059, 0.0005352199077606201, 0.0005698204040527344, 0.0006044209003448486, 0.0006390213966369629, 0.0006736218929290771, 0.0007082223892211914, 0.0007428228855133057, 0.0007774233818054199, 0.0008120238780975342, 0.0008466243743896484, 0.0008812248706817627, 0.000915825366973877, 0.0009504258632659912, 0.0009850263595581055, 0.0010196268558502197, 0.001054227352142334, 0.0010888278484344482, 0.0011234283447265625]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 10.0, 7.0, 10.0, 11.0, 12.0, 17.0, 17.0, 44.0, 32.0, 47.0, 70.0, 88.0, 114.0, 156.0, 250.0, 345.0, 588.0, 1007.0, 9320.0, 1020753.0, 12771.0, 1049.0, 595.0, 360.0, 244.0, 139.0, 104.0, 103.0, 83.0, 54.0, 25.0, 30.0, 18.0, 20.0, 17.0, 9.0, 10.0, 10.0, 6.0, 4.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0196990966796875, -0.019074201583862305, -0.01844930648803711, -0.017824411392211914, -0.01719951629638672, -0.016574621200561523, -0.015949726104736328, -0.015324831008911133, -0.014699935913085938, -0.014075040817260742, -0.013450145721435547, -0.012825250625610352, -0.012200355529785156, -0.011575460433959961, -0.010950565338134766, -0.01032567024230957, -0.009700775146484375, -0.00907588005065918, -0.008450984954833984, -0.007826089859008789, -0.007201194763183594, -0.0065762996673583984, -0.005951404571533203, -0.005326509475708008, -0.0047016143798828125, -0.004076719284057617, -0.003451824188232422, -0.0028269290924072266, -0.0022020339965820312, -0.001577138900756836, -0.0009522438049316406, -0.0003273487091064453, 0.00029754638671875, 0.0009224414825439453, 0.0015473365783691406, 0.002172231674194336, 0.0027971267700195312, 0.0034220218658447266, 0.004046916961669922, 0.004671812057495117, 0.0052967071533203125, 0.005921602249145508, 0.006546497344970703, 0.0071713924407958984, 0.007796287536621094, 0.008421182632446289, 0.009046077728271484, 0.00967097282409668, 0.010295867919921875, 0.01092076301574707, 0.011545658111572266, 0.012170553207397461, 0.012795448303222656, 0.013420343399047852, 0.014045238494873047, 0.014670133590698242, 0.015295028686523438, 0.015919923782348633, 0.016544818878173828, 0.017169713973999023, 0.01779460906982422, 0.018419504165649414, 0.01904439926147461, 0.019669294357299805, 0.020294189453125]}, "gradients/decoder.transformer.h.1.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 14.0, 35.0, 95.0, 179.0, 251.0, 219.0, 143.0, 57.0, 13.0, 6.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00028665721765719354, -0.00024328375002369285, -0.00019991029694210738, -0.0001565368438605219, -0.00011316337622702122, -6.978990859352052e-05, -2.6416470063850284e-05, 1.695699756965041e-05, 6.033046520315111e-05, 0.00010370392556069419, 0.00014707738591823727, 0.00019045083899982274, 0.00023382430663332343, 0.0002771977742668241, 0.00032057121279649436, 0.00036394468042999506, 0.00040731814806349576, 0.00045069161569699645, 0.0004940650542266667, 0.0005374385509639978, 0.0005808119894936681, 0.0006241854280233383, 0.0006675588665530086, 0.0007109323632903397, 0.0007543058600276709, 0.0007976792985573411, 0.0008410527952946723, 0.0008844262338243425, 0.0009277997305616736, 0.0009711731690913439, 0.0010145466076210141, 0.0010579200461506844, 0.0011012936010956764, 0.0011446670396253467, 0.001188040478155017, 0.001231414033100009, 0.0012747874716296792, 0.0013181609101593494, 0.0013615343486890197, 0.00140490778721869, 0.001448281342163682, 0.0014916547806933522, 0.0015350282192230225, 0.0015784017741680145, 0.0016217752126976848, 0.001665148651227355, 0.0017085220897570252, 0.0017518955282866955, 0.0017952689668163657, 0.001838642405346036, 0.0018820158438757062, 0.0019253892824053764, 0.0019687628373503685, 0.0020121363922953606, 0.002055509714409709, 0.002098883269354701, 0.0021422565914690495, 0.0021856301464140415, 0.00222900346852839, 0.002272377023473382, 0.0023157503455877304, 0.0023591239005327225, 0.0024024974554777145, 0.002445870777592063, 0.002489244332537055]}, "gradients/decoder.transformer.h.1.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 3.0, 0.0, 8.0, 3.0, 8.0, 9.0, 5.0, 6.0, 7.0, 12.0, 16.0, 15.0, 24.0, 14.0, 31.0, 18.0, 24.0, 32.0, 27.0, 38.0, 36.0, 30.0, 49.0, 44.0, 48.0, 43.0, 35.0, 36.0, 42.0, 31.0, 25.0, 25.0, 32.0, 30.0, 26.0, 22.0, 21.0, 15.0, 15.0, 20.0, 12.0, 9.0, 8.0, 13.0, 12.0, 6.0, 7.0, 7.0, 5.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0], "bins": [-0.0005502700805664062, -0.0005331402644515038, -0.0005160104483366013, -0.0004988806322216988, -0.00048175081610679626, -0.00046462099999189377, -0.00044749118387699127, -0.0004303613677620888, -0.0004132315516471863, -0.0003961017355322838, -0.0003789719194173813, -0.0003618421033024788, -0.0003447122871875763, -0.0003275824710726738, -0.0003104526549577713, -0.0002933228388428688, -0.0002761930227279663, -0.0002590632066130638, -0.00024193339049816132, -0.00022480357438325882, -0.00020767375826835632, -0.00019054394215345383, -0.00017341412603855133, -0.00015628430992364883, -0.00013915449380874634, -0.00012202467769384384, -0.00010489486157894135, -8.776504546403885e-05, -7.063522934913635e-05, -5.3505413234233856e-05, -3.637559711933136e-05, -1.9245781004428864e-05, -2.115964889526367e-06, 1.5013851225376129e-05, 3.2143667340278625e-05, 4.927348345518112e-05, 6.640329957008362e-05, 8.353311568498611e-05, 0.00010066293179988861, 0.00011779274791479111, 0.0001349225640296936, 0.0001520523801445961, 0.0001691821962594986, 0.0001863120123744011, 0.0002034418284893036, 0.00022057164460420609, 0.00023770146071910858, 0.0002548312768340111, 0.0002719610929489136, 0.00028909090906381607, 0.00030622072517871857, 0.00032335054129362106, 0.00034048035740852356, 0.00035761017352342606, 0.00037473998963832855, 0.00039186980575323105, 0.00040899962186813354, 0.00042612943798303604, 0.00044325925409793854, 0.00046038907021284103, 0.00047751888632774353, 0.000494648702442646, 0.0005117785185575485, 0.000528908334672451, 0.0005460381507873535]}, "gradients/decoder.transformer.h.1.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 4.0, 1.0, 1.0, 2.0, 0.0, 7.0, 2.0, 2.0, 7.0, 5.0, 2.0, 10.0, 11.0, 16.0, 11.0, 21.0, 20.0, 33.0, 25.0, 31.0, 31.0, 29.0, 49.0, 54.0, 50.0, 43.0, 56.0, 44.0, 39.0, 47.0, 51.0, 48.0, 33.0, 37.0, 30.0, 25.0, 27.0, 14.0, 16.0, 21.0, 6.0, 15.0, 10.0, 10.0, 6.0, 3.0, 5.0, 3.0, 5.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.5234375, -5.325439453125, -5.12744140625, -4.929443359375, -4.7314453125, -4.533447265625, -4.33544921875, -4.137451171875, -3.939453125, -3.741455078125, -3.54345703125, -3.345458984375, -3.1474609375, -2.949462890625, -2.75146484375, -2.553466796875, -2.35546875, -2.157470703125, -1.95947265625, -1.761474609375, -1.5634765625, -1.365478515625, -1.16748046875, -0.969482421875, -0.771484375, -0.573486328125, -0.37548828125, -0.177490234375, 0.0205078125, 0.218505859375, 0.41650390625, 0.614501953125, 0.8125, 1.010498046875, 1.20849609375, 1.406494140625, 1.6044921875, 1.802490234375, 2.00048828125, 2.198486328125, 2.396484375, 2.594482421875, 2.79248046875, 2.990478515625, 3.1884765625, 3.386474609375, 3.58447265625, 3.782470703125, 3.98046875, 4.178466796875, 4.37646484375, 4.574462890625, 4.7724609375, 4.970458984375, 5.16845703125, 5.366455078125, 5.564453125, 5.762451171875, 5.96044921875, 6.158447265625, 6.3564453125, 6.554443359375, 6.75244140625, 6.950439453125, 7.1484375]}, "gradients/decoder.transformer.h.1.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 6.0, 5.0, 5.0, 7.0, 9.0, 13.0, 2.0, 18.0, 24.0, 29.0, 42.0, 57.0, 99.0, 112.0, 178.0, 231.0, 339.0, 509.0, 764.0, 1354.0, 2557.0, 6795.0, 27940.0, 245810.0, 657200.0, 81862.0, 13352.0, 4299.0, 1897.0, 1027.0, 602.0, 450.0, 263.0, 166.0, 155.0, 83.0, 89.0, 53.0, 53.0, 28.0, 26.0, 14.0, 17.0, 5.0, 6.0, 4.0, 2.0, 5.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-12.3046875, -11.898193359375, -11.49169921875, -11.085205078125, -10.6787109375, -10.272216796875, -9.86572265625, -9.459228515625, -9.052734375, -8.646240234375, -8.23974609375, -7.833251953125, -7.4267578125, -7.020263671875, -6.61376953125, -6.207275390625, -5.80078125, -5.394287109375, -4.98779296875, -4.581298828125, -4.1748046875, -3.768310546875, -3.36181640625, -2.955322265625, -2.548828125, -2.142333984375, -1.73583984375, -1.329345703125, -0.9228515625, -0.516357421875, -0.10986328125, 0.296630859375, 0.703125, 1.109619140625, 1.51611328125, 1.922607421875, 2.3291015625, 2.735595703125, 3.14208984375, 3.548583984375, 3.955078125, 4.361572265625, 4.76806640625, 5.174560546875, 5.5810546875, 5.987548828125, 6.39404296875, 6.800537109375, 7.20703125, 7.613525390625, 8.02001953125, 8.426513671875, 8.8330078125, 9.239501953125, 9.64599609375, 10.052490234375, 10.458984375, 10.865478515625, 11.27197265625, 11.678466796875, 12.0849609375, 12.491455078125, 12.89794921875, 13.304443359375, 13.7109375]}, "gradients/decoder.transformer.h.1.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 7.0, 3.0, 8.0, 17.0, 15.0, 18.0, 26.0, 38.0, 51.0, 60.0, 105.0, 144.0, 427.0, 1603.0, 179.0, 90.0, 70.0, 52.0, 31.0, 26.0, 20.0, 10.0, 15.0, 9.0, 5.0, 12.0, 9.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.6875, -26.703125, -25.71875, -24.734375, -23.75, -22.765625, -21.78125, -20.796875, -19.8125, -18.828125, -17.84375, -16.859375, -15.875, -14.890625, -13.90625, -12.921875, -11.9375, -10.953125, -9.96875, -8.984375, -8.0, -7.015625, -6.03125, -5.046875, -4.0625, -3.078125, -2.09375, -1.109375, -0.125, 0.859375, 1.84375, 2.828125, 3.8125, 4.796875, 5.78125, 6.765625, 7.75, 8.734375, 9.71875, 10.703125, 11.6875, 12.671875, 13.65625, 14.640625, 15.625, 16.609375, 17.59375, 18.578125, 19.5625, 20.546875, 21.53125, 22.515625, 23.5, 24.484375, 25.46875, 26.453125, 27.4375, 28.421875, 29.40625, 30.390625, 31.375, 32.359375, 33.34375, 34.328125, 35.3125]}, "gradients/decoder.transformer.h.1.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 5.0, 1.0, 4.0, 7.0, 8.0, 11.0, 21.0, 28.0, 40.0, 72.0, 116.0, 168.0, 266.0, 738.0, 5074.0, 3126082.0, 11230.0, 1005.0, 355.0, 190.0, 101.0, 62.0, 45.0, 27.0, 25.0, 9.0, 11.0, 1.0, 5.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-75.4375, -73.318359375, -71.19921875, -69.080078125, -66.9609375, -64.841796875, -62.72265625, -60.603515625, -58.484375, -56.365234375, -54.24609375, -52.126953125, -50.0078125, -47.888671875, -45.76953125, -43.650390625, -41.53125, -39.412109375, -37.29296875, -35.173828125, -33.0546875, -30.935546875, -28.81640625, -26.697265625, -24.578125, -22.458984375, -20.33984375, -18.220703125, -16.1015625, -13.982421875, -11.86328125, -9.744140625, -7.625, -5.505859375, -3.38671875, -1.267578125, 0.8515625, 2.970703125, 5.08984375, 7.208984375, 9.328125, 11.447265625, 13.56640625, 15.685546875, 17.8046875, 19.923828125, 22.04296875, 24.162109375, 26.28125, 28.400390625, 30.51953125, 32.638671875, 34.7578125, 36.876953125, 38.99609375, 41.115234375, 43.234375, 45.353515625, 47.47265625, 49.591796875, 51.7109375, 53.830078125, 55.94921875, 58.068359375, 60.1875]}, "gradients/decoder.transformer.h.1.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 78.0, 825.0, 103.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-62.83654022216797, -55.443214416503906, -48.049888610839844, -40.65656280517578, -33.26323699951172, -25.869911193847656, -18.476585388183594, -11.083259582519531, -3.6899337768554688, 3.7033920288085938, 11.096717834472656, 18.49004364013672, 25.88336944580078, 33.276695251464844, 40.670021057128906, 48.06334686279297, 55.45667266845703, 62.849998474121094, 70.24332427978516, 77.63665008544922, 85.02997589111328, 92.42330169677734, 99.8166275024414, 107.20995330810547, 114.60327911376953, 121.9966049194336, 129.38992309570312, 136.78326416015625, 144.17657470703125, 151.56991577148438, 158.96322631835938, 166.3565673828125, 173.7498779296875, 181.14320373535156, 188.53652954101562, 195.9298553466797, 203.32318115234375, 210.7165069580078, 218.10983276367188, 225.50315856933594, 232.896484375, 240.28981018066406, 247.68313598632812, 255.0764617919922, 262.46978759765625, 269.86309814453125, 277.2564392089844, 284.6497802734375, 292.0430908203125, 299.4364013671875, 306.8297424316406, 314.22308349609375, 321.61639404296875, 329.00970458984375, 336.4030456542969, 343.79638671875, 351.189697265625, 358.5830078125, 365.9763488769531, 373.36968994140625, 380.76300048828125, 388.15631103515625, 395.5496520996094, 402.9429931640625, 410.3363037109375]}, "gradients/decoder.transformer.h.1.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 6.0, 5.0, 7.0, 9.0, 7.0, 8.0, 8.0, 10.0, 15.0, 19.0, 25.0, 23.0, 21.0, 30.0, 36.0, 23.0, 30.0, 36.0, 29.0, 47.0, 43.0, 34.0, 42.0, 42.0, 35.0, 36.0, 43.0, 36.0, 38.0, 30.0, 25.0, 28.0, 30.0, 21.0, 16.0, 14.0, 16.0, 15.0, 8.0, 10.0, 10.0, 9.0, 8.0, 5.0, 5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 3.0, 2.0, 0.0, 2.0], "bins": [-49.26926803588867, -47.708824157714844, -46.148380279541016, -44.58793640136719, -43.027496337890625, -41.4670524597168, -39.90660858154297, -38.34616470336914, -36.78572082519531, -35.225276947021484, -33.664833068847656, -32.10438919067383, -30.543947219848633, -28.983503341674805, -27.42306137084961, -25.86261749267578, -24.302173614501953, -22.741729736328125, -21.181285858154297, -19.6208438873291, -18.060400009155273, -16.499956130981445, -14.939513206481934, -13.379070281982422, -11.818626403808594, -10.258182525634766, -8.697739601135254, -7.137296199798584, -5.576852798461914, -4.016409397125244, -2.455965995788574, -0.8955230712890625, 0.6649246215820312, 2.225368022918701, 3.785811424255371, 5.346254825592041, 6.906698226928711, 8.467142105102539, 10.02758502960205, 11.588027954101562, 13.14847183227539, 14.708915710449219, 16.269359588623047, 17.829801559448242, 19.39024543762207, 20.9506893157959, 22.511131286621094, 24.071575164794922, 25.63201904296875, 27.192462921142578, 28.752906799316406, 30.3133487701416, 31.87379264831543, 33.434234619140625, 34.99467849731445, 36.55512237548828, 38.11556625366211, 39.67601013183594, 41.236454010009766, 42.796897888183594, 44.357337951660156, 45.917781829833984, 47.47822570800781, 49.03866958618164, 50.59911346435547]}, "gradients/decoder.transformer.h.0.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 3.0, 2.0, 3.0, 1.0, 2.0, 4.0, 4.0, 8.0, 14.0, 11.0, 13.0, 18.0, 10.0, 17.0, 18.0, 31.0, 34.0, 32.0, 41.0, 38.0, 47.0, 35.0, 62.0, 37.0, 57.0, 39.0, 49.0, 41.0, 43.0, 37.0, 27.0, 39.0, 32.0, 25.0, 24.0, 22.0, 19.0, 19.0, 8.0, 8.0, 10.0, 10.0, 4.0, 4.0, 7.0, 2.0, 0.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.671875, -8.4202880859375, -8.168701171875, -7.9171142578125, -7.66552734375, -7.4139404296875, -7.162353515625, -6.9107666015625, -6.6591796875, -6.4075927734375, -6.156005859375, -5.9044189453125, -5.65283203125, -5.4012451171875, -5.149658203125, -4.8980712890625, -4.646484375, -4.3948974609375, -4.143310546875, -3.8917236328125, -3.64013671875, -3.3885498046875, -3.136962890625, -2.8853759765625, -2.6337890625, -2.3822021484375, -2.130615234375, -1.8790283203125, -1.62744140625, -1.3758544921875, -1.124267578125, -0.8726806640625, -0.62109375, -0.3695068359375, -0.117919921875, 0.1336669921875, 0.38525390625, 0.6368408203125, 0.888427734375, 1.1400146484375, 1.3916015625, 1.6431884765625, 1.894775390625, 2.1463623046875, 2.39794921875, 2.6495361328125, 2.901123046875, 3.1527099609375, 3.404296875, 3.6558837890625, 3.907470703125, 4.1590576171875, 4.41064453125, 4.6622314453125, 4.913818359375, 5.1654052734375, 5.4169921875, 5.6685791015625, 5.920166015625, 6.1717529296875, 6.42333984375, 6.6749267578125, 6.926513671875, 7.1781005859375, 7.4296875]}, "gradients/decoder.transformer.h.0.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 6.0, 2.0, 2.0, 5.0, 7.0, 10.0, 17.0, 21.0, 26.0, 27.0, 38.0, 52.0, 88.0, 128.0, 159.0, 189.0, 275.0, 442.0, 640.0, 1103.0, 2953.0, 34420.0, 3327362.0, 812960.0, 8800.0, 1860.0, 833.0, 560.0, 371.0, 246.0, 186.0, 124.0, 88.0, 78.0, 55.0, 41.0, 27.0, 21.0, 14.0, 9.0, 12.0, 6.0, 8.0, 6.0, 2.0, 4.0, 3.0, 2.0, 3.0, 3.0], "bins": [-47.96875, -46.6650390625, -45.361328125, -44.0576171875, -42.75390625, -41.4501953125, -40.146484375, -38.8427734375, -37.5390625, -36.2353515625, -34.931640625, -33.6279296875, -32.32421875, -31.0205078125, -29.716796875, -28.4130859375, -27.109375, -25.8056640625, -24.501953125, -23.1982421875, -21.89453125, -20.5908203125, -19.287109375, -17.9833984375, -16.6796875, -15.3759765625, -14.072265625, -12.7685546875, -11.46484375, -10.1611328125, -8.857421875, -7.5537109375, -6.25, -4.9462890625, -3.642578125, -2.3388671875, -1.03515625, 0.2685546875, 1.572265625, 2.8759765625, 4.1796875, 5.4833984375, 6.787109375, 8.0908203125, 9.39453125, 10.6982421875, 12.001953125, 13.3056640625, 14.609375, 15.9130859375, 17.216796875, 18.5205078125, 19.82421875, 21.1279296875, 22.431640625, 23.7353515625, 25.0390625, 26.3427734375, 27.646484375, 28.9501953125, 30.25390625, 31.5576171875, 32.861328125, 34.1650390625, 35.46875]}, "gradients/decoder.transformer.h.0.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 8.0, 6.0, 22.0, 88.0, 368.0, 1253.0, 1512.0, 594.0, 153.0, 44.0, 17.0, 3.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.65625, -40.36279296875, -39.0693359375, -37.77587890625, -36.482421875, -35.18896484375, -33.8955078125, -32.60205078125, -31.30859375, -30.01513671875, -28.7216796875, -27.42822265625, -26.134765625, -24.84130859375, -23.5478515625, -22.25439453125, -20.9609375, -19.66748046875, -18.3740234375, -17.08056640625, -15.787109375, -14.49365234375, -13.2001953125, -11.90673828125, -10.61328125, -9.31982421875, -8.0263671875, -6.73291015625, -5.439453125, -4.14599609375, -2.8525390625, -1.55908203125, -0.265625, 1.02783203125, 2.3212890625, 3.61474609375, 4.908203125, 6.20166015625, 7.4951171875, 8.78857421875, 10.08203125, 11.37548828125, 12.6689453125, 13.96240234375, 15.255859375, 16.54931640625, 17.8427734375, 19.13623046875, 20.4296875, 21.72314453125, 23.0166015625, 24.31005859375, 25.603515625, 26.89697265625, 28.1904296875, 29.48388671875, 30.77734375, 32.07080078125, 33.3642578125, 34.65771484375, 35.951171875, 37.24462890625, 38.5380859375, 39.83154296875, 41.125]}, "gradients/decoder.transformer.h.0.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 7.0, 2.0, 3.0, 3.0, 15.0, 21.0, 38.0, 91.0, 217.0, 442.0, 1231.0, 6943.0, 1344939.0, 2828211.0, 9755.0, 1459.0, 496.0, 199.0, 104.0, 39.0, 21.0, 10.0, 11.0, 7.0, 3.0, 1.0, 4.0, 2.0, 3.0, 5.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.78125, -39.4248046875, -38.068359375, -36.7119140625, -35.35546875, -33.9990234375, -32.642578125, -31.2861328125, -29.9296875, -28.5732421875, -27.216796875, -25.8603515625, -24.50390625, -23.1474609375, -21.791015625, -20.4345703125, -19.078125, -17.7216796875, -16.365234375, -15.0087890625, -13.65234375, -12.2958984375, -10.939453125, -9.5830078125, -8.2265625, -6.8701171875, -5.513671875, -4.1572265625, -2.80078125, -1.4443359375, -0.087890625, 1.2685546875, 2.625, 3.9814453125, 5.337890625, 6.6943359375, 8.05078125, 9.4072265625, 10.763671875, 12.1201171875, 13.4765625, 14.8330078125, 16.189453125, 17.5458984375, 18.90234375, 20.2587890625, 21.615234375, 22.9716796875, 24.328125, 25.6845703125, 27.041015625, 28.3974609375, 29.75390625, 31.1103515625, 32.466796875, 33.8232421875, 35.1796875, 36.5361328125, 37.892578125, 39.2490234375, 40.60546875, 41.9619140625, 43.318359375, 44.6748046875, 46.03125]}, "gradients/decoder.transformer.h.0.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 8.0, 9.0, 11.0, 23.0, 27.0, 51.0, 57.0, 75.0, 83.0, 102.0, 115.0, 110.0, 90.0, 76.0, 51.0, 41.0, 29.0, 25.0, 10.0, 6.0, 4.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-100.0187759399414, -97.2203140258789, -94.42184448242188, -91.62338256835938, -88.82491302490234, -86.02645111083984, -83.22798156738281, -80.42951965332031, -77.63105773925781, -74.83259582519531, -72.03412628173828, -69.23566436767578, -66.43719482421875, -63.63873291015625, -60.840267181396484, -58.04180145263672, -55.24333572387695, -52.44486999511719, -49.64640426635742, -46.847938537597656, -44.049476623535156, -41.25101089477539, -38.452545166015625, -35.654083251953125, -32.855613708496094, -30.057147979736328, -27.258684158325195, -24.46021842956543, -21.661754608154297, -18.86328887939453, -16.064823150634766, -13.266359329223633, -10.4678955078125, -7.669430732727051, -4.870965480804443, -2.072500228881836, 0.7259645462036133, 3.5244293212890625, 6.322895050048828, 9.121358871459961, 11.919824600219727, 14.718289375305176, 17.516754150390625, 20.31521987915039, 23.113685607910156, 25.91214942932129, 28.710615158081055, 31.509078979492188, 34.30754470825195, 37.10601043701172, 39.904476165771484, 42.70294189453125, 45.50140380859375, 48.299869537353516, 51.09833526611328, 53.89679718017578, 56.69526672363281, 59.49373245239258, 62.292198181152344, 65.09066009521484, 67.88912963867188, 70.68759155273438, 73.48605346679688, 76.2845230102539, 79.0829849243164]}, "gradients/decoder.transformer.h.0.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 3.0, 11.0, 6.0, 6.0, 7.0, 8.0, 9.0, 17.0, 12.0, 22.0, 22.0, 35.0, 28.0, 33.0, 33.0, 38.0, 31.0, 34.0, 43.0, 43.0, 43.0, 44.0, 41.0, 49.0, 42.0, 45.0, 22.0, 40.0, 23.0, 33.0, 30.0, 20.0, 23.0, 19.0, 14.0, 11.0, 10.0, 11.0, 5.0, 11.0, 5.0, 9.0, 4.0, 8.0, 4.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-39.16655349731445, -37.79261016845703, -36.418663024902344, -35.04471969604492, -33.670772552490234, -32.29682922363281, -30.922883987426758, -29.548938751220703, -28.17499351501465, -26.801048278808594, -25.42710304260254, -24.053157806396484, -22.679214477539062, -21.305267333984375, -19.931324005126953, -18.5573787689209, -17.183433532714844, -15.809488296508789, -14.435543060302734, -13.061598777770996, -11.687653541564941, -10.313708305358887, -8.939764022827148, -7.565818786621094, -6.191873550415039, -4.817928314208984, -3.443983554840088, -2.0700387954711914, -0.6960935592651367, 0.677851676940918, 2.0517959594726562, 3.425741195678711, 4.799690246582031, 6.173635482788086, 7.547580242156982, 8.921525001525879, 10.295470237731934, 11.669415473937988, 13.043359756469727, 14.417304992675781, 15.791250228881836, 17.16519546508789, 18.539140701293945, 19.9130859375, 21.287029266357422, 22.66097640991211, 24.03491973876953, 25.408864974975586, 26.78281021118164, 28.156755447387695, 29.53070068359375, 30.904644012451172, 32.27859115600586, 33.65253448486328, 35.02648162841797, 36.40042495727539, 37.77436828613281, 39.148311614990234, 40.52225875854492, 41.896202087402344, 43.27014923095703, 44.64409255981445, 46.018035888671875, 47.39198303222656, 48.76593017578125]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 5.0, 5.0, 7.0, 5.0, 7.0, 10.0, 11.0, 16.0, 16.0, 24.0, 15.0, 29.0, 30.0, 44.0, 31.0, 30.0, 38.0, 43.0, 41.0, 40.0, 46.0, 43.0, 49.0, 47.0, 42.0, 51.0, 24.0, 34.0, 25.0, 31.0, 29.0, 20.0, 14.0, 16.0, 17.0, 13.0, 11.0, 10.0, 6.0, 9.0, 9.0, 8.0, 4.0, 2.0, 2.0, 5.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-78.6875, -76.0126953125, -73.337890625, -70.6630859375, -67.98828125, -65.3134765625, -62.638671875, -59.9638671875, -57.2890625, -54.6142578125, -51.939453125, -49.2646484375, -46.58984375, -43.9150390625, -41.240234375, -38.5654296875, -35.890625, -33.2158203125, -30.541015625, -27.8662109375, -25.19140625, -22.5166015625, -19.841796875, -17.1669921875, -14.4921875, -11.8173828125, -9.142578125, -6.4677734375, -3.79296875, -1.1181640625, 1.556640625, 4.2314453125, 6.90625, 9.5810546875, 12.255859375, 14.9306640625, 17.60546875, 20.2802734375, 22.955078125, 25.6298828125, 28.3046875, 30.9794921875, 33.654296875, 36.3291015625, 39.00390625, 41.6787109375, 44.353515625, 47.0283203125, 49.703125, 52.3779296875, 55.052734375, 57.7275390625, 60.40234375, 63.0771484375, 65.751953125, 68.4267578125, 71.1015625, 73.7763671875, 76.451171875, 79.1259765625, 81.80078125, 84.4755859375, 87.150390625, 89.8251953125, 92.5]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0, 5.0, 7.0, 13.0, 15.0, 36.0, 36.0, 67.0, 108.0, 176.0, 251.0, 445.0, 620.0, 955.0, 1421.0, 2157.0, 3130.0, 4821.0, 7104.0, 10613.0, 15360.0, 23306.0, 34972.0, 52678.0, 80170.0, 122507.0, 174126.0, 169161.0, 116686.0, 76830.0, 50292.0, 33468.0, 22085.0, 14756.0, 9894.0, 6636.0, 4510.0, 3163.0, 2005.0, 1313.0, 911.0, 593.0, 429.0, 271.0, 157.0, 114.0, 82.0, 43.0, 22.0, 16.0, 11.0, 4.0, 4.0, 4.0, 1.0, 2.0, 3.0], "bins": [-5.83984375, -5.66778564453125, -5.4957275390625, -5.32366943359375, -5.151611328125, -4.97955322265625, -4.8074951171875, -4.63543701171875, -4.46337890625, -4.29132080078125, -4.1192626953125, -3.94720458984375, -3.775146484375, -3.60308837890625, -3.4310302734375, -3.25897216796875, -3.0869140625, -2.91485595703125, -2.7427978515625, -2.57073974609375, -2.398681640625, -2.22662353515625, -2.0545654296875, -1.88250732421875, -1.71044921875, -1.53839111328125, -1.3663330078125, -1.19427490234375, -1.022216796875, -0.85015869140625, -0.6781005859375, -0.50604248046875, -0.333984375, -0.16192626953125, 0.0101318359375, 0.18218994140625, 0.354248046875, 0.52630615234375, 0.6983642578125, 0.87042236328125, 1.04248046875, 1.21453857421875, 1.3865966796875, 1.55865478515625, 1.730712890625, 1.90277099609375, 2.0748291015625, 2.24688720703125, 2.4189453125, 2.59100341796875, 2.7630615234375, 2.93511962890625, 3.107177734375, 3.27923583984375, 3.4512939453125, 3.62335205078125, 3.79541015625, 3.96746826171875, 4.1395263671875, 4.31158447265625, 4.483642578125, 4.65570068359375, 4.8277587890625, 4.99981689453125, 5.171875]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 6.0, 0.0, 1.0, 3.0, 10.0, 13.0, 13.0, 15.0, 13.0, 21.0, 19.0, 27.0, 21.0, 36.0, 45.0, 39.0, 38.0, 39.0, 50.0, 49.0, 52.0, 1075.0, 48.0, 52.0, 48.0, 42.0, 40.0, 30.0, 38.0, 27.0, 26.0, 19.0, 11.0, 13.0, 17.0, 10.0, 9.0, 6.0, 5.0, 3.0, 5.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-54.15625, -52.23291015625, -50.3095703125, -48.38623046875, -46.462890625, -44.53955078125, -42.6162109375, -40.69287109375, -38.76953125, -36.84619140625, -34.9228515625, -32.99951171875, -31.076171875, -29.15283203125, -27.2294921875, -25.30615234375, -23.3828125, -21.45947265625, -19.5361328125, -17.61279296875, -15.689453125, -13.76611328125, -11.8427734375, -9.91943359375, -7.99609375, -6.07275390625, -4.1494140625, -2.22607421875, -0.302734375, 1.62060546875, 3.5439453125, 5.46728515625, 7.390625, 9.31396484375, 11.2373046875, 13.16064453125, 15.083984375, 17.00732421875, 18.9306640625, 20.85400390625, 22.77734375, 24.70068359375, 26.6240234375, 28.54736328125, 30.470703125, 32.39404296875, 34.3173828125, 36.24072265625, 38.1640625, 40.08740234375, 42.0107421875, 43.93408203125, 45.857421875, 47.78076171875, 49.7041015625, 51.62744140625, 53.55078125, 55.47412109375, 57.3974609375, 59.32080078125, 61.244140625, 63.16748046875, 65.0908203125, 67.01416015625, 68.9375]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 5.0, 8.0, 18.0, 14.0, 36.0, 67.0, 76.0, 100.0, 173.0, 250.0, 396.0, 606.0, 951.0, 1519.0, 2356.0, 3729.0, 5719.0, 9149.0, 14579.0, 22695.0, 36381.0, 57790.0, 92529.0, 146000.0, 1241888.0, 165937.0, 110278.0, 68265.0, 42413.0, 26499.0, 16829.0, 10809.0, 6843.0, 4324.0, 2832.0, 1712.0, 1162.0, 788.0, 487.0, 307.0, 217.0, 132.0, 102.0, 46.0, 45.0, 31.0, 18.0, 15.0, 6.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.42578125, -5.2489013671875, -5.072021484375, -4.8951416015625, -4.71826171875, -4.5413818359375, -4.364501953125, -4.1876220703125, -4.0107421875, -3.8338623046875, -3.656982421875, -3.4801025390625, -3.30322265625, -3.1263427734375, -2.949462890625, -2.7725830078125, -2.595703125, -2.4188232421875, -2.241943359375, -2.0650634765625, -1.88818359375, -1.7113037109375, -1.534423828125, -1.3575439453125, -1.1806640625, -1.0037841796875, -0.826904296875, -0.6500244140625, -0.47314453125, -0.2962646484375, -0.119384765625, 0.0574951171875, 0.234375, 0.4112548828125, 0.588134765625, 0.7650146484375, 0.94189453125, 1.1187744140625, 1.295654296875, 1.4725341796875, 1.6494140625, 1.8262939453125, 2.003173828125, 2.1800537109375, 2.35693359375, 2.5338134765625, 2.710693359375, 2.8875732421875, 3.064453125, 3.2413330078125, 3.418212890625, 3.5950927734375, 3.77197265625, 3.9488525390625, 4.125732421875, 4.3026123046875, 4.4794921875, 4.6563720703125, 4.833251953125, 5.0101318359375, 5.18701171875, 5.3638916015625, 5.540771484375, 5.7176513671875, 5.89453125]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 3.0, 6.0, 4.0, 5.0, 8.0, 10.0, 14.0, 9.0, 19.0, 23.0, 25.0, 24.0, 38.0, 43.0, 52.0, 66.0, 56.0, 66.0, 75.0, 82.0, 58.0, 55.0, 45.0, 42.0, 30.0, 37.0, 19.0, 25.0, 17.0, 8.0, 10.0, 9.0, 7.0, 4.0, 2.0, 1.0, 2.0, 6.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.027374267578125, -0.02653050422668457, -0.02568674087524414, -0.02484297752380371, -0.02399921417236328, -0.02315545082092285, -0.022311687469482422, -0.021467924118041992, -0.020624160766601562, -0.019780397415161133, -0.018936634063720703, -0.018092870712280273, -0.017249107360839844, -0.016405344009399414, -0.015561580657958984, -0.014717817306518555, -0.013874053955078125, -0.013030290603637695, -0.012186527252197266, -0.011342763900756836, -0.010499000549316406, -0.009655237197875977, -0.008811473846435547, -0.007967710494995117, -0.0071239471435546875, -0.006280183792114258, -0.005436420440673828, -0.0045926570892333984, -0.0037488937377929688, -0.002905130386352539, -0.0020613670349121094, -0.0012176036834716797, -0.00037384033203125, 0.0004699230194091797, 0.0013136863708496094, 0.002157449722290039, 0.0030012130737304688, 0.0038449764251708984, 0.004688739776611328, 0.005532503128051758, 0.0063762664794921875, 0.007220029830932617, 0.008063793182373047, 0.008907556533813477, 0.009751319885253906, 0.010595083236694336, 0.011438846588134766, 0.012282609939575195, 0.013126373291015625, 0.013970136642456055, 0.014813899993896484, 0.015657663345336914, 0.016501426696777344, 0.017345190048217773, 0.018188953399658203, 0.019032716751098633, 0.019876480102539062, 0.020720243453979492, 0.021564006805419922, 0.02240777015686035, 0.02325153350830078, 0.02409529685974121, 0.02493906021118164, 0.02578282356262207, 0.0266265869140625]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 4.0, 5.0, 3.0, 4.0, 7.0, 13.0, 8.0, 22.0, 16.0, 16.0, 38.0, 41.0, 63.0, 90.0, 143.0, 245.0, 367.0, 716.0, 1430.0, 3374.0, 9344.0, 33075.0, 151756.0, 510965.0, 260345.0, 53758.0, 14073.0, 4667.0, 1855.0, 805.0, 455.0, 277.0, 190.0, 96.0, 89.0, 40.0, 44.0, 32.0, 32.0, 15.0, 11.0, 11.0, 4.0, 4.0, 6.0, 1.0, 3.0, 4.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.1533203125, -0.14832115173339844, -0.14332199096679688, -0.1383228302001953, -0.13332366943359375, -0.1283245086669922, -0.12332534790039062, -0.11832618713378906, -0.1133270263671875, -0.10832786560058594, -0.10332870483398438, -0.09832954406738281, -0.09333038330078125, -0.08833122253417969, -0.08333206176757812, -0.07833290100097656, -0.073333740234375, -0.06833457946777344, -0.06333541870117188, -0.05833625793457031, -0.05333709716796875, -0.04833793640136719, -0.043338775634765625, -0.03833961486816406, -0.0333404541015625, -0.028341293334960938, -0.023342132568359375, -0.018342971801757812, -0.01334381103515625, -0.008344650268554688, -0.003345489501953125, 0.0016536712646484375, 0.00665283203125, 0.011651992797851562, 0.016651153564453125, 0.021650314331054688, 0.02664947509765625, 0.03164863586425781, 0.036647796630859375, 0.04164695739746094, 0.0466461181640625, 0.05164527893066406, 0.056644439697265625, 0.06164360046386719, 0.06664276123046875, 0.07164192199707031, 0.07664108276367188, 0.08164024353027344, 0.086639404296875, 0.09163856506347656, 0.09663772583007812, 0.10163688659667969, 0.10663604736328125, 0.11163520812988281, 0.11663436889648438, 0.12163352966308594, 0.1266326904296875, 0.13163185119628906, 0.13663101196289062, 0.1416301727294922, 0.14662933349609375, 0.1516284942626953, 0.15662765502929688, 0.16162681579589844, 0.1666259765625]}, "gradients/decoder.transformer.h.0.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 4.0, 2.0, 6.0, 5.0, 13.0, 10.0, 18.0, 38.0, 40.0, 53.0, 89.0, 95.0, 120.0, 98.0, 79.0, 93.0, 66.0, 56.0, 39.0, 25.0, 17.0, 15.0, 9.0, 5.0, 4.0, 2.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.030228953808546066, -0.029400598257780075, -0.028572242707014084, -0.027743887156248093, -0.0269155316054821, -0.02608717605471611, -0.02525882050395012, -0.024430466815829277, -0.023602111265063286, -0.022773755714297295, -0.021945400163531303, -0.021117044612765312, -0.02028868906199932, -0.01946033537387848, -0.018631979823112488, -0.017803624272346497, -0.016975268721580505, -0.016146913170814514, -0.015318557620048523, -0.014490202069282532, -0.013661847449839115, -0.012833491899073124, -0.012005136348307133, -0.011176781728863716, -0.010348424315452576, -0.009520068764686584, -0.008691713213920593, -0.007863357663154602, -0.0070350030437111855, -0.006206647492945194, -0.005378291942179203, -0.004549936857074499, -0.0037215817719697952, -0.0028932264540344477, -0.0020648711360991, -0.001236515585333109, -0.00040816026739776134, 0.0004201950505375862, 0.0012485506013035774, 0.0020769056864082813, 0.0029052612371742725, 0.00373361655510962, 0.004561971873044968, 0.005390327423810959, 0.00621868297457695, 0.007047038059681654, 0.007875394076108932, 0.008703748695552349, 0.00953210424631834, 0.010360459797084332, 0.011188815347850323, 0.01201716996729374, 0.01284552551805973, 0.013673881068825722, 0.014502236619591713, 0.015330592170357704, 0.016158947721123695, 0.016987303271889687, 0.017815658822655678, 0.01864401437342167, 0.01947236992418766, 0.020300723612308502, 0.021129079163074493, 0.021957434713840485, 0.022785790264606476]}, "gradients/decoder.transformer.h.0.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 2.0, 5.0, 5.0, 8.0, 5.0, 8.0, 10.0, 14.0, 19.0, 26.0, 22.0, 26.0, 24.0, 35.0, 45.0, 39.0, 50.0, 37.0, 49.0, 57.0, 46.0, 45.0, 38.0, 37.0, 29.0, 43.0, 28.0, 50.0, 30.0, 22.0, 31.0, 22.0, 19.0, 16.0, 13.0, 12.0, 8.0, 2.0, 10.0, 9.0, 0.0, 4.0, 4.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.015018641948699951, -0.014564094133675098, -0.014109546318650246, -0.013654998503625393, -0.01320045068860054, -0.012745902873575687, -0.012291355058550835, -0.011836807243525982, -0.01138225942850113, -0.010927711613476276, -0.010473163798451424, -0.010018615983426571, -0.009564068168401718, -0.009109520353376865, -0.008654972538352013, -0.00820042472332716, -0.007745876908302307, -0.007291329093277454, -0.006836781278252602, -0.006382233463227749, -0.005927685648202896, -0.005473137833178043, -0.005018590018153191, -0.004564042203128338, -0.004109494388103485, -0.0036549465730786324, -0.0032003987580537796, -0.002745850943028927, -0.002291303128004074, -0.0018367553129792213, -0.0013822074979543686, -0.0009276596829295158, -0.0004731118679046631, -1.8564052879810333e-05, 0.0004359837621450424, 0.0008905315771698952, 0.001345079392194748, 0.0017996272072196007, 0.0022541750222444534, 0.002708722837269306, 0.003163270652294159, 0.0036178184673190117, 0.0040723662823438644, 0.004526914097368717, 0.00498146191239357, 0.005436009727418423, 0.0058905575424432755, 0.006345105357468128, 0.006799653172492981, 0.007254200987517834, 0.0077087488025426865, 0.00816329661756754, 0.008617844432592392, 0.009072392247617245, 0.009526940062642097, 0.00998148787766695, 0.010436035692691803, 0.010890583507716656, 0.011345131322741508, 0.011799679137766361, 0.012254226952791214, 0.012708774767816067, 0.01316332258284092, 0.013617870397865772, 0.014072418212890625]}, "gradients/decoder.transformer.h.0.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 5.0, 4.0, 8.0, 5.0, 7.0, 10.0, 11.0, 16.0, 15.0, 23.0, 16.0, 29.0, 30.0, 45.0, 30.0, 31.0, 37.0, 43.0, 40.0, 42.0, 45.0, 44.0, 45.0, 49.0, 43.0, 51.0, 25.0, 32.0, 24.0, 33.0, 30.0, 19.0, 14.0, 17.0, 16.0, 14.0, 11.0, 10.0, 6.0, 9.0, 9.0, 7.0, 5.0, 2.0, 2.0, 5.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-78.75, -76.0751953125, -73.400390625, -70.7255859375, -68.05078125, -65.3759765625, -62.701171875, -60.0263671875, -57.3515625, -54.6767578125, -52.001953125, -49.3271484375, -46.65234375, -43.9775390625, -41.302734375, -38.6279296875, -35.953125, -33.2783203125, -30.603515625, -27.9287109375, -25.25390625, -22.5791015625, -19.904296875, -17.2294921875, -14.5546875, -11.8798828125, -9.205078125, -6.5302734375, -3.85546875, -1.1806640625, 1.494140625, 4.1689453125, 6.84375, 9.5185546875, 12.193359375, 14.8681640625, 17.54296875, 20.2177734375, 22.892578125, 25.5673828125, 28.2421875, 30.9169921875, 33.591796875, 36.2666015625, 38.94140625, 41.6162109375, 44.291015625, 46.9658203125, 49.640625, 52.3154296875, 54.990234375, 57.6650390625, 60.33984375, 63.0146484375, 65.689453125, 68.3642578125, 71.0390625, 73.7138671875, 76.388671875, 79.0634765625, 81.73828125, 84.4130859375, 87.087890625, 89.7626953125, 92.4375]}, "gradients/decoder.transformer.h.0.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 6.0, 5.0, 10.0, 15.0, 11.0, 22.0, 18.0, 34.0, 52.0, 82.0, 113.0, 159.0, 307.0, 410.0, 751.0, 1397.0, 2874.0, 7447.0, 26316.0, 137648.0, 538870.0, 264177.0, 48254.0, 11526.0, 3884.0, 1807.0, 853.0, 530.0, 310.0, 210.0, 124.0, 99.0, 76.0, 47.0, 23.0, 17.0, 27.0, 16.0, 11.0, 7.0, 6.0, 6.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-15.265625, -14.75146484375, -14.2373046875, -13.72314453125, -13.208984375, -12.69482421875, -12.1806640625, -11.66650390625, -11.15234375, -10.63818359375, -10.1240234375, -9.60986328125, -9.095703125, -8.58154296875, -8.0673828125, -7.55322265625, -7.0390625, -6.52490234375, -6.0107421875, -5.49658203125, -4.982421875, -4.46826171875, -3.9541015625, -3.43994140625, -2.92578125, -2.41162109375, -1.8974609375, -1.38330078125, -0.869140625, -0.35498046875, 0.1591796875, 0.67333984375, 1.1875, 1.70166015625, 2.2158203125, 2.72998046875, 3.244140625, 3.75830078125, 4.2724609375, 4.78662109375, 5.30078125, 5.81494140625, 6.3291015625, 6.84326171875, 7.357421875, 7.87158203125, 8.3857421875, 8.89990234375, 9.4140625, 9.92822265625, 10.4423828125, 10.95654296875, 11.470703125, 11.98486328125, 12.4990234375, 13.01318359375, 13.52734375, 14.04150390625, 14.5556640625, 15.06982421875, 15.583984375, 16.09814453125, 16.6123046875, 17.12646484375, 17.640625]}, "gradients/decoder.transformer.h.0.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 5.0, 4.0, 2.0, 3.0, 1.0, 2.0, 3.0, 4.0, 9.0, 12.0, 10.0, 9.0, 11.0, 23.0, 25.0, 25.0, 42.0, 41.0, 31.0, 47.0, 63.0, 56.0, 49.0, 586.0, 1552.0, 58.0, 62.0, 48.0, 41.0, 37.0, 31.0, 32.0, 26.0, 16.0, 27.0, 13.0, 13.0, 7.0, 11.0, 7.0, 5.0, 1.0, 4.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-172.0, -167.0859375, -162.171875, -157.2578125, -152.34375, -147.4296875, -142.515625, -137.6015625, -132.6875, -127.7734375, -122.859375, -117.9453125, -113.03125, -108.1171875, -103.203125, -98.2890625, -93.375, -88.4609375, -83.546875, -78.6328125, -73.71875, -68.8046875, -63.890625, -58.9765625, -54.0625, -49.1484375, -44.234375, -39.3203125, -34.40625, -29.4921875, -24.578125, -19.6640625, -14.75, -9.8359375, -4.921875, -0.0078125, 4.90625, 9.8203125, 14.734375, 19.6484375, 24.5625, 29.4765625, 34.390625, 39.3046875, 44.21875, 49.1328125, 54.046875, 58.9609375, 63.875, 68.7890625, 73.703125, 78.6171875, 83.53125, 88.4453125, 93.359375, 98.2734375, 103.1875, 108.1015625, 113.015625, 117.9296875, 122.84375, 127.7578125, 132.671875, 137.5859375, 142.5]}, "gradients/decoder.transformer.h.0.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 9.0, 5.0, 14.0, 17.0, 15.0, 31.0, 50.0, 113.0, 153.0, 266.0, 447.0, 953.0, 2624.0, 16056.0, 2694155.0, 415267.0, 11268.0, 2246.0, 873.0, 446.0, 277.0, 146.0, 86.0, 67.0, 31.0, 26.0, 19.0, 13.0, 14.0, 7.0, 6.0, 4.0, 4.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-39.21875, -37.90869140625, -36.5986328125, -35.28857421875, -33.978515625, -32.66845703125, -31.3583984375, -30.04833984375, -28.73828125, -27.42822265625, -26.1181640625, -24.80810546875, -23.498046875, -22.18798828125, -20.8779296875, -19.56787109375, -18.2578125, -16.94775390625, -15.6376953125, -14.32763671875, -13.017578125, -11.70751953125, -10.3974609375, -9.08740234375, -7.77734375, -6.46728515625, -5.1572265625, -3.84716796875, -2.537109375, -1.22705078125, 0.0830078125, 1.39306640625, 2.703125, 4.01318359375, 5.3232421875, 6.63330078125, 7.943359375, 9.25341796875, 10.5634765625, 11.87353515625, 13.18359375, 14.49365234375, 15.8037109375, 17.11376953125, 18.423828125, 19.73388671875, 21.0439453125, 22.35400390625, 23.6640625, 24.97412109375, 26.2841796875, 27.59423828125, 28.904296875, 30.21435546875, 31.5244140625, 32.83447265625, 34.14453125, 35.45458984375, 36.7646484375, 38.07470703125, 39.384765625, 40.69482421875, 42.0048828125, 43.31494140625, 44.625]}, "gradients/decoder.transformer.h.0.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 5.0, 1.0, 17.0, 19.0, 65.0, 161.0, 297.0, 254.0, 90.0, 44.0, 14.0, 16.0, 5.0, 12.0, 7.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-120.0509262084961, -107.7363510131836, -95.42178344726562, -83.10720825195312, -70.79263305664062, -58.478065490722656, -46.163490295410156, -33.84892272949219, -21.534347534179688, -9.219775199890137, 3.094797134399414, 15.409370422363281, 27.723941802978516, 40.03851318359375, 52.35308837890625, 64.66765594482422, 76.98223114013672, 89.29680633544922, 101.61137390136719, 113.92594909667969, 126.24052429199219, 138.55508422851562, 150.86965942382812, 163.18423461914062, 175.49880981445312, 187.81338500976562, 200.12796020507812, 212.44253540039062, 224.75709533691406, 237.07167053222656, 249.38624572753906, 261.7008056640625, 274.0154113769531, 286.3299865722656, 298.6445617675781, 310.9591369628906, 323.2737121582031, 335.5882568359375, 347.90283203125, 360.2174072265625, 372.531982421875, 384.8465576171875, 397.1611328125, 409.4757080078125, 421.790283203125, 434.1048583984375, 446.41943359375, 458.7339782714844, 471.048583984375, 483.3631591796875, 495.677734375, 507.9923095703125, 520.306884765625, 532.6214599609375, 544.93603515625, 557.2506103515625, 569.5651245117188, 581.8796997070312, 594.1942749023438, 606.5088500976562, 618.8234252929688, 631.1380004882812, 643.4525756835938, 655.7671508789062, 668.0817260742188]}, "gradients/decoder.transformer.h.0.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 1.0, 8.0, 5.0, 4.0, 8.0, 8.0, 13.0, 11.0, 12.0, 14.0, 23.0, 20.0, 24.0, 26.0, 23.0, 43.0, 44.0, 41.0, 40.0, 46.0, 59.0, 48.0, 49.0, 52.0, 43.0, 44.0, 52.0, 36.0, 25.0, 31.0, 33.0, 22.0, 17.0, 21.0, 16.0, 11.0, 8.0, 7.0, 6.0, 5.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-210.40274047851562, -204.1172332763672, -197.8317108154297, -191.54620361328125, -185.2606964111328, -178.97518920898438, -172.68966674804688, -166.40415954589844, -160.11865234375, -153.83314514160156, -147.54762268066406, -141.26211547851562, -134.9766082763672, -128.69110107421875, -122.40557861328125, -116.12007141113281, -109.83454895019531, -103.54903411865234, -97.2635269165039, -90.97801208496094, -84.6925048828125, -78.40699005126953, -72.12147521972656, -65.83596801757812, -59.550453186035156, -53.26494216918945, -46.97943115234375, -40.69391632080078, -34.40840530395508, -28.122894287109375, -21.837379455566406, -15.551868438720703, -9.266357421875, -2.9808454513549805, 3.304666519165039, 9.590179443359375, 15.875690460205078, 22.16120147705078, 28.44671630859375, 34.73222732543945, 41.017738342285156, 47.30324935913086, 53.58876037597656, 59.87427520751953, 66.1597900390625, 72.44529724121094, 78.7308120727539, 85.01632690429688, 91.30183410644531, 97.58734893798828, 103.87285614013672, 110.15837097167969, 116.44387817382812, 122.7293930053711, 129.01490783691406, 135.3004150390625, 141.5859375, 147.87144470214844, 154.15696716308594, 160.44247436523438, 166.7279815673828, 173.01348876953125, 179.29901123046875, 185.5845184326172, 191.87002563476562]}, "gradients/decoder.transformer.wpe.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 1.0, 6.0, 3.0, 1.0, 3.0, 8.0, 5.0, 14.0, 27.0, 42.0, 53.0, 68.0, 150.0, 167.0, 287.0, 464.0, 729.0, 952.0, 1486.0, 2229.0, 3194.0, 4806.0, 1016280.0, 6003.0, 3606.0, 2561.0, 1784.0, 1219.0, 851.0, 535.0, 376.0, 222.0, 141.0, 102.0, 75.0, 37.0, 21.0, 18.0, 12.0, 9.0, 6.0, 1.0, 2.0, 6.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0], "bins": [-37.234500885009766, -36.1920051574707, -35.14950942993164, -34.10701370239258, -33.06451416015625, -32.02201843261719, -30.979522705078125, -29.937026977539062, -28.89453125, -27.852035522460938, -26.809539794921875, -25.76704216003418, -24.724546432495117, -23.682050704956055, -22.63955307006836, -21.597057342529297, -20.554561614990234, -19.512065887451172, -18.46957015991211, -17.427072525024414, -16.38457679748535, -15.342081069946289, -14.29958438873291, -13.257087707519531, -12.214591979980469, -11.172096252441406, -10.129599571228027, -9.087102890014648, -8.044607162475586, -7.002110958099365, -5.9596147537231445, -4.917118549346924, -3.8746185302734375, -2.832122325897217, -1.789626121520996, -0.7471299171447754, 0.2953662872314453, 1.337862491607666, 2.3803586959838867, 3.4228549003601074, 4.465351104736328, 5.507847309112549, 6.5503435134887695, 7.59283971786499, 8.635335922241211, 9.677831649780273, 10.720328330993652, 11.762825012207031, 12.805320739746094, 13.847816467285156, 14.890313148498535, 15.932809829711914, 16.975305557250977, 18.01780128479004, 19.060298919677734, 20.102794647216797, 21.14529037475586, 22.187786102294922, 23.230281829833984, 24.27277946472168, 25.315275192260742, 26.357770919799805, 27.4002685546875, 28.442764282226562, 29.485260009765625]}, "gradients/decoder.transformer.wte.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 2.0, 2.0, 5.0, 6.0, 9.0, 21.0, 50.0, 81.0, 92.0, 640.0, 51461832.0, 245.0, 70.0, 44.0, 24.0, 15.0, 10.0, 7.0, 4.0, 6.0, 3.0, 4.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5684.64111328125, -5508.79443359375, -5332.94775390625, -5157.10107421875, -4981.25439453125, -4805.40771484375, -4629.5615234375, -4453.71484375, -4277.8681640625, -4102.021484375, -3926.1748046875, -3750.328125, -3574.4814453125, -3398.634765625, -3222.788330078125, -3046.941650390625, -2871.0947265625, -2695.248046875, -2519.4013671875, -2343.5546875, -2167.7080078125, -1991.8614501953125, -1816.014892578125, -1640.168212890625, -1464.321533203125, -1288.474853515625, -1112.628173828125, -936.7816162109375, -760.9349365234375, -585.0882568359375, -409.24169921875, -233.39501953125, -57.548828125, 118.29782104492188, 294.14447021484375, 469.9910888671875, 645.8377685546875, 821.6844482421875, 997.531005859375, 1173.377685546875, 1349.224365234375, 1525.071044921875, 1700.917724609375, 1876.7642822265625, 2052.61083984375, 2228.45751953125, 2404.30419921875, 2580.15087890625, 2755.99755859375, 2931.84423828125, 3107.69091796875, 3283.53759765625, 3459.38427734375, 3635.23095703125, 3811.077392578125, 3986.924072265625, 4162.7705078125, 4338.6171875, 4514.4638671875, 4690.310546875, 4866.1572265625, 5042.00390625, 5217.8505859375, 5393.697265625, 5569.5439453125]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 7.0, 8.0, 4.0, 20.0, 19.0, 25.0, 37.0, 62.0, 81.0, 119.0, 185.0, 333.0, 509.0, 816.0, 1390.0, 2227.0, 3288.0, 5395.0, 8653.0, 14019.0, 22324.0, 36217.0, 57725.0, 92325.0, 149768.0, 243521.0, 395920.0, 2914489.0, 1355507.0, 377133.0, 232158.0, 142141.0, 88373.0, 55325.0, 34506.0, 21444.0, 13351.0, 8328.0, 5169.0, 3166.0, 1928.0, 1233.0, 727.0, 523.0, 330.0, 201.0, 131.0, 94.0, 68.0, 41.0, 31.0, 31.0, 9.0, 6.0, 7.0, 2.0, 1.0, 1.0], "bins": [-2.349609375, -2.2803955078125, -2.211181640625, -2.1419677734375, -2.07275390625, -2.0035400390625, -1.934326171875, -1.8651123046875, -1.7958984375, -1.7266845703125, -1.657470703125, -1.5882568359375, -1.51904296875, -1.4498291015625, -1.380615234375, -1.3114013671875, -1.2421875, -1.1729736328125, -1.103759765625, -1.0345458984375, -0.96533203125, -0.8961181640625, -0.826904296875, -0.7576904296875, -0.6884765625, -0.6192626953125, -0.550048828125, -0.4808349609375, -0.41162109375, -0.3424072265625, -0.273193359375, -0.2039794921875, -0.134765625, -0.0655517578125, 0.003662109375, 0.0728759765625, 0.14208984375, 0.2113037109375, 0.280517578125, 0.3497314453125, 0.4189453125, 0.4881591796875, 0.557373046875, 0.6265869140625, 0.69580078125, 0.7650146484375, 0.834228515625, 0.9034423828125, 0.97265625, 1.0418701171875, 1.111083984375, 1.1802978515625, 1.24951171875, 1.3187255859375, 1.387939453125, 1.4571533203125, 1.5263671875, 1.5955810546875, 1.664794921875, 1.7340087890625, 1.80322265625, 1.8724365234375, 1.941650390625, 2.0108642578125, 2.080078125]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 8.0, 9.0, 6.0, 10.0, 19.0, 12.0, 12.0, 22.0, 23.0, 22.0, 23.0, 29.0, 33.0, 36.0, 32.0, 36.0, 37.0, 41.0, 44.0, 312.0, 775.0, 59.0, 34.0, 36.0, 36.0, 45.0, 34.0, 38.0, 37.0, 32.0, 19.0, 13.0, 25.0, 10.0, 19.0, 9.0, 18.0, 7.0, 3.0, 5.0, 5.0, 5.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0], "bins": [-19.109375, -18.566162109375, -18.02294921875, -17.479736328125, -16.9365234375, -16.393310546875, -15.85009765625, -15.306884765625, -14.763671875, -14.220458984375, -13.67724609375, -13.134033203125, -12.5908203125, -12.047607421875, -11.50439453125, -10.961181640625, -10.41796875, -9.874755859375, -9.33154296875, -8.788330078125, -8.2451171875, -7.701904296875, -7.15869140625, -6.615478515625, -6.072265625, -5.529052734375, -4.98583984375, -4.442626953125, -3.8994140625, -3.356201171875, -2.81298828125, -2.269775390625, -1.7265625, -1.183349609375, -0.64013671875, -0.096923828125, 0.4462890625, 0.989501953125, 1.53271484375, 2.075927734375, 2.619140625, 3.162353515625, 3.70556640625, 4.248779296875, 4.7919921875, 5.335205078125, 5.87841796875, 6.421630859375, 6.96484375, 7.508056640625, 8.05126953125, 8.594482421875, 9.1376953125, 9.680908203125, 10.22412109375, 10.767333984375, 11.310546875, 11.853759765625, 12.39697265625, 12.940185546875, 13.4833984375, 14.026611328125, 14.56982421875, 15.113037109375, 15.65625]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 3.0, 5.0, 5.0, 15.0, 33.0, 20.0, 36.0, 44.0, 78.0, 101.0, 153.0, 248.0, 313.0, 526.0, 776.0, 1091.0, 1634.0, 2398.0, 3865.0, 5695.0, 8769.0, 13756.0, 21510.0, 34239.0, 54895.0, 89018.0, 147162.0, 261902.0, 520578.0, 3860624.0, 574244.0, 279260.0, 156713.0, 93887.0, 57868.0, 35918.0, 22733.0, 14482.0, 9223.0, 5883.0, 4014.0, 2454.0, 1679.0, 1137.0, 784.0, 536.0, 362.0, 235.0, 166.0, 115.0, 67.0, 65.0, 53.0, 27.0, 16.0, 18.0, 8.0, 3.0, 0.0, 6.0, 3.0], "bins": [-3.21484375, -3.1160888671875, -3.017333984375, -2.9185791015625, -2.81982421875, -2.7210693359375, -2.622314453125, -2.5235595703125, -2.4248046875, -2.3260498046875, -2.227294921875, -2.1285400390625, -2.02978515625, -1.9310302734375, -1.832275390625, -1.7335205078125, -1.634765625, -1.5360107421875, -1.437255859375, -1.3385009765625, -1.23974609375, -1.1409912109375, -1.042236328125, -0.9434814453125, -0.8447265625, -0.7459716796875, -0.647216796875, -0.5484619140625, -0.44970703125, -0.3509521484375, -0.252197265625, -0.1534423828125, -0.0546875, 0.0440673828125, 0.142822265625, 0.2415771484375, 0.34033203125, 0.4390869140625, 0.537841796875, 0.6365966796875, 0.7353515625, 0.8341064453125, 0.932861328125, 1.0316162109375, 1.13037109375, 1.2291259765625, 1.327880859375, 1.4266357421875, 1.525390625, 1.6241455078125, 1.722900390625, 1.8216552734375, 1.92041015625, 2.0191650390625, 2.117919921875, 2.2166748046875, 2.3154296875, 2.4141845703125, 2.512939453125, 2.6116943359375, 2.71044921875, 2.8092041015625, 2.907958984375, 3.0067138671875, 3.10546875]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 6.0, 2.0, 2.0, 7.0, 3.0, 6.0, 6.0, 13.0, 12.0, 12.0, 13.0, 14.0, 22.0, 24.0, 23.0, 17.0, 23.0, 28.0, 35.0, 26.0, 26.0, 37.0, 39.0, 53.0, 426.0, 630.0, 75.0, 41.0, 40.0, 37.0, 40.0, 34.0, 20.0, 33.0, 18.0, 33.0, 23.0, 22.0, 18.0, 16.0, 11.0, 16.0, 11.0, 3.0, 8.0, 8.0, 9.0, 1.0, 3.0, 2.0, 4.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 2.0], "bins": [-9.90625, -9.5882568359375, -9.270263671875, -8.9522705078125, -8.63427734375, -8.3162841796875, -7.998291015625, -7.6802978515625, -7.3623046875, -7.0443115234375, -6.726318359375, -6.4083251953125, -6.09033203125, -5.7723388671875, -5.454345703125, -5.1363525390625, -4.818359375, -4.5003662109375, -4.182373046875, -3.8643798828125, -3.54638671875, -3.2283935546875, -2.910400390625, -2.5924072265625, -2.2744140625, -1.9564208984375, -1.638427734375, -1.3204345703125, -1.00244140625, -0.6844482421875, -0.366455078125, -0.0484619140625, 0.26953125, 0.5875244140625, 0.905517578125, 1.2235107421875, 1.54150390625, 1.8594970703125, 2.177490234375, 2.4954833984375, 2.8134765625, 3.1314697265625, 3.449462890625, 3.7674560546875, 4.08544921875, 4.4034423828125, 4.721435546875, 5.0394287109375, 5.357421875, 5.6754150390625, 5.993408203125, 6.3114013671875, 6.62939453125, 6.9473876953125, 7.265380859375, 7.5833740234375, 7.9013671875, 8.2193603515625, 8.537353515625, 8.8553466796875, 9.17333984375, 9.4913330078125, 9.809326171875, 10.1273193359375, 10.4453125]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [5.0, 4.0, 7.0, 4.0, 3.0, 10.0, 11.0, 14.0, 21.0, 21.0, 29.0, 43.0, 53.0, 59.0, 108.0, 126.0, 150.0, 225.0, 313.0, 364.0, 488.0, 615.0, 969.0, 1255.0, 1807.0, 2703.0, 4477.0, 8403.0, 17109.0, 45463.0, 196810.0, 5861639.0, 89870.0, 28205.0, 12130.0, 6302.0, 3592.0, 2342.0, 1650.0, 1069.0, 773.0, 529.0, 404.0, 325.0, 228.0, 185.0, 130.0, 115.0, 73.0, 66.0, 42.0, 37.0, 24.0, 13.0, 8.0, 4.0, 8.0, 6.0, 2.0, 6.0, 1.0, 2.0, 3.0, 4.0], "bins": [-12.4921875, -12.0927734375, -11.693359375, -11.2939453125, -10.89453125, -10.4951171875, -10.095703125, -9.6962890625, -9.296875, -8.8974609375, -8.498046875, -8.0986328125, -7.69921875, -7.2998046875, -6.900390625, -6.5009765625, -6.1015625, -5.7021484375, -5.302734375, -4.9033203125, -4.50390625, -4.1044921875, -3.705078125, -3.3056640625, -2.90625, -2.5068359375, -2.107421875, -1.7080078125, -1.30859375, -0.9091796875, -0.509765625, -0.1103515625, 0.2890625, 0.6884765625, 1.087890625, 1.4873046875, 1.88671875, 2.2861328125, 2.685546875, 3.0849609375, 3.484375, 3.8837890625, 4.283203125, 4.6826171875, 5.08203125, 5.4814453125, 5.880859375, 6.2802734375, 6.6796875, 7.0791015625, 7.478515625, 7.8779296875, 8.27734375, 8.6767578125, 9.076171875, 9.4755859375, 9.875, 10.2744140625, 10.673828125, 11.0732421875, 11.47265625, 11.8720703125, 12.271484375, 12.6708984375, 13.0703125]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 3.0, 6.0, 13.0, 9.0, 14.0, 13.0, 15.0, 26.0, 22.0, 18.0, 21.0, 28.0, 37.0, 47.0, 56.0, 54.0, 75.0, 174.0, 646.0, 246.0, 77.0, 48.0, 44.0, 40.0, 39.0, 29.0, 37.0, 37.0, 36.0, 16.0, 15.0, 23.0, 20.0, 9.0, 7.0, 6.0, 5.0, 4.0, 7.0, 1.0, 0.0, 2.0, 3.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.3125, -8.05816650390625, -7.8038330078125, -7.54949951171875, -7.295166015625, -7.04083251953125, -6.7864990234375, -6.53216552734375, -6.27783203125, -6.02349853515625, -5.7691650390625, -5.51483154296875, -5.260498046875, -5.00616455078125, -4.7518310546875, -4.49749755859375, -4.2431640625, -3.98883056640625, -3.7344970703125, -3.48016357421875, -3.225830078125, -2.97149658203125, -2.7171630859375, -2.46282958984375, -2.20849609375, -1.95416259765625, -1.6998291015625, -1.44549560546875, -1.191162109375, -0.93682861328125, -0.6824951171875, -0.42816162109375, -0.173828125, 0.08050537109375, 0.3348388671875, 0.58917236328125, 0.843505859375, 1.09783935546875, 1.3521728515625, 1.60650634765625, 1.86083984375, 2.11517333984375, 2.3695068359375, 2.62384033203125, 2.878173828125, 3.13250732421875, 3.3868408203125, 3.64117431640625, 3.8955078125, 4.14984130859375, 4.4041748046875, 4.65850830078125, 4.912841796875, 5.16717529296875, 5.4215087890625, 5.67584228515625, 5.93017578125, 6.18450927734375, 6.4388427734375, 6.69317626953125, 6.947509765625, 7.20184326171875, 7.4561767578125, 7.71051025390625, 7.96484375]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 4.0, 8.0, 10.0, 18.0, 49.0, 97.0, 244.0, 339.0, 143.0, 46.0, 24.0, 7.0, 7.0, 9.0, 1.0, 2.0, 0.0, 3.0, 1.0, 0.0, 1.0], "bins": [-47.907508850097656, -46.95437240600586, -46.00123977661133, -45.04810333251953, -44.094966888427734, -43.14183044433594, -42.188697814941406, -41.23556137084961, -40.28242492675781, -39.329288482666016, -38.376155853271484, -37.42301940917969, -36.46988296508789, -35.516746520996094, -34.56361389160156, -33.610477447509766, -32.657344818115234, -31.70421028137207, -30.751073837280273, -29.79793930053711, -28.844802856445312, -27.89166831970215, -26.938533782958984, -25.985397338867188, -25.03226089477539, -24.079126358032227, -23.12598991394043, -22.172855377197266, -21.21971893310547, -20.266584396362305, -19.31344985961914, -18.360313415527344, -17.40717887878418, -16.454044342041016, -15.500907897949219, -14.547773361206055, -13.594637870788574, -12.641502380371094, -11.688366889953613, -10.735231399536133, -9.782096862792969, -8.828961372375488, -7.875826358795166, -6.9226908683776855, -5.969555854797363, -5.016420364379883, -4.063284873962402, -3.11014986038208, -2.1570138931274414, -1.20387864112854, -0.2507432699203491, 0.7023921012878418, 1.6555273532867432, 2.6086626052856445, 3.561798095703125, 4.514933109283447, 5.468068599700928, 6.421204090118408, 7.3743391036987305, 8.327474594116211, 9.280610084533691, 10.233745574951172, 11.186880111694336, 12.140015602111816, 13.093151092529297]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 1.0, 0.0, 1.0, 3.0, 7.0, 3.0, 4.0, 2.0, 9.0, 10.0, 8.0, 9.0, 8.0, 14.0, 20.0, 14.0, 23.0, 17.0, 37.0, 31.0, 38.0, 39.0, 38.0, 38.0, 28.0, 45.0, 48.0, 40.0, 42.0, 48.0, 39.0, 30.0, 31.0, 30.0, 36.0, 30.0, 19.0, 22.0, 27.0, 13.0, 24.0, 10.0, 14.0, 14.0, 10.0, 10.0, 5.0, 3.0, 5.0, 2.0, 6.0, 5.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-9.6207275390625, -9.30551815032959, -8.990309715270996, -8.675100326538086, -8.359890937805176, -8.044682502746582, -7.729473114013672, -7.41426420211792, -7.099055290222168, -6.783846378326416, -6.468636989593506, -6.153428077697754, -5.838219165802002, -5.52301025390625, -5.20780086517334, -4.892591953277588, -4.577382564544678, -4.262173652648926, -3.9469645023345947, -3.6317553520202637, -3.3165464401245117, -3.0013372898101807, -2.6861281394958496, -2.3709192276000977, -2.0557100772857666, -1.740501046180725, -1.4252920150756836, -1.1100828647613525, -0.794873833656311, -0.47966480255126953, -0.16445565223693848, 0.15075325965881348, 0.46596240997314453, 0.781171441078186, 1.0963804721832275, 1.4115896224975586, 1.7267986536026, 2.0420076847076416, 2.3572168350219727, 2.6724257469177246, 2.9876348972320557, 3.3028440475463867, 3.6180529594421387, 3.9332621097564697, 4.248471260070801, 4.563680171966553, 4.878889083862305, 5.194098472595215, 5.509307384490967, 5.824516296386719, 6.139725685119629, 6.454934597015381, 6.770143508911133, 7.085352897644043, 7.400561809539795, 7.715770721435547, 8.030980110168457, 8.346189498901367, 8.661397933959961, 8.976607322692871, 9.291816711425781, 9.607025146484375, 9.922234535217285, 10.237443923950195, 10.552652359008789]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 5.0, 2.0, 5.0, 13.0, 27.0, 28.0, 49.0, 68.0, 151.0, 271.0, 541.0, 1061.0, 2240.0, 5626.0, 17872.0, 120686.0, 3978636.0, 45145.0, 12147.0, 4844.0, 2146.0, 1077.0, 556.0, 319.0, 212.0, 140.0, 110.0, 46.0, 64.0, 39.0, 38.0, 23.0, 16.0, 12.0, 8.0, 16.0, 8.0, 11.0, 11.0, 6.0, 3.0, 6.0, 4.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.20654296875, -0.19584274291992188, -0.18514251708984375, -0.17444229125976562, -0.1637420654296875, -0.15304183959960938, -0.14234161376953125, -0.13164138793945312, -0.120941162109375, -0.11024093627929688, -0.09954071044921875, -0.08884048461914062, -0.0781402587890625, -0.06744003295898438, -0.05673980712890625, -0.046039581298828125, -0.03533935546875, -0.024639129638671875, -0.01393890380859375, -0.003238677978515625, 0.0074615478515625, 0.018161773681640625, 0.02886199951171875, 0.039562225341796875, 0.050262451171875, 0.060962677001953125, 0.07166290283203125, 0.08236312866210938, 0.0930633544921875, 0.10376358032226562, 0.11446380615234375, 0.12516403198242188, 0.1358642578125, 0.14656448364257812, 0.15726470947265625, 0.16796493530273438, 0.1786651611328125, 0.18936538696289062, 0.20006561279296875, 0.21076583862304688, 0.221466064453125, 0.23216629028320312, 0.24286651611328125, 0.2535667419433594, 0.2642669677734375, 0.2749671936035156, 0.28566741943359375, 0.2963676452636719, 0.30706787109375, 0.3177680969238281, 0.32846832275390625, 0.3391685485839844, 0.3498687744140625, 0.3605690002441406, 0.37126922607421875, 0.3819694519042969, 0.392669677734375, 0.4033699035644531, 0.41407012939453125, 0.4247703552246094, 0.4354705810546875, 0.4461708068847656, 0.45687103271484375, 0.4675712585449219, 0.478271484375]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 4.0, 1.0, 7.0, 9.0, 6.0, 7.0, 12.0, 18.0, 15.0, 24.0, 403.0, 408.0, 17.0, 9.0, 13.0, 8.0, 11.0, 6.0, 1.0, 7.0, 4.0, 3.0, 4.0, 2.0, 3.0, 1.0, 1.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.042510986328125, -0.04099559783935547, -0.03948020935058594, -0.037964820861816406, -0.036449432373046875, -0.034934043884277344, -0.03341865539550781, -0.03190326690673828, -0.03038787841796875, -0.02887248992919922, -0.027357101440429688, -0.025841712951660156, -0.024326324462890625, -0.022810935974121094, -0.021295547485351562, -0.01978015899658203, -0.0182647705078125, -0.01674938201904297, -0.015233993530273438, -0.013718605041503906, -0.012203216552734375, -0.010687828063964844, -0.009172439575195312, -0.007657051086425781, -0.00614166259765625, -0.004626274108886719, -0.0031108856201171875, -0.0015954971313476562, -8.0108642578125e-05, 0.0014352798461914062, 0.0029506683349609375, 0.004466056823730469, 0.0059814453125, 0.007496833801269531, 0.009012222290039062, 0.010527610778808594, 0.012042999267578125, 0.013558387756347656, 0.015073776245117188, 0.01658916473388672, 0.01810455322265625, 0.01961994171142578, 0.021135330200195312, 0.022650718688964844, 0.024166107177734375, 0.025681495666503906, 0.027196884155273438, 0.02871227264404297, 0.0302276611328125, 0.03174304962158203, 0.03325843811035156, 0.034773826599121094, 0.036289215087890625, 0.037804603576660156, 0.03931999206542969, 0.04083538055419922, 0.04235076904296875, 0.04386615753173828, 0.04538154602050781, 0.046896934509277344, 0.048412322998046875, 0.049927711486816406, 0.05144309997558594, 0.05295848846435547, 0.054473876953125]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 7.0, 9.0, 10.0, 10.0, 25.0, 25.0, 31.0, 60.0, 100.0, 141.0, 226.0, 422.0, 995.0, 2859.0, 10734.0, 58370.0, 747966.0, 3235994.0, 111936.0, 17419.0, 4184.0, 1400.0, 611.0, 341.0, 177.0, 105.0, 58.0, 26.0, 19.0, 6.0, 7.0, 6.0, 2.0, 1.0, 0.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.33349609375, -0.3242759704589844, -0.31505584716796875, -0.3058357238769531, -0.2966156005859375, -0.2873954772949219, -0.27817535400390625, -0.2689552307128906, -0.259735107421875, -0.2505149841308594, -0.24129486083984375, -0.23207473754882812, -0.2228546142578125, -0.21363449096679688, -0.20441436767578125, -0.19519424438476562, -0.18597412109375, -0.17675399780273438, -0.16753387451171875, -0.15831375122070312, -0.1490936279296875, -0.13987350463867188, -0.13065338134765625, -0.12143325805664062, -0.112213134765625, -0.10299301147460938, -0.09377288818359375, -0.08455276489257812, -0.0753326416015625, -0.06611251831054688, -0.05689239501953125, -0.047672271728515625, -0.0384521484375, -0.029232025146484375, -0.02001190185546875, -0.010791778564453125, -0.0015716552734375, 0.007648468017578125, 0.01686859130859375, 0.026088714599609375, 0.035308837890625, 0.044528961181640625, 0.05374908447265625, 0.06296920776367188, 0.0721893310546875, 0.08140945434570312, 0.09062957763671875, 0.09984970092773438, 0.10906982421875, 0.11828994750976562, 0.12751007080078125, 0.13673019409179688, 0.1459503173828125, 0.15517044067382812, 0.16439056396484375, 0.17361068725585938, 0.182830810546875, 0.19205093383789062, 0.20127105712890625, 0.21049118041992188, 0.2197113037109375, 0.22893142700195312, 0.23815155029296875, 0.24737167358398438, 0.256591796875]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 1.0, 6.0, 7.0, 11.0, 11.0, 14.0, 19.0, 15.0, 28.0, 35.0, 53.0, 70.0, 91.0, 121.0, 135.0, 186.0, 332.0, 920.0, 935.0, 352.0, 227.0, 137.0, 109.0, 65.0, 50.0, 38.0, 25.0, 23.0, 19.0, 14.0, 8.0, 8.0, 4.0, 6.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.050018310546875, -0.04816579818725586, -0.04631328582763672, -0.04446077346801758, -0.04260826110839844, -0.0407557487487793, -0.038903236389160156, -0.037050724029541016, -0.035198211669921875, -0.033345699310302734, -0.031493186950683594, -0.029640674591064453, -0.027788162231445312, -0.025935649871826172, -0.02408313751220703, -0.02223062515258789, -0.02037811279296875, -0.01852560043334961, -0.01667308807373047, -0.014820575714111328, -0.012968063354492188, -0.011115550994873047, -0.009263038635253906, -0.007410526275634766, -0.005558013916015625, -0.0037055015563964844, -0.0018529891967773438, -4.76837158203125e-07, 0.0018520355224609375, 0.003704547882080078, 0.005557060241699219, 0.007409572601318359, 0.0092620849609375, 0.01111459732055664, 0.012967109680175781, 0.014819622039794922, 0.016672134399414062, 0.018524646759033203, 0.020377159118652344, 0.022229671478271484, 0.024082183837890625, 0.025934696197509766, 0.027787208557128906, 0.029639720916748047, 0.03149223327636719, 0.03334474563598633, 0.03519725799560547, 0.03704977035522461, 0.03890228271484375, 0.04075479507446289, 0.04260730743408203, 0.04445981979370117, 0.04631233215332031, 0.04816484451293945, 0.050017356872558594, 0.051869869232177734, 0.053722381591796875, 0.055574893951416016, 0.057427406311035156, 0.0592799186706543, 0.06113243103027344, 0.06298494338989258, 0.06483745574951172, 0.06668996810913086, 0.06854248046875]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 6.0, 10.0, 30.0, 63.0, 273.0, 464.0, 133.0, 22.0, 8.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.265873432159424, -2.2208850383758545, -2.175896644592285, -2.130908250808716, -2.0859198570251465, -2.040931463241577, -1.9959430694580078, -1.9509546756744385, -1.9059662818908691, -1.8609778881072998, -1.8159894943237305, -1.7710011005401611, -1.7260127067565918, -1.6810243129730225, -1.6360359191894531, -1.5910475254058838, -1.5460591316223145, -1.5010707378387451, -1.4560823440551758, -1.4110939502716064, -1.366105556488037, -1.3211171627044678, -1.2761287689208984, -1.231140375137329, -1.1861518621444702, -1.1411634683609009, -1.0961750745773315, -1.0511866807937622, -1.0061982870101929, -0.9612098932266235, -0.9162214994430542, -0.8712331056594849, -0.8262446522712708, -0.7812562584877014, -0.7362678647041321, -0.6912794709205627, -0.6462910771369934, -0.6013026833534241, -0.55631422996521, -0.5113258361816406, -0.4663374722003937, -0.42134907841682434, -0.376360684633255, -0.3313722610473633, -0.28638386726379395, -0.2413954883813858, -0.19640707969665527, -0.15141868591308594, -0.1064302921295166, -0.06144189462065697, -0.016453497111797333, 0.0285349041223526, 0.07352329790592194, 0.11851169168949127, 0.1635001003742218, 0.20848849415779114, 0.2534768879413605, 0.2984652817249298, 0.34345367550849915, 0.38844209909439087, 0.4334304928779602, 0.47841888666152954, 0.5234072804450989, 0.5683956742286682, 0.6133840680122375]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 5.0, 4.0, 4.0, 6.0, 7.0, 5.0, 10.0, 4.0, 17.0, 15.0, 16.0, 18.0, 23.0, 30.0, 32.0, 34.0, 44.0, 49.0, 58.0, 33.0, 50.0, 55.0, 47.0, 51.0, 50.0, 42.0, 41.0, 42.0, 35.0, 27.0, 26.0, 29.0, 25.0, 16.0, 14.0, 13.0, 10.0, 10.0, 4.0, 3.0, 2.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.2884256839752197, -0.27962765097618103, -0.27082961797714233, -0.26203158497810364, -0.25323355197906494, -0.24443553388118744, -0.23563751578330994, -0.22683948278427124, -0.21804144978523254, -0.20924341678619385, -0.20044538378715515, -0.19164736568927765, -0.18284933269023895, -0.17405129969120026, -0.16525328159332275, -0.15645524859428406, -0.14765721559524536, -0.13885918259620667, -0.13006114959716797, -0.12126313149929047, -0.11246509850025177, -0.10366706550121307, -0.09486903995275497, -0.08607101440429688, -0.07727298140525818, -0.06847494840621948, -0.05967692285776138, -0.050878893584012985, -0.04208086431026459, -0.03328283503651619, -0.024484805762767792, -0.015686776489019394, -0.006888747215270996, 0.0019092820584774017, 0.0107073113322258, 0.019505340605974197, 0.028303369879722595, 0.03710139915347099, 0.04589942842721939, 0.05469745770096779, 0.06349548697471619, 0.07229351997375488, 0.08109154552221298, 0.08988957107067108, 0.09868760406970978, 0.10748563706874847, 0.11628366261720657, 0.12508168816566467, 0.13387972116470337, 0.14267775416374207, 0.15147578716278076, 0.16027380526065826, 0.16907183825969696, 0.17786987125873566, 0.18666788935661316, 0.19546592235565186, 0.20426395535469055, 0.21306198835372925, 0.22186002135276794, 0.23065803945064545, 0.23945607244968414, 0.24825410544872284, 0.25705212354660034, 0.26585015654563904, 0.27464818954467773]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 4.0, 2.0, 3.0, 6.0, 9.0, 16.0, 20.0, 24.0, 28.0, 41.0, 61.0, 94.0, 143.0, 182.0, 276.0, 430.0, 718.0, 1127.0, 1895.0, 3165.0, 5632.0, 10712.0, 24717.0, 239072.0, 698539.0, 32921.0, 12953.0, 6490.0, 3557.0, 2062.0, 1282.0, 809.0, 495.0, 333.0, 238.0, 148.0, 102.0, 73.0, 49.0, 47.0, 33.0, 13.0, 15.0, 6.0, 9.0, 3.0, 3.0, 2.0, 5.0, 1.0, 0.0, 2.0, 1.0, 1.0], "bins": [-0.235595703125, -0.22869110107421875, -0.2217864990234375, -0.21488189697265625, -0.207977294921875, -0.20107269287109375, -0.1941680908203125, -0.18726348876953125, -0.18035888671875, -0.17345428466796875, -0.1665496826171875, -0.15964508056640625, -0.152740478515625, -0.14583587646484375, -0.1389312744140625, -0.13202667236328125, -0.1251220703125, -0.11821746826171875, -0.1113128662109375, -0.10440826416015625, -0.097503662109375, -0.09059906005859375, -0.0836944580078125, -0.07678985595703125, -0.06988525390625, -0.06298065185546875, -0.0560760498046875, -0.04917144775390625, -0.042266845703125, -0.03536224365234375, -0.0284576416015625, -0.02155303955078125, -0.0146484375, -0.00774383544921875, -0.0008392333984375, 0.00606536865234375, 0.012969970703125, 0.01987457275390625, 0.0267791748046875, 0.03368377685546875, 0.04058837890625, 0.04749298095703125, 0.0543975830078125, 0.06130218505859375, 0.068206787109375, 0.07511138916015625, 0.0820159912109375, 0.08892059326171875, 0.0958251953125, 0.10272979736328125, 0.1096343994140625, 0.11653900146484375, 0.123443603515625, 0.13034820556640625, 0.1372528076171875, 0.14415740966796875, 0.15106201171875, 0.15796661376953125, 0.1648712158203125, 0.17177581787109375, 0.178680419921875, 0.18558502197265625, 0.1924896240234375, 0.19939422607421875, 0.206298828125]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 3.0, 4.0, 2.0, 4.0, 3.0, 3.0, 7.0, 7.0, 13.0, 4.0, 7.0, 21.0, 36.0, 94.0, 259.0, 301.0, 126.0, 43.0, 12.0, 11.0, 5.0, 5.0, 4.0, 5.0, 4.0, 6.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0372314453125, -0.036036014556884766, -0.03484058380126953, -0.0336451530456543, -0.03244972229003906, -0.03125429153442383, -0.030058860778808594, -0.02886343002319336, -0.027667999267578125, -0.02647256851196289, -0.025277137756347656, -0.024081707000732422, -0.022886276245117188, -0.021690845489501953, -0.02049541473388672, -0.019299983978271484, -0.01810455322265625, -0.016909122467041016, -0.01571369171142578, -0.014518260955810547, -0.013322830200195312, -0.012127399444580078, -0.010931968688964844, -0.00973653793334961, -0.008541107177734375, -0.007345676422119141, -0.006150245666503906, -0.004954814910888672, -0.0037593841552734375, -0.002563953399658203, -0.0013685226440429688, -0.00017309188842773438, 0.0010223388671875, 0.0022177696228027344, 0.0034132003784179688, 0.004608631134033203, 0.0058040618896484375, 0.006999492645263672, 0.008194923400878906, 0.00939035415649414, 0.010585784912109375, 0.01178121566772461, 0.012976646423339844, 0.014172077178955078, 0.015367507934570312, 0.016562938690185547, 0.01775836944580078, 0.018953800201416016, 0.02014923095703125, 0.021344661712646484, 0.02254009246826172, 0.023735523223876953, 0.024930953979492188, 0.026126384735107422, 0.027321815490722656, 0.02851724624633789, 0.029712677001953125, 0.03090810775756836, 0.032103538513183594, 0.03329896926879883, 0.03449440002441406, 0.0356898307800293, 0.03688526153564453, 0.038080692291259766, 0.039276123046875]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 4.0, 6.0, 8.0, 18.0, 15.0, 26.0, 35.0, 65.0, 117.0, 187.0, 409.0, 1077.0, 3971.0, 37585.0, 923216.0, 73859.0, 5615.0, 1309.0, 500.0, 225.0, 120.0, 72.0, 40.0, 19.0, 14.0, 14.0, 9.0, 5.0, 9.0, 3.0, 2.0, 1.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.58642578125, -0.56591796875, -0.54541015625, -0.52490234375, -0.50439453125, -0.48388671875, -0.46337890625, -0.44287109375, -0.42236328125, -0.40185546875, -0.38134765625, -0.36083984375, -0.34033203125, -0.31982421875, -0.29931640625, -0.27880859375, -0.25830078125, -0.23779296875, -0.21728515625, -0.19677734375, -0.17626953125, -0.15576171875, -0.13525390625, -0.11474609375, -0.09423828125, -0.07373046875, -0.05322265625, -0.03271484375, -0.01220703125, 0.00830078125, 0.02880859375, 0.04931640625, 0.06982421875, 0.09033203125, 0.11083984375, 0.13134765625, 0.15185546875, 0.17236328125, 0.19287109375, 0.21337890625, 0.23388671875, 0.25439453125, 0.27490234375, 0.29541015625, 0.31591796875, 0.33642578125, 0.35693359375, 0.37744140625, 0.39794921875, 0.41845703125, 0.43896484375, 0.45947265625, 0.47998046875, 0.50048828125, 0.52099609375, 0.54150390625, 0.56201171875, 0.58251953125, 0.60302734375, 0.62353515625, 0.64404296875, 0.66455078125, 0.68505859375, 0.70556640625, 0.72607421875]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 2.0, 3.0, 5.0, 5.0, 6.0, 11.0, 15.0, 12.0, 12.0, 24.0, 27.0, 19.0, 29.0, 32.0, 30.0, 39.0, 34.0, 41.0, 44.0, 52.0, 44.0, 34.0, 52.0, 45.0, 39.0, 35.0, 27.0, 35.0, 34.0, 26.0, 29.0, 26.0, 21.0, 19.0, 17.0, 16.0, 12.0, 8.0, 8.0, 8.0, 3.0, 7.0, 1.0, 8.0, 3.0, 5.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.1605224609375, -0.15567398071289062, -0.15082550048828125, -0.14597702026367188, -0.1411285400390625, -0.13628005981445312, -0.13143157958984375, -0.12658309936523438, -0.121734619140625, -0.11688613891601562, -0.11203765869140625, -0.10718917846679688, -0.1023406982421875, -0.09749221801757812, -0.09264373779296875, -0.08779525756835938, -0.08294677734375, -0.07809829711914062, -0.07324981689453125, -0.06840133666992188, -0.0635528564453125, -0.058704376220703125, -0.05385589599609375, -0.049007415771484375, -0.044158935546875, -0.039310455322265625, -0.03446197509765625, -0.029613494873046875, -0.0247650146484375, -0.019916534423828125, -0.01506805419921875, -0.010219573974609375, -0.00537109375, -0.000522613525390625, 0.00432586669921875, 0.009174346923828125, 0.0140228271484375, 0.018871307373046875, 0.02371978759765625, 0.028568267822265625, 0.033416748046875, 0.038265228271484375, 0.04311370849609375, 0.047962188720703125, 0.0528106689453125, 0.057659149169921875, 0.06250762939453125, 0.06735610961914062, 0.07220458984375, 0.07705307006835938, 0.08190155029296875, 0.08675003051757812, 0.0915985107421875, 0.09644699096679688, 0.10129547119140625, 0.10614395141601562, 0.110992431640625, 0.11584091186523438, 0.12068939208984375, 0.12553787231445312, 0.1303863525390625, 0.13523483276367188, 0.14008331298828125, 0.14493179321289062, 0.1497802734375]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 4.0, 2.0, 5.0, 3.0, 4.0, 9.0, 9.0, 19.0, 21.0, 33.0, 39.0, 68.0, 118.0, 150.0, 275.0, 580.0, 1204.0, 3240.0, 13415.0, 339894.0, 666043.0, 16954.0, 3655.0, 1346.0, 610.0, 329.0, 197.0, 123.0, 67.0, 31.0, 30.0, 18.0, 13.0, 15.0, 10.0, 7.0, 4.0, 4.0, 5.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.404296875, -0.3912811279296875, -0.378265380859375, -0.3652496337890625, -0.35223388671875, -0.3392181396484375, -0.326202392578125, -0.3131866455078125, -0.3001708984375, -0.2871551513671875, -0.274139404296875, -0.2611236572265625, -0.24810791015625, -0.2350921630859375, -0.222076416015625, -0.2090606689453125, -0.196044921875, -0.1830291748046875, -0.170013427734375, -0.1569976806640625, -0.14398193359375, -0.1309661865234375, -0.117950439453125, -0.1049346923828125, -0.0919189453125, -0.0789031982421875, -0.065887451171875, -0.0528717041015625, -0.03985595703125, -0.0268402099609375, -0.013824462890625, -0.0008087158203125, 0.01220703125, 0.0252227783203125, 0.038238525390625, 0.0512542724609375, 0.06427001953125, 0.0772857666015625, 0.090301513671875, 0.1033172607421875, 0.1163330078125, 0.1293487548828125, 0.142364501953125, 0.1553802490234375, 0.16839599609375, 0.1814117431640625, 0.194427490234375, 0.2074432373046875, 0.220458984375, 0.2334747314453125, 0.246490478515625, 0.2595062255859375, 0.27252197265625, 0.2855377197265625, 0.298553466796875, 0.3115692138671875, 0.3245849609375, 0.3376007080078125, 0.350616455078125, 0.3636322021484375, 0.37664794921875, 0.3896636962890625, 0.402679443359375, 0.4156951904296875, 0.4287109375]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 0.0, 2.0, 2.0, 4.0, 6.0, 7.0, 5.0, 4.0, 8.0, 7.0, 15.0, 27.0, 38.0, 39.0, 67.0, 84.0, 141.0, 137.0, 113.0, 79.0, 58.0, 49.0, 21.0, 18.0, 22.0, 11.0, 10.0, 6.0, 4.0, 4.0, 4.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0005664825439453125, -0.000547848641872406, -0.0005292147397994995, -0.000510580837726593, -0.0004919469356536865, -0.00047331303358078003, -0.00045467913150787354, -0.00043604522943496704, -0.00041741132736206055, -0.00039877742528915405, -0.00038014352321624756, -0.00036150962114334106, -0.00034287571907043457, -0.0003242418169975281, -0.0003056079149246216, -0.0002869740128517151, -0.0002683401107788086, -0.0002497062087059021, -0.0002310723066329956, -0.0002124384045600891, -0.00019380450248718262, -0.00017517060041427612, -0.00015653669834136963, -0.00013790279626846313, -0.00011926889419555664, -0.00010063499212265015, -8.200109004974365e-05, -6.336718797683716e-05, -4.4733285903930664e-05, -2.609938383102417e-05, -7.465481758117676e-06, 1.1168420314788818e-05, 2.9802322387695312e-05, 4.843622446060181e-05, 6.70701265335083e-05, 8.57040286064148e-05, 0.00010433793067932129, 0.00012297183275222778, 0.00014160573482513428, 0.00016023963689804077, 0.00017887353897094727, 0.00019750744104385376, 0.00021614134311676025, 0.00023477524518966675, 0.00025340914726257324, 0.00027204304933547974, 0.00029067695140838623, 0.0003093108534812927, 0.0003279447555541992, 0.0003465786576271057, 0.0003652125597000122, 0.0003838464617729187, 0.0004024803638458252, 0.0004211142659187317, 0.0004397481679916382, 0.0004583820700645447, 0.00047701597213745117, 0.0004956498742103577, 0.0005142837762832642, 0.0005329176783561707, 0.0005515515804290771, 0.0005701854825019836, 0.0005888193845748901, 0.0006074532866477966, 0.0006260871887207031]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [4.0, 2.0, 2.0, 3.0, 4.0, 3.0, 3.0, 10.0, 14.0, 19.0, 38.0, 56.0, 89.0, 168.0, 364.0, 1101.0, 3993.0, 26082.0, 671323.0, 323454.0, 17249.0, 3019.0, 849.0, 360.0, 158.0, 84.0, 46.0, 33.0, 22.0, 9.0, 7.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2352294921875, -0.22278785705566406, -0.21034622192382812, -0.1979045867919922, -0.18546295166015625, -0.1730213165283203, -0.16057968139648438, -0.14813804626464844, -0.1356964111328125, -0.12325477600097656, -0.11081314086914062, -0.09837150573730469, -0.08592987060546875, -0.07348823547363281, -0.061046600341796875, -0.04860496520996094, -0.036163330078125, -0.023721694946289062, -0.011280059814453125, 0.0011615753173828125, 0.01360321044921875, 0.026044845581054688, 0.038486480712890625, 0.05092811584472656, 0.0633697509765625, 0.07581138610839844, 0.08825302124023438, 0.10069465637207031, 0.11313629150390625, 0.1255779266357422, 0.13801956176757812, 0.15046119689941406, 0.16290283203125, 0.17534446716308594, 0.18778610229492188, 0.2002277374267578, 0.21266937255859375, 0.2251110076904297, 0.23755264282226562, 0.24999427795410156, 0.2624359130859375, 0.27487754821777344, 0.2873191833496094, 0.2997608184814453, 0.31220245361328125, 0.3246440887451172, 0.3370857238769531, 0.34952735900878906, 0.361968994140625, 0.37441062927246094, 0.3868522644042969, 0.3992938995361328, 0.41173553466796875, 0.4241771697998047, 0.4366188049316406, 0.44906044006347656, 0.4615020751953125, 0.47394371032714844, 0.4863853454589844, 0.4988269805908203, 0.5112686157226562, 0.5237102508544922, 0.5361518859863281, 0.5485935211181641, 0.56103515625]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 5.0, 5.0, 7.0, 9.0, 11.0, 20.0, 26.0, 39.0, 54.0, 97.0, 156.0, 186.0, 131.0, 91.0, 51.0, 45.0, 21.0, 16.0, 15.0, 4.0, 4.0, 5.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1812744140625, -0.1738414764404297, -0.16640853881835938, -0.15897560119628906, -0.15154266357421875, -0.14410972595214844, -0.13667678833007812, -0.1292438507080078, -0.1218109130859375, -0.11437797546386719, -0.10694503784179688, -0.09951210021972656, -0.09207916259765625, -0.08464622497558594, -0.07721328735351562, -0.06978034973144531, -0.062347412109375, -0.05491447448730469, -0.047481536865234375, -0.04004859924316406, -0.03261566162109375, -0.025182723999023438, -0.017749786376953125, -0.010316848754882812, -0.0028839111328125, 0.0045490264892578125, 0.011981964111328125, 0.019414901733398438, 0.02684783935546875, 0.03428077697753906, 0.041713714599609375, 0.04914665222167969, 0.05657958984375, 0.06401252746582031, 0.07144546508789062, 0.07887840270996094, 0.08631134033203125, 0.09374427795410156, 0.10117721557617188, 0.10861015319824219, 0.1160430908203125, 0.12347602844238281, 0.13090896606445312, 0.13834190368652344, 0.14577484130859375, 0.15320777893066406, 0.16064071655273438, 0.1680736541748047, 0.175506591796875, 0.1829395294189453, 0.19037246704101562, 0.19780540466308594, 0.20523834228515625, 0.21267127990722656, 0.22010421752929688, 0.2275371551513672, 0.2349700927734375, 0.2424030303955078, 0.24983596801757812, 0.25726890563964844, 0.26470184326171875, 0.27213478088378906, 0.2795677185058594, 0.2870006561279297, 0.29443359375]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 3.0, 3.0, 9.0, 5.0, 10.0, 29.0, 73.0, 301.0, 395.0, 105.0, 32.0, 16.0, 8.0, 6.0, 3.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.940474033355713, -5.80599308013916, -5.671512126922607, -5.5370306968688965, -5.402549743652344, -5.268068790435791, -5.133587837219238, -4.9991068840026855, -4.864625930786133, -4.73014497756958, -4.595664024353027, -4.461182594299316, -4.326701641082764, -4.192220687866211, -4.057739734649658, -3.9232587814331055, -3.7887773513793945, -3.654296398162842, -3.51981520652771, -3.3853342533111572, -3.2508530616760254, -3.1163721084594727, -2.98189115524292, -2.847410202026367, -2.7129290103912354, -2.5784480571746826, -2.443966865539551, -2.309485912322998, -2.1750049591064453, -2.0405237674713135, -1.9060428142547607, -1.7715617418289185, -1.6370806694030762, -1.5025995969772339, -1.3681185245513916, -1.2336375713348389, -1.0991564989089966, -0.9646754264831543, -0.8301944136619568, -0.6957134008407593, -0.561232328414917, -0.4267512857913971, -0.2922702431678772, -0.1577892005443573, -0.023308157920837402, 0.11117291450500488, 0.2456539273262024, 0.3801349401473999, 0.5146160125732422, 0.6490970849990845, 0.783578097820282, 0.9180591106414795, 1.0525401830673218, 1.187021255493164, 1.3215022087097168, 1.455983281135559, 1.5904643535614014, 1.7249454259872437, 1.859426498413086, 1.9939074516296387, 2.1283884048461914, 2.2628695964813232, 2.397350549697876, 2.531831741333008, 2.6663126945495605]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 3.0, 1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 4.0, 6.0, 9.0, 9.0, 10.0, 17.0, 23.0, 35.0, 58.0, 103.0, 132.0, 171.0, 144.0, 94.0, 53.0, 43.0, 17.0, 12.0, 9.0, 5.0, 9.0, 4.0, 3.0, 3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8885433673858643, -2.7938334941864014, -2.6991233825683594, -2.6044135093688965, -2.5097036361694336, -2.4149937629699707, -2.320283889770508, -2.225573778152466, -2.130863904953003, -2.03615403175354, -1.9414440393447876, -1.8467340469360352, -1.7520241737365723, -1.6573143005371094, -1.562604308128357, -1.4678943157196045, -1.3731844425201416, -1.2784745693206787, -1.1837645769119263, -1.0890545845031738, -0.9943447113037109, -0.8996347784996033, -0.8049248456954956, -0.7102149128913879, -0.6155049800872803, -0.5207950472831726, -0.42608511447906494, -0.3313751816749573, -0.2366652488708496, -0.14195531606674194, -0.04724538326263428, 0.04746454954147339, 0.14217472076416016, 0.23688465356826782, 0.3315945863723755, 0.42630451917648315, 0.5210144519805908, 0.6157243847846985, 0.7104343175888062, 0.8051442503929138, 0.8998541831970215, 0.9945641160011292, 1.0892740488052368, 1.1839840412139893, 1.2786939144134521, 1.373403787612915, 1.4681137800216675, 1.56282377243042, 1.6575336456298828, 1.7522435188293457, 1.8469535112380981, 1.9416635036468506, 2.0363733768463135, 2.1310832500457764, 2.2257933616638184, 2.3205032348632812, 2.415213108062744, 2.509922981262207, 2.60463285446167, 2.699342966079712, 2.794052839279175, 2.8887627124786377, 2.9834728240966797, 3.0781826972961426, 3.1728925704956055]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 3.0, 2.0, 1.0, 4.0, 3.0, 3.0, 9.0, 7.0, 13.0, 12.0, 15.0, 20.0, 31.0, 23.0, 80.0, 393.0, 7879.0, 4106897.0, 76392.0, 2050.0, 260.0, 71.0, 34.0, 16.0, 22.0, 10.0, 9.0, 8.0, 2.0, 2.0, 8.0, 2.0, 1.0, 2.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.93359375, -1.86639404296875, -1.7991943359375, -1.73199462890625, -1.664794921875, -1.59759521484375, -1.5303955078125, -1.46319580078125, -1.39599609375, -1.32879638671875, -1.2615966796875, -1.19439697265625, -1.127197265625, -1.05999755859375, -0.9927978515625, -0.92559814453125, -0.8583984375, -0.79119873046875, -0.7239990234375, -0.65679931640625, -0.589599609375, -0.52239990234375, -0.4552001953125, -0.38800048828125, -0.32080078125, -0.25360107421875, -0.1864013671875, -0.11920166015625, -0.052001953125, 0.01519775390625, 0.0823974609375, 0.14959716796875, 0.216796875, 0.28399658203125, 0.3511962890625, 0.41839599609375, 0.485595703125, 0.55279541015625, 0.6199951171875, 0.68719482421875, 0.75439453125, 0.82159423828125, 0.8887939453125, 0.95599365234375, 1.023193359375, 1.09039306640625, 1.1575927734375, 1.22479248046875, 1.2919921875, 1.35919189453125, 1.4263916015625, 1.49359130859375, 1.560791015625, 1.62799072265625, 1.6951904296875, 1.76239013671875, 1.82958984375, 1.89678955078125, 1.9639892578125, 2.03118896484375, 2.098388671875, 2.16558837890625, 2.2327880859375, 2.29998779296875, 2.3671875]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 4.0, 4.0, 2.0, 3.0, 4.0, 2.0, 6.0, 4.0, 10.0, 9.0, 5.0, 11.0, 13.0, 25.0, 35.0, 59.0, 122.0, 129.0, 149.0, 126.0, 102.0, 59.0, 31.0, 21.0, 8.0, 10.0, 7.0, 6.0, 13.0, 2.0, 4.0, 6.0, 6.0, 6.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0214691162109375, -0.020688533782958984, -0.01990795135498047, -0.019127368927001953, -0.018346786499023438, -0.017566204071044922, -0.016785621643066406, -0.01600503921508789, -0.015224456787109375, -0.01444387435913086, -0.013663291931152344, -0.012882709503173828, -0.012102127075195312, -0.011321544647216797, -0.010540962219238281, -0.009760379791259766, -0.00897979736328125, -0.008199214935302734, -0.007418632507324219, -0.006638050079345703, -0.0058574676513671875, -0.005076885223388672, -0.004296302795410156, -0.0035157203674316406, -0.002735137939453125, -0.0019545555114746094, -0.0011739730834960938, -0.0003933906555175781, 0.0003871917724609375, 0.0011677742004394531, 0.0019483566284179688, 0.0027289390563964844, 0.003509521484375, 0.004290103912353516, 0.005070686340332031, 0.005851268768310547, 0.0066318511962890625, 0.007412433624267578, 0.008193016052246094, 0.00897359848022461, 0.009754180908203125, 0.01053476333618164, 0.011315345764160156, 0.012095928192138672, 0.012876510620117188, 0.013657093048095703, 0.014437675476074219, 0.015218257904052734, 0.01599884033203125, 0.016779422760009766, 0.01756000518798828, 0.018340587615966797, 0.019121170043945312, 0.019901752471923828, 0.020682334899902344, 0.02146291732788086, 0.022243499755859375, 0.02302408218383789, 0.023804664611816406, 0.024585247039794922, 0.025365829467773438, 0.026146411895751953, 0.02692699432373047, 0.027707576751708984, 0.0284881591796875]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 4.0, 3.0, 12.0, 16.0, 37.0, 50.0, 82.0, 209.0, 428.0, 1089.0, 3107.0, 12992.0, 115218.0, 3926190.0, 117190.0, 12690.0, 3022.0, 990.0, 440.0, 216.0, 139.0, 68.0, 39.0, 22.0, 18.0, 9.0, 7.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.29833984375, -0.2812957763671875, -0.264251708984375, -0.2472076416015625, -0.23016357421875, -0.2131195068359375, -0.196075439453125, -0.1790313720703125, -0.1619873046875, -0.1449432373046875, -0.127899169921875, -0.1108551025390625, -0.09381103515625, -0.0767669677734375, -0.059722900390625, -0.0426788330078125, -0.025634765625, -0.0085906982421875, 0.008453369140625, 0.0254974365234375, 0.04254150390625, 0.0595855712890625, 0.076629638671875, 0.0936737060546875, 0.1107177734375, 0.1277618408203125, 0.144805908203125, 0.1618499755859375, 0.17889404296875, 0.1959381103515625, 0.212982177734375, 0.2300262451171875, 0.2470703125, 0.2641143798828125, 0.281158447265625, 0.2982025146484375, 0.31524658203125, 0.3322906494140625, 0.349334716796875, 0.3663787841796875, 0.3834228515625, 0.4004669189453125, 0.417510986328125, 0.4345550537109375, 0.45159912109375, 0.4686431884765625, 0.485687255859375, 0.5027313232421875, 0.519775390625, 0.5368194580078125, 0.553863525390625, 0.5709075927734375, 0.58795166015625, 0.6049957275390625, 0.622039794921875, 0.6390838623046875, 0.6561279296875, 0.6731719970703125, 0.690216064453125, 0.7072601318359375, 0.72430419921875, 0.7413482666015625, 0.758392333984375, 0.7754364013671875, 0.79248046875]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 3.0, 2.0, 6.0, 4.0, 8.0, 5.0, 8.0, 11.0, 18.0, 21.0, 36.0, 56.0, 76.0, 135.0, 297.0, 1446.0, 1337.0, 308.0, 101.0, 79.0, 32.0, 26.0, 16.0, 13.0, 10.0, 5.0, 3.0, 1.0, 3.0, 4.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.052032470703125, -0.050295352935791016, -0.04855823516845703, -0.04682111740112305, -0.04508399963378906, -0.04334688186645508, -0.041609764099121094, -0.03987264633178711, -0.038135528564453125, -0.03639841079711914, -0.034661293029785156, -0.03292417526245117, -0.031187057495117188, -0.029449939727783203, -0.02771282196044922, -0.025975704193115234, -0.02423858642578125, -0.022501468658447266, -0.02076435089111328, -0.019027233123779297, -0.017290115356445312, -0.015552997589111328, -0.013815879821777344, -0.01207876205444336, -0.010341644287109375, -0.00860452651977539, -0.006867408752441406, -0.005130290985107422, -0.0033931732177734375, -0.0016560554504394531, 8.106231689453125e-05, 0.0018181800842285156, 0.0035552978515625, 0.005292415618896484, 0.007029533386230469, 0.008766651153564453, 0.010503768920898438, 0.012240886688232422, 0.013978004455566406, 0.01571512222290039, 0.017452239990234375, 0.01918935775756836, 0.020926475524902344, 0.022663593292236328, 0.024400711059570312, 0.026137828826904297, 0.02787494659423828, 0.029612064361572266, 0.03134918212890625, 0.033086299896240234, 0.03482341766357422, 0.0365605354309082, 0.03829765319824219, 0.04003477096557617, 0.041771888732910156, 0.04350900650024414, 0.045246124267578125, 0.04698324203491211, 0.048720359802246094, 0.05045747756958008, 0.05219459533691406, 0.05393171310424805, 0.05566883087158203, 0.057405948638916016, 0.05914306640625]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [15.0, 279.0, 681.0, 33.0, 8.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12057021260261536, -0.06381553411483765, -0.0070608556270599365, 0.04969382286071777, 0.10644850134849548, 0.1632031798362732, 0.2199578583240509, 0.2767125368118286, 0.3334672152996063, 0.39022189378738403, 0.44697657227516174, 0.5037312507629395, 0.5604859590530396, 0.6172406077384949, 0.6739952564239502, 0.7307499647140503, 0.7875046730041504, 0.8442593812942505, 0.9010140299797058, 0.9577686786651611, 1.0145233869552612, 1.0712780952453613, 1.1280326843261719, 1.184787392616272, 1.241542100906372, 1.2982968091964722, 1.3550515174865723, 1.4118061065673828, 1.468560814857483, 1.525315523147583, 1.5820701122283936, 1.6388248205184937, 1.6955795288085938, 1.7523342370986938, 1.809088945388794, 1.8658435344696045, 1.9225982427597046, 1.9793529510498047, 2.0361075401306152, 2.092862367630005, 2.1496169567108154, 2.206371545791626, 2.2631263732910156, 2.319880962371826, 2.3766355514526367, 2.4333903789520264, 2.490144968032837, 2.5468997955322266, 2.603654384613037, 2.6604089736938477, 2.7171638011932373, 2.773918390274048, 2.8306732177734375, 2.887427806854248, 2.9441823959350586, 3.0009372234344482, 3.057691812515259, 3.1144464015960693, 3.171201229095459, 3.2279558181762695, 3.28471040725708, 3.3414652347564697, 3.3982198238372803, 3.45497465133667, 3.5117292404174805]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 5.0, 5.0, 4.0, 4.0, 1.0, 1.0, 3.0, 7.0, 8.0, 11.0, 13.0, 27.0, 22.0, 47.0, 58.0, 85.0, 92.0, 106.0, 82.0, 95.0, 93.0, 72.0, 44.0, 20.0, 20.0, 20.0, 14.0, 10.0, 5.0, 6.0, 3.0, 4.0, 3.0, 4.0, 3.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0], "bins": [-0.28923821449279785, -0.2812667489051819, -0.27329525351524353, -0.26532378792762756, -0.2573522925376892, -0.24938082695007324, -0.24140936136245728, -0.23343788087368011, -0.22546640038490295, -0.2174949198961258, -0.20952343940734863, -0.20155197381973267, -0.1935804933309555, -0.18560901284217834, -0.17763754725456238, -0.16966606676578522, -0.16169458627700806, -0.1537231057882309, -0.14575162529945374, -0.13778015971183777, -0.1298086792230606, -0.12183719873428345, -0.11386572569608688, -0.10589425265789032, -0.09792277216911316, -0.089951291680336, -0.08197981864213943, -0.07400834560394287, -0.06603686511516571, -0.05806538835167885, -0.050093911588191986, -0.042122434824705124, -0.03415095806121826, -0.0261794812977314, -0.018208004534244537, -0.010236527770757675, -0.002265051007270813, 0.005706425756216049, 0.013677902519702911, 0.021649379283189774, 0.029620856046676636, 0.0375923328101635, 0.04556380957365036, 0.05353528633713722, 0.061506763100624084, 0.06947824358940125, 0.07744971662759781, 0.08542118966579437, 0.09339267015457153, 0.1013641506433487, 0.10933562368154526, 0.11730709671974182, 0.12527857720851898, 0.13325005769729614, 0.1412215232849121, 0.14919300377368927, 0.15716448426246643, 0.1651359647512436, 0.17310744524002075, 0.18107891082763672, 0.18905039131641388, 0.19702187180519104, 0.204993337392807, 0.21296481788158417, 0.22093629837036133]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 3.0, 6.0, 7.0, 17.0, 18.0, 47.0, 79.0, 191.0, 376.0, 999.0, 2362.0, 6714.0, 21573.0, 125753.0, 753310.0, 107041.0, 19969.0, 6178.0, 2276.0, 901.0, 355.0, 176.0, 99.0, 35.0, 30.0, 14.0, 8.0, 5.0, 6.0, 2.0, 3.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.225341796875, -0.216827392578125, -0.20831298828125, -0.199798583984375, -0.1912841796875, -0.182769775390625, -0.17425537109375, -0.165740966796875, -0.1572265625, -0.148712158203125, -0.14019775390625, -0.131683349609375, -0.1231689453125, -0.114654541015625, -0.10614013671875, -0.097625732421875, -0.089111328125, -0.080596923828125, -0.07208251953125, -0.063568115234375, -0.0550537109375, -0.046539306640625, -0.03802490234375, -0.029510498046875, -0.02099609375, -0.012481689453125, -0.00396728515625, 0.004547119140625, 0.0130615234375, 0.021575927734375, 0.03009033203125, 0.038604736328125, 0.047119140625, 0.055633544921875, 0.06414794921875, 0.072662353515625, 0.0811767578125, 0.089691162109375, 0.09820556640625, 0.106719970703125, 0.115234375, 0.123748779296875, 0.13226318359375, 0.140777587890625, 0.1492919921875, 0.157806396484375, 0.16632080078125, 0.174835205078125, 0.183349609375, 0.191864013671875, 0.20037841796875, 0.208892822265625, 0.2174072265625, 0.225921630859375, 0.23443603515625, 0.242950439453125, 0.25146484375, 0.259979248046875, 0.26849365234375, 0.277008056640625, 0.2855224609375, 0.294036865234375, 0.30255126953125, 0.311065673828125, 0.319580078125]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 2.0, 3.0, 3.0, 3.0, 2.0, 11.0, 6.0, 5.0, 5.0, 6.0, 13.0, 8.0, 17.0, 26.0, 47.0, 57.0, 74.0, 90.0, 94.0, 106.0, 106.0, 64.0, 71.0, 44.0, 39.0, 25.0, 14.0, 15.0, 7.0, 6.0, 5.0, 7.0, 7.0, 2.0, 3.0, 4.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.016998291015625, -0.016397953033447266, -0.01579761505126953, -0.015197277069091797, -0.014596939086914062, -0.013996601104736328, -0.013396263122558594, -0.01279592514038086, -0.012195587158203125, -0.01159524917602539, -0.010994911193847656, -0.010394573211669922, -0.009794235229492188, -0.009193897247314453, -0.008593559265136719, -0.007993221282958984, -0.00739288330078125, -0.006792545318603516, -0.006192207336425781, -0.005591869354248047, -0.0049915313720703125, -0.004391193389892578, -0.0037908554077148438, -0.0031905174255371094, -0.002590179443359375, -0.0019898414611816406, -0.0013895034790039062, -0.0007891654968261719, -0.0001888275146484375, 0.0004115104675292969, 0.0010118484497070312, 0.0016121864318847656, 0.0022125244140625, 0.0028128623962402344, 0.0034132003784179688, 0.004013538360595703, 0.0046138763427734375, 0.005214214324951172, 0.005814552307128906, 0.006414890289306641, 0.007015228271484375, 0.007615566253662109, 0.008215904235839844, 0.008816242218017578, 0.009416580200195312, 0.010016918182373047, 0.010617256164550781, 0.011217594146728516, 0.01181793212890625, 0.012418270111083984, 0.013018608093261719, 0.013618946075439453, 0.014219284057617188, 0.014819622039794922, 0.015419960021972656, 0.01602029800415039, 0.016620635986328125, 0.01722097396850586, 0.017821311950683594, 0.018421649932861328, 0.019021987915039062, 0.019622325897216797, 0.02022266387939453, 0.020823001861572266, 0.02142333984375]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 3.0, 4.0, 1.0, 2.0, 2.0, 2.0, 4.0, 2.0, 3.0, 7.0, 6.0, 8.0, 10.0, 13.0, 22.0, 47.0, 143.0, 728.0, 7941.0, 646939.0, 386767.0, 5091.0, 556.0, 127.0, 28.0, 16.0, 9.0, 8.0, 8.0, 5.0, 4.0, 8.0, 9.0, 3.0, 6.0, 7.0, 5.0, 3.0, 3.0, 3.0, 2.0, 4.0, 0.0, 0.0, 2.0], "bins": [-0.7470703125, -0.7278823852539062, -0.7086944580078125, -0.6895065307617188, -0.670318603515625, -0.6511306762695312, -0.6319427490234375, -0.6127548217773438, -0.59356689453125, -0.5743789672851562, -0.5551910400390625, -0.5360031127929688, -0.516815185546875, -0.49762725830078125, -0.4784393310546875, -0.45925140380859375, -0.4400634765625, -0.42087554931640625, -0.4016876220703125, -0.38249969482421875, -0.363311767578125, -0.34412384033203125, -0.3249359130859375, -0.30574798583984375, -0.28656005859375, -0.26737213134765625, -0.2481842041015625, -0.22899627685546875, -0.209808349609375, -0.19062042236328125, -0.1714324951171875, -0.15224456787109375, -0.133056640625, -0.11386871337890625, -0.0946807861328125, -0.07549285888671875, -0.056304931640625, -0.03711700439453125, -0.0179290771484375, 0.00125885009765625, 0.02044677734375, 0.03963470458984375, 0.0588226318359375, 0.07801055908203125, 0.097198486328125, 0.11638641357421875, 0.1355743408203125, 0.15476226806640625, 0.1739501953125, 0.19313812255859375, 0.2123260498046875, 0.23151397705078125, 0.250701904296875, 0.26988983154296875, 0.2890777587890625, 0.30826568603515625, 0.32745361328125, 0.34664154052734375, 0.3658294677734375, 0.38501739501953125, 0.404205322265625, 0.42339324951171875, 0.4425811767578125, 0.46176910400390625, 0.48095703125]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 4.0, 9.0, 2.0, 5.0, 4.0, 6.0, 8.0, 9.0, 13.0, 13.0, 7.0, 13.0, 19.0, 24.0, 22.0, 22.0, 23.0, 35.0, 38.0, 38.0, 33.0, 36.0, 47.0, 53.0, 34.0, 45.0, 34.0, 45.0, 44.0, 31.0, 36.0, 30.0, 31.0, 29.0, 34.0, 23.0, 17.0, 20.0, 18.0, 9.0, 11.0, 9.0, 7.0, 2.0, 4.0, 3.0, 3.0, 7.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0850830078125, -0.0824737548828125, -0.079864501953125, -0.0772552490234375, -0.07464599609375, -0.0720367431640625, -0.069427490234375, -0.0668182373046875, -0.064208984375, -0.0615997314453125, -0.058990478515625, -0.0563812255859375, -0.05377197265625, -0.0511627197265625, -0.048553466796875, -0.0459442138671875, -0.0433349609375, -0.0407257080078125, -0.038116455078125, -0.0355072021484375, -0.03289794921875, -0.0302886962890625, -0.027679443359375, -0.0250701904296875, -0.0224609375, -0.0198516845703125, -0.017242431640625, -0.0146331787109375, -0.01202392578125, -0.0094146728515625, -0.006805419921875, -0.0041961669921875, -0.0015869140625, 0.0010223388671875, 0.003631591796875, 0.0062408447265625, 0.00885009765625, 0.0114593505859375, 0.014068603515625, 0.0166778564453125, 0.019287109375, 0.0218963623046875, 0.024505615234375, 0.0271148681640625, 0.02972412109375, 0.0323333740234375, 0.034942626953125, 0.0375518798828125, 0.0401611328125, 0.0427703857421875, 0.045379638671875, 0.0479888916015625, 0.05059814453125, 0.0532073974609375, 0.055816650390625, 0.0584259033203125, 0.06103515625, 0.0636444091796875, 0.066253662109375, 0.0688629150390625, 0.07147216796875, 0.0740814208984375, 0.076690673828125, 0.0792999267578125, 0.0819091796875]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 3.0, 9.0, 8.0, 7.0, 22.0, 29.0, 56.0, 127.0, 468.0, 3414.0, 825038.0, 216589.0, 2222.0, 328.0, 91.0, 40.0, 31.0, 30.0, 6.0, 10.0, 2.0, 4.0, 8.0, 3.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.0126953125, -0.9828338623046875, -0.952972412109375, -0.9231109619140625, -0.89324951171875, -0.8633880615234375, -0.833526611328125, -0.8036651611328125, -0.7738037109375, -0.7439422607421875, -0.714080810546875, -0.6842193603515625, -0.65435791015625, -0.6244964599609375, -0.594635009765625, -0.5647735595703125, -0.534912109375, -0.5050506591796875, -0.475189208984375, -0.4453277587890625, -0.41546630859375, -0.3856048583984375, -0.355743408203125, -0.3258819580078125, -0.2960205078125, -0.2661590576171875, -0.236297607421875, -0.2064361572265625, -0.17657470703125, -0.1467132568359375, -0.116851806640625, -0.0869903564453125, -0.05712890625, -0.0272674560546875, 0.002593994140625, 0.0324554443359375, 0.06231689453125, 0.0921783447265625, 0.122039794921875, 0.1519012451171875, 0.1817626953125, 0.2116241455078125, 0.241485595703125, 0.2713470458984375, 0.30120849609375, 0.3310699462890625, 0.360931396484375, 0.3907928466796875, 0.420654296875, 0.4505157470703125, 0.480377197265625, 0.5102386474609375, 0.54010009765625, 0.5699615478515625, 0.599822998046875, 0.6296844482421875, 0.6595458984375, 0.6894073486328125, 0.719268798828125, 0.7491302490234375, 0.77899169921875, 0.8088531494140625, 0.838714599609375, 0.8685760498046875, 0.8984375]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 4.0, 5.0, 5.0, 2.0, 3.0, 5.0, 5.0, 10.0, 16.0, 9.0, 27.0, 31.0, 61.0, 181.0, 323.0, 138.0, 58.0, 30.0, 20.0, 20.0, 9.0, 6.0, 11.0, 8.0, 4.0, 5.0, 5.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0010747909545898438, -0.0010394752025604248, -0.0010041594505310059, -0.0009688436985015869, -0.000933527946472168, -0.000898212194442749, -0.0008628964424133301, -0.0008275806903839111, -0.0007922649383544922, -0.0007569491863250732, -0.0007216334342956543, -0.0006863176822662354, -0.0006510019302368164, -0.0006156861782073975, -0.0005803704261779785, -0.0005450546741485596, -0.0005097389221191406, -0.0004744231700897217, -0.00043910741806030273, -0.0004037916660308838, -0.00036847591400146484, -0.0003331601619720459, -0.00029784440994262695, -0.000262528657913208, -0.00022721290588378906, -0.00019189715385437012, -0.00015658140182495117, -0.00012126564979553223, -8.594989776611328e-05, -5.0634145736694336e-05, -1.531839370727539e-05, 1.9997358322143555e-05, 5.53131103515625e-05, 9.062886238098145e-05, 0.0001259446144104004, 0.00016126036643981934, 0.00019657611846923828, 0.00023189187049865723, 0.00026720762252807617, 0.0003025233745574951, 0.00033783912658691406, 0.000373154878616333, 0.00040847063064575195, 0.0004437863826751709, 0.00047910213470458984, 0.0005144178867340088, 0.0005497336387634277, 0.0005850493907928467, 0.0006203651428222656, 0.0006556808948516846, 0.0006909966468811035, 0.0007263123989105225, 0.0007616281509399414, 0.0007969439029693604, 0.0008322596549987793, 0.0008675754070281982, 0.0009028911590576172, 0.0009382069110870361, 0.0009735226631164551, 0.001008838415145874, 0.001044154167175293, 0.001079469919204712, 0.0011147856712341309, 0.0011501014232635498, 0.0011854171752929688]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 6.0, 3.0, 4.0, 12.0, 18.0, 39.0, 86.0, 225.0, 573.0, 2195.0, 12693.0, 437269.0, 576920.0, 15092.0, 2360.0, 660.0, 220.0, 84.0, 42.0, 20.0, 16.0, 6.0, 3.0, 4.0, 4.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.619140625, -0.6036758422851562, -0.5882110595703125, -0.5727462768554688, -0.557281494140625, -0.5418167114257812, -0.5263519287109375, -0.5108871459960938, -0.49542236328125, -0.47995758056640625, -0.4644927978515625, -0.44902801513671875, -0.433563232421875, -0.41809844970703125, -0.4026336669921875, -0.38716888427734375, -0.3717041015625, -0.35623931884765625, -0.3407745361328125, -0.32530975341796875, -0.309844970703125, -0.29438018798828125, -0.2789154052734375, -0.26345062255859375, -0.24798583984375, -0.23252105712890625, -0.2170562744140625, -0.20159149169921875, -0.186126708984375, -0.17066192626953125, -0.1551971435546875, -0.13973236083984375, -0.124267578125, -0.10880279541015625, -0.0933380126953125, -0.07787322998046875, -0.062408447265625, -0.04694366455078125, -0.0314788818359375, -0.01601409912109375, -0.00054931640625, 0.01491546630859375, 0.0303802490234375, 0.04584503173828125, 0.061309814453125, 0.07677459716796875, 0.0922393798828125, 0.10770416259765625, 0.1231689453125, 0.13863372802734375, 0.1540985107421875, 0.16956329345703125, 0.185028076171875, 0.20049285888671875, 0.2159576416015625, 0.23142242431640625, 0.24688720703125, 0.26235198974609375, 0.2778167724609375, 0.29328155517578125, 0.308746337890625, 0.32421112060546875, 0.3396759033203125, 0.35514068603515625, 0.37060546875]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 4.0, 2.0, 3.0, 6.0, 11.0, 18.0, 19.0, 18.0, 29.0, 44.0, 50.0, 73.0, 72.0, 91.0, 85.0, 84.0, 82.0, 66.0, 42.0, 34.0, 36.0, 34.0, 15.0, 12.0, 13.0, 16.0, 4.0, 7.0, 5.0, 9.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0], "bins": [-0.1033935546875, -0.09981727600097656, -0.09624099731445312, -0.09266471862792969, -0.08908843994140625, -0.08551216125488281, -0.08193588256835938, -0.07835960388183594, -0.0747833251953125, -0.07120704650878906, -0.06763076782226562, -0.06405448913574219, -0.06047821044921875, -0.05690193176269531, -0.053325653076171875, -0.04974937438964844, -0.046173095703125, -0.04259681701660156, -0.039020538330078125, -0.03544425964355469, -0.03186798095703125, -0.028291702270507812, -0.024715423583984375, -0.021139144897460938, -0.0175628662109375, -0.013986587524414062, -0.010410308837890625, -0.0068340301513671875, -0.00325775146484375, 0.0003185272216796875, 0.003894805908203125, 0.0074710845947265625, 0.01104736328125, 0.014623641967773438, 0.018199920654296875, 0.021776199340820312, 0.02535247802734375, 0.028928756713867188, 0.032505035400390625, 0.03608131408691406, 0.0396575927734375, 0.04323387145996094, 0.046810150146484375, 0.05038642883300781, 0.05396270751953125, 0.05753898620605469, 0.061115264892578125, 0.06469154357910156, 0.068267822265625, 0.07184410095214844, 0.07542037963867188, 0.07899665832519531, 0.08257293701171875, 0.08614921569824219, 0.08972549438476562, 0.09330177307128906, 0.0968780517578125, 0.10045433044433594, 0.10403060913085938, 0.10760688781738281, 0.11118316650390625, 0.11475944519042969, 0.11833572387695312, 0.12191200256347656, 0.12548828125]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 7.0, 8.0, 17.0, 39.0, 87.0, 261.0, 335.0, 163.0, 51.0, 19.0, 9.0, 8.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9462106823921204, -0.887650728225708, -0.8290907740592957, -0.7705308198928833, -0.7119709253311157, -0.6534109115600586, -0.594851016998291, -0.5362910628318787, -0.4777311086654663, -0.41917115449905396, -0.3606112003326416, -0.30205127596855164, -0.24349132180213928, -0.18493136763572693, -0.12637144327163696, -0.06781148910522461, -0.009251534938812256, 0.0493084117770195, 0.10786835849285126, 0.16642829775810242, 0.22498825192451477, 0.2835482060909271, 0.3421081304550171, 0.40066808462142944, 0.4592280387878418, 0.5177879929542542, 0.5763479471206665, 0.6349078416824341, 0.6934678554534912, 0.7520277500152588, 0.8105877041816711, 0.8691476583480835, 0.9277076721191406, 0.986267626285553, 1.0448275804519653, 1.103387475013733, 1.16194748878479, 1.2205073833465576, 1.2790672779083252, 1.3376272916793823, 1.3961873054504395, 1.454747200012207, 1.5133072137832642, 1.5718671083450317, 1.6304271221160889, 1.6889870166778564, 1.747546911239624, 1.8061069250106812, 1.8646668195724487, 1.9232267141342163, 1.9817867279052734, 2.040346622467041, 2.0989065170288086, 2.1574666500091553, 2.216026544570923, 2.2745864391326904, 2.333146333694458, 2.3917062282562256, 2.450266122817993, 2.50882625579834, 2.5673861503601074, 2.625946044921875, 2.6845059394836426, 2.74306583404541, 2.801625967025757]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 4.0, 5.0, 12.0, 14.0, 21.0, 28.0, 32.0, 63.0, 86.0, 87.0, 110.0, 140.0, 124.0, 85.0, 59.0, 45.0, 23.0, 20.0, 9.0, 8.0, 8.0, 6.0, 8.0, 3.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1661746501922607, -1.1209747791290283, -1.0757747888565063, -1.030574917793274, -0.9853750467300415, -0.9401751160621643, -0.8949751853942871, -0.8497753143310547, -0.8045753836631775, -0.7593754529953003, -0.7141755819320679, -0.6689756512641907, -0.6237757205963135, -0.578575849533081, -0.5333759188652039, -0.48817601799964905, -0.44297611713409424, -0.39777621626853943, -0.3525763154029846, -0.3073763847351074, -0.2621764838695526, -0.2169765830039978, -0.1717766523361206, -0.1265767514705658, -0.08137685060501099, -0.03617694228887558, 0.009022966027259827, 0.05422288179397583, 0.09942278265953064, 0.14462268352508545, 0.18982261419296265, 0.23502251505851746, 0.2802225351333618, 0.3254224359989166, 0.37062233686447144, 0.41582226753234863, 0.46102216839790344, 0.5062220692634583, 0.5514219999313354, 0.5966218709945679, 0.6418218016624451, 0.6870217323303223, 0.7322216033935547, 0.7774215340614319, 0.8226214647293091, 0.8678213357925415, 0.9130212664604187, 0.9582211971282959, 1.0034210681915283, 1.0486209392547607, 1.0938209295272827, 1.1390208005905151, 1.1842206716537476, 1.2294206619262695, 1.274620532989502, 1.3198204040527344, 1.3650202751159668, 1.4102201461791992, 1.4554201364517212, 1.5006200075149536, 1.545819878578186, 1.591019868850708, 1.6362197399139404, 1.6814196109771729, 1.7266196012496948]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 8.0, 8.0, 8.0, 4.0, 8.0, 12.0, 0.0, 7.0, 13.0, 14.0, 10.0, 28.0, 51.0, 124.0, 287.0, 962.0, 4988.0, 123970.0, 4044390.0, 16579.0, 1914.0, 510.0, 154.0, 63.0, 50.0, 37.0, 28.0, 7.0, 5.0, 9.0, 1.0, 1.0, 4.0, 5.0, 13.0, 2.0, 4.0, 4.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-1.2109375, -1.1765670776367188, -1.1421966552734375, -1.1078262329101562, -1.073455810546875, -1.0390853881835938, -1.0047149658203125, -0.9703445434570312, -0.93597412109375, -0.9016036987304688, -0.8672332763671875, -0.8328628540039062, -0.798492431640625, -0.7641220092773438, -0.7297515869140625, -0.6953811645507812, -0.6610107421875, -0.6266403198242188, -0.5922698974609375, -0.5578994750976562, -0.523529052734375, -0.48915863037109375, -0.4547882080078125, -0.42041778564453125, -0.38604736328125, -0.35167694091796875, -0.3173065185546875, -0.28293609619140625, -0.248565673828125, -0.21419525146484375, -0.1798248291015625, -0.14545440673828125, -0.111083984375, -0.07671356201171875, -0.0423431396484375, -0.00797271728515625, 0.026397705078125, 0.06076812744140625, 0.0951385498046875, 0.12950897216796875, 0.16387939453125, 0.19824981689453125, 0.2326202392578125, 0.26699066162109375, 0.301361083984375, 0.33573150634765625, 0.3701019287109375, 0.40447235107421875, 0.4388427734375, 0.47321319580078125, 0.5075836181640625, 0.5419540405273438, 0.576324462890625, 0.6106948852539062, 0.6450653076171875, 0.6794357299804688, 0.71380615234375, 0.7481765747070312, 0.7825469970703125, 0.8169174194335938, 0.851287841796875, 0.8856582641601562, 0.9200286865234375, 0.9543991088867188, 0.98876953125]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 0.0, 4.0, 2.0, 4.0, 3.0, 2.0, 4.0, 4.0, 11.0, 10.0, 14.0, 23.0, 33.0, 48.0, 55.0, 61.0, 76.0, 85.0, 84.0, 94.0, 79.0, 75.0, 44.0, 51.0, 45.0, 34.0, 18.0, 6.0, 7.0, 4.0, 7.0, 7.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0194091796875, -0.01886439323425293, -0.01831960678100586, -0.01777482032775879, -0.01723003387451172, -0.01668524742126465, -0.016140460968017578, -0.015595674514770508, -0.015050888061523438, -0.014506101608276367, -0.013961315155029297, -0.013416528701782227, -0.012871742248535156, -0.012326955795288086, -0.011782169342041016, -0.011237382888793945, -0.010692596435546875, -0.010147809982299805, -0.009603023529052734, -0.009058237075805664, -0.008513450622558594, -0.007968664169311523, -0.007423877716064453, -0.006879091262817383, -0.0063343048095703125, -0.005789518356323242, -0.005244731903076172, -0.0046999454498291016, -0.004155158996582031, -0.003610372543334961, -0.0030655860900878906, -0.0025207996368408203, -0.00197601318359375, -0.0014312267303466797, -0.0008864402770996094, -0.00034165382385253906, 0.00020313262939453125, 0.0007479190826416016, 0.0012927055358886719, 0.0018374919891357422, 0.0023822784423828125, 0.002927064895629883, 0.003471851348876953, 0.0040166378021240234, 0.004561424255371094, 0.005106210708618164, 0.005650997161865234, 0.006195783615112305, 0.006740570068359375, 0.007285356521606445, 0.007830142974853516, 0.008374929428100586, 0.008919715881347656, 0.009464502334594727, 0.010009288787841797, 0.010554075241088867, 0.011098861694335938, 0.011643648147583008, 0.012188434600830078, 0.012733221054077148, 0.013278007507324219, 0.013822793960571289, 0.01436758041381836, 0.01491236686706543, 0.0154571533203125]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 1.0, 6.0, 12.0, 49.0, 125.0, 575.0, 8043.0, 4137203.0, 46768.0, 1203.0, 198.0, 71.0, 23.0, 7.0, 10.0, 1.0, 1.0], "bins": [-2.3828125, -2.3392333984375, -2.295654296875, -2.2520751953125, -2.20849609375, -2.1649169921875, -2.121337890625, -2.0777587890625, -2.0341796875, -1.9906005859375, -1.947021484375, -1.9034423828125, -1.85986328125, -1.8162841796875, -1.772705078125, -1.7291259765625, -1.685546875, -1.6419677734375, -1.598388671875, -1.5548095703125, -1.51123046875, -1.4676513671875, -1.424072265625, -1.3804931640625, -1.3369140625, -1.2933349609375, -1.249755859375, -1.2061767578125, -1.16259765625, -1.1190185546875, -1.075439453125, -1.0318603515625, -0.98828125, -0.9447021484375, -0.901123046875, -0.8575439453125, -0.81396484375, -0.7703857421875, -0.726806640625, -0.6832275390625, -0.6396484375, -0.5960693359375, -0.552490234375, -0.5089111328125, -0.46533203125, -0.4217529296875, -0.378173828125, -0.3345947265625, -0.291015625, -0.2474365234375, -0.203857421875, -0.1602783203125, -0.11669921875, -0.0731201171875, -0.029541015625, 0.0140380859375, 0.0576171875, 0.1011962890625, 0.144775390625, 0.1883544921875, 0.23193359375, 0.2755126953125, 0.319091796875, 0.3626708984375, 0.40625]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 2.0, 4.0, 4.0, 7.0, 13.0, 13.0, 13.0, 25.0, 24.0, 44.0, 51.0, 97.0, 153.0, 284.0, 777.0, 1516.0, 498.0, 195.0, 118.0, 63.0, 62.0, 34.0, 24.0, 10.0, 9.0, 8.0, 4.0, 7.0, 6.0, 3.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.043365478515625, -0.04202413558959961, -0.04068279266357422, -0.03934144973754883, -0.03800010681152344, -0.03665876388549805, -0.035317420959472656, -0.033976078033447266, -0.032634735107421875, -0.031293392181396484, -0.029952049255371094, -0.028610706329345703, -0.027269363403320312, -0.025928020477294922, -0.02458667755126953, -0.02324533462524414, -0.02190399169921875, -0.02056264877319336, -0.01922130584716797, -0.017879962921142578, -0.016538619995117188, -0.015197277069091797, -0.013855934143066406, -0.012514591217041016, -0.011173248291015625, -0.009831905364990234, -0.008490562438964844, -0.007149219512939453, -0.0058078765869140625, -0.004466533660888672, -0.0031251907348632812, -0.0017838478088378906, -0.0004425048828125, 0.0008988380432128906, 0.0022401809692382812, 0.003581523895263672, 0.0049228668212890625, 0.006264209747314453, 0.007605552673339844, 0.008946895599365234, 0.010288238525390625, 0.011629581451416016, 0.012970924377441406, 0.014312267303466797, 0.015653610229492188, 0.016994953155517578, 0.01833629608154297, 0.01967763900756836, 0.02101898193359375, 0.02236032485961914, 0.02370166778564453, 0.025043010711669922, 0.026384353637695312, 0.027725696563720703, 0.029067039489746094, 0.030408382415771484, 0.031749725341796875, 0.033091068267822266, 0.034432411193847656, 0.03577375411987305, 0.03711509704589844, 0.03845643997192383, 0.03979778289794922, 0.04113912582397461, 0.04248046875]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 5.0, 42.0, 804.0, 153.0, 13.0, 1.0], "bins": [-5.712839603424072, -5.6185622215271, -5.524285316467285, -5.4300079345703125, -5.33573055267334, -5.241453170776367, -5.147176265716553, -5.05289888381958, -4.958621501922607, -4.864344120025635, -4.77006721496582, -4.675789833068848, -4.581512451171875, -4.487235069274902, -4.392958164215088, -4.298680782318115, -4.204403400421143, -4.11012601852417, -4.0158491134643555, -3.921571731567383, -3.82729434967041, -3.7330172061920166, -3.638739824295044, -3.5444626808166504, -3.450185537338257, -3.3559083938598633, -3.2616310119628906, -3.167353868484497, -3.0730764865875244, -2.978799343109131, -2.884521961212158, -2.7902448177337646, -2.695967435836792, -2.6016902923583984, -2.507412910461426, -2.4131357669830322, -2.3188583850860596, -2.224581241607666, -2.1303038597106934, -2.0360267162323, -1.9417493343353271, -1.847472071647644, -1.753194808959961, -1.6589175462722778, -1.5646402835845947, -1.4703630208969116, -1.3760857582092285, -1.281808614730835, -1.1875313520431519, -1.0932540893554688, -0.9989768266677856, -0.9046995639801025, -0.8104223012924194, -0.7161450386047363, -0.621867835521698, -0.5275905728340149, -0.4333133101463318, -0.3390360474586487, -0.24475879967212677, -0.15048155188560486, -0.05620428919792175, 0.03807297348976135, 0.13235020637512207, 0.22662746906280518, 0.3209047317504883]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 3.0, 13.0, 9.0, 32.0, 67.0, 76.0, 103.0, 157.0, 165.0, 148.0, 97.0, 69.0, 30.0, 20.0, 13.0, 7.0, 1.0, 1.0, 1.0, 1.0, 3.0], "bins": [-0.9175159335136414, -0.8995136618614197, -0.881511390209198, -0.8635091185569763, -0.8455069065093994, -0.8275046348571777, -0.809502363204956, -0.7915000915527344, -0.7734978199005127, -0.755495548248291, -0.7374932765960693, -0.7194910049438477, -0.701488733291626, -0.6834865212440491, -0.6654842495918274, -0.6474819779396057, -0.629479706287384, -0.6114774346351624, -0.5934751629829407, -0.575472891330719, -0.5574706792831421, -0.5394684076309204, -0.5214661359786987, -0.503463864326477, -0.48546159267425537, -0.4674593210220337, -0.449457049369812, -0.4314548075199127, -0.41345253586769104, -0.39545026421546936, -0.37744802236557007, -0.3594457507133484, -0.3414434790611267, -0.32344120740890503, -0.30543893575668335, -0.28743669390678406, -0.2694344222545624, -0.2514321506023407, -0.2334298938512802, -0.21542763710021973, -0.19742536544799805, -0.17942309379577637, -0.16142083704471588, -0.1434185802936554, -0.12541630864143372, -0.10741404443979263, -0.08941178023815155, -0.07140952348709106, -0.053407251834869385, -0.0354049876332283, -0.01740272343158722, 0.0005995407700538635, 0.018601804971694946, 0.03660406917333603, 0.05460633337497711, 0.0726085901260376, 0.09061086177825928, 0.10861312597990036, 0.12661539018154144, 0.14461764693260193, 0.1626199185848236, 0.1806221902370453, 0.19862444698810577, 0.21662670373916626, 0.23462897539138794]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 0.0, 4.0, 3.0, 3.0, 3.0, 7.0, 4.0, 11.0, 4.0, 7.0, 12.0, 21.0, 16.0, 32.0, 32.0, 25.0, 27.0, 34.0, 41.0, 65.0, 52.0, 838.0, 1045748.0, 1137.0, 61.0, 57.0, 50.0, 45.0, 32.0, 39.0, 27.0, 34.0, 11.0, 26.0, 9.0, 8.0, 11.0, 7.0, 3.0, 4.0, 5.0, 4.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8740234375, -1.8005828857421875, -1.727142333984375, -1.6537017822265625, -1.58026123046875, -1.5068206787109375, -1.433380126953125, -1.3599395751953125, -1.2864990234375, -1.2130584716796875, -1.139617919921875, -1.0661773681640625, -0.99273681640625, -0.9192962646484375, -0.845855712890625, -0.7724151611328125, -0.698974609375, -0.6255340576171875, -0.552093505859375, -0.4786529541015625, -0.40521240234375, -0.3317718505859375, -0.258331298828125, -0.1848907470703125, -0.1114501953125, -0.0380096435546875, 0.035430908203125, 0.1088714599609375, 0.18231201171875, 0.2557525634765625, 0.329193115234375, 0.4026336669921875, 0.47607421875, 0.5495147705078125, 0.622955322265625, 0.6963958740234375, 0.76983642578125, 0.8432769775390625, 0.916717529296875, 0.9901580810546875, 1.0635986328125, 1.1370391845703125, 1.210479736328125, 1.2839202880859375, 1.35736083984375, 1.4308013916015625, 1.504241943359375, 1.5776824951171875, 1.651123046875, 1.7245635986328125, 1.798004150390625, 1.8714447021484375, 1.94488525390625, 2.0183258056640625, 2.091766357421875, 2.1652069091796875, 2.2386474609375, 2.3120880126953125, 2.385528564453125, 2.4589691162109375, 2.53240966796875, 2.6058502197265625, 2.679290771484375, 2.7527313232421875, 2.826171875]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 5.0, 34.0, 191.0, 440.0, 277.0, 58.0, 13.0, 1.0], "bins": [-0.2120361328125, -0.20847773551940918, -0.20491933822631836, -0.20136094093322754, -0.19780254364013672, -0.1942441463470459, -0.19068574905395508, -0.18712735176086426, -0.18356895446777344, -0.18001055717468262, -0.1764521598815918, -0.17289376258850098, -0.16933536529541016, -0.16577696800231934, -0.16221857070922852, -0.1586601734161377, -0.15510177612304688, -0.15154337882995605, -0.14798498153686523, -0.14442658424377441, -0.1408681869506836, -0.13730978965759277, -0.13375139236450195, -0.13019299507141113, -0.1266345977783203, -0.12307620048522949, -0.11951780319213867, -0.11595940589904785, -0.11240100860595703, -0.10884261131286621, -0.10528421401977539, -0.10172581672668457, -0.09816741943359375, -0.09460902214050293, -0.09105062484741211, -0.08749222755432129, -0.08393383026123047, -0.08037543296813965, -0.07681703567504883, -0.07325863838195801, -0.06970024108886719, -0.06614184379577637, -0.06258344650268555, -0.05902504920959473, -0.055466651916503906, -0.051908254623413086, -0.048349857330322266, -0.044791460037231445, -0.041233062744140625, -0.037674665451049805, -0.034116268157958984, -0.030557870864868164, -0.026999473571777344, -0.023441076278686523, -0.019882678985595703, -0.016324281692504883, -0.012765884399414062, -0.009207487106323242, -0.005649089813232422, -0.0020906925201416016, 0.0014677047729492188, 0.005026102066040039, 0.00858449935913086, 0.01214289665222168, 0.0157012939453125]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 2.0, 2.0, 5.0, 8.0, 20.0, 20.0, 25.0, 32.0, 43.0, 72.0, 93.0, 123.0, 209.0, 359.0, 648.0, 1131.0, 2263.0, 4743.0, 11802.0, 36218.0, 164355.0, 560586.0, 199781.0, 42227.0, 13404.0, 5109.0, 2314.0, 1134.0, 679.0, 402.0, 234.0, 168.0, 118.0, 76.0, 37.0, 29.0, 22.0, 14.0, 14.0, 10.0, 8.0, 7.0, 6.0, 1.0, 3.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.41162109375, -0.39935302734375, -0.3870849609375, -0.37481689453125, -0.362548828125, -0.35028076171875, -0.3380126953125, -0.32574462890625, -0.3134765625, -0.30120849609375, -0.2889404296875, -0.27667236328125, -0.264404296875, -0.25213623046875, -0.2398681640625, -0.22760009765625, -0.21533203125, -0.20306396484375, -0.1907958984375, -0.17852783203125, -0.166259765625, -0.15399169921875, -0.1417236328125, -0.12945556640625, -0.1171875, -0.10491943359375, -0.0926513671875, -0.08038330078125, -0.068115234375, -0.05584716796875, -0.0435791015625, -0.03131103515625, -0.01904296875, -0.00677490234375, 0.0054931640625, 0.01776123046875, 0.030029296875, 0.04229736328125, 0.0545654296875, 0.06683349609375, 0.0791015625, 0.09136962890625, 0.1036376953125, 0.11590576171875, 0.128173828125, 0.14044189453125, 0.1527099609375, 0.16497802734375, 0.17724609375, 0.18951416015625, 0.2017822265625, 0.21405029296875, 0.226318359375, 0.23858642578125, 0.2508544921875, 0.26312255859375, 0.275390625, 0.28765869140625, 0.2999267578125, 0.31219482421875, 0.324462890625, 0.33673095703125, 0.3489990234375, 0.36126708984375, 0.37353515625]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 4.0, 1.0, 1.0, 4.0, 2.0, 3.0, 4.0, 3.0, 7.0, 13.0, 13.0, 20.0, 21.0, 27.0, 38.0, 39.0, 49.0, 48.0, 74.0, 56.0, 71.0, 62.0, 75.0, 65.0, 57.0, 48.0, 42.0, 39.0, 37.0, 21.0, 15.0, 10.0, 12.0, 7.0, 11.0, 7.0, 4.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1796875, -0.1738605499267578, -0.16803359985351562, -0.16220664978027344, -0.15637969970703125, -0.15055274963378906, -0.14472579956054688, -0.1388988494873047, -0.1330718994140625, -0.1272449493408203, -0.12141799926757812, -0.11559104919433594, -0.10976409912109375, -0.10393714904785156, -0.09811019897460938, -0.09228324890136719, -0.086456298828125, -0.08062934875488281, -0.07480239868164062, -0.06897544860839844, -0.06314849853515625, -0.05732154846191406, -0.051494598388671875, -0.04566764831542969, -0.0398406982421875, -0.03401374816894531, -0.028186798095703125, -0.022359848022460938, -0.01653289794921875, -0.010705947875976562, -0.004878997802734375, 0.0009479522705078125, 0.00677490234375, 0.012601852416992188, 0.018428802490234375, 0.024255752563476562, 0.03008270263671875, 0.03590965270996094, 0.041736602783203125, 0.04756355285644531, 0.0533905029296875, 0.05921745300292969, 0.06504440307617188, 0.07087135314941406, 0.07669830322265625, 0.08252525329589844, 0.08835220336914062, 0.09417915344238281, 0.100006103515625, 0.10583305358886719, 0.11166000366210938, 0.11748695373535156, 0.12331390380859375, 0.12914085388183594, 0.13496780395507812, 0.1407947540283203, 0.1466217041015625, 0.1524486541748047, 0.15827560424804688, 0.16410255432128906, 0.16992950439453125, 0.17575645446777344, 0.18158340454101562, 0.1874103546142578, 0.1932373046875]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 5.0, 1.0, 6.0, 1.0, 8.0, 9.0, 6.0, 7.0, 13.0, 16.0, 29.0, 35.0, 44.0, 65.0, 98.0, 137.0, 278.0, 446.0, 1080.0, 2888.0, 9695.0, 45474.0, 309479.0, 556002.0, 97495.0, 17523.0, 4579.0, 1590.0, 672.0, 314.0, 187.0, 117.0, 70.0, 44.0, 32.0, 29.0, 18.0, 22.0, 8.0, 13.0, 6.0, 5.0, 5.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.39697265625, -0.3838310241699219, -0.37068939208984375, -0.3575477600097656, -0.3444061279296875, -0.3312644958496094, -0.31812286376953125, -0.3049812316894531, -0.291839599609375, -0.2786979675292969, -0.26555633544921875, -0.2524147033691406, -0.2392730712890625, -0.22613143920898438, -0.21298980712890625, -0.19984817504882812, -0.18670654296875, -0.17356491088867188, -0.16042327880859375, -0.14728164672851562, -0.1341400146484375, -0.12099838256835938, -0.10785675048828125, -0.09471511840820312, -0.081573486328125, -0.06843185424804688, -0.05529022216796875, -0.042148590087890625, -0.0290069580078125, -0.015865325927734375, -0.00272369384765625, 0.010417938232421875, 0.0235595703125, 0.036701202392578125, 0.04984283447265625, 0.06298446655273438, 0.0761260986328125, 0.08926773071289062, 0.10240936279296875, 0.11555099487304688, 0.128692626953125, 0.14183425903320312, 0.15497589111328125, 0.16811752319335938, 0.1812591552734375, 0.19440078735351562, 0.20754241943359375, 0.22068405151367188, 0.23382568359375, 0.24696731567382812, 0.26010894775390625, 0.2732505798339844, 0.2863922119140625, 0.2995338439941406, 0.31267547607421875, 0.3258171081542969, 0.338958740234375, 0.3521003723144531, 0.36524200439453125, 0.3783836364746094, 0.3915252685546875, 0.4046669006347656, 0.41780853271484375, 0.4309501647949219, 0.444091796875]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 6.0, 4.0, 5.0, 7.0, 9.0, 6.0, 12.0, 11.0, 10.0, 20.0, 29.0, 21.0, 30.0, 35.0, 38.0, 60.0, 74.0, 85.0, 92.0, 68.0, 76.0, 65.0, 45.0, 33.0, 26.0, 24.0, 26.0, 22.0, 13.0, 11.0, 7.0, 6.0, 11.0, 3.0, 2.0, 3.0, 6.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00013589859008789062, -0.00013161450624465942, -0.00012733042240142822, -0.00012304633855819702, -0.00011876225471496582, -0.00011447817087173462, -0.00011019408702850342, -0.00010591000318527222, -0.00010162591934204102, -9.734183549880981e-05, -9.305775165557861e-05, -8.877366781234741e-05, -8.448958396911621e-05, -8.020550012588501e-05, -7.592141628265381e-05, -7.163733243942261e-05, -6.73532485961914e-05, -6.30691647529602e-05, -5.8785080909729004e-05, -5.45009970664978e-05, -5.02169132232666e-05, -4.59328293800354e-05, -4.16487455368042e-05, -3.7364661693573e-05, -3.30805778503418e-05, -2.8796494007110596e-05, -2.4512410163879395e-05, -2.0228326320648193e-05, -1.5944242477416992e-05, -1.1660158634185791e-05, -7.37607479095459e-06, -3.0919909477233887e-06, 1.1920928955078125e-06, 5.476176738739014e-06, 9.760260581970215e-06, 1.4044344425201416e-05, 1.8328428268432617e-05, 2.261251211166382e-05, 2.689659595489502e-05, 3.118067979812622e-05, 3.546476364135742e-05, 3.974884748458862e-05, 4.4032931327819824e-05, 4.8317015171051025e-05, 5.2601099014282227e-05, 5.688518285751343e-05, 6.116926670074463e-05, 6.545335054397583e-05, 6.973743438720703e-05, 7.402151823043823e-05, 7.830560207366943e-05, 8.258968591690063e-05, 8.687376976013184e-05, 9.115785360336304e-05, 9.544193744659424e-05, 9.972602128982544e-05, 0.00010401010513305664, 0.00010829418897628784, 0.00011257827281951904, 0.00011686235666275024, 0.00012114644050598145, 0.00012543052434921265, 0.00012971460819244385, 0.00013399869203567505, 0.00013828277587890625]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 5.0, 6.0, 17.0, 7.0, 14.0, 31.0, 58.0, 71.0, 142.0, 253.0, 449.0, 1072.0, 3330.0, 15729.0, 136055.0, 755469.0, 117040.0, 13837.0, 2961.0, 1024.0, 461.0, 214.0, 121.0, 69.0, 42.0, 25.0, 23.0, 15.0, 6.0, 3.0, 8.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.40771484375, -0.3895721435546875, -0.371429443359375, -0.3532867431640625, -0.33514404296875, -0.3170013427734375, -0.298858642578125, -0.2807159423828125, -0.2625732421875, -0.2444305419921875, -0.226287841796875, -0.2081451416015625, -0.19000244140625, -0.1718597412109375, -0.153717041015625, -0.1355743408203125, -0.117431640625, -0.0992889404296875, -0.081146240234375, -0.0630035400390625, -0.04486083984375, -0.0267181396484375, -0.008575439453125, 0.0095672607421875, 0.0277099609375, 0.0458526611328125, 0.063995361328125, 0.0821380615234375, 0.10028076171875, 0.1184234619140625, 0.136566162109375, 0.1547088623046875, 0.1728515625, 0.1909942626953125, 0.209136962890625, 0.2272796630859375, 0.24542236328125, 0.2635650634765625, 0.281707763671875, 0.2998504638671875, 0.3179931640625, 0.3361358642578125, 0.354278564453125, 0.3724212646484375, 0.39056396484375, 0.4087066650390625, 0.426849365234375, 0.4449920654296875, 0.463134765625, 0.4812774658203125, 0.499420166015625, 0.5175628662109375, 0.53570556640625, 0.5538482666015625, 0.571990966796875, 0.5901336669921875, 0.6082763671875, 0.6264190673828125, 0.644561767578125, 0.6627044677734375, 0.68084716796875, 0.6989898681640625, 0.717132568359375, 0.7352752685546875, 0.75341796875]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 3.0, 2.0, 2.0, 4.0, 5.0, 4.0, 7.0, 16.0, 20.0, 23.0, 39.0, 41.0, 73.0, 84.0, 103.0, 122.0, 115.0, 97.0, 75.0, 62.0, 40.0, 22.0, 13.0, 12.0, 9.0, 5.0, 4.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2310791015625, -0.2242145538330078, -0.21735000610351562, -0.21048545837402344, -0.20362091064453125, -0.19675636291503906, -0.18989181518554688, -0.1830272674560547, -0.1761627197265625, -0.1692981719970703, -0.16243362426757812, -0.15556907653808594, -0.14870452880859375, -0.14183998107910156, -0.13497543334960938, -0.1281108856201172, -0.121246337890625, -0.11438179016113281, -0.10751724243164062, -0.10065269470214844, -0.09378814697265625, -0.08692359924316406, -0.08005905151367188, -0.07319450378417969, -0.0663299560546875, -0.05946540832519531, -0.052600860595703125, -0.04573631286621094, -0.03887176513671875, -0.03200721740722656, -0.025142669677734375, -0.018278121948242188, -0.01141357421875, -0.0045490264892578125, 0.002315521240234375, 0.009180068969726562, 0.01604461669921875, 0.022909164428710938, 0.029773712158203125, 0.03663825988769531, 0.0435028076171875, 0.05036735534667969, 0.057231903076171875, 0.06409645080566406, 0.07096099853515625, 0.07782554626464844, 0.08469009399414062, 0.09155464172363281, 0.098419189453125, 0.10528373718261719, 0.11214828491210938, 0.11901283264160156, 0.12587738037109375, 0.13274192810058594, 0.13960647583007812, 0.1464710235595703, 0.1533355712890625, 0.1602001190185547, 0.16706466674804688, 0.17392921447753906, 0.18079376220703125, 0.18765830993652344, 0.19452285766601562, 0.2013874053955078, 0.208251953125]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 7.0, 173.0, 708.0, 109.0, 10.0, 6.0, 1.0, 0.0, 1.0], "bins": [-22.180301666259766, -21.79336929321289, -21.406436920166016, -21.01950454711914, -20.632572174072266, -20.24563980102539, -19.858707427978516, -19.47177505493164, -19.084842681884766, -18.69791030883789, -18.310977935791016, -17.92404556274414, -17.537113189697266, -17.15018081665039, -16.763248443603516, -16.37631607055664, -15.989385604858398, -15.602453231811523, -15.215520858764648, -14.828588485717773, -14.441656112670898, -14.05472469329834, -13.667792320251465, -13.28085994720459, -12.893927574157715, -12.50699520111084, -12.120062828063965, -11.73313045501709, -11.346199035644531, -10.959266662597656, -10.572334289550781, -10.185401916503906, -9.798469543457031, -9.411537170410156, -9.024604797363281, -8.637672424316406, -8.250740051269531, -7.8638081550598145, -7.476876258850098, -7.089943885803223, -6.7030110359191895, -6.3160786628723145, -5.929146766662598, -5.542214393615723, -5.155282020568848, -4.768349647521973, -4.381417274475098, -3.994485378265381, -3.607553005218506, -3.220620632171631, -2.833688497543335, -2.446756362915039, -2.059823989868164, -1.6728917360305786, -1.2859594821929932, -0.8990273475646973, -0.5120949745178223, -0.12516272068023682, 0.26176953315734863, 0.6487017869949341, 1.0356340408325195, 1.422566294670105, 1.8094985485076904, 2.1964306831359863, 2.5833630561828613]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 9.0, 12.0, 10.0, 21.0, 22.0, 29.0, 29.0, 49.0, 46.0, 71.0, 89.0, 87.0, 92.0, 74.0, 70.0, 73.0, 49.0, 49.0, 38.0, 29.0, 20.0, 15.0, 8.0, 6.0, 4.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9082056283950806, -1.8585504293441772, -1.8088951110839844, -1.759239912033081, -1.7095847129821777, -1.6599295139312744, -1.6102741956710815, -1.5606189966201782, -1.5109636783599854, -1.461308479309082, -1.4116531610488892, -1.3619979619979858, -1.3123427629470825, -1.2626874446868896, -1.2130322456359863, -1.163377046585083, -1.1137218475341797, -1.0640666484832764, -1.0144113302230835, -0.9647561311721802, -0.9151009321212769, -0.8654456734657288, -0.8157904148101807, -0.7661352157592773, -0.7164799571037292, -0.6668246984481812, -0.6171694993972778, -0.5675142407417297, -0.5178589820861816, -0.4682037830352783, -0.4185485243797302, -0.3688932955265045, -0.3192380666732788, -0.2695828378200531, -0.2199275940656662, -0.1702723503112793, -0.12061712145805359, -0.07096189260482788, -0.021306633949279785, 0.028348594903945923, 0.07800382375717163, 0.12765905261039734, 0.17731429636478424, 0.22696954011917114, 0.27662476897239685, 0.32627999782562256, 0.37593525648117065, 0.42559048533439636, 0.47524571418762207, 0.5249009728431702, 0.5745561718940735, 0.6242114305496216, 0.6738666296005249, 0.723521888256073, 0.7731771469116211, 0.8228323459625244, 0.8724876046180725, 0.9221428632736206, 0.9717980623245239, 1.0214533805847168, 1.0711085796356201, 1.1207637786865234, 1.1704189777374268, 1.2200742959976196, 1.269729495048523]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 4.0, 1.0, 5.0, 6.0, 10.0, 14.0, 15.0, 16.0, 38.0, 30.0, 34.0, 54.0, 61.0, 82.0, 104.0, 143.0, 236.0, 514.0, 1536068.0, 2655931.0, 492.0, 179.0, 88.0, 67.0, 36.0, 20.0, 11.0, 8.0, 8.0, 5.0, 6.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.171875, -6.0316162109375, -5.891357421875, -5.7510986328125, -5.61083984375, -5.4705810546875, -5.330322265625, -5.1900634765625, -5.0498046875, -4.9095458984375, -4.769287109375, -4.6290283203125, -4.48876953125, -4.3485107421875, -4.208251953125, -4.0679931640625, -3.927734375, -3.7874755859375, -3.647216796875, -3.5069580078125, -3.36669921875, -3.2264404296875, -3.086181640625, -2.9459228515625, -2.8056640625, -2.6654052734375, -2.525146484375, -2.3848876953125, -2.24462890625, -2.1043701171875, -1.964111328125, -1.8238525390625, -1.68359375, -1.5433349609375, -1.403076171875, -1.2628173828125, -1.12255859375, -0.9822998046875, -0.842041015625, -0.7017822265625, -0.5615234375, -0.4212646484375, -0.281005859375, -0.1407470703125, -0.00048828125, 0.1397705078125, 0.280029296875, 0.4202880859375, 0.560546875, 0.7008056640625, 0.841064453125, 0.9813232421875, 1.12158203125, 1.2618408203125, 1.402099609375, 1.5423583984375, 1.6826171875, 1.8228759765625, 1.963134765625, 2.1033935546875, 2.24365234375, 2.3839111328125, 2.524169921875, 2.6644287109375, 2.8046875]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 11.0, 33.0, 109.0, 223.0, 331.0, 199.0, 75.0, 24.0, 7.0, 4.0, 0.0, 0.0, 1.0], "bins": [-0.2305908203125, -0.22642183303833008, -0.22225284576416016, -0.21808385848999023, -0.2139148712158203, -0.2097458839416504, -0.20557689666748047, -0.20140790939331055, -0.19723892211914062, -0.1930699348449707, -0.18890094757080078, -0.18473196029663086, -0.18056297302246094, -0.17639398574829102, -0.1722249984741211, -0.16805601119995117, -0.16388702392578125, -0.15971803665161133, -0.1555490493774414, -0.15138006210327148, -0.14721107482910156, -0.14304208755493164, -0.13887310028076172, -0.1347041130065918, -0.13053512573242188, -0.12636613845825195, -0.12219715118408203, -0.11802816390991211, -0.11385917663574219, -0.10969018936157227, -0.10552120208740234, -0.10135221481323242, -0.0971832275390625, -0.09301424026489258, -0.08884525299072266, -0.08467626571655273, -0.08050727844238281, -0.07633829116821289, -0.07216930389404297, -0.06800031661987305, -0.06383132934570312, -0.0596623420715332, -0.05549335479736328, -0.05132436752319336, -0.04715538024902344, -0.042986392974853516, -0.038817405700683594, -0.03464841842651367, -0.03047943115234375, -0.026310443878173828, -0.022141456604003906, -0.017972469329833984, -0.013803482055664062, -0.00963449478149414, -0.005465507507324219, -0.0012965202331542969, 0.002872467041015625, 0.007041454315185547, 0.011210441589355469, 0.01537942886352539, 0.019548416137695312, 0.023717403411865234, 0.027886390686035156, 0.03205537796020508, 0.036224365234375]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 7.0, 19.0, 27.0, 42.0, 68.0, 111.0, 225.0, 903.0, 4157383.0, 34683.0, 466.0, 169.0, 85.0, 39.0, 25.0, 20.0, 11.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.0546875, -6.7314453125, -6.408203125, -6.0849609375, -5.76171875, -5.4384765625, -5.115234375, -4.7919921875, -4.46875, -4.1455078125, -3.822265625, -3.4990234375, -3.17578125, -2.8525390625, -2.529296875, -2.2060546875, -1.8828125, -1.5595703125, -1.236328125, -0.9130859375, -0.58984375, -0.2666015625, 0.056640625, 0.3798828125, 0.703125, 1.0263671875, 1.349609375, 1.6728515625, 1.99609375, 2.3193359375, 2.642578125, 2.9658203125, 3.2890625, 3.6123046875, 3.935546875, 4.2587890625, 4.58203125, 4.9052734375, 5.228515625, 5.5517578125, 5.875, 6.1982421875, 6.521484375, 6.8447265625, 7.16796875, 7.4912109375, 7.814453125, 8.1376953125, 8.4609375, 8.7841796875, 9.107421875, 9.4306640625, 9.75390625, 10.0771484375, 10.400390625, 10.7236328125, 11.046875, 11.3701171875, 11.693359375, 12.0166015625, 12.33984375, 12.6630859375, 12.986328125, 13.3095703125, 13.6328125]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 17.0, 76.0, 597.0, 2998.0, 271.0, 78.0, 29.0, 12.0, 4.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.67822265625, -0.6613922119140625, -0.644561767578125, -0.6277313232421875, -0.61090087890625, -0.5940704345703125, -0.577239990234375, -0.5604095458984375, -0.5435791015625, -0.5267486572265625, -0.509918212890625, -0.4930877685546875, -0.47625732421875, -0.4594268798828125, -0.442596435546875, -0.4257659912109375, -0.408935546875, -0.3921051025390625, -0.375274658203125, -0.3584442138671875, -0.34161376953125, -0.3247833251953125, -0.307952880859375, -0.2911224365234375, -0.2742919921875, -0.2574615478515625, -0.240631103515625, -0.2238006591796875, -0.20697021484375, -0.1901397705078125, -0.173309326171875, -0.1564788818359375, -0.1396484375, -0.1228179931640625, -0.105987548828125, -0.0891571044921875, -0.07232666015625, -0.0554962158203125, -0.038665771484375, -0.0218353271484375, -0.0050048828125, 0.0118255615234375, 0.028656005859375, 0.0454864501953125, 0.06231689453125, 0.0791473388671875, 0.095977783203125, 0.1128082275390625, 0.129638671875, 0.1464691162109375, 0.163299560546875, 0.1801300048828125, 0.19696044921875, 0.2137908935546875, 0.230621337890625, 0.2474517822265625, 0.2642822265625, 0.2811126708984375, 0.297943115234375, 0.3147735595703125, 0.33160400390625, 0.3484344482421875, 0.365264892578125, 0.3820953369140625, 0.39892578125]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 6.0, 16.0, 41.0, 222.0, 510.0, 149.0, 34.0, 15.0, 10.0, 5.0, 1.0, 2.0, 1.0, 0.0, 3.0], "bins": [-10.385074615478516, -10.190502166748047, -9.995928764343262, -9.801355361938477, -9.606782913208008, -9.412210464477539, -9.217637062072754, -9.023063659667969, -8.8284912109375, -8.633918762207031, -8.439345359802246, -8.244771957397461, -8.050199508666992, -7.855626583099365, -7.661053657531738, -7.466480731964111, -7.271907806396484, -7.077334880828857, -6.8827619552612305, -6.6881890296936035, -6.493616104125977, -6.29904317855835, -6.104470252990723, -5.909897327423096, -5.715324401855469, -5.520751476287842, -5.326178550720215, -5.131605625152588, -4.937032699584961, -4.742459774017334, -4.547886848449707, -4.35331392288208, -4.158741474151611, -3.9641685485839844, -3.7695956230163574, -3.5750226974487305, -3.3804497718811035, -3.1858768463134766, -2.9913039207458496, -2.7967309951782227, -2.6021580696105957, -2.4075851440429688, -2.213012218475342, -2.018439292907715, -1.823866367340088, -1.629293441772461, -1.434720516204834, -1.240147590637207, -1.04557466506958, -0.8510017395019531, -0.6564288139343262, -0.4618558883666992, -0.26728296279907227, -0.07271003723144531, 0.12186288833618164, 0.3164358139038086, 0.5110087394714355, 0.7055816650390625, 0.9001545906066895, 1.0947275161743164, 1.2893004417419434, 1.4838733673095703, 1.6784462928771973, 1.8730192184448242, 2.067592144012451]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 6.0, 4.0, 11.0, 12.0, 24.0, 47.0, 50.0, 90.0, 94.0, 96.0, 131.0, 119.0, 98.0, 72.0, 55.0, 34.0, 27.0, 17.0, 9.0, 5.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.4142887592315674, -1.3648885488510132, -1.315488338470459, -1.2660882472991943, -1.2166880369186401, -1.167287826538086, -1.1178877353668213, -1.068487524986267, -1.019087314605713, -0.9696871042251587, -0.9202869534492493, -0.8708868026733398, -0.8214865922927856, -0.7720863819122314, -0.722686231136322, -0.6732860803604126, -0.6238858699798584, -0.5744856595993042, -0.5250855088233948, -0.47568532824516296, -0.42628514766693115, -0.37688496708869934, -0.32748478651046753, -0.2780846059322357, -0.2286844253540039, -0.1792842447757721, -0.12988406419754028, -0.08048388361930847, -0.03108370304107666, 0.01831647753715515, 0.06771665811538696, 0.11711683869361877, 0.16651701927185059, 0.2159171998500824, 0.2653173804283142, 0.314717561006546, 0.36411774158477783, 0.41351792216300964, 0.46291810274124146, 0.5123182535171509, 0.5617184638977051, 0.6111186742782593, 0.6605188250541687, 0.7099189758300781, 0.7593191862106323, 0.8087193965911865, 0.858119547367096, 0.9075196981430054, 0.9569199085235596, 1.0063201189041138, 1.055720329284668, 1.1051204204559326, 1.1545206308364868, 1.203920841217041, 1.2533209323883057, 1.3027211427688599, 1.352121353149414, 1.4015215635299683, 1.4509217739105225, 1.500321865081787, 1.5497220754623413, 1.5991222858428955, 1.6485223770141602, 1.6979225873947144, 1.7473227977752686]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 5.0, 3.0, 9.0, 12.0, 17.0, 11.0, 16.0, 29.0, 37.0, 50.0, 54.0, 91.0, 183.0, 302.0, 781.0, 3049.0, 26968.0, 784521.0, 220657.0, 9049.0, 1562.0, 513.0, 219.0, 119.0, 75.0, 45.0, 37.0, 18.0, 20.0, 22.0, 19.0, 13.0, 6.0, 8.0, 6.0, 7.0, 3.0, 2.0, 4.0, 2.0, 3.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8076171875, -1.7469329833984375, -1.686248779296875, -1.6255645751953125, -1.56488037109375, -1.5041961669921875, -1.443511962890625, -1.3828277587890625, -1.3221435546875, -1.2614593505859375, -1.200775146484375, -1.1400909423828125, -1.07940673828125, -1.0187225341796875, -0.958038330078125, -0.8973541259765625, -0.836669921875, -0.7759857177734375, -0.715301513671875, -0.6546173095703125, -0.59393310546875, -0.5332489013671875, -0.472564697265625, -0.4118804931640625, -0.3511962890625, -0.2905120849609375, -0.229827880859375, -0.1691436767578125, -0.10845947265625, -0.0477752685546875, 0.012908935546875, 0.0735931396484375, 0.13427734375, 0.1949615478515625, 0.255645751953125, 0.3163299560546875, 0.37701416015625, 0.4376983642578125, 0.498382568359375, 0.5590667724609375, 0.6197509765625, 0.6804351806640625, 0.741119384765625, 0.8018035888671875, 0.86248779296875, 0.9231719970703125, 0.983856201171875, 1.0445404052734375, 1.105224609375, 1.1659088134765625, 1.226593017578125, 1.2872772216796875, 1.34796142578125, 1.4086456298828125, 1.469329833984375, 1.5300140380859375, 1.5906982421875, 1.6513824462890625, 1.712066650390625, 1.7727508544921875, 1.83343505859375, 1.8941192626953125, 1.954803466796875, 2.0154876708984375, 2.076171875]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 12.0, 38.0, 64.0, 138.0, 178.0, 232.0, 165.0, 105.0, 46.0, 16.0, 6.0, 5.0, 1.0, 1.0, 1.0, 2.0, 1.0], "bins": [-0.261962890625, -0.2569441795349121, -0.2519254684448242, -0.24690675735473633, -0.24188804626464844, -0.23686933517456055, -0.23185062408447266, -0.22683191299438477, -0.22181320190429688, -0.21679449081420898, -0.2117757797241211, -0.2067570686340332, -0.2017383575439453, -0.19671964645385742, -0.19170093536376953, -0.18668222427368164, -0.18166351318359375, -0.17664480209350586, -0.17162609100341797, -0.16660737991333008, -0.1615886688232422, -0.1565699577331543, -0.1515512466430664, -0.14653253555297852, -0.14151382446289062, -0.13649511337280273, -0.13147640228271484, -0.12645769119262695, -0.12143898010253906, -0.11642026901245117, -0.11140155792236328, -0.10638284683227539, -0.1013641357421875, -0.09634542465209961, -0.09132671356201172, -0.08630800247192383, -0.08128929138183594, -0.07627058029174805, -0.07125186920166016, -0.06623315811157227, -0.061214447021484375, -0.056195735931396484, -0.051177024841308594, -0.0461583137512207, -0.04113960266113281, -0.03612089157104492, -0.03110218048095703, -0.02608346939086914, -0.02106475830078125, -0.01604604721069336, -0.011027336120605469, -0.006008625030517578, -0.0009899139404296875, 0.004028797149658203, 0.009047508239746094, 0.014066219329833984, 0.019084930419921875, 0.024103641510009766, 0.029122352600097656, 0.03414106369018555, 0.03915977478027344, 0.04417848587036133, 0.04919719696044922, 0.05421590805053711, 0.059234619140625]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 2.0, 7.0, 5.0, 5.0, 6.0, 11.0, 16.0, 15.0, 24.0, 32.0, 29.0, 52.0, 81.0, 112.0, 150.0, 242.0, 360.0, 545.0, 1062.0, 2020.0, 4595.0, 13991.0, 59591.0, 318282.0, 495579.0, 115676.0, 23076.0, 6842.0, 2812.0, 1307.0, 733.0, 437.0, 256.0, 174.0, 134.0, 88.0, 59.0, 46.0, 24.0, 20.0, 17.0, 13.0, 8.0, 4.0, 6.0, 8.0, 5.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.51025390625, -0.492156982421875, -0.47406005859375, -0.455963134765625, -0.4378662109375, -0.419769287109375, -0.40167236328125, -0.383575439453125, -0.365478515625, -0.347381591796875, -0.32928466796875, -0.311187744140625, -0.2930908203125, -0.274993896484375, -0.25689697265625, -0.238800048828125, -0.220703125, -0.202606201171875, -0.18450927734375, -0.166412353515625, -0.1483154296875, -0.130218505859375, -0.11212158203125, -0.094024658203125, -0.075927734375, -0.057830810546875, -0.03973388671875, -0.021636962890625, -0.0035400390625, 0.014556884765625, 0.03265380859375, 0.050750732421875, 0.06884765625, 0.086944580078125, 0.10504150390625, 0.123138427734375, 0.1412353515625, 0.159332275390625, 0.17742919921875, 0.195526123046875, 0.213623046875, 0.231719970703125, 0.24981689453125, 0.267913818359375, 0.2860107421875, 0.304107666015625, 0.32220458984375, 0.340301513671875, 0.3583984375, 0.376495361328125, 0.39459228515625, 0.412689208984375, 0.4307861328125, 0.448883056640625, 0.46697998046875, 0.485076904296875, 0.503173828125, 0.521270751953125, 0.53936767578125, 0.557464599609375, 0.5755615234375, 0.593658447265625, 0.61175537109375, 0.629852294921875, 0.64794921875]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 1.0, 2.0, 2.0, 5.0, 10.0, 9.0, 8.0, 20.0, 13.0, 20.0, 22.0, 25.0, 36.0, 40.0, 46.0, 51.0, 49.0, 51.0, 48.0, 43.0, 63.0, 66.0, 60.0, 64.0, 48.0, 42.0, 32.0, 26.0, 25.0, 25.0, 14.0, 10.0, 9.0, 5.0, 3.0, 6.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.29833984375, -0.28994178771972656, -0.2815437316894531, -0.2731456756591797, -0.26474761962890625, -0.2563495635986328, -0.24795150756835938, -0.23955345153808594, -0.2311553955078125, -0.22275733947753906, -0.21435928344726562, -0.2059612274169922, -0.19756317138671875, -0.1891651153564453, -0.18076705932617188, -0.17236900329589844, -0.163970947265625, -0.15557289123535156, -0.14717483520507812, -0.1387767791748047, -0.13037872314453125, -0.12198066711425781, -0.11358261108398438, -0.10518455505371094, -0.0967864990234375, -0.08838844299316406, -0.07999038696289062, -0.07159233093261719, -0.06319427490234375, -0.05479621887207031, -0.046398162841796875, -0.03800010681152344, -0.02960205078125, -0.021203994750976562, -0.012805938720703125, -0.0044078826904296875, 0.00399017333984375, 0.012388229370117188, 0.020786285400390625, 0.029184341430664062, 0.0375823974609375, 0.04598045349121094, 0.054378509521484375, 0.06277656555175781, 0.07117462158203125, 0.07957267761230469, 0.08797073364257812, 0.09636878967285156, 0.104766845703125, 0.11316490173339844, 0.12156295776367188, 0.1299610137939453, 0.13835906982421875, 0.1467571258544922, 0.15515518188476562, 0.16355323791503906, 0.1719512939453125, 0.18034934997558594, 0.18874740600585938, 0.1971454620361328, 0.20554351806640625, 0.2139415740966797, 0.22233963012695312, 0.23073768615722656, 0.2391357421875]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 8.0, 6.0, 19.0, 27.0, 43.0, 80.0, 154.0, 317.0, 698.0, 3330.0, 128839.0, 904774.0, 8252.0, 1181.0, 400.0, 211.0, 109.0, 42.0, 25.0, 12.0, 9.0, 6.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.072265625, -2.0149993896484375, -1.957733154296875, -1.9004669189453125, -1.84320068359375, -1.7859344482421875, -1.728668212890625, -1.6714019775390625, -1.6141357421875, -1.5568695068359375, -1.499603271484375, -1.4423370361328125, -1.38507080078125, -1.3278045654296875, -1.270538330078125, -1.2132720947265625, -1.156005859375, -1.0987396240234375, -1.041473388671875, -0.9842071533203125, -0.92694091796875, -0.8696746826171875, -0.812408447265625, -0.7551422119140625, -0.6978759765625, -0.6406097412109375, -0.583343505859375, -0.5260772705078125, -0.46881103515625, -0.4115447998046875, -0.354278564453125, -0.2970123291015625, -0.23974609375, -0.1824798583984375, -0.125213623046875, -0.0679473876953125, -0.01068115234375, 0.0465850830078125, 0.103851318359375, 0.1611175537109375, 0.2183837890625, 0.2756500244140625, 0.332916259765625, 0.3901824951171875, 0.44744873046875, 0.5047149658203125, 0.561981201171875, 0.6192474365234375, 0.676513671875, 0.7337799072265625, 0.791046142578125, 0.8483123779296875, 0.90557861328125, 0.9628448486328125, 1.020111083984375, 1.0773773193359375, 1.1346435546875, 1.1919097900390625, 1.249176025390625, 1.3064422607421875, 1.36370849609375, 1.4209747314453125, 1.478240966796875, 1.5355072021484375, 1.5927734375]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 4.0, 7.0, 2.0, 9.0, 9.0, 6.0, 11.0, 6.0, 16.0, 24.0, 17.0, 38.0, 34.0, 38.0, 57.0, 71.0, 81.0, 86.0, 87.0, 75.0, 54.0, 52.0, 44.0, 33.0, 25.0, 18.0, 15.0, 12.0, 17.0, 7.0, 10.0, 10.0, 4.0, 7.0, 6.0, 0.0, 3.0, 4.0, 2.0, 1.0, 5.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00011813640594482422, -0.00011442694813013077, -0.00011071749031543732, -0.00010700803250074387, -0.00010329857468605042, -9.958911687135696e-05, -9.587965905666351e-05, -9.217020124197006e-05, -8.846074342727661e-05, -8.475128561258316e-05, -8.104182779788971e-05, -7.733236998319626e-05, -7.362291216850281e-05, -6.991345435380936e-05, -6.62039965391159e-05, -6.249453872442245e-05, -5.8785080909729004e-05, -5.507562309503555e-05, -5.13661652803421e-05, -4.765670746564865e-05, -4.39472496509552e-05, -4.023779183626175e-05, -3.65283340215683e-05, -3.281887620687485e-05, -2.9109418392181396e-05, -2.5399960577487946e-05, -2.1690502762794495e-05, -1.7981044948101044e-05, -1.4271587133407593e-05, -1.0562129318714142e-05, -6.852671504020691e-06, -3.14321368932724e-06, 5.662441253662109e-07, 4.275701940059662e-06, 7.985159754753113e-06, 1.1694617569446564e-05, 1.5404075384140015e-05, 1.9113533198833466e-05, 2.2822991013526917e-05, 2.6532448828220367e-05, 3.024190664291382e-05, 3.395136445760727e-05, 3.766082227230072e-05, 4.137028008699417e-05, 4.507973790168762e-05, 4.878919571638107e-05, 5.2498653531074524e-05, 5.6208111345767975e-05, 5.9917569160461426e-05, 6.362702697515488e-05, 6.733648478984833e-05, 7.104594260454178e-05, 7.475540041923523e-05, 7.846485823392868e-05, 8.217431604862213e-05, 8.588377386331558e-05, 8.959323167800903e-05, 9.330268949270248e-05, 9.701214730739594e-05, 0.00010072160512208939, 0.00010443106293678284, 0.00010814052075147629, 0.00011184997856616974, 0.00011555943638086319, 0.00011926889419555664]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 0.0, 2.0, 1.0, 2.0, 4.0, 3.0, 9.0, 6.0, 3.0, 12.0, 16.0, 28.0, 51.0, 78.0, 131.0, 223.0, 426.0, 876.0, 2669.0, 10718.0, 87232.0, 831652.0, 98520.0, 11342.0, 2775.0, 923.0, 376.0, 179.0, 112.0, 62.0, 39.0, 25.0, 15.0, 12.0, 9.0, 7.0, 6.0, 5.0, 3.0, 2.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.77294921875, -0.74676513671875, -0.7205810546875, -0.69439697265625, -0.668212890625, -0.64202880859375, -0.6158447265625, -0.58966064453125, -0.5634765625, -0.53729248046875, -0.5111083984375, -0.48492431640625, -0.458740234375, -0.43255615234375, -0.4063720703125, -0.38018798828125, -0.35400390625, -0.32781982421875, -0.3016357421875, -0.27545166015625, -0.249267578125, -0.22308349609375, -0.1968994140625, -0.17071533203125, -0.14453125, -0.11834716796875, -0.0921630859375, -0.06597900390625, -0.039794921875, -0.01361083984375, 0.0125732421875, 0.03875732421875, 0.06494140625, 0.09112548828125, 0.1173095703125, 0.14349365234375, 0.169677734375, 0.19586181640625, 0.2220458984375, 0.24822998046875, 0.2744140625, 0.30059814453125, 0.3267822265625, 0.35296630859375, 0.379150390625, 0.40533447265625, 0.4315185546875, 0.45770263671875, 0.48388671875, 0.51007080078125, 0.5362548828125, 0.56243896484375, 0.588623046875, 0.61480712890625, 0.6409912109375, 0.66717529296875, 0.693359375, 0.71954345703125, 0.7457275390625, 0.77191162109375, 0.798095703125, 0.82427978515625, 0.8504638671875, 0.87664794921875, 0.90283203125]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 2.0, 3.0, 5.0, 5.0, 7.0, 10.0, 14.0, 28.0, 54.0, 81.0, 109.0, 154.0, 147.0, 114.0, 100.0, 67.0, 43.0, 15.0, 15.0, 7.0, 8.0, 3.0, 6.0, 3.0, 4.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.445068359375, -0.4340171813964844, -0.42296600341796875, -0.4119148254394531, -0.4008636474609375, -0.3898124694824219, -0.37876129150390625, -0.3677101135253906, -0.356658935546875, -0.3456077575683594, -0.33455657958984375, -0.3235054016113281, -0.3124542236328125, -0.3014030456542969, -0.29035186767578125, -0.2793006896972656, -0.26824951171875, -0.2571983337402344, -0.24614715576171875, -0.23509597778320312, -0.2240447998046875, -0.21299362182617188, -0.20194244384765625, -0.19089126586914062, -0.179840087890625, -0.16878890991210938, -0.15773773193359375, -0.14668655395507812, -0.1356353759765625, -0.12458419799804688, -0.11353302001953125, -0.10248184204101562, -0.0914306640625, -0.08037948608398438, -0.06932830810546875, -0.058277130126953125, -0.0472259521484375, -0.036174774169921875, -0.02512359619140625, -0.014072418212890625, -0.003021240234375, 0.008029937744140625, 0.01908111572265625, 0.030132293701171875, 0.0411834716796875, 0.052234649658203125, 0.06328582763671875, 0.07433700561523438, 0.08538818359375, 0.09643936157226562, 0.10749053955078125, 0.11854171752929688, 0.1295928955078125, 0.14064407348632812, 0.15169525146484375, 0.16274642944335938, 0.173797607421875, 0.18484878540039062, 0.19589996337890625, 0.20695114135742188, 0.2180023193359375, 0.22905349731445312, 0.24010467529296875, 0.2511558532714844, 0.26220703125]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 2.0, 6.0, 10.0, 13.0, 39.0, 97.0, 198.0, 272.0, 179.0, 117.0, 47.0, 14.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-7.4218430519104, -7.24770975112915, -7.0735764503479, -6.899442672729492, -6.725309371948242, -6.551176071166992, -6.377042770385742, -6.202909469604492, -6.028775691986084, -5.854642391204834, -5.680509090423584, -5.506375312805176, -5.332242012023926, -5.158108711242676, -4.983975410461426, -4.809842109680176, -4.635708808898926, -4.461575508117676, -4.287442207336426, -4.113308429718018, -3.9391751289367676, -3.7650418281555176, -3.5909085273742676, -3.4167749881744385, -3.2426414489746094, -3.0685081481933594, -2.8943746089935303, -2.7202413082122803, -2.546107769012451, -2.371974468231201, -2.197841167449951, -2.023707628250122, -1.8495738506317139, -1.6754404306411743, -1.5013070106506348, -1.3271737098693848, -1.1530401706695557, -0.9789068102836609, -0.8047734498977661, -0.6306400299072266, -0.456506609916687, -0.28237318992614746, -0.1082397997379303, 0.06589359045028687, 0.24002701044082642, 0.41416043043136597, 0.5882937908172607, 0.7624272108078003, 0.9365606307983398, 1.1106940507888794, 1.284827470779419, 1.458960771560669, 1.633094310760498, 1.807227611541748, 1.9813610315322876, 2.155494451522827, 2.3296279907226562, 2.5037612915039062, 2.6778948307037354, 2.8520281314849854, 3.0261616706848145, 3.2002949714660645, 3.3744282722473145, 3.5485618114471436, 3.7226951122283936]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 4.0, 2.0, 7.0, 6.0, 8.0, 14.0, 21.0, 13.0, 32.0, 29.0, 24.0, 44.0, 48.0, 54.0, 49.0, 45.0, 64.0, 57.0, 53.0, 61.0, 40.0, 54.0, 48.0, 39.0, 38.0, 26.0, 27.0, 17.0, 13.0, 24.0, 11.0, 11.0, 7.0, 8.0, 1.0, 4.0, 2.0, 3.0, 0.0, 1.0, 2.0, 4.0], "bins": [-2.2807202339172363, -2.2223715782165527, -2.164022922515869, -2.1056742668151855, -2.047325611114502, -1.9889768362045288, -1.9306280612945557, -1.872279405593872, -1.8139307498931885, -1.7555820941925049, -1.6972334384918213, -1.6388846635818481, -1.5805360078811646, -1.522187352180481, -1.4638385772705078, -1.4054899215698242, -1.3471412658691406, -1.288792610168457, -1.2304439544677734, -1.1720951795578003, -1.1137465238571167, -1.055397868156433, -0.9970491528511047, -0.9387004375457764, -0.8803517818450928, -0.8220031261444092, -0.7636544108390808, -0.7053056955337524, -0.6469570398330688, -0.5886083841323853, -0.5302596688270569, -0.4719109833240509, -0.4135622978210449, -0.35521361231803894, -0.29686492681503296, -0.23851624131202698, -0.180167555809021, -0.12181887030601501, -0.06347018480300903, -0.005121499300003052, 0.05322718620300293, 0.11157587170600891, 0.1699245572090149, 0.22827324271202087, 0.28662192821502686, 0.34497061371803284, 0.4033192992210388, 0.4616679847240448, 0.5200166702270508, 0.5783653259277344, 0.6367140412330627, 0.6950627565383911, 0.7534114122390747, 0.8117600679397583, 0.8701087832450867, 0.928457498550415, 0.9868061542510986, 1.0451548099517822, 1.1035034656524658, 1.161852240562439, 1.2202008962631226, 1.2785495519638062, 1.3368983268737793, 1.395246982574463, 1.4535956382751465]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 2.0, 5.0, 3.0, 2.0, 7.0, 5.0, 13.0, 13.0, 15.0, 23.0, 17.0, 18.0, 24.0, 35.0, 33.0, 61.0, 63.0, 98.0, 144.0, 218.0, 589.0, 1993.0, 16505.0, 3264047.0, 893623.0, 14081.0, 1825.0, 425.0, 167.0, 72.0, 48.0, 34.0, 21.0, 18.0, 14.0, 6.0, 6.0, 1.0, 4.0, 3.0, 4.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.751953125, -2.6732177734375, -2.594482421875, -2.5157470703125, -2.43701171875, -2.3582763671875, -2.279541015625, -2.2008056640625, -2.1220703125, -2.0433349609375, -1.964599609375, -1.8858642578125, -1.80712890625, -1.7283935546875, -1.649658203125, -1.5709228515625, -1.4921875, -1.4134521484375, -1.334716796875, -1.2559814453125, -1.17724609375, -1.0985107421875, -1.019775390625, -0.9410400390625, -0.8623046875, -0.7835693359375, -0.704833984375, -0.6260986328125, -0.54736328125, -0.4686279296875, -0.389892578125, -0.3111572265625, -0.232421875, -0.1536865234375, -0.074951171875, 0.0037841796875, 0.08251953125, 0.1612548828125, 0.239990234375, 0.3187255859375, 0.3974609375, 0.4761962890625, 0.554931640625, 0.6336669921875, 0.71240234375, 0.7911376953125, 0.869873046875, 0.9486083984375, 1.02734375, 1.1060791015625, 1.184814453125, 1.2635498046875, 1.34228515625, 1.4210205078125, 1.499755859375, 1.5784912109375, 1.6572265625, 1.7359619140625, 1.814697265625, 1.8934326171875, 1.97216796875, 2.0509033203125, 2.129638671875, 2.2083740234375, 2.287109375]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 11.0, 33.0, 78.0, 126.0, 142.0, 193.0, 159.0, 117.0, 76.0, 37.0, 18.0, 11.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.250244140625, -0.24529647827148438, -0.24034881591796875, -0.23540115356445312, -0.2304534912109375, -0.22550582885742188, -0.22055816650390625, -0.21561050415039062, -0.210662841796875, -0.20571517944335938, -0.20076751708984375, -0.19581985473632812, -0.1908721923828125, -0.18592453002929688, -0.18097686767578125, -0.17602920532226562, -0.17108154296875, -0.16613388061523438, -0.16118621826171875, -0.15623855590820312, -0.1512908935546875, -0.14634323120117188, -0.14139556884765625, -0.13644790649414062, -0.131500244140625, -0.12655258178710938, -0.12160491943359375, -0.11665725708007812, -0.1117095947265625, -0.10676193237304688, -0.10181427001953125, -0.09686660766601562, -0.0919189453125, -0.08697128295898438, -0.08202362060546875, -0.07707595825195312, -0.0721282958984375, -0.06718063354492188, -0.06223297119140625, -0.057285308837890625, -0.052337646484375, -0.047389984130859375, -0.04244232177734375, -0.037494659423828125, -0.0325469970703125, -0.027599334716796875, -0.02265167236328125, -0.017704010009765625, -0.01275634765625, -0.007808685302734375, -0.00286102294921875, 0.002086639404296875, 0.0070343017578125, 0.011981964111328125, 0.01692962646484375, 0.021877288818359375, 0.026824951171875, 0.031772613525390625, 0.03672027587890625, 0.041667938232421875, 0.0466156005859375, 0.051563262939453125, 0.05651092529296875, 0.061458587646484375, 0.06640625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 6.0, 4.0, 10.0, 13.0, 12.0, 20.0, 30.0, 49.0, 47.0, 92.0, 202.0, 609.0, 2918.0, 3919008.0, 268346.0, 2044.0, 491.0, 160.0, 64.0, 46.0, 39.0, 22.0, 19.0, 9.0, 10.0, 8.0, 3.0, 3.0, 5.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.3046875, -7.08935546875, -6.8740234375, -6.65869140625, -6.443359375, -6.22802734375, -6.0126953125, -5.79736328125, -5.58203125, -5.36669921875, -5.1513671875, -4.93603515625, -4.720703125, -4.50537109375, -4.2900390625, -4.07470703125, -3.859375, -3.64404296875, -3.4287109375, -3.21337890625, -2.998046875, -2.78271484375, -2.5673828125, -2.35205078125, -2.13671875, -1.92138671875, -1.7060546875, -1.49072265625, -1.275390625, -1.06005859375, -0.8447265625, -0.62939453125, -0.4140625, -0.19873046875, 0.0166015625, 0.23193359375, 0.447265625, 0.66259765625, 0.8779296875, 1.09326171875, 1.30859375, 1.52392578125, 1.7392578125, 1.95458984375, 2.169921875, 2.38525390625, 2.6005859375, 2.81591796875, 3.03125, 3.24658203125, 3.4619140625, 3.67724609375, 3.892578125, 4.10791015625, 4.3232421875, 4.53857421875, 4.75390625, 4.96923828125, 5.1845703125, 5.39990234375, 5.615234375, 5.83056640625, 6.0458984375, 6.26123046875, 6.4765625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 12.0, 15.0, 33.0, 109.0, 468.0, 2817.0, 453.0, 124.0, 27.0, 16.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.73828125, -0.7204818725585938, -0.7026824951171875, -0.6848831176757812, -0.667083740234375, -0.6492843627929688, -0.6314849853515625, -0.6136856079101562, -0.59588623046875, -0.5780868530273438, -0.5602874755859375, -0.5424880981445312, -0.524688720703125, -0.5068893432617188, -0.4890899658203125, -0.47129058837890625, -0.4534912109375, -0.43569183349609375, -0.4178924560546875, -0.40009307861328125, -0.382293701171875, -0.36449432373046875, -0.3466949462890625, -0.32889556884765625, -0.31109619140625, -0.29329681396484375, -0.2754974365234375, -0.25769805908203125, -0.239898681640625, -0.22209930419921875, -0.2042999267578125, -0.18650054931640625, -0.168701171875, -0.15090179443359375, -0.1331024169921875, -0.11530303955078125, -0.097503662109375, -0.07970428466796875, -0.0619049072265625, -0.04410552978515625, -0.02630615234375, -0.00850677490234375, 0.0092926025390625, 0.02709197998046875, 0.044891357421875, 0.06269073486328125, 0.0804901123046875, 0.09828948974609375, 0.1160888671875, 0.13388824462890625, 0.1516876220703125, 0.16948699951171875, 0.187286376953125, 0.20508575439453125, 0.2228851318359375, 0.24068450927734375, 0.25848388671875, 0.27628326416015625, 0.2940826416015625, 0.31188201904296875, 0.329681396484375, 0.34748077392578125, 0.3652801513671875, 0.38307952880859375, 0.40087890625]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 4.0, 3.0, 3.0, 12.0, 11.0, 24.0, 55.0, 106.0, 164.0, 215.0, 174.0, 109.0, 46.0, 29.0, 18.0, 9.0, 7.0, 5.0, 7.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.0067596435546875, -3.910369396209717, -3.813978910446167, -3.7175886631011963, -3.6211984157562256, -3.524807929992676, -3.428417682647705, -3.3320274353027344, -3.2356371879577637, -3.139246940612793, -3.042856454849243, -2.9464662075042725, -2.8500759601593018, -2.753685474395752, -2.6572952270507812, -2.5609049797058105, -2.4645144939422607, -2.36812424659729, -2.2717337608337402, -2.1753435134887695, -2.078953266143799, -1.9825628995895386, -1.8861725330352783, -1.7897822856903076, -1.6933919191360474, -1.597001552581787, -1.5006113052368164, -1.4042209386825562, -1.307830572128296, -1.2114403247833252, -1.115049958229065, -1.0186595916748047, -0.9222695827484131, -0.8258792757987976, -0.7294889688491821, -0.6330986022949219, -0.5367082953453064, -0.4403179883956909, -0.34392762184143066, -0.24753731489181519, -0.1511470079421997, -0.054756686091423035, 0.04163363575935364, 0.1380239725112915, 0.23441427946090698, 0.33080458641052246, 0.4271949529647827, 0.5235852599143982, 0.6199755668640137, 0.7163658738136292, 0.8127561807632446, 0.9091465473175049, 1.0055367946624756, 1.1019271612167358, 1.198317527770996, 1.2947077751159668, 1.391098141670227, 1.4874885082244873, 1.583878755569458, 1.6802691221237183, 1.7766594886779785, 1.8730497360229492, 1.9694401025772095, 2.0658304691314697, 2.1622207164764404]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 5.0, 2.0, 3.0, 7.0, 9.0, 11.0, 16.0, 19.0, 27.0, 38.0, 41.0, 35.0, 68.0, 64.0, 72.0, 53.0, 60.0, 82.0, 62.0, 51.0, 62.0, 42.0, 32.0, 35.0, 25.0, 28.0, 13.0, 12.0, 12.0, 10.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.0176641941070557, -0.9888597130775452, -0.9600552916526794, -0.931250810623169, -0.9024463295936584, -0.873641848564148, -0.8448374271392822, -0.8160329461097717, -0.7872284650802612, -0.7584239840507507, -0.729619562625885, -0.7008150815963745, -0.672010600566864, -0.6432061195373535, -0.6144016981124878, -0.5855972170829773, -0.5567927360534668, -0.5279882550239563, -0.4991838037967682, -0.4703793525695801, -0.4415748715400696, -0.41277042031288147, -0.38396596908569336, -0.35516148805618286, -0.32635706663131714, -0.29755261540412903, -0.26874813437461853, -0.23994368314743042, -0.21113920211791992, -0.1823347508907318, -0.1535302847623825, -0.1247258186340332, -0.0959213376045227, -0.0671168714761734, -0.038312409073114395, -0.00950794667005539, 0.019296519458293915, 0.04810097813606262, 0.07690544426441193, 0.10570991039276123, 0.13451437652111053, 0.16331884264945984, 0.19212330877780914, 0.22092777490615845, 0.24973222613334656, 0.27853667736053467, 0.30734115839004517, 0.33614563941955566, 0.3649500906467438, 0.3937545418739319, 0.4225590229034424, 0.4513634741306305, 0.480167955160141, 0.5089724063873291, 0.5377768874168396, 0.5665813684463501, 0.5953857898712158, 0.6241902709007263, 0.652994692325592, 0.6817991733551025, 0.710603654384613, 0.7394081354141235, 0.7682125568389893, 0.7970170378684998, 0.8258215188980103]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 4.0, 4.0, 7.0, 4.0, 11.0, 21.0, 12.0, 36.0, 35.0, 82.0, 96.0, 214.0, 449.0, 1202.0, 5922.0, 76843.0, 871069.0, 84084.0, 6085.0, 1280.0, 502.0, 242.0, 122.0, 81.0, 48.0, 29.0, 18.0, 20.0, 12.0, 13.0, 6.0, 3.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.958984375, -1.892608642578125, -1.82623291015625, -1.759857177734375, -1.6934814453125, -1.627105712890625, -1.56072998046875, -1.494354248046875, -1.427978515625, -1.361602783203125, -1.29522705078125, -1.228851318359375, -1.1624755859375, -1.096099853515625, -1.02972412109375, -0.963348388671875, -0.89697265625, -0.830596923828125, -0.76422119140625, -0.697845458984375, -0.6314697265625, -0.565093994140625, -0.49871826171875, -0.432342529296875, -0.365966796875, -0.299591064453125, -0.23321533203125, -0.166839599609375, -0.1004638671875, -0.034088134765625, 0.03228759765625, 0.098663330078125, 0.1650390625, 0.231414794921875, 0.29779052734375, 0.364166259765625, 0.4305419921875, 0.496917724609375, 0.56329345703125, 0.629669189453125, 0.696044921875, 0.762420654296875, 0.82879638671875, 0.895172119140625, 0.9615478515625, 1.027923583984375, 1.09429931640625, 1.160675048828125, 1.22705078125, 1.293426513671875, 1.35980224609375, 1.426177978515625, 1.4925537109375, 1.558929443359375, 1.62530517578125, 1.691680908203125, 1.758056640625, 1.824432373046875, 1.89080810546875, 1.957183837890625, 2.0235595703125, 2.089935302734375, 2.15631103515625, 2.222686767578125, 2.2890625]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 6.0, 15.0, 35.0, 62.0, 76.0, 110.0, 139.0, 170.0, 140.0, 95.0, 66.0, 30.0, 22.0, 23.0, 13.0, 6.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.230224609375, -0.22546005249023438, -0.22069549560546875, -0.21593093872070312, -0.2111663818359375, -0.20640182495117188, -0.20163726806640625, -0.19687271118164062, -0.192108154296875, -0.18734359741210938, -0.18257904052734375, -0.17781448364257812, -0.1730499267578125, -0.16828536987304688, -0.16352081298828125, -0.15875625610351562, -0.15399169921875, -0.14922714233398438, -0.14446258544921875, -0.13969802856445312, -0.1349334716796875, -0.13016891479492188, -0.12540435791015625, -0.12063980102539062, -0.115875244140625, -0.11111068725585938, -0.10634613037109375, -0.10158157348632812, -0.0968170166015625, -0.09205245971679688, -0.08728790283203125, -0.08252334594726562, -0.0777587890625, -0.07299423217773438, -0.06822967529296875, -0.06346511840820312, -0.0587005615234375, -0.053936004638671875, -0.04917144775390625, -0.044406890869140625, -0.039642333984375, -0.034877777099609375, -0.03011322021484375, -0.025348663330078125, -0.0205841064453125, -0.015819549560546875, -0.01105499267578125, -0.006290435791015625, -0.00152587890625, 0.003238677978515625, 0.00800323486328125, 0.012767791748046875, 0.0175323486328125, 0.022296905517578125, 0.02706146240234375, 0.031826019287109375, 0.036590576171875, 0.041355133056640625, 0.04611968994140625, 0.050884246826171875, 0.0556488037109375, 0.060413360595703125, 0.06517791748046875, 0.06994247436523438, 0.07470703125]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 3.0, 8.0, 4.0, 8.0, 10.0, 24.0, 16.0, 15.0, 29.0, 33.0, 54.0, 74.0, 110.0, 158.0, 253.0, 384.0, 696.0, 1320.0, 2645.0, 6691.0, 20248.0, 95108.0, 499867.0, 340031.0, 56742.0, 14190.0, 5029.0, 2193.0, 1024.0, 592.0, 325.0, 207.0, 127.0, 98.0, 68.0, 50.0, 41.0, 20.0, 17.0, 12.0, 8.0, 5.0, 7.0, 4.0, 6.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.8017578125, -0.7773513793945312, -0.7529449462890625, -0.7285385131835938, -0.704132080078125, -0.6797256469726562, -0.6553192138671875, -0.6309127807617188, -0.60650634765625, -0.5820999145507812, -0.5576934814453125, -0.5332870483398438, -0.508880615234375, -0.48447418212890625, -0.4600677490234375, -0.43566131591796875, -0.4112548828125, -0.38684844970703125, -0.3624420166015625, -0.33803558349609375, -0.313629150390625, -0.28922271728515625, -0.2648162841796875, -0.24040985107421875, -0.21600341796875, -0.19159698486328125, -0.1671905517578125, -0.14278411865234375, -0.118377685546875, -0.09397125244140625, -0.0695648193359375, -0.04515838623046875, -0.020751953125, 0.00365447998046875, 0.0280609130859375, 0.05246734619140625, 0.076873779296875, 0.10128021240234375, 0.1256866455078125, 0.15009307861328125, 0.17449951171875, 0.19890594482421875, 0.2233123779296875, 0.24771881103515625, 0.272125244140625, 0.29653167724609375, 0.3209381103515625, 0.34534454345703125, 0.3697509765625, 0.39415740966796875, 0.4185638427734375, 0.44297027587890625, 0.467376708984375, 0.49178314208984375, 0.5161895751953125, 0.5405960083007812, 0.56500244140625, 0.5894088745117188, 0.6138153076171875, 0.6382217407226562, 0.662628173828125, 0.6870346069335938, 0.7114410400390625, 0.7358474731445312, 0.76025390625]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 1.0, 4.0, 2.0, 2.0, 4.0, 3.0, 4.0, 7.0, 5.0, 8.0, 9.0, 8.0, 8.0, 24.0, 17.0, 21.0, 25.0, 30.0, 28.0, 34.0, 31.0, 43.0, 34.0, 40.0, 50.0, 46.0, 36.0, 50.0, 47.0, 44.0, 43.0, 38.0, 39.0, 30.0, 38.0, 27.0, 17.0, 15.0, 18.0, 11.0, 7.0, 9.0, 9.0, 7.0, 6.0, 5.0, 7.0, 6.0, 5.0, 2.0, 2.0, 5.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.2264404296875, -0.21875572204589844, -0.21107101440429688, -0.2033863067626953, -0.19570159912109375, -0.1880168914794922, -0.18033218383789062, -0.17264747619628906, -0.1649627685546875, -0.15727806091308594, -0.14959335327148438, -0.1419086456298828, -0.13422393798828125, -0.1265392303466797, -0.11885452270507812, -0.11116981506347656, -0.103485107421875, -0.09580039978027344, -0.08811569213867188, -0.08043098449707031, -0.07274627685546875, -0.06506156921386719, -0.057376861572265625, -0.04969215393066406, -0.0420074462890625, -0.03432273864746094, -0.026638031005859375, -0.018953323364257812, -0.01126861572265625, -0.0035839080810546875, 0.004100799560546875, 0.011785507202148438, 0.01947021484375, 0.027154922485351562, 0.034839630126953125, 0.04252433776855469, 0.05020904541015625, 0.05789375305175781, 0.06557846069335938, 0.07326316833496094, 0.0809478759765625, 0.08863258361816406, 0.09631729125976562, 0.10400199890136719, 0.11168670654296875, 0.11937141418457031, 0.12705612182617188, 0.13474082946777344, 0.142425537109375, 0.15011024475097656, 0.15779495239257812, 0.1654796600341797, 0.17316436767578125, 0.1808490753173828, 0.18853378295898438, 0.19621849060058594, 0.2039031982421875, 0.21158790588378906, 0.21927261352539062, 0.2269573211669922, 0.23464202880859375, 0.2423267364501953, 0.2500114440917969, 0.25769615173339844, 0.265380859375]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 6.0, 8.0, 6.0, 13.0, 21.0, 24.0, 33.0, 44.0, 78.0, 123.0, 193.0, 457.0, 942.0, 2254.0, 6448.0, 22241.0, 113931.0, 566597.0, 274424.0, 43709.0, 10749.0, 3563.0, 1351.0, 622.0, 286.0, 156.0, 94.0, 54.0, 34.0, 34.0, 18.0, 17.0, 9.0, 6.0, 5.0, 4.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.43798828125, -0.4241828918457031, -0.41037750244140625, -0.3965721130371094, -0.3827667236328125, -0.3689613342285156, -0.35515594482421875, -0.3413505554199219, -0.327545166015625, -0.3137397766113281, -0.29993438720703125, -0.2861289978027344, -0.2723236083984375, -0.2585182189941406, -0.24471282958984375, -0.23090744018554688, -0.21710205078125, -0.20329666137695312, -0.18949127197265625, -0.17568588256835938, -0.1618804931640625, -0.14807510375976562, -0.13426971435546875, -0.12046432495117188, -0.106658935546875, -0.09285354614257812, -0.07904815673828125, -0.06524276733398438, -0.0514373779296875, -0.037631988525390625, -0.02382659912109375, -0.010021209716796875, 0.0037841796875, 0.017589569091796875, 0.03139495849609375, 0.045200347900390625, 0.0590057373046875, 0.07281112670898438, 0.08661651611328125, 0.10042190551757812, 0.114227294921875, 0.12803268432617188, 0.14183807373046875, 0.15564346313476562, 0.1694488525390625, 0.18325424194335938, 0.19705963134765625, 0.21086502075195312, 0.22467041015625, 0.23847579956054688, 0.25228118896484375, 0.2660865783691406, 0.2798919677734375, 0.2936973571777344, 0.30750274658203125, 0.3213081359863281, 0.335113525390625, 0.3489189147949219, 0.36272430419921875, 0.3765296936035156, 0.3903350830078125, 0.4041404724121094, 0.41794586181640625, 0.4317512512207031, 0.445556640625]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 8.0, 8.0, 9.0, 5.0, 11.0, 10.0, 12.0, 12.0, 24.0, 19.0, 34.0, 24.0, 42.0, 44.0, 63.0, 50.0, 63.0, 64.0, 78.0, 62.0, 52.0, 43.0, 41.0, 41.0, 37.0, 24.0, 27.0, 23.0, 16.0, 12.0, 8.0, 9.0, 4.0, 6.0, 7.0, 1.0, 1.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 3.0, 3.0], "bins": [-9.018182754516602e-05, -8.776038885116577e-05, -8.533895015716553e-05, -8.291751146316528e-05, -8.049607276916504e-05, -7.80746340751648e-05, -7.565319538116455e-05, -7.32317566871643e-05, -7.081031799316406e-05, -6.838887929916382e-05, -6.596744060516357e-05, -6.354600191116333e-05, -6.112456321716309e-05, -5.870312452316284e-05, -5.62816858291626e-05, -5.3860247135162354e-05, -5.143880844116211e-05, -4.9017369747161865e-05, -4.659593105316162e-05, -4.417449235916138e-05, -4.175305366516113e-05, -3.933161497116089e-05, -3.6910176277160645e-05, -3.44887375831604e-05, -3.2067298889160156e-05, -2.9645860195159912e-05, -2.7224421501159668e-05, -2.4802982807159424e-05, -2.238154411315918e-05, -1.9960105419158936e-05, -1.753866672515869e-05, -1.5117228031158447e-05, -1.2695789337158203e-05, -1.0274350643157959e-05, -7.852911949157715e-06, -5.431473255157471e-06, -3.0100345611572266e-06, -5.885958671569824e-07, 1.8328428268432617e-06, 4.254281520843506e-06, 6.67572021484375e-06, 9.097158908843994e-06, 1.1518597602844238e-05, 1.3940036296844482e-05, 1.6361474990844727e-05, 1.878291368484497e-05, 2.1204352378845215e-05, 2.362579107284546e-05, 2.6047229766845703e-05, 2.8468668460845947e-05, 3.089010715484619e-05, 3.3311545848846436e-05, 3.573298454284668e-05, 3.8154423236846924e-05, 4.057586193084717e-05, 4.299730062484741e-05, 4.5418739318847656e-05, 4.78401780128479e-05, 5.0261616706848145e-05, 5.268305540084839e-05, 5.510449409484863e-05, 5.752593278884888e-05, 5.994737148284912e-05, 6.236881017684937e-05, 6.479024887084961e-05]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 6.0, 8.0, 8.0, 12.0, 6.0, 28.0, 33.0, 48.0, 120.0, 195.0, 427.0, 1096.0, 3248.0, 13414.0, 105775.0, 747449.0, 153504.0, 17184.0, 3776.0, 1264.0, 470.0, 219.0, 112.0, 57.0, 43.0, 17.0, 10.0, 12.0, 3.0, 6.0, 3.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.446044921875, -0.4271202087402344, -0.40819549560546875, -0.3892707824707031, -0.3703460693359375, -0.3514213562011719, -0.33249664306640625, -0.3135719299316406, -0.294647216796875, -0.2757225036621094, -0.25679779052734375, -0.23787307739257812, -0.2189483642578125, -0.20002365112304688, -0.18109893798828125, -0.16217422485351562, -0.14324951171875, -0.12432479858398438, -0.10540008544921875, -0.08647537231445312, -0.0675506591796875, -0.048625946044921875, -0.02970123291015625, -0.010776519775390625, 0.008148193359375, 0.027072906494140625, 0.04599761962890625, 0.06492233276367188, 0.0838470458984375, 0.10277175903320312, 0.12169647216796875, 0.14062118530273438, 0.1595458984375, 0.17847061157226562, 0.19739532470703125, 0.21632003784179688, 0.2352447509765625, 0.2541694641113281, 0.27309417724609375, 0.2920188903808594, 0.310943603515625, 0.3298683166503906, 0.34879302978515625, 0.3677177429199219, 0.3866424560546875, 0.4055671691894531, 0.42449188232421875, 0.4434165954589844, 0.46234130859375, 0.4812660217285156, 0.5001907348632812, 0.5191154479980469, 0.5380401611328125, 0.5569648742675781, 0.5758895874023438, 0.5948143005371094, 0.613739013671875, 0.6326637268066406, 0.6515884399414062, 0.6705131530761719, 0.6894378662109375, 0.7083625793457031, 0.7272872924804688, 0.7462120056152344, 0.76513671875]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 2.0, 5.0, 2.0, 0.0, 6.0, 4.0, 6.0, 3.0, 15.0, 16.0, 27.0, 25.0, 52.0, 47.0, 85.0, 81.0, 115.0, 93.0, 100.0, 81.0, 64.0, 53.0, 31.0, 30.0, 19.0, 20.0, 7.0, 5.0, 4.0, 2.0, 4.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.25244140625, -0.24381637573242188, -0.23519134521484375, -0.22656631469726562, -0.2179412841796875, -0.20931625366210938, -0.20069122314453125, -0.19206619262695312, -0.183441162109375, -0.17481613159179688, -0.16619110107421875, -0.15756607055664062, -0.1489410400390625, -0.14031600952148438, -0.13169097900390625, -0.12306594848632812, -0.11444091796875, -0.10581588745117188, -0.09719085693359375, -0.08856582641601562, -0.0799407958984375, -0.07131576538085938, -0.06269073486328125, -0.054065704345703125, -0.045440673828125, -0.036815643310546875, -0.02819061279296875, -0.019565582275390625, -0.0109405517578125, -0.002315521240234375, 0.00630950927734375, 0.014934539794921875, 0.0235595703125, 0.032184600830078125, 0.04080963134765625, 0.049434661865234375, 0.0580596923828125, 0.06668472290039062, 0.07530975341796875, 0.08393478393554688, 0.092559814453125, 0.10118484497070312, 0.10980987548828125, 0.11843490600585938, 0.1270599365234375, 0.13568496704101562, 0.14430999755859375, 0.15293502807617188, 0.16156005859375, 0.17018508911132812, 0.17881011962890625, 0.18743515014648438, 0.1960601806640625, 0.20468521118164062, 0.21331024169921875, 0.22193527221679688, 0.230560302734375, 0.23918533325195312, 0.24781036376953125, 0.2564353942871094, 0.2650604248046875, 0.2736854553222656, 0.28231048583984375, 0.2909355163574219, 0.299560546875]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 14.0, 15.0, 20.0, 43.0, 80.0, 126.0, 137.0, 139.0, 128.0, 123.0, 67.0, 41.0, 25.0, 16.0, 6.0, 5.0, 5.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-5.451085090637207, -5.325107097625732, -5.199129581451416, -5.073151588439941, -4.947173595428467, -4.821195602416992, -4.695218086242676, -4.569240093231201, -4.443262100219727, -4.317284107208252, -4.1913065910339355, -4.065328598022461, -3.9393506050109863, -3.813372850418091, -3.6873950958251953, -3.5614171028137207, -3.435439348220825, -3.3094615936279297, -3.183483600616455, -3.0575058460235596, -2.931527853012085, -2.8055500984191895, -2.679572105407715, -2.5535943508148193, -2.427616596221924, -2.3016388416290283, -2.1756608486175537, -2.049683094024658, -1.9237051010131836, -1.797727346420288, -1.671749472618103, -1.545771598815918, -1.4197933673858643, -1.2938154935836792, -1.1678376197814941, -1.0418598651885986, -0.9158819317817688, -0.7899040579795837, -0.6639262437820435, -0.5379483699798584, -0.41197049617767334, -0.2859926223754883, -0.1600147783756256, -0.03403693437576294, 0.09194093942642212, 0.21791881322860718, 0.34389662742614746, 0.4698745012283325, 0.5958523750305176, 0.7218302488327026, 0.8478081226348877, 0.973785936832428, 1.0997638702392578, 1.2257416248321533, 1.3517194986343384, 1.4776973724365234, 1.6036752462387085, 1.7296531200408936, 1.8556309938430786, 1.9816088676452637, 2.107586622238159, 2.233564615249634, 2.3595423698425293, 2.485520362854004, 2.6114981174468994]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 4.0, 5.0, 4.0, 5.0, 2.0, 5.0, 10.0, 11.0, 10.0, 15.0, 22.0, 19.0, 15.0, 22.0, 33.0, 40.0, 41.0, 42.0, 46.0, 40.0, 37.0, 56.0, 40.0, 45.0, 42.0, 51.0, 40.0, 43.0, 40.0, 23.0, 28.0, 19.0, 25.0, 23.0, 21.0, 10.0, 13.0, 17.0, 9.0, 8.0, 6.0, 5.0, 8.0, 6.0, 3.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.6530956029891968, -1.6028468608856201, -1.552598237991333, -1.5023494958877563, -1.4521007537841797, -1.401852011680603, -1.3516032695770264, -1.3013546466827393, -1.2511059045791626, -1.200857162475586, -1.1506085395812988, -1.1003597974777222, -1.0501110553741455, -0.9998623132705688, -0.949613630771637, -0.8993649482727051, -0.8491162061691284, -0.7988674640655518, -0.7486187815666199, -0.698370099067688, -0.6481213569641113, -0.5978726148605347, -0.5476239323616028, -0.4973752200603485, -0.44712650775909424, -0.39687779545783997, -0.3466290831565857, -0.2963803708553314, -0.24613165855407715, -0.19588294625282288, -0.1456342339515686, -0.09538552165031433, -0.04513680934906006, 0.005111902952194214, 0.055360615253448486, 0.10560932755470276, 0.15585803985595703, 0.2061067521572113, 0.2563554644584656, 0.30660417675971985, 0.3568528890609741, 0.4071016013622284, 0.45735031366348267, 0.5075989961624146, 0.5578477382659912, 0.6080964803695679, 0.6583451628684998, 0.7085938453674316, 0.7588425874710083, 0.809091329574585, 0.8593400120735168, 0.9095886945724487, 0.9598374366760254, 1.010086178779602, 1.0603349208831787, 1.1105835437774658, 1.1608322858810425, 1.2110810279846191, 1.2613296508789062, 1.311578392982483, 1.3618271350860596, 1.4120758771896362, 1.462324619293213, 1.5125732421875, 1.5628219842910767]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 4.0, 4.0, 6.0, 3.0, 9.0, 5.0, 7.0, 11.0, 9.0, 18.0, 17.0, 34.0, 32.0, 35.0, 43.0, 72.0, 89.0, 105.0, 195.0, 324.0, 639.0, 1497.0, 5714.0, 41968.0, 3572025.0, 540538.0, 24178.0, 4333.0, 1240.0, 532.0, 247.0, 140.0, 80.0, 49.0, 29.0, 20.0, 15.0, 5.0, 7.0, 9.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.71484375, -2.6500091552734375, -2.585174560546875, -2.5203399658203125, -2.45550537109375, -2.3906707763671875, -2.325836181640625, -2.2610015869140625, -2.1961669921875, -2.1313323974609375, -2.066497802734375, -2.0016632080078125, -1.93682861328125, -1.8719940185546875, -1.807159423828125, -1.7423248291015625, -1.677490234375, -1.6126556396484375, -1.547821044921875, -1.4829864501953125, -1.41815185546875, -1.3533172607421875, -1.288482666015625, -1.2236480712890625, -1.1588134765625, -1.0939788818359375, -1.029144287109375, -0.9643096923828125, -0.89947509765625, -0.8346405029296875, -0.769805908203125, -0.7049713134765625, -0.64013671875, -0.5753021240234375, -0.510467529296875, -0.4456329345703125, -0.38079833984375, -0.3159637451171875, -0.251129150390625, -0.1862945556640625, -0.1214599609375, -0.0566253662109375, 0.008209228515625, 0.0730438232421875, 0.13787841796875, 0.2027130126953125, 0.267547607421875, 0.3323822021484375, 0.397216796875, 0.4620513916015625, 0.526885986328125, 0.5917205810546875, 0.65655517578125, 0.7213897705078125, 0.786224365234375, 0.8510589599609375, 0.9158935546875, 0.9807281494140625, 1.045562744140625, 1.1103973388671875, 1.17523193359375, 1.2400665283203125, 1.304901123046875, 1.3697357177734375, 1.4345703125]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 11.0, 23.0, 43.0, 58.0, 87.0, 115.0, 145.0, 144.0, 123.0, 90.0, 67.0, 41.0, 26.0, 17.0, 8.0, 9.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.240966796875, -0.23606109619140625, -0.2311553955078125, -0.22624969482421875, -0.221343994140625, -0.21643829345703125, -0.2115325927734375, -0.20662689208984375, -0.20172119140625, -0.19681549072265625, -0.1919097900390625, -0.18700408935546875, -0.182098388671875, -0.17719268798828125, -0.1722869873046875, -0.16738128662109375, -0.1624755859375, -0.15756988525390625, -0.1526641845703125, -0.14775848388671875, -0.142852783203125, -0.13794708251953125, -0.1330413818359375, -0.12813568115234375, -0.12322998046875, -0.11832427978515625, -0.1134185791015625, -0.10851287841796875, -0.103607177734375, -0.09870147705078125, -0.0937957763671875, -0.08889007568359375, -0.083984375, -0.07907867431640625, -0.0741729736328125, -0.06926727294921875, -0.064361572265625, -0.05945587158203125, -0.0545501708984375, -0.04964447021484375, -0.04473876953125, -0.03983306884765625, -0.0349273681640625, -0.03002166748046875, -0.025115966796875, -0.02021026611328125, -0.0153045654296875, -0.01039886474609375, -0.0054931640625, -0.00058746337890625, 0.0043182373046875, 0.00922393798828125, 0.014129638671875, 0.01903533935546875, 0.0239410400390625, 0.02884674072265625, 0.03375244140625, 0.03865814208984375, 0.0435638427734375, 0.04846954345703125, 0.053375244140625, 0.05828094482421875, 0.0631866455078125, 0.06809234619140625, 0.072998046875]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 7.0, 6.0, 11.0, 27.0, 44.0, 97.0, 120.0, 255.0, 851.0, 23273.0, 4155876.0, 12668.0, 659.0, 196.0, 102.0, 51.0, 22.0, 15.0, 7.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.85546875, -4.705810546875, -4.55615234375, -4.406494140625, -4.2568359375, -4.107177734375, -3.95751953125, -3.807861328125, -3.658203125, -3.508544921875, -3.35888671875, -3.209228515625, -3.0595703125, -2.909912109375, -2.76025390625, -2.610595703125, -2.4609375, -2.311279296875, -2.16162109375, -2.011962890625, -1.8623046875, -1.712646484375, -1.56298828125, -1.413330078125, -1.263671875, -1.114013671875, -0.96435546875, -0.814697265625, -0.6650390625, -0.515380859375, -0.36572265625, -0.216064453125, -0.06640625, 0.083251953125, 0.23291015625, 0.382568359375, 0.5322265625, 0.681884765625, 0.83154296875, 0.981201171875, 1.130859375, 1.280517578125, 1.43017578125, 1.579833984375, 1.7294921875, 1.879150390625, 2.02880859375, 2.178466796875, 2.328125, 2.477783203125, 2.62744140625, 2.777099609375, 2.9267578125, 3.076416015625, 3.22607421875, 3.375732421875, 3.525390625, 3.675048828125, 3.82470703125, 3.974365234375, 4.1240234375, 4.273681640625, 4.42333984375, 4.572998046875, 4.72265625]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 7.0, 8.0, 18.0, 36.0, 135.0, 434.0, 2560.0, 640.0, 147.0, 67.0, 27.0, 5.0, 2.0, 1.0, 4.0], "bins": [-1.0185546875, -1.0002384185791016, -0.9819221496582031, -0.9636058807373047, -0.9452896118164062, -0.9269733428955078, -0.9086570739746094, -0.8903408050537109, -0.8720245361328125, -0.8537082672119141, -0.8353919982910156, -0.8170757293701172, -0.7987594604492188, -0.7804431915283203, -0.7621269226074219, -0.7438106536865234, -0.725494384765625, -0.7071781158447266, -0.6888618469238281, -0.6705455780029297, -0.6522293090820312, -0.6339130401611328, -0.6155967712402344, -0.5972805023193359, -0.5789642333984375, -0.5606479644775391, -0.5423316955566406, -0.5240154266357422, -0.5056991577148438, -0.4873828887939453, -0.4690666198730469, -0.45075035095214844, -0.43243408203125, -0.41411781311035156, -0.3958015441894531, -0.3774852752685547, -0.35916900634765625, -0.3408527374267578, -0.3225364685058594, -0.30422019958496094, -0.2859039306640625, -0.26758766174316406, -0.24927139282226562, -0.2309551239013672, -0.21263885498046875, -0.1943225860595703, -0.17600631713867188, -0.15769004821777344, -0.139373779296875, -0.12105751037597656, -0.10274124145507812, -0.08442497253417969, -0.06610870361328125, -0.04779243469238281, -0.029476165771484375, -0.011159896850585938, 0.0071563720703125, 0.025472640991210938, 0.043788909912109375, 0.06210517883300781, 0.08042144775390625, 0.09873771667480469, 0.11705398559570312, 0.13537025451660156, 0.1536865234375]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0, 4.0, 7.0, 8.0, 9.0, 35.0, 64.0, 108.0, 189.0, 215.0, 159.0, 82.0, 56.0, 22.0, 16.0, 10.0, 3.0, 4.0, 4.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.8358864784240723, -2.7601702213287354, -2.6844539642333984, -2.6087377071380615, -2.5330214500427246, -2.457305431365967, -2.381588935852051, -2.305872917175293, -2.230156660079956, -2.154440402984619, -2.0787241458892822, -2.0030078887939453, -1.927291750907898, -1.851575493812561, -1.7758592367172241, -1.7001430988311768, -1.6244267225265503, -1.5487104654312134, -1.4729942083358765, -1.397278070449829, -1.3215618133544922, -1.2458455562591553, -1.1701292991638184, -1.0944130420684814, -1.0186967849731445, -0.9429805278778076, -0.8672643303871155, -0.7915480732917786, -0.7158318758010864, -0.6401156187057495, -0.5643993616104126, -0.48868316411972046, -0.4129669666290283, -0.3372507393360138, -0.26153451204299927, -0.18581825494766235, -0.11010202765464783, -0.0343858003616333, 0.04133045673370361, 0.11704665422439575, 0.19276291131973267, 0.2684791386127472, 0.3441953659057617, 0.41991162300109863, 0.49562785029411316, 0.5713440775871277, 0.6470603346824646, 0.7227765321731567, 0.7984927892684937, 0.8742090463638306, 0.9499252438545227, 1.0256414413452148, 1.1013576984405518, 1.1770739555358887, 1.2527902126312256, 1.3285064697265625, 1.4042227268218994, 1.4799389839172363, 1.5556552410125732, 1.6313714981079102, 1.7070876359939575, 1.7828038930892944, 1.8585201501846313, 1.9342362880706787, 2.0099525451660156]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 5.0, 11.0, 9.0, 3.0, 11.0, 21.0, 18.0, 44.0, 46.0, 54.0, 58.0, 74.0, 83.0, 73.0, 73.0, 87.0, 68.0, 67.0, 49.0, 38.0, 31.0, 25.0, 18.0, 20.0, 10.0, 3.0, 6.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.430869221687317, -1.3889741897583008, -1.3470792770385742, -1.305184245109558, -1.263289213180542, -1.2213941812515259, -1.1794991493225098, -1.1376042366027832, -1.095709204673767, -1.053814172744751, -1.0119192600250244, -0.9700242280960083, -0.9281291961669922, -0.8862341642379761, -0.8443391919136047, -0.8024442195892334, -0.7605491876602173, -0.7186541557312012, -0.6767591834068298, -0.6348642110824585, -0.5929691791534424, -0.5510741472244263, -0.5091791749000549, -0.4672841727733612, -0.4253891706466675, -0.38349416851997375, -0.34159916639328003, -0.2997041642665863, -0.2578091621398926, -0.21591416001319885, -0.17401915788650513, -0.1321241557598114, -0.09022927284240723, -0.0483342707157135, -0.006439268589019775, 0.03545573353767395, 0.07735073566436768, 0.1192457377910614, 0.16114073991775513, 0.20303574204444885, 0.24493074417114258, 0.2868257462978363, 0.32872074842453003, 0.37061575055122375, 0.4125107526779175, 0.4544057548046112, 0.49630075693130493, 0.5381957292556763, 0.5800907611846924, 0.6219857931137085, 0.6638807654380798, 0.7057757377624512, 0.7476707696914673, 0.7895658016204834, 0.8314607739448547, 0.8733557462692261, 0.9152507781982422, 0.9571458101272583, 0.9990407824516296, 1.040935754776001, 1.082830786705017, 1.1247258186340332, 1.1666207313537598, 1.2085157632827759, 1.250410795211792]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 2.0, 2.0, 10.0, 5.0, 8.0, 16.0, 16.0, 25.0, 29.0, 61.0, 92.0, 150.0, 243.0, 459.0, 1071.0, 3452.0, 19132.0, 262649.0, 692292.0, 58431.0, 7126.0, 1778.0, 715.0, 329.0, 179.0, 95.0, 58.0, 37.0, 31.0, 22.0, 11.0, 10.0, 10.0, 4.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.9306640625, -1.878814697265625, -1.82696533203125, -1.775115966796875, -1.7232666015625, -1.671417236328125, -1.61956787109375, -1.567718505859375, -1.515869140625, -1.464019775390625, -1.41217041015625, -1.360321044921875, -1.3084716796875, -1.256622314453125, -1.20477294921875, -1.152923583984375, -1.10107421875, -1.049224853515625, -0.99737548828125, -0.945526123046875, -0.8936767578125, -0.841827392578125, -0.78997802734375, -0.738128662109375, -0.686279296875, -0.634429931640625, -0.58258056640625, -0.530731201171875, -0.4788818359375, -0.427032470703125, -0.37518310546875, -0.323333740234375, -0.271484375, -0.219635009765625, -0.16778564453125, -0.115936279296875, -0.0640869140625, -0.012237548828125, 0.03961181640625, 0.091461181640625, 0.143310546875, 0.195159912109375, 0.24700927734375, 0.298858642578125, 0.3507080078125, 0.402557373046875, 0.45440673828125, 0.506256103515625, 0.55810546875, 0.609954833984375, 0.66180419921875, 0.713653564453125, 0.7655029296875, 0.817352294921875, 0.86920166015625, 0.921051025390625, 0.972900390625, 1.024749755859375, 1.07659912109375, 1.128448486328125, 1.1802978515625, 1.232147216796875, 1.28399658203125, 1.335845947265625, 1.3876953125]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 5.0, 20.0, 19.0, 34.0, 64.0, 66.0, 102.0, 111.0, 125.0, 111.0, 109.0, 70.0, 49.0, 44.0, 27.0, 17.0, 10.0, 9.0, 6.0, 3.0, 1.0, 4.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.2457275390625, -0.2404613494873047, -0.23519515991210938, -0.22992897033691406, -0.22466278076171875, -0.21939659118652344, -0.21413040161132812, -0.2088642120361328, -0.2035980224609375, -0.1983318328857422, -0.19306564331054688, -0.18779945373535156, -0.18253326416015625, -0.17726707458496094, -0.17200088500976562, -0.1667346954345703, -0.161468505859375, -0.1562023162841797, -0.15093612670898438, -0.14566993713378906, -0.14040374755859375, -0.13513755798339844, -0.12987136840820312, -0.12460517883300781, -0.1193389892578125, -0.11407279968261719, -0.10880661010742188, -0.10354042053222656, -0.09827423095703125, -0.09300804138183594, -0.08774185180664062, -0.08247566223144531, -0.07720947265625, -0.07194328308105469, -0.06667709350585938, -0.06141090393066406, -0.05614471435546875, -0.05087852478027344, -0.045612335205078125, -0.04034614562988281, -0.0350799560546875, -0.029813766479492188, -0.024547576904296875, -0.019281387329101562, -0.01401519775390625, -0.008749008178710938, -0.003482818603515625, 0.0017833709716796875, 0.007049560546875, 0.012315750122070312, 0.017581939697265625, 0.022848129272460938, 0.02811431884765625, 0.03338050842285156, 0.038646697998046875, 0.04391288757324219, 0.0491790771484375, 0.05444526672363281, 0.059711456298828125, 0.06497764587402344, 0.07024383544921875, 0.07551002502441406, 0.08077621459960938, 0.08604240417480469, 0.09130859375]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 8.0, 3.0, 3.0, 3.0, 2.0, 6.0, 6.0, 10.0, 16.0, 21.0, 37.0, 45.0, 59.0, 87.0, 185.0, 310.0, 554.0, 1219.0, 2695.0, 7270.0, 33650.0, 334587.0, 584753.0, 65189.0, 11146.0, 3501.0, 1508.0, 734.0, 391.0, 189.0, 142.0, 57.0, 46.0, 33.0, 22.0, 17.0, 12.0, 7.0, 7.0, 6.0, 4.0, 7.0, 1.0, 6.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0810546875, -1.0452117919921875, -1.009368896484375, -0.9735260009765625, -0.93768310546875, -0.9018402099609375, -0.865997314453125, -0.8301544189453125, -0.7943115234375, -0.7584686279296875, -0.722625732421875, -0.6867828369140625, -0.65093994140625, -0.6150970458984375, -0.579254150390625, -0.5434112548828125, -0.507568359375, -0.4717254638671875, -0.435882568359375, -0.4000396728515625, -0.36419677734375, -0.3283538818359375, -0.292510986328125, -0.2566680908203125, -0.2208251953125, -0.1849822998046875, -0.149139404296875, -0.1132965087890625, -0.07745361328125, -0.0416107177734375, -0.005767822265625, 0.0300750732421875, 0.06591796875, 0.1017608642578125, 0.137603759765625, 0.1734466552734375, 0.20928955078125, 0.2451324462890625, 0.280975341796875, 0.3168182373046875, 0.3526611328125, 0.3885040283203125, 0.424346923828125, 0.4601898193359375, 0.49603271484375, 0.5318756103515625, 0.567718505859375, 0.6035614013671875, 0.639404296875, 0.6752471923828125, 0.711090087890625, 0.7469329833984375, 0.78277587890625, 0.8186187744140625, 0.854461669921875, 0.8903045654296875, 0.9261474609375, 0.9619903564453125, 0.997833251953125, 1.0336761474609375, 1.06951904296875, 1.1053619384765625, 1.141204833984375, 1.1770477294921875, 1.212890625]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 9.0, 4.0, 9.0, 12.0, 16.0, 25.0, 33.0, 47.0, 38.0, 52.0, 60.0, 71.0, 64.0, 87.0, 71.0, 72.0, 48.0, 73.0, 49.0, 40.0, 34.0, 16.0, 18.0, 17.0, 11.0, 8.0, 9.0, 3.0, 6.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.63818359375, -0.62091064453125, -0.6036376953125, -0.58636474609375, -0.569091796875, -0.55181884765625, -0.5345458984375, -0.51727294921875, -0.5, -0.48272705078125, -0.4654541015625, -0.44818115234375, -0.430908203125, -0.41363525390625, -0.3963623046875, -0.37908935546875, -0.36181640625, -0.34454345703125, -0.3272705078125, -0.30999755859375, -0.292724609375, -0.27545166015625, -0.2581787109375, -0.24090576171875, -0.2236328125, -0.20635986328125, -0.1890869140625, -0.17181396484375, -0.154541015625, -0.13726806640625, -0.1199951171875, -0.10272216796875, -0.08544921875, -0.06817626953125, -0.0509033203125, -0.03363037109375, -0.016357421875, 0.00091552734375, 0.0181884765625, 0.03546142578125, 0.052734375, 0.07000732421875, 0.0872802734375, 0.10455322265625, 0.121826171875, 0.13909912109375, 0.1563720703125, 0.17364501953125, 0.19091796875, 0.20819091796875, 0.2254638671875, 0.24273681640625, 0.260009765625, 0.27728271484375, 0.2945556640625, 0.31182861328125, 0.3291015625, 0.34637451171875, 0.3636474609375, 0.38092041015625, 0.398193359375, 0.41546630859375, 0.4327392578125, 0.45001220703125, 0.46728515625]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 8.0, 11.0, 27.0, 31.0, 56.0, 91.0, 190.0, 454.0, 1105.0, 3388.0, 17654.0, 260446.0, 702641.0, 52721.0, 6663.0, 1791.0, 670.0, 279.0, 149.0, 73.0, 37.0, 31.0, 20.0, 3.0, 3.0, 7.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.818359375, -0.7963790893554688, -0.7743988037109375, -0.7524185180664062, -0.730438232421875, -0.7084579467773438, -0.6864776611328125, -0.6644973754882812, -0.64251708984375, -0.6205368041992188, -0.5985565185546875, -0.5765762329101562, -0.554595947265625, -0.5326156616210938, -0.5106353759765625, -0.48865509033203125, -0.4666748046875, -0.44469451904296875, -0.4227142333984375, -0.40073394775390625, -0.378753662109375, -0.35677337646484375, -0.3347930908203125, -0.31281280517578125, -0.29083251953125, -0.26885223388671875, -0.2468719482421875, -0.22489166259765625, -0.202911376953125, -0.18093109130859375, -0.1589508056640625, -0.13697052001953125, -0.114990234375, -0.09300994873046875, -0.0710296630859375, -0.04904937744140625, -0.027069091796875, -0.00508880615234375, 0.0168914794921875, 0.03887176513671875, 0.06085205078125, 0.08283233642578125, 0.1048126220703125, 0.12679290771484375, 0.148773193359375, 0.17075347900390625, 0.1927337646484375, 0.21471405029296875, 0.2366943359375, 0.25867462158203125, 0.2806549072265625, 0.30263519287109375, 0.324615478515625, 0.34659576416015625, 0.3685760498046875, 0.39055633544921875, 0.41253662109375, 0.43451690673828125, 0.4564971923828125, 0.47847747802734375, 0.500457763671875, 0.5224380493164062, 0.5444183349609375, 0.5663986206054688, 0.58837890625]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 5.0, 8.0, 7.0, 6.0, 15.0, 14.0, 20.0, 20.0, 30.0, 48.0, 60.0, 67.0, 87.0, 96.0, 106.0, 96.0, 67.0, 68.0, 43.0, 30.0, 25.0, 21.0, 25.0, 7.0, 8.0, 10.0, 7.0, 5.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.00011032819747924805, -0.00010719709098339081, -0.00010406598448753357, -0.00010093487799167633, -9.780377149581909e-05, -9.467266499996185e-05, -9.154155850410461e-05, -8.841045200824738e-05, -8.527934551239014e-05, -8.21482390165329e-05, -7.901713252067566e-05, -7.588602602481842e-05, -7.275491952896118e-05, -6.962381303310394e-05, -6.64927065372467e-05, -6.336160004138947e-05, -6.0230493545532227e-05, -5.709938704967499e-05, -5.396828055381775e-05, -5.083717405796051e-05, -4.770606756210327e-05, -4.457496106624603e-05, -4.1443854570388794e-05, -3.8312748074531555e-05, -3.5181641578674316e-05, -3.205053508281708e-05, -2.891942858695984e-05, -2.57883220911026e-05, -2.265721559524536e-05, -1.9526109099388123e-05, -1.6395002603530884e-05, -1.3263896107673645e-05, -1.0132789611816406e-05, -7.0016831159591675e-06, -3.870576620101929e-06, -7.394701242446899e-07, 2.391636371612549e-06, 5.522742867469788e-06, 8.653849363327026e-06, 1.1784955859184265e-05, 1.4916062355041504e-05, 1.8047168850898743e-05, 2.117827534675598e-05, 2.430938184261322e-05, 2.744048833847046e-05, 3.05715948343277e-05, 3.3702701330184937e-05, 3.6833807826042175e-05, 3.9964914321899414e-05, 4.309602081775665e-05, 4.622712731361389e-05, 4.935823380947113e-05, 5.248934030532837e-05, 5.562044680118561e-05, 5.875155329704285e-05, 6.188265979290009e-05, 6.501376628875732e-05, 6.814487278461456e-05, 7.12759792804718e-05, 7.440708577632904e-05, 7.753819227218628e-05, 8.066929876804352e-05, 8.380040526390076e-05, 8.6931511759758e-05, 9.006261825561523e-05]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 7.0, 4.0, 13.0, 19.0, 26.0, 49.0, 100.0, 176.0, 460.0, 1316.0, 5657.0, 71942.0, 896783.0, 64539.0, 5358.0, 1273.0, 427.0, 186.0, 85.0, 60.0, 26.0, 16.0, 20.0, 10.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.69287109375, -0.6633682250976562, -0.6338653564453125, -0.6043624877929688, -0.574859619140625, -0.5453567504882812, -0.5158538818359375, -0.48635101318359375, -0.45684814453125, -0.42734527587890625, -0.3978424072265625, -0.36833953857421875, -0.338836669921875, -0.30933380126953125, -0.2798309326171875, -0.25032806396484375, -0.2208251953125, -0.19132232666015625, -0.1618194580078125, -0.13231658935546875, -0.102813720703125, -0.07331085205078125, -0.0438079833984375, -0.01430511474609375, 0.01519775390625, 0.04470062255859375, 0.0742034912109375, 0.10370635986328125, 0.133209228515625, 0.16271209716796875, 0.1922149658203125, 0.22171783447265625, 0.251220703125, 0.28072357177734375, 0.3102264404296875, 0.33972930908203125, 0.369232177734375, 0.39873504638671875, 0.4282379150390625, 0.45774078369140625, 0.48724365234375, 0.5167465209960938, 0.5462493896484375, 0.5757522583007812, 0.605255126953125, 0.6347579956054688, 0.6642608642578125, 0.6937637329101562, 0.7232666015625, 0.7527694702148438, 0.7822723388671875, 0.8117752075195312, 0.841278076171875, 0.8707809448242188, 0.9002838134765625, 0.9297866821289062, 0.95928955078125, 0.9887924194335938, 1.0182952880859375, 1.0477981567382812, 1.077301025390625, 1.1068038940429688, 1.1363067626953125, 1.1658096313476562, 1.1953125]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 3.0, 4.0, 7.0, 17.0, 31.0, 32.0, 36.0, 77.0, 108.0, 114.0, 134.0, 120.0, 97.0, 77.0, 50.0, 23.0, 24.0, 21.0, 9.0, 6.0, 7.0, 2.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.29296875, -0.28058624267578125, -0.2682037353515625, -0.25582122802734375, -0.243438720703125, -0.23105621337890625, -0.2186737060546875, -0.20629119873046875, -0.19390869140625, -0.18152618408203125, -0.1691436767578125, -0.15676116943359375, -0.144378662109375, -0.13199615478515625, -0.1196136474609375, -0.10723114013671875, -0.0948486328125, -0.08246612548828125, -0.0700836181640625, -0.05770111083984375, -0.045318603515625, -0.03293609619140625, -0.0205535888671875, -0.00817108154296875, 0.00421142578125, 0.01659393310546875, 0.0289764404296875, 0.04135894775390625, 0.053741455078125, 0.06612396240234375, 0.0785064697265625, 0.09088897705078125, 0.103271484375, 0.11565399169921875, 0.1280364990234375, 0.14041900634765625, 0.152801513671875, 0.16518402099609375, 0.1775665283203125, 0.18994903564453125, 0.20233154296875, 0.21471405029296875, 0.2270965576171875, 0.23947906494140625, 0.251861572265625, 0.26424407958984375, 0.2766265869140625, 0.28900909423828125, 0.3013916015625, 0.31377410888671875, 0.3261566162109375, 0.33853912353515625, 0.350921630859375, 0.36330413818359375, 0.3756866455078125, 0.38806915283203125, 0.40045166015625, 0.41283416748046875, 0.4252166748046875, 0.43759918212890625, 0.449981689453125, 0.46236419677734375, 0.4747467041015625, 0.48712921142578125, 0.49951171875]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 2.0, 4.0, 5.0, 10.0, 18.0, 47.0, 64.0, 118.0, 173.0, 214.0, 174.0, 87.0, 49.0, 22.0, 9.0, 2.0, 3.0, 2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.256625175476074, -6.062235355377197, -5.86784553527832, -5.673455238342285, -5.479065418243408, -5.284675598144531, -5.090285778045654, -4.895895957946777, -4.701505661010742, -4.507115840911865, -4.312726020812988, -4.118335723876953, -3.923945903778076, -3.729556083679199, -3.5351662635803223, -3.3407764434814453, -3.1463866233825684, -2.9519968032836914, -2.7576067447662354, -2.5632169246673584, -2.3688268661499023, -2.1744370460510254, -1.9800472259521484, -1.785657286643982, -1.5912673473358154, -1.396877408027649, -1.2024874687194824, -1.0080976486206055, -0.813707709312439, -0.6193177700042725, -0.4249279499053955, -0.230538010597229, -0.0361475944519043, 0.15824231505393982, 0.35263222455978394, 0.5470221042633057, 0.7414120435714722, 0.9358019828796387, 1.1301918029785156, 1.3245817422866821, 1.5189716815948486, 1.7133616209030151, 1.9077515602111816, 2.1021413803100586, 2.2965312004089355, 2.4909212589263916, 2.6853110790252686, 2.8797011375427246, 3.0740909576416016, 3.2684807777404785, 3.4628708362579346, 3.6572606563568115, 3.8516507148742676, 4.0460405349731445, 4.2404303550720215, 4.434820175170898, 4.629210472106934, 4.8236002922058105, 5.0179901123046875, 5.212380409240723, 5.4067702293396, 5.601160049438477, 5.7955498695373535, 5.9899396896362305, 6.184329509735107]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 6.0, 7.0, 3.0, 9.0, 12.0, 17.0, 10.0, 18.0, 30.0, 41.0, 39.0, 55.0, 57.0, 42.0, 75.0, 72.0, 72.0, 74.0, 69.0, 54.0, 38.0, 38.0, 42.0, 33.0, 26.0, 19.0, 12.0, 9.0, 11.0, 6.0, 5.0, 4.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.0189208984375, -3.9207749366760254, -3.82262921333313, -3.7244832515716553, -3.6263375282287598, -3.528191566467285, -3.4300458431243896, -3.331899881362915, -3.2337541580200195, -3.135608196258545, -3.0374624729156494, -2.939316511154175, -2.8411707878112793, -2.7430248260498047, -2.644879102706909, -2.5467331409454346, -2.448587417602539, -2.3504414558410645, -2.252295732498169, -2.1541497707366943, -2.056004047393799, -1.9578582048416138, -1.8597123622894287, -1.761566400527954, -1.6634204387664795, -1.5652745962142944, -1.4671287536621094, -1.3689829111099243, -1.2708370685577393, -1.1726912260055542, -1.0745453834533691, -0.9763994812965393, -0.878253698348999, -0.780107855796814, -0.6819620132446289, -0.5838161706924438, -0.4856702983379364, -0.38752445578575134, -0.2893785834312439, -0.19123274087905884, -0.09308689832687378, 0.005058951675891876, 0.10320480167865753, 0.20135065913200378, 0.29949650168418884, 0.3976423442363739, 0.49578821659088135, 0.5939340591430664, 0.6920799016952515, 0.7902257442474365, 0.8883715867996216, 0.9865174293518066, 1.0846632719039917, 1.1828091144561768, 1.2809550762176514, 1.3791007995605469, 1.4772467613220215, 1.5753926038742065, 1.6735384464263916, 1.7716842889785767, 1.8698301315307617, 1.9679759740829468, 2.066121816635132, 2.1642677783966064, 2.262413501739502]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 1.0, 3.0, 3.0, 8.0, 6.0, 3.0, 13.0, 11.0, 26.0, 33.0, 40.0, 45.0, 75.0, 111.0, 123.0, 204.0, 292.0, 456.0, 1123.0, 3066.0, 15026.0, 175363.0, 3830082.0, 149012.0, 14101.0, 3065.0, 1079.0, 449.0, 201.0, 95.0, 73.0, 38.0, 18.0, 19.0, 10.0, 6.0, 5.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-2.59765625, -2.5318450927734375, -2.466033935546875, -2.4002227783203125, -2.33441162109375, -2.2686004638671875, -2.202789306640625, -2.1369781494140625, -2.0711669921875, -2.0053558349609375, -1.939544677734375, -1.8737335205078125, -1.80792236328125, -1.7421112060546875, -1.676300048828125, -1.6104888916015625, -1.544677734375, -1.4788665771484375, -1.413055419921875, -1.3472442626953125, -1.28143310546875, -1.2156219482421875, -1.149810791015625, -1.0839996337890625, -1.0181884765625, -0.9523773193359375, -0.886566162109375, -0.8207550048828125, -0.75494384765625, -0.6891326904296875, -0.623321533203125, -0.5575103759765625, -0.49169921875, -0.4258880615234375, -0.360076904296875, -0.2942657470703125, -0.22845458984375, -0.1626434326171875, -0.096832275390625, -0.0310211181640625, 0.0347900390625, 0.1006011962890625, 0.166412353515625, 0.2322235107421875, 0.29803466796875, 0.3638458251953125, 0.429656982421875, 0.4954681396484375, 0.561279296875, 0.6270904541015625, 0.692901611328125, 0.7587127685546875, 0.82452392578125, 0.8903350830078125, 0.956146240234375, 1.0219573974609375, 1.0877685546875, 1.1535797119140625, 1.219390869140625, 1.2852020263671875, 1.35101318359375, 1.4168243408203125, 1.482635498046875, 1.5484466552734375, 1.6142578125]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 7.0, 15.0, 17.0, 25.0, 49.0, 61.0, 78.0, 99.0, 116.0, 123.0, 103.0, 82.0, 71.0, 56.0, 33.0, 24.0, 15.0, 8.0, 9.0, 7.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.237060546875, -0.23192787170410156, -0.22679519653320312, -0.2216625213623047, -0.21652984619140625, -0.2113971710205078, -0.20626449584960938, -0.20113182067871094, -0.1959991455078125, -0.19086647033691406, -0.18573379516601562, -0.1806011199951172, -0.17546844482421875, -0.1703357696533203, -0.16520309448242188, -0.16007041931152344, -0.154937744140625, -0.14980506896972656, -0.14467239379882812, -0.1395397186279297, -0.13440704345703125, -0.1292743682861328, -0.12414169311523438, -0.11900901794433594, -0.1138763427734375, -0.10874366760253906, -0.10361099243164062, -0.09847831726074219, -0.09334564208984375, -0.08821296691894531, -0.08308029174804688, -0.07794761657714844, -0.07281494140625, -0.06768226623535156, -0.06254959106445312, -0.05741691589355469, -0.05228424072265625, -0.04715156555175781, -0.042018890380859375, -0.03688621520996094, -0.0317535400390625, -0.026620864868164062, -0.021488189697265625, -0.016355514526367188, -0.01122283935546875, -0.0060901641845703125, -0.000957489013671875, 0.0041751861572265625, 0.009307861328125, 0.014440536499023438, 0.019573211669921875, 0.024705886840820312, 0.02983856201171875, 0.03497123718261719, 0.040103912353515625, 0.04523658752441406, 0.0503692626953125, 0.05550193786621094, 0.060634613037109375, 0.06576728820800781, 0.07089996337890625, 0.07603263854980469, 0.08116531372070312, 0.08629798889160156, 0.0914306640625]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 3.0, 8.0, 3.0, 11.0, 18.0, 16.0, 25.0, 37.0, 40.0, 57.0, 116.0, 225.0, 652.0, 2064.0, 9118.0, 409306.0, 3754673.0, 13925.0, 2632.0, 716.0, 275.0, 127.0, 72.0, 54.0, 31.0, 31.0, 14.0, 16.0, 6.0, 4.0, 4.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.28515625, -4.137939453125, -3.99072265625, -3.843505859375, -3.6962890625, -3.549072265625, -3.40185546875, -3.254638671875, -3.107421875, -2.960205078125, -2.81298828125, -2.665771484375, -2.5185546875, -2.371337890625, -2.22412109375, -2.076904296875, -1.9296875, -1.782470703125, -1.63525390625, -1.488037109375, -1.3408203125, -1.193603515625, -1.04638671875, -0.899169921875, -0.751953125, -0.604736328125, -0.45751953125, -0.310302734375, -0.1630859375, -0.015869140625, 0.13134765625, 0.278564453125, 0.42578125, 0.572998046875, 0.72021484375, 0.867431640625, 1.0146484375, 1.161865234375, 1.30908203125, 1.456298828125, 1.603515625, 1.750732421875, 1.89794921875, 2.045166015625, 2.1923828125, 2.339599609375, 2.48681640625, 2.634033203125, 2.78125, 2.928466796875, 3.07568359375, 3.222900390625, 3.3701171875, 3.517333984375, 3.66455078125, 3.811767578125, 3.958984375, 4.106201171875, 4.25341796875, 4.400634765625, 4.5478515625, 4.695068359375, 4.84228515625, 4.989501953125, 5.13671875]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 0.0, 4.0, 4.0, 6.0, 10.0, 16.0, 23.0, 35.0, 98.0, 247.0, 761.0, 1922.0, 576.0, 197.0, 81.0, 32.0, 29.0, 7.0, 9.0, 9.0, 3.0, 2.0, 2.0, 1.0, 4.0, 1.0, 2.0, 0.0, 2.0, 1.0], "bins": [-1.0048828125, -0.9827499389648438, -0.9606170654296875, -0.9384841918945312, -0.916351318359375, -0.8942184448242188, -0.8720855712890625, -0.8499526977539062, -0.82781982421875, -0.8056869506835938, -0.7835540771484375, -0.7614212036132812, -0.739288330078125, -0.7171554565429688, -0.6950225830078125, -0.6728897094726562, -0.6507568359375, -0.6286239624023438, -0.6064910888671875, -0.5843582153320312, -0.562225341796875, -0.5400924682617188, -0.5179595947265625, -0.49582672119140625, -0.47369384765625, -0.45156097412109375, -0.4294281005859375, -0.40729522705078125, -0.385162353515625, -0.36302947998046875, -0.3408966064453125, -0.31876373291015625, -0.296630859375, -0.27449798583984375, -0.2523651123046875, -0.23023223876953125, -0.208099365234375, -0.18596649169921875, -0.1638336181640625, -0.14170074462890625, -0.11956787109375, -0.09743499755859375, -0.0753021240234375, -0.05316925048828125, -0.031036376953125, -0.00890350341796875, 0.0132293701171875, 0.03536224365234375, 0.0574951171875, 0.07962799072265625, 0.1017608642578125, 0.12389373779296875, 0.146026611328125, 0.16815948486328125, 0.1902923583984375, 0.21242523193359375, 0.23455810546875, 0.25669097900390625, 0.2788238525390625, 0.30095672607421875, 0.323089599609375, 0.34522247314453125, 0.3673553466796875, 0.38948822021484375, 0.41162109375]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 3.0, 5.0, 9.0, 17.0, 23.0, 82.0, 192.0, 264.0, 209.0, 120.0, 45.0, 19.0, 7.0, 3.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-9.52221965789795, -9.303872108459473, -9.085525512695312, -8.867177963256836, -8.648831367492676, -8.4304838180542, -8.212137222290039, -7.9937896728515625, -7.775442600250244, -7.557095527648926, -7.338748455047607, -7.120401382446289, -6.9020538330078125, -6.683706760406494, -6.465359687805176, -6.247012615203857, -6.028665542602539, -5.810318470001221, -5.591971397399902, -5.373623847961426, -5.155276775360107, -4.936929702758789, -4.718582630157471, -4.500235557556152, -4.281888008117676, -4.063540935516357, -3.84519362449646, -3.6268465518951416, -3.4084994792938232, -3.190152168273926, -2.9718050956726074, -2.753458023071289, -2.5351109504699707, -2.3167638778686523, -2.098416566848755, -1.8800694942474365, -1.6617224216461182, -1.4433752298355103, -1.2250280380249023, -1.006680965423584, -0.7883337736129761, -0.5699866414070129, -0.3516394793987274, -0.1332923173904419, 0.08505481481552124, 0.3034019470214844, 0.5217491388320923, 0.7400962114334106, 0.9584434032440186, 1.1767905950546265, 1.3951376676559448, 1.6134848594665527, 1.831831932067871, 2.0501790046691895, 2.268526315689087, 2.4868733882904053, 2.7052206993103027, 2.923567771911621, 3.1419150829315186, 3.360262155532837, 3.5786092281341553, 3.7969565391540527, 4.015303611755371, 4.2336506843566895, 4.451997756958008]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 4.0, 6.0, 10.0, 15.0, 20.0, 32.0, 45.0, 48.0, 60.0, 73.0, 87.0, 84.0, 75.0, 102.0, 75.0, 65.0, 65.0, 45.0, 20.0, 24.0, 19.0, 7.0, 10.0, 10.0, 2.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4079484939575195, -2.3296139240264893, -2.251279354095459, -2.1729447841644287, -2.0946102142333984, -2.016275644302368, -1.937941074371338, -1.8596065044403076, -1.7812719345092773, -1.702937364578247, -1.6246027946472168, -1.5462682247161865, -1.4679336547851562, -1.389599084854126, -1.3112645149230957, -1.2329299449920654, -1.1545952558517456, -1.0762606859207153, -0.9979261159896851, -0.9195915460586548, -0.8412569761276245, -0.7629224061965942, -0.6845877766609192, -0.6062532067298889, -0.5279186367988586, -0.44958406686782837, -0.3712494969367981, -0.29291489720344543, -0.21458032727241516, -0.1362457573413849, -0.05791115760803223, 0.020423412322998047, 0.09875798225402832, 0.1770925521850586, 0.25542712211608887, 0.33376172184944153, 0.4120962917804718, 0.4904308617115021, 0.5687654614448547, 0.647100031375885, 0.7254346013069153, 0.8037691712379456, 0.8821037411689758, 0.9604383707046509, 1.0387729406356812, 1.1171075105667114, 1.1954420804977417, 1.273776650428772, 1.3521112203598022, 1.4304457902908325, 1.5087803602218628, 1.587114930152893, 1.6654495000839233, 1.7437840700149536, 1.8221187591552734, 1.9004533290863037, 1.978787899017334, 2.0571224689483643, 2.1354570388793945, 2.213791608810425, 2.292126178741455, 2.3704607486724854, 2.4487953186035156, 2.527129888534546, 2.605464458465576]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 3.0, 6.0, 4.0, 5.0, 12.0, 16.0, 11.0, 15.0, 31.0, 33.0, 54.0, 83.0, 104.0, 146.0, 225.0, 329.0, 581.0, 1036.0, 2288.0, 8143.0, 49820.0, 496934.0, 434351.0, 42488.0, 7073.0, 2126.0, 1004.0, 531.0, 344.0, 208.0, 146.0, 100.0, 91.0, 48.0, 39.0, 30.0, 21.0, 28.0, 15.0, 12.0, 5.0, 5.0, 2.0, 3.0, 0.0, 2.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.37890625, -1.3328857421875, -1.286865234375, -1.2408447265625, -1.19482421875, -1.1488037109375, -1.102783203125, -1.0567626953125, -1.0107421875, -0.9647216796875, -0.918701171875, -0.8726806640625, -0.82666015625, -0.7806396484375, -0.734619140625, -0.6885986328125, -0.642578125, -0.5965576171875, -0.550537109375, -0.5045166015625, -0.45849609375, -0.4124755859375, -0.366455078125, -0.3204345703125, -0.2744140625, -0.2283935546875, -0.182373046875, -0.1363525390625, -0.09033203125, -0.0443115234375, 0.001708984375, 0.0477294921875, 0.09375, 0.1397705078125, 0.185791015625, 0.2318115234375, 0.27783203125, 0.3238525390625, 0.369873046875, 0.4158935546875, 0.4619140625, 0.5079345703125, 0.553955078125, 0.5999755859375, 0.64599609375, 0.6920166015625, 0.738037109375, 0.7840576171875, 0.830078125, 0.8760986328125, 0.922119140625, 0.9681396484375, 1.01416015625, 1.0601806640625, 1.106201171875, 1.1522216796875, 1.1982421875, 1.2442626953125, 1.290283203125, 1.3363037109375, 1.38232421875, 1.4283447265625, 1.474365234375, 1.5203857421875, 1.56640625]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 9.0, 7.0, 13.0, 31.0, 52.0, 66.0, 96.0, 110.0, 132.0, 129.0, 126.0, 71.0, 69.0, 32.0, 22.0, 15.0, 10.0, 6.0, 1.0, 2.0, 2.0, 1.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.284912109375, -0.27809715270996094, -0.2712821960449219, -0.2644672393798828, -0.25765228271484375, -0.2508373260498047, -0.24402236938476562, -0.23720741271972656, -0.2303924560546875, -0.22357749938964844, -0.21676254272460938, -0.2099475860595703, -0.20313262939453125, -0.1963176727294922, -0.18950271606445312, -0.18268775939941406, -0.175872802734375, -0.16905784606933594, -0.16224288940429688, -0.1554279327392578, -0.14861297607421875, -0.1417980194091797, -0.13498306274414062, -0.12816810607910156, -0.1213531494140625, -0.11453819274902344, -0.10772323608398438, -0.10090827941894531, -0.09409332275390625, -0.08727836608886719, -0.08046340942382812, -0.07364845275878906, -0.06683349609375, -0.06001853942871094, -0.053203582763671875, -0.04638862609863281, -0.03957366943359375, -0.03275871276855469, -0.025943756103515625, -0.019128799438476562, -0.0123138427734375, -0.0054988861083984375, 0.001316070556640625, 0.008131027221679688, 0.01494598388671875, 0.021760940551757812, 0.028575897216796875, 0.03539085388183594, 0.042205810546875, 0.04902076721191406, 0.055835723876953125, 0.06265068054199219, 0.06946563720703125, 0.07628059387207031, 0.08309555053710938, 0.08991050720214844, 0.0967254638671875, 0.10354042053222656, 0.11035537719726562, 0.11717033386230469, 0.12398529052734375, 0.1308002471923828, 0.13761520385742188, 0.14443016052246094, 0.1512451171875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 6.0, 4.0, 7.0, 12.0, 8.0, 10.0, 17.0, 26.0, 55.0, 49.0, 79.0, 131.0, 206.0, 282.0, 527.0, 1090.0, 2384.0, 6030.0, 18975.0, 67774.0, 230811.0, 417944.0, 213078.0, 61514.0, 17313.0, 5682.0, 2170.0, 970.0, 504.0, 302.0, 189.0, 111.0, 106.0, 67.0, 38.0, 28.0, 21.0, 18.0, 9.0, 5.0, 4.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5419921875, -0.524200439453125, -0.50640869140625, -0.488616943359375, -0.4708251953125, -0.453033447265625, -0.43524169921875, -0.417449951171875, -0.399658203125, -0.381866455078125, -0.36407470703125, -0.346282958984375, -0.3284912109375, -0.310699462890625, -0.29290771484375, -0.275115966796875, -0.25732421875, -0.239532470703125, -0.22174072265625, -0.203948974609375, -0.1861572265625, -0.168365478515625, -0.15057373046875, -0.132781982421875, -0.114990234375, -0.097198486328125, -0.07940673828125, -0.061614990234375, -0.0438232421875, -0.026031494140625, -0.00823974609375, 0.009552001953125, 0.02734375, 0.045135498046875, 0.06292724609375, 0.080718994140625, 0.0985107421875, 0.116302490234375, 0.13409423828125, 0.151885986328125, 0.169677734375, 0.187469482421875, 0.20526123046875, 0.223052978515625, 0.2408447265625, 0.258636474609375, 0.27642822265625, 0.294219970703125, 0.31201171875, 0.329803466796875, 0.34759521484375, 0.365386962890625, 0.3831787109375, 0.400970458984375, 0.41876220703125, 0.436553955078125, 0.454345703125, 0.472137451171875, 0.48992919921875, 0.507720947265625, 0.5255126953125, 0.543304443359375, 0.56109619140625, 0.578887939453125, 0.5966796875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 1.0, 8.0, 7.0, 5.0, 6.0, 15.0, 11.0, 16.0, 20.0, 16.0, 19.0, 22.0, 21.0, 34.0, 34.0, 43.0, 34.0, 44.0, 56.0, 48.0, 49.0, 42.0, 44.0, 43.0, 46.0, 47.0, 41.0, 23.0, 36.0, 24.0, 39.0, 12.0, 22.0, 10.0, 8.0, 19.0, 6.0, 11.0, 5.0, 9.0, 4.0, 3.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.375, -0.36243438720703125, -0.3498687744140625, -0.33730316162109375, -0.324737548828125, -0.31217193603515625, -0.2996063232421875, -0.28704071044921875, -0.27447509765625, -0.26190948486328125, -0.2493438720703125, -0.23677825927734375, -0.224212646484375, -0.21164703369140625, -0.1990814208984375, -0.18651580810546875, -0.1739501953125, -0.16138458251953125, -0.1488189697265625, -0.13625335693359375, -0.123687744140625, -0.11112213134765625, -0.0985565185546875, -0.08599090576171875, -0.07342529296875, -0.06085968017578125, -0.0482940673828125, -0.03572845458984375, -0.023162841796875, -0.01059722900390625, 0.0019683837890625, 0.01453399658203125, 0.027099609375, 0.03966522216796875, 0.0522308349609375, 0.06479644775390625, 0.077362060546875, 0.08992767333984375, 0.1024932861328125, 0.11505889892578125, 0.12762451171875, 0.14019012451171875, 0.1527557373046875, 0.16532135009765625, 0.177886962890625, 0.19045257568359375, 0.2030181884765625, 0.21558380126953125, 0.2281494140625, 0.24071502685546875, 0.2532806396484375, 0.26584625244140625, 0.278411865234375, 0.29097747802734375, 0.3035430908203125, 0.31610870361328125, 0.32867431640625, 0.34123992919921875, 0.3538055419921875, 0.36637115478515625, 0.378936767578125, 0.39150238037109375, 0.4040679931640625, 0.41663360595703125, 0.42919921875]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 2.0, 7.0, 4.0, 8.0, 8.0, 11.0, 22.0, 20.0, 32.0, 40.0, 89.0, 137.0, 189.0, 367.0, 689.0, 1485.0, 3825.0, 12736.0, 53562.0, 243581.0, 473372.0, 199019.0, 42895.0, 10389.0, 3252.0, 1288.0, 654.0, 300.0, 200.0, 119.0, 77.0, 52.0, 38.0, 26.0, 15.0, 15.0, 14.0, 8.0, 6.0, 3.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.332763671875, -0.3218269348144531, -0.31089019775390625, -0.2999534606933594, -0.2890167236328125, -0.2780799865722656, -0.26714324951171875, -0.2562065124511719, -0.245269775390625, -0.23433303833007812, -0.22339630126953125, -0.21245956420898438, -0.2015228271484375, -0.19058609008789062, -0.17964935302734375, -0.16871261596679688, -0.15777587890625, -0.14683914184570312, -0.13590240478515625, -0.12496566772460938, -0.1140289306640625, -0.10309219360351562, -0.09215545654296875, -0.08121871948242188, -0.070281982421875, -0.059345245361328125, -0.04840850830078125, -0.037471771240234375, -0.0265350341796875, -0.015598297119140625, -0.00466156005859375, 0.006275177001953125, 0.0172119140625, 0.028148651123046875, 0.03908538818359375, 0.050022125244140625, 0.0609588623046875, 0.07189559936523438, 0.08283233642578125, 0.09376907348632812, 0.104705810546875, 0.11564254760742188, 0.12657928466796875, 0.13751602172851562, 0.1484527587890625, 0.15938949584960938, 0.17032623291015625, 0.18126296997070312, 0.19219970703125, 0.20313644409179688, 0.21407318115234375, 0.22500991821289062, 0.2359466552734375, 0.24688339233398438, 0.25782012939453125, 0.2687568664550781, 0.279693603515625, 0.2906303405761719, 0.30156707763671875, 0.3125038146972656, 0.3234405517578125, 0.3343772888183594, 0.34531402587890625, 0.3562507629394531, 0.3671875]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 1.0, 3.0, 12.0, 13.0, 15.0, 15.0, 30.0, 52.0, 38.0, 72.0, 91.0, 91.0, 97.0, 87.0, 92.0, 85.0, 59.0, 40.0, 35.0, 18.0, 18.0, 13.0, 11.0, 5.0, 10.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001112222671508789, -0.00010801013559103012, -0.00010479800403118134, -0.00010158587247133255, -9.837374091148376e-05, -9.516160935163498e-05, -9.19494777917862e-05, -8.873734623193741e-05, -8.552521467208862e-05, -8.231308311223984e-05, -7.910095155239105e-05, -7.588881999254227e-05, -7.267668843269348e-05, -6.94645568728447e-05, -6.625242531299591e-05, -6.304029375314713e-05, -5.982816219329834e-05, -5.6616030633449554e-05, -5.340389907360077e-05, -5.0191767513751984e-05, -4.69796359539032e-05, -4.376750439405441e-05, -4.055537283420563e-05, -3.734324127435684e-05, -3.413110971450806e-05, -3.091897815465927e-05, -2.7706846594810486e-05, -2.44947150349617e-05, -2.1282583475112915e-05, -1.807045191526413e-05, -1.4858320355415344e-05, -1.1646188795566559e-05, -8.434057235717773e-06, -5.221925675868988e-06, -2.0097941160202026e-06, 1.2023374438285828e-06, 4.414469003677368e-06, 7.6266005635261536e-06, 1.0838732123374939e-05, 1.4050863683223724e-05, 1.726299524307251e-05, 2.0475126802921295e-05, 2.368725836277008e-05, 2.6899389922618866e-05, 3.011152148246765e-05, 3.332365304231644e-05, 3.653578460216522e-05, 3.974791616201401e-05, 4.296004772186279e-05, 4.617217928171158e-05, 4.9384310841560364e-05, 5.259644240140915e-05, 5.5808573961257935e-05, 5.902070552110672e-05, 6.22328370809555e-05, 6.544496864080429e-05, 6.865710020065308e-05, 7.186923176050186e-05, 7.508136332035065e-05, 7.829349488019943e-05, 8.150562644004822e-05, 8.4717757999897e-05, 8.792988955974579e-05, 9.114202111959457e-05, 9.435415267944336e-05]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 6.0, 3.0, 11.0, 28.0, 34.0, 53.0, 79.0, 172.0, 397.0, 1009.0, 3482.0, 21691.0, 311556.0, 637844.0, 62870.0, 6701.0, 1582.0, 561.0, 218.0, 123.0, 72.0, 32.0, 13.0, 12.0, 6.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.79931640625, -0.7799186706542969, -0.7605209350585938, -0.7411231994628906, -0.7217254638671875, -0.7023277282714844, -0.6829299926757812, -0.6635322570800781, -0.644134521484375, -0.6247367858886719, -0.6053390502929688, -0.5859413146972656, -0.5665435791015625, -0.5471458435058594, -0.5277481079101562, -0.5083503723144531, -0.48895263671875, -0.4695549011230469, -0.45015716552734375, -0.4307594299316406, -0.4113616943359375, -0.3919639587402344, -0.37256622314453125, -0.3531684875488281, -0.333770751953125, -0.3143730163574219, -0.29497528076171875, -0.2755775451660156, -0.2561798095703125, -0.23678207397460938, -0.21738433837890625, -0.19798660278320312, -0.1785888671875, -0.15919113159179688, -0.13979339599609375, -0.12039566040039062, -0.1009979248046875, -0.08160018920898438, -0.06220245361328125, -0.042804718017578125, -0.023406982421875, -0.004009246826171875, 0.01538848876953125, 0.034786224365234375, 0.0541839599609375, 0.07358169555664062, 0.09297943115234375, 0.11237716674804688, 0.13177490234375, 0.15117263793945312, 0.17057037353515625, 0.18996810913085938, 0.2093658447265625, 0.22876358032226562, 0.24816131591796875, 0.2675590515136719, 0.286956787109375, 0.3063545227050781, 0.32575225830078125, 0.3451499938964844, 0.3645477294921875, 0.3839454650878906, 0.40334320068359375, 0.4227409362792969, 0.442138671875]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 6.0, 5.0, 6.0, 8.0, 24.0, 35.0, 32.0, 59.0, 91.0, 114.0, 112.0, 139.0, 112.0, 102.0, 69.0, 35.0, 24.0, 21.0, 12.0, 5.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.19970703125, -0.18547821044921875, -0.1712493896484375, -0.15702056884765625, -0.142791748046875, -0.12856292724609375, -0.1143341064453125, -0.10010528564453125, -0.08587646484375, -0.07164764404296875, -0.0574188232421875, -0.04319000244140625, -0.028961181640625, -0.01473236083984375, -0.0005035400390625, 0.01372528076171875, 0.0279541015625, 0.04218292236328125, 0.0564117431640625, 0.07064056396484375, 0.084869384765625, 0.09909820556640625, 0.1133270263671875, 0.12755584716796875, 0.14178466796875, 0.15601348876953125, 0.1702423095703125, 0.18447113037109375, 0.198699951171875, 0.21292877197265625, 0.2271575927734375, 0.24138641357421875, 0.255615234375, 0.26984405517578125, 0.2840728759765625, 0.29830169677734375, 0.312530517578125, 0.32675933837890625, 0.3409881591796875, 0.35521697998046875, 0.36944580078125, 0.38367462158203125, 0.3979034423828125, 0.41213226318359375, 0.426361083984375, 0.44058990478515625, 0.4548187255859375, 0.46904754638671875, 0.4832763671875, 0.49750518798828125, 0.5117340087890625, 0.5259628295898438, 0.540191650390625, 0.5544204711914062, 0.5686492919921875, 0.5828781127929688, 0.59710693359375, 0.6113357543945312, 0.6255645751953125, 0.6397933959960938, 0.654022216796875, 0.6682510375976562, 0.6824798583984375, 0.6967086791992188, 0.7109375]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 13.0, 20.0, 73.0, 175.0, 242.0, 237.0, 163.0, 50.0, 21.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.493064880371094, -10.223384857177734, -9.953704833984375, -9.684024810791016, -9.414344787597656, -9.144664764404297, -8.874984741210938, -8.605304718017578, -8.335624694824219, -8.06594467163086, -7.7962646484375, -7.526584625244141, -7.256904602050781, -6.987224578857422, -6.7175445556640625, -6.447864532470703, -6.178184509277344, -5.908504486083984, -5.638824462890625, -5.369144439697266, -5.099464416503906, -4.829784393310547, -4.5601043701171875, -4.290424346923828, -4.020744323730469, -3.7510643005371094, -3.48138427734375, -3.2117042541503906, -2.9420242309570312, -2.672344207763672, -2.4026641845703125, -2.132984161376953, -1.8633041381835938, -1.5936241149902344, -1.323944091796875, -1.0542640686035156, -0.7845840454101562, -0.5149040222167969, -0.2452239990234375, 0.024456024169921875, 0.29413604736328125, 0.5638160705566406, 0.83349609375, 1.1031761169433594, 1.3728561401367188, 1.6425361633300781, 1.9122161865234375, 2.181896209716797, 2.4515762329101562, 2.7212562561035156, 2.990936279296875, 3.2606163024902344, 3.5302963256835938, 3.799976348876953, 4.0696563720703125, 4.339336395263672, 4.609016418457031, 4.878696441650391, 5.14837646484375, 5.418056488037109, 5.687736511230469, 5.957416534423828, 6.2270965576171875, 6.496776580810547, 6.766456604003906]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 7.0, 8.0, 6.0, 13.0, 11.0, 19.0, 13.0, 27.0, 29.0, 30.0, 46.0, 50.0, 44.0, 57.0, 62.0, 69.0, 50.0, 59.0, 57.0, 68.0, 51.0, 42.0, 38.0, 35.0, 26.0, 27.0, 13.0, 12.0, 11.0, 5.0, 4.0, 9.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0462570190429688, -2.949765920639038, -2.8532748222351074, -2.7567834854125977, -2.660292387008667, -2.5638012886047363, -2.4673101902008057, -2.370819091796875, -2.2743279933929443, -2.1778368949890137, -2.081345796585083, -1.9848545789718628, -1.8883634805679321, -1.791872262954712, -1.6953811645507812, -1.5988900661468506, -1.5023988485336304, -1.4059077501296997, -1.3094165325164795, -1.2129254341125488, -1.1164343357086182, -1.0199432373046875, -0.9234520196914673, -0.8269609212875366, -0.7304697632789612, -0.6339786052703857, -0.5374875068664551, -0.44099634885787964, -0.3445052206516266, -0.24801409244537354, -0.1515229344367981, -0.05503183603286743, 0.04145932197570801, 0.13795045018196106, 0.2344415932893753, 0.33093273639678955, 0.4274238646030426, 0.5239149928092957, 0.6204061508178711, 0.7168972492218018, 0.8133884072303772, 0.9098795652389526, 1.0063706636428833, 1.1028618812561035, 1.1993529796600342, 1.2958440780639648, 1.3923351764678955, 1.4888262748718262, 1.5853174924850464, 1.681808590888977, 1.7782998085021973, 1.874790906906128, 1.9712820053100586, 2.0677731037139893, 2.16426420211792, 2.2607555389404297, 2.3572466373443604, 2.453737735748291, 2.5502288341522217, 2.6467199325561523, 2.743211269378662, 2.8397023677825928, 2.9361934661865234, 3.032684564590454, 3.1291756629943848]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 6.0, 6.0, 7.0, 5.0, 11.0, 7.0, 14.0, 18.0, 12.0, 27.0, 32.0, 46.0, 50.0, 77.0, 100.0, 145.0, 200.0, 326.0, 447.0, 798.0, 1893.0, 6126.0, 34675.0, 1125523.0, 2970950.0, 41162.0, 7091.0, 2151.0, 945.0, 485.0, 315.0, 191.0, 133.0, 69.0, 70.0, 53.0, 37.0, 17.0, 19.0, 11.0, 6.0, 8.0, 5.0, 5.0, 6.0, 0.0, 1.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-2.078125, -2.0170745849609375, -1.956024169921875, -1.8949737548828125, -1.83392333984375, -1.7728729248046875, -1.711822509765625, -1.6507720947265625, -1.5897216796875, -1.5286712646484375, -1.467620849609375, -1.4065704345703125, -1.34552001953125, -1.2844696044921875, -1.223419189453125, -1.1623687744140625, -1.101318359375, -1.0402679443359375, -0.979217529296875, -0.9181671142578125, -0.85711669921875, -0.7960662841796875, -0.735015869140625, -0.6739654541015625, -0.6129150390625, -0.5518646240234375, -0.490814208984375, -0.4297637939453125, -0.36871337890625, -0.3076629638671875, -0.246612548828125, -0.1855621337890625, -0.12451171875, -0.0634613037109375, -0.002410888671875, 0.0586395263671875, 0.11968994140625, 0.1807403564453125, 0.241790771484375, 0.3028411865234375, 0.3638916015625, 0.4249420166015625, 0.485992431640625, 0.5470428466796875, 0.60809326171875, 0.6691436767578125, 0.730194091796875, 0.7912445068359375, 0.852294921875, 0.9133453369140625, 0.974395751953125, 1.0354461669921875, 1.09649658203125, 1.1575469970703125, 1.218597412109375, 1.2796478271484375, 1.3406982421875, 1.4017486572265625, 1.462799072265625, 1.5238494873046875, 1.58489990234375, 1.6459503173828125, 1.707000732421875, 1.7680511474609375, 1.8291015625]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 7.0, 12.0, 23.0, 29.0, 45.0, 59.0, 75.0, 102.0, 108.0, 112.0, 116.0, 97.0, 62.0, 50.0, 34.0, 33.0, 19.0, 7.0, 5.0, 4.0, 3.0, 0.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.269287109375, -0.2627716064453125, -0.256256103515625, -0.2497406005859375, -0.24322509765625, -0.2367095947265625, -0.230194091796875, -0.2236785888671875, -0.2171630859375, -0.2106475830078125, -0.204132080078125, -0.1976165771484375, -0.19110107421875, -0.1845855712890625, -0.178070068359375, -0.1715545654296875, -0.1650390625, -0.1585235595703125, -0.152008056640625, -0.1454925537109375, -0.13897705078125, -0.1324615478515625, -0.125946044921875, -0.1194305419921875, -0.1129150390625, -0.1063995361328125, -0.099884033203125, -0.0933685302734375, -0.08685302734375, -0.0803375244140625, -0.073822021484375, -0.0673065185546875, -0.060791015625, -0.0542755126953125, -0.047760009765625, -0.0412445068359375, -0.03472900390625, -0.0282135009765625, -0.021697998046875, -0.0151824951171875, -0.0086669921875, -0.0021514892578125, 0.004364013671875, 0.0108795166015625, 0.01739501953125, 0.0239105224609375, 0.030426025390625, 0.0369415283203125, 0.04345703125, 0.0499725341796875, 0.056488037109375, 0.0630035400390625, 0.06951904296875, 0.0760345458984375, 0.082550048828125, 0.0890655517578125, 0.0955810546875, 0.1020965576171875, 0.108612060546875, 0.1151275634765625, 0.12164306640625, 0.1281585693359375, 0.134674072265625, 0.1411895751953125, 0.147705078125]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 4.0, 3.0, 3.0, 10.0, 15.0, 14.0, 19.0, 30.0, 57.0, 99.0, 138.0, 242.0, 419.0, 903.0, 2599.0, 9205.0, 44810.0, 512200.0, 3480231.0, 116132.0, 19412.0, 4903.0, 1457.0, 603.0, 278.0, 178.0, 99.0, 79.0, 37.0, 32.0, 28.0, 13.0, 12.0, 8.0, 3.0, 5.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.2001953125, -1.1629638671875, -1.125732421875, -1.0885009765625, -1.05126953125, -1.0140380859375, -0.976806640625, -0.9395751953125, -0.90234375, -0.8651123046875, -0.827880859375, -0.7906494140625, -0.75341796875, -0.7161865234375, -0.678955078125, -0.6417236328125, -0.6044921875, -0.5672607421875, -0.530029296875, -0.4927978515625, -0.45556640625, -0.4183349609375, -0.381103515625, -0.3438720703125, -0.306640625, -0.2694091796875, -0.232177734375, -0.1949462890625, -0.15771484375, -0.1204833984375, -0.083251953125, -0.0460205078125, -0.0087890625, 0.0284423828125, 0.065673828125, 0.1029052734375, 0.14013671875, 0.1773681640625, 0.214599609375, 0.2518310546875, 0.2890625, 0.3262939453125, 0.363525390625, 0.4007568359375, 0.43798828125, 0.4752197265625, 0.512451171875, 0.5496826171875, 0.5869140625, 0.6241455078125, 0.661376953125, 0.6986083984375, 0.73583984375, 0.7730712890625, 0.810302734375, 0.8475341796875, 0.884765625, 0.9219970703125, 0.959228515625, 0.9964599609375, 1.03369140625, 1.0709228515625, 1.108154296875, 1.1453857421875, 1.1826171875]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 7.0, 9.0, 14.0, 11.0, 30.0, 32.0, 74.0, 136.0, 271.0, 683.0, 1763.0, 508.0, 215.0, 110.0, 77.0, 53.0, 23.0, 25.0, 14.0, 9.0, 7.0, 7.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.333984375, -0.32131195068359375, -0.3086395263671875, -0.29596710205078125, -0.283294677734375, -0.27062225341796875, -0.2579498291015625, -0.24527740478515625, -0.23260498046875, -0.21993255615234375, -0.2072601318359375, -0.19458770751953125, -0.181915283203125, -0.16924285888671875, -0.1565704345703125, -0.14389801025390625, -0.1312255859375, -0.11855316162109375, -0.1058807373046875, -0.09320831298828125, -0.080535888671875, -0.06786346435546875, -0.0551910400390625, -0.04251861572265625, -0.02984619140625, -0.01717376708984375, -0.0045013427734375, 0.00817108154296875, 0.020843505859375, 0.03351593017578125, 0.0461883544921875, 0.05886077880859375, 0.071533203125, 0.08420562744140625, 0.0968780517578125, 0.10955047607421875, 0.122222900390625, 0.13489532470703125, 0.1475677490234375, 0.16024017333984375, 0.17291259765625, 0.18558502197265625, 0.1982574462890625, 0.21092987060546875, 0.223602294921875, 0.23627471923828125, 0.2489471435546875, 0.26161956787109375, 0.2742919921875, 0.28696441650390625, 0.2996368408203125, 0.31230926513671875, 0.324981689453125, 0.33765411376953125, 0.3503265380859375, 0.36299896240234375, 0.37567138671875, 0.38834381103515625, 0.4010162353515625, 0.41368865966796875, 0.426361083984375, 0.43903350830078125, 0.4517059326171875, 0.46437835693359375, 0.47705078125]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 4.0, 7.0, 14.0, 25.0, 51.0, 86.0, 166.0, 191.0, 153.0, 124.0, 83.0, 46.0, 28.0, 6.0, 8.0, 2.0, 10.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.3724277019500732, -2.2828047275543213, -2.1931819915771484, -2.1035590171813965, -2.0139360427856445, -1.9243131875991821, -1.8346903324127197, -1.7450673580169678, -1.6554445028305054, -1.565821647644043, -1.476198673248291, -1.3865758180618286, -1.2969529628753662, -1.2073299884796143, -1.1177071332931519, -1.0280842781066895, -0.9384613037109375, -0.8488383889198303, -0.7592154741287231, -0.6695926189422607, -0.5799697041511536, -0.4903467893600464, -0.400723934173584, -0.3111010193824768, -0.22147810459136963, -0.13185520470142365, -0.04223230481147766, 0.04739058017730713, 0.1370134949684143, 0.22663640975952148, 0.3162592649459839, 0.40588217973709106, 0.49550509452819824, 0.5851280093193054, 0.6747509241104126, 0.764373779296875, 0.8539966940879822, 0.9436196088790894, 1.0332424640655518, 1.1228654384613037, 1.2124882936477661, 1.3021111488342285, 1.3917341232299805, 1.4813569784164429, 1.5709798336029053, 1.6606028079986572, 1.7502256631851196, 1.839848518371582, 1.929471492767334, 2.019094467163086, 2.108717203140259, 2.1983401775360107, 2.2879631519317627, 2.3775858879089355, 2.4672088623046875, 2.5568318367004395, 2.6464548110961914, 2.7360777854919434, 2.825700521469116, 2.915323495864868, 3.00494647026062, 3.094569206237793, 3.184192180633545, 3.273815155029297, 3.3634378910064697]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 9.0, 9.0, 9.0, 17.0, 17.0, 25.0, 22.0, 30.0, 35.0, 35.0, 43.0, 50.0, 56.0, 57.0, 54.0, 61.0, 69.0, 59.0, 51.0, 56.0, 39.0, 41.0, 38.0, 24.0, 33.0, 16.0, 15.0, 11.0, 8.0, 10.0, 2.0, 3.0, 4.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2504863739013672, -1.208534598350525, -1.166582703590393, -1.1246309280395508, -1.082679033279419, -1.0407272577285767, -0.9987754225730896, -0.9568235874176025, -0.9148717522621155, -0.8729199171066284, -0.8309680819511414, -0.7890162467956543, -0.747064471244812, -0.7051125764846802, -0.6631608009338379, -0.6212089657783508, -0.5792571306228638, -0.5373052954673767, -0.49535346031188965, -0.453401654958725, -0.4114498198032379, -0.36949798464775085, -0.3275461792945862, -0.2855943441390991, -0.24364250898361206, -0.201690673828125, -0.15973885357379913, -0.11778703331947327, -0.0758351981639862, -0.033883363008499146, 0.008068442344665527, 0.05002027750015259, 0.09197211265563965, 0.1339239478111267, 0.17587576806545258, 0.21782758831977844, 0.2597794234752655, 0.30173125863075256, 0.34368306398391724, 0.3856348991394043, 0.42758673429489136, 0.4695385694503784, 0.5114904046058655, 0.5534422397613525, 0.5953940153121948, 0.6373459100723267, 0.679297685623169, 0.721249520778656, 0.7632013559341431, 0.8051531910896301, 0.8471050262451172, 0.8890568017959595, 0.9310086965560913, 0.9729604721069336, 1.0149123668670654, 1.0568641424179077, 1.09881591796875, 1.1407676935195923, 1.1827195882797241, 1.2246713638305664, 1.2666232585906982, 1.3085750341415405, 1.3505268096923828, 1.3924787044525146, 1.4344305992126465]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 2.0, 5.0, 9.0, 6.0, 10.0, 12.0, 15.0, 32.0, 29.0, 43.0, 66.0, 103.0, 130.0, 229.0, 365.0, 608.0, 1338.0, 3888.0, 20911.0, 260750.0, 678350.0, 69107.0, 8355.0, 2150.0, 815.0, 441.0, 264.0, 138.0, 130.0, 74.0, 57.0, 30.0, 18.0, 23.0, 15.0, 10.0, 4.0, 11.0, 4.0, 3.0, 2.0, 1.0, 3.0, 2.0, 2.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.763671875, -1.71063232421875, -1.6575927734375, -1.60455322265625, -1.551513671875, -1.49847412109375, -1.4454345703125, -1.39239501953125, -1.33935546875, -1.28631591796875, -1.2332763671875, -1.18023681640625, -1.127197265625, -1.07415771484375, -1.0211181640625, -0.96807861328125, -0.9150390625, -0.86199951171875, -0.8089599609375, -0.75592041015625, -0.702880859375, -0.64984130859375, -0.5968017578125, -0.54376220703125, -0.49072265625, -0.43768310546875, -0.3846435546875, -0.33160400390625, -0.278564453125, -0.22552490234375, -0.1724853515625, -0.11944580078125, -0.06640625, -0.01336669921875, 0.0396728515625, 0.09271240234375, 0.145751953125, 0.19879150390625, 0.2518310546875, 0.30487060546875, 0.35791015625, 0.41094970703125, 0.4639892578125, 0.51702880859375, 0.570068359375, 0.62310791015625, 0.6761474609375, 0.72918701171875, 0.7822265625, 0.83526611328125, 0.8883056640625, 0.94134521484375, 0.994384765625, 1.04742431640625, 1.1004638671875, 1.15350341796875, 1.20654296875, 1.25958251953125, 1.3126220703125, 1.36566162109375, 1.418701171875, 1.47174072265625, 1.5247802734375, 1.57781982421875, 1.630859375]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 4.0, 2.0, 4.0, 8.0, 13.0, 16.0, 39.0, 55.0, 69.0, 67.0, 93.0, 114.0, 97.0, 118.0, 75.0, 69.0, 47.0, 37.0, 39.0, 14.0, 11.0, 8.0, 6.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.264404296875, -0.25766563415527344, -0.2509269714355469, -0.2441883087158203, -0.23744964599609375, -0.2307109832763672, -0.22397232055664062, -0.21723365783691406, -0.2104949951171875, -0.20375633239746094, -0.19701766967773438, -0.1902790069580078, -0.18354034423828125, -0.1768016815185547, -0.17006301879882812, -0.16332435607910156, -0.156585693359375, -0.14984703063964844, -0.14310836791992188, -0.1363697052001953, -0.12963104248046875, -0.12289237976074219, -0.11615371704101562, -0.10941505432128906, -0.1026763916015625, -0.09593772888183594, -0.08919906616210938, -0.08246040344238281, -0.07572174072265625, -0.06898307800292969, -0.062244415283203125, -0.05550575256347656, -0.04876708984375, -0.04202842712402344, -0.035289764404296875, -0.028551101684570312, -0.02181243896484375, -0.015073776245117188, -0.008335113525390625, -0.0015964508056640625, 0.0051422119140625, 0.011880874633789062, 0.018619537353515625, 0.025358200073242188, 0.03209686279296875, 0.03883552551269531, 0.045574188232421875, 0.05231285095214844, 0.059051513671875, 0.06579017639160156, 0.07252883911132812, 0.07926750183105469, 0.08600616455078125, 0.09274482727050781, 0.09948348999023438, 0.10622215270996094, 0.1129608154296875, 0.11969947814941406, 0.12643814086914062, 0.1331768035888672, 0.13991546630859375, 0.1466541290283203, 0.15339279174804688, 0.16013145446777344, 0.1668701171875]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 6.0, 2.0, 6.0, 4.0, 9.0, 15.0, 14.0, 11.0, 19.0, 34.0, 35.0, 46.0, 70.0, 110.0, 146.0, 208.0, 355.0, 598.0, 1084.0, 2602.0, 7035.0, 23461.0, 89649.0, 324858.0, 411001.0, 135891.0, 34574.0, 9980.0, 3304.0, 1447.0, 681.0, 404.0, 257.0, 196.0, 126.0, 94.0, 65.0, 37.0, 47.0, 23.0, 13.0, 11.0, 9.0, 7.0, 8.0, 3.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.6533203125, -0.6323471069335938, -0.6113739013671875, -0.5904006958007812, -0.569427490234375, -0.5484542846679688, -0.5274810791015625, -0.5065078735351562, -0.48553466796875, -0.46456146240234375, -0.4435882568359375, -0.42261505126953125, -0.401641845703125, -0.38066864013671875, -0.3596954345703125, -0.33872222900390625, -0.3177490234375, -0.29677581787109375, -0.2758026123046875, -0.25482940673828125, -0.233856201171875, -0.21288299560546875, -0.1919097900390625, -0.17093658447265625, -0.14996337890625, -0.12899017333984375, -0.1080169677734375, -0.08704376220703125, -0.066070556640625, -0.04509735107421875, -0.0241241455078125, -0.00315093994140625, 0.017822265625, 0.03879547119140625, 0.0597686767578125, 0.08074188232421875, 0.101715087890625, 0.12268829345703125, 0.1436614990234375, 0.16463470458984375, 0.18560791015625, 0.20658111572265625, 0.2275543212890625, 0.24852752685546875, 0.269500732421875, 0.29047393798828125, 0.3114471435546875, 0.33242034912109375, 0.3533935546875, 0.37436676025390625, 0.3953399658203125, 0.41631317138671875, 0.437286376953125, 0.45825958251953125, 0.4792327880859375, 0.5002059936523438, 0.52117919921875, 0.5421524047851562, 0.5631256103515625, 0.5840988159179688, 0.605072021484375, 0.6260452270507812, 0.6470184326171875, 0.6679916381835938, 0.68896484375]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 3.0, 7.0, 9.0, 10.0, 14.0, 25.0, 24.0, 26.0, 28.0, 37.0, 39.0, 45.0, 35.0, 51.0, 44.0, 44.0, 64.0, 45.0, 64.0, 47.0, 37.0, 43.0, 29.0, 48.0, 28.0, 32.0, 23.0, 22.0, 16.0, 19.0, 12.0, 8.0, 8.0, 4.0, 2.0, 4.0, 1.0, 1.0, 3.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.474365234375, -0.4579505920410156, -0.44153594970703125, -0.4251213073730469, -0.4087066650390625, -0.3922920227050781, -0.37587738037109375, -0.3594627380371094, -0.343048095703125, -0.3266334533691406, -0.31021881103515625, -0.2938041687011719, -0.2773895263671875, -0.2609748840332031, -0.24456024169921875, -0.22814559936523438, -0.21173095703125, -0.19531631469726562, -0.17890167236328125, -0.16248703002929688, -0.1460723876953125, -0.12965774536132812, -0.11324310302734375, -0.09682846069335938, -0.080413818359375, -0.06399917602539062, -0.04758453369140625, -0.031169891357421875, -0.0147552490234375, 0.001659393310546875, 0.01807403564453125, 0.034488677978515625, 0.0509033203125, 0.06731796264648438, 0.08373260498046875, 0.10014724731445312, 0.1165618896484375, 0.13297653198242188, 0.14939117431640625, 0.16580581665039062, 0.182220458984375, 0.19863510131835938, 0.21504974365234375, 0.23146438598632812, 0.2478790283203125, 0.2642936706542969, 0.28070831298828125, 0.2971229553222656, 0.31353759765625, 0.3299522399902344, 0.34636688232421875, 0.3627815246582031, 0.3791961669921875, 0.3956108093261719, 0.41202545166015625, 0.4284400939941406, 0.444854736328125, 0.4612693786621094, 0.47768402099609375, 0.4940986633300781, 0.5105133056640625, 0.5269279479980469, 0.5433425903320312, 0.5597572326660156, 0.576171875]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 5.0, 3.0, 1.0, 5.0, 6.0, 10.0, 11.0, 17.0, 17.0, 53.0, 59.0, 100.0, 189.0, 264.0, 495.0, 956.0, 2007.0, 4832.0, 13599.0, 46829.0, 181553.0, 429887.0, 264842.0, 71769.0, 19240.0, 6529.0, 2555.0, 1259.0, 596.0, 357.0, 195.0, 102.0, 77.0, 48.0, 27.0, 18.0, 14.0, 7.0, 5.0, 7.0, 5.0, 1.0, 1.0, 3.0, 3.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.27099609375, -0.2615394592285156, -0.25208282470703125, -0.24262619018554688, -0.2331695556640625, -0.22371292114257812, -0.21425628662109375, -0.20479965209960938, -0.195343017578125, -0.18588638305664062, -0.17642974853515625, -0.16697311401367188, -0.1575164794921875, -0.14805984497070312, -0.13860321044921875, -0.12914657592773438, -0.11968994140625, -0.11023330688476562, -0.10077667236328125, -0.09132003784179688, -0.0818634033203125, -0.07240676879882812, -0.06295013427734375, -0.053493499755859375, -0.044036865234375, -0.034580230712890625, -0.02512359619140625, -0.015666961669921875, -0.0062103271484375, 0.003246307373046875, 0.01270294189453125, 0.022159576416015625, 0.0316162109375, 0.041072845458984375, 0.05052947998046875, 0.059986114501953125, 0.0694427490234375, 0.07889938354492188, 0.08835601806640625, 0.09781265258789062, 0.107269287109375, 0.11672592163085938, 0.12618255615234375, 0.13563919067382812, 0.1450958251953125, 0.15455245971679688, 0.16400909423828125, 0.17346572875976562, 0.18292236328125, 0.19237899780273438, 0.20183563232421875, 0.21129226684570312, 0.2207489013671875, 0.23020553588867188, 0.23966217041015625, 0.24911880493164062, 0.258575439453125, 0.2680320739746094, 0.27748870849609375, 0.2869453430175781, 0.2964019775390625, 0.3058586120605469, 0.31531524658203125, 0.3247718811035156, 0.334228515625]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 8.0, 2.0, 1.0, 1.0, 3.0, 9.0, 7.0, 8.0, 10.0, 4.0, 7.0, 13.0, 8.0, 12.0, 15.0, 20.0, 30.0, 32.0, 53.0, 55.0, 66.0, 61.0, 68.0, 79.0, 91.0, 70.0, 46.0, 35.0, 31.0, 24.0, 23.0, 17.0, 24.0, 10.0, 10.0, 5.0, 7.0, 7.0, 8.0, 7.0, 5.0, 3.0, 4.0, 4.0, 5.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.547306060791016e-05, -8.297152817249298e-05, -8.04699957370758e-05, -7.796846330165863e-05, -7.546693086624146e-05, -7.296539843082428e-05, -7.04638659954071e-05, -6.796233355998993e-05, -6.546080112457275e-05, -6.295926868915558e-05, -6.04577362537384e-05, -5.795620381832123e-05, -5.545467138290405e-05, -5.295313894748688e-05, -5.04516065120697e-05, -4.795007407665253e-05, -4.544854164123535e-05, -4.2947009205818176e-05, -4.0445476770401e-05, -3.7943944334983826e-05, -3.544241189956665e-05, -3.2940879464149475e-05, -3.04393470287323e-05, -2.7937814593315125e-05, -2.543628215789795e-05, -2.2934749722480774e-05, -2.04332172870636e-05, -1.7931684851646423e-05, -1.5430152416229248e-05, -1.2928619980812073e-05, -1.0427087545394897e-05, -7.925555109977722e-06, -5.424022674560547e-06, -2.9224902391433716e-06, -4.209578037261963e-07, 2.080574631690979e-06, 4.582107067108154e-06, 7.08363950252533e-06, 9.585171937942505e-06, 1.208670437335968e-05, 1.4588236808776855e-05, 1.708976924419403e-05, 1.9591301679611206e-05, 2.209283411502838e-05, 2.4594366550445557e-05, 2.7095898985862732e-05, 2.9597431421279907e-05, 3.209896385669708e-05, 3.460049629211426e-05, 3.710202872753143e-05, 3.960356116294861e-05, 4.2105093598365784e-05, 4.460662603378296e-05, 4.7108158469200134e-05, 4.960969090461731e-05, 5.2111223340034485e-05, 5.461275577545166e-05, 5.7114288210868835e-05, 5.961582064628601e-05, 6.211735308170319e-05, 6.461888551712036e-05, 6.712041795253754e-05, 6.962195038795471e-05, 7.212348282337189e-05, 7.462501525878906e-05]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 4.0, 1.0, 1.0, 1.0, 2.0, 3.0, 6.0, 4.0, 2.0, 6.0, 11.0, 11.0, 18.0, 29.0, 32.0, 60.0, 75.0, 130.0, 180.0, 335.0, 558.0, 1227.0, 2825.0, 9017.0, 38178.0, 204187.0, 519927.0, 215845.0, 40807.0, 9389.0, 2976.0, 1254.0, 595.0, 300.0, 208.0, 109.0, 76.0, 48.0, 45.0, 27.0, 13.0, 13.0, 7.0, 10.0, 2.0, 4.0, 5.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.341064453125, -0.3295097351074219, -0.31795501708984375, -0.3064002990722656, -0.2948455810546875, -0.2832908630371094, -0.27173614501953125, -0.2601814270019531, -0.248626708984375, -0.23707199096679688, -0.22551727294921875, -0.21396255493164062, -0.2024078369140625, -0.19085311889648438, -0.17929840087890625, -0.16774368286132812, -0.15618896484375, -0.14463424682617188, -0.13307952880859375, -0.12152481079101562, -0.1099700927734375, -0.09841537475585938, -0.08686065673828125, -0.07530593872070312, -0.063751220703125, -0.052196502685546875, -0.04064178466796875, -0.029087066650390625, -0.0175323486328125, -0.005977630615234375, 0.00557708740234375, 0.017131805419921875, 0.0286865234375, 0.040241241455078125, 0.05179595947265625, 0.06335067749023438, 0.0749053955078125, 0.08646011352539062, 0.09801483154296875, 0.10956954956054688, 0.121124267578125, 0.13267898559570312, 0.14423370361328125, 0.15578842163085938, 0.1673431396484375, 0.17889785766601562, 0.19045257568359375, 0.20200729370117188, 0.21356201171875, 0.22511672973632812, 0.23667144775390625, 0.24822616577148438, 0.2597808837890625, 0.2713356018066406, 0.28289031982421875, 0.2944450378417969, 0.305999755859375, 0.3175544738769531, 0.32910919189453125, 0.3406639099121094, 0.3522186279296875, 0.3637733459472656, 0.37532806396484375, 0.3868827819824219, 0.3984375]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 0.0, 1.0, 2.0, 3.0, 10.0, 9.0, 17.0, 18.0, 22.0, 35.0, 62.0, 47.0, 69.0, 80.0, 92.0, 101.0, 82.0, 72.0, 71.0, 62.0, 46.0, 31.0, 23.0, 18.0, 8.0, 12.0, 11.0, 5.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.46923828125, -0.4588775634765625, -0.448516845703125, -0.4381561279296875, -0.42779541015625, -0.4174346923828125, -0.407073974609375, -0.3967132568359375, -0.3863525390625, -0.3759918212890625, -0.365631103515625, -0.3552703857421875, -0.34490966796875, -0.3345489501953125, -0.324188232421875, -0.3138275146484375, -0.303466796875, -0.2931060791015625, -0.282745361328125, -0.2723846435546875, -0.26202392578125, -0.2516632080078125, -0.241302490234375, -0.2309417724609375, -0.2205810546875, -0.2102203369140625, -0.199859619140625, -0.1894989013671875, -0.17913818359375, -0.1687774658203125, -0.158416748046875, -0.1480560302734375, -0.1376953125, -0.1273345947265625, -0.116973876953125, -0.1066131591796875, -0.09625244140625, -0.0858917236328125, -0.075531005859375, -0.0651702880859375, -0.0548095703125, -0.0444488525390625, -0.034088134765625, -0.0237274169921875, -0.01336669921875, -0.0030059814453125, 0.007354736328125, 0.0177154541015625, 0.028076171875, 0.0384368896484375, 0.048797607421875, 0.0591583251953125, 0.06951904296875, 0.0798797607421875, 0.090240478515625, 0.1006011962890625, 0.1109619140625, 0.1213226318359375, 0.131683349609375, 0.1420440673828125, 0.15240478515625, 0.1627655029296875, 0.173126220703125, 0.1834869384765625, 0.19384765625]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 3.0, 2.0, 7.0, 11.0, 23.0, 30.0, 61.0, 97.0, 140.0, 148.0, 163.0, 108.0, 96.0, 60.0, 25.0, 16.0, 10.0, 5.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.731440544128418, -4.548333168029785, -4.3652262687683105, -4.182119369506836, -3.999011993408203, -3.8159048557281494, -3.6327977180480957, -3.449690580368042, -3.2665834426879883, -3.0834763050079346, -2.900369167327881, -2.717262029647827, -2.5341548919677734, -2.3510477542877197, -2.167940616607666, -1.9848334789276123, -1.8017263412475586, -1.6186192035675049, -1.4355120658874512, -1.2524049282073975, -1.0692977905273438, -0.88619065284729, -0.7030835151672363, -0.5199763774871826, -0.3368692398071289, -0.1537621021270752, 0.029345035552978516, 0.21245217323303223, 0.39555931091308594, 0.5786664485931396, 0.7617735862731934, 0.9448807239532471, 1.127988338470459, 1.3110954761505127, 1.4942026138305664, 1.6773097515106201, 1.8604168891906738, 2.0435240268707275, 2.2266311645507812, 2.409738302230835, 2.5928454399108887, 2.7759525775909424, 2.959059715270996, 3.14216685295105, 3.3252739906311035, 3.5083811283111572, 3.691488265991211, 3.8745954036712646, 4.057702541351318, 4.240809440612793, 4.423916816711426, 4.607024192810059, 4.790131092071533, 4.973237991333008, 5.156345367431641, 5.339452743530273, 5.522559642791748, 5.705666542053223, 5.8887739181518555, 6.071881294250488, 6.254988193511963, 6.4380950927734375, 6.62120246887207, 6.804309844970703, 6.987416744232178]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 4.0, 6.0, 2.0, 6.0, 4.0, 10.0, 9.0, 14.0, 16.0, 22.0, 19.0, 14.0, 23.0, 27.0, 28.0, 36.0, 55.0, 46.0, 39.0, 48.0, 35.0, 49.0, 47.0, 50.0, 56.0, 39.0, 61.0, 38.0, 28.0, 28.0, 28.0, 28.0, 17.0, 14.0, 13.0, 12.0, 3.0, 9.0, 8.0, 6.0, 3.0, 5.0, 4.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8268818855285645, -2.734602451324463, -2.6423230171203613, -2.5500435829162598, -2.457764148712158, -2.3654849529266357, -2.273205518722534, -2.1809260845184326, -2.088646650314331, -1.9963672161102295, -1.904087781906128, -1.811808466911316, -1.7195290327072144, -1.6272495985031128, -1.5349702835083008, -1.4426908493041992, -1.3504114151000977, -1.258131980895996, -1.1658525466918945, -1.0735732316970825, -0.981293797492981, -0.8890143632888794, -0.7967349886894226, -0.7044556140899658, -0.6121761798858643, -0.5198967456817627, -0.4276173710823059, -0.33533796668052673, -0.24305856227874756, -0.15077915787696838, -0.05849975347518921, 0.03377962112426758, 0.12605905532836914, 0.21833845973014832, 0.3106178641319275, 0.40289726853370667, 0.49517667293548584, 0.5874561071395874, 0.6797354817390442, 0.772014856338501, 0.8642942905426025, 0.9565737247467041, 1.0488531589508057, 1.1411324739456177, 1.2334119081497192, 1.3256913423538208, 1.4179706573486328, 1.5102500915527344, 1.602529525756836, 1.6948089599609375, 1.787088394165039, 1.879367709159851, 1.9716471433639526, 2.0639264583587646, 2.156205892562866, 2.2484853267669678, 2.3407647609710693, 2.433044195175171, 2.5253236293792725, 2.617603063583374, 2.7098822593688965, 2.802161693572998, 2.8944411277770996, 2.986720561981201, 3.0789999961853027]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 4.0, 9.0, 12.0, 11.0, 12.0, 22.0, 23.0, 35.0, 34.0, 50.0, 81.0, 94.0, 172.0, 222.0, 379.0, 703.0, 1564.0, 5188.0, 35809.0, 2444908.0, 1662321.0, 33675.0, 5259.0, 1711.0, 783.0, 438.0, 238.0, 177.0, 102.0, 73.0, 43.0, 41.0, 21.0, 20.0, 14.0, 10.0, 8.0, 8.0, 3.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-2.1875, -2.123138427734375, -2.05877685546875, -1.994415283203125, -1.9300537109375, -1.865692138671875, -1.80133056640625, -1.736968994140625, -1.672607421875, -1.608245849609375, -1.54388427734375, -1.479522705078125, -1.4151611328125, -1.350799560546875, -1.28643798828125, -1.222076416015625, -1.15771484375, -1.093353271484375, -1.02899169921875, -0.964630126953125, -0.9002685546875, -0.835906982421875, -0.77154541015625, -0.707183837890625, -0.642822265625, -0.578460693359375, -0.51409912109375, -0.449737548828125, -0.3853759765625, -0.321014404296875, -0.25665283203125, -0.192291259765625, -0.1279296875, -0.063568115234375, 0.00079345703125, 0.065155029296875, 0.1295166015625, 0.193878173828125, 0.25823974609375, 0.322601318359375, 0.386962890625, 0.451324462890625, 0.51568603515625, 0.580047607421875, 0.6444091796875, 0.708770751953125, 0.77313232421875, 0.837493896484375, 0.90185546875, 0.966217041015625, 1.03057861328125, 1.094940185546875, 1.1593017578125, 1.223663330078125, 1.28802490234375, 1.352386474609375, 1.416748046875, 1.481109619140625, 1.54547119140625, 1.609832763671875, 1.6741943359375, 1.738555908203125, 1.80291748046875, 1.867279052734375, 1.931640625]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 5.0, 7.0, 7.0, 12.0, 22.0, 43.0, 58.0, 64.0, 72.0, 80.0, 98.0, 110.0, 96.0, 84.0, 60.0, 65.0, 38.0, 26.0, 22.0, 11.0, 6.0, 7.0, 3.0, 5.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.27734375, -0.27045631408691406, -0.2635688781738281, -0.2566814422607422, -0.24979400634765625, -0.2429065704345703, -0.23601913452148438, -0.22913169860839844, -0.2222442626953125, -0.21535682678222656, -0.20846939086914062, -0.2015819549560547, -0.19469451904296875, -0.1878070831298828, -0.18091964721679688, -0.17403221130371094, -0.167144775390625, -0.16025733947753906, -0.15336990356445312, -0.1464824676513672, -0.13959503173828125, -0.1327075958251953, -0.12582015991210938, -0.11893272399902344, -0.1120452880859375, -0.10515785217285156, -0.09827041625976562, -0.09138298034667969, -0.08449554443359375, -0.07760810852050781, -0.07072067260742188, -0.06383323669433594, -0.05694580078125, -0.05005836486816406, -0.043170928955078125, -0.03628349304199219, -0.02939605712890625, -0.022508621215820312, -0.015621185302734375, -0.008733749389648438, -0.0018463134765625, 0.0050411224365234375, 0.011928558349609375, 0.018815994262695312, 0.02570343017578125, 0.03259086608886719, 0.039478302001953125, 0.04636573791503906, 0.053253173828125, 0.06014060974121094, 0.06702804565429688, 0.07391548156738281, 0.08080291748046875, 0.08769035339355469, 0.09457778930664062, 0.10146522521972656, 0.1083526611328125, 0.11524009704589844, 0.12212753295898438, 0.1290149688720703, 0.13590240478515625, 0.1427898406982422, 0.14967727661132812, 0.15656471252441406, 0.1634521484375]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 7.0, 7.0, 13.0, 18.0, 21.0, 32.0, 55.0, 112.0, 232.0, 518.0, 1344.0, 3755.0, 15786.0, 118680.0, 3552209.0, 455872.0, 35402.0, 6814.0, 2017.0, 747.0, 317.0, 154.0, 81.0, 39.0, 22.0, 9.0, 9.0, 8.0, 3.0, 2.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.45703125, -1.415069580078125, -1.37310791015625, -1.331146240234375, -1.2891845703125, -1.247222900390625, -1.20526123046875, -1.163299560546875, -1.121337890625, -1.079376220703125, -1.03741455078125, -0.995452880859375, -0.9534912109375, -0.911529541015625, -0.86956787109375, -0.827606201171875, -0.78564453125, -0.743682861328125, -0.70172119140625, -0.659759521484375, -0.6177978515625, -0.575836181640625, -0.53387451171875, -0.491912841796875, -0.449951171875, -0.407989501953125, -0.36602783203125, -0.324066162109375, -0.2821044921875, -0.240142822265625, -0.19818115234375, -0.156219482421875, -0.1142578125, -0.072296142578125, -0.03033447265625, 0.011627197265625, 0.0535888671875, 0.095550537109375, 0.13751220703125, 0.179473876953125, 0.221435546875, 0.263397216796875, 0.30535888671875, 0.347320556640625, 0.3892822265625, 0.431243896484375, 0.47320556640625, 0.515167236328125, 0.55712890625, 0.599090576171875, 0.64105224609375, 0.683013916015625, 0.7249755859375, 0.766937255859375, 0.80889892578125, 0.850860595703125, 0.892822265625, 0.934783935546875, 0.97674560546875, 1.018707275390625, 1.0606689453125, 1.102630615234375, 1.14459228515625, 1.186553955078125, 1.228515625]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 5.0, 7.0, 5.0, 7.0, 18.0, 21.0, 23.0, 28.0, 47.0, 55.0, 80.0, 155.0, 250.0, 509.0, 1282.0, 699.0, 328.0, 176.0, 133.0, 77.0, 58.0, 35.0, 26.0, 13.0, 8.0, 12.0, 8.0, 4.0, 3.0, 4.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.381591796875, -0.3699188232421875, -0.358245849609375, -0.3465728759765625, -0.33489990234375, -0.3232269287109375, -0.311553955078125, -0.2998809814453125, -0.2882080078125, -0.2765350341796875, -0.264862060546875, -0.2531890869140625, -0.24151611328125, -0.2298431396484375, -0.218170166015625, -0.2064971923828125, -0.19482421875, -0.1831512451171875, -0.171478271484375, -0.1598052978515625, -0.14813232421875, -0.1364593505859375, -0.124786376953125, -0.1131134033203125, -0.1014404296875, -0.0897674560546875, -0.078094482421875, -0.0664215087890625, -0.05474853515625, -0.0430755615234375, -0.031402587890625, -0.0197296142578125, -0.008056640625, 0.0036163330078125, 0.015289306640625, 0.0269622802734375, 0.03863525390625, 0.0503082275390625, 0.061981201171875, 0.0736541748046875, 0.0853271484375, 0.0970001220703125, 0.108673095703125, 0.1203460693359375, 0.13201904296875, 0.1436920166015625, 0.155364990234375, 0.1670379638671875, 0.1787109375, 0.1903839111328125, 0.202056884765625, 0.2137298583984375, 0.22540283203125, 0.2370758056640625, 0.248748779296875, 0.2604217529296875, 0.2720947265625, 0.2837677001953125, 0.295440673828125, 0.3071136474609375, 0.31878662109375, 0.3304595947265625, 0.342132568359375, 0.3538055419921875, 0.365478515625]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 4.0, 1.0, 2.0, 12.0, 19.0, 27.0, 68.0, 125.0, 200.0, 195.0, 174.0, 87.0, 50.0, 22.0, 11.0, 4.0, 4.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.545332670211792, -1.4198428392410278, -1.2943530082702637, -1.1688631772994995, -1.0433733463287354, -0.9178835153579712, -0.792393684387207, -0.6669038534164429, -0.5414140224456787, -0.41592419147491455, -0.2904343605041504, -0.16494452953338623, -0.03945469856262207, 0.08603513240814209, 0.21152496337890625, 0.3370147943496704, 0.46250462532043457, 0.5879944562911987, 0.7134842872619629, 0.838974118232727, 0.9644639492034912, 1.0899537801742554, 1.2154436111450195, 1.3409334421157837, 1.4664232730865479, 1.591913104057312, 1.7174029350280762, 1.8428927659988403, 1.9683825969696045, 2.093872547149658, 2.219362258911133, 2.3448519706726074, 2.4703421592712402, 2.595831871032715, 2.7213218212127686, 2.8468117713928223, 2.972301483154297, 3.0977911949157715, 3.223281145095825, 3.348771095275879, 3.4742608070373535, 3.599750518798828, 3.725240468978882, 3.8507304191589355, 3.97622013092041, 4.101709842681885, 4.227199554443359, 4.352689743041992, 4.478179454803467, 4.603669166564941, 4.729159355163574, 4.854649066925049, 4.980138778686523, 5.105628490447998, 5.231118202209473, 5.3566083908081055, 5.48209810256958, 5.607587814331055, 5.7330780029296875, 5.858567714691162, 5.984057426452637, 6.109547138214111, 6.235036849975586, 6.360527038574219, 6.486016750335693]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 6.0, 5.0, 5.0, 6.0, 6.0, 7.0, 11.0, 8.0, 11.0, 18.0, 27.0, 16.0, 28.0, 28.0, 33.0, 37.0, 52.0, 49.0, 34.0, 36.0, 49.0, 38.0, 54.0, 41.0, 40.0, 44.0, 42.0, 43.0, 26.0, 39.0, 28.0, 23.0, 17.0, 23.0, 20.0, 11.0, 11.0, 6.0, 5.0, 8.0, 2.0, 2.0, 1.0, 3.0, 2.0, 5.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.2867462635040283, -1.2483692169189453, -1.2099921703338623, -1.1716152429580688, -1.1332381963729858, -1.0948611497879028, -1.0564841032028198, -1.0181070566177368, -0.9797300696372986, -0.9413530230522156, -0.9029760360717773, -0.8645989894866943, -0.8262219429016113, -0.7878449559211731, -0.7494679093360901, -0.7110909223556519, -0.6727138757705688, -0.6343368291854858, -0.5959598422050476, -0.5575827956199646, -0.5192058086395264, -0.48082876205444336, -0.44245171546936035, -0.40407469868659973, -0.3656976819038391, -0.3273206651210785, -0.28894364833831787, -0.25056660175323486, -0.21218958497047424, -0.17381256818771362, -0.1354355365037918, -0.09705850481987, -0.058681488037109375, -0.020304463803768158, 0.01807256042957306, 0.056449584662914276, 0.0948266088962555, 0.1332036256790161, 0.17158065736293793, 0.20995768904685974, 0.24833470582962036, 0.286711722612381, 0.3250887393951416, 0.3634657859802246, 0.40184280276298523, 0.44021981954574585, 0.47859686613082886, 0.5169738531112671, 0.5553508996963501, 0.5937279462814331, 0.6321049332618713, 0.6704819798469543, 0.7088589668273926, 0.7472360134124756, 0.7856130599975586, 0.8239901065826416, 0.8623670935630798, 0.9007441401481628, 0.9391211271286011, 0.9774981737136841, 1.015875220298767, 1.0542521476745605, 1.0926291942596436, 1.1310062408447266, 1.1693832874298096]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 4.0, 1.0, 3.0, 1.0, 5.0, 5.0, 7.0, 10.0, 13.0, 11.0, 25.0, 23.0, 48.0, 63.0, 83.0, 181.0, 294.0, 643.0, 1666.0, 6687.0, 55763.0, 726125.0, 235078.0, 16691.0, 3144.0, 1002.0, 386.0, 232.0, 133.0, 64.0, 51.0, 37.0, 29.0, 11.0, 13.0, 14.0, 4.0, 4.0, 4.0, 2.0, 1.0, 3.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.458984375, -1.39483642578125, -1.3306884765625, -1.26654052734375, -1.202392578125, -1.13824462890625, -1.0740966796875, -1.00994873046875, -0.94580078125, -0.88165283203125, -0.8175048828125, -0.75335693359375, -0.689208984375, -0.62506103515625, -0.5609130859375, -0.49676513671875, -0.4326171875, -0.36846923828125, -0.3043212890625, -0.24017333984375, -0.176025390625, -0.11187744140625, -0.0477294921875, 0.01641845703125, 0.08056640625, 0.14471435546875, 0.2088623046875, 0.27301025390625, 0.337158203125, 0.40130615234375, 0.4654541015625, 0.52960205078125, 0.59375, 0.65789794921875, 0.7220458984375, 0.78619384765625, 0.850341796875, 0.91448974609375, 0.9786376953125, 1.04278564453125, 1.10693359375, 1.17108154296875, 1.2352294921875, 1.29937744140625, 1.363525390625, 1.42767333984375, 1.4918212890625, 1.55596923828125, 1.6201171875, 1.68426513671875, 1.7484130859375, 1.81256103515625, 1.876708984375, 1.94085693359375, 2.0050048828125, 2.06915283203125, 2.13330078125, 2.19744873046875, 2.2615966796875, 2.32574462890625, 2.389892578125, 2.45404052734375, 2.5181884765625, 2.58233642578125, 2.646484375]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 3.0, 6.0, 17.0, 18.0, 23.0, 39.0, 49.0, 58.0, 65.0, 110.0, 90.0, 92.0, 76.0, 95.0, 65.0, 55.0, 44.0, 31.0, 17.0, 17.0, 13.0, 12.0, 5.0, 4.0, 4.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.281982421875, -0.27482032775878906, -0.2676582336425781, -0.2604961395263672, -0.25333404541015625, -0.2461719512939453, -0.23900985717773438, -0.23184776306152344, -0.2246856689453125, -0.21752357482910156, -0.21036148071289062, -0.2031993865966797, -0.19603729248046875, -0.1888751983642578, -0.18171310424804688, -0.17455101013183594, -0.167388916015625, -0.16022682189941406, -0.15306472778320312, -0.1459026336669922, -0.13874053955078125, -0.1315784454345703, -0.12441635131835938, -0.11725425720214844, -0.1100921630859375, -0.10293006896972656, -0.09576797485351562, -0.08860588073730469, -0.08144378662109375, -0.07428169250488281, -0.06711959838867188, -0.05995750427246094, -0.05279541015625, -0.04563331604003906, -0.038471221923828125, -0.03130912780761719, -0.02414703369140625, -0.016984939575195312, -0.009822845458984375, -0.0026607513427734375, 0.0045013427734375, 0.011663436889648438, 0.018825531005859375, 0.025987625122070312, 0.03314971923828125, 0.04031181335449219, 0.047473907470703125, 0.05463600158691406, 0.061798095703125, 0.06896018981933594, 0.07612228393554688, 0.08328437805175781, 0.09044647216796875, 0.09760856628417969, 0.10477066040039062, 0.11193275451660156, 0.1190948486328125, 0.12625694274902344, 0.13341903686523438, 0.1405811309814453, 0.14774322509765625, 0.1549053192138672, 0.16206741333007812, 0.16922950744628906, 0.1763916015625]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [4.0, 1.0, 1.0, 3.0, 4.0, 3.0, 5.0, 5.0, 5.0, 6.0, 5.0, 11.0, 10.0, 19.0, 27.0, 16.0, 36.0, 58.0, 53.0, 62.0, 100.0, 142.0, 182.0, 353.0, 657.0, 1232.0, 2822.0, 8303.0, 29914.0, 125870.0, 433548.0, 332706.0, 81751.0, 19893.0, 6043.0, 2190.0, 943.0, 529.0, 319.0, 218.0, 109.0, 95.0, 85.0, 47.0, 43.0, 36.0, 23.0, 13.0, 18.0, 11.0, 12.0, 7.0, 10.0, 1.0, 1.0, 3.0, 0.0, 0.0, 5.0, 1.0, 2.0, 0.0, 1.0, 4.0], "bins": [-0.76123046875, -0.7365570068359375, -0.711883544921875, -0.6872100830078125, -0.66253662109375, -0.6378631591796875, -0.613189697265625, -0.5885162353515625, -0.5638427734375, -0.5391693115234375, -0.514495849609375, -0.4898223876953125, -0.46514892578125, -0.4404754638671875, -0.415802001953125, -0.3911285400390625, -0.366455078125, -0.3417816162109375, -0.317108154296875, -0.2924346923828125, -0.26776123046875, -0.2430877685546875, -0.218414306640625, -0.1937408447265625, -0.1690673828125, -0.1443939208984375, -0.119720458984375, -0.0950469970703125, -0.07037353515625, -0.0457000732421875, -0.021026611328125, 0.0036468505859375, 0.0283203125, 0.0529937744140625, 0.077667236328125, 0.1023406982421875, 0.12701416015625, 0.1516876220703125, 0.176361083984375, 0.2010345458984375, 0.2257080078125, 0.2503814697265625, 0.275054931640625, 0.2997283935546875, 0.32440185546875, 0.3490753173828125, 0.373748779296875, 0.3984222412109375, 0.423095703125, 0.4477691650390625, 0.472442626953125, 0.4971160888671875, 0.52178955078125, 0.5464630126953125, 0.571136474609375, 0.5958099365234375, 0.6204833984375, 0.6451568603515625, 0.669830322265625, 0.6945037841796875, 0.71917724609375, 0.7438507080078125, 0.768524169921875, 0.7931976318359375, 0.81787109375]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 6.0, 5.0, 9.0, 12.0, 16.0, 10.0, 8.0, 15.0, 20.0, 30.0, 32.0, 27.0, 42.0, 43.0, 42.0, 52.0, 34.0, 41.0, 47.0, 40.0, 49.0, 38.0, 45.0, 45.0, 30.0, 36.0, 37.0, 42.0, 22.0, 18.0, 22.0, 13.0, 20.0, 12.0, 12.0, 11.0, 9.0, 4.0, 5.0, 1.0, 5.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.476806640625, -0.4592247009277344, -0.44164276123046875, -0.4240608215332031, -0.4064788818359375, -0.3888969421386719, -0.37131500244140625, -0.3537330627441406, -0.336151123046875, -0.3185691833496094, -0.30098724365234375, -0.2834053039550781, -0.2658233642578125, -0.24824142456054688, -0.23065948486328125, -0.21307754516601562, -0.19549560546875, -0.17791366577148438, -0.16033172607421875, -0.14274978637695312, -0.1251678466796875, -0.10758590698242188, -0.09000396728515625, -0.07242202758789062, -0.054840087890625, -0.037258148193359375, -0.01967620849609375, -0.002094268798828125, 0.0154876708984375, 0.033069610595703125, 0.05065155029296875, 0.06823348999023438, 0.0858154296875, 0.10339736938476562, 0.12097930908203125, 0.13856124877929688, 0.1561431884765625, 0.17372512817382812, 0.19130706787109375, 0.20888900756835938, 0.226470947265625, 0.24405288696289062, 0.26163482666015625, 0.2792167663574219, 0.2967987060546875, 0.3143806457519531, 0.33196258544921875, 0.3495445251464844, 0.36712646484375, 0.3847084045410156, 0.40229034423828125, 0.4198722839355469, 0.4374542236328125, 0.4550361633300781, 0.47261810302734375, 0.4902000427246094, 0.507781982421875, 0.5253639221191406, 0.5429458618164062, 0.5605278015136719, 0.5781097412109375, 0.5956916809082031, 0.6132736206054688, 0.6308555603027344, 0.6484375]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [4.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0, 3.0, 3.0, 7.0, 10.0, 8.0, 12.0, 32.0, 25.0, 59.0, 92.0, 187.0, 334.0, 659.0, 2046.0, 9882.0, 142499.0, 815237.0, 68274.0, 6363.0, 1599.0, 551.0, 277.0, 138.0, 98.0, 59.0, 28.0, 23.0, 10.0, 8.0, 9.0, 4.0, 3.0, 3.0, 4.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.732421875, -0.7046661376953125, -0.676910400390625, -0.6491546630859375, -0.62139892578125, -0.5936431884765625, -0.565887451171875, -0.5381317138671875, -0.5103759765625, -0.4826202392578125, -0.454864501953125, -0.4271087646484375, -0.39935302734375, -0.3715972900390625, -0.343841552734375, -0.3160858154296875, -0.288330078125, -0.2605743408203125, -0.232818603515625, -0.2050628662109375, -0.17730712890625, -0.1495513916015625, -0.121795654296875, -0.0940399169921875, -0.0662841796875, -0.0385284423828125, -0.010772705078125, 0.0169830322265625, 0.04473876953125, 0.0724945068359375, 0.100250244140625, 0.1280059814453125, 0.15576171875, 0.1835174560546875, 0.211273193359375, 0.2390289306640625, 0.26678466796875, 0.2945404052734375, 0.322296142578125, 0.3500518798828125, 0.3778076171875, 0.4055633544921875, 0.433319091796875, 0.4610748291015625, 0.48883056640625, 0.5165863037109375, 0.544342041015625, 0.5720977783203125, 0.599853515625, 0.6276092529296875, 0.655364990234375, 0.6831207275390625, 0.71087646484375, 0.7386322021484375, 0.766387939453125, 0.7941436767578125, 0.8218994140625, 0.8496551513671875, 0.877410888671875, 0.9051666259765625, 0.93292236328125, 0.9606781005859375, 0.988433837890625, 1.0161895751953125, 1.0439453125]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 13.0, 13.0, 16.0, 38.0, 75.0, 194.0, 333.0, 180.0, 90.0, 29.0, 17.0, 5.0, 3.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.00063323974609375, -0.0006211623549461365, -0.000609084963798523, -0.0005970075726509094, -0.0005849301815032959, -0.0005728527903556824, -0.0005607753992080688, -0.0005486980080604553, -0.0005366206169128418, -0.0005245432257652283, -0.0005124658346176147, -0.0005003884434700012, -0.0004883110523223877, -0.00047623366117477417, -0.00046415627002716064, -0.0004520788788795471, -0.0004400014877319336, -0.00042792409658432007, -0.00041584670543670654, -0.000403769314289093, -0.0003916919231414795, -0.00037961453199386597, -0.00036753714084625244, -0.0003554597496986389, -0.0003433823585510254, -0.00033130496740341187, -0.00031922757625579834, -0.0003071501851081848, -0.0002950727939605713, -0.00028299540281295776, -0.00027091801166534424, -0.0002588406205177307, -0.0002467632293701172, -0.00023468583822250366, -0.00022260844707489014, -0.0002105310559272766, -0.00019845366477966309, -0.00018637627363204956, -0.00017429888248443604, -0.0001622214913368225, -0.00015014410018920898, -0.00013806670904159546, -0.00012598931789398193, -0.00011391192674636841, -0.00010183453559875488, -8.975714445114136e-05, -7.767975330352783e-05, -6.56023621559143e-05, -5.352497100830078e-05, -4.1447579860687256e-05, -2.937018871307373e-05, -1.7292797565460205e-05, -5.21540641784668e-06, 6.861984729766846e-06, 1.893937587738037e-05, 3.1016767024993896e-05, 4.309415817260742e-05, 5.517154932022095e-05, 6.724894046783447e-05, 7.9326331615448e-05, 9.140372276306152e-05, 0.00010348111391067505, 0.00011555850505828857, 0.0001276358962059021, 0.00013971328735351562]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 0.0, 2.0, 7.0, 8.0, 14.0, 20.0, 31.0, 56.0, 80.0, 149.0, 295.0, 1002.0, 9550.0, 914717.0, 118698.0, 2842.0, 578.0, 221.0, 124.0, 63.0, 38.0, 26.0, 11.0, 8.0, 10.0, 0.0, 5.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0], "bins": [-1.95703125, -1.9052276611328125, -1.853424072265625, -1.8016204833984375, -1.74981689453125, -1.6980133056640625, -1.646209716796875, -1.5944061279296875, -1.5426025390625, -1.4907989501953125, -1.438995361328125, -1.3871917724609375, -1.33538818359375, -1.2835845947265625, -1.231781005859375, -1.1799774169921875, -1.128173828125, -1.0763702392578125, -1.024566650390625, -0.9727630615234375, -0.92095947265625, -0.8691558837890625, -0.817352294921875, -0.7655487060546875, -0.7137451171875, -0.6619415283203125, -0.610137939453125, -0.5583343505859375, -0.50653076171875, -0.4547271728515625, -0.402923583984375, -0.3511199951171875, -0.29931640625, -0.2475128173828125, -0.195709228515625, -0.1439056396484375, -0.09210205078125, -0.0402984619140625, 0.011505126953125, 0.0633087158203125, 0.1151123046875, 0.1669158935546875, 0.218719482421875, 0.2705230712890625, 0.32232666015625, 0.3741302490234375, 0.425933837890625, 0.4777374267578125, 0.529541015625, 0.5813446044921875, 0.633148193359375, 0.6849517822265625, 0.73675537109375, 0.7885589599609375, 0.840362548828125, 0.8921661376953125, 0.9439697265625, 0.9957733154296875, 1.047576904296875, 1.0993804931640625, 1.15118408203125, 1.2029876708984375, 1.254791259765625, 1.3065948486328125, 1.3583984375]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 5.0, 3.0, 11.0, 19.0, 9.0, 20.0, 28.0, 65.0, 79.0, 100.0, 97.0, 109.0, 107.0, 100.0, 89.0, 48.0, 37.0, 17.0, 16.0, 16.0, 14.0, 4.0, 3.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.57568359375, -0.5599098205566406, -0.5441360473632812, -0.5283622741699219, -0.5125885009765625, -0.4968147277832031, -0.48104095458984375, -0.4652671813964844, -0.449493408203125, -0.4337196350097656, -0.41794586181640625, -0.4021720886230469, -0.3863983154296875, -0.3706245422363281, -0.35485076904296875, -0.3390769958496094, -0.32330322265625, -0.3075294494628906, -0.29175567626953125, -0.2759819030761719, -0.2602081298828125, -0.24443435668945312, -0.22866058349609375, -0.21288681030273438, -0.197113037109375, -0.18133926391601562, -0.16556549072265625, -0.14979171752929688, -0.1340179443359375, -0.11824417114257812, -0.10247039794921875, -0.08669662475585938, -0.0709228515625, -0.055149078369140625, -0.03937530517578125, -0.023601531982421875, -0.0078277587890625, 0.007946014404296875, 0.02371978759765625, 0.039493560791015625, 0.055267333984375, 0.07104110717773438, 0.08681488037109375, 0.10258865356445312, 0.1183624267578125, 0.13413619995117188, 0.14990997314453125, 0.16568374633789062, 0.18145751953125, 0.19723129272460938, 0.21300506591796875, 0.22877883911132812, 0.2445526123046875, 0.2603263854980469, 0.27610015869140625, 0.2918739318847656, 0.307647705078125, 0.3234214782714844, 0.33919525146484375, 0.3549690246582031, 0.3707427978515625, 0.3865165710449219, 0.40229034423828125, 0.4180641174316406, 0.433837890625]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 5.0, 12.0, 21.0, 33.0, 67.0, 87.0, 143.0, 190.0, 162.0, 128.0, 64.0, 43.0, 24.0, 14.0, 3.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.981949806213379, -7.737807750701904, -7.493666172027588, -7.249524116516113, -7.005382537841797, -6.761240482330322, -6.517098426818848, -6.272956848144531, -6.028814792633057, -5.784672737121582, -5.540531158447266, -5.296389102935791, -5.052247524261475, -4.80810546875, -4.563963890075684, -4.319821834564209, -4.075679779052734, -3.831537961959839, -3.5873961448669434, -3.3432540893554688, -3.0991122722625732, -2.8549704551696777, -2.6108286380767822, -2.3666868209838867, -2.1225452423095703, -1.8784034252166748, -1.6342614889144897, -1.3901196718215942, -1.1459777355194092, -0.9018359184265137, -0.6576941013336182, -0.4135521650314331, -0.16941022872924805, 0.07473163306713104, 0.31887349486351013, 0.563015341758728, 0.8071572184562683, 1.0512990951538086, 1.295440912246704, 1.5395828485488892, 1.7837246656417847, 2.0278666019439697, 2.2720084190368652, 2.5161502361297607, 2.7602920532226562, 3.004434108734131, 3.2485756874084473, 3.492717742919922, 3.7368595600128174, 3.981001377105713, 4.2251434326171875, 4.469285011291504, 4.7134270668029785, 4.957569122314453, 5.2017107009887695, 5.445852756500244, 5.6899943351745605, 5.934136390686035, 6.178277969360352, 6.422420024871826, 6.666561603546143, 6.910703659057617, 7.154845237731934, 7.398987293243408, 7.643129348754883]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 6.0, 5.0, 11.0, 8.0, 11.0, 14.0, 15.0, 13.0, 22.0, 26.0, 25.0, 36.0, 35.0, 47.0, 51.0, 42.0, 51.0, 52.0, 40.0, 51.0, 52.0, 53.0, 44.0, 40.0, 39.0, 35.0, 23.0, 24.0, 28.0, 24.0, 17.0, 14.0, 12.0, 11.0, 8.0, 5.0, 7.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9368813037872314, -2.835259437561035, -2.733637809753418, -2.6320159435272217, -2.5303940773010254, -2.428772449493408, -2.327150583267212, -2.2255287170410156, -2.1239070892333984, -2.022285223007202, -1.9206634759902954, -1.8190417289733887, -1.7174198627471924, -1.6157981157302856, -1.514176368713379, -1.4125545024871826, -1.3109326362609863, -1.2093108892440796, -1.1076890230178833, -1.0060672760009766, -0.904445469379425, -0.8028236627578735, -0.7012019157409668, -0.5995801091194153, -0.49795830249786377, -0.39633649587631226, -0.29471471905708313, -0.193092942237854, -0.09147113561630249, 0.010150671005249023, 0.11177241802215576, 0.21339422464370728, 0.3150162696838379, 0.4166380763053894, 0.5182598829269409, 0.6198816299438477, 0.7215034365653992, 0.8231252431869507, 0.9247469902038574, 1.0263688564300537, 1.1279906034469604, 1.2296123504638672, 1.3312342166900635, 1.4328559637069702, 1.534477710723877, 1.6360995769500732, 1.73772132396698, 1.8393430709838867, 1.940964937210083, 2.0425868034362793, 2.1442084312438965, 2.2458302974700928, 2.347452163696289, 2.4490737915039062, 2.5506956577301025, 2.652317523956299, 2.753939151763916, 2.8555610179901123, 2.9571826457977295, 3.058804512023926, 3.160426378250122, 3.2620482444763184, 3.3636698722839355, 3.465291738510132, 3.566913604736328]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 7.0, 4.0, 8.0, 6.0, 6.0, 10.0, 10.0, 31.0, 26.0, 58.0, 63.0, 95.0, 158.0, 388.0, 959.0, 3720.0, 36620.0, 3913386.0, 225013.0, 10465.0, 1897.0, 631.0, 309.0, 143.0, 100.0, 60.0, 39.0, 23.0, 19.0, 10.0, 7.0, 7.0, 5.0, 5.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.5703125, -2.486724853515625, -2.40313720703125, -2.319549560546875, -2.2359619140625, -2.152374267578125, -2.06878662109375, -1.985198974609375, -1.901611328125, -1.818023681640625, -1.73443603515625, -1.650848388671875, -1.5672607421875, -1.483673095703125, -1.40008544921875, -1.316497802734375, -1.23291015625, -1.149322509765625, -1.06573486328125, -0.982147216796875, -0.8985595703125, -0.814971923828125, -0.73138427734375, -0.647796630859375, -0.564208984375, -0.480621337890625, -0.39703369140625, -0.313446044921875, -0.2298583984375, -0.146270751953125, -0.06268310546875, 0.020904541015625, 0.1044921875, 0.188079833984375, 0.27166748046875, 0.355255126953125, 0.4388427734375, 0.522430419921875, 0.60601806640625, 0.689605712890625, 0.773193359375, 0.856781005859375, 0.94036865234375, 1.023956298828125, 1.1075439453125, 1.191131591796875, 1.27471923828125, 1.358306884765625, 1.44189453125, 1.525482177734375, 1.60906982421875, 1.692657470703125, 1.7762451171875, 1.859832763671875, 1.94342041015625, 2.027008056640625, 2.110595703125, 2.194183349609375, 2.27777099609375, 2.361358642578125, 2.4449462890625, 2.528533935546875, 2.61212158203125, 2.695709228515625, 2.779296875]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 7.0, 9.0, 12.0, 16.0, 26.0, 41.0, 53.0, 63.0, 73.0, 70.0, 82.0, 81.0, 82.0, 69.0, 77.0, 57.0, 38.0, 43.0, 36.0, 16.0, 16.0, 16.0, 6.0, 6.0, 6.0, 4.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.275390625, -0.268463134765625, -0.26153564453125, -0.254608154296875, -0.2476806640625, -0.240753173828125, -0.23382568359375, -0.226898193359375, -0.219970703125, -0.213043212890625, -0.20611572265625, -0.199188232421875, -0.1922607421875, -0.185333251953125, -0.17840576171875, -0.171478271484375, -0.16455078125, -0.157623291015625, -0.15069580078125, -0.143768310546875, -0.1368408203125, -0.129913330078125, -0.12298583984375, -0.116058349609375, -0.109130859375, -0.102203369140625, -0.09527587890625, -0.088348388671875, -0.0814208984375, -0.074493408203125, -0.06756591796875, -0.060638427734375, -0.0537109375, -0.046783447265625, -0.03985595703125, -0.032928466796875, -0.0260009765625, -0.019073486328125, -0.01214599609375, -0.005218505859375, 0.001708984375, 0.008636474609375, 0.01556396484375, 0.022491455078125, 0.0294189453125, 0.036346435546875, 0.04327392578125, 0.050201416015625, 0.05712890625, 0.064056396484375, 0.07098388671875, 0.077911376953125, 0.0848388671875, 0.091766357421875, 0.09869384765625, 0.105621337890625, 0.112548828125, 0.119476318359375, 0.12640380859375, 0.133331298828125, 0.1402587890625, 0.147186279296875, 0.15411376953125, 0.161041259765625, 0.16796875]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 1.0, 10.0, 12.0, 19.0, 29.0, 49.0, 89.0, 114.0, 228.0, 477.0, 1644.0, 12462.0, 2526764.0, 1638130.0, 11591.0, 1576.0, 550.0, 230.0, 136.0, 57.0, 36.0, 30.0, 22.0, 14.0, 7.0, 6.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.05078125, -2.955413818359375, -2.86004638671875, -2.764678955078125, -2.6693115234375, -2.573944091796875, -2.47857666015625, -2.383209228515625, -2.287841796875, -2.192474365234375, -2.09710693359375, -2.001739501953125, -1.9063720703125, -1.811004638671875, -1.71563720703125, -1.620269775390625, -1.52490234375, -1.429534912109375, -1.33416748046875, -1.238800048828125, -1.1434326171875, -1.048065185546875, -0.95269775390625, -0.857330322265625, -0.761962890625, -0.666595458984375, -0.57122802734375, -0.475860595703125, -0.3804931640625, -0.285125732421875, -0.18975830078125, -0.094390869140625, 0.0009765625, 0.096343994140625, 0.19171142578125, 0.287078857421875, 0.3824462890625, 0.477813720703125, 0.57318115234375, 0.668548583984375, 0.763916015625, 0.859283447265625, 0.95465087890625, 1.050018310546875, 1.1453857421875, 1.240753173828125, 1.33612060546875, 1.431488037109375, 1.52685546875, 1.622222900390625, 1.71759033203125, 1.812957763671875, 1.9083251953125, 2.003692626953125, 2.09906005859375, 2.194427490234375, 2.289794921875, 2.385162353515625, 2.48052978515625, 2.575897216796875, 2.6712646484375, 2.766632080078125, 2.86199951171875, 2.957366943359375, 3.052734375]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 5.0, 4.0, 6.0, 5.0, 14.0, 17.0, 27.0, 48.0, 89.0, 164.0, 377.0, 1757.0, 1028.0, 265.0, 125.0, 69.0, 31.0, 20.0, 9.0, 10.0, 6.0, 4.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.44140625, -0.4229278564453125, -0.404449462890625, -0.3859710693359375, -0.36749267578125, -0.3490142822265625, -0.330535888671875, -0.3120574951171875, -0.2935791015625, -0.2751007080078125, -0.256622314453125, -0.2381439208984375, -0.21966552734375, -0.2011871337890625, -0.182708740234375, -0.1642303466796875, -0.145751953125, -0.1272735595703125, -0.108795166015625, -0.0903167724609375, -0.07183837890625, -0.0533599853515625, -0.034881591796875, -0.0164031982421875, 0.0020751953125, 0.0205535888671875, 0.039031982421875, 0.0575103759765625, 0.07598876953125, 0.0944671630859375, 0.112945556640625, 0.1314239501953125, 0.14990234375, 0.1683807373046875, 0.186859130859375, 0.2053375244140625, 0.22381591796875, 0.2422943115234375, 0.260772705078125, 0.2792510986328125, 0.2977294921875, 0.3162078857421875, 0.334686279296875, 0.3531646728515625, 0.37164306640625, 0.3901214599609375, 0.408599853515625, 0.4270782470703125, 0.445556640625, 0.4640350341796875, 0.482513427734375, 0.5009918212890625, 0.51947021484375, 0.5379486083984375, 0.556427001953125, 0.5749053955078125, 0.5933837890625, 0.6118621826171875, 0.630340576171875, 0.6488189697265625, 0.66729736328125, 0.6857757568359375, 0.704254150390625, 0.7227325439453125, 0.7412109375]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 5.0, 4.0, 6.0, 16.0, 56.0, 105.0, 218.0, 298.0, 182.0, 77.0, 19.0, 10.0, 5.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.7136688232421875, -7.544812202453613, -7.375955581665039, -7.207098960876465, -7.038242340087891, -6.869385719299316, -6.700529098510742, -6.53167200088501, -6.3628153800964355, -6.193958759307861, -6.025102138519287, -5.856245517730713, -5.687388896942139, -5.518531799316406, -5.349675178527832, -5.180818557739258, -5.011961936950684, -4.843105316162109, -4.674248695373535, -4.505392074584961, -4.336535453796387, -4.1676788330078125, -3.998821973800659, -3.829965114593506, -3.66110897064209, -3.4922523498535156, -3.3233957290649414, -3.154539108276367, -2.985682249069214, -2.8168256282806396, -2.6479690074920654, -2.479112148284912, -2.310255527496338, -2.1413989067077637, -1.9725421667099, -1.8036855459213257, -1.634828805923462, -1.4659721851348877, -1.2971155643463135, -1.1282588243484497, -0.9594022035598755, -0.7905455231666565, -0.6216888427734375, -0.4528322219848633, -0.2839755415916443, -0.11511886119842529, 0.053737759590148926, 0.2225944995880127, 0.3914511203765869, 0.5603078007698059, 0.7291644811630249, 0.8980211019515991, 1.066877841949463, 1.235734462738037, 1.4045910835266113, 1.573447823524475, 1.7423044443130493, 1.9111610651016235, 2.0800178050994873, 2.2488744258880615, 2.4177310466766357, 2.586587905883789, 2.7554445266723633, 2.9243011474609375, 3.0931577682495117]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 5.0, 2.0, 2.0, 5.0, 8.0, 9.0, 10.0, 15.0, 17.0, 14.0, 25.0, 28.0, 30.0, 36.0, 43.0, 48.0, 60.0, 61.0, 54.0, 62.0, 54.0, 56.0, 60.0, 63.0, 50.0, 41.0, 34.0, 20.0, 23.0, 20.0, 18.0, 7.0, 10.0, 5.0, 5.0, 7.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.6444146633148193, -1.5893096923828125, -1.5342048406600952, -1.4790998697280884, -1.423995018005371, -1.3688900470733643, -1.3137850761413574, -1.2586801052093506, -1.2035752534866333, -1.1484702825546265, -1.0933654308319092, -1.0382604598999023, -0.9831555485725403, -0.9280506372451782, -0.8729456663131714, -0.8178407549858093, -0.7627358436584473, -0.7076309323310852, -0.6525260210037231, -0.5974210500717163, -0.5423161387443542, -0.4872112274169922, -0.43210628628730774, -0.3770013451576233, -0.32189643383026123, -0.26679152250289917, -0.21168658137321472, -0.15658165514469147, -0.10147672891616821, -0.04637181758880615, 0.008733123540878296, 0.06383806467056274, 0.1189429759979248, 0.17404790222644806, 0.2291528284549713, 0.28425776958465576, 0.3393626809120178, 0.3944675922393799, 0.44957253336906433, 0.5046774744987488, 0.5597823858261108, 0.6148872971534729, 0.669992208480835, 0.7250971794128418, 0.7802020907402039, 0.8353070020675659, 0.8904119729995728, 0.9455168843269348, 1.0006217956542969, 1.0557267665863037, 1.110831618309021, 1.1659365892410278, 1.2210414409637451, 1.276146411895752, 1.3312513828277588, 1.3863563537597656, 1.441461205482483, 1.4965661764144897, 1.551671028137207, 1.6067759990692139, 1.6618809700012207, 1.716985821723938, 1.7720907926559448, 1.827195644378662, 1.882300615310669]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 4.0, 4.0, 1.0, 13.0, 3.0, 7.0, 14.0, 14.0, 22.0, 22.0, 31.0, 48.0, 92.0, 131.0, 207.0, 441.0, 1009.0, 3575.0, 21835.0, 329025.0, 633819.0, 49643.0, 5835.0, 1488.0, 597.0, 243.0, 152.0, 89.0, 56.0, 37.0, 27.0, 23.0, 18.0, 8.0, 5.0, 4.0, 3.0, 4.0, 5.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0], "bins": [-1.6923828125, -1.6343536376953125, -1.576324462890625, -1.5182952880859375, -1.46026611328125, -1.4022369384765625, -1.344207763671875, -1.2861785888671875, -1.2281494140625, -1.1701202392578125, -1.112091064453125, -1.0540618896484375, -0.99603271484375, -0.9380035400390625, -0.879974365234375, -0.8219451904296875, -0.763916015625, -0.7058868408203125, -0.647857666015625, -0.5898284912109375, -0.53179931640625, -0.4737701416015625, -0.415740966796875, -0.3577117919921875, -0.2996826171875, -0.2416534423828125, -0.183624267578125, -0.1255950927734375, -0.06756591796875, -0.0095367431640625, 0.048492431640625, 0.1065216064453125, 0.16455078125, 0.2225799560546875, 0.280609130859375, 0.3386383056640625, 0.39666748046875, 0.4546966552734375, 0.512725830078125, 0.5707550048828125, 0.6287841796875, 0.6868133544921875, 0.744842529296875, 0.8028717041015625, 0.86090087890625, 0.9189300537109375, 0.976959228515625, 1.0349884033203125, 1.093017578125, 1.1510467529296875, 1.209075927734375, 1.2671051025390625, 1.32513427734375, 1.3831634521484375, 1.441192626953125, 1.4992218017578125, 1.5572509765625, 1.6152801513671875, 1.673309326171875, 1.7313385009765625, 1.78936767578125, 1.8473968505859375, 1.905426025390625, 1.9634552001953125, 2.021484375]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 2.0, 7.0, 6.0, 16.0, 12.0, 18.0, 27.0, 34.0, 34.0, 45.0, 51.0, 66.0, 70.0, 66.0, 71.0, 67.0, 73.0, 58.0, 51.0, 49.0, 35.0, 29.0, 36.0, 23.0, 14.0, 13.0, 12.0, 8.0, 2.0, 6.0, 1.0, 4.0, 0.0, 2.0, 1.0, 2.0, 1.0], "bins": [-0.270751953125, -0.26425933837890625, -0.2577667236328125, -0.25127410888671875, -0.244781494140625, -0.23828887939453125, -0.2317962646484375, -0.22530364990234375, -0.21881103515625, -0.21231842041015625, -0.2058258056640625, -0.19933319091796875, -0.192840576171875, -0.18634796142578125, -0.1798553466796875, -0.17336273193359375, -0.1668701171875, -0.16037750244140625, -0.1538848876953125, -0.14739227294921875, -0.140899658203125, -0.13440704345703125, -0.1279144287109375, -0.12142181396484375, -0.11492919921875, -0.10843658447265625, -0.1019439697265625, -0.09545135498046875, -0.088958740234375, -0.08246612548828125, -0.0759735107421875, -0.06948089599609375, -0.06298828125, -0.05649566650390625, -0.0500030517578125, -0.04351043701171875, -0.037017822265625, -0.03052520751953125, -0.0240325927734375, -0.01753997802734375, -0.01104736328125, -0.00455474853515625, 0.0019378662109375, 0.00843048095703125, 0.014923095703125, 0.02141571044921875, 0.0279083251953125, 0.03440093994140625, 0.0408935546875, 0.04738616943359375, 0.0538787841796875, 0.06037139892578125, 0.066864013671875, 0.07335662841796875, 0.0798492431640625, 0.08634185791015625, 0.09283447265625, 0.09932708740234375, 0.1058197021484375, 0.11231231689453125, 0.118804931640625, 0.12529754638671875, 0.1317901611328125, 0.13828277587890625, 0.144775390625]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 3.0, 4.0, 4.0, 3.0, 7.0, 12.0, 23.0, 9.0, 24.0, 37.0, 48.0, 51.0, 100.0, 209.0, 419.0, 1239.0, 3881.0, 16084.0, 80325.0, 368982.0, 441297.0, 107432.0, 20841.0, 4962.0, 1468.0, 518.0, 222.0, 113.0, 80.0, 36.0, 34.0, 28.0, 13.0, 15.0, 13.0, 6.0, 6.0, 4.0, 0.0, 2.0, 1.0, 5.0, 1.0, 1.0, 1.0], "bins": [-1.0595703125, -1.0324630737304688, -1.0053558349609375, -0.9782485961914062, -0.951141357421875, -0.9240341186523438, -0.8969268798828125, -0.8698196411132812, -0.84271240234375, -0.8156051635742188, -0.7884979248046875, -0.7613906860351562, -0.734283447265625, -0.7071762084960938, -0.6800689697265625, -0.6529617309570312, -0.6258544921875, -0.5987472534179688, -0.5716400146484375, -0.5445327758789062, -0.517425537109375, -0.49031829833984375, -0.4632110595703125, -0.43610382080078125, -0.40899658203125, -0.38188934326171875, -0.3547821044921875, -0.32767486572265625, -0.300567626953125, -0.27346038818359375, -0.2463531494140625, -0.21924591064453125, -0.192138671875, -0.16503143310546875, -0.1379241943359375, -0.11081695556640625, -0.083709716796875, -0.05660247802734375, -0.0294952392578125, -0.00238800048828125, 0.02471923828125, 0.05182647705078125, 0.0789337158203125, 0.10604095458984375, 0.133148193359375, 0.16025543212890625, 0.1873626708984375, 0.21446990966796875, 0.2415771484375, 0.26868438720703125, 0.2957916259765625, 0.32289886474609375, 0.350006103515625, 0.37711334228515625, 0.4042205810546875, 0.43132781982421875, 0.45843505859375, 0.48554229736328125, 0.5126495361328125, 0.5397567749023438, 0.566864013671875, 0.5939712524414062, 0.6210784912109375, 0.6481857299804688, 0.67529296875]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 2.0, 3.0, 7.0, 12.0, 12.0, 11.0, 15.0, 19.0, 19.0, 21.0, 35.0, 40.0, 42.0, 51.0, 56.0, 66.0, 68.0, 57.0, 50.0, 72.0, 66.0, 51.0, 52.0, 34.0, 29.0, 35.0, 17.0, 24.0, 12.0, 7.0, 7.0, 9.0, 9.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.0595703125, -1.03350830078125, -1.0074462890625, -0.98138427734375, -0.955322265625, -0.92926025390625, -0.9031982421875, -0.87713623046875, -0.85107421875, -0.82501220703125, -0.7989501953125, -0.77288818359375, -0.746826171875, -0.72076416015625, -0.6947021484375, -0.66864013671875, -0.642578125, -0.61651611328125, -0.5904541015625, -0.56439208984375, -0.538330078125, -0.51226806640625, -0.4862060546875, -0.46014404296875, -0.43408203125, -0.40802001953125, -0.3819580078125, -0.35589599609375, -0.329833984375, -0.30377197265625, -0.2777099609375, -0.25164794921875, -0.2255859375, -0.19952392578125, -0.1734619140625, -0.14739990234375, -0.121337890625, -0.09527587890625, -0.0692138671875, -0.04315185546875, -0.01708984375, 0.00897216796875, 0.0350341796875, 0.06109619140625, 0.087158203125, 0.11322021484375, 0.1392822265625, 0.16534423828125, 0.19140625, 0.21746826171875, 0.2435302734375, 0.26959228515625, 0.295654296875, 0.32171630859375, 0.3477783203125, 0.37384033203125, 0.39990234375, 0.42596435546875, 0.4520263671875, 0.47808837890625, 0.504150390625, 0.53021240234375, 0.5562744140625, 0.58233642578125, 0.6083984375]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 5.0, 2.0, 1.0, 3.0, 10.0, 9.0, 15.0, 13.0, 23.0, 33.0, 44.0, 72.0, 125.0, 231.0, 521.0, 1740.0, 7855.0, 68324.0, 552323.0, 373617.0, 36398.0, 4912.0, 1323.0, 440.0, 183.0, 104.0, 60.0, 54.0, 41.0, 23.0, 14.0, 14.0, 9.0, 6.0, 6.0, 1.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.533203125, -0.51593017578125, -0.4986572265625, -0.48138427734375, -0.464111328125, -0.44683837890625, -0.4295654296875, -0.41229248046875, -0.39501953125, -0.37774658203125, -0.3604736328125, -0.34320068359375, -0.325927734375, -0.30865478515625, -0.2913818359375, -0.27410888671875, -0.2568359375, -0.23956298828125, -0.2222900390625, -0.20501708984375, -0.187744140625, -0.17047119140625, -0.1531982421875, -0.13592529296875, -0.11865234375, -0.10137939453125, -0.0841064453125, -0.06683349609375, -0.049560546875, -0.03228759765625, -0.0150146484375, 0.00225830078125, 0.01953125, 0.03680419921875, 0.0540771484375, 0.07135009765625, 0.088623046875, 0.10589599609375, 0.1231689453125, 0.14044189453125, 0.15771484375, 0.17498779296875, 0.1922607421875, 0.20953369140625, 0.226806640625, 0.24407958984375, 0.2613525390625, 0.27862548828125, 0.2958984375, 0.31317138671875, 0.3304443359375, 0.34771728515625, 0.364990234375, 0.38226318359375, 0.3995361328125, 0.41680908203125, 0.43408203125, 0.45135498046875, 0.4686279296875, 0.48590087890625, 0.503173828125, 0.52044677734375, 0.5377197265625, 0.55499267578125, 0.572265625]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 2.0, 8.0, 14.0, 17.0, 21.0, 48.0, 74.0, 90.0, 141.0, 151.0, 136.0, 111.0, 72.0, 44.0, 23.0, 22.0, 16.0, 7.0, 5.0, 4.0, 1.0, 3.0, 0.0, 2.0, 3.0], "bins": [-0.0001881122589111328, -0.00018424354493618011, -0.00018037483096122742, -0.00017650611698627472, -0.00017263740301132202, -0.00016876868903636932, -0.00016489997506141663, -0.00016103126108646393, -0.00015716254711151123, -0.00015329383313655853, -0.00014942511916160583, -0.00014555640518665314, -0.00014168769121170044, -0.00013781897723674774, -0.00013395026326179504, -0.00013008154928684235, -0.00012621283531188965, -0.00012234412133693695, -0.00011847540736198425, -0.00011460669338703156, -0.00011073797941207886, -0.00010686926543712616, -0.00010300055146217346, -9.913183748722076e-05, -9.526312351226807e-05, -9.139440953731537e-05, -8.752569556236267e-05, -8.365698158740997e-05, -7.978826761245728e-05, -7.591955363750458e-05, -7.205083966255188e-05, -6.818212568759918e-05, -6.431341171264648e-05, -6.044469773769379e-05, -5.657598376274109e-05, -5.270726978778839e-05, -4.883855581283569e-05, -4.4969841837882996e-05, -4.11011278629303e-05, -3.72324138879776e-05, -3.33636999130249e-05, -2.9494985938072205e-05, -2.5626271963119507e-05, -2.175755798816681e-05, -1.788884401321411e-05, -1.4020130038261414e-05, -1.0151416063308716e-05, -6.282702088356018e-06, -2.4139881134033203e-06, 1.4547258615493774e-06, 5.323439836502075e-06, 9.192153811454773e-06, 1.306086778640747e-05, 1.692958176136017e-05, 2.0798295736312866e-05, 2.4667009711265564e-05, 2.8535723686218262e-05, 3.240443766117096e-05, 3.627315163612366e-05, 4.0141865611076355e-05, 4.401057958602905e-05, 4.787929356098175e-05, 5.174800753593445e-05, 5.5616721510887146e-05, 5.9485435485839844e-05]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 1.0, 2.0, 1.0, 3.0, 4.0, 8.0, 14.0, 29.0, 35.0, 72.0, 134.0, 260.0, 696.0, 2208.0, 13769.0, 380423.0, 620703.0, 25742.0, 2989.0, 838.0, 319.0, 144.0, 61.0, 36.0, 25.0, 16.0, 12.0, 6.0, 2.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.59765625, -0.5738525390625, -0.550048828125, -0.5262451171875, -0.50244140625, -0.4786376953125, -0.454833984375, -0.4310302734375, -0.4072265625, -0.3834228515625, -0.359619140625, -0.3358154296875, -0.31201171875, -0.2882080078125, -0.264404296875, -0.2406005859375, -0.216796875, -0.1929931640625, -0.169189453125, -0.1453857421875, -0.12158203125, -0.0977783203125, -0.073974609375, -0.0501708984375, -0.0263671875, -0.0025634765625, 0.021240234375, 0.0450439453125, 0.06884765625, 0.0926513671875, 0.116455078125, 0.1402587890625, 0.1640625, 0.1878662109375, 0.211669921875, 0.2354736328125, 0.25927734375, 0.2830810546875, 0.306884765625, 0.3306884765625, 0.3544921875, 0.3782958984375, 0.402099609375, 0.4259033203125, 0.44970703125, 0.4735107421875, 0.497314453125, 0.5211181640625, 0.544921875, 0.5687255859375, 0.592529296875, 0.6163330078125, 0.64013671875, 0.6639404296875, 0.687744140625, 0.7115478515625, 0.7353515625, 0.7591552734375, 0.782958984375, 0.8067626953125, 0.83056640625, 0.8543701171875, 0.878173828125, 0.9019775390625, 0.92578125]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 9.0, 9.0, 5.0, 17.0, 21.0, 36.0, 30.0, 50.0, 66.0, 82.0, 87.0, 90.0, 116.0, 86.0, 80.0, 51.0, 49.0, 34.0, 30.0, 21.0, 12.0, 7.0, 5.0, 3.0, 4.0, 2.0, 0.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.3115234375, -0.2994384765625, -0.287353515625, -0.2752685546875, -0.26318359375, -0.2510986328125, -0.239013671875, -0.2269287109375, -0.21484375, -0.2027587890625, -0.190673828125, -0.1785888671875, -0.16650390625, -0.1544189453125, -0.142333984375, -0.1302490234375, -0.1181640625, -0.1060791015625, -0.093994140625, -0.0819091796875, -0.06982421875, -0.0577392578125, -0.045654296875, -0.0335693359375, -0.021484375, -0.0093994140625, 0.002685546875, 0.0147705078125, 0.02685546875, 0.0389404296875, 0.051025390625, 0.0631103515625, 0.0751953125, 0.0872802734375, 0.099365234375, 0.1114501953125, 0.12353515625, 0.1356201171875, 0.147705078125, 0.1597900390625, 0.171875, 0.1839599609375, 0.196044921875, 0.2081298828125, 0.22021484375, 0.2322998046875, 0.244384765625, 0.2564697265625, 0.2685546875, 0.2806396484375, 0.292724609375, 0.3048095703125, 0.31689453125, 0.3289794921875, 0.341064453125, 0.3531494140625, 0.365234375, 0.3773193359375, 0.389404296875, 0.4014892578125, 0.41357421875, 0.4256591796875, 0.437744140625, 0.4498291015625, 0.4619140625]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 5.0, 17.0, 38.0, 98.0, 195.0, 307.0, 183.0, 110.0, 40.0, 9.0, 7.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.664787292480469, -9.301289558410645, -8.937792778015137, -8.574295043945312, -8.210797309875488, -7.847300052642822, -7.483802795410156, -7.120305061340332, -6.756807804107666, -6.393310546875, -6.029812812805176, -5.66631555557251, -5.302818298339844, -4.9393205642700195, -4.5758233070373535, -4.2123260498046875, -3.8488283157348633, -3.485330820083618, -3.121833324432373, -2.758336067199707, -2.394838571548462, -2.031341075897217, -1.6678438186645508, -1.3043463230133057, -0.9408488273620605, -0.5773513913154602, -0.21385395526885986, 0.1496434211730957, 0.5131409168243408, 0.8766384124755859, 1.240135669708252, 1.603633165359497, 1.9671306610107422, 2.3306281566619873, 2.6941256523132324, 3.0576229095458984, 3.4211204051971436, 3.7846179008483887, 4.148115158081055, 4.511612892150879, 4.875110149383545, 5.238607406616211, 5.602105140686035, 5.965602397918701, 6.329099655151367, 6.692597389221191, 7.056094646453857, 7.419591903686523, 7.783089637756348, 8.146587371826172, 8.51008415222168, 8.873581886291504, 9.237079620361328, 9.600576400756836, 9.96407413482666, 10.327571868896484, 10.691068649291992, 11.054566383361816, 11.418063163757324, 11.781560897827148, 12.145058631896973, 12.508556365966797, 12.872053146362305, 13.235550880432129, 13.599048614501953]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 3.0, 3.0, 10.0, 4.0, 13.0, 5.0, 8.0, 17.0, 14.0, 16.0, 24.0, 17.0, 27.0, 25.0, 31.0, 26.0, 25.0, 27.0, 32.0, 29.0, 39.0, 31.0, 36.0, 50.0, 39.0, 26.0, 30.0, 53.0, 36.0, 35.0, 25.0, 33.0, 24.0, 27.0, 18.0, 23.0, 22.0, 15.0, 17.0, 16.0, 10.0, 8.0, 10.0, 3.0, 7.0, 2.0, 7.0, 6.0, 1.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-2.555950164794922, -2.475146770477295, -2.394343376159668, -2.313539743423462, -2.232736349105835, -2.151932954788208, -2.071129560470581, -1.990326166152954, -1.9095226526260376, -1.8287192583084106, -1.7479157447814941, -1.6671123504638672, -1.5863089561462402, -1.5055054426193237, -1.4247020483016968, -1.3438985347747803, -1.2630951404571533, -1.1822917461395264, -1.1014882326126099, -1.020684838294983, -0.9398813843727112, -0.8590779304504395, -0.7782745361328125, -0.6974710822105408, -0.616667628288269, -0.5358641743659973, -0.455060750246048, -0.37425732612609863, -0.2934538722038269, -0.21265041828155518, -0.13184699416160583, -0.051043570041656494, 0.029759883880615234, 0.11056332290172577, 0.1913667619228363, 0.27217018604278564, 0.3529736399650574, 0.4337770938873291, 0.514580488204956, 0.5953839421272278, 0.6761873960494995, 0.7569908499717712, 0.837794303894043, 0.9185976982116699, 0.9994011521339417, 1.0802046060562134, 1.1610080003738403, 1.2418115139007568, 1.3226149082183838, 1.4034183025360107, 1.4842218160629272, 1.5650252103805542, 1.6458287239074707, 1.7266321182250977, 1.8074355125427246, 1.8882389068603516, 1.969042420387268, 2.0498459339141846, 2.1306493282318115, 2.2114527225494385, 2.2922561168670654, 2.3730597496032715, 2.4538631439208984, 2.5346665382385254, 2.6154699325561523]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [3.0, 3.0, 5.0, 4.0, 4.0, 4.0, 7.0, 8.0, 10.0, 12.0, 23.0, 28.0, 31.0, 50.0, 84.0, 129.0, 246.0, 519.0, 1502.0, 6358.0, 51297.0, 3955624.0, 162089.0, 12086.0, 2387.0, 897.0, 342.0, 209.0, 104.0, 72.0, 59.0, 21.0, 17.0, 20.0, 19.0, 6.0, 1.0, 6.0, 8.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.796875, -1.714019775390625, -1.63116455078125, -1.548309326171875, -1.4654541015625, -1.382598876953125, -1.29974365234375, -1.216888427734375, -1.134033203125, -1.051177978515625, -0.96832275390625, -0.885467529296875, -0.8026123046875, -0.719757080078125, -0.63690185546875, -0.554046630859375, -0.47119140625, -0.388336181640625, -0.30548095703125, -0.222625732421875, -0.1397705078125, -0.056915283203125, 0.02593994140625, 0.108795166015625, 0.191650390625, 0.274505615234375, 0.35736083984375, 0.440216064453125, 0.5230712890625, 0.605926513671875, 0.68878173828125, 0.771636962890625, 0.8544921875, 0.937347412109375, 1.02020263671875, 1.103057861328125, 1.1859130859375, 1.268768310546875, 1.35162353515625, 1.434478759765625, 1.517333984375, 1.600189208984375, 1.68304443359375, 1.765899658203125, 1.8487548828125, 1.931610107421875, 2.01446533203125, 2.097320556640625, 2.18017578125, 2.263031005859375, 2.34588623046875, 2.428741455078125, 2.5115966796875, 2.594451904296875, 2.67730712890625, 2.760162353515625, 2.843017578125, 2.925872802734375, 3.00872802734375, 3.091583251953125, 3.1744384765625, 3.257293701171875, 3.34014892578125, 3.423004150390625, 3.505859375]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 5.0, 2.0, 7.0, 8.0, 7.0, 16.0, 22.0, 21.0, 27.0, 25.0, 49.0, 48.0, 57.0, 58.0, 57.0, 72.0, 71.0, 67.0, 63.0, 55.0, 44.0, 42.0, 41.0, 40.0, 26.0, 16.0, 19.0, 11.0, 10.0, 6.0, 8.0, 4.0, 3.0, 2.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.257568359375, -0.2508506774902344, -0.24413299560546875, -0.23741531372070312, -0.2306976318359375, -0.22397994995117188, -0.21726226806640625, -0.21054458618164062, -0.203826904296875, -0.19710922241210938, -0.19039154052734375, -0.18367385864257812, -0.1769561767578125, -0.17023849487304688, -0.16352081298828125, -0.15680313110351562, -0.15008544921875, -0.14336776733398438, -0.13665008544921875, -0.12993240356445312, -0.1232147216796875, -0.11649703979492188, -0.10977935791015625, -0.10306167602539062, -0.096343994140625, -0.08962631225585938, -0.08290863037109375, -0.07619094848632812, -0.0694732666015625, -0.06275558471679688, -0.05603790283203125, -0.049320220947265625, -0.0426025390625, -0.035884857177734375, -0.02916717529296875, -0.022449493408203125, -0.0157318115234375, -0.009014129638671875, -0.00229644775390625, 0.004421234130859375, 0.011138916015625, 0.017856597900390625, 0.02457427978515625, 0.031291961669921875, 0.0380096435546875, 0.044727325439453125, 0.05144500732421875, 0.058162689208984375, 0.06488037109375, 0.07159805297851562, 0.07831573486328125, 0.08503341674804688, 0.0917510986328125, 0.09846878051757812, 0.10518646240234375, 0.11190414428710938, 0.118621826171875, 0.12533950805664062, 0.13205718994140625, 0.13877487182617188, 0.1454925537109375, 0.15221023559570312, 0.15892791748046875, 0.16564559936523438, 0.17236328125]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 9.0, 7.0, 4.0, 4.0, 8.0, 6.0, 11.0, 20.0, 21.0, 30.0, 46.0, 58.0, 96.0, 149.0, 236.0, 436.0, 866.0, 1884.0, 4965.0, 17088.0, 88733.0, 3479543.0, 535310.0, 47397.0, 10815.0, 3540.0, 1446.0, 666.0, 329.0, 172.0, 109.0, 68.0, 63.0, 36.0, 26.0, 17.0, 17.0, 11.0, 5.0, 10.0, 8.0, 4.0, 8.0, 4.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.32421875, -1.2812652587890625, -1.238311767578125, -1.1953582763671875, -1.15240478515625, -1.1094512939453125, -1.066497802734375, -1.0235443115234375, -0.9805908203125, -0.9376373291015625, -0.894683837890625, -0.8517303466796875, -0.80877685546875, -0.7658233642578125, -0.722869873046875, -0.6799163818359375, -0.636962890625, -0.5940093994140625, -0.551055908203125, -0.5081024169921875, -0.46514892578125, -0.4221954345703125, -0.379241943359375, -0.3362884521484375, -0.2933349609375, -0.2503814697265625, -0.207427978515625, -0.1644744873046875, -0.12152099609375, -0.0785675048828125, -0.035614013671875, 0.0073394775390625, 0.05029296875, 0.0932464599609375, 0.136199951171875, 0.1791534423828125, 0.22210693359375, 0.2650604248046875, 0.308013916015625, 0.3509674072265625, 0.3939208984375, 0.4368743896484375, 0.479827880859375, 0.5227813720703125, 0.56573486328125, 0.6086883544921875, 0.651641845703125, 0.6945953369140625, 0.737548828125, 0.7805023193359375, 0.823455810546875, 0.8664093017578125, 0.90936279296875, 0.9523162841796875, 0.995269775390625, 1.0382232666015625, 1.0811767578125, 1.1241302490234375, 1.167083740234375, 1.2100372314453125, 1.25299072265625, 1.2959442138671875, 1.338897705078125, 1.3818511962890625, 1.4248046875]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 6.0, 2.0, 6.0, 12.0, 3.0, 13.0, 21.0, 23.0, 51.0, 79.0, 126.0, 263.0, 1483.0, 1294.0, 293.0, 130.0, 90.0, 49.0, 44.0, 29.0, 15.0, 23.0, 4.0, 7.0, 3.0, 3.0, 1.0, 0.0, 2.0, 3.0, 2.0, 1.0], "bins": [-0.64794921875, -0.633544921875, -0.619140625, -0.604736328125, -0.59033203125, -0.575927734375, -0.5615234375, -0.547119140625, -0.53271484375, -0.518310546875, -0.50390625, -0.489501953125, -0.47509765625, -0.460693359375, -0.4462890625, -0.431884765625, -0.41748046875, -0.403076171875, -0.388671875, -0.374267578125, -0.35986328125, -0.345458984375, -0.3310546875, -0.316650390625, -0.30224609375, -0.287841796875, -0.2734375, -0.259033203125, -0.24462890625, -0.230224609375, -0.2158203125, -0.201416015625, -0.18701171875, -0.172607421875, -0.158203125, -0.143798828125, -0.12939453125, -0.114990234375, -0.1005859375, -0.086181640625, -0.07177734375, -0.057373046875, -0.04296875, -0.028564453125, -0.01416015625, 0.000244140625, 0.0146484375, 0.029052734375, 0.04345703125, 0.057861328125, 0.072265625, 0.086669921875, 0.10107421875, 0.115478515625, 0.1298828125, 0.144287109375, 0.15869140625, 0.173095703125, 0.1875, 0.201904296875, 0.21630859375, 0.230712890625, 0.2451171875, 0.259521484375, 0.27392578125]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 4.0, 6.0, 55.0, 205.0, 406.0, 243.0, 65.0, 9.0, 5.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-7.044795513153076, -6.840387344360352, -6.635979175567627, -6.431571006774902, -6.2271623611450195, -6.022754192352295, -5.81834602355957, -5.613937854766846, -5.409529685974121, -5.2051215171813965, -5.000713348388672, -4.796304702758789, -4.5918965339660645, -4.38748836517334, -4.183080196380615, -3.9786720275878906, -3.774263381958008, -3.569855213165283, -3.3654468059539795, -3.161038637161255, -2.956630229949951, -2.7522220611572266, -2.547813892364502, -2.3434057235717773, -2.1389973163604736, -1.9345890283584595, -1.7301807403564453, -1.5257725715637207, -1.3213642835617065, -1.1169559955596924, -0.9125478267669678, -0.7081395387649536, -0.5037307739257812, -0.2993225157260895, -0.0949142575263977, 0.10949397087097168, 0.31390225887298584, 0.518310546875, 0.7227187156677246, 0.9271270036697388, 1.131535291671753, 1.335943579673767, 1.5403518676757812, 1.7447600364685059, 1.94916832447052, 2.153576612472534, 2.357984781265259, 2.5623931884765625, 2.766801357269287, 2.9712095260620117, 3.1756179332733154, 3.38002610206604, 3.5844345092773438, 3.7888426780700684, 3.993250846862793, 4.197659015655518, 4.402067184448242, 4.606475353240967, 4.810883522033691, 5.015292167663574, 5.219700336456299, 5.424108505249023, 5.628516674041748, 5.832924842834473, 6.0373334884643555]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 7.0, 9.0, 5.0, 12.0, 15.0, 18.0, 18.0, 23.0, 27.0, 35.0, 41.0, 57.0, 51.0, 46.0, 64.0, 52.0, 59.0, 52.0, 60.0, 56.0, 42.0, 63.0, 37.0, 34.0, 28.0, 27.0, 20.0, 12.0, 7.0, 12.0, 6.0, 4.0, 4.0, 6.0, 1.0, 3.0, 0.0, 1.0], "bins": [-1.8850996494293213, -1.8407809734344482, -1.7964622974395752, -1.7521436214447021, -1.707824945449829, -1.6635061502456665, -1.6191874742507935, -1.5748687982559204, -1.5305501222610474, -1.4862314462661743, -1.4419127702713013, -1.3975940942764282, -1.3532752990722656, -1.3089566230773926, -1.2646379470825195, -1.2203192710876465, -1.1760005950927734, -1.1316819190979004, -1.0873632431030273, -1.0430445671081543, -0.9987258315086365, -0.9544071555137634, -0.9100884199142456, -0.8657697439193726, -0.8214510679244995, -0.7771323919296265, -0.7328137159347534, -0.6884949803352356, -0.6441763043403625, -0.5998576283454895, -0.5555388927459717, -0.5112202167510986, -0.46690165996551514, -0.4225829839706421, -0.37826427817344666, -0.3339455723762512, -0.2896268963813782, -0.24530820548534393, -0.2009895145893097, -0.15667080879211426, -0.11235213279724121, -0.06803344190120697, -0.02371475100517273, 0.02060393989086151, 0.06492263078689575, 0.10924132168292999, 0.15356001257896423, 0.19787871837615967, 0.24219739437103271, 0.28651607036590576, 0.3308347761631012, 0.37515348196029663, 0.4194721579551697, 0.4637908339500427, 0.5081095695495605, 0.5524282455444336, 0.5967469215393066, 0.6410655975341797, 0.6853842735290527, 0.7297030091285706, 0.7740216851234436, 0.8183403611183167, 0.8626590967178345, 0.9069777727127075, 0.9512964487075806]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 1.0, 10.0, 4.0, 4.0, 10.0, 17.0, 17.0, 30.0, 41.0, 64.0, 122.0, 198.0, 361.0, 811.0, 2608.0, 13573.0, 167036.0, 728369.0, 120701.0, 10729.0, 2232.0, 769.0, 344.0, 193.0, 105.0, 63.0, 42.0, 34.0, 18.0, 12.0, 16.0, 10.0, 6.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.80859375, -1.747161865234375, -1.68572998046875, -1.624298095703125, -1.5628662109375, -1.501434326171875, -1.44000244140625, -1.378570556640625, -1.317138671875, -1.255706787109375, -1.19427490234375, -1.132843017578125, -1.0714111328125, -1.009979248046875, -0.94854736328125, -0.887115478515625, -0.82568359375, -0.764251708984375, -0.70281982421875, -0.641387939453125, -0.5799560546875, -0.518524169921875, -0.45709228515625, -0.395660400390625, -0.334228515625, -0.272796630859375, -0.21136474609375, -0.149932861328125, -0.0885009765625, -0.027069091796875, 0.03436279296875, 0.095794677734375, 0.1572265625, 0.218658447265625, 0.28009033203125, 0.341522216796875, 0.4029541015625, 0.464385986328125, 0.52581787109375, 0.587249755859375, 0.648681640625, 0.710113525390625, 0.77154541015625, 0.832977294921875, 0.8944091796875, 0.955841064453125, 1.01727294921875, 1.078704833984375, 1.14013671875, 1.201568603515625, 1.26300048828125, 1.324432373046875, 1.3858642578125, 1.447296142578125, 1.50872802734375, 1.570159912109375, 1.631591796875, 1.693023681640625, 1.75445556640625, 1.815887451171875, 1.8773193359375, 1.938751220703125, 2.00018310546875, 2.061614990234375, 2.123046875]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0, 3.0, 4.0, 5.0, 8.0, 11.0, 19.0, 21.0, 14.0, 35.0, 48.0, 42.0, 56.0, 47.0, 54.0, 62.0, 61.0, 70.0, 59.0, 77.0, 60.0, 46.0, 37.0, 37.0, 23.0, 29.0, 13.0, 19.0, 13.0, 11.0, 9.0, 2.0, 4.0, 5.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.2744140625, -0.2673912048339844, -0.26036834716796875, -0.2533454895019531, -0.2463226318359375, -0.23929977416992188, -0.23227691650390625, -0.22525405883789062, -0.218231201171875, -0.21120834350585938, -0.20418548583984375, -0.19716262817382812, -0.1901397705078125, -0.18311691284179688, -0.17609405517578125, -0.16907119750976562, -0.16204833984375, -0.15502548217773438, -0.14800262451171875, -0.14097976684570312, -0.1339569091796875, -0.12693405151367188, -0.11991119384765625, -0.11288833618164062, -0.105865478515625, -0.09884262084960938, -0.09181976318359375, -0.08479690551757812, -0.0777740478515625, -0.07075119018554688, -0.06372833251953125, -0.056705474853515625, -0.0496826171875, -0.042659759521484375, -0.03563690185546875, -0.028614044189453125, -0.0215911865234375, -0.014568328857421875, -0.00754547119140625, -0.000522613525390625, 0.006500244140625, 0.013523101806640625, 0.02054595947265625, 0.027568817138671875, 0.0345916748046875, 0.041614532470703125, 0.04863739013671875, 0.055660247802734375, 0.06268310546875, 0.06970596313476562, 0.07672882080078125, 0.08375167846679688, 0.0907745361328125, 0.09779739379882812, 0.10482025146484375, 0.11184310913085938, 0.118865966796875, 0.12588882446289062, 0.13291168212890625, 0.13993453979492188, 0.1469573974609375, 0.15398025512695312, 0.16100311279296875, 0.16802597045898438, 0.175048828125]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 3.0, 3.0, 3.0, 4.0, 1.0, 8.0, 11.0, 10.0, 14.0, 18.0, 32.0, 34.0, 38.0, 92.0, 74.0, 126.0, 149.0, 234.0, 333.0, 509.0, 996.0, 2424.0, 6659.0, 23159.0, 93412.0, 323846.0, 405042.0, 140982.0, 34531.0, 9527.0, 3110.0, 1298.0, 623.0, 361.0, 246.0, 176.0, 120.0, 89.0, 82.0, 39.0, 51.0, 30.0, 17.0, 14.0, 11.0, 13.0, 4.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.6611328125, -0.63677978515625, -0.6124267578125, -0.58807373046875, -0.563720703125, -0.53936767578125, -0.5150146484375, -0.49066162109375, -0.46630859375, -0.44195556640625, -0.4176025390625, -0.39324951171875, -0.368896484375, -0.34454345703125, -0.3201904296875, -0.29583740234375, -0.271484375, -0.24713134765625, -0.2227783203125, -0.19842529296875, -0.174072265625, -0.14971923828125, -0.1253662109375, -0.10101318359375, -0.07666015625, -0.05230712890625, -0.0279541015625, -0.00360107421875, 0.020751953125, 0.04510498046875, 0.0694580078125, 0.09381103515625, 0.1181640625, 0.14251708984375, 0.1668701171875, 0.19122314453125, 0.215576171875, 0.23992919921875, 0.2642822265625, 0.28863525390625, 0.31298828125, 0.33734130859375, 0.3616943359375, 0.38604736328125, 0.410400390625, 0.43475341796875, 0.4591064453125, 0.48345947265625, 0.5078125, 0.53216552734375, 0.5565185546875, 0.58087158203125, 0.605224609375, 0.62957763671875, 0.6539306640625, 0.67828369140625, 0.70263671875, 0.72698974609375, 0.7513427734375, 0.77569580078125, 0.800048828125, 0.82440185546875, 0.8487548828125, 0.87310791015625, 0.8974609375]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 7.0, 5.0, 4.0, 14.0, 9.0, 5.0, 11.0, 10.0, 25.0, 38.0, 31.0, 33.0, 30.0, 38.0, 48.0, 49.0, 42.0, 66.0, 63.0, 39.0, 40.0, 45.0, 40.0, 42.0, 32.0, 40.0, 41.0, 38.0, 24.0, 17.0, 18.0, 15.0, 17.0, 6.0, 6.0, 5.0, 8.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.69091796875, -0.6640548706054688, -0.6371917724609375, -0.6103286743164062, -0.583465576171875, -0.5566024780273438, -0.5297393798828125, -0.5028762817382812, -0.47601318359375, -0.44915008544921875, -0.4222869873046875, -0.39542388916015625, -0.368560791015625, -0.34169769287109375, -0.3148345947265625, -0.28797149658203125, -0.2611083984375, -0.23424530029296875, -0.2073822021484375, -0.18051910400390625, -0.153656005859375, -0.12679290771484375, -0.0999298095703125, -0.07306671142578125, -0.04620361328125, -0.01934051513671875, 0.0075225830078125, 0.03438568115234375, 0.061248779296875, 0.08811187744140625, 0.1149749755859375, 0.14183807373046875, 0.168701171875, 0.19556427001953125, 0.2224273681640625, 0.24929046630859375, 0.276153564453125, 0.30301666259765625, 0.3298797607421875, 0.35674285888671875, 0.38360595703125, 0.41046905517578125, 0.4373321533203125, 0.46419525146484375, 0.491058349609375, 0.5179214477539062, 0.5447845458984375, 0.5716476440429688, 0.5985107421875, 0.6253738403320312, 0.6522369384765625, 0.6791000366210938, 0.705963134765625, 0.7328262329101562, 0.7596893310546875, 0.7865524291992188, 0.81341552734375, 0.8402786254882812, 0.8671417236328125, 0.8940048217773438, 0.920867919921875, 0.9477310180664062, 0.9745941162109375, 1.0014572143554688, 1.0283203125]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 1.0, 4.0, 15.0, 10.0, 15.0, 19.0, 29.0, 53.0, 97.0, 207.0, 374.0, 974.0, 3120.0, 16426.0, 168183.0, 723187.0, 118795.0, 12899.0, 2554.0, 844.0, 380.0, 167.0, 91.0, 55.0, 23.0, 8.0, 8.0, 3.0, 7.0, 5.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.68994140625, -0.6679916381835938, -0.6460418701171875, -0.6240921020507812, -0.602142333984375, -0.5801925659179688, -0.5582427978515625, -0.5362930297851562, -0.51434326171875, -0.49239349365234375, -0.4704437255859375, -0.44849395751953125, -0.426544189453125, -0.40459442138671875, -0.3826446533203125, -0.36069488525390625, -0.3387451171875, -0.31679534912109375, -0.2948455810546875, -0.27289581298828125, -0.250946044921875, -0.22899627685546875, -0.2070465087890625, -0.18509674072265625, -0.16314697265625, -0.14119720458984375, -0.1192474365234375, -0.09729766845703125, -0.075347900390625, -0.05339813232421875, -0.0314483642578125, -0.00949859619140625, 0.012451171875, 0.03440093994140625, 0.0563507080078125, 0.07830047607421875, 0.100250244140625, 0.12220001220703125, 0.1441497802734375, 0.16609954833984375, 0.18804931640625, 0.20999908447265625, 0.2319488525390625, 0.25389862060546875, 0.275848388671875, 0.29779815673828125, 0.3197479248046875, 0.34169769287109375, 0.3636474609375, 0.38559722900390625, 0.4075469970703125, 0.42949676513671875, 0.451446533203125, 0.47339630126953125, 0.4953460693359375, 0.5172958374023438, 0.53924560546875, 0.5611953735351562, 0.5831451416015625, 0.6050949096679688, 0.627044677734375, 0.6489944458007812, 0.6709442138671875, 0.6928939819335938, 0.71484375]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 3.0, 6.0, 9.0, 6.0, 10.0, 16.0, 16.0, 22.0, 37.0, 45.0, 44.0, 72.0, 90.0, 99.0, 104.0, 90.0, 62.0, 55.0, 59.0, 27.0, 24.0, 23.0, 17.0, 15.0, 13.0, 9.0, 5.0, 3.0, 5.0, 7.0, 2.0, 5.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.00010752677917480469, -0.00010478124022483826, -0.00010203570127487183, -9.92901623249054e-05, -9.654462337493896e-05, -9.379908442497253e-05, -9.10535454750061e-05, -8.830800652503967e-05, -8.556246757507324e-05, -8.281692862510681e-05, -8.007138967514038e-05, -7.732585072517395e-05, -7.458031177520752e-05, -7.183477282524109e-05, -6.908923387527466e-05, -6.634369492530823e-05, -6.35981559753418e-05, -6.0852617025375366e-05, -5.8107078075408936e-05, -5.5361539125442505e-05, -5.2616000175476074e-05, -4.9870461225509644e-05, -4.712492227554321e-05, -4.437938332557678e-05, -4.163384437561035e-05, -3.888830542564392e-05, -3.614276647567749e-05, -3.339722752571106e-05, -3.065168857574463e-05, -2.7906149625778198e-05, -2.5160610675811768e-05, -2.2415071725845337e-05, -1.9669532775878906e-05, -1.6923993825912476e-05, -1.4178454875946045e-05, -1.1432915925979614e-05, -8.687376976013184e-06, -5.941838026046753e-06, -3.1962990760803223e-06, -4.507601261138916e-07, 2.294778823852539e-06, 5.04031777381897e-06, 7.7858567237854e-06, 1.0531395673751831e-05, 1.3276934623718262e-05, 1.6022473573684692e-05, 1.8768012523651123e-05, 2.1513551473617554e-05, 2.4259090423583984e-05, 2.7004629373550415e-05, 2.9750168323516846e-05, 3.2495707273483276e-05, 3.524124622344971e-05, 3.798678517341614e-05, 4.073232412338257e-05, 4.3477863073349e-05, 4.622340202331543e-05, 4.896894097328186e-05, 5.171447992324829e-05, 5.446001887321472e-05, 5.720555782318115e-05, 5.995109677314758e-05, 6.269663572311401e-05, 6.544217467308044e-05, 6.818771362304688e-05]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 1.0, 1.0, 11.0, 12.0, 35.0, 52.0, 123.0, 180.0, 530.0, 1973.0, 15414.0, 380342.0, 618642.0, 27328.0, 2744.0, 650.0, 268.0, 123.0, 59.0, 27.0, 16.0, 12.0, 5.0, 2.0, 6.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.2646484375, -1.2338790893554688, -1.2031097412109375, -1.1723403930664062, -1.141571044921875, -1.1108016967773438, -1.0800323486328125, -1.0492630004882812, -1.01849365234375, -0.9877243041992188, -0.9569549560546875, -0.9261856079101562, -0.895416259765625, -0.8646469116210938, -0.8338775634765625, -0.8031082153320312, -0.7723388671875, -0.7415695190429688, -0.7108001708984375, -0.6800308227539062, -0.649261474609375, -0.6184921264648438, -0.5877227783203125, -0.5569534301757812, -0.52618408203125, -0.49541473388671875, -0.4646453857421875, -0.43387603759765625, -0.403106689453125, -0.37233734130859375, -0.3415679931640625, -0.31079864501953125, -0.280029296875, -0.24925994873046875, -0.2184906005859375, -0.18772125244140625, -0.156951904296875, -0.12618255615234375, -0.0954132080078125, -0.06464385986328125, -0.03387451171875, -0.00310516357421875, 0.0276641845703125, 0.05843353271484375, 0.089202880859375, 0.11997222900390625, 0.1507415771484375, 0.18151092529296875, 0.2122802734375, 0.24304962158203125, 0.2738189697265625, 0.30458831787109375, 0.335357666015625, 0.36612701416015625, 0.3968963623046875, 0.42766571044921875, 0.45843505859375, 0.48920440673828125, 0.5199737548828125, 0.5507431030273438, 0.581512451171875, 0.6122817993164062, 0.6430511474609375, 0.6738204956054688, 0.70458984375]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 3.0, 2.0, 4.0, 4.0, 6.0, 3.0, 14.0, 13.0, 27.0, 32.0, 40.0, 49.0, 68.0, 89.0, 93.0, 117.0, 102.0, 80.0, 69.0, 53.0, 44.0, 35.0, 19.0, 12.0, 10.0, 5.0, 7.0, 5.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5556640625, -0.53912353515625, -0.5225830078125, -0.50604248046875, -0.489501953125, -0.47296142578125, -0.4564208984375, -0.43988037109375, -0.42333984375, -0.40679931640625, -0.3902587890625, -0.37371826171875, -0.357177734375, -0.34063720703125, -0.3240966796875, -0.30755615234375, -0.291015625, -0.27447509765625, -0.2579345703125, -0.24139404296875, -0.224853515625, -0.20831298828125, -0.1917724609375, -0.17523193359375, -0.15869140625, -0.14215087890625, -0.1256103515625, -0.10906982421875, -0.092529296875, -0.07598876953125, -0.0594482421875, -0.04290771484375, -0.0263671875, -0.00982666015625, 0.0067138671875, 0.02325439453125, 0.039794921875, 0.05633544921875, 0.0728759765625, 0.08941650390625, 0.10595703125, 0.12249755859375, 0.1390380859375, 0.15557861328125, 0.172119140625, 0.18865966796875, 0.2052001953125, 0.22174072265625, 0.23828125, 0.25482177734375, 0.2713623046875, 0.28790283203125, 0.304443359375, 0.32098388671875, 0.3375244140625, 0.35406494140625, 0.37060546875, 0.38714599609375, 0.4036865234375, 0.42022705078125, 0.436767578125, 0.45330810546875, 0.4698486328125, 0.48638916015625, 0.5029296875]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 3.0, 5.0, 13.0, 28.0, 69.0, 89.0, 131.0, 165.0, 172.0, 145.0, 91.0, 46.0, 36.0, 8.0, 5.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-13.52730941772461, -13.234052658081055, -12.940794944763184, -12.647537231445312, -12.354280471801758, -12.061023712158203, -11.767765998840332, -11.474508285522461, -11.181251525878906, -10.887994766235352, -10.59473705291748, -10.30147933959961, -10.008222579956055, -9.7149658203125, -9.421708106994629, -9.128450393676758, -8.835193634033203, -8.541936874389648, -8.248679161071777, -7.9554219245910645, -7.662164688110352, -7.368907451629639, -7.075650215148926, -6.782392978668213, -6.4891357421875, -6.195878505706787, -5.902621269226074, -5.609364032745361, -5.316106796264648, -5.0228495597839355, -4.729592323303223, -4.43633508682251, -4.1430768966674805, -3.8498196601867676, -3.5565624237060547, -3.263305187225342, -2.970047950744629, -2.676790714263916, -2.383533477783203, -2.0902762413024902, -1.7970190048217773, -1.5037617683410645, -1.2105045318603516, -0.9172472953796387, -0.6239900588989258, -0.3307328224182129, -0.0374755859375, 0.2557816505432129, 0.5490388870239258, 0.8422961235046387, 1.1355533599853516, 1.4288105964660645, 1.7220678329467773, 2.0153250694274902, 2.308582305908203, 2.601839542388916, 2.895096778869629, 3.188354015350342, 3.4816112518310547, 3.7748684883117676, 4.0681257247924805, 4.361382961273193, 4.654640197753906, 4.947897434234619, 5.241154670715332]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 5.0, 4.0, 8.0, 9.0, 8.0, 14.0, 17.0, 25.0, 24.0, 41.0, 25.0, 31.0, 30.0, 54.0, 39.0, 34.0, 38.0, 42.0, 42.0, 50.0, 51.0, 36.0, 41.0, 36.0, 45.0, 38.0, 27.0, 35.0, 22.0, 22.0, 19.0, 20.0, 12.0, 18.0, 13.0, 10.0, 8.0, 3.0, 5.0, 4.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.6102945804595947, -3.4762771129608154, -3.3422598838806152, -3.208242416381836, -3.0742249488830566, -2.9402074813842773, -2.806190013885498, -2.672172784805298, -2.5381553173065186, -2.4041378498077393, -2.270120620727539, -2.1361031532287598, -2.0020856857299805, -1.8680682182312012, -1.7340508699417114, -1.6000335216522217, -1.4660160541534424, -1.331998586654663, -1.1979812383651733, -1.0639638900756836, -0.9299464225769043, -0.7959290146827698, -0.6619116067886353, -0.5278941988945007, -0.3938767910003662, -0.2598593831062317, -0.12584197521209717, 0.008175432682037354, 0.14219284057617188, 0.2762102484703064, 0.4102276563644409, 0.5442450642585754, 0.6782627105712891, 0.8122801184654236, 0.9462975263595581, 1.0803148746490479, 1.2143323421478271, 1.3483498096466064, 1.4823671579360962, 1.616384506225586, 1.7504019737243652, 1.8844194412231445, 2.018436908721924, 2.152454137802124, 2.2864716053009033, 2.4204890727996826, 2.554506301879883, 2.688523769378662, 2.8225412368774414, 2.9565587043762207, 3.090576171875, 3.2245934009552, 3.3586108684539795, 3.492628335952759, 3.626645565032959, 3.7606630325317383, 3.8946805000305176, 4.028697967529297, 4.162715435028076, 4.2967329025268555, 4.430749893188477, 4.564767360687256, 4.698784828186035, 4.8328022956848145, 4.966819763183594]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [4.0, 3.0, 3.0, 1.0, 3.0, 8.0, 13.0, 9.0, 15.0, 29.0, 27.0, 59.0, 99.0, 127.0, 305.0, 772.0, 3422.0, 33518.0, 4007929.0, 137681.0, 7688.0, 1531.0, 540.0, 234.0, 115.0, 62.0, 36.0, 23.0, 11.0, 12.0, 4.0, 8.0, 2.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.908203125, -1.806182861328125, -1.70416259765625, -1.602142333984375, -1.5001220703125, -1.398101806640625, -1.29608154296875, -1.194061279296875, -1.092041015625, -0.990020751953125, -0.88800048828125, -0.785980224609375, -0.6839599609375, -0.581939697265625, -0.47991943359375, -0.377899169921875, -0.27587890625, -0.173858642578125, -0.07183837890625, 0.030181884765625, 0.1322021484375, 0.234222412109375, 0.33624267578125, 0.438262939453125, 0.540283203125, 0.642303466796875, 0.74432373046875, 0.846343994140625, 0.9483642578125, 1.050384521484375, 1.15240478515625, 1.254425048828125, 1.3564453125, 1.458465576171875, 1.56048583984375, 1.662506103515625, 1.7645263671875, 1.866546630859375, 1.96856689453125, 2.070587158203125, 2.172607421875, 2.274627685546875, 2.37664794921875, 2.478668212890625, 2.5806884765625, 2.682708740234375, 2.78472900390625, 2.886749267578125, 2.98876953125, 3.090789794921875, 3.19281005859375, 3.294830322265625, 3.3968505859375, 3.498870849609375, 3.60089111328125, 3.702911376953125, 3.804931640625, 3.906951904296875, 4.00897216796875, 4.110992431640625, 4.2130126953125, 4.315032958984375, 4.41705322265625, 4.519073486328125, 4.62109375]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 5.0, 3.0, 13.0, 15.0, 13.0, 21.0, 30.0, 46.0, 41.0, 42.0, 43.0, 50.0, 67.0, 53.0, 62.0, 73.0, 53.0, 61.0, 55.0, 36.0, 37.0, 41.0, 31.0, 26.0, 22.0, 14.0, 14.0, 11.0, 3.0, 8.0, 7.0, 1.0, 3.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 2.0], "bins": [-0.284912109375, -0.2776031494140625, -0.270294189453125, -0.2629852294921875, -0.25567626953125, -0.2483673095703125, -0.241058349609375, -0.2337493896484375, -0.2264404296875, -0.2191314697265625, -0.211822509765625, -0.2045135498046875, -0.19720458984375, -0.1898956298828125, -0.182586669921875, -0.1752777099609375, -0.16796875, -0.1606597900390625, -0.153350830078125, -0.1460418701171875, -0.13873291015625, -0.1314239501953125, -0.124114990234375, -0.1168060302734375, -0.1094970703125, -0.1021881103515625, -0.094879150390625, -0.0875701904296875, -0.08026123046875, -0.0729522705078125, -0.065643310546875, -0.0583343505859375, -0.051025390625, -0.0437164306640625, -0.036407470703125, -0.0290985107421875, -0.02178955078125, -0.0144805908203125, -0.007171630859375, 0.0001373291015625, 0.0074462890625, 0.0147552490234375, 0.022064208984375, 0.0293731689453125, 0.03668212890625, 0.0439910888671875, 0.051300048828125, 0.0586090087890625, 0.06591796875, 0.0732269287109375, 0.080535888671875, 0.0878448486328125, 0.09515380859375, 0.1024627685546875, 0.109771728515625, 0.1170806884765625, 0.1243896484375, 0.1316986083984375, 0.139007568359375, 0.1463165283203125, 0.15362548828125, 0.1609344482421875, 0.168243408203125, 0.1755523681640625, 0.182861328125]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 4.0, 9.0, 18.0, 29.0, 52.0, 94.0, 151.0, 282.0, 485.0, 792.0, 1529.0, 3131.0, 7602.0, 26910.0, 174221.0, 3700974.0, 230766.0, 31712.0, 8424.0, 3334.0, 1727.0, 873.0, 513.0, 268.0, 163.0, 97.0, 50.0, 34.0, 16.0, 9.0, 6.0, 7.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.27734375, -1.2368621826171875, -1.196380615234375, -1.1558990478515625, -1.11541748046875, -1.0749359130859375, -1.034454345703125, -0.9939727783203125, -0.9534912109375, -0.9130096435546875, -0.872528076171875, -0.8320465087890625, -0.79156494140625, -0.7510833740234375, -0.710601806640625, -0.6701202392578125, -0.629638671875, -0.5891571044921875, -0.548675537109375, -0.5081939697265625, -0.46771240234375, -0.4272308349609375, -0.386749267578125, -0.3462677001953125, -0.3057861328125, -0.2653045654296875, -0.224822998046875, -0.1843414306640625, -0.14385986328125, -0.1033782958984375, -0.062896728515625, -0.0224151611328125, 0.01806640625, 0.0585479736328125, 0.099029541015625, 0.1395111083984375, 0.17999267578125, 0.2204742431640625, 0.260955810546875, 0.3014373779296875, 0.3419189453125, 0.3824005126953125, 0.422882080078125, 0.4633636474609375, 0.50384521484375, 0.5443267822265625, 0.584808349609375, 0.6252899169921875, 0.665771484375, 0.7062530517578125, 0.746734619140625, 0.7872161865234375, 0.82769775390625, 0.8681793212890625, 0.908660888671875, 0.9491424560546875, 0.9896240234375, 1.0301055908203125, 1.070587158203125, 1.1110687255859375, 1.15155029296875, 1.1920318603515625, 1.232513427734375, 1.2729949951171875, 1.3134765625]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 4.0, 1.0, 3.0, 1.0, 4.0, 7.0, 5.0, 4.0, 9.0, 11.0, 19.0, 29.0, 37.0, 96.0, 167.0, 463.0, 2191.0, 587.0, 194.0, 82.0, 57.0, 28.0, 29.0, 12.0, 16.0, 8.0, 7.0, 3.0, 2.0, 1.0, 2.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.66748046875, -0.6463394165039062, -0.6251983642578125, -0.6040573120117188, -0.582916259765625, -0.5617752075195312, -0.5406341552734375, -0.5194931030273438, -0.49835205078125, -0.47721099853515625, -0.4560699462890625, -0.43492889404296875, -0.413787841796875, -0.39264678955078125, -0.3715057373046875, -0.35036468505859375, -0.3292236328125, -0.30808258056640625, -0.2869415283203125, -0.26580047607421875, -0.244659423828125, -0.22351837158203125, -0.2023773193359375, -0.18123626708984375, -0.16009521484375, -0.13895416259765625, -0.1178131103515625, -0.09667205810546875, -0.075531005859375, -0.05438995361328125, -0.0332489013671875, -0.01210784912109375, 0.009033203125, 0.03017425537109375, 0.0513153076171875, 0.07245635986328125, 0.093597412109375, 0.11473846435546875, 0.1358795166015625, 0.15702056884765625, 0.17816162109375, 0.19930267333984375, 0.2204437255859375, 0.24158477783203125, 0.262725830078125, 0.28386688232421875, 0.3050079345703125, 0.32614898681640625, 0.3472900390625, 0.36843109130859375, 0.3895721435546875, 0.41071319580078125, 0.431854248046875, 0.45299530029296875, 0.4741363525390625, 0.49527740478515625, 0.51641845703125, 0.5375595092773438, 0.5587005615234375, 0.5798416137695312, 0.600982666015625, 0.6221237182617188, 0.6432647705078125, 0.6644058227539062, 0.685546875]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 4.0, 7.0, 6.0, 20.0, 42.0, 119.0, 241.0, 247.0, 174.0, 80.0, 32.0, 18.0, 6.0, 4.0, 3.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0390124320983887, -2.8628294467926025, -2.6866466999053955, -2.5104637145996094, -2.3342809677124023, -2.158097982406616, -1.98191499710083, -1.8057321310043335, -1.629549264907837, -1.4533663988113403, -1.2771835327148438, -1.1010005474090576, -0.924817681312561, -0.7486348152160645, -0.5724518299102783, -0.39626896381378174, -0.22008609771728516, -0.043903201818466187, 0.13227969408035278, 0.30846261978149414, 0.4846454858779907, 0.6608283519744873, 0.8370113372802734, 1.01319420337677, 1.1893770694732666, 1.3655599355697632, 1.5417428016662598, 1.717925786972046, 1.8941086530685425, 2.070291519165039, 2.246474504470825, 2.4226574897766113, 2.59883975982666, 2.7750227451324463, 2.9512054920196533, 3.1273884773254395, 3.3035712242126465, 3.4797542095184326, 3.6559371948242188, 3.832119941711426, 4.008302688598633, 4.18448543548584, 4.360668659210205, 4.536851406097412, 4.713034152984619, 4.889217376708984, 5.065400123596191, 5.241582870483398, 5.417766094207764, 5.593948841094971, 5.770132064819336, 5.946314811706543, 6.12249755859375, 6.298680305480957, 6.474863529205322, 6.651046276092529, 6.8272294998168945, 7.003412246704102, 7.179595470428467, 7.355778217315674, 7.531960964202881, 7.708144187927246, 7.884326934814453, 8.06050968170166, 8.236692428588867]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 8.0, 10.0, 11.0, 6.0, 10.0, 15.0, 20.0, 27.0, 32.0, 37.0, 31.0, 37.0, 41.0, 45.0, 53.0, 57.0, 61.0, 56.0, 36.0, 50.0, 49.0, 62.0, 38.0, 46.0, 25.0, 20.0, 17.0, 24.0, 14.0, 9.0, 10.0, 11.0, 8.0, 4.0, 7.0, 6.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0], "bins": [-2.198601007461548, -2.1419484615325928, -2.0852956771850586, -2.0286431312561035, -1.9719904661178589, -1.9153378009796143, -1.8586851358413696, -1.802032470703125, -1.74537992477417, -1.6887272596359253, -1.6320745944976807, -1.5754220485687256, -1.518769383430481, -1.4621167182922363, -1.4054640531539917, -1.348811388015747, -1.2921587228775024, -1.2355060577392578, -1.1788533926010132, -1.1222007274627686, -1.0655481815338135, -1.0088955163955688, -0.9522428512573242, -0.8955901861190796, -0.8389375805854797, -0.7822849154472351, -0.7256323099136353, -0.6689796447753906, -0.612326979637146, -0.5556743741035461, -0.4990217089653015, -0.4423690736293793, -0.38571643829345703, -0.3290638029575348, -0.27241116762161255, -0.21575850248336792, -0.15910586714744568, -0.10245323181152344, -0.04580056667327881, 0.010852068662643433, 0.06750470399856567, 0.12415734678506851, 0.18080998957157135, 0.23746263980865479, 0.294115275144577, 0.35076791048049927, 0.4074205756187439, 0.46407321095466614, 0.5207258462905884, 0.577378511428833, 0.6340311169624329, 0.6906837821006775, 0.7473363876342773, 0.803989052772522, 0.8606417179107666, 0.9172943830490112, 0.9739469885826111, 1.030599594116211, 1.0872522592544556, 1.1439049243927002, 1.2005575895309448, 1.2572102546691895, 1.3138628005981445, 1.3705154657363892, 1.4271681308746338]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 5.0, 0.0, 3.0, 3.0, 6.0, 6.0, 6.0, 11.0, 12.0, 44.0, 41.0, 73.0, 149.0, 284.0, 931.0, 4438.0, 60018.0, 784877.0, 185853.0, 9443.0, 1491.0, 446.0, 176.0, 96.0, 49.0, 37.0, 21.0, 16.0, 11.0, 8.0, 4.0, 0.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.681640625, -1.604034423828125, -1.52642822265625, -1.448822021484375, -1.3712158203125, -1.293609619140625, -1.21600341796875, -1.138397216796875, -1.060791015625, -0.983184814453125, -0.90557861328125, -0.827972412109375, -0.7503662109375, -0.672760009765625, -0.59515380859375, -0.517547607421875, -0.43994140625, -0.362335205078125, -0.28472900390625, -0.207122802734375, -0.1295166015625, -0.051910400390625, 0.02569580078125, 0.103302001953125, 0.180908203125, 0.258514404296875, 0.33612060546875, 0.413726806640625, 0.4913330078125, 0.568939208984375, 0.64654541015625, 0.724151611328125, 0.8017578125, 0.879364013671875, 0.95697021484375, 1.034576416015625, 1.1121826171875, 1.189788818359375, 1.26739501953125, 1.345001220703125, 1.422607421875, 1.500213623046875, 1.57781982421875, 1.655426025390625, 1.7330322265625, 1.810638427734375, 1.88824462890625, 1.965850830078125, 2.04345703125, 2.121063232421875, 2.19866943359375, 2.276275634765625, 2.3538818359375, 2.431488037109375, 2.50909423828125, 2.586700439453125, 2.664306640625, 2.741912841796875, 2.81951904296875, 2.897125244140625, 2.9747314453125, 3.052337646484375, 3.12994384765625, 3.207550048828125, 3.28515625]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 4.0, 3.0, 5.0, 6.0, 6.0, 9.0, 19.0, 15.0, 26.0, 24.0, 27.0, 53.0, 49.0, 44.0, 50.0, 57.0, 58.0, 66.0, 55.0, 50.0, 48.0, 40.0, 47.0, 45.0, 41.0, 43.0, 23.0, 20.0, 14.0, 13.0, 11.0, 10.0, 10.0, 8.0, 1.0, 2.0, 3.0, 6.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.324462890625, -0.3163471221923828, -0.3082313537597656, -0.30011558532714844, -0.29199981689453125, -0.28388404846191406, -0.2757682800292969, -0.2676525115966797, -0.2595367431640625, -0.2514209747314453, -0.24330520629882812, -0.23518943786621094, -0.22707366943359375, -0.21895790100097656, -0.21084213256835938, -0.2027263641357422, -0.194610595703125, -0.1864948272705078, -0.17837905883789062, -0.17026329040527344, -0.16214752197265625, -0.15403175354003906, -0.14591598510742188, -0.1378002166748047, -0.1296844482421875, -0.12156867980957031, -0.11345291137695312, -0.10533714294433594, -0.09722137451171875, -0.08910560607910156, -0.08098983764648438, -0.07287406921386719, -0.06475830078125, -0.05664253234863281, -0.048526763916015625, -0.04041099548339844, -0.03229522705078125, -0.024179458618164062, -0.016063690185546875, -0.007947921752929688, 0.0001678466796875, 0.008283615112304688, 0.016399383544921875, 0.024515151977539062, 0.03263092041015625, 0.04074668884277344, 0.048862457275390625, 0.05697822570800781, 0.065093994140625, 0.07320976257324219, 0.08132553100585938, 0.08944129943847656, 0.09755706787109375, 0.10567283630371094, 0.11378860473632812, 0.12190437316894531, 0.1300201416015625, 0.1381359100341797, 0.14625167846679688, 0.15436744689941406, 0.16248321533203125, 0.17059898376464844, 0.17871475219726562, 0.1868305206298828, 0.1949462890625]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 4.0, 5.0, 12.0, 5.0, 5.0, 13.0, 13.0, 19.0, 28.0, 45.0, 51.0, 93.0, 137.0, 167.0, 279.0, 431.0, 740.0, 1432.0, 3328.0, 8900.0, 31183.0, 127677.0, 440195.0, 323096.0, 78818.0, 20176.0, 6400.0, 2446.0, 1151.0, 587.0, 387.0, 214.0, 137.0, 107.0, 91.0, 55.0, 46.0, 26.0, 13.0, 13.0, 11.0, 6.0, 3.0, 8.0, 3.0, 1.0, 3.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 3.0], "bins": [-0.9423828125, -0.9136810302734375, -0.884979248046875, -0.8562774658203125, -0.82757568359375, -0.7988739013671875, -0.770172119140625, -0.7414703369140625, -0.7127685546875, -0.6840667724609375, -0.655364990234375, -0.6266632080078125, -0.59796142578125, -0.5692596435546875, -0.540557861328125, -0.5118560791015625, -0.483154296875, -0.4544525146484375, -0.425750732421875, -0.3970489501953125, -0.36834716796875, -0.3396453857421875, -0.310943603515625, -0.2822418212890625, -0.2535400390625, -0.2248382568359375, -0.196136474609375, -0.1674346923828125, -0.13873291015625, -0.1100311279296875, -0.081329345703125, -0.0526275634765625, -0.02392578125, 0.0047760009765625, 0.033477783203125, 0.0621795654296875, 0.09088134765625, 0.1195831298828125, 0.148284912109375, 0.1769866943359375, 0.2056884765625, 0.2343902587890625, 0.263092041015625, 0.2917938232421875, 0.32049560546875, 0.3491973876953125, 0.377899169921875, 0.4066009521484375, 0.435302734375, 0.4640045166015625, 0.492706298828125, 0.5214080810546875, 0.55010986328125, 0.5788116455078125, 0.607513427734375, 0.6362152099609375, 0.6649169921875, 0.6936187744140625, 0.722320556640625, 0.7510223388671875, 0.77972412109375, 0.8084259033203125, 0.837127685546875, 0.8658294677734375, 0.89453125]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 9.0, 9.0, 4.0, 6.0, 13.0, 24.0, 21.0, 22.0, 32.0, 29.0, 43.0, 44.0, 62.0, 56.0, 64.0, 60.0, 51.0, 58.0, 40.0, 64.0, 41.0, 38.0, 25.0, 35.0, 28.0, 28.0, 21.0, 15.0, 11.0, 18.0, 9.0, 5.0, 6.0, 2.0, 4.0, 2.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.056640625, -1.0169525146484375, -0.977264404296875, -0.9375762939453125, -0.89788818359375, -0.8582000732421875, -0.818511962890625, -0.7788238525390625, -0.7391357421875, -0.6994476318359375, -0.659759521484375, -0.6200714111328125, -0.58038330078125, -0.5406951904296875, -0.501007080078125, -0.4613189697265625, -0.421630859375, -0.3819427490234375, -0.342254638671875, -0.3025665283203125, -0.26287841796875, -0.2231903076171875, -0.183502197265625, -0.1438140869140625, -0.1041259765625, -0.0644378662109375, -0.024749755859375, 0.0149383544921875, 0.05462646484375, 0.0943145751953125, 0.134002685546875, 0.1736907958984375, 0.21337890625, 0.2530670166015625, 0.292755126953125, 0.3324432373046875, 0.37213134765625, 0.4118194580078125, 0.451507568359375, 0.4911956787109375, 0.5308837890625, 0.5705718994140625, 0.610260009765625, 0.6499481201171875, 0.68963623046875, 0.7293243408203125, 0.769012451171875, 0.8087005615234375, 0.848388671875, 0.8880767822265625, 0.927764892578125, 0.9674530029296875, 1.00714111328125, 1.0468292236328125, 1.086517333984375, 1.1262054443359375, 1.1658935546875, 1.2055816650390625, 1.245269775390625, 1.2849578857421875, 1.32464599609375, 1.3643341064453125, 1.404022216796875, 1.4437103271484375, 1.4833984375]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 4.0, 5.0, 7.0, 8.0, 16.0, 11.0, 31.0, 26.0, 50.0, 66.0, 105.0, 183.0, 339.0, 707.0, 1606.0, 4135.0, 15294.0, 98310.0, 635416.0, 251044.0, 30160.0, 6753.0, 2284.0, 914.0, 475.0, 238.0, 120.0, 83.0, 46.0, 31.0, 19.0, 14.0, 18.0, 8.0, 8.0, 7.0, 6.0, 4.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0], "bins": [-0.6474609375, -0.6256637573242188, -0.6038665771484375, -0.5820693969726562, -0.560272216796875, -0.5384750366210938, -0.5166778564453125, -0.49488067626953125, -0.47308349609375, -0.45128631591796875, -0.4294891357421875, -0.40769195556640625, -0.385894775390625, -0.36409759521484375, -0.3423004150390625, -0.32050323486328125, -0.2987060546875, -0.27690887451171875, -0.2551116943359375, -0.23331451416015625, -0.211517333984375, -0.18972015380859375, -0.1679229736328125, -0.14612579345703125, -0.12432861328125, -0.10253143310546875, -0.0807342529296875, -0.05893707275390625, -0.037139892578125, -0.01534271240234375, 0.0064544677734375, 0.02825164794921875, 0.050048828125, 0.07184600830078125, 0.0936431884765625, 0.11544036865234375, 0.137237548828125, 0.15903472900390625, 0.1808319091796875, 0.20262908935546875, 0.22442626953125, 0.24622344970703125, 0.2680206298828125, 0.28981781005859375, 0.311614990234375, 0.33341217041015625, 0.3552093505859375, 0.37700653076171875, 0.3988037109375, 0.42060089111328125, 0.4423980712890625, 0.46419525146484375, 0.485992431640625, 0.5077896118164062, 0.5295867919921875, 0.5513839721679688, 0.57318115234375, 0.5949783325195312, 0.6167755126953125, 0.6385726928710938, 0.660369873046875, 0.6821670532226562, 0.7039642333984375, 0.7257614135742188, 0.74755859375]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 5.0, 7.0, 5.0, 10.0, 7.0, 10.0, 22.0, 29.0, 28.0, 37.0, 35.0, 43.0, 73.0, 62.0, 86.0, 103.0, 74.0, 73.0, 60.0, 55.0, 47.0, 33.0, 26.0, 23.0, 12.0, 5.0, 12.0, 6.0, 4.0, 6.0, 3.0, 3.0, 1.0, 1.0, 0.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-8.279085159301758e-05, -8.021295070648193e-05, -7.763504981994629e-05, -7.505714893341064e-05, -7.2479248046875e-05, -6.990134716033936e-05, -6.732344627380371e-05, -6.474554538726807e-05, -6.216764450073242e-05, -5.958974361419678e-05, -5.701184272766113e-05, -5.443394184112549e-05, -5.1856040954589844e-05, -4.92781400680542e-05, -4.6700239181518555e-05, -4.412233829498291e-05, -4.1544437408447266e-05, -3.896653652191162e-05, -3.6388635635375977e-05, -3.381073474884033e-05, -3.123283386230469e-05, -2.8654932975769043e-05, -2.60770320892334e-05, -2.3499131202697754e-05, -2.092123031616211e-05, -1.8343329429626465e-05, -1.576542854309082e-05, -1.3187527656555176e-05, -1.0609626770019531e-05, -8.031725883483887e-06, -5.453824996948242e-06, -2.8759241104125977e-06, -2.980232238769531e-07, 2.2798776626586914e-06, 4.857778549194336e-06, 7.4356794357299805e-06, 1.0013580322265625e-05, 1.259148120880127e-05, 1.5169382095336914e-05, 1.774728298187256e-05, 2.0325183868408203e-05, 2.2903084754943848e-05, 2.5480985641479492e-05, 2.8058886528015137e-05, 3.063678741455078e-05, 3.3214688301086426e-05, 3.579258918762207e-05, 3.8370490074157715e-05, 4.094839096069336e-05, 4.3526291847229004e-05, 4.610419273376465e-05, 4.868209362030029e-05, 5.125999450683594e-05, 5.383789539337158e-05, 5.6415796279907227e-05, 5.899369716644287e-05, 6.157159805297852e-05, 6.414949893951416e-05, 6.67273998260498e-05, 6.930530071258545e-05, 7.18832015991211e-05, 7.446110248565674e-05, 7.703900337219238e-05, 7.961690425872803e-05, 8.219480514526367e-05]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 1.0, 7.0, 14.0, 20.0, 34.0, 44.0, 109.0, 185.0, 461.0, 1311.0, 5401.0, 40717.0, 587404.0, 382345.0, 24939.0, 3840.0, 978.0, 385.0, 163.0, 77.0, 42.0, 33.0, 23.0, 8.0, 12.0, 1.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.89013671875, -0.8622283935546875, -0.834320068359375, -0.8064117431640625, -0.77850341796875, -0.7505950927734375, -0.722686767578125, -0.6947784423828125, -0.6668701171875, -0.6389617919921875, -0.611053466796875, -0.5831451416015625, -0.55523681640625, -0.5273284912109375, -0.499420166015625, -0.4715118408203125, -0.443603515625, -0.4156951904296875, -0.387786865234375, -0.3598785400390625, -0.33197021484375, -0.3040618896484375, -0.276153564453125, -0.2482452392578125, -0.2203369140625, -0.1924285888671875, -0.164520263671875, -0.1366119384765625, -0.10870361328125, -0.0807952880859375, -0.052886962890625, -0.0249786376953125, 0.0029296875, 0.0308380126953125, 0.058746337890625, 0.0866546630859375, 0.11456298828125, 0.1424713134765625, 0.170379638671875, 0.1982879638671875, 0.2261962890625, 0.2541046142578125, 0.282012939453125, 0.3099212646484375, 0.33782958984375, 0.3657379150390625, 0.393646240234375, 0.4215545654296875, 0.449462890625, 0.4773712158203125, 0.505279541015625, 0.5331878662109375, 0.56109619140625, 0.5890045166015625, 0.616912841796875, 0.6448211669921875, 0.6727294921875, 0.7006378173828125, 0.728546142578125, 0.7564544677734375, 0.78436279296875, 0.8122711181640625, 0.840179443359375, 0.8680877685546875, 0.89599609375]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 3.0, 2.0, 3.0, 2.0, 6.0, 4.0, 5.0, 6.0, 9.0, 19.0, 18.0, 27.0, 27.0, 36.0, 41.0, 52.0, 62.0, 78.0, 96.0, 87.0, 70.0, 67.0, 55.0, 42.0, 43.0, 28.0, 26.0, 12.0, 18.0, 16.0, 11.0, 7.0, 7.0, 4.0, 5.0, 1.0, 2.0, 2.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.51904296875, -0.5043144226074219, -0.48958587646484375, -0.4748573303222656, -0.4601287841796875, -0.4454002380371094, -0.43067169189453125, -0.4159431457519531, -0.401214599609375, -0.3864860534667969, -0.37175750732421875, -0.3570289611816406, -0.3423004150390625, -0.3275718688964844, -0.31284332275390625, -0.2981147766113281, -0.28338623046875, -0.2686576843261719, -0.25392913818359375, -0.23920059204101562, -0.2244720458984375, -0.20974349975585938, -0.19501495361328125, -0.18028640747070312, -0.165557861328125, -0.15082931518554688, -0.13610076904296875, -0.12137222290039062, -0.1066436767578125, -0.09191513061523438, -0.07718658447265625, -0.062458038330078125, -0.0477294921875, -0.033000946044921875, -0.01827239990234375, -0.003543853759765625, 0.0111846923828125, 0.025913238525390625, 0.04064178466796875, 0.055370330810546875, 0.070098876953125, 0.08482742309570312, 0.09955596923828125, 0.11428451538085938, 0.1290130615234375, 0.14374160766601562, 0.15847015380859375, 0.17319869995117188, 0.18792724609375, 0.20265579223632812, 0.21738433837890625, 0.23211288452148438, 0.2468414306640625, 0.2615699768066406, 0.27629852294921875, 0.2910270690917969, 0.305755615234375, 0.3204841613769531, 0.33521270751953125, 0.3499412536621094, 0.3646697998046875, 0.3793983459472656, 0.39412689208984375, 0.4088554382324219, 0.423583984375]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 10.0, 19.0, 58.0, 179.0, 285.0, 288.0, 120.0, 37.0, 15.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.48061752319336, -26.85401725769043, -26.227418899536133, -25.600818634033203, -24.974220275878906, -24.347620010375977, -23.72102165222168, -23.09442138671875, -22.467823028564453, -21.841222763061523, -21.214624404907227, -20.588024139404297, -19.96142578125, -19.33482551574707, -18.708227157592773, -18.081626892089844, -17.455026626586914, -16.828426361083984, -16.201828002929688, -15.575228691101074, -14.948629379272461, -14.322029113769531, -13.695429801940918, -13.068830490112305, -12.442231178283691, -11.815631866455078, -11.189032554626465, -10.562433242797852, -9.935832977294922, -9.309234619140625, -8.682634353637695, -8.056035041809082, -7.429435729980469, -6.8028364181518555, -6.176237106323242, -5.549637317657471, -4.923038005828857, -4.296438694000244, -3.6698391437530518, -3.0432395935058594, -2.416640281677246, -1.7900408506393433, -1.1634414196014404, -0.5368419885635376, 0.08975744247436523, 0.7163567543029785, 1.342956304550171, 1.9695558547973633, 2.5961551666259766, 3.22275447845459, 3.8493540287017822, 4.475953578948975, 5.102552890777588, 5.729152202606201, 6.355751991271973, 6.982351303100586, 7.608950614929199, 8.235549926757812, 8.862149238586426, 9.488748550415039, 10.115348815917969, 10.741947174072266, 11.368547439575195, 11.995146751403809, 12.621746063232422]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 6.0, 5.0, 2.0, 5.0, 12.0, 9.0, 14.0, 18.0, 23.0, 13.0, 13.0, 21.0, 34.0, 26.0, 41.0, 36.0, 38.0, 43.0, 48.0, 41.0, 47.0, 55.0, 46.0, 52.0, 43.0, 37.0, 41.0, 31.0, 31.0, 24.0, 18.0, 23.0, 18.0, 22.0, 8.0, 12.0, 12.0, 6.0, 10.0, 6.0, 6.0, 7.0, 4.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.342444896697998, -5.167723178863525, -4.993001461029053, -4.81827974319458, -4.643557548522949, -4.468835830688477, -4.294114112854004, -4.119392395019531, -3.9446706771850586, -3.769948959350586, -3.5952272415161133, -3.4205052852630615, -3.245783567428589, -3.071061849594116, -2.8963398933410645, -2.721618175506592, -2.546896457672119, -2.3721747398376465, -2.197453022003174, -2.022731065750122, -1.8480093479156494, -1.6732876300811768, -1.4985657930374146, -1.3238439559936523, -1.1491222381591797, -0.9744004607200623, -0.7996786832809448, -0.6249569058418274, -0.45023512840270996, -0.27551335096359253, -0.1007915735244751, 0.07393026351928711, 0.24865198135375977, 0.4233737587928772, 0.5980955362319946, 0.7728173136711121, 0.9475390911102295, 1.1222608089447021, 1.2969826459884644, 1.4717044830322266, 1.6464262008666992, 1.8211479187011719, 1.995869755744934, 2.1705915927886963, 2.345313310623169, 2.5200350284576416, 2.6947569847106934, 2.869478702545166, 3.0442004203796387, 3.2189221382141113, 3.393643856048584, 3.5683658123016357, 3.7430875301361084, 3.917809247970581, 4.092531204223633, 4.2672529220581055, 4.441974639892578, 4.616696357727051, 4.791418075561523, 4.966139793395996, 5.140861511230469, 5.3155837059021, 5.490305423736572, 5.665027141571045, 5.839748859405518]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 2.0, 1.0, 6.0, 8.0, 2.0, 12.0, 13.0, 15.0, 15.0, 45.0, 78.0, 184.0, 549.0, 2347.0, 23426.0, 4143215.0, 21234.0, 2245.0, 535.0, 165.0, 70.0, 41.0, 27.0, 15.0, 8.0, 9.0, 7.0, 5.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.02734375, -3.916961669921875, -3.80657958984375, -3.696197509765625, -3.5858154296875, -3.475433349609375, -3.36505126953125, -3.254669189453125, -3.144287109375, -3.033905029296875, -2.92352294921875, -2.813140869140625, -2.7027587890625, -2.592376708984375, -2.48199462890625, -2.371612548828125, -2.26123046875, -2.150848388671875, -2.04046630859375, -1.930084228515625, -1.8197021484375, -1.709320068359375, -1.59893798828125, -1.488555908203125, -1.378173828125, -1.267791748046875, -1.15740966796875, -1.047027587890625, -0.9366455078125, -0.826263427734375, -0.71588134765625, -0.605499267578125, -0.4951171875, -0.384735107421875, -0.27435302734375, -0.163970947265625, -0.0535888671875, 0.056793212890625, 0.16717529296875, 0.277557373046875, 0.387939453125, 0.498321533203125, 0.60870361328125, 0.719085693359375, 0.8294677734375, 0.939849853515625, 1.05023193359375, 1.160614013671875, 1.27099609375, 1.381378173828125, 1.49176025390625, 1.602142333984375, 1.7125244140625, 1.822906494140625, 1.93328857421875, 2.043670654296875, 2.154052734375, 2.264434814453125, 2.37481689453125, 2.485198974609375, 2.5955810546875, 2.705963134765625, 2.81634521484375, 2.926727294921875, 3.037109375]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 4.0, 5.0, 3.0, 3.0, 12.0, 7.0, 11.0, 17.0, 31.0, 23.0, 43.0, 43.0, 40.0, 44.0, 56.0, 63.0, 78.0, 47.0, 51.0, 56.0, 40.0, 58.0, 45.0, 51.0, 28.0, 29.0, 21.0, 19.0, 20.0, 23.0, 13.0, 4.0, 6.0, 5.0, 1.0, 4.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.324462890625, -0.3150978088378906, -0.30573272705078125, -0.2963676452636719, -0.2870025634765625, -0.2776374816894531, -0.26827239990234375, -0.2589073181152344, -0.249542236328125, -0.24017715454101562, -0.23081207275390625, -0.22144699096679688, -0.2120819091796875, -0.20271682739257812, -0.19335174560546875, -0.18398666381835938, -0.17462158203125, -0.16525650024414062, -0.15589141845703125, -0.14652633666992188, -0.1371612548828125, -0.12779617309570312, -0.11843109130859375, -0.10906600952148438, -0.099700927734375, -0.09033584594726562, -0.08097076416015625, -0.07160568237304688, -0.0622406005859375, -0.052875518798828125, -0.04351043701171875, -0.034145355224609375, -0.0247802734375, -0.015415191650390625, -0.00605010986328125, 0.003314971923828125, 0.0126800537109375, 0.022045135498046875, 0.03141021728515625, 0.040775299072265625, 0.050140380859375, 0.059505462646484375, 0.06887054443359375, 0.07823562622070312, 0.0876007080078125, 0.09696578979492188, 0.10633087158203125, 0.11569595336914062, 0.12506103515625, 0.13442611694335938, 0.14379119873046875, 0.15315628051757812, 0.1625213623046875, 0.17188644409179688, 0.18125152587890625, 0.19061660766601562, 0.199981689453125, 0.20934677124023438, 0.21871185302734375, 0.22807693481445312, 0.2374420166015625, 0.24680709838867188, 0.25617218017578125, 0.2655372619628906, 0.27490234375]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 2.0, 2.0, 4.0, 9.0, 2.0, 8.0, 11.0, 14.0, 18.0, 14.0, 35.0, 38.0, 73.0, 91.0, 145.0, 245.0, 424.0, 737.0, 1322.0, 2755.0, 5640.0, 13345.0, 45302.0, 3716111.0, 355705.0, 31970.0, 10579.0, 4707.0, 2249.0, 1134.0, 634.0, 358.0, 221.0, 122.0, 64.0, 35.0, 46.0, 40.0, 15.0, 19.0, 9.0, 9.0, 7.0, 3.0, 8.0, 1.0, 3.0, 1.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.8994140625, -0.8712005615234375, -0.842987060546875, -0.8147735595703125, -0.78656005859375, -0.7583465576171875, -0.730133056640625, -0.7019195556640625, -0.6737060546875, -0.6454925537109375, -0.617279052734375, -0.5890655517578125, -0.56085205078125, -0.5326385498046875, -0.504425048828125, -0.4762115478515625, -0.447998046875, -0.4197845458984375, -0.391571044921875, -0.3633575439453125, -0.33514404296875, -0.3069305419921875, -0.278717041015625, -0.2505035400390625, -0.2222900390625, -0.1940765380859375, -0.165863037109375, -0.1376495361328125, -0.10943603515625, -0.0812225341796875, -0.053009033203125, -0.0247955322265625, 0.00341796875, 0.0316314697265625, 0.059844970703125, 0.0880584716796875, 0.11627197265625, 0.1444854736328125, 0.172698974609375, 0.2009124755859375, 0.2291259765625, 0.2573394775390625, 0.285552978515625, 0.3137664794921875, 0.34197998046875, 0.3701934814453125, 0.398406982421875, 0.4266204833984375, 0.454833984375, 0.4830474853515625, 0.511260986328125, 0.5394744873046875, 0.56768798828125, 0.5959014892578125, 0.624114990234375, 0.6523284912109375, 0.6805419921875, 0.7087554931640625, 0.736968994140625, 0.7651824951171875, 0.79339599609375, 0.8216094970703125, 0.849822998046875, 0.8780364990234375, 0.90625]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 7.0, 3.0, 11.0, 10.0, 16.0, 17.0, 56.0, 227.0, 3362.0, 223.0, 62.0, 28.0, 18.0, 12.0, 6.0, 5.0, 6.0, 4.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.428955078125, -0.4195270538330078, -0.4100990295410156, -0.40067100524902344, -0.39124298095703125, -0.38181495666503906, -0.3723869323730469, -0.3629589080810547, -0.3535308837890625, -0.3441028594970703, -0.3346748352050781, -0.32524681091308594, -0.31581878662109375, -0.30639076232910156, -0.2969627380371094, -0.2875347137451172, -0.278106689453125, -0.2686786651611328, -0.2592506408691406, -0.24982261657714844, -0.24039459228515625, -0.23096656799316406, -0.22153854370117188, -0.2121105194091797, -0.2026824951171875, -0.1932544708251953, -0.18382644653320312, -0.17439842224121094, -0.16497039794921875, -0.15554237365722656, -0.14611434936523438, -0.1366863250732422, -0.12725830078125, -0.11783027648925781, -0.10840225219726562, -0.09897422790527344, -0.08954620361328125, -0.08011817932128906, -0.07069015502929688, -0.06126213073730469, -0.0518341064453125, -0.04240608215332031, -0.032978057861328125, -0.023550033569335938, -0.01412200927734375, -0.0046939849853515625, 0.004734039306640625, 0.014162063598632812, 0.023590087890625, 0.03301811218261719, 0.042446136474609375, 0.05187416076660156, 0.06130218505859375, 0.07073020935058594, 0.08015823364257812, 0.08958625793457031, 0.0990142822265625, 0.10844230651855469, 0.11787033081054688, 0.12729835510253906, 0.13672637939453125, 0.14615440368652344, 0.15558242797851562, 0.1650104522705078, 0.1744384765625]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 7.0, 17.0, 77.0, 220.0, 305.0, 227.0, 113.0, 26.0, 11.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7380779385566711, -0.6724780201911926, -0.6068781018257141, -0.5412781834602356, -0.4756782650947571, -0.41007834672927856, -0.34447842836380005, -0.27887850999832153, -0.21327859163284302, -0.1476786732673645, -0.08207875490188599, -0.01647883653640747, 0.049121081829071045, 0.11472100019454956, 0.18032091856002808, 0.2459208369255066, 0.3115207552909851, 0.3771206736564636, 0.44272059202194214, 0.5083205103874207, 0.5739204287528992, 0.6395203471183777, 0.7051202654838562, 0.7707201838493347, 0.8363201022148132, 0.9019200205802917, 0.9675199389457703, 1.0331199169158936, 1.098719835281372, 1.1643197536468506, 1.229919672012329, 1.2955195903778076, 1.3611195087432861, 1.4267194271087646, 1.4923193454742432, 1.5579192638397217, 1.6235191822052002, 1.6891191005706787, 1.7547190189361572, 1.8203189373016357, 1.8859188556671143, 1.9515187740325928, 2.0171186923980713, 2.08271861076355, 2.1483185291290283, 2.213918447494507, 2.2795183658599854, 2.345118284225464, 2.4107182025909424, 2.476318120956421, 2.5419180393218994, 2.607517957687378, 2.6731178760528564, 2.738717794418335, 2.8043177127838135, 2.869917631149292, 2.9355175495147705, 3.001117467880249, 3.0667173862457275, 3.132317304611206, 3.1979172229766846, 3.263517141342163, 3.3291170597076416, 3.39471697807312, 3.4603168964385986]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 7.0, 1.0, 5.0, 4.0, 8.0, 8.0, 7.0, 12.0, 10.0, 22.0, 22.0, 25.0, 25.0, 32.0, 33.0, 36.0, 31.0, 39.0, 41.0, 35.0, 52.0, 50.0, 41.0, 39.0, 37.0, 40.0, 31.0, 42.0, 42.0, 29.0, 34.0, 29.0, 19.0, 20.0, 13.0, 15.0, 14.0, 12.0, 9.0, 11.0, 12.0, 4.0, 2.0, 2.0, 2.0, 4.0, 3.0, 1.0, 3.0, 1.0], "bins": [-0.5773084759712219, -0.5618777275085449, -0.5464469790458679, -0.5310162305831909, -0.5155854225158691, -0.5001546740531921, -0.48472392559051514, -0.46929317712783813, -0.45386242866516113, -0.43843168020248413, -0.42300093173980713, -0.40757015347480774, -0.39213940501213074, -0.37670865654945374, -0.36127787828445435, -0.34584712982177734, -0.33041638135910034, -0.31498563289642334, -0.29955488443374634, -0.28412410616874695, -0.26869335770606995, -0.25326260924339294, -0.23783184587955475, -0.22240108251571655, -0.20697033405303955, -0.19153958559036255, -0.17610882222652435, -0.16067805886268616, -0.14524731040000916, -0.12981656193733215, -0.11438579857349396, -0.09895504266023636, -0.08352428674697876, -0.06809353083372116, -0.05266277492046356, -0.03723201900720596, -0.021801263093948364, -0.006370507180690765, 0.009060248732566833, 0.024491004645824432, 0.03992176055908203, 0.05535251647233963, 0.07078327238559723, 0.08621402829885483, 0.10164478421211243, 0.11707554012537003, 0.13250629603862762, 0.14793705940246582, 0.16336780786514282, 0.17879855632781982, 0.19422931969165802, 0.20966008305549622, 0.22509083151817322, 0.24052157998085022, 0.2559523582458496, 0.2713831067085266, 0.2868138551712036, 0.3022446036338806, 0.3176753520965576, 0.333106130361557, 0.348536878824234, 0.363967627286911, 0.3793984055519104, 0.3948291540145874, 0.4102599024772644]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 7.0, 5.0, 8.0, 6.0, 10.0, 10.0, 21.0, 31.0, 53.0, 68.0, 104.0, 222.0, 461.0, 1440.0, 5779.0, 37468.0, 322714.0, 567646.0, 96527.0, 12112.0, 2440.0, 731.0, 340.0, 131.0, 84.0, 41.0, 39.0, 24.0, 8.0, 15.0, 7.0, 5.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.84375, -1.788238525390625, -1.73272705078125, -1.677215576171875, -1.6217041015625, -1.566192626953125, -1.51068115234375, -1.455169677734375, -1.399658203125, -1.344146728515625, -1.28863525390625, -1.233123779296875, -1.1776123046875, -1.122100830078125, -1.06658935546875, -1.011077880859375, -0.95556640625, -0.900054931640625, -0.84454345703125, -0.789031982421875, -0.7335205078125, -0.678009033203125, -0.62249755859375, -0.566986083984375, -0.511474609375, -0.455963134765625, -0.40045166015625, -0.344940185546875, -0.2894287109375, -0.233917236328125, -0.17840576171875, -0.122894287109375, -0.0673828125, -0.011871337890625, 0.04364013671875, 0.099151611328125, 0.1546630859375, 0.210174560546875, 0.26568603515625, 0.321197509765625, 0.376708984375, 0.432220458984375, 0.48773193359375, 0.543243408203125, 0.5987548828125, 0.654266357421875, 0.70977783203125, 0.765289306640625, 0.82080078125, 0.876312255859375, 0.93182373046875, 0.987335205078125, 1.0428466796875, 1.098358154296875, 1.15386962890625, 1.209381103515625, 1.264892578125, 1.320404052734375, 1.37591552734375, 1.431427001953125, 1.4869384765625, 1.542449951171875, 1.59796142578125, 1.653472900390625, 1.708984375]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 6.0, 8.0, 4.0, 6.0, 12.0, 11.0, 16.0, 23.0, 25.0, 39.0, 33.0, 31.0, 33.0, 40.0, 50.0, 49.0, 48.0, 72.0, 40.0, 54.0, 44.0, 49.0, 40.0, 36.0, 38.0, 37.0, 28.0, 25.0, 23.0, 18.0, 19.0, 10.0, 16.0, 6.0, 8.0, 3.0, 0.0, 3.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.319091796875, -0.3104724884033203, -0.3018531799316406, -0.29323387145996094, -0.28461456298828125, -0.27599525451660156, -0.2673759460449219, -0.2587566375732422, -0.2501373291015625, -0.2415180206298828, -0.23289871215820312, -0.22427940368652344, -0.21566009521484375, -0.20704078674316406, -0.19842147827148438, -0.1898021697998047, -0.181182861328125, -0.1725635528564453, -0.16394424438476562, -0.15532493591308594, -0.14670562744140625, -0.13808631896972656, -0.12946701049804688, -0.12084770202636719, -0.1122283935546875, -0.10360908508300781, -0.09498977661132812, -0.08637046813964844, -0.07775115966796875, -0.06913185119628906, -0.060512542724609375, -0.05189323425292969, -0.04327392578125, -0.03465461730957031, -0.026035308837890625, -0.017416000366210938, -0.00879669189453125, -0.0001773834228515625, 0.008441925048828125, 0.017061233520507812, 0.0256805419921875, 0.03429985046386719, 0.042919158935546875, 0.05153846740722656, 0.06015777587890625, 0.06877708435058594, 0.07739639282226562, 0.08601570129394531, 0.094635009765625, 0.10325431823730469, 0.11187362670898438, 0.12049293518066406, 0.12911224365234375, 0.13773155212402344, 0.14635086059570312, 0.1549701690673828, 0.1635894775390625, 0.1722087860107422, 0.18082809448242188, 0.18944740295410156, 0.19806671142578125, 0.20668601989746094, 0.21530532836914062, 0.2239246368408203, 0.2325439453125]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 5.0, 1.0, 5.0, 5.0, 6.0, 10.0, 9.0, 20.0, 32.0, 36.0, 57.0, 73.0, 130.0, 207.0, 320.0, 697.0, 1553.0, 4919.0, 21906.0, 149015.0, 631015.0, 200586.0, 28424.0, 5897.0, 1837.0, 796.0, 378.0, 213.0, 129.0, 101.0, 77.0, 35.0, 23.0, 21.0, 9.0, 5.0, 3.0, 3.0, 5.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.2294921875, -1.18646240234375, -1.1434326171875, -1.10040283203125, -1.057373046875, -1.01434326171875, -0.9713134765625, -0.92828369140625, -0.88525390625, -0.84222412109375, -0.7991943359375, -0.75616455078125, -0.713134765625, -0.67010498046875, -0.6270751953125, -0.58404541015625, -0.541015625, -0.49798583984375, -0.4549560546875, -0.41192626953125, -0.368896484375, -0.32586669921875, -0.2828369140625, -0.23980712890625, -0.19677734375, -0.15374755859375, -0.1107177734375, -0.06768798828125, -0.024658203125, 0.01837158203125, 0.0614013671875, 0.10443115234375, 0.1474609375, 0.19049072265625, 0.2335205078125, 0.27655029296875, 0.319580078125, 0.36260986328125, 0.4056396484375, 0.44866943359375, 0.49169921875, 0.53472900390625, 0.5777587890625, 0.62078857421875, 0.663818359375, 0.70684814453125, 0.7498779296875, 0.79290771484375, 0.8359375, 0.87896728515625, 0.9219970703125, 0.96502685546875, 1.008056640625, 1.05108642578125, 1.0941162109375, 1.13714599609375, 1.18017578125, 1.22320556640625, 1.2662353515625, 1.30926513671875, 1.352294921875, 1.39532470703125, 1.4383544921875, 1.48138427734375, 1.5244140625]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 6.0, 8.0, 4.0, 9.0, 6.0, 10.0, 12.0, 14.0, 18.0, 18.0, 31.0, 19.0, 21.0, 27.0, 34.0, 42.0, 36.0, 34.0, 45.0, 56.0, 52.0, 43.0, 47.0, 39.0, 37.0, 29.0, 48.0, 30.0, 31.0, 25.0, 32.0, 29.0, 20.0, 16.0, 20.0, 16.0, 9.0, 6.0, 6.0, 4.0, 7.0, 3.0, 4.0, 2.0, 3.0, 0.0, 2.0, 1.0, 2.0], "bins": [-1.2685546875, -1.2338943481445312, -1.1992340087890625, -1.1645736694335938, -1.129913330078125, -1.0952529907226562, -1.0605926513671875, -1.0259323120117188, -0.99127197265625, -0.9566116333007812, -0.9219512939453125, -0.8872909545898438, -0.852630615234375, -0.8179702758789062, -0.7833099365234375, -0.7486495971679688, -0.7139892578125, -0.6793289184570312, -0.6446685791015625, -0.6100082397460938, -0.575347900390625, -0.5406875610351562, -0.5060272216796875, -0.47136688232421875, -0.43670654296875, -0.40204620361328125, -0.3673858642578125, -0.33272552490234375, -0.298065185546875, -0.26340484619140625, -0.2287445068359375, -0.19408416748046875, -0.159423828125, -0.12476348876953125, -0.0901031494140625, -0.05544281005859375, -0.020782470703125, 0.01387786865234375, 0.0485382080078125, 0.08319854736328125, 0.11785888671875, 0.15251922607421875, 0.1871795654296875, 0.22183990478515625, 0.256500244140625, 0.29116058349609375, 0.3258209228515625, 0.36048126220703125, 0.3951416015625, 0.42980194091796875, 0.4644622802734375, 0.49912261962890625, 0.533782958984375, 0.5684432983398438, 0.6031036376953125, 0.6377639770507812, 0.67242431640625, 0.7070846557617188, 0.7417449951171875, 0.7764053344726562, 0.811065673828125, 0.8457260131835938, 0.8803863525390625, 0.9150466918945312, 0.94970703125]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 5.0, 2.0, 6.0, 5.0, 8.0, 12.0, 5.0, 12.0, 14.0, 24.0, 27.0, 40.0, 53.0, 59.0, 93.0, 140.0, 221.0, 354.0, 656.0, 1548.0, 5648.0, 61164.0, 922030.0, 48167.0, 5045.0, 1551.0, 638.0, 365.0, 182.0, 128.0, 93.0, 61.0, 56.0, 32.0, 26.0, 25.0, 21.0, 7.0, 8.0, 10.0, 5.0, 4.0, 2.0, 4.0, 3.0, 3.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.6806640625, -1.6304473876953125, -1.580230712890625, -1.5300140380859375, -1.47979736328125, -1.4295806884765625, -1.379364013671875, -1.3291473388671875, -1.2789306640625, -1.2287139892578125, -1.178497314453125, -1.1282806396484375, -1.07806396484375, -1.0278472900390625, -0.977630615234375, -0.9274139404296875, -0.877197265625, -0.8269805908203125, -0.776763916015625, -0.7265472412109375, -0.67633056640625, -0.6261138916015625, -0.575897216796875, -0.5256805419921875, -0.4754638671875, -0.4252471923828125, -0.375030517578125, -0.3248138427734375, -0.27459716796875, -0.2243804931640625, -0.174163818359375, -0.1239471435546875, -0.07373046875, -0.0235137939453125, 0.026702880859375, 0.0769195556640625, 0.12713623046875, 0.1773529052734375, 0.227569580078125, 0.2777862548828125, 0.3280029296875, 0.3782196044921875, 0.428436279296875, 0.4786529541015625, 0.52886962890625, 0.5790863037109375, 0.629302978515625, 0.6795196533203125, 0.729736328125, 0.7799530029296875, 0.830169677734375, 0.8803863525390625, 0.93060302734375, 0.9808197021484375, 1.031036376953125, 1.0812530517578125, 1.1314697265625, 1.1816864013671875, 1.231903076171875, 1.2821197509765625, 1.33233642578125, 1.3825531005859375, 1.432769775390625, 1.4829864501953125, 1.533203125]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 6.0, 4.0, 13.0, 10.0, 17.0, 47.0, 84.0, 86.0, 115.0, 171.0, 143.0, 114.0, 70.0, 54.0, 29.0, 24.0, 14.0, 5.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00011461973190307617, -0.0001095188781619072, -0.00010441802442073822, -9.931717067956924e-05, -9.421631693840027e-05, -8.911546319723129e-05, -8.401460945606232e-05, -7.891375571489334e-05, -7.381290197372437e-05, -6.871204823255539e-05, -6.361119449138641e-05, -5.851034075021744e-05, -5.340948700904846e-05, -4.8308633267879486e-05, -4.320777952671051e-05, -3.8106925785541534e-05, -3.300607204437256e-05, -2.7905218303203583e-05, -2.2804364562034607e-05, -1.770351082086563e-05, -1.2602657079696655e-05, -7.5018033385276794e-06, -2.4009495973587036e-06, 2.6999041438102722e-06, 7.800757884979248e-06, 1.2901611626148224e-05, 1.80024653673172e-05, 2.3103319108486176e-05, 2.820417284965515e-05, 3.330502659082413e-05, 3.84058803319931e-05, 4.350673407316208e-05, 4.8607587814331055e-05, 5.370844155550003e-05, 5.8809295296669006e-05, 6.391014903783798e-05, 6.901100277900696e-05, 7.411185652017593e-05, 7.921271026134491e-05, 8.431356400251389e-05, 8.941441774368286e-05, 9.451527148485184e-05, 9.961612522602081e-05, 0.00010471697896718979, 0.00010981783270835876, 0.00011491868644952774, 0.00012001954019069672, 0.0001251203939318657, 0.00013022124767303467, 0.00013532210141420364, 0.00014042295515537262, 0.0001455238088965416, 0.00015062466263771057, 0.00015572551637887955, 0.00016082637012004852, 0.0001659272238612175, 0.00017102807760238647, 0.00017612893134355545, 0.00018122978508472443, 0.0001863306388258934, 0.00019143149256706238, 0.00019653234630823135, 0.00020163320004940033, 0.0002067340537905693, 0.00021183490753173828]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 3.0, 1.0, 9.0, 9.0, 16.0, 23.0, 28.0, 47.0, 70.0, 108.0, 238.0, 436.0, 1182.0, 3956.0, 42088.0, 948907.0, 45061.0, 4161.0, 1154.0, 460.0, 253.0, 129.0, 75.0, 44.0, 25.0, 22.0, 15.0, 11.0, 9.0, 5.0, 4.0, 3.0, 7.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.73828125, -1.681304931640625, -1.62432861328125, -1.567352294921875, -1.5103759765625, -1.453399658203125, -1.39642333984375, -1.339447021484375, -1.282470703125, -1.225494384765625, -1.16851806640625, -1.111541748046875, -1.0545654296875, -0.997589111328125, -0.94061279296875, -0.883636474609375, -0.82666015625, -0.769683837890625, -0.71270751953125, -0.655731201171875, -0.5987548828125, -0.541778564453125, -0.48480224609375, -0.427825927734375, -0.370849609375, -0.313873291015625, -0.25689697265625, -0.199920654296875, -0.1429443359375, -0.085968017578125, -0.02899169921875, 0.027984619140625, 0.0849609375, 0.141937255859375, 0.19891357421875, 0.255889892578125, 0.3128662109375, 0.369842529296875, 0.42681884765625, 0.483795166015625, 0.540771484375, 0.597747802734375, 0.65472412109375, 0.711700439453125, 0.7686767578125, 0.825653076171875, 0.88262939453125, 0.939605712890625, 0.99658203125, 1.053558349609375, 1.11053466796875, 1.167510986328125, 1.2244873046875, 1.281463623046875, 1.33843994140625, 1.395416259765625, 1.452392578125, 1.509368896484375, 1.56634521484375, 1.623321533203125, 1.6802978515625, 1.737274169921875, 1.79425048828125, 1.851226806640625, 1.908203125]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 4.0, 10.0, 22.0, 58.0, 173.0, 296.0, 248.0, 111.0, 37.0, 13.0, 10.0, 3.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.96875, -2.8922882080078125, -2.815826416015625, -2.7393646240234375, -2.66290283203125, -2.5864410400390625, -2.509979248046875, -2.4335174560546875, -2.3570556640625, -2.2805938720703125, -2.204132080078125, -2.1276702880859375, -2.05120849609375, -1.9747467041015625, -1.898284912109375, -1.8218231201171875, -1.745361328125, -1.6688995361328125, -1.592437744140625, -1.5159759521484375, -1.43951416015625, -1.3630523681640625, -1.286590576171875, -1.2101287841796875, -1.1336669921875, -1.0572052001953125, -0.980743408203125, -0.9042816162109375, -0.82781982421875, -0.7513580322265625, -0.674896240234375, -0.5984344482421875, -0.52197265625, -0.4455108642578125, -0.369049072265625, -0.2925872802734375, -0.21612548828125, -0.1396636962890625, -0.063201904296875, 0.0132598876953125, 0.0897216796875, 0.1661834716796875, 0.242645263671875, 0.3191070556640625, 0.39556884765625, 0.4720306396484375, 0.548492431640625, 0.6249542236328125, 0.701416015625, 0.7778778076171875, 0.854339599609375, 0.9308013916015625, 1.00726318359375, 1.0837249755859375, 1.160186767578125, 1.2366485595703125, 1.3131103515625, 1.3895721435546875, 1.466033935546875, 1.5424957275390625, 1.61895751953125, 1.6954193115234375, 1.771881103515625, 1.8483428955078125, 1.9248046875]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 4.0, 3.0, 5.0, 7.0, 17.0, 20.0, 18.0, 34.0, 44.0, 56.0, 78.0, 83.0, 96.0, 89.0, 72.0, 75.0, 79.0, 68.0, 38.0, 27.0, 24.0, 18.0, 13.0, 13.0, 9.0, 2.0, 4.0, 4.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.188946723937988, -5.946016311645508, -5.703085899353027, -5.460155010223389, -5.217224597930908, -4.974294185638428, -4.731363296508789, -4.488432884216309, -4.245502471923828, -4.002572059631348, -3.759641408920288, -3.5167107582092285, -3.273780345916748, -3.0308499336242676, -2.787919282913208, -2.5449886322021484, -2.302058219909668, -2.0591278076171875, -1.816197156906128, -1.573266625404358, -1.330336093902588, -1.0874055624008179, -0.8444750308990479, -0.6015444993972778, -0.3586139678955078, -0.11568343639373779, 0.12724709510803223, 0.37017762660980225, 0.6131081581115723, 0.8560386896133423, 1.0989692211151123, 1.3418997526168823, 1.5848302841186523, 1.8277608156204224, 2.0706913471221924, 2.313621997833252, 2.5565524101257324, 2.799482822418213, 3.0424134731292725, 3.285344123840332, 3.5282745361328125, 3.771204948425293, 4.014135360717773, 4.257066249847412, 4.499996662139893, 4.742927074432373, 4.985857963562012, 5.228788375854492, 5.471718788146973, 5.714649200439453, 5.957579612731934, 6.200510501861572, 6.443440914154053, 6.686371326446533, 6.929302215576172, 7.172232627868652, 7.415163040161133, 7.658093452453613, 7.901023864746094, 8.143954277038574, 8.386884689331055, 8.629816055297852, 8.872746467590332, 9.115676879882812, 9.358607292175293]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 3.0, 5.0, 2.0, 3.0, 4.0, 11.0, 6.0, 7.0, 12.0, 7.0, 14.0, 10.0, 11.0, 15.0, 15.0, 22.0, 19.0, 32.0, 25.0, 34.0, 20.0, 30.0, 34.0, 44.0, 54.0, 49.0, 33.0, 47.0, 34.0, 41.0, 35.0, 40.0, 37.0, 37.0, 22.0, 27.0, 27.0, 25.0, 13.0, 21.0, 19.0, 12.0, 9.0, 8.0, 6.0, 6.0, 5.0, 5.0, 5.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0], "bins": [-6.115711212158203, -5.9148054122924805, -5.713899612426758, -5.512993812561035, -5.312088489532471, -5.111182689666748, -4.910276889801025, -4.709371089935303, -4.50846529006958, -4.307559490203857, -4.106653690338135, -3.905748128890991, -3.7048423290252686, -3.503936767578125, -3.3030309677124023, -3.1021251678466797, -2.901219606399536, -2.7003138065338135, -2.49940824508667, -2.2985024452209473, -2.0975966453552246, -1.8966909646987915, -1.6957852840423584, -1.4948794841766357, -1.2939738035202026, -1.0930681228637695, -0.8921623229980469, -0.6912566423416138, -0.4903509020805359, -0.289445161819458, -0.0885394811630249, 0.11236631870269775, 0.31327199935913086, 0.5141777396202087, 0.7150834798812866, 0.9159891605377197, 1.1168949604034424, 1.3178006410598755, 1.5187063217163086, 1.7196121215820312, 1.9205178022384644, 2.1214234828948975, 2.32232928276062, 2.5232348442077637, 2.7241406440734863, 2.925046443939209, 3.1259522438049316, 3.3268580436706543, 3.527763605117798, 3.7286694049835205, 3.929574966430664, 4.130480766296387, 4.331386566162109, 4.532292366027832, 4.733198165893555, 4.934103965759277, 5.135009288787842, 5.3359150886535645, 5.536820888519287, 5.737726211547852, 5.938632011413574, 6.139537811279297, 6.3404436111450195, 6.541349411010742, 6.742255210876465]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 7.0, 2.0, 5.0, 8.0, 18.0, 20.0, 66.0, 120.0, 332.0, 1494.0, 18217.0, 4154248.0, 17811.0, 1415.0, 320.0, 104.0, 40.0, 26.0, 16.0, 8.0, 2.0, 6.0, 0.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.86328125, -4.713623046875, -4.56396484375, -4.414306640625, -4.2646484375, -4.114990234375, -3.96533203125, -3.815673828125, -3.666015625, -3.516357421875, -3.36669921875, -3.217041015625, -3.0673828125, -2.917724609375, -2.76806640625, -2.618408203125, -2.46875, -2.319091796875, -2.16943359375, -2.019775390625, -1.8701171875, -1.720458984375, -1.57080078125, -1.421142578125, -1.271484375, -1.121826171875, -0.97216796875, -0.822509765625, -0.6728515625, -0.523193359375, -0.37353515625, -0.223876953125, -0.07421875, 0.075439453125, 0.22509765625, 0.374755859375, 0.5244140625, 0.674072265625, 0.82373046875, 0.973388671875, 1.123046875, 1.272705078125, 1.42236328125, 1.572021484375, 1.7216796875, 1.871337890625, 2.02099609375, 2.170654296875, 2.3203125, 2.469970703125, 2.61962890625, 2.769287109375, 2.9189453125, 3.068603515625, 3.21826171875, 3.367919921875, 3.517578125, 3.667236328125, 3.81689453125, 3.966552734375, 4.1162109375, 4.265869140625, 4.41552734375, 4.565185546875, 4.71484375]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 1.0, 5.0, 3.0, 12.0, 8.0, 10.0, 15.0, 22.0, 20.0, 22.0, 27.0, 39.0, 42.0, 46.0, 57.0, 53.0, 54.0, 57.0, 68.0, 57.0, 54.0, 44.0, 40.0, 60.0, 28.0, 30.0, 31.0, 21.0, 22.0, 6.0, 10.0, 9.0, 15.0, 1.0, 6.0, 5.0, 0.0, 4.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.32373046875, -0.3129768371582031, -0.30222320556640625, -0.2914695739746094, -0.2807159423828125, -0.2699623107910156, -0.25920867919921875, -0.24845504760742188, -0.237701416015625, -0.22694778442382812, -0.21619415283203125, -0.20544052124023438, -0.1946868896484375, -0.18393325805664062, -0.17317962646484375, -0.16242599487304688, -0.15167236328125, -0.14091873168945312, -0.13016510009765625, -0.11941146850585938, -0.1086578369140625, -0.09790420532226562, -0.08715057373046875, -0.07639694213867188, -0.065643310546875, -0.054889678955078125, -0.04413604736328125, -0.033382415771484375, -0.0226287841796875, -0.011875152587890625, -0.00112152099609375, 0.009632110595703125, 0.0203857421875, 0.031139373779296875, 0.04189300537109375, 0.052646636962890625, 0.0634002685546875, 0.07415390014648438, 0.08490753173828125, 0.09566116333007812, 0.106414794921875, 0.11716842651367188, 0.12792205810546875, 0.13867568969726562, 0.1494293212890625, 0.16018295288085938, 0.17093658447265625, 0.18169021606445312, 0.19244384765625, 0.20319747924804688, 0.21395111083984375, 0.22470474243164062, 0.2354583740234375, 0.24621200561523438, 0.25696563720703125, 0.2677192687988281, 0.278472900390625, 0.2892265319824219, 0.29998016357421875, 0.3107337951660156, 0.3214874267578125, 0.3322410583496094, 0.34299468994140625, 0.3537483215332031, 0.364501953125]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 6.0, 5.0, 6.0, 9.0, 11.0, 24.0, 34.0, 46.0, 62.0, 80.0, 93.0, 185.0, 264.0, 412.0, 541.0, 834.0, 1339.0, 2209.0, 3597.0, 7278.0, 16084.0, 47382.0, 324438.0, 3642325.0, 99036.0, 25084.0, 10224.0, 4990.0, 2809.0, 1690.0, 1075.0, 661.0, 460.0, 310.0, 211.0, 153.0, 90.0, 58.0, 51.0, 36.0, 24.0, 21.0, 18.0, 6.0, 11.0, 4.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.8447265625, -0.8167877197265625, -0.788848876953125, -0.7609100341796875, -0.73297119140625, -0.7050323486328125, -0.677093505859375, -0.6491546630859375, -0.6212158203125, -0.5932769775390625, -0.565338134765625, -0.5373992919921875, -0.50946044921875, -0.4815216064453125, -0.453582763671875, -0.4256439208984375, -0.397705078125, -0.3697662353515625, -0.341827392578125, -0.3138885498046875, -0.28594970703125, -0.2580108642578125, -0.230072021484375, -0.2021331787109375, -0.1741943359375, -0.1462554931640625, -0.118316650390625, -0.0903778076171875, -0.06243896484375, -0.0345001220703125, -0.006561279296875, 0.0213775634765625, 0.04931640625, 0.0772552490234375, 0.105194091796875, 0.1331329345703125, 0.16107177734375, 0.1890106201171875, 0.216949462890625, 0.2448883056640625, 0.2728271484375, 0.3007659912109375, 0.328704833984375, 0.3566436767578125, 0.38458251953125, 0.4125213623046875, 0.440460205078125, 0.4683990478515625, 0.496337890625, 0.5242767333984375, 0.552215576171875, 0.5801544189453125, 0.60809326171875, 0.6360321044921875, 0.663970947265625, 0.6919097900390625, 0.7198486328125, 0.7477874755859375, 0.775726318359375, 0.8036651611328125, 0.83160400390625, 0.8595428466796875, 0.887481689453125, 0.9154205322265625, 0.943359375]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 4.0, 7.0, 3.0, 4.0, 5.0, 13.0, 13.0, 30.0, 52.0, 116.0, 319.0, 3034.0, 250.0, 94.0, 46.0, 25.0, 14.0, 9.0, 18.0, 3.0, 3.0, 5.0, 3.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.375732421875, -0.364166259765625, -0.35260009765625, -0.341033935546875, -0.3294677734375, -0.317901611328125, -0.30633544921875, -0.294769287109375, -0.283203125, -0.271636962890625, -0.26007080078125, -0.248504638671875, -0.2369384765625, -0.225372314453125, -0.21380615234375, -0.202239990234375, -0.190673828125, -0.179107666015625, -0.16754150390625, -0.155975341796875, -0.1444091796875, -0.132843017578125, -0.12127685546875, -0.109710693359375, -0.09814453125, -0.086578369140625, -0.07501220703125, -0.063446044921875, -0.0518798828125, -0.040313720703125, -0.02874755859375, -0.017181396484375, -0.005615234375, 0.005950927734375, 0.01751708984375, 0.029083251953125, 0.0406494140625, 0.052215576171875, 0.06378173828125, 0.075347900390625, 0.0869140625, 0.098480224609375, 0.11004638671875, 0.121612548828125, 0.1331787109375, 0.144744873046875, 0.15631103515625, 0.167877197265625, 0.179443359375, 0.191009521484375, 0.20257568359375, 0.214141845703125, 0.2257080078125, 0.237274169921875, 0.24884033203125, 0.260406494140625, 0.27197265625, 0.283538818359375, 0.29510498046875, 0.306671142578125, 0.3182373046875, 0.329803466796875, 0.34136962890625, 0.352935791015625, 0.364501953125]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 3.0, 15.0, 74.0, 280.0, 433.0, 172.0, 27.0, 9.0, 4.0], "bins": [-9.228479385375977, -9.073238372802734, -8.917998313903809, -8.762757301330566, -8.607516288757324, -8.452276229858398, -8.297035217285156, -8.141794204711914, -7.98655366897583, -7.831313133239746, -7.676072120666504, -7.52083158493042, -7.365591049194336, -7.210350036621094, -7.05510950088501, -6.899868965148926, -6.744627952575684, -6.5893874168396, -6.434146404266357, -6.278905868530273, -6.1236653327941895, -5.968424320220947, -5.813183784484863, -5.657942771911621, -5.502702713012695, -5.347462177276611, -5.192221164703369, -5.036980628967285, -4.881740093231201, -4.726499080657959, -4.571258544921875, -4.416017532348633, -4.260776519775391, -4.105535984039307, -3.9502952098846436, -3.7950544357299805, -3.6398136615753174, -3.4845728874206543, -3.3293323516845703, -3.1740915775299072, -3.0188510417938232, -2.86361026763916, -2.708369731903076, -2.553128957748413, -2.39788818359375, -2.242647647857666, -2.087406873703003, -1.9321660995483398, -1.7769255638122559, -1.6216849088668823, -1.4664441347122192, -1.3112034797668457, -1.1559627056121826, -1.000722050666809, -0.8454813957214355, -0.6902406215667725, -0.5349999070167542, -0.37975919246673584, -0.22451850771903992, -0.069277822971344, 0.08596289157867432, 0.24120360612869263, 0.39644426107406616, 0.5516850352287292, 0.7069256901741028]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 4.0, 5.0, 5.0, 2.0, 8.0, 6.0, 9.0, 9.0, 13.0, 16.0, 15.0, 11.0, 14.0, 33.0, 24.0, 29.0, 22.0, 36.0, 35.0, 29.0, 38.0, 40.0, 37.0, 43.0, 36.0, 36.0, 39.0, 48.0, 42.0, 23.0, 33.0, 36.0, 27.0, 34.0, 22.0, 19.0, 19.0, 22.0, 15.0, 12.0, 19.0, 12.0, 8.0, 8.0, 8.0, 3.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.6587540507316589, -0.6377313137054443, -0.6167086362838745, -0.5956858992576599, -0.5746631622314453, -0.5536404848098755, -0.5326177477836609, -0.5115950107574463, -0.4905723035335541, -0.46954959630966187, -0.44852685928344727, -0.42750415205955505, -0.40648144483566284, -0.38545870780944824, -0.36443600058555603, -0.3434132933616638, -0.3223905563354492, -0.301367849111557, -0.2803451120853424, -0.2593224048614502, -0.2382996827363968, -0.21727696061134338, -0.19625425338745117, -0.17523153126239777, -0.15420880913734436, -0.13318608701229095, -0.11216337233781815, -0.09114065766334534, -0.07011793553829193, -0.049095213413238525, -0.028072506189346313, -0.007049784064292908, 0.013972878456115723, 0.03499559685587883, 0.05601831525564194, 0.07704102993011475, 0.09806375205516815, 0.11908647418022156, 0.14010918140411377, 0.16113190352916718, 0.18215462565422058, 0.203177347779274, 0.2242000699043274, 0.2452227771282196, 0.2662454843521118, 0.2872682213783264, 0.30829092860221863, 0.32931363582611084, 0.35033637285232544, 0.37135908007621765, 0.39238181710243225, 0.41340452432632446, 0.43442726135253906, 0.4554499685764313, 0.4764726758003235, 0.4974954128265381, 0.5185180902481079, 0.5395408272743225, 0.5605635046958923, 0.5815862417221069, 0.6026089787483215, 0.6236317157745361, 0.644654393196106, 0.6656771302223206, 0.6866998672485352]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 5.0, 4.0, 2.0, 6.0, 4.0, 9.0, 10.0, 6.0, 18.0, 20.0, 17.0, 29.0, 41.0, 61.0, 84.0, 156.0, 220.0, 402.0, 733.0, 1519.0, 3526.0, 9054.0, 25921.0, 81428.0, 239638.0, 376133.0, 206335.0, 67336.0, 21712.0, 7838.0, 3155.0, 1415.0, 681.0, 398.0, 217.0, 125.0, 77.0, 64.0, 52.0, 31.0, 23.0, 10.0, 9.0, 7.0, 7.0, 6.0, 6.0, 5.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0], "bins": [-0.99609375, -0.9671554565429688, -0.9382171630859375, -0.9092788696289062, -0.880340576171875, -0.8514022827148438, -0.8224639892578125, -0.7935256958007812, -0.76458740234375, -0.7356491088867188, -0.7067108154296875, -0.6777725219726562, -0.648834228515625, -0.6198959350585938, -0.5909576416015625, -0.5620193481445312, -0.5330810546875, -0.5041427612304688, -0.4752044677734375, -0.44626617431640625, -0.417327880859375, -0.38838958740234375, -0.3594512939453125, -0.33051300048828125, -0.30157470703125, -0.27263641357421875, -0.2436981201171875, -0.21475982666015625, -0.185821533203125, -0.15688323974609375, -0.1279449462890625, -0.09900665283203125, -0.070068359375, -0.04113006591796875, -0.0121917724609375, 0.01674652099609375, 0.045684814453125, 0.07462310791015625, 0.1035614013671875, 0.13249969482421875, 0.16143798828125, 0.19037628173828125, 0.2193145751953125, 0.24825286865234375, 0.277191162109375, 0.30612945556640625, 0.3350677490234375, 0.36400604248046875, 0.3929443359375, 0.42188262939453125, 0.4508209228515625, 0.47975921630859375, 0.508697509765625, 0.5376358032226562, 0.5665740966796875, 0.5955123901367188, 0.62445068359375, 0.6533889770507812, 0.6823272705078125, 0.7112655639648438, 0.740203857421875, 0.7691421508789062, 0.7980804443359375, 0.8270187377929688, 0.85595703125]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 2.0, 3.0, 5.0, 6.0, 8.0, 10.0, 8.0, 14.0, 19.0, 22.0, 24.0, 22.0, 26.0, 45.0, 39.0, 43.0, 57.0, 51.0, 52.0, 51.0, 54.0, 54.0, 43.0, 37.0, 44.0, 35.0, 44.0, 32.0, 33.0, 32.0, 13.0, 10.0, 10.0, 11.0, 9.0, 9.0, 5.0, 4.0, 4.0, 5.0, 2.0, 7.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.3359375, -0.32544708251953125, -0.3149566650390625, -0.30446624755859375, -0.293975830078125, -0.28348541259765625, -0.2729949951171875, -0.26250457763671875, -0.25201416015625, -0.24152374267578125, -0.2310333251953125, -0.22054290771484375, -0.210052490234375, -0.19956207275390625, -0.1890716552734375, -0.17858123779296875, -0.1680908203125, -0.15760040283203125, -0.1471099853515625, -0.13661956787109375, -0.126129150390625, -0.11563873291015625, -0.1051483154296875, -0.09465789794921875, -0.08416748046875, -0.07367706298828125, -0.0631866455078125, -0.05269622802734375, -0.042205810546875, -0.03171539306640625, -0.0212249755859375, -0.01073455810546875, -0.000244140625, 0.01024627685546875, 0.0207366943359375, 0.03122711181640625, 0.041717529296875, 0.05220794677734375, 0.0626983642578125, 0.07318878173828125, 0.08367919921875, 0.09416961669921875, 0.1046600341796875, 0.11515045166015625, 0.125640869140625, 0.13613128662109375, 0.1466217041015625, 0.15711212158203125, 0.1676025390625, 0.17809295654296875, 0.1885833740234375, 0.19907379150390625, 0.209564208984375, 0.22005462646484375, 0.2305450439453125, 0.24103546142578125, 0.25152587890625, 0.26201629638671875, 0.2725067138671875, 0.28299713134765625, 0.293487548828125, 0.30397796630859375, 0.3144683837890625, 0.32495880126953125, 0.33544921875]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 5.0, 6.0, 3.0, 9.0, 18.0, 15.0, 31.0, 26.0, 45.0, 76.0, 91.0, 184.0, 257.0, 459.0, 808.0, 1749.0, 3962.0, 11173.0, 44029.0, 266752.0, 562989.0, 120254.0, 23109.0, 6898.0, 2714.0, 1290.0, 620.0, 376.0, 222.0, 136.0, 83.0, 51.0, 43.0, 22.0, 24.0, 9.0, 10.0, 4.0, 2.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.306640625, -1.2648773193359375, -1.223114013671875, -1.1813507080078125, -1.13958740234375, -1.0978240966796875, -1.056060791015625, -1.0142974853515625, -0.9725341796875, -0.9307708740234375, -0.889007568359375, -0.8472442626953125, -0.80548095703125, -0.7637176513671875, -0.721954345703125, -0.6801910400390625, -0.638427734375, -0.5966644287109375, -0.554901123046875, -0.5131378173828125, -0.47137451171875, -0.4296112060546875, -0.387847900390625, -0.3460845947265625, -0.3043212890625, -0.2625579833984375, -0.220794677734375, -0.1790313720703125, -0.13726806640625, -0.0955047607421875, -0.053741455078125, -0.0119781494140625, 0.02978515625, 0.0715484619140625, 0.113311767578125, 0.1550750732421875, 0.19683837890625, 0.2386016845703125, 0.280364990234375, 0.3221282958984375, 0.3638916015625, 0.4056549072265625, 0.447418212890625, 0.4891815185546875, 0.53094482421875, 0.5727081298828125, 0.614471435546875, 0.6562347412109375, 0.697998046875, 0.7397613525390625, 0.781524658203125, 0.8232879638671875, 0.86505126953125, 0.9068145751953125, 0.948577880859375, 0.9903411865234375, 1.0321044921875, 1.0738677978515625, 1.115631103515625, 1.1573944091796875, 1.19915771484375, 1.2409210205078125, 1.282684326171875, 1.3244476318359375, 1.3662109375]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 5.0, 1.0, 6.0, 5.0, 8.0, 5.0, 14.0, 19.0, 20.0, 30.0, 28.0, 34.0, 40.0, 44.0, 40.0, 59.0, 45.0, 63.0, 54.0, 61.0, 65.0, 43.0, 49.0, 49.0, 30.0, 39.0, 32.0, 27.0, 19.0, 15.0, 16.0, 6.0, 10.0, 11.0, 2.0, 4.0, 5.0, 2.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.4951171875, -1.441497802734375, -1.38787841796875, -1.334259033203125, -1.2806396484375, -1.227020263671875, -1.17340087890625, -1.119781494140625, -1.066162109375, -1.012542724609375, -0.95892333984375, -0.905303955078125, -0.8516845703125, -0.798065185546875, -0.74444580078125, -0.690826416015625, -0.63720703125, -0.583587646484375, -0.52996826171875, -0.476348876953125, -0.4227294921875, -0.369110107421875, -0.31549072265625, -0.261871337890625, -0.208251953125, -0.154632568359375, -0.10101318359375, -0.047393798828125, 0.0062255859375, 0.059844970703125, 0.11346435546875, 0.167083740234375, 0.220703125, 0.274322509765625, 0.32794189453125, 0.381561279296875, 0.4351806640625, 0.488800048828125, 0.54241943359375, 0.596038818359375, 0.649658203125, 0.703277587890625, 0.75689697265625, 0.810516357421875, 0.8641357421875, 0.917755126953125, 0.97137451171875, 1.024993896484375, 1.07861328125, 1.132232666015625, 1.18585205078125, 1.239471435546875, 1.2930908203125, 1.346710205078125, 1.40032958984375, 1.453948974609375, 1.507568359375, 1.561187744140625, 1.61480712890625, 1.668426513671875, 1.7220458984375, 1.775665283203125, 1.82928466796875, 1.882904052734375, 1.9365234375]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 2.0, 6.0, 10.0, 6.0, 39.0, 71.0, 146.0, 342.0, 944.0, 3860.0, 72160.0, 948272.0, 19481.0, 2055.0, 649.0, 264.0, 134.0, 52.0, 34.0, 11.0, 8.0, 9.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.373046875, -2.29730224609375, -2.2215576171875, -2.14581298828125, -2.070068359375, -1.99432373046875, -1.9185791015625, -1.84283447265625, -1.76708984375, -1.69134521484375, -1.6156005859375, -1.53985595703125, -1.464111328125, -1.38836669921875, -1.3126220703125, -1.23687744140625, -1.1611328125, -1.08538818359375, -1.0096435546875, -0.93389892578125, -0.858154296875, -0.78240966796875, -0.7066650390625, -0.63092041015625, -0.55517578125, -0.47943115234375, -0.4036865234375, -0.32794189453125, -0.252197265625, -0.17645263671875, -0.1007080078125, -0.02496337890625, 0.05078125, 0.12652587890625, 0.2022705078125, 0.27801513671875, 0.353759765625, 0.42950439453125, 0.5052490234375, 0.58099365234375, 0.65673828125, 0.73248291015625, 0.8082275390625, 0.88397216796875, 0.959716796875, 1.03546142578125, 1.1112060546875, 1.18695068359375, 1.2626953125, 1.33843994140625, 1.4141845703125, 1.48992919921875, 1.565673828125, 1.64141845703125, 1.7171630859375, 1.79290771484375, 1.86865234375, 1.94439697265625, 2.0201416015625, 2.09588623046875, 2.171630859375, 2.24737548828125, 2.3231201171875, 2.39886474609375, 2.474609375]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 13.0, 18.0, 38.0, 76.0, 160.0, 193.0, 211.0, 149.0, 78.0, 42.0, 18.0, 7.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003209114074707031, -0.0003134477883577347, -0.00030598416924476624, -0.0002985205501317978, -0.00029105693101882935, -0.0002835933119058609, -0.00027612969279289246, -0.000268666073679924, -0.00026120245456695557, -0.0002537388354539871, -0.0002462752163410187, -0.00023881159722805023, -0.0002313479781150818, -0.00022388435900211334, -0.0002164207398891449, -0.00020895712077617645, -0.000201493501663208, -0.00019402988255023956, -0.00018656626343727112, -0.00017910264432430267, -0.00017163902521133423, -0.00016417540609836578, -0.00015671178698539734, -0.0001492481678724289, -0.00014178454875946045, -0.000134320929646492, -0.00012685731053352356, -0.00011939369142055511, -0.00011193007230758667, -0.00010446645319461823, -9.700283408164978e-05, -8.953921496868134e-05, -8.207559585571289e-05, -7.461197674274445e-05, -6.7148357629776e-05, -5.9684738516807556e-05, -5.222111940383911e-05, -4.4757500290870667e-05, -3.729388117790222e-05, -2.9830262064933777e-05, -2.2366642951965332e-05, -1.4903023838996887e-05, -7.439404726028442e-06, 2.421438694000244e-08, 7.487833499908447e-06, 1.4951452612876892e-05, 2.2415071725845337e-05, 2.9878690838813782e-05, 3.7342309951782227e-05, 4.480592906475067e-05, 5.2269548177719116e-05, 5.973316729068756e-05, 6.7196786403656e-05, 7.466040551662445e-05, 8.21240246295929e-05, 8.958764374256134e-05, 9.705126285552979e-05, 0.00010451488196849823, 0.00011197850108146667, 0.00011944212019443512, 0.00012690573930740356, 0.000134369358420372, 0.00014183297753334045, 0.0001492965966463089, 0.00015676021575927734]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 4.0, 3.0, 5.0, 9.0, 17.0, 27.0, 40.0, 73.0, 159.0, 341.0, 925.0, 4243.0, 55987.0, 947725.0, 34236.0, 3315.0, 806.0, 319.0, 135.0, 88.0, 43.0, 14.0, 21.0, 12.0, 6.0, 4.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.125, -2.065032958984375, -2.00506591796875, -1.945098876953125, -1.8851318359375, -1.825164794921875, -1.76519775390625, -1.705230712890625, -1.645263671875, -1.585296630859375, -1.52532958984375, -1.465362548828125, -1.4053955078125, -1.345428466796875, -1.28546142578125, -1.225494384765625, -1.16552734375, -1.105560302734375, -1.04559326171875, -0.985626220703125, -0.9256591796875, -0.865692138671875, -0.80572509765625, -0.745758056640625, -0.685791015625, -0.625823974609375, -0.56585693359375, -0.505889892578125, -0.4459228515625, -0.385955810546875, -0.32598876953125, -0.266021728515625, -0.2060546875, -0.146087646484375, -0.08612060546875, -0.026153564453125, 0.0338134765625, 0.093780517578125, 0.15374755859375, 0.213714599609375, 0.273681640625, 0.333648681640625, 0.39361572265625, 0.453582763671875, 0.5135498046875, 0.573516845703125, 0.63348388671875, 0.693450927734375, 0.75341796875, 0.813385009765625, 0.87335205078125, 0.933319091796875, 0.9932861328125, 1.053253173828125, 1.11322021484375, 1.173187255859375, 1.233154296875, 1.293121337890625, 1.35308837890625, 1.413055419921875, 1.4730224609375, 1.532989501953125, 1.59295654296875, 1.652923583984375, 1.712890625]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 0.0, 5.0, 2.0, 2.0, 5.0, 11.0, 12.0, 16.0, 24.0, 42.0, 74.0, 109.0, 124.0, 163.0, 159.0, 108.0, 57.0, 37.0, 21.0, 7.0, 7.0, 11.0, 3.0, 1.0, 3.0, 3.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.86328125, -1.8203811645507812, -1.7774810791015625, -1.7345809936523438, -1.691680908203125, -1.6487808227539062, -1.6058807373046875, -1.5629806518554688, -1.52008056640625, -1.4771804809570312, -1.4342803955078125, -1.3913803100585938, -1.348480224609375, -1.3055801391601562, -1.2626800537109375, -1.2197799682617188, -1.1768798828125, -1.1339797973632812, -1.0910797119140625, -1.0481796264648438, -1.005279541015625, -0.9623794555664062, -0.9194793701171875, -0.8765792846679688, -0.83367919921875, -0.7907791137695312, -0.7478790283203125, -0.7049789428710938, -0.662078857421875, -0.6191787719726562, -0.5762786865234375, -0.5333786010742188, -0.490478515625, -0.44757843017578125, -0.4046783447265625, -0.36177825927734375, -0.318878173828125, -0.27597808837890625, -0.2330780029296875, -0.19017791748046875, -0.14727783203125, -0.10437774658203125, -0.0614776611328125, -0.01857757568359375, 0.024322509765625, 0.06722259521484375, 0.1101226806640625, 0.15302276611328125, 0.1959228515625, 0.23882293701171875, 0.2817230224609375, 0.32462310791015625, 0.367523193359375, 0.41042327880859375, 0.4533233642578125, 0.49622344970703125, 0.53912353515625, 0.5820236206054688, 0.6249237060546875, 0.6678237915039062, 0.710723876953125, 0.7536239624023438, 0.7965240478515625, 0.8394241333007812, 0.88232421875]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 8.0, 4.0, 6.0, 9.0, 9.0, 18.0, 21.0, 37.0, 46.0, 66.0, 89.0, 105.0, 116.0, 130.0, 88.0, 87.0, 53.0, 41.0, 25.0, 18.0, 14.0, 7.0, 3.0, 6.0, 1.0, 2.0, 0.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.19670581817627, -12.809350967407227, -12.421996116638184, -12.03464126586914, -11.647287368774414, -11.259932518005371, -10.872577667236328, -10.485222816467285, -10.097867965698242, -9.7105131149292, -9.323158264160156, -8.935803413391113, -8.54844856262207, -8.161094665527344, -7.773739814758301, -7.386384963989258, -6.999030113220215, -6.611675262451172, -6.224320411682129, -5.836966037750244, -5.449611186981201, -5.062256336212158, -4.674901962280273, -4.2875471115112305, -3.9001922607421875, -3.5128374099731445, -3.1254827976226807, -2.738128185272217, -2.350773334503174, -1.9634186029434204, -1.576063871383667, -1.1887092590332031, -0.8013553619384766, -0.41400063037872314, -0.026645898818969727, 0.3607088327407837, 0.7480635643005371, 1.1354182958602905, 1.522773027420044, 1.9101276397705078, 2.297482490539551, 2.6848373413085938, 3.0721919536590576, 3.4595465660095215, 3.8469014167785645, 4.234256267547607, 4.621610641479492, 5.008965492248535, 5.396320343017578, 5.783675193786621, 6.171030044555664, 6.558384418487549, 6.945739269256592, 7.333094120025635, 7.7204484939575195, 8.107803344726562, 8.495158195495605, 8.882513046264648, 9.269867897033691, 9.657222747802734, 10.044576644897461, 10.431931495666504, 10.819286346435547, 11.20664119720459, 11.593996047973633]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 1.0, 5.0, 1.0, 0.0, 3.0, 2.0, 5.0, 2.0, 7.0, 6.0, 6.0, 10.0, 6.0, 15.0, 12.0, 19.0, 16.0, 24.0, 35.0, 18.0, 34.0, 27.0, 42.0, 29.0, 49.0, 47.0, 51.0, 42.0, 43.0, 47.0, 43.0, 43.0, 41.0, 39.0, 29.0, 28.0, 33.0, 27.0, 18.0, 22.0, 24.0, 14.0, 7.0, 8.0, 5.0, 7.0, 3.0, 2.0, 3.0, 3.0, 4.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-7.183813095092773, -6.948230743408203, -6.712648391723633, -6.4770660400390625, -6.241483688354492, -6.005901336669922, -5.770318984985352, -5.534736633300781, -5.299154281616211, -5.063571929931641, -4.82798957824707, -4.5924072265625, -4.35682487487793, -4.121242523193359, -3.88565993309021, -3.6500775814056396, -3.4144949913024902, -3.17891263961792, -2.9433302879333496, -2.7077479362487793, -2.472165584564209, -2.2365832328796387, -2.0010006427764893, -1.765418291091919, -1.5298359394073486, -1.2942535877227783, -1.058671236038208, -0.8230887651443481, -0.5875064134597778, -0.3519240617752075, -0.11634159088134766, 0.11924076080322266, 0.35482358932495117, 0.5904059410095215, 0.8259883522987366, 1.0615707635879517, 1.297153115272522, 1.5327354669570923, 1.7683179378509521, 2.0039002895355225, 2.2394826412200928, 2.475064992904663, 2.7106473445892334, 2.946229934692383, 3.181812286376953, 3.4173946380615234, 3.6529769897460938, 3.888559341430664, 4.124141693115234, 4.359724044799805, 4.595306396484375, 4.830888748168945, 5.066471099853516, 5.302053451538086, 5.537635803222656, 5.773218154907227, 6.008800506591797, 6.244382858276367, 6.4799652099609375, 6.715547561645508, 6.951129913330078, 7.186712265014648, 7.422294616699219, 7.657876968383789, 7.893459796905518]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 2.0, 1.0, 9.0, 7.0, 10.0, 10.0, 20.0, 41.0, 85.0, 141.0, 249.0, 628.0, 2472.0, 25277.0, 4145818.0, 16380.0, 2017.0, 570.0, 225.0, 128.0, 79.0, 47.0, 20.0, 11.0, 18.0, 6.0, 5.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.24609375, -4.1065673828125, -3.967041015625, -3.8275146484375, -3.68798828125, -3.5484619140625, -3.408935546875, -3.2694091796875, -3.1298828125, -2.9903564453125, -2.850830078125, -2.7113037109375, -2.57177734375, -2.4322509765625, -2.292724609375, -2.1531982421875, -2.013671875, -1.8741455078125, -1.734619140625, -1.5950927734375, -1.45556640625, -1.3160400390625, -1.176513671875, -1.0369873046875, -0.8974609375, -0.7579345703125, -0.618408203125, -0.4788818359375, -0.33935546875, -0.1998291015625, -0.060302734375, 0.0792236328125, 0.21875, 0.3582763671875, 0.497802734375, 0.6373291015625, 0.77685546875, 0.9163818359375, 1.055908203125, 1.1954345703125, 1.3349609375, 1.4744873046875, 1.614013671875, 1.7535400390625, 1.89306640625, 2.0325927734375, 2.172119140625, 2.3116455078125, 2.451171875, 2.5906982421875, 2.730224609375, 2.8697509765625, 3.00927734375, 3.1488037109375, 3.288330078125, 3.4278564453125, 3.5673828125, 3.7069091796875, 3.846435546875, 3.9859619140625, 4.12548828125, 4.2650146484375, 4.404541015625, 4.5440673828125, 4.68359375]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 2.0, 5.0, 8.0, 7.0, 14.0, 18.0, 17.0, 14.0, 24.0, 35.0, 38.0, 39.0, 35.0, 40.0, 40.0, 44.0, 55.0, 51.0, 49.0, 57.0, 57.0, 42.0, 45.0, 33.0, 32.0, 31.0, 29.0, 25.0, 18.0, 22.0, 15.0, 12.0, 5.0, 10.0, 7.0, 7.0, 5.0, 2.0, 5.0, 4.0, 3.0, 1.0, 2.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.343994140625, -0.3327140808105469, -0.32143402099609375, -0.3101539611816406, -0.2988739013671875, -0.2875938415527344, -0.27631378173828125, -0.2650337219238281, -0.253753662109375, -0.24247360229492188, -0.23119354248046875, -0.21991348266601562, -0.2086334228515625, -0.19735336303710938, -0.18607330322265625, -0.17479324340820312, -0.16351318359375, -0.15223312377929688, -0.14095306396484375, -0.12967300415039062, -0.1183929443359375, -0.10711288452148438, -0.09583282470703125, -0.08455276489257812, -0.073272705078125, -0.061992645263671875, -0.05071258544921875, -0.039432525634765625, -0.0281524658203125, -0.016872406005859375, -0.00559234619140625, 0.005687713623046875, 0.0169677734375, 0.028247833251953125, 0.03952789306640625, 0.050807952880859375, 0.0620880126953125, 0.07336807250976562, 0.08464813232421875, 0.09592819213867188, 0.107208251953125, 0.11848831176757812, 0.12976837158203125, 0.14104843139648438, 0.1523284912109375, 0.16360855102539062, 0.17488861083984375, 0.18616867065429688, 0.19744873046875, 0.20872879028320312, 0.22000885009765625, 0.23128890991210938, 0.2425689697265625, 0.2538490295410156, 0.26512908935546875, 0.2764091491699219, 0.287689208984375, 0.2989692687988281, 0.31024932861328125, 0.3215293884277344, 0.3328094482421875, 0.3440895080566406, 0.35536956787109375, 0.3666496276855469, 0.3779296875]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 7.0, 7.0, 9.0, 12.0, 13.0, 32.0, 34.0, 47.0, 52.0, 132.0, 153.0, 245.0, 411.0, 735.0, 1429.0, 3799.0, 15956.0, 307409.0, 3835657.0, 20162.0, 4373.0, 1617.0, 785.0, 429.0, 241.0, 177.0, 118.0, 98.0, 51.0, 33.0, 20.0, 8.0, 16.0, 8.0, 2.0, 5.0, 2.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.439453125, -2.358306884765625, -2.27716064453125, -2.196014404296875, -2.1148681640625, -2.033721923828125, -1.95257568359375, -1.871429443359375, -1.790283203125, -1.709136962890625, -1.62799072265625, -1.546844482421875, -1.4656982421875, -1.384552001953125, -1.30340576171875, -1.222259521484375, -1.14111328125, -1.059967041015625, -0.97882080078125, -0.897674560546875, -0.8165283203125, -0.735382080078125, -0.65423583984375, -0.573089599609375, -0.491943359375, -0.410797119140625, -0.32965087890625, -0.248504638671875, -0.1673583984375, -0.086212158203125, -0.00506591796875, 0.076080322265625, 0.1572265625, 0.238372802734375, 0.31951904296875, 0.400665283203125, 0.4818115234375, 0.562957763671875, 0.64410400390625, 0.725250244140625, 0.806396484375, 0.887542724609375, 0.96868896484375, 1.049835205078125, 1.1309814453125, 1.212127685546875, 1.29327392578125, 1.374420166015625, 1.45556640625, 1.536712646484375, 1.61785888671875, 1.699005126953125, 1.7801513671875, 1.861297607421875, 1.94244384765625, 2.023590087890625, 2.104736328125, 2.185882568359375, 2.26702880859375, 2.348175048828125, 2.4293212890625, 2.510467529296875, 2.59161376953125, 2.672760009765625, 2.75390625]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 5.0, 15.0, 18.0, 41.0, 94.0, 2972.0, 764.0, 76.0, 31.0, 20.0, 9.0, 11.0, 8.0, 8.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.80810546875, -0.78839111328125, -0.7686767578125, -0.74896240234375, -0.729248046875, -0.70953369140625, -0.6898193359375, -0.67010498046875, -0.650390625, -0.63067626953125, -0.6109619140625, -0.59124755859375, -0.571533203125, -0.55181884765625, -0.5321044921875, -0.51239013671875, -0.49267578125, -0.47296142578125, -0.4532470703125, -0.43353271484375, -0.413818359375, -0.39410400390625, -0.3743896484375, -0.35467529296875, -0.3349609375, -0.31524658203125, -0.2955322265625, -0.27581787109375, -0.256103515625, -0.23638916015625, -0.2166748046875, -0.19696044921875, -0.17724609375, -0.15753173828125, -0.1378173828125, -0.11810302734375, -0.098388671875, -0.07867431640625, -0.0589599609375, -0.03924560546875, -0.01953125, 0.00018310546875, 0.0198974609375, 0.03961181640625, 0.059326171875, 0.07904052734375, 0.0987548828125, 0.11846923828125, 0.13818359375, 0.15789794921875, 0.1776123046875, 0.19732666015625, 0.217041015625, 0.23675537109375, 0.2564697265625, 0.27618408203125, 0.2958984375, 0.31561279296875, 0.3353271484375, 0.35504150390625, 0.374755859375, 0.39447021484375, 0.4141845703125, 0.43389892578125, 0.45361328125]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 3.0, 4.0, 7.0, 18.0, 19.0, 30.0, 48.0, 82.0, 106.0, 115.0, 124.0, 122.0, 89.0, 75.0, 59.0, 41.0, 24.0, 13.0, 12.0, 7.0, 5.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0], "bins": [-2.144212484359741, -2.0932729244232178, -2.0423333644866943, -1.9913936853408813, -1.940454125404358, -1.8895145654678345, -1.838575005531311, -1.787635326385498, -1.7366957664489746, -1.6857562065124512, -1.6348166465759277, -1.5838769674301147, -1.5329374074935913, -1.4819978475570679, -1.4310582876205444, -1.3801186084747314, -1.329179048538208, -1.2782394886016846, -1.2272999286651611, -1.1763602495193481, -1.1254206895828247, -1.0744811296463013, -1.0235415697097778, -0.9726019501686096, -0.921662449836731, -0.8707228899002075, -0.8197832703590393, -0.7688437104225159, -0.7179040908813477, -0.6669645309448242, -0.6160249710083008, -0.5650853514671326, -0.5141457319259644, -0.46320614218711853, -0.4122665524482727, -0.36132699251174927, -0.31038737297058105, -0.2594478130340576, -0.2085082232952118, -0.15756863355636597, -0.10662904381752014, -0.055689457803964615, -0.004749871790409088, 0.04618971049785614, 0.09712930023670197, 0.1480688750743866, 0.19900846481323242, 0.24994805455207825, 0.3008876442909241, 0.3518272340297699, 0.4027668237686157, 0.45370638370513916, 0.5046460032463074, 0.5555855631828308, 0.606525182723999, 0.6574647426605225, 0.7084043025970459, 0.7593438625335693, 0.8102834820747375, 0.861223042011261, 0.9121626615524292, 0.9631022214889526, 1.014041781425476, 1.064981460571289, 1.1159210205078125]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 4.0, 5.0, 5.0, 0.0, 4.0, 3.0, 3.0, 3.0, 7.0, 5.0, 7.0, 13.0, 20.0, 24.0, 23.0, 25.0, 22.0, 29.0, 33.0, 31.0, 39.0, 38.0, 46.0, 44.0, 44.0, 45.0, 54.0, 38.0, 44.0, 36.0, 36.0, 34.0, 29.0, 32.0, 29.0, 17.0, 11.0, 19.0, 20.0, 13.0, 26.0, 11.0, 9.0, 6.0, 7.0, 3.0, 6.0, 4.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 3.0], "bins": [-0.8593247532844543, -0.8347761034965515, -0.8102275133132935, -0.7856788635253906, -0.7611302137374878, -0.7365816235542297, -0.7120329737663269, -0.6874843835830688, -0.662935733795166, -0.6383870840072632, -0.6138384938240051, -0.5892898440361023, -0.5647412538528442, -0.5401926040649414, -0.5156439542770386, -0.49109533429145813, -0.4665467143058777, -0.44199809432029724, -0.4174494743347168, -0.39290082454681396, -0.3683522045612335, -0.3438035845756531, -0.31925493478775024, -0.2947063148021698, -0.27015769481658936, -0.2456090748310089, -0.22106043994426727, -0.19651180505752563, -0.1719631850719452, -0.14741456508636475, -0.12286593019962311, -0.09831729531288147, -0.07376861572265625, -0.04921998828649521, -0.024671360850334167, -0.00012273341417312622, 0.024425894021987915, 0.048974521458148956, 0.07352314889431, 0.09807178378105164, 0.12262040376663208, 0.14716902375221252, 0.17171765863895416, 0.1962662935256958, 0.22081491351127625, 0.2453635334968567, 0.2699121832847595, 0.29446080327033997, 0.3190094232559204, 0.34355804324150085, 0.3681066632270813, 0.39265531301498413, 0.4172039330005646, 0.441752552986145, 0.46630120277404785, 0.4908498227596283, 0.5153984427452087, 0.5399470925331116, 0.5644956827163696, 0.5890443325042725, 0.6135929822921753, 0.6381415724754333, 0.6626902222633362, 0.6872388124465942, 0.7117874622344971]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 0.0, 4.0, 5.0, 6.0, 12.0, 9.0, 10.0, 20.0, 24.0, 25.0, 46.0, 49.0, 73.0, 97.0, 144.0, 250.0, 354.0, 561.0, 1043.0, 1829.0, 3875.0, 9421.0, 27491.0, 93302.0, 294836.0, 389129.0, 155724.0, 44975.0, 14076.0, 5415.0, 2492.0, 1260.0, 673.0, 418.0, 270.0, 170.0, 145.0, 93.0, 69.0, 37.0, 33.0, 24.0, 22.0, 15.0, 13.0, 8.0, 4.0, 5.0, 7.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.88720703125, -0.8557510375976562, -0.8242950439453125, -0.7928390502929688, -0.761383056640625, -0.7299270629882812, -0.6984710693359375, -0.6670150756835938, -0.63555908203125, -0.6041030883789062, -0.5726470947265625, -0.5411911010742188, -0.509735107421875, -0.47827911376953125, -0.4468231201171875, -0.41536712646484375, -0.3839111328125, -0.35245513916015625, -0.3209991455078125, -0.28954315185546875, -0.258087158203125, -0.22663116455078125, -0.1951751708984375, -0.16371917724609375, -0.13226318359375, -0.10080718994140625, -0.0693511962890625, -0.03789520263671875, -0.006439208984375, 0.02501678466796875, 0.0564727783203125, 0.08792877197265625, 0.119384765625, 0.15084075927734375, 0.1822967529296875, 0.21375274658203125, 0.245208740234375, 0.27666473388671875, 0.3081207275390625, 0.33957672119140625, 0.37103271484375, 0.40248870849609375, 0.4339447021484375, 0.46540069580078125, 0.496856689453125, 0.5283126831054688, 0.5597686767578125, 0.5912246704101562, 0.6226806640625, 0.6541366577148438, 0.6855926513671875, 0.7170486450195312, 0.748504638671875, 0.7799606323242188, 0.8114166259765625, 0.8428726196289062, 0.87432861328125, 0.9057846069335938, 0.9372406005859375, 0.9686965942382812, 1.000152587890625, 1.0316085815429688, 1.0630645751953125, 1.0945205688476562, 1.1259765625]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 3.0, 6.0, 7.0, 6.0, 7.0, 10.0, 9.0, 11.0, 20.0, 22.0, 25.0, 25.0, 29.0, 37.0, 30.0, 33.0, 43.0, 39.0, 51.0, 40.0, 62.0, 44.0, 51.0, 41.0, 50.0, 47.0, 31.0, 29.0, 25.0, 35.0, 25.0, 15.0, 15.0, 16.0, 12.0, 7.0, 8.0, 10.0, 5.0, 6.0, 6.0, 8.0, 3.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.3427734375, -0.3313484191894531, -0.31992340087890625, -0.3084983825683594, -0.2970733642578125, -0.2856483459472656, -0.27422332763671875, -0.2627983093261719, -0.251373291015625, -0.23994827270507812, -0.22852325439453125, -0.21709823608398438, -0.2056732177734375, -0.19424819946289062, -0.18282318115234375, -0.17139816284179688, -0.15997314453125, -0.14854812622070312, -0.13712310791015625, -0.12569808959960938, -0.1142730712890625, -0.10284805297851562, -0.09142303466796875, -0.07999801635742188, -0.068572998046875, -0.057147979736328125, -0.04572296142578125, -0.034297943115234375, -0.0228729248046875, -0.011447906494140625, -2.288818359375e-05, 0.011402130126953125, 0.0228271484375, 0.034252166748046875, 0.04567718505859375, 0.057102203369140625, 0.0685272216796875, 0.07995223999023438, 0.09137725830078125, 0.10280227661132812, 0.114227294921875, 0.12565231323242188, 0.13707733154296875, 0.14850234985351562, 0.1599273681640625, 0.17135238647460938, 0.18277740478515625, 0.19420242309570312, 0.20562744140625, 0.21705245971679688, 0.22847747802734375, 0.23990249633789062, 0.2513275146484375, 0.2627525329589844, 0.27417755126953125, 0.2856025695800781, 0.297027587890625, 0.3084526062011719, 0.31987762451171875, 0.3313026428222656, 0.3427276611328125, 0.3541526794433594, 0.36557769775390625, 0.3770027160644531, 0.388427734375]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 5.0, 7.0, 6.0, 5.0, 17.0, 18.0, 21.0, 33.0, 42.0, 64.0, 93.0, 135.0, 182.0, 299.0, 468.0, 778.0, 1337.0, 2629.0, 5981.0, 18485.0, 82802.0, 400384.0, 416557.0, 86633.0, 19166.0, 6062.0, 2755.0, 1383.0, 738.0, 479.0, 329.0, 209.0, 128.0, 110.0, 65.0, 37.0, 32.0, 23.0, 20.0, 13.0, 10.0, 8.0, 4.0, 6.0, 4.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.04296875, -1.0057373046875, -0.968505859375, -0.9312744140625, -0.89404296875, -0.8568115234375, -0.819580078125, -0.7823486328125, -0.7451171875, -0.7078857421875, -0.670654296875, -0.6334228515625, -0.59619140625, -0.5589599609375, -0.521728515625, -0.4844970703125, -0.447265625, -0.4100341796875, -0.372802734375, -0.3355712890625, -0.29833984375, -0.2611083984375, -0.223876953125, -0.1866455078125, -0.1494140625, -0.1121826171875, -0.074951171875, -0.0377197265625, -0.00048828125, 0.0367431640625, 0.073974609375, 0.1112060546875, 0.1484375, 0.1856689453125, 0.222900390625, 0.2601318359375, 0.29736328125, 0.3345947265625, 0.371826171875, 0.4090576171875, 0.4462890625, 0.4835205078125, 0.520751953125, 0.5579833984375, 0.59521484375, 0.6324462890625, 0.669677734375, 0.7069091796875, 0.744140625, 0.7813720703125, 0.818603515625, 0.8558349609375, 0.89306640625, 0.9302978515625, 0.967529296875, 1.0047607421875, 1.0419921875, 1.0792236328125, 1.116455078125, 1.1536865234375, 1.19091796875, 1.2281494140625, 1.265380859375, 1.3026123046875, 1.33984375]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 0.0, 3.0, 2.0, 5.0, 9.0, 6.0, 15.0, 14.0, 14.0, 19.0, 25.0, 17.0, 31.0, 20.0, 27.0, 36.0, 39.0, 35.0, 37.0, 59.0, 43.0, 40.0, 54.0, 40.0, 44.0, 42.0, 35.0, 40.0, 25.0, 35.0, 30.0, 28.0, 20.0, 17.0, 22.0, 14.0, 15.0, 9.0, 6.0, 6.0, 10.0, 4.0, 3.0, 3.0, 5.0, 2.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.646484375, -1.5935821533203125, -1.540679931640625, -1.4877777099609375, -1.43487548828125, -1.3819732666015625, -1.329071044921875, -1.2761688232421875, -1.2232666015625, -1.1703643798828125, -1.117462158203125, -1.0645599365234375, -1.01165771484375, -0.9587554931640625, -0.905853271484375, -0.8529510498046875, -0.800048828125, -0.7471466064453125, -0.694244384765625, -0.6413421630859375, -0.58843994140625, -0.5355377197265625, -0.482635498046875, -0.4297332763671875, -0.3768310546875, -0.3239288330078125, -0.271026611328125, -0.2181243896484375, -0.16522216796875, -0.1123199462890625, -0.059417724609375, -0.0065155029296875, 0.04638671875, 0.0992889404296875, 0.152191162109375, 0.2050933837890625, 0.25799560546875, 0.3108978271484375, 0.363800048828125, 0.4167022705078125, 0.4696044921875, 0.5225067138671875, 0.575408935546875, 0.6283111572265625, 0.68121337890625, 0.7341156005859375, 0.787017822265625, 0.8399200439453125, 0.892822265625, 0.9457244873046875, 0.998626708984375, 1.0515289306640625, 1.10443115234375, 1.1573333740234375, 1.210235595703125, 1.2631378173828125, 1.3160400390625, 1.3689422607421875, 1.421844482421875, 1.4747467041015625, 1.52764892578125, 1.5805511474609375, 1.633453369140625, 1.6863555908203125, 1.7392578125]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 4.0, 9.0, 12.0, 17.0, 24.0, 39.0, 98.0, 159.0, 351.0, 1131.0, 6785.0, 515700.0, 515519.0, 6939.0, 1116.0, 344.0, 146.0, 73.0, 33.0, 21.0, 17.0, 6.0, 3.0, 4.0, 3.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.58203125, -1.51611328125, -1.4501953125, -1.38427734375, -1.318359375, -1.25244140625, -1.1865234375, -1.12060546875, -1.0546875, -0.98876953125, -0.9228515625, -0.85693359375, -0.791015625, -0.72509765625, -0.6591796875, -0.59326171875, -0.52734375, -0.46142578125, -0.3955078125, -0.32958984375, -0.263671875, -0.19775390625, -0.1318359375, -0.06591796875, 0.0, 0.06591796875, 0.1318359375, 0.19775390625, 0.263671875, 0.32958984375, 0.3955078125, 0.46142578125, 0.52734375, 0.59326171875, 0.6591796875, 0.72509765625, 0.791015625, 0.85693359375, 0.9228515625, 0.98876953125, 1.0546875, 1.12060546875, 1.1865234375, 1.25244140625, 1.318359375, 1.38427734375, 1.4501953125, 1.51611328125, 1.58203125, 1.64794921875, 1.7138671875, 1.77978515625, 1.845703125, 1.91162109375, 1.9775390625, 2.04345703125, 2.109375, 2.17529296875, 2.2412109375, 2.30712890625, 2.373046875, 2.43896484375, 2.5048828125, 2.57080078125, 2.63671875]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 10.0, 8.0, 7.0, 8.0, 17.0, 14.0, 17.0, 34.0, 25.0, 47.0, 54.0, 62.0, 66.0, 72.0, 94.0, 71.0, 65.0, 70.0, 44.0, 51.0, 38.0, 31.0, 15.0, 17.0, 18.0, 11.0, 9.0, 7.0, 4.0, 7.0, 3.0, 6.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-8.302927017211914e-05, -8.033681660890579e-05, -7.764436304569244e-05, -7.49519094824791e-05, -7.225945591926575e-05, -6.95670023560524e-05, -6.687454879283905e-05, -6.41820952296257e-05, -6.148964166641235e-05, -5.8797188103199005e-05, -5.610473453998566e-05, -5.341228097677231e-05, -5.071982741355896e-05, -4.802737385034561e-05, -4.533492028713226e-05, -4.2642466723918915e-05, -3.9950013160705566e-05, -3.725755959749222e-05, -3.456510603427887e-05, -3.187265247106552e-05, -2.9180198907852173e-05, -2.6487745344638824e-05, -2.3795291781425476e-05, -2.1102838218212128e-05, -1.841038465499878e-05, -1.571793109178543e-05, -1.3025477528572083e-05, -1.0333023965358734e-05, -7.640570402145386e-06, -4.948116838932037e-06, -2.255663275718689e-06, 4.367902874946594e-07, 3.129243850708008e-06, 5.821697413921356e-06, 8.514150977134705e-06, 1.1206604540348053e-05, 1.3899058103561401e-05, 1.659151166677475e-05, 1.9283965229988098e-05, 2.1976418793201447e-05, 2.4668872356414795e-05, 2.7361325919628143e-05, 3.0053779482841492e-05, 3.274623304605484e-05, 3.543868660926819e-05, 3.813114017248154e-05, 4.0823593735694885e-05, 4.3516047298908234e-05, 4.620850086212158e-05, 4.890095442533493e-05, 5.159340798854828e-05, 5.428586155176163e-05, 5.6978315114974976e-05, 5.9670768678188324e-05, 6.236322224140167e-05, 6.505567580461502e-05, 6.774812936782837e-05, 7.044058293104172e-05, 7.313303649425507e-05, 7.582549005746841e-05, 7.851794362068176e-05, 8.121039718389511e-05, 8.390285074710846e-05, 8.659530431032181e-05, 8.928775787353516e-05]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 5.0, 11.0, 25.0, 34.0, 66.0, 154.0, 350.0, 1091.0, 7040.0, 341367.0, 685280.0, 10977.0, 1403.0, 423.0, 144.0, 73.0, 38.0, 29.0, 18.0, 8.0, 3.0, 7.0, 2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.328125, -1.272979736328125, -1.21783447265625, -1.162689208984375, -1.1075439453125, -1.052398681640625, -0.99725341796875, -0.942108154296875, -0.886962890625, -0.831817626953125, -0.77667236328125, -0.721527099609375, -0.6663818359375, -0.611236572265625, -0.55609130859375, -0.500946044921875, -0.44580078125, -0.390655517578125, -0.33551025390625, -0.280364990234375, -0.2252197265625, -0.170074462890625, -0.11492919921875, -0.059783935546875, -0.004638671875, 0.050506591796875, 0.10565185546875, 0.160797119140625, 0.2159423828125, 0.271087646484375, 0.32623291015625, 0.381378173828125, 0.4365234375, 0.491668701171875, 0.54681396484375, 0.601959228515625, 0.6571044921875, 0.712249755859375, 0.76739501953125, 0.822540283203125, 0.877685546875, 0.932830810546875, 0.98797607421875, 1.043121337890625, 1.0982666015625, 1.153411865234375, 1.20855712890625, 1.263702392578125, 1.31884765625, 1.373992919921875, 1.42913818359375, 1.484283447265625, 1.5394287109375, 1.594573974609375, 1.64971923828125, 1.704864501953125, 1.760009765625, 1.815155029296875, 1.87030029296875, 1.925445556640625, 1.9805908203125, 2.035736083984375, 2.09088134765625, 2.146026611328125, 2.201171875]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 1.0, 5.0, 4.0, 12.0, 15.0, 28.0, 30.0, 50.0, 94.0, 116.0, 152.0, 149.0, 127.0, 88.0, 43.0, 33.0, 28.0, 15.0, 7.0, 9.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.75146484375, -0.7096939086914062, -0.6679229736328125, -0.6261520385742188, -0.584381103515625, -0.5426101684570312, -0.5008392333984375, -0.45906829833984375, -0.41729736328125, -0.37552642822265625, -0.3337554931640625, -0.29198455810546875, -0.250213623046875, -0.20844268798828125, -0.1666717529296875, -0.12490081787109375, -0.0831298828125, -0.04135894775390625, 0.0004119873046875, 0.04218292236328125, 0.083953857421875, 0.12572479248046875, 0.1674957275390625, 0.20926666259765625, 0.25103759765625, 0.29280853271484375, 0.3345794677734375, 0.37635040283203125, 0.418121337890625, 0.45989227294921875, 0.5016632080078125, 0.5434341430664062, 0.585205078125, 0.6269760131835938, 0.6687469482421875, 0.7105178833007812, 0.752288818359375, 0.7940597534179688, 0.8358306884765625, 0.8776016235351562, 0.91937255859375, 0.9611434936523438, 1.0029144287109375, 1.0446853637695312, 1.086456298828125, 1.1282272338867188, 1.1699981689453125, 1.2117691040039062, 1.2535400390625, 1.2953109741210938, 1.3370819091796875, 1.3788528442382812, 1.420623779296875, 1.4623947143554688, 1.5041656494140625, 1.5459365844726562, 1.58770751953125, 1.6294784545898438, 1.6712493896484375, 1.7130203247070312, 1.754791259765625, 1.7965621948242188, 1.8383331298828125, 1.8801040649414062, 1.921875]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 8.0, 10.0, 94.0, 414.0, 396.0, 72.0, 15.0, 5.0, 3.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.544710159301758, -6.038434982299805, -4.532159328460693, -3.025883674621582, -1.519608497619629, -0.013333320617675781, 1.4929428100585938, 2.999217987060547, 4.5054931640625, 6.011768341064453, 7.5180439949035645, 9.024319648742676, 10.530594825744629, 12.036870002746582, 13.543146133422852, 15.049421310424805, 16.555696487426758, 18.06197166442871, 19.568246841430664, 21.07452392578125, 22.580799102783203, 24.087074279785156, 25.59334945678711, 27.099624633789062, 28.605899810791016, 30.11217498779297, 31.618450164794922, 33.124725341796875, 34.63100051879883, 36.13727569580078, 37.6435546875, 39.14982604980469, 40.656105041503906, 42.16238021850586, 43.66865539550781, 45.174930572509766, 46.68120574951172, 48.18748092651367, 49.693756103515625, 51.200035095214844, 52.70630645751953, 54.212581634521484, 55.71885681152344, 57.22513198852539, 58.731407165527344, 60.2376823425293, 61.74395751953125, 63.25023651123047, 64.75651550292969, 66.2627944946289, 67.7690658569336, 69.27534484863281, 70.7816162109375, 72.28789520263672, 73.7941665649414, 75.30044555664062, 76.80671691894531, 78.31299591064453, 79.81926727294922, 81.32554626464844, 82.83181762695312, 84.33809661865234, 85.84436798095703, 87.35064697265625, 88.85691833496094]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 4.0, 1.0, 2.0, 2.0, 3.0, 2.0, 1.0, 5.0, 9.0, 6.0, 5.0, 8.0, 12.0, 12.0, 20.0, 15.0, 23.0, 16.0, 20.0, 38.0, 28.0, 44.0, 31.0, 48.0, 37.0, 46.0, 52.0, 50.0, 63.0, 50.0, 46.0, 38.0, 47.0, 33.0, 31.0, 28.0, 21.0, 12.0, 23.0, 16.0, 15.0, 12.0, 12.0, 9.0, 7.0, 1.0, 4.0, 2.0, 0.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.179851531982422, -7.908729076385498, -7.637606143951416, -7.366483688354492, -7.09536075592041, -6.824238300323486, -6.5531158447265625, -6.2819929122924805, -6.010870456695557, -5.739748001098633, -5.468625068664551, -5.197502613067627, -4.926380157470703, -4.655257225036621, -4.384134769439697, -4.113012313842773, -3.8418893814086914, -3.5707666873931885, -3.2996439933776855, -3.0285215377807617, -2.757398843765259, -2.486276149749756, -2.215153694152832, -1.944031000137329, -1.6729083061218262, -1.4017856121063232, -1.1306630373001099, -0.8595404028892517, -0.5884177684783936, -0.3172950744628906, -0.046172499656677246, 0.22495007514953613, 0.49607372283935547, 0.7671963572502136, 1.0383189916610718, 1.3094415664672852, 1.580564260482788, 1.851686954498291, 2.122809410095215, 2.3939321041107178, 2.6650547981262207, 2.9361774921417236, 3.2073001861572266, 3.4784226417541504, 3.7495453357696533, 4.020668029785156, 4.29179048538208, 4.562912940979004, 4.834035873413086, 5.10515832901001, 5.376281261444092, 5.647403717041016, 5.918526649475098, 6.1896491050720215, 6.460771560668945, 6.731894493103027, 7.003016948699951, 7.274139404296875, 7.545262336730957, 7.816384792327881, 8.087507247924805, 8.358630180358887, 8.629753112792969, 8.900875091552734, 9.171998023986816]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 4.0, 4.0, 6.0, 6.0, 9.0, 15.0, 16.0, 28.0, 48.0, 70.0, 108.0, 137.0, 252.0, 458.0, 953.0, 2457.0, 8546.0, 58228.0, 4016910.0, 89962.0, 10814.0, 2899.0, 1081.0, 471.0, 282.0, 180.0, 109.0, 68.0, 43.0, 38.0, 25.0, 19.0, 12.0, 6.0, 6.0, 2.0, 4.0, 4.0, 4.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-2.2734375, -2.2076873779296875, -2.141937255859375, -2.0761871337890625, -2.01043701171875, -1.9446868896484375, -1.878936767578125, -1.8131866455078125, -1.7474365234375, -1.6816864013671875, -1.615936279296875, -1.5501861572265625, -1.48443603515625, -1.4186859130859375, -1.352935791015625, -1.2871856689453125, -1.221435546875, -1.1556854248046875, -1.089935302734375, -1.0241851806640625, -0.95843505859375, -0.8926849365234375, -0.826934814453125, -0.7611846923828125, -0.6954345703125, -0.6296844482421875, -0.563934326171875, -0.4981842041015625, -0.43243408203125, -0.3666839599609375, -0.300933837890625, -0.2351837158203125, -0.16943359375, -0.1036834716796875, -0.037933349609375, 0.0278167724609375, 0.09356689453125, 0.1593170166015625, 0.225067138671875, 0.2908172607421875, 0.3565673828125, 0.4223175048828125, 0.488067626953125, 0.5538177490234375, 0.61956787109375, 0.6853179931640625, 0.751068115234375, 0.8168182373046875, 0.882568359375, 0.9483184814453125, 1.014068603515625, 1.0798187255859375, 1.14556884765625, 1.2113189697265625, 1.277069091796875, 1.3428192138671875, 1.4085693359375, 1.4743194580078125, 1.540069580078125, 1.6058197021484375, 1.67156982421875, 1.7373199462890625, 1.803070068359375, 1.8688201904296875, 1.9345703125]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 4.0, 3.0, 1.0, 2.0, 9.0, 5.0, 16.0, 18.0, 21.0, 23.0, 50.0, 65.0, 78.0, 91.0, 98.0, 95.0, 99.0, 69.0, 61.0, 67.0, 45.0, 32.0, 17.0, 11.0, 12.0, 2.0, 5.0, 6.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.86865234375, -0.8459091186523438, -0.8231658935546875, -0.8004226684570312, -0.777679443359375, -0.7549362182617188, -0.7321929931640625, -0.7094497680664062, -0.68670654296875, -0.6639633178710938, -0.6412200927734375, -0.6184768676757812, -0.595733642578125, -0.5729904174804688, -0.5502471923828125, -0.5275039672851562, -0.5047607421875, -0.48201751708984375, -0.4592742919921875, -0.43653106689453125, -0.413787841796875, -0.39104461669921875, -0.3683013916015625, -0.34555816650390625, -0.32281494140625, -0.30007171630859375, -0.2773284912109375, -0.25458526611328125, -0.231842041015625, -0.20909881591796875, -0.1863555908203125, -0.16361236572265625, -0.140869140625, -0.11812591552734375, -0.0953826904296875, -0.07263946533203125, -0.049896240234375, -0.02715301513671875, -0.0044097900390625, 0.01833343505859375, 0.04107666015625, 0.06381988525390625, 0.0865631103515625, 0.10930633544921875, 0.132049560546875, 0.15479278564453125, 0.1775360107421875, 0.20027923583984375, 0.2230224609375, 0.24576568603515625, 0.2685089111328125, 0.29125213623046875, 0.313995361328125, 0.33673858642578125, 0.3594818115234375, 0.38222503662109375, 0.40496826171875, 0.42771148681640625, 0.4504547119140625, 0.47319793701171875, 0.495941162109375, 0.5186843872070312, 0.5414276123046875, 0.5641708374023438, 0.5869140625]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 8.0, 9.0, 14.0, 18.0, 32.0, 39.0, 71.0, 111.0, 178.0, 299.0, 498.0, 801.0, 1714.0, 3754.0, 11169.0, 51464.0, 3687395.0, 391648.0, 31221.0, 7870.0, 2814.0, 1329.0, 705.0, 439.0, 239.0, 147.0, 95.0, 65.0, 47.0, 35.0, 23.0, 12.0, 8.0, 7.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.494140625, -1.4405059814453125, -1.386871337890625, -1.3332366943359375, -1.27960205078125, -1.2259674072265625, -1.172332763671875, -1.1186981201171875, -1.0650634765625, -1.0114288330078125, -0.957794189453125, -0.9041595458984375, -0.85052490234375, -0.7968902587890625, -0.743255615234375, -0.6896209716796875, -0.635986328125, -0.5823516845703125, -0.528717041015625, -0.4750823974609375, -0.42144775390625, -0.3678131103515625, -0.314178466796875, -0.2605438232421875, -0.2069091796875, -0.1532745361328125, -0.099639892578125, -0.0460052490234375, 0.00762939453125, 0.0612640380859375, 0.114898681640625, 0.1685333251953125, 0.22216796875, 0.2758026123046875, 0.329437255859375, 0.3830718994140625, 0.43670654296875, 0.4903411865234375, 0.543975830078125, 0.5976104736328125, 0.6512451171875, 0.7048797607421875, 0.758514404296875, 0.8121490478515625, 0.86578369140625, 0.9194183349609375, 0.973052978515625, 1.0266876220703125, 1.080322265625, 1.1339569091796875, 1.187591552734375, 1.2412261962890625, 1.29486083984375, 1.3484954833984375, 1.402130126953125, 1.4557647705078125, 1.5093994140625, 1.5630340576171875, 1.616668701171875, 1.6703033447265625, 1.72393798828125, 1.7775726318359375, 1.831207275390625, 1.8848419189453125, 1.9384765625]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 4.0, 4.0, 10.0, 6.0, 18.0, 20.0, 31.0, 79.0, 145.0, 662.0, 2697.0, 197.0, 90.0, 48.0, 24.0, 16.0, 11.0, 4.0, 6.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.97412109375, -0.9471664428710938, -0.9202117919921875, -0.8932571411132812, -0.866302490234375, -0.8393478393554688, -0.8123931884765625, -0.7854385375976562, -0.75848388671875, -0.7315292358398438, -0.7045745849609375, -0.6776199340820312, -0.650665283203125, -0.6237106323242188, -0.5967559814453125, -0.5698013305664062, -0.5428466796875, -0.5158920288085938, -0.4889373779296875, -0.46198272705078125, -0.435028076171875, -0.40807342529296875, -0.3811187744140625, -0.35416412353515625, -0.32720947265625, -0.30025482177734375, -0.2733001708984375, -0.24634552001953125, -0.219390869140625, -0.19243621826171875, -0.1654815673828125, -0.13852691650390625, -0.111572265625, -0.08461761474609375, -0.0576629638671875, -0.03070831298828125, -0.003753662109375, 0.02320098876953125, 0.0501556396484375, 0.07711029052734375, 0.10406494140625, 0.13101959228515625, 0.1579742431640625, 0.18492889404296875, 0.211883544921875, 0.23883819580078125, 0.2657928466796875, 0.29274749755859375, 0.3197021484375, 0.34665679931640625, 0.3736114501953125, 0.40056610107421875, 0.427520751953125, 0.45447540283203125, 0.4814300537109375, 0.5083847045898438, 0.53533935546875, 0.5622940063476562, 0.5892486572265625, 0.6162033081054688, 0.643157958984375, 0.6701126098632812, 0.6970672607421875, 0.7240219116210938, 0.7509765625]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0, 5.0, 9.0, 14.0, 44.0, 54.0, 120.0, 195.0, 200.0, 173.0, 99.0, 49.0, 29.0, 8.0, 5.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.250251531600952, -3.105616331100464, -2.9609808921813965, -2.816345691680908, -2.671710252761841, -2.5270750522613525, -2.382439613342285, -2.237804412841797, -2.0931692123413086, -1.9485338926315308, -1.803898572921753, -1.6592633724212646, -1.5146279335021973, -1.369992733001709, -1.2253574132919312, -1.0807220935821533, -0.9360866546630859, -0.7914513349533081, -0.6468160152435303, -0.5021807551383972, -0.3575454354286194, -0.21291011571884155, -0.0682748556137085, 0.07636046409606934, 0.22099578380584717, 0.365631103515625, 0.5102664232254028, 0.6549016833305359, 0.7995370030403137, 0.9441723227500916, 1.0888075828552246, 1.2334429025650024, 1.3780779838562012, 1.522713303565979, 1.6673486232757568, 1.8119838237762451, 1.9566192626953125, 2.101254463195801, 2.245889663696289, 2.3905251026153564, 2.535160541534424, 2.679795742034912, 2.8244311809539795, 2.9690663814544678, 3.113701820373535, 3.2583370208740234, 3.4029722213745117, 3.547607660293579, 3.6922428607940674, 3.8368780612945557, 3.981513500213623, 4.126148700714111, 4.2707839012146, 4.415419578552246, 4.560054779052734, 4.704689979553223, 4.849325180053711, 4.993960380554199, 5.1385955810546875, 5.283231258392334, 5.427866458892822, 5.5725016593933105, 5.717136859893799, 5.861772537231445, 6.006407737731934]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 4.0, 5.0, 6.0, 6.0, 5.0, 7.0, 11.0, 13.0, 21.0, 17.0, 27.0, 19.0, 18.0, 31.0, 27.0, 41.0, 40.0, 36.0, 45.0, 53.0, 50.0, 40.0, 52.0, 40.0, 33.0, 51.0, 47.0, 41.0, 28.0, 30.0, 29.0, 21.0, 23.0, 22.0, 13.0, 13.0, 11.0, 9.0, 5.0, 6.0, 2.0, 4.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.692967176437378, -1.6411323547363281, -1.5892975330352783, -1.5374627113342285, -1.4856278896331787, -1.433793067932129, -1.381958246231079, -1.3301235437393188, -1.278288722038269, -1.2264539003372192, -1.1746190786361694, -1.1227842569351196, -1.0709494352340698, -1.0191147327423096, -0.967279851436615, -0.91544508934021, -0.8636102080345154, -0.8117753863334656, -0.7599405646324158, -0.7081058025360107, -0.6562709808349609, -0.6044361591339111, -0.5526013374328613, -0.5007665157318115, -0.4489317238330841, -0.3970969021320343, -0.3452621102333069, -0.2934272885322571, -0.24159248173236847, -0.18975767493247986, -0.13792285323143005, -0.08608806133270264, -0.03425323963165283, 0.017581570893526077, 0.06941638141870499, 0.1212511956691742, 0.1730860024690628, 0.22492080926895142, 0.2767556309700012, 0.32859042286872864, 0.38042524456977844, 0.43226006627082825, 0.48409485816955566, 0.5359296798706055, 0.5877645015716553, 0.6395993232727051, 0.6914341449737549, 0.7432689070701599, 0.7951037287712097, 0.8469385504722595, 0.8987733721733093, 0.9506081342697144, 1.0024429559707642, 1.054277777671814, 1.1061125993728638, 1.1579474210739136, 1.2097822427749634, 1.2616170644760132, 1.313451886177063, 1.3652867078781128, 1.4171215295791626, 1.4689562320709229, 1.5207910537719727, 1.5726258754730225, 1.6244606971740723]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 4.0, 9.0, 15.0, 12.0, 27.0, 51.0, 73.0, 132.0, 270.0, 575.0, 1369.0, 4109.0, 16229.0, 92156.0, 474471.0, 376450.0, 64743.0, 12451.0, 3299.0, 1123.0, 483.0, 212.0, 100.0, 73.0, 37.0, 32.0, 16.0, 11.0, 10.0, 4.0, 3.0, 5.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.9892578125, -1.9381256103515625, -1.886993408203125, -1.8358612060546875, -1.78472900390625, -1.7335968017578125, -1.682464599609375, -1.6313323974609375, -1.5802001953125, -1.5290679931640625, -1.477935791015625, -1.4268035888671875, -1.37567138671875, -1.3245391845703125, -1.273406982421875, -1.2222747802734375, -1.171142578125, -1.1200103759765625, -1.068878173828125, -1.0177459716796875, -0.96661376953125, -0.9154815673828125, -0.864349365234375, -0.8132171630859375, -0.7620849609375, -0.7109527587890625, -0.659820556640625, -0.6086883544921875, -0.55755615234375, -0.5064239501953125, -0.455291748046875, -0.4041595458984375, -0.35302734375, -0.3018951416015625, -0.250762939453125, -0.1996307373046875, -0.14849853515625, -0.0973663330078125, -0.046234130859375, 0.0048980712890625, 0.0560302734375, 0.1071624755859375, 0.158294677734375, 0.2094268798828125, 0.26055908203125, 0.3116912841796875, 0.362823486328125, 0.4139556884765625, 0.465087890625, 0.5162200927734375, 0.567352294921875, 0.6184844970703125, 0.66961669921875, 0.7207489013671875, 0.771881103515625, 0.8230133056640625, 0.8741455078125, 0.9252777099609375, 0.976409912109375, 1.0275421142578125, 1.07867431640625, 1.1298065185546875, 1.180938720703125, 1.2320709228515625, 1.283203125]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 5.0, 2.0, 0.0, 1.0, 4.0, 4.0, 7.0, 8.0, 11.0, 15.0, 19.0, 27.0, 27.0, 51.0, 61.0, 58.0, 83.0, 57.0, 84.0, 83.0, 64.0, 64.0, 64.0, 46.0, 49.0, 27.0, 27.0, 21.0, 9.0, 6.0, 8.0, 5.0, 2.0, 6.0, 2.0, 2.0, 0.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.69921875, -0.679656982421875, -0.66009521484375, -0.640533447265625, -0.6209716796875, -0.601409912109375, -0.58184814453125, -0.562286376953125, -0.542724609375, -0.523162841796875, -0.50360107421875, -0.484039306640625, -0.4644775390625, -0.444915771484375, -0.42535400390625, -0.405792236328125, -0.38623046875, -0.366668701171875, -0.34710693359375, -0.327545166015625, -0.3079833984375, -0.288421630859375, -0.26885986328125, -0.249298095703125, -0.229736328125, -0.210174560546875, -0.19061279296875, -0.171051025390625, -0.1514892578125, -0.131927490234375, -0.11236572265625, -0.092803955078125, -0.0732421875, -0.053680419921875, -0.03411865234375, -0.014556884765625, 0.0050048828125, 0.024566650390625, 0.04412841796875, 0.063690185546875, 0.083251953125, 0.102813720703125, 0.12237548828125, 0.141937255859375, 0.1614990234375, 0.181060791015625, 0.20062255859375, 0.220184326171875, 0.23974609375, 0.259307861328125, 0.27886962890625, 0.298431396484375, 0.3179931640625, 0.337554931640625, 0.35711669921875, 0.376678466796875, 0.396240234375, 0.415802001953125, 0.43536376953125, 0.454925537109375, 0.4744873046875, 0.494049072265625, 0.51361083984375, 0.533172607421875, 0.552734375]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 5.0, 3.0, 2.0, 5.0, 6.0, 3.0, 8.0, 19.0, 19.0, 32.0, 52.0, 62.0, 124.0, 188.0, 295.0, 626.0, 1189.0, 3109.0, 10738.0, 79137.0, 650900.0, 267179.0, 25946.0, 5216.0, 1800.0, 794.0, 420.0, 254.0, 132.0, 101.0, 69.0, 41.0, 27.0, 27.0, 15.0, 4.0, 10.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.44921875, -1.390655517578125, -1.33209228515625, -1.273529052734375, -1.2149658203125, -1.156402587890625, -1.09783935546875, -1.039276123046875, -0.980712890625, -0.922149658203125, -0.86358642578125, -0.805023193359375, -0.7464599609375, -0.687896728515625, -0.62933349609375, -0.570770263671875, -0.51220703125, -0.453643798828125, -0.39508056640625, -0.336517333984375, -0.2779541015625, -0.219390869140625, -0.16082763671875, -0.102264404296875, -0.043701171875, 0.014862060546875, 0.07342529296875, 0.131988525390625, 0.1905517578125, 0.249114990234375, 0.30767822265625, 0.366241455078125, 0.4248046875, 0.483367919921875, 0.54193115234375, 0.600494384765625, 0.6590576171875, 0.717620849609375, 0.77618408203125, 0.834747314453125, 0.893310546875, 0.951873779296875, 1.01043701171875, 1.069000244140625, 1.1275634765625, 1.186126708984375, 1.24468994140625, 1.303253173828125, 1.36181640625, 1.420379638671875, 1.47894287109375, 1.537506103515625, 1.5960693359375, 1.654632568359375, 1.71319580078125, 1.771759033203125, 1.830322265625, 1.888885498046875, 1.94744873046875, 2.006011962890625, 2.0645751953125, 2.123138427734375, 2.18170166015625, 2.240264892578125, 2.298828125]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 5.0, 3.0, 5.0, 5.0, 7.0, 1.0, 7.0, 15.0, 10.0, 17.0, 23.0, 28.0, 35.0, 27.0, 31.0, 34.0, 38.0, 42.0, 47.0, 52.0, 60.0, 42.0, 49.0, 69.0, 51.0, 39.0, 48.0, 41.0, 31.0, 21.0, 26.0, 26.0, 16.0, 13.0, 9.0, 7.0, 11.0, 6.0, 5.0, 3.0, 5.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8125, -1.7547454833984375, -1.696990966796875, -1.6392364501953125, -1.58148193359375, -1.5237274169921875, -1.465972900390625, -1.4082183837890625, -1.3504638671875, -1.2927093505859375, -1.234954833984375, -1.1772003173828125, -1.11944580078125, -1.0616912841796875, -1.003936767578125, -0.9461822509765625, -0.888427734375, -0.8306732177734375, -0.772918701171875, -0.7151641845703125, -0.65740966796875, -0.5996551513671875, -0.541900634765625, -0.4841461181640625, -0.4263916015625, -0.3686370849609375, -0.310882568359375, -0.2531280517578125, -0.19537353515625, -0.1376190185546875, -0.079864501953125, -0.0221099853515625, 0.03564453125, 0.0933990478515625, 0.151153564453125, 0.2089080810546875, 0.26666259765625, 0.3244171142578125, 0.382171630859375, 0.4399261474609375, 0.4976806640625, 0.5554351806640625, 0.613189697265625, 0.6709442138671875, 0.72869873046875, 0.7864532470703125, 0.844207763671875, 0.9019622802734375, 0.959716796875, 1.0174713134765625, 1.075225830078125, 1.1329803466796875, 1.19073486328125, 1.2484893798828125, 1.306243896484375, 1.3639984130859375, 1.4217529296875, 1.4795074462890625, 1.537261962890625, 1.5950164794921875, 1.65277099609375, 1.7105255126953125, 1.768280029296875, 1.8260345458984375, 1.8837890625]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 2.0, 0.0, 4.0, 3.0, 5.0, 10.0, 12.0, 9.0, 35.0, 39.0, 62.0, 77.0, 143.0, 299.0, 728.0, 1919.0, 8126.0, 90707.0, 794700.0, 136975.0, 10796.0, 2300.0, 796.0, 357.0, 184.0, 91.0, 49.0, 35.0, 15.0, 19.0, 16.0, 19.0, 6.0, 6.0, 7.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.95361328125, -0.9275360107421875, -0.901458740234375, -0.8753814697265625, -0.84930419921875, -0.8232269287109375, -0.797149658203125, -0.7710723876953125, -0.7449951171875, -0.7189178466796875, -0.692840576171875, -0.6667633056640625, -0.64068603515625, -0.6146087646484375, -0.588531494140625, -0.5624542236328125, -0.536376953125, -0.5102996826171875, -0.484222412109375, -0.4581451416015625, -0.43206787109375, -0.4059906005859375, -0.379913330078125, -0.3538360595703125, -0.3277587890625, -0.3016815185546875, -0.275604248046875, -0.2495269775390625, -0.22344970703125, -0.1973724365234375, -0.171295166015625, -0.1452178955078125, -0.119140625, -0.0930633544921875, -0.066986083984375, -0.0409088134765625, -0.01483154296875, 0.0112457275390625, 0.037322998046875, 0.0634002685546875, 0.0894775390625, 0.1155548095703125, 0.141632080078125, 0.1677093505859375, 0.19378662109375, 0.2198638916015625, 0.245941162109375, 0.2720184326171875, 0.298095703125, 0.3241729736328125, 0.350250244140625, 0.3763275146484375, 0.40240478515625, 0.4284820556640625, 0.454559326171875, 0.4806365966796875, 0.5067138671875, 0.5327911376953125, 0.558868408203125, 0.5849456787109375, 0.61102294921875, 0.6371002197265625, 0.663177490234375, 0.6892547607421875, 0.71533203125]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 0.0, 3.0, 3.0, 4.0, 2.0, 1.0, 5.0, 10.0, 10.0, 16.0, 13.0, 23.0, 34.0, 42.0, 56.0, 64.0, 90.0, 89.0, 83.0, 94.0, 75.0, 68.0, 53.0, 47.0, 40.0, 24.0, 18.0, 15.0, 5.0, 3.0, 5.0, 5.0, 5.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.893013000488281e-05, -8.591823279857635e-05, -8.29063355922699e-05, -7.989443838596344e-05, -7.688254117965698e-05, -7.387064397335052e-05, -7.085874676704407e-05, -6.784684956073761e-05, -6.483495235443115e-05, -6.18230551481247e-05, -5.881115794181824e-05, -5.579926073551178e-05, -5.278736352920532e-05, -4.9775466322898865e-05, -4.676356911659241e-05, -4.375167191028595e-05, -4.073977470397949e-05, -3.7727877497673035e-05, -3.471598029136658e-05, -3.170408308506012e-05, -2.8692185878753662e-05, -2.5680288672447205e-05, -2.2668391466140747e-05, -1.965649425983429e-05, -1.6644597053527832e-05, -1.3632699847221375e-05, -1.0620802640914917e-05, -7.6089054346084595e-06, -4.597008228302002e-06, -1.5851110219955444e-06, 1.426786184310913e-06, 4.438683390617371e-06, 7.450580596923828e-06, 1.0462477803230286e-05, 1.3474375009536743e-05, 1.64862722158432e-05, 1.9498169422149658e-05, 2.2510066628456116e-05, 2.5521963834762573e-05, 2.853386104106903e-05, 3.154575824737549e-05, 3.4557655453681946e-05, 3.75695526599884e-05, 4.058144986629486e-05, 4.359334707260132e-05, 4.6605244278907776e-05, 4.9617141485214233e-05, 5.262903869152069e-05, 5.564093589782715e-05, 5.8652833104133606e-05, 6.166473031044006e-05, 6.467662751674652e-05, 6.768852472305298e-05, 7.070042192935944e-05, 7.37123191356659e-05, 7.672421634197235e-05, 7.973611354827881e-05, 8.274801075458527e-05, 8.575990796089172e-05, 8.877180516719818e-05, 9.178370237350464e-05, 9.47955995798111e-05, 9.780749678611755e-05, 0.00010081939399242401, 0.00010383129119873047]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 5.0, 5.0, 14.0, 21.0, 21.0, 27.0, 63.0, 128.0, 229.0, 539.0, 1616.0, 6302.0, 74121.0, 838598.0, 115823.0, 8044.0, 1763.0, 669.0, 259.0, 129.0, 71.0, 44.0, 29.0, 14.0, 8.0, 6.0, 4.0, 7.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.8349609375, -0.8046722412109375, -0.774383544921875, -0.7440948486328125, -0.71380615234375, -0.6835174560546875, -0.653228759765625, -0.6229400634765625, -0.5926513671875, -0.5623626708984375, -0.532073974609375, -0.5017852783203125, -0.47149658203125, -0.4412078857421875, -0.410919189453125, -0.3806304931640625, -0.350341796875, -0.3200531005859375, -0.289764404296875, -0.2594757080078125, -0.22918701171875, -0.1988983154296875, -0.168609619140625, -0.1383209228515625, -0.1080322265625, -0.0777435302734375, -0.047454833984375, -0.0171661376953125, 0.01312255859375, 0.0434112548828125, 0.073699951171875, 0.1039886474609375, 0.13427734375, 0.1645660400390625, 0.194854736328125, 0.2251434326171875, 0.25543212890625, 0.2857208251953125, 0.316009521484375, 0.3462982177734375, 0.3765869140625, 0.4068756103515625, 0.437164306640625, 0.4674530029296875, 0.49774169921875, 0.5280303955078125, 0.558319091796875, 0.5886077880859375, 0.618896484375, 0.6491851806640625, 0.679473876953125, 0.7097625732421875, 0.74005126953125, 0.7703399658203125, 0.800628662109375, 0.8309173583984375, 0.8612060546875, 0.8914947509765625, 0.921783447265625, 0.9520721435546875, 0.98236083984375, 1.0126495361328125, 1.042938232421875, 1.0732269287109375, 1.103515625]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 5.0, 7.0, 5.0, 8.0, 14.0, 16.0, 26.0, 24.0, 36.0, 47.0, 52.0, 58.0, 69.0, 77.0, 84.0, 87.0, 86.0, 66.0, 67.0, 38.0, 32.0, 37.0, 23.0, 15.0, 10.0, 5.0, 4.0, 2.0, 3.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.78466796875, -0.7642059326171875, -0.743743896484375, -0.7232818603515625, -0.70281982421875, -0.6823577880859375, -0.661895751953125, -0.6414337158203125, -0.6209716796875, -0.6005096435546875, -0.580047607421875, -0.5595855712890625, -0.53912353515625, -0.5186614990234375, -0.498199462890625, -0.4777374267578125, -0.457275390625, -0.4368133544921875, -0.416351318359375, -0.3958892822265625, -0.37542724609375, -0.3549652099609375, -0.334503173828125, -0.3140411376953125, -0.2935791015625, -0.2731170654296875, -0.252655029296875, -0.2321929931640625, -0.21173095703125, -0.1912689208984375, -0.170806884765625, -0.1503448486328125, -0.1298828125, -0.1094207763671875, -0.088958740234375, -0.0684967041015625, -0.04803466796875, -0.0275726318359375, -0.007110595703125, 0.0133514404296875, 0.0338134765625, 0.0542755126953125, 0.074737548828125, 0.0951995849609375, 0.11566162109375, 0.1361236572265625, 0.156585693359375, 0.1770477294921875, 0.197509765625, 0.2179718017578125, 0.238433837890625, 0.2588958740234375, 0.27935791015625, 0.2998199462890625, 0.320281982421875, 0.3407440185546875, 0.3612060546875, 0.3816680908203125, 0.402130126953125, 0.4225921630859375, 0.44305419921875, 0.4635162353515625, 0.483978271484375, 0.5044403076171875, 0.52490234375]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 0.0, 8.0, 8.0, 15.0, 31.0, 43.0, 87.0, 112.0, 166.0, 153.0, 145.0, 102.0, 60.0, 33.0, 13.0, 11.0, 6.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 2.0], "bins": [-21.54706573486328, -21.114044189453125, -20.6810245513916, -20.248003005981445, -19.81498146057129, -19.381959915161133, -18.94894027709961, -18.515918731689453, -18.082897186279297, -17.64987564086914, -17.216856002807617, -16.78383445739746, -16.350812911987305, -15.917792320251465, -15.484770774841309, -15.051750183105469, -14.618728637695312, -14.185708045959473, -13.752686500549316, -13.319665908813477, -12.88664436340332, -12.45362377166748, -12.020602226257324, -11.587581634521484, -11.154561042785645, -10.721540451049805, -10.288518905639648, -9.855498313903809, -9.422476768493652, -8.989456176757812, -8.556434631347656, -8.123414039611816, -7.690392971038818, -7.25737190246582, -6.824350833892822, -6.391329765319824, -5.958308696746826, -5.525287628173828, -5.092267036437988, -4.659245491027832, -4.226224899291992, -3.793203830718994, -3.360182762145996, -2.927161693572998, -2.494140625, -2.061119794845581, -1.628098726272583, -1.195077657699585, -0.7620563507080078, -0.32903531193733215, 0.1039857268333435, 0.5370067358016968, 0.9700278043746948, 1.4030487537384033, 1.8360698223114014, 2.2690908908843994, 2.7021119594573975, 3.1351330280303955, 3.5681540966033936, 4.0011749267578125, 4.4341959953308105, 4.867217063903809, 5.300238132476807, 5.733259201049805, 6.166280269622803]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 6.0, 5.0, 2.0, 3.0, 3.0, 5.0, 6.0, 11.0, 9.0, 18.0, 21.0, 15.0, 16.0, 24.0, 28.0, 38.0, 32.0, 35.0, 26.0, 40.0, 44.0, 46.0, 54.0, 43.0, 53.0, 40.0, 46.0, 34.0, 46.0, 44.0, 24.0, 39.0, 22.0, 25.0, 16.0, 12.0, 16.0, 9.0, 12.0, 9.0, 5.0, 8.0, 5.0, 7.0, 5.0, 1.0, 3.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-6.958752155303955, -6.720723628997803, -6.482695579528809, -6.244667053222656, -6.006638526916504, -5.768610000610352, -5.530581474304199, -5.292553424835205, -5.054524898529053, -4.8164963722229, -4.578468322753906, -4.340439796447754, -4.102411270141602, -3.864382743835449, -3.626354455947876, -3.3883261680603027, -3.1502976417541504, -2.912269115447998, -2.674240827560425, -2.4362125396728516, -2.198184013366699, -1.9601556062698364, -1.7221271991729736, -1.4840987920761108, -1.246070384979248, -1.0080419778823853, -0.7700135707855225, -0.5319851636886597, -0.2939567565917969, -0.05592834949493408, 0.1821000576019287, 0.4201284646987915, 0.6581573486328125, 0.8961857557296753, 1.134214162826538, 1.3722425699234009, 1.6102709770202637, 1.8482993841171265, 2.0863277912139893, 2.3243560791015625, 2.562384605407715, 2.800413131713867, 3.0384414196014404, 3.2764697074890137, 3.514498233795166, 3.7525267601013184, 3.9905550479888916, 4.228583335876465, 4.466611862182617, 4.7046403884887695, 4.942668914794922, 5.180696964263916, 5.418725490570068, 5.656754016876221, 5.894782066345215, 6.132810592651367, 6.3708391189575195, 6.608867645263672, 6.846896171569824, 7.084924221038818, 7.322952747344971, 7.560981273651123, 7.799009323120117, 8.03703784942627, 8.275066375732422]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 5.0, 2.0, 1.0, 5.0, 4.0, 6.0, 10.0, 14.0, 23.0, 32.0, 45.0, 71.0, 94.0, 174.0, 353.0, 856.0, 2098.0, 5815.0, 19371.0, 91700.0, 1724204.0, 2216325.0, 101744.0, 20630.0, 6255.0, 2293.0, 1011.0, 501.0, 245.0, 129.0, 86.0, 51.0, 35.0, 28.0, 25.0, 8.0, 7.0, 6.0, 10.0, 1.0, 2.0, 4.0, 3.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-1.24609375, -1.2115097045898438, -1.1769256591796875, -1.1423416137695312, -1.107757568359375, -1.0731735229492188, -1.0385894775390625, -1.0040054321289062, -0.96942138671875, -0.9348373413085938, -0.9002532958984375, -0.8656692504882812, -0.831085205078125, -0.7965011596679688, -0.7619171142578125, -0.7273330688476562, -0.6927490234375, -0.6581649780273438, -0.6235809326171875, -0.5889968872070312, -0.554412841796875, -0.5198287963867188, -0.4852447509765625, -0.45066070556640625, -0.41607666015625, -0.38149261474609375, -0.3469085693359375, -0.31232452392578125, -0.277740478515625, -0.24315643310546875, -0.2085723876953125, -0.17398834228515625, -0.139404296875, -0.10482025146484375, -0.0702362060546875, -0.03565216064453125, -0.001068115234375, 0.03351593017578125, 0.0680999755859375, 0.10268402099609375, 0.13726806640625, 0.17185211181640625, 0.2064361572265625, 0.24102020263671875, 0.275604248046875, 0.31018829345703125, 0.3447723388671875, 0.37935638427734375, 0.4139404296875, 0.44852447509765625, 0.4831085205078125, 0.5176925659179688, 0.552276611328125, 0.5868606567382812, 0.6214447021484375, 0.6560287475585938, 0.69061279296875, 0.7251968383789062, 0.7597808837890625, 0.7943649291992188, 0.828948974609375, 0.8635330200195312, 0.8981170654296875, 0.9327011108398438, 0.96728515625]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 5.0, 5.0, 6.0, 11.0, 8.0, 17.0, 10.0, 9.0, 26.0, 27.0, 27.0, 35.0, 43.0, 46.0, 60.0, 56.0, 61.0, 68.0, 57.0, 66.0, 55.0, 54.0, 33.0, 46.0, 37.0, 31.0, 25.0, 22.0, 15.0, 15.0, 7.0, 6.0, 6.0, 8.0, 1.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.379150390625, -0.3655204772949219, -0.35189056396484375, -0.3382606506347656, -0.3246307373046875, -0.3110008239746094, -0.29737091064453125, -0.2837409973144531, -0.270111083984375, -0.2564811706542969, -0.24285125732421875, -0.22922134399414062, -0.2155914306640625, -0.20196151733398438, -0.18833160400390625, -0.17470169067382812, -0.16107177734375, -0.14744186401367188, -0.13381195068359375, -0.12018203735351562, -0.1065521240234375, -0.09292221069335938, -0.07929229736328125, -0.06566238403320312, -0.052032470703125, -0.038402557373046875, -0.02477264404296875, -0.011142730712890625, 0.0024871826171875, 0.016117095947265625, 0.02974700927734375, 0.043376922607421875, 0.0570068359375, 0.07063674926757812, 0.08426666259765625, 0.09789657592773438, 0.1115264892578125, 0.12515640258789062, 0.13878631591796875, 0.15241622924804688, 0.166046142578125, 0.17967605590820312, 0.19330596923828125, 0.20693588256835938, 0.2205657958984375, 0.23419570922851562, 0.24782562255859375, 0.2614555358886719, 0.27508544921875, 0.2887153625488281, 0.30234527587890625, 0.3159751892089844, 0.3296051025390625, 0.3432350158691406, 0.35686492919921875, 0.3704948425292969, 0.384124755859375, 0.3977546691894531, 0.41138458251953125, 0.4250144958496094, 0.4386444091796875, 0.4522743225097656, 0.46590423583984375, 0.4795341491699219, 0.4931640625]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 2.0, 9.0, 20.0, 24.0, 58.0, 119.0, 262.0, 834.0, 3716.0, 33525.0, 3267053.0, 864748.0, 19970.0, 2709.0, 741.0, 272.0, 110.0, 42.0, 17.0, 17.0, 10.0, 11.0, 7.0, 8.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.08203125, -2.009979248046875, -1.93792724609375, -1.865875244140625, -1.7938232421875, -1.721771240234375, -1.64971923828125, -1.577667236328125, -1.505615234375, -1.433563232421875, -1.36151123046875, -1.289459228515625, -1.2174072265625, -1.145355224609375, -1.07330322265625, -1.001251220703125, -0.92919921875, -0.857147216796875, -0.78509521484375, -0.713043212890625, -0.6409912109375, -0.568939208984375, -0.49688720703125, -0.424835205078125, -0.352783203125, -0.280731201171875, -0.20867919921875, -0.136627197265625, -0.0645751953125, 0.007476806640625, 0.07952880859375, 0.151580810546875, 0.2236328125, 0.295684814453125, 0.36773681640625, 0.439788818359375, 0.5118408203125, 0.583892822265625, 0.65594482421875, 0.727996826171875, 0.800048828125, 0.872100830078125, 0.94415283203125, 1.016204833984375, 1.0882568359375, 1.160308837890625, 1.23236083984375, 1.304412841796875, 1.37646484375, 1.448516845703125, 1.52056884765625, 1.592620849609375, 1.6646728515625, 1.736724853515625, 1.80877685546875, 1.880828857421875, 1.952880859375, 2.024932861328125, 2.09698486328125, 2.169036865234375, 2.2410888671875, 2.313140869140625, 2.38519287109375, 2.457244873046875, 2.529296875]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 8.0, 13.0, 23.0, 37.0, 84.0, 164.0, 424.0, 1120.0, 1330.0, 481.0, 174.0, 91.0, 50.0, 24.0, 15.0, 13.0, 8.0, 2.0, 4.0, 1.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4052734375, -1.3443756103515625, -1.283477783203125, -1.2225799560546875, -1.16168212890625, -1.1007843017578125, -1.039886474609375, -0.9789886474609375, -0.9180908203125, -0.8571929931640625, -0.796295166015625, -0.7353973388671875, -0.67449951171875, -0.6136016845703125, -0.552703857421875, -0.4918060302734375, -0.430908203125, -0.3700103759765625, -0.309112548828125, -0.2482147216796875, -0.18731689453125, -0.1264190673828125, -0.065521240234375, -0.0046234130859375, 0.0562744140625, 0.1171722412109375, 0.178070068359375, 0.2389678955078125, 0.29986572265625, 0.3607635498046875, 0.421661376953125, 0.4825592041015625, 0.54345703125, 0.6043548583984375, 0.665252685546875, 0.7261505126953125, 0.78704833984375, 0.8479461669921875, 0.908843994140625, 0.9697418212890625, 1.0306396484375, 1.0915374755859375, 1.152435302734375, 1.2133331298828125, 1.27423095703125, 1.3351287841796875, 1.396026611328125, 1.4569244384765625, 1.517822265625, 1.5787200927734375, 1.639617919921875, 1.7005157470703125, 1.76141357421875, 1.8223114013671875, 1.883209228515625, 1.9441070556640625, 2.0050048828125, 2.0659027099609375, 2.126800537109375, 2.1876983642578125, 2.24859619140625, 2.3094940185546875, 2.370391845703125, 2.4312896728515625, 2.4921875]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 3.0, 5.0, 7.0, 17.0, 39.0, 90.0, 195.0, 192.0, 199.0, 131.0, 55.0, 27.0, 13.0, 17.0, 7.0, 5.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.360735893249512, -8.938557624816895, -8.516378402709961, -8.094200134277344, -7.672021865844727, -7.249843120574951, -6.827664375305176, -6.405486106872559, -5.983307361602783, -5.561128616333008, -5.138950347900391, -4.716771602630615, -4.29459285736084, -3.8724145889282227, -3.4502358436584473, -3.028057336807251, -2.6058788299560547, -2.1837003231048584, -1.7615216970443726, -1.3393430709838867, -0.9171645641326904, -0.49498605728149414, -0.07280731201171875, 0.34937119483947754, 0.7715497016906738, 1.1937282085418701, 1.615906834602356, 2.038085460662842, 2.460263967514038, 2.8824424743652344, 3.3046212196350098, 3.726799726486206, 4.148977279663086, 4.571156024932861, 4.9933342933654785, 5.415513038635254, 5.837691307067871, 6.2598700523376465, 6.682048797607422, 7.104227066040039, 7.5264058113098145, 7.94858455657959, 8.370762825012207, 8.79294204711914, 9.215120315551758, 9.637298583984375, 10.059476852416992, 10.481656074523926, 10.903834342956543, 11.32601261138916, 11.748191833496094, 12.170370101928711, 12.592548370361328, 13.014726638793945, 13.436905860900879, 13.859084129333496, 14.28126335144043, 14.703441619873047, 15.12562084197998, 15.547799110412598, 15.969977378845215, 16.39215660095215, 16.814334869384766, 17.236513137817383, 17.65869140625]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 2.0, 6.0, 5.0, 4.0, 8.0, 13.0, 27.0, 20.0, 31.0, 24.0, 33.0, 29.0, 33.0, 30.0, 46.0, 37.0, 48.0, 38.0, 50.0, 42.0, 38.0, 31.0, 44.0, 42.0, 41.0, 45.0, 24.0, 35.0, 30.0, 28.0, 18.0, 13.0, 14.0, 15.0, 9.0, 7.0, 8.0, 7.0, 12.0, 5.0, 7.0, 2.0, 1.0, 3.0, 1.0, 1.0], "bins": [-5.8065924644470215, -5.6538519859313965, -5.5011115074157715, -5.348371505737305, -5.19563102722168, -5.042890548706055, -4.89015007019043, -4.737409591674805, -4.58466911315918, -4.431928634643555, -4.27918815612793, -4.126448154449463, -3.973707675933838, -3.820967197418213, -3.668226718902588, -3.515486240386963, -3.362746238708496, -3.210005760192871, -3.057265520095825, -2.9045250415802, -2.7517848014831543, -2.5990443229675293, -2.4463038444519043, -2.2935633659362793, -2.1408231258392334, -1.988082766532898, -1.8353424072265625, -1.6826019287109375, -1.529861569404602, -1.3771212100982666, -1.2243807315826416, -1.0716403722763062, -0.9188995361328125, -0.766159176826477, -0.6134187579154968, -0.460678368806839, -0.30793797969818115, -0.1551976203918457, -0.0024572014808654785, 0.15028321743011475, 0.3030235767364502, 0.45576396584510803, 0.6085043549537659, 0.7612447738647461, 0.9139851331710815, 1.066725492477417, 1.219465970993042, 1.3722063302993774, 1.524946689605713, 1.6776870489120483, 1.8304274082183838, 1.9831678867340088, 2.1359081268310547, 2.2886486053466797, 2.4413890838623047, 2.5941295623779297, 2.7468698024749756, 2.8996102809906006, 3.0523505210876465, 3.2050909996032715, 3.3578314781188965, 3.5105717182159424, 3.6633121967315674, 3.8160524368286133, 3.9687929153442383]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 9.0, 3.0, 2.0, 8.0, 16.0, 18.0, 30.0, 49.0, 68.0, 122.0, 248.0, 578.0, 1361.0, 3380.0, 10370.0, 37686.0, 159048.0, 477311.0, 268950.0, 64244.0, 16483.0, 5107.0, 1922.0, 768.0, 355.0, 180.0, 87.0, 51.0, 36.0, 25.0, 7.0, 14.0, 7.0, 4.0, 3.0, 2.0, 1.0, 4.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-1.2978515625, -1.2643051147460938, -1.2307586669921875, -1.1972122192382812, -1.163665771484375, -1.1301193237304688, -1.0965728759765625, -1.0630264282226562, -1.02947998046875, -0.9959335327148438, -0.9623870849609375, -0.9288406372070312, -0.895294189453125, -0.8617477416992188, -0.8282012939453125, -0.7946548461914062, -0.7611083984375, -0.7275619506835938, -0.6940155029296875, -0.6604690551757812, -0.626922607421875, -0.5933761596679688, -0.5598297119140625, -0.5262832641601562, -0.49273681640625, -0.45919036865234375, -0.4256439208984375, -0.39209747314453125, -0.358551025390625, -0.32500457763671875, -0.2914581298828125, -0.25791168212890625, -0.224365234375, -0.19081878662109375, -0.1572723388671875, -0.12372589111328125, -0.090179443359375, -0.05663299560546875, -0.0230865478515625, 0.01045989990234375, 0.04400634765625, 0.07755279541015625, 0.1110992431640625, 0.14464569091796875, 0.178192138671875, 0.21173858642578125, 0.2452850341796875, 0.27883148193359375, 0.3123779296875, 0.34592437744140625, 0.3794708251953125, 0.41301727294921875, 0.446563720703125, 0.48011016845703125, 0.5136566162109375, 0.5472030639648438, 0.58074951171875, 0.6142959594726562, 0.6478424072265625, 0.6813888549804688, 0.714935302734375, 0.7484817504882812, 0.7820281982421875, 0.8155746459960938, 0.84912109375]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 2.0, 3.0, 8.0, 3.0, 6.0, 7.0, 6.0, 11.0, 14.0, 15.0, 14.0, 27.0, 27.0, 20.0, 20.0, 42.0, 36.0, 41.0, 52.0, 49.0, 54.0, 48.0, 55.0, 53.0, 41.0, 50.0, 39.0, 24.0, 35.0, 34.0, 31.0, 26.0, 25.0, 23.0, 14.0, 13.0, 12.0, 4.0, 8.0, 2.0, 4.0, 5.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.33349609375, -0.32195281982421875, -0.3104095458984375, -0.29886627197265625, -0.287322998046875, -0.27577972412109375, -0.2642364501953125, -0.25269317626953125, -0.24114990234375, -0.22960662841796875, -0.2180633544921875, -0.20652008056640625, -0.194976806640625, -0.18343353271484375, -0.1718902587890625, -0.16034698486328125, -0.1488037109375, -0.13726043701171875, -0.1257171630859375, -0.11417388916015625, -0.102630615234375, -0.09108734130859375, -0.0795440673828125, -0.06800079345703125, -0.05645751953125, -0.04491424560546875, -0.0333709716796875, -0.02182769775390625, -0.010284423828125, 0.00125885009765625, 0.0128021240234375, 0.02434539794921875, 0.035888671875, 0.04743194580078125, 0.0589752197265625, 0.07051849365234375, 0.082061767578125, 0.09360504150390625, 0.1051483154296875, 0.11669158935546875, 0.12823486328125, 0.13977813720703125, 0.1513214111328125, 0.16286468505859375, 0.174407958984375, 0.18595123291015625, 0.1974945068359375, 0.20903778076171875, 0.2205810546875, 0.23212432861328125, 0.2436676025390625, 0.25521087646484375, 0.266754150390625, 0.27829742431640625, 0.2898406982421875, 0.30138397216796875, 0.31292724609375, 0.32447052001953125, 0.3360137939453125, 0.34755706787109375, 0.359100341796875, 0.37064361572265625, 0.3821868896484375, 0.39373016357421875, 0.4052734375]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 3.0, 5.0, 4.0, 7.0, 12.0, 9.0, 23.0, 31.0, 37.0, 61.0, 106.0, 138.0, 237.0, 349.0, 557.0, 870.0, 1783.0, 4456.0, 21167.0, 229345.0, 697755.0, 75811.0, 9566.0, 2887.0, 1325.0, 769.0, 438.0, 264.0, 186.0, 106.0, 78.0, 43.0, 38.0, 32.0, 24.0, 13.0, 14.0, 3.0, 3.0, 3.0, 3.0, 5.0, 1.0, 1.0], "bins": [-1.8408203125, -1.7951202392578125, -1.749420166015625, -1.7037200927734375, -1.65802001953125, -1.6123199462890625, -1.566619873046875, -1.5209197998046875, -1.4752197265625, -1.4295196533203125, -1.383819580078125, -1.3381195068359375, -1.29241943359375, -1.2467193603515625, -1.201019287109375, -1.1553192138671875, -1.109619140625, -1.0639190673828125, -1.018218994140625, -0.9725189208984375, -0.92681884765625, -0.8811187744140625, -0.835418701171875, -0.7897186279296875, -0.7440185546875, -0.6983184814453125, -0.652618408203125, -0.6069183349609375, -0.56121826171875, -0.5155181884765625, -0.469818115234375, -0.4241180419921875, -0.37841796875, -0.3327178955078125, -0.287017822265625, -0.2413177490234375, -0.19561767578125, -0.1499176025390625, -0.104217529296875, -0.0585174560546875, -0.0128173828125, 0.0328826904296875, 0.078582763671875, 0.1242828369140625, 0.16998291015625, 0.2156829833984375, 0.261383056640625, 0.3070831298828125, 0.352783203125, 0.3984832763671875, 0.444183349609375, 0.4898834228515625, 0.53558349609375, 0.5812835693359375, 0.626983642578125, 0.6726837158203125, 0.7183837890625, 0.7640838623046875, 0.809783935546875, 0.8554840087890625, 0.90118408203125, 0.9468841552734375, 0.992584228515625, 1.0382843017578125, 1.083984375]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 4.0, 7.0, 9.0, 14.0, 14.0, 9.0, 16.0, 24.0, 25.0, 32.0, 37.0, 38.0, 55.0, 48.0, 41.0, 43.0, 51.0, 52.0, 56.0, 60.0, 48.0, 50.0, 45.0, 36.0, 44.0, 26.0, 24.0, 21.0, 12.0, 15.0, 8.0, 11.0, 5.0, 2.0, 6.0, 4.0, 1.0, 4.0, 5.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.56640625, -1.514678955078125, -1.46295166015625, -1.411224365234375, -1.3594970703125, -1.307769775390625, -1.25604248046875, -1.204315185546875, -1.152587890625, -1.100860595703125, -1.04913330078125, -0.997406005859375, -0.9456787109375, -0.893951416015625, -0.84222412109375, -0.790496826171875, -0.73876953125, -0.687042236328125, -0.63531494140625, -0.583587646484375, -0.5318603515625, -0.480133056640625, -0.42840576171875, -0.376678466796875, -0.324951171875, -0.273223876953125, -0.22149658203125, -0.169769287109375, -0.1180419921875, -0.066314697265625, -0.01458740234375, 0.037139892578125, 0.0888671875, 0.140594482421875, 0.19232177734375, 0.244049072265625, 0.2957763671875, 0.347503662109375, 0.39923095703125, 0.450958251953125, 0.502685546875, 0.554412841796875, 0.60614013671875, 0.657867431640625, 0.7095947265625, 0.761322021484375, 0.81304931640625, 0.864776611328125, 0.91650390625, 0.968231201171875, 1.01995849609375, 1.071685791015625, 1.1234130859375, 1.175140380859375, 1.22686767578125, 1.278594970703125, 1.330322265625, 1.382049560546875, 1.43377685546875, 1.485504150390625, 1.5372314453125, 1.588958740234375, 1.64068603515625, 1.692413330078125, 1.744140625]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 5.0, 5.0, 5.0, 4.0, 9.0, 12.0, 7.0, 13.0, 17.0, 19.0, 25.0, 51.0, 64.0, 121.0, 193.0, 370.0, 824.0, 2243.0, 8800.0, 67347.0, 726345.0, 215849.0, 19929.0, 3870.0, 1248.0, 504.0, 265.0, 137.0, 89.0, 35.0, 44.0, 22.0, 21.0, 20.0, 10.0, 7.0, 3.0, 4.0, 5.0, 4.0, 4.0, 6.0, 2.0, 2.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.55517578125, -0.5364990234375, -0.517822265625, -0.4991455078125, -0.48046875, -0.4617919921875, -0.443115234375, -0.4244384765625, -0.40576171875, -0.3870849609375, -0.368408203125, -0.3497314453125, -0.3310546875, -0.3123779296875, -0.293701171875, -0.2750244140625, -0.25634765625, -0.2376708984375, -0.218994140625, -0.2003173828125, -0.181640625, -0.1629638671875, -0.144287109375, -0.1256103515625, -0.10693359375, -0.0882568359375, -0.069580078125, -0.0509033203125, -0.0322265625, -0.0135498046875, 0.005126953125, 0.0238037109375, 0.04248046875, 0.0611572265625, 0.079833984375, 0.0985107421875, 0.1171875, 0.1358642578125, 0.154541015625, 0.1732177734375, 0.19189453125, 0.2105712890625, 0.229248046875, 0.2479248046875, 0.2666015625, 0.2852783203125, 0.303955078125, 0.3226318359375, 0.34130859375, 0.3599853515625, 0.378662109375, 0.3973388671875, 0.416015625, 0.4346923828125, 0.453369140625, 0.4720458984375, 0.49072265625, 0.5093994140625, 0.528076171875, 0.5467529296875, 0.5654296875, 0.5841064453125, 0.602783203125, 0.6214599609375, 0.64013671875]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 4.0, 1.0, 1.0, 5.0, 3.0, 1.0, 3.0, 8.0, 9.0, 14.0, 10.0, 12.0, 13.0, 19.0, 29.0, 38.0, 41.0, 69.0, 80.0, 78.0, 118.0, 91.0, 105.0, 48.0, 50.0, 34.0, 21.0, 33.0, 14.0, 17.0, 9.0, 9.0, 7.0, 4.0, 9.0, 2.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00010442733764648438, -0.00010148249566555023, -9.853765368461609e-05, -9.559281170368195e-05, -9.26479697227478e-05, -8.970312774181366e-05, -8.675828576087952e-05, -8.381344377994537e-05, -8.086860179901123e-05, -7.792375981807709e-05, -7.497891783714294e-05, -7.20340758562088e-05, -6.908923387527466e-05, -6.614439189434052e-05, -6.319954991340637e-05, -6.025470793247223e-05, -5.7309865951538086e-05, -5.436502397060394e-05, -5.14201819896698e-05, -4.847534000873566e-05, -4.5530498027801514e-05, -4.258565604686737e-05, -3.964081406593323e-05, -3.6695972084999084e-05, -3.375113010406494e-05, -3.08062881231308e-05, -2.7861446142196655e-05, -2.4916604161262512e-05, -2.197176218032837e-05, -1.9026920199394226e-05, -1.6082078218460083e-05, -1.313723623752594e-05, -1.0192394256591797e-05, -7.247552275657654e-06, -4.302710294723511e-06, -1.3578683137893677e-06, 1.5869736671447754e-06, 4.5318156480789185e-06, 7.4766576290130615e-06, 1.0421499609947205e-05, 1.3366341590881348e-05, 1.631118357181549e-05, 1.9256025552749634e-05, 2.2200867533683777e-05, 2.514570951461792e-05, 2.8090551495552063e-05, 3.1035393476486206e-05, 3.398023545742035e-05, 3.692507743835449e-05, 3.9869919419288635e-05, 4.281476140022278e-05, 4.575960338115692e-05, 4.8704445362091064e-05, 5.164928734302521e-05, 5.459412932395935e-05, 5.7538971304893494e-05, 6.048381328582764e-05, 6.342865526676178e-05, 6.637349724769592e-05, 6.931833922863007e-05, 7.226318120956421e-05, 7.520802319049835e-05, 7.81528651714325e-05, 8.109770715236664e-05, 8.404254913330078e-05]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 6.0, 6.0, 16.0, 22.0, 32.0, 44.0, 83.0, 127.0, 248.0, 471.0, 1060.0, 3048.0, 14583.0, 194610.0, 767026.0, 56962.0, 6808.0, 1878.0, 740.0, 355.0, 184.0, 83.0, 63.0, 38.0, 23.0, 10.0, 11.0, 7.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.68310546875, -0.6619491577148438, -0.6407928466796875, -0.6196365356445312, -0.598480224609375, -0.5773239135742188, -0.5561676025390625, -0.5350112915039062, -0.51385498046875, -0.49269866943359375, -0.4715423583984375, -0.45038604736328125, -0.429229736328125, -0.40807342529296875, -0.3869171142578125, -0.36576080322265625, -0.3446044921875, -0.32344818115234375, -0.3022918701171875, -0.28113555908203125, -0.259979248046875, -0.23882293701171875, -0.2176666259765625, -0.19651031494140625, -0.17535400390625, -0.15419769287109375, -0.1330413818359375, -0.11188507080078125, -0.090728759765625, -0.06957244873046875, -0.0484161376953125, -0.02725982666015625, -0.006103515625, 0.01505279541015625, 0.0362091064453125, 0.05736541748046875, 0.078521728515625, 0.09967803955078125, 0.1208343505859375, 0.14199066162109375, 0.16314697265625, 0.18430328369140625, 0.2054595947265625, 0.22661590576171875, 0.247772216796875, 0.26892852783203125, 0.2900848388671875, 0.31124114990234375, 0.3323974609375, 0.35355377197265625, 0.3747100830078125, 0.39586639404296875, 0.417022705078125, 0.43817901611328125, 0.4593353271484375, 0.48049163818359375, 0.50164794921875, 0.5228042602539062, 0.5439605712890625, 0.5651168823242188, 0.586273193359375, 0.6074295043945312, 0.6285858154296875, 0.6497421264648438, 0.6708984375]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 3.0, 0.0, 4.0, 3.0, 7.0, 2.0, 1.0, 7.0, 7.0, 11.0, 18.0, 14.0, 17.0, 30.0, 24.0, 35.0, 36.0, 44.0, 47.0, 56.0, 55.0, 73.0, 56.0, 50.0, 54.0, 57.0, 56.0, 38.0, 31.0, 37.0, 26.0, 26.0, 15.0, 12.0, 15.0, 10.0, 9.0, 4.0, 2.0, 5.0, 4.0, 2.0, 6.0, 0.0, 1.0, 1.0, 2.0], "bins": [-0.46337890625, -0.4516754150390625, -0.439971923828125, -0.4282684326171875, -0.41656494140625, -0.4048614501953125, -0.393157958984375, -0.3814544677734375, -0.3697509765625, -0.3580474853515625, -0.346343994140625, -0.3346405029296875, -0.32293701171875, -0.3112335205078125, -0.299530029296875, -0.2878265380859375, -0.276123046875, -0.2644195556640625, -0.252716064453125, -0.2410125732421875, -0.22930908203125, -0.2176055908203125, -0.205902099609375, -0.1941986083984375, -0.1824951171875, -0.1707916259765625, -0.159088134765625, -0.1473846435546875, -0.13568115234375, -0.1239776611328125, -0.112274169921875, -0.1005706787109375, -0.0888671875, -0.0771636962890625, -0.065460205078125, -0.0537567138671875, -0.04205322265625, -0.0303497314453125, -0.018646240234375, -0.0069427490234375, 0.0047607421875, 0.0164642333984375, 0.028167724609375, 0.0398712158203125, 0.05157470703125, 0.0632781982421875, 0.074981689453125, 0.0866851806640625, 0.098388671875, 0.1100921630859375, 0.121795654296875, 0.1334991455078125, 0.14520263671875, 0.1569061279296875, 0.168609619140625, 0.1803131103515625, 0.1920166015625, 0.2037200927734375, 0.215423583984375, 0.2271270751953125, 0.23883056640625, 0.2505340576171875, 0.262237548828125, 0.2739410400390625, 0.28564453125]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 6.0, 7.0, 28.0, 20.0, 52.0, 106.0, 143.0, 178.0, 168.0, 139.0, 89.0, 34.0, 10.0, 13.0, 2.0, 2.0, 1.0, 4.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-13.555559158325195, -13.2539644241333, -12.952369689941406, -12.650774955749512, -12.349180221557617, -12.047586441040039, -11.745991706848145, -11.44439697265625, -11.142802238464355, -10.841207504272461, -10.539612770080566, -10.238018035888672, -9.936424255371094, -9.6348295211792, -9.333234786987305, -9.03164005279541, -8.730045318603516, -8.428450584411621, -8.126855850219727, -7.82526159286499, -7.523666858673096, -7.222072124481201, -6.920477867126465, -6.61888313293457, -6.317288398742676, -6.015693664550781, -5.714098930358887, -5.41250467300415, -5.110909938812256, -4.809315204620361, -4.507720947265625, -4.2061262130737305, -3.904531955718994, -3.6029372215270996, -3.301342725753784, -2.9997482299804688, -2.698153495788574, -2.3965587615966797, -2.0949642658233643, -1.7933697700500488, -1.4917750358581543, -1.1901804208755493, -0.8885858058929443, -0.5869911909103394, -0.2853965759277344, 0.016198039054870605, 0.3177926540374756, 0.619387149810791, 0.9209818840026855, 1.2225764989852905, 1.5241711139678955, 1.8257657289505005, 2.1273603439331055, 2.428955078125, 2.7305495738983154, 3.032144069671631, 3.3337388038635254, 3.63533353805542, 3.9369280338287354, 4.238522529602051, 4.540117263793945, 4.84171199798584, 5.143306732177734, 5.444900989532471, 5.746495723724365]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 4.0, 10.0, 5.0, 5.0, 7.0, 7.0, 17.0, 13.0, 21.0, 19.0, 29.0, 29.0, 36.0, 47.0, 39.0, 60.0, 81.0, 75.0, 83.0, 66.0, 44.0, 56.0, 44.0, 47.0, 44.0, 20.0, 22.0, 11.0, 17.0, 15.0, 9.0, 9.0, 6.0, 2.0, 1.0, 3.0, 6.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-9.460855484008789, -9.210884094238281, -8.960912704467773, -8.710941314697266, -8.460969924926758, -8.21099853515625, -7.961026668548584, -7.711055278778076, -7.461083889007568, -7.2111124992370605, -6.961141109466553, -6.711169719696045, -6.461197853088379, -6.211226463317871, -5.961255073547363, -5.7112836837768555, -5.461312294006348, -5.21134090423584, -4.961369514465332, -4.711398124694824, -4.461426734924316, -4.211455345153809, -3.9614834785461426, -3.7115120887756348, -3.461540699005127, -3.211569309234619, -2.9615979194641113, -2.7116262912750244, -2.4616549015045166, -2.211683511734009, -1.9617120027542114, -1.711740493774414, -1.4617695808410645, -1.2117981910705566, -0.9618266820907593, -0.7118552327156067, -0.4618837833404541, -0.2119123935699463, 0.038059115409851074, 0.28803062438964844, 0.5380020141601562, 0.7879734635353088, 1.0379449129104614, 1.2879164218902588, 1.5378878116607666, 1.7878592014312744, 2.0378308296203613, 2.287802219390869, 2.537773609161377, 2.7877449989318848, 3.0377163887023926, 3.2876880168914795, 3.5376594066619873, 3.787630796432495, 4.037602424621582, 4.28757381439209, 4.537545204162598, 4.7875165939331055, 5.037487983703613, 5.287459373474121, 5.537430763244629, 5.787402153015137, 6.037374019622803, 6.2873454093933105, 6.537316799163818]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 4.0, 4.0, 6.0, 7.0, 8.0, 19.0, 23.0, 26.0, 31.0, 42.0, 68.0, 65.0, 108.0, 146.0, 221.0, 474.0, 1182.0, 4962.0, 43933.0, 3606888.0, 514819.0, 16885.0, 2639.0, 736.0, 329.0, 191.0, 124.0, 91.0, 54.0, 53.0, 38.0, 30.0, 13.0, 11.0, 15.0, 7.0, 11.0, 6.0, 3.0, 5.0, 2.0, 5.0, 2.0, 2.0, 2.0, 0.0, 2.0, 2.0], "bins": [-1.884765625, -1.832122802734375, -1.77947998046875, -1.726837158203125, -1.6741943359375, -1.621551513671875, -1.56890869140625, -1.516265869140625, -1.463623046875, -1.410980224609375, -1.35833740234375, -1.305694580078125, -1.2530517578125, -1.200408935546875, -1.14776611328125, -1.095123291015625, -1.04248046875, -0.989837646484375, -0.93719482421875, -0.884552001953125, -0.8319091796875, -0.779266357421875, -0.72662353515625, -0.673980712890625, -0.621337890625, -0.568695068359375, -0.51605224609375, -0.463409423828125, -0.4107666015625, -0.358123779296875, -0.30548095703125, -0.252838134765625, -0.2001953125, -0.147552490234375, -0.09490966796875, -0.042266845703125, 0.0103759765625, 0.063018798828125, 0.11566162109375, 0.168304443359375, 0.220947265625, 0.273590087890625, 0.32623291015625, 0.378875732421875, 0.4315185546875, 0.484161376953125, 0.53680419921875, 0.589447021484375, 0.64208984375, 0.694732666015625, 0.74737548828125, 0.800018310546875, 0.8526611328125, 0.905303955078125, 0.95794677734375, 1.010589599609375, 1.063232421875, 1.115875244140625, 1.16851806640625, 1.221160888671875, 1.2738037109375, 1.326446533203125, 1.37908935546875, 1.431732177734375, 1.484375]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 5.0, 3.0, 4.0, 8.0, 12.0, 15.0, 17.0, 21.0, 21.0, 42.0, 47.0, 62.0, 80.0, 69.0, 69.0, 84.0, 86.0, 86.0, 62.0, 53.0, 39.0, 46.0, 27.0, 20.0, 10.0, 2.0, 8.0, 4.0, 1.0, 4.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.50927734375, -0.4910888671875, -0.472900390625, -0.4547119140625, -0.4365234375, -0.4183349609375, -0.400146484375, -0.3819580078125, -0.36376953125, -0.3455810546875, -0.327392578125, -0.3092041015625, -0.291015625, -0.2728271484375, -0.254638671875, -0.2364501953125, -0.21826171875, -0.2000732421875, -0.181884765625, -0.1636962890625, -0.1455078125, -0.1273193359375, -0.109130859375, -0.0909423828125, -0.07275390625, -0.0545654296875, -0.036376953125, -0.0181884765625, 0.0, 0.0181884765625, 0.036376953125, 0.0545654296875, 0.07275390625, 0.0909423828125, 0.109130859375, 0.1273193359375, 0.1455078125, 0.1636962890625, 0.181884765625, 0.2000732421875, 0.21826171875, 0.2364501953125, 0.254638671875, 0.2728271484375, 0.291015625, 0.3092041015625, 0.327392578125, 0.3455810546875, 0.36376953125, 0.3819580078125, 0.400146484375, 0.4183349609375, 0.4365234375, 0.4547119140625, 0.472900390625, 0.4910888671875, 0.50927734375, 0.5274658203125, 0.545654296875, 0.5638427734375, 0.58203125, 0.6002197265625, 0.618408203125, 0.6365966796875, 0.65478515625]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 3.0, 5.0, 8.0, 13.0, 14.0, 18.0, 24.0, 42.0, 72.0, 108.0, 152.0, 280.0, 571.0, 1101.0, 2353.0, 5288.0, 13897.0, 46963.0, 308845.0, 3205780.0, 512751.0, 64021.0, 18409.0, 7136.0, 3131.0, 1520.0, 781.0, 422.0, 216.0, 127.0, 75.0, 40.0, 39.0, 19.0, 13.0, 11.0, 11.0, 7.0, 4.0, 3.0, 4.0, 4.0, 3.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.84375, -0.8193588256835938, -0.7949676513671875, -0.7705764770507812, -0.746185302734375, -0.7217941284179688, -0.6974029541015625, -0.6730117797851562, -0.64862060546875, -0.6242294311523438, -0.5998382568359375, -0.5754470825195312, -0.551055908203125, -0.5266647338867188, -0.5022735595703125, -0.47788238525390625, -0.4534912109375, -0.42910003662109375, -0.4047088623046875, -0.38031768798828125, -0.355926513671875, -0.33153533935546875, -0.3071441650390625, -0.28275299072265625, -0.25836181640625, -0.23397064208984375, -0.2095794677734375, -0.18518829345703125, -0.160797119140625, -0.13640594482421875, -0.1120147705078125, -0.08762359619140625, -0.063232421875, -0.03884124755859375, -0.0144500732421875, 0.00994110107421875, 0.034332275390625, 0.05872344970703125, 0.0831146240234375, 0.10750579833984375, 0.13189697265625, 0.15628814697265625, 0.1806793212890625, 0.20507049560546875, 0.229461669921875, 0.25385284423828125, 0.2782440185546875, 0.30263519287109375, 0.3270263671875, 0.35141754150390625, 0.3758087158203125, 0.40019989013671875, 0.424591064453125, 0.44898223876953125, 0.4733734130859375, 0.49776458740234375, 0.52215576171875, 0.5465469360351562, 0.5709381103515625, 0.5953292846679688, 0.619720458984375, 0.6441116333007812, 0.6685028076171875, 0.6928939819335938, 0.71728515625]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 2.0, 5.0, 7.0, 14.0, 17.0, 25.0, 43.0, 73.0, 97.0, 176.0, 339.0, 570.0, 860.0, 741.0, 455.0, 226.0, 137.0, 74.0, 48.0, 36.0, 21.0, 16.0, 13.0, 18.0, 9.0, 11.0, 6.0, 1.0, 4.0, 5.0, 2.0, 2.0, 3.0, 1.0, 3.0, 2.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.8662109375, -0.83892822265625, -0.8116455078125, -0.78436279296875, -0.757080078125, -0.72979736328125, -0.7025146484375, -0.67523193359375, -0.64794921875, -0.62066650390625, -0.5933837890625, -0.56610107421875, -0.538818359375, -0.51153564453125, -0.4842529296875, -0.45697021484375, -0.4296875, -0.40240478515625, -0.3751220703125, -0.34783935546875, -0.320556640625, -0.29327392578125, -0.2659912109375, -0.23870849609375, -0.21142578125, -0.18414306640625, -0.1568603515625, -0.12957763671875, -0.102294921875, -0.07501220703125, -0.0477294921875, -0.02044677734375, 0.0068359375, 0.03411865234375, 0.0614013671875, 0.08868408203125, 0.115966796875, 0.14324951171875, 0.1705322265625, 0.19781494140625, 0.22509765625, 0.25238037109375, 0.2796630859375, 0.30694580078125, 0.334228515625, 0.36151123046875, 0.3887939453125, 0.41607666015625, 0.443359375, 0.47064208984375, 0.4979248046875, 0.52520751953125, 0.552490234375, 0.57977294921875, 0.6070556640625, 0.63433837890625, 0.66162109375, 0.68890380859375, 0.7161865234375, 0.74346923828125, 0.770751953125, 0.79803466796875, 0.8253173828125, 0.85260009765625, 0.8798828125]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 3.0, 4.0, 5.0, 5.0, 8.0, 34.0, 127.0, 341.0, 311.0, 121.0, 28.0, 12.0, 6.0, 4.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.42849349975586, -17.928373336791992, -17.428253173828125, -16.92813491821289, -16.428014755249023, -15.927894592285156, -15.427775382995605, -14.927656173706055, -14.427536010742188, -13.92741584777832, -13.42729663848877, -12.927177429199219, -12.427057266235352, -11.926937103271484, -11.426817893981934, -10.926698684692383, -10.426578521728516, -9.926458358764648, -9.426339149475098, -8.926219940185547, -8.42609977722168, -7.925980091094971, -7.425860404968262, -6.925740718841553, -6.425621032714844, -5.925501346588135, -5.425381660461426, -4.925261974334717, -4.425142288208008, -3.925022602081299, -3.42490291595459, -2.924783229827881, -2.424661636352539, -1.92454195022583, -1.424422264099121, -0.9243025779724121, -0.4241828918457031, 0.07593679428100586, 0.5760564804077148, 1.0761761665344238, 1.5762958526611328, 2.076415538787842, 2.576535224914551, 3.0766549110412598, 3.5767745971679688, 4.076894283294678, 4.577013969421387, 5.077133655548096, 5.577253341674805, 6.077373027801514, 6.577492713928223, 7.077612400054932, 7.577732086181641, 8.077852249145508, 8.577971458435059, 9.07809066772461, 9.578210830688477, 10.078330993652344, 10.578450202941895, 11.078569412231445, 11.578689575195312, 12.07880973815918, 12.57892894744873, 13.079048156738281, 13.579168319702148]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 0.0, 3.0, 4.0, 4.0, 12.0, 17.0, 18.0, 27.0, 40.0, 48.0, 33.0, 43.0, 50.0, 64.0, 50.0, 69.0, 61.0, 72.0, 48.0, 51.0, 47.0, 35.0, 47.0, 31.0, 27.0, 28.0, 16.0, 9.0, 9.0, 7.0, 9.0, 8.0, 5.0, 2.0, 2.0, 5.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.135585784912109, -5.9689154624938965, -5.802245616912842, -5.635575294494629, -5.468905448913574, -5.302235126495361, -5.135564804077148, -4.968894958496094, -4.802224636077881, -4.635554313659668, -4.468884468078613, -4.3022141456604, -4.135544300079346, -3.968873977661133, -3.802203893661499, -3.6355338096618652, -3.4688637256622314, -3.3021936416625977, -3.135523557662964, -2.96885347366333, -2.802183151245117, -2.6355130672454834, -2.4688429832458496, -2.3021726608276367, -2.135502815246582, -1.9688327312469482, -1.802162528038025, -1.6354924440383911, -1.4688222408294678, -1.302152156829834, -1.1354820728302002, -0.9688118696212769, -0.8021416664123535, -0.635471522808075, -0.4688014090061188, -0.3021312952041626, -0.13546115159988403, 0.03120899200439453, 0.19787907600402832, 0.36454927921295166, 0.5312193632125854, 0.697889506816864, 0.8645596504211426, 1.0312297344207764, 1.1978998184204102, 1.3645700216293335, 1.5312401056289673, 1.6979103088378906, 1.8645803928375244, 2.031250476837158, 2.197920560836792, 2.364590644836426, 2.5312609672546387, 2.6979310512542725, 2.8646011352539062, 3.031271457672119, 3.197941303253174, 3.3646113872528076, 3.5312814712524414, 3.6979517936706543, 3.864621877670288, 4.031291961669922, 4.197961807250977, 4.3646321296691895, 4.531302452087402]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 2.0, 4.0, 4.0, 2.0, 18.0, 13.0, 23.0, 45.0, 72.0, 132.0, 263.0, 607.0, 1273.0, 3343.0, 9624.0, 33429.0, 137254.0, 458068.0, 303430.0, 72172.0, 18940.0, 5952.0, 2152.0, 892.0, 352.0, 216.0, 100.0, 65.0, 29.0, 20.0, 18.0, 9.0, 11.0, 4.0, 6.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.93994140625, -0.9083786010742188, -0.8768157958984375, -0.8452529907226562, -0.813690185546875, -0.7821273803710938, -0.7505645751953125, -0.7190017700195312, -0.68743896484375, -0.6558761596679688, -0.6243133544921875, -0.5927505493164062, -0.561187744140625, -0.5296249389648438, -0.4980621337890625, -0.46649932861328125, -0.4349365234375, -0.40337371826171875, -0.3718109130859375, -0.34024810791015625, -0.308685302734375, -0.27712249755859375, -0.2455596923828125, -0.21399688720703125, -0.18243408203125, -0.15087127685546875, -0.1193084716796875, -0.08774566650390625, -0.056182861328125, -0.02462005615234375, 0.0069427490234375, 0.03850555419921875, 0.070068359375, 0.10163116455078125, 0.1331939697265625, 0.16475677490234375, 0.196319580078125, 0.22788238525390625, 0.2594451904296875, 0.29100799560546875, 0.32257080078125, 0.35413360595703125, 0.3856964111328125, 0.41725921630859375, 0.448822021484375, 0.48038482666015625, 0.5119476318359375, 0.5435104370117188, 0.5750732421875, 0.6066360473632812, 0.6381988525390625, 0.6697616577148438, 0.701324462890625, 0.7328872680664062, 0.7644500732421875, 0.7960128784179688, 0.82757568359375, 0.8591384887695312, 0.8907012939453125, 0.9222640991210938, 0.953826904296875, 0.9853897094726562, 1.0169525146484375, 1.0485153198242188, 1.080078125]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 0.0, 1.0, 4.0, 1.0, 10.0, 8.0, 10.0, 13.0, 30.0, 22.0, 25.0, 30.0, 42.0, 45.0, 65.0, 69.0, 85.0, 83.0, 64.0, 75.0, 57.0, 59.0, 58.0, 42.0, 30.0, 32.0, 17.0, 14.0, 9.0, 3.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.44189453125, -0.42455291748046875, -0.4072113037109375, -0.38986968994140625, -0.372528076171875, -0.35518646240234375, -0.3378448486328125, -0.32050323486328125, -0.30316162109375, -0.28582000732421875, -0.2684783935546875, -0.25113677978515625, -0.233795166015625, -0.21645355224609375, -0.1991119384765625, -0.18177032470703125, -0.1644287109375, -0.14708709716796875, -0.1297454833984375, -0.11240386962890625, -0.095062255859375, -0.07772064208984375, -0.0603790283203125, -0.04303741455078125, -0.02569580078125, -0.00835418701171875, 0.0089874267578125, 0.02632904052734375, 0.043670654296875, 0.06101226806640625, 0.0783538818359375, 0.09569549560546875, 0.113037109375, 0.13037872314453125, 0.1477203369140625, 0.16506195068359375, 0.182403564453125, 0.19974517822265625, 0.2170867919921875, 0.23442840576171875, 0.25177001953125, 0.26911163330078125, 0.2864532470703125, 0.30379486083984375, 0.321136474609375, 0.33847808837890625, 0.3558197021484375, 0.37316131591796875, 0.3905029296875, 0.40784454345703125, 0.4251861572265625, 0.44252777099609375, 0.459869384765625, 0.47721099853515625, 0.4945526123046875, 0.5118942260742188, 0.52923583984375, 0.5465774536132812, 0.5639190673828125, 0.5812606811523438, 0.598602294921875, 0.6159439086914062, 0.6332855224609375, 0.6506271362304688, 0.66796875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 5.0, 4.0, 6.0, 4.0, 13.0, 9.0, 17.0, 19.0, 47.0, 75.0, 90.0, 136.0, 228.0, 398.0, 727.0, 1636.0, 4645.0, 24028.0, 234389.0, 687622.0, 78835.0, 10362.0, 2737.0, 1164.0, 528.0, 289.0, 177.0, 124.0, 68.0, 50.0, 38.0, 19.0, 21.0, 16.0, 11.0, 3.0, 6.0, 4.0, 5.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4287109375, -1.383026123046875, -1.33734130859375, -1.291656494140625, -1.2459716796875, -1.200286865234375, -1.15460205078125, -1.108917236328125, -1.063232421875, -1.017547607421875, -0.97186279296875, -0.926177978515625, -0.8804931640625, -0.834808349609375, -0.78912353515625, -0.743438720703125, -0.69775390625, -0.652069091796875, -0.60638427734375, -0.560699462890625, -0.5150146484375, -0.469329833984375, -0.42364501953125, -0.377960205078125, -0.332275390625, -0.286590576171875, -0.24090576171875, -0.195220947265625, -0.1495361328125, -0.103851318359375, -0.05816650390625, -0.012481689453125, 0.033203125, 0.078887939453125, 0.12457275390625, 0.170257568359375, 0.2159423828125, 0.261627197265625, 0.30731201171875, 0.352996826171875, 0.398681640625, 0.444366455078125, 0.49005126953125, 0.535736083984375, 0.5814208984375, 0.627105712890625, 0.67279052734375, 0.718475341796875, 0.76416015625, 0.809844970703125, 0.85552978515625, 0.901214599609375, 0.9468994140625, 0.992584228515625, 1.03826904296875, 1.083953857421875, 1.129638671875, 1.175323486328125, 1.22100830078125, 1.266693115234375, 1.3123779296875, 1.358062744140625, 1.40374755859375, 1.449432373046875, 1.4951171875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 2.0, 2.0, 5.0, 8.0, 15.0, 19.0, 16.0, 29.0, 32.0, 29.0, 37.0, 35.0, 40.0, 48.0, 57.0, 64.0, 62.0, 64.0, 43.0, 52.0, 58.0, 50.0, 43.0, 35.0, 31.0, 30.0, 22.0, 16.0, 12.0, 14.0, 11.0, 2.0, 6.0, 3.0, 3.0, 3.0, 1.0, 2.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.599609375, -1.542816162109375, -1.48602294921875, -1.429229736328125, -1.3724365234375, -1.315643310546875, -1.25885009765625, -1.202056884765625, -1.145263671875, -1.088470458984375, -1.03167724609375, -0.974884033203125, -0.9180908203125, -0.861297607421875, -0.80450439453125, -0.747711181640625, -0.69091796875, -0.634124755859375, -0.57733154296875, -0.520538330078125, -0.4637451171875, -0.406951904296875, -0.35015869140625, -0.293365478515625, -0.236572265625, -0.179779052734375, -0.12298583984375, -0.066192626953125, -0.0093994140625, 0.047393798828125, 0.10418701171875, 0.160980224609375, 0.2177734375, 0.274566650390625, 0.33135986328125, 0.388153076171875, 0.4449462890625, 0.501739501953125, 0.55853271484375, 0.615325927734375, 0.672119140625, 0.728912353515625, 0.78570556640625, 0.842498779296875, 0.8992919921875, 0.956085205078125, 1.01287841796875, 1.069671630859375, 1.12646484375, 1.183258056640625, 1.24005126953125, 1.296844482421875, 1.3536376953125, 1.410430908203125, 1.46722412109375, 1.524017333984375, 1.580810546875, 1.637603759765625, 1.69439697265625, 1.751190185546875, 1.8079833984375, 1.864776611328125, 1.92156982421875, 1.978363037109375, 2.03515625]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 7.0, 4.0, 5.0, 6.0, 6.0, 15.0, 31.0, 64.0, 115.0, 291.0, 936.0, 5584.0, 220089.0, 804829.0, 14237.0, 1586.0, 408.0, 162.0, 75.0, 33.0, 22.0, 16.0, 9.0, 6.0, 8.0, 2.0, 1.0, 0.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0439453125, -1.00933837890625, -0.9747314453125, -0.94012451171875, -0.905517578125, -0.87091064453125, -0.8363037109375, -0.80169677734375, -0.76708984375, -0.73248291015625, -0.6978759765625, -0.66326904296875, -0.628662109375, -0.59405517578125, -0.5594482421875, -0.52484130859375, -0.490234375, -0.45562744140625, -0.4210205078125, -0.38641357421875, -0.351806640625, -0.31719970703125, -0.2825927734375, -0.24798583984375, -0.21337890625, -0.17877197265625, -0.1441650390625, -0.10955810546875, -0.074951171875, -0.04034423828125, -0.0057373046875, 0.02886962890625, 0.0634765625, 0.09808349609375, 0.1326904296875, 0.16729736328125, 0.201904296875, 0.23651123046875, 0.2711181640625, 0.30572509765625, 0.34033203125, 0.37493896484375, 0.4095458984375, 0.44415283203125, 0.478759765625, 0.51336669921875, 0.5479736328125, 0.58258056640625, 0.6171875, 0.65179443359375, 0.6864013671875, 0.72100830078125, 0.755615234375, 0.79022216796875, 0.8248291015625, 0.85943603515625, 0.89404296875, 0.92864990234375, 0.9632568359375, 0.99786376953125, 1.032470703125, 1.06707763671875, 1.1016845703125, 1.13629150390625, 1.1708984375]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 7.0, 5.0, 7.0, 13.0, 18.0, 30.0, 45.0, 64.0, 108.0, 121.0, 139.0, 122.0, 110.0, 78.0, 44.0, 29.0, 26.0, 14.0, 9.0, 5.0, 5.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00012564659118652344, -0.00012168660759925842, -0.00011772662401199341, -0.0001137666404247284, -0.00010980665683746338, -0.00010584667325019836, -0.00010188668966293335, -9.792670607566833e-05, -9.396672248840332e-05, -9.00067389011383e-05, -8.604675531387329e-05, -8.208677172660828e-05, -7.812678813934326e-05, -7.416680455207825e-05, -7.020682096481323e-05, -6.624683737754822e-05, -6.22868537902832e-05, -5.832687020301819e-05, -5.4366886615753174e-05, -5.040690302848816e-05, -4.6446919441223145e-05, -4.248693585395813e-05, -3.8526952266693115e-05, -3.45669686794281e-05, -3.0606985092163086e-05, -2.664700150489807e-05, -2.2687017917633057e-05, -1.8727034330368042e-05, -1.4767050743103027e-05, -1.0807067155838013e-05, -6.847083568572998e-06, -2.8870999813079834e-06, 1.0728836059570312e-06, 5.032867193222046e-06, 8.99285078048706e-06, 1.2952834367752075e-05, 1.691281795501709e-05, 2.0872801542282104e-05, 2.483278512954712e-05, 2.8792768716812134e-05, 3.275275230407715e-05, 3.671273589134216e-05, 4.067271947860718e-05, 4.463270306587219e-05, 4.859268665313721e-05, 5.255267024040222e-05, 5.6512653827667236e-05, 6.047263741493225e-05, 6.443262100219727e-05, 6.839260458946228e-05, 7.23525881767273e-05, 7.631257176399231e-05, 8.027255535125732e-05, 8.423253893852234e-05, 8.819252252578735e-05, 9.215250611305237e-05, 9.611248970031738e-05, 0.0001000724732875824, 0.00010403245687484741, 0.00010799244046211243, 0.00011195242404937744, 0.00011591240763664246, 0.00011987239122390747, 0.00012383237481117249, 0.0001277923583984375]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 2.0, 1.0, 1.0, 1.0, 7.0, 9.0, 9.0, 12.0, 17.0, 29.0, 46.0, 68.0, 101.0, 150.0, 284.0, 596.0, 1228.0, 3278.0, 12226.0, 91897.0, 683452.0, 223140.0, 23736.0, 4915.0, 1719.0, 734.0, 372.0, 210.0, 117.0, 64.0, 50.0, 23.0, 14.0, 15.0, 15.0, 8.0, 3.0, 4.0, 4.0, 3.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.56103515625, -0.54486083984375, -0.5286865234375, -0.51251220703125, -0.496337890625, -0.48016357421875, -0.4639892578125, -0.44781494140625, -0.431640625, -0.41546630859375, -0.3992919921875, -0.38311767578125, -0.366943359375, -0.35076904296875, -0.3345947265625, -0.31842041015625, -0.30224609375, -0.28607177734375, -0.2698974609375, -0.25372314453125, -0.237548828125, -0.22137451171875, -0.2052001953125, -0.18902587890625, -0.1728515625, -0.15667724609375, -0.1405029296875, -0.12432861328125, -0.108154296875, -0.09197998046875, -0.0758056640625, -0.05963134765625, -0.04345703125, -0.02728271484375, -0.0111083984375, 0.00506591796875, 0.021240234375, 0.03741455078125, 0.0535888671875, 0.06976318359375, 0.0859375, 0.10211181640625, 0.1182861328125, 0.13446044921875, 0.150634765625, 0.16680908203125, 0.1829833984375, 0.19915771484375, 0.21533203125, 0.23150634765625, 0.2476806640625, 0.26385498046875, 0.280029296875, 0.29620361328125, 0.3123779296875, 0.32855224609375, 0.3447265625, 0.36090087890625, 0.3770751953125, 0.39324951171875, 0.409423828125, 0.42559814453125, 0.4417724609375, 0.45794677734375, 0.47412109375]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 2.0, 5.0, 4.0, 10.0, 10.0, 10.0, 24.0, 25.0, 46.0, 40.0, 70.0, 90.0, 94.0, 105.0, 85.0, 91.0, 84.0, 59.0, 45.0, 37.0, 13.0, 21.0, 13.0, 9.0, 3.0, 6.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.66748046875, -0.6488418579101562, -0.6302032470703125, -0.6115646362304688, -0.592926025390625, -0.5742874145507812, -0.5556488037109375, -0.5370101928710938, -0.51837158203125, -0.49973297119140625, -0.4810943603515625, -0.46245574951171875, -0.443817138671875, -0.42517852783203125, -0.4065399169921875, -0.38790130615234375, -0.3692626953125, -0.35062408447265625, -0.3319854736328125, -0.31334686279296875, -0.294708251953125, -0.27606964111328125, -0.2574310302734375, -0.23879241943359375, -0.22015380859375, -0.20151519775390625, -0.1828765869140625, -0.16423797607421875, -0.145599365234375, -0.12696075439453125, -0.1083221435546875, -0.08968353271484375, -0.071044921875, -0.05240631103515625, -0.0337677001953125, -0.01512908935546875, 0.003509521484375, 0.02214813232421875, 0.0407867431640625, 0.05942535400390625, 0.07806396484375, 0.09670257568359375, 0.1153411865234375, 0.13397979736328125, 0.152618408203125, 0.17125701904296875, 0.1898956298828125, 0.20853424072265625, 0.2271728515625, 0.24581146240234375, 0.2644500732421875, 0.28308868408203125, 0.301727294921875, 0.32036590576171875, 0.3390045166015625, 0.35764312744140625, 0.37628173828125, 0.39492034912109375, 0.4135589599609375, 0.43219757080078125, 0.450836181640625, 0.46947479248046875, 0.4881134033203125, 0.5067520141601562, 0.525390625]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 2.0, 6.0, 2.0, 6.0, 12.0, 40.0, 108.0, 223.0, 300.0, 182.0, 79.0, 23.0, 10.0, 6.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-19.266334533691406, -18.833110809326172, -18.399887084960938, -17.966663360595703, -17.53343963623047, -17.100215911865234, -16.6669921875, -16.233768463134766, -15.800543785095215, -15.36732006072998, -14.934096336364746, -14.500872611999512, -14.067647933959961, -13.634424209594727, -13.201200485229492, -12.767976760864258, -12.334753036499023, -11.901529312133789, -11.468305587768555, -11.03508186340332, -10.601858139038086, -10.168634414672852, -9.7354097366333, -9.302186012268066, -8.868962287902832, -8.435738563537598, -8.002514839172363, -7.569290637969971, -7.136066913604736, -6.702843189239502, -6.269618988037109, -5.836395263671875, -5.403171539306641, -4.969947814941406, -4.536724090576172, -4.103499889373779, -3.670276165008545, -3.2370524406433105, -2.803828477859497, -2.3706045150756836, -1.9373807907104492, -1.5041569471359253, -1.0709331035614014, -0.6377092599868774, -0.20448541641235352, 0.22873830795288086, 0.6619622707366943, 1.0951862335205078, 1.5284099578857422, 1.9616338014602661, 2.39485764503479, 2.8280816078186035, 3.261305332183838, 3.6945290565490723, 4.127753257751465, 4.560976982116699, 4.994200706481934, 5.427424430847168, 5.860648155212402, 6.293872356414795, 6.727096080780029, 7.160319805145264, 7.593544006347656, 8.02676773071289, 8.459991455078125]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 5.0, 3.0, 10.0, 13.0, 22.0, 22.0, 31.0, 45.0, 55.0, 81.0, 76.0, 93.0, 107.0, 98.0, 74.0, 70.0, 53.0, 42.0, 33.0, 17.0, 16.0, 10.0, 16.0, 6.0, 7.0, 2.0, 3.0, 4.0], "bins": [-18.6641902923584, -18.279586791992188, -17.894981384277344, -17.510377883911133, -17.12577247619629, -16.741168975830078, -16.356563568115234, -15.971959114074707, -15.58735466003418, -15.202750205993652, -14.818145751953125, -14.433541297912598, -14.04893684387207, -13.664332389831543, -13.279727935791016, -12.895123481750488, -12.510519027709961, -12.125914573669434, -11.741310119628906, -11.356705665588379, -10.972101211547852, -10.587496757507324, -10.202892303466797, -9.81828784942627, -9.433684349060059, -9.049079895019531, -8.664475440979004, -8.279870986938477, -7.895266532897949, -7.510662078857422, -7.1260576248168945, -6.741453170776367, -6.35684871673584, -5.9722442626953125, -5.587639808654785, -5.203035354614258, -4.8184309005737305, -4.433826446533203, -4.049221992492676, -3.6646177768707275, -3.2800133228302, -2.895408868789673, -2.5108044147491455, -2.1262001991271973, -1.7415956258773804, -1.356991171836853, -0.9723868370056152, -0.5877823829650879, -0.20317792892456055, 0.1814264953136444, 0.5660309195518494, 0.9506353139877319, 1.3352397680282593, 1.7198442220687866, 2.1044485569000244, 2.4890530109405518, 2.873657464981079, 3.2582619190216064, 3.642866373062134, 4.027470588684082, 4.412075042724609, 4.796679496765137, 5.181283950805664, 5.565888404846191, 5.950492858886719]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 7.0, 2.0, 6.0, 6.0, 9.0, 7.0, 9.0, 16.0, 20.0, 18.0, 36.0, 38.0, 67.0, 90.0, 162.0, 283.0, 720.0, 2045.0, 7367.0, 37705.0, 400046.0, 3366171.0, 331779.0, 35427.0, 7881.0, 2441.0, 1028.0, 422.0, 224.0, 104.0, 57.0, 39.0, 14.0, 17.0, 8.0, 6.0, 4.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-1.08984375, -1.0622329711914062, -1.0346221923828125, -1.0070114135742188, -0.979400634765625, -0.9517898559570312, -0.9241790771484375, -0.8965682983398438, -0.86895751953125, -0.8413467407226562, -0.8137359619140625, -0.7861251831054688, -0.758514404296875, -0.7309036254882812, -0.7032928466796875, -0.6756820678710938, -0.6480712890625, -0.6204605102539062, -0.5928497314453125, -0.5652389526367188, -0.537628173828125, -0.5100173950195312, -0.4824066162109375, -0.45479583740234375, -0.42718505859375, -0.39957427978515625, -0.3719635009765625, -0.34435272216796875, -0.316741943359375, -0.28913116455078125, -0.2615203857421875, -0.23390960693359375, -0.206298828125, -0.17868804931640625, -0.1510772705078125, -0.12346649169921875, -0.095855712890625, -0.06824493408203125, -0.0406341552734375, -0.01302337646484375, 0.01458740234375, 0.04219818115234375, 0.0698089599609375, 0.09741973876953125, 0.125030517578125, 0.15264129638671875, 0.1802520751953125, 0.20786285400390625, 0.2354736328125, 0.26308441162109375, 0.2906951904296875, 0.31830596923828125, 0.345916748046875, 0.37352752685546875, 0.4011383056640625, 0.42874908447265625, 0.45635986328125, 0.48397064208984375, 0.5115814208984375, 0.5391921997070312, 0.566802978515625, 0.5944137573242188, 0.6220245361328125, 0.6496353149414062, 0.67724609375]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 5.0, 19.0, 22.0, 33.0, 56.0, 94.0, 125.0, 155.0, 131.0, 135.0, 94.0, 64.0, 34.0, 28.0, 7.0, 8.0, 3.0, 3.0], "bins": [-1.650390625, -1.6197929382324219, -1.5891952514648438, -1.5585975646972656, -1.5279998779296875, -1.4974021911621094, -1.4668045043945312, -1.4362068176269531, -1.405609130859375, -1.3750114440917969, -1.3444137573242188, -1.3138160705566406, -1.2832183837890625, -1.2526206970214844, -1.2220230102539062, -1.1914253234863281, -1.16082763671875, -1.1302299499511719, -1.0996322631835938, -1.0690345764160156, -1.0384368896484375, -1.0078392028808594, -0.9772415161132812, -0.9466438293457031, -0.916046142578125, -0.8854484558105469, -0.8548507690429688, -0.8242530822753906, -0.7936553955078125, -0.7630577087402344, -0.7324600219726562, -0.7018623352050781, -0.6712646484375, -0.6406669616699219, -0.6100692749023438, -0.5794715881347656, -0.5488739013671875, -0.5182762145996094, -0.48767852783203125, -0.4570808410644531, -0.426483154296875, -0.3958854675292969, -0.36528778076171875, -0.3346900939941406, -0.3040924072265625, -0.2734947204589844, -0.24289703369140625, -0.21229934692382812, -0.18170166015625, -0.15110397338867188, -0.12050628662109375, -0.08990859985351562, -0.0593109130859375, -0.028713226318359375, 0.00188446044921875, 0.032482147216796875, 0.063079833984375, 0.09367752075195312, 0.12427520751953125, 0.15487289428710938, 0.1854705810546875, 0.21606826782226562, 0.24666595458984375, 0.2772636413574219, 0.307861328125]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 7.0, 7.0, 11.0, 17.0, 33.0, 24.0, 24.0, 36.0, 76.0, 130.0, 185.0, 325.0, 723.0, 1841.0, 5041.0, 16946.0, 77204.0, 1001704.0, 2875080.0, 170930.0, 30809.0, 8128.0, 2672.0, 1074.0, 546.0, 255.0, 147.0, 99.0, 73.0, 47.0, 30.0, 24.0, 8.0, 9.0, 8.0, 5.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.7705078125, -0.7431640625, -0.7158203125, -0.6884765625, -0.6611328125, -0.6337890625, -0.6064453125, -0.5791015625, -0.5517578125, -0.5244140625, -0.4970703125, -0.4697265625, -0.4423828125, -0.4150390625, -0.3876953125, -0.3603515625, -0.3330078125, -0.3056640625, -0.2783203125, -0.2509765625, -0.2236328125, -0.1962890625, -0.1689453125, -0.1416015625, -0.1142578125, -0.0869140625, -0.0595703125, -0.0322265625, -0.0048828125, 0.0224609375, 0.0498046875, 0.0771484375, 0.1044921875, 0.1318359375, 0.1591796875, 0.1865234375, 0.2138671875, 0.2412109375, 0.2685546875, 0.2958984375, 0.3232421875, 0.3505859375, 0.3779296875, 0.4052734375, 0.4326171875, 0.4599609375, 0.4873046875, 0.5146484375, 0.5419921875, 0.5693359375, 0.5966796875, 0.6240234375, 0.6513671875, 0.6787109375, 0.7060546875, 0.7333984375, 0.7607421875, 0.7880859375, 0.8154296875, 0.8427734375, 0.8701171875, 0.8974609375, 0.9248046875, 0.9521484375, 0.9794921875]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 2.0, 1.0, 3.0, 4.0, 6.0, 2.0, 6.0, 7.0, 12.0, 14.0, 24.0, 39.0, 46.0, 60.0, 103.0, 199.0, 295.0, 566.0, 780.0, 702.0, 430.0, 236.0, 180.0, 90.0, 84.0, 43.0, 30.0, 30.0, 21.0, 17.0, 10.0, 12.0, 9.0, 6.0, 2.0, 4.0, 1.0, 0.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.91796875, -0.8881607055664062, -0.8583526611328125, -0.8285446166992188, -0.798736572265625, -0.7689285278320312, -0.7391204833984375, -0.7093124389648438, -0.67950439453125, -0.6496963500976562, -0.6198883056640625, -0.5900802612304688, -0.560272216796875, -0.5304641723632812, -0.5006561279296875, -0.47084808349609375, -0.4410400390625, -0.41123199462890625, -0.3814239501953125, -0.35161590576171875, -0.321807861328125, -0.29199981689453125, -0.2621917724609375, -0.23238372802734375, -0.20257568359375, -0.17276763916015625, -0.1429595947265625, -0.11315155029296875, -0.083343505859375, -0.05353546142578125, -0.0237274169921875, 0.00608062744140625, 0.035888671875, 0.06569671630859375, 0.0955047607421875, 0.12531280517578125, 0.155120849609375, 0.18492889404296875, 0.2147369384765625, 0.24454498291015625, 0.27435302734375, 0.30416107177734375, 0.3339691162109375, 0.36377716064453125, 0.393585205078125, 0.42339324951171875, 0.4532012939453125, 0.48300933837890625, 0.5128173828125, 0.5426254272460938, 0.5724334716796875, 0.6022415161132812, 0.632049560546875, 0.6618576049804688, 0.6916656494140625, 0.7214736938476562, 0.75128173828125, 0.7810897827148438, 0.8108978271484375, 0.8407058715820312, 0.870513916015625, 0.9003219604492188, 0.9301300048828125, 0.9599380493164062, 0.98974609375]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 7.0, 11.0, 30.0, 60.0, 165.0, 250.0, 208.0, 153.0, 54.0, 33.0, 11.0, 4.0, 5.0, 5.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.791822910308838, -7.416045188903809, -7.040267467498779, -6.66448974609375, -6.2887115478515625, -5.912933826446533, -5.537156105041504, -5.161377906799316, -4.785600662231445, -4.409822940826416, -4.034045219421387, -3.6582672595977783, -3.28248929977417, -2.9067115783691406, -2.5309338569641113, -2.155155897140503, -1.7793779373168945, -1.4036000967025757, -1.0278222560882568, -0.6520445346832275, -0.2762666940689087, 0.09951114654541016, 0.47528886795043945, 0.8510668277740479, 1.2268445491790771, 1.602622389793396, 1.9784002304077148, 2.354177951812744, 2.7299556732177734, 3.105733633041382, 3.481511354446411, 3.8572893142700195, 4.233067512512207, 4.608845233917236, 4.984622955322266, 5.360401153564453, 5.736178874969482, 6.111956596374512, 6.487734317779541, 6.86351203918457, 7.239290237426758, 7.615067958831787, 7.990845680236816, 8.366623878479004, 8.742401123046875, 9.118179321289062, 9.49395751953125, 9.869734764099121, 10.245512008666992, 10.62129020690918, 10.99706745147705, 11.372845649719238, 11.74862289428711, 12.124401092529297, 12.500179290771484, 12.875956535339355, 13.251734733581543, 13.62751293182373, 14.003290176391602, 14.379068374633789, 14.75484561920166, 15.130623817443848, 15.506401062011719, 15.882179260253906, 16.257957458496094]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 8.0, 9.0, 9.0, 19.0, 21.0, 47.0, 53.0, 46.0, 71.0, 92.0, 73.0, 98.0, 88.0, 83.0, 77.0, 56.0, 51.0, 29.0, 26.0, 25.0, 9.0, 9.0, 6.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.828957557678223, -5.5522847175598145, -5.275611877441406, -4.998939037322998, -4.72226619720459, -4.445592880249023, -4.168920040130615, -3.892247200012207, -3.615574359893799, -3.3389015197753906, -3.0622286796569824, -2.785555601119995, -2.508882761001587, -2.2322099208831787, -1.955536961555481, -1.6788640022277832, -1.402191162109375, -1.1255183219909668, -0.848845362663269, -0.5721724629402161, -0.2954995632171631, -0.018826723098754883, 0.25784623622894287, 0.5345191955566406, 0.8111920356750488, 1.087864875793457, 1.3645378351211548, 1.6412107944488525, 1.9178836345672607, 2.194556474685669, 2.4712295532226562, 2.7479023933410645, 3.024576187133789, 3.3012490272521973, 3.5779218673706055, 3.8545949459075928, 4.131267547607422, 4.407940864562988, 4.6846137046813965, 4.961286544799805, 5.237959384918213, 5.514632225036621, 5.791305065155029, 6.0679779052734375, 6.344651222229004, 6.621323585510254, 6.89799690246582, 7.1746697425842285, 7.451342582702637, 7.728015422821045, 8.004688262939453, 8.28136157989502, 8.55803394317627, 8.834707260131836, 9.111379623413086, 9.388052940368652, 9.664726257324219, 9.941399574279785, 10.218071937561035, 10.494745254516602, 10.771417617797852, 11.048090934753418, 11.324763298034668, 11.601436614990234, 11.878108978271484]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 7.0, 4.0, 9.0, 10.0, 14.0, 15.0, 34.0, 50.0, 60.0, 113.0, 262.0, 511.0, 1236.0, 3310.0, 10908.0, 43911.0, 185602.0, 460690.0, 256569.0, 62853.0, 15083.0, 4465.0, 1485.0, 644.0, 283.0, 158.0, 98.0, 64.0, 38.0, 23.0, 17.0, 9.0, 7.0, 4.0, 5.0, 3.0, 4.0, 0.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.9814453125, -0.9538955688476562, -0.9263458251953125, -0.8987960815429688, -0.871246337890625, -0.8436965942382812, -0.8161468505859375, -0.7885971069335938, -0.76104736328125, -0.7334976196289062, -0.7059478759765625, -0.6783981323242188, -0.650848388671875, -0.6232986450195312, -0.5957489013671875, -0.5681991577148438, -0.5406494140625, -0.5130996704101562, -0.4855499267578125, -0.45800018310546875, -0.430450439453125, -0.40290069580078125, -0.3753509521484375, -0.34780120849609375, -0.32025146484375, -0.29270172119140625, -0.2651519775390625, -0.23760223388671875, -0.210052490234375, -0.18250274658203125, -0.1549530029296875, -0.12740325927734375, -0.099853515625, -0.07230377197265625, -0.0447540283203125, -0.01720428466796875, 0.010345458984375, 0.03789520263671875, 0.0654449462890625, 0.09299468994140625, 0.12054443359375, 0.14809417724609375, 0.1756439208984375, 0.20319366455078125, 0.230743408203125, 0.25829315185546875, 0.2858428955078125, 0.31339263916015625, 0.3409423828125, 0.36849212646484375, 0.3960418701171875, 0.42359161376953125, 0.451141357421875, 0.47869110107421875, 0.5062408447265625, 0.5337905883789062, 0.56134033203125, 0.5888900756835938, 0.6164398193359375, 0.6439895629882812, 0.671539306640625, 0.6990890502929688, 0.7266387939453125, 0.7541885375976562, 0.78173828125]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 2.0, 2.0, 5.0, 2.0, 3.0, 14.0, 11.0, 19.0, 16.0, 23.0, 21.0, 32.0, 31.0, 45.0, 39.0, 55.0, 60.0, 56.0, 76.0, 50.0, 66.0, 57.0, 62.0, 47.0, 39.0, 29.0, 37.0, 23.0, 19.0, 19.0, 17.0, 15.0, 3.0, 4.0, 5.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.52392578125, -0.5107879638671875, -0.497650146484375, -0.4845123291015625, -0.47137451171875, -0.4582366943359375, -0.445098876953125, -0.4319610595703125, -0.4188232421875, -0.4056854248046875, -0.392547607421875, -0.3794097900390625, -0.36627197265625, -0.3531341552734375, -0.339996337890625, -0.3268585205078125, -0.313720703125, -0.3005828857421875, -0.287445068359375, -0.2743072509765625, -0.26116943359375, -0.2480316162109375, -0.234893798828125, -0.2217559814453125, -0.2086181640625, -0.1954803466796875, -0.182342529296875, -0.1692047119140625, -0.15606689453125, -0.1429290771484375, -0.129791259765625, -0.1166534423828125, -0.103515625, -0.0903778076171875, -0.077239990234375, -0.0641021728515625, -0.05096435546875, -0.0378265380859375, -0.024688720703125, -0.0115509033203125, 0.0015869140625, 0.0147247314453125, 0.027862548828125, 0.0410003662109375, 0.05413818359375, 0.0672760009765625, 0.080413818359375, 0.0935516357421875, 0.106689453125, 0.1198272705078125, 0.132965087890625, 0.1461029052734375, 0.15924072265625, 0.1723785400390625, 0.185516357421875, 0.1986541748046875, 0.2117919921875, 0.2249298095703125, 0.238067626953125, 0.2512054443359375, 0.26434326171875, 0.2774810791015625, 0.290618896484375, 0.3037567138671875, 0.31689453125]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 2.0, 4.0, 5.0, 3.0, 8.0, 13.0, 17.0, 27.0, 20.0, 46.0, 41.0, 80.0, 100.0, 142.0, 169.0, 275.0, 359.0, 518.0, 813.0, 1315.0, 2586.0, 7046.0, 37800.0, 332249.0, 589506.0, 58574.0, 9443.0, 3062.0, 1432.0, 921.0, 555.0, 383.0, 249.0, 221.0, 164.0, 101.0, 84.0, 61.0, 47.0, 37.0, 20.0, 15.0, 15.0, 9.0, 9.0, 5.0, 3.0, 6.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-1.2529296875, -1.2172393798828125, -1.181549072265625, -1.1458587646484375, -1.11016845703125, -1.0744781494140625, -1.038787841796875, -1.0030975341796875, -0.9674072265625, -0.9317169189453125, -0.896026611328125, -0.8603363037109375, -0.82464599609375, -0.7889556884765625, -0.753265380859375, -0.7175750732421875, -0.681884765625, -0.6461944580078125, -0.610504150390625, -0.5748138427734375, -0.53912353515625, -0.5034332275390625, -0.467742919921875, -0.4320526123046875, -0.3963623046875, -0.3606719970703125, -0.324981689453125, -0.2892913818359375, -0.25360107421875, -0.2179107666015625, -0.182220458984375, -0.1465301513671875, -0.11083984375, -0.0751495361328125, -0.039459228515625, -0.0037689208984375, 0.03192138671875, 0.0676116943359375, 0.103302001953125, 0.1389923095703125, 0.1746826171875, 0.2103729248046875, 0.246063232421875, 0.2817535400390625, 0.31744384765625, 0.3531341552734375, 0.388824462890625, 0.4245147705078125, 0.460205078125, 0.4958953857421875, 0.531585693359375, 0.5672760009765625, 0.60296630859375, 0.6386566162109375, 0.674346923828125, 0.7100372314453125, 0.7457275390625, 0.7814178466796875, 0.817108154296875, 0.8527984619140625, 0.88848876953125, 0.9241790771484375, 0.959869384765625, 0.9955596923828125, 1.03125]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 0.0, 4.0, 3.0, 4.0, 7.0, 5.0, 16.0, 9.0, 16.0, 11.0, 24.0, 29.0, 34.0, 36.0, 35.0, 44.0, 58.0, 46.0, 43.0, 47.0, 71.0, 41.0, 39.0, 48.0, 44.0, 45.0, 38.0, 27.0, 29.0, 30.0, 16.0, 16.0, 19.0, 12.0, 17.0, 9.0, 11.0, 12.0, 5.0, 1.0, 2.0, 5.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.705078125, -1.6544189453125, -1.603759765625, -1.5531005859375, -1.50244140625, -1.4517822265625, -1.401123046875, -1.3504638671875, -1.2998046875, -1.2491455078125, -1.198486328125, -1.1478271484375, -1.09716796875, -1.0465087890625, -0.995849609375, -0.9451904296875, -0.89453125, -0.8438720703125, -0.793212890625, -0.7425537109375, -0.69189453125, -0.6412353515625, -0.590576171875, -0.5399169921875, -0.4892578125, -0.4385986328125, -0.387939453125, -0.3372802734375, -0.28662109375, -0.2359619140625, -0.185302734375, -0.1346435546875, -0.083984375, -0.0333251953125, 0.017333984375, 0.0679931640625, 0.11865234375, 0.1693115234375, 0.219970703125, 0.2706298828125, 0.3212890625, 0.3719482421875, 0.422607421875, 0.4732666015625, 0.52392578125, 0.5745849609375, 0.625244140625, 0.6759033203125, 0.7265625, 0.7772216796875, 0.827880859375, 0.8785400390625, 0.92919921875, 0.9798583984375, 1.030517578125, 1.0811767578125, 1.1318359375, 1.1824951171875, 1.233154296875, 1.2838134765625, 1.33447265625, 1.3851318359375, 1.435791015625, 1.4864501953125, 1.537109375]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 6.0, 7.0, 14.0, 13.0, 28.0, 31.0, 72.0, 135.0, 250.0, 651.0, 2031.0, 10794.0, 145599.0, 827217.0, 53713.0, 5759.0, 1334.0, 450.0, 186.0, 89.0, 84.0, 34.0, 22.0, 18.0, 7.0, 9.0, 1.0, 2.0, 3.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.47314453125, -0.4544677734375, -0.435791015625, -0.4171142578125, -0.3984375, -0.3797607421875, -0.361083984375, -0.3424072265625, -0.32373046875, -0.3050537109375, -0.286376953125, -0.2677001953125, -0.2490234375, -0.2303466796875, -0.211669921875, -0.1929931640625, -0.17431640625, -0.1556396484375, -0.136962890625, -0.1182861328125, -0.099609375, -0.0809326171875, -0.062255859375, -0.0435791015625, -0.02490234375, -0.0062255859375, 0.012451171875, 0.0311279296875, 0.0498046875, 0.0684814453125, 0.087158203125, 0.1058349609375, 0.12451171875, 0.1431884765625, 0.161865234375, 0.1805419921875, 0.19921875, 0.2178955078125, 0.236572265625, 0.2552490234375, 0.27392578125, 0.2926025390625, 0.311279296875, 0.3299560546875, 0.3486328125, 0.3673095703125, 0.385986328125, 0.4046630859375, 0.42333984375, 0.4420166015625, 0.460693359375, 0.4793701171875, 0.498046875, 0.5167236328125, 0.535400390625, 0.5540771484375, 0.57275390625, 0.5914306640625, 0.610107421875, 0.6287841796875, 0.6474609375, 0.6661376953125, 0.684814453125, 0.7034912109375, 0.72216796875]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 1.0, 2.0, 3.0, 5.0, 10.0, 1.0, 6.0, 10.0, 15.0, 15.0, 19.0, 38.0, 43.0, 46.0, 63.0, 61.0, 76.0, 79.0, 92.0, 94.0, 57.0, 44.0, 47.0, 34.0, 26.0, 18.0, 15.0, 18.0, 19.0, 9.0, 4.0, 8.0, 2.0, 7.0, 5.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.867813110351562e-05, -7.628742605447769e-05, -7.389672100543976e-05, -7.150601595640182e-05, -6.911531090736389e-05, -6.672460585832596e-05, -6.433390080928802e-05, -6.194319576025009e-05, -5.955249071121216e-05, -5.7161785662174225e-05, -5.477108061313629e-05, -5.238037556409836e-05, -4.9989670515060425e-05, -4.759896546602249e-05, -4.520826041698456e-05, -4.2817555367946625e-05, -4.042685031890869e-05, -3.803614526987076e-05, -3.5645440220832825e-05, -3.325473517179489e-05, -3.086403012275696e-05, -2.8473325073719025e-05, -2.608262002468109e-05, -2.3691914975643158e-05, -2.1301209926605225e-05, -1.891050487756729e-05, -1.6519799828529358e-05, -1.4129094779491425e-05, -1.1738389730453491e-05, -9.347684681415558e-06, -6.9569796323776245e-06, -4.566274583339691e-06, -2.175569534301758e-06, 2.1513551473617554e-07, 2.605840563774109e-06, 4.996545612812042e-06, 7.387250661849976e-06, 9.777955710887909e-06, 1.2168660759925842e-05, 1.4559365808963776e-05, 1.695007085800171e-05, 1.9340775907039642e-05, 2.1731480956077576e-05, 2.412218600511551e-05, 2.6512891054153442e-05, 2.8903596103191376e-05, 3.129430115222931e-05, 3.368500620126724e-05, 3.6075711250305176e-05, 3.846641629934311e-05, 4.085712134838104e-05, 4.3247826397418976e-05, 4.563853144645691e-05, 4.802923649549484e-05, 5.0419941544532776e-05, 5.281064659357071e-05, 5.520135164260864e-05, 5.7592056691646576e-05, 5.998276174068451e-05, 6.237346678972244e-05, 6.476417183876038e-05, 6.715487688779831e-05, 6.954558193683624e-05, 7.193628698587418e-05, 7.432699203491211e-05]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 5.0, 13.0, 11.0, 15.0, 30.0, 36.0, 58.0, 83.0, 165.0, 254.0, 491.0, 1054.0, 2689.0, 8412.0, 40940.0, 502482.0, 438362.0, 40301.0, 8251.0, 2626.0, 1007.0, 551.0, 260.0, 165.0, 101.0, 60.0, 44.0, 28.0, 21.0, 18.0, 8.0, 5.0, 5.0, 1.0, 1.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.47021484375, -0.45638275146484375, -0.4425506591796875, -0.42871856689453125, -0.414886474609375, -0.40105438232421875, -0.3872222900390625, -0.37339019775390625, -0.35955810546875, -0.34572601318359375, -0.3318939208984375, -0.31806182861328125, -0.304229736328125, -0.29039764404296875, -0.2765655517578125, -0.26273345947265625, -0.2489013671875, -0.23506927490234375, -0.2212371826171875, -0.20740509033203125, -0.193572998046875, -0.17974090576171875, -0.1659088134765625, -0.15207672119140625, -0.13824462890625, -0.12441253662109375, -0.1105804443359375, -0.09674835205078125, -0.082916259765625, -0.06908416748046875, -0.0552520751953125, -0.04141998291015625, -0.027587890625, -0.01375579833984375, 7.62939453125e-05, 0.01390838623046875, 0.027740478515625, 0.04157257080078125, 0.0554046630859375, 0.06923675537109375, 0.08306884765625, 0.09690093994140625, 0.1107330322265625, 0.12456512451171875, 0.138397216796875, 0.15222930908203125, 0.1660614013671875, 0.17989349365234375, 0.1937255859375, 0.20755767822265625, 0.2213897705078125, 0.23522186279296875, 0.249053955078125, 0.26288604736328125, 0.2767181396484375, 0.29055023193359375, 0.30438232421875, 0.31821441650390625, 0.3320465087890625, 0.34587860107421875, 0.359710693359375, 0.37354278564453125, 0.3873748779296875, 0.40120697021484375, 0.4150390625]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 4.0, 7.0, 3.0, 5.0, 14.0, 15.0, 12.0, 12.0, 17.0, 23.0, 31.0, 31.0, 37.0, 35.0, 40.0, 32.0, 47.0, 62.0, 72.0, 69.0, 53.0, 59.0, 48.0, 34.0, 49.0, 22.0, 27.0, 29.0, 25.0, 20.0, 8.0, 10.0, 8.0, 10.0, 5.0, 4.0, 3.0, 4.0, 1.0, 3.0, 4.0, 4.0, 1.0, 4.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0], "bins": [-0.28466796875, -0.27577972412109375, -0.2668914794921875, -0.25800323486328125, -0.249114990234375, -0.24022674560546875, -0.2313385009765625, -0.22245025634765625, -0.21356201171875, -0.20467376708984375, -0.1957855224609375, -0.18689727783203125, -0.178009033203125, -0.16912078857421875, -0.1602325439453125, -0.15134429931640625, -0.1424560546875, -0.13356781005859375, -0.1246795654296875, -0.11579132080078125, -0.106903076171875, -0.09801483154296875, -0.0891265869140625, -0.08023834228515625, -0.07135009765625, -0.06246185302734375, -0.0535736083984375, -0.04468536376953125, -0.035797119140625, -0.02690887451171875, -0.0180206298828125, -0.00913238525390625, -0.000244140625, 0.00864410400390625, 0.0175323486328125, 0.02642059326171875, 0.035308837890625, 0.04419708251953125, 0.0530853271484375, 0.06197357177734375, 0.07086181640625, 0.07975006103515625, 0.0886383056640625, 0.09752655029296875, 0.106414794921875, 0.11530303955078125, 0.1241912841796875, 0.13307952880859375, 0.1419677734375, 0.15085601806640625, 0.1597442626953125, 0.16863250732421875, 0.177520751953125, 0.18640899658203125, 0.1952972412109375, 0.20418548583984375, 0.21307373046875, 0.22196197509765625, 0.2308502197265625, 0.23973846435546875, 0.248626708984375, 0.25751495361328125, 0.2664031982421875, 0.27529144287109375, 0.2841796875]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 8.0, 6.0, 11.0, 49.0, 122.0, 293.0, 299.0, 131.0, 47.0, 18.0, 5.0, 5.0, 3.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.055168151855469, -11.624258041381836, -11.193347930908203, -10.76243782043457, -10.331526756286621, -9.900616645812988, -9.469706535339355, -9.038796424865723, -8.60788631439209, -8.176976203918457, -7.746065616607666, -7.315155506134033, -6.8842453956604, -6.453334808349609, -6.022424697875977, -5.591514587402344, -5.160604000091553, -4.72969388961792, -4.298783302307129, -3.867873191833496, -3.4369630813598633, -3.0060527324676514, -2.5751423835754395, -2.1442322731018066, -1.7133219242095947, -1.2824116945266724, -0.8515014052391052, -0.4205911159515381, 0.010319113731384277, 0.44122934341430664, 0.8721396923065186, 1.3030498027801514, 1.7339601516723633, 2.164870500564575, 2.595780611038208, 3.02669095993042, 3.4576010704040527, 3.8885114192962646, 4.319421768188477, 4.750331878662109, 5.181241989135742, 5.612152099609375, 6.043062686920166, 6.473972797393799, 6.904882907867432, 7.335793495178223, 7.7667036056518555, 8.197613716125488, 8.628524780273438, 9.05943489074707, 9.490345001220703, 9.921255111694336, 10.352166175842285, 10.783076286315918, 11.21398639678955, 11.644896507263184, 12.075806617736816, 12.50671672821045, 12.937626838684082, 13.368537902832031, 13.799448013305664, 14.230358123779297, 14.66126823425293, 15.092178344726562, 15.523088455200195]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 8.0, 6.0, 11.0, 9.0, 11.0, 11.0, 21.0, 29.0, 30.0, 35.0, 36.0, 50.0, 52.0, 68.0, 88.0, 97.0, 78.0, 53.0, 57.0, 43.0, 39.0, 33.0, 35.0, 28.0, 20.0, 14.0, 18.0, 7.0, 6.0, 3.0, 6.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.397326469421387, -6.133326053619385, -5.869325637817383, -5.605325222015381, -5.341324806213379, -5.077323913574219, -4.813323497772217, -4.549323081970215, -4.285322666168213, -4.021322250366211, -3.757321834564209, -3.493321180343628, -3.229320764541626, -2.965320348739624, -2.701319694519043, -2.437319278717041, -2.173318862915039, -1.909318447113037, -1.6453179121017456, -1.381317377090454, -1.1173169612884521, -0.8533165454864502, -0.5893160104751587, -0.3253154754638672, -0.061315059661865234, 0.2026854157447815, 0.4666858911514282, 0.730686366558075, 0.9946868419647217, 1.2586872577667236, 1.5226877927780151, 1.7866883277893066, 2.0506887435913086, 2.3146891593933105, 2.5786895751953125, 2.8426902294158936, 3.1066906452178955, 3.3706910610198975, 3.6346917152404785, 3.8986921310424805, 4.162692546844482, 4.426692962646484, 4.690693378448486, 4.954693794250488, 5.218694686889648, 5.482694625854492, 5.746695518493652, 6.010695934295654, 6.274696350097656, 6.538696765899658, 6.80269718170166, 7.066697597503662, 7.330698013305664, 7.594698905944824, 7.858699321746826, 8.122699737548828, 8.386699676513672, 8.650700569152832, 8.914700508117676, 9.178701400756836, 9.44270133972168, 9.70670223236084, 9.970702171325684, 10.234703063964844, 10.498703956604004]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 1.0, 2.0, 11.0, 6.0, 8.0, 13.0, 10.0, 17.0, 32.0, 53.0, 57.0, 111.0, 238.0, 573.0, 1710.0, 6501.0, 34786.0, 409029.0, 3316914.0, 381750.0, 33058.0, 6379.0, 1670.0, 650.0, 308.0, 151.0, 81.0, 55.0, 41.0, 19.0, 19.0, 12.0, 3.0, 2.0, 7.0, 2.0, 2.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.9287109375, -0.90325927734375, -0.8778076171875, -0.85235595703125, -0.826904296875, -0.80145263671875, -0.7760009765625, -0.75054931640625, -0.72509765625, -0.69964599609375, -0.6741943359375, -0.64874267578125, -0.623291015625, -0.59783935546875, -0.5723876953125, -0.54693603515625, -0.521484375, -0.49603271484375, -0.4705810546875, -0.44512939453125, -0.419677734375, -0.39422607421875, -0.3687744140625, -0.34332275390625, -0.31787109375, -0.29241943359375, -0.2669677734375, -0.24151611328125, -0.216064453125, -0.19061279296875, -0.1651611328125, -0.13970947265625, -0.1142578125, -0.08880615234375, -0.0633544921875, -0.03790283203125, -0.012451171875, 0.01300048828125, 0.0384521484375, 0.06390380859375, 0.08935546875, 0.11480712890625, 0.1402587890625, 0.16571044921875, 0.191162109375, 0.21661376953125, 0.2420654296875, 0.26751708984375, 0.29296875, 0.31842041015625, 0.3438720703125, 0.36932373046875, 0.394775390625, 0.42022705078125, 0.4456787109375, 0.47113037109375, 0.49658203125, 0.52203369140625, 0.5474853515625, 0.57293701171875, 0.598388671875, 0.62384033203125, 0.6492919921875, 0.67474365234375, 0.7001953125]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 6.0, 6.0, 8.0, 14.0, 11.0, 25.0, 19.0, 47.0, 47.0, 59.0, 72.0, 73.0, 108.0, 76.0, 79.0, 90.0, 56.0, 49.0, 40.0, 43.0, 34.0, 15.0, 11.0, 10.0, 8.0, 4.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3408203125, -0.32404327392578125, -0.3072662353515625, -0.29048919677734375, -0.273712158203125, -0.25693511962890625, -0.2401580810546875, -0.22338104248046875, -0.20660400390625, -0.18982696533203125, -0.1730499267578125, -0.15627288818359375, -0.139495849609375, -0.12271881103515625, -0.1059417724609375, -0.08916473388671875, -0.0723876953125, -0.05561065673828125, -0.0388336181640625, -0.02205657958984375, -0.005279541015625, 0.01149749755859375, 0.0282745361328125, 0.04505157470703125, 0.06182861328125, 0.07860565185546875, 0.0953826904296875, 0.11215972900390625, 0.128936767578125, 0.14571380615234375, 0.1624908447265625, 0.17926788330078125, 0.196044921875, 0.21282196044921875, 0.2295989990234375, 0.24637603759765625, 0.263153076171875, 0.27993011474609375, 0.2967071533203125, 0.31348419189453125, 0.33026123046875, 0.34703826904296875, 0.3638153076171875, 0.38059234619140625, 0.397369384765625, 0.41414642333984375, 0.4309234619140625, 0.44770050048828125, 0.4644775390625, 0.48125457763671875, 0.4980316162109375, 0.5148086547851562, 0.531585693359375, 0.5483627319335938, 0.5651397705078125, 0.5819168090820312, 0.59869384765625, 0.6154708862304688, 0.6322479248046875, 0.6490249633789062, 0.665802001953125, 0.6825790405273438, 0.6993560791015625, 0.7161331176757812, 0.73291015625]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 7.0, 12.0, 10.0, 13.0, 25.0, 24.0, 40.0, 72.0, 104.0, 187.0, 314.0, 575.0, 1175.0, 3163.0, 9716.0, 38475.0, 243415.0, 2991290.0, 796814.0, 80377.0, 18413.0, 5558.0, 2096.0, 1010.0, 556.0, 331.0, 178.0, 110.0, 81.0, 46.0, 33.0, 15.0, 12.0, 11.0, 4.0, 10.0, 6.0, 4.0, 0.0, 1.0, 3.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5908203125, -0.5694961547851562, -0.5481719970703125, -0.5268478393554688, -0.505523681640625, -0.48419952392578125, -0.4628753662109375, -0.44155120849609375, -0.42022705078125, -0.39890289306640625, -0.3775787353515625, -0.35625457763671875, -0.334930419921875, -0.31360626220703125, -0.2922821044921875, -0.27095794677734375, -0.2496337890625, -0.22830963134765625, -0.2069854736328125, -0.18566131591796875, -0.164337158203125, -0.14301300048828125, -0.1216888427734375, -0.10036468505859375, -0.07904052734375, -0.05771636962890625, -0.0363922119140625, -0.01506805419921875, 0.006256103515625, 0.02758026123046875, 0.0489044189453125, 0.07022857666015625, 0.091552734375, 0.11287689208984375, 0.1342010498046875, 0.15552520751953125, 0.176849365234375, 0.19817352294921875, 0.2194976806640625, 0.24082183837890625, 0.26214599609375, 0.28347015380859375, 0.3047943115234375, 0.32611846923828125, 0.347442626953125, 0.36876678466796875, 0.3900909423828125, 0.41141510009765625, 0.4327392578125, 0.45406341552734375, 0.4753875732421875, 0.49671173095703125, 0.518035888671875, 0.5393600463867188, 0.5606842041015625, 0.5820083618164062, 0.60333251953125, 0.6246566772460938, 0.6459808349609375, 0.6673049926757812, 0.688629150390625, 0.7099533081054688, 0.7312774658203125, 0.7526016235351562, 0.77392578125]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 8.0, 6.0, 7.0, 15.0, 28.0, 33.0, 58.0, 104.0, 245.0, 386.0, 782.0, 902.0, 673.0, 348.0, 204.0, 96.0, 55.0, 40.0, 21.0, 24.0, 15.0, 9.0, 7.0, 7.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.82763671875, -0.7949752807617188, -0.7623138427734375, -0.7296524047851562, -0.696990966796875, -0.6643295288085938, -0.6316680908203125, -0.5990066528320312, -0.56634521484375, -0.5336837768554688, -0.5010223388671875, -0.46836090087890625, -0.435699462890625, -0.40303802490234375, -0.3703765869140625, -0.33771514892578125, -0.3050537109375, -0.27239227294921875, -0.2397308349609375, -0.20706939697265625, -0.174407958984375, -0.14174652099609375, -0.1090850830078125, -0.07642364501953125, -0.04376220703125, -0.01110076904296875, 0.0215606689453125, 0.05422210693359375, 0.086883544921875, 0.11954498291015625, 0.1522064208984375, 0.18486785888671875, 0.217529296875, 0.25019073486328125, 0.2828521728515625, 0.31551361083984375, 0.348175048828125, 0.38083648681640625, 0.4134979248046875, 0.44615936279296875, 0.47882080078125, 0.5114822387695312, 0.5441436767578125, 0.5768051147460938, 0.609466552734375, 0.6421279907226562, 0.6747894287109375, 0.7074508666992188, 0.7401123046875, 0.7727737426757812, 0.8054351806640625, 0.8380966186523438, 0.870758056640625, 0.9034194946289062, 0.9360809326171875, 0.9687423706054688, 1.00140380859375, 1.0340652465820312, 1.0667266845703125, 1.0993881225585938, 1.132049560546875, 1.1647109985351562, 1.1973724365234375, 1.2300338745117188, 1.2626953125]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 3.0, 13.0, 22.0, 105.0, 233.0, 305.0, 174.0, 76.0, 27.0, 16.0, 6.0, 4.0, 4.0, 3.0, 2.0, 0.0, 1.0, 4.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.742885589599609, -5.364243507385254, -4.985601902008057, -4.606959819793701, -4.228318214416504, -3.8496761322021484, -3.471034049987793, -3.0923922061920166, -2.7137503623962402, -2.335108518600464, -1.956466555595398, -1.577824592590332, -1.1991827487945557, -0.8205409049987793, -0.44189882278442383, -0.06325697898864746, 0.3153848648071289, 0.69402676820755, 1.0726686716079712, 1.451310634613037, 1.8299524784088135, 2.20859432220459, 2.5872364044189453, 2.9658782482147217, 3.344520092010498, 3.7231619358062744, 4.101803779602051, 4.480445861816406, 4.859087944030762, 5.237729549407959, 5.6163716316223145, 5.995013236999512, 6.373655319213867, 6.752297401428223, 7.13093900680542, 7.509581089019775, 7.888222694396973, 8.266864776611328, 8.645506858825684, 9.024148941040039, 9.402790069580078, 9.781432151794434, 10.160074234008789, 10.538715362548828, 10.917357444763184, 11.295999526977539, 11.674641609191895, 12.05328369140625, 12.431925773620605, 12.810567855834961, 13.189209938049316, 13.567852020263672, 13.946493148803711, 14.325135231018066, 14.703777313232422, 15.082419395446777, 15.461061477661133, 15.839703559875488, 16.218345642089844, 16.596986770629883, 16.975629806518555, 17.354270935058594, 17.732913970947266, 18.111555099487305, 18.490196228027344]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 4.0, 2.0, 3.0, 4.0, 5.0, 8.0, 15.0, 15.0, 28.0, 41.0, 58.0, 44.0, 61.0, 74.0, 56.0, 69.0, 69.0, 71.0, 68.0, 48.0, 57.0, 45.0, 33.0, 39.0, 17.0, 25.0, 15.0, 11.0, 9.0, 5.0, 7.0, 4.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.110647678375244, -5.93239164352417, -5.754136085510254, -5.57588005065918, -5.3976240158081055, -5.219367980957031, -5.041112422943115, -4.862856388092041, -4.684600830078125, -4.506344795227051, -4.328089237213135, -4.1498332023620605, -3.9715771675109863, -3.793321371078491, -3.615065574645996, -3.436809539794922, -3.2585535049438477, -3.0802977085113525, -2.9020416736602783, -2.723785877227783, -2.545529842376709, -2.367274045944214, -2.1890182495117188, -2.0107622146606445, -1.8325064182281494, -1.6542505025863647, -1.47599458694458, -1.297738790512085, -1.1194828748703003, -0.9412269592285156, -0.7629711627960205, -0.5847152471542358, -0.4064598083496094, -0.2282039225101471, -0.049948036670684814, 0.12830781936645508, 0.30656373500823975, 0.4848196506500244, 0.6630754470825195, 0.8413313627243042, 1.0195872783660889, 1.1978431940078735, 1.3760991096496582, 1.5543549060821533, 1.732610821723938, 1.9108667373657227, 2.0891225337982178, 2.267378330230713, 2.445634365081787, 2.6238901615142822, 2.8021461963653564, 2.9804019927978516, 3.158658027648926, 3.336913824081421, 3.515169620513916, 3.6934256553649902, 3.8716814517974854, 4.0499372482299805, 4.228193283081055, 4.406449317932129, 4.584704875946045, 4.762960910797119, 4.941216468811035, 5.119472503662109, 5.297728538513184]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 8.0, 11.0, 10.0, 17.0, 24.0, 28.0, 43.0, 87.0, 140.0, 271.0, 554.0, 1215.0, 2678.0, 7009.0, 21716.0, 73315.0, 248842.0, 424743.0, 187575.0, 54362.0, 16230.0, 5448.0, 2194.0, 971.0, 489.0, 238.0, 143.0, 60.0, 49.0, 29.0, 21.0, 10.0, 10.0, 8.0, 1.0, 2.0, 6.0, 1.0, 3.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.59228515625, -0.5689468383789062, -0.5456085205078125, -0.5222702026367188, -0.498931884765625, -0.47559356689453125, -0.4522552490234375, -0.42891693115234375, -0.40557861328125, -0.38224029541015625, -0.3589019775390625, -0.33556365966796875, -0.312225341796875, -0.28888702392578125, -0.2655487060546875, -0.24221038818359375, -0.2188720703125, -0.19553375244140625, -0.1721954345703125, -0.14885711669921875, -0.125518798828125, -0.10218048095703125, -0.0788421630859375, -0.05550384521484375, -0.03216552734375, -0.00882720947265625, 0.0145111083984375, 0.03784942626953125, 0.061187744140625, 0.08452606201171875, 0.1078643798828125, 0.13120269775390625, 0.154541015625, 0.17787933349609375, 0.2012176513671875, 0.22455596923828125, 0.247894287109375, 0.27123260498046875, 0.2945709228515625, 0.31790924072265625, 0.34124755859375, 0.36458587646484375, 0.3879241943359375, 0.41126251220703125, 0.434600830078125, 0.45793914794921875, 0.4812774658203125, 0.5046157836914062, 0.5279541015625, 0.5512924194335938, 0.5746307373046875, 0.5979690551757812, 0.621307373046875, 0.6446456909179688, 0.6679840087890625, 0.6913223266601562, 0.71466064453125, 0.7379989624023438, 0.7613372802734375, 0.7846755981445312, 0.808013916015625, 0.8313522338867188, 0.8546905517578125, 0.8780288696289062, 0.9013671875]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 8.0, 3.0, 7.0, 6.0, 9.0, 12.0, 14.0, 13.0, 16.0, 25.0, 43.0, 28.0, 35.0, 46.0, 53.0, 56.0, 56.0, 68.0, 51.0, 53.0, 52.0, 43.0, 45.0, 43.0, 47.0, 30.0, 26.0, 23.0, 30.0, 14.0, 8.0, 8.0, 16.0, 7.0, 5.0, 2.0, 2.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.32666015625, -0.3155059814453125, -0.304351806640625, -0.2931976318359375, -0.28204345703125, -0.2708892822265625, -0.259735107421875, -0.2485809326171875, -0.2374267578125, -0.2262725830078125, -0.215118408203125, -0.2039642333984375, -0.19281005859375, -0.1816558837890625, -0.170501708984375, -0.1593475341796875, -0.148193359375, -0.1370391845703125, -0.125885009765625, -0.1147308349609375, -0.10357666015625, -0.0924224853515625, -0.081268310546875, -0.0701141357421875, -0.0589599609375, -0.0478057861328125, -0.036651611328125, -0.0254974365234375, -0.01434326171875, -0.0031890869140625, 0.007965087890625, 0.0191192626953125, 0.0302734375, 0.0414276123046875, 0.052581787109375, 0.0637359619140625, 0.07489013671875, 0.0860443115234375, 0.097198486328125, 0.1083526611328125, 0.1195068359375, 0.1306610107421875, 0.141815185546875, 0.1529693603515625, 0.16412353515625, 0.1752777099609375, 0.186431884765625, 0.1975860595703125, 0.208740234375, 0.2198944091796875, 0.231048583984375, 0.2422027587890625, 0.25335693359375, 0.2645111083984375, 0.275665283203125, 0.2868194580078125, 0.2979736328125, 0.3091278076171875, 0.320281982421875, 0.3314361572265625, 0.34259033203125, 0.3537445068359375, 0.364898681640625, 0.3760528564453125, 0.38720703125]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 10.0, 3.0, 7.0, 15.0, 13.0, 19.0, 32.0, 38.0, 74.0, 69.0, 116.0, 197.0, 267.0, 398.0, 639.0, 1189.0, 2453.0, 7884.0, 46130.0, 530862.0, 402582.0, 42908.0, 7272.0, 2390.0, 1125.0, 644.0, 395.0, 246.0, 180.0, 121.0, 82.0, 52.0, 44.0, 28.0, 20.0, 15.0, 13.0, 8.0, 0.0, 3.0, 10.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.28125, -1.2435455322265625, -1.205841064453125, -1.1681365966796875, -1.13043212890625, -1.0927276611328125, -1.055023193359375, -1.0173187255859375, -0.9796142578125, -0.9419097900390625, -0.904205322265625, -0.8665008544921875, -0.82879638671875, -0.7910919189453125, -0.753387451171875, -0.7156829833984375, -0.677978515625, -0.6402740478515625, -0.602569580078125, -0.5648651123046875, -0.52716064453125, -0.4894561767578125, -0.451751708984375, -0.4140472412109375, -0.3763427734375, -0.3386383056640625, -0.300933837890625, -0.2632293701171875, -0.22552490234375, -0.1878204345703125, -0.150115966796875, -0.1124114990234375, -0.07470703125, -0.0370025634765625, 0.000701904296875, 0.0384063720703125, 0.07611083984375, 0.1138153076171875, 0.151519775390625, 0.1892242431640625, 0.2269287109375, 0.2646331787109375, 0.302337646484375, 0.3400421142578125, 0.37774658203125, 0.4154510498046875, 0.453155517578125, 0.4908599853515625, 0.528564453125, 0.5662689208984375, 0.603973388671875, 0.6416778564453125, 0.67938232421875, 0.7170867919921875, 0.754791259765625, 0.7924957275390625, 0.8302001953125, 0.8679046630859375, 0.905609130859375, 0.9433135986328125, 0.98101806640625, 1.0187225341796875, 1.056427001953125, 1.0941314697265625, 1.1318359375]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 6.0, 5.0, 12.0, 5.0, 9.0, 13.0, 19.0, 27.0, 24.0, 30.0, 30.0, 51.0, 44.0, 55.0, 72.0, 67.0, 70.0, 67.0, 75.0, 52.0, 50.0, 34.0, 30.0, 35.0, 28.0, 22.0, 15.0, 8.0, 11.0, 4.0, 15.0, 6.0, 4.0, 1.0, 5.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.5771484375, -1.531494140625, -1.48583984375, -1.440185546875, -1.39453125, -1.348876953125, -1.30322265625, -1.257568359375, -1.2119140625, -1.166259765625, -1.12060546875, -1.074951171875, -1.029296875, -0.983642578125, -0.93798828125, -0.892333984375, -0.8466796875, -0.801025390625, -0.75537109375, -0.709716796875, -0.6640625, -0.618408203125, -0.57275390625, -0.527099609375, -0.4814453125, -0.435791015625, -0.39013671875, -0.344482421875, -0.298828125, -0.253173828125, -0.20751953125, -0.161865234375, -0.1162109375, -0.070556640625, -0.02490234375, 0.020751953125, 0.06640625, 0.112060546875, 0.15771484375, 0.203369140625, 0.2490234375, 0.294677734375, 0.34033203125, 0.385986328125, 0.431640625, 0.477294921875, 0.52294921875, 0.568603515625, 0.6142578125, 0.659912109375, 0.70556640625, 0.751220703125, 0.796875, 0.842529296875, 0.88818359375, 0.933837890625, 0.9794921875, 1.025146484375, 1.07080078125, 1.116455078125, 1.162109375, 1.207763671875, 1.25341796875, 1.299072265625, 1.3447265625]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 0.0, 2.0, 2.0, 7.0, 9.0, 8.0, 13.0, 18.0, 18.0, 43.0, 58.0, 69.0, 123.0, 147.0, 279.0, 428.0, 932.0, 2250.0, 6936.0, 28160.0, 132295.0, 603332.0, 212058.0, 45099.0, 10645.0, 3041.0, 1205.0, 560.0, 295.0, 156.0, 118.0, 73.0, 53.0, 32.0, 14.0, 25.0, 14.0, 9.0, 12.0, 2.0, 6.0, 6.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.274658203125, -0.2662467956542969, -0.25783538818359375, -0.24942398071289062, -0.2410125732421875, -0.23260116577148438, -0.22418975830078125, -0.21577835083007812, -0.207366943359375, -0.19895553588867188, -0.19054412841796875, -0.18213272094726562, -0.1737213134765625, -0.16530990600585938, -0.15689849853515625, -0.14848709106445312, -0.14007568359375, -0.13166427612304688, -0.12325286865234375, -0.11484146118164062, -0.1064300537109375, -0.09801864624023438, -0.08960723876953125, -0.08119583129882812, -0.072784423828125, -0.06437301635742188, -0.05596160888671875, -0.047550201416015625, -0.0391387939453125, -0.030727386474609375, -0.02231597900390625, -0.013904571533203125, -0.0054931640625, 0.002918243408203125, 0.01132965087890625, 0.019741058349609375, 0.0281524658203125, 0.036563873291015625, 0.04497528076171875, 0.053386688232421875, 0.061798095703125, 0.07020950317382812, 0.07862091064453125, 0.08703231811523438, 0.0954437255859375, 0.10385513305664062, 0.11226654052734375, 0.12067794799804688, 0.12908935546875, 0.13750076293945312, 0.14591217041015625, 0.15432357788085938, 0.1627349853515625, 0.17114639282226562, 0.17955780029296875, 0.18796920776367188, 0.196380615234375, 0.20479202270507812, 0.21320343017578125, 0.22161483764648438, 0.2300262451171875, 0.23843765258789062, 0.24684906005859375, 0.2552604675292969, 0.263671875]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 2.0, 2.0, 5.0, 2.0, 1.0, 3.0, 3.0, 7.0, 6.0, 8.0, 9.0, 16.0, 11.0, 20.0, 26.0, 29.0, 44.0, 44.0, 51.0, 60.0, 59.0, 72.0, 73.0, 69.0, 75.0, 41.0, 62.0, 33.0, 36.0, 16.0, 22.0, 20.0, 15.0, 8.0, 11.0, 5.0, 11.0, 5.0, 4.0, 11.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.7756900787353516e-05, -5.570054054260254e-05, -5.364418029785156e-05, -5.1587820053100586e-05, -4.953145980834961e-05, -4.747509956359863e-05, -4.5418739318847656e-05, -4.336237907409668e-05, -4.13060188293457e-05, -3.9249658584594727e-05, -3.719329833984375e-05, -3.5136938095092773e-05, -3.30805778503418e-05, -3.102421760559082e-05, -2.8967857360839844e-05, -2.6911497116088867e-05, -2.485513687133789e-05, -2.2798776626586914e-05, -2.0742416381835938e-05, -1.868605613708496e-05, -1.6629695892333984e-05, -1.4573335647583008e-05, -1.2516975402832031e-05, -1.0460615158081055e-05, -8.404254913330078e-06, -6.3478946685791016e-06, -4.291534423828125e-06, -2.2351741790771484e-06, -1.7881393432617188e-07, 1.8775463104248047e-06, 3.933906555175781e-06, 5.990266799926758e-06, 8.046627044677734e-06, 1.0102987289428711e-05, 1.2159347534179688e-05, 1.4215707778930664e-05, 1.627206802368164e-05, 1.8328428268432617e-05, 2.0384788513183594e-05, 2.244114875793457e-05, 2.4497509002685547e-05, 2.6553869247436523e-05, 2.86102294921875e-05, 3.0666589736938477e-05, 3.272294998168945e-05, 3.477931022644043e-05, 3.6835670471191406e-05, 3.889203071594238e-05, 4.094839096069336e-05, 4.3004751205444336e-05, 4.506111145019531e-05, 4.711747169494629e-05, 4.9173831939697266e-05, 5.123019218444824e-05, 5.328655242919922e-05, 5.5342912673950195e-05, 5.739927291870117e-05, 5.945563316345215e-05, 6.151199340820312e-05, 6.35683536529541e-05, 6.562471389770508e-05, 6.768107414245605e-05, 6.973743438720703e-05, 7.179379463195801e-05, 7.385015487670898e-05]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 4.0, 5.0, 0.0, 4.0, 9.0, 5.0, 9.0, 9.0, 12.0, 30.0, 45.0, 53.0, 91.0, 135.0, 237.0, 364.0, 731.0, 1394.0, 3183.0, 9380.0, 37882.0, 184714.0, 613579.0, 151388.0, 31429.0, 8094.0, 2869.0, 1233.0, 683.0, 392.0, 197.0, 124.0, 87.0, 57.0, 40.0, 29.0, 22.0, 13.0, 12.0, 6.0, 5.0, 2.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.310791015625, -0.3022613525390625, -0.293731689453125, -0.2852020263671875, -0.27667236328125, -0.2681427001953125, -0.259613037109375, -0.2510833740234375, -0.2425537109375, -0.2340240478515625, -0.225494384765625, -0.2169647216796875, -0.20843505859375, -0.1999053955078125, -0.191375732421875, -0.1828460693359375, -0.17431640625, -0.1657867431640625, -0.157257080078125, -0.1487274169921875, -0.14019775390625, -0.1316680908203125, -0.123138427734375, -0.1146087646484375, -0.1060791015625, -0.0975494384765625, -0.089019775390625, -0.0804901123046875, -0.07196044921875, -0.0634307861328125, -0.054901123046875, -0.0463714599609375, -0.037841796875, -0.0293121337890625, -0.020782470703125, -0.0122528076171875, -0.00372314453125, 0.0048065185546875, 0.013336181640625, 0.0218658447265625, 0.0303955078125, 0.0389251708984375, 0.047454833984375, 0.0559844970703125, 0.06451416015625, 0.0730438232421875, 0.081573486328125, 0.0901031494140625, 0.0986328125, 0.1071624755859375, 0.115692138671875, 0.1242218017578125, 0.13275146484375, 0.1412811279296875, 0.149810791015625, 0.1583404541015625, 0.1668701171875, 0.1753997802734375, 0.183929443359375, 0.1924591064453125, 0.20098876953125, 0.2095184326171875, 0.218048095703125, 0.2265777587890625, 0.235107421875]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 5.0, 5.0, 8.0, 6.0, 11.0, 8.0, 16.0, 12.0, 19.0, 20.0, 26.0, 31.0, 39.0, 36.0, 56.0, 52.0, 61.0, 62.0, 68.0, 55.0, 62.0, 51.0, 41.0, 48.0, 41.0, 28.0, 30.0, 20.0, 17.0, 16.0, 4.0, 7.0, 14.0, 9.0, 4.0, 10.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.273681640625, -0.26654815673828125, -0.2594146728515625, -0.25228118896484375, -0.245147705078125, -0.23801422119140625, -0.2308807373046875, -0.22374725341796875, -0.21661376953125, -0.20948028564453125, -0.2023468017578125, -0.19521331787109375, -0.188079833984375, -0.18094635009765625, -0.1738128662109375, -0.16667938232421875, -0.1595458984375, -0.15241241455078125, -0.1452789306640625, -0.13814544677734375, -0.131011962890625, -0.12387847900390625, -0.1167449951171875, -0.10961151123046875, -0.10247802734375, -0.09534454345703125, -0.0882110595703125, -0.08107757568359375, -0.073944091796875, -0.06681060791015625, -0.0596771240234375, -0.05254364013671875, -0.04541015625, -0.03827667236328125, -0.0311431884765625, -0.02400970458984375, -0.016876220703125, -0.00974273681640625, -0.0026092529296875, 0.00452423095703125, 0.01165771484375, 0.01879119873046875, 0.0259246826171875, 0.03305816650390625, 0.040191650390625, 0.04732513427734375, 0.0544586181640625, 0.06159210205078125, 0.0687255859375, 0.07585906982421875, 0.0829925537109375, 0.09012603759765625, 0.097259521484375, 0.10439300537109375, 0.1115264892578125, 0.11865997314453125, 0.12579345703125, 0.13292694091796875, 0.1400604248046875, 0.14719390869140625, 0.154327392578125, 0.16146087646484375, 0.1685943603515625, 0.17572784423828125, 0.182861328125]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 3.0, 4.0, 4.0, 1.0, 3.0, 18.0, 14.0, 49.0, 128.0, 243.0, 320.0, 143.0, 55.0, 15.0, 4.0, 4.0, 2.0, 0.0, 3.0], "bins": [-20.081356048583984, -19.719009399414062, -19.356664657592773, -18.99431800842285, -18.63197135925293, -18.26962661743164, -17.90727996826172, -17.544933319091797, -17.182586669921875, -16.820240020751953, -16.457895278930664, -16.095548629760742, -15.73320198059082, -15.370856285095215, -15.00851058959961, -14.646163940429688, -14.283819198608398, -13.921473503112793, -13.559126853942871, -13.196781158447266, -12.834434509277344, -12.472088813781738, -12.109743118286133, -11.747396469116211, -11.385049819946289, -11.022704124450684, -10.660357475280762, -10.298011779785156, -9.935665130615234, -9.573319435119629, -9.210973739624023, -8.848627090454102, -8.486282348632812, -8.123936653137207, -7.761590003967285, -7.39924430847168, -7.036898136138916, -6.674551963806152, -6.312205791473389, -5.949859619140625, -5.587512969970703, -5.2251667976379395, -4.862820625305176, -4.50047492980957, -4.138128757476807, -3.775782585144043, -3.4134364128112793, -3.0510904788970947, -2.68874454498291, -2.3263983726501465, -1.964052438735962, -1.6017062664031982, -1.2393602132797241, -0.87701416015625, -0.5146679878234863, -0.15232205390930176, 0.21002411842346191, 0.572370171546936, 0.9347162842750549, 1.2970623970031738, 1.659408450126648, 2.021754503250122, 2.3841006755828857, 2.7464466094970703, 3.108792781829834]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 2.0, 5.0, 2.0, 6.0, 9.0, 8.0, 11.0, 12.0, 16.0, 14.0, 28.0, 24.0, 27.0, 48.0, 34.0, 43.0, 48.0, 57.0, 71.0, 89.0, 74.0, 51.0, 52.0, 49.0, 27.0, 41.0, 17.0, 20.0, 17.0, 21.0, 14.0, 11.0, 15.0, 9.0, 4.0, 11.0, 5.0, 5.0, 1.0, 3.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.043880939483643, -4.883065223693848, -4.7222490310668945, -4.5614333152771, -4.400617599487305, -4.23980188369751, -4.078986167907715, -3.9181699752807617, -3.757354259490967, -3.596538543701172, -3.435722589492798, -3.274906635284424, -3.114090919494629, -2.953275203704834, -2.79245924949646, -2.631643295288086, -2.470827579498291, -2.310011863708496, -2.149195909500122, -1.9883800745010376, -1.8275642395019531, -1.6667484045028687, -1.5059325695037842, -1.3451167345046997, -1.1843008995056152, -1.0234850645065308, -0.8626692295074463, -0.7018533945083618, -0.5410375595092773, -0.38022172451019287, -0.2194058895111084, -0.058590054512023926, 0.10222625732421875, 0.2630420923233032, 0.4238579273223877, 0.5846737623214722, 0.7454895973205566, 0.9063054323196411, 1.0671212673187256, 1.22793710231781, 1.3887529373168945, 1.549568772315979, 1.7103846073150635, 1.871200442314148, 2.0320162773132324, 2.1928319931030273, 2.3536479473114014, 2.5144639015197754, 2.6752796173095703, 2.8360953330993652, 2.9969112873077393, 3.1577272415161133, 3.318542957305908, 3.479358673095703, 3.640174627304077, 3.800990581512451, 3.961806297302246, 4.122622013092041, 4.283437728881836, 4.444253921508789, 4.605069637298584, 4.765885353088379, 4.926701545715332, 5.087517261505127, 5.248332977294922]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 5.0, 8.0, 4.0, 9.0, 10.0, 21.0, 35.0, 48.0, 81.0, 156.0, 254.0, 544.0, 1508.0, 4665.0, 22190.0, 232088.0, 3267693.0, 614942.0, 38892.0, 7448.0, 2128.0, 776.0, 321.0, 172.0, 100.0, 71.0, 37.0, 25.0, 15.0, 7.0, 14.0, 5.0, 6.0, 2.0, 4.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.58642578125, -0.5626296997070312, -0.5388336181640625, -0.5150375366210938, -0.491241455078125, -0.46744537353515625, -0.4436492919921875, -0.41985321044921875, -0.39605712890625, -0.37226104736328125, -0.3484649658203125, -0.32466888427734375, -0.300872802734375, -0.27707672119140625, -0.2532806396484375, -0.22948455810546875, -0.2056884765625, -0.18189239501953125, -0.1580963134765625, -0.13430023193359375, -0.110504150390625, -0.08670806884765625, -0.0629119873046875, -0.03911590576171875, -0.01531982421875, 0.00847625732421875, 0.0322723388671875, 0.05606842041015625, 0.079864501953125, 0.10366058349609375, 0.1274566650390625, 0.15125274658203125, 0.175048828125, 0.19884490966796875, 0.2226409912109375, 0.24643707275390625, 0.270233154296875, 0.29402923583984375, 0.3178253173828125, 0.34162139892578125, 0.36541748046875, 0.38921356201171875, 0.4130096435546875, 0.43680572509765625, 0.460601806640625, 0.48439788818359375, 0.5081939697265625, 0.5319900512695312, 0.5557861328125, 0.5795822143554688, 0.6033782958984375, 0.6271743774414062, 0.650970458984375, 0.6747665405273438, 0.6985626220703125, 0.7223587036132812, 0.74615478515625, 0.7699508666992188, 0.7937469482421875, 0.8175430297851562, 0.841339111328125, 0.8651351928710938, 0.8889312744140625, 0.9127273559570312, 0.9365234375]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 3.0, 2.0, 6.0, 4.0, 11.0, 11.0, 11.0, 15.0, 15.0, 22.0, 23.0, 21.0, 31.0, 21.0, 36.0, 46.0, 42.0, 65.0, 61.0, 49.0, 51.0, 45.0, 46.0, 36.0, 60.0, 38.0, 45.0, 39.0, 24.0, 24.0, 21.0, 14.0, 11.0, 14.0, 12.0, 11.0, 5.0, 1.0, 4.0, 3.0, 7.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.333984375, -0.3247489929199219, -0.31551361083984375, -0.3062782287597656, -0.2970428466796875, -0.2878074645996094, -0.27857208251953125, -0.2693367004394531, -0.260101318359375, -0.2508659362792969, -0.24163055419921875, -0.23239517211914062, -0.2231597900390625, -0.21392440795898438, -0.20468902587890625, -0.19545364379882812, -0.18621826171875, -0.17698287963867188, -0.16774749755859375, -0.15851211547851562, -0.1492767333984375, -0.14004135131835938, -0.13080596923828125, -0.12157058715820312, -0.112335205078125, -0.10309982299804688, -0.09386444091796875, -0.08462905883789062, -0.0753936767578125, -0.06615829467773438, -0.05692291259765625, -0.047687530517578125, -0.0384521484375, -0.029216766357421875, -0.01998138427734375, -0.010746002197265625, -0.0015106201171875, 0.007724761962890625, 0.01696014404296875, 0.026195526123046875, 0.035430908203125, 0.044666290283203125, 0.05390167236328125, 0.06313705444335938, 0.0723724365234375, 0.08160781860351562, 0.09084320068359375, 0.10007858276367188, 0.10931396484375, 0.11854934692382812, 0.12778472900390625, 0.13702011108398438, 0.1462554931640625, 0.15549087524414062, 0.16472625732421875, 0.17396163940429688, 0.183197021484375, 0.19243240356445312, 0.20166778564453125, 0.21090316772460938, 0.2201385498046875, 0.22937393188476562, 0.23860931396484375, 0.24784469604492188, 0.257080078125]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 5.0, 12.0, 11.0, 17.0, 32.0, 49.0, 72.0, 155.0, 323.0, 560.0, 1533.0, 5149.0, 24859.0, 316449.0, 3642266.0, 179366.0, 17410.0, 3765.0, 1150.0, 527.0, 258.0, 135.0, 68.0, 34.0, 32.0, 14.0, 14.0, 8.0, 4.0, 3.0, 5.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.052734375, -1.0221481323242188, -0.9915618896484375, -0.9609756469726562, -0.930389404296875, -0.8998031616210938, -0.8692169189453125, -0.8386306762695312, -0.80804443359375, -0.7774581909179688, -0.7468719482421875, -0.7162857055664062, -0.685699462890625, -0.6551132202148438, -0.6245269775390625, -0.5939407348632812, -0.5633544921875, -0.5327682495117188, -0.5021820068359375, -0.47159576416015625, -0.441009521484375, -0.41042327880859375, -0.3798370361328125, -0.34925079345703125, -0.31866455078125, -0.28807830810546875, -0.2574920654296875, -0.22690582275390625, -0.196319580078125, -0.16573333740234375, -0.1351470947265625, -0.10456085205078125, -0.073974609375, -0.04338836669921875, -0.0128021240234375, 0.01778411865234375, 0.048370361328125, 0.07895660400390625, 0.1095428466796875, 0.14012908935546875, 0.17071533203125, 0.20130157470703125, 0.2318878173828125, 0.26247406005859375, 0.293060302734375, 0.32364654541015625, 0.3542327880859375, 0.38481903076171875, 0.4154052734375, 0.44599151611328125, 0.4765777587890625, 0.5071640014648438, 0.537750244140625, 0.5683364868164062, 0.5989227294921875, 0.6295089721679688, 0.66009521484375, 0.6906814575195312, 0.7212677001953125, 0.7518539428710938, 0.782440185546875, 0.8130264282226562, 0.8436126708984375, 0.8741989135742188, 0.90478515625]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 5.0, 8.0, 4.0, 12.0, 7.0, 10.0, 7.0, 17.0, 19.0, 32.0, 45.0, 74.0, 129.0, 182.0, 349.0, 526.0, 688.0, 722.0, 497.0, 292.0, 132.0, 116.0, 62.0, 44.0, 32.0, 20.0, 12.0, 13.0, 7.0, 3.0, 4.0, 0.0, 3.0, 3.0, 1.0, 0.0, 2.0, 4.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.92236328125, -0.8986053466796875, -0.874847412109375, -0.8510894775390625, -0.82733154296875, -0.8035736083984375, -0.779815673828125, -0.7560577392578125, -0.7322998046875, -0.7085418701171875, -0.684783935546875, -0.6610260009765625, -0.63726806640625, -0.6135101318359375, -0.589752197265625, -0.5659942626953125, -0.542236328125, -0.5184783935546875, -0.494720458984375, -0.4709625244140625, -0.44720458984375, -0.4234466552734375, -0.399688720703125, -0.3759307861328125, -0.3521728515625, -0.3284149169921875, -0.304656982421875, -0.2808990478515625, -0.25714111328125, -0.2333831787109375, -0.209625244140625, -0.1858673095703125, -0.162109375, -0.1383514404296875, -0.114593505859375, -0.0908355712890625, -0.06707763671875, -0.0433197021484375, -0.019561767578125, 0.0041961669921875, 0.0279541015625, 0.0517120361328125, 0.075469970703125, 0.0992279052734375, 0.12298583984375, 0.1467437744140625, 0.170501708984375, 0.1942596435546875, 0.218017578125, 0.2417755126953125, 0.265533447265625, 0.2892913818359375, 0.31304931640625, 0.3368072509765625, 0.360565185546875, 0.3843231201171875, 0.4080810546875, 0.4318389892578125, 0.455596923828125, 0.4793548583984375, 0.50311279296875, 0.5268707275390625, 0.550628662109375, 0.5743865966796875, 0.59814453125]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 3.0, 3.0, 6.0, 14.0, 31.0, 68.0, 120.0, 167.0, 187.0, 183.0, 100.0, 52.0, 30.0, 16.0, 5.0, 8.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.0898027420043945, -4.85304594039917, -4.616289138793945, -4.379532337188721, -4.142775535583496, -3.9060187339782715, -3.669261932373047, -3.4325051307678223, -3.1957483291625977, -2.958991527557373, -2.7222347259521484, -2.485477924346924, -2.248721122741699, -2.0119643211364746, -1.77520751953125, -1.5384507179260254, -1.3016939163208008, -1.0649371147155762, -0.8281803131103516, -0.591423511505127, -0.35466670989990234, -0.11790990829467773, 0.11884689331054688, 0.3556036949157715, 0.5923604965209961, 0.8291172981262207, 1.0658740997314453, 1.30263090133667, 1.5393877029418945, 1.7761445045471191, 2.0129013061523438, 2.2496581077575684, 2.486414909362793, 2.7231717109680176, 2.959928512573242, 3.196685314178467, 3.4334421157836914, 3.670198917388916, 3.9069557189941406, 4.143712520599365, 4.38046932220459, 4.6172261238098145, 4.853982925415039, 5.090739727020264, 5.327496528625488, 5.564253330230713, 5.8010101318359375, 6.037766933441162, 6.274523735046387, 6.511280536651611, 6.748037338256836, 6.9847941398620605, 7.221550941467285, 7.45830774307251, 7.695064544677734, 7.931821346282959, 8.168578147888184, 8.40533447265625, 8.642091751098633, 8.878849029541016, 9.115605354309082, 9.352361679077148, 9.589118957519531, 9.825876235961914, 10.06263256072998]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 4.0, 6.0, 4.0, 6.0, 14.0, 13.0, 16.0, 21.0, 27.0, 39.0, 35.0, 41.0, 44.0, 51.0, 55.0, 91.0, 50.0, 62.0, 55.0, 48.0, 54.0, 57.0, 40.0, 30.0, 34.0, 28.0, 21.0, 15.0, 10.0, 12.0, 6.0, 6.0, 1.0, 3.0, 5.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.12051248550415, -3.9956350326538086, -3.870757818222046, -3.745880603790283, -3.6210031509399414, -3.4961256980895996, -3.371248483657837, -3.246371269226074, -3.1214938163757324, -2.9966163635253906, -2.871739149093628, -2.7468619346618652, -2.6219844818115234, -2.4971070289611816, -2.372229814529419, -2.2473526000976562, -2.1224751472473145, -1.9975978136062622, -1.87272047996521, -1.7478431463241577, -1.6229658126831055, -1.4980884790420532, -1.373211145401001, -1.2483338117599487, -1.1234564781188965, -0.9985791444778442, -0.873701810836792, -0.7488244771957397, -0.6239471435546875, -0.49906980991363525, -0.374192476272583, -0.24931514263153076, -0.12443804740905762, 0.0004392862319946289, 0.12531661987304688, 0.2501939535140991, 0.37507128715515137, 0.4999486207962036, 0.6248259544372559, 0.7497032880783081, 0.8745806217193604, 0.9994579553604126, 1.1243352890014648, 1.249212622642517, 1.3740899562835693, 1.4989672899246216, 1.6238446235656738, 1.748721957206726, 1.8735992908477783, 1.9984766244888306, 2.123353958129883, 2.2482314109802246, 2.3731086254119873, 2.49798583984375, 2.622863292694092, 2.7477407455444336, 2.8726179599761963, 2.997495174407959, 3.122372627258301, 3.2472500801086426, 3.3721272945404053, 3.497004508972168, 3.6218819618225098, 3.7467594146728516, 3.8716366291046143]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 2.0, 6.0, 5.0, 3.0, 9.0, 9.0, 15.0, 19.0, 24.0, 36.0, 72.0, 97.0, 198.0, 367.0, 700.0, 1501.0, 3333.0, 9044.0, 28027.0, 97554.0, 293362.0, 382472.0, 161451.0, 46940.0, 13984.0, 5029.0, 2169.0, 972.0, 521.0, 262.0, 131.0, 97.0, 51.0, 29.0, 16.0, 13.0, 8.0, 8.0, 9.0, 3.0, 2.0, 4.0, 2.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.7607421875, -0.7391357421875, -0.717529296875, -0.6959228515625, -0.67431640625, -0.6527099609375, -0.631103515625, -0.6094970703125, -0.587890625, -0.5662841796875, -0.544677734375, -0.5230712890625, -0.50146484375, -0.4798583984375, -0.458251953125, -0.4366455078125, -0.4150390625, -0.3934326171875, -0.371826171875, -0.3502197265625, -0.32861328125, -0.3070068359375, -0.285400390625, -0.2637939453125, -0.2421875, -0.2205810546875, -0.198974609375, -0.1773681640625, -0.15576171875, -0.1341552734375, -0.112548828125, -0.0909423828125, -0.0693359375, -0.0477294921875, -0.026123046875, -0.0045166015625, 0.01708984375, 0.0386962890625, 0.060302734375, 0.0819091796875, 0.103515625, 0.1251220703125, 0.146728515625, 0.1683349609375, 0.18994140625, 0.2115478515625, 0.233154296875, 0.2547607421875, 0.2763671875, 0.2979736328125, 0.319580078125, 0.3411865234375, 0.36279296875, 0.3843994140625, 0.406005859375, 0.4276123046875, 0.44921875, 0.4708251953125, 0.492431640625, 0.5140380859375, 0.53564453125, 0.5572509765625, 0.578857421875, 0.6004638671875, 0.6220703125]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 6.0, 2.0, 6.0, 2.0, 6.0, 10.0, 14.0, 11.0, 17.0, 23.0, 27.0, 35.0, 30.0, 38.0, 41.0, 41.0, 51.0, 60.0, 53.0, 71.0, 41.0, 64.0, 38.0, 45.0, 43.0, 39.0, 35.0, 28.0, 31.0, 21.0, 15.0, 13.0, 10.0, 11.0, 12.0, 9.0, 4.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.344482421875, -0.33420562744140625, -0.3239288330078125, -0.31365203857421875, -0.303375244140625, -0.29309844970703125, -0.2828216552734375, -0.27254486083984375, -0.26226806640625, -0.25199127197265625, -0.2417144775390625, -0.23143768310546875, -0.221160888671875, -0.21088409423828125, -0.2006072998046875, -0.19033050537109375, -0.1800537109375, -0.16977691650390625, -0.1595001220703125, -0.14922332763671875, -0.138946533203125, -0.12866973876953125, -0.1183929443359375, -0.10811614990234375, -0.09783935546875, -0.08756256103515625, -0.0772857666015625, -0.06700897216796875, -0.056732177734375, -0.04645538330078125, -0.0361785888671875, -0.02590179443359375, -0.015625, -0.00534820556640625, 0.0049285888671875, 0.01520538330078125, 0.025482177734375, 0.03575897216796875, 0.0460357666015625, 0.05631256103515625, 0.06658935546875, 0.07686614990234375, 0.0871429443359375, 0.09741973876953125, 0.107696533203125, 0.11797332763671875, 0.1282501220703125, 0.13852691650390625, 0.1488037109375, 0.15908050537109375, 0.1693572998046875, 0.17963409423828125, 0.189910888671875, 0.20018768310546875, 0.2104644775390625, 0.22074127197265625, 0.23101806640625, 0.24129486083984375, 0.2515716552734375, 0.26184844970703125, 0.272125244140625, 0.28240203857421875, 0.2926788330078125, 0.30295562744140625, 0.313232421875]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 2.0, 5.0, 2.0, 6.0, 5.0, 5.0, 10.0, 7.0, 11.0, 11.0, 23.0, 26.0, 25.0, 44.0, 63.0, 91.0, 118.0, 252.0, 465.0, 984.0, 2740.0, 10557.0, 52131.0, 284272.0, 580017.0, 92336.0, 17556.0, 4143.0, 1324.0, 540.0, 246.0, 159.0, 112.0, 72.0, 41.0, 44.0, 33.0, 24.0, 12.0, 14.0, 6.0, 6.0, 6.0, 6.0, 4.0, 4.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.7841796875, -0.75531005859375, -0.7264404296875, -0.69757080078125, -0.668701171875, -0.63983154296875, -0.6109619140625, -0.58209228515625, -0.55322265625, -0.52435302734375, -0.4954833984375, -0.46661376953125, -0.437744140625, -0.40887451171875, -0.3800048828125, -0.35113525390625, -0.322265625, -0.29339599609375, -0.2645263671875, -0.23565673828125, -0.206787109375, -0.17791748046875, -0.1490478515625, -0.12017822265625, -0.09130859375, -0.06243896484375, -0.0335693359375, -0.00469970703125, 0.024169921875, 0.05303955078125, 0.0819091796875, 0.11077880859375, 0.1396484375, 0.16851806640625, 0.1973876953125, 0.22625732421875, 0.255126953125, 0.28399658203125, 0.3128662109375, 0.34173583984375, 0.37060546875, 0.39947509765625, 0.4283447265625, 0.45721435546875, 0.486083984375, 0.51495361328125, 0.5438232421875, 0.57269287109375, 0.6015625, 0.63043212890625, 0.6593017578125, 0.68817138671875, 0.717041015625, 0.74591064453125, 0.7747802734375, 0.80364990234375, 0.83251953125, 0.86138916015625, 0.8902587890625, 0.91912841796875, 0.947998046875, 0.97686767578125, 1.0057373046875, 1.03460693359375, 1.0634765625]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [5.0, 2.0, 0.0, 2.0, 2.0, 7.0, 5.0, 5.0, 2.0, 2.0, 7.0, 15.0, 7.0, 14.0, 12.0, 16.0, 23.0, 18.0, 22.0, 21.0, 21.0, 33.0, 27.0, 22.0, 44.0, 37.0, 41.0, 41.0, 31.0, 45.0, 53.0, 48.0, 28.0, 36.0, 42.0, 23.0, 43.0, 22.0, 25.0, 20.0, 25.0, 22.0, 18.0, 13.0, 9.0, 8.0, 7.0, 9.0, 8.0, 8.0, 4.0, 2.0, 4.0, 1.0, 3.0, 1.0, 3.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.82568359375, -0.797943115234375, -0.77020263671875, -0.742462158203125, -0.7147216796875, -0.686981201171875, -0.65924072265625, -0.631500244140625, -0.603759765625, -0.576019287109375, -0.54827880859375, -0.520538330078125, -0.4927978515625, -0.465057373046875, -0.43731689453125, -0.409576416015625, -0.3818359375, -0.354095458984375, -0.32635498046875, -0.298614501953125, -0.2708740234375, -0.243133544921875, -0.21539306640625, -0.187652587890625, -0.159912109375, -0.132171630859375, -0.10443115234375, -0.076690673828125, -0.0489501953125, -0.021209716796875, 0.00653076171875, 0.034271240234375, 0.06201171875, 0.089752197265625, 0.11749267578125, 0.145233154296875, 0.1729736328125, 0.200714111328125, 0.22845458984375, 0.256195068359375, 0.283935546875, 0.311676025390625, 0.33941650390625, 0.367156982421875, 0.3948974609375, 0.422637939453125, 0.45037841796875, 0.478118896484375, 0.505859375, 0.533599853515625, 0.56134033203125, 0.589080810546875, 0.6168212890625, 0.644561767578125, 0.67230224609375, 0.700042724609375, 0.727783203125, 0.755523681640625, 0.78326416015625, 0.811004638671875, 0.8387451171875, 0.866485595703125, 0.89422607421875, 0.921966552734375, 0.94970703125]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 5.0, 4.0, 7.0, 12.0, 11.0, 23.0, 51.0, 54.0, 107.0, 199.0, 513.0, 1148.0, 3274.0, 9840.0, 34891.0, 138254.0, 585450.0, 203091.0, 50680.0, 13853.0, 4364.0, 1496.0, 609.0, 274.0, 131.0, 78.0, 55.0, 32.0, 16.0, 10.0, 13.0, 6.0, 4.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.2978515625, -0.2897224426269531, -0.28159332275390625, -0.2734642028808594, -0.2653350830078125, -0.2572059631347656, -0.24907684326171875, -0.24094772338867188, -0.232818603515625, -0.22468948364257812, -0.21656036376953125, -0.20843124389648438, -0.2003021240234375, -0.19217300415039062, -0.18404388427734375, -0.17591476440429688, -0.16778564453125, -0.15965652465820312, -0.15152740478515625, -0.14339828491210938, -0.1352691650390625, -0.12714004516601562, -0.11901092529296875, -0.11088180541992188, -0.102752685546875, -0.09462356567382812, -0.08649444580078125, -0.07836532592773438, -0.0702362060546875, -0.062107086181640625, -0.05397796630859375, -0.045848846435546875, -0.0377197265625, -0.029590606689453125, -0.02146148681640625, -0.013332366943359375, -0.0052032470703125, 0.002925872802734375, 0.01105499267578125, 0.019184112548828125, 0.027313232421875, 0.035442352294921875, 0.04357147216796875, 0.051700592041015625, 0.0598297119140625, 0.06795883178710938, 0.07608795166015625, 0.08421707153320312, 0.09234619140625, 0.10047531127929688, 0.10860443115234375, 0.11673355102539062, 0.1248626708984375, 0.13299179077148438, 0.14112091064453125, 0.14925003051757812, 0.157379150390625, 0.16550827026367188, 0.17363739013671875, 0.18176651000976562, 0.1898956298828125, 0.19802474975585938, 0.20615386962890625, 0.21428298950195312, 0.222412109375]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 12.0, 8.0, 13.0, 11.0, 18.0, 22.0, 26.0, 53.0, 60.0, 80.0, 112.0, 91.0, 88.0, 92.0, 71.0, 59.0, 60.0, 24.0, 34.0, 24.0, 11.0, 13.0, 8.0, 6.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.104873657226562e-05, -6.851367652416229e-05, -6.597861647605896e-05, -6.344355642795563e-05, -6.0908496379852295e-05, -5.837343633174896e-05, -5.583837628364563e-05, -5.33033162355423e-05, -5.0768256187438965e-05, -4.823319613933563e-05, -4.56981360912323e-05, -4.316307604312897e-05, -4.0628015995025635e-05, -3.80929559469223e-05, -3.555789589881897e-05, -3.302283585071564e-05, -3.0487775802612305e-05, -2.7952715754508972e-05, -2.541765570640564e-05, -2.2882595658302307e-05, -2.0347535610198975e-05, -1.7812475562095642e-05, -1.527741551399231e-05, -1.2742355465888977e-05, -1.0207295417785645e-05, -7.672235369682312e-06, -5.1371753215789795e-06, -2.602115273475647e-06, -6.705522537231445e-08, 2.468004822731018e-06, 5.003064870834351e-06, 7.538124918937683e-06, 1.0073184967041016e-05, 1.2608245015144348e-05, 1.514330506324768e-05, 1.7678365111351013e-05, 2.0213425159454346e-05, 2.2748485207557678e-05, 2.528354525566101e-05, 2.7818605303764343e-05, 3.0353665351867676e-05, 3.288872539997101e-05, 3.542378544807434e-05, 3.795884549617767e-05, 4.0493905544281006e-05, 4.302896559238434e-05, 4.556402564048767e-05, 4.8099085688591003e-05, 5.0634145736694336e-05, 5.316920578479767e-05, 5.5704265832901e-05, 5.8239325881004333e-05, 6.0774385929107666e-05, 6.3309445977211e-05, 6.584450602531433e-05, 6.837956607341766e-05, 7.0914626121521e-05, 7.344968616962433e-05, 7.598474621772766e-05, 7.8519806265831e-05, 8.105486631393433e-05, 8.358992636203766e-05, 8.612498641014099e-05, 8.866004645824432e-05, 9.119510650634766e-05]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 2.0, 1.0, 3.0, 10.0, 5.0, 18.0, 15.0, 20.0, 34.0, 70.0, 142.0, 355.0, 880.0, 3080.0, 16347.0, 124212.0, 708727.0, 167404.0, 21554.0, 3868.0, 1019.0, 401.0, 156.0, 90.0, 58.0, 25.0, 16.0, 20.0, 8.0, 9.0, 3.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4814453125, -0.468963623046875, -0.45648193359375, -0.444000244140625, -0.4315185546875, -0.419036865234375, -0.40655517578125, -0.394073486328125, -0.381591796875, -0.369110107421875, -0.35662841796875, -0.344146728515625, -0.3316650390625, -0.319183349609375, -0.30670166015625, -0.294219970703125, -0.28173828125, -0.269256591796875, -0.25677490234375, -0.244293212890625, -0.2318115234375, -0.219329833984375, -0.20684814453125, -0.194366455078125, -0.181884765625, -0.169403076171875, -0.15692138671875, -0.144439697265625, -0.1319580078125, -0.119476318359375, -0.10699462890625, -0.094512939453125, -0.08203125, -0.069549560546875, -0.05706787109375, -0.044586181640625, -0.0321044921875, -0.019622802734375, -0.00714111328125, 0.005340576171875, 0.017822265625, 0.030303955078125, 0.04278564453125, 0.055267333984375, 0.0677490234375, 0.080230712890625, 0.09271240234375, 0.105194091796875, 0.11767578125, 0.130157470703125, 0.14263916015625, 0.155120849609375, 0.1676025390625, 0.180084228515625, 0.19256591796875, 0.205047607421875, 0.217529296875, 0.230010986328125, 0.24249267578125, 0.254974365234375, 0.2674560546875, 0.279937744140625, 0.29241943359375, 0.304901123046875, 0.3173828125]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 4.0, 2.0, 6.0, 4.0, 11.0, 8.0, 20.0, 19.0, 28.0, 32.0, 37.0, 38.0, 51.0, 60.0, 56.0, 65.0, 70.0, 82.0, 60.0, 62.0, 62.0, 44.0, 38.0, 27.0, 17.0, 22.0, 23.0, 14.0, 8.0, 13.0, 8.0, 5.0, 4.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.23779296875, -0.22983932495117188, -0.22188568115234375, -0.21393203735351562, -0.2059783935546875, -0.19802474975585938, -0.19007110595703125, -0.18211746215820312, -0.174163818359375, -0.16621017456054688, -0.15825653076171875, -0.15030288696289062, -0.1423492431640625, -0.13439559936523438, -0.12644195556640625, -0.11848831176757812, -0.11053466796875, -0.10258102416992188, -0.09462738037109375, -0.08667373657226562, -0.0787200927734375, -0.07076644897460938, -0.06281280517578125, -0.054859161376953125, -0.046905517578125, -0.038951873779296875, -0.03099822998046875, -0.023044586181640625, -0.0150909423828125, -0.007137298583984375, 0.00081634521484375, 0.008769989013671875, 0.0167236328125, 0.024677276611328125, 0.03263092041015625, 0.040584564208984375, 0.0485382080078125, 0.056491851806640625, 0.06444549560546875, 0.07239913940429688, 0.080352783203125, 0.08830642700195312, 0.09626007080078125, 0.10421371459960938, 0.1121673583984375, 0.12012100219726562, 0.12807464599609375, 0.13602828979492188, 0.14398193359375, 0.15193557739257812, 0.15988922119140625, 0.16784286499023438, 0.1757965087890625, 0.18375015258789062, 0.19170379638671875, 0.19965744018554688, 0.207611083984375, 0.21556472778320312, 0.22351837158203125, 0.23147201538085938, 0.2394256591796875, 0.24737930297851562, 0.25533294677734375, 0.2632865905761719, 0.271240234375]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 4.0, 3.0, 10.0, 12.0, 22.0, 75.0, 130.0, 308.0, 255.0, 120.0, 43.0, 13.0, 2.0, 2.0, 4.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.501377105712891, -7.187282085418701, -6.873187065124512, -6.559092044830322, -6.244997024536133, -5.930902004241943, -5.616806983947754, -5.302712440490723, -4.988616943359375, -4.6745219230651855, -4.360426902770996, -4.046331882476807, -3.732236862182617, -3.4181418418884277, -3.1040470600128174, -2.789952039718628, -2.4758572578430176, -2.161762237548828, -1.8476672172546387, -1.5335723161697388, -1.2194772958755493, -0.9053822755813599, -0.59128737449646, -0.2771923542022705, 0.036902666091918945, 0.350997656583786, 0.6650926470756531, 0.9791876077651978, 1.2932826280593872, 1.6073776483535767, 1.9214725494384766, 2.235567569732666, 2.5496625900268555, 2.863757610321045, 3.1778526306152344, 3.491947650909424, 3.8060426712036133, 4.120137691497803, 4.434232711791992, 4.748327255249023, 5.062422752380371, 5.3765177726745605, 5.69061279296875, 6.0047078132629395, 6.318802833557129, 6.632897853851318, 6.946992874145508, 7.261087417602539, 7.5751824378967285, 7.889277458190918, 8.20337200164795, 8.517467498779297, 8.831562042236328, 9.145657539367676, 9.459752082824707, 9.773847579956055, 10.087942123413086, 10.402036666870117, 10.716132164001465, 11.030226707458496, 11.344322204589844, 11.658416748046875, 11.972512245178223, 12.286606788635254, 12.600702285766602]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 5.0, 2.0, 6.0, 3.0, 4.0, 13.0, 5.0, 8.0, 9.0, 14.0, 16.0, 7.0, 23.0, 12.0, 14.0, 25.0, 28.0, 24.0, 30.0, 30.0, 46.0, 48.0, 70.0, 75.0, 70.0, 56.0, 49.0, 28.0, 42.0, 32.0, 35.0, 27.0, 25.0, 20.0, 16.0, 23.0, 12.0, 13.0, 11.0, 7.0, 4.0, 6.0, 4.0, 4.0, 3.0, 3.0, 2.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-3.901181221008301, -3.781344413757324, -3.6615076065063477, -3.541670799255371, -3.4218339920043945, -3.301997184753418, -3.1821601390838623, -3.0623233318328857, -2.942486524581909, -2.8226497173309326, -2.702812910079956, -2.5829761028289795, -2.463139057159424, -2.3433022499084473, -2.2234654426574707, -2.103628635406494, -1.9837918281555176, -1.863955020904541, -1.7441182136535645, -1.6242812871932983, -1.5044444799423218, -1.3846076726913452, -1.264770746231079, -1.1449339389801025, -1.025097131729126, -0.9052603244781494, -0.7854234576225281, -0.6655865907669067, -0.5457497835159302, -0.4259129762649536, -0.3060761094093323, -0.18623924255371094, -0.06640267372131348, 0.053434163331985474, 0.17327100038528442, 0.2931078374385834, 0.4129446744918823, 0.5327814817428589, 0.6526183485984802, 0.7724552154541016, 0.8922920227050781, 1.0121288299560547, 1.1319656372070312, 1.2518025636672974, 1.371639370918274, 1.4914761781692505, 1.6113131046295166, 1.7311499118804932, 1.8509867191314697, 1.9708235263824463, 2.090660333633423, 2.2104971408843994, 2.330334186553955, 2.4501709938049316, 2.570007801055908, 2.6898446083068848, 2.8096814155578613, 2.929518222808838, 3.0493550300598145, 3.169191837310791, 3.2890286445617676, 3.408865451812744, 3.5287024974823, 3.6485393047332764, 3.768376111984253]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 5.0, 6.0, 6.0, 3.0, 7.0, 9.0, 21.0, 32.0, 39.0, 64.0, 170.0, 263.0, 520.0, 1288.0, 4660.0, 27286.0, 464856.0, 3397993.0, 270234.0, 20425.0, 4023.0, 1270.0, 505.0, 247.0, 169.0, 68.0, 46.0, 30.0, 15.0, 11.0, 6.0, 3.0, 3.0, 4.0, 5.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7236328125, -0.6981735229492188, -0.6727142333984375, -0.6472549438476562, -0.621795654296875, -0.5963363647460938, -0.5708770751953125, -0.5454177856445312, -0.51995849609375, -0.49449920654296875, -0.4690399169921875, -0.44358062744140625, -0.418121337890625, -0.39266204833984375, -0.3672027587890625, -0.34174346923828125, -0.3162841796875, -0.29082489013671875, -0.2653656005859375, -0.23990631103515625, -0.214447021484375, -0.18898773193359375, -0.1635284423828125, -0.13806915283203125, -0.11260986328125, -0.08715057373046875, -0.0616912841796875, -0.03623199462890625, -0.010772705078125, 0.01468658447265625, 0.0401458740234375, 0.06560516357421875, 0.091064453125, 0.11652374267578125, 0.1419830322265625, 0.16744232177734375, 0.192901611328125, 0.21836090087890625, 0.2438201904296875, 0.26927947998046875, 0.29473876953125, 0.32019805908203125, 0.3456573486328125, 0.37111663818359375, 0.396575927734375, 0.42203521728515625, 0.4474945068359375, 0.47295379638671875, 0.4984130859375, 0.5238723754882812, 0.5493316650390625, 0.5747909545898438, 0.600250244140625, 0.6257095336914062, 0.6511688232421875, 0.6766281127929688, 0.70208740234375, 0.7275466918945312, 0.7530059814453125, 0.7784652709960938, 0.803924560546875, 0.8293838500976562, 0.8548431396484375, 0.8803024291992188, 0.90576171875]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 6.0, 4.0, 7.0, 10.0, 14.0, 12.0, 14.0, 25.0, 14.0, 33.0, 25.0, 36.0, 41.0, 38.0, 58.0, 51.0, 50.0, 64.0, 50.0, 52.0, 49.0, 51.0, 38.0, 39.0, 42.0, 29.0, 38.0, 18.0, 20.0, 14.0, 17.0, 12.0, 13.0, 6.0, 6.0, 3.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 3.0, 1.0, 2.0], "bins": [-0.354736328125, -0.34516143798828125, -0.3355865478515625, -0.32601165771484375, -0.316436767578125, -0.30686187744140625, -0.2972869873046875, -0.28771209716796875, -0.27813720703125, -0.26856231689453125, -0.2589874267578125, -0.24941253662109375, -0.239837646484375, -0.23026275634765625, -0.2206878662109375, -0.21111297607421875, -0.2015380859375, -0.19196319580078125, -0.1823883056640625, -0.17281341552734375, -0.163238525390625, -0.15366363525390625, -0.1440887451171875, -0.13451385498046875, -0.12493896484375, -0.11536407470703125, -0.1057891845703125, -0.09621429443359375, -0.086639404296875, -0.07706451416015625, -0.0674896240234375, -0.05791473388671875, -0.04833984375, -0.03876495361328125, -0.0291900634765625, -0.01961517333984375, -0.010040283203125, -0.00046539306640625, 0.0091094970703125, 0.01868438720703125, 0.02825927734375, 0.03783416748046875, 0.0474090576171875, 0.05698394775390625, 0.066558837890625, 0.07613372802734375, 0.0857086181640625, 0.09528350830078125, 0.1048583984375, 0.11443328857421875, 0.1240081787109375, 0.13358306884765625, 0.143157958984375, 0.15273284912109375, 0.1623077392578125, 0.17188262939453125, 0.18145751953125, 0.19103240966796875, 0.2006072998046875, 0.21018218994140625, 0.219757080078125, 0.22933197021484375, 0.2389068603515625, 0.24848175048828125, 0.258056640625]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 3.0, 1.0, 5.0, 8.0, 7.0, 14.0, 27.0, 19.0, 40.0, 62.0, 101.0, 260.0, 680.0, 2533.0, 16629.0, 340358.0, 3715270.0, 107181.0, 8685.0, 1505.0, 443.0, 192.0, 109.0, 45.0, 43.0, 20.0, 17.0, 16.0, 5.0, 6.0, 3.0, 1.0, 0.0, 5.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.060546875, -1.0244140625, -0.98828125, -0.9521484375, -0.916015625, -0.8798828125, -0.84375, -0.8076171875, -0.771484375, -0.7353515625, -0.69921875, -0.6630859375, -0.626953125, -0.5908203125, -0.5546875, -0.5185546875, -0.482421875, -0.4462890625, -0.41015625, -0.3740234375, -0.337890625, -0.3017578125, -0.265625, -0.2294921875, -0.193359375, -0.1572265625, -0.12109375, -0.0849609375, -0.048828125, -0.0126953125, 0.0234375, 0.0595703125, 0.095703125, 0.1318359375, 0.16796875, 0.2041015625, 0.240234375, 0.2763671875, 0.3125, 0.3486328125, 0.384765625, 0.4208984375, 0.45703125, 0.4931640625, 0.529296875, 0.5654296875, 0.6015625, 0.6376953125, 0.673828125, 0.7099609375, 0.74609375, 0.7822265625, 0.818359375, 0.8544921875, 0.890625, 0.9267578125, 0.962890625, 0.9990234375, 1.03515625, 1.0712890625, 1.107421875, 1.1435546875, 1.1796875, 1.2158203125, 1.251953125]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 2.0, 4.0, 3.0, 7.0, 11.0, 9.0, 21.0, 22.0, 22.0, 44.0, 53.0, 87.0, 123.0, 212.0, 364.0, 559.0, 681.0, 634.0, 461.0, 297.0, 156.0, 100.0, 72.0, 33.0, 33.0, 24.0, 11.0, 10.0, 9.0, 8.0, 1.0, 2.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.666015625, -0.642852783203125, -0.61968994140625, -0.596527099609375, -0.5733642578125, -0.550201416015625, -0.52703857421875, -0.503875732421875, -0.480712890625, -0.457550048828125, -0.43438720703125, -0.411224365234375, -0.3880615234375, -0.364898681640625, -0.34173583984375, -0.318572998046875, -0.29541015625, -0.272247314453125, -0.24908447265625, -0.225921630859375, -0.2027587890625, -0.179595947265625, -0.15643310546875, -0.133270263671875, -0.110107421875, -0.086944580078125, -0.06378173828125, -0.040618896484375, -0.0174560546875, 0.005706787109375, 0.02886962890625, 0.052032470703125, 0.0751953125, 0.098358154296875, 0.12152099609375, 0.144683837890625, 0.1678466796875, 0.191009521484375, 0.21417236328125, 0.237335205078125, 0.260498046875, 0.283660888671875, 0.30682373046875, 0.329986572265625, 0.3531494140625, 0.376312255859375, 0.39947509765625, 0.422637939453125, 0.44580078125, 0.468963623046875, 0.49212646484375, 0.515289306640625, 0.5384521484375, 0.561614990234375, 0.58477783203125, 0.607940673828125, 0.631103515625, 0.654266357421875, 0.67742919921875, 0.700592041015625, 0.7237548828125, 0.746917724609375, 0.77008056640625, 0.793243408203125, 0.81640625]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 6.0, 2.0, 5.0, 3.0, 11.0, 18.0, 24.0, 73.0, 124.0, 208.0, 187.0, 151.0, 102.0, 59.0, 20.0, 8.0, 8.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-11.71927261352539, -11.491153717041016, -11.263035774230957, -11.034916877746582, -10.806797981262207, -10.578680038452148, -10.350561141967773, -10.122442245483398, -9.894323348999023, -9.666204452514648, -9.43808650970459, -9.209967613220215, -8.98184871673584, -8.753730773925781, -8.525611877441406, -8.297492980957031, -8.069375038146973, -7.841256618499756, -7.613137722015381, -7.385019302368164, -7.156900405883789, -6.928781986236572, -6.7006635665893555, -6.4725446701049805, -6.244426250457764, -6.016307830810547, -5.788188934326172, -5.560070514678955, -5.331952095031738, -5.103833198547363, -4.8757147789001465, -4.64759635925293, -4.419477462768555, -4.191359043121338, -3.963240146636963, -3.735121726989746, -3.50700306892395, -3.2788844108581543, -3.0507659912109375, -2.8226473331451416, -2.5945284366607666, -2.3664097785949707, -2.138291358947754, -1.910172700881958, -1.682054042816162, -1.4539353847503662, -1.2258168458938599, -0.9976983070373535, -0.7695796489715576, -0.5414610505104065, -0.31334245204925537, -0.08522385358810425, 0.14289474487304688, 0.3710134029388428, 0.5991319417953491, 0.8272504806518555, 1.0553691387176514, 1.2834877967834473, 1.5116063356399536, 1.73972487449646, 1.9678435325622559, 2.1959621906280518, 2.4240808486938477, 2.6521992683410645, 2.8803179264068604]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 1.0, 0.0, 3.0, 4.0, 7.0, 3.0, 2.0, 8.0, 5.0, 8.0, 11.0, 9.0, 14.0, 21.0, 13.0, 18.0, 20.0, 24.0, 20.0, 28.0, 44.0, 30.0, 49.0, 33.0, 47.0, 38.0, 38.0, 42.0, 43.0, 37.0, 36.0, 31.0, 40.0, 27.0, 29.0, 35.0, 25.0, 23.0, 17.0, 17.0, 14.0, 15.0, 13.0, 16.0, 7.0, 9.0, 9.0, 5.0, 4.0, 5.0, 2.0, 6.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0], "bins": [-2.3750357627868652, -2.2939507961273193, -2.2128658294677734, -2.1317811012268066, -2.0506961345672607, -1.9696111679077148, -1.888526201248169, -1.807441234588623, -1.7263563871383667, -1.6452714204788208, -1.5641865730285645, -1.4831016063690186, -1.4020166397094727, -1.3209317922592163, -1.2398468255996704, -1.158761978149414, -1.0776770114898682, -0.996592104434967, -0.9155071973800659, -0.83442223072052, -0.7533373236656189, -0.6722524166107178, -0.5911674499511719, -0.5100825428962708, -0.42899763584136963, -0.3479127287864685, -0.266827791929245, -0.18574286997318268, -0.10465794801712036, -0.02357304096221924, 0.05751189589500427, 0.13859683275222778, 0.2196817398071289, 0.30076664686203003, 0.38185158371925354, 0.46293652057647705, 0.5440214276313782, 0.6251063346862793, 0.7061913013458252, 0.7872762084007263, 0.8683611154556274, 0.9494460225105286, 1.0305309295654297, 1.1116158962249756, 1.1927008628845215, 1.2737857103347778, 1.3548706769943237, 1.43595552444458, 1.517040491104126, 1.5981254577636719, 1.6792103052139282, 1.7602952718734741, 1.8413801193237305, 1.9224650859832764, 2.0035500526428223, 2.084635019302368, 2.165719985961914, 2.24680495262146, 2.327889919281006, 2.4089746475219727, 2.4900596141815186, 2.5711445808410645, 2.6522295475006104, 2.7333145141601562, 2.814399242401123]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 7.0, 5.0, 14.0, 26.0, 33.0, 41.0, 58.0, 110.0, 183.0, 325.0, 675.0, 1429.0, 3384.0, 8822.0, 24978.0, 76902.0, 241540.0, 409023.0, 188995.0, 59803.0, 19926.0, 7028.0, 2795.0, 1183.0, 537.0, 333.0, 151.0, 75.0, 63.0, 46.0, 17.0, 14.0, 16.0, 10.0, 3.0, 7.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0], "bins": [-0.6279296875, -0.6091232299804688, -0.5903167724609375, -0.5715103149414062, -0.552703857421875, -0.5338973999023438, -0.5150909423828125, -0.49628448486328125, -0.47747802734375, -0.45867156982421875, -0.4398651123046875, -0.42105865478515625, -0.402252197265625, -0.38344573974609375, -0.3646392822265625, -0.34583282470703125, -0.3270263671875, -0.30821990966796875, -0.2894134521484375, -0.27060699462890625, -0.251800537109375, -0.23299407958984375, -0.2141876220703125, -0.19538116455078125, -0.17657470703125, -0.15776824951171875, -0.1389617919921875, -0.12015533447265625, -0.101348876953125, -0.08254241943359375, -0.0637359619140625, -0.04492950439453125, -0.026123046875, -0.00731658935546875, 0.0114898681640625, 0.03029632568359375, 0.049102783203125, 0.06790924072265625, 0.0867156982421875, 0.10552215576171875, 0.12432861328125, 0.14313507080078125, 0.1619415283203125, 0.18074798583984375, 0.199554443359375, 0.21836090087890625, 0.2371673583984375, 0.25597381591796875, 0.2747802734375, 0.29358673095703125, 0.3123931884765625, 0.33119964599609375, 0.350006103515625, 0.36881256103515625, 0.3876190185546875, 0.40642547607421875, 0.42523193359375, 0.44403839111328125, 0.4628448486328125, 0.48165130615234375, 0.500457763671875, 0.5192642211914062, 0.5380706787109375, 0.5568771362304688, 0.57568359375]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 2.0, 3.0, 4.0, 1.0, 3.0, 5.0, 2.0, 6.0, 10.0, 19.0, 13.0, 17.0, 22.0, 25.0, 34.0, 33.0, 33.0, 41.0, 44.0, 53.0, 52.0, 53.0, 58.0, 58.0, 52.0, 53.0, 54.0, 36.0, 33.0, 34.0, 30.0, 23.0, 18.0, 18.0, 14.0, 17.0, 10.0, 9.0, 3.0, 8.0, 2.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.361572265625, -0.3513336181640625, -0.341094970703125, -0.3308563232421875, -0.32061767578125, -0.3103790283203125, -0.300140380859375, -0.2899017333984375, -0.2796630859375, -0.2694244384765625, -0.259185791015625, -0.2489471435546875, -0.23870849609375, -0.2284698486328125, -0.218231201171875, -0.2079925537109375, -0.19775390625, -0.1875152587890625, -0.177276611328125, -0.1670379638671875, -0.15679931640625, -0.1465606689453125, -0.136322021484375, -0.1260833740234375, -0.1158447265625, -0.1056060791015625, -0.095367431640625, -0.0851287841796875, -0.07489013671875, -0.0646514892578125, -0.054412841796875, -0.0441741943359375, -0.033935546875, -0.0236968994140625, -0.013458251953125, -0.0032196044921875, 0.00701904296875, 0.0172576904296875, 0.027496337890625, 0.0377349853515625, 0.0479736328125, 0.0582122802734375, 0.068450927734375, 0.0786895751953125, 0.08892822265625, 0.0991668701171875, 0.109405517578125, 0.1196441650390625, 0.1298828125, 0.1401214599609375, 0.150360107421875, 0.1605987548828125, 0.17083740234375, 0.1810760498046875, 0.191314697265625, 0.2015533447265625, 0.2117919921875, 0.2220306396484375, 0.232269287109375, 0.2425079345703125, 0.25274658203125, 0.2629852294921875, 0.273223876953125, 0.2834625244140625, 0.293701171875]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 3.0, 5.0, 7.0, 6.0, 13.0, 18.0, 29.0, 49.0, 66.0, 112.0, 151.0, 292.0, 472.0, 972.0, 2669.0, 13201.0, 129919.0, 791153.0, 94442.0, 10557.0, 2349.0, 852.0, 454.0, 282.0, 181.0, 111.0, 79.0, 44.0, 29.0, 18.0, 10.0, 9.0, 2.0, 6.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.98291015625, -0.9457321166992188, -0.9085540771484375, -0.8713760375976562, -0.834197998046875, -0.7970199584960938, -0.7598419189453125, -0.7226638793945312, -0.68548583984375, -0.6483078002929688, -0.6111297607421875, -0.5739517211914062, -0.536773681640625, -0.49959564208984375, -0.4624176025390625, -0.42523956298828125, -0.3880615234375, -0.35088348388671875, -0.3137054443359375, -0.27652740478515625, -0.239349365234375, -0.20217132568359375, -0.1649932861328125, -0.12781524658203125, -0.09063720703125, -0.05345916748046875, -0.0162811279296875, 0.02089691162109375, 0.058074951171875, 0.09525299072265625, 0.1324310302734375, 0.16960906982421875, 0.206787109375, 0.24396514892578125, 0.2811431884765625, 0.31832122802734375, 0.355499267578125, 0.39267730712890625, 0.4298553466796875, 0.46703338623046875, 0.50421142578125, 0.5413894653320312, 0.5785675048828125, 0.6157455444335938, 0.652923583984375, 0.6901016235351562, 0.7272796630859375, 0.7644577026367188, 0.8016357421875, 0.8388137817382812, 0.8759918212890625, 0.9131698608398438, 0.950347900390625, 0.9875259399414062, 1.0247039794921875, 1.0618820190429688, 1.09906005859375, 1.1362380981445312, 1.1734161376953125, 1.2105941772460938, 1.247772216796875, 1.2849502563476562, 1.3221282958984375, 1.3593063354492188, 1.396484375]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 6.0, 3.0, 7.0, 5.0, 3.0, 9.0, 13.0, 10.0, 15.0, 23.0, 21.0, 30.0, 42.0, 55.0, 56.0, 52.0, 53.0, 50.0, 64.0, 70.0, 68.0, 53.0, 34.0, 50.0, 42.0, 29.0, 29.0, 19.0, 15.0, 19.0, 13.0, 12.0, 12.0, 5.0, 7.0, 2.0, 3.0, 4.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.4384765625, -1.4010162353515625, -1.363555908203125, -1.3260955810546875, -1.28863525390625, -1.2511749267578125, -1.213714599609375, -1.1762542724609375, -1.1387939453125, -1.1013336181640625, -1.063873291015625, -1.0264129638671875, -0.98895263671875, -0.9514923095703125, -0.914031982421875, -0.8765716552734375, -0.839111328125, -0.8016510009765625, -0.764190673828125, -0.7267303466796875, -0.68927001953125, -0.6518096923828125, -0.614349365234375, -0.5768890380859375, -0.5394287109375, -0.5019683837890625, -0.464508056640625, -0.4270477294921875, -0.38958740234375, -0.3521270751953125, -0.314666748046875, -0.2772064208984375, -0.23974609375, -0.2022857666015625, -0.164825439453125, -0.1273651123046875, -0.08990478515625, -0.0524444580078125, -0.014984130859375, 0.0224761962890625, 0.0599365234375, 0.0973968505859375, 0.134857177734375, 0.1723175048828125, 0.20977783203125, 0.2472381591796875, 0.284698486328125, 0.3221588134765625, 0.359619140625, 0.3970794677734375, 0.434539794921875, 0.4720001220703125, 0.50946044921875, 0.5469207763671875, 0.584381103515625, 0.6218414306640625, 0.6593017578125, 0.6967620849609375, 0.734222412109375, 0.7716827392578125, 0.80914306640625, 0.8466033935546875, 0.884063720703125, 0.9215240478515625, 0.958984375]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 2.0, 4.0, 5.0, 3.0, 5.0, 3.0, 8.0, 12.0, 15.0, 18.0, 36.0, 71.0, 113.0, 193.0, 334.0, 648.0, 1411.0, 3080.0, 7769.0, 22140.0, 73021.0, 521404.0, 315494.0, 68660.0, 21082.0, 7400.0, 2969.0, 1237.0, 624.0, 315.0, 185.0, 103.0, 73.0, 39.0, 25.0, 20.0, 10.0, 4.0, 8.0, 6.0, 4.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.241455078125, -0.2339038848876953, -0.22635269165039062, -0.21880149841308594, -0.21125030517578125, -0.20369911193847656, -0.19614791870117188, -0.1885967254638672, -0.1810455322265625, -0.1734943389892578, -0.16594314575195312, -0.15839195251464844, -0.15084075927734375, -0.14328956604003906, -0.13573837280273438, -0.1281871795654297, -0.120635986328125, -0.11308479309082031, -0.10553359985351562, -0.09798240661621094, -0.09043121337890625, -0.08288002014160156, -0.07532882690429688, -0.06777763366699219, -0.0602264404296875, -0.05267524719238281, -0.045124053955078125, -0.03757286071777344, -0.03002166748046875, -0.022470474243164062, -0.014919281005859375, -0.0073680877685546875, 0.00018310546875, 0.0077342987060546875, 0.015285491943359375, 0.022836685180664062, 0.03038787841796875, 0.03793907165527344, 0.045490264892578125, 0.05304145812988281, 0.0605926513671875, 0.06814384460449219, 0.07569503784179688, 0.08324623107910156, 0.09079742431640625, 0.09834861755371094, 0.10589981079101562, 0.11345100402832031, 0.121002197265625, 0.1285533905029297, 0.13610458374023438, 0.14365577697753906, 0.15120697021484375, 0.15875816345214844, 0.16630935668945312, 0.1738605499267578, 0.1814117431640625, 0.1889629364013672, 0.19651412963867188, 0.20406532287597656, 0.21161651611328125, 0.21916770935058594, 0.22671890258789062, 0.2342700958251953, 0.2418212890625]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 6.0, 3.0, 3.0, 8.0, 14.0, 27.0, 34.0, 28.0, 61.0, 67.0, 131.0, 128.0, 117.0, 93.0, 72.0, 62.0, 26.0, 35.0, 26.0, 15.0, 9.0, 11.0, 5.0, 2.0, 5.0, 3.0, 1.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00011235475540161133, -0.00010947417467832565, -0.00010659359395503998, -0.0001037130132317543, -0.00010083243250846863, -9.795185178518295e-05, -9.507127106189728e-05, -9.21906903386116e-05, -8.931010961532593e-05, -8.642952889204025e-05, -8.354894816875458e-05, -8.06683674454689e-05, -7.778778672218323e-05, -7.490720599889755e-05, -7.202662527561188e-05, -6.91460445523262e-05, -6.626546382904053e-05, -6.338488310575485e-05, -6.050430238246918e-05, -5.76237216591835e-05, -5.474314093589783e-05, -5.186256021261215e-05, -4.898197948932648e-05, -4.61013987660408e-05, -4.322081804275513e-05, -4.034023731946945e-05, -3.745965659618378e-05, -3.45790758728981e-05, -3.169849514961243e-05, -2.8817914426326752e-05, -2.5937333703041077e-05, -2.30567529797554e-05, -2.0176172256469727e-05, -1.729559153318405e-05, -1.4415010809898376e-05, -1.1534430086612701e-05, -8.653849363327026e-06, -5.773268640041351e-06, -2.8926879167556763e-06, -1.210719347000122e-08, 2.868473529815674e-06, 5.749054253101349e-06, 8.629634976387024e-06, 1.1510215699672699e-05, 1.4390796422958374e-05, 1.727137714624405e-05, 2.0151957869529724e-05, 2.30325385928154e-05, 2.5913119316101074e-05, 2.879370003938675e-05, 3.1674280762672424e-05, 3.45548614859581e-05, 3.7435442209243774e-05, 4.031602293252945e-05, 4.3196603655815125e-05, 4.60771843791008e-05, 4.8957765102386475e-05, 5.183834582567215e-05, 5.4718926548957825e-05, 5.75995072722435e-05, 6.0480087995529175e-05, 6.336066871881485e-05, 6.624124944210052e-05, 6.91218301653862e-05, 7.200241088867188e-05]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 7.0, 5.0, 8.0, 15.0, 23.0, 17.0, 28.0, 54.0, 60.0, 107.0, 156.0, 255.0, 420.0, 749.0, 1487.0, 3322.0, 8438.0, 23998.0, 79742.0, 544925.0, 284143.0, 66698.0, 20406.0, 7379.0, 2941.0, 1341.0, 721.0, 425.0, 245.0, 141.0, 108.0, 61.0, 39.0, 26.0, 25.0, 12.0, 11.0, 6.0, 6.0, 4.0, 2.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.27197265625, -0.26460838317871094, -0.2572441101074219, -0.2498798370361328, -0.24251556396484375, -0.2351512908935547, -0.22778701782226562, -0.22042274475097656, -0.2130584716796875, -0.20569419860839844, -0.19832992553710938, -0.1909656524658203, -0.18360137939453125, -0.1762371063232422, -0.16887283325195312, -0.16150856018066406, -0.154144287109375, -0.14678001403808594, -0.13941574096679688, -0.1320514678955078, -0.12468719482421875, -0.11732292175292969, -0.10995864868164062, -0.10259437561035156, -0.0952301025390625, -0.08786582946777344, -0.08050155639648438, -0.07313728332519531, -0.06577301025390625, -0.05840873718261719, -0.051044464111328125, -0.04368019104003906, -0.03631591796875, -0.028951644897460938, -0.021587371826171875, -0.014223098754882812, -0.00685882568359375, 0.0005054473876953125, 0.007869720458984375, 0.015233993530273438, 0.0225982666015625, 0.029962539672851562, 0.037326812744140625, 0.04469108581542969, 0.05205535888671875, 0.05941963195800781, 0.06678390502929688, 0.07414817810058594, 0.081512451171875, 0.08887672424316406, 0.09624099731445312, 0.10360527038574219, 0.11096954345703125, 0.11833381652832031, 0.12569808959960938, 0.13306236267089844, 0.1404266357421875, 0.14779090881347656, 0.15515518188476562, 0.1625194549560547, 0.16988372802734375, 0.1772480010986328, 0.18461227416992188, 0.19197654724121094, 0.1993408203125]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 7.0, 1.0, 4.0, 6.0, 4.0, 6.0, 6.0, 6.0, 13.0, 18.0, 16.0, 24.0, 40.0, 45.0, 54.0, 71.0, 96.0, 103.0, 96.0, 80.0, 67.0, 66.0, 39.0, 46.0, 23.0, 17.0, 10.0, 13.0, 8.0, 7.0, 10.0, 0.0, 5.0, 0.0, 5.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3251953125, -0.3161773681640625, -0.307159423828125, -0.2981414794921875, -0.28912353515625, -0.2801055908203125, -0.271087646484375, -0.2620697021484375, -0.2530517578125, -0.2440338134765625, -0.235015869140625, -0.2259979248046875, -0.21697998046875, -0.2079620361328125, -0.198944091796875, -0.1899261474609375, -0.180908203125, -0.1718902587890625, -0.162872314453125, -0.1538543701171875, -0.14483642578125, -0.1358184814453125, -0.126800537109375, -0.1177825927734375, -0.1087646484375, -0.0997467041015625, -0.090728759765625, -0.0817108154296875, -0.07269287109375, -0.0636749267578125, -0.054656982421875, -0.0456390380859375, -0.03662109375, -0.0276031494140625, -0.018585205078125, -0.0095672607421875, -0.00054931640625, 0.0084686279296875, 0.017486572265625, 0.0265045166015625, 0.0355224609375, 0.0445404052734375, 0.053558349609375, 0.0625762939453125, 0.07159423828125, 0.0806121826171875, 0.089630126953125, 0.0986480712890625, 0.107666015625, 0.1166839599609375, 0.125701904296875, 0.1347198486328125, 0.14373779296875, 0.1527557373046875, 0.161773681640625, 0.1707916259765625, 0.1798095703125, 0.1888275146484375, 0.197845458984375, 0.2068634033203125, 0.21588134765625, 0.2248992919921875, 0.233917236328125, 0.2429351806640625, 0.251953125]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 6.0, 1.0, 9.0, 24.0, 34.0, 82.0, 148.0, 302.0, 211.0, 90.0, 41.0, 25.0, 10.0, 6.0, 6.0, 5.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.440393447875977, -6.190920829772949, -5.941448211669922, -5.6919755935668945, -5.442502975463867, -5.19303035736084, -4.9435577392578125, -4.694085121154785, -4.444612503051758, -4.1951398849487305, -3.945667266845703, -3.696194648742676, -3.4467220306396484, -3.197249412536621, -2.9477767944335938, -2.6983041763305664, -2.448831558227539, -2.1993589401245117, -1.9498863220214844, -1.700413703918457, -1.4509410858154297, -1.2014684677124023, -0.951995849609375, -0.7025232315063477, -0.4530506134033203, -0.20357799530029297, 0.045894622802734375, 0.2953672409057617, 0.5448398590087891, 0.7943124771118164, 1.0437850952148438, 1.293257713317871, 1.5427303314208984, 1.7922029495239258, 2.041675567626953, 2.2911481857299805, 2.540620803833008, 2.790093421936035, 3.0395660400390625, 3.28903865814209, 3.538511276245117, 3.7879838943481445, 4.037456512451172, 4.286929130554199, 4.536401748657227, 4.785874366760254, 5.035346984863281, 5.284819602966309, 5.534292221069336, 5.783764839172363, 6.033237457275391, 6.282710075378418, 6.532182693481445, 6.781655311584473, 7.0311279296875, 7.280600547790527, 7.530073165893555, 7.779545783996582, 8.02901840209961, 8.278491020202637, 8.527963638305664, 8.777436256408691, 9.026908874511719, 9.276381492614746, 9.525854110717773]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 2.0, 1.0, 2.0, 6.0, 2.0, 6.0, 5.0, 6.0, 11.0, 11.0, 13.0, 18.0, 22.0, 23.0, 22.0, 23.0, 21.0, 19.0, 39.0, 52.0, 87.0, 95.0, 124.0, 66.0, 43.0, 34.0, 32.0, 31.0, 22.0, 23.0, 25.0, 22.0, 14.0, 21.0, 11.0, 12.0, 7.0, 12.0, 7.0, 3.0, 4.0, 1.0, 1.0, 4.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.863215923309326, -3.7298285961151123, -3.5964412689208984, -3.4630539417266846, -3.3296666145324707, -3.1962790489196777, -3.062891721725464, -2.92950439453125, -2.796117067337036, -2.6627297401428223, -2.5293424129486084, -2.3959550857543945, -2.2625675201416016, -2.129180431365967, -1.9957928657531738, -1.86240553855896, -1.729018211364746, -1.5956308841705322, -1.4622435569763184, -1.328856110572815, -1.195468783378601, -1.0620814561843872, -0.9286940693855286, -0.7953066825866699, -0.661919355392456, -0.5285320281982422, -0.39514464139938354, -0.2617572844028473, -0.12836992740631104, 0.005017399787902832, 0.13840478658676147, 0.2717921733856201, 0.4051799774169922, 0.538567304611206, 0.6719546914100647, 0.8053420782089233, 0.9387294054031372, 1.072116732597351, 1.2055041790008545, 1.3388915061950684, 1.4722788333892822, 1.605666160583496, 1.73905348777771, 1.8724409341812134, 2.005828380584717, 2.1392154693603516, 2.2726030349731445, 2.4059903621673584, 2.5393776893615723, 2.672765016555786, 2.80615234375, 2.939539670944214, 3.0729269981384277, 3.2063145637512207, 3.3397018909454346, 3.4730892181396484, 3.6064765453338623, 3.739863872528076, 3.87325119972229, 4.006638526916504, 4.140026092529297, 4.273413181304932, 4.406800746917725, 4.540187835693359, 4.673575401306152]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 10.0, 8.0, 5.0, 16.0, 17.0, 15.0, 43.0, 47.0, 75.0, 99.0, 120.0, 221.0, 393.0, 701.0, 1649.0, 4536.0, 20237.0, 186845.0, 2503499.0, 1370087.0, 86796.0, 12657.0, 3371.0, 1311.0, 585.0, 279.0, 194.0, 118.0, 78.0, 70.0, 54.0, 32.0, 35.0, 19.0, 14.0, 14.0, 4.0, 8.0, 8.0, 2.0, 3.0, 2.0, 6.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.69482421875, -0.67230224609375, -0.6497802734375, -0.62725830078125, -0.604736328125, -0.58221435546875, -0.5596923828125, -0.53717041015625, -0.5146484375, -0.49212646484375, -0.4696044921875, -0.44708251953125, -0.424560546875, -0.40203857421875, -0.3795166015625, -0.35699462890625, -0.33447265625, -0.31195068359375, -0.2894287109375, -0.26690673828125, -0.244384765625, -0.22186279296875, -0.1993408203125, -0.17681884765625, -0.154296875, -0.13177490234375, -0.1092529296875, -0.08673095703125, -0.064208984375, -0.04168701171875, -0.0191650390625, 0.00335693359375, 0.02587890625, 0.04840087890625, 0.0709228515625, 0.09344482421875, 0.115966796875, 0.13848876953125, 0.1610107421875, 0.18353271484375, 0.2060546875, 0.22857666015625, 0.2510986328125, 0.27362060546875, 0.296142578125, 0.31866455078125, 0.3411865234375, 0.36370849609375, 0.38623046875, 0.40875244140625, 0.4312744140625, 0.45379638671875, 0.476318359375, 0.49884033203125, 0.5213623046875, 0.54388427734375, 0.56640625, 0.58892822265625, 0.6114501953125, 0.63397216796875, 0.656494140625, 0.67901611328125, 0.7015380859375, 0.72406005859375, 0.74658203125]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 4.0, 4.0, 4.0, 6.0, 5.0, 8.0, 17.0, 23.0, 15.0, 22.0, 34.0, 30.0, 43.0, 35.0, 46.0, 61.0, 60.0, 52.0, 77.0, 57.0, 68.0, 41.0, 46.0, 33.0, 37.0, 31.0, 35.0, 28.0, 23.0, 20.0, 10.0, 7.0, 5.0, 8.0, 7.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.37158203125, -0.36011505126953125, -0.3486480712890625, -0.33718109130859375, -0.325714111328125, -0.31424713134765625, -0.3027801513671875, -0.29131317138671875, -0.27984619140625, -0.26837921142578125, -0.2569122314453125, -0.24544525146484375, -0.233978271484375, -0.22251129150390625, -0.2110443115234375, -0.19957733154296875, -0.1881103515625, -0.17664337158203125, -0.1651763916015625, -0.15370941162109375, -0.142242431640625, -0.13077545166015625, -0.1193084716796875, -0.10784149169921875, -0.09637451171875, -0.08490753173828125, -0.0734405517578125, -0.06197357177734375, -0.050506591796875, -0.03903961181640625, -0.0275726318359375, -0.01610565185546875, -0.004638671875, 0.00682830810546875, 0.0182952880859375, 0.02976226806640625, 0.041229248046875, 0.05269622802734375, 0.0641632080078125, 0.07563018798828125, 0.08709716796875, 0.09856414794921875, 0.1100311279296875, 0.12149810791015625, 0.132965087890625, 0.14443206787109375, 0.1558990478515625, 0.16736602783203125, 0.1788330078125, 0.19029998779296875, 0.2017669677734375, 0.21323394775390625, 0.224700927734375, 0.23616790771484375, 0.2476348876953125, 0.25910186767578125, 0.27056884765625, 0.28203582763671875, 0.2935028076171875, 0.30496978759765625, 0.316436767578125, 0.32790374755859375, 0.3393707275390625, 0.35083770751953125, 0.3623046875]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 9.0, 5.0, 18.0, 12.0, 35.0, 39.0, 98.0, 185.0, 362.0, 952.0, 4239.0, 59308.0, 3882484.0, 236301.0, 8056.0, 1333.0, 435.0, 190.0, 101.0, 49.0, 23.0, 16.0, 14.0, 7.0, 6.0, 6.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.373046875, -1.32513427734375, -1.2772216796875, -1.22930908203125, -1.181396484375, -1.13348388671875, -1.0855712890625, -1.03765869140625, -0.98974609375, -0.94183349609375, -0.8939208984375, -0.84600830078125, -0.798095703125, -0.75018310546875, -0.7022705078125, -0.65435791015625, -0.6064453125, -0.55853271484375, -0.5106201171875, -0.46270751953125, -0.414794921875, -0.36688232421875, -0.3189697265625, -0.27105712890625, -0.22314453125, -0.17523193359375, -0.1273193359375, -0.07940673828125, -0.031494140625, 0.01641845703125, 0.0643310546875, 0.11224365234375, 0.16015625, 0.20806884765625, 0.2559814453125, 0.30389404296875, 0.351806640625, 0.39971923828125, 0.4476318359375, 0.49554443359375, 0.54345703125, 0.59136962890625, 0.6392822265625, 0.68719482421875, 0.735107421875, 0.78302001953125, 0.8309326171875, 0.87884521484375, 0.9267578125, 0.97467041015625, 1.0225830078125, 1.07049560546875, 1.118408203125, 1.16632080078125, 1.2142333984375, 1.26214599609375, 1.31005859375, 1.35797119140625, 1.4058837890625, 1.45379638671875, 1.501708984375, 1.54962158203125, 1.5975341796875, 1.64544677734375, 1.693359375]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 4.0, 7.0, 6.0, 6.0, 13.0, 16.0, 27.0, 42.0, 70.0, 144.0, 259.0, 490.0, 798.0, 920.0, 620.0, 303.0, 158.0, 84.0, 52.0, 22.0, 15.0, 9.0, 5.0, 4.0, 3.0, 3.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.591796875, -1.5557022094726562, -1.5196075439453125, -1.4835128784179688, -1.447418212890625, -1.4113235473632812, -1.3752288818359375, -1.3391342163085938, -1.30303955078125, -1.2669448852539062, -1.2308502197265625, -1.1947555541992188, -1.158660888671875, -1.1225662231445312, -1.0864715576171875, -1.0503768920898438, -1.0142822265625, -0.9781875610351562, -0.9420928955078125, -0.9059982299804688, -0.869903564453125, -0.8338088989257812, -0.7977142333984375, -0.7616195678710938, -0.72552490234375, -0.6894302368164062, -0.6533355712890625, -0.6172409057617188, -0.581146240234375, -0.5450515747070312, -0.5089569091796875, -0.47286224365234375, -0.436767578125, -0.40067291259765625, -0.3645782470703125, -0.32848358154296875, -0.292388916015625, -0.25629425048828125, -0.2201995849609375, -0.18410491943359375, -0.14801025390625, -0.11191558837890625, -0.0758209228515625, -0.03972625732421875, -0.003631591796875, 0.03246307373046875, 0.0685577392578125, 0.10465240478515625, 0.1407470703125, 0.17684173583984375, 0.2129364013671875, 0.24903106689453125, 0.285125732421875, 0.32122039794921875, 0.3573150634765625, 0.39340972900390625, 0.42950439453125, 0.46559906005859375, 0.5016937255859375, 0.5377883911132812, 0.573883056640625, 0.6099777221679688, 0.6460723876953125, 0.6821670532226562, 0.71826171875]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 3.0, 8.0, 13.0, 7.0, 24.0, 33.0, 59.0, 91.0, 133.0, 163.0, 166.0, 124.0, 81.0, 48.0, 21.0, 12.0, 7.0, 5.0, 4.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-10.421880722045898, -10.205196380615234, -9.988511085510254, -9.77182674407959, -9.55514144897461, -9.338457107543945, -9.121771812438965, -8.9050874710083, -8.68840217590332, -8.471717834472656, -8.255032539367676, -8.038348197937012, -7.821662902832031, -7.604978084564209, -7.388293266296387, -7.1716084480285645, -6.954923629760742, -6.73823881149292, -6.521553993225098, -6.304869174957275, -6.088184356689453, -5.871499538421631, -5.654814720153809, -5.438129901885986, -5.221445560455322, -5.0047607421875, -4.788075923919678, -4.5713911056518555, -4.354706287384033, -4.138021469116211, -3.9213366508483887, -3.7046518325805664, -3.487967014312744, -3.271282196044922, -3.0545973777770996, -2.8379125595092773, -2.621227741241455, -2.404542922973633, -2.1878581047058105, -1.9711734056472778, -1.7544885873794556, -1.5378037691116333, -1.321118950843811, -1.1044342517852783, -0.8877493739128113, -0.6710646152496338, -0.4543797969818115, -0.23769497871398926, -0.021010160446166992, 0.19567464292049408, 0.41235944628715515, 0.629044234752655, 0.8457290530204773, 1.0624138116836548, 1.279098629951477, 1.4957834482192993, 1.7124682664871216, 1.9291530847549438, 2.1458377838134766, 2.362522602081299, 2.579207420349121, 2.7958922386169434, 3.0125770568847656, 3.229261875152588, 3.44594669342041]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 4.0, 1.0, 3.0, 1.0, 5.0, 3.0, 5.0, 5.0, 8.0, 14.0, 17.0, 14.0, 12.0, 21.0, 24.0, 27.0, 31.0, 31.0, 30.0, 38.0, 49.0, 43.0, 55.0, 30.0, 44.0, 50.0, 45.0, 48.0, 49.0, 44.0, 31.0, 27.0, 28.0, 25.0, 27.0, 20.0, 16.0, 16.0, 13.0, 10.0, 13.0, 8.0, 7.0, 12.0, 4.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.2860822677612305, -3.170498847961426, -3.054915189743042, -2.9393317699432373, -2.8237481117248535, -2.708164691925049, -2.592581033706665, -2.4769976139068604, -2.3614139556884766, -2.245830535888672, -2.130246877670288, -2.0146634578704834, -1.8990797996520996, -1.783496379852295, -1.6679127216339111, -1.5523293018341064, -1.4367457628250122, -1.321162223815918, -1.2055786848068237, -1.0899951457977295, -0.9744116067886353, -0.8588281273841858, -0.7432445883750916, -0.6276610493659973, -0.5120775103569031, -0.39649397134780884, -0.2809104323387146, -0.16532692313194275, -0.04974338412284851, 0.06584012508392334, 0.18142366409301758, 0.2970072031021118, 0.41259074211120605, 0.5281742811203003, 0.6437578201293945, 0.7593413591384888, 0.874924898147583, 0.9905083775520325, 1.1060919761657715, 1.2216753959655762, 1.33725905418396, 1.4528425931930542, 1.5684261322021484, 1.6840096712112427, 1.799593210220337, 1.9151766300201416, 2.0307602882385254, 2.14634370803833, 2.2619271278381348, 2.3775105476379395, 2.4930942058563232, 2.608677625656128, 2.7242612838745117, 2.8398447036743164, 2.9554283618927, 3.071011781692505, 3.1865954399108887, 3.3021788597106934, 3.417762517929077, 3.533345937728882, 3.6489295959472656, 3.7645130157470703, 3.880096673965454, 3.995680093765259, 4.111263751983643]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 12.0, 9.0, 29.0, 33.0, 61.0, 88.0, 147.0, 235.0, 425.0, 724.0, 1229.0, 2125.0, 3859.0, 7358.0, 14876.0, 31952.0, 75326.0, 188256.0, 350697.0, 215737.0, 85345.0, 35673.0, 16345.0, 8180.0, 4284.0, 2364.0, 1308.0, 747.0, 440.0, 243.0, 151.0, 97.0, 85.0, 39.0, 25.0, 16.0, 8.0, 7.0, 6.0, 2.0, 3.0, 1.0, 4.0, 1.0, 0.0, 1.0], "bins": [-0.5830078125, -0.5670661926269531, -0.5511245727539062, -0.5351829528808594, -0.5192413330078125, -0.5032997131347656, -0.48735809326171875, -0.4714164733886719, -0.455474853515625, -0.4395332336425781, -0.42359161376953125, -0.4076499938964844, -0.3917083740234375, -0.3757667541503906, -0.35982513427734375, -0.3438835144042969, -0.32794189453125, -0.3120002746582031, -0.29605865478515625, -0.2801170349121094, -0.2641754150390625, -0.24823379516601562, -0.23229217529296875, -0.21635055541992188, -0.200408935546875, -0.18446731567382812, -0.16852569580078125, -0.15258407592773438, -0.1366424560546875, -0.12070083618164062, -0.10475921630859375, -0.08881759643554688, -0.0728759765625, -0.056934356689453125, -0.04099273681640625, -0.025051116943359375, -0.0091094970703125, 0.006832122802734375, 0.02277374267578125, 0.038715362548828125, 0.054656982421875, 0.07059860229492188, 0.08654022216796875, 0.10248184204101562, 0.1184234619140625, 0.13436508178710938, 0.15030670166015625, 0.16624832153320312, 0.18218994140625, 0.19813156127929688, 0.21407318115234375, 0.23001480102539062, 0.2459564208984375, 0.2618980407714844, 0.27783966064453125, 0.2937812805175781, 0.309722900390625, 0.3256645202636719, 0.34160614013671875, 0.3575477600097656, 0.3734893798828125, 0.3894309997558594, 0.40537261962890625, 0.4213142395019531, 0.437255859375]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 2.0, 3.0, 3.0, 7.0, 4.0, 9.0, 7.0, 11.0, 9.0, 11.0, 18.0, 28.0, 41.0, 26.0, 42.0, 33.0, 51.0, 42.0, 43.0, 57.0, 55.0, 76.0, 65.0, 50.0, 41.0, 39.0, 38.0, 34.0, 28.0, 33.0, 24.0, 11.0, 14.0, 13.0, 7.0, 14.0, 9.0, 1.0, 3.0, 2.0, 4.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.373779296875, -0.3620338439941406, -0.35028839111328125, -0.3385429382324219, -0.3267974853515625, -0.3150520324707031, -0.30330657958984375, -0.2915611267089844, -0.279815673828125, -0.2680702209472656, -0.25632476806640625, -0.24457931518554688, -0.2328338623046875, -0.22108840942382812, -0.20934295654296875, -0.19759750366210938, -0.18585205078125, -0.17410659790039062, -0.16236114501953125, -0.15061569213867188, -0.1388702392578125, -0.12712478637695312, -0.11537933349609375, -0.10363388061523438, -0.091888427734375, -0.08014297485351562, -0.06839752197265625, -0.056652069091796875, -0.0449066162109375, -0.033161163330078125, -0.02141571044921875, -0.009670257568359375, 0.0020751953125, 0.013820648193359375, 0.02556610107421875, 0.037311553955078125, 0.0490570068359375, 0.060802459716796875, 0.07254791259765625, 0.08429336547851562, 0.096038818359375, 0.10778427124023438, 0.11952972412109375, 0.13127517700195312, 0.1430206298828125, 0.15476608276367188, 0.16651153564453125, 0.17825698852539062, 0.19000244140625, 0.20174789428710938, 0.21349334716796875, 0.22523880004882812, 0.2369842529296875, 0.24872970581054688, 0.26047515869140625, 0.2722206115722656, 0.283966064453125, 0.2957115173339844, 0.30745697021484375, 0.3192024230957031, 0.3309478759765625, 0.3426933288574219, 0.35443878173828125, 0.3661842346191406, 0.3779296875]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 2.0, 2.0, 3.0, 6.0, 5.0, 6.0, 4.0, 5.0, 14.0, 13.0, 16.0, 25.0, 32.0, 41.0, 55.0, 83.0, 107.0, 148.0, 216.0, 287.0, 471.0, 765.0, 1352.0, 3118.0, 12097.0, 106648.0, 822981.0, 83297.0, 10430.0, 2788.0, 1300.0, 747.0, 455.0, 277.0, 210.0, 132.0, 119.0, 73.0, 55.0, 44.0, 30.0, 29.0, 24.0, 9.0, 12.0, 6.0, 8.0, 5.0, 2.0, 4.0, 0.0, 6.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.123046875, -1.0849151611328125, -1.046783447265625, -1.0086517333984375, -0.97052001953125, -0.9323883056640625, -0.894256591796875, -0.8561248779296875, -0.8179931640625, -0.7798614501953125, -0.741729736328125, -0.7035980224609375, -0.66546630859375, -0.6273345947265625, -0.589202880859375, -0.5510711669921875, -0.512939453125, -0.4748077392578125, -0.436676025390625, -0.3985443115234375, -0.36041259765625, -0.3222808837890625, -0.284149169921875, -0.2460174560546875, -0.2078857421875, -0.1697540283203125, -0.131622314453125, -0.0934906005859375, -0.05535888671875, -0.0172271728515625, 0.020904541015625, 0.0590362548828125, 0.09716796875, 0.1352996826171875, 0.173431396484375, 0.2115631103515625, 0.24969482421875, 0.2878265380859375, 0.325958251953125, 0.3640899658203125, 0.4022216796875, 0.4403533935546875, 0.478485107421875, 0.5166168212890625, 0.55474853515625, 0.5928802490234375, 0.631011962890625, 0.6691436767578125, 0.707275390625, 0.7454071044921875, 0.783538818359375, 0.8216705322265625, 0.85980224609375, 0.8979339599609375, 0.936065673828125, 0.9741973876953125, 1.0123291015625, 1.0504608154296875, 1.088592529296875, 1.1267242431640625, 1.16485595703125, 1.2029876708984375, 1.241119384765625, 1.2792510986328125, 1.3173828125]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 6.0, 3.0, 5.0, 7.0, 8.0, 20.0, 15.0, 17.0, 17.0, 22.0, 30.0, 29.0, 41.0, 33.0, 44.0, 68.0, 75.0, 72.0, 65.0, 53.0, 54.0, 46.0, 47.0, 38.0, 42.0, 25.0, 26.0, 15.0, 14.0, 16.0, 8.0, 5.0, 5.0, 7.0, 7.0, 5.0, 2.0, 2.0, 4.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.5498046875, -1.5025177001953125, -1.455230712890625, -1.4079437255859375, -1.36065673828125, -1.3133697509765625, -1.266082763671875, -1.2187957763671875, -1.1715087890625, -1.1242218017578125, -1.076934814453125, -1.0296478271484375, -0.98236083984375, -0.9350738525390625, -0.887786865234375, -0.8404998779296875, -0.793212890625, -0.7459259033203125, -0.698638916015625, -0.6513519287109375, -0.60406494140625, -0.5567779541015625, -0.509490966796875, -0.4622039794921875, -0.4149169921875, -0.3676300048828125, -0.320343017578125, -0.2730560302734375, -0.22576904296875, -0.1784820556640625, -0.131195068359375, -0.0839080810546875, -0.03662109375, 0.0106658935546875, 0.057952880859375, 0.1052398681640625, 0.15252685546875, 0.1998138427734375, 0.247100830078125, 0.2943878173828125, 0.3416748046875, 0.3889617919921875, 0.436248779296875, 0.4835357666015625, 0.53082275390625, 0.5781097412109375, 0.625396728515625, 0.6726837158203125, 0.719970703125, 0.7672576904296875, 0.814544677734375, 0.8618316650390625, 0.90911865234375, 0.9564056396484375, 1.003692626953125, 1.0509796142578125, 1.0982666015625, 1.1455535888671875, 1.192840576171875, 1.2401275634765625, 1.28741455078125, 1.3347015380859375, 1.381988525390625, 1.4292755126953125, 1.4765625]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 6.0, 4.0, 11.0, 16.0, 16.0, 22.0, 38.0, 58.0, 95.0, 185.0, 443.0, 1019.0, 3183.0, 13945.0, 104359.0, 826193.0, 82502.0, 11945.0, 2707.0, 931.0, 351.0, 194.0, 117.0, 80.0, 48.0, 22.0, 19.0, 18.0, 13.0, 6.0, 5.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.346435546875, -0.3360786437988281, -0.32572174072265625, -0.3153648376464844, -0.3050079345703125, -0.2946510314941406, -0.28429412841796875, -0.2739372253417969, -0.263580322265625, -0.2532234191894531, -0.24286651611328125, -0.23250961303710938, -0.2221527099609375, -0.21179580688476562, -0.20143890380859375, -0.19108200073242188, -0.18072509765625, -0.17036819458007812, -0.16001129150390625, -0.14965438842773438, -0.1392974853515625, -0.12894058227539062, -0.11858367919921875, -0.10822677612304688, -0.097869873046875, -0.08751296997070312, -0.07715606689453125, -0.06679916381835938, -0.0564422607421875, -0.046085357666015625, -0.03572845458984375, -0.025371551513671875, -0.0150146484375, -0.004657745361328125, 0.00569915771484375, 0.016056060791015625, 0.0264129638671875, 0.036769866943359375, 0.04712677001953125, 0.057483673095703125, 0.067840576171875, 0.07819747924804688, 0.08855438232421875, 0.09891128540039062, 0.1092681884765625, 0.11962509155273438, 0.12998199462890625, 0.14033889770507812, 0.15069580078125, 0.16105270385742188, 0.17140960693359375, 0.18176651000976562, 0.1921234130859375, 0.20248031616210938, 0.21283721923828125, 0.22319412231445312, 0.233551025390625, 0.24390792846679688, 0.25426483154296875, 0.2646217346191406, 0.2749786376953125, 0.2853355407714844, 0.29569244384765625, 0.3060493469238281, 0.31640625]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 3.0, 12.0, 13.0, 11.0, 15.0, 21.0, 23.0, 20.0, 25.0, 28.0, 46.0, 47.0, 69.0, 61.0, 94.0, 80.0, 66.0, 49.0, 56.0, 42.0, 34.0, 26.0, 30.0, 19.0, 17.0, 13.0, 10.0, 10.0, 10.0, 7.0, 6.0, 6.0, 1.0, 3.0, 4.0, 4.0, 0.0, 5.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.125999450683594e-05, -4.9598515033721924e-05, -4.793703556060791e-05, -4.6275556087493896e-05, -4.461407661437988e-05, -4.295259714126587e-05, -4.1291117668151855e-05, -3.962963819503784e-05, -3.796815872192383e-05, -3.6306679248809814e-05, -3.46451997756958e-05, -3.298372030258179e-05, -3.1322240829467773e-05, -2.966076135635376e-05, -2.7999281883239746e-05, -2.6337802410125732e-05, -2.467632293701172e-05, -2.3014843463897705e-05, -2.135336399078369e-05, -1.9691884517669678e-05, -1.8030405044555664e-05, -1.636892557144165e-05, -1.4707446098327637e-05, -1.3045966625213623e-05, -1.138448715209961e-05, -9.723007678985596e-06, -8.061528205871582e-06, -6.400048732757568e-06, -4.738569259643555e-06, -3.077089786529541e-06, -1.4156103134155273e-06, 2.4586915969848633e-07, 1.9073486328125e-06, 3.5688281059265137e-06, 5.230307579040527e-06, 6.891787052154541e-06, 8.553266525268555e-06, 1.0214745998382568e-05, 1.1876225471496582e-05, 1.3537704944610596e-05, 1.519918441772461e-05, 1.6860663890838623e-05, 1.8522143363952637e-05, 2.018362283706665e-05, 2.1845102310180664e-05, 2.3506581783294678e-05, 2.516806125640869e-05, 2.6829540729522705e-05, 2.849102020263672e-05, 3.0152499675750732e-05, 3.1813979148864746e-05, 3.347545862197876e-05, 3.5136938095092773e-05, 3.679841756820679e-05, 3.84598970413208e-05, 4.0121376514434814e-05, 4.178285598754883e-05, 4.344433546066284e-05, 4.5105814933776855e-05, 4.676729440689087e-05, 4.842877388000488e-05, 5.0090253353118896e-05, 5.175173282623291e-05, 5.3413212299346924e-05, 5.507469177246094e-05]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 7.0, 9.0, 25.0, 32.0, 63.0, 93.0, 213.0, 483.0, 1031.0, 2874.0, 12381.0, 114285.0, 833485.0, 70880.0, 8844.0, 2262.0, 779.0, 376.0, 203.0, 83.0, 55.0, 38.0, 23.0, 12.0, 3.0, 6.0, 6.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.376220703125, -0.3658866882324219, -0.35555267333984375, -0.3452186584472656, -0.3348846435546875, -0.3245506286621094, -0.31421661376953125, -0.3038825988769531, -0.293548583984375, -0.2832145690917969, -0.27288055419921875, -0.2625465393066406, -0.2522125244140625, -0.24187850952148438, -0.23154449462890625, -0.22121047973632812, -0.21087646484375, -0.20054244995117188, -0.19020843505859375, -0.17987442016601562, -0.1695404052734375, -0.15920639038085938, -0.14887237548828125, -0.13853836059570312, -0.128204345703125, -0.11787033081054688, -0.10753631591796875, -0.09720230102539062, -0.0868682861328125, -0.07653427124023438, -0.06620025634765625, -0.055866241455078125, -0.0455322265625, -0.035198211669921875, -0.02486419677734375, -0.014530181884765625, -0.0041961669921875, 0.006137847900390625, 0.01647186279296875, 0.026805877685546875, 0.037139892578125, 0.047473907470703125, 0.05780792236328125, 0.06814193725585938, 0.0784759521484375, 0.08880996704101562, 0.09914398193359375, 0.10947799682617188, 0.11981201171875, 0.13014602661132812, 0.14048004150390625, 0.15081405639648438, 0.1611480712890625, 0.17148208618164062, 0.18181610107421875, 0.19215011596679688, 0.202484130859375, 0.21281814575195312, 0.22315216064453125, 0.23348617553710938, 0.2438201904296875, 0.2541542053222656, 0.26448822021484375, 0.2748222351074219, 0.28515625]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 7.0, 8.0, 15.0, 21.0, 20.0, 28.0, 44.0, 38.0, 58.0, 82.0, 90.0, 98.0, 88.0, 86.0, 70.0, 63.0, 56.0, 39.0, 23.0, 23.0, 10.0, 12.0, 5.0, 6.0, 2.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.146240234375, -0.13813400268554688, -0.13002777099609375, -0.12192153930664062, -0.1138153076171875, -0.10570907592773438, -0.09760284423828125, -0.08949661254882812, -0.081390380859375, -0.07328414916992188, -0.06517791748046875, -0.057071685791015625, -0.0489654541015625, -0.040859222412109375, -0.03275299072265625, -0.024646759033203125, -0.01654052734375, -0.008434295654296875, -0.00032806396484375, 0.007778167724609375, 0.0158843994140625, 0.023990631103515625, 0.03209686279296875, 0.040203094482421875, 0.048309326171875, 0.056415557861328125, 0.06452178955078125, 0.07262802124023438, 0.0807342529296875, 0.08884048461914062, 0.09694671630859375, 0.10505294799804688, 0.1131591796875, 0.12126541137695312, 0.12937164306640625, 0.13747787475585938, 0.1455841064453125, 0.15369033813476562, 0.16179656982421875, 0.16990280151367188, 0.178009033203125, 0.18611526489257812, 0.19422149658203125, 0.20232772827148438, 0.2104339599609375, 0.21854019165039062, 0.22664642333984375, 0.23475265502929688, 0.24285888671875, 0.2509651184082031, 0.25907135009765625, 0.2671775817871094, 0.2752838134765625, 0.2833900451660156, 0.29149627685546875, 0.2996025085449219, 0.307708740234375, 0.3158149719238281, 0.32392120361328125, 0.3320274353027344, 0.3401336669921875, 0.3482398986816406, 0.35634613037109375, 0.3644523620605469, 0.37255859375]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 0.0, 3.0, 4.0, 5.0, 10.0, 20.0, 49.0, 116.0, 336.0, 269.0, 91.0, 50.0, 24.0, 16.0, 7.0, 3.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.845046997070312, -8.539398193359375, -8.233749389648438, -7.928099632263184, -7.622450828552246, -7.316802024841309, -7.011152744293213, -6.705503463745117, -6.39985466003418, -6.094205856323242, -5.7885565757751465, -5.482907295227051, -5.177258491516113, -4.871609687805176, -4.56596040725708, -4.260311126708984, -3.954662322998047, -3.6490132808685303, -3.3433642387390137, -3.037715196609497, -2.7320661544799805, -2.426417112350464, -2.1207680702209473, -1.8151190280914307, -1.509469985961914, -1.2038209438323975, -0.8981719017028809, -0.5925228595733643, -0.28687381744384766, 0.018775224685668945, 0.32442426681518555, 0.6300733089447021, 0.9357233047485352, 1.2413723468780518, 1.5470213890075684, 1.852670431137085, 2.1583194732666016, 2.463968515396118, 2.7696175575256348, 3.0752665996551514, 3.380915641784668, 3.6865646839141846, 3.992213726043701, 4.297863006591797, 4.603511810302734, 4.909160614013672, 5.214809894561768, 5.520459175109863, 5.826107978820801, 6.131756782531738, 6.437406063079834, 6.74305534362793, 7.048704147338867, 7.354352951049805, 7.6600022315979, 7.965651512145996, 8.271300315856934, 8.576949119567871, 8.882598876953125, 9.188247680664062, 9.493896484375, 9.799545288085938, 10.105194091796875, 10.410843849182129, 10.716492652893066]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [2.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 0.0, 6.0, 4.0, 2.0, 5.0, 8.0, 5.0, 11.0, 13.0, 18.0, 12.0, 12.0, 23.0, 19.0, 22.0, 31.0, 26.0, 37.0, 25.0, 50.0, 99.0, 166.0, 89.0, 47.0, 39.0, 26.0, 22.0, 21.0, 22.0, 18.0, 19.0, 21.0, 14.0, 7.0, 6.0, 8.0, 8.0, 8.0, 7.0, 2.0, 4.0, 3.0, 2.0, 9.0, 3.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.150975227355957, -4.969811916351318, -4.788649082183838, -4.607485771179199, -4.426322937011719, -4.24515962600708, -4.063996315002441, -3.882833480834961, -3.7016704082489014, -3.520507335662842, -3.3393442630767822, -3.1581811904907227, -2.977017879486084, -2.7958550453186035, -2.614691734313965, -2.4335286617279053, -2.2523655891418457, -2.071202516555786, -1.8900394439697266, -1.7088762521743774, -1.5277131795883179, -1.3465501070022583, -1.1653869152069092, -0.9842238426208496, -0.80306077003479, -0.6218976974487305, -0.4407345652580261, -0.2595714330673218, -0.07840836048126221, 0.10275471210479736, 0.2839179039001465, 0.46508097648620605, 0.6462440490722656, 0.8274071216583252, 1.0085701942443848, 1.1897333860397339, 1.3708964586257935, 1.552059531211853, 1.7332227230072021, 1.9143857955932617, 2.0955488681793213, 2.276711940765381, 2.4578750133514404, 2.6390380859375, 2.8202013969421387, 3.001364231109619, 3.182527542114258, 3.3636906147003174, 3.544853687286377, 3.7260167598724365, 3.907179832458496, 4.088343143463135, 4.269505977630615, 4.450669288635254, 4.631832122802734, 4.812995433807373, 4.994158744812012, 5.17532205581665, 5.356484889984131, 5.5376482009887695, 5.71881103515625, 5.899974346160889, 6.081137657165527, 6.262300491333008, 6.443463325500488]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 6.0, 4.0, 5.0, 7.0, 12.0, 10.0, 12.0, 26.0, 47.0, 71.0, 145.0, 244.0, 512.0, 943.0, 2315.0, 6326.0, 26968.0, 222531.0, 1947236.0, 1757586.0, 193331.0, 25206.0, 6272.0, 2332.0, 1036.0, 520.0, 257.0, 129.0, 80.0, 47.0, 26.0, 13.0, 10.0, 5.0, 7.0, 3.0, 2.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.85986328125, -0.8352737426757812, -0.8106842041015625, -0.7860946655273438, -0.761505126953125, -0.7369155883789062, -0.7123260498046875, -0.6877365112304688, -0.66314697265625, -0.6385574340820312, -0.6139678955078125, -0.5893783569335938, -0.564788818359375, -0.5401992797851562, -0.5156097412109375, -0.49102020263671875, -0.4664306640625, -0.44184112548828125, -0.4172515869140625, -0.39266204833984375, -0.368072509765625, -0.34348297119140625, -0.3188934326171875, -0.29430389404296875, -0.26971435546875, -0.24512481689453125, -0.2205352783203125, -0.19594573974609375, -0.171356201171875, -0.14676666259765625, -0.1221771240234375, -0.09758758544921875, -0.072998046875, -0.04840850830078125, -0.0238189697265625, 0.00077056884765625, 0.025360107421875, 0.04994964599609375, 0.0745391845703125, 0.09912872314453125, 0.12371826171875, 0.14830780029296875, 0.1728973388671875, 0.19748687744140625, 0.222076416015625, 0.24666595458984375, 0.2712554931640625, 0.29584503173828125, 0.3204345703125, 0.34502410888671875, 0.3696136474609375, 0.39420318603515625, 0.418792724609375, 0.44338226318359375, 0.4679718017578125, 0.49256134033203125, 0.51715087890625, 0.5417404174804688, 0.5663299560546875, 0.5909194946289062, 0.615509033203125, 0.6400985717773438, 0.6646881103515625, 0.6892776489257812, 0.7138671875]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 1.0, 2.0, 5.0, 1.0, 6.0, 7.0, 7.0, 9.0, 11.0, 18.0, 19.0, 20.0, 23.0, 31.0, 42.0, 37.0, 42.0, 71.0, 64.0, 60.0, 58.0, 66.0, 60.0, 49.0, 45.0, 35.0, 34.0, 24.0, 36.0, 24.0, 26.0, 19.0, 15.0, 12.0, 6.0, 3.0, 7.0, 5.0, 5.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.361572265625, -0.3504638671875, -0.33935546875, -0.3282470703125, -0.317138671875, -0.3060302734375, -0.294921875, -0.2838134765625, -0.272705078125, -0.2615966796875, -0.25048828125, -0.2393798828125, -0.228271484375, -0.2171630859375, -0.2060546875, -0.1949462890625, -0.183837890625, -0.1727294921875, -0.16162109375, -0.1505126953125, -0.139404296875, -0.1282958984375, -0.1171875, -0.1060791015625, -0.094970703125, -0.0838623046875, -0.07275390625, -0.0616455078125, -0.050537109375, -0.0394287109375, -0.0283203125, -0.0172119140625, -0.006103515625, 0.0050048828125, 0.01611328125, 0.0272216796875, 0.038330078125, 0.0494384765625, 0.060546875, 0.0716552734375, 0.082763671875, 0.0938720703125, 0.10498046875, 0.1160888671875, 0.127197265625, 0.1383056640625, 0.1494140625, 0.1605224609375, 0.171630859375, 0.1827392578125, 0.19384765625, 0.2049560546875, 0.216064453125, 0.2271728515625, 0.23828125, 0.2493896484375, 0.260498046875, 0.2716064453125, 0.28271484375, 0.2938232421875, 0.304931640625, 0.3160400390625, 0.3271484375, 0.3382568359375, 0.349365234375]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 6.0, 6.0, 6.0, 10.0, 8.0, 22.0, 20.0, 40.0, 47.0, 66.0, 83.0, 131.0, 211.0, 350.0, 604.0, 1140.0, 2553.0, 7780.0, 47250.0, 2319179.0, 1760310.0, 42195.0, 7201.0, 2410.0, 1094.0, 580.0, 318.0, 229.0, 150.0, 93.0, 51.0, 36.0, 30.0, 19.0, 17.0, 10.0, 8.0, 8.0, 8.0, 2.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.6005859375, -1.5534820556640625, -1.506378173828125, -1.4592742919921875, -1.41217041015625, -1.3650665283203125, -1.317962646484375, -1.2708587646484375, -1.2237548828125, -1.1766510009765625, -1.129547119140625, -1.0824432373046875, -1.03533935546875, -0.9882354736328125, -0.941131591796875, -0.8940277099609375, -0.846923828125, -0.7998199462890625, -0.752716064453125, -0.7056121826171875, -0.65850830078125, -0.6114044189453125, -0.564300537109375, -0.5171966552734375, -0.4700927734375, -0.4229888916015625, -0.375885009765625, -0.3287811279296875, -0.28167724609375, -0.2345733642578125, -0.187469482421875, -0.1403656005859375, -0.09326171875, -0.0461578369140625, 0.000946044921875, 0.0480499267578125, 0.09515380859375, 0.1422576904296875, 0.189361572265625, 0.2364654541015625, 0.2835693359375, 0.3306732177734375, 0.377777099609375, 0.4248809814453125, 0.47198486328125, 0.5190887451171875, 0.566192626953125, 0.6132965087890625, 0.660400390625, 0.7075042724609375, 0.754608154296875, 0.8017120361328125, 0.84881591796875, 0.8959197998046875, 0.943023681640625, 0.9901275634765625, 1.0372314453125, 1.0843353271484375, 1.131439208984375, 1.1785430908203125, 1.22564697265625, 1.2727508544921875, 1.319854736328125, 1.3669586181640625, 1.4140625]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 8.0, 4.0, 5.0, 9.0, 12.0, 15.0, 13.0, 33.0, 48.0, 61.0, 80.0, 120.0, 179.0, 229.0, 337.0, 468.0, 558.0, 558.0, 407.0, 276.0, 209.0, 149.0, 92.0, 67.0, 44.0, 34.0, 21.0, 16.0, 11.0, 10.0, 2.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.4072265625, -1.3709259033203125, -1.334625244140625, -1.2983245849609375, -1.26202392578125, -1.2257232666015625, -1.189422607421875, -1.1531219482421875, -1.1168212890625, -1.0805206298828125, -1.044219970703125, -1.0079193115234375, -0.97161865234375, -0.9353179931640625, -0.899017333984375, -0.8627166748046875, -0.826416015625, -0.7901153564453125, -0.753814697265625, -0.7175140380859375, -0.68121337890625, -0.6449127197265625, -0.608612060546875, -0.5723114013671875, -0.5360107421875, -0.4997100830078125, -0.463409423828125, -0.4271087646484375, -0.39080810546875, -0.3545074462890625, -0.318206787109375, -0.2819061279296875, -0.24560546875, -0.2093048095703125, -0.173004150390625, -0.1367034912109375, -0.10040283203125, -0.0641021728515625, -0.027801513671875, 0.0084991455078125, 0.0447998046875, 0.0811004638671875, 0.117401123046875, 0.1537017822265625, 0.19000244140625, 0.2263031005859375, 0.262603759765625, 0.2989044189453125, 0.335205078125, 0.3715057373046875, 0.407806396484375, 0.4441070556640625, 0.48040771484375, 0.5167083740234375, 0.553009033203125, 0.5893096923828125, 0.6256103515625, 0.6619110107421875, 0.698211669921875, 0.7345123291015625, 0.77081298828125, 0.8071136474609375, 0.843414306640625, 0.8797149658203125, 0.916015625]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 3.0, 4.0, 6.0, 8.0, 14.0, 30.0, 56.0, 112.0, 232.0, 262.0, 143.0, 76.0, 26.0, 15.0, 7.0, 2.0, 5.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.248767852783203, -19.638607025146484, -19.028446197509766, -18.418283462524414, -17.808122634887695, -17.197961807250977, -16.587799072265625, -15.977638244628906, -15.367477416992188, -14.757316589355469, -14.147154808044434, -13.536993026733398, -12.92683219909668, -12.316671371459961, -11.706509590148926, -11.09634780883789, -10.486186981201172, -9.876026153564453, -9.265864372253418, -8.655702590942383, -8.045541763305664, -7.435380458831787, -6.82521915435791, -6.215057849884033, -5.604896545410156, -4.994735240936279, -4.384573936462402, -3.7744126319885254, -3.1642513275146484, -2.5540900230407715, -1.9439287185668945, -1.3337674140930176, -0.7236080169677734, -0.11344671249389648, 0.49671459197998047, 1.1068758964538574, 1.7170372009277344, 2.3271985054016113, 2.9373598098754883, 3.5475211143493652, 4.157682418823242, 4.767843723297119, 5.378005027770996, 5.988166332244873, 6.59832763671875, 7.208488941192627, 7.818650245666504, 8.428812026977539, 9.038972854614258, 9.649133682250977, 10.259295463562012, 10.869457244873047, 11.479618072509766, 12.089778900146484, 12.69994068145752, 13.310102462768555, 13.920263290405273, 14.530424118041992, 15.140585899353027, 15.750747680664062, 16.36090850830078, 16.9710693359375, 17.58123016357422, 18.19139289855957, 18.80155372619629]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 3.0, 4.0, 1.0, 5.0, 5.0, 13.0, 11.0, 14.0, 14.0, 20.0, 27.0, 20.0, 22.0, 26.0, 28.0, 31.0, 26.0, 32.0, 32.0, 34.0, 57.0, 64.0, 70.0, 43.0, 42.0, 50.0, 32.0, 32.0, 30.0, 34.0, 25.0, 34.0, 25.0, 14.0, 15.0, 19.0, 8.0, 8.0, 8.0, 5.0, 4.0, 6.0, 3.0, 4.0, 2.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-6.119375228881836, -5.936121940612793, -5.752869129180908, -5.569616317749023, -5.3863630294799805, -5.2031097412109375, -5.019856929779053, -4.836604118347168, -4.653350830078125, -4.470097541809082, -4.286844730377197, -4.1035919189453125, -3.9203386306762695, -3.7370855808258057, -3.553832530975342, -3.370579481124878, -3.187326431274414, -3.00407338142395, -2.8208203315734863, -2.6375672817230225, -2.4543142318725586, -2.2710611820220947, -2.087808132171631, -1.904555082321167, -1.7213020324707031, -1.5380489826202393, -1.3547959327697754, -1.1715428829193115, -0.9882898330688477, -0.8050367832183838, -0.6217837333679199, -0.43853068351745605, -0.2552781105041504, -0.07202506065368652, 0.11122798919677734, 0.2944810390472412, 0.4777340888977051, 0.660987138748169, 0.8442401885986328, 1.0274932384490967, 1.2107462882995605, 1.3939993381500244, 1.5772523880004883, 1.7605054378509521, 1.943758487701416, 2.12701153755188, 2.3102645874023438, 2.4935176372528076, 2.6767706871032715, 2.8600237369537354, 3.043276786804199, 3.226529836654663, 3.409782886505127, 3.593035936355591, 3.7762889862060547, 3.9595420360565186, 4.142795085906982, 4.326047897338867, 4.50930118560791, 4.692554473876953, 4.875807285308838, 5.059060096740723, 5.242313385009766, 5.425566673278809, 5.608819484710693]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 2.0, 2.0, 6.0, 8.0, 10.0, 14.0, 13.0, 9.0, 20.0, 35.0, 52.0, 86.0, 142.0, 243.0, 395.0, 704.0, 1478.0, 3029.0, 7000.0, 18730.0, 63785.0, 359880.0, 481678.0, 75037.0, 21450.0, 7862.0, 3395.0, 1578.0, 795.0, 425.0, 254.0, 145.0, 99.0, 53.0, 26.0, 27.0, 21.0, 19.0, 6.0, 9.0, 15.0, 3.0, 5.0, 6.0, 2.0, 3.0, 2.0, 0.0, 2.0, 2.0, 1.0, 2.0], "bins": [-0.72216796875, -0.7009658813476562, -0.6797637939453125, -0.6585617065429688, -0.637359619140625, -0.6161575317382812, -0.5949554443359375, -0.5737533569335938, -0.55255126953125, -0.5313491821289062, -0.5101470947265625, -0.48894500732421875, -0.467742919921875, -0.44654083251953125, -0.4253387451171875, -0.40413665771484375, -0.3829345703125, -0.36173248291015625, -0.3405303955078125, -0.31932830810546875, -0.298126220703125, -0.27692413330078125, -0.2557220458984375, -0.23451995849609375, -0.21331787109375, -0.19211578369140625, -0.1709136962890625, -0.14971160888671875, -0.128509521484375, -0.10730743408203125, -0.0861053466796875, -0.06490325927734375, -0.043701171875, -0.02249908447265625, -0.0012969970703125, 0.01990509033203125, 0.041107177734375, 0.06230926513671875, 0.0835113525390625, 0.10471343994140625, 0.12591552734375, 0.14711761474609375, 0.1683197021484375, 0.18952178955078125, 0.210723876953125, 0.23192596435546875, 0.2531280517578125, 0.27433013916015625, 0.2955322265625, 0.31673431396484375, 0.3379364013671875, 0.35913848876953125, 0.380340576171875, 0.40154266357421875, 0.4227447509765625, 0.44394683837890625, 0.46514892578125, 0.48635101318359375, 0.5075531005859375, 0.5287551879882812, 0.549957275390625, 0.5711593627929688, 0.5923614501953125, 0.6135635375976562, 0.634765625]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 5.0, 5.0, 5.0, 13.0, 9.0, 13.0, 19.0, 18.0, 21.0, 28.0, 33.0, 41.0, 45.0, 57.0, 56.0, 64.0, 55.0, 65.0, 71.0, 54.0, 42.0, 49.0, 38.0, 37.0, 30.0, 35.0, 18.0, 13.0, 13.0, 12.0, 8.0, 9.0, 5.0, 9.0, 3.0, 3.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.462158203125, -0.4486351013183594, -0.43511199951171875, -0.4215888977050781, -0.4080657958984375, -0.3945426940917969, -0.38101959228515625, -0.3674964904785156, -0.353973388671875, -0.3404502868652344, -0.32692718505859375, -0.3134040832519531, -0.2998809814453125, -0.2863578796386719, -0.27283477783203125, -0.2593116760253906, -0.24578857421875, -0.23226547241210938, -0.21874237060546875, -0.20521926879882812, -0.1916961669921875, -0.17817306518554688, -0.16464996337890625, -0.15112686157226562, -0.137603759765625, -0.12408065795898438, -0.11055755615234375, -0.09703445434570312, -0.0835113525390625, -0.06998825073242188, -0.05646514892578125, -0.042942047119140625, -0.0294189453125, -0.015895843505859375, -0.00237274169921875, 0.011150360107421875, 0.0246734619140625, 0.038196563720703125, 0.05171966552734375, 0.06524276733398438, 0.078765869140625, 0.09228897094726562, 0.10581207275390625, 0.11933517456054688, 0.1328582763671875, 0.14638137817382812, 0.15990447998046875, 0.17342758178710938, 0.18695068359375, 0.20047378540039062, 0.21399688720703125, 0.22751998901367188, 0.2410430908203125, 0.2545661926269531, 0.26808929443359375, 0.2816123962402344, 0.295135498046875, 0.3086585998535156, 0.32218170166015625, 0.3357048034667969, 0.3492279052734375, 0.3627510070800781, 0.37627410888671875, 0.3897972106933594, 0.4033203125]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 4.0, 1.0, 8.0, 5.0, 3.0, 8.0, 14.0, 19.0, 21.0, 29.0, 54.0, 81.0, 134.0, 192.0, 310.0, 609.0, 1378.0, 4693.0, 31372.0, 882322.0, 112405.0, 10606.0, 2242.0, 838.0, 444.0, 253.0, 162.0, 110.0, 85.0, 46.0, 29.0, 33.0, 21.0, 10.0, 5.0, 6.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 4.0], "bins": [-1.2197265625, -1.187408447265625, -1.15509033203125, -1.122772216796875, -1.0904541015625, -1.058135986328125, -1.02581787109375, -0.993499755859375, -0.961181640625, -0.928863525390625, -0.89654541015625, -0.864227294921875, -0.8319091796875, -0.799591064453125, -0.76727294921875, -0.734954833984375, -0.70263671875, -0.670318603515625, -0.63800048828125, -0.605682373046875, -0.5733642578125, -0.541046142578125, -0.50872802734375, -0.476409912109375, -0.444091796875, -0.411773681640625, -0.37945556640625, -0.347137451171875, -0.3148193359375, -0.282501220703125, -0.25018310546875, -0.217864990234375, -0.185546875, -0.153228759765625, -0.12091064453125, -0.088592529296875, -0.0562744140625, -0.023956298828125, 0.00836181640625, 0.040679931640625, 0.072998046875, 0.105316162109375, 0.13763427734375, 0.169952392578125, 0.2022705078125, 0.234588623046875, 0.26690673828125, 0.299224853515625, 0.33154296875, 0.363861083984375, 0.39617919921875, 0.428497314453125, 0.4608154296875, 0.493133544921875, 0.52545166015625, 0.557769775390625, 0.590087890625, 0.622406005859375, 0.65472412109375, 0.687042236328125, 0.7193603515625, 0.751678466796875, 0.78399658203125, 0.816314697265625, 0.8486328125]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 5.0, 4.0, 8.0, 7.0, 11.0, 7.0, 13.0, 14.0, 17.0, 20.0, 36.0, 29.0, 44.0, 42.0, 53.0, 84.0, 82.0, 98.0, 83.0, 68.0, 42.0, 38.0, 28.0, 26.0, 24.0, 19.0, 18.0, 14.0, 12.0, 10.0, 12.0, 5.0, 10.0, 7.0, 5.0, 5.0, 1.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.8505859375, -1.7966461181640625, -1.742706298828125, -1.6887664794921875, -1.63482666015625, -1.5808868408203125, -1.526947021484375, -1.4730072021484375, -1.4190673828125, -1.3651275634765625, -1.311187744140625, -1.2572479248046875, -1.20330810546875, -1.1493682861328125, -1.095428466796875, -1.0414886474609375, -0.987548828125, -0.9336090087890625, -0.879669189453125, -0.8257293701171875, -0.77178955078125, -0.7178497314453125, -0.663909912109375, -0.6099700927734375, -0.5560302734375, -0.5020904541015625, -0.448150634765625, -0.3942108154296875, -0.34027099609375, -0.2863311767578125, -0.232391357421875, -0.1784515380859375, -0.12451171875, -0.0705718994140625, -0.016632080078125, 0.0373077392578125, 0.09124755859375, 0.1451873779296875, 0.199127197265625, 0.2530670166015625, 0.3070068359375, 0.3609466552734375, 0.414886474609375, 0.4688262939453125, 0.52276611328125, 0.5767059326171875, 0.630645751953125, 0.6845855712890625, 0.738525390625, 0.7924652099609375, 0.846405029296875, 0.9003448486328125, 0.95428466796875, 1.0082244873046875, 1.062164306640625, 1.1161041259765625, 1.1700439453125, 1.2239837646484375, 1.277923583984375, 1.3318634033203125, 1.38580322265625, 1.4397430419921875, 1.493682861328125, 1.5476226806640625, 1.6015625]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 2.0, 2.0, 3.0, 6.0, 7.0, 7.0, 20.0, 18.0, 30.0, 40.0, 78.0, 142.0, 287.0, 682.0, 1837.0, 6790.0, 41730.0, 905192.0, 77254.0, 10406.0, 2546.0, 788.0, 299.0, 154.0, 86.0, 54.0, 26.0, 17.0, 17.0, 10.0, 11.0, 9.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.187744140625, -0.1821613311767578, -0.17657852172851562, -0.17099571228027344, -0.16541290283203125, -0.15983009338378906, -0.15424728393554688, -0.1486644744873047, -0.1430816650390625, -0.1374988555908203, -0.13191604614257812, -0.12633323669433594, -0.12075042724609375, -0.11516761779785156, -0.10958480834960938, -0.10400199890136719, -0.098419189453125, -0.09283638000488281, -0.08725357055664062, -0.08167076110839844, -0.07608795166015625, -0.07050514221191406, -0.06492233276367188, -0.05933952331542969, -0.0537567138671875, -0.04817390441894531, -0.042591094970703125, -0.03700828552246094, -0.03142547607421875, -0.025842666625976562, -0.020259857177734375, -0.014677047729492188, -0.00909423828125, -0.0035114288330078125, 0.002071380615234375, 0.0076541900634765625, 0.01323699951171875, 0.018819808959960938, 0.024402618408203125, 0.029985427856445312, 0.0355682373046875, 0.04115104675292969, 0.046733856201171875, 0.05231666564941406, 0.05789947509765625, 0.06348228454589844, 0.06906509399414062, 0.07464790344238281, 0.080230712890625, 0.08581352233886719, 0.09139633178710938, 0.09697914123535156, 0.10256195068359375, 0.10814476013183594, 0.11372756958007812, 0.11931037902832031, 0.1248931884765625, 0.1304759979248047, 0.13605880737304688, 0.14164161682128906, 0.14722442626953125, 0.15280723571777344, 0.15839004516601562, 0.1639728546142578, 0.1695556640625]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 3.0, 4.0, 6.0, 3.0, 8.0, 11.0, 10.0, 10.0, 15.0, 18.0, 28.0, 26.0, 29.0, 49.0, 72.0, 88.0, 120.0, 109.0, 99.0, 71.0, 57.0, 43.0, 32.0, 27.0, 15.0, 16.0, 9.0, 9.0, 7.0, 4.0, 3.0, 3.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.93986701965332e-05, -3.812834620475769e-05, -3.685802221298218e-05, -3.5587698221206665e-05, -3.431737422943115e-05, -3.304705023765564e-05, -3.177672624588013e-05, -3.0506402254104614e-05, -2.92360782623291e-05, -2.796575427055359e-05, -2.6695430278778076e-05, -2.5425106287002563e-05, -2.415478229522705e-05, -2.2884458303451538e-05, -2.1614134311676025e-05, -2.0343810319900513e-05, -1.9073486328125e-05, -1.7803162336349487e-05, -1.6532838344573975e-05, -1.5262514352798462e-05, -1.399219036102295e-05, -1.2721866369247437e-05, -1.1451542377471924e-05, -1.0181218385696411e-05, -8.910894393920898e-06, -7.640570402145386e-06, -6.370246410369873e-06, -5.09992241859436e-06, -3.829598426818848e-06, -2.559274435043335e-06, -1.2889504432678223e-06, -1.862645149230957e-08, 1.2516975402832031e-06, 2.522021532058716e-06, 3.7923455238342285e-06, 5.062669515609741e-06, 6.332993507385254e-06, 7.603317499160767e-06, 8.87364149093628e-06, 1.0143965482711792e-05, 1.1414289474487305e-05, 1.2684613466262817e-05, 1.395493745803833e-05, 1.5225261449813843e-05, 1.6495585441589355e-05, 1.7765909433364868e-05, 1.903623342514038e-05, 2.0306557416915894e-05, 2.1576881408691406e-05, 2.284720540046692e-05, 2.411752939224243e-05, 2.5387853384017944e-05, 2.6658177375793457e-05, 2.792850136756897e-05, 2.9198825359344482e-05, 3.0469149351119995e-05, 3.173947334289551e-05, 3.300979733467102e-05, 3.428012132644653e-05, 3.5550445318222046e-05, 3.682076930999756e-05, 3.809109330177307e-05, 3.9361417293548584e-05, 4.06317412853241e-05, 4.190206527709961e-05]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 4.0, 0.0, 2.0, 1.0, 3.0, 6.0, 9.0, 9.0, 16.0, 11.0, 24.0, 28.0, 48.0, 46.0, 78.0, 129.0, 195.0, 307.0, 474.0, 739.0, 1350.0, 2488.0, 5260.0, 12045.0, 35909.0, 198063.0, 717550.0, 46552.0, 14540.0, 5928.0, 2897.0, 1513.0, 874.0, 502.0, 303.0, 211.0, 143.0, 104.0, 56.0, 37.0, 21.0, 28.0, 21.0, 15.0, 7.0, 9.0, 3.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0936279296875, -0.09051704406738281, -0.08740615844726562, -0.08429527282714844, -0.08118438720703125, -0.07807350158691406, -0.07496261596679688, -0.07185173034667969, -0.0687408447265625, -0.06562995910644531, -0.06251907348632812, -0.05940818786621094, -0.05629730224609375, -0.05318641662597656, -0.050075531005859375, -0.04696464538574219, -0.043853759765625, -0.04074287414550781, -0.037631988525390625, -0.03452110290527344, -0.03141021728515625, -0.028299331665039062, -0.025188446044921875, -0.022077560424804688, -0.0189666748046875, -0.015855789184570312, -0.012744903564453125, -0.009634017944335938, -0.00652313232421875, -0.0034122467041015625, -0.000301361083984375, 0.0028095245361328125, 0.00592041015625, 0.009031295776367188, 0.012142181396484375, 0.015253067016601562, 0.01836395263671875, 0.021474838256835938, 0.024585723876953125, 0.027696609497070312, 0.0308074951171875, 0.03391838073730469, 0.037029266357421875, 0.04014015197753906, 0.04325103759765625, 0.04636192321777344, 0.049472808837890625, 0.05258369445800781, 0.055694580078125, 0.05880546569824219, 0.061916351318359375, 0.06502723693847656, 0.06813812255859375, 0.07124900817871094, 0.07435989379882812, 0.07747077941894531, 0.0805816650390625, 0.08369255065917969, 0.08680343627929688, 0.08991432189941406, 0.09302520751953125, 0.09613609313964844, 0.09924697875976562, 0.10235786437988281, 0.10546875]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 3.0, 1.0, 3.0, 4.0, 7.0, 2.0, 8.0, 5.0, 13.0, 18.0, 14.0, 21.0, 40.0, 34.0, 63.0, 69.0, 81.0, 101.0, 91.0, 77.0, 72.0, 55.0, 40.0, 34.0, 26.0, 29.0, 21.0, 12.0, 9.0, 13.0, 9.0, 5.0, 4.0, 5.0, 4.0, 5.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1219482421875, -0.11815738677978516, -0.11436653137207031, -0.11057567596435547, -0.10678482055664062, -0.10299396514892578, -0.09920310974121094, -0.0954122543334961, -0.09162139892578125, -0.0878305435180664, -0.08403968811035156, -0.08024883270263672, -0.07645797729492188, -0.07266712188720703, -0.06887626647949219, -0.06508541107177734, -0.0612945556640625, -0.057503700256347656, -0.05371284484863281, -0.04992198944091797, -0.046131134033203125, -0.04234027862548828, -0.03854942321777344, -0.034758567810058594, -0.03096771240234375, -0.027176856994628906, -0.023386001586914062, -0.01959514617919922, -0.015804290771484375, -0.012013435363769531, -0.008222579956054688, -0.004431724548339844, -0.000640869140625, 0.0031499862670898438, 0.0069408416748046875, 0.010731697082519531, 0.014522552490234375, 0.01831340789794922, 0.022104263305664062, 0.025895118713378906, 0.02968597412109375, 0.033476829528808594, 0.03726768493652344, 0.04105854034423828, 0.044849395751953125, 0.04864025115966797, 0.05243110656738281, 0.056221961975097656, 0.0600128173828125, 0.06380367279052734, 0.06759452819824219, 0.07138538360595703, 0.07517623901367188, 0.07896709442138672, 0.08275794982910156, 0.0865488052368164, 0.09033966064453125, 0.0941305160522461, 0.09792137145996094, 0.10171222686767578, 0.10550308227539062, 0.10929393768310547, 0.11308479309082031, 0.11687564849853516, 0.12066650390625]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 4.0, 1.0, 11.0, 13.0, 22.0, 39.0, 53.0, 88.0, 519.0, 100.0, 67.0, 27.0, 17.0, 21.0, 10.0, 4.0, 3.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.364222526550293, -5.18243932723999, -5.0006561279296875, -4.818872451782227, -4.637089252471924, -4.455306053161621, -4.27352237701416, -4.091739177703857, -3.9099559783935547, -3.728172779083252, -3.54638934135437, -3.3646059036254883, -3.1828227043151855, -3.001039505004883, -2.819256067276001, -2.637472629547119, -2.4556894302368164, -2.2739062309265137, -2.092122793197632, -1.9103394746780396, -1.7285561561584473, -1.546772837638855, -1.3649895191192627, -1.1832062005996704, -1.0014228820800781, -0.8196395635604858, -0.6378562450408936, -0.45607292652130127, -0.274289608001709, -0.0925062894821167, 0.08927702903747559, 0.27106034755706787, 0.45284414291381836, 0.6346274614334106, 0.8164107799530029, 0.9981940984725952, 1.1799774169921875, 1.3617607355117798, 1.543544054031372, 1.7253273725509644, 1.9071106910705566, 2.0888938903808594, 2.270677328109741, 2.452460765838623, 2.634243965148926, 2.8160271644592285, 2.9978106021881104, 3.179594039916992, 3.361377239227295, 3.5431604385375977, 3.7249438762664795, 3.9067273139953613, 4.088510513305664, 4.270293712615967, 4.4520769119262695, 4.6338605880737305, 4.815643787384033, 4.997426986694336, 5.179210662841797, 5.3609938621521, 5.542777061462402, 5.724560260772705, 5.906343460083008, 6.088127136230469, 6.2699103355407715]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 3.0, 8.0, 6.0, 6.0, 4.0, 7.0, 8.0, 4.0, 8.0, 12.0, 19.0, 18.0, 21.0, 23.0, 26.0, 27.0, 25.0, 38.0, 180.0, 260.0, 83.0, 32.0, 25.0, 14.0, 15.0, 15.0, 12.0, 9.0, 10.0, 19.0, 8.0, 8.0, 10.0, 8.0, 6.0, 8.0, 4.0, 5.0, 3.0, 3.0, 2.0, 0.0, 0.0, 3.0, 3.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-4.015302658081055, -3.894341230392456, -3.7733800411224365, -3.652418613433838, -3.5314571857452393, -3.4104957580566406, -3.289534568786621, -3.1685731410980225, -3.047611713409424, -2.926650285720825, -2.8056890964508057, -2.684727668762207, -2.5637662410736084, -2.4428048133850098, -2.3218436241149902, -2.2008821964263916, -2.079921007156372, -1.958959698677063, -1.8379982709884644, -1.7170369625091553, -1.5960755348205566, -1.4751142263412476, -1.3541529178619385, -1.2331914901733398, -1.1122301816940308, -0.9912688136100769, -0.870307445526123, -0.749346137046814, -0.6283847689628601, -0.5074234008789062, -0.38646209239959717, -0.2655007243156433, -0.14453959465026855, -0.02357824146747589, 0.09738311171531677, 0.21834444999694824, 0.3393058180809021, 0.46026718616485596, 0.581228494644165, 0.7021898627281189, 0.8231512308120728, 0.9441125988960266, 1.0650739669799805, 1.1860352754592896, 1.3069965839385986, 1.4279580116271973, 1.5489193201065063, 1.6698806285858154, 1.790842056274414, 1.9118033647537231, 2.0327646732330322, 2.153726100921631, 2.2746875286102295, 2.395648956298828, 2.5166101455688477, 2.6375715732574463, 2.758533000946045, 2.8794944286346436, 3.000455617904663, 3.1214170455932617, 3.2423784732818604, 3.363339900970459, 3.4843010902404785, 3.605262517929077, 3.7262237071990967]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 5.0, 5.0, 4.0, 4.0, 8.0, 10.0, 11.0, 9.0, 11.0, 11.0, 18.0, 21.0, 34.0, 22.0, 27.0, 39.0, 33.0, 37.0, 109.0, 212.0, 85.0, 41.0, 27.0, 14.0, 32.0, 15.0, 25.0, 14.0, 21.0, 12.0, 15.0, 12.0, 13.0, 12.0, 3.0, 4.0, 7.0, 9.0, 2.0, 1.0, 2.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.28076171875, -0.2715034484863281, -0.26224517822265625, -0.2529869079589844, -0.2437286376953125, -0.23447036743164062, -0.22521209716796875, -0.21595382690429688, -0.206695556640625, -0.19743728637695312, -0.18817901611328125, -0.17892074584960938, -0.1696624755859375, -0.16040420532226562, -0.15114593505859375, -0.14188766479492188, -0.13262939453125, -0.12337112426757812, -0.11411285400390625, -0.10485458374023438, -0.0955963134765625, -0.08633804321289062, -0.07707977294921875, -0.06782150268554688, -0.058563232421875, -0.049304962158203125, -0.04004669189453125, -0.030788421630859375, -0.0215301513671875, -0.012271881103515625, -0.00301361083984375, 0.006244659423828125, 0.0155029296875, 0.024761199951171875, 0.03401947021484375, 0.043277740478515625, 0.0525360107421875, 0.061794281005859375, 0.07105255126953125, 0.08031082153320312, 0.089569091796875, 0.09882736206054688, 0.10808563232421875, 0.11734390258789062, 0.1266021728515625, 0.13586044311523438, 0.14511871337890625, 0.15437698364257812, 0.16363525390625, 0.17289352416992188, 0.18215179443359375, 0.19141006469726562, 0.2006683349609375, 0.20992660522460938, 0.21918487548828125, 0.22844314575195312, 0.237701416015625, 0.24695968627929688, 0.25621795654296875, 0.2654762268066406, 0.2747344970703125, 0.2839927673339844, 0.29325103759765625, 0.3025093078613281, 0.311767578125]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 4.0, 6.0, 12.0, 15.0, 21.0, 27.0, 70.0, 214.0, 1587.0, 91547.0, 8292478.0, 2144.0, 262.0, 79.0, 56.0, 33.0, 15.0, 5.0, 1.0, 4.0, 2.0, 0.0, 2.0, 4.0, 1.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.132739067077637, -7.870877742767334, -7.609016418457031, -7.34715461730957, -7.085293292999268, -6.823431968688965, -6.561570167541504, -6.299708843231201, -6.037847518920898, -5.775986194610596, -5.514124870300293, -5.252263069152832, -4.990401744842529, -4.728540420532227, -4.466678619384766, -4.204817295074463, -3.94295597076416, -3.6810946464538574, -3.4192330837249756, -3.1573715209960938, -2.895510196685791, -2.6336488723754883, -2.3717873096466064, -2.1099257469177246, -1.8480644226074219, -1.5862029790878296, -1.3243415355682373, -1.062480092048645, -0.8006186485290527, -0.5387572050094604, -0.27689576148986816, -0.015034317970275879, 0.2468271255493164, 0.5086885690689087, 0.770550012588501, 1.0324114561080933, 1.2942728996276855, 1.5561343431472778, 1.8179957866668701, 2.079857349395752, 2.3417186737060547, 2.6035799980163574, 2.8654415607452393, 3.127303123474121, 3.389164447784424, 3.6510257720947266, 3.9128873348236084, 4.17474889755249, 4.436610221862793, 4.698471546173096, 4.960332870483398, 5.222194671630859, 5.484055995941162, 5.745917320251465, 6.007779121398926, 6.2696404457092285, 6.531501770019531, 6.793363094329834, 7.055224418640137, 7.317086219787598, 7.5789475440979, 7.840808868408203, 8.102670669555664, 8.364531517028809, 8.62639331817627]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 2.0, 0.0, 2.0, 3.0, 2.0, 1.0, 2.0, 5.0, 2.0, 2.0, 2.0, 7.0, 3.0, 7.0, 4.0, 5.0, 7.0, 3.0, 3.0, 6.0, 4.0, 4.0, 7.0, 7.0, 4.0, 3.0, 2.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.435853958129883, -6.282285213470459, -6.128715991973877, -5.975147247314453, -5.821578025817871, -5.668009281158447, -5.514440536499023, -5.360871315002441, -5.207302570343018, -5.053733825683594, -4.900164604187012, -4.746595859527588, -4.593026638031006, -4.439457893371582, -4.285888671875, -4.132319927215576, -3.9787509441375732, -3.8251819610595703, -3.6716129779815674, -3.5180439949035645, -3.3644752502441406, -3.2109062671661377, -3.0573372840881348, -2.903768539428711, -2.750199317932129, -2.596630334854126, -2.443061351776123, -2.289492607116699, -2.1359236240386963, -1.9823546409606934, -1.8287856578826904, -1.675216794013977, -1.5216476917266846, -1.3680787086486816, -1.2145098447799683, -1.0609408617019653, -0.9073719382286072, -0.753803014755249, -0.6002340316772461, -0.4466651678085327, -0.2930961847305298, -0.13952724635601044, 0.014041692018508911, 0.16761064529418945, 0.3211795687675476, 0.47474849224090576, 0.6283174753189087, 0.7818863391876221, 0.935455322265625, 1.089024305343628, 1.2425931692123413, 1.3961621522903442, 1.5497310161590576, 1.7032999992370605, 1.8568689823150635, 2.0104379653930664, 2.1640067100524902, 2.317575693130493, 2.471144676208496, 2.62471342086792, 2.778282403945923, 2.931851387023926, 3.0854203701019287, 3.2389893531799316, 3.3925583362579346]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 6.0, 1.0, 1.0, 1.0, 3.0, 6.0, 6.0, 11.0, 17.0, 15.0, 24.0, 31.0, 60.0, 72.0, 108.0, 164.0, 298.0, 611.0, 2132.0, 10046.0, 60171.0, 295552.0, 129081.0, 19904.0, 3834.0, 1032.0, 447.0, 179.0, 134.0, 96.0, 54.0, 56.0, 28.0, 23.0, 22.0, 16.0, 9.0, 6.0, 6.0, 5.0, 1.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.69921875, -5.5194091796875, -5.339599609375, -5.1597900390625, -4.97998046875, -4.8001708984375, -4.620361328125, -4.4405517578125, -4.2607421875, -4.0809326171875, -3.901123046875, -3.7213134765625, -3.54150390625, -3.3616943359375, -3.181884765625, -3.0020751953125, -2.822265625, -2.6424560546875, -2.462646484375, -2.2828369140625, -2.10302734375, -1.9232177734375, -1.743408203125, -1.5635986328125, -1.3837890625, -1.2039794921875, -1.024169921875, -0.8443603515625, -0.66455078125, -0.4847412109375, -0.304931640625, -0.1251220703125, 0.0546875, 0.2344970703125, 0.414306640625, 0.5941162109375, 0.77392578125, 0.9537353515625, 1.133544921875, 1.3133544921875, 1.4931640625, 1.6729736328125, 1.852783203125, 2.0325927734375, 2.21240234375, 2.3922119140625, 2.572021484375, 2.7518310546875, 2.931640625, 3.1114501953125, 3.291259765625, 3.4710693359375, 3.65087890625, 3.8306884765625, 4.010498046875, 4.1903076171875, 4.3701171875, 4.5499267578125, 4.729736328125, 4.9095458984375, 5.08935546875, 5.2691650390625, 5.448974609375, 5.6287841796875, 5.80859375]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 6.0, 6.0, 4.0, 10.0, 7.0, 13.0, 12.0, 10.0, 22.0, 20.0, 34.0, 45.0, 45.0, 42.0, 61.0, 74.0, 65.0, 85.0, 63.0, 54.0, 59.0, 45.0, 41.0, 46.0, 31.0, 24.0, 16.0, 14.0, 14.0, 7.0, 8.0, 6.0, 5.0, 6.0, 2.0, 2.0, 3.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.470458984375, -0.4566993713378906, -0.44293975830078125, -0.4291801452636719, -0.4154205322265625, -0.4016609191894531, -0.38790130615234375, -0.3741416931152344, -0.360382080078125, -0.3466224670410156, -0.33286285400390625, -0.3191032409667969, -0.3053436279296875, -0.2915840148925781, -0.27782440185546875, -0.2640647888183594, -0.25030517578125, -0.23654556274414062, -0.22278594970703125, -0.20902633666992188, -0.1952667236328125, -0.18150711059570312, -0.16774749755859375, -0.15398788452148438, -0.140228271484375, -0.12646865844726562, -0.11270904541015625, -0.09894943237304688, -0.0851898193359375, -0.07143020629882812, -0.05767059326171875, -0.043910980224609375, -0.0301513671875, -0.016391754150390625, -0.00263214111328125, 0.011127471923828125, 0.0248870849609375, 0.038646697998046875, 0.05240631103515625, 0.06616592407226562, 0.079925537109375, 0.09368515014648438, 0.10744476318359375, 0.12120437622070312, 0.1349639892578125, 0.14872360229492188, 0.16248321533203125, 0.17624282836914062, 0.19000244140625, 0.20376205444335938, 0.21752166748046875, 0.23128128051757812, 0.2450408935546875, 0.2588005065917969, 0.27256011962890625, 0.2863197326660156, 0.300079345703125, 0.3138389587402344, 0.32759857177734375, 0.3413581848144531, 0.3551177978515625, 0.3688774108886719, 0.38263702392578125, 0.3963966369628906, 0.41015625]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 2.0, 5.0, 6.0, 2.0, 11.0, 10.0, 30.0, 62.0, 91.0, 93.0, 69.0, 46.0, 21.0, 10.0, 9.0, 3.0, 5.0, 4.0, 1.0, 1.0, 3.0, 2.0, 1.0, 4.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9703562259674072, -2.837705612182617, -2.7050552368164062, -2.5724048614501953, -2.4397542476654053, -2.3071036338806152, -2.1744532585144043, -2.0418028831481934, -1.9091522693634033, -1.7765017747879028, -1.6438512802124023, -1.5112007856369019, -1.3785502910614014, -1.2458997964859009, -1.1132493019104004, -0.9805988073348999, -0.8479483127593994, -0.7152978181838989, -0.5826473236083984, -0.44999682903289795, -0.31734633445739746, -0.18469583988189697, -0.052045345306396484, 0.080605149269104, 0.2132556438446045, 0.345906138420105, 0.47855663299560547, 0.611207127571106, 0.7438576221466064, 0.8765081167221069, 1.0091586112976074, 1.141809105873108, 1.2744593620300293, 1.4071098566055298, 1.5397603511810303, 1.6724108457565308, 1.8050613403320312, 1.9377118349075317, 2.0703623294830322, 2.2030129432678223, 2.335663318634033, 2.468313694000244, 2.600964307785034, 2.733614921569824, 2.866265296936035, 2.998915672302246, 3.131566286087036, 3.264216899871826, 3.396867275238037, 3.529517650604248, 3.662168264389038, 3.794818878173828, 3.927469253540039, 4.06011962890625, 4.192770004272461, 4.32542085647583, 4.458071231842041, 4.590721607208252, 4.723372459411621, 4.856022834777832, 4.988673210144043, 5.121323585510254, 5.253973960876465, 5.386624813079834, 5.519275188446045]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 5.0, 1.0, 3.0, 1.0, 1.0, 3.0, 5.0, 1.0, 3.0, 4.0, 2.0, 4.0, 6.0, 4.0, 7.0, 6.0, 16.0, 35.0, 68.0, 69.0, 82.0, 61.0, 37.0, 15.0, 12.0, 7.0, 5.0, 5.0, 8.0, 2.0, 3.0, 3.0, 4.0, 0.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.7188446521759033, -1.6588523387908936, -1.5988600254058838, -1.5388678312301636, -1.4788755178451538, -1.418883204460144, -1.3588908910751343, -1.298898696899414, -1.2389063835144043, -1.1789140701293945, -1.1189217567443848, -1.0589295625686646, -0.9989372491836548, -0.938944935798645, -0.8789526224136353, -0.8189603686332703, -0.7589680552482605, -0.6989757418632507, -0.6389834880828857, -0.578991174697876, -0.518998920917511, -0.4590066075325012, -0.39901432394981384, -0.33902204036712646, -0.2790297567844391, -0.2190374732017517, -0.15904518961906433, -0.09905289113521576, -0.03906060755252838, 0.02093169093132019, 0.08092397451400757, 0.14091625809669495, 0.20090854167938232, 0.2609008252620697, 0.3208931088447571, 0.38088542222976685, 0.44087767601013184, 0.5008699893951416, 0.5608623027801514, 0.6208545565605164, 0.6808468103408813, 0.7408391237258911, 0.8008313775062561, 0.8608236908912659, 0.9208159446716309, 0.9808082580566406, 1.0408005714416504, 1.1007928848266602, 1.16078519821167, 1.2207775115966797, 1.2807698249816895, 1.3407620191574097, 1.4007543325424194, 1.4607466459274292, 1.520738959312439, 1.5807311534881592, 1.640723466873169, 1.7007157802581787, 1.7607080936431885, 1.8207002878189087, 1.8806926012039185, 1.9406849145889282, 2.0006771087646484, 2.060669422149658, 2.120661735534668]}, "train/train_runtime": 4741.9429, "train/train_samples_per_second": 6.018, "train/train_steps_per_second": 0.063, "train/total_flos": 0.0, "train/train_loss": 4.266032724669485, "eval/loss": 4.213485240936279, "eval/wer": 2.3599563665212218, "eval/runtime": 939.6426, "eval/samples_per_second": 2.812, "eval/steps_per_second": 0.235, "_wandb": {"runtime": 6059}} \ No newline at end of file diff --git a/wandb/run-20220302_041332-j5suzd56/logs/debug-internal.log b/wandb/run-20220302_041332-j5suzd56/logs/debug-internal.log index 1c08921c92704d172d0a86bcc77a2f5001142b3e..d6eb6bfe50d613fd2597bac2ff11cf25f4f9d954 100644 --- a/wandb/run-20220302_041332-j5suzd56/logs/debug-internal.log +++ b/wandb/run-20220302_041332-j5suzd56/logs/debug-internal.log @@ -7502,3 +7502,117 @@ 2022-03-02 05:54:18,049 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: stop_status 2022-03-02 05:54:18,049 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: stop_status 2022-03-02 05:54:18,804 DEBUG SenderThread:252447 [sender.py:send():235] send: stats +2022-03-02 05:54:26,927 INFO Thread-8 :252447 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/output.log +2022-03-02 05:54:28,928 INFO Thread-8 :252447 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/output.log +2022-03-02 05:54:32,833 DEBUG SenderThread:252447 [sender.py:send():235] send: telemetry +2022-03-02 05:54:32,834 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:32,834 DEBUG SenderThread:252447 [sender.py:send():235] send: exit +2022-03-02 05:54:32,834 INFO SenderThread:252447 [sender.py:send_exit():371] handling exit code: 1 +2022-03-02 05:54:32,834 INFO SenderThread:252447 [sender.py:send_exit():373] handling runtime: 6059 +2022-03-02 05:54:32,887 INFO SenderThread:252447 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:54:32,888 INFO SenderThread:252447 [sender.py:send_exit():379] send defer +2022-03-02 05:54:32,888 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:32,888 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: defer +2022-03-02 05:54:32,889 INFO HandlerThread:252447 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-02 05:54:32,889 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: defer +2022-03-02 05:54:32,889 INFO SenderThread:252447 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-02 05:54:32,889 INFO SenderThread:252447 [sender.py:transition_state():392] send defer: 1 +2022-03-02 05:54:32,889 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: defer +2022-03-02 05:54:32,889 INFO HandlerThread:252447 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-02 05:54:32,929 INFO Thread-8 :252447 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/wandb-summary.json +2022-03-02 05:54:32,929 INFO Thread-8 :252447 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/output.log +2022-03-02 05:54:33,004 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:33,005 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: defer +2022-03-02 05:54:33,005 INFO SenderThread:252447 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-02 05:54:33,005 INFO SenderThread:252447 [sender.py:transition_state():392] send defer: 2 +2022-03-02 05:54:33,005 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:33,005 DEBUG SenderThread:252447 [sender.py:send():235] send: stats +2022-03-02 05:54:33,006 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: defer +2022-03-02 05:54:33,006 INFO HandlerThread:252447 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-02 05:54:33,007 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: defer +2022-03-02 05:54:33,007 INFO SenderThread:252447 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-02 05:54:33,007 INFO SenderThread:252447 [sender.py:transition_state():392] send defer: 3 +2022-03-02 05:54:33,007 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: defer +2022-03-02 05:54:33,007 INFO HandlerThread:252447 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-02 05:54:33,067 DEBUG SenderThread:252447 [sender.py:send():235] send: summary +2022-03-02 05:54:33,133 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:33,195 INFO SenderThread:252447 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:54:33,195 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: defer +2022-03-02 05:54:33,195 INFO SenderThread:252447 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-02 05:54:33,195 INFO SenderThread:252447 [sender.py:transition_state():392] send defer: 4 +2022-03-02 05:54:33,195 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:33,196 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: defer +2022-03-02 05:54:33,196 INFO HandlerThread:252447 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-02 05:54:33,196 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: defer +2022-03-02 05:54:33,196 INFO SenderThread:252447 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-02 05:54:33,297 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:33,982 INFO Thread-8 :252447 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/wandb-summary.json +2022-03-02 05:54:33,983 INFO SenderThread:252447 [sender.py:transition_state():392] send defer: 5 +2022-03-02 05:54:33,983 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:33,984 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: defer +2022-03-02 05:54:33,984 INFO HandlerThread:252447 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-02 05:54:33,984 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: defer +2022-03-02 05:54:33,984 INFO SenderThread:252447 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-02 05:54:33,984 INFO SenderThread:252447 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-02 05:54:34,085 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:34,982 INFO Thread-8 :252447 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/config.yaml +2022-03-02 05:54:34,983 INFO SenderThread:252447 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files +2022-03-02 05:54:34,983 INFO SenderThread:252447 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/wandb-metadata.json wandb-metadata.json +2022-03-02 05:54:34,983 INFO SenderThread:252447 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/output.log output.log +2022-03-02 05:54:34,983 INFO SenderThread:252447 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/wandb-summary.json wandb-summary.json +2022-03-02 05:54:34,984 INFO SenderThread:252447 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/requirements.txt requirements.txt +2022-03-02 05:54:34,984 INFO SenderThread:252447 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/config.yaml config.yaml +2022-03-02 05:54:34,984 INFO SenderThread:252447 [sender.py:transition_state():392] send defer: 6 +2022-03-02 05:54:34,984 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:34,990 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: defer +2022-03-02 05:54:34,991 INFO HandlerThread:252447 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-02 05:54:34,994 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: defer +2022-03-02 05:54:34,994 INFO SenderThread:252447 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-02 05:54:34,994 INFO SenderThread:252447 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 05:54:35,093 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:35,095 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:35,196 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:35,196 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:35,288 INFO Thread-12 :252447 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/output.log +2022-03-02 05:54:35,298 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:35,298 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:35,328 INFO Thread-14 :252447 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/requirements.txt +2022-03-02 05:54:35,336 INFO Thread-13 :252447 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/wandb-summary.json +2022-03-02 05:54:35,399 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:35,399 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:35,416 INFO Thread-15 :252447 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/files/config.yaml +2022-03-02 05:54:35,501 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:35,501 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:35,602 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:35,602 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:35,617 INFO Thread-7 :252447 [sender.py:transition_state():392] send defer: 7 +2022-03-02 05:54:35,617 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: defer +2022-03-02 05:54:35,617 INFO HandlerThread:252447 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-02 05:54:35,618 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: defer +2022-03-02 05:54:35,618 INFO SenderThread:252447 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-02 05:54:35,703 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:36,916 INFO SenderThread:252447 [sender.py:transition_state():392] send defer: 8 +2022-03-02 05:54:36,916 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:36,917 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: defer +2022-03-02 05:54:36,917 INFO HandlerThread:252447 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-02 05:54:36,917 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: defer +2022-03-02 05:54:36,917 INFO SenderThread:252447 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-02 05:54:36,917 INFO SenderThread:252447 [sender.py:transition_state():392] send defer: 9 +2022-03-02 05:54:36,918 DEBUG SenderThread:252447 [sender.py:send():235] send: final +2022-03-02 05:54:36,919 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: defer +2022-03-02 05:54:36,920 DEBUG SenderThread:252447 [sender.py:send():235] send: footer +2022-03-02 05:54:36,920 INFO HandlerThread:252447 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-02 05:54:36,920 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: defer +2022-03-02 05:54:36,920 INFO SenderThread:252447 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-02 05:54:37,018 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 05:54:37,018 DEBUG SenderThread:252447 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 05:54:37,018 INFO SenderThread:252447 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 05:54:37,081 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: get_summary +2022-03-02 05:54:37,184 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-02 05:54:37,187 DEBUG HandlerThread:252447 [handler.py:handle_request():131] handle_request: shutdown +2022-03-02 05:54:37,187 INFO HandlerThread:252447 [handler.py:finish():739] shutting down handler +2022-03-02 05:54:37,919 INFO WriterThread:252447 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_041332-j5suzd56/run-j5suzd56.wandb +2022-03-02 05:54:38,080 INFO SenderThread:252447 [sender.py:finish():1075] shutting down sender +2022-03-02 05:54:38,080 INFO SenderThread:252447 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 05:54:38,080 INFO SenderThread:252447 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 05:54:38,087 INFO MainThread:252447 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220302_041332-j5suzd56/logs/debug.log b/wandb/run-20220302_041332-j5suzd56/logs/debug.log index c066729c5200db159dc3926d3fb90fcb0b71b9ad..1e9e520cd2f7614eb0aa8ae7df141c776290e797 100644 --- a/wandb/run-20220302_041332-j5suzd56/logs/debug.log +++ b/wandb/run-20220302_041332-j5suzd56/logs/debug.log @@ -25,3 +25,117 @@ config: {} 2022-03-02 04:13:33,866 INFO MainThread:252348 [wandb_init.py:init():651] run started, returning control to user process 2022-03-02 04:13:33,868 INFO MainThread:252348 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 12, 'per_device_eval_batch_size': 12, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar02_04-12-50_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 12, 'eval_batch_size': 12} 2022-03-02 04:13:33,872 INFO MainThread:252348 [wandb_watch.py:watch():43] Watching +2022-03-02 05:54:30,324 INFO MainThread:252348 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-02 05:54:30,327 INFO MainThread:252348 [wandb_run.py:_restore():1769] restore +2022-03-02 05:54:32,889 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 05:54:33,006 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 05:54:33,196 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 05:54:33,984 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 05:54:34,991 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 3 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2391419 +} + +2022-03-02 05:54:35,095 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 914810 + total_bytes: 2692823 +} + +2022-03-02 05:54:35,197 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2692823 + total_bytes: 2692823 +} + +2022-03-02 05:54:35,298 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2692823 + total_bytes: 2692823 +} + +2022-03-02 05:54:35,400 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2692823 + total_bytes: 2692823 +} + +2022-03-02 05:54:35,501 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2692823 + total_bytes: 2692823 +} + +2022-03-02 05:54:35,603 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2692823 + total_bytes: 2692823 +} + +2022-03-02 05:54:36,917 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2692823 + total_bytes: 2692823 +} + +2022-03-02 05:54:37,080 INFO MainThread:252348 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2692823 + total_bytes: 2692823 +} +local_info { +} + +2022-03-02 05:54:38,224 INFO MainThread:252348 [wandb_run.py:_append_history():2144] rendering history +2022-03-02 05:54:38,225 INFO MainThread:252348 [wandb_run.py:_append_summary():2102] rendering summary +2022-03-02 05:54:38,225 INFO MainThread:252348 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220302_041332-j5suzd56/run-j5suzd56.wandb b/wandb/run-20220302_041332-j5suzd56/run-j5suzd56.wandb index e1569748da15793b08a2af17f728c5302a8cf316..aafb3c41f64849e5a6e8a4f9dcc7a42d1093fa21 100644 --- a/wandb/run-20220302_041332-j5suzd56/run-j5suzd56.wandb +++ b/wandb/run-20220302_041332-j5suzd56/run-j5suzd56.wandb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:229bc5195c54ed39017b8e9aa44f1db2dd40c9060bf0579751030f07deb5b4d1 -size 36421026 +oid sha256:1d110957484b68080ee103fb92c345f9656f34b2deb75d1df2b764632ebaf12a +size 37940125 diff --git a/wandb/run-20220302_055556-ymuc7hv0/files/config.yaml b/wandb/run-20220302_055556-ymuc7hv0/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b825d254e02c1223459e082fa774dcbcd1fad27 --- /dev/null +++ b/wandb/run-20220302_055556-ymuc7hv0/files/config.yaml @@ -0,0 +1,11321 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + - 1: train/loss + 5: 1 + 6: + - 1 + - 1: train/learning_rate + 5: 1 + 6: + - 1 + - 1: train/epoch + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + python_version: 3.9.5 + start_time: 1646200556 + t: + 1: + - 1 + - 5 + - 11 + 2: + - 1 + - 5 + - 11 + 3: + - 1 + - 7 + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 14 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 2 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0001 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar02_05-55-14_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 14 +per_device_train_batch_size: + desc: null + value: 14 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 14 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220302_055556-ymuc7hv0/files/output.log b/wandb/run-20220302_055556-ymuc7hv0/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..f0e97fcfe254fd8af20d1824d1a738ba639d1a08 --- /dev/null +++ b/wandb/run-20220302_055556-ymuc7hv0/files/output.log @@ -0,0 +1,1518 @@ + + + 0%| | 0/1019 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.795, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 05:56:04,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%| | 1/1019 [00:06<1:55:02, 6.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:56:07,171 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0164, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 05:56:10,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 2/1019 [00:12<1:47:09, 6.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:56:13,149 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0042, 'learning_rate': 2.0000000000000002e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 05:56:16,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 3/1019 [00:18<1:45:53, 6.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:56:19,323 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8341, 'learning_rate': 4.0000000000000003e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 05:56:22,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 4/1019 [00:24<1:44:06, 6.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:56:25,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6488, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 05:56:28,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▍ | 5/1019 [00:30<1:41:45, 6.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:56:31,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:56:33,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 6/1019 [00:36<1:40:49, 5.97s/it] + + 1%|▍ | 6/1019 [00:36<1:40:49, 5.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:56:36,883 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:56:39,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 7/1019 [00:42<1:39:48, 5.92s/it] + + 1%|▌ | 7/1019 [00:42<1:39:48, 5.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:56:42,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7924, 'learning_rate': 1.2000000000000002e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 05:56:45,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▋ | 8/1019 [00:48<1:38:45, 5.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:56:48,300 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9304, 'learning_rate': 1.4000000000000001e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 05:56:51,120 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▋ | 9/1019 [00:53<1:37:34, 5.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:56:54,029 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:56:56,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 10/1019 [00:59<1:36:28, 5.74s/it] + + 1%|▊ | 10/1019 [00:59<1:36:28, 5.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:56:59,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:57:02,408 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 11/1019 [01:05<1:36:05, 5.72s/it] + + 1%|▊ | 11/1019 [01:05<1:36:05, 5.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:57:05,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8212, 'learning_rate': 1.8e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 05:57:08,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▉ | 12/1019 [01:10<1:35:20, 5.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:57:10,875 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7876, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 05:57:13,594 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|█ | 13/1019 [01:16<1:34:47, 5.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:57:16,406 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:57:19,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 14/1019 [01:21<1:33:59, 5.61s/it] + + 1%|█ | 14/1019 [01:21<1:33:59, 5.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:57:21,906 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:57:24,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█▏ | 15/1019 [01:27<1:32:58, 5.56s/it] + + 1%|█▏ | 15/1019 [01:27<1:32:58, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:57:27,358 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8406, 'learning_rate': 2.6e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 05:57:29,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▏ | 16/1019 [01:32<1:32:25, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:57:32,721 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:57:35,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 17/1019 [01:38<1:31:22, 5.47s/it] + + 2%|█▎ | 17/1019 [01:38<1:31:22, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:57:38,048 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:57:40,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 18/1019 [01:43<1:30:15, 5.41s/it] + + 2%|█▍ | 18/1019 [01:43<1:30:15, 5.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:57:43,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:57:45,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 19/1019 [01:48<1:29:37, 5.38s/it] + + 2%|█▍ | 19/1019 [01:48<1:29:37, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:57:48,652 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:57:51,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 20/1019 [01:53<1:29:15, 5.36s/it] + + 2%|█▌ | 20/1019 [01:53<1:29:15, 5.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:57:53,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:57:56,433 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 21/1019 [01:59<1:28:23, 5.31s/it] + + 2%|█▋ | 21/1019 [01:59<1:28:23, 5.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:57:59,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:58:01,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 22/1019 [02:04<1:27:49, 5.29s/it] + + 2%|█▋ | 22/1019 [02:04<1:27:49, 5.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:58:04,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6241, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 05:58:06,883 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▊ | 23/1019 [02:09<1:27:28, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:58:09,500 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:58:12,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 24/1019 [02:14<1:26:39, 5.23s/it] + + 2%|█▊ | 24/1019 [02:14<1:26:39, 5.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:58:14,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5592, 'learning_rate': 4.4e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 05:58:17,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▉ | 25/1019 [02:19<1:25:41, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:58:19,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:58:22,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██ | 26/1019 [02:24<1:25:07, 5.14s/it] + + 3%|██ | 26/1019 [02:24<1:25:07, 5.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:58:24,710 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:58:27,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██ | 27/1019 [02:29<1:24:17, 5.10s/it] + + 3%|██ | 27/1019 [02:29<1:24:17, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:58:29,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:58:32,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 28/1019 [02:34<1:23:20, 5.05s/it] + + 3%|██▏ | 28/1019 [02:34<1:23:20, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:58:34,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:58:37,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 29/1019 [02:39<1:22:48, 5.02s/it] + + 3%|██▏ | 29/1019 [02:39<1:22:48, 5.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:58:39,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:58:41,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 30/1019 [02:44<1:22:05, 4.98s/it] + + 3%|██▎ | 30/1019 [02:44<1:22:05, 4.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:58:44,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:58:46,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 31/1019 [02:49<1:21:37, 4.96s/it] + + 3%|██▍ | 31/1019 [02:49<1:21:37, 4.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:58:49,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:58:51,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 32/1019 [02:54<1:20:40, 4.90s/it] + + 3%|██▍ | 32/1019 [02:54<1:20:40, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:58:54,020 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:58:56,313 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3961, 'learning_rate': 6e-06, 'epoch': 0.03} + + 3%|██▌ | 33/1019 [02:59<1:19:45, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:58:58,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:00,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 34/1019 [03:03<1:18:17, 4.77s/it] + + 3%|██▋ | 34/1019 [03:03<1:18:17, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:03,204 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:05,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 35/1019 [03:08<1:16:57, 4.69s/it] + + 3%|██▋ | 35/1019 [03:08<1:16:57, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:07,702 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5288, 'learning_rate': 6.6e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:09,900 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|██▊ | 36/1019 [03:12<1:15:57, 4.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:12,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4346, 'learning_rate': 6.800000000000001e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:14,333 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 37/1019 [03:17<1:14:52, 4.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:16,513 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4915, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:18,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|██▉ | 38/1019 [03:21<1:12:59, 4.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:20,652 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6269, 'learning_rate': 7.2e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:22,673 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|███ | 39/1019 [03:25<1:11:17, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:24,734 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4273, 'learning_rate': 7.4e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:26,678 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|███ | 40/1019 [03:29<1:09:27, 4.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:28,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2809, 'learning_rate': 7.6e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:30,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|███▏ | 41/1019 [03:33<1:07:22, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:32,368 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4596, 'learning_rate': 7.8e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:34,093 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|███▎ | 42/1019 [03:36<1:04:32, 3.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:35,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3236, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:37,363 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 43/1019 [03:40<1:01:06, 3.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:38,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5072, 'learning_rate': 8.200000000000001e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:40,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 44/1019 [03:43<57:33, 3.54s/it] + 4%|███▍ | 44/1019 [03:43<57:33, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:41,848 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:43,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|███▌ | 45/1019 [03:45<53:12, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:44,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.662, 'learning_rate': 8.599999999999999e-06, 'epoch': 0.05} +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:45,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 46/1019 [03:48<48:37, 3.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:46,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5382, 'learning_rate': 8.8e-06, 'epoch': 0.05} +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:47,491 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 47/1019 [03:50<44:05, 2.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:48,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:49,328 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8504, 'learning_rate': 9e-06, 'epoch': 0.05} +{'loss': 5.2149, 'learning_rate': 9.2e-06, 'epoch': 0.05} + 5%|███▊ | 48/1019 [03:52<39:45, 2.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:50,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:50,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 49/1019 [03:53<35:38, 2.20s/it] + 5%|███▉ | 49/1019 [03:53<35:38, 2.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:51,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:59:52,904 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 50/1019 [03:55<34:25, 2.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:56,125 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 50/1019 [03:55<34:25, 2.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:59:56,125 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 51/1019 [04:01<54:22, 3.37s/it]g-point operations will not be computed-02 05:59:56,125 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 51/1019 [04:01<54:22, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:02,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 51/1019 [04:01<54:22, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:02,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 52/1019 [04:07<1:06:57, 4.15s/it]g-point operations will not be computed-02 06:00:02,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 52/1019 [04:07<1:06:57, 4.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:08,185 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 53/1019 [04:13<1:15:26, 4.69s/it]g-point operations will not be computed-02 06:00:08,185 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 53/1019 [04:13<1:15:26, 4.69s/it]g-point operations will not be computed-02 06:00:08,185 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 53/1019 [04:13<1:15:26, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:14,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 54/1019 [04:19<1:21:05, 5.04s/it]g-point operations will not be computed-02 06:00:14,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 54/1019 [04:19<1:21:05, 5.04s/it]g-point operations will not be computed-02 06:00:14,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 54/1019 [04:19<1:21:05, 5.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:19,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 55/1019 [04:25<1:24:26, 5.26s/it]g-point operations will not be computed-02 06:00:19,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 55/1019 [04:25<1:24:26, 5.26s/it]g-point operations will not be computed-02 06:00:19,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 55/1019 [04:25<1:24:26, 5.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:25,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 56/1019 [04:31<1:27:04, 5.43s/it]g-point operations will not be computed-02 06:00:25,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 56/1019 [04:31<1:27:04, 5.43s/it]g-point operations will not be computed-02 06:00:25,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 56/1019 [04:31<1:27:04, 5.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:31,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 56/1019 [04:31<1:27:04, 5.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:31,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 57/1019 [04:37<1:28:43, 5.53s/it]g-point operations will not be computed-02 06:00:31,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 57/1019 [04:37<1:28:43, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:37,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 57/1019 [04:37<1:28:43, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:37,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 58/1019 [04:42<1:29:26, 5.58s/it]g-point operations will not be computed-02 06:00:37,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 58/1019 [04:42<1:29:26, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:42,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 58/1019 [04:42<1:29:26, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:42,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 59/1019 [04:48<1:29:48, 5.61s/it]g-point operations will not be computed-02 06:00:42,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 59/1019 [04:48<1:29:48, 5.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:48,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 59/1019 [04:48<1:29:48, 5.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:48,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 60/1019 [04:54<1:30:04, 5.64s/it]g-point operations will not be computed-02 06:00:48,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 60/1019 [04:54<1:30:04, 5.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:54,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 61/1019 [04:59<1:29:13, 5.59s/it]g-point operations will not be computed-02 06:00:54,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 61/1019 [04:59<1:29:13, 5.59s/it]g-point operations will not be computed-02 06:00:54,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 61/1019 [04:59<1:29:13, 5.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:59,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 61/1019 [04:59<1:29:13, 5.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:00:59,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 62/1019 [05:05<1:28:56, 5.58s/it]g-point operations will not be computed-02 06:00:59,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 62/1019 [05:05<1:28:56, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:05,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 62/1019 [05:05<1:28:56, 5.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:05,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 63/1019 [05:10<1:28:30, 5.55s/it]g-point operations will not be computed-02 06:01:05,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 63/1019 [05:10<1:28:30, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:10,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 64/1019 [05:16<1:28:12, 5.54s/it]g-point operations will not be computed-02 06:01:10,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 64/1019 [05:16<1:28:12, 5.54s/it]g-point operations will not be computed-02 06:01:10,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 64/1019 [05:16<1:28:12, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:16,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 65/1019 [05:21<1:27:38, 5.51s/it]g-point operations will not be computed-02 06:01:16,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 65/1019 [05:21<1:27:38, 5.51s/it]g-point operations will not be computed-02 06:01:16,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 65/1019 [05:21<1:27:38, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:21,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 65/1019 [05:21<1:27:38, 5.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:21,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 66/1019 [05:26<1:27:06, 5.48s/it]g-point operations will not be computed-02 06:01:21,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 66/1019 [05:26<1:27:06, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:27,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 67/1019 [05:32<1:27:02, 5.49s/it]g-point operations will not be computed-02 06:01:27,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 67/1019 [05:32<1:27:02, 5.49s/it]g-point operations will not be computed-02 06:01:27,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 67/1019 [05:32<1:27:02, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:32,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 67/1019 [05:32<1:27:02, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:32,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 68/1019 [05:37<1:26:08, 5.44s/it]g-point operations will not be computed-02 06:01:32,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 68/1019 [05:37<1:26:08, 5.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:37,907 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 68/1019 [05:37<1:26:08, 5.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:37,907 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 69/1019 [05:43<1:25:58, 5.43s/it]g-point operations will not be computed-02 06:01:37,907 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 69/1019 [05:43<1:25:58, 5.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:43,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 70/1019 [05:48<1:25:01, 5.38s/it]g-point operations will not be computed-02 06:01:43,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 70/1019 [05:48<1:25:01, 5.38s/it]g-point operations will not be computed-02 06:01:43,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 70/1019 [05:48<1:25:01, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:48,428 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 70/1019 [05:48<1:25:01, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:48,428 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 71/1019 [05:53<1:24:20, 5.34s/it]g-point operations will not be computed-02 06:01:48,428 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 71/1019 [05:53<1:24:20, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:53,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 72/1019 [05:58<1:23:56, 5.32s/it]g-point operations will not be computed-02 06:01:53,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 72/1019 [05:58<1:23:56, 5.32s/it]g-point operations will not be computed-02 06:01:53,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 72/1019 [05:58<1:23:56, 5.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:58,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 72/1019 [05:58<1:23:56, 5.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:01:58,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 73/1019 [06:04<1:23:08, 5.27s/it]g-point operations will not be computed-02 06:01:58,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 73/1019 [06:04<1:23:08, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:04,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 74/1019 [06:09<1:22:25, 5.23s/it]g-point operations will not be computed-02 06:02:04,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 74/1019 [06:09<1:22:25, 5.23s/it]g-point operations will not be computed-02 06:02:04,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 74/1019 [06:09<1:22:25, 5.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:09,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 74/1019 [06:09<1:22:25, 5.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:09,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 75/1019 [06:14<1:21:54, 5.21s/it]g-point operations will not be computed-02 06:02:09,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 75/1019 [06:14<1:21:54, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:14,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 76/1019 [06:19<1:21:17, 5.17s/it]g-point operations will not be computed-02 06:02:14,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 76/1019 [06:19<1:21:17, 5.17s/it]g-point operations will not be computed-02 06:02:14,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 76/1019 [06:19<1:21:17, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:19,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 76/1019 [06:19<1:21:17, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:19,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 77/1019 [06:24<1:20:18, 5.12s/it]g-point operations will not be computed-02 06:02:19,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 77/1019 [06:24<1:20:18, 5.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:24,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 78/1019 [06:29<1:19:26, 5.07s/it]g-point operations will not be computed-02 06:02:24,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 78/1019 [06:29<1:19:26, 5.07s/it]g-point operations will not be computed-02 06:02:24,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 78/1019 [06:29<1:19:26, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:29,282 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 78/1019 [06:29<1:19:26, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:29,282 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 79/1019 [06:34<1:18:47, 5.03s/it]g-point operations will not be computed-02 06:02:29,282 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 79/1019 [06:34<1:18:47, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:34,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 80/1019 [06:39<1:18:03, 4.99s/it]g-point operations will not be computed-02 06:02:34,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 80/1019 [06:39<1:18:03, 4.99s/it]g-point operations will not be computed-02 06:02:34,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 80/1019 [06:39<1:18:03, 4.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:39,096 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 81/1019 [06:44<1:17:24, 4.95s/it]g-point operations will not be computed-02 06:02:39,096 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 81/1019 [06:44<1:17:24, 4.95s/it]g-point operations will not be computed-02 06:02:39,096 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 81/1019 [06:44<1:17:24, 4.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:43,950 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 81/1019 [06:44<1:17:24, 4.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:43,950 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 82/1019 [06:48<1:16:30, 4.90s/it]g-point operations will not be computed-02 06:02:43,950 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 82/1019 [06:48<1:16:30, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:48,631 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 83/1019 [06:53<1:15:25, 4.83s/it]g-point operations will not be computed-02 06:02:48,631 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 83/1019 [06:53<1:15:25, 4.83s/it]g-point operations will not be computed-02 06:02:48,631 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 83/1019 [06:53<1:15:25, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 84/1019 [06:58<1:14:08, 4.76s/it]g-point operations will not be computed-02 06:02:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 84/1019 [06:58<1:14:08, 4.76s/it]g-point operations will not be computed-02 06:02:53,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 84/1019 [06:58<1:14:08, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:02:57,786 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 85/1019 [07:02<1:12:31, 4.66s/it]g-point operations will not be computed-02 06:02:57,786 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 85/1019 [07:02<1:12:31, 4.66s/it]g-point operations will not be computed-02 06:02:57,786 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 85/1019 [07:02<1:12:31, 4.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:02,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 86/1019 [07:07<1:11:40, 4.61s/it]g-point operations will not be computed-02 06:03:02,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 86/1019 [07:07<1:11:40, 4.61s/it]g-point operations will not be computed-02 06:03:02,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 86/1019 [07:07<1:11:40, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:06,674 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 86/1019 [07:07<1:11:40, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:06,674 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 87/1019 [07:11<1:10:20, 4.53s/it]g-point operations will not be computed-02 06:03:06,674 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 87/1019 [07:11<1:10:20, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:10,924 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 87/1019 [07:11<1:10:20, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:10,924 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 88/1019 [07:15<1:08:52, 4.44s/it]g-point operations will not be computed-02 06:03:10,924 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 88/1019 [07:15<1:08:52, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:15,066 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 88/1019 [07:15<1:08:52, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:15,066 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 89/1019 [07:19<1:06:50, 4.31s/it]g-point operations will not be computed-02 06:03:15,066 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 89/1019 [07:19<1:06:50, 4.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:19,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 89/1019 [07:19<1:06:50, 4.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:19,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 90/1019 [07:23<1:04:25, 4.16s/it]g-point operations will not be computed-02 06:03:19,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:03:24,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:03:22,704 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:03:24,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:03:22,704 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3593, 'learning_rate': 1.76e-05, 'epoch': 0.09} + 9%|███████ | 91/1019 [07:27<1:01:52, 4.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:26,234 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:03:27,873 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 92/1019 [07:30<59:11, 3.83s/it] + 9%|███████▎ | 92/1019 [07:30<59:11, 3.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:29,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 92/1019 [07:30<59:11, 3.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:29,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 93/1019 [07:33<56:08, 3.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:32,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 94/1019 [07:36<53:03, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:32,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 94/1019 [07:36<53:03, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:32,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 94/1019 [07:36<53:03, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:35,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 94/1019 [07:36<53:03, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:35,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 95/1019 [07:39<49:36, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:38,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 95/1019 [07:39<49:36, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:38,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▋ | 96/1019 [07:41<46:03, 2.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:40,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 97/1019 [07:44<42:07, 2.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:42,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 97/1019 [07:44<42:07, 2.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:42,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2987, 'learning_rate': 1.88e-05, 'epoch': 0.1} + 10%|███████▊ | 98/1019 [07:45<38:06, 2.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:44,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 98/1019 [07:45<38:06, 2.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:44,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 06:03:45,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 06:03:45,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 100/1019 [07:49<32:33, 2.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:45,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 100/1019 [07:49<32:33, 2.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:50,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 100/1019 [07:49<32:33, 2.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:50,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 101/1019 [07:55<51:16, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:50,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 101/1019 [07:55<51:16, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:55,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 102/1019 [08:01<1:02:27, 4.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:55,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 102/1019 [08:01<1:02:27, 4.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:03:55,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 102/1019 [08:01<1:02:27, 4.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:01,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 103/1019 [08:07<1:10:55, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:01,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 103/1019 [08:07<1:10:55, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:01,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 103/1019 [08:07<1:10:55, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:07,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 104/1019 [08:13<1:15:38, 4.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:07,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 104/1019 [08:13<1:15:38, 4.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:07,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 104/1019 [08:13<1:15:38, 4.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:13,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 104/1019 [08:13<1:15:38, 4.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:13,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 105/1019 [08:18<1:19:10, 5.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:13,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 105/1019 [08:18<1:19:10, 5.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:19,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 106/1019 [08:24<1:21:50, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:19,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 106/1019 [08:24<1:21:50, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:19,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 106/1019 [08:24<1:21:50, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:24,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 107/1019 [08:30<1:23:15, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:24,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 107/1019 [08:30<1:23:15, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:24,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 107/1019 [08:30<1:23:15, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:30,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 108/1019 [08:36<1:24:03, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:30,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 108/1019 [08:36<1:24:03, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:30,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 108/1019 [08:36<1:24:03, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:36,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 109/1019 [08:41<1:24:33, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:36,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 109/1019 [08:41<1:24:33, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:36,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 109/1019 [08:41<1:24:33, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:41,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 110/1019 [08:47<1:24:47, 5.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:41,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 110/1019 [08:47<1:24:47, 5.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:41,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 110/1019 [08:47<1:24:47, 5.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:47,475 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 111/1019 [08:52<1:24:17, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:47,475 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 111/1019 [08:52<1:24:17, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:47,475 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 111/1019 [08:52<1:24:17, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:52,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 112/1019 [08:58<1:23:44, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:52,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 112/1019 [08:58<1:23:44, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:52,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 112/1019 [08:58<1:23:44, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:58,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 113/1019 [09:03<1:23:20, 5.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:58,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 113/1019 [09:03<1:23:20, 5.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:04:58,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 113/1019 [09:03<1:23:20, 5.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:03,906 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 114/1019 [09:09<1:22:45, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:03,906 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 114/1019 [09:09<1:22:45, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:03,906 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 114/1019 [09:09<1:22:45, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:09,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 115/1019 [09:14<1:22:27, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:09,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 115/1019 [09:14<1:22:27, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:09,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 115/1019 [09:14<1:22:27, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:14,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 115/1019 [09:14<1:22:27, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:14,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 116/1019 [09:20<1:22:07, 5.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:14,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 116/1019 [09:20<1:22:07, 5.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:20,192 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 06:05:20,192 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 06:05:20,192 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 117/1019 [09:25<1:22:07, 5.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:25,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 118/1019 [09:30<1:20:58, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:25,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 118/1019 [09:30<1:20:58, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:25,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 118/1019 [09:30<1:20:58, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:30,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 119/1019 [09:36<1:20:09, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:30,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 119/1019 [09:36<1:20:09, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:30,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 119/1019 [09:36<1:20:09, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:36,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 120/1019 [09:41<1:19:42, 5.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:36,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 120/1019 [09:41<1:19:42, 5.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:36,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 120/1019 [09:41<1:19:42, 5.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:41,322 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 120/1019 [09:41<1:19:42, 5.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:41,322 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 120/1019 [09:41<1:19:42, 5.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:41,322 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 121/1019 [09:46<1:19:23, 5.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:41,322 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 121/1019 [09:46<1:19:23, 5.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:46,505 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 122/1019 [09:51<1:18:38, 5.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:46,505 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 122/1019 [09:51<1:18:38, 5.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:46,505 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 122/1019 [09:51<1:18:38, 5.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:51,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 06:05:51,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 06:05:51,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 123/1019 [09:56<1:18:16, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:51,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 123/1019 [09:56<1:18:16, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:56,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 123/1019 [09:56<1:18:16, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:56,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 124/1019 [10:02<1:17:43, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:05:56,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 124/1019 [10:02<1:17:43, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:01,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 125/1019 [10:07<1:16:52, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:01,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 125/1019 [10:07<1:16:52, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:01,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 125/1019 [10:07<1:16:52, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:07,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 125/1019 [10:07<1:16:52, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:07,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 126/1019 [10:12<1:16:29, 5.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:07,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 126/1019 [10:12<1:16:29, 5.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:12,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 126/1019 [10:12<1:16:29, 5.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:12,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 127/1019 [10:17<1:15:44, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:12,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 127/1019 [10:17<1:15:44, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:16,984 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 127/1019 [10:17<1:15:44, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:16,984 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 128/1019 [10:22<1:14:46, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:16,984 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 128/1019 [10:22<1:14:46, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:21,889 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 128/1019 [10:22<1:14:46, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:21,889 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 129/1019 [10:26<1:14:11, 5.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:21,889 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 129/1019 [10:26<1:14:11, 5.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:26,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 129/1019 [10:26<1:14:11, 5.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:26,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 130/1019 [10:31<1:13:11, 4.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:26,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 130/1019 [10:31<1:13:11, 4.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:31,543 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 130/1019 [10:31<1:13:11, 4.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:31,543 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 131/1019 [10:36<1:12:33, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:31,543 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 131/1019 [10:36<1:12:33, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:36,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 132/1019 [10:41<1:11:48, 4.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:36,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 132/1019 [10:41<1:11:48, 4.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:36,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 132/1019 [10:41<1:11:48, 4.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:41,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 132/1019 [10:41<1:11:48, 4.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:41,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 133/1019 [10:46<1:11:14, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:41,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 133/1019 [10:46<1:11:14, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:45,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 133/1019 [10:46<1:11:14, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:45,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 134/1019 [10:50<1:10:09, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:45,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 134/1019 [10:50<1:10:09, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:50,326 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 134/1019 [10:50<1:10:09, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:50,326 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 135/1019 [10:55<1:09:08, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:50,326 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 135/1019 [10:55<1:09:08, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:54,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 135/1019 [10:55<1:09:08, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:54,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 136/1019 [10:59<1:08:03, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:54,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 136/1019 [10:59<1:08:03, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:59,275 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 136/1019 [10:59<1:08:03, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:59,275 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 137/1019 [11:04<1:06:56, 4.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:06:59,275 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 137/1019 [11:04<1:06:56, 4.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:03,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 137/1019 [11:04<1:06:56, 4.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:03,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▌ | 138/1019 [11:08<1:05:19, 4.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:03,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▌ | 138/1019 [11:08<1:05:19, 4.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:07,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▌ | 138/1019 [11:08<1:05:19, 4.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:07,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 139/1019 [11:12<1:03:19, 4.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:07,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 139/1019 [11:12<1:03:19, 4.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:11,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 139/1019 [11:12<1:03:19, 4.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:11,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 140/1019 [11:16<1:01:37, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:11,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 140/1019 [11:16<1:01:37, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:15,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 140/1019 [11:16<1:01:37, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:15,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 141/1019 [11:19<59:09, 4.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:15,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 141/1019 [11:19<59:09, 4.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:19,014 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 141/1019 [11:19<59:09, 4.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:19,014 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 142/1019 [11:23<56:11, 3.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:19,014 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 142/1019 [11:23<56:11, 3.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:19,014 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 143/1019 [11:26<52:57, 3.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:22,242 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 143/1019 [11:26<52:57, 3.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:25,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 143/1019 [11:26<52:57, 3.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:25,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 144/1019 [11:29<49:21, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:27,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 144/1019 [11:29<49:21, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:27,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 145/1019 [11:31<46:00, 3.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:27,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 145/1019 [11:31<46:00, 3.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:27,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 146/1019 [11:34<42:10, 2.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:30,421 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 146/1019 [11:34<42:10, 2.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:30,421 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▌ | 147/1019 [11:36<38:30, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:32,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▌ | 147/1019 [11:36<38:30, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:32,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 148/1019 [11:38<34:50, 2.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:36,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 148/1019 [11:38<34:50, 2.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:36,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 149/1019 [11:39<31:18, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:37,702 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 149/1019 [11:39<31:18, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:37,702 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 150/1019 [11:41<30:03, 2.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:37,702 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 150/1019 [11:41<30:03, 2.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:42,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 150/1019 [11:41<30:03, 2.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:42,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 151/1019 [11:47<48:33, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:42,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 151/1019 [11:47<48:33, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:48,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 151/1019 [11:47<48:33, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:48,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 152/1019 [11:53<59:39, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:48,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 152/1019 [11:53<59:39, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 152/1019 [11:53<59:39, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 152/1019 [11:53<59:39, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3893, 'learning_rate': 3e-05, 'epoch': 0.15} + 15%|███████████▉ | 152/1019 [11:53<59:39, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 154/1019 [12:05<1:11:55, 4.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 154/1019 [12:05<1:11:55, 4.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2352, 'learning_rate': 3.02e-05, 'epoch': 0.15} + 15%|███████████▊ | 155/1019 [12:11<1:14:47, 5.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 155/1019 [12:11<1:14:47, 5.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1155, 'learning_rate': 3.04e-05, 'epoch': 0.15} + 15%|███████████▊ | 155/1019 [12:11<1:14:47, 5.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 156/1019 [12:16<1:16:46, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 156/1019 [12:16<1:16:46, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 156/1019 [12:16<1:16:46, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 157/1019 [12:22<1:18:18, 5.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 157/1019 [12:22<1:18:18, 5.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████ | 158/1019 [12:28<1:19:21, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████ | 158/1019 [12:28<1:19:21, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4512, 'learning_rate': 3.1e-05, 'epoch': 0.15} + 16%|████████████▏ | 159/1019 [12:33<1:19:50, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 159/1019 [12:33<1:19:50, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3256, 'learning_rate': 3.12e-05, 'epoch': 0.16} + 16%|████████████▏ | 160/1019 [12:39<1:20:05, 5.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 160/1019 [12:39<1:20:05, 5.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.199, 'learning_rate': 3.1400000000000004e-05, 'epoch': 0.16} + 16%|████████████▎ | 161/1019 [12:45<1:19:56, 5.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 161/1019 [12:45<1:19:56, 5.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2251, 'learning_rate': 3.16e-05, 'epoch': 0.16} + 16%|████████████▍ | 162/1019 [12:50<1:19:23, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 162/1019 [12:50<1:19:23, 5.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3597, 'learning_rate': 3.18e-05, 'epoch': 0.16} + 16%|████████████▍ | 163/1019 [12:56<1:19:10, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 163/1019 [12:56<1:19:10, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2757, 'learning_rate': 3.2000000000000005e-05, 'epoch': 0.16} + 16%|████████████▍ | 163/1019 [12:56<1:19:10, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 163/1019 [12:56<1:19:10, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2402, 'learning_rate': 3.2200000000000003e-05, 'epoch': 0.16} + 16%|████████████▍ | 163/1019 [12:56<1:19:10, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 165/1019 [13:07<1:18:17, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 165/1019 [13:07<1:18:17, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2667, 'learning_rate': 3.24e-05, 'epoch': 0.16} + 16%|████████████▋ | 165/1019 [13:07<1:18:17, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 166/1019 [13:12<1:17:56, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 166/1019 [13:12<1:17:56, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 167/1019 [13:17<1:17:28, 5.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 167/1019 [13:17<1:17:28, 5.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2134, 'learning_rate': 3.2800000000000004e-05, 'epoch': 0.16} + 16%|████████████▊ | 168/1019 [13:23<1:16:56, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 168/1019 [13:23<1:16:56, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0583, 'learning_rate': 3.3e-05, 'epoch': 0.16} + 16%|████████████▊ | 168/1019 [13:23<1:16:56, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|████████████▉ | 169/1019 [13:28<1:16:22, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|████████████▉ | 169/1019 [13:28<1:16:22, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|████████████▉ | 169/1019 [13:28<1:16:22, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████ | 170/1019 [13:33<1:15:37, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████ | 170/1019 [13:33<1:15:37, 5.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████ | 171/1019 [13:38<1:14:32, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████ | 171/1019 [13:38<1:14:32, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3876, 'learning_rate': 3.3600000000000004e-05, 'epoch': 0.17} + 17%|█████████████ | 171/1019 [13:38<1:14:32, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 172/1019 [13:44<1:13:59, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 172/1019 [13:44<1:13:59, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 173/1019 [13:49<1:13:04, 5.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 173/1019 [13:49<1:13:04, 5.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2226, 'learning_rate': 3.4000000000000007e-05, 'epoch': 0.17} + 17%|█████████████▏ | 173/1019 [13:49<1:13:04, 5.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 174/1019 [13:54<1:12:11, 5.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 174/1019 [13:54<1:12:11, 5.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 174/1019 [13:54<1:12:11, 5.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 175/1019 [13:59<1:11:23, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 175/1019 [13:59<1:11:23, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 175/1019 [13:59<1:11:23, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 176/1019 [14:04<1:10:54, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 176/1019 [14:04<1:10:54, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 177/1019 [14:09<1:10:32, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 177/1019 [14:09<1:10:32, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1949, 'learning_rate': 3.48e-05, 'epoch': 0.17} + 17%|█████████████▋ | 178/1019 [14:13<1:09:56, 4.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 178/1019 [14:13<1:09:56, 4.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2429, 'learning_rate': 3.5e-05, 'epoch': 0.17} + 17%|█████████████▋ | 178/1019 [14:13<1:09:56, 4.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▋ | 179/1019 [14:18<1:09:14, 4.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▋ | 179/1019 [14:18<1:09:14, 4.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 180/1019 [14:23<1:08:35, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 180/1019 [14:23<1:08:35, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3763, 'learning_rate': 3.54e-05, 'epoch': 0.18} + 18%|█████████████▊ | 181/1019 [14:28<1:07:56, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 181/1019 [14:28<1:07:56, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4287, 'learning_rate': 3.56e-05, 'epoch': 0.18} + 18%|█████████████▊ | 181/1019 [14:28<1:07:56, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 182/1019 [14:33<1:07:23, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 182/1019 [14:33<1:07:23, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 183/1019 [14:37<1:06:18, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 183/1019 [14:37<1:06:18, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2801, 'learning_rate': 3.6e-05, 'epoch': 0.18} + 18%|██████████████ | 184/1019 [14:42<1:05:30, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 184/1019 [14:42<1:05:30, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2117, 'learning_rate': 3.62e-05, 'epoch': 0.18} + 18%|██████████████▏ | 185/1019 [14:46<1:04:37, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 185/1019 [14:46<1:04:37, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1775, 'learning_rate': 3.6400000000000004e-05, 'epoch': 0.18} + 18%|██████████████▏ | 185/1019 [14:46<1:04:37, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 186/1019 [14:51<1:03:44, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 186/1019 [14:51<1:03:44, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 186/1019 [14:51<1:03:44, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:07:54,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 187/1019 [14:55<1:02:31, 4.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 188/1019 [14:59<1:01:21, 4.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 188/1019 [14:59<1:01:21, 4.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 188/1019 [14:59<1:01:21, 4.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 189/1019 [15:03<59:51, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 189/1019 [15:03<59:51, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.183, 'learning_rate': 3.72e-05, 'epoch': 0.19} + 19%|██████████████▉ | 190/1019 [15:07<58:02, 4.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 190/1019 [15:07<58:02, 4.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3126, 'learning_rate': 3.74e-05, 'epoch': 0.19} + 19%|██████████████▉ | 191/1019 [15:11<55:47, 4.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 191/1019 [15:11<55:47, 4.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3151, 'learning_rate': 3.76e-05, 'epoch': 0.19} + 19%|███████████████ | 192/1019 [15:15<53:33, 3.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 192/1019 [15:15<53:33, 3.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2967, 'learning_rate': 3.7800000000000004e-05, 'epoch': 0.19} + 19%|███████████████ | 192/1019 [15:15<53:33, 3.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:10:55,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▏ | 193/1019 [15:18<50:51, 3.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:17,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▏ | 194/1019 [15:21<48:05, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:17,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▏ | 194/1019 [15:21<48:05, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:17,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:11:21,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:11:17,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:11:21,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:11:17,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3432, 'learning_rate': 3.8400000000000005e-05, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-02 06:11:21,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:11:17,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 196/1019 [15:26<41:51, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:25,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 197/1019 [15:28<38:49, 2.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:27,289 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 197/1019 [15:28<38:49, 2.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:27,289 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▌ | 198/1019 [15:30<35:28, 2.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:29,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▌ | 198/1019 [15:30<35:28, 2.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:29,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4893, 'learning_rate': 3.9000000000000006e-05, 'epoch': 0.19} + 20%|███████████████▌ | 199/1019 [15:32<32:01, 2.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 199/1019 [15:32<32:01, 2.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 200/1019 [15:34<30:20, 2.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 200/1019 [15:34<30:20, 2.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 200/1019 [15:34<30:20, 2.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 201/1019 [15:40<46:22, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 201/1019 [15:40<46:22, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 201/1019 [15:40<46:22, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 201/1019 [15:40<46:22, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2302, 'learning_rate': 3.9800000000000005e-05, 'epoch': 0.2} + 20%|███████████████▊ | 201/1019 [15:40<46:22, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 203/1019 [15:52<1:03:30, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 203/1019 [15:52<1:03:30, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2423, 'learning_rate': 4e-05, 'epoch': 0.2} + 20%|███████████████▌ | 204/1019 [15:58<1:08:04, 5.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 204/1019 [15:58<1:08:04, 5.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1356, 'learning_rate': 4.02e-05, 'epoch': 0.2} + 20%|███████████████▋ | 205/1019 [16:04<1:10:37, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 205/1019 [16:04<1:10:37, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0912, 'learning_rate': 4.0400000000000006e-05, 'epoch': 0.2} + 20%|███████████████▋ | 205/1019 [16:04<1:10:37, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 205/1019 [16:04<1:10:37, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1332, 'learning_rate': 4.0600000000000004e-05, 'epoch': 0.2} + 20%|███████████████▋ | 205/1019 [16:04<1:10:37, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 207/1019 [16:15<1:13:29, 5.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 207/1019 [16:15<1:13:29, 5.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1937, 'learning_rate': 4.08e-05, 'epoch': 0.2} + 20%|███████████████▉ | 208/1019 [16:20<1:14:08, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 208/1019 [16:20<1:14:08, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2914, 'learning_rate': 4.1e-05, 'epoch': 0.2} + 21%|███████████████▉ | 209/1019 [16:26<1:14:09, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|███████████████▉ | 209/1019 [16:26<1:14:09, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4561, 'learning_rate': 4.12e-05, 'epoch': 0.21} + 21%|████████████████ | 210/1019 [16:32<1:14:28, 5.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 210/1019 [16:32<1:14:28, 5.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2576, 'learning_rate': 4.14e-05, 'epoch': 0.21} + 21%|████████████████ | 210/1019 [16:32<1:14:28, 5.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 211/1019 [16:37<1:14:38, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 211/1019 [16:37<1:14:38, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 211/1019 [16:37<1:14:38, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 212/1019 [16:43<1:14:33, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 212/1019 [16:43<1:14:33, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 212/1019 [16:43<1:14:33, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 212/1019 [16:43<1:14:33, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1316, 'learning_rate': 4.2e-05, 'epoch': 0.21} + 21%|████████████████▏ | 212/1019 [16:43<1:14:33, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 212/1019 [16:43<1:14:33, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 214/1019 [16:54<1:13:40, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 214/1019 [16:54<1:13:40, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 214/1019 [16:54<1:13:40, 5.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 215/1019 [16:59<1:13:01, 5.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 215/1019 [16:59<1:13:01, 5.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 216/1019 [17:04<1:12:24, 5.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 216/1019 [17:04<1:12:24, 5.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3526, 'learning_rate': 4.26e-05, 'epoch': 0.21} + 21%|████████████████▌ | 216/1019 [17:04<1:12:24, 5.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 217/1019 [17:10<1:11:53, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 217/1019 [17:10<1:11:53, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 218/1019 [17:15<1:11:49, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 218/1019 [17:15<1:11:49, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1111, 'learning_rate': 4.3e-05, 'epoch': 0.21} + 21%|████████████████▋ | 218/1019 [17:15<1:11:49, 5.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 219/1019 [17:20<1:11:35, 5.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 219/1019 [17:20<1:11:35, 5.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1343, 'learning_rate': 4.3400000000000005e-05, 'epoch': 0.22} + [WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 221/1019 [17:31<1:09:43, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 221/1019 [17:31<1:09:43, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 222/1019 [17:36<1:08:54, 5.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 222/1019 [17:36<1:08:54, 5.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4093, 'learning_rate': 4.38e-05, 'epoch': 0.22} + 22%|█████████████████ | 223/1019 [17:41<1:08:31, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 223/1019 [17:41<1:08:31, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1783, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.22} + 22%|█████████████████▏ | 224/1019 [17:46<1:07:49, 5.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 224/1019 [17:46<1:07:49, 5.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3176, 'learning_rate': 4.4200000000000004e-05, 'epoch': 0.22} + 22%|█████████████████▏ | 224/1019 [17:46<1:07:49, 5.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 225/1019 [17:51<1:07:07, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 225/1019 [17:51<1:07:07, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 226/1019 [17:56<1:06:47, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 226/1019 [17:56<1:06:47, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1444, 'learning_rate': 4.46e-05, 'epoch': 0.22} + 22%|█████████████████▎ | 226/1019 [17:56<1:06:47, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 226/1019 [17:56<1:06:47, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1897, 'learning_rate': 4.4800000000000005e-05, 'epoch': 0.22} + 22%|█████████████████▎ | 226/1019 [17:56<1:06:47, 5.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 228/1019 [18:06<1:05:30, 4.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 228/1019 [18:06<1:05:30, 4.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.095, 'learning_rate': 4.5e-05, 'epoch': 0.22} + 22%|█████████████████▌ | 229/1019 [18:10<1:04:31, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 229/1019 [18:10<1:04:31, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1946, 'learning_rate': 4.52e-05, 'epoch': 0.22} + 23%|█████████████████▌ | 230/1019 [18:15<1:03:51, 4.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▌ | 230/1019 [18:15<1:03:51, 4.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2182, 'learning_rate': 4.5400000000000006e-05, 'epoch': 0.23} + 23%|█████████████████▌ | 230/1019 [18:15<1:03:51, 4.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▋ | 231/1019 [18:20<1:03:02, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▋ | 231/1019 [18:20<1:03:02, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 232/1019 [18:24<1:02:33, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 232/1019 [18:24<1:02:33, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1917, 'learning_rate': 4.58e-05, 'epoch': 0.23} + 23%|█████████████████▊ | 233/1019 [18:29<1:01:32, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 233/1019 [18:29<1:01:32, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3202, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.23} + 23%|█████████████████▉ | 234/1019 [18:33<1:00:33, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 234/1019 [18:33<1:00:33, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2299, 'learning_rate': 4.6200000000000005e-05, 'epoch': 0.23} + 23%|██████████████████▍ | 235/1019 [18:38<59:51, 4.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▍ | 235/1019 [18:38<59:51, 4.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3656, 'learning_rate': 4.64e-05, 'epoch': 0.23} + 23%|██████████████████▍ | 235/1019 [18:38<59:51, 4.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▍ | 235/1019 [18:38<59:51, 4.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1586, 'learning_rate': 4.660000000000001e-05, 'epoch': 0.23} + 23%|██████████████████▍ | 235/1019 [18:38<59:51, 4.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▍ | 235/1019 [18:38<59:51, 4.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 237/1019 [18:46<57:34, 4.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 237/1019 [18:46<57:34, 4.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 237/1019 [18:46<57:34, 4.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▋ | 238/1019 [18:51<56:28, 4.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▋ | 238/1019 [18:51<56:28, 4.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▋ | 238/1019 [18:51<56:28, 4.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▊ | 239/1019 [18:55<55:12, 4.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▊ | 239/1019 [18:55<55:12, 4.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▊ | 239/1019 [18:55<55:12, 4.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 240/1019 [18:58<53:39, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:14:59,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:14:59,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2641, 'learning_rate': 4.76e-05, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-02 06:14:59,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 242/1019 [19:06<49:16, 3.80s/it]g-point operations will not be computed-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 242/1019 [19:06<49:16, 3.80s/it]g-point operations will not be computed-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 242/1019 [19:06<49:16, 3.80s/it]g-point operations will not be computed-02 06:11:30,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 243/1019 [19:09<46:25, 3.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 243/1019 [19:09<46:25, 3.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 244/1019 [19:11<43:11, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 244/1019 [19:11<43:11, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:15:11,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:15:11,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:15:14,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:15:14,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:15:16,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:15:16,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:15:17,802 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:15:17,802 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:15:19,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:15:19,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:15:21,308 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:15:21,308 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 251/1019 [19:30<42:23, 3.31s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 251/1019 [19:30<42:23, 3.31s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0882, 'learning_rate': 4.96e-05, 'epoch': 0.25} + 25%|███████████████████▊ | 252/1019 [19:36<52:14, 4.09s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 252/1019 [19:36<52:14, 4.09s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3265, 'learning_rate': 4.9800000000000004e-05, 'epoch': 0.25} + 25%|███████████████████▊ | 252/1019 [19:36<52:14, 4.09s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 253/1019 [19:41<58:48, 4.61s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 253/1019 [19:41<58:48, 4.61s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 253/1019 [19:41<58:48, 4.61s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 253/1019 [19:41<58:48, 4.61s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 3.9841, 'learning_rate': 5.02e-05, 'epoch': 0.25} + 25%|███████████████████▊ | 253/1019 [19:41<58:48, 4.61s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▌ | 255/1019 [19:53<1:06:26, 5.22s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▌ | 255/1019 [19:53<1:06:26, 5.22s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1311, 'learning_rate': 5.0400000000000005e-05, 'epoch': 0.25} + 25%|███████████████████▌ | 255/1019 [19:53<1:06:26, 5.22s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▌ | 256/1019 [19:59<1:08:23, 5.38s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▌ | 256/1019 [19:59<1:08:23, 5.38s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▌ | 256/1019 [19:59<1:08:23, 5.38s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 257/1019 [20:04<1:09:04, 5.44s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 257/1019 [20:04<1:09:04, 5.44s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 258/1019 [20:10<1:09:42, 5.50s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 258/1019 [20:10<1:09:42, 5.50s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 3.9816, 'learning_rate': 5.1000000000000006e-05, 'epoch': 0.25} + 25%|███████████████████▋ | 258/1019 [20:10<1:09:42, 5.50s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 259/1019 [20:16<1:10:15, 5.55s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 259/1019 [20:16<1:10:15, 5.55s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 259/1019 [20:16<1:10:15, 5.55s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|███████████████████▉ | 260/1019 [20:21<1:10:14, 5.55s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|███████████████████▉ | 260/1019 [20:21<1:10:14, 5.55s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|███████████████████▉ | 261/1019 [20:27<1:09:34, 5.51s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|███████████████████▉ | 261/1019 [20:27<1:09:34, 5.51s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1301, 'learning_rate': 5.16e-05, 'epoch': 0.26} + 26%|████████████████████ | 262/1019 [20:32<1:09:06, 5.48s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 262/1019 [20:32<1:09:06, 5.48s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.173, 'learning_rate': 5.1800000000000005e-05, 'epoch': 0.26} + 26%|████████████████████ | 262/1019 [20:32<1:09:06, 5.48s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 263/1019 [20:38<1:09:05, 5.48s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 263/1019 [20:38<1:09:05, 5.48s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 263/1019 [20:38<1:09:05, 5.48s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 263/1019 [20:38<1:09:05, 5.48s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2179, 'learning_rate': 5.22e-05, 'epoch': 0.26} + 26%|████████████████████▏ | 263/1019 [20:38<1:09:05, 5.48s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 265/1019 [20:48<1:08:27, 5.45s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 265/1019 [20:48<1:08:27, 5.45s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0886, 'learning_rate': 5.2400000000000007e-05, 'epoch': 0.26} + 26%|████████████████████▎ | 265/1019 [20:48<1:08:27, 5.45s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 266/1019 [20:54<1:08:18, 5.44s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 266/1019 [20:54<1:08:18, 5.44s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 267/1019 [20:59<1:07:58, 5.42s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 267/1019 [20:59<1:07:58, 5.42s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3285, 'learning_rate': 5.28e-05, 'epoch': 0.26} + 26%|████████████████████▍ | 267/1019 [20:59<1:07:58, 5.42s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 268/1019 [21:05<1:07:29, 5.39s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 268/1019 [21:05<1:07:29, 5.39s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 268/1019 [21:05<1:07:29, 5.39s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 269/1019 [21:10<1:07:11, 5.38s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 269/1019 [21:10<1:07:11, 5.38s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 270/1019 [21:15<1:06:45, 5.35s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 270/1019 [21:15<1:06:45, 5.35s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.375, 'learning_rate': 5.3400000000000004e-05, 'epoch': 0.26} + 26%|████████████████████▋ | 270/1019 [21:15<1:06:45, 5.35s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▋ | 271/1019 [21:20<1:06:06, 5.30s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▋ | 271/1019 [21:20<1:06:06, 5.30s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 272/1019 [21:26<1:05:22, 5.25s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 272/1019 [21:26<1:05:22, 5.25s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1679, 'learning_rate': 5.380000000000001e-05, 'epoch': 0.27} + 27%|████████████████████▊ | 272/1019 [21:26<1:05:22, 5.25s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 272/1019 [21:26<1:05:22, 5.25s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2512, 'learning_rate': 5.4000000000000005e-05, 'epoch': 0.27} + 27%|████████████████████▊ | 272/1019 [21:26<1:05:22, 5.25s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 274/1019 [21:36<1:04:06, 5.16s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 274/1019 [21:36<1:04:06, 5.16s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.432, 'learning_rate': 5.420000000000001e-05, 'epoch': 0.27} + 27%|█████████████████████ | 275/1019 [21:41<1:03:28, 5.12s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 275/1019 [21:41<1:03:28, 5.12s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2654, 'learning_rate': 5.440000000000001e-05, 'epoch': 0.27} + 27%|█████████████████████▏ | 276/1019 [21:46<1:03:14, 5.11s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 276/1019 [21:46<1:03:14, 5.11s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5613, 'learning_rate': 5.4600000000000006e-05, 'epoch': 0.27} + 27%|█████████████████████▏ | 277/1019 [21:51<1:02:53, 5.09s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 277/1019 [21:51<1:02:53, 5.09s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5257, 'learning_rate': 5.4800000000000004e-05, 'epoch': 0.27} + 27%|█████████████████████▏ | 277/1019 [21:51<1:02:53, 5.09s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 278/1019 [21:56<1:02:27, 5.06s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 278/1019 [21:56<1:02:27, 5.06s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 279/1019 [22:01<1:02:00, 5.03s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 279/1019 [22:01<1:02:00, 5.03s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3021, 'learning_rate': 5.520000000000001e-05, 'epoch': 0.27} + 27%|█████████████████████▎ | 279/1019 [22:01<1:02:00, 5.03s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 280/1019 [22:06<1:01:21, 4.98s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 280/1019 [22:06<1:01:21, 4.98s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▌ | 281/1019 [22:10<1:00:27, 4.92s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▌ | 281/1019 [22:10<1:00:27, 4.92s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.439, 'learning_rate': 5.560000000000001e-05, 'epoch': 0.28} + 28%|██████████████████████▏ | 282/1019 [22:15<59:52, 4.87s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 282/1019 [22:15<59:52, 4.87s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2421, 'learning_rate': 5.580000000000001e-05, 'epoch': 0.28} + 28%|██████████████████████▏ | 282/1019 [22:15<59:52, 4.87s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 283/1019 [22:20<58:57, 4.81s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 283/1019 [22:20<58:57, 4.81s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 283/1019 [22:20<58:57, 4.81s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 284/1019 [22:24<58:15, 4.76s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 284/1019 [22:24<58:15, 4.76s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 285/1019 [22:29<57:28, 4.70s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 285/1019 [22:29<57:28, 4.70s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5336, 'learning_rate': 5.6399999999999995e-05, 'epoch': 0.28} + 28%|██████████████████████▍ | 286/1019 [22:34<56:42, 4.64s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▍ | 286/1019 [22:34<56:42, 4.64s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2397, 'learning_rate': 5.66e-05, 'epoch': 0.28} + 28%|██████████████████████▌ | 287/1019 [22:38<55:29, 4.55s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 287/1019 [22:38<55:29, 4.55s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2868, 'learning_rate': 5.68e-05, 'epoch': 0.28} + 28%|██████████████████████▌ | 288/1019 [22:42<54:18, 4.46s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 288/1019 [22:42<54:18, 4.46s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5147, 'learning_rate': 5.6999999999999996e-05, 'epoch': 0.28} + 28%|██████████████████████▋ | 289/1019 [22:46<52:44, 4.34s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 289/1019 [22:46<52:44, 4.34s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3183, 'learning_rate': 5.72e-05, 'epoch': 0.28} + 28%|██████████████████████▊ | 290/1019 [22:50<50:56, 4.19s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▊ | 290/1019 [22:50<50:56, 4.19s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3677, 'learning_rate': 5.74e-05, 'epoch': 0.28} + 29%|██████████████████████▊ | 291/1019 [22:54<48:53, 4.03s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▊ | 291/1019 [22:54<48:53, 4.03s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3637, 'learning_rate': 5.76e-05, 'epoch': 0.29} + 29%|██████████████████████▉ | 292/1019 [22:57<46:47, 3.86s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 292/1019 [22:57<46:47, 3.86s/it]g-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:18:58,115 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:18:58,115 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:15:07,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4503, 'learning_rate': 5.8e-05, 'epoch': 0.29} + 29%|███████████████████████ | 294/1019 [23:03<41:38, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████ | 294/1019 [23:03<41:38, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▏ | 295/1019 [23:06<39:02, 3.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▏ | 295/1019 [23:06<39:02, 3.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:19:06,202 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:19:06,202 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:19:08,289 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:19:08,289 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:19:10,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:19:10,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:19:11,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:19:11,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7895, 'learning_rate': 5.92e-05, 'epoch': 0.29} +[WARNING|modeling_utils.py:388] 2022-03-02 06:19:13,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:19:13,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 301/1019 [23:22<39:12, 3.28s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 301/1019 [23:22<39:12, 3.28s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5139, 'learning_rate': 5.96e-05, 'epoch': 0.3} + 30%|███████████████████████▋ | 302/1019 [23:28<48:36, 4.07s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 302/1019 [23:28<48:36, 4.07s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2047, 'learning_rate': 5.9800000000000003e-05, 'epoch': 0.3} + 30%|███████████████████████▋ | 302/1019 [23:28<48:36, 4.07s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▊ | 303/1019 [23:34<54:49, 4.59s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▊ | 303/1019 [23:34<54:49, 4.59s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▊ | 303/1019 [23:34<54:49, 4.59s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▊ | 304/1019 [23:39<58:59, 4.95s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▊ | 304/1019 [23:39<58:59, 4.95s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▊ | 304/1019 [23:39<58:59, 4.95s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▎ | 305/1019 [23:45<1:01:41, 5.18s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▎ | 305/1019 [23:45<1:01:41, 5.18s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▎ | 305/1019 [23:45<1:01:41, 5.18s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 306/1019 [23:51<1:03:32, 5.35s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 306/1019 [23:51<1:03:32, 5.35s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 306/1019 [23:51<1:03:32, 5.35s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 307/1019 [23:57<1:04:40, 5.45s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 307/1019 [23:57<1:04:40, 5.45s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1365, 'learning_rate': 6.1e-05, 'epoch': 0.3} + g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 309/1019 [24:08<1:05:48, 5.56s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 309/1019 [24:08<1:05:48, 5.56s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3245, 'learning_rate': 6.12e-05, 'epoch': 0.3} + 30%|███████████████████████▋ | 309/1019 [24:08<1:05:48, 5.56s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 310/1019 [24:13<1:05:48, 5.57s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 310/1019 [24:13<1:05:48, 5.57s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 310/1019 [24:13<1:05:48, 5.57s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▊ | 311/1019 [24:19<1:05:34, 5.56s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▊ | 311/1019 [24:19<1:05:34, 5.56s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▉ | 312/1019 [24:24<1:05:02, 5.52s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▉ | 312/1019 [24:24<1:05:02, 5.52s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2266, 'learning_rate': 6.18e-05, 'epoch': 0.31} + 31%|███████████████████████▉ | 312/1019 [24:24<1:05:02, 5.52s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▉ | 313/1019 [24:30<1:04:23, 5.47s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▉ | 313/1019 [24:30<1:04:23, 5.47s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▉ | 313/1019 [24:30<1:04:23, 5.47s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 314/1019 [24:35<1:03:57, 5.44s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 314/1019 [24:35<1:03:57, 5.44s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 315/1019 [24:40<1:03:33, 5.42s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 315/1019 [24:40<1:03:33, 5.42s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2351, 'learning_rate': 6.24e-05, 'epoch': 0.31} + 31%|████████████████████████ | 315/1019 [24:40<1:03:33, 5.42s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 316/1019 [24:46<1:03:18, 5.40s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 316/1019 [24:46<1:03:18, 5.40s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 316/1019 [24:46<1:03:18, 5.40s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 316/1019 [24:46<1:03:18, 5.40s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3449, 'learning_rate': 6.280000000000001e-05, 'epoch': 0.31} + 31%|████████████████████████▏ | 316/1019 [24:46<1:03:18, 5.40s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 316/1019 [24:46<1:03:18, 5.40s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 318/1019 [24:57<1:02:40, 5.36s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 318/1019 [24:57<1:02:40, 5.36s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 318/1019 [24:57<1:02:40, 5.36s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▍ | 319/1019 [25:02<1:02:02, 5.32s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▍ | 319/1019 [25:02<1:02:02, 5.32s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▍ | 319/1019 [25:02<1:02:02, 5.32s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▍ | 320/1019 [25:07<1:01:30, 5.28s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▍ | 320/1019 [25:07<1:01:30, 5.28s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▍ | 320/1019 [25:07<1:01:30, 5.28s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▌ | 321/1019 [25:12<1:01:05, 5.25s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▌ | 321/1019 [25:12<1:01:05, 5.25s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▌ | 321/1019 [25:12<1:01:05, 5.25s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▋ | 322/1019 [25:17<1:00:32, 5.21s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▋ | 322/1019 [25:17<1:00:32, 5.21s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▋ | 322/1019 [25:17<1:00:32, 5.21s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▎ | 323/1019 [25:22<59:52, 5.16s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▎ | 323/1019 [25:22<59:52, 5.16s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▎ | 323/1019 [25:22<59:52, 5.16s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▍ | 324/1019 [25:27<59:30, 5.14s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▍ | 324/1019 [25:27<59:30, 5.14s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.342, 'learning_rate': 6.440000000000001e-05, 'epoch': 0.32} + g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▌ | 326/1019 [25:37<57:55, 5.02s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▌ | 326/1019 [25:37<57:55, 5.02s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▋ | 327/1019 [25:42<57:35, 4.99s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▋ | 327/1019 [25:42<57:35, 4.99s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1516, 'learning_rate': 6.48e-05, 'epoch': 0.32} + 32%|█████████████████████████▋ | 327/1019 [25:42<57:35, 4.99s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 328/1019 [25:47<57:14, 4.97s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 328/1019 [25:47<57:14, 4.97s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 328/1019 [25:47<57:14, 4.97s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 329/1019 [25:52<56:25, 4.91s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 329/1019 [25:52<56:25, 4.91s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 330/1019 [25:57<55:58, 4.87s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 330/1019 [25:57<55:58, 4.87s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:21:59,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:21:59,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2191, 'learning_rate': 6.560000000000001e-05, 'epoch': 0.32} +[WARNING|modeling_utils.py:388] 2022-03-02 06:21:59,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████ | 332/1019 [26:06<54:43, 4.78s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████ | 332/1019 [26:06<54:43, 4.78s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 333/1019 [26:10<53:47, 4.70s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 333/1019 [26:10<53:47, 4.70s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0214, 'learning_rate': 6.6e-05, 'epoch': 0.33} + 33%|██████████████████████████▏ | 334/1019 [26:15<53:04, 4.65s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 334/1019 [26:15<53:04, 4.65s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3892, 'learning_rate': 6.620000000000001e-05, 'epoch': 0.33} + 33%|██████████████████████████▎ | 335/1019 [26:19<52:16, 4.59s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▎ | 335/1019 [26:19<52:16, 4.59s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3803, 'learning_rate': 6.64e-05, 'epoch': 0.33} + 33%|██████████████████████████▍ | 336/1019 [26:24<51:18, 4.51s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▍ | 336/1019 [26:24<51:18, 4.51s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2512, 'learning_rate': 6.66e-05, 'epoch': 0.33} + 33%|██████████████████████████▍ | 337/1019 [26:28<50:31, 4.44s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▍ | 337/1019 [26:28<50:31, 4.44s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4332, 'learning_rate': 6.680000000000001e-05, 'epoch': 0.33} + 33%|██████████████████████████▌ | 338/1019 [26:32<49:18, 4.34s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▌ | 338/1019 [26:32<49:18, 4.34s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3038, 'learning_rate': 6.7e-05, 'epoch': 0.33} + 33%|██████████████████████████▌ | 339/1019 [26:36<47:58, 4.23s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▌ | 339/1019 [26:36<47:58, 4.23s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4374, 'learning_rate': 6.720000000000001e-05, 'epoch': 0.33} + 33%|██████████████████████████▋ | 340/1019 [26:40<46:16, 4.09s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▋ | 340/1019 [26:40<46:16, 4.09s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4142, 'learning_rate': 6.740000000000001e-05, 'epoch': 0.33} + 33%|██████████████████████████▊ | 341/1019 [26:43<44:25, 3.93s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 341/1019 [26:43<44:25, 3.93s/it]g-point operations will not be computed-02 06:19:02,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3873, 'learning_rate': 6.76e-05, 'epoch': 0.33} + 34%|██████████████████████████▊ | 342/1019 [26:47<42:29, 3.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:22:46,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▊ | 342/1019 [26:47<42:29, 3.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:22:46,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▉ | 343/1019 [26:50<40:19, 3.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:22:46,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▉ | 343/1019 [26:50<40:19, 3.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:22:46,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8563, 'learning_rate': 6.800000000000001e-05, 'epoch': 0.34} + 34%|██████████████████████████▉ | 343/1019 [26:50<40:19, 3.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:22:46,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████ | 344/1019 [26:53<37:58, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████ | 345/1019 [26:56<35:35, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████ | 345/1019 [26:56<35:35, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:22:55,746 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:22:55,746 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:22:57,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:22:57,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:22:59,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:22:59,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:23:01,268 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:23:01,268 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:23:03,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:23:03,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6739, 'learning_rate': 6.939999999999999e-05, 'epoch': 0.34} +[WARNING|modeling_utils.py:388] 2022-03-02 06:23:03,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 351/1019 [27:12<36:58, 3.32s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 351/1019 [27:12<36:58, 3.32s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 351/1019 [27:12<36:58, 3.32s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 352/1019 [27:17<45:27, 4.09s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 352/1019 [27:17<45:27, 4.09s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 353/1019 [27:23<51:07, 4.61s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 353/1019 [27:23<51:07, 4.61s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2387, 'learning_rate': 7e-05, 'epoch': 0.35} + 35%|███████████████████████████▋ | 353/1019 [27:23<51:07, 4.61s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▊ | 354/1019 [27:29<55:05, 4.97s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▊ | 354/1019 [27:29<55:05, 4.97s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▊ | 354/1019 [27:29<55:05, 4.97s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▊ | 355/1019 [27:35<57:38, 5.21s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▊ | 355/1019 [27:35<57:38, 5.21s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▊ | 355/1019 [27:35<57:38, 5.21s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 356/1019 [27:40<58:56, 5.33s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 356/1019 [27:40<58:56, 5.33s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 356/1019 [27:40<58:56, 5.33s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 356/1019 [27:40<58:56, 5.33s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3082, 'learning_rate': 7.08e-05, 'epoch': 0.35} + 35%|███████████████████████████▉ | 356/1019 [27:40<58:56, 5.33s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 358/1019 [27:52<1:00:44, 5.51s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 358/1019 [27:52<1:00:44, 5.51s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6187, 'learning_rate': 7.1e-05, 'epoch': 0.35} + 35%|███████████████████████████▍ | 358/1019 [27:52<1:00:44, 5.51s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 359/1019 [27:58<1:01:17, 5.57s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 359/1019 [27:58<1:01:17, 5.57s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 359/1019 [27:58<1:01:17, 5.57s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▌ | 360/1019 [28:03<1:01:31, 5.60s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▌ | 360/1019 [28:03<1:01:31, 5.60s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 361/1019 [28:09<1:01:14, 5.58s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 361/1019 [28:09<1:01:14, 5.58s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3837, 'learning_rate': 7.16e-05, 'epoch': 0.35} + 36%|███████████████████████████▋ | 362/1019 [28:14<1:00:41, 5.54s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|███████████████████████████▋ | 362/1019 [28:14<1:00:41, 5.54s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5376, 'learning_rate': 7.18e-05, 'epoch': 0.36} + 36%|███████████████████████████▋ | 362/1019 [28:14<1:00:41, 5.54s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|███████████████████████████▊ | 363/1019 [28:20<1:00:15, 5.51s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|███████████████████████████▊ | 363/1019 [28:20<1:00:15, 5.51s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▌ | 364/1019 [28:25<59:47, 5.48s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▌ | 364/1019 [28:25<59:47, 5.48s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3393, 'learning_rate': 7.22e-05, 'epoch': 0.36} + 36%|████████████████████████████▋ | 365/1019 [28:30<59:14, 5.43s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 365/1019 [28:30<59:14, 5.43s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3848, 'learning_rate': 7.24e-05, 'epoch': 0.36} + 36%|████████████████████████████▋ | 365/1019 [28:30<59:14, 5.43s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 366/1019 [28:36<59:00, 5.42s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 366/1019 [28:36<59:00, 5.42s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▊ | 367/1019 [28:41<58:37, 5.39s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▊ | 367/1019 [28:41<58:37, 5.39s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1314, 'learning_rate': 7.280000000000001e-05, 'epoch': 0.36} + 36%|████████████████████████████▊ | 367/1019 [28:41<58:37, 5.39s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 368/1019 [28:46<58:18, 5.37s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 368/1019 [28:46<58:18, 5.37s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 368/1019 [28:46<58:18, 5.37s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 368/1019 [28:46<58:18, 5.37s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3967, 'learning_rate': 7.32e-05, 'epoch': 0.36} + 36%|████████████████████████████▉ | 368/1019 [28:46<58:18, 5.37s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 368/1019 [28:46<58:18, 5.37s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████ | 370/1019 [28:57<57:32, 5.32s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████ | 370/1019 [28:57<57:32, 5.32s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████ | 370/1019 [28:57<57:32, 5.32s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 371/1019 [29:02<57:06, 5.29s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 371/1019 [29:02<57:06, 5.29s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▏ | 372/1019 [29:07<56:33, 5.24s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▏ | 372/1019 [29:07<56:33, 5.24s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2514, 'learning_rate': 7.38e-05, 'epoch': 0.36} + 37%|█████████████████████████████▏ | 372/1019 [29:07<56:33, 5.24s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 373/1019 [29:12<56:07, 5.21s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 373/1019 [29:12<56:07, 5.21s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 374/1019 [29:18<55:41, 5.18s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 374/1019 [29:18<55:41, 5.18s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1936, 'learning_rate': 7.42e-05, 'epoch': 0.37} + 37%|█████████████████████████████▍ | 375/1019 [29:23<54:54, 5.12s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▍ | 375/1019 [29:23<54:54, 5.12s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.721, 'learning_rate': 7.44e-05, 'epoch': 0.37} + 37%|█████████████████████████████▌ | 376/1019 [29:28<54:29, 5.08s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▌ | 376/1019 [29:28<54:29, 5.08s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4243, 'learning_rate': 7.46e-05, 'epoch': 0.37} + 37%|█████████████████████████████▌ | 377/1019 [29:33<54:08, 5.06s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▌ | 377/1019 [29:33<54:08, 5.06s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2335, 'learning_rate': 7.48e-05, 'epoch': 0.37} + 37%|█████████████████████████████▌ | 377/1019 [29:33<54:08, 5.06s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 378/1019 [29:37<53:40, 5.02s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 378/1019 [29:37<53:40, 5.02s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 379/1019 [29:42<53:08, 4.98s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 379/1019 [29:42<53:08, 4.98s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5591, 'learning_rate': 7.52e-05, 'epoch': 0.37} + 37%|█████████████████████████████▊ | 380/1019 [29:47<52:31, 4.93s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 380/1019 [29:47<52:31, 4.93s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6034, 'learning_rate': 7.54e-05, 'epoch': 0.37} + 37%|█████████████████████████████▉ | 381/1019 [29:52<51:54, 4.88s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▉ | 381/1019 [29:52<51:54, 4.88s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.193, 'learning_rate': 7.560000000000001e-05, 'epoch': 0.37} + 37%|█████████████████████████████▉ | 382/1019 [29:57<51:08, 4.82s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▉ | 382/1019 [29:57<51:08, 4.82s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2547, 'learning_rate': 7.58e-05, 'epoch': 0.37} + 38%|██████████████████████████████ | 383/1019 [30:01<50:32, 4.77s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████ | 383/1019 [30:01<50:32, 4.77s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.316, 'learning_rate': 7.6e-05, 'epoch': 0.38} + 38%|██████████████████████████████ | 383/1019 [30:01<50:32, 4.77s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████ | 383/1019 [30:01<50:32, 4.77s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████ | 383/1019 [30:01<50:32, 4.77s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3824, 'learning_rate': 7.620000000000001e-05, 'epoch': 0.38} + 38%|██████████████████████████████▏ | 385/1019 [30:10<49:11, 4.66s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▏ | 385/1019 [30:10<49:11, 4.66s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4127, 'learning_rate': 7.64e-05, 'epoch': 0.38} + 38%|██████████████████████████████▎ | 386/1019 [30:15<48:26, 4.59s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 386/1019 [30:15<48:26, 4.59s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3601, 'learning_rate': 7.66e-05, 'epoch': 0.38} + 38%|██████████████████████████████▍ | 387/1019 [30:19<47:24, 4.50s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 387/1019 [30:19<47:24, 4.50s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1869, 'learning_rate': 7.680000000000001e-05, 'epoch': 0.38} + 38%|██████████████████████████████▍ | 388/1019 [30:23<46:24, 4.41s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 388/1019 [30:23<46:24, 4.41s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3994, 'learning_rate': 7.7e-05, 'epoch': 0.38} + 38%|██████████████████████████████▌ | 389/1019 [30:27<45:12, 4.30s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 389/1019 [30:27<45:12, 4.30s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2377, 'learning_rate': 7.72e-05, 'epoch': 0.38} + 38%|██████████████████████████████▌ | 390/1019 [30:31<43:31, 4.15s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 390/1019 [30:31<43:31, 4.15s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3469, 'learning_rate': 7.740000000000001e-05, 'epoch': 0.38} + 38%|██████████████████████████████▋ | 391/1019 [30:35<41:28, 3.96s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▋ | 391/1019 [30:35<41:28, 3.96s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:26:35,910 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:26:35,910 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3896, 'learning_rate': 7.780000000000001e-05, 'epoch': 0.38} + 39%|██████████████████████████████▊ | 393/1019 [30:41<37:52, 3.63s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|██████████████████████████████▊ | 393/1019 [30:41<37:52, 3.63s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.316, 'learning_rate': 7.800000000000001e-05, 'epoch': 0.39} + 39%|██████████████████████████████▊ | 393/1019 [30:41<37:52, 3.63s/it]g-point operations will not be computed-02 06:22:52,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|██████████████████████████████▉ | 394/1019 [30:44<35:52, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:26:43,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████ | 395/1019 [30:47<33:18, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████ | 395/1019 [30:47<33:18, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████ | 396/1019 [30:49<30:36, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████ | 396/1019 [30:49<30:36, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:26:49,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:26:49,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:26:51,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:26:51,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:26:52,672 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:26:52,672 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:26:54,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:26:54,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2974, 'learning_rate': 7.94e-05, 'epoch': 0.39} + 39%|███████████████████████████████▍ | 401/1019 [31:03<34:08, 3.31s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▍ | 401/1019 [31:03<34:08, 3.31s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3962, 'learning_rate': 7.960000000000001e-05, 'epoch': 0.39} + 39%|███████████████████████████████▍ | 401/1019 [31:03<34:08, 3.31s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▍ | 401/1019 [31:03<34:08, 3.31s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3178, 'learning_rate': 7.98e-05, 'epoch': 0.39} + 39%|███████████████████████████████▍ | 401/1019 [31:03<34:08, 3.31s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▋ | 403/1019 [31:15<47:12, 4.60s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▋ | 403/1019 [31:15<47:12, 4.60s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3112, 'learning_rate': 8e-05, 'epoch': 0.4} + 40%|███████████████████████████████▋ | 404/1019 [31:20<50:38, 4.94s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▋ | 404/1019 [31:20<50:38, 4.94s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2446, 'learning_rate': 8.020000000000001e-05, 'epoch': 0.4} + 40%|███████████████████████████████▋ | 404/1019 [31:20<50:38, 4.94s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 405/1019 [31:26<53:10, 5.20s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 405/1019 [31:26<53:10, 5.20s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 406/1019 [31:32<54:36, 5.34s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 406/1019 [31:32<54:36, 5.34s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7824, 'learning_rate': 8.060000000000001e-05, 'epoch': 0.4} + 40%|███████████████████████████████▊ | 406/1019 [31:32<54:36, 5.34s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 406/1019 [31:32<54:36, 5.34s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.457, 'learning_rate': 8.080000000000001e-05, 'epoch': 0.4} + 40%|███████████████████████████████▊ | 406/1019 [31:32<54:36, 5.34s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 406/1019 [31:32<54:36, 5.34s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 408/1019 [31:43<55:50, 5.48s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 408/1019 [31:43<55:50, 5.48s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 408/1019 [31:43<55:50, 5.48s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 409/1019 [31:49<56:14, 5.53s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 409/1019 [31:49<56:14, 5.53s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 409/1019 [31:49<56:14, 5.53s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 410/1019 [31:54<56:09, 5.53s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 410/1019 [31:54<56:09, 5.53s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 410/1019 [31:54<56:09, 5.53s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 411/1019 [32:00<55:54, 5.52s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 411/1019 [32:00<55:54, 5.52s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 411/1019 [32:00<55:54, 5.52s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 412/1019 [32:05<55:36, 5.50s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 412/1019 [32:05<55:36, 5.50s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 412/1019 [32:05<55:36, 5.50s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 412/1019 [32:05<55:36, 5.50s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1417, 'learning_rate': 8.2e-05, 'epoch': 0.41} + 40%|████████████████████████████████▎ | 412/1019 [32:05<55:36, 5.50s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▌ | 414/1019 [32:16<55:00, 5.46s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▌ | 414/1019 [32:16<55:00, 5.46s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3982, 'learning_rate': 8.22e-05, 'epoch': 0.41} + 41%|████████████████████████████████▌ | 414/1019 [32:16<55:00, 5.46s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▌ | 415/1019 [32:21<54:31, 5.42s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▌ | 415/1019 [32:21<54:31, 5.42s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3939, 'learning_rate': 8.26e-05, 'epoch': 0.41} + g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 417/1019 [32:32<53:47, 5.36s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 417/1019 [32:32<53:47, 5.36s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 417/1019 [32:32<53:47, 5.36s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 417/1019 [32:32<53:47, 5.36s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4452, 'learning_rate': 8.3e-05, 'epoch': 0.41} + 41%|████████████████████████████████▋ | 417/1019 [32:32<53:47, 5.36s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▉ | 419/1019 [32:42<53:04, 5.31s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▉ | 419/1019 [32:42<53:04, 5.31s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3762, 'learning_rate': 8.32e-05, 'epoch': 0.41} + 41%|████████████████████████████████▉ | 419/1019 [32:42<53:04, 5.31s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▉ | 420/1019 [32:48<52:38, 5.27s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▉ | 420/1019 [32:48<52:38, 5.27s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 421/1019 [32:53<52:30, 5.27s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 421/1019 [32:53<52:30, 5.27s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2238, 'learning_rate': 8.36e-05, 'epoch': 0.41} + 41%|█████████████████████████████████ | 421/1019 [32:53<52:30, 5.27s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 422/1019 [32:58<52:11, 5.24s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 422/1019 [32:58<52:11, 5.24s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▏ | 423/1019 [33:03<51:39, 5.20s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▏ | 423/1019 [33:03<51:39, 5.20s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2047, 'learning_rate': 8.4e-05, 'epoch': 0.41} + 42%|█████████████████████████████████▎ | 424/1019 [33:08<50:56, 5.14s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▎ | 424/1019 [33:08<50:56, 5.14s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6421, 'learning_rate': 8.42e-05, 'epoch': 0.42} + 42%|█████████████████████████████████▎ | 425/1019 [33:13<50:31, 5.10s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▎ | 425/1019 [33:13<50:31, 5.10s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2442, 'learning_rate': 8.44e-05, 'epoch': 0.42} + 42%|█████████████████████████████████▍ | 426/1019 [33:18<50:11, 5.08s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▍ | 426/1019 [33:18<50:11, 5.08s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2809, 'learning_rate': 8.46e-05, 'epoch': 0.42} + 42%|█████████████████████████████████▍ | 426/1019 [33:18<50:11, 5.08s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▌ | 427/1019 [33:23<49:34, 5.02s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▌ | 427/1019 [33:23<49:34, 5.02s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▌ | 428/1019 [33:28<49:17, 5.00s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▌ | 428/1019 [33:28<49:17, 5.00s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4218, 'learning_rate': 8.5e-05, 'epoch': 0.42} + 42%|█████████████████████████████████▌ | 428/1019 [33:28<49:17, 5.00s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▋ | 429/1019 [33:33<48:44, 4.96s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▋ | 429/1019 [33:33<48:44, 4.96s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 430/1019 [33:38<48:12, 4.91s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 430/1019 [33:38<48:12, 4.91s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3741, 'learning_rate': 8.54e-05, 'epoch': 0.42} + 42%|█████████████████████████████████▊ | 431/1019 [33:43<47:38, 4.86s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 431/1019 [33:43<47:38, 4.86s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3195, 'learning_rate': 8.560000000000001e-05, 'epoch': 0.42} + 42%|█████████████████████████████████▉ | 432/1019 [33:47<46:51, 4.79s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▉ | 432/1019 [33:47<46:51, 4.79s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4016, 'learning_rate': 8.58e-05, 'epoch': 0.42} + 42%|█████████████████████████████████▉ | 432/1019 [33:47<46:51, 4.79s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▉ | 432/1019 [33:47<46:51, 4.79s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▉ | 432/1019 [33:47<46:51, 4.79s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6563, 'learning_rate': 8.6e-05, 'epoch': 0.42} + 43%|██████████████████████████████████ | 434/1019 [33:56<45:44, 4.69s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████ | 434/1019 [33:56<45:44, 4.69s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2991, 'learning_rate': 8.620000000000001e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▏ | 435/1019 [34:01<45:14, 4.65s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▏ | 435/1019 [34:01<45:14, 4.65s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2143, 'learning_rate': 8.64e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▏ | 436/1019 [34:05<44:17, 4.56s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▏ | 436/1019 [34:05<44:17, 4.56s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4766, 'learning_rate': 8.66e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▎ | 437/1019 [34:10<43:23, 4.47s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▎ | 437/1019 [34:10<43:23, 4.47s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2582, 'learning_rate': 8.680000000000001e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▍ | 438/1019 [34:14<42:15, 4.36s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 438/1019 [34:14<42:15, 4.36s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3976, 'learning_rate': 8.7e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▍ | 439/1019 [34:18<41:13, 4.26s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 439/1019 [34:18<41:13, 4.26s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2676, 'learning_rate': 8.72e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▌ | 440/1019 [34:22<39:56, 4.14s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 440/1019 [34:22<39:56, 4.14s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3342, 'learning_rate': 8.740000000000001e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▌ | 441/1019 [34:25<38:37, 4.01s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 441/1019 [34:25<38:37, 4.01s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6316, 'learning_rate': 8.76e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▋ | 442/1019 [34:29<37:18, 3.88s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 442/1019 [34:29<37:18, 3.88s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:30:29,910 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:30:29,910 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3716, 'learning_rate': 8.800000000000001e-05, 'epoch': 0.43} + 44%|██████████████████████████████████▊ | 444/1019 [34:35<33:49, 3.53s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|██████████████████████████████████▊ | 444/1019 [34:35<33:49, 3.53s/it]g-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:30:35,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:30:35,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:26:46,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.874, 'learning_rate': 8.840000000000001e-05, 'epoch': 0.44} + 44%|███████████████████████████████████ | 446/1019 [34:41<29:22, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:39,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████ | 446/1019 [34:41<29:22, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:39,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████ | 447/1019 [34:43<26:46, 2.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:41,569 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████ | 447/1019 [34:43<26:46, 2.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:41,569 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 449/1019 [34:46<21:20, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:43,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 449/1019 [34:46<21:20, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:43,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5231, 'learning_rate': 8.900000000000001e-05, 'epoch': 0.44} + 44%|███████████████████████████████████▎ | 450/1019 [34:48<20:26, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:44,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 450/1019 [34:48<20:26, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:44,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 450/1019 [34:48<20:26, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 451/1019 [34:54<32:17, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 451/1019 [34:54<32:17, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0006, 'learning_rate': 8.960000000000001e-05, 'epoch': 0.44} + 44%|███████████████████████████████████▍ | 452/1019 [35:00<39:07, 4.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 452/1019 [35:00<39:07, 4.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5564, 'learning_rate': 8.98e-05, 'epoch': 0.44} + 44%|███████████████████████████████████▌ | 453/1019 [35:06<43:48, 4.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▌ | 453/1019 [35:06<43:48, 4.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5007, 'learning_rate': 9e-05, 'epoch': 0.44} + 45%|███████████████████████████████████▋ | 454/1019 [35:12<47:13, 5.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▋ | 454/1019 [35:12<47:13, 5.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6749, 'learning_rate': 9.020000000000001e-05, 'epoch': 0.45} + 45%|███████████████████████████████████▋ | 455/1019 [35:18<48:59, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▋ | 455/1019 [35:18<48:59, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4925, 'learning_rate': 9.04e-05, 'epoch': 0.45} + 45%|███████████████████████████████████▋ | 455/1019 [35:18<48:59, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▊ | 456/1019 [35:23<50:17, 5.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▊ | 456/1019 [35:23<50:17, 5.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▉ | 457/1019 [35:29<51:11, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▉ | 457/1019 [35:29<51:11, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1533, 'learning_rate': 9.080000000000001e-05, 'epoch': 0.45} + 45%|███████████████████████████████████▉ | 458/1019 [35:35<51:42, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|███████████████████████████████████▉ | 458/1019 [35:35<51:42, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3659, 'learning_rate': 9.1e-05, 'epoch': 0.45} + 45%|███████████████████████████████████▉ | 458/1019 [35:35<51:42, 5.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████ | 459/1019 [35:40<51:45, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████ | 459/1019 [35:40<51:45, 5.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████ | 460/1019 [35:46<51:34, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████ | 460/1019 [35:46<51:34, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5776, 'learning_rate': 9.140000000000001e-05, 'epoch': 0.45} + 45%|████████████████████████████████████ | 460/1019 [35:46<51:34, 5.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▏ | 461/1019 [35:51<51:21, 5.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▏ | 461/1019 [35:51<51:21, 5.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▏ | 461/1019 [35:51<51:21, 5.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 462/1019 [35:57<51:04, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 462/1019 [35:57<51:04, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 462/1019 [35:57<51:04, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 463/1019 [36:02<50:40, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 463/1019 [36:02<50:40, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 463/1019 [36:02<50:40, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▍ | 464/1019 [36:08<50:31, 5.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▍ | 464/1019 [36:08<50:31, 5.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▍ | 464/1019 [36:08<50:31, 5.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▌ | 465/1019 [36:13<50:01, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▌ | 465/1019 [36:13<50:01, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▌ | 465/1019 [36:13<50:01, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▌ | 466/1019 [36:18<49:55, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▌ | 466/1019 [36:18<49:55, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▌ | 466/1019 [36:18<49:55, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▋ | 467/1019 [36:24<49:38, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▋ | 467/1019 [36:24<49:38, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▋ | 467/1019 [36:24<49:38, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▋ | 467/1019 [36:24<49:38, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2403, 'learning_rate': 9.300000000000001e-05, 'epoch': 0.46} + 46%|████████████████████████████████████▋ | 467/1019 [36:24<49:38, 5.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▊ | 469/1019 [36:34<48:54, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▊ | 469/1019 [36:34<48:54, 5.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 470/1019 [36:40<48:31, 5.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 470/1019 [36:40<48:31, 5.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4285, 'learning_rate': 9.340000000000001e-05, 'epoch': 0.46} + 46%|████████████████████████████████████▉ | 470/1019 [36:40<48:31, 5.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 471/1019 [36:45<48:07, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 471/1019 [36:45<48:07, 5.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 472/1019 [36:50<47:30, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 472/1019 [36:50<47:30, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.992, 'learning_rate': 9.38e-05, 'epoch': 0.46} + 46%|█████████████████████████████████████ | 472/1019 [36:50<47:30, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▏ | 473/1019 [36:55<47:23, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▏ | 473/1019 [36:55<47:23, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▏ | 474/1019 [37:00<47:12, 5.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▏ | 474/1019 [37:00<47:12, 5.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5496, 'learning_rate': 9.42e-05, 'epoch': 0.46} + 47%|█████████████████████████████████████▏ | 474/1019 [37:00<47:12, 5.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▎ | 475/1019 [37:05<46:47, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▎ | 475/1019 [37:05<46:47, 5.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▎ | 476/1019 [37:10<46:06, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▎ | 476/1019 [37:10<46:06, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4337, 'learning_rate': 9.46e-05, 'epoch': 0.47} + 47%|█████████████████████████████████████▎ | 476/1019 [37:10<46:06, 5.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▍ | 477/1019 [37:15<45:47, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▍ | 477/1019 [37:15<45:47, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▌ | 478/1019 [37:20<45:20, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▌ | 478/1019 [37:20<45:20, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.593, 'learning_rate': 9.5e-05, 'epoch': 0.47} + 47%|█████████████████████████████████████▌ | 478/1019 [37:20<45:20, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▌ | 479/1019 [37:25<45:07, 5.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▌ | 479/1019 [37:25<45:07, 5.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▌ | 479/1019 [37:25<45:07, 5.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▋ | 480/1019 [37:30<44:45, 4.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▋ | 480/1019 [37:30<44:45, 4.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 481/1019 [37:35<44:12, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 481/1019 [37:35<44:12, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4579, 'learning_rate': 9.56e-05, 'epoch': 0.47} + 47%|█████████████████████████████████████▊ | 481/1019 [37:35<44:12, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 482/1019 [37:40<43:34, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 482/1019 [37:40<43:34, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 482/1019 [37:40<43:34, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 483/1019 [37:44<43:08, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 483/1019 [37:44<43:08, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 484/1019 [37:49<42:44, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 484/1019 [37:49<42:44, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5826, 'learning_rate': 9.620000000000001e-05, 'epoch': 0.47} + 48%|██████████████████████████████████████ | 485/1019 [37:54<41:56, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████ | 485/1019 [37:54<41:56, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0891, 'learning_rate': 9.64e-05, 'epoch': 0.48} + [WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6883, 'learning_rate': 9.66e-05, 'epoch': 0.48} +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:00,205 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:00,205 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4641, 'learning_rate': 9.680000000000001e-05, 'epoch': 0.48} +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:00,205 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▎ | 488/1019 [38:07<39:38, 4.48s/it]g-point operations will not be computed-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▎ | 488/1019 [38:07<39:38, 4.48s/it]g-point operations will not be computed-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▎ | 488/1019 [38:07<39:38, 4.48s/it]g-point operations will not be computed-02 06:30:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▍ | 489/1019 [38:11<38:45, 4.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▍ | 490/1019 [38:15<37:41, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▍ | 490/1019 [38:15<37:41, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2576, 'learning_rate': 9.74e-05, 'epoch': 0.48} + 48%|██████████████████████████████████████▌ | 491/1019 [38:19<36:17, 4.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 491/1019 [38:19<36:17, 4.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4475, 'learning_rate': 9.76e-05, 'epoch': 0.48} + 48%|██████████████████████████████████████▋ | 492/1019 [38:22<34:38, 3.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 492/1019 [38:22<34:38, 3.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:23,226 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:23,226 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:23,226 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▊ | 494/1019 [38:28<30:53, 3.53s/it]g-point operations will not be computed-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▊ | 494/1019 [38:28<30:53, 3.53s/it]g-point operations will not be computed-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:29,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:29,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:29,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:34:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|██████████████████████████████████████▉ | 496/1019 [38:34<26:50, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|██████████████████████████████████████▉ | 496/1019 [38:34<26:50, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████ | 497/1019 [38:36<24:47, 2.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████ | 497/1019 [38:36<24:47, 2.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:35,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:35,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:37,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:34:37,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2369] 2022-03-02 06:34:39,691 >> Batch size = 14luation *****e number of tokens of the input, floating-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2369] 2022-03-02 06:34:39,691 >> Batch size = 14luation *****e number of tokens of the input, floating-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 0/189 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 2/189 [00:02<04:02, 1.30s/it]g-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 3/189 [00:06<06:50, 2.21s/it]g-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 4/189 [00:09<07:41, 2.50s/it]g-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 5/189 [00:12<08:53, 2.90s/it]g-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 6/189 [00:16<10:06, 3.32s/it]g-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 7/189 [00:20<09:59, 3.30s/it]g-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 8/189 [00:23<09:53, 3.28s/it]g-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 9/189 [00:28<11:11, 3.73s/it]g-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 10/189 [00:32<11:29, 3.85s/it]g-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 11/189 [00:35<10:56, 3.69s/it]g-point operations will not be computed-02 06:34:32,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +RuntimeError: CUDA out of memory. Tried to allocate 1.69 GiB (GPU 0; 15.78 GiB total capacity; 9.19 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF +RuntimeError: CUDA out of memory. Tried to allocate 1.69 GiB (GPU 0; 15.78 GiB total capacity; 9.19 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF +RuntimeError: CUDA out of memory. Tried to allocate 1.69 GiB (GPU 0; 15.78 GiB total capacity; 9.19 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF \ No newline at end of file diff --git a/wandb/run-20220302_055556-ymuc7hv0/files/requirements.txt b/wandb/run-20220302_055556-ymuc7hv0/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220302_055556-ymuc7hv0/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220302_055556-ymuc7hv0/files/wandb-metadata.json b/wandb/run-20220302_055556-ymuc7hv0/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..7a31d89b16ec7cd7cb6681d7950dc6a819b826c9 --- /dev/null +++ b/wandb/run-20220302_055556-ymuc7hv0/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-02T05:55:57.281359", + "startedAt": "2022-03-02T05:55:56.178653", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=14", + "--per_device_eval_batch_size=14", + "--gradient_accumulation_steps=2", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=1e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "622e8b594e8af8169be8cfe538228e49ae08c59d" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json b/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a58f303dec81c147445d24dd3e9326fd500de806 --- /dev/null +++ b/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/loss": 5.5014, "train/learning_rate": 9.94e-05, "train/epoch": 0.49, "train/global_step": 500, "_runtime": 2323, "_timestamp": 1646202879, "_step": 499, "gradients/decoder.transformer.ln_f.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 12.0, 14.0, 54.0, 159.0, 427.0, 210.0, 70.0, 34.0, 18.0, 9.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-137.7477569580078, -119.74454498291016, -101.7413330078125, -83.73812103271484, -65.73490905761719, -47.73169708251953, -29.728485107421875, -11.725273132324219, 6.2779388427734375, 24.281150817871094, 42.28436279296875, 60.287574768066406, 78.29078674316406, 96.29399871826172, 114.29721069335938, 132.3004150390625, 150.3036346435547, 168.30685424804688, 186.31005859375, 204.31326293945312, 222.3164825439453, 240.3197021484375, 258.3229064941406, 276.32611083984375, 294.329345703125, 312.3325500488281, 330.33575439453125, 348.3389892578125, 366.3421936035156, 384.34539794921875, 402.3486328125, 420.3518371582031, 438.35498046875, 456.3581848144531, 474.36138916015625, 492.3646240234375, 510.3678283691406, 528.3710327148438, 546.374267578125, 564.37744140625, 582.3806762695312, 600.3839111328125, 618.3870849609375, 636.3903198242188, 654.3935546875, 672.396728515625, 690.3999633789062, 708.4031372070312, 726.4063720703125, 744.4096069335938, 762.4127807617188, 780.416015625, 798.419189453125, 816.4224243164062, 834.4256591796875, 852.4288330078125, 870.4320678710938, 888.435302734375, 906.4384765625, 924.4417114257812, 942.4449462890625, 960.4481201171875, 978.4513549804688, 996.4545288085938, 1014.457763671875]}, "gradients/decoder.transformer.ln_f.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 5.0, 2.0, 2.0, 2.0, 6.0, 8.0, 10.0, 5.0, 11.0, 21.0, 15.0, 15.0, 22.0, 24.0, 27.0, 19.0, 40.0, 26.0, 29.0, 35.0, 34.0, 38.0, 44.0, 27.0, 54.0, 50.0, 46.0, 40.0, 39.0, 29.0, 34.0, 30.0, 31.0, 22.0, 18.0, 23.0, 24.0, 12.0, 17.0, 18.0, 9.0, 7.0, 13.0, 6.0, 7.0, 1.0, 2.0, 3.0, 5.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-428.36077880859375, -414.86602783203125, -401.3713073730469, -387.8765563964844, -374.3818054199219, -360.8870849609375, -347.392333984375, -333.8975830078125, -320.40283203125, -306.9080810546875, -293.4133605957031, -279.9186096191406, -266.4238586425781, -252.9291229248047, -239.43438720703125, -225.93963623046875, -212.44491577148438, -198.95018005371094, -185.45542907714844, -171.960693359375, -158.4659423828125, -144.97120666503906, -131.47647094726562, -117.98172760009766, -104.48698425292969, -90.99224090576172, -77.49749755859375, -64.00276184082031, -50.508018493652344, -37.013275146484375, -23.518539428710938, -10.023796081542969, 3.470916748046875, 16.96565818786621, 30.460399627685547, 43.95513916015625, 57.44988250732422, 70.94462585449219, 84.43936157226562, 97.9341049194336, 111.42884826660156, 124.92359161376953, 138.4183349609375, 151.91307067871094, 165.40780639648438, 178.90255737304688, 192.3972930908203, 205.89202880859375, 219.38677978515625, 232.8815155029297, 246.3762664794922, 259.8710021972656, 273.3657531738281, 286.8604736328125, 300.355224609375, 313.8499755859375, 327.3447265625, 340.8394775390625, 354.3341979980469, 367.8289489746094, 381.3236999511719, 394.81842041015625, 408.31317138671875, 421.80792236328125, 435.3026428222656]}, "gradients/decoder.transformer.h.23.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 4.0, 7.0, 3.0, 7.0, 10.0, 18.0, 17.0, 23.0, 20.0, 32.0, 32.0, 28.0, 48.0, 42.0, 43.0, 51.0, 56.0, 41.0, 70.0, 63.0, 50.0, 40.0, 51.0, 38.0, 39.0, 22.0, 32.0, 31.0, 17.0, 12.0, 14.0, 15.0, 6.0, 7.0, 5.0, 4.0, 3.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-26.6875, -25.923828125, -25.16015625, -24.396484375, -23.6328125, -22.869140625, -22.10546875, -21.341796875, -20.578125, -19.814453125, -19.05078125, -18.287109375, -17.5234375, -16.759765625, -15.99609375, -15.232421875, -14.46875, -13.705078125, -12.94140625, -12.177734375, -11.4140625, -10.650390625, -9.88671875, -9.123046875, -8.359375, -7.595703125, -6.83203125, -6.068359375, -5.3046875, -4.541015625, -3.77734375, -3.013671875, -2.25, -1.486328125, -0.72265625, 0.041015625, 0.8046875, 1.568359375, 2.33203125, 3.095703125, 3.859375, 4.623046875, 5.38671875, 6.150390625, 6.9140625, 7.677734375, 8.44140625, 9.205078125, 9.96875, 10.732421875, 11.49609375, 12.259765625, 13.0234375, 13.787109375, 14.55078125, 15.314453125, 16.078125, 16.841796875, 17.60546875, 18.369140625, 19.1328125, 19.896484375, 20.66015625, 21.423828125, 22.1875]}, "gradients/decoder.transformer.h.23.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 8.0, 11.0, 19.0, 18.0, 23.0, 40.0, 43.0, 79.0, 79.0, 163.0, 211.0, 281.0, 448.0, 692.0, 1070.0, 1834.0, 3454.0, 7373.0, 17263.0, 55457.0, 2888956.0, 1146331.0, 42617.0, 14359.0, 6125.0, 3010.0, 1564.0, 958.0, 551.0, 396.0, 252.0, 180.0, 111.0, 94.0, 54.0, 46.0, 27.0, 21.0, 16.0, 14.0, 8.0, 4.0, 3.0, 2.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0], "bins": [-123.375, -119.83984375, -116.3046875, -112.76953125, -109.234375, -105.69921875, -102.1640625, -98.62890625, -95.09375, -91.55859375, -88.0234375, -84.48828125, -80.953125, -77.41796875, -73.8828125, -70.34765625, -66.8125, -63.27734375, -59.7421875, -56.20703125, -52.671875, -49.13671875, -45.6015625, -42.06640625, -38.53125, -34.99609375, -31.4609375, -27.92578125, -24.390625, -20.85546875, -17.3203125, -13.78515625, -10.25, -6.71484375, -3.1796875, 0.35546875, 3.890625, 7.42578125, 10.9609375, 14.49609375, 18.03125, 21.56640625, 25.1015625, 28.63671875, 32.171875, 35.70703125, 39.2421875, 42.77734375, 46.3125, 49.84765625, 53.3828125, 56.91796875, 60.453125, 63.98828125, 67.5234375, 71.05859375, 74.59375, 78.12890625, 81.6640625, 85.19921875, 88.734375, 92.26953125, 95.8046875, 99.33984375, 102.875]}, "gradients/decoder.transformer.h.23.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 8.0, 2.0, 11.0, 8.0, 7.0, 9.0, 12.0, 22.0, 36.0, 50.0, 77.0, 120.0, 298.0, 962.0, 1501.0, 504.0, 159.0, 71.0, 69.0, 43.0, 27.0, 27.0, 10.0, 18.0, 10.0, 5.0, 6.0, 2.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-133.625, -129.341796875, -125.05859375, -120.775390625, -116.4921875, -112.208984375, -107.92578125, -103.642578125, -99.359375, -95.076171875, -90.79296875, -86.509765625, -82.2265625, -77.943359375, -73.66015625, -69.376953125, -65.09375, -60.810546875, -56.52734375, -52.244140625, -47.9609375, -43.677734375, -39.39453125, -35.111328125, -30.828125, -26.544921875, -22.26171875, -17.978515625, -13.6953125, -9.412109375, -5.12890625, -0.845703125, 3.4375, 7.720703125, 12.00390625, 16.287109375, 20.5703125, 24.853515625, 29.13671875, 33.419921875, 37.703125, 41.986328125, 46.26953125, 50.552734375, 54.8359375, 59.119140625, 63.40234375, 67.685546875, 71.96875, 76.251953125, 80.53515625, 84.818359375, 89.1015625, 93.384765625, 97.66796875, 101.951171875, 106.234375, 110.517578125, 114.80078125, 119.083984375, 123.3671875, 127.650390625, 131.93359375, 136.216796875, 140.5]}, "gradients/decoder.transformer.h.23.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 2.0, 3.0, 5.0, 11.0, 9.0, 14.0, 22.0, 23.0, 21.0, 31.0, 62.0, 101.0, 154.0, 349.0, 1082.0, 6044.0, 89072.0, 4053568.0, 38605.0, 3558.0, 789.0, 303.0, 152.0, 88.0, 54.0, 51.0, 36.0, 25.0, 16.0, 10.0, 6.0, 4.0, 7.0, 4.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-434.25, -420.41015625, -406.5703125, -392.73046875, -378.890625, -365.05078125, -351.2109375, -337.37109375, -323.53125, -309.69140625, -295.8515625, -282.01171875, -268.171875, -254.33203125, -240.4921875, -226.65234375, -212.8125, -198.97265625, -185.1328125, -171.29296875, -157.453125, -143.61328125, -129.7734375, -115.93359375, -102.09375, -88.25390625, -74.4140625, -60.57421875, -46.734375, -32.89453125, -19.0546875, -5.21484375, 8.625, 22.46484375, 36.3046875, 50.14453125, 63.984375, 77.82421875, 91.6640625, 105.50390625, 119.34375, 133.18359375, 147.0234375, 160.86328125, 174.703125, 188.54296875, 202.3828125, 216.22265625, 230.0625, 243.90234375, 257.7421875, 271.58203125, 285.421875, 299.26171875, 313.1015625, 326.94140625, 340.78125, 354.62109375, 368.4609375, 382.30078125, 396.140625, 409.98046875, 423.8203125, 437.66015625, 451.5]}, "gradients/decoder.transformer.h.23.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 13.0, 43.0, 178.0, 660.0, 94.0, 19.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1595.5902099609375, -1559.6495361328125, -1523.7088623046875, -1487.768310546875, -1451.82763671875, -1415.886962890625, -1379.9462890625, -1344.005615234375, -1308.06494140625, -1272.124267578125, -1236.18359375, -1200.2430419921875, -1164.3023681640625, -1128.3616943359375, -1092.4210205078125, -1056.4803466796875, -1020.539794921875, -984.59912109375, -948.6585083007812, -912.7178344726562, -876.7772216796875, -840.8365478515625, -804.8958740234375, -768.9552001953125, -733.0145874023438, -697.0739135742188, -661.13330078125, -625.192626953125, -589.251953125, -553.3113403320312, -517.3706665039062, -481.4300231933594, -445.489501953125, -409.5488586425781, -373.60821533203125, -337.66754150390625, -301.7268981933594, -265.7862548828125, -229.84559631347656, -193.90493774414062, -157.96429443359375, -122.02364349365234, -86.08299255371094, -50.14234161376953, -14.201690673828125, 21.73895263671875, 57.67961120605469, 93.62026977539062, 129.5609130859375, 165.50155639648438, 201.4422149658203, 237.38287353515625, 273.3235168457031, 309.26416015625, 345.204833984375, 381.1454772949219, 417.08612060546875, 453.0267639160156, 488.9674072265625, 524.9080810546875, 560.8487548828125, 596.7893676757812, 632.7300415039062, 668.670654296875, 704.611328125]}, "gradients/decoder.transformer.h.23.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 6.0, 5.0, 4.0, 7.0, 7.0, 13.0, 10.0, 16.0, 18.0, 25.0, 22.0, 24.0, 24.0, 36.0, 33.0, 30.0, 40.0, 47.0, 49.0, 61.0, 40.0, 43.0, 49.0, 50.0, 40.0, 45.0, 33.0, 28.0, 31.0, 20.0, 22.0, 24.0, 19.0, 14.0, 7.0, 10.0, 13.0, 6.0, 11.0, 13.0, 5.0, 4.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-306.07330322265625, -296.5806579589844, -287.0880126953125, -277.59539794921875, -268.1027526855469, -258.610107421875, -249.1174774169922, -239.62484741210938, -230.1322021484375, -220.63955688476562, -211.1469268798828, -201.654296875, -192.16165161132812, -182.66900634765625, -173.17637634277344, -163.68374633789062, -154.19110107421875, -144.69845581054688, -135.20582580566406, -125.71318817138672, -116.22055053710938, -106.72791290283203, -97.23527526855469, -87.74263763427734, -78.25, -68.75736236572266, -59.26472473144531, -49.77208709716797, -40.279449462890625, -30.78681182861328, -21.294174194335938, -11.801536560058594, -2.308868408203125, 7.183769226074219, 16.676406860351562, 26.169044494628906, 35.66168212890625, 45.154319763183594, 54.64695739746094, 64.13959503173828, 73.63223266601562, 83.12487030029297, 92.61750793457031, 102.11014556884766, 111.602783203125, 121.09542083740234, 130.5880584716797, 140.0806884765625, 149.57333374023438, 159.06597900390625, 168.55860900878906, 178.05123901367188, 187.54388427734375, 197.03652954101562, 206.52915954589844, 216.02178955078125, 225.51443481445312, 235.007080078125, 244.4997100830078, 253.99234008789062, 263.4849853515625, 272.9776306152344, 282.47027587890625, 291.962890625, 301.4555358886719]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 7.0, 6.0, 5.0, 13.0, 8.0, 22.0, 16.0, 20.0, 21.0, 31.0, 28.0, 33.0, 41.0, 48.0, 49.0, 69.0, 47.0, 60.0, 43.0, 52.0, 47.0, 45.0, 55.0, 42.0, 35.0, 27.0, 26.0, 15.0, 24.0, 17.0, 7.0, 14.0, 9.0, 7.0, 6.0, 5.0, 3.0, 2.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-29.671875, -28.8642578125, -28.056640625, -27.2490234375, -26.44140625, -25.6337890625, -24.826171875, -24.0185546875, -23.2109375, -22.4033203125, -21.595703125, -20.7880859375, -19.98046875, -19.1728515625, -18.365234375, -17.5576171875, -16.75, -15.9423828125, -15.134765625, -14.3271484375, -13.51953125, -12.7119140625, -11.904296875, -11.0966796875, -10.2890625, -9.4814453125, -8.673828125, -7.8662109375, -7.05859375, -6.2509765625, -5.443359375, -4.6357421875, -3.828125, -3.0205078125, -2.212890625, -1.4052734375, -0.59765625, 0.2099609375, 1.017578125, 1.8251953125, 2.6328125, 3.4404296875, 4.248046875, 5.0556640625, 5.86328125, 6.6708984375, 7.478515625, 8.2861328125, 9.09375, 9.9013671875, 10.708984375, 11.5166015625, 12.32421875, 13.1318359375, 13.939453125, 14.7470703125, 15.5546875, 16.3623046875, 17.169921875, 17.9775390625, 18.78515625, 19.5927734375, 20.400390625, 21.2080078125, 22.015625]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 2.0, 5.0, 5.0, 9.0, 10.0, 19.0, 17.0, 24.0, 31.0, 50.0, 75.0, 90.0, 152.0, 218.0, 383.0, 550.0, 986.0, 1489.0, 2417.0, 3966.0, 6795.0, 11216.0, 19512.0, 35146.0, 68001.0, 148036.0, 323843.0, 221834.0, 95431.0, 46596.0, 25463.0, 14614.0, 8438.0, 4941.0, 3020.0, 1893.0, 1145.0, 735.0, 468.0, 295.0, 219.0, 147.0, 86.0, 65.0, 37.0, 23.0, 19.0, 10.0, 7.0, 12.0, 4.0, 4.0, 2.0, 6.0, 2.0, 2.0], "bins": [-1.3671875, -1.32781982421875, -1.2884521484375, -1.24908447265625, -1.209716796875, -1.17034912109375, -1.1309814453125, -1.09161376953125, -1.05224609375, -1.01287841796875, -0.9735107421875, -0.93414306640625, -0.894775390625, -0.85540771484375, -0.8160400390625, -0.77667236328125, -0.7373046875, -0.69793701171875, -0.6585693359375, -0.61920166015625, -0.579833984375, -0.54046630859375, -0.5010986328125, -0.46173095703125, -0.42236328125, -0.38299560546875, -0.3436279296875, -0.30426025390625, -0.264892578125, -0.22552490234375, -0.1861572265625, -0.14678955078125, -0.107421875, -0.06805419921875, -0.0286865234375, 0.01068115234375, 0.050048828125, 0.08941650390625, 0.1287841796875, 0.16815185546875, 0.20751953125, 0.24688720703125, 0.2862548828125, 0.32562255859375, 0.364990234375, 0.40435791015625, 0.4437255859375, 0.48309326171875, 0.5224609375, 0.56182861328125, 0.6011962890625, 0.64056396484375, 0.679931640625, 0.71929931640625, 0.7586669921875, 0.79803466796875, 0.83740234375, 0.87677001953125, 0.9161376953125, 0.95550537109375, 0.994873046875, 1.03424072265625, 1.0736083984375, 1.11297607421875, 1.15234375]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 0.0, 1.0, 4.0, 2.0, 10.0, 4.0, 4.0, 7.0, 8.0, 11.0, 14.0, 15.0, 20.0, 16.0, 11.0, 20.0, 31.0, 27.0, 24.0, 29.0, 28.0, 37.0, 28.0, 35.0, 45.0, 37.0, 43.0, 1054.0, 40.0, 32.0, 37.0, 41.0, 29.0, 22.0, 26.0, 37.0, 31.0, 23.0, 19.0, 28.0, 11.0, 9.0, 14.0, 20.0, 11.0, 12.0, 3.0, 7.0, 7.0, 2.0, 3.0, 4.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-12.421875, -12.01904296875, -11.6162109375, -11.21337890625, -10.810546875, -10.40771484375, -10.0048828125, -9.60205078125, -9.19921875, -8.79638671875, -8.3935546875, -7.99072265625, -7.587890625, -7.18505859375, -6.7822265625, -6.37939453125, -5.9765625, -5.57373046875, -5.1708984375, -4.76806640625, -4.365234375, -3.96240234375, -3.5595703125, -3.15673828125, -2.75390625, -2.35107421875, -1.9482421875, -1.54541015625, -1.142578125, -0.73974609375, -0.3369140625, 0.06591796875, 0.46875, 0.87158203125, 1.2744140625, 1.67724609375, 2.080078125, 2.48291015625, 2.8857421875, 3.28857421875, 3.69140625, 4.09423828125, 4.4970703125, 4.89990234375, 5.302734375, 5.70556640625, 6.1083984375, 6.51123046875, 6.9140625, 7.31689453125, 7.7197265625, 8.12255859375, 8.525390625, 8.92822265625, 9.3310546875, 9.73388671875, 10.13671875, 10.53955078125, 10.9423828125, 11.34521484375, 11.748046875, 12.15087890625, 12.5537109375, 12.95654296875, 13.359375]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 6.0, 5.0, 8.0, 15.0, 15.0, 26.0, 29.0, 44.0, 71.0, 96.0, 163.0, 225.0, 324.0, 448.0, 694.0, 1029.0, 1628.0, 2482.0, 3821.0, 5799.0, 9377.0, 14857.0, 24339.0, 40531.0, 73002.0, 145234.0, 1327772.0, 214938.0, 96798.0, 52180.0, 30687.0, 18316.0, 11561.0, 7386.0, 4648.0, 2899.0, 1949.0, 1271.0, 834.0, 549.0, 353.0, 243.0, 157.0, 110.0, 77.0, 38.0, 38.0, 21.0, 11.0, 16.0, 11.0, 3.0, 4.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0], "bins": [-1.0537109375, -1.0194091796875, -0.985107421875, -0.9508056640625, -0.91650390625, -0.8822021484375, -0.847900390625, -0.8135986328125, -0.779296875, -0.7449951171875, -0.710693359375, -0.6763916015625, -0.64208984375, -0.6077880859375, -0.573486328125, -0.5391845703125, -0.5048828125, -0.4705810546875, -0.436279296875, -0.4019775390625, -0.36767578125, -0.3333740234375, -0.299072265625, -0.2647705078125, -0.23046875, -0.1961669921875, -0.161865234375, -0.1275634765625, -0.09326171875, -0.0589599609375, -0.024658203125, 0.0096435546875, 0.0439453125, 0.0782470703125, 0.112548828125, 0.1468505859375, 0.18115234375, 0.2154541015625, 0.249755859375, 0.2840576171875, 0.318359375, 0.3526611328125, 0.386962890625, 0.4212646484375, 0.45556640625, 0.4898681640625, 0.524169921875, 0.5584716796875, 0.5927734375, 0.6270751953125, 0.661376953125, 0.6956787109375, 0.72998046875, 0.7642822265625, 0.798583984375, 0.8328857421875, 0.8671875, 0.9014892578125, 0.935791015625, 0.9700927734375, 1.00439453125, 1.0386962890625, 1.072998046875, 1.1072998046875, 1.1416015625]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 7.0, 5.0, 5.0, 7.0, 8.0, 14.0, 17.0, 10.0, 22.0, 29.0, 39.0, 41.0, 48.0, 62.0, 51.0, 65.0, 71.0, 81.0, 84.0, 58.0, 55.0, 45.0, 44.0, 30.0, 19.0, 25.0, 15.0, 7.0, 10.0, 6.0, 6.0, 4.0, 10.0, 5.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.015716552734375, -0.015219926834106445, -0.01472330093383789, -0.014226675033569336, -0.013730049133300781, -0.013233423233032227, -0.012736797332763672, -0.012240171432495117, -0.011743545532226562, -0.011246919631958008, -0.010750293731689453, -0.010253667831420898, -0.009757041931152344, -0.009260416030883789, -0.008763790130615234, -0.00826716423034668, -0.007770538330078125, -0.00727391242980957, -0.006777286529541016, -0.006280660629272461, -0.005784034729003906, -0.0052874088287353516, -0.004790782928466797, -0.004294157028198242, -0.0037975311279296875, -0.003300905227661133, -0.002804279327392578, -0.0023076534271240234, -0.0018110275268554688, -0.001314401626586914, -0.0008177757263183594, -0.0003211498260498047, 0.00017547607421875, 0.0006721019744873047, 0.0011687278747558594, 0.001665353775024414, 0.0021619796752929688, 0.0026586055755615234, 0.003155231475830078, 0.003651857376098633, 0.0041484832763671875, 0.004645109176635742, 0.005141735076904297, 0.0056383609771728516, 0.006134986877441406, 0.006631612777709961, 0.007128238677978516, 0.00762486457824707, 0.008121490478515625, 0.00861811637878418, 0.009114742279052734, 0.009611368179321289, 0.010107994079589844, 0.010604619979858398, 0.011101245880126953, 0.011597871780395508, 0.012094497680664062, 0.012591123580932617, 0.013087749481201172, 0.013584375381469727, 0.014081001281738281, 0.014577627182006836, 0.01507425308227539, 0.015570878982543945, 0.0160675048828125]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 0.0, 2.0, 4.0, 4.0, 9.0, 10.0, 13.0, 14.0, 16.0, 33.0, 35.0, 38.0, 42.0, 65.0, 102.0, 118.0, 140.0, 194.0, 265.0, 797.0, 668427.0, 376337.0, 773.0, 258.0, 212.0, 152.0, 130.0, 75.0, 67.0, 65.0, 33.0, 28.0, 21.0, 14.0, 15.0, 14.0, 15.0, 9.0, 5.0, 2.0, 4.0, 0.0, 5.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.302001953125, -0.2922554016113281, -0.28250885009765625, -0.2727622985839844, -0.2630157470703125, -0.2532691955566406, -0.24352264404296875, -0.23377609252929688, -0.224029541015625, -0.21428298950195312, -0.20453643798828125, -0.19478988647460938, -0.1850433349609375, -0.17529678344726562, -0.16555023193359375, -0.15580368041992188, -0.14605712890625, -0.13631057739257812, -0.12656402587890625, -0.11681747436523438, -0.1070709228515625, -0.09732437133789062, -0.08757781982421875, -0.07783126831054688, -0.068084716796875, -0.058338165283203125, -0.04859161376953125, -0.038845062255859375, -0.0290985107421875, -0.019351959228515625, -0.00960540771484375, 0.000141143798828125, 0.0098876953125, 0.019634246826171875, 0.02938079833984375, 0.039127349853515625, 0.0488739013671875, 0.058620452880859375, 0.06836700439453125, 0.07811355590820312, 0.087860107421875, 0.09760665893554688, 0.10735321044921875, 0.11709976196289062, 0.1268463134765625, 0.13659286499023438, 0.14633941650390625, 0.15608596801757812, 0.16583251953125, 0.17557907104492188, 0.18532562255859375, 0.19507217407226562, 0.2048187255859375, 0.21456527709960938, 0.22431182861328125, 0.23405838012695312, 0.243804931640625, 0.2535514831542969, 0.26329803466796875, 0.2730445861816406, 0.2827911376953125, 0.2925376892089844, 0.30228424072265625, 0.3120307922363281, 0.32177734375]}, "gradients/decoder.transformer.h.23.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 844.0, 170.0], "bins": [-0.2517983019351959, -0.24779370427131653, -0.24378910660743713, -0.23978449404239655, -0.23577989637851715, -0.23177529871463776, -0.22777068614959717, -0.22376608848571777, -0.21976149082183838, -0.21575689315795898, -0.2117522954940796, -0.207747682929039, -0.2037430852651596, -0.1997384876012802, -0.19573387503623962, -0.19172927737236023, -0.18772467970848083, -0.18372008204460144, -0.17971548438072205, -0.17571087181568146, -0.17170627415180206, -0.16770167648792267, -0.16369706392288208, -0.15969246625900269, -0.1556878685951233, -0.1516832709312439, -0.1476786732673645, -0.1436740607023239, -0.13966946303844452, -0.13566486537456512, -0.13166025280952454, -0.12765565514564514, -0.12365106493234634, -0.11964645981788635, -0.11564186215400696, -0.11163726449012756, -0.10763265937566757, -0.10362805426120758, -0.09962345659732819, -0.09561885893344879, -0.0916142538189888, -0.08760964870452881, -0.08360505104064941, -0.07960045337677002, -0.07559584826231003, -0.07159124314785004, -0.06758664548397064, -0.06358204782009125, -0.059577446430921555, -0.05557284504175186, -0.05156824365258217, -0.047563642263412476, -0.04355904087424278, -0.03955443948507309, -0.0355498380959034, -0.031545236706733704, -0.02754063531756401, -0.023536033928394318, -0.019531432539224625, -0.015526831150054932, -0.011522229760885239, -0.007517628371715546, -0.0035130269825458527, 0.0004915744066238403, 0.004496176727116108]}, "gradients/decoder.transformer.h.23.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 9.0, 9.0, 7.0, 18.0, 17.0, 30.0, 54.0, 51.0, 53.0, 59.0, 72.0, 88.0, 79.0, 83.0, 83.0, 68.0, 53.0, 40.0, 49.0, 27.0, 25.0, 15.0, 11.0, 4.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.013592243194580078, -0.013156288303434849, -0.01272033341228962, -0.01228437852114439, -0.01184842362999916, -0.011412468738853931, -0.010976513847708702, -0.010540558956563473, -0.010104604065418243, -0.009668649174273014, -0.009232694283127785, -0.008796739391982555, -0.008360784500837326, -0.007924829609692097, -0.007488874718546867, -0.007052919827401638, -0.006616964936256409, -0.006181010045111179, -0.00574505515396595, -0.005309100262820721, -0.004873145371675491, -0.004437190480530262, -0.004001235589385033, -0.0035652806982398033, -0.003129325807094574, -0.0026933709159493446, -0.0022574160248041153, -0.001821461133658886, -0.0013855062425136566, -0.0009495513513684273, -0.0005135964602231979, -7.76415690779686e-05, 0.00035831332206726074, 0.0007942682132124901, 0.0012302231043577194, 0.0016661779955029488, 0.002102132886648178, 0.0025380877777934074, 0.0029740426689386368, 0.003409997560083866, 0.0038459524512290955, 0.004281907342374325, 0.004717862233519554, 0.0051538171246647835, 0.005589772015810013, 0.006025726906955242, 0.0064616817981004715, 0.006897636689245701, 0.00733359158039093, 0.0077695464715361595, 0.008205501362681389, 0.008641456253826618, 0.009077411144971848, 0.009513366036117077, 0.009949320927262306, 0.010385275818407536, 0.010821230709552765, 0.011257185600697994, 0.011693140491843224, 0.012129095382988453, 0.012565050274133682, 0.013001005165278912, 0.013436960056424141, 0.01387291494756937, 0.0143088698387146]}, "gradients/decoder.transformer.h.23.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 7.0, 6.0, 5.0, 13.0, 8.0, 22.0, 16.0, 20.0, 21.0, 31.0, 28.0, 33.0, 41.0, 48.0, 49.0, 69.0, 47.0, 60.0, 43.0, 52.0, 47.0, 45.0, 55.0, 42.0, 35.0, 27.0, 26.0, 15.0, 24.0, 17.0, 7.0, 14.0, 9.0, 7.0, 6.0, 5.0, 3.0, 2.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-29.671875, -28.8642578125, -28.056640625, -27.2490234375, -26.44140625, -25.6337890625, -24.826171875, -24.0185546875, -23.2109375, -22.4033203125, -21.595703125, -20.7880859375, -19.98046875, -19.1728515625, -18.365234375, -17.5576171875, -16.75, -15.9423828125, -15.134765625, -14.3271484375, -13.51953125, -12.7119140625, -11.904296875, -11.0966796875, -10.2890625, -9.4814453125, -8.673828125, -7.8662109375, -7.05859375, -6.2509765625, -5.443359375, -4.6357421875, -3.828125, -3.0205078125, -2.212890625, -1.4052734375, -0.59765625, 0.2099609375, 1.017578125, 1.8251953125, 2.6328125, 3.4404296875, 4.248046875, 5.0556640625, 5.86328125, 6.6708984375, 7.478515625, 8.2861328125, 9.09375, 9.9013671875, 10.708984375, 11.5166015625, 12.32421875, 13.1318359375, 13.939453125, 14.7470703125, 15.5546875, 16.3623046875, 17.169921875, 17.9775390625, 18.78515625, 19.5927734375, 20.400390625, 21.2080078125, 22.015625]}, "gradients/decoder.transformer.h.23.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0, 7.0, 4.0, 6.0, 12.0, 9.0, 15.0, 19.0, 29.0, 43.0, 72.0, 115.0, 173.0, 287.0, 444.0, 767.0, 1151.0, 1951.0, 3351.0, 6262.0, 13005.0, 35173.0, 171646.0, 673034.0, 94029.0, 24467.0, 10167.0, 5077.0, 2874.0, 1697.0, 981.0, 595.0, 400.0, 236.0, 143.0, 99.0, 63.0, 42.0, 39.0, 25.0, 16.0, 15.0, 11.0, 5.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-86.3125, -83.9375, -81.5625, -79.1875, -76.8125, -74.4375, -72.0625, -69.6875, -67.3125, -64.9375, -62.5625, -60.1875, -57.8125, -55.4375, -53.0625, -50.6875, -48.3125, -45.9375, -43.5625, -41.1875, -38.8125, -36.4375, -34.0625, -31.6875, -29.3125, -26.9375, -24.5625, -22.1875, -19.8125, -17.4375, -15.0625, -12.6875, -10.3125, -7.9375, -5.5625, -3.1875, -0.8125, 1.5625, 3.9375, 6.3125, 8.6875, 11.0625, 13.4375, 15.8125, 18.1875, 20.5625, 22.9375, 25.3125, 27.6875, 30.0625, 32.4375, 34.8125, 37.1875, 39.5625, 41.9375, 44.3125, 46.6875, 49.0625, 51.4375, 53.8125, 56.1875, 58.5625, 60.9375, 63.3125, 65.6875]}, "gradients/decoder.transformer.h.23.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 1.0, 3.0, 7.0, 7.0, 11.0, 9.0, 3.0, 17.0, 20.0, 17.0, 26.0, 29.0, 32.0, 36.0, 33.0, 56.0, 39.0, 67.0, 78.0, 312.0, 1675.0, 128.0, 48.0, 56.0, 44.0, 48.0, 37.0, 34.0, 27.0, 23.0, 20.0, 26.0, 19.0, 12.0, 10.0, 14.0, 11.0, 4.0, 3.0, 2.0, 7.0, 1.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0], "bins": [-86.0, -83.5537109375, -81.107421875, -78.6611328125, -76.21484375, -73.7685546875, -71.322265625, -68.8759765625, -66.4296875, -63.9833984375, -61.537109375, -59.0908203125, -56.64453125, -54.1982421875, -51.751953125, -49.3056640625, -46.859375, -44.4130859375, -41.966796875, -39.5205078125, -37.07421875, -34.6279296875, -32.181640625, -29.7353515625, -27.2890625, -24.8427734375, -22.396484375, -19.9501953125, -17.50390625, -15.0576171875, -12.611328125, -10.1650390625, -7.71875, -5.2724609375, -2.826171875, -0.3798828125, 2.06640625, 4.5126953125, 6.958984375, 9.4052734375, 11.8515625, 14.2978515625, 16.744140625, 19.1904296875, 21.63671875, 24.0830078125, 26.529296875, 28.9755859375, 31.421875, 33.8681640625, 36.314453125, 38.7607421875, 41.20703125, 43.6533203125, 46.099609375, 48.5458984375, 50.9921875, 53.4384765625, 55.884765625, 58.3310546875, 60.77734375, 63.2236328125, 65.669921875, 68.1162109375, 70.5625]}, "gradients/decoder.transformer.h.23.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 8.0, 2.0, 5.0, 3.0, 7.0, 11.0, 10.0, 13.0, 19.0, 20.0, 18.0, 20.0, 28.0, 32.0, 48.0, 73.0, 84.0, 172.0, 567.0, 6762.0, 3123705.0, 12929.0, 632.0, 172.0, 89.0, 69.0, 33.0, 40.0, 22.0, 21.0, 21.0, 13.0, 18.0, 9.0, 5.0, 10.0, 9.0, 3.0, 3.0, 3.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-331.25, -319.6484375, -308.046875, -296.4453125, -284.84375, -273.2421875, -261.640625, -250.0390625, -238.4375, -226.8359375, -215.234375, -203.6328125, -192.03125, -180.4296875, -168.828125, -157.2265625, -145.625, -134.0234375, -122.421875, -110.8203125, -99.21875, -87.6171875, -76.015625, -64.4140625, -52.8125, -41.2109375, -29.609375, -18.0078125, -6.40625, 5.1953125, 16.796875, 28.3984375, 40.0, 51.6015625, 63.203125, 74.8046875, 86.40625, 98.0078125, 109.609375, 121.2109375, 132.8125, 144.4140625, 156.015625, 167.6171875, 179.21875, 190.8203125, 202.421875, 214.0234375, 225.625, 237.2265625, 248.828125, 260.4296875, 272.03125, 283.6328125, 295.234375, 306.8359375, 318.4375, 330.0390625, 341.640625, 353.2421875, 364.84375, 376.4453125, 388.046875, 399.6484375, 411.25]}, "gradients/decoder.transformer.h.23.ln_1.weight": {"_type": "histogram", "values": [969.0, 50.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-44.86767578125, 15.905780792236328, 76.67923736572266, 137.45269775390625, 198.2261505126953, 258.9996032714844, 319.7730712890625, 380.5465087890625, 441.3199768066406, 502.09344482421875, 562.8668823242188, 623.640380859375, 684.413818359375, 745.187255859375, 805.960693359375, 866.734130859375, 927.5076293945312, 988.2810668945312, 1049.0545654296875, 1109.8280029296875, 1170.6014404296875, 1231.3748779296875, 1292.1484375, 1352.921875, 1413.6953125, 1474.46875, 1535.2421875, 1596.015625, 1656.7891845703125, 1717.5626220703125, 1778.3360595703125, 1839.1094970703125, 1899.8829345703125, 1960.6563720703125, 2021.4298095703125, 2082.203369140625, 2142.976806640625, 2203.750244140625, 2264.523681640625, 2325.297119140625, 2386.070556640625, 2446.843994140625, 2507.617431640625, 2568.390869140625, 2629.164306640625, 2689.937744140625, 2750.71142578125, 2811.48486328125, 2872.25830078125, 2933.03173828125, 2993.80517578125, 3054.57861328125, 3115.35205078125, 3176.12548828125, 3236.89892578125, 3297.67236328125, 3358.44580078125, 3419.21923828125, 3479.99267578125, 3540.76611328125, 3601.53955078125, 3662.31298828125, 3723.08642578125, 3783.85986328125, 3844.633544921875]}, "gradients/decoder.transformer.h.23.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 5.0, 5.0, 8.0, 5.0, 6.0, 11.0, 5.0, 13.0, 23.0, 17.0, 23.0, 16.0, 28.0, 25.0, 28.0, 28.0, 25.0, 30.0, 40.0, 36.0, 45.0, 37.0, 39.0, 42.0, 37.0, 45.0, 41.0, 36.0, 49.0, 30.0, 31.0, 28.0, 26.0, 33.0, 21.0, 19.0, 16.0, 12.0, 12.0, 6.0, 7.0, 8.0, 6.0, 2.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-228.02313232421875, -220.68858337402344, -213.35401916503906, -206.01947021484375, -198.68492126464844, -191.35037231445312, -184.01580810546875, -176.68125915527344, -169.34671020507812, -162.0121612548828, -154.67759704589844, -147.34304809570312, -140.0084991455078, -132.6739501953125, -125.33938598632812, -118.00483703613281, -110.67028045654297, -103.33572387695312, -96.00117492675781, -88.66661834716797, -81.33206939697266, -73.99751281738281, -66.6629638671875, -59.328407287597656, -51.99385452270508, -44.6593017578125, -37.32474899291992, -29.99019432067871, -22.655641555786133, -15.321086883544922, -7.986534118652344, -0.6519813537597656, 6.6825714111328125, 14.01712417602539, 21.35167694091797, 28.68623161315918, 36.020782470703125, 43.35533905029297, 50.68989181518555, 58.024444580078125, 65.35899353027344, 72.69355010986328, 80.0280990600586, 87.36265563964844, 94.69720458984375, 102.0317611694336, 109.36631774902344, 116.70086669921875, 124.0354232788086, 131.36997985839844, 138.70452880859375, 146.03907775878906, 153.37364196777344, 160.70819091796875, 168.04273986816406, 175.37728881835938, 182.71185302734375, 190.04640197753906, 197.38096618652344, 204.71551513671875, 212.05006408691406, 219.38461303710938, 226.71917724609375, 234.05372619628906, 241.38827514648438]}, "gradients/decoder.transformer.h.22.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 4.0, 7.0, 5.0, 10.0, 7.0, 12.0, 18.0, 25.0, 21.0, 25.0, 21.0, 37.0, 31.0, 36.0, 52.0, 56.0, 57.0, 40.0, 70.0, 39.0, 47.0, 51.0, 58.0, 43.0, 44.0, 29.0, 25.0, 25.0, 26.0, 17.0, 14.0, 15.0, 11.0, 10.0, 1.0, 6.0, 5.0, 1.0, 5.0, 4.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-31.515625, -30.655029296875, -29.79443359375, -28.933837890625, -28.0732421875, -27.212646484375, -26.35205078125, -25.491455078125, -24.630859375, -23.770263671875, -22.90966796875, -22.049072265625, -21.1884765625, -20.327880859375, -19.46728515625, -18.606689453125, -17.74609375, -16.885498046875, -16.02490234375, -15.164306640625, -14.3037109375, -13.443115234375, -12.58251953125, -11.721923828125, -10.861328125, -10.000732421875, -9.14013671875, -8.279541015625, -7.4189453125, -6.558349609375, -5.69775390625, -4.837158203125, -3.9765625, -3.115966796875, -2.25537109375, -1.394775390625, -0.5341796875, 0.326416015625, 1.18701171875, 2.047607421875, 2.908203125, 3.768798828125, 4.62939453125, 5.489990234375, 6.3505859375, 7.211181640625, 8.07177734375, 8.932373046875, 9.79296875, 10.653564453125, 11.51416015625, 12.374755859375, 13.2353515625, 14.095947265625, 14.95654296875, 15.817138671875, 16.677734375, 17.538330078125, 18.39892578125, 19.259521484375, 20.1201171875, 20.980712890625, 21.84130859375, 22.701904296875, 23.5625]}, "gradients/decoder.transformer.h.22.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 5.0, 6.0, 5.0, 9.0, 8.0, 12.0, 14.0, 22.0, 25.0, 46.0, 79.0, 89.0, 154.0, 219.0, 382.0, 614.0, 1211.0, 3015.0, 8990.0, 56148.0, 4038779.0, 68760.0, 9634.0, 3113.0, 1288.0, 634.0, 354.0, 222.0, 133.0, 93.0, 68.0, 45.0, 35.0, 20.0, 18.0, 13.0, 9.0, 8.0, 2.0, 4.0, 2.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-195.875, -190.51171875, -185.1484375, -179.78515625, -174.421875, -169.05859375, -163.6953125, -158.33203125, -152.96875, -147.60546875, -142.2421875, -136.87890625, -131.515625, -126.15234375, -120.7890625, -115.42578125, -110.0625, -104.69921875, -99.3359375, -93.97265625, -88.609375, -83.24609375, -77.8828125, -72.51953125, -67.15625, -61.79296875, -56.4296875, -51.06640625, -45.703125, -40.33984375, -34.9765625, -29.61328125, -24.25, -18.88671875, -13.5234375, -8.16015625, -2.796875, 2.56640625, 7.9296875, 13.29296875, 18.65625, 24.01953125, 29.3828125, 34.74609375, 40.109375, 45.47265625, 50.8359375, 56.19921875, 61.5625, 66.92578125, 72.2890625, 77.65234375, 83.015625, 88.37890625, 93.7421875, 99.10546875, 104.46875, 109.83203125, 115.1953125, 120.55859375, 125.921875, 131.28515625, 136.6484375, 142.01171875, 147.375]}, "gradients/decoder.transformer.h.22.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 4.0, 4.0, 4.0, 6.0, 7.0, 12.0, 12.0, 24.0, 32.0, 41.0, 85.0, 304.0, 1334.0, 1515.0, 388.0, 113.0, 72.0, 27.0, 40.0, 18.0, 13.0, 11.0, 9.0, 3.0, 8.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-214.375, -209.71875, -205.0625, -200.40625, -195.75, -191.09375, -186.4375, -181.78125, -177.125, -172.46875, -167.8125, -163.15625, -158.5, -153.84375, -149.1875, -144.53125, -139.875, -135.21875, -130.5625, -125.90625, -121.25, -116.59375, -111.9375, -107.28125, -102.625, -97.96875, -93.3125, -88.65625, -84.0, -79.34375, -74.6875, -70.03125, -65.375, -60.71875, -56.0625, -51.40625, -46.75, -42.09375, -37.4375, -32.78125, -28.125, -23.46875, -18.8125, -14.15625, -9.5, -4.84375, -0.1875, 4.46875, 9.125, 13.78125, 18.4375, 23.09375, 27.75, 32.40625, 37.0625, 41.71875, 46.375, 51.03125, 55.6875, 60.34375, 65.0, 69.65625, 74.3125, 78.96875, 83.625]}, "gradients/decoder.transformer.h.22.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 4.0, 1.0, 3.0, 4.0, 5.0, 4.0, 8.0, 10.0, 19.0, 21.0, 23.0, 24.0, 31.0, 57.0, 96.0, 170.0, 343.0, 799.0, 1953.0, 5621.0, 19156.0, 110234.0, 3896952.0, 128354.0, 20801.0, 5966.0, 2029.0, 824.0, 327.0, 162.0, 85.0, 64.0, 33.0, 28.0, 16.0, 15.0, 12.0, 11.0, 0.0, 8.0, 6.0, 3.0, 5.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-181.75, -174.8984375, -168.046875, -161.1953125, -154.34375, -147.4921875, -140.640625, -133.7890625, -126.9375, -120.0859375, -113.234375, -106.3828125, -99.53125, -92.6796875, -85.828125, -78.9765625, -72.125, -65.2734375, -58.421875, -51.5703125, -44.71875, -37.8671875, -31.015625, -24.1640625, -17.3125, -10.4609375, -3.609375, 3.2421875, 10.09375, 16.9453125, 23.796875, 30.6484375, 37.5, 44.3515625, 51.203125, 58.0546875, 64.90625, 71.7578125, 78.609375, 85.4609375, 92.3125, 99.1640625, 106.015625, 112.8671875, 119.71875, 126.5703125, 133.421875, 140.2734375, 147.125, 153.9765625, 160.828125, 167.6796875, 174.53125, 181.3828125, 188.234375, 195.0859375, 201.9375, 208.7890625, 215.640625, 222.4921875, 229.34375, 236.1953125, 243.046875, 249.8984375, 256.75]}, "gradients/decoder.transformer.h.22.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 25.0, 126.0, 610.0, 192.0, 46.0, 8.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1158.3404541015625, -1119.2901611328125, -1080.2398681640625, -1041.189453125, -1002.13916015625, -963.0888671875, -924.03857421875, -884.98828125, -845.9379272460938, -806.8876342773438, -767.8372802734375, -728.7869873046875, -689.7366943359375, -650.6863403320312, -611.6360473632812, -572.585693359375, -533.535400390625, -494.4850769042969, -455.43475341796875, -416.38446044921875, -377.3341369628906, -338.2838134765625, -299.2335205078125, -260.1831970214844, -221.13287353515625, -182.08255004882812, -143.03224182128906, -103.98192596435547, -64.93161010742188, -25.88128662109375, 13.169021606445312, 52.219329833984375, 91.269775390625, 130.32009887695312, 169.3704071044922, 208.42071533203125, 247.47103881835938, 286.5213623046875, 325.5716552734375, 364.6219787597656, 403.67230224609375, 442.7226257324219, 481.77294921875, 520.8232421875, 559.87353515625, 598.9238891601562, 637.9741821289062, 677.0245361328125, 716.0748291015625, 755.1251220703125, 794.1754760742188, 833.2257690429688, 872.276123046875, 911.326416015625, 950.376708984375, 989.427001953125, 1028.477294921875, 1067.527587890625, 1106.577880859375, 1145.6282958984375, 1184.6785888671875, 1223.7288818359375, 1262.7791748046875, 1301.8294677734375, 1340.8798828125]}, "gradients/decoder.transformer.h.22.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 4.0, 3.0, 3.0, 7.0, 5.0, 10.0, 7.0, 15.0, 11.0, 13.0, 15.0, 27.0, 35.0, 33.0, 40.0, 42.0, 38.0, 48.0, 40.0, 42.0, 36.0, 51.0, 43.0, 51.0, 45.0, 44.0, 43.0, 41.0, 38.0, 31.0, 22.0, 27.0, 13.0, 15.0, 15.0, 12.0, 12.0, 7.0, 8.0, 6.0, 6.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-227.433837890625, -219.14378356933594, -210.8537139892578, -202.56365966796875, -194.27359008789062, -185.98353576660156, -177.6934814453125, -169.40341186523438, -161.11334228515625, -152.8232879638672, -144.53321838378906, -136.2431640625, -127.95309448242188, -119.66304016113281, -111.37297821044922, -103.08291625976562, -94.79286193847656, -86.50279998779297, -78.21273803710938, -69.92268371582031, -61.63261795043945, -53.34255599975586, -45.05249786376953, -36.76243591308594, -28.472373962402344, -20.18231201171875, -11.892251968383789, -3.602191925048828, 4.687870025634766, 12.97793197631836, 21.267990112304688, 29.55805206298828, 37.848114013671875, 46.13817596435547, 54.42823791503906, 62.71829605102539, 71.00836181640625, 79.29841613769531, 87.5884780883789, 95.8785400390625, 104.1686019897461, 112.45866394042969, 120.74872589111328, 129.03878784179688, 137.32884216308594, 145.61891174316406, 153.90896606445312, 162.19903564453125, 170.4890899658203, 178.77914428710938, 187.0692138671875, 195.35926818847656, 203.6493377685547, 211.93939208984375, 220.22946166992188, 228.51951599121094, 236.8095703125, 245.09962463378906, 253.3896942138672, 261.67974853515625, 269.9698181152344, 278.2598876953125, 286.5499267578125, 294.8399963378906, 303.13006591796875]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 2.0, 7.0, 14.0, 7.0, 23.0, 17.0, 14.0, 23.0, 22.0, 40.0, 43.0, 36.0, 41.0, 43.0, 50.0, 58.0, 48.0, 54.0, 59.0, 56.0, 48.0, 48.0, 33.0, 46.0, 28.0, 25.0, 22.0, 25.0, 12.0, 15.0, 13.0, 8.0, 9.0, 3.0, 2.0, 4.0, 3.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.75, -32.788818359375, -31.82763671875, -30.866455078125, -29.9052734375, -28.944091796875, -27.98291015625, -27.021728515625, -26.060546875, -25.099365234375, -24.13818359375, -23.177001953125, -22.2158203125, -21.254638671875, -20.29345703125, -19.332275390625, -18.37109375, -17.409912109375, -16.44873046875, -15.487548828125, -14.5263671875, -13.565185546875, -12.60400390625, -11.642822265625, -10.681640625, -9.720458984375, -8.75927734375, -7.798095703125, -6.8369140625, -5.875732421875, -4.91455078125, -3.953369140625, -2.9921875, -2.031005859375, -1.06982421875, -0.108642578125, 0.8525390625, 1.813720703125, 2.77490234375, 3.736083984375, 4.697265625, 5.658447265625, 6.61962890625, 7.580810546875, 8.5419921875, 9.503173828125, 10.46435546875, 11.425537109375, 12.38671875, 13.347900390625, 14.30908203125, 15.270263671875, 16.2314453125, 17.192626953125, 18.15380859375, 19.114990234375, 20.076171875, 21.037353515625, 21.99853515625, 22.959716796875, 23.9208984375, 24.882080078125, 25.84326171875, 26.804443359375, 27.765625]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 4.0, 2.0, 4.0, 6.0, 13.0, 19.0, 31.0, 32.0, 42.0, 69.0, 86.0, 126.0, 183.0, 296.0, 419.0, 661.0, 1106.0, 1813.0, 3138.0, 5270.0, 8793.0, 15012.0, 26838.0, 49970.0, 100179.0, 233324.0, 318161.0, 137384.0, 65255.0, 34164.0, 19064.0, 10901.0, 6391.0, 3716.0, 2314.0, 1372.0, 848.0, 524.0, 345.0, 226.0, 130.0, 106.0, 65.0, 46.0, 37.0, 29.0, 9.0, 14.0, 7.0, 7.0, 5.0, 3.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 1.0], "bins": [-1.474609375, -1.42578125, -1.376953125, -1.328125, -1.279296875, -1.23046875, -1.181640625, -1.1328125, -1.083984375, -1.03515625, -0.986328125, -0.9375, -0.888671875, -0.83984375, -0.791015625, -0.7421875, -0.693359375, -0.64453125, -0.595703125, -0.546875, -0.498046875, -0.44921875, -0.400390625, -0.3515625, -0.302734375, -0.25390625, -0.205078125, -0.15625, -0.107421875, -0.05859375, -0.009765625, 0.0390625, 0.087890625, 0.13671875, 0.185546875, 0.234375, 0.283203125, 0.33203125, 0.380859375, 0.4296875, 0.478515625, 0.52734375, 0.576171875, 0.625, 0.673828125, 0.72265625, 0.771484375, 0.8203125, 0.869140625, 0.91796875, 0.966796875, 1.015625, 1.064453125, 1.11328125, 1.162109375, 1.2109375, 1.259765625, 1.30859375, 1.357421875, 1.40625, 1.455078125, 1.50390625, 1.552734375, 1.6015625, 1.650390625]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 6.0, 3.0, 6.0, 9.0, 7.0, 17.0, 12.0, 19.0, 26.0, 34.0, 27.0, 32.0, 25.0, 42.0, 38.0, 56.0, 45.0, 31.0, 42.0, 34.0, 1070.0, 46.0, 44.0, 41.0, 33.0, 40.0, 29.0, 35.0, 36.0, 22.0, 24.0, 23.0, 15.0, 17.0, 10.0, 11.0, 3.0, 12.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.65625, -16.1025390625, -15.548828125, -14.9951171875, -14.44140625, -13.8876953125, -13.333984375, -12.7802734375, -12.2265625, -11.6728515625, -11.119140625, -10.5654296875, -10.01171875, -9.4580078125, -8.904296875, -8.3505859375, -7.796875, -7.2431640625, -6.689453125, -6.1357421875, -5.58203125, -5.0283203125, -4.474609375, -3.9208984375, -3.3671875, -2.8134765625, -2.259765625, -1.7060546875, -1.15234375, -0.5986328125, -0.044921875, 0.5087890625, 1.0625, 1.6162109375, 2.169921875, 2.7236328125, 3.27734375, 3.8310546875, 4.384765625, 4.9384765625, 5.4921875, 6.0458984375, 6.599609375, 7.1533203125, 7.70703125, 8.2607421875, 8.814453125, 9.3681640625, 9.921875, 10.4755859375, 11.029296875, 11.5830078125, 12.13671875, 12.6904296875, 13.244140625, 13.7978515625, 14.3515625, 14.9052734375, 15.458984375, 16.0126953125, 16.56640625, 17.1201171875, 17.673828125, 18.2275390625, 18.78125]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 9.0, 7.0, 7.0, 15.0, 18.0, 34.0, 48.0, 57.0, 97.0, 170.0, 265.0, 393.0, 660.0, 1038.0, 1743.0, 2916.0, 5142.0, 8879.0, 15756.0, 28804.0, 54168.0, 109825.0, 311440.0, 1296276.0, 125653.0, 60718.0, 31820.0, 17644.0, 9825.0, 5581.0, 3280.0, 1854.0, 1147.0, 667.0, 428.0, 282.0, 174.0, 99.0, 63.0, 41.0, 37.0, 24.0, 5.0, 10.0, 8.0, 5.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.455078125, -1.40826416015625, -1.3614501953125, -1.31463623046875, -1.267822265625, -1.22100830078125, -1.1741943359375, -1.12738037109375, -1.08056640625, -1.03375244140625, -0.9869384765625, -0.94012451171875, -0.893310546875, -0.84649658203125, -0.7996826171875, -0.75286865234375, -0.7060546875, -0.65924072265625, -0.6124267578125, -0.56561279296875, -0.518798828125, -0.47198486328125, -0.4251708984375, -0.37835693359375, -0.33154296875, -0.28472900390625, -0.2379150390625, -0.19110107421875, -0.144287109375, -0.09747314453125, -0.0506591796875, -0.00384521484375, 0.04296875, 0.08978271484375, 0.1365966796875, 0.18341064453125, 0.230224609375, 0.27703857421875, 0.3238525390625, 0.37066650390625, 0.41748046875, 0.46429443359375, 0.5111083984375, 0.55792236328125, 0.604736328125, 0.65155029296875, 0.6983642578125, 0.74517822265625, 0.7919921875, 0.83880615234375, 0.8856201171875, 0.93243408203125, 0.979248046875, 1.02606201171875, 1.0728759765625, 1.11968994140625, 1.16650390625, 1.21331787109375, 1.2601318359375, 1.30694580078125, 1.353759765625, 1.40057373046875, 1.4473876953125, 1.49420166015625, 1.541015625]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 2.0, 6.0, 4.0, 4.0, 5.0, 4.0, 16.0, 13.0, 14.0, 21.0, 22.0, 32.0, 40.0, 79.0, 87.0, 126.0, 132.0, 107.0, 66.0, 44.0, 31.0, 21.0, 25.0, 16.0, 15.0, 9.0, 9.0, 14.0, 12.0, 11.0, 5.0, 7.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0174102783203125, -0.016900181770324707, -0.016390085220336914, -0.01587998867034912, -0.015369892120361328, -0.014859795570373535, -0.014349699020385742, -0.01383960247039795, -0.013329505920410156, -0.012819409370422363, -0.01230931282043457, -0.011799216270446777, -0.011289119720458984, -0.010779023170471191, -0.010268926620483398, -0.009758830070495605, -0.009248733520507812, -0.00873863697052002, -0.008228540420532227, -0.007718443870544434, -0.007208347320556641, -0.006698250770568848, -0.006188154220581055, -0.005678057670593262, -0.005167961120605469, -0.004657864570617676, -0.004147768020629883, -0.00363767147064209, -0.003127574920654297, -0.002617478370666504, -0.002107381820678711, -0.001597285270690918, -0.001087188720703125, -0.000577092170715332, -6.699562072753906e-05, 0.0004431009292602539, 0.0009531974792480469, 0.0014632940292358398, 0.001973390579223633, 0.0024834871292114258, 0.0029935836791992188, 0.0035036802291870117, 0.004013776779174805, 0.004523873329162598, 0.005033969879150391, 0.005544066429138184, 0.0060541629791259766, 0.0065642595291137695, 0.0070743560791015625, 0.0075844526290893555, 0.008094549179077148, 0.008604645729064941, 0.009114742279052734, 0.009624838829040527, 0.01013493537902832, 0.010645031929016113, 0.011155128479003906, 0.0116652250289917, 0.012175321578979492, 0.012685418128967285, 0.013195514678955078, 0.013705611228942871, 0.014215707778930664, 0.014725804328918457, 0.01523590087890625]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 3.0, 5.0, 6.0, 14.0, 16.0, 21.0, 21.0, 27.0, 41.0, 33.0, 47.0, 64.0, 87.0, 126.0, 217.0, 531.0, 5361.0, 998082.0, 41921.0, 1047.0, 280.0, 131.0, 94.0, 73.0, 59.0, 49.0, 30.0, 29.0, 34.0, 29.0, 20.0, 16.0, 9.0, 8.0, 4.0, 4.0, 5.0, 3.0, 2.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.290283203125, -0.28167724609375, -0.2730712890625, -0.26446533203125, -0.255859375, -0.24725341796875, -0.2386474609375, -0.23004150390625, -0.221435546875, -0.21282958984375, -0.2042236328125, -0.19561767578125, -0.18701171875, -0.17840576171875, -0.1697998046875, -0.16119384765625, -0.152587890625, -0.14398193359375, -0.1353759765625, -0.12677001953125, -0.1181640625, -0.10955810546875, -0.1009521484375, -0.09234619140625, -0.083740234375, -0.07513427734375, -0.0665283203125, -0.05792236328125, -0.04931640625, -0.04071044921875, -0.0321044921875, -0.02349853515625, -0.014892578125, -0.00628662109375, 0.0023193359375, 0.01092529296875, 0.01953125, 0.02813720703125, 0.0367431640625, 0.04534912109375, 0.053955078125, 0.06256103515625, 0.0711669921875, 0.07977294921875, 0.08837890625, 0.09698486328125, 0.1055908203125, 0.11419677734375, 0.122802734375, 0.13140869140625, 0.1400146484375, 0.14862060546875, 0.1572265625, 0.16583251953125, 0.1744384765625, 0.18304443359375, 0.191650390625, 0.20025634765625, 0.2088623046875, 0.21746826171875, 0.22607421875, 0.23468017578125, 0.2432861328125, 0.25189208984375, 0.260498046875]}, "gradients/decoder.transformer.h.22.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 33.0, 794.0, 183.0, 8.0], "bins": [-0.15262740850448608, -0.15015652775764465, -0.14768564701080322, -0.1452147662639618, -0.14274387061595917, -0.14027298986911774, -0.1378021091222763, -0.13533122837543488, -0.13286034762859344, -0.13038946688175201, -0.12791858613491058, -0.12544770538806915, -0.12297681719064713, -0.1205059289932251, -0.11803504824638367, -0.11556416749954224, -0.11309327930212021, -0.11062239855527878, -0.10815151035785675, -0.10568062961101532, -0.10320974886417389, -0.10073886811733246, -0.09826797991991043, -0.095797099173069, -0.09332621097564697, -0.09085533022880554, -0.08838444203138351, -0.08591356128454208, -0.08344268053770065, -0.08097179234027863, -0.0785009115934372, -0.07603003084659576, -0.07355915755033493, -0.0710882768034935, -0.06861738860607147, -0.06614650785923004, -0.06367562711238861, -0.06120474264025688, -0.05873385816812515, -0.05626297742128372, -0.053792089223861694, -0.051321204751729965, -0.048850324004888535, -0.046379439532756805, -0.043908558785915375, -0.041437674313783646, -0.038966789841651917, -0.036495909094810486, -0.03402502462267876, -0.03155414015054703, -0.029083259403705597, -0.026612374931573868, -0.024141492322087288, -0.021670609712600708, -0.01919972524046898, -0.0167288426309824, -0.014257960021495819, -0.01178707741200924, -0.009316193871200085, -0.0068453107960522175, -0.00437442772090435, -0.0019035451114177704, 0.0005673384293913841, 0.0030382219702005386, 0.005509104114025831]}, "gradients/decoder.transformer.h.22.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 1.0, 4.0, 1.0, 4.0, 3.0, 3.0, 9.0, 12.0, 12.0, 16.0, 18.0, 19.0, 25.0, 24.0, 32.0, 33.0, 36.0, 50.0, 45.0, 48.0, 47.0, 47.0, 49.0, 56.0, 42.0, 48.0, 43.0, 33.0, 37.0, 40.0, 35.0, 18.0, 28.0, 23.0, 18.0, 13.0, 12.0, 7.0, 7.0, 8.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.009079039096832275, -0.008834026753902435, -0.008589014410972595, -0.008344002068042755, -0.008098989725112915, -0.007853977382183075, -0.007608965039253235, -0.007363952696323395, -0.007118940353393555, -0.006873928010463715, -0.0066289156675338745, -0.006383903324604034, -0.006138890981674194, -0.005893878638744354, -0.005648866295814514, -0.005403853952884674, -0.005158841609954834, -0.004913829267024994, -0.004668816924095154, -0.004423804581165314, -0.004178792238235474, -0.0039337798953056335, -0.0036887675523757935, -0.0034437552094459534, -0.0031987428665161133, -0.002953730523586273, -0.002708718180656433, -0.002463705837726593, -0.002218693494796753, -0.001973681151866913, -0.0017286688089370728, -0.0014836564660072327, -0.0012386441230773926, -0.0009936317801475525, -0.0007486194372177124, -0.0005036070942878723, -0.0002585947513580322, -1.3582408428192139e-05, 0.00023142993450164795, 0.00047644227743148804, 0.0007214546203613281, 0.0009664669632911682, 0.0012114793062210083, 0.0014564916491508484, 0.0017015039920806885, 0.0019465163350105286, 0.0021915286779403687, 0.0024365410208702087, 0.002681553363800049, 0.002926565706729889, 0.003171578049659729, 0.003416590392589569, 0.003661602735519409, 0.003906615078449249, 0.004151627421379089, 0.0043966397643089294, 0.0046416521072387695, 0.00488666445016861, 0.00513167679309845, 0.00537668913602829, 0.00562170147895813, 0.00586671382188797, 0.00611172616481781, 0.00635673850774765, 0.00660175085067749]}, "gradients/decoder.transformer.h.22.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 2.0, 7.0, 14.0, 7.0, 23.0, 17.0, 14.0, 23.0, 22.0, 40.0, 43.0, 36.0, 41.0, 43.0, 50.0, 58.0, 48.0, 54.0, 59.0, 56.0, 48.0, 48.0, 33.0, 46.0, 28.0, 25.0, 22.0, 25.0, 12.0, 15.0, 13.0, 8.0, 9.0, 3.0, 2.0, 4.0, 3.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.75, -32.788818359375, -31.82763671875, -30.866455078125, -29.9052734375, -28.944091796875, -27.98291015625, -27.021728515625, -26.060546875, -25.099365234375, -24.13818359375, -23.177001953125, -22.2158203125, -21.254638671875, -20.29345703125, -19.332275390625, -18.37109375, -17.409912109375, -16.44873046875, -15.487548828125, -14.5263671875, -13.565185546875, -12.60400390625, -11.642822265625, -10.681640625, -9.720458984375, -8.75927734375, -7.798095703125, -6.8369140625, -5.875732421875, -4.91455078125, -3.953369140625, -2.9921875, -2.031005859375, -1.06982421875, -0.108642578125, 0.8525390625, 1.813720703125, 2.77490234375, 3.736083984375, 4.697265625, 5.658447265625, 6.61962890625, 7.580810546875, 8.5419921875, 9.503173828125, 10.46435546875, 11.425537109375, 12.38671875, 13.347900390625, 14.30908203125, 15.270263671875, 16.2314453125, 17.192626953125, 18.15380859375, 19.114990234375, 20.076171875, 21.037353515625, 21.99853515625, 22.959716796875, 23.9208984375, 24.882080078125, 25.84326171875, 26.804443359375, 27.765625]}, "gradients/decoder.transformer.h.22.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 2.0, 3.0, 4.0, 9.0, 7.0, 12.0, 13.0, 22.0, 37.0, 46.0, 76.0, 122.0, 142.0, 234.0, 308.0, 540.0, 929.0, 1822.0, 4150.0, 11245.0, 35205.0, 154908.0, 663081.0, 126878.0, 31118.0, 9872.0, 3769.0, 1602.0, 872.0, 532.0, 291.0, 213.0, 142.0, 96.0, 66.0, 60.0, 35.0, 28.0, 16.0, 17.0, 10.0, 7.0, 5.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-30.03125, -29.13330078125, -28.2353515625, -27.33740234375, -26.439453125, -25.54150390625, -24.6435546875, -23.74560546875, -22.84765625, -21.94970703125, -21.0517578125, -20.15380859375, -19.255859375, -18.35791015625, -17.4599609375, -16.56201171875, -15.6640625, -14.76611328125, -13.8681640625, -12.97021484375, -12.072265625, -11.17431640625, -10.2763671875, -9.37841796875, -8.48046875, -7.58251953125, -6.6845703125, -5.78662109375, -4.888671875, -3.99072265625, -3.0927734375, -2.19482421875, -1.296875, -0.39892578125, 0.4990234375, 1.39697265625, 2.294921875, 3.19287109375, 4.0908203125, 4.98876953125, 5.88671875, 6.78466796875, 7.6826171875, 8.58056640625, 9.478515625, 10.37646484375, 11.2744140625, 12.17236328125, 13.0703125, 13.96826171875, 14.8662109375, 15.76416015625, 16.662109375, 17.56005859375, 18.4580078125, 19.35595703125, 20.25390625, 21.15185546875, 22.0498046875, 22.94775390625, 23.845703125, 24.74365234375, 25.6416015625, 26.53955078125, 27.4375]}, "gradients/decoder.transformer.h.22.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 0.0, 3.0, 2.0, 4.0, 6.0, 4.0, 8.0, 9.0, 15.0, 10.0, 14.0, 17.0, 19.0, 29.0, 35.0, 25.0, 37.0, 33.0, 33.0, 52.0, 42.0, 56.0, 168.0, 1954.0, 52.0, 30.0, 37.0, 51.0, 56.0, 32.0, 29.0, 32.0, 27.0, 24.0, 15.0, 24.0, 18.0, 5.0, 11.0, 8.0, 7.0, 4.0, 5.0, 4.0, 6.0, 4.0, 4.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-104.8125, -101.6513671875, -98.490234375, -95.3291015625, -92.16796875, -89.0068359375, -85.845703125, -82.6845703125, -79.5234375, -76.3623046875, -73.201171875, -70.0400390625, -66.87890625, -63.7177734375, -60.556640625, -57.3955078125, -54.234375, -51.0732421875, -47.912109375, -44.7509765625, -41.58984375, -38.4287109375, -35.267578125, -32.1064453125, -28.9453125, -25.7841796875, -22.623046875, -19.4619140625, -16.30078125, -13.1396484375, -9.978515625, -6.8173828125, -3.65625, -0.4951171875, 2.666015625, 5.8271484375, 8.98828125, 12.1494140625, 15.310546875, 18.4716796875, 21.6328125, 24.7939453125, 27.955078125, 31.1162109375, 34.27734375, 37.4384765625, 40.599609375, 43.7607421875, 46.921875, 50.0830078125, 53.244140625, 56.4052734375, 59.56640625, 62.7275390625, 65.888671875, 69.0498046875, 72.2109375, 75.3720703125, 78.533203125, 81.6943359375, 84.85546875, 88.0166015625, 91.177734375, 94.3388671875, 97.5]}, "gradients/decoder.transformer.h.22.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 5.0, 5.0, 5.0, 4.0, 4.0, 4.0, 8.0, 9.0, 8.0, 13.0, 15.0, 23.0, 23.0, 20.0, 50.0, 34.0, 46.0, 59.0, 115.0, 156.0, 256.0, 690.0, 2779.0, 3130698.0, 8954.0, 821.0, 323.0, 170.0, 88.0, 61.0, 53.0, 40.0, 33.0, 36.0, 16.0, 18.0, 12.0, 12.0, 15.0, 9.0, 6.0, 4.0, 6.0, 5.0, 2.0, 2.0, 0.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-225.5, -218.1328125, -210.765625, -203.3984375, -196.03125, -188.6640625, -181.296875, -173.9296875, -166.5625, -159.1953125, -151.828125, -144.4609375, -137.09375, -129.7265625, -122.359375, -114.9921875, -107.625, -100.2578125, -92.890625, -85.5234375, -78.15625, -70.7890625, -63.421875, -56.0546875, -48.6875, -41.3203125, -33.953125, -26.5859375, -19.21875, -11.8515625, -4.484375, 2.8828125, 10.25, 17.6171875, 24.984375, 32.3515625, 39.71875, 47.0859375, 54.453125, 61.8203125, 69.1875, 76.5546875, 83.921875, 91.2890625, 98.65625, 106.0234375, 113.390625, 120.7578125, 128.125, 135.4921875, 142.859375, 150.2265625, 157.59375, 164.9609375, 172.328125, 179.6953125, 187.0625, 194.4296875, 201.796875, 209.1640625, 216.53125, 223.8984375, 231.265625, 238.6328125, 246.0]}, "gradients/decoder.transformer.h.22.ln_1.weight": {"_type": "histogram", "values": [615.0, 405.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.915279388427734, 1.1850128173828125, 21.28530502319336, 41.385597229003906, 61.48588943481445, 81.586181640625, 101.68647766113281, 121.78675842285156, 141.88705444335938, 161.9873504638672, 182.087646484375, 202.18792724609375, 222.28822326660156, 242.38851928710938, 262.4888000488281, 282.589111328125, 302.68939208984375, 322.7896728515625, 342.8899841308594, 362.9902648925781, 383.090576171875, 403.19085693359375, 423.2911376953125, 443.39141845703125, 463.4917297363281, 483.5920104980469, 503.69232177734375, 523.7926025390625, 543.8928833007812, 563.9931640625, 584.093505859375, 604.1937866210938, 624.2940673828125, 644.3943481445312, 664.49462890625, 684.594970703125, 704.6952514648438, 724.7955322265625, 744.8958129882812, 764.99609375, 785.096435546875, 805.1967163085938, 825.2969970703125, 845.3973388671875, 865.4976196289062, 885.597900390625, 905.6981811523438, 925.7984619140625, 945.8987426757812, 965.9990234375, 986.0993041992188, 1006.1996459960938, 1026.2999267578125, 1046.400146484375, 1066.50048828125, 1086.600830078125, 1106.7010498046875, 1126.8013916015625, 1146.901611328125, 1167.001953125, 1187.1021728515625, 1207.2025146484375, 1227.3028564453125, 1247.403076171875, 1267.50341796875]}, "gradients/decoder.transformer.h.22.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 6.0, 5.0, 6.0, 14.0, 8.0, 9.0, 15.0, 14.0, 28.0, 18.0, 33.0, 21.0, 37.0, 37.0, 28.0, 26.0, 35.0, 37.0, 24.0, 31.0, 40.0, 35.0, 44.0, 45.0, 47.0, 36.0, 40.0, 38.0, 25.0, 27.0, 31.0, 20.0, 28.0, 13.0, 14.0, 22.0, 9.0, 9.0, 9.0, 9.0, 5.0, 5.0, 7.0, 5.0, 6.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-281.6238708496094, -272.5931396484375, -263.56243896484375, -254.53170776367188, -245.50099182128906, -236.47027587890625, -227.43954467773438, -218.40882873535156, -209.37811279296875, -200.34739685058594, -191.31668090820312, -182.28594970703125, -173.25523376464844, -164.22451782226562, -155.19378662109375, -146.16307067871094, -137.13235473632812, -128.1016387939453, -119.07091522216797, -110.04019165039062, -101.00947570800781, -91.978759765625, -82.94803619384766, -73.91731262207031, -64.8865966796875, -55.85587692260742, -46.825157165527344, -37.794437408447266, -28.763717651367188, -19.73299789428711, -10.702278137207031, -1.6715583801269531, 7.35919189453125, 16.389911651611328, 25.420631408691406, 34.451351165771484, 43.48207092285156, 52.51279067993164, 61.54351043701172, 70.57423400878906, 79.60494995117188, 88.63566589355469, 97.66638946533203, 106.69711303710938, 115.72782897949219, 124.758544921875, 133.78927612304688, 142.8199920654297, 151.8507080078125, 160.8814239501953, 169.91213989257812, 178.94287109375, 187.9735870361328, 197.00430297851562, 206.0350341796875, 215.0657501220703, 224.09646606445312, 233.12718200683594, 242.15789794921875, 251.18862915039062, 260.2193603515625, 269.25006103515625, 278.2807922363281, 287.3114929199219, 296.34222412109375]}, "gradients/decoder.transformer.h.21.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 4.0, 9.0, 11.0, 16.0, 17.0, 14.0, 20.0, 18.0, 31.0, 30.0, 47.0, 35.0, 40.0, 42.0, 47.0, 56.0, 52.0, 48.0, 55.0, 61.0, 46.0, 49.0, 39.0, 33.0, 32.0, 27.0, 25.0, 23.0, 15.0, 15.0, 10.0, 10.0, 10.0, 3.0, 3.0, 4.0, 3.0, 1.0, 4.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-33.71875, -32.759033203125, -31.79931640625, -30.839599609375, -29.8798828125, -28.920166015625, -27.96044921875, -27.000732421875, -26.041015625, -25.081298828125, -24.12158203125, -23.161865234375, -22.2021484375, -21.242431640625, -20.28271484375, -19.322998046875, -18.36328125, -17.403564453125, -16.44384765625, -15.484130859375, -14.5244140625, -13.564697265625, -12.60498046875, -11.645263671875, -10.685546875, -9.725830078125, -8.76611328125, -7.806396484375, -6.8466796875, -5.886962890625, -4.92724609375, -3.967529296875, -3.0078125, -2.048095703125, -1.08837890625, -0.128662109375, 0.8310546875, 1.790771484375, 2.75048828125, 3.710205078125, 4.669921875, 5.629638671875, 6.58935546875, 7.549072265625, 8.5087890625, 9.468505859375, 10.42822265625, 11.387939453125, 12.34765625, 13.307373046875, 14.26708984375, 15.226806640625, 16.1865234375, 17.146240234375, 18.10595703125, 19.065673828125, 20.025390625, 20.985107421875, 21.94482421875, 22.904541015625, 23.8642578125, 24.823974609375, 25.78369140625, 26.743408203125, 27.703125]}, "gradients/decoder.transformer.h.21.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 6.0, 2.0, 13.0, 14.0, 14.0, 17.0, 22.0, 23.0, 51.0, 71.0, 110.0, 170.0, 288.0, 665.0, 1917.0, 6761.0, 75707.0, 4076109.0, 25335.0, 4400.0, 1395.0, 493.0, 284.0, 130.0, 94.0, 42.0, 44.0, 28.0, 20.0, 15.0, 10.0, 13.0, 10.0, 2.0, 3.0, 5.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-241.875, -235.033203125, -228.19140625, -221.349609375, -214.5078125, -207.666015625, -200.82421875, -193.982421875, -187.140625, -180.298828125, -173.45703125, -166.615234375, -159.7734375, -152.931640625, -146.08984375, -139.248046875, -132.40625, -125.564453125, -118.72265625, -111.880859375, -105.0390625, -98.197265625, -91.35546875, -84.513671875, -77.671875, -70.830078125, -63.98828125, -57.146484375, -50.3046875, -43.462890625, -36.62109375, -29.779296875, -22.9375, -16.095703125, -9.25390625, -2.412109375, 4.4296875, 11.271484375, 18.11328125, 24.955078125, 31.796875, 38.638671875, 45.48046875, 52.322265625, 59.1640625, 66.005859375, 72.84765625, 79.689453125, 86.53125, 93.373046875, 100.21484375, 107.056640625, 113.8984375, 120.740234375, 127.58203125, 134.423828125, 141.265625, 148.107421875, 154.94921875, 161.791015625, 168.6328125, 175.474609375, 182.31640625, 189.158203125, 196.0]}, "gradients/decoder.transformer.h.21.mlp.c_fc.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 4.0, 1.0, 2.0, 9.0, 12.0, 8.0, 8.0, 6.0, 8.0, 17.0, 17.0, 34.0, 41.0, 83.0, 237.0, 857.0, 1607.0, 686.0, 205.0, 77.0, 47.0, 25.0, 21.0, 16.0, 16.0, 12.0, 3.0, 6.0, 4.0, 4.0, 3.0, 2.0, 1.0, 5.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-71.3125, -67.6376953125, -63.962890625, -60.2880859375, -56.61328125, -52.9384765625, -49.263671875, -45.5888671875, -41.9140625, -38.2392578125, -34.564453125, -30.8896484375, -27.21484375, -23.5400390625, -19.865234375, -16.1904296875, -12.515625, -8.8408203125, -5.166015625, -1.4912109375, 2.18359375, 5.8583984375, 9.533203125, 13.2080078125, 16.8828125, 20.5576171875, 24.232421875, 27.9072265625, 31.58203125, 35.2568359375, 38.931640625, 42.6064453125, 46.28125, 49.9560546875, 53.630859375, 57.3056640625, 60.98046875, 64.6552734375, 68.330078125, 72.0048828125, 75.6796875, 79.3544921875, 83.029296875, 86.7041015625, 90.37890625, 94.0537109375, 97.728515625, 101.4033203125, 105.078125, 108.7529296875, 112.427734375, 116.1025390625, 119.77734375, 123.4521484375, 127.126953125, 130.8017578125, 134.4765625, 138.1513671875, 141.826171875, 145.5009765625, 149.17578125, 152.8505859375, 156.525390625, 160.2001953125, 163.875]}, "gradients/decoder.transformer.h.21.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 7.0, 3.0, 4.0, 7.0, 5.0, 8.0, 10.0, 14.0, 17.0, 36.0, 49.0, 58.0, 83.0, 195.0, 324.0, 578.0, 1196.0, 2455.0, 5371.0, 12405.0, 36755.0, 379472.0, 3640501.0, 79306.0, 20238.0, 8031.0, 3499.0, 1757.0, 847.0, 417.0, 261.0, 135.0, 79.0, 52.0, 25.0, 24.0, 19.0, 15.0, 9.0, 8.0, 5.0, 3.0, 4.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-191.625, -186.029296875, -180.43359375, -174.837890625, -169.2421875, -163.646484375, -158.05078125, -152.455078125, -146.859375, -141.263671875, -135.66796875, -130.072265625, -124.4765625, -118.880859375, -113.28515625, -107.689453125, -102.09375, -96.498046875, -90.90234375, -85.306640625, -79.7109375, -74.115234375, -68.51953125, -62.923828125, -57.328125, -51.732421875, -46.13671875, -40.541015625, -34.9453125, -29.349609375, -23.75390625, -18.158203125, -12.5625, -6.966796875, -1.37109375, 4.224609375, 9.8203125, 15.416015625, 21.01171875, 26.607421875, 32.203125, 37.798828125, 43.39453125, 48.990234375, 54.5859375, 60.181640625, 65.77734375, 71.373046875, 76.96875, 82.564453125, 88.16015625, 93.755859375, 99.3515625, 104.947265625, 110.54296875, 116.138671875, 121.734375, 127.330078125, 132.92578125, 138.521484375, 144.1171875, 149.712890625, 155.30859375, 160.904296875, 166.5]}, "gradients/decoder.transformer.h.21.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 12.0, 14.0, 27.0, 57.0, 114.0, 237.0, 273.0, 133.0, 64.0, 28.0, 23.0, 12.0, 7.0, 2.0, 3.0, 4.0, 1.0, 2.0], "bins": [-894.5339965820312, -877.389404296875, -860.2448120117188, -843.1002197265625, -825.9556274414062, -808.81103515625, -791.6664428710938, -774.5218505859375, -757.377197265625, -740.2326049804688, -723.0880126953125, -705.9434204101562, -688.798828125, -671.6542358398438, -654.5096435546875, -637.364990234375, -620.220458984375, -603.0758666992188, -585.9312744140625, -568.7866821289062, -551.64208984375, -534.4974975585938, -517.3529052734375, -500.2082824707031, -483.0636901855469, -465.9190979003906, -448.7745056152344, -431.6299133300781, -414.48529052734375, -397.3406982421875, -380.19610595703125, -363.051513671875, -345.906982421875, -328.76239013671875, -311.6177978515625, -294.47320556640625, -277.32861328125, -260.18402099609375, -243.03939819335938, -225.89480590820312, -208.75021362304688, -191.60562133789062, -174.46102905273438, -157.31642150878906, -140.1718292236328, -123.02723693847656, -105.88263702392578, -88.738037109375, -71.59344482421875, -54.448848724365234, -37.30425262451172, -20.159656524658203, -3.0150604248046875, 14.129531860351562, 31.274131774902344, 48.418731689453125, 65.56332397460938, 82.70791625976562, 99.8525161743164, 116.99711608886719, 134.14170837402344, 151.2863006591797, 168.430908203125, 185.57550048828125, 202.7200927734375]}, "gradients/decoder.transformer.h.21.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 0.0, 3.0, 6.0, 5.0, 7.0, 9.0, 11.0, 13.0, 17.0, 19.0, 19.0, 25.0, 29.0, 27.0, 17.0, 42.0, 40.0, 52.0, 44.0, 37.0, 55.0, 41.0, 55.0, 42.0, 43.0, 45.0, 53.0, 31.0, 34.0, 31.0, 23.0, 23.0, 20.0, 18.0, 14.0, 12.0, 16.0, 5.0, 14.0, 7.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-255.06039428710938, -247.67257690429688, -240.28475952148438, -232.89694213867188, -225.50912475585938, -218.12130737304688, -210.73348999023438, -203.34568786621094, -195.95787048339844, -188.57005310058594, -181.18223571777344, -173.79441833496094, -166.40660095214844, -159.018798828125, -151.6309814453125, -144.2431640625, -136.8553466796875, -129.467529296875, -122.0797119140625, -114.69189453125, -107.30408477783203, -99.91626739501953, -92.52845001220703, -85.14064025878906, -77.7528076171875, -70.364990234375, -62.977176666259766, -55.589359283447266, -48.20154571533203, -40.81372833251953, -33.42591094970703, -26.038097381591797, -18.650283813476562, -11.262468338012695, -3.8746519088745117, 3.513164520263672, 10.900979995727539, 18.288795471191406, 25.676612854003906, 33.06442642211914, 40.45224380493164, 47.84006118774414, 55.227874755859375, 62.615692138671875, 70.00350952148438, 77.39132690429688, 84.77914428710938, 92.16695404052734, 99.55477142333984, 106.94258880615234, 114.33040618896484, 121.71821594238281, 129.1060333251953, 136.4938507080078, 143.8816680908203, 151.2694854736328, 158.6573028564453, 166.0451202392578, 173.4329376220703, 180.8207550048828, 188.2085723876953, 195.59637451171875, 202.98419189453125, 210.37200927734375, 217.75982666015625]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 3.0, 5.0, 6.0, 6.0, 8.0, 15.0, 13.0, 13.0, 14.0, 18.0, 25.0, 30.0, 34.0, 39.0, 32.0, 38.0, 33.0, 40.0, 45.0, 49.0, 58.0, 52.0, 46.0, 52.0, 44.0, 34.0, 47.0, 32.0, 32.0, 16.0, 23.0, 20.0, 18.0, 12.0, 12.0, 6.0, 6.0, 9.0, 5.0, 7.0, 6.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-33.21875, -32.30615234375, -31.3935546875, -30.48095703125, -29.568359375, -28.65576171875, -27.7431640625, -26.83056640625, -25.91796875, -25.00537109375, -24.0927734375, -23.18017578125, -22.267578125, -21.35498046875, -20.4423828125, -19.52978515625, -18.6171875, -17.70458984375, -16.7919921875, -15.87939453125, -14.966796875, -14.05419921875, -13.1416015625, -12.22900390625, -11.31640625, -10.40380859375, -9.4912109375, -8.57861328125, -7.666015625, -6.75341796875, -5.8408203125, -4.92822265625, -4.015625, -3.10302734375, -2.1904296875, -1.27783203125, -0.365234375, 0.54736328125, 1.4599609375, 2.37255859375, 3.28515625, 4.19775390625, 5.1103515625, 6.02294921875, 6.935546875, 7.84814453125, 8.7607421875, 9.67333984375, 10.5859375, 11.49853515625, 12.4111328125, 13.32373046875, 14.236328125, 15.14892578125, 16.0615234375, 16.97412109375, 17.88671875, 18.79931640625, 19.7119140625, 20.62451171875, 21.537109375, 22.44970703125, 23.3623046875, 24.27490234375, 25.1875]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 3.0, 4.0, 4.0, 4.0, 8.0, 10.0, 17.0, 26.0, 39.0, 53.0, 78.0, 123.0, 145.0, 251.0, 419.0, 636.0, 942.0, 1537.0, 2419.0, 3889.0, 6219.0, 10307.0, 16984.0, 29091.0, 51045.0, 95703.0, 204148.0, 305142.0, 146948.0, 72931.0, 39874.0, 23456.0, 13850.0, 8392.0, 5122.0, 3199.0, 2026.0, 1193.0, 835.0, 486.0, 328.0, 231.0, 138.0, 105.0, 71.0, 32.0, 37.0, 14.0, 18.0, 8.0, 11.0, 7.0, 5.0, 4.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-1.40625, -1.3613128662109375, -1.316375732421875, -1.2714385986328125, -1.22650146484375, -1.1815643310546875, -1.136627197265625, -1.0916900634765625, -1.0467529296875, -1.0018157958984375, -0.956878662109375, -0.9119415283203125, -0.86700439453125, -0.8220672607421875, -0.777130126953125, -0.7321929931640625, -0.687255859375, -0.6423187255859375, -0.597381591796875, -0.5524444580078125, -0.50750732421875, -0.4625701904296875, -0.417633056640625, -0.3726959228515625, -0.3277587890625, -0.2828216552734375, -0.237884521484375, -0.1929473876953125, -0.14801025390625, -0.1030731201171875, -0.058135986328125, -0.0131988525390625, 0.03173828125, 0.0766754150390625, 0.121612548828125, 0.1665496826171875, 0.21148681640625, 0.2564239501953125, 0.301361083984375, 0.3462982177734375, 0.3912353515625, 0.4361724853515625, 0.481109619140625, 0.5260467529296875, 0.57098388671875, 0.6159210205078125, 0.660858154296875, 0.7057952880859375, 0.750732421875, 0.7956695556640625, 0.840606689453125, 0.8855438232421875, 0.93048095703125, 0.9754180908203125, 1.020355224609375, 1.0652923583984375, 1.1102294921875, 1.1551666259765625, 1.200103759765625, 1.2450408935546875, 1.28997802734375, 1.3349151611328125, 1.379852294921875, 1.4247894287109375, 1.4697265625]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 11.0, 7.0, 14.0, 7.0, 11.0, 10.0, 9.0, 14.0, 20.0, 18.0, 15.0, 19.0, 38.0, 33.0, 51.0, 31.0, 38.0, 36.0, 41.0, 38.0, 28.0, 1061.0, 40.0, 45.0, 38.0, 29.0, 35.0, 34.0, 25.0, 29.0, 30.0, 27.0, 14.0, 27.0, 24.0, 18.0, 10.0, 16.0, 10.0, 8.0, 5.0, 6.0, 3.0, 4.0, 4.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-16.953125, -16.41162109375, -15.8701171875, -15.32861328125, -14.787109375, -14.24560546875, -13.7041015625, -13.16259765625, -12.62109375, -12.07958984375, -11.5380859375, -10.99658203125, -10.455078125, -9.91357421875, -9.3720703125, -8.83056640625, -8.2890625, -7.74755859375, -7.2060546875, -6.66455078125, -6.123046875, -5.58154296875, -5.0400390625, -4.49853515625, -3.95703125, -3.41552734375, -2.8740234375, -2.33251953125, -1.791015625, -1.24951171875, -0.7080078125, -0.16650390625, 0.375, 0.91650390625, 1.4580078125, 1.99951171875, 2.541015625, 3.08251953125, 3.6240234375, 4.16552734375, 4.70703125, 5.24853515625, 5.7900390625, 6.33154296875, 6.873046875, 7.41455078125, 7.9560546875, 8.49755859375, 9.0390625, 9.58056640625, 10.1220703125, 10.66357421875, 11.205078125, 11.74658203125, 12.2880859375, 12.82958984375, 13.37109375, 13.91259765625, 14.4541015625, 14.99560546875, 15.537109375, 16.07861328125, 16.6201171875, 17.16162109375, 17.703125]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 4.0, 0.0, 2.0, 2.0, 12.0, 13.0, 21.0, 33.0, 37.0, 54.0, 81.0, 110.0, 166.0, 264.0, 365.0, 589.0, 882.0, 1302.0, 1957.0, 3223.0, 5204.0, 8002.0, 12892.0, 20920.0, 35044.0, 61704.0, 117043.0, 784133.0, 772772.0, 117581.0, 61718.0, 35076.0, 20713.0, 12899.0, 7962.0, 5107.0, 3212.0, 2066.0, 1350.0, 871.0, 613.0, 390.0, 247.0, 168.0, 99.0, 72.0, 59.0, 33.0, 25.0, 17.0, 12.0, 9.0, 6.0, 3.0, 2.0, 0.0, 3.0, 2.0, 0.0, 1.0, 2.0], "bins": [-1.3466796875, -1.3032379150390625, -1.259796142578125, -1.2163543701171875, -1.17291259765625, -1.1294708251953125, -1.086029052734375, -1.0425872802734375, -0.9991455078125, -0.9557037353515625, -0.912261962890625, -0.8688201904296875, -0.82537841796875, -0.7819366455078125, -0.738494873046875, -0.6950531005859375, -0.651611328125, -0.6081695556640625, -0.564727783203125, -0.5212860107421875, -0.47784423828125, -0.4344024658203125, -0.390960693359375, -0.3475189208984375, -0.3040771484375, -0.2606353759765625, -0.217193603515625, -0.1737518310546875, -0.13031005859375, -0.0868682861328125, -0.043426513671875, 1.52587890625e-05, 0.04345703125, 0.0868988037109375, 0.130340576171875, 0.1737823486328125, 0.21722412109375, 0.2606658935546875, 0.304107666015625, 0.3475494384765625, 0.3909912109375, 0.4344329833984375, 0.477874755859375, 0.5213165283203125, 0.56475830078125, 0.6082000732421875, 0.651641845703125, 0.6950836181640625, 0.738525390625, 0.7819671630859375, 0.825408935546875, 0.8688507080078125, 0.91229248046875, 0.9557342529296875, 0.999176025390625, 1.0426177978515625, 1.0860595703125, 1.1295013427734375, 1.172943115234375, 1.2163848876953125, 1.25982666015625, 1.3032684326171875, 1.346710205078125, 1.3901519775390625, 1.43359375]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 5.0, 6.0, 13.0, 16.0, 12.0, 26.0, 26.0, 45.0, 54.0, 74.0, 99.0, 119.0, 138.0, 89.0, 68.0, 48.0, 43.0, 29.0, 22.0, 17.0, 10.0, 11.0, 8.0, 5.0, 4.0, 3.0, 3.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.03240966796875, -0.031482696533203125, -0.03055572509765625, -0.029628753662109375, -0.0287017822265625, -0.027774810791015625, -0.02684783935546875, -0.025920867919921875, -0.024993896484375, -0.024066925048828125, -0.02313995361328125, -0.022212982177734375, -0.0212860107421875, -0.020359039306640625, -0.01943206787109375, -0.018505096435546875, -0.017578125, -0.016651153564453125, -0.01572418212890625, -0.014797210693359375, -0.0138702392578125, -0.012943267822265625, -0.01201629638671875, -0.011089324951171875, -0.010162353515625, -0.009235382080078125, -0.00830841064453125, -0.007381439208984375, -0.0064544677734375, -0.005527496337890625, -0.00460052490234375, -0.003673553466796875, -0.00274658203125, -0.001819610595703125, -0.00089263916015625, 3.4332275390625e-05, 0.0009613037109375, 0.001888275146484375, 0.00281524658203125, 0.003742218017578125, 0.004669189453125, 0.005596160888671875, 0.00652313232421875, 0.007450103759765625, 0.0083770751953125, 0.009304046630859375, 0.01023101806640625, 0.011157989501953125, 0.0120849609375, 0.013011932373046875, 0.01393890380859375, 0.014865875244140625, 0.0157928466796875, 0.016719818115234375, 0.01764678955078125, 0.018573760986328125, 0.019500732421875, 0.020427703857421875, 0.02135467529296875, 0.022281646728515625, 0.0232086181640625, 0.024135589599609375, 0.02506256103515625, 0.025989532470703125, 0.02691650390625]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 0.0, 4.0, 4.0, 4.0, 5.0, 6.0, 4.0, 8.0, 7.0, 16.0, 21.0, 31.0, 46.0, 61.0, 72.0, 125.0, 185.0, 381.0, 1565.0, 42731.0, 992914.0, 8735.0, 829.0, 281.0, 156.0, 100.0, 78.0, 50.0, 36.0, 33.0, 17.0, 16.0, 8.0, 6.0, 4.0, 7.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.452880859375, -0.4384193420410156, -0.42395782470703125, -0.4094963073730469, -0.3950347900390625, -0.3805732727050781, -0.36611175537109375, -0.3516502380371094, -0.337188720703125, -0.3227272033691406, -0.30826568603515625, -0.2938041687011719, -0.2793426513671875, -0.2648811340332031, -0.25041961669921875, -0.23595809936523438, -0.22149658203125, -0.20703506469726562, -0.19257354736328125, -0.17811203002929688, -0.1636505126953125, -0.14918899536132812, -0.13472747802734375, -0.12026596069335938, -0.105804443359375, -0.09134292602539062, -0.07688140869140625, -0.062419891357421875, -0.0479583740234375, -0.033496856689453125, -0.01903533935546875, -0.004573822021484375, 0.0098876953125, 0.024349212646484375, 0.03881072998046875, 0.053272247314453125, 0.0677337646484375, 0.08219528198242188, 0.09665679931640625, 0.11111831665039062, 0.125579833984375, 0.14004135131835938, 0.15450286865234375, 0.16896438598632812, 0.1834259033203125, 0.19788742065429688, 0.21234893798828125, 0.22681045532226562, 0.24127197265625, 0.2557334899902344, 0.27019500732421875, 0.2846565246582031, 0.2991180419921875, 0.3135795593261719, 0.32804107666015625, 0.3425025939941406, 0.356964111328125, 0.3714256286621094, 0.38588714599609375, 0.4003486633300781, 0.4148101806640625, 0.4292716979980469, 0.44373321533203125, 0.4581947326660156, 0.47265625]}, "gradients/decoder.transformer.h.21.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 27.0, 145.0, 671.0, 143.0, 24.0, 4.0], "bins": [-0.16430501639842987, -0.1615900844335556, -0.15887513756752014, -0.15616020560264587, -0.1534452587366104, -0.15073032677173615, -0.14801537990570068, -0.14530044794082642, -0.14258550107479095, -0.1398705691099167, -0.13715562224388123, -0.13444069027900696, -0.1317257434129715, -0.12901081144809723, -0.12629586458206177, -0.1235809326171875, -0.12086600065231323, -0.11815106123685837, -0.1154361218214035, -0.11272118240594864, -0.11000624299049377, -0.1072913110256195, -0.10457636415958405, -0.10186143219470978, -0.09914648532867432, -0.09643154591321945, -0.09371660649776459, -0.09100166708230972, -0.08828672766685486, -0.08557179570198059, -0.08285684883594513, -0.08014191687107086, -0.077426977455616, -0.07471203804016113, -0.07199709862470627, -0.0692821592092514, -0.06656721979379654, -0.06385228037834167, -0.06113734468817711, -0.05842240899801254, -0.05570746958255768, -0.052992530167102814, -0.05027759075164795, -0.047562651336193085, -0.04484771564602852, -0.042132776230573654, -0.03941783681511879, -0.036702901124954224, -0.03398795798420906, -0.031273018568754196, -0.02855808101594448, -0.025843141600489616, -0.0231282040476799, -0.020413264632225037, -0.017698325216770172, -0.014983387663960457, -0.012268448248505592, -0.009553509764373302, -0.006838570814579725, -0.004123631864786148, -0.0014086933806538582, 0.0013062451034784317, 0.004021184518933296, 0.0067361220717430115, 0.009451061487197876]}, "gradients/decoder.transformer.h.21.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 6.0, 7.0, 7.0, 4.0, 15.0, 10.0, 22.0, 19.0, 17.0, 22.0, 20.0, 23.0, 28.0, 37.0, 25.0, 40.0, 46.0, 38.0, 40.0, 34.0, 44.0, 34.0, 35.0, 36.0, 35.0, 49.0, 34.0, 27.0, 37.0, 25.0, 29.0, 25.0, 27.0, 25.0, 18.0, 12.0, 12.0, 9.0, 4.0, 9.0, 8.0, 4.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.012568652629852295, -0.01219777762889862, -0.011826902627944946, -0.011456027626991272, -0.011085152626037598, -0.010714277625083923, -0.010343402624130249, -0.009972527623176575, -0.0096016526222229, -0.009230777621269226, -0.008859902620315552, -0.008489027619361877, -0.008118152618408203, -0.007747277617454529, -0.0073764026165008545, -0.00700552761554718, -0.006634652614593506, -0.0062637776136398315, -0.005892902612686157, -0.005522027611732483, -0.005151152610778809, -0.004780277609825134, -0.00440940260887146, -0.004038527607917786, -0.0036676526069641113, -0.003296777606010437, -0.0029259026050567627, -0.0025550276041030884, -0.002184152603149414, -0.0018132776021957397, -0.0014424026012420654, -0.0010715276002883911, -0.0007006525993347168, -0.0003297775983810425, 4.1097402572631836e-05, 0.00041197240352630615, 0.0007828474044799805, 0.0011537224054336548, 0.001524597406387329, 0.0018954724073410034, 0.0022663474082946777, 0.002637222409248352, 0.0030080974102020264, 0.0033789724111557007, 0.003749847412109375, 0.004120722413063049, 0.004491597414016724, 0.004862472414970398, 0.005233347415924072, 0.005604222416877747, 0.005975097417831421, 0.006345972418785095, 0.0067168474197387695, 0.007087722420692444, 0.007458597421646118, 0.007829472422599792, 0.008200347423553467, 0.008571222424507141, 0.008942097425460815, 0.00931297242641449, 0.009683847427368164, 0.010054722428321838, 0.010425597429275513, 0.010796472430229187, 0.011167347431182861]}, "gradients/decoder.transformer.h.21.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 3.0, 5.0, 6.0, 6.0, 8.0, 15.0, 13.0, 13.0, 14.0, 18.0, 25.0, 30.0, 34.0, 39.0, 32.0, 38.0, 33.0, 40.0, 45.0, 49.0, 58.0, 52.0, 46.0, 52.0, 44.0, 34.0, 47.0, 32.0, 32.0, 16.0, 23.0, 20.0, 18.0, 12.0, 12.0, 6.0, 6.0, 9.0, 5.0, 7.0, 6.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-33.21875, -32.30615234375, -31.3935546875, -30.48095703125, -29.568359375, -28.65576171875, -27.7431640625, -26.83056640625, -25.91796875, -25.00537109375, -24.0927734375, -23.18017578125, -22.267578125, -21.35498046875, -20.4423828125, -19.52978515625, -18.6171875, -17.70458984375, -16.7919921875, -15.87939453125, -14.966796875, -14.05419921875, -13.1416015625, -12.22900390625, -11.31640625, -10.40380859375, -9.4912109375, -8.57861328125, -7.666015625, -6.75341796875, -5.8408203125, -4.92822265625, -4.015625, -3.10302734375, -2.1904296875, -1.27783203125, -0.365234375, 0.54736328125, 1.4599609375, 2.37255859375, 3.28515625, 4.19775390625, 5.1103515625, 6.02294921875, 6.935546875, 7.84814453125, 8.7607421875, 9.67333984375, 10.5859375, 11.49853515625, 12.4111328125, 13.32373046875, 14.236328125, 15.14892578125, 16.0615234375, 16.97412109375, 17.88671875, 18.79931640625, 19.7119140625, 20.62451171875, 21.537109375, 22.44970703125, 23.3623046875, 24.27490234375, 25.1875]}, "gradients/decoder.transformer.h.21.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 5.0, 3.0, 4.0, 7.0, 9.0, 8.0, 9.0, 25.0, 29.0, 32.0, 60.0, 102.0, 161.0, 303.0, 496.0, 971.0, 1903.0, 4013.0, 9328.0, 23480.0, 66954.0, 270755.0, 500695.0, 110130.0, 34638.0, 13357.0, 5827.0, 2472.0, 1248.0, 676.0, 316.0, 181.0, 112.0, 90.0, 42.0, 35.0, 24.0, 17.0, 9.0, 12.0, 8.0, 8.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-19.53125, -18.99169921875, -18.4521484375, -17.91259765625, -17.373046875, -16.83349609375, -16.2939453125, -15.75439453125, -15.21484375, -14.67529296875, -14.1357421875, -13.59619140625, -13.056640625, -12.51708984375, -11.9775390625, -11.43798828125, -10.8984375, -10.35888671875, -9.8193359375, -9.27978515625, -8.740234375, -8.20068359375, -7.6611328125, -7.12158203125, -6.58203125, -6.04248046875, -5.5029296875, -4.96337890625, -4.423828125, -3.88427734375, -3.3447265625, -2.80517578125, -2.265625, -1.72607421875, -1.1865234375, -0.64697265625, -0.107421875, 0.43212890625, 0.9716796875, 1.51123046875, 2.05078125, 2.59033203125, 3.1298828125, 3.66943359375, 4.208984375, 4.74853515625, 5.2880859375, 5.82763671875, 6.3671875, 6.90673828125, 7.4462890625, 7.98583984375, 8.525390625, 9.06494140625, 9.6044921875, 10.14404296875, 10.68359375, 11.22314453125, 11.7626953125, 12.30224609375, 12.841796875, 13.38134765625, 13.9208984375, 14.46044921875, 15.0]}, "gradients/decoder.transformer.h.21.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 2.0, 0.0, 0.0, 4.0, 1.0, 6.0, 3.0, 6.0, 13.0, 12.0, 12.0, 17.0, 16.0, 26.0, 22.0, 14.0, 28.0, 36.0, 42.0, 41.0, 42.0, 56.0, 50.0, 68.0, 2008.0, 77.0, 68.0, 42.0, 44.0, 37.0, 28.0, 36.0, 40.0, 23.0, 37.0, 22.0, 20.0, 19.0, 15.0, 5.0, 7.0, 11.0, 1.0, 2.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-107.875, -104.5751953125, -101.275390625, -97.9755859375, -94.67578125, -91.3759765625, -88.076171875, -84.7763671875, -81.4765625, -78.1767578125, -74.876953125, -71.5771484375, -68.27734375, -64.9775390625, -61.677734375, -58.3779296875, -55.078125, -51.7783203125, -48.478515625, -45.1787109375, -41.87890625, -38.5791015625, -35.279296875, -31.9794921875, -28.6796875, -25.3798828125, -22.080078125, -18.7802734375, -15.48046875, -12.1806640625, -8.880859375, -5.5810546875, -2.28125, 1.0185546875, 4.318359375, 7.6181640625, 10.91796875, 14.2177734375, 17.517578125, 20.8173828125, 24.1171875, 27.4169921875, 30.716796875, 34.0166015625, 37.31640625, 40.6162109375, 43.916015625, 47.2158203125, 50.515625, 53.8154296875, 57.115234375, 60.4150390625, 63.71484375, 67.0146484375, 70.314453125, 73.6142578125, 76.9140625, 80.2138671875, 83.513671875, 86.8134765625, 90.11328125, 93.4130859375, 96.712890625, 100.0126953125, 103.3125]}, "gradients/decoder.transformer.h.21.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 2.0, 1.0, 11.0, 5.0, 5.0, 14.0, 22.0, 19.0, 18.0, 47.0, 28.0, 43.0, 53.0, 61.0, 142.0, 306.0, 619.0, 1741.0, 24816.0, 3108507.0, 6756.0, 1342.0, 447.0, 229.0, 132.0, 87.0, 57.0, 32.0, 28.0, 21.0, 28.0, 19.0, 14.0, 14.0, 11.0, 12.0, 7.0, 4.0, 7.0, 1.0, 3.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-180.375, -174.62109375, -168.8671875, -163.11328125, -157.359375, -151.60546875, -145.8515625, -140.09765625, -134.34375, -128.58984375, -122.8359375, -117.08203125, -111.328125, -105.57421875, -99.8203125, -94.06640625, -88.3125, -82.55859375, -76.8046875, -71.05078125, -65.296875, -59.54296875, -53.7890625, -48.03515625, -42.28125, -36.52734375, -30.7734375, -25.01953125, -19.265625, -13.51171875, -7.7578125, -2.00390625, 3.75, 9.50390625, 15.2578125, 21.01171875, 26.765625, 32.51953125, 38.2734375, 44.02734375, 49.78125, 55.53515625, 61.2890625, 67.04296875, 72.796875, 78.55078125, 84.3046875, 90.05859375, 95.8125, 101.56640625, 107.3203125, 113.07421875, 118.828125, 124.58203125, 130.3359375, 136.08984375, 141.84375, 147.59765625, 153.3515625, 159.10546875, 164.859375, 170.61328125, 176.3671875, 182.12109375, 187.875]}, "gradients/decoder.transformer.h.21.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 26.0, 580.0, 367.0, 40.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-663.2680053710938, -650.4867553710938, -637.7055053710938, -624.9241943359375, -612.1429443359375, -599.3616943359375, -586.5804443359375, -573.7991943359375, -561.0178833007812, -548.2366333007812, -535.4553833007812, -522.674072265625, -509.892822265625, -497.111572265625, -484.330322265625, -471.5490417480469, -458.767822265625, -445.986572265625, -433.2052917480469, -420.4240417480469, -407.64276123046875, -394.86151123046875, -382.08026123046875, -369.2989807128906, -356.5177001953125, -343.7364501953125, -330.9551696777344, -318.1739196777344, -305.39263916015625, -292.61138916015625, -279.83013916015625, -267.0488586425781, -254.2676239013672, -241.48635864257812, -228.70509338378906, -215.923828125, -203.142578125, -190.36131286621094, -177.58004760742188, -164.79879760742188, -152.01751708984375, -139.2362518310547, -126.45499420166016, -113.6737289428711, -100.89247131347656, -88.1112060546875, -75.32994079589844, -62.548683166503906, -49.767425537109375, -36.98616409301758, -24.20490074157715, -11.423637390136719, 1.3576240539550781, 14.138885498046875, 26.920150756835938, 39.70140838623047, 52.48267364501953, 65.2639389038086, 78.04519653320312, 90.82646179199219, 103.60772705078125, 116.38898468017578, 129.17025756835938, 141.95150756835938, 154.73277282714844]}, "gradients/decoder.transformer.h.21.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 6.0, 6.0, 2.0, 6.0, 8.0, 12.0, 8.0, 13.0, 16.0, 20.0, 19.0, 16.0, 24.0, 22.0, 34.0, 35.0, 32.0, 37.0, 36.0, 37.0, 39.0, 43.0, 51.0, 40.0, 45.0, 39.0, 37.0, 31.0, 33.0, 30.0, 34.0, 22.0, 39.0, 19.0, 25.0, 17.0, 14.0, 11.0, 11.0, 5.0, 2.0, 7.0, 8.0, 4.0, 5.0, 3.0, 3.0, 4.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-301.5364074707031, -291.8893737792969, -282.2423400878906, -272.5953063964844, -262.94830322265625, -253.30125427246094, -243.65423583984375, -234.0072021484375, -224.36016845703125, -214.713134765625, -205.06610107421875, -195.41908264160156, -185.7720489501953, -176.12501525878906, -166.47799682617188, -156.83096313476562, -147.18392944335938, -137.53689575195312, -127.8898696899414, -118.24284362792969, -108.59580993652344, -98.94877624511719, -89.30175018310547, -79.65472412109375, -70.0076904296875, -60.360660552978516, -50.71363067626953, -41.06660079956055, -31.419570922851562, -21.772541046142578, -12.125511169433594, -2.478485107421875, 7.168548583984375, 16.81557846069336, 26.462608337402344, 36.10963821411133, 45.75666809082031, 55.4036979675293, 65.05072784423828, 74.69775390625, 84.34478759765625, 93.9918212890625, 103.63884735107422, 113.28587341308594, 122.93290710449219, 132.57994079589844, 142.22695922851562, 151.87399291992188, 161.52102661132812, 171.16806030273438, 180.81509399414062, 190.4621124267578, 200.10914611816406, 209.7561798095703, 219.4031982421875, 229.05023193359375, 238.697265625, 248.34429931640625, 257.9913330078125, 267.63836669921875, 277.285400390625, 286.9324035644531, 296.5794372558594, 306.2264709472656, 315.8735046386719]}, "gradients/decoder.transformer.h.20.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 6.0, 6.0, 4.0, 10.0, 14.0, 11.0, 13.0, 15.0, 18.0, 28.0, 21.0, 29.0, 34.0, 37.0, 32.0, 40.0, 35.0, 43.0, 54.0, 45.0, 52.0, 58.0, 45.0, 51.0, 33.0, 36.0, 42.0, 35.0, 22.0, 16.0, 21.0, 21.0, 11.0, 13.0, 7.0, 13.0, 6.0, 6.0, 5.0, 4.0, 9.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0], "bins": [-32.75, -31.835205078125, -30.92041015625, -30.005615234375, -29.0908203125, -28.176025390625, -27.26123046875, -26.346435546875, -25.431640625, -24.516845703125, -23.60205078125, -22.687255859375, -21.7724609375, -20.857666015625, -19.94287109375, -19.028076171875, -18.11328125, -17.198486328125, -16.28369140625, -15.368896484375, -14.4541015625, -13.539306640625, -12.62451171875, -11.709716796875, -10.794921875, -9.880126953125, -8.96533203125, -8.050537109375, -7.1357421875, -6.220947265625, -5.30615234375, -4.391357421875, -3.4765625, -2.561767578125, -1.64697265625, -0.732177734375, 0.1826171875, 1.097412109375, 2.01220703125, 2.927001953125, 3.841796875, 4.756591796875, 5.67138671875, 6.586181640625, 7.5009765625, 8.415771484375, 9.33056640625, 10.245361328125, 11.16015625, 12.074951171875, 12.98974609375, 13.904541015625, 14.8193359375, 15.734130859375, 16.64892578125, 17.563720703125, 18.478515625, 19.393310546875, 20.30810546875, 21.222900390625, 22.1376953125, 23.052490234375, 23.96728515625, 24.882080078125, 25.796875]}, "gradients/decoder.transformer.h.20.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0, 8.0, 3.0, 9.0, 14.0, 11.0, 13.0, 15.0, 26.0, 22.0, 43.0, 48.0, 73.0, 129.0, 227.0, 470.0, 1047.0, 2765.0, 9541.0, 415606.0, 3739322.0, 18018.0, 4127.0, 1389.0, 620.0, 270.0, 164.0, 102.0, 46.0, 29.0, 22.0, 20.0, 16.0, 13.0, 8.0, 14.0, 5.0, 8.0, 4.0, 5.0, 8.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-207.375, -201.642578125, -195.91015625, -190.177734375, -184.4453125, -178.712890625, -172.98046875, -167.248046875, -161.515625, -155.783203125, -150.05078125, -144.318359375, -138.5859375, -132.853515625, -127.12109375, -121.388671875, -115.65625, -109.923828125, -104.19140625, -98.458984375, -92.7265625, -86.994140625, -81.26171875, -75.529296875, -69.796875, -64.064453125, -58.33203125, -52.599609375, -46.8671875, -41.134765625, -35.40234375, -29.669921875, -23.9375, -18.205078125, -12.47265625, -6.740234375, -1.0078125, 4.724609375, 10.45703125, 16.189453125, 21.921875, 27.654296875, 33.38671875, 39.119140625, 44.8515625, 50.583984375, 56.31640625, 62.048828125, 67.78125, 73.513671875, 79.24609375, 84.978515625, 90.7109375, 96.443359375, 102.17578125, 107.908203125, 113.640625, 119.373046875, 125.10546875, 130.837890625, 136.5703125, 142.302734375, 148.03515625, 153.767578125, 159.5]}, "gradients/decoder.transformer.h.20.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 3.0, 3.0, 5.0, 5.0, 11.0, 10.0, 8.0, 9.0, 15.0, 13.0, 14.0, 24.0, 27.0, 74.0, 160.0, 362.0, 817.0, 1247.0, 664.0, 292.0, 112.0, 48.0, 29.0, 19.0, 24.0, 11.0, 16.0, 9.0, 9.0, 2.0, 3.0, 2.0, 8.0, 3.0, 7.0, 5.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-95.5, -92.873046875, -90.24609375, -87.619140625, -84.9921875, -82.365234375, -79.73828125, -77.111328125, -74.484375, -71.857421875, -69.23046875, -66.603515625, -63.9765625, -61.349609375, -58.72265625, -56.095703125, -53.46875, -50.841796875, -48.21484375, -45.587890625, -42.9609375, -40.333984375, -37.70703125, -35.080078125, -32.453125, -29.826171875, -27.19921875, -24.572265625, -21.9453125, -19.318359375, -16.69140625, -14.064453125, -11.4375, -8.810546875, -6.18359375, -3.556640625, -0.9296875, 1.697265625, 4.32421875, 6.951171875, 9.578125, 12.205078125, 14.83203125, 17.458984375, 20.0859375, 22.712890625, 25.33984375, 27.966796875, 30.59375, 33.220703125, 35.84765625, 38.474609375, 41.1015625, 43.728515625, 46.35546875, 48.982421875, 51.609375, 54.236328125, 56.86328125, 59.490234375, 62.1171875, 64.744140625, 67.37109375, 69.998046875, 72.625]}, "gradients/decoder.transformer.h.20.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 6.0, 9.0, 12.0, 13.0, 23.0, 37.0, 60.0, 105.0, 175.0, 302.0, 507.0, 892.0, 1653.0, 3171.0, 6540.0, 14430.0, 42757.0, 739559.0, 3279486.0, 68900.0, 19250.0, 8083.0, 3843.0, 1973.0, 997.0, 628.0, 352.0, 197.0, 126.0, 76.0, 42.0, 30.0, 16.0, 12.0, 6.0, 4.0, 6.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-138.375, -133.458984375, -128.54296875, -123.626953125, -118.7109375, -113.794921875, -108.87890625, -103.962890625, -99.046875, -94.130859375, -89.21484375, -84.298828125, -79.3828125, -74.466796875, -69.55078125, -64.634765625, -59.71875, -54.802734375, -49.88671875, -44.970703125, -40.0546875, -35.138671875, -30.22265625, -25.306640625, -20.390625, -15.474609375, -10.55859375, -5.642578125, -0.7265625, 4.189453125, 9.10546875, 14.021484375, 18.9375, 23.853515625, 28.76953125, 33.685546875, 38.6015625, 43.517578125, 48.43359375, 53.349609375, 58.265625, 63.181640625, 68.09765625, 73.013671875, 77.9296875, 82.845703125, 87.76171875, 92.677734375, 97.59375, 102.509765625, 107.42578125, 112.341796875, 117.2578125, 122.173828125, 127.08984375, 132.005859375, 136.921875, 141.837890625, 146.75390625, 151.669921875, 156.5859375, 161.501953125, 166.41796875, 171.333984375, 176.25]}, "gradients/decoder.transformer.h.20.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 7.0, 5.0, 7.0, 6.0, 7.0, 21.0, 14.0, 19.0, 56.0, 78.0, 154.0, 202.0, 179.0, 109.0, 47.0, 32.0, 28.0, 12.0, 8.0, 8.0, 4.0, 2.0, 2.0, 5.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-483.0494384765625, -470.2247009277344, -457.3999328613281, -444.5751953125, -431.7504577636719, -418.92572021484375, -406.1009521484375, -393.2762145996094, -380.45147705078125, -367.6267395019531, -354.8019714355469, -341.97723388671875, -329.1524963378906, -316.3277587890625, -303.50299072265625, -290.6782531738281, -277.8534851074219, -265.02874755859375, -252.20399475097656, -239.37924194335938, -226.55450439453125, -213.72975158691406, -200.90499877929688, -188.08026123046875, -175.25550842285156, -162.43075561523438, -149.60601806640625, -136.78126525878906, -123.9565200805664, -111.13177490234375, -98.30702209472656, -85.4822769165039, -72.65756225585938, -59.83281707763672, -47.0080680847168, -34.183319091796875, -21.35857391357422, -8.533828735351562, 4.290924072265625, 17.11566925048828, 29.940414428710938, 42.765159606933594, 55.589908599853516, 68.41465759277344, 81.2394027709961, 94.06414794921875, 106.88890075683594, 119.7136459350586, 132.53839111328125, 145.36314392089844, 158.18788146972656, 171.01263427734375, 183.83737182617188, 196.66212463378906, 209.48687744140625, 222.31161499023438, 235.13636779785156, 247.96112060546875, 260.7858581542969, 273.610595703125, 286.43536376953125, 299.2601013183594, 312.0848388671875, 324.90960693359375, 337.7343444824219]}, "gradients/decoder.transformer.h.20.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 5.0, 1.0, 0.0, 3.0, 3.0, 8.0, 1.0, 9.0, 13.0, 10.0, 12.0, 12.0, 14.0, 18.0, 28.0, 30.0, 27.0, 34.0, 27.0, 29.0, 38.0, 33.0, 41.0, 33.0, 30.0, 40.0, 53.0, 42.0, 31.0, 34.0, 31.0, 24.0, 32.0, 35.0, 29.0, 28.0, 25.0, 28.0, 19.0, 18.0, 11.0, 18.0, 13.0, 8.0, 6.0, 10.0, 2.0, 4.0, 3.0, 2.0, 2.0, 4.0, 2.0, 0.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0], "bins": [-173.11122131347656, -167.31906127929688, -161.52688598632812, -155.73472595214844, -149.94256591796875, -144.150390625, -138.3582305908203, -132.56607055664062, -126.7739028930664, -120.98173522949219, -115.1895751953125, -109.39740753173828, -103.60523986816406, -97.81307983398438, -92.02091217041016, -86.22874450683594, -80.43658447265625, -74.64441680908203, -68.85225677490234, -63.060089111328125, -57.26792526245117, -51.47576141357422, -45.68359375, -39.89142990112305, -34.099266052246094, -28.30710220336914, -22.514936447143555, -16.72277069091797, -10.930606842041016, -5.1384429931640625, 0.6537246704101562, 6.445888519287109, 12.238067626953125, 18.030231475830078, 23.822397232055664, 29.61456298828125, 35.4067268371582, 41.198890686035156, 46.991058349609375, 52.78322219848633, 58.57538604736328, 64.3675537109375, 70.15971374511719, 75.9518814086914, 81.74404907226562, 87.53620910644531, 93.32837677001953, 99.12054443359375, 104.91270446777344, 110.70487213134766, 116.49703216552734, 122.28919982910156, 128.08135986328125, 133.87353515625, 139.6656951904297, 145.45785522460938, 151.25003051757812, 157.0421905517578, 162.83436584472656, 168.62652587890625, 174.41868591308594, 180.21084594726562, 186.00302124023438, 191.79518127441406, 197.58734130859375]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 5.0, 4.0, 5.0, 2.0, 12.0, 10.0, 7.0, 12.0, 20.0, 15.0, 14.0, 28.0, 20.0, 32.0, 31.0, 33.0, 30.0, 49.0, 54.0, 43.0, 50.0, 52.0, 57.0, 39.0, 32.0, 39.0, 43.0, 29.0, 33.0, 23.0, 33.0, 29.0, 18.0, 20.0, 12.0, 9.0, 12.0, 5.0, 7.0, 10.0, 7.0, 9.0, 5.0, 4.0, 1.0, 2.0, 2.0, 1.0, 1.0, 3.0], "bins": [-32.4375, -31.529052734375, -30.62060546875, -29.712158203125, -28.8037109375, -27.895263671875, -26.98681640625, -26.078369140625, -25.169921875, -24.261474609375, -23.35302734375, -22.444580078125, -21.5361328125, -20.627685546875, -19.71923828125, -18.810791015625, -17.90234375, -16.993896484375, -16.08544921875, -15.177001953125, -14.2685546875, -13.360107421875, -12.45166015625, -11.543212890625, -10.634765625, -9.726318359375, -8.81787109375, -7.909423828125, -7.0009765625, -6.092529296875, -5.18408203125, -4.275634765625, -3.3671875, -2.458740234375, -1.55029296875, -0.641845703125, 0.2666015625, 1.175048828125, 2.08349609375, 2.991943359375, 3.900390625, 4.808837890625, 5.71728515625, 6.625732421875, 7.5341796875, 8.442626953125, 9.35107421875, 10.259521484375, 11.16796875, 12.076416015625, 12.98486328125, 13.893310546875, 14.8017578125, 15.710205078125, 16.61865234375, 17.527099609375, 18.435546875, 19.343994140625, 20.25244140625, 21.160888671875, 22.0693359375, 22.977783203125, 23.88623046875, 24.794677734375, 25.703125]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 3.0, 0.0, 4.0, 4.0, 3.0, 8.0, 6.0, 17.0, 11.0, 21.0, 37.0, 60.0, 75.0, 87.0, 147.0, 231.0, 326.0, 488.0, 702.0, 1021.0, 1410.0, 2154.0, 3198.0, 4752.0, 7444.0, 11118.0, 17603.0, 27838.0, 45911.0, 81538.0, 162298.0, 291364.0, 173409.0, 85885.0, 47776.0, 29010.0, 18201.0, 11629.0, 7431.0, 4977.0, 3379.0, 2209.0, 1491.0, 1050.0, 667.0, 483.0, 332.0, 222.0, 149.0, 125.0, 80.0, 55.0, 35.0, 34.0, 19.0, 21.0, 10.0, 5.0, 0.0, 1.0, 5.0, 4.0], "bins": [-1.4140625, -1.3719024658203125, -1.329742431640625, -1.2875823974609375, -1.24542236328125, -1.2032623291015625, -1.161102294921875, -1.1189422607421875, -1.0767822265625, -1.0346221923828125, -0.992462158203125, -0.9503021240234375, -0.90814208984375, -0.8659820556640625, -0.823822021484375, -0.7816619873046875, -0.739501953125, -0.6973419189453125, -0.655181884765625, -0.6130218505859375, -0.57086181640625, -0.5287017822265625, -0.486541748046875, -0.4443817138671875, -0.4022216796875, -0.3600616455078125, -0.317901611328125, -0.2757415771484375, -0.23358154296875, -0.1914215087890625, -0.149261474609375, -0.1071014404296875, -0.06494140625, -0.0227813720703125, 0.019378662109375, 0.0615386962890625, 0.10369873046875, 0.1458587646484375, 0.188018798828125, 0.2301788330078125, 0.2723388671875, 0.3144989013671875, 0.356658935546875, 0.3988189697265625, 0.44097900390625, 0.4831390380859375, 0.525299072265625, 0.5674591064453125, 0.609619140625, 0.6517791748046875, 0.693939208984375, 0.7360992431640625, 0.77825927734375, 0.8204193115234375, 0.862579345703125, 0.9047393798828125, 0.9468994140625, 0.9890594482421875, 1.031219482421875, 1.0733795166015625, 1.11553955078125, 1.1576995849609375, 1.199859619140625, 1.2420196533203125, 1.2841796875]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 3.0, 6.0, 3.0, 7.0, 6.0, 5.0, 18.0, 12.0, 12.0, 13.0, 24.0, 17.0, 27.0, 22.0, 20.0, 33.0, 28.0, 33.0, 28.0, 37.0, 45.0, 30.0, 39.0, 36.0, 1055.0, 30.0, 49.0, 38.0, 35.0, 30.0, 40.0, 27.0, 28.0, 30.0, 24.0, 18.0, 18.0, 20.0, 14.0, 9.0, 11.0, 14.0, 8.0, 9.0, 10.0, 2.0, 4.0, 0.0, 1.0, 3.0, 3.0, 1.0, 2.0, 2.0, 1.0], "bins": [-18.515625, -17.96337890625, -17.4111328125, -16.85888671875, -16.306640625, -15.75439453125, -15.2021484375, -14.64990234375, -14.09765625, -13.54541015625, -12.9931640625, -12.44091796875, -11.888671875, -11.33642578125, -10.7841796875, -10.23193359375, -9.6796875, -9.12744140625, -8.5751953125, -8.02294921875, -7.470703125, -6.91845703125, -6.3662109375, -5.81396484375, -5.26171875, -4.70947265625, -4.1572265625, -3.60498046875, -3.052734375, -2.50048828125, -1.9482421875, -1.39599609375, -0.84375, -0.29150390625, 0.2607421875, 0.81298828125, 1.365234375, 1.91748046875, 2.4697265625, 3.02197265625, 3.57421875, 4.12646484375, 4.6787109375, 5.23095703125, 5.783203125, 6.33544921875, 6.8876953125, 7.43994140625, 7.9921875, 8.54443359375, 9.0966796875, 9.64892578125, 10.201171875, 10.75341796875, 11.3056640625, 11.85791015625, 12.41015625, 12.96240234375, 13.5146484375, 14.06689453125, 14.619140625, 15.17138671875, 15.7236328125, 16.27587890625, 16.828125]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 4.0, 2.0, 7.0, 10.0, 21.0, 30.0, 39.0, 50.0, 79.0, 110.0, 162.0, 239.0, 324.0, 534.0, 796.0, 1212.0, 1704.0, 2677.0, 4266.0, 6551.0, 10283.0, 16507.0, 26783.0, 45244.0, 80609.0, 167190.0, 1347251.0, 177579.0, 84948.0, 46759.0, 27901.0, 17206.0, 10764.0, 6790.0, 4331.0, 2763.0, 1806.0, 1222.0, 789.0, 504.0, 344.0, 242.0, 140.0, 118.0, 77.0, 63.0, 36.0, 26.0, 14.0, 9.0, 7.0, 0.0, 6.0, 7.0, 2.0, 3.0], "bins": [-1.5703125, -1.52484130859375, -1.4793701171875, -1.43389892578125, -1.388427734375, -1.34295654296875, -1.2974853515625, -1.25201416015625, -1.20654296875, -1.16107177734375, -1.1156005859375, -1.07012939453125, -1.024658203125, -0.97918701171875, -0.9337158203125, -0.88824462890625, -0.8427734375, -0.79730224609375, -0.7518310546875, -0.70635986328125, -0.660888671875, -0.61541748046875, -0.5699462890625, -0.52447509765625, -0.47900390625, -0.43353271484375, -0.3880615234375, -0.34259033203125, -0.297119140625, -0.25164794921875, -0.2061767578125, -0.16070556640625, -0.115234375, -0.06976318359375, -0.0242919921875, 0.02117919921875, 0.066650390625, 0.11212158203125, 0.1575927734375, 0.20306396484375, 0.24853515625, 0.29400634765625, 0.3394775390625, 0.38494873046875, 0.430419921875, 0.47589111328125, 0.5213623046875, 0.56683349609375, 0.6123046875, 0.65777587890625, 0.7032470703125, 0.74871826171875, 0.794189453125, 0.83966064453125, 0.8851318359375, 0.93060302734375, 0.97607421875, 1.02154541015625, 1.0670166015625, 1.11248779296875, 1.157958984375, 1.20343017578125, 1.2489013671875, 1.29437255859375, 1.33984375]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 4.0, 6.0, 2.0, 5.0, 10.0, 10.0, 10.0, 10.0, 18.0, 18.0, 28.0, 24.0, 39.0, 40.0, 50.0, 73.0, 81.0, 118.0, 90.0, 77.0, 60.0, 32.0, 49.0, 26.0, 28.0, 12.0, 17.0, 11.0, 16.0, 8.0, 7.0, 4.0, 7.0, 3.0, 4.0, 5.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0196990966796875, -0.019016027450561523, -0.018332958221435547, -0.01764988899230957, -0.016966819763183594, -0.016283750534057617, -0.01560068130493164, -0.014917612075805664, -0.014234542846679688, -0.013551473617553711, -0.012868404388427734, -0.012185335159301758, -0.011502265930175781, -0.010819196701049805, -0.010136127471923828, -0.009453058242797852, -0.008769989013671875, -0.008086919784545898, -0.007403850555419922, -0.006720781326293945, -0.006037712097167969, -0.005354642868041992, -0.004671573638916016, -0.003988504409790039, -0.0033054351806640625, -0.002622365951538086, -0.0019392967224121094, -0.0012562274932861328, -0.0005731582641601562, 0.00010991096496582031, 0.0007929801940917969, 0.0014760494232177734, 0.00215911865234375, 0.0028421878814697266, 0.003525257110595703, 0.00420832633972168, 0.004891395568847656, 0.005574464797973633, 0.006257534027099609, 0.006940603256225586, 0.0076236724853515625, 0.008306741714477539, 0.008989810943603516, 0.009672880172729492, 0.010355949401855469, 0.011039018630981445, 0.011722087860107422, 0.012405157089233398, 0.013088226318359375, 0.013771295547485352, 0.014454364776611328, 0.015137434005737305, 0.01582050323486328, 0.016503572463989258, 0.017186641693115234, 0.01786971092224121, 0.018552780151367188, 0.019235849380493164, 0.01991891860961914, 0.020601987838745117, 0.021285057067871094, 0.02196812629699707, 0.022651195526123047, 0.023334264755249023, 0.024017333984375]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 4.0, 1.0, 3.0, 8.0, 10.0, 5.0, 11.0, 11.0, 23.0, 28.0, 32.0, 41.0, 49.0, 70.0, 91.0, 127.0, 182.0, 340.0, 1235.0, 12500.0, 865461.0, 161229.0, 5449.0, 750.0, 263.0, 161.0, 110.0, 87.0, 66.0, 44.0, 43.0, 25.0, 23.0, 21.0, 11.0, 12.0, 4.0, 10.0, 5.0, 4.0, 2.0, 4.0, 3.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-0.352783203125, -0.34265899658203125, -0.3325347900390625, -0.32241058349609375, -0.312286376953125, -0.30216217041015625, -0.2920379638671875, -0.28191375732421875, -0.27178955078125, -0.26166534423828125, -0.2515411376953125, -0.24141693115234375, -0.231292724609375, -0.22116851806640625, -0.2110443115234375, -0.20092010498046875, -0.1907958984375, -0.18067169189453125, -0.1705474853515625, -0.16042327880859375, -0.150299072265625, -0.14017486572265625, -0.1300506591796875, -0.11992645263671875, -0.10980224609375, -0.09967803955078125, -0.0895538330078125, -0.07942962646484375, -0.069305419921875, -0.05918121337890625, -0.0490570068359375, -0.03893280029296875, -0.02880859375, -0.01868438720703125, -0.0085601806640625, 0.00156402587890625, 0.011688232421875, 0.02181243896484375, 0.0319366455078125, 0.04206085205078125, 0.05218505859375, 0.06230926513671875, 0.0724334716796875, 0.08255767822265625, 0.092681884765625, 0.10280609130859375, 0.1129302978515625, 0.12305450439453125, 0.1331787109375, 0.14330291748046875, 0.1534271240234375, 0.16355133056640625, 0.173675537109375, 0.18379974365234375, 0.1939239501953125, 0.20404815673828125, 0.21417236328125, 0.22429656982421875, 0.2344207763671875, 0.24454498291015625, 0.254669189453125, 0.26479339599609375, 0.2749176025390625, 0.28504180908203125, 0.295166015625]}, "gradients/decoder.transformer.h.20.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 11.0, 56.0, 537.0, 371.0, 36.0, 1.0, 3.0], "bins": [-0.20111361145973206, -0.19775904715061188, -0.1944044828414917, -0.19104990363121033, -0.18769533932209015, -0.18434077501296997, -0.1809862107038498, -0.17763164639472961, -0.17427708208560944, -0.17092251777648926, -0.16756795346736908, -0.1642133891582489, -0.16085880994796753, -0.15750424563884735, -0.15414968132972717, -0.150795117020607, -0.14744055271148682, -0.14408598840236664, -0.14073142409324646, -0.1373768448829651, -0.1340222805738449, -0.13066771626472473, -0.12731315195560455, -0.12395858764648438, -0.120604008436203, -0.11724944412708282, -0.11389487236738205, -0.11054030805826187, -0.1071857437491417, -0.10383117198944092, -0.10047660768032074, -0.09712204337120056, -0.09376747161149979, -0.09041290730237961, -0.08705833554267883, -0.08370377123355865, -0.08034920692443848, -0.0769946426153183, -0.07364007085561752, -0.07028550654649734, -0.06693094223737717, -0.06357637792825699, -0.06022180989384651, -0.056867241859436035, -0.05351267755031586, -0.05015810951590538, -0.046803541481494904, -0.043448977172374725, -0.04009440541267395, -0.036739837378263474, -0.033385273069143295, -0.03003070503473282, -0.02667613886296749, -0.023321572691202164, -0.019967004656791687, -0.01661243848502636, -0.013257873244583607, -0.009903306141495705, -0.006548739969730377, -0.003194172866642475, 0.00016039330512285233, 0.0035149594768881798, 0.0068695275112986565, 0.010224093683063984, 0.013578659854829311]}, "gradients/decoder.transformer.h.20.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 1.0, 5.0, 4.0, 7.0, 9.0, 13.0, 5.0, 15.0, 24.0, 17.0, 23.0, 28.0, 34.0, 36.0, 47.0, 46.0, 51.0, 50.0, 37.0, 52.0, 42.0, 48.0, 55.0, 31.0, 44.0, 33.0, 28.0, 46.0, 29.0, 26.0, 13.0, 22.0, 15.0, 16.0, 11.0, 11.0, 7.0, 7.0, 10.0, 5.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.012270510196685791, -0.011865060776472092, -0.011459611356258392, -0.011054161936044693, -0.010648712515830994, -0.010243263095617294, -0.009837813675403595, -0.009432364255189896, -0.009026914834976196, -0.008621465414762497, -0.008216015994548798, -0.007810566574335098, -0.007405117154121399, -0.0069996677339077, -0.006594218313694, -0.006188768893480301, -0.0057833194732666016, -0.005377870053052902, -0.004972420632839203, -0.0045669712126255035, -0.004161521792411804, -0.003756072372198105, -0.0033506229519844055, -0.002945173531770706, -0.002539724111557007, -0.0021342746913433075, -0.0017288252711296082, -0.0013233758509159088, -0.0009179264307022095, -0.0005124770104885101, -0.00010702759027481079, 0.00029842182993888855, 0.0007038712501525879, 0.0011093206703662872, 0.0015147700905799866, 0.001920219510793686, 0.0023256689310073853, 0.0027311183512210846, 0.003136567771434784, 0.0035420171916484833, 0.003947466611862183, 0.004352916032075882, 0.004758365452289581, 0.005163814872503281, 0.00556926429271698, 0.005974713712930679, 0.006380163133144379, 0.006785612553358078, 0.007191061973571777, 0.007596511393785477, 0.008001960813999176, 0.008407410234212875, 0.008812859654426575, 0.009218309074640274, 0.009623758494853973, 0.010029207915067673, 0.010434657335281372, 0.010840106755495071, 0.01124555617570877, 0.01165100559592247, 0.01205645501613617, 0.012461904436349869, 0.012867353856563568, 0.013272803276777267, 0.013678252696990967]}, "gradients/decoder.transformer.h.20.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 5.0, 4.0, 5.0, 2.0, 12.0, 10.0, 7.0, 12.0, 20.0, 15.0, 14.0, 28.0, 20.0, 32.0, 31.0, 33.0, 30.0, 49.0, 54.0, 44.0, 51.0, 51.0, 56.0, 39.0, 34.0, 37.0, 43.0, 29.0, 33.0, 23.0, 33.0, 30.0, 17.0, 20.0, 12.0, 9.0, 12.0, 5.0, 7.0, 10.0, 7.0, 10.0, 4.0, 4.0, 1.0, 2.0, 2.0, 1.0, 1.0, 3.0], "bins": [-32.4375, -31.52880859375, -30.6201171875, -29.71142578125, -28.802734375, -27.89404296875, -26.9853515625, -26.07666015625, -25.16796875, -24.25927734375, -23.3505859375, -22.44189453125, -21.533203125, -20.62451171875, -19.7158203125, -18.80712890625, -17.8984375, -16.98974609375, -16.0810546875, -15.17236328125, -14.263671875, -13.35498046875, -12.4462890625, -11.53759765625, -10.62890625, -9.72021484375, -8.8115234375, -7.90283203125, -6.994140625, -6.08544921875, -5.1767578125, -4.26806640625, -3.359375, -2.45068359375, -1.5419921875, -0.63330078125, 0.275390625, 1.18408203125, 2.0927734375, 3.00146484375, 3.91015625, 4.81884765625, 5.7275390625, 6.63623046875, 7.544921875, 8.45361328125, 9.3623046875, 10.27099609375, 11.1796875, 12.08837890625, 12.9970703125, 13.90576171875, 14.814453125, 15.72314453125, 16.6318359375, 17.54052734375, 18.44921875, 19.35791015625, 20.2666015625, 21.17529296875, 22.083984375, 22.99267578125, 23.9013671875, 24.81005859375, 25.71875]}, "gradients/decoder.transformer.h.20.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 2.0, 1.0, 6.0, 4.0, 7.0, 6.0, 14.0, 22.0, 25.0, 31.0, 47.0, 56.0, 71.0, 101.0, 125.0, 201.0, 281.0, 420.0, 605.0, 807.0, 1193.0, 1891.0, 3117.0, 6968.0, 50173.0, 885533.0, 78888.0, 8439.0, 3295.0, 1981.0, 1320.0, 840.0, 592.0, 423.0, 303.0, 239.0, 136.0, 108.0, 83.0, 54.0, 49.0, 32.0, 20.0, 16.0, 14.0, 7.0, 4.0, 9.0, 4.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-48.15625, -46.470703125, -44.78515625, -43.099609375, -41.4140625, -39.728515625, -38.04296875, -36.357421875, -34.671875, -32.986328125, -31.30078125, -29.615234375, -27.9296875, -26.244140625, -24.55859375, -22.873046875, -21.1875, -19.501953125, -17.81640625, -16.130859375, -14.4453125, -12.759765625, -11.07421875, -9.388671875, -7.703125, -6.017578125, -4.33203125, -2.646484375, -0.9609375, 0.724609375, 2.41015625, 4.095703125, 5.78125, 7.466796875, 9.15234375, 10.837890625, 12.5234375, 14.208984375, 15.89453125, 17.580078125, 19.265625, 20.951171875, 22.63671875, 24.322265625, 26.0078125, 27.693359375, 29.37890625, 31.064453125, 32.75, 34.435546875, 36.12109375, 37.806640625, 39.4921875, 41.177734375, 42.86328125, 44.548828125, 46.234375, 47.919921875, 49.60546875, 51.291015625, 52.9765625, 54.662109375, 56.34765625, 58.033203125, 59.71875]}, "gradients/decoder.transformer.h.20.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 2.0, 4.0, 6.0, 4.0, 5.0, 9.0, 12.0, 15.0, 15.0, 20.0, 20.0, 29.0, 20.0, 33.0, 32.0, 39.0, 32.0, 29.0, 44.0, 50.0, 60.0, 1988.0, 122.0, 52.0, 47.0, 37.0, 42.0, 34.0, 39.0, 25.0, 34.0, 26.0, 15.0, 23.0, 20.0, 13.0, 12.0, 10.0, 13.0, 8.0, 5.0, 4.0, 4.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-108.75, -105.525390625, -102.30078125, -99.076171875, -95.8515625, -92.626953125, -89.40234375, -86.177734375, -82.953125, -79.728515625, -76.50390625, -73.279296875, -70.0546875, -66.830078125, -63.60546875, -60.380859375, -57.15625, -53.931640625, -50.70703125, -47.482421875, -44.2578125, -41.033203125, -37.80859375, -34.583984375, -31.359375, -28.134765625, -24.91015625, -21.685546875, -18.4609375, -15.236328125, -12.01171875, -8.787109375, -5.5625, -2.337890625, 0.88671875, 4.111328125, 7.3359375, 10.560546875, 13.78515625, 17.009765625, 20.234375, 23.458984375, 26.68359375, 29.908203125, 33.1328125, 36.357421875, 39.58203125, 42.806640625, 46.03125, 49.255859375, 52.48046875, 55.705078125, 58.9296875, 62.154296875, 65.37890625, 68.603515625, 71.828125, 75.052734375, 78.27734375, 81.501953125, 84.7265625, 87.951171875, 91.17578125, 94.400390625, 97.625]}, "gradients/decoder.transformer.h.20.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 1.0, 5.0, 4.0, 6.0, 7.0, 14.0, 14.0, 12.0, 13.0, 23.0, 25.0, 17.0, 35.0, 55.0, 53.0, 88.0, 133.0, 219.0, 385.0, 1084.0, 3748.0, 56607.0, 3071513.0, 8362.0, 1776.0, 672.0, 278.0, 157.0, 99.0, 68.0, 54.0, 32.0, 33.0, 19.0, 20.0, 18.0, 19.0, 9.0, 12.0, 5.0, 2.0, 7.0, 4.0, 3.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-156.75, -151.56640625, -146.3828125, -141.19921875, -136.015625, -130.83203125, -125.6484375, -120.46484375, -115.28125, -110.09765625, -104.9140625, -99.73046875, -94.546875, -89.36328125, -84.1796875, -78.99609375, -73.8125, -68.62890625, -63.4453125, -58.26171875, -53.078125, -47.89453125, -42.7109375, -37.52734375, -32.34375, -27.16015625, -21.9765625, -16.79296875, -11.609375, -6.42578125, -1.2421875, 3.94140625, 9.125, 14.30859375, 19.4921875, 24.67578125, 29.859375, 35.04296875, 40.2265625, 45.41015625, 50.59375, 55.77734375, 60.9609375, 66.14453125, 71.328125, 76.51171875, 81.6953125, 86.87890625, 92.0625, 97.24609375, 102.4296875, 107.61328125, 112.796875, 117.98046875, 123.1640625, 128.34765625, 133.53125, 138.71484375, 143.8984375, 149.08203125, 154.265625, 159.44921875, 164.6328125, 169.81640625, 175.0]}, "gradients/decoder.transformer.h.20.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 8.0, 68.0, 658.0, 271.0, 14.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-83.49669647216797, -68.96754455566406, -54.43840026855469, -39.90924835205078, -25.38010025024414, -10.8509521484375, 3.6781997680664062, 18.20734405517578, 32.73649597167969, 47.26564407348633, 61.79479217529297, 76.32394409179688, 90.85308837890625, 105.38224029541016, 119.91139221191406, 134.44053649902344, 148.96969604492188, 163.49884033203125, 178.0279998779297, 192.55714416503906, 207.08628845214844, 221.61544799804688, 236.14459228515625, 250.67373657226562, 265.202880859375, 279.7320251464844, 294.26116943359375, 308.79034423828125, 323.3194885253906, 337.8486328125, 352.3777770996094, 366.90692138671875, 381.4360656738281, 395.9652099609375, 410.4943542480469, 425.02349853515625, 439.55267333984375, 454.0818176269531, 468.6109619140625, 483.1401062011719, 497.66925048828125, 512.1984252929688, 526.7275390625, 541.2567138671875, 555.7858276367188, 570.3150024414062, 584.8441162109375, 599.373291015625, 613.9024658203125, 628.431640625, 642.9607543945312, 657.4899291992188, 672.01904296875, 686.5482177734375, 701.077392578125, 715.6065063476562, 730.1356201171875, 744.664794921875, 759.1939086914062, 773.7230834960938, 788.252197265625, 802.7813720703125, 817.310546875, 831.8396606445312, 846.3688354492188]}, "gradients/decoder.transformer.h.20.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 2.0, 3.0, 3.0, 9.0, 11.0, 11.0, 14.0, 16.0, 19.0, 24.0, 25.0, 24.0, 40.0, 35.0, 42.0, 40.0, 49.0, 55.0, 55.0, 50.0, 60.0, 58.0, 38.0, 34.0, 40.0, 33.0, 32.0, 38.0, 28.0, 13.0, 19.0, 16.0, 7.0, 14.0, 11.0, 8.0, 9.0, 6.0, 4.0, 5.0, 3.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0], "bins": [-377.87835693359375, -367.0213928222656, -356.1644287109375, -345.3074645996094, -334.45050048828125, -323.5935363769531, -312.736572265625, -301.8796081542969, -291.02264404296875, -280.1656799316406, -269.3087158203125, -258.4517517089844, -247.59478759765625, -236.73782348632812, -225.880859375, -215.02389526367188, -204.16693115234375, -193.30996704101562, -182.4530029296875, -171.59603881835938, -160.73907470703125, -149.88211059570312, -139.025146484375, -128.16818237304688, -117.31121826171875, -106.45425415039062, -95.5972900390625, -84.74032592773438, -73.88336181640625, -63.026397705078125, -52.16943359375, -41.312469482421875, -30.45550537109375, -19.598541259765625, -8.7415771484375, 2.115386962890625, 12.97235107421875, 23.829315185546875, 34.686279296875, 45.543243408203125, 56.40020751953125, 67.25717163085938, 78.1141357421875, 88.97109985351562, 99.82806396484375, 110.68502807617188, 121.5419921875, 132.39895629882812, 143.25592041015625, 154.11288452148438, 164.9698486328125, 175.82681274414062, 186.68377685546875, 197.54074096679688, 208.397705078125, 219.25466918945312, 230.11163330078125, 240.96859741210938, 251.8255615234375, 262.6825256347656, 273.53948974609375, 284.3964538574219, 295.25341796875, 306.1103820800781, 316.96734619140625]}, "gradients/decoder.transformer.h.19.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 0.0, 4.0, 2.0, 4.0, 3.0, 8.0, 9.0, 10.0, 7.0, 6.0, 23.0, 13.0, 21.0, 13.0, 30.0, 37.0, 28.0, 31.0, 31.0, 44.0, 58.0, 36.0, 51.0, 55.0, 52.0, 46.0, 37.0, 34.0, 40.0, 35.0, 33.0, 35.0, 22.0, 29.0, 22.0, 19.0, 7.0, 12.0, 13.0, 6.0, 5.0, 7.0, 12.0, 6.0, 7.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 4.0], "bins": [-33.03125, -32.108642578125, -31.18603515625, -30.263427734375, -29.3408203125, -28.418212890625, -27.49560546875, -26.572998046875, -25.650390625, -24.727783203125, -23.80517578125, -22.882568359375, -21.9599609375, -21.037353515625, -20.11474609375, -19.192138671875, -18.26953125, -17.346923828125, -16.42431640625, -15.501708984375, -14.5791015625, -13.656494140625, -12.73388671875, -11.811279296875, -10.888671875, -9.966064453125, -9.04345703125, -8.120849609375, -7.1982421875, -6.275634765625, -5.35302734375, -4.430419921875, -3.5078125, -2.585205078125, -1.66259765625, -0.739990234375, 0.1826171875, 1.105224609375, 2.02783203125, 2.950439453125, 3.873046875, 4.795654296875, 5.71826171875, 6.640869140625, 7.5634765625, 8.486083984375, 9.40869140625, 10.331298828125, 11.25390625, 12.176513671875, 13.09912109375, 14.021728515625, 14.9443359375, 15.866943359375, 16.78955078125, 17.712158203125, 18.634765625, 19.557373046875, 20.47998046875, 21.402587890625, 22.3251953125, 23.247802734375, 24.17041015625, 25.093017578125, 26.015625]}, "gradients/decoder.transformer.h.19.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 6.0, 4.0, 5.0, 8.0, 13.0, 12.0, 14.0, 26.0, 31.0, 53.0, 67.0, 70.0, 137.0, 203.0, 305.0, 483.0, 777.0, 1252.0, 2219.0, 4188.0, 8243.0, 20241.0, 444759.0, 3565528.0, 115687.0, 14784.0, 6584.0, 3462.0, 1963.0, 1202.0, 669.0, 455.0, 252.0, 178.0, 146.0, 80.0, 49.0, 41.0, 30.0, 21.0, 4.0, 9.0, 16.0, 4.0, 1.0, 4.0, 2.0, 2.0, 0.0, 3.0, 1.0, 1.0], "bins": [-104.125, -101.177734375, -98.23046875, -95.283203125, -92.3359375, -89.388671875, -86.44140625, -83.494140625, -80.546875, -77.599609375, -74.65234375, -71.705078125, -68.7578125, -65.810546875, -62.86328125, -59.916015625, -56.96875, -54.021484375, -51.07421875, -48.126953125, -45.1796875, -42.232421875, -39.28515625, -36.337890625, -33.390625, -30.443359375, -27.49609375, -24.548828125, -21.6015625, -18.654296875, -15.70703125, -12.759765625, -9.8125, -6.865234375, -3.91796875, -0.970703125, 1.9765625, 4.923828125, 7.87109375, 10.818359375, 13.765625, 16.712890625, 19.66015625, 22.607421875, 25.5546875, 28.501953125, 31.44921875, 34.396484375, 37.34375, 40.291015625, 43.23828125, 46.185546875, 49.1328125, 52.080078125, 55.02734375, 57.974609375, 60.921875, 63.869140625, 66.81640625, 69.763671875, 72.7109375, 75.658203125, 78.60546875, 81.552734375, 84.5]}, "gradients/decoder.transformer.h.19.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 4.0, 4.0, 0.0, 2.0, 1.0, 5.0, 2.0, 4.0, 5.0, 5.0, 8.0, 10.0, 10.0, 12.0, 20.0, 20.0, 36.0, 60.0, 139.0, 469.0, 1161.0, 1250.0, 496.0, 156.0, 60.0, 33.0, 31.0, 22.0, 7.0, 12.0, 8.0, 7.0, 5.0, 4.0, 3.0, 3.0, 1.0, 2.0, 4.0, 2.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-84.5625, -81.1865234375, -77.810546875, -74.4345703125, -71.05859375, -67.6826171875, -64.306640625, -60.9306640625, -57.5546875, -54.1787109375, -50.802734375, -47.4267578125, -44.05078125, -40.6748046875, -37.298828125, -33.9228515625, -30.546875, -27.1708984375, -23.794921875, -20.4189453125, -17.04296875, -13.6669921875, -10.291015625, -6.9150390625, -3.5390625, -0.1630859375, 3.212890625, 6.5888671875, 9.96484375, 13.3408203125, 16.716796875, 20.0927734375, 23.46875, 26.8447265625, 30.220703125, 33.5966796875, 36.97265625, 40.3486328125, 43.724609375, 47.1005859375, 50.4765625, 53.8525390625, 57.228515625, 60.6044921875, 63.98046875, 67.3564453125, 70.732421875, 74.1083984375, 77.484375, 80.8603515625, 84.236328125, 87.6123046875, 90.98828125, 94.3642578125, 97.740234375, 101.1162109375, 104.4921875, 107.8681640625, 111.244140625, 114.6201171875, 117.99609375, 121.3720703125, 124.748046875, 128.1240234375, 131.5]}, "gradients/decoder.transformer.h.19.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 3.0, 4.0, 3.0, 14.0, 14.0, 18.0, 27.0, 55.0, 59.0, 99.0, 153.0, 216.0, 364.0, 587.0, 995.0, 1495.0, 2614.0, 4201.0, 7870.0, 16151.0, 43033.0, 384658.0, 3558917.0, 117523.0, 27985.0, 11979.0, 6090.0, 3507.0, 2126.0, 1332.0, 809.0, 528.0, 295.0, 202.0, 112.0, 88.0, 46.0, 35.0, 34.0, 19.0, 5.0, 8.0, 3.0, 8.0, 1.0, 0.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-147.25, -142.9580078125, -138.666015625, -134.3740234375, -130.08203125, -125.7900390625, -121.498046875, -117.2060546875, -112.9140625, -108.6220703125, -104.330078125, -100.0380859375, -95.74609375, -91.4541015625, -87.162109375, -82.8701171875, -78.578125, -74.2861328125, -69.994140625, -65.7021484375, -61.41015625, -57.1181640625, -52.826171875, -48.5341796875, -44.2421875, -39.9501953125, -35.658203125, -31.3662109375, -27.07421875, -22.7822265625, -18.490234375, -14.1982421875, -9.90625, -5.6142578125, -1.322265625, 2.9697265625, 7.26171875, 11.5537109375, 15.845703125, 20.1376953125, 24.4296875, 28.7216796875, 33.013671875, 37.3056640625, 41.59765625, 45.8896484375, 50.181640625, 54.4736328125, 58.765625, 63.0576171875, 67.349609375, 71.6416015625, 75.93359375, 80.2255859375, 84.517578125, 88.8095703125, 93.1015625, 97.3935546875, 101.685546875, 105.9775390625, 110.26953125, 114.5615234375, 118.853515625, 123.1455078125, 127.4375]}, "gradients/decoder.transformer.h.19.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 8.0, 10.0, 25.0, 44.0, 84.0, 199.0, 336.0, 156.0, 58.0, 35.0, 24.0, 16.0, 7.0, 5.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-383.6897277832031, -361.6182556152344, -339.5467834472656, -317.4753112792969, -295.4038391113281, -273.3323669433594, -251.2609100341797, -229.18943786621094, -207.1179656982422, -185.04649353027344, -162.9750213623047, -140.903564453125, -118.83208465576172, -96.76061248779297, -74.68914794921875, -52.61767578125, -30.54620361328125, -8.474733352661133, 13.596736907958984, 35.66820526123047, 57.73967742919922, 79.81114959716797, 101.88261413574219, 123.95408630371094, 146.0255584716797, 168.09703063964844, 190.1685028076172, 212.23995971679688, 234.31143188476562, 256.3829040527344, 278.4543762207031, 300.5258483886719, 322.59735107421875, 344.6688232421875, 366.74029541015625, 388.811767578125, 410.88323974609375, 432.9547119140625, 455.02618408203125, 477.09765625, 499.16912841796875, 521.2406005859375, 543.3120727539062, 565.383544921875, 587.4550170898438, 609.5264892578125, 631.5979614257812, 653.66943359375, 675.7408447265625, 697.8123168945312, 719.8837890625, 741.9552612304688, 764.0267333984375, 786.0982055664062, 808.169677734375, 830.2411499023438, 852.3126220703125, 874.3840942382812, 896.45556640625, 918.5270385742188, 940.5985107421875, 962.6699829101562, 984.741455078125, 1006.8129272460938, 1028.8843994140625]}, "gradients/decoder.transformer.h.19.ln_2.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 6.0, 7.0, 3.0, 6.0, 3.0, 12.0, 4.0, 12.0, 16.0, 19.0, 21.0, 28.0, 25.0, 26.0, 30.0, 42.0, 27.0, 42.0, 29.0, 50.0, 40.0, 40.0, 45.0, 48.0, 31.0, 40.0, 52.0, 39.0, 35.0, 37.0, 32.0, 31.0, 28.0, 22.0, 21.0, 14.0, 8.0, 3.0, 10.0, 8.0, 3.0, 4.0, 2.0, 3.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-228.30801391601562, -221.19290161132812, -214.07778930664062, -206.96266174316406, -199.84754943847656, -192.73243713378906, -185.6173095703125, -178.502197265625, -171.3870849609375, -164.27197265625, -157.1568603515625, -150.04173278808594, -142.92662048339844, -135.81150817871094, -128.69638061523438, -121.58126831054688, -114.46615600585938, -107.35104370117188, -100.23592376708984, -93.12080383300781, -86.00569152832031, -78.89057922363281, -71.77545928955078, -64.66033935546875, -57.54522705078125, -50.430110931396484, -43.31499481201172, -36.19987869262695, -29.084762573242188, -21.969646453857422, -14.854530334472656, -7.739414215087891, -0.624298095703125, 6.490818023681641, 13.605934143066406, 20.721050262451172, 27.836166381835938, 34.9512825012207, 42.06639862060547, 49.181514739990234, 56.296630859375, 63.411746978759766, 70.52686309814453, 77.64198303222656, 84.75709533691406, 91.87220764160156, 98.9873275756836, 106.10244750976562, 113.21755981445312, 120.33267211914062, 127.44779205322266, 134.5629119873047, 141.6780242919922, 148.7931365966797, 155.90826416015625, 163.02337646484375, 170.13848876953125, 177.25360107421875, 184.36871337890625, 191.4838409423828, 198.5989532470703, 205.7140655517578, 212.82919311523438, 219.94430541992188, 227.05941772460938]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 2.0, 2.0, 8.0, 5.0, 3.0, 11.0, 16.0, 13.0, 21.0, 15.0, 20.0, 32.0, 27.0, 32.0, 32.0, 33.0, 35.0, 34.0, 46.0, 56.0, 62.0, 57.0, 46.0, 40.0, 34.0, 50.0, 37.0, 34.0, 24.0, 28.0, 25.0, 18.0, 15.0, 18.0, 14.0, 13.0, 10.0, 13.0, 9.0, 4.0, 5.0, 3.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-34.09375, -33.090087890625, -32.08642578125, -31.082763671875, -30.0791015625, -29.075439453125, -28.07177734375, -27.068115234375, -26.064453125, -25.060791015625, -24.05712890625, -23.053466796875, -22.0498046875, -21.046142578125, -20.04248046875, -19.038818359375, -18.03515625, -17.031494140625, -16.02783203125, -15.024169921875, -14.0205078125, -13.016845703125, -12.01318359375, -11.009521484375, -10.005859375, -9.002197265625, -7.99853515625, -6.994873046875, -5.9912109375, -4.987548828125, -3.98388671875, -2.980224609375, -1.9765625, -0.972900390625, 0.03076171875, 1.034423828125, 2.0380859375, 3.041748046875, 4.04541015625, 5.049072265625, 6.052734375, 7.056396484375, 8.06005859375, 9.063720703125, 10.0673828125, 11.071044921875, 12.07470703125, 13.078369140625, 14.08203125, 15.085693359375, 16.08935546875, 17.093017578125, 18.0966796875, 19.100341796875, 20.10400390625, 21.107666015625, 22.111328125, 23.114990234375, 24.11865234375, 25.122314453125, 26.1259765625, 27.129638671875, 28.13330078125, 29.136962890625, 30.140625]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 4.0, 4.0, 6.0, 12.0, 12.0, 16.0, 21.0, 39.0, 60.0, 84.0, 100.0, 169.0, 244.0, 348.0, 546.0, 771.0, 1151.0, 1739.0, 2551.0, 3923.0, 5957.0, 8948.0, 14097.0, 22026.0, 35705.0, 59938.0, 110165.0, 232521.0, 255165.0, 121710.0, 65163.0, 38251.0, 23773.0, 15106.0, 9488.0, 6252.0, 4066.0, 2843.0, 1836.0, 1215.0, 806.0, 539.0, 362.0, 274.0, 180.0, 120.0, 75.0, 63.0, 36.0, 29.0, 13.0, 19.0, 9.0, 8.0, 7.0, 3.0, 2.0, 1.0, 1.0, 1.0], "bins": [-1.5078125, -1.4608001708984375, -1.413787841796875, -1.3667755126953125, -1.31976318359375, -1.2727508544921875, -1.225738525390625, -1.1787261962890625, -1.1317138671875, -1.0847015380859375, -1.037689208984375, -0.9906768798828125, -0.94366455078125, -0.8966522216796875, -0.849639892578125, -0.8026275634765625, -0.755615234375, -0.7086029052734375, -0.661590576171875, -0.6145782470703125, -0.56756591796875, -0.5205535888671875, -0.473541259765625, -0.4265289306640625, -0.3795166015625, -0.3325042724609375, -0.285491943359375, -0.2384796142578125, -0.19146728515625, -0.1444549560546875, -0.097442626953125, -0.0504302978515625, -0.00341796875, 0.0435943603515625, 0.090606689453125, 0.1376190185546875, 0.18463134765625, 0.2316436767578125, 0.278656005859375, 0.3256683349609375, 0.3726806640625, 0.4196929931640625, 0.466705322265625, 0.5137176513671875, 0.56072998046875, 0.6077423095703125, 0.654754638671875, 0.7017669677734375, 0.748779296875, 0.7957916259765625, 0.842803955078125, 0.8898162841796875, 0.93682861328125, 0.9838409423828125, 1.030853271484375, 1.0778656005859375, 1.1248779296875, 1.1718902587890625, 1.218902587890625, 1.2659149169921875, 1.31292724609375, 1.3599395751953125, 1.406951904296875, 1.4539642333984375, 1.5009765625]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 5.0, 2.0, 5.0, 12.0, 7.0, 7.0, 12.0, 14.0, 16.0, 24.0, 22.0, 19.0, 21.0, 24.0, 23.0, 32.0, 36.0, 36.0, 41.0, 46.0, 40.0, 51.0, 36.0, 1063.0, 35.0, 39.0, 33.0, 42.0, 39.0, 30.0, 25.0, 34.0, 35.0, 23.0, 14.0, 14.0, 19.0, 10.0, 11.0, 11.0, 1.0, 5.0, 10.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-18.328125, -17.739501953125, -17.15087890625, -16.562255859375, -15.9736328125, -15.385009765625, -14.79638671875, -14.207763671875, -13.619140625, -13.030517578125, -12.44189453125, -11.853271484375, -11.2646484375, -10.676025390625, -10.08740234375, -9.498779296875, -8.91015625, -8.321533203125, -7.73291015625, -7.144287109375, -6.5556640625, -5.967041015625, -5.37841796875, -4.789794921875, -4.201171875, -3.612548828125, -3.02392578125, -2.435302734375, -1.8466796875, -1.258056640625, -0.66943359375, -0.080810546875, 0.5078125, 1.096435546875, 1.68505859375, 2.273681640625, 2.8623046875, 3.450927734375, 4.03955078125, 4.628173828125, 5.216796875, 5.805419921875, 6.39404296875, 6.982666015625, 7.5712890625, 8.159912109375, 8.74853515625, 9.337158203125, 9.92578125, 10.514404296875, 11.10302734375, 11.691650390625, 12.2802734375, 12.868896484375, 13.45751953125, 14.046142578125, 14.634765625, 15.223388671875, 15.81201171875, 16.400634765625, 16.9892578125, 17.577880859375, 18.16650390625, 18.755126953125, 19.34375]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 4.0, 3.0, 0.0, 2.0, 5.0, 10.0, 11.0, 24.0, 27.0, 31.0, 52.0, 75.0, 106.0, 174.0, 214.0, 378.0, 552.0, 788.0, 1191.0, 1876.0, 3086.0, 4757.0, 7592.0, 13081.0, 21586.0, 38175.0, 69695.0, 143680.0, 1339581.0, 227624.0, 99069.0, 51115.0, 28669.0, 16959.0, 10163.0, 6216.0, 3868.0, 2416.0, 1527.0, 937.0, 621.0, 419.0, 283.0, 178.0, 103.0, 64.0, 47.0, 39.0, 23.0, 18.0, 12.0, 8.0, 2.0, 4.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.458984375, -1.409942626953125, -1.36090087890625, -1.311859130859375, -1.2628173828125, -1.213775634765625, -1.16473388671875, -1.115692138671875, -1.066650390625, -1.017608642578125, -0.96856689453125, -0.919525146484375, -0.8704833984375, -0.821441650390625, -0.77239990234375, -0.723358154296875, -0.67431640625, -0.625274658203125, -0.57623291015625, -0.527191162109375, -0.4781494140625, -0.429107666015625, -0.38006591796875, -0.331024169921875, -0.281982421875, -0.232940673828125, -0.18389892578125, -0.134857177734375, -0.0858154296875, -0.036773681640625, 0.01226806640625, 0.061309814453125, 0.1103515625, 0.159393310546875, 0.20843505859375, 0.257476806640625, 0.3065185546875, 0.355560302734375, 0.40460205078125, 0.453643798828125, 0.502685546875, 0.551727294921875, 0.60076904296875, 0.649810791015625, 0.6988525390625, 0.747894287109375, 0.79693603515625, 0.845977783203125, 0.89501953125, 0.944061279296875, 0.99310302734375, 1.042144775390625, 1.0911865234375, 1.140228271484375, 1.18927001953125, 1.238311767578125, 1.287353515625, 1.336395263671875, 1.38543701171875, 1.434478759765625, 1.4835205078125, 1.532562255859375, 1.58160400390625, 1.630645751953125, 1.6796875]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 3.0, 3.0, 1.0, 4.0, 4.0, 8.0, 8.0, 7.0, 8.0, 16.0, 26.0, 27.0, 25.0, 29.0, 25.0, 44.0, 69.0, 91.0, 108.0, 87.0, 89.0, 69.0, 46.0, 40.0, 31.0, 26.0, 31.0, 21.0, 20.0, 15.0, 13.0, 5.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0246124267578125, -0.023933887481689453, -0.023255348205566406, -0.02257680892944336, -0.021898269653320312, -0.021219730377197266, -0.02054119110107422, -0.019862651824951172, -0.019184112548828125, -0.018505573272705078, -0.01782703399658203, -0.017148494720458984, -0.016469955444335938, -0.01579141616821289, -0.015112876892089844, -0.014434337615966797, -0.01375579833984375, -0.013077259063720703, -0.012398719787597656, -0.01172018051147461, -0.011041641235351562, -0.010363101959228516, -0.009684562683105469, -0.009006023406982422, -0.008327484130859375, -0.007648944854736328, -0.006970405578613281, -0.006291866302490234, -0.0056133270263671875, -0.004934787750244141, -0.004256248474121094, -0.003577709197998047, -0.002899169921875, -0.002220630645751953, -0.0015420913696289062, -0.0008635520935058594, -0.0001850128173828125, 0.0004935264587402344, 0.0011720657348632812, 0.0018506050109863281, 0.002529144287109375, 0.003207683563232422, 0.0038862228393554688, 0.004564762115478516, 0.0052433013916015625, 0.005921840667724609, 0.006600379943847656, 0.007278919219970703, 0.00795745849609375, 0.008635997772216797, 0.009314537048339844, 0.00999307632446289, 0.010671615600585938, 0.011350154876708984, 0.012028694152832031, 0.012707233428955078, 0.013385772705078125, 0.014064311981201172, 0.014742851257324219, 0.015421390533447266, 0.016099929809570312, 0.01677846908569336, 0.017457008361816406, 0.018135547637939453, 0.0188140869140625]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 7.0, 11.0, 14.0, 17.0, 34.0, 46.0, 48.0, 78.0, 130.0, 176.0, 273.0, 850.0, 11052.0, 890122.0, 140562.0, 3874.0, 549.0, 231.0, 142.0, 92.0, 80.0, 55.0, 33.0, 22.0, 10.0, 10.0, 10.0, 8.0, 3.0, 5.0, 5.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.302490234375, -0.29161834716796875, -0.2807464599609375, -0.26987457275390625, -0.259002685546875, -0.24813079833984375, -0.2372589111328125, -0.22638702392578125, -0.21551513671875, -0.20464324951171875, -0.1937713623046875, -0.18289947509765625, -0.172027587890625, -0.16115570068359375, -0.1502838134765625, -0.13941192626953125, -0.1285400390625, -0.11766815185546875, -0.1067962646484375, -0.09592437744140625, -0.085052490234375, -0.07418060302734375, -0.0633087158203125, -0.05243682861328125, -0.04156494140625, -0.03069305419921875, -0.0198211669921875, -0.00894927978515625, 0.001922607421875, 0.01279449462890625, 0.0236663818359375, 0.03453826904296875, 0.04541015625, 0.05628204345703125, 0.0671539306640625, 0.07802581787109375, 0.088897705078125, 0.09976959228515625, 0.1106414794921875, 0.12151336669921875, 0.13238525390625, 0.14325714111328125, 0.1541290283203125, 0.16500091552734375, 0.175872802734375, 0.18674468994140625, 0.1976165771484375, 0.20848846435546875, 0.2193603515625, 0.23023223876953125, 0.2411041259765625, 0.25197601318359375, 0.262847900390625, 0.27371978759765625, 0.2845916748046875, 0.29546356201171875, 0.30633544921875, 0.31720733642578125, 0.3280792236328125, 0.33895111083984375, 0.349822998046875, 0.36069488525390625, 0.3715667724609375, 0.38243865966796875, 0.393310546875]}, "gradients/decoder.transformer.h.19.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 17.0, 16.0, 29.0, 65.0, 239.0, 414.0, 143.0, 51.0, 17.0, 10.0, 6.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06522466242313385, -0.06387220323085785, -0.06251974403858185, -0.06116728484630585, -0.059814825654029846, -0.058462366461753845, -0.057109907269477844, -0.05575744807720184, -0.05440498888492584, -0.05305252969264984, -0.05170007050037384, -0.05034761130809784, -0.04899515211582184, -0.04764269292354584, -0.046290233731269836, -0.044937774538993835, -0.043585315346717834, -0.042232856154441833, -0.04088039696216583, -0.03952793776988983, -0.03817547857761383, -0.03682301938533783, -0.03547056019306183, -0.03411810100078583, -0.032765645533800125, -0.031413186341524124, -0.030060727149248123, -0.028708267956972122, -0.02735580876469612, -0.02600334957242012, -0.02465089038014412, -0.02329843118786812, -0.021945973858237267, -0.020593514665961266, -0.019241055473685265, -0.017888596281409264, -0.016536137089133263, -0.015183677896857262, -0.013831219635903835, -0.012478760443627834, -0.011126301251351833, -0.009773842059075832, -0.008421382866799831, -0.007068924140185118, -0.005716464947909117, -0.004364005755633116, -0.003011547029018402, -0.0016590878367424011, -0.00030662864446640015, 0.001045830431394279, 0.002398289507254958, 0.0037507484667003155, 0.0051032076589763165, 0.006455666851252317, 0.007808125577867031, 0.009160584770143032, 0.010513043962419033, 0.011865503154695034, 0.013217962346971035, 0.014570420607924461, 0.015922879800200462, 0.017275338992476463, 0.018627798184752464, 0.019980257377028465, 0.021332716569304466]}, "gradients/decoder.transformer.h.19.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 5.0, 5.0, 7.0, 7.0, 8.0, 6.0, 6.0, 15.0, 14.0, 25.0, 20.0, 34.0, 26.0, 27.0, 28.0, 33.0, 37.0, 41.0, 39.0, 53.0, 48.0, 42.0, 53.0, 53.0, 42.0, 50.0, 37.0, 32.0, 44.0, 30.0, 29.0, 23.0, 14.0, 12.0, 6.0, 20.0, 9.0, 10.0, 8.0, 6.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.012850642204284668, -0.012486969120800495, -0.012123296037316322, -0.01175962295383215, -0.011395949870347977, -0.011032276786863804, -0.010668603703379631, -0.010304930619895458, -0.009941257536411285, -0.009577584452927113, -0.00921391136944294, -0.008850238285958767, -0.008486565202474594, -0.008122892118990421, -0.0077592190355062485, -0.007395545952022076, -0.007031872868537903, -0.00666819978505373, -0.006304526701569557, -0.005940853618085384, -0.0055771805346012115, -0.005213507451117039, -0.004849834367632866, -0.004486161284148693, -0.00412248820066452, -0.0037588151171803474, -0.0033951420336961746, -0.003031468950212002, -0.002667795866727829, -0.002304122783243656, -0.0019404496997594833, -0.0015767766162753105, -0.0012131035327911377, -0.0008494304493069649, -0.00048575736582279205, -0.00012208428233861923, 0.0002415888011455536, 0.0006052618846297264, 0.0009689349681138992, 0.001332608051598072, 0.0016962811350822449, 0.0020599542185664177, 0.0024236273020505905, 0.0027873003855347633, 0.003150973469018936, 0.003514646552503109, 0.003878319635987282, 0.004241992719471455, 0.0046056658029556274, 0.0049693388864398, 0.005333011969923973, 0.005696685053408146, 0.006060358136892319, 0.0064240312203764915, 0.006787704303860664, 0.007151377387344837, 0.00751505047082901, 0.007878723554313183, 0.008242396637797356, 0.008606069721281528, 0.008969742804765701, 0.009333415888249874, 0.009697088971734047, 0.01006076205521822, 0.010424435138702393]}, "gradients/decoder.transformer.h.19.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 2.0, 2.0, 8.0, 5.0, 3.0, 11.0, 16.0, 13.0, 21.0, 15.0, 20.0, 32.0, 27.0, 32.0, 32.0, 33.0, 35.0, 34.0, 46.0, 56.0, 62.0, 57.0, 46.0, 40.0, 34.0, 50.0, 37.0, 34.0, 24.0, 28.0, 25.0, 18.0, 15.0, 18.0, 14.0, 13.0, 10.0, 13.0, 9.0, 4.0, 5.0, 3.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-34.09375, -33.090087890625, -32.08642578125, -31.082763671875, -30.0791015625, -29.075439453125, -28.07177734375, -27.068115234375, -26.064453125, -25.060791015625, -24.05712890625, -23.053466796875, -22.0498046875, -21.046142578125, -20.04248046875, -19.038818359375, -18.03515625, -17.031494140625, -16.02783203125, -15.024169921875, -14.0205078125, -13.016845703125, -12.01318359375, -11.009521484375, -10.005859375, -9.002197265625, -7.99853515625, -6.994873046875, -5.9912109375, -4.987548828125, -3.98388671875, -2.980224609375, -1.9765625, -0.972900390625, 0.03076171875, 1.034423828125, 2.0380859375, 3.041748046875, 4.04541015625, 5.049072265625, 6.052734375, 7.056396484375, 8.06005859375, 9.063720703125, 10.0673828125, 11.071044921875, 12.07470703125, 13.078369140625, 14.08203125, 15.085693359375, 16.08935546875, 17.093017578125, 18.0966796875, 19.100341796875, 20.10400390625, 21.107666015625, 22.111328125, 23.114990234375, 24.11865234375, 25.122314453125, 26.1259765625, 27.129638671875, 28.13330078125, 29.136962890625, 30.140625]}, "gradients/decoder.transformer.h.19.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 4.0, 3.0, 8.0, 3.0, 6.0, 16.0, 16.0, 22.0, 29.0, 29.0, 56.0, 92.0, 155.0, 268.0, 545.0, 1139.0, 2746.0, 7328.0, 24102.0, 95942.0, 572587.0, 267737.0, 52652.0, 14613.0, 4809.0, 1871.0, 777.0, 409.0, 206.0, 120.0, 87.0, 39.0, 36.0, 23.0, 18.0, 17.0, 9.0, 18.0, 7.0, 6.0, 1.0, 4.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-21.1875, -20.560546875, -19.93359375, -19.306640625, -18.6796875, -18.052734375, -17.42578125, -16.798828125, -16.171875, -15.544921875, -14.91796875, -14.291015625, -13.6640625, -13.037109375, -12.41015625, -11.783203125, -11.15625, -10.529296875, -9.90234375, -9.275390625, -8.6484375, -8.021484375, -7.39453125, -6.767578125, -6.140625, -5.513671875, -4.88671875, -4.259765625, -3.6328125, -3.005859375, -2.37890625, -1.751953125, -1.125, -0.498046875, 0.12890625, 0.755859375, 1.3828125, 2.009765625, 2.63671875, 3.263671875, 3.890625, 4.517578125, 5.14453125, 5.771484375, 6.3984375, 7.025390625, 7.65234375, 8.279296875, 8.90625, 9.533203125, 10.16015625, 10.787109375, 11.4140625, 12.041015625, 12.66796875, 13.294921875, 13.921875, 14.548828125, 15.17578125, 15.802734375, 16.4296875, 17.056640625, 17.68359375, 18.310546875, 18.9375]}, "gradients/decoder.transformer.h.19.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 6.0, 2.0, 5.0, 2.0, 2.0, 3.0, 6.0, 8.0, 11.0, 12.0, 7.0, 16.0, 12.0, 18.0, 16.0, 26.0, 28.0, 24.0, 34.0, 24.0, 30.0, 29.0, 33.0, 37.0, 40.0, 214.0, 1891.0, 54.0, 54.0, 38.0, 41.0, 29.0, 27.0, 36.0, 35.0, 31.0, 23.0, 38.0, 20.0, 19.0, 9.0, 11.0, 14.0, 7.0, 9.0, 5.0, 6.0, 4.0, 5.0, 3.0, 4.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-84.25, -81.4541015625, -78.658203125, -75.8623046875, -73.06640625, -70.2705078125, -67.474609375, -64.6787109375, -61.8828125, -59.0869140625, -56.291015625, -53.4951171875, -50.69921875, -47.9033203125, -45.107421875, -42.3115234375, -39.515625, -36.7197265625, -33.923828125, -31.1279296875, -28.33203125, -25.5361328125, -22.740234375, -19.9443359375, -17.1484375, -14.3525390625, -11.556640625, -8.7607421875, -5.96484375, -3.1689453125, -0.373046875, 2.4228515625, 5.21875, 8.0146484375, 10.810546875, 13.6064453125, 16.40234375, 19.1982421875, 21.994140625, 24.7900390625, 27.5859375, 30.3818359375, 33.177734375, 35.9736328125, 38.76953125, 41.5654296875, 44.361328125, 47.1572265625, 49.953125, 52.7490234375, 55.544921875, 58.3408203125, 61.13671875, 63.9326171875, 66.728515625, 69.5244140625, 72.3203125, 75.1162109375, 77.912109375, 80.7080078125, 83.50390625, 86.2998046875, 89.095703125, 91.8916015625, 94.6875]}, "gradients/decoder.transformer.h.19.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 4.0, 4.0, 3.0, 5.0, 7.0, 4.0, 10.0, 6.0, 12.0, 13.0, 10.0, 23.0, 23.0, 41.0, 34.0, 50.0, 56.0, 75.0, 92.0, 146.0, 223.0, 331.0, 646.0, 2144.0, 3009687.0, 129205.0, 1350.0, 546.0, 278.0, 168.0, 116.0, 80.0, 65.0, 45.0, 42.0, 33.0, 25.0, 19.0, 14.0, 14.0, 10.0, 12.0, 9.0, 8.0, 5.0, 3.0, 3.0, 2.0, 4.0, 4.0, 4.0, 2.0, 2.0, 0.0, 2.0], "bins": [-157.75, -153.091796875, -148.43359375, -143.775390625, -139.1171875, -134.458984375, -129.80078125, -125.142578125, -120.484375, -115.826171875, -111.16796875, -106.509765625, -101.8515625, -97.193359375, -92.53515625, -87.876953125, -83.21875, -78.560546875, -73.90234375, -69.244140625, -64.5859375, -59.927734375, -55.26953125, -50.611328125, -45.953125, -41.294921875, -36.63671875, -31.978515625, -27.3203125, -22.662109375, -18.00390625, -13.345703125, -8.6875, -4.029296875, 0.62890625, 5.287109375, 9.9453125, 14.603515625, 19.26171875, 23.919921875, 28.578125, 33.236328125, 37.89453125, 42.552734375, 47.2109375, 51.869140625, 56.52734375, 61.185546875, 65.84375, 70.501953125, 75.16015625, 79.818359375, 84.4765625, 89.134765625, 93.79296875, 98.451171875, 103.109375, 107.767578125, 112.42578125, 117.083984375, 121.7421875, 126.400390625, 131.05859375, 135.716796875, 140.375]}, "gradients/decoder.transformer.h.19.ln_1.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 21.0, 893.0, 105.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-85.62777709960938, -67.1789779663086, -48.73017501831055, -30.2813720703125, -11.832572937011719, 6.6162261962890625, 25.065032958984375, 43.513824462890625, 61.96263122558594, 80.41143035888672, 98.8602294921875, 117.30903625488281, 135.75784301757812, 154.20663452148438, 172.6554412841797, 191.10423278808594, 209.55303955078125, 228.00184631347656, 246.4506378173828, 264.8994445800781, 283.3482360839844, 301.79705810546875, 320.245849609375, 338.69464111328125, 357.1434326171875, 375.59222412109375, 394.0410461425781, 412.4898376464844, 430.9386291503906, 449.387451171875, 467.83624267578125, 486.2850341796875, 504.73382568359375, 523.1826171875, 541.6314086914062, 560.0802001953125, 578.529052734375, 596.9778442382812, 615.4266357421875, 633.8754272460938, 652.32421875, 670.7730102539062, 689.2218017578125, 707.670654296875, 726.1194458007812, 744.5682373046875, 763.0170288085938, 781.4658203125, 799.9146728515625, 818.3634643554688, 836.812255859375, 855.2611083984375, 873.7098999023438, 892.15869140625, 910.6074829101562, 929.0562744140625, 947.5050659179688, 965.953857421875, 984.4026489257812, 1002.8514404296875, 1021.30029296875, 1039.7490234375, 1058.1978759765625, 1076.646728515625, 1095.095458984375]}, "gradients/decoder.transformer.h.19.ln_1.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 9.0, 3.0, 9.0, 15.0, 10.0, 15.0, 11.0, 15.0, 17.0, 16.0, 31.0, 25.0, 29.0, 36.0, 28.0, 35.0, 45.0, 38.0, 42.0, 33.0, 45.0, 43.0, 40.0, 35.0, 34.0, 36.0, 39.0, 43.0, 22.0, 28.0, 34.0, 25.0, 17.0, 18.0, 21.0, 10.0, 9.0, 5.0, 9.0, 8.0, 7.0, 3.0, 3.0, 2.0, 1.0, 5.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-301.77392578125, -292.3157653808594, -282.8575744628906, -273.3994140625, -263.9412536621094, -254.4830780029297, -245.02490234375, -235.56674194335938, -226.1085662841797, -216.650390625, -207.19223022460938, -197.7340545654297, -188.27587890625, -178.81771850585938, -169.3595428466797, -159.9013671875, -150.44320678710938, -140.9850311279297, -131.52687072753906, -122.06869506835938, -112.61052703857422, -103.15235900878906, -93.69418334960938, -84.23601531982422, -74.77784729003906, -65.3196792602539, -55.861507415771484, -46.40333557128906, -36.945167541503906, -27.48699951171875, -18.028827667236328, -8.570655822753906, 0.887542724609375, 10.345712661743164, 19.803882598876953, 29.262052536010742, 38.72022247314453, 48.17839050292969, 57.63656234741211, 67.09473419189453, 76.55290222167969, 86.01107025146484, 95.46923828125, 104.92741394042969, 114.38558197021484, 123.84375, 133.3019256591797, 142.76010131835938, 152.21826171875, 161.6764373779297, 171.1345977783203, 180.5927734375, 190.05093383789062, 199.5091094970703, 208.96728515625, 218.42544555664062, 227.8836212158203, 237.341796875, 246.79995727539062, 256.25811767578125, 265.71630859375, 275.1744689941406, 284.63262939453125, 294.0908203125, 303.5489807128906]}, "gradients/decoder.transformer.h.18.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 6.0, 2.0, 7.0, 4.0, 4.0, 10.0, 9.0, 16.0, 17.0, 22.0, 15.0, 32.0, 29.0, 32.0, 28.0, 39.0, 36.0, 34.0, 35.0, 58.0, 62.0, 56.0, 48.0, 40.0, 39.0, 50.0, 31.0, 38.0, 27.0, 24.0, 29.0, 19.0, 14.0, 17.0, 18.0, 12.0, 11.0, 10.0, 12.0, 4.0, 7.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-34.625, -33.613525390625, -32.60205078125, -31.590576171875, -30.5791015625, -29.567626953125, -28.55615234375, -27.544677734375, -26.533203125, -25.521728515625, -24.51025390625, -23.498779296875, -22.4873046875, -21.475830078125, -20.46435546875, -19.452880859375, -18.44140625, -17.429931640625, -16.41845703125, -15.406982421875, -14.3955078125, -13.384033203125, -12.37255859375, -11.361083984375, -10.349609375, -9.338134765625, -8.32666015625, -7.315185546875, -6.3037109375, -5.292236328125, -4.28076171875, -3.269287109375, -2.2578125, -1.246337890625, -0.23486328125, 0.776611328125, 1.7880859375, 2.799560546875, 3.81103515625, 4.822509765625, 5.833984375, 6.845458984375, 7.85693359375, 8.868408203125, 9.8798828125, 10.891357421875, 11.90283203125, 12.914306640625, 13.92578125, 14.937255859375, 15.94873046875, 16.960205078125, 17.9716796875, 18.983154296875, 19.99462890625, 21.006103515625, 22.017578125, 23.029052734375, 24.04052734375, 25.052001953125, 26.0634765625, 27.074951171875, 28.08642578125, 29.097900390625, 30.109375]}, "gradients/decoder.transformer.h.18.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 5.0, 3.0, 7.0, 5.0, 14.0, 11.0, 18.0, 23.0, 33.0, 63.0, 59.0, 119.0, 170.0, 286.0, 481.0, 783.0, 1394.0, 2667.0, 5515.0, 13404.0, 163783.0, 3775296.0, 203582.0, 14589.0, 5650.0, 2877.0, 1398.0, 811.0, 432.0, 303.0, 184.0, 107.0, 71.0, 29.0, 34.0, 22.0, 16.0, 19.0, 9.0, 4.0, 9.0, 2.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-129.0, -125.263671875, -121.52734375, -117.791015625, -114.0546875, -110.318359375, -106.58203125, -102.845703125, -99.109375, -95.373046875, -91.63671875, -87.900390625, -84.1640625, -80.427734375, -76.69140625, -72.955078125, -69.21875, -65.482421875, -61.74609375, -58.009765625, -54.2734375, -50.537109375, -46.80078125, -43.064453125, -39.328125, -35.591796875, -31.85546875, -28.119140625, -24.3828125, -20.646484375, -16.91015625, -13.173828125, -9.4375, -5.701171875, -1.96484375, 1.771484375, 5.5078125, 9.244140625, 12.98046875, 16.716796875, 20.453125, 24.189453125, 27.92578125, 31.662109375, 35.3984375, 39.134765625, 42.87109375, 46.607421875, 50.34375, 54.080078125, 57.81640625, 61.552734375, 65.2890625, 69.025390625, 72.76171875, 76.498046875, 80.234375, 83.970703125, 87.70703125, 91.443359375, 95.1796875, 98.916015625, 102.65234375, 106.388671875, 110.125]}, "gradients/decoder.transformer.h.18.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 3.0, 1.0, 7.0, 7.0, 9.0, 7.0, 12.0, 13.0, 24.0, 12.0, 28.0, 43.0, 80.0, 190.0, 529.0, 1261.0, 1069.0, 430.0, 152.0, 60.0, 26.0, 22.0, 22.0, 9.0, 14.0, 12.0, 9.0, 4.0, 6.0, 6.0, 7.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-127.375, -124.0986328125, -120.822265625, -117.5458984375, -114.26953125, -110.9931640625, -107.716796875, -104.4404296875, -101.1640625, -97.8876953125, -94.611328125, -91.3349609375, -88.05859375, -84.7822265625, -81.505859375, -78.2294921875, -74.953125, -71.6767578125, -68.400390625, -65.1240234375, -61.84765625, -58.5712890625, -55.294921875, -52.0185546875, -48.7421875, -45.4658203125, -42.189453125, -38.9130859375, -35.63671875, -32.3603515625, -29.083984375, -25.8076171875, -22.53125, -19.2548828125, -15.978515625, -12.7021484375, -9.42578125, -6.1494140625, -2.873046875, 0.4033203125, 3.6796875, 6.9560546875, 10.232421875, 13.5087890625, 16.78515625, 20.0615234375, 23.337890625, 26.6142578125, 29.890625, 33.1669921875, 36.443359375, 39.7197265625, 42.99609375, 46.2724609375, 49.548828125, 52.8251953125, 56.1015625, 59.3779296875, 62.654296875, 65.9306640625, 69.20703125, 72.4833984375, 75.759765625, 79.0361328125, 82.3125]}, "gradients/decoder.transformer.h.18.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 4.0, 2.0, 2.0, 7.0, 7.0, 9.0, 9.0, 15.0, 26.0, 31.0, 53.0, 63.0, 101.0, 126.0, 246.0, 362.0, 631.0, 991.0, 1844.0, 3380.0, 6460.0, 13184.0, 31685.0, 134571.0, 3564825.0, 356389.0, 44570.0, 17137.0, 8149.0, 4103.0, 2234.0, 1177.0, 684.0, 413.0, 237.0, 175.0, 117.0, 74.0, 58.0, 38.0, 27.0, 25.0, 16.0, 12.0, 6.0, 4.0, 6.0, 5.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-120.9375, -116.7177734375, -112.498046875, -108.2783203125, -104.05859375, -99.8388671875, -95.619140625, -91.3994140625, -87.1796875, -82.9599609375, -78.740234375, -74.5205078125, -70.30078125, -66.0810546875, -61.861328125, -57.6416015625, -53.421875, -49.2021484375, -44.982421875, -40.7626953125, -36.54296875, -32.3232421875, -28.103515625, -23.8837890625, -19.6640625, -15.4443359375, -11.224609375, -7.0048828125, -2.78515625, 1.4345703125, 5.654296875, 9.8740234375, 14.09375, 18.3134765625, 22.533203125, 26.7529296875, 30.97265625, 35.1923828125, 39.412109375, 43.6318359375, 47.8515625, 52.0712890625, 56.291015625, 60.5107421875, 64.73046875, 68.9501953125, 73.169921875, 77.3896484375, 81.609375, 85.8291015625, 90.048828125, 94.2685546875, 98.48828125, 102.7080078125, 106.927734375, 111.1474609375, 115.3671875, 119.5869140625, 123.806640625, 128.0263671875, 132.24609375, 136.4658203125, 140.685546875, 144.9052734375, 149.125]}, "gradients/decoder.transformer.h.18.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 4.0, 2.0, 4.0, 6.0, 5.0, 13.0, 15.0, 20.0, 35.0, 40.0, 57.0, 95.0, 175.0, 192.0, 126.0, 78.0, 52.0, 36.0, 19.0, 16.0, 8.0, 2.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-570.85302734375, -557.64697265625, -544.4409790039062, -531.2349243164062, -518.0289306640625, -504.8229064941406, -491.61688232421875, -478.41082763671875, -465.204833984375, -451.9988098144531, -438.79278564453125, -425.5867614746094, -412.3807373046875, -399.1747131347656, -385.96868896484375, -372.76263427734375, -359.5566101074219, -346.3505859375, -333.1445617675781, -319.93853759765625, -306.7325134277344, -293.5264892578125, -280.3204345703125, -267.11444091796875, -253.9084014892578, -240.70237731933594, -227.49635314941406, -214.29031372070312, -201.08428955078125, -187.87826538085938, -174.6722412109375, -161.46621704101562, -148.26019287109375, -135.05416870117188, -121.84814453125, -108.6421127319336, -95.43608856201172, -82.23006439208984, -69.02403259277344, -55.81800842285156, -42.61198425292969, -29.40595817565918, -16.199932098388672, -2.9939041137695312, 10.212120056152344, 23.41814422607422, 36.624176025390625, 49.8302001953125, 63.036224365234375, 76.24224853515625, 89.44827270507812, 102.65430450439453, 115.8603286743164, 129.06634521484375, 142.2723846435547, 155.47840881347656, 168.68443298339844, 181.8904571533203, 195.0964813232422, 208.30252075195312, 221.508544921875, 234.71456909179688, 247.92059326171875, 261.1266174316406, 274.3326416015625]}, "gradients/decoder.transformer.h.18.ln_2.bias": {"_type": "histogram", "values": [2.0, 4.0, 0.0, 3.0, 2.0, 1.0, 2.0, 4.0, 3.0, 5.0, 6.0, 8.0, 14.0, 18.0, 12.0, 13.0, 21.0, 24.0, 17.0, 20.0, 31.0, 35.0, 30.0, 30.0, 31.0, 35.0, 48.0, 51.0, 39.0, 36.0, 49.0, 44.0, 30.0, 31.0, 31.0, 32.0, 38.0, 36.0, 16.0, 28.0, 23.0, 22.0, 21.0, 10.0, 14.0, 8.0, 6.0, 9.0, 6.0, 3.0, 5.0, 1.0, 5.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-190.68612670898438, -184.14071655273438, -177.59530639648438, -171.04989624023438, -164.50448608398438, -157.95907592773438, -151.41366577148438, -144.86825561523438, -138.32284545898438, -131.77743530273438, -125.23202514648438, -118.68661499023438, -112.14120483398438, -105.59579467773438, -99.05038452148438, -92.50497436523438, -85.9595718383789, -79.4141616821289, -72.8687515258789, -66.3233413696289, -59.777931213378906, -53.23252487182617, -46.68711471557617, -40.14170455932617, -33.59629440307617, -27.050884246826172, -20.505474090576172, -13.960065841674805, -7.414655685424805, -0.8692474365234375, 5.6761627197265625, 12.221572875976562, 18.766983032226562, 25.312393188476562, 31.857803344726562, 38.40321350097656, 44.94862365722656, 51.4940299987793, 58.0394401550293, 64.58485412597656, 71.13026428222656, 77.67567443847656, 84.22108459472656, 90.76649475097656, 97.31190490722656, 103.85731506347656, 110.40272521972656, 116.94813537597656, 123.49353790283203, 130.0389404296875, 136.5843505859375, 143.1297607421875, 149.6751708984375, 156.2205810546875, 162.7659912109375, 169.3114013671875, 175.8568115234375, 182.4022216796875, 188.9476318359375, 195.4930419921875, 202.0384521484375, 208.5838623046875, 215.1292724609375, 221.6746826171875, 228.2200927734375]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 3.0, 6.0, 3.0, 3.0, 10.0, 13.0, 9.0, 16.0, 11.0, 16.0, 27.0, 27.0, 33.0, 18.0, 34.0, 32.0, 40.0, 34.0, 45.0, 37.0, 46.0, 55.0, 46.0, 46.0, 48.0, 39.0, 43.0, 33.0, 40.0, 26.0, 18.0, 24.0, 24.0, 16.0, 12.0, 12.0, 15.0, 9.0, 5.0, 12.0, 3.0, 5.0, 4.0, 4.0, 3.0, 2.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-32.0, -30.9912109375, -29.982421875, -28.9736328125, -27.96484375, -26.9560546875, -25.947265625, -24.9384765625, -23.9296875, -22.9208984375, -21.912109375, -20.9033203125, -19.89453125, -18.8857421875, -17.876953125, -16.8681640625, -15.859375, -14.8505859375, -13.841796875, -12.8330078125, -11.82421875, -10.8154296875, -9.806640625, -8.7978515625, -7.7890625, -6.7802734375, -5.771484375, -4.7626953125, -3.75390625, -2.7451171875, -1.736328125, -0.7275390625, 0.28125, 1.2900390625, 2.298828125, 3.3076171875, 4.31640625, 5.3251953125, 6.333984375, 7.3427734375, 8.3515625, 9.3603515625, 10.369140625, 11.3779296875, 12.38671875, 13.3955078125, 14.404296875, 15.4130859375, 16.421875, 17.4306640625, 18.439453125, 19.4482421875, 20.45703125, 21.4658203125, 22.474609375, 23.4833984375, 24.4921875, 25.5009765625, 26.509765625, 27.5185546875, 28.52734375, 29.5361328125, 30.544921875, 31.5537109375, 32.5625]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 4.0, 8.0, 7.0, 11.0, 11.0, 31.0, 31.0, 46.0, 70.0, 122.0, 164.0, 222.0, 321.0, 461.0, 689.0, 1078.0, 1582.0, 2536.0, 3955.0, 6385.0, 10522.0, 18144.0, 32091.0, 58406.0, 116898.0, 273176.0, 270483.0, 115483.0, 58047.0, 31687.0, 17923.0, 10386.0, 6368.0, 3931.0, 2493.0, 1643.0, 1020.0, 678.0, 452.0, 319.0, 209.0, 130.0, 89.0, 75.0, 53.0, 54.0, 25.0, 13.0, 15.0, 9.0, 3.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.841796875, -1.7842254638671875, -1.726654052734375, -1.6690826416015625, -1.61151123046875, -1.5539398193359375, -1.496368408203125, -1.4387969970703125, -1.3812255859375, -1.3236541748046875, -1.266082763671875, -1.2085113525390625, -1.15093994140625, -1.0933685302734375, -1.035797119140625, -0.9782257080078125, -0.920654296875, -0.8630828857421875, -0.805511474609375, -0.7479400634765625, -0.69036865234375, -0.6327972412109375, -0.575225830078125, -0.5176544189453125, -0.4600830078125, -0.4025115966796875, -0.344940185546875, -0.2873687744140625, -0.22979736328125, -0.1722259521484375, -0.114654541015625, -0.0570831298828125, 0.00048828125, 0.0580596923828125, 0.115631103515625, 0.1732025146484375, 0.23077392578125, 0.2883453369140625, 0.345916748046875, 0.4034881591796875, 0.4610595703125, 0.5186309814453125, 0.576202392578125, 0.6337738037109375, 0.69134521484375, 0.7489166259765625, 0.806488037109375, 0.8640594482421875, 0.921630859375, 0.9792022705078125, 1.036773681640625, 1.0943450927734375, 1.15191650390625, 1.2094879150390625, 1.267059326171875, 1.3246307373046875, 1.3822021484375, 1.4397735595703125, 1.497344970703125, 1.5549163818359375, 1.61248779296875, 1.6700592041015625, 1.727630615234375, 1.7852020263671875, 1.8427734375]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 7.0, 0.0, 10.0, 9.0, 18.0, 16.0, 12.0, 18.0, 22.0, 17.0, 32.0, 28.0, 34.0, 42.0, 46.0, 30.0, 44.0, 45.0, 44.0, 1059.0, 39.0, 41.0, 40.0, 40.0, 31.0, 31.0, 33.0, 32.0, 31.0, 19.0, 26.0, 20.0, 18.0, 20.0, 17.0, 13.0, 10.0, 5.0, 6.0, 12.0, 4.0, 2.0, 1.0, 3.0, 3.0, 2.0], "bins": [-24.40625, -23.761962890625, -23.11767578125, -22.473388671875, -21.8291015625, -21.184814453125, -20.54052734375, -19.896240234375, -19.251953125, -18.607666015625, -17.96337890625, -17.319091796875, -16.6748046875, -16.030517578125, -15.38623046875, -14.741943359375, -14.09765625, -13.453369140625, -12.80908203125, -12.164794921875, -11.5205078125, -10.876220703125, -10.23193359375, -9.587646484375, -8.943359375, -8.299072265625, -7.65478515625, -7.010498046875, -6.3662109375, -5.721923828125, -5.07763671875, -4.433349609375, -3.7890625, -3.144775390625, -2.50048828125, -1.856201171875, -1.2119140625, -0.567626953125, 0.07666015625, 0.720947265625, 1.365234375, 2.009521484375, 2.65380859375, 3.298095703125, 3.9423828125, 4.586669921875, 5.23095703125, 5.875244140625, 6.51953125, 7.163818359375, 7.80810546875, 8.452392578125, 9.0966796875, 9.740966796875, 10.38525390625, 11.029541015625, 11.673828125, 12.318115234375, 12.96240234375, 13.606689453125, 14.2509765625, 14.895263671875, 15.53955078125, 16.183837890625, 16.828125]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 5.0, 7.0, 6.0, 5.0, 11.0, 13.0, 17.0, 37.0, 51.0, 59.0, 112.0, 181.0, 228.0, 367.0, 579.0, 884.0, 1488.0, 2256.0, 3649.0, 6053.0, 9925.0, 16739.0, 28540.0, 50233.0, 94451.0, 220907.0, 1356829.0, 141103.0, 69067.0, 38035.0, 21808.0, 12814.0, 7849.0, 4830.0, 2866.0, 1820.0, 1180.0, 728.0, 449.0, 331.0, 199.0, 131.0, 110.0, 53.0, 53.0, 33.0, 18.0, 5.0, 10.0, 8.0, 4.0, 3.0, 4.0], "bins": [-1.9892578125, -1.9358673095703125, -1.882476806640625, -1.8290863037109375, -1.77569580078125, -1.7223052978515625, -1.668914794921875, -1.6155242919921875, -1.5621337890625, -1.5087432861328125, -1.455352783203125, -1.4019622802734375, -1.34857177734375, -1.2951812744140625, -1.241790771484375, -1.1884002685546875, -1.135009765625, -1.0816192626953125, -1.028228759765625, -0.9748382568359375, -0.92144775390625, -0.8680572509765625, -0.814666748046875, -0.7612762451171875, -0.7078857421875, -0.6544952392578125, -0.601104736328125, -0.5477142333984375, -0.49432373046875, -0.4409332275390625, -0.387542724609375, -0.3341522216796875, -0.28076171875, -0.2273712158203125, -0.173980712890625, -0.1205902099609375, -0.06719970703125, -0.0138092041015625, 0.039581298828125, 0.0929718017578125, 0.1463623046875, 0.1997528076171875, 0.253143310546875, 0.3065338134765625, 0.35992431640625, 0.4133148193359375, 0.466705322265625, 0.5200958251953125, 0.573486328125, 0.6268768310546875, 0.680267333984375, 0.7336578369140625, 0.78704833984375, 0.8404388427734375, 0.893829345703125, 0.9472198486328125, 1.0006103515625, 1.0540008544921875, 1.107391357421875, 1.1607818603515625, 1.21417236328125, 1.2675628662109375, 1.320953369140625, 1.3743438720703125, 1.427734375]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 4.0, 5.0, 1.0, 6.0, 8.0, 9.0, 7.0, 10.0, 18.0, 31.0, 20.0, 45.0, 45.0, 76.0, 93.0, 107.0, 128.0, 106.0, 89.0, 51.0, 23.0, 24.0, 24.0, 20.0, 11.0, 8.0, 9.0, 8.0, 5.0, 6.0, 5.0, 4.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0251617431640625, -0.02435159683227539, -0.02354145050048828, -0.022731304168701172, -0.021921157836914062, -0.021111011505126953, -0.020300865173339844, -0.019490718841552734, -0.018680572509765625, -0.017870426177978516, -0.017060279846191406, -0.016250133514404297, -0.015439987182617188, -0.014629840850830078, -0.013819694519042969, -0.01300954818725586, -0.01219940185546875, -0.01138925552368164, -0.010579109191894531, -0.009768962860107422, -0.008958816528320312, -0.008148670196533203, -0.007338523864746094, -0.006528377532958984, -0.005718231201171875, -0.004908084869384766, -0.004097938537597656, -0.003287792205810547, -0.0024776458740234375, -0.0016674995422363281, -0.0008573532104492188, -4.7206878662109375e-05, 0.000762939453125, 0.0015730857849121094, 0.0023832321166992188, 0.003193378448486328, 0.0040035247802734375, 0.004813671112060547, 0.005623817443847656, 0.006433963775634766, 0.007244110107421875, 0.008054256439208984, 0.008864402770996094, 0.009674549102783203, 0.010484695434570312, 0.011294841766357422, 0.012104988098144531, 0.01291513442993164, 0.01372528076171875, 0.01453542709350586, 0.015345573425292969, 0.016155719757080078, 0.016965866088867188, 0.017776012420654297, 0.018586158752441406, 0.019396305084228516, 0.020206451416015625, 0.021016597747802734, 0.021826744079589844, 0.022636890411376953, 0.023447036743164062, 0.024257183074951172, 0.02506732940673828, 0.02587747573852539, 0.0266876220703125]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 3.0, 7.0, 8.0, 6.0, 6.0, 12.0, 8.0, 27.0, 26.0, 58.0, 57.0, 84.0, 163.0, 462.0, 2433.0, 52710.0, 979013.0, 11665.0, 1114.0, 282.0, 133.0, 86.0, 53.0, 50.0, 26.0, 9.0, 17.0, 10.0, 10.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.42578125, -0.412994384765625, -0.40020751953125, -0.387420654296875, -0.3746337890625, -0.361846923828125, -0.34906005859375, -0.336273193359375, -0.323486328125, -0.310699462890625, -0.29791259765625, -0.285125732421875, -0.2723388671875, -0.259552001953125, -0.24676513671875, -0.233978271484375, -0.22119140625, -0.208404541015625, -0.19561767578125, -0.182830810546875, -0.1700439453125, -0.157257080078125, -0.14447021484375, -0.131683349609375, -0.118896484375, -0.106109619140625, -0.09332275390625, -0.080535888671875, -0.0677490234375, -0.054962158203125, -0.04217529296875, -0.029388427734375, -0.0166015625, -0.003814697265625, 0.00897216796875, 0.021759033203125, 0.0345458984375, 0.047332763671875, 0.06011962890625, 0.072906494140625, 0.085693359375, 0.098480224609375, 0.11126708984375, 0.124053955078125, 0.1368408203125, 0.149627685546875, 0.16241455078125, 0.175201416015625, 0.18798828125, 0.200775146484375, 0.21356201171875, 0.226348876953125, 0.2391357421875, 0.251922607421875, 0.26470947265625, 0.277496337890625, 0.290283203125, 0.303070068359375, 0.31585693359375, 0.328643798828125, 0.3414306640625, 0.354217529296875, 0.36700439453125, 0.379791259765625, 0.392578125]}, "gradients/decoder.transformer.h.18.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 7.0, 5.0, 13.0, 33.0, 67.0, 231.0, 453.0, 115.0, 54.0, 21.0, 12.0, 4.0, 0.0, 2.0], "bins": [-0.07912231981754303, -0.07771774381399155, -0.07631316781044006, -0.07490859180688858, -0.0735040158033371, -0.07209943979978561, -0.07069486379623413, -0.06929028779268265, -0.06788571178913116, -0.06648113578557968, -0.0650765597820282, -0.06367198377847672, -0.06226740777492523, -0.06086283177137375, -0.059458255767822266, -0.05805367976427078, -0.0566491037607193, -0.055244527757167816, -0.05383995175361633, -0.05243537575006485, -0.05103079974651337, -0.049626223742961884, -0.0482216477394104, -0.04681707173585892, -0.045412495732307434, -0.04400791972875595, -0.04260334372520447, -0.041198767721652985, -0.0397941917181015, -0.03838961571455002, -0.036985039710998535, -0.03558046370744705, -0.03417588397860527, -0.03277130797505379, -0.031366731971502304, -0.02996215596795082, -0.028557579964399338, -0.027153003960847855, -0.02574842795729637, -0.02434385195374489, -0.022939275950193405, -0.021534699946641922, -0.02013012394309044, -0.018725547939538956, -0.017320971935987473, -0.01591639593243599, -0.014511819928884506, -0.013107243925333023, -0.01170266792178154, -0.010298091918230057, -0.008893515914678574, -0.0074889399111270905, -0.006084363907575607, -0.004679787904024124, -0.003275211900472641, -0.0018706358969211578, -0.0004660598933696747, 0.0009385161101818085, 0.0023430921137332916, 0.0037476681172847748, 0.005152244120836258, 0.006556820124387741, 0.007961396127939224, 0.009365972131490707, 0.01077054813504219]}, "gradients/decoder.transformer.h.18.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 2.0, 3.0, 1.0, 6.0, 3.0, 5.0, 3.0, 12.0, 14.0, 13.0, 19.0, 26.0, 25.0, 27.0, 14.0, 28.0, 46.0, 37.0, 34.0, 40.0, 49.0, 38.0, 52.0, 35.0, 48.0, 41.0, 39.0, 31.0, 34.0, 25.0, 36.0, 24.0, 32.0, 32.0, 22.0, 14.0, 15.0, 15.0, 9.0, 7.0, 10.0, 9.0, 7.0, 11.0, 4.0, 4.0, 7.0, 4.0, 1.0, 1.0, 1.0], "bins": [-0.0127488374710083, -0.012401076965034008, -0.012053316459059715, -0.011705555953085423, -0.01135779544711113, -0.011010034941136837, -0.010662274435162544, -0.010314513929188251, -0.009966753423213959, -0.009618992917239666, -0.009271232411265373, -0.00892347190529108, -0.008575711399316788, -0.008227950893342495, -0.007880190387368202, -0.0075324298813939095, -0.007184669375419617, -0.006836908869445324, -0.006489148363471031, -0.0061413878574967384, -0.005793627351522446, -0.005445866845548153, -0.00509810633957386, -0.004750345833599567, -0.004402585327625275, -0.004054824821650982, -0.003707064315676689, -0.0033593038097023964, -0.0030115433037281036, -0.002663782797753811, -0.002316022291779518, -0.0019682617858052254, -0.0016205012798309326, -0.0012727407738566399, -0.0009249802678823471, -0.0005772197619080544, -0.0002294592559337616, 0.00011830125004053116, 0.0004660617560148239, 0.0008138222619891167, 0.0011615827679634094, 0.0015093432739377022, 0.001857103779911995, 0.0022048642858862877, 0.0025526247918605804, 0.002900385297834873, 0.003248145803809166, 0.0035959063097834587, 0.0039436668157577515, 0.004291427321732044, 0.004639187827706337, 0.00498694833368063, 0.0053347088396549225, 0.005682469345629215, 0.006030229851603508, 0.006377990357577801, 0.0067257508635520935, 0.007073511369526386, 0.007421271875500679, 0.007769032381474972, 0.008116792887449265, 0.008464553393423557, 0.00881231389939785, 0.009160074405372143, 0.009507834911346436]}, "gradients/decoder.transformer.h.18.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 3.0, 6.0, 4.0, 2.0, 10.0, 13.0, 9.0, 16.0, 11.0, 16.0, 27.0, 27.0, 33.0, 18.0, 34.0, 32.0, 40.0, 34.0, 45.0, 37.0, 46.0, 55.0, 46.0, 46.0, 48.0, 39.0, 43.0, 33.0, 40.0, 26.0, 18.0, 24.0, 24.0, 16.0, 12.0, 12.0, 15.0, 9.0, 5.0, 12.0, 3.0, 5.0, 4.0, 4.0, 3.0, 2.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-32.0, -30.9912109375, -29.982421875, -28.9736328125, -27.96484375, -26.9560546875, -25.947265625, -24.9384765625, -23.9296875, -22.9208984375, -21.912109375, -20.9033203125, -19.89453125, -18.8857421875, -17.876953125, -16.8681640625, -15.859375, -14.8505859375, -13.841796875, -12.8330078125, -11.82421875, -10.8154296875, -9.806640625, -8.7978515625, -7.7890625, -6.7802734375, -5.771484375, -4.7626953125, -3.75390625, -2.7451171875, -1.736328125, -0.7275390625, 0.28125, 1.2900390625, 2.298828125, 3.3076171875, 4.31640625, 5.3251953125, 6.333984375, 7.3427734375, 8.3515625, 9.3603515625, 10.369140625, 11.3779296875, 12.38671875, 13.3955078125, 14.404296875, 15.4130859375, 16.421875, 17.4306640625, 18.439453125, 19.4482421875, 20.45703125, 21.4658203125, 22.474609375, 23.4833984375, 24.4921875, 25.5009765625, 26.509765625, 27.5185546875, 28.52734375, 29.5361328125, 30.544921875, 31.5537109375, 32.5625]}, "gradients/decoder.transformer.h.18.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 2.0, 5.0, 5.0, 6.0, 4.0, 8.0, 9.0, 17.0, 20.0, 27.0, 30.0, 47.0, 51.0, 62.0, 113.0, 120.0, 152.0, 201.0, 284.0, 396.0, 469.0, 630.0, 933.0, 1343.0, 2186.0, 3760.0, 8127.0, 25861.0, 181847.0, 715203.0, 76104.0, 15505.0, 5751.0, 3058.0, 1811.0, 1206.0, 777.0, 637.0, 426.0, 339.0, 233.0, 187.0, 140.0, 114.0, 74.0, 62.0, 56.0, 33.0, 35.0, 20.0, 19.0, 16.0, 10.0, 11.0, 7.0, 5.0, 5.0, 3.0, 3.0, 3.0, 1.0], "bins": [-38.96875, -37.7626953125, -36.556640625, -35.3505859375, -34.14453125, -32.9384765625, -31.732421875, -30.5263671875, -29.3203125, -28.1142578125, -26.908203125, -25.7021484375, -24.49609375, -23.2900390625, -22.083984375, -20.8779296875, -19.671875, -18.4658203125, -17.259765625, -16.0537109375, -14.84765625, -13.6416015625, -12.435546875, -11.2294921875, -10.0234375, -8.8173828125, -7.611328125, -6.4052734375, -5.19921875, -3.9931640625, -2.787109375, -1.5810546875, -0.375, 0.8310546875, 2.037109375, 3.2431640625, 4.44921875, 5.6552734375, 6.861328125, 8.0673828125, 9.2734375, 10.4794921875, 11.685546875, 12.8916015625, 14.09765625, 15.3037109375, 16.509765625, 17.7158203125, 18.921875, 20.1279296875, 21.333984375, 22.5400390625, 23.74609375, 24.9521484375, 26.158203125, 27.3642578125, 28.5703125, 29.7763671875, 30.982421875, 32.1884765625, 33.39453125, 34.6005859375, 35.806640625, 37.0126953125, 38.21875]}, "gradients/decoder.transformer.h.18.attn.c_attn.bias": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 1.0, 6.0, 1.0, 2.0, 7.0, 5.0, 6.0, 9.0, 9.0, 9.0, 17.0, 20.0, 14.0, 16.0, 27.0, 23.0, 25.0, 29.0, 39.0, 32.0, 48.0, 52.0, 53.0, 70.0, 386.0, 1639.0, 71.0, 58.0, 42.0, 37.0, 49.0, 34.0, 39.0, 19.0, 22.0, 30.0, 22.0, 14.0, 15.0, 8.0, 9.0, 4.0, 8.0, 5.0, 5.0, 4.0, 5.0, 5.0, 2.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-84.9375, -81.912109375, -78.88671875, -75.861328125, -72.8359375, -69.810546875, -66.78515625, -63.759765625, -60.734375, -57.708984375, -54.68359375, -51.658203125, -48.6328125, -45.607421875, -42.58203125, -39.556640625, -36.53125, -33.505859375, -30.48046875, -27.455078125, -24.4296875, -21.404296875, -18.37890625, -15.353515625, -12.328125, -9.302734375, -6.27734375, -3.251953125, -0.2265625, 2.798828125, 5.82421875, 8.849609375, 11.875, 14.900390625, 17.92578125, 20.951171875, 23.9765625, 27.001953125, 30.02734375, 33.052734375, 36.078125, 39.103515625, 42.12890625, 45.154296875, 48.1796875, 51.205078125, 54.23046875, 57.255859375, 60.28125, 63.306640625, 66.33203125, 69.357421875, 72.3828125, 75.408203125, 78.43359375, 81.458984375, 84.484375, 87.509765625, 90.53515625, 93.560546875, 96.5859375, 99.611328125, 102.63671875, 105.662109375, 108.6875]}, "gradients/decoder.transformer.h.18.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 5.0, 4.0, 5.0, 6.0, 6.0, 9.0, 9.0, 11.0, 14.0, 17.0, 21.0, 39.0, 57.0, 62.0, 89.0, 169.0, 332.0, 829.0, 3311.0, 32280.0, 3078066.0, 25917.0, 2834.0, 723.0, 358.0, 165.0, 98.0, 67.0, 55.0, 33.0, 28.0, 28.0, 13.0, 10.0, 7.0, 11.0, 6.0, 2.0, 7.0, 3.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-226.75, -219.54296875, -212.3359375, -205.12890625, -197.921875, -190.71484375, -183.5078125, -176.30078125, -169.09375, -161.88671875, -154.6796875, -147.47265625, -140.265625, -133.05859375, -125.8515625, -118.64453125, -111.4375, -104.23046875, -97.0234375, -89.81640625, -82.609375, -75.40234375, -68.1953125, -60.98828125, -53.78125, -46.57421875, -39.3671875, -32.16015625, -24.953125, -17.74609375, -10.5390625, -3.33203125, 3.875, 11.08203125, 18.2890625, 25.49609375, 32.703125, 39.91015625, 47.1171875, 54.32421875, 61.53125, 68.73828125, 75.9453125, 83.15234375, 90.359375, 97.56640625, 104.7734375, 111.98046875, 119.1875, 126.39453125, 133.6015625, 140.80859375, 148.015625, 155.22265625, 162.4296875, 169.63671875, 176.84375, 184.05078125, 191.2578125, 198.46484375, 205.671875, 212.87890625, 220.0859375, 227.29296875, 234.5]}, "gradients/decoder.transformer.h.18.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 5.0, 8.0, 6.0, 21.0, 48.0, 89.0, 162.0, 227.0, 170.0, 120.0, 65.0, 42.0, 29.0, 7.0, 6.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-287.5108337402344, -281.60833740234375, -275.70587158203125, -269.8033752441406, -263.9009094238281, -257.9984130859375, -252.09593200683594, -246.19345092773438, -240.29095458984375, -234.3884735107422, -228.48599243164062, -222.58349609375, -216.68101501464844, -210.77853393554688, -204.8760528564453, -198.97357177734375, -193.0710906982422, -187.16860961914062, -181.26612854003906, -175.3636474609375, -169.46115112304688, -163.5586700439453, -157.65618896484375, -151.7537078857422, -145.85122680664062, -139.94874572753906, -134.0462646484375, -128.14376831054688, -122.24128723144531, -116.33880615234375, -110.43632507324219, -104.53384399414062, -98.63134002685547, -92.7288589477539, -86.82637023925781, -80.92388916015625, -75.02140808105469, -69.1189193725586, -63.21643829345703, -57.3139533996582, -51.411468505859375, -45.50898361206055, -39.60649871826172, -33.704017639160156, -27.801532745361328, -21.8990478515625, -15.996566772460938, -10.09408187866211, -4.191596984863281, 1.7108869552612305, 7.613370895385742, 13.515853881835938, 19.418338775634766, 25.320823669433594, 31.223304748535156, 37.125789642333984, 43.02827453613281, 48.93075942993164, 54.83324432373047, 60.73572540283203, 66.63821411132812, 72.54069519042969, 78.44317626953125, 84.34565734863281, 90.2481460571289]}, "gradients/decoder.transformer.h.18.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 3.0, 2.0, 4.0, 2.0, 5.0, 8.0, 10.0, 9.0, 11.0, 10.0, 17.0, 20.0, 23.0, 27.0, 23.0, 24.0, 18.0, 28.0, 46.0, 43.0, 32.0, 35.0, 52.0, 38.0, 48.0, 36.0, 35.0, 44.0, 33.0, 30.0, 26.0, 30.0, 23.0, 34.0, 25.0, 22.0, 18.0, 17.0, 16.0, 13.0, 12.0, 5.0, 8.0, 8.0, 7.0, 2.0, 8.0, 3.0, 7.0, 2.0, 2.0, 0.0, 0.0, 6.0, 3.0], "bins": [-278.3049621582031, -269.94921875, -261.59344482421875, -253.23768615722656, -244.88192749023438, -236.52618408203125, -228.17042541503906, -219.81466674804688, -211.4589080810547, -203.1031494140625, -194.7473907470703, -186.39163208007812, -178.035888671875, -169.68011474609375, -161.32437133789062, -152.96861267089844, -144.61285400390625, -136.25709533691406, -127.90133666992188, -119.54558563232422, -111.18982696533203, -102.83406829833984, -94.47831726074219, -86.12255859375, -77.76679992675781, -69.41104125976562, -61.0552864074707, -52.69953155517578, -44.343772888183594, -35.988014221191406, -27.632259368896484, -19.276504516601562, -10.920745849609375, -2.5649890899658203, 5.790767669677734, 14.146524429321289, 22.502281188964844, 30.85803985595703, 39.21379470825195, 47.569549560546875, 55.92530822753906, 64.28106689453125, 72.63682556152344, 80.9925765991211, 89.34833526611328, 97.70409393310547, 106.05984497070312, 114.41560363769531, 122.7713623046875, 131.1271209716797, 139.48287963867188, 147.83863830566406, 156.19439697265625, 164.55014038085938, 172.90589904785156, 181.26165771484375, 189.61741638183594, 197.97317504882812, 206.3289337158203, 214.6846923828125, 223.04043579101562, 231.39620971679688, 239.751953125, 248.1077117919922, 256.4634704589844]}, "gradients/decoder.transformer.h.17.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 4.0, 1.0, 5.0, 4.0, 8.0, 9.0, 8.0, 13.0, 19.0, 17.0, 25.0, 19.0, 27.0, 21.0, 24.0, 35.0, 46.0, 33.0, 42.0, 35.0, 54.0, 44.0, 49.0, 47.0, 51.0, 50.0, 40.0, 30.0, 37.0, 37.0, 22.0, 17.0, 22.0, 20.0, 16.0, 9.0, 15.0, 11.0, 9.0, 9.0, 7.0, 4.0, 4.0, 3.0, 2.0, 4.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-33.15625, -32.12353515625, -31.0908203125, -30.05810546875, -29.025390625, -27.99267578125, -26.9599609375, -25.92724609375, -24.89453125, -23.86181640625, -22.8291015625, -21.79638671875, -20.763671875, -19.73095703125, -18.6982421875, -17.66552734375, -16.6328125, -15.60009765625, -14.5673828125, -13.53466796875, -12.501953125, -11.46923828125, -10.4365234375, -9.40380859375, -8.37109375, -7.33837890625, -6.3056640625, -5.27294921875, -4.240234375, -3.20751953125, -2.1748046875, -1.14208984375, -0.109375, 0.92333984375, 1.9560546875, 2.98876953125, 4.021484375, 5.05419921875, 6.0869140625, 7.11962890625, 8.15234375, 9.18505859375, 10.2177734375, 11.25048828125, 12.283203125, 13.31591796875, 14.3486328125, 15.38134765625, 16.4140625, 17.44677734375, 18.4794921875, 19.51220703125, 20.544921875, 21.57763671875, 22.6103515625, 23.64306640625, 24.67578125, 25.70849609375, 26.7412109375, 27.77392578125, 28.806640625, 29.83935546875, 30.8720703125, 31.90478515625, 32.9375]}, "gradients/decoder.transformer.h.17.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 6.0, 6.0, 5.0, 6.0, 12.0, 26.0, 30.0, 37.0, 50.0, 78.0, 112.0, 164.0, 225.0, 364.0, 562.0, 879.0, 1479.0, 2468.0, 4379.0, 8417.0, 25250.0, 399412.0, 3415528.0, 295265.0, 21600.0, 7760.0, 3986.0, 2297.0, 1318.0, 892.0, 537.0, 362.0, 239.0, 157.0, 106.0, 73.0, 58.0, 43.0, 30.0, 18.0, 9.0, 15.0, 6.0, 6.0, 5.0, 3.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-93.8125, -91.0078125, -88.203125, -85.3984375, -82.59375, -79.7890625, -76.984375, -74.1796875, -71.375, -68.5703125, -65.765625, -62.9609375, -60.15625, -57.3515625, -54.546875, -51.7421875, -48.9375, -46.1328125, -43.328125, -40.5234375, -37.71875, -34.9140625, -32.109375, -29.3046875, -26.5, -23.6953125, -20.890625, -18.0859375, -15.28125, -12.4765625, -9.671875, -6.8671875, -4.0625, -1.2578125, 1.546875, 4.3515625, 7.15625, 9.9609375, 12.765625, 15.5703125, 18.375, 21.1796875, 23.984375, 26.7890625, 29.59375, 32.3984375, 35.203125, 38.0078125, 40.8125, 43.6171875, 46.421875, 49.2265625, 52.03125, 54.8359375, 57.640625, 60.4453125, 63.25, 66.0546875, 68.859375, 71.6640625, 74.46875, 77.2734375, 80.078125, 82.8828125, 85.6875]}, "gradients/decoder.transformer.h.17.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 3.0, 2.0, 3.0, 1.0, 3.0, 2.0, 3.0, 4.0, 6.0, 6.0, 12.0, 6.0, 10.0, 12.0, 16.0, 18.0, 25.0, 36.0, 69.0, 116.0, 358.0, 831.0, 1250.0, 696.0, 275.0, 125.0, 47.0, 27.0, 26.0, 21.0, 9.0, 13.0, 13.0, 11.0, 8.0, 5.0, 5.0, 2.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-108.625, -105.466796875, -102.30859375, -99.150390625, -95.9921875, -92.833984375, -89.67578125, -86.517578125, -83.359375, -80.201171875, -77.04296875, -73.884765625, -70.7265625, -67.568359375, -64.41015625, -61.251953125, -58.09375, -54.935546875, -51.77734375, -48.619140625, -45.4609375, -42.302734375, -39.14453125, -35.986328125, -32.828125, -29.669921875, -26.51171875, -23.353515625, -20.1953125, -17.037109375, -13.87890625, -10.720703125, -7.5625, -4.404296875, -1.24609375, 1.912109375, 5.0703125, 8.228515625, 11.38671875, 14.544921875, 17.703125, 20.861328125, 24.01953125, 27.177734375, 30.3359375, 33.494140625, 36.65234375, 39.810546875, 42.96875, 46.126953125, 49.28515625, 52.443359375, 55.6015625, 58.759765625, 61.91796875, 65.076171875, 68.234375, 71.392578125, 74.55078125, 77.708984375, 80.8671875, 84.025390625, 87.18359375, 90.341796875, 93.5]}, "gradients/decoder.transformer.h.17.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 4.0, 2.0, 4.0, 6.0, 9.0, 20.0, 23.0, 27.0, 36.0, 54.0, 76.0, 100.0, 140.0, 211.0, 323.0, 475.0, 708.0, 963.0, 1516.0, 2255.0, 3859.0, 6191.0, 10559.0, 19235.0, 41458.0, 173536.0, 3251033.0, 558261.0, 63885.0, 25858.0, 13056.0, 7649.0, 4465.0, 2720.0, 1774.0, 1171.0, 819.0, 550.0, 365.0, 267.0, 191.0, 123.0, 79.0, 81.0, 41.0, 35.0, 29.0, 15.0, 12.0, 6.0, 8.0, 6.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-99.125, -95.896484375, -92.66796875, -89.439453125, -86.2109375, -82.982421875, -79.75390625, -76.525390625, -73.296875, -70.068359375, -66.83984375, -63.611328125, -60.3828125, -57.154296875, -53.92578125, -50.697265625, -47.46875, -44.240234375, -41.01171875, -37.783203125, -34.5546875, -31.326171875, -28.09765625, -24.869140625, -21.640625, -18.412109375, -15.18359375, -11.955078125, -8.7265625, -5.498046875, -2.26953125, 0.958984375, 4.1875, 7.416015625, 10.64453125, 13.873046875, 17.1015625, 20.330078125, 23.55859375, 26.787109375, 30.015625, 33.244140625, 36.47265625, 39.701171875, 42.9296875, 46.158203125, 49.38671875, 52.615234375, 55.84375, 59.072265625, 62.30078125, 65.529296875, 68.7578125, 71.986328125, 75.21484375, 78.443359375, 81.671875, 84.900390625, 88.12890625, 91.357421875, 94.5859375, 97.814453125, 101.04296875, 104.271484375, 107.5]}, "gradients/decoder.transformer.h.17.ln_2.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 2.0, 4.0, 5.0, 7.0, 13.0, 21.0, 48.0, 58.0, 98.0, 248.0, 266.0, 121.0, 56.0, 22.0, 19.0, 22.0, 5.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-311.68115234375, -289.49346923828125, -267.3057861328125, -245.1180877685547, -222.93038940429688, -200.74270629882812, -178.55502319335938, -156.36732482910156, -134.1796417236328, -111.99195098876953, -89.80426025390625, -67.6165771484375, -45.42888641357422, -23.241195678710938, -1.0535125732421875, 21.134185791015625, 43.321868896484375, 65.50955963134766, 87.69725036621094, 109.88493347167969, 132.0726318359375, 154.26031494140625, 176.447998046875, 198.6356964111328, 220.82337951660156, 243.0110626220703, 265.1987609863281, 287.3864440917969, 309.5741271972656, 331.7618408203125, 353.94952392578125, 376.13720703125, 398.32489013671875, 420.5125732421875, 442.70025634765625, 464.887939453125, 487.0756530761719, 509.2633361816406, 531.4510498046875, 553.6387329101562, 575.826416015625, 598.0140991210938, 620.2017822265625, 642.3894653320312, 664.5771484375, 686.764892578125, 708.9525146484375, 731.1402587890625, 753.327880859375, 775.5155639648438, 797.7032470703125, 819.8909301757812, 842.07861328125, 864.266357421875, 886.4539794921875, 908.6417236328125, 930.8294067382812, 953.01708984375, 975.2047729492188, 997.3924560546875, 1019.5801391601562, 1041.767822265625, 1063.95556640625, 1086.1431884765625, 1108.3309326171875]}, "gradients/decoder.transformer.h.17.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 4.0, 2.0, 6.0, 6.0, 4.0, 6.0, 3.0, 14.0, 10.0, 5.0, 11.0, 15.0, 30.0, 18.0, 26.0, 29.0, 22.0, 25.0, 36.0, 33.0, 30.0, 22.0, 40.0, 36.0, 33.0, 29.0, 37.0, 40.0, 31.0, 37.0, 37.0, 43.0, 22.0, 25.0, 29.0, 28.0, 24.0, 31.0, 18.0, 21.0, 13.0, 6.0, 10.0, 11.0, 14.0, 6.0, 8.0, 4.0, 10.0, 2.0, 2.0, 8.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-197.08865356445312, -190.61080932617188, -184.1329803466797, -177.65513610839844, -171.1772918701172, -164.699462890625, -158.22161865234375, -151.7437744140625, -145.26593017578125, -138.7880859375, -132.3102569580078, -125.83241271972656, -119.35456848144531, -112.8767318725586, -106.39889526367188, -99.92105102539062, -93.4432144165039, -86.96537780761719, -80.48753356933594, -74.00969696044922, -67.53185272216797, -61.05401611328125, -54.576175689697266, -48.09833526611328, -41.6204948425293, -35.14265441894531, -28.664813995361328, -22.186975479125977, -15.709135055541992, -9.23129653930664, -2.7534561157226562, 3.724384307861328, 10.202224731445312, 16.680065155029297, 23.15790557861328, 29.635744094848633, 36.11358642578125, 42.59142303466797, 49.06926345825195, 55.54710388183594, 62.02494430541992, 68.5027847290039, 74.98062133789062, 81.45846557617188, 87.9363021850586, 94.41413879394531, 100.89198303222656, 107.36982727050781, 113.84766387939453, 120.32550048828125, 126.8033447265625, 133.28118896484375, 139.75901794433594, 146.2368621826172, 152.71470642089844, 159.19253540039062, 165.67037963867188, 172.14822387695312, 178.6260528564453, 185.10389709472656, 191.5817413330078, 198.0595703125, 204.53741455078125, 211.0152587890625, 217.49310302734375]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 4.0, 2.0, 5.0, 10.0, 12.0, 9.0, 9.0, 15.0, 18.0, 22.0, 25.0, 30.0, 28.0, 25.0, 30.0, 35.0, 36.0, 44.0, 34.0, 45.0, 46.0, 53.0, 52.0, 49.0, 38.0, 25.0, 43.0, 36.0, 28.0, 33.0, 32.0, 18.0, 17.0, 14.0, 14.0, 11.0, 14.0, 10.0, 8.0, 7.0, 6.0, 5.0, 2.0, 4.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-36.75, -35.676513671875, -34.60302734375, -33.529541015625, -32.4560546875, -31.382568359375, -30.30908203125, -29.235595703125, -28.162109375, -27.088623046875, -26.01513671875, -24.941650390625, -23.8681640625, -22.794677734375, -21.72119140625, -20.647705078125, -19.57421875, -18.500732421875, -17.42724609375, -16.353759765625, -15.2802734375, -14.206787109375, -13.13330078125, -12.059814453125, -10.986328125, -9.912841796875, -8.83935546875, -7.765869140625, -6.6923828125, -5.618896484375, -4.54541015625, -3.471923828125, -2.3984375, -1.324951171875, -0.25146484375, 0.822021484375, 1.8955078125, 2.968994140625, 4.04248046875, 5.115966796875, 6.189453125, 7.262939453125, 8.33642578125, 9.409912109375, 10.4833984375, 11.556884765625, 12.63037109375, 13.703857421875, 14.77734375, 15.850830078125, 16.92431640625, 17.997802734375, 19.0712890625, 20.144775390625, 21.21826171875, 22.291748046875, 23.365234375, 24.438720703125, 25.51220703125, 26.585693359375, 27.6591796875, 28.732666015625, 29.80615234375, 30.879638671875, 31.953125]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 5.0, 7.0, 20.0, 20.0, 36.0, 52.0, 64.0, 144.0, 191.0, 298.0, 447.0, 737.0, 1156.0, 1850.0, 2948.0, 4915.0, 8095.0, 13425.0, 22826.0, 39408.0, 72810.0, 150662.0, 326391.0, 197865.0, 89212.0, 47613.0, 27302.0, 15703.0, 9341.0, 5737.0, 3458.0, 2102.0, 1400.0, 880.0, 523.0, 323.0, 209.0, 128.0, 95.0, 56.0, 31.0, 22.0, 19.0, 10.0, 8.0, 8.0, 4.0, 2.0, 2.0, 1.0, 2.0, 1.0], "bins": [-2.134765625, -2.0731658935546875, -2.011566162109375, -1.9499664306640625, -1.88836669921875, -1.8267669677734375, -1.765167236328125, -1.7035675048828125, -1.6419677734375, -1.5803680419921875, -1.518768310546875, -1.4571685791015625, -1.39556884765625, -1.3339691162109375, -1.272369384765625, -1.2107696533203125, -1.149169921875, -1.0875701904296875, -1.025970458984375, -0.9643707275390625, -0.90277099609375, -0.8411712646484375, -0.779571533203125, -0.7179718017578125, -0.6563720703125, -0.5947723388671875, -0.533172607421875, -0.4715728759765625, -0.40997314453125, -0.3483734130859375, -0.286773681640625, -0.2251739501953125, -0.16357421875, -0.1019744873046875, -0.040374755859375, 0.0212249755859375, 0.08282470703125, 0.1444244384765625, 0.206024169921875, 0.2676239013671875, 0.3292236328125, 0.3908233642578125, 0.452423095703125, 0.5140228271484375, 0.57562255859375, 0.6372222900390625, 0.698822021484375, 0.7604217529296875, 0.822021484375, 0.8836212158203125, 0.945220947265625, 1.0068206787109375, 1.06842041015625, 1.1300201416015625, 1.191619873046875, 1.2532196044921875, 1.3148193359375, 1.3764190673828125, 1.438018798828125, 1.4996185302734375, 1.56121826171875, 1.6228179931640625, 1.684417724609375, 1.7460174560546875, 1.8076171875]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 3.0, 2.0, 5.0, 4.0, 6.0, 6.0, 11.0, 14.0, 17.0, 20.0, 20.0, 24.0, 28.0, 24.0, 31.0, 33.0, 35.0, 34.0, 39.0, 37.0, 41.0, 52.0, 1063.0, 31.0, 43.0, 27.0, 51.0, 24.0, 37.0, 32.0, 28.0, 27.0, 26.0, 26.0, 25.0, 16.0, 20.0, 10.0, 10.0, 14.0, 10.0, 7.0, 2.0, 3.0, 7.0, 4.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-21.890625, -21.21728515625, -20.5439453125, -19.87060546875, -19.197265625, -18.52392578125, -17.8505859375, -17.17724609375, -16.50390625, -15.83056640625, -15.1572265625, -14.48388671875, -13.810546875, -13.13720703125, -12.4638671875, -11.79052734375, -11.1171875, -10.44384765625, -9.7705078125, -9.09716796875, -8.423828125, -7.75048828125, -7.0771484375, -6.40380859375, -5.73046875, -5.05712890625, -4.3837890625, -3.71044921875, -3.037109375, -2.36376953125, -1.6904296875, -1.01708984375, -0.34375, 0.32958984375, 1.0029296875, 1.67626953125, 2.349609375, 3.02294921875, 3.6962890625, 4.36962890625, 5.04296875, 5.71630859375, 6.3896484375, 7.06298828125, 7.736328125, 8.40966796875, 9.0830078125, 9.75634765625, 10.4296875, 11.10302734375, 11.7763671875, 12.44970703125, 13.123046875, 13.79638671875, 14.4697265625, 15.14306640625, 15.81640625, 16.48974609375, 17.1630859375, 17.83642578125, 18.509765625, 19.18310546875, 19.8564453125, 20.52978515625, 21.203125]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 7.0, 10.0, 13.0, 17.0, 23.0, 25.0, 56.0, 67.0, 111.0, 155.0, 260.0, 379.0, 607.0, 952.0, 1425.0, 2349.0, 3761.0, 6117.0, 10216.0, 17241.0, 29579.0, 52700.0, 100934.0, 253500.0, 1333392.0, 130802.0, 64690.0, 35883.0, 20623.0, 11997.0, 7352.0, 4335.0, 2704.0, 1673.0, 1106.0, 695.0, 462.0, 283.0, 216.0, 131.0, 94.0, 70.0, 35.0, 34.0, 16.0, 14.0, 11.0, 10.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.853515625, -1.7958984375, -1.73828125, -1.6806640625, -1.623046875, -1.5654296875, -1.5078125, -1.4501953125, -1.392578125, -1.3349609375, -1.27734375, -1.2197265625, -1.162109375, -1.1044921875, -1.046875, -0.9892578125, -0.931640625, -0.8740234375, -0.81640625, -0.7587890625, -0.701171875, -0.6435546875, -0.5859375, -0.5283203125, -0.470703125, -0.4130859375, -0.35546875, -0.2978515625, -0.240234375, -0.1826171875, -0.125, -0.0673828125, -0.009765625, 0.0478515625, 0.10546875, 0.1630859375, 0.220703125, 0.2783203125, 0.3359375, 0.3935546875, 0.451171875, 0.5087890625, 0.56640625, 0.6240234375, 0.681640625, 0.7392578125, 0.796875, 0.8544921875, 0.912109375, 0.9697265625, 1.02734375, 1.0849609375, 1.142578125, 1.2001953125, 1.2578125, 1.3154296875, 1.373046875, 1.4306640625, 1.48828125, 1.5458984375, 1.603515625, 1.6611328125, 1.71875, 1.7763671875, 1.833984375]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 5.0, 6.0, 0.0, 7.0, 5.0, 6.0, 4.0, 9.0, 11.0, 15.0, 20.0, 20.0, 22.0, 32.0, 47.0, 40.0, 44.0, 63.0, 83.0, 113.0, 89.0, 45.0, 54.0, 34.0, 40.0, 28.0, 39.0, 22.0, 20.0, 14.0, 14.0, 14.0, 12.0, 2.0, 8.0, 4.0, 4.0, 6.0, 3.0, 5.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0248870849609375, -0.024206876754760742, -0.023526668548583984, -0.022846460342407227, -0.02216625213623047, -0.02148604393005371, -0.020805835723876953, -0.020125627517700195, -0.019445419311523438, -0.01876521110534668, -0.018085002899169922, -0.017404794692993164, -0.016724586486816406, -0.01604437828063965, -0.01536417007446289, -0.014683961868286133, -0.014003753662109375, -0.013323545455932617, -0.01264333724975586, -0.011963129043579102, -0.011282920837402344, -0.010602712631225586, -0.009922504425048828, -0.00924229621887207, -0.008562088012695312, -0.007881879806518555, -0.007201671600341797, -0.006521463394165039, -0.005841255187988281, -0.0051610469818115234, -0.004480838775634766, -0.003800630569458008, -0.00312042236328125, -0.002440214157104492, -0.0017600059509277344, -0.0010797977447509766, -0.00039958953857421875, 0.00028061866760253906, 0.0009608268737792969, 0.0016410350799560547, 0.0023212432861328125, 0.0030014514923095703, 0.003681659698486328, 0.004361867904663086, 0.005042076110839844, 0.0057222843170166016, 0.006402492523193359, 0.007082700729370117, 0.007762908935546875, 0.008443117141723633, 0.00912332534790039, 0.009803533554077148, 0.010483741760253906, 0.011163949966430664, 0.011844158172607422, 0.01252436637878418, 0.013204574584960938, 0.013884782791137695, 0.014564990997314453, 0.015245199203491211, 0.01592540740966797, 0.016605615615844727, 0.017285823822021484, 0.017966032028198242, 0.018646240234375]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 7.0, 2.0, 4.0, 9.0, 5.0, 4.0, 6.0, 11.0, 15.0, 22.0, 27.0, 32.0, 44.0, 56.0, 70.0, 102.0, 161.0, 250.0, 829.0, 6108.0, 141603.0, 878285.0, 18278.0, 1587.0, 410.0, 185.0, 126.0, 95.0, 51.0, 47.0, 32.0, 26.0, 20.0, 16.0, 8.0, 5.0, 7.0, 5.0, 6.0, 4.0, 2.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.2939453125, -0.2835235595703125, -0.273101806640625, -0.2626800537109375, -0.25225830078125, -0.2418365478515625, -0.231414794921875, -0.2209930419921875, -0.2105712890625, -0.2001495361328125, -0.189727783203125, -0.1793060302734375, -0.16888427734375, -0.1584625244140625, -0.148040771484375, -0.1376190185546875, -0.127197265625, -0.1167755126953125, -0.106353759765625, -0.0959320068359375, -0.08551025390625, -0.0750885009765625, -0.064666748046875, -0.0542449951171875, -0.0438232421875, -0.0334014892578125, -0.022979736328125, -0.0125579833984375, -0.00213623046875, 0.0082855224609375, 0.018707275390625, 0.0291290283203125, 0.03955078125, 0.0499725341796875, 0.060394287109375, 0.0708160400390625, 0.08123779296875, 0.0916595458984375, 0.102081298828125, 0.1125030517578125, 0.1229248046875, 0.1333465576171875, 0.143768310546875, 0.1541900634765625, 0.16461181640625, 0.1750335693359375, 0.185455322265625, 0.1958770751953125, 0.206298828125, 0.2167205810546875, 0.227142333984375, 0.2375640869140625, 0.24798583984375, 0.2584075927734375, 0.268829345703125, 0.2792510986328125, 0.2896728515625, 0.3000946044921875, 0.310516357421875, 0.3209381103515625, 0.33135986328125, 0.3417816162109375, 0.352203369140625, 0.3626251220703125, 0.373046875]}, "gradients/decoder.transformer.h.17.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 25.0, 72.0, 260.0, 540.0, 81.0, 26.0, 5.0, 4.0], "bins": [-0.1447385996580124, -0.14229464530944824, -0.1398507058620453, -0.13740675151348114, -0.134962797164917, -0.13251885771751404, -0.1300749033689499, -0.12763094902038574, -0.1251869946718216, -0.12274304777383804, -0.1202990934252739, -0.11785514652729034, -0.1154111921787262, -0.11296724528074265, -0.1105232983827591, -0.10807934403419495, -0.10563540458679199, -0.10319145768880844, -0.1007475033402443, -0.09830355644226074, -0.0958596020936966, -0.09341565519571304, -0.09097170829772949, -0.08852775394916534, -0.0860837996006012, -0.08363985270261765, -0.0811958983540535, -0.07875195145606995, -0.0763079971075058, -0.07386405020952225, -0.0714201033115387, -0.06897614896297455, -0.0665321946144104, -0.06408824771642685, -0.0616442933678627, -0.05920034646987915, -0.0567563958466053, -0.05431244522333145, -0.0518684946000576, -0.04942454397678375, -0.0469805970788002, -0.04453664645552635, -0.0420926958322525, -0.03964874893426895, -0.0372047983109951, -0.03476084768772125, -0.0323168970644474, -0.029872948303818703, -0.027428999543190002, -0.024985048919916153, -0.022541100159287453, -0.020097149536013603, -0.017653200775384903, -0.015209250152111053, -0.012765299528837204, -0.010321350768208504, -0.007877400144934654, -0.005433450452983379, -0.002989500295370817, -0.000545550137758255, 0.0018983995541930199, 0.004342349246144295, 0.006786299869418144, 0.009230248630046844, 0.011674199253320694]}, "gradients/decoder.transformer.h.17.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 5.0, 4.0, 3.0, 4.0, 6.0, 4.0, 15.0, 10.0, 9.0, 18.0, 19.0, 17.0, 25.0, 22.0, 34.0, 32.0, 34.0, 41.0, 48.0, 41.0, 26.0, 55.0, 58.0, 45.0, 53.0, 35.0, 49.0, 45.0, 36.0, 33.0, 34.0, 24.0, 16.0, 25.0, 16.0, 21.0, 11.0, 11.0, 13.0, 2.0, 4.0, 3.0, 4.0, 1.0, 1.0, 2.0, 1.0], "bins": [-0.016730785369873047, -0.016304954886436462, -0.01587912254035473, -0.01545329112559557, -0.01502745971083641, -0.014601628296077251, -0.014175796881318092, -0.013749965466558933, -0.013324134051799774, -0.012898302637040615, -0.012472471222281456, -0.012046639807522297, -0.011620808392763138, -0.011194976978003979, -0.01076914556324482, -0.01034331414848566, -0.009917482733726501, -0.009491651318967342, -0.009065819904208183, -0.008639988489449024, -0.008214157074689865, -0.007788325659930706, -0.007362494245171547, -0.006936662830412388, -0.006510831415653229, -0.00608500000089407, -0.005659168586134911, -0.0052333371713757515, -0.004807505756616592, -0.004381674341857433, -0.003955842927098274, -0.003530011512339115, -0.003104180097579956, -0.002678348682820797, -0.002252517268061638, -0.0018266858533024788, -0.0014008544385433197, -0.0009750230237841606, -0.0005491916090250015, -0.00012336019426584244, 0.00030247122049331665, 0.0007283026352524757, 0.0011541340500116348, 0.001579965464770794, 0.002005796879529953, 0.002431628294289112, 0.002857459709048271, 0.0032832911238074303, 0.0037091225385665894, 0.0041349539533257484, 0.0045607853680849075, 0.004986616782844067, 0.005412448197603226, 0.005838279612362385, 0.006264111027121544, 0.006689942441880703, 0.007115773856639862, 0.007541605271399021, 0.00796743668615818, 0.00839326810091734, 0.008819099515676498, 0.009244930930435658, 0.009670762345194817, 0.010096593759953976, 0.010522425174713135]}, "gradients/decoder.transformer.h.17.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 4.0, 2.0, 5.0, 10.0, 12.0, 9.0, 9.0, 15.0, 18.0, 22.0, 25.0, 30.0, 28.0, 25.0, 30.0, 35.0, 36.0, 44.0, 34.0, 45.0, 46.0, 53.0, 52.0, 49.0, 38.0, 25.0, 43.0, 36.0, 28.0, 33.0, 32.0, 18.0, 17.0, 14.0, 14.0, 11.0, 14.0, 10.0, 8.0, 7.0, 6.0, 5.0, 2.0, 4.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-36.75, -35.676513671875, -34.60302734375, -33.529541015625, -32.4560546875, -31.382568359375, -30.30908203125, -29.235595703125, -28.162109375, -27.088623046875, -26.01513671875, -24.941650390625, -23.8681640625, -22.794677734375, -21.72119140625, -20.647705078125, -19.57421875, -18.500732421875, -17.42724609375, -16.353759765625, -15.2802734375, -14.206787109375, -13.13330078125, -12.059814453125, -10.986328125, -9.912841796875, -8.83935546875, -7.765869140625, -6.6923828125, -5.618896484375, -4.54541015625, -3.471923828125, -2.3984375, -1.324951171875, -0.25146484375, 0.822021484375, 1.8955078125, 2.968994140625, 4.04248046875, 5.115966796875, 6.189453125, 7.262939453125, 8.33642578125, 9.409912109375, 10.4833984375, 11.556884765625, 12.63037109375, 13.703857421875, 14.77734375, 15.850830078125, 16.92431640625, 17.997802734375, 19.0712890625, 20.144775390625, 21.21826171875, 22.291748046875, 23.365234375, 24.438720703125, 25.51220703125, 26.585693359375, 27.6591796875, 28.732666015625, 29.80615234375, 30.879638671875, 31.953125]}, "gradients/decoder.transformer.h.17.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 7.0, 3.0, 5.0, 11.0, 16.0, 22.0, 32.0, 25.0, 51.0, 71.0, 98.0, 163.0, 248.0, 348.0, 576.0, 944.0, 1756.0, 3195.0, 6523.0, 14515.0, 39111.0, 132827.0, 513970.0, 234452.0, 60439.0, 20710.0, 8832.0, 4191.0, 2243.0, 1204.0, 746.0, 414.0, 249.0, 159.0, 103.0, 76.0, 56.0, 38.0, 35.0, 27.0, 16.0, 23.0, 8.0, 7.0, 8.0, 8.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-21.234375, -20.622314453125, -20.01025390625, -19.398193359375, -18.7861328125, -18.174072265625, -17.56201171875, -16.949951171875, -16.337890625, -15.725830078125, -15.11376953125, -14.501708984375, -13.8896484375, -13.277587890625, -12.66552734375, -12.053466796875, -11.44140625, -10.829345703125, -10.21728515625, -9.605224609375, -8.9931640625, -8.381103515625, -7.76904296875, -7.156982421875, -6.544921875, -5.932861328125, -5.32080078125, -4.708740234375, -4.0966796875, -3.484619140625, -2.87255859375, -2.260498046875, -1.6484375, -1.036376953125, -0.42431640625, 0.187744140625, 0.7998046875, 1.411865234375, 2.02392578125, 2.635986328125, 3.248046875, 3.860107421875, 4.47216796875, 5.084228515625, 5.6962890625, 6.308349609375, 6.92041015625, 7.532470703125, 8.14453125, 8.756591796875, 9.36865234375, 9.980712890625, 10.5927734375, 11.204833984375, 11.81689453125, 12.428955078125, 13.041015625, 13.653076171875, 14.26513671875, 14.877197265625, 15.4892578125, 16.101318359375, 16.71337890625, 17.325439453125, 17.9375]}, "gradients/decoder.transformer.h.17.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 5.0, 0.0, 4.0, 3.0, 6.0, 8.0, 9.0, 9.0, 16.0, 11.0, 16.0, 21.0, 29.0, 16.0, 36.0, 30.0, 42.0, 51.0, 36.0, 34.0, 71.0, 144.0, 1897.0, 100.0, 43.0, 56.0, 33.0, 37.0, 45.0, 30.0, 37.0, 27.0, 25.0, 20.0, 17.0, 25.0, 14.0, 16.0, 9.0, 8.0, 10.0, 3.0, 2.0, 2.0, 3.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0], "bins": [-123.75, -120.2509765625, -116.751953125, -113.2529296875, -109.75390625, -106.2548828125, -102.755859375, -99.2568359375, -95.7578125, -92.2587890625, -88.759765625, -85.2607421875, -81.76171875, -78.2626953125, -74.763671875, -71.2646484375, -67.765625, -64.2666015625, -60.767578125, -57.2685546875, -53.76953125, -50.2705078125, -46.771484375, -43.2724609375, -39.7734375, -36.2744140625, -32.775390625, -29.2763671875, -25.77734375, -22.2783203125, -18.779296875, -15.2802734375, -11.78125, -8.2822265625, -4.783203125, -1.2841796875, 2.21484375, 5.7138671875, 9.212890625, 12.7119140625, 16.2109375, 19.7099609375, 23.208984375, 26.7080078125, 30.20703125, 33.7060546875, 37.205078125, 40.7041015625, 44.203125, 47.7021484375, 51.201171875, 54.7001953125, 58.19921875, 61.6982421875, 65.197265625, 68.6962890625, 72.1953125, 75.6943359375, 79.193359375, 82.6923828125, 86.19140625, 89.6904296875, 93.189453125, 96.6884765625, 100.1875]}, "gradients/decoder.transformer.h.17.attn.c_attn.weight": {"_type": "histogram", "values": [4.0, 1.0, 2.0, 3.0, 0.0, 2.0, 2.0, 4.0, 2.0, 14.0, 3.0, 11.0, 10.0, 13.0, 19.0, 31.0, 22.0, 36.0, 45.0, 77.0, 65.0, 104.0, 145.0, 175.0, 290.0, 669.0, 2367.0, 520663.0, 2616517.0, 2691.0, 737.0, 272.0, 185.0, 140.0, 91.0, 82.0, 46.0, 29.0, 43.0, 23.0, 14.0, 12.0, 16.0, 9.0, 11.0, 8.0, 4.0, 2.0, 3.0, 1.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-194.0, -187.08203125, -180.1640625, -173.24609375, -166.328125, -159.41015625, -152.4921875, -145.57421875, -138.65625, -131.73828125, -124.8203125, -117.90234375, -110.984375, -104.06640625, -97.1484375, -90.23046875, -83.3125, -76.39453125, -69.4765625, -62.55859375, -55.640625, -48.72265625, -41.8046875, -34.88671875, -27.96875, -21.05078125, -14.1328125, -7.21484375, -0.296875, 6.62109375, 13.5390625, 20.45703125, 27.375, 34.29296875, 41.2109375, 48.12890625, 55.046875, 61.96484375, 68.8828125, 75.80078125, 82.71875, 89.63671875, 96.5546875, 103.47265625, 110.390625, 117.30859375, 124.2265625, 131.14453125, 138.0625, 144.98046875, 151.8984375, 158.81640625, 165.734375, 172.65234375, 179.5703125, 186.48828125, 193.40625, 200.32421875, 207.2421875, 214.16015625, 221.078125, 227.99609375, 234.9140625, 241.83203125, 248.75]}, "gradients/decoder.transformer.h.17.ln_1.weight": {"_type": "histogram", "values": [983.0, 34.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-34.62297058105469, 16.221084594726562, 67.06513977050781, 117.90919494628906, 168.7532501220703, 219.59730529785156, 270.44134521484375, 321.285400390625, 372.12945556640625, 422.9735107421875, 473.81756591796875, 524.66162109375, 575.5056762695312, 626.3497314453125, 677.1937866210938, 728.037841796875, 778.8818969726562, 829.7259521484375, 880.5700073242188, 931.4140625, 982.2581176757812, 1033.1021728515625, 1083.9462890625, 1134.790283203125, 1185.6343994140625, 1236.478515625, 1287.322509765625, 1338.16650390625, 1389.0106201171875, 1439.854736328125, 1490.69873046875, 1541.542724609375, 1592.38671875, 1643.230712890625, 1694.0748291015625, 1744.9189453125, 1795.762939453125, 1846.60693359375, 1897.4510498046875, 1948.295166015625, 1999.13916015625, 2049.983154296875, 2100.8271484375, 2151.67138671875, 2202.515380859375, 2253.359375, 2304.20361328125, 2355.047607421875, 2405.8916015625, 2456.735595703125, 2507.57958984375, 2558.423828125, 2609.267822265625, 2660.11181640625, 2710.9560546875, 2761.800048828125, 2812.64404296875, 2863.488037109375, 2914.33203125, 2965.17626953125, 3016.020263671875, 3066.8642578125, 3117.70849609375, 3168.552490234375, 3219.396484375]}, "gradients/decoder.transformer.h.17.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 5.0, 3.0, 1.0, 2.0, 6.0, 5.0, 2.0, 11.0, 14.0, 17.0, 28.0, 13.0, 20.0, 28.0, 24.0, 32.0, 36.0, 38.0, 34.0, 37.0, 52.0, 34.0, 45.0, 48.0, 43.0, 43.0, 41.0, 44.0, 30.0, 42.0, 30.0, 28.0, 36.0, 22.0, 21.0, 20.0, 11.0, 13.0, 12.0, 8.0, 10.0, 5.0, 5.0, 3.0, 3.0, 4.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-289.0094909667969, -279.8565673828125, -270.70361328125, -261.5506896972656, -252.39776611328125, -243.2448272705078, -234.09188842773438, -224.93896484375, -215.78602600097656, -206.63308715820312, -197.48016357421875, -188.3272247314453, -179.17428588867188, -170.0213623046875, -160.86842346191406, -151.71548461914062, -142.56256103515625, -133.4096221923828, -124.25669860839844, -115.103759765625, -105.9508285522461, -96.79789733886719, -87.64495849609375, -78.49202728271484, -69.33909606933594, -60.18616485595703, -51.03322982788086, -41.88029479980469, -32.72736358642578, -23.574432373046875, -14.421497344970703, -5.268562316894531, 3.884368896484375, 13.037302017211914, 22.190235137939453, 31.343168258666992, 40.49610137939453, 49.64903259277344, 58.80196762084961, 67.95490264892578, 77.10783386230469, 86.2607650756836, 95.4136962890625, 104.56663513183594, 113.71956634521484, 122.87249755859375, 132.0254364013672, 141.17837524414062, 150.331298828125, 159.48423767089844, 168.6371612548828, 177.79010009765625, 186.94302368164062, 196.09596252441406, 205.2489013671875, 214.40182495117188, 223.5547637939453, 232.70770263671875, 241.86062622070312, 251.01356506347656, 260.16650390625, 269.3194274902344, 278.47235107421875, 287.62530517578125, 296.7782287597656]}, "gradients/decoder.transformer.h.16.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 3.0, 4.0, 6.0, 10.0, 8.0, 11.0, 10.0, 14.0, 16.0, 21.0, 20.0, 33.0, 28.0, 20.0, 29.0, 33.0, 33.0, 45.0, 38.0, 44.0, 45.0, 41.0, 53.0, 59.0, 33.0, 27.0, 39.0, 44.0, 28.0, 37.0, 26.0, 26.0, 17.0, 16.0, 11.0, 10.0, 14.0, 10.0, 11.0, 8.0, 7.0, 6.0, 2.0, 4.0, 3.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-35.53125, -34.4736328125, -33.416015625, -32.3583984375, -31.30078125, -30.2431640625, -29.185546875, -28.1279296875, -27.0703125, -26.0126953125, -24.955078125, -23.8974609375, -22.83984375, -21.7822265625, -20.724609375, -19.6669921875, -18.609375, -17.5517578125, -16.494140625, -15.4365234375, -14.37890625, -13.3212890625, -12.263671875, -11.2060546875, -10.1484375, -9.0908203125, -8.033203125, -6.9755859375, -5.91796875, -4.8603515625, -3.802734375, -2.7451171875, -1.6875, -0.6298828125, 0.427734375, 1.4853515625, 2.54296875, 3.6005859375, 4.658203125, 5.7158203125, 6.7734375, 7.8310546875, 8.888671875, 9.9462890625, 11.00390625, 12.0615234375, 13.119140625, 14.1767578125, 15.234375, 16.2919921875, 17.349609375, 18.4072265625, 19.46484375, 20.5224609375, 21.580078125, 22.6376953125, 23.6953125, 24.7529296875, 25.810546875, 26.8681640625, 27.92578125, 28.9833984375, 30.041015625, 31.0986328125, 32.15625]}, "gradients/decoder.transformer.h.16.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 5.0, 4.0, 6.0, 8.0, 11.0, 16.0, 11.0, 19.0, 31.0, 27.0, 53.0, 87.0, 95.0, 166.0, 282.0, 504.0, 850.0, 1555.0, 3049.0, 6586.0, 15913.0, 323769.0, 3690491.0, 126693.0, 12672.0, 5446.0, 2606.0, 1411.0, 735.0, 427.0, 262.0, 160.0, 106.0, 65.0, 32.0, 27.0, 22.0, 21.0, 9.0, 17.0, 6.0, 12.0, 5.0, 5.0, 1.0, 4.0, 2.0, 4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-129.5, -125.501953125, -121.50390625, -117.505859375, -113.5078125, -109.509765625, -105.51171875, -101.513671875, -97.515625, -93.517578125, -89.51953125, -85.521484375, -81.5234375, -77.525390625, -73.52734375, -69.529296875, -65.53125, -61.533203125, -57.53515625, -53.537109375, -49.5390625, -45.541015625, -41.54296875, -37.544921875, -33.546875, -29.548828125, -25.55078125, -21.552734375, -17.5546875, -13.556640625, -9.55859375, -5.560546875, -1.5625, 2.435546875, 6.43359375, 10.431640625, 14.4296875, 18.427734375, 22.42578125, 26.423828125, 30.421875, 34.419921875, 38.41796875, 42.416015625, 46.4140625, 50.412109375, 54.41015625, 58.408203125, 62.40625, 66.404296875, 70.40234375, 74.400390625, 78.3984375, 82.396484375, 86.39453125, 90.392578125, 94.390625, 98.388671875, 102.38671875, 106.384765625, 110.3828125, 114.380859375, 118.37890625, 122.376953125, 126.375]}, "gradients/decoder.transformer.h.16.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 6.0, 3.0, 6.0, 3.0, 9.0, 4.0, 9.0, 15.0, 18.0, 27.0, 63.0, 149.0, 504.0, 1241.0, 1242.0, 427.0, 131.0, 59.0, 36.0, 20.0, 26.0, 21.0, 10.0, 15.0, 8.0, 11.0, 5.0, 1.0, 3.0, 2.0, 4.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-120.0625, -116.0576171875, -112.052734375, -108.0478515625, -104.04296875, -100.0380859375, -96.033203125, -92.0283203125, -88.0234375, -84.0185546875, -80.013671875, -76.0087890625, -72.00390625, -67.9990234375, -63.994140625, -59.9892578125, -55.984375, -51.9794921875, -47.974609375, -43.9697265625, -39.96484375, -35.9599609375, -31.955078125, -27.9501953125, -23.9453125, -19.9404296875, -15.935546875, -11.9306640625, -7.92578125, -3.9208984375, 0.083984375, 4.0888671875, 8.09375, 12.0986328125, 16.103515625, 20.1083984375, 24.11328125, 28.1181640625, 32.123046875, 36.1279296875, 40.1328125, 44.1376953125, 48.142578125, 52.1474609375, 56.15234375, 60.1572265625, 64.162109375, 68.1669921875, 72.171875, 76.1767578125, 80.181640625, 84.1865234375, 88.19140625, 92.1962890625, 96.201171875, 100.2060546875, 104.2109375, 108.2158203125, 112.220703125, 116.2255859375, 120.23046875, 124.2353515625, 128.240234375, 132.2451171875, 136.25]}, "gradients/decoder.transformer.h.16.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 5.0, 4.0, 3.0, 5.0, 13.0, 20.0, 29.0, 42.0, 96.0, 170.0, 320.0, 745.0, 1798.0, 4836.0, 15441.0, 80991.0, 3948966.0, 113932.0, 17921.0, 5494.0, 1970.0, 774.0, 359.0, 158.0, 93.0, 43.0, 18.0, 16.0, 9.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-286.0, -277.4765625, -268.953125, -260.4296875, -251.90625, -243.3828125, -234.859375, -226.3359375, -217.8125, -209.2890625, -200.765625, -192.2421875, -183.71875, -175.1953125, -166.671875, -158.1484375, -149.625, -141.1015625, -132.578125, -124.0546875, -115.53125, -107.0078125, -98.484375, -89.9609375, -81.4375, -72.9140625, -64.390625, -55.8671875, -47.34375, -38.8203125, -30.296875, -21.7734375, -13.25, -4.7265625, 3.796875, 12.3203125, 20.84375, 29.3671875, 37.890625, 46.4140625, 54.9375, 63.4609375, 71.984375, 80.5078125, 89.03125, 97.5546875, 106.078125, 114.6015625, 123.125, 131.6484375, 140.171875, 148.6953125, 157.21875, 165.7421875, 174.265625, 182.7890625, 191.3125, 199.8359375, 208.359375, 216.8828125, 225.40625, 233.9296875, 242.453125, 250.9765625, 259.5]}, "gradients/decoder.transformer.h.16.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 5.0, 5.0, 3.0, 26.0, 23.0, 25.0, 48.0, 89.0, 154.0, 242.0, 153.0, 66.0, 63.0, 34.0, 23.0, 11.0, 17.0, 4.0, 11.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-526.2769165039062, -507.6903991699219, -489.1039123535156, -470.51739501953125, -451.930908203125, -433.3443908691406, -414.75787353515625, -396.17138671875, -377.5848693847656, -358.99835205078125, -340.411865234375, -321.8253479003906, -303.23883056640625, -284.65234375, -266.0658264160156, -247.4793243408203, -228.892822265625, -210.3063201904297, -191.71981811523438, -173.13330078125, -154.5467987060547, -135.96029663085938, -117.37378692626953, -98.78727722167969, -80.20077514648438, -61.6142692565918, -43.02776336669922, -24.44125747680664, -5.8547515869140625, 12.73175048828125, 31.318260192871094, 49.90476989746094, 68.4913330078125, 87.07783508300781, 105.66434478759766, 124.2508544921875, 142.8373565673828, 161.42385864257812, 180.0103759765625, 198.5968780517578, 217.18338012695312, 235.76988220214844, 254.35638427734375, 272.9429016113281, 291.5294189453125, 310.11590576171875, 328.7024230957031, 347.2889404296875, 365.87542724609375, 384.4619445800781, 403.0484313964844, 421.63494873046875, 440.221435546875, 458.8079528808594, 477.39447021484375, 495.98095703125, 514.5675048828125, 533.1539916992188, 551.7405395507812, 570.3270263671875, 588.9135131835938, 607.5, 626.0865478515625, 644.6730346679688, 663.259521484375]}, "gradients/decoder.transformer.h.16.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 6.0, 1.0, 3.0, 6.0, 8.0, 15.0, 9.0, 7.0, 15.0, 15.0, 27.0, 24.0, 28.0, 29.0, 34.0, 39.0, 49.0, 39.0, 47.0, 51.0, 47.0, 55.0, 45.0, 48.0, 48.0, 35.0, 41.0, 33.0, 37.0, 29.0, 23.0, 26.0, 18.0, 16.0, 15.0, 7.0, 9.0, 7.0, 6.0, 5.0, 3.0, 1.0, 6.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-267.3556213378906, -258.1657409667969, -248.97589111328125, -239.7860107421875, -230.5961456298828, -221.40628051757812, -212.21641540527344, -203.02655029296875, -193.836669921875, -184.6468048095703, -175.45693969726562, -166.26705932617188, -157.0771942138672, -147.8873291015625, -138.6974639892578, -129.50759887695312, -120.31773376464844, -111.12786865234375, -101.93799591064453, -92.74813079833984, -83.55825805664062, -74.36839294433594, -65.17852783203125, -55.98865509033203, -46.798789978027344, -37.60892105102539, -28.41905403137207, -19.22918701171875, -10.039318084716797, -0.8494491577148438, 8.340415954589844, 17.530288696289062, 26.72015380859375, 35.9100227355957, 45.099891662597656, 54.289756774902344, 63.4796257019043, 72.66949462890625, 81.85935974121094, 91.04923248291016, 100.23909759521484, 109.42896270751953, 118.61883544921875, 127.80870056152344, 136.99856567382812, 146.18844604492188, 155.3782958984375, 164.56817626953125, 173.75804138183594, 182.94790649414062, 192.1377716064453, 201.32763671875, 210.51751708984375, 219.70738220214844, 228.89724731445312, 238.08712768554688, 247.2769775390625, 256.46685791015625, 265.6567077636719, 274.8465881347656, 284.03643798828125, 293.226318359375, 302.41619873046875, 311.6060485839844, 320.7959289550781]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 7.0, 6.0, 6.0, 6.0, 6.0, 5.0, 15.0, 12.0, 24.0, 22.0, 19.0, 32.0, 39.0, 27.0, 28.0, 28.0, 39.0, 41.0, 50.0, 55.0, 35.0, 49.0, 51.0, 45.0, 48.0, 50.0, 38.0, 29.0, 30.0, 29.0, 28.0, 24.0, 20.0, 11.0, 11.0, 12.0, 9.0, 4.0, 9.0, 5.0, 2.0, 2.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.5, -39.2998046875, -38.099609375, -36.8994140625, -35.69921875, -34.4990234375, -33.298828125, -32.0986328125, -30.8984375, -29.6982421875, -28.498046875, -27.2978515625, -26.09765625, -24.8974609375, -23.697265625, -22.4970703125, -21.296875, -20.0966796875, -18.896484375, -17.6962890625, -16.49609375, -15.2958984375, -14.095703125, -12.8955078125, -11.6953125, -10.4951171875, -9.294921875, -8.0947265625, -6.89453125, -5.6943359375, -4.494140625, -3.2939453125, -2.09375, -0.8935546875, 0.306640625, 1.5068359375, 2.70703125, 3.9072265625, 5.107421875, 6.3076171875, 7.5078125, 8.7080078125, 9.908203125, 11.1083984375, 12.30859375, 13.5087890625, 14.708984375, 15.9091796875, 17.109375, 18.3095703125, 19.509765625, 20.7099609375, 21.91015625, 23.1103515625, 24.310546875, 25.5107421875, 26.7109375, 27.9111328125, 29.111328125, 30.3115234375, 31.51171875, 32.7119140625, 33.912109375, 35.1123046875, 36.3125]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 4.0, 11.0, 7.0, 11.0, 15.0, 34.0, 42.0, 55.0, 108.0, 162.0, 223.0, 329.0, 491.0, 750.0, 1096.0, 1769.0, 2492.0, 3895.0, 6036.0, 9192.0, 14599.0, 22901.0, 36243.0, 61083.0, 112519.0, 233094.0, 249141.0, 122071.0, 64665.0, 38417.0, 23810.0, 15137.0, 9812.0, 6207.0, 4144.0, 2778.0, 1724.0, 1175.0, 749.0, 488.0, 385.0, 207.0, 151.0, 101.0, 76.0, 52.0, 32.0, 25.0, 12.0, 14.0, 10.0, 4.0, 5.0, 2.0, 4.0, 2.0, 2.0], "bins": [-1.806640625, -1.751983642578125, -1.69732666015625, -1.642669677734375, -1.5880126953125, -1.533355712890625, -1.47869873046875, -1.424041748046875, -1.369384765625, -1.314727783203125, -1.26007080078125, -1.205413818359375, -1.1507568359375, -1.096099853515625, -1.04144287109375, -0.986785888671875, -0.93212890625, -0.877471923828125, -0.82281494140625, -0.768157958984375, -0.7135009765625, -0.658843994140625, -0.60418701171875, -0.549530029296875, -0.494873046875, -0.440216064453125, -0.38555908203125, -0.330902099609375, -0.2762451171875, -0.221588134765625, -0.16693115234375, -0.112274169921875, -0.0576171875, -0.002960205078125, 0.05169677734375, 0.106353759765625, 0.1610107421875, 0.215667724609375, 0.27032470703125, 0.324981689453125, 0.379638671875, 0.434295654296875, 0.48895263671875, 0.543609619140625, 0.5982666015625, 0.652923583984375, 0.70758056640625, 0.762237548828125, 0.81689453125, 0.871551513671875, 0.92620849609375, 0.980865478515625, 1.0355224609375, 1.090179443359375, 1.14483642578125, 1.199493408203125, 1.254150390625, 1.308807373046875, 1.36346435546875, 1.418121337890625, 1.4727783203125, 1.527435302734375, 1.58209228515625, 1.636749267578125, 1.69140625]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 7.0, 9.0, 19.0, 12.0, 10.0, 17.0, 13.0, 19.0, 19.0, 23.0, 29.0, 30.0, 36.0, 40.0, 45.0, 36.0, 35.0, 42.0, 42.0, 1065.0, 51.0, 28.0, 38.0, 40.0, 45.0, 41.0, 28.0, 27.0, 19.0, 24.0, 24.0, 18.0, 15.0, 14.0, 7.0, 10.0, 9.0, 6.0, 7.0, 5.0, 9.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-22.953125, -22.239501953125, -21.52587890625, -20.812255859375, -20.0986328125, -19.385009765625, -18.67138671875, -17.957763671875, -17.244140625, -16.530517578125, -15.81689453125, -15.103271484375, -14.3896484375, -13.676025390625, -12.96240234375, -12.248779296875, -11.53515625, -10.821533203125, -10.10791015625, -9.394287109375, -8.6806640625, -7.967041015625, -7.25341796875, -6.539794921875, -5.826171875, -5.112548828125, -4.39892578125, -3.685302734375, -2.9716796875, -2.258056640625, -1.54443359375, -0.830810546875, -0.1171875, 0.596435546875, 1.31005859375, 2.023681640625, 2.7373046875, 3.450927734375, 4.16455078125, 4.878173828125, 5.591796875, 6.305419921875, 7.01904296875, 7.732666015625, 8.4462890625, 9.159912109375, 9.87353515625, 10.587158203125, 11.30078125, 12.014404296875, 12.72802734375, 13.441650390625, 14.1552734375, 14.868896484375, 15.58251953125, 16.296142578125, 17.009765625, 17.723388671875, 18.43701171875, 19.150634765625, 19.8642578125, 20.577880859375, 21.29150390625, 22.005126953125, 22.71875]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 7.0, 5.0, 15.0, 9.0, 18.0, 17.0, 40.0, 53.0, 83.0, 115.0, 163.0, 273.0, 414.0, 602.0, 866.0, 1388.0, 2226.0, 3448.0, 5536.0, 8795.0, 14297.0, 24064.0, 42479.0, 78699.0, 169873.0, 1368688.0, 182401.0, 83465.0, 44085.0, 25066.0, 14966.0, 9129.0, 5701.0, 3652.0, 2296.0, 1446.0, 950.0, 612.0, 394.0, 262.0, 177.0, 127.0, 70.0, 50.0, 40.0, 25.0, 19.0, 14.0, 9.0, 5.0, 6.0, 0.0, 0.0, 2.0, 3.0, 0.0, 1.0], "bins": [-1.9189453125, -1.8599700927734375, -1.800994873046875, -1.7420196533203125, -1.68304443359375, -1.6240692138671875, -1.565093994140625, -1.5061187744140625, -1.4471435546875, -1.3881683349609375, -1.329193115234375, -1.2702178955078125, -1.21124267578125, -1.1522674560546875, -1.093292236328125, -1.0343170166015625, -0.975341796875, -0.9163665771484375, -0.857391357421875, -0.7984161376953125, -0.73944091796875, -0.6804656982421875, -0.621490478515625, -0.5625152587890625, -0.5035400390625, -0.4445648193359375, -0.385589599609375, -0.3266143798828125, -0.26763916015625, -0.2086639404296875, -0.149688720703125, -0.0907135009765625, -0.03173828125, 0.0272369384765625, 0.086212158203125, 0.1451873779296875, 0.20416259765625, 0.2631378173828125, 0.322113037109375, 0.3810882568359375, 0.4400634765625, 0.4990386962890625, 0.558013916015625, 0.6169891357421875, 0.67596435546875, 0.7349395751953125, 0.793914794921875, 0.8528900146484375, 0.911865234375, 0.9708404541015625, 1.029815673828125, 1.0887908935546875, 1.14776611328125, 1.2067413330078125, 1.265716552734375, 1.3246917724609375, 1.3836669921875, 1.4426422119140625, 1.501617431640625, 1.5605926513671875, 1.61956787109375, 1.6785430908203125, 1.737518310546875, 1.7964935302734375, 1.85546875]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 3.0, 5.0, 2.0, 2.0, 5.0, 6.0, 8.0, 9.0, 13.0, 16.0, 20.0, 23.0, 25.0, 34.0, 49.0, 64.0, 69.0, 78.0, 79.0, 82.0, 71.0, 62.0, 53.0, 42.0, 33.0, 24.0, 19.0, 18.0, 19.0, 16.0, 13.0, 9.0, 4.0, 14.0, 5.0, 5.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0274658203125, -0.02664923667907715, -0.025832653045654297, -0.025016069412231445, -0.024199485778808594, -0.023382902145385742, -0.02256631851196289, -0.02174973487854004, -0.020933151245117188, -0.020116567611694336, -0.019299983978271484, -0.018483400344848633, -0.01766681671142578, -0.01685023307800293, -0.016033649444580078, -0.015217065811157227, -0.014400482177734375, -0.013583898544311523, -0.012767314910888672, -0.01195073127746582, -0.011134147644042969, -0.010317564010620117, -0.009500980377197266, -0.008684396743774414, -0.007867813110351562, -0.007051229476928711, -0.006234645843505859, -0.005418062210083008, -0.004601478576660156, -0.0037848949432373047, -0.002968311309814453, -0.0021517276763916016, -0.00133514404296875, -0.0005185604095458984, 0.0002980232238769531, 0.0011146068572998047, 0.0019311904907226562, 0.002747774124145508, 0.0035643577575683594, 0.004380941390991211, 0.0051975250244140625, 0.006014108657836914, 0.006830692291259766, 0.007647275924682617, 0.008463859558105469, 0.00928044319152832, 0.010097026824951172, 0.010913610458374023, 0.011730194091796875, 0.012546777725219727, 0.013363361358642578, 0.01417994499206543, 0.014996528625488281, 0.015813112258911133, 0.016629695892333984, 0.017446279525756836, 0.018262863159179688, 0.01907944679260254, 0.01989603042602539, 0.020712614059448242, 0.021529197692871094, 0.022345781326293945, 0.023162364959716797, 0.02397894859313965, 0.0247955322265625]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 5.0, 4.0, 5.0, 8.0, 12.0, 5.0, 9.0, 17.0, 19.0, 37.0, 39.0, 54.0, 51.0, 77.0, 108.0, 178.0, 481.0, 2820.0, 45435.0, 952243.0, 43156.0, 2723.0, 467.0, 203.0, 116.0, 73.0, 62.0, 39.0, 28.0, 20.0, 16.0, 11.0, 10.0, 7.0, 7.0, 3.0, 5.0, 4.0, 0.0, 0.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.382568359375, -0.3700218200683594, -0.35747528076171875, -0.3449287414550781, -0.3323822021484375, -0.3198356628417969, -0.30728912353515625, -0.2947425842285156, -0.282196044921875, -0.2696495056152344, -0.25710296630859375, -0.24455642700195312, -0.2320098876953125, -0.21946334838867188, -0.20691680908203125, -0.19437026977539062, -0.18182373046875, -0.16927719116210938, -0.15673065185546875, -0.14418411254882812, -0.1316375732421875, -0.11909103393554688, -0.10654449462890625, -0.09399795532226562, -0.081451416015625, -0.06890487670898438, -0.05635833740234375, -0.043811798095703125, -0.0312652587890625, -0.018718719482421875, -0.00617218017578125, 0.006374359130859375, 0.0189208984375, 0.031467437744140625, 0.04401397705078125, 0.056560516357421875, 0.0691070556640625, 0.08165359497070312, 0.09420013427734375, 0.10674667358398438, 0.119293212890625, 0.13183975219726562, 0.14438629150390625, 0.15693283081054688, 0.1694793701171875, 0.18202590942382812, 0.19457244873046875, 0.20711898803710938, 0.21966552734375, 0.23221206665039062, 0.24475860595703125, 0.2573051452636719, 0.2698516845703125, 0.2823982238769531, 0.29494476318359375, 0.3074913024902344, 0.320037841796875, 0.3325843811035156, 0.34513092041015625, 0.3576774597167969, 0.3702239990234375, 0.3827705383300781, 0.39531707763671875, 0.4078636169433594, 0.42041015625]}, "gradients/decoder.transformer.h.16.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 8.0, 46.0, 239.0, 616.0, 87.0, 13.0, 6.0], "bins": [-0.20605339109897614, -0.20263195037841797, -0.1992105096578598, -0.19578906893730164, -0.19236762821674347, -0.1889461874961853, -0.18552473187446594, -0.18210329115390778, -0.1786818504333496, -0.17526040971279144, -0.17183896899223328, -0.1684175282716751, -0.16499608755111694, -0.16157463192939758, -0.15815319120883942, -0.15473175048828125, -0.15131030976772308, -0.14788886904716492, -0.14446742832660675, -0.14104598760604858, -0.13762454688549042, -0.13420310616493225, -0.1307816505432129, -0.12736020982265472, -0.12393877655267715, -0.12051733583211899, -0.11709589511156082, -0.11367444694042206, -0.11025300621986389, -0.10683156549930573, -0.10341012477874756, -0.09998868405818939, -0.09656722843647003, -0.09314578771591187, -0.0897243469953537, -0.08630289882421494, -0.08288145810365677, -0.0794600173830986, -0.07603857666254044, -0.07261712849140167, -0.0691956952214241, -0.06577425450086594, -0.06235281005501747, -0.058931369334459305, -0.05550992488861084, -0.05208848416805267, -0.04866704344749451, -0.04524559900164604, -0.04182415455579758, -0.03840271383523941, -0.034981269389390945, -0.03155982866883278, -0.028138384222984314, -0.024716943502426147, -0.02129550091922283, -0.017874058336019516, -0.0144526157528162, -0.011031173169612885, -0.007609731052070856, -0.004188288934528828, -0.0007668463513255119, 0.002654595300555229, 0.006076037883758545, 0.00949748046696186, 0.012918923050165176]}, "gradients/decoder.transformer.h.16.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 1.0, 0.0, 4.0, 4.0, 8.0, 9.0, 10.0, 14.0, 13.0, 18.0, 20.0, 25.0, 20.0, 23.0, 26.0, 29.0, 34.0, 36.0, 47.0, 46.0, 54.0, 38.0, 54.0, 48.0, 31.0, 35.0, 37.0, 31.0, 39.0, 38.0, 27.0, 36.0, 26.0, 24.0, 25.0, 18.0, 11.0, 16.0, 12.0, 6.0, 5.0, 4.0, 0.0, 4.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.014981687068939209, -0.014536437578499317, -0.014091188088059425, -0.013645938597619534, -0.013200689107179642, -0.01275543961673975, -0.012310190126299858, -0.011864940635859966, -0.011419691145420074, -0.010974441654980183, -0.01052919216454029, -0.010083942674100399, -0.009638693183660507, -0.009193443693220615, -0.008748194202780724, -0.008302944712340832, -0.00785769522190094, -0.007412445731461048, -0.006967196241021156, -0.0065219467505812645, -0.006076697260141373, -0.005631447769701481, -0.005186198279261589, -0.004740948788821697, -0.004295699298381805, -0.0038504498079419136, -0.003405200317502022, -0.00295995082706213, -0.002514701336622238, -0.0020694518461823463, -0.0016242023557424545, -0.0011789528653025627, -0.0007337033748626709, -0.0002884538844227791, 0.00015679560601711273, 0.0006020450964570045, 0.0010472945868968964, 0.0014925440773367882, 0.00193779356777668, 0.002383043058216572, 0.0028282925486564636, 0.0032735420390963554, 0.0037187915295362473, 0.004164041019976139, 0.004609290510416031, 0.005054540000855923, 0.0054997894912958145, 0.005945038981735706, 0.006390288472175598, 0.00683553796261549, 0.007280787453055382, 0.007726036943495274, 0.008171286433935165, 0.008616535924375057, 0.009061785414814949, 0.00950703490525484, 0.009952284395694733, 0.010397533886134624, 0.010842783376574516, 0.011288032867014408, 0.0117332823574543, 0.012178531847894192, 0.012623781338334084, 0.013069030828773975, 0.013514280319213867]}, "gradients/decoder.transformer.h.16.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 7.0, 6.0, 6.0, 6.0, 6.0, 5.0, 15.0, 12.0, 24.0, 22.0, 19.0, 32.0, 39.0, 27.0, 28.0, 28.0, 39.0, 41.0, 50.0, 55.0, 35.0, 49.0, 51.0, 45.0, 48.0, 50.0, 38.0, 29.0, 30.0, 29.0, 28.0, 24.0, 20.0, 11.0, 11.0, 12.0, 9.0, 4.0, 9.0, 5.0, 2.0, 2.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.5, -39.2998046875, -38.099609375, -36.8994140625, -35.69921875, -34.4990234375, -33.298828125, -32.0986328125, -30.8984375, -29.6982421875, -28.498046875, -27.2978515625, -26.09765625, -24.8974609375, -23.697265625, -22.4970703125, -21.296875, -20.0966796875, -18.896484375, -17.6962890625, -16.49609375, -15.2958984375, -14.095703125, -12.8955078125, -11.6953125, -10.4951171875, -9.294921875, -8.0947265625, -6.89453125, -5.6943359375, -4.494140625, -3.2939453125, -2.09375, -0.8935546875, 0.306640625, 1.5068359375, 2.70703125, 3.9072265625, 5.107421875, 6.3076171875, 7.5078125, 8.7080078125, 9.908203125, 11.1083984375, 12.30859375, 13.5087890625, 14.708984375, 15.9091796875, 17.109375, 18.3095703125, 19.509765625, 20.7099609375, 21.91015625, 23.1103515625, 24.310546875, 25.5107421875, 26.7109375, 27.9111328125, 29.111328125, 30.3115234375, 31.51171875, 32.7119140625, 33.912109375, 35.1123046875, 36.3125]}, "gradients/decoder.transformer.h.16.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 4.0, 3.0, 6.0, 7.0, 9.0, 12.0, 10.0, 23.0, 35.0, 45.0, 63.0, 106.0, 119.0, 192.0, 235.0, 413.0, 573.0, 778.0, 1129.0, 1648.0, 2573.0, 4523.0, 10602.0, 96319.0, 859261.0, 50058.0, 8480.0, 3936.0, 2405.0, 1548.0, 1062.0, 725.0, 494.0, 333.0, 244.0, 186.0, 107.0, 92.0, 66.0, 39.0, 24.0, 24.0, 24.0, 13.0, 6.0, 5.0, 1.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-55.375, -53.76513671875, -52.1552734375, -50.54541015625, -48.935546875, -47.32568359375, -45.7158203125, -44.10595703125, -42.49609375, -40.88623046875, -39.2763671875, -37.66650390625, -36.056640625, -34.44677734375, -32.8369140625, -31.22705078125, -29.6171875, -28.00732421875, -26.3974609375, -24.78759765625, -23.177734375, -21.56787109375, -19.9580078125, -18.34814453125, -16.73828125, -15.12841796875, -13.5185546875, -11.90869140625, -10.298828125, -8.68896484375, -7.0791015625, -5.46923828125, -3.859375, -2.24951171875, -0.6396484375, 0.97021484375, 2.580078125, 4.18994140625, 5.7998046875, 7.40966796875, 9.01953125, 10.62939453125, 12.2392578125, 13.84912109375, 15.458984375, 17.06884765625, 18.6787109375, 20.28857421875, 21.8984375, 23.50830078125, 25.1181640625, 26.72802734375, 28.337890625, 29.94775390625, 31.5576171875, 33.16748046875, 34.77734375, 36.38720703125, 37.9970703125, 39.60693359375, 41.216796875, 42.82666015625, 44.4365234375, 46.04638671875, 47.65625]}, "gradients/decoder.transformer.h.16.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 4.0, 7.0, 8.0, 7.0, 16.0, 10.0, 15.0, 19.0, 21.0, 30.0, 26.0, 32.0, 34.0, 48.0, 40.0, 41.0, 33.0, 51.0, 61.0, 171.0, 1823.0, 114.0, 54.0, 44.0, 36.0, 39.0, 29.0, 38.0, 33.0, 16.0, 27.0, 18.0, 14.0, 12.0, 11.0, 16.0, 9.0, 12.0, 11.0, 8.0, 5.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-104.9375, -101.4912109375, -98.044921875, -94.5986328125, -91.15234375, -87.7060546875, -84.259765625, -80.8134765625, -77.3671875, -73.9208984375, -70.474609375, -67.0283203125, -63.58203125, -60.1357421875, -56.689453125, -53.2431640625, -49.796875, -46.3505859375, -42.904296875, -39.4580078125, -36.01171875, -32.5654296875, -29.119140625, -25.6728515625, -22.2265625, -18.7802734375, -15.333984375, -11.8876953125, -8.44140625, -4.9951171875, -1.548828125, 1.8974609375, 5.34375, 8.7900390625, 12.236328125, 15.6826171875, 19.12890625, 22.5751953125, 26.021484375, 29.4677734375, 32.9140625, 36.3603515625, 39.806640625, 43.2529296875, 46.69921875, 50.1455078125, 53.591796875, 57.0380859375, 60.484375, 63.9306640625, 67.376953125, 70.8232421875, 74.26953125, 77.7158203125, 81.162109375, 84.6083984375, 88.0546875, 91.5009765625, 94.947265625, 98.3935546875, 101.83984375, 105.2861328125, 108.732421875, 112.1787109375, 115.625]}, "gradients/decoder.transformer.h.16.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 4.0, 2.0, 3.0, 6.0, 8.0, 12.0, 28.0, 52.0, 73.0, 276.0, 1141.0, 11141.0, 3123284.0, 8209.0, 1016.0, 251.0, 88.0, 49.0, 21.0, 17.0, 7.0, 7.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-542.5, -522.734375, -502.96875, -483.203125, -463.4375, -443.671875, -423.90625, -404.140625, -384.375, -364.609375, -344.84375, -325.078125, -305.3125, -285.546875, -265.78125, -246.015625, -226.25, -206.484375, -186.71875, -166.953125, -147.1875, -127.421875, -107.65625, -87.890625, -68.125, -48.359375, -28.59375, -8.828125, 10.9375, 30.703125, 50.46875, 70.234375, 90.0, 109.765625, 129.53125, 149.296875, 169.0625, 188.828125, 208.59375, 228.359375, 248.125, 267.890625, 287.65625, 307.421875, 327.1875, 346.953125, 366.71875, 386.484375, 406.25, 426.015625, 445.78125, 465.546875, 485.3125, 505.078125, 524.84375, 544.609375, 564.375, 584.140625, 603.90625, 623.671875, 643.4375, 663.203125, 682.96875, 702.734375, 722.5]}, "gradients/decoder.transformer.h.16.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 20.0, 877.0, 123.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-417.8130187988281, -363.2269592285156, -308.64093017578125, -254.05487060546875, -199.4688262939453, -144.88278198242188, -90.29672241210938, -35.710693359375, 18.8753662109375, 73.46141052246094, 128.04745483398438, 182.63351440429688, 237.2195587158203, 291.80560302734375, 346.39166259765625, 400.9776916503906, 455.5637512207031, 510.1498107910156, 564.73583984375, 619.3218994140625, 673.907958984375, 728.4940185546875, 783.080078125, 837.6660766601562, 892.2521362304688, 946.8381958007812, 1001.4242553710938, 1056.01025390625, 1110.5963134765625, 1165.182373046875, 1219.7684326171875, 1274.3544921875, 1328.9405517578125, 1383.526611328125, 1438.1126708984375, 1492.69873046875, 1547.2847900390625, 1601.870849609375, 1656.456787109375, 1711.0428466796875, 1765.62890625, 1820.2149658203125, 1874.801025390625, 1929.3870849609375, 1983.97314453125, 2038.55908203125, 2093.145263671875, 2147.731201171875, 2202.3173828125, 2256.9033203125, 2311.489501953125, 2366.075439453125, 2420.66162109375, 2475.24755859375, 2529.833740234375, 2584.419677734375, 2639.005615234375, 2693.591552734375, 2748.177734375, 2802.763671875, 2857.349853515625, 2911.935791015625, 2966.52197265625, 3021.10791015625, 3075.694091796875]}, "gradients/decoder.transformer.h.16.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 5.0, 5.0, 6.0, 5.0, 14.0, 9.0, 12.0, 18.0, 18.0, 18.0, 21.0, 14.0, 27.0, 19.0, 31.0, 31.0, 28.0, 29.0, 39.0, 34.0, 41.0, 43.0, 49.0, 37.0, 37.0, 52.0, 44.0, 43.0, 35.0, 31.0, 24.0, 24.0, 22.0, 15.0, 21.0, 20.0, 14.0, 9.0, 15.0, 7.0, 12.0, 8.0, 9.0, 9.0, 2.0, 4.0, 2.0, 0.0, 1.0, 2.0, 2.0], "bins": [-350.0292663574219, -340.3514099121094, -330.67352294921875, -320.99566650390625, -311.31781005859375, -301.6399230957031, -291.9620666503906, -282.2841796875, -272.6063232421875, -262.928466796875, -253.25057983398438, -243.57272338867188, -233.8948516845703, -224.21697998046875, -214.53912353515625, -204.8612518310547, -195.18338012695312, -185.50550842285156, -175.82763671875, -166.1497802734375, -156.47190856933594, -146.79403686523438, -137.11618041992188, -127.43830871582031, -117.76043701171875, -108.08256530761719, -98.40470123291016, -88.72683715820312, -79.04896545410156, -69.37109375, -59.69322967529297, -50.01536560058594, -40.33746337890625, -30.659595489501953, -20.981727600097656, -11.30385971069336, -1.6259918212890625, 8.051876068115234, 17.72974395751953, 27.407608032226562, 37.085479736328125, 46.76334762573242, 56.44121551513672, 66.11907958984375, 75.79695129394531, 85.47482299804688, 95.1526870727539, 104.83055114746094, 114.5084228515625, 124.18629455566406, 133.86416625976562, 143.54202270507812, 153.2198944091797, 162.89776611328125, 172.57562255859375, 182.2534942626953, 191.93136596679688, 201.60923767089844, 211.287109375, 220.9649658203125, 230.64283752441406, 240.32070922851562, 249.99856567382812, 259.67645263671875, 269.35430908203125]}, "gradients/decoder.transformer.h.15.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 4.0, 7.0, 2.0, 3.0, 11.0, 6.0, 11.0, 14.0, 20.0, 19.0, 24.0, 18.0, 33.0, 35.0, 23.0, 32.0, 29.0, 44.0, 41.0, 48.0, 52.0, 51.0, 38.0, 49.0, 51.0, 43.0, 48.0, 32.0, 38.0, 29.0, 27.0, 17.0, 28.0, 18.0, 11.0, 11.0, 10.0, 10.0, 7.0, 5.0, 4.0, 1.0, 7.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.3125, -40.07666015625, -38.8408203125, -37.60498046875, -36.369140625, -35.13330078125, -33.8974609375, -32.66162109375, -31.42578125, -30.18994140625, -28.9541015625, -27.71826171875, -26.482421875, -25.24658203125, -24.0107421875, -22.77490234375, -21.5390625, -20.30322265625, -19.0673828125, -17.83154296875, -16.595703125, -15.35986328125, -14.1240234375, -12.88818359375, -11.65234375, -10.41650390625, -9.1806640625, -7.94482421875, -6.708984375, -5.47314453125, -4.2373046875, -3.00146484375, -1.765625, -0.52978515625, 0.7060546875, 1.94189453125, 3.177734375, 4.41357421875, 5.6494140625, 6.88525390625, 8.12109375, 9.35693359375, 10.5927734375, 11.82861328125, 13.064453125, 14.30029296875, 15.5361328125, 16.77197265625, 18.0078125, 19.24365234375, 20.4794921875, 21.71533203125, 22.951171875, 24.18701171875, 25.4228515625, 26.65869140625, 27.89453125, 29.13037109375, 30.3662109375, 31.60205078125, 32.837890625, 34.07373046875, 35.3095703125, 36.54541015625, 37.78125]}, "gradients/decoder.transformer.h.15.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 3.0, 3.0, 3.0, 6.0, 7.0, 8.0, 16.0, 14.0, 20.0, 29.0, 33.0, 54.0, 86.0, 97.0, 168.0, 211.0, 292.0, 432.0, 694.0, 1043.0, 1636.0, 2586.0, 4173.0, 7133.0, 14357.0, 91166.0, 1513143.0, 2366608.0, 150959.0, 19254.0, 7899.0, 4458.0, 2733.0, 1743.0, 1069.0, 720.0, 453.0, 301.0, 203.0, 144.0, 92.0, 81.0, 42.0, 44.0, 14.0, 21.0, 13.0, 4.0, 11.0, 4.0, 4.0, 2.0, 4.0, 0.0, 4.0], "bins": [-95.1875, -92.5498046875, -89.912109375, -87.2744140625, -84.63671875, -81.9990234375, -79.361328125, -76.7236328125, -74.0859375, -71.4482421875, -68.810546875, -66.1728515625, -63.53515625, -60.8974609375, -58.259765625, -55.6220703125, -52.984375, -50.3466796875, -47.708984375, -45.0712890625, -42.43359375, -39.7958984375, -37.158203125, -34.5205078125, -31.8828125, -29.2451171875, -26.607421875, -23.9697265625, -21.33203125, -18.6943359375, -16.056640625, -13.4189453125, -10.78125, -8.1435546875, -5.505859375, -2.8681640625, -0.23046875, 2.4072265625, 5.044921875, 7.6826171875, 10.3203125, 12.9580078125, 15.595703125, 18.2333984375, 20.87109375, 23.5087890625, 26.146484375, 28.7841796875, 31.421875, 34.0595703125, 36.697265625, 39.3349609375, 41.97265625, 44.6103515625, 47.248046875, 49.8857421875, 52.5234375, 55.1611328125, 57.798828125, 60.4365234375, 63.07421875, 65.7119140625, 68.349609375, 70.9873046875, 73.625]}, "gradients/decoder.transformer.h.15.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0, 2.0, 4.0, 5.0, 4.0, 6.0, 11.0, 12.0, 7.0, 19.0, 15.0, 24.0, 55.0, 70.0, 155.0, 380.0, 951.0, 1246.0, 589.0, 221.0, 108.0, 50.0, 34.0, 21.0, 23.0, 16.0, 4.0, 12.0, 10.0, 6.0, 5.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-120.625, -116.884765625, -113.14453125, -109.404296875, -105.6640625, -101.923828125, -98.18359375, -94.443359375, -90.703125, -86.962890625, -83.22265625, -79.482421875, -75.7421875, -72.001953125, -68.26171875, -64.521484375, -60.78125, -57.041015625, -53.30078125, -49.560546875, -45.8203125, -42.080078125, -38.33984375, -34.599609375, -30.859375, -27.119140625, -23.37890625, -19.638671875, -15.8984375, -12.158203125, -8.41796875, -4.677734375, -0.9375, 2.802734375, 6.54296875, 10.283203125, 14.0234375, 17.763671875, 21.50390625, 25.244140625, 28.984375, 32.724609375, 36.46484375, 40.205078125, 43.9453125, 47.685546875, 51.42578125, 55.166015625, 58.90625, 62.646484375, 66.38671875, 70.126953125, 73.8671875, 77.607421875, 81.34765625, 85.087890625, 88.828125, 92.568359375, 96.30859375, 100.048828125, 103.7890625, 107.529296875, 111.26953125, 115.009765625, 118.75]}, "gradients/decoder.transformer.h.15.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 4.0, 4.0, 11.0, 12.0, 18.0, 38.0, 39.0, 60.0, 111.0, 157.0, 241.0, 329.0, 584.0, 961.0, 1704.0, 3098.0, 5944.0, 12259.0, 31327.0, 136399.0, 3565833.0, 357119.0, 45715.0, 16172.0, 7416.0, 3700.0, 1996.0, 1103.0, 686.0, 438.0, 255.0, 183.0, 116.0, 95.0, 48.0, 33.0, 26.0, 21.0, 13.0, 10.0, 4.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-160.25, -155.34375, -150.4375, -145.53125, -140.625, -135.71875, -130.8125, -125.90625, -121.0, -116.09375, -111.1875, -106.28125, -101.375, -96.46875, -91.5625, -86.65625, -81.75, -76.84375, -71.9375, -67.03125, -62.125, -57.21875, -52.3125, -47.40625, -42.5, -37.59375, -32.6875, -27.78125, -22.875, -17.96875, -13.0625, -8.15625, -3.25, 1.65625, 6.5625, 11.46875, 16.375, 21.28125, 26.1875, 31.09375, 36.0, 40.90625, 45.8125, 50.71875, 55.625, 60.53125, 65.4375, 70.34375, 75.25, 80.15625, 85.0625, 89.96875, 94.875, 99.78125, 104.6875, 109.59375, 114.5, 119.40625, 124.3125, 129.21875, 134.125, 139.03125, 143.9375, 148.84375, 153.75]}, "gradients/decoder.transformer.h.15.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 7.0, 14.0, 25.0, 48.0, 84.0, 169.0, 268.0, 190.0, 79.0, 47.0, 41.0, 15.0, 11.0, 6.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-299.8214111328125, -276.0696716308594, -252.31793212890625, -228.56619262695312, -204.814453125, -181.06271362304688, -157.3109588623047, -133.55921936035156, -109.80747985839844, -86.05574035644531, -62.30399703979492, -38.55225372314453, -14.800514221191406, 8.951225280761719, 32.702972412109375, 56.4547119140625, 80.20645141601562, 103.95819091796875, 127.70993041992188, 151.461669921875, 175.21340942382812, 198.96514892578125, 222.71690368652344, 246.46864318847656, 270.22039794921875, 293.9721374511719, 317.723876953125, 341.4756164550781, 365.22735595703125, 388.9790954589844, 412.7308349609375, 436.48260498046875, 460.23431396484375, 483.9860534667969, 507.73779296875, 531.4895629882812, 555.2412719726562, 578.9930419921875, 602.7447509765625, 626.4965209960938, 650.2482299804688, 674.0, 697.751708984375, 721.5034790039062, 745.2551879882812, 769.0069580078125, 792.7586669921875, 816.5104370117188, 840.26220703125, 864.0139770507812, 887.7656860351562, 911.5174560546875, 935.2691650390625, 959.0209350585938, 982.7726440429688, 1006.5244140625, 1030.276123046875, 1054.02783203125, 1077.7796630859375, 1101.5313720703125, 1125.2830810546875, 1149.0347900390625, 1172.78662109375, 1196.538330078125, 1220.2900390625]}, "gradients/decoder.transformer.h.15.ln_2.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 3.0, 4.0, 4.0, 12.0, 9.0, 12.0, 13.0, 11.0, 14.0, 15.0, 15.0, 20.0, 26.0, 17.0, 34.0, 30.0, 35.0, 31.0, 36.0, 31.0, 39.0, 30.0, 40.0, 38.0, 39.0, 40.0, 42.0, 36.0, 37.0, 32.0, 31.0, 32.0, 19.0, 20.0, 21.0, 18.0, 21.0, 17.0, 18.0, 10.0, 19.0, 6.0, 8.0, 4.0, 5.0, 6.0, 6.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-227.4165802001953, -220.11477661132812, -212.81298828125, -205.5111846923828, -198.20938110351562, -190.90757751464844, -183.60577392578125, -176.30398559570312, -169.00218200683594, -161.70037841796875, -154.39859008789062, -147.09678649902344, -139.79498291015625, -132.49317932128906, -125.1913833618164, -117.88958740234375, -110.58778381347656, -103.28598022460938, -95.98418426513672, -88.68238830566406, -81.38058471679688, -74.07878112792969, -66.77698516845703, -59.47518539428711, -52.17338562011719, -44.871585845947266, -37.569786071777344, -30.267986297607422, -22.9661865234375, -15.664386749267578, -8.362586975097656, -1.0607872009277344, 6.24102783203125, 13.542827606201172, 20.844627380371094, 28.146427154541016, 35.44822692871094, 42.75002670288086, 50.05182647705078, 57.3536262512207, 64.65542602539062, 71.95722961425781, 79.25902557373047, 86.56082153320312, 93.86262512207031, 101.1644287109375, 108.46622467041016, 115.76802062988281, 123.06982421875, 130.3716278076172, 137.67343139648438, 144.9752197265625, 152.2770233154297, 159.57882690429688, 166.880615234375, 174.1824188232422, 181.48422241210938, 188.78602600097656, 196.08782958984375, 203.38961791992188, 210.69142150878906, 217.99322509765625, 225.29501342773438, 232.59681701660156, 239.89862060546875]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 6.0, 4.0, 9.0, 6.0, 7.0, 8.0, 9.0, 16.0, 16.0, 15.0, 15.0, 13.0, 25.0, 27.0, 29.0, 26.0, 45.0, 35.0, 30.0, 37.0, 39.0, 31.0, 43.0, 37.0, 55.0, 46.0, 43.0, 33.0, 46.0, 28.0, 31.0, 23.0, 26.0, 27.0, 27.0, 17.0, 11.0, 15.0, 14.0, 7.0, 7.0, 5.0, 5.0, 5.0, 2.0, 0.0, 3.0, 4.0, 1.0, 1.0, 2.0], "bins": [-40.21875, -39.09228515625, -37.9658203125, -36.83935546875, -35.712890625, -34.58642578125, -33.4599609375, -32.33349609375, -31.20703125, -30.08056640625, -28.9541015625, -27.82763671875, -26.701171875, -25.57470703125, -24.4482421875, -23.32177734375, -22.1953125, -21.06884765625, -19.9423828125, -18.81591796875, -17.689453125, -16.56298828125, -15.4365234375, -14.31005859375, -13.18359375, -12.05712890625, -10.9306640625, -9.80419921875, -8.677734375, -7.55126953125, -6.4248046875, -5.29833984375, -4.171875, -3.04541015625, -1.9189453125, -0.79248046875, 0.333984375, 1.46044921875, 2.5869140625, 3.71337890625, 4.83984375, 5.96630859375, 7.0927734375, 8.21923828125, 9.345703125, 10.47216796875, 11.5986328125, 12.72509765625, 13.8515625, 14.97802734375, 16.1044921875, 17.23095703125, 18.357421875, 19.48388671875, 20.6103515625, 21.73681640625, 22.86328125, 23.98974609375, 25.1162109375, 26.24267578125, 27.369140625, 28.49560546875, 29.6220703125, 30.74853515625, 31.875]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 6.0, 1.0, 16.0, 8.0, 10.0, 22.0, 34.0, 68.0, 101.0, 144.0, 214.0, 336.0, 450.0, 692.0, 1015.0, 1506.0, 2404.0, 3497.0, 5333.0, 8157.0, 12610.0, 19990.0, 32515.0, 54584.0, 97890.0, 189910.0, 266712.0, 150307.0, 78757.0, 45322.0, 27566.0, 16976.0, 10794.0, 7053.0, 4587.0, 2944.0, 2002.0, 1311.0, 866.0, 622.0, 395.0, 272.0, 163.0, 130.0, 100.0, 50.0, 41.0, 21.0, 22.0, 15.0, 9.0, 9.0, 4.0, 5.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.5849609375, -1.5290069580078125, -1.473052978515625, -1.4170989990234375, -1.36114501953125, -1.3051910400390625, -1.249237060546875, -1.1932830810546875, -1.1373291015625, -1.0813751220703125, -1.025421142578125, -0.9694671630859375, -0.91351318359375, -0.8575592041015625, -0.801605224609375, -0.7456512451171875, -0.689697265625, -0.6337432861328125, -0.577789306640625, -0.5218353271484375, -0.46588134765625, -0.4099273681640625, -0.353973388671875, -0.2980194091796875, -0.2420654296875, -0.1861114501953125, -0.130157470703125, -0.0742034912109375, -0.01824951171875, 0.0377044677734375, 0.093658447265625, 0.1496124267578125, 0.20556640625, 0.2615203857421875, 0.317474365234375, 0.3734283447265625, 0.42938232421875, 0.4853363037109375, 0.541290283203125, 0.5972442626953125, 0.6531982421875, 0.7091522216796875, 0.765106201171875, 0.8210601806640625, 0.87701416015625, 0.9329681396484375, 0.988922119140625, 1.0448760986328125, 1.100830078125, 1.1567840576171875, 1.212738037109375, 1.2686920166015625, 1.32464599609375, 1.3805999755859375, 1.436553955078125, 1.4925079345703125, 1.5484619140625, 1.6044158935546875, 1.660369873046875, 1.7163238525390625, 1.77227783203125, 1.8282318115234375, 1.884185791015625, 1.9401397705078125, 1.99609375]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 4.0, 2.0, 3.0, 4.0, 6.0, 4.0, 8.0, 9.0, 10.0, 13.0, 9.0, 13.0, 18.0, 16.0, 30.0, 23.0, 24.0, 32.0, 35.0, 25.0, 31.0, 40.0, 33.0, 41.0, 33.0, 1061.0, 51.0, 41.0, 34.0, 45.0, 38.0, 36.0, 32.0, 29.0, 24.0, 17.0, 20.0, 24.0, 16.0, 15.0, 12.0, 14.0, 7.0, 12.0, 6.0, 6.0, 7.0, 3.0, 6.0, 2.0, 3.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0], "bins": [-20.671875, -19.974365234375, -19.27685546875, -18.579345703125, -17.8818359375, -17.184326171875, -16.48681640625, -15.789306640625, -15.091796875, -14.394287109375, -13.69677734375, -12.999267578125, -12.3017578125, -11.604248046875, -10.90673828125, -10.209228515625, -9.51171875, -8.814208984375, -8.11669921875, -7.419189453125, -6.7216796875, -6.024169921875, -5.32666015625, -4.629150390625, -3.931640625, -3.234130859375, -2.53662109375, -1.839111328125, -1.1416015625, -0.444091796875, 0.25341796875, 0.950927734375, 1.6484375, 2.345947265625, 3.04345703125, 3.740966796875, 4.4384765625, 5.135986328125, 5.83349609375, 6.531005859375, 7.228515625, 7.926025390625, 8.62353515625, 9.321044921875, 10.0185546875, 10.716064453125, 11.41357421875, 12.111083984375, 12.80859375, 13.506103515625, 14.20361328125, 14.901123046875, 15.5986328125, 16.296142578125, 16.99365234375, 17.691162109375, 18.388671875, 19.086181640625, 19.78369140625, 20.481201171875, 21.1787109375, 21.876220703125, 22.57373046875, 23.271240234375, 23.96875]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 4.0, 3.0, 14.0, 9.0, 14.0, 13.0, 35.0, 50.0, 80.0, 105.0, 179.0, 223.0, 328.0, 519.0, 713.0, 1101.0, 1704.0, 2592.0, 3885.0, 6018.0, 9601.0, 15128.0, 25070.0, 43006.0, 80086.0, 172986.0, 1365798.0, 175979.0, 80386.0, 43733.0, 25210.0, 15388.0, 9541.0, 6082.0, 3873.0, 2546.0, 1701.0, 1112.0, 745.0, 476.0, 343.0, 222.0, 164.0, 121.0, 86.0, 52.0, 34.0, 23.0, 17.0, 14.0, 10.0, 8.0, 3.0, 6.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0], "bins": [-1.818359375, -1.758758544921875, -1.69915771484375, -1.639556884765625, -1.5799560546875, -1.520355224609375, -1.46075439453125, -1.401153564453125, -1.341552734375, -1.281951904296875, -1.22235107421875, -1.162750244140625, -1.1031494140625, -1.043548583984375, -0.98394775390625, -0.924346923828125, -0.86474609375, -0.805145263671875, -0.74554443359375, -0.685943603515625, -0.6263427734375, -0.566741943359375, -0.50714111328125, -0.447540283203125, -0.387939453125, -0.328338623046875, -0.26873779296875, -0.209136962890625, -0.1495361328125, -0.089935302734375, -0.03033447265625, 0.029266357421875, 0.0888671875, 0.148468017578125, 0.20806884765625, 0.267669677734375, 0.3272705078125, 0.386871337890625, 0.44647216796875, 0.506072998046875, 0.565673828125, 0.625274658203125, 0.68487548828125, 0.744476318359375, 0.8040771484375, 0.863677978515625, 0.92327880859375, 0.982879638671875, 1.04248046875, 1.102081298828125, 1.16168212890625, 1.221282958984375, 1.2808837890625, 1.340484619140625, 1.40008544921875, 1.459686279296875, 1.519287109375, 1.578887939453125, 1.63848876953125, 1.698089599609375, 1.7576904296875, 1.817291259765625, 1.87689208984375, 1.936492919921875, 1.99609375]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 7.0, 5.0, 10.0, 12.0, 10.0, 9.0, 10.0, 24.0, 24.0, 35.0, 41.0, 49.0, 46.0, 50.0, 83.0, 76.0, 91.0, 66.0, 71.0, 53.0, 37.0, 44.0, 31.0, 22.0, 17.0, 12.0, 9.0, 8.0, 8.0, 5.0, 8.0, 5.0, 4.0, 4.0, 6.0, 2.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0217742919921875, -0.021062612533569336, -0.020350933074951172, -0.019639253616333008, -0.018927574157714844, -0.01821589469909668, -0.017504215240478516, -0.01679253578186035, -0.016080856323242188, -0.015369176864624023, -0.01465749740600586, -0.013945817947387695, -0.013234138488769531, -0.012522459030151367, -0.011810779571533203, -0.011099100112915039, -0.010387420654296875, -0.009675741195678711, -0.008964061737060547, -0.008252382278442383, -0.007540702819824219, -0.006829023361206055, -0.006117343902587891, -0.0054056644439697266, -0.0046939849853515625, -0.0039823055267333984, -0.0032706260681152344, -0.0025589466094970703, -0.0018472671508789062, -0.0011355876922607422, -0.0004239082336425781, 0.00028777122497558594, 0.00099945068359375, 0.001711130142211914, 0.002422809600830078, 0.003134489059448242, 0.0038461685180664062, 0.00455784797668457, 0.005269527435302734, 0.0059812068939208984, 0.0066928863525390625, 0.0074045658111572266, 0.00811624526977539, 0.008827924728393555, 0.009539604187011719, 0.010251283645629883, 0.010962963104248047, 0.011674642562866211, 0.012386322021484375, 0.013098001480102539, 0.013809680938720703, 0.014521360397338867, 0.015233039855957031, 0.015944719314575195, 0.01665639877319336, 0.017368078231811523, 0.018079757690429688, 0.01879143714904785, 0.019503116607666016, 0.02021479606628418, 0.020926475524902344, 0.021638154983520508, 0.022349834442138672, 0.023061513900756836, 0.023773193359375]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 4.0, 6.0, 5.0, 4.0, 9.0, 9.0, 7.0, 14.0, 11.0, 14.0, 19.0, 33.0, 37.0, 51.0, 65.0, 99.0, 172.0, 516.0, 3303.0, 59391.0, 953349.0, 28517.0, 2069.0, 358.0, 156.0, 90.0, 75.0, 50.0, 28.0, 24.0, 16.0, 16.0, 10.0, 11.0, 4.0, 4.0, 4.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.39697265625, -0.3850860595703125, -0.373199462890625, -0.3613128662109375, -0.34942626953125, -0.3375396728515625, -0.325653076171875, -0.3137664794921875, -0.3018798828125, -0.2899932861328125, -0.278106689453125, -0.2662200927734375, -0.25433349609375, -0.2424468994140625, -0.230560302734375, -0.2186737060546875, -0.206787109375, -0.1949005126953125, -0.183013916015625, -0.1711273193359375, -0.15924072265625, -0.1473541259765625, -0.135467529296875, -0.1235809326171875, -0.1116943359375, -0.0998077392578125, -0.087921142578125, -0.0760345458984375, -0.06414794921875, -0.0522613525390625, -0.040374755859375, -0.0284881591796875, -0.0166015625, -0.0047149658203125, 0.007171630859375, 0.0190582275390625, 0.03094482421875, 0.0428314208984375, 0.054718017578125, 0.0666046142578125, 0.0784912109375, 0.0903778076171875, 0.102264404296875, 0.1141510009765625, 0.12603759765625, 0.1379241943359375, 0.149810791015625, 0.1616973876953125, 0.173583984375, 0.1854705810546875, 0.197357177734375, 0.2092437744140625, 0.22113037109375, 0.2330169677734375, 0.244903564453125, 0.2567901611328125, 0.2686767578125, 0.2805633544921875, 0.292449951171875, 0.3043365478515625, 0.31622314453125, 0.3281097412109375, 0.339996337890625, 0.3518829345703125, 0.36376953125]}, "gradients/decoder.transformer.h.15.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 47.0, 608.0, 317.0, 40.0, 7.0], "bins": [-0.2597496807575226, -0.2554858922958374, -0.2512221038341522, -0.24695831537246704, -0.24269452691078186, -0.23843073844909668, -0.2341669499874115, -0.22990316152572632, -0.22563937306404114, -0.22137558460235596, -0.21711179614067078, -0.2128480076789856, -0.20858421921730042, -0.20432043075561523, -0.20005664229393005, -0.19579285383224487, -0.1915290653705597, -0.1872652769088745, -0.18300148844718933, -0.17873769998550415, -0.17447391152381897, -0.1702101230621338, -0.1659463346004486, -0.16168254613876343, -0.15741875767707825, -0.15315496921539307, -0.14889118075370789, -0.1446273922920227, -0.14036360383033752, -0.13609981536865234, -0.13183602690696716, -0.12757223844528198, -0.1233084499835968, -0.11904466152191162, -0.11478087306022644, -0.11051708459854126, -0.10625329613685608, -0.1019895076751709, -0.09772571921348572, -0.09346193075180054, -0.08919814229011536, -0.08493435382843018, -0.080670565366745, -0.07640677690505981, -0.07214298844337463, -0.06787919998168945, -0.06361541152000427, -0.05935162305831909, -0.05508783459663391, -0.05082404613494873, -0.04656025767326355, -0.04229646921157837, -0.03803268074989319, -0.03376889228820801, -0.029505103826522827, -0.025241315364837646, -0.020977528765797615, -0.016713740304112434, -0.012449951842427254, -0.008186163380742073, -0.003922374919056892, 0.00034141354262828827, 0.004605202004313469, 0.00886899046599865, 0.01313277892768383]}, "gradients/decoder.transformer.h.15.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 2.0, 3.0, 5.0, 0.0, 6.0, 6.0, 7.0, 11.0, 10.0, 17.0, 21.0, 22.0, 28.0, 26.0, 30.0, 31.0, 29.0, 35.0, 40.0, 45.0, 58.0, 45.0, 44.0, 51.0, 36.0, 59.0, 46.0, 38.0, 32.0, 38.0, 24.0, 27.0, 24.0, 17.0, 19.0, 20.0, 12.0, 9.0, 8.0, 7.0, 5.0, 6.0, 5.0, 3.0, 5.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.011905908584594727, -0.01147399190813303, -0.011042075231671333, -0.010610158555209637, -0.01017824187874794, -0.009746325202286243, -0.009314408525824547, -0.00888249184936285, -0.008450575172901154, -0.008018658496439457, -0.00758674181997776, -0.007154825143516064, -0.006722908467054367, -0.0062909917905926704, -0.005859075114130974, -0.005427158437669277, -0.004995241761207581, -0.004563325084745884, -0.004131408408284187, -0.0036994917318224907, -0.003267575055360794, -0.0028356583788990974, -0.002403741702437401, -0.001971825025975704, -0.0015399083495140076, -0.001107991673052311, -0.0006760749965906143, -0.0002441583201289177, 0.00018775835633277893, 0.0006196750327944756, 0.0010515917092561722, 0.0014835083857178688, 0.0019154250621795654, 0.002347341738641262, 0.0027792584151029587, 0.0032111750915646553, 0.003643091768026352, 0.0040750084444880486, 0.004506925120949745, 0.004938841797411442, 0.005370758473873138, 0.005802675150334835, 0.006234591826796532, 0.006666508503258228, 0.007098425179719925, 0.0075303418561816216, 0.007962258532643318, 0.008394175209105015, 0.008826091885566711, 0.009258008562028408, 0.009689925238490105, 0.010121841914951801, 0.010553758591413498, 0.010985675267875195, 0.011417591944336891, 0.011849508620798588, 0.012281425297260284, 0.012713341973721981, 0.013145258650183678, 0.013577175326645374, 0.014009092003107071, 0.014441008679568768, 0.014872925356030464, 0.01530484203249216, 0.015736758708953857]}, "gradients/decoder.transformer.h.15.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 6.0, 4.0, 9.0, 6.0, 7.0, 8.0, 9.0, 16.0, 16.0, 15.0, 15.0, 13.0, 25.0, 26.0, 30.0, 26.0, 45.0, 35.0, 30.0, 37.0, 39.0, 31.0, 43.0, 37.0, 55.0, 46.0, 43.0, 33.0, 46.0, 27.0, 31.0, 24.0, 26.0, 27.0, 27.0, 17.0, 11.0, 15.0, 14.0, 7.0, 7.0, 5.0, 5.0, 5.0, 2.0, 0.0, 3.0, 4.0, 1.0, 1.0, 2.0], "bins": [-40.21875, -39.092529296875, -37.96630859375, -36.840087890625, -35.7138671875, -34.587646484375, -33.46142578125, -32.335205078125, -31.208984375, -30.082763671875, -28.95654296875, -27.830322265625, -26.7041015625, -25.577880859375, -24.45166015625, -23.325439453125, -22.19921875, -21.072998046875, -19.94677734375, -18.820556640625, -17.6943359375, -16.568115234375, -15.44189453125, -14.315673828125, -13.189453125, -12.063232421875, -10.93701171875, -9.810791015625, -8.6845703125, -7.558349609375, -6.43212890625, -5.305908203125, -4.1796875, -3.053466796875, -1.92724609375, -0.801025390625, 0.3251953125, 1.451416015625, 2.57763671875, 3.703857421875, 4.830078125, 5.956298828125, 7.08251953125, 8.208740234375, 9.3349609375, 10.461181640625, 11.58740234375, 12.713623046875, 13.83984375, 14.966064453125, 16.09228515625, 17.218505859375, 18.3447265625, 19.470947265625, 20.59716796875, 21.723388671875, 22.849609375, 23.975830078125, 25.10205078125, 26.228271484375, 27.3544921875, 28.480712890625, 29.60693359375, 30.733154296875, 31.859375]}, "gradients/decoder.transformer.h.15.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 4.0, 3.0, 2.0, 5.0, 9.0, 10.0, 8.0, 12.0, 19.0, 39.0, 53.0, 67.0, 76.0, 117.0, 132.0, 182.0, 279.0, 402.0, 519.0, 786.0, 1100.0, 1701.0, 2450.0, 3908.0, 6707.0, 12651.0, 29148.0, 89096.0, 392241.0, 363800.0, 84681.0, 27832.0, 12348.0, 6614.0, 3751.0, 2454.0, 1587.0, 1146.0, 736.0, 500.0, 380.0, 238.0, 223.0, 154.0, 108.0, 77.0, 59.0, 49.0, 31.0, 27.0, 18.0, 4.0, 8.0, 8.0, 4.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 3.0], "bins": [-25.46875, -24.619140625, -23.76953125, -22.919921875, -22.0703125, -21.220703125, -20.37109375, -19.521484375, -18.671875, -17.822265625, -16.97265625, -16.123046875, -15.2734375, -14.423828125, -13.57421875, -12.724609375, -11.875, -11.025390625, -10.17578125, -9.326171875, -8.4765625, -7.626953125, -6.77734375, -5.927734375, -5.078125, -4.228515625, -3.37890625, -2.529296875, -1.6796875, -0.830078125, 0.01953125, 0.869140625, 1.71875, 2.568359375, 3.41796875, 4.267578125, 5.1171875, 5.966796875, 6.81640625, 7.666015625, 8.515625, 9.365234375, 10.21484375, 11.064453125, 11.9140625, 12.763671875, 13.61328125, 14.462890625, 15.3125, 16.162109375, 17.01171875, 17.861328125, 18.7109375, 19.560546875, 20.41015625, 21.259765625, 22.109375, 22.958984375, 23.80859375, 24.658203125, 25.5078125, 26.357421875, 27.20703125, 28.056640625, 28.90625]}, "gradients/decoder.transformer.h.15.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 2.0, 5.0, 7.0, 5.0, 3.0, 6.0, 13.0, 16.0, 12.0, 22.0, 11.0, 22.0, 20.0, 27.0, 22.0, 32.0, 31.0, 44.0, 43.0, 37.0, 61.0, 158.0, 1790.0, 165.0, 75.0, 59.0, 39.0, 30.0, 32.0, 26.0, 25.0, 35.0, 32.0, 25.0, 20.0, 17.0, 12.0, 10.0, 16.0, 10.0, 8.0, 13.0, 8.0, 4.0, 1.0, 0.0, 2.0, 3.0, 2.0], "bins": [-132.875, -129.33203125, -125.7890625, -122.24609375, -118.703125, -115.16015625, -111.6171875, -108.07421875, -104.53125, -100.98828125, -97.4453125, -93.90234375, -90.359375, -86.81640625, -83.2734375, -79.73046875, -76.1875, -72.64453125, -69.1015625, -65.55859375, -62.015625, -58.47265625, -54.9296875, -51.38671875, -47.84375, -44.30078125, -40.7578125, -37.21484375, -33.671875, -30.12890625, -26.5859375, -23.04296875, -19.5, -15.95703125, -12.4140625, -8.87109375, -5.328125, -1.78515625, 1.7578125, 5.30078125, 8.84375, 12.38671875, 15.9296875, 19.47265625, 23.015625, 26.55859375, 30.1015625, 33.64453125, 37.1875, 40.73046875, 44.2734375, 47.81640625, 51.359375, 54.90234375, 58.4453125, 61.98828125, 65.53125, 69.07421875, 72.6171875, 76.16015625, 79.703125, 83.24609375, 86.7890625, 90.33203125, 93.875]}, "gradients/decoder.transformer.h.15.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 1.0, 3.0, 8.0, 9.0, 10.0, 15.0, 19.0, 23.0, 23.0, 29.0, 51.0, 66.0, 78.0, 97.0, 117.0, 164.0, 186.0, 315.0, 1070.0, 10068.0, 3056477.0, 72470.0, 2884.0, 583.0, 236.0, 144.0, 122.0, 113.0, 74.0, 50.0, 31.0, 24.0, 41.0, 25.0, 27.0, 16.0, 11.0, 5.0, 8.0, 5.0, 4.0, 3.0, 6.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-262.25, -253.4375, -244.625, -235.8125, -227.0, -218.1875, -209.375, -200.5625, -191.75, -182.9375, -174.125, -165.3125, -156.5, -147.6875, -138.875, -130.0625, -121.25, -112.4375, -103.625, -94.8125, -86.0, -77.1875, -68.375, -59.5625, -50.75, -41.9375, -33.125, -24.3125, -15.5, -6.6875, 2.125, 10.9375, 19.75, 28.5625, 37.375, 46.1875, 55.0, 63.8125, 72.625, 81.4375, 90.25, 99.0625, 107.875, 116.6875, 125.5, 134.3125, 143.125, 151.9375, 160.75, 169.5625, 178.375, 187.1875, 196.0, 204.8125, 213.625, 222.4375, 231.25, 240.0625, 248.875, 257.6875, 266.5, 275.3125, 284.125, 292.9375, 301.75]}, "gradients/decoder.transformer.h.15.ln_1.weight": {"_type": "histogram", "values": [1.0, 7.0, 258.0, 727.0, 29.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-132.9928436279297, -91.32991027832031, -49.666969299316406, -8.0040283203125, 33.658905029296875, 75.32183837890625, 116.98478698730469, 158.64772033691406, 200.31065368652344, 241.9735870361328, 283.63653564453125, 325.2994689941406, 366.96240234375, 408.6253356933594, 450.28826904296875, 491.95123291015625, 533.6141357421875, 575.277099609375, 616.9400024414062, 658.6029663085938, 700.265869140625, 741.9288330078125, 783.591796875, 825.2546997070312, 866.9176635742188, 908.5806274414062, 950.2435302734375, 991.906494140625, 1033.5694580078125, 1075.2322998046875, 1116.895263671875, 1158.5582275390625, 1200.2210693359375, 1241.884033203125, 1283.5469970703125, 1325.2098388671875, 1366.872802734375, 1408.5357666015625, 1450.19873046875, 1491.861572265625, 1533.5245361328125, 1575.1875, 1616.8504638671875, 1658.5133056640625, 1700.17626953125, 1741.8392333984375, 1783.502197265625, 1825.1650390625, 1866.828125, 1908.4910888671875, 1950.154052734375, 1991.81689453125, 2033.4798583984375, 2075.142822265625, 2116.8056640625, 2158.46875, 2200.131591796875, 2241.79443359375, 2283.45751953125, 2325.120361328125, 2366.783447265625, 2408.4462890625, 2450.109130859375, 2491.772216796875, 2533.43505859375]}, "gradients/decoder.transformer.h.15.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 2.0, 6.0, 16.0, 10.0, 13.0, 17.0, 22.0, 18.0, 25.0, 18.0, 28.0, 33.0, 47.0, 36.0, 33.0, 33.0, 41.0, 32.0, 45.0, 47.0, 42.0, 45.0, 38.0, 43.0, 50.0, 39.0, 28.0, 28.0, 34.0, 14.0, 29.0, 17.0, 10.0, 11.0, 5.0, 8.0, 9.0, 9.0, 3.0, 3.0, 0.0, 5.0, 6.0, 4.0, 4.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-322.0838623046875, -312.42578125, -302.7677001953125, -293.1096496582031, -283.4515686035156, -273.7934875488281, -264.13543701171875, -254.47735595703125, -244.81927490234375, -235.16119384765625, -225.5031280517578, -215.84506225585938, -206.18698120117188, -196.52890014648438, -186.87083435058594, -177.2127685546875, -167.5546875, -157.8966064453125, -148.23854064941406, -138.58047485351562, -128.92239379882812, -119.26432037353516, -109.60624694824219, -99.94817352294922, -90.29010009765625, -80.63202667236328, -70.97395324707031, -61.315879821777344, -51.657806396484375, -41.999732971191406, -32.34165954589844, -22.68358612060547, -13.0255126953125, -3.3674392700195312, 6.2906341552734375, 15.948707580566406, 25.606781005859375, 35.264854431152344, 44.92292785644531, 54.58100128173828, 64.23907470703125, 73.89714813232422, 83.55522155761719, 93.21329498291016, 102.87136840820312, 112.5294418334961, 122.18751525878906, 131.8455810546875, 141.503662109375, 151.1617431640625, 160.81980895996094, 170.47787475585938, 180.13595581054688, 189.79403686523438, 199.4521026611328, 209.11016845703125, 218.76824951171875, 228.42633056640625, 238.0843963623047, 247.74246215820312, 257.4005432128906, 267.0586242675781, 276.7166748046875, 286.374755859375, 296.0328369140625]}, "gradients/decoder.transformer.h.14.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 7.0, 4.0, 4.0, 4.0, 7.0, 3.0, 10.0, 13.0, 9.0, 8.0, 14.0, 14.0, 18.0, 6.0, 22.0, 24.0, 29.0, 26.0, 35.0, 26.0, 37.0, 34.0, 43.0, 41.0, 27.0, 34.0, 39.0, 45.0, 45.0, 45.0, 35.0, 30.0, 36.0, 28.0, 32.0, 25.0, 25.0, 21.0, 20.0, 14.0, 16.0, 10.0, 14.0, 4.0, 11.0, 5.0, 3.0, 4.0, 2.0, 2.0, 1.0, 1.0, 5.0, 0.0, 2.0], "bins": [-38.25, -37.146484375, -36.04296875, -34.939453125, -33.8359375, -32.732421875, -31.62890625, -30.525390625, -29.421875, -28.318359375, -27.21484375, -26.111328125, -25.0078125, -23.904296875, -22.80078125, -21.697265625, -20.59375, -19.490234375, -18.38671875, -17.283203125, -16.1796875, -15.076171875, -13.97265625, -12.869140625, -11.765625, -10.662109375, -9.55859375, -8.455078125, -7.3515625, -6.248046875, -5.14453125, -4.041015625, -2.9375, -1.833984375, -0.73046875, 0.373046875, 1.4765625, 2.580078125, 3.68359375, 4.787109375, 5.890625, 6.994140625, 8.09765625, 9.201171875, 10.3046875, 11.408203125, 12.51171875, 13.615234375, 14.71875, 15.822265625, 16.92578125, 18.029296875, 19.1328125, 20.236328125, 21.33984375, 22.443359375, 23.546875, 24.650390625, 25.75390625, 26.857421875, 27.9609375, 29.064453125, 30.16796875, 31.271484375, 32.375]}, "gradients/decoder.transformer.h.14.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 7.0, 4.0, 8.0, 3.0, 9.0, 6.0, 17.0, 13.0, 24.0, 19.0, 21.0, 40.0, 45.0, 76.0, 116.0, 150.0, 269.0, 428.0, 744.0, 1308.0, 2207.0, 4389.0, 10823.0, 198203.0, 3775138.0, 180526.0, 10414.0, 4339.0, 2140.0, 1127.0, 668.0, 347.0, 212.0, 129.0, 81.0, 58.0, 45.0, 25.0, 29.0, 14.0, 14.0, 12.0, 10.0, 13.0, 4.0, 4.0, 4.0, 0.0, 0.0, 3.0, 3.0, 1.0, 4.0], "bins": [-165.875, -161.19921875, -156.5234375, -151.84765625, -147.171875, -142.49609375, -137.8203125, -133.14453125, -128.46875, -123.79296875, -119.1171875, -114.44140625, -109.765625, -105.08984375, -100.4140625, -95.73828125, -91.0625, -86.38671875, -81.7109375, -77.03515625, -72.359375, -67.68359375, -63.0078125, -58.33203125, -53.65625, -48.98046875, -44.3046875, -39.62890625, -34.953125, -30.27734375, -25.6015625, -20.92578125, -16.25, -11.57421875, -6.8984375, -2.22265625, 2.453125, 7.12890625, 11.8046875, 16.48046875, 21.15625, 25.83203125, 30.5078125, 35.18359375, 39.859375, 44.53515625, 49.2109375, 53.88671875, 58.5625, 63.23828125, 67.9140625, 72.58984375, 77.265625, 81.94140625, 86.6171875, 91.29296875, 95.96875, 100.64453125, 105.3203125, 109.99609375, 114.671875, 119.34765625, 124.0234375, 128.69921875, 133.375]}, "gradients/decoder.transformer.h.14.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 10.0, 9.0, 6.0, 18.0, 19.0, 24.0, 50.0, 72.0, 264.0, 832.0, 1595.0, 737.0, 249.0, 75.0, 43.0, 19.0, 14.0, 10.0, 16.0, 6.0, 6.0, 0.0, 4.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-158.625, -154.1494140625, -149.673828125, -145.1982421875, -140.72265625, -136.2470703125, -131.771484375, -127.2958984375, -122.8203125, -118.3447265625, -113.869140625, -109.3935546875, -104.91796875, -100.4423828125, -95.966796875, -91.4912109375, -87.015625, -82.5400390625, -78.064453125, -73.5888671875, -69.11328125, -64.6376953125, -60.162109375, -55.6865234375, -51.2109375, -46.7353515625, -42.259765625, -37.7841796875, -33.30859375, -28.8330078125, -24.357421875, -19.8818359375, -15.40625, -10.9306640625, -6.455078125, -1.9794921875, 2.49609375, 6.9716796875, 11.447265625, 15.9228515625, 20.3984375, 24.8740234375, 29.349609375, 33.8251953125, 38.30078125, 42.7763671875, 47.251953125, 51.7275390625, 56.203125, 60.6787109375, 65.154296875, 69.6298828125, 74.10546875, 78.5810546875, 83.056640625, 87.5322265625, 92.0078125, 96.4833984375, 100.958984375, 105.4345703125, 109.91015625, 114.3857421875, 118.861328125, 123.3369140625, 127.8125]}, "gradients/decoder.transformer.h.14.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 4.0, 6.0, 8.0, 10.0, 19.0, 36.0, 51.0, 65.0, 130.0, 183.0, 283.0, 550.0, 1107.0, 2953.0, 9966.0, 49287.0, 3708275.0, 388220.0, 23374.0, 5977.0, 1943.0, 808.0, 412.0, 226.0, 149.0, 94.0, 65.0, 43.0, 19.0, 10.0, 8.0, 3.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-209.0, -201.48046875, -193.9609375, -186.44140625, -178.921875, -171.40234375, -163.8828125, -156.36328125, -148.84375, -141.32421875, -133.8046875, -126.28515625, -118.765625, -111.24609375, -103.7265625, -96.20703125, -88.6875, -81.16796875, -73.6484375, -66.12890625, -58.609375, -51.08984375, -43.5703125, -36.05078125, -28.53125, -21.01171875, -13.4921875, -5.97265625, 1.546875, 9.06640625, 16.5859375, 24.10546875, 31.625, 39.14453125, 46.6640625, 54.18359375, 61.703125, 69.22265625, 76.7421875, 84.26171875, 91.78125, 99.30078125, 106.8203125, 114.33984375, 121.859375, 129.37890625, 136.8984375, 144.41796875, 151.9375, 159.45703125, 166.9765625, 174.49609375, 182.015625, 189.53515625, 197.0546875, 204.57421875, 212.09375, 219.61328125, 227.1328125, 234.65234375, 242.171875, 249.69140625, 257.2109375, 264.73046875, 272.25]}, "gradients/decoder.transformer.h.14.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 5.0, 2.0, 2.0, 8.0, 18.0, 20.0, 29.0, 40.0, 52.0, 93.0, 187.0, 216.0, 125.0, 71.0, 48.0, 32.0, 34.0, 10.0, 8.0, 4.0, 5.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-285.93731689453125, -270.15240478515625, -254.36749267578125, -238.58258056640625, -222.79766845703125, -207.01275634765625, -191.2278594970703, -175.4429473876953, -159.6580352783203, -143.8731231689453, -128.0882110595703, -112.30330657958984, -96.51839447021484, -80.73348236083984, -64.94857788085938, -49.163665771484375, -33.378753662109375, -17.593843460083008, -1.8089332580566406, 13.975975036621094, 29.760887145996094, 45.545799255371094, 61.33070373535156, 77.11561584472656, 92.90052795410156, 108.68544006347656, 124.47035217285156, 140.2552490234375, 156.0401611328125, 171.8250732421875, 187.6099853515625, 203.3948974609375, 219.1798095703125, 234.9647216796875, 250.7496337890625, 266.5345458984375, 282.3194580078125, 298.1043701171875, 313.8892822265625, 329.6741943359375, 345.4591064453125, 361.2440185546875, 377.0289306640625, 392.8138427734375, 408.5987548828125, 424.3836669921875, 440.1685791015625, 455.9534912109375, 471.7383728027344, 487.5232849121094, 503.3081970214844, 519.0930786132812, 534.8779907226562, 550.6629028320312, 566.4478149414062, 582.2327270507812, 598.0176391601562, 613.8025512695312, 629.5874633789062, 645.3723754882812, 661.1572875976562, 676.9421997070312, 692.7271118164062, 708.5120239257812, 724.2969360351562]}, "gradients/decoder.transformer.h.14.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 4.0, 2.0, 8.0, 2.0, 1.0, 3.0, 8.0, 5.0, 10.0, 9.0, 15.0, 15.0, 6.0, 14.0, 13.0, 23.0, 17.0, 25.0, 25.0, 21.0, 34.0, 30.0, 38.0, 23.0, 38.0, 34.0, 36.0, 43.0, 37.0, 44.0, 25.0, 38.0, 33.0, 32.0, 29.0, 23.0, 34.0, 23.0, 26.0, 15.0, 18.0, 21.0, 13.0, 13.0, 9.0, 18.0, 11.0, 13.0, 7.0, 5.0, 3.0, 9.0, 4.0, 3.0, 1.0, 1.0, 8.0, 1.0, 1.0, 1.0, 2.0], "bins": [-193.8929443359375, -187.69061279296875, -181.48826599121094, -175.28591918945312, -169.08358764648438, -162.88125610351562, -156.6789093017578, -150.4765625, -144.27423095703125, -138.0718994140625, -131.8695526123047, -125.6672134399414, -119.46487426757812, -113.26253509521484, -107.06019592285156, -100.85785675048828, -94.655517578125, -88.45317840576172, -82.25083923339844, -76.04850006103516, -69.84616088867188, -63.643821716308594, -57.44148254394531, -51.23914337158203, -45.03680419921875, -38.83446502685547, -32.63212585449219, -26.429786682128906, -20.227447509765625, -14.025108337402344, -7.8227691650390625, -1.6204299926757812, 4.5819091796875, 10.784248352050781, 16.986587524414062, 23.188926696777344, 29.391265869140625, 35.593605041503906, 41.79594421386719, 47.99828338623047, 54.20062255859375, 60.40296173095703, 66.60530090332031, 72.8076400756836, 79.00997924804688, 85.21231842041016, 91.41465759277344, 97.61699676513672, 103.8193359375, 110.02167510986328, 116.22401428222656, 122.42635345458984, 128.62869262695312, 134.83102416992188, 141.0333709716797, 147.2357177734375, 153.43804931640625, 159.640380859375, 165.8427276611328, 172.04507446289062, 178.24740600585938, 184.44973754882812, 190.65208435058594, 196.85443115234375, 203.0567626953125]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 4.0, 5.0, 5.0, 10.0, 4.0, 10.0, 11.0, 7.0, 20.0, 12.0, 18.0, 13.0, 18.0, 32.0, 25.0, 26.0, 30.0, 31.0, 23.0, 39.0, 34.0, 37.0, 35.0, 44.0, 46.0, 37.0, 45.0, 35.0, 25.0, 34.0, 36.0, 35.0, 25.0, 25.0, 27.0, 26.0, 17.0, 10.0, 22.0, 19.0, 14.0, 4.0, 9.0, 5.0, 8.0, 2.0, 0.0, 5.0, 1.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-37.96875, -36.81591796875, -35.6630859375, -34.51025390625, -33.357421875, -32.20458984375, -31.0517578125, -29.89892578125, -28.74609375, -27.59326171875, -26.4404296875, -25.28759765625, -24.134765625, -22.98193359375, -21.8291015625, -20.67626953125, -19.5234375, -18.37060546875, -17.2177734375, -16.06494140625, -14.912109375, -13.75927734375, -12.6064453125, -11.45361328125, -10.30078125, -9.14794921875, -7.9951171875, -6.84228515625, -5.689453125, -4.53662109375, -3.3837890625, -2.23095703125, -1.078125, 0.07470703125, 1.2275390625, 2.38037109375, 3.533203125, 4.68603515625, 5.8388671875, 6.99169921875, 8.14453125, 9.29736328125, 10.4501953125, 11.60302734375, 12.755859375, 13.90869140625, 15.0615234375, 16.21435546875, 17.3671875, 18.52001953125, 19.6728515625, 20.82568359375, 21.978515625, 23.13134765625, 24.2841796875, 25.43701171875, 26.58984375, 27.74267578125, 28.8955078125, 30.04833984375, 31.201171875, 32.35400390625, 33.5068359375, 34.65966796875, 35.8125]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.weight": {"_type": "histogram", "values": [5.0, 4.0, 1.0, 6.0, 8.0, 13.0, 27.0, 25.0, 45.0, 44.0, 84.0, 110.0, 169.0, 284.0, 385.0, 610.0, 847.0, 1297.0, 1921.0, 2880.0, 4366.0, 6684.0, 10336.0, 15740.0, 24483.0, 40249.0, 66002.0, 116280.0, 221712.0, 230145.0, 120664.0, 68375.0, 41729.0, 25832.0, 16384.0, 10569.0, 6903.0, 4536.0, 2981.0, 1992.0, 1311.0, 832.0, 559.0, 374.0, 217.0, 192.0, 128.0, 74.0, 46.0, 40.0, 28.0, 12.0, 4.0, 10.0, 8.0, 2.0, 5.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0], "bins": [-1.720703125, -1.66143798828125, -1.6021728515625, -1.54290771484375, -1.483642578125, -1.42437744140625, -1.3651123046875, -1.30584716796875, -1.24658203125, -1.18731689453125, -1.1280517578125, -1.06878662109375, -1.009521484375, -0.95025634765625, -0.8909912109375, -0.83172607421875, -0.7724609375, -0.71319580078125, -0.6539306640625, -0.59466552734375, -0.535400390625, -0.47613525390625, -0.4168701171875, -0.35760498046875, -0.29833984375, -0.23907470703125, -0.1798095703125, -0.12054443359375, -0.061279296875, -0.00201416015625, 0.0572509765625, 0.11651611328125, 0.17578125, 0.23504638671875, 0.2943115234375, 0.35357666015625, 0.412841796875, 0.47210693359375, 0.5313720703125, 0.59063720703125, 0.64990234375, 0.70916748046875, 0.7684326171875, 0.82769775390625, 0.886962890625, 0.94622802734375, 1.0054931640625, 1.06475830078125, 1.1240234375, 1.18328857421875, 1.2425537109375, 1.30181884765625, 1.361083984375, 1.42034912109375, 1.4796142578125, 1.53887939453125, 1.59814453125, 1.65740966796875, 1.7166748046875, 1.77593994140625, 1.835205078125, 1.89447021484375, 1.9537353515625, 2.01300048828125, 2.072265625]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 7.0, 2.0, 2.0, 3.0, 2.0, 5.0, 13.0, 10.0, 6.0, 13.0, 7.0, 13.0, 17.0, 16.0, 21.0, 24.0, 29.0, 31.0, 31.0, 18.0, 38.0, 34.0, 34.0, 37.0, 46.0, 35.0, 35.0, 1062.0, 35.0, 33.0, 31.0, 30.0, 30.0, 27.0, 25.0, 29.0, 24.0, 18.0, 29.0, 18.0, 19.0, 13.0, 18.0, 13.0, 12.0, 12.0, 8.0, 3.0, 3.0, 4.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0], "bins": [-22.875, -22.16552734375, -21.4560546875, -20.74658203125, -20.037109375, -19.32763671875, -18.6181640625, -17.90869140625, -17.19921875, -16.48974609375, -15.7802734375, -15.07080078125, -14.361328125, -13.65185546875, -12.9423828125, -12.23291015625, -11.5234375, -10.81396484375, -10.1044921875, -9.39501953125, -8.685546875, -7.97607421875, -7.2666015625, -6.55712890625, -5.84765625, -5.13818359375, -4.4287109375, -3.71923828125, -3.009765625, -2.30029296875, -1.5908203125, -0.88134765625, -0.171875, 0.53759765625, 1.2470703125, 1.95654296875, 2.666015625, 3.37548828125, 4.0849609375, 4.79443359375, 5.50390625, 6.21337890625, 6.9228515625, 7.63232421875, 8.341796875, 9.05126953125, 9.7607421875, 10.47021484375, 11.1796875, 11.88916015625, 12.5986328125, 13.30810546875, 14.017578125, 14.72705078125, 15.4365234375, 16.14599609375, 16.85546875, 17.56494140625, 18.2744140625, 18.98388671875, 19.693359375, 20.40283203125, 21.1123046875, 21.82177734375, 22.53125]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 3.0, 1.0, 5.0, 4.0, 9.0, 10.0, 12.0, 15.0, 28.0, 47.0, 61.0, 88.0, 114.0, 187.0, 235.0, 382.0, 502.0, 850.0, 1266.0, 1949.0, 2876.0, 4501.0, 7308.0, 11339.0, 18113.0, 29836.0, 50003.0, 89841.0, 192041.0, 1343434.0, 152114.0, 76408.0, 43436.0, 26135.0, 15890.0, 9994.0, 6312.0, 4045.0, 2659.0, 1689.0, 1146.0, 762.0, 489.0, 314.0, 228.0, 153.0, 106.0, 80.0, 37.0, 26.0, 25.0, 15.0, 6.0, 6.0, 3.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0], "bins": [-1.9599609375, -1.8993988037109375, -1.838836669921875, -1.7782745361328125, -1.71771240234375, -1.6571502685546875, -1.596588134765625, -1.5360260009765625, -1.4754638671875, -1.4149017333984375, -1.354339599609375, -1.2937774658203125, -1.23321533203125, -1.1726531982421875, -1.112091064453125, -1.0515289306640625, -0.990966796875, -0.9304046630859375, -0.869842529296875, -0.8092803955078125, -0.74871826171875, -0.6881561279296875, -0.627593994140625, -0.5670318603515625, -0.5064697265625, -0.4459075927734375, -0.385345458984375, -0.3247833251953125, -0.26422119140625, -0.2036590576171875, -0.143096923828125, -0.0825347900390625, -0.02197265625, 0.0385894775390625, 0.099151611328125, 0.1597137451171875, 0.22027587890625, 0.2808380126953125, 0.341400146484375, 0.4019622802734375, 0.4625244140625, 0.5230865478515625, 0.583648681640625, 0.6442108154296875, 0.70477294921875, 0.7653350830078125, 0.825897216796875, 0.8864593505859375, 0.947021484375, 1.0075836181640625, 1.068145751953125, 1.1287078857421875, 1.18927001953125, 1.2498321533203125, 1.310394287109375, 1.3709564208984375, 1.4315185546875, 1.4920806884765625, 1.552642822265625, 1.6132049560546875, 1.67376708984375, 1.7343292236328125, 1.794891357421875, 1.8554534912109375, 1.916015625]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 4.0, 4.0, 4.0, 7.0, 3.0, 12.0, 7.0, 11.0, 13.0, 13.0, 21.0, 15.0, 27.0, 33.0, 43.0, 58.0, 54.0, 53.0, 72.0, 54.0, 60.0, 52.0, 71.0, 44.0, 49.0, 32.0, 30.0, 21.0, 28.0, 25.0, 14.0, 18.0, 14.0, 9.0, 9.0, 8.0, 2.0, 1.0, 6.0, 2.0, 2.0, 0.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0215606689453125, -0.02093338966369629, -0.020306110382080078, -0.019678831100463867, -0.019051551818847656, -0.018424272537231445, -0.017796993255615234, -0.017169713973999023, -0.016542434692382812, -0.0159151554107666, -0.01528787612915039, -0.01466059684753418, -0.014033317565917969, -0.013406038284301758, -0.012778759002685547, -0.012151479721069336, -0.011524200439453125, -0.010896921157836914, -0.010269641876220703, -0.009642362594604492, -0.009015083312988281, -0.00838780403137207, -0.007760524749755859, -0.0071332454681396484, -0.0065059661865234375, -0.0058786869049072266, -0.005251407623291016, -0.004624128341674805, -0.003996849060058594, -0.003369569778442383, -0.002742290496826172, -0.002115011215209961, -0.00148773193359375, -0.0008604526519775391, -0.00023317337036132812, 0.0003941059112548828, 0.0010213851928710938, 0.0016486644744873047, 0.0022759437561035156, 0.0029032230377197266, 0.0035305023193359375, 0.0041577816009521484, 0.004785060882568359, 0.00541234016418457, 0.006039619445800781, 0.006666898727416992, 0.007294178009033203, 0.007921457290649414, 0.008548736572265625, 0.009176015853881836, 0.009803295135498047, 0.010430574417114258, 0.011057853698730469, 0.01168513298034668, 0.01231241226196289, 0.012939691543579102, 0.013566970825195312, 0.014194250106811523, 0.014821529388427734, 0.015448808670043945, 0.016076087951660156, 0.016703367233276367, 0.017330646514892578, 0.01795792579650879, 0.018585205078125]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 1.0, 3.0, 6.0, 3.0, 4.0, 7.0, 8.0, 13.0, 19.0, 18.0, 28.0, 30.0, 45.0, 49.0, 55.0, 99.0, 124.0, 275.0, 886.0, 6600.0, 95201.0, 891067.0, 48541.0, 4014.0, 709.0, 240.0, 146.0, 81.0, 58.0, 46.0, 32.0, 30.0, 25.0, 23.0, 16.0, 9.0, 15.0, 6.0, 9.0, 3.0, 6.0, 4.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.288330078125, -0.2788429260253906, -0.26935577392578125, -0.2598686218261719, -0.2503814697265625, -0.24089431762695312, -0.23140716552734375, -0.22192001342773438, -0.212432861328125, -0.20294570922851562, -0.19345855712890625, -0.18397140502929688, -0.1744842529296875, -0.16499710083007812, -0.15550994873046875, -0.14602279663085938, -0.13653564453125, -0.12704849243164062, -0.11756134033203125, -0.10807418823242188, -0.0985870361328125, -0.08909988403320312, -0.07961273193359375, -0.07012557983398438, -0.060638427734375, -0.051151275634765625, -0.04166412353515625, -0.032176971435546875, -0.0226898193359375, -0.013202667236328125, -0.00371551513671875, 0.005771636962890625, 0.0152587890625, 0.024745941162109375, 0.03423309326171875, 0.043720245361328125, 0.0532073974609375, 0.06269454956054688, 0.07218170166015625, 0.08166885375976562, 0.091156005859375, 0.10064315795898438, 0.11013031005859375, 0.11961746215820312, 0.1291046142578125, 0.13859176635742188, 0.14807891845703125, 0.15756607055664062, 0.16705322265625, 0.17654037475585938, 0.18602752685546875, 0.19551467895507812, 0.2050018310546875, 0.21448898315429688, 0.22397613525390625, 0.23346328735351562, 0.242950439453125, 0.2524375915527344, 0.26192474365234375, 0.2714118957519531, 0.2808990478515625, 0.2903861999511719, 0.29987335205078125, 0.3093605041503906, 0.31884765625]}, "gradients/decoder.transformer.h.14.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 14.0, 51.0, 463.0, 411.0, 64.0, 9.0, 4.0], "bins": [-0.220830500125885, -0.2171487957239151, -0.21346710622310638, -0.20978540182113647, -0.20610369741916656, -0.20242200791835785, -0.19874030351638794, -0.19505859911441803, -0.19137690961360931, -0.1876952052116394, -0.1840135157108307, -0.18033181130886078, -0.17665010690689087, -0.17296841740608215, -0.16928671300411224, -0.16560500860214233, -0.16192330420017242, -0.15824159979820251, -0.1545599102973938, -0.1508782058954239, -0.14719650149345398, -0.14351481199264526, -0.13983310759067535, -0.13615140318870544, -0.13246971368789673, -0.12878800928592682, -0.1251063197851181, -0.1214246153831482, -0.11774291843175888, -0.11406121402978897, -0.11037951707839966, -0.10669781267642975, -0.10301612317562103, -0.09933442622423172, -0.09565272182226181, -0.0919710248708725, -0.08828932791948318, -0.08460762351751328, -0.08092592656612396, -0.07724422216415405, -0.07356252521276474, -0.06988082826137543, -0.06619912385940552, -0.0625174269080162, -0.058835726231336594, -0.05515402555465698, -0.05147232860326767, -0.04779062792658806, -0.04410892724990845, -0.040427226573228836, -0.03674552962183952, -0.03306382894515991, -0.0293821282684803, -0.02570042945444584, -0.022018730640411377, -0.018337029963731766, -0.014655332081019878, -0.010973632335662842, -0.0072919330559670925, -0.0036102337762713432, 7.146596908569336e-05, 0.00375316571444273, 0.007434864528477192, 0.011116565205156803, 0.014798264019191265]}, "gradients/decoder.transformer.h.14.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 1.0, 4.0, 7.0, 10.0, 8.0, 8.0, 13.0, 17.0, 24.0, 24.0, 33.0, 36.0, 26.0, 42.0, 36.0, 49.0, 47.0, 39.0, 56.0, 49.0, 51.0, 55.0, 43.0, 44.0, 37.0, 33.0, 34.0, 39.0, 24.0, 19.0, 23.0, 25.0, 14.0, 10.0, 8.0, 6.0, 4.0, 7.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01298147439956665, -0.012542663142085075, -0.0121038518846035, -0.011665040627121925, -0.01122622936964035, -0.010787418112158775, -0.0103486068546772, -0.009909795597195625, -0.00947098433971405, -0.009032173082232475, -0.0085933618247509, -0.008154550567269325, -0.00771573930978775, -0.007276928052306175, -0.0068381167948246, -0.006399305537343025, -0.00596049427986145, -0.005521683022379875, -0.0050828717648983, -0.004644060507416725, -0.00420524924993515, -0.003766437992453575, -0.003327626734972, -0.002888815477490425, -0.00245000422000885, -0.002011192962527275, -0.0015723817050457, -0.001133570447564125, -0.00069475919008255, -0.00025594793260097504, 0.00018286332488059998, 0.000621674582362175, 0.00106048583984375, 0.001499297097325325, 0.0019381083548069, 0.002376919612288475, 0.00281573086977005, 0.003254542127251625, 0.0036933533847332, 0.004132164642214775, 0.00457097589969635, 0.005009787157177925, 0.0054485984146595, 0.005887409672141075, 0.00632622092962265, 0.006765032187104225, 0.0072038434445858, 0.007642654702067375, 0.00808146595954895, 0.008520277217030525, 0.0089590884745121, 0.009397899731993675, 0.00983671098947525, 0.010275522246956825, 0.0107143335044384, 0.011153144761919975, 0.01159195601940155, 0.012030767276883125, 0.0124695785343647, 0.012908389791846275, 0.01334720104932785, 0.013786012306809425, 0.014224823564291, 0.014663634821772575, 0.01510244607925415]}, "gradients/decoder.transformer.h.14.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 4.0, 5.0, 5.0, 10.0, 4.0, 10.0, 11.0, 7.0, 20.0, 12.0, 18.0, 13.0, 18.0, 32.0, 25.0, 26.0, 30.0, 31.0, 23.0, 39.0, 34.0, 37.0, 35.0, 44.0, 46.0, 37.0, 45.0, 35.0, 25.0, 34.0, 36.0, 35.0, 25.0, 25.0, 27.0, 26.0, 17.0, 10.0, 22.0, 19.0, 14.0, 4.0, 9.0, 5.0, 8.0, 2.0, 0.0, 5.0, 1.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-37.96875, -36.81591796875, -35.6630859375, -34.51025390625, -33.357421875, -32.20458984375, -31.0517578125, -29.89892578125, -28.74609375, -27.59326171875, -26.4404296875, -25.28759765625, -24.134765625, -22.98193359375, -21.8291015625, -20.67626953125, -19.5234375, -18.37060546875, -17.2177734375, -16.06494140625, -14.912109375, -13.75927734375, -12.6064453125, -11.45361328125, -10.30078125, -9.14794921875, -7.9951171875, -6.84228515625, -5.689453125, -4.53662109375, -3.3837890625, -2.23095703125, -1.078125, 0.07470703125, 1.2275390625, 2.38037109375, 3.533203125, 4.68603515625, 5.8388671875, 6.99169921875, 8.14453125, 9.29736328125, 10.4501953125, 11.60302734375, 12.755859375, 13.90869140625, 15.0615234375, 16.21435546875, 17.3671875, 18.52001953125, 19.6728515625, 20.82568359375, 21.978515625, 23.13134765625, 24.2841796875, 25.43701171875, 26.58984375, 27.74267578125, 28.8955078125, 30.04833984375, 31.201171875, 32.35400390625, 33.5068359375, 34.65966796875, 35.8125]}, "gradients/decoder.transformer.h.14.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 2.0, 2.0, 4.0, 2.0, 6.0, 8.0, 15.0, 28.0, 20.0, 34.0, 42.0, 54.0, 87.0, 140.0, 131.0, 222.0, 269.0, 392.0, 534.0, 688.0, 1075.0, 1435.0, 2146.0, 3291.0, 5122.0, 9000.0, 19208.0, 64079.0, 387403.0, 437002.0, 69812.0, 20580.0, 9405.0, 5442.0, 3360.0, 2241.0, 1453.0, 1076.0, 776.0, 523.0, 369.0, 321.0, 184.0, 168.0, 117.0, 97.0, 55.0, 36.0, 29.0, 20.0, 21.0, 16.0, 15.0, 2.0, 3.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0], "bins": [-32.71875, -31.697265625, -30.67578125, -29.654296875, -28.6328125, -27.611328125, -26.58984375, -25.568359375, -24.546875, -23.525390625, -22.50390625, -21.482421875, -20.4609375, -19.439453125, -18.41796875, -17.396484375, -16.375, -15.353515625, -14.33203125, -13.310546875, -12.2890625, -11.267578125, -10.24609375, -9.224609375, -8.203125, -7.181640625, -6.16015625, -5.138671875, -4.1171875, -3.095703125, -2.07421875, -1.052734375, -0.03125, 0.990234375, 2.01171875, 3.033203125, 4.0546875, 5.076171875, 6.09765625, 7.119140625, 8.140625, 9.162109375, 10.18359375, 11.205078125, 12.2265625, 13.248046875, 14.26953125, 15.291015625, 16.3125, 17.333984375, 18.35546875, 19.376953125, 20.3984375, 21.419921875, 22.44140625, 23.462890625, 24.484375, 25.505859375, 26.52734375, 27.548828125, 28.5703125, 29.591796875, 30.61328125, 31.634765625, 32.65625]}, "gradients/decoder.transformer.h.14.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 3.0, 6.0, 4.0, 8.0, 4.0, 6.0, 8.0, 10.0, 18.0, 23.0, 25.0, 31.0, 38.0, 32.0, 34.0, 49.0, 55.0, 52.0, 76.0, 73.0, 360.0, 1578.0, 103.0, 59.0, 37.0, 52.0, 47.0, 40.0, 42.0, 31.0, 20.0, 21.0, 25.0, 13.0, 16.0, 10.0, 10.0, 8.0, 6.0, 6.0, 7.0, 5.0, 4.0, 3.0, 1.0, 1.0, 0.0, 2.0], "bins": [-153.875, -149.724609375, -145.57421875, -141.423828125, -137.2734375, -133.123046875, -128.97265625, -124.822265625, -120.671875, -116.521484375, -112.37109375, -108.220703125, -104.0703125, -99.919921875, -95.76953125, -91.619140625, -87.46875, -83.318359375, -79.16796875, -75.017578125, -70.8671875, -66.716796875, -62.56640625, -58.416015625, -54.265625, -50.115234375, -45.96484375, -41.814453125, -37.6640625, -33.513671875, -29.36328125, -25.212890625, -21.0625, -16.912109375, -12.76171875, -8.611328125, -4.4609375, -0.310546875, 3.83984375, 7.990234375, 12.140625, 16.291015625, 20.44140625, 24.591796875, 28.7421875, 32.892578125, 37.04296875, 41.193359375, 45.34375, 49.494140625, 53.64453125, 57.794921875, 61.9453125, 66.095703125, 70.24609375, 74.396484375, 78.546875, 82.697265625, 86.84765625, 90.998046875, 95.1484375, 99.298828125, 103.44921875, 107.599609375, 111.75]}, "gradients/decoder.transformer.h.14.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 1.0, 4.0, 5.0, 9.0, 10.0, 16.0, 9.0, 18.0, 23.0, 35.0, 41.0, 63.0, 68.0, 127.0, 189.0, 420.0, 1523.0, 8626.0, 297282.0, 2822346.0, 11783.0, 1953.0, 481.0, 210.0, 136.0, 99.0, 69.0, 50.0, 26.0, 18.0, 14.0, 8.0, 13.0, 5.0, 11.0, 4.0, 4.0, 3.0, 0.0, 2.0, 0.0, 0.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-416.25, -404.03515625, -391.8203125, -379.60546875, -367.390625, -355.17578125, -342.9609375, -330.74609375, -318.53125, -306.31640625, -294.1015625, -281.88671875, -269.671875, -257.45703125, -245.2421875, -233.02734375, -220.8125, -208.59765625, -196.3828125, -184.16796875, -171.953125, -159.73828125, -147.5234375, -135.30859375, -123.09375, -110.87890625, -98.6640625, -86.44921875, -74.234375, -62.01953125, -49.8046875, -37.58984375, -25.375, -13.16015625, -0.9453125, 11.26953125, 23.484375, 35.69921875, 47.9140625, 60.12890625, 72.34375, 84.55859375, 96.7734375, 108.98828125, 121.203125, 133.41796875, 145.6328125, 157.84765625, 170.0625, 182.27734375, 194.4921875, 206.70703125, 218.921875, 231.13671875, 243.3515625, 255.56640625, 267.78125, 279.99609375, 292.2109375, 304.42578125, 316.640625, 328.85546875, 341.0703125, 353.28515625, 365.5]}, "gradients/decoder.transformer.h.14.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 6.0, 7.0, 12.0, 20.0, 39.0, 58.0, 156.0, 185.0, 227.0, 140.0, 78.0, 40.0, 22.0, 18.0, 6.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-126.31800842285156, -115.85316467285156, -105.38831329345703, -94.9234619140625, -84.4586181640625, -73.9937744140625, -63.52892303466797, -53.06407165527344, -42.59922790527344, -32.13438034057617, -21.669532775878906, -11.20468521118164, -0.739837646484375, 9.72500991821289, 20.189857482910156, 30.654708862304688, 41.11955261230469, 51.58440017700195, 62.04924774169922, 72.51409912109375, 82.97894287109375, 93.44378662109375, 103.90863800048828, 114.37348937988281, 124.83833312988281, 135.3031768798828, 145.76803588867188, 156.23287963867188, 166.69772338867188, 177.16256713867188, 187.62741088867188, 198.09226989746094, 208.55712890625, 219.02197265625, 229.48681640625, 239.95167541503906, 250.41651916503906, 260.8813781738281, 271.3462219238281, 281.8110656738281, 292.2759094238281, 302.7407531738281, 313.2055969238281, 323.6704406738281, 334.13531494140625, 344.60015869140625, 355.06500244140625, 365.52984619140625, 375.99468994140625, 386.45953369140625, 396.92437744140625, 407.38922119140625, 417.85406494140625, 428.3189392089844, 438.7837829589844, 449.2486267089844, 459.7134704589844, 470.1783142089844, 480.6431579589844, 491.1080017089844, 501.5728759765625, 512.0377197265625, 522.5025634765625, 532.9674072265625, 543.4322509765625]}, "gradients/decoder.transformer.h.14.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 6.0, 2.0, 6.0, 8.0, 9.0, 10.0, 16.0, 8.0, 13.0, 21.0, 23.0, 18.0, 23.0, 29.0, 28.0, 31.0, 45.0, 49.0, 49.0, 45.0, 40.0, 40.0, 42.0, 38.0, 38.0, 50.0, 33.0, 29.0, 37.0, 33.0, 26.0, 29.0, 21.0, 24.0, 23.0, 13.0, 8.0, 7.0, 10.0, 10.0, 3.0, 4.0, 2.0, 4.0, 4.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-312.5811767578125, -300.86993408203125, -289.15869140625, -277.44744873046875, -265.7362060546875, -254.0249786376953, -242.31375122070312, -230.60250854492188, -218.89126586914062, -207.18002319335938, -195.46878051757812, -183.75755310058594, -172.0463104248047, -160.33506774902344, -148.62384033203125, -136.91259765625, -125.20135498046875, -113.4901123046875, -101.77887725830078, -90.06764221191406, -78.35639953613281, -66.64515686035156, -54.933921813964844, -43.222686767578125, -31.511444091796875, -19.80020523071289, -8.088966369628906, 3.622272491455078, 15.333511352539062, 27.044750213623047, 38.75598907470703, 50.46722412109375, 62.178466796875, 73.88970947265625, 85.60094451904297, 97.31217956542969, 109.02342224121094, 120.73466491699219, 132.44589233398438, 144.15713500976562, 155.86837768554688, 167.57962036132812, 179.29086303710938, 191.00209045410156, 202.7133331298828, 214.42457580566406, 226.13580322265625, 237.8470458984375, 249.55828857421875, 261.26953125, 272.98077392578125, 284.6920166015625, 296.40325927734375, 308.1144714355469, 319.8257141113281, 331.5369567871094, 343.2481994628906, 354.9594421386719, 366.6706848144531, 378.3819274902344, 390.0931396484375, 401.80438232421875, 413.515625, 425.22686767578125, 436.9381103515625]}, "gradients/decoder.transformer.h.13.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 9.0, 2.0, 7.0, 6.0, 13.0, 9.0, 10.0, 8.0, 19.0, 13.0, 18.0, 16.0, 20.0, 38.0, 22.0, 23.0, 30.0, 31.0, 46.0, 40.0, 38.0, 37.0, 39.0, 33.0, 51.0, 38.0, 38.0, 32.0, 35.0, 40.0, 30.0, 30.0, 28.0, 27.0, 21.0, 18.0, 15.0, 9.0, 17.0, 12.0, 13.0, 5.0, 4.0, 10.0, 3.0, 0.0, 3.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-41.15625, -39.90966796875, -38.6630859375, -37.41650390625, -36.169921875, -34.92333984375, -33.6767578125, -32.43017578125, -31.18359375, -29.93701171875, -28.6904296875, -27.44384765625, -26.197265625, -24.95068359375, -23.7041015625, -22.45751953125, -21.2109375, -19.96435546875, -18.7177734375, -17.47119140625, -16.224609375, -14.97802734375, -13.7314453125, -12.48486328125, -11.23828125, -9.99169921875, -8.7451171875, -7.49853515625, -6.251953125, -5.00537109375, -3.7587890625, -2.51220703125, -1.265625, -0.01904296875, 1.2275390625, 2.47412109375, 3.720703125, 4.96728515625, 6.2138671875, 7.46044921875, 8.70703125, 9.95361328125, 11.2001953125, 12.44677734375, 13.693359375, 14.93994140625, 16.1865234375, 17.43310546875, 18.6796875, 19.92626953125, 21.1728515625, 22.41943359375, 23.666015625, 24.91259765625, 26.1591796875, 27.40576171875, 28.65234375, 29.89892578125, 31.1455078125, 32.39208984375, 33.638671875, 34.88525390625, 36.1318359375, 37.37841796875, 38.625]}, "gradients/decoder.transformer.h.13.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 6.0, 7.0, 5.0, 8.0, 9.0, 14.0, 11.0, 18.0, 20.0, 26.0, 44.0, 53.0, 78.0, 142.0, 206.0, 286.0, 506.0, 783.0, 1494.0, 2898.0, 6436.0, 17677.0, 339433.0, 3621109.0, 177573.0, 14699.0, 5399.0, 2372.0, 1234.0, 670.0, 379.0, 241.0, 137.0, 97.0, 57.0, 42.0, 24.0, 25.0, 14.0, 19.0, 13.0, 4.0, 5.0, 9.0, 4.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-158.375, -153.634765625, -148.89453125, -144.154296875, -139.4140625, -134.673828125, -129.93359375, -125.193359375, -120.453125, -115.712890625, -110.97265625, -106.232421875, -101.4921875, -96.751953125, -92.01171875, -87.271484375, -82.53125, -77.791015625, -73.05078125, -68.310546875, -63.5703125, -58.830078125, -54.08984375, -49.349609375, -44.609375, -39.869140625, -35.12890625, -30.388671875, -25.6484375, -20.908203125, -16.16796875, -11.427734375, -6.6875, -1.947265625, 2.79296875, 7.533203125, 12.2734375, 17.013671875, 21.75390625, 26.494140625, 31.234375, 35.974609375, 40.71484375, 45.455078125, 50.1953125, 54.935546875, 59.67578125, 64.416015625, 69.15625, 73.896484375, 78.63671875, 83.376953125, 88.1171875, 92.857421875, 97.59765625, 102.337890625, 107.078125, 111.818359375, 116.55859375, 121.298828125, 126.0390625, 130.779296875, 135.51953125, 140.259765625, 145.0]}, "gradients/decoder.transformer.h.13.mlp.c_fc.bias": {"_type": "histogram", "values": [4.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 4.0, 3.0, 7.0, 1.0, 6.0, 7.0, 8.0, 9.0, 16.0, 8.0, 19.0, 23.0, 26.0, 34.0, 38.0, 76.0, 157.0, 311.0, 663.0, 941.0, 781.0, 384.0, 224.0, 99.0, 58.0, 36.0, 25.0, 20.0, 21.0, 16.0, 14.0, 9.0, 6.0, 6.0, 2.0, 4.0, 2.0, 0.0, 4.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-75.4375, -72.6083984375, -69.779296875, -66.9501953125, -64.12109375, -61.2919921875, -58.462890625, -55.6337890625, -52.8046875, -49.9755859375, -47.146484375, -44.3173828125, -41.48828125, -38.6591796875, -35.830078125, -33.0009765625, -30.171875, -27.3427734375, -24.513671875, -21.6845703125, -18.85546875, -16.0263671875, -13.197265625, -10.3681640625, -7.5390625, -4.7099609375, -1.880859375, 0.9482421875, 3.77734375, 6.6064453125, 9.435546875, 12.2646484375, 15.09375, 17.9228515625, 20.751953125, 23.5810546875, 26.41015625, 29.2392578125, 32.068359375, 34.8974609375, 37.7265625, 40.5556640625, 43.384765625, 46.2138671875, 49.04296875, 51.8720703125, 54.701171875, 57.5302734375, 60.359375, 63.1884765625, 66.017578125, 68.8466796875, 71.67578125, 74.5048828125, 77.333984375, 80.1630859375, 82.9921875, 85.8212890625, 88.650390625, 91.4794921875, 94.30859375, 97.1376953125, 99.966796875, 102.7958984375, 105.625]}, "gradients/decoder.transformer.h.13.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 5.0, 2.0, 2.0, 7.0, 17.0, 12.0, 17.0, 29.0, 59.0, 117.0, 255.0, 512.0, 1196.0, 3126.0, 9439.0, 37184.0, 887756.0, 3183914.0, 52289.0, 11761.0, 3899.0, 1491.0, 593.0, 265.0, 141.0, 66.0, 35.0, 21.0, 18.0, 15.0, 5.0, 4.0, 4.0, 6.0, 4.0, 3.0, 4.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0], "bins": [-283.75, -276.1015625, -268.453125, -260.8046875, -253.15625, -245.5078125, -237.859375, -230.2109375, -222.5625, -214.9140625, -207.265625, -199.6171875, -191.96875, -184.3203125, -176.671875, -169.0234375, -161.375, -153.7265625, -146.078125, -138.4296875, -130.78125, -123.1328125, -115.484375, -107.8359375, -100.1875, -92.5390625, -84.890625, -77.2421875, -69.59375, -61.9453125, -54.296875, -46.6484375, -39.0, -31.3515625, -23.703125, -16.0546875, -8.40625, -0.7578125, 6.890625, 14.5390625, 22.1875, 29.8359375, 37.484375, 45.1328125, 52.78125, 60.4296875, 68.078125, 75.7265625, 83.375, 91.0234375, 98.671875, 106.3203125, 113.96875, 121.6171875, 129.265625, 136.9140625, 144.5625, 152.2109375, 159.859375, 167.5078125, 175.15625, 182.8046875, 190.453125, 198.1015625, 205.75]}, "gradients/decoder.transformer.h.13.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 6.0, 15.0, 13.0, 30.0, 62.0, 112.0, 219.0, 273.0, 145.0, 67.0, 26.0, 16.0, 8.0, 7.0, 3.0, 6.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-453.4952087402344, -429.783447265625, -406.07171630859375, -382.3599853515625, -358.6482238769531, -334.93646240234375, -311.2247314453125, -287.51300048828125, -263.8012390136719, -240.08949279785156, -216.37774658203125, -192.66600036621094, -168.95425415039062, -145.2425079345703, -121.53076171875, -97.81901550292969, -74.10726928710938, -50.39552307128906, -26.68377685546875, -2.9720306396484375, 20.739715576171875, 44.45146179199219, 68.1632080078125, 91.87495422363281, 115.58670043945312, 139.29844665527344, 163.01019287109375, 186.72193908691406, 210.43368530273438, 234.1454315185547, 257.857177734375, 281.56890869140625, 305.2806396484375, 328.99237060546875, 352.7041320800781, 376.4158935546875, 400.12762451171875, 423.83935546875, 447.5511169433594, 471.26287841796875, 494.974609375, 518.6863403320312, 542.3980712890625, 566.10986328125, 589.8215942382812, 613.5333251953125, 637.2451171875, 660.9568481445312, 684.6685791015625, 708.3803100585938, 732.092041015625, 755.8038330078125, 779.5155639648438, 803.227294921875, 826.9390869140625, 850.6508178710938, 874.362548828125, 898.0742797851562, 921.7860107421875, 945.497802734375, 969.2095336914062, 992.9212646484375, 1016.633056640625, 1040.3447265625, 1064.0565185546875]}, "gradients/decoder.transformer.h.13.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 5.0, 5.0, 7.0, 3.0, 9.0, 4.0, 4.0, 4.0, 11.0, 17.0, 18.0, 22.0, 12.0, 27.0, 21.0, 16.0, 26.0, 37.0, 33.0, 34.0, 38.0, 42.0, 41.0, 33.0, 46.0, 25.0, 37.0, 37.0, 38.0, 35.0, 34.0, 33.0, 29.0, 24.0, 27.0, 25.0, 15.0, 15.0, 14.0, 21.0, 16.0, 10.0, 10.0, 5.0, 16.0, 3.0, 5.0, 7.0, 3.0, 6.0, 5.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-201.67822265625, -194.50619506835938, -187.33416748046875, -180.16213989257812, -172.9901123046875, -165.81808471679688, -158.64605712890625, -151.47402954101562, -144.302001953125, -137.12997436523438, -129.95794677734375, -122.78591918945312, -115.6138916015625, -108.44186401367188, -101.26984405517578, -94.09781646728516, -86.92579650878906, -79.75376892089844, -72.58174133300781, -65.40971374511719, -58.23768997192383, -51.0656623840332, -43.893638610839844, -36.72161102294922, -29.549583435058594, -22.37755584716797, -15.205530166625977, -8.033504486083984, -0.8614768981933594, 6.310550689697266, 13.482574462890625, 20.65460205078125, 27.826629638671875, 34.9986572265625, 42.170684814453125, 49.342708587646484, 56.51473617553711, 63.686763763427734, 70.8587875366211, 78.03081512451172, 85.20284271240234, 92.37487030029297, 99.5468978881836, 106.71891784667969, 113.89094543457031, 121.06297302246094, 128.23500061035156, 135.4070281982422, 142.5790557861328, 149.75108337402344, 156.92311096191406, 164.0951385498047, 171.2671661376953, 178.43919372558594, 185.6112060546875, 192.78323364257812, 199.95526123046875, 207.12728881835938, 214.29931640625, 221.47134399414062, 228.64337158203125, 235.81539916992188, 242.9874267578125, 250.15945434570312, 257.33148193359375]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 2.0, 2.0, 3.0, 6.0, 8.0, 8.0, 12.0, 7.0, 12.0, 20.0, 14.0, 17.0, 33.0, 17.0, 21.0, 32.0, 47.0, 34.0, 35.0, 35.0, 54.0, 53.0, 40.0, 40.0, 39.0, 39.0, 45.0, 33.0, 36.0, 42.0, 27.0, 20.0, 27.0, 20.0, 30.0, 19.0, 11.0, 19.0, 8.0, 8.0, 6.0, 8.0, 7.0, 5.0, 5.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.65625, -40.3125, -38.96875, -37.625, -36.28125, -34.9375, -33.59375, -32.25, -30.90625, -29.5625, -28.21875, -26.875, -25.53125, -24.1875, -22.84375, -21.5, -20.15625, -18.8125, -17.46875, -16.125, -14.78125, -13.4375, -12.09375, -10.75, -9.40625, -8.0625, -6.71875, -5.375, -4.03125, -2.6875, -1.34375, 0.0, 1.34375, 2.6875, 4.03125, 5.375, 6.71875, 8.0625, 9.40625, 10.75, 12.09375, 13.4375, 14.78125, 16.125, 17.46875, 18.8125, 20.15625, 21.5, 22.84375, 24.1875, 25.53125, 26.875, 28.21875, 29.5625, 30.90625, 32.25, 33.59375, 34.9375, 36.28125, 37.625, 38.96875, 40.3125, 41.65625, 43.0, 44.34375]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 9.0, 7.0, 13.0, 18.0, 40.0, 56.0, 72.0, 100.0, 188.0, 254.0, 408.0, 600.0, 955.0, 1429.0, 2124.0, 3193.0, 4885.0, 7631.0, 11363.0, 18204.0, 28725.0, 46942.0, 81079.0, 152183.0, 263424.0, 184616.0, 95283.0, 54125.0, 32682.0, 20405.0, 12951.0, 8398.0, 5440.0, 3670.0, 2420.0, 1571.0, 1092.0, 679.0, 440.0, 317.0, 183.0, 133.0, 88.0, 55.0, 45.0, 25.0, 14.0, 9.0, 10.0, 5.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-2.103515625, -2.0390625, -1.974609375, -1.91015625, -1.845703125, -1.78125, -1.716796875, -1.65234375, -1.587890625, -1.5234375, -1.458984375, -1.39453125, -1.330078125, -1.265625, -1.201171875, -1.13671875, -1.072265625, -1.0078125, -0.943359375, -0.87890625, -0.814453125, -0.75, -0.685546875, -0.62109375, -0.556640625, -0.4921875, -0.427734375, -0.36328125, -0.298828125, -0.234375, -0.169921875, -0.10546875, -0.041015625, 0.0234375, 0.087890625, 0.15234375, 0.216796875, 0.28125, 0.345703125, 0.41015625, 0.474609375, 0.5390625, 0.603515625, 0.66796875, 0.732421875, 0.796875, 0.861328125, 0.92578125, 0.990234375, 1.0546875, 1.119140625, 1.18359375, 1.248046875, 1.3125, 1.376953125, 1.44140625, 1.505859375, 1.5703125, 1.634765625, 1.69921875, 1.763671875, 1.828125, 1.892578125, 1.95703125, 2.021484375]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 6.0, 6.0, 4.0, 7.0, 6.0, 18.0, 14.0, 17.0, 21.0, 21.0, 29.0, 24.0, 30.0, 27.0, 36.0, 35.0, 35.0, 38.0, 46.0, 35.0, 1079.0, 52.0, 48.0, 52.0, 45.0, 41.0, 33.0, 33.0, 30.0, 24.0, 32.0, 22.0, 19.0, 20.0, 15.0, 8.0, 9.0, 8.0, 4.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-33.875, -32.9111328125, -31.947265625, -30.9833984375, -30.01953125, -29.0556640625, -28.091796875, -27.1279296875, -26.1640625, -25.2001953125, -24.236328125, -23.2724609375, -22.30859375, -21.3447265625, -20.380859375, -19.4169921875, -18.453125, -17.4892578125, -16.525390625, -15.5615234375, -14.59765625, -13.6337890625, -12.669921875, -11.7060546875, -10.7421875, -9.7783203125, -8.814453125, -7.8505859375, -6.88671875, -5.9228515625, -4.958984375, -3.9951171875, -3.03125, -2.0673828125, -1.103515625, -0.1396484375, 0.82421875, 1.7880859375, 2.751953125, 3.7158203125, 4.6796875, 5.6435546875, 6.607421875, 7.5712890625, 8.53515625, 9.4990234375, 10.462890625, 11.4267578125, 12.390625, 13.3544921875, 14.318359375, 15.2822265625, 16.24609375, 17.2099609375, 18.173828125, 19.1376953125, 20.1015625, 21.0654296875, 22.029296875, 22.9931640625, 23.95703125, 24.9208984375, 25.884765625, 26.8486328125, 27.8125]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 3.0, 8.0, 9.0, 6.0, 16.0, 17.0, 25.0, 38.0, 45.0, 79.0, 118.0, 198.0, 272.0, 436.0, 705.0, 1176.0, 1915.0, 3347.0, 5663.0, 10158.0, 18011.0, 32771.0, 62186.0, 136582.0, 1378053.0, 245368.0, 94487.0, 46788.0, 25194.0, 14037.0, 8060.0, 4569.0, 2569.0, 1588.0, 981.0, 578.0, 373.0, 237.0, 148.0, 105.0, 72.0, 47.0, 33.0, 24.0, 12.0, 9.0, 12.0, 5.0, 3.0, 3.0, 2.0, 1.0, 0.0, 1.0], "bins": [-2.853515625, -2.771484375, -2.689453125, -2.607421875, -2.525390625, -2.443359375, -2.361328125, -2.279296875, -2.197265625, -2.115234375, -2.033203125, -1.951171875, -1.869140625, -1.787109375, -1.705078125, -1.623046875, -1.541015625, -1.458984375, -1.376953125, -1.294921875, -1.212890625, -1.130859375, -1.048828125, -0.966796875, -0.884765625, -0.802734375, -0.720703125, -0.638671875, -0.556640625, -0.474609375, -0.392578125, -0.310546875, -0.228515625, -0.146484375, -0.064453125, 0.017578125, 0.099609375, 0.181640625, 0.263671875, 0.345703125, 0.427734375, 0.509765625, 0.591796875, 0.673828125, 0.755859375, 0.837890625, 0.919921875, 1.001953125, 1.083984375, 1.166015625, 1.248046875, 1.330078125, 1.412109375, 1.494140625, 1.576171875, 1.658203125, 1.740234375, 1.822265625, 1.904296875, 1.986328125, 2.068359375, 2.150390625, 2.232421875, 2.314453125, 2.396484375]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 4.0, 2.0, 4.0, 3.0, 4.0, 6.0, 12.0, 6.0, 16.0, 18.0, 9.0, 15.0, 19.0, 26.0, 35.0, 59.0, 78.0, 93.0, 101.0, 86.0, 87.0, 52.0, 53.0, 35.0, 17.0, 30.0, 13.0, 17.0, 14.0, 14.0, 7.0, 5.0, 6.0, 7.0, 5.0, 10.0, 7.0, 3.0, 2.0, 3.0, 2.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 2.0], "bins": [-0.0289154052734375, -0.02808070182800293, -0.02724599838256836, -0.02641129493713379, -0.02557659149169922, -0.02474188804626465, -0.023907184600830078, -0.023072481155395508, -0.022237777709960938, -0.021403074264526367, -0.020568370819091797, -0.019733667373657227, -0.018898963928222656, -0.018064260482788086, -0.017229557037353516, -0.016394853591918945, -0.015560150146484375, -0.014725446701049805, -0.013890743255615234, -0.013056039810180664, -0.012221336364746094, -0.011386632919311523, -0.010551929473876953, -0.009717226028442383, -0.008882522583007812, -0.008047819137573242, -0.007213115692138672, -0.0063784122467041016, -0.005543708801269531, -0.004709005355834961, -0.0038743019104003906, -0.0030395984649658203, -0.00220489501953125, -0.0013701915740966797, -0.0005354881286621094, 0.00029921531677246094, 0.0011339187622070312, 0.0019686222076416016, 0.002803325653076172, 0.003638029098510742, 0.0044727325439453125, 0.005307435989379883, 0.006142139434814453, 0.0069768428802490234, 0.007811546325683594, 0.008646249771118164, 0.009480953216552734, 0.010315656661987305, 0.011150360107421875, 0.011985063552856445, 0.012819766998291016, 0.013654470443725586, 0.014489173889160156, 0.015323877334594727, 0.016158580780029297, 0.016993284225463867, 0.017827987670898438, 0.018662691116333008, 0.019497394561767578, 0.02033209800720215, 0.02116680145263672, 0.02200150489807129, 0.02283620834350586, 0.02367091178894043, 0.024505615234375]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 0.0, 3.0, 2.0, 2.0, 3.0, 4.0, 7.0, 10.0, 11.0, 7.0, 14.0, 9.0, 11.0, 22.0, 25.0, 15.0, 33.0, 41.0, 59.0, 70.0, 120.0, 253.0, 1030.0, 10481.0, 814777.0, 213919.0, 6295.0, 729.0, 220.0, 97.0, 59.0, 46.0, 28.0, 22.0, 23.0, 14.0, 17.0, 15.0, 12.0, 8.0, 12.0, 3.0, 5.0, 8.0, 6.0, 5.0, 2.0, 3.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.403076171875, -0.389129638671875, -0.37518310546875, -0.361236572265625, -0.3472900390625, -0.333343505859375, -0.31939697265625, -0.305450439453125, -0.29150390625, -0.277557373046875, -0.26361083984375, -0.249664306640625, -0.2357177734375, -0.221771240234375, -0.20782470703125, -0.193878173828125, -0.179931640625, -0.165985107421875, -0.15203857421875, -0.138092041015625, -0.1241455078125, -0.110198974609375, -0.09625244140625, -0.082305908203125, -0.068359375, -0.054412841796875, -0.04046630859375, -0.026519775390625, -0.0125732421875, 0.001373291015625, 0.01531982421875, 0.029266357421875, 0.043212890625, 0.057159423828125, 0.07110595703125, 0.085052490234375, 0.0989990234375, 0.112945556640625, 0.12689208984375, 0.140838623046875, 0.15478515625, 0.168731689453125, 0.18267822265625, 0.196624755859375, 0.2105712890625, 0.224517822265625, 0.23846435546875, 0.252410888671875, 0.266357421875, 0.280303955078125, 0.29425048828125, 0.308197021484375, 0.3221435546875, 0.336090087890625, 0.35003662109375, 0.363983154296875, 0.3779296875, 0.391876220703125, 0.40582275390625, 0.419769287109375, 0.4337158203125, 0.447662353515625, 0.46160888671875, 0.475555419921875, 0.489501953125]}, "gradients/decoder.transformer.h.13.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 9.0, 473.0, 530.0, 5.0], "bins": [-0.5342034101486206, -0.5255879163742065, -0.5169724225997925, -0.5083569288253784, -0.49974146485328674, -0.4911259710788727, -0.482510507106781, -0.47389501333236694, -0.4652795195579529, -0.4566640257835388, -0.44804853200912476, -0.4394330680370331, -0.430817574262619, -0.42220208048820496, -0.4135866165161133, -0.4049711227416992, -0.39635562896728516, -0.3877401351928711, -0.37912464141845703, -0.37050917744636536, -0.3618936836719513, -0.35327818989753723, -0.34466272592544556, -0.3360472321510315, -0.32743173837661743, -0.31881624460220337, -0.3102007508277893, -0.30158528685569763, -0.29296979308128357, -0.2843542993068695, -0.27573883533477783, -0.26712334156036377, -0.2585078775882721, -0.24989238381385803, -0.24127690494060516, -0.2326614260673523, -0.22404593229293823, -0.21543043851852417, -0.2068149596452713, -0.19819948077201843, -0.18958397209644318, -0.1809684932231903, -0.17235299944877625, -0.16373750567436218, -0.15512202680110931, -0.14650654792785645, -0.13789105415344238, -0.12927556037902832, -0.12066008150577545, -0.11204459518194199, -0.10342910885810852, -0.09481362253427505, -0.08619813621044159, -0.07758264988660812, -0.06896716356277466, -0.06035167723894119, -0.05173619091510773, -0.04312070459127426, -0.034505218267440796, -0.02588973194360733, -0.017274245619773865, -0.0086587592959404, -4.3272972106933594e-05, 0.008572213351726532, 0.01718769781291485]}, "gradients/decoder.transformer.h.13.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 4.0, 5.0, 12.0, 15.0, 13.0, 14.0, 33.0, 33.0, 49.0, 61.0, 65.0, 57.0, 63.0, 82.0, 55.0, 71.0, 69.0, 52.0, 47.0, 51.0, 38.0, 30.0, 23.0, 19.0, 14.0, 12.0, 7.0, 6.0, 5.0, 1.0, 6.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01769155263900757, -0.01692652888596058, -0.01616150513291359, -0.0153964813798666, -0.01463145762681961, -0.013866433873772621, -0.013101410120725632, -0.012336386367678642, -0.011571362614631653, -0.010806338861584663, -0.010041315108537674, -0.009276291355490685, -0.008511267602443695, -0.007746243849396706, -0.006981220096349716, -0.006216196343302727, -0.005451172590255737, -0.004686148837208748, -0.003921125084161758, -0.003156101331114769, -0.0023910775780677795, -0.00162605382502079, -0.0008610300719738007, -9.600631892681122e-05, 0.0006690174341201782, 0.0014340411871671677, 0.002199064940214157, 0.0029640886932611465, 0.003729112446308136, 0.004494136199355125, 0.005259159952402115, 0.006024183705449104, 0.006789207458496094, 0.007554231211543083, 0.008319254964590073, 0.009084278717637062, 0.009849302470684052, 0.010614326223731041, 0.01137934997677803, 0.01214437372982502, 0.01290939748287201, 0.013674421235918999, 0.014439444988965988, 0.015204468742012978, 0.015969492495059967, 0.016734516248106956, 0.017499540001153946, 0.018264563754200935, 0.019029587507247925, 0.019794611260294914, 0.020559635013341904, 0.021324658766388893, 0.022089682519435883, 0.022854706272482872, 0.02361973002552986, 0.02438475377857685, 0.02514977753162384, 0.02591480128467083, 0.02667982503771782, 0.02744484879076481, 0.028209872543811798, 0.028974896296858788, 0.029739920049905777, 0.030504943802952766, 0.031269967555999756]}, "gradients/decoder.transformer.h.13.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 2.0, 2.0, 3.0, 6.0, 8.0, 8.0, 12.0, 7.0, 12.0, 20.0, 14.0, 17.0, 33.0, 17.0, 21.0, 32.0, 47.0, 34.0, 35.0, 35.0, 54.0, 53.0, 40.0, 40.0, 39.0, 38.0, 46.0, 33.0, 36.0, 42.0, 27.0, 20.0, 27.0, 20.0, 30.0, 19.0, 11.0, 19.0, 8.0, 8.0, 6.0, 8.0, 7.0, 5.0, 5.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.65625, -40.3125, -38.96875, -37.625, -36.28125, -34.9375, -33.59375, -32.25, -30.90625, -29.5625, -28.21875, -26.875, -25.53125, -24.1875, -22.84375, -21.5, -20.15625, -18.8125, -17.46875, -16.125, -14.78125, -13.4375, -12.09375, -10.75, -9.40625, -8.0625, -6.71875, -5.375, -4.03125, -2.6875, -1.34375, 0.0, 1.34375, 2.6875, 4.03125, 5.375, 6.71875, 8.0625, 9.40625, 10.75, 12.09375, 13.4375, 14.78125, 16.125, 17.46875, 18.8125, 20.15625, 21.5, 22.84375, 24.1875, 25.53125, 26.875, 28.21875, 29.5625, 30.90625, 32.25, 33.59375, 34.9375, 36.28125, 37.625, 38.96875, 40.3125, 41.65625, 43.0, 44.34375]}, "gradients/decoder.transformer.h.13.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 4.0, 3.0, 4.0, 8.0, 6.0, 11.0, 18.0, 17.0, 23.0, 41.0, 43.0, 47.0, 85.0, 110.0, 151.0, 161.0, 263.0, 356.0, 509.0, 694.0, 958.0, 1473.0, 2065.0, 3256.0, 4960.0, 8238.0, 17341.0, 63448.0, 635039.0, 242740.0, 34467.0, 12441.0, 6603.0, 4164.0, 2676.0, 1808.0, 1257.0, 812.0, 647.0, 444.0, 311.0, 198.0, 157.0, 126.0, 102.0, 76.0, 51.0, 47.0, 34.0, 18.0, 9.0, 20.0, 8.0, 7.0, 6.0, 3.0, 2.0, 3.0, 3.0, 0.0, 1.0], "bins": [-42.40625, -41.07275390625, -39.7392578125, -38.40576171875, -37.072265625, -35.73876953125, -34.4052734375, -33.07177734375, -31.73828125, -30.40478515625, -29.0712890625, -27.73779296875, -26.404296875, -25.07080078125, -23.7373046875, -22.40380859375, -21.0703125, -19.73681640625, -18.4033203125, -17.06982421875, -15.736328125, -14.40283203125, -13.0693359375, -11.73583984375, -10.40234375, -9.06884765625, -7.7353515625, -6.40185546875, -5.068359375, -3.73486328125, -2.4013671875, -1.06787109375, 0.265625, 1.59912109375, 2.9326171875, 4.26611328125, 5.599609375, 6.93310546875, 8.2666015625, 9.60009765625, 10.93359375, 12.26708984375, 13.6005859375, 14.93408203125, 16.267578125, 17.60107421875, 18.9345703125, 20.26806640625, 21.6015625, 22.93505859375, 24.2685546875, 25.60205078125, 26.935546875, 28.26904296875, 29.6025390625, 30.93603515625, 32.26953125, 33.60302734375, 34.9365234375, 36.27001953125, 37.603515625, 38.93701171875, 40.2705078125, 41.60400390625, 42.9375]}, "gradients/decoder.transformer.h.13.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 3.0, 6.0, 7.0, 7.0, 7.0, 10.0, 11.0, 21.0, 22.0, 35.0, 29.0, 34.0, 43.0, 29.0, 41.0, 45.0, 52.0, 60.0, 88.0, 1733.0, 252.0, 78.0, 58.0, 45.0, 53.0, 54.0, 33.0, 29.0, 34.0, 23.0, 12.0, 14.0, 12.0, 24.0, 7.0, 5.0, 7.0, 4.0, 4.0, 6.0, 6.0, 3.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-138.75, -134.525390625, -130.30078125, -126.076171875, -121.8515625, -117.626953125, -113.40234375, -109.177734375, -104.953125, -100.728515625, -96.50390625, -92.279296875, -88.0546875, -83.830078125, -79.60546875, -75.380859375, -71.15625, -66.931640625, -62.70703125, -58.482421875, -54.2578125, -50.033203125, -45.80859375, -41.583984375, -37.359375, -33.134765625, -28.91015625, -24.685546875, -20.4609375, -16.236328125, -12.01171875, -7.787109375, -3.5625, 0.662109375, 4.88671875, 9.111328125, 13.3359375, 17.560546875, 21.78515625, 26.009765625, 30.234375, 34.458984375, 38.68359375, 42.908203125, 47.1328125, 51.357421875, 55.58203125, 59.806640625, 64.03125, 68.255859375, 72.48046875, 76.705078125, 80.9296875, 85.154296875, 89.37890625, 93.603515625, 97.828125, 102.052734375, 106.27734375, 110.501953125, 114.7265625, 118.951171875, 123.17578125, 127.400390625, 131.625]}, "gradients/decoder.transformer.h.13.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 3.0, 13.0, 10.0, 21.0, 29.0, 32.0, 70.0, 92.0, 151.0, 360.0, 1429.0, 19687.0, 3112163.0, 9813.0, 1063.0, 325.0, 171.0, 114.0, 54.0, 32.0, 25.0, 15.0, 8.0, 5.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-570.0, -550.6171875, -531.234375, -511.8515625, -492.46875, -473.0859375, -453.703125, -434.3203125, -414.9375, -395.5546875, -376.171875, -356.7890625, -337.40625, -318.0234375, -298.640625, -279.2578125, -259.875, -240.4921875, -221.109375, -201.7265625, -182.34375, -162.9609375, -143.578125, -124.1953125, -104.8125, -85.4296875, -66.046875, -46.6640625, -27.28125, -7.8984375, 11.484375, 30.8671875, 50.25, 69.6328125, 89.015625, 108.3984375, 127.78125, 147.1640625, 166.546875, 185.9296875, 205.3125, 224.6953125, 244.078125, 263.4609375, 282.84375, 302.2265625, 321.609375, 340.9921875, 360.375, 379.7578125, 399.140625, 418.5234375, 437.90625, 457.2890625, 476.671875, 496.0546875, 515.4375, 534.8203125, 554.203125, 573.5859375, 592.96875, 612.3515625, 631.734375, 651.1171875, 670.5]}, "gradients/decoder.transformer.h.13.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 6.0, 563.0, 448.0, 3.0], "bins": [-5059.2060546875, -4977.53857421875, -4895.87109375, -4814.20361328125, -4732.53564453125, -4650.8681640625, -4569.20068359375, -4487.533203125, -4405.86572265625, -4324.1982421875, -4242.53076171875, -4160.86328125, -4079.195556640625, -3997.528076171875, -3915.8603515625, -3834.19287109375, -3752.525390625, -3670.85791015625, -3589.190185546875, -3507.522705078125, -3425.855224609375, -3344.187744140625, -3262.52001953125, -3180.8525390625, -3099.184814453125, -3017.517333984375, -2935.849609375, -2854.18212890625, -2772.5146484375, -2690.84716796875, -2609.179443359375, -2527.511962890625, -2445.844482421875, -2364.177001953125, -2282.50927734375, -2200.841796875, -2119.17431640625, -2037.5067138671875, -1955.839111328125, -1874.171630859375, -1792.504150390625, -1710.8365478515625, -1629.1690673828125, -1547.50146484375, -1465.833984375, -1384.1663818359375, -1302.498779296875, -1220.831298828125, -1139.1636962890625, -1057.49609375, -975.82861328125, -894.1610107421875, -812.4935302734375, -730.825927734375, -649.1583862304688, -567.4908447265625, -485.8233642578125, -404.15582275390625, -322.48828125, -240.82070922851562, -159.15316772460938, -77.48562622070312, 4.18194580078125, 85.8494873046875, 167.5170440673828]}, "gradients/decoder.transformer.h.13.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 5.0, 6.0, 5.0, 6.0, 5.0, 14.0, 10.0, 13.0, 10.0, 16.0, 25.0, 18.0, 28.0, 25.0, 26.0, 27.0, 29.0, 26.0, 39.0, 43.0, 45.0, 42.0, 46.0, 42.0, 47.0, 37.0, 38.0, 36.0, 39.0, 38.0, 25.0, 24.0, 29.0, 17.0, 22.0, 20.0, 15.0, 14.0, 13.0, 9.0, 9.0, 4.0, 5.0, 7.0, 2.0, 3.0, 2.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-334.1227111816406, -323.0453796386719, -311.96807861328125, -300.8907470703125, -289.81341552734375, -278.7361145019531, -267.6587829589844, -256.58148193359375, -245.504150390625, -234.4268341064453, -223.34951782226562, -212.27218627929688, -201.1948699951172, -190.1175537109375, -179.04022216796875, -167.96290588378906, -156.88558959960938, -145.8082733154297, -134.73095703125, -123.65362548828125, -112.57630920410156, -101.49899291992188, -90.42166900634766, -79.34434509277344, -68.26702880859375, -57.1897087097168, -46.112388610839844, -35.03506851196289, -23.957748413085938, -12.880428314208984, -1.8031082153320312, 9.274215698242188, 20.351531982421875, 31.428852081298828, 42.50617218017578, 53.583492279052734, 64.66081237792969, 75.73812866210938, 86.8154525756836, 97.89277648925781, 108.9700927734375, 120.04740905761719, 131.12472534179688, 142.20205688476562, 153.2793731689453, 164.356689453125, 175.43402099609375, 186.51133728027344, 197.58865356445312, 208.6659698486328, 219.7432861328125, 230.82061767578125, 241.89793395996094, 252.97525024414062, 264.0525817871094, 275.1298828125, 286.20721435546875, 297.2845458984375, 308.3618469238281, 319.4391784667969, 330.5164794921875, 341.59381103515625, 352.671142578125, 363.74847412109375, 374.8257751464844]}, "gradients/decoder.transformer.h.12.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 4.0, 0.0, 2.0, 4.0, 6.0, 4.0, 6.0, 7.0, 9.0, 9.0, 12.0, 21.0, 9.0, 15.0, 24.0, 23.0, 23.0, 26.0, 25.0, 39.0, 40.0, 30.0, 45.0, 48.0, 38.0, 36.0, 47.0, 46.0, 38.0, 42.0, 36.0, 36.0, 34.0, 28.0, 16.0, 21.0, 22.0, 29.0, 25.0, 14.0, 16.0, 12.0, 13.0, 7.0, 8.0, 5.0, 4.0, 3.0, 4.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-40.625, -39.2724609375, -37.919921875, -36.5673828125, -35.21484375, -33.8623046875, -32.509765625, -31.1572265625, -29.8046875, -28.4521484375, -27.099609375, -25.7470703125, -24.39453125, -23.0419921875, -21.689453125, -20.3369140625, -18.984375, -17.6318359375, -16.279296875, -14.9267578125, -13.57421875, -12.2216796875, -10.869140625, -9.5166015625, -8.1640625, -6.8115234375, -5.458984375, -4.1064453125, -2.75390625, -1.4013671875, -0.048828125, 1.3037109375, 2.65625, 4.0087890625, 5.361328125, 6.7138671875, 8.06640625, 9.4189453125, 10.771484375, 12.1240234375, 13.4765625, 14.8291015625, 16.181640625, 17.5341796875, 18.88671875, 20.2392578125, 21.591796875, 22.9443359375, 24.296875, 25.6494140625, 27.001953125, 28.3544921875, 29.70703125, 31.0595703125, 32.412109375, 33.7646484375, 35.1171875, 36.4697265625, 37.822265625, 39.1748046875, 40.52734375, 41.8798828125, 43.232421875, 44.5849609375, 45.9375]}, "gradients/decoder.transformer.h.12.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 6.0, 4.0, 11.0, 12.0, 22.0, 21.0, 38.0, 47.0, 68.0, 100.0, 118.0, 171.0, 279.0, 375.0, 560.0, 832.0, 1307.0, 2037.0, 3378.0, 6079.0, 12759.0, 59267.0, 1257652.0, 2670305.0, 144373.0, 16710.0, 7315.0, 3841.0, 2306.0, 1455.0, 927.0, 592.0, 389.0, 274.0, 213.0, 117.0, 95.0, 65.0, 42.0, 33.0, 34.0, 15.0, 15.0, 5.0, 9.0, 2.0, 4.0, 7.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-106.125, -102.6044921875, -99.083984375, -95.5634765625, -92.04296875, -88.5224609375, -85.001953125, -81.4814453125, -77.9609375, -74.4404296875, -70.919921875, -67.3994140625, -63.87890625, -60.3583984375, -56.837890625, -53.3173828125, -49.796875, -46.2763671875, -42.755859375, -39.2353515625, -35.71484375, -32.1943359375, -28.673828125, -25.1533203125, -21.6328125, -18.1123046875, -14.591796875, -11.0712890625, -7.55078125, -4.0302734375, -0.509765625, 3.0107421875, 6.53125, 10.0517578125, 13.572265625, 17.0927734375, 20.61328125, 24.1337890625, 27.654296875, 31.1748046875, 34.6953125, 38.2158203125, 41.736328125, 45.2568359375, 48.77734375, 52.2978515625, 55.818359375, 59.3388671875, 62.859375, 66.3798828125, 69.900390625, 73.4208984375, 76.94140625, 80.4619140625, 83.982421875, 87.5029296875, 91.0234375, 94.5439453125, 98.064453125, 101.5849609375, 105.10546875, 108.6259765625, 112.146484375, 115.6669921875, 119.1875]}, "gradients/decoder.transformer.h.12.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 3.0, 9.0, 6.0, 7.0, 9.0, 6.0, 7.0, 22.0, 33.0, 27.0, 42.0, 62.0, 92.0, 226.0, 637.0, 1186.0, 955.0, 379.0, 177.0, 64.0, 51.0, 26.0, 18.0, 5.0, 9.0, 6.0, 2.0, 1.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0], "bins": [-155.5, -151.681640625, -147.86328125, -144.044921875, -140.2265625, -136.408203125, -132.58984375, -128.771484375, -124.953125, -121.134765625, -117.31640625, -113.498046875, -109.6796875, -105.861328125, -102.04296875, -98.224609375, -94.40625, -90.587890625, -86.76953125, -82.951171875, -79.1328125, -75.314453125, -71.49609375, -67.677734375, -63.859375, -60.041015625, -56.22265625, -52.404296875, -48.5859375, -44.767578125, -40.94921875, -37.130859375, -33.3125, -29.494140625, -25.67578125, -21.857421875, -18.0390625, -14.220703125, -10.40234375, -6.583984375, -2.765625, 1.052734375, 4.87109375, 8.689453125, 12.5078125, 16.326171875, 20.14453125, 23.962890625, 27.78125, 31.599609375, 35.41796875, 39.236328125, 43.0546875, 46.873046875, 50.69140625, 54.509765625, 58.328125, 62.146484375, 65.96484375, 69.783203125, 73.6015625, 77.419921875, 81.23828125, 85.056640625, 88.875]}, "gradients/decoder.transformer.h.12.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 4.0, 7.0, 7.0, 12.0, 27.0, 38.0, 74.0, 124.0, 292.0, 625.0, 1576.0, 4726.0, 20114.0, 301017.0, 3803061.0, 49352.0, 8686.0, 2641.0, 974.0, 401.0, 211.0, 99.0, 78.0, 42.0, 30.0, 16.0, 12.0, 11.0, 11.0, 9.0, 2.0, 4.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-231.125, -221.587890625, -212.05078125, -202.513671875, -192.9765625, -183.439453125, -173.90234375, -164.365234375, -154.828125, -145.291015625, -135.75390625, -126.216796875, -116.6796875, -107.142578125, -97.60546875, -88.068359375, -78.53125, -68.994140625, -59.45703125, -49.919921875, -40.3828125, -30.845703125, -21.30859375, -11.771484375, -2.234375, 7.302734375, 16.83984375, 26.376953125, 35.9140625, 45.451171875, 54.98828125, 64.525390625, 74.0625, 83.599609375, 93.13671875, 102.673828125, 112.2109375, 121.748046875, 131.28515625, 140.822265625, 150.359375, 159.896484375, 169.43359375, 178.970703125, 188.5078125, 198.044921875, 207.58203125, 217.119140625, 226.65625, 236.193359375, 245.73046875, 255.267578125, 264.8046875, 274.341796875, 283.87890625, 293.416015625, 302.953125, 312.490234375, 322.02734375, 331.564453125, 341.1015625, 350.638671875, 360.17578125, 369.712890625, 379.25]}, "gradients/decoder.transformer.h.12.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 6.0, 13.0, 30.0, 49.0, 129.0, 281.0, 311.0, 127.0, 35.0, 18.0, 7.0, 6.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-622.1337890625, -587.7919311523438, -553.4500732421875, -519.1082763671875, -484.76641845703125, -450.424560546875, -416.0827331542969, -381.74090576171875, -347.3990478515625, -313.05718994140625, -278.7153625488281, -244.37351989746094, -210.03167724609375, -175.68983459472656, -141.34799194335938, -107.00614929199219, -72.664306640625, -38.32246398925781, -3.980621337890625, 30.361221313476562, 64.70306396484375, 99.04490661621094, 133.38674926757812, 167.7285919189453, 202.0704345703125, 236.4122772216797, 270.7541198730469, 305.095947265625, 339.43780517578125, 373.7796630859375, 408.1214904785156, 442.46331787109375, 476.80517578125, 511.14703369140625, 545.4888916015625, 579.8306884765625, 614.1725463867188, 648.514404296875, 682.856201171875, 717.1980590820312, 751.5399169921875, 785.8817749023438, 820.2236328125, 854.5654296875, 888.9072875976562, 923.2491455078125, 957.5909423828125, 991.9328002929688, 1026.274658203125, 1060.616455078125, 1094.9583740234375, 1129.3001708984375, 1163.64208984375, 1197.98388671875, 1232.32568359375, 1266.6676025390625, 1301.0093994140625, 1335.3511962890625, 1369.693115234375, 1404.034912109375, 1438.376708984375, 1472.7186279296875, 1507.0604248046875, 1541.40234375, 1575.744140625]}, "gradients/decoder.transformer.h.12.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 5.0, 5.0, 9.0, 4.0, 9.0, 10.0, 7.0, 11.0, 17.0, 15.0, 20.0, 17.0, 14.0, 26.0, 28.0, 23.0, 31.0, 28.0, 44.0, 40.0, 44.0, 31.0, 32.0, 45.0, 48.0, 40.0, 47.0, 37.0, 43.0, 38.0, 26.0, 29.0, 33.0, 22.0, 23.0, 22.0, 15.0, 18.0, 14.0, 14.0, 6.0, 3.0, 6.0, 4.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-277.356201171875, -268.98248291015625, -260.6087951660156, -252.23507690429688, -243.8613739013672, -235.4876708984375, -227.11395263671875, -218.74024963378906, -210.36654663085938, -201.9928436279297, -193.619140625, -185.24542236328125, -176.87171936035156, -168.49801635742188, -160.12429809570312, -151.75059509277344, -143.37689208984375, -135.00318908691406, -126.62947845458984, -118.25576782226562, -109.88206481933594, -101.50836181640625, -93.13465118408203, -84.76094055175781, -76.38723754882812, -68.01353454589844, -59.63982391357422, -51.266117095947266, -42.89241027832031, -34.51870346069336, -26.144996643066406, -17.771289825439453, -9.397552490234375, -1.0238456726074219, 7.349861145019531, 15.723567962646484, 24.097274780273438, 32.47098159790039, 40.844688415527344, 49.2183952331543, 57.59210205078125, 65.96580505371094, 74.33951568603516, 82.71322631835938, 91.08692932128906, 99.46063232421875, 107.83434295654297, 116.20805358886719, 124.58175659179688, 132.95545959472656, 141.32916259765625, 149.702880859375, 158.0765838623047, 166.45028686523438, 174.82400512695312, 183.1977081298828, 191.5714111328125, 199.9451141357422, 208.31881713867188, 216.69253540039062, 225.0662384033203, 233.43994140625, 241.81365966796875, 250.18736267089844, 258.5610656738281]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 4.0, 2.0, 2.0, 3.0, 4.0, 3.0, 8.0, 8.0, 9.0, 12.0, 11.0, 16.0, 18.0, 19.0, 8.0, 24.0, 25.0, 37.0, 37.0, 33.0, 39.0, 41.0, 38.0, 48.0, 45.0, 38.0, 30.0, 45.0, 42.0, 35.0, 38.0, 41.0, 27.0, 23.0, 31.0, 25.0, 18.0, 14.0, 20.0, 15.0, 15.0, 10.0, 11.0, 11.0, 7.0, 6.0, 4.0, 3.0, 5.0, 3.0, 0.0, 2.0, 2.0, 2.0, 1.0], "bins": [-45.96875, -44.64111328125, -43.3134765625, -41.98583984375, -40.658203125, -39.33056640625, -38.0029296875, -36.67529296875, -35.34765625, -34.02001953125, -32.6923828125, -31.36474609375, -30.037109375, -28.70947265625, -27.3818359375, -26.05419921875, -24.7265625, -23.39892578125, -22.0712890625, -20.74365234375, -19.416015625, -18.08837890625, -16.7607421875, -15.43310546875, -14.10546875, -12.77783203125, -11.4501953125, -10.12255859375, -8.794921875, -7.46728515625, -6.1396484375, -4.81201171875, -3.484375, -2.15673828125, -0.8291015625, 0.49853515625, 1.826171875, 3.15380859375, 4.4814453125, 5.80908203125, 7.13671875, 8.46435546875, 9.7919921875, 11.11962890625, 12.447265625, 13.77490234375, 15.1025390625, 16.43017578125, 17.7578125, 19.08544921875, 20.4130859375, 21.74072265625, 23.068359375, 24.39599609375, 25.7236328125, 27.05126953125, 28.37890625, 29.70654296875, 31.0341796875, 32.36181640625, 33.689453125, 35.01708984375, 36.3447265625, 37.67236328125, 39.0]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 10.0, 6.0, 14.0, 14.0, 28.0, 49.0, 69.0, 112.0, 137.0, 220.0, 350.0, 507.0, 762.0, 1089.0, 1652.0, 2457.0, 3539.0, 5072.0, 7427.0, 10698.0, 16244.0, 24136.0, 38476.0, 63145.0, 112598.0, 216212.0, 232621.0, 122840.0, 68075.0, 41154.0, 26008.0, 16933.0, 11378.0, 7555.0, 5336.0, 3722.0, 2546.0, 1687.0, 1188.0, 815.0, 586.0, 340.0, 248.0, 164.0, 115.0, 80.0, 60.0, 33.0, 23.0, 14.0, 8.0, 4.0, 3.0, 2.0, 3.0, 1.0], "bins": [-2.1796875, -2.1157073974609375, -2.051727294921875, -1.9877471923828125, -1.92376708984375, -1.8597869873046875, -1.795806884765625, -1.7318267822265625, -1.6678466796875, -1.6038665771484375, -1.539886474609375, -1.4759063720703125, -1.41192626953125, -1.3479461669921875, -1.283966064453125, -1.2199859619140625, -1.156005859375, -1.0920257568359375, -1.028045654296875, -0.9640655517578125, -0.90008544921875, -0.8361053466796875, -0.772125244140625, -0.7081451416015625, -0.6441650390625, -0.5801849365234375, -0.516204833984375, -0.4522247314453125, -0.38824462890625, -0.3242645263671875, -0.260284423828125, -0.1963043212890625, -0.13232421875, -0.0683441162109375, -0.004364013671875, 0.0596160888671875, 0.12359619140625, 0.1875762939453125, 0.251556396484375, 0.3155364990234375, 0.3795166015625, 0.4434967041015625, 0.507476806640625, 0.5714569091796875, 0.63543701171875, 0.6994171142578125, 0.763397216796875, 0.8273773193359375, 0.891357421875, 0.9553375244140625, 1.019317626953125, 1.0832977294921875, 1.14727783203125, 1.2112579345703125, 1.275238037109375, 1.3392181396484375, 1.4031982421875, 1.4671783447265625, 1.531158447265625, 1.5951385498046875, 1.65911865234375, 1.7230987548828125, 1.787078857421875, 1.8510589599609375, 1.9150390625]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 6.0, 7.0, 6.0, 3.0, 5.0, 11.0, 15.0, 11.0, 21.0, 24.0, 14.0, 16.0, 21.0, 30.0, 19.0, 34.0, 25.0, 29.0, 41.0, 36.0, 33.0, 40.0, 31.0, 1069.0, 46.0, 32.0, 35.0, 35.0, 27.0, 40.0, 36.0, 24.0, 31.0, 33.0, 19.0, 21.0, 13.0, 23.0, 11.0, 9.0, 8.0, 9.0, 10.0, 6.0, 8.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-25.671875, -24.822509765625, -23.97314453125, -23.123779296875, -22.2744140625, -21.425048828125, -20.57568359375, -19.726318359375, -18.876953125, -18.027587890625, -17.17822265625, -16.328857421875, -15.4794921875, -14.630126953125, -13.78076171875, -12.931396484375, -12.08203125, -11.232666015625, -10.38330078125, -9.533935546875, -8.6845703125, -7.835205078125, -6.98583984375, -6.136474609375, -5.287109375, -4.437744140625, -3.58837890625, -2.739013671875, -1.8896484375, -1.040283203125, -0.19091796875, 0.658447265625, 1.5078125, 2.357177734375, 3.20654296875, 4.055908203125, 4.9052734375, 5.754638671875, 6.60400390625, 7.453369140625, 8.302734375, 9.152099609375, 10.00146484375, 10.850830078125, 11.7001953125, 12.549560546875, 13.39892578125, 14.248291015625, 15.09765625, 15.947021484375, 16.79638671875, 17.645751953125, 18.4951171875, 19.344482421875, 20.19384765625, 21.043212890625, 21.892578125, 22.741943359375, 23.59130859375, 24.440673828125, 25.2900390625, 26.139404296875, 26.98876953125, 27.838134765625, 28.6875]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 11.0, 2.0, 10.0, 16.0, 26.0, 23.0, 32.0, 53.0, 100.0, 133.0, 206.0, 294.0, 427.0, 648.0, 979.0, 1480.0, 2390.0, 3669.0, 5748.0, 9197.0, 14738.0, 24132.0, 40065.0, 70070.0, 138718.0, 1329843.0, 223042.0, 96167.0, 52839.0, 30945.0, 18909.0, 11794.0, 7408.0, 4559.0, 2960.0, 1882.0, 1268.0, 761.0, 549.0, 335.0, 244.0, 154.0, 95.0, 70.0, 48.0, 37.0, 20.0, 18.0, 11.0, 6.0, 4.0, 5.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-2.083984375, -2.013946533203125, -1.94390869140625, -1.873870849609375, -1.8038330078125, -1.733795166015625, -1.66375732421875, -1.593719482421875, -1.523681640625, -1.453643798828125, -1.38360595703125, -1.313568115234375, -1.2435302734375, -1.173492431640625, -1.10345458984375, -1.033416748046875, -0.96337890625, -0.893341064453125, -0.82330322265625, -0.753265380859375, -0.6832275390625, -0.613189697265625, -0.54315185546875, -0.473114013671875, -0.403076171875, -0.333038330078125, -0.26300048828125, -0.192962646484375, -0.1229248046875, -0.052886962890625, 0.01715087890625, 0.087188720703125, 0.1572265625, 0.227264404296875, 0.29730224609375, 0.367340087890625, 0.4373779296875, 0.507415771484375, 0.57745361328125, 0.647491455078125, 0.717529296875, 0.787567138671875, 0.85760498046875, 0.927642822265625, 0.9976806640625, 1.067718505859375, 1.13775634765625, 1.207794189453125, 1.27783203125, 1.347869873046875, 1.41790771484375, 1.487945556640625, 1.5579833984375, 1.628021240234375, 1.69805908203125, 1.768096923828125, 1.838134765625, 1.908172607421875, 1.97821044921875, 2.048248291015625, 2.1182861328125, 2.188323974609375, 2.25836181640625, 2.328399658203125, 2.3984375]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 3.0, 5.0, 4.0, 3.0, 1.0, 5.0, 8.0, 10.0, 16.0, 21.0, 19.0, 26.0, 28.0, 36.0, 34.0, 68.0, 74.0, 66.0, 93.0, 92.0, 66.0, 66.0, 57.0, 47.0, 22.0, 16.0, 27.0, 17.0, 14.0, 21.0, 9.0, 8.0, 8.0, 2.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.036590576171875, -0.03556513786315918, -0.03453969955444336, -0.03351426124572754, -0.03248882293701172, -0.0314633846282959, -0.030437946319580078, -0.029412508010864258, -0.028387069702148438, -0.027361631393432617, -0.026336193084716797, -0.025310754776000977, -0.024285316467285156, -0.023259878158569336, -0.022234439849853516, -0.021209001541137695, -0.020183563232421875, -0.019158124923706055, -0.018132686614990234, -0.017107248306274414, -0.016081809997558594, -0.015056371688842773, -0.014030933380126953, -0.013005495071411133, -0.011980056762695312, -0.010954618453979492, -0.009929180145263672, -0.008903741836547852, -0.007878303527832031, -0.006852865219116211, -0.005827426910400391, -0.00480198860168457, -0.00377655029296875, -0.0027511119842529297, -0.0017256736755371094, -0.0007002353668212891, 0.00032520294189453125, 0.0013506412506103516, 0.002376079559326172, 0.003401517868041992, 0.0044269561767578125, 0.005452394485473633, 0.006477832794189453, 0.0075032711029052734, 0.008528709411621094, 0.009554147720336914, 0.010579586029052734, 0.011605024337768555, 0.012630462646484375, 0.013655900955200195, 0.014681339263916016, 0.015706777572631836, 0.016732215881347656, 0.017757654190063477, 0.018783092498779297, 0.019808530807495117, 0.020833969116210938, 0.021859407424926758, 0.022884845733642578, 0.0239102840423584, 0.02493572235107422, 0.02596116065979004, 0.02698659896850586, 0.02801203727722168, 0.0290374755859375]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 11.0, 8.0, 16.0, 24.0, 20.0, 35.0, 42.0, 44.0, 70.0, 83.0, 162.0, 464.0, 2585.0, 45499.0, 971089.0, 25667.0, 1822.0, 385.0, 152.0, 91.0, 60.0, 49.0, 35.0, 27.0, 25.0, 18.0, 16.0, 9.0, 6.0, 3.0, 3.0, 5.0, 4.0, 5.0, 2.0, 1.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.480224609375, -0.4633293151855469, -0.44643402099609375, -0.4295387268066406, -0.4126434326171875, -0.3957481384277344, -0.37885284423828125, -0.3619575500488281, -0.345062255859375, -0.3281669616699219, -0.31127166748046875, -0.2943763732910156, -0.2774810791015625, -0.2605857849121094, -0.24369049072265625, -0.22679519653320312, -0.20989990234375, -0.19300460815429688, -0.17610931396484375, -0.15921401977539062, -0.1423187255859375, -0.12542343139648438, -0.10852813720703125, -0.09163284301757812, -0.074737548828125, -0.057842254638671875, -0.04094696044921875, -0.024051666259765625, -0.0071563720703125, 0.009738922119140625, 0.02663421630859375, 0.043529510498046875, 0.0604248046875, 0.07732009887695312, 0.09421539306640625, 0.11111068725585938, 0.1280059814453125, 0.14490127563476562, 0.16179656982421875, 0.17869186401367188, 0.195587158203125, 0.21248245239257812, 0.22937774658203125, 0.24627304077148438, 0.2631683349609375, 0.2800636291503906, 0.29695892333984375, 0.3138542175292969, 0.33074951171875, 0.3476448059082031, 0.36454010009765625, 0.3814353942871094, 0.3983306884765625, 0.4152259826660156, 0.43212127685546875, 0.4490165710449219, 0.465911865234375, 0.4828071594238281, 0.49970245361328125, 0.5165977478027344, 0.5334930419921875, 0.5503883361816406, 0.5672836303710938, 0.5841789245605469, 0.60107421875]}, "gradients/decoder.transformer.h.12.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 21.0, 104.0, 574.0, 261.0, 40.0, 11.0, 4.0], "bins": [-0.23129884898662567, -0.22742971777915955, -0.22356057167053223, -0.2196914404630661, -0.21582230925559998, -0.21195316314697266, -0.20808403193950653, -0.2042149007320404, -0.20034576952457428, -0.19647663831710815, -0.19260749220848083, -0.1887383610010147, -0.18486922979354858, -0.18100008368492126, -0.17713095247745514, -0.173261821269989, -0.1693926751613617, -0.16552354395389557, -0.16165439784526825, -0.15778526663780212, -0.153916135430336, -0.15004700422286987, -0.14617785811424255, -0.14230872690677643, -0.1384395956993103, -0.13457046449184418, -0.13070131838321686, -0.12683218717575073, -0.1229630559682846, -0.11909391731023788, -0.11522477865219116, -0.11135564744472504, -0.10748650878667831, -0.10361737012863159, -0.09974823892116547, -0.09587910026311874, -0.09200996905565262, -0.0881408303976059, -0.08427169919013977, -0.08040256053209305, -0.07653342187404633, -0.0726642832159996, -0.06879515200853348, -0.06492601335048676, -0.06105687841773033, -0.05718774348497391, -0.053318608552217484, -0.04944947361946106, -0.045580342411994934, -0.04171120747923851, -0.037842072546482086, -0.033972933888435364, -0.03010379895567894, -0.026234664022922516, -0.022365529090166092, -0.01849639229476452, -0.014627255499362946, -0.010758119635283947, -0.006888984236866236, -0.0030198488384485245, 0.0008492870256304741, 0.004718422889709473, 0.008587557822465897, 0.01245669461786747, 0.016325829550623894]}, "gradients/decoder.transformer.h.12.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 5.0, 5.0, 5.0, 8.0, 6.0, 13.0, 24.0, 18.0, 25.0, 21.0, 31.0, 30.0, 31.0, 37.0, 31.0, 47.0, 60.0, 53.0, 45.0, 47.0, 33.0, 64.0, 38.0, 53.0, 39.0, 31.0, 30.0, 25.0, 32.0, 21.0, 17.0, 24.0, 15.0, 16.0, 5.0, 7.0, 11.0, 7.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.017859458923339844, -0.017259661108255386, -0.016659865155816078, -0.01606006920337677, -0.015460271388292313, -0.01486047450453043, -0.014260677620768547, -0.013660880737006664, -0.013061083853244781, -0.012461286969482899, -0.011861490085721016, -0.011261693201959133, -0.01066189631819725, -0.010062099434435368, -0.009462302550673485, -0.008862505666911602, -0.00826270878314972, -0.0076629118993878365, -0.007063115015625954, -0.006463318131864071, -0.005863521248102188, -0.005263724364340305, -0.0046639274805784225, -0.00406413059681654, -0.003464333713054657, -0.002864536829292774, -0.0022647399455308914, -0.0016649430617690086, -0.0010651461780071259, -0.00046534929424524307, 0.0001344475895166397, 0.0007342444732785225, 0.0013340413570404053, 0.001933838240802288, 0.002533635124564171, 0.0031334320083260536, 0.0037332288920879364, 0.004333025775849819, 0.004932822659611702, 0.005532619543373585, 0.0061324164271354675, 0.00673221331089735, 0.007332010194659233, 0.007931807078421116, 0.008531603962182999, 0.009131400845944881, 0.009731197729706764, 0.010330994613468647, 0.01093079149723053, 0.011530588380992413, 0.012130385264754295, 0.012730182148516178, 0.013329979032278061, 0.013929775916039944, 0.014529572799801826, 0.01512936968356371, 0.015729166567325592, 0.01632896438241005, 0.016928760334849358, 0.017528556287288666, 0.018128354102373123, 0.01872815191745758, 0.01932794786989689, 0.019927743822336197, 0.020527541637420654]}, "gradients/decoder.transformer.h.12.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 4.0, 2.0, 2.0, 3.0, 4.0, 3.0, 8.0, 8.0, 9.0, 12.0, 11.0, 16.0, 18.0, 19.0, 8.0, 24.0, 25.0, 37.0, 37.0, 33.0, 39.0, 41.0, 38.0, 48.0, 45.0, 38.0, 30.0, 45.0, 42.0, 35.0, 38.0, 41.0, 27.0, 23.0, 31.0, 25.0, 18.0, 14.0, 20.0, 15.0, 15.0, 10.0, 11.0, 11.0, 7.0, 6.0, 4.0, 3.0, 5.0, 3.0, 0.0, 2.0, 2.0, 2.0, 1.0], "bins": [-45.96875, -44.64111328125, -43.3134765625, -41.98583984375, -40.658203125, -39.33056640625, -38.0029296875, -36.67529296875, -35.34765625, -34.02001953125, -32.6923828125, -31.36474609375, -30.037109375, -28.70947265625, -27.3818359375, -26.05419921875, -24.7265625, -23.39892578125, -22.0712890625, -20.74365234375, -19.416015625, -18.08837890625, -16.7607421875, -15.43310546875, -14.10546875, -12.77783203125, -11.4501953125, -10.12255859375, -8.794921875, -7.46728515625, -6.1396484375, -4.81201171875, -3.484375, -2.15673828125, -0.8291015625, 0.49853515625, 1.826171875, 3.15380859375, 4.4814453125, 5.80908203125, 7.13671875, 8.46435546875, 9.7919921875, 11.11962890625, 12.447265625, 13.77490234375, 15.1025390625, 16.43017578125, 17.7578125, 19.08544921875, 20.4130859375, 21.74072265625, 23.068359375, 24.39599609375, 25.7236328125, 27.05126953125, 28.37890625, 29.70654296875, 31.0341796875, 32.36181640625, 33.689453125, 35.01708984375, 36.3447265625, 37.67236328125, 39.0]}, "gradients/decoder.transformer.h.12.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 6.0, 8.0, 13.0, 22.0, 21.0, 34.0, 50.0, 68.0, 111.0, 145.0, 196.0, 294.0, 405.0, 590.0, 906.0, 1274.0, 2049.0, 3343.0, 6641.0, 17868.0, 86523.0, 784457.0, 106357.0, 19958.0, 7259.0, 3548.0, 2093.0, 1330.0, 912.0, 645.0, 449.0, 278.0, 217.0, 144.0, 102.0, 73.0, 49.0, 28.0, 29.0, 19.0, 13.0, 11.0, 8.0, 9.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0], "bins": [-60.78125, -59.0712890625, -57.361328125, -55.6513671875, -53.94140625, -52.2314453125, -50.521484375, -48.8115234375, -47.1015625, -45.3916015625, -43.681640625, -41.9716796875, -40.26171875, -38.5517578125, -36.841796875, -35.1318359375, -33.421875, -31.7119140625, -30.001953125, -28.2919921875, -26.58203125, -24.8720703125, -23.162109375, -21.4521484375, -19.7421875, -18.0322265625, -16.322265625, -14.6123046875, -12.90234375, -11.1923828125, -9.482421875, -7.7724609375, -6.0625, -4.3525390625, -2.642578125, -0.9326171875, 0.77734375, 2.4873046875, 4.197265625, 5.9072265625, 7.6171875, 9.3271484375, 11.037109375, 12.7470703125, 14.45703125, 16.1669921875, 17.876953125, 19.5869140625, 21.296875, 23.0068359375, 24.716796875, 26.4267578125, 28.13671875, 29.8466796875, 31.556640625, 33.2666015625, 34.9765625, 36.6865234375, 38.396484375, 40.1064453125, 41.81640625, 43.5263671875, 45.236328125, 46.9462890625, 48.65625]}, "gradients/decoder.transformer.h.12.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 1.0, 2.0, 4.0, 3.0, 6.0, 5.0, 8.0, 12.0, 18.0, 16.0, 18.0, 22.0, 20.0, 23.0, 30.0, 30.0, 50.0, 33.0, 48.0, 55.0, 57.0, 484.0, 1611.0, 79.0, 51.0, 47.0, 46.0, 36.0, 32.0, 33.0, 29.0, 20.0, 26.0, 18.0, 18.0, 15.0, 14.0, 9.0, 10.0, 5.0, 1.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-136.0, -131.61328125, -127.2265625, -122.83984375, -118.453125, -114.06640625, -109.6796875, -105.29296875, -100.90625, -96.51953125, -92.1328125, -87.74609375, -83.359375, -78.97265625, -74.5859375, -70.19921875, -65.8125, -61.42578125, -57.0390625, -52.65234375, -48.265625, -43.87890625, -39.4921875, -35.10546875, -30.71875, -26.33203125, -21.9453125, -17.55859375, -13.171875, -8.78515625, -4.3984375, -0.01171875, 4.375, 8.76171875, 13.1484375, 17.53515625, 21.921875, 26.30859375, 30.6953125, 35.08203125, 39.46875, 43.85546875, 48.2421875, 52.62890625, 57.015625, 61.40234375, 65.7890625, 70.17578125, 74.5625, 78.94921875, 83.3359375, 87.72265625, 92.109375, 96.49609375, 100.8828125, 105.26953125, 109.65625, 114.04296875, 118.4296875, 122.81640625, 127.203125, 131.58984375, 135.9765625, 140.36328125, 144.75]}, "gradients/decoder.transformer.h.12.attn.c_attn.weight": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 3.0, 1.0, 7.0, 8.0, 11.0, 19.0, 35.0, 48.0, 73.0, 146.0, 265.0, 866.0, 10245.0, 3126000.0, 6764.0, 655.0, 256.0, 116.0, 64.0, 46.0, 38.0, 16.0, 9.0, 5.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-472.5, -453.1640625, -433.828125, -414.4921875, -395.15625, -375.8203125, -356.484375, -337.1484375, -317.8125, -298.4765625, -279.140625, -259.8046875, -240.46875, -221.1328125, -201.796875, -182.4609375, -163.125, -143.7890625, -124.453125, -105.1171875, -85.78125, -66.4453125, -47.109375, -27.7734375, -8.4375, 10.8984375, 30.234375, 49.5703125, 68.90625, 88.2421875, 107.578125, 126.9140625, 146.25, 165.5859375, 184.921875, 204.2578125, 223.59375, 242.9296875, 262.265625, 281.6015625, 300.9375, 320.2734375, 339.609375, 358.9453125, 378.28125, 397.6171875, 416.953125, 436.2890625, 455.625, 474.9609375, 494.296875, 513.6328125, 532.96875, 552.3046875, 571.640625, 590.9765625, 610.3125, 629.6484375, 648.984375, 668.3203125, 687.65625, 706.9921875, 726.328125, 745.6640625, 765.0]}, "gradients/decoder.transformer.h.12.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 4.0, 34.0, 366.0, 541.0, 59.0, 13.0], "bins": [-1305.7406005859375, -1284.364501953125, -1262.98828125, -1241.6121826171875, -1220.236083984375, -1198.85986328125, -1177.4837646484375, -1156.107666015625, -1134.7314453125, -1113.3553466796875, -1091.9791259765625, -1070.60302734375, -1049.2269287109375, -1027.8507080078125, -1006.474609375, -985.0984497070312, -963.7223510742188, -942.34619140625, -920.9700927734375, -899.5939331054688, -878.2177734375, -856.8416748046875, -835.4655151367188, -814.08935546875, -792.7132568359375, -771.3370971679688, -749.9609985351562, -728.5848388671875, -707.2086791992188, -685.83251953125, -664.4564208984375, -643.0802612304688, -621.7041015625, -600.3279418945312, -578.9518432617188, -557.57568359375, -536.1995239257812, -514.8233642578125, -493.447265625, -472.07110595703125, -450.6949768066406, -429.31884765625, -407.94268798828125, -386.5665588378906, -365.1904296875, -343.81427001953125, -322.4381408691406, -301.06201171875, -279.68585205078125, -258.3097229003906, -236.93356323242188, -215.55743408203125, -194.18128967285156, -172.80514526367188, -151.42901611328125, -130.05287170410156, -108.67672729492188, -87.30058288574219, -65.92444610595703, -44.548309326171875, -23.172164916992188, -1.7960205078125, 19.580108642578125, 40.95625305175781, 62.332393646240234]}, "gradients/decoder.transformer.h.12.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 4.0, 4.0, 5.0, 6.0, 11.0, 10.0, 12.0, 15.0, 18.0, 34.0, 23.0, 29.0, 33.0, 35.0, 31.0, 42.0, 53.0, 57.0, 47.0, 60.0, 50.0, 51.0, 60.0, 39.0, 34.0, 38.0, 34.0, 24.0, 34.0, 21.0, 13.0, 13.0, 13.0, 16.0, 8.0, 7.0, 6.0, 6.0, 5.0, 5.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-445.086669921875, -432.382080078125, -419.6774597167969, -406.9728698730469, -394.2682800292969, -381.5636901855469, -368.85906982421875, -356.15447998046875, -343.44989013671875, -330.74530029296875, -318.0406799316406, -305.3360900878906, -292.6315002441406, -279.9269104003906, -267.2222900390625, -254.5177001953125, -241.81309509277344, -229.10848999023438, -216.40390014648438, -203.6992950439453, -190.9947052001953, -178.29010009765625, -165.58551025390625, -152.8809051513672, -140.17630004882812, -127.4717025756836, -114.76710510253906, -102.0625, -89.35791015625, -76.65330505371094, -63.948707580566406, -51.244110107421875, -38.539520263671875, -25.834922790527344, -13.13032341003418, -0.4257240295410156, 12.278873443603516, 24.983470916748047, 37.688072204589844, 50.392669677734375, 63.097267150878906, 75.80186462402344, 88.50646209716797, 101.2110595703125, 113.91566467285156, 126.62025451660156, 139.32485961914062, 152.02944946289062, 164.7340545654297, 177.43865966796875, 190.14324951171875, 202.8478546142578, 215.5524444580078, 228.25704956054688, 240.96163940429688, 253.66624450683594, 266.370849609375, 279.075439453125, 291.7800598144531, 304.4846496582031, 317.1892395019531, 329.8938293457031, 342.59844970703125, 355.30303955078125, 368.00762939453125]}, "gradients/decoder.transformer.h.11.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 4.0, 1.0, 3.0, 3.0, 2.0, 5.0, 6.0, 8.0, 12.0, 6.0, 14.0, 8.0, 18.0, 16.0, 19.0, 18.0, 28.0, 26.0, 30.0, 33.0, 30.0, 26.0, 43.0, 38.0, 41.0, 35.0, 41.0, 36.0, 44.0, 32.0, 40.0, 34.0, 44.0, 22.0, 35.0, 22.0, 23.0, 17.0, 22.0, 20.0, 18.0, 8.0, 9.0, 16.0, 12.0, 9.0, 10.0, 4.0, 6.0, 2.0, 8.0, 0.0, 3.0, 1.0, 1.0, 1.0, 4.0], "bins": [-43.625, -42.3359375, -41.046875, -39.7578125, -38.46875, -37.1796875, -35.890625, -34.6015625, -33.3125, -32.0234375, -30.734375, -29.4453125, -28.15625, -26.8671875, -25.578125, -24.2890625, -23.0, -21.7109375, -20.421875, -19.1328125, -17.84375, -16.5546875, -15.265625, -13.9765625, -12.6875, -11.3984375, -10.109375, -8.8203125, -7.53125, -6.2421875, -4.953125, -3.6640625, -2.375, -1.0859375, 0.203125, 1.4921875, 2.78125, 4.0703125, 5.359375, 6.6484375, 7.9375, 9.2265625, 10.515625, 11.8046875, 13.09375, 14.3828125, 15.671875, 16.9609375, 18.25, 19.5390625, 20.828125, 22.1171875, 23.40625, 24.6953125, 25.984375, 27.2734375, 28.5625, 29.8515625, 31.140625, 32.4296875, 33.71875, 35.0078125, 36.296875, 37.5859375, 38.875]}, "gradients/decoder.transformer.h.11.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 2.0, 6.0, 6.0, 7.0, 10.0, 9.0, 16.0, 20.0, 21.0, 32.0, 39.0, 63.0, 96.0, 138.0, 188.0, 240.0, 367.0, 580.0, 857.0, 1325.0, 2219.0, 3924.0, 7611.0, 18941.0, 191787.0, 2821387.0, 1057749.0, 60621.0, 12527.0, 5527.0, 2964.0, 1770.0, 1085.0, 654.0, 434.0, 313.0, 207.0, 160.0, 114.0, 74.0, 50.0, 37.0, 30.0, 21.0, 16.0, 14.0, 11.0, 5.0, 7.0, 4.0, 4.0, 1.0, 2.0, 2.0, 2.0], "bins": [-121.5625, -118.068359375, -114.57421875, -111.080078125, -107.5859375, -104.091796875, -100.59765625, -97.103515625, -93.609375, -90.115234375, -86.62109375, -83.126953125, -79.6328125, -76.138671875, -72.64453125, -69.150390625, -65.65625, -62.162109375, -58.66796875, -55.173828125, -51.6796875, -48.185546875, -44.69140625, -41.197265625, -37.703125, -34.208984375, -30.71484375, -27.220703125, -23.7265625, -20.232421875, -16.73828125, -13.244140625, -9.75, -6.255859375, -2.76171875, 0.732421875, 4.2265625, 7.720703125, 11.21484375, 14.708984375, 18.203125, 21.697265625, 25.19140625, 28.685546875, 32.1796875, 35.673828125, 39.16796875, 42.662109375, 46.15625, 49.650390625, 53.14453125, 56.638671875, 60.1328125, 63.626953125, 67.12109375, 70.615234375, 74.109375, 77.603515625, 81.09765625, 84.591796875, 88.0859375, 91.580078125, 95.07421875, 98.568359375, 102.0625]}, "gradients/decoder.transformer.h.11.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 4.0, 6.0, 0.0, 6.0, 3.0, 5.0, 8.0, 6.0, 9.0, 9.0, 15.0, 23.0, 31.0, 35.0, 44.0, 75.0, 106.0, 192.0, 416.0, 786.0, 901.0, 615.0, 303.0, 172.0, 91.0, 46.0, 37.0, 21.0, 24.0, 20.0, 17.0, 16.0, 9.0, 7.0, 5.0, 7.0, 4.0, 4.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-105.9375, -102.93359375, -99.9296875, -96.92578125, -93.921875, -90.91796875, -87.9140625, -84.91015625, -81.90625, -78.90234375, -75.8984375, -72.89453125, -69.890625, -66.88671875, -63.8828125, -60.87890625, -57.875, -54.87109375, -51.8671875, -48.86328125, -45.859375, -42.85546875, -39.8515625, -36.84765625, -33.84375, -30.83984375, -27.8359375, -24.83203125, -21.828125, -18.82421875, -15.8203125, -12.81640625, -9.8125, -6.80859375, -3.8046875, -0.80078125, 2.203125, 5.20703125, 8.2109375, 11.21484375, 14.21875, 17.22265625, 20.2265625, 23.23046875, 26.234375, 29.23828125, 32.2421875, 35.24609375, 38.25, 41.25390625, 44.2578125, 47.26171875, 50.265625, 53.26953125, 56.2734375, 59.27734375, 62.28125, 65.28515625, 68.2890625, 71.29296875, 74.296875, 77.30078125, 80.3046875, 83.30859375, 86.3125]}, "gradients/decoder.transformer.h.11.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 4.0, 6.0, 4.0, 8.0, 10.0, 7.0, 18.0, 24.0, 25.0, 28.0, 39.0, 89.0, 237.0, 700.0, 2369.0, 10624.0, 92774.0, 3975837.0, 97008.0, 10894.0, 2325.0, 719.0, 222.0, 117.0, 55.0, 42.0, 21.0, 17.0, 11.0, 11.0, 8.0, 6.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-300.0, -289.09375, -278.1875, -267.28125, -256.375, -245.46875, -234.5625, -223.65625, -212.75, -201.84375, -190.9375, -180.03125, -169.125, -158.21875, -147.3125, -136.40625, -125.5, -114.59375, -103.6875, -92.78125, -81.875, -70.96875, -60.0625, -49.15625, -38.25, -27.34375, -16.4375, -5.53125, 5.375, 16.28125, 27.1875, 38.09375, 49.0, 59.90625, 70.8125, 81.71875, 92.625, 103.53125, 114.4375, 125.34375, 136.25, 147.15625, 158.0625, 168.96875, 179.875, 190.78125, 201.6875, 212.59375, 223.5, 234.40625, 245.3125, 256.21875, 267.125, 278.03125, 288.9375, 299.84375, 310.75, 321.65625, 332.5625, 343.46875, 354.375, 365.28125, 376.1875, 387.09375, 398.0]}, "gradients/decoder.transformer.h.11.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 6.0, 6.0, 7.0, 31.0, 62.0, 118.0, 308.0, 266.0, 108.0, 66.0, 19.0, 9.0, 5.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1017.845458984375, -988.7223510742188, -959.5993041992188, -930.4761962890625, -901.3531494140625, -872.2300415039062, -843.10693359375, -813.98388671875, -784.8607788085938, -755.7376708984375, -726.6146240234375, -697.4915161132812, -668.368408203125, -639.245361328125, -610.1222534179688, -580.9991455078125, -551.8760986328125, -522.7529907226562, -493.62994384765625, -464.5068359375, -435.3837585449219, -406.26068115234375, -377.1375732421875, -348.0144958496094, -318.89141845703125, -289.7683410644531, -260.645263671875, -231.52215576171875, -202.39907836914062, -173.2760009765625, -144.1529083251953, -115.02981567382812, -85.90673828125, -56.783653259277344, -27.660568237304688, 1.4625167846679688, 30.585601806640625, 59.70867919921875, 88.83177185058594, 117.95486450195312, 147.07794189453125, 176.20101928710938, 205.32411193847656, 234.44720458984375, 263.5702819824219, 292.693359375, 321.81646728515625, 350.9395446777344, 380.0626220703125, 409.1856994628906, 438.30877685546875, 467.431884765625, 496.5549621582031, 525.6780395507812, 554.8011474609375, 583.9241943359375, 613.0473022460938, 642.17041015625, 671.29345703125, 700.4165649414062, 729.5396728515625, 758.6627197265625, 787.7858276367188, 816.908935546875, 846.031982421875]}, "gradients/decoder.transformer.h.11.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 2.0, 5.0, 7.0, 6.0, 1.0, 5.0, 8.0, 8.0, 9.0, 10.0, 10.0, 14.0, 18.0, 21.0, 29.0, 20.0, 27.0, 27.0, 32.0, 26.0, 30.0, 41.0, 32.0, 43.0, 41.0, 22.0, 45.0, 37.0, 31.0, 35.0, 31.0, 42.0, 33.0, 35.0, 29.0, 32.0, 20.0, 20.0, 26.0, 19.0, 16.0, 13.0, 12.0, 7.0, 10.0, 7.0, 2.0, 4.0, 4.0, 8.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-233.20901489257812, -225.56715393066406, -217.92529296875, -210.283447265625, -202.64158630371094, -194.99972534179688, -187.3578643798828, -179.71600341796875, -172.07415771484375, -164.4322967529297, -156.79043579101562, -149.14859008789062, -141.50672912597656, -133.8648681640625, -126.22300720214844, -118.58114624023438, -110.93928527832031, -103.29742431640625, -95.65557098388672, -88.01371002197266, -80.37185668945312, -72.72999572753906, -65.088134765625, -57.4462776184082, -49.804420471191406, -42.16256332397461, -34.52070617675781, -26.87884521484375, -19.236988067626953, -11.595130920410156, -3.9532699584960938, 3.688587188720703, 11.3304443359375, 18.972301483154297, 26.614160537719727, 34.256019592285156, 41.89787673950195, 49.53973388671875, 57.18159484863281, 64.82345581054688, 72.4653091430664, 80.10717010498047, 87.7490234375, 95.39088439941406, 103.03274536132812, 110.67459869384766, 118.31645965576172, 125.95831298828125, 133.6001739501953, 141.24203491210938, 148.88389587402344, 156.5257568359375, 164.1676025390625, 171.80946350097656, 179.45132446289062, 187.0931854248047, 194.73504638671875, 202.3769073486328, 210.01876831054688, 217.66061401367188, 225.30247497558594, 232.9443359375, 240.58619689941406, 248.22805786132812, 255.86990356445312]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 5.0, 1.0, 5.0, 6.0, 4.0, 3.0, 7.0, 12.0, 10.0, 10.0, 16.0, 11.0, 21.0, 20.0, 18.0, 29.0, 31.0, 23.0, 32.0, 31.0, 40.0, 28.0, 39.0, 44.0, 32.0, 34.0, 44.0, 47.0, 33.0, 30.0, 51.0, 22.0, 27.0, 23.0, 30.0, 16.0, 25.0, 20.0, 18.0, 14.0, 24.0, 15.0, 9.0, 8.0, 9.0, 4.0, 6.0, 6.0, 2.0, 8.0, 4.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0], "bins": [-41.3125, -40.0517578125, -38.791015625, -37.5302734375, -36.26953125, -35.0087890625, -33.748046875, -32.4873046875, -31.2265625, -29.9658203125, -28.705078125, -27.4443359375, -26.18359375, -24.9228515625, -23.662109375, -22.4013671875, -21.140625, -19.8798828125, -18.619140625, -17.3583984375, -16.09765625, -14.8369140625, -13.576171875, -12.3154296875, -11.0546875, -9.7939453125, -8.533203125, -7.2724609375, -6.01171875, -4.7509765625, -3.490234375, -2.2294921875, -0.96875, 0.2919921875, 1.552734375, 2.8134765625, 4.07421875, 5.3349609375, 6.595703125, 7.8564453125, 9.1171875, 10.3779296875, 11.638671875, 12.8994140625, 14.16015625, 15.4208984375, 16.681640625, 17.9423828125, 19.203125, 20.4638671875, 21.724609375, 22.9853515625, 24.24609375, 25.5068359375, 26.767578125, 28.0283203125, 29.2890625, 30.5498046875, 31.810546875, 33.0712890625, 34.33203125, 35.5927734375, 36.853515625, 38.1142578125, 39.375]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 5.0, 3.0, 6.0, 2.0, 10.0, 21.0, 16.0, 23.0, 37.0, 42.0, 74.0, 122.0, 161.0, 234.0, 356.0, 558.0, 828.0, 1315.0, 1971.0, 3161.0, 4967.0, 7870.0, 12397.0, 20025.0, 32723.0, 55238.0, 100907.0, 210616.0, 275045.0, 139147.0, 72229.0, 41328.0, 24892.0, 15607.0, 9596.0, 6053.0, 3900.0, 2459.0, 1605.0, 1010.0, 667.0, 440.0, 265.0, 196.0, 143.0, 94.0, 65.0, 35.0, 43.0, 17.0, 19.0, 10.0, 6.0, 1.0, 6.0, 1.0, 3.0, 0.0, 1.0, 1.0], "bins": [-2.3359375, -2.263458251953125, -2.19097900390625, -2.118499755859375, -2.0460205078125, -1.973541259765625, -1.90106201171875, -1.828582763671875, -1.756103515625, -1.683624267578125, -1.61114501953125, -1.538665771484375, -1.4661865234375, -1.393707275390625, -1.32122802734375, -1.248748779296875, -1.17626953125, -1.103790283203125, -1.03131103515625, -0.958831787109375, -0.8863525390625, -0.813873291015625, -0.74139404296875, -0.668914794921875, -0.596435546875, -0.523956298828125, -0.45147705078125, -0.378997802734375, -0.3065185546875, -0.234039306640625, -0.16156005859375, -0.089080810546875, -0.0166015625, 0.055877685546875, 0.12835693359375, 0.200836181640625, 0.2733154296875, 0.345794677734375, 0.41827392578125, 0.490753173828125, 0.563232421875, 0.635711669921875, 0.70819091796875, 0.780670166015625, 0.8531494140625, 0.925628662109375, 0.99810791015625, 1.070587158203125, 1.14306640625, 1.215545654296875, 1.28802490234375, 1.360504150390625, 1.4329833984375, 1.505462646484375, 1.57794189453125, 1.650421142578125, 1.722900390625, 1.795379638671875, 1.86785888671875, 1.940338134765625, 2.0128173828125, 2.085296630859375, 2.15777587890625, 2.230255126953125, 2.302734375]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 4.0, 7.0, 6.0, 6.0, 10.0, 12.0, 12.0, 11.0, 8.0, 18.0, 23.0, 18.0, 39.0, 31.0, 27.0, 27.0, 34.0, 34.0, 43.0, 39.0, 41.0, 1078.0, 45.0, 38.0, 52.0, 35.0, 51.0, 31.0, 28.0, 32.0, 34.0, 27.0, 18.0, 18.0, 14.0, 13.0, 7.0, 11.0, 15.0, 8.0, 4.0, 7.0, 3.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-26.515625, -25.627685546875, -24.73974609375, -23.851806640625, -22.9638671875, -22.075927734375, -21.18798828125, -20.300048828125, -19.412109375, -18.524169921875, -17.63623046875, -16.748291015625, -15.8603515625, -14.972412109375, -14.08447265625, -13.196533203125, -12.30859375, -11.420654296875, -10.53271484375, -9.644775390625, -8.7568359375, -7.868896484375, -6.98095703125, -6.093017578125, -5.205078125, -4.317138671875, -3.42919921875, -2.541259765625, -1.6533203125, -0.765380859375, 0.12255859375, 1.010498046875, 1.8984375, 2.786376953125, 3.67431640625, 4.562255859375, 5.4501953125, 6.338134765625, 7.22607421875, 8.114013671875, 9.001953125, 9.889892578125, 10.77783203125, 11.665771484375, 12.5537109375, 13.441650390625, 14.32958984375, 15.217529296875, 16.10546875, 16.993408203125, 17.88134765625, 18.769287109375, 19.6572265625, 20.545166015625, 21.43310546875, 22.321044921875, 23.208984375, 24.096923828125, 24.98486328125, 25.872802734375, 26.7607421875, 27.648681640625, 28.53662109375, 29.424560546875, 30.3125]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 3.0, 1.0, 2.0, 5.0, 6.0, 10.0, 21.0, 26.0, 30.0, 48.0, 62.0, 100.0, 149.0, 197.0, 325.0, 495.0, 728.0, 1151.0, 1756.0, 2771.0, 4339.0, 7182.0, 11650.0, 19891.0, 35203.0, 65807.0, 139829.0, 1357629.0, 235113.0, 96070.0, 48960.0, 26982.0, 15531.0, 9373.0, 5752.0, 3509.0, 2255.0, 1427.0, 936.0, 624.0, 401.0, 246.0, 177.0, 114.0, 84.0, 56.0, 38.0, 24.0, 18.0, 15.0, 8.0, 4.0, 6.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0], "bins": [-2.3203125, -2.244903564453125, -2.16949462890625, -2.094085693359375, -2.0186767578125, -1.943267822265625, -1.86785888671875, -1.792449951171875, -1.717041015625, -1.641632080078125, -1.56622314453125, -1.490814208984375, -1.4154052734375, -1.339996337890625, -1.26458740234375, -1.189178466796875, -1.11376953125, -1.038360595703125, -0.96295166015625, -0.887542724609375, -0.8121337890625, -0.736724853515625, -0.66131591796875, -0.585906982421875, -0.510498046875, -0.435089111328125, -0.35968017578125, -0.284271240234375, -0.2088623046875, -0.133453369140625, -0.05804443359375, 0.017364501953125, 0.0927734375, 0.168182373046875, 0.24359130859375, 0.319000244140625, 0.3944091796875, 0.469818115234375, 0.54522705078125, 0.620635986328125, 0.696044921875, 0.771453857421875, 0.84686279296875, 0.922271728515625, 0.9976806640625, 1.073089599609375, 1.14849853515625, 1.223907470703125, 1.29931640625, 1.374725341796875, 1.45013427734375, 1.525543212890625, 1.6009521484375, 1.676361083984375, 1.75177001953125, 1.827178955078125, 1.902587890625, 1.977996826171875, 2.05340576171875, 2.128814697265625, 2.2042236328125, 2.279632568359375, 2.35504150390625, 2.430450439453125, 2.505859375]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 3.0, 8.0, 2.0, 6.0, 8.0, 6.0, 11.0, 13.0, 11.0, 10.0, 15.0, 17.0, 33.0, 29.0, 45.0, 40.0, 48.0, 50.0, 77.0, 76.0, 80.0, 56.0, 45.0, 49.0, 46.0, 32.0, 24.0, 32.0, 27.0, 27.0, 13.0, 15.0, 8.0, 10.0, 6.0, 2.0, 4.0, 5.0, 5.0, 1.0, 2.0, 6.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0279083251953125, -0.02710866928100586, -0.02630901336669922, -0.025509357452392578, -0.024709701538085938, -0.023910045623779297, -0.023110389709472656, -0.022310733795166016, -0.021511077880859375, -0.020711421966552734, -0.019911766052246094, -0.019112110137939453, -0.018312454223632812, -0.017512798309326172, -0.01671314239501953, -0.01591348648071289, -0.01511383056640625, -0.01431417465209961, -0.013514518737792969, -0.012714862823486328, -0.011915206909179688, -0.011115550994873047, -0.010315895080566406, -0.009516239166259766, -0.008716583251953125, -0.007916927337646484, -0.007117271423339844, -0.006317615509033203, -0.0055179595947265625, -0.004718303680419922, -0.003918647766113281, -0.0031189918518066406, -0.0023193359375, -0.0015196800231933594, -0.0007200241088867188, 7.963180541992188e-05, 0.0008792877197265625, 0.0016789436340332031, 0.0024785995483398438, 0.0032782554626464844, 0.004077911376953125, 0.004877567291259766, 0.005677223205566406, 0.006476879119873047, 0.0072765350341796875, 0.008076190948486328, 0.008875846862792969, 0.00967550277709961, 0.01047515869140625, 0.01127481460571289, 0.012074470520019531, 0.012874126434326172, 0.013673782348632812, 0.014473438262939453, 0.015273094177246094, 0.016072750091552734, 0.016872406005859375, 0.017672061920166016, 0.018471717834472656, 0.019271373748779297, 0.020071029663085938, 0.020870685577392578, 0.02167034149169922, 0.02246999740600586, 0.0232696533203125]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 2.0, 8.0, 6.0, 5.0, 6.0, 7.0, 17.0, 15.0, 16.0, 20.0, 29.0, 37.0, 49.0, 75.0, 79.0, 113.0, 185.0, 559.0, 4801.0, 144880.0, 882788.0, 12915.0, 1139.0, 280.0, 129.0, 93.0, 65.0, 46.0, 47.0, 22.0, 24.0, 13.0, 19.0, 19.0, 9.0, 9.0, 5.0, 2.0, 10.0, 3.0, 2.0, 5.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.4189453125, -0.4045906066894531, -0.39023590087890625, -0.3758811950683594, -0.3615264892578125, -0.3471717834472656, -0.33281707763671875, -0.3184623718261719, -0.304107666015625, -0.2897529602050781, -0.27539825439453125, -0.2610435485839844, -0.2466888427734375, -0.23233413696289062, -0.21797943115234375, -0.20362472534179688, -0.18927001953125, -0.17491531372070312, -0.16056060791015625, -0.14620590209960938, -0.1318511962890625, -0.11749649047851562, -0.10314178466796875, -0.08878707885742188, -0.074432373046875, -0.060077667236328125, -0.04572296142578125, -0.031368255615234375, -0.0170135498046875, -0.002658843994140625, 0.01169586181640625, 0.026050567626953125, 0.0404052734375, 0.054759979248046875, 0.06911468505859375, 0.08346939086914062, 0.0978240966796875, 0.11217880249023438, 0.12653350830078125, 0.14088821411132812, 0.155242919921875, 0.16959762573242188, 0.18395233154296875, 0.19830703735351562, 0.2126617431640625, 0.22701644897460938, 0.24137115478515625, 0.2557258605957031, 0.27008056640625, 0.2844352722167969, 0.29878997802734375, 0.3131446838378906, 0.3274993896484375, 0.3418540954589844, 0.35620880126953125, 0.3705635070800781, 0.384918212890625, 0.3992729187011719, 0.41362762451171875, 0.4279823303222656, 0.4423370361328125, 0.4566917419433594, 0.47104644775390625, 0.4854011535644531, 0.499755859375]}, "gradients/decoder.transformer.h.11.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 24.0, 188.0, 725.0, 73.0, 5.0, 2.0, 1.0], "bins": [-0.27860090136528015, -0.27390599250793457, -0.269211083650589, -0.2645161747932434, -0.2598212659358978, -0.25512635707855225, -0.25043144822120667, -0.24573653936386108, -0.2410416305065155, -0.23634672164916992, -0.23165181279182434, -0.22695690393447876, -0.22226199507713318, -0.2175670862197876, -0.21287217736244202, -0.20817726850509644, -0.20348235964775085, -0.19878745079040527, -0.1940925419330597, -0.1893976330757141, -0.18470272421836853, -0.18000781536102295, -0.17531290650367737, -0.1706179976463318, -0.165923073887825, -0.16122816503047943, -0.15653325617313385, -0.15183834731578827, -0.1471434384584427, -0.1424485296010971, -0.13775362074375153, -0.13305871188640594, -0.12836380302906036, -0.12366889417171478, -0.1189739853143692, -0.11427907645702362, -0.10958416759967804, -0.10488925874233246, -0.10019434988498688, -0.0954994410276413, -0.09080452471971512, -0.08610961586236954, -0.08141470700502396, -0.07671979814767838, -0.0720248892903328, -0.06732998043298721, -0.06263506412506104, -0.05794015899300575, -0.05324524641036987, -0.04855033755302429, -0.04385542869567871, -0.03916051983833313, -0.03446561098098755, -0.02977070026099682, -0.025075789541006088, -0.020380880683660507, -0.015685973688960075, -0.010991064831614494, -0.006296155042946339, -0.001601245254278183, 0.003093663603067398, 0.007788572460412979, 0.01248348318040371, 0.01717839203774929, 0.02187330089509487]}, "gradients/decoder.transformer.h.11.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 2.0, 2.0, 1.0, 4.0, 10.0, 10.0, 5.0, 12.0, 8.0, 21.0, 15.0, 17.0, 20.0, 18.0, 18.0, 31.0, 23.0, 33.0, 40.0, 34.0, 36.0, 38.0, 48.0, 41.0, 54.0, 40.0, 42.0, 42.0, 38.0, 38.0, 28.0, 28.0, 25.0, 22.0, 23.0, 27.0, 16.0, 16.0, 13.0, 18.0, 11.0, 10.0, 6.0, 4.0, 5.0, 5.0, 7.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.013894498348236084, -0.013451100327074528, -0.013007702305912971, -0.012564304284751415, -0.012120906263589859, -0.011677508242428303, -0.011234110221266747, -0.01079071220010519, -0.010347314178943634, -0.009903916157782078, -0.009460518136620522, -0.009017120115458965, -0.008573722094297409, -0.008130324073135853, -0.007686926051974297, -0.00724352803081274, -0.006800130009651184, -0.006356731988489628, -0.005913333967328072, -0.005469935946166515, -0.005026537925004959, -0.004583139903843403, -0.004139741882681847, -0.0036963438615202904, -0.003252945840358734, -0.002809547819197178, -0.0023661497980356216, -0.0019227517768740654, -0.0014793537557125092, -0.001035955734550953, -0.0005925577133893967, -0.00014915969222784042, 0.0002942383289337158, 0.0007376363500952721, 0.0011810343712568283, 0.0016244323924183846, 0.002067830413579941, 0.002511228434741497, 0.0029546264559030533, 0.0033980244770646095, 0.0038414224982261658, 0.004284820519387722, 0.004728218540549278, 0.0051716165617108345, 0.005615014582872391, 0.006058412604033947, 0.006501810625195503, 0.0069452086463570595, 0.007388606667518616, 0.007832004688680172, 0.008275402709841728, 0.008718800731003284, 0.00916219875216484, 0.009605596773326397, 0.010048994794487953, 0.01049239281564951, 0.010935790836811066, 0.011379188857972622, 0.011822586879134178, 0.012265984900295734, 0.01270938292145729, 0.013152780942618847, 0.013596178963780403, 0.01403957698494196, 0.014482975006103516]}, "gradients/decoder.transformer.h.11.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 5.0, 1.0, 5.0, 6.0, 4.0, 3.0, 8.0, 11.0, 10.0, 10.0, 16.0, 11.0, 21.0, 20.0, 18.0, 29.0, 31.0, 23.0, 32.0, 31.0, 40.0, 28.0, 39.0, 44.0, 32.0, 34.0, 44.0, 47.0, 33.0, 30.0, 51.0, 22.0, 27.0, 23.0, 30.0, 16.0, 25.0, 20.0, 18.0, 14.0, 24.0, 15.0, 9.0, 8.0, 9.0, 4.0, 6.0, 6.0, 2.0, 8.0, 4.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0], "bins": [-41.3125, -40.0517578125, -38.791015625, -37.5302734375, -36.26953125, -35.0087890625, -33.748046875, -32.4873046875, -31.2265625, -29.9658203125, -28.705078125, -27.4443359375, -26.18359375, -24.9228515625, -23.662109375, -22.4013671875, -21.140625, -19.8798828125, -18.619140625, -17.3583984375, -16.09765625, -14.8369140625, -13.576171875, -12.3154296875, -11.0546875, -9.7939453125, -8.533203125, -7.2724609375, -6.01171875, -4.7509765625, -3.490234375, -2.2294921875, -0.96875, 0.2919921875, 1.552734375, 2.8134765625, 4.07421875, 5.3349609375, 6.595703125, 7.8564453125, 9.1171875, 10.3779296875, 11.638671875, 12.8994140625, 14.16015625, 15.4208984375, 16.681640625, 17.9423828125, 19.203125, 20.4638671875, 21.724609375, 22.9853515625, 24.24609375, 25.5068359375, 26.767578125, 28.0283203125, 29.2890625, 30.5498046875, 31.810546875, 33.0712890625, 34.33203125, 35.5927734375, 36.853515625, 38.1142578125, 39.375]}, "gradients/decoder.transformer.h.11.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 7.0, 3.0, 12.0, 12.0, 11.0, 12.0, 23.0, 34.0, 41.0, 51.0, 92.0, 125.0, 169.0, 283.0, 467.0, 701.0, 1140.0, 1850.0, 3107.0, 5312.0, 9104.0, 17000.0, 32615.0, 67058.0, 151648.0, 346962.0, 223132.0, 93565.0, 43550.0, 21954.0, 11983.0, 6458.0, 3872.0, 2219.0, 1418.0, 901.0, 552.0, 363.0, 220.0, 154.0, 110.0, 82.0, 46.0, 41.0, 27.0, 13.0, 17.0, 15.0, 6.0, 8.0, 5.0, 4.0, 2.0, 6.0, 1.0, 1.0, 1.0], "bins": [-15.984375, -15.4957275390625, -15.007080078125, -14.5184326171875, -14.02978515625, -13.5411376953125, -13.052490234375, -12.5638427734375, -12.0751953125, -11.5865478515625, -11.097900390625, -10.6092529296875, -10.12060546875, -9.6319580078125, -9.143310546875, -8.6546630859375, -8.166015625, -7.6773681640625, -7.188720703125, -6.7000732421875, -6.21142578125, -5.7227783203125, -5.234130859375, -4.7454833984375, -4.2568359375, -3.7681884765625, -3.279541015625, -2.7908935546875, -2.30224609375, -1.8135986328125, -1.324951171875, -0.8363037109375, -0.34765625, 0.1409912109375, 0.629638671875, 1.1182861328125, 1.60693359375, 2.0955810546875, 2.584228515625, 3.0728759765625, 3.5615234375, 4.0501708984375, 4.538818359375, 5.0274658203125, 5.51611328125, 6.0047607421875, 6.493408203125, 6.9820556640625, 7.470703125, 7.9593505859375, 8.447998046875, 8.9366455078125, 9.42529296875, 9.9139404296875, 10.402587890625, 10.8912353515625, 11.3798828125, 11.8685302734375, 12.357177734375, 12.8458251953125, 13.33447265625, 13.8231201171875, 14.311767578125, 14.8004150390625, 15.2890625]}, "gradients/decoder.transformer.h.11.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 5.0, 1.0, 4.0, 7.0, 5.0, 8.0, 17.0, 11.0, 19.0, 28.0, 29.0, 26.0, 25.0, 25.0, 41.0, 32.0, 42.0, 53.0, 43.0, 57.0, 288.0, 1762.0, 94.0, 57.0, 50.0, 32.0, 43.0, 35.0, 37.0, 30.0, 28.0, 14.0, 19.0, 11.0, 15.0, 14.0, 12.0, 11.0, 6.0, 2.0, 5.0, 3.0, 3.0, 4.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-113.9375, -109.7509765625, -105.564453125, -101.3779296875, -97.19140625, -93.0048828125, -88.818359375, -84.6318359375, -80.4453125, -76.2587890625, -72.072265625, -67.8857421875, -63.69921875, -59.5126953125, -55.326171875, -51.1396484375, -46.953125, -42.7666015625, -38.580078125, -34.3935546875, -30.20703125, -26.0205078125, -21.833984375, -17.6474609375, -13.4609375, -9.2744140625, -5.087890625, -0.9013671875, 3.28515625, 7.4716796875, 11.658203125, 15.8447265625, 20.03125, 24.2177734375, 28.404296875, 32.5908203125, 36.77734375, 40.9638671875, 45.150390625, 49.3369140625, 53.5234375, 57.7099609375, 61.896484375, 66.0830078125, 70.26953125, 74.4560546875, 78.642578125, 82.8291015625, 87.015625, 91.2021484375, 95.388671875, 99.5751953125, 103.76171875, 107.9482421875, 112.134765625, 116.3212890625, 120.5078125, 124.6943359375, 128.880859375, 133.0673828125, 137.25390625, 141.4404296875, 145.626953125, 149.8134765625, 154.0]}, "gradients/decoder.transformer.h.11.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 2.0, 4.0, 2.0, 4.0, 5.0, 5.0, 5.0, 9.0, 7.0, 15.0, 17.0, 25.0, 24.0, 23.0, 34.0, 43.0, 72.0, 91.0, 93.0, 163.0, 190.0, 307.0, 773.0, 5214.0, 3113589.0, 22488.0, 1154.0, 372.0, 221.0, 185.0, 120.0, 129.0, 67.0, 56.0, 36.0, 38.0, 45.0, 24.0, 13.0, 17.0, 5.0, 8.0, 6.0, 2.0, 5.0, 2.0, 3.0, 4.0, 2.0, 1.0, 1.0], "bins": [-363.75, -354.08984375, -344.4296875, -334.76953125, -325.109375, -315.44921875, -305.7890625, -296.12890625, -286.46875, -276.80859375, -267.1484375, -257.48828125, -247.828125, -238.16796875, -228.5078125, -218.84765625, -209.1875, -199.52734375, -189.8671875, -180.20703125, -170.546875, -160.88671875, -151.2265625, -141.56640625, -131.90625, -122.24609375, -112.5859375, -102.92578125, -93.265625, -83.60546875, -73.9453125, -64.28515625, -54.625, -44.96484375, -35.3046875, -25.64453125, -15.984375, -6.32421875, 3.3359375, 12.99609375, 22.65625, 32.31640625, 41.9765625, 51.63671875, 61.296875, 70.95703125, 80.6171875, 90.27734375, 99.9375, 109.59765625, 119.2578125, 128.91796875, 138.578125, 148.23828125, 157.8984375, 167.55859375, 177.21875, 186.87890625, 196.5390625, 206.19921875, 215.859375, 225.51953125, 235.1796875, 244.83984375, 254.5]}, "gradients/decoder.transformer.h.11.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 126.0, 748.0, 137.0, 3.0, 1.0, 2.0], "bins": [-1179.3955078125, -1159.5711669921875, -1139.746826171875, -1119.9224853515625, -1100.0980224609375, -1080.273681640625, -1060.4493408203125, -1040.625, -1020.8006591796875, -1000.976318359375, -981.1519165039062, -961.3275756835938, -941.5032348632812, -921.6788940429688, -901.8544921875, -882.0301513671875, -862.205810546875, -842.3814697265625, -822.5570678710938, -802.7327270507812, -782.9083862304688, -763.0840454101562, -743.2596435546875, -723.435302734375, -703.6109008789062, -683.7865600585938, -663.962158203125, -644.1378173828125, -624.3134765625, -604.4891357421875, -584.6647338867188, -564.8403930664062, -545.0159912109375, -525.191650390625, -505.3672790527344, -485.54290771484375, -465.71856689453125, -445.8941955566406, -426.06982421875, -406.2454833984375, -386.421142578125, -366.5967712402344, -346.7724304199219, -326.94805908203125, -307.12371826171875, -287.2993469238281, -267.4749755859375, -247.650634765625, -227.8262939453125, -208.00193786621094, -188.17758178710938, -168.35321044921875, -148.52886962890625, -128.70449829101562, -108.88014221191406, -89.0557861328125, -69.23143005371094, -49.407073974609375, -29.582714080810547, -9.758354187011719, 10.066001892089844, 29.890357971191406, 49.7147216796875, 69.53907775878906, 89.36343383789062]}, "gradients/decoder.transformer.h.11.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 6.0, 1.0, 5.0, 6.0, 10.0, 9.0, 20.0, 18.0, 19.0, 18.0, 29.0, 21.0, 34.0, 26.0, 31.0, 41.0, 53.0, 59.0, 49.0, 48.0, 57.0, 42.0, 42.0, 38.0, 35.0, 37.0, 37.0, 34.0, 31.0, 23.0, 23.0, 16.0, 18.0, 14.0, 11.0, 17.0, 8.0, 3.0, 4.0, 6.0, 0.0, 4.0, 0.0, 2.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-346.5399169921875, -335.1388854980469, -323.7378234863281, -312.3367919921875, -300.9357604980469, -289.53472900390625, -278.1336669921875, -266.7326354980469, -255.3315887451172, -243.9305419921875, -232.52951049804688, -221.1284637451172, -209.7274169921875, -198.32638549804688, -186.9253387451172, -175.5242919921875, -164.12326049804688, -152.7222137451172, -141.32118225097656, -129.92013549804688, -118.51909637451172, -107.11805725097656, -95.71701049804688, -84.31597137451172, -72.91493225097656, -61.513893127441406, -50.112850189208984, -38.71180725097656, -27.310768127441406, -15.90972900390625, -4.5086822509765625, 6.892356872558594, 18.29339599609375, 29.69443702697754, 41.09547805786133, 52.49652099609375, 63.897560119628906, 75.29859924316406, 86.69964599609375, 98.1006851196289, 109.50172424316406, 120.90276336669922, 132.30380249023438, 143.70484924316406, 155.10589599609375, 166.50692749023438, 177.90797424316406, 189.30902099609375, 200.71005249023438, 212.11109924316406, 223.5121307373047, 234.91317749023438, 246.314208984375, 257.71527099609375, 269.1163024902344, 280.517333984375, 291.91839599609375, 303.3194274902344, 314.7204895019531, 326.12152099609375, 337.5225524902344, 348.923583984375, 360.32464599609375, 371.7256774902344, 383.126708984375]}, "gradients/decoder.transformer.h.10.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 3.0, 1.0, 3.0, 2.0, 5.0, 5.0, 4.0, 5.0, 9.0, 5.0, 15.0, 7.0, 13.0, 16.0, 13.0, 19.0, 18.0, 24.0, 32.0, 23.0, 23.0, 37.0, 30.0, 41.0, 33.0, 34.0, 33.0, 45.0, 41.0, 39.0, 31.0, 30.0, 46.0, 43.0, 24.0, 23.0, 20.0, 27.0, 22.0, 14.0, 23.0, 18.0, 14.0, 22.0, 17.0, 14.0, 4.0, 8.0, 5.0, 8.0, 3.0, 3.0, 5.0, 2.0, 4.0, 3.0, 3.0, 3.0, 1.0, 0.0, 1.0], "bins": [-39.15625, -37.9091796875, -36.662109375, -35.4150390625, -34.16796875, -32.9208984375, -31.673828125, -30.4267578125, -29.1796875, -27.9326171875, -26.685546875, -25.4384765625, -24.19140625, -22.9443359375, -21.697265625, -20.4501953125, -19.203125, -17.9560546875, -16.708984375, -15.4619140625, -14.21484375, -12.9677734375, -11.720703125, -10.4736328125, -9.2265625, -7.9794921875, -6.732421875, -5.4853515625, -4.23828125, -2.9912109375, -1.744140625, -0.4970703125, 0.75, 1.9970703125, 3.244140625, 4.4912109375, 5.73828125, 6.9853515625, 8.232421875, 9.4794921875, 10.7265625, 11.9736328125, 13.220703125, 14.4677734375, 15.71484375, 16.9619140625, 18.208984375, 19.4560546875, 20.703125, 21.9501953125, 23.197265625, 24.4443359375, 25.69140625, 26.9384765625, 28.185546875, 29.4326171875, 30.6796875, 31.9267578125, 33.173828125, 34.4208984375, 35.66796875, 36.9150390625, 38.162109375, 39.4091796875, 40.65625]}, "gradients/decoder.transformer.h.10.mlp.c_proj.weight": {"_type": "histogram", "values": [5.0, 2.0, 2.0, 1.0, 8.0, 3.0, 2.0, 6.0, 6.0, 12.0, 16.0, 9.0, 19.0, 30.0, 24.0, 40.0, 82.0, 83.0, 118.0, 141.0, 186.0, 249.0, 382.0, 502.0, 765.0, 1135.0, 1757.0, 2890.0, 5208.0, 11255.0, 76758.0, 1695900.0, 2265230.0, 104036.0, 13062.0, 5722.0, 3035.0, 1836.0, 1145.0, 736.0, 505.0, 343.0, 260.0, 210.0, 134.0, 110.0, 88.0, 48.0, 49.0, 39.0, 25.0, 20.0, 19.0, 12.0, 12.0, 4.0, 6.0, 8.0, 3.0, 3.0, 2.0, 3.0, 2.0, 1.0], "bins": [-116.8125, -113.16796875, -109.5234375, -105.87890625, -102.234375, -98.58984375, -94.9453125, -91.30078125, -87.65625, -84.01171875, -80.3671875, -76.72265625, -73.078125, -69.43359375, -65.7890625, -62.14453125, -58.5, -54.85546875, -51.2109375, -47.56640625, -43.921875, -40.27734375, -36.6328125, -32.98828125, -29.34375, -25.69921875, -22.0546875, -18.41015625, -14.765625, -11.12109375, -7.4765625, -3.83203125, -0.1875, 3.45703125, 7.1015625, 10.74609375, 14.390625, 18.03515625, 21.6796875, 25.32421875, 28.96875, 32.61328125, 36.2578125, 39.90234375, 43.546875, 47.19140625, 50.8359375, 54.48046875, 58.125, 61.76953125, 65.4140625, 69.05859375, 72.703125, 76.34765625, 79.9921875, 83.63671875, 87.28125, 90.92578125, 94.5703125, 98.21484375, 101.859375, 105.50390625, 109.1484375, 112.79296875, 116.4375]}, "gradients/decoder.transformer.h.10.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 4.0, 5.0, 8.0, 14.0, 10.0, 12.0, 11.0, 20.0, 22.0, 34.0, 70.0, 99.0, 184.0, 386.0, 704.0, 966.0, 702.0, 317.0, 183.0, 96.0, 61.0, 32.0, 27.0, 15.0, 17.0, 22.0, 13.0, 6.0, 6.0, 3.0, 7.0, 2.0, 4.0, 3.0, 6.0, 6.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-106.3125, -103.474609375, -100.63671875, -97.798828125, -94.9609375, -92.123046875, -89.28515625, -86.447265625, -83.609375, -80.771484375, -77.93359375, -75.095703125, -72.2578125, -69.419921875, -66.58203125, -63.744140625, -60.90625, -58.068359375, -55.23046875, -52.392578125, -49.5546875, -46.716796875, -43.87890625, -41.041015625, -38.203125, -35.365234375, -32.52734375, -29.689453125, -26.8515625, -24.013671875, -21.17578125, -18.337890625, -15.5, -12.662109375, -9.82421875, -6.986328125, -4.1484375, -1.310546875, 1.52734375, 4.365234375, 7.203125, 10.041015625, 12.87890625, 15.716796875, 18.5546875, 21.392578125, 24.23046875, 27.068359375, 29.90625, 32.744140625, 35.58203125, 38.419921875, 41.2578125, 44.095703125, 46.93359375, 49.771484375, 52.609375, 55.447265625, 58.28515625, 61.123046875, 63.9609375, 66.798828125, 69.63671875, 72.474609375, 75.3125]}, "gradients/decoder.transformer.h.10.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 3.0, 7.0, 6.0, 1.0, 4.0, 5.0, 3.0, 11.0, 15.0, 20.0, 23.0, 18.0, 46.0, 78.0, 160.0, 380.0, 1161.0, 4721.0, 28290.0, 3214492.0, 918071.0, 21285.0, 3883.0, 948.0, 318.0, 128.0, 75.0, 38.0, 21.0, 13.0, 11.0, 17.0, 14.0, 7.0, 4.0, 5.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-286.75, -276.1015625, -265.453125, -254.8046875, -244.15625, -233.5078125, -222.859375, -212.2109375, -201.5625, -190.9140625, -180.265625, -169.6171875, -158.96875, -148.3203125, -137.671875, -127.0234375, -116.375, -105.7265625, -95.078125, -84.4296875, -73.78125, -63.1328125, -52.484375, -41.8359375, -31.1875, -20.5390625, -9.890625, 0.7578125, 11.40625, 22.0546875, 32.703125, 43.3515625, 54.0, 64.6484375, 75.296875, 85.9453125, 96.59375, 107.2421875, 117.890625, 128.5390625, 139.1875, 149.8359375, 160.484375, 171.1328125, 181.78125, 192.4296875, 203.078125, 213.7265625, 224.375, 235.0234375, 245.671875, 256.3203125, 266.96875, 277.6171875, 288.265625, 298.9140625, 309.5625, 320.2109375, 330.859375, 341.5078125, 352.15625, 362.8046875, 373.453125, 384.1015625, 394.75]}, "gradients/decoder.transformer.h.10.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 6.0, 14.0, 21.0, 35.0, 72.0, 181.0, 269.0, 208.0, 102.0, 39.0, 28.0, 25.0, 9.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-794.7993774414062, -774.7706298828125, -754.7418823242188, -734.713134765625, -714.6843872070312, -694.6556396484375, -674.6268920898438, -654.59814453125, -634.5693969726562, -614.5406494140625, -594.5119018554688, -574.483154296875, -554.4544067382812, -534.4256591796875, -514.3969116210938, -494.3681640625, -474.33941650390625, -454.3106689453125, -434.28192138671875, -414.253173828125, -394.22442626953125, -374.1956787109375, -354.16693115234375, -334.13818359375, -314.10943603515625, -294.0806884765625, -274.05194091796875, -254.023193359375, -233.99444580078125, -213.9656982421875, -193.93695068359375, -173.908203125, -153.87945556640625, -133.8507080078125, -113.82196044921875, -93.793212890625, -73.76446533203125, -53.7357177734375, -33.70697021484375, -13.67822265625, 6.35052490234375, 26.3792724609375, 46.40802001953125, 66.436767578125, 86.46551513671875, 106.4942626953125, 126.52301025390625, 146.5517578125, 166.58050537109375, 186.6092529296875, 206.63800048828125, 226.666748046875, 246.69549560546875, 266.7242431640625, 286.75299072265625, 306.78173828125, 326.81048583984375, 346.8392333984375, 366.86798095703125, 386.896728515625, 406.92547607421875, 426.9542236328125, 446.98297119140625, 467.01171875, 487.04046630859375]}, "gradients/decoder.transformer.h.10.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 4.0, 1.0, 5.0, 11.0, 9.0, 6.0, 5.0, 13.0, 13.0, 16.0, 13.0, 16.0, 18.0, 21.0, 33.0, 30.0, 36.0, 27.0, 33.0, 42.0, 41.0, 37.0, 45.0, 46.0, 40.0, 47.0, 36.0, 44.0, 28.0, 37.0, 29.0, 21.0, 34.0, 21.0, 26.0, 20.0, 16.0, 7.0, 12.0, 17.0, 6.0, 8.0, 9.0, 7.0, 7.0, 8.0, 2.0, 3.0, 1.0, 2.0, 5.0, 2.0], "bins": [-243.2111053466797, -236.49725341796875, -229.7834014892578, -223.06954956054688, -216.35569763183594, -209.641845703125, -202.92799377441406, -196.21414184570312, -189.5002899169922, -182.78643798828125, -176.0725860595703, -169.35873413085938, -162.64488220214844, -155.9310302734375, -149.21717834472656, -142.50332641601562, -135.7894744873047, -129.07562255859375, -122.36177062988281, -115.64791870117188, -108.93406677246094, -102.22021484375, -95.50636291503906, -88.79251098632812, -82.07865905761719, -75.36480712890625, -68.65095520019531, -61.937103271484375, -55.22325134277344, -48.5093994140625, -41.79554748535156, -35.081695556640625, -28.36785888671875, -21.654006958007812, -14.940155029296875, -8.226303100585938, -1.512451171875, 5.2014007568359375, 11.915252685546875, 18.629104614257812, 25.34295654296875, 32.05680847167969, 38.770660400390625, 45.48451232910156, 52.1983642578125, 58.91221618652344, 65.62606811523438, 72.33992004394531, 79.05377197265625, 85.76762390136719, 92.48147583007812, 99.19532775878906, 105.9091796875, 112.62303161621094, 119.33688354492188, 126.05073547363281, 132.76458740234375, 139.4784393310547, 146.19229125976562, 152.90614318847656, 159.6199951171875, 166.33384704589844, 173.04769897460938, 179.7615509033203, 186.47540283203125]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 7.0, 2.0, 4.0, 2.0, 4.0, 5.0, 3.0, 4.0, 9.0, 7.0, 9.0, 12.0, 13.0, 15.0, 20.0, 22.0, 18.0, 33.0, 30.0, 32.0, 27.0, 38.0, 26.0, 41.0, 40.0, 44.0, 42.0, 47.0, 39.0, 35.0, 38.0, 29.0, 33.0, 21.0, 27.0, 34.0, 31.0, 22.0, 18.0, 22.0, 17.0, 17.0, 13.0, 5.0, 13.0, 8.0, 8.0, 12.0, 3.0, 1.0, 8.0, 2.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-42.8125, -41.48291015625, -40.1533203125, -38.82373046875, -37.494140625, -36.16455078125, -34.8349609375, -33.50537109375, -32.17578125, -30.84619140625, -29.5166015625, -28.18701171875, -26.857421875, -25.52783203125, -24.1982421875, -22.86865234375, -21.5390625, -20.20947265625, -18.8798828125, -17.55029296875, -16.220703125, -14.89111328125, -13.5615234375, -12.23193359375, -10.90234375, -9.57275390625, -8.2431640625, -6.91357421875, -5.583984375, -4.25439453125, -2.9248046875, -1.59521484375, -0.265625, 1.06396484375, 2.3935546875, 3.72314453125, 5.052734375, 6.38232421875, 7.7119140625, 9.04150390625, 10.37109375, 11.70068359375, 13.0302734375, 14.35986328125, 15.689453125, 17.01904296875, 18.3486328125, 19.67822265625, 21.0078125, 22.33740234375, 23.6669921875, 24.99658203125, 26.326171875, 27.65576171875, 28.9853515625, 30.31494140625, 31.64453125, 32.97412109375, 34.3037109375, 35.63330078125, 36.962890625, 38.29248046875, 39.6220703125, 40.95166015625, 42.28125]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 4.0, 2.0, 6.0, 4.0, 6.0, 7.0, 19.0, 26.0, 55.0, 68.0, 126.0, 183.0, 230.0, 347.0, 432.0, 719.0, 1036.0, 1388.0, 2077.0, 3016.0, 4437.0, 6469.0, 9719.0, 14537.0, 22377.0, 34614.0, 56526.0, 100057.0, 206923.0, 262389.0, 130142.0, 69405.0, 42132.0, 26830.0, 17175.0, 11251.0, 7532.0, 5037.0, 3454.0, 2405.0, 1678.0, 1071.0, 857.0, 555.0, 403.0, 278.0, 176.0, 115.0, 76.0, 66.0, 45.0, 36.0, 18.0, 12.0, 10.0, 4.0, 7.0, 1.0, 2.0], "bins": [-2.296875, -2.229705810546875, -2.16253662109375, -2.095367431640625, -2.0281982421875, -1.961029052734375, -1.89385986328125, -1.826690673828125, -1.759521484375, -1.692352294921875, -1.62518310546875, -1.558013916015625, -1.4908447265625, -1.423675537109375, -1.35650634765625, -1.289337158203125, -1.22216796875, -1.154998779296875, -1.08782958984375, -1.020660400390625, -0.9534912109375, -0.886322021484375, -0.81915283203125, -0.751983642578125, -0.684814453125, -0.617645263671875, -0.55047607421875, -0.483306884765625, -0.4161376953125, -0.348968505859375, -0.28179931640625, -0.214630126953125, -0.1474609375, -0.080291748046875, -0.01312255859375, 0.054046630859375, 0.1212158203125, 0.188385009765625, 0.25555419921875, 0.322723388671875, 0.389892578125, 0.457061767578125, 0.52423095703125, 0.591400146484375, 0.6585693359375, 0.725738525390625, 0.79290771484375, 0.860076904296875, 0.92724609375, 0.994415283203125, 1.06158447265625, 1.128753662109375, 1.1959228515625, 1.263092041015625, 1.33026123046875, 1.397430419921875, 1.464599609375, 1.531768798828125, 1.59893798828125, 1.666107177734375, 1.7332763671875, 1.800445556640625, 1.86761474609375, 1.934783935546875, 2.001953125]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 4.0, 4.0, 2.0, 7.0, 6.0, 4.0, 9.0, 10.0, 16.0, 17.0, 22.0, 24.0, 34.0, 30.0, 32.0, 32.0, 44.0, 36.0, 23.0, 39.0, 49.0, 40.0, 1066.0, 50.0, 40.0, 37.0, 27.0, 42.0, 28.0, 39.0, 29.0, 23.0, 33.0, 25.0, 20.0, 18.0, 15.0, 10.0, 11.0, 12.0, 6.0, 6.0, 2.0, 1.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0], "bins": [-32.71875, -31.747314453125, -30.77587890625, -29.804443359375, -28.8330078125, -27.861572265625, -26.89013671875, -25.918701171875, -24.947265625, -23.975830078125, -23.00439453125, -22.032958984375, -21.0615234375, -20.090087890625, -19.11865234375, -18.147216796875, -17.17578125, -16.204345703125, -15.23291015625, -14.261474609375, -13.2900390625, -12.318603515625, -11.34716796875, -10.375732421875, -9.404296875, -8.432861328125, -7.46142578125, -6.489990234375, -5.5185546875, -4.547119140625, -3.57568359375, -2.604248046875, -1.6328125, -0.661376953125, 0.31005859375, 1.281494140625, 2.2529296875, 3.224365234375, 4.19580078125, 5.167236328125, 6.138671875, 7.110107421875, 8.08154296875, 9.052978515625, 10.0244140625, 10.995849609375, 11.96728515625, 12.938720703125, 13.91015625, 14.881591796875, 15.85302734375, 16.824462890625, 17.7958984375, 18.767333984375, 19.73876953125, 20.710205078125, 21.681640625, 22.653076171875, 23.62451171875, 24.595947265625, 25.5673828125, 26.538818359375, 27.51025390625, 28.481689453125, 29.453125]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 2.0, 2.0, 2.0, 1.0, 1.0, 6.0, 7.0, 12.0, 19.0, 28.0, 33.0, 60.0, 70.0, 111.0, 136.0, 191.0, 321.0, 502.0, 731.0, 1093.0, 1663.0, 2737.0, 4415.0, 7199.0, 12213.0, 20948.0, 36921.0, 67298.0, 135699.0, 1355143.0, 230681.0, 97193.0, 51343.0, 28642.0, 16475.0, 9565.0, 5821.0, 3544.0, 2157.0, 1359.0, 939.0, 610.0, 423.0, 251.0, 174.0, 113.0, 92.0, 65.0, 36.0, 36.0, 22.0, 12.0, 10.0, 5.0, 1.0, 4.0, 3.0, 3.0, 2.0, 1.0, 2.0], "bins": [-2.640625, -2.56005859375, -2.4794921875, -2.39892578125, -2.318359375, -2.23779296875, -2.1572265625, -2.07666015625, -1.99609375, -1.91552734375, -1.8349609375, -1.75439453125, -1.673828125, -1.59326171875, -1.5126953125, -1.43212890625, -1.3515625, -1.27099609375, -1.1904296875, -1.10986328125, -1.029296875, -0.94873046875, -0.8681640625, -0.78759765625, -0.70703125, -0.62646484375, -0.5458984375, -0.46533203125, -0.384765625, -0.30419921875, -0.2236328125, -0.14306640625, -0.0625, 0.01806640625, 0.0986328125, 0.17919921875, 0.259765625, 0.34033203125, 0.4208984375, 0.50146484375, 0.58203125, 0.66259765625, 0.7431640625, 0.82373046875, 0.904296875, 0.98486328125, 1.0654296875, 1.14599609375, 1.2265625, 1.30712890625, 1.3876953125, 1.46826171875, 1.548828125, 1.62939453125, 1.7099609375, 1.79052734375, 1.87109375, 1.95166015625, 2.0322265625, 2.11279296875, 2.193359375, 2.27392578125, 2.3544921875, 2.43505859375, 2.515625]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 6.0, 2.0, 10.0, 7.0, 8.0, 12.0, 14.0, 18.0, 17.0, 25.0, 25.0, 38.0, 51.0, 58.0, 82.0, 96.0, 106.0, 74.0, 76.0, 54.0, 46.0, 34.0, 33.0, 26.0, 12.0, 13.0, 9.0, 12.0, 10.0, 10.0, 5.0, 2.0, 5.0, 1.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.036590576171875, -0.03548240661621094, -0.034374237060546875, -0.03326606750488281, -0.03215789794921875, -0.031049728393554688, -0.029941558837890625, -0.028833389282226562, -0.0277252197265625, -0.026617050170898438, -0.025508880615234375, -0.024400711059570312, -0.02329254150390625, -0.022184371948242188, -0.021076202392578125, -0.019968032836914062, -0.01885986328125, -0.017751693725585938, -0.016643524169921875, -0.015535354614257812, -0.01442718505859375, -0.013319015502929688, -0.012210845947265625, -0.011102676391601562, -0.0099945068359375, -0.008886337280273438, -0.007778167724609375, -0.0066699981689453125, -0.00556182861328125, -0.0044536590576171875, -0.003345489501953125, -0.0022373199462890625, -0.001129150390625, -2.09808349609375e-05, 0.001087188720703125, 0.0021953582763671875, 0.00330352783203125, 0.0044116973876953125, 0.005519866943359375, 0.0066280364990234375, 0.0077362060546875, 0.008844375610351562, 0.009952545166015625, 0.011060714721679688, 0.01216888427734375, 0.013277053833007812, 0.014385223388671875, 0.015493392944335938, 0.0166015625, 0.017709732055664062, 0.018817901611328125, 0.019926071166992188, 0.02103424072265625, 0.022142410278320312, 0.023250579833984375, 0.024358749389648438, 0.0254669189453125, 0.026575088500976562, 0.027683258056640625, 0.028791427612304688, 0.02989959716796875, 0.031007766723632812, 0.032115936279296875, 0.03322410583496094, 0.034332275390625]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 0.0, 1.0, 1.0, 6.0, 4.0, 9.0, 13.0, 15.0, 7.0, 18.0, 22.0, 23.0, 50.0, 45.0, 66.0, 121.0, 164.0, 390.0, 3000.0, 840422.0, 201523.0, 1855.0, 319.0, 141.0, 81.0, 63.0, 46.0, 24.0, 32.0, 20.0, 18.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 2.0, 2.0, 4.0, 4.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.630859375, -0.6104583740234375, -0.590057373046875, -0.5696563720703125, -0.54925537109375, -0.5288543701171875, -0.508453369140625, -0.4880523681640625, -0.4676513671875, -0.4472503662109375, -0.426849365234375, -0.4064483642578125, -0.38604736328125, -0.3656463623046875, -0.345245361328125, -0.3248443603515625, -0.304443359375, -0.2840423583984375, -0.263641357421875, -0.2432403564453125, -0.22283935546875, -0.2024383544921875, -0.182037353515625, -0.1616363525390625, -0.1412353515625, -0.1208343505859375, -0.100433349609375, -0.0800323486328125, -0.05963134765625, -0.0392303466796875, -0.018829345703125, 0.0015716552734375, 0.02197265625, 0.0423736572265625, 0.062774658203125, 0.0831756591796875, 0.10357666015625, 0.1239776611328125, 0.144378662109375, 0.1647796630859375, 0.1851806640625, 0.2055816650390625, 0.225982666015625, 0.2463836669921875, 0.26678466796875, 0.2871856689453125, 0.307586669921875, 0.3279876708984375, 0.348388671875, 0.3687896728515625, 0.389190673828125, 0.4095916748046875, 0.42999267578125, 0.4503936767578125, 0.470794677734375, 0.4911956787109375, 0.5115966796875, 0.5319976806640625, 0.552398681640625, 0.5727996826171875, 0.59320068359375, 0.6136016845703125, 0.634002685546875, 0.6544036865234375, 0.6748046875]}, "gradients/decoder.transformer.h.10.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 5.0, 25.0, 473.0, 483.0, 29.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.26946908235549927, -0.2646583616733551, -0.2598476707935333, -0.25503695011138916, -0.2502262592315674, -0.24541553854942322, -0.24060481786727905, -0.23579411208629608, -0.2309834063053131, -0.22617270052433014, -0.22136199474334717, -0.216551274061203, -0.21174056828022003, -0.20692986249923706, -0.2021191418170929, -0.19730843603610992, -0.19249773025512695, -0.18768702447414398, -0.182876318693161, -0.17806559801101685, -0.17325489223003387, -0.1684441864490509, -0.16363346576690674, -0.15882275998592377, -0.1540120542049408, -0.14920134842395782, -0.14439064264297485, -0.1395799219608307, -0.13476921617984772, -0.12995851039886475, -0.12514778971672058, -0.12033708393573761, -0.11552637815475464, -0.11071567237377167, -0.1059049591422081, -0.10109424591064453, -0.09628354012966156, -0.09147283434867859, -0.08666212111711502, -0.08185140788555145, -0.07704070210456848, -0.07222999632358551, -0.06741928309202194, -0.06260856986045837, -0.0577978640794754, -0.05298715457320213, -0.048176445066928864, -0.043365735560655594, -0.038555022329092026, -0.033744312822818756, -0.028933603316545486, -0.024122893810272217, -0.019312184303998947, -0.014501474797725677, -0.009690765291452408, -0.004880055785179138, -6.934627890586853e-05, 0.004741363227367401, 0.00955207273364067, 0.01436278223991394, 0.01917349174618721, 0.02398420125246048, 0.02879491075873375, 0.03360562026500702, 0.03841632977128029]}, "gradients/decoder.transformer.h.10.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 3.0, 0.0, 2.0, 5.0, 8.0, 8.0, 10.0, 12.0, 19.0, 16.0, 25.0, 31.0, 24.0, 31.0, 37.0, 45.0, 44.0, 37.0, 34.0, 49.0, 33.0, 40.0, 45.0, 34.0, 41.0, 35.0, 40.0, 42.0, 28.0, 36.0, 23.0, 30.0, 19.0, 23.0, 11.0, 19.0, 18.0, 16.0, 10.0, 9.0, 5.0, 3.0, 4.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.018032431602478027, -0.017484823241829872, -0.016937214881181717, -0.01638960652053356, -0.015841998159885406, -0.015294389799237251, -0.014746781438589096, -0.01419917307794094, -0.013651564717292786, -0.01310395635664463, -0.012556347995996475, -0.01200873963534832, -0.011461131274700165, -0.01091352291405201, -0.010365914553403854, -0.0098183061927557, -0.009270697832107544, -0.008723089471459389, -0.008175481110811234, -0.007627872750163078, -0.007080264389514923, -0.006532656028866768, -0.005985047668218613, -0.0054374393075704575, -0.004889830946922302, -0.004342222586274147, -0.003794614225625992, -0.0032470058649778366, -0.0026993975043296814, -0.002151789143681526, -0.001604180783033371, -0.0010565724223852158, -0.0005089640617370605, 3.8644298911094666e-05, 0.0005862526595592499, 0.001133861020207405, 0.0016814693808555603, 0.0022290777415037155, 0.0027766861021518707, 0.003324294462800026, 0.003871902823448181, 0.004419511184096336, 0.004967119544744492, 0.005514727905392647, 0.006062336266040802, 0.006609944626688957, 0.007157552987337112, 0.007705161347985268, 0.008252769708633423, 0.008800378069281578, 0.009347986429929733, 0.009895594790577888, 0.010443203151226044, 0.010990811511874199, 0.011538419872522354, 0.01208602823317051, 0.012633636593818665, 0.01318124495446682, 0.013728853315114975, 0.01427646167576313, 0.014824070036411285, 0.01537167839705944, 0.015919286757707596, 0.01646689511835575, 0.017014503479003906]}, "gradients/decoder.transformer.h.10.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 7.0, 2.0, 4.0, 2.0, 4.0, 5.0, 3.0, 4.0, 9.0, 7.0, 9.0, 12.0, 13.0, 15.0, 20.0, 22.0, 18.0, 33.0, 30.0, 32.0, 27.0, 38.0, 26.0, 41.0, 40.0, 44.0, 42.0, 47.0, 39.0, 35.0, 38.0, 29.0, 33.0, 21.0, 27.0, 34.0, 31.0, 22.0, 18.0, 22.0, 17.0, 17.0, 13.0, 5.0, 13.0, 8.0, 8.0, 12.0, 3.0, 1.0, 8.0, 2.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-42.8125, -41.48291015625, -40.1533203125, -38.82373046875, -37.494140625, -36.16455078125, -34.8349609375, -33.50537109375, -32.17578125, -30.84619140625, -29.5166015625, -28.18701171875, -26.857421875, -25.52783203125, -24.1982421875, -22.86865234375, -21.5390625, -20.20947265625, -18.8798828125, -17.55029296875, -16.220703125, -14.89111328125, -13.5615234375, -12.23193359375, -10.90234375, -9.57275390625, -8.2431640625, -6.91357421875, -5.583984375, -4.25439453125, -2.9248046875, -1.59521484375, -0.265625, 1.06396484375, 2.3935546875, 3.72314453125, 5.052734375, 6.38232421875, 7.7119140625, 9.04150390625, 10.37109375, 11.70068359375, 13.0302734375, 14.35986328125, 15.689453125, 17.01904296875, 18.3486328125, 19.67822265625, 21.0078125, 22.33740234375, 23.6669921875, 24.99658203125, 26.326171875, 27.65576171875, 28.9853515625, 30.31494140625, 31.64453125, 32.97412109375, 34.3037109375, 35.63330078125, 36.962890625, 38.29248046875, 39.6220703125, 40.95166015625, 42.28125]}, "gradients/decoder.transformer.h.10.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 2.0, 6.0, 1.0, 3.0, 7.0, 11.0, 7.0, 8.0, 18.0, 12.0, 19.0, 21.0, 25.0, 35.0, 42.0, 55.0, 83.0, 102.0, 146.0, 210.0, 281.0, 411.0, 739.0, 1426.0, 3572.0, 10996.0, 44952.0, 318503.0, 575125.0, 67589.0, 15234.0, 4687.0, 1767.0, 847.0, 441.0, 302.0, 211.0, 143.0, 111.0, 94.0, 68.0, 47.0, 45.0, 36.0, 24.0, 23.0, 16.0, 15.0, 17.0, 7.0, 7.0, 6.0, 6.0, 2.0, 2.0, 3.0, 0.0, 1.0, 0.0, 2.0], "bins": [-44.09375, -42.72119140625, -41.3486328125, -39.97607421875, -38.603515625, -37.23095703125, -35.8583984375, -34.48583984375, -33.11328125, -31.74072265625, -30.3681640625, -28.99560546875, -27.623046875, -26.25048828125, -24.8779296875, -23.50537109375, -22.1328125, -20.76025390625, -19.3876953125, -18.01513671875, -16.642578125, -15.27001953125, -13.8974609375, -12.52490234375, -11.15234375, -9.77978515625, -8.4072265625, -7.03466796875, -5.662109375, -4.28955078125, -2.9169921875, -1.54443359375, -0.171875, 1.20068359375, 2.5732421875, 3.94580078125, 5.318359375, 6.69091796875, 8.0634765625, 9.43603515625, 10.80859375, 12.18115234375, 13.5537109375, 14.92626953125, 16.298828125, 17.67138671875, 19.0439453125, 20.41650390625, 21.7890625, 23.16162109375, 24.5341796875, 25.90673828125, 27.279296875, 28.65185546875, 30.0244140625, 31.39697265625, 32.76953125, 34.14208984375, 35.5146484375, 36.88720703125, 38.259765625, 39.63232421875, 41.0048828125, 42.37744140625, 43.75]}, "gradients/decoder.transformer.h.10.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 4.0, 3.0, 6.0, 9.0, 11.0, 13.0, 16.0, 18.0, 25.0, 33.0, 29.0, 35.0, 41.0, 32.0, 32.0, 41.0, 38.0, 51.0, 58.0, 175.0, 1870.0, 95.0, 40.0, 59.0, 32.0, 34.0, 37.0, 28.0, 32.0, 23.0, 23.0, 28.0, 16.0, 16.0, 7.0, 15.0, 4.0, 7.0, 4.0, 4.0, 7.0, 2.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-125.0625, -120.9345703125, -116.806640625, -112.6787109375, -108.55078125, -104.4228515625, -100.294921875, -96.1669921875, -92.0390625, -87.9111328125, -83.783203125, -79.6552734375, -75.52734375, -71.3994140625, -67.271484375, -63.1435546875, -59.015625, -54.8876953125, -50.759765625, -46.6318359375, -42.50390625, -38.3759765625, -34.248046875, -30.1201171875, -25.9921875, -21.8642578125, -17.736328125, -13.6083984375, -9.48046875, -5.3525390625, -1.224609375, 2.9033203125, 7.03125, 11.1591796875, 15.287109375, 19.4150390625, 23.54296875, 27.6708984375, 31.798828125, 35.9267578125, 40.0546875, 44.1826171875, 48.310546875, 52.4384765625, 56.56640625, 60.6943359375, 64.822265625, 68.9501953125, 73.078125, 77.2060546875, 81.333984375, 85.4619140625, 89.58984375, 93.7177734375, 97.845703125, 101.9736328125, 106.1015625, 110.2294921875, 114.357421875, 118.4853515625, 122.61328125, 126.7412109375, 130.869140625, 134.9970703125, 139.125]}, "gradients/decoder.transformer.h.10.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 3.0, 10.0, 11.0, 6.0, 9.0, 6.0, 23.0, 21.0, 32.0, 57.0, 41.0, 74.0, 109.0, 120.0, 206.0, 347.0, 1222.0, 9860.0, 3108939.0, 21450.0, 1817.0, 461.0, 248.0, 164.0, 126.0, 92.0, 67.0, 48.0, 43.0, 25.0, 28.0, 8.0, 15.0, 7.0, 7.0, 4.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-339.0, -328.27734375, -317.5546875, -306.83203125, -296.109375, -285.38671875, -274.6640625, -263.94140625, -253.21875, -242.49609375, -231.7734375, -221.05078125, -210.328125, -199.60546875, -188.8828125, -178.16015625, -167.4375, -156.71484375, -145.9921875, -135.26953125, -124.546875, -113.82421875, -103.1015625, -92.37890625, -81.65625, -70.93359375, -60.2109375, -49.48828125, -38.765625, -28.04296875, -17.3203125, -6.59765625, 4.125, 14.84765625, 25.5703125, 36.29296875, 47.015625, 57.73828125, 68.4609375, 79.18359375, 89.90625, 100.62890625, 111.3515625, 122.07421875, 132.796875, 143.51953125, 154.2421875, 164.96484375, 175.6875, 186.41015625, 197.1328125, 207.85546875, 218.578125, 229.30078125, 240.0234375, 250.74609375, 261.46875, 272.19140625, 282.9140625, 293.63671875, 304.359375, 315.08203125, 325.8046875, 336.52734375, 347.25]}, "gradients/decoder.transformer.h.10.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 9.0, 893.0, 112.0, 4.0], "bins": [-2719.709716796875, -2675.62353515625, -2631.537353515625, -2587.451171875, -2543.364990234375, -2499.27880859375, -2455.19287109375, -2411.1064453125, -2367.0205078125, -2322.934326171875, -2278.84814453125, -2234.761962890625, -2190.67578125, -2146.589599609375, -2102.50341796875, -2058.41748046875, -2014.3310546875, -1970.244873046875, -1926.15869140625, -1882.072509765625, -1837.9864501953125, -1793.9002685546875, -1749.8140869140625, -1705.7279052734375, -1661.641845703125, -1617.5556640625, -1573.469482421875, -1529.38330078125, -1485.2972412109375, -1441.2110595703125, -1397.1248779296875, -1353.0386962890625, -1308.9525146484375, -1264.8663330078125, -1220.7801513671875, -1176.694091796875, -1132.60791015625, -1088.521728515625, -1044.435546875, -1000.349365234375, -956.2632446289062, -912.1770629882812, -868.0909423828125, -824.0047607421875, -779.9185791015625, -735.8323974609375, -691.7462768554688, -647.6600952148438, -603.5739135742188, -559.4877319335938, -515.401611328125, -471.3154296875, -427.229248046875, -383.1430969238281, -339.05694580078125, -294.97076416015625, -250.88462829589844, -206.7984619140625, -162.71231079101562, -118.62614440917969, -74.53997802734375, -30.453811645507812, 13.632339477539062, 57.71852111816406, 101.80467224121094]}, "gradients/decoder.transformer.h.10.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 4.0, 6.0, 7.0, 9.0, 11.0, 10.0, 17.0, 11.0, 14.0, 22.0, 14.0, 32.0, 22.0, 31.0, 36.0, 46.0, 45.0, 39.0, 39.0, 40.0, 51.0, 49.0, 38.0, 31.0, 34.0, 41.0, 38.0, 28.0, 35.0, 40.0, 20.0, 31.0, 16.0, 18.0, 16.0, 11.0, 12.0, 9.0, 8.0, 6.0, 6.0, 7.0, 5.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-338.1504211425781, -327.89068603515625, -317.6309509277344, -307.3712463378906, -297.11151123046875, -286.8517761230469, -276.592041015625, -266.3323059082031, -256.07257080078125, -245.81283569335938, -235.55311584472656, -225.2933807373047, -215.03366088867188, -204.77392578125, -194.51419067382812, -184.25445556640625, -173.9947509765625, -163.73501586914062, -153.4752960205078, -143.21556091308594, -132.95584106445312, -122.69610595703125, -112.43637084960938, -102.17664337158203, -91.91691589355469, -81.65718841552734, -71.3974609375, -61.137725830078125, -50.87799835205078, -40.61827087402344, -30.358539581298828, -20.09880828857422, -9.839080810546875, 0.42064857482910156, 10.680377960205078, 20.940107345581055, 31.19983673095703, 41.459564208984375, 51.719295501708984, 61.979026794433594, 72.23875427246094, 82.49848175048828, 92.75820922851562, 103.0179443359375, 113.27767181396484, 123.53739929199219, 133.79713439941406, 144.05685424804688, 154.31658935546875, 164.57632446289062, 174.83604431152344, 185.0957794189453, 195.35549926757812, 205.615234375, 215.87496948242188, 226.13470458984375, 236.39442443847656, 246.65415954589844, 256.91387939453125, 267.1736145019531, 277.433349609375, 287.69305419921875, 297.9527893066406, 308.2125244140625, 318.4722595214844]}, "gradients/decoder.transformer.h.9.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 3.0, 2.0, 4.0, 4.0, 5.0, 4.0, 4.0, 3.0, 5.0, 10.0, 10.0, 10.0, 13.0, 24.0, 15.0, 15.0, 26.0, 20.0, 33.0, 27.0, 29.0, 46.0, 42.0, 42.0, 39.0, 34.0, 49.0, 36.0, 43.0, 47.0, 31.0, 28.0, 26.0, 31.0, 34.0, 22.0, 30.0, 23.0, 22.0, 17.0, 17.0, 17.0, 6.0, 14.0, 10.0, 10.0, 5.0, 11.0, 4.0, 5.0, 1.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-42.71875, -41.34375, -39.96875, -38.59375, -37.21875, -35.84375, -34.46875, -33.09375, -31.71875, -30.34375, -28.96875, -27.59375, -26.21875, -24.84375, -23.46875, -22.09375, -20.71875, -19.34375, -17.96875, -16.59375, -15.21875, -13.84375, -12.46875, -11.09375, -9.71875, -8.34375, -6.96875, -5.59375, -4.21875, -2.84375, -1.46875, -0.09375, 1.28125, 2.65625, 4.03125, 5.40625, 6.78125, 8.15625, 9.53125, 10.90625, 12.28125, 13.65625, 15.03125, 16.40625, 17.78125, 19.15625, 20.53125, 21.90625, 23.28125, 24.65625, 26.03125, 27.40625, 28.78125, 30.15625, 31.53125, 32.90625, 34.28125, 35.65625, 37.03125, 38.40625, 39.78125, 41.15625, 42.53125, 43.90625, 45.28125]}, "gradients/decoder.transformer.h.9.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 5.0, 6.0, 8.0, 0.0, 7.0, 8.0, 6.0, 11.0, 23.0, 18.0, 30.0, 35.0, 48.0, 62.0, 98.0, 147.0, 211.0, 341.0, 454.0, 679.0, 1204.0, 2241.0, 5216.0, 22770.0, 1145606.0, 2953036.0, 48789.0, 6957.0, 2643.0, 1289.0, 757.0, 494.0, 336.0, 230.0, 140.0, 98.0, 76.0, 51.0, 40.0, 27.0, 16.0, 13.0, 17.0, 12.0, 10.0, 7.0, 5.0, 3.0, 4.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0], "bins": [-167.875, -162.6484375, -157.421875, -152.1953125, -146.96875, -141.7421875, -136.515625, -131.2890625, -126.0625, -120.8359375, -115.609375, -110.3828125, -105.15625, -99.9296875, -94.703125, -89.4765625, -84.25, -79.0234375, -73.796875, -68.5703125, -63.34375, -58.1171875, -52.890625, -47.6640625, -42.4375, -37.2109375, -31.984375, -26.7578125, -21.53125, -16.3046875, -11.078125, -5.8515625, -0.625, 4.6015625, 9.828125, 15.0546875, 20.28125, 25.5078125, 30.734375, 35.9609375, 41.1875, 46.4140625, 51.640625, 56.8671875, 62.09375, 67.3203125, 72.546875, 77.7734375, 83.0, 88.2265625, 93.453125, 98.6796875, 103.90625, 109.1328125, 114.359375, 119.5859375, 124.8125, 130.0390625, 135.265625, 140.4921875, 145.71875, 150.9453125, 156.171875, 161.3984375, 166.625]}, "gradients/decoder.transformer.h.9.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 5.0, 5.0, 3.0, 8.0, 4.0, 5.0, 9.0, 17.0, 23.0, 29.0, 50.0, 78.0, 128.0, 332.0, 795.0, 1190.0, 741.0, 288.0, 136.0, 81.0, 47.0, 31.0, 18.0, 15.0, 11.0, 9.0, 5.0, 6.0, 5.0, 2.0, 1.0, 4.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-103.9375, -100.408203125, -96.87890625, -93.349609375, -89.8203125, -86.291015625, -82.76171875, -79.232421875, -75.703125, -72.173828125, -68.64453125, -65.115234375, -61.5859375, -58.056640625, -54.52734375, -50.998046875, -47.46875, -43.939453125, -40.41015625, -36.880859375, -33.3515625, -29.822265625, -26.29296875, -22.763671875, -19.234375, -15.705078125, -12.17578125, -8.646484375, -5.1171875, -1.587890625, 1.94140625, 5.470703125, 9.0, 12.529296875, 16.05859375, 19.587890625, 23.1171875, 26.646484375, 30.17578125, 33.705078125, 37.234375, 40.763671875, 44.29296875, 47.822265625, 51.3515625, 54.880859375, 58.41015625, 61.939453125, 65.46875, 68.998046875, 72.52734375, 76.056640625, 79.5859375, 83.115234375, 86.64453125, 90.173828125, 93.703125, 97.232421875, 100.76171875, 104.291015625, 107.8203125, 111.349609375, 114.87890625, 118.408203125, 121.9375]}, "gradients/decoder.transformer.h.9.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 2.0, 5.0, 6.0, 9.0, 7.0, 10.0, 22.0, 39.0, 60.0, 131.0, 361.0, 1340.0, 7482.0, 195536.0, 3966075.0, 19677.0, 2505.0, 604.0, 205.0, 94.0, 37.0, 25.0, 12.0, 6.0, 6.0, 10.0, 6.0, 4.0, 6.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-539.0, -523.2421875, -507.484375, -491.7265625, -475.96875, -460.2109375, -444.453125, -428.6953125, -412.9375, -397.1796875, -381.421875, -365.6640625, -349.90625, -334.1484375, -318.390625, -302.6328125, -286.875, -271.1171875, -255.359375, -239.6015625, -223.84375, -208.0859375, -192.328125, -176.5703125, -160.8125, -145.0546875, -129.296875, -113.5390625, -97.78125, -82.0234375, -66.265625, -50.5078125, -34.75, -18.9921875, -3.234375, 12.5234375, 28.28125, 44.0390625, 59.796875, 75.5546875, 91.3125, 107.0703125, 122.828125, 138.5859375, 154.34375, 170.1015625, 185.859375, 201.6171875, 217.375, 233.1328125, 248.890625, 264.6484375, 280.40625, 296.1640625, 311.921875, 327.6796875, 343.4375, 359.1953125, 374.953125, 390.7109375, 406.46875, 422.2265625, 437.984375, 453.7421875, 469.5]}, "gradients/decoder.transformer.h.9.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 9.0, 19.0, 53.0, 173.0, 363.0, 258.0, 84.0, 29.0, 14.0, 7.0, 6.0, 2.0, 1.0], "bins": [-1721.7830810546875, -1690.8748779296875, -1659.966552734375, -1629.058349609375, -1598.150146484375, -1567.2418212890625, -1536.3336181640625, -1505.4254150390625, -1474.51708984375, -1443.60888671875, -1412.7005615234375, -1381.7923583984375, -1350.8841552734375, -1319.975830078125, -1289.067626953125, -1258.159423828125, -1227.251220703125, -1196.343017578125, -1165.4346923828125, -1134.5264892578125, -1103.6182861328125, -1072.7099609375, -1041.8017578125, -1010.8934936523438, -979.9852294921875, -949.0769653320312, -918.1687622070312, -887.260498046875, -856.3522338867188, -825.4439697265625, -794.5357666015625, -763.6275024414062, -732.7191772460938, -701.8109130859375, -670.9027099609375, -639.9944458007812, -609.086181640625, -578.177978515625, -547.2697143554688, -516.3614501953125, -485.4532165527344, -454.54498291015625, -423.63671875, -392.7284851074219, -361.82025146484375, -330.9119873046875, -300.0037536621094, -269.09552001953125, -238.187255859375, -207.2790069580078, -176.37075805664062, -145.4625244140625, -114.55427551269531, -83.64602661132812, -52.73779296875, -21.829544067382812, 9.078704833984375, 39.9869499206543, 70.89519500732422, 101.80343627929688, 132.71168518066406, 163.61993408203125, 194.52816772460938, 225.43641662597656, 256.34466552734375]}, "gradients/decoder.transformer.h.9.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 4.0, 7.0, 5.0, 14.0, 8.0, 11.0, 15.0, 15.0, 23.0, 16.0, 19.0, 24.0, 25.0, 38.0, 23.0, 28.0, 38.0, 40.0, 28.0, 47.0, 43.0, 34.0, 32.0, 37.0, 34.0, 35.0, 32.0, 35.0, 28.0, 25.0, 29.0, 33.0, 35.0, 20.0, 23.0, 12.0, 17.0, 10.0, 10.0, 12.0, 6.0, 8.0, 6.0, 2.0, 7.0, 4.0, 2.0, 3.0, 1.0, 3.0, 1.0, 3.0, 0.0, 0.0, 2.0], "bins": [-197.28427124023438, -191.06707763671875, -184.84988403320312, -178.6326904296875, -172.41549682617188, -166.19830322265625, -159.98110961914062, -153.76393127441406, -147.54673767089844, -141.3295440673828, -135.1123504638672, -128.89515686035156, -122.67797088623047, -116.46077728271484, -110.24358367919922, -104.02639770507812, -97.80919647216797, -91.59200286865234, -85.37480926513672, -79.15762329101562, -72.9404296875, -66.72323608398438, -60.50604248046875, -54.28885269165039, -48.071659088134766, -41.85446548461914, -35.63727569580078, -29.420082092285156, -23.202890396118164, -16.985698699951172, -10.768505096435547, -4.5513153076171875, 1.6658782958984375, 7.883070468902588, 14.100262641906738, 20.317455291748047, 26.53464698791504, 32.75183868408203, 38.969032287597656, 45.186222076416016, 51.40341567993164, 57.620609283447266, 63.837799072265625, 70.05499267578125, 76.27218627929688, 82.4893798828125, 88.70657348632812, 94.92375946044922, 101.14095306396484, 107.35814666748047, 113.5753402709961, 119.79252624511719, 126.00971984863281, 132.22691345214844, 138.44410705566406, 144.6613006591797, 150.8784942626953, 157.09568786621094, 163.31288146972656, 169.5300750732422, 175.7472686767578, 181.96444702148438, 188.181640625, 194.39883422851562, 200.61602783203125]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 3.0, 1.0, 3.0, 8.0, 9.0, 12.0, 11.0, 9.0, 8.0, 13.0, 16.0, 16.0, 29.0, 22.0, 33.0, 38.0, 44.0, 30.0, 45.0, 38.0, 53.0, 38.0, 42.0, 39.0, 32.0, 43.0, 38.0, 30.0, 38.0, 29.0, 28.0, 27.0, 35.0, 25.0, 17.0, 30.0, 15.0, 14.0, 8.0, 7.0, 5.0, 5.0, 4.0, 3.0, 3.0, 5.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-49.28125, -47.77783203125, -46.2744140625, -44.77099609375, -43.267578125, -41.76416015625, -40.2607421875, -38.75732421875, -37.25390625, -35.75048828125, -34.2470703125, -32.74365234375, -31.240234375, -29.73681640625, -28.2333984375, -26.72998046875, -25.2265625, -23.72314453125, -22.2197265625, -20.71630859375, -19.212890625, -17.70947265625, -16.2060546875, -14.70263671875, -13.19921875, -11.69580078125, -10.1923828125, -8.68896484375, -7.185546875, -5.68212890625, -4.1787109375, -2.67529296875, -1.171875, 0.33154296875, 1.8349609375, 3.33837890625, 4.841796875, 6.34521484375, 7.8486328125, 9.35205078125, 10.85546875, 12.35888671875, 13.8623046875, 15.36572265625, 16.869140625, 18.37255859375, 19.8759765625, 21.37939453125, 22.8828125, 24.38623046875, 25.8896484375, 27.39306640625, 28.896484375, 30.39990234375, 31.9033203125, 33.40673828125, 34.91015625, 36.41357421875, 37.9169921875, 39.42041015625, 40.923828125, 42.42724609375, 43.9306640625, 45.43408203125, 46.9375]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 5.0, 4.0, 6.0, 6.0, 9.0, 8.0, 18.0, 21.0, 40.0, 71.0, 72.0, 107.0, 148.0, 205.0, 326.0, 416.0, 560.0, 731.0, 1133.0, 1644.0, 2422.0, 3427.0, 4740.0, 7231.0, 10915.0, 16632.0, 25217.0, 40032.0, 64565.0, 113346.0, 210825.0, 226541.0, 123476.0, 70149.0, 42752.0, 26990.0, 17489.0, 11375.0, 7769.0, 5169.0, 3602.0, 2433.0, 1652.0, 1323.0, 836.0, 615.0, 421.0, 338.0, 198.0, 157.0, 118.0, 82.0, 55.0, 60.0, 30.0, 18.0, 17.0, 13.0, 5.0, 4.0, 3.0, 1.0], "bins": [-2.228515625, -2.1611328125, -2.09375, -2.0263671875, -1.958984375, -1.8916015625, -1.82421875, -1.7568359375, -1.689453125, -1.6220703125, -1.5546875, -1.4873046875, -1.419921875, -1.3525390625, -1.28515625, -1.2177734375, -1.150390625, -1.0830078125, -1.015625, -0.9482421875, -0.880859375, -0.8134765625, -0.74609375, -0.6787109375, -0.611328125, -0.5439453125, -0.4765625, -0.4091796875, -0.341796875, -0.2744140625, -0.20703125, -0.1396484375, -0.072265625, -0.0048828125, 0.0625, 0.1298828125, 0.197265625, 0.2646484375, 0.33203125, 0.3994140625, 0.466796875, 0.5341796875, 0.6015625, 0.6689453125, 0.736328125, 0.8037109375, 0.87109375, 0.9384765625, 1.005859375, 1.0732421875, 1.140625, 1.2080078125, 1.275390625, 1.3427734375, 1.41015625, 1.4775390625, 1.544921875, 1.6123046875, 1.6796875, 1.7470703125, 1.814453125, 1.8818359375, 1.94921875, 2.0166015625, 2.083984375]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 7.0, 4.0, 1.0, 4.0, 8.0, 14.0, 11.0, 5.0, 12.0, 14.0, 11.0, 24.0, 26.0, 27.0, 23.0, 28.0, 37.0, 31.0, 41.0, 36.0, 33.0, 31.0, 41.0, 34.0, 1065.0, 33.0, 39.0, 27.0, 34.0, 43.0, 17.0, 28.0, 32.0, 26.0, 23.0, 22.0, 22.0, 12.0, 20.0, 13.0, 16.0, 14.0, 6.0, 6.0, 7.0, 6.0, 7.0, 3.0, 4.0, 3.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0], "bins": [-28.765625, -27.875732421875, -26.98583984375, -26.095947265625, -25.2060546875, -24.316162109375, -23.42626953125, -22.536376953125, -21.646484375, -20.756591796875, -19.86669921875, -18.976806640625, -18.0869140625, -17.197021484375, -16.30712890625, -15.417236328125, -14.52734375, -13.637451171875, -12.74755859375, -11.857666015625, -10.9677734375, -10.077880859375, -9.18798828125, -8.298095703125, -7.408203125, -6.518310546875, -5.62841796875, -4.738525390625, -3.8486328125, -2.958740234375, -2.06884765625, -1.178955078125, -0.2890625, 0.600830078125, 1.49072265625, 2.380615234375, 3.2705078125, 4.160400390625, 5.05029296875, 5.940185546875, 6.830078125, 7.719970703125, 8.60986328125, 9.499755859375, 10.3896484375, 11.279541015625, 12.16943359375, 13.059326171875, 13.94921875, 14.839111328125, 15.72900390625, 16.618896484375, 17.5087890625, 18.398681640625, 19.28857421875, 20.178466796875, 21.068359375, 21.958251953125, 22.84814453125, 23.738037109375, 24.6279296875, 25.517822265625, 26.40771484375, 27.297607421875, 28.1875]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 3.0, 2.0, 10.0, 10.0, 14.0, 30.0, 39.0, 55.0, 90.0, 115.0, 156.0, 264.0, 423.0, 602.0, 894.0, 1366.0, 2080.0, 3247.0, 5150.0, 7970.0, 12578.0, 20785.0, 34027.0, 58410.0, 109027.0, 331333.0, 1225671.0, 121877.0, 63862.0, 37547.0, 21898.0, 13831.0, 8380.0, 5369.0, 3461.0, 2171.0, 1484.0, 979.0, 610.0, 412.0, 265.0, 210.0, 129.0, 94.0, 70.0, 47.0, 25.0, 23.0, 15.0, 11.0, 1.0, 10.0, 4.0, 3.0, 3.0, 1.0, 2.0, 1.0], "bins": [-2.43359375, -2.35772705078125, -2.2818603515625, -2.20599365234375, -2.130126953125, -2.05426025390625, -1.9783935546875, -1.90252685546875, -1.82666015625, -1.75079345703125, -1.6749267578125, -1.59906005859375, -1.523193359375, -1.44732666015625, -1.3714599609375, -1.29559326171875, -1.2197265625, -1.14385986328125, -1.0679931640625, -0.99212646484375, -0.916259765625, -0.84039306640625, -0.7645263671875, -0.68865966796875, -0.61279296875, -0.53692626953125, -0.4610595703125, -0.38519287109375, -0.309326171875, -0.23345947265625, -0.1575927734375, -0.08172607421875, -0.005859375, 0.07000732421875, 0.1458740234375, 0.22174072265625, 0.297607421875, 0.37347412109375, 0.4493408203125, 0.52520751953125, 0.60107421875, 0.67694091796875, 0.7528076171875, 0.82867431640625, 0.904541015625, 0.98040771484375, 1.0562744140625, 1.13214111328125, 1.2080078125, 1.28387451171875, 1.3597412109375, 1.43560791015625, 1.511474609375, 1.58734130859375, 1.6632080078125, 1.73907470703125, 1.81494140625, 1.89080810546875, 1.9666748046875, 2.04254150390625, 2.118408203125, 2.19427490234375, 2.2701416015625, 2.34600830078125, 2.421875]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 1.0, 3.0, 5.0, 4.0, 9.0, 7.0, 5.0, 9.0, 13.0, 15.0, 24.0, 26.0, 24.0, 28.0, 39.0, 65.0, 63.0, 82.0, 95.0, 88.0, 93.0, 59.0, 39.0, 28.0, 24.0, 32.0, 24.0, 13.0, 13.0, 16.0, 11.0, 14.0, 6.0, 7.0, 5.0, 6.0, 5.0, 3.0, 1.0, 3.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0345458984375, -0.03352069854736328, -0.03249549865722656, -0.031470298767089844, -0.030445098876953125, -0.029419898986816406, -0.028394699096679688, -0.02736949920654297, -0.02634429931640625, -0.02531909942626953, -0.024293899536132812, -0.023268699645996094, -0.022243499755859375, -0.021218299865722656, -0.020193099975585938, -0.01916790008544922, -0.0181427001953125, -0.01711750030517578, -0.016092300415039062, -0.015067100524902344, -0.014041900634765625, -0.013016700744628906, -0.011991500854492188, -0.010966300964355469, -0.00994110107421875, -0.008915901184082031, -0.007890701293945312, -0.006865501403808594, -0.005840301513671875, -0.004815101623535156, -0.0037899017333984375, -0.0027647018432617188, -0.001739501953125, -0.0007143020629882812, 0.0003108978271484375, 0.0013360977172851562, 0.002361297607421875, 0.0033864974975585938, 0.0044116973876953125, 0.005436897277832031, 0.00646209716796875, 0.007487297058105469, 0.008512496948242188, 0.009537696838378906, 0.010562896728515625, 0.011588096618652344, 0.012613296508789062, 0.013638496398925781, 0.0146636962890625, 0.01568889617919922, 0.016714096069335938, 0.017739295959472656, 0.018764495849609375, 0.019789695739746094, 0.020814895629882812, 0.02184009552001953, 0.02286529541015625, 0.02389049530029297, 0.024915695190429688, 0.025940895080566406, 0.026966094970703125, 0.027991294860839844, 0.029016494750976562, 0.03004169464111328, 0.03106689453125]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 1.0, 7.0, 2.0, 6.0, 3.0, 11.0, 10.0, 8.0, 25.0, 18.0, 20.0, 21.0, 30.0, 51.0, 49.0, 55.0, 90.0, 135.0, 287.0, 1248.0, 37981.0, 997298.0, 9844.0, 660.0, 235.0, 112.0, 91.0, 56.0, 39.0, 39.0, 34.0, 24.0, 16.0, 5.0, 10.0, 11.0, 5.0, 6.0, 7.0, 1.0, 2.0, 0.0, 5.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.5654296875, -0.5467987060546875, -0.528167724609375, -0.5095367431640625, -0.49090576171875, -0.4722747802734375, -0.453643798828125, -0.4350128173828125, -0.4163818359375, -0.3977508544921875, -0.379119873046875, -0.3604888916015625, -0.34185791015625, -0.3232269287109375, -0.304595947265625, -0.2859649658203125, -0.267333984375, -0.2487030029296875, -0.230072021484375, -0.2114410400390625, -0.19281005859375, -0.1741790771484375, -0.155548095703125, -0.1369171142578125, -0.1182861328125, -0.0996551513671875, -0.081024169921875, -0.0623931884765625, -0.04376220703125, -0.0251312255859375, -0.006500244140625, 0.0121307373046875, 0.03076171875, 0.0493927001953125, 0.068023681640625, 0.0866546630859375, 0.10528564453125, 0.1239166259765625, 0.142547607421875, 0.1611785888671875, 0.1798095703125, 0.1984405517578125, 0.217071533203125, 0.2357025146484375, 0.25433349609375, 0.2729644775390625, 0.291595458984375, 0.3102264404296875, 0.328857421875, 0.3474884033203125, 0.366119384765625, 0.3847503662109375, 0.40338134765625, 0.4220123291015625, 0.440643310546875, 0.4592742919921875, 0.4779052734375, 0.4965362548828125, 0.515167236328125, 0.5337982177734375, 0.55242919921875, 0.5710601806640625, 0.589691162109375, 0.6083221435546875, 0.626953125]}, "gradients/decoder.transformer.h.9.ln_cross_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 5.0, 4.0, 21.0, 52.0, 90.0, 295.0, 363.0, 111.0, 53.0, 14.0, 1.0, 5.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.013796009123325348, -0.01209486834704876, -0.010393726639449596, -0.008692585863173008, -0.006991444621235132, -0.0052903033792972565, -0.003589162603020668, -0.001888020895421505, -0.00018688011914491653, 0.0015142610063776374, 0.0032154021319001913, 0.004916543141007423, 0.006617684382945299, 0.008318825624883175, 0.010019966401159763, 0.011721108108758926, 0.013422248885035515, 0.015123389661312103, 0.01682453043758869, 0.01852567121386528, 0.020226813852787018, 0.021927954629063606, 0.023629095405340195, 0.025330238044261932, 0.02703137695789337, 0.02873251773416996, 0.03043365851044655, 0.032134801149368286, 0.033835940062999725, 0.03553708270192146, 0.0372382253408432, 0.03893936425447464, 0.04064050689339638, 0.042341649532318115, 0.044042788445949554, 0.04574393108487129, 0.04744506999850273, 0.04914621263742447, 0.05084735155105591, 0.052548494189977646, 0.054249636828899384, 0.05595077946782112, 0.05765191838145256, 0.0593530610203743, 0.06105419993400574, 0.06275534629821777, 0.06445648521184921, 0.06615762412548065, 0.06785876303911209, 0.06955990195274353, 0.07126104831695557, 0.072962187230587, 0.07466332614421844, 0.07636447250843048, 0.07806561142206192, 0.07976675033569336, 0.0814678966999054, 0.08316903561353683, 0.08487018197774887, 0.08657132089138031, 0.08827245980501175, 0.08997359871864319, 0.09167474508285522, 0.09337588399648666, 0.0950770229101181]}, "gradients/decoder.transformer.h.9.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 2.0, 8.0, 7.0, 13.0, 12.0, 27.0, 14.0, 20.0, 20.0, 21.0, 30.0, 33.0, 24.0, 30.0, 32.0, 36.0, 37.0, 30.0, 49.0, 45.0, 40.0, 62.0, 43.0, 38.0, 29.0, 38.0, 30.0, 36.0, 34.0, 25.0, 21.0, 14.0, 16.0, 15.0, 27.0, 13.0, 14.0, 5.0, 5.0, 3.0, 5.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.017375648021697998, -0.01683177426457405, -0.016287900507450104, -0.015744026750326157, -0.01520015299320221, -0.014656279236078262, -0.014112405478954315, -0.013568531721830368, -0.013024657964706421, -0.012480784207582474, -0.011936910450458527, -0.01139303669333458, -0.010849162936210632, -0.010305289179086685, -0.009761415421962738, -0.009217541664838791, -0.008673667907714844, -0.008129794150590897, -0.0075859203934669495, -0.007042046636343002, -0.006498172879219055, -0.005954299122095108, -0.005410425364971161, -0.004866551607847214, -0.004322677850723267, -0.0037788040935993195, -0.0032349303364753723, -0.002691056579351425, -0.002147182822227478, -0.0016033090651035309, -0.0010594353079795837, -0.0005155615508556366, 2.8312206268310547e-05, 0.0005721859633922577, 0.0011160597205162048, 0.001659933477640152, 0.002203807234764099, 0.0027476809918880463, 0.0032915547490119934, 0.0038354285061359406, 0.004379302263259888, 0.004923176020383835, 0.005467049777507782, 0.006010923534631729, 0.006554797291755676, 0.007098671048879623, 0.0076425448060035706, 0.008186418563127518, 0.008730292320251465, 0.009274166077375412, 0.009818039834499359, 0.010361913591623306, 0.010905787348747253, 0.0114496611058712, 0.011993534862995148, 0.012537408620119095, 0.013081282377243042, 0.01362515613436699, 0.014169029891490936, 0.014712903648614883, 0.01525677740573883, 0.015800651162862778, 0.016344524919986725, 0.016888398677110672, 0.01743227243423462]}, "gradients/decoder.transformer.h.9.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 3.0, 1.0, 3.0, 8.0, 9.0, 12.0, 11.0, 9.0, 8.0, 13.0, 16.0, 16.0, 29.0, 22.0, 33.0, 38.0, 44.0, 30.0, 45.0, 38.0, 53.0, 38.0, 42.0, 39.0, 32.0, 43.0, 38.0, 31.0, 37.0, 29.0, 28.0, 27.0, 35.0, 25.0, 17.0, 30.0, 15.0, 14.0, 8.0, 7.0, 5.0, 5.0, 4.0, 3.0, 3.0, 5.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-49.28125, -47.77783203125, -46.2744140625, -44.77099609375, -43.267578125, -41.76416015625, -40.2607421875, -38.75732421875, -37.25390625, -35.75048828125, -34.2470703125, -32.74365234375, -31.240234375, -29.73681640625, -28.2333984375, -26.72998046875, -25.2265625, -23.72314453125, -22.2197265625, -20.71630859375, -19.212890625, -17.70947265625, -16.2060546875, -14.70263671875, -13.19921875, -11.69580078125, -10.1923828125, -8.68896484375, -7.185546875, -5.68212890625, -4.1787109375, -2.67529296875, -1.171875, 0.33154296875, 1.8349609375, 3.33837890625, 4.841796875, 6.34521484375, 7.8486328125, 9.35205078125, 10.85546875, 12.35888671875, 13.8623046875, 15.36572265625, 16.869140625, 18.37255859375, 19.8759765625, 21.37939453125, 22.8828125, 24.38623046875, 25.8896484375, 27.39306640625, 28.896484375, 30.39990234375, 31.9033203125, 33.40673828125, 34.91015625, 36.41357421875, 37.9169921875, 39.42041015625, 40.923828125, 42.42724609375, 43.9306640625, 45.43408203125, 46.9375]}, "gradients/decoder.transformer.h.9.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 5.0, 1.0, 6.0, 9.0, 11.0, 17.0, 17.0, 17.0, 23.0, 33.0, 39.0, 42.0, 79.0, 102.0, 139.0, 195.0, 325.0, 501.0, 930.0, 2181.0, 7211.0, 44912.0, 809751.0, 160669.0, 14549.0, 3540.0, 1340.0, 625.0, 375.0, 245.0, 177.0, 112.0, 98.0, 69.0, 40.0, 51.0, 29.0, 19.0, 20.0, 11.0, 10.0, 7.0, 6.0, 5.0, 4.0, 5.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-87.5625, -84.88671875, -82.2109375, -79.53515625, -76.859375, -74.18359375, -71.5078125, -68.83203125, -66.15625, -63.48046875, -60.8046875, -58.12890625, -55.453125, -52.77734375, -50.1015625, -47.42578125, -44.75, -42.07421875, -39.3984375, -36.72265625, -34.046875, -31.37109375, -28.6953125, -26.01953125, -23.34375, -20.66796875, -17.9921875, -15.31640625, -12.640625, -9.96484375, -7.2890625, -4.61328125, -1.9375, 0.73828125, 3.4140625, 6.08984375, 8.765625, 11.44140625, 14.1171875, 16.79296875, 19.46875, 22.14453125, 24.8203125, 27.49609375, 30.171875, 32.84765625, 35.5234375, 38.19921875, 40.875, 43.55078125, 46.2265625, 48.90234375, 51.578125, 54.25390625, 56.9296875, 59.60546875, 62.28125, 64.95703125, 67.6328125, 70.30859375, 72.984375, 75.66015625, 78.3359375, 81.01171875, 83.6875]}, "gradients/decoder.transformer.h.9.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 4.0, 4.0, 4.0, 5.0, 5.0, 12.0, 10.0, 11.0, 20.0, 10.0, 22.0, 25.0, 37.0, 36.0, 53.0, 44.0, 57.0, 50.0, 72.0, 176.0, 1933.0, 86.0, 52.0, 45.0, 54.0, 29.0, 34.0, 29.0, 25.0, 28.0, 17.0, 23.0, 6.0, 14.0, 7.0, 7.0, 2.0, 2.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-181.0, -175.560546875, -170.12109375, -164.681640625, -159.2421875, -153.802734375, -148.36328125, -142.923828125, -137.484375, -132.044921875, -126.60546875, -121.166015625, -115.7265625, -110.287109375, -104.84765625, -99.408203125, -93.96875, -88.529296875, -83.08984375, -77.650390625, -72.2109375, -66.771484375, -61.33203125, -55.892578125, -50.453125, -45.013671875, -39.57421875, -34.134765625, -28.6953125, -23.255859375, -17.81640625, -12.376953125, -6.9375, -1.498046875, 3.94140625, 9.380859375, 14.8203125, 20.259765625, 25.69921875, 31.138671875, 36.578125, 42.017578125, 47.45703125, 52.896484375, 58.3359375, 63.775390625, 69.21484375, 74.654296875, 80.09375, 85.533203125, 90.97265625, 96.412109375, 101.8515625, 107.291015625, 112.73046875, 118.169921875, 123.609375, 129.048828125, 134.48828125, 139.927734375, 145.3671875, 150.806640625, 156.24609375, 161.685546875, 167.125]}, "gradients/decoder.transformer.h.9.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 3.0, 2.0, 5.0, 6.0, 16.0, 8.0, 16.0, 25.0, 24.0, 27.0, 35.0, 43.0, 51.0, 104.0, 168.0, 347.0, 1946.0, 3129967.0, 11655.0, 541.0, 209.0, 136.0, 84.0, 72.0, 50.0, 38.0, 22.0, 23.0, 18.0, 13.0, 9.0, 11.0, 13.0, 5.0, 5.0, 3.0, 4.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-502.0, -485.640625, -469.28125, -452.921875, -436.5625, -420.203125, -403.84375, -387.484375, -371.125, -354.765625, -338.40625, -322.046875, -305.6875, -289.328125, -272.96875, -256.609375, -240.25, -223.890625, -207.53125, -191.171875, -174.8125, -158.453125, -142.09375, -125.734375, -109.375, -93.015625, -76.65625, -60.296875, -43.9375, -27.578125, -11.21875, 5.140625, 21.5, 37.859375, 54.21875, 70.578125, 86.9375, 103.296875, 119.65625, 136.015625, 152.375, 168.734375, 185.09375, 201.453125, 217.8125, 234.171875, 250.53125, 266.890625, 283.25, 299.609375, 315.96875, 332.328125, 348.6875, 365.046875, 381.40625, 397.765625, 414.125, 430.484375, 446.84375, 463.203125, 479.5625, 495.921875, 512.28125, 528.640625, 545.0]}, "gradients/decoder.transformer.h.9.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 110.0, 900.0, 10.0], "bins": [-2062.98291015625, -2029.8673095703125, -1996.7518310546875, -1963.63623046875, -1930.520751953125, -1897.4051513671875, -1864.2896728515625, -1831.174072265625, -1798.05859375, -1764.9429931640625, -1731.8275146484375, -1698.7119140625, -1665.596435546875, -1632.4808349609375, -1599.3653564453125, -1566.249755859375, -1533.1341552734375, -1500.0185546875, -1466.903076171875, -1433.7874755859375, -1400.6719970703125, -1367.556396484375, -1334.44091796875, -1301.3253173828125, -1268.209716796875, -1235.0941162109375, -1201.9786376953125, -1168.863037109375, -1135.74755859375, -1102.6319580078125, -1069.5164794921875, -1036.40087890625, -1003.2854614257812, -970.169921875, -937.0543823242188, -903.9388427734375, -870.8232421875, -837.707763671875, -804.5921630859375, -771.4766235351562, -738.361083984375, -705.2455444335938, -672.1300048828125, -639.0144653320312, -605.89892578125, -572.7833251953125, -539.6677856445312, -506.55224609375, -473.4367370605469, -440.3211975097656, -407.20562744140625, -374.090087890625, -340.97454833984375, -307.8590087890625, -274.74346923828125, -241.62789916992188, -208.51235961914062, -175.39682006835938, -142.28126525878906, -109.16571807861328, -76.0501708984375, -42.93463134765625, -9.819076538085938, 23.296478271484375, 56.412025451660156]}, "gradients/decoder.transformer.h.9.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 4.0, 4.0, 3.0, 6.0, 6.0, 6.0, 10.0, 13.0, 10.0, 14.0, 14.0, 22.0, 21.0, 25.0, 34.0, 37.0, 22.0, 32.0, 36.0, 43.0, 37.0, 40.0, 45.0, 38.0, 44.0, 38.0, 37.0, 39.0, 29.0, 40.0, 34.0, 35.0, 29.0, 22.0, 23.0, 20.0, 15.0, 17.0, 17.0, 12.0, 5.0, 8.0, 4.0, 12.0, 3.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-423.25286865234375, -410.153076171875, -397.05328369140625, -383.9534912109375, -370.85369873046875, -357.75390625, -344.65411376953125, -331.5543212890625, -318.45452880859375, -305.354736328125, -292.25494384765625, -279.1551513671875, -266.05535888671875, -252.95556640625, -239.85577392578125, -226.7559814453125, -213.65618896484375, -200.556396484375, -187.45660400390625, -174.3568115234375, -161.25701904296875, -148.1572265625, -135.05743408203125, -121.9576416015625, -108.85784912109375, -95.758056640625, -82.65826416015625, -69.5584716796875, -56.45867919921875, -43.35888671875, -30.25909423828125, -17.1593017578125, -4.059539794921875, 9.040252685546875, 22.140045166015625, 35.239837646484375, 48.339630126953125, 61.439422607421875, 74.53921508789062, 87.63900756835938, 100.73880004882812, 113.83859252929688, 126.93838500976562, 140.03817749023438, 153.13796997070312, 166.23776245117188, 179.33755493164062, 192.43734741210938, 205.53713989257812, 218.63693237304688, 231.73672485351562, 244.83651733398438, 257.9363098144531, 271.0361022949219, 284.1358947753906, 297.2356872558594, 310.3354797363281, 323.4352722167969, 336.5350646972656, 349.6348571777344, 362.7346496582031, 375.8344421386719, 388.9342346191406, 402.0340270996094, 415.1338195800781]}, "gradients/decoder.transformer.h.8.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 4.0, 5.0, 1.0, 0.0, 9.0, 11.0, 7.0, 9.0, 12.0, 6.0, 17.0, 11.0, 12.0, 21.0, 24.0, 37.0, 29.0, 33.0, 35.0, 48.0, 45.0, 42.0, 35.0, 32.0, 40.0, 42.0, 33.0, 41.0, 37.0, 35.0, 35.0, 35.0, 18.0, 30.0, 25.0, 31.0, 23.0, 27.0, 16.0, 10.0, 11.0, 7.0, 5.0, 3.0, 5.0, 5.0, 0.0, 5.0, 5.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-48.84375, -47.341796875, -45.83984375, -44.337890625, -42.8359375, -41.333984375, -39.83203125, -38.330078125, -36.828125, -35.326171875, -33.82421875, -32.322265625, -30.8203125, -29.318359375, -27.81640625, -26.314453125, -24.8125, -23.310546875, -21.80859375, -20.306640625, -18.8046875, -17.302734375, -15.80078125, -14.298828125, -12.796875, -11.294921875, -9.79296875, -8.291015625, -6.7890625, -5.287109375, -3.78515625, -2.283203125, -0.78125, 0.720703125, 2.22265625, 3.724609375, 5.2265625, 6.728515625, 8.23046875, 9.732421875, 11.234375, 12.736328125, 14.23828125, 15.740234375, 17.2421875, 18.744140625, 20.24609375, 21.748046875, 23.25, 24.751953125, 26.25390625, 27.755859375, 29.2578125, 30.759765625, 32.26171875, 33.763671875, 35.265625, 36.767578125, 38.26953125, 39.771484375, 41.2734375, 42.775390625, 44.27734375, 45.779296875, 47.28125]}, "gradients/decoder.transformer.h.8.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 6.0, 6.0, 4.0, 10.0, 7.0, 23.0, 19.0, 36.0, 44.0, 53.0, 73.0, 134.0, 156.0, 254.0, 339.0, 541.0, 802.0, 1265.0, 2193.0, 4374.0, 9531.0, 35637.0, 707367.0, 3183026.0, 212279.0, 21146.0, 7013.0, 3238.0, 1786.0, 993.0, 634.0, 399.0, 288.0, 193.0, 132.0, 74.0, 58.0, 51.0, 32.0, 20.0, 14.0, 12.0, 12.0, 6.0, 4.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-150.125, -145.8740234375, -141.623046875, -137.3720703125, -133.12109375, -128.8701171875, -124.619140625, -120.3681640625, -116.1171875, -111.8662109375, -107.615234375, -103.3642578125, -99.11328125, -94.8623046875, -90.611328125, -86.3603515625, -82.109375, -77.8583984375, -73.607421875, -69.3564453125, -65.10546875, -60.8544921875, -56.603515625, -52.3525390625, -48.1015625, -43.8505859375, -39.599609375, -35.3486328125, -31.09765625, -26.8466796875, -22.595703125, -18.3447265625, -14.09375, -9.8427734375, -5.591796875, -1.3408203125, 2.91015625, 7.1611328125, 11.412109375, 15.6630859375, 19.9140625, 24.1650390625, 28.416015625, 32.6669921875, 36.91796875, 41.1689453125, 45.419921875, 49.6708984375, 53.921875, 58.1728515625, 62.423828125, 66.6748046875, 70.92578125, 75.1767578125, 79.427734375, 83.6787109375, 87.9296875, 92.1806640625, 96.431640625, 100.6826171875, 104.93359375, 109.1845703125, 113.435546875, 117.6865234375, 121.9375]}, "gradients/decoder.transformer.h.8.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 5.0, 2.0, 2.0, 4.0, 2.0, 4.0, 7.0, 6.0, 4.0, 8.0, 9.0, 16.0, 19.0, 19.0, 32.0, 54.0, 64.0, 97.0, 216.0, 420.0, 879.0, 966.0, 607.0, 257.0, 134.0, 76.0, 49.0, 26.0, 25.0, 14.0, 15.0, 11.0, 15.0, 3.0, 5.0, 2.0, 4.0, 1.0, 3.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-122.9375, -119.4443359375, -115.951171875, -112.4580078125, -108.96484375, -105.4716796875, -101.978515625, -98.4853515625, -94.9921875, -91.4990234375, -88.005859375, -84.5126953125, -81.01953125, -77.5263671875, -74.033203125, -70.5400390625, -67.046875, -63.5537109375, -60.060546875, -56.5673828125, -53.07421875, -49.5810546875, -46.087890625, -42.5947265625, -39.1015625, -35.6083984375, -32.115234375, -28.6220703125, -25.12890625, -21.6357421875, -18.142578125, -14.6494140625, -11.15625, -7.6630859375, -4.169921875, -0.6767578125, 2.81640625, 6.3095703125, 9.802734375, 13.2958984375, 16.7890625, 20.2822265625, 23.775390625, 27.2685546875, 30.76171875, 34.2548828125, 37.748046875, 41.2412109375, 44.734375, 48.2275390625, 51.720703125, 55.2138671875, 58.70703125, 62.2001953125, 65.693359375, 69.1865234375, 72.6796875, 76.1728515625, 79.666015625, 83.1591796875, 86.65234375, 90.1455078125, 93.638671875, 97.1318359375, 100.625]}, "gradients/decoder.transformer.h.8.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 3.0, 1.0, 4.0, 3.0, 3.0, 10.0, 13.0, 21.0, 21.0, 29.0, 83.0, 132.0, 295.0, 665.0, 2111.0, 8188.0, 53061.0, 3872497.0, 234165.0, 17185.0, 3766.0, 1133.0, 448.0, 176.0, 111.0, 43.0, 34.0, 23.0, 15.0, 9.0, 7.0, 7.0, 3.0, 5.0, 6.0, 3.0, 6.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-303.5, -292.5546875, -281.609375, -270.6640625, -259.71875, -248.7734375, -237.828125, -226.8828125, -215.9375, -204.9921875, -194.046875, -183.1015625, -172.15625, -161.2109375, -150.265625, -139.3203125, -128.375, -117.4296875, -106.484375, -95.5390625, -84.59375, -73.6484375, -62.703125, -51.7578125, -40.8125, -29.8671875, -18.921875, -7.9765625, 2.96875, 13.9140625, 24.859375, 35.8046875, 46.75, 57.6953125, 68.640625, 79.5859375, 90.53125, 101.4765625, 112.421875, 123.3671875, 134.3125, 145.2578125, 156.203125, 167.1484375, 178.09375, 189.0390625, 199.984375, 210.9296875, 221.875, 232.8203125, 243.765625, 254.7109375, 265.65625, 276.6015625, 287.546875, 298.4921875, 309.4375, 320.3828125, 331.328125, 342.2734375, 353.21875, 364.1640625, 375.109375, 386.0546875, 397.0]}, "gradients/decoder.transformer.h.8.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 6.0, 12.0, 42.0, 171.0, 432.0, 244.0, 62.0, 32.0, 8.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-2098.664794921875, -2058.842041015625, -2019.0194091796875, -1979.19677734375, -1939.3740234375, -1899.55126953125, -1859.7286376953125, -1819.906005859375, -1780.083251953125, -1740.260498046875, -1700.4378662109375, -1660.615234375, -1620.79248046875, -1580.9697265625, -1541.1470947265625, -1501.324462890625, -1461.501708984375, -1421.678955078125, -1381.8563232421875, -1342.03369140625, -1302.2109375, -1262.38818359375, -1222.5655517578125, -1182.742919921875, -1142.920166015625, -1103.097412109375, -1063.2747802734375, -1023.4520874023438, -983.62939453125, -943.8067016601562, -903.9840087890625, -864.1613159179688, -824.3387451171875, -784.5160522460938, -744.693359375, -704.8706665039062, -665.0479736328125, -625.2252807617188, -585.402587890625, -545.5798950195312, -505.7572021484375, -465.93450927734375, -426.11181640625, -386.28912353515625, -346.4664306640625, -306.64373779296875, -266.821044921875, -226.99835205078125, -187.1756591796875, -147.35296630859375, -107.5302734375, -67.70758056640625, -27.8848876953125, 11.93780517578125, 51.760498046875, 91.58319091796875, 131.4058837890625, 171.22857666015625, 211.05126953125, 250.87396240234375, 290.6966552734375, 330.51934814453125, 370.342041015625, 410.16473388671875, 449.9874267578125]}, "gradients/decoder.transformer.h.8.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 6.0, 5.0, 5.0, 12.0, 14.0, 10.0, 19.0, 12.0, 15.0, 15.0, 22.0, 19.0, 29.0, 31.0, 24.0, 29.0, 34.0, 28.0, 37.0, 24.0, 39.0, 43.0, 43.0, 40.0, 49.0, 42.0, 38.0, 30.0, 23.0, 25.0, 30.0, 28.0, 20.0, 23.0, 20.0, 20.0, 20.0, 14.0, 15.0, 11.0, 9.0, 8.0, 11.0, 2.0, 3.0, 4.0, 3.0, 6.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-241.13265991210938, -233.33627319335938, -225.53988647460938, -217.7434844970703, -209.9470977783203, -202.1507110595703, -194.35430908203125, -186.55792236328125, -178.76153564453125, -170.96514892578125, -163.16876220703125, -155.3723602294922, -147.5759735107422, -139.7795867919922, -131.98318481445312, -124.18679809570312, -116.39041137695312, -108.59402465820312, -100.7976303100586, -93.00123596191406, -85.20484924316406, -77.40846252441406, -69.61206817626953, -61.815677642822266, -54.019287109375, -46.222896575927734, -38.42650604248047, -30.630115509033203, -22.833724975585938, -15.037334442138672, -7.240943908691406, 0.5554466247558594, 8.351837158203125, 16.14822769165039, 23.944618225097656, 31.741008758544922, 39.53739929199219, 47.33378982543945, 55.13018035888672, 62.926570892333984, 70.72296142578125, 78.51934814453125, 86.31574249267578, 94.11213684082031, 101.90852355957031, 109.70491027832031, 117.50130462646484, 125.29769897460938, 133.09408569335938, 140.89047241210938, 148.68685913085938, 156.48326110839844, 164.27964782714844, 172.07603454589844, 179.8724365234375, 187.6688232421875, 195.4652099609375, 203.2615966796875, 211.0579833984375, 218.85438537597656, 226.65077209472656, 234.44715881347656, 242.24356079101562, 250.03994750976562, 257.8363342285156]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 4.0, 3.0, 5.0, 7.0, 6.0, 3.0, 10.0, 11.0, 14.0, 21.0, 25.0, 17.0, 34.0, 22.0, 29.0, 39.0, 47.0, 34.0, 40.0, 35.0, 47.0, 53.0, 48.0, 50.0, 51.0, 40.0, 44.0, 39.0, 44.0, 30.0, 23.0, 26.0, 21.0, 23.0, 19.0, 10.0, 8.0, 8.0, 9.0, 2.0, 1.0, 0.0, 4.0, 4.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-56.90625, -55.16748046875, -53.4287109375, -51.68994140625, -49.951171875, -48.21240234375, -46.4736328125, -44.73486328125, -42.99609375, -41.25732421875, -39.5185546875, -37.77978515625, -36.041015625, -34.30224609375, -32.5634765625, -30.82470703125, -29.0859375, -27.34716796875, -25.6083984375, -23.86962890625, -22.130859375, -20.39208984375, -18.6533203125, -16.91455078125, -15.17578125, -13.43701171875, -11.6982421875, -9.95947265625, -8.220703125, -6.48193359375, -4.7431640625, -3.00439453125, -1.265625, 0.47314453125, 2.2119140625, 3.95068359375, 5.689453125, 7.42822265625, 9.1669921875, 10.90576171875, 12.64453125, 14.38330078125, 16.1220703125, 17.86083984375, 19.599609375, 21.33837890625, 23.0771484375, 24.81591796875, 26.5546875, 28.29345703125, 30.0322265625, 31.77099609375, 33.509765625, 35.24853515625, 36.9873046875, 38.72607421875, 40.46484375, 42.20361328125, 43.9423828125, 45.68115234375, 47.419921875, 49.15869140625, 50.8974609375, 52.63623046875, 54.375]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 3.0, 0.0, 0.0, 2.0, 3.0, 4.0, 10.0, 11.0, 23.0, 16.0, 36.0, 43.0, 67.0, 110.0, 122.0, 192.0, 285.0, 467.0, 570.0, 806.0, 1172.0, 1794.0, 2637.0, 4010.0, 5900.0, 8961.0, 13862.0, 21106.0, 34053.0, 55921.0, 98944.0, 192478.0, 254435.0, 147231.0, 77120.0, 45509.0, 28007.0, 17709.0, 11597.0, 7676.0, 5041.0, 3422.0, 2262.0, 1501.0, 1089.0, 762.0, 469.0, 332.0, 239.0, 175.0, 116.0, 85.0, 58.0, 44.0, 30.0, 23.0, 11.0, 9.0, 3.0, 3.0, 2.0, 5.0], "bins": [-2.419921875, -2.349365234375, -2.27880859375, -2.208251953125, -2.1376953125, -2.067138671875, -1.99658203125, -1.926025390625, -1.85546875, -1.784912109375, -1.71435546875, -1.643798828125, -1.5732421875, -1.502685546875, -1.43212890625, -1.361572265625, -1.291015625, -1.220458984375, -1.14990234375, -1.079345703125, -1.0087890625, -0.938232421875, -0.86767578125, -0.797119140625, -0.7265625, -0.656005859375, -0.58544921875, -0.514892578125, -0.4443359375, -0.373779296875, -0.30322265625, -0.232666015625, -0.162109375, -0.091552734375, -0.02099609375, 0.049560546875, 0.1201171875, 0.190673828125, 0.26123046875, 0.331787109375, 0.40234375, 0.472900390625, 0.54345703125, 0.614013671875, 0.6845703125, 0.755126953125, 0.82568359375, 0.896240234375, 0.966796875, 1.037353515625, 1.10791015625, 1.178466796875, 1.2490234375, 1.319580078125, 1.39013671875, 1.460693359375, 1.53125, 1.601806640625, 1.67236328125, 1.742919921875, 1.8134765625, 1.884033203125, 1.95458984375, 2.025146484375, 2.095703125]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 7.0, 3.0, 7.0, 5.0, 3.0, 6.0, 8.0, 6.0, 11.0, 14.0, 19.0, 29.0, 24.0, 21.0, 22.0, 34.0, 32.0, 38.0, 37.0, 28.0, 53.0, 43.0, 46.0, 1066.0, 46.0, 52.0, 31.0, 45.0, 44.0, 38.0, 41.0, 21.0, 33.0, 22.0, 13.0, 17.0, 9.0, 14.0, 12.0, 5.0, 4.0, 9.0, 4.0, 8.0, 2.0, 1.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.3125, -29.2724609375, -28.232421875, -27.1923828125, -26.15234375, -25.1123046875, -24.072265625, -23.0322265625, -21.9921875, -20.9521484375, -19.912109375, -18.8720703125, -17.83203125, -16.7919921875, -15.751953125, -14.7119140625, -13.671875, -12.6318359375, -11.591796875, -10.5517578125, -9.51171875, -8.4716796875, -7.431640625, -6.3916015625, -5.3515625, -4.3115234375, -3.271484375, -2.2314453125, -1.19140625, -0.1513671875, 0.888671875, 1.9287109375, 2.96875, 4.0087890625, 5.048828125, 6.0888671875, 7.12890625, 8.1689453125, 9.208984375, 10.2490234375, 11.2890625, 12.3291015625, 13.369140625, 14.4091796875, 15.44921875, 16.4892578125, 17.529296875, 18.5693359375, 19.609375, 20.6494140625, 21.689453125, 22.7294921875, 23.76953125, 24.8095703125, 25.849609375, 26.8896484375, 27.9296875, 28.9697265625, 30.009765625, 31.0498046875, 32.08984375, 33.1298828125, 34.169921875, 35.2099609375, 36.25]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 7.0, 8.0, 15.0, 17.0, 20.0, 20.0, 39.0, 67.0, 96.0, 161.0, 234.0, 354.0, 519.0, 872.0, 1285.0, 2181.0, 3447.0, 5580.0, 9322.0, 15963.0, 28607.0, 53910.0, 111940.0, 454492.0, 1162338.0, 117721.0, 56335.0, 29961.0, 16694.0, 9747.0, 5699.0, 3511.0, 2170.0, 1349.0, 845.0, 557.0, 344.0, 259.0, 144.0, 111.0, 59.0, 52.0, 33.0, 12.0, 13.0, 11.0, 9.0, 5.0, 5.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.681640625, -2.59234619140625, -2.5030517578125, -2.41375732421875, -2.324462890625, -2.23516845703125, -2.1458740234375, -2.05657958984375, -1.96728515625, -1.87799072265625, -1.7886962890625, -1.69940185546875, -1.610107421875, -1.52081298828125, -1.4315185546875, -1.34222412109375, -1.2529296875, -1.16363525390625, -1.0743408203125, -0.98504638671875, -0.895751953125, -0.80645751953125, -0.7171630859375, -0.62786865234375, -0.53857421875, -0.44927978515625, -0.3599853515625, -0.27069091796875, -0.181396484375, -0.09210205078125, -0.0028076171875, 0.08648681640625, 0.17578125, 0.26507568359375, 0.3543701171875, 0.44366455078125, 0.532958984375, 0.62225341796875, 0.7115478515625, 0.80084228515625, 0.89013671875, 0.97943115234375, 1.0687255859375, 1.15802001953125, 1.247314453125, 1.33660888671875, 1.4259033203125, 1.51519775390625, 1.6044921875, 1.69378662109375, 1.7830810546875, 1.87237548828125, 1.961669921875, 2.05096435546875, 2.1402587890625, 2.22955322265625, 2.31884765625, 2.40814208984375, 2.4974365234375, 2.58673095703125, 2.676025390625, 2.76531982421875, 2.8546142578125, 2.94390869140625, 3.033203125]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 5.0, 5.0, 3.0, 9.0, 5.0, 8.0, 13.0, 11.0, 16.0, 19.0, 10.0, 16.0, 24.0, 22.0, 28.0, 42.0, 40.0, 58.0, 87.0, 86.0, 114.0, 59.0, 48.0, 46.0, 21.0, 36.0, 26.0, 23.0, 15.0, 15.0, 15.0, 12.0, 16.0, 7.0, 11.0, 6.0, 6.0, 10.0, 4.0, 2.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0], "bins": [-0.031280517578125, -0.03035426139831543, -0.02942800521850586, -0.02850174903869629, -0.02757549285888672, -0.02664923667907715, -0.025722980499267578, -0.024796724319458008, -0.023870468139648438, -0.022944211959838867, -0.022017955780029297, -0.021091699600219727, -0.020165443420410156, -0.019239187240600586, -0.018312931060791016, -0.017386674880981445, -0.016460418701171875, -0.015534162521362305, -0.014607906341552734, -0.013681650161743164, -0.012755393981933594, -0.011829137802124023, -0.010902881622314453, -0.009976625442504883, -0.009050369262695312, -0.008124113082885742, -0.007197856903076172, -0.0062716007232666016, -0.005345344543457031, -0.004419088363647461, -0.0034928321838378906, -0.0025665760040283203, -0.00164031982421875, -0.0007140636444091797, 0.00021219253540039062, 0.001138448715209961, 0.0020647048950195312, 0.0029909610748291016, 0.003917217254638672, 0.004843473434448242, 0.0057697296142578125, 0.006695985794067383, 0.007622241973876953, 0.008548498153686523, 0.009474754333496094, 0.010401010513305664, 0.011327266693115234, 0.012253522872924805, 0.013179779052734375, 0.014106035232543945, 0.015032291412353516, 0.015958547592163086, 0.016884803771972656, 0.017811059951782227, 0.018737316131591797, 0.019663572311401367, 0.020589828491210938, 0.021516084671020508, 0.022442340850830078, 0.02336859703063965, 0.02429485321044922, 0.02522110939025879, 0.02614736557006836, 0.02707362174987793, 0.0279998779296875]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 4.0, 3.0, 7.0, 7.0, 8.0, 9.0, 12.0, 13.0, 19.0, 21.0, 21.0, 34.0, 33.0, 42.0, 64.0, 62.0, 95.0, 205.0, 444.0, 5204.0, 1004720.0, 36043.0, 763.0, 214.0, 151.0, 63.0, 67.0, 42.0, 38.0, 25.0, 25.0, 18.0, 20.0, 11.0, 12.0, 11.0, 7.0, 5.0, 7.0, 4.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5517578125, -0.5325241088867188, -0.5132904052734375, -0.49405670166015625, -0.474822998046875, -0.45558929443359375, -0.4363555908203125, -0.41712188720703125, -0.39788818359375, -0.37865447998046875, -0.3594207763671875, -0.34018707275390625, -0.320953369140625, -0.30171966552734375, -0.2824859619140625, -0.26325225830078125, -0.2440185546875, -0.22478485107421875, -0.2055511474609375, -0.18631744384765625, -0.167083740234375, -0.14785003662109375, -0.1286163330078125, -0.10938262939453125, -0.09014892578125, -0.07091522216796875, -0.0516815185546875, -0.03244781494140625, -0.013214111328125, 0.00601959228515625, 0.0252532958984375, 0.04448699951171875, 0.063720703125, 0.08295440673828125, 0.1021881103515625, 0.12142181396484375, 0.140655517578125, 0.15988922119140625, 0.1791229248046875, 0.19835662841796875, 0.21759033203125, 0.23682403564453125, 0.2560577392578125, 0.27529144287109375, 0.294525146484375, 0.31375885009765625, 0.3329925537109375, 0.35222625732421875, 0.3714599609375, 0.39069366455078125, 0.4099273681640625, 0.42916107177734375, 0.448394775390625, 0.46762847900390625, 0.4868621826171875, 0.5060958862304688, 0.52532958984375, 0.5445632934570312, 0.5637969970703125, 0.5830307006835938, 0.602264404296875, 0.6214981079101562, 0.6407318115234375, 0.6599655151367188, 0.67919921875]}, "gradients/decoder.transformer.h.8.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 15.0, 39.0, 150.0, 477.0, 240.0, 64.0, 14.0, 10.0, 2.0, 3.0], "bins": [-0.1085503026843071, -0.10666616261005402, -0.10478202998638153, -0.10289788991212845, -0.10101374983787537, -0.09912961721420288, -0.0972454771399498, -0.09536133706569672, -0.09347720444202423, -0.09159306436777115, -0.08970893174409866, -0.08782479166984558, -0.0859406515955925, -0.08405651897192001, -0.08217237889766693, -0.08028823882341385, -0.07840409874916077, -0.07651995867490768, -0.0746358260512352, -0.07275168597698212, -0.07086754590272903, -0.06898341327905655, -0.06709927320480347, -0.06521513313055038, -0.0633310005068779, -0.061446864157915115, -0.05956272408366203, -0.05767858773469925, -0.055794451385736465, -0.05391031503677368, -0.0520261749625206, -0.050142038613557816, -0.04825790226459503, -0.04637376591563225, -0.044489625841379166, -0.04260548949241638, -0.0407213531434536, -0.038837216794490814, -0.03695307672023773, -0.03506894037127495, -0.033184800297021866, -0.03130066394805908, -0.02941652573645115, -0.027532387524843216, -0.025648251175880432, -0.0237641129642725, -0.021879974752664566, -0.019995838403701782, -0.018111702054739, -0.016227563843131065, -0.014343427494168282, -0.012459289282560349, -0.01057515200227499, -0.008691014721989632, -0.006806876510381699, -0.00492273923009634, -0.003038601018488407, -0.001154463505372405, 0.000729674007743597, 0.0026138117536902428, 0.004497949033975601, 0.00638208631426096, 0.008266224525868893, 0.010150361806154251, 0.01203449908643961]}, "gradients/decoder.transformer.h.8.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 4.0, 3.0, 4.0, 6.0, 10.0, 4.0, 11.0, 15.0, 18.0, 14.0, 23.0, 35.0, 30.0, 26.0, 31.0, 37.0, 37.0, 56.0, 36.0, 43.0, 35.0, 36.0, 45.0, 55.0, 44.0, 44.0, 45.0, 38.0, 37.0, 34.0, 25.0, 31.0, 26.0, 11.0, 11.0, 16.0, 10.0, 2.0, 7.0, 2.0, 2.0, 3.0, 3.0, 3.0, 1.0, 0.0, 3.0, 0.0, 1.0, 2.0], "bins": [-0.02012455463409424, -0.019553914666175842, -0.018983274698257446, -0.01841263473033905, -0.017841994762420654, -0.01727135479450226, -0.016700714826583862, -0.016130074858665466, -0.01555943489074707, -0.014988794922828674, -0.014418154954910278, -0.013847514986991882, -0.013276875019073486, -0.01270623505115509, -0.012135595083236694, -0.011564955115318298, -0.010994315147399902, -0.010423675179481506, -0.00985303521156311, -0.009282395243644714, -0.008711755275726318, -0.008141115307807922, -0.007570475339889526, -0.00699983537197113, -0.006429195404052734, -0.005858555436134338, -0.005287915468215942, -0.004717275500297546, -0.00414663553237915, -0.0035759955644607544, -0.0030053555965423584, -0.0024347156286239624, -0.0018640756607055664, -0.0012934356927871704, -0.0007227957248687744, -0.00015215575695037842, 0.0004184842109680176, 0.0009891241788864136, 0.0015597641468048096, 0.0021304041147232056, 0.0027010440826416016, 0.0032716840505599976, 0.0038423240184783936, 0.0044129639863967896, 0.0049836039543151855, 0.0055542439222335815, 0.0061248838901519775, 0.0066955238580703735, 0.0072661638259887695, 0.007836803793907166, 0.008407443761825562, 0.008978083729743958, 0.009548723697662354, 0.01011936366558075, 0.010690003633499146, 0.011260643601417542, 0.011831283569335938, 0.012401923537254333, 0.01297256350517273, 0.013543203473091125, 0.014113843441009521, 0.014684483408927917, 0.015255123376846313, 0.01582576334476471, 0.016396403312683105]}, "gradients/decoder.transformer.h.8.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 4.0, 3.0, 5.0, 7.0, 6.0, 3.0, 10.0, 11.0, 14.0, 21.0, 25.0, 17.0, 34.0, 22.0, 29.0, 39.0, 47.0, 34.0, 40.0, 35.0, 47.0, 53.0, 48.0, 50.0, 51.0, 40.0, 44.0, 39.0, 44.0, 30.0, 23.0, 26.0, 21.0, 23.0, 19.0, 10.0, 8.0, 8.0, 9.0, 2.0, 1.0, 0.0, 4.0, 4.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-56.90625, -55.16748046875, -53.4287109375, -51.68994140625, -49.951171875, -48.21240234375, -46.4736328125, -44.73486328125, -42.99609375, -41.25732421875, -39.5185546875, -37.77978515625, -36.041015625, -34.30224609375, -32.5634765625, -30.82470703125, -29.0859375, -27.34716796875, -25.6083984375, -23.86962890625, -22.130859375, -20.39208984375, -18.6533203125, -16.91455078125, -15.17578125, -13.43701171875, -11.6982421875, -9.95947265625, -8.220703125, -6.48193359375, -4.7431640625, -3.00439453125, -1.265625, 0.47314453125, 2.2119140625, 3.95068359375, 5.689453125, 7.42822265625, 9.1669921875, 10.90576171875, 12.64453125, 14.38330078125, 16.1220703125, 17.86083984375, 19.599609375, 21.33837890625, 23.0771484375, 24.81591796875, 26.5546875, 28.29345703125, 30.0322265625, 31.77099609375, 33.509765625, 35.24853515625, 36.9873046875, 38.72607421875, 40.46484375, 42.20361328125, 43.9423828125, 45.68115234375, 47.419921875, 49.15869140625, 50.8974609375, 52.63623046875, 54.375]}, "gradients/decoder.transformer.h.8.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 10.0, 7.0, 13.0, 13.0, 19.0, 31.0, 39.0, 50.0, 107.0, 133.0, 231.0, 341.0, 567.0, 886.0, 1512.0, 2651.0, 5064.0, 10963.0, 30424.0, 145961.0, 664028.0, 135106.0, 28556.0, 10308.0, 4963.0, 2665.0, 1542.0, 842.0, 546.0, 326.0, 223.0, 131.0, 96.0, 62.0, 46.0, 27.0, 19.0, 15.0, 8.0, 12.0, 3.0, 4.0, 7.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-57.5625, -55.73388671875, -53.9052734375, -52.07666015625, -50.248046875, -48.41943359375, -46.5908203125, -44.76220703125, -42.93359375, -41.10498046875, -39.2763671875, -37.44775390625, -35.619140625, -33.79052734375, -31.9619140625, -30.13330078125, -28.3046875, -26.47607421875, -24.6474609375, -22.81884765625, -20.990234375, -19.16162109375, -17.3330078125, -15.50439453125, -13.67578125, -11.84716796875, -10.0185546875, -8.18994140625, -6.361328125, -4.53271484375, -2.7041015625, -0.87548828125, 0.953125, 2.78173828125, 4.6103515625, 6.43896484375, 8.267578125, 10.09619140625, 11.9248046875, 13.75341796875, 15.58203125, 17.41064453125, 19.2392578125, 21.06787109375, 22.896484375, 24.72509765625, 26.5537109375, 28.38232421875, 30.2109375, 32.03955078125, 33.8681640625, 35.69677734375, 37.525390625, 39.35400390625, 41.1826171875, 43.01123046875, 44.83984375, 46.66845703125, 48.4970703125, 50.32568359375, 52.154296875, 53.98291015625, 55.8115234375, 57.64013671875, 59.46875]}, "gradients/decoder.transformer.h.8.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 8.0, 5.0, 9.0, 10.0, 11.0, 16.0, 19.0, 19.0, 35.0, 35.0, 49.0, 43.0, 38.0, 48.0, 64.0, 60.0, 1634.0, 470.0, 76.0, 62.0, 42.0, 39.0, 36.0, 38.0, 32.0, 24.0, 29.0, 21.0, 22.0, 9.0, 13.0, 8.0, 5.0, 13.0, 3.0, 4.0, 2.0, 4.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-176.875, -171.5078125, -166.140625, -160.7734375, -155.40625, -150.0390625, -144.671875, -139.3046875, -133.9375, -128.5703125, -123.203125, -117.8359375, -112.46875, -107.1015625, -101.734375, -96.3671875, -91.0, -85.6328125, -80.265625, -74.8984375, -69.53125, -64.1640625, -58.796875, -53.4296875, -48.0625, -42.6953125, -37.328125, -31.9609375, -26.59375, -21.2265625, -15.859375, -10.4921875, -5.125, 0.2421875, 5.609375, 10.9765625, 16.34375, 21.7109375, 27.078125, 32.4453125, 37.8125, 43.1796875, 48.546875, 53.9140625, 59.28125, 64.6484375, 70.015625, 75.3828125, 80.75, 86.1171875, 91.484375, 96.8515625, 102.21875, 107.5859375, 112.953125, 118.3203125, 123.6875, 129.0546875, 134.421875, 139.7890625, 145.15625, 150.5234375, 155.890625, 161.2578125, 166.625]}, "gradients/decoder.transformer.h.8.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 3.0, 6.0, 10.0, 6.0, 14.0, 20.0, 9.0, 28.0, 33.0, 44.0, 53.0, 72.0, 92.0, 124.0, 213.0, 301.0, 878.0, 4950.0, 135212.0, 2989291.0, 11711.0, 1409.0, 446.0, 229.0, 140.0, 125.0, 79.0, 62.0, 38.0, 26.0, 20.0, 17.0, 12.0, 10.0, 9.0, 6.0, 6.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-279.0, -270.05078125, -261.1015625, -252.15234375, -243.203125, -234.25390625, -225.3046875, -216.35546875, -207.40625, -198.45703125, -189.5078125, -180.55859375, -171.609375, -162.66015625, -153.7109375, -144.76171875, -135.8125, -126.86328125, -117.9140625, -108.96484375, -100.015625, -91.06640625, -82.1171875, -73.16796875, -64.21875, -55.26953125, -46.3203125, -37.37109375, -28.421875, -19.47265625, -10.5234375, -1.57421875, 7.375, 16.32421875, 25.2734375, 34.22265625, 43.171875, 52.12109375, 61.0703125, 70.01953125, 78.96875, 87.91796875, 96.8671875, 105.81640625, 114.765625, 123.71484375, 132.6640625, 141.61328125, 150.5625, 159.51171875, 168.4609375, 177.41015625, 186.359375, 195.30859375, 204.2578125, 213.20703125, 222.15625, 231.10546875, 240.0546875, 249.00390625, 257.953125, 266.90234375, 275.8515625, 284.80078125, 293.75]}, "gradients/decoder.transformer.h.8.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 191.0, 799.0, 24.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2496.6630859375, -2453.805908203125, -2410.94873046875, -2368.091552734375, -2325.234130859375, -2282.376953125, -2239.519775390625, -2196.66259765625, -2153.805419921875, -2110.9482421875, -2068.091064453125, -2025.2337646484375, -1982.3765869140625, -1939.5194091796875, -1896.662109375, -1853.804931640625, -1810.94775390625, -1768.090576171875, -1725.2332763671875, -1682.3760986328125, -1639.5189208984375, -1596.6617431640625, -1553.804443359375, -1510.947265625, -1468.0899658203125, -1425.2327880859375, -1382.37548828125, -1339.518310546875, -1296.6611328125, -1253.803955078125, -1210.9466552734375, -1168.0894775390625, -1125.232177734375, -1082.375, -1039.5177001953125, -996.6605224609375, -953.8033447265625, -910.9461059570312, -868.0888671875, -825.231689453125, -782.37451171875, -739.5172729492188, -696.6600952148438, -653.8028564453125, -610.9456787109375, -568.0884399414062, -525.231201171875, -482.3740234375, -439.5168151855469, -396.65960693359375, -353.8023986816406, -310.9451904296875, -268.08795166015625, -225.2307586669922, -182.37353515625, -139.51632690429688, -96.65911865234375, -53.80190658569336, -10.944694519042969, 31.912521362304688, 74.76972961425781, 117.62693786621094, 160.48416137695312, 203.34136962890625, 246.19857788085938]}, "gradients/decoder.transformer.h.8.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 5.0, 5.0, 7.0, 7.0, 12.0, 19.0, 13.0, 12.0, 19.0, 19.0, 22.0, 29.0, 28.0, 20.0, 34.0, 38.0, 49.0, 46.0, 31.0, 54.0, 48.0, 34.0, 49.0, 47.0, 44.0, 28.0, 25.0, 33.0, 38.0, 31.0, 36.0, 22.0, 21.0, 16.0, 12.0, 13.0, 12.0, 8.0, 7.0, 9.0, 1.0, 3.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-351.8155212402344, -339.8583984375, -327.9012451171875, -315.9441223144531, -303.98699951171875, -292.0298767089844, -280.07275390625, -268.1156005859375, -256.1584777832031, -244.20135498046875, -232.2442169189453, -220.28707885742188, -208.3299560546875, -196.37283325195312, -184.4156951904297, -172.45855712890625, -160.50143432617188, -148.5443115234375, -136.58717346191406, -124.63004302978516, -112.67291259765625, -100.71578216552734, -88.75865173339844, -76.80152130126953, -64.84439086914062, -52.88726043701172, -40.93013000488281, -28.972999572753906, -17.015869140625, -5.058738708496094, 6.8983917236328125, 18.85552215576172, 30.8126220703125, 42.769752502441406, 54.72688293457031, 66.68401336669922, 78.64114379882812, 90.59827423095703, 102.55540466308594, 114.51253509521484, 126.46966552734375, 138.42678833007812, 150.38392639160156, 162.341064453125, 174.29818725585938, 186.25531005859375, 198.2124481201172, 210.16958618164062, 222.126708984375, 234.08383178710938, 246.0409698486328, 257.99810791015625, 269.9552307128906, 281.912353515625, 293.8695068359375, 305.8266296386719, 317.78375244140625, 329.7408752441406, 341.697998046875, 353.6551513671875, 365.6122741699219, 377.56939697265625, 389.52655029296875, 401.4836730957031, 413.4407958984375]}, "gradients/decoder.transformer.h.7.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 4.0, 6.0, 4.0, 1.0, 6.0, 5.0, 11.0, 11.0, 16.0, 21.0, 19.0, 20.0, 26.0, 31.0, 25.0, 33.0, 38.0, 34.0, 45.0, 36.0, 41.0, 48.0, 46.0, 49.0, 45.0, 45.0, 38.0, 39.0, 49.0, 36.0, 30.0, 25.0, 16.0, 28.0, 22.0, 20.0, 9.0, 5.0, 10.0, 5.0, 0.0, 5.0, 2.0, 1.0, 2.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-55.65625, -53.9814453125, -52.306640625, -50.6318359375, -48.95703125, -47.2822265625, -45.607421875, -43.9326171875, -42.2578125, -40.5830078125, -38.908203125, -37.2333984375, -35.55859375, -33.8837890625, -32.208984375, -30.5341796875, -28.859375, -27.1845703125, -25.509765625, -23.8349609375, -22.16015625, -20.4853515625, -18.810546875, -17.1357421875, -15.4609375, -13.7861328125, -12.111328125, -10.4365234375, -8.76171875, -7.0869140625, -5.412109375, -3.7373046875, -2.0625, -0.3876953125, 1.287109375, 2.9619140625, 4.63671875, 6.3115234375, 7.986328125, 9.6611328125, 11.3359375, 13.0107421875, 14.685546875, 16.3603515625, 18.03515625, 19.7099609375, 21.384765625, 23.0595703125, 24.734375, 26.4091796875, 28.083984375, 29.7587890625, 31.43359375, 33.1083984375, 34.783203125, 36.4580078125, 38.1328125, 39.8076171875, 41.482421875, 43.1572265625, 44.83203125, 46.5068359375, 48.181640625, 49.8564453125, 51.53125]}, "gradients/decoder.transformer.h.7.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 6.0, 5.0, 7.0, 11.0, 16.0, 23.0, 21.0, 39.0, 50.0, 69.0, 109.0, 126.0, 165.0, 275.0, 383.0, 531.0, 787.0, 1102.0, 1782.0, 2826.0, 5055.0, 10264.0, 31124.0, 337426.0, 3338372.0, 403824.0, 35804.0, 10817.0, 5112.0, 2852.0, 1743.0, 1086.0, 764.0, 520.0, 360.0, 235.0, 203.0, 120.0, 82.0, 66.0, 40.0, 23.0, 19.0, 12.0, 10.0, 4.0, 11.0, 2.0, 1.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-122.1875, -118.4306640625, -114.673828125, -110.9169921875, -107.16015625, -103.4033203125, -99.646484375, -95.8896484375, -92.1328125, -88.3759765625, -84.619140625, -80.8623046875, -77.10546875, -73.3486328125, -69.591796875, -65.8349609375, -62.078125, -58.3212890625, -54.564453125, -50.8076171875, -47.05078125, -43.2939453125, -39.537109375, -35.7802734375, -32.0234375, -28.2666015625, -24.509765625, -20.7529296875, -16.99609375, -13.2392578125, -9.482421875, -5.7255859375, -1.96875, 1.7880859375, 5.544921875, 9.3017578125, 13.05859375, 16.8154296875, 20.572265625, 24.3291015625, 28.0859375, 31.8427734375, 35.599609375, 39.3564453125, 43.11328125, 46.8701171875, 50.626953125, 54.3837890625, 58.140625, 61.8974609375, 65.654296875, 69.4111328125, 73.16796875, 76.9248046875, 80.681640625, 84.4384765625, 88.1953125, 91.9521484375, 95.708984375, 99.4658203125, 103.22265625, 106.9794921875, 110.736328125, 114.4931640625, 118.25]}, "gradients/decoder.transformer.h.7.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 5.0, 1.0, 5.0, 6.0, 4.0, 5.0, 8.0, 8.0, 9.0, 6.0, 10.0, 25.0, 31.0, 41.0, 43.0, 85.0, 139.0, 309.0, 540.0, 969.0, 781.0, 432.0, 259.0, 118.0, 66.0, 30.0, 38.0, 22.0, 17.0, 13.0, 17.0, 8.0, 12.0, 6.0, 5.0, 1.0, 2.0, 4.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-93.5625, -90.6171875, -87.671875, -84.7265625, -81.78125, -78.8359375, -75.890625, -72.9453125, -70.0, -67.0546875, -64.109375, -61.1640625, -58.21875, -55.2734375, -52.328125, -49.3828125, -46.4375, -43.4921875, -40.546875, -37.6015625, -34.65625, -31.7109375, -28.765625, -25.8203125, -22.875, -19.9296875, -16.984375, -14.0390625, -11.09375, -8.1484375, -5.203125, -2.2578125, 0.6875, 3.6328125, 6.578125, 9.5234375, 12.46875, 15.4140625, 18.359375, 21.3046875, 24.25, 27.1953125, 30.140625, 33.0859375, 36.03125, 38.9765625, 41.921875, 44.8671875, 47.8125, 50.7578125, 53.703125, 56.6484375, 59.59375, 62.5390625, 65.484375, 68.4296875, 71.375, 74.3203125, 77.265625, 80.2109375, 83.15625, 86.1015625, 89.046875, 91.9921875, 94.9375]}, "gradients/decoder.transformer.h.7.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 4.0, 6.0, 5.0, 12.0, 14.0, 17.0, 18.0, 29.0, 40.0, 58.0, 96.0, 176.0, 336.0, 809.0, 2119.0, 6722.0, 25783.0, 247471.0, 3804286.0, 83789.0, 15370.0, 4408.0, 1487.0, 544.0, 275.0, 155.0, 82.0, 55.0, 28.0, 20.0, 16.0, 17.0, 12.0, 5.0, 8.0, 7.0, 2.0, 4.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-275.5, -266.9765625, -258.453125, -249.9296875, -241.40625, -232.8828125, -224.359375, -215.8359375, -207.3125, -198.7890625, -190.265625, -181.7421875, -173.21875, -164.6953125, -156.171875, -147.6484375, -139.125, -130.6015625, -122.078125, -113.5546875, -105.03125, -96.5078125, -87.984375, -79.4609375, -70.9375, -62.4140625, -53.890625, -45.3671875, -36.84375, -28.3203125, -19.796875, -11.2734375, -2.75, 5.7734375, 14.296875, 22.8203125, 31.34375, 39.8671875, 48.390625, 56.9140625, 65.4375, 73.9609375, 82.484375, 91.0078125, 99.53125, 108.0546875, 116.578125, 125.1015625, 133.625, 142.1484375, 150.671875, 159.1953125, 167.71875, 176.2421875, 184.765625, 193.2890625, 201.8125, 210.3359375, 218.859375, 227.3828125, 235.90625, 244.4296875, 252.953125, 261.4765625, 270.0]}, "gradients/decoder.transformer.h.7.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 5.0, 9.0, 11.0, 19.0, 44.0, 76.0, 149.0, 284.0, 194.0, 110.0, 58.0, 27.0, 11.0, 11.0, 2.0, 2.0, 1.0, 4.0], "bins": [-1070.8450927734375, -1050.9007568359375, -1030.956298828125, -1011.011962890625, -991.0675659179688, -971.1232299804688, -951.1788330078125, -931.2344970703125, -911.2901000976562, -891.345703125, -871.4013671875, -851.4569702148438, -831.5125732421875, -811.5682373046875, -791.6238403320312, -771.679443359375, -751.735107421875, -731.7907104492188, -711.8463745117188, -691.9019775390625, -671.9575805664062, -652.0132446289062, -632.06884765625, -612.12451171875, -592.1800537109375, -572.2356567382812, -552.2913208007812, -532.346923828125, -512.4025268554688, -492.45819091796875, -472.5137939453125, -452.5694274902344, -432.62506103515625, -412.6806945800781, -392.7362976074219, -372.79193115234375, -352.8475646972656, -332.9031982421875, -312.95880126953125, -293.0144348144531, -273.0700378417969, -253.1256561279297, -233.18128967285156, -213.23690795898438, -193.29254150390625, -173.34815979003906, -153.40377807617188, -133.45941162109375, -113.51502990722656, -93.5706558227539, -73.62628173828125, -53.68190002441406, -33.737525939941406, -13.79315185546875, 6.1512298583984375, 26.095596313476562, 46.03997802734375, 65.9843521118164, 85.92872619628906, 105.87310791015625, 125.8174819946289, 145.76185607910156, 165.70623779296875, 185.65060424804688, 205.59498596191406]}, "gradients/decoder.transformer.h.7.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 7.0, 5.0, 4.0, 2.0, 6.0, 4.0, 9.0, 8.0, 13.0, 13.0, 17.0, 23.0, 20.0, 25.0, 18.0, 26.0, 35.0, 26.0, 24.0, 38.0, 46.0, 34.0, 48.0, 39.0, 38.0, 35.0, 39.0, 32.0, 37.0, 44.0, 37.0, 35.0, 29.0, 36.0, 22.0, 17.0, 20.0, 8.0, 14.0, 15.0, 14.0, 12.0, 9.0, 3.0, 10.0, 4.0, 5.0, 5.0, 5.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-234.14645385742188, -227.0242462158203, -219.90203857421875, -212.7798309326172, -205.65762329101562, -198.535400390625, -191.41319274902344, -184.29098510742188, -177.1687774658203, -170.04656982421875, -162.9243621826172, -155.80215454101562, -148.679931640625, -141.5577392578125, -134.43551635742188, -127.31330871582031, -120.19110107421875, -113.06889343261719, -105.94668579101562, -98.82447052001953, -91.70226287841797, -84.5800552368164, -77.45783996582031, -70.33563232421875, -63.21342468261719, -56.091217041015625, -48.9690055847168, -41.84679412841797, -34.724586486816406, -27.602378845214844, -20.480167388916016, -13.357955932617188, -6.235748291015625, 0.8864612579345703, 8.008670806884766, 15.130880355834961, 22.253089904785156, 29.37529754638672, 36.49750900268555, 43.619720458984375, 50.74192810058594, 57.8641357421875, 64.98634338378906, 72.10855865478516, 79.23076629638672, 86.35297393798828, 93.47518920898438, 100.59739685058594, 107.7196044921875, 114.84181213378906, 121.96401977539062, 129.0862274169922, 136.20843505859375, 143.33065795898438, 150.45286560058594, 157.5750732421875, 164.69728088378906, 171.81948852539062, 178.9416961669922, 186.06390380859375, 193.18612670898438, 200.30831909179688, 207.4305419921875, 214.55274963378906, 221.67495727539062]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 4.0, 3.0, 3.0, 4.0, 3.0, 4.0, 6.0, 5.0, 8.0, 15.0, 14.0, 12.0, 17.0, 21.0, 17.0, 36.0, 30.0, 31.0, 31.0, 33.0, 36.0, 45.0, 40.0, 50.0, 40.0, 48.0, 38.0, 30.0, 44.0, 39.0, 42.0, 45.0, 35.0, 27.0, 29.0, 23.0, 20.0, 17.0, 19.0, 9.0, 8.0, 5.0, 11.0, 3.0, 4.0, 1.0, 2.0, 2.0, 5.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.46875, -45.82470703125, -44.1806640625, -42.53662109375, -40.892578125, -39.24853515625, -37.6044921875, -35.96044921875, -34.31640625, -32.67236328125, -31.0283203125, -29.38427734375, -27.740234375, -26.09619140625, -24.4521484375, -22.80810546875, -21.1640625, -19.52001953125, -17.8759765625, -16.23193359375, -14.587890625, -12.94384765625, -11.2998046875, -9.65576171875, -8.01171875, -6.36767578125, -4.7236328125, -3.07958984375, -1.435546875, 0.20849609375, 1.8525390625, 3.49658203125, 5.140625, 6.78466796875, 8.4287109375, 10.07275390625, 11.716796875, 13.36083984375, 15.0048828125, 16.64892578125, 18.29296875, 19.93701171875, 21.5810546875, 23.22509765625, 24.869140625, 26.51318359375, 28.1572265625, 29.80126953125, 31.4453125, 33.08935546875, 34.7333984375, 36.37744140625, 38.021484375, 39.66552734375, 41.3095703125, 42.95361328125, 44.59765625, 46.24169921875, 47.8857421875, 49.52978515625, 51.173828125, 52.81787109375, 54.4619140625, 56.10595703125, 57.75]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 1.0, 5.0, 9.0, 18.0, 36.0, 48.0, 72.0, 104.0, 167.0, 232.0, 340.0, 512.0, 720.0, 1145.0, 1811.0, 2706.0, 4319.0, 6952.0, 11433.0, 19601.0, 33986.0, 63927.0, 132806.0, 293956.0, 242501.0, 105785.0, 53000.0, 28921.0, 16724.0, 10051.0, 5968.0, 3825.0, 2327.0, 1580.0, 1026.0, 636.0, 454.0, 265.0, 190.0, 118.0, 101.0, 64.0, 39.0, 27.0, 23.0, 8.0, 6.0, 2.0, 8.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-2.998046875, -2.90679931640625, -2.8155517578125, -2.72430419921875, -2.633056640625, -2.54180908203125, -2.4505615234375, -2.35931396484375, -2.26806640625, -2.17681884765625, -2.0855712890625, -1.99432373046875, -1.903076171875, -1.81182861328125, -1.7205810546875, -1.62933349609375, -1.5380859375, -1.44683837890625, -1.3555908203125, -1.26434326171875, -1.173095703125, -1.08184814453125, -0.9906005859375, -0.89935302734375, -0.80810546875, -0.71685791015625, -0.6256103515625, -0.53436279296875, -0.443115234375, -0.35186767578125, -0.2606201171875, -0.16937255859375, -0.078125, 0.01312255859375, 0.1043701171875, 0.19561767578125, 0.286865234375, 0.37811279296875, 0.4693603515625, 0.56060791015625, 0.65185546875, 0.74310302734375, 0.8343505859375, 0.92559814453125, 1.016845703125, 1.10809326171875, 1.1993408203125, 1.29058837890625, 1.3818359375, 1.47308349609375, 1.5643310546875, 1.65557861328125, 1.746826171875, 1.83807373046875, 1.9293212890625, 2.02056884765625, 2.11181640625, 2.20306396484375, 2.2943115234375, 2.38555908203125, 2.476806640625, 2.56805419921875, 2.6593017578125, 2.75054931640625, 2.841796875]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 3.0, 5.0, 5.0, 2.0, 9.0, 9.0, 13.0, 14.0, 19.0, 14.0, 16.0, 21.0, 21.0, 27.0, 29.0, 36.0, 35.0, 28.0, 36.0, 30.0, 46.0, 48.0, 35.0, 1056.0, 29.0, 42.0, 45.0, 47.0, 29.0, 33.0, 28.0, 32.0, 23.0, 28.0, 14.0, 21.0, 12.0, 15.0, 16.0, 15.0, 7.0, 3.0, 16.0, 6.0, 3.0, 3.0, 3.0, 6.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-34.78125, -33.71728515625, -32.6533203125, -31.58935546875, -30.525390625, -29.46142578125, -28.3974609375, -27.33349609375, -26.26953125, -25.20556640625, -24.1416015625, -23.07763671875, -22.013671875, -20.94970703125, -19.8857421875, -18.82177734375, -17.7578125, -16.69384765625, -15.6298828125, -14.56591796875, -13.501953125, -12.43798828125, -11.3740234375, -10.31005859375, -9.24609375, -8.18212890625, -7.1181640625, -6.05419921875, -4.990234375, -3.92626953125, -2.8623046875, -1.79833984375, -0.734375, 0.32958984375, 1.3935546875, 2.45751953125, 3.521484375, 4.58544921875, 5.6494140625, 6.71337890625, 7.77734375, 8.84130859375, 9.9052734375, 10.96923828125, 12.033203125, 13.09716796875, 14.1611328125, 15.22509765625, 16.2890625, 17.35302734375, 18.4169921875, 19.48095703125, 20.544921875, 21.60888671875, 22.6728515625, 23.73681640625, 24.80078125, 25.86474609375, 26.9287109375, 27.99267578125, 29.056640625, 30.12060546875, 31.1845703125, 32.24853515625, 33.3125]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 5.0, 9.0, 9.0, 14.0, 25.0, 19.0, 48.0, 69.0, 119.0, 131.0, 251.0, 368.0, 509.0, 863.0, 1291.0, 2069.0, 3255.0, 5114.0, 8301.0, 13603.0, 22026.0, 37482.0, 68435.0, 137980.0, 1341717.0, 229912.0, 96691.0, 51260.0, 29383.0, 17610.0, 10596.0, 6659.0, 4111.0, 2565.0, 1656.0, 1050.0, 637.0, 441.0, 276.0, 202.0, 126.0, 73.0, 59.0, 37.0, 28.0, 18.0, 14.0, 12.0, 5.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-2.876953125, -2.786376953125, -2.69580078125, -2.605224609375, -2.5146484375, -2.424072265625, -2.33349609375, -2.242919921875, -2.15234375, -2.061767578125, -1.97119140625, -1.880615234375, -1.7900390625, -1.699462890625, -1.60888671875, -1.518310546875, -1.427734375, -1.337158203125, -1.24658203125, -1.156005859375, -1.0654296875, -0.974853515625, -0.88427734375, -0.793701171875, -0.703125, -0.612548828125, -0.52197265625, -0.431396484375, -0.3408203125, -0.250244140625, -0.15966796875, -0.069091796875, 0.021484375, 0.112060546875, 0.20263671875, 0.293212890625, 0.3837890625, 0.474365234375, 0.56494140625, 0.655517578125, 0.74609375, 0.836669921875, 0.92724609375, 1.017822265625, 1.1083984375, 1.198974609375, 1.28955078125, 1.380126953125, 1.470703125, 1.561279296875, 1.65185546875, 1.742431640625, 1.8330078125, 1.923583984375, 2.01416015625, 2.104736328125, 2.1953125, 2.285888671875, 2.37646484375, 2.467041015625, 2.5576171875, 2.648193359375, 2.73876953125, 2.829345703125, 2.919921875]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 3.0, 2.0, 5.0, 6.0, 7.0, 7.0, 5.0, 14.0, 18.0, 26.0, 18.0, 30.0, 31.0, 45.0, 69.0, 90.0, 154.0, 144.0, 73.0, 60.0, 50.0, 36.0, 18.0, 20.0, 21.0, 5.0, 11.0, 9.0, 7.0, 4.0, 5.0, 4.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.051971435546875, -0.05039548873901367, -0.048819541931152344, -0.047243595123291016, -0.04566764831542969, -0.04409170150756836, -0.04251575469970703, -0.0409398078918457, -0.039363861083984375, -0.03778791427612305, -0.03621196746826172, -0.03463602066040039, -0.03306007385253906, -0.031484127044677734, -0.029908180236816406, -0.028332233428955078, -0.02675628662109375, -0.025180339813232422, -0.023604393005371094, -0.022028446197509766, -0.020452499389648438, -0.01887655258178711, -0.01730060577392578, -0.015724658966064453, -0.014148712158203125, -0.012572765350341797, -0.010996818542480469, -0.00942087173461914, -0.007844924926757812, -0.006268978118896484, -0.004693031311035156, -0.003117084503173828, -0.0015411376953125, 3.4809112548828125e-05, 0.0016107559204101562, 0.0031867027282714844, 0.0047626495361328125, 0.006338596343994141, 0.007914543151855469, 0.009490489959716797, 0.011066436767578125, 0.012642383575439453, 0.014218330383300781, 0.01579427719116211, 0.017370223999023438, 0.018946170806884766, 0.020522117614746094, 0.022098064422607422, 0.02367401123046875, 0.025249958038330078, 0.026825904846191406, 0.028401851654052734, 0.029977798461914062, 0.03155374526977539, 0.03312969207763672, 0.03470563888549805, 0.036281585693359375, 0.0378575325012207, 0.03943347930908203, 0.04100942611694336, 0.04258537292480469, 0.044161319732666016, 0.045737266540527344, 0.04731321334838867, 0.04888916015625]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 4.0, 9.0, 7.0, 4.0, 7.0, 12.0, 19.0, 22.0, 35.0, 33.0, 51.0, 106.0, 168.0, 425.0, 6970.0, 1029586.0, 10066.0, 470.0, 191.0, 95.0, 83.0, 44.0, 30.0, 36.0, 23.0, 12.0, 14.0, 9.0, 6.0, 6.0, 4.0, 3.0, 3.0, 2.0, 4.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.96240234375, -0.9308853149414062, -0.8993682861328125, -0.8678512573242188, -0.836334228515625, -0.8048171997070312, -0.7733001708984375, -0.7417831420898438, -0.71026611328125, -0.6787490844726562, -0.6472320556640625, -0.6157150268554688, -0.584197998046875, -0.5526809692382812, -0.5211639404296875, -0.48964691162109375, -0.4581298828125, -0.42661285400390625, -0.3950958251953125, -0.36357879638671875, -0.332061767578125, -0.30054473876953125, -0.2690277099609375, -0.23751068115234375, -0.20599365234375, -0.17447662353515625, -0.1429595947265625, -0.11144256591796875, -0.079925537109375, -0.04840850830078125, -0.0168914794921875, 0.01462554931640625, 0.046142578125, 0.07765960693359375, 0.1091766357421875, 0.14069366455078125, 0.172210693359375, 0.20372772216796875, 0.2352447509765625, 0.26676177978515625, 0.29827880859375, 0.32979583740234375, 0.3613128662109375, 0.39282989501953125, 0.424346923828125, 0.45586395263671875, 0.4873809814453125, 0.5188980102539062, 0.5504150390625, 0.5819320678710938, 0.6134490966796875, 0.6449661254882812, 0.676483154296875, 0.7080001831054688, 0.7395172119140625, 0.7710342407226562, 0.80255126953125, 0.8340682983398438, 0.8655853271484375, 0.8971023559570312, 0.928619384765625, 0.9601364135742188, 0.9916534423828125, 1.0231704711914062, 1.0546875]}, "gradients/decoder.transformer.h.7.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 5.0, 13.0, 24.0, 55.0, 163.0, 377.0, 239.0, 95.0, 29.0, 6.0, 6.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.025633031502366066, -0.023612642660737038, -0.02159225568175316, -0.01957186684012413, -0.017551477998495102, -0.015531089156866074, -0.01351070124655962, -0.011490313336253166, -0.009469924494624138, -0.007449536118656397, -0.005429147742688656, -0.003408759366720915, -0.0013883709907531738, 0.0006320178508758545, 0.002652405761182308, 0.004672793671488762, 0.00669318251311779, 0.008713571354746819, 0.010733959265053272, 0.012754347175359726, 0.014774736016988754, 0.016795124858617783, 0.01881551370024681, 0.02083590067923069, 0.02285628952085972, 0.024876678362488747, 0.026897065341472626, 0.028917454183101654, 0.030937843024730682, 0.03295823186635971, 0.03497862070798874, 0.03699900954961777, 0.0390193909406662, 0.04103977978229523, 0.043060168623924255, 0.045080557465553284, 0.04710094630718231, 0.04912133514881134, 0.05114172026515007, 0.0531621091067791, 0.05518249794840813, 0.057202886790037155, 0.059223275631666183, 0.06124366447329521, 0.06326404958963394, 0.06528443843126297, 0.067304827272892, 0.06932521611452103, 0.07134560495615005, 0.07336599379777908, 0.07538638263940811, 0.07740677148103714, 0.07942716032266617, 0.0814475491642952, 0.08346793800592422, 0.08548831939697266, 0.08750870823860168, 0.08952909708023071, 0.09154948592185974, 0.09356987476348877, 0.0955902636051178, 0.09761065244674683, 0.09963104128837585, 0.10165143013000488, 0.10367181897163391]}, "gradients/decoder.transformer.h.7.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 6.0, 3.0, 6.0, 8.0, 14.0, 11.0, 14.0, 13.0, 24.0, 16.0, 20.0, 32.0, 36.0, 32.0, 45.0, 33.0, 27.0, 56.0, 56.0, 47.0, 49.0, 54.0, 51.0, 40.0, 43.0, 42.0, 32.0, 35.0, 21.0, 25.0, 23.0, 18.0, 24.0, 9.0, 10.0, 14.0, 6.0, 12.0, 3.0, 1.0, 2.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.021982908248901367, -0.02121187001466751, -0.020440833643078804, -0.019669797271490097, -0.01889875903725624, -0.018127720803022385, -0.017356684431433678, -0.01658564805984497, -0.015814609825611115, -0.015043572522699833, -0.014272535219788551, -0.01350149791687727, -0.012730460613965988, -0.011959423311054707, -0.011188386008143425, -0.010417348705232143, -0.009646311402320862, -0.00887527409940958, -0.008104236796498299, -0.007333199493587017, -0.0065621621906757355, -0.005791124887764454, -0.005020087584853172, -0.004249050281941891, -0.003478012979030609, -0.0027069756761193275, -0.001935938373208046, -0.0011649010702967644, -0.0003938637673854828, 0.0003771735355257988, 0.0011482108384370804, 0.001919248141348362, 0.0026902854442596436, 0.003461322747170925, 0.004232360050082207, 0.005003397352993488, 0.00577443465590477, 0.0065454719588160515, 0.007316509261727333, 0.008087546564638615, 0.008858583867549896, 0.009629621170461178, 0.01040065847337246, 0.011171695776283741, 0.011942733079195023, 0.012713770382106304, 0.013484807685017586, 0.014255844987928867, 0.015026882290840149, 0.015797920525074005, 0.016568956896662712, 0.01733999326825142, 0.018111031502485275, 0.01888206973671913, 0.01965310610830784, 0.020424142479896545, 0.0211951807141304, 0.021966218948364258, 0.022737255319952965, 0.023508291691541672, 0.024279329925775528, 0.025050368160009384, 0.02582140453159809, 0.026592440903186798, 0.027363479137420654]}, "gradients/decoder.transformer.h.7.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 4.0, 3.0, 3.0, 4.0, 3.0, 4.0, 6.0, 5.0, 8.0, 16.0, 13.0, 12.0, 17.0, 21.0, 17.0, 36.0, 30.0, 31.0, 31.0, 33.0, 36.0, 45.0, 40.0, 50.0, 40.0, 48.0, 38.0, 30.0, 44.0, 39.0, 42.0, 45.0, 35.0, 27.0, 29.0, 23.0, 20.0, 17.0, 19.0, 9.0, 8.0, 5.0, 11.0, 3.0, 4.0, 1.0, 2.0, 2.0, 5.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.46875, -45.82470703125, -44.1806640625, -42.53662109375, -40.892578125, -39.24853515625, -37.6044921875, -35.96044921875, -34.31640625, -32.67236328125, -31.0283203125, -29.38427734375, -27.740234375, -26.09619140625, -24.4521484375, -22.80810546875, -21.1640625, -19.52001953125, -17.8759765625, -16.23193359375, -14.587890625, -12.94384765625, -11.2998046875, -9.65576171875, -8.01171875, -6.36767578125, -4.7236328125, -3.07958984375, -1.435546875, 0.20849609375, 1.8525390625, 3.49658203125, 5.140625, 6.78466796875, 8.4287109375, 10.07275390625, 11.716796875, 13.36083984375, 15.0048828125, 16.64892578125, 18.29296875, 19.93701171875, 21.5810546875, 23.22509765625, 24.869140625, 26.51318359375, 28.1572265625, 29.80126953125, 31.4453125, 33.08935546875, 34.7333984375, 36.37744140625, 38.021484375, 39.66552734375, 41.3095703125, 42.95361328125, 44.59765625, 46.24169921875, 47.8857421875, 49.52978515625, 51.173828125, 52.81787109375, 54.4619140625, 56.10595703125, 57.75]}, "gradients/decoder.transformer.h.7.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 4.0, 4.0, 3.0, 5.0, 3.0, 4.0, 10.0, 4.0, 11.0, 20.0, 15.0, 20.0, 27.0, 43.0, 39.0, 75.0, 85.0, 126.0, 181.0, 243.0, 467.0, 867.0, 2772.0, 19668.0, 748100.0, 261340.0, 10523.0, 1939.0, 729.0, 361.0, 250.0, 165.0, 117.0, 75.0, 62.0, 54.0, 33.0, 26.0, 26.0, 14.0, 13.0, 11.0, 12.0, 5.0, 5.0, 1.0, 2.0, 2.0, 5.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-95.4375, -92.1318359375, -88.826171875, -85.5205078125, -82.21484375, -78.9091796875, -75.603515625, -72.2978515625, -68.9921875, -65.6865234375, -62.380859375, -59.0751953125, -55.76953125, -52.4638671875, -49.158203125, -45.8525390625, -42.546875, -39.2412109375, -35.935546875, -32.6298828125, -29.32421875, -26.0185546875, -22.712890625, -19.4072265625, -16.1015625, -12.7958984375, -9.490234375, -6.1845703125, -2.87890625, 0.4267578125, 3.732421875, 7.0380859375, 10.34375, 13.6494140625, 16.955078125, 20.2607421875, 23.56640625, 26.8720703125, 30.177734375, 33.4833984375, 36.7890625, 40.0947265625, 43.400390625, 46.7060546875, 50.01171875, 53.3173828125, 56.623046875, 59.9287109375, 63.234375, 66.5400390625, 69.845703125, 73.1513671875, 76.45703125, 79.7626953125, 83.068359375, 86.3740234375, 89.6796875, 92.9853515625, 96.291015625, 99.5966796875, 102.90234375, 106.2080078125, 109.513671875, 112.8193359375, 116.125]}, "gradients/decoder.transformer.h.7.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 2.0, 6.0, 6.0, 10.0, 8.0, 11.0, 14.0, 15.0, 22.0, 18.0, 17.0, 28.0, 23.0, 30.0, 29.0, 30.0, 33.0, 32.0, 33.0, 54.0, 161.0, 1868.0, 102.0, 60.0, 44.0, 51.0, 38.0, 42.0, 34.0, 22.0, 23.0, 21.0, 23.0, 23.0, 27.0, 11.0, 11.0, 14.0, 5.0, 8.0, 10.0, 12.0, 7.0, 2.0, 5.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0], "bins": [-123.1875, -119.1416015625, -115.095703125, -111.0498046875, -107.00390625, -102.9580078125, -98.912109375, -94.8662109375, -90.8203125, -86.7744140625, -82.728515625, -78.6826171875, -74.63671875, -70.5908203125, -66.544921875, -62.4990234375, -58.453125, -54.4072265625, -50.361328125, -46.3154296875, -42.26953125, -38.2236328125, -34.177734375, -30.1318359375, -26.0859375, -22.0400390625, -17.994140625, -13.9482421875, -9.90234375, -5.8564453125, -1.810546875, 2.2353515625, 6.28125, 10.3271484375, 14.373046875, 18.4189453125, 22.46484375, 26.5107421875, 30.556640625, 34.6025390625, 38.6484375, 42.6943359375, 46.740234375, 50.7861328125, 54.83203125, 58.8779296875, 62.923828125, 66.9697265625, 71.015625, 75.0615234375, 79.107421875, 83.1533203125, 87.19921875, 91.2451171875, 95.291015625, 99.3369140625, 103.3828125, 107.4287109375, 111.474609375, 115.5205078125, 119.56640625, 123.6123046875, 127.658203125, 131.7041015625, 135.75]}, "gradients/decoder.transformer.h.7.attn.c_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 4.0, 3.0, 5.0, 6.0, 12.0, 10.0, 7.0, 9.0, 14.0, 18.0, 12.0, 29.0, 27.0, 34.0, 29.0, 42.0, 44.0, 76.0, 111.0, 167.0, 269.0, 581.0, 2467.0, 2909520.0, 229291.0, 1629.0, 487.0, 248.0, 137.0, 100.0, 55.0, 46.0, 38.0, 30.0, 26.0, 15.0, 28.0, 15.0, 15.0, 16.0, 6.0, 9.0, 8.0, 5.0, 3.0, 4.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-322.0, -312.50390625, -303.0078125, -293.51171875, -284.015625, -274.51953125, -265.0234375, -255.52734375, -246.03125, -236.53515625, -227.0390625, -217.54296875, -208.046875, -198.55078125, -189.0546875, -179.55859375, -170.0625, -160.56640625, -151.0703125, -141.57421875, -132.078125, -122.58203125, -113.0859375, -103.58984375, -94.09375, -84.59765625, -75.1015625, -65.60546875, -56.109375, -46.61328125, -37.1171875, -27.62109375, -18.125, -8.62890625, 0.8671875, 10.36328125, 19.859375, 29.35546875, 38.8515625, 48.34765625, 57.84375, 67.33984375, 76.8359375, 86.33203125, 95.828125, 105.32421875, 114.8203125, 124.31640625, 133.8125, 143.30859375, 152.8046875, 162.30078125, 171.796875, 181.29296875, 190.7890625, 200.28515625, 209.78125, 219.27734375, 228.7734375, 238.26953125, 247.765625, 257.26171875, 266.7578125, 276.25390625, 285.75]}, "gradients/decoder.transformer.h.7.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 87.0, 384.0, 442.0, 89.0, 10.0, 1.0, 0.0, 0.0, 2.0], "bins": [-645.3421630859375, -634.0117797851562, -622.6814575195312, -611.35107421875, -600.020751953125, -588.6903686523438, -577.3600463867188, -566.0296630859375, -554.6993408203125, -543.3689575195312, -532.0386352539062, -520.708251953125, -509.3779296875, -498.04754638671875, -486.71722412109375, -475.3868408203125, -464.05645751953125, -452.7261047363281, -441.395751953125, -430.0653991699219, -418.73504638671875, -407.4046630859375, -396.0743408203125, -384.74395751953125, -373.41363525390625, -362.0832824707031, -350.7529296875, -339.4225769042969, -328.09222412109375, -316.7618408203125, -305.4315185546875, -294.10113525390625, -282.7707824707031, -271.4404296875, -260.1100769042969, -248.77972412109375, -237.44935607910156, -226.11900329589844, -214.7886505126953, -203.45828247070312, -192.1279296875, -180.79757690429688, -169.46722412109375, -158.13687133789062, -146.80650329589844, -135.4761505126953, -124.14579772949219, -112.81543731689453, -101.4850845336914, -90.15473175048828, -78.82437133789062, -67.4940185546875, -56.16366195678711, -44.83330535888672, -33.502952575683594, -22.172592163085938, -10.842239379882812, 0.4881162643432617, 11.818471908569336, 23.148826599121094, 34.479183197021484, 45.809539794921875, 57.139892578125, 68.47025299072266, 79.80060577392578]}, "gradients/decoder.transformer.h.7.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 7.0, 5.0, 5.0, 5.0, 8.0, 15.0, 13.0, 20.0, 19.0, 20.0, 18.0, 36.0, 24.0, 20.0, 24.0, 32.0, 31.0, 40.0, 47.0, 41.0, 45.0, 47.0, 46.0, 50.0, 36.0, 29.0, 33.0, 27.0, 38.0, 28.0, 24.0, 26.0, 22.0, 19.0, 17.0, 14.0, 14.0, 14.0, 11.0, 10.0, 5.0, 4.0, 7.0, 5.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-303.28436279296875, -293.1263427734375, -282.96832275390625, -272.8102722167969, -262.6522521972656, -252.49423217773438, -242.33621215820312, -232.1781768798828, -222.0201416015625, -211.86212158203125, -201.70408630371094, -191.5460662841797, -181.38803100585938, -171.23001098632812, -161.07199096679688, -150.91395568847656, -140.7559356689453, -130.59791564941406, -120.43988037109375, -110.2818603515625, -100.12382507324219, -89.96580505371094, -79.80777740478516, -69.64974975585938, -59.491722106933594, -49.33369445800781, -39.17566680908203, -29.017642974853516, -18.859615325927734, -8.701587677001953, 1.4564361572265625, 11.614463806152344, 21.772491455078125, 31.930519104003906, 42.08854675292969, 52.2465705871582, 62.404598236083984, 72.5626220703125, 82.72064971923828, 92.87867736816406, 103.03670501708984, 113.19473266601562, 123.3527603149414, 133.5107879638672, 143.66880798339844, 153.82684326171875, 163.98486328125, 174.14288330078125, 184.30091857910156, 194.4589385986328, 204.61697387695312, 214.77499389648438, 224.9330291748047, 235.09104919433594, 245.24908447265625, 255.4071044921875, 265.56512451171875, 275.72314453125, 285.88116455078125, 296.0392150878906, 306.1972351074219, 316.3552551269531, 326.5132751464844, 336.67132568359375, 346.829345703125]}, "gradients/decoder.transformer.h.6.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 3.0, 4.0, 2.0, 3.0, 2.0, 3.0, 8.0, 4.0, 9.0, 9.0, 14.0, 12.0, 19.0, 18.0, 21.0, 32.0, 21.0, 38.0, 31.0, 33.0, 29.0, 41.0, 39.0, 55.0, 52.0, 29.0, 41.0, 40.0, 42.0, 41.0, 42.0, 39.0, 40.0, 33.0, 27.0, 26.0, 22.0, 21.0, 13.0, 14.0, 8.0, 8.0, 5.0, 5.0, 6.0, 1.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-46.84375, -45.181640625, -43.51953125, -41.857421875, -40.1953125, -38.533203125, -36.87109375, -35.208984375, -33.546875, -31.884765625, -30.22265625, -28.560546875, -26.8984375, -25.236328125, -23.57421875, -21.912109375, -20.25, -18.587890625, -16.92578125, -15.263671875, -13.6015625, -11.939453125, -10.27734375, -8.615234375, -6.953125, -5.291015625, -3.62890625, -1.966796875, -0.3046875, 1.357421875, 3.01953125, 4.681640625, 6.34375, 8.005859375, 9.66796875, 11.330078125, 12.9921875, 14.654296875, 16.31640625, 17.978515625, 19.640625, 21.302734375, 22.96484375, 24.626953125, 26.2890625, 27.951171875, 29.61328125, 31.275390625, 32.9375, 34.599609375, 36.26171875, 37.923828125, 39.5859375, 41.248046875, 42.91015625, 44.572265625, 46.234375, 47.896484375, 49.55859375, 51.220703125, 52.8828125, 54.544921875, 56.20703125, 57.869140625, 59.53125]}, "gradients/decoder.transformer.h.6.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 1.0, 6.0, 6.0, 12.0, 8.0, 19.0, 11.0, 25.0, 51.0, 43.0, 72.0, 108.0, 151.0, 202.0, 323.0, 446.0, 735.0, 1044.0, 1724.0, 2727.0, 4812.0, 9266.0, 20569.0, 102826.0, 3386183.0, 597189.0, 36271.0, 13351.0, 6370.0, 3658.0, 2133.0, 1317.0, 897.0, 574.0, 355.0, 259.0, 181.0, 122.0, 78.0, 47.0, 36.0, 25.0, 16.0, 16.0, 9.0, 8.0, 5.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-174.5, -169.001953125, -163.50390625, -158.005859375, -152.5078125, -147.009765625, -141.51171875, -136.013671875, -130.515625, -125.017578125, -119.51953125, -114.021484375, -108.5234375, -103.025390625, -97.52734375, -92.029296875, -86.53125, -81.033203125, -75.53515625, -70.037109375, -64.5390625, -59.041015625, -53.54296875, -48.044921875, -42.546875, -37.048828125, -31.55078125, -26.052734375, -20.5546875, -15.056640625, -9.55859375, -4.060546875, 1.4375, 6.935546875, 12.43359375, 17.931640625, 23.4296875, 28.927734375, 34.42578125, 39.923828125, 45.421875, 50.919921875, 56.41796875, 61.916015625, 67.4140625, 72.912109375, 78.41015625, 83.908203125, 89.40625, 94.904296875, 100.40234375, 105.900390625, 111.3984375, 116.896484375, 122.39453125, 127.892578125, 133.390625, 138.888671875, 144.38671875, 149.884765625, 155.3828125, 160.880859375, 166.37890625, 171.876953125, 177.375]}, "gradients/decoder.transformer.h.6.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 7.0, 1.0, 3.0, 2.0, 3.0, 4.0, 8.0, 5.0, 10.0, 10.0, 15.0, 19.0, 14.0, 22.0, 39.0, 46.0, 66.0, 124.0, 254.0, 433.0, 814.0, 991.0, 564.0, 237.0, 135.0, 70.0, 37.0, 33.0, 28.0, 20.0, 16.0, 7.0, 2.0, 5.0, 5.0, 8.0, 7.0, 4.0, 6.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-114.25, -110.9990234375, -107.748046875, -104.4970703125, -101.24609375, -97.9951171875, -94.744140625, -91.4931640625, -88.2421875, -84.9912109375, -81.740234375, -78.4892578125, -75.23828125, -71.9873046875, -68.736328125, -65.4853515625, -62.234375, -58.9833984375, -55.732421875, -52.4814453125, -49.23046875, -45.9794921875, -42.728515625, -39.4775390625, -36.2265625, -32.9755859375, -29.724609375, -26.4736328125, -23.22265625, -19.9716796875, -16.720703125, -13.4697265625, -10.21875, -6.9677734375, -3.716796875, -0.4658203125, 2.78515625, 6.0361328125, 9.287109375, 12.5380859375, 15.7890625, 19.0400390625, 22.291015625, 25.5419921875, 28.79296875, 32.0439453125, 35.294921875, 38.5458984375, 41.796875, 45.0478515625, 48.298828125, 51.5498046875, 54.80078125, 58.0517578125, 61.302734375, 64.5537109375, 67.8046875, 71.0556640625, 74.306640625, 77.5576171875, 80.80859375, 84.0595703125, 87.310546875, 90.5615234375, 93.8125]}, "gradients/decoder.transformer.h.6.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 6.0, 7.0, 12.0, 5.0, 11.0, 14.0, 30.0, 37.0, 51.0, 79.0, 107.0, 204.0, 324.0, 595.0, 1127.0, 2268.0, 4636.0, 10643.0, 26272.0, 98264.0, 3153869.0, 797785.0, 62264.0, 19669.0, 8139.0, 3692.0, 1884.0, 969.0, 502.0, 293.0, 179.0, 123.0, 63.0, 47.0, 28.0, 29.0, 15.0, 13.0, 7.0, 8.0, 3.0, 3.0, 5.0, 0.0, 4.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-178.375, -172.193359375, -166.01171875, -159.830078125, -153.6484375, -147.466796875, -141.28515625, -135.103515625, -128.921875, -122.740234375, -116.55859375, -110.376953125, -104.1953125, -98.013671875, -91.83203125, -85.650390625, -79.46875, -73.287109375, -67.10546875, -60.923828125, -54.7421875, -48.560546875, -42.37890625, -36.197265625, -30.015625, -23.833984375, -17.65234375, -11.470703125, -5.2890625, 0.892578125, 7.07421875, 13.255859375, 19.4375, 25.619140625, 31.80078125, 37.982421875, 44.1640625, 50.345703125, 56.52734375, 62.708984375, 68.890625, 75.072265625, 81.25390625, 87.435546875, 93.6171875, 99.798828125, 105.98046875, 112.162109375, 118.34375, 124.525390625, 130.70703125, 136.888671875, 143.0703125, 149.251953125, 155.43359375, 161.615234375, 167.796875, 173.978515625, 180.16015625, 186.341796875, 192.5234375, 198.705078125, 204.88671875, 211.068359375, 217.25]}, "gradients/decoder.transformer.h.6.ln_2.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 6.0, 3.0, 9.0, 13.0, 21.0, 42.0, 91.0, 170.0, 270.0, 193.0, 103.0, 42.0, 24.0, 13.0, 4.0, 7.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-254.5532684326172, -232.78933715820312, -211.025390625, -189.26145935058594, -167.49752807617188, -145.7335968017578, -123.96965026855469, -102.20571899414062, -80.44178771972656, -58.677852630615234, -36.913917541503906, -15.149978637695312, 6.61395263671875, 28.377883911132812, 50.14183044433594, 71.90576171875, 93.66969299316406, 115.43362426757812, 137.19757080078125, 158.9615020751953, 180.72543334960938, 202.48936462402344, 224.25331115722656, 246.01724243164062, 267.78118896484375, 289.5451354980469, 311.3090515136719, 333.072998046875, 354.8369140625, 376.6008605957031, 398.36480712890625, 420.12872314453125, 441.8927001953125, 463.6566467285156, 485.4205627441406, 507.18450927734375, 528.9484252929688, 550.71240234375, 572.476318359375, 594.240234375, 616.004150390625, 637.76806640625, 659.5320434570312, 681.2959594726562, 703.0598754882812, 724.8238525390625, 746.5877685546875, 768.3516845703125, 790.1156616210938, 811.8795776367188, 833.6435546875, 855.407470703125, 877.17138671875, 898.935302734375, 920.6992797851562, 942.4631958007812, 964.2271728515625, 985.9910888671875, 1007.7550659179688, 1029.51904296875, 1051.282958984375, 1073.046875, 1094.810791015625, 1116.57470703125, 1138.338623046875]}, "gradients/decoder.transformer.h.6.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 3.0, 6.0, 5.0, 8.0, 2.0, 13.0, 10.0, 15.0, 16.0, 12.0, 23.0, 27.0, 19.0, 31.0, 32.0, 40.0, 32.0, 40.0, 34.0, 44.0, 51.0, 52.0, 43.0, 40.0, 33.0, 47.0, 44.0, 37.0, 32.0, 27.0, 28.0, 23.0, 19.0, 21.0, 15.0, 13.0, 13.0, 13.0, 10.0, 8.0, 8.0, 8.0, 5.0, 2.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-290.7010192871094, -281.8777770996094, -273.0545349121094, -264.23126220703125, -255.40802001953125, -246.58477783203125, -237.76153564453125, -228.93829345703125, -220.1150360107422, -211.2917938232422, -202.46853637695312, -193.64529418945312, -184.82205200195312, -175.99879455566406, -167.17555236816406, -158.352294921875, -149.529052734375, -140.705810546875, -131.88255310058594, -123.05931091308594, -114.2360610961914, -105.41281127929688, -96.58956909179688, -87.76631927490234, -78.94306945800781, -70.11981964111328, -61.296573638916016, -52.47332763671875, -43.65007781982422, -34.82682800292969, -26.003582000732422, -17.180335998535156, -8.357086181640625, 0.46616172790527344, 9.289409637451172, 18.11265754699707, 26.93590545654297, 35.7591552734375, 44.582401275634766, 53.40564727783203, 62.22889709472656, 71.0521469116211, 79.87539672851562, 88.69863891601562, 97.52188873291016, 106.34513854980469, 115.16838073730469, 123.99163055419922, 132.81488037109375, 141.63812255859375, 150.4613800048828, 159.2846221923828, 168.10787963867188, 176.93112182617188, 185.75436401367188, 194.57760620117188, 203.40086364746094, 212.22410583496094, 221.04736328125, 229.87060546875, 238.69384765625, 247.51710510253906, 256.3403625488281, 265.1636047363281, 273.9868469238281]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 4.0, 2.0, 2.0, 1.0, 4.0, 2.0, 4.0, 5.0, 11.0, 9.0, 17.0, 15.0, 9.0, 10.0, 20.0, 23.0, 33.0, 28.0, 24.0, 31.0, 30.0, 34.0, 44.0, 47.0, 43.0, 39.0, 41.0, 52.0, 40.0, 47.0, 40.0, 36.0, 37.0, 31.0, 26.0, 23.0, 29.0, 18.0, 23.0, 13.0, 14.0, 9.0, 9.0, 9.0, 10.0, 7.0, 5.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.0625, -46.3583984375, -44.654296875, -42.9501953125, -41.24609375, -39.5419921875, -37.837890625, -36.1337890625, -34.4296875, -32.7255859375, -31.021484375, -29.3173828125, -27.61328125, -25.9091796875, -24.205078125, -22.5009765625, -20.796875, -19.0927734375, -17.388671875, -15.6845703125, -13.98046875, -12.2763671875, -10.572265625, -8.8681640625, -7.1640625, -5.4599609375, -3.755859375, -2.0517578125, -0.34765625, 1.3564453125, 3.060546875, 4.7646484375, 6.46875, 8.1728515625, 9.876953125, 11.5810546875, 13.28515625, 14.9892578125, 16.693359375, 18.3974609375, 20.1015625, 21.8056640625, 23.509765625, 25.2138671875, 26.91796875, 28.6220703125, 30.326171875, 32.0302734375, 33.734375, 35.4384765625, 37.142578125, 38.8466796875, 40.55078125, 42.2548828125, 43.958984375, 45.6630859375, 47.3671875, 49.0712890625, 50.775390625, 52.4794921875, 54.18359375, 55.8876953125, 57.591796875, 59.2958984375, 61.0]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 5.0, 4.0, 7.0, 11.0, 24.0, 26.0, 39.0, 66.0, 99.0, 151.0, 247.0, 361.0, 503.0, 820.0, 1304.0, 1994.0, 2984.0, 4688.0, 7186.0, 11567.0, 18104.0, 30189.0, 51747.0, 96385.0, 197681.0, 285033.0, 151198.0, 76411.0, 42259.0, 24952.0, 15275.0, 9731.0, 6221.0, 3931.0, 2535.0, 1640.0, 1081.0, 733.0, 464.0, 301.0, 205.0, 123.0, 89.0, 73.0, 41.0, 25.0, 17.0, 17.0, 5.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-2.984375, -2.894805908203125, -2.80523681640625, -2.715667724609375, -2.6260986328125, -2.536529541015625, -2.44696044921875, -2.357391357421875, -2.267822265625, -2.178253173828125, -2.08868408203125, -1.999114990234375, -1.9095458984375, -1.819976806640625, -1.73040771484375, -1.640838623046875, -1.55126953125, -1.461700439453125, -1.37213134765625, -1.282562255859375, -1.1929931640625, -1.103424072265625, -1.01385498046875, -0.924285888671875, -0.834716796875, -0.745147705078125, -0.65557861328125, -0.566009521484375, -0.4764404296875, -0.386871337890625, -0.29730224609375, -0.207733154296875, -0.1181640625, -0.028594970703125, 0.06097412109375, 0.150543212890625, 0.2401123046875, 0.329681396484375, 0.41925048828125, 0.508819580078125, 0.598388671875, 0.687957763671875, 0.77752685546875, 0.867095947265625, 0.9566650390625, 1.046234130859375, 1.13580322265625, 1.225372314453125, 1.31494140625, 1.404510498046875, 1.49407958984375, 1.583648681640625, 1.6732177734375, 1.762786865234375, 1.85235595703125, 1.941925048828125, 2.031494140625, 2.121063232421875, 2.21063232421875, 2.300201416015625, 2.3897705078125, 2.479339599609375, 2.56890869140625, 2.658477783203125, 2.748046875]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 7.0, 7.0, 5.0, 10.0, 11.0, 13.0, 19.0, 17.0, 15.0, 16.0, 25.0, 30.0, 40.0, 38.0, 41.0, 40.0, 37.0, 49.0, 40.0, 1071.0, 53.0, 49.0, 43.0, 31.0, 38.0, 42.0, 35.0, 25.0, 22.0, 20.0, 26.0, 24.0, 11.0, 14.0, 16.0, 11.0, 11.0, 7.0, 3.0, 5.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-36.09375, -34.91455078125, -33.7353515625, -32.55615234375, -31.376953125, -30.19775390625, -29.0185546875, -27.83935546875, -26.66015625, -25.48095703125, -24.3017578125, -23.12255859375, -21.943359375, -20.76416015625, -19.5849609375, -18.40576171875, -17.2265625, -16.04736328125, -14.8681640625, -13.68896484375, -12.509765625, -11.33056640625, -10.1513671875, -8.97216796875, -7.79296875, -6.61376953125, -5.4345703125, -4.25537109375, -3.076171875, -1.89697265625, -0.7177734375, 0.46142578125, 1.640625, 2.81982421875, 3.9990234375, 5.17822265625, 6.357421875, 7.53662109375, 8.7158203125, 9.89501953125, 11.07421875, 12.25341796875, 13.4326171875, 14.61181640625, 15.791015625, 16.97021484375, 18.1494140625, 19.32861328125, 20.5078125, 21.68701171875, 22.8662109375, 24.04541015625, 25.224609375, 26.40380859375, 27.5830078125, 28.76220703125, 29.94140625, 31.12060546875, 32.2998046875, 33.47900390625, 34.658203125, 35.83740234375, 37.0166015625, 38.19580078125, 39.375]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 8.0, 8.0, 11.0, 14.0, 26.0, 36.0, 33.0, 78.0, 86.0, 134.0, 234.0, 326.0, 487.0, 749.0, 1183.0, 1897.0, 3044.0, 5038.0, 8422.0, 14022.0, 24185.0, 43911.0, 87687.0, 208008.0, 1392112.0, 150488.0, 69009.0, 35491.0, 20252.0, 11699.0, 6990.0, 4213.0, 2629.0, 1631.0, 1024.0, 673.0, 453.0, 273.0, 185.0, 129.0, 80.0, 55.0, 39.0, 31.0, 12.0, 18.0, 10.0, 6.0, 3.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 3.0], "bins": [-3.236328125, -3.13623046875, -3.0361328125, -2.93603515625, -2.8359375, -2.73583984375, -2.6357421875, -2.53564453125, -2.435546875, -2.33544921875, -2.2353515625, -2.13525390625, -2.03515625, -1.93505859375, -1.8349609375, -1.73486328125, -1.634765625, -1.53466796875, -1.4345703125, -1.33447265625, -1.234375, -1.13427734375, -1.0341796875, -0.93408203125, -0.833984375, -0.73388671875, -0.6337890625, -0.53369140625, -0.43359375, -0.33349609375, -0.2333984375, -0.13330078125, -0.033203125, 0.06689453125, 0.1669921875, 0.26708984375, 0.3671875, 0.46728515625, 0.5673828125, 0.66748046875, 0.767578125, 0.86767578125, 0.9677734375, 1.06787109375, 1.16796875, 1.26806640625, 1.3681640625, 1.46826171875, 1.568359375, 1.66845703125, 1.7685546875, 1.86865234375, 1.96875, 2.06884765625, 2.1689453125, 2.26904296875, 2.369140625, 2.46923828125, 2.5693359375, 2.66943359375, 2.76953125, 2.86962890625, 2.9697265625, 3.06982421875, 3.169921875]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 2.0, 1.0, 1.0, 3.0, 5.0, 1.0, 3.0, 5.0, 4.0, 9.0, 8.0, 14.0, 14.0, 16.0, 21.0, 19.0, 23.0, 30.0, 30.0, 27.0, 43.0, 40.0, 62.0, 60.0, 66.0, 81.0, 68.0, 47.0, 42.0, 36.0, 34.0, 27.0, 28.0, 24.0, 20.0, 15.0, 14.0, 13.0, 11.0, 8.0, 7.0, 8.0, 4.0, 5.0, 3.0, 4.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0298004150390625, -0.028895854949951172, -0.027991294860839844, -0.027086734771728516, -0.026182174682617188, -0.02527761459350586, -0.02437305450439453, -0.023468494415283203, -0.022563934326171875, -0.021659374237060547, -0.02075481414794922, -0.01985025405883789, -0.018945693969726562, -0.018041133880615234, -0.017136573791503906, -0.016232013702392578, -0.01532745361328125, -0.014422893524169922, -0.013518333435058594, -0.012613773345947266, -0.011709213256835938, -0.01080465316772461, -0.009900093078613281, -0.008995532989501953, -0.008090972900390625, -0.007186412811279297, -0.006281852722167969, -0.005377292633056641, -0.0044727325439453125, -0.0035681724548339844, -0.0026636123657226562, -0.0017590522766113281, -0.0008544921875, 5.0067901611328125e-05, 0.0009546279907226562, 0.0018591880798339844, 0.0027637481689453125, 0.0036683082580566406, 0.004572868347167969, 0.005477428436279297, 0.006381988525390625, 0.007286548614501953, 0.008191108703613281, 0.00909566879272461, 0.010000228881835938, 0.010904788970947266, 0.011809349060058594, 0.012713909149169922, 0.01361846923828125, 0.014523029327392578, 0.015427589416503906, 0.016332149505615234, 0.017236709594726562, 0.01814126968383789, 0.01904582977294922, 0.019950389862060547, 0.020854949951171875, 0.021759510040283203, 0.02266407012939453, 0.02356863021850586, 0.024473190307617188, 0.025377750396728516, 0.026282310485839844, 0.027186870574951172, 0.0280914306640625]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 5.0, 1.0, 11.0, 5.0, 9.0, 12.0, 11.0, 17.0, 23.0, 27.0, 36.0, 42.0, 50.0, 56.0, 84.0, 127.0, 202.0, 446.0, 3554.0, 888943.0, 152289.0, 1577.0, 348.0, 186.0, 116.0, 76.0, 66.0, 41.0, 36.0, 35.0, 19.0, 32.0, 21.0, 15.0, 10.0, 5.0, 8.0, 5.0, 3.0, 4.0, 2.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.62744140625, -0.608917236328125, -0.59039306640625, -0.571868896484375, -0.5533447265625, -0.534820556640625, -0.51629638671875, -0.497772216796875, -0.479248046875, -0.460723876953125, -0.44219970703125, -0.423675537109375, -0.4051513671875, -0.386627197265625, -0.36810302734375, -0.349578857421875, -0.3310546875, -0.312530517578125, -0.29400634765625, -0.275482177734375, -0.2569580078125, -0.238433837890625, -0.21990966796875, -0.201385498046875, -0.182861328125, -0.164337158203125, -0.14581298828125, -0.127288818359375, -0.1087646484375, -0.090240478515625, -0.07171630859375, -0.053192138671875, -0.03466796875, -0.016143798828125, 0.00238037109375, 0.020904541015625, 0.0394287109375, 0.057952880859375, 0.07647705078125, 0.095001220703125, 0.113525390625, 0.132049560546875, 0.15057373046875, 0.169097900390625, 0.1876220703125, 0.206146240234375, 0.22467041015625, 0.243194580078125, 0.26171875, 0.280242919921875, 0.29876708984375, 0.317291259765625, 0.3358154296875, 0.354339599609375, 0.37286376953125, 0.391387939453125, 0.409912109375, 0.428436279296875, 0.44696044921875, 0.465484619140625, 0.4840087890625, 0.502532958984375, 0.52105712890625, 0.539581298828125, 0.55810546875]}, "gradients/decoder.transformer.h.6.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 29.0, 740.0, 237.0, 8.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.031296346336603165, -0.02586252987384796, -0.020428715273737907, -0.014994900673627853, -0.00956108421087265, -0.004127267748117447, 0.0013065449893474579, 0.006740361452102661, 0.012174177914857864, 0.017607994377613068, 0.02304180897772312, 0.028475623577833176, 0.03390944004058838, 0.03934325650334358, 0.04477706924080849, 0.05021088570356369, 0.05564470216631889, 0.0610785186290741, 0.066512331366539, 0.0719461441040039, 0.07737996429204941, 0.08281378448009491, 0.08824759721755981, 0.09368140995502472, 0.09911522269248962, 0.10454903542995453, 0.10998285561800003, 0.11541666835546494, 0.12085048854351044, 0.12628430128097534, 0.13171811401844025, 0.13715192675590515, 0.14258575439453125, 0.14801956713199615, 0.15345337986946106, 0.15888720750808716, 0.16432102024555206, 0.16975483298301697, 0.17518864572048187, 0.18062245845794678, 0.18605628609657288, 0.19149009883403778, 0.19692391157150269, 0.20235773921012878, 0.2077915519475937, 0.2132253646850586, 0.2186591774225235, 0.2240929901599884, 0.2295268028974533, 0.2349606156349182, 0.24039442837238312, 0.24582824110984802, 0.2512620687484741, 0.25669586658477783, 0.26212969422340393, 0.26756352186203003, 0.27299731969833374, 0.27843114733695984, 0.28386494517326355, 0.28929877281188965, 0.29473257064819336, 0.30016639828681946, 0.30560022592544556, 0.31103402376174927, 0.31646785140037537]}, "gradients/decoder.transformer.h.6.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 8.0, 12.0, 7.0, 7.0, 15.0, 16.0, 16.0, 23.0, 18.0, 26.0, 27.0, 31.0, 29.0, 34.0, 37.0, 38.0, 31.0, 42.0, 36.0, 40.0, 38.0, 35.0, 45.0, 38.0, 43.0, 29.0, 37.0, 36.0, 32.0, 24.0, 28.0, 25.0, 21.0, 12.0, 16.0, 9.0, 5.0, 4.0, 10.0, 5.0, 4.0, 7.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.016697227954864502, -0.016158541664481163, -0.015619855374097824, -0.015081169083714485, -0.014542482793331146, -0.014003796502947807, -0.013465110212564468, -0.01292642392218113, -0.01238773763179779, -0.011849051341414452, -0.011310365051031113, -0.010771678760647774, -0.010232992470264435, -0.009694306179881096, -0.009155619889497757, -0.008616933599114418, -0.008078247308731079, -0.00753956101834774, -0.007000874727964401, -0.006462188437581062, -0.005923502147197723, -0.0053848158568143845, -0.0048461295664310455, -0.004307443276047707, -0.0037687569856643677, -0.0032300706952810287, -0.00269138440489769, -0.002152698114514351, -0.001614011824131012, -0.001075325533747673, -0.0005366392433643341, 2.0470470190048218e-06, 0.0005407333374023438, 0.0010794196277856827, 0.0016181059181690216, 0.0021567922085523605, 0.0026954784989356995, 0.0032341647893190384, 0.0037728510797023773, 0.004311537370085716, 0.004850223660469055, 0.005388909950852394, 0.005927596241235733, 0.006466282531619072, 0.007004968822002411, 0.00754365511238575, 0.008082341402769089, 0.008621027693152428, 0.009159713983535767, 0.009698400273919106, 0.010237086564302444, 0.010775772854685783, 0.011314459145069122, 0.011853145435452461, 0.0123918317258358, 0.012930518016219139, 0.013469204306602478, 0.014007890596985817, 0.014546576887369156, 0.015085263177752495, 0.015623949468135834, 0.016162635758519173, 0.01670132204890251, 0.01724000833928585, 0.01777869462966919]}, "gradients/decoder.transformer.h.6.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 4.0, 2.0, 2.0, 1.0, 4.0, 2.0, 4.0, 5.0, 11.0, 9.0, 17.0, 15.0, 9.0, 10.0, 20.0, 23.0, 33.0, 28.0, 24.0, 31.0, 30.0, 34.0, 44.0, 47.0, 43.0, 39.0, 41.0, 52.0, 41.0, 46.0, 39.0, 37.0, 37.0, 31.0, 26.0, 23.0, 29.0, 18.0, 23.0, 13.0, 14.0, 9.0, 9.0, 9.0, 10.0, 7.0, 5.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.0625, -46.3583984375, -44.654296875, -42.9501953125, -41.24609375, -39.5419921875, -37.837890625, -36.1337890625, -34.4296875, -32.7255859375, -31.021484375, -29.3173828125, -27.61328125, -25.9091796875, -24.205078125, -22.5009765625, -20.796875, -19.0927734375, -17.388671875, -15.6845703125, -13.98046875, -12.2763671875, -10.572265625, -8.8681640625, -7.1640625, -5.4599609375, -3.755859375, -2.0517578125, -0.34765625, 1.3564453125, 3.060546875, 4.7646484375, 6.46875, 8.1728515625, 9.876953125, 11.5810546875, 13.28515625, 14.9892578125, 16.693359375, 18.3974609375, 20.1015625, 21.8056640625, 23.509765625, 25.2138671875, 26.91796875, 28.6220703125, 30.326171875, 32.0302734375, 33.734375, 35.4384765625, 37.142578125, 38.8466796875, 40.55078125, 42.2548828125, 43.958984375, 45.6630859375, 47.3671875, 49.0712890625, 50.775390625, 52.4794921875, 54.18359375, 55.8876953125, 57.591796875, 59.2958984375, 61.0]}, "gradients/decoder.transformer.h.6.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 4.0, 17.0, 26.0, 27.0, 39.0, 39.0, 35.0, 67.0, 87.0, 149.0, 225.0, 354.0, 529.0, 814.0, 1494.0, 2742.0, 5672.0, 14485.0, 100773.0, 868444.0, 33332.0, 9608.0, 4226.0, 2120.0, 1143.0, 706.0, 453.0, 264.0, 179.0, 118.0, 101.0, 64.0, 43.0, 33.0, 24.0, 26.0, 21.0, 17.0, 13.0, 7.0, 5.0, 3.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-122.375, -118.05078125, -113.7265625, -109.40234375, -105.078125, -100.75390625, -96.4296875, -92.10546875, -87.78125, -83.45703125, -79.1328125, -74.80859375, -70.484375, -66.16015625, -61.8359375, -57.51171875, -53.1875, -48.86328125, -44.5390625, -40.21484375, -35.890625, -31.56640625, -27.2421875, -22.91796875, -18.59375, -14.26953125, -9.9453125, -5.62109375, -1.296875, 3.02734375, 7.3515625, 11.67578125, 16.0, 20.32421875, 24.6484375, 28.97265625, 33.296875, 37.62109375, 41.9453125, 46.26953125, 50.59375, 54.91796875, 59.2421875, 63.56640625, 67.890625, 72.21484375, 76.5390625, 80.86328125, 85.1875, 89.51171875, 93.8359375, 98.16015625, 102.484375, 106.80859375, 111.1328125, 115.45703125, 119.78125, 124.10546875, 128.4296875, 132.75390625, 137.078125, 141.40234375, 145.7265625, 150.05078125, 154.375]}, "gradients/decoder.transformer.h.6.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 2.0, 5.0, 2.0, 7.0, 7.0, 8.0, 9.0, 8.0, 13.0, 11.0, 21.0, 27.0, 27.0, 24.0, 40.0, 39.0, 39.0, 46.0, 34.0, 61.0, 87.0, 1676.0, 375.0, 79.0, 46.0, 45.0, 37.0, 52.0, 37.0, 30.0, 29.0, 25.0, 24.0, 14.0, 12.0, 6.0, 12.0, 14.0, 5.0, 7.0, 4.0, 4.0, 3.0, 1.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-143.375, -138.736328125, -134.09765625, -129.458984375, -124.8203125, -120.181640625, -115.54296875, -110.904296875, -106.265625, -101.626953125, -96.98828125, -92.349609375, -87.7109375, -83.072265625, -78.43359375, -73.794921875, -69.15625, -64.517578125, -59.87890625, -55.240234375, -50.6015625, -45.962890625, -41.32421875, -36.685546875, -32.046875, -27.408203125, -22.76953125, -18.130859375, -13.4921875, -8.853515625, -4.21484375, 0.423828125, 5.0625, 9.701171875, 14.33984375, 18.978515625, 23.6171875, 28.255859375, 32.89453125, 37.533203125, 42.171875, 46.810546875, 51.44921875, 56.087890625, 60.7265625, 65.365234375, 70.00390625, 74.642578125, 79.28125, 83.919921875, 88.55859375, 93.197265625, 97.8359375, 102.474609375, 107.11328125, 111.751953125, 116.390625, 121.029296875, 125.66796875, 130.306640625, 134.9453125, 139.583984375, 144.22265625, 148.861328125, 153.5]}, "gradients/decoder.transformer.h.6.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 3.0, 3.0, 2.0, 3.0, 2.0, 1.0, 7.0, 8.0, 20.0, 11.0, 9.0, 15.0, 19.0, 20.0, 35.0, 39.0, 36.0, 55.0, 72.0, 109.0, 236.0, 724.0, 4993.0, 1836369.0, 1296464.0, 5015.0, 727.0, 246.0, 104.0, 75.0, 54.0, 46.0, 28.0, 31.0, 23.0, 23.0, 15.0, 14.0, 10.0, 11.0, 7.0, 10.0, 6.0, 2.0, 5.0, 1.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-414.25, -401.6953125, -389.140625, -376.5859375, -364.03125, -351.4765625, -338.921875, -326.3671875, -313.8125, -301.2578125, -288.703125, -276.1484375, -263.59375, -251.0390625, -238.484375, -225.9296875, -213.375, -200.8203125, -188.265625, -175.7109375, -163.15625, -150.6015625, -138.046875, -125.4921875, -112.9375, -100.3828125, -87.828125, -75.2734375, -62.71875, -50.1640625, -37.609375, -25.0546875, -12.5, 0.0546875, 12.609375, 25.1640625, 37.71875, 50.2734375, 62.828125, 75.3828125, 87.9375, 100.4921875, 113.046875, 125.6015625, 138.15625, 150.7109375, 163.265625, 175.8203125, 188.375, 200.9296875, 213.484375, 226.0390625, 238.59375, 251.1484375, 263.703125, 276.2578125, 288.8125, 301.3671875, 313.921875, 326.4765625, 339.03125, 351.5859375, 364.140625, 376.6953125, 389.25]}, "gradients/decoder.transformer.h.6.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 7.0, 19.0, 144.0, 529.0, 269.0, 46.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-809.7523193359375, -794.6707153320312, -779.5890502929688, -764.5074462890625, -749.42578125, -734.3441772460938, -719.2625122070312, -704.180908203125, -689.0992431640625, -674.0176391601562, -658.9359741210938, -643.8543701171875, -628.772705078125, -613.6911010742188, -598.6094360351562, -583.52783203125, -568.4461669921875, -553.3645629882812, -538.2828979492188, -523.2012939453125, -508.11962890625, -493.0379943847656, -477.95635986328125, -462.8747253417969, -447.7931213378906, -432.71148681640625, -417.6298522949219, -402.5482177734375, -387.4665832519531, -372.38494873046875, -357.3033142089844, -342.2216796875, -327.1400451660156, -312.05841064453125, -296.9767761230469, -281.8951416015625, -266.8135070800781, -251.73187255859375, -236.65023803710938, -221.568603515625, -206.48696899414062, -191.40533447265625, -176.32369995117188, -161.2420654296875, -146.16043090820312, -131.07879638671875, -115.9971694946289, -100.91553497314453, -85.83390808105469, -70.75227355957031, -55.67063903808594, -40.58900833129883, -25.507373809814453, -10.425743103027344, 4.655891418457031, 19.737525939941406, 34.81916046142578, 49.900794982910156, 64.98242950439453, 80.06405639648438, 95.14569091796875, 110.22732543945312, 125.3089599609375, 140.39059448242188, 155.47222900390625]}, "gradients/decoder.transformer.h.6.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 2.0, 4.0, 7.0, 6.0, 8.0, 3.0, 12.0, 12.0, 10.0, 9.0, 16.0, 14.0, 30.0, 19.0, 24.0, 38.0, 29.0, 29.0, 44.0, 34.0, 41.0, 36.0, 52.0, 48.0, 36.0, 39.0, 43.0, 37.0, 30.0, 22.0, 37.0, 41.0, 26.0, 27.0, 22.0, 21.0, 20.0, 11.0, 15.0, 15.0, 8.0, 5.0, 6.0, 9.0, 4.0, 2.0, 2.0, 2.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-351.0616149902344, -339.9876403808594, -328.9136962890625, -317.8397216796875, -306.7657775878906, -295.6918029785156, -284.61785888671875, -273.54388427734375, -262.46990966796875, -251.3959503173828, -240.32199096679688, -229.24801635742188, -218.174072265625, -207.10009765625, -196.02613830566406, -184.95217895507812, -173.87823486328125, -162.8042755126953, -151.73031616210938, -140.65634155273438, -129.5823974609375, -118.50843048095703, -107.43446350097656, -96.36050415039062, -85.28654479980469, -74.21258544921875, -63.13862228393555, -52.064659118652344, -40.990699768066406, -29.91674041748047, -18.8427734375, -7.7688140869140625, 3.30511474609375, 14.37907600402832, 25.45303726196289, 36.527000427246094, 47.60095977783203, 58.67491912841797, 69.74888610839844, 80.82284545898438, 91.89680480957031, 102.97076416015625, 114.04472351074219, 125.11869049072266, 136.19265747070312, 147.2666015625, 158.340576171875, 169.41453552246094, 180.48849487304688, 191.5624542236328, 202.63641357421875, 213.71038818359375, 224.78433227539062, 235.85830688476562, 246.93226623535156, 258.0062255859375, 269.0802001953125, 280.1541748046875, 291.2281188964844, 302.3020935058594, 313.37603759765625, 324.45001220703125, 335.52398681640625, 346.5979309082031, 357.671875]}, "gradients/decoder.transformer.h.5.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 3.0, 1.0, 1.0, 5.0, 4.0, 2.0, 7.0, 6.0, 11.0, 11.0, 13.0, 15.0, 15.0, 13.0, 28.0, 26.0, 23.0, 27.0, 43.0, 30.0, 34.0, 41.0, 41.0, 35.0, 46.0, 39.0, 50.0, 51.0, 42.0, 43.0, 35.0, 42.0, 27.0, 30.0, 34.0, 23.0, 15.0, 16.0, 14.0, 17.0, 13.0, 10.0, 9.0, 9.0, 5.0, 4.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-49.15625, -47.42041015625, -45.6845703125, -43.94873046875, -42.212890625, -40.47705078125, -38.7412109375, -37.00537109375, -35.26953125, -33.53369140625, -31.7978515625, -30.06201171875, -28.326171875, -26.59033203125, -24.8544921875, -23.11865234375, -21.3828125, -19.64697265625, -17.9111328125, -16.17529296875, -14.439453125, -12.70361328125, -10.9677734375, -9.23193359375, -7.49609375, -5.76025390625, -4.0244140625, -2.28857421875, -0.552734375, 1.18310546875, 2.9189453125, 4.65478515625, 6.390625, 8.12646484375, 9.8623046875, 11.59814453125, 13.333984375, 15.06982421875, 16.8056640625, 18.54150390625, 20.27734375, 22.01318359375, 23.7490234375, 25.48486328125, 27.220703125, 28.95654296875, 30.6923828125, 32.42822265625, 34.1640625, 35.89990234375, 37.6357421875, 39.37158203125, 41.107421875, 42.84326171875, 44.5791015625, 46.31494140625, 48.05078125, 49.78662109375, 51.5224609375, 53.25830078125, 54.994140625, 56.72998046875, 58.4658203125, 60.20166015625, 61.9375]}, "gradients/decoder.transformer.h.5.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 6.0, 5.0, 21.0, 18.0, 24.0, 28.0, 54.0, 77.0, 130.0, 175.0, 313.0, 521.0, 825.0, 1552.0, 2940.0, 6444.0, 16534.0, 93686.0, 3591313.0, 434504.0, 26914.0, 9348.0, 4112.0, 1989.0, 1076.0, 629.0, 360.0, 252.0, 152.0, 94.0, 61.0, 36.0, 30.0, 16.0, 7.0, 5.0, 12.0, 10.0, 4.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-207.75, -201.396484375, -195.04296875, -188.689453125, -182.3359375, -175.982421875, -169.62890625, -163.275390625, -156.921875, -150.568359375, -144.21484375, -137.861328125, -131.5078125, -125.154296875, -118.80078125, -112.447265625, -106.09375, -99.740234375, -93.38671875, -87.033203125, -80.6796875, -74.326171875, -67.97265625, -61.619140625, -55.265625, -48.912109375, -42.55859375, -36.205078125, -29.8515625, -23.498046875, -17.14453125, -10.791015625, -4.4375, 1.916015625, 8.26953125, 14.623046875, 20.9765625, 27.330078125, 33.68359375, 40.037109375, 46.390625, 52.744140625, 59.09765625, 65.451171875, 71.8046875, 78.158203125, 84.51171875, 90.865234375, 97.21875, 103.572265625, 109.92578125, 116.279296875, 122.6328125, 128.986328125, 135.33984375, 141.693359375, 148.046875, 154.400390625, 160.75390625, 167.107421875, 173.4609375, 179.814453125, 186.16796875, 192.521484375, 198.875]}, "gradients/decoder.transformer.h.5.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 3.0, 10.0, 9.0, 9.0, 8.0, 10.0, 19.0, 23.0, 28.0, 63.0, 66.0, 127.0, 301.0, 710.0, 1195.0, 733.0, 362.0, 135.0, 70.0, 44.0, 30.0, 32.0, 19.0, 15.0, 12.0, 6.0, 12.0, 3.0, 9.0, 3.0, 3.0, 2.0, 1.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-102.4375, -98.7138671875, -94.990234375, -91.2666015625, -87.54296875, -83.8193359375, -80.095703125, -76.3720703125, -72.6484375, -68.9248046875, -65.201171875, -61.4775390625, -57.75390625, -54.0302734375, -50.306640625, -46.5830078125, -42.859375, -39.1357421875, -35.412109375, -31.6884765625, -27.96484375, -24.2412109375, -20.517578125, -16.7939453125, -13.0703125, -9.3466796875, -5.623046875, -1.8994140625, 1.82421875, 5.5478515625, 9.271484375, 12.9951171875, 16.71875, 20.4423828125, 24.166015625, 27.8896484375, 31.61328125, 35.3369140625, 39.060546875, 42.7841796875, 46.5078125, 50.2314453125, 53.955078125, 57.6787109375, 61.40234375, 65.1259765625, 68.849609375, 72.5732421875, 76.296875, 80.0205078125, 83.744140625, 87.4677734375, 91.19140625, 94.9150390625, 98.638671875, 102.3623046875, 106.0859375, 109.8095703125, 113.533203125, 117.2568359375, 120.98046875, 124.7041015625, 128.427734375, 132.1513671875, 135.875]}, "gradients/decoder.transformer.h.5.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 4.0, 2.0, 2.0, 5.0, 18.0, 11.0, 25.0, 36.0, 55.0, 130.0, 290.0, 737.0, 2092.0, 7046.0, 30313.0, 640711.0, 3450772.0, 48004.0, 9623.0, 2757.0, 949.0, 358.0, 169.0, 73.0, 37.0, 25.0, 17.0, 13.0, 6.0, 6.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-348.0, -337.80078125, -327.6015625, -317.40234375, -307.203125, -297.00390625, -286.8046875, -276.60546875, -266.40625, -256.20703125, -246.0078125, -235.80859375, -225.609375, -215.41015625, -205.2109375, -195.01171875, -184.8125, -174.61328125, -164.4140625, -154.21484375, -144.015625, -133.81640625, -123.6171875, -113.41796875, -103.21875, -93.01953125, -82.8203125, -72.62109375, -62.421875, -52.22265625, -42.0234375, -31.82421875, -21.625, -11.42578125, -1.2265625, 8.97265625, 19.171875, 29.37109375, 39.5703125, 49.76953125, 59.96875, 70.16796875, 80.3671875, 90.56640625, 100.765625, 110.96484375, 121.1640625, 131.36328125, 141.5625, 151.76171875, 161.9609375, 172.16015625, 182.359375, 192.55859375, 202.7578125, 212.95703125, 223.15625, 233.35546875, 243.5546875, 253.75390625, 263.953125, 274.15234375, 284.3515625, 294.55078125, 304.75]}, "gradients/decoder.transformer.h.5.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 6.0, 24.0, 106.0, 433.0, 350.0, 69.0, 22.0, 3.0, 2.0, 1.0, 1.0], "bins": [-2600.3037109375, -2554.5888671875, -2508.873779296875, -2463.158935546875, -2417.44384765625, -2371.72900390625, -2326.013916015625, -2280.299072265625, -2234.583984375, -2188.869140625, -2143.154052734375, -2097.439208984375, -2051.72412109375, -2006.0091552734375, -1960.294189453125, -1914.5792236328125, -1868.8642578125, -1823.1492919921875, -1777.434326171875, -1731.7193603515625, -1686.00439453125, -1640.2894287109375, -1594.574462890625, -1548.8594970703125, -1503.1446533203125, -1457.4296875, -1411.7147216796875, -1365.999755859375, -1320.2847900390625, -1274.56982421875, -1228.8548583984375, -1183.139892578125, -1137.4249267578125, -1091.7099609375, -1045.9949951171875, -1000.280029296875, -954.5650634765625, -908.85009765625, -863.1351318359375, -817.420166015625, -771.7052001953125, -725.990234375, -680.2752685546875, -634.560302734375, -588.8453369140625, -543.13037109375, -497.4154357910156, -451.7004699707031, -405.9855041503906, -360.2705383300781, -314.5555725097656, -268.84063720703125, -223.1256561279297, -177.4106903076172, -131.69573974609375, -85.98077392578125, -40.26580810546875, 5.449153900146484, 51.16411590576172, 96.87907409667969, 142.5940399169922, 188.3090057373047, 234.02395629882812, 279.7389221191406, 325.4538879394531]}, "gradients/decoder.transformer.h.5.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 1.0, 2.0, 2.0, 4.0, 9.0, 5.0, 12.0, 11.0, 8.0, 13.0, 11.0, 14.0, 18.0, 23.0, 25.0, 34.0, 33.0, 37.0, 30.0, 36.0, 35.0, 27.0, 32.0, 42.0, 42.0, 29.0, 54.0, 43.0, 36.0, 42.0, 29.0, 34.0, 28.0, 30.0, 26.0, 22.0, 25.0, 18.0, 18.0, 12.0, 12.0, 10.0, 11.0, 6.0, 5.0, 2.0, 4.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0], "bins": [-281.7017822265625, -273.3728332519531, -265.0438537597656, -256.71490478515625, -248.3859405517578, -240.05697631835938, -231.72802734375, -223.39906311035156, -215.07009887695312, -206.7411346435547, -198.41217041015625, -190.08322143554688, -181.75425720214844, -173.42529296875, -165.09634399414062, -156.7673797607422, -148.43841552734375, -140.1094512939453, -131.78048706054688, -123.4515380859375, -115.12257385253906, -106.79360961914062, -98.46465301513672, -90.13569641113281, -81.80673217773438, -73.47776794433594, -65.14881134033203, -56.81985092163086, -48.49089050292969, -40.161930084228516, -31.832969665527344, -23.504009246826172, -15.175018310546875, -6.846057891845703, 1.4829025268554688, 9.81186294555664, 18.140823364257812, 26.469783782958984, 34.798744201660156, 43.12770462036133, 51.4566650390625, 59.78562545776367, 68.11458587646484, 76.44354248046875, 84.77250671386719, 93.10147094726562, 101.43042755126953, 109.75938415527344, 118.08834838867188, 126.41731262207031, 134.74627685546875, 143.07522583007812, 151.40419006347656, 159.733154296875, 168.06210327148438, 176.3910675048828, 184.72003173828125, 193.0489959716797, 201.37796020507812, 209.7069091796875, 218.03587341308594, 226.36483764648438, 234.69378662109375, 243.0227508544922, 251.35171508789062]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 7.0, 5.0, 4.0, 4.0, 8.0, 9.0, 5.0, 16.0, 13.0, 14.0, 12.0, 17.0, 24.0, 20.0, 36.0, 25.0, 34.0, 36.0, 33.0, 34.0, 35.0, 38.0, 40.0, 51.0, 31.0, 43.0, 45.0, 47.0, 26.0, 44.0, 33.0, 24.0, 23.0, 29.0, 25.0, 20.0, 17.0, 13.0, 18.0, 8.0, 6.0, 9.0, 10.0, 3.0, 2.0, 3.0, 6.0, 1.0, 1.0, 5.0, 2.0, 2.0], "bins": [-59.53125, -57.8583984375, -56.185546875, -54.5126953125, -52.83984375, -51.1669921875, -49.494140625, -47.8212890625, -46.1484375, -44.4755859375, -42.802734375, -41.1298828125, -39.45703125, -37.7841796875, -36.111328125, -34.4384765625, -32.765625, -31.0927734375, -29.419921875, -27.7470703125, -26.07421875, -24.4013671875, -22.728515625, -21.0556640625, -19.3828125, -17.7099609375, -16.037109375, -14.3642578125, -12.69140625, -11.0185546875, -9.345703125, -7.6728515625, -6.0, -4.3271484375, -2.654296875, -0.9814453125, 0.69140625, 2.3642578125, 4.037109375, 5.7099609375, 7.3828125, 9.0556640625, 10.728515625, 12.4013671875, 14.07421875, 15.7470703125, 17.419921875, 19.0927734375, 20.765625, 22.4384765625, 24.111328125, 25.7841796875, 27.45703125, 29.1298828125, 30.802734375, 32.4755859375, 34.1484375, 35.8212890625, 37.494140625, 39.1669921875, 40.83984375, 42.5126953125, 44.185546875, 45.8583984375, 47.53125]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 7.0, 16.0, 10.0, 19.0, 30.0, 48.0, 72.0, 107.0, 176.0, 242.0, 351.0, 493.0, 699.0, 1039.0, 1426.0, 2107.0, 3024.0, 4464.0, 6400.0, 9287.0, 13576.0, 19644.0, 29516.0, 44922.0, 72802.0, 128766.0, 232255.0, 193113.0, 103749.0, 60715.0, 38754.0, 25295.0, 17419.0, 11779.0, 8206.0, 5567.0, 3852.0, 2622.0, 1817.0, 1325.0, 900.0, 577.0, 435.0, 313.0, 220.0, 146.0, 93.0, 55.0, 38.0, 24.0, 17.0, 16.0, 10.0, 6.0, 1.0, 4.0, 3.0], "bins": [-2.50390625, -2.42987060546875, -2.3558349609375, -2.28179931640625, -2.207763671875, -2.13372802734375, -2.0596923828125, -1.98565673828125, -1.91162109375, -1.83758544921875, -1.7635498046875, -1.68951416015625, -1.615478515625, -1.54144287109375, -1.4674072265625, -1.39337158203125, -1.3193359375, -1.24530029296875, -1.1712646484375, -1.09722900390625, -1.023193359375, -0.94915771484375, -0.8751220703125, -0.80108642578125, -0.72705078125, -0.65301513671875, -0.5789794921875, -0.50494384765625, -0.430908203125, -0.35687255859375, -0.2828369140625, -0.20880126953125, -0.134765625, -0.06072998046875, 0.0133056640625, 0.08734130859375, 0.161376953125, 0.23541259765625, 0.3094482421875, 0.38348388671875, 0.45751953125, 0.53155517578125, 0.6055908203125, 0.67962646484375, 0.753662109375, 0.82769775390625, 0.9017333984375, 0.97576904296875, 1.0498046875, 1.12384033203125, 1.1978759765625, 1.27191162109375, 1.345947265625, 1.41998291015625, 1.4940185546875, 1.56805419921875, 1.64208984375, 1.71612548828125, 1.7901611328125, 1.86419677734375, 1.938232421875, 2.01226806640625, 2.0863037109375, 2.16033935546875, 2.234375]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 2.0, 1.0, 2.0, 5.0, 4.0, 13.0, 6.0, 8.0, 16.0, 11.0, 15.0, 20.0, 23.0, 33.0, 25.0, 32.0, 38.0, 40.0, 35.0, 49.0, 38.0, 33.0, 49.0, 1064.0, 54.0, 39.0, 46.0, 39.0, 39.0, 34.0, 40.0, 36.0, 25.0, 23.0, 19.0, 16.0, 16.0, 8.0, 8.0, 4.0, 10.0, 9.0, 7.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.5, -37.27001953125, -36.0400390625, -34.81005859375, -33.580078125, -32.35009765625, -31.1201171875, -29.89013671875, -28.66015625, -27.43017578125, -26.2001953125, -24.97021484375, -23.740234375, -22.51025390625, -21.2802734375, -20.05029296875, -18.8203125, -17.59033203125, -16.3603515625, -15.13037109375, -13.900390625, -12.67041015625, -11.4404296875, -10.21044921875, -8.98046875, -7.75048828125, -6.5205078125, -5.29052734375, -4.060546875, -2.83056640625, -1.6005859375, -0.37060546875, 0.859375, 2.08935546875, 3.3193359375, 4.54931640625, 5.779296875, 7.00927734375, 8.2392578125, 9.46923828125, 10.69921875, 11.92919921875, 13.1591796875, 14.38916015625, 15.619140625, 16.84912109375, 18.0791015625, 19.30908203125, 20.5390625, 21.76904296875, 22.9990234375, 24.22900390625, 25.458984375, 26.68896484375, 27.9189453125, 29.14892578125, 30.37890625, 31.60888671875, 32.8388671875, 34.06884765625, 35.298828125, 36.52880859375, 37.7587890625, 38.98876953125, 40.21875]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 3.0, 6.0, 5.0, 10.0, 20.0, 32.0, 36.0, 62.0, 81.0, 115.0, 161.0, 316.0, 504.0, 760.0, 1250.0, 2078.0, 3577.0, 6258.0, 11101.0, 20428.0, 39142.0, 79509.0, 186599.0, 1405545.0, 178873.0, 77027.0, 37887.0, 20002.0, 10855.0, 6129.0, 3540.0, 1981.0, 1236.0, 748.0, 475.0, 278.0, 182.0, 93.0, 86.0, 52.0, 29.0, 26.0, 14.0, 10.0, 7.0, 1.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.3984375, -3.28692626953125, -3.1754150390625, -3.06390380859375, -2.952392578125, -2.84088134765625, -2.7293701171875, -2.61785888671875, -2.50634765625, -2.39483642578125, -2.2833251953125, -2.17181396484375, -2.060302734375, -1.94879150390625, -1.8372802734375, -1.72576904296875, -1.6142578125, -1.50274658203125, -1.3912353515625, -1.27972412109375, -1.168212890625, -1.05670166015625, -0.9451904296875, -0.83367919921875, -0.72216796875, -0.61065673828125, -0.4991455078125, -0.38763427734375, -0.276123046875, -0.16461181640625, -0.0531005859375, 0.05841064453125, 0.169921875, 0.28143310546875, 0.3929443359375, 0.50445556640625, 0.615966796875, 0.72747802734375, 0.8389892578125, 0.95050048828125, 1.06201171875, 1.17352294921875, 1.2850341796875, 1.39654541015625, 1.508056640625, 1.61956787109375, 1.7310791015625, 1.84259033203125, 1.9541015625, 2.06561279296875, 2.1771240234375, 2.28863525390625, 2.400146484375, 2.51165771484375, 2.6231689453125, 2.73468017578125, 2.84619140625, 2.95770263671875, 3.0692138671875, 3.18072509765625, 3.292236328125, 3.40374755859375, 3.5152587890625, 3.62677001953125, 3.73828125]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 6.0, 4.0, 7.0, 6.0, 16.0, 8.0, 13.0, 11.0, 20.0, 17.0, 21.0, 34.0, 28.0, 41.0, 55.0, 70.0, 77.0, 72.0, 72.0, 74.0, 53.0, 46.0, 56.0, 28.0, 28.0, 24.0, 21.0, 20.0, 11.0, 14.0, 12.0, 15.0, 5.0, 4.0, 4.0, 3.0, 3.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.051513671875, -0.04984903335571289, -0.04818439483642578, -0.04651975631713867, -0.04485511779785156, -0.04319047927856445, -0.041525840759277344, -0.039861202239990234, -0.038196563720703125, -0.036531925201416016, -0.034867286682128906, -0.0332026481628418, -0.03153800964355469, -0.029873371124267578, -0.02820873260498047, -0.02654409408569336, -0.02487945556640625, -0.02321481704711914, -0.02155017852783203, -0.019885540008544922, -0.018220901489257812, -0.016556262969970703, -0.014891624450683594, -0.013226985931396484, -0.011562347412109375, -0.009897708892822266, -0.008233070373535156, -0.006568431854248047, -0.0049037933349609375, -0.003239154815673828, -0.0015745162963867188, 9.012222290039062e-05, 0.0017547607421875, 0.0034193992614746094, 0.005084037780761719, 0.006748676300048828, 0.008413314819335938, 0.010077953338623047, 0.011742591857910156, 0.013407230377197266, 0.015071868896484375, 0.016736507415771484, 0.018401145935058594, 0.020065784454345703, 0.021730422973632812, 0.023395061492919922, 0.02505970001220703, 0.02672433853149414, 0.02838897705078125, 0.03005361557006836, 0.03171825408935547, 0.03338289260864258, 0.03504753112792969, 0.0367121696472168, 0.038376808166503906, 0.040041446685791016, 0.041706085205078125, 0.043370723724365234, 0.045035362243652344, 0.04670000076293945, 0.04836463928222656, 0.05002927780151367, 0.05169391632080078, 0.05335855484008789, 0.055023193359375]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 3.0, 4.0, 3.0, 5.0, 11.0, 8.0, 11.0, 24.0, 18.0, 22.0, 32.0, 49.0, 41.0, 68.0, 75.0, 121.0, 210.0, 395.0, 1515.0, 469221.0, 574006.0, 1642.0, 403.0, 192.0, 116.0, 77.0, 66.0, 33.0, 33.0, 37.0, 31.0, 19.0, 12.0, 20.0, 4.0, 10.0, 7.0, 5.0, 3.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.064453125, -1.0322036743164062, -0.9999542236328125, -0.9677047729492188, -0.935455322265625, -0.9032058715820312, -0.8709564208984375, -0.8387069702148438, -0.80645751953125, -0.7742080688476562, -0.7419586181640625, -0.7097091674804688, -0.677459716796875, -0.6452102661132812, -0.6129608154296875, -0.5807113647460938, -0.5484619140625, -0.5162124633789062, -0.4839630126953125, -0.45171356201171875, -0.419464111328125, -0.38721466064453125, -0.3549652099609375, -0.32271575927734375, -0.29046630859375, -0.25821685791015625, -0.2259674072265625, -0.19371795654296875, -0.161468505859375, -0.12921905517578125, -0.0969696044921875, -0.06472015380859375, -0.032470703125, -0.00022125244140625, 0.0320281982421875, 0.06427764892578125, 0.096527099609375, 0.12877655029296875, 0.1610260009765625, 0.19327545166015625, 0.22552490234375, 0.25777435302734375, 0.2900238037109375, 0.32227325439453125, 0.354522705078125, 0.38677215576171875, 0.4190216064453125, 0.45127105712890625, 0.4835205078125, 0.5157699584960938, 0.5480194091796875, 0.5802688598632812, 0.612518310546875, 0.6447677612304688, 0.6770172119140625, 0.7092666625976562, 0.74151611328125, 0.7737655639648438, 0.8060150146484375, 0.8382644653320312, 0.870513916015625, 0.9027633666992188, 0.9350128173828125, 0.9672622680664062, 0.99951171875]}, "gradients/decoder.transformer.h.5.ln_cross_attn.weight": {"_type": "histogram", "values": [783.0, 234.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0168991070240736, 0.001999128609895706, 0.020897364243865013, 0.03979559987783432, 0.05869383364915848, 0.07759206742048264, 0.09649030864238739, 0.11538854241371155, 0.1342867761850357, 0.15318500995635986, 0.17208324372768402, 0.19098147749900818, 0.20987972617149353, 0.2287779450416565, 0.24767619371414185, 0.2665744423866272, 0.28547266125679016, 0.3043709099292755, 0.3232691287994385, 0.34216737747192383, 0.3610655963420868, 0.37996384501457214, 0.3988620638847351, 0.41776031255722046, 0.4366585612297058, 0.45555680990219116, 0.4744550287723541, 0.4933532774448395, 0.5122514963150024, 0.5311497449874878, 0.5500479936599731, 0.5689462423324585, 0.5878444314002991, 0.6067426800727844, 0.6256409287452698, 0.6445391178131104, 0.6634373664855957, 0.682335615158081, 0.7012338638305664, 0.7201321125030518, 0.7390303015708923, 0.7579285502433777, 0.776826798915863, 0.7957249879837036, 0.814623236656189, 0.8335214853286743, 0.8524197340011597, 0.871317982673645, 0.8902162313461304, 0.9091144800186157, 0.9280127286911011, 0.9469109177589417, 0.965809166431427, 0.9847074151039124, 1.003605604171753, 1.0225038528442383, 1.0414021015167236, 1.060300350189209, 1.0791985988616943, 1.0980968475341797, 1.116995096206665, 1.1358932256698608, 1.1547914743423462, 1.1736897230148315, 1.192587971687317]}, "gradients/decoder.transformer.h.5.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 5.0, 7.0, 6.0, 12.0, 9.0, 19.0, 15.0, 35.0, 44.0, 40.0, 45.0, 50.0, 65.0, 52.0, 78.0, 65.0, 64.0, 74.0, 55.0, 52.0, 52.0, 26.0, 41.0, 40.0, 12.0, 14.0, 9.0, 7.0, 8.0, 5.0, 6.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0], "bins": [-0.06185096502304077, -0.06038789451122284, -0.058924827724695206, -0.05746176093816757, -0.05599869042634964, -0.05453561991453171, -0.053072553128004074, -0.05160948634147644, -0.05014641582965851, -0.048683345317840576, -0.04722027853131294, -0.04575721174478531, -0.04429414123296738, -0.042831070721149445, -0.04136800393462181, -0.03990493714809418, -0.038441866636276245, -0.03697879612445831, -0.03551572933793068, -0.034052662551403046, -0.032589592039585114, -0.03112652339041233, -0.029663454741239548, -0.028200386092066765, -0.026737317442893982, -0.0252742487937212, -0.023811180144548416, -0.022348111495375633, -0.02088504284620285, -0.019421974197030067, -0.017958905547857285, -0.0164958368986845, -0.015032768249511719, -0.013569699600338936, -0.012106630951166153, -0.01064356230199337, -0.009180493652820587, -0.007717425003647804, -0.006254356354475021, -0.0047912877053022385, -0.0033282190561294556, -0.0018651504069566727, -0.00040208175778388977, 0.0010609868913888931, 0.002524055540561676, 0.003987124189734459, 0.005450192838907242, 0.006913261488080025, 0.008376330137252808, 0.00983939878642559, 0.011302467435598373, 0.012765536084771156, 0.01422860473394394, 0.015691673383116722, 0.017154742032289505, 0.018617810681462288, 0.02008087933063507, 0.021543947979807854, 0.023007016628980637, 0.02447008527815342, 0.025933153927326202, 0.027396222576498985, 0.028859291225671768, 0.03032235987484455, 0.031785428524017334]}, "gradients/decoder.transformer.h.5.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 7.0, 5.0, 4.0, 4.0, 8.0, 9.0, 5.0, 16.0, 13.0, 14.0, 12.0, 17.0, 24.0, 20.0, 35.0, 26.0, 34.0, 36.0, 33.0, 34.0, 35.0, 38.0, 39.0, 52.0, 31.0, 43.0, 45.0, 47.0, 26.0, 44.0, 33.0, 24.0, 23.0, 29.0, 25.0, 20.0, 17.0, 13.0, 17.0, 9.0, 6.0, 9.0, 10.0, 3.0, 2.0, 3.0, 6.0, 1.0, 1.0, 5.0, 2.0, 2.0], "bins": [-59.53125, -57.8583984375, -56.185546875, -54.5126953125, -52.83984375, -51.1669921875, -49.494140625, -47.8212890625, -46.1484375, -44.4755859375, -42.802734375, -41.1298828125, -39.45703125, -37.7841796875, -36.111328125, -34.4384765625, -32.765625, -31.0927734375, -29.419921875, -27.7470703125, -26.07421875, -24.4013671875, -22.728515625, -21.0556640625, -19.3828125, -17.7099609375, -16.037109375, -14.3642578125, -12.69140625, -11.0185546875, -9.345703125, -7.6728515625, -6.0, -4.3271484375, -2.654296875, -0.9814453125, 0.69140625, 2.3642578125, 4.037109375, 5.7099609375, 7.3828125, 9.0556640625, 10.728515625, 12.4013671875, 14.07421875, 15.7470703125, 17.419921875, 19.0927734375, 20.765625, 22.4384765625, 24.111328125, 25.7841796875, 27.45703125, 29.1298828125, 30.802734375, 32.4755859375, 34.1484375, 35.8212890625, 37.494140625, 39.1669921875, 40.83984375, 42.5126953125, 44.185546875, 45.8583984375, 47.53125]}, "gradients/decoder.transformer.h.5.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 2.0, 3.0, 4.0, 9.0, 6.0, 7.0, 8.0, 14.0, 22.0, 26.0, 33.0, 60.0, 67.0, 97.0, 134.0, 166.0, 260.0, 352.0, 599.0, 929.0, 1611.0, 2786.0, 5835.0, 15601.0, 79698.0, 744580.0, 157661.0, 22269.0, 7298.0, 3452.0, 1836.0, 1033.0, 679.0, 438.0, 272.0, 207.0, 121.0, 115.0, 73.0, 60.0, 29.0, 35.0, 21.0, 14.0, 8.0, 8.0, 8.0, 3.0, 4.0, 3.0, 2.0, 4.0, 1.0, 0.0, 4.0, 2.0], "bins": [-93.75, -90.962890625, -88.17578125, -85.388671875, -82.6015625, -79.814453125, -77.02734375, -74.240234375, -71.453125, -68.666015625, -65.87890625, -63.091796875, -60.3046875, -57.517578125, -54.73046875, -51.943359375, -49.15625, -46.369140625, -43.58203125, -40.794921875, -38.0078125, -35.220703125, -32.43359375, -29.646484375, -26.859375, -24.072265625, -21.28515625, -18.498046875, -15.7109375, -12.923828125, -10.13671875, -7.349609375, -4.5625, -1.775390625, 1.01171875, 3.798828125, 6.5859375, 9.373046875, 12.16015625, 14.947265625, 17.734375, 20.521484375, 23.30859375, 26.095703125, 28.8828125, 31.669921875, 34.45703125, 37.244140625, 40.03125, 42.818359375, 45.60546875, 48.392578125, 51.1796875, 53.966796875, 56.75390625, 59.541015625, 62.328125, 65.115234375, 67.90234375, 70.689453125, 73.4765625, 76.263671875, 79.05078125, 81.837890625, 84.625]}, "gradients/decoder.transformer.h.5.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 5.0, 7.0, 8.0, 3.0, 9.0, 7.0, 17.0, 7.0, 17.0, 18.0, 18.0, 23.0, 25.0, 32.0, 43.0, 42.0, 42.0, 66.0, 53.0, 92.0, 1598.0, 439.0, 89.0, 53.0, 40.0, 41.0, 45.0, 34.0, 35.0, 29.0, 19.0, 19.0, 18.0, 22.0, 8.0, 5.0, 8.0, 11.0, 3.0, 3.0, 1.0, 4.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-137.125, -132.390625, -127.65625, -122.921875, -118.1875, -113.453125, -108.71875, -103.984375, -99.25, -94.515625, -89.78125, -85.046875, -80.3125, -75.578125, -70.84375, -66.109375, -61.375, -56.640625, -51.90625, -47.171875, -42.4375, -37.703125, -32.96875, -28.234375, -23.5, -18.765625, -14.03125, -9.296875, -4.5625, 0.171875, 4.90625, 9.640625, 14.375, 19.109375, 23.84375, 28.578125, 33.3125, 38.046875, 42.78125, 47.515625, 52.25, 56.984375, 61.71875, 66.453125, 71.1875, 75.921875, 80.65625, 85.390625, 90.125, 94.859375, 99.59375, 104.328125, 109.0625, 113.796875, 118.53125, 123.265625, 128.0, 132.734375, 137.46875, 142.203125, 146.9375, 151.671875, 156.40625, 161.140625, 165.875]}, "gradients/decoder.transformer.h.5.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 7.0, 7.0, 8.0, 26.0, 35.0, 59.0, 81.0, 166.0, 353.0, 868.0, 4783.0, 1814568.0, 1318500.0, 4664.0, 846.0, 314.0, 159.0, 89.0, 70.0, 40.0, 25.0, 20.0, 15.0, 3.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-345.25, -335.09375, -324.9375, -314.78125, -304.625, -294.46875, -284.3125, -274.15625, -264.0, -253.84375, -243.6875, -233.53125, -223.375, -213.21875, -203.0625, -192.90625, -182.75, -172.59375, -162.4375, -152.28125, -142.125, -131.96875, -121.8125, -111.65625, -101.5, -91.34375, -81.1875, -71.03125, -60.875, -50.71875, -40.5625, -30.40625, -20.25, -10.09375, 0.0625, 10.21875, 20.375, 30.53125, 40.6875, 50.84375, 61.0, 71.15625, 81.3125, 91.46875, 101.625, 111.78125, 121.9375, 132.09375, 142.25, 152.40625, 162.5625, 172.71875, 182.875, 193.03125, 203.1875, 213.34375, 223.5, 233.65625, 243.8125, 253.96875, 264.125, 274.28125, 284.4375, 294.59375, 304.75]}, "gradients/decoder.transformer.h.5.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 10.0, 540.0, 457.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1083.6590576171875, -1052.6412353515625, -1021.6233520507812, -990.6055297851562, -959.5877075195312, -928.56982421875, -897.552001953125, -866.5341796875, -835.516357421875, -804.49853515625, -773.4806518554688, -742.4628295898438, -711.4450073242188, -680.4271240234375, -649.4093017578125, -618.3914794921875, -587.3735961914062, -556.3557739257812, -525.337890625, -494.320068359375, -463.30224609375, -432.2843933105469, -401.26654052734375, -370.24871826171875, -339.2308654785156, -308.2130126953125, -277.1951904296875, -246.17733764648438, -215.1595001220703, -184.14166259765625, -153.12380981445312, -122.10597229003906, -91.08819580078125, -60.07035446166992, -29.052513122558594, 1.96533203125, 32.98316955566406, 64.00100708007812, 95.01885986328125, 126.03669738769531, 157.05453491210938, 188.07237243652344, 219.0902099609375, 250.10806274414062, 281.12591552734375, 312.14373779296875, 343.1615905761719, 374.179443359375, 405.197265625, 436.2151184082031, 467.2329406738281, 498.25079345703125, 529.2686157226562, 560.2864990234375, 591.3043212890625, 622.3221435546875, 653.3399658203125, 684.3577880859375, 715.3756713867188, 746.3934936523438, 777.4113159179688, 808.42919921875, 839.447021484375, 870.46484375, 901.4827270507812]}, "gradients/decoder.transformer.h.5.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 4.0, 2.0, 0.0, 1.0, 5.0, 1.0, 2.0, 5.0, 6.0, 8.0, 7.0, 13.0, 15.0, 12.0, 9.0, 23.0, 19.0, 28.0, 32.0, 29.0, 31.0, 46.0, 40.0, 39.0, 34.0, 36.0, 37.0, 52.0, 40.0, 43.0, 36.0, 35.0, 31.0, 29.0, 40.0, 29.0, 28.0, 26.0, 19.0, 20.0, 22.0, 10.0, 6.0, 16.0, 10.0, 11.0, 6.0, 4.0, 5.0, 5.0, 7.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-275.23089599609375, -266.0248107910156, -256.8187255859375, -247.61265563964844, -238.40658569335938, -229.20050048828125, -219.99441528320312, -210.78834533691406, -201.582275390625, -192.37619018554688, -183.1701202392578, -173.9640350341797, -164.75796508789062, -155.5518798828125, -146.34579467773438, -137.1397247314453, -127.93363952636719, -118.7275619506836, -109.521484375, -100.31539916992188, -91.10932922363281, -81.90324401855469, -72.6971664428711, -63.4910888671875, -54.285011291503906, -45.07893371582031, -35.87285614013672, -26.66677474975586, -17.460697174072266, -8.254619598388672, 0.9514617919921875, 10.157539367675781, 19.363616943359375, 28.56969451904297, 37.77577209472656, 46.98185348510742, 56.187931060791016, 65.39401245117188, 74.60009002685547, 83.80616760253906, 93.01224517822266, 102.21832275390625, 111.42440032958984, 120.63047790527344, 129.83656311035156, 139.04263305664062, 148.24871826171875, 157.45480346679688, 166.66087341308594, 175.86695861816406, 185.07302856445312, 194.27911376953125, 203.4851837158203, 212.69126892089844, 221.8973388671875, 231.10342407226562, 240.30950927734375, 249.51559448242188, 258.7216796875, 267.927734375, 277.1338195800781, 286.33990478515625, 295.5459899902344, 304.7520751953125, 313.9581298828125]}, "gradients/decoder.transformer.h.4.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 4.0, 7.0, 5.0, 6.0, 7.0, 7.0, 13.0, 9.0, 12.0, 20.0, 13.0, 16.0, 31.0, 30.0, 25.0, 21.0, 34.0, 48.0, 38.0, 29.0, 35.0, 36.0, 55.0, 45.0, 47.0, 45.0, 34.0, 38.0, 37.0, 31.0, 34.0, 21.0, 27.0, 25.0, 21.0, 18.0, 18.0, 9.0, 6.0, 14.0, 8.0, 4.0, 9.0, 5.0, 1.0, 4.0, 3.0, 3.0, 4.0, 1.0, 0.0, 1.0], "bins": [-59.875, -58.15869140625, -56.4423828125, -54.72607421875, -53.009765625, -51.29345703125, -49.5771484375, -47.86083984375, -46.14453125, -44.42822265625, -42.7119140625, -40.99560546875, -39.279296875, -37.56298828125, -35.8466796875, -34.13037109375, -32.4140625, -30.69775390625, -28.9814453125, -27.26513671875, -25.548828125, -23.83251953125, -22.1162109375, -20.39990234375, -18.68359375, -16.96728515625, -15.2509765625, -13.53466796875, -11.818359375, -10.10205078125, -8.3857421875, -6.66943359375, -4.953125, -3.23681640625, -1.5205078125, 0.19580078125, 1.912109375, 3.62841796875, 5.3447265625, 7.06103515625, 8.77734375, 10.49365234375, 12.2099609375, 13.92626953125, 15.642578125, 17.35888671875, 19.0751953125, 20.79150390625, 22.5078125, 24.22412109375, 25.9404296875, 27.65673828125, 29.373046875, 31.08935546875, 32.8056640625, 34.52197265625, 36.23828125, 37.95458984375, 39.6708984375, 41.38720703125, 43.103515625, 44.81982421875, 46.5361328125, 48.25244140625, 49.96875]}, "gradients/decoder.transformer.h.4.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 2.0, 5.0, 5.0, 11.0, 8.0, 13.0, 10.0, 17.0, 29.0, 41.0, 53.0, 59.0, 101.0, 151.0, 190.0, 308.0, 516.0, 851.0, 1459.0, 2835.0, 5823.0, 14067.0, 54046.0, 1933887.0, 2095884.0, 56474.0, 14596.0, 6007.0, 2893.0, 1476.0, 913.0, 525.0, 323.0, 206.0, 142.0, 110.0, 68.0, 43.0, 29.0, 30.0, 25.0, 12.0, 10.0, 12.0, 5.0, 9.0, 4.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-198.875, -193.02734375, -187.1796875, -181.33203125, -175.484375, -169.63671875, -163.7890625, -157.94140625, -152.09375, -146.24609375, -140.3984375, -134.55078125, -128.703125, -122.85546875, -117.0078125, -111.16015625, -105.3125, -99.46484375, -93.6171875, -87.76953125, -81.921875, -76.07421875, -70.2265625, -64.37890625, -58.53125, -52.68359375, -46.8359375, -40.98828125, -35.140625, -29.29296875, -23.4453125, -17.59765625, -11.75, -5.90234375, -0.0546875, 5.79296875, 11.640625, 17.48828125, 23.3359375, 29.18359375, 35.03125, 40.87890625, 46.7265625, 52.57421875, 58.421875, 64.26953125, 70.1171875, 75.96484375, 81.8125, 87.66015625, 93.5078125, 99.35546875, 105.203125, 111.05078125, 116.8984375, 122.74609375, 128.59375, 134.44140625, 140.2890625, 146.13671875, 151.984375, 157.83203125, 163.6796875, 169.52734375, 175.375]}, "gradients/decoder.transformer.h.4.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 9.0, 5.0, 6.0, 5.0, 6.0, 8.0, 13.0, 23.0, 34.0, 35.0, 47.0, 77.0, 195.0, 558.0, 1369.0, 1042.0, 313.0, 125.0, 65.0, 44.0, 27.0, 15.0, 19.0, 13.0, 4.0, 7.0, 1.0, 3.0, 2.0, 1.0, 2.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-141.5, -136.90625, -132.3125, -127.71875, -123.125, -118.53125, -113.9375, -109.34375, -104.75, -100.15625, -95.5625, -90.96875, -86.375, -81.78125, -77.1875, -72.59375, -68.0, -63.40625, -58.8125, -54.21875, -49.625, -45.03125, -40.4375, -35.84375, -31.25, -26.65625, -22.0625, -17.46875, -12.875, -8.28125, -3.6875, 0.90625, 5.5, 10.09375, 14.6875, 19.28125, 23.875, 28.46875, 33.0625, 37.65625, 42.25, 46.84375, 51.4375, 56.03125, 60.625, 65.21875, 69.8125, 74.40625, 79.0, 83.59375, 88.1875, 92.78125, 97.375, 101.96875, 106.5625, 111.15625, 115.75, 120.34375, 124.9375, 129.53125, 134.125, 138.71875, 143.3125, 147.90625, 152.5]}, "gradients/decoder.transformer.h.4.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 6.0, 4.0, 3.0, 5.0, 11.0, 14.0, 12.0, 30.0, 29.0, 60.0, 59.0, 92.0, 160.0, 207.0, 395.0, 549.0, 838.0, 1405.0, 2377.0, 4098.0, 7777.0, 16606.0, 43606.0, 208462.0, 3549955.0, 271672.0, 48817.0, 18196.0, 8287.0, 4175.0, 2378.0, 1505.0, 840.0, 544.0, 387.0, 219.0, 148.0, 98.0, 90.0, 51.0, 32.0, 29.0, 22.0, 10.0, 11.0, 7.0, 7.0, 3.0, 2.0, 4.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-160.75, -155.48828125, -150.2265625, -144.96484375, -139.703125, -134.44140625, -129.1796875, -123.91796875, -118.65625, -113.39453125, -108.1328125, -102.87109375, -97.609375, -92.34765625, -87.0859375, -81.82421875, -76.5625, -71.30078125, -66.0390625, -60.77734375, -55.515625, -50.25390625, -44.9921875, -39.73046875, -34.46875, -29.20703125, -23.9453125, -18.68359375, -13.421875, -8.16015625, -2.8984375, 2.36328125, 7.625, 12.88671875, 18.1484375, 23.41015625, 28.671875, 33.93359375, 39.1953125, 44.45703125, 49.71875, 54.98046875, 60.2421875, 65.50390625, 70.765625, 76.02734375, 81.2890625, 86.55078125, 91.8125, 97.07421875, 102.3359375, 107.59765625, 112.859375, 118.12109375, 123.3828125, 128.64453125, 133.90625, 139.16796875, 144.4296875, 149.69140625, 154.953125, 160.21484375, 165.4765625, 170.73828125, 176.0]}, "gradients/decoder.transformer.h.4.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 8.0, 5.0, 19.0, 70.0, 288.0, 455.0, 119.0, 39.0, 11.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-589.4744262695312, -545.1289672851562, -500.783447265625, -456.43798828125, -412.0924987792969, -367.74700927734375, -323.40155029296875, -279.0560607910156, -234.7105712890625, -190.36508178710938, -146.0196075439453, -101.67413330078125, -57.328643798828125, -12.983154296875, 31.3623046875, 75.70779418945312, 120.05328369140625, 164.39877319335938, 208.74424743652344, 253.0897216796875, 297.4352111816406, 341.78070068359375, 386.12615966796875, 430.4716491699219, 474.817138671875, 519.16259765625, 563.5081176757812, 607.8535766601562, 652.1990966796875, 696.5445556640625, 740.8900146484375, 785.2354736328125, 829.5810546875, 873.926513671875, 918.2720336914062, 962.6174926757812, 1006.9630126953125, 1051.3084716796875, 1095.6539306640625, 1139.9993896484375, 1184.344970703125, 1228.6904296875, 1273.035888671875, 1317.38134765625, 1361.7269287109375, 1406.0723876953125, 1450.4178466796875, 1494.7633056640625, 1539.1087646484375, 1583.4542236328125, 1627.7996826171875, 1672.145263671875, 1716.49072265625, 1760.836181640625, 1805.181640625, 1849.527099609375, 1893.87255859375, 1938.218017578125, 1982.5634765625, 2026.908935546875, 2071.25439453125, 2115.60009765625, 2159.9453125, 2204.291015625, 2248.636474609375]}, "gradients/decoder.transformer.h.4.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 5.0, 3.0, 9.0, 12.0, 6.0, 8.0, 13.0, 10.0, 12.0, 11.0, 26.0, 30.0, 33.0, 29.0, 44.0, 30.0, 39.0, 29.0, 55.0, 36.0, 32.0, 58.0, 52.0, 50.0, 40.0, 42.0, 35.0, 35.0, 31.0, 31.0, 29.0, 22.0, 19.0, 19.0, 23.0, 13.0, 4.0, 8.0, 8.0, 8.0, 3.0, 3.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-293.8227233886719, -283.9903259277344, -274.1579284667969, -264.3255310058594, -254.49313354492188, -244.66073608398438, -234.82835388183594, -224.99595642089844, -215.16355895996094, -205.33116149902344, -195.49876403808594, -185.66636657714844, -175.833984375, -166.0015869140625, -156.169189453125, -146.3367919921875, -136.50439453125, -126.6719970703125, -116.839599609375, -107.00720977783203, -97.17481231689453, -87.34241485595703, -77.51002502441406, -67.67762756347656, -57.84523010253906, -48.01283264160156, -38.18043899536133, -28.34804344177246, -18.515647888183594, -8.683250427246094, 1.1491432189941406, 10.981536865234375, 20.81396484375, 30.646360397338867, 40.478755950927734, 50.31114959716797, 60.14354705810547, 69.97594451904297, 79.80833435058594, 89.64073181152344, 99.47312927246094, 109.30552673339844, 119.13792419433594, 128.97032165527344, 138.80270385742188, 148.63510131835938, 158.46749877929688, 168.29989624023438, 178.13229370117188, 187.96469116210938, 197.79708862304688, 207.62948608398438, 217.46188354492188, 227.29428100585938, 237.1266632080078, 246.9590606689453, 256.79144287109375, 266.62384033203125, 276.45623779296875, 286.28863525390625, 296.12103271484375, 305.95343017578125, 315.78582763671875, 325.61822509765625, 335.45062255859375]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 4.0, 3.0, 8.0, 11.0, 12.0, 5.0, 12.0, 20.0, 8.0, 23.0, 23.0, 22.0, 36.0, 29.0, 37.0, 34.0, 31.0, 46.0, 43.0, 32.0, 37.0, 32.0, 46.0, 44.0, 46.0, 42.0, 54.0, 31.0, 32.0, 31.0, 25.0, 28.0, 23.0, 20.0, 20.0, 7.0, 8.0, 8.0, 11.0, 6.0, 7.0, 2.0, 3.0, 4.0, 2.0, 6.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-53.75, -51.794921875, -49.83984375, -47.884765625, -45.9296875, -43.974609375, -42.01953125, -40.064453125, -38.109375, -36.154296875, -34.19921875, -32.244140625, -30.2890625, -28.333984375, -26.37890625, -24.423828125, -22.46875, -20.513671875, -18.55859375, -16.603515625, -14.6484375, -12.693359375, -10.73828125, -8.783203125, -6.828125, -4.873046875, -2.91796875, -0.962890625, 0.9921875, 2.947265625, 4.90234375, 6.857421875, 8.8125, 10.767578125, 12.72265625, 14.677734375, 16.6328125, 18.587890625, 20.54296875, 22.498046875, 24.453125, 26.408203125, 28.36328125, 30.318359375, 32.2734375, 34.228515625, 36.18359375, 38.138671875, 40.09375, 42.048828125, 44.00390625, 45.958984375, 47.9140625, 49.869140625, 51.82421875, 53.779296875, 55.734375, 57.689453125, 59.64453125, 61.599609375, 63.5546875, 65.509765625, 67.46484375, 69.419921875, 71.375]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 1.0, 6.0, 15.0, 14.0, 27.0, 30.0, 49.0, 86.0, 130.0, 175.0, 269.0, 402.0, 583.0, 831.0, 1363.0, 2017.0, 2950.0, 4651.0, 6925.0, 10654.0, 16650.0, 26415.0, 43911.0, 76665.0, 149779.0, 275252.0, 193786.0, 95413.0, 52773.0, 31151.0, 19537.0, 12308.0, 7987.0, 5310.0, 3597.0, 2266.0, 1525.0, 1059.0, 674.0, 423.0, 294.0, 198.0, 141.0, 95.0, 62.0, 36.0, 29.0, 18.0, 16.0, 8.0, 6.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.751953125, -2.659210205078125, -2.56646728515625, -2.473724365234375, -2.3809814453125, -2.288238525390625, -2.19549560546875, -2.102752685546875, -2.010009765625, -1.917266845703125, -1.82452392578125, -1.731781005859375, -1.6390380859375, -1.546295166015625, -1.45355224609375, -1.360809326171875, -1.26806640625, -1.175323486328125, -1.08258056640625, -0.989837646484375, -0.8970947265625, -0.804351806640625, -0.71160888671875, -0.618865966796875, -0.526123046875, -0.433380126953125, -0.34063720703125, -0.247894287109375, -0.1551513671875, -0.062408447265625, 0.03033447265625, 0.123077392578125, 0.2158203125, 0.308563232421875, 0.40130615234375, 0.494049072265625, 0.5867919921875, 0.679534912109375, 0.77227783203125, 0.865020751953125, 0.957763671875, 1.050506591796875, 1.14324951171875, 1.235992431640625, 1.3287353515625, 1.421478271484375, 1.51422119140625, 1.606964111328125, 1.69970703125, 1.792449951171875, 1.88519287109375, 1.977935791015625, 2.0706787109375, 2.163421630859375, 2.25616455078125, 2.348907470703125, 2.441650390625, 2.534393310546875, 2.62713623046875, 2.719879150390625, 2.8126220703125, 2.905364990234375, 2.99810791015625, 3.090850830078125, 3.18359375]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 5.0, 4.0, 7.0, 5.0, 2.0, 1.0, 7.0, 8.0, 15.0, 12.0, 11.0, 17.0, 15.0, 22.0, 23.0, 30.0, 13.0, 33.0, 32.0, 37.0, 34.0, 49.0, 42.0, 33.0, 40.0, 1059.0, 39.0, 41.0, 31.0, 40.0, 24.0, 28.0, 34.0, 31.0, 35.0, 29.0, 19.0, 19.0, 20.0, 15.0, 14.0, 10.0, 14.0, 11.0, 10.0, 5.0, 2.0, 3.0, 7.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.21875, -39.0322265625, -37.845703125, -36.6591796875, -35.47265625, -34.2861328125, -33.099609375, -31.9130859375, -30.7265625, -29.5400390625, -28.353515625, -27.1669921875, -25.98046875, -24.7939453125, -23.607421875, -22.4208984375, -21.234375, -20.0478515625, -18.861328125, -17.6748046875, -16.48828125, -15.3017578125, -14.115234375, -12.9287109375, -11.7421875, -10.5556640625, -9.369140625, -8.1826171875, -6.99609375, -5.8095703125, -4.623046875, -3.4365234375, -2.25, -1.0634765625, 0.123046875, 1.3095703125, 2.49609375, 3.6826171875, 4.869140625, 6.0556640625, 7.2421875, 8.4287109375, 9.615234375, 10.8017578125, 11.98828125, 13.1748046875, 14.361328125, 15.5478515625, 16.734375, 17.9208984375, 19.107421875, 20.2939453125, 21.48046875, 22.6669921875, 23.853515625, 25.0400390625, 26.2265625, 27.4130859375, 28.599609375, 29.7861328125, 30.97265625, 32.1591796875, 33.345703125, 34.5322265625, 35.71875]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 5.0, 9.0, 12.0, 18.0, 19.0, 20.0, 32.0, 58.0, 80.0, 137.0, 158.0, 277.0, 374.0, 669.0, 888.0, 1506.0, 2284.0, 3476.0, 5767.0, 9314.0, 14912.0, 25000.0, 43200.0, 78288.0, 161364.0, 1359048.0, 184738.0, 87565.0, 47237.0, 27286.0, 16231.0, 10025.0, 6244.0, 3892.0, 2502.0, 1592.0, 966.0, 675.0, 444.0, 273.0, 185.0, 122.0, 103.0, 55.0, 24.0, 20.0, 17.0, 11.0, 7.0, 7.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.482421875, -3.3817138671875, -3.281005859375, -3.1802978515625, -3.07958984375, -2.9788818359375, -2.878173828125, -2.7774658203125, -2.6767578125, -2.5760498046875, -2.475341796875, -2.3746337890625, -2.27392578125, -2.1732177734375, -2.072509765625, -1.9718017578125, -1.87109375, -1.7703857421875, -1.669677734375, -1.5689697265625, -1.46826171875, -1.3675537109375, -1.266845703125, -1.1661376953125, -1.0654296875, -0.9647216796875, -0.864013671875, -0.7633056640625, -0.66259765625, -0.5618896484375, -0.461181640625, -0.3604736328125, -0.259765625, -0.1590576171875, -0.058349609375, 0.0423583984375, 0.14306640625, 0.2437744140625, 0.344482421875, 0.4451904296875, 0.5458984375, 0.6466064453125, 0.747314453125, 0.8480224609375, 0.94873046875, 1.0494384765625, 1.150146484375, 1.2508544921875, 1.3515625, 1.4522705078125, 1.552978515625, 1.6536865234375, 1.75439453125, 1.8551025390625, 1.955810546875, 2.0565185546875, 2.1572265625, 2.2579345703125, 2.358642578125, 2.4593505859375, 2.56005859375, 2.6607666015625, 2.761474609375, 2.8621826171875, 2.962890625]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 5.0, 6.0, 4.0, 2.0, 2.0, 9.0, 7.0, 14.0, 14.0, 11.0, 18.0, 23.0, 38.0, 46.0, 68.0, 70.0, 91.0, 82.0, 78.0, 73.0, 83.0, 64.0, 55.0, 29.0, 20.0, 16.0, 16.0, 17.0, 9.0, 6.0, 3.0, 4.0, 8.0, 4.0, 2.0, 4.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.03131103515625, -0.030415058135986328, -0.029519081115722656, -0.028623104095458984, -0.027727127075195312, -0.02683115005493164, -0.02593517303466797, -0.025039196014404297, -0.024143218994140625, -0.023247241973876953, -0.02235126495361328, -0.02145528793334961, -0.020559310913085938, -0.019663333892822266, -0.018767356872558594, -0.017871379852294922, -0.01697540283203125, -0.016079425811767578, -0.015183448791503906, -0.014287471771240234, -0.013391494750976562, -0.01249551773071289, -0.011599540710449219, -0.010703563690185547, -0.009807586669921875, -0.008911609649658203, -0.008015632629394531, -0.007119655609130859, -0.0062236785888671875, -0.005327701568603516, -0.004431724548339844, -0.003535747528076172, -0.0026397705078125, -0.0017437934875488281, -0.0008478164672851562, 4.8160552978515625e-05, 0.0009441375732421875, 0.0018401145935058594, 0.0027360916137695312, 0.003632068634033203, 0.004528045654296875, 0.005424022674560547, 0.006319999694824219, 0.007215976715087891, 0.008111953735351562, 0.009007930755615234, 0.009903907775878906, 0.010799884796142578, 0.01169586181640625, 0.012591838836669922, 0.013487815856933594, 0.014383792877197266, 0.015279769897460938, 0.01617574691772461, 0.01707172393798828, 0.017967700958251953, 0.018863677978515625, 0.019759654998779297, 0.02065563201904297, 0.02155160903930664, 0.022447586059570312, 0.023343563079833984, 0.024239540100097656, 0.025135517120361328, 0.026031494140625]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 5.0, 3.0, 6.0, 1.0, 4.0, 6.0, 13.0, 11.0, 8.0, 19.0, 18.0, 31.0, 27.0, 47.0, 48.0, 73.0, 141.0, 230.0, 470.0, 3108.0, 346567.0, 692346.0, 4140.0, 534.0, 226.0, 142.0, 91.0, 38.0, 44.0, 27.0, 29.0, 31.0, 10.0, 16.0, 15.0, 7.0, 5.0, 9.0, 7.0, 4.0, 4.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.47021484375, -0.4540252685546875, -0.437835693359375, -0.4216461181640625, -0.40545654296875, -0.3892669677734375, -0.373077392578125, -0.3568878173828125, -0.3406982421875, -0.3245086669921875, -0.308319091796875, -0.2921295166015625, -0.27593994140625, -0.2597503662109375, -0.243560791015625, -0.2273712158203125, -0.211181640625, -0.1949920654296875, -0.178802490234375, -0.1626129150390625, -0.14642333984375, -0.1302337646484375, -0.114044189453125, -0.0978546142578125, -0.0816650390625, -0.0654754638671875, -0.049285888671875, -0.0330963134765625, -0.01690673828125, -0.0007171630859375, 0.015472412109375, 0.0316619873046875, 0.0478515625, 0.0640411376953125, 0.080230712890625, 0.0964202880859375, 0.11260986328125, 0.1287994384765625, 0.144989013671875, 0.1611785888671875, 0.1773681640625, 0.1935577392578125, 0.209747314453125, 0.2259368896484375, 0.24212646484375, 0.2583160400390625, 0.274505615234375, 0.2906951904296875, 0.306884765625, 0.3230743408203125, 0.339263916015625, 0.3554534912109375, 0.37164306640625, 0.3878326416015625, 0.404022216796875, 0.4202117919921875, 0.4364013671875, 0.4525909423828125, 0.468780517578125, 0.4849700927734375, 0.50115966796875, 0.5173492431640625, 0.533538818359375, 0.5497283935546875, 0.56591796875]}, "gradients/decoder.transformer.h.4.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 21.0, 189.0, 626.0, 154.0, 21.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.017965836450457573, -0.014657502993941307, -0.011349169537425041, -0.0080408351495862, -0.004732501693069935, -0.0014241673052310944, 0.0018841661512851715, 0.005192499607801437, 0.008500833064317703, 0.011809166520833969, 0.015117499977350235, 0.0184258334338665, 0.021734166890382767, 0.025042502209544182, 0.028350835666060448, 0.03165917098522186, 0.03496750444173813, 0.038275837898254395, 0.04158417135477066, 0.044892504811286926, 0.04820083826780319, 0.05150917172431946, 0.054817505180835724, 0.05812583863735199, 0.061434172093868256, 0.06474250555038452, 0.06805083900690079, 0.07135917246341705, 0.07466750591993332, 0.07797583937644958, 0.08128417283296585, 0.08459250628948212, 0.08790083974599838, 0.09120917320251465, 0.09451750665903091, 0.09782584011554718, 0.10113417357206345, 0.10444250702857971, 0.10775084048509598, 0.11105917394161224, 0.11436750739812851, 0.11767584085464478, 0.12098417431116104, 0.12429250776767731, 0.12760084867477417, 0.13090917468070984, 0.1342175155878067, 0.13752584159374237, 0.14083418250083923, 0.1441425234079361, 0.14745084941387177, 0.15075919032096863, 0.1540675163269043, 0.15737585723400116, 0.16068418323993683, 0.1639925241470337, 0.16730085015296936, 0.17060919106006622, 0.1739175170660019, 0.17722585797309875, 0.18053418397903442, 0.1838425248861313, 0.18715085089206696, 0.19045919179916382, 0.1937675178050995]}, "gradients/decoder.transformer.h.4.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 1.0, 4.0, 9.0, 5.0, 12.0, 9.0, 20.0, 13.0, 14.0, 21.0, 31.0, 29.0, 31.0, 34.0, 28.0, 24.0, 37.0, 46.0, 35.0, 35.0, 39.0, 65.0, 44.0, 50.0, 34.0, 35.0, 29.0, 29.0, 38.0, 24.0, 28.0, 28.0, 30.0, 17.0, 12.0, 14.0, 9.0, 13.0, 12.0, 4.0, 4.0, 6.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.01277703046798706, -0.012350988574326038, -0.011924946680665016, -0.011498904787003994, -0.011072862893342972, -0.01064682099968195, -0.010220779106020927, -0.009794737212359905, -0.009368695318698883, -0.008942653425037861, -0.008516611531376839, -0.008090569637715816, -0.007664527744054794, -0.007238485850393772, -0.00681244395673275, -0.006386402063071728, -0.005960360169410706, -0.005534318275749683, -0.005108276382088661, -0.004682234488427639, -0.004256192594766617, -0.0038301507011055946, -0.0034041088074445724, -0.0029780669137835503, -0.002552025020122528, -0.002125983126461506, -0.0016999412328004837, -0.0012738993391394615, -0.0008478574454784393, -0.00042181555181741714, 4.2263418436050415e-06, 0.00043026823550462723, 0.0008563101291656494, 0.0012823520228266716, 0.0017083939164876938, 0.002134435810148716, 0.002560477703809738, 0.0029865195974707603, 0.0034125614911317825, 0.0038386033847928047, 0.004264645278453827, 0.004690687172114849, 0.005116729065775871, 0.0055427709594368935, 0.005968812853097916, 0.006394854746758938, 0.00682089664041996, 0.007246938534080982, 0.007672980427742004, 0.008099022321403027, 0.008525064215064049, 0.008951106108725071, 0.009377148002386093, 0.009803189896047115, 0.010229231789708138, 0.01065527368336916, 0.011081315577030182, 0.011507357470691204, 0.011933399364352226, 0.012359441258013248, 0.01278548315167427, 0.013211525045335293, 0.013637566938996315, 0.014063608832657337, 0.01448965072631836]}, "gradients/decoder.transformer.h.4.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 4.0, 3.0, 8.0, 11.0, 12.0, 5.0, 12.0, 20.0, 8.0, 23.0, 23.0, 22.0, 36.0, 29.0, 37.0, 34.0, 31.0, 46.0, 43.0, 32.0, 37.0, 32.0, 46.0, 44.0, 46.0, 42.0, 54.0, 31.0, 32.0, 31.0, 25.0, 28.0, 23.0, 20.0, 20.0, 7.0, 8.0, 8.0, 11.0, 6.0, 7.0, 2.0, 3.0, 4.0, 2.0, 6.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-53.75, -51.794921875, -49.83984375, -47.884765625, -45.9296875, -43.974609375, -42.01953125, -40.064453125, -38.109375, -36.154296875, -34.19921875, -32.244140625, -30.2890625, -28.333984375, -26.37890625, -24.423828125, -22.46875, -20.513671875, -18.55859375, -16.603515625, -14.6484375, -12.693359375, -10.73828125, -8.783203125, -6.828125, -4.873046875, -2.91796875, -0.962890625, 0.9921875, 2.947265625, 4.90234375, 6.857421875, 8.8125, 10.767578125, 12.72265625, 14.677734375, 16.6328125, 18.587890625, 20.54296875, 22.498046875, 24.453125, 26.408203125, 28.36328125, 30.318359375, 32.2734375, 34.228515625, 36.18359375, 38.138671875, 40.09375, 42.048828125, 44.00390625, 45.958984375, 47.9140625, 49.869140625, 51.82421875, 53.779296875, 55.734375, 57.689453125, 59.64453125, 61.599609375, 63.5546875, 65.509765625, 67.46484375, 69.419921875, 71.375]}, "gradients/decoder.transformer.h.4.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 1.0, 2.0, 10.0, 3.0, 5.0, 8.0, 21.0, 26.0, 28.0, 49.0, 52.0, 85.0, 123.0, 175.0, 272.0, 359.0, 537.0, 731.0, 1104.0, 1638.0, 2467.0, 4163.0, 7046.0, 13662.0, 32159.0, 115560.0, 646710.0, 149360.0, 36961.0, 15003.0, 7656.0, 4308.0, 2697.0, 1784.0, 1174.0, 799.0, 583.0, 360.0, 251.0, 194.0, 110.0, 100.0, 59.0, 43.0, 40.0, 23.0, 22.0, 10.0, 11.0, 7.0, 4.0, 2.0, 2.0, 4.0, 2.0, 1.0, 0.0, 1.0], "bins": [-93.125, -90.265625, -87.40625, -84.546875, -81.6875, -78.828125, -75.96875, -73.109375, -70.25, -67.390625, -64.53125, -61.671875, -58.8125, -55.953125, -53.09375, -50.234375, -47.375, -44.515625, -41.65625, -38.796875, -35.9375, -33.078125, -30.21875, -27.359375, -24.5, -21.640625, -18.78125, -15.921875, -13.0625, -10.203125, -7.34375, -4.484375, -1.625, 1.234375, 4.09375, 6.953125, 9.8125, 12.671875, 15.53125, 18.390625, 21.25, 24.109375, 26.96875, 29.828125, 32.6875, 35.546875, 38.40625, 41.265625, 44.125, 46.984375, 49.84375, 52.703125, 55.5625, 58.421875, 61.28125, 64.140625, 67.0, 69.859375, 72.71875, 75.578125, 78.4375, 81.296875, 84.15625, 87.015625, 89.875]}, "gradients/decoder.transformer.h.4.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 4.0, 0.0, 4.0, 8.0, 2.0, 3.0, 6.0, 8.0, 14.0, 18.0, 15.0, 19.0, 24.0, 16.0, 24.0, 27.0, 44.0, 44.0, 48.0, 53.0, 71.0, 130.0, 267.0, 1539.0, 176.0, 98.0, 57.0, 66.0, 27.0, 37.0, 47.0, 29.0, 25.0, 25.0, 13.0, 17.0, 16.0, 6.0, 11.0, 3.0, 6.0, 5.0, 5.0, 2.0, 0.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-131.75, -127.08984375, -122.4296875, -117.76953125, -113.109375, -108.44921875, -103.7890625, -99.12890625, -94.46875, -89.80859375, -85.1484375, -80.48828125, -75.828125, -71.16796875, -66.5078125, -61.84765625, -57.1875, -52.52734375, -47.8671875, -43.20703125, -38.546875, -33.88671875, -29.2265625, -24.56640625, -19.90625, -15.24609375, -10.5859375, -5.92578125, -1.265625, 3.39453125, 8.0546875, 12.71484375, 17.375, 22.03515625, 26.6953125, 31.35546875, 36.015625, 40.67578125, 45.3359375, 49.99609375, 54.65625, 59.31640625, 63.9765625, 68.63671875, 73.296875, 77.95703125, 82.6171875, 87.27734375, 91.9375, 96.59765625, 101.2578125, 105.91796875, 110.578125, 115.23828125, 119.8984375, 124.55859375, 129.21875, 133.87890625, 138.5390625, 143.19921875, 147.859375, 152.51953125, 157.1796875, 161.83984375, 166.5]}, "gradients/decoder.transformer.h.4.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 5.0, 5.0, 6.0, 8.0, 8.0, 13.0, 22.0, 26.0, 36.0, 47.0, 71.0, 106.0, 183.0, 355.0, 1018.0, 4544.0, 41387.0, 2997114.0, 91023.0, 7216.0, 1421.0, 458.0, 214.0, 115.0, 81.0, 63.0, 64.0, 37.0, 14.0, 12.0, 11.0, 8.0, 8.0, 2.0, 1.0, 3.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-337.75, -328.26953125, -318.7890625, -309.30859375, -299.828125, -290.34765625, -280.8671875, -271.38671875, -261.90625, -252.42578125, -242.9453125, -233.46484375, -223.984375, -214.50390625, -205.0234375, -195.54296875, -186.0625, -176.58203125, -167.1015625, -157.62109375, -148.140625, -138.66015625, -129.1796875, -119.69921875, -110.21875, -100.73828125, -91.2578125, -81.77734375, -72.296875, -62.81640625, -53.3359375, -43.85546875, -34.375, -24.89453125, -15.4140625, -5.93359375, 3.546875, 13.02734375, 22.5078125, 31.98828125, 41.46875, 50.94921875, 60.4296875, 69.91015625, 79.390625, 88.87109375, 98.3515625, 107.83203125, 117.3125, 126.79296875, 136.2734375, 145.75390625, 155.234375, 164.71484375, 174.1953125, 183.67578125, 193.15625, 202.63671875, 212.1171875, 221.59765625, 231.078125, 240.55859375, 250.0390625, 259.51953125, 269.0]}, "gradients/decoder.transformer.h.4.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 18.0, 160.0, 573.0, 236.0, 26.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1370.4422607421875, -1342.791259765625, -1315.140380859375, -1287.4893798828125, -1259.83837890625, -1232.1875, -1204.5364990234375, -1176.885498046875, -1149.234619140625, -1121.5836181640625, -1093.9327392578125, -1066.28173828125, -1038.6307373046875, -1010.9798583984375, -983.328857421875, -955.6779174804688, -928.0269165039062, -900.3759765625, -872.7249755859375, -845.0740356445312, -817.423095703125, -789.7720947265625, -762.1211547851562, -734.47021484375, -706.8192138671875, -679.1682739257812, -651.5172729492188, -623.8663330078125, -596.2153930664062, -568.564453125, -540.9134521484375, -513.2625122070312, -485.611572265625, -457.9606018066406, -430.3096618652344, -402.65869140625, -375.00775146484375, -347.3567810058594, -319.705810546875, -292.05487060546875, -264.4039001464844, -236.75294494628906, -209.10198974609375, -181.45101928710938, -153.80006408691406, -126.14910888671875, -98.49813842773438, -70.84718322753906, -43.19622802734375, -15.545269012451172, 12.105690002441406, 39.75665283203125, 67.40760803222656, 95.05856323242188, 122.70953369140625, 150.36048889160156, 178.01144409179688, 205.6623992919922, 233.3133544921875, 260.9643249511719, 288.61529541015625, 316.2662353515625, 343.9172058105469, 371.56817626953125, 399.2191162109375]}, "gradients/decoder.transformer.h.4.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 7.0, 1.0, 1.0, 6.0, 6.0, 4.0, 17.0, 13.0, 13.0, 11.0, 20.0, 19.0, 29.0, 19.0, 27.0, 32.0, 45.0, 23.0, 47.0, 47.0, 42.0, 40.0, 45.0, 44.0, 42.0, 52.0, 39.0, 29.0, 31.0, 41.0, 22.0, 23.0, 31.0, 23.0, 16.0, 20.0, 17.0, 13.0, 18.0, 5.0, 13.0, 4.0, 7.0, 3.0, 4.0, 2.0, 1.0, 3.0, 3.0], "bins": [-360.70159912109375, -351.1728515625, -341.64410400390625, -332.1153259277344, -322.5865783691406, -313.0578308105469, -303.5290832519531, -294.00030517578125, -284.4715576171875, -274.94281005859375, -265.4140625, -255.8852996826172, -246.35653686523438, -236.82778930664062, -227.29904174804688, -217.77027893066406, -208.2415313720703, -198.71278381347656, -189.18402099609375, -179.6552734375, -170.1265106201172, -160.59776306152344, -151.06900024414062, -141.54025268554688, -132.01150512695312, -122.48274993896484, -112.95399475097656, -103.42524719238281, -93.896484375, -84.36773681640625, -74.83898162841797, -65.31022644042969, -55.781463623046875, -46.252708435058594, -36.72395324707031, -27.195201873779297, -17.666446685791016, -8.137691497802734, 1.3910598754882812, 10.919815063476562, 20.448570251464844, 29.977325439453125, 39.506080627441406, 49.03483200073242, 58.5635871887207, 68.09234619140625, 77.62109375, 87.14984893798828, 96.67860412597656, 106.20735931396484, 115.73611450195312, 125.26486206054688, 134.7936248779297, 144.32237243652344, 153.85113525390625, 163.3798828125, 172.90863037109375, 182.4373779296875, 191.9661407470703, 201.49488830566406, 211.02365112304688, 220.55239868164062, 230.08114624023438, 239.6099090576172, 249.138671875]}, "gradients/decoder.transformer.h.3.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 4.0, 6.0, 7.0, 11.0, 4.0, 11.0, 10.0, 14.0, 15.0, 21.0, 17.0, 35.0, 22.0, 35.0, 34.0, 27.0, 30.0, 43.0, 29.0, 50.0, 39.0, 43.0, 34.0, 39.0, 32.0, 47.0, 46.0, 25.0, 31.0, 29.0, 37.0, 22.0, 23.0, 22.0, 26.0, 14.0, 15.0, 16.0, 12.0, 7.0, 5.0, 3.0, 2.0, 7.0, 3.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-56.0, -54.134765625, -52.26953125, -50.404296875, -48.5390625, -46.673828125, -44.80859375, -42.943359375, -41.078125, -39.212890625, -37.34765625, -35.482421875, -33.6171875, -31.751953125, -29.88671875, -28.021484375, -26.15625, -24.291015625, -22.42578125, -20.560546875, -18.6953125, -16.830078125, -14.96484375, -13.099609375, -11.234375, -9.369140625, -7.50390625, -5.638671875, -3.7734375, -1.908203125, -0.04296875, 1.822265625, 3.6875, 5.552734375, 7.41796875, 9.283203125, 11.1484375, 13.013671875, 14.87890625, 16.744140625, 18.609375, 20.474609375, 22.33984375, 24.205078125, 26.0703125, 27.935546875, 29.80078125, 31.666015625, 33.53125, 35.396484375, 37.26171875, 39.126953125, 40.9921875, 42.857421875, 44.72265625, 46.587890625, 48.453125, 50.318359375, 52.18359375, 54.048828125, 55.9140625, 57.779296875, 59.64453125, 61.509765625, 63.375]}, "gradients/decoder.transformer.h.3.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 3.0, 7.0, 2.0, 7.0, 5.0, 7.0, 15.0, 22.0, 25.0, 42.0, 40.0, 39.0, 63.0, 100.0, 135.0, 207.0, 436.0, 987.0, 3107.0, 23365.0, 4125795.0, 33869.0, 3712.0, 1068.0, 439.0, 230.0, 144.0, 111.0, 67.0, 60.0, 34.0, 35.0, 25.0, 15.0, 19.0, 12.0, 8.0, 3.0, 5.0, 7.0, 5.0, 4.0, 1.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-655.5, -636.53125, -617.5625, -598.59375, -579.625, -560.65625, -541.6875, -522.71875, -503.75, -484.78125, -465.8125, -446.84375, -427.875, -408.90625, -389.9375, -370.96875, -352.0, -333.03125, -314.0625, -295.09375, -276.125, -257.15625, -238.1875, -219.21875, -200.25, -181.28125, -162.3125, -143.34375, -124.375, -105.40625, -86.4375, -67.46875, -48.5, -29.53125, -10.5625, 8.40625, 27.375, 46.34375, 65.3125, 84.28125, 103.25, 122.21875, 141.1875, 160.15625, 179.125, 198.09375, 217.0625, 236.03125, 255.0, 273.96875, 292.9375, 311.90625, 330.875, 349.84375, 368.8125, 387.78125, 406.75, 425.71875, 444.6875, 463.65625, 482.625, 501.59375, 520.5625, 539.53125, 558.5]}, "gradients/decoder.transformer.h.3.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0, 3.0, 3.0, 1.0, 2.0, 3.0, 6.0, 5.0, 3.0, 9.0, 10.0, 10.0, 14.0, 21.0, 41.0, 50.0, 79.0, 118.0, 220.0, 507.0, 1006.0, 877.0, 412.0, 251.0, 113.0, 92.0, 42.0, 39.0, 23.0, 20.0, 20.0, 20.0, 11.0, 3.0, 11.0, 6.0, 4.0, 3.0, 5.0, 6.0, 3.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-102.0625, -98.8642578125, -95.666015625, -92.4677734375, -89.26953125, -86.0712890625, -82.873046875, -79.6748046875, -76.4765625, -73.2783203125, -70.080078125, -66.8818359375, -63.68359375, -60.4853515625, -57.287109375, -54.0888671875, -50.890625, -47.6923828125, -44.494140625, -41.2958984375, -38.09765625, -34.8994140625, -31.701171875, -28.5029296875, -25.3046875, -22.1064453125, -18.908203125, -15.7099609375, -12.51171875, -9.3134765625, -6.115234375, -2.9169921875, 0.28125, 3.4794921875, 6.677734375, 9.8759765625, 13.07421875, 16.2724609375, 19.470703125, 22.6689453125, 25.8671875, 29.0654296875, 32.263671875, 35.4619140625, 38.66015625, 41.8583984375, 45.056640625, 48.2548828125, 51.453125, 54.6513671875, 57.849609375, 61.0478515625, 64.24609375, 67.4443359375, 70.642578125, 73.8408203125, 77.0390625, 80.2373046875, 83.435546875, 86.6337890625, 89.83203125, 93.0302734375, 96.228515625, 99.4267578125, 102.625]}, "gradients/decoder.transformer.h.3.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 3.0, 4.0, 7.0, 6.0, 8.0, 14.0, 11.0, 22.0, 30.0, 52.0, 113.0, 289.0, 1081.0, 4420.0, 30574.0, 3866165.0, 275071.0, 12984.0, 2451.0, 610.0, 175.0, 71.0, 36.0, 25.0, 19.0, 13.0, 11.0, 7.0, 10.0, 4.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-492.25, -478.10546875, -463.9609375, -449.81640625, -435.671875, -421.52734375, -407.3828125, -393.23828125, -379.09375, -364.94921875, -350.8046875, -336.66015625, -322.515625, -308.37109375, -294.2265625, -280.08203125, -265.9375, -251.79296875, -237.6484375, -223.50390625, -209.359375, -195.21484375, -181.0703125, -166.92578125, -152.78125, -138.63671875, -124.4921875, -110.34765625, -96.203125, -82.05859375, -67.9140625, -53.76953125, -39.625, -25.48046875, -11.3359375, 2.80859375, 16.953125, 31.09765625, 45.2421875, 59.38671875, 73.53125, 87.67578125, 101.8203125, 115.96484375, 130.109375, 144.25390625, 158.3984375, 172.54296875, 186.6875, 200.83203125, 214.9765625, 229.12109375, 243.265625, 257.41015625, 271.5546875, 285.69921875, 299.84375, 313.98828125, 328.1328125, 342.27734375, 356.421875, 370.56640625, 384.7109375, 398.85546875, 413.0]}, "gradients/decoder.transformer.h.3.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 14.0, 70.0, 485.0, 379.0, 60.0, 8.0, 2.0], "bins": [-3549.258056640625, -3489.983642578125, -3430.709228515625, -3371.434814453125, -3312.16015625, -3252.8857421875, -3193.611328125, -3134.3369140625, -3075.0625, -3015.7880859375, -2956.513671875, -2897.2392578125, -2837.964599609375, -2778.690185546875, -2719.415771484375, -2660.141357421875, -2600.866943359375, -2541.592529296875, -2482.318115234375, -2423.043701171875, -2363.76904296875, -2304.49462890625, -2245.22021484375, -2185.94580078125, -2126.67138671875, -2067.39697265625, -2008.1224365234375, -1948.8480224609375, -1889.5736083984375, -1830.299072265625, -1771.024658203125, -1711.750244140625, -1652.4757080078125, -1593.2012939453125, -1533.9267578125, -1474.65234375, -1415.3779296875, -1356.103515625, -1296.8289794921875, -1237.5545654296875, -1178.280029296875, -1119.005615234375, -1059.7310791015625, -1000.4566650390625, -941.1822509765625, -881.9077758789062, -822.63330078125, -763.35888671875, -704.08447265625, -644.8099975585938, -585.5355834960938, -526.2611083984375, -466.9866943359375, -407.71221923828125, -348.4377746582031, -289.163330078125, -229.88890075683594, -170.6144561767578, -111.34000396728516, -52.0655517578125, 7.208892822265625, 66.48335266113281, 125.75779724121094, 185.03224182128906, 244.3066864013672]}, "gradients/decoder.transformer.h.3.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 6.0, 4.0, 3.0, 10.0, 11.0, 11.0, 9.0, 9.0, 20.0, 19.0, 22.0, 23.0, 18.0, 18.0, 34.0, 33.0, 26.0, 40.0, 41.0, 44.0, 44.0, 49.0, 53.0, 41.0, 56.0, 37.0, 40.0, 39.0, 36.0, 43.0, 21.0, 31.0, 17.0, 27.0, 11.0, 4.0, 9.0, 10.0, 4.0, 10.0, 6.0, 4.0, 5.0, 6.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-333.1680908203125, -323.7839660644531, -314.39984130859375, -305.0157470703125, -295.6316223144531, -286.24749755859375, -276.8633728027344, -267.479248046875, -258.09515380859375, -248.71102905273438, -239.32691955566406, -229.9427947998047, -220.55868530273438, -211.174560546875, -201.79043579101562, -192.4063262939453, -183.02220153808594, -173.63807678222656, -164.25396728515625, -154.86984252929688, -145.48573303222656, -136.1016082763672, -126.71749114990234, -117.3333740234375, -107.94925689697266, -98.56513977050781, -89.18102264404297, -79.79690551757812, -70.41278076171875, -61.02866744995117, -51.64454650878906, -42.26042938232422, -32.876312255859375, -23.49219512939453, -14.108076095581055, -4.723957061767578, 4.660160064697266, 14.04427719116211, 23.42839813232422, 32.81251525878906, 42.196632385253906, 51.58074951171875, 60.964866638183594, 70.34898376464844, 79.73310852050781, 89.11721801757812, 98.5013427734375, 107.88545989990234, 117.26957702636719, 126.65369415283203, 136.03781127929688, 145.42193603515625, 154.80604553222656, 164.19017028808594, 173.57427978515625, 182.95840454101562, 192.342529296875, 201.72665405273438, 211.1107635498047, 220.49488830566406, 229.87899780273438, 239.26312255859375, 248.64724731445312, 258.0313720703125, 267.41546630859375]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.bias": {"_type": "histogram", "values": [5.0, 1.0, 2.0, 4.0, 3.0, 7.0, 9.0, 10.0, 8.0, 12.0, 9.0, 13.0, 22.0, 15.0, 22.0, 28.0, 31.0, 36.0, 35.0, 35.0, 46.0, 47.0, 47.0, 39.0, 34.0, 36.0, 41.0, 31.0, 46.0, 31.0, 36.0, 44.0, 33.0, 34.0, 21.0, 26.0, 23.0, 20.0, 16.0, 13.0, 8.0, 9.0, 3.0, 9.0, 5.0, 5.0, 4.0, 6.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.9375, -46.01953125, -44.1015625, -42.18359375, -40.265625, -38.34765625, -36.4296875, -34.51171875, -32.59375, -30.67578125, -28.7578125, -26.83984375, -24.921875, -23.00390625, -21.0859375, -19.16796875, -17.25, -15.33203125, -13.4140625, -11.49609375, -9.578125, -7.66015625, -5.7421875, -3.82421875, -1.90625, 0.01171875, 1.9296875, 3.84765625, 5.765625, 7.68359375, 9.6015625, 11.51953125, 13.4375, 15.35546875, 17.2734375, 19.19140625, 21.109375, 23.02734375, 24.9453125, 26.86328125, 28.78125, 30.69921875, 32.6171875, 34.53515625, 36.453125, 38.37109375, 40.2890625, 42.20703125, 44.125, 46.04296875, 47.9609375, 49.87890625, 51.796875, 53.71484375, 55.6328125, 57.55078125, 59.46875, 61.38671875, 63.3046875, 65.22265625, 67.140625, 69.05859375, 70.9765625, 72.89453125, 74.8125]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 4.0, 7.0, 8.0, 13.0, 13.0, 29.0, 56.0, 94.0, 123.0, 212.0, 402.0, 594.0, 902.0, 1365.0, 2197.0, 3145.0, 4739.0, 7205.0, 11358.0, 18190.0, 29530.0, 50056.0, 89591.0, 179084.0, 290478.0, 157676.0, 81135.0, 45645.0, 27278.0, 17012.0, 10712.0, 6781.0, 4405.0, 2943.0, 2016.0, 1310.0, 825.0, 527.0, 347.0, 208.0, 125.0, 70.0, 52.0, 36.0, 24.0, 17.0, 9.0, 8.0, 4.0, 2.0, 5.0, 1.0, 2.0], "bins": [-3.560546875, -3.46002197265625, -3.3594970703125, -3.25897216796875, -3.158447265625, -3.05792236328125, -2.9573974609375, -2.85687255859375, -2.75634765625, -2.65582275390625, -2.5552978515625, -2.45477294921875, -2.354248046875, -2.25372314453125, -2.1531982421875, -2.05267333984375, -1.9521484375, -1.85162353515625, -1.7510986328125, -1.65057373046875, -1.550048828125, -1.44952392578125, -1.3489990234375, -1.24847412109375, -1.14794921875, -1.04742431640625, -0.9468994140625, -0.84637451171875, -0.745849609375, -0.64532470703125, -0.5447998046875, -0.44427490234375, -0.34375, -0.24322509765625, -0.1427001953125, -0.04217529296875, 0.058349609375, 0.15887451171875, 0.2593994140625, 0.35992431640625, 0.46044921875, 0.56097412109375, 0.6614990234375, 0.76202392578125, 0.862548828125, 0.96307373046875, 1.0635986328125, 1.16412353515625, 1.2646484375, 1.36517333984375, 1.4656982421875, 1.56622314453125, 1.666748046875, 1.76727294921875, 1.8677978515625, 1.96832275390625, 2.06884765625, 2.16937255859375, 2.2698974609375, 2.37042236328125, 2.470947265625, 2.57147216796875, 2.6719970703125, 2.77252197265625, 2.873046875]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 6.0, 4.0, 2.0, 1.0, 9.0, 4.0, 7.0, 19.0, 11.0, 10.0, 10.0, 17.0, 17.0, 19.0, 21.0, 28.0, 35.0, 39.0, 29.0, 44.0, 44.0, 43.0, 40.0, 31.0, 1065.0, 45.0, 46.0, 37.0, 46.0, 32.0, 31.0, 27.0, 27.0, 29.0, 14.0, 18.0, 16.0, 22.0, 17.0, 17.0, 12.0, 8.0, 5.0, 6.0, 4.0, 6.0, 7.0, 2.0, 5.0, 3.0, 0.0, 3.0, 1.0], "bins": [-40.5, -39.36083984375, -38.2216796875, -37.08251953125, -35.943359375, -34.80419921875, -33.6650390625, -32.52587890625, -31.38671875, -30.24755859375, -29.1083984375, -27.96923828125, -26.830078125, -25.69091796875, -24.5517578125, -23.41259765625, -22.2734375, -21.13427734375, -19.9951171875, -18.85595703125, -17.716796875, -16.57763671875, -15.4384765625, -14.29931640625, -13.16015625, -12.02099609375, -10.8818359375, -9.74267578125, -8.603515625, -7.46435546875, -6.3251953125, -5.18603515625, -4.046875, -2.90771484375, -1.7685546875, -0.62939453125, 0.509765625, 1.64892578125, 2.7880859375, 3.92724609375, 5.06640625, 6.20556640625, 7.3447265625, 8.48388671875, 9.623046875, 10.76220703125, 11.9013671875, 13.04052734375, 14.1796875, 15.31884765625, 16.4580078125, 17.59716796875, 18.736328125, 19.87548828125, 21.0146484375, 22.15380859375, 23.29296875, 24.43212890625, 25.5712890625, 26.71044921875, 27.849609375, 28.98876953125, 30.1279296875, 31.26708984375, 32.40625]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 5.0, 4.0, 6.0, 12.0, 13.0, 15.0, 25.0, 43.0, 60.0, 100.0, 136.0, 217.0, 275.0, 433.0, 688.0, 1037.0, 1619.0, 2471.0, 3897.0, 6157.0, 9953.0, 15628.0, 26216.0, 45630.0, 86588.0, 194113.0, 1360933.0, 162146.0, 75532.0, 40878.0, 23481.0, 14344.0, 8877.0, 5410.0, 3571.0, 2331.0, 1481.0, 963.0, 588.0, 387.0, 291.0, 198.0, 137.0, 91.0, 47.0, 33.0, 20.0, 24.0, 8.0, 10.0, 9.0, 3.0, 5.0, 3.0, 2.0], "bins": [-3.376953125, -3.28155517578125, -3.1861572265625, -3.09075927734375, -2.995361328125, -2.89996337890625, -2.8045654296875, -2.70916748046875, -2.61376953125, -2.51837158203125, -2.4229736328125, -2.32757568359375, -2.232177734375, -2.13677978515625, -2.0413818359375, -1.94598388671875, -1.8505859375, -1.75518798828125, -1.6597900390625, -1.56439208984375, -1.468994140625, -1.37359619140625, -1.2781982421875, -1.18280029296875, -1.08740234375, -0.99200439453125, -0.8966064453125, -0.80120849609375, -0.705810546875, -0.61041259765625, -0.5150146484375, -0.41961669921875, -0.32421875, -0.22882080078125, -0.1334228515625, -0.03802490234375, 0.057373046875, 0.15277099609375, 0.2481689453125, 0.34356689453125, 0.43896484375, 0.53436279296875, 0.6297607421875, 0.72515869140625, 0.820556640625, 0.91595458984375, 1.0113525390625, 1.10675048828125, 1.2021484375, 1.29754638671875, 1.3929443359375, 1.48834228515625, 1.583740234375, 1.67913818359375, 1.7745361328125, 1.86993408203125, 1.96533203125, 2.06072998046875, 2.1561279296875, 2.25152587890625, 2.346923828125, 2.44232177734375, 2.5377197265625, 2.63311767578125, 2.728515625]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 2.0, 0.0, 5.0, 4.0, 5.0, 13.0, 8.0, 19.0, 20.0, 35.0, 34.0, 81.0, 68.0, 102.0, 95.0, 105.0, 92.0, 66.0, 54.0, 61.0, 32.0, 25.0, 17.0, 13.0, 13.0, 6.0, 3.0, 3.0, 8.0, 7.0, 1.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0484619140625, -0.04712343215942383, -0.045784950256347656, -0.044446468353271484, -0.04310798645019531, -0.04176950454711914, -0.04043102264404297, -0.0390925407409668, -0.037754058837890625, -0.03641557693481445, -0.03507709503173828, -0.03373861312866211, -0.03240013122558594, -0.031061649322509766, -0.029723167419433594, -0.028384685516357422, -0.02704620361328125, -0.025707721710205078, -0.024369239807128906, -0.023030757904052734, -0.021692276000976562, -0.02035379409790039, -0.01901531219482422, -0.017676830291748047, -0.016338348388671875, -0.014999866485595703, -0.013661384582519531, -0.01232290267944336, -0.010984420776367188, -0.009645938873291016, -0.008307456970214844, -0.006968975067138672, -0.0056304931640625, -0.004292011260986328, -0.0029535293579101562, -0.0016150474548339844, -0.0002765655517578125, 0.0010619163513183594, 0.0024003982543945312, 0.003738880157470703, 0.005077362060546875, 0.006415843963623047, 0.007754325866699219, 0.00909280776977539, 0.010431289672851562, 0.011769771575927734, 0.013108253479003906, 0.014446735382080078, 0.01578521728515625, 0.017123699188232422, 0.018462181091308594, 0.019800662994384766, 0.021139144897460938, 0.02247762680053711, 0.02381610870361328, 0.025154590606689453, 0.026493072509765625, 0.027831554412841797, 0.02917003631591797, 0.03050851821899414, 0.03184700012207031, 0.033185482025146484, 0.034523963928222656, 0.03586244583129883, 0.037200927734375]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 2.0, 1.0, 6.0, 2.0, 8.0, 7.0, 10.0, 9.0, 10.0, 12.0, 22.0, 29.0, 27.0, 48.0, 86.0, 146.0, 221.0, 495.0, 4037.0, 1016695.0, 24993.0, 905.0, 313.0, 174.0, 95.0, 54.0, 39.0, 28.0, 20.0, 19.0, 12.0, 5.0, 8.0, 4.0, 4.0, 4.0, 4.0, 3.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.66796875, -0.6438522338867188, -0.6197357177734375, -0.5956192016601562, -0.571502685546875, -0.5473861694335938, -0.5232696533203125, -0.49915313720703125, -0.47503662109375, -0.45092010498046875, -0.4268035888671875, -0.40268707275390625, -0.378570556640625, -0.35445404052734375, -0.3303375244140625, -0.30622100830078125, -0.2821044921875, -0.25798797607421875, -0.2338714599609375, -0.20975494384765625, -0.185638427734375, -0.16152191162109375, -0.1374053955078125, -0.11328887939453125, -0.08917236328125, -0.06505584716796875, -0.0409393310546875, -0.01682281494140625, 0.007293701171875, 0.03141021728515625, 0.0555267333984375, 0.07964324951171875, 0.103759765625, 0.12787628173828125, 0.1519927978515625, 0.17610931396484375, 0.200225830078125, 0.22434234619140625, 0.2484588623046875, 0.27257537841796875, 0.29669189453125, 0.32080841064453125, 0.3449249267578125, 0.36904144287109375, 0.393157958984375, 0.41727447509765625, 0.4413909912109375, 0.46550750732421875, 0.4896240234375, 0.5137405395507812, 0.5378570556640625, 0.5619735717773438, 0.586090087890625, 0.6102066040039062, 0.6343231201171875, 0.6584396362304688, 0.68255615234375, 0.7066726684570312, 0.7307891845703125, 0.7549057006835938, 0.779022216796875, 0.8031387329101562, 0.8272552490234375, 0.8513717651367188, 0.87548828125]}, "gradients/decoder.transformer.h.3.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 23.0, 959.0, 36.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11683978885412216, -0.10670732706785202, -0.09657486528158188, -0.08644240349531174, -0.0763099417090416, -0.06617747992277145, -0.05604501813650131, -0.04591255635023117, -0.03578009456396103, -0.025647632777690887, -0.015515170991420746, -0.005382709205150604, 0.004749752581119537, 0.014882214367389679, 0.02501467615365982, 0.03514713793992996, 0.045279599726200104, 0.055412061512470245, 0.06554452329874039, 0.07567698508501053, 0.08580944687128067, 0.09594190865755081, 0.10607437044382095, 0.1162068322300911, 0.12633928656578064, 0.13647174835205078, 0.14660421013832092, 0.15673667192459106, 0.1668691337108612, 0.17700159549713135, 0.1871340572834015, 0.19726651906967163, 0.20739901065826416, 0.2175314724445343, 0.22766393423080444, 0.23779639601707458, 0.24792885780334473, 0.25806131958961487, 0.268193781375885, 0.27832624316215515, 0.2884587049484253, 0.29859116673469543, 0.3087236285209656, 0.3188560903072357, 0.32898855209350586, 0.339121013879776, 0.34925347566604614, 0.3593859374523163, 0.3695183992385864, 0.37965086102485657, 0.3897833228111267, 0.39991578459739685, 0.410048246383667, 0.42018070816993713, 0.4303131699562073, 0.4404456317424774, 0.45057809352874756, 0.4607105553150177, 0.47084301710128784, 0.480975478887558, 0.4911079406738281, 0.5012403726577759, 0.5113728642463684, 0.5215053558349609, 0.5316377878189087]}, "gradients/decoder.transformer.h.3.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 5.0, 6.0, 2.0, 10.0, 6.0, 10.0, 17.0, 24.0, 21.0, 24.0, 27.0, 21.0, 44.0, 48.0, 59.0, 50.0, 57.0, 48.0, 61.0, 50.0, 50.0, 57.0, 41.0, 49.0, 42.0, 45.0, 32.0, 28.0, 16.0, 14.0, 13.0, 12.0, 7.0, 9.0, 6.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.02974987030029297, -0.029008792713284492, -0.028267715126276016, -0.02752663753926754, -0.026785559952259064, -0.026044482365250587, -0.02530340477824211, -0.024562327191233635, -0.02382124960422516, -0.023080172017216682, -0.022339094430208206, -0.02159801684319973, -0.020856939256191254, -0.020115861669182777, -0.0193747840821743, -0.018633706495165825, -0.01789262890815735, -0.017151551321148872, -0.016410473734140396, -0.01566939614713192, -0.014928318560123444, -0.014187240973114967, -0.013446163386106491, -0.012705085799098015, -0.011964008212089539, -0.011222930625081062, -0.010481853038072586, -0.00974077545106411, -0.008999697864055634, -0.008258620277047157, -0.007517542690038681, -0.006776465103030205, -0.0060353875160217285, -0.005294309929013252, -0.004553232342004776, -0.0038121547549962997, -0.0030710771679878235, -0.0023299995809793472, -0.001588921993970871, -0.0008478444069623947, -0.00010676681995391846, 0.0006343107670545578, 0.001375388354063034, 0.0021164659410715103, 0.0028575435280799866, 0.003598621115088463, 0.004339698702096939, 0.005080776289105415, 0.005821853876113892, 0.006562931463122368, 0.007304009050130844, 0.00804508663713932, 0.008786164224147797, 0.009527241811156273, 0.01026831939816475, 0.011009396985173225, 0.011750474572181702, 0.012491552159190178, 0.013232629746198654, 0.01397370733320713, 0.014714784920215607, 0.015455862507224083, 0.01619694009423256, 0.016938017681241035, 0.01767909526824951]}, "gradients/decoder.transformer.h.3.attn.c_proj.bias": {"_type": "histogram", "values": [5.0, 1.0, 2.0, 4.0, 3.0, 7.0, 9.0, 10.0, 8.0, 12.0, 9.0, 13.0, 22.0, 15.0, 22.0, 28.0, 31.0, 36.0, 35.0, 35.0, 46.0, 47.0, 47.0, 39.0, 34.0, 36.0, 41.0, 31.0, 46.0, 31.0, 36.0, 44.0, 33.0, 34.0, 21.0, 26.0, 23.0, 20.0, 16.0, 13.0, 8.0, 9.0, 3.0, 9.0, 5.0, 5.0, 4.0, 6.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.9375, -46.01953125, -44.1015625, -42.18359375, -40.265625, -38.34765625, -36.4296875, -34.51171875, -32.59375, -30.67578125, -28.7578125, -26.83984375, -24.921875, -23.00390625, -21.0859375, -19.16796875, -17.25, -15.33203125, -13.4140625, -11.49609375, -9.578125, -7.66015625, -5.7421875, -3.82421875, -1.90625, 0.01171875, 1.9296875, 3.84765625, 5.765625, 7.68359375, 9.6015625, 11.51953125, 13.4375, 15.35546875, 17.2734375, 19.19140625, 21.109375, 23.02734375, 24.9453125, 26.86328125, 28.78125, 30.69921875, 32.6171875, 34.53515625, 36.453125, 38.37109375, 40.2890625, 42.20703125, 44.125, 46.04296875, 47.9609375, 49.87890625, 51.796875, 53.71484375, 55.6328125, 57.55078125, 59.46875, 61.38671875, 63.3046875, 65.22265625, 67.140625, 69.05859375, 70.9765625, 72.89453125, 74.8125]}, "gradients/decoder.transformer.h.3.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 7.0, 7.0, 9.0, 13.0, 20.0, 40.0, 74.0, 75.0, 122.0, 218.0, 289.0, 368.0, 490.0, 680.0, 958.0, 1440.0, 1994.0, 3207.0, 5513.0, 11746.0, 31317.0, 143836.0, 643832.0, 143686.0, 31603.0, 11569.0, 5488.0, 3124.0, 2025.0, 1383.0, 996.0, 735.0, 485.0, 358.0, 285.0, 194.0, 134.0, 95.0, 55.0, 34.0, 22.0, 19.0, 8.0, 5.0, 1.0, 2.0, 2.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-107.375, -103.8544921875, -100.333984375, -96.8134765625, -93.29296875, -89.7724609375, -86.251953125, -82.7314453125, -79.2109375, -75.6904296875, -72.169921875, -68.6494140625, -65.12890625, -61.6083984375, -58.087890625, -54.5673828125, -51.046875, -47.5263671875, -44.005859375, -40.4853515625, -36.96484375, -33.4443359375, -29.923828125, -26.4033203125, -22.8828125, -19.3623046875, -15.841796875, -12.3212890625, -8.80078125, -5.2802734375, -1.759765625, 1.7607421875, 5.28125, 8.8017578125, 12.322265625, 15.8427734375, 19.36328125, 22.8837890625, 26.404296875, 29.9248046875, 33.4453125, 36.9658203125, 40.486328125, 44.0068359375, 47.52734375, 51.0478515625, 54.568359375, 58.0888671875, 61.609375, 65.1298828125, 68.650390625, 72.1708984375, 75.69140625, 79.2119140625, 82.732421875, 86.2529296875, 89.7734375, 93.2939453125, 96.814453125, 100.3349609375, 103.85546875, 107.3759765625, 110.896484375, 114.4169921875, 117.9375]}, "gradients/decoder.transformer.h.3.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 0.0, 7.0, 9.0, 8.0, 7.0, 8.0, 12.0, 18.0, 27.0, 34.0, 37.0, 35.0, 47.0, 45.0, 63.0, 97.0, 211.0, 1586.0, 292.0, 94.0, 60.0, 51.0, 54.0, 42.0, 42.0, 33.0, 29.0, 16.0, 26.0, 16.0, 15.0, 10.0, 7.0, 4.0, 5.0, 1.0, 0.0, 4.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-120.4375, -115.3408203125, -110.244140625, -105.1474609375, -100.05078125, -94.9541015625, -89.857421875, -84.7607421875, -79.6640625, -74.5673828125, -69.470703125, -64.3740234375, -59.27734375, -54.1806640625, -49.083984375, -43.9873046875, -38.890625, -33.7939453125, -28.697265625, -23.6005859375, -18.50390625, -13.4072265625, -8.310546875, -3.2138671875, 1.8828125, 6.9794921875, 12.076171875, 17.1728515625, 22.26953125, 27.3662109375, 32.462890625, 37.5595703125, 42.65625, 47.7529296875, 52.849609375, 57.9462890625, 63.04296875, 68.1396484375, 73.236328125, 78.3330078125, 83.4296875, 88.5263671875, 93.623046875, 98.7197265625, 103.81640625, 108.9130859375, 114.009765625, 119.1064453125, 124.203125, 129.2998046875, 134.396484375, 139.4931640625, 144.58984375, 149.6865234375, 154.783203125, 159.8798828125, 164.9765625, 170.0732421875, 175.169921875, 180.2666015625, 185.36328125, 190.4599609375, 195.556640625, 200.6533203125, 205.75]}, "gradients/decoder.transformer.h.3.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 5.0, 9.0, 12.0, 13.0, 14.0, 11.0, 25.0, 28.0, 39.0, 54.0, 73.0, 107.0, 245.0, 601.0, 2771.0, 2930809.0, 207960.0, 1912.0, 481.0, 221.0, 106.0, 63.0, 36.0, 25.0, 20.0, 15.0, 8.0, 12.0, 12.0, 7.0, 4.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-702.5, -682.9375, -663.375, -643.8125, -624.25, -604.6875, -585.125, -565.5625, -546.0, -526.4375, -506.875, -487.3125, -467.75, -448.1875, -428.625, -409.0625, -389.5, -369.9375, -350.375, -330.8125, -311.25, -291.6875, -272.125, -252.5625, -233.0, -213.4375, -193.875, -174.3125, -154.75, -135.1875, -115.625, -96.0625, -76.5, -56.9375, -37.375, -17.8125, 1.75, 21.3125, 40.875, 60.4375, 80.0, 99.5625, 119.125, 138.6875, 158.25, 177.8125, 197.375, 216.9375, 236.5, 256.0625, 275.625, 295.1875, 314.75, 334.3125, 353.875, 373.4375, 393.0, 412.5625, 432.125, 451.6875, 471.25, 490.8125, 510.375, 529.9375, 549.5]}, "gradients/decoder.transformer.h.3.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 100.0, 780.0, 130.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1219.6351318359375, -1160.1058349609375, -1100.576416015625, -1041.047119140625, -981.5177001953125, -921.9884033203125, -862.4590454101562, -802.9296875, -743.4003295898438, -683.8709716796875, -624.3416137695312, -564.812255859375, -505.2829284667969, -445.7535705566406, -386.2242431640625, -326.69488525390625, -267.16552734375, -207.63616943359375, -148.10682678222656, -88.57748413085938, -29.048126220703125, 30.481231689453125, 90.01055908203125, 149.5399169921875, 209.06927490234375, 268.5986328125, 328.12799072265625, 387.6573181152344, 447.1866760253906, 506.7160339355469, 566.245361328125, 625.7747192382812, 685.30419921875, 744.8335571289062, 804.3629150390625, 863.8922119140625, 923.421630859375, 982.950927734375, 1042.480224609375, 1102.0096435546875, 1161.5390625, 1221.068359375, 1280.5977783203125, 1340.1270751953125, 1399.656494140625, 1459.185791015625, 1518.715087890625, 1578.2445068359375, 1637.7738037109375, 1697.3031005859375, 1756.83251953125, 1816.36181640625, 1875.8912353515625, 1935.4205322265625, 1994.949951171875, 2054.479248046875, 2114.008544921875, 2173.537841796875, 2233.067138671875, 2292.5966796875, 2352.1259765625, 2411.6552734375, 2471.1845703125, 2530.7138671875, 2590.243408203125]}, "gradients/decoder.transformer.h.3.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 3.0, 2.0, 5.0, 5.0, 4.0, 5.0, 8.0, 11.0, 10.0, 13.0, 20.0, 15.0, 10.0, 15.0, 31.0, 30.0, 30.0, 27.0, 26.0, 40.0, 25.0, 38.0, 39.0, 31.0, 43.0, 31.0, 40.0, 36.0, 35.0, 31.0, 28.0, 27.0, 26.0, 35.0, 28.0, 31.0, 30.0, 15.0, 24.0, 17.0, 11.0, 10.0, 14.0, 16.0, 10.0, 5.0, 7.0, 3.0, 10.0, 0.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-298.99188232421875, -288.9653625488281, -278.9388122558594, -268.91229248046875, -258.8857727050781, -248.85923767089844, -238.83270263671875, -228.80618286132812, -218.7796630859375, -208.7531280517578, -198.7266082763672, -188.7000732421875, -178.67355346679688, -168.6470184326172, -158.6204833984375, -148.59396362304688, -138.5674285888672, -128.5408935546875, -118.51437377929688, -108.48783874511719, -98.46131896972656, -88.43478393554688, -78.40825653076172, -68.38172912597656, -58.355201721191406, -48.32867431640625, -38.302146911621094, -28.275615692138672, -18.249088287353516, -8.22256088256836, 1.8039703369140625, 11.830497741699219, 21.857025146484375, 31.88355255126953, 41.91007995605469, 51.93661117553711, 61.963138580322266, 71.98966979980469, 82.01619720458984, 92.042724609375, 102.06925201416016, 112.09577941894531, 122.12230682373047, 132.14883422851562, 142.1753692626953, 152.20188903808594, 162.22842407226562, 172.25494384765625, 182.28147888183594, 192.30801391601562, 202.33453369140625, 212.36106872558594, 222.38758850097656, 232.41412353515625, 242.44064331054688, 252.46717834472656, 262.49371337890625, 272.5202331542969, 282.5467834472656, 292.57330322265625, 302.5998229980469, 312.6263427734375, 322.65289306640625, 332.6794128417969, 342.7059326171875]}, "gradients/decoder.transformer.h.2.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 5.0, 3.0, 5.0, 5.0, 8.0, 11.0, 14.0, 13.0, 12.0, 15.0, 19.0, 17.0, 22.0, 32.0, 35.0, 29.0, 33.0, 39.0, 45.0, 40.0, 45.0, 37.0, 51.0, 36.0, 38.0, 37.0, 35.0, 34.0, 31.0, 32.0, 41.0, 32.0, 34.0, 26.0, 22.0, 13.0, 12.0, 11.0, 11.0, 11.0, 7.0, 6.0, 7.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-56.78125, -54.80224609375, -52.8232421875, -50.84423828125, -48.865234375, -46.88623046875, -44.9072265625, -42.92822265625, -40.94921875, -38.97021484375, -36.9912109375, -35.01220703125, -33.033203125, -31.05419921875, -29.0751953125, -27.09619140625, -25.1171875, -23.13818359375, -21.1591796875, -19.18017578125, -17.201171875, -15.22216796875, -13.2431640625, -11.26416015625, -9.28515625, -7.30615234375, -5.3271484375, -3.34814453125, -1.369140625, 0.60986328125, 2.5888671875, 4.56787109375, 6.546875, 8.52587890625, 10.5048828125, 12.48388671875, 14.462890625, 16.44189453125, 18.4208984375, 20.39990234375, 22.37890625, 24.35791015625, 26.3369140625, 28.31591796875, 30.294921875, 32.27392578125, 34.2529296875, 36.23193359375, 38.2109375, 40.18994140625, 42.1689453125, 44.14794921875, 46.126953125, 48.10595703125, 50.0849609375, 52.06396484375, 54.04296875, 56.02197265625, 58.0009765625, 59.97998046875, 61.958984375, 63.93798828125, 65.9169921875, 67.89599609375, 69.875]}, "gradients/decoder.transformer.h.2.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 7.0, 6.0, 8.0, 7.0, 15.0, 26.0, 32.0, 42.0, 57.0, 85.0, 134.0, 182.0, 299.0, 477.0, 706.0, 1153.0, 1800.0, 3000.0, 5388.0, 11085.0, 34905.0, 281830.0, 2552320.0, 1152385.0, 111424.0, 18389.0, 7945.0, 4131.0, 2401.0, 1443.0, 867.0, 575.0, 359.0, 237.0, 188.0, 122.0, 69.0, 50.0, 36.0, 39.0, 23.0, 8.0, 12.0, 5.0, 5.0, 5.0, 4.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-116.25, -112.34765625, -108.4453125, -104.54296875, -100.640625, -96.73828125, -92.8359375, -88.93359375, -85.03125, -81.12890625, -77.2265625, -73.32421875, -69.421875, -65.51953125, -61.6171875, -57.71484375, -53.8125, -49.91015625, -46.0078125, -42.10546875, -38.203125, -34.30078125, -30.3984375, -26.49609375, -22.59375, -18.69140625, -14.7890625, -10.88671875, -6.984375, -3.08203125, 0.8203125, 4.72265625, 8.625, 12.52734375, 16.4296875, 20.33203125, 24.234375, 28.13671875, 32.0390625, 35.94140625, 39.84375, 43.74609375, 47.6484375, 51.55078125, 55.453125, 59.35546875, 63.2578125, 67.16015625, 71.0625, 74.96484375, 78.8671875, 82.76953125, 86.671875, 90.57421875, 94.4765625, 98.37890625, 102.28125, 106.18359375, 110.0859375, 113.98828125, 117.890625, 121.79296875, 125.6953125, 129.59765625, 133.5]}, "gradients/decoder.transformer.h.2.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 2.0, 2.0, 4.0, 3.0, 4.0, 10.0, 8.0, 8.0, 15.0, 23.0, 20.0, 38.0, 48.0, 71.0, 115.0, 184.0, 306.0, 577.0, 967.0, 693.0, 343.0, 220.0, 126.0, 86.0, 52.0, 40.0, 34.0, 20.0, 14.0, 10.0, 7.0, 8.0, 3.0, 7.0, 5.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0], "bins": [-161.5, -157.4296875, -153.359375, -149.2890625, -145.21875, -141.1484375, -137.078125, -133.0078125, -128.9375, -124.8671875, -120.796875, -116.7265625, -112.65625, -108.5859375, -104.515625, -100.4453125, -96.375, -92.3046875, -88.234375, -84.1640625, -80.09375, -76.0234375, -71.953125, -67.8828125, -63.8125, -59.7421875, -55.671875, -51.6015625, -47.53125, -43.4609375, -39.390625, -35.3203125, -31.25, -27.1796875, -23.109375, -19.0390625, -14.96875, -10.8984375, -6.828125, -2.7578125, 1.3125, 5.3828125, 9.453125, 13.5234375, 17.59375, 21.6640625, 25.734375, 29.8046875, 33.875, 37.9453125, 42.015625, 46.0859375, 50.15625, 54.2265625, 58.296875, 62.3671875, 66.4375, 70.5078125, 74.578125, 78.6484375, 82.71875, 86.7890625, 90.859375, 94.9296875, 99.0]}, "gradients/decoder.transformer.h.2.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 4.0, 5.0, 7.0, 16.0, 21.0, 45.0, 61.0, 89.0, 187.0, 346.0, 606.0, 1244.0, 2561.0, 6430.0, 19006.0, 84009.0, 2247900.0, 1724948.0, 77682.0, 17998.0, 6133.0, 2492.0, 1177.0, 571.0, 301.0, 171.0, 104.0, 70.0, 35.0, 25.0, 13.0, 11.0, 10.0, 3.0, 3.0, 3.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-204.625, -198.01953125, -191.4140625, -184.80859375, -178.203125, -171.59765625, -164.9921875, -158.38671875, -151.78125, -145.17578125, -138.5703125, -131.96484375, -125.359375, -118.75390625, -112.1484375, -105.54296875, -98.9375, -92.33203125, -85.7265625, -79.12109375, -72.515625, -65.91015625, -59.3046875, -52.69921875, -46.09375, -39.48828125, -32.8828125, -26.27734375, -19.671875, -13.06640625, -6.4609375, 0.14453125, 6.75, 13.35546875, 19.9609375, 26.56640625, 33.171875, 39.77734375, 46.3828125, 52.98828125, 59.59375, 66.19921875, 72.8046875, 79.41015625, 86.015625, 92.62109375, 99.2265625, 105.83203125, 112.4375, 119.04296875, 125.6484375, 132.25390625, 138.859375, 145.46484375, 152.0703125, 158.67578125, 165.28125, 171.88671875, 178.4921875, 185.09765625, 191.703125, 198.30859375, 204.9140625, 211.51953125, 218.125]}, "gradients/decoder.transformer.h.2.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 1002.0, 7.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12957.9404296875, -12645.447265625, -12332.955078125, -12020.462890625, -11707.9697265625, -11395.4765625, -11082.984375, -10770.4921875, -10457.9990234375, -10145.505859375, -9833.013671875, -9520.521484375, -9208.0283203125, -8895.53515625, -8583.04296875, -8270.55078125, -7958.0576171875, -7645.56494140625, -7333.072265625, -7020.57958984375, -6708.0869140625, -6395.59423828125, -6083.1015625, -5770.60888671875, -5458.1162109375, -5145.62353515625, -4833.130859375, -4520.63818359375, -4208.1455078125, -3895.65283203125, -3583.16015625, -3270.66748046875, -2958.17578125, -2645.68310546875, -2333.1904296875, -2020.69775390625, -1708.205078125, -1395.71240234375, -1083.2197265625, -770.72705078125, -458.234375, -145.74169921875, 166.7509765625, 479.24365234375, 791.736328125, 1104.22900390625, 1416.7216796875, 1729.21435546875, 2041.70703125, 2354.19970703125, 2666.6923828125, 2979.18505859375, 3291.677734375, 3604.17041015625, 3916.6630859375, 4229.15576171875, 4541.6484375, 4854.14111328125, 5166.6337890625, 5479.12646484375, 5791.619140625, 6104.11181640625, 6416.6044921875, 6729.09716796875, 7041.58984375]}, "gradients/decoder.transformer.h.2.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 3.0, 7.0, 5.0, 6.0, 17.0, 15.0, 26.0, 37.0, 33.0, 39.0, 56.0, 44.0, 60.0, 95.0, 79.0, 83.0, 66.0, 64.0, 84.0, 52.0, 37.0, 35.0, 23.0, 14.0, 14.0, 10.0, 6.0, 4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-380.5922546386719, -361.2588195800781, -341.9253845214844, -322.5919494628906, -303.258544921875, -283.92510986328125, -264.5916748046875, -245.25823974609375, -225.9248046875, -206.59136962890625, -187.2579345703125, -167.9245147705078, -148.59107971191406, -129.2576446533203, -109.9242172241211, -90.59078979492188, -71.25735473632812, -51.92392349243164, -32.590492248535156, -13.257061004638672, 6.0763702392578125, 25.409805297851562, 44.74323272705078, 64.07666015625, 83.41009521484375, 102.7435302734375, 122.07695770263672, 141.41038513183594, 160.7438201904297, 180.07725524902344, 199.41067504882812, 218.74411010742188, 238.07757568359375, 257.4110107421875, 276.74444580078125, 296.077880859375, 315.41131591796875, 334.7447509765625, 354.0781555175781, 373.4115905761719, 392.7450256347656, 412.0784606933594, 431.4118957519531, 450.7453308105469, 470.0787353515625, 489.41217041015625, 508.74560546875, 528.0790405273438, 547.4124755859375, 566.7459106445312, 586.079345703125, 605.4127807617188, 624.7462158203125, 644.0796508789062, 663.4130859375, 682.7464599609375, 702.0799560546875, 721.4133911132812, 740.746826171875, 760.0802612304688, 779.4136962890625, 798.7471313476562, 818.08056640625, 837.4139404296875, 856.7473754882812]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 3.0, 7.0, 6.0, 5.0, 7.0, 8.0, 11.0, 15.0, 12.0, 21.0, 17.0, 12.0, 17.0, 21.0, 18.0, 30.0, 27.0, 36.0, 46.0, 40.0, 41.0, 38.0, 47.0, 53.0, 34.0, 35.0, 42.0, 34.0, 27.0, 31.0, 33.0, 35.0, 36.0, 16.0, 24.0, 25.0, 17.0, 15.0, 13.0, 10.0, 7.0, 9.0, 11.0, 4.0, 2.0, 4.0, 4.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-52.03125, -50.47216796875, -48.9130859375, -47.35400390625, -45.794921875, -44.23583984375, -42.6767578125, -41.11767578125, -39.55859375, -37.99951171875, -36.4404296875, -34.88134765625, -33.322265625, -31.76318359375, -30.2041015625, -28.64501953125, -27.0859375, -25.52685546875, -23.9677734375, -22.40869140625, -20.849609375, -19.29052734375, -17.7314453125, -16.17236328125, -14.61328125, -13.05419921875, -11.4951171875, -9.93603515625, -8.376953125, -6.81787109375, -5.2587890625, -3.69970703125, -2.140625, -0.58154296875, 0.9775390625, 2.53662109375, 4.095703125, 5.65478515625, 7.2138671875, 8.77294921875, 10.33203125, 11.89111328125, 13.4501953125, 15.00927734375, 16.568359375, 18.12744140625, 19.6865234375, 21.24560546875, 22.8046875, 24.36376953125, 25.9228515625, 27.48193359375, 29.041015625, 30.60009765625, 32.1591796875, 33.71826171875, 35.27734375, 36.83642578125, 38.3955078125, 39.95458984375, 41.513671875, 43.07275390625, 44.6318359375, 46.19091796875, 47.75]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 4.0, 5.0, 6.0, 8.0, 19.0, 25.0, 30.0, 44.0, 66.0, 90.0, 132.0, 211.0, 293.0, 411.0, 544.0, 866.0, 1242.0, 1762.0, 2463.0, 3702.0, 5270.0, 7696.0, 11196.0, 16680.0, 24847.0, 38563.0, 61524.0, 106650.0, 196743.0, 232005.0, 128270.0, 73957.0, 44960.0, 28686.0, 18925.0, 12767.0, 8736.0, 5889.0, 4032.0, 2805.0, 1981.0, 1393.0, 966.0, 616.0, 468.0, 323.0, 206.0, 149.0, 122.0, 77.0, 47.0, 34.0, 26.0, 13.0, 9.0, 7.0, 5.0, 3.0, 0.0, 3.0, 1.0, 1.0, 1.0], "bins": [-2.068359375, -1.999786376953125, -1.93121337890625, -1.862640380859375, -1.7940673828125, -1.725494384765625, -1.65692138671875, -1.588348388671875, -1.519775390625, -1.451202392578125, -1.38262939453125, -1.314056396484375, -1.2454833984375, -1.176910400390625, -1.10833740234375, -1.039764404296875, -0.97119140625, -0.902618408203125, -0.83404541015625, -0.765472412109375, -0.6968994140625, -0.628326416015625, -0.55975341796875, -0.491180419921875, -0.422607421875, -0.354034423828125, -0.28546142578125, -0.216888427734375, -0.1483154296875, -0.079742431640625, -0.01116943359375, 0.057403564453125, 0.1259765625, 0.194549560546875, 0.26312255859375, 0.331695556640625, 0.4002685546875, 0.468841552734375, 0.53741455078125, 0.605987548828125, 0.674560546875, 0.743133544921875, 0.81170654296875, 0.880279541015625, 0.9488525390625, 1.017425537109375, 1.08599853515625, 1.154571533203125, 1.22314453125, 1.291717529296875, 1.36029052734375, 1.428863525390625, 1.4974365234375, 1.566009521484375, 1.63458251953125, 1.703155517578125, 1.771728515625, 1.840301513671875, 1.90887451171875, 1.977447509765625, 2.0460205078125, 2.114593505859375, 2.18316650390625, 2.251739501953125, 2.3203125]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 6.0, 3.0, 2.0, 7.0, 11.0, 7.0, 5.0, 11.0, 15.0, 12.0, 19.0, 14.0, 22.0, 26.0, 34.0, 23.0, 26.0, 28.0, 43.0, 34.0, 34.0, 52.0, 35.0, 1067.0, 34.0, 42.0, 44.0, 31.0, 44.0, 34.0, 22.0, 36.0, 28.0, 33.0, 21.0, 9.0, 19.0, 16.0, 16.0, 10.0, 10.0, 3.0, 11.0, 5.0, 6.0, 8.0, 2.0, 4.0, 5.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-29.875, -28.88037109375, -27.8857421875, -26.89111328125, -25.896484375, -24.90185546875, -23.9072265625, -22.91259765625, -21.91796875, -20.92333984375, -19.9287109375, -18.93408203125, -17.939453125, -16.94482421875, -15.9501953125, -14.95556640625, -13.9609375, -12.96630859375, -11.9716796875, -10.97705078125, -9.982421875, -8.98779296875, -7.9931640625, -6.99853515625, -6.00390625, -5.00927734375, -4.0146484375, -3.02001953125, -2.025390625, -1.03076171875, -0.0361328125, 0.95849609375, 1.953125, 2.94775390625, 3.9423828125, 4.93701171875, 5.931640625, 6.92626953125, 7.9208984375, 8.91552734375, 9.91015625, 10.90478515625, 11.8994140625, 12.89404296875, 13.888671875, 14.88330078125, 15.8779296875, 16.87255859375, 17.8671875, 18.86181640625, 19.8564453125, 20.85107421875, 21.845703125, 22.84033203125, 23.8349609375, 24.82958984375, 25.82421875, 26.81884765625, 27.8134765625, 28.80810546875, 29.802734375, 30.79736328125, 31.7919921875, 32.78662109375, 33.78125]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 5.0, 5.0, 7.0, 13.0, 9.0, 23.0, 31.0, 36.0, 72.0, 103.0, 131.0, 204.0, 313.0, 431.0, 634.0, 903.0, 1428.0, 2187.0, 3226.0, 4908.0, 7818.0, 12157.0, 19365.0, 32705.0, 57562.0, 111168.0, 409113.0, 1164553.0, 117420.0, 59977.0, 34475.0, 20411.0, 12573.0, 7989.0, 5005.0, 3394.0, 2197.0, 1478.0, 973.0, 644.0, 482.0, 293.0, 217.0, 154.0, 111.0, 72.0, 45.0, 33.0, 29.0, 17.0, 13.0, 17.0, 5.0, 3.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 1.0], "bins": [-2.458984375, -2.37713623046875, -2.2952880859375, -2.21343994140625, -2.131591796875, -2.04974365234375, -1.9678955078125, -1.88604736328125, -1.80419921875, -1.72235107421875, -1.6405029296875, -1.55865478515625, -1.476806640625, -1.39495849609375, -1.3131103515625, -1.23126220703125, -1.1494140625, -1.06756591796875, -0.9857177734375, -0.90386962890625, -0.822021484375, -0.74017333984375, -0.6583251953125, -0.57647705078125, -0.49462890625, -0.41278076171875, -0.3309326171875, -0.24908447265625, -0.167236328125, -0.08538818359375, -0.0035400390625, 0.07830810546875, 0.16015625, 0.24200439453125, 0.3238525390625, 0.40570068359375, 0.487548828125, 0.56939697265625, 0.6512451171875, 0.73309326171875, 0.81494140625, 0.89678955078125, 0.9786376953125, 1.06048583984375, 1.142333984375, 1.22418212890625, 1.3060302734375, 1.38787841796875, 1.4697265625, 1.55157470703125, 1.6334228515625, 1.71527099609375, 1.797119140625, 1.87896728515625, 1.9608154296875, 2.04266357421875, 2.12451171875, 2.20635986328125, 2.2882080078125, 2.37005615234375, 2.451904296875, 2.53375244140625, 2.6156005859375, 2.69744873046875, 2.779296875]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 9.0, 0.0, 6.0, 4.0, 9.0, 7.0, 5.0, 5.0, 6.0, 10.0, 11.0, 20.0, 14.0, 23.0, 26.0, 22.0, 28.0, 29.0, 55.0, 52.0, 66.0, 80.0, 71.0, 78.0, 52.0, 35.0, 38.0, 35.0, 30.0, 23.0, 22.0, 16.0, 21.0, 15.0, 9.0, 20.0, 11.0, 8.0, 6.0, 5.0, 6.0, 5.0, 9.0, 4.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.025421142578125, -0.024588823318481445, -0.02375650405883789, -0.022924184799194336, -0.02209186553955078, -0.021259546279907227, -0.020427227020263672, -0.019594907760620117, -0.018762588500976562, -0.017930269241333008, -0.017097949981689453, -0.0162656307220459, -0.015433311462402344, -0.014600992202758789, -0.013768672943115234, -0.01293635368347168, -0.012104034423828125, -0.01127171516418457, -0.010439395904541016, -0.009607076644897461, -0.008774757385253906, -0.007942438125610352, -0.007110118865966797, -0.006277799606323242, -0.0054454803466796875, -0.004613161087036133, -0.003780841827392578, -0.0029485225677490234, -0.0021162033081054688, -0.001283884048461914, -0.0004515647888183594, 0.0003807544708251953, 0.00121307373046875, 0.0020453929901123047, 0.0028777122497558594, 0.003710031509399414, 0.004542350769042969, 0.0053746700286865234, 0.006206989288330078, 0.007039308547973633, 0.007871627807617188, 0.008703947067260742, 0.009536266326904297, 0.010368585586547852, 0.011200904846191406, 0.012033224105834961, 0.012865543365478516, 0.01369786262512207, 0.014530181884765625, 0.01536250114440918, 0.016194820404052734, 0.01702713966369629, 0.017859458923339844, 0.0186917781829834, 0.019524097442626953, 0.020356416702270508, 0.021188735961914062, 0.022021055221557617, 0.022853374481201172, 0.023685693740844727, 0.02451801300048828, 0.025350332260131836, 0.02618265151977539, 0.027014970779418945, 0.0278472900390625]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 6.0, 5.0, 5.0, 15.0, 7.0, 14.0, 11.0, 15.0, 15.0, 31.0, 24.0, 30.0, 35.0, 35.0, 43.0, 72.0, 85.0, 114.0, 183.0, 303.0, 697.0, 9274.0, 950254.0, 84015.0, 1932.0, 450.0, 275.0, 127.0, 96.0, 80.0, 58.0, 41.0, 43.0, 29.0, 23.0, 14.0, 14.0, 8.0, 13.0, 12.0, 18.0, 17.0, 5.0, 8.0, 5.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.496826171875, -0.4816551208496094, -0.46648406982421875, -0.4513130187988281, -0.4361419677734375, -0.4209709167480469, -0.40579986572265625, -0.3906288146972656, -0.375457763671875, -0.3602867126464844, -0.34511566162109375, -0.3299446105957031, -0.3147735595703125, -0.2996025085449219, -0.28443145751953125, -0.2692604064941406, -0.25408935546875, -0.23891830444335938, -0.22374725341796875, -0.20857620239257812, -0.1934051513671875, -0.17823410034179688, -0.16306304931640625, -0.14789199829101562, -0.132720947265625, -0.11754989624023438, -0.10237884521484375, -0.08720779418945312, -0.0720367431640625, -0.056865692138671875, -0.04169464111328125, -0.026523590087890625, -0.0113525390625, 0.003818511962890625, 0.01898956298828125, 0.034160614013671875, 0.0493316650390625, 0.06450271606445312, 0.07967376708984375, 0.09484481811523438, 0.110015869140625, 0.12518692016601562, 0.14035797119140625, 0.15552902221679688, 0.1707000732421875, 0.18587112426757812, 0.20104217529296875, 0.21621322631835938, 0.23138427734375, 0.24655532836914062, 0.26172637939453125, 0.2768974304199219, 0.2920684814453125, 0.3072395324707031, 0.32241058349609375, 0.3375816345214844, 0.352752685546875, 0.3679237365722656, 0.38309478759765625, 0.3982658386230469, 0.4134368896484375, 0.4286079406738281, 0.44377899169921875, 0.4589500427246094, 0.47412109375]}, "gradients/decoder.transformer.h.2.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 21.0, 353.0, 625.0, 19.0], "bins": [-0.3406594395637512, -0.3351737856864929, -0.3296881318092346, -0.3242024779319763, -0.318716824054718, -0.3132311701774597, -0.3077455163002014, -0.3022598624229431, -0.2967742085456848, -0.2912885546684265, -0.2858029007911682, -0.2803172469139099, -0.2748315930366516, -0.2693459391593933, -0.263860285282135, -0.2583746314048767, -0.2528889775276184, -0.2474033236503601, -0.2419176697731018, -0.2364320158958435, -0.2309463620185852, -0.2254607081413269, -0.2199750542640686, -0.2144894003868103, -0.2090037763118744, -0.2035181224346161, -0.1980324685573578, -0.1925468146800995, -0.1870611608028412, -0.18157550692558289, -0.17608985304832458, -0.17060419917106628, -0.16511856019496918, -0.15963290631771088, -0.15414725244045258, -0.14866159856319427, -0.14317594468593597, -0.13769029080867767, -0.13220465183258057, -0.12671899795532227, -0.12123333662748337, -0.11574768275022507, -0.11026202887296677, -0.10477638244628906, -0.09929072856903076, -0.09380507469177246, -0.08831942081451416, -0.08283376693725586, -0.07734811305999756, -0.07186245918273926, -0.06637680530548096, -0.060891155153512955, -0.055405501276254654, -0.04991984739899635, -0.04443419724702835, -0.03894854336977005, -0.03346288949251175, -0.02797723561525345, -0.022491583600640297, -0.017005931586027145, -0.011520277708768845, -0.006034623831510544, -0.0005489736795425415, 0.004936680197715759, 0.01042233593761921]}, "gradients/decoder.transformer.h.2.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 5.0, 3.0, 4.0, 3.0, 5.0, 9.0, 18.0, 20.0, 20.0, 29.0, 23.0, 32.0, 35.0, 49.0, 52.0, 53.0, 42.0, 47.0, 55.0, 54.0, 51.0, 54.0, 48.0, 40.0, 38.0, 32.0, 35.0, 23.0, 29.0, 20.0, 17.0, 19.0, 9.0, 6.0, 7.0, 7.0, 9.0, 6.0, 5.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01752525568008423, -0.01690787822008133, -0.01629050076007843, -0.01567312330007553, -0.015055745840072632, -0.014438368380069733, -0.013820990920066833, -0.013203613460063934, -0.012586236000061035, -0.011968858540058136, -0.011351481080055237, -0.010734103620052338, -0.010116726160049438, -0.00949934870004654, -0.00888197124004364, -0.008264593780040741, -0.007647216320037842, -0.007029838860034943, -0.0064124614000320435, -0.005795083940029144, -0.005177706480026245, -0.004560329020023346, -0.003942951560020447, -0.0033255741000175476, -0.0027081966400146484, -0.0020908191800117493, -0.00147344172000885, -0.0008560642600059509, -0.00023868680000305176, 0.0003786906599998474, 0.0009960681200027466, 0.0016134455800056458, 0.002230823040008545, 0.002848200500011444, 0.0034655779600143433, 0.004082955420017242, 0.004700332880020142, 0.005317710340023041, 0.00593508780002594, 0.006552465260028839, 0.007169842720031738, 0.0077872201800346375, 0.008404597640037537, 0.009021975100040436, 0.009639352560043335, 0.010256730020046234, 0.010874107480049133, 0.011491484940052032, 0.012108862400054932, 0.01272623986005783, 0.01334361732006073, 0.01396099478006363, 0.014578372240066528, 0.015195749700069427, 0.015813127160072327, 0.016430504620075226, 0.017047882080078125, 0.017665259540081024, 0.018282637000083923, 0.018900014460086823, 0.01951739192008972, 0.02013476938009262, 0.02075214684009552, 0.02136952430009842, 0.02198690176010132]}, "gradients/decoder.transformer.h.2.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 3.0, 7.0, 6.0, 5.0, 7.0, 8.0, 11.0, 15.0, 12.0, 21.0, 17.0, 12.0, 17.0, 21.0, 18.0, 30.0, 27.0, 36.0, 46.0, 40.0, 41.0, 38.0, 47.0, 53.0, 34.0, 35.0, 42.0, 34.0, 27.0, 31.0, 33.0, 35.0, 36.0, 16.0, 24.0, 25.0, 17.0, 15.0, 13.0, 10.0, 7.0, 9.0, 11.0, 4.0, 2.0, 4.0, 4.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-52.03125, -50.47216796875, -48.9130859375, -47.35400390625, -45.794921875, -44.23583984375, -42.6767578125, -41.11767578125, -39.55859375, -37.99951171875, -36.4404296875, -34.88134765625, -33.322265625, -31.76318359375, -30.2041015625, -28.64501953125, -27.0859375, -25.52685546875, -23.9677734375, -22.40869140625, -20.849609375, -19.29052734375, -17.7314453125, -16.17236328125, -14.61328125, -13.05419921875, -11.4951171875, -9.93603515625, -8.376953125, -6.81787109375, -5.2587890625, -3.69970703125, -2.140625, -0.58154296875, 0.9775390625, 2.53662109375, 4.095703125, 5.65478515625, 7.2138671875, 8.77294921875, 10.33203125, 11.89111328125, 13.4501953125, 15.00927734375, 16.568359375, 18.12744140625, 19.6865234375, 21.24560546875, 22.8046875, 24.36376953125, 25.9228515625, 27.48193359375, 29.041015625, 30.60009765625, 32.1591796875, 33.71826171875, 35.27734375, 36.83642578125, 38.3955078125, 39.95458984375, 41.513671875, 43.07275390625, 44.6318359375, 46.19091796875, 47.75]}, "gradients/decoder.transformer.h.2.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 4.0, 5.0, 4.0, 5.0, 11.0, 16.0, 23.0, 23.0, 21.0, 52.0, 72.0, 98.0, 138.0, 160.0, 244.0, 354.0, 555.0, 742.0, 1135.0, 1787.0, 2874.0, 4908.0, 10247.0, 26682.0, 109938.0, 648071.0, 176433.0, 36262.0, 12728.0, 5837.0, 3244.0, 1932.0, 1233.0, 845.0, 558.0, 371.0, 278.0, 172.0, 133.0, 99.0, 65.0, 57.0, 43.0, 39.0, 13.0, 15.0, 15.0, 6.0, 2.0, 8.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-115.4375, -111.787109375, -108.13671875, -104.486328125, -100.8359375, -97.185546875, -93.53515625, -89.884765625, -86.234375, -82.583984375, -78.93359375, -75.283203125, -71.6328125, -67.982421875, -64.33203125, -60.681640625, -57.03125, -53.380859375, -49.73046875, -46.080078125, -42.4296875, -38.779296875, -35.12890625, -31.478515625, -27.828125, -24.177734375, -20.52734375, -16.876953125, -13.2265625, -9.576171875, -5.92578125, -2.275390625, 1.375, 5.025390625, 8.67578125, 12.326171875, 15.9765625, 19.626953125, 23.27734375, 26.927734375, 30.578125, 34.228515625, 37.87890625, 41.529296875, 45.1796875, 48.830078125, 52.48046875, 56.130859375, 59.78125, 63.431640625, 67.08203125, 70.732421875, 74.3828125, 78.033203125, 81.68359375, 85.333984375, 88.984375, 92.634765625, 96.28515625, 99.935546875, 103.5859375, 107.236328125, 110.88671875, 114.537109375, 118.1875]}, "gradients/decoder.transformer.h.2.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 4.0, 2.0, 5.0, 1.0, 7.0, 9.0, 9.0, 10.0, 8.0, 13.0, 21.0, 20.0, 16.0, 19.0, 32.0, 28.0, 34.0, 47.0, 47.0, 59.0, 88.0, 180.0, 1634.0, 222.0, 108.0, 64.0, 49.0, 53.0, 40.0, 34.0, 35.0, 36.0, 19.0, 17.0, 19.0, 23.0, 13.0, 6.0, 5.0, 4.0, 0.0, 4.0, 4.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-114.3125, -110.802734375, -107.29296875, -103.783203125, -100.2734375, -96.763671875, -93.25390625, -89.744140625, -86.234375, -82.724609375, -79.21484375, -75.705078125, -72.1953125, -68.685546875, -65.17578125, -61.666015625, -58.15625, -54.646484375, -51.13671875, -47.626953125, -44.1171875, -40.607421875, -37.09765625, -33.587890625, -30.078125, -26.568359375, -23.05859375, -19.548828125, -16.0390625, -12.529296875, -9.01953125, -5.509765625, -2.0, 1.509765625, 5.01953125, 8.529296875, 12.0390625, 15.548828125, 19.05859375, 22.568359375, 26.078125, 29.587890625, 33.09765625, 36.607421875, 40.1171875, 43.626953125, 47.13671875, 50.646484375, 54.15625, 57.666015625, 61.17578125, 64.685546875, 68.1953125, 71.705078125, 75.21484375, 78.724609375, 82.234375, 85.744140625, 89.25390625, 92.763671875, 96.2734375, 99.783203125, 103.29296875, 106.802734375, 110.3125]}, "gradients/decoder.transformer.h.2.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0, 2.0, 3.0, 4.0, 7.0, 5.0, 12.0, 20.0, 28.0, 41.0, 67.0, 88.0, 140.0, 256.0, 430.0, 1385.0, 21251.0, 3110447.0, 9551.0, 981.0, 366.0, 209.0, 132.0, 78.0, 61.0, 44.0, 28.0, 20.0, 9.0, 9.0, 11.0, 6.0, 3.0, 3.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-465.25, -451.32421875, -437.3984375, -423.47265625, -409.546875, -395.62109375, -381.6953125, -367.76953125, -353.84375, -339.91796875, -325.9921875, -312.06640625, -298.140625, -284.21484375, -270.2890625, -256.36328125, -242.4375, -228.51171875, -214.5859375, -200.66015625, -186.734375, -172.80859375, -158.8828125, -144.95703125, -131.03125, -117.10546875, -103.1796875, -89.25390625, -75.328125, -61.40234375, -47.4765625, -33.55078125, -19.625, -5.69921875, 8.2265625, 22.15234375, 36.078125, 50.00390625, 63.9296875, 77.85546875, 91.78125, 105.70703125, 119.6328125, 133.55859375, 147.484375, 161.41015625, 175.3359375, 189.26171875, 203.1875, 217.11328125, 231.0390625, 244.96484375, 258.890625, 272.81640625, 286.7421875, 300.66796875, 314.59375, 328.51953125, 342.4453125, 356.37109375, 370.296875, 384.22265625, 398.1484375, 412.07421875, 426.0]}, "gradients/decoder.transformer.h.2.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 3.0, 10.0, 23.0, 37.0, 47.0, 102.0, 179.0, 202.0, 135.0, 107.0, 76.0, 41.0, 21.0, 12.0, 8.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-538.4881591796875, -527.806396484375, -517.1246948242188, -506.4429626464844, -495.76123046875, -485.0794982910156, -474.39776611328125, -463.7160339355469, -453.0343017578125, -442.3525695800781, -431.67083740234375, -420.9891052246094, -410.307373046875, -399.6256408691406, -388.94390869140625, -378.2621765136719, -367.5804443359375, -356.8987121582031, -346.21697998046875, -335.5352478027344, -324.853515625, -314.1717834472656, -303.49005126953125, -292.8083190917969, -282.1265869140625, -271.4448547363281, -260.76312255859375, -250.08139038085938, -239.399658203125, -228.71792602539062, -218.03619384765625, -207.35446166992188, -196.67274475097656, -185.9910125732422, -175.3092803955078, -164.62754821777344, -153.94581604003906, -143.2640838623047, -132.5823516845703, -121.90061950683594, -111.21888732910156, -100.53715515136719, -89.85542297363281, -79.17369079589844, -68.49195861816406, -57.81022644042969, -47.12849426269531, -36.44676208496094, -25.765029907226562, -15.083297729492188, -4.4015655517578125, 6.2801666259765625, 16.961898803710938, 27.643630981445312, 38.32536315917969, 49.00709533691406, 59.68882751464844, 70.37055969238281, 81.05229187011719, 91.73402404785156, 102.41575622558594, 113.09748840332031, 123.77922058105469, 134.46095275878906, 145.14268493652344]}, "gradients/decoder.transformer.h.2.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 6.0, 1.0, 5.0, 10.0, 9.0, 11.0, 3.0, 10.0, 15.0, 14.0, 19.0, 19.0, 17.0, 28.0, 22.0, 32.0, 33.0, 34.0, 40.0, 43.0, 41.0, 46.0, 55.0, 53.0, 57.0, 40.0, 46.0, 36.0, 27.0, 37.0, 26.0, 30.0, 29.0, 24.0, 13.0, 15.0, 13.0, 11.0, 1.0, 8.0, 5.0, 3.0, 3.0, 5.0, 7.0, 2.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-306.19561767578125, -295.8919677734375, -285.58831787109375, -275.28466796875, -264.98101806640625, -254.67735290527344, -244.3737030029297, -234.07003784179688, -223.76638793945312, -213.46273803710938, -203.15908813476562, -192.85543823242188, -182.55177307128906, -172.2481231689453, -161.94447326660156, -151.64080810546875, -141.33717346191406, -131.0335235595703, -120.72986602783203, -110.42621612548828, -100.12255859375, -89.81890869140625, -79.5152587890625, -69.21160125732422, -58.90795135498047, -48.60429763793945, -38.30064392089844, -27.996994018554688, -17.693340301513672, -7.389686584472656, 2.9139633178710938, 13.217620849609375, 23.521270751953125, 33.82492446899414, 44.128578186035156, 54.432228088378906, 64.73588562011719, 75.03953552246094, 85.34318542480469, 95.64684295654297, 105.95049285888672, 116.25414276123047, 126.55780029296875, 136.8614501953125, 147.16510009765625, 157.46875, 167.77239990234375, 178.07606506347656, 188.3797149658203, 198.68336486816406, 208.9870147705078, 219.29067993164062, 229.59432983398438, 239.89797973632812, 250.20162963867188, 260.5052795410156, 270.8089294433594, 281.1125793457031, 291.4162292480469, 301.7198791503906, 312.0235290527344, 322.32720947265625, 332.630859375, 342.93450927734375, 353.2381591796875]}, "gradients/decoder.transformer.h.1.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0, 3.0, 3.0, 8.0, 9.0, 9.0, 6.0, 14.0, 15.0, 18.0, 19.0, 24.0, 25.0, 26.0, 22.0, 31.0, 28.0, 39.0, 40.0, 43.0, 40.0, 54.0, 51.0, 45.0, 51.0, 45.0, 33.0, 40.0, 33.0, 28.0, 38.0, 21.0, 26.0, 21.0, 14.0, 27.0, 9.0, 14.0, 10.0, 6.0, 7.0, 3.0, 6.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-58.3125, -56.513671875, -54.71484375, -52.916015625, -51.1171875, -49.318359375, -47.51953125, -45.720703125, -43.921875, -42.123046875, -40.32421875, -38.525390625, -36.7265625, -34.927734375, -33.12890625, -31.330078125, -29.53125, -27.732421875, -25.93359375, -24.134765625, -22.3359375, -20.537109375, -18.73828125, -16.939453125, -15.140625, -13.341796875, -11.54296875, -9.744140625, -7.9453125, -6.146484375, -4.34765625, -2.548828125, -0.75, 1.048828125, 2.84765625, 4.646484375, 6.4453125, 8.244140625, 10.04296875, 11.841796875, 13.640625, 15.439453125, 17.23828125, 19.037109375, 20.8359375, 22.634765625, 24.43359375, 26.232421875, 28.03125, 29.830078125, 31.62890625, 33.427734375, 35.2265625, 37.025390625, 38.82421875, 40.623046875, 42.421875, 44.220703125, 46.01953125, 47.818359375, 49.6171875, 51.416015625, 53.21484375, 55.013671875, 56.8125]}, "gradients/decoder.transformer.h.1.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 3.0, 5.0, 12.0, 11.0, 12.0, 18.0, 26.0, 41.0, 41.0, 56.0, 86.0, 116.0, 178.0, 250.0, 347.0, 476.0, 682.0, 1079.0, 1929.0, 3853.0, 8318.0, 23855.0, 253047.0, 3044260.0, 784909.0, 48079.0, 11341.0, 4739.0, 2347.0, 1438.0, 868.0, 560.0, 377.0, 264.0, 172.0, 133.0, 100.0, 74.0, 54.0, 34.0, 25.0, 14.0, 22.0, 7.0, 5.0, 9.0, 6.0, 4.0, 1.0, 0.0, 2.0, 4.0, 0.0, 3.0, 2.0], "bins": [-148.875, -144.3203125, -139.765625, -135.2109375, -130.65625, -126.1015625, -121.546875, -116.9921875, -112.4375, -107.8828125, -103.328125, -98.7734375, -94.21875, -89.6640625, -85.109375, -80.5546875, -76.0, -71.4453125, -66.890625, -62.3359375, -57.78125, -53.2265625, -48.671875, -44.1171875, -39.5625, -35.0078125, -30.453125, -25.8984375, -21.34375, -16.7890625, -12.234375, -7.6796875, -3.125, 1.4296875, 5.984375, 10.5390625, 15.09375, 19.6484375, 24.203125, 28.7578125, 33.3125, 37.8671875, 42.421875, 46.9765625, 51.53125, 56.0859375, 60.640625, 65.1953125, 69.75, 74.3046875, 78.859375, 83.4140625, 87.96875, 92.5234375, 97.078125, 101.6328125, 106.1875, 110.7421875, 115.296875, 119.8515625, 124.40625, 128.9609375, 133.515625, 138.0703125, 142.625]}, "gradients/decoder.transformer.h.1.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 5.0, 3.0, 5.0, 6.0, 10.0, 16.0, 23.0, 50.0, 74.0, 128.0, 216.0, 471.0, 1121.0, 1025.0, 454.0, 206.0, 106.0, 68.0, 34.0, 24.0, 14.0, 8.0, 4.0, 5.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-101.375, -96.03125, -90.6875, -85.34375, -80.0, -74.65625, -69.3125, -63.96875, -58.625, -53.28125, -47.9375, -42.59375, -37.25, -31.90625, -26.5625, -21.21875, -15.875, -10.53125, -5.1875, 0.15625, 5.5, 10.84375, 16.1875, 21.53125, 26.875, 32.21875, 37.5625, 42.90625, 48.25, 53.59375, 58.9375, 64.28125, 69.625, 74.96875, 80.3125, 85.65625, 91.0, 96.34375, 101.6875, 107.03125, 112.375, 117.71875, 123.0625, 128.40625, 133.75, 139.09375, 144.4375, 149.78125, 155.125, 160.46875, 165.8125, 171.15625, 176.5, 181.84375, 187.1875, 192.53125, 197.875, 203.21875, 208.5625, 213.90625, 219.25, 224.59375, 229.9375, 235.28125, 240.625]}, "gradients/decoder.transformer.h.1.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 3.0, 2.0, 5.0, 10.0, 13.0, 29.0, 44.0, 96.0, 172.0, 471.0, 1515.0, 10387.0, 437298.0, 3716701.0, 23743.0, 2604.0, 653.0, 281.0, 129.0, 68.0, 34.0, 14.0, 10.0, 6.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-558.0, -545.0703125, -532.140625, -519.2109375, -506.28125, -493.3515625, -480.421875, -467.4921875, -454.5625, -441.6328125, -428.703125, -415.7734375, -402.84375, -389.9140625, -376.984375, -364.0546875, -351.125, -338.1953125, -325.265625, -312.3359375, -299.40625, -286.4765625, -273.546875, -260.6171875, -247.6875, -234.7578125, -221.828125, -208.8984375, -195.96875, -183.0390625, -170.109375, -157.1796875, -144.25, -131.3203125, -118.390625, -105.4609375, -92.53125, -79.6015625, -66.671875, -53.7421875, -40.8125, -27.8828125, -14.953125, -2.0234375, 10.90625, 23.8359375, 36.765625, 49.6953125, 62.625, 75.5546875, 88.484375, 101.4140625, 114.34375, 127.2734375, 140.203125, 153.1328125, 166.0625, 178.9921875, 191.921875, 204.8515625, 217.78125, 230.7109375, 243.640625, 256.5703125, 269.5]}, "gradients/decoder.transformer.h.1.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 17.0, 110.0, 550.0, 276.0, 51.0, 9.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1232.1685791015625, -1166.4124755859375, -1100.6563720703125, -1034.9002685546875, -969.1441650390625, -903.3880615234375, -837.6320190429688, -771.8759155273438, -706.1198120117188, -640.3637084960938, -574.6076049804688, -508.8515319824219, -443.0954284667969, -377.3393249511719, -311.583251953125, -245.8271484375, -180.071044921875, -114.31494903564453, -48.55885314941406, 17.197235107421875, 82.95333862304688, 148.70944213867188, 214.46551513671875, 280.22161865234375, 345.97772216796875, 411.73382568359375, 477.48992919921875, 543.2459716796875, 609.0020751953125, 674.7581787109375, 740.5142822265625, 806.2703857421875, 872.0263671875, 937.782470703125, 1003.53857421875, 1069.294677734375, 1135.05078125, 1200.806884765625, 1266.56298828125, 1332.319091796875, 1398.0751953125, 1463.831298828125, 1529.58740234375, 1595.343505859375, 1661.099609375, 1726.855712890625, 1792.61181640625, 1858.367919921875, 1924.1239013671875, 1989.8800048828125, 2055.635986328125, 2121.39208984375, 2187.148193359375, 2252.904296875, 2318.660400390625, 2384.41650390625, 2450.172607421875, 2515.9287109375, 2581.684814453125, 2647.44091796875, 2713.197021484375, 2778.953125, 2844.709228515625, 2910.46533203125, 2976.221435546875]}, "gradients/decoder.transformer.h.1.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 2.0, 6.0, 5.0, 11.0, 8.0, 10.0, 15.0, 17.0, 15.0, 18.0, 19.0, 26.0, 28.0, 37.0, 39.0, 47.0, 27.0, 46.0, 46.0, 40.0, 33.0, 59.0, 39.0, 39.0, 31.0, 44.0, 40.0, 42.0, 28.0, 26.0, 25.0, 26.0, 20.0, 14.0, 14.0, 17.0, 5.0, 11.0, 7.0, 8.0, 9.0, 5.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-365.26678466796875, -354.63714599609375, -344.0074768066406, -333.3778381347656, -322.7481994628906, -312.1185302734375, -301.4888916015625, -290.8592529296875, -280.2295837402344, -269.5999450683594, -258.97027587890625, -248.34063720703125, -237.7109832763672, -227.08132934570312, -216.45169067382812, -205.82203674316406, -195.19239807128906, -184.562744140625, -173.93310546875, -163.30345153808594, -152.67379760742188, -142.04415893554688, -131.4145050048828, -120.78485107421875, -110.15520477294922, -99.52555847167969, -88.89590454101562, -78.2662582397461, -67.63661193847656, -57.0069580078125, -46.37731170654297, -35.747657775878906, -25.118011474609375, -14.488362312316895, -3.858713150024414, 6.77093505859375, 17.400585174560547, 28.030235290527344, 38.659881591796875, 49.28953552246094, 59.91918182373047, 70.548828125, 81.17848205566406, 91.8081283569336, 102.43777465820312, 113.06742858886719, 123.69707489013672, 134.32672119140625, 144.9563751220703, 155.58602905273438, 166.21566772460938, 176.84532165527344, 187.4749755859375, 198.1046142578125, 208.73426818847656, 219.36392211914062, 229.99356079101562, 240.6232147216797, 251.2528533935547, 261.88250732421875, 272.51214599609375, 283.1418151855469, 293.7714538574219, 304.401123046875, 315.03076171875]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 7.0, 4.0, 2.0, 2.0, 5.0, 9.0, 11.0, 6.0, 16.0, 12.0, 21.0, 18.0, 30.0, 27.0, 30.0, 29.0, 28.0, 40.0, 36.0, 47.0, 52.0, 59.0, 42.0, 42.0, 57.0, 40.0, 44.0, 34.0, 30.0, 32.0, 36.0, 20.0, 19.0, 22.0, 25.0, 10.0, 14.0, 9.0, 9.0, 9.0, 9.0, 8.0, 4.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 2.0], "bins": [-59.0, -57.33203125, -55.6640625, -53.99609375, -52.328125, -50.66015625, -48.9921875, -47.32421875, -45.65625, -43.98828125, -42.3203125, -40.65234375, -38.984375, -37.31640625, -35.6484375, -33.98046875, -32.3125, -30.64453125, -28.9765625, -27.30859375, -25.640625, -23.97265625, -22.3046875, -20.63671875, -18.96875, -17.30078125, -15.6328125, -13.96484375, -12.296875, -10.62890625, -8.9609375, -7.29296875, -5.625, -3.95703125, -2.2890625, -0.62109375, 1.046875, 2.71484375, 4.3828125, 6.05078125, 7.71875, 9.38671875, 11.0546875, 12.72265625, 14.390625, 16.05859375, 17.7265625, 19.39453125, 21.0625, 22.73046875, 24.3984375, 26.06640625, 27.734375, 29.40234375, 31.0703125, 32.73828125, 34.40625, 36.07421875, 37.7421875, 39.41015625, 41.078125, 42.74609375, 44.4140625, 46.08203125, 47.75]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 5.0, 13.0, 12.0, 24.0, 23.0, 36.0, 68.0, 112.0, 146.0, 271.0, 369.0, 578.0, 949.0, 1406.0, 2128.0, 3338.0, 5235.0, 8336.0, 13376.0, 22186.0, 36850.0, 64465.0, 128127.0, 291493.0, 231528.0, 101641.0, 54013.0, 31252.0, 18745.0, 11534.0, 7453.0, 4646.0, 2913.0, 1875.0, 1250.0, 752.0, 498.0, 346.0, 186.0, 128.0, 84.0, 63.0, 44.0, 33.0, 8.0, 8.0, 7.0, 4.0, 2.0, 3.0, 1.0, 0.0, 2.0, 1.0], "bins": [-2.830078125, -2.7464599609375, -2.662841796875, -2.5792236328125, -2.49560546875, -2.4119873046875, -2.328369140625, -2.2447509765625, -2.1611328125, -2.0775146484375, -1.993896484375, -1.9102783203125, -1.82666015625, -1.7430419921875, -1.659423828125, -1.5758056640625, -1.4921875, -1.4085693359375, -1.324951171875, -1.2413330078125, -1.15771484375, -1.0740966796875, -0.990478515625, -0.9068603515625, -0.8232421875, -0.7396240234375, -0.656005859375, -0.5723876953125, -0.48876953125, -0.4051513671875, -0.321533203125, -0.2379150390625, -0.154296875, -0.0706787109375, 0.012939453125, 0.0965576171875, 0.18017578125, 0.2637939453125, 0.347412109375, 0.4310302734375, 0.5146484375, 0.5982666015625, 0.681884765625, 0.7655029296875, 0.84912109375, 0.9327392578125, 1.016357421875, 1.0999755859375, 1.18359375, 1.2672119140625, 1.350830078125, 1.4344482421875, 1.51806640625, 1.6016845703125, 1.685302734375, 1.7689208984375, 1.8525390625, 1.9361572265625, 2.019775390625, 2.1033935546875, 2.18701171875, 2.2706298828125, 2.354248046875, 2.4378662109375, 2.521484375]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 6.0, 3.0, 3.0, 7.0, 9.0, 6.0, 10.0, 14.0, 17.0, 28.0, 20.0, 23.0, 23.0, 36.0, 48.0, 27.0, 51.0, 36.0, 40.0, 32.0, 46.0, 1073.0, 45.0, 42.0, 47.0, 33.0, 30.0, 40.0, 26.0, 32.0, 29.0, 23.0, 23.0, 23.0, 11.0, 14.0, 4.0, 12.0, 7.0, 7.0, 7.0, 3.0, 6.0, 6.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-28.984375, -28.005615234375, -27.02685546875, -26.048095703125, -25.0693359375, -24.090576171875, -23.11181640625, -22.133056640625, -21.154296875, -20.175537109375, -19.19677734375, -18.218017578125, -17.2392578125, -16.260498046875, -15.28173828125, -14.302978515625, -13.32421875, -12.345458984375, -11.36669921875, -10.387939453125, -9.4091796875, -8.430419921875, -7.45166015625, -6.472900390625, -5.494140625, -4.515380859375, -3.53662109375, -2.557861328125, -1.5791015625, -0.600341796875, 0.37841796875, 1.357177734375, 2.3359375, 3.314697265625, 4.29345703125, 5.272216796875, 6.2509765625, 7.229736328125, 8.20849609375, 9.187255859375, 10.166015625, 11.144775390625, 12.12353515625, 13.102294921875, 14.0810546875, 15.059814453125, 16.03857421875, 17.017333984375, 17.99609375, 18.974853515625, 19.95361328125, 20.932373046875, 21.9111328125, 22.889892578125, 23.86865234375, 24.847412109375, 25.826171875, 26.804931640625, 27.78369140625, 28.762451171875, 29.7412109375, 30.719970703125, 31.69873046875, 32.677490234375, 33.65625]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 7.0, 6.0, 8.0, 12.0, 15.0, 34.0, 38.0, 61.0, 97.0, 136.0, 220.0, 320.0, 495.0, 729.0, 1080.0, 1748.0, 2697.0, 4377.0, 7022.0, 11741.0, 20179.0, 35614.0, 66501.0, 137076.0, 1366018.0, 225977.0, 97327.0, 49852.0, 27493.0, 15809.0, 9123.0, 5646.0, 3476.0, 2186.0, 1327.0, 857.0, 597.0, 434.0, 262.0, 154.0, 111.0, 82.0, 53.0, 43.0, 28.0, 26.0, 23.0, 7.0, 4.0, 3.0, 6.0, 0.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0], "bins": [-2.498046875, -2.4168701171875, -2.335693359375, -2.2545166015625, -2.17333984375, -2.0921630859375, -2.010986328125, -1.9298095703125, -1.8486328125, -1.7674560546875, -1.686279296875, -1.6051025390625, -1.52392578125, -1.4427490234375, -1.361572265625, -1.2803955078125, -1.19921875, -1.1180419921875, -1.036865234375, -0.9556884765625, -0.87451171875, -0.7933349609375, -0.712158203125, -0.6309814453125, -0.5498046875, -0.4686279296875, -0.387451171875, -0.3062744140625, -0.22509765625, -0.1439208984375, -0.062744140625, 0.0184326171875, 0.099609375, 0.1807861328125, 0.261962890625, 0.3431396484375, 0.42431640625, 0.5054931640625, 0.586669921875, 0.6678466796875, 0.7490234375, 0.8302001953125, 0.911376953125, 0.9925537109375, 1.07373046875, 1.1549072265625, 1.236083984375, 1.3172607421875, 1.3984375, 1.4796142578125, 1.560791015625, 1.6419677734375, 1.72314453125, 1.8043212890625, 1.885498046875, 1.9666748046875, 2.0478515625, 2.1290283203125, 2.210205078125, 2.2913818359375, 2.37255859375, 2.4537353515625, 2.534912109375, 2.6160888671875, 2.697265625]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 4.0, 3.0, 6.0, 6.0, 0.0, 7.0, 4.0, 6.0, 2.0, 10.0, 10.0, 12.0, 16.0, 22.0, 28.0, 26.0, 49.0, 57.0, 87.0, 85.0, 110.0, 86.0, 78.0, 70.0, 40.0, 34.0, 30.0, 18.0, 13.0, 11.0, 12.0, 7.0, 6.0, 8.0, 10.0, 7.0, 5.0, 3.0, 4.0, 5.0, 3.0, 3.0, 3.0, 5.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0217437744140625, -0.021120548248291016, -0.02049732208251953, -0.019874095916748047, -0.019250869750976562, -0.018627643585205078, -0.018004417419433594, -0.01738119125366211, -0.016757965087890625, -0.01613473892211914, -0.015511512756347656, -0.014888286590576172, -0.014265060424804688, -0.013641834259033203, -0.013018608093261719, -0.012395381927490234, -0.01177215576171875, -0.011148929595947266, -0.010525703430175781, -0.009902477264404297, -0.009279251098632812, -0.008656024932861328, -0.008032798767089844, -0.007409572601318359, -0.006786346435546875, -0.006163120269775391, -0.005539894104003906, -0.004916667938232422, -0.0042934417724609375, -0.003670215606689453, -0.0030469894409179688, -0.0024237632751464844, -0.001800537109375, -0.0011773109436035156, -0.0005540847778320312, 6.914138793945312e-05, 0.0006923675537109375, 0.0013155937194824219, 0.0019388198852539062, 0.0025620460510253906, 0.003185272216796875, 0.0038084983825683594, 0.004431724548339844, 0.005054950714111328, 0.0056781768798828125, 0.006301403045654297, 0.006924629211425781, 0.007547855377197266, 0.00817108154296875, 0.008794307708740234, 0.009417533874511719, 0.010040760040283203, 0.010663986206054688, 0.011287212371826172, 0.011910438537597656, 0.01253366470336914, 0.013156890869140625, 0.01378011703491211, 0.014403343200683594, 0.015026569366455078, 0.015649795532226562, 0.016273021697998047, 0.01689624786376953, 0.017519474029541016, 0.0181427001953125]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 5.0, 3.0, 4.0, 4.0, 8.0, 9.0, 6.0, 8.0, 11.0, 11.0, 15.0, 23.0, 20.0, 32.0, 36.0, 40.0, 61.0, 83.0, 146.0, 208.0, 452.0, 2119.0, 117930.0, 919986.0, 5765.0, 651.0, 313.0, 179.0, 113.0, 69.0, 60.0, 41.0, 38.0, 17.0, 15.0, 15.0, 6.0, 12.0, 13.0, 10.0, 8.0, 7.0, 3.0, 6.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.323974609375, -0.3132362365722656, -0.30249786376953125, -0.2917594909667969, -0.2810211181640625, -0.2702827453613281, -0.25954437255859375, -0.24880599975585938, -0.238067626953125, -0.22732925415039062, -0.21659088134765625, -0.20585250854492188, -0.1951141357421875, -0.18437576293945312, -0.17363739013671875, -0.16289901733398438, -0.15216064453125, -0.14142227172851562, -0.13068389892578125, -0.11994552612304688, -0.1092071533203125, -0.09846878051757812, -0.08773040771484375, -0.07699203491210938, -0.066253662109375, -0.055515289306640625, -0.04477691650390625, -0.034038543701171875, -0.0233001708984375, -0.012561798095703125, -0.00182342529296875, 0.008914947509765625, 0.0196533203125, 0.030391693115234375, 0.04113006591796875, 0.051868438720703125, 0.0626068115234375, 0.07334518432617188, 0.08408355712890625, 0.09482192993164062, 0.105560302734375, 0.11629867553710938, 0.12703704833984375, 0.13777542114257812, 0.1485137939453125, 0.15925216674804688, 0.16999053955078125, 0.18072891235351562, 0.19146728515625, 0.20220565795898438, 0.21294403076171875, 0.22368240356445312, 0.2344207763671875, 0.24515914916992188, 0.25589752197265625, 0.2666358947753906, 0.277374267578125, 0.2881126403808594, 0.29885101318359375, 0.3095893859863281, 0.3203277587890625, 0.3310661315917969, 0.34180450439453125, 0.3525428771972656, 0.36328125]}, "gradients/decoder.transformer.h.1.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 12.0, 4.0, 20.0, 15.0, 30.0, 60.0, 94.0, 139.0, 177.0, 164.0, 106.0, 60.0, 53.0, 25.0, 18.0, 6.0, 8.0, 7.0, 2.0, 1.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.014822851866483688, -0.014396080747246742, -0.01396931055933237, -0.013542540371418, -0.013115769252181053, -0.012688998132944107, -0.012262227945029736, -0.011835457757115364, -0.011408686637878418, -0.010981915518641472, -0.0105551453307271, -0.010128375142812729, -0.009701604023575783, -0.009274832904338837, -0.008848062716424465, -0.008421292528510094, -0.007994521409273148, -0.007567750755697489, -0.00714098010212183, -0.006714209448546171, -0.006287438794970512, -0.005860668141394854, -0.005433897487819195, -0.005007126834243536, -0.004580356180667877, -0.004153585527092218, -0.0037268148735165596, -0.003300044219940901, -0.002873273566365242, -0.002446502912789583, -0.0020197322592139244, -0.0015929616056382656, -0.0011661909520626068, -0.000739420298486948, -0.0003126496449112892, 0.00011412100866436958, 0.0005408916622400284, 0.0009676623158156872, 0.001394432969391346, 0.0018212036229670048, 0.0022479742765426636, 0.0026747449301183224, 0.003101515583693981, 0.00352828623726964, 0.003955056890845299, 0.0043818275444209576, 0.004808598197996616, 0.005235368851572275, 0.005662139505147934, 0.006088910158723593, 0.0065156808122992516, 0.00694245146587491, 0.007369222119450569, 0.007795992773026228, 0.008222763426601887, 0.008649533614516258, 0.009076304733753204, 0.00950307585299015, 0.009929846040904522, 0.010356616228818893, 0.01078338734805584, 0.011210158467292786, 0.011636928655207157, 0.012063698843121529, 0.012490469962358475]}, "gradients/decoder.transformer.h.1.ln_cross_attn.bias": {"_type": "histogram", "values": [4.0, 4.0, 2.0, 1.0, 6.0, 9.0, 9.0, 9.0, 9.0, 14.0, 19.0, 24.0, 16.0, 24.0, 28.0, 27.0, 33.0, 42.0, 43.0, 31.0, 41.0, 51.0, 45.0, 50.0, 44.0, 47.0, 38.0, 38.0, 37.0, 36.0, 34.0, 31.0, 25.0, 20.0, 22.0, 17.0, 21.0, 13.0, 9.0, 14.0, 9.0, 5.0, 7.0, 5.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.007675349712371826, -0.007352359592914581, -0.007029369473457336, -0.0067063793540000916, -0.006383389234542847, -0.006060399115085602, -0.005737408995628357, -0.005414418876171112, -0.005091428756713867, -0.004768438637256622, -0.0044454485177993774, -0.004122458398342133, -0.0037994682788848877, -0.003476478159427643, -0.003153488039970398, -0.002830497920513153, -0.002507507801055908, -0.0021845176815986633, -0.0018615275621414185, -0.0015385374426841736, -0.0012155473232269287, -0.0008925572037696838, -0.000569567084312439, -0.0002465769648551941, 7.641315460205078e-05, 0.00039940327405929565, 0.0007223933935165405, 0.0010453835129737854, 0.0013683736324310303, 0.0016913637518882751, 0.00201435387134552, 0.002337343990802765, 0.0026603341102600098, 0.0029833242297172546, 0.0033063143491744995, 0.0036293044686317444, 0.003952294588088989, 0.004275284707546234, 0.004598274827003479, 0.004921264946460724, 0.005244255065917969, 0.005567245185375214, 0.0058902353048324585, 0.006213225424289703, 0.006536215543746948, 0.006859205663204193, 0.007182195782661438, 0.007505185902118683, 0.007828176021575928, 0.008151166141033173, 0.008474156260490417, 0.008797146379947662, 0.009120136499404907, 0.009443126618862152, 0.009766116738319397, 0.010089106857776642, 0.010412096977233887, 0.010735087096691132, 0.011058077216148376, 0.011381067335605621, 0.011704057455062866, 0.012027047574520111, 0.012350037693977356, 0.0126730278134346, 0.012996017932891846]}, "gradients/decoder.transformer.h.1.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 7.0, 4.0, 2.0, 2.0, 5.0, 9.0, 11.0, 6.0, 16.0, 12.0, 21.0, 18.0, 30.0, 27.0, 30.0, 29.0, 28.0, 40.0, 36.0, 47.0, 52.0, 59.0, 42.0, 42.0, 57.0, 40.0, 44.0, 34.0, 30.0, 32.0, 36.0, 20.0, 19.0, 22.0, 25.0, 10.0, 14.0, 9.0, 9.0, 9.0, 9.0, 8.0, 4.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 2.0], "bins": [-59.0, -57.33203125, -55.6640625, -53.99609375, -52.328125, -50.66015625, -48.9921875, -47.32421875, -45.65625, -43.98828125, -42.3203125, -40.65234375, -38.984375, -37.31640625, -35.6484375, -33.98046875, -32.3125, -30.64453125, -28.9765625, -27.30859375, -25.640625, -23.97265625, -22.3046875, -20.63671875, -18.96875, -17.30078125, -15.6328125, -13.96484375, -12.296875, -10.62890625, -8.9609375, -7.29296875, -5.625, -3.95703125, -2.2890625, -0.62109375, 1.046875, 2.71484375, 4.3828125, 6.05078125, 7.71875, 9.38671875, 11.0546875, 12.72265625, 14.390625, 16.05859375, 17.7265625, 19.39453125, 21.0625, 22.73046875, 24.3984375, 26.06640625, 27.734375, 29.40234375, 31.0703125, 32.73828125, 34.40625, 36.07421875, 37.7421875, 39.41015625, 41.078125, 42.74609375, 44.4140625, 46.08203125, 47.75]}, "gradients/decoder.transformer.h.1.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 9.0, 6.0, 14.0, 22.0, 17.0, 43.0, 60.0, 86.0, 153.0, 240.0, 331.0, 531.0, 839.0, 1420.0, 2498.0, 4814.0, 10013.0, 27214.0, 104054.0, 602539.0, 219482.0, 45095.0, 14924.0, 6264.0, 3202.0, 1824.0, 1055.0, 630.0, 402.0, 269.0, 167.0, 110.0, 82.0, 50.0, 28.0, 24.0, 10.0, 9.0, 8.0, 7.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-92.5, -89.8349609375, -87.169921875, -84.5048828125, -81.83984375, -79.1748046875, -76.509765625, -73.8447265625, -71.1796875, -68.5146484375, -65.849609375, -63.1845703125, -60.51953125, -57.8544921875, -55.189453125, -52.5244140625, -49.859375, -47.1943359375, -44.529296875, -41.8642578125, -39.19921875, -36.5341796875, -33.869140625, -31.2041015625, -28.5390625, -25.8740234375, -23.208984375, -20.5439453125, -17.87890625, -15.2138671875, -12.548828125, -9.8837890625, -7.21875, -4.5537109375, -1.888671875, 0.7763671875, 3.44140625, 6.1064453125, 8.771484375, 11.4365234375, 14.1015625, 16.7666015625, 19.431640625, 22.0966796875, 24.76171875, 27.4267578125, 30.091796875, 32.7568359375, 35.421875, 38.0869140625, 40.751953125, 43.4169921875, 46.08203125, 48.7470703125, 51.412109375, 54.0771484375, 56.7421875, 59.4072265625, 62.072265625, 64.7373046875, 67.40234375, 70.0673828125, 72.732421875, 75.3974609375, 78.0625]}, "gradients/decoder.transformer.h.1.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 4.0, 3.0, 5.0, 2.0, 7.0, 14.0, 14.0, 25.0, 20.0, 19.0, 29.0, 32.0, 33.0, 43.0, 52.0, 49.0, 86.0, 163.0, 1583.0, 356.0, 117.0, 74.0, 55.0, 45.0, 40.0, 30.0, 21.0, 27.0, 18.0, 11.0, 17.0, 12.0, 13.0, 7.0, 5.0, 4.0, 3.0, 4.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-122.4375, -118.5869140625, -114.736328125, -110.8857421875, -107.03515625, -103.1845703125, -99.333984375, -95.4833984375, -91.6328125, -87.7822265625, -83.931640625, -80.0810546875, -76.23046875, -72.3798828125, -68.529296875, -64.6787109375, -60.828125, -56.9775390625, -53.126953125, -49.2763671875, -45.42578125, -41.5751953125, -37.724609375, -33.8740234375, -30.0234375, -26.1728515625, -22.322265625, -18.4716796875, -14.62109375, -10.7705078125, -6.919921875, -3.0693359375, 0.78125, 4.6318359375, 8.482421875, 12.3330078125, 16.18359375, 20.0341796875, 23.884765625, 27.7353515625, 31.5859375, 35.4365234375, 39.287109375, 43.1376953125, 46.98828125, 50.8388671875, 54.689453125, 58.5400390625, 62.390625, 66.2412109375, 70.091796875, 73.9423828125, 77.79296875, 81.6435546875, 85.494140625, 89.3447265625, 93.1953125, 97.0458984375, 100.896484375, 104.7470703125, 108.59765625, 112.4482421875, 116.298828125, 120.1494140625, 124.0]}, "gradients/decoder.transformer.h.1.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 11.0, 4.0, 7.0, 11.0, 15.0, 25.0, 24.0, 22.0, 50.0, 68.0, 109.0, 183.0, 358.0, 993.0, 9459.0, 3114014.0, 18042.0, 1274.0, 437.0, 196.0, 129.0, 66.0, 44.0, 45.0, 27.0, 19.0, 25.0, 12.0, 10.0, 6.0, 2.0, 4.0, 2.0, 4.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-360.5, -349.43359375, -338.3671875, -327.30078125, -316.234375, -305.16796875, -294.1015625, -283.03515625, -271.96875, -260.90234375, -249.8359375, -238.76953125, -227.703125, -216.63671875, -205.5703125, -194.50390625, -183.4375, -172.37109375, -161.3046875, -150.23828125, -139.171875, -128.10546875, -117.0390625, -105.97265625, -94.90625, -83.83984375, -72.7734375, -61.70703125, -50.640625, -39.57421875, -28.5078125, -17.44140625, -6.375, 4.69140625, 15.7578125, 26.82421875, 37.890625, 48.95703125, 60.0234375, 71.08984375, 82.15625, 93.22265625, 104.2890625, 115.35546875, 126.421875, 137.48828125, 148.5546875, 159.62109375, 170.6875, 181.75390625, 192.8203125, 203.88671875, 214.953125, 226.01953125, 237.0859375, 248.15234375, 259.21875, 270.28515625, 281.3515625, 292.41796875, 303.484375, 314.55078125, 325.6171875, 336.68359375, 347.75]}, "gradients/decoder.transformer.h.1.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 32.0, 367.0, 536.0, 70.0, 7.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-130.91610717773438, -105.24034118652344, -79.56456756591797, -53.8887939453125, -28.213027954101562, -2.537261962890625, 23.138519287109375, 48.81428527832031, 74.49005126953125, 100.16581726074219, 125.84159088134766, 151.51736450195312, 177.19313049316406, 202.868896484375, 228.544677734375, 254.22044372558594, 279.8962097167969, 305.5719909667969, 331.24774169921875, 356.92352294921875, 382.59930419921875, 408.2750549316406, 433.9508361816406, 459.6265869140625, 485.3023681640625, 510.9781494140625, 536.6539306640625, 562.3297119140625, 588.0054321289062, 613.6812133789062, 639.3569946289062, 665.0327758789062, 690.70849609375, 716.38427734375, 742.06005859375, 767.73583984375, 793.4115600585938, 819.0873413085938, 844.7631225585938, 870.4389038085938, 896.1146240234375, 921.7904052734375, 947.4661865234375, 973.1419677734375, 998.8176879882812, 1024.493408203125, 1050.169189453125, 1075.844970703125, 1101.520751953125, 1127.196533203125, 1152.872314453125, 1178.548095703125, 1204.223876953125, 1229.899658203125, 1255.575439453125, 1281.2510986328125, 1306.927001953125, 1332.602783203125, 1358.278564453125, 1383.954345703125, 1409.630126953125, 1435.305908203125, 1460.981689453125, 1486.6573486328125, 1512.3331298828125]}, "gradients/decoder.transformer.h.1.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 2.0, 1.0, 3.0, 1.0, 2.0, 8.0, 4.0, 4.0, 8.0, 6.0, 14.0, 10.0, 12.0, 14.0, 28.0, 22.0, 23.0, 29.0, 30.0, 34.0, 28.0, 33.0, 44.0, 43.0, 33.0, 38.0, 43.0, 33.0, 32.0, 46.0, 46.0, 40.0, 25.0, 36.0, 32.0, 30.0, 21.0, 21.0, 31.0, 18.0, 12.0, 16.0, 13.0, 4.0, 14.0, 4.0, 3.0, 8.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-214.01531982421875, -207.11715698242188, -200.21900939941406, -193.32086181640625, -186.42269897460938, -179.5245361328125, -172.6263885498047, -165.72824096679688, -158.830078125, -151.93191528320312, -145.0337677001953, -138.1356201171875, -131.23745727539062, -124.33930206298828, -117.44114685058594, -110.5429916381836, -103.64483642578125, -96.7466812133789, -89.84852600097656, -82.95037078857422, -76.05221557617188, -69.15406036376953, -62.25590515136719, -55.357749938964844, -48.4595947265625, -41.561439514160156, -34.66328430175781, -27.76512908935547, -20.866973876953125, -13.968818664550781, -7.0706634521484375, -0.17250823974609375, 6.7256317138671875, 13.623786926269531, 20.521942138671875, 27.42009735107422, 34.31825256347656, 41.216407775878906, 48.11456298828125, 55.012718200683594, 61.91087341308594, 68.80902862548828, 75.70718383789062, 82.60533905029297, 89.50349426269531, 96.40164947509766, 103.2998046875, 110.19795989990234, 117.09611511230469, 123.99427032470703, 130.89242553710938, 137.79058837890625, 144.68873596191406, 151.58688354492188, 158.48504638671875, 165.38320922851562, 172.28135681152344, 179.17950439453125, 186.07766723632812, 192.975830078125, 199.8739776611328, 206.77212524414062, 213.6702880859375, 220.56845092773438, 227.4665985107422]}, "gradients/decoder.transformer.h.0.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 4.0, 4.0, 4.0, 8.0, 10.0, 11.0, 13.0, 20.0, 18.0, 23.0, 29.0, 31.0, 38.0, 33.0, 40.0, 36.0, 38.0, 47.0, 44.0, 54.0, 50.0, 48.0, 57.0, 40.0, 34.0, 39.0, 29.0, 33.0, 20.0, 25.0, 21.0, 31.0, 12.0, 16.0, 9.0, 12.0, 4.0, 7.0, 4.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-59.375, -57.5078125, -55.640625, -53.7734375, -51.90625, -50.0390625, -48.171875, -46.3046875, -44.4375, -42.5703125, -40.703125, -38.8359375, -36.96875, -35.1015625, -33.234375, -31.3671875, -29.5, -27.6328125, -25.765625, -23.8984375, -22.03125, -20.1640625, -18.296875, -16.4296875, -14.5625, -12.6953125, -10.828125, -8.9609375, -7.09375, -5.2265625, -3.359375, -1.4921875, 0.375, 2.2421875, 4.109375, 5.9765625, 7.84375, 9.7109375, 11.578125, 13.4453125, 15.3125, 17.1796875, 19.046875, 20.9140625, 22.78125, 24.6484375, 26.515625, 28.3828125, 30.25, 32.1171875, 33.984375, 35.8515625, 37.71875, 39.5859375, 41.453125, 43.3203125, 45.1875, 47.0546875, 48.921875, 50.7890625, 52.65625, 54.5234375, 56.390625, 58.2578125, 60.125]}, "gradients/decoder.transformer.h.0.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 5.0, 1.0, 5.0, 7.0, 11.0, 10.0, 15.0, 19.0, 19.0, 46.0, 52.0, 81.0, 126.0, 149.0, 239.0, 374.0, 761.0, 1483.0, 3446.0, 10033.0, 39092.0, 1751273.0, 2322910.0, 46051.0, 10882.0, 3794.0, 1508.0, 731.0, 392.0, 240.0, 148.0, 95.0, 76.0, 54.0, 58.0, 27.0, 19.0, 24.0, 9.0, 11.0, 5.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-300.5, -291.12109375, -281.7421875, -272.36328125, -262.984375, -253.60546875, -244.2265625, -234.84765625, -225.46875, -216.08984375, -206.7109375, -197.33203125, -187.953125, -178.57421875, -169.1953125, -159.81640625, -150.4375, -141.05859375, -131.6796875, -122.30078125, -112.921875, -103.54296875, -94.1640625, -84.78515625, -75.40625, -66.02734375, -56.6484375, -47.26953125, -37.890625, -28.51171875, -19.1328125, -9.75390625, -0.375, 9.00390625, 18.3828125, 27.76171875, 37.140625, 46.51953125, 55.8984375, 65.27734375, 74.65625, 84.03515625, 93.4140625, 102.79296875, 112.171875, 121.55078125, 130.9296875, 140.30859375, 149.6875, 159.06640625, 168.4453125, 177.82421875, 187.203125, 196.58203125, 205.9609375, 215.33984375, 224.71875, 234.09765625, 243.4765625, 252.85546875, 262.234375, 271.61328125, 280.9921875, 290.37109375, 299.75]}, "gradients/decoder.transformer.h.0.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 4.0, 4.0, 4.0, 8.0, 14.0, 14.0, 16.0, 18.0, 37.0, 48.0, 72.0, 101.0, 147.0, 230.0, 413.0, 723.0, 940.0, 477.0, 272.0, 164.0, 110.0, 72.0, 45.0, 34.0, 22.0, 21.0, 15.0, 12.0, 10.0, 9.0, 6.0, 7.0, 4.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-174.625, -169.669921875, -164.71484375, -159.759765625, -154.8046875, -149.849609375, -144.89453125, -139.939453125, -134.984375, -130.029296875, -125.07421875, -120.119140625, -115.1640625, -110.208984375, -105.25390625, -100.298828125, -95.34375, -90.388671875, -85.43359375, -80.478515625, -75.5234375, -70.568359375, -65.61328125, -60.658203125, -55.703125, -50.748046875, -45.79296875, -40.837890625, -35.8828125, -30.927734375, -25.97265625, -21.017578125, -16.0625, -11.107421875, -6.15234375, -1.197265625, 3.7578125, 8.712890625, 13.66796875, 18.623046875, 23.578125, 28.533203125, 33.48828125, 38.443359375, 43.3984375, 48.353515625, 53.30859375, 58.263671875, 63.21875, 68.173828125, 73.12890625, 78.083984375, 83.0390625, 87.994140625, 92.94921875, 97.904296875, 102.859375, 107.814453125, 112.76953125, 117.724609375, 122.6796875, 127.634765625, 132.58984375, 137.544921875, 142.5]}, "gradients/decoder.transformer.h.0.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 3.0, 4.0, 3.0, 2.0, 12.0, 6.0, 16.0, 21.0, 26.0, 30.0, 41.0, 79.0, 86.0, 129.0, 178.0, 306.0, 428.0, 640.0, 958.0, 1589.0, 2536.0, 4349.0, 7745.0, 14676.0, 30259.0, 73729.0, 236167.0, 2034237.0, 1464963.0, 197486.0, 65020.0, 27341.0, 13124.0, 7242.0, 4007.0, 2476.0, 1529.0, 969.0, 576.0, 428.0, 270.0, 168.0, 130.0, 80.0, 66.0, 57.0, 35.0, 19.0, 20.0, 8.0, 8.0, 6.0, 4.0, 3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-94.4375, -91.384765625, -88.33203125, -85.279296875, -82.2265625, -79.173828125, -76.12109375, -73.068359375, -70.015625, -66.962890625, -63.91015625, -60.857421875, -57.8046875, -54.751953125, -51.69921875, -48.646484375, -45.59375, -42.541015625, -39.48828125, -36.435546875, -33.3828125, -30.330078125, -27.27734375, -24.224609375, -21.171875, -18.119140625, -15.06640625, -12.013671875, -8.9609375, -5.908203125, -2.85546875, 0.197265625, 3.25, 6.302734375, 9.35546875, 12.408203125, 15.4609375, 18.513671875, 21.56640625, 24.619140625, 27.671875, 30.724609375, 33.77734375, 36.830078125, 39.8828125, 42.935546875, 45.98828125, 49.041015625, 52.09375, 55.146484375, 58.19921875, 61.251953125, 64.3046875, 67.357421875, 70.41015625, 73.462890625, 76.515625, 79.568359375, 82.62109375, 85.673828125, 88.7265625, 91.779296875, 94.83203125, 97.884765625, 100.9375]}, "gradients/decoder.transformer.h.0.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 4.0, 3.0, 5.0, 16.0, 4.0, 15.0, 20.0, 19.0, 31.0, 46.0, 80.0, 113.0, 148.0, 158.0, 102.0, 74.0, 54.0, 35.0, 24.0, 13.0, 15.0, 8.0, 5.0, 4.0, 8.0, 3.0, 1.0, 1.0, 1.0, 1.0, 3.0], "bins": [-1648.35986328125, -1612.5533447265625, -1576.746826171875, -1540.9403076171875, -1505.1337890625, -1469.3272705078125, -1433.520751953125, -1397.714111328125, -1361.90771484375, -1326.1011962890625, -1290.294677734375, -1254.4881591796875, -1218.681640625, -1182.8751220703125, -1147.068603515625, -1111.261962890625, -1075.4554443359375, -1039.64892578125, -1003.8424072265625, -968.035888671875, -932.2293701171875, -896.4228515625, -860.6162719726562, -824.8097534179688, -789.0032348632812, -753.1967163085938, -717.3901977539062, -681.5836791992188, -645.777099609375, -609.9705810546875, -574.1640625, -538.3575439453125, -502.55108642578125, -466.74456787109375, -430.93804931640625, -395.1315002441406, -359.3249816894531, -323.5184631347656, -287.7119140625, -251.9053955078125, -216.098876953125, -180.2923583984375, -144.48582458496094, -108.6792984008789, -72.87277221679688, -37.066253662109375, -1.2597198486328125, 34.54681396484375, 70.35333251953125, 106.15985870361328, 141.9663848876953, 177.77291870117188, 213.57943725585938, 249.38595581054688, 285.1925048828125, 320.9990234375, 356.8055419921875, 392.612060546875, 428.4185791015625, 464.2251281738281, 500.0316467285156, 535.838134765625, 571.6447143554688, 607.4512329101562, 643.2577514648438]}, "gradients/decoder.transformer.h.0.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 6.0, 2.0, 0.0, 7.0, 5.0, 8.0, 10.0, 13.0, 16.0, 16.0, 18.0, 26.0, 18.0, 35.0, 25.0, 19.0, 40.0, 41.0, 31.0, 48.0, 64.0, 40.0, 39.0, 41.0, 32.0, 42.0, 50.0, 35.0, 41.0, 28.0, 38.0, 24.0, 27.0, 26.0, 16.0, 14.0, 15.0, 15.0, 9.0, 7.0, 6.0, 3.0, 6.0, 2.0, 4.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-533.291259765625, -515.253662109375, -497.2160339355469, -479.1784362792969, -461.14080810546875, -443.10321044921875, -425.06561279296875, -407.02801513671875, -388.9903869628906, -370.9527893066406, -352.9151611328125, -334.8775634765625, -316.8399658203125, -298.8023376464844, -280.7647399902344, -262.72711181640625, -244.68951416015625, -226.6519012451172, -208.61428833007812, -190.57669067382812, -172.53907775878906, -154.50146484375, -136.4638671875, -118.42625427246094, -100.38864135742188, -82.35102844238281, -64.31342315673828, -46.275814056396484, -28.238204956054688, -10.200592041015625, 7.837013244628906, 25.874618530273438, 43.91229248046875, 61.94990158081055, 79.98751068115234, 98.02511596679688, 116.06272888183594, 134.100341796875, 152.137939453125, 170.17555236816406, 188.21316528320312, 206.2507781982422, 224.28839111328125, 242.32598876953125, 260.36358642578125, 278.4012145996094, 296.4388122558594, 314.4764404296875, 332.5140380859375, 350.5516357421875, 368.5892639160156, 386.6268615722656, 404.66448974609375, 422.70208740234375, 440.73968505859375, 458.77728271484375, 476.8149108886719, 494.8525085449219, 512.89013671875, 530.927734375, 548.96533203125, 567.0029296875, 585.0405883789062, 603.0781860351562, 621.1157836914062]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 1.0, 3.0, 5.0, 9.0, 9.0, 14.0, 16.0, 13.0, 19.0, 15.0, 23.0, 26.0, 22.0, 29.0, 31.0, 37.0, 37.0, 41.0, 50.0, 60.0, 41.0, 50.0, 33.0, 50.0, 41.0, 38.0, 38.0, 36.0, 29.0, 27.0, 34.0, 20.0, 18.0, 19.0, 11.0, 15.0, 7.0, 11.0, 7.0, 6.0, 7.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-920.5, -890.90625, -861.3125, -831.71875, -802.125, -772.53125, -742.9375, -713.34375, -683.75, -654.15625, -624.5625, -594.96875, -565.375, -535.78125, -506.1875, -476.59375, -447.0, -417.40625, -387.8125, -358.21875, -328.625, -299.03125, -269.4375, -239.84375, -210.25, -180.65625, -151.0625, -121.46875, -91.875, -62.28125, -32.6875, -3.09375, 26.5, 56.09375, 85.6875, 115.28125, 144.875, 174.46875, 204.0625, 233.65625, 263.25, 292.84375, 322.4375, 352.03125, 381.625, 411.21875, 440.8125, 470.40625, 500.0, 529.59375, 559.1875, 588.78125, 618.375, 647.96875, 677.5625, 707.15625, 736.75, 766.34375, 795.9375, 825.53125, 855.125, 884.71875, 914.3125, 943.90625, 973.5]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 5.0, 8.0, 2.0, 9.0, 12.0, 19.0, 19.0, 43.0, 56.0, 69.0, 97.0, 164.0, 256.0, 367.0, 505.0, 818.0, 1246.0, 1950.0, 2927.0, 4547.0, 7066.0, 11052.0, 17783.0, 28347.0, 46726.0, 82804.0, 175213.0, 322555.0, 153438.0, 75195.0, 42968.0, 26551.0, 16423.0, 10434.0, 6612.0, 4255.0, 2684.0, 1835.0, 1145.0, 770.0, 556.0, 331.0, 207.0, 153.0, 114.0, 84.0, 38.0, 34.0, 29.0, 16.0, 15.0, 4.0, 5.0, 4.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-50.625, -48.9609375, -47.296875, -45.6328125, -43.96875, -42.3046875, -40.640625, -38.9765625, -37.3125, -35.6484375, -33.984375, -32.3203125, -30.65625, -28.9921875, -27.328125, -25.6640625, -24.0, -22.3359375, -20.671875, -19.0078125, -17.34375, -15.6796875, -14.015625, -12.3515625, -10.6875, -9.0234375, -7.359375, -5.6953125, -4.03125, -2.3671875, -0.703125, 0.9609375, 2.625, 4.2890625, 5.953125, 7.6171875, 9.28125, 10.9453125, 12.609375, 14.2734375, 15.9375, 17.6015625, 19.265625, 20.9296875, 22.59375, 24.2578125, 25.921875, 27.5859375, 29.25, 30.9140625, 32.578125, 34.2421875, 35.90625, 37.5703125, 39.234375, 40.8984375, 42.5625, 44.2265625, 45.890625, 47.5546875, 49.21875, 50.8828125, 52.546875, 54.2109375, 55.875]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 0.0, 5.0, 7.0, 7.0, 6.0, 9.0, 4.0, 6.0, 15.0, 10.0, 13.0, 20.0, 11.0, 26.0, 23.0, 23.0, 22.0, 28.0, 30.0, 30.0, 34.0, 29.0, 32.0, 23.0, 42.0, 31.0, 1069.0, 42.0, 38.0, 30.0, 23.0, 36.0, 35.0, 32.0, 28.0, 20.0, 20.0, 16.0, 23.0, 18.0, 17.0, 10.0, 14.0, 11.0, 14.0, 8.0, 10.0, 8.0, 10.0, 6.0, 4.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-512.0, -496.5078125, -481.015625, -465.5234375, -450.03125, -434.5390625, -419.046875, -403.5546875, -388.0625, -372.5703125, -357.078125, -341.5859375, -326.09375, -310.6015625, -295.109375, -279.6171875, -264.125, -248.6328125, -233.140625, -217.6484375, -202.15625, -186.6640625, -171.171875, -155.6796875, -140.1875, -124.6953125, -109.203125, -93.7109375, -78.21875, -62.7265625, -47.234375, -31.7421875, -16.25, -0.7578125, 14.734375, 30.2265625, 45.71875, 61.2109375, 76.703125, 92.1953125, 107.6875, 123.1796875, 138.671875, 154.1640625, 169.65625, 185.1484375, 200.640625, 216.1328125, 231.625, 247.1171875, 262.609375, 278.1015625, 293.59375, 309.0859375, 324.578125, 340.0703125, 355.5625, 371.0546875, 386.546875, 402.0390625, 417.53125, 433.0234375, 448.515625, 464.0078125, 479.5]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 5.0, 4.0, 6.0, 4.0, 13.0, 16.0, 19.0, 30.0, 40.0, 71.0, 115.0, 128.0, 202.0, 311.0, 418.0, 662.0, 915.0, 1363.0, 2153.0, 3124.0, 4673.0, 6814.0, 10133.0, 15228.0, 23142.0, 36628.0, 60467.0, 109757.0, 955601.0, 593071.0, 107422.0, 59821.0, 35889.0, 22916.0, 15148.0, 10041.0, 6748.0, 4500.0, 3091.0, 2093.0, 1361.0, 976.0, 629.0, 439.0, 340.0, 193.0, 139.0, 99.0, 67.0, 45.0, 28.0, 24.0, 12.0, 9.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-38.15625, -36.9248046875, -35.693359375, -34.4619140625, -33.23046875, -31.9990234375, -30.767578125, -29.5361328125, -28.3046875, -27.0732421875, -25.841796875, -24.6103515625, -23.37890625, -22.1474609375, -20.916015625, -19.6845703125, -18.453125, -17.2216796875, -15.990234375, -14.7587890625, -13.52734375, -12.2958984375, -11.064453125, -9.8330078125, -8.6015625, -7.3701171875, -6.138671875, -4.9072265625, -3.67578125, -2.4443359375, -1.212890625, 0.0185546875, 1.25, 2.4814453125, 3.712890625, 4.9443359375, 6.17578125, 7.4072265625, 8.638671875, 9.8701171875, 11.1015625, 12.3330078125, 13.564453125, 14.7958984375, 16.02734375, 17.2587890625, 18.490234375, 19.7216796875, 20.953125, 22.1845703125, 23.416015625, 24.6474609375, 25.87890625, 27.1103515625, 28.341796875, 29.5732421875, 30.8046875, 32.0361328125, 33.267578125, 34.4990234375, 35.73046875, 36.9619140625, 38.193359375, 39.4248046875, 40.65625]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 5.0, 6.0, 10.0, 13.0, 13.0, 9.0, 6.0, 18.0, 24.0, 35.0, 28.0, 51.0, 53.0, 62.0, 93.0, 115.0, 122.0, 63.0, 55.0, 48.0, 43.0, 34.0, 26.0, 11.0, 15.0, 15.0, 5.0, 9.0, 4.0, 3.0, 3.0, 0.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.74609375, -0.7225189208984375, -0.698944091796875, -0.6753692626953125, -0.65179443359375, -0.6282196044921875, -0.604644775390625, -0.5810699462890625, -0.5574951171875, -0.5339202880859375, -0.510345458984375, -0.4867706298828125, -0.46319580078125, -0.4396209716796875, -0.416046142578125, -0.3924713134765625, -0.368896484375, -0.3453216552734375, -0.321746826171875, -0.2981719970703125, -0.27459716796875, -0.2510223388671875, -0.227447509765625, -0.2038726806640625, -0.1802978515625, -0.1567230224609375, -0.133148193359375, -0.1095733642578125, -0.08599853515625, -0.0624237060546875, -0.038848876953125, -0.0152740478515625, 0.00830078125, 0.0318756103515625, 0.055450439453125, 0.0790252685546875, 0.10260009765625, 0.1261749267578125, 0.149749755859375, 0.1733245849609375, 0.1968994140625, 0.2204742431640625, 0.244049072265625, 0.2676239013671875, 0.29119873046875, 0.3147735595703125, 0.338348388671875, 0.3619232177734375, 0.385498046875, 0.4090728759765625, 0.432647705078125, 0.4562225341796875, 0.47979736328125, 0.5033721923828125, 0.526947021484375, 0.5505218505859375, 0.5740966796875, 0.5976715087890625, 0.621246337890625, 0.6448211669921875, 0.66839599609375, 0.6919708251953125, 0.715545654296875, 0.7391204833984375, 0.7626953125]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0, 7.0, 7.0, 10.0, 10.0, 14.0, 13.0, 38.0, 48.0, 106.0, 149.0, 233.0, 446.0, 795.0, 1572.0, 3228.0, 7210.0, 17192.0, 47626.0, 179364.0, 595573.0, 130789.0, 38184.0, 14056.0, 6134.0, 2693.0, 1320.0, 687.0, 381.0, 235.0, 144.0, 98.0, 68.0, 46.0, 23.0, 19.0, 7.0, 5.0, 4.0, 4.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-3.978515625, -3.855712890625, -3.73291015625, -3.610107421875, -3.4873046875, -3.364501953125, -3.24169921875, -3.118896484375, -2.99609375, -2.873291015625, -2.75048828125, -2.627685546875, -2.5048828125, -2.382080078125, -2.25927734375, -2.136474609375, -2.013671875, -1.890869140625, -1.76806640625, -1.645263671875, -1.5224609375, -1.399658203125, -1.27685546875, -1.154052734375, -1.03125, -0.908447265625, -0.78564453125, -0.662841796875, -0.5400390625, -0.417236328125, -0.29443359375, -0.171630859375, -0.048828125, 0.073974609375, 0.19677734375, 0.319580078125, 0.4423828125, 0.565185546875, 0.68798828125, 0.810791015625, 0.93359375, 1.056396484375, 1.17919921875, 1.302001953125, 1.4248046875, 1.547607421875, 1.67041015625, 1.793212890625, 1.916015625, 2.038818359375, 2.16162109375, 2.284423828125, 2.4072265625, 2.530029296875, 2.65283203125, 2.775634765625, 2.8984375, 3.021240234375, 3.14404296875, 3.266845703125, 3.3896484375, 3.512451171875, 3.63525390625, 3.758056640625, 3.880859375]}, "gradients/decoder.transformer.h.0.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 1.0, 1.0, 2.0, 3.0, 7.0, 4.0, 3.0, 11.0, 7.0, 9.0, 20.0, 11.0, 28.0, 24.0, 30.0, 54.0, 57.0, 86.0, 105.0, 122.0, 107.0, 104.0, 41.0, 48.0, 29.0, 24.0, 21.0, 7.0, 17.0, 7.0, 9.0, 3.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.8118162751197815, -0.789677083492279, -0.7675378322601318, -0.7453986406326294, -0.7232593894004822, -0.7011201977729797, -0.6789809465408325, -0.6568417549133301, -0.6347025036811829, -0.6125633120536804, -0.5904240608215332, -0.5682848691940308, -0.5461456179618835, -0.5240064263343811, -0.5018671751022339, -0.47972798347473145, -0.4575887620449066, -0.4354495406150818, -0.41331031918525696, -0.39117109775543213, -0.3690318763256073, -0.34689265489578247, -0.32475346326828003, -0.3026142120361328, -0.28047502040863037, -0.25833579897880554, -0.2361965775489807, -0.21405735611915588, -0.19191813468933105, -0.16977891325950623, -0.1476397067308426, -0.12550048530101776, -0.10336124897003174, -0.08122202754020691, -0.05908280983567238, -0.03694359213113785, -0.014804370701313019, 0.00733485072851181, 0.029474064707756042, 0.05161328613758087, 0.0737525075674057, 0.09589172899723053, 0.11803095042705536, 0.140170156955719, 0.16230937838554382, 0.18444859981536865, 0.20658782124519348, 0.2287270426750183, 0.25086626410484314, 0.27300548553466797, 0.2951447069644928, 0.3172839283943176, 0.33942314982414246, 0.3615623712539673, 0.3837015628814697, 0.40584081411361694, 0.4279800057411194, 0.4501192271709442, 0.47225844860076904, 0.49439767003059387, 0.5165368914604187, 0.5386760830879211, 0.5608153343200684, 0.5829545259475708, 0.605093777179718]}, "gradients/decoder.transformer.h.0.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 3.0, 7.0, 3.0, 9.0, 5.0, 15.0, 16.0, 22.0, 7.0, 29.0, 19.0, 33.0, 33.0, 28.0, 27.0, 43.0, 42.0, 43.0, 38.0, 43.0, 29.0, 43.0, 34.0, 46.0, 52.0, 39.0, 28.0, 33.0, 26.0, 26.0, 25.0, 35.0, 24.0, 13.0, 16.0, 16.0, 16.0, 15.0, 10.0, 7.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0], "bins": [-0.4518786072731018, -0.438330739736557, -0.4247828423976898, -0.411234974861145, -0.3976871073246002, -0.3841392397880554, -0.37059134244918823, -0.35704347491264343, -0.34349560737609863, -0.32994773983955383, -0.31639984250068665, -0.30285197496414185, -0.28930410742759705, -0.27575623989105225, -0.26220834255218506, -0.24866047501564026, -0.23511257767677307, -0.22156469523906708, -0.20801682770252228, -0.19446894526481628, -0.18092107772827148, -0.1673731952905655, -0.1538253128528595, -0.1402774453163147, -0.1267295628786087, -0.1131816878914833, -0.09963381290435791, -0.08608593046665192, -0.07253805547952652, -0.05899018049240112, -0.04544229805469513, -0.03189442306756973, -0.018346548080444336, -0.00479867123067379, 0.008749205619096756, 0.02229708433151245, 0.03584495931863785, 0.049392834305763245, 0.06294071674346924, 0.07648859173059464, 0.09003646671772003, 0.10358434170484543, 0.11713221669197083, 0.13068009912967682, 0.1442279815673828, 0.1577758491039276, 0.1713237315416336, 0.1848716139793396, 0.1984194815158844, 0.2119673639535904, 0.2255152314901352, 0.2390631139278412, 0.252610981464386, 0.2661588788032532, 0.279706746339798, 0.2932546138763428, 0.30680251121520996, 0.32035037875175476, 0.33389827609062195, 0.34744614362716675, 0.36099401116371155, 0.37454187870025635, 0.38808977603912354, 0.40163764357566833, 0.41518551111221313]}, "gradients/decoder.transformer.h.0.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 1.0, 3.0, 6.0, 8.0, 9.0, 14.0, 16.0, 13.0, 19.0, 16.0, 23.0, 25.0, 23.0, 29.0, 30.0, 38.0, 38.0, 39.0, 53.0, 56.0, 43.0, 49.0, 32.0, 52.0, 41.0, 37.0, 39.0, 34.0, 30.0, 27.0, 34.0, 19.0, 19.0, 18.0, 14.0, 13.0, 6.0, 12.0, 6.0, 7.0, 7.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-919.0, -889.4375, -859.875, -830.3125, -800.75, -771.1875, -741.625, -712.0625, -682.5, -652.9375, -623.375, -593.8125, -564.25, -534.6875, -505.125, -475.5625, -446.0, -416.4375, -386.875, -357.3125, -327.75, -298.1875, -268.625, -239.0625, -209.5, -179.9375, -150.375, -120.8125, -91.25, -61.6875, -32.125, -2.5625, 27.0, 56.5625, 86.125, 115.6875, 145.25, 174.8125, 204.375, 233.9375, 263.5, 293.0625, 322.625, 352.1875, 381.75, 411.3125, 440.875, 470.4375, 500.0, 529.5625, 559.125, 588.6875, 618.25, 647.8125, 677.375, 706.9375, 736.5, 766.0625, 795.625, 825.1875, 854.75, 884.3125, 913.875, 943.4375, 973.0]}, "gradients/decoder.transformer.h.0.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 4.0, 2.0, 2.0, 5.0, 4.0, 12.0, 16.0, 20.0, 24.0, 45.0, 46.0, 54.0, 100.0, 159.0, 214.0, 288.0, 409.0, 669.0, 1048.0, 1888.0, 3913.0, 9219.0, 26162.0, 89134.0, 470477.0, 337968.0, 69707.0, 21220.0, 7798.0, 3489.0, 1649.0, 928.0, 564.0, 395.0, 275.0, 181.0, 146.0, 96.0, 66.0, 40.0, 30.0, 27.0, 18.0, 17.0, 8.0, 6.0, 9.0, 3.0, 5.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-170.125, -164.79296875, -159.4609375, -154.12890625, -148.796875, -143.46484375, -138.1328125, -132.80078125, -127.46875, -122.13671875, -116.8046875, -111.47265625, -106.140625, -100.80859375, -95.4765625, -90.14453125, -84.8125, -79.48046875, -74.1484375, -68.81640625, -63.484375, -58.15234375, -52.8203125, -47.48828125, -42.15625, -36.82421875, -31.4921875, -26.16015625, -20.828125, -15.49609375, -10.1640625, -4.83203125, 0.5, 5.83203125, 11.1640625, 16.49609375, 21.828125, 27.16015625, 32.4921875, 37.82421875, 43.15625, 48.48828125, 53.8203125, 59.15234375, 64.484375, 69.81640625, 75.1484375, 80.48046875, 85.8125, 91.14453125, 96.4765625, 101.80859375, 107.140625, 112.47265625, 117.8046875, 123.13671875, 128.46875, 133.80078125, 139.1328125, 144.46484375, 149.796875, 155.12890625, 160.4609375, 165.79296875, 171.125]}, "gradients/decoder.transformer.h.0.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 3.0, 3.0, 1.0, 5.0, 6.0, 9.0, 12.0, 12.0, 20.0, 12.0, 21.0, 31.0, 31.0, 28.0, 40.0, 50.0, 40.0, 46.0, 40.0, 49.0, 2091.0, 72.0, 52.0, 52.0, 52.0, 42.0, 40.0, 45.0, 25.0, 23.0, 21.0, 14.0, 15.0, 12.0, 11.0, 9.0, 4.0, 1.0, 10.0, 4.0, 4.0, 0.0, 2.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1075.0, -1039.875, -1004.75, -969.625, -934.5, -899.375, -864.25, -829.125, -794.0, -758.875, -723.75, -688.625, -653.5, -618.375, -583.25, -548.125, -513.0, -477.875, -442.75, -407.625, -372.5, -337.375, -302.25, -267.125, -232.0, -196.875, -161.75, -126.625, -91.5, -56.375, -21.25, 13.875, 49.0, 84.125, 119.25, 154.375, 189.5, 224.625, 259.75, 294.875, 330.0, 365.125, 400.25, 435.375, 470.5, 505.625, 540.75, 575.875, 611.0, 646.125, 681.25, 716.375, 751.5, 786.625, 821.75, 856.875, 892.0, 927.125, 962.25, 997.375, 1032.5, 1067.625, 1102.75, 1137.875, 1173.0]}, "gradients/decoder.transformer.h.0.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 4.0, 7.0, 11.0, 13.0, 21.0, 26.0, 38.0, 46.0, 59.0, 92.0, 115.0, 157.0, 220.0, 325.0, 582.0, 1162.0, 3019.0, 12271.0, 88575.0, 2855293.0, 157423.0, 18857.0, 4012.0, 1414.0, 662.0, 419.0, 224.0, 173.0, 150.0, 91.0, 59.0, 40.0, 40.0, 30.0, 28.0, 15.0, 9.0, 8.0, 7.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-224.25, -217.375, -210.5, -203.625, -196.75, -189.875, -183.0, -176.125, -169.25, -162.375, -155.5, -148.625, -141.75, -134.875, -128.0, -121.125, -114.25, -107.375, -100.5, -93.625, -86.75, -79.875, -73.0, -66.125, -59.25, -52.375, -45.5, -38.625, -31.75, -24.875, -18.0, -11.125, -4.25, 2.625, 9.5, 16.375, 23.25, 30.125, 37.0, 43.875, 50.75, 57.625, 64.5, 71.375, 78.25, 85.125, 92.0, 98.875, 105.75, 112.625, 119.5, 126.375, 133.25, 140.125, 147.0, 153.875, 160.75, 167.625, 174.5, 181.375, 188.25, 195.125, 202.0, 208.875, 215.75]}, "gradients/decoder.transformer.h.0.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 2.0, 5.0, 2.0, 2.0, 3.0, 2.0, 7.0, 7.0, 20.0, 28.0, 51.0, 83.0, 167.0, 248.0, 179.0, 81.0, 41.0, 29.0, 15.0, 13.0, 9.0, 8.0, 2.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2271.555908203125, -2210.9716796875, -2150.3876953125, -2089.803466796875, -2029.21923828125, -1968.6351318359375, -1908.051025390625, -1847.466796875, -1786.8826904296875, -1726.298583984375, -1665.71435546875, -1605.1302490234375, -1544.546142578125, -1483.9619140625, -1423.3778076171875, -1362.793701171875, -1302.20947265625, -1241.6253662109375, -1181.0411376953125, -1120.45703125, -1059.872802734375, -999.2886962890625, -938.70458984375, -878.1204223632812, -817.5362548828125, -756.9520874023438, -696.367919921875, -635.7838134765625, -575.1996459960938, -514.615478515625, -454.0313415527344, -393.44720458984375, -332.8631591796875, -272.27899169921875, -211.69485473632812, -151.11070251464844, -90.52655029296875, -29.9423828125, 30.641754150390625, 91.22589111328125, 151.81005859375, 212.3942108154297, 272.9783630371094, 333.5625, 394.14666748046875, 454.7308349609375, 515.31494140625, 575.8991088867188, 636.4832763671875, 697.0674438476562, 757.651611328125, 818.2357177734375, 878.8198852539062, 939.404052734375, 999.9881591796875, 1060.572265625, 1121.156494140625, 1181.7406005859375, 1242.3248291015625, 1302.908935546875, 1363.4931640625, 1424.0772705078125, 1484.661376953125, 1545.24560546875, 1605.8297119140625]}, "gradients/decoder.transformer.h.0.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 6.0, 5.0, 0.0, 4.0, 1.0, 7.0, 7.0, 10.0, 6.0, 9.0, 12.0, 25.0, 23.0, 20.0, 20.0, 33.0, 30.0, 43.0, 33.0, 39.0, 37.0, 50.0, 40.0, 52.0, 40.0, 50.0, 43.0, 43.0, 47.0, 31.0, 44.0, 29.0, 39.0, 25.0, 25.0, 19.0, 13.0, 18.0, 3.0, 7.0, 5.0, 4.0, 3.0, 9.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1486.2545166015625, -1440.933349609375, -1395.6123046875, -1350.2911376953125, -1304.969970703125, -1259.6488037109375, -1214.32763671875, -1169.006591796875, -1123.6854248046875, -1078.3642578125, -1033.043212890625, -987.7220458984375, -942.40087890625, -897.0797119140625, -851.7586059570312, -806.4375, -761.1163330078125, -715.795166015625, -670.4740600585938, -625.1529541015625, -579.831787109375, -534.5106201171875, -489.18951416015625, -443.8683776855469, -398.5472412109375, -353.2261047363281, -307.90496826171875, -262.5838317871094, -217.2626953125, -171.94155883789062, -126.62042236328125, -81.29928588867188, -35.9781494140625, 9.342987060546875, 54.66412353515625, 99.98526000976562, 145.306396484375, 190.62753295898438, 235.94866943359375, 281.2698059082031, 326.5909423828125, 371.9120788574219, 417.23321533203125, 462.5543518066406, 507.87548828125, 553.1966552734375, 598.5177612304688, 643.8388671875, 689.1600341796875, 734.481201171875, 779.8023071289062, 825.1234130859375, 870.444580078125, 915.7657470703125, 961.0868530273438, 1006.407958984375, 1051.7291259765625, 1097.05029296875, 1142.371337890625, 1187.6925048828125, 1233.013671875, 1278.3348388671875, 1323.656005859375, 1368.97705078125, 1414.2982177734375]}, "gradients/decoder.transformer.wpe.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 4.0, 5.0, 2.0, 9.0, 6.0, 15.0, 11.0, 12.0, 16.0, 14.0, 23.0, 19.0, 18.0, 40.0, 37.0, 46.0, 92.0, 171.0, 281.0, 410.0, 609.0, 2672.0, 1042215.0, 659.0, 409.0, 262.0, 150.0, 81.0, 49.0, 26.0, 35.0, 31.0, 20.0, 21.0, 19.0, 13.0, 11.0, 7.0, 13.0, 10.0, 3.0, 7.0, 3.0, 3.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-677.0581665039062, -649.98095703125, -622.9037475585938, -595.8265380859375, -568.749267578125, -541.672119140625, -514.5948486328125, -487.51763916015625, -460.4404296875, -433.36322021484375, -406.2860107421875, -379.2087707519531, -352.1315612792969, -325.0543518066406, -297.97711181640625, -270.89990234375, -243.82269287109375, -216.7454833984375, -189.6682586669922, -162.59103393554688, -135.51382446289062, -108.43661499023438, -81.35939025878906, -54.28216552734375, -27.2049560546875, -0.12773895263671875, 26.949478149414062, 54.026695251464844, 81.10391235351562, 108.18112182617188, 135.2583465576172, 162.3355712890625, 189.412841796875, 216.49005126953125, 243.56727600097656, 270.6445007324219, 297.7217102050781, 324.7989196777344, 351.87615966796875, 378.953369140625, 406.03057861328125, 433.1077880859375, 460.18499755859375, 487.2622375488281, 514.3394775390625, 541.4166259765625, 568.493896484375, 595.5711059570312, 622.6483154296875, 649.7255249023438, 676.802734375, 703.8799438476562, 730.9571533203125, 758.034423828125, 785.1116333007812, 812.1888427734375, 839.2660522460938, 866.34326171875, 893.4204711914062, 920.4976806640625, 947.574951171875, 974.652099609375, 1001.7293701171875, 1028.806640625, 1055.8837890625]}, "gradients/decoder.transformer.wte.weight": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 5.0, 1.0, 7.0, 10.0, 6.0, 10.0, 12.0, 15.0, 17.0, 34.0, 114.0, 257.0, 3928.0, 51458124.0, 397.0, 119.0, 50.0, 18.0, 10.0, 5.0, 8.0, 2.0, 5.0, 1.0, 1.0, 1.0, 6.0, 2.0, 4.0, 2.0, 2.0, 2.0, 5.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2840.538330078125, -2665.204345703125, -2489.870361328125, -2314.5361328125, -2139.2021484375, -1963.8682861328125, -1788.5341796875, -1613.2001953125, -1437.8662109375, -1262.5322265625, -1087.1982421875, -911.8641357421875, -736.5301513671875, -561.1961669921875, -385.862060546875, -210.528076171875, -35.194091796875, 140.13992309570312, 315.47393798828125, 490.8079833984375, 666.1419677734375, 841.4759521484375, 1016.81005859375, 1192.14404296875, 1367.47802734375, 1542.81201171875, 1718.14599609375, 1893.4801025390625, 2068.81396484375, 2244.14794921875, 2419.482177734375, 2594.816162109375, 2770.150390625, 2945.484375, 3120.818359375, 3296.15234375, 3471.486328125, 3646.8203125, 3822.154541015625, 3997.488525390625, 4172.822265625, 4348.15625, 4523.490234375, 4698.82421875, 4874.158203125, 5049.4921875, 5224.826171875, 5400.16015625, 5575.49462890625, 5750.82861328125, 5926.16259765625, 6101.49658203125, 6276.83056640625, 6452.16455078125, 6627.4990234375, 6802.8330078125, 6978.1669921875, 7153.5009765625, 7328.8349609375, 7504.1689453125, 7679.5029296875, 7854.8369140625, 8030.1708984375, 8205.5048828125, 8380.8388671875]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [3.0, 4.0, 8.0, 6.0, 7.0, 18.0, 21.0, 27.0, 48.0, 57.0, 80.0, 143.0, 195.0, 283.0, 460.0, 626.0, 1066.0, 1528.0, 2296.0, 3545.0, 5335.0, 8315.0, 13133.0, 20142.0, 32477.0, 51256.0, 82950.0, 136849.0, 242663.0, 481360.0, 3977069.0, 565175.0, 270697.0, 150370.0, 90540.0, 55660.0, 35234.0, 22054.0, 14131.0, 9000.0, 5720.0, 3761.0, 2417.0, 1566.0, 1042.0, 703.0, 440.0, 311.0, 216.0, 135.0, 89.0, 56.0, 60.0, 38.0, 19.0, 10.0, 8.0, 9.0, 7.0, 8.0, 3.0, 3.0, 3.0, 1.0], "bins": [-16.921875, -16.36865234375, -15.8154296875, -15.26220703125, -14.708984375, -14.15576171875, -13.6025390625, -13.04931640625, -12.49609375, -11.94287109375, -11.3896484375, -10.83642578125, -10.283203125, -9.72998046875, -9.1767578125, -8.62353515625, -8.0703125, -7.51708984375, -6.9638671875, -6.41064453125, -5.857421875, -5.30419921875, -4.7509765625, -4.19775390625, -3.64453125, -3.09130859375, -2.5380859375, -1.98486328125, -1.431640625, -0.87841796875, -0.3251953125, 0.22802734375, 0.78125, 1.33447265625, 1.8876953125, 2.44091796875, 2.994140625, 3.54736328125, 4.1005859375, 4.65380859375, 5.20703125, 5.76025390625, 6.3134765625, 6.86669921875, 7.419921875, 7.97314453125, 8.5263671875, 9.07958984375, 9.6328125, 10.18603515625, 10.7392578125, 11.29248046875, 11.845703125, 12.39892578125, 12.9521484375, 13.50537109375, 14.05859375, 14.61181640625, 15.1650390625, 15.71826171875, 16.271484375, 16.82470703125, 17.3779296875, 17.93115234375, 18.484375]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 4.0, 4.0, 1.0, 2.0, 0.0, 1.0, 5.0, 6.0, 5.0, 8.0, 9.0, 7.0, 13.0, 9.0, 13.0, 18.0, 17.0, 18.0, 23.0, 31.0, 22.0, 26.0, 26.0, 28.0, 44.0, 28.0, 43.0, 32.0, 20.0, 56.0, 992.0, 63.0, 27.0, 25.0, 40.0, 37.0, 31.0, 30.0, 28.0, 28.0, 28.0, 16.0, 24.0, 23.0, 23.0, 14.0, 18.0, 9.0, 13.0, 14.0, 10.0, 4.0, 5.0, 3.0, 5.0, 3.0, 4.0, 3.0, 3.0, 2.0, 3.0], "bins": [-197.375, -191.486328125, -185.59765625, -179.708984375, -173.8203125, -167.931640625, -162.04296875, -156.154296875, -150.265625, -144.376953125, -138.48828125, -132.599609375, -126.7109375, -120.822265625, -114.93359375, -109.044921875, -103.15625, -97.267578125, -91.37890625, -85.490234375, -79.6015625, -73.712890625, -67.82421875, -61.935546875, -56.046875, -50.158203125, -44.26953125, -38.380859375, -32.4921875, -26.603515625, -20.71484375, -14.826171875, -8.9375, -3.048828125, 2.83984375, 8.728515625, 14.6171875, 20.505859375, 26.39453125, 32.283203125, 38.171875, 44.060546875, 49.94921875, 55.837890625, 61.7265625, 67.615234375, 73.50390625, 79.392578125, 85.28125, 91.169921875, 97.05859375, 102.947265625, 108.8359375, 114.724609375, 120.61328125, 126.501953125, 132.390625, 138.279296875, 144.16796875, 150.056640625, 155.9453125, 161.833984375, 167.72265625, 173.611328125, 179.5]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 6.0, 7.0, 8.0, 2.0, 4.0, 14.0, 20.0, 28.0, 50.0, 65.0, 103.0, 137.0, 224.0, 299.0, 456.0, 662.0, 1068.0, 1499.0, 2421.0, 3575.0, 5848.0, 9072.0, 14449.0, 23755.0, 38481.0, 65017.0, 111636.0, 196973.0, 364471.0, 1335967.0, 3193523.0, 404565.0, 215218.0, 120961.0, 70607.0, 41811.0, 25411.0, 15579.0, 9803.0, 6163.0, 3931.0, 2568.0, 1663.0, 1079.0, 699.0, 540.0, 316.0, 222.0, 138.0, 106.0, 80.0, 57.0, 34.0, 18.0, 16.0, 6.0, 4.0, 8.0, 3.0, 1.0, 2.0], "bins": [-19.921875, -19.319580078125, -18.71728515625, -18.114990234375, -17.5126953125, -16.910400390625, -16.30810546875, -15.705810546875, -15.103515625, -14.501220703125, -13.89892578125, -13.296630859375, -12.6943359375, -12.092041015625, -11.48974609375, -10.887451171875, -10.28515625, -9.682861328125, -9.08056640625, -8.478271484375, -7.8759765625, -7.273681640625, -6.67138671875, -6.069091796875, -5.466796875, -4.864501953125, -4.26220703125, -3.659912109375, -3.0576171875, -2.455322265625, -1.85302734375, -1.250732421875, -0.6484375, -0.046142578125, 0.55615234375, 1.158447265625, 1.7607421875, 2.363037109375, 2.96533203125, 3.567626953125, 4.169921875, 4.772216796875, 5.37451171875, 5.976806640625, 6.5791015625, 7.181396484375, 7.78369140625, 8.385986328125, 8.98828125, 9.590576171875, 10.19287109375, 10.795166015625, 11.3974609375, 11.999755859375, 12.60205078125, 13.204345703125, 13.806640625, 14.408935546875, 15.01123046875, 15.613525390625, 16.2158203125, 16.818115234375, 17.42041015625, 18.022705078125, 18.625]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 5.0, 3.0, 6.0, 6.0, 4.0, 12.0, 17.0, 10.0, 15.0, 15.0, 20.0, 27.0, 26.0, 31.0, 51.0, 36.0, 38.0, 39.0, 32.0, 48.0, 55.0, 932.0, 148.0, 35.0, 41.0, 28.0, 38.0, 36.0, 36.0, 26.0, 32.0, 21.0, 25.0, 24.0, 18.0, 20.0, 16.0, 8.0, 12.0, 6.0, 6.0, 5.0, 6.0, 4.0, 6.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 2.0], "bins": [-202.625, -196.630859375, -190.63671875, -184.642578125, -178.6484375, -172.654296875, -166.66015625, -160.666015625, -154.671875, -148.677734375, -142.68359375, -136.689453125, -130.6953125, -124.701171875, -118.70703125, -112.712890625, -106.71875, -100.724609375, -94.73046875, -88.736328125, -82.7421875, -76.748046875, -70.75390625, -64.759765625, -58.765625, -52.771484375, -46.77734375, -40.783203125, -34.7890625, -28.794921875, -22.80078125, -16.806640625, -10.8125, -4.818359375, 1.17578125, 7.169921875, 13.1640625, 19.158203125, 25.15234375, 31.146484375, 37.140625, 43.134765625, 49.12890625, 55.123046875, 61.1171875, 67.111328125, 73.10546875, 79.099609375, 85.09375, 91.087890625, 97.08203125, 103.076171875, 109.0703125, 115.064453125, 121.05859375, 127.052734375, 133.046875, 139.041015625, 145.03515625, 151.029296875, 157.0234375, 163.017578125, 169.01171875, 175.005859375, 181.0]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 2.0, 3.0, 3.0, 4.0, 8.0, 15.0, 7.0, 11.0, 15.0, 33.0, 36.0, 45.0, 71.0, 92.0, 149.0, 211.0, 306.0, 481.0, 697.0, 1043.0, 1591.0, 2437.0, 3927.0, 6513.0, 11403.0, 21355.0, 46481.0, 133703.0, 5779824.0, 170346.0, 54326.0, 24412.0, 12613.0, 7295.0, 4218.0, 2657.0, 1712.0, 1082.0, 733.0, 483.0, 304.0, 227.0, 189.0, 103.0, 84.0, 51.0, 46.0, 29.0, 16.0, 11.0, 11.0, 14.0, 6.0, 4.0, 5.0, 1.0, 0.0, 2.0], "bins": [-65.125, -63.2421875, -61.359375, -59.4765625, -57.59375, -55.7109375, -53.828125, -51.9453125, -50.0625, -48.1796875, -46.296875, -44.4140625, -42.53125, -40.6484375, -38.765625, -36.8828125, -35.0, -33.1171875, -31.234375, -29.3515625, -27.46875, -25.5859375, -23.703125, -21.8203125, -19.9375, -18.0546875, -16.171875, -14.2890625, -12.40625, -10.5234375, -8.640625, -6.7578125, -4.875, -2.9921875, -1.109375, 0.7734375, 2.65625, 4.5390625, 6.421875, 8.3046875, 10.1875, 12.0703125, 13.953125, 15.8359375, 17.71875, 19.6015625, 21.484375, 23.3671875, 25.25, 27.1328125, 29.015625, 30.8984375, 32.78125, 34.6640625, 36.546875, 38.4296875, 40.3125, 42.1953125, 44.078125, 45.9609375, 47.84375, 49.7265625, 51.609375, 53.4921875, 55.375]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 6.0, 1.0, 2.0, 2.0, 5.0, 6.0, 10.0, 14.0, 15.0, 11.0, 14.0, 24.0, 15.0, 20.0, 34.0, 31.0, 28.0, 34.0, 48.0, 53.0, 52.0, 46.0, 223.0, 883.0, 39.0, 52.0, 54.0, 31.0, 36.0, 32.0, 27.0, 24.0, 25.0, 23.0, 22.0, 18.0, 16.0, 10.0, 11.0, 5.0, 9.0, 4.0, 8.0, 5.0, 0.0, 2.0, 4.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-190.25, -184.515625, -178.78125, -173.046875, -167.3125, -161.578125, -155.84375, -150.109375, -144.375, -138.640625, -132.90625, -127.171875, -121.4375, -115.703125, -109.96875, -104.234375, -98.5, -92.765625, -87.03125, -81.296875, -75.5625, -69.828125, -64.09375, -58.359375, -52.625, -46.890625, -41.15625, -35.421875, -29.6875, -23.953125, -18.21875, -12.484375, -6.75, -1.015625, 4.71875, 10.453125, 16.1875, 21.921875, 27.65625, 33.390625, 39.125, 44.859375, 50.59375, 56.328125, 62.0625, 67.796875, 73.53125, 79.265625, 85.0, 90.734375, 96.46875, 102.203125, 107.9375, 113.671875, 119.40625, 125.140625, 130.875, 136.609375, 142.34375, 148.078125, 153.8125, 159.546875, 165.28125, 171.015625, 176.75]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 5.0, 13.0, 22.0, 32.0, 41.0, 91.0, 228.0, 321.0, 115.0, 67.0, 25.0, 14.0, 14.0, 8.0, 3.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-1283.35791015625, -1257.22802734375, -1231.0980224609375, -1204.9681396484375, -1178.8382568359375, -1152.708251953125, -1126.578369140625, -1100.448486328125, -1074.318603515625, -1048.188720703125, -1022.0587768554688, -995.9288330078125, -969.7989501953125, -943.6690063476562, -917.5390625, -891.4091796875, -865.2791748046875, -839.1492309570312, -813.0193481445312, -786.889404296875, -760.759521484375, -734.6295776367188, -708.4996337890625, -682.3697509765625, -656.2398071289062, -630.10986328125, -603.97998046875, -577.8500366210938, -551.7200927734375, -525.5902099609375, -499.46026611328125, -473.3303527832031, -447.2004699707031, -421.070556640625, -394.94061279296875, -368.8106994628906, -342.6807861328125, -316.5508728027344, -290.42095947265625, -264.291015625, -238.16110229492188, -212.03118896484375, -185.90126037597656, -159.77133178710938, -133.64141845703125, -107.51150512695312, -81.38157653808594, -55.25164794921875, -29.121734619140625, -2.9918136596679688, 23.138107299804688, 49.268028259277344, 75.39794921875, 101.52786254882812, 127.65779113769531, 153.7877197265625, 179.91763305664062, 206.04754638671875, 232.17747497558594, 258.3074035644531, 284.43731689453125, 310.5672302246094, 336.6971435546875, 362.82708740234375, 388.9570007324219]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 1.0, 3.0, 5.0, 7.0, 10.0, 11.0, 14.0, 7.0, 10.0, 15.0, 15.0, 19.0, 24.0, 19.0, 29.0, 22.0, 25.0, 21.0, 35.0, 39.0, 29.0, 37.0, 43.0, 29.0, 36.0, 46.0, 43.0, 30.0, 39.0, 28.0, 33.0, 35.0, 26.0, 35.0, 27.0, 19.0, 16.0, 27.0, 17.0, 12.0, 10.0, 19.0, 5.0, 9.0, 10.0, 6.0, 4.0, 4.0, 0.0, 0.0, 3.0, 1.0, 1.0, 3.0], "bins": [-618.5052490234375, -600.7103271484375, -582.9153442382812, -565.1204223632812, -547.325439453125, -529.530517578125, -511.7355651855469, -493.94061279296875, -476.1456604003906, -458.3507080078125, -440.5557556152344, -422.76080322265625, -404.96588134765625, -387.1708984375, -369.3759765625, -351.5810241699219, -333.78607177734375, -315.9911193847656, -298.1961669921875, -280.4012145996094, -262.60626220703125, -244.8113250732422, -227.01638793945312, -209.221435546875, -191.42648315429688, -173.63153076171875, -155.83657836914062, -138.04164123535156, -120.24668884277344, -102.45173645019531, -84.65679168701172, -66.86184692382812, -49.06689453125, -31.27194595336914, -13.476997375488281, 4.317951202392578, 22.112899780273438, 39.90785217285156, 57.702796936035156, 75.49774169921875, 93.29269409179688, 111.087646484375, 128.88259887695312, 146.6775360107422, 164.4724884033203, 182.26744079589844, 200.0623779296875, 217.85733032226562, 235.65228271484375, 253.44723510742188, 271.2421875, 289.0371398925781, 306.83209228515625, 324.62701416015625, 342.4219665527344, 360.2169189453125, 378.0118713378906, 395.80682373046875, 413.6017761230469, 431.396728515625, 449.191650390625, 466.98663330078125, 484.78155517578125, 502.5765075683594, 520.3714599609375]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 4.0, 4.0, 3.0, 4.0, 7.0, 13.0, 23.0, 18.0, 38.0, 49.0, 86.0, 112.0, 173.0, 259.0, 447.0, 747.0, 1313.0, 2447.0, 5351.0, 15053.0, 90864.0, 4022037.0, 35354.0, 10331.0, 4351.0, 2215.0, 1200.0, 641.0, 406.0, 267.0, 168.0, 92.0, 68.0, 36.0, 44.0, 21.0, 12.0, 7.0, 9.0, 8.0, 5.0, 3.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.20703125, -5.05059814453125, -4.8941650390625, -4.73773193359375, -4.581298828125, -4.42486572265625, -4.2684326171875, -4.11199951171875, -3.95556640625, -3.79913330078125, -3.6427001953125, -3.48626708984375, -3.329833984375, -3.17340087890625, -3.0169677734375, -2.86053466796875, -2.7041015625, -2.54766845703125, -2.3912353515625, -2.23480224609375, -2.078369140625, -1.92193603515625, -1.7655029296875, -1.60906982421875, -1.45263671875, -1.29620361328125, -1.1397705078125, -0.98333740234375, -0.826904296875, -0.67047119140625, -0.5140380859375, -0.35760498046875, -0.201171875, -0.04473876953125, 0.1116943359375, 0.26812744140625, 0.424560546875, 0.58099365234375, 0.7374267578125, 0.89385986328125, 1.05029296875, 1.20672607421875, 1.3631591796875, 1.51959228515625, 1.676025390625, 1.83245849609375, 1.9888916015625, 2.14532470703125, 2.3017578125, 2.45819091796875, 2.6146240234375, 2.77105712890625, 2.927490234375, 3.08392333984375, 3.2403564453125, 3.39678955078125, 3.55322265625, 3.70965576171875, 3.8660888671875, 4.02252197265625, 4.178955078125, 4.33538818359375, 4.4918212890625, 4.64825439453125, 4.8046875]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 3.0, 1.0, 2.0, 0.0, 2.0, 2.0, 4.0, 4.0, 3.0, 7.0, 8.0, 10.0, 14.0, 16.0, 11.0, 16.0, 737.0, 64.0, 20.0, 12.0, 13.0, 8.0, 7.0, 9.0, 5.0, 4.0, 4.0, 2.0, 2.0, 6.0, 2.0, 2.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.9755859375, -1.91558837890625, -1.8555908203125, -1.79559326171875, -1.735595703125, -1.67559814453125, -1.6156005859375, -1.55560302734375, -1.49560546875, -1.43560791015625, -1.3756103515625, -1.31561279296875, -1.255615234375, -1.19561767578125, -1.1356201171875, -1.07562255859375, -1.015625, -0.95562744140625, -0.8956298828125, -0.83563232421875, -0.775634765625, -0.71563720703125, -0.6556396484375, -0.59564208984375, -0.53564453125, -0.47564697265625, -0.4156494140625, -0.35565185546875, -0.295654296875, -0.23565673828125, -0.1756591796875, -0.11566162109375, -0.0556640625, 0.00433349609375, 0.0643310546875, 0.12432861328125, 0.184326171875, 0.24432373046875, 0.3043212890625, 0.36431884765625, 0.42431640625, 0.48431396484375, 0.5443115234375, 0.60430908203125, 0.664306640625, 0.72430419921875, 0.7843017578125, 0.84429931640625, 0.904296875, 0.96429443359375, 1.0242919921875, 1.08428955078125, 1.144287109375, 1.20428466796875, 1.2642822265625, 1.32427978515625, 1.38427734375, 1.44427490234375, 1.5042724609375, 1.56427001953125, 1.624267578125, 1.68426513671875, 1.7442626953125, 1.80426025390625, 1.8642578125]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 4.0, 3.0, 5.0, 6.0, 8.0, 24.0, 32.0, 40.0, 55.0, 83.0, 146.0, 270.0, 482.0, 919.0, 1944.0, 4633.0, 14370.0, 72121.0, 3445410.0, 592524.0, 44071.0, 10435.0, 3589.0, 1471.0, 700.0, 354.0, 211.0, 118.0, 89.0, 70.0, 29.0, 24.0, 13.0, 16.0, 5.0, 4.0, 5.0, 3.0, 0.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.0859375, -4.93115234375, -4.7763671875, -4.62158203125, -4.466796875, -4.31201171875, -4.1572265625, -4.00244140625, -3.84765625, -3.69287109375, -3.5380859375, -3.38330078125, -3.228515625, -3.07373046875, -2.9189453125, -2.76416015625, -2.609375, -2.45458984375, -2.2998046875, -2.14501953125, -1.990234375, -1.83544921875, -1.6806640625, -1.52587890625, -1.37109375, -1.21630859375, -1.0615234375, -0.90673828125, -0.751953125, -0.59716796875, -0.4423828125, -0.28759765625, -0.1328125, 0.02197265625, 0.1767578125, 0.33154296875, 0.486328125, 0.64111328125, 0.7958984375, 0.95068359375, 1.10546875, 1.26025390625, 1.4150390625, 1.56982421875, 1.724609375, 1.87939453125, 2.0341796875, 2.18896484375, 2.34375, 2.49853515625, 2.6533203125, 2.80810546875, 2.962890625, 3.11767578125, 3.2724609375, 3.42724609375, 3.58203125, 3.73681640625, 3.8916015625, 4.04638671875, 4.201171875, 4.35595703125, 4.5107421875, 4.66552734375, 4.8203125]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 3.0, 1.0, 3.0, 6.0, 3.0, 9.0, 7.0, 17.0, 19.0, 38.0, 68.0, 91.0, 155.0, 524.0, 2295.0, 425.0, 138.0, 99.0, 52.0, 43.0, 33.0, 16.0, 11.0, 6.0, 7.0, 6.0, 2.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.564453125, -2.474273681640625, -2.38409423828125, -2.293914794921875, -2.2037353515625, -2.113555908203125, -2.02337646484375, -1.933197021484375, -1.843017578125, -1.752838134765625, -1.66265869140625, -1.572479248046875, -1.4822998046875, -1.392120361328125, -1.30194091796875, -1.211761474609375, -1.12158203125, -1.031402587890625, -0.94122314453125, -0.851043701171875, -0.7608642578125, -0.670684814453125, -0.58050537109375, -0.490325927734375, -0.400146484375, -0.309967041015625, -0.21978759765625, -0.129608154296875, -0.0394287109375, 0.050750732421875, 0.14093017578125, 0.231109619140625, 0.3212890625, 0.411468505859375, 0.50164794921875, 0.591827392578125, 0.6820068359375, 0.772186279296875, 0.86236572265625, 0.952545166015625, 1.042724609375, 1.132904052734375, 1.22308349609375, 1.313262939453125, 1.4034423828125, 1.493621826171875, 1.58380126953125, 1.673980712890625, 1.76416015625, 1.854339599609375, 1.94451904296875, 2.034698486328125, 2.1248779296875, 2.215057373046875, 2.30523681640625, 2.395416259765625, 2.485595703125, 2.575775146484375, 2.66595458984375, 2.756134033203125, 2.8463134765625, 2.936492919921875, 3.02667236328125, 3.116851806640625, 3.20703125]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 5.0, 19.0, 86.0, 509.0, 324.0, 40.0, 20.0, 4.0, 0.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.623452186584473, -4.802863597869873, -3.9822750091552734, -3.161686420440674, -2.341097831726074, -1.5205092430114746, -0.699920654296875, 0.12066793441772461, 0.9412565231323242, 1.7618451118469238, 2.5824337005615234, 3.403022289276123, 4.223610877990723, 5.044199466705322, 5.864788055419922, 6.6853766441345215, 7.505965232849121, 8.326553344726562, 9.14714241027832, 9.967731475830078, 10.78831958770752, 11.608907699584961, 12.429496765136719, 13.250085830688477, 14.070673942565918, 14.89126205444336, 15.711851119995117, 16.532440185546875, 17.35302734375, 18.173616409301758, 18.994205474853516, 19.814794540405273, 20.63538360595703, 21.45597267150879, 22.276561737060547, 23.097148895263672, 23.91773796081543, 24.738327026367188, 25.558914184570312, 26.37950325012207, 27.200092315673828, 28.020681381225586, 28.841270446777344, 29.66185760498047, 30.482446670532227, 31.303035736083984, 32.12362289428711, 32.9442138671875, 33.764801025390625, 34.58538818359375, 35.40597915649414, 36.226566314697266, 37.047157287597656, 37.86774444580078, 38.688331604003906, 39.5089225769043, 40.32950973510742, 41.15009689331055, 41.97068786621094, 42.79127502441406, 43.61186218261719, 44.43245315551758, 45.2530403137207, 46.073631286621094, 46.89421844482422]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 4.0, 1.0, 2.0, 6.0, 6.0, 13.0, 15.0, 18.0, 19.0, 20.0, 17.0, 44.0, 33.0, 47.0, 50.0, 61.0, 48.0, 50.0, 57.0, 54.0, 67.0, 60.0, 38.0, 41.0, 42.0, 35.0, 32.0, 23.0, 22.0, 16.0, 19.0, 8.0, 10.0, 7.0, 6.0, 6.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0], "bins": [-8.067513465881348, -7.841927528381348, -7.616341590881348, -7.390755653381348, -7.165169715881348, -6.939583778381348, -6.713997840881348, -6.4884114265441895, -6.2628254890441895, -6.0372395515441895, -5.8116536140441895, -5.5860676765441895, -5.3604817390441895, -5.134895324707031, -4.909309387207031, -4.683723449707031, -4.458137512207031, -4.232551574707031, -4.006965637207031, -3.7813796997070312, -3.555793523788452, -3.330207586288452, -3.104621648788452, -2.879035472869873, -2.6534500122070312, -2.4278640747070312, -2.2022781372070312, -1.9766920804977417, -1.7511060237884521, -1.5255200862884521, -1.2999341487884521, -1.0743480920791626, -0.848762035369873, -0.6231760382652283, -0.3975900709629059, -0.1720041036605835, 0.05358189344406128, 0.27916789054870605, 0.504753828048706, 0.7303398847579956, 0.9559258222579956, 1.1815117597579956, 1.4070978164672852, 1.6326837539672852, 1.8582696914672852, 2.083855628967285, 2.309441566467285, 2.5350277423858643, 2.7606136798858643, 2.9861996173858643, 3.2117855548858643, 3.4373717308044434, 3.6629576683044434, 3.8885436058044434, 4.114129543304443, 4.339715480804443, 4.565301418304443, 4.790887355804443, 5.016473293304443, 5.242059230804443, 5.467645168304443, 5.693231582641602, 5.918817520141602, 6.144403457641602, 6.369989395141602]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 2.0, 6.0, 5.0, 9.0, 16.0, 23.0, 20.0, 24.0, 49.0, 63.0, 97.0, 116.0, 161.0, 216.0, 295.0, 437.0, 578.0, 874.0, 1166.0, 1713.0, 2659.0, 4148.0, 7248.0, 13939.0, 33604.0, 898371.0, 44303.0, 16228.0, 8269.0, 4658.0, 2892.0, 1901.0, 1298.0, 914.0, 603.0, 481.0, 339.0, 214.0, 172.0, 125.0, 74.0, 67.0, 54.0, 31.0, 30.0, 15.0, 17.0, 13.0, 7.0, 6.0, 6.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-9.3125, -9.017822265625, -8.72314453125, -8.428466796875, -8.1337890625, -7.839111328125, -7.54443359375, -7.249755859375, -6.955078125, -6.660400390625, -6.36572265625, -6.071044921875, -5.7763671875, -5.481689453125, -5.18701171875, -4.892333984375, -4.59765625, -4.302978515625, -4.00830078125, -3.713623046875, -3.4189453125, -3.124267578125, -2.82958984375, -2.534912109375, -2.240234375, -1.945556640625, -1.65087890625, -1.356201171875, -1.0615234375, -0.766845703125, -0.47216796875, -0.177490234375, 0.1171875, 0.411865234375, 0.70654296875, 1.001220703125, 1.2958984375, 1.590576171875, 1.88525390625, 2.179931640625, 2.474609375, 2.769287109375, 3.06396484375, 3.358642578125, 3.6533203125, 3.947998046875, 4.24267578125, 4.537353515625, 4.83203125, 5.126708984375, 5.42138671875, 5.716064453125, 6.0107421875, 6.305419921875, 6.60009765625, 6.894775390625, 7.189453125, 7.484130859375, 7.77880859375, 8.073486328125, 8.3681640625, 8.662841796875, 8.95751953125, 9.252197265625, 9.546875]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 7.0, 5.0, 11.0, 12.0, 15.0, 13.0, 18.0, 203.0, 586.0, 31.0, 12.0, 11.0, 12.0, 11.0, 6.0, 7.0, 2.0, 4.0, 6.0, 1.0, 4.0, 2.0, 4.0, 1.0, 3.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.958984375, -1.8983001708984375, -1.837615966796875, -1.7769317626953125, -1.71624755859375, -1.6555633544921875, -1.594879150390625, -1.5341949462890625, -1.4735107421875, -1.4128265380859375, -1.352142333984375, -1.2914581298828125, -1.23077392578125, -1.1700897216796875, -1.109405517578125, -1.0487213134765625, -0.988037109375, -0.9273529052734375, -0.866668701171875, -0.8059844970703125, -0.74530029296875, -0.6846160888671875, -0.623931884765625, -0.5632476806640625, -0.5025634765625, -0.4418792724609375, -0.381195068359375, -0.3205108642578125, -0.25982666015625, -0.1991424560546875, -0.138458251953125, -0.0777740478515625, -0.01708984375, 0.0435943603515625, 0.104278564453125, 0.1649627685546875, 0.22564697265625, 0.2863311767578125, 0.347015380859375, 0.4076995849609375, 0.4683837890625, 0.5290679931640625, 0.589752197265625, 0.6504364013671875, 0.71112060546875, 0.7718048095703125, 0.832489013671875, 0.8931732177734375, 0.953857421875, 1.0145416259765625, 1.075225830078125, 1.1359100341796875, 1.19659423828125, 1.2572784423828125, 1.317962646484375, 1.3786468505859375, 1.4393310546875, 1.5000152587890625, 1.560699462890625, 1.6213836669921875, 1.68206787109375, 1.7427520751953125, 1.803436279296875, 1.8641204833984375, 1.9248046875]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 5.0, 1.0, 8.0, 1.0, 7.0, 12.0, 14.0, 14.0, 17.0, 26.0, 30.0, 36.0, 48.0, 69.0, 117.0, 130.0, 191.0, 276.0, 391.0, 679.0, 1058.0, 1911.0, 3649.0, 8167.0, 19293.0, 52577.0, 182946.0, 576778.0, 129541.0, 40815.0, 15401.0, 6666.0, 3143.0, 1759.0, 902.0, 555.0, 387.0, 242.0, 181.0, 121.0, 101.0, 72.0, 56.0, 45.0, 39.0, 21.0, 20.0, 10.0, 7.0, 14.0, 5.0, 9.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-5.1484375, -4.98944091796875, -4.8304443359375, -4.67144775390625, -4.512451171875, -4.35345458984375, -4.1944580078125, -4.03546142578125, -3.87646484375, -3.71746826171875, -3.5584716796875, -3.39947509765625, -3.240478515625, -3.08148193359375, -2.9224853515625, -2.76348876953125, -2.6044921875, -2.44549560546875, -2.2864990234375, -2.12750244140625, -1.968505859375, -1.80950927734375, -1.6505126953125, -1.49151611328125, -1.33251953125, -1.17352294921875, -1.0145263671875, -0.85552978515625, -0.696533203125, -0.53753662109375, -0.3785400390625, -0.21954345703125, -0.060546875, 0.09844970703125, 0.2574462890625, 0.41644287109375, 0.575439453125, 0.73443603515625, 0.8934326171875, 1.05242919921875, 1.21142578125, 1.37042236328125, 1.5294189453125, 1.68841552734375, 1.847412109375, 2.00640869140625, 2.1654052734375, 2.32440185546875, 2.4833984375, 2.64239501953125, 2.8013916015625, 2.96038818359375, 3.119384765625, 3.27838134765625, 3.4373779296875, 3.59637451171875, 3.75537109375, 3.91436767578125, 4.0733642578125, 4.23236083984375, 4.391357421875, 4.55035400390625, 4.7093505859375, 4.86834716796875, 5.02734375]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 5.0, 2.0, 3.0, 1.0, 1.0, 4.0, 4.0, 7.0, 6.0, 10.0, 21.0, 18.0, 17.0, 13.0, 29.0, 24.0, 26.0, 25.0, 33.0, 36.0, 44.0, 48.0, 45.0, 35.0, 52.0, 38.0, 43.0, 37.0, 42.0, 34.0, 17.0, 34.0, 36.0, 27.0, 32.0, 18.0, 24.0, 11.0, 24.0, 15.0, 16.0, 4.0, 12.0, 6.0, 7.0, 8.0, 6.0, 2.0, 4.0, 2.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-6.0625, -5.86993408203125, -5.6773681640625, -5.48480224609375, -5.292236328125, -5.09967041015625, -4.9071044921875, -4.71453857421875, -4.52197265625, -4.32940673828125, -4.1368408203125, -3.94427490234375, -3.751708984375, -3.55914306640625, -3.3665771484375, -3.17401123046875, -2.9814453125, -2.78887939453125, -2.5963134765625, -2.40374755859375, -2.211181640625, -2.01861572265625, -1.8260498046875, -1.63348388671875, -1.44091796875, -1.24835205078125, -1.0557861328125, -0.86322021484375, -0.670654296875, -0.47808837890625, -0.2855224609375, -0.09295654296875, 0.099609375, 0.29217529296875, 0.4847412109375, 0.67730712890625, 0.869873046875, 1.06243896484375, 1.2550048828125, 1.44757080078125, 1.64013671875, 1.83270263671875, 2.0252685546875, 2.21783447265625, 2.410400390625, 2.60296630859375, 2.7955322265625, 2.98809814453125, 3.1806640625, 3.37322998046875, 3.5657958984375, 3.75836181640625, 3.950927734375, 4.14349365234375, 4.3360595703125, 4.52862548828125, 4.72119140625, 4.91375732421875, 5.1063232421875, 5.29888916015625, 5.491455078125, 5.68402099609375, 5.8765869140625, 6.06915283203125, 6.26171875]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 6.0, 1.0, 3.0, 6.0, 9.0, 11.0, 10.0, 14.0, 19.0, 28.0, 34.0, 54.0, 93.0, 125.0, 212.0, 382.0, 716.0, 1774.0, 5618.0, 35293.0, 947843.0, 46226.0, 6311.0, 1919.0, 797.0, 421.0, 217.0, 130.0, 70.0, 63.0, 32.0, 33.0, 24.0, 18.0, 11.0, 4.0, 4.0, 7.0, 4.0, 4.0, 4.0, 4.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-7.65625, -7.3970947265625, -7.137939453125, -6.8787841796875, -6.61962890625, -6.3604736328125, -6.101318359375, -5.8421630859375, -5.5830078125, -5.3238525390625, -5.064697265625, -4.8055419921875, -4.54638671875, -4.2872314453125, -4.028076171875, -3.7689208984375, -3.509765625, -3.2506103515625, -2.991455078125, -2.7322998046875, -2.47314453125, -2.2139892578125, -1.954833984375, -1.6956787109375, -1.4365234375, -1.1773681640625, -0.918212890625, -0.6590576171875, -0.39990234375, -0.1407470703125, 0.118408203125, 0.3775634765625, 0.63671875, 0.8958740234375, 1.155029296875, 1.4141845703125, 1.67333984375, 1.9324951171875, 2.191650390625, 2.4508056640625, 2.7099609375, 2.9691162109375, 3.228271484375, 3.4874267578125, 3.74658203125, 4.0057373046875, 4.264892578125, 4.5240478515625, 4.783203125, 5.0423583984375, 5.301513671875, 5.5606689453125, 5.81982421875, 6.0789794921875, 6.338134765625, 6.5972900390625, 6.8564453125, 7.1156005859375, 7.374755859375, 7.6339111328125, 7.89306640625, 8.1522216796875, 8.411376953125, 8.6705322265625, 8.9296875]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 2.0, 6.0, 4.0, 9.0, 9.0, 15.0, 27.0, 43.0, 81.0, 130.0, 195.0, 169.0, 109.0, 76.0, 46.0, 25.0, 18.0, 9.0, 11.0, 7.0, 2.0, 2.0, 1.0, 4.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004100799560546875, -0.00039636343717575073, -0.00038264691829681396, -0.0003689303994178772, -0.00035521388053894043, -0.00034149736166000366, -0.0003277808427810669, -0.0003140643239021301, -0.00030034780502319336, -0.0002866312861442566, -0.0002729147672653198, -0.00025919824838638306, -0.0002454817295074463, -0.00023176521062850952, -0.00021804869174957275, -0.00020433217287063599, -0.00019061565399169922, -0.00017689913511276245, -0.00016318261623382568, -0.00014946609735488892, -0.00013574957847595215, -0.00012203305959701538, -0.00010831654071807861, -9.460002183914185e-05, -8.088350296020508e-05, -6.716698408126831e-05, -5.345046520233154e-05, -3.9733946323394775e-05, -2.6017427444458008e-05, -1.230090856552124e-05, 1.4156103134155273e-06, 1.5132129192352295e-05, 2.8848648071289062e-05, 4.256516695022583e-05, 5.62816858291626e-05, 6.999820470809937e-05, 8.371472358703613e-05, 9.74312424659729e-05, 0.00011114776134490967, 0.00012486428022384644, 0.0001385807991027832, 0.00015229731798171997, 0.00016601383686065674, 0.0001797303557395935, 0.00019344687461853027, 0.00020716339349746704, 0.0002208799123764038, 0.00023459643125534058, 0.00024831295013427734, 0.0002620294690132141, 0.0002757459878921509, 0.00028946250677108765, 0.0003031790256500244, 0.0003168955445289612, 0.00033061206340789795, 0.0003443285822868347, 0.0003580451011657715, 0.00037176162004470825, 0.000385478138923645, 0.0003991946578025818, 0.00041291117668151855, 0.0004266276955604553, 0.0004403442144393921, 0.00045406073331832886, 0.0004677772521972656]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 6.0, 5.0, 5.0, 10.0, 12.0, 20.0, 30.0, 39.0, 70.0, 97.0, 178.0, 331.0, 640.0, 1388.0, 3716.0, 14002.0, 101135.0, 824901.0, 83465.0, 12291.0, 3478.0, 1344.0, 615.0, 319.0, 176.0, 95.0, 46.0, 57.0, 29.0, 22.0, 8.0, 8.0, 5.0, 7.0, 0.0, 4.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.40625, -5.24468994140625, -5.0831298828125, -4.92156982421875, -4.760009765625, -4.59844970703125, -4.4368896484375, -4.27532958984375, -4.11376953125, -3.95220947265625, -3.7906494140625, -3.62908935546875, -3.467529296875, -3.30596923828125, -3.1444091796875, -2.98284912109375, -2.8212890625, -2.65972900390625, -2.4981689453125, -2.33660888671875, -2.175048828125, -2.01348876953125, -1.8519287109375, -1.69036865234375, -1.52880859375, -1.36724853515625, -1.2056884765625, -1.04412841796875, -0.882568359375, -0.72100830078125, -0.5594482421875, -0.39788818359375, -0.236328125, -0.07476806640625, 0.0867919921875, 0.24835205078125, 0.409912109375, 0.57147216796875, 0.7330322265625, 0.89459228515625, 1.05615234375, 1.21771240234375, 1.3792724609375, 1.54083251953125, 1.702392578125, 1.86395263671875, 2.0255126953125, 2.18707275390625, 2.3486328125, 2.51019287109375, 2.6717529296875, 2.83331298828125, 2.994873046875, 3.15643310546875, 3.3179931640625, 3.47955322265625, 3.64111328125, 3.80267333984375, 3.9642333984375, 4.12579345703125, 4.287353515625, 4.44891357421875, 4.6104736328125, 4.77203369140625, 4.93359375]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 5.0, 3.0, 6.0, 7.0, 11.0, 15.0, 19.0, 26.0, 38.0, 60.0, 80.0, 131.0, 125.0, 112.0, 95.0, 86.0, 50.0, 39.0, 28.0, 28.0, 16.0, 8.0, 4.0, 4.0, 3.0, 6.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.18359375, -4.9989013671875, -4.814208984375, -4.6295166015625, -4.44482421875, -4.2601318359375, -4.075439453125, -3.8907470703125, -3.7060546875, -3.5213623046875, -3.336669921875, -3.1519775390625, -2.96728515625, -2.7825927734375, -2.597900390625, -2.4132080078125, -2.228515625, -2.0438232421875, -1.859130859375, -1.6744384765625, -1.48974609375, -1.3050537109375, -1.120361328125, -0.9356689453125, -0.7509765625, -0.5662841796875, -0.381591796875, -0.1968994140625, -0.01220703125, 0.1724853515625, 0.357177734375, 0.5418701171875, 0.7265625, 0.9112548828125, 1.095947265625, 1.2806396484375, 1.46533203125, 1.6500244140625, 1.834716796875, 2.0194091796875, 2.2041015625, 2.3887939453125, 2.573486328125, 2.7581787109375, 2.94287109375, 3.1275634765625, 3.312255859375, 3.4969482421875, 3.681640625, 3.8663330078125, 4.051025390625, 4.2357177734375, 4.42041015625, 4.6051025390625, 4.789794921875, 4.9744873046875, 5.1591796875, 5.3438720703125, 5.528564453125, 5.7132568359375, 5.89794921875, 6.0826416015625, 6.267333984375, 6.4520263671875, 6.63671875]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 10.0, 6.0, 12.0, 34.0, 71.0, 204.0, 436.0, 134.0, 41.0, 24.0, 13.0, 4.0, 5.0, 3.0, 3.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.56300735473633, -57.570335388183594, -54.577659606933594, -51.58498764038086, -48.59231185913086, -45.599639892578125, -42.606964111328125, -39.61429214477539, -36.621620178222656, -33.62894821166992, -30.636272430419922, -27.643600463867188, -24.650924682617188, -21.658252716064453, -18.665578842163086, -15.672904968261719, -12.680229187011719, -9.687555313110352, -6.694881916046143, -3.7022085189819336, -0.7095346450805664, 2.283139228820801, 5.275812149047852, 8.268486022949219, 11.261159896850586, 14.253833770751953, 17.24650764465332, 20.239181518554688, 23.231853485107422, 26.224529266357422, 29.217201232910156, 32.209877014160156, 35.202552795410156, 38.19522476196289, 41.18790054321289, 44.180572509765625, 47.173248291015625, 50.16592025756836, 53.158592224121094, 56.151268005371094, 59.143943786621094, 62.13661575317383, 65.12928771972656, 68.12196350097656, 71.11463928222656, 74.10731506347656, 77.09998321533203, 80.09265899658203, 83.0853271484375, 86.0780029296875, 89.07067108154297, 92.06334686279297, 95.05602264404297, 98.04869842529297, 101.04136657714844, 104.03404235839844, 107.02671813964844, 110.01939392089844, 113.0120620727539, 116.0047378540039, 118.9974136352539, 121.9900894165039, 124.98275756835938, 127.97543334960938, 130.96810913085938]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 4.0, 4.0, 3.0, 5.0, 10.0, 4.0, 19.0, 26.0, 32.0, 54.0, 88.0, 92.0, 142.0, 154.0, 105.0, 68.0, 40.0, 45.0, 22.0, 17.0, 11.0, 12.0, 8.0, 11.0, 4.0, 6.0, 3.0, 2.0, 4.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0], "bins": [-151.12779235839844, -147.177734375, -143.2276611328125, -139.27760314941406, -135.32754516601562, -131.3774871826172, -127.42742156982422, -123.47735595703125, -119.52729797363281, -115.57723999023438, -111.6271743774414, -107.67710876464844, -103.72705078125, -99.77699279785156, -95.8269271850586, -91.87686157226562, -87.92680358886719, -83.97674560546875, -80.02667999267578, -76.07661437988281, -72.12655639648438, -68.17649841308594, -64.22643280029297, -60.276371002197266, -56.32630920410156, -52.37624740600586, -48.426185607910156, -44.47612380981445, -40.52606201171875, -36.57600021362305, -32.625938415527344, -28.67587661743164, -24.725807189941406, -20.775745391845703, -16.82568359375, -12.875621795654297, -8.925559997558594, -4.975498199462891, -1.0254364013671875, 2.9246253967285156, 6.874687194824219, 10.824748992919922, 14.774810791015625, 18.724872589111328, 22.67493438720703, 26.624996185302734, 30.575057983398438, 34.52511978149414, 38.475181579589844, 42.42524337768555, 46.37530517578125, 50.32536697387695, 54.275428771972656, 58.22549057006836, 62.17555236816406, 66.1256103515625, 70.07567596435547, 74.02574157714844, 77.97579956054688, 81.92585754394531, 85.87592315673828, 89.82598876953125, 93.77604675292969, 97.72610473632812, 101.6761703491211]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 0.0, 3.0, 1.0, 3.0, 3.0, 8.0, 12.0, 17.0, 20.0, 46.0, 101.0, 176.0, 401.0, 893.0, 2181.0, 5922.0, 21134.0, 226562.0, 3877422.0, 43006.0, 10256.0, 3572.0, 1411.0, 580.0, 265.0, 127.0, 69.0, 33.0, 21.0, 15.0, 9.0, 5.0, 3.0, 5.0, 1.0, 1.0, 6.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.890625, -20.220947265625, -19.55126953125, -18.881591796875, -18.2119140625, -17.542236328125, -16.87255859375, -16.202880859375, -15.533203125, -14.863525390625, -14.19384765625, -13.524169921875, -12.8544921875, -12.184814453125, -11.51513671875, -10.845458984375, -10.17578125, -9.506103515625, -8.83642578125, -8.166748046875, -7.4970703125, -6.827392578125, -6.15771484375, -5.488037109375, -4.818359375, -4.148681640625, -3.47900390625, -2.809326171875, -2.1396484375, -1.469970703125, -0.80029296875, -0.130615234375, 0.5390625, 1.208740234375, 1.87841796875, 2.548095703125, 3.2177734375, 3.887451171875, 4.55712890625, 5.226806640625, 5.896484375, 6.566162109375, 7.23583984375, 7.905517578125, 8.5751953125, 9.244873046875, 9.91455078125, 10.584228515625, 11.25390625, 11.923583984375, 12.59326171875, 13.262939453125, 13.9326171875, 14.602294921875, 15.27197265625, 15.941650390625, 16.611328125, 17.281005859375, 17.95068359375, 18.620361328125, 19.2900390625, 19.959716796875, 20.62939453125, 21.299072265625, 21.96875]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 4.0, 1.0, 3.0, 3.0, 4.0, 3.0, 3.0, 3.0, 6.0, 9.0, 9.0, 14.0, 10.0, 16.0, 32.0, 102.0, 235.0, 273.0, 123.0, 45.0, 25.0, 14.0, 12.0, 10.0, 4.0, 7.0, 7.0, 4.0, 2.0, 2.0, 7.0, 4.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.2978515625, -1.257598876953125, -1.21734619140625, -1.177093505859375, -1.1368408203125, -1.096588134765625, -1.05633544921875, -1.016082763671875, -0.975830078125, -0.935577392578125, -0.89532470703125, -0.855072021484375, -0.8148193359375, -0.774566650390625, -0.73431396484375, -0.694061279296875, -0.65380859375, -0.613555908203125, -0.57330322265625, -0.533050537109375, -0.4927978515625, -0.452545166015625, -0.41229248046875, -0.372039794921875, -0.331787109375, -0.291534423828125, -0.25128173828125, -0.211029052734375, -0.1707763671875, -0.130523681640625, -0.09027099609375, -0.050018310546875, -0.009765625, 0.030487060546875, 0.07073974609375, 0.110992431640625, 0.1512451171875, 0.191497802734375, 0.23175048828125, 0.272003173828125, 0.312255859375, 0.352508544921875, 0.39276123046875, 0.433013916015625, 0.4732666015625, 0.513519287109375, 0.55377197265625, 0.594024658203125, 0.63427734375, 0.674530029296875, 0.71478271484375, 0.755035400390625, 0.7952880859375, 0.835540771484375, 0.87579345703125, 0.916046142578125, 0.956298828125, 0.996551513671875, 1.03680419921875, 1.077056884765625, 1.1173095703125, 1.157562255859375, 1.19781494140625, 1.238067626953125, 1.2783203125]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 0.0, 2.0, 6.0, 3.0, 8.0, 12.0, 16.0, 8.0, 23.0, 28.0, 25.0, 41.0, 53.0, 65.0, 120.0, 152.0, 276.0, 332.0, 680.0, 1299.0, 2747.0, 6543.0, 18569.0, 80022.0, 2763557.0, 1224908.0, 67828.0, 16186.0, 5770.0, 2391.0, 1084.0, 573.0, 342.0, 198.0, 121.0, 81.0, 54.0, 44.0, 30.0, 32.0, 19.0, 10.0, 6.0, 7.0, 6.0, 3.0, 2.0, 2.0, 3.0, 2.0, 2.0, 1.0, 2.0], "bins": [-8.921875, -8.67376708984375, -8.4256591796875, -8.17755126953125, -7.929443359375, -7.68133544921875, -7.4332275390625, -7.18511962890625, -6.93701171875, -6.68890380859375, -6.4407958984375, -6.19268798828125, -5.944580078125, -5.69647216796875, -5.4483642578125, -5.20025634765625, -4.9521484375, -4.70404052734375, -4.4559326171875, -4.20782470703125, -3.959716796875, -3.71160888671875, -3.4635009765625, -3.21539306640625, -2.96728515625, -2.71917724609375, -2.4710693359375, -2.22296142578125, -1.974853515625, -1.72674560546875, -1.4786376953125, -1.23052978515625, -0.982421875, -0.73431396484375, -0.4862060546875, -0.23809814453125, 0.010009765625, 0.25811767578125, 0.5062255859375, 0.75433349609375, 1.00244140625, 1.25054931640625, 1.4986572265625, 1.74676513671875, 1.994873046875, 2.24298095703125, 2.4910888671875, 2.73919677734375, 2.9873046875, 3.23541259765625, 3.4835205078125, 3.73162841796875, 3.979736328125, 4.22784423828125, 4.4759521484375, 4.72406005859375, 4.97216796875, 5.22027587890625, 5.4683837890625, 5.71649169921875, 5.964599609375, 6.21270751953125, 6.4608154296875, 6.70892333984375, 6.95703125]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 3.0, 2.0, 2.0, 4.0, 6.0, 9.0, 1.0, 5.0, 16.0, 6.0, 10.0, 16.0, 18.0, 17.0, 26.0, 35.0, 64.0, 80.0, 116.0, 168.0, 279.0, 761.0, 1554.0, 286.0, 190.0, 120.0, 63.0, 59.0, 46.0, 19.0, 24.0, 17.0, 17.0, 4.0, 7.0, 8.0, 3.0, 5.0, 4.0, 2.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.5986328125, -1.5544281005859375, -1.510223388671875, -1.4660186767578125, -1.42181396484375, -1.3776092529296875, -1.333404541015625, -1.2891998291015625, -1.2449951171875, -1.2007904052734375, -1.156585693359375, -1.1123809814453125, -1.06817626953125, -1.0239715576171875, -0.979766845703125, -0.9355621337890625, -0.891357421875, -0.8471527099609375, -0.802947998046875, -0.7587432861328125, -0.71453857421875, -0.6703338623046875, -0.626129150390625, -0.5819244384765625, -0.5377197265625, -0.4935150146484375, -0.449310302734375, -0.4051055908203125, -0.36090087890625, -0.3166961669921875, -0.272491455078125, -0.2282867431640625, -0.18408203125, -0.1398773193359375, -0.095672607421875, -0.0514678955078125, -0.00726318359375, 0.0369415283203125, 0.081146240234375, 0.1253509521484375, 0.1695556640625, 0.2137603759765625, 0.257965087890625, 0.3021697998046875, 0.34637451171875, 0.3905792236328125, 0.434783935546875, 0.4789886474609375, 0.523193359375, 0.5673980712890625, 0.611602783203125, 0.6558074951171875, 0.70001220703125, 0.7442169189453125, 0.788421630859375, 0.8326263427734375, 0.8768310546875, 0.9210357666015625, 0.965240478515625, 1.0094451904296875, 1.05364990234375, 1.0978546142578125, 1.142059326171875, 1.1862640380859375, 1.23046875]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 1.0, 4.0, 3.0, 10.0, 24.0, 105.0, 572.0, 252.0, 30.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-58.69364547729492, -57.62516784667969, -56.55668640136719, -55.48820877075195, -54.41973114013672, -53.35124969482422, -52.282772064208984, -51.21429443359375, -50.14581298828125, -49.077335357666016, -48.008853912353516, -46.94037628173828, -45.87189865112305, -44.80341720581055, -43.73493957519531, -42.66646194458008, -41.597984313964844, -40.52950668334961, -39.46102523803711, -38.392547607421875, -37.32406997680664, -36.25558853149414, -35.187110900878906, -34.11863327026367, -33.05015182495117, -31.981672286987305, -30.91319465637207, -29.844715118408203, -28.776235580444336, -27.70775604248047, -26.639278411865234, -25.570798873901367, -24.5023193359375, -23.433839797973633, -22.3653621673584, -21.29688262939453, -20.228403091430664, -19.159923553466797, -18.091445922851562, -17.022966384887695, -15.954487800598145, -14.886009216308594, -13.817529678344727, -12.749051094055176, -11.680572509765625, -10.612092971801758, -9.543614387512207, -8.475135803222656, -7.406656265258789, -6.33817720413208, -5.269698143005371, -4.20121955871582, -3.1327404975891113, -2.0642614364624023, -0.9957828521728516, 0.07269620895385742, 1.1411752700805664, 2.2096543312072754, 3.2781331539154053, 4.346611976623535, 5.415091037750244, 6.483570098876953, 7.552048683166504, 8.620527267456055, 9.689006805419922]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 1.0, 2.0, 5.0, 2.0, 6.0, 12.0, 8.0, 12.0, 21.0, 25.0, 18.0, 40.0, 49.0, 61.0, 51.0, 49.0, 59.0, 58.0, 61.0, 71.0, 75.0, 48.0, 35.0, 44.0, 40.0, 31.0, 24.0, 25.0, 8.0, 10.0, 12.0, 9.0, 2.0, 9.0, 7.0, 5.0, 1.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.984344482421875, -8.689896583557129, -8.395448684692383, -8.101000785827637, -7.806553363800049, -7.512105464935303, -7.217658042907715, -6.923210144042969, -6.628762245178223, -6.334314346313477, -6.0398664474487305, -5.745419025421143, -5.4509711265563965, -5.15652322769165, -4.8620758056640625, -4.567627906799316, -4.27318000793457, -3.978732109069824, -3.6842844486236572, -3.3898367881774902, -3.095388889312744, -2.800940990447998, -2.506493330001831, -2.212045669555664, -1.917597770690918, -1.6231499910354614, -1.3287022113800049, -1.0342544317245483, -0.7398066520690918, -0.44535887241363525, -0.1509110927581787, 0.14353656768798828, 0.43798351287841797, 0.7324312925338745, 1.026879072189331, 1.3213268518447876, 1.6157746315002441, 1.9102224111557007, 2.2046701908111572, 2.499117851257324, 2.7935657501220703, 3.0880136489868164, 3.3824613094329834, 3.6769089698791504, 3.9713568687438965, 4.265804767608643, 4.5602521896362305, 4.854700088500977, 5.149147987365723, 5.443595886230469, 5.738043785095215, 6.032491207122803, 6.326939105987549, 6.621387004852295, 6.915834426879883, 7.210282325744629, 7.504730224609375, 7.799178123474121, 8.093626022338867, 8.388073921203613, 8.68252182006836, 8.976968765258789, 9.271416664123535, 9.565864562988281, 9.860312461853027]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 1.0, 1.0, 4.0, 11.0, 12.0, 12.0, 16.0, 34.0, 40.0, 55.0, 76.0, 128.0, 170.0, 268.0, 408.0, 636.0, 1022.0, 1667.0, 3015.0, 5416.0, 10843.0, 27529.0, 201325.0, 712029.0, 52015.0, 15177.0, 7156.0, 3874.0, 2145.0, 1282.0, 744.0, 487.0, 297.0, 197.0, 137.0, 88.0, 57.0, 55.0, 36.0, 35.0, 15.0, 11.0, 4.0, 8.0, 3.0, 4.0, 6.0, 3.0, 0.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.3828125, -8.1142578125, -7.845703125, -7.5771484375, -7.30859375, -7.0400390625, -6.771484375, -6.5029296875, -6.234375, -5.9658203125, -5.697265625, -5.4287109375, -5.16015625, -4.8916015625, -4.623046875, -4.3544921875, -4.0859375, -3.8173828125, -3.548828125, -3.2802734375, -3.01171875, -2.7431640625, -2.474609375, -2.2060546875, -1.9375, -1.6689453125, -1.400390625, -1.1318359375, -0.86328125, -0.5947265625, -0.326171875, -0.0576171875, 0.2109375, 0.4794921875, 0.748046875, 1.0166015625, 1.28515625, 1.5537109375, 1.822265625, 2.0908203125, 2.359375, 2.6279296875, 2.896484375, 3.1650390625, 3.43359375, 3.7021484375, 3.970703125, 4.2392578125, 4.5078125, 4.7763671875, 5.044921875, 5.3134765625, 5.58203125, 5.8505859375, 6.119140625, 6.3876953125, 6.65625, 6.9248046875, 7.193359375, 7.4619140625, 7.73046875, 7.9990234375, 8.267578125, 8.5361328125, 8.8046875]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 4.0, 5.0, 4.0, 6.0, 4.0, 6.0, 5.0, 10.0, 17.0, 24.0, 65.0, 142.0, 200.0, 220.0, 131.0, 61.0, 20.0, 23.0, 12.0, 8.0, 6.0, 5.0, 4.0, 5.0, 4.0, 5.0, 3.0, 2.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.490234375, -1.4452056884765625, -1.400177001953125, -1.3551483154296875, -1.31011962890625, -1.2650909423828125, -1.220062255859375, -1.1750335693359375, -1.1300048828125, -1.0849761962890625, -1.039947509765625, -0.9949188232421875, -0.94989013671875, -0.9048614501953125, -0.859832763671875, -0.8148040771484375, -0.769775390625, -0.7247467041015625, -0.679718017578125, -0.6346893310546875, -0.58966064453125, -0.5446319580078125, -0.499603271484375, -0.4545745849609375, -0.4095458984375, -0.3645172119140625, -0.319488525390625, -0.2744598388671875, -0.22943115234375, -0.1844024658203125, -0.139373779296875, -0.0943450927734375, -0.04931640625, -0.0042877197265625, 0.040740966796875, 0.0857696533203125, 0.13079833984375, 0.1758270263671875, 0.220855712890625, 0.2658843994140625, 0.3109130859375, 0.3559417724609375, 0.400970458984375, 0.4459991455078125, 0.49102783203125, 0.5360565185546875, 0.581085205078125, 0.6261138916015625, 0.671142578125, 0.7161712646484375, 0.761199951171875, 0.8062286376953125, 0.85125732421875, 0.8962860107421875, 0.941314697265625, 0.9863433837890625, 1.0313720703125, 1.0764007568359375, 1.121429443359375, 1.1664581298828125, 1.21148681640625, 1.2565155029296875, 1.301544189453125, 1.3465728759765625, 1.3916015625]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 3.0, 5.0, 2.0, 5.0, 2.0, 21.0, 24.0, 34.0, 41.0, 42.0, 80.0, 110.0, 137.0, 210.0, 329.0, 481.0, 807.0, 1575.0, 3215.0, 7442.0, 18988.0, 54022.0, 190890.0, 564733.0, 135318.0, 42161.0, 15201.0, 6302.0, 2699.0, 1390.0, 769.0, 499.0, 294.0, 190.0, 164.0, 115.0, 68.0, 54.0, 44.0, 35.0, 24.0, 10.0, 9.0, 8.0, 2.0, 3.0, 3.0, 3.0, 4.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.06640625, -3.9122314453125, -3.758056640625, -3.6038818359375, -3.44970703125, -3.2955322265625, -3.141357421875, -2.9871826171875, -2.8330078125, -2.6788330078125, -2.524658203125, -2.3704833984375, -2.21630859375, -2.0621337890625, -1.907958984375, -1.7537841796875, -1.599609375, -1.4454345703125, -1.291259765625, -1.1370849609375, -0.98291015625, -0.8287353515625, -0.674560546875, -0.5203857421875, -0.3662109375, -0.2120361328125, -0.057861328125, 0.0963134765625, 0.25048828125, 0.4046630859375, 0.558837890625, 0.7130126953125, 0.8671875, 1.0213623046875, 1.175537109375, 1.3297119140625, 1.48388671875, 1.6380615234375, 1.792236328125, 1.9464111328125, 2.1005859375, 2.2547607421875, 2.408935546875, 2.5631103515625, 2.71728515625, 2.8714599609375, 3.025634765625, 3.1798095703125, 3.333984375, 3.4881591796875, 3.642333984375, 3.7965087890625, 3.95068359375, 4.1048583984375, 4.259033203125, 4.4132080078125, 4.5673828125, 4.7215576171875, 4.875732421875, 5.0299072265625, 5.18408203125, 5.3382568359375, 5.492431640625, 5.6466064453125, 5.80078125]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 5.0, 7.0, 7.0, 11.0, 11.0, 15.0, 12.0, 17.0, 17.0, 25.0, 35.0, 29.0, 32.0, 35.0, 35.0, 32.0, 41.0, 53.0, 46.0, 47.0, 46.0, 42.0, 42.0, 32.0, 41.0, 35.0, 31.0, 37.0, 29.0, 23.0, 27.0, 21.0, 16.0, 11.0, 12.0, 15.0, 8.0, 8.0, 7.0, 3.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.32421875, -3.19866943359375, -3.0731201171875, -2.94757080078125, -2.822021484375, -2.69647216796875, -2.5709228515625, -2.44537353515625, -2.31982421875, -2.19427490234375, -2.0687255859375, -1.94317626953125, -1.817626953125, -1.69207763671875, -1.5665283203125, -1.44097900390625, -1.3154296875, -1.18988037109375, -1.0643310546875, -0.93878173828125, -0.813232421875, -0.68768310546875, -0.5621337890625, -0.43658447265625, -0.31103515625, -0.18548583984375, -0.0599365234375, 0.06561279296875, 0.191162109375, 0.31671142578125, 0.4422607421875, 0.56781005859375, 0.693359375, 0.81890869140625, 0.9444580078125, 1.07000732421875, 1.195556640625, 1.32110595703125, 1.4466552734375, 1.57220458984375, 1.69775390625, 1.82330322265625, 1.9488525390625, 2.07440185546875, 2.199951171875, 2.32550048828125, 2.4510498046875, 2.57659912109375, 2.7021484375, 2.82769775390625, 2.9532470703125, 3.07879638671875, 3.204345703125, 3.32989501953125, 3.4554443359375, 3.58099365234375, 3.70654296875, 3.83209228515625, 3.9576416015625, 4.08319091796875, 4.208740234375, 4.33428955078125, 4.4598388671875, 4.58538818359375, 4.7109375]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 2.0, 2.0, 4.0, 5.0, 6.0, 9.0, 19.0, 35.0, 55.0, 113.0, 260.0, 681.0, 2126.0, 16088.0, 966060.0, 58019.0, 3506.0, 897.0, 347.0, 163.0, 54.0, 37.0, 22.0, 15.0, 10.0, 6.0, 8.0, 5.0, 3.0, 3.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.609375, -11.2308349609375, -10.852294921875, -10.4737548828125, -10.09521484375, -9.7166748046875, -9.338134765625, -8.9595947265625, -8.5810546875, -8.2025146484375, -7.823974609375, -7.4454345703125, -7.06689453125, -6.6883544921875, -6.309814453125, -5.9312744140625, -5.552734375, -5.1741943359375, -4.795654296875, -4.4171142578125, -4.03857421875, -3.6600341796875, -3.281494140625, -2.9029541015625, -2.5244140625, -2.1458740234375, -1.767333984375, -1.3887939453125, -1.01025390625, -0.6317138671875, -0.253173828125, 0.1253662109375, 0.50390625, 0.8824462890625, 1.260986328125, 1.6395263671875, 2.01806640625, 2.3966064453125, 2.775146484375, 3.1536865234375, 3.5322265625, 3.9107666015625, 4.289306640625, 4.6678466796875, 5.04638671875, 5.4249267578125, 5.803466796875, 6.1820068359375, 6.560546875, 6.9390869140625, 7.317626953125, 7.6961669921875, 8.07470703125, 8.4532470703125, 8.831787109375, 9.2103271484375, 9.5888671875, 9.9674072265625, 10.345947265625, 10.7244873046875, 11.10302734375, 11.4815673828125, 11.860107421875, 12.2386474609375, 12.6171875]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 11.0, 12.0, 30.0, 117.0, 315.0, 374.0, 95.0, 34.0, 14.0, 5.0, 2.0, 1.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00025653839111328125, -0.00022448599338531494, -0.00019243359565734863, -0.00016038119792938232, -0.00012832880020141602, -9.627640247344971e-05, -6.42240047454834e-05, -3.217160701751709e-05, -1.1920928955078125e-07, 3.193318843841553e-05, 6.398558616638184e-05, 9.603798389434814e-05, 0.00012809038162231445, 0.00016014277935028076, 0.00019219517707824707, 0.00022424757480621338, 0.0002562999725341797, 0.000288352370262146, 0.0003204047679901123, 0.0003524571657180786, 0.0003845095634460449, 0.00041656196117401123, 0.00044861435890197754, 0.00048066675662994385, 0.0005127191543579102, 0.0005447715520858765, 0.0005768239498138428, 0.0006088763475418091, 0.0006409287452697754, 0.0006729811429977417, 0.000705033540725708, 0.0007370859384536743, 0.0007691383361816406, 0.0008011907339096069, 0.0008332431316375732, 0.0008652955293655396, 0.0008973479270935059, 0.0009294003248214722, 0.0009614527225494385, 0.0009935051202774048, 0.001025557518005371, 0.0010576099157333374, 0.0010896623134613037, 0.00112171471118927, 0.0011537671089172363, 0.0011858195066452026, 0.001217871904373169, 0.0012499243021011353, 0.0012819766998291016, 0.0013140290975570679, 0.0013460814952850342, 0.0013781338930130005, 0.0014101862907409668, 0.001442238688468933, 0.0014742910861968994, 0.0015063434839248657, 0.001538395881652832, 0.0015704482793807983, 0.0016025006771087646, 0.001634553074836731, 0.0016666054725646973, 0.0016986578702926636, 0.0017307102680206299, 0.0017627626657485962, 0.0017948150634765625]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 1.0, 5.0, 1.0, 3.0, 3.0, 8.0, 15.0, 13.0, 15.0, 19.0, 37.0, 54.0, 72.0, 96.0, 135.0, 224.0, 367.0, 630.0, 1144.0, 2362.0, 6138.0, 17645.0, 73682.0, 602433.0, 282609.0, 41430.0, 11308.0, 4089.0, 1849.0, 883.0, 481.0, 287.0, 158.0, 122.0, 65.0, 61.0, 33.0, 25.0, 14.0, 10.0, 10.0, 6.0, 4.0, 7.0, 4.0, 4.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.14453125, -4.02880859375, -3.9130859375, -3.79736328125, -3.681640625, -3.56591796875, -3.4501953125, -3.33447265625, -3.21875, -3.10302734375, -2.9873046875, -2.87158203125, -2.755859375, -2.64013671875, -2.5244140625, -2.40869140625, -2.29296875, -2.17724609375, -2.0615234375, -1.94580078125, -1.830078125, -1.71435546875, -1.5986328125, -1.48291015625, -1.3671875, -1.25146484375, -1.1357421875, -1.02001953125, -0.904296875, -0.78857421875, -0.6728515625, -0.55712890625, -0.44140625, -0.32568359375, -0.2099609375, -0.09423828125, 0.021484375, 0.13720703125, 0.2529296875, 0.36865234375, 0.484375, 0.60009765625, 0.7158203125, 0.83154296875, 0.947265625, 1.06298828125, 1.1787109375, 1.29443359375, 1.41015625, 1.52587890625, 1.6416015625, 1.75732421875, 1.873046875, 1.98876953125, 2.1044921875, 2.22021484375, 2.3359375, 2.45166015625, 2.5673828125, 2.68310546875, 2.798828125, 2.91455078125, 3.0302734375, 3.14599609375, 3.26171875]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 6.0, 4.0, 9.0, 7.0, 4.0, 13.0, 14.0, 14.0, 16.0, 19.0, 23.0, 40.0, 52.0, 47.0, 77.0, 80.0, 101.0, 101.0, 92.0, 58.0, 49.0, 42.0, 28.0, 22.0, 20.0, 16.0, 12.0, 12.0, 8.0, 5.0, 1.0, 2.0, 4.0, 1.0, 0.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-3.3984375, -3.30511474609375, -3.2117919921875, -3.11846923828125, -3.025146484375, -2.93182373046875, -2.8385009765625, -2.74517822265625, -2.65185546875, -2.55853271484375, -2.4652099609375, -2.37188720703125, -2.278564453125, -2.18524169921875, -2.0919189453125, -1.99859619140625, -1.9052734375, -1.81195068359375, -1.7186279296875, -1.62530517578125, -1.531982421875, -1.43865966796875, -1.3453369140625, -1.25201416015625, -1.15869140625, -1.06536865234375, -0.9720458984375, -0.87872314453125, -0.785400390625, -0.69207763671875, -0.5987548828125, -0.50543212890625, -0.412109375, -0.31878662109375, -0.2254638671875, -0.13214111328125, -0.038818359375, 0.05450439453125, 0.1478271484375, 0.24114990234375, 0.33447265625, 0.42779541015625, 0.5211181640625, 0.61444091796875, 0.707763671875, 0.80108642578125, 0.8944091796875, 0.98773193359375, 1.0810546875, 1.17437744140625, 1.2677001953125, 1.36102294921875, 1.454345703125, 1.54766845703125, 1.6409912109375, 1.73431396484375, 1.82763671875, 1.92095947265625, 2.0142822265625, 2.10760498046875, 2.200927734375, 2.29425048828125, 2.3875732421875, 2.48089599609375, 2.57421875]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 5.0, 17.0, 48.0, 179.0, 532.0, 170.0, 35.0, 12.0, 8.0, 3.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-75.53518676757812, -72.88227844238281, -70.22936248779297, -67.57645416259766, -64.92354583740234, -62.2706298828125, -59.61772155761719, -56.96480941772461, -54.31189727783203, -51.65898513793945, -49.00607681274414, -46.35316467285156, -43.700252532958984, -41.047340393066406, -38.394432067871094, -35.741519927978516, -33.0886116027832, -30.435701370239258, -27.78278923034668, -25.129878997802734, -22.476966857910156, -19.82405662536621, -17.171146392822266, -14.518234252929688, -11.865324020385742, -9.21241283416748, -6.559502124786377, -3.9065914154052734, -1.2536802291870117, 1.39923095703125, 4.052141189575195, 6.705053329467773, 9.357963562011719, 12.01087474822998, 14.663785934448242, 17.316696166992188, 19.969608306884766, 22.62251853942871, 25.275428771972656, 27.928340911865234, 30.58125114440918, 33.234161376953125, 35.8870735168457, 38.53998565673828, 41.192893981933594, 43.84580612182617, 46.49871826171875, 49.15162658691406, 51.80453872680664, 54.45745086669922, 57.11035919189453, 59.76327133178711, 62.41618347167969, 65.069091796875, 67.72200012207031, 70.37491607666016, 73.02782440185547, 75.68073272705078, 78.33364868164062, 80.98655700683594, 83.63946533203125, 86.2923812866211, 88.9452896118164, 91.59820556640625, 94.25111389160156]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 4.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 6.0, 5.0, 3.0, 9.0, 5.0, 9.0, 10.0, 12.0, 18.0, 31.0, 51.0, 33.0, 66.0, 76.0, 90.0, 97.0, 88.0, 77.0, 68.0, 60.0, 50.0, 36.0, 25.0, 17.0, 14.0, 9.0, 7.0, 6.0, 2.0, 7.0, 3.0, 2.0, 3.0, 1.0, 3.0, 2.0, 3.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.034263610839844, -38.54399490356445, -37.0537223815918, -35.563453674316406, -34.073184967041016, -32.582916259765625, -31.09264373779297, -29.602375030517578, -28.112104415893555, -26.62183380126953, -25.13156509399414, -23.641294479370117, -22.151023864746094, -20.660755157470703, -19.17048454284668, -17.680213928222656, -16.189945220947266, -14.699675559997559, -13.209405899047852, -11.719135284423828, -10.228865623474121, -8.738595962524414, -7.248325347900391, -5.758055686950684, -4.267786026000977, -2.7775161266326904, -1.2872462272644043, 0.20302391052246094, 1.693293571472168, 3.183563232421875, 4.673833847045898, 6.1641035079956055, 7.654369354248047, 9.144639015197754, 10.634908676147461, 12.125179290771484, 13.615448951721191, 15.105718612670898, 16.595989227294922, 18.086257934570312, 19.576528549194336, 21.06679916381836, 22.55706787109375, 24.047338485717773, 25.537609100341797, 27.027877807617188, 28.51814842224121, 30.008419036865234, 31.498687744140625, 32.988956451416016, 34.47922897338867, 35.96949768066406, 37.45976638793945, 38.950035095214844, 40.4403076171875, 41.93057632446289, 43.42084503173828, 44.91111373901367, 46.40138626098633, 47.89165496826172, 49.38192367553711, 50.8721923828125, 52.362464904785156, 53.85273361206055, 55.3430061340332]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 5.0, 10.0, 5.0, 7.0, 11.0, 16.0, 23.0, 29.0, 35.0, 60.0, 105.0, 160.0, 278.0, 481.0, 896.0, 1906.0, 5511.0, 35758.0, 4095855.0, 42290.0, 6231.0, 2151.0, 914.0, 555.0, 338.0, 183.0, 127.0, 83.0, 68.0, 48.0, 38.0, 26.0, 21.0, 22.0, 11.0, 6.0, 6.0, 4.0, 5.0, 1.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0], "bins": [-35.1875, -34.13818359375, -33.0888671875, -32.03955078125, -30.990234375, -29.94091796875, -28.8916015625, -27.84228515625, -26.79296875, -25.74365234375, -24.6943359375, -23.64501953125, -22.595703125, -21.54638671875, -20.4970703125, -19.44775390625, -18.3984375, -17.34912109375, -16.2998046875, -15.25048828125, -14.201171875, -13.15185546875, -12.1025390625, -11.05322265625, -10.00390625, -8.95458984375, -7.9052734375, -6.85595703125, -5.806640625, -4.75732421875, -3.7080078125, -2.65869140625, -1.609375, -0.56005859375, 0.4892578125, 1.53857421875, 2.587890625, 3.63720703125, 4.6865234375, 5.73583984375, 6.78515625, 7.83447265625, 8.8837890625, 9.93310546875, 10.982421875, 12.03173828125, 13.0810546875, 14.13037109375, 15.1796875, 16.22900390625, 17.2783203125, 18.32763671875, 19.376953125, 20.42626953125, 21.4755859375, 22.52490234375, 23.57421875, 24.62353515625, 25.6728515625, 26.72216796875, 27.771484375, 28.82080078125, 29.8701171875, 30.91943359375, 31.96875]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 2.0, 3.0, 2.0, 5.0, 7.0, 9.0, 9.0, 18.0, 35.0, 75.0, 142.0, 211.0, 213.0, 133.0, 61.0, 22.0, 19.0, 10.0, 11.0, 4.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.6298828125, -1.58184814453125, -1.5338134765625, -1.48577880859375, -1.437744140625, -1.38970947265625, -1.3416748046875, -1.29364013671875, -1.24560546875, -1.19757080078125, -1.1495361328125, -1.10150146484375, -1.053466796875, -1.00543212890625, -0.9573974609375, -0.90936279296875, -0.861328125, -0.81329345703125, -0.7652587890625, -0.71722412109375, -0.669189453125, -0.62115478515625, -0.5731201171875, -0.52508544921875, -0.47705078125, -0.42901611328125, -0.3809814453125, -0.33294677734375, -0.284912109375, -0.23687744140625, -0.1888427734375, -0.14080810546875, -0.0927734375, -0.04473876953125, 0.0032958984375, 0.05133056640625, 0.099365234375, 0.14739990234375, 0.1954345703125, 0.24346923828125, 0.29150390625, 0.33953857421875, 0.3875732421875, 0.43560791015625, 0.483642578125, 0.53167724609375, 0.5797119140625, 0.62774658203125, 0.67578125, 0.72381591796875, 0.7718505859375, 0.81988525390625, 0.867919921875, 0.91595458984375, 0.9639892578125, 1.01202392578125, 1.06005859375, 1.10809326171875, 1.1561279296875, 1.20416259765625, 1.252197265625, 1.30023193359375, 1.3482666015625, 1.39630126953125, 1.4443359375]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 3.0, 1.0, 5.0, 7.0, 7.0, 8.0, 9.0, 14.0, 10.0, 22.0, 36.0, 46.0, 102.0, 382.0, 2656.0, 74236.0, 4104023.0, 11173.0, 1050.0, 252.0, 87.0, 53.0, 27.0, 19.0, 18.0, 10.0, 6.0, 4.0, 3.0, 6.0, 2.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.9375, -41.6845703125, -40.431640625, -39.1787109375, -37.92578125, -36.6728515625, -35.419921875, -34.1669921875, -32.9140625, -31.6611328125, -30.408203125, -29.1552734375, -27.90234375, -26.6494140625, -25.396484375, -24.1435546875, -22.890625, -21.6376953125, -20.384765625, -19.1318359375, -17.87890625, -16.6259765625, -15.373046875, -14.1201171875, -12.8671875, -11.6142578125, -10.361328125, -9.1083984375, -7.85546875, -6.6025390625, -5.349609375, -4.0966796875, -2.84375, -1.5908203125, -0.337890625, 0.9150390625, 2.16796875, 3.4208984375, 4.673828125, 5.9267578125, 7.1796875, 8.4326171875, 9.685546875, 10.9384765625, 12.19140625, 13.4443359375, 14.697265625, 15.9501953125, 17.203125, 18.4560546875, 19.708984375, 20.9619140625, 22.21484375, 23.4677734375, 24.720703125, 25.9736328125, 27.2265625, 28.4794921875, 29.732421875, 30.9853515625, 32.23828125, 33.4912109375, 34.744140625, 35.9970703125, 37.25]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 5.0, 3.0, 3.0, 5.0, 7.0, 4.0, 10.0, 11.0, 22.0, 31.0, 36.0, 70.0, 101.0, 193.0, 636.0, 2240.0, 311.0, 129.0, 68.0, 48.0, 38.0, 20.0, 21.0, 11.0, 11.0, 13.0, 9.0, 4.0, 3.0, 7.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-1.966796875, -1.901824951171875, -1.83685302734375, -1.771881103515625, -1.7069091796875, -1.641937255859375, -1.57696533203125, -1.511993408203125, -1.447021484375, -1.382049560546875, -1.31707763671875, -1.252105712890625, -1.1871337890625, -1.122161865234375, -1.05718994140625, -0.992218017578125, -0.92724609375, -0.862274169921875, -0.79730224609375, -0.732330322265625, -0.6673583984375, -0.602386474609375, -0.53741455078125, -0.472442626953125, -0.407470703125, -0.342498779296875, -0.27752685546875, -0.212554931640625, -0.1475830078125, -0.082611083984375, -0.01763916015625, 0.047332763671875, 0.1123046875, 0.177276611328125, 0.24224853515625, 0.307220458984375, 0.3721923828125, 0.437164306640625, 0.50213623046875, 0.567108154296875, 0.632080078125, 0.697052001953125, 0.76202392578125, 0.826995849609375, 0.8919677734375, 0.956939697265625, 1.02191162109375, 1.086883544921875, 1.15185546875, 1.216827392578125, 1.28179931640625, 1.346771240234375, 1.4117431640625, 1.476715087890625, 1.54168701171875, 1.606658935546875, 1.671630859375, 1.736602783203125, 1.80157470703125, 1.866546630859375, 1.9315185546875, 1.996490478515625, 2.06146240234375, 2.126434326171875, 2.19140625]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 9.0, 39.0, 212.0, 482.0, 178.0, 42.0, 23.0, 11.0, 8.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.168739318847656, -14.386367797851562, -13.603996276855469, -12.821623802185059, -12.039252281188965, -11.256880760192871, -10.474508285522461, -9.692136764526367, -8.909765243530273, -8.12739372253418, -7.345021724700928, -6.562649726867676, -5.780278205871582, -4.997906684875488, -4.215534687042236, -3.4331626892089844, -2.6507911682128906, -1.8684194087982178, -1.086047649383545, -0.30367588996887207, 0.4786958694458008, 1.2610676288604736, 2.0434393882751465, 2.8258113861083984, 3.608182907104492, 4.390554428100586, 5.172926425933838, 5.95529842376709, 6.737669944763184, 7.520041465759277, 8.302413940429688, 9.084785461425781, 9.867156982421875, 10.649528503417969, 11.431900024414062, 12.214272499084473, 12.996644020080566, 13.77901554107666, 14.56138801574707, 15.343759536743164, 16.126131057739258, 16.90850257873535, 17.690874099731445, 18.47324562072754, 19.255619049072266, 20.03799057006836, 20.820362091064453, 21.602733612060547, 22.38510513305664, 23.167476654052734, 23.949848175048828, 24.732219696044922, 25.514591217041016, 26.296964645385742, 27.079336166381836, 27.86170768737793, 28.644079208374023, 29.426450729370117, 30.20882225036621, 30.991193771362305, 31.77356719970703, 32.555938720703125, 33.33831024169922, 34.12068176269531, 34.903053283691406]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 5.0, 5.0, 6.0, 9.0, 13.0, 16.0, 21.0, 26.0, 34.0, 45.0, 54.0, 47.0, 66.0, 64.0, 62.0, 68.0, 63.0, 71.0, 67.0, 54.0, 38.0, 39.0, 28.0, 22.0, 24.0, 15.0, 13.0, 6.0, 6.0, 5.0, 2.0, 2.0, 5.0, 4.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.673911094665527, -9.397655487060547, -9.121399879455566, -8.845144271850586, -8.568888664245605, -8.292633056640625, -8.016376495361328, -7.740121364593506, -7.463865280151367, -7.187609672546387, -6.911354064941406, -6.635098457336426, -6.358842372894287, -6.082586765289307, -5.806331157684326, -5.530075550079346, -5.253819942474365, -4.977564334869385, -4.701308727264404, -4.425052642822266, -4.148797035217285, -3.8725414276123047, -3.596285820007324, -3.3200302124023438, -3.043774366378784, -2.7675187587738037, -2.491262912750244, -2.2150073051452637, -1.9387515783309937, -1.6624958515167236, -1.3862402439117432, -1.1099845170974731, -0.8337287902832031, -0.5574730634689331, -0.28121739625930786, -0.004961729049682617, 0.2712939977645874, 0.5475497245788574, 0.8238053321838379, 1.100061058998108, 1.376316785812378, 1.652572512626648, 1.928828239440918, 2.2050838470458984, 2.481339454650879, 2.7575953006744385, 3.033850908279419, 3.3101067543029785, 3.586362361907959, 3.8626179695129395, 4.13887357711792, 4.415129661560059, 4.691385269165039, 4.9676408767700195, 5.243896484375, 5.5201520919799805, 5.796407699584961, 6.072663307189941, 6.348918914794922, 6.625174522399902, 6.901430606842041, 7.1776862144470215, 7.453941822052002, 7.730197429656982, 8.006453514099121]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 2.0, 5.0, 3.0, 1.0, 11.0, 2.0, 13.0, 8.0, 31.0, 34.0, 38.0, 76.0, 94.0, 134.0, 204.0, 311.0, 574.0, 992.0, 2197.0, 6035.0, 28802.0, 677788.0, 303611.0, 19024.0, 4487.0, 1814.0, 906.0, 479.0, 291.0, 201.0, 114.0, 90.0, 57.0, 31.0, 21.0, 23.0, 9.0, 19.0, 6.0, 9.0, 4.0, 9.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.734375, -13.3037109375, -12.873046875, -12.4423828125, -12.01171875, -11.5810546875, -11.150390625, -10.7197265625, -10.2890625, -9.8583984375, -9.427734375, -8.9970703125, -8.56640625, -8.1357421875, -7.705078125, -7.2744140625, -6.84375, -6.4130859375, -5.982421875, -5.5517578125, -5.12109375, -4.6904296875, -4.259765625, -3.8291015625, -3.3984375, -2.9677734375, -2.537109375, -2.1064453125, -1.67578125, -1.2451171875, -0.814453125, -0.3837890625, 0.046875, 0.4775390625, 0.908203125, 1.3388671875, 1.76953125, 2.2001953125, 2.630859375, 3.0615234375, 3.4921875, 3.9228515625, 4.353515625, 4.7841796875, 5.21484375, 5.6455078125, 6.076171875, 6.5068359375, 6.9375, 7.3681640625, 7.798828125, 8.2294921875, 8.66015625, 9.0908203125, 9.521484375, 9.9521484375, 10.3828125, 10.8134765625, 11.244140625, 11.6748046875, 12.10546875, 12.5361328125, 12.966796875, 13.3974609375, 13.828125]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 4.0, 4.0, 5.0, 7.0, 20.0, 36.0, 71.0, 134.0, 219.0, 205.0, 147.0, 65.0, 35.0, 14.0, 13.0, 11.0, 4.0, 1.0, 2.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.7490234375, -1.70050048828125, -1.6519775390625, -1.60345458984375, -1.554931640625, -1.50640869140625, -1.4578857421875, -1.40936279296875, -1.36083984375, -1.31231689453125, -1.2637939453125, -1.21527099609375, -1.166748046875, -1.11822509765625, -1.0697021484375, -1.02117919921875, -0.97265625, -0.92413330078125, -0.8756103515625, -0.82708740234375, -0.778564453125, -0.73004150390625, -0.6815185546875, -0.63299560546875, -0.58447265625, -0.53594970703125, -0.4874267578125, -0.43890380859375, -0.390380859375, -0.34185791015625, -0.2933349609375, -0.24481201171875, -0.1962890625, -0.14776611328125, -0.0992431640625, -0.05072021484375, -0.002197265625, 0.04632568359375, 0.0948486328125, 0.14337158203125, 0.19189453125, 0.24041748046875, 0.2889404296875, 0.33746337890625, 0.385986328125, 0.43450927734375, 0.4830322265625, 0.53155517578125, 0.580078125, 0.62860107421875, 0.6771240234375, 0.72564697265625, 0.774169921875, 0.82269287109375, 0.8712158203125, 0.91973876953125, 0.96826171875, 1.01678466796875, 1.0653076171875, 1.11383056640625, 1.162353515625, 1.21087646484375, 1.2593994140625, 1.30792236328125, 1.3564453125]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 3.0, 4.0, 8.0, 6.0, 9.0, 9.0, 22.0, 20.0, 45.0, 46.0, 52.0, 62.0, 89.0, 128.0, 207.0, 298.0, 495.0, 727.0, 1391.0, 2627.0, 5362.0, 12276.0, 31556.0, 102804.0, 429029.0, 332958.0, 80488.0, 26250.0, 10671.0, 4841.0, 2481.0, 1274.0, 720.0, 480.0, 304.0, 216.0, 165.0, 103.0, 87.0, 59.0, 46.0, 34.0, 23.0, 26.0, 22.0, 12.0, 5.0, 10.0, 5.0, 4.0, 3.0, 1.0, 0.0, 3.0, 0.0, 1.0], "bins": [-6.26171875, -6.0714111328125, -5.881103515625, -5.6907958984375, -5.50048828125, -5.3101806640625, -5.119873046875, -4.9295654296875, -4.7392578125, -4.5489501953125, -4.358642578125, -4.1683349609375, -3.97802734375, -3.7877197265625, -3.597412109375, -3.4071044921875, -3.216796875, -3.0264892578125, -2.836181640625, -2.6458740234375, -2.45556640625, -2.2652587890625, -2.074951171875, -1.8846435546875, -1.6943359375, -1.5040283203125, -1.313720703125, -1.1234130859375, -0.93310546875, -0.7427978515625, -0.552490234375, -0.3621826171875, -0.171875, 0.0184326171875, 0.208740234375, 0.3990478515625, 0.58935546875, 0.7796630859375, 0.969970703125, 1.1602783203125, 1.3505859375, 1.5408935546875, 1.731201171875, 1.9215087890625, 2.11181640625, 2.3021240234375, 2.492431640625, 2.6827392578125, 2.873046875, 3.0633544921875, 3.253662109375, 3.4439697265625, 3.63427734375, 3.8245849609375, 4.014892578125, 4.2052001953125, 4.3955078125, 4.5858154296875, 4.776123046875, 4.9664306640625, 5.15673828125, 5.3470458984375, 5.537353515625, 5.7276611328125, 5.91796875]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [4.0, 0.0, 2.0, 2.0, 2.0, 2.0, 5.0, 3.0, 5.0, 1.0, 4.0, 5.0, 6.0, 4.0, 11.0, 15.0, 17.0, 14.0, 26.0, 17.0, 28.0, 29.0, 29.0, 48.0, 34.0, 55.0, 44.0, 43.0, 50.0, 45.0, 37.0, 41.0, 47.0, 32.0, 42.0, 46.0, 32.0, 23.0, 13.0, 20.0, 16.0, 19.0, 14.0, 13.0, 11.0, 12.0, 11.0, 6.0, 5.0, 4.0, 6.0, 4.0, 3.0, 3.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-3.52734375, -3.40673828125, -3.2861328125, -3.16552734375, -3.044921875, -2.92431640625, -2.8037109375, -2.68310546875, -2.5625, -2.44189453125, -2.3212890625, -2.20068359375, -2.080078125, -1.95947265625, -1.8388671875, -1.71826171875, -1.59765625, -1.47705078125, -1.3564453125, -1.23583984375, -1.115234375, -0.99462890625, -0.8740234375, -0.75341796875, -0.6328125, -0.51220703125, -0.3916015625, -0.27099609375, -0.150390625, -0.02978515625, 0.0908203125, 0.21142578125, 0.33203125, 0.45263671875, 0.5732421875, 0.69384765625, 0.814453125, 0.93505859375, 1.0556640625, 1.17626953125, 1.296875, 1.41748046875, 1.5380859375, 1.65869140625, 1.779296875, 1.89990234375, 2.0205078125, 2.14111328125, 2.26171875, 2.38232421875, 2.5029296875, 2.62353515625, 2.744140625, 2.86474609375, 2.9853515625, 3.10595703125, 3.2265625, 3.34716796875, 3.4677734375, 3.58837890625, 3.708984375, 3.82958984375, 3.9501953125, 4.07080078125, 4.19140625]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 5.0, 5.0, 5.0, 13.0, 9.0, 37.0, 51.0, 86.0, 185.0, 473.0, 1196.0, 5666.0, 84571.0, 899954.0, 50232.0, 4283.0, 1052.0, 376.0, 172.0, 77.0, 38.0, 23.0, 16.0, 9.0, 11.0, 4.0, 2.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.234375, -8.9405517578125, -8.646728515625, -8.3529052734375, -8.05908203125, -7.7652587890625, -7.471435546875, -7.1776123046875, -6.8837890625, -6.5899658203125, -6.296142578125, -6.0023193359375, -5.70849609375, -5.4146728515625, -5.120849609375, -4.8270263671875, -4.533203125, -4.2393798828125, -3.945556640625, -3.6517333984375, -3.35791015625, -3.0640869140625, -2.770263671875, -2.4764404296875, -2.1826171875, -1.8887939453125, -1.594970703125, -1.3011474609375, -1.00732421875, -0.7135009765625, -0.419677734375, -0.1258544921875, 0.16796875, 0.4617919921875, 0.755615234375, 1.0494384765625, 1.34326171875, 1.6370849609375, 1.930908203125, 2.2247314453125, 2.5185546875, 2.8123779296875, 3.106201171875, 3.4000244140625, 3.69384765625, 3.9876708984375, 4.281494140625, 4.5753173828125, 4.869140625, 5.1629638671875, 5.456787109375, 5.7506103515625, 6.04443359375, 6.3382568359375, 6.632080078125, 6.9259033203125, 7.2197265625, 7.5135498046875, 7.807373046875, 8.1011962890625, 8.39501953125, 8.6888427734375, 8.982666015625, 9.2764892578125, 9.5703125]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 6.0, 4.0, 2.0, 1.0, 5.0, 5.0, 3.0, 11.0, 11.0, 11.0, 8.0, 20.0, 18.0, 23.0, 43.0, 33.0, 46.0, 68.0, 79.0, 82.0, 102.0, 74.0, 66.0, 73.0, 48.0, 31.0, 33.0, 17.0, 22.0, 11.0, 10.0, 7.0, 4.0, 5.0, 5.0, 5.0, 1.0, 4.0, 3.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.0003845691680908203, -0.00037327781319618225, -0.0003619864583015442, -0.00035069510340690613, -0.00033940374851226807, -0.00032811239361763, -0.00031682103872299194, -0.0003055296838283539, -0.0002942383289337158, -0.00028294697403907776, -0.0002716556191444397, -0.00026036426424980164, -0.0002490729093551636, -0.0002377815544605255, -0.00022649019956588745, -0.0002151988446712494, -0.00020390748977661133, -0.00019261613488197327, -0.0001813247799873352, -0.00017003342509269714, -0.00015874207019805908, -0.00014745071530342102, -0.00013615936040878296, -0.0001248680055141449, -0.00011357665061950684, -0.00010228529572486877, -9.099394083023071e-05, -7.970258593559265e-05, -6.841123104095459e-05, -5.711987614631653e-05, -4.582852125167847e-05, -3.4537166357040405e-05, -2.3245811462402344e-05, -1.1954456567764282e-05, -6.631016731262207e-07, 1.062825322151184e-05, 2.1919608116149902e-05, 3.3210963010787964e-05, 4.4502317905426025e-05, 5.579367280006409e-05, 6.708502769470215e-05, 7.837638258934021e-05, 8.966773748397827e-05, 0.00010095909237861633, 0.0001122504472732544, 0.00012354180216789246, 0.00013483315706253052, 0.00014612451195716858, 0.00015741586685180664, 0.0001687072217464447, 0.00017999857664108276, 0.00019128993153572083, 0.0002025812864303589, 0.00021387264132499695, 0.000225163996219635, 0.00023645535111427307, 0.00024774670600891113, 0.0002590380609035492, 0.00027032941579818726, 0.0002816207706928253, 0.0002929121255874634, 0.00030420348048210144, 0.0003154948353767395, 0.00032678619027137756, 0.0003380775451660156]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 6.0, 2.0, 2.0, 10.0, 6.0, 17.0, 27.0, 31.0, 58.0, 124.0, 194.0, 370.0, 900.0, 2708.0, 12795.0, 172346.0, 796059.0, 53370.0, 6498.0, 1728.0, 646.0, 292.0, 134.0, 98.0, 44.0, 29.0, 19.0, 6.0, 10.0, 5.0, 2.0, 2.0, 2.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.58203125, -7.34735107421875, -7.1126708984375, -6.87799072265625, -6.643310546875, -6.40863037109375, -6.1739501953125, -5.93927001953125, -5.70458984375, -5.46990966796875, -5.2352294921875, -5.00054931640625, -4.765869140625, -4.53118896484375, -4.2965087890625, -4.06182861328125, -3.8271484375, -3.59246826171875, -3.3577880859375, -3.12310791015625, -2.888427734375, -2.65374755859375, -2.4190673828125, -2.18438720703125, -1.94970703125, -1.71502685546875, -1.4803466796875, -1.24566650390625, -1.010986328125, -0.77630615234375, -0.5416259765625, -0.30694580078125, -0.072265625, 0.16241455078125, 0.3970947265625, 0.63177490234375, 0.866455078125, 1.10113525390625, 1.3358154296875, 1.57049560546875, 1.80517578125, 2.03985595703125, 2.2745361328125, 2.50921630859375, 2.743896484375, 2.97857666015625, 3.2132568359375, 3.44793701171875, 3.6826171875, 3.91729736328125, 4.1519775390625, 4.38665771484375, 4.621337890625, 4.85601806640625, 5.0906982421875, 5.32537841796875, 5.56005859375, 5.79473876953125, 6.0294189453125, 6.26409912109375, 6.498779296875, 6.73345947265625, 6.9681396484375, 7.20281982421875, 7.4375]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 6.0, 5.0, 9.0, 19.0, 9.0, 16.0, 31.0, 40.0, 46.0, 65.0, 66.0, 92.0, 102.0, 116.0, 88.0, 82.0, 44.0, 34.0, 33.0, 33.0, 20.0, 12.0, 14.0, 8.0, 8.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.111328125, -3.0366363525390625, -2.961944580078125, -2.8872528076171875, -2.81256103515625, -2.7378692626953125, -2.663177490234375, -2.5884857177734375, -2.5137939453125, -2.4391021728515625, -2.364410400390625, -2.2897186279296875, -2.21502685546875, -2.1403350830078125, -2.065643310546875, -1.9909515380859375, -1.916259765625, -1.8415679931640625, -1.766876220703125, -1.6921844482421875, -1.61749267578125, -1.5428009033203125, -1.468109130859375, -1.3934173583984375, -1.3187255859375, -1.2440338134765625, -1.169342041015625, -1.0946502685546875, -1.01995849609375, -0.9452667236328125, -0.870574951171875, -0.7958831787109375, -0.72119140625, -0.6464996337890625, -0.571807861328125, -0.4971160888671875, -0.42242431640625, -0.3477325439453125, -0.273040771484375, -0.1983489990234375, -0.1236572265625, -0.0489654541015625, 0.025726318359375, 0.1004180908203125, 0.17510986328125, 0.2498016357421875, 0.324493408203125, 0.3991851806640625, 0.473876953125, 0.5485687255859375, 0.623260498046875, 0.6979522705078125, 0.77264404296875, 0.8473358154296875, 0.922027587890625, 0.9967193603515625, 1.0714111328125, 1.1461029052734375, 1.220794677734375, 1.2954864501953125, 1.37017822265625, 1.4448699951171875, 1.519561767578125, 1.5942535400390625, 1.6689453125]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 8.0, 14.0, 17.0, 55.0, 133.0, 402.0, 271.0, 66.0, 24.0, 8.0, 6.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-80.47560119628906, -77.24583435058594, -74.01606750488281, -70.78630065917969, -67.5565414428711, -64.32677459716797, -61.097007751464844, -57.86724090576172, -54.63747787475586, -51.407711029052734, -48.177947998046875, -44.94818115234375, -41.718414306640625, -38.488651275634766, -35.25888442993164, -32.02912139892578, -28.799354553222656, -25.569589614868164, -22.339824676513672, -19.110057830810547, -15.880292892456055, -12.650527954101562, -9.420761108398438, -6.190996170043945, -2.961231231689453, 0.26853418350219727, 3.4982995986938477, 6.728065490722656, 9.957830429077148, 13.18759536743164, 16.417362213134766, 19.647127151489258, 22.87689208984375, 26.106657028198242, 29.336421966552734, 32.56618881225586, 35.79595184326172, 39.025718688964844, 42.25548553466797, 45.485252380371094, 48.71501541137695, 51.94478225708008, 55.17454528808594, 58.40431213378906, 61.63407897949219, 64.86384582519531, 68.09361267089844, 71.32337188720703, 74.55313873291016, 77.78290557861328, 81.0126724243164, 84.242431640625, 87.47219848632812, 90.70196533203125, 93.93173217773438, 97.1614990234375, 100.39126586914062, 103.62103271484375, 106.85079956054688, 110.08056640625, 113.3103256225586, 116.54009246826172, 119.76985931396484, 122.99962615966797, 126.22938537597656]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 5.0, 6.0, 3.0, 10.0, 15.0, 24.0, 27.0, 24.0, 43.0, 77.0, 78.0, 76.0, 77.0, 99.0, 95.0, 57.0, 83.0, 55.0, 33.0, 41.0, 26.0, 15.0, 19.0, 9.0, 4.0, 1.0, 6.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-69.1520004272461, -67.35523223876953, -65.55845642089844, -63.761688232421875, -61.96491622924805, -60.16814422607422, -58.371376037597656, -56.57460403442383, -54.77783203125, -52.98106002807617, -51.18429183959961, -49.38751983642578, -47.59074783325195, -45.793975830078125, -43.99720764160156, -42.200435638427734, -40.40366744995117, -38.606895446777344, -36.81012725830078, -35.01335525512695, -33.216583251953125, -31.41981315612793, -29.623043060302734, -27.826271057128906, -26.02950096130371, -24.232730865478516, -22.435958862304688, -20.639188766479492, -18.842418670654297, -17.04564666748047, -15.248876571655273, -13.452105522155762, -11.655330657958984, -9.858559608459473, -8.061788558959961, -6.265018463134766, -4.468247413635254, -2.671476364135742, -0.8747062683105469, 0.9220647811889648, 2.7188358306884766, 4.515606880187988, 6.312377452850342, 8.109148025512695, 9.905919075012207, 11.702690124511719, 13.499460220336914, 15.296231269836426, 17.093002319335938, 18.889772415161133, 20.68654441833496, 22.483314514160156, 24.280086517333984, 26.07685661315918, 27.873626708984375, 29.670398712158203, 31.4671688079834, 33.263938903808594, 35.06071090698242, 36.85748291015625, 38.65425109863281, 40.45102310180664, 42.24779510498047, 44.04456329345703, 45.84133529663086]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 4.0, 4.0, 12.0, 14.0, 37.0, 94.0, 229.0, 809.0, 12281.0, 4175435.0, 4556.0, 532.0, 174.0, 57.0, 21.0, 10.0, 7.0, 4.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-128.0, -124.6640625, -121.328125, -117.9921875, -114.65625, -111.3203125, -107.984375, -104.6484375, -101.3125, -97.9765625, -94.640625, -91.3046875, -87.96875, -84.6328125, -81.296875, -77.9609375, -74.625, -71.2890625, -67.953125, -64.6171875, -61.28125, -57.9453125, -54.609375, -51.2734375, -47.9375, -44.6015625, -41.265625, -37.9296875, -34.59375, -31.2578125, -27.921875, -24.5859375, -21.25, -17.9140625, -14.578125, -11.2421875, -7.90625, -4.5703125, -1.234375, 2.1015625, 5.4375, 8.7734375, 12.109375, 15.4453125, 18.78125, 22.1171875, 25.453125, 28.7890625, 32.125, 35.4609375, 38.796875, 42.1328125, 45.46875, 48.8046875, 52.140625, 55.4765625, 58.8125, 62.1484375, 65.484375, 68.8203125, 72.15625, 75.4921875, 78.828125, 82.1640625, 85.5]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 3.0, 3.0, 8.0, 15.0, 18.0, 60.0, 100.0, 145.0, 159.0, 170.0, 137.0, 80.0, 46.0, 24.0, 14.0, 10.0, 4.0, 1.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.603515625, -2.534942626953125, -2.46636962890625, -2.397796630859375, -2.3292236328125, -2.260650634765625, -2.19207763671875, -2.123504638671875, -2.054931640625, -1.986358642578125, -1.91778564453125, -1.849212646484375, -1.7806396484375, -1.712066650390625, -1.64349365234375, -1.574920654296875, -1.50634765625, -1.437774658203125, -1.36920166015625, -1.300628662109375, -1.2320556640625, -1.163482666015625, -1.09490966796875, -1.026336669921875, -0.957763671875, -0.889190673828125, -0.82061767578125, -0.752044677734375, -0.6834716796875, -0.614898681640625, -0.54632568359375, -0.477752685546875, -0.4091796875, -0.340606689453125, -0.27203369140625, -0.203460693359375, -0.1348876953125, -0.066314697265625, 0.00225830078125, 0.070831298828125, 0.139404296875, 0.207977294921875, 0.27655029296875, 0.345123291015625, 0.4136962890625, 0.482269287109375, 0.55084228515625, 0.619415283203125, 0.68798828125, 0.756561279296875, 0.82513427734375, 0.893707275390625, 0.9622802734375, 1.030853271484375, 1.09942626953125, 1.167999267578125, 1.236572265625, 1.305145263671875, 1.37371826171875, 1.442291259765625, 1.5108642578125, 1.579437255859375, 1.64801025390625, 1.716583251953125, 1.78515625]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 13.0, 11.0, 19.0, 30.0, 35.0, 52.0, 88.0, 165.0, 620.0, 8336.0, 4134805.0, 48166.0, 1357.0, 297.0, 117.0, 58.0, 51.0, 25.0, 16.0, 16.0, 5.0, 5.0, 5.0, 0.0, 1.0, 3.0], "bins": [-78.25, -76.642578125, -75.03515625, -73.427734375, -71.8203125, -70.212890625, -68.60546875, -66.998046875, -65.390625, -63.783203125, -62.17578125, -60.568359375, -58.9609375, -57.353515625, -55.74609375, -54.138671875, -52.53125, -50.923828125, -49.31640625, -47.708984375, -46.1015625, -44.494140625, -42.88671875, -41.279296875, -39.671875, -38.064453125, -36.45703125, -34.849609375, -33.2421875, -31.634765625, -30.02734375, -28.419921875, -26.8125, -25.205078125, -23.59765625, -21.990234375, -20.3828125, -18.775390625, -17.16796875, -15.560546875, -13.953125, -12.345703125, -10.73828125, -9.130859375, -7.5234375, -5.916015625, -4.30859375, -2.701171875, -1.09375, 0.513671875, 2.12109375, 3.728515625, 5.3359375, 6.943359375, 8.55078125, 10.158203125, 11.765625, 13.373046875, 14.98046875, 16.587890625, 18.1953125, 19.802734375, 21.41015625, 23.017578125, 24.625]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 2.0, 4.0, 4.0, 5.0, 7.0, 20.0, 46.0, 92.0, 231.0, 2328.0, 989.0, 188.0, 64.0, 49.0, 21.0, 10.0, 9.0, 8.0, 4.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.5546875, -5.40423583984375, -5.2537841796875, -5.10333251953125, -4.952880859375, -4.80242919921875, -4.6519775390625, -4.50152587890625, -4.35107421875, -4.20062255859375, -4.0501708984375, -3.89971923828125, -3.749267578125, -3.59881591796875, -3.4483642578125, -3.29791259765625, -3.1474609375, -2.99700927734375, -2.8465576171875, -2.69610595703125, -2.545654296875, -2.39520263671875, -2.2447509765625, -2.09429931640625, -1.94384765625, -1.79339599609375, -1.6429443359375, -1.49249267578125, -1.342041015625, -1.19158935546875, -1.0411376953125, -0.89068603515625, -0.740234375, -0.58978271484375, -0.4393310546875, -0.28887939453125, -0.138427734375, 0.01202392578125, 0.1624755859375, 0.31292724609375, 0.46337890625, 0.61383056640625, 0.7642822265625, 0.91473388671875, 1.065185546875, 1.21563720703125, 1.3660888671875, 1.51654052734375, 1.6669921875, 1.81744384765625, 1.9678955078125, 2.11834716796875, 2.268798828125, 2.41925048828125, 2.5697021484375, 2.72015380859375, 2.87060546875, 3.02105712890625, 3.1715087890625, 3.32196044921875, 3.472412109375, 3.62286376953125, 3.7733154296875, 3.92376708984375, 4.07421875]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 4.0, 13.0, 28.0, 81.0, 195.0, 329.0, 211.0, 74.0, 37.0, 19.0, 7.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.809279441833496, -13.914764404296875, -13.02025032043457, -12.12573528289795, -11.231220245361328, -10.336705207824707, -9.442190170288086, -8.547676086425781, -7.65316104888916, -6.758646011352539, -5.864131450653076, -4.969616889953613, -4.075101852416992, -3.180586814880371, -2.286072254180908, -1.3915576934814453, -0.4970426559448242, 0.3974721431732178, 1.2919869422912598, 2.1865017414093018, 3.0810165405273438, 3.975531578063965, 4.870046138763428, 5.764560699462891, 6.659075736999512, 7.553590774536133, 8.448104858398438, 9.342619895935059, 10.23713493347168, 11.1316499710083, 12.026165008544922, 12.920679092407227, 13.815193176269531, 14.709708213806152, 15.604223251342773, 16.498737335205078, 17.393253326416016, 18.28776741027832, 19.182281494140625, 20.076797485351562, 20.971311569213867, 21.865825653076172, 22.76034164428711, 23.654855728149414, 24.54936981201172, 25.443885803222656, 26.33839988708496, 27.232913970947266, 28.127429962158203, 29.021944046020508, 29.916460037231445, 30.81097412109375, 31.705490112304688, 32.600006103515625, 33.4945182800293, 34.389034271240234, 35.283546447753906, 36.178062438964844, 37.072574615478516, 37.96709060668945, 38.86160659790039, 39.75611877441406, 40.650634765625, 41.54515075683594, 42.439666748046875]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0, 3.0, 6.0, 4.0, 2.0, 10.0, 14.0, 15.0, 12.0, 25.0, 31.0, 31.0, 35.0, 33.0, 36.0, 45.0, 44.0, 53.0, 60.0, 57.0, 60.0, 51.0, 48.0, 50.0, 35.0, 35.0, 40.0, 30.0, 27.0, 26.0, 16.0, 19.0, 14.0, 8.0, 8.0, 7.0, 7.0, 0.0, 1.0, 1.0, 2.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.732166290283203, -8.455063819885254, -8.177961349487305, -7.900857925415039, -7.62375545501709, -7.346652984619141, -7.069550037384033, -6.792447090148926, -6.515344619750977, -6.238242149353027, -5.96113920211792, -5.6840362548828125, -5.406933784484863, -5.129831314086914, -4.852728366851807, -4.575625419616699, -4.29852294921875, -4.021420478820801, -3.7443175315856934, -3.467214822769165, -3.1901121139526367, -2.9130094051361084, -2.63590669631958, -2.3588039875030518, -2.0817012786865234, -1.8045985698699951, -1.5274958610534668, -1.2503931522369385, -0.9732904434204102, -0.6961877346038818, -0.4190850257873535, -0.1419823169708252, 0.13511943817138672, 0.41222214698791504, 0.6893248558044434, 0.9664275646209717, 1.2435302734375, 1.5206329822540283, 1.7977356910705566, 2.074838399887085, 2.3519411087036133, 2.6290438175201416, 2.90614652633667, 3.1832492351531982, 3.4603519439697266, 3.737454652786255, 4.014557361602783, 4.291660308837891, 4.56876277923584, 4.845865249633789, 5.1229681968688965, 5.400071144104004, 5.677173614501953, 5.954276084899902, 6.23137903213501, 6.508481979370117, 6.785584449768066, 7.062686920166016, 7.339789867401123, 7.6168928146362305, 7.89399528503418, 8.171097755432129, 8.448200225830078, 8.725303649902344, 9.002406120300293]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 4.0, 4.0, 7.0, 12.0, 22.0, 30.0, 44.0, 61.0, 72.0, 121.0, 226.0, 341.0, 559.0, 1061.0, 2366.0, 7742.0, 71425.0, 816845.0, 131543.0, 10382.0, 2820.0, 1264.0, 608.0, 386.0, 215.0, 123.0, 87.0, 72.0, 30.0, 18.0, 18.0, 11.0, 11.0, 7.0, 6.0, 3.0, 4.0, 4.0, 3.0, 0.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.09375, -15.5999755859375, -15.106201171875, -14.6124267578125, -14.11865234375, -13.6248779296875, -13.131103515625, -12.6373291015625, -12.1435546875, -11.6497802734375, -11.156005859375, -10.6622314453125, -10.16845703125, -9.6746826171875, -9.180908203125, -8.6871337890625, -8.193359375, -7.6995849609375, -7.205810546875, -6.7120361328125, -6.21826171875, -5.7244873046875, -5.230712890625, -4.7369384765625, -4.2431640625, -3.7493896484375, -3.255615234375, -2.7618408203125, -2.26806640625, -1.7742919921875, -1.280517578125, -0.7867431640625, -0.29296875, 0.2008056640625, 0.694580078125, 1.1883544921875, 1.68212890625, 2.1759033203125, 2.669677734375, 3.1634521484375, 3.6572265625, 4.1510009765625, 4.644775390625, 5.1385498046875, 5.63232421875, 6.1260986328125, 6.619873046875, 7.1136474609375, 7.607421875, 8.1011962890625, 8.594970703125, 9.0887451171875, 9.58251953125, 10.0762939453125, 10.570068359375, 11.0638427734375, 11.5576171875, 12.0513916015625, 12.545166015625, 13.0389404296875, 13.53271484375, 14.0264892578125, 14.520263671875, 15.0140380859375, 15.5078125]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 2.0, 2.0, 11.0, 9.0, 20.0, 50.0, 81.0, 121.0, 174.0, 173.0, 140.0, 95.0, 65.0, 32.0, 12.0, 5.0, 4.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.609375, -2.5428009033203125, -2.476226806640625, -2.4096527099609375, -2.34307861328125, -2.2765045166015625, -2.209930419921875, -2.1433563232421875, -2.0767822265625, -2.0102081298828125, -1.943634033203125, -1.8770599365234375, -1.81048583984375, -1.7439117431640625, -1.677337646484375, -1.6107635498046875, -1.544189453125, -1.4776153564453125, -1.411041259765625, -1.3444671630859375, -1.27789306640625, -1.2113189697265625, -1.144744873046875, -1.0781707763671875, -1.0115966796875, -0.9450225830078125, -0.878448486328125, -0.8118743896484375, -0.74530029296875, -0.6787261962890625, -0.612152099609375, -0.5455780029296875, -0.47900390625, -0.4124298095703125, -0.345855712890625, -0.2792816162109375, -0.21270751953125, -0.1461334228515625, -0.079559326171875, -0.0129852294921875, 0.0535888671875, 0.1201629638671875, 0.186737060546875, 0.2533111572265625, 0.31988525390625, 0.3864593505859375, 0.453033447265625, 0.5196075439453125, 0.586181640625, 0.6527557373046875, 0.719329833984375, 0.7859039306640625, 0.85247802734375, 0.9190521240234375, 0.985626220703125, 1.0522003173828125, 1.1187744140625, 1.1853485107421875, 1.251922607421875, 1.3184967041015625, 1.38507080078125, 1.4516448974609375, 1.518218994140625, 1.5847930908203125, 1.6513671875]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 5.0, 5.0, 8.0, 11.0, 13.0, 23.0, 24.0, 44.0, 76.0, 107.0, 184.0, 328.0, 592.0, 1200.0, 2883.0, 9130.0, 46434.0, 437124.0, 482911.0, 51853.0, 9900.0, 3029.0, 1247.0, 607.0, 311.0, 170.0, 124.0, 71.0, 47.0, 28.0, 25.0, 10.0, 14.0, 7.0, 4.0, 5.0, 3.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0], "bins": [-12.2890625, -11.9571533203125, -11.625244140625, -11.2933349609375, -10.96142578125, -10.6295166015625, -10.297607421875, -9.9656982421875, -9.6337890625, -9.3018798828125, -8.969970703125, -8.6380615234375, -8.30615234375, -7.9742431640625, -7.642333984375, -7.3104248046875, -6.978515625, -6.6466064453125, -6.314697265625, -5.9827880859375, -5.65087890625, -5.3189697265625, -4.987060546875, -4.6551513671875, -4.3232421875, -3.9913330078125, -3.659423828125, -3.3275146484375, -2.99560546875, -2.6636962890625, -2.331787109375, -1.9998779296875, -1.66796875, -1.3360595703125, -1.004150390625, -0.6722412109375, -0.34033203125, -0.0084228515625, 0.323486328125, 0.6553955078125, 0.9873046875, 1.3192138671875, 1.651123046875, 1.9830322265625, 2.31494140625, 2.6468505859375, 2.978759765625, 3.3106689453125, 3.642578125, 3.9744873046875, 4.306396484375, 4.6383056640625, 4.97021484375, 5.3021240234375, 5.634033203125, 5.9659423828125, 6.2978515625, 6.6297607421875, 6.961669921875, 7.2935791015625, 7.62548828125, 7.9573974609375, 8.289306640625, 8.6212158203125, 8.953125]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 3.0, 3.0, 4.0, 11.0, 10.0, 11.0, 13.0, 17.0, 38.0, 36.0, 52.0, 65.0, 74.0, 87.0, 111.0, 104.0, 71.0, 66.0, 59.0, 49.0, 30.0, 33.0, 19.0, 17.0, 11.0, 4.0, 2.0, 1.0, 1.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.796875, -9.54278564453125, -9.2886962890625, -9.03460693359375, -8.780517578125, -8.52642822265625, -8.2723388671875, -8.01824951171875, -7.76416015625, -7.51007080078125, -7.2559814453125, -7.00189208984375, -6.747802734375, -6.49371337890625, -6.2396240234375, -5.98553466796875, -5.7314453125, -5.47735595703125, -5.2232666015625, -4.96917724609375, -4.715087890625, -4.46099853515625, -4.2069091796875, -3.95281982421875, -3.69873046875, -3.44464111328125, -3.1905517578125, -2.93646240234375, -2.682373046875, -2.42828369140625, -2.1741943359375, -1.92010498046875, -1.666015625, -1.41192626953125, -1.1578369140625, -0.90374755859375, -0.649658203125, -0.39556884765625, -0.1414794921875, 0.11260986328125, 0.36669921875, 0.62078857421875, 0.8748779296875, 1.12896728515625, 1.383056640625, 1.63714599609375, 1.8912353515625, 2.14532470703125, 2.3994140625, 2.65350341796875, 2.9075927734375, 3.16168212890625, 3.415771484375, 3.66986083984375, 3.9239501953125, 4.17803955078125, 4.43212890625, 4.68621826171875, 4.9403076171875, 5.19439697265625, 5.448486328125, 5.70257568359375, 5.9566650390625, 6.21075439453125, 6.46484375]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 7.0, 9.0, 12.0, 23.0, 71.0, 143.0, 360.0, 2425.0, 895620.0, 147810.0, 1542.0, 307.0, 121.0, 63.0, 18.0, 11.0, 8.0, 2.0, 3.0, 5.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-39.40625, -38.3935546875, -37.380859375, -36.3681640625, -35.35546875, -34.3427734375, -33.330078125, -32.3173828125, -31.3046875, -30.2919921875, -29.279296875, -28.2666015625, -27.25390625, -26.2412109375, -25.228515625, -24.2158203125, -23.203125, -22.1904296875, -21.177734375, -20.1650390625, -19.15234375, -18.1396484375, -17.126953125, -16.1142578125, -15.1015625, -14.0888671875, -13.076171875, -12.0634765625, -11.05078125, -10.0380859375, -9.025390625, -8.0126953125, -7.0, -5.9873046875, -4.974609375, -3.9619140625, -2.94921875, -1.9365234375, -0.923828125, 0.0888671875, 1.1015625, 2.1142578125, 3.126953125, 4.1396484375, 5.15234375, 6.1650390625, 7.177734375, 8.1904296875, 9.203125, 10.2158203125, 11.228515625, 12.2412109375, 13.25390625, 14.2666015625, 15.279296875, 16.2919921875, 17.3046875, 18.3173828125, 19.330078125, 20.3427734375, 21.35546875, 22.3681640625, 23.380859375, 24.3935546875, 25.40625]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 4.0, 3.0, 4.0, 7.0, 15.0, 16.0, 22.0, 41.0, 50.0, 71.0, 112.0, 143.0, 126.0, 137.0, 73.0, 58.0, 33.0, 24.0, 13.0, 15.0, 4.0, 7.0, 4.0, 6.0, 2.0, 4.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005950927734375, -0.0005752593278884888, -0.0005554258823394775, -0.0005355924367904663, -0.0005157589912414551, -0.0004959255456924438, -0.0004760921001434326, -0.0004562586545944214, -0.00043642520904541016, -0.0004165917634963989, -0.0003967583179473877, -0.00037692487239837646, -0.00035709142684936523, -0.000337257981300354, -0.0003174245357513428, -0.00029759109020233154, -0.0002777576446533203, -0.0002579241991043091, -0.00023809075355529785, -0.00021825730800628662, -0.0001984238624572754, -0.00017859041690826416, -0.00015875697135925293, -0.0001389235258102417, -0.00011909008026123047, -9.925663471221924e-05, -7.942318916320801e-05, -5.958974361419678e-05, -3.975629806518555e-05, -1.9922852516174316e-05, -8.940696716308594e-08, 1.9744038581848145e-05, 3.9577484130859375e-05, 5.9410929679870605e-05, 7.924437522888184e-05, 9.907782077789307e-05, 0.0001189112663269043, 0.00013874471187591553, 0.00015857815742492676, 0.000178411602973938, 0.00019824504852294922, 0.00021807849407196045, 0.00023791193962097168, 0.0002577453851699829, 0.00027757883071899414, 0.00029741227626800537, 0.0003172457218170166, 0.00033707916736602783, 0.00035691261291503906, 0.0003767460584640503, 0.0003965795040130615, 0.00041641294956207275, 0.000436246395111084, 0.0004560798406600952, 0.00047591328620910645, 0.0004957467317581177, 0.0005155801773071289, 0.0005354136228561401, 0.0005552470684051514, 0.0005750805139541626, 0.0005949139595031738, 0.0006147474050521851, 0.0006345808506011963, 0.0006544142961502075, 0.0006742477416992188]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 4.0, 2.0, 7.0, 8.0, 19.0, 13.0, 26.0, 64.0, 136.0, 375.0, 1233.0, 6186.0, 286368.0, 743000.0, 8834.0, 1542.0, 420.0, 167.0, 64.0, 40.0, 16.0, 13.0, 9.0, 7.0, 2.0, 1.0, 0.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.5234375, -14.0240478515625, -13.524658203125, -13.0252685546875, -12.52587890625, -12.0264892578125, -11.527099609375, -11.0277099609375, -10.5283203125, -10.0289306640625, -9.529541015625, -9.0301513671875, -8.53076171875, -8.0313720703125, -7.531982421875, -7.0325927734375, -6.533203125, -6.0338134765625, -5.534423828125, -5.0350341796875, -4.53564453125, -4.0362548828125, -3.536865234375, -3.0374755859375, -2.5380859375, -2.0386962890625, -1.539306640625, -1.0399169921875, -0.54052734375, -0.0411376953125, 0.458251953125, 0.9576416015625, 1.45703125, 1.9564208984375, 2.455810546875, 2.9552001953125, 3.45458984375, 3.9539794921875, 4.453369140625, 4.9527587890625, 5.4521484375, 5.9515380859375, 6.450927734375, 6.9503173828125, 7.44970703125, 7.9490966796875, 8.448486328125, 8.9478759765625, 9.447265625, 9.9466552734375, 10.446044921875, 10.9454345703125, 11.44482421875, 11.9442138671875, 12.443603515625, 12.9429931640625, 13.4423828125, 13.9417724609375, 14.441162109375, 14.9405517578125, 15.43994140625, 15.9393310546875, 16.438720703125, 16.9381103515625, 17.4375]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 3.0, 1.0, 2.0, 1.0, 4.0, 18.0, 23.0, 53.0, 57.0, 101.0, 161.0, 132.0, 166.0, 103.0, 68.0, 50.0, 22.0, 11.0, 10.0, 5.0, 4.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.3671875, -4.243743896484375, -4.12030029296875, -3.996856689453125, -3.8734130859375, -3.749969482421875, -3.62652587890625, -3.503082275390625, -3.379638671875, -3.256195068359375, -3.13275146484375, -3.009307861328125, -2.8858642578125, -2.762420654296875, -2.63897705078125, -2.515533447265625, -2.39208984375, -2.268646240234375, -2.14520263671875, -2.021759033203125, -1.8983154296875, -1.774871826171875, -1.65142822265625, -1.527984619140625, -1.404541015625, -1.281097412109375, -1.15765380859375, -1.034210205078125, -0.9107666015625, -0.787322998046875, -0.66387939453125, -0.540435791015625, -0.4169921875, -0.293548583984375, -0.17010498046875, -0.046661376953125, 0.0767822265625, 0.200225830078125, 0.32366943359375, 0.447113037109375, 0.570556640625, 0.694000244140625, 0.81744384765625, 0.940887451171875, 1.0643310546875, 1.187774658203125, 1.31121826171875, 1.434661865234375, 1.55810546875, 1.681549072265625, 1.80499267578125, 1.928436279296875, 2.0518798828125, 2.175323486328125, 2.29876708984375, 2.422210693359375, 2.545654296875, 2.669097900390625, 2.79254150390625, 2.915985107421875, 3.0394287109375, 3.162872314453125, 3.28631591796875, 3.409759521484375, 3.533203125]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 3.0, 14.0, 54.0, 263.0, 501.0, 139.0, 21.0, 7.0, 3.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-129.68284606933594, -125.55628967285156, -121.42972564697266, -117.30316162109375, -113.17660522460938, -109.050048828125, -104.9234848022461, -100.79692077636719, -96.67036437988281, -92.54380798339844, -88.41724395751953, -84.29067993164062, -80.16412353515625, -76.03756713867188, -71.91100311279297, -67.78443908691406, -63.65788269042969, -59.53132247924805, -55.404762268066406, -51.278202056884766, -47.151641845703125, -43.025081634521484, -38.898521423339844, -34.7719612121582, -30.645401000976562, -26.518840789794922, -22.39228057861328, -18.26572036743164, -14.13916015625, -10.01259994506836, -5.886039733886719, -1.7594795227050781, 2.367095947265625, 6.493656158447266, 10.620216369628906, 14.746776580810547, 18.873336791992188, 22.999897003173828, 27.12645721435547, 31.25301742553711, 35.37957763671875, 39.50613784790039, 43.63269805908203, 47.75925827026367, 51.88581848144531, 56.01237869262695, 60.138938903808594, 64.2655029296875, 68.39205932617188, 72.51861572265625, 76.64517974853516, 80.77174377441406, 84.89830017089844, 89.02485656738281, 93.15142059326172, 97.27798461914062, 101.404541015625, 105.53109741210938, 109.65766143798828, 113.78422546386719, 117.91078186035156, 122.03733825683594, 126.16390228271484, 130.29046630859375, 134.41702270507812]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 6.0, 0.0, 3.0, 8.0, 2.0, 7.0, 11.0, 6.0, 14.0, 20.0, 25.0, 27.0, 31.0, 28.0, 33.0, 54.0, 57.0, 49.0, 56.0, 60.0, 72.0, 51.0, 60.0, 53.0, 36.0, 44.0, 42.0, 20.0, 38.0, 22.0, 20.0, 14.0, 12.0, 8.0, 5.0, 5.0, 2.0, 7.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-41.000144958496094, -39.90468215942383, -38.80922317504883, -37.71376037597656, -36.61830139160156, -35.5228385925293, -34.42737579345703, -33.33191680908203, -32.236454010009766, -31.140993118286133, -30.0455322265625, -28.950069427490234, -27.8546085357666, -26.75914764404297, -25.663684844970703, -24.56822395324707, -23.472763061523438, -22.377302169799805, -21.281841278076172, -20.186378479003906, -19.090917587280273, -17.99545669555664, -16.899993896484375, -15.804533004760742, -14.70907211303711, -13.613611221313477, -12.518149375915527, -11.422687530517578, -10.327226638793945, -9.231765747070312, -8.136303901672363, -7.040842533111572, -5.945381164550781, -4.84991979598999, -3.754458427429199, -2.658997058868408, -1.5635356903076172, -0.46807432174682617, 0.6273870468139648, 1.7228484153747559, 2.818309783935547, 3.913771152496338, 5.009232521057129, 6.10469388961792, 7.200155258178711, 8.295616149902344, 9.391077995300293, 10.486539840698242, 11.582000732421875, 12.677461624145508, 13.772923469543457, 14.868385314941406, 15.963846206665039, 17.059307098388672, 18.154769897460938, 19.25023078918457, 20.345691680908203, 21.441152572631836, 22.53661346435547, 23.632076263427734, 24.727537155151367, 25.822998046875, 26.918460845947266, 28.0139217376709, 29.10938262939453]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 5.0, 7.0, 9.0, 15.0, 16.0, 30.0, 46.0, 82.0, 121.0, 250.0, 573.0, 1742.0, 8735.0, 186588.0, 3968147.0, 22925.0, 3267.0, 946.0, 401.0, 168.0, 83.0, 54.0, 26.0, 21.0, 11.0, 5.0, 3.0, 3.0, 5.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-31.796875, -30.98583984375, -30.1748046875, -29.36376953125, -28.552734375, -27.74169921875, -26.9306640625, -26.11962890625, -25.30859375, -24.49755859375, -23.6865234375, -22.87548828125, -22.064453125, -21.25341796875, -20.4423828125, -19.63134765625, -18.8203125, -18.00927734375, -17.1982421875, -16.38720703125, -15.576171875, -14.76513671875, -13.9541015625, -13.14306640625, -12.33203125, -11.52099609375, -10.7099609375, -9.89892578125, -9.087890625, -8.27685546875, -7.4658203125, -6.65478515625, -5.84375, -5.03271484375, -4.2216796875, -3.41064453125, -2.599609375, -1.78857421875, -0.9775390625, -0.16650390625, 0.64453125, 1.45556640625, 2.2666015625, 3.07763671875, 3.888671875, 4.69970703125, 5.5107421875, 6.32177734375, 7.1328125, 7.94384765625, 8.7548828125, 9.56591796875, 10.376953125, 11.18798828125, 11.9990234375, 12.81005859375, 13.62109375, 14.43212890625, 15.2431640625, 16.05419921875, 16.865234375, 17.67626953125, 18.4873046875, 19.29833984375, 20.109375]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 6.0, 7.0, 5.0, 12.0, 15.0, 31.0, 35.0, 38.0, 77.0, 93.0, 111.0, 130.0, 122.0, 95.0, 78.0, 44.0, 29.0, 41.0, 12.0, 10.0, 5.0, 3.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.181640625, -2.123504638671875, -2.06536865234375, -2.007232666015625, -1.9490966796875, -1.890960693359375, -1.83282470703125, -1.774688720703125, -1.716552734375, -1.658416748046875, -1.60028076171875, -1.542144775390625, -1.4840087890625, -1.425872802734375, -1.36773681640625, -1.309600830078125, -1.25146484375, -1.193328857421875, -1.13519287109375, -1.077056884765625, -1.0189208984375, -0.960784912109375, -0.90264892578125, -0.844512939453125, -0.786376953125, -0.728240966796875, -0.67010498046875, -0.611968994140625, -0.5538330078125, -0.495697021484375, -0.43756103515625, -0.379425048828125, -0.3212890625, -0.263153076171875, -0.20501708984375, -0.146881103515625, -0.0887451171875, -0.030609130859375, 0.02752685546875, 0.085662841796875, 0.143798828125, 0.201934814453125, 0.26007080078125, 0.318206787109375, 0.3763427734375, 0.434478759765625, 0.49261474609375, 0.550750732421875, 0.60888671875, 0.667022705078125, 0.72515869140625, 0.783294677734375, 0.8414306640625, 0.899566650390625, 0.95770263671875, 1.015838623046875, 1.073974609375, 1.132110595703125, 1.19024658203125, 1.248382568359375, 1.3065185546875, 1.364654541015625, 1.42279052734375, 1.480926513671875, 1.5390625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 5.0, 13.0, 15.0, 38.0, 43.0, 55.0, 71.0, 152.0, 502.0, 9135.0, 4137523.0, 45178.0, 1084.0, 208.0, 109.0, 60.0, 51.0, 16.0, 15.0, 7.0, 7.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-63.21875, -61.834228515625, -60.44970703125, -59.065185546875, -57.6806640625, -56.296142578125, -54.91162109375, -53.527099609375, -52.142578125, -50.758056640625, -49.37353515625, -47.989013671875, -46.6044921875, -45.219970703125, -43.83544921875, -42.450927734375, -41.06640625, -39.681884765625, -38.29736328125, -36.912841796875, -35.5283203125, -34.143798828125, -32.75927734375, -31.374755859375, -29.990234375, -28.605712890625, -27.22119140625, -25.836669921875, -24.4521484375, -23.067626953125, -21.68310546875, -20.298583984375, -18.9140625, -17.529541015625, -16.14501953125, -14.760498046875, -13.3759765625, -11.991455078125, -10.60693359375, -9.222412109375, -7.837890625, -6.453369140625, -5.06884765625, -3.684326171875, -2.2998046875, -0.915283203125, 0.46923828125, 1.853759765625, 3.23828125, 4.622802734375, 6.00732421875, 7.391845703125, 8.7763671875, 10.160888671875, 11.54541015625, 12.929931640625, 14.314453125, 15.698974609375, 17.08349609375, 18.468017578125, 19.8525390625, 21.237060546875, 22.62158203125, 24.006103515625, 25.390625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 7.0, 9.0, 6.0, 14.0, 21.0, 43.0, 88.0, 197.0, 927.0, 2279.0, 241.0, 95.0, 53.0, 32.0, 26.0, 16.0, 14.0, 5.0, 3.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.5078125, -5.3828125, -5.2578125, -5.1328125, -5.0078125, -4.8828125, -4.7578125, -4.6328125, -4.5078125, -4.3828125, -4.2578125, -4.1328125, -4.0078125, -3.8828125, -3.7578125, -3.6328125, -3.5078125, -3.3828125, -3.2578125, -3.1328125, -3.0078125, -2.8828125, -2.7578125, -2.6328125, -2.5078125, -2.3828125, -2.2578125, -2.1328125, -2.0078125, -1.8828125, -1.7578125, -1.6328125, -1.5078125, -1.3828125, -1.2578125, -1.1328125, -1.0078125, -0.8828125, -0.7578125, -0.6328125, -0.5078125, -0.3828125, -0.2578125, -0.1328125, -0.0078125, 0.1171875, 0.2421875, 0.3671875, 0.4921875, 0.6171875, 0.7421875, 0.8671875, 0.9921875, 1.1171875, 1.2421875, 1.3671875, 1.4921875, 1.6171875, 1.7421875, 1.8671875, 1.9921875, 2.1171875, 2.2421875, 2.3671875, 2.4921875]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 3.0, 7.0, 27.0, 83.0, 195.0, 339.0, 223.0, 83.0, 15.0, 14.0, 8.0, 8.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-30.32158851623535, -29.387624740600586, -28.45366096496582, -27.519697189331055, -26.585731506347656, -25.65176773071289, -24.717803955078125, -23.78384017944336, -22.849876403808594, -21.915912628173828, -20.981948852539062, -20.047985076904297, -19.11402130126953, -18.180057525634766, -17.246091842651367, -16.3121280670166, -15.378164291381836, -14.44420051574707, -13.510236740112305, -12.576272010803223, -11.642308235168457, -10.708344459533691, -9.77437973022461, -8.840415954589844, -7.906452178955078, -6.9724884033203125, -6.038524150848389, -5.104559898376465, -4.170596122741699, -3.2366323471069336, -2.3026680946350098, -1.368703842163086, -0.4347419738769531, 0.4992220401763916, 1.4331860542297363, 2.367150068283081, 3.301114082336426, 4.235077857971191, 5.169042110443115, 6.103006362915039, 7.036970138549805, 7.97093391418457, 8.904897689819336, 9.838862419128418, 10.772826194763184, 11.70678997039795, 12.640754699707031, 13.574718475341797, 14.508682250976562, 15.442646026611328, 16.376609802246094, 17.31057357788086, 18.244537353515625, 19.17850112915039, 20.11246681213379, 21.046430587768555, 21.98039436340332, 22.914358139038086, 23.84832191467285, 24.782285690307617, 25.716251373291016, 26.65021514892578, 27.584178924560547, 28.518142700195312, 29.452106475830078]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 5.0, 7.0, 4.0, 9.0, 11.0, 15.0, 15.0, 24.0, 25.0, 40.0, 38.0, 42.0, 41.0, 57.0, 44.0, 46.0, 47.0, 68.0, 64.0, 48.0, 42.0, 44.0, 42.0, 43.0, 40.0, 28.0, 23.0, 18.0, 15.0, 10.0, 11.0, 11.0, 7.0, 10.0, 3.0, 1.0, 3.0, 4.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.262730121612549, -7.011105060577393, -6.759479522705078, -6.507854461669922, -6.256229400634766, -6.004603862762451, -5.752978801727295, -5.5013532638549805, -5.249728202819824, -4.998103141784668, -4.7464776039123535, -4.494852542877197, -4.243227005004883, -3.9916019439697266, -3.7399768829345703, -3.488351583480835, -3.2367262840270996, -2.9851009845733643, -2.733475685119629, -2.4818506240844727, -2.2302253246307373, -1.978600025177002, -1.7269748449325562, -1.4753496646881104, -1.223724365234375, -0.9720991253852844, -0.7204738855361938, -0.46884864568710327, -0.2172234058380127, 0.034401893615722656, 0.28602707386016846, 0.5376522541046143, 0.7892780303955078, 1.0409033298492432, 1.292528510093689, 1.5441536903381348, 1.7957789897918701, 2.0474042892456055, 2.2990293502807617, 2.550654649734497, 2.8022799491882324, 3.0539052486419678, 3.305530548095703, 3.5571556091308594, 3.8087809085845947, 4.06040620803833, 4.312031269073486, 4.563656806945801, 4.815281867980957, 5.066906929016113, 5.318532466888428, 5.570157527923584, 5.821783065795898, 6.073408126831055, 6.325033187866211, 6.576658248901367, 6.828283786773682, 7.079908847808838, 7.331534385681152, 7.583159446716309, 7.834784507751465, 8.086410522460938, 8.338035583496094, 8.58966064453125, 8.841285705566406]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 6.0, 10.0, 10.0, 15.0, 21.0, 22.0, 40.0, 53.0, 79.0, 94.0, 143.0, 219.0, 356.0, 574.0, 916.0, 1722.0, 3433.0, 8558.0, 27305.0, 113200.0, 448038.0, 333315.0, 77300.0, 19884.0, 6676.0, 2811.0, 1424.0, 828.0, 502.0, 327.0, 212.0, 136.0, 100.0, 71.0, 45.0, 34.0, 15.0, 16.0, 12.0, 9.0, 7.0, 8.0, 5.0, 1.0, 2.0, 0.0, 3.0, 4.0, 0.0, 0.0, 0.0, 2.0], "bins": [-7.44140625, -7.21502685546875, -6.9886474609375, -6.76226806640625, -6.535888671875, -6.30950927734375, -6.0831298828125, -5.85675048828125, -5.63037109375, -5.40399169921875, -5.1776123046875, -4.95123291015625, -4.724853515625, -4.49847412109375, -4.2720947265625, -4.04571533203125, -3.8193359375, -3.59295654296875, -3.3665771484375, -3.14019775390625, -2.913818359375, -2.68743896484375, -2.4610595703125, -2.23468017578125, -2.00830078125, -1.78192138671875, -1.5555419921875, -1.32916259765625, -1.102783203125, -0.87640380859375, -0.6500244140625, -0.42364501953125, -0.197265625, 0.02911376953125, 0.2554931640625, 0.48187255859375, 0.708251953125, 0.93463134765625, 1.1610107421875, 1.38739013671875, 1.61376953125, 1.84014892578125, 2.0665283203125, 2.29290771484375, 2.519287109375, 2.74566650390625, 2.9720458984375, 3.19842529296875, 3.4248046875, 3.65118408203125, 3.8775634765625, 4.10394287109375, 4.330322265625, 4.55670166015625, 4.7830810546875, 5.00946044921875, 5.23583984375, 5.46221923828125, 5.6885986328125, 5.91497802734375, 6.141357421875, 6.36773681640625, 6.5941162109375, 6.82049560546875, 7.046875]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 4.0, 10.0, 8.0, 13.0, 18.0, 20.0, 36.0, 50.0, 61.0, 78.0, 92.0, 99.0, 108.0, 85.0, 85.0, 76.0, 52.0, 39.0, 21.0, 17.0, 12.0, 4.0, 5.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-1.9599609375, -1.9084320068359375, -1.856903076171875, -1.8053741455078125, -1.75384521484375, -1.7023162841796875, -1.650787353515625, -1.5992584228515625, -1.5477294921875, -1.4962005615234375, -1.444671630859375, -1.3931427001953125, -1.34161376953125, -1.2900848388671875, -1.238555908203125, -1.1870269775390625, -1.135498046875, -1.0839691162109375, -1.032440185546875, -0.9809112548828125, -0.92938232421875, -0.8778533935546875, -0.826324462890625, -0.7747955322265625, -0.7232666015625, -0.6717376708984375, -0.620208740234375, -0.5686798095703125, -0.51715087890625, -0.4656219482421875, -0.414093017578125, -0.3625640869140625, -0.31103515625, -0.2595062255859375, -0.207977294921875, -0.1564483642578125, -0.10491943359375, -0.0533905029296875, -0.001861572265625, 0.0496673583984375, 0.1011962890625, 0.1527252197265625, 0.204254150390625, 0.2557830810546875, 0.30731201171875, 0.3588409423828125, 0.410369873046875, 0.4618988037109375, 0.513427734375, 0.5649566650390625, 0.616485595703125, 0.6680145263671875, 0.71954345703125, 0.7710723876953125, 0.822601318359375, 0.8741302490234375, 0.9256591796875, 0.9771881103515625, 1.028717041015625, 1.0802459716796875, 1.13177490234375, 1.1833038330078125, 1.234832763671875, 1.2863616943359375, 1.337890625]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 7.0, 5.0, 9.0, 13.0, 9.0, 7.0, 24.0, 33.0, 32.0, 62.0, 82.0, 168.0, 290.0, 606.0, 1420.0, 3610.0, 11220.0, 41958.0, 196880.0, 564454.0, 173837.0, 37837.0, 10103.0, 3357.0, 1209.0, 610.0, 275.0, 172.0, 96.0, 52.0, 38.0, 24.0, 16.0, 19.0, 13.0, 5.0, 5.0, 3.0, 2.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.6484375, -7.40533447265625, -7.1622314453125, -6.91912841796875, -6.676025390625, -6.43292236328125, -6.1898193359375, -5.94671630859375, -5.70361328125, -5.46051025390625, -5.2174072265625, -4.97430419921875, -4.731201171875, -4.48809814453125, -4.2449951171875, -4.00189208984375, -3.7587890625, -3.51568603515625, -3.2725830078125, -3.02947998046875, -2.786376953125, -2.54327392578125, -2.3001708984375, -2.05706787109375, -1.81396484375, -1.57086181640625, -1.3277587890625, -1.08465576171875, -0.841552734375, -0.59844970703125, -0.3553466796875, -0.11224365234375, 0.130859375, 0.37396240234375, 0.6170654296875, 0.86016845703125, 1.103271484375, 1.34637451171875, 1.5894775390625, 1.83258056640625, 2.07568359375, 2.31878662109375, 2.5618896484375, 2.80499267578125, 3.048095703125, 3.29119873046875, 3.5343017578125, 3.77740478515625, 4.0205078125, 4.26361083984375, 4.5067138671875, 4.74981689453125, 4.992919921875, 5.23602294921875, 5.4791259765625, 5.72222900390625, 5.96533203125, 6.20843505859375, 6.4515380859375, 6.69464111328125, 6.937744140625, 7.18084716796875, 7.4239501953125, 7.66705322265625, 7.91015625]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 2.0, 4.0, 2.0, 7.0, 12.0, 8.0, 9.0, 13.0, 14.0, 19.0, 24.0, 20.0, 34.0, 30.0, 33.0, 29.0, 38.0, 32.0, 51.0, 56.0, 58.0, 45.0, 48.0, 50.0, 46.0, 34.0, 41.0, 34.0, 30.0, 30.0, 23.0, 25.0, 21.0, 15.0, 12.0, 9.0, 9.0, 11.0, 7.0, 3.0, 6.0, 6.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.47265625, -4.31494140625, -4.1572265625, -3.99951171875, -3.841796875, -3.68408203125, -3.5263671875, -3.36865234375, -3.2109375, -3.05322265625, -2.8955078125, -2.73779296875, -2.580078125, -2.42236328125, -2.2646484375, -2.10693359375, -1.94921875, -1.79150390625, -1.6337890625, -1.47607421875, -1.318359375, -1.16064453125, -1.0029296875, -0.84521484375, -0.6875, -0.52978515625, -0.3720703125, -0.21435546875, -0.056640625, 0.10107421875, 0.2587890625, 0.41650390625, 0.57421875, 0.73193359375, 0.8896484375, 1.04736328125, 1.205078125, 1.36279296875, 1.5205078125, 1.67822265625, 1.8359375, 1.99365234375, 2.1513671875, 2.30908203125, 2.466796875, 2.62451171875, 2.7822265625, 2.93994140625, 3.09765625, 3.25537109375, 3.4130859375, 3.57080078125, 3.728515625, 3.88623046875, 4.0439453125, 4.20166015625, 4.359375, 4.51708984375, 4.6748046875, 4.83251953125, 4.990234375, 5.14794921875, 5.3056640625, 5.46337890625, 5.62109375]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 4.0, 2.0, 2.0, 10.0, 13.0, 11.0, 16.0, 23.0, 33.0, 59.0, 92.0, 144.0, 314.0, 672.0, 1650.0, 4928.0, 19156.0, 129569.0, 743733.0, 121786.0, 18328.0, 4851.0, 1687.0, 699.0, 311.0, 165.0, 92.0, 53.0, 32.0, 27.0, 32.0, 15.0, 12.0, 14.0, 13.0, 8.0, 2.0, 4.0, 2.0, 1.0, 0.0, 3.0, 1.0], "bins": [-5.7578125, -5.61199951171875, -5.4661865234375, -5.32037353515625, -5.174560546875, -5.02874755859375, -4.8829345703125, -4.73712158203125, -4.59130859375, -4.44549560546875, -4.2996826171875, -4.15386962890625, -4.008056640625, -3.86224365234375, -3.7164306640625, -3.57061767578125, -3.4248046875, -3.27899169921875, -3.1331787109375, -2.98736572265625, -2.841552734375, -2.69573974609375, -2.5499267578125, -2.40411376953125, -2.25830078125, -2.11248779296875, -1.9666748046875, -1.82086181640625, -1.675048828125, -1.52923583984375, -1.3834228515625, -1.23760986328125, -1.091796875, -0.94598388671875, -0.8001708984375, -0.65435791015625, -0.508544921875, -0.36273193359375, -0.2169189453125, -0.07110595703125, 0.07470703125, 0.22052001953125, 0.3663330078125, 0.51214599609375, 0.657958984375, 0.80377197265625, 0.9495849609375, 1.09539794921875, 1.2412109375, 1.38702392578125, 1.5328369140625, 1.67864990234375, 1.824462890625, 1.97027587890625, 2.1160888671875, 2.26190185546875, 2.40771484375, 2.55352783203125, 2.6993408203125, 2.84515380859375, 2.990966796875, 3.13677978515625, 3.2825927734375, 3.42840576171875, 3.57421875]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 4.0, 2.0, 1.0, 3.0, 7.0, 5.0, 8.0, 6.0, 20.0, 19.0, 28.0, 35.0, 52.0, 69.0, 94.0, 111.0, 115.0, 128.0, 81.0, 58.0, 40.0, 22.0, 17.0, 19.0, 17.0, 7.0, 9.0, 7.0, 4.0, 8.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0004911422729492188, -0.00047593191266059875, -0.00046072155237197876, -0.00044551119208335876, -0.00043030083179473877, -0.0004150904715061188, -0.0003998801112174988, -0.0003846697509288788, -0.0003694593906402588, -0.0003542490303516388, -0.0003390386700630188, -0.0003238283097743988, -0.0003086179494857788, -0.0002934075891971588, -0.0002781972289085388, -0.0002629868686199188, -0.00024777650833129883, -0.00023256614804267883, -0.00021735578775405884, -0.00020214542746543884, -0.00018693506717681885, -0.00017172470688819885, -0.00015651434659957886, -0.00014130398631095886, -0.00012609362602233887, -0.00011088326573371887, -9.567290544509888e-05, -8.046254515647888e-05, -6.525218486785889e-05, -5.004182457923889e-05, -3.4831464290618896e-05, -1.96211040019989e-05, -4.410743713378906e-06, 1.0799616575241089e-05, 2.6009976863861084e-05, 4.122033715248108e-05, 5.6430697441101074e-05, 7.164105772972107e-05, 8.685141801834106e-05, 0.00010206177830696106, 0.00011727213859558105, 0.00013248249888420105, 0.00014769285917282104, 0.00016290321946144104, 0.00017811357975006104, 0.00019332394003868103, 0.00020853430032730103, 0.00022374466061592102, 0.00023895502090454102, 0.000254165381193161, 0.000269375741481781, 0.000284586101770401, 0.000299796462059021, 0.000315006822347641, 0.000330217182636261, 0.000345427542924881, 0.000360637903213501, 0.00037584826350212097, 0.00039105862379074097, 0.00040626898407936096, 0.00042147934436798096, 0.00043668970465660095, 0.00045190006494522095, 0.00046711042523384094, 0.00048232078552246094]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 0.0, 0.0, 4.0, 3.0, 3.0, 4.0, 10.0, 18.0, 24.0, 25.0, 59.0, 106.0, 224.0, 740.0, 2540.0, 17552.0, 544005.0, 463939.0, 15690.0, 2479.0, 663.0, 236.0, 104.0, 51.0, 25.0, 21.0, 6.0, 3.0, 7.0, 3.0, 4.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.5703125, -7.30908203125, -7.0478515625, -6.78662109375, -6.525390625, -6.26416015625, -6.0029296875, -5.74169921875, -5.48046875, -5.21923828125, -4.9580078125, -4.69677734375, -4.435546875, -4.17431640625, -3.9130859375, -3.65185546875, -3.390625, -3.12939453125, -2.8681640625, -2.60693359375, -2.345703125, -2.08447265625, -1.8232421875, -1.56201171875, -1.30078125, -1.03955078125, -0.7783203125, -0.51708984375, -0.255859375, 0.00537109375, 0.2666015625, 0.52783203125, 0.7890625, 1.05029296875, 1.3115234375, 1.57275390625, 1.833984375, 2.09521484375, 2.3564453125, 2.61767578125, 2.87890625, 3.14013671875, 3.4013671875, 3.66259765625, 3.923828125, 4.18505859375, 4.4462890625, 4.70751953125, 4.96875, 5.22998046875, 5.4912109375, 5.75244140625, 6.013671875, 6.27490234375, 6.5361328125, 6.79736328125, 7.05859375, 7.31982421875, 7.5810546875, 7.84228515625, 8.103515625, 8.36474609375, 8.6259765625, 8.88720703125, 9.1484375]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 1.0, 9.0, 4.0, 5.0, 8.0, 11.0, 9.0, 9.0, 13.0, 28.0, 44.0, 49.0, 49.0, 78.0, 92.0, 98.0, 88.0, 98.0, 64.0, 53.0, 59.0, 29.0, 24.0, 24.0, 19.0, 10.0, 8.0, 4.0, 5.0, 5.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.51953125, -2.441131591796875, -2.36273193359375, -2.284332275390625, -2.2059326171875, -2.127532958984375, -2.04913330078125, -1.970733642578125, -1.892333984375, -1.813934326171875, -1.73553466796875, -1.657135009765625, -1.5787353515625, -1.500335693359375, -1.42193603515625, -1.343536376953125, -1.26513671875, -1.186737060546875, -1.10833740234375, -1.029937744140625, -0.9515380859375, -0.873138427734375, -0.79473876953125, -0.716339111328125, -0.637939453125, -0.559539794921875, -0.48114013671875, -0.402740478515625, -0.3243408203125, -0.245941162109375, -0.16754150390625, -0.089141845703125, -0.0107421875, 0.067657470703125, 0.14605712890625, 0.224456787109375, 0.3028564453125, 0.381256103515625, 0.45965576171875, 0.538055419921875, 0.616455078125, 0.694854736328125, 0.77325439453125, 0.851654052734375, 0.9300537109375, 1.008453369140625, 1.08685302734375, 1.165252685546875, 1.24365234375, 1.322052001953125, 1.40045166015625, 1.478851318359375, 1.5572509765625, 1.635650634765625, 1.71405029296875, 1.792449951171875, 1.870849609375, 1.949249267578125, 2.02764892578125, 2.106048583984375, 2.1844482421875, 2.262847900390625, 2.34124755859375, 2.419647216796875, 2.498046875]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 6.0, 3.0, 22.0, 95.0, 320.0, 408.0, 110.0, 34.0, 9.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-197.99044799804688, -194.19415283203125, -190.3978729248047, -186.60157775878906, -182.8052978515625, -179.00900268554688, -175.21270751953125, -171.4164276123047, -167.62013244628906, -163.82383728027344, -160.02755737304688, -156.23126220703125, -152.43496704101562, -148.63868713378906, -144.84239196777344, -141.04611206054688, -137.24981689453125, -133.45352172851562, -129.65724182128906, -125.86094665527344, -122.06465911865234, -118.26837158203125, -114.47207641601562, -110.67578887939453, -106.87950134277344, -103.08321380615234, -99.28692626953125, -95.49063110351562, -91.69434356689453, -87.89805603027344, -84.10176086425781, -80.30547332763672, -76.50918579101562, -72.71289825439453, -68.91661071777344, -65.12031555175781, -61.32402801513672, -57.527740478515625, -53.731449127197266, -49.935157775878906, -46.13887023925781, -42.34258270263672, -38.54629135131836, -34.75, -30.953712463378906, -27.15742301940918, -23.361133575439453, -19.564844131469727, -15.7685546875, -11.972265243530273, -8.175975799560547, -4.37968635559082, -0.5833969116210938, 3.212892532348633, 7.009181976318359, 10.805471420288086, 14.601760864257812, 18.39805030822754, 22.194339752197266, 25.990629196166992, 29.78691864013672, 33.58320617675781, 37.37949752807617, 41.17578887939453, 44.972076416015625]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 2.0, 5.0, 3.0, 8.0, 8.0, 15.0, 10.0, 14.0, 16.0, 25.0, 20.0, 41.0, 41.0, 37.0, 53.0, 60.0, 62.0, 72.0, 66.0, 64.0, 49.0, 49.0, 40.0, 45.0, 36.0, 35.0, 22.0, 25.0, 27.0, 15.0, 19.0, 10.0, 6.0, 6.0, 2.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.848594665527344, -40.72073745727539, -39.59288024902344, -38.46501922607422, -37.337162017822266, -36.20930480957031, -35.08144760131836, -33.953590393066406, -32.82573318481445, -31.6978759765625, -30.570016860961914, -29.44215965270996, -28.314302444458008, -27.186443328857422, -26.05858612060547, -24.930728912353516, -23.80286979675293, -22.675012588500977, -21.54715347290039, -20.419296264648438, -19.291439056396484, -18.16358184814453, -17.035722732543945, -15.907865524291992, -14.780007362365723, -13.652149200439453, -12.5242919921875, -11.39643383026123, -10.268575668334961, -9.140718460083008, -8.012860298156738, -6.885002613067627, -5.757146835327148, -4.629289150238037, -3.5014312267303467, -2.3735733032226562, -1.245715618133545, -0.1178579330444336, 1.010000228881836, 2.1378579139709473, 3.2657155990600586, 4.39357328414917, 5.521430969238281, 6.649289131164551, 7.777146816253662, 8.905004501342773, 10.032862663269043, 11.160720825195312, 12.288578033447266, 13.416436195373535, 14.544293403625488, 15.672151565551758, 16.80000877380371, 17.927867889404297, 19.05572509765625, 20.183582305908203, 21.311439514160156, 22.43929672241211, 23.567155838012695, 24.69501304626465, 25.8228702545166, 26.950729370117188, 28.07858657836914, 29.206443786621094, 30.33430290222168]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 7.0, 3.0, 3.0, 9.0, 3.0, 1.0, 5.0, 13.0, 20.0, 24.0, 36.0, 46.0, 62.0, 106.0, 178.0, 285.0, 541.0, 1237.0, 3208.0, 11984.0, 92960.0, 3981419.0, 84787.0, 11658.0, 3227.0, 1210.0, 558.0, 300.0, 144.0, 107.0, 53.0, 40.0, 18.0, 12.0, 4.0, 7.0, 4.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.6875, -14.295654296875, -13.90380859375, -13.511962890625, -13.1201171875, -12.728271484375, -12.33642578125, -11.944580078125, -11.552734375, -11.160888671875, -10.76904296875, -10.377197265625, -9.9853515625, -9.593505859375, -9.20166015625, -8.809814453125, -8.41796875, -8.026123046875, -7.63427734375, -7.242431640625, -6.8505859375, -6.458740234375, -6.06689453125, -5.675048828125, -5.283203125, -4.891357421875, -4.49951171875, -4.107666015625, -3.7158203125, -3.323974609375, -2.93212890625, -2.540283203125, -2.1484375, -1.756591796875, -1.36474609375, -0.972900390625, -0.5810546875, -0.189208984375, 0.20263671875, 0.594482421875, 0.986328125, 1.378173828125, 1.77001953125, 2.161865234375, 2.5537109375, 2.945556640625, 3.33740234375, 3.729248046875, 4.12109375, 4.512939453125, 4.90478515625, 5.296630859375, 5.6884765625, 6.080322265625, 6.47216796875, 6.864013671875, 7.255859375, 7.647705078125, 8.03955078125, 8.431396484375, 8.8232421875, 9.215087890625, 9.60693359375, 9.998779296875, 10.390625]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 6.0, 5.0, 10.0, 7.0, 18.0, 24.0, 26.0, 36.0, 47.0, 56.0, 80.0, 78.0, 93.0, 82.0, 74.0, 72.0, 74.0, 61.0, 40.0, 26.0, 25.0, 21.0, 14.0, 12.0, 6.0, 3.0, 1.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.7958984375, -1.744903564453125, -1.69390869140625, -1.642913818359375, -1.5919189453125, -1.540924072265625, -1.48992919921875, -1.438934326171875, -1.387939453125, -1.336944580078125, -1.28594970703125, -1.234954833984375, -1.1839599609375, -1.132965087890625, -1.08197021484375, -1.030975341796875, -0.97998046875, -0.928985595703125, -0.87799072265625, -0.826995849609375, -0.7760009765625, -0.725006103515625, -0.67401123046875, -0.623016357421875, -0.572021484375, -0.521026611328125, -0.47003173828125, -0.419036865234375, -0.3680419921875, -0.317047119140625, -0.26605224609375, -0.215057373046875, -0.1640625, -0.113067626953125, -0.06207275390625, -0.011077880859375, 0.0399169921875, 0.090911865234375, 0.14190673828125, 0.192901611328125, 0.243896484375, 0.294891357421875, 0.34588623046875, 0.396881103515625, 0.4478759765625, 0.498870849609375, 0.54986572265625, 0.600860595703125, 0.65185546875, 0.702850341796875, 0.75384521484375, 0.804840087890625, 0.8558349609375, 0.906829833984375, 0.95782470703125, 1.008819580078125, 1.059814453125, 1.110809326171875, 1.16180419921875, 1.212799072265625, 1.2637939453125, 1.314788818359375, 1.36578369140625, 1.416778564453125, 1.4677734375]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 4.0, 4.0, 3.0, 3.0, 3.0, 12.0, 15.0, 50.0, 109.0, 321.0, 1198.0, 7297.0, 163519.0, 3995718.0, 22357.0, 2690.0, 632.0, 199.0, 72.0, 39.0, 13.0, 8.0, 7.0, 7.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.65625, -19.02685546875, -18.3974609375, -17.76806640625, -17.138671875, -16.50927734375, -15.8798828125, -15.25048828125, -14.62109375, -13.99169921875, -13.3623046875, -12.73291015625, -12.103515625, -11.47412109375, -10.8447265625, -10.21533203125, -9.5859375, -8.95654296875, -8.3271484375, -7.69775390625, -7.068359375, -6.43896484375, -5.8095703125, -5.18017578125, -4.55078125, -3.92138671875, -3.2919921875, -2.66259765625, -2.033203125, -1.40380859375, -0.7744140625, -0.14501953125, 0.484375, 1.11376953125, 1.7431640625, 2.37255859375, 3.001953125, 3.63134765625, 4.2607421875, 4.89013671875, 5.51953125, 6.14892578125, 6.7783203125, 7.40771484375, 8.037109375, 8.66650390625, 9.2958984375, 9.92529296875, 10.5546875, 11.18408203125, 11.8134765625, 12.44287109375, 13.072265625, 13.70166015625, 14.3310546875, 14.96044921875, 15.58984375, 16.21923828125, 16.8486328125, 17.47802734375, 18.107421875, 18.73681640625, 19.3662109375, 19.99560546875, 20.625]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 7.0, 7.0, 0.0, 6.0, 3.0, 7.0, 8.0, 14.0, 25.0, 33.0, 72.0, 115.0, 241.0, 1148.0, 1809.0, 230.0, 127.0, 68.0, 53.0, 25.0, 23.0, 15.0, 4.0, 16.0, 6.0, 8.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.189453125, -2.12109375, -2.052734375, -1.984375, -1.916015625, -1.84765625, -1.779296875, -1.7109375, -1.642578125, -1.57421875, -1.505859375, -1.4375, -1.369140625, -1.30078125, -1.232421875, -1.1640625, -1.095703125, -1.02734375, -0.958984375, -0.890625, -0.822265625, -0.75390625, -0.685546875, -0.6171875, -0.548828125, -0.48046875, -0.412109375, -0.34375, -0.275390625, -0.20703125, -0.138671875, -0.0703125, -0.001953125, 0.06640625, 0.134765625, 0.203125, 0.271484375, 0.33984375, 0.408203125, 0.4765625, 0.544921875, 0.61328125, 0.681640625, 0.75, 0.818359375, 0.88671875, 0.955078125, 1.0234375, 1.091796875, 1.16015625, 1.228515625, 1.296875, 1.365234375, 1.43359375, 1.501953125, 1.5703125, 1.638671875, 1.70703125, 1.775390625, 1.84375, 1.912109375, 1.98046875, 2.048828125, 2.1171875, 2.185546875]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 8.0, 9.0, 21.0, 74.0, 153.0, 254.0, 251.0, 131.0, 49.0, 36.0, 7.0, 5.0, 3.0, 4.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.15121841430664, -15.548493385314941, -14.945768356323242, -14.343043327331543, -13.740318298339844, -13.137594223022461, -12.534869194030762, -11.932144165039062, -11.329419136047363, -10.726694107055664, -10.123969078063965, -9.521244049072266, -8.918519973754883, -8.315793991088867, -7.713069915771484, -7.110344886779785, -6.507619857788086, -5.904894828796387, -5.3021697998046875, -4.6994452476501465, -4.096720218658447, -3.493995189666748, -2.891270399093628, -2.288545608520508, -1.6858205795288086, -1.083095669746399, -0.48037075996398926, 0.12235414981842041, 0.7250790596008301, 1.3278040885925293, 1.9305288791656494, 2.5332536697387695, 3.135976791381836, 3.738701820373535, 4.341426849365234, 4.944151401519775, 5.546876430511475, 6.149601459503174, 6.752326011657715, 7.355051040649414, 7.957776069641113, 8.560501098632812, 9.163226127624512, 9.765951156616211, 10.368675231933594, 10.97140121459961, 11.574125289916992, 12.176850318908691, 12.77957534790039, 13.38230037689209, 13.985025405883789, 14.587750434875488, 15.190475463867188, 15.79319953918457, 16.395925521850586, 16.99864959716797, 17.601375579833984, 18.204099655151367, 18.806825637817383, 19.409549713134766, 20.01227569580078, 20.614999771118164, 21.21772575378418, 21.820449829101562, 22.423173904418945]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 5.0, 1.0, 8.0, 3.0, 3.0, 5.0, 7.0, 8.0, 9.0, 15.0, 12.0, 16.0, 19.0, 32.0, 25.0, 41.0, 37.0, 34.0, 45.0, 36.0, 42.0, 34.0, 48.0, 45.0, 45.0, 43.0, 43.0, 41.0, 32.0, 36.0, 37.0, 34.0, 33.0, 28.0, 18.0, 7.0, 14.0, 13.0, 13.0, 8.0, 12.0, 5.0, 5.0, 6.0, 2.0, 1.0, 3.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.3957624435424805, -5.2276787757873535, -5.059594631195068, -4.891510963439941, -4.723426818847656, -4.555343151092529, -4.387259483337402, -4.219175338745117, -4.05109167098999, -3.883007764816284, -3.714923858642578, -3.546840190887451, -3.378756284713745, -3.210672378540039, -3.042588472366333, -2.874504566192627, -2.706420660018921, -2.538336753845215, -2.370252847671509, -2.2021689414978027, -2.034085273742676, -1.8660013675689697, -1.6979174613952637, -1.5298336744308472, -1.3617497682571411, -1.193665862083435, -1.0255820751190186, -0.8574981689453125, -0.6894143223762512, -0.5213304758071899, -0.3532465696334839, -0.18516278266906738, -0.017078876495361328, 0.15100498497486115, 0.3190888464450836, 0.4871727228164673, 0.6552565693855286, 0.8233404159545898, 0.9914243221282959, 1.1595081090927124, 1.3275920152664185, 1.4956759214401245, 1.663759708404541, 1.831843614578247, 1.9999275207519531, 2.16801118850708, 2.3360953330993652, 2.504179000854492, 2.6722629070281982, 2.8403468132019043, 3.0084307193756104, 3.1765146255493164, 3.3445982933044434, 3.5126821994781494, 3.6807661056518555, 3.8488497734069824, 4.016933917999268, 4.1850175857543945, 4.35310173034668, 4.521185398101807, 4.689269542694092, 4.857353210449219, 5.025437355041504, 5.193521022796631, 5.361604690551758]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 4.0, 2.0, 1.0, 4.0, 5.0, 5.0, 6.0, 14.0, 16.0, 22.0, 27.0, 50.0, 47.0, 112.0, 156.0, 256.0, 367.0, 711.0, 1304.0, 2538.0, 5743.0, 15781.0, 52762.0, 204456.0, 482830.0, 202494.0, 51789.0, 15592.0, 5712.0, 2514.0, 1345.0, 732.0, 421.0, 255.0, 178.0, 107.0, 70.0, 40.0, 39.0, 11.0, 10.0, 10.0, 9.0, 6.0, 6.0, 0.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.2109375, -5.99298095703125, -5.7750244140625, -5.55706787109375, -5.339111328125, -5.12115478515625, -4.9031982421875, -4.68524169921875, -4.46728515625, -4.24932861328125, -4.0313720703125, -3.81341552734375, -3.595458984375, -3.37750244140625, -3.1595458984375, -2.94158935546875, -2.7236328125, -2.50567626953125, -2.2877197265625, -2.06976318359375, -1.851806640625, -1.63385009765625, -1.4158935546875, -1.19793701171875, -0.97998046875, -0.76202392578125, -0.5440673828125, -0.32611083984375, -0.108154296875, 0.10980224609375, 0.3277587890625, 0.54571533203125, 0.763671875, 0.98162841796875, 1.1995849609375, 1.41754150390625, 1.635498046875, 1.85345458984375, 2.0714111328125, 2.28936767578125, 2.50732421875, 2.72528076171875, 2.9432373046875, 3.16119384765625, 3.379150390625, 3.59710693359375, 3.8150634765625, 4.03302001953125, 4.2509765625, 4.46893310546875, 4.6868896484375, 4.90484619140625, 5.122802734375, 5.34075927734375, 5.5587158203125, 5.77667236328125, 5.99462890625, 6.21258544921875, 6.4305419921875, 6.64849853515625, 6.866455078125, 7.08441162109375, 7.3023681640625, 7.52032470703125, 7.73828125]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 2.0, 4.0, 5.0, 6.0, 9.0, 11.0, 9.0, 18.0, 24.0, 40.0, 56.0, 65.0, 76.0, 65.0, 92.0, 98.0, 67.0, 80.0, 64.0, 53.0, 40.0, 36.0, 26.0, 20.0, 8.0, 12.0, 10.0, 3.0, 3.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.787109375, -1.7348785400390625, -1.682647705078125, -1.6304168701171875, -1.57818603515625, -1.5259552001953125, -1.473724365234375, -1.4214935302734375, -1.3692626953125, -1.3170318603515625, -1.264801025390625, -1.2125701904296875, -1.16033935546875, -1.1081085205078125, -1.055877685546875, -1.0036468505859375, -0.951416015625, -0.8991851806640625, -0.846954345703125, -0.7947235107421875, -0.74249267578125, -0.6902618408203125, -0.638031005859375, -0.5858001708984375, -0.5335693359375, -0.4813385009765625, -0.429107666015625, -0.3768768310546875, -0.32464599609375, -0.2724151611328125, -0.220184326171875, -0.1679534912109375, -0.11572265625, -0.0634918212890625, -0.011260986328125, 0.0409698486328125, 0.09320068359375, 0.1454315185546875, 0.197662353515625, 0.2498931884765625, 0.3021240234375, 0.3543548583984375, 0.406585693359375, 0.4588165283203125, 0.51104736328125, 0.5632781982421875, 0.615509033203125, 0.6677398681640625, 0.719970703125, 0.7722015380859375, 0.824432373046875, 0.8766632080078125, 0.92889404296875, 0.9811248779296875, 1.033355712890625, 1.0855865478515625, 1.1378173828125, 1.1900482177734375, 1.242279052734375, 1.2945098876953125, 1.34674072265625, 1.3989715576171875, 1.451202392578125, 1.5034332275390625, 1.5556640625]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 4.0, 5.0, 8.0, 5.0, 4.0, 12.0, 14.0, 13.0, 24.0, 36.0, 64.0, 89.0, 132.0, 210.0, 448.0, 854.0, 1996.0, 5836.0, 19905.0, 79347.0, 363311.0, 439919.0, 100809.0, 24251.0, 6876.0, 2281.0, 939.0, 472.0, 266.0, 142.0, 71.0, 65.0, 39.0, 23.0, 25.0, 18.0, 10.0, 4.0, 13.0, 5.0, 4.0, 3.0, 3.0, 5.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.2578125, -8.02239990234375, -7.7869873046875, -7.55157470703125, -7.316162109375, -7.08074951171875, -6.8453369140625, -6.60992431640625, -6.37451171875, -6.13909912109375, -5.9036865234375, -5.66827392578125, -5.432861328125, -5.19744873046875, -4.9620361328125, -4.72662353515625, -4.4912109375, -4.25579833984375, -4.0203857421875, -3.78497314453125, -3.549560546875, -3.31414794921875, -3.0787353515625, -2.84332275390625, -2.60791015625, -2.37249755859375, -2.1370849609375, -1.90167236328125, -1.666259765625, -1.43084716796875, -1.1954345703125, -0.96002197265625, -0.724609375, -0.48919677734375, -0.2537841796875, -0.01837158203125, 0.217041015625, 0.45245361328125, 0.6878662109375, 0.92327880859375, 1.15869140625, 1.39410400390625, 1.6295166015625, 1.86492919921875, 2.100341796875, 2.33575439453125, 2.5711669921875, 2.80657958984375, 3.0419921875, 3.27740478515625, 3.5128173828125, 3.74822998046875, 3.983642578125, 4.21905517578125, 4.4544677734375, 4.68988037109375, 4.92529296875, 5.16070556640625, 5.3961181640625, 5.63153076171875, 5.866943359375, 6.10235595703125, 6.3377685546875, 6.57318115234375, 6.80859375]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 2.0, 2.0, 7.0, 4.0, 7.0, 7.0, 9.0, 15.0, 17.0, 16.0, 22.0, 32.0, 21.0, 37.0, 31.0, 37.0, 28.0, 42.0, 40.0, 48.0, 46.0, 36.0, 52.0, 49.0, 41.0, 49.0, 28.0, 29.0, 34.0, 39.0, 30.0, 21.0, 28.0, 15.0, 13.0, 17.0, 14.0, 7.0, 5.0, 6.0, 4.0, 6.0, 4.0, 4.0, 3.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.125, -4.970947265625, -4.81689453125, -4.662841796875, -4.5087890625, -4.354736328125, -4.20068359375, -4.046630859375, -3.892578125, -3.738525390625, -3.58447265625, -3.430419921875, -3.2763671875, -3.122314453125, -2.96826171875, -2.814208984375, -2.66015625, -2.506103515625, -2.35205078125, -2.197998046875, -2.0439453125, -1.889892578125, -1.73583984375, -1.581787109375, -1.427734375, -1.273681640625, -1.11962890625, -0.965576171875, -0.8115234375, -0.657470703125, -0.50341796875, -0.349365234375, -0.1953125, -0.041259765625, 0.11279296875, 0.266845703125, 0.4208984375, 0.574951171875, 0.72900390625, 0.883056640625, 1.037109375, 1.191162109375, 1.34521484375, 1.499267578125, 1.6533203125, 1.807373046875, 1.96142578125, 2.115478515625, 2.26953125, 2.423583984375, 2.57763671875, 2.731689453125, 2.8857421875, 3.039794921875, 3.19384765625, 3.347900390625, 3.501953125, 3.656005859375, 3.81005859375, 3.964111328125, 4.1181640625, 4.272216796875, 4.42626953125, 4.580322265625, 4.734375]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 4.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 8.0, 12.0, 18.0, 22.0, 40.0, 67.0, 121.0, 215.0, 423.0, 856.0, 2189.0, 5817.0, 21999.0, 154106.0, 721656.0, 114426.0, 18033.0, 5016.0, 1865.0, 767.0, 395.0, 205.0, 113.0, 63.0, 30.0, 23.0, 13.0, 12.0, 6.0, 11.0, 4.0, 3.0, 2.0, 1.0, 5.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.634765625, -3.502288818359375, -3.36981201171875, -3.237335205078125, -3.1048583984375, -2.972381591796875, -2.83990478515625, -2.707427978515625, -2.574951171875, -2.442474365234375, -2.30999755859375, -2.177520751953125, -2.0450439453125, -1.912567138671875, -1.78009033203125, -1.647613525390625, -1.51513671875, -1.382659912109375, -1.25018310546875, -1.117706298828125, -0.9852294921875, -0.852752685546875, -0.72027587890625, -0.587799072265625, -0.455322265625, -0.322845458984375, -0.19036865234375, -0.057891845703125, 0.0745849609375, 0.207061767578125, 0.33953857421875, 0.472015380859375, 0.6044921875, 0.736968994140625, 0.86944580078125, 1.001922607421875, 1.1343994140625, 1.266876220703125, 1.39935302734375, 1.531829833984375, 1.664306640625, 1.796783447265625, 1.92926025390625, 2.061737060546875, 2.1942138671875, 2.326690673828125, 2.45916748046875, 2.591644287109375, 2.72412109375, 2.856597900390625, 2.98907470703125, 3.121551513671875, 3.2540283203125, 3.386505126953125, 3.51898193359375, 3.651458740234375, 3.783935546875, 3.916412353515625, 4.04888916015625, 4.181365966796875, 4.3138427734375, 4.446319580078125, 4.57879638671875, 4.711273193359375, 4.84375]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 3.0, 0.0, 3.0, 1.0, 3.0, 7.0, 9.0, 22.0, 20.0, 44.0, 63.0, 176.0, 250.0, 170.0, 95.0, 54.0, 28.0, 22.0, 13.0, 9.0, 5.0, 2.0, 3.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0008573532104492188, -0.0008252710103988647, -0.0007931888103485107, -0.0007611066102981567, -0.0007290244102478027, -0.0006969422101974487, -0.0006648600101470947, -0.0006327778100967407, -0.0006006956100463867, -0.0005686134099960327, -0.0005365312099456787, -0.0005044490098953247, -0.0004723668098449707, -0.0004402846097946167, -0.0004082024097442627, -0.0003761202096939087, -0.0003440380096435547, -0.0003119558095932007, -0.0002798736095428467, -0.0002477914094924927, -0.00021570920944213867, -0.00018362700939178467, -0.00015154480934143066, -0.00011946260929107666, -8.738040924072266e-05, -5.529820919036865e-05, -2.321600914001465e-05, 8.866190910339355e-06, 4.094839096069336e-05, 7.303059101104736e-05, 0.00010511279106140137, 0.00013719499111175537, 0.00016927719116210938, 0.00020135939121246338, 0.00023344159126281738, 0.0002655237913131714, 0.0002976059913635254, 0.0003296881914138794, 0.0003617703914642334, 0.0003938525915145874, 0.0004259347915649414, 0.0004580169916152954, 0.0004900991916656494, 0.0005221813917160034, 0.0005542635917663574, 0.0005863457918167114, 0.0006184279918670654, 0.0006505101919174194, 0.0006825923919677734, 0.0007146745920181274, 0.0007467567920684814, 0.0007788389921188354, 0.0008109211921691895, 0.0008430033922195435, 0.0008750855922698975, 0.0009071677923202515, 0.0009392499923706055, 0.0009713321924209595, 0.0010034143924713135, 0.0010354965925216675, 0.0010675787925720215, 0.0010996609926223755, 0.0011317431926727295, 0.0011638253927230835, 0.0011959075927734375]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 3.0, 1.0, 4.0, 0.0, 2.0, 4.0, 8.0, 6.0, 9.0, 12.0, 21.0, 36.0, 49.0, 92.0, 216.0, 581.0, 2335.0, 14412.0, 358721.0, 646734.0, 21029.0, 3027.0, 749.0, 244.0, 104.0, 64.0, 31.0, 26.0, 8.0, 11.0, 7.0, 4.0, 4.0, 3.0, 2.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.34375, -7.099609375, -6.85546875, -6.611328125, -6.3671875, -6.123046875, -5.87890625, -5.634765625, -5.390625, -5.146484375, -4.90234375, -4.658203125, -4.4140625, -4.169921875, -3.92578125, -3.681640625, -3.4375, -3.193359375, -2.94921875, -2.705078125, -2.4609375, -2.216796875, -1.97265625, -1.728515625, -1.484375, -1.240234375, -0.99609375, -0.751953125, -0.5078125, -0.263671875, -0.01953125, 0.224609375, 0.46875, 0.712890625, 0.95703125, 1.201171875, 1.4453125, 1.689453125, 1.93359375, 2.177734375, 2.421875, 2.666015625, 2.91015625, 3.154296875, 3.3984375, 3.642578125, 3.88671875, 4.130859375, 4.375, 4.619140625, 4.86328125, 5.107421875, 5.3515625, 5.595703125, 5.83984375, 6.083984375, 6.328125, 6.572265625, 6.81640625, 7.060546875, 7.3046875, 7.548828125, 7.79296875, 8.037109375, 8.28125]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 0.0, 3.0, 5.0, 1.0, 8.0, 7.0, 12.0, 14.0, 16.0, 22.0, 35.0, 31.0, 42.0, 59.0, 73.0, 99.0, 117.0, 86.0, 73.0, 83.0, 53.0, 38.0, 27.0, 23.0, 18.0, 16.0, 8.0, 6.0, 11.0, 3.0, 5.0, 3.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.3515625, -2.274566650390625, -2.19757080078125, -2.120574951171875, -2.0435791015625, -1.966583251953125, -1.88958740234375, -1.812591552734375, -1.735595703125, -1.658599853515625, -1.58160400390625, -1.504608154296875, -1.4276123046875, -1.350616455078125, -1.27362060546875, -1.196624755859375, -1.11962890625, -1.042633056640625, -0.96563720703125, -0.888641357421875, -0.8116455078125, -0.734649658203125, -0.65765380859375, -0.580657958984375, -0.503662109375, -0.426666259765625, -0.34967041015625, -0.272674560546875, -0.1956787109375, -0.118682861328125, -0.04168701171875, 0.035308837890625, 0.1123046875, 0.189300537109375, 0.26629638671875, 0.343292236328125, 0.4202880859375, 0.497283935546875, 0.57427978515625, 0.651275634765625, 0.728271484375, 0.805267333984375, 0.88226318359375, 0.959259033203125, 1.0362548828125, 1.113250732421875, 1.19024658203125, 1.267242431640625, 1.34423828125, 1.421234130859375, 1.49822998046875, 1.575225830078125, 1.6522216796875, 1.729217529296875, 1.80621337890625, 1.883209228515625, 1.960205078125, 2.037200927734375, 2.11419677734375, 2.191192626953125, 2.2681884765625, 2.345184326171875, 2.42218017578125, 2.499176025390625, 2.576171875]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 10.0, 7.0, 7.0, 25.0, 34.0, 76.0, 94.0, 164.0, 212.0, 153.0, 82.0, 50.0, 38.0, 19.0, 16.0, 5.0, 8.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-45.842506408691406, -44.15380859375, -42.465110778808594, -40.77641296386719, -39.08771514892578, -37.399017333984375, -35.7103157043457, -34.0216178894043, -32.33292007446289, -30.644222259521484, -28.955524444580078, -27.26682472229004, -25.578126907348633, -23.889429092407227, -22.200729370117188, -20.51203155517578, -18.823333740234375, -17.13463592529297, -15.445937156677246, -13.757238388061523, -12.068540573120117, -10.379842758178711, -8.691143989562988, -7.002445220947266, -5.313747406005859, -3.625049114227295, -1.9363508224487305, -0.24765253067016602, 1.4410457611083984, 3.1297435760498047, 4.818442344665527, 6.50714111328125, 8.195842742919922, 9.884540557861328, 11.57323932647705, 13.261938095092773, 14.95063591003418, 16.639333724975586, 18.328033447265625, 20.01673126220703, 21.705429077148438, 23.394126892089844, 25.08282470703125, 26.77152442932129, 28.460222244262695, 30.1489200592041, 31.83761978149414, 33.52631759643555, 35.21501541137695, 36.90371322631836, 38.592411041259766, 40.28110885620117, 41.969810485839844, 43.65850830078125, 45.347206115722656, 47.03590393066406, 48.72460174560547, 50.413299560546875, 52.10199737548828, 53.79069519042969, 55.479393005371094, 57.1680908203125, 58.85679244995117, 60.54549026489258, 62.234188079833984]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 6.0, 3.0, 5.0, 3.0, 10.0, 9.0, 13.0, 20.0, 19.0, 28.0, 44.0, 33.0, 40.0, 48.0, 49.0, 67.0, 50.0, 66.0, 51.0, 61.0, 49.0, 72.0, 43.0, 38.0, 39.0, 30.0, 29.0, 20.0, 18.0, 15.0, 9.0, 7.0, 5.0, 5.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-43.17283630371094, -41.99806594848633, -40.823299407958984, -39.648529052734375, -38.473758697509766, -37.29899215698242, -36.12422180175781, -34.94945526123047, -33.77468490600586, -32.59991455078125, -31.425146102905273, -30.250377655029297, -29.07560920715332, -27.900840759277344, -26.726070404052734, -25.551301956176758, -24.37653160095215, -23.201763153076172, -22.026992797851562, -20.852224349975586, -19.67745590209961, -18.502685546875, -17.327917098999023, -16.153148651123047, -14.978379249572754, -13.803609848022461, -12.628841400146484, -11.454071998596191, -10.279302597045898, -9.104534149169922, -7.929764747619629, -6.754996299743652, -5.580226898193359, -4.405457973480225, -3.2306888103485107, -2.055919647216797, -0.8811507225036621, 0.29361820220947266, 1.4683876037597656, 2.643156051635742, 3.817925453186035, 4.99269437789917, 6.167463302612305, 7.342232704162598, 8.51700210571289, 9.691770553588867, 10.86653995513916, 12.041308403015137, 13.21607780456543, 14.390847206115723, 15.5656156539917, 16.740385055541992, 17.91515350341797, 19.089923858642578, 20.264692306518555, 21.43946075439453, 22.61423110961914, 23.788999557495117, 24.963769912719727, 26.138538360595703, 27.31330680847168, 28.488075256347656, 29.662845611572266, 30.837614059448242, 32.01238250732422]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 5.0, 6.0, 4.0, 6.0, 7.0, 6.0, 7.0, 16.0, 33.0, 37.0, 48.0, 60.0, 99.0, 130.0, 258.0, 364.0, 619.0, 1194.0, 2657.0, 7277.0, 28026.0, 281387.0, 3777946.0, 71993.0, 14067.0, 4364.0, 1728.0, 837.0, 447.0, 253.0, 147.0, 91.0, 63.0, 37.0, 21.0, 16.0, 9.0, 6.0, 2.0, 6.0, 4.0, 1.0, 4.0, 1.0, 0.0, 3.0, 2.0], "bins": [-14.0390625, -13.681396484375, -13.32373046875, -12.966064453125, -12.6083984375, -12.250732421875, -11.89306640625, -11.535400390625, -11.177734375, -10.820068359375, -10.46240234375, -10.104736328125, -9.7470703125, -9.389404296875, -9.03173828125, -8.674072265625, -8.31640625, -7.958740234375, -7.60107421875, -7.243408203125, -6.8857421875, -6.528076171875, -6.17041015625, -5.812744140625, -5.455078125, -5.097412109375, -4.73974609375, -4.382080078125, -4.0244140625, -3.666748046875, -3.30908203125, -2.951416015625, -2.59375, -2.236083984375, -1.87841796875, -1.520751953125, -1.1630859375, -0.805419921875, -0.44775390625, -0.090087890625, 0.267578125, 0.625244140625, 0.98291015625, 1.340576171875, 1.6982421875, 2.055908203125, 2.41357421875, 2.771240234375, 3.12890625, 3.486572265625, 3.84423828125, 4.201904296875, 4.5595703125, 4.917236328125, 5.27490234375, 5.632568359375, 5.990234375, 6.347900390625, 6.70556640625, 7.063232421875, 7.4208984375, 7.778564453125, 8.13623046875, 8.493896484375, 8.8515625]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 10.0, 7.0, 8.0, 7.0, 15.0, 18.0, 31.0, 35.0, 50.0, 61.0, 60.0, 86.0, 90.0, 78.0, 85.0, 80.0, 57.0, 63.0, 37.0, 34.0, 28.0, 22.0, 13.0, 10.0, 5.0, 3.0, 4.0, 2.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.9765625, -1.91888427734375, -1.8612060546875, -1.80352783203125, -1.745849609375, -1.68817138671875, -1.6304931640625, -1.57281494140625, -1.51513671875, -1.45745849609375, -1.3997802734375, -1.34210205078125, -1.284423828125, -1.22674560546875, -1.1690673828125, -1.11138916015625, -1.0537109375, -0.99603271484375, -0.9383544921875, -0.88067626953125, -0.822998046875, -0.76531982421875, -0.7076416015625, -0.64996337890625, -0.59228515625, -0.53460693359375, -0.4769287109375, -0.41925048828125, -0.361572265625, -0.30389404296875, -0.2462158203125, -0.18853759765625, -0.130859375, -0.07318115234375, -0.0155029296875, 0.04217529296875, 0.099853515625, 0.15753173828125, 0.2152099609375, 0.27288818359375, 0.33056640625, 0.38824462890625, 0.4459228515625, 0.50360107421875, 0.561279296875, 0.61895751953125, 0.6766357421875, 0.73431396484375, 0.7919921875, 0.84967041015625, 0.9073486328125, 0.96502685546875, 1.022705078125, 1.08038330078125, 1.1380615234375, 1.19573974609375, 1.25341796875, 1.31109619140625, 1.3687744140625, 1.42645263671875, 1.484130859375, 1.54180908203125, 1.5994873046875, 1.65716552734375, 1.71484375]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 15.0, 29.0, 44.0, 93.0, 141.0, 472.0, 5681.0, 4121343.0, 64722.0, 1303.0, 265.0, 85.0, 47.0, 20.0, 9.0, 7.0, 3.0, 6.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-67.8125, -66.3623046875, -64.912109375, -63.4619140625, -62.01171875, -60.5615234375, -59.111328125, -57.6611328125, -56.2109375, -54.7607421875, -53.310546875, -51.8603515625, -50.41015625, -48.9599609375, -47.509765625, -46.0595703125, -44.609375, -43.1591796875, -41.708984375, -40.2587890625, -38.80859375, -37.3583984375, -35.908203125, -34.4580078125, -33.0078125, -31.5576171875, -30.107421875, -28.6572265625, -27.20703125, -25.7568359375, -24.306640625, -22.8564453125, -21.40625, -19.9560546875, -18.505859375, -17.0556640625, -15.60546875, -14.1552734375, -12.705078125, -11.2548828125, -9.8046875, -8.3544921875, -6.904296875, -5.4541015625, -4.00390625, -2.5537109375, -1.103515625, 0.3466796875, 1.796875, 3.2470703125, 4.697265625, 6.1474609375, 7.59765625, 9.0478515625, 10.498046875, 11.9482421875, 13.3984375, 14.8486328125, 16.298828125, 17.7490234375, 19.19921875, 20.6494140625, 22.099609375, 23.5498046875, 25.0]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 2.0, 2.0, 8.0, 9.0, 6.0, 18.0, 18.0, 45.0, 81.0, 201.0, 723.0, 2380.0, 349.0, 100.0, 55.0, 31.0, 19.0, 13.0, 3.0, 5.0, 4.0, 4.0, 4.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-7.7890625, -7.6168212890625, -7.444580078125, -7.2723388671875, -7.10009765625, -6.9278564453125, -6.755615234375, -6.5833740234375, -6.4111328125, -6.2388916015625, -6.066650390625, -5.8944091796875, -5.72216796875, -5.5499267578125, -5.377685546875, -5.2054443359375, -5.033203125, -4.8609619140625, -4.688720703125, -4.5164794921875, -4.34423828125, -4.1719970703125, -3.999755859375, -3.8275146484375, -3.6552734375, -3.4830322265625, -3.310791015625, -3.1385498046875, -2.96630859375, -2.7940673828125, -2.621826171875, -2.4495849609375, -2.27734375, -2.1051025390625, -1.932861328125, -1.7606201171875, -1.58837890625, -1.4161376953125, -1.243896484375, -1.0716552734375, -0.8994140625, -0.7271728515625, -0.554931640625, -0.3826904296875, -0.21044921875, -0.0382080078125, 0.134033203125, 0.3062744140625, 0.478515625, 0.6507568359375, 0.822998046875, 0.9952392578125, 1.16748046875, 1.3397216796875, 1.511962890625, 1.6842041015625, 1.8564453125, 2.0286865234375, 2.200927734375, 2.3731689453125, 2.54541015625, 2.7176513671875, 2.889892578125, 3.0621337890625, 3.234375]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 8.0, 9.0, 22.0, 59.0, 204.0, 355.0, 223.0, 88.0, 22.0, 6.0, 5.0, 4.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-56.79380416870117, -55.32221984863281, -53.85063552856445, -52.37905502319336, -50.907470703125, -49.43588638305664, -47.96430206298828, -46.49272155761719, -45.02113723754883, -43.54955291748047, -42.07796859741211, -40.606388092041016, -39.134803771972656, -37.6632194519043, -36.19163513183594, -34.720054626464844, -33.24846649169922, -31.77688217163086, -30.305299758911133, -28.833715438842773, -27.362133026123047, -25.890548706054688, -24.418964385986328, -22.9473819732666, -21.475799560546875, -20.004215240478516, -18.53263282775879, -17.06104850769043, -15.589466094970703, -14.117881774902344, -12.6462984085083, -11.174715042114258, -9.703132629394531, -8.231549263000488, -6.759965896606445, -5.288382053375244, -3.816798686981201, -2.345215320587158, -0.873631477355957, 0.5979518890380859, 2.069535255432129, 3.541118621826172, 5.012701988220215, 6.484285831451416, 7.955869197845459, 9.427452087402344, 10.899036407470703, 12.370619773864746, 13.842203140258789, 15.313786506652832, 16.785369873046875, 18.256954193115234, 19.72853660583496, 21.20012092590332, 22.671703338623047, 24.143287658691406, 25.614871978759766, 27.086456298828125, 28.55803871154785, 30.02962303161621, 31.501205444335938, 32.9727897644043, 34.444374084472656, 35.91595458984375, 37.38753890991211]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 6.0, 5.0, 8.0, 7.0, 14.0, 20.0, 20.0, 25.0, 24.0, 38.0, 31.0, 45.0, 44.0, 57.0, 55.0, 66.0, 73.0, 68.0, 62.0, 52.0, 45.0, 46.0, 31.0, 36.0, 24.0, 19.0, 24.0, 16.0, 10.0, 7.0, 7.0, 5.0, 4.0, 1.0, 3.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.519977569580078, -14.105334281921387, -13.690690994262695, -13.27604866027832, -12.861405372619629, -12.446762084960938, -12.032118797302246, -11.617475509643555, -11.20283317565918, -10.788189888000488, -10.373546600341797, -9.958904266357422, -9.54426097869873, -9.129617691040039, -8.714974403381348, -8.300331115722656, -7.885687828063965, -7.471044540405273, -7.05640172958374, -6.641758441925049, -6.227115631103516, -5.812472343444824, -5.397829055786133, -4.983185768127441, -4.568542957305908, -4.153899669647217, -3.7392568588256836, -3.324613571166992, -2.90997052192688, -2.4953274726867676, -2.080684185028076, -1.6660411357879639, -1.251399040222168, -0.8367559313774109, -0.4221128225326538, -0.007469654083251953, 0.40717339515686035, 0.8218164443969727, 1.236459732055664, 1.6511027812957764, 2.0657458305358887, 2.480388879776001, 2.8950319290161133, 3.3096752166748047, 3.724318265914917, 4.138961315155029, 4.553604602813721, 4.968247413635254, 5.382890701293945, 5.797533988952637, 6.21217679977417, 6.626820087432861, 7.0414628982543945, 7.456106185913086, 7.870749473571777, 8.285392761230469, 8.700035095214844, 9.114678382873535, 9.529321670532227, 9.943964004516602, 10.358607292175293, 10.773250579833984, 11.187893867492676, 11.602537155151367, 12.017180442810059]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 8.0, 10.0, 6.0, 16.0, 18.0, 34.0, 43.0, 62.0, 99.0, 146.0, 244.0, 363.0, 568.0, 905.0, 1751.0, 3577.0, 9822.0, 35497.0, 167777.0, 557105.0, 207384.0, 43046.0, 11319.0, 4129.0, 1900.0, 1019.0, 592.0, 384.0, 269.0, 165.0, 97.0, 64.0, 44.0, 31.0, 25.0, 12.0, 9.0, 8.0, 5.0, 6.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-9.21875, -8.9356689453125, -8.652587890625, -8.3695068359375, -8.08642578125, -7.8033447265625, -7.520263671875, -7.2371826171875, -6.9541015625, -6.6710205078125, -6.387939453125, -6.1048583984375, -5.82177734375, -5.5386962890625, -5.255615234375, -4.9725341796875, -4.689453125, -4.4063720703125, -4.123291015625, -3.8402099609375, -3.55712890625, -3.2740478515625, -2.990966796875, -2.7078857421875, -2.4248046875, -2.1417236328125, -1.858642578125, -1.5755615234375, -1.29248046875, -1.0093994140625, -0.726318359375, -0.4432373046875, -0.16015625, 0.1229248046875, 0.406005859375, 0.6890869140625, 0.97216796875, 1.2552490234375, 1.538330078125, 1.8214111328125, 2.1044921875, 2.3875732421875, 2.670654296875, 2.9537353515625, 3.23681640625, 3.5198974609375, 3.802978515625, 4.0860595703125, 4.369140625, 4.6522216796875, 4.935302734375, 5.2183837890625, 5.50146484375, 5.7845458984375, 6.067626953125, 6.3507080078125, 6.6337890625, 6.9168701171875, 7.199951171875, 7.4830322265625, 7.76611328125, 8.0491943359375, 8.332275390625, 8.6153564453125, 8.8984375]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 7.0, 11.0, 11.0, 26.0, 33.0, 42.0, 53.0, 79.0, 85.0, 85.0, 95.0, 105.0, 85.0, 84.0, 48.0, 46.0, 36.0, 31.0, 11.0, 6.0, 7.0, 2.0, 1.0, 5.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.228515625, -2.162017822265625, -2.09552001953125, -2.029022216796875, -1.9625244140625, -1.896026611328125, -1.82952880859375, -1.763031005859375, -1.696533203125, -1.630035400390625, -1.56353759765625, -1.497039794921875, -1.4305419921875, -1.364044189453125, -1.29754638671875, -1.231048583984375, -1.16455078125, -1.098052978515625, -1.03155517578125, -0.965057373046875, -0.8985595703125, -0.832061767578125, -0.76556396484375, -0.699066162109375, -0.632568359375, -0.566070556640625, -0.49957275390625, -0.433074951171875, -0.3665771484375, -0.300079345703125, -0.23358154296875, -0.167083740234375, -0.1005859375, -0.034088134765625, 0.03240966796875, 0.098907470703125, 0.1654052734375, 0.231903076171875, 0.29840087890625, 0.364898681640625, 0.431396484375, 0.497894287109375, 0.56439208984375, 0.630889892578125, 0.6973876953125, 0.763885498046875, 0.83038330078125, 0.896881103515625, 0.96337890625, 1.029876708984375, 1.09637451171875, 1.162872314453125, 1.2293701171875, 1.295867919921875, 1.36236572265625, 1.428863525390625, 1.495361328125, 1.561859130859375, 1.62835693359375, 1.694854736328125, 1.7613525390625, 1.827850341796875, 1.89434814453125, 1.960845947265625, 2.02734375]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 2.0, 1.0, 4.0, 3.0, 6.0, 9.0, 11.0, 6.0, 13.0, 14.0, 18.0, 36.0, 42.0, 45.0, 54.0, 88.0, 184.0, 358.0, 829.0, 2221.0, 5789.0, 17917.0, 64939.0, 278442.0, 490337.0, 136774.0, 34475.0, 10207.0, 3304.0, 1247.0, 535.0, 218.0, 127.0, 85.0, 61.0, 42.0, 27.0, 17.0, 14.0, 9.0, 8.0, 4.0, 7.0, 6.0, 4.0, 4.0, 4.0, 2.0, 5.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-6.90234375, -6.6815185546875, -6.460693359375, -6.2398681640625, -6.01904296875, -5.7982177734375, -5.577392578125, -5.3565673828125, -5.1357421875, -4.9149169921875, -4.694091796875, -4.4732666015625, -4.25244140625, -4.0316162109375, -3.810791015625, -3.5899658203125, -3.369140625, -3.1483154296875, -2.927490234375, -2.7066650390625, -2.48583984375, -2.2650146484375, -2.044189453125, -1.8233642578125, -1.6025390625, -1.3817138671875, -1.160888671875, -0.9400634765625, -0.71923828125, -0.4984130859375, -0.277587890625, -0.0567626953125, 0.1640625, 0.3848876953125, 0.605712890625, 0.8265380859375, 1.04736328125, 1.2681884765625, 1.489013671875, 1.7098388671875, 1.9306640625, 2.1514892578125, 2.372314453125, 2.5931396484375, 2.81396484375, 3.0347900390625, 3.255615234375, 3.4764404296875, 3.697265625, 3.9180908203125, 4.138916015625, 4.3597412109375, 4.58056640625, 4.8013916015625, 5.022216796875, 5.2430419921875, 5.4638671875, 5.6846923828125, 5.905517578125, 6.1263427734375, 6.34716796875, 6.5679931640625, 6.788818359375, 7.0096435546875, 7.23046875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 2.0, 1.0, 6.0, 4.0, 6.0, 4.0, 6.0, 13.0, 15.0, 12.0, 16.0, 22.0, 15.0, 21.0, 31.0, 24.0, 36.0, 27.0, 36.0, 45.0, 46.0, 50.0, 37.0, 43.0, 41.0, 45.0, 44.0, 32.0, 35.0, 33.0, 24.0, 36.0, 24.0, 21.0, 23.0, 21.0, 24.0, 17.0, 8.0, 9.0, 8.0, 8.0, 7.0, 4.0, 6.0, 4.0, 6.0, 5.0, 3.0, 3.0, 0.0, 2.0, 3.0, 0.0, 1.0], "bins": [-4.81640625, -4.6702880859375, -4.524169921875, -4.3780517578125, -4.23193359375, -4.0858154296875, -3.939697265625, -3.7935791015625, -3.6474609375, -3.5013427734375, -3.355224609375, -3.2091064453125, -3.06298828125, -2.9168701171875, -2.770751953125, -2.6246337890625, -2.478515625, -2.3323974609375, -2.186279296875, -2.0401611328125, -1.89404296875, -1.7479248046875, -1.601806640625, -1.4556884765625, -1.3095703125, -1.1634521484375, -1.017333984375, -0.8712158203125, -0.72509765625, -0.5789794921875, -0.432861328125, -0.2867431640625, -0.140625, 0.0054931640625, 0.151611328125, 0.2977294921875, 0.44384765625, 0.5899658203125, 0.736083984375, 0.8822021484375, 1.0283203125, 1.1744384765625, 1.320556640625, 1.4666748046875, 1.61279296875, 1.7589111328125, 1.905029296875, 2.0511474609375, 2.197265625, 2.3433837890625, 2.489501953125, 2.6356201171875, 2.78173828125, 2.9278564453125, 3.073974609375, 3.2200927734375, 3.3662109375, 3.5123291015625, 3.658447265625, 3.8045654296875, 3.95068359375, 4.0968017578125, 4.242919921875, 4.3890380859375, 4.53515625]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 9.0, 4.0, 1.0, 6.0, 8.0, 8.0, 16.0, 13.0, 31.0, 71.0, 124.0, 313.0, 680.0, 1901.0, 7084.0, 60144.0, 886713.0, 79874.0, 7956.0, 2194.0, 771.0, 310.0, 141.0, 67.0, 42.0, 21.0, 16.0, 7.0, 7.0, 6.0, 5.0, 4.0, 0.0, 3.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.2734375, -7.0428466796875, -6.812255859375, -6.5816650390625, -6.35107421875, -6.1204833984375, -5.889892578125, -5.6593017578125, -5.4287109375, -5.1981201171875, -4.967529296875, -4.7369384765625, -4.50634765625, -4.2757568359375, -4.045166015625, -3.8145751953125, -3.583984375, -3.3533935546875, -3.122802734375, -2.8922119140625, -2.66162109375, -2.4310302734375, -2.200439453125, -1.9698486328125, -1.7392578125, -1.5086669921875, -1.278076171875, -1.0474853515625, -0.81689453125, -0.5863037109375, -0.355712890625, -0.1251220703125, 0.10546875, 0.3360595703125, 0.566650390625, 0.7972412109375, 1.02783203125, 1.2584228515625, 1.489013671875, 1.7196044921875, 1.9501953125, 2.1807861328125, 2.411376953125, 2.6419677734375, 2.87255859375, 3.1031494140625, 3.333740234375, 3.5643310546875, 3.794921875, 4.0255126953125, 4.256103515625, 4.4866943359375, 4.71728515625, 4.9478759765625, 5.178466796875, 5.4090576171875, 5.6396484375, 5.8702392578125, 6.100830078125, 6.3314208984375, 6.56201171875, 6.7926025390625, 7.023193359375, 7.2537841796875, 7.484375]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 1.0, 5.0, 1.0, 2.0, 4.0, 4.0, 9.0, 13.0, 19.0, 30.0, 43.0, 72.0, 102.0, 116.0, 129.0, 118.0, 88.0, 74.0, 50.0, 33.0, 23.0, 20.0, 9.0, 14.0, 9.0, 6.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005321502685546875, -0.0005148202180862427, -0.0004974901676177979, -0.00048016011714935303, -0.0004628300666809082, -0.0004455000162124634, -0.00042816996574401855, -0.00041083991527557373, -0.0003935098648071289, -0.0003761798143386841, -0.00035884976387023926, -0.00034151971340179443, -0.0003241896629333496, -0.0003068596124649048, -0.00028952956199645996, -0.00027219951152801514, -0.0002548694610595703, -0.0002375394105911255, -0.00022020936012268066, -0.00020287930965423584, -0.00018554925918579102, -0.0001682192087173462, -0.00015088915824890137, -0.00013355910778045654, -0.00011622905731201172, -9.88990068435669e-05, -8.156895637512207e-05, -6.423890590667725e-05, -4.690885543823242e-05, -2.9578804969787598e-05, -1.2248754501342773e-05, 5.081295967102051e-06, 2.2411346435546875e-05, 3.97413969039917e-05, 5.7071447372436523e-05, 7.440149784088135e-05, 9.173154830932617e-05, 0.000109061598777771, 0.00012639164924621582, 0.00014372169971466064, 0.00016105175018310547, 0.0001783818006515503, 0.00019571185111999512, 0.00021304190158843994, 0.00023037195205688477, 0.0002477020025253296, 0.0002650320529937744, 0.00028236210346221924, 0.00029969215393066406, 0.0003170222043991089, 0.0003343522548675537, 0.00035168230533599854, 0.00036901235580444336, 0.0003863424062728882, 0.000403672456741333, 0.00042100250720977783, 0.00043833255767822266, 0.0004556626081466675, 0.0004729926586151123, 0.0004903227090835571, 0.000507652759552002, 0.0005249828100204468, 0.0005423128604888916, 0.0005596429109573364, 0.0005769729614257812]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 9.0, 5.0, 23.0, 27.0, 40.0, 84.0, 182.0, 443.0, 1587.0, 8607.0, 309516.0, 713308.0, 11816.0, 1904.0, 567.0, 196.0, 103.0, 48.0, 33.0, 17.0, 13.0, 4.0, 5.0, 6.0, 1.0, 2.0, 9.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.2109375, -8.8829345703125, -8.554931640625, -8.2269287109375, -7.89892578125, -7.5709228515625, -7.242919921875, -6.9149169921875, -6.5869140625, -6.2589111328125, -5.930908203125, -5.6029052734375, -5.27490234375, -4.9468994140625, -4.618896484375, -4.2908935546875, -3.962890625, -3.6348876953125, -3.306884765625, -2.9788818359375, -2.65087890625, -2.3228759765625, -1.994873046875, -1.6668701171875, -1.3388671875, -1.0108642578125, -0.682861328125, -0.3548583984375, -0.02685546875, 0.3011474609375, 0.629150390625, 0.9571533203125, 1.28515625, 1.6131591796875, 1.941162109375, 2.2691650390625, 2.59716796875, 2.9251708984375, 3.253173828125, 3.5811767578125, 3.9091796875, 4.2371826171875, 4.565185546875, 4.8931884765625, 5.22119140625, 5.5491943359375, 5.877197265625, 6.2052001953125, 6.533203125, 6.8612060546875, 7.189208984375, 7.5172119140625, 7.84521484375, 8.1732177734375, 8.501220703125, 8.8292236328125, 9.1572265625, 9.4852294921875, 9.813232421875, 10.1412353515625, 10.46923828125, 10.7972412109375, 11.125244140625, 11.4532470703125, 11.78125]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 5.0, 5.0, 11.0, 12.0, 31.0, 42.0, 90.0, 200.0, 229.0, 171.0, 90.0, 45.0, 24.0, 12.0, 12.0, 11.0, 8.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.61328125, -3.43609619140625, -3.2589111328125, -3.08172607421875, -2.904541015625, -2.72735595703125, -2.5501708984375, -2.37298583984375, -2.19580078125, -2.01861572265625, -1.8414306640625, -1.66424560546875, -1.487060546875, -1.30987548828125, -1.1326904296875, -0.95550537109375, -0.7783203125, -0.60113525390625, -0.4239501953125, -0.24676513671875, -0.069580078125, 0.10760498046875, 0.2847900390625, 0.46197509765625, 0.63916015625, 0.81634521484375, 0.9935302734375, 1.17071533203125, 1.347900390625, 1.52508544921875, 1.7022705078125, 1.87945556640625, 2.056640625, 2.23382568359375, 2.4110107421875, 2.58819580078125, 2.765380859375, 2.94256591796875, 3.1197509765625, 3.29693603515625, 3.47412109375, 3.65130615234375, 3.8284912109375, 4.00567626953125, 4.182861328125, 4.36004638671875, 4.5372314453125, 4.71441650390625, 4.8916015625, 5.06878662109375, 5.2459716796875, 5.42315673828125, 5.600341796875, 5.77752685546875, 5.9547119140625, 6.13189697265625, 6.30908203125, 6.48626708984375, 6.6634521484375, 6.84063720703125, 7.017822265625, 7.19500732421875, 7.3721923828125, 7.54937744140625, 7.7265625]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 5.0, 5.0, 10.0, 10.0, 19.0, 34.0, 59.0, 111.0, 175.0, 200.0, 160.0, 106.0, 49.0, 31.0, 17.0, 8.0, 5.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-59.546142578125, -57.65206527709961, -55.75798797607422, -53.863914489746094, -51.9698371887207, -50.07575988769531, -48.18168258666992, -46.28760528564453, -44.393531799316406, -42.499454498291016, -40.605377197265625, -38.7113037109375, -36.81722640991211, -34.92314910888672, -33.02907180786133, -31.134994506835938, -29.240917205810547, -27.346839904785156, -25.4527645111084, -23.558687210083008, -21.66461181640625, -19.77053451538086, -17.87645721435547, -15.982380867004395, -14.08830451965332, -12.194228172302246, -10.300151824951172, -8.406074523925781, -6.511998176574707, -4.617921829223633, -2.723844528198242, -0.829768180847168, 1.0643081665039062, 2.9583847522735596, 4.852461338043213, 6.746538162231445, 8.64061450958252, 10.534690856933594, 12.428768157958984, 14.322844505310059, 16.216920852661133, 18.110998153686523, 20.00507354736328, 21.899150848388672, 23.793228149414062, 25.68730354309082, 27.58138084411621, 29.47545623779297, 31.36953353881836, 33.26361083984375, 35.15768814086914, 37.05176544189453, 38.945838928222656, 40.83991622924805, 42.73399353027344, 44.62807083129883, 46.52214813232422, 48.41622543334961, 50.310302734375, 52.204376220703125, 54.098453521728516, 55.992530822753906, 57.8866081237793, 59.78068542480469, 61.67475891113281]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 2.0, 1.0, 4.0, 1.0, 4.0, 11.0, 12.0, 9.0, 13.0, 20.0, 19.0, 25.0, 35.0, 28.0, 34.0, 54.0, 47.0, 51.0, 53.0, 60.0, 64.0, 56.0, 53.0, 47.0, 45.0, 44.0, 35.0, 31.0, 36.0, 26.0, 14.0, 13.0, 18.0, 15.0, 9.0, 6.0, 6.0, 5.0, 2.0, 0.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-31.852067947387695, -30.700029373168945, -29.547992706298828, -28.395954132080078, -27.243915557861328, -26.09187889099121, -24.93984031677246, -23.787803649902344, -22.635765075683594, -21.483726501464844, -20.331689834594727, -19.179651260375977, -18.02761459350586, -16.87557601928711, -15.72353744506836, -14.571499824523926, -13.419462203979492, -12.267424583435059, -11.115386962890625, -9.963348388671875, -8.811310768127441, -7.659273147583008, -6.507235050201416, -5.355196952819824, -4.203159332275391, -3.051121473312378, -1.8990836143493652, -0.7470457553863525, 0.40499210357666016, 1.5570297241210938, 2.7090678215026855, 3.8611059188842773, 5.013145446777344, 6.165183067321777, 7.317221164703369, 8.469259262084961, 9.621296882629395, 10.773334503173828, 11.925373077392578, 13.077410697937012, 14.229448318481445, 15.381485939025879, 16.533523559570312, 17.685562133789062, 18.837600708007812, 19.98963737487793, 21.14167594909668, 22.293712615966797, 23.445751190185547, 24.597789764404297, 25.749826431274414, 26.901865005493164, 28.05390167236328, 29.20594024658203, 30.35797882080078, 31.51001739501953, 32.66205596923828, 33.81409454345703, 34.96613311767578, 36.118167877197266, 37.270206451416016, 38.422245025634766, 39.574283599853516, 40.726322174072266, 41.87835693359375]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0, 3.0, 3.0, 9.0, 9.0, 6.0, 11.0, 19.0, 18.0, 33.0, 49.0, 53.0, 87.0, 143.0, 254.0, 484.0, 801.0, 1899.0, 6020.0, 26776.0, 2463611.0, 1657960.0, 26357.0, 5933.0, 1936.0, 841.0, 361.0, 258.0, 121.0, 88.0, 48.0, 29.0, 22.0, 14.0, 4.0, 9.0, 8.0, 2.0, 3.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.265625, -11.8939208984375, -11.522216796875, -11.1505126953125, -10.77880859375, -10.4071044921875, -10.035400390625, -9.6636962890625, -9.2919921875, -8.9202880859375, -8.548583984375, -8.1768798828125, -7.80517578125, -7.4334716796875, -7.061767578125, -6.6900634765625, -6.318359375, -5.9466552734375, -5.574951171875, -5.2032470703125, -4.83154296875, -4.4598388671875, -4.088134765625, -3.7164306640625, -3.3447265625, -2.9730224609375, -2.601318359375, -2.2296142578125, -1.85791015625, -1.4862060546875, -1.114501953125, -0.7427978515625, -0.37109375, 0.0006103515625, 0.372314453125, 0.7440185546875, 1.11572265625, 1.4874267578125, 1.859130859375, 2.2308349609375, 2.6025390625, 2.9742431640625, 3.345947265625, 3.7176513671875, 4.08935546875, 4.4610595703125, 4.832763671875, 5.2044677734375, 5.576171875, 5.9478759765625, 6.319580078125, 6.6912841796875, 7.06298828125, 7.4346923828125, 7.806396484375, 8.1781005859375, 8.5498046875, 8.9215087890625, 9.293212890625, 9.6649169921875, 10.03662109375, 10.4083251953125, 10.780029296875, 11.1517333984375, 11.5234375]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 2.0, 4.0, 8.0, 15.0, 27.0, 34.0, 33.0, 60.0, 71.0, 73.0, 85.0, 121.0, 103.0, 93.0, 66.0, 71.0, 36.0, 36.0, 26.0, 9.0, 10.0, 5.0, 2.0, 4.0, 3.0, 5.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.59375, -2.516510009765625, -2.43927001953125, -2.362030029296875, -2.2847900390625, -2.207550048828125, -2.13031005859375, -2.053070068359375, -1.975830078125, -1.898590087890625, -1.82135009765625, -1.744110107421875, -1.6668701171875, -1.589630126953125, -1.51239013671875, -1.435150146484375, -1.35791015625, -1.280670166015625, -1.20343017578125, -1.126190185546875, -1.0489501953125, -0.971710205078125, -0.89447021484375, -0.817230224609375, -0.739990234375, -0.662750244140625, -0.58551025390625, -0.508270263671875, -0.4310302734375, -0.353790283203125, -0.27655029296875, -0.199310302734375, -0.1220703125, -0.044830322265625, 0.03240966796875, 0.109649658203125, 0.1868896484375, 0.264129638671875, 0.34136962890625, 0.418609619140625, 0.495849609375, 0.573089599609375, 0.65032958984375, 0.727569580078125, 0.8048095703125, 0.882049560546875, 0.95928955078125, 1.036529541015625, 1.11376953125, 1.191009521484375, 1.26824951171875, 1.345489501953125, 1.4227294921875, 1.499969482421875, 1.57720947265625, 1.654449462890625, 1.731689453125, 1.808929443359375, 1.88616943359375, 1.963409423828125, 2.0406494140625, 2.117889404296875, 2.19512939453125, 2.272369384765625, 2.349609375]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 5.0, 5.0, 6.0, 12.0, 20.0, 31.0, 37.0, 60.0, 80.0, 210.0, 741.0, 5721.0, 150389.0, 4017096.0, 17554.0, 1702.0, 336.0, 110.0, 58.0, 42.0, 23.0, 16.0, 12.0, 11.0, 6.0, 4.0, 1.0, 3.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.625, -20.953857421875, -20.28271484375, -19.611572265625, -18.9404296875, -18.269287109375, -17.59814453125, -16.927001953125, -16.255859375, -15.584716796875, -14.91357421875, -14.242431640625, -13.5712890625, -12.900146484375, -12.22900390625, -11.557861328125, -10.88671875, -10.215576171875, -9.54443359375, -8.873291015625, -8.2021484375, -7.531005859375, -6.85986328125, -6.188720703125, -5.517578125, -4.846435546875, -4.17529296875, -3.504150390625, -2.8330078125, -2.161865234375, -1.49072265625, -0.819580078125, -0.1484375, 0.522705078125, 1.19384765625, 1.864990234375, 2.5361328125, 3.207275390625, 3.87841796875, 4.549560546875, 5.220703125, 5.891845703125, 6.56298828125, 7.234130859375, 7.9052734375, 8.576416015625, 9.24755859375, 9.918701171875, 10.58984375, 11.260986328125, 11.93212890625, 12.603271484375, 13.2744140625, 13.945556640625, 14.61669921875, 15.287841796875, 15.958984375, 16.630126953125, 17.30126953125, 17.972412109375, 18.6435546875, 19.314697265625, 19.98583984375, 20.656982421875, 21.328125]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 7.0, 7.0, 10.0, 7.0, 17.0, 30.0, 70.0, 168.0, 848.0, 2594.0, 186.0, 62.0, 31.0, 17.0, 14.0, 9.0, 4.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.5859375, -7.421295166015625, -7.25665283203125, -7.092010498046875, -6.9273681640625, -6.762725830078125, -6.59808349609375, -6.433441162109375, -6.268798828125, -6.104156494140625, -5.93951416015625, -5.774871826171875, -5.6102294921875, -5.445587158203125, -5.28094482421875, -5.116302490234375, -4.95166015625, -4.787017822265625, -4.62237548828125, -4.457733154296875, -4.2930908203125, -4.128448486328125, -3.96380615234375, -3.799163818359375, -3.634521484375, -3.469879150390625, -3.30523681640625, -3.140594482421875, -2.9759521484375, -2.811309814453125, -2.64666748046875, -2.482025146484375, -2.3173828125, -2.152740478515625, -1.98809814453125, -1.823455810546875, -1.6588134765625, -1.494171142578125, -1.32952880859375, -1.164886474609375, -1.000244140625, -0.835601806640625, -0.67095947265625, -0.506317138671875, -0.3416748046875, -0.177032470703125, -0.01239013671875, 0.152252197265625, 0.31689453125, 0.481536865234375, 0.64617919921875, 0.810821533203125, 0.9754638671875, 1.140106201171875, 1.30474853515625, 1.469390869140625, 1.634033203125, 1.798675537109375, 1.96331787109375, 2.127960205078125, 2.2926025390625, 2.457244873046875, 2.62188720703125, 2.786529541015625, 2.951171875]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 5.0, 13.0, 15.0, 66.0, 171.0, 371.0, 221.0, 96.0, 29.0, 10.0, 10.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.182174682617188, -18.104724884033203, -17.02727699279785, -15.949827194213867, -14.8723783493042, -13.794929504394531, -12.717479705810547, -11.640030860900879, -10.562582015991211, -9.485133171081543, -8.407684326171875, -7.330234527587891, -6.252785682678223, -5.175336837768555, -4.0978875160217285, -3.0204381942749023, -1.9429893493652344, -0.8655402660369873, 0.21190881729125977, 1.2893579006195068, 2.366806983947754, 3.444255828857422, 4.521705150604248, 5.599154472351074, 6.676603317260742, 7.75405216217041, 8.831501007080078, 9.908950805664062, 10.98639965057373, 12.063848495483398, 13.141298294067383, 14.21874713897705, 15.296192169189453, 16.373641967773438, 17.45108985900879, 18.528539657592773, 19.605987548828125, 20.68343734741211, 21.760887145996094, 22.838336944580078, 23.91578483581543, 24.993234634399414, 26.070682525634766, 27.14813232421875, 28.225582122802734, 29.303030014038086, 30.38047981262207, 31.457927703857422, 32.535377502441406, 33.61282730102539, 34.690277099609375, 35.767723083496094, 36.84517288208008, 37.92262268066406, 39.00007247924805, 40.07752227783203, 41.15496826171875, 42.232418060302734, 43.30986785888672, 44.38731384277344, 45.46476364135742, 46.542213439941406, 47.61966323852539, 48.697113037109375, 49.77456283569336]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 2.0, 0.0, 3.0, 2.0, 4.0, 4.0, 7.0, 5.0, 7.0, 5.0, 14.0, 10.0, 8.0, 11.0, 19.0, 17.0, 17.0, 18.0, 29.0, 18.0, 25.0, 32.0, 30.0, 41.0, 35.0, 43.0, 44.0, 42.0, 38.0, 42.0, 49.0, 30.0, 31.0, 42.0, 34.0, 23.0, 26.0, 16.0, 26.0, 21.0, 26.0, 15.0, 11.0, 10.0, 12.0, 14.0, 11.0, 8.0, 9.0, 4.0, 6.0, 4.0, 3.0, 3.0, 2.0, 1.0, 5.0, 1.0, 0.0, 2.0, 2.0], "bins": [-6.807323932647705, -6.588171482086182, -6.369019031524658, -6.149866104125977, -5.930713653564453, -5.71156120300293, -5.492408752441406, -5.273256301879883, -5.054103851318359, -4.834951400756836, -4.6157989501953125, -4.396646499633789, -4.177493572235107, -3.958341121673584, -3.7391886711120605, -3.520036220550537, -3.3008832931518555, -3.081730842590332, -2.8625781536102295, -2.643425703048706, -2.4242730140686035, -2.20512056350708, -1.9859681129455566, -1.7668155431747437, -1.5476629734039307, -1.3285104036331177, -1.1093578338623047, -0.8902053833007812, -0.6710528135299683, -0.4519002437591553, -0.23274779319763184, -0.013595223426818848, 0.20555782318115234, 0.42471036314964294, 0.6438629031181335, 0.8630154132843018, 1.0821679830551147, 1.3013205528259277, 1.5204730033874512, 1.7396255731582642, 1.9587781429290771, 2.1779305934906006, 2.397083282470703, 2.6162357330322266, 2.83538818359375, 3.0545408725738525, 3.273693323135376, 3.4928460121154785, 3.711998462677002, 3.9311509132385254, 4.150303363800049, 4.3694562911987305, 4.588608741760254, 4.807761192321777, 5.026913642883301, 5.246066093444824, 5.465218544006348, 5.684370994567871, 5.9035234451293945, 6.122675895690918, 6.3418288230896, 6.560981273651123, 6.7801337242126465, 6.99928617477417, 7.218439102172852]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 4.0, 5.0, 9.0, 15.0, 10.0, 21.0, 21.0, 24.0, 50.0, 70.0, 87.0, 144.0, 204.0, 222.0, 412.0, 623.0, 942.0, 1732.0, 3327.0, 7465.0, 20194.0, 64971.0, 234721.0, 472827.0, 166232.0, 46149.0, 15280.0, 5889.0, 2758.0, 1491.0, 884.0, 546.0, 355.0, 273.0, 170.0, 115.0, 82.0, 63.0, 51.0, 37.0, 22.0, 17.0, 14.0, 12.0, 7.0, 7.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 1.0], "bins": [-7.19140625, -6.95458984375, -6.7177734375, -6.48095703125, -6.244140625, -6.00732421875, -5.7705078125, -5.53369140625, -5.296875, -5.06005859375, -4.8232421875, -4.58642578125, -4.349609375, -4.11279296875, -3.8759765625, -3.63916015625, -3.40234375, -3.16552734375, -2.9287109375, -2.69189453125, -2.455078125, -2.21826171875, -1.9814453125, -1.74462890625, -1.5078125, -1.27099609375, -1.0341796875, -0.79736328125, -0.560546875, -0.32373046875, -0.0869140625, 0.14990234375, 0.38671875, 0.62353515625, 0.8603515625, 1.09716796875, 1.333984375, 1.57080078125, 1.8076171875, 2.04443359375, 2.28125, 2.51806640625, 2.7548828125, 2.99169921875, 3.228515625, 3.46533203125, 3.7021484375, 3.93896484375, 4.17578125, 4.41259765625, 4.6494140625, 4.88623046875, 5.123046875, 5.35986328125, 5.5966796875, 5.83349609375, 6.0703125, 6.30712890625, 6.5439453125, 6.78076171875, 7.017578125, 7.25439453125, 7.4912109375, 7.72802734375, 7.96484375]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 4.0, 3.0, 4.0, 14.0, 13.0, 32.0, 37.0, 42.0, 63.0, 87.0, 91.0, 113.0, 100.0, 100.0, 86.0, 63.0, 49.0, 37.0, 26.0, 14.0, 9.0, 5.0, 1.0, 8.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.64453125, -2.5616455078125, -2.478759765625, -2.3958740234375, -2.31298828125, -2.2301025390625, -2.147216796875, -2.0643310546875, -1.9814453125, -1.8985595703125, -1.815673828125, -1.7327880859375, -1.64990234375, -1.5670166015625, -1.484130859375, -1.4012451171875, -1.318359375, -1.2354736328125, -1.152587890625, -1.0697021484375, -0.98681640625, -0.9039306640625, -0.821044921875, -0.7381591796875, -0.6552734375, -0.5723876953125, -0.489501953125, -0.4066162109375, -0.32373046875, -0.2408447265625, -0.157958984375, -0.0750732421875, 0.0078125, 0.0906982421875, 0.173583984375, 0.2564697265625, 0.33935546875, 0.4222412109375, 0.505126953125, 0.5880126953125, 0.6708984375, 0.7537841796875, 0.836669921875, 0.9195556640625, 1.00244140625, 1.0853271484375, 1.168212890625, 1.2510986328125, 1.333984375, 1.4168701171875, 1.499755859375, 1.5826416015625, 1.66552734375, 1.7484130859375, 1.831298828125, 1.9141845703125, 1.9970703125, 2.0799560546875, 2.162841796875, 2.2457275390625, 2.32861328125, 2.4114990234375, 2.494384765625, 2.5772705078125, 2.66015625]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 4.0, 4.0, 1.0, 4.0, 6.0, 6.0, 6.0, 7.0, 18.0, 12.0, 24.0, 28.0, 41.0, 57.0, 78.0, 157.0, 305.0, 649.0, 1650.0, 4633.0, 14728.0, 56839.0, 250105.0, 524594.0, 145214.0, 34437.0, 9646.0, 3070.0, 1151.0, 488.0, 238.0, 113.0, 87.0, 33.0, 24.0, 21.0, 21.0, 17.0, 5.0, 9.0, 11.0, 7.0, 4.0, 4.0, 0.0, 2.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.1875, -6.950439453125, -6.71337890625, -6.476318359375, -6.2392578125, -6.002197265625, -5.76513671875, -5.528076171875, -5.291015625, -5.053955078125, -4.81689453125, -4.579833984375, -4.3427734375, -4.105712890625, -3.86865234375, -3.631591796875, -3.39453125, -3.157470703125, -2.92041015625, -2.683349609375, -2.4462890625, -2.209228515625, -1.97216796875, -1.735107421875, -1.498046875, -1.260986328125, -1.02392578125, -0.786865234375, -0.5498046875, -0.312744140625, -0.07568359375, 0.161376953125, 0.3984375, 0.635498046875, 0.87255859375, 1.109619140625, 1.3466796875, 1.583740234375, 1.82080078125, 2.057861328125, 2.294921875, 2.531982421875, 2.76904296875, 3.006103515625, 3.2431640625, 3.480224609375, 3.71728515625, 3.954345703125, 4.19140625, 4.428466796875, 4.66552734375, 4.902587890625, 5.1396484375, 5.376708984375, 5.61376953125, 5.850830078125, 6.087890625, 6.324951171875, 6.56201171875, 6.799072265625, 7.0361328125, 7.273193359375, 7.51025390625, 7.747314453125, 7.984375]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 13.0, 8.0, 10.0, 14.0, 9.0, 11.0, 22.0, 22.0, 23.0, 29.0, 27.0, 27.0, 28.0, 43.0, 37.0, 37.0, 32.0, 45.0, 42.0, 41.0, 45.0, 32.0, 39.0, 44.0, 39.0, 27.0, 21.0, 24.0, 22.0, 27.0, 21.0, 23.0, 25.0, 14.0, 5.0, 13.0, 9.0, 9.0, 5.0, 10.0, 7.0, 4.0, 4.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.21484375, -5.0567626953125, -4.898681640625, -4.7406005859375, -4.58251953125, -4.4244384765625, -4.266357421875, -4.1082763671875, -3.9501953125, -3.7921142578125, -3.634033203125, -3.4759521484375, -3.31787109375, -3.1597900390625, -3.001708984375, -2.8436279296875, -2.685546875, -2.5274658203125, -2.369384765625, -2.2113037109375, -2.05322265625, -1.8951416015625, -1.737060546875, -1.5789794921875, -1.4208984375, -1.2628173828125, -1.104736328125, -0.9466552734375, -0.78857421875, -0.6304931640625, -0.472412109375, -0.3143310546875, -0.15625, 0.0018310546875, 0.159912109375, 0.3179931640625, 0.47607421875, 0.6341552734375, 0.792236328125, 0.9503173828125, 1.1083984375, 1.2664794921875, 1.424560546875, 1.5826416015625, 1.74072265625, 1.8988037109375, 2.056884765625, 2.2149658203125, 2.373046875, 2.5311279296875, 2.689208984375, 2.8472900390625, 3.00537109375, 3.1634521484375, 3.321533203125, 3.4796142578125, 3.6376953125, 3.7957763671875, 3.953857421875, 4.1119384765625, 4.27001953125, 4.4281005859375, 4.586181640625, 4.7442626953125, 4.90234375]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 3.0, 2.0, 1.0, 3.0, 4.0, 5.0, 8.0, 6.0, 7.0, 10.0, 24.0, 19.0, 34.0, 45.0, 91.0, 162.0, 296.0, 614.0, 1337.0, 3231.0, 9579.0, 48739.0, 639140.0, 307368.0, 26496.0, 6717.0, 2449.0, 1139.0, 454.0, 252.0, 117.0, 76.0, 36.0, 31.0, 16.0, 11.0, 10.0, 13.0, 6.0, 3.0, 2.0, 6.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.56640625, -5.3916015625, -5.216796875, -5.0419921875, -4.8671875, -4.6923828125, -4.517578125, -4.3427734375, -4.16796875, -3.9931640625, -3.818359375, -3.6435546875, -3.46875, -3.2939453125, -3.119140625, -2.9443359375, -2.76953125, -2.5947265625, -2.419921875, -2.2451171875, -2.0703125, -1.8955078125, -1.720703125, -1.5458984375, -1.37109375, -1.1962890625, -1.021484375, -0.8466796875, -0.671875, -0.4970703125, -0.322265625, -0.1474609375, 0.02734375, 0.2021484375, 0.376953125, 0.5517578125, 0.7265625, 0.9013671875, 1.076171875, 1.2509765625, 1.42578125, 1.6005859375, 1.775390625, 1.9501953125, 2.125, 2.2998046875, 2.474609375, 2.6494140625, 2.82421875, 2.9990234375, 3.173828125, 3.3486328125, 3.5234375, 3.6982421875, 3.873046875, 4.0478515625, 4.22265625, 4.3974609375, 4.572265625, 4.7470703125, 4.921875, 5.0966796875, 5.271484375, 5.4462890625, 5.62109375]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 4.0, 5.0, 3.0, 13.0, 12.0, 20.0, 31.0, 58.0, 72.0, 133.0, 173.0, 175.0, 106.0, 77.0, 44.0, 23.0, 14.0, 9.0, 9.0, 3.0, 5.0, 4.0, 6.0, 1.0, 5.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005078315734863281, -0.00048207491636276245, -0.0004563182592391968, -0.0004305616021156311, -0.00040480494499206543, -0.00037904828786849976, -0.0003532916307449341, -0.0003275349736213684, -0.00030177831649780273, -0.00027602165937423706, -0.0002502650022506714, -0.0002245083451271057, -0.00019875168800354004, -0.00017299503087997437, -0.0001472383737564087, -0.00012148171663284302, -9.572505950927734e-05, -6.996840238571167e-05, -4.4211745262145996e-05, -1.8455088138580322e-05, 7.3015689849853516e-06, 3.3058226108551025e-05, 5.88148832321167e-05, 8.457154035568237e-05, 0.00011032819747924805, 0.00013608485460281372, 0.0001618415117263794, 0.00018759816884994507, 0.00021335482597351074, 0.00023911148309707642, 0.0002648681402206421, 0.00029062479734420776, 0.00031638145446777344, 0.0003421381115913391, 0.0003678947687149048, 0.00039365142583847046, 0.00041940808296203613, 0.0004451647400856018, 0.0004709213972091675, 0.0004966780543327332, 0.0005224347114562988, 0.0005481913685798645, 0.0005739480257034302, 0.0005997046828269958, 0.0006254613399505615, 0.0006512179970741272, 0.0006769746541976929, 0.0007027313113212585, 0.0007284879684448242, 0.0007542446255683899, 0.0007800012826919556, 0.0008057579398155212, 0.0008315145969390869, 0.0008572712540626526, 0.0008830279111862183, 0.0009087845683097839, 0.0009345412254333496, 0.0009602978825569153, 0.000986054539680481, 0.0010118111968040466, 0.0010375678539276123, 0.001063324511051178, 0.0010890811681747437, 0.0011148378252983093, 0.001140594482421875]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 11.0, 6.0, 12.0, 16.0, 29.0, 62.0, 143.0, 409.0, 1559.0, 8687.0, 520446.0, 506274.0, 8678.0, 1476.0, 450.0, 138.0, 68.0, 29.0, 13.0, 15.0, 7.0, 8.0, 4.0, 3.0, 5.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.578125, -13.200927734375, -12.82373046875, -12.446533203125, -12.0693359375, -11.692138671875, -11.31494140625, -10.937744140625, -10.560546875, -10.183349609375, -9.80615234375, -9.428955078125, -9.0517578125, -8.674560546875, -8.29736328125, -7.920166015625, -7.54296875, -7.165771484375, -6.78857421875, -6.411376953125, -6.0341796875, -5.656982421875, -5.27978515625, -4.902587890625, -4.525390625, -4.148193359375, -3.77099609375, -3.393798828125, -3.0166015625, -2.639404296875, -2.26220703125, -1.885009765625, -1.5078125, -1.130615234375, -0.75341796875, -0.376220703125, 0.0009765625, 0.378173828125, 0.75537109375, 1.132568359375, 1.509765625, 1.886962890625, 2.26416015625, 2.641357421875, 3.0185546875, 3.395751953125, 3.77294921875, 4.150146484375, 4.52734375, 4.904541015625, 5.28173828125, 5.658935546875, 6.0361328125, 6.413330078125, 6.79052734375, 7.167724609375, 7.544921875, 7.922119140625, 8.29931640625, 8.676513671875, 9.0537109375, 9.430908203125, 9.80810546875, 10.185302734375, 10.5625]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 3.0, 2.0, 3.0, 3.0, 9.0, 3.0, 5.0, 10.0, 19.0, 27.0, 50.0, 89.0, 117.0, 143.0, 148.0, 100.0, 99.0, 56.0, 44.0, 24.0, 12.0, 8.0, 9.0, 3.0, 7.0, 4.0, 3.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.8828125, -4.7479248046875, -4.613037109375, -4.4781494140625, -4.34326171875, -4.2083740234375, -4.073486328125, -3.9385986328125, -3.8037109375, -3.6688232421875, -3.533935546875, -3.3990478515625, -3.26416015625, -3.1292724609375, -2.994384765625, -2.8594970703125, -2.724609375, -2.5897216796875, -2.454833984375, -2.3199462890625, -2.18505859375, -2.0501708984375, -1.915283203125, -1.7803955078125, -1.6455078125, -1.5106201171875, -1.375732421875, -1.2408447265625, -1.10595703125, -0.9710693359375, -0.836181640625, -0.7012939453125, -0.56640625, -0.4315185546875, -0.296630859375, -0.1617431640625, -0.02685546875, 0.1080322265625, 0.242919921875, 0.3778076171875, 0.5126953125, 0.6475830078125, 0.782470703125, 0.9173583984375, 1.05224609375, 1.1871337890625, 1.322021484375, 1.4569091796875, 1.591796875, 1.7266845703125, 1.861572265625, 1.9964599609375, 2.13134765625, 2.2662353515625, 2.401123046875, 2.5360107421875, 2.6708984375, 2.8057861328125, 2.940673828125, 3.0755615234375, 3.21044921875, 3.3453369140625, 3.480224609375, 3.6151123046875, 3.75]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 6.0, 7.0, 24.0, 48.0, 114.0, 262.0, 301.0, 163.0, 47.0, 18.0, 8.0, 2.0, 5.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-128.00192260742188, -124.81848907470703, -121.63505554199219, -118.45162200927734, -115.2681884765625, -112.08475494384766, -108.90132141113281, -105.71788787841797, -102.53445434570312, -99.35102081298828, -96.16758728027344, -92.9841537475586, -89.80072021484375, -86.6172866821289, -83.43385314941406, -80.25041961669922, -77.06698608398438, -73.88355255126953, -70.70011901855469, -67.51668548583984, -64.333251953125, -61.149818420410156, -57.96638488769531, -54.78295135498047, -51.599525451660156, -48.41609191894531, -45.23265838623047, -42.049224853515625, -38.86579132080078, -35.68235778808594, -32.498924255371094, -29.315492630004883, -26.132057189941406, -22.948623657226562, -19.76519012451172, -16.581756591796875, -13.398324012756348, -10.214890480041504, -7.031457901000977, -3.848024368286133, -0.6645908355712891, 2.5188424587249756, 5.70227575302124, 8.885708808898926, 12.06914234161377, 15.252575874328613, 18.43600845336914, 21.619441986083984, 24.802875518798828, 27.986309051513672, 31.169742584228516, 34.35317611694336, 37.5366096496582, 40.72004318237305, 43.903472900390625, 47.08690643310547, 50.27033996582031, 53.453773498535156, 56.63720703125, 59.820640563964844, 63.00407409667969, 66.18750762939453, 69.37094116210938, 72.55437469482422, 75.73780822753906]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 4.0, 1.0, 1.0, 4.0, 4.0, 5.0, 10.0, 8.0, 15.0, 16.0, 17.0, 17.0, 31.0, 29.0, 28.0, 35.0, 32.0, 39.0, 49.0, 47.0, 58.0, 50.0, 54.0, 42.0, 50.0, 56.0, 38.0, 46.0, 42.0, 22.0, 28.0, 27.0, 21.0, 20.0, 17.0, 10.0, 10.0, 14.0, 4.0, 6.0, 1.0, 1.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-41.9231071472168, -40.73573684692383, -39.54836654663086, -38.360992431640625, -37.173622131347656, -35.98625183105469, -34.79888153076172, -33.61151123046875, -32.42414093017578, -31.236770629882812, -30.04939842224121, -28.862028121948242, -27.67465591430664, -26.487285614013672, -25.299915313720703, -24.112545013427734, -22.9251708984375, -21.73780059814453, -20.55042839050293, -19.36305809020996, -18.17568588256836, -16.98831558227539, -15.800945281982422, -14.613574028015137, -13.426202774047852, -12.238831520080566, -11.051460266113281, -9.864089965820312, -8.676718711853027, -7.489347457885742, -6.301976680755615, -5.114605903625488, -3.9272384643554688, -2.7398674488067627, -1.5524964332580566, -0.3651254177093506, 0.8222455978393555, 2.0096168518066406, 3.1969876289367676, 4.3843584060668945, 5.57172966003418, 6.759100914001465, 7.946471691131592, 9.133842468261719, 10.321213722229004, 11.508584976196289, 12.695955276489258, 13.883326530456543, 15.070697784423828, 16.258068084716797, 17.4454402923584, 18.632810592651367, 19.82018280029297, 21.007553100585938, 22.194923400878906, 23.382293701171875, 24.569665908813477, 25.757036209106445, 26.944408416748047, 28.131778717041016, 29.319149017333984, 30.506521224975586, 31.693891525268555, 32.881263732910156, 34.068634033203125]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 7.0, 3.0, 8.0, 14.0, 10.0, 18.0, 23.0, 21.0, 37.0, 50.0, 85.0, 128.0, 211.0, 345.0, 627.0, 1256.0, 3140.0, 12197.0, 118068.0, 3993501.0, 52600.0, 7629.0, 2246.0, 937.0, 481.0, 272.0, 118.0, 93.0, 54.0, 26.0, 23.0, 18.0, 9.0, 8.0, 4.0, 3.0, 5.0, 2.0, 4.0, 4.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-13.9609375, -13.4854736328125, -13.010009765625, -12.5345458984375, -12.05908203125, -11.5836181640625, -11.108154296875, -10.6326904296875, -10.1572265625, -9.6817626953125, -9.206298828125, -8.7308349609375, -8.25537109375, -7.7799072265625, -7.304443359375, -6.8289794921875, -6.353515625, -5.8780517578125, -5.402587890625, -4.9271240234375, -4.45166015625, -3.9761962890625, -3.500732421875, -3.0252685546875, -2.5498046875, -2.0743408203125, -1.598876953125, -1.1234130859375, -0.64794921875, -0.1724853515625, 0.302978515625, 0.7784423828125, 1.25390625, 1.7293701171875, 2.204833984375, 2.6802978515625, 3.15576171875, 3.6312255859375, 4.106689453125, 4.5821533203125, 5.0576171875, 5.5330810546875, 6.008544921875, 6.4840087890625, 6.95947265625, 7.4349365234375, 7.910400390625, 8.3858642578125, 8.861328125, 9.3367919921875, 9.812255859375, 10.2877197265625, 10.76318359375, 11.2386474609375, 11.714111328125, 12.1895751953125, 12.6650390625, 13.1405029296875, 13.615966796875, 14.0914306640625, 14.56689453125, 15.0423583984375, 15.517822265625, 15.9932861328125, 16.46875]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 3.0, 3.0, 2.0, 4.0, 8.0, 9.0, 15.0, 19.0, 29.0, 34.0, 47.0, 54.0, 76.0, 77.0, 80.0, 95.0, 106.0, 77.0, 74.0, 63.0, 29.0, 40.0, 13.0, 18.0, 11.0, 9.0, 1.0, 3.0, 5.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.669921875, -2.584136962890625, -2.49835205078125, -2.412567138671875, -2.3267822265625, -2.240997314453125, -2.15521240234375, -2.069427490234375, -1.983642578125, -1.897857666015625, -1.81207275390625, -1.726287841796875, -1.6405029296875, -1.554718017578125, -1.46893310546875, -1.383148193359375, -1.29736328125, -1.211578369140625, -1.12579345703125, -1.040008544921875, -0.9542236328125, -0.868438720703125, -0.78265380859375, -0.696868896484375, -0.611083984375, -0.525299072265625, -0.43951416015625, -0.353729248046875, -0.2679443359375, -0.182159423828125, -0.09637451171875, -0.010589599609375, 0.0751953125, 0.160980224609375, 0.24676513671875, 0.332550048828125, 0.4183349609375, 0.504119873046875, 0.58990478515625, 0.675689697265625, 0.761474609375, 0.847259521484375, 0.93304443359375, 1.018829345703125, 1.1046142578125, 1.190399169921875, 1.27618408203125, 1.361968994140625, 1.44775390625, 1.533538818359375, 1.61932373046875, 1.705108642578125, 1.7908935546875, 1.876678466796875, 1.96246337890625, 2.048248291015625, 2.134033203125, 2.219818115234375, 2.30560302734375, 2.391387939453125, 2.4771728515625, 2.562957763671875, 2.64874267578125, 2.734527587890625, 2.8203125]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 4.0, 5.0, 3.0, 2.0, 7.0, 9.0, 5.0, 14.0, 22.0, 42.0, 106.0, 361.0, 1450.0, 9794.0, 174039.0, 3973804.0, 30144.0, 3417.0, 717.0, 198.0, 73.0, 28.0, 15.0, 9.0, 8.0, 5.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.09375, -17.453125, -16.8125, -16.171875, -15.53125, -14.890625, -14.25, -13.609375, -12.96875, -12.328125, -11.6875, -11.046875, -10.40625, -9.765625, -9.125, -8.484375, -7.84375, -7.203125, -6.5625, -5.921875, -5.28125, -4.640625, -4.0, -3.359375, -2.71875, -2.078125, -1.4375, -0.796875, -0.15625, 0.484375, 1.125, 1.765625, 2.40625, 3.046875, 3.6875, 4.328125, 4.96875, 5.609375, 6.25, 6.890625, 7.53125, 8.171875, 8.8125, 9.453125, 10.09375, 10.734375, 11.375, 12.015625, 12.65625, 13.296875, 13.9375, 14.578125, 15.21875, 15.859375, 16.5, 17.140625, 17.78125, 18.421875, 19.0625, 19.703125, 20.34375, 20.984375, 21.625, 22.265625, 22.90625]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 1.0, 3.0, 2.0, 4.0, 3.0, 6.0, 5.0, 11.0, 6.0, 10.0, 16.0, 23.0, 31.0, 33.0, 45.0, 84.0, 185.0, 499.0, 2390.0, 342.0, 143.0, 86.0, 45.0, 28.0, 15.0, 11.0, 15.0, 11.0, 8.0, 5.0, 2.0, 5.0, 3.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.2734375, -3.15362548828125, -3.0338134765625, -2.91400146484375, -2.794189453125, -2.67437744140625, -2.5545654296875, -2.43475341796875, -2.31494140625, -2.19512939453125, -2.0753173828125, -1.95550537109375, -1.835693359375, -1.71588134765625, -1.5960693359375, -1.47625732421875, -1.3564453125, -1.23663330078125, -1.1168212890625, -0.99700927734375, -0.877197265625, -0.75738525390625, -0.6375732421875, -0.51776123046875, -0.39794921875, -0.27813720703125, -0.1583251953125, -0.03851318359375, 0.081298828125, 0.20111083984375, 0.3209228515625, 0.44073486328125, 0.560546875, 0.68035888671875, 0.8001708984375, 0.91998291015625, 1.039794921875, 1.15960693359375, 1.2794189453125, 1.39923095703125, 1.51904296875, 1.63885498046875, 1.7586669921875, 1.87847900390625, 1.998291015625, 2.11810302734375, 2.2379150390625, 2.35772705078125, 2.4775390625, 2.59735107421875, 2.7171630859375, 2.83697509765625, 2.956787109375, 3.07659912109375, 3.1964111328125, 3.31622314453125, 3.43603515625, 3.55584716796875, 3.6756591796875, 3.79547119140625, 3.915283203125, 4.03509521484375, 4.1549072265625, 4.27471923828125, 4.39453125]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 17.0, 41.0, 135.0, 419.0, 298.0, 72.0, 22.0, 3.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-91.73446655273438, -89.85538482666016, -87.97630310058594, -86.09722137451172, -84.2181396484375, -82.33906555175781, -80.4599838256836, -78.58090209960938, -76.70182037353516, -74.82273864746094, -72.94365692138672, -71.0645751953125, -69.18549346923828, -67.30641174316406, -65.42733764648438, -63.548255920410156, -61.66917419433594, -59.79009246826172, -57.9110107421875, -56.03193283081055, -54.15285110473633, -52.27376937866211, -50.39468765258789, -48.51560974121094, -46.63652420043945, -44.757442474365234, -42.878360748291016, -40.99928283691406, -39.120201110839844, -37.241119384765625, -35.362037658691406, -33.48295593261719, -31.603878021240234, -29.724796295166016, -27.84571647644043, -25.96663475036621, -24.087554931640625, -22.208473205566406, -20.329391479492188, -18.4503116607666, -16.571231842041016, -14.692151069641113, -12.813070297241211, -10.933988571166992, -9.054908752441406, -7.1758270263671875, -5.296746253967285, -3.417665481567383, -1.538583755493164, 0.34049713611602783, 2.2195780277252197, 4.098659038543701, 5.9777398109436035, 7.856821060180664, 9.735901832580566, 11.614982604980469, 13.494063377380371, 15.373144149780273, 17.252225875854492, 19.131305694580078, 21.010387420654297, 22.889469146728516, 24.7685489654541, 26.647628784179688, 28.526710510253906]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 2.0, 7.0, 6.0, 11.0, 13.0, 14.0, 17.0, 26.0, 30.0, 38.0, 31.0, 48.0, 58.0, 52.0, 51.0, 51.0, 54.0, 61.0, 44.0, 55.0, 56.0, 54.0, 46.0, 41.0, 33.0, 18.0, 23.0, 12.0, 16.0, 7.0, 13.0, 7.0, 6.0, 2.0, 2.0, 1.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-16.47601318359375, -16.03172492980957, -15.587434768676758, -15.143145561218262, -14.698856353759766, -14.254568099975586, -13.810277938842773, -13.365989685058594, -12.921700477600098, -12.477411270141602, -12.033122062683105, -11.58883285522461, -11.144543647766113, -10.700254440307617, -10.255966186523438, -9.811676979064941, -9.367387771606445, -8.92309856414795, -8.478809356689453, -8.034520149230957, -7.590231418609619, -7.145942211151123, -6.701653003692627, -6.257364273071289, -5.813074111938477, -5.3687849044799805, -4.924495697021484, -4.480206489562988, -4.03591775894165, -3.5916285514831543, -3.147339344024658, -2.703050374984741, -2.258761405944824, -1.8144723176956177, -1.3701832294464111, -0.925894021987915, -0.4816049337387085, -0.03731584548950195, 0.40697336196899414, 0.8512623310089111, 1.2955515384674072, 1.7398406267166138, 2.1841297149658203, 2.6284189224243164, 3.0727081298828125, 3.5169970989227295, 3.9612863063812256, 4.405575275421143, 4.849864482879639, 5.294153690338135, 5.738442897796631, 6.182731628417969, 6.627020835876465, 7.071310043334961, 7.515599250793457, 7.959888458251953, 8.40417766571045, 8.848466873168945, 9.292756080627441, 9.737045288085938, 10.181334495544434, 10.62562370300293, 11.06991195678711, 11.514201164245605, 11.958490371704102]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 6.0, 3.0, 5.0, 8.0, 9.0, 12.0, 18.0, 27.0, 52.0, 87.0, 110.0, 224.0, 396.0, 855.0, 1956.0, 5471.0, 21819.0, 153572.0, 735174.0, 104412.0, 16468.0, 4490.0, 1764.0, 740.0, 397.0, 170.0, 105.0, 73.0, 43.0, 30.0, 25.0, 11.0, 10.0, 7.0, 4.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-13.484375, -13.041015625, -12.59765625, -12.154296875, -11.7109375, -11.267578125, -10.82421875, -10.380859375, -9.9375, -9.494140625, -9.05078125, -8.607421875, -8.1640625, -7.720703125, -7.27734375, -6.833984375, -6.390625, -5.947265625, -5.50390625, -5.060546875, -4.6171875, -4.173828125, -3.73046875, -3.287109375, -2.84375, -2.400390625, -1.95703125, -1.513671875, -1.0703125, -0.626953125, -0.18359375, 0.259765625, 0.703125, 1.146484375, 1.58984375, 2.033203125, 2.4765625, 2.919921875, 3.36328125, 3.806640625, 4.25, 4.693359375, 5.13671875, 5.580078125, 6.0234375, 6.466796875, 6.91015625, 7.353515625, 7.796875, 8.240234375, 8.68359375, 9.126953125, 9.5703125, 10.013671875, 10.45703125, 10.900390625, 11.34375, 11.787109375, 12.23046875, 12.673828125, 13.1171875, 13.560546875, 14.00390625, 14.447265625, 14.890625]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 2.0, 5.0, 9.0, 16.0, 14.0, 28.0, 25.0, 52.0, 53.0, 71.0, 107.0, 81.0, 103.0, 80.0, 93.0, 72.0, 46.0, 42.0, 43.0, 17.0, 13.0, 11.0, 7.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.2578125, -3.16534423828125, -3.0728759765625, -2.98040771484375, -2.887939453125, -2.79547119140625, -2.7030029296875, -2.61053466796875, -2.51806640625, -2.42559814453125, -2.3331298828125, -2.24066162109375, -2.148193359375, -2.05572509765625, -1.9632568359375, -1.87078857421875, -1.7783203125, -1.68585205078125, -1.5933837890625, -1.50091552734375, -1.408447265625, -1.31597900390625, -1.2235107421875, -1.13104248046875, -1.03857421875, -0.94610595703125, -0.8536376953125, -0.76116943359375, -0.668701171875, -0.57623291015625, -0.4837646484375, -0.39129638671875, -0.298828125, -0.20635986328125, -0.1138916015625, -0.02142333984375, 0.071044921875, 0.16351318359375, 0.2559814453125, 0.34844970703125, 0.44091796875, 0.53338623046875, 0.6258544921875, 0.71832275390625, 0.810791015625, 0.90325927734375, 0.9957275390625, 1.08819580078125, 1.1806640625, 1.27313232421875, 1.3656005859375, 1.45806884765625, 1.550537109375, 1.64300537109375, 1.7354736328125, 1.82794189453125, 1.92041015625, 2.01287841796875, 2.1053466796875, 2.19781494140625, 2.290283203125, 2.38275146484375, 2.4752197265625, 2.56768798828125, 2.66015625]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 5.0, 6.0, 3.0, 15.0, 12.0, 21.0, 30.0, 42.0, 73.0, 156.0, 339.0, 1066.0, 4265.0, 32456.0, 658733.0, 327604.0, 19314.0, 3024.0, 789.0, 264.0, 141.0, 66.0, 46.0, 25.0, 15.0, 23.0, 8.0, 8.0, 3.0, 4.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.734375, -16.2406005859375, -15.746826171875, -15.2530517578125, -14.75927734375, -14.2655029296875, -13.771728515625, -13.2779541015625, -12.7841796875, -12.2904052734375, -11.796630859375, -11.3028564453125, -10.80908203125, -10.3153076171875, -9.821533203125, -9.3277587890625, -8.833984375, -8.3402099609375, -7.846435546875, -7.3526611328125, -6.85888671875, -6.3651123046875, -5.871337890625, -5.3775634765625, -4.8837890625, -4.3900146484375, -3.896240234375, -3.4024658203125, -2.90869140625, -2.4149169921875, -1.921142578125, -1.4273681640625, -0.93359375, -0.4398193359375, 0.053955078125, 0.5477294921875, 1.04150390625, 1.5352783203125, 2.029052734375, 2.5228271484375, 3.0166015625, 3.5103759765625, 4.004150390625, 4.4979248046875, 4.99169921875, 5.4854736328125, 5.979248046875, 6.4730224609375, 6.966796875, 7.4605712890625, 7.954345703125, 8.4481201171875, 8.94189453125, 9.4356689453125, 9.929443359375, 10.4232177734375, 10.9169921875, 11.4107666015625, 11.904541015625, 12.3983154296875, 12.89208984375, 13.3858642578125, 13.879638671875, 14.3734130859375, 14.8671875]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 6.0, 0.0, 5.0, 0.0, 1.0, 2.0, 3.0, 5.0, 7.0, 8.0, 11.0, 14.0, 21.0, 25.0, 31.0, 23.0, 44.0, 42.0, 41.0, 43.0, 45.0, 50.0, 49.0, 54.0, 59.0, 54.0, 44.0, 57.0, 39.0, 28.0, 39.0, 29.0, 22.0, 22.0, 20.0, 24.0, 9.0, 12.0, 7.0, 3.0, 3.0, 5.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.33203125, -7.099365234375, -6.86669921875, -6.634033203125, -6.4013671875, -6.168701171875, -5.93603515625, -5.703369140625, -5.470703125, -5.238037109375, -5.00537109375, -4.772705078125, -4.5400390625, -4.307373046875, -4.07470703125, -3.842041015625, -3.609375, -3.376708984375, -3.14404296875, -2.911376953125, -2.6787109375, -2.446044921875, -2.21337890625, -1.980712890625, -1.748046875, -1.515380859375, -1.28271484375, -1.050048828125, -0.8173828125, -0.584716796875, -0.35205078125, -0.119384765625, 0.11328125, 0.345947265625, 0.57861328125, 0.811279296875, 1.0439453125, 1.276611328125, 1.50927734375, 1.741943359375, 1.974609375, 2.207275390625, 2.43994140625, 2.672607421875, 2.9052734375, 3.137939453125, 3.37060546875, 3.603271484375, 3.8359375, 4.068603515625, 4.30126953125, 4.533935546875, 4.7666015625, 4.999267578125, 5.23193359375, 5.464599609375, 5.697265625, 5.929931640625, 6.16259765625, 6.395263671875, 6.6279296875, 6.860595703125, 7.09326171875, 7.325927734375, 7.55859375]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 4.0, 0.0, 2.0, 11.0, 6.0, 12.0, 26.0, 21.0, 66.0, 128.0, 327.0, 815.0, 2533.0, 9401.0, 70036.0, 913557.0, 41361.0, 7099.0, 1966.0, 671.0, 260.0, 124.0, 50.0, 30.0, 19.0, 14.0, 4.0, 7.0, 3.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.0078125, -8.6790771484375, -8.350341796875, -8.0216064453125, -7.69287109375, -7.3641357421875, -7.035400390625, -6.7066650390625, -6.3779296875, -6.0491943359375, -5.720458984375, -5.3917236328125, -5.06298828125, -4.7342529296875, -4.405517578125, -4.0767822265625, -3.748046875, -3.4193115234375, -3.090576171875, -2.7618408203125, -2.43310546875, -2.1043701171875, -1.775634765625, -1.4468994140625, -1.1181640625, -0.7894287109375, -0.460693359375, -0.1319580078125, 0.19677734375, 0.5255126953125, 0.854248046875, 1.1829833984375, 1.51171875, 1.8404541015625, 2.169189453125, 2.4979248046875, 2.82666015625, 3.1553955078125, 3.484130859375, 3.8128662109375, 4.1416015625, 4.4703369140625, 4.799072265625, 5.1278076171875, 5.45654296875, 5.7852783203125, 6.114013671875, 6.4427490234375, 6.771484375, 7.1002197265625, 7.428955078125, 7.7576904296875, 8.08642578125, 8.4151611328125, 8.743896484375, 9.0726318359375, 9.4013671875, 9.7301025390625, 10.058837890625, 10.3875732421875, 10.71630859375, 11.0450439453125, 11.373779296875, 11.7025146484375, 12.03125]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 5.0, 6.0, 2.0, 4.0, 6.0, 13.0, 21.0, 49.0, 85.0, 297.0, 320.0, 96.0, 44.0, 22.0, 13.0, 8.0, 10.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0019140243530273438, -0.001857861876487732, -0.0018016993999481201, -0.0017455369234085083, -0.0016893744468688965, -0.0016332119703292847, -0.0015770494937896729, -0.001520887017250061, -0.0014647245407104492, -0.0014085620641708374, -0.0013523995876312256, -0.0012962371110916138, -0.001240074634552002, -0.0011839121580123901, -0.0011277496814727783, -0.0010715872049331665, -0.0010154247283935547, -0.0009592622518539429, -0.0009030997753143311, -0.0008469372987747192, -0.0007907748222351074, -0.0007346123456954956, -0.0006784498691558838, -0.000622287392616272, -0.0005661249160766602, -0.0005099624395370483, -0.0004537999629974365, -0.0003976374864578247, -0.0003414750099182129, -0.0002853125333786011, -0.00022915005683898926, -0.00017298758029937744, -0.00011682510375976562, -6.066262722015381e-05, -4.500150680541992e-06, 5.1662325859069824e-05, 0.00010782480239868164, 0.00016398727893829346, 0.00022014975547790527, 0.0002763122320175171, 0.0003324747085571289, 0.0003886371850967407, 0.00044479966163635254, 0.0005009621381759644, 0.0005571246147155762, 0.000613287091255188, 0.0006694495677947998, 0.0007256120443344116, 0.0007817745208740234, 0.0008379369974136353, 0.0008940994739532471, 0.0009502619504928589, 0.0010064244270324707, 0.0010625869035720825, 0.0011187493801116943, 0.0011749118566513062, 0.001231074333190918, 0.0012872368097305298, 0.0013433992862701416, 0.0013995617628097534, 0.0014557242393493652, 0.001511886715888977, 0.0015680491924285889, 0.0016242116689682007, 0.0016803741455078125]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 0.0, 2.0, 6.0, 5.0, 10.0, 5.0, 8.0, 15.0, 14.0, 16.0, 25.0, 51.0, 108.0, 241.0, 788.0, 3620.0, 33955.0, 970836.0, 33952.0, 3607.0, 793.0, 249.0, 98.0, 55.0, 27.0, 19.0, 12.0, 9.0, 9.0, 5.0, 4.0, 6.0, 4.0, 3.0, 0.0, 2.0, 0.0, 1.0, 4.0], "bins": [-21.15625, -20.658447265625, -20.16064453125, -19.662841796875, -19.1650390625, -18.667236328125, -18.16943359375, -17.671630859375, -17.173828125, -16.676025390625, -16.17822265625, -15.680419921875, -15.1826171875, -14.684814453125, -14.18701171875, -13.689208984375, -13.19140625, -12.693603515625, -12.19580078125, -11.697998046875, -11.2001953125, -10.702392578125, -10.20458984375, -9.706787109375, -9.208984375, -8.711181640625, -8.21337890625, -7.715576171875, -7.2177734375, -6.719970703125, -6.22216796875, -5.724365234375, -5.2265625, -4.728759765625, -4.23095703125, -3.733154296875, -3.2353515625, -2.737548828125, -2.23974609375, -1.741943359375, -1.244140625, -0.746337890625, -0.24853515625, 0.249267578125, 0.7470703125, 1.244873046875, 1.74267578125, 2.240478515625, 2.73828125, 3.236083984375, 3.73388671875, 4.231689453125, 4.7294921875, 5.227294921875, 5.72509765625, 6.222900390625, 6.720703125, 7.218505859375, 7.71630859375, 8.214111328125, 8.7119140625, 9.209716796875, 9.70751953125, 10.205322265625, 10.703125]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 7.0, 3.0, 5.0, 8.0, 6.0, 5.0, 8.0, 2.0, 9.0, 19.0, 10.0, 14.0, 29.0, 49.0, 94.0, 179.0, 202.0, 119.0, 63.0, 32.0, 32.0, 17.0, 17.0, 12.0, 11.0, 8.0, 6.0, 7.0, 8.0, 4.0, 6.0, 5.0, 0.0, 1.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-5.7578125, -5.611175537109375, -5.46453857421875, -5.317901611328125, -5.1712646484375, -5.024627685546875, -4.87799072265625, -4.731353759765625, -4.584716796875, -4.438079833984375, -4.29144287109375, -4.144805908203125, -3.9981689453125, -3.851531982421875, -3.70489501953125, -3.558258056640625, -3.41162109375, -3.264984130859375, -3.11834716796875, -2.971710205078125, -2.8250732421875, -2.678436279296875, -2.53179931640625, -2.385162353515625, -2.238525390625, -2.091888427734375, -1.94525146484375, -1.798614501953125, -1.6519775390625, -1.505340576171875, -1.35870361328125, -1.212066650390625, -1.0654296875, -0.918792724609375, -0.77215576171875, -0.625518798828125, -0.4788818359375, -0.332244873046875, -0.18560791015625, -0.038970947265625, 0.107666015625, 0.254302978515625, 0.40093994140625, 0.547576904296875, 0.6942138671875, 0.840850830078125, 0.98748779296875, 1.134124755859375, 1.28076171875, 1.427398681640625, 1.57403564453125, 1.720672607421875, 1.8673095703125, 2.013946533203125, 2.16058349609375, 2.307220458984375, 2.453857421875, 2.600494384765625, 2.74713134765625, 2.893768310546875, 3.0404052734375, 3.187042236328125, 3.33367919921875, 3.480316162109375, 3.626953125]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 10.0, 35.0, 87.0, 280.0, 398.0, 137.0, 42.0, 7.0, 8.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-120.37384033203125, -115.61735534667969, -110.86087036132812, -106.10438537597656, -101.347900390625, -96.59141540527344, -91.83493041992188, -87.07844543457031, -82.32196044921875, -77.56547546386719, -72.80899047851562, -68.05250549316406, -63.2960205078125, -58.53953552246094, -53.78305435180664, -49.02656936645508, -44.27008819580078, -39.51360321044922, -34.757118225097656, -30.000635147094727, -25.244150161743164, -20.4876651763916, -15.731182098388672, -10.97469711303711, -6.218212127685547, -1.4617276191711426, 3.2947568893432617, 8.051240921020508, 12.80772590637207, 17.564210891723633, 22.320693969726562, 27.077178955078125, 31.833663940429688, 36.59014892578125, 41.34663391113281, 46.103118896484375, 50.85960388183594, 55.6160888671875, 60.3725700378418, 65.12905883789062, 69.88554382324219, 74.64202880859375, 79.39851379394531, 84.15499877929688, 88.91148376464844, 93.66796875, 98.42445373535156, 103.18093872070312, 107.93741607666016, 112.69390106201172, 117.45038604736328, 122.20687103271484, 126.9633560180664, 131.71983337402344, 136.476318359375, 141.23280334472656, 145.98928833007812, 150.7457733154297, 155.50225830078125, 160.2587432861328, 165.01522827148438, 169.77171325683594, 174.5281982421875, 179.28468322753906, 184.04116821289062]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 3.0, 3.0, 5.0, 10.0, 4.0, 11.0, 7.0, 5.0, 14.0, 26.0, 16.0, 20.0, 18.0, 21.0, 24.0, 26.0, 32.0, 23.0, 43.0, 39.0, 44.0, 36.0, 37.0, 46.0, 46.0, 36.0, 39.0, 36.0, 37.0, 34.0, 30.0, 30.0, 26.0, 28.0, 30.0, 23.0, 24.0, 18.0, 9.0, 13.0, 4.0, 9.0, 9.0, 4.0, 6.0, 2.0, 4.0, 3.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-35.40213394165039, -34.38330078125, -33.364463806152344, -32.34563064575195, -31.32679557800293, -30.307960510253906, -29.289127349853516, -28.270292282104492, -27.25145721435547, -26.232622146606445, -25.213787078857422, -24.19495391845703, -23.176118850708008, -22.157283782958984, -21.138450622558594, -20.11961555480957, -19.100780487060547, -18.081945419311523, -17.0631103515625, -16.04427719116211, -15.025442123413086, -14.006607055664062, -12.987772941589355, -11.968938827514648, -10.950103759765625, -9.931268692016602, -8.912434577941895, -7.893599987030029, -6.874765396118164, -5.855930805206299, -4.837096214294434, -3.8182616233825684, -2.799427032470703, -1.780592441558838, -0.7617578506469727, 0.2570767402648926, 1.2759113311767578, 2.294745922088623, 3.3135805130004883, 4.3324151039123535, 5.351249694824219, 6.370084285736084, 7.388918876647949, 8.407752990722656, 9.42658805847168, 10.445423126220703, 11.46425724029541, 12.483091354370117, 13.50192642211914, 14.520761489868164, 15.539595603942871, 16.558429718017578, 17.5772647857666, 18.596099853515625, 19.614933013916016, 20.63376808166504, 21.652603149414062, 22.671438217163086, 23.69027328491211, 24.7091064453125, 25.727941513061523, 26.746776580810547, 27.765609741210938, 28.78444480895996, 29.803279876708984]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 4.0, 4.0, 3.0, 6.0, 10.0, 8.0, 8.0, 18.0, 16.0, 21.0, 37.0, 60.0, 83.0, 103.0, 172.0, 321.0, 508.0, 1048.0, 2580.0, 7753.0, 40825.0, 3956410.0, 161446.0, 15644.0, 4143.0, 1505.0, 687.0, 328.0, 183.0, 114.0, 82.0, 47.0, 40.0, 17.0, 21.0, 9.0, 7.0, 4.0, 6.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-13.4609375, -13.0946044921875, -12.728271484375, -12.3619384765625, -11.99560546875, -11.6292724609375, -11.262939453125, -10.8966064453125, -10.5302734375, -10.1639404296875, -9.797607421875, -9.4312744140625, -9.06494140625, -8.6986083984375, -8.332275390625, -7.9659423828125, -7.599609375, -7.2332763671875, -6.866943359375, -6.5006103515625, -6.13427734375, -5.7679443359375, -5.401611328125, -5.0352783203125, -4.6689453125, -4.3026123046875, -3.936279296875, -3.5699462890625, -3.20361328125, -2.8372802734375, -2.470947265625, -2.1046142578125, -1.73828125, -1.3719482421875, -1.005615234375, -0.6392822265625, -0.27294921875, 0.0933837890625, 0.459716796875, 0.8260498046875, 1.1923828125, 1.5587158203125, 1.925048828125, 2.2913818359375, 2.65771484375, 3.0240478515625, 3.390380859375, 3.7567138671875, 4.123046875, 4.4893798828125, 4.855712890625, 5.2220458984375, 5.58837890625, 5.9547119140625, 6.321044921875, 6.6873779296875, 7.0537109375, 7.4200439453125, 7.786376953125, 8.1527099609375, 8.51904296875, 8.8853759765625, 9.251708984375, 9.6180419921875, 9.984375]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 6.0, 7.0, 12.0, 9.0, 20.0, 18.0, 28.0, 44.0, 51.0, 64.0, 76.0, 83.0, 109.0, 104.0, 90.0, 64.0, 59.0, 39.0, 42.0, 33.0, 13.0, 13.0, 6.0, 5.0, 3.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.55859375, -3.461181640625, -3.36376953125, -3.266357421875, -3.1689453125, -3.071533203125, -2.97412109375, -2.876708984375, -2.779296875, -2.681884765625, -2.58447265625, -2.487060546875, -2.3896484375, -2.292236328125, -2.19482421875, -2.097412109375, -2.0, -1.902587890625, -1.80517578125, -1.707763671875, -1.6103515625, -1.512939453125, -1.41552734375, -1.318115234375, -1.220703125, -1.123291015625, -1.02587890625, -0.928466796875, -0.8310546875, -0.733642578125, -0.63623046875, -0.538818359375, -0.44140625, -0.343994140625, -0.24658203125, -0.149169921875, -0.0517578125, 0.045654296875, 0.14306640625, 0.240478515625, 0.337890625, 0.435302734375, 0.53271484375, 0.630126953125, 0.7275390625, 0.824951171875, 0.92236328125, 1.019775390625, 1.1171875, 1.214599609375, 1.31201171875, 1.409423828125, 1.5068359375, 1.604248046875, 1.70166015625, 1.799072265625, 1.896484375, 1.993896484375, 2.09130859375, 2.188720703125, 2.2861328125, 2.383544921875, 2.48095703125, 2.578369140625, 2.67578125]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 8.0, 8.0, 29.0, 57.0, 87.0, 218.0, 474.0, 2187.0, 26235.0, 4127368.0, 34222.0, 2360.0, 578.0, 248.0, 108.0, 43.0, 27.0, 12.0, 8.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.390625, -13.520263671875, -12.64990234375, -11.779541015625, -10.9091796875, -10.038818359375, -9.16845703125, -8.298095703125, -7.427734375, -6.557373046875, -5.68701171875, -4.816650390625, -3.9462890625, -3.075927734375, -2.20556640625, -1.335205078125, -0.46484375, 0.405517578125, 1.27587890625, 2.146240234375, 3.0166015625, 3.886962890625, 4.75732421875, 5.627685546875, 6.498046875, 7.368408203125, 8.23876953125, 9.109130859375, 9.9794921875, 10.849853515625, 11.72021484375, 12.590576171875, 13.4609375, 14.331298828125, 15.20166015625, 16.072021484375, 16.9423828125, 17.812744140625, 18.68310546875, 19.553466796875, 20.423828125, 21.294189453125, 22.16455078125, 23.034912109375, 23.9052734375, 24.775634765625, 25.64599609375, 26.516357421875, 27.38671875, 28.257080078125, 29.12744140625, 29.997802734375, 30.8681640625, 31.738525390625, 32.60888671875, 33.479248046875, 34.349609375, 35.219970703125, 36.09033203125, 36.960693359375, 37.8310546875, 38.701416015625, 39.57177734375, 40.442138671875, 41.3125]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 4.0, 1.0, 1.0, 3.0, 1.0, 8.0, 19.0, 26.0, 53.0, 70.0, 178.0, 2621.0, 787.0, 147.0, 53.0, 41.0, 19.0, 10.0, 18.0, 7.0, 4.0, 4.0, 5.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.197265625, -3.044525146484375, -2.89178466796875, -2.739044189453125, -2.5863037109375, -2.433563232421875, -2.28082275390625, -2.128082275390625, -1.975341796875, -1.822601318359375, -1.66986083984375, -1.517120361328125, -1.3643798828125, -1.211639404296875, -1.05889892578125, -0.906158447265625, -0.75341796875, -0.600677490234375, -0.44793701171875, -0.295196533203125, -0.1424560546875, 0.010284423828125, 0.16302490234375, 0.315765380859375, 0.468505859375, 0.621246337890625, 0.77398681640625, 0.926727294921875, 1.0794677734375, 1.232208251953125, 1.38494873046875, 1.537689208984375, 1.6904296875, 1.843170166015625, 1.99591064453125, 2.148651123046875, 2.3013916015625, 2.454132080078125, 2.60687255859375, 2.759613037109375, 2.912353515625, 3.065093994140625, 3.21783447265625, 3.370574951171875, 3.5233154296875, 3.676055908203125, 3.82879638671875, 3.981536865234375, 4.13427734375, 4.287017822265625, 4.43975830078125, 4.592498779296875, 4.7452392578125, 4.897979736328125, 5.05072021484375, 5.203460693359375, 5.356201171875, 5.508941650390625, 5.66168212890625, 5.814422607421875, 5.9671630859375, 6.119903564453125, 6.27264404296875, 6.425384521484375, 6.578125]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 3.0, 3.0, 14.0, 20.0, 68.0, 216.0, 367.0, 212.0, 68.0, 18.0, 8.0, 1.0, 2.0, 1.0, 0.0, 1.0, 3.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-40.676841735839844, -39.50069808959961, -38.324554443359375, -37.14841079711914, -35.972267150878906, -34.79612350463867, -33.61997985839844, -32.4438362121582, -31.26769256591797, -30.091548919677734, -28.9154052734375, -27.739261627197266, -26.56311798095703, -25.386974334716797, -24.210830688476562, -23.034687042236328, -21.858545303344727, -20.682401657104492, -19.506258010864258, -18.330114364624023, -17.15397071838379, -15.977827072143555, -14.801684379577637, -13.625540733337402, -12.449397087097168, -11.273253440856934, -10.0971097946167, -8.920967102050781, -7.744822978973389, -6.568679332733154, -5.392536163330078, -4.216392517089844, -3.0402488708496094, -1.8641053438186646, -0.6879618167877197, 0.48818159103393555, 1.66432523727417, 2.8404688835144043, 4.0166120529174805, 5.192755699157715, 6.368899345397949, 7.545042991638184, 8.721186637878418, 9.897329330444336, 11.07347297668457, 12.249616622924805, 13.425760269165039, 14.601903915405273, 15.778047561645508, 16.954191207885742, 18.130334854125977, 19.30647850036621, 20.482622146606445, 21.65876579284668, 22.83490753173828, 24.011051177978516, 25.18719482421875, 26.363338470458984, 27.53948211669922, 28.715625762939453, 29.891769409179688, 31.067913055419922, 32.244056701660156, 33.42020034790039, 34.596343994140625]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 7.0, 10.0, 12.0, 22.0, 36.0, 38.0, 63.0, 56.0, 72.0, 67.0, 69.0, 69.0, 67.0, 73.0, 77.0, 60.0, 48.0, 40.0, 35.0, 26.0, 21.0, 17.0, 6.0, 6.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-15.466180801391602, -15.019157409667969, -14.572134971618652, -14.12511157989502, -13.678088188171387, -13.23106575012207, -12.784042358398438, -12.337018966674805, -11.889995574951172, -11.442972183227539, -10.995949745178223, -10.54892635345459, -10.101902961730957, -9.65488052368164, -9.207857131958008, -8.760833740234375, -8.313811302185059, -7.866788387298584, -7.419764995574951, -6.972742080688477, -6.525718688964844, -6.078695774078369, -5.6316728591918945, -5.184649467468262, -4.737626552581787, -4.2906036376953125, -3.8435802459716797, -3.396557331085205, -2.9495341777801514, -2.5025110244750977, -2.055488109588623, -1.6084649562835693, -1.1614418029785156, -0.7144187092781067, -0.26739561557769775, 0.1796274185180664, 0.6266505718231201, 1.0736737251281738, 1.5206966400146484, 1.9677197933197021, 2.414742946624756, 2.8617660999298096, 3.3087892532348633, 3.755812168121338, 4.2028350830078125, 4.649858474731445, 5.09688138961792, 5.5439043045043945, 5.990927696228027, 6.437950611114502, 6.884974002838135, 7.331996917724609, 7.779020309448242, 8.226043701171875, 8.673066139221191, 9.120089530944824, 9.56711196899414, 10.014135360717773, 10.46115779876709, 10.908181190490723, 11.355204582214355, 11.802227020263672, 12.249250411987305, 12.696273803710938, 13.14329719543457]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 7.0, 6.0, 8.0, 27.0, 27.0, 61.0, 122.0, 227.0, 626.0, 2146.0, 12086.0, 163174.0, 803892.0, 57792.0, 6196.0, 1379.0, 422.0, 173.0, 85.0, 41.0, 22.0, 14.0, 7.0, 5.0, 7.0, 0.0, 5.0, 1.0, 2.0, 2.0, 1.0], "bins": [-25.234375, -24.6776123046875, -24.120849609375, -23.5640869140625, -23.00732421875, -22.4505615234375, -21.893798828125, -21.3370361328125, -20.7802734375, -20.2235107421875, -19.666748046875, -19.1099853515625, -18.55322265625, -17.9964599609375, -17.439697265625, -16.8829345703125, -16.326171875, -15.7694091796875, -15.212646484375, -14.6558837890625, -14.09912109375, -13.5423583984375, -12.985595703125, -12.4288330078125, -11.8720703125, -11.3153076171875, -10.758544921875, -10.2017822265625, -9.64501953125, -9.0882568359375, -8.531494140625, -7.9747314453125, -7.41796875, -6.8612060546875, -6.304443359375, -5.7476806640625, -5.19091796875, -4.6341552734375, -4.077392578125, -3.5206298828125, -2.9638671875, -2.4071044921875, -1.850341796875, -1.2935791015625, -0.73681640625, -0.1800537109375, 0.376708984375, 0.9334716796875, 1.490234375, 2.0469970703125, 2.603759765625, 3.1605224609375, 3.71728515625, 4.2740478515625, 4.830810546875, 5.3875732421875, 5.9443359375, 6.5010986328125, 7.057861328125, 7.6146240234375, 8.17138671875, 8.7281494140625, 9.284912109375, 9.8416748046875, 10.3984375]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 3.0, 3.0, 8.0, 5.0, 12.0, 15.0, 12.0, 20.0, 42.0, 54.0, 45.0, 75.0, 86.0, 127.0, 89.0, 99.0, 81.0, 58.0, 45.0, 49.0, 29.0, 19.0, 13.0, 3.0, 6.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.892578125, -3.7900390625, -3.6875, -3.5849609375, -3.482421875, -3.3798828125, -3.27734375, -3.1748046875, -3.072265625, -2.9697265625, -2.8671875, -2.7646484375, -2.662109375, -2.5595703125, -2.45703125, -2.3544921875, -2.251953125, -2.1494140625, -2.046875, -1.9443359375, -1.841796875, -1.7392578125, -1.63671875, -1.5341796875, -1.431640625, -1.3291015625, -1.2265625, -1.1240234375, -1.021484375, -0.9189453125, -0.81640625, -0.7138671875, -0.611328125, -0.5087890625, -0.40625, -0.3037109375, -0.201171875, -0.0986328125, 0.00390625, 0.1064453125, 0.208984375, 0.3115234375, 0.4140625, 0.5166015625, 0.619140625, 0.7216796875, 0.82421875, 0.9267578125, 1.029296875, 1.1318359375, 1.234375, 1.3369140625, 1.439453125, 1.5419921875, 1.64453125, 1.7470703125, 1.849609375, 1.9521484375, 2.0546875, 2.1572265625, 2.259765625, 2.3623046875, 2.46484375, 2.5673828125, 2.669921875]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 4.0, 1.0, 5.0, 7.0, 6.0, 10.0, 5.0, 13.0, 12.0, 14.0, 19.0, 43.0, 48.0, 67.0, 113.0, 162.0, 368.0, 899.0, 2362.0, 8130.0, 34669.0, 192337.0, 653471.0, 122644.0, 23739.0, 6092.0, 1866.0, 654.0, 300.0, 149.0, 105.0, 61.0, 37.0, 32.0, 24.0, 17.0, 19.0, 9.0, 7.0, 8.0, 6.0, 5.0, 4.0, 1.0, 5.0, 2.0, 1.0, 1.0, 4.0, 2.0, 0.0, 2.0], "bins": [-9.421875, -9.1478271484375, -8.873779296875, -8.5997314453125, -8.32568359375, -8.0516357421875, -7.777587890625, -7.5035400390625, -7.2294921875, -6.9554443359375, -6.681396484375, -6.4073486328125, -6.13330078125, -5.8592529296875, -5.585205078125, -5.3111572265625, -5.037109375, -4.7630615234375, -4.489013671875, -4.2149658203125, -3.94091796875, -3.6668701171875, -3.392822265625, -3.1187744140625, -2.8447265625, -2.5706787109375, -2.296630859375, -2.0225830078125, -1.74853515625, -1.4744873046875, -1.200439453125, -0.9263916015625, -0.65234375, -0.3782958984375, -0.104248046875, 0.1697998046875, 0.44384765625, 0.7178955078125, 0.991943359375, 1.2659912109375, 1.5400390625, 1.8140869140625, 2.088134765625, 2.3621826171875, 2.63623046875, 2.9102783203125, 3.184326171875, 3.4583740234375, 3.732421875, 4.0064697265625, 4.280517578125, 4.5545654296875, 4.82861328125, 5.1026611328125, 5.376708984375, 5.6507568359375, 5.9248046875, 6.1988525390625, 6.472900390625, 6.7469482421875, 7.02099609375, 7.2950439453125, 7.569091796875, 7.8431396484375, 8.1171875]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 4.0, 1.0, 6.0, 5.0, 11.0, 8.0, 11.0, 15.0, 13.0, 14.0, 21.0, 28.0, 27.0, 32.0, 28.0, 29.0, 43.0, 39.0, 53.0, 48.0, 41.0, 45.0, 48.0, 54.0, 35.0, 47.0, 36.0, 35.0, 41.0, 33.0, 26.0, 22.0, 20.0, 21.0, 16.0, 7.0, 9.0, 7.0, 8.0, 4.0, 1.0, 3.0, 3.0, 3.0, 3.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-7.0234375, -6.81524658203125, -6.6070556640625, -6.39886474609375, -6.190673828125, -5.98248291015625, -5.7742919921875, -5.56610107421875, -5.35791015625, -5.14971923828125, -4.9415283203125, -4.73333740234375, -4.525146484375, -4.31695556640625, -4.1087646484375, -3.90057373046875, -3.6923828125, -3.48419189453125, -3.2760009765625, -3.06781005859375, -2.859619140625, -2.65142822265625, -2.4432373046875, -2.23504638671875, -2.02685546875, -1.81866455078125, -1.6104736328125, -1.40228271484375, -1.194091796875, -0.98590087890625, -0.7777099609375, -0.56951904296875, -0.361328125, -0.15313720703125, 0.0550537109375, 0.26324462890625, 0.471435546875, 0.67962646484375, 0.8878173828125, 1.09600830078125, 1.30419921875, 1.51239013671875, 1.7205810546875, 1.92877197265625, 2.136962890625, 2.34515380859375, 2.5533447265625, 2.76153564453125, 2.9697265625, 3.17791748046875, 3.3861083984375, 3.59429931640625, 3.802490234375, 4.01068115234375, 4.2188720703125, 4.42706298828125, 4.63525390625, 4.84344482421875, 5.0516357421875, 5.25982666015625, 5.468017578125, 5.67620849609375, 5.8843994140625, 6.09259033203125, 6.30078125]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 4.0, 2.0, 3.0, 2.0, 2.0, 8.0, 5.0, 8.0, 12.0, 24.0, 27.0, 54.0, 74.0, 145.0, 301.0, 571.0, 1408.0, 3967.0, 17839.0, 290536.0, 694103.0, 30586.0, 5602.0, 1789.0, 720.0, 351.0, 160.0, 94.0, 60.0, 42.0, 18.0, 15.0, 9.0, 3.0, 6.0, 2.0, 5.0, 5.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.12890625, -4.95867919921875, -4.7884521484375, -4.61822509765625, -4.447998046875, -4.27777099609375, -4.1075439453125, -3.93731689453125, -3.76708984375, -3.59686279296875, -3.4266357421875, -3.25640869140625, -3.086181640625, -2.91595458984375, -2.7457275390625, -2.57550048828125, -2.4052734375, -2.23504638671875, -2.0648193359375, -1.89459228515625, -1.724365234375, -1.55413818359375, -1.3839111328125, -1.21368408203125, -1.04345703125, -0.87322998046875, -0.7030029296875, -0.53277587890625, -0.362548828125, -0.19232177734375, -0.0220947265625, 0.14813232421875, 0.318359375, 0.48858642578125, 0.6588134765625, 0.82904052734375, 0.999267578125, 1.16949462890625, 1.3397216796875, 1.50994873046875, 1.68017578125, 1.85040283203125, 2.0206298828125, 2.19085693359375, 2.361083984375, 2.53131103515625, 2.7015380859375, 2.87176513671875, 3.0419921875, 3.21221923828125, 3.3824462890625, 3.55267333984375, 3.722900390625, 3.89312744140625, 4.0633544921875, 4.23358154296875, 4.40380859375, 4.57403564453125, 4.7442626953125, 4.91448974609375, 5.084716796875, 5.25494384765625, 5.4251708984375, 5.59539794921875, 5.765625]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 8.0, 10.0, 8.0, 13.0, 19.0, 36.0, 82.0, 179.0, 246.0, 173.0, 91.0, 63.0, 22.0, 19.0, 18.0, 8.0, 3.0, 3.0, 0.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0011386871337890625, -0.001109100878238678, -0.0010795146226882935, -0.001049928367137909, -0.0010203421115875244, -0.00099075585603714, -0.0009611696004867554, -0.0009315833449363708, -0.0009019970893859863, -0.0008724108338356018, -0.0008428245782852173, -0.0008132383227348328, -0.0007836520671844482, -0.0007540658116340637, -0.0007244795560836792, -0.0006948933005332947, -0.0006653070449829102, -0.0006357207894325256, -0.0006061345338821411, -0.0005765482783317566, -0.0005469620227813721, -0.0005173757672309875, -0.00048778951168060303, -0.0004582032561302185, -0.000428617000579834, -0.00039903074502944946, -0.00036944448947906494, -0.0003398582339286804, -0.0003102719783782959, -0.0002806857228279114, -0.00025109946727752686, -0.00022151321172714233, -0.0001919269561767578, -0.0001623407006263733, -0.00013275444507598877, -0.00010316818952560425, -7.358193397521973e-05, -4.3995678424835205e-05, -1.4409422874450684e-05, 1.5176832675933838e-05, 4.476308822631836e-05, 7.434934377670288e-05, 0.0001039355993270874, 0.00013352185487747192, 0.00016310811042785645, 0.00019269436597824097, 0.0002222806215286255, 0.00025186687707901, 0.00028145313262939453, 0.00031103938817977905, 0.0003406256437301636, 0.0003702118992805481, 0.0003997981548309326, 0.00042938441038131714, 0.00045897066593170166, 0.0004885569214820862, 0.0005181431770324707, 0.0005477294325828552, 0.0005773156881332397, 0.0006069019436836243, 0.0006364881992340088, 0.0006660744547843933, 0.0006956607103347778, 0.0007252469658851624, 0.0007548332214355469]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0, 4.0, 3.0, 7.0, 3.0, 6.0, 10.0, 23.0, 28.0, 74.0, 133.0, 420.0, 1830.0, 11559.0, 616497.0, 405777.0, 9797.0, 1685.0, 412.0, 136.0, 61.0, 28.0, 20.0, 11.0, 6.0, 9.0, 2.0, 3.0, 1.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.390625, -9.1295166015625, -8.868408203125, -8.6072998046875, -8.34619140625, -8.0850830078125, -7.823974609375, -7.5628662109375, -7.3017578125, -7.0406494140625, -6.779541015625, -6.5184326171875, -6.25732421875, -5.9962158203125, -5.735107421875, -5.4739990234375, -5.212890625, -4.9517822265625, -4.690673828125, -4.4295654296875, -4.16845703125, -3.9073486328125, -3.646240234375, -3.3851318359375, -3.1240234375, -2.8629150390625, -2.601806640625, -2.3406982421875, -2.07958984375, -1.8184814453125, -1.557373046875, -1.2962646484375, -1.03515625, -0.7740478515625, -0.512939453125, -0.2518310546875, 0.00927734375, 0.2703857421875, 0.531494140625, 0.7926025390625, 1.0537109375, 1.3148193359375, 1.575927734375, 1.8370361328125, 2.09814453125, 2.3592529296875, 2.620361328125, 2.8814697265625, 3.142578125, 3.4036865234375, 3.664794921875, 3.9259033203125, 4.18701171875, 4.4481201171875, 4.709228515625, 4.9703369140625, 5.2314453125, 5.4925537109375, 5.753662109375, 6.0147705078125, 6.27587890625, 6.5369873046875, 6.798095703125, 7.0592041015625, 7.3203125]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 4.0, 2.0, 5.0, 9.0, 9.0, 11.0, 18.0, 28.0, 43.0, 54.0, 72.0, 110.0, 151.0, 153.0, 102.0, 62.0, 44.0, 24.0, 16.0, 16.0, 7.0, 12.0, 7.0, 3.0, 7.0, 5.0, 2.0, 2.0, 1.0, 6.0, 0.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.423828125, -3.3310546875, -3.23828125, -3.1455078125, -3.052734375, -2.9599609375, -2.8671875, -2.7744140625, -2.681640625, -2.5888671875, -2.49609375, -2.4033203125, -2.310546875, -2.2177734375, -2.125, -2.0322265625, -1.939453125, -1.8466796875, -1.75390625, -1.6611328125, -1.568359375, -1.4755859375, -1.3828125, -1.2900390625, -1.197265625, -1.1044921875, -1.01171875, -0.9189453125, -0.826171875, -0.7333984375, -0.640625, -0.5478515625, -0.455078125, -0.3623046875, -0.26953125, -0.1767578125, -0.083984375, 0.0087890625, 0.1015625, 0.1943359375, 0.287109375, 0.3798828125, 0.47265625, 0.5654296875, 0.658203125, 0.7509765625, 0.84375, 0.9365234375, 1.029296875, 1.1220703125, 1.21484375, 1.3076171875, 1.400390625, 1.4931640625, 1.5859375, 1.6787109375, 1.771484375, 1.8642578125, 1.95703125, 2.0498046875, 2.142578125, 2.2353515625, 2.328125, 2.4208984375, 2.513671875]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 10.0, 21.0, 54.0, 303.0, 420.0, 160.0, 35.0, 10.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-87.89505004882812, -83.34735870361328, -78.79966735839844, -74.2519760131836, -69.70428466796875, -65.1565933227539, -60.60890579223633, -56.061214447021484, -51.51352310180664, -46.9658317565918, -42.41814041137695, -37.870452880859375, -33.32276153564453, -28.775068283081055, -24.227378845214844, -19.6796875, -15.131996154785156, -10.584304809570312, -6.036614418029785, -1.4889240264892578, 3.058767318725586, 7.60645866394043, 12.15414810180664, 16.701839447021484, 21.249530792236328, 25.797222137451172, 30.344913482666016, 34.892601013183594, 39.44029235839844, 43.98798370361328, 48.535675048828125, 53.08336639404297, 57.63105773925781, 62.178749084472656, 66.7264404296875, 71.27413177490234, 75.82182312011719, 80.36951446533203, 84.91720581054688, 89.46488952636719, 94.01258850097656, 98.5602798461914, 103.10797119140625, 107.6556625366211, 112.20335388183594, 116.75104522705078, 121.29873657226562, 125.84642028808594, 130.39410400390625, 134.94178771972656, 139.48948669433594, 144.03717041015625, 148.58486938476562, 153.13255310058594, 157.6802520751953, 162.22793579101562, 166.775634765625, 171.3233184814453, 175.8710174560547, 180.418701171875, 184.96640014648438, 189.5140838623047, 194.06178283691406, 198.60946655273438, 203.15716552734375]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 3.0, 0.0, 5.0, 5.0, 5.0, 10.0, 12.0, 9.0, 11.0, 13.0, 15.0, 25.0, 16.0, 28.0, 21.0, 32.0, 27.0, 25.0, 27.0, 30.0, 31.0, 48.0, 30.0, 29.0, 42.0, 42.0, 43.0, 42.0, 44.0, 22.0, 28.0, 36.0, 33.0, 24.0, 34.0, 19.0, 15.0, 12.0, 20.0, 17.0, 14.0, 12.0, 12.0, 10.0, 5.0, 5.0, 3.0, 2.0, 2.0, 3.0, 2.0, 5.0, 1.0, 2.0, 0.0, 2.0], "bins": [-27.235368728637695, -26.405624389648438, -25.57588005065918, -24.746135711669922, -23.916391372680664, -23.086647033691406, -22.25690460205078, -21.42715835571289, -20.597415924072266, -19.767671585083008, -18.93792724609375, -18.108182907104492, -17.278438568115234, -16.448694229125977, -15.618950843811035, -14.789206504821777, -13.959461212158203, -13.129716873168945, -12.299972534179688, -11.47022819519043, -10.640483856201172, -9.810739517211914, -8.980996131896973, -8.151251792907715, -7.321507453918457, -6.491763114929199, -5.662018775939941, -4.832274913787842, -4.002530574798584, -3.172786235809326, -2.3430423736572266, -1.5132980346679688, -0.6835556030273438, 0.1461886167526245, 0.9759328365325928, 1.8056769371032715, 2.6354212760925293, 3.465165615081787, 4.294909477233887, 5.1246538162231445, 5.954398155212402, 6.78414249420166, 7.613886833190918, 8.44363021850586, 9.273374557495117, 10.103118896484375, 10.932863235473633, 11.76260757446289, 12.592351913452148, 13.422096252441406, 14.251840591430664, 15.081584930419922, 15.91132926940918, 16.741073608398438, 17.570816040039062, 18.400562286376953, 19.230304718017578, 20.060049057006836, 20.889793395996094, 21.71953773498535, 22.54928207397461, 23.379026412963867, 24.208770751953125, 25.03851318359375, 25.86825942993164]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 3.0, 0.0, 1.0, 3.0, 5.0, 6.0, 4.0, 7.0, 4.0, 11.0, 14.0, 12.0, 20.0, 26.0, 26.0, 63.0, 91.0, 145.0, 241.0, 517.0, 1214.0, 2974.0, 11253.0, 89062.0, 4032797.0, 45015.0, 7166.0, 2010.0, 799.0, 377.0, 178.0, 100.0, 50.0, 32.0, 25.0, 12.0, 12.0, 5.0, 4.0, 3.0, 4.0, 1.0, 3.0], "bins": [-28.671875, -28.039794921875, -27.40771484375, -26.775634765625, -26.1435546875, -25.511474609375, -24.87939453125, -24.247314453125, -23.615234375, -22.983154296875, -22.35107421875, -21.718994140625, -21.0869140625, -20.454833984375, -19.82275390625, -19.190673828125, -18.55859375, -17.926513671875, -17.29443359375, -16.662353515625, -16.0302734375, -15.398193359375, -14.76611328125, -14.134033203125, -13.501953125, -12.869873046875, -12.23779296875, -11.605712890625, -10.9736328125, -10.341552734375, -9.70947265625, -9.077392578125, -8.4453125, -7.813232421875, -7.18115234375, -6.549072265625, -5.9169921875, -5.284912109375, -4.65283203125, -4.020751953125, -3.388671875, -2.756591796875, -2.12451171875, -1.492431640625, -0.8603515625, -0.228271484375, 0.40380859375, 1.035888671875, 1.66796875, 2.300048828125, 2.93212890625, 3.564208984375, 4.1962890625, 4.828369140625, 5.46044921875, 6.092529296875, 6.724609375, 7.356689453125, 7.98876953125, 8.620849609375, 9.2529296875, 9.885009765625, 10.51708984375, 11.149169921875, 11.78125]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 6.0, 8.0, 10.0, 15.0, 20.0, 28.0, 31.0, 43.0, 52.0, 55.0, 89.0, 97.0, 82.0, 99.0, 84.0, 61.0, 53.0, 52.0, 37.0, 24.0, 22.0, 18.0, 2.0, 4.0, 5.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.96875, -3.86541748046875, -3.7620849609375, -3.65875244140625, -3.555419921875, -3.45208740234375, -3.3487548828125, -3.24542236328125, -3.14208984375, -3.03875732421875, -2.9354248046875, -2.83209228515625, -2.728759765625, -2.62542724609375, -2.5220947265625, -2.41876220703125, -2.3154296875, -2.21209716796875, -2.1087646484375, -2.00543212890625, -1.902099609375, -1.79876708984375, -1.6954345703125, -1.59210205078125, -1.48876953125, -1.38543701171875, -1.2821044921875, -1.17877197265625, -1.075439453125, -0.97210693359375, -0.8687744140625, -0.76544189453125, -0.662109375, -0.55877685546875, -0.4554443359375, -0.35211181640625, -0.248779296875, -0.14544677734375, -0.0421142578125, 0.06121826171875, 0.16455078125, 0.26788330078125, 0.3712158203125, 0.47454833984375, 0.577880859375, 0.68121337890625, 0.7845458984375, 0.88787841796875, 0.9912109375, 1.09454345703125, 1.1978759765625, 1.30120849609375, 1.404541015625, 1.50787353515625, 1.6112060546875, 1.71453857421875, 1.81787109375, 1.92120361328125, 2.0245361328125, 2.12786865234375, 2.231201171875, 2.33453369140625, 2.4378662109375, 2.54119873046875, 2.64453125]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 5.0, 8.0, 12.0, 21.0, 29.0, 57.0, 69.0, 153.0, 327.0, 715.0, 2190.0, 9447.0, 109451.0, 4033713.0, 31145.0, 4708.0, 1271.0, 501.0, 223.0, 113.0, 59.0, 28.0, 18.0, 6.0, 5.0, 4.0, 3.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.4375, -23.6806640625, -22.923828125, -22.1669921875, -21.41015625, -20.6533203125, -19.896484375, -19.1396484375, -18.3828125, -17.6259765625, -16.869140625, -16.1123046875, -15.35546875, -14.5986328125, -13.841796875, -13.0849609375, -12.328125, -11.5712890625, -10.814453125, -10.0576171875, -9.30078125, -8.5439453125, -7.787109375, -7.0302734375, -6.2734375, -5.5166015625, -4.759765625, -4.0029296875, -3.24609375, -2.4892578125, -1.732421875, -0.9755859375, -0.21875, 0.5380859375, 1.294921875, 2.0517578125, 2.80859375, 3.5654296875, 4.322265625, 5.0791015625, 5.8359375, 6.5927734375, 7.349609375, 8.1064453125, 8.86328125, 9.6201171875, 10.376953125, 11.1337890625, 11.890625, 12.6474609375, 13.404296875, 14.1611328125, 14.91796875, 15.6748046875, 16.431640625, 17.1884765625, 17.9453125, 18.7021484375, 19.458984375, 20.2158203125, 20.97265625, 21.7294921875, 22.486328125, 23.2431640625, 24.0]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 6.0, 3.0, 4.0, 4.0, 9.0, 20.0, 31.0, 35.0, 72.0, 212.0, 3307.0, 207.0, 75.0, 30.0, 21.0, 16.0, 5.0, 6.0, 5.0, 3.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-10.515625, -10.2789306640625, -10.042236328125, -9.8055419921875, -9.56884765625, -9.3321533203125, -9.095458984375, -8.8587646484375, -8.6220703125, -8.3853759765625, -8.148681640625, -7.9119873046875, -7.67529296875, -7.4385986328125, -7.201904296875, -6.9652099609375, -6.728515625, -6.4918212890625, -6.255126953125, -6.0184326171875, -5.78173828125, -5.5450439453125, -5.308349609375, -5.0716552734375, -4.8349609375, -4.5982666015625, -4.361572265625, -4.1248779296875, -3.88818359375, -3.6514892578125, -3.414794921875, -3.1781005859375, -2.94140625, -2.7047119140625, -2.468017578125, -2.2313232421875, -1.99462890625, -1.7579345703125, -1.521240234375, -1.2845458984375, -1.0478515625, -0.8111572265625, -0.574462890625, -0.3377685546875, -0.10107421875, 0.1356201171875, 0.372314453125, 0.6090087890625, 0.845703125, 1.0823974609375, 1.319091796875, 1.5557861328125, 1.79248046875, 2.0291748046875, 2.265869140625, 2.5025634765625, 2.7392578125, 2.9759521484375, 3.212646484375, 3.4493408203125, 3.68603515625, 3.9227294921875, 4.159423828125, 4.3961181640625, 4.6328125]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 5.0, 6.0, 12.0, 64.0, 480.0, 404.0, 30.0, 4.0, 4.0, 2.0, 2.0, 0.0, 1.0], "bins": [-185.8597869873047, -182.5428009033203, -179.22579956054688, -175.9088134765625, -172.59182739257812, -169.27484130859375, -165.95785522460938, -162.64085388183594, -159.32386779785156, -156.0068817138672, -152.68988037109375, -149.37289428710938, -146.055908203125, -142.73892211914062, -139.42193603515625, -136.1049346923828, -132.78794860839844, -129.47096252441406, -126.15396881103516, -122.83697509765625, -119.51998901367188, -116.2030029296875, -112.8860092163086, -109.56901550292969, -106.25202941894531, -102.93504333496094, -99.61804962158203, -96.30105590820312, -92.98406982421875, -89.66708374023438, -86.35009002685547, -83.03309631347656, -79.71610260009766, -76.39910888671875, -73.08212280273438, -69.76513671875, -66.4481430053711, -63.13115310668945, -59.81416320800781, -56.49717330932617, -53.18018341064453, -49.86319351196289, -46.54620361328125, -43.22921371459961, -39.91222381591797, -36.59523391723633, -33.27824401855469, -29.961254119873047, -26.644264221191406, -23.327274322509766, -20.010284423828125, -16.693294525146484, -13.376304626464844, -10.059314727783203, -6.7423248291015625, -3.425334930419922, -0.10834503173828125, 3.2086448669433594, 6.525634765625, 9.84262466430664, 13.159614562988281, 16.476604461669922, 19.793594360351562, 23.110584259033203, 26.427574157714844]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 5.0, 12.0, 7.0, 23.0, 27.0, 27.0, 38.0, 38.0, 41.0, 44.0, 57.0, 56.0, 74.0, 71.0, 69.0, 61.0, 67.0, 48.0, 43.0, 40.0, 34.0, 29.0, 21.0, 17.0, 12.0, 10.0, 9.0, 7.0, 4.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-23.015426635742188, -22.394935607910156, -21.774442672729492, -21.15395164489746, -20.53346061706543, -19.912967681884766, -19.292476654052734, -18.671985626220703, -18.051494598388672, -17.43100357055664, -16.810510635375977, -16.190019607543945, -15.569528579711914, -14.949036598205566, -14.328544616699219, -13.708053588867188, -13.087560653686523, -12.467068672180176, -11.846577644348145, -11.226085662841797, -10.605594635009766, -9.985102653503418, -9.36461067199707, -8.744119644165039, -8.123627662658691, -7.503136157989502, -6.8826446533203125, -6.262152671813965, -5.641661167144775, -5.021169662475586, -4.400677680969238, -3.780186176300049, -3.1596946716308594, -2.53920316696167, -1.9187114238739014, -1.2982197999954224, -0.6777281761169434, -0.057236671447753906, 0.5632550716400146, 1.1837468147277832, 1.8042383193969727, 2.424729824066162, 3.0452215671539307, 3.665713310241699, 4.286204814910889, 4.906696319580078, 5.527188301086426, 6.147679805755615, 6.768171310424805, 7.388662815093994, 8.009154319763184, 8.629646301269531, 9.250137329101562, 9.87062931060791, 10.491121292114258, 11.111612319946289, 11.732104301452637, 12.352596282958984, 12.973087310791016, 13.593579292297363, 14.214071273803711, 14.834562301635742, 15.45505428314209, 16.075546264648438, 16.69603729248047]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 4.0, 2.0, 2.0, 5.0, 6.0, 7.0, 8.0, 13.0, 19.0, 23.0, 36.0, 42.0, 104.0, 188.0, 346.0, 805.0, 2047.0, 7094.0, 34707.0, 274404.0, 632498.0, 77545.0, 13220.0, 3246.0, 1108.0, 489.0, 265.0, 129.0, 66.0, 41.0, 29.0, 14.0, 12.0, 13.0, 8.0, 8.0, 5.0, 0.0, 4.0, 1.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.5625, -10.0869140625, -9.611328125, -9.1357421875, -8.66015625, -8.1845703125, -7.708984375, -7.2333984375, -6.7578125, -6.2822265625, -5.806640625, -5.3310546875, -4.85546875, -4.3798828125, -3.904296875, -3.4287109375, -2.953125, -2.4775390625, -2.001953125, -1.5263671875, -1.05078125, -0.5751953125, -0.099609375, 0.3759765625, 0.8515625, 1.3271484375, 1.802734375, 2.2783203125, 2.75390625, 3.2294921875, 3.705078125, 4.1806640625, 4.65625, 5.1318359375, 5.607421875, 6.0830078125, 6.55859375, 7.0341796875, 7.509765625, 7.9853515625, 8.4609375, 8.9365234375, 9.412109375, 9.8876953125, 10.36328125, 10.8388671875, 11.314453125, 11.7900390625, 12.265625, 12.7412109375, 13.216796875, 13.6923828125, 14.16796875, 14.6435546875, 15.119140625, 15.5947265625, 16.0703125, 16.5458984375, 17.021484375, 17.4970703125, 17.97265625, 18.4482421875, 18.923828125, 19.3994140625, 19.875]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 3.0, 7.0, 10.0, 16.0, 22.0, 25.0, 26.0, 57.0, 63.0, 65.0, 98.0, 97.0, 87.0, 84.0, 76.0, 67.0, 62.0, 47.0, 33.0, 21.0, 15.0, 9.0, 4.0, 7.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-4.7734375, -4.654571533203125, -4.53570556640625, -4.416839599609375, -4.2979736328125, -4.179107666015625, -4.06024169921875, -3.941375732421875, -3.822509765625, -3.703643798828125, -3.58477783203125, -3.465911865234375, -3.3470458984375, -3.228179931640625, -3.10931396484375, -2.990447998046875, -2.87158203125, -2.752716064453125, -2.63385009765625, -2.514984130859375, -2.3961181640625, -2.277252197265625, -2.15838623046875, -2.039520263671875, -1.920654296875, -1.801788330078125, -1.68292236328125, -1.564056396484375, -1.4451904296875, -1.326324462890625, -1.20745849609375, -1.088592529296875, -0.9697265625, -0.850860595703125, -0.73199462890625, -0.613128662109375, -0.4942626953125, -0.375396728515625, -0.25653076171875, -0.137664794921875, -0.018798828125, 0.100067138671875, 0.21893310546875, 0.337799072265625, 0.4566650390625, 0.575531005859375, 0.69439697265625, 0.813262939453125, 0.93212890625, 1.050994873046875, 1.16986083984375, 1.288726806640625, 1.4075927734375, 1.526458740234375, 1.64532470703125, 1.764190673828125, 1.883056640625, 2.001922607421875, 2.12078857421875, 2.239654541015625, 2.3585205078125, 2.477386474609375, 2.59625244140625, 2.715118408203125, 2.833984375]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 4.0, 2.0, 6.0, 1.0, 4.0, 11.0, 11.0, 19.0, 23.0, 40.0, 48.0, 59.0, 116.0, 237.0, 454.0, 1114.0, 3070.0, 10665.0, 52985.0, 407417.0, 493500.0, 61319.0, 11892.0, 3362.0, 1162.0, 485.0, 220.0, 121.0, 61.0, 43.0, 23.0, 23.0, 19.0, 11.0, 6.0, 12.0, 3.0, 4.0, 6.0, 4.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-12.1875, -11.80712890625, -11.4267578125, -11.04638671875, -10.666015625, -10.28564453125, -9.9052734375, -9.52490234375, -9.14453125, -8.76416015625, -8.3837890625, -8.00341796875, -7.623046875, -7.24267578125, -6.8623046875, -6.48193359375, -6.1015625, -5.72119140625, -5.3408203125, -4.96044921875, -4.580078125, -4.19970703125, -3.8193359375, -3.43896484375, -3.05859375, -2.67822265625, -2.2978515625, -1.91748046875, -1.537109375, -1.15673828125, -0.7763671875, -0.39599609375, -0.015625, 0.36474609375, 0.7451171875, 1.12548828125, 1.505859375, 1.88623046875, 2.2666015625, 2.64697265625, 3.02734375, 3.40771484375, 3.7880859375, 4.16845703125, 4.548828125, 4.92919921875, 5.3095703125, 5.68994140625, 6.0703125, 6.45068359375, 6.8310546875, 7.21142578125, 7.591796875, 7.97216796875, 8.3525390625, 8.73291015625, 9.11328125, 9.49365234375, 9.8740234375, 10.25439453125, 10.634765625, 11.01513671875, 11.3955078125, 11.77587890625, 12.15625]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 9.0, 9.0, 6.0, 8.0, 20.0, 12.0, 22.0, 28.0, 36.0, 31.0, 32.0, 52.0, 49.0, 55.0, 39.0, 59.0, 64.0, 61.0, 53.0, 45.0, 51.0, 44.0, 41.0, 37.0, 32.0, 21.0, 27.0, 16.0, 10.0, 9.0, 7.0, 2.0, 4.0, 5.0, 2.0, 0.0, 0.0, 5.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-11.9375, -11.582763671875, -11.22802734375, -10.873291015625, -10.5185546875, -10.163818359375, -9.80908203125, -9.454345703125, -9.099609375, -8.744873046875, -8.39013671875, -8.035400390625, -7.6806640625, -7.325927734375, -6.97119140625, -6.616455078125, -6.26171875, -5.906982421875, -5.55224609375, -5.197509765625, -4.8427734375, -4.488037109375, -4.13330078125, -3.778564453125, -3.423828125, -3.069091796875, -2.71435546875, -2.359619140625, -2.0048828125, -1.650146484375, -1.29541015625, -0.940673828125, -0.5859375, -0.231201171875, 0.12353515625, 0.478271484375, 0.8330078125, 1.187744140625, 1.54248046875, 1.897216796875, 2.251953125, 2.606689453125, 2.96142578125, 3.316162109375, 3.6708984375, 4.025634765625, 4.38037109375, 4.735107421875, 5.08984375, 5.444580078125, 5.79931640625, 6.154052734375, 6.5087890625, 6.863525390625, 7.21826171875, 7.572998046875, 7.927734375, 8.282470703125, 8.63720703125, 8.991943359375, 9.3466796875, 9.701416015625, 10.05615234375, 10.410888671875, 10.765625]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 7.0, 10.0, 25.0, 33.0, 65.0, 186.0, 632.0, 2868.0, 18881.0, 987956.0, 32790.0, 3855.0, 836.0, 240.0, 69.0, 42.0, 22.0, 17.0, 5.0, 5.0, 2.0, 0.0, 2.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.8125, -17.116455078125, -16.42041015625, -15.724365234375, -15.0283203125, -14.332275390625, -13.63623046875, -12.940185546875, -12.244140625, -11.548095703125, -10.85205078125, -10.156005859375, -9.4599609375, -8.763916015625, -8.06787109375, -7.371826171875, -6.67578125, -5.979736328125, -5.28369140625, -4.587646484375, -3.8916015625, -3.195556640625, -2.49951171875, -1.803466796875, -1.107421875, -0.411376953125, 0.28466796875, 0.980712890625, 1.6767578125, 2.372802734375, 3.06884765625, 3.764892578125, 4.4609375, 5.156982421875, 5.85302734375, 6.549072265625, 7.2451171875, 7.941162109375, 8.63720703125, 9.333251953125, 10.029296875, 10.725341796875, 11.42138671875, 12.117431640625, 12.8134765625, 13.509521484375, 14.20556640625, 14.901611328125, 15.59765625, 16.293701171875, 16.98974609375, 17.685791015625, 18.3818359375, 19.077880859375, 19.77392578125, 20.469970703125, 21.166015625, 21.862060546875, 22.55810546875, 23.254150390625, 23.9501953125, 24.646240234375, 25.34228515625, 26.038330078125, 26.734375]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 4.0, 5.0, 3.0, 9.0, 19.0, 35.0, 66.0, 163.0, 294.0, 219.0, 85.0, 54.0, 20.0, 10.0, 8.0, 4.0, 2.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0010986328125, -0.0010479092597961426, -0.0009971857070922852, -0.0009464621543884277, -0.0008957386016845703, -0.0008450150489807129, -0.0007942914962768555, -0.000743567943572998, -0.0006928443908691406, -0.0006421208381652832, -0.0005913972854614258, -0.0005406737327575684, -0.0004899501800537109, -0.0004392266273498535, -0.0003885030746459961, -0.00033777952194213867, -0.00028705596923828125, -0.00023633241653442383, -0.0001856088638305664, -0.00013488531112670898, -8.416175842285156e-05, -3.343820571899414e-05, 1.728534698486328e-05, 6.80088996887207e-05, 0.00011873245239257812, 0.00016945600509643555, 0.00022017955780029297, 0.0002709031105041504, 0.0003216266632080078, 0.00037235021591186523, 0.00042307376861572266, 0.0004737973213195801, 0.0005245208740234375, 0.0005752444267272949, 0.0006259679794311523, 0.0006766915321350098, 0.0007274150848388672, 0.0007781386375427246, 0.000828862190246582, 0.0008795857429504395, 0.0009303092956542969, 0.0009810328483581543, 0.0010317564010620117, 0.0010824799537658691, 0.0011332035064697266, 0.001183927059173584, 0.0012346506118774414, 0.0012853741645812988, 0.0013360977172851562, 0.0013868212699890137, 0.001437544822692871, 0.0014882683753967285, 0.001538991928100586, 0.0015897154808044434, 0.0016404390335083008, 0.0016911625862121582, 0.0017418861389160156, 0.001792609691619873, 0.0018433332443237305, 0.0018940567970275879, 0.0019447803497314453, 0.0019955039024353027, 0.00204622745513916, 0.0020969510078430176, 0.002147674560546875]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 4.0, 7.0, 5.0, 11.0, 12.0, 31.0, 37.0, 80.0, 186.0, 573.0, 1969.0, 9263.0, 914322.0, 114280.0, 5675.0, 1393.0, 403.0, 169.0, 64.0, 18.0, 13.0, 7.0, 10.0, 4.0, 1.0, 3.0, 2.0, 2.0, 5.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0], "bins": [-33.71875, -32.872802734375, -32.02685546875, -31.180908203125, -30.3349609375, -29.489013671875, -28.64306640625, -27.797119140625, -26.951171875, -26.105224609375, -25.25927734375, -24.413330078125, -23.5673828125, -22.721435546875, -21.87548828125, -21.029541015625, -20.18359375, -19.337646484375, -18.49169921875, -17.645751953125, -16.7998046875, -15.953857421875, -15.10791015625, -14.261962890625, -13.416015625, -12.570068359375, -11.72412109375, -10.878173828125, -10.0322265625, -9.186279296875, -8.34033203125, -7.494384765625, -6.6484375, -5.802490234375, -4.95654296875, -4.110595703125, -3.2646484375, -2.418701171875, -1.57275390625, -0.726806640625, 0.119140625, 0.965087890625, 1.81103515625, 2.656982421875, 3.5029296875, 4.348876953125, 5.19482421875, 6.040771484375, 6.88671875, 7.732666015625, 8.57861328125, 9.424560546875, 10.2705078125, 11.116455078125, 11.96240234375, 12.808349609375, 13.654296875, 14.500244140625, 15.34619140625, 16.192138671875, 17.0380859375, 17.884033203125, 18.72998046875, 19.575927734375, 20.421875]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 3.0, 0.0, 1.0, 0.0, 5.0, 2.0, 3.0, 2.0, 7.0, 6.0, 9.0, 13.0, 20.0, 24.0, 48.0, 106.0, 207.0, 248.0, 115.0, 63.0, 35.0, 24.0, 11.0, 13.0, 8.0, 7.0, 2.0, 5.0, 2.0, 1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 3.0, 1.0, 0.0, 2.0], "bins": [-13.4765625, -13.140869140625, -12.80517578125, -12.469482421875, -12.1337890625, -11.798095703125, -11.46240234375, -11.126708984375, -10.791015625, -10.455322265625, -10.11962890625, -9.783935546875, -9.4482421875, -9.112548828125, -8.77685546875, -8.441162109375, -8.10546875, -7.769775390625, -7.43408203125, -7.098388671875, -6.7626953125, -6.427001953125, -6.09130859375, -5.755615234375, -5.419921875, -5.084228515625, -4.74853515625, -4.412841796875, -4.0771484375, -3.741455078125, -3.40576171875, -3.070068359375, -2.734375, -2.398681640625, -2.06298828125, -1.727294921875, -1.3916015625, -1.055908203125, -0.72021484375, -0.384521484375, -0.048828125, 0.286865234375, 0.62255859375, 0.958251953125, 1.2939453125, 1.629638671875, 1.96533203125, 2.301025390625, 2.63671875, 2.972412109375, 3.30810546875, 3.643798828125, 3.9794921875, 4.315185546875, 4.65087890625, 4.986572265625, 5.322265625, 5.657958984375, 5.99365234375, 6.329345703125, 6.6650390625, 7.000732421875, 7.33642578125, 7.672119140625, 8.0078125]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 7.0, 36.0, 138.0, 495.0, 268.0, 46.0, 17.0, 1.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-435.25030517578125, -425.3087158203125, -415.3671569824219, -405.4255676269531, -395.4840087890625, -385.54241943359375, -375.6008605957031, -365.6592712402344, -355.71771240234375, -345.776123046875, -335.8345642089844, -325.8929748535156, -315.951416015625, -306.00982666015625, -296.0682678222656, -286.1266784667969, -276.18511962890625, -266.2435302734375, -256.3019714355469, -246.3603973388672, -236.4188232421875, -226.4772491455078, -216.53567504882812, -206.59408569335938, -196.65249633789062, -186.71092224121094, -176.76934814453125, -166.82777404785156, -156.88619995117188, -146.9446258544922, -137.0030517578125, -127.06147003173828, -117.11990356445312, -107.17832946777344, -97.23675537109375, -87.29518127441406, -77.35360717773438, -67.41203308105469, -57.47045135498047, -47.52887725830078, -37.587303161621094, -27.645729064941406, -17.704153060913086, -7.762577056884766, 2.178997039794922, 12.12057113647461, 22.062149047851562, 32.00372314453125, 41.94529724121094, 51.886871337890625, 61.82844543457031, 71.77001953125, 81.71159362792969, 91.65316772460938, 101.5947494506836, 111.53632354736328, 121.47789764404297, 131.4194793701172, 141.36105346679688, 151.30262756347656, 161.24420166015625, 171.18577575683594, 181.12734985351562, 191.0689239501953, 201.010498046875]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 8.0, 1.0, 4.0, 2.0, 6.0, 5.0, 13.0, 10.0, 4.0, 11.0, 4.0, 17.0, 11.0, 18.0, 20.0, 19.0, 22.0, 20.0, 37.0, 32.0, 35.0, 46.0, 39.0, 39.0, 45.0, 39.0, 31.0, 37.0, 43.0, 48.0, 28.0, 29.0, 27.0, 31.0, 29.0, 25.0, 20.0, 23.0, 24.0, 15.0, 17.0, 17.0, 12.0, 10.0, 8.0, 10.0, 6.0, 2.0, 7.0, 1.0, 1.0, 1.0, 4.0, 1.0, 1.0, 4.0], "bins": [-54.1911506652832, -52.606178283691406, -51.021209716796875, -49.436241149902344, -47.85126876831055, -46.26629638671875, -44.68132781982422, -43.09635925292969, -41.51138687133789, -39.926414489746094, -38.34144592285156, -36.75647735595703, -35.171504974365234, -33.58653259277344, -32.001564025878906, -30.416593551635742, -28.831623077392578, -27.246652603149414, -25.66168212890625, -24.076711654663086, -22.491741180419922, -20.906770706176758, -19.321800231933594, -17.73682975769043, -16.151859283447266, -14.566888809204102, -12.981918334960938, -11.396947860717773, -9.81197738647461, -8.227006912231445, -6.642036437988281, -5.057065963745117, -3.4720916748046875, -1.8871212005615234, -0.3021507263183594, 1.2828197479248047, 2.8677902221679688, 4.452760696411133, 6.037731170654297, 7.622701644897461, 9.207672119140625, 10.792642593383789, 12.377613067626953, 13.962583541870117, 15.547554016113281, 17.132524490356445, 18.71749496459961, 20.302465438842773, 21.887435913085938, 23.4724063873291, 25.057376861572266, 26.64234733581543, 28.227317810058594, 29.812288284301758, 31.397258758544922, 32.98223114013672, 34.56719970703125, 36.15216827392578, 37.73714065551758, 39.322113037109375, 40.907081604003906, 42.49205017089844, 44.077022552490234, 45.66199493408203, 47.24696350097656]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 3.0, 3.0, 6.0, 9.0, 3.0, 4.0, 4.0, 6.0, 9.0, 9.0, 18.0, 15.0, 30.0, 47.0, 56.0, 65.0, 108.0, 219.0, 380.0, 852.0, 2086.0, 5809.0, 20517.0, 141498.0, 3950805.0, 53861.0, 11755.0, 3549.0, 1366.0, 585.0, 271.0, 142.0, 79.0, 45.0, 30.0, 17.0, 11.0, 7.0, 3.0, 3.0, 1.0, 1.0, 0.0, 2.0], "bins": [-29.546875, -28.8797607421875, -28.212646484375, -27.5455322265625, -26.87841796875, -26.2113037109375, -25.544189453125, -24.8770751953125, -24.2099609375, -23.5428466796875, -22.875732421875, -22.2086181640625, -21.54150390625, -20.8743896484375, -20.207275390625, -19.5401611328125, -18.873046875, -18.2059326171875, -17.538818359375, -16.8717041015625, -16.20458984375, -15.5374755859375, -14.870361328125, -14.2032470703125, -13.5361328125, -12.8690185546875, -12.201904296875, -11.5347900390625, -10.86767578125, -10.2005615234375, -9.533447265625, -8.8663330078125, -8.19921875, -7.5321044921875, -6.864990234375, -6.1978759765625, -5.53076171875, -4.8636474609375, -4.196533203125, -3.5294189453125, -2.8623046875, -2.1951904296875, -1.528076171875, -0.8609619140625, -0.19384765625, 0.4732666015625, 1.140380859375, 1.8074951171875, 2.474609375, 3.1417236328125, 3.808837890625, 4.4759521484375, 5.14306640625, 5.8101806640625, 6.477294921875, 7.1444091796875, 7.8115234375, 8.4786376953125, 9.145751953125, 9.8128662109375, 10.47998046875, 11.1470947265625, 11.814208984375, 12.4813232421875, 13.1484375]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 2.0, 1.0, 3.0, 13.0, 5.0, 10.0, 19.0, 20.0, 32.0, 27.0, 52.0, 52.0, 71.0, 56.0, 76.0, 86.0, 57.0, 73.0, 53.0, 64.0, 51.0, 47.0, 37.0, 24.0, 26.0, 12.0, 9.0, 13.0, 7.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0], "bins": [-4.94140625, -4.820159912109375, -4.69891357421875, -4.577667236328125, -4.4564208984375, -4.335174560546875, -4.21392822265625, -4.092681884765625, -3.971435546875, -3.850189208984375, -3.72894287109375, -3.607696533203125, -3.4864501953125, -3.365203857421875, -3.24395751953125, -3.122711181640625, -3.00146484375, -2.880218505859375, -2.75897216796875, -2.637725830078125, -2.5164794921875, -2.395233154296875, -2.27398681640625, -2.152740478515625, -2.031494140625, -1.910247802734375, -1.78900146484375, -1.667755126953125, -1.5465087890625, -1.425262451171875, -1.30401611328125, -1.182769775390625, -1.0615234375, -0.940277099609375, -0.81903076171875, -0.697784423828125, -0.5765380859375, -0.455291748046875, -0.33404541015625, -0.212799072265625, -0.091552734375, 0.029693603515625, 0.15093994140625, 0.272186279296875, 0.3934326171875, 0.514678955078125, 0.63592529296875, 0.757171630859375, 0.87841796875, 0.999664306640625, 1.12091064453125, 1.242156982421875, 1.3634033203125, 1.484649658203125, 1.60589599609375, 1.727142333984375, 1.848388671875, 1.969635009765625, 2.09088134765625, 2.212127685546875, 2.3333740234375, 2.454620361328125, 2.57586669921875, 2.697113037109375, 2.818359375]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 2.0, 2.0, 5.0, 6.0, 4.0, 7.0, 16.0, 29.0, 49.0, 97.0, 220.0, 468.0, 1165.0, 3035.0, 10823.0, 68125.0, 3946228.0, 140782.0, 16412.0, 4205.0, 1522.0, 621.0, 214.0, 110.0, 65.0, 32.0, 21.0, 7.0, 4.0, 4.0, 5.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.96875, -18.256591796875, -17.54443359375, -16.832275390625, -16.1201171875, -15.407958984375, -14.69580078125, -13.983642578125, -13.271484375, -12.559326171875, -11.84716796875, -11.135009765625, -10.4228515625, -9.710693359375, -8.99853515625, -8.286376953125, -7.57421875, -6.862060546875, -6.14990234375, -5.437744140625, -4.7255859375, -4.013427734375, -3.30126953125, -2.589111328125, -1.876953125, -1.164794921875, -0.45263671875, 0.259521484375, 0.9716796875, 1.683837890625, 2.39599609375, 3.108154296875, 3.8203125, 4.532470703125, 5.24462890625, 5.956787109375, 6.6689453125, 7.381103515625, 8.09326171875, 8.805419921875, 9.517578125, 10.229736328125, 10.94189453125, 11.654052734375, 12.3662109375, 13.078369140625, 13.79052734375, 14.502685546875, 15.21484375, 15.927001953125, 16.63916015625, 17.351318359375, 18.0634765625, 18.775634765625, 19.48779296875, 20.199951171875, 20.912109375, 21.624267578125, 22.33642578125, 23.048583984375, 23.7607421875, 24.472900390625, 25.18505859375, 25.897216796875, 26.609375]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 3.0, 4.0, 0.0, 2.0, 3.0, 3.0, 3.0, 1.0, 4.0, 3.0, 1.0, 5.0, 10.0, 9.0, 7.0, 18.0, 23.0, 22.0, 33.0, 42.0, 74.0, 106.0, 308.0, 2794.0, 250.0, 102.0, 72.0, 56.0, 36.0, 19.0, 12.0, 14.0, 9.0, 10.0, 3.0, 5.0, 4.0, 0.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-5.359375, -5.15673828125, -4.9541015625, -4.75146484375, -4.548828125, -4.34619140625, -4.1435546875, -3.94091796875, -3.73828125, -3.53564453125, -3.3330078125, -3.13037109375, -2.927734375, -2.72509765625, -2.5224609375, -2.31982421875, -2.1171875, -1.91455078125, -1.7119140625, -1.50927734375, -1.306640625, -1.10400390625, -0.9013671875, -0.69873046875, -0.49609375, -0.29345703125, -0.0908203125, 0.11181640625, 0.314453125, 0.51708984375, 0.7197265625, 0.92236328125, 1.125, 1.32763671875, 1.5302734375, 1.73291015625, 1.935546875, 2.13818359375, 2.3408203125, 2.54345703125, 2.74609375, 2.94873046875, 3.1513671875, 3.35400390625, 3.556640625, 3.75927734375, 3.9619140625, 4.16455078125, 4.3671875, 4.56982421875, 4.7724609375, 4.97509765625, 5.177734375, 5.38037109375, 5.5830078125, 5.78564453125, 5.98828125, 6.19091796875, 6.3935546875, 6.59619140625, 6.798828125, 7.00146484375, 7.2041015625, 7.40673828125, 7.609375]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 4.0, 4.0, 5.0, 7.0, 8.0, 56.0, 173.0, 346.0, 265.0, 94.0, 25.0, 16.0, 4.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-113.9105224609375, -111.41545867919922, -108.9203872680664, -106.42532348632812, -103.93025207519531, -101.43518829345703, -98.94011688232422, -96.44505310058594, -93.94998168945312, -91.45491790771484, -88.95984649658203, -86.46478271484375, -83.96971130371094, -81.47464752197266, -78.97957611083984, -76.48451232910156, -73.98944854736328, -71.494384765625, -68.99931335449219, -66.5042495727539, -64.0091781616211, -61.51411437988281, -59.019046783447266, -56.52397918701172, -54.02891159057617, -51.533843994140625, -49.03877639770508, -46.54370880126953, -44.04864501953125, -41.55357360839844, -39.058509826660156, -36.56344223022461, -34.06836700439453, -31.573299407958984, -29.078231811523438, -26.583166122436523, -24.088098526000977, -21.59303092956543, -19.097965240478516, -16.60289764404297, -14.107830047607422, -11.612762451171875, -9.117695808410645, -6.622628688812256, -4.127561569213867, -1.6324939727783203, 0.8625726699829102, 3.3576393127441406, 5.8527069091796875, 8.347774505615234, 10.842841148376465, 13.337907791137695, 15.832975387573242, 18.32804298400879, 20.823108673095703, 23.31817626953125, 25.813243865966797, 28.308311462402344, 30.80337905883789, 33.29844665527344, 35.79351043701172, 38.28858184814453, 40.78364562988281, 43.27871322631836, 45.773780822753906]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 3.0, 3.0, 4.0, 4.0, 5.0, 4.0, 10.0, 13.0, 10.0, 21.0, 25.0, 24.0, 28.0, 31.0, 39.0, 31.0, 39.0, 44.0, 52.0, 42.0, 47.0, 51.0, 52.0, 43.0, 61.0, 46.0, 44.0, 34.0, 33.0, 28.0, 25.0, 15.0, 17.0, 25.0, 11.0, 15.0, 6.0, 5.0, 13.0, 0.0, 3.0, 1.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.497329711914062, -18.81647491455078, -18.1356201171875, -17.45476531982422, -16.773910522460938, -16.093055725097656, -15.412200927734375, -14.731346130371094, -14.050491333007812, -13.369636535644531, -12.68878173828125, -12.007926940917969, -11.327072143554688, -10.646217346191406, -9.965362548828125, -9.284507751464844, -8.603652954101562, -7.922798156738281, -7.241943359375, -6.561088562011719, -5.8802337646484375, -5.199378967285156, -4.518524169921875, -3.8376693725585938, -3.1568145751953125, -2.4759597778320312, -1.79510498046875, -1.1142501831054688, -0.4333953857421875, 0.24745941162109375, 0.928314208984375, 1.6091690063476562, 2.2900238037109375, 2.9708786010742188, 3.6517333984375, 4.332588195800781, 5.0134429931640625, 5.694297790527344, 6.375152587890625, 7.056007385253906, 7.7368621826171875, 8.417716979980469, 9.09857177734375, 9.779426574707031, 10.460281372070312, 11.141136169433594, 11.821990966796875, 12.502845764160156, 13.183700561523438, 13.864555358886719, 14.54541015625, 15.226264953613281, 15.907119750976562, 16.587974548339844, 17.268829345703125, 17.949684143066406, 18.630538940429688, 19.31139373779297, 19.99224853515625, 20.67310333251953, 21.353958129882812, 22.034812927246094, 22.715667724609375, 23.396522521972656, 24.077377319335938]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 4.0, 4.0, 10.0, 5.0, 9.0, 12.0, 22.0, 26.0, 51.0, 101.0, 201.0, 405.0, 996.0, 3258.0, 13270.0, 70392.0, 510564.0, 382089.0, 52490.0, 10307.0, 2733.0, 855.0, 364.0, 165.0, 87.0, 46.0, 28.0, 18.0, 16.0, 7.0, 10.0, 4.0, 4.0, 6.0, 0.0, 1.0, 1.0, 4.0, 0.0, 0.0, 1.0, 1.0], "bins": [-21.796875, -21.23681640625, -20.6767578125, -20.11669921875, -19.556640625, -18.99658203125, -18.4365234375, -17.87646484375, -17.31640625, -16.75634765625, -16.1962890625, -15.63623046875, -15.076171875, -14.51611328125, -13.9560546875, -13.39599609375, -12.8359375, -12.27587890625, -11.7158203125, -11.15576171875, -10.595703125, -10.03564453125, -9.4755859375, -8.91552734375, -8.35546875, -7.79541015625, -7.2353515625, -6.67529296875, -6.115234375, -5.55517578125, -4.9951171875, -4.43505859375, -3.875, -3.31494140625, -2.7548828125, -2.19482421875, -1.634765625, -1.07470703125, -0.5146484375, 0.04541015625, 0.60546875, 1.16552734375, 1.7255859375, 2.28564453125, 2.845703125, 3.40576171875, 3.9658203125, 4.52587890625, 5.0859375, 5.64599609375, 6.2060546875, 6.76611328125, 7.326171875, 7.88623046875, 8.4462890625, 9.00634765625, 9.56640625, 10.12646484375, 10.6865234375, 11.24658203125, 11.806640625, 12.36669921875, 12.9267578125, 13.48681640625, 14.046875]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 4.0, 3.0, 5.0, 9.0, 10.0, 12.0, 12.0, 29.0, 29.0, 39.0, 55.0, 45.0, 60.0, 66.0, 74.0, 78.0, 75.0, 62.0, 61.0, 56.0, 43.0, 54.0, 36.0, 29.0, 14.0, 14.0, 14.0, 8.0, 6.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-5.296875, -5.16632080078125, -5.0357666015625, -4.90521240234375, -4.774658203125, -4.64410400390625, -4.5135498046875, -4.38299560546875, -4.25244140625, -4.12188720703125, -3.9913330078125, -3.86077880859375, -3.730224609375, -3.59967041015625, -3.4691162109375, -3.33856201171875, -3.2080078125, -3.07745361328125, -2.9468994140625, -2.81634521484375, -2.685791015625, -2.55523681640625, -2.4246826171875, -2.29412841796875, -2.16357421875, -2.03302001953125, -1.9024658203125, -1.77191162109375, -1.641357421875, -1.51080322265625, -1.3802490234375, -1.24969482421875, -1.119140625, -0.98858642578125, -0.8580322265625, -0.72747802734375, -0.596923828125, -0.46636962890625, -0.3358154296875, -0.20526123046875, -0.07470703125, 0.05584716796875, 0.1864013671875, 0.31695556640625, 0.447509765625, 0.57806396484375, 0.7086181640625, 0.83917236328125, 0.9697265625, 1.10028076171875, 1.2308349609375, 1.36138916015625, 1.491943359375, 1.62249755859375, 1.7530517578125, 1.88360595703125, 2.01416015625, 2.14471435546875, 2.2752685546875, 2.40582275390625, 2.536376953125, 2.66693115234375, 2.7974853515625, 2.92803955078125, 3.05859375]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 3.0, 5.0, 7.0, 11.0, 7.0, 21.0, 37.0, 42.0, 75.0, 106.0, 245.0, 452.0, 1074.0, 3029.0, 9591.0, 40350.0, 257145.0, 606541.0, 101339.0, 19858.0, 5293.0, 1793.0, 756.0, 343.0, 159.0, 101.0, 71.0, 35.0, 20.0, 7.0, 8.0, 10.0, 8.0, 5.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.234375, -11.7685546875, -11.302734375, -10.8369140625, -10.37109375, -9.9052734375, -9.439453125, -8.9736328125, -8.5078125, -8.0419921875, -7.576171875, -7.1103515625, -6.64453125, -6.1787109375, -5.712890625, -5.2470703125, -4.78125, -4.3154296875, -3.849609375, -3.3837890625, -2.91796875, -2.4521484375, -1.986328125, -1.5205078125, -1.0546875, -0.5888671875, -0.123046875, 0.3427734375, 0.80859375, 1.2744140625, 1.740234375, 2.2060546875, 2.671875, 3.1376953125, 3.603515625, 4.0693359375, 4.53515625, 5.0009765625, 5.466796875, 5.9326171875, 6.3984375, 6.8642578125, 7.330078125, 7.7958984375, 8.26171875, 8.7275390625, 9.193359375, 9.6591796875, 10.125, 10.5908203125, 11.056640625, 11.5224609375, 11.98828125, 12.4541015625, 12.919921875, 13.3857421875, 13.8515625, 14.3173828125, 14.783203125, 15.2490234375, 15.71484375, 16.1806640625, 16.646484375, 17.1123046875, 17.578125]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 6.0, 5.0, 8.0, 7.0, 12.0, 10.0, 9.0, 18.0, 14.0, 22.0, 31.0, 29.0, 34.0, 41.0, 54.0, 42.0, 51.0, 65.0, 51.0, 58.0, 51.0, 49.0, 47.0, 48.0, 41.0, 37.0, 42.0, 28.0, 20.0, 17.0, 12.0, 10.0, 6.0, 5.0, 6.0, 4.0, 7.0, 5.0, 2.0, 4.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-13.84375, -13.3642578125, -12.884765625, -12.4052734375, -11.92578125, -11.4462890625, -10.966796875, -10.4873046875, -10.0078125, -9.5283203125, -9.048828125, -8.5693359375, -8.08984375, -7.6103515625, -7.130859375, -6.6513671875, -6.171875, -5.6923828125, -5.212890625, -4.7333984375, -4.25390625, -3.7744140625, -3.294921875, -2.8154296875, -2.3359375, -1.8564453125, -1.376953125, -0.8974609375, -0.41796875, 0.0615234375, 0.541015625, 1.0205078125, 1.5, 1.9794921875, 2.458984375, 2.9384765625, 3.41796875, 3.8974609375, 4.376953125, 4.8564453125, 5.3359375, 5.8154296875, 6.294921875, 6.7744140625, 7.25390625, 7.7333984375, 8.212890625, 8.6923828125, 9.171875, 9.6513671875, 10.130859375, 10.6103515625, 11.08984375, 11.5693359375, 12.048828125, 12.5283203125, 13.0078125, 13.4873046875, 13.966796875, 14.4462890625, 14.92578125, 15.4052734375, 15.884765625, 16.3642578125, 16.84375]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 7.0, 2.0, 5.0, 9.0, 13.0, 20.0, 32.0, 49.0, 105.0, 221.0, 424.0, 1061.0, 3175.0, 12692.0, 113763.0, 788997.0, 110569.0, 12447.0, 3026.0, 1073.0, 436.0, 193.0, 100.0, 54.0, 23.0, 17.0, 17.0, 8.0, 7.0, 4.0, 2.0, 6.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.5078125, -6.2794189453125, -6.051025390625, -5.8226318359375, -5.59423828125, -5.3658447265625, -5.137451171875, -4.9090576171875, -4.6806640625, -4.4522705078125, -4.223876953125, -3.9954833984375, -3.76708984375, -3.5386962890625, -3.310302734375, -3.0819091796875, -2.853515625, -2.6251220703125, -2.396728515625, -2.1683349609375, -1.93994140625, -1.7115478515625, -1.483154296875, -1.2547607421875, -1.0263671875, -0.7979736328125, -0.569580078125, -0.3411865234375, -0.11279296875, 0.1156005859375, 0.343994140625, 0.5723876953125, 0.80078125, 1.0291748046875, 1.257568359375, 1.4859619140625, 1.71435546875, 1.9427490234375, 2.171142578125, 2.3995361328125, 2.6279296875, 2.8563232421875, 3.084716796875, 3.3131103515625, 3.54150390625, 3.7698974609375, 3.998291015625, 4.2266845703125, 4.455078125, 4.6834716796875, 4.911865234375, 5.1402587890625, 5.36865234375, 5.5970458984375, 5.825439453125, 6.0538330078125, 6.2822265625, 6.5106201171875, 6.739013671875, 6.9674072265625, 7.19580078125, 7.4241943359375, 7.652587890625, 7.8809814453125, 8.109375]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 6.0, 2.0, 4.0, 8.0, 8.0, 7.0, 11.0, 20.0, 28.0, 56.0, 57.0, 102.0, 137.0, 142.0, 119.0, 100.0, 63.0, 43.0, 27.0, 20.0, 15.0, 11.0, 4.0, 7.0, 2.0, 2.0, 1.0, 1.0, 0.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00119781494140625, -0.001165740191936493, -0.0011336654424667358, -0.0011015906929969788, -0.0010695159435272217, -0.0010374411940574646, -0.0010053664445877075, -0.0009732916951179504, -0.0009412169456481934, -0.0009091421961784363, -0.0008770674467086792, -0.0008449926972389221, -0.000812917947769165, -0.000780843198299408, -0.0007487684488296509, -0.0007166936993598938, -0.0006846189498901367, -0.0006525442004203796, -0.0006204694509506226, -0.0005883947014808655, -0.0005563199520111084, -0.0005242452025413513, -0.0004921704530715942, -0.00046009570360183716, -0.0004280209541320801, -0.000395946204662323, -0.0003638714551925659, -0.00033179670572280884, -0.00029972195625305176, -0.0002676472067832947, -0.0002355724573135376, -0.00020349770784378052, -0.00017142295837402344, -0.00013934820890426636, -0.00010727345943450928, -7.51987099647522e-05, -4.312396049499512e-05, -1.1049211025238037e-05, 2.1025538444519043e-05, 5.310028791427612e-05, 8.51750373840332e-05, 0.00011724978685379028, 0.00014932453632354736, 0.00018139928579330444, 0.00021347403526306152, 0.0002455487847328186, 0.0002776235342025757, 0.00030969828367233276, 0.00034177303314208984, 0.0003738477826118469, 0.000405922532081604, 0.0004379972815513611, 0.00047007203102111816, 0.0005021467804908752, 0.0005342215299606323, 0.0005662962794303894, 0.0005983710289001465, 0.0006304457783699036, 0.0006625205278396606, 0.0006945952773094177, 0.0007266700267791748, 0.0007587447762489319, 0.000790819525718689, 0.000822894275188446, 0.0008549690246582031]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 6.0, 12.0, 14.0, 19.0, 44.0, 69.0, 103.0, 257.0, 638.0, 1715.0, 6497.0, 34186.0, 482171.0, 479314.0, 34357.0, 6291.0, 1713.0, 612.0, 270.0, 94.0, 59.0, 40.0, 23.0, 12.0, 8.0, 8.0, 8.0, 4.0, 5.0, 4.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.01171875, -6.76129150390625, -6.5108642578125, -6.26043701171875, -6.010009765625, -5.75958251953125, -5.5091552734375, -5.25872802734375, -5.00830078125, -4.75787353515625, -4.5074462890625, -4.25701904296875, -4.006591796875, -3.75616455078125, -3.5057373046875, -3.25531005859375, -3.0048828125, -2.75445556640625, -2.5040283203125, -2.25360107421875, -2.003173828125, -1.75274658203125, -1.5023193359375, -1.25189208984375, -1.00146484375, -0.75103759765625, -0.5006103515625, -0.25018310546875, 0.000244140625, 0.25067138671875, 0.5010986328125, 0.75152587890625, 1.001953125, 1.25238037109375, 1.5028076171875, 1.75323486328125, 2.003662109375, 2.25408935546875, 2.5045166015625, 2.75494384765625, 3.00537109375, 3.25579833984375, 3.5062255859375, 3.75665283203125, 4.007080078125, 4.25750732421875, 4.5079345703125, 4.75836181640625, 5.0087890625, 5.25921630859375, 5.5096435546875, 5.76007080078125, 6.010498046875, 6.26092529296875, 6.5113525390625, 6.76177978515625, 7.01220703125, 7.26263427734375, 7.5130615234375, 7.76348876953125, 8.013916015625, 8.26434326171875, 8.5147705078125, 8.76519775390625, 9.015625]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 1.0, 1.0, 2.0, 2.0, 2.0, 6.0, 4.0, 7.0, 6.0, 11.0, 12.0, 25.0, 24.0, 39.0, 71.0, 91.0, 120.0, 116.0, 134.0, 98.0, 63.0, 41.0, 30.0, 29.0, 19.0, 16.0, 7.0, 11.0, 8.0, 7.0, 2.0, 1.0, 3.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-6.671875, -6.46783447265625, -6.2637939453125, -6.05975341796875, -5.855712890625, -5.65167236328125, -5.4476318359375, -5.24359130859375, -5.03955078125, -4.83551025390625, -4.6314697265625, -4.42742919921875, -4.223388671875, -4.01934814453125, -3.8153076171875, -3.61126708984375, -3.4072265625, -3.20318603515625, -2.9991455078125, -2.79510498046875, -2.591064453125, -2.38702392578125, -2.1829833984375, -1.97894287109375, -1.77490234375, -1.57086181640625, -1.3668212890625, -1.16278076171875, -0.958740234375, -0.75469970703125, -0.5506591796875, -0.34661865234375, -0.142578125, 0.06146240234375, 0.2655029296875, 0.46954345703125, 0.673583984375, 0.87762451171875, 1.0816650390625, 1.28570556640625, 1.48974609375, 1.69378662109375, 1.8978271484375, 2.10186767578125, 2.305908203125, 2.50994873046875, 2.7139892578125, 2.91802978515625, 3.1220703125, 3.32611083984375, 3.5301513671875, 3.73419189453125, 3.938232421875, 4.14227294921875, 4.3463134765625, 4.55035400390625, 4.75439453125, 4.95843505859375, 5.1624755859375, 5.36651611328125, 5.570556640625, 5.77459716796875, 5.9786376953125, 6.18267822265625, 6.38671875]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 5.0, 5.0, 15.0, 34.0, 55.0, 137.0, 220.0, 223.0, 142.0, 80.0, 38.0, 28.0, 11.0, 1.0, 4.0, 2.0, 0.0, 0.0, 5.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-93.68299102783203, -89.24522399902344, -84.80745697021484, -80.36968994140625, -75.93192291259766, -71.49415588378906, -67.05638885498047, -62.618621826171875, -58.18085479736328, -53.74308776855469, -49.305320739746094, -44.8675537109375, -40.429786682128906, -35.99201965332031, -31.55425262451172, -27.116485595703125, -22.67871856689453, -18.240951538085938, -13.803184509277344, -9.36541748046875, -4.927650451660156, -0.4898834228515625, 3.9478836059570312, 8.385650634765625, 12.823417663574219, 17.261184692382812, 21.698951721191406, 26.13671875, 30.574485778808594, 35.01225280761719, 39.45001983642578, 43.887786865234375, 48.32554626464844, 52.76331329345703, 57.201080322265625, 61.63884735107422, 66.07661437988281, 70.5143814086914, 74.9521484375, 79.3899154663086, 83.82768249511719, 88.26544952392578, 92.70321655273438, 97.14098358154297, 101.57875061035156, 106.01651763916016, 110.45428466796875, 114.89205169677734, 119.32981872558594, 123.76758575439453, 128.20535278320312, 132.64312744140625, 137.0808868408203, 141.51864624023438, 145.9564208984375, 150.39419555664062, 154.8319549560547, 159.26971435546875, 163.70748901367188, 168.145263671875, 172.58302307128906, 177.02078247070312, 181.45855712890625, 185.89633178710938, 190.33409118652344]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 8.0, 10.0, 11.0, 9.0, 17.0, 13.0, 19.0, 22.0, 18.0, 33.0, 31.0, 33.0, 36.0, 33.0, 52.0, 45.0, 58.0, 43.0, 52.0, 47.0, 58.0, 41.0, 37.0, 39.0, 36.0, 36.0, 19.0, 37.0, 30.0, 21.0, 10.0, 14.0, 10.0, 10.0, 3.0, 7.0, 3.0, 2.0, 4.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-86.352294921875, -83.84137725830078, -81.33045196533203, -78.81953430175781, -76.3086166381836, -73.79769897460938, -71.28677368164062, -68.7758560180664, -66.26493835449219, -63.7540168762207, -61.243099212646484, -58.732177734375, -56.22126007080078, -53.7103385925293, -51.19941711425781, -48.688499450683594, -46.17757797241211, -43.666656494140625, -41.155738830566406, -38.64481735229492, -36.1338996887207, -33.62297821044922, -31.112058639526367, -28.601139068603516, -26.090219497680664, -23.579299926757812, -21.06838035583496, -18.55746078491211, -16.046539306640625, -13.53562068939209, -11.024700164794922, -8.51378059387207, -6.002861022949219, -3.491941213607788, -0.9810214042663574, 1.5298986434936523, 4.040818214416504, 6.5517377853393555, 9.062658309936523, 11.573577880859375, 14.084497451782227, 16.595417022705078, 19.10633659362793, 21.61725616455078, 24.128177642822266, 26.639095306396484, 29.15001678466797, 31.66093635559082, 34.17185592651367, 36.682777404785156, 39.193695068359375, 41.70461654663086, 44.21553421020508, 46.72645568847656, 49.23737335205078, 51.748294830322266, 54.25921630859375, 56.770137786865234, 59.28105545043945, 61.79197692871094, 64.30289459228516, 66.81381225585938, 69.32473754882812, 71.83565521240234, 74.34657287597656]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 14.0, 22.0, 63.0, 150.0, 407.0, 1295.0, 7746.0, 4166541.0, 15292.0, 1912.0, 523.0, 189.0, 77.0, 33.0, 13.0, 5.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-56.4375, -55.252685546875, -54.06787109375, -52.883056640625, -51.6982421875, -50.513427734375, -49.32861328125, -48.143798828125, -46.958984375, -45.774169921875, -44.58935546875, -43.404541015625, -42.2197265625, -41.034912109375, -39.85009765625, -38.665283203125, -37.48046875, -36.295654296875, -35.11083984375, -33.926025390625, -32.7412109375, -31.556396484375, -30.37158203125, -29.186767578125, -28.001953125, -26.817138671875, -25.63232421875, -24.447509765625, -23.2626953125, -22.077880859375, -20.89306640625, -19.708251953125, -18.5234375, -17.338623046875, -16.15380859375, -14.968994140625, -13.7841796875, -12.599365234375, -11.41455078125, -10.229736328125, -9.044921875, -7.860107421875, -6.67529296875, -5.490478515625, -4.3056640625, -3.120849609375, -1.93603515625, -0.751220703125, 0.43359375, 1.618408203125, 2.80322265625, 3.988037109375, 5.1728515625, 6.357666015625, 7.54248046875, 8.727294921875, 9.912109375, 11.096923828125, 12.28173828125, 13.466552734375, 14.6513671875, 15.836181640625, 17.02099609375, 18.205810546875, 19.390625]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 7.0, 4.0, 5.0, 10.0, 9.0, 14.0, 27.0, 26.0, 35.0, 38.0, 53.0, 48.0, 59.0, 67.0, 69.0, 69.0, 75.0, 60.0, 58.0, 55.0, 53.0, 39.0, 28.0, 25.0, 24.0, 18.0, 15.0, 6.0, 6.0, 3.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.03515625, -5.878173828125, -5.72119140625, -5.564208984375, -5.4072265625, -5.250244140625, -5.09326171875, -4.936279296875, -4.779296875, -4.622314453125, -4.46533203125, -4.308349609375, -4.1513671875, -3.994384765625, -3.83740234375, -3.680419921875, -3.5234375, -3.366455078125, -3.20947265625, -3.052490234375, -2.8955078125, -2.738525390625, -2.58154296875, -2.424560546875, -2.267578125, -2.110595703125, -1.95361328125, -1.796630859375, -1.6396484375, -1.482666015625, -1.32568359375, -1.168701171875, -1.01171875, -0.854736328125, -0.69775390625, -0.540771484375, -0.3837890625, -0.226806640625, -0.06982421875, 0.087158203125, 0.244140625, 0.401123046875, 0.55810546875, 0.715087890625, 0.8720703125, 1.029052734375, 1.18603515625, 1.343017578125, 1.5, 1.656982421875, 1.81396484375, 1.970947265625, 2.1279296875, 2.284912109375, 2.44189453125, 2.598876953125, 2.755859375, 2.912841796875, 3.06982421875, 3.226806640625, 3.3837890625, 3.540771484375, 3.69775390625, 3.854736328125, 4.01171875]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 9.0, 9.0, 11.0, 17.0, 21.0, 33.0, 57.0, 64.0, 113.0, 135.0, 248.0, 356.0, 586.0, 1014.0, 1908.0, 4247.0, 13521.0, 4083685.0, 70529.0, 10332.0, 3483.0, 1601.0, 873.0, 512.0, 322.0, 195.0, 137.0, 88.0, 62.0, 36.0, 19.0, 21.0, 16.0, 7.0, 5.0, 6.0, 6.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-17.921875, -17.279541015625, -16.63720703125, -15.994873046875, -15.3525390625, -14.710205078125, -14.06787109375, -13.425537109375, -12.783203125, -12.140869140625, -11.49853515625, -10.856201171875, -10.2138671875, -9.571533203125, -8.92919921875, -8.286865234375, -7.64453125, -7.002197265625, -6.35986328125, -5.717529296875, -5.0751953125, -4.432861328125, -3.79052734375, -3.148193359375, -2.505859375, -1.863525390625, -1.22119140625, -0.578857421875, 0.0634765625, 0.705810546875, 1.34814453125, 1.990478515625, 2.6328125, 3.275146484375, 3.91748046875, 4.559814453125, 5.2021484375, 5.844482421875, 6.48681640625, 7.129150390625, 7.771484375, 8.413818359375, 9.05615234375, 9.698486328125, 10.3408203125, 10.983154296875, 11.62548828125, 12.267822265625, 12.91015625, 13.552490234375, 14.19482421875, 14.837158203125, 15.4794921875, 16.121826171875, 16.76416015625, 17.406494140625, 18.048828125, 18.691162109375, 19.33349609375, 19.975830078125, 20.6181640625, 21.260498046875, 21.90283203125, 22.545166015625, 23.1875]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 6.0, 3.0, 6.0, 6.0, 10.0, 13.0, 36.0, 3902.0, 43.0, 12.0, 9.0, 13.0, 4.0, 3.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.62109375, -5.482177734375, -5.34326171875, -5.204345703125, -5.0654296875, -4.926513671875, -4.78759765625, -4.648681640625, -4.509765625, -4.370849609375, -4.23193359375, -4.093017578125, -3.9541015625, -3.815185546875, -3.67626953125, -3.537353515625, -3.3984375, -3.259521484375, -3.12060546875, -2.981689453125, -2.8427734375, -2.703857421875, -2.56494140625, -2.426025390625, -2.287109375, -2.148193359375, -2.00927734375, -1.870361328125, -1.7314453125, -1.592529296875, -1.45361328125, -1.314697265625, -1.17578125, -1.036865234375, -0.89794921875, -0.759033203125, -0.6201171875, -0.481201171875, -0.34228515625, -0.203369140625, -0.064453125, 0.074462890625, 0.21337890625, 0.352294921875, 0.4912109375, 0.630126953125, 0.76904296875, 0.907958984375, 1.046875, 1.185791015625, 1.32470703125, 1.463623046875, 1.6025390625, 1.741455078125, 1.88037109375, 2.019287109375, 2.158203125, 2.297119140625, 2.43603515625, 2.574951171875, 2.7138671875, 2.852783203125, 2.99169921875, 3.130615234375, 3.26953125]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 6.0, 5.0, 16.0, 22.0, 49.0, 64.0, 151.0, 204.0, 202.0, 130.0, 85.0, 33.0, 26.0, 6.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-24.159090042114258, -23.636138916015625, -23.113189697265625, -22.590240478515625, -22.067289352416992, -21.54433822631836, -21.02138900756836, -20.49843978881836, -19.975488662719727, -19.452537536621094, -18.929588317871094, -18.406639099121094, -17.88368797302246, -17.360736846923828, -16.837787628173828, -16.314838409423828, -15.791887283325195, -15.268937110900879, -14.745986938476562, -14.223036766052246, -13.70008659362793, -13.177136421203613, -12.654186248779297, -12.13123607635498, -11.608285903930664, -11.085335731506348, -10.562385559082031, -10.039435386657715, -9.516485214233398, -8.993535041809082, -8.470584869384766, -7.947634696960449, -7.424686431884766, -6.901736259460449, -6.378786087036133, -5.855835914611816, -5.3328857421875, -4.809935569763184, -4.286985397338867, -3.764035224914551, -3.2410850524902344, -2.718134880065918, -2.1951847076416016, -1.6722345352172852, -1.1492843627929688, -0.6263341903686523, -0.10338401794433594, 0.41956615447998047, 0.9425163269042969, 1.4654664993286133, 1.9884166717529297, 2.511366844177246, 3.0343170166015625, 3.557267189025879, 4.080217361450195, 4.603167533874512, 5.126117706298828, 5.6490678787231445, 6.172018051147461, 6.694968223571777, 7.217918395996094, 7.74086856842041, 8.263818740844727, 8.786768913269043, 9.30971908569336]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 3.0, 4.0, 3.0, 6.0, 12.0, 11.0, 11.0, 9.0, 9.0, 19.0, 12.0, 25.0, 24.0, 19.0, 27.0, 27.0, 33.0, 36.0, 31.0, 33.0, 30.0, 38.0, 42.0, 43.0, 57.0, 43.0, 35.0, 56.0, 35.0, 37.0, 30.0, 36.0, 28.0, 20.0, 23.0, 22.0, 9.0, 11.0, 16.0, 7.0, 9.0, 8.0, 12.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.569635391235352, -6.362862586975098, -6.1560893058776855, -5.949316024780273, -5.7425432205200195, -5.535770416259766, -5.3289971351623535, -5.122223854064941, -4.9154510498046875, -4.708678245544434, -4.5019049644470215, -4.295131683349609, -4.0883588790893555, -3.8815858364105225, -3.6748127937316895, -3.4680397510528564, -3.2612667083740234, -3.0544936656951904, -2.8477206230163574, -2.6409475803375244, -2.4341745376586914, -2.2274014949798584, -2.0206284523010254, -1.8138554096221924, -1.6070823669433594, -1.4003093242645264, -1.1935362815856934, -0.9867632389068604, -0.7799901962280273, -0.5732171535491943, -0.36644411087036133, -0.15967106819152832, 0.04710197448730469, 0.2538750171661377, 0.4606480598449707, 0.6674211025238037, 0.8741941452026367, 1.0809671878814697, 1.2877402305603027, 1.4945132732391357, 1.7012863159179688, 1.9080593585968018, 2.1148324012756348, 2.3216054439544678, 2.528378486633301, 2.735151529312134, 2.941924571990967, 3.1486976146698, 3.355470657348633, 3.562243700027466, 3.769016742706299, 3.975789785385132, 4.182562828063965, 4.389335632324219, 4.596108913421631, 4.802882194519043, 5.009654998779297, 5.216427803039551, 5.423201084136963, 5.629974365234375, 5.836747169494629, 6.043519973754883, 6.250293254852295, 6.457066535949707, 6.663839340209961]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 2.0, 3.0, 5.0, 2.0, 9.0, 8.0, 16.0, 25.0, 38.0, 47.0, 86.0, 163.0, 264.0, 605.0, 1381.0, 3545.0, 11045.0, 37656.0, 148890.0, 544466.0, 224919.0, 52310.0, 14933.0, 4808.0, 1807.0, 737.0, 378.0, 147.0, 103.0, 53.0, 27.0, 28.0, 9.0, 9.0, 12.0, 5.0, 2.0, 5.0, 3.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 3.0], "bins": [-15.265625, -14.848876953125, -14.43212890625, -14.015380859375, -13.5986328125, -13.181884765625, -12.76513671875, -12.348388671875, -11.931640625, -11.514892578125, -11.09814453125, -10.681396484375, -10.2646484375, -9.847900390625, -9.43115234375, -9.014404296875, -8.59765625, -8.180908203125, -7.76416015625, -7.347412109375, -6.9306640625, -6.513916015625, -6.09716796875, -5.680419921875, -5.263671875, -4.846923828125, -4.43017578125, -4.013427734375, -3.5966796875, -3.179931640625, -2.76318359375, -2.346435546875, -1.9296875, -1.512939453125, -1.09619140625, -0.679443359375, -0.2626953125, 0.154052734375, 0.57080078125, 0.987548828125, 1.404296875, 1.821044921875, 2.23779296875, 2.654541015625, 3.0712890625, 3.488037109375, 3.90478515625, 4.321533203125, 4.73828125, 5.155029296875, 5.57177734375, 5.988525390625, 6.4052734375, 6.822021484375, 7.23876953125, 7.655517578125, 8.072265625, 8.489013671875, 8.90576171875, 9.322509765625, 9.7392578125, 10.156005859375, 10.57275390625, 10.989501953125, 11.40625]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 6.0, 8.0, 6.0, 5.0, 18.0, 13.0, 26.0, 25.0, 30.0, 45.0, 40.0, 57.0, 57.0, 60.0, 82.0, 60.0, 69.0, 65.0, 59.0, 54.0, 40.0, 46.0, 29.0, 25.0, 20.0, 19.0, 14.0, 9.0, 9.0, 5.0, 5.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.046875, -5.8857421875, -5.724609375, -5.5634765625, -5.40234375, -5.2412109375, -5.080078125, -4.9189453125, -4.7578125, -4.5966796875, -4.435546875, -4.2744140625, -4.11328125, -3.9521484375, -3.791015625, -3.6298828125, -3.46875, -3.3076171875, -3.146484375, -2.9853515625, -2.82421875, -2.6630859375, -2.501953125, -2.3408203125, -2.1796875, -2.0185546875, -1.857421875, -1.6962890625, -1.53515625, -1.3740234375, -1.212890625, -1.0517578125, -0.890625, -0.7294921875, -0.568359375, -0.4072265625, -0.24609375, -0.0849609375, 0.076171875, 0.2373046875, 0.3984375, 0.5595703125, 0.720703125, 0.8818359375, 1.04296875, 1.2041015625, 1.365234375, 1.5263671875, 1.6875, 1.8486328125, 2.009765625, 2.1708984375, 2.33203125, 2.4931640625, 2.654296875, 2.8154296875, 2.9765625, 3.1376953125, 3.298828125, 3.4599609375, 3.62109375, 3.7822265625, 3.943359375, 4.1044921875, 4.265625]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 4.0, 4.0, 4.0, 4.0, 18.0, 11.0, 22.0, 26.0, 22.0, 41.0, 51.0, 81.0, 142.0, 232.0, 358.0, 766.0, 1620.0, 3622.0, 8962.0, 24052.0, 77622.0, 339822.0, 446252.0, 97534.0, 28687.0, 10571.0, 4238.0, 1819.0, 867.0, 401.0, 231.0, 151.0, 97.0, 81.0, 41.0, 25.0, 21.0, 20.0, 13.0, 4.0, 6.0, 5.0, 1.0, 2.0, 4.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.7890625, -10.4049072265625, -10.020751953125, -9.6365966796875, -9.25244140625, -8.8682861328125, -8.484130859375, -8.0999755859375, -7.7158203125, -7.3316650390625, -6.947509765625, -6.5633544921875, -6.17919921875, -5.7950439453125, -5.410888671875, -5.0267333984375, -4.642578125, -4.2584228515625, -3.874267578125, -3.4901123046875, -3.10595703125, -2.7218017578125, -2.337646484375, -1.9534912109375, -1.5693359375, -1.1851806640625, -0.801025390625, -0.4168701171875, -0.03271484375, 0.3514404296875, 0.735595703125, 1.1197509765625, 1.50390625, 1.8880615234375, 2.272216796875, 2.6563720703125, 3.04052734375, 3.4246826171875, 3.808837890625, 4.1929931640625, 4.5771484375, 4.9613037109375, 5.345458984375, 5.7296142578125, 6.11376953125, 6.4979248046875, 6.882080078125, 7.2662353515625, 7.650390625, 8.0345458984375, 8.418701171875, 8.8028564453125, 9.18701171875, 9.5711669921875, 9.955322265625, 10.3394775390625, 10.7236328125, 11.1077880859375, 11.491943359375, 11.8760986328125, 12.26025390625, 12.6444091796875, 13.028564453125, 13.4127197265625, 13.796875]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 4.0, 5.0, 8.0, 14.0, 8.0, 24.0, 19.0, 24.0, 33.0, 36.0, 50.0, 60.0, 70.0, 63.0, 74.0, 65.0, 64.0, 70.0, 62.0, 45.0, 45.0, 44.0, 29.0, 22.0, 18.0, 17.0, 11.0, 9.0, 4.0, 8.0, 2.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.4375, -32.563232421875, -31.68896484375, -30.814697265625, -29.9404296875, -29.066162109375, -28.19189453125, -27.317626953125, -26.443359375, -25.569091796875, -24.69482421875, -23.820556640625, -22.9462890625, -22.072021484375, -21.19775390625, -20.323486328125, -19.44921875, -18.574951171875, -17.70068359375, -16.826416015625, -15.9521484375, -15.077880859375, -14.20361328125, -13.329345703125, -12.455078125, -11.580810546875, -10.70654296875, -9.832275390625, -8.9580078125, -8.083740234375, -7.20947265625, -6.335205078125, -5.4609375, -4.586669921875, -3.71240234375, -2.838134765625, -1.9638671875, -1.089599609375, -0.21533203125, 0.658935546875, 1.533203125, 2.407470703125, 3.28173828125, 4.156005859375, 5.0302734375, 5.904541015625, 6.77880859375, 7.653076171875, 8.52734375, 9.401611328125, 10.27587890625, 11.150146484375, 12.0244140625, 12.898681640625, 13.77294921875, 14.647216796875, 15.521484375, 16.395751953125, 17.27001953125, 18.144287109375, 19.0185546875, 19.892822265625, 20.76708984375, 21.641357421875, 22.515625]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 8.0, 10.0, 24.0, 33.0, 75.0, 145.0, 362.0, 1612.0, 43935.0, 997223.0, 4120.0, 594.0, 215.0, 109.0, 39.0, 33.0, 12.0, 3.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-44.65625, -43.4228515625, -42.189453125, -40.9560546875, -39.72265625, -38.4892578125, -37.255859375, -36.0224609375, -34.7890625, -33.5556640625, -32.322265625, -31.0888671875, -29.85546875, -28.6220703125, -27.388671875, -26.1552734375, -24.921875, -23.6884765625, -22.455078125, -21.2216796875, -19.98828125, -18.7548828125, -17.521484375, -16.2880859375, -15.0546875, -13.8212890625, -12.587890625, -11.3544921875, -10.12109375, -8.8876953125, -7.654296875, -6.4208984375, -5.1875, -3.9541015625, -2.720703125, -1.4873046875, -0.25390625, 0.9794921875, 2.212890625, 3.4462890625, 4.6796875, 5.9130859375, 7.146484375, 8.3798828125, 9.61328125, 10.8466796875, 12.080078125, 13.3134765625, 14.546875, 15.7802734375, 17.013671875, 18.2470703125, 19.48046875, 20.7138671875, 21.947265625, 23.1806640625, 24.4140625, 25.6474609375, 26.880859375, 28.1142578125, 29.34765625, 30.5810546875, 31.814453125, 33.0478515625, 34.28125]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 0.0, 0.0, 5.0, 4.0, 3.0, 12.0, 17.0, 20.0, 38.0, 74.0, 105.0, 155.0, 181.0, 146.0, 82.0, 69.0, 31.0, 30.0, 12.0, 7.0, 5.0, 2.0, 3.0, 0.0, 3.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.001255035400390625, -0.0012123733758926392, -0.0011697113513946533, -0.0011270493268966675, -0.0010843873023986816, -0.0010417252779006958, -0.00099906325340271, -0.0009564012289047241, -0.0009137392044067383, -0.0008710771799087524, -0.0008284151554107666, -0.0007857531309127808, -0.0007430911064147949, -0.0007004290819168091, -0.0006577670574188232, -0.0006151050329208374, -0.0005724430084228516, -0.0005297809839248657, -0.0004871189594268799, -0.00044445693492889404, -0.0004017949104309082, -0.00035913288593292236, -0.0003164708614349365, -0.0002738088369369507, -0.00023114681243896484, -0.000188484787940979, -0.00014582276344299316, -0.00010316073894500732, -6.0498714447021484e-05, -1.7836689949035645e-05, 2.4825334548950195e-05, 6.748735904693604e-05, 0.00011014938354492188, 0.00015281140804290771, 0.00019547343254089355, 0.0002381354570388794, 0.00028079748153686523, 0.0003234595060348511, 0.0003661215305328369, 0.00040878355503082275, 0.0004514455795288086, 0.0004941076040267944, 0.0005367696285247803, 0.0005794316530227661, 0.000622093677520752, 0.0006647557020187378, 0.0007074177265167236, 0.0007500797510147095, 0.0007927417755126953, 0.0008354038000106812, 0.000878065824508667, 0.0009207278490066528, 0.0009633898735046387, 0.0010060518980026245, 0.0010487139225006104, 0.0010913759469985962, 0.001134037971496582, 0.0011766999959945679, 0.0012193620204925537, 0.0012620240449905396, 0.0013046860694885254, 0.0013473480939865112, 0.001390010118484497, 0.001432672142982483, 0.0014753341674804688]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 6.0, 4.0, 6.0, 12.0, 23.0, 30.0, 71.0, 128.0, 285.0, 658.0, 1895.0, 7112.0, 109791.0, 898789.0, 24060.0, 3676.0, 1198.0, 401.0, 191.0, 98.0, 57.0, 30.0, 14.0, 6.0, 5.0, 6.0, 1.0, 4.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.71875, -11.2891845703125, -10.859619140625, -10.4300537109375, -10.00048828125, -9.5709228515625, -9.141357421875, -8.7117919921875, -8.2822265625, -7.8526611328125, -7.423095703125, -6.9935302734375, -6.56396484375, -6.1343994140625, -5.704833984375, -5.2752685546875, -4.845703125, -4.4161376953125, -3.986572265625, -3.5570068359375, -3.12744140625, -2.6978759765625, -2.268310546875, -1.8387451171875, -1.4091796875, -0.9796142578125, -0.550048828125, -0.1204833984375, 0.30908203125, 0.7386474609375, 1.168212890625, 1.5977783203125, 2.02734375, 2.4569091796875, 2.886474609375, 3.3160400390625, 3.74560546875, 4.1751708984375, 4.604736328125, 5.0343017578125, 5.4638671875, 5.8934326171875, 6.322998046875, 6.7525634765625, 7.18212890625, 7.6116943359375, 8.041259765625, 8.4708251953125, 8.900390625, 9.3299560546875, 9.759521484375, 10.1890869140625, 10.61865234375, 11.0482177734375, 11.477783203125, 11.9073486328125, 12.3369140625, 12.7664794921875, 13.196044921875, 13.6256103515625, 14.05517578125, 14.4847412109375, 14.914306640625, 15.3438720703125, 15.7734375]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 1.0, 3.0, 7.0, 0.0, 9.0, 22.0, 30.0, 73.0, 131.0, 322.0, 203.0, 94.0, 49.0, 26.0, 12.0, 7.0, 4.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.375, -15.823486328125, -15.27197265625, -14.720458984375, -14.1689453125, -13.617431640625, -13.06591796875, -12.514404296875, -11.962890625, -11.411376953125, -10.85986328125, -10.308349609375, -9.7568359375, -9.205322265625, -8.65380859375, -8.102294921875, -7.55078125, -6.999267578125, -6.44775390625, -5.896240234375, -5.3447265625, -4.793212890625, -4.24169921875, -3.690185546875, -3.138671875, -2.587158203125, -2.03564453125, -1.484130859375, -0.9326171875, -0.381103515625, 0.17041015625, 0.721923828125, 1.2734375, 1.824951171875, 2.37646484375, 2.927978515625, 3.4794921875, 4.031005859375, 4.58251953125, 5.134033203125, 5.685546875, 6.237060546875, 6.78857421875, 7.340087890625, 7.8916015625, 8.443115234375, 8.99462890625, 9.546142578125, 10.09765625, 10.649169921875, 11.20068359375, 11.752197265625, 12.3037109375, 12.855224609375, 13.40673828125, 13.958251953125, 14.509765625, 15.061279296875, 15.61279296875, 16.164306640625, 16.7158203125, 17.267333984375, 17.81884765625, 18.370361328125, 18.921875]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 6.0, 13.0, 21.0, 35.0, 68.0, 102.0, 157.0, 245.0, 158.0, 99.0, 55.0, 19.0, 19.0, 6.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-95.37255859375, -89.57077026367188, -83.76897430419922, -77.96717834472656, -72.16539001464844, -66.36360168457031, -60.561805725097656, -54.760013580322266, -48.958221435546875, -43.156429290771484, -37.354637145996094, -31.552845001220703, -25.751052856445312, -19.949260711669922, -14.147468566894531, -8.34567642211914, -2.54388427734375, 3.2579078674316406, 9.059700012207031, 14.861492156982422, 20.663284301757812, 26.465076446533203, 32.266868591308594, 38.068660736083984, 43.870452880859375, 49.672245025634766, 55.474037170410156, 61.27582931518555, 67.07762145996094, 72.87940979003906, 78.68120574951172, 84.48300170898438, 90.2847900390625, 96.08657836914062, 101.88837432861328, 107.69017028808594, 113.49195861816406, 119.29374694824219, 125.09554290771484, 130.8973388671875, 136.69912719726562, 142.50091552734375, 148.30270385742188, 154.10450744628906, 159.9062957763672, 165.7080841064453, 171.5098876953125, 177.31167602539062, 183.11346435546875, 188.91525268554688, 194.717041015625, 200.5188446044922, 206.3206329345703, 212.12242126464844, 217.92422485351562, 223.72601318359375, 229.52780151367188, 235.32958984375, 241.13137817382812, 246.9331817626953, 252.73497009277344, 258.5367736816406, 264.33856201171875, 270.1403503417969, 275.942138671875]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 6.0, 2.0, 6.0, 9.0, 16.0, 14.0, 18.0, 26.0, 28.0, 33.0, 41.0, 35.0, 44.0, 34.0, 37.0, 53.0, 52.0, 65.0, 57.0, 51.0, 54.0, 46.0, 45.0, 28.0, 31.0, 32.0, 28.0, 22.0, 20.0, 11.0, 15.0, 17.0, 7.0, 8.0, 1.0, 6.0, 2.0, 5.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-78.41399383544922, -75.10453033447266, -71.79507446289062, -68.48561096191406, -65.1761474609375, -61.86669158935547, -58.557228088378906, -55.24776840209961, -51.93830871582031, -48.628849029541016, -45.31938934326172, -42.009925842285156, -38.70046615600586, -35.39100646972656, -32.08154296875, -28.772083282470703, -25.462623596191406, -22.15316390991211, -18.84370231628418, -15.534241676330566, -12.224781036376953, -8.915321350097656, -5.605859756469727, -2.296398162841797, 1.0130615234375, 4.322522163391113, 7.631982803344727, 10.94144344329834, 14.250904083251953, 17.56036376953125, 20.86982536315918, 24.17928695678711, 27.488754272460938, 30.798213958740234, 34.10767364501953, 37.417137145996094, 40.72659683227539, 44.03605651855469, 47.34552001953125, 50.65497970581055, 53.964439392089844, 57.27389907836914, 60.58335876464844, 63.892822265625, 67.20228576660156, 70.5117416381836, 73.82120513916016, 77.13066101074219, 80.44012451171875, 83.74958801269531, 87.05904388427734, 90.3685073852539, 93.67796325683594, 96.9874267578125, 100.29689025878906, 103.60635375976562, 106.91580963134766, 110.22527313232422, 113.53472900390625, 116.84419250488281, 120.15365600585938, 123.4631118774414, 126.77257537841797, 130.08203125, 133.39149475097656]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 3.0, 1.0, 6.0, 3.0, 6.0, 7.0, 15.0, 8.0, 18.0, 20.0, 52.0, 51.0, 81.0, 168.0, 285.0, 544.0, 1032.0, 2795.0, 13388.0, 4158380.0, 12482.0, 2736.0, 985.0, 447.0, 299.0, 184.0, 96.0, 65.0, 35.0, 33.0, 18.0, 12.0, 13.0, 7.0, 6.0, 2.0, 7.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.625, -25.829833984375, -25.03466796875, -24.239501953125, -23.4443359375, -22.649169921875, -21.85400390625, -21.058837890625, -20.263671875, -19.468505859375, -18.67333984375, -17.878173828125, -17.0830078125, -16.287841796875, -15.49267578125, -14.697509765625, -13.90234375, -13.107177734375, -12.31201171875, -11.516845703125, -10.7216796875, -9.926513671875, -9.13134765625, -8.336181640625, -7.541015625, -6.745849609375, -5.95068359375, -5.155517578125, -4.3603515625, -3.565185546875, -2.77001953125, -1.974853515625, -1.1796875, -0.384521484375, 0.41064453125, 1.205810546875, 2.0009765625, 2.796142578125, 3.59130859375, 4.386474609375, 5.181640625, 5.976806640625, 6.77197265625, 7.567138671875, 8.3623046875, 9.157470703125, 9.95263671875, 10.747802734375, 11.54296875, 12.338134765625, 13.13330078125, 13.928466796875, 14.7236328125, 15.518798828125, 16.31396484375, 17.109130859375, 17.904296875, 18.699462890625, 19.49462890625, 20.289794921875, 21.0849609375, 21.880126953125, 22.67529296875, 23.470458984375, 24.265625]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 1.0, 3.0, 3.0, 7.0, 9.0, 6.0, 13.0, 12.0, 10.0, 17.0, 23.0, 33.0, 32.0, 35.0, 49.0, 49.0, 63.0, 67.0, 72.0, 65.0, 52.0, 71.0, 54.0, 49.0, 39.0, 37.0, 22.0, 29.0, 21.0, 14.0, 13.0, 10.0, 4.0, 7.0, 7.0, 7.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-5.84375, -5.6768798828125, -5.510009765625, -5.3431396484375, -5.17626953125, -5.0093994140625, -4.842529296875, -4.6756591796875, -4.5087890625, -4.3419189453125, -4.175048828125, -4.0081787109375, -3.84130859375, -3.6744384765625, -3.507568359375, -3.3406982421875, -3.173828125, -3.0069580078125, -2.840087890625, -2.6732177734375, -2.50634765625, -2.3394775390625, -2.172607421875, -2.0057373046875, -1.8388671875, -1.6719970703125, -1.505126953125, -1.3382568359375, -1.17138671875, -1.0045166015625, -0.837646484375, -0.6707763671875, -0.50390625, -0.3370361328125, -0.170166015625, -0.0032958984375, 0.16357421875, 0.3304443359375, 0.497314453125, 0.6641845703125, 0.8310546875, 0.9979248046875, 1.164794921875, 1.3316650390625, 1.49853515625, 1.6654052734375, 1.832275390625, 1.9991455078125, 2.166015625, 2.3328857421875, 2.499755859375, 2.6666259765625, 2.83349609375, 3.0003662109375, 3.167236328125, 3.3341064453125, 3.5009765625, 3.6678466796875, 3.834716796875, 4.0015869140625, 4.16845703125, 4.3353271484375, 4.502197265625, 4.6690673828125, 4.8359375]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 5.0, 2.0, 5.0, 12.0, 12.0, 9.0, 12.0, 23.0, 36.0, 47.0, 86.0, 86.0, 129.0, 230.0, 363.0, 569.0, 1041.0, 1923.0, 4433.0, 15875.0, 4120293.0, 36099.0, 7071.0, 2617.0, 1319.0, 734.0, 412.0, 295.0, 159.0, 102.0, 91.0, 45.0, 32.0, 37.0, 20.0, 12.0, 7.0, 7.0, 12.0, 13.0, 3.0, 1.0, 2.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-24.234375, -23.418701171875, -22.60302734375, -21.787353515625, -20.9716796875, -20.156005859375, -19.34033203125, -18.524658203125, -17.708984375, -16.893310546875, -16.07763671875, -15.261962890625, -14.4462890625, -13.630615234375, -12.81494140625, -11.999267578125, -11.18359375, -10.367919921875, -9.55224609375, -8.736572265625, -7.9208984375, -7.105224609375, -6.28955078125, -5.473876953125, -4.658203125, -3.842529296875, -3.02685546875, -2.211181640625, -1.3955078125, -0.579833984375, 0.23583984375, 1.051513671875, 1.8671875, 2.682861328125, 3.49853515625, 4.314208984375, 5.1298828125, 5.945556640625, 6.76123046875, 7.576904296875, 8.392578125, 9.208251953125, 10.02392578125, 10.839599609375, 11.6552734375, 12.470947265625, 13.28662109375, 14.102294921875, 14.91796875, 15.733642578125, 16.54931640625, 17.364990234375, 18.1806640625, 18.996337890625, 19.81201171875, 20.627685546875, 21.443359375, 22.259033203125, 23.07470703125, 23.890380859375, 24.7060546875, 25.521728515625, 26.33740234375, 27.153076171875, 27.96875]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 1.0, 1.0, 1.0, 4.0, 4.0, 9.0, 13.0, 17.0, 203.0, 3760.0, 21.0, 13.0, 14.0, 6.0, 1.0, 5.0, 4.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.9453125, -6.72174072265625, -6.4981689453125, -6.27459716796875, -6.051025390625, -5.82745361328125, -5.6038818359375, -5.38031005859375, -5.15673828125, -4.93316650390625, -4.7095947265625, -4.48602294921875, -4.262451171875, -4.03887939453125, -3.8153076171875, -3.59173583984375, -3.3681640625, -3.14459228515625, -2.9210205078125, -2.69744873046875, -2.473876953125, -2.25030517578125, -2.0267333984375, -1.80316162109375, -1.57958984375, -1.35601806640625, -1.1324462890625, -0.90887451171875, -0.685302734375, -0.46173095703125, -0.2381591796875, -0.01458740234375, 0.208984375, 0.43255615234375, 0.6561279296875, 0.87969970703125, 1.103271484375, 1.32684326171875, 1.5504150390625, 1.77398681640625, 1.99755859375, 2.22113037109375, 2.4447021484375, 2.66827392578125, 2.891845703125, 3.11541748046875, 3.3389892578125, 3.56256103515625, 3.7861328125, 4.00970458984375, 4.2332763671875, 4.45684814453125, 4.680419921875, 4.90399169921875, 5.1275634765625, 5.35113525390625, 5.57470703125, 5.79827880859375, 6.0218505859375, 6.24542236328125, 6.468994140625, 6.69256591796875, 6.9161376953125, 7.13970947265625, 7.36328125]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 6.0, 10.0, 20.0, 62.0, 115.0, 243.0, 243.0, 133.0, 84.0, 39.0, 23.0, 13.0, 6.0, 7.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.189288139343262, -13.073980331420898, -11.958673477172852, -10.843366622924805, -9.728058815002441, -8.612751007080078, -7.497444152832031, -6.382136821746826, -5.266829490661621, -4.151522159576416, -3.036214828491211, -1.9209074974060059, -0.8056001663208008, 0.3097071647644043, 1.4250144958496094, 2.5403218269348145, 3.6556291580200195, 4.770936489105225, 5.88624382019043, 7.001551151275635, 8.11685848236084, 9.232166290283203, 10.34747314453125, 11.462779998779297, 12.57808780670166, 13.693395614624023, 14.80870246887207, 15.924009323120117, 17.039318084716797, 18.154624938964844, 19.26993179321289, 20.385238647460938, 21.50054931640625, 22.615856170654297, 23.731163024902344, 24.846471786499023, 25.96177864074707, 27.077085494995117, 28.192394256591797, 29.307701110839844, 30.42300796508789, 31.538314819335938, 32.653621673583984, 33.76892852783203, 34.884239196777344, 35.99954605102539, 37.11485290527344, 38.230159759521484, 39.34546661376953, 40.46077346801758, 41.576080322265625, 42.69138717651367, 43.80669403076172, 44.92200469970703, 46.03731155395508, 47.152618408203125, 48.26792526245117, 49.38323211669922, 50.498538970947266, 51.61384582519531, 52.729156494140625, 53.84446334838867, 54.95977020263672, 56.075077056884766, 57.19038391113281]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 3.0, 1.0, 2.0, 2.0, 7.0, 8.0, 5.0, 8.0, 7.0, 13.0, 10.0, 7.0, 14.0, 13.0, 16.0, 13.0, 23.0, 19.0, 27.0, 23.0, 37.0, 41.0, 33.0, 39.0, 28.0, 25.0, 37.0, 42.0, 35.0, 35.0, 35.0, 39.0, 37.0, 29.0, 38.0, 37.0, 26.0, 30.0, 24.0, 26.0, 21.0, 12.0, 12.0, 11.0, 16.0, 3.0, 13.0, 7.0, 6.0, 7.0, 5.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0], "bins": [-9.801961898803711, -9.484550476074219, -9.167140007019043, -8.849729537963867, -8.532318115234375, -8.214906692504883, -7.897496223449707, -7.580085277557373, -7.262674331665039, -6.945263385772705, -6.627852439880371, -6.310441493988037, -5.993030548095703, -5.675619602203369, -5.358208656311035, -5.040797710418701, -4.723386764526367, -4.405975818634033, -4.088564872741699, -3.7711539268493652, -3.4537429809570312, -3.1363320350646973, -2.8189210891723633, -2.5015101432800293, -2.1840991973876953, -1.8666882514953613, -1.5492773056030273, -1.2318663597106934, -0.9144554138183594, -0.5970444679260254, -0.2796335220336914, 0.03777742385864258, 0.35518741607666016, 0.6725983619689941, 0.9900093078613281, 1.307420253753662, 1.624831199645996, 1.94224214553833, 2.259653091430664, 2.577064037322998, 2.894474983215332, 3.211885929107666, 3.529296875, 3.846707820892334, 4.164118766784668, 4.481529712677002, 4.798940658569336, 5.11635160446167, 5.433762550354004, 5.751173496246338, 6.068584442138672, 6.385995388031006, 6.70340633392334, 7.020817279815674, 7.338228225708008, 7.655639171600342, 7.973050117492676, 8.290460586547852, 8.607872009277344, 8.925283432006836, 9.242693901062012, 9.560104370117188, 9.87751579284668, 10.194927215576172, 10.512337684631348]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 2.0, 3.0, 7.0, 10.0, 16.0, 19.0, 28.0, 49.0, 100.0, 108.0, 160.0, 265.0, 491.0, 843.0, 1532.0, 2935.0, 6039.0, 13601.0, 34496.0, 101125.0, 353538.0, 365539.0, 105056.0, 35456.0, 14105.0, 6291.0, 2978.0, 1588.0, 854.0, 514.0, 309.0, 184.0, 92.0, 91.0, 41.0, 27.0, 30.0, 12.0, 9.0, 12.0, 4.0, 0.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.109375, -9.7725830078125, -9.435791015625, -9.0989990234375, -8.76220703125, -8.4254150390625, -8.088623046875, -7.7518310546875, -7.4150390625, -7.0782470703125, -6.741455078125, -6.4046630859375, -6.06787109375, -5.7310791015625, -5.394287109375, -5.0574951171875, -4.720703125, -4.3839111328125, -4.047119140625, -3.7103271484375, -3.37353515625, -3.0367431640625, -2.699951171875, -2.3631591796875, -2.0263671875, -1.6895751953125, -1.352783203125, -1.0159912109375, -0.67919921875, -0.3424072265625, -0.005615234375, 0.3311767578125, 0.66796875, 1.0047607421875, 1.341552734375, 1.6783447265625, 2.01513671875, 2.3519287109375, 2.688720703125, 3.0255126953125, 3.3623046875, 3.6990966796875, 4.035888671875, 4.3726806640625, 4.70947265625, 5.0462646484375, 5.383056640625, 5.7198486328125, 6.056640625, 6.3934326171875, 6.730224609375, 7.0670166015625, 7.40380859375, 7.7406005859375, 8.077392578125, 8.4141845703125, 8.7509765625, 9.0877685546875, 9.424560546875, 9.7613525390625, 10.09814453125, 10.4349365234375, 10.771728515625, 11.1085205078125, 11.4453125]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 3.0, 3.0, 6.0, 8.0, 9.0, 8.0, 12.0, 11.0, 15.0, 26.0, 21.0, 24.0, 44.0, 41.0, 41.0, 53.0, 37.0, 63.0, 58.0, 63.0, 60.0, 73.0, 52.0, 45.0, 40.0, 31.0, 39.0, 23.0, 22.0, 23.0, 16.0, 4.0, 6.0, 9.0, 5.0, 6.0, 4.0, 3.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-5.703125, -5.53594970703125, -5.3687744140625, -5.20159912109375, -5.034423828125, -4.86724853515625, -4.7000732421875, -4.53289794921875, -4.36572265625, -4.19854736328125, -4.0313720703125, -3.86419677734375, -3.697021484375, -3.52984619140625, -3.3626708984375, -3.19549560546875, -3.0283203125, -2.86114501953125, -2.6939697265625, -2.52679443359375, -2.359619140625, -2.19244384765625, -2.0252685546875, -1.85809326171875, -1.69091796875, -1.52374267578125, -1.3565673828125, -1.18939208984375, -1.022216796875, -0.85504150390625, -0.6878662109375, -0.52069091796875, -0.353515625, -0.18634033203125, -0.0191650390625, 0.14801025390625, 0.315185546875, 0.48236083984375, 0.6495361328125, 0.81671142578125, 0.98388671875, 1.15106201171875, 1.3182373046875, 1.48541259765625, 1.652587890625, 1.81976318359375, 1.9869384765625, 2.15411376953125, 2.3212890625, 2.48846435546875, 2.6556396484375, 2.82281494140625, 2.989990234375, 3.15716552734375, 3.3243408203125, 3.49151611328125, 3.65869140625, 3.82586669921875, 3.9930419921875, 4.16021728515625, 4.327392578125, 4.49456787109375, 4.6617431640625, 4.82891845703125, 4.99609375]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 7.0, 2.0, 8.0, 3.0, 11.0, 11.0, 9.0, 17.0, 25.0, 30.0, 51.0, 85.0, 108.0, 180.0, 253.0, 457.0, 762.0, 1438.0, 2606.0, 5443.0, 13380.0, 39346.0, 174861.0, 623320.0, 131816.0, 32641.0, 11262.0, 5095.0, 2303.0, 1212.0, 684.0, 407.0, 264.0, 150.0, 105.0, 72.0, 46.0, 33.0, 21.0, 9.0, 10.0, 6.0, 6.0, 3.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-15.4453125, -14.9537353515625, -14.462158203125, -13.9705810546875, -13.47900390625, -12.9874267578125, -12.495849609375, -12.0042724609375, -11.5126953125, -11.0211181640625, -10.529541015625, -10.0379638671875, -9.54638671875, -9.0548095703125, -8.563232421875, -8.0716552734375, -7.580078125, -7.0885009765625, -6.596923828125, -6.1053466796875, -5.61376953125, -5.1221923828125, -4.630615234375, -4.1390380859375, -3.6474609375, -3.1558837890625, -2.664306640625, -2.1727294921875, -1.68115234375, -1.1895751953125, -0.697998046875, -0.2064208984375, 0.28515625, 0.7767333984375, 1.268310546875, 1.7598876953125, 2.25146484375, 2.7430419921875, 3.234619140625, 3.7261962890625, 4.2177734375, 4.7093505859375, 5.200927734375, 5.6925048828125, 6.18408203125, 6.6756591796875, 7.167236328125, 7.6588134765625, 8.150390625, 8.6419677734375, 9.133544921875, 9.6251220703125, 10.11669921875, 10.6082763671875, 11.099853515625, 11.5914306640625, 12.0830078125, 12.5745849609375, 13.066162109375, 13.5577392578125, 14.04931640625, 14.5408935546875, 15.032470703125, 15.5240478515625, 16.015625]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 5.0, 1.0, 2.0, 5.0, 3.0, 1.0, 5.0, 8.0, 5.0, 9.0, 21.0, 16.0, 22.0, 25.0, 34.0, 40.0, 49.0, 52.0, 47.0, 47.0, 74.0, 72.0, 71.0, 59.0, 50.0, 55.0, 40.0, 42.0, 34.0, 23.0, 26.0, 18.0, 13.0, 9.0, 12.0, 7.0, 1.0, 2.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-26.9375, -26.025634765625, -25.11376953125, -24.201904296875, -23.2900390625, -22.378173828125, -21.46630859375, -20.554443359375, -19.642578125, -18.730712890625, -17.81884765625, -16.906982421875, -15.9951171875, -15.083251953125, -14.17138671875, -13.259521484375, -12.34765625, -11.435791015625, -10.52392578125, -9.612060546875, -8.7001953125, -7.788330078125, -6.87646484375, -5.964599609375, -5.052734375, -4.140869140625, -3.22900390625, -2.317138671875, -1.4052734375, -0.493408203125, 0.41845703125, 1.330322265625, 2.2421875, 3.154052734375, 4.06591796875, 4.977783203125, 5.8896484375, 6.801513671875, 7.71337890625, 8.625244140625, 9.537109375, 10.448974609375, 11.36083984375, 12.272705078125, 13.1845703125, 14.096435546875, 15.00830078125, 15.920166015625, 16.83203125, 17.743896484375, 18.65576171875, 19.567626953125, 20.4794921875, 21.391357421875, 22.30322265625, 23.215087890625, 24.126953125, 25.038818359375, 25.95068359375, 26.862548828125, 27.7744140625, 28.686279296875, 29.59814453125, 30.510009765625, 31.421875]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 7.0, 4.0, 22.0, 18.0, 26.0, 47.0, 90.0, 176.0, 324.0, 705.0, 1906.0, 7021.0, 50293.0, 861035.0, 111485.0, 10985.0, 2625.0, 938.0, 401.0, 180.0, 116.0, 64.0, 36.0, 21.0, 13.0, 9.0, 3.0, 7.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.1796875, -13.7012939453125, -13.222900390625, -12.7445068359375, -12.26611328125, -11.7877197265625, -11.309326171875, -10.8309326171875, -10.3525390625, -9.8741455078125, -9.395751953125, -8.9173583984375, -8.43896484375, -7.9605712890625, -7.482177734375, -7.0037841796875, -6.525390625, -6.0469970703125, -5.568603515625, -5.0902099609375, -4.61181640625, -4.1334228515625, -3.655029296875, -3.1766357421875, -2.6982421875, -2.2198486328125, -1.741455078125, -1.2630615234375, -0.78466796875, -0.3062744140625, 0.172119140625, 0.6505126953125, 1.12890625, 1.6072998046875, 2.085693359375, 2.5640869140625, 3.04248046875, 3.5208740234375, 3.999267578125, 4.4776611328125, 4.9560546875, 5.4344482421875, 5.912841796875, 6.3912353515625, 6.86962890625, 7.3480224609375, 7.826416015625, 8.3048095703125, 8.783203125, 9.2615966796875, 9.739990234375, 10.2183837890625, 10.69677734375, 11.1751708984375, 11.653564453125, 12.1319580078125, 12.6103515625, 13.0887451171875, 13.567138671875, 14.0455322265625, 14.52392578125, 15.0023193359375, 15.480712890625, 15.9591064453125, 16.4375]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 7.0, 10.0, 11.0, 7.0, 15.0, 17.0, 21.0, 25.0, 35.0, 58.0, 78.0, 117.0, 151.0, 123.0, 93.0, 58.0, 54.0, 30.0, 19.0, 15.0, 10.0, 12.0, 6.0, 8.0, 4.0, 2.0, 5.0, 2.0, 1.0, 2.0, 5.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0013456344604492188, -0.0013079643249511719, -0.001270294189453125, -0.0012326240539550781, -0.0011949539184570312, -0.0011572837829589844, -0.0011196136474609375, -0.0010819435119628906, -0.0010442733764648438, -0.0010066032409667969, -0.00096893310546875, -0.0009312629699707031, -0.0008935928344726562, -0.0008559226989746094, -0.0008182525634765625, -0.0007805824279785156, -0.0007429122924804688, -0.0007052421569824219, -0.000667572021484375, -0.0006299018859863281, -0.0005922317504882812, -0.0005545616149902344, -0.0005168914794921875, -0.0004792213439941406, -0.00044155120849609375, -0.0004038810729980469, -0.0003662109375, -0.0003285408020019531, -0.00029087066650390625, -0.0002532005310058594, -0.0002155303955078125, -0.00017786026000976562, -0.00014019012451171875, -0.00010251998901367188, -6.4849853515625e-05, -2.7179718017578125e-05, 1.049041748046875e-05, 4.8160552978515625e-05, 8.58306884765625e-05, 0.00012350082397460938, 0.00016117095947265625, 0.00019884109497070312, 0.00023651123046875, 0.0002741813659667969, 0.00031185150146484375, 0.0003495216369628906, 0.0003871917724609375, 0.0004248619079589844, 0.00046253204345703125, 0.0005002021789550781, 0.000537872314453125, 0.0005755424499511719, 0.0006132125854492188, 0.0006508827209472656, 0.0006885528564453125, 0.0007262229919433594, 0.0007638931274414062, 0.0008015632629394531, 0.0008392333984375, 0.0008769035339355469, 0.0009145736694335938, 0.0009522438049316406, 0.0009899139404296875, 0.0010275840759277344, 0.0010652542114257812]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 4.0, 4.0, 5.0, 11.0, 35.0, 42.0, 81.0, 267.0, 769.0, 3816.0, 48522.0, 958708.0, 32211.0, 2986.0, 698.0, 218.0, 87.0, 47.0, 26.0, 12.0, 9.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.125, -32.28515625, -31.4453125, -30.60546875, -29.765625, -28.92578125, -28.0859375, -27.24609375, -26.40625, -25.56640625, -24.7265625, -23.88671875, -23.046875, -22.20703125, -21.3671875, -20.52734375, -19.6875, -18.84765625, -18.0078125, -17.16796875, -16.328125, -15.48828125, -14.6484375, -13.80859375, -12.96875, -12.12890625, -11.2890625, -10.44921875, -9.609375, -8.76953125, -7.9296875, -7.08984375, -6.25, -5.41015625, -4.5703125, -3.73046875, -2.890625, -2.05078125, -1.2109375, -0.37109375, 0.46875, 1.30859375, 2.1484375, 2.98828125, 3.828125, 4.66796875, 5.5078125, 6.34765625, 7.1875, 8.02734375, 8.8671875, 9.70703125, 10.546875, 11.38671875, 12.2265625, 13.06640625, 13.90625, 14.74609375, 15.5859375, 16.42578125, 17.265625, 18.10546875, 18.9453125, 19.78515625, 20.625]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 3.0, 5.0, 3.0, 5.0, 7.0, 8.0, 10.0, 13.0, 21.0, 25.0, 33.0, 44.0, 66.0, 114.0, 131.0, 141.0, 113.0, 88.0, 58.0, 33.0, 20.0, 17.0, 10.0, 10.0, 7.0, 8.0, 4.0, 3.0, 3.0, 3.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.0546875, -9.6995849609375, -9.344482421875, -8.9893798828125, -8.63427734375, -8.2791748046875, -7.924072265625, -7.5689697265625, -7.2138671875, -6.8587646484375, -6.503662109375, -6.1485595703125, -5.79345703125, -5.4383544921875, -5.083251953125, -4.7281494140625, -4.373046875, -4.0179443359375, -3.662841796875, -3.3077392578125, -2.95263671875, -2.5975341796875, -2.242431640625, -1.8873291015625, -1.5322265625, -1.1771240234375, -0.822021484375, -0.4669189453125, -0.11181640625, 0.2432861328125, 0.598388671875, 0.9534912109375, 1.30859375, 1.6636962890625, 2.018798828125, 2.3739013671875, 2.72900390625, 3.0841064453125, 3.439208984375, 3.7943115234375, 4.1494140625, 4.5045166015625, 4.859619140625, 5.2147216796875, 5.56982421875, 5.9249267578125, 6.280029296875, 6.6351318359375, 6.990234375, 7.3453369140625, 7.700439453125, 8.0555419921875, 8.41064453125, 8.7657470703125, 9.120849609375, 9.4759521484375, 9.8310546875, 10.1861572265625, 10.541259765625, 10.8963623046875, 11.25146484375, 11.6065673828125, 11.961669921875, 12.3167724609375, 12.671875]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 6.0, 9.0, 19.0, 38.0, 91.0, 213.0, 323.0, 184.0, 77.0, 30.0, 16.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-159.25106811523438, -149.0913848876953, -138.9317169189453, -128.77203369140625, -118.61235809326172, -108.45268249511719, -98.29299926757812, -88.1333236694336, -77.97364807128906, -67.81397247314453, -57.654293060302734, -47.49461364746094, -37.334938049316406, -27.175262451171875, -17.015583038330078, -6.855903625488281, 3.30377197265625, 13.463449478149414, 23.623126983642578, 33.782806396484375, 43.942481994628906, 54.10215759277344, 64.2618408203125, 74.42151641845703, 84.58119201660156, 94.7408676147461, 104.90054321289062, 115.06022644042969, 125.21990203857422, 135.37957763671875, 145.5392608642578, 155.69894409179688, 165.858642578125, 176.01832580566406, 186.17799377441406, 196.33767700195312, 206.49734497070312, 216.6570281982422, 226.81671142578125, 236.97637939453125, 247.1360626220703, 257.2957458496094, 267.4554138183594, 277.6150817871094, 287.7747802734375, 297.9344482421875, 308.0941162109375, 318.2538146972656, 328.4134826660156, 338.5731506347656, 348.73284912109375, 358.89251708984375, 369.05218505859375, 379.21185302734375, 389.3715515136719, 399.5312194824219, 409.69091796875, 419.8505859375, 430.0102844238281, 440.1699523925781, 450.3296203613281, 460.48931884765625, 470.64898681640625, 480.80865478515625, 490.96832275390625]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 2.0, 1.0, 6.0, 9.0, 5.0, 5.0, 9.0, 8.0, 12.0, 12.0, 11.0, 17.0, 19.0, 24.0, 26.0, 31.0, 41.0, 44.0, 48.0, 46.0, 51.0, 60.0, 57.0, 51.0, 46.0, 47.0, 42.0, 52.0, 26.0, 33.0, 22.0, 23.0, 25.0, 21.0, 14.0, 19.0, 7.0, 5.0, 7.0, 10.0, 5.0, 0.0, 6.0, 2.0, 4.0, 2.0, 2.0, 0.0, 0.0, 2.0], "bins": [-126.72867584228516, -123.26766967773438, -119.8066635131836, -116.34565734863281, -112.88465118408203, -109.42364501953125, -105.96263885498047, -102.50163269042969, -99.0406265258789, -95.57962036132812, -92.11861419677734, -88.65760803222656, -85.19660186767578, -81.735595703125, -78.27458953857422, -74.81358337402344, -71.35256958007812, -67.89156341552734, -64.43055725097656, -60.96955108642578, -57.508544921875, -54.04753875732422, -50.58653259277344, -47.125526428222656, -43.664520263671875, -40.203514099121094, -36.74250793457031, -33.28150177001953, -29.82049560546875, -26.359487533569336, -22.898481369018555, -19.437475204467773, -15.976470947265625, -12.515464782714844, -9.054458618164062, -5.593451499938965, -2.1324453353881836, 1.328561782836914, 4.789567947387695, 8.250574111938477, 11.711580276489258, 15.172586441040039, 18.63359260559082, 22.094600677490234, 25.555606842041016, 29.016613006591797, 32.47761917114258, 35.93862533569336, 39.39963150024414, 42.86063766479492, 46.3216438293457, 49.782649993896484, 53.243656158447266, 56.70466613769531, 60.165672302246094, 63.626678466796875, 67.08768463134766, 70.54869079589844, 74.00969696044922, 77.470703125, 80.93170928955078, 84.39271545410156, 87.85372161865234, 91.31472778320312, 94.7757339477539]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 4.0, 6.0, 2.0, 5.0, 9.0, 8.0, 14.0, 15.0, 13.0, 35.0, 40.0, 53.0, 85.0, 114.0, 170.0, 302.0, 521.0, 1129.0, 12230.0, 4176369.0, 1612.0, 557.0, 342.0, 203.0, 119.0, 85.0, 68.0, 35.0, 52.0, 33.0, 6.0, 17.0, 8.0, 6.0, 9.0, 3.0, 3.0, 2.0, 0.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-54.09375, -52.4609375, -50.828125, -49.1953125, -47.5625, -45.9296875, -44.296875, -42.6640625, -41.03125, -39.3984375, -37.765625, -36.1328125, -34.5, -32.8671875, -31.234375, -29.6015625, -27.96875, -26.3359375, -24.703125, -23.0703125, -21.4375, -19.8046875, -18.171875, -16.5390625, -14.90625, -13.2734375, -11.640625, -10.0078125, -8.375, -6.7421875, -5.109375, -3.4765625, -1.84375, -0.2109375, 1.421875, 3.0546875, 4.6875, 6.3203125, 7.953125, 9.5859375, 11.21875, 12.8515625, 14.484375, 16.1171875, 17.75, 19.3828125, 21.015625, 22.6484375, 24.28125, 25.9140625, 27.546875, 29.1796875, 30.8125, 32.4453125, 34.078125, 35.7109375, 37.34375, 38.9765625, 40.609375, 42.2421875, 43.875, 45.5078125, 47.140625, 48.7734375, 50.40625]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 5.0, 9.0, 5.0, 9.0, 13.0, 19.0, 14.0, 10.0, 22.0, 25.0, 27.0, 33.0, 33.0, 39.0, 41.0, 73.0, 52.0, 67.0, 66.0, 56.0, 51.0, 52.0, 51.0, 35.0, 32.0, 33.0, 25.0, 21.0, 17.0, 12.0, 8.0, 9.0, 10.0, 4.0, 6.0, 7.0, 0.0, 2.0, 1.0, 3.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-6.07421875, -5.89483642578125, -5.7154541015625, -5.53607177734375, -5.356689453125, -5.17730712890625, -4.9979248046875, -4.81854248046875, -4.63916015625, -4.45977783203125, -4.2803955078125, -4.10101318359375, -3.921630859375, -3.74224853515625, -3.5628662109375, -3.38348388671875, -3.2041015625, -3.02471923828125, -2.8453369140625, -2.66595458984375, -2.486572265625, -2.30718994140625, -2.1278076171875, -1.94842529296875, -1.76904296875, -1.58966064453125, -1.4102783203125, -1.23089599609375, -1.051513671875, -0.87213134765625, -0.6927490234375, -0.51336669921875, -0.333984375, -0.15460205078125, 0.0247802734375, 0.20416259765625, 0.383544921875, 0.56292724609375, 0.7423095703125, 0.92169189453125, 1.10107421875, 1.28045654296875, 1.4598388671875, 1.63922119140625, 1.818603515625, 1.99798583984375, 2.1773681640625, 2.35675048828125, 2.5361328125, 2.71551513671875, 2.8948974609375, 3.07427978515625, 3.253662109375, 3.43304443359375, 3.6124267578125, 3.79180908203125, 3.97119140625, 4.15057373046875, 4.3299560546875, 4.50933837890625, 4.688720703125, 4.86810302734375, 5.0474853515625, 5.22686767578125, 5.40625]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 2.0, 4.0, 5.0, 8.0, 6.0, 5.0, 14.0, 30.0, 38.0, 46.0, 71.0, 95.0, 130.0, 194.0, 288.0, 417.0, 670.0, 1195.0, 2714.0, 8967.0, 4160672.0, 11893.0, 3272.0, 1437.0, 689.0, 425.0, 304.0, 213.0, 140.0, 106.0, 56.0, 59.0, 34.0, 24.0, 13.0, 21.0, 6.0, 7.0, 4.0, 7.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-33.71875, -32.6162109375, -31.513671875, -30.4111328125, -29.30859375, -28.2060546875, -27.103515625, -26.0009765625, -24.8984375, -23.7958984375, -22.693359375, -21.5908203125, -20.48828125, -19.3857421875, -18.283203125, -17.1806640625, -16.078125, -14.9755859375, -13.873046875, -12.7705078125, -11.66796875, -10.5654296875, -9.462890625, -8.3603515625, -7.2578125, -6.1552734375, -5.052734375, -3.9501953125, -2.84765625, -1.7451171875, -0.642578125, 0.4599609375, 1.5625, 2.6650390625, 3.767578125, 4.8701171875, 5.97265625, 7.0751953125, 8.177734375, 9.2802734375, 10.3828125, 11.4853515625, 12.587890625, 13.6904296875, 14.79296875, 15.8955078125, 16.998046875, 18.1005859375, 19.203125, 20.3056640625, 21.408203125, 22.5107421875, 23.61328125, 24.7158203125, 25.818359375, 26.9208984375, 28.0234375, 29.1259765625, 30.228515625, 31.3310546875, 32.43359375, 33.5361328125, 34.638671875, 35.7412109375, 36.84375]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 6.0, 7.0, 9.0, 3900.0, 135.0, 8.0, 4.0, 2.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.03125, -9.73583984375, -9.4404296875, -9.14501953125, -8.849609375, -8.55419921875, -8.2587890625, -7.96337890625, -7.66796875, -7.37255859375, -7.0771484375, -6.78173828125, -6.486328125, -6.19091796875, -5.8955078125, -5.60009765625, -5.3046875, -5.00927734375, -4.7138671875, -4.41845703125, -4.123046875, -3.82763671875, -3.5322265625, -3.23681640625, -2.94140625, -2.64599609375, -2.3505859375, -2.05517578125, -1.759765625, -1.46435546875, -1.1689453125, -0.87353515625, -0.578125, -0.28271484375, 0.0126953125, 0.30810546875, 0.603515625, 0.89892578125, 1.1943359375, 1.48974609375, 1.78515625, 2.08056640625, 2.3759765625, 2.67138671875, 2.966796875, 3.26220703125, 3.5576171875, 3.85302734375, 4.1484375, 4.44384765625, 4.7392578125, 5.03466796875, 5.330078125, 5.62548828125, 5.9208984375, 6.21630859375, 6.51171875, 6.80712890625, 7.1025390625, 7.39794921875, 7.693359375, 7.98876953125, 8.2841796875, 8.57958984375, 8.875]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 6.0, 5.0, 12.0, 25.0, 44.0, 95.0, 175.0, 193.0, 185.0, 119.0, 49.0, 41.0, 22.0, 14.0, 10.0, 6.0, 6.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.10572624206543, -29.258777618408203, -28.41183090209961, -27.564882278442383, -26.717933654785156, -25.870986938476562, -25.024038314819336, -24.17708969116211, -23.330141067504883, -22.483192443847656, -21.636245727539062, -20.789297103881836, -19.94234848022461, -19.095401763916016, -18.24845314025879, -17.401504516601562, -16.55455780029297, -15.707610130310059, -14.860661506652832, -14.013713836669922, -13.166765213012695, -12.319817543029785, -11.472869873046875, -10.625921249389648, -9.778972625732422, -8.932024955749512, -8.085076332092285, -7.238128662109375, -6.391180038452148, -5.544232368469238, -4.69728422164917, -3.8503360748291016, -3.003387451171875, -2.1564393043518066, -1.3094912767410278, -0.462543249130249, 0.38440489768981934, 1.2313528060913086, 2.078300952911377, 2.9252490997314453, 3.7721972465515137, 4.619145393371582, 5.46609354019165, 6.313041687011719, 7.159989356994629, 8.006937026977539, 8.853885650634766, 9.700834274291992, 10.547781944274902, 11.394729614257812, 12.241678237915039, 13.08862590789795, 13.935574531555176, 14.782522201538086, 15.629470825195312, 16.476417541503906, 17.323366165161133, 18.17031478881836, 19.017261505126953, 19.86421012878418, 20.711158752441406, 21.55810546875, 22.405054092407227, 23.252002716064453, 24.09895133972168]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 3.0, 2.0, 6.0, 2.0, 6.0, 4.0, 2.0, 10.0, 7.0, 11.0, 16.0, 23.0, 11.0, 23.0, 16.0, 23.0, 20.0, 21.0, 23.0, 37.0, 34.0, 33.0, 45.0, 49.0, 50.0, 54.0, 38.0, 43.0, 34.0, 37.0, 37.0, 32.0, 28.0, 28.0, 27.0, 29.0, 12.0, 12.0, 20.0, 14.0, 13.0, 18.0, 14.0, 6.0, 9.0, 5.0, 6.0, 6.0, 3.0, 2.0, 4.0, 3.0, 2.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.614145278930664, -8.327057838439941, -8.039969444274902, -7.7528815269470215, -7.465793609619141, -7.178706169128418, -6.891618251800537, -6.604530334472656, -6.317442417144775, -6.0303544998168945, -5.743266582489014, -5.456178665161133, -5.16909122467041, -4.882002830505371, -4.594915390014648, -4.307827472686768, -4.020739555358887, -3.733651638031006, -3.446563720703125, -3.1594760417938232, -2.8723881244659424, -2.5853002071380615, -2.2982125282287598, -2.011124610900879, -1.724036693572998, -1.4369487762451172, -1.1498609781265259, -0.8627731204032898, -0.5756852626800537, -0.28859734535217285, -0.001509547233581543, 0.28557825088500977, 0.5726652145385742, 0.8597530722618103, 1.1468409299850464, 1.4339287281036377, 1.7210166454315186, 2.0081045627593994, 2.295192241668701, 2.582280158996582, 2.869368076324463, 3.1564559936523438, 3.4435439109802246, 3.7306315898895264, 4.017719268798828, 4.304807662963867, 4.59189510345459, 4.878983020782471, 5.166070938110352, 5.453158855438232, 5.740246772766113, 6.027334690093994, 6.314422607421875, 6.601510047912598, 6.8885979652404785, 7.175685882568359, 7.46277379989624, 7.749861717224121, 8.036949157714844, 8.324037551879883, 8.611124992370605, 8.898213386535645, 9.185300827026367, 9.472389221191406, 9.759476661682129]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, 3.0, 7.0, 10.0, 10.0, 16.0, 20.0, 27.0, 47.0, 62.0, 83.0, 119.0, 163.0, 239.0, 353.0, 573.0, 912.0, 1502.0, 2685.0, 5011.0, 10477.0, 24110.0, 68738.0, 258485.0, 473082.0, 129758.0, 40042.0, 15781.0, 7192.0, 3743.0, 2051.0, 1148.0, 708.0, 451.0, 288.0, 190.0, 144.0, 99.0, 62.0, 59.0, 32.0, 11.0, 17.0, 12.0, 16.0, 5.0, 5.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-10.65625, -10.3153076171875, -9.974365234375, -9.6334228515625, -9.29248046875, -8.9515380859375, -8.610595703125, -8.2696533203125, -7.9287109375, -7.5877685546875, -7.246826171875, -6.9058837890625, -6.56494140625, -6.2239990234375, -5.883056640625, -5.5421142578125, -5.201171875, -4.8602294921875, -4.519287109375, -4.1783447265625, -3.83740234375, -3.4964599609375, -3.155517578125, -2.8145751953125, -2.4736328125, -2.1326904296875, -1.791748046875, -1.4508056640625, -1.10986328125, -0.7689208984375, -0.427978515625, -0.0870361328125, 0.25390625, 0.5948486328125, 0.935791015625, 1.2767333984375, 1.61767578125, 1.9586181640625, 2.299560546875, 2.6405029296875, 2.9814453125, 3.3223876953125, 3.663330078125, 4.0042724609375, 4.34521484375, 4.6861572265625, 5.027099609375, 5.3680419921875, 5.708984375, 6.0499267578125, 6.390869140625, 6.7318115234375, 7.07275390625, 7.4136962890625, 7.754638671875, 8.0955810546875, 8.4365234375, 8.7774658203125, 9.118408203125, 9.4593505859375, 9.80029296875, 10.1412353515625, 10.482177734375, 10.8231201171875, 11.1640625]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 5.0, 3.0, 4.0, 1.0, 8.0, 8.0, 14.0, 7.0, 12.0, 18.0, 20.0, 19.0, 28.0, 32.0, 34.0, 34.0, 42.0, 57.0, 58.0, 60.0, 65.0, 61.0, 61.0, 41.0, 54.0, 40.0, 36.0, 28.0, 40.0, 24.0, 22.0, 13.0, 10.0, 13.0, 8.0, 8.0, 5.0, 2.0, 3.0, 3.0, 1.0, 1.0, 2.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.05078125, -5.85919189453125, -5.6676025390625, -5.47601318359375, -5.284423828125, -5.09283447265625, -4.9012451171875, -4.70965576171875, -4.51806640625, -4.32647705078125, -4.1348876953125, -3.94329833984375, -3.751708984375, -3.56011962890625, -3.3685302734375, -3.17694091796875, -2.9853515625, -2.79376220703125, -2.6021728515625, -2.41058349609375, -2.218994140625, -2.02740478515625, -1.8358154296875, -1.64422607421875, -1.45263671875, -1.26104736328125, -1.0694580078125, -0.87786865234375, -0.686279296875, -0.49468994140625, -0.3031005859375, -0.11151123046875, 0.080078125, 0.27166748046875, 0.4632568359375, 0.65484619140625, 0.846435546875, 1.03802490234375, 1.2296142578125, 1.42120361328125, 1.61279296875, 1.80438232421875, 1.9959716796875, 2.18756103515625, 2.379150390625, 2.57073974609375, 2.7623291015625, 2.95391845703125, 3.1455078125, 3.33709716796875, 3.5286865234375, 3.72027587890625, 3.911865234375, 4.10345458984375, 4.2950439453125, 4.48663330078125, 4.67822265625, 4.86981201171875, 5.0614013671875, 5.25299072265625, 5.444580078125, 5.63616943359375, 5.8277587890625, 6.01934814453125, 6.2109375]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 4.0, 2.0, 7.0, 7.0, 9.0, 17.0, 20.0, 24.0, 39.0, 68.0, 94.0, 137.0, 279.0, 402.0, 846.0, 1849.0, 5303.0, 21717.0, 177450.0, 745055.0, 76000.0, 12529.0, 3562.0, 1501.0, 653.0, 383.0, 214.0, 127.0, 92.0, 45.0, 38.0, 31.0, 11.0, 14.0, 7.0, 6.0, 6.0, 2.0, 2.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-19.140625, -18.41357421875, -17.6865234375, -16.95947265625, -16.232421875, -15.50537109375, -14.7783203125, -14.05126953125, -13.32421875, -12.59716796875, -11.8701171875, -11.14306640625, -10.416015625, -9.68896484375, -8.9619140625, -8.23486328125, -7.5078125, -6.78076171875, -6.0537109375, -5.32666015625, -4.599609375, -3.87255859375, -3.1455078125, -2.41845703125, -1.69140625, -0.96435546875, -0.2373046875, 0.48974609375, 1.216796875, 1.94384765625, 2.6708984375, 3.39794921875, 4.125, 4.85205078125, 5.5791015625, 6.30615234375, 7.033203125, 7.76025390625, 8.4873046875, 9.21435546875, 9.94140625, 10.66845703125, 11.3955078125, 12.12255859375, 12.849609375, 13.57666015625, 14.3037109375, 15.03076171875, 15.7578125, 16.48486328125, 17.2119140625, 17.93896484375, 18.666015625, 19.39306640625, 20.1201171875, 20.84716796875, 21.57421875, 22.30126953125, 23.0283203125, 23.75537109375, 24.482421875, 25.20947265625, 25.9365234375, 26.66357421875, 27.390625]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 8.0, 6.0, 15.0, 9.0, 24.0, 19.0, 27.0, 39.0, 53.0, 55.0, 67.0, 58.0, 79.0, 82.0, 66.0, 75.0, 78.0, 54.0, 54.0, 32.0, 29.0, 22.0, 21.0, 8.0, 11.0, 2.0, 2.0, 4.0, 1.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.28125, -31.96630859375, -30.6513671875, -29.33642578125, -28.021484375, -26.70654296875, -25.3916015625, -24.07666015625, -22.76171875, -21.44677734375, -20.1318359375, -18.81689453125, -17.501953125, -16.18701171875, -14.8720703125, -13.55712890625, -12.2421875, -10.92724609375, -9.6123046875, -8.29736328125, -6.982421875, -5.66748046875, -4.3525390625, -3.03759765625, -1.72265625, -0.40771484375, 0.9072265625, 2.22216796875, 3.537109375, 4.85205078125, 6.1669921875, 7.48193359375, 8.796875, 10.11181640625, 11.4267578125, 12.74169921875, 14.056640625, 15.37158203125, 16.6865234375, 18.00146484375, 19.31640625, 20.63134765625, 21.9462890625, 23.26123046875, 24.576171875, 25.89111328125, 27.2060546875, 28.52099609375, 29.8359375, 31.15087890625, 32.4658203125, 33.78076171875, 35.095703125, 36.41064453125, 37.7255859375, 39.04052734375, 40.35546875, 41.67041015625, 42.9853515625, 44.30029296875, 45.615234375, 46.93017578125, 48.2451171875, 49.56005859375, 50.875]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 8.0, 4.0, 4.0, 10.0, 9.0, 8.0, 16.0, 32.0, 39.0, 61.0, 139.0, 281.0, 742.0, 2480.0, 12879.0, 793356.0, 226944.0, 8552.0, 1842.0, 621.0, 266.0, 106.0, 66.0, 22.0, 18.0, 13.0, 9.0, 12.0, 4.0, 6.0, 2.0, 2.0, 4.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.296875, -16.737060546875, -16.17724609375, -15.617431640625, -15.0576171875, -14.497802734375, -13.93798828125, -13.378173828125, -12.818359375, -12.258544921875, -11.69873046875, -11.138916015625, -10.5791015625, -10.019287109375, -9.45947265625, -8.899658203125, -8.33984375, -7.780029296875, -7.22021484375, -6.660400390625, -6.1005859375, -5.540771484375, -4.98095703125, -4.421142578125, -3.861328125, -3.301513671875, -2.74169921875, -2.181884765625, -1.6220703125, -1.062255859375, -0.50244140625, 0.057373046875, 0.6171875, 1.177001953125, 1.73681640625, 2.296630859375, 2.8564453125, 3.416259765625, 3.97607421875, 4.535888671875, 5.095703125, 5.655517578125, 6.21533203125, 6.775146484375, 7.3349609375, 7.894775390625, 8.45458984375, 9.014404296875, 9.57421875, 10.134033203125, 10.69384765625, 11.253662109375, 11.8134765625, 12.373291015625, 12.93310546875, 13.492919921875, 14.052734375, 14.612548828125, 15.17236328125, 15.732177734375, 16.2919921875, 16.851806640625, 17.41162109375, 17.971435546875, 18.53125]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 3.0, 2.0, 4.0, 2.0, 2.0, 2.0, 7.0, 11.0, 12.0, 18.0, 33.0, 51.0, 82.0, 156.0, 196.0, 148.0, 106.0, 65.0, 39.0, 21.0, 15.0, 12.0, 5.0, 7.0, 4.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0011911392211914062, -0.0011429041624069214, -0.0010946691036224365, -0.0010464340448379517, -0.0009981989860534668, -0.0009499639272689819, -0.0009017288684844971, -0.0008534938097000122, -0.0008052587509155273, -0.0007570236921310425, -0.0007087886333465576, -0.0006605535745620728, -0.0006123185157775879, -0.000564083456993103, -0.0005158483982086182, -0.0004676133394241333, -0.00041937828063964844, -0.0003711432218551636, -0.0003229081630706787, -0.00027467310428619385, -0.00022643804550170898, -0.00017820298671722412, -0.00012996792793273926, -8.17328691482544e-05, -3.349781036376953e-05, 1.4737248420715332e-05, 6.29723072052002e-05, 0.00011120736598968506, 0.00015944242477416992, 0.00020767748355865479, 0.00025591254234313965, 0.0003041476011276245, 0.0003523826599121094, 0.00040061771869659424, 0.0004488527774810791, 0.000497087836265564, 0.0005453228950500488, 0.0005935579538345337, 0.0006417930126190186, 0.0006900280714035034, 0.0007382631301879883, 0.0007864981889724731, 0.000834733247756958, 0.0008829683065414429, 0.0009312033653259277, 0.0009794384241104126, 0.0010276734828948975, 0.0010759085416793823, 0.0011241436004638672, 0.001172378659248352, 0.001220613718032837, 0.0012688487768173218, 0.0013170838356018066, 0.0013653188943862915, 0.0014135539531707764, 0.0014617890119552612, 0.001510024070739746, 0.001558259129524231, 0.0016064941883087158, 0.0016547292470932007, 0.0017029643058776855, 0.0017511993646621704, 0.0017994344234466553, 0.0018476694822311401, 0.001895904541015625]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 4.0, 5.0, 11.0, 8.0, 12.0, 22.0, 29.0, 60.0, 93.0, 139.0, 330.0, 557.0, 1240.0, 2993.0, 11014.0, 117823.0, 867760.0, 36121.0, 6388.0, 2040.0, 879.0, 430.0, 248.0, 130.0, 100.0, 49.0, 20.0, 17.0, 15.0, 9.0, 4.0, 3.0, 2.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-14.3671875, -13.971435546875, -13.57568359375, -13.179931640625, -12.7841796875, -12.388427734375, -11.99267578125, -11.596923828125, -11.201171875, -10.805419921875, -10.40966796875, -10.013916015625, -9.6181640625, -9.222412109375, -8.82666015625, -8.430908203125, -8.03515625, -7.639404296875, -7.24365234375, -6.847900390625, -6.4521484375, -6.056396484375, -5.66064453125, -5.264892578125, -4.869140625, -4.473388671875, -4.07763671875, -3.681884765625, -3.2861328125, -2.890380859375, -2.49462890625, -2.098876953125, -1.703125, -1.307373046875, -0.91162109375, -0.515869140625, -0.1201171875, 0.275634765625, 0.67138671875, 1.067138671875, 1.462890625, 1.858642578125, 2.25439453125, 2.650146484375, 3.0458984375, 3.441650390625, 3.83740234375, 4.233154296875, 4.62890625, 5.024658203125, 5.42041015625, 5.816162109375, 6.2119140625, 6.607666015625, 7.00341796875, 7.399169921875, 7.794921875, 8.190673828125, 8.58642578125, 8.982177734375, 9.3779296875, 9.773681640625, 10.16943359375, 10.565185546875, 10.9609375]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 10.0, 13.0, 29.0, 44.0, 82.0, 200.0, 244.0, 170.0, 83.0, 39.0, 24.0, 12.0, 12.0, 8.0, 7.0, 1.0, 4.0, 1.0, 3.0, 2.0, 6.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.875, -19.297607421875, -18.72021484375, -18.142822265625, -17.5654296875, -16.988037109375, -16.41064453125, -15.833251953125, -15.255859375, -14.678466796875, -14.10107421875, -13.523681640625, -12.9462890625, -12.368896484375, -11.79150390625, -11.214111328125, -10.63671875, -10.059326171875, -9.48193359375, -8.904541015625, -8.3271484375, -7.749755859375, -7.17236328125, -6.594970703125, -6.017578125, -5.440185546875, -4.86279296875, -4.285400390625, -3.7080078125, -3.130615234375, -2.55322265625, -1.975830078125, -1.3984375, -0.821044921875, -0.24365234375, 0.333740234375, 0.9111328125, 1.488525390625, 2.06591796875, 2.643310546875, 3.220703125, 3.798095703125, 4.37548828125, 4.952880859375, 5.5302734375, 6.107666015625, 6.68505859375, 7.262451171875, 7.83984375, 8.417236328125, 8.99462890625, 9.572021484375, 10.1494140625, 10.726806640625, 11.30419921875, 11.881591796875, 12.458984375, 13.036376953125, 13.61376953125, 14.191162109375, 14.7685546875, 15.345947265625, 15.92333984375, 16.500732421875, 17.078125]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 3.0, 0.0, 19.0, 28.0, 66.0, 191.0, 315.0, 251.0, 94.0, 27.0, 10.0, 5.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-535.55419921875, -525.1531372070312, -514.7521362304688, -504.3511047363281, -493.9500732421875, -483.5490417480469, -473.14801025390625, -462.7469482421875, -452.345947265625, -441.9449157714844, -431.54388427734375, -421.1428527832031, -410.7418212890625, -400.3407897949219, -389.93975830078125, -379.5386962890625, -369.1376647949219, -358.73663330078125, -348.3356018066406, -337.9345703125, -327.5335388183594, -317.13250732421875, -306.7314758300781, -296.3304443359375, -285.92938232421875, -275.5283508300781, -265.1273193359375, -254.72628784179688, -244.32525634765625, -233.92422485351562, -223.52317810058594, -213.1221466064453, -202.7211456298828, -192.3201141357422, -181.91908264160156, -171.51805114746094, -161.11700439453125, -150.71597290039062, -140.31494140625, -129.91390991210938, -119.51287841796875, -109.11184692382812, -98.7108154296875, -88.30977630615234, -77.90874481201172, -67.5077133178711, -57.1066780090332, -46.70564270019531, -36.30461120605469, -25.90357780456543, -15.502544403076172, -5.101511001586914, 5.299522399902344, 15.700553894042969, 26.10158920288086, 36.50262451171875, 46.903656005859375, 57.3046875, 67.70571899414062, 78.10675811767578, 88.5077896118164, 98.90882110595703, 109.30986022949219, 119.71089172363281, 130.11192321777344]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 4.0, 1.0, 2.0, 1.0, 1.0, 3.0, 5.0, 9.0, 12.0, 7.0, 11.0, 15.0, 8.0, 18.0, 22.0, 19.0, 31.0, 36.0, 35.0, 48.0, 44.0, 49.0, 35.0, 53.0, 57.0, 50.0, 53.0, 51.0, 32.0, 33.0, 31.0, 32.0, 32.0, 25.0, 30.0, 20.0, 20.0, 15.0, 10.0, 15.0, 8.0, 8.0, 7.0, 2.0, 5.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0, 3.0], "bins": [-145.06704711914062, -141.10931396484375, -137.1515655517578, -133.19381713867188, -129.236083984375, -125.2783432006836, -121.32060241699219, -117.36286163330078, -113.40512084960938, -109.44738006591797, -105.48963928222656, -101.53189849853516, -97.57415771484375, -93.61641693115234, -89.65867614746094, -85.70093536376953, -81.74319458007812, -77.78545379638672, -73.82771301269531, -69.8699722290039, -65.9122314453125, -61.954490661621094, -57.99674987792969, -54.03900909423828, -50.081268310546875, -46.12352752685547, -42.16578674316406, -38.208045959472656, -34.25030517578125, -30.292564392089844, -26.334823608398438, -22.37708282470703, -18.419342041015625, -14.461601257324219, -10.503860473632812, -6.546119689941406, -2.58837890625, 1.3693618774414062, 5.3271026611328125, 9.284843444824219, 13.242584228515625, 17.20032501220703, 21.158065795898438, 25.115806579589844, 29.07354736328125, 33.031288146972656, 36.98902893066406, 40.94676971435547, 44.904510498046875, 48.86225128173828, 52.81999206542969, 56.777732849121094, 60.7354736328125, 64.6932144165039, 68.65095520019531, 72.60869598388672, 76.56643676757812, 80.52417755126953, 84.48191833496094, 88.43965911865234, 92.39739990234375, 96.35514068603516, 100.31288146972656, 104.27062225341797, 108.22836303710938]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 3.0, 1.0, 3.0, 6.0, 5.0, 3.0, 9.0, 6.0, 10.0, 19.0, 22.0, 36.0, 46.0, 67.0, 82.0, 146.0, 202.0, 285.0, 578.0, 955.0, 2162.0, 7352.0, 4142711.0, 31714.0, 4291.0, 1529.0, 730.0, 444.0, 248.0, 178.0, 116.0, 88.0, 57.0, 52.0, 36.0, 27.0, 18.0, 10.0, 11.0, 5.0, 5.0, 4.0, 3.0, 1.0, 7.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 0.0, 1.0], "bins": [-32.03125, -31.05517578125, -30.0791015625, -29.10302734375, -28.126953125, -27.15087890625, -26.1748046875, -25.19873046875, -24.22265625, -23.24658203125, -22.2705078125, -21.29443359375, -20.318359375, -19.34228515625, -18.3662109375, -17.39013671875, -16.4140625, -15.43798828125, -14.4619140625, -13.48583984375, -12.509765625, -11.53369140625, -10.5576171875, -9.58154296875, -8.60546875, -7.62939453125, -6.6533203125, -5.67724609375, -4.701171875, -3.72509765625, -2.7490234375, -1.77294921875, -0.796875, 0.17919921875, 1.1552734375, 2.13134765625, 3.107421875, 4.08349609375, 5.0595703125, 6.03564453125, 7.01171875, 7.98779296875, 8.9638671875, 9.93994140625, 10.916015625, 11.89208984375, 12.8681640625, 13.84423828125, 14.8203125, 15.79638671875, 16.7724609375, 17.74853515625, 18.724609375, 19.70068359375, 20.6767578125, 21.65283203125, 22.62890625, 23.60498046875, 24.5810546875, 25.55712890625, 26.533203125, 27.50927734375, 28.4853515625, 29.46142578125, 30.4375]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 5.0, 3.0, 2.0, 8.0, 13.0, 20.0, 31.0, 46.0, 64.0, 71.0, 90.0, 100.0, 101.0, 96.0, 84.0, 73.0, 57.0, 35.0, 37.0, 24.0, 24.0, 11.0, 6.0, 3.0, 3.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-11.8125, -11.4989013671875, -11.185302734375, -10.8717041015625, -10.55810546875, -10.2445068359375, -9.930908203125, -9.6173095703125, -9.3037109375, -8.9901123046875, -8.676513671875, -8.3629150390625, -8.04931640625, -7.7357177734375, -7.422119140625, -7.1085205078125, -6.794921875, -6.4813232421875, -6.167724609375, -5.8541259765625, -5.54052734375, -5.2269287109375, -4.913330078125, -4.5997314453125, -4.2861328125, -3.9725341796875, -3.658935546875, -3.3453369140625, -3.03173828125, -2.7181396484375, -2.404541015625, -2.0909423828125, -1.77734375, -1.4637451171875, -1.150146484375, -0.8365478515625, -0.52294921875, -0.2093505859375, 0.104248046875, 0.4178466796875, 0.7314453125, 1.0450439453125, 1.358642578125, 1.6722412109375, 1.98583984375, 2.2994384765625, 2.613037109375, 2.9266357421875, 3.240234375, 3.5538330078125, 3.867431640625, 4.1810302734375, 4.49462890625, 4.8082275390625, 5.121826171875, 5.4354248046875, 5.7490234375, 6.0626220703125, 6.376220703125, 6.6898193359375, 7.00341796875, 7.3170166015625, 7.630615234375, 7.9442138671875, 8.2578125]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 3.0, 1.0, 5.0, 7.0, 8.0, 13.0, 12.0, 25.0, 16.0, 34.0, 40.0, 75.0, 112.0, 185.0, 339.0, 610.0, 1148.0, 2565.0, 7334.0, 42144.0, 4115960.0, 15444.0, 4438.0, 1756.0, 829.0, 458.0, 270.0, 163.0, 109.0, 47.0, 41.0, 26.0, 15.0, 18.0, 15.0, 11.0, 11.0, 3.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-34.34375, -33.3115234375, -32.279296875, -31.2470703125, -30.21484375, -29.1826171875, -28.150390625, -27.1181640625, -26.0859375, -25.0537109375, -24.021484375, -22.9892578125, -21.95703125, -20.9248046875, -19.892578125, -18.8603515625, -17.828125, -16.7958984375, -15.763671875, -14.7314453125, -13.69921875, -12.6669921875, -11.634765625, -10.6025390625, -9.5703125, -8.5380859375, -7.505859375, -6.4736328125, -5.44140625, -4.4091796875, -3.376953125, -2.3447265625, -1.3125, -0.2802734375, 0.751953125, 1.7841796875, 2.81640625, 3.8486328125, 4.880859375, 5.9130859375, 6.9453125, 7.9775390625, 9.009765625, 10.0419921875, 11.07421875, 12.1064453125, 13.138671875, 14.1708984375, 15.203125, 16.2353515625, 17.267578125, 18.2998046875, 19.33203125, 20.3642578125, 21.396484375, 22.4287109375, 23.4609375, 24.4931640625, 25.525390625, 26.5576171875, 27.58984375, 28.6220703125, 29.654296875, 30.6865234375, 31.71875]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 6.0, 3.0, 3.0, 6.0, 7.0, 8.0, 14.0, 29.0, 3827.0, 109.0, 33.0, 17.0, 5.0, 3.0, 4.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.6875, -17.242431640625, -16.79736328125, -16.352294921875, -15.9072265625, -15.462158203125, -15.01708984375, -14.572021484375, -14.126953125, -13.681884765625, -13.23681640625, -12.791748046875, -12.3466796875, -11.901611328125, -11.45654296875, -11.011474609375, -10.56640625, -10.121337890625, -9.67626953125, -9.231201171875, -8.7861328125, -8.341064453125, -7.89599609375, -7.450927734375, -7.005859375, -6.560791015625, -6.11572265625, -5.670654296875, -5.2255859375, -4.780517578125, -4.33544921875, -3.890380859375, -3.4453125, -3.000244140625, -2.55517578125, -2.110107421875, -1.6650390625, -1.219970703125, -0.77490234375, -0.329833984375, 0.115234375, 0.560302734375, 1.00537109375, 1.450439453125, 1.8955078125, 2.340576171875, 2.78564453125, 3.230712890625, 3.67578125, 4.120849609375, 4.56591796875, 5.010986328125, 5.4560546875, 5.901123046875, 6.34619140625, 6.791259765625, 7.236328125, 7.681396484375, 8.12646484375, 8.571533203125, 9.0166015625, 9.461669921875, 9.90673828125, 10.351806640625, 10.796875]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 6.0, 8.0, 17.0, 46.0, 94.0, 185.0, 245.0, 182.0, 115.0, 50.0, 27.0, 16.0, 6.0, 5.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-58.75697326660156, -56.887794494628906, -55.018611907958984, -53.14943313598633, -51.28025436401367, -49.41107177734375, -47.541893005371094, -45.67271423339844, -43.80353546142578, -41.934356689453125, -40.0651741027832, -38.19599533081055, -36.32681655883789, -34.45763397216797, -32.58845520019531, -30.719276428222656, -28.850093841552734, -26.980913162231445, -25.11173439025879, -23.2425537109375, -21.373374938964844, -19.504194259643555, -17.635013580322266, -15.765833854675293, -13.89665412902832, -12.027474403381348, -10.158294677734375, -8.289113998413086, -6.419934272766113, -4.550754547119141, -2.6815738677978516, -0.8123941421508789, 1.0567893981933594, 2.925969362258911, 4.795149326324463, 6.664329528808594, 8.533509254455566, 10.402688980102539, 12.271869659423828, 14.1410493850708, 16.010229110717773, 17.879409790039062, 19.74858856201172, 21.617769241333008, 23.486949920654297, 25.356128692626953, 27.225309371948242, 29.09449005126953, 30.963668823242188, 32.832847595214844, 34.702030181884766, 36.57120895385742, 38.44038772583008, 40.3095703125, 42.178749084472656, 44.04792785644531, 45.91710662841797, 47.786285400390625, 49.65546798706055, 51.5246467590332, 53.39382553100586, 55.26300811767578, 57.13218688964844, 59.001365661621094, 60.870548248291016]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 13.0, 6.0, 17.0, 12.0, 17.0, 15.0, 24.0, 43.0, 37.0, 42.0, 47.0, 63.0, 62.0, 72.0, 61.0, 67.0, 61.0, 55.0, 71.0, 43.0, 34.0, 30.0, 35.0, 20.0, 20.0, 12.0, 12.0, 5.0, 4.0, 2.0, 4.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.450119018554688, -28.517234802246094, -27.5843505859375, -26.65146827697754, -25.718584060668945, -24.78569984436035, -23.85281753540039, -22.919933319091797, -21.987049102783203, -21.05416488647461, -20.121280670166016, -19.188398361206055, -18.25551414489746, -17.322629928588867, -16.389747619628906, -15.456863403320312, -14.523979187011719, -13.591094970703125, -12.658211708068848, -11.72532844543457, -10.792444229125977, -9.859560012817383, -8.926676750183105, -7.99379301071167, -7.060909271240234, -6.128025531768799, -5.195141792297363, -4.262258052825928, -3.329374313354492, -2.3964905738830566, -1.463606834411621, -0.5307230949401855, 0.40216064453125, 1.3350443840026855, 2.267928123474121, 3.2008118629455566, 4.133695602416992, 5.066579341888428, 5.999463081359863, 6.932346820831299, 7.865230560302734, 8.798114776611328, 9.730998039245605, 10.663881301879883, 11.596765518188477, 12.52964973449707, 13.462532997131348, 14.395416259765625, 15.328300476074219, 16.261184692382812, 17.194068908691406, 18.126951217651367, 19.05983543395996, 19.992719650268555, 20.925601959228516, 21.85848617553711, 22.791370391845703, 23.724254608154297, 24.65713882446289, 25.59002113342285, 26.522905349731445, 27.45578956604004, 28.388671875, 29.321556091308594, 30.254440307617188]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 5.0, 7.0, 5.0, 10.0, 10.0, 22.0, 22.0, 31.0, 33.0, 51.0, 91.0, 164.0, 220.0, 404.0, 720.0, 1489.0, 3144.0, 7374.0, 19606.0, 59142.0, 218194.0, 489482.0, 171957.0, 48169.0, 16157.0, 6340.0, 2851.0, 1275.0, 636.0, 364.0, 190.0, 116.0, 84.0, 57.0, 37.0, 29.0, 19.0, 12.0, 15.0, 9.0, 5.0, 4.0, 5.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-16.21875, -15.7607421875, -15.302734375, -14.8447265625, -14.38671875, -13.9287109375, -13.470703125, -13.0126953125, -12.5546875, -12.0966796875, -11.638671875, -11.1806640625, -10.72265625, -10.2646484375, -9.806640625, -9.3486328125, -8.890625, -8.4326171875, -7.974609375, -7.5166015625, -7.05859375, -6.6005859375, -6.142578125, -5.6845703125, -5.2265625, -4.7685546875, -4.310546875, -3.8525390625, -3.39453125, -2.9365234375, -2.478515625, -2.0205078125, -1.5625, -1.1044921875, -0.646484375, -0.1884765625, 0.26953125, 0.7275390625, 1.185546875, 1.6435546875, 2.1015625, 2.5595703125, 3.017578125, 3.4755859375, 3.93359375, 4.3916015625, 4.849609375, 5.3076171875, 5.765625, 6.2236328125, 6.681640625, 7.1396484375, 7.59765625, 8.0556640625, 8.513671875, 8.9716796875, 9.4296875, 9.8876953125, 10.345703125, 10.8037109375, 11.26171875, 11.7197265625, 12.177734375, 12.6357421875, 13.09375]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 6.0, 14.0, 12.0, 19.0, 19.0, 44.0, 39.0, 66.0, 74.0, 86.0, 87.0, 89.0, 82.0, 80.0, 67.0, 56.0, 46.0, 34.0, 27.0, 30.0, 14.0, 6.0, 9.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.9296875, -9.62451171875, -9.3193359375, -9.01416015625, -8.708984375, -8.40380859375, -8.0986328125, -7.79345703125, -7.48828125, -7.18310546875, -6.8779296875, -6.57275390625, -6.267578125, -5.96240234375, -5.6572265625, -5.35205078125, -5.046875, -4.74169921875, -4.4365234375, -4.13134765625, -3.826171875, -3.52099609375, -3.2158203125, -2.91064453125, -2.60546875, -2.30029296875, -1.9951171875, -1.68994140625, -1.384765625, -1.07958984375, -0.7744140625, -0.46923828125, -0.1640625, 0.14111328125, 0.4462890625, 0.75146484375, 1.056640625, 1.36181640625, 1.6669921875, 1.97216796875, 2.27734375, 2.58251953125, 2.8876953125, 3.19287109375, 3.498046875, 3.80322265625, 4.1083984375, 4.41357421875, 4.71875, 5.02392578125, 5.3291015625, 5.63427734375, 5.939453125, 6.24462890625, 6.5498046875, 6.85498046875, 7.16015625, 7.46533203125, 7.7705078125, 8.07568359375, 8.380859375, 8.68603515625, 8.9912109375, 9.29638671875, 9.6015625]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 6.0, 4.0, 3.0, 4.0, 5.0, 2.0, 17.0, 19.0, 31.0, 37.0, 77.0, 94.0, 157.0, 258.0, 487.0, 952.0, 2253.0, 6040.0, 22066.0, 124205.0, 674022.0, 177094.0, 28503.0, 7136.0, 2599.0, 1124.0, 550.0, 300.0, 160.0, 113.0, 66.0, 48.0, 39.0, 20.0, 20.0, 20.0, 7.0, 5.0, 6.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-22.8125, -22.18896484375, -21.5654296875, -20.94189453125, -20.318359375, -19.69482421875, -19.0712890625, -18.44775390625, -17.82421875, -17.20068359375, -16.5771484375, -15.95361328125, -15.330078125, -14.70654296875, -14.0830078125, -13.45947265625, -12.8359375, -12.21240234375, -11.5888671875, -10.96533203125, -10.341796875, -9.71826171875, -9.0947265625, -8.47119140625, -7.84765625, -7.22412109375, -6.6005859375, -5.97705078125, -5.353515625, -4.72998046875, -4.1064453125, -3.48291015625, -2.859375, -2.23583984375, -1.6123046875, -0.98876953125, -0.365234375, 0.25830078125, 0.8818359375, 1.50537109375, 2.12890625, 2.75244140625, 3.3759765625, 3.99951171875, 4.623046875, 5.24658203125, 5.8701171875, 6.49365234375, 7.1171875, 7.74072265625, 8.3642578125, 8.98779296875, 9.611328125, 10.23486328125, 10.8583984375, 11.48193359375, 12.10546875, 12.72900390625, 13.3525390625, 13.97607421875, 14.599609375, 15.22314453125, 15.8466796875, 16.47021484375, 17.09375]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 4.0, 11.0, 8.0, 7.0, 6.0, 18.0, 16.0, 18.0, 22.0, 22.0, 30.0, 27.0, 42.0, 32.0, 45.0, 46.0, 60.0, 61.0, 50.0, 53.0, 54.0, 49.0, 42.0, 31.0, 40.0, 38.0, 24.0, 23.0, 21.0, 16.0, 18.0, 12.0, 12.0, 15.0, 7.0, 6.0, 6.0, 4.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.390625, -25.576904296875, -24.76318359375, -23.949462890625, -23.1357421875, -22.322021484375, -21.50830078125, -20.694580078125, -19.880859375, -19.067138671875, -18.25341796875, -17.439697265625, -16.6259765625, -15.812255859375, -14.99853515625, -14.184814453125, -13.37109375, -12.557373046875, -11.74365234375, -10.929931640625, -10.1162109375, -9.302490234375, -8.48876953125, -7.675048828125, -6.861328125, -6.047607421875, -5.23388671875, -4.420166015625, -3.6064453125, -2.792724609375, -1.97900390625, -1.165283203125, -0.3515625, 0.462158203125, 1.27587890625, 2.089599609375, 2.9033203125, 3.717041015625, 4.53076171875, 5.344482421875, 6.158203125, 6.971923828125, 7.78564453125, 8.599365234375, 9.4130859375, 10.226806640625, 11.04052734375, 11.854248046875, 12.66796875, 13.481689453125, 14.29541015625, 15.109130859375, 15.9228515625, 16.736572265625, 17.55029296875, 18.364013671875, 19.177734375, 19.991455078125, 20.80517578125, 21.618896484375, 22.4326171875, 23.246337890625, 24.06005859375, 24.873779296875, 25.6875]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 4.0, 2.0, 0.0, 2.0, 3.0, 7.0, 13.0, 9.0, 27.0, 38.0, 61.0, 122.0, 274.0, 746.0, 2713.0, 25374.0, 904399.0, 107380.0, 5493.0, 1144.0, 409.0, 152.0, 73.0, 47.0, 23.0, 17.0, 11.0, 8.0, 3.0, 4.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.1484375, -10.745849609375, -10.34326171875, -9.940673828125, -9.5380859375, -9.135498046875, -8.73291015625, -8.330322265625, -7.927734375, -7.525146484375, -7.12255859375, -6.719970703125, -6.3173828125, -5.914794921875, -5.51220703125, -5.109619140625, -4.70703125, -4.304443359375, -3.90185546875, -3.499267578125, -3.0966796875, -2.694091796875, -2.29150390625, -1.888916015625, -1.486328125, -1.083740234375, -0.68115234375, -0.278564453125, 0.1240234375, 0.526611328125, 0.92919921875, 1.331787109375, 1.734375, 2.136962890625, 2.53955078125, 2.942138671875, 3.3447265625, 3.747314453125, 4.14990234375, 4.552490234375, 4.955078125, 5.357666015625, 5.76025390625, 6.162841796875, 6.5654296875, 6.968017578125, 7.37060546875, 7.773193359375, 8.17578125, 8.578369140625, 8.98095703125, 9.383544921875, 9.7861328125, 10.188720703125, 10.59130859375, 10.993896484375, 11.396484375, 11.799072265625, 12.20166015625, 12.604248046875, 13.0068359375, 13.409423828125, 13.81201171875, 14.214599609375, 14.6171875]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 0.0, 1.0, 2.0, 4.0, 3.0, 6.0, 3.0, 8.0, 7.0, 10.0, 11.0, 20.0, 16.0, 25.0, 30.0, 33.0, 63.0, 57.0, 64.0, 87.0, 80.0, 76.0, 74.0, 60.0, 43.0, 42.0, 30.0, 29.0, 25.0, 20.0, 8.0, 9.0, 8.0, 8.0, 10.0, 9.0, 5.0, 5.0, 3.0, 2.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.0006632804870605469, -0.0006410554051399231, -0.0006188303232192993, -0.0005966052412986755, -0.0005743801593780518, -0.000552155077457428, -0.0005299299955368042, -0.0005077049136161804, -0.00048547983169555664, -0.00046325474977493286, -0.0004410296678543091, -0.0004188045859336853, -0.0003965795040130615, -0.00037435442209243774, -0.00035212934017181396, -0.0003299042582511902, -0.0003076791763305664, -0.0002854540944099426, -0.00026322901248931885, -0.00024100393056869507, -0.0002187788486480713, -0.0001965537667274475, -0.00017432868480682373, -0.00015210360288619995, -0.00012987852096557617, -0.00010765343904495239, -8.542835712432861e-05, -6.320327520370483e-05, -4.0978193283081055e-05, -1.8753111362457275e-05, 3.471970558166504e-06, 2.5697052478790283e-05, 4.792213439941406e-05, 7.014721632003784e-05, 9.237229824066162e-05, 0.0001145973801612854, 0.00013682246208190918, 0.00015904754400253296, 0.00018127262592315674, 0.00020349770784378052, 0.0002257227897644043, 0.0002479478716850281, 0.00027017295360565186, 0.00029239803552627563, 0.0003146231174468994, 0.0003368481993675232, 0.00035907328128814697, 0.00038129836320877075, 0.00040352344512939453, 0.0004257485270500183, 0.0004479736089706421, 0.00047019869089126587, 0.0004924237728118896, 0.0005146488547325134, 0.0005368739366531372, 0.000559099018573761, 0.0005813241004943848, 0.0006035491824150085, 0.0006257742643356323, 0.0006479993462562561, 0.0006702244281768799, 0.0006924495100975037, 0.0007146745920181274, 0.0007368996739387512, 0.000759124755859375]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 1.0, 5.0, 5.0, 10.0, 22.0, 47.0, 97.0, 334.0, 1638.0, 23971.0, 991396.0, 28726.0, 1705.0, 384.0, 125.0, 44.0, 20.0, 10.0, 16.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.984375, -15.403564453125, -14.82275390625, -14.241943359375, -13.6611328125, -13.080322265625, -12.49951171875, -11.918701171875, -11.337890625, -10.757080078125, -10.17626953125, -9.595458984375, -9.0146484375, -8.433837890625, -7.85302734375, -7.272216796875, -6.69140625, -6.110595703125, -5.52978515625, -4.948974609375, -4.3681640625, -3.787353515625, -3.20654296875, -2.625732421875, -2.044921875, -1.464111328125, -0.88330078125, -0.302490234375, 0.2783203125, 0.859130859375, 1.43994140625, 2.020751953125, 2.6015625, 3.182373046875, 3.76318359375, 4.343994140625, 4.9248046875, 5.505615234375, 6.08642578125, 6.667236328125, 7.248046875, 7.828857421875, 8.40966796875, 8.990478515625, 9.5712890625, 10.152099609375, 10.73291015625, 11.313720703125, 11.89453125, 12.475341796875, 13.05615234375, 13.636962890625, 14.2177734375, 14.798583984375, 15.37939453125, 15.960205078125, 16.541015625, 17.121826171875, 17.70263671875, 18.283447265625, 18.8642578125, 19.445068359375, 20.02587890625, 20.606689453125, 21.1875]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 4.0, 2.0, 4.0, 5.0, 7.0, 12.0, 20.0, 37.0, 54.0, 108.0, 156.0, 184.0, 150.0, 98.0, 76.0, 37.0, 30.0, 16.0, 6.0, 5.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-18.015625, -17.60107421875, -17.1865234375, -16.77197265625, -16.357421875, -15.94287109375, -15.5283203125, -15.11376953125, -14.69921875, -14.28466796875, -13.8701171875, -13.45556640625, -13.041015625, -12.62646484375, -12.2119140625, -11.79736328125, -11.3828125, -10.96826171875, -10.5537109375, -10.13916015625, -9.724609375, -9.31005859375, -8.8955078125, -8.48095703125, -8.06640625, -7.65185546875, -7.2373046875, -6.82275390625, -6.408203125, -5.99365234375, -5.5791015625, -5.16455078125, -4.75, -4.33544921875, -3.9208984375, -3.50634765625, -3.091796875, -2.67724609375, -2.2626953125, -1.84814453125, -1.43359375, -1.01904296875, -0.6044921875, -0.18994140625, 0.224609375, 0.63916015625, 1.0537109375, 1.46826171875, 1.8828125, 2.29736328125, 2.7119140625, 3.12646484375, 3.541015625, 3.95556640625, 4.3701171875, 4.78466796875, 5.19921875, 5.61376953125, 6.0283203125, 6.44287109375, 6.857421875, 7.27197265625, 7.6865234375, 8.10107421875, 8.515625]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 4.0, 7.0, 14.0, 20.0, 53.0, 112.0, 207.0, 211.0, 196.0, 105.0, 46.0, 12.0, 5.0, 8.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-137.77745056152344, -130.9923858642578, -124.20731353759766, -117.42224884033203, -110.63717651367188, -103.85211181640625, -97.06704711914062, -90.281982421875, -83.49691009521484, -76.71184539794922, -69.92677307128906, -63.14170837402344, -56.35663986206055, -49.571571350097656, -42.78650665283203, -36.00143814086914, -29.21636962890625, -22.43130111694336, -15.646234512329102, -8.861167907714844, -2.076099395751953, 4.7089691162109375, 11.494033813476562, 18.279102325439453, 25.064170837402344, 31.849239349365234, 38.634307861328125, 45.41937255859375, 52.20444107055664, 58.98950958251953, 65.77457427978516, 72.55964660644531, 79.34469604492188, 86.1297607421875, 92.91483306884766, 99.69989776611328, 106.48497009277344, 113.27003479003906, 120.05509948730469, 126.84016418457031, 133.625244140625, 140.41030883789062, 147.19537353515625, 153.98043823242188, 160.76551818847656, 167.5505828857422, 174.3356475830078, 181.12071228027344, 187.90577697753906, 194.6908416748047, 201.4759063720703, 208.260986328125, 215.04605102539062, 221.83111572265625, 228.61618041992188, 235.4012451171875, 242.18630981445312, 248.97137451171875, 255.75643920898438, 262.54150390625, 269.3265686035156, 276.11163330078125, 282.896728515625, 289.6817932128906, 296.46685791015625]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 7.0, 5.0, 3.0, 5.0, 10.0, 6.0, 16.0, 14.0, 14.0, 21.0, 27.0, 27.0, 24.0, 36.0, 29.0, 45.0, 34.0, 51.0, 62.0, 67.0, 53.0, 50.0, 56.0, 50.0, 36.0, 29.0, 37.0, 38.0, 38.0, 28.0, 26.0, 15.0, 19.0, 9.0, 6.0, 5.0, 3.0, 5.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-117.80905151367188, -114.03343200683594, -110.25780487060547, -106.48218536376953, -102.70655822753906, -98.93093872070312, -95.15531158447266, -91.37969207763672, -87.60406494140625, -83.82844543457031, -80.05281829833984, -76.2771987915039, -72.50157165527344, -68.7259521484375, -64.95032501220703, -61.174705505371094, -57.39908218383789, -53.62345886230469, -49.847835540771484, -46.07221221923828, -42.29658889770508, -38.520965576171875, -34.74534606933594, -30.9697208404541, -27.1940975189209, -23.418474197387695, -19.642850875854492, -15.867228507995605, -12.091605186462402, -8.315982818603516, -4.5403594970703125, -0.7647361755371094, 3.0108871459960938, 6.786510467529297, 10.5621337890625, 14.337756156921387, 18.113380432128906, 21.889001846313477, 25.66462516784668, 29.440248489379883, 33.21587371826172, 36.99149703979492, 40.767120361328125, 44.54274368286133, 48.31836700439453, 52.09398651123047, 55.86961364746094, 59.645233154296875, 63.42085647583008, 67.19647979736328, 70.97209930419922, 74.74772644042969, 78.52334594726562, 82.2989730834961, 86.07459259033203, 89.8502197265625, 93.62583923339844, 97.40145874023438, 101.17708587646484, 104.95270538330078, 108.72833251953125, 112.50395202636719, 116.27957916259766, 120.0551986694336, 123.83082580566406]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 4.0, 0.0, 5.0, 9.0, 7.0, 12.0, 19.0, 20.0, 40.0, 47.0, 61.0, 110.0, 145.0, 212.0, 415.0, 750.0, 1788.0, 5701.0, 30167.0, 3522443.0, 604899.0, 19819.0, 4500.0, 1410.0, 654.0, 341.0, 205.0, 170.0, 103.0, 77.0, 45.0, 42.0, 23.0, 14.0, 10.0, 7.0, 3.0, 3.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.578125, -24.776123046875, -23.97412109375, -23.172119140625, -22.3701171875, -21.568115234375, -20.76611328125, -19.964111328125, -19.162109375, -18.360107421875, -17.55810546875, -16.756103515625, -15.9541015625, -15.152099609375, -14.35009765625, -13.548095703125, -12.74609375, -11.944091796875, -11.14208984375, -10.340087890625, -9.5380859375, -8.736083984375, -7.93408203125, -7.132080078125, -6.330078125, -5.528076171875, -4.72607421875, -3.924072265625, -3.1220703125, -2.320068359375, -1.51806640625, -0.716064453125, 0.0859375, 0.887939453125, 1.68994140625, 2.491943359375, 3.2939453125, 4.095947265625, 4.89794921875, 5.699951171875, 6.501953125, 7.303955078125, 8.10595703125, 8.907958984375, 9.7099609375, 10.511962890625, 11.31396484375, 12.115966796875, 12.91796875, 13.719970703125, 14.52197265625, 15.323974609375, 16.1259765625, 16.927978515625, 17.72998046875, 18.531982421875, 19.333984375, 20.135986328125, 20.93798828125, 21.739990234375, 22.5419921875, 23.343994140625, 24.14599609375, 24.947998046875, 25.75]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 9.0, 9.0, 8.0, 14.0, 19.0, 12.0, 26.0, 27.0, 31.0, 41.0, 45.0, 63.0, 50.0, 62.0, 52.0, 65.0, 63.0, 60.0, 55.0, 46.0, 51.0, 41.0, 25.0, 32.0, 25.0, 23.0, 9.0, 12.0, 4.0, 12.0, 3.0, 4.0, 1.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.8515625, -6.6632080078125, -6.474853515625, -6.2864990234375, -6.09814453125, -5.9097900390625, -5.721435546875, -5.5330810546875, -5.3447265625, -5.1563720703125, -4.968017578125, -4.7796630859375, -4.59130859375, -4.4029541015625, -4.214599609375, -4.0262451171875, -3.837890625, -3.6495361328125, -3.461181640625, -3.2728271484375, -3.08447265625, -2.8961181640625, -2.707763671875, -2.5194091796875, -2.3310546875, -2.1427001953125, -1.954345703125, -1.7659912109375, -1.57763671875, -1.3892822265625, -1.200927734375, -1.0125732421875, -0.82421875, -0.6358642578125, -0.447509765625, -0.2591552734375, -0.07080078125, 0.1175537109375, 0.305908203125, 0.4942626953125, 0.6826171875, 0.8709716796875, 1.059326171875, 1.2476806640625, 1.43603515625, 1.6243896484375, 1.812744140625, 2.0010986328125, 2.189453125, 2.3778076171875, 2.566162109375, 2.7545166015625, 2.94287109375, 3.1312255859375, 3.319580078125, 3.5079345703125, 3.6962890625, 3.8846435546875, 4.072998046875, 4.2613525390625, 4.44970703125, 4.6380615234375, 4.826416015625, 5.0147705078125, 5.203125]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 7.0, 2.0, 2.0, 2.0, 9.0, 5.0, 7.0, 23.0, 31.0, 50.0, 120.0, 335.0, 1323.0, 6298.0, 57215.0, 3998235.0, 118472.0, 9682.0, 1760.0, 429.0, 139.0, 54.0, 28.0, 15.0, 13.0, 8.0, 8.0, 8.0, 2.0, 5.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-31.921875, -30.97216796875, -30.0224609375, -29.07275390625, -28.123046875, -27.17333984375, -26.2236328125, -25.27392578125, -24.32421875, -23.37451171875, -22.4248046875, -21.47509765625, -20.525390625, -19.57568359375, -18.6259765625, -17.67626953125, -16.7265625, -15.77685546875, -14.8271484375, -13.87744140625, -12.927734375, -11.97802734375, -11.0283203125, -10.07861328125, -9.12890625, -8.17919921875, -7.2294921875, -6.27978515625, -5.330078125, -4.38037109375, -3.4306640625, -2.48095703125, -1.53125, -0.58154296875, 0.3681640625, 1.31787109375, 2.267578125, 3.21728515625, 4.1669921875, 5.11669921875, 6.06640625, 7.01611328125, 7.9658203125, 8.91552734375, 9.865234375, 10.81494140625, 11.7646484375, 12.71435546875, 13.6640625, 14.61376953125, 15.5634765625, 16.51318359375, 17.462890625, 18.41259765625, 19.3623046875, 20.31201171875, 21.26171875, 22.21142578125, 23.1611328125, 24.11083984375, 25.060546875, 26.01025390625, 26.9599609375, 27.90966796875, 28.859375]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 2.0, 0.0, 4.0, 2.0, 6.0, 5.0, 7.0, 9.0, 6.0, 8.0, 7.0, 16.0, 23.0, 42.0, 58.0, 86.0, 187.0, 905.0, 1781.0, 500.0, 138.0, 83.0, 63.0, 44.0, 25.0, 17.0, 14.0, 10.0, 10.0, 6.0, 4.0, 2.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-19.625, -19.0693359375, -18.513671875, -17.9580078125, -17.40234375, -16.8466796875, -16.291015625, -15.7353515625, -15.1796875, -14.6240234375, -14.068359375, -13.5126953125, -12.95703125, -12.4013671875, -11.845703125, -11.2900390625, -10.734375, -10.1787109375, -9.623046875, -9.0673828125, -8.51171875, -7.9560546875, -7.400390625, -6.8447265625, -6.2890625, -5.7333984375, -5.177734375, -4.6220703125, -4.06640625, -3.5107421875, -2.955078125, -2.3994140625, -1.84375, -1.2880859375, -0.732421875, -0.1767578125, 0.37890625, 0.9345703125, 1.490234375, 2.0458984375, 2.6015625, 3.1572265625, 3.712890625, 4.2685546875, 4.82421875, 5.3798828125, 5.935546875, 6.4912109375, 7.046875, 7.6025390625, 8.158203125, 8.7138671875, 9.26953125, 9.8251953125, 10.380859375, 10.9365234375, 11.4921875, 12.0478515625, 12.603515625, 13.1591796875, 13.71484375, 14.2705078125, 14.826171875, 15.3818359375, 15.9375]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 2.0, 5.0, 9.0, 8.0, 22.0, 44.0, 88.0, 148.0, 198.0, 200.0, 127.0, 63.0, 30.0, 21.0, 12.0, 6.0, 6.0, 4.0, 4.0, 5.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-114.9828872680664, -110.6606674194336, -106.33843994140625, -102.01622009277344, -97.69400024414062, -93.37178039550781, -89.049560546875, -84.72733306884766, -80.40511322021484, -76.08289337158203, -71.76066589355469, -67.43844604492188, -63.11622619628906, -58.79400634765625, -54.47178268432617, -50.149559020996094, -45.82733917236328, -41.50511932373047, -37.18289566040039, -32.86067199707031, -28.5384521484375, -24.216230392456055, -19.89400863647461, -15.571786880493164, -11.249565124511719, -6.927343368530273, -2.605121612548828, 1.7171001434326172, 6.0393218994140625, 10.361543655395508, 14.683765411376953, 19.0059871673584, 23.328216552734375, 27.65043830871582, 31.972660064697266, 36.294883728027344, 40.617103576660156, 44.93932342529297, 49.26154708862305, 53.583770751953125, 57.90599060058594, 62.22821044921875, 66.55043029785156, 70.8726577758789, 75.19487762451172, 79.51709747314453, 83.83932495117188, 88.16154479980469, 92.4837646484375, 96.80598449707031, 101.12820434570312, 105.45043182373047, 109.77265167236328, 114.0948715209961, 118.41709899902344, 122.73931884765625, 127.06153869628906, 131.38375854492188, 135.7059783935547, 140.0281982421875, 144.35043334960938, 148.6726531982422, 152.994873046875, 157.3170928955078, 161.63931274414062]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 3.0, 4.0, 2.0, 6.0, 4.0, 10.0, 22.0, 15.0, 15.0, 15.0, 25.0, 19.0, 36.0, 26.0, 45.0, 47.0, 39.0, 52.0, 56.0, 54.0, 58.0, 55.0, 55.0, 38.0, 41.0, 46.0, 37.0, 36.0, 29.0, 27.0, 17.0, 15.0, 12.0, 12.0, 7.0, 12.0, 3.0, 5.0, 4.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-58.789066314697266, -56.751033782958984, -54.7130012512207, -52.674964904785156, -50.636932373046875, -48.598899841308594, -46.56086730957031, -44.52283477783203, -42.48480224609375, -40.44676971435547, -38.40873718261719, -36.370704650878906, -34.33266830444336, -32.29463577270508, -30.256603240966797, -28.218570709228516, -26.18053436279297, -24.142501831054688, -22.104467391967773, -20.066434860229492, -18.028400421142578, -15.990367889404297, -13.952335357666016, -11.914301872253418, -9.87626838684082, -7.838234901428223, -5.800201892852783, -3.7621688842773438, -1.724135398864746, 0.31389808654785156, 2.351930618286133, 4.3899641036987305, 6.428001403808594, 8.466034889221191, 10.504068374633789, 12.54210090637207, 14.580134391784668, 16.618167877197266, 18.656200408935547, 20.694232940673828, 22.732267379760742, 24.770299911499023, 26.808334350585938, 28.84636688232422, 30.8843994140625, 32.92243194580078, 34.96046447753906, 36.99850082397461, 39.03653335571289, 41.07456588745117, 43.11259841918945, 45.150634765625, 47.18866729736328, 49.22669982910156, 51.264732360839844, 53.302764892578125, 55.340797424316406, 57.37882995605469, 59.41686248779297, 61.45489501953125, 63.4929313659668, 65.53096008300781, 67.56900024414062, 69.6070327758789, 71.64506530761719]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 7.0, 13.0, 7.0, 19.0, 30.0, 49.0, 77.0, 106.0, 177.0, 316.0, 556.0, 1010.0, 1814.0, 3558.0, 7497.0, 17313.0, 43253.0, 114547.0, 296849.0, 335780.0, 136557.0, 51568.0, 20203.0, 8607.0, 3993.0, 1992.0, 1138.0, 609.0, 378.0, 224.0, 126.0, 62.0, 42.0, 32.0, 18.0, 16.0, 6.0, 6.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.9453125, -9.653564453125, -9.36181640625, -9.070068359375, -8.7783203125, -8.486572265625, -8.19482421875, -7.903076171875, -7.611328125, -7.319580078125, -7.02783203125, -6.736083984375, -6.4443359375, -6.152587890625, -5.86083984375, -5.569091796875, -5.27734375, -4.985595703125, -4.69384765625, -4.402099609375, -4.1103515625, -3.818603515625, -3.52685546875, -3.235107421875, -2.943359375, -2.651611328125, -2.35986328125, -2.068115234375, -1.7763671875, -1.484619140625, -1.19287109375, -0.901123046875, -0.609375, -0.317626953125, -0.02587890625, 0.265869140625, 0.5576171875, 0.849365234375, 1.14111328125, 1.432861328125, 1.724609375, 2.016357421875, 2.30810546875, 2.599853515625, 2.8916015625, 3.183349609375, 3.47509765625, 3.766845703125, 4.05859375, 4.350341796875, 4.64208984375, 4.933837890625, 5.2255859375, 5.517333984375, 5.80908203125, 6.100830078125, 6.392578125, 6.684326171875, 6.97607421875, 7.267822265625, 7.5595703125, 7.851318359375, 8.14306640625, 8.434814453125, 8.7265625]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 8.0, 2.0, 3.0, 6.0, 6.0, 15.0, 13.0, 12.0, 17.0, 31.0, 23.0, 26.0, 37.0, 34.0, 41.0, 48.0, 35.0, 39.0, 49.0, 46.0, 38.0, 57.0, 42.0, 52.0, 62.0, 42.0, 37.0, 29.0, 21.0, 28.0, 13.0, 22.0, 17.0, 14.0, 6.0, 7.0, 9.0, 8.0, 6.0, 5.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-5.80859375, -5.651611328125, -5.49462890625, -5.337646484375, -5.1806640625, -5.023681640625, -4.86669921875, -4.709716796875, -4.552734375, -4.395751953125, -4.23876953125, -4.081787109375, -3.9248046875, -3.767822265625, -3.61083984375, -3.453857421875, -3.296875, -3.139892578125, -2.98291015625, -2.825927734375, -2.6689453125, -2.511962890625, -2.35498046875, -2.197998046875, -2.041015625, -1.884033203125, -1.72705078125, -1.570068359375, -1.4130859375, -1.256103515625, -1.09912109375, -0.942138671875, -0.78515625, -0.628173828125, -0.47119140625, -0.314208984375, -0.1572265625, -0.000244140625, 0.15673828125, 0.313720703125, 0.470703125, 0.627685546875, 0.78466796875, 0.941650390625, 1.0986328125, 1.255615234375, 1.41259765625, 1.569580078125, 1.7265625, 1.883544921875, 2.04052734375, 2.197509765625, 2.3544921875, 2.511474609375, 2.66845703125, 2.825439453125, 2.982421875, 3.139404296875, 3.29638671875, 3.453369140625, 3.6103515625, 3.767333984375, 3.92431640625, 4.081298828125, 4.23828125]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 7.0, 6.0, 11.0, 19.0, 20.0, 26.0, 51.0, 64.0, 131.0, 191.0, 361.0, 642.0, 1292.0, 3310.0, 12283.0, 82704.0, 713906.0, 201570.0, 23326.0, 5035.0, 1709.0, 833.0, 453.0, 256.0, 132.0, 75.0, 52.0, 26.0, 25.0, 9.0, 11.0, 5.0, 6.0, 7.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.71875, -17.10107421875, -16.4833984375, -15.86572265625, -15.248046875, -14.63037109375, -14.0126953125, -13.39501953125, -12.77734375, -12.15966796875, -11.5419921875, -10.92431640625, -10.306640625, -9.68896484375, -9.0712890625, -8.45361328125, -7.8359375, -7.21826171875, -6.6005859375, -5.98291015625, -5.365234375, -4.74755859375, -4.1298828125, -3.51220703125, -2.89453125, -2.27685546875, -1.6591796875, -1.04150390625, -0.423828125, 0.19384765625, 0.8115234375, 1.42919921875, 2.046875, 2.66455078125, 3.2822265625, 3.89990234375, 4.517578125, 5.13525390625, 5.7529296875, 6.37060546875, 6.98828125, 7.60595703125, 8.2236328125, 8.84130859375, 9.458984375, 10.07666015625, 10.6943359375, 11.31201171875, 11.9296875, 12.54736328125, 13.1650390625, 13.78271484375, 14.400390625, 15.01806640625, 15.6357421875, 16.25341796875, 16.87109375, 17.48876953125, 18.1064453125, 18.72412109375, 19.341796875, 19.95947265625, 20.5771484375, 21.19482421875, 21.8125]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 1.0, 0.0, 3.0, 2.0, 5.0, 1.0, 8.0, 4.0, 7.0, 9.0, 11.0, 21.0, 34.0, 20.0, 26.0, 39.0, 41.0, 38.0, 44.0, 44.0, 41.0, 56.0, 53.0, 51.0, 37.0, 43.0, 51.0, 42.0, 42.0, 29.0, 34.0, 37.0, 20.0, 17.0, 22.0, 22.0, 18.0, 5.0, 9.0, 8.0, 6.0, 7.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.109375, -17.431396484375, -16.75341796875, -16.075439453125, -15.3974609375, -14.719482421875, -14.04150390625, -13.363525390625, -12.685546875, -12.007568359375, -11.32958984375, -10.651611328125, -9.9736328125, -9.295654296875, -8.61767578125, -7.939697265625, -7.26171875, -6.583740234375, -5.90576171875, -5.227783203125, -4.5498046875, -3.871826171875, -3.19384765625, -2.515869140625, -1.837890625, -1.159912109375, -0.48193359375, 0.196044921875, 0.8740234375, 1.552001953125, 2.22998046875, 2.907958984375, 3.5859375, 4.263916015625, 4.94189453125, 5.619873046875, 6.2978515625, 6.975830078125, 7.65380859375, 8.331787109375, 9.009765625, 9.687744140625, 10.36572265625, 11.043701171875, 11.7216796875, 12.399658203125, 13.07763671875, 13.755615234375, 14.43359375, 15.111572265625, 15.78955078125, 16.467529296875, 17.1455078125, 17.823486328125, 18.50146484375, 19.179443359375, 19.857421875, 20.535400390625, 21.21337890625, 21.891357421875, 22.5693359375, 23.247314453125, 23.92529296875, 24.603271484375, 25.28125]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 4.0, 1.0, 6.0, 6.0, 4.0, 12.0, 10.0, 13.0, 35.0, 54.0, 79.0, 115.0, 241.0, 527.0, 1319.0, 4564.0, 29646.0, 772191.0, 221815.0, 13422.0, 2762.0, 894.0, 366.0, 199.0, 91.0, 51.0, 38.0, 30.0, 15.0, 12.0, 15.0, 7.0, 8.0, 4.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.96875, -8.66796875, -8.3671875, -8.06640625, -7.765625, -7.46484375, -7.1640625, -6.86328125, -6.5625, -6.26171875, -5.9609375, -5.66015625, -5.359375, -5.05859375, -4.7578125, -4.45703125, -4.15625, -3.85546875, -3.5546875, -3.25390625, -2.953125, -2.65234375, -2.3515625, -2.05078125, -1.75, -1.44921875, -1.1484375, -0.84765625, -0.546875, -0.24609375, 0.0546875, 0.35546875, 0.65625, 0.95703125, 1.2578125, 1.55859375, 1.859375, 2.16015625, 2.4609375, 2.76171875, 3.0625, 3.36328125, 3.6640625, 3.96484375, 4.265625, 4.56640625, 4.8671875, 5.16796875, 5.46875, 5.76953125, 6.0703125, 6.37109375, 6.671875, 6.97265625, 7.2734375, 7.57421875, 7.875, 8.17578125, 8.4765625, 8.77734375, 9.078125, 9.37890625, 9.6796875, 9.98046875, 10.28125]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 4.0, 8.0, 4.0, 7.0, 11.0, 17.0, 13.0, 28.0, 40.0, 42.0, 52.0, 82.0, 109.0, 130.0, 107.0, 104.0, 70.0, 46.0, 44.0, 18.0, 19.0, 17.0, 9.0, 5.0, 8.0, 5.0, 3.0, 2.0, 5.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0013637542724609375, -0.0013275966048240662, -0.0012914389371871948, -0.0012552812695503235, -0.0012191236019134521, -0.0011829659342765808, -0.0011468082666397095, -0.0011106505990028381, -0.0010744929313659668, -0.0010383352637290955, -0.0010021775960922241, -0.0009660199284553528, -0.0009298622608184814, -0.0008937045931816101, -0.0008575469255447388, -0.0008213892579078674, -0.0007852315902709961, -0.0007490739226341248, -0.0007129162549972534, -0.0006767585873603821, -0.0006406009197235107, -0.0006044432520866394, -0.0005682855844497681, -0.0005321279168128967, -0.0004959702491760254, -0.00045981258153915405, -0.0004236549139022827, -0.0003874972462654114, -0.00035133957862854004, -0.0003151819109916687, -0.00027902424335479736, -0.00024286657571792603, -0.0002067089080810547, -0.00017055124044418335, -0.000134393572807312, -9.823590517044067e-05, -6.207823753356934e-05, -2.5920569896697998e-05, 1.023709774017334e-05, 4.639476537704468e-05, 8.255243301391602e-05, 0.00011871010065078735, 0.0001548677682876587, 0.00019102543592453003, 0.00022718310356140137, 0.0002633407711982727, 0.00029949843883514404, 0.0003356561064720154, 0.0003718137741088867, 0.00040797144174575806, 0.0004441291093826294, 0.00048028677701950073, 0.0005164444446563721, 0.0005526021122932434, 0.0005887597799301147, 0.0006249174475669861, 0.0006610751152038574, 0.0006972327828407288, 0.0007333904504776001, 0.0007695481181144714, 0.0008057057857513428, 0.0008418634533882141, 0.0008780211210250854, 0.0009141787886619568, 0.0009503364562988281]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 2.0, 2.0, 3.0, 2.0, 2.0, 6.0, 10.0, 15.0, 11.0, 18.0, 23.0, 46.0, 68.0, 124.0, 239.0, 471.0, 1236.0, 4068.0, 18809.0, 512764.0, 485606.0, 18710.0, 4058.0, 1218.0, 502.0, 215.0, 113.0, 62.0, 35.0, 29.0, 26.0, 18.0, 17.0, 9.0, 6.0, 2.0, 3.0, 3.0, 5.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-10.4765625, -10.1273193359375, -9.778076171875, -9.4288330078125, -9.07958984375, -8.7303466796875, -8.381103515625, -8.0318603515625, -7.6826171875, -7.3333740234375, -6.984130859375, -6.6348876953125, -6.28564453125, -5.9364013671875, -5.587158203125, -5.2379150390625, -4.888671875, -4.5394287109375, -4.190185546875, -3.8409423828125, -3.49169921875, -3.1424560546875, -2.793212890625, -2.4439697265625, -2.0947265625, -1.7454833984375, -1.396240234375, -1.0469970703125, -0.69775390625, -0.3485107421875, 0.000732421875, 0.3499755859375, 0.69921875, 1.0484619140625, 1.397705078125, 1.7469482421875, 2.09619140625, 2.4454345703125, 2.794677734375, 3.1439208984375, 3.4931640625, 3.8424072265625, 4.191650390625, 4.5408935546875, 4.89013671875, 5.2393798828125, 5.588623046875, 5.9378662109375, 6.287109375, 6.6363525390625, 6.985595703125, 7.3348388671875, 7.68408203125, 8.0333251953125, 8.382568359375, 8.7318115234375, 9.0810546875, 9.4302978515625, 9.779541015625, 10.1287841796875, 10.47802734375, 10.8272705078125, 11.176513671875, 11.5257568359375, 11.875]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 0.0, 2.0, 3.0, 1.0, 5.0, 6.0, 10.0, 10.0, 20.0, 20.0, 26.0, 32.0, 40.0, 55.0, 59.0, 87.0, 86.0, 97.0, 87.0, 79.0, 66.0, 53.0, 40.0, 23.0, 16.0, 27.0, 14.0, 13.0, 7.0, 6.0, 2.0, 7.0, 3.0, 2.0, 1.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-6.87890625, -6.6656494140625, -6.452392578125, -6.2391357421875, -6.02587890625, -5.8126220703125, -5.599365234375, -5.3861083984375, -5.1728515625, -4.9595947265625, -4.746337890625, -4.5330810546875, -4.31982421875, -4.1065673828125, -3.893310546875, -3.6800537109375, -3.466796875, -3.2535400390625, -3.040283203125, -2.8270263671875, -2.61376953125, -2.4005126953125, -2.187255859375, -1.9739990234375, -1.7607421875, -1.5474853515625, -1.334228515625, -1.1209716796875, -0.90771484375, -0.6944580078125, -0.481201171875, -0.2679443359375, -0.0546875, 0.1585693359375, 0.371826171875, 0.5850830078125, 0.79833984375, 1.0115966796875, 1.224853515625, 1.4381103515625, 1.6513671875, 1.8646240234375, 2.077880859375, 2.2911376953125, 2.50439453125, 2.7176513671875, 2.930908203125, 3.1441650390625, 3.357421875, 3.5706787109375, 3.783935546875, 3.9971923828125, 4.21044921875, 4.4237060546875, 4.636962890625, 4.8502197265625, 5.0634765625, 5.2767333984375, 5.489990234375, 5.7032470703125, 5.91650390625, 6.1297607421875, 6.343017578125, 6.5562744140625, 6.76953125]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 10.0, 37.0, 274.0, 524.0, 119.0, 29.0, 11.0, 6.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-571.5399780273438, -559.1664428710938, -546.79296875, -534.41943359375, -522.0458984375, -509.6723937988281, -497.2988586425781, -484.92535400390625, -472.55181884765625, -460.1783142089844, -447.8047790527344, -435.4312744140625, -423.0577392578125, -410.6842346191406, -398.3106994628906, -385.93719482421875, -373.56365966796875, -361.1901550292969, -348.8166198730469, -336.443115234375, -324.069580078125, -311.6960754394531, -299.3225402832031, -286.94903564453125, -274.5755310058594, -262.2020263671875, -249.8284912109375, -237.45497131347656, -225.08145141601562, -212.7079315185547, -200.33441162109375, -187.96090698242188, -175.58737182617188, -163.21385192871094, -150.84033203125, -138.46681213378906, -126.09329223632812, -113.71977233886719, -101.34626007080078, -88.97274017333984, -76.5992202758789, -64.22570037841797, -51.85218048095703, -39.47866439819336, -27.105144500732422, -14.731624603271484, -2.3581085205078125, 10.015411376953125, 22.388931274414062, 34.762451171875, 47.13597106933594, 59.50948715209961, 71.88301086425781, 84.25653076171875, 96.63004302978516, 109.0035629272461, 121.37708282470703, 133.75059509277344, 146.12411499023438, 158.4976348876953, 170.87115478515625, 183.2446746826172, 195.61819458007812, 207.99171447753906, 220.365234375]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 3.0, 5.0, 7.0, 5.0, 9.0, 11.0, 11.0, 10.0, 18.0, 13.0, 21.0, 39.0, 26.0, 25.0, 25.0, 37.0, 48.0, 54.0, 47.0, 46.0, 53.0, 56.0, 58.0, 47.0, 41.0, 27.0, 50.0, 21.0, 36.0, 30.0, 19.0, 16.0, 19.0, 15.0, 15.0, 12.0, 9.0, 5.0, 3.0, 6.0, 4.0, 0.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-74.9045639038086, -72.42153930664062, -69.93852233886719, -67.45549774169922, -64.97247314453125, -62.48945617675781, -60.006431579589844, -57.52341079711914, -55.04039001464844, -52.557369232177734, -50.07434844970703, -47.59132385253906, -45.10830307006836, -42.625282287597656, -40.14225769042969, -37.659236907958984, -35.17621612548828, -32.69319534301758, -30.210172653198242, -27.727149963378906, -25.244129180908203, -22.7611083984375, -20.278085708618164, -17.795063018798828, -15.312042236328125, -12.829020500183105, -10.345998764038086, -7.862977027893066, -5.379955291748047, -2.8969335556030273, -0.4139118194580078, 2.069110870361328, 4.5521392822265625, 7.035161018371582, 9.518182754516602, 12.001204490661621, 14.48422622680664, 16.967247009277344, 19.45026969909668, 21.933292388916016, 24.41631317138672, 26.899333953857422, 29.382356643676758, 31.865379333496094, 34.3484001159668, 36.8314208984375, 39.31444549560547, 41.79746627807617, 44.280487060546875, 46.76350784301758, 49.24652862548828, 51.72955322265625, 54.21257400512695, 56.695594787597656, 59.178619384765625, 61.66164016723633, 64.14466094970703, 66.627685546875, 69.11070251464844, 71.5937271118164, 74.07675170898438, 76.55976867675781, 79.04279327392578, 81.52581787109375, 84.00883483886719]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0, 3.0, 2.0, 3.0, 5.0, 6.0, 11.0, 14.0, 11.0, 17.0, 24.0, 38.0, 40.0, 70.0, 144.0, 316.0, 698.0, 2379.0, 11548.0, 161060.0, 3971409.0, 38552.0, 5458.0, 1423.0, 466.0, 217.0, 116.0, 86.0, 53.0, 28.0, 19.0, 20.0, 10.0, 8.0, 5.0, 8.0, 7.0, 4.0, 4.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-24.078125, -23.3759765625, -22.673828125, -21.9716796875, -21.26953125, -20.5673828125, -19.865234375, -19.1630859375, -18.4609375, -17.7587890625, -17.056640625, -16.3544921875, -15.65234375, -14.9501953125, -14.248046875, -13.5458984375, -12.84375, -12.1416015625, -11.439453125, -10.7373046875, -10.03515625, -9.3330078125, -8.630859375, -7.9287109375, -7.2265625, -6.5244140625, -5.822265625, -5.1201171875, -4.41796875, -3.7158203125, -3.013671875, -2.3115234375, -1.609375, -0.9072265625, -0.205078125, 0.4970703125, 1.19921875, 1.9013671875, 2.603515625, 3.3056640625, 4.0078125, 4.7099609375, 5.412109375, 6.1142578125, 6.81640625, 7.5185546875, 8.220703125, 8.9228515625, 9.625, 10.3271484375, 11.029296875, 11.7314453125, 12.43359375, 13.1357421875, 13.837890625, 14.5400390625, 15.2421875, 15.9443359375, 16.646484375, 17.3486328125, 18.05078125, 18.7529296875, 19.455078125, 20.1572265625, 20.859375]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 5.0, 9.0, 5.0, 14.0, 16.0, 26.0, 27.0, 45.0, 65.0, 53.0, 71.0, 68.0, 74.0, 91.0, 70.0, 68.0, 60.0, 53.0, 48.0, 34.0, 26.0, 19.0, 28.0, 13.0, 9.0, 6.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.671875, -4.4427490234375, -4.213623046875, -3.9844970703125, -3.75537109375, -3.5262451171875, -3.297119140625, -3.0679931640625, -2.8388671875, -2.6097412109375, -2.380615234375, -2.1514892578125, -1.92236328125, -1.6932373046875, -1.464111328125, -1.2349853515625, -1.005859375, -0.7767333984375, -0.547607421875, -0.3184814453125, -0.08935546875, 0.1397705078125, 0.368896484375, 0.5980224609375, 0.8271484375, 1.0562744140625, 1.285400390625, 1.5145263671875, 1.74365234375, 1.9727783203125, 2.201904296875, 2.4310302734375, 2.66015625, 2.8892822265625, 3.118408203125, 3.3475341796875, 3.57666015625, 3.8057861328125, 4.034912109375, 4.2640380859375, 4.4931640625, 4.7222900390625, 4.951416015625, 5.1805419921875, 5.40966796875, 5.6387939453125, 5.867919921875, 6.0970458984375, 6.326171875, 6.5552978515625, 6.784423828125, 7.0135498046875, 7.24267578125, 7.4718017578125, 7.700927734375, 7.9300537109375, 8.1591796875, 8.3883056640625, 8.617431640625, 8.8465576171875, 9.07568359375, 9.3048095703125, 9.533935546875, 9.7630615234375, 9.9921875]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 4.0, 1.0, 5.0, 6.0, 23.0, 30.0, 48.0, 96.0, 204.0, 515.0, 1731.0, 8629.0, 65362.0, 3927009.0, 170838.0, 15574.0, 2961.0, 744.0, 270.0, 113.0, 52.0, 32.0, 13.0, 14.0, 5.0, 5.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-24.6875, -23.975341796875, -23.26318359375, -22.551025390625, -21.8388671875, -21.126708984375, -20.41455078125, -19.702392578125, -18.990234375, -18.278076171875, -17.56591796875, -16.853759765625, -16.1416015625, -15.429443359375, -14.71728515625, -14.005126953125, -13.29296875, -12.580810546875, -11.86865234375, -11.156494140625, -10.4443359375, -9.732177734375, -9.02001953125, -8.307861328125, -7.595703125, -6.883544921875, -6.17138671875, -5.459228515625, -4.7470703125, -4.034912109375, -3.32275390625, -2.610595703125, -1.8984375, -1.186279296875, -0.47412109375, 0.238037109375, 0.9501953125, 1.662353515625, 2.37451171875, 3.086669921875, 3.798828125, 4.510986328125, 5.22314453125, 5.935302734375, 6.6474609375, 7.359619140625, 8.07177734375, 8.783935546875, 9.49609375, 10.208251953125, 10.92041015625, 11.632568359375, 12.3447265625, 13.056884765625, 13.76904296875, 14.481201171875, 15.193359375, 15.905517578125, 16.61767578125, 17.329833984375, 18.0419921875, 18.754150390625, 19.46630859375, 20.178466796875, 20.890625]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 2.0, 5.0, 6.0, 6.0, 8.0, 7.0, 12.0, 20.0, 15.0, 30.0, 46.0, 62.0, 105.0, 263.0, 1015.0, 1671.0, 409.0, 163.0, 57.0, 49.0, 38.0, 13.0, 12.0, 14.0, 14.0, 11.0, 7.0, 5.0, 5.0, 3.0, 3.0, 0.0, 1.0, 1.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-16.59375, -16.17041015625, -15.7470703125, -15.32373046875, -14.900390625, -14.47705078125, -14.0537109375, -13.63037109375, -13.20703125, -12.78369140625, -12.3603515625, -11.93701171875, -11.513671875, -11.09033203125, -10.6669921875, -10.24365234375, -9.8203125, -9.39697265625, -8.9736328125, -8.55029296875, -8.126953125, -7.70361328125, -7.2802734375, -6.85693359375, -6.43359375, -6.01025390625, -5.5869140625, -5.16357421875, -4.740234375, -4.31689453125, -3.8935546875, -3.47021484375, -3.046875, -2.62353515625, -2.2001953125, -1.77685546875, -1.353515625, -0.93017578125, -0.5068359375, -0.08349609375, 0.33984375, 0.76318359375, 1.1865234375, 1.60986328125, 2.033203125, 2.45654296875, 2.8798828125, 3.30322265625, 3.7265625, 4.14990234375, 4.5732421875, 4.99658203125, 5.419921875, 5.84326171875, 6.2666015625, 6.68994140625, 7.11328125, 7.53662109375, 7.9599609375, 8.38330078125, 8.806640625, 9.22998046875, 9.6533203125, 10.07666015625, 10.5]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 1.0, 3.0, 12.0, 22.0, 58.0, 109.0, 237.0, 260.0, 140.0, 73.0, 34.0, 18.0, 10.0, 9.0, 5.0, 2.0, 3.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-112.82405090332031, -109.06523895263672, -105.30642700195312, -101.54762268066406, -97.78881072998047, -94.02999877929688, -90.27118682861328, -86.51237487792969, -82.75357055664062, -78.99475860595703, -75.23594665527344, -71.47714233398438, -67.71833038330078, -63.95951843261719, -60.200706481933594, -56.44189453125, -52.683082580566406, -48.92427062988281, -45.165462493896484, -41.40665054321289, -37.64784240722656, -33.88903045654297, -30.130218505859375, -26.371408462524414, -22.612598419189453, -18.853788375854492, -15.094977378845215, -11.336166381835938, -7.577356338500977, -3.8185462951660156, -0.059734344482421875, 3.699075698852539, 7.4578857421875, 11.216695785522461, 14.975506782531738, 18.734317779541016, 22.493127822875977, 26.251937866210938, 30.01074981689453, 33.769561767578125, 37.52836990356445, 41.28718185424805, 45.045989990234375, 48.80480194091797, 52.56361389160156, 56.32242202758789, 60.081233978271484, 63.84004211425781, 67.5988540649414, 71.357666015625, 75.1164779663086, 78.87528991699219, 82.63409423828125, 86.39290618896484, 90.15171813964844, 93.91053009033203, 97.66934204101562, 101.42815399169922, 105.18696594238281, 108.94577026367188, 112.70458221435547, 116.46339416503906, 120.22220611572266, 123.98101806640625, 127.73982238769531]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 5.0, 5.0, 13.0, 11.0, 7.0, 9.0, 15.0, 14.0, 23.0, 14.0, 29.0, 21.0, 31.0, 32.0, 32.0, 42.0, 37.0, 45.0, 40.0, 42.0, 48.0, 41.0, 54.0, 45.0, 40.0, 27.0, 39.0, 28.0, 28.0, 22.0, 26.0, 20.0, 23.0, 18.0, 16.0, 18.0, 9.0, 8.0, 6.0, 4.0, 2.0, 7.0, 5.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-43.01543045043945, -41.7248420715332, -40.43425750732422, -39.14366912841797, -37.853084564208984, -36.562496185302734, -35.27191162109375, -33.9813232421875, -32.69073486328125, -31.400148391723633, -30.109561920166016, -28.8189754486084, -27.52838897705078, -26.23780059814453, -24.947214126586914, -23.656627655029297, -22.366043090820312, -21.075456619262695, -19.784870147705078, -18.49428367614746, -17.203697204589844, -15.91310977935791, -14.622522354125977, -13.33193588256836, -12.041349411010742, -10.750762939453125, -9.460176467895508, -8.169589042663574, -6.879002571105957, -5.58841609954834, -4.2978291511535645, -3.007242202758789, -1.7166519165039062, -0.42606520652770996, 0.8645215034484863, 2.1551082134246826, 3.445694923400879, 4.736281394958496, 6.0268683433532715, 7.317455291748047, 8.608041763305664, 9.898628234863281, 11.189214706420898, 12.479802131652832, 13.77038860321045, 15.060975074768066, 16.3515625, 17.642148971557617, 18.932735443115234, 20.22332191467285, 21.51390838623047, 22.804494857788086, 24.095081329345703, 25.385669708251953, 26.67625617980957, 27.966842651367188, 29.257429122924805, 30.548015594482422, 31.83860206604004, 33.129188537597656, 34.419776916503906, 35.71036148071289, 37.00094985961914, 38.291534423828125, 39.582122802734375]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 4.0, 5.0, 3.0, 4.0, 11.0, 14.0, 14.0, 43.0, 59.0, 111.0, 190.0, 331.0, 593.0, 1176.0, 2500.0, 5828.0, 14371.0, 38200.0, 108679.0, 304945.0, 359376.0, 135121.0, 46655.0, 17439.0, 6906.0, 3009.0, 1426.0, 714.0, 350.0, 191.0, 129.0, 50.0, 36.0, 25.0, 19.0, 13.0, 5.0, 8.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.1171875, -10.7708740234375, -10.424560546875, -10.0782470703125, -9.73193359375, -9.3856201171875, -9.039306640625, -8.6929931640625, -8.3466796875, -8.0003662109375, -7.654052734375, -7.3077392578125, -6.96142578125, -6.6151123046875, -6.268798828125, -5.9224853515625, -5.576171875, -5.2298583984375, -4.883544921875, -4.5372314453125, -4.19091796875, -3.8446044921875, -3.498291015625, -3.1519775390625, -2.8056640625, -2.4593505859375, -2.113037109375, -1.7667236328125, -1.42041015625, -1.0740966796875, -0.727783203125, -0.3814697265625, -0.03515625, 0.3111572265625, 0.657470703125, 1.0037841796875, 1.35009765625, 1.6964111328125, 2.042724609375, 2.3890380859375, 2.7353515625, 3.0816650390625, 3.427978515625, 3.7742919921875, 4.12060546875, 4.4669189453125, 4.813232421875, 5.1595458984375, 5.505859375, 5.8521728515625, 6.198486328125, 6.5447998046875, 6.89111328125, 7.2374267578125, 7.583740234375, 7.9300537109375, 8.2763671875, 8.6226806640625, 8.968994140625, 9.3153076171875, 9.66162109375, 10.0079345703125, 10.354248046875, 10.7005615234375, 11.046875]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 4.0, 3.0, 6.0, 4.0, 8.0, 8.0, 21.0, 17.0, 14.0, 22.0, 28.0, 37.0, 34.0, 41.0, 36.0, 38.0, 51.0, 40.0, 55.0, 49.0, 45.0, 46.0, 39.0, 50.0, 48.0, 34.0, 29.0, 30.0, 28.0, 26.0, 18.0, 13.0, 21.0, 20.0, 11.0, 8.0, 8.0, 7.0, 1.0, 3.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-4.8046875, -4.658447265625, -4.51220703125, -4.365966796875, -4.2197265625, -4.073486328125, -3.92724609375, -3.781005859375, -3.634765625, -3.488525390625, -3.34228515625, -3.196044921875, -3.0498046875, -2.903564453125, -2.75732421875, -2.611083984375, -2.46484375, -2.318603515625, -2.17236328125, -2.026123046875, -1.8798828125, -1.733642578125, -1.58740234375, -1.441162109375, -1.294921875, -1.148681640625, -1.00244140625, -0.856201171875, -0.7099609375, -0.563720703125, -0.41748046875, -0.271240234375, -0.125, 0.021240234375, 0.16748046875, 0.313720703125, 0.4599609375, 0.606201171875, 0.75244140625, 0.898681640625, 1.044921875, 1.191162109375, 1.33740234375, 1.483642578125, 1.6298828125, 1.776123046875, 1.92236328125, 2.068603515625, 2.21484375, 2.361083984375, 2.50732421875, 2.653564453125, 2.7998046875, 2.946044921875, 3.09228515625, 3.238525390625, 3.384765625, 3.531005859375, 3.67724609375, 3.823486328125, 3.9697265625, 4.115966796875, 4.26220703125, 4.408447265625, 4.5546875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 3.0, 1.0, 1.0, 2.0, 7.0, 2.0, 8.0, 13.0, 22.0, 11.0, 42.0, 51.0, 82.0, 124.0, 212.0, 519.0, 1087.0, 3514.0, 15861.0, 119728.0, 739656.0, 142887.0, 18573.0, 3777.0, 1192.0, 550.0, 255.0, 115.0, 91.0, 56.0, 28.0, 24.0, 15.0, 15.0, 10.0, 13.0, 4.0, 4.0, 5.0, 0.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.3125, -21.647216796875, -20.98193359375, -20.316650390625, -19.6513671875, -18.986083984375, -18.32080078125, -17.655517578125, -16.990234375, -16.324951171875, -15.65966796875, -14.994384765625, -14.3291015625, -13.663818359375, -12.99853515625, -12.333251953125, -11.66796875, -11.002685546875, -10.33740234375, -9.672119140625, -9.0068359375, -8.341552734375, -7.67626953125, -7.010986328125, -6.345703125, -5.680419921875, -5.01513671875, -4.349853515625, -3.6845703125, -3.019287109375, -2.35400390625, -1.688720703125, -1.0234375, -0.358154296875, 0.30712890625, 0.972412109375, 1.6376953125, 2.302978515625, 2.96826171875, 3.633544921875, 4.298828125, 4.964111328125, 5.62939453125, 6.294677734375, 6.9599609375, 7.625244140625, 8.29052734375, 8.955810546875, 9.62109375, 10.286376953125, 10.95166015625, 11.616943359375, 12.2822265625, 12.947509765625, 13.61279296875, 14.278076171875, 14.943359375, 15.608642578125, 16.27392578125, 16.939208984375, 17.6044921875, 18.269775390625, 18.93505859375, 19.600341796875, 20.265625]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 7.0, 6.0, 4.0, 4.0, 9.0, 11.0, 15.0, 18.0, 16.0, 18.0, 16.0, 28.0, 23.0, 46.0, 35.0, 39.0, 46.0, 51.0, 55.0, 45.0, 53.0, 46.0, 46.0, 48.0, 61.0, 35.0, 38.0, 27.0, 29.0, 31.0, 17.0, 15.0, 16.0, 10.0, 10.0, 6.0, 8.0, 3.0, 4.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0], "bins": [-16.328125, -15.781005859375, -15.23388671875, -14.686767578125, -14.1396484375, -13.592529296875, -13.04541015625, -12.498291015625, -11.951171875, -11.404052734375, -10.85693359375, -10.309814453125, -9.7626953125, -9.215576171875, -8.66845703125, -8.121337890625, -7.57421875, -7.027099609375, -6.47998046875, -5.932861328125, -5.3857421875, -4.838623046875, -4.29150390625, -3.744384765625, -3.197265625, -2.650146484375, -2.10302734375, -1.555908203125, -1.0087890625, -0.461669921875, 0.08544921875, 0.632568359375, 1.1796875, 1.726806640625, 2.27392578125, 2.821044921875, 3.3681640625, 3.915283203125, 4.46240234375, 5.009521484375, 5.556640625, 6.103759765625, 6.65087890625, 7.197998046875, 7.7451171875, 8.292236328125, 8.83935546875, 9.386474609375, 9.93359375, 10.480712890625, 11.02783203125, 11.574951171875, 12.1220703125, 12.669189453125, 13.21630859375, 13.763427734375, 14.310546875, 14.857666015625, 15.40478515625, 15.951904296875, 16.4990234375, 17.046142578125, 17.59326171875, 18.140380859375, 18.6875]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 4.0, 5.0, 12.0, 13.0, 18.0, 18.0, 21.0, 50.0, 72.0, 147.0, 306.0, 770.0, 2635.0, 18352.0, 739583.0, 273229.0, 10356.0, 1867.0, 559.0, 243.0, 122.0, 66.0, 32.0, 23.0, 14.0, 13.0, 6.0, 5.0, 4.0, 3.0, 5.0, 5.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.984375, -8.623291015625, -8.26220703125, -7.901123046875, -7.5400390625, -7.178955078125, -6.81787109375, -6.456787109375, -6.095703125, -5.734619140625, -5.37353515625, -5.012451171875, -4.6513671875, -4.290283203125, -3.92919921875, -3.568115234375, -3.20703125, -2.845947265625, -2.48486328125, -2.123779296875, -1.7626953125, -1.401611328125, -1.04052734375, -0.679443359375, -0.318359375, 0.042724609375, 0.40380859375, 0.764892578125, 1.1259765625, 1.487060546875, 1.84814453125, 2.209228515625, 2.5703125, 2.931396484375, 3.29248046875, 3.653564453125, 4.0146484375, 4.375732421875, 4.73681640625, 5.097900390625, 5.458984375, 5.820068359375, 6.18115234375, 6.542236328125, 6.9033203125, 7.264404296875, 7.62548828125, 7.986572265625, 8.34765625, 8.708740234375, 9.06982421875, 9.430908203125, 9.7919921875, 10.153076171875, 10.51416015625, 10.875244140625, 11.236328125, 11.597412109375, 11.95849609375, 12.319580078125, 12.6806640625, 13.041748046875, 13.40283203125, 13.763916015625, 14.125]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 8.0, 11.0, 15.0, 18.0, 46.0, 80.0, 88.0, 174.0, 219.0, 133.0, 93.0, 52.0, 31.0, 19.0, 17.0, 5.0, 2.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.002613067626953125, -0.0025446414947509766, -0.002476215362548828, -0.0024077892303466797, -0.0023393630981445312, -0.002270936965942383, -0.0022025108337402344, -0.002134084701538086, -0.0020656585693359375, -0.001997232437133789, -0.0019288063049316406, -0.0018603801727294922, -0.0017919540405273438, -0.0017235279083251953, -0.0016551017761230469, -0.0015866756439208984, -0.00151824951171875, -0.0014498233795166016, -0.0013813972473144531, -0.0013129711151123047, -0.0012445449829101562, -0.0011761188507080078, -0.0011076927185058594, -0.001039266586303711, -0.0009708404541015625, -0.0009024143218994141, -0.0008339881896972656, -0.0007655620574951172, -0.0006971359252929688, -0.0006287097930908203, -0.0005602836608886719, -0.0004918575286865234, -0.000423431396484375, -0.00035500526428222656, -0.0002865791320800781, -0.0002181529998779297, -0.00014972686767578125, -8.130073547363281e-05, -1.2874603271484375e-05, 5.555152893066406e-05, 0.0001239776611328125, 0.00019240379333496094, 0.0002608299255371094, 0.0003292560577392578, 0.00039768218994140625, 0.0004661083221435547, 0.0005345344543457031, 0.0006029605865478516, 0.00067138671875, 0.0007398128509521484, 0.0008082389831542969, 0.0008766651153564453, 0.0009450912475585938, 0.0010135173797607422, 0.0010819435119628906, 0.001150369644165039, 0.0012187957763671875, 0.001287221908569336, 0.0013556480407714844, 0.0014240741729736328, 0.0014925003051757812, 0.0015609264373779297, 0.0016293525695800781, 0.0016977787017822266, 0.001766204833984375]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 3.0, 5.0, 12.0, 10.0, 13.0, 40.0, 60.0, 101.0, 193.0, 412.0, 1043.0, 3690.0, 25619.0, 618297.0, 374796.0, 19388.0, 3145.0, 941.0, 385.0, 194.0, 83.0, 47.0, 27.0, 16.0, 6.0, 10.0, 10.0, 5.0, 4.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-10.984375, -10.7093505859375, -10.434326171875, -10.1593017578125, -9.88427734375, -9.6092529296875, -9.334228515625, -9.0592041015625, -8.7841796875, -8.5091552734375, -8.234130859375, -7.9591064453125, -7.68408203125, -7.4090576171875, -7.134033203125, -6.8590087890625, -6.583984375, -6.3089599609375, -6.033935546875, -5.7589111328125, -5.48388671875, -5.2088623046875, -4.933837890625, -4.6588134765625, -4.3837890625, -4.1087646484375, -3.833740234375, -3.5587158203125, -3.28369140625, -3.0086669921875, -2.733642578125, -2.4586181640625, -2.18359375, -1.9085693359375, -1.633544921875, -1.3585205078125, -1.08349609375, -0.8084716796875, -0.533447265625, -0.2584228515625, 0.0166015625, 0.2916259765625, 0.566650390625, 0.8416748046875, 1.11669921875, 1.3917236328125, 1.666748046875, 1.9417724609375, 2.216796875, 2.4918212890625, 2.766845703125, 3.0418701171875, 3.31689453125, 3.5919189453125, 3.866943359375, 4.1419677734375, 4.4169921875, 4.6920166015625, 4.967041015625, 5.2420654296875, 5.51708984375, 5.7921142578125, 6.067138671875, 6.3421630859375, 6.6171875]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 7.0, 6.0, 4.0, 7.0, 17.0, 26.0, 19.0, 43.0, 65.0, 97.0, 123.0, 131.0, 129.0, 98.0, 72.0, 51.0, 40.0, 26.0, 18.0, 5.0, 8.0, 2.0, 4.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.1015625, -9.8258056640625, -9.550048828125, -9.2742919921875, -8.99853515625, -8.7227783203125, -8.447021484375, -8.1712646484375, -7.8955078125, -7.6197509765625, -7.343994140625, -7.0682373046875, -6.79248046875, -6.5167236328125, -6.240966796875, -5.9652099609375, -5.689453125, -5.4136962890625, -5.137939453125, -4.8621826171875, -4.58642578125, -4.3106689453125, -4.034912109375, -3.7591552734375, -3.4833984375, -3.2076416015625, -2.931884765625, -2.6561279296875, -2.38037109375, -2.1046142578125, -1.828857421875, -1.5531005859375, -1.27734375, -1.0015869140625, -0.725830078125, -0.4500732421875, -0.17431640625, 0.1014404296875, 0.377197265625, 0.6529541015625, 0.9287109375, 1.2044677734375, 1.480224609375, 1.7559814453125, 2.03173828125, 2.3074951171875, 2.583251953125, 2.8590087890625, 3.134765625, 3.4105224609375, 3.686279296875, 3.9620361328125, 4.23779296875, 4.5135498046875, 4.789306640625, 5.0650634765625, 5.3408203125, 5.6165771484375, 5.892333984375, 6.1680908203125, 6.44384765625, 6.7196044921875, 6.995361328125, 7.2711181640625, 7.546875]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 5.0, 2.0, 7.0, 13.0, 23.0, 107.0, 239.0, 316.0, 164.0, 76.0, 34.0, 11.0, 6.0, 4.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-169.85491943359375, -163.13082885742188, -156.40673828125, -149.68264770507812, -142.9585723876953, -136.23448181152344, -129.51039123535156, -122.78630065917969, -116.06221771240234, -109.33812713623047, -102.61404418945312, -95.88995361328125, -89.16586303710938, -82.44178009033203, -75.71768951416016, -68.99360656738281, -62.26951599121094, -55.54542922973633, -48.82134246826172, -42.097251892089844, -35.373165130615234, -28.649078369140625, -21.92498779296875, -15.20090103149414, -8.476814270019531, -1.7527265548706055, 4.97136116027832, 11.695449829101562, 18.419536590576172, 25.14362335205078, 31.867713928222656, 38.591800689697266, 45.315887451171875, 52.039974212646484, 58.764060974121094, 65.48815155029297, 72.21223449707031, 78.93632507324219, 85.66041564941406, 92.38450622558594, 99.10858917236328, 105.83267974853516, 112.5567626953125, 119.28085327148438, 126.00494384765625, 132.72903442382812, 139.453125, 146.1772003173828, 152.9012908935547, 159.62538146972656, 166.34947204589844, 173.07354736328125, 179.79763793945312, 186.521728515625, 193.24581909179688, 199.96990966796875, 206.69400024414062, 213.4180908203125, 220.14218139648438, 226.86627197265625, 233.59034729003906, 240.31443786621094, 247.0385284423828, 253.7626190185547, 260.4866943359375]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [3.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 7.0, 3.0, 3.0, 4.0, 6.0, 12.0, 6.0, 10.0, 13.0, 17.0, 15.0, 22.0, 30.0, 25.0, 33.0, 25.0, 40.0, 34.0, 32.0, 27.0, 54.0, 52.0, 54.0, 54.0, 58.0, 42.0, 38.0, 25.0, 37.0, 22.0, 26.0, 20.0, 31.0, 19.0, 19.0, 17.0, 12.0, 9.0, 13.0, 12.0, 5.0, 4.0, 10.0, 1.0, 3.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-58.842262268066406, -56.85445785522461, -54.86664962768555, -52.87884521484375, -50.89104080200195, -48.903236389160156, -46.915428161621094, -44.9276237487793, -42.9398193359375, -40.9520149230957, -38.96420669555664, -36.976402282714844, -34.98859786987305, -33.00079345703125, -31.012985229492188, -29.02518081665039, -27.037372589111328, -25.0495662689209, -23.0617618560791, -21.073955535888672, -19.086151123046875, -17.098344802856445, -15.110538482666016, -13.122733116149902, -11.134927749633789, -9.147122383117676, -7.159316539764404, -5.171510696411133, -3.1837053298950195, -1.1958999633789062, 0.7919063568115234, 2.7797117233276367, 4.76751708984375, 6.755322456359863, 8.743127822875977, 10.730934143066406, 12.71873950958252, 14.706544876098633, 16.694351196289062, 18.68215560913086, 20.66996192932129, 22.65776824951172, 24.645572662353516, 26.633378982543945, 28.621185302734375, 30.608989715576172, 32.59679412841797, 34.58460235595703, 36.57240676879883, 38.560211181640625, 40.54801940917969, 42.535823822021484, 44.52362823486328, 46.511436462402344, 48.49924087524414, 50.48704528808594, 52.474853515625, 54.4626579284668, 56.45046615600586, 58.438270568847656, 60.42607498168945, 62.41387939453125, 64.40168762207031, 66.38949584960938, 68.3772964477539]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 5.0, 4.0, 2.0, 9.0, 5.0, 9.0, 14.0, 27.0, 42.0, 45.0, 86.0, 130.0, 258.0, 423.0, 860.0, 1727.0, 3803.0, 10204.0, 34217.0, 202708.0, 3291668.0, 566757.0, 56181.0, 14907.0, 5390.0, 2264.0, 1086.0, 576.0, 314.0, 200.0, 139.0, 59.0, 43.0, 40.0, 22.0, 15.0, 11.0, 13.0, 4.0, 8.0, 6.0, 2.0, 0.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-8.875, -8.585693359375, -8.29638671875, -8.007080078125, -7.7177734375, -7.428466796875, -7.13916015625, -6.849853515625, -6.560546875, -6.271240234375, -5.98193359375, -5.692626953125, -5.4033203125, -5.114013671875, -4.82470703125, -4.535400390625, -4.24609375, -3.956787109375, -3.66748046875, -3.378173828125, -3.0888671875, -2.799560546875, -2.51025390625, -2.220947265625, -1.931640625, -1.642333984375, -1.35302734375, -1.063720703125, -0.7744140625, -0.485107421875, -0.19580078125, 0.093505859375, 0.3828125, 0.672119140625, 0.96142578125, 1.250732421875, 1.5400390625, 1.829345703125, 2.11865234375, 2.407958984375, 2.697265625, 2.986572265625, 3.27587890625, 3.565185546875, 3.8544921875, 4.143798828125, 4.43310546875, 4.722412109375, 5.01171875, 5.301025390625, 5.59033203125, 5.879638671875, 6.1689453125, 6.458251953125, 6.74755859375, 7.036865234375, 7.326171875, 7.615478515625, 7.90478515625, 8.194091796875, 8.4833984375, 8.772705078125, 9.06201171875, 9.351318359375, 9.640625]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 6.0, 1.0, 3.0, 6.0, 10.0, 9.0, 10.0, 18.0, 15.0, 24.0, 26.0, 31.0, 28.0, 41.0, 56.0, 42.0, 37.0, 42.0, 46.0, 48.0, 46.0, 45.0, 60.0, 45.0, 42.0, 40.0, 37.0, 23.0, 32.0, 22.0, 20.0, 17.0, 23.0, 13.0, 13.0, 7.0, 8.0, 6.0, 7.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.171875, -4.03802490234375, -3.9041748046875, -3.77032470703125, -3.636474609375, -3.50262451171875, -3.3687744140625, -3.23492431640625, -3.10107421875, -2.96722412109375, -2.8333740234375, -2.69952392578125, -2.565673828125, -2.43182373046875, -2.2979736328125, -2.16412353515625, -2.0302734375, -1.89642333984375, -1.7625732421875, -1.62872314453125, -1.494873046875, -1.36102294921875, -1.2271728515625, -1.09332275390625, -0.95947265625, -0.82562255859375, -0.6917724609375, -0.55792236328125, -0.424072265625, -0.29022216796875, -0.1563720703125, -0.02252197265625, 0.111328125, 0.24517822265625, 0.3790283203125, 0.51287841796875, 0.646728515625, 0.78057861328125, 0.9144287109375, 1.04827880859375, 1.18212890625, 1.31597900390625, 1.4498291015625, 1.58367919921875, 1.717529296875, 1.85137939453125, 1.9852294921875, 2.11907958984375, 2.2529296875, 2.38677978515625, 2.5206298828125, 2.65447998046875, 2.788330078125, 2.92218017578125, 3.0560302734375, 3.18988037109375, 3.32373046875, 3.45758056640625, 3.5914306640625, 3.72528076171875, 3.859130859375, 3.99298095703125, 4.1268310546875, 4.26068115234375, 4.39453125]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 1.0, 1.0, 4.0, 3.0, 10.0, 8.0, 14.0, 35.0, 28.0, 91.0, 167.0, 407.0, 1142.0, 4367.0, 31229.0, 2000839.0, 2120961.0, 29494.0, 4017.0, 961.0, 259.0, 120.0, 54.0, 29.0, 19.0, 8.0, 8.0, 4.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 2.0], "bins": [-32.3125, -31.594482421875, -30.87646484375, -30.158447265625, -29.4404296875, -28.722412109375, -28.00439453125, -27.286376953125, -26.568359375, -25.850341796875, -25.13232421875, -24.414306640625, -23.6962890625, -22.978271484375, -22.26025390625, -21.542236328125, -20.82421875, -20.106201171875, -19.38818359375, -18.670166015625, -17.9521484375, -17.234130859375, -16.51611328125, -15.798095703125, -15.080078125, -14.362060546875, -13.64404296875, -12.926025390625, -12.2080078125, -11.489990234375, -10.77197265625, -10.053955078125, -9.3359375, -8.617919921875, -7.89990234375, -7.181884765625, -6.4638671875, -5.745849609375, -5.02783203125, -4.309814453125, -3.591796875, -2.873779296875, -2.15576171875, -1.437744140625, -0.7197265625, -0.001708984375, 0.71630859375, 1.434326171875, 2.15234375, 2.870361328125, 3.58837890625, 4.306396484375, 5.0244140625, 5.742431640625, 6.46044921875, 7.178466796875, 7.896484375, 8.614501953125, 9.33251953125, 10.050537109375, 10.7685546875, 11.486572265625, 12.20458984375, 12.922607421875, 13.640625]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 0.0, 1.0, 4.0, 5.0, 9.0, 9.0, 15.0, 10.0, 27.0, 34.0, 40.0, 42.0, 80.0, 108.0, 163.0, 326.0, 758.0, 1215.0, 543.0, 246.0, 137.0, 89.0, 57.0, 38.0, 30.0, 23.0, 14.0, 10.0, 11.0, 7.0, 6.0, 4.0, 5.0, 4.0, 4.0, 1.0, 1.0, 2.0, 1.0, 1.0], "bins": [-15.8359375, -15.452880859375, -15.06982421875, -14.686767578125, -14.3037109375, -13.920654296875, -13.53759765625, -13.154541015625, -12.771484375, -12.388427734375, -12.00537109375, -11.622314453125, -11.2392578125, -10.856201171875, -10.47314453125, -10.090087890625, -9.70703125, -9.323974609375, -8.94091796875, -8.557861328125, -8.1748046875, -7.791748046875, -7.40869140625, -7.025634765625, -6.642578125, -6.259521484375, -5.87646484375, -5.493408203125, -5.1103515625, -4.727294921875, -4.34423828125, -3.961181640625, -3.578125, -3.195068359375, -2.81201171875, -2.428955078125, -2.0458984375, -1.662841796875, -1.27978515625, -0.896728515625, -0.513671875, -0.130615234375, 0.25244140625, 0.635498046875, 1.0185546875, 1.401611328125, 1.78466796875, 2.167724609375, 2.55078125, 2.933837890625, 3.31689453125, 3.699951171875, 4.0830078125, 4.466064453125, 4.84912109375, 5.232177734375, 5.615234375, 5.998291015625, 6.38134765625, 6.764404296875, 7.1474609375, 7.530517578125, 7.91357421875, 8.296630859375, 8.6796875]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 7.0, 12.0, 16.0, 66.0, 300.0, 413.0, 138.0, 30.0, 14.0, 5.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-406.7867736816406, -398.69659423828125, -390.60638427734375, -382.5162048339844, -374.426025390625, -366.3358154296875, -358.2456359863281, -350.1554260253906, -342.06524658203125, -333.9750671386719, -325.8848571777344, -317.794677734375, -309.7044677734375, -301.6142883300781, -293.52410888671875, -285.43389892578125, -277.3437194824219, -269.2535400390625, -261.163330078125, -253.07315063476562, -244.9829559326172, -236.89276123046875, -228.80258178710938, -220.71238708496094, -212.6221923828125, -204.53199768066406, -196.44180297851562, -188.35162353515625, -180.2614288330078, -172.17123413085938, -164.0810546875, -155.99085998535156, -147.9006805419922, -139.81048583984375, -131.72030639648438, -123.63011169433594, -115.5399169921875, -107.44972229003906, -99.35953521728516, -91.26934814453125, -83.17915344238281, -75.08895874023438, -66.99877166748047, -58.9085807800293, -50.818389892578125, -42.72819900512695, -34.63800811767578, -26.54781723022461, -18.457626342773438, -10.367435455322266, -2.2772445678710938, 5.812946319580078, 13.90313720703125, 21.993328094482422, 30.083518981933594, 38.173709869384766, 46.26390075683594, 54.35409164428711, 62.44428253173828, 70.53446960449219, 78.62466430664062, 86.71485900878906, 94.80504608154297, 102.89523315429688, 110.98542785644531]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 3.0, 4.0, 2.0, 2.0, 5.0, 14.0, 10.0, 12.0, 11.0, 13.0, 21.0, 20.0, 20.0, 33.0, 32.0, 44.0, 37.0, 46.0, 41.0, 49.0, 51.0, 47.0, 49.0, 45.0, 45.0, 41.0, 45.0, 44.0, 35.0, 32.0, 31.0, 26.0, 20.0, 17.0, 15.0, 8.0, 8.0, 8.0, 2.0, 7.0, 2.0, 4.0, 4.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-46.77496337890625, -45.157508850097656, -43.54005432128906, -41.92259979248047, -40.305145263671875, -38.68769073486328, -37.07023620605469, -35.452781677246094, -33.8353271484375, -32.217872619628906, -30.600418090820312, -28.98296356201172, -27.365509033203125, -25.74805450439453, -24.13060188293457, -22.513147354125977, -20.895694732666016, -19.278240203857422, -17.660785675048828, -16.043331146240234, -14.425877571105957, -12.808423042297363, -11.190969467163086, -9.573514938354492, -7.956060409545898, -6.338605880737305, -4.721151828765869, -3.1036977767944336, -1.4862432479858398, 0.1312112808227539, 1.7486648559570312, 3.366119384765625, 4.983573913574219, 6.6010284423828125, 8.218482971191406, 9.835936546325684, 11.453391075134277, 13.070845603942871, 14.688299179077148, 16.305753707885742, 17.923208236694336, 19.54066276550293, 21.158117294311523, 22.775569915771484, 24.393024444580078, 26.010478973388672, 27.627933502197266, 29.24538803100586, 30.862842559814453, 32.48029708862305, 34.09775161743164, 35.715206146240234, 37.33266067504883, 38.95011520385742, 40.56756591796875, 42.185020446777344, 43.80247497558594, 45.41992950439453, 47.037384033203125, 48.65483856201172, 50.27229309082031, 51.889747619628906, 53.5072021484375, 55.124656677246094, 56.74211120605469]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 3.0, 7.0, 5.0, 14.0, 10.0, 25.0, 26.0, 48.0, 63.0, 89.0, 185.0, 266.0, 421.0, 682.0, 1211.0, 2424.0, 5150.0, 11225.0, 26893.0, 68002.0, 171647.0, 351085.0, 242338.0, 98458.0, 38667.0, 15846.0, 6834.0, 3097.0, 1587.0, 851.0, 514.0, 312.0, 230.0, 128.0, 68.0, 54.0, 36.0, 29.0, 10.0, 4.0, 5.0, 4.0, 2.0, 2.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.9140625, -8.6138916015625, -8.313720703125, -8.0135498046875, -7.71337890625, -7.4132080078125, -7.113037109375, -6.8128662109375, -6.5126953125, -6.2125244140625, -5.912353515625, -5.6121826171875, -5.31201171875, -5.0118408203125, -4.711669921875, -4.4114990234375, -4.111328125, -3.8111572265625, -3.510986328125, -3.2108154296875, -2.91064453125, -2.6104736328125, -2.310302734375, -2.0101318359375, -1.7099609375, -1.4097900390625, -1.109619140625, -0.8094482421875, -0.50927734375, -0.2091064453125, 0.091064453125, 0.3912353515625, 0.69140625, 0.9915771484375, 1.291748046875, 1.5919189453125, 1.89208984375, 2.1922607421875, 2.492431640625, 2.7926025390625, 3.0927734375, 3.3929443359375, 3.693115234375, 3.9932861328125, 4.29345703125, 4.5936279296875, 4.893798828125, 5.1939697265625, 5.494140625, 5.7943115234375, 6.094482421875, 6.3946533203125, 6.69482421875, 6.9949951171875, 7.295166015625, 7.5953369140625, 7.8955078125, 8.1956787109375, 8.495849609375, 8.7960205078125, 9.09619140625, 9.3963623046875, 9.696533203125, 9.9967041015625, 10.296875]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 5.0, 2.0, 6.0, 5.0, 4.0, 5.0, 5.0, 8.0, 11.0, 12.0, 12.0, 22.0, 14.0, 29.0, 30.0, 34.0, 30.0, 40.0, 40.0, 41.0, 33.0, 41.0, 47.0, 43.0, 57.0, 46.0, 41.0, 47.0, 24.0, 32.0, 37.0, 38.0, 28.0, 26.0, 19.0, 15.0, 14.0, 9.0, 11.0, 12.0, 7.0, 4.0, 8.0, 2.0, 3.0, 6.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.12890625, -4.005157470703125, -3.88140869140625, -3.757659912109375, -3.6339111328125, -3.510162353515625, -3.38641357421875, -3.262664794921875, -3.138916015625, -3.015167236328125, -2.89141845703125, -2.767669677734375, -2.6439208984375, -2.520172119140625, -2.39642333984375, -2.272674560546875, -2.14892578125, -2.025177001953125, -1.90142822265625, -1.777679443359375, -1.6539306640625, -1.530181884765625, -1.40643310546875, -1.282684326171875, -1.158935546875, -1.035186767578125, -0.91143798828125, -0.787689208984375, -0.6639404296875, -0.540191650390625, -0.41644287109375, -0.292694091796875, -0.1689453125, -0.045196533203125, 0.07855224609375, 0.202301025390625, 0.3260498046875, 0.449798583984375, 0.57354736328125, 0.697296142578125, 0.821044921875, 0.944793701171875, 1.06854248046875, 1.192291259765625, 1.3160400390625, 1.439788818359375, 1.56353759765625, 1.687286376953125, 1.81103515625, 1.934783935546875, 2.05853271484375, 2.182281494140625, 2.3060302734375, 2.429779052734375, 2.55352783203125, 2.677276611328125, 2.801025390625, 2.924774169921875, 3.04852294921875, 3.172271728515625, 3.2960205078125, 3.419769287109375, 3.54351806640625, 3.667266845703125, 3.791015625]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 6.0, 8.0, 3.0, 8.0, 12.0, 22.0, 29.0, 48.0, 63.0, 121.0, 200.0, 378.0, 741.0, 1612.0, 4729.0, 26825.0, 419419.0, 556449.0, 29531.0, 5057.0, 1639.0, 750.0, 408.0, 214.0, 106.0, 67.0, 33.0, 31.0, 14.0, 10.0, 14.0, 6.0, 4.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.703125, -24.900390625, -24.09765625, -23.294921875, -22.4921875, -21.689453125, -20.88671875, -20.083984375, -19.28125, -18.478515625, -17.67578125, -16.873046875, -16.0703125, -15.267578125, -14.46484375, -13.662109375, -12.859375, -12.056640625, -11.25390625, -10.451171875, -9.6484375, -8.845703125, -8.04296875, -7.240234375, -6.4375, -5.634765625, -4.83203125, -4.029296875, -3.2265625, -2.423828125, -1.62109375, -0.818359375, -0.015625, 0.787109375, 1.58984375, 2.392578125, 3.1953125, 3.998046875, 4.80078125, 5.603515625, 6.40625, 7.208984375, 8.01171875, 8.814453125, 9.6171875, 10.419921875, 11.22265625, 12.025390625, 12.828125, 13.630859375, 14.43359375, 15.236328125, 16.0390625, 16.841796875, 17.64453125, 18.447265625, 19.25, 20.052734375, 20.85546875, 21.658203125, 22.4609375, 23.263671875, 24.06640625, 24.869140625, 25.671875]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 3.0, 5.0, 4.0, 4.0, 9.0, 14.0, 8.0, 15.0, 23.0, 28.0, 28.0, 31.0, 27.0, 41.0, 48.0, 45.0, 41.0, 44.0, 53.0, 41.0, 53.0, 45.0, 53.0, 60.0, 35.0, 34.0, 28.0, 23.0, 36.0, 27.0, 19.0, 12.0, 20.0, 12.0, 13.0, 10.0, 6.0, 3.0, 3.0, 3.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.609375, -18.995361328125, -18.38134765625, -17.767333984375, -17.1533203125, -16.539306640625, -15.92529296875, -15.311279296875, -14.697265625, -14.083251953125, -13.46923828125, -12.855224609375, -12.2412109375, -11.627197265625, -11.01318359375, -10.399169921875, -9.78515625, -9.171142578125, -8.55712890625, -7.943115234375, -7.3291015625, -6.715087890625, -6.10107421875, -5.487060546875, -4.873046875, -4.259033203125, -3.64501953125, -3.031005859375, -2.4169921875, -1.802978515625, -1.18896484375, -0.574951171875, 0.0390625, 0.653076171875, 1.26708984375, 1.881103515625, 2.4951171875, 3.109130859375, 3.72314453125, 4.337158203125, 4.951171875, 5.565185546875, 6.17919921875, 6.793212890625, 7.4072265625, 8.021240234375, 8.63525390625, 9.249267578125, 9.86328125, 10.477294921875, 11.09130859375, 11.705322265625, 12.3193359375, 12.933349609375, 13.54736328125, 14.161376953125, 14.775390625, 15.389404296875, 16.00341796875, 16.617431640625, 17.2314453125, 17.845458984375, 18.45947265625, 19.073486328125, 19.6875]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 5.0, 3.0, 7.0, 2.0, 9.0, 10.0, 8.0, 15.0, 17.0, 30.0, 52.0, 90.0, 106.0, 290.0, 640.0, 1840.0, 7457.0, 129662.0, 887226.0, 16367.0, 2921.0, 947.0, 375.0, 168.0, 103.0, 70.0, 42.0, 25.0, 21.0, 13.0, 9.0, 10.0, 9.0, 2.0, 5.0, 4.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.03125, -18.440185546875, -17.84912109375, -17.258056640625, -16.6669921875, -16.075927734375, -15.48486328125, -14.893798828125, -14.302734375, -13.711669921875, -13.12060546875, -12.529541015625, -11.9384765625, -11.347412109375, -10.75634765625, -10.165283203125, -9.57421875, -8.983154296875, -8.39208984375, -7.801025390625, -7.2099609375, -6.618896484375, -6.02783203125, -5.436767578125, -4.845703125, -4.254638671875, -3.66357421875, -3.072509765625, -2.4814453125, -1.890380859375, -1.29931640625, -0.708251953125, -0.1171875, 0.473876953125, 1.06494140625, 1.656005859375, 2.2470703125, 2.838134765625, 3.42919921875, 4.020263671875, 4.611328125, 5.202392578125, 5.79345703125, 6.384521484375, 6.9755859375, 7.566650390625, 8.15771484375, 8.748779296875, 9.33984375, 9.930908203125, 10.52197265625, 11.113037109375, 11.7041015625, 12.295166015625, 12.88623046875, 13.477294921875, 14.068359375, 14.659423828125, 15.25048828125, 15.841552734375, 16.4326171875, 17.023681640625, 17.61474609375, 18.205810546875, 18.796875]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 8.0, 14.0, 20.0, 24.0, 54.0, 142.0, 261.0, 260.0, 98.0, 55.0, 20.0, 11.0, 11.0, 7.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004344940185546875, -0.004204094409942627, -0.004063248634338379, -0.003922402858734131, -0.003781557083129883, -0.0036407113075256348, -0.0034998655319213867, -0.0033590197563171387, -0.0032181739807128906, -0.0030773282051086426, -0.0029364824295043945, -0.0027956366539001465, -0.0026547908782958984, -0.0025139451026916504, -0.0023730993270874023, -0.0022322535514831543, -0.0020914077758789062, -0.0019505620002746582, -0.0018097162246704102, -0.0016688704490661621, -0.001528024673461914, -0.001387178897857666, -0.001246333122253418, -0.00110548734664917, -0.0009646415710449219, -0.0008237957954406738, -0.0006829500198364258, -0.0005421042442321777, -0.0004012584686279297, -0.00026041269302368164, -0.0001195669174194336, 2.1278858184814453e-05, 0.0001621246337890625, 0.00030297040939331055, 0.0004438161849975586, 0.0005846619606018066, 0.0007255077362060547, 0.0008663535118103027, 0.0010071992874145508, 0.0011480450630187988, 0.0012888908386230469, 0.001429736614227295, 0.001570582389831543, 0.001711428165435791, 0.001852273941040039, 0.001993119716644287, 0.002133965492248535, 0.002274811267852783, 0.0024156570434570312, 0.0025565028190612793, 0.0026973485946655273, 0.0028381943702697754, 0.0029790401458740234, 0.0031198859214782715, 0.0032607316970825195, 0.0034015774726867676, 0.0035424232482910156, 0.0036832690238952637, 0.0038241147994995117, 0.00396496057510376, 0.004105806350708008, 0.004246652126312256, 0.004387497901916504, 0.004528343677520752, 0.004669189453125]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 7.0, 8.0, 6.0, 14.0, 15.0, 22.0, 29.0, 46.0, 63.0, 122.0, 263.0, 605.0, 1598.0, 5463.0, 112707.0, 914857.0, 9119.0, 2192.0, 754.0, 272.0, 157.0, 77.0, 62.0, 27.0, 22.0, 10.0, 6.0, 7.0, 7.0, 6.0, 4.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-33.8125, -32.90234375, -31.9921875, -31.08203125, -30.171875, -29.26171875, -28.3515625, -27.44140625, -26.53125, -25.62109375, -24.7109375, -23.80078125, -22.890625, -21.98046875, -21.0703125, -20.16015625, -19.25, -18.33984375, -17.4296875, -16.51953125, -15.609375, -14.69921875, -13.7890625, -12.87890625, -11.96875, -11.05859375, -10.1484375, -9.23828125, -8.328125, -7.41796875, -6.5078125, -5.59765625, -4.6875, -3.77734375, -2.8671875, -1.95703125, -1.046875, -0.13671875, 0.7734375, 1.68359375, 2.59375, 3.50390625, 4.4140625, 5.32421875, 6.234375, 7.14453125, 8.0546875, 8.96484375, 9.875, 10.78515625, 11.6953125, 12.60546875, 13.515625, 14.42578125, 15.3359375, 16.24609375, 17.15625, 18.06640625, 18.9765625, 19.88671875, 20.796875, 21.70703125, 22.6171875, 23.52734375, 24.4375]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 6.0, 4.0, 6.0, 6.0, 7.0, 5.0, 14.0, 34.0, 73.0, 154.0, 272.0, 223.0, 106.0, 47.0, 19.0, 10.0, 7.0, 3.0, 2.0, 5.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-24.25, -23.625732421875, -23.00146484375, -22.377197265625, -21.7529296875, -21.128662109375, -20.50439453125, -19.880126953125, -19.255859375, -18.631591796875, -18.00732421875, -17.383056640625, -16.7587890625, -16.134521484375, -15.51025390625, -14.885986328125, -14.26171875, -13.637451171875, -13.01318359375, -12.388916015625, -11.7646484375, -11.140380859375, -10.51611328125, -9.891845703125, -9.267578125, -8.643310546875, -8.01904296875, -7.394775390625, -6.7705078125, -6.146240234375, -5.52197265625, -4.897705078125, -4.2734375, -3.649169921875, -3.02490234375, -2.400634765625, -1.7763671875, -1.152099609375, -0.52783203125, 0.096435546875, 0.720703125, 1.344970703125, 1.96923828125, 2.593505859375, 3.2177734375, 3.842041015625, 4.46630859375, 5.090576171875, 5.71484375, 6.339111328125, 6.96337890625, 7.587646484375, 8.2119140625, 8.836181640625, 9.46044921875, 10.084716796875, 10.708984375, 11.333251953125, 11.95751953125, 12.581787109375, 13.2060546875, 13.830322265625, 14.45458984375, 15.078857421875, 15.703125]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 3.0, 8.0, 17.0, 45.0, 155.0, 408.0, 234.0, 79.0, 24.0, 14.0, 7.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-273.2576599121094, -264.6259460449219, -255.9942169189453, -247.36248779296875, -238.73077392578125, -230.0990447998047, -221.46731567382812, -212.83560180664062, -204.20387268066406, -195.5721435546875, -186.9404296875, -178.30870056152344, -169.67697143554688, -161.04525756835938, -152.4135284423828, -143.78179931640625, -135.15008544921875, -126.51836395263672, -117.88664245605469, -109.25491333007812, -100.6231918334961, -91.99147033691406, -83.3597412109375, -74.72801971435547, -66.09629821777344, -57.464576721191406, -48.83285140991211, -40.20112609863281, -31.56940460205078, -22.93768310546875, -14.305957794189453, -5.674232482910156, 2.957489013671875, 11.589212417602539, 20.220935821533203, 28.852659225463867, 37.48438262939453, 46.11610412597656, 54.74782943725586, 63.379554748535156, 72.01127624511719, 80.64299774169922, 89.27471923828125, 97.90644836425781, 106.53816986083984, 115.16989135742188, 123.80162048339844, 132.433349609375, 141.0650634765625, 149.69679260253906, 158.32850646972656, 166.96023559570312, 175.59194946289062, 184.2236785888672, 192.85540771484375, 201.48712158203125, 210.1188507080078, 218.75057983398438, 227.38229370117188, 236.01402282714844, 244.645751953125, 253.2774658203125, 261.9091796875, 270.5409240722656, 279.1726379394531]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 0.0, 3.0, 6.0, 6.0, 6.0, 7.0, 9.0, 14.0, 14.0, 17.0, 22.0, 26.0, 27.0, 44.0, 40.0, 44.0, 49.0, 51.0, 74.0, 85.0, 72.0, 46.0, 66.0, 36.0, 38.0, 29.0, 26.0, 24.0, 27.0, 23.0, 15.0, 9.0, 8.0, 12.0, 8.0, 5.0, 8.0, 4.0, 0.0, 3.0, 3.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-78.81991577148438, -76.06816864013672, -73.31642150878906, -70.5646743774414, -67.81292724609375, -65.0611801147461, -62.30943298339844, -59.55768585205078, -56.805938720703125, -54.05419158935547, -51.30244445800781, -48.550697326660156, -45.7989501953125, -43.047203063964844, -40.29545593261719, -37.54370880126953, -34.79196548461914, -32.040218353271484, -29.288471221923828, -26.536724090576172, -23.784976959228516, -21.03322982788086, -18.281484603881836, -15.52973747253418, -12.777990341186523, -10.026243209838867, -7.274496555328369, -4.522749900817871, -1.7710027694702148, 0.9807443618774414, 3.7324905395507812, 6.4842376708984375, 9.235984802246094, 11.98773193359375, 14.739479064941406, 17.491226196289062, 20.24297332763672, 22.994720458984375, 25.7464656829834, 28.498212814331055, 31.24995994567871, 34.001705169677734, 36.75345230102539, 39.50519943237305, 42.2569465637207, 45.00869369506836, 47.760440826416016, 50.51218795776367, 53.26393508911133, 56.015682220458984, 58.76742935180664, 61.5191764831543, 64.27091979980469, 67.02266693115234, 69.7744140625, 72.52616119384766, 75.27790832519531, 78.02965545654297, 80.78140258789062, 83.53314971923828, 86.28489685058594, 89.0366439819336, 91.78839111328125, 94.5401382446289, 97.29188537597656]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 2.0, 7.0, 8.0, 15.0, 28.0, 50.0, 127.0, 259.0, 958.0, 4574.0, 116206.0, 4040094.0, 28837.0, 2161.0, 571.0, 212.0, 76.0, 35.0, 19.0, 14.0, 10.0, 4.0, 6.0, 1.0, 7.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.078125, -19.218017578125, -18.35791015625, -17.497802734375, -16.6376953125, -15.777587890625, -14.91748046875, -14.057373046875, -13.197265625, -12.337158203125, -11.47705078125, -10.616943359375, -9.7568359375, -8.896728515625, -8.03662109375, -7.176513671875, -6.31640625, -5.456298828125, -4.59619140625, -3.736083984375, -2.8759765625, -2.015869140625, -1.15576171875, -0.295654296875, 0.564453125, 1.424560546875, 2.28466796875, 3.144775390625, 4.0048828125, 4.864990234375, 5.72509765625, 6.585205078125, 7.4453125, 8.305419921875, 9.16552734375, 10.025634765625, 10.8857421875, 11.745849609375, 12.60595703125, 13.466064453125, 14.326171875, 15.186279296875, 16.04638671875, 16.906494140625, 17.7666015625, 18.626708984375, 19.48681640625, 20.346923828125, 21.20703125, 22.067138671875, 22.92724609375, 23.787353515625, 24.6474609375, 25.507568359375, 26.36767578125, 27.227783203125, 28.087890625, 28.947998046875, 29.80810546875, 30.668212890625, 31.5283203125, 32.388427734375, 33.24853515625, 34.108642578125, 34.96875]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 6.0, 3.0, 6.0, 9.0, 13.0, 14.0, 22.0, 35.0, 53.0, 56.0, 75.0, 69.0, 80.0, 72.0, 90.0, 82.0, 71.0, 63.0, 56.0, 43.0, 26.0, 28.0, 15.0, 7.0, 10.0, 2.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-6.93359375, -6.68377685546875, -6.4339599609375, -6.18414306640625, -5.934326171875, -5.68450927734375, -5.4346923828125, -5.18487548828125, -4.93505859375, -4.68524169921875, -4.4354248046875, -4.18560791015625, -3.935791015625, -3.68597412109375, -3.4361572265625, -3.18634033203125, -2.9365234375, -2.68670654296875, -2.4368896484375, -2.18707275390625, -1.937255859375, -1.68743896484375, -1.4376220703125, -1.18780517578125, -0.93798828125, -0.68817138671875, -0.4383544921875, -0.18853759765625, 0.061279296875, 0.31109619140625, 0.5609130859375, 0.81072998046875, 1.060546875, 1.31036376953125, 1.5601806640625, 1.80999755859375, 2.059814453125, 2.30963134765625, 2.5594482421875, 2.80926513671875, 3.05908203125, 3.30889892578125, 3.5587158203125, 3.80853271484375, 4.058349609375, 4.30816650390625, 4.5579833984375, 4.80780029296875, 5.0576171875, 5.30743408203125, 5.5572509765625, 5.80706787109375, 6.056884765625, 6.30670166015625, 6.5565185546875, 6.80633544921875, 7.05615234375, 7.30596923828125, 7.5557861328125, 7.80560302734375, 8.055419921875, 8.30523681640625, 8.5550537109375, 8.80487060546875, 9.0546875]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 3.0, 4.0, 5.0, 4.0, 9.0, 12.0, 21.0, 46.0, 86.0, 140.0, 286.0, 579.0, 1393.0, 5033.0, 29124.0, 751532.0, 3343946.0, 51407.0, 7324.0, 1909.0, 715.0, 306.0, 165.0, 93.0, 66.0, 36.0, 24.0, 8.0, 4.0, 5.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-23.296875, -22.701416015625, -22.10595703125, -21.510498046875, -20.9150390625, -20.319580078125, -19.72412109375, -19.128662109375, -18.533203125, -17.937744140625, -17.34228515625, -16.746826171875, -16.1513671875, -15.555908203125, -14.96044921875, -14.364990234375, -13.76953125, -13.174072265625, -12.57861328125, -11.983154296875, -11.3876953125, -10.792236328125, -10.19677734375, -9.601318359375, -9.005859375, -8.410400390625, -7.81494140625, -7.219482421875, -6.6240234375, -6.028564453125, -5.43310546875, -4.837646484375, -4.2421875, -3.646728515625, -3.05126953125, -2.455810546875, -1.8603515625, -1.264892578125, -0.66943359375, -0.073974609375, 0.521484375, 1.116943359375, 1.71240234375, 2.307861328125, 2.9033203125, 3.498779296875, 4.09423828125, 4.689697265625, 5.28515625, 5.880615234375, 6.47607421875, 7.071533203125, 7.6669921875, 8.262451171875, 8.85791015625, 9.453369140625, 10.048828125, 10.644287109375, 11.23974609375, 11.835205078125, 12.4306640625, 13.026123046875, 13.62158203125, 14.217041015625, 14.8125]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 5.0, 5.0, 6.0, 11.0, 8.0, 24.0, 21.0, 38.0, 65.0, 76.0, 151.0, 304.0, 801.0, 1280.0, 664.0, 263.0, 131.0, 66.0, 45.0, 30.0, 26.0, 21.0, 16.0, 7.0, 3.0, 1.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.0234375, -14.55908203125, -14.0947265625, -13.63037109375, -13.166015625, -12.70166015625, -12.2373046875, -11.77294921875, -11.30859375, -10.84423828125, -10.3798828125, -9.91552734375, -9.451171875, -8.98681640625, -8.5224609375, -8.05810546875, -7.59375, -7.12939453125, -6.6650390625, -6.20068359375, -5.736328125, -5.27197265625, -4.8076171875, -4.34326171875, -3.87890625, -3.41455078125, -2.9501953125, -2.48583984375, -2.021484375, -1.55712890625, -1.0927734375, -0.62841796875, -0.1640625, 0.30029296875, 0.7646484375, 1.22900390625, 1.693359375, 2.15771484375, 2.6220703125, 3.08642578125, 3.55078125, 4.01513671875, 4.4794921875, 4.94384765625, 5.408203125, 5.87255859375, 6.3369140625, 6.80126953125, 7.265625, 7.72998046875, 8.1943359375, 8.65869140625, 9.123046875, 9.58740234375, 10.0517578125, 10.51611328125, 10.98046875, 11.44482421875, 11.9091796875, 12.37353515625, 12.837890625, 13.30224609375, 13.7666015625, 14.23095703125, 14.6953125]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 3.0, 8.0, 8.0, 13.0, 34.0, 69.0, 137.0, 246.0, 238.0, 134.0, 59.0, 29.0, 11.0, 10.0, 7.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-122.30014038085938, -117.5668716430664, -112.8335952758789, -108.10032653808594, -103.36705017089844, -98.63378143310547, -93.9005126953125, -89.167236328125, -84.43396759033203, -79.70069885253906, -74.96742248535156, -70.2341537475586, -65.50088500976562, -60.767608642578125, -56.034339904785156, -51.30106735229492, -46.56779479980469, -41.83452224731445, -37.10124969482422, -32.36798095703125, -27.634708404541016, -22.90143585205078, -18.16816520690918, -13.434894561767578, -8.701622009277344, -3.968350410461426, 0.7649211883544922, 5.49819278717041, 10.231464385986328, 14.964736938476562, 19.698007583618164, 24.431278228759766, 29.16455078125, 33.897823333740234, 38.63109588623047, 43.36436462402344, 48.09763717651367, 52.830909729003906, 57.564178466796875, 62.29745101928711, 67.03072357177734, 71.76399230957031, 76.49726867675781, 81.23053741455078, 85.96380615234375, 90.69708251953125, 95.43035125732422, 100.16361999511719, 104.89689636230469, 109.63016510009766, 114.36344146728516, 119.09671020507812, 123.82998657226562, 128.56326293945312, 133.29652404785156, 138.02980041503906, 142.7630615234375, 147.496337890625, 152.22959899902344, 156.96287536621094, 161.69615173339844, 166.42941284179688, 171.16268920898438, 175.89596557617188, 180.62924194335938]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 5.0, 6.0, 7.0, 12.0, 7.0, 18.0, 14.0, 28.0, 24.0, 23.0, 21.0, 37.0, 37.0, 36.0, 41.0, 48.0, 49.0, 42.0, 57.0, 48.0, 53.0, 54.0, 35.0, 46.0, 37.0, 38.0, 29.0, 42.0, 24.0, 17.0, 10.0, 10.0, 13.0, 9.0, 5.0, 9.0, 8.0, 2.0, 5.0, 1.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.42506408691406, -58.70603942871094, -56.98701095581055, -55.267982482910156, -53.54895782470703, -51.829933166503906, -50.110904693603516, -48.391876220703125, -46.6728515625, -44.953826904296875, -43.234798431396484, -41.515769958496094, -39.79674530029297, -38.077720642089844, -36.35869216918945, -34.63966369628906, -32.92063903808594, -31.20161247253418, -29.482585906982422, -27.763559341430664, -26.044532775878906, -24.32550621032715, -22.60647964477539, -20.887453079223633, -19.168426513671875, -17.449399948120117, -15.73037338256836, -14.011346817016602, -12.292320251464844, -10.573293685913086, -8.854267120361328, -7.13524055480957, -5.416217803955078, -3.6971912384033203, -1.9781646728515625, -0.2591381072998047, 1.4598884582519531, 3.178915023803711, 4.897941589355469, 6.616968154907227, 8.335994720458984, 10.055021286010742, 11.7740478515625, 13.493074417114258, 15.212100982666016, 16.931127548217773, 18.65015411376953, 20.36918067932129, 22.088207244873047, 23.807233810424805, 25.526260375976562, 27.24528694152832, 28.964313507080078, 30.683340072631836, 32.402366638183594, 34.12139129638672, 35.84041976928711, 37.5594482421875, 39.278472900390625, 40.99749755859375, 42.71652603149414, 44.43555450439453, 46.154579162597656, 47.87360382080078, 49.59263229370117]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 6.0, 5.0, 4.0, 8.0, 11.0, 11.0, 19.0, 31.0, 42.0, 67.0, 133.0, 189.0, 356.0, 871.0, 2378.0, 8041.0, 35562.0, 198531.0, 594056.0, 167263.0, 30052.0, 7161.0, 2138.0, 775.0, 354.0, 168.0, 107.0, 70.0, 39.0, 29.0, 26.0, 16.0, 12.0, 3.0, 8.0, 7.0, 4.0, 2.0, 6.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.5, -15.89892578125, -15.2978515625, -14.69677734375, -14.095703125, -13.49462890625, -12.8935546875, -12.29248046875, -11.69140625, -11.09033203125, -10.4892578125, -9.88818359375, -9.287109375, -8.68603515625, -8.0849609375, -7.48388671875, -6.8828125, -6.28173828125, -5.6806640625, -5.07958984375, -4.478515625, -3.87744140625, -3.2763671875, -2.67529296875, -2.07421875, -1.47314453125, -0.8720703125, -0.27099609375, 0.330078125, 0.93115234375, 1.5322265625, 2.13330078125, 2.734375, 3.33544921875, 3.9365234375, 4.53759765625, 5.138671875, 5.73974609375, 6.3408203125, 6.94189453125, 7.54296875, 8.14404296875, 8.7451171875, 9.34619140625, 9.947265625, 10.54833984375, 11.1494140625, 11.75048828125, 12.3515625, 12.95263671875, 13.5537109375, 14.15478515625, 14.755859375, 15.35693359375, 15.9580078125, 16.55908203125, 17.16015625, 17.76123046875, 18.3623046875, 18.96337890625, 19.564453125, 20.16552734375, 20.7666015625, 21.36767578125, 21.96875]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 2.0, 5.0, 8.0, 7.0, 16.0, 28.0, 37.0, 40.0, 48.0, 56.0, 55.0, 85.0, 91.0, 72.0, 79.0, 78.0, 63.0, 61.0, 46.0, 43.0, 26.0, 24.0, 14.0, 8.0, 9.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.0546875, -6.805419921875, -6.55615234375, -6.306884765625, -6.0576171875, -5.808349609375, -5.55908203125, -5.309814453125, -5.060546875, -4.811279296875, -4.56201171875, -4.312744140625, -4.0634765625, -3.814208984375, -3.56494140625, -3.315673828125, -3.06640625, -2.817138671875, -2.56787109375, -2.318603515625, -2.0693359375, -1.820068359375, -1.57080078125, -1.321533203125, -1.072265625, -0.822998046875, -0.57373046875, -0.324462890625, -0.0751953125, 0.174072265625, 0.42333984375, 0.672607421875, 0.921875, 1.171142578125, 1.42041015625, 1.669677734375, 1.9189453125, 2.168212890625, 2.41748046875, 2.666748046875, 2.916015625, 3.165283203125, 3.41455078125, 3.663818359375, 3.9130859375, 4.162353515625, 4.41162109375, 4.660888671875, 4.91015625, 5.159423828125, 5.40869140625, 5.657958984375, 5.9072265625, 6.156494140625, 6.40576171875, 6.655029296875, 6.904296875, 7.153564453125, 7.40283203125, 7.652099609375, 7.9013671875, 8.150634765625, 8.39990234375, 8.649169921875, 8.8984375]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 7.0, 5.0, 7.0, 16.0, 30.0, 44.0, 58.0, 114.0, 221.0, 431.0, 1093.0, 3951.0, 27961.0, 799796.0, 199969.0, 11131.0, 2262.0, 751.0, 331.0, 159.0, 74.0, 47.0, 35.0, 15.0, 13.0, 11.0, 7.0, 3.0, 10.0, 6.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.75, -35.47412109375, -34.1982421875, -32.92236328125, -31.646484375, -30.37060546875, -29.0947265625, -27.81884765625, -26.54296875, -25.26708984375, -23.9912109375, -22.71533203125, -21.439453125, -20.16357421875, -18.8876953125, -17.61181640625, -16.3359375, -15.06005859375, -13.7841796875, -12.50830078125, -11.232421875, -9.95654296875, -8.6806640625, -7.40478515625, -6.12890625, -4.85302734375, -3.5771484375, -2.30126953125, -1.025390625, 0.25048828125, 1.5263671875, 2.80224609375, 4.078125, 5.35400390625, 6.6298828125, 7.90576171875, 9.181640625, 10.45751953125, 11.7333984375, 13.00927734375, 14.28515625, 15.56103515625, 16.8369140625, 18.11279296875, 19.388671875, 20.66455078125, 21.9404296875, 23.21630859375, 24.4921875, 25.76806640625, 27.0439453125, 28.31982421875, 29.595703125, 30.87158203125, 32.1474609375, 33.42333984375, 34.69921875, 35.97509765625, 37.2509765625, 38.52685546875, 39.802734375, 41.07861328125, 42.3544921875, 43.63037109375, 44.90625]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 1.0, 3.0, 3.0, 5.0, 3.0, 7.0, 9.0, 6.0, 10.0, 12.0, 19.0, 21.0, 39.0, 30.0, 42.0, 49.0, 55.0, 66.0, 62.0, 61.0, 57.0, 64.0, 61.0, 53.0, 58.0, 35.0, 31.0, 29.0, 24.0, 22.0, 20.0, 16.0, 13.0, 2.0, 2.0, 9.0, 1.0, 0.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-23.21875, -22.426513671875, -21.63427734375, -20.842041015625, -20.0498046875, -19.257568359375, -18.46533203125, -17.673095703125, -16.880859375, -16.088623046875, -15.29638671875, -14.504150390625, -13.7119140625, -12.919677734375, -12.12744140625, -11.335205078125, -10.54296875, -9.750732421875, -8.95849609375, -8.166259765625, -7.3740234375, -6.581787109375, -5.78955078125, -4.997314453125, -4.205078125, -3.412841796875, -2.62060546875, -1.828369140625, -1.0361328125, -0.243896484375, 0.54833984375, 1.340576171875, 2.1328125, 2.925048828125, 3.71728515625, 4.509521484375, 5.3017578125, 6.093994140625, 6.88623046875, 7.678466796875, 8.470703125, 9.262939453125, 10.05517578125, 10.847412109375, 11.6396484375, 12.431884765625, 13.22412109375, 14.016357421875, 14.80859375, 15.600830078125, 16.39306640625, 17.185302734375, 17.9775390625, 18.769775390625, 19.56201171875, 20.354248046875, 21.146484375, 21.938720703125, 22.73095703125, 23.523193359375, 24.3154296875, 25.107666015625, 25.89990234375, 26.692138671875, 27.484375]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 6.0, 8.0, 12.0, 20.0, 27.0, 50.0, 70.0, 146.0, 297.0, 651.0, 1542.0, 4709.0, 23782.0, 810142.0, 189769.0, 12117.0, 3122.0, 1078.0, 483.0, 214.0, 120.0, 65.0, 46.0, 24.0, 19.0, 9.0, 16.0, 5.0, 4.0, 2.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.296875, -18.7867431640625, -18.276611328125, -17.7664794921875, -17.25634765625, -16.7462158203125, -16.236083984375, -15.7259521484375, -15.2158203125, -14.7056884765625, -14.195556640625, -13.6854248046875, -13.17529296875, -12.6651611328125, -12.155029296875, -11.6448974609375, -11.134765625, -10.6246337890625, -10.114501953125, -9.6043701171875, -9.09423828125, -8.5841064453125, -8.073974609375, -7.5638427734375, -7.0537109375, -6.5435791015625, -6.033447265625, -5.5233154296875, -5.01318359375, -4.5030517578125, -3.992919921875, -3.4827880859375, -2.97265625, -2.4625244140625, -1.952392578125, -1.4422607421875, -0.93212890625, -0.4219970703125, 0.088134765625, 0.5982666015625, 1.1083984375, 1.6185302734375, 2.128662109375, 2.6387939453125, 3.14892578125, 3.6590576171875, 4.169189453125, 4.6793212890625, 5.189453125, 5.6995849609375, 6.209716796875, 6.7198486328125, 7.22998046875, 7.7401123046875, 8.250244140625, 8.7603759765625, 9.2705078125, 9.7806396484375, 10.290771484375, 10.8009033203125, 11.31103515625, 11.8211669921875, 12.331298828125, 12.8414306640625, 13.3515625]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 5.0, 7.0, 9.0, 11.0, 16.0, 18.0, 24.0, 52.0, 99.0, 140.0, 227.0, 159.0, 90.0, 47.0, 26.0, 19.0, 18.0, 12.0, 9.0, 5.0, 1.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004344940185546875, -0.004237711429595947, -0.0041304826736450195, -0.004023253917694092, -0.003916025161743164, -0.0038087964057922363, -0.0037015676498413086, -0.003594338893890381, -0.003487110137939453, -0.0033798813819885254, -0.0032726526260375977, -0.00316542387008667, -0.003058195114135742, -0.0029509663581848145, -0.0028437376022338867, -0.002736508846282959, -0.0026292800903320312, -0.0025220513343811035, -0.0024148225784301758, -0.002307593822479248, -0.0022003650665283203, -0.0020931363105773926, -0.001985907554626465, -0.0018786787986755371, -0.0017714500427246094, -0.0016642212867736816, -0.001556992530822754, -0.0014497637748718262, -0.0013425350189208984, -0.0012353062629699707, -0.001128077507019043, -0.0010208487510681152, -0.0009136199951171875, -0.0008063912391662598, -0.000699162483215332, -0.0005919337272644043, -0.00048470497131347656, -0.00037747621536254883, -0.0002702474594116211, -0.00016301870346069336, -5.5789947509765625e-05, 5.143880844116211e-05, 0.00015866756439208984, 0.0002658963203430176, 0.0003731250762939453, 0.00048035383224487305, 0.0005875825881958008, 0.0006948113441467285, 0.0008020401000976562, 0.000909268856048584, 0.0010164976119995117, 0.0011237263679504395, 0.0012309551239013672, 0.001338183879852295, 0.0014454126358032227, 0.0015526413917541504, 0.0016598701477050781, 0.0017670989036560059, 0.0018743276596069336, 0.0019815564155578613, 0.002088785171508789, 0.002196013927459717, 0.0023032426834106445, 0.0024104714393615723, 0.0025177001953125]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 5.0, 8.0, 2.0, 6.0, 11.0, 9.0, 20.0, 21.0, 23.0, 33.0, 53.0, 69.0, 90.0, 145.0, 242.0, 371.0, 621.0, 1046.0, 1942.0, 4141.0, 10338.0, 40063.0, 530523.0, 401626.0, 38414.0, 10343.0, 3929.0, 1823.0, 1047.0, 581.0, 368.0, 196.0, 134.0, 102.0, 51.0, 46.0, 37.0, 16.0, 19.0, 9.0, 14.0, 10.0, 4.0, 4.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-10.046875, -9.7423095703125, -9.437744140625, -9.1331787109375, -8.82861328125, -8.5240478515625, -8.219482421875, -7.9149169921875, -7.6103515625, -7.3057861328125, -7.001220703125, -6.6966552734375, -6.39208984375, -6.0875244140625, -5.782958984375, -5.4783935546875, -5.173828125, -4.8692626953125, -4.564697265625, -4.2601318359375, -3.95556640625, -3.6510009765625, -3.346435546875, -3.0418701171875, -2.7373046875, -2.4327392578125, -2.128173828125, -1.8236083984375, -1.51904296875, -1.2144775390625, -0.909912109375, -0.6053466796875, -0.30078125, 0.0037841796875, 0.308349609375, 0.6129150390625, 0.91748046875, 1.2220458984375, 1.526611328125, 1.8311767578125, 2.1357421875, 2.4403076171875, 2.744873046875, 3.0494384765625, 3.35400390625, 3.6585693359375, 3.963134765625, 4.2677001953125, 4.572265625, 4.8768310546875, 5.181396484375, 5.4859619140625, 5.79052734375, 6.0950927734375, 6.399658203125, 6.7042236328125, 7.0087890625, 7.3133544921875, 7.617919921875, 7.9224853515625, 8.22705078125, 8.5316162109375, 8.836181640625, 9.1407470703125, 9.4453125]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 9.0, 6.0, 1.0, 8.0, 9.0, 3.0, 14.0, 18.0, 17.0, 14.0, 22.0, 31.0, 50.0, 74.0, 115.0, 113.0, 111.0, 112.0, 80.0, 57.0, 33.0, 15.0, 19.0, 20.0, 10.0, 10.0, 9.0, 8.0, 7.0, 1.0, 3.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-11.6796875, -11.38818359375, -11.0966796875, -10.80517578125, -10.513671875, -10.22216796875, -9.9306640625, -9.63916015625, -9.34765625, -9.05615234375, -8.7646484375, -8.47314453125, -8.181640625, -7.89013671875, -7.5986328125, -7.30712890625, -7.015625, -6.72412109375, -6.4326171875, -6.14111328125, -5.849609375, -5.55810546875, -5.2666015625, -4.97509765625, -4.68359375, -4.39208984375, -4.1005859375, -3.80908203125, -3.517578125, -3.22607421875, -2.9345703125, -2.64306640625, -2.3515625, -2.06005859375, -1.7685546875, -1.47705078125, -1.185546875, -0.89404296875, -0.6025390625, -0.31103515625, -0.01953125, 0.27197265625, 0.5634765625, 0.85498046875, 1.146484375, 1.43798828125, 1.7294921875, 2.02099609375, 2.3125, 2.60400390625, 2.8955078125, 3.18701171875, 3.478515625, 3.77001953125, 4.0615234375, 4.35302734375, 4.64453125, 4.93603515625, 5.2275390625, 5.51904296875, 5.810546875, 6.10205078125, 6.3935546875, 6.68505859375, 6.9765625]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 13.0, 20.0, 76.0, 318.0, 449.0, 91.0, 26.0, 4.0, 6.0, 3.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-564.4317626953125, -551.3387451171875, -538.2456665039062, -525.1526489257812, -512.0595703125, -498.966552734375, -485.8735046386719, -472.78045654296875, -459.68743896484375, -446.5943908691406, -433.5013427734375, -420.4083251953125, -407.3152770996094, -394.22222900390625, -381.1291809082031, -368.0361328125, -354.9430847167969, -341.85003662109375, -328.7569885253906, -315.6639404296875, -302.5709228515625, -289.4778747558594, -276.38482666015625, -263.2917785644531, -250.19874572753906, -237.10569763183594, -224.01266479492188, -210.91961669921875, -197.82656860351562, -184.73353576660156, -171.64048767089844, -158.54745483398438, -145.45440673828125, -132.36135864257812, -119.26832580566406, -106.17527770996094, -93.08223724365234, -79.98919677734375, -66.89614868164062, -53.80310821533203, -40.71006774902344, -27.61702537536621, -14.523983001708984, -1.430938720703125, 11.662101745605469, 24.755142211914062, 37.84819030761719, 50.94123077392578, 64.03427124023438, 77.12731170654297, 90.22035217285156, 103.31340026855469, 116.40644073486328, 129.49948120117188, 142.592529296875, 155.68557739257812, 168.7786102294922, 181.8716583251953, 194.96469116210938, 208.0577392578125, 221.15078735351562, 234.2438201904297, 247.3368682861328, 260.4299011230469, 273.52294921875]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 7.0, 1.0, 6.0, 7.0, 5.0, 7.0, 5.0, 11.0, 14.0, 20.0, 20.0, 23.0, 23.0, 23.0, 27.0, 22.0, 28.0, 37.0, 54.0, 65.0, 78.0, 81.0, 68.0, 47.0, 53.0, 45.0, 33.0, 21.0, 29.0, 25.0, 21.0, 19.0, 11.0, 15.0, 15.0, 11.0, 9.0, 9.0, 6.0, 6.0, 3.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-91.65325927734375, -88.95291900634766, -86.25257873535156, -83.55223846435547, -80.85189819335938, -78.15155792236328, -75.45121765136719, -72.7508773803711, -70.050537109375, -67.3501968383789, -64.64985656738281, -61.94951629638672, -59.249176025390625, -56.54883575439453, -53.84849548339844, -51.148155212402344, -48.44781494140625, -45.747474670410156, -43.04713439941406, -40.34679412841797, -37.646453857421875, -34.94611358642578, -32.24577331542969, -29.545433044433594, -26.8450927734375, -24.144752502441406, -21.444412231445312, -18.74407196044922, -16.043731689453125, -13.343391418457031, -10.643051147460938, -7.942710876464844, -5.242378234863281, -2.5420379638671875, 0.15830230712890625, 2.858642578125, 5.558982849121094, 8.259323120117188, 10.959663391113281, 13.660003662109375, 16.36034393310547, 19.060684204101562, 21.761024475097656, 24.46136474609375, 27.161705017089844, 29.862045288085938, 32.56238555908203, 35.262725830078125, 37.96306610107422, 40.66340637207031, 43.363746643066406, 46.0640869140625, 48.764427185058594, 51.46476745605469, 54.16510772705078, 56.865447998046875, 59.56578826904297, 62.26612854003906, 64.96646881103516, 67.66680908203125, 70.36714935302734, 73.06748962402344, 75.76782989501953, 78.46817016601562, 81.16851043701172]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 6.0, 3.0, 8.0, 9.0, 13.0, 32.0, 35.0, 69.0, 132.0, 378.0, 1319.0, 6963.0, 1256620.0, 2918526.0, 8186.0, 1314.0, 319.0, 159.0, 70.0, 43.0, 21.0, 13.0, 12.0, 7.0, 11.0, 5.0, 2.0, 2.0, 2.0, 0.0, 3.0, 1.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-26.078125, -24.993408203125, -23.90869140625, -22.823974609375, -21.7392578125, -20.654541015625, -19.56982421875, -18.485107421875, -17.400390625, -16.315673828125, -15.23095703125, -14.146240234375, -13.0615234375, -11.976806640625, -10.89208984375, -9.807373046875, -8.72265625, -7.637939453125, -6.55322265625, -5.468505859375, -4.3837890625, -3.299072265625, -2.21435546875, -1.129638671875, -0.044921875, 1.039794921875, 2.12451171875, 3.209228515625, 4.2939453125, 5.378662109375, 6.46337890625, 7.548095703125, 8.6328125, 9.717529296875, 10.80224609375, 11.886962890625, 12.9716796875, 14.056396484375, 15.14111328125, 16.225830078125, 17.310546875, 18.395263671875, 19.47998046875, 20.564697265625, 21.6494140625, 22.734130859375, 23.81884765625, 24.903564453125, 25.98828125, 27.072998046875, 28.15771484375, 29.242431640625, 30.3271484375, 31.411865234375, 32.49658203125, 33.581298828125, 34.666015625, 35.750732421875, 36.83544921875, 37.920166015625, 39.0048828125, 40.089599609375, 41.17431640625, 42.259033203125, 43.34375]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 9.0, 4.0, 5.0, 9.0, 15.0, 27.0, 47.0, 41.0, 41.0, 72.0, 82.0, 83.0, 77.0, 77.0, 87.0, 88.0, 63.0, 55.0, 34.0, 38.0, 20.0, 16.0, 9.0, 7.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.25390625, -6.99322509765625, -6.7325439453125, -6.47186279296875, -6.211181640625, -5.95050048828125, -5.6898193359375, -5.42913818359375, -5.16845703125, -4.90777587890625, -4.6470947265625, -4.38641357421875, -4.125732421875, -3.86505126953125, -3.6043701171875, -3.34368896484375, -3.0830078125, -2.82232666015625, -2.5616455078125, -2.30096435546875, -2.040283203125, -1.77960205078125, -1.5189208984375, -1.25823974609375, -0.99755859375, -0.73687744140625, -0.4761962890625, -0.21551513671875, 0.045166015625, 0.30584716796875, 0.5665283203125, 0.82720947265625, 1.087890625, 1.34857177734375, 1.6092529296875, 1.86993408203125, 2.130615234375, 2.39129638671875, 2.6519775390625, 2.91265869140625, 3.17333984375, 3.43402099609375, 3.6947021484375, 3.95538330078125, 4.216064453125, 4.47674560546875, 4.7374267578125, 4.99810791015625, 5.2587890625, 5.51947021484375, 5.7801513671875, 6.04083251953125, 6.301513671875, 6.56219482421875, 6.8228759765625, 7.08355712890625, 7.34423828125, 7.60491943359375, 7.8656005859375, 8.12628173828125, 8.386962890625, 8.64764404296875, 8.9083251953125, 9.16900634765625, 9.4296875]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 6.0, 2.0, 8.0, 5.0, 14.0, 17.0, 21.0, 49.0, 95.0, 169.0, 331.0, 851.0, 3665.0, 53112.0, 4115424.0, 17222.0, 2155.0, 620.0, 260.0, 126.0, 67.0, 28.0, 18.0, 12.0, 12.0, 1.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-49.65625, -48.02099609375, -46.3857421875, -44.75048828125, -43.115234375, -41.47998046875, -39.8447265625, -38.20947265625, -36.57421875, -34.93896484375, -33.3037109375, -31.66845703125, -30.033203125, -28.39794921875, -26.7626953125, -25.12744140625, -23.4921875, -21.85693359375, -20.2216796875, -18.58642578125, -16.951171875, -15.31591796875, -13.6806640625, -12.04541015625, -10.41015625, -8.77490234375, -7.1396484375, -5.50439453125, -3.869140625, -2.23388671875, -0.5986328125, 1.03662109375, 2.671875, 4.30712890625, 5.9423828125, 7.57763671875, 9.212890625, 10.84814453125, 12.4833984375, 14.11865234375, 15.75390625, 17.38916015625, 19.0244140625, 20.65966796875, 22.294921875, 23.93017578125, 25.5654296875, 27.20068359375, 28.8359375, 30.47119140625, 32.1064453125, 33.74169921875, 35.376953125, 37.01220703125, 38.6474609375, 40.28271484375, 41.91796875, 43.55322265625, 45.1884765625, 46.82373046875, 48.458984375, 50.09423828125, 51.7294921875, 53.36474609375, 55.0]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 0.0, 3.0, 5.0, 4.0, 3.0, 7.0, 5.0, 16.0, 15.0, 46.0, 49.0, 125.0, 355.0, 1512.0, 1354.0, 352.0, 97.0, 52.0, 32.0, 13.0, 16.0, 10.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-28.40625, -27.658447265625, -26.91064453125, -26.162841796875, -25.4150390625, -24.667236328125, -23.91943359375, -23.171630859375, -22.423828125, -21.676025390625, -20.92822265625, -20.180419921875, -19.4326171875, -18.684814453125, -17.93701171875, -17.189208984375, -16.44140625, -15.693603515625, -14.94580078125, -14.197998046875, -13.4501953125, -12.702392578125, -11.95458984375, -11.206787109375, -10.458984375, -9.711181640625, -8.96337890625, -8.215576171875, -7.4677734375, -6.719970703125, -5.97216796875, -5.224365234375, -4.4765625, -3.728759765625, -2.98095703125, -2.233154296875, -1.4853515625, -0.737548828125, 0.01025390625, 0.758056640625, 1.505859375, 2.253662109375, 3.00146484375, 3.749267578125, 4.4970703125, 5.244873046875, 5.99267578125, 6.740478515625, 7.48828125, 8.236083984375, 8.98388671875, 9.731689453125, 10.4794921875, 11.227294921875, 11.97509765625, 12.722900390625, 13.470703125, 14.218505859375, 14.96630859375, 15.714111328125, 16.4619140625, 17.209716796875, 17.95751953125, 18.705322265625, 19.453125]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 0.0, 4.0, 5.0, 12.0, 38.0, 145.0, 496.0, 238.0, 54.0, 9.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-499.31146240234375, -486.387939453125, -473.4644470214844, -460.5409240722656, -447.6174011230469, -434.69390869140625, -421.7703857421875, -408.84686279296875, -395.92333984375, -382.99981689453125, -370.0763244628906, -357.1528015136719, -344.2292785644531, -331.3057861328125, -318.38226318359375, -305.458740234375, -292.5352478027344, -279.6117248535156, -266.688232421875, -253.76470947265625, -240.8411865234375, -227.9176788330078, -214.99417114257812, -202.07064819335938, -189.1471405029297, -176.2236328125, -163.30010986328125, -150.37660217285156, -137.45309448242188, -124.52957153320312, -111.60606384277344, -98.68254852294922, -85.75906372070312, -72.8355484008789, -59.91203689575195, -46.988525390625, -34.06501007080078, -21.141494750976562, -8.217987060546875, 4.705528259277344, 17.629043579101562, 30.55255699157715, 43.476070404052734, 56.39958190917969, 69.3230972290039, 82.24661254882812, 95.17012023925781, 108.09363555908203, 121.01715087890625, 133.94065856933594, 146.8641815185547, 159.78768920898438, 172.71121215820312, 185.6347198486328, 198.5582275390625, 211.48175048828125, 224.40525817871094, 237.32876586914062, 250.25228881835938, 263.17578125, 276.09930419921875, 289.0228271484375, 301.94635009765625, 314.8698425292969, 327.7933654785156]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 6.0, 9.0, 16.0, 25.0, 56.0, 75.0, 104.0, 106.0, 118.0, 112.0, 102.0, 110.0, 71.0, 46.0, 21.0, 14.0, 8.0, 6.0, 1.0, 0.0, 2.0, 2.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-185.2910919189453, -180.87242126464844, -176.4537353515625, -172.03506469726562, -167.6163787841797, -163.1977081298828, -158.77902221679688, -154.3603515625, -149.94166564941406, -145.5229949951172, -141.10430908203125, -136.68563842773438, -132.26695251464844, -127.84827423095703, -123.42959594726562, -119.01092529296875, -114.59224700927734, -110.17356872558594, -105.75489044189453, -101.33621215820312, -96.91753387451172, -92.49885559082031, -88.08018493652344, -83.6614990234375, -79.24282836914062, -74.82415008544922, -70.40547180175781, -65.9867935180664, -61.568115234375, -57.149436950683594, -52.73076248168945, -48.31208419799805, -43.893402099609375, -39.47472381591797, -35.05604553222656, -30.63736915588379, -26.218690872192383, -21.800012588500977, -17.381336212158203, -12.962657928466797, -8.54397964477539, -4.125301837921143, 0.29337596893310547, 4.712053298950195, 9.130731582641602, 13.549409866333008, 17.96808624267578, 22.386764526367188, 26.805442810058594, 31.22412109375, 35.642799377441406, 40.06147766113281, 44.48015594482422, 48.898834228515625, 53.317508697509766, 57.73618698120117, 62.15486526489258, 66.57353973388672, 70.99221801757812, 75.41089630126953, 79.82957458496094, 84.24825286865234, 88.66693115234375, 93.08560943603516, 97.50428771972656]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 5.0, 5.0, 3.0, 5.0, 7.0, 9.0, 15.0, 25.0, 24.0, 32.0, 46.0, 70.0, 106.0, 181.0, 319.0, 719.0, 1892.0, 5800.0, 20584.0, 88750.0, 416790.0, 399890.0, 84503.0, 19814.0, 5560.0, 1850.0, 698.0, 357.0, 151.0, 103.0, 65.0, 54.0, 40.0, 20.0, 16.0, 18.0, 7.0, 8.0, 5.0, 5.0, 1.0, 4.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-16.5, -15.94970703125, -15.3994140625, -14.84912109375, -14.298828125, -13.74853515625, -13.1982421875, -12.64794921875, -12.09765625, -11.54736328125, -10.9970703125, -10.44677734375, -9.896484375, -9.34619140625, -8.7958984375, -8.24560546875, -7.6953125, -7.14501953125, -6.5947265625, -6.04443359375, -5.494140625, -4.94384765625, -4.3935546875, -3.84326171875, -3.29296875, -2.74267578125, -2.1923828125, -1.64208984375, -1.091796875, -0.54150390625, 0.0087890625, 0.55908203125, 1.109375, 1.65966796875, 2.2099609375, 2.76025390625, 3.310546875, 3.86083984375, 4.4111328125, 4.96142578125, 5.51171875, 6.06201171875, 6.6123046875, 7.16259765625, 7.712890625, 8.26318359375, 8.8134765625, 9.36376953125, 9.9140625, 10.46435546875, 11.0146484375, 11.56494140625, 12.115234375, 12.66552734375, 13.2158203125, 13.76611328125, 14.31640625, 14.86669921875, 15.4169921875, 15.96728515625, 16.517578125, 17.06787109375, 17.6181640625, 18.16845703125, 18.71875]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 3.0, 6.0, 8.0, 3.0, 9.0, 10.0, 21.0, 24.0, 20.0, 40.0, 39.0, 50.0, 74.0, 60.0, 73.0, 81.0, 67.0, 67.0, 67.0, 75.0, 52.0, 43.0, 39.0, 29.0, 23.0, 11.0, 2.0, 9.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.6953125, -6.444580078125, -6.19384765625, -5.943115234375, -5.6923828125, -5.441650390625, -5.19091796875, -4.940185546875, -4.689453125, -4.438720703125, -4.18798828125, -3.937255859375, -3.6865234375, -3.435791015625, -3.18505859375, -2.934326171875, -2.68359375, -2.432861328125, -2.18212890625, -1.931396484375, -1.6806640625, -1.429931640625, -1.17919921875, -0.928466796875, -0.677734375, -0.427001953125, -0.17626953125, 0.074462890625, 0.3251953125, 0.575927734375, 0.82666015625, 1.077392578125, 1.328125, 1.578857421875, 1.82958984375, 2.080322265625, 2.3310546875, 2.581787109375, 2.83251953125, 3.083251953125, 3.333984375, 3.584716796875, 3.83544921875, 4.086181640625, 4.3369140625, 4.587646484375, 4.83837890625, 5.089111328125, 5.33984375, 5.590576171875, 5.84130859375, 6.092041015625, 6.3427734375, 6.593505859375, 6.84423828125, 7.094970703125, 7.345703125, 7.596435546875, 7.84716796875, 8.097900390625, 8.3486328125, 8.599365234375, 8.85009765625, 9.100830078125, 9.3515625]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 0.0, 3.0, 1.0, 5.0, 0.0, 9.0, 10.0, 15.0, 13.0, 31.0, 46.0, 62.0, 117.0, 189.0, 388.0, 908.0, 2130.0, 6444.0, 25070.0, 142922.0, 705152.0, 131401.0, 23957.0, 6074.0, 1971.0, 763.0, 374.0, 185.0, 104.0, 76.0, 49.0, 22.0, 22.0, 15.0, 11.0, 4.0, 2.0, 4.0, 3.0, 1.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-18.703125, -18.04638671875, -17.3896484375, -16.73291015625, -16.076171875, -15.41943359375, -14.7626953125, -14.10595703125, -13.44921875, -12.79248046875, -12.1357421875, -11.47900390625, -10.822265625, -10.16552734375, -9.5087890625, -8.85205078125, -8.1953125, -7.53857421875, -6.8818359375, -6.22509765625, -5.568359375, -4.91162109375, -4.2548828125, -3.59814453125, -2.94140625, -2.28466796875, -1.6279296875, -0.97119140625, -0.314453125, 0.34228515625, 0.9990234375, 1.65576171875, 2.3125, 2.96923828125, 3.6259765625, 4.28271484375, 4.939453125, 5.59619140625, 6.2529296875, 6.90966796875, 7.56640625, 8.22314453125, 8.8798828125, 9.53662109375, 10.193359375, 10.85009765625, 11.5068359375, 12.16357421875, 12.8203125, 13.47705078125, 14.1337890625, 14.79052734375, 15.447265625, 16.10400390625, 16.7607421875, 17.41748046875, 18.07421875, 18.73095703125, 19.3876953125, 20.04443359375, 20.701171875, 21.35791015625, 22.0146484375, 22.67138671875, 23.328125]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 3.0, 3.0, 5.0, 5.0, 3.0, 8.0, 15.0, 16.0, 14.0, 18.0, 21.0, 19.0, 29.0, 32.0, 30.0, 42.0, 52.0, 38.0, 54.0, 44.0, 72.0, 47.0, 64.0, 56.0, 41.0, 47.0, 35.0, 25.0, 26.0, 18.0, 25.0, 16.0, 21.0, 17.0, 13.0, 9.0, 11.0, 4.0, 1.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-21.140625, -20.532470703125, -19.92431640625, -19.316162109375, -18.7080078125, -18.099853515625, -17.49169921875, -16.883544921875, -16.275390625, -15.667236328125, -15.05908203125, -14.450927734375, -13.8427734375, -13.234619140625, -12.62646484375, -12.018310546875, -11.41015625, -10.802001953125, -10.19384765625, -9.585693359375, -8.9775390625, -8.369384765625, -7.76123046875, -7.153076171875, -6.544921875, -5.936767578125, -5.32861328125, -4.720458984375, -4.1123046875, -3.504150390625, -2.89599609375, -2.287841796875, -1.6796875, -1.071533203125, -0.46337890625, 0.144775390625, 0.7529296875, 1.361083984375, 1.96923828125, 2.577392578125, 3.185546875, 3.793701171875, 4.40185546875, 5.010009765625, 5.6181640625, 6.226318359375, 6.83447265625, 7.442626953125, 8.05078125, 8.658935546875, 9.26708984375, 9.875244140625, 10.4833984375, 11.091552734375, 11.69970703125, 12.307861328125, 12.916015625, 13.524169921875, 14.13232421875, 14.740478515625, 15.3486328125, 15.956787109375, 16.56494140625, 17.173095703125, 17.78125]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 5.0, 5.0, 5.0, 7.0, 8.0, 21.0, 14.0, 32.0, 43.0, 67.0, 109.0, 169.0, 293.0, 569.0, 1007.0, 2160.0, 4761.0, 11764.0, 34432.0, 117818.0, 547521.0, 229541.0, 63895.0, 20256.0, 7521.0, 3218.0, 1502.0, 772.0, 405.0, 245.0, 141.0, 90.0, 56.0, 38.0, 24.0, 18.0, 8.0, 3.0, 6.0, 3.0, 3.0, 3.0, 0.0, 3.0, 3.0, 1.0, 0.0, 1.0], "bins": [-5.37109375, -5.2249755859375, -5.078857421875, -4.9327392578125, -4.78662109375, -4.6405029296875, -4.494384765625, -4.3482666015625, -4.2021484375, -4.0560302734375, -3.909912109375, -3.7637939453125, -3.61767578125, -3.4715576171875, -3.325439453125, -3.1793212890625, -3.033203125, -2.8870849609375, -2.740966796875, -2.5948486328125, -2.44873046875, -2.3026123046875, -2.156494140625, -2.0103759765625, -1.8642578125, -1.7181396484375, -1.572021484375, -1.4259033203125, -1.27978515625, -1.1336669921875, -0.987548828125, -0.8414306640625, -0.6953125, -0.5491943359375, -0.403076171875, -0.2569580078125, -0.11083984375, 0.0352783203125, 0.181396484375, 0.3275146484375, 0.4736328125, 0.6197509765625, 0.765869140625, 0.9119873046875, 1.05810546875, 1.2042236328125, 1.350341796875, 1.4964599609375, 1.642578125, 1.7886962890625, 1.934814453125, 2.0809326171875, 2.22705078125, 2.3731689453125, 2.519287109375, 2.6654052734375, 2.8115234375, 2.9576416015625, 3.103759765625, 3.2498779296875, 3.39599609375, 3.5421142578125, 3.688232421875, 3.8343505859375, 3.98046875]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 3.0, 1.0, 7.0, 3.0, 3.0, 7.0, 11.0, 21.0, 23.0, 61.0, 94.0, 145.0, 177.0, 155.0, 124.0, 70.0, 40.0, 21.0, 16.0, 11.0, 5.0, 3.0, 3.0, 5.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0027141571044921875, -0.00261610746383667, -0.0025180578231811523, -0.0024200081825256348, -0.002321958541870117, -0.0022239089012145996, -0.002125859260559082, -0.0020278096199035645, -0.0019297599792480469, -0.0018317103385925293, -0.0017336606979370117, -0.0016356110572814941, -0.0015375614166259766, -0.001439511775970459, -0.0013414621353149414, -0.0012434124946594238, -0.0011453628540039062, -0.0010473132133483887, -0.0009492635726928711, -0.0008512139320373535, -0.0007531642913818359, -0.0006551146507263184, -0.0005570650100708008, -0.0004590153694152832, -0.0003609657287597656, -0.00026291608810424805, -0.00016486644744873047, -6.681680679321289e-05, 3.123283386230469e-05, 0.00012928247451782227, 0.00022733211517333984, 0.0003253817558288574, 0.000423431396484375, 0.0005214810371398926, 0.0006195306777954102, 0.0007175803184509277, 0.0008156299591064453, 0.0009136795997619629, 0.0010117292404174805, 0.001109778881072998, 0.0012078285217285156, 0.0013058781623840332, 0.0014039278030395508, 0.0015019774436950684, 0.001600027084350586, 0.0016980767250061035, 0.001796126365661621, 0.0018941760063171387, 0.0019922256469726562, 0.002090275287628174, 0.0021883249282836914, 0.002286374568939209, 0.0023844242095947266, 0.002482473850250244, 0.0025805234909057617, 0.0026785731315612793, 0.002776622772216797, 0.0028746724128723145, 0.002972722053527832, 0.0030707716941833496, 0.003168821334838867, 0.0032668709754943848, 0.0033649206161499023, 0.00346297025680542, 0.0035610198974609375]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 6.0, 5.0, 7.0, 12.0, 23.0, 31.0, 39.0, 98.0, 137.0, 317.0, 622.0, 1262.0, 3181.0, 10019.0, 47333.0, 371450.0, 545934.0, 51603.0, 10574.0, 3334.0, 1288.0, 610.0, 301.0, 137.0, 86.0, 56.0, 36.0, 17.0, 22.0, 4.0, 4.0, 4.0, 6.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.89453125, -7.64801025390625, -7.4014892578125, -7.15496826171875, -6.908447265625, -6.66192626953125, -6.4154052734375, -6.16888427734375, -5.92236328125, -5.67584228515625, -5.4293212890625, -5.18280029296875, -4.936279296875, -4.68975830078125, -4.4432373046875, -4.19671630859375, -3.9501953125, -3.70367431640625, -3.4571533203125, -3.21063232421875, -2.964111328125, -2.71759033203125, -2.4710693359375, -2.22454833984375, -1.97802734375, -1.73150634765625, -1.4849853515625, -1.23846435546875, -0.991943359375, -0.74542236328125, -0.4989013671875, -0.25238037109375, -0.005859375, 0.24066162109375, 0.4871826171875, 0.73370361328125, 0.980224609375, 1.22674560546875, 1.4732666015625, 1.71978759765625, 1.96630859375, 2.21282958984375, 2.4593505859375, 2.70587158203125, 2.952392578125, 3.19891357421875, 3.4454345703125, 3.69195556640625, 3.9384765625, 4.18499755859375, 4.4315185546875, 4.67803955078125, 4.924560546875, 5.17108154296875, 5.4176025390625, 5.66412353515625, 5.91064453125, 6.15716552734375, 6.4036865234375, 6.65020751953125, 6.896728515625, 7.14324951171875, 7.3897705078125, 7.63629150390625, 7.8828125]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 3.0, 3.0, 3.0, 2.0, 4.0, 14.0, 7.0, 16.0, 20.0, 23.0, 36.0, 56.0, 48.0, 63.0, 69.0, 86.0, 87.0, 91.0, 75.0, 62.0, 53.0, 40.0, 39.0, 27.0, 20.0, 14.0, 10.0, 8.0, 5.0, 6.0, 2.0, 3.0, 7.0, 4.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.125, -4.93402099609375, -4.7430419921875, -4.55206298828125, -4.361083984375, -4.17010498046875, -3.9791259765625, -3.78814697265625, -3.59716796875, -3.40618896484375, -3.2152099609375, -3.02423095703125, -2.833251953125, -2.64227294921875, -2.4512939453125, -2.26031494140625, -2.0693359375, -1.87835693359375, -1.6873779296875, -1.49639892578125, -1.305419921875, -1.11444091796875, -0.9234619140625, -0.73248291015625, -0.54150390625, -0.35052490234375, -0.1595458984375, 0.03143310546875, 0.222412109375, 0.41339111328125, 0.6043701171875, 0.79534912109375, 0.986328125, 1.17730712890625, 1.3682861328125, 1.55926513671875, 1.750244140625, 1.94122314453125, 2.1322021484375, 2.32318115234375, 2.51416015625, 2.70513916015625, 2.8961181640625, 3.08709716796875, 3.278076171875, 3.46905517578125, 3.6600341796875, 3.85101318359375, 4.0419921875, 4.23297119140625, 4.4239501953125, 4.61492919921875, 4.805908203125, 4.99688720703125, 5.1878662109375, 5.37884521484375, 5.56982421875, 5.76080322265625, 5.9517822265625, 6.14276123046875, 6.333740234375, 6.52471923828125, 6.7156982421875, 6.90667724609375, 7.09765625]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 0.0, 3.0, 1.0, 2.0, 7.0, 8.0, 12.0, 14.0, 27.0, 43.0, 84.0, 107.0, 243.0, 207.0, 115.0, 59.0, 27.0, 13.0, 15.0, 4.0, 6.0, 5.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-114.07264709472656, -109.68798828125, -105.30332946777344, -100.91867065429688, -96.53401184082031, -92.14935302734375, -87.76469421386719, -83.38003540039062, -78.99537658691406, -74.6107177734375, -70.22605895996094, -65.84140014648438, -61.45674133300781, -57.07208251953125, -52.68742370605469, -48.302764892578125, -43.91810607910156, -39.533447265625, -35.14878845214844, -30.764129638671875, -26.379470825195312, -21.99481201171875, -17.610153198242188, -13.225494384765625, -8.840835571289062, -4.4561767578125, -0.0715179443359375, 4.313140869140625, 8.697799682617188, 13.08245849609375, 17.467117309570312, 21.851776123046875, 26.2364501953125, 30.621109008789062, 35.005767822265625, 39.39042663574219, 43.77508544921875, 48.15974426269531, 52.544403076171875, 56.92906188964844, 61.313720703125, 65.69837951660156, 70.08303833007812, 74.46769714355469, 78.85235595703125, 83.23701477050781, 87.62167358398438, 92.00633239746094, 96.3909912109375, 100.77565002441406, 105.16030883789062, 109.54496765136719, 113.92962646484375, 118.31428527832031, 122.69894409179688, 127.08360290527344, 131.46826171875, 135.85292053222656, 140.23757934570312, 144.6222381591797, 149.00689697265625, 153.3915557861328, 157.77621459960938, 162.16087341308594, 166.5455322265625]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 7.0, 4.0, 5.0, 6.0, 7.0, 5.0, 8.0, 8.0, 16.0, 19.0, 22.0, 25.0, 12.0, 24.0, 18.0, 23.0, 25.0, 37.0, 37.0, 55.0, 58.0, 70.0, 81.0, 51.0, 57.0, 29.0, 28.0, 33.0, 22.0, 26.0, 23.0, 15.0, 18.0, 21.0, 16.0, 19.0, 14.0, 13.0, 7.0, 9.0, 6.0, 4.0, 6.0, 6.0, 2.0, 5.0, 1.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-67.41326141357422, -65.29957580566406, -63.185890197753906, -61.07220458984375, -58.958518981933594, -56.84483337402344, -54.731143951416016, -52.61745834350586, -50.5037727355957, -48.39008712768555, -46.27640151977539, -44.162715911865234, -42.04902648925781, -39.935340881347656, -37.8216552734375, -35.707969665527344, -33.59428405761719, -31.48059844970703, -29.366912841796875, -27.253225326538086, -25.13953971862793, -23.025854110717773, -20.912166595458984, -18.798480987548828, -16.684795379638672, -14.571109771728516, -12.457423210144043, -10.34373664855957, -8.230051040649414, -6.116365432739258, -4.002678871154785, -1.8889923095703125, 0.2246856689453125, 2.338371753692627, 4.452057838439941, 6.565743923187256, 8.67943000793457, 10.793115615844727, 12.9068021774292, 15.020488739013672, 17.134174346923828, 19.247859954833984, 21.36154556274414, 23.47523307800293, 25.588918685913086, 27.702604293823242, 29.81629180908203, 31.929977416992188, 34.043663024902344, 36.1573486328125, 38.271034240722656, 40.38471984863281, 42.49840545654297, 44.612091064453125, 46.72578048706055, 48.8394660949707, 50.95315170288086, 53.066837310791016, 55.18052291870117, 57.29420852661133, 59.40789794921875, 61.521583557128906, 63.63526916503906, 65.74895477294922, 67.86264038085938]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 2.0, 4.0, 8.0, 6.0, 9.0, 16.0, 15.0, 26.0, 51.0, 89.0, 174.0, 404.0, 1025.0, 4016.0, 38227.0, 4076819.0, 66782.0, 4625.0, 1087.0, 404.0, 229.0, 93.0, 49.0, 30.0, 27.0, 20.0, 8.0, 10.0, 15.0, 3.0, 1.0, 2.0, 4.0, 4.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.984375, -26.889892578125, -25.79541015625, -24.700927734375, -23.6064453125, -22.511962890625, -21.41748046875, -20.322998046875, -19.228515625, -18.134033203125, -17.03955078125, -15.945068359375, -14.8505859375, -13.756103515625, -12.66162109375, -11.567138671875, -10.47265625, -9.378173828125, -8.28369140625, -7.189208984375, -6.0947265625, -5.000244140625, -3.90576171875, -2.811279296875, -1.716796875, -0.622314453125, 0.47216796875, 1.566650390625, 2.6611328125, 3.755615234375, 4.85009765625, 5.944580078125, 7.0390625, 8.133544921875, 9.22802734375, 10.322509765625, 11.4169921875, 12.511474609375, 13.60595703125, 14.700439453125, 15.794921875, 16.889404296875, 17.98388671875, 19.078369140625, 20.1728515625, 21.267333984375, 22.36181640625, 23.456298828125, 24.55078125, 25.645263671875, 26.73974609375, 27.834228515625, 28.9287109375, 30.023193359375, 31.11767578125, 32.212158203125, 33.306640625, 34.401123046875, 35.49560546875, 36.590087890625, 37.6845703125, 38.779052734375, 39.87353515625, 40.968017578125, 42.0625]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 5.0, 3.0, 3.0, 3.0, 4.0, 15.0, 9.0, 17.0, 21.0, 18.0, 15.0, 31.0, 26.0, 44.0, 48.0, 46.0, 58.0, 61.0, 62.0, 62.0, 70.0, 48.0, 49.0, 47.0, 45.0, 34.0, 39.0, 26.0, 16.0, 21.0, 20.0, 15.0, 8.0, 7.0, 3.0, 6.0, 0.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.21484375, -5.02288818359375, -4.8309326171875, -4.63897705078125, -4.447021484375, -4.25506591796875, -4.0631103515625, -3.87115478515625, -3.67919921875, -3.48724365234375, -3.2952880859375, -3.10333251953125, -2.911376953125, -2.71942138671875, -2.5274658203125, -2.33551025390625, -2.1435546875, -1.95159912109375, -1.7596435546875, -1.56768798828125, -1.375732421875, -1.18377685546875, -0.9918212890625, -0.79986572265625, -0.60791015625, -0.41595458984375, -0.2239990234375, -0.03204345703125, 0.159912109375, 0.35186767578125, 0.5438232421875, 0.73577880859375, 0.927734375, 1.11968994140625, 1.3116455078125, 1.50360107421875, 1.695556640625, 1.88751220703125, 2.0794677734375, 2.27142333984375, 2.46337890625, 2.65533447265625, 2.8472900390625, 3.03924560546875, 3.231201171875, 3.42315673828125, 3.6151123046875, 3.80706787109375, 3.9990234375, 4.19097900390625, 4.3829345703125, 4.57489013671875, 4.766845703125, 4.95880126953125, 5.1507568359375, 5.34271240234375, 5.53466796875, 5.72662353515625, 5.9185791015625, 6.11053466796875, 6.302490234375, 6.49444580078125, 6.6864013671875, 6.87835693359375, 7.0703125]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 4.0, 4.0, 11.0, 8.0, 29.0, 38.0, 80.0, 281.0, 1172.0, 15874.0, 4137523.0, 37065.0, 1641.0, 341.0, 119.0, 48.0, 22.0, 12.0, 8.0, 5.0, 4.0, 0.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-59.6875, -57.6708984375, -55.654296875, -53.6376953125, -51.62109375, -49.6044921875, -47.587890625, -45.5712890625, -43.5546875, -41.5380859375, -39.521484375, -37.5048828125, -35.48828125, -33.4716796875, -31.455078125, -29.4384765625, -27.421875, -25.4052734375, -23.388671875, -21.3720703125, -19.35546875, -17.3388671875, -15.322265625, -13.3056640625, -11.2890625, -9.2724609375, -7.255859375, -5.2392578125, -3.22265625, -1.2060546875, 0.810546875, 2.8271484375, 4.84375, 6.8603515625, 8.876953125, 10.8935546875, 12.91015625, 14.9267578125, 16.943359375, 18.9599609375, 20.9765625, 22.9931640625, 25.009765625, 27.0263671875, 29.04296875, 31.0595703125, 33.076171875, 35.0927734375, 37.109375, 39.1259765625, 41.142578125, 43.1591796875, 45.17578125, 47.1923828125, 49.208984375, 51.2255859375, 53.2421875, 55.2587890625, 57.275390625, 59.2919921875, 61.30859375, 63.3251953125, 65.341796875, 67.3583984375, 69.375]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 3.0, 0.0, 3.0, 4.0, 7.0, 10.0, 12.0, 15.0, 29.0, 44.0, 72.0, 169.0, 441.0, 1272.0, 1256.0, 403.0, 135.0, 75.0, 41.0, 26.0, 20.0, 17.0, 8.0, 4.0, 8.0, 1.0, 5.0, 2.0, 2.0, 1.0, 0.0, 1.0, 3.0], "bins": [-31.328125, -30.632568359375, -29.93701171875, -29.241455078125, -28.5458984375, -27.850341796875, -27.15478515625, -26.459228515625, -25.763671875, -25.068115234375, -24.37255859375, -23.677001953125, -22.9814453125, -22.285888671875, -21.59033203125, -20.894775390625, -20.19921875, -19.503662109375, -18.80810546875, -18.112548828125, -17.4169921875, -16.721435546875, -16.02587890625, -15.330322265625, -14.634765625, -13.939208984375, -13.24365234375, -12.548095703125, -11.8525390625, -11.156982421875, -10.46142578125, -9.765869140625, -9.0703125, -8.374755859375, -7.67919921875, -6.983642578125, -6.2880859375, -5.592529296875, -4.89697265625, -4.201416015625, -3.505859375, -2.810302734375, -2.11474609375, -1.419189453125, -0.7236328125, -0.028076171875, 0.66748046875, 1.363037109375, 2.05859375, 2.754150390625, 3.44970703125, 4.145263671875, 4.8408203125, 5.536376953125, 6.23193359375, 6.927490234375, 7.623046875, 8.318603515625, 9.01416015625, 9.709716796875, 10.4052734375, 11.100830078125, 11.79638671875, 12.491943359375, 13.1875]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 8.0, 6.0, 24.0, 49.0, 145.0, 357.0, 278.0, 95.0, 30.0, 6.0, 5.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-370.10137939453125, -361.2460021972656, -352.390625, -343.53521728515625, -334.6798400878906, -325.824462890625, -316.9690856933594, -308.11370849609375, -299.25830078125, -290.4029235839844, -281.54754638671875, -272.692138671875, -263.8367614746094, -254.98138427734375, -246.12600708007812, -237.2706298828125, -228.41525268554688, -219.55987548828125, -210.70448303222656, -201.84910583496094, -192.99371337890625, -184.13833618164062, -175.282958984375, -166.42758178710938, -157.5721893310547, -148.71681213378906, -139.86141967773438, -131.00604248046875, -122.1506576538086, -113.29527282714844, -104.43989562988281, -95.58451080322266, -86.7291259765625, -77.87374114990234, -69.01835632324219, -60.16297912597656, -51.307594299316406, -42.45220947265625, -33.59682846069336, -24.74144744873047, -15.886062622070312, -7.030679702758789, 1.8247032165527344, 10.680086135864258, 19.53546905517578, 28.390853881835938, 37.24623489379883, 46.10161590576172, 54.957000732421875, 63.81238555908203, 72.66777038574219, 81.52314758300781, 90.37853240966797, 99.23391723632812, 108.08929443359375, 116.9446792602539, 125.80006408691406, 134.6554412841797, 143.51083374023438, 152.3662109375, 161.22158813476562, 170.0769805908203, 178.93235778808594, 187.78775024414062, 196.64312744140625]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 5.0, 4.0, 2.0, 7.0, 5.0, 7.0, 13.0, 16.0, 25.0, 40.0, 33.0, 33.0, 48.0, 42.0, 52.0, 64.0, 44.0, 64.0, 65.0, 64.0, 51.0, 63.0, 56.0, 39.0, 36.0, 29.0, 29.0, 21.0, 16.0, 14.0, 8.0, 6.0, 4.0, 2.0, 4.0, 1.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-53.507728576660156, -50.88240051269531, -48.257076263427734, -45.631752014160156, -43.00642395019531, -40.38109588623047, -37.75577163696289, -35.13044738769531, -32.50511932373047, -29.879793167114258, -27.254467010498047, -24.629140853881836, -22.003814697265625, -19.378488540649414, -16.753162384033203, -14.127836227416992, -11.502510070800781, -8.87718391418457, -6.251857757568359, -3.6265316009521484, -1.0012054443359375, 1.6241207122802734, 4.249446868896484, 6.874773025512695, 9.500099182128906, 12.125425338745117, 14.750751495361328, 17.37607765197754, 20.00140380859375, 22.62672996520996, 25.252056121826172, 27.877382278442383, 30.502716064453125, 33.12804412841797, 35.75336837768555, 38.378692626953125, 41.00402069091797, 43.62934875488281, 46.25467300415039, 48.87999725341797, 51.50532531738281, 54.130653381347656, 56.755977630615234, 59.38130187988281, 62.006629943847656, 64.6319580078125, 67.25727844238281, 69.88260650634766, 72.5079345703125, 75.13326263427734, 77.75859069824219, 80.3839111328125, 83.00923919677734, 85.63456726074219, 88.2598876953125, 90.88521575927734, 93.51054382324219, 96.13587188720703, 98.76119995117188, 101.38652038574219, 104.01184844970703, 106.63717651367188, 109.26249694824219, 111.88782501220703, 114.51315307617188]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 3.0, 11.0, 12.0, 20.0, 24.0, 42.0, 57.0, 105.0, 239.0, 565.0, 1369.0, 3835.0, 13551.0, 62005.0, 453556.0, 434106.0, 59748.0, 13094.0, 3844.0, 1305.0, 536.0, 227.0, 97.0, 63.0, 38.0, 28.0, 18.0, 8.0, 18.0, 10.0, 2.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-26.796875, -26.031005859375, -25.26513671875, -24.499267578125, -23.7333984375, -22.967529296875, -22.20166015625, -21.435791015625, -20.669921875, -19.904052734375, -19.13818359375, -18.372314453125, -17.6064453125, -16.840576171875, -16.07470703125, -15.308837890625, -14.54296875, -13.777099609375, -13.01123046875, -12.245361328125, -11.4794921875, -10.713623046875, -9.94775390625, -9.181884765625, -8.416015625, -7.650146484375, -6.88427734375, -6.118408203125, -5.3525390625, -4.586669921875, -3.82080078125, -3.054931640625, -2.2890625, -1.523193359375, -0.75732421875, 0.008544921875, 0.7744140625, 1.540283203125, 2.30615234375, 3.072021484375, 3.837890625, 4.603759765625, 5.36962890625, 6.135498046875, 6.9013671875, 7.667236328125, 8.43310546875, 9.198974609375, 9.96484375, 10.730712890625, 11.49658203125, 12.262451171875, 13.0283203125, 13.794189453125, 14.56005859375, 15.325927734375, 16.091796875, 16.857666015625, 17.62353515625, 18.389404296875, 19.1552734375, 19.921142578125, 20.68701171875, 21.452880859375, 22.21875]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 3.0, 5.0, 4.0, 4.0, 2.0, 5.0, 7.0, 11.0, 10.0, 4.0, 10.0, 25.0, 19.0, 21.0, 29.0, 26.0, 34.0, 40.0, 46.0, 47.0, 58.0, 55.0, 45.0, 65.0, 48.0, 57.0, 48.0, 35.0, 46.0, 31.0, 35.0, 30.0, 16.0, 17.0, 19.0, 11.0, 10.0, 4.0, 10.0, 3.0, 3.0, 6.0, 4.0, 4.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.12890625, -4.93731689453125, -4.7457275390625, -4.55413818359375, -4.362548828125, -4.17095947265625, -3.9793701171875, -3.78778076171875, -3.59619140625, -3.40460205078125, -3.2130126953125, -3.02142333984375, -2.829833984375, -2.63824462890625, -2.4466552734375, -2.25506591796875, -2.0634765625, -1.87188720703125, -1.6802978515625, -1.48870849609375, -1.297119140625, -1.10552978515625, -0.9139404296875, -0.72235107421875, -0.53076171875, -0.33917236328125, -0.1475830078125, 0.04400634765625, 0.235595703125, 0.42718505859375, 0.6187744140625, 0.81036376953125, 1.001953125, 1.19354248046875, 1.3851318359375, 1.57672119140625, 1.768310546875, 1.95989990234375, 2.1514892578125, 2.34307861328125, 2.53466796875, 2.72625732421875, 2.9178466796875, 3.10943603515625, 3.301025390625, 3.49261474609375, 3.6842041015625, 3.87579345703125, 4.0673828125, 4.25897216796875, 4.4505615234375, 4.64215087890625, 4.833740234375, 5.02532958984375, 5.2169189453125, 5.40850830078125, 5.60009765625, 5.79168701171875, 5.9832763671875, 6.17486572265625, 6.366455078125, 6.55804443359375, 6.7496337890625, 6.94122314453125, 7.1328125]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 4.0, 5.0, 11.0, 11.0, 23.0, 29.0, 45.0, 76.0, 132.0, 253.0, 404.0, 1046.0, 3320.0, 13112.0, 86666.0, 813140.0, 108972.0, 15359.0, 3678.0, 1252.0, 446.0, 233.0, 135.0, 76.0, 54.0, 21.0, 26.0, 8.0, 12.0, 4.0, 2.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.796875, -21.828857421875, -20.86083984375, -19.892822265625, -18.9248046875, -17.956787109375, -16.98876953125, -16.020751953125, -15.052734375, -14.084716796875, -13.11669921875, -12.148681640625, -11.1806640625, -10.212646484375, -9.24462890625, -8.276611328125, -7.30859375, -6.340576171875, -5.37255859375, -4.404541015625, -3.4365234375, -2.468505859375, -1.50048828125, -0.532470703125, 0.435546875, 1.403564453125, 2.37158203125, 3.339599609375, 4.3076171875, 5.275634765625, 6.24365234375, 7.211669921875, 8.1796875, 9.147705078125, 10.11572265625, 11.083740234375, 12.0517578125, 13.019775390625, 13.98779296875, 14.955810546875, 15.923828125, 16.891845703125, 17.85986328125, 18.827880859375, 19.7958984375, 20.763916015625, 21.73193359375, 22.699951171875, 23.66796875, 24.635986328125, 25.60400390625, 26.572021484375, 27.5400390625, 28.508056640625, 29.47607421875, 30.444091796875, 31.412109375, 32.380126953125, 33.34814453125, 34.316162109375, 35.2841796875, 36.252197265625, 37.22021484375, 38.188232421875, 39.15625]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 5.0, 4.0, 3.0, 3.0, 5.0, 6.0, 8.0, 5.0, 7.0, 14.0, 20.0, 20.0, 36.0, 38.0, 45.0, 58.0, 58.0, 64.0, 64.0, 62.0, 47.0, 70.0, 56.0, 60.0, 34.0, 45.0, 34.0, 26.0, 29.0, 17.0, 8.0, 16.0, 12.0, 8.0, 5.0, 6.0, 3.0, 1.0, 2.0, 4.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.0625, -29.175537109375, -28.28857421875, -27.401611328125, -26.5146484375, -25.627685546875, -24.74072265625, -23.853759765625, -22.966796875, -22.079833984375, -21.19287109375, -20.305908203125, -19.4189453125, -18.531982421875, -17.64501953125, -16.758056640625, -15.87109375, -14.984130859375, -14.09716796875, -13.210205078125, -12.3232421875, -11.436279296875, -10.54931640625, -9.662353515625, -8.775390625, -7.888427734375, -7.00146484375, -6.114501953125, -5.2275390625, -4.340576171875, -3.45361328125, -2.566650390625, -1.6796875, -0.792724609375, 0.09423828125, 0.981201171875, 1.8681640625, 2.755126953125, 3.64208984375, 4.529052734375, 5.416015625, 6.302978515625, 7.18994140625, 8.076904296875, 8.9638671875, 9.850830078125, 10.73779296875, 11.624755859375, 12.51171875, 13.398681640625, 14.28564453125, 15.172607421875, 16.0595703125, 16.946533203125, 17.83349609375, 18.720458984375, 19.607421875, 20.494384765625, 21.38134765625, 22.268310546875, 23.1552734375, 24.042236328125, 24.92919921875, 25.816162109375, 26.703125]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 5.0, 4.0, 4.0, 7.0, 8.0, 8.0, 16.0, 22.0, 28.0, 47.0, 51.0, 85.0, 110.0, 191.0, 350.0, 571.0, 1057.0, 2067.0, 4742.0, 12713.0, 44287.0, 239084.0, 641694.0, 71216.0, 18173.0, 6284.0, 2604.0, 1310.0, 681.0, 416.0, 260.0, 142.0, 82.0, 74.0, 49.0, 28.0, 16.0, 19.0, 10.0, 10.0, 8.0, 8.0, 6.0, 4.0, 4.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 3.0], "bins": [-7.44140625, -7.22357177734375, -7.0057373046875, -6.78790283203125, -6.570068359375, -6.35223388671875, -6.1343994140625, -5.91656494140625, -5.69873046875, -5.48089599609375, -5.2630615234375, -5.04522705078125, -4.827392578125, -4.60955810546875, -4.3917236328125, -4.17388916015625, -3.9560546875, -3.73822021484375, -3.5203857421875, -3.30255126953125, -3.084716796875, -2.86688232421875, -2.6490478515625, -2.43121337890625, -2.21337890625, -1.99554443359375, -1.7777099609375, -1.55987548828125, -1.342041015625, -1.12420654296875, -0.9063720703125, -0.68853759765625, -0.470703125, -0.25286865234375, -0.0350341796875, 0.18280029296875, 0.400634765625, 0.61846923828125, 0.8363037109375, 1.05413818359375, 1.27197265625, 1.48980712890625, 1.7076416015625, 1.92547607421875, 2.143310546875, 2.36114501953125, 2.5789794921875, 2.79681396484375, 3.0146484375, 3.23248291015625, 3.4503173828125, 3.66815185546875, 3.885986328125, 4.10382080078125, 4.3216552734375, 4.53948974609375, 4.75732421875, 4.97515869140625, 5.1929931640625, 5.41082763671875, 5.628662109375, 5.84649658203125, 6.0643310546875, 6.28216552734375, 6.5]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 5.0, 0.0, 2.0, 5.0, 10.0, 17.0, 23.0, 27.0, 61.0, 101.0, 207.0, 230.0, 127.0, 76.0, 48.0, 22.0, 23.0, 6.0, 6.0, 5.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0034961700439453125, -0.0033760368824005127, -0.003255903720855713, -0.003135770559310913, -0.0030156373977661133, -0.0028955042362213135, -0.0027753710746765137, -0.002655237913131714, -0.002535104751586914, -0.0024149715900421143, -0.0022948384284973145, -0.0021747052669525146, -0.002054572105407715, -0.001934438943862915, -0.0018143057823181152, -0.0016941726207733154, -0.0015740394592285156, -0.0014539062976837158, -0.001333773136138916, -0.0012136399745941162, -0.0010935068130493164, -0.0009733736515045166, -0.0008532404899597168, -0.000733107328414917, -0.0006129741668701172, -0.0004928410053253174, -0.0003727078437805176, -0.0002525746822357178, -0.00013244152069091797, -1.2308359146118164e-05, 0.00010782480239868164, 0.00022795796394348145, 0.00034809112548828125, 0.00046822428703308105, 0.0005883574485778809, 0.0007084906101226807, 0.0008286237716674805, 0.0009487569332122803, 0.00106889009475708, 0.0011890232563018799, 0.0013091564178466797, 0.0014292895793914795, 0.0015494227409362793, 0.001669555902481079, 0.001789689064025879, 0.0019098222255706787, 0.0020299553871154785, 0.0021500885486602783, 0.002270221710205078, 0.002390354871749878, 0.0025104880332946777, 0.0026306211948394775, 0.0027507543563842773, 0.002870887517929077, 0.002991020679473877, 0.0031111538410186768, 0.0032312870025634766, 0.0033514201641082764, 0.003471553325653076, 0.003591686487197876, 0.0037118196487426758, 0.0038319528102874756, 0.003952085971832275, 0.004072219133377075, 0.004192352294921875]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 2.0, 2.0, 12.0, 12.0, 28.0, 41.0, 55.0, 123.0, 233.0, 482.0, 1281.0, 3712.0, 16768.0, 152892.0, 813034.0, 48007.0, 8158.0, 2212.0, 790.0, 317.0, 164.0, 78.0, 50.0, 42.0, 19.0, 15.0, 13.0, 10.0, 4.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.3671875, -9.0106201171875, -8.654052734375, -8.2974853515625, -7.94091796875, -7.5843505859375, -7.227783203125, -6.8712158203125, -6.5146484375, -6.1580810546875, -5.801513671875, -5.4449462890625, -5.08837890625, -4.7318115234375, -4.375244140625, -4.0186767578125, -3.662109375, -3.3055419921875, -2.948974609375, -2.5924072265625, -2.23583984375, -1.8792724609375, -1.522705078125, -1.1661376953125, -0.8095703125, -0.4530029296875, -0.096435546875, 0.2601318359375, 0.61669921875, 0.9732666015625, 1.329833984375, 1.6864013671875, 2.04296875, 2.3995361328125, 2.756103515625, 3.1126708984375, 3.46923828125, 3.8258056640625, 4.182373046875, 4.5389404296875, 4.8955078125, 5.2520751953125, 5.608642578125, 5.9652099609375, 6.32177734375, 6.6783447265625, 7.034912109375, 7.3914794921875, 7.748046875, 8.1046142578125, 8.461181640625, 8.8177490234375, 9.17431640625, 9.5308837890625, 9.887451171875, 10.2440185546875, 10.6005859375, 10.9571533203125, 11.313720703125, 11.6702880859375, 12.02685546875, 12.3834228515625, 12.739990234375, 13.0965576171875, 13.453125]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 2.0, 5.0, 3.0, 4.0, 6.0, 5.0, 7.0, 8.0, 7.0, 10.0, 9.0, 12.0, 20.0, 28.0, 22.0, 31.0, 54.0, 54.0, 67.0, 74.0, 92.0, 88.0, 72.0, 75.0, 53.0, 31.0, 28.0, 23.0, 19.0, 16.0, 8.0, 10.0, 10.0, 9.0, 8.0, 9.0, 6.0, 5.0, 3.0, 4.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 4.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.69140625, -5.51690673828125, -5.3424072265625, -5.16790771484375, -4.993408203125, -4.81890869140625, -4.6444091796875, -4.46990966796875, -4.29541015625, -4.12091064453125, -3.9464111328125, -3.77191162109375, -3.597412109375, -3.42291259765625, -3.2484130859375, -3.07391357421875, -2.8994140625, -2.72491455078125, -2.5504150390625, -2.37591552734375, -2.201416015625, -2.02691650390625, -1.8524169921875, -1.67791748046875, -1.50341796875, -1.32891845703125, -1.1544189453125, -0.97991943359375, -0.805419921875, -0.63092041015625, -0.4564208984375, -0.28192138671875, -0.107421875, 0.06707763671875, 0.2415771484375, 0.41607666015625, 0.590576171875, 0.76507568359375, 0.9395751953125, 1.11407470703125, 1.28857421875, 1.46307373046875, 1.6375732421875, 1.81207275390625, 1.986572265625, 2.16107177734375, 2.3355712890625, 2.51007080078125, 2.6845703125, 2.85906982421875, 3.0335693359375, 3.20806884765625, 3.382568359375, 3.55706787109375, 3.7315673828125, 3.90606689453125, 4.08056640625, 4.25506591796875, 4.4295654296875, 4.60406494140625, 4.778564453125, 4.95306396484375, 5.1275634765625, 5.30206298828125, 5.4765625]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 4.0, 2.0, 8.0, 20.0, 34.0, 55.0, 122.0, 308.0, 235.0, 119.0, 48.0, 27.0, 13.0, 6.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-130.7552947998047, -124.18218231201172, -117.60906982421875, -111.03596496582031, -104.46284484863281, -97.88973999023438, -91.3166275024414, -84.74351501464844, -78.17040252685547, -71.5972900390625, -65.02417755126953, -58.45106887817383, -51.87795639038086, -45.30484390258789, -38.73173522949219, -32.15862274169922, -25.58551025390625, -19.01239776611328, -12.439287185668945, -5.866176605224609, 0.7069358825683594, 7.280048370361328, 13.853157043457031, 20.42626953125, 26.99938201904297, 33.57249450683594, 40.145606994628906, 46.71871566772461, 53.29182815551758, 59.86494064331055, 66.43804931640625, 73.01116180419922, 79.58428955078125, 86.15740203857422, 92.73051452636719, 99.30361938476562, 105.87673950195312, 112.44984436035156, 119.02295684814453, 125.5960693359375, 132.169189453125, 138.74229431152344, 145.31541442871094, 151.88851928710938, 158.46163940429688, 165.0347442626953, 171.60784912109375, 178.18096923828125, 184.7540740966797, 191.32717895507812, 197.90029907226562, 204.47340393066406, 211.04652404785156, 217.61962890625, 224.1927490234375, 230.76585388183594, 237.33895874023438, 243.9120635986328, 250.4851837158203, 257.05828857421875, 263.63140869140625, 270.20452880859375, 276.7776184082031, 283.3507385253906, 289.9238586425781]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 1.0, 5.0, 7.0, 3.0, 10.0, 3.0, 4.0, 7.0, 10.0, 7.0, 16.0, 17.0, 15.0, 25.0, 21.0, 30.0, 40.0, 29.0, 41.0, 50.0, 96.0, 111.0, 87.0, 60.0, 40.0, 33.0, 34.0, 33.0, 19.0, 35.0, 19.0, 17.0, 13.0, 14.0, 8.0, 12.0, 12.0, 3.0, 5.0, 4.0, 2.0, 0.0, 1.0, 1.0, 5.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-90.29438781738281, -87.5201187133789, -84.74585723876953, -81.97158813476562, -79.19731903076172, -76.42304992675781, -73.64878845214844, -70.87451934814453, -68.10025024414062, -65.32598114013672, -62.55171585083008, -59.77745056152344, -57.00318145751953, -54.22891616821289, -51.45465087890625, -48.680381774902344, -45.90612030029297, -43.13185501098633, -40.35758590698242, -37.58332061767578, -34.809051513671875, -32.034786224365234, -29.260520935058594, -26.48625373840332, -23.711986541748047, -20.937719345092773, -18.1634521484375, -15.38918685913086, -12.614919662475586, -9.840652465820312, -7.066387176513672, -4.292119979858398, -1.5178451538085938, 1.2564215660095215, 4.030688285827637, 6.804954528808594, 9.579221725463867, 12.35348892211914, 15.127754211425781, 17.902021408081055, 20.676288604736328, 23.4505558013916, 26.224822998046875, 28.999088287353516, 31.77335548400879, 34.54762268066406, 37.3218879699707, 40.096153259277344, 42.87042236328125, 45.64468765258789, 48.4189567565918, 51.19322204589844, 53.967491149902344, 56.741756439208984, 59.516021728515625, 62.29029083251953, 65.06455993652344, 67.83882904052734, 70.61309051513672, 73.38735961914062, 76.16162872314453, 78.93589782714844, 81.71015930175781, 84.48442840576172, 87.2586898803711]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 8.0, 0.0, 8.0, 13.0, 24.0, 47.0, 87.0, 170.0, 436.0, 1246.0, 5195.0, 66813.0, 4081665.0, 33562.0, 3549.0, 904.0, 300.0, 103.0, 58.0, 34.0, 19.0, 16.0, 9.0, 6.0, 9.0, 2.0, 1.0, 6.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-34.875, -33.50244140625, -32.1298828125, -30.75732421875, -29.384765625, -28.01220703125, -26.6396484375, -25.26708984375, -23.89453125, -22.52197265625, -21.1494140625, -19.77685546875, -18.404296875, -17.03173828125, -15.6591796875, -14.28662109375, -12.9140625, -11.54150390625, -10.1689453125, -8.79638671875, -7.423828125, -6.05126953125, -4.6787109375, -3.30615234375, -1.93359375, -0.56103515625, 0.8115234375, 2.18408203125, 3.556640625, 4.92919921875, 6.3017578125, 7.67431640625, 9.046875, 10.41943359375, 11.7919921875, 13.16455078125, 14.537109375, 15.90966796875, 17.2822265625, 18.65478515625, 20.02734375, 21.39990234375, 22.7724609375, 24.14501953125, 25.517578125, 26.89013671875, 28.2626953125, 29.63525390625, 31.0078125, 32.38037109375, 33.7529296875, 35.12548828125, 36.498046875, 37.87060546875, 39.2431640625, 40.61572265625, 41.98828125, 43.36083984375, 44.7333984375, 46.10595703125, 47.478515625, 48.85107421875, 50.2236328125, 51.59619140625, 52.96875]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 0.0, 3.0, 3.0, 3.0, 3.0, 9.0, 9.0, 11.0, 8.0, 13.0, 17.0, 22.0, 25.0, 35.0, 32.0, 59.0, 47.0, 48.0, 54.0, 74.0, 50.0, 63.0, 50.0, 54.0, 66.0, 40.0, 42.0, 31.0, 30.0, 25.0, 20.0, 10.0, 13.0, 10.0, 4.0, 9.0, 5.0, 5.0, 3.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.22265625, -7.0133056640625, -6.803955078125, -6.5946044921875, -6.38525390625, -6.1759033203125, -5.966552734375, -5.7572021484375, -5.5478515625, -5.3385009765625, -5.129150390625, -4.9197998046875, -4.71044921875, -4.5010986328125, -4.291748046875, -4.0823974609375, -3.873046875, -3.6636962890625, -3.454345703125, -3.2449951171875, -3.03564453125, -2.8262939453125, -2.616943359375, -2.4075927734375, -2.1982421875, -1.9888916015625, -1.779541015625, -1.5701904296875, -1.36083984375, -1.1514892578125, -0.942138671875, -0.7327880859375, -0.5234375, -0.3140869140625, -0.104736328125, 0.1046142578125, 0.31396484375, 0.5233154296875, 0.732666015625, 0.9420166015625, 1.1513671875, 1.3607177734375, 1.570068359375, 1.7794189453125, 1.98876953125, 2.1981201171875, 2.407470703125, 2.6168212890625, 2.826171875, 3.0355224609375, 3.244873046875, 3.4542236328125, 3.66357421875, 3.8729248046875, 4.082275390625, 4.2916259765625, 4.5009765625, 4.7103271484375, 4.919677734375, 5.1290283203125, 5.33837890625, 5.5477294921875, 5.757080078125, 5.9664306640625, 6.17578125]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 2.0, 3.0, 4.0, 3.0, 7.0, 4.0, 14.0, 14.0, 21.0, 35.0, 66.0, 101.0, 180.0, 487.0, 1352.0, 5275.0, 40123.0, 3687162.0, 437392.0, 17350.0, 3119.0, 872.0, 332.0, 163.0, 80.0, 42.0, 25.0, 19.0, 12.0, 10.0, 3.0, 3.0, 3.0, 5.0, 3.0, 1.0, 1.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-33.03125, -31.9931640625, -30.955078125, -29.9169921875, -28.87890625, -27.8408203125, -26.802734375, -25.7646484375, -24.7265625, -23.6884765625, -22.650390625, -21.6123046875, -20.57421875, -19.5361328125, -18.498046875, -17.4599609375, -16.421875, -15.3837890625, -14.345703125, -13.3076171875, -12.26953125, -11.2314453125, -10.193359375, -9.1552734375, -8.1171875, -7.0791015625, -6.041015625, -5.0029296875, -3.96484375, -2.9267578125, -1.888671875, -0.8505859375, 0.1875, 1.2255859375, 2.263671875, 3.3017578125, 4.33984375, 5.3779296875, 6.416015625, 7.4541015625, 8.4921875, 9.5302734375, 10.568359375, 11.6064453125, 12.64453125, 13.6826171875, 14.720703125, 15.7587890625, 16.796875, 17.8349609375, 18.873046875, 19.9111328125, 20.94921875, 21.9873046875, 23.025390625, 24.0634765625, 25.1015625, 26.1396484375, 27.177734375, 28.2158203125, 29.25390625, 30.2919921875, 31.330078125, 32.3681640625, 33.40625]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 5.0, 2.0, 2.0, 5.0, 2.0, 8.0, 12.0, 6.0, 13.0, 22.0, 30.0, 32.0, 60.0, 77.0, 121.0, 254.0, 541.0, 991.0, 890.0, 437.0, 193.0, 133.0, 68.0, 42.0, 44.0, 24.0, 12.0, 17.0, 10.0, 3.0, 8.0, 5.0, 5.0, 3.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-20.0, -19.42822265625, -18.8564453125, -18.28466796875, -17.712890625, -17.14111328125, -16.5693359375, -15.99755859375, -15.42578125, -14.85400390625, -14.2822265625, -13.71044921875, -13.138671875, -12.56689453125, -11.9951171875, -11.42333984375, -10.8515625, -10.27978515625, -9.7080078125, -9.13623046875, -8.564453125, -7.99267578125, -7.4208984375, -6.84912109375, -6.27734375, -5.70556640625, -5.1337890625, -4.56201171875, -3.990234375, -3.41845703125, -2.8466796875, -2.27490234375, -1.703125, -1.13134765625, -0.5595703125, 0.01220703125, 0.583984375, 1.15576171875, 1.7275390625, 2.29931640625, 2.87109375, 3.44287109375, 4.0146484375, 4.58642578125, 5.158203125, 5.72998046875, 6.3017578125, 6.87353515625, 7.4453125, 8.01708984375, 8.5888671875, 9.16064453125, 9.732421875, 10.30419921875, 10.8759765625, 11.44775390625, 12.01953125, 12.59130859375, 13.1630859375, 13.73486328125, 14.306640625, 14.87841796875, 15.4501953125, 16.02197265625, 16.59375]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 5.0, 2.0, 9.0, 8.0, 11.0, 16.0, 54.0, 103.0, 157.0, 250.0, 180.0, 103.0, 48.0, 28.0, 18.0, 8.0, 5.0, 3.0, 1.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-269.1133117675781, -263.5760803222656, -258.0388488769531, -252.50161743164062, -246.96438598632812, -241.42715454101562, -235.88992309570312, -230.35269165039062, -224.81546020507812, -219.27822875976562, -213.74099731445312, -208.20376586914062, -202.66653442382812, -197.12930297851562, -191.59207153320312, -186.05484008789062, -180.51760864257812, -174.98037719726562, -169.44314575195312, -163.90591430664062, -158.36868286132812, -152.83145141601562, -147.29421997070312, -141.75698852539062, -136.21974182128906, -130.68251037597656, -125.14527893066406, -119.60804748535156, -114.07081604003906, -108.53358459472656, -102.99635314941406, -97.45912170410156, -91.92188262939453, -86.38465118408203, -80.84741973876953, -75.31018829345703, -69.77295684814453, -64.23572540283203, -58.698490142822266, -53.161258697509766, -47.624027252197266, -42.086795806884766, -36.549564361572266, -31.012331008911133, -25.475099563598633, -19.937868118286133, -14.400634765625, -8.8634033203125, -3.326171875, 2.211060047149658, 7.748291969299316, 13.285524368286133, 18.822755813598633, 24.359987258911133, 29.897220611572266, 35.434452056884766, 40.971683502197266, 46.508914947509766, 52.046146392822266, 57.58338165283203, 63.12061309814453, 68.65784454345703, 74.19507598876953, 79.73230743408203, 85.26953887939453]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 5.0, 5.0, 8.0, 11.0, 17.0, 12.0, 15.0, 23.0, 21.0, 28.0, 26.0, 34.0, 30.0, 35.0, 37.0, 43.0, 46.0, 38.0, 58.0, 37.0, 41.0, 49.0, 44.0, 34.0, 43.0, 38.0, 34.0, 32.0, 36.0, 26.0, 19.0, 15.0, 14.0, 12.0, 6.0, 11.0, 5.0, 4.0, 5.0, 4.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-70.44347381591797, -68.26459503173828, -66.0857162475586, -63.90683364868164, -61.72795486450195, -59.549076080322266, -57.37019348144531, -55.191314697265625, -53.01243591308594, -50.83355712890625, -48.65467834472656, -46.47579574584961, -44.29691696166992, -42.118038177490234, -39.93915557861328, -37.760276794433594, -35.581398010253906, -33.40251922607422, -31.2236385345459, -29.044757843017578, -26.86587905883789, -24.687000274658203, -22.508119583129883, -20.329238891601562, -18.150360107421875, -15.971480369567871, -13.792600631713867, -11.613720893859863, -9.43484115600586, -7.2559614181518555, -5.077081680297852, -2.8982019424438477, -0.7193145751953125, 1.4595651626586914, 3.6384449005126953, 5.817324638366699, 7.996204376220703, 10.175084114074707, 12.353963851928711, 14.532843589782715, 16.71172332763672, 18.890602111816406, 21.069482803344727, 23.248363494873047, 25.427242279052734, 27.606121063232422, 29.785001754760742, 31.963882446289062, 34.14276123046875, 36.32164001464844, 38.500518798828125, 40.67940139770508, 42.858280181884766, 45.03715896606445, 47.216041564941406, 49.394920349121094, 51.57379913330078, 53.75267791748047, 55.931556701660156, 58.11043930053711, 60.2893180847168, 62.468196868896484, 64.64707946777344, 66.82595825195312, 69.00483703613281]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 2.0, 10.0, 4.0, 13.0, 9.0, 22.0, 34.0, 72.0, 126.0, 214.0, 423.0, 964.0, 2232.0, 5724.0, 16776.0, 62531.0, 326492.0, 501080.0, 95472.0, 23578.0, 7568.0, 2861.0, 1163.0, 551.0, 291.0, 132.0, 81.0, 46.0, 36.0, 18.0, 15.0, 7.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.453125, -17.773193359375, -17.09326171875, -16.413330078125, -15.7333984375, -15.053466796875, -14.37353515625, -13.693603515625, -13.013671875, -12.333740234375, -11.65380859375, -10.973876953125, -10.2939453125, -9.614013671875, -8.93408203125, -8.254150390625, -7.57421875, -6.894287109375, -6.21435546875, -5.534423828125, -4.8544921875, -4.174560546875, -3.49462890625, -2.814697265625, -2.134765625, -1.454833984375, -0.77490234375, -0.094970703125, 0.5849609375, 1.264892578125, 1.94482421875, 2.624755859375, 3.3046875, 3.984619140625, 4.66455078125, 5.344482421875, 6.0244140625, 6.704345703125, 7.38427734375, 8.064208984375, 8.744140625, 9.424072265625, 10.10400390625, 10.783935546875, 11.4638671875, 12.143798828125, 12.82373046875, 13.503662109375, 14.18359375, 14.863525390625, 15.54345703125, 16.223388671875, 16.9033203125, 17.583251953125, 18.26318359375, 18.943115234375, 19.623046875, 20.302978515625, 20.98291015625, 21.662841796875, 22.3427734375, 23.022705078125, 23.70263671875, 24.382568359375, 25.0625]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 4.0, 1.0, 2.0, 3.0, 6.0, 7.0, 12.0, 11.0, 12.0, 14.0, 20.0, 16.0, 24.0, 32.0, 26.0, 38.0, 29.0, 57.0, 43.0, 60.0, 50.0, 44.0, 44.0, 52.0, 43.0, 39.0, 47.0, 35.0, 32.0, 30.0, 38.0, 23.0, 24.0, 24.0, 15.0, 13.0, 9.0, 7.0, 7.0, 7.0, 5.0, 1.0, 3.0, 5.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.7265625, -5.535888671875, -5.34521484375, -5.154541015625, -4.9638671875, -4.773193359375, -4.58251953125, -4.391845703125, -4.201171875, -4.010498046875, -3.81982421875, -3.629150390625, -3.4384765625, -3.247802734375, -3.05712890625, -2.866455078125, -2.67578125, -2.485107421875, -2.29443359375, -2.103759765625, -1.9130859375, -1.722412109375, -1.53173828125, -1.341064453125, -1.150390625, -0.959716796875, -0.76904296875, -0.578369140625, -0.3876953125, -0.197021484375, -0.00634765625, 0.184326171875, 0.375, 0.565673828125, 0.75634765625, 0.947021484375, 1.1376953125, 1.328369140625, 1.51904296875, 1.709716796875, 1.900390625, 2.091064453125, 2.28173828125, 2.472412109375, 2.6630859375, 2.853759765625, 3.04443359375, 3.235107421875, 3.42578125, 3.616455078125, 3.80712890625, 3.997802734375, 4.1884765625, 4.379150390625, 4.56982421875, 4.760498046875, 4.951171875, 5.141845703125, 5.33251953125, 5.523193359375, 5.7138671875, 5.904541015625, 6.09521484375, 6.285888671875, 6.4765625]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 3.0, 2.0, 4.0, 10.0, 4.0, 10.0, 21.0, 27.0, 37.0, 41.0, 78.0, 104.0, 153.0, 258.0, 470.0, 934.0, 1984.0, 6263.0, 28690.0, 259447.0, 695365.0, 41463.0, 8184.0, 2538.0, 1033.0, 592.0, 296.0, 197.0, 88.0, 88.0, 52.0, 34.0, 23.0, 19.0, 13.0, 13.0, 5.0, 4.0, 5.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.765625, -27.946044921875, -27.12646484375, -26.306884765625, -25.4873046875, -24.667724609375, -23.84814453125, -23.028564453125, -22.208984375, -21.389404296875, -20.56982421875, -19.750244140625, -18.9306640625, -18.111083984375, -17.29150390625, -16.471923828125, -15.65234375, -14.832763671875, -14.01318359375, -13.193603515625, -12.3740234375, -11.554443359375, -10.73486328125, -9.915283203125, -9.095703125, -8.276123046875, -7.45654296875, -6.636962890625, -5.8173828125, -4.997802734375, -4.17822265625, -3.358642578125, -2.5390625, -1.719482421875, -0.89990234375, -0.080322265625, 0.7392578125, 1.558837890625, 2.37841796875, 3.197998046875, 4.017578125, 4.837158203125, 5.65673828125, 6.476318359375, 7.2958984375, 8.115478515625, 8.93505859375, 9.754638671875, 10.57421875, 11.393798828125, 12.21337890625, 13.032958984375, 13.8525390625, 14.672119140625, 15.49169921875, 16.311279296875, 17.130859375, 17.950439453125, 18.77001953125, 19.589599609375, 20.4091796875, 21.228759765625, 22.04833984375, 22.867919921875, 23.6875]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 4.0, 2.0, 2.0, 9.0, 9.0, 7.0, 18.0, 18.0, 27.0, 25.0, 23.0, 26.0, 43.0, 44.0, 41.0, 61.0, 51.0, 59.0, 64.0, 51.0, 59.0, 45.0, 56.0, 47.0, 33.0, 41.0, 27.0, 25.0, 23.0, 13.0, 17.0, 7.0, 9.0, 4.0, 6.0, 3.0, 5.0, 1.0, 4.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.078125, -23.33447265625, -22.5908203125, -21.84716796875, -21.103515625, -20.35986328125, -19.6162109375, -18.87255859375, -18.12890625, -17.38525390625, -16.6416015625, -15.89794921875, -15.154296875, -14.41064453125, -13.6669921875, -12.92333984375, -12.1796875, -11.43603515625, -10.6923828125, -9.94873046875, -9.205078125, -8.46142578125, -7.7177734375, -6.97412109375, -6.23046875, -5.48681640625, -4.7431640625, -3.99951171875, -3.255859375, -2.51220703125, -1.7685546875, -1.02490234375, -0.28125, 0.46240234375, 1.2060546875, 1.94970703125, 2.693359375, 3.43701171875, 4.1806640625, 4.92431640625, 5.66796875, 6.41162109375, 7.1552734375, 7.89892578125, 8.642578125, 9.38623046875, 10.1298828125, 10.87353515625, 11.6171875, 12.36083984375, 13.1044921875, 13.84814453125, 14.591796875, 15.33544921875, 16.0791015625, 16.82275390625, 17.56640625, 18.31005859375, 19.0537109375, 19.79736328125, 20.541015625, 21.28466796875, 22.0283203125, 22.77197265625, 23.515625]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 6.0, 5.0, 10.0, 11.0, 13.0, 12.0, 33.0, 29.0, 52.0, 76.0, 93.0, 153.0, 232.0, 313.0, 576.0, 969.0, 1796.0, 3639.0, 8976.0, 27669.0, 127669.0, 732104.0, 104296.0, 24199.0, 8006.0, 3419.0, 1709.0, 963.0, 505.0, 317.0, 212.0, 140.0, 111.0, 67.0, 43.0, 33.0, 27.0, 20.0, 14.0, 12.0, 13.0, 5.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 3.0, 2.0, 1.0, 2.0], "bins": [-6.8359375, -6.63153076171875, -6.4271240234375, -6.22271728515625, -6.018310546875, -5.81390380859375, -5.6094970703125, -5.40509033203125, -5.20068359375, -4.99627685546875, -4.7918701171875, -4.58746337890625, -4.383056640625, -4.17864990234375, -3.9742431640625, -3.76983642578125, -3.5654296875, -3.36102294921875, -3.1566162109375, -2.95220947265625, -2.747802734375, -2.54339599609375, -2.3389892578125, -2.13458251953125, -1.93017578125, -1.72576904296875, -1.5213623046875, -1.31695556640625, -1.112548828125, -0.90814208984375, -0.7037353515625, -0.49932861328125, -0.294921875, -0.09051513671875, 0.1138916015625, 0.31829833984375, 0.522705078125, 0.72711181640625, 0.9315185546875, 1.13592529296875, 1.34033203125, 1.54473876953125, 1.7491455078125, 1.95355224609375, 2.157958984375, 2.36236572265625, 2.5667724609375, 2.77117919921875, 2.9755859375, 3.17999267578125, 3.3843994140625, 3.58880615234375, 3.793212890625, 3.99761962890625, 4.2020263671875, 4.40643310546875, 4.61083984375, 4.81524658203125, 5.0196533203125, 5.22406005859375, 5.428466796875, 5.63287353515625, 5.8372802734375, 6.04168701171875, 6.24609375]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 4.0, 5.0, 7.0, 7.0, 11.0, 27.0, 36.0, 49.0, 89.0, 140.0, 209.0, 149.0, 96.0, 60.0, 45.0, 23.0, 19.0, 7.0, 11.0, 5.0, 4.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004573822021484375, -0.004433929920196533, -0.004294037818908691, -0.00415414571762085, -0.004014253616333008, -0.003874361515045166, -0.0037344694137573242, -0.0035945773124694824, -0.0034546852111816406, -0.003314793109893799, -0.003174901008605957, -0.0030350089073181152, -0.0028951168060302734, -0.0027552247047424316, -0.00261533260345459, -0.002475440502166748, -0.0023355484008789062, -0.0021956562995910645, -0.0020557641983032227, -0.0019158720970153809, -0.001775979995727539, -0.0016360878944396973, -0.0014961957931518555, -0.0013563036918640137, -0.0012164115905761719, -0.00107651948928833, -0.0009366273880004883, -0.0007967352867126465, -0.0006568431854248047, -0.0005169510841369629, -0.0003770589828491211, -0.0002371668815612793, -9.72747802734375e-05, 4.26173210144043e-05, 0.0001825094223022461, 0.0003224015235900879, 0.0004622936248779297, 0.0006021857261657715, 0.0007420778274536133, 0.0008819699287414551, 0.0010218620300292969, 0.0011617541313171387, 0.0013016462326049805, 0.0014415383338928223, 0.001581430435180664, 0.0017213225364685059, 0.0018612146377563477, 0.0020011067390441895, 0.0021409988403320312, 0.002280890941619873, 0.002420783042907715, 0.0025606751441955566, 0.0027005672454833984, 0.0028404593467712402, 0.002980351448059082, 0.003120243549346924, 0.0032601356506347656, 0.0034000277519226074, 0.0035399198532104492, 0.003679811954498291, 0.003819704055786133, 0.003959596157073975, 0.004099488258361816, 0.004239380359649658, 0.0043792724609375]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 0.0, 3.0, 2.0, 6.0, 4.0, 12.0, 15.0, 24.0, 30.0, 54.0, 142.0, 281.0, 654.0, 1760.0, 6576.0, 54517.0, 916660.0, 58184.0, 6636.0, 1791.0, 640.0, 266.0, 136.0, 58.0, 34.0, 37.0, 10.0, 10.0, 5.0, 5.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.578125, -12.136962890625, -11.69580078125, -11.254638671875, -10.8134765625, -10.372314453125, -9.93115234375, -9.489990234375, -9.048828125, -8.607666015625, -8.16650390625, -7.725341796875, -7.2841796875, -6.843017578125, -6.40185546875, -5.960693359375, -5.51953125, -5.078369140625, -4.63720703125, -4.196044921875, -3.7548828125, -3.313720703125, -2.87255859375, -2.431396484375, -1.990234375, -1.549072265625, -1.10791015625, -0.666748046875, -0.2255859375, 0.215576171875, 0.65673828125, 1.097900390625, 1.5390625, 1.980224609375, 2.42138671875, 2.862548828125, 3.3037109375, 3.744873046875, 4.18603515625, 4.627197265625, 5.068359375, 5.509521484375, 5.95068359375, 6.391845703125, 6.8330078125, 7.274169921875, 7.71533203125, 8.156494140625, 8.59765625, 9.038818359375, 9.47998046875, 9.921142578125, 10.3623046875, 10.803466796875, 11.24462890625, 11.685791015625, 12.126953125, 12.568115234375, 13.00927734375, 13.450439453125, 13.8916015625, 14.332763671875, 14.77392578125, 15.215087890625, 15.65625]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 2.0, 3.0, 1.0, 1.0, 6.0, 4.0, 4.0, 7.0, 5.0, 9.0, 7.0, 10.0, 13.0, 20.0, 26.0, 15.0, 27.0, 44.0, 40.0, 46.0, 59.0, 82.0, 89.0, 92.0, 78.0, 60.0, 53.0, 44.0, 34.0, 20.0, 26.0, 22.0, 8.0, 8.0, 6.0, 10.0, 5.0, 4.0, 3.0, 3.0, 3.0, 4.0, 2.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.63671875, -4.48333740234375, -4.3299560546875, -4.17657470703125, -4.023193359375, -3.86981201171875, -3.7164306640625, -3.56304931640625, -3.40966796875, -3.25628662109375, -3.1029052734375, -2.94952392578125, -2.796142578125, -2.64276123046875, -2.4893798828125, -2.33599853515625, -2.1826171875, -2.02923583984375, -1.8758544921875, -1.72247314453125, -1.569091796875, -1.41571044921875, -1.2623291015625, -1.10894775390625, -0.95556640625, -0.80218505859375, -0.6488037109375, -0.49542236328125, -0.342041015625, -0.18865966796875, -0.0352783203125, 0.11810302734375, 0.271484375, 0.42486572265625, 0.5782470703125, 0.73162841796875, 0.885009765625, 1.03839111328125, 1.1917724609375, 1.34515380859375, 1.49853515625, 1.65191650390625, 1.8052978515625, 1.95867919921875, 2.112060546875, 2.26544189453125, 2.4188232421875, 2.57220458984375, 2.7255859375, 2.87896728515625, 3.0323486328125, 3.18572998046875, 3.339111328125, 3.49249267578125, 3.6458740234375, 3.79925537109375, 3.95263671875, 4.10601806640625, 4.2593994140625, 4.41278076171875, 4.566162109375, 4.71954345703125, 4.8729248046875, 5.02630615234375, 5.1796875]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 10.0, 3.0, 7.0, 6.0, 13.0, 31.0, 49.0, 84.0, 162.0, 355.0, 117.0, 74.0, 43.0, 15.0, 11.0, 6.0, 7.0, 4.0, 2.0, 1.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-128.86544799804688, -123.31143188476562, -117.75741577148438, -112.20339965820312, -106.64938354492188, -101.09536743164062, -95.54135131835938, -89.98733520507812, -84.43331909179688, -78.87930297851562, -73.32528686523438, -67.77127075195312, -62.217254638671875, -56.663238525390625, -51.10922622680664, -45.55521011352539, -40.001197814941406, -34.447181701660156, -28.893165588378906, -23.33915138244629, -17.78513526916504, -12.231119155883789, -6.677104949951172, -1.1230888366699219, 4.430927276611328, 9.984943389892578, 15.538958549499512, 21.092973709106445, 26.646989822387695, 32.20100402832031, 37.75502014160156, 43.30903625488281, 48.86305236816406, 54.41706848144531, 59.97108459472656, 65.52510070800781, 71.07911682128906, 76.63313293457031, 82.18714904785156, 87.74116516113281, 93.29518127441406, 98.84919738769531, 104.40321350097656, 109.95722961425781, 115.51124572753906, 121.06526184082031, 126.61927795410156, 132.1732940673828, 137.727294921875, 143.28131103515625, 148.8353271484375, 154.38934326171875, 159.943359375, 165.49737548828125, 171.0513916015625, 176.60540771484375, 182.159423828125, 187.71343994140625, 193.2674560546875, 198.82147216796875, 204.37548828125, 209.92950439453125, 215.4835205078125, 221.03753662109375, 226.591552734375]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 5.0, 1.0, 2.0, 5.0, 8.0, 11.0, 11.0, 12.0, 11.0, 15.0, 15.0, 18.0, 11.0, 19.0, 19.0, 25.0, 26.0, 31.0, 26.0, 35.0, 47.0, 82.0, 141.0, 88.0, 50.0, 30.0, 30.0, 25.0, 25.0, 28.0, 26.0, 18.0, 13.0, 12.0, 16.0, 9.0, 7.0, 11.0, 13.0, 5.0, 9.0, 5.0, 5.0, 3.0, 2.0, 0.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-75.51902770996094, -73.26681518554688, -71.01461029052734, -68.76239776611328, -66.51018524169922, -64.25798034667969, -62.005767822265625, -59.75355911254883, -57.50135040283203, -55.249141693115234, -52.99692916870117, -50.744720458984375, -48.49251174926758, -46.24030303955078, -43.98809051513672, -41.73588180541992, -39.48366928100586, -37.23146057128906, -34.979248046875, -32.7270393371582, -30.474830627441406, -28.222620010375977, -25.970409393310547, -23.71820068359375, -21.46599006652832, -19.21377944946289, -16.961570739746094, -14.709360122680664, -12.45715045928955, -10.204940795898438, -7.952730178833008, -5.7005205154418945, -3.4483108520507812, -1.1961009502410889, 1.0561089515686035, 3.308319091796875, 5.560528755187988, 7.812738418579102, 10.064949035644531, 12.317158699035645, 14.569368362426758, 16.821578979492188, 19.073787689208984, 21.325998306274414, 23.578208923339844, 25.83041763305664, 28.08262825012207, 30.3348388671875, 32.5870475769043, 34.839256286621094, 37.091468811035156, 39.34367752075195, 41.59588623046875, 43.84809875488281, 46.10030746459961, 48.352516174316406, 50.60472869873047, 52.856937408447266, 55.10914993286133, 57.361358642578125, 59.61356735229492, 61.86577606201172, 64.11798858642578, 66.37020111083984, 68.62240600585938]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 8.0, 16.0, 16.0, 27.0, 39.0, 100.0, 171.0, 451.0, 1106.0, 3727.0, 16572.0, 236275.0, 3724634.0, 191659.0, 14697.0, 3038.0, 977.0, 359.0, 151.0, 92.0, 58.0, 30.0, 18.0, 16.0, 10.0, 9.0, 5.0, 7.0, 5.0, 6.0, 1.0, 4.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.875, -21.97705078125, -21.0791015625, -20.18115234375, -19.283203125, -18.38525390625, -17.4873046875, -16.58935546875, -15.69140625, -14.79345703125, -13.8955078125, -12.99755859375, -12.099609375, -11.20166015625, -10.3037109375, -9.40576171875, -8.5078125, -7.60986328125, -6.7119140625, -5.81396484375, -4.916015625, -4.01806640625, -3.1201171875, -2.22216796875, -1.32421875, -0.42626953125, 0.4716796875, 1.36962890625, 2.267578125, 3.16552734375, 4.0634765625, 4.96142578125, 5.859375, 6.75732421875, 7.6552734375, 8.55322265625, 9.451171875, 10.34912109375, 11.2470703125, 12.14501953125, 13.04296875, 13.94091796875, 14.8388671875, 15.73681640625, 16.634765625, 17.53271484375, 18.4306640625, 19.32861328125, 20.2265625, 21.12451171875, 22.0224609375, 22.92041015625, 23.818359375, 24.71630859375, 25.6142578125, 26.51220703125, 27.41015625, 28.30810546875, 29.2060546875, 30.10400390625, 31.001953125, 31.89990234375, 32.7978515625, 33.69580078125, 34.59375]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 4.0, 1.0, 3.0, 3.0, 10.0, 8.0, 6.0, 10.0, 9.0, 11.0, 17.0, 14.0, 18.0, 27.0, 22.0, 33.0, 42.0, 35.0, 33.0, 42.0, 40.0, 55.0, 34.0, 29.0, 35.0, 39.0, 47.0, 42.0, 43.0, 29.0, 33.0, 26.0, 23.0, 28.0, 27.0, 23.0, 18.0, 14.0, 12.0, 16.0, 13.0, 8.0, 4.0, 5.0, 7.0, 4.0, 4.0, 1.0, 1.0, 1.0, 4.0, 1.0, 2.0, 1.0, 1.0], "bins": [-5.25, -5.0916748046875, -4.933349609375, -4.7750244140625, -4.61669921875, -4.4583740234375, -4.300048828125, -4.1417236328125, -3.9833984375, -3.8250732421875, -3.666748046875, -3.5084228515625, -3.35009765625, -3.1917724609375, -3.033447265625, -2.8751220703125, -2.716796875, -2.5584716796875, -2.400146484375, -2.2418212890625, -2.08349609375, -1.9251708984375, -1.766845703125, -1.6085205078125, -1.4501953125, -1.2918701171875, -1.133544921875, -0.9752197265625, -0.81689453125, -0.6585693359375, -0.500244140625, -0.3419189453125, -0.18359375, -0.0252685546875, 0.133056640625, 0.2913818359375, 0.44970703125, 0.6080322265625, 0.766357421875, 0.9246826171875, 1.0830078125, 1.2413330078125, 1.399658203125, 1.5579833984375, 1.71630859375, 1.8746337890625, 2.032958984375, 2.1912841796875, 2.349609375, 2.5079345703125, 2.666259765625, 2.8245849609375, 2.98291015625, 3.1412353515625, 3.299560546875, 3.4578857421875, 3.6162109375, 3.7745361328125, 3.932861328125, 4.0911865234375, 4.24951171875, 4.4078369140625, 4.566162109375, 4.7244873046875, 4.8828125]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 8.0, 13.0, 13.0, 43.0, 50.0, 104.0, 236.0, 499.0, 1489.0, 7314.0, 803798.0, 3368459.0, 9514.0, 1637.0, 562.0, 263.0, 128.0, 70.0, 30.0, 15.0, 13.0, 7.0, 7.0, 5.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-73.5625, -71.46435546875, -69.3662109375, -67.26806640625, -65.169921875, -63.07177734375, -60.9736328125, -58.87548828125, -56.77734375, -54.67919921875, -52.5810546875, -50.48291015625, -48.384765625, -46.28662109375, -44.1884765625, -42.09033203125, -39.9921875, -37.89404296875, -35.7958984375, -33.69775390625, -31.599609375, -29.50146484375, -27.4033203125, -25.30517578125, -23.20703125, -21.10888671875, -19.0107421875, -16.91259765625, -14.814453125, -12.71630859375, -10.6181640625, -8.52001953125, -6.421875, -4.32373046875, -2.2255859375, -0.12744140625, 1.970703125, 4.06884765625, 6.1669921875, 8.26513671875, 10.36328125, 12.46142578125, 14.5595703125, 16.65771484375, 18.755859375, 20.85400390625, 22.9521484375, 25.05029296875, 27.1484375, 29.24658203125, 31.3447265625, 33.44287109375, 35.541015625, 37.63916015625, 39.7373046875, 41.83544921875, 43.93359375, 46.03173828125, 48.1298828125, 50.22802734375, 52.326171875, 54.42431640625, 56.5224609375, 58.62060546875, 60.71875]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 1.0, 9.0, 9.0, 18.0, 33.0, 63.0, 85.0, 144.0, 284.0, 656.0, 1249.0, 800.0, 363.0, 158.0, 98.0, 46.0, 33.0, 8.0, 16.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.65625, -40.41943359375, -39.1826171875, -37.94580078125, -36.708984375, -35.47216796875, -34.2353515625, -32.99853515625, -31.76171875, -30.52490234375, -29.2880859375, -28.05126953125, -26.814453125, -25.57763671875, -24.3408203125, -23.10400390625, -21.8671875, -20.63037109375, -19.3935546875, -18.15673828125, -16.919921875, -15.68310546875, -14.4462890625, -13.20947265625, -11.97265625, -10.73583984375, -9.4990234375, -8.26220703125, -7.025390625, -5.78857421875, -4.5517578125, -3.31494140625, -2.078125, -0.84130859375, 0.3955078125, 1.63232421875, 2.869140625, 4.10595703125, 5.3427734375, 6.57958984375, 7.81640625, 9.05322265625, 10.2900390625, 11.52685546875, 12.763671875, 14.00048828125, 15.2373046875, 16.47412109375, 17.7109375, 18.94775390625, 20.1845703125, 21.42138671875, 22.658203125, 23.89501953125, 25.1318359375, 26.36865234375, 27.60546875, 28.84228515625, 30.0791015625, 31.31591796875, 32.552734375, 33.78955078125, 35.0263671875, 36.26318359375, 37.5]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 4.0, 3.0, 2.0, 8.0, 7.0, 11.0, 20.0, 28.0, 50.0, 84.0, 128.0, 187.0, 168.0, 122.0, 69.0, 42.0, 20.0, 17.0, 12.0, 5.0, 5.0, 3.0, 3.0, 4.0, 0.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-281.06365966796875, -273.0006408691406, -264.9375915527344, -256.87457275390625, -248.81155395507812, -240.74851989746094, -232.68548583984375, -224.62246704101562, -216.55943298339844, -208.49639892578125, -200.43338012695312, -192.37034606933594, -184.30731201171875, -176.24429321289062, -168.18125915527344, -160.11822509765625, -152.05520629882812, -143.99217224121094, -135.9291534423828, -127.86611938476562, -119.80309295654297, -111.74006652832031, -103.67703247070312, -95.61400604248047, -87.55097961425781, -79.48795318603516, -71.4249267578125, -63.36189270019531, -55.298866271972656, -47.23583984375, -39.17280960083008, -31.109779357910156, -23.0467529296875, -14.983724594116211, -6.920696258544922, 1.1423320770263672, 9.205360412597656, 17.268386840820312, 25.331417083740234, 33.394447326660156, 41.45747375488281, 49.52050018310547, 57.58353042602539, 65.64656066894531, 73.70958709716797, 81.77261352539062, 89.83564758300781, 97.89867401123047, 105.96170043945312, 114.02472686767578, 122.08775329589844, 130.15078735351562, 138.21380615234375, 146.27684020996094, 154.33987426757812, 162.40289306640625, 170.46592712402344, 178.52896118164062, 186.59197998046875, 194.65501403808594, 202.71804809570312, 210.78106689453125, 218.84410095214844, 226.90713500976562, 234.97015380859375]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 6.0, 1.0, 5.0, 7.0, 5.0, 12.0, 17.0, 19.0, 31.0, 29.0, 24.0, 40.0, 50.0, 49.0, 35.0, 54.0, 61.0, 68.0, 64.0, 54.0, 49.0, 49.0, 55.0, 29.0, 34.0, 35.0, 30.0, 24.0, 14.0, 4.0, 17.0, 11.0, 6.0, 10.0, 3.0, 4.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-141.9376220703125, -137.2917022705078, -132.64578247070312, -127.99987030029297, -123.35395812988281, -118.70803833007812, -114.06211853027344, -109.41619873046875, -104.7702865600586, -100.1243667602539, -95.47845458984375, -90.83253479003906, -86.18661499023438, -81.54070281982422, -76.89478302001953, -72.24887084960938, -67.60295104980469, -62.957035064697266, -58.311119079589844, -53.665199279785156, -49.019283294677734, -44.37336730957031, -39.727447509765625, -35.0815315246582, -30.43561553955078, -25.78969955444336, -21.143781661987305, -16.49786376953125, -11.851947784423828, -7.206031799316406, -2.5601139068603516, 2.085803985595703, 6.731719970703125, 11.377636909484863, 16.0235538482666, 20.669471740722656, 25.315387725830078, 29.9613037109375, 34.60722351074219, 39.25313949584961, 43.89905548095703, 48.54497146606445, 53.190887451171875, 57.83680725097656, 62.482723236083984, 67.1286392211914, 71.7745590209961, 76.42047119140625, 81.06639099121094, 85.71231079101562, 90.35822296142578, 95.00414276123047, 99.65005493164062, 104.29597473144531, 108.94189453125, 113.58781433105469, 118.23372650146484, 122.87964630126953, 127.52555847167969, 132.17147827148438, 136.81739807128906, 141.46331787109375, 146.10922241210938, 150.75514221191406, 155.40106201171875]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 2.0, 1.0, 5.0, 9.0, 8.0, 10.0, 18.0, 31.0, 36.0, 52.0, 96.0, 128.0, 248.0, 479.0, 821.0, 1944.0, 4743.0, 15508.0, 74149.0, 757627.0, 155095.0, 25423.0, 7097.0, 2570.0, 1089.0, 589.0, 315.0, 163.0, 111.0, 62.0, 35.0, 29.0, 20.0, 13.0, 9.0, 7.0, 6.0, 5.0, 5.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.546875, -22.779296875, -22.01171875, -21.244140625, -20.4765625, -19.708984375, -18.94140625, -18.173828125, -17.40625, -16.638671875, -15.87109375, -15.103515625, -14.3359375, -13.568359375, -12.80078125, -12.033203125, -11.265625, -10.498046875, -9.73046875, -8.962890625, -8.1953125, -7.427734375, -6.66015625, -5.892578125, -5.125, -4.357421875, -3.58984375, -2.822265625, -2.0546875, -1.287109375, -0.51953125, 0.248046875, 1.015625, 1.783203125, 2.55078125, 3.318359375, 4.0859375, 4.853515625, 5.62109375, 6.388671875, 7.15625, 7.923828125, 8.69140625, 9.458984375, 10.2265625, 10.994140625, 11.76171875, 12.529296875, 13.296875, 14.064453125, 14.83203125, 15.599609375, 16.3671875, 17.134765625, 17.90234375, 18.669921875, 19.4375, 20.205078125, 20.97265625, 21.740234375, 22.5078125, 23.275390625, 24.04296875, 24.810546875, 25.578125]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 5.0, 9.0, 7.0, 17.0, 16.0, 35.0, 34.0, 39.0, 63.0, 61.0, 87.0, 82.0, 91.0, 88.0, 76.0, 76.0, 45.0, 40.0, 45.0, 32.0, 20.0, 13.0, 11.0, 8.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.0234375, -12.622802734375, -12.22216796875, -11.821533203125, -11.4208984375, -11.020263671875, -10.61962890625, -10.218994140625, -9.818359375, -9.417724609375, -9.01708984375, -8.616455078125, -8.2158203125, -7.815185546875, -7.41455078125, -7.013916015625, -6.61328125, -6.212646484375, -5.81201171875, -5.411376953125, -5.0107421875, -4.610107421875, -4.20947265625, -3.808837890625, -3.408203125, -3.007568359375, -2.60693359375, -2.206298828125, -1.8056640625, -1.405029296875, -1.00439453125, -0.603759765625, -0.203125, 0.197509765625, 0.59814453125, 0.998779296875, 1.3994140625, 1.800048828125, 2.20068359375, 2.601318359375, 3.001953125, 3.402587890625, 3.80322265625, 4.203857421875, 4.6044921875, 5.005126953125, 5.40576171875, 5.806396484375, 6.20703125, 6.607666015625, 7.00830078125, 7.408935546875, 7.8095703125, 8.210205078125, 8.61083984375, 9.011474609375, 9.412109375, 9.812744140625, 10.21337890625, 10.614013671875, 11.0146484375, 11.415283203125, 11.81591796875, 12.216552734375, 12.6171875]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 3.0, 5.0, 5.0, 6.0, 10.0, 12.0, 11.0, 22.0, 22.0, 34.0, 42.0, 64.0, 95.0, 132.0, 237.0, 445.0, 820.0, 1757.0, 4649.0, 17136.0, 135081.0, 847181.0, 29365.0, 6862.0, 2313.0, 975.0, 495.0, 277.0, 162.0, 101.0, 79.0, 56.0, 26.0, 21.0, 23.0, 10.0, 5.0, 8.0, 2.0, 7.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-20.421875, -19.69677734375, -18.9716796875, -18.24658203125, -17.521484375, -16.79638671875, -16.0712890625, -15.34619140625, -14.62109375, -13.89599609375, -13.1708984375, -12.44580078125, -11.720703125, -10.99560546875, -10.2705078125, -9.54541015625, -8.8203125, -8.09521484375, -7.3701171875, -6.64501953125, -5.919921875, -5.19482421875, -4.4697265625, -3.74462890625, -3.01953125, -2.29443359375, -1.5693359375, -0.84423828125, -0.119140625, 0.60595703125, 1.3310546875, 2.05615234375, 2.78125, 3.50634765625, 4.2314453125, 4.95654296875, 5.681640625, 6.40673828125, 7.1318359375, 7.85693359375, 8.58203125, 9.30712890625, 10.0322265625, 10.75732421875, 11.482421875, 12.20751953125, 12.9326171875, 13.65771484375, 14.3828125, 15.10791015625, 15.8330078125, 16.55810546875, 17.283203125, 18.00830078125, 18.7333984375, 19.45849609375, 20.18359375, 20.90869140625, 21.6337890625, 22.35888671875, 23.083984375, 23.80908203125, 24.5341796875, 25.25927734375, 25.984375]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 3.0, 5.0, 3.0, 2.0, 9.0, 9.0, 9.0, 10.0, 14.0, 19.0, 27.0, 31.0, 35.0, 54.0, 49.0, 81.0, 126.0, 120.0, 93.0, 72.0, 49.0, 48.0, 27.0, 24.0, 24.0, 13.0, 8.0, 9.0, 10.0, 8.0, 1.0, 6.0, 5.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.75, -41.29736328125, -39.8447265625, -38.39208984375, -36.939453125, -35.48681640625, -34.0341796875, -32.58154296875, -31.12890625, -29.67626953125, -28.2236328125, -26.77099609375, -25.318359375, -23.86572265625, -22.4130859375, -20.96044921875, -19.5078125, -18.05517578125, -16.6025390625, -15.14990234375, -13.697265625, -12.24462890625, -10.7919921875, -9.33935546875, -7.88671875, -6.43408203125, -4.9814453125, -3.52880859375, -2.076171875, -0.62353515625, 0.8291015625, 2.28173828125, 3.734375, 5.18701171875, 6.6396484375, 8.09228515625, 9.544921875, 10.99755859375, 12.4501953125, 13.90283203125, 15.35546875, 16.80810546875, 18.2607421875, 19.71337890625, 21.166015625, 22.61865234375, 24.0712890625, 25.52392578125, 26.9765625, 28.42919921875, 29.8818359375, 31.33447265625, 32.787109375, 34.23974609375, 35.6923828125, 37.14501953125, 38.59765625, 40.05029296875, 41.5029296875, 42.95556640625, 44.408203125, 45.86083984375, 47.3134765625, 48.76611328125, 50.21875]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 6.0, 4.0, 7.0, 9.0, 10.0, 8.0, 15.0, 23.0, 28.0, 42.0, 51.0, 62.0, 103.0, 176.0, 313.0, 514.0, 1050.0, 2603.0, 8631.0, 52634.0, 915961.0, 52835.0, 8403.0, 2649.0, 1024.0, 511.0, 327.0, 181.0, 99.0, 75.0, 44.0, 36.0, 30.0, 19.0, 19.0, 11.0, 11.0, 10.0, 6.0, 6.0, 3.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0], "bins": [-6.6328125, -6.4405517578125, -6.248291015625, -6.0560302734375, -5.86376953125, -5.6715087890625, -5.479248046875, -5.2869873046875, -5.0947265625, -4.9024658203125, -4.710205078125, -4.5179443359375, -4.32568359375, -4.1334228515625, -3.941162109375, -3.7489013671875, -3.556640625, -3.3643798828125, -3.172119140625, -2.9798583984375, -2.78759765625, -2.5953369140625, -2.403076171875, -2.2108154296875, -2.0185546875, -1.8262939453125, -1.634033203125, -1.4417724609375, -1.24951171875, -1.0572509765625, -0.864990234375, -0.6727294921875, -0.48046875, -0.2882080078125, -0.095947265625, 0.0963134765625, 0.28857421875, 0.4808349609375, 0.673095703125, 0.8653564453125, 1.0576171875, 1.2498779296875, 1.442138671875, 1.6343994140625, 1.82666015625, 2.0189208984375, 2.211181640625, 2.4034423828125, 2.595703125, 2.7879638671875, 2.980224609375, 3.1724853515625, 3.36474609375, 3.5570068359375, 3.749267578125, 3.9415283203125, 4.1337890625, 4.3260498046875, 4.518310546875, 4.7105712890625, 4.90283203125, 5.0950927734375, 5.287353515625, 5.4796142578125, 5.671875]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 0.0, 5.0, 10.0, 14.0, 6.0, 11.0, 28.0, 41.0, 70.0, 99.0, 137.0, 189.0, 135.0, 83.0, 47.0, 37.0, 34.0, 17.0, 12.0, 9.0, 7.0, 3.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.002765655517578125, -0.002684950828552246, -0.002604246139526367, -0.0025235414505004883, -0.0024428367614746094, -0.0023621320724487305, -0.0022814273834228516, -0.0022007226943969727, -0.0021200180053710938, -0.002039313316345215, -0.001958608627319336, -0.001877903938293457, -0.0017971992492675781, -0.0017164945602416992, -0.0016357898712158203, -0.0015550851821899414, -0.0014743804931640625, -0.0013936758041381836, -0.0013129711151123047, -0.0012322664260864258, -0.0011515617370605469, -0.001070857048034668, -0.000990152359008789, -0.0009094476699829102, -0.0008287429809570312, -0.0007480382919311523, -0.0006673336029052734, -0.0005866289138793945, -0.0005059242248535156, -0.0004252195358276367, -0.0003445148468017578, -0.0002638101577758789, -0.00018310546875, -0.0001024007797241211, -2.1696090698242188e-05, 5.900859832763672e-05, 0.00013971328735351562, 0.00022041797637939453, 0.00030112266540527344, 0.00038182735443115234, 0.00046253204345703125, 0.0005432367324829102, 0.0006239414215087891, 0.000704646110534668, 0.0007853507995605469, 0.0008660554885864258, 0.0009467601776123047, 0.0010274648666381836, 0.0011081695556640625, 0.0011888742446899414, 0.0012695789337158203, 0.0013502836227416992, 0.0014309883117675781, 0.001511693000793457, 0.001592397689819336, 0.0016731023788452148, 0.0017538070678710938, 0.0018345117568969727, 0.0019152164459228516, 0.0019959211349487305, 0.0020766258239746094, 0.0021573305130004883, 0.002238035202026367, 0.002318739891052246, 0.002399444580078125]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 0.0, 5.0, 3.0, 1.0, 9.0, 7.0, 9.0, 23.0, 29.0, 40.0, 87.0, 125.0, 241.0, 492.0, 1199.0, 3478.0, 17525.0, 767829.0, 237635.0, 14577.0, 3151.0, 1082.0, 463.0, 211.0, 129.0, 90.0, 48.0, 20.0, 18.0, 10.0, 9.0, 2.0, 3.0, 6.0, 1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.83984375, -4.66668701171875, -4.4935302734375, -4.32037353515625, -4.147216796875, -3.97406005859375, -3.8009033203125, -3.62774658203125, -3.45458984375, -3.28143310546875, -3.1082763671875, -2.93511962890625, -2.761962890625, -2.58880615234375, -2.4156494140625, -2.24249267578125, -2.0693359375, -1.89617919921875, -1.7230224609375, -1.54986572265625, -1.376708984375, -1.20355224609375, -1.0303955078125, -0.85723876953125, -0.68408203125, -0.51092529296875, -0.3377685546875, -0.16461181640625, 0.008544921875, 0.18170166015625, 0.3548583984375, 0.52801513671875, 0.701171875, 0.87432861328125, 1.0474853515625, 1.22064208984375, 1.393798828125, 1.56695556640625, 1.7401123046875, 1.91326904296875, 2.08642578125, 2.25958251953125, 2.4327392578125, 2.60589599609375, 2.779052734375, 2.95220947265625, 3.1253662109375, 3.29852294921875, 3.4716796875, 3.64483642578125, 3.8179931640625, 3.99114990234375, 4.164306640625, 4.33746337890625, 4.5106201171875, 4.68377685546875, 4.85693359375, 5.03009033203125, 5.2032470703125, 5.37640380859375, 5.549560546875, 5.72271728515625, 5.8958740234375, 6.06903076171875, 6.2421875]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 4.0, 1.0, 2.0, 2.0, 1.0, 3.0, 3.0, 1.0, 5.0, 4.0, 2.0, 11.0, 12.0, 10.0, 6.0, 13.0, 33.0, 42.0, 43.0, 49.0, 84.0, 76.0, 93.0, 112.0, 68.0, 62.0, 50.0, 38.0, 27.0, 30.0, 18.0, 16.0, 15.0, 12.0, 12.0, 8.0, 8.0, 7.0, 3.0, 1.0, 5.0, 5.0, 3.0, 2.0, 1.0, 5.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.05078125, -2.948486328125, -2.84619140625, -2.743896484375, -2.6416015625, -2.539306640625, -2.43701171875, -2.334716796875, -2.232421875, -2.130126953125, -2.02783203125, -1.925537109375, -1.8232421875, -1.720947265625, -1.61865234375, -1.516357421875, -1.4140625, -1.311767578125, -1.20947265625, -1.107177734375, -1.0048828125, -0.902587890625, -0.80029296875, -0.697998046875, -0.595703125, -0.493408203125, -0.39111328125, -0.288818359375, -0.1865234375, -0.084228515625, 0.01806640625, 0.120361328125, 0.22265625, 0.324951171875, 0.42724609375, 0.529541015625, 0.6318359375, 0.734130859375, 0.83642578125, 0.938720703125, 1.041015625, 1.143310546875, 1.24560546875, 1.347900390625, 1.4501953125, 1.552490234375, 1.65478515625, 1.757080078125, 1.859375, 1.961669921875, 2.06396484375, 2.166259765625, 2.2685546875, 2.370849609375, 2.47314453125, 2.575439453125, 2.677734375, 2.780029296875, 2.88232421875, 2.984619140625, 3.0869140625, 3.189208984375, 3.29150390625, 3.393798828125, 3.49609375]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 4.0, 5.0, 4.0, 9.0, 9.0, 18.0, 16.0, 21.0, 27.0, 40.0, 67.0, 115.0, 417.0, 81.0, 52.0, 34.0, 25.0, 16.0, 12.0, 8.0, 6.0, 1.0, 8.0, 2.0, 3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-72.13442993164062, -69.05243682861328, -65.97045135498047, -62.888458251953125, -59.80646514892578, -56.7244758605957, -53.642486572265625, -50.56049346923828, -47.47850036621094, -44.39651107788086, -41.314517974853516, -38.23252868652344, -35.150535583496094, -32.068546295166016, -28.986555099487305, -25.904563903808594, -22.822574615478516, -19.740583419799805, -16.658592224121094, -13.5766019821167, -10.494610786437988, -7.412619590759277, -4.330629348754883, -1.2486381530761719, 1.833353042602539, 4.91534423828125, 7.997334957122803, 11.079325675964355, 14.161316871643066, 17.243309020996094, 20.325298309326172, 23.407289505004883, 26.489280700683594, 29.571271896362305, 32.653263092041016, 35.735252380371094, 38.81724548339844, 41.899234771728516, 44.981224060058594, 48.06321716308594, 51.14521026611328, 54.22719955444336, 57.3091926574707, 60.39118194580078, 63.473175048828125, 66.55516815185547, 69.63715362548828, 72.71914672851562, 75.80113220214844, 78.88312530517578, 81.9651107788086, 85.04710388183594, 88.12909698486328, 91.21109008789062, 94.29307556152344, 97.37506866455078, 100.45706176757812, 103.53905487060547, 106.62104034423828, 109.70303344726562, 112.78502655029297, 115.86701965332031, 118.94900512695312, 122.03099822998047, 125.11299133300781]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 4.0, 5.0, 4.0, 9.0, 5.0, 4.0, 9.0, 13.0, 15.0, 13.0, 21.0, 16.0, 20.0, 30.0, 32.0, 29.0, 52.0, 195.0, 262.0, 72.0, 22.0, 15.0, 22.0, 31.0, 18.0, 20.0, 13.0, 10.0, 6.0, 5.0, 8.0, 5.0, 12.0, 3.0, 4.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-95.41090393066406, -92.34678649902344, -89.28266906738281, -86.21855163574219, -83.15443420410156, -80.09031677246094, -77.02619934082031, -73.96207427978516, -70.89795684814453, -67.8338394165039, -64.76972198486328, -61.705604553222656, -58.641483306884766, -55.57736587524414, -52.513248443603516, -49.449127197265625, -46.385013580322266, -43.32089614868164, -40.256778717041016, -37.192657470703125, -34.1285400390625, -31.064422607421875, -28.00030517578125, -24.936185836791992, -21.872068405151367, -18.807950973510742, -15.743831634521484, -12.67971420288086, -9.615595817565918, -6.551477432250977, -3.4873600006103516, -0.42324066162109375, 2.6408767700195312, 5.704995155334473, 8.769113540649414, 11.833230972290039, 14.89734935760498, 17.961467742919922, 21.025585174560547, 24.089704513549805, 27.15382194519043, 30.217939376831055, 33.28205871582031, 36.34617614746094, 39.41029357910156, 42.47441101074219, 45.53852844238281, 48.6026496887207, 51.66676712036133, 54.73088455200195, 57.79500198364258, 60.85912322998047, 63.923240661621094, 66.98735809326172, 70.05147552490234, 73.11559295654297, 76.1797103881836, 79.24382781982422, 82.30794525146484, 85.37206268310547, 88.4361801147461, 91.50030517578125, 94.56442260742188, 97.6285400390625, 100.69265747070312]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 4.0, 5.0, 7.0, 10.0, 16.0, 15.0, 19.0, 30.0, 38.0, 50.0, 61.0, 190.0, 268.0, 84.0, 55.0, 44.0, 26.0, 26.0, 21.0, 10.0, 7.0, 4.0, 12.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.9609375, -8.5548095703125, -8.148681640625, -7.7425537109375, -7.33642578125, -6.9302978515625, -6.524169921875, -6.1180419921875, -5.7119140625, -5.3057861328125, -4.899658203125, -4.4935302734375, -4.08740234375, -3.6812744140625, -3.275146484375, -2.8690185546875, -2.462890625, -2.0567626953125, -1.650634765625, -1.2445068359375, -0.83837890625, -0.4322509765625, -0.026123046875, 0.3800048828125, 0.7861328125, 1.1922607421875, 1.598388671875, 2.0045166015625, 2.41064453125, 2.8167724609375, 3.222900390625, 3.6290283203125, 4.03515625, 4.4412841796875, 4.847412109375, 5.2535400390625, 5.65966796875, 6.0657958984375, 6.471923828125, 6.8780517578125, 7.2841796875, 7.6903076171875, 8.096435546875, 8.5025634765625, 8.90869140625, 9.3148193359375, 9.720947265625, 10.1270751953125, 10.533203125, 10.9393310546875, 11.345458984375, 11.7515869140625, 12.15771484375, 12.5638427734375, 12.969970703125, 13.3760986328125, 13.7822265625, 14.1883544921875, 14.594482421875, 15.0006103515625, 15.40673828125, 15.8128662109375, 16.218994140625, 16.6251220703125, 17.03125]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 7.0, 2.0, 0.0, 1.0, 4.0, 5.0, 9.0, 16.0, 30.0, 64.0, 193.0, 1149.0, 8147985.0, 237717.0, 1093.0, 194.0, 50.0, 28.0, 20.0, 13.0, 5.0, 9.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-310.53485107421875, -303.31011962890625, -296.0853576660156, -288.8606262207031, -281.6358642578125, -274.4111328125, -267.1863708496094, -259.9616394042969, -252.73687744140625, -245.5121307373047, -238.28738403320312, -231.06263732910156, -223.837890625, -216.61314392089844, -209.38839721679688, -202.16366577148438, -194.9389190673828, -187.71417236328125, -180.4894256591797, -173.26467895507812, -166.03993225097656, -158.815185546875, -151.5904541015625, -144.36569213867188, -137.14096069335938, -129.9162139892578, -122.69146728515625, -115.46672058105469, -108.24197387695312, -101.01722717285156, -93.79248809814453, -86.56774139404297, -79.34298706054688, -72.11824035644531, -64.89349365234375, -57.66875076293945, -50.44400405883789, -43.21925735473633, -35.99451446533203, -28.76976776123047, -21.545021057128906, -14.32027530670166, -7.095529556274414, 0.12921524047851562, 7.353961944580078, 14.57870864868164, 21.803451538085938, 29.0281982421875, 36.25294494628906, 43.477691650390625, 50.70243835449219, 57.927181243896484, 65.15193176269531, 72.37667846679688, 79.6014175415039, 86.82616424560547, 94.05091094970703, 101.2756576538086, 108.50040435791016, 115.72514343261719, 122.94989013671875, 130.1746368408203, 137.39938354492188, 144.62413024902344, 151.848876953125]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 3.0, 5.0, 2.0, 2.0, 5.0, 8.0, 5.0, 8.0, 12.0, 8.0, 8.0, 5.0, 6.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 4.0, 1.0, 3.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-175.32557678222656, -170.28524780273438, -165.2449188232422, -160.20458984375, -155.1642608642578, -150.12393188476562, -145.08360290527344, -140.04327392578125, -135.00294494628906, -129.96261596679688, -124.92228698730469, -119.8819580078125, -114.84162902832031, -109.80130004882812, -104.76097106933594, -99.72064208984375, -94.68031311035156, -89.63998413085938, -84.59965515136719, -79.559326171875, -74.51899719238281, -69.47866821289062, -64.43833923339844, -59.39801025390625, -54.35768127441406, -49.317352294921875, -44.27702331542969, -39.2366943359375, -34.19636535644531, -29.156036376953125, -24.115707397460938, -19.07537841796875, -14.035049438476562, -8.994720458984375, -3.9543914794921875, 1.0859375, 6.1262664794921875, 11.166595458984375, 16.206924438476562, 21.24725341796875, 26.287582397460938, 31.327911376953125, 36.36824035644531, 41.4085693359375, 46.44889831542969, 51.489227294921875, 56.52955627441406, 61.56988525390625, 66.61021423339844, 71.65054321289062, 76.69087219238281, 81.731201171875, 86.77153015136719, 91.81185913085938, 96.85218811035156, 101.89251708984375, 106.93284606933594, 111.97317504882812, 117.01350402832031, 122.0538330078125, 127.09416198730469, 132.13449096679688, 137.17481994628906, 142.21514892578125, 147.25547790527344]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 4.0, 7.0, 4.0, 1.0, 9.0, 10.0, 13.0, 20.0, 30.0, 66.0, 128.0, 289.0, 754.0, 3007.0, 20586.0, 307493.0, 176904.0, 11825.0, 2004.0, 602.0, 249.0, 118.0, 55.0, 38.0, 23.0, 14.0, 7.0, 4.0, 5.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-162.625, -157.962890625, -153.30078125, -148.638671875, -143.9765625, -139.314453125, -134.65234375, -129.990234375, -125.328125, -120.666015625, -116.00390625, -111.341796875, -106.6796875, -102.017578125, -97.35546875, -92.693359375, -88.03125, -83.369140625, -78.70703125, -74.044921875, -69.3828125, -64.720703125, -60.05859375, -55.396484375, -50.734375, -46.072265625, -41.41015625, -36.748046875, -32.0859375, -27.423828125, -22.76171875, -18.099609375, -13.4375, -8.775390625, -4.11328125, 0.548828125, 5.2109375, 9.873046875, 14.53515625, 19.197265625, 23.859375, 28.521484375, 33.18359375, 37.845703125, 42.5078125, 47.169921875, 51.83203125, 56.494140625, 61.15625, 65.818359375, 70.48046875, 75.142578125, 79.8046875, 84.466796875, 89.12890625, 93.791015625, 98.453125, 103.115234375, 107.77734375, 112.439453125, 117.1015625, 121.763671875, 126.42578125, 131.087890625, 135.75]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 3.0, 6.0, 7.0, 6.0, 13.0, 13.0, 25.0, 25.0, 52.0, 65.0, 69.0, 69.0, 106.0, 90.0, 102.0, 88.0, 67.0, 55.0, 46.0, 22.0, 17.0, 18.0, 12.0, 10.0, 9.0, 4.0, 2.0, 4.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.3984375, -12.0169677734375, -11.635498046875, -11.2540283203125, -10.87255859375, -10.4910888671875, -10.109619140625, -9.7281494140625, -9.3466796875, -8.9652099609375, -8.583740234375, -8.2022705078125, -7.82080078125, -7.4393310546875, -7.057861328125, -6.6763916015625, -6.294921875, -5.9134521484375, -5.531982421875, -5.1505126953125, -4.76904296875, -4.3875732421875, -4.006103515625, -3.6246337890625, -3.2431640625, -2.8616943359375, -2.480224609375, -2.0987548828125, -1.71728515625, -1.3358154296875, -0.954345703125, -0.5728759765625, -0.19140625, 0.1900634765625, 0.571533203125, 0.9530029296875, 1.33447265625, 1.7159423828125, 2.097412109375, 2.4788818359375, 2.8603515625, 3.2418212890625, 3.623291015625, 4.0047607421875, 4.38623046875, 4.7677001953125, 5.149169921875, 5.5306396484375, 5.912109375, 6.2935791015625, 6.675048828125, 7.0565185546875, 7.43798828125, 7.8194580078125, 8.200927734375, 8.5823974609375, 8.9638671875, 9.3453369140625, 9.726806640625, 10.1082763671875, 10.48974609375, 10.8712158203125, 11.252685546875, 11.6341552734375, 12.015625]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 2.0, 1.0, 2.0, 4.0, 4.0, 9.0, 22.0, 63.0, 140.0, 113.0, 61.0, 26.0, 15.0, 12.0, 9.0, 4.0, 6.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-133.50978088378906, -129.83786010742188, -126.16593933105469, -122.4940185546875, -118.82209014892578, -115.1501693725586, -111.4782485961914, -107.80632781982422, -104.13440704345703, -100.46248626708984, -96.79056549072266, -93.11863708496094, -89.44671630859375, -85.77479553222656, -82.10287475585938, -78.43095397949219, -74.759033203125, -71.08711242675781, -67.41519165039062, -63.74326705932617, -60.071346282958984, -56.39942169189453, -52.727500915527344, -49.055580139160156, -45.38365173339844, -41.71173095703125, -38.0398063659668, -34.36788558959961, -30.695964813232422, -27.0240421295166, -23.35211944580078, -19.680198669433594, -16.008277893066406, -12.336356163024902, -8.664434432983398, -4.992511749267578, -1.3205900192260742, 2.3513317108154297, 6.02325439453125, 9.695175170898438, 13.367097854614258, 17.039020538330078, 20.710941314697266, 24.382863998413086, 28.054786682128906, 31.726707458496094, 35.39862823486328, 39.07054901123047, 42.74247360229492, 46.41439437866211, 50.08631896972656, 53.75823974609375, 57.43016052246094, 61.102081298828125, 64.77400207519531, 68.4459228515625, 72.11785125732422, 75.7897720336914, 79.4616928100586, 83.13362121582031, 86.8055419921875, 90.47746276855469, 94.14938354492188, 97.82130432128906, 101.49322509765625]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 4.0, 1.0, 4.0, 4.0, 3.0, 2.0, 6.0, 0.0, 6.0, 16.0, 41.0, 63.0, 80.0, 93.0, 61.0, 35.0, 17.0, 12.0, 5.0, 5.0, 7.0, 3.0, 6.0, 4.0, 3.0, 1.0, 2.0, 1.0, 5.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.763465881347656, -35.23151397705078, -33.699562072753906, -32.1676139831543, -30.635662078857422, -29.103710174560547, -27.571758270263672, -26.03980827331543, -24.507858276367188, -22.975906372070312, -21.44395637512207, -19.912004470825195, -18.380054473876953, -16.848102569580078, -15.31615161895752, -13.784200668334961, -12.252248764038086, -10.720297813415527, -9.188346862792969, -7.656395435333252, -6.124444484710693, -4.592493534088135, -3.060542106628418, -1.5285911560058594, 0.0033597946166992188, 1.5353108644485474, 3.0672619342803955, 4.599213123321533, 6.131164073944092, 7.66311502456665, 9.195066452026367, 10.727017402648926, 12.258968353271484, 13.790919303894043, 15.322870254516602, 16.854822158813477, 18.38677215576172, 19.918724060058594, 21.45067596435547, 22.98262596130371, 24.514575958251953, 26.046527862548828, 27.57847785949707, 29.110429763793945, 30.642379760742188, 32.17433166503906, 33.70628356933594, 35.23823547363281, 36.77018737792969, 38.30213928222656, 39.83409118652344, 41.36603927612305, 42.89799118041992, 44.4299430847168, 45.96189498901367, 47.49384307861328, 49.025794982910156, 50.55774688720703, 52.089698791503906, 53.621646881103516, 55.15359878540039, 56.685550689697266, 58.21750259399414, 59.74945068359375, 61.281402587890625]}, "_wandb": {"runtime": 2368}} \ No newline at end of file diff --git a/wandb/run-20220302_055556-ymuc7hv0/logs/debug-internal.log b/wandb/run-20220302_055556-ymuc7hv0/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..d345ed0708d165c46126c4a349894ff67408014d --- /dev/null +++ b/wandb/run-20220302_055556-ymuc7hv0/logs/debug-internal.log @@ -0,0 +1,6391 @@ +2022-03-02 05:55:57,103 INFO MainThread:253308 [internal.py:wandb_internal():89] W&B internal server running at pid: 253308, started at: 2022-03-02 05:55:57.103147 +2022-03-02 05:55:57,105 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: check_version +2022-03-02 05:55:57,105 INFO WriterThread:253308 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/run-ymuc7hv0.wandb +2022-03-02 05:55:57,107 DEBUG SenderThread:253308 [sender.py:send():235] send: header +2022-03-02 05:55:57,107 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: check_version +2022-03-02 05:55:57,177 DEBUG SenderThread:253308 [sender.py:send():235] send: run +2022-03-02 05:55:57,274 INFO SenderThread:253308 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files +2022-03-02 05:55:57,275 INFO SenderThread:253308 [sender.py:_start_run_threads():809] run started: ymuc7hv0 with start time 1646200556 +2022-03-02 05:55:57,275 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:55:57,275 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:55:57,276 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: run_start +2022-03-02 05:55:57,281 DEBUG HandlerThread:253308 [meta.py:__init__():36] meta init +2022-03-02 05:55:57,281 DEBUG HandlerThread:253308 [meta.py:__init__():50] meta init done +2022-03-02 05:55:57,281 DEBUG HandlerThread:253308 [meta.py:probe():210] probe +2022-03-02 05:55:57,287 DEBUG HandlerThread:253308 [meta.py:_setup_git():200] setup git +2022-03-02 05:55:57,302 DEBUG HandlerThread:253308 [meta.py:_setup_git():207] setup git done +2022-03-02 05:55:57,302 DEBUG HandlerThread:253308 [meta.py:_save_pip():54] save pip +2022-03-02 05:55:57,303 DEBUG HandlerThread:253308 [meta.py:_save_pip():68] save pip done +2022-03-02 05:55:57,303 DEBUG HandlerThread:253308 [meta.py:probe():248] probe done +2022-03-02 05:55:57,382 DEBUG SenderThread:253308 [sender.py:send():235] send: files +2022-03-02 05:55:57,382 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-02 05:55:57,387 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:55:57,387 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:55:57,425 DEBUG SenderThread:253308 [sender.py:send():235] send: config +2022-03-02 05:55:57,426 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:55:57,427 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:55:57,427 WARNING SenderThread:253308 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-02 05:55:57,766 INFO Thread-11 :253308 [upload_job.py:push():137] Uploaded file /tmp/tmplza0v1x4wandb/27k0rqdf-wandb-metadata.json +2022-03-02 05:55:58,277 INFO Thread-8 :253308 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/requirements.txt +2022-03-02 05:55:58,277 INFO Thread-8 :253308 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:55:58,277 INFO Thread-8 :253308 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-metadata.json +2022-03-02 05:55:58,277 INFO Thread-8 :253308 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:56:00,276 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:04,192 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:56:04,192 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:56:04,192 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:56:04,192 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:56:04,193 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:56:04,193 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:56:04,277 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:04,277 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:56:06,278 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:08,278 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:10,193 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:56:10,193 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:56:10,195 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:56:10,279 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:56:12,280 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:12,540 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:56:12,540 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:56:14,280 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:16,365 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:56:16,366 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:56:16,366 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:56:17,281 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:56:18,282 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:20,283 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:22,368 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:56:22,368 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:56:22,370 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:56:23,284 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:56:24,284 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:25,719 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 05:56:26,285 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:27,577 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:56:27,577 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:56:28,154 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:56:28,154 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:56:28,156 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:56:28,285 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/config.yaml +2022-03-02 05:56:28,286 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:56:30,286 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:31,287 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:33,287 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:34,030 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:56:34,030 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:56:34,031 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:56:34,288 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:56:35,288 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:36,288 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:39,289 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:39,836 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:56:39,836 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:56:39,838 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:56:40,290 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:56:41,290 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:42,291 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:42,611 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:56:42,611 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:56:45,292 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:45,574 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:56:45,574 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:56:45,575 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:56:46,292 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:46,292 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:56:47,292 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:49,293 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:51,229 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:56:51,229 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:56:51,231 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:56:51,293 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:56:52,294 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:53,294 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:55,295 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:56,074 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 05:56:56,831 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:56:56,832 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:56:56,832 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:56:57,295 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:56:57,296 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:56:57,925 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:56:57,925 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:56:58,296 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:02,297 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:02,514 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:57:02,514 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:57:02,515 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:57:03,298 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:57:04,298 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:05,298 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:08,106 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:57:08,107 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:57:08,107 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:57:08,299 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:08,299 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:57:09,299 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:10,300 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:12,301 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:13,027 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:57:13,027 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:57:13,698 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:57:13,698 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:57:13,699 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:57:14,301 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:57:15,302 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:16,302 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:18,303 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:19,211 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:57:19,212 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:57:19,212 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:57:19,303 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:57:20,304 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:21,304 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:24,305 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:24,639 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:57:24,639 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:57:24,640 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:57:25,305 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:57:26,306 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:26,419 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 05:57:27,306 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:28,183 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:57:28,185 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:57:28,306 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:30,104 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:57:30,104 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:57:30,105 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:57:30,307 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:57:31,307 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:32,308 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:34,308 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:35,444 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:57:35,444 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:57:35,445 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:57:36,309 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:36,309 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:57:37,309 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:40,712 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:57:40,712 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:57:40,712 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:57:41,311 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:41,311 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:57:43,311 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:43,403 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:57:43,403 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:57:45,312 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:46,010 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:57:46,011 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:57:46,012 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:57:46,312 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:57:47,313 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:51,314 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:51,332 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:57:51,332 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:57:51,333 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:57:52,314 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:57:53,315 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:55,315 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:56,539 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:57:56,540 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:57:56,540 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:57:56,773 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 05:57:57,316 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:57:57,316 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:57:58,554 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:57:58,554 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:58:01,317 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:01,756 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:58:01,757 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:58:01,758 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:58:02,318 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:58:03,318 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:04,318 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:05,319 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:06,989 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:58:06,989 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:58:06,990 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:58:07,319 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:58:08,320 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:09,320 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:11,321 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:12,113 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:58:12,114 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:58:12,114 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:58:12,321 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:58:13,321 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:13,604 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:58:13,604 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:58:14,322 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:15,322 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:17,161 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:58:17,162 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:58:17,162 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:58:17,323 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:58:18,323 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:19,323 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:21,324 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:22,238 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:58:22,238 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:58:22,239 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:58:22,324 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:58:24,325 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:26,326 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:27,131 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 05:58:27,229 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:58:27,230 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:58:27,230 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:58:27,326 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:58:28,326 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:28,643 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:58:28,643 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:58:32,152 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:58:32,152 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:58:32,153 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:58:32,328 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:32,328 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:58:34,328 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:36,329 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:37,108 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:58:37,109 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:58:37,110 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:58:37,329 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:58:38,330 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:40,330 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:42,000 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:58:42,001 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:58:42,001 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:58:42,331 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:42,331 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:58:43,681 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:58:43,681 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:58:44,332 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:46,333 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:46,900 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:58:46,901 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:58:46,901 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:58:47,333 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:58:48,333 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:50,334 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:51,682 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:58:51,682 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:58:51,683 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:58:52,334 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:52,335 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:58:53,335 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:55,335 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:56,417 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:58:56,418 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:58:56,418 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:58:57,336 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:58:57,336 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:58:57,507 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 05:58:58,758 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:58:58,758 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:58:59,337 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:00,988 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:00,988 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:00,989 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:01,337 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:01,338 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:03,338 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:05,339 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:05,504 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:05,504 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:05,505 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:06,339 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:07,339 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:09,340 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:10,007 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:10,008 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:10,009 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:10,340 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:11,341 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:13,341 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:13,953 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:59:13,953 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:59:14,440 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:14,440 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:14,441 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:15,342 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:15,342 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:17,343 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:18,645 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:18,645 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:18,645 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:19,343 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:19,344 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:21,344 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:22,779 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:22,779 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:22,780 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:23,345 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:23,345 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:24,345 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:26,346 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:26,784 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:26,785 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:26,785 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:27,346 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:27,346 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:27,851 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 05:59:28,346 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:29,054 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:59:29,054 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:59:30,347 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:30,628 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:30,628 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:30,629 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:31,347 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:32,348 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:34,198 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:34,198 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:34,199 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:34,348 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:34,348 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:36,349 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:37,469 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:37,469 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:37,470 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:38,350 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:38,350 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:40,350 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:40,510 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:40,511 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:40,511 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:41,351 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:42,351 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:43,171 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:43,172 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:43,172 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:43,351 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:44,276 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 05:59:44,277 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 05:59:44,352 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:45,520 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:45,521 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:45,521 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:46,352 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:46,353 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:47,594 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:47,594 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:47,595 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:48,353 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:48,353 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:49,433 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:49,434 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:49,434 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:50,354 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:50,354 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:51,051 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:51,051 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:51,052 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:51,354 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:52,354 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:53,114 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,115 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,115 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,115 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,115 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,115 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,120 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,120 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,126 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,126 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,126 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,126 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,131 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,131 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,131 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,131 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,132 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,132 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,132 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,132 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,132 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,132 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,132 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,132 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,132 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,132 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,137 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,138 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,143 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,148 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,154 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,159 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,167 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,167 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,167 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,168 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,168 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,168 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,168 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,168 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,168 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,168 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,168 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,168 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,168 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,168 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,168 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,169 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,169 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,169 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,169 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,169 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,169 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,169 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,169 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,169 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,169 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,169 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,175 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,175 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,175 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,175 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,175 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,175 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,175 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,175 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,175 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,175 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,175 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,175 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,176 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,176 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,176 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,176 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,176 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,176 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,176 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,176 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,176 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,181 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,181 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,182 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,182 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,182 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,182 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,182 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,182 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,182 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,182 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,182 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,182 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,188 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,198 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,198 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,198 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,198 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,198 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,199 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,199 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,199 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,204 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,204 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,204 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,204 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,210 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,210 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,210 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,210 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,210 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,210 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,210 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,215 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,215 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,216 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,216 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,216 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,216 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,216 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,216 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,216 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,216 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,216 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,222 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,222 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,222 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,222 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,222 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,222 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,222 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,222 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,222 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,227 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,228 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,228 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,228 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,228 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,228 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,228 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,228 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,228 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,228 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,228 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,234 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,234 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,234 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,234 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,234 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,234 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,234 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,234 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,234 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,234 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,240 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,240 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,240 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,240 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,240 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,245 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,251 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,256 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,256 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,256 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,256 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,256 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,256 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,256 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,257 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,257 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,257 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,257 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,262 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,262 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,262 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,262 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,263 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,263 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,263 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,263 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,263 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,266 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,266 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,266 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,266 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,266 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,266 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,266 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,266 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,266 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,266 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,267 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,267 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,267 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,267 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,267 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,267 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,267 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,267 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,267 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,267 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,268 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,268 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,268 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,268 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,268 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,268 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,268 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,268 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,268 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,268 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,268 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,268 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,269 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,270 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,270 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,270 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,270 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,270 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,270 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,270 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,270 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,270 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,270 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,270 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,270 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,271 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,271 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,271 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,271 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,271 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,271 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,271 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,271 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,271 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,271 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,271 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,271 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,272 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,272 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,272 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,272 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,272 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,272 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,272 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,272 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,272 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,272 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,272 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,272 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,273 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,274 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,274 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,274 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,274 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,274 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,274 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,274 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,274 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,274 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,274 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,274 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,274 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,275 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,275 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,275 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,275 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,275 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,275 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,275 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,275 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,275 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,275 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,275 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,275 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,276 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,276 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,276 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,276 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,276 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,276 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,276 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,276 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,276 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,276 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,276 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,276 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,277 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,278 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,279 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,280 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,281 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,282 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,283 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,284 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,284 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,284 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,285 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,286 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,287 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,288 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,289 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,290 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,291 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,292 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,293 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,294 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,295 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,296 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,297 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,298 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,299 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,300 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,301 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,302 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,303 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,304 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,305 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,306 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,307 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,308 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,309 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,310 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,311 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,312 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,313 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,314 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,315 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,316 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,317 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,318 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,319 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,320 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,321 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,322 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,323 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,324 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,325 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,326 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,327 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,328 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,329 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,330 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,331 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,332 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,333 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,334 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,335 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,336 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,337 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,338 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,339 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,340 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,341 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,342 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,343 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,344 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,345 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,346 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,347 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,348 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,349 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,350 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,351 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,352 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,353 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,354 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,355 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,355 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,355 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,355 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,355 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,355 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,355 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,355 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,355 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,355 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,356 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,357 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,358 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,359 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,360 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,361 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,362 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,363 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,364 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,365 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,366 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,367 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,368 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,369 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,370 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,371 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,372 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,373 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,374 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,375 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,376 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,377 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,378 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,379 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,380 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,381 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,382 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,383 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,384 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,385 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,386 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,387 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,388 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,389 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,390 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,391 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,392 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,393 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,394 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,395 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,396 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,397 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,398 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,399 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,400 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,401 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,402 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,403 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,404 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,405 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,406 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,407 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,408 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,409 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,410 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,411 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,412 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,413 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,414 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,415 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,416 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,417 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,418 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,419 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,420 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,421 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,422 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,423 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,423 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,423 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,423 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,423 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,423 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,423 DEBUG SenderThread:253308 [sender.py:send():235] send: metric +2022-03-02 05:59:53,423 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:53,509 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 05:59:53,592 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 05:59:54,356 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:54,356 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 05:59:58,271 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 05:59:58,357 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 05:59:59,276 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 05:59:59,443 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:00:00,157 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:00:00,237 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:00:00,238 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:00:00,358 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/config.yaml +2022-03-02 06:00:00,358 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:00,358 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:00:04,359 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:05,262 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:00:05,313 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:00:05,395 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:00:06,394 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:06,395 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:00:10,396 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:11,185 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:00:11,238 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:00:11,320 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:00:11,396 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:00:12,396 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:15,454 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:00:15,455 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:00:16,398 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:17,060 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:00:17,115 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:00:17,201 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:00:17,398 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:00:18,398 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:22,400 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:22,814 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:00:22,868 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:00:22,951 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:00:23,400 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:00:24,400 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:26,401 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:28,634 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:00:28,685 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:00:28,765 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:00:28,802 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:00:29,402 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:00:30,403 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:30,546 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:00:30,547 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:00:31,403 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:33,404 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:34,420 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:00:34,472 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:00:34,558 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:00:35,404 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:00:37,405 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:39,406 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:40,129 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:00:40,178 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:00:40,265 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:00:40,406 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:00:41,406 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:45,408 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:45,603 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:00:45,604 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:00:45,803 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:00:45,857 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:00:45,942 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:00:46,408 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:00:47,408 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:49,409 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:51,491 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:00:51,545 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:00:51,629 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:00:52,410 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:00:53,411 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:55,411 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:56,968 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:00:57,021 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:00:57,104 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:00:57,412 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:00:57,412 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:00:59,273 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:00:59,413 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:00,818 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:01:00,818 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:01:02,414 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:02,518 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:01:02,592 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:01:02,676 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:01:03,414 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:03,415 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:01:04,415 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:08,022 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:01:08,074 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:01:08,156 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:01:08,416 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:08,417 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:01:09,417 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:10,417 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:12,418 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:13,535 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:01:13,587 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:01:13,670 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:01:14,418 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:14,419 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:01:15,419 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:15,890 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:01:15,890 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:01:18,420 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:18,976 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:01:19,030 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:01:19,116 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:01:19,420 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:01:20,421 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:21,421 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:22,421 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:24,395 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:01:24,475 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:01:24,558 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:01:25,458 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:01:26,459 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:28,459 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:29,683 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:01:29,890 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:01:29,937 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:01:30,021 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:01:30,460 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:30,460 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:01:30,992 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:01:30,993 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:01:32,461 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:34,461 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:35,202 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:01:35,279 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:01:35,362 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:01:35,462 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:01:36,462 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:39,463 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:40,621 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:01:40,676 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:01:40,759 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:01:41,464 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:01:42,464 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:43,465 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:45,465 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:45,870 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:01:45,924 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:01:46,009 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:01:46,175 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:01:46,177 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:01:46,466 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:01:47,466 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:48,467 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:49,467 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:51,121 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:01:51,177 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:01:51,262 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:01:51,468 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:01:52,468 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:53,468 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:55,469 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:56,394 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:01:56,445 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:01:56,531 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:01:57,529 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:01:57,530 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:01:58,530 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:00,207 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:02:01,479 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:02:01,480 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:02:01,531 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:01,561 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:02:01,615 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:02:01,700 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:02:02,531 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:02,532 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:02:03,532 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:05,532 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:06,700 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:02:06,771 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:02:06,854 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:02:07,533 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:07,534 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:02:08,534 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:11,535 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:11,844 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:02:11,896 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:02:11,979 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:02:12,535 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:12,535 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:02:13,535 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:15,536 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:16,529 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:02:16,529 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:02:16,938 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:02:17,013 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:02:17,093 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:02:17,537 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:17,537 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:02:19,538 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:21,920 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:02:21,972 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:02:22,054 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:02:22,539 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:22,539 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:02:23,539 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:24,539 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:26,540 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:26,868 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:02:26,921 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:02:27,000 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:02:27,541 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:02:28,541 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:29,542 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:30,542 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:30,681 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:02:31,575 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:02:31,575 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:02:31,812 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:02:31,864 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:02:31,947 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:02:32,543 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:02:33,543 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:34,544 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:36,544 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:36,703 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:02:36,757 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:02:36,840 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:02:37,545 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:02:38,545 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:39,546 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:40,546 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:41,580 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:02:41,622 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:02:41,734 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:02:42,547 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:42,547 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:02:43,547 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:46,346 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:02:46,396 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:02:46,478 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:02:46,548 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:46,548 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:02:46,740 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:02:46,742 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:02:47,548 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:48,549 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:50,549 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:51,031 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:02:51,086 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:02:51,172 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:02:51,550 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:02:52,550 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:53,550 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:54,551 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:55,609 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:02:55,659 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:02:55,740 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:02:56,551 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:56,552 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:02:57,552 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:02:58,552 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:00,040 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:00,091 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:00,175 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:00,553 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:01,328 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:03:01,553 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:02,083 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:03:02,083 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:03:03,554 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:04,531 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:04,586 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:04,668 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:05,586 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:05,586 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:08,875 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:08,927 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:09,010 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:09,587 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:09,588 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:11,588 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:13,101 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:13,150 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:13,230 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:13,589 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:13,589 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:15,589 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:17,117 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:17,168 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:17,248 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:03:17,251 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:17,252 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:03:17,590 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:17,590 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:18,590 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:19,590 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:20,926 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:20,976 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:21,059 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:21,591 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:21,591 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:22,591 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:23,592 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:24,551 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:24,602 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:24,684 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:25,682 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:25,682 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:26,682 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:27,683 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:27,986 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:28,040 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:28,124 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:28,683 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:29,683 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:30,684 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:31,184 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:31,225 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:31,350 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:31,684 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:31,685 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:31,830 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:03:32,393 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:03:32,394 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:03:32,684 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:33,685 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:34,157 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:34,209 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:34,292 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:34,685 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:35,686 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:36,686 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:36,866 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:36,921 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:37,008 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:37,686 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:37,687 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:38,687 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:39,329 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:39,379 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:39,461 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:39,687 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:39,687 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:40,687 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:41,482 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:41,533 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:41,617 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:41,688 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:41,688 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:43,361 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:43,415 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:43,498 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:43,688 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:43,689 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:44,689 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:45,006 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:45,054 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:45,135 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:45,689 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:45,689 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:46,689 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:46,987 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:47,161 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:47,242 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:47,656 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:03:47,657 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:03:47,690 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:47,690 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:48,690 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:49,690 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:51,691 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:53,097 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:53,164 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:53,243 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:53,692 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:03:54,692 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:55,692 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:57,693 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:58,899 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:03:58,972 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:03:59,057 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:03:59,694 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:03:59,694 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:04:00,694 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:02,417 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:04:02,959 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:04:02,960 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:04:03,695 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:04,848 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:04:04,901 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:04:04,984 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:04:05,696 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:05,696 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:04:06,696 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:10,541 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:04:10,593 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:04:10,678 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:04:10,698 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:10,698 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:04:12,698 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:14,699 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:16,294 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:04:16,345 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:04:16,485 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:04:16,700 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:16,700 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:04:18,107 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:04:18,108 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:04:18,700 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:20,701 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:22,092 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:04:22,144 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:04:22,227 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:04:22,702 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:04:24,702 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:25,703 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:26,703 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:27,802 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:04:27,856 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:04:27,942 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:04:28,704 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:28,704 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:04:29,704 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:32,705 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:33,018 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:04:33,167 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:04:33,169 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:04:33,476 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:04:33,529 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:04:33,612 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:04:33,706 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:04:34,706 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:35,706 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:36,707 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:39,142 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:04:39,195 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:04:39,276 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:04:39,708 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:04:40,708 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:41,708 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:42,709 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:44,790 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:04:44,841 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:04:44,926 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:04:45,710 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:04:47,710 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:48,355 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:04:48,356 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:04:49,711 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:50,297 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:04:50,350 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:04:50,435 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:04:50,711 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:04:51,712 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:55,713 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:55,765 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:04:55,815 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:04:55,897 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:04:56,714 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:04:57,714 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:04:59,715 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:01,239 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:05:01,289 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:05:01,371 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:05:01,715 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:05:03,426 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:05:03,522 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:05:03,523 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:05:03,716 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:05,717 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:06,647 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:05:06,700 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:05:06,783 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:05:07,782 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:07,783 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:05:11,783 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:12,088 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:05:12,139 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:05:12,223 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:05:12,784 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:05:13,784 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:15,785 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:17,506 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:05:17,557 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:05:17,642 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:05:17,786 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:17,786 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:05:18,641 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:05:18,642 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:05:19,786 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:21,787 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:22,985 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:05:23,039 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:05:23,121 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:05:23,788 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:23,788 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:05:24,788 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:26,789 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:28,212 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:05:28,265 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:05:28,344 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:05:28,789 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:05:30,790 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:32,791 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:33,444 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:05:33,496 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:05:33,581 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:05:33,791 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:05:33,927 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:05:33,984 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:05:33,985 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:05:34,792 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:38,706 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:05:38,761 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:05:38,848 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:38,849 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:05:39,848 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:05:40,849 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:42,849 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:43,976 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:05:44,030 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:05:44,115 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:05:44,850 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:44,850 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:05:46,851 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:48,851 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:49,132 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:05:49,185 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:05:49,214 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:05:49,270 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:05:49,271 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:05:49,852 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:05:50,852 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:52,853 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:54,332 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:05:54,387 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:05:54,474 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:05:54,854 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:54,854 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:05:56,855 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:58,855 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:05:59,469 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:05:59,522 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:05:59,607 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:05:59,856 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:06:00,856 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:02,857 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:04,402 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:06:04,497 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:06:04,498 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:06:04,688 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:06:04,688 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:06:04,772 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:06:04,857 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:06:05,858 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:06,858 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:09,603 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:06:09,653 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:06:09,738 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:06:09,859 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:09,860 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:06:10,860 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:11,860 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:13,861 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:14,592 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:06:14,645 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:06:14,731 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:06:14,861 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:06:15,862 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:19,487 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:06:19,541 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:06:19,627 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:06:19,863 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:19,863 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:06:19,921 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:06:19,923 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:06:21,864 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:23,865 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:24,410 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:06:24,463 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:06:24,550 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:06:24,865 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:06:25,865 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:27,866 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:29,207 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:06:29,258 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:06:29,339 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:06:29,867 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:06:31,867 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:33,868 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:34,021 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:06:34,076 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:06:34,160 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:06:34,832 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:06:34,868 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:06:35,091 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:06:35,092 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:06:35,869 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:37,869 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:38,773 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:06:38,827 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:06:38,916 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:06:39,915 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:39,915 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:06:41,916 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:43,522 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:06:43,577 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:06:43,663 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:06:43,916 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:43,917 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:06:45,917 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:47,918 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:48,119 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:06:48,173 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:06:48,257 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:06:48,918 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:06:49,919 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:50,168 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:06:50,169 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:06:51,919 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:52,664 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:06:52,719 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:06:52,848 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:06:52,920 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:06:53,920 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:55,921 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:57,128 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:06:57,180 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:06:57,267 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:06:57,921 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:06:57,922 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:00,923 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:01,517 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:01,570 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:01,654 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:01,923 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:01,923 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:02,923 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:04,924 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:05,222 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:07:05,223 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:07:05,412 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:07:05,722 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:05,777 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:05,858 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:05,925 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:05,925 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:06,925 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:08,926 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:09,733 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:09,786 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:09,873 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:09,926 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:10,926 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:12,927 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:13,678 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:13,730 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:13,814 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:13,927 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:14,928 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:16,928 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:17,341 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:17,396 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:17,479 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:17,929 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:18,929 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:20,266 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:07:20,267 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:07:20,726 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:20,777 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:20,864 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:20,930 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:20,930 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:22,931 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:23,844 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:23,898 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:23,985 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:24,984 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:24,984 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:26,666 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:26,714 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:26,799 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:26,985 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:26,985 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:28,985 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:29,294 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:29,351 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:29,440 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:29,986 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:30,986 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:31,586 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:31,641 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:31,726 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:31,986 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:32,987 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:33,655 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:33,709 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:33,795 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:33,987 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:34,987 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:35,323 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:07:35,324 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:07:35,472 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:35,525 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:35,609 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:35,988 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:36,057 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:07:36,988 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:37,068 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:37,120 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:37,201 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:37,989 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:38,989 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:39,061 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:39,233 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:39,316 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:39,989 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:40,990 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:42,990 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:45,297 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:45,352 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:45,440 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:45,991 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:46,992 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:47,992 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:49,993 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:50,369 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:07:50,370 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:07:51,224 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:51,279 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:51,364 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:51,994 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:52,994 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:53,995 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:55,995 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:57,123 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:07:57,177 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:07:57,263 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:07:57,996 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:57,996 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:07:58,996 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:07:59,997 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:01,997 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:02,883 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:08:02,937 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:08:03,021 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:08:04,020 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:04,020 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:08:05,020 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:05,421 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:08:05,422 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:08:06,590 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:08:08,021 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:08,552 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:08:08,605 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:08:08,693 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:08:09,021 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:08:10,022 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:11,022 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:12,023 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:14,228 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:08:14,303 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:08:14,386 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:08:15,024 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:15,024 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:08:16,024 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:19,025 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:19,943 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:08:19,997 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:08:20,084 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:08:20,512 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:08:20,513 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:08:21,083 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:21,083 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:08:23,084 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:25,084 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:25,658 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:08:25,712 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:08:25,796 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:08:26,085 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:08:27,085 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:28,085 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:31,087 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:31,321 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:08:31,374 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:08:31,459 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:08:32,087 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:08:33,087 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:34,088 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:35,088 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:35,843 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:08:35,844 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:08:36,970 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:08:37,025 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:08:37,109 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:08:37,150 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:08:38,108 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:08:39,108 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:40,109 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:41,109 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:42,551 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:08:42,603 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:08:42,692 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:08:43,110 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:43,110 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:08:44,110 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:48,034 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:08:48,088 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:08:48,171 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:48,173 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:08:49,171 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:08:50,172 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:50,993 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:08:50,994 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:08:52,173 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:53,565 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:08:53,617 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:08:53,704 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:08:54,173 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:08:56,174 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:58,175 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:08:59,043 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:08:59,097 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:08:59,185 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:08:59,186 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:09:00,185 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:02,186 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:04,187 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:04,478 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:09:04,534 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:09:04,620 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:09:05,187 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:09:06,188 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:06,241 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:09:06,242 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:09:07,612 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:09:08,188 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:09,917 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:09:09,970 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:09:10,054 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:09:10,189 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:10,189 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:09:12,190 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:14,191 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:15,312 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:09:15,366 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:09:15,450 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:09:16,191 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:16,192 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:09:17,192 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:20,193 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:20,665 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:09:20,719 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:09:20,832 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:09:21,193 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:09:21,392 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:09:21,394 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:09:22,193 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:23,194 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:25,194 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:25,977 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:09:26,031 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:09:26,120 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:09:26,195 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:09:27,195 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:31,197 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:31,212 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:09:31,265 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:09:31,353 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:09:32,197 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:09:33,197 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:35,198 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:36,323 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:09:36,377 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:09:36,464 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:09:36,547 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:09:36,548 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:09:37,199 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:37,199 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:09:38,119 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:09:39,199 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:41,200 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:41,489 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:09:41,543 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:09:41,627 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:09:42,200 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:09:43,201 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:45,201 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:46,534 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:09:46,587 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:09:46,673 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:09:47,202 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:47,202 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:09:49,203 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:51,204 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:51,526 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:09:51,578 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:09:51,663 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:09:51,812 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:09:51,813 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:09:52,204 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:09:53,204 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:55,205 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:56,482 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:09:56,536 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:09:56,621 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:09:57,206 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:09:57,206 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:09:58,206 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:00,207 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:01,466 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:01,521 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:01,605 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:02,207 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:10:03,208 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:04,208 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:06,209 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:06,444 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:06,496 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:06,580 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:07,088 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:10:07,089 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:10:07,209 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:10:08,209 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:08,635 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:10:09,210 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:10,210 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:11,349 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:11,400 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:11,523 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:12,211 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:12,211 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:10:13,211 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:16,191 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:16,246 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:16,246 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:16,335 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:17,246 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:17,247 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:10:18,247 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:20,247 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:21,000 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:21,053 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:21,140 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:21,248 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:10:22,248 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:22,303 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:10:22,304 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:10:23,248 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:24,249 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:25,773 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:25,824 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:25,910 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:26,249 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:26,249 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:10:27,249 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:29,250 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:30,523 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:30,578 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:30,663 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:31,251 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:31,251 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:10:33,252 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:35,114 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:35,167 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:35,253 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:35,255 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:36,253 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:10:37,253 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:37,414 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:10:37,416 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:10:38,254 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:39,113 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:10:39,254 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:39,699 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:39,752 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:39,839 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:40,255 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:10:41,255 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:42,255 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:43,255 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:44,217 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:44,269 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:44,356 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:45,290 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:45,291 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:10:46,291 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:47,291 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:48,671 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:48,725 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:48,812 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:49,292 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:10:50,292 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:51,292 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:52,549 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:10:52,550 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:10:52,986 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:53,040 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:53,124 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:53,293 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:53,293 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:10:54,293 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:55,294 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:57,236 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:10:57,287 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:10:57,368 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:10:57,370 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:10:58,369 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:00,369 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:01,320 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:01,373 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:01,460 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:02,402 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:02,403 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:04,403 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:05,227 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:05,289 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:05,373 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:05,403 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:06,404 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:07,724 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:11:07,725 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:11:08,405 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:08,900 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:08,953 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:09,037 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:09,405 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:09,592 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:11:10,405 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:12,406 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:12,421 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:12,475 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:12,560 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:13,407 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:14,407 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:15,668 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:15,724 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:15,813 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:16,408 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:16,408 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:18,408 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:18,705 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:18,759 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:18,843 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:19,409 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:20,409 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:21,428 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:21,481 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:21,565 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:22,410 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:22,410 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:22,842 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:11:22,843 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:11:23,980 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:24,033 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:24,118 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:24,410 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:24,411 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:26,308 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:26,361 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:26,444 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:26,445 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:27,444 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:28,339 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:28,391 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:28,473 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:28,474 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:28,474 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:30,098 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:30,148 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:30,232 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:30,474 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:30,474 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:31,474 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:32,146 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:32,323 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:32,408 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:32,475 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:32,475 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:33,475 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:34,476 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:36,476 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:37,976 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:11:37,978 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:11:38,191 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:38,248 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:38,333 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:38,477 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:39,477 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:40,211 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:11:40,478 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:42,478 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:44,205 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:44,259 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:44,343 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:44,479 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:44,479 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:45,479 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:47,480 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:49,481 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:50,007 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:50,060 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:50,150 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:50,481 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:51,481 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:53,035 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:11:53,036 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:11:55,483 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:55,820 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:11:55,869 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:11:55,954 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:11:56,483 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:11:57,484 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:11:59,484 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:01,483 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:12:01,526 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:12:01,611 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:12:02,510 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:12:03,510 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:05,511 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:07,173 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:12:07,234 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:12:07,321 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:12:07,511 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:07,512 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:12:08,359 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:12:08,360 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:12:09,512 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:10,779 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:12:11,513 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:12,782 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:12:12,836 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:12:12,923 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:12:13,513 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:13,514 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:12:14,514 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:17,515 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:18,395 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:12:18,447 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:12:18,530 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:12:19,529 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:19,530 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:12:20,530 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:22,530 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:23,447 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:12:23,448 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:12:23,906 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:12:23,959 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:12:24,043 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:12:24,531 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:12:26,532 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:28,532 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:29,503 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:12:29,556 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:12:29,645 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:12:30,557 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:30,557 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:12:32,558 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:34,558 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:35,090 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:12:35,144 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:12:35,230 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:12:35,559 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:12:36,559 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:38,604 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:12:38,605 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:12:40,560 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:40,634 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:12:40,688 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:12:40,775 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:12:41,440 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:12:41,561 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:12:42,561 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:44,562 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:46,091 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:12:46,145 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:12:46,227 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:12:46,562 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:46,562 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:12:48,563 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:49,563 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:51,521 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:12:51,575 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:12:51,591 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:51,660 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:12:52,586 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:52,586 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:12:53,586 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:53,840 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:12:53,841 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:12:55,587 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:56,872 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:12:56,927 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:12:57,011 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:12:57,588 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:12:58,588 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:12:59,588 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:01,589 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:02,199 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:13:02,248 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:13:02,332 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:13:02,589 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:13:03,590 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:04,590 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:07,495 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:13:07,546 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:13:07,630 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:07,632 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:13:08,631 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:08,631 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:13:08,914 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:13:08,915 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:13:09,631 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:11,632 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:11,961 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:13:12,880 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:13:12,957 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:13:13,042 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:13:13,633 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:13,633 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:13:14,633 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:17,634 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:18,226 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:13:18,278 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:13:18,362 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:13:18,635 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:13:19,635 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:21,636 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:23,466 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:13:23,520 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:13:23,639 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:23,641 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:13:24,059 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:13:24,060 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:13:24,639 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:13:25,640 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:26,640 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:28,508 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:13:28,557 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:13:28,641 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:28,641 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:13:28,642 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:13:29,641 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:30,642 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:32,643 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:33,564 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:13:33,619 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:13:33,719 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:13:34,718 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:34,718 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:13:35,718 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:38,675 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:13:38,729 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:13:38,816 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:38,819 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:13:39,328 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:13:39,329 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:13:39,761 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:13:40,761 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:41,762 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:42,486 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:13:42,762 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:43,686 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:13:43,739 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:13:43,825 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:13:44,824 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:44,824 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:13:45,824 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:46,824 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:48,652 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:13:48,707 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:13:48,792 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:13:48,825 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:13:49,825 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:50,826 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:52,826 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:53,660 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:13:53,710 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:13:53,794 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:13:53,827 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:13:54,523 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:13:54,525 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:13:54,827 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:55,827 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:56,828 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:58,656 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:13:58,711 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:13:58,800 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:13:58,828 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:13:58,829 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:13:59,829 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:00,829 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:02,830 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:03,470 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:03,523 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:03,604 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:03,830 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:04,831 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:05,831 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:06,831 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:08,210 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:08,263 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:08,349 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:08,832 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:09,669 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:14:09,670 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:14:09,832 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:11,833 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:12,961 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:13,023 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:13,108 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:13,144 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:14:13,834 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:13,834 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:17,631 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:17,683 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:17,770 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:17,836 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:17,837 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:19,836 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:21,837 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:22,331 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:22,382 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:22,466 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:22,837 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:23,838 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:24,825 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:14:24,826 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:14:24,838 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:25,838 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:26,858 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:26,911 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:26,995 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:27,839 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:27,839 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:28,839 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:29,840 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:31,327 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:31,382 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:31,466 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:31,840 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:31,841 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:32,841 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:34,841 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:35,798 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:35,853 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:35,940 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:36,864 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:36,864 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:38,864 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:39,958 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:14:39,960 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:14:40,096 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:40,152 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:40,235 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:40,865 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:40,865 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:42,866 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:43,693 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:14:44,330 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:44,383 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:44,467 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:44,866 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:44,867 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:46,867 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:48,485 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:48,537 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:48,622 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:48,868 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:48,868 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:50,868 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:52,518 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:52,569 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:52,656 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:52,869 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:52,869 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:54,870 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:55,139 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:14:55,141 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:14:56,383 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:14:56,434 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:14:56,517 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:14:56,870 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:14:56,871 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:14:58,871 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:00,067 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:00,120 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:00,205 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:00,872 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:00,872 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:02,873 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:03,422 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:03,476 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:03,560 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:03,873 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:04,873 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:06,507 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:06,560 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:06,645 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:06,874 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:06,874 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:08,875 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:09,277 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:09,329 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:09,414 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:09,875 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:10,248 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:15:10,249 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:15:10,876 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:11,839 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:11,896 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:11,984 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:12,918 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:12,918 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:14,179 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:14,240 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:14,325 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:14,361 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:15:14,918 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:14,919 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:16,153 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:16,205 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:16,293 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:16,919 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:16,919 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:17,917 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:17,973 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:18,061 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:18,973 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:18,974 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:19,524 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:19,575 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:19,657 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:19,974 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:20,974 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:21,534 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:21,705 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:21,786 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:21,975 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:22,975 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:25,449 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:15:25,450 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:15:26,976 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:27,655 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:27,711 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:27,795 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:27,977 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:28,977 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:31,978 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:33,550 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:33,601 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:33,685 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:33,979 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:35,979 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:37,980 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:39,371 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:39,423 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:39,509 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:39,981 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:40,823 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:15:40,825 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:15:41,981 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:43,982 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:44,940 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:15:45,210 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:45,288 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:45,372 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:45,983 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:45,983 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:47,984 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:49,984 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:50,989 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:51,045 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:51,131 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:52,021 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:52,022 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:54,022 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:55,872 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:15:55,873 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:15:56,023 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:15:56,741 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:15:56,794 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:15:56,880 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:15:57,023 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:15:58,024 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:00,024 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:02,324 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:16:02,376 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:16:02,461 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:16:03,025 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:16:04,026 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:05,026 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:07,027 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:07,952 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:16:08,006 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:16:08,094 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:16:09,093 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:09,093 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:16:10,093 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:11,047 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:16:11,049 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:16:13,094 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:13,618 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:16:13,670 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:16:13,756 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:16:14,095 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:14,095 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:16:15,095 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:15,492 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:16:17,096 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:19,182 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:16:19,238 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:16:19,322 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:16:20,097 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:20,097 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:16:21,097 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:23,098 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:24,584 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:16:24,637 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:16:24,725 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:16:25,099 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:25,099 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:16:26,217 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:16:26,218 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:16:27,099 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:29,991 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:16:30,047 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:16:30,168 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:30,169 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:16:31,168 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:16:32,168 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:33,169 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:34,169 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:35,488 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:16:35,542 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:16:35,626 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:16:36,170 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:16:37,170 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:38,170 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:40,171 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:40,993 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:16:41,047 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:16:41,130 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:16:41,171 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:16:41,307 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:16:41,309 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:16:42,172 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:43,172 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:44,173 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:45,910 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:16:46,173 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:46,342 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:16:46,396 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:16:46,483 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:16:47,174 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:16:48,174 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:49,174 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:50,175 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:51,773 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:16:51,828 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:16:51,914 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:16:52,175 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:16:53,176 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:54,176 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:56,177 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:56,470 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:16:56,471 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:16:57,150 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:16:57,201 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:16:57,283 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:16:58,201 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:16:58,201 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:16:59,201 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:02,203 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:02,473 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:17:02,527 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:17:02,614 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:17:03,203 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:03,203 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:17:04,203 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:06,204 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:07,810 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:17:07,863 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:17:07,948 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:17:08,205 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:17:09,205 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:10,205 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:11,543 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:17:11,544 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:17:12,206 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:13,090 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:17:13,142 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:17:13,245 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:17:14,244 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:14,244 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:17:15,244 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:16,245 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:16,403 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:17:18,291 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:17:18,345 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:17:18,432 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:17:19,246 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:19,246 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:17:21,247 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:23,247 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:23,418 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:17:23,473 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:17:23,559 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:17:24,248 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:17:25,248 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:26,248 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:26,654 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:17:26,655 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:17:27,249 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:28,725 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:17:28,778 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:17:28,867 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:17:29,249 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:29,249 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:17:30,250 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:31,250 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:33,251 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:33,577 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:17:33,634 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:17:33,720 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:17:34,251 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:17:35,251 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:36,252 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:37,252 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:38,595 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:17:38,648 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:17:38,730 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:17:39,253 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:39,253 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:17:40,253 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:41,908 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:17:41,909 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:17:43,254 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:43,672 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:17:43,747 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:17:43,832 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:17:44,254 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:17:45,255 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:46,255 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:46,891 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:17:47,255 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:48,709 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:17:48,765 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:17:48,855 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:17:49,256 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:49,256 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:17:50,257 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:53,699 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:17:53,752 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:17:53,837 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:17:54,258 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:54,258 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:17:56,259 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:57,153 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:17:57,154 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:17:58,260 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:17:58,659 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:17:58,714 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:17:58,799 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:17:59,260 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:00,261 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:02,261 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:03,535 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:03,588 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:03,668 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:04,262 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:04,262 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:06,263 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:08,263 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:08,295 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:08,348 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:08,434 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:09,264 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:10,264 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:11,264 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:12,258 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:18:12,259 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:18:12,265 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:13,074 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:13,126 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:13,212 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:13,265 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:14,265 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:15,266 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:16,266 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:17,292 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:18:17,722 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:17,774 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:17,860 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:18,267 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:19,267 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:20,267 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:22,268 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:22,361 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:22,411 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:22,499 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:23,268 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:23,269 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:24,269 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:26,269 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:26,922 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:26,973 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:27,059 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:27,270 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:27,441 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:18:27,442 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:18:28,270 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:29,271 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:30,271 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:31,432 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:31,485 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:31,569 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:32,272 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:32,272 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:33,272 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:34,272 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:35,764 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:35,815 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:35,899 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:36,273 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:36,273 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:37,273 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:39,274 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:40,010 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:40,063 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:40,146 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:40,274 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:41,275 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:42,594 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:18:42,596 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:18:43,275 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:44,058 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:44,112 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:44,200 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:44,276 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:45,276 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:47,277 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:47,802 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:18:47,921 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:47,975 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:48,058 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:48,277 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:49,277 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:51,278 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:51,569 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:51,620 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:51,704 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:52,278 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:53,279 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:55,038 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:55,112 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:55,195 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:55,279 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:55,280 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:18:57,280 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:57,839 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:18:57,841 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:18:58,225 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:18:58,278 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:18:58,361 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:18:59,312 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:18:59,312 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:01,177 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:01,229 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:01,311 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:01,312 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:01,313 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:03,313 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:03,918 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:03,974 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:04,058 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:04,313 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:05,314 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:06,313 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:06,368 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:06,451 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:07,345 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:07,346 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:08,400 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:08,455 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:08,540 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:09,346 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:09,346 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:10,294 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:10,349 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:10,432 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:11,365 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:11,366 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:11,877 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:11,931 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:12,014 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:12,366 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:12,887 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:19:12,888 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:19:13,366 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:13,868 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:14,028 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:14,110 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:14,366 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:15,367 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:18,493 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:19:19,368 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:19,762 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:19,838 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:19,923 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:20,368 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:21,369 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:23,369 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:25,678 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:25,730 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:25,811 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:26,370 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:27,371 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:27,934 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:19:27,935 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:19:30,372 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:31,500 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:31,554 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:31,679 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:32,373 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:33,373 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:34,373 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:36,374 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:37,284 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:37,337 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:37,422 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:38,421 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:38,421 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:40,421 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:42,422 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:42,982 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:19:42,983 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:19:43,059 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:43,071 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:43,153 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:43,423 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:44,423 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:48,424 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:48,742 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:48,803 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:48,906 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:48,942 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:19:49,425 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:50,425 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:52,426 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:54,431 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:19:54,482 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:19:54,565 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:19:55,448 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:19:56,448 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:19:58,294 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:19:58,295 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:19:58,449 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:00,056 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:20:00,111 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:20:00,197 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:20:00,450 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:00,450 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:20:02,450 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:04,451 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:05,757 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:20:05,809 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:20:05,891 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:20:06,452 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:06,452 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:20:08,452 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:11,344 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:20:11,396 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:20:11,478 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:11,479 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:20:12,478 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:12,478 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:20:13,369 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:20:13,370 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:20:13,478 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:15,479 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:16,871 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:20:16,924 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:20:17,005 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:20:17,480 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:20:18,480 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:19,383 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:20:19,481 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:21,481 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:22,311 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:20:22,356 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:20:22,434 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:20:22,482 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:20:23,482 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:24,482 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:27,484 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:27,665 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:20:27,719 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:20:27,801 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:20:28,439 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:20:28,441 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:20:28,485 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:28,486 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:20:29,485 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:31,486 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:33,041 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:20:33,093 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:20:33,176 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:20:33,487 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:20:35,487 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:37,488 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:38,395 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:20:38,448 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:20:38,536 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:20:39,535 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:39,535 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:20:41,536 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:43,536 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:43,669 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:20:43,671 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:20:43,768 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:20:43,822 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:20:43,907 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:20:44,537 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:20:45,537 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:47,538 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:49,124 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:20:49,189 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:20:49,272 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:20:49,538 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:49,539 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:20:49,822 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:20:51,539 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:53,540 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:54,433 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:20:54,493 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:20:54,573 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:20:55,572 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:55,573 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:20:58,573 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:20:58,738 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:20:58,739 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:20:59,640 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:20:59,692 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:20:59,777 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:00,574 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:21:01,574 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:02,575 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:04,576 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:04,830 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:21:04,882 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:21:04,965 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:05,576 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:05,576 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:21:06,576 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:08,577 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:10,019 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:21:10,074 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:21:10,160 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:10,578 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:21:11,578 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:12,578 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:13,795 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:21:13,797 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:21:14,579 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:15,137 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:21:15,190 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:21:15,272 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:15,580 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:15,580 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:21:16,580 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:18,581 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:20,180 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:21:20,243 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:21:20,329 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:20,364 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:21:20,581 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:21:22,582 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:24,583 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:25,263 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:21:25,314 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:21:25,397 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:25,583 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:21:26,583 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:28,584 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:28,851 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:21:28,852 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:21:30,144 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:21:30,195 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:21:30,279 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:30,584 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:30,585 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:21:32,585 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:33,585 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:35,053 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:21:35,105 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:21:35,184 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:35,586 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:35,587 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:21:36,587 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:37,587 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:39,588 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:39,998 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:21:40,053 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:21:40,140 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:40,588 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:21:41,588 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:42,589 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:43,589 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:43,900 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:21:43,901 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:21:44,910 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:21:44,965 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:21:45,047 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:45,590 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:21:46,590 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:47,591 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:49,591 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:49,669 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:21:49,721 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:21:49,803 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:50,592 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:50,592 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:21:50,838 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:21:51,592 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:53,593 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:54,470 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:21:54,523 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:21:54,604 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:55,602 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:55,602 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:21:56,602 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:58,949 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:21:58,951 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:21:59,216 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:21:59,267 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:21:59,348 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:21:59,604 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:21:59,604 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:00,604 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:02,605 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:03,864 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:03,915 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:03,995 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:04,605 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:04,606 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:06,606 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:08,394 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:08,443 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:08,527 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:08,607 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:08,607 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:10,607 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:11,608 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:12,608 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:12,911 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:12,965 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:13,051 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:13,609 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:13,996 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:22:13,997 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:22:14,609 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:15,609 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:16,610 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:17,348 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:17,400 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:17,485 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:17,610 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:18,610 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:19,611 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:20,611 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:21,276 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:22:21,673 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:21,728 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:21,814 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:22,612 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:22,612 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:23,612 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:24,612 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:25,972 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:26,025 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:26,108 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:26,613 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:26,613 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:27,613 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:28,614 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:29,049 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:22:29,051 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:22:30,083 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:30,155 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:30,234 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:30,615 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:31,615 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:33,616 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:34,054 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:34,106 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:34,190 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:34,616 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:35,617 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:37,617 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:37,808 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:37,858 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:37,938 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:38,617 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:39,618 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:41,370 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:41,423 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:41,505 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:41,619 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:41,619 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:43,619 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:44,093 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:22:44,094 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:22:44,752 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:44,806 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:44,888 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:45,620 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:45,620 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:47,621 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:47,892 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:47,945 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:48,031 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:48,621 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:49,621 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:50,794 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:50,847 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:50,933 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:51,622 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:51,622 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:51,800 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:22:53,478 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:53,532 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:53,616 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:53,623 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:53,623 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:55,623 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:55,855 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:55,912 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:55,999 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:56,624 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:57,624 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:57,949 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:58,003 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:58,133 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:22:58,624 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:22:59,151 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:22:59,152 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:22:59,625 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:22:59,798 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:22:59,852 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:22:59,937 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:23:00,625 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:23:01,386 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:23:01,432 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:23:01,518 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:23:01,626 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:01,626 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:23:03,377 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:23:03,554 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:23:03,637 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:03,639 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:23:04,638 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:23:05,638 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:07,639 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:09,461 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:23:09,515 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:23:09,601 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:23:09,639 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:23:11,640 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:13,641 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:14,214 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:23:14,215 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:23:15,342 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:23:15,395 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:23:15,481 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:23:15,641 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:23:17,642 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:19,643 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:21,158 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:23:21,216 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:23:21,308 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:23:21,644 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:21,645 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:23:22,543 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:23:23,645 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:26,646 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:26,978 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:23:27,032 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:23:27,117 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:23:27,646 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:27,647 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:23:28,647 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:29,295 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:23:29,297 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:23:32,648 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:32,739 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:23:32,793 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:23:32,878 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:23:33,648 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:23:34,649 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:36,650 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:38,376 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:23:38,419 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:23:38,502 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:23:38,651 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:23:40,652 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:42,652 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:44,119 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:23:44,174 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:23:44,261 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:23:44,446 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:23:44,447 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:23:44,653 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:44,653 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:23:46,654 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:48,655 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:49,770 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:23:49,817 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:23:49,904 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:23:50,655 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:50,656 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:23:52,656 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:52,960 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:23:54,657 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:55,467 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:23:55,521 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:23:55,635 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:23:55,657 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:23:56,658 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:59,659 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:23:59,767 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:23:59,768 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:24:01,137 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:24:01,191 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:24:01,279 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:24:01,660 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:24:02,660 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:03,660 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:05,661 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:06,682 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:24:06,735 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:24:06,821 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:24:07,662 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:07,662 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:24:08,662 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:11,663 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:12,127 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:24:12,180 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:24:12,267 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:24:12,664 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:24:13,664 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:14,664 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:14,946 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:24:14,947 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:24:15,665 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:17,563 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:24:17,616 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:24:17,703 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:24:18,702 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:18,702 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:24:19,702 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:21,703 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:22,961 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:24:23,023 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:24:23,108 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:24:23,443 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:24:23,704 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:23,704 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:24:24,704 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:27,705 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:28,297 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:24:28,352 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:24:28,440 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:24:28,706 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:24:29,706 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:30,027 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:24:30,028 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:24:30,706 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:31,707 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:33,691 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:24:33,744 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:24:33,829 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:24:34,745 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:24:35,745 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:36,745 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:38,746 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:39,020 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:24:39,073 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:24:39,170 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:24:39,747 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:24:40,747 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:41,747 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:42,748 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:44,346 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:24:44,400 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:24:44,485 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:24:44,748 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:24:45,187 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:24:45,188 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:24:45,749 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:46,749 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:48,750 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:49,623 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:24:49,678 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:24:49,765 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:24:50,763 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:50,764 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:24:51,764 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:52,764 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:53,958 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:24:54,765 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:54,883 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:24:54,937 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:24:55,024 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:24:55,766 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:55,766 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:24:56,766 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:24:58,767 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:00,098 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:00,152 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:00,236 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:00,548 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:25:00,550 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:25:00,767 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:25:01,768 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:02,768 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:04,769 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:05,241 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:05,297 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:05,380 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:05,769 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:25:06,769 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:07,770 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:08,770 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:10,378 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:10,433 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:10,520 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:10,771 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:25:11,771 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:12,772 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:14,772 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:15,486 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:15,536 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:15,618 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:15,772 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:25:15,991 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:25:15,992 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:25:16,773 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:17,773 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:18,774 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:20,450 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:20,504 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:20,591 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:20,774 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:25:21,775 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:24,348 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:25:25,461 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:25,515 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:25,600 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:25,776 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:25,776 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:25:27,777 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:29,778 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:30,461 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:30,515 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:30,601 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:30,778 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:25:31,264 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:25:31,265 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:25:31,778 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:33,779 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:35,401 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:35,456 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:35,543 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:35,780 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:25:36,780 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:37,780 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:39,781 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:40,290 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:40,343 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:40,430 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:40,781 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:25:41,782 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:42,782 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:43,783 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:45,104 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:45,157 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:45,240 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:45,783 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:45,783 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:25:46,482 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:25:46,483 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:25:46,783 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:47,784 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:49,866 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:49,922 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:50,009 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:50,785 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:25:51,785 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:52,786 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:53,786 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:54,535 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:54,602 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:54,687 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:54,786 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:25:55,022 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:25:55,787 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:56,787 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:58,788 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:25:59,189 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:25:59,239 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:25:59,322 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:25:59,788 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:00,789 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:01,636 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:26:01,638 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:26:02,789 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:03,757 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:03,812 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:03,899 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:04,813 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:04,813 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:06,814 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:08,290 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:08,343 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:08,431 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:08,814 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:08,815 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:10,815 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:12,731 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:12,785 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:12,869 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:12,871 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:13,869 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:14,870 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:16,776 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:26:16,777 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:26:16,870 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:17,020 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:17,074 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:17,156 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:17,871 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:18,871 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:20,872 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:21,227 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:21,282 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:21,366 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:21,873 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:22,873 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:24,874 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:25,281 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:25,342 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:25,429 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:25,489 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:26:25,874 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:26,875 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:28,875 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:29,075 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:29,128 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:29,251 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:29,876 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:30,876 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:31,848 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:26:31,849 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:26:32,598 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:32,652 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:32,736 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:32,877 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:32,877 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:34,877 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:36,023 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:36,077 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:36,160 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:36,878 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:36,878 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:38,879 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:39,251 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:39,307 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:39,399 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:39,879 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:40,880 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:42,263 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:42,336 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:42,417 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:42,880 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:42,881 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:44,881 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:44,903 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:44,954 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:45,041 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:45,881 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:46,882 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:46,895 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:26:46,896 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:26:47,264 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:47,304 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:47,387 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:47,882 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:48,882 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:49,322 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:49,375 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:49,463 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:49,883 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:50,883 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:51,177 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:51,233 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:51,322 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:51,883 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:52,782 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:52,835 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:52,920 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:52,921 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:53,920 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:54,756 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:26:54,922 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:26:55,003 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:55,005 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:26:56,003 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:26:56,043 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:26:57,003 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:26:59,004 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:00,820 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:27:00,871 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:27:00,954 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:27:01,005 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:27:02,034 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:27:02,036 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:27:03,006 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:05,006 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:06,736 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:27:06,779 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:27:06,862 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:27:07,007 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:27:08,008 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:10,008 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:12,009 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:12,503 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:27:12,557 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:27:12,648 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:27:13,010 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:27:14,010 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:17,114 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:27:17,116 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:27:18,012 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:18,244 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:27:18,296 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:27:18,379 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:27:19,012 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:27:20,012 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:22,013 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:24,036 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:27:24,089 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:27:24,171 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:27:25,014 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:27:26,015 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:26,532 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:27:28,015 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:29,727 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:27:29,779 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:27:29,863 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:27:30,016 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:27:32,017 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:32,164 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:27:32,165 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:27:34,017 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:35,326 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:27:35,379 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:27:35,463 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:27:36,018 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:36,018 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:27:38,019 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:40,019 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:40,957 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:27:41,013 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:27:41,097 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:27:42,096 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:42,096 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:27:46,097 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:46,601 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:27:46,656 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:27:46,740 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:27:47,097 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:27:47,327 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:27:47,328 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:27:48,098 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:50,099 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:52,137 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:27:52,190 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:27:52,271 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:27:53,100 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:27:54,100 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:55,100 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:56,968 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:27:57,101 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:57,617 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:27:57,671 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:27:57,759 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:27:58,102 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:27:58,102 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:27:59,102 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:02,648 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:28:02,649 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:28:03,067 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:28:03,121 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:28:03,121 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:03,205 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:28:04,121 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:28:05,121 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:07,122 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:08,524 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:28:08,576 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:28:08,663 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:28:09,123 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:09,123 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:28:11,124 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:13,125 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:13,913 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:28:13,967 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:28:14,051 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:28:14,125 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:28:15,125 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:17,126 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:17,764 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:28:17,765 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:28:19,237 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:28:19,290 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:28:19,376 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:28:20,127 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:28:21,128 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:23,129 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:24,550 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:28:24,602 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:28:24,687 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:28:25,129 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:28:26,129 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:27,130 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:27,490 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:28:28,130 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:29,858 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:28:29,931 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:28:30,009 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:28:30,131 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:28:31,131 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:32,132 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:32,954 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:28:32,955 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:28:34,132 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:35,211 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:28:35,264 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:28:35,346 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:28:36,133 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:36,133 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:28:37,134 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:38,134 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:40,135 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:40,399 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:28:40,453 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:28:40,540 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:28:41,135 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:28:42,135 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:43,136 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:44,136 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:45,591 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:28:45,645 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:28:45,726 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:28:46,137 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:28:47,137 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:48,138 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:48,295 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:28:48,296 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:28:50,138 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:50,850 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:28:50,906 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:28:50,991 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:28:51,139 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:28:52,139 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:53,139 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:54,140 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:56,039 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:28:56,093 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:28:56,177 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:28:57,176 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:28:57,177 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:28:57,913 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:28:59,177 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:01,134 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:01,188 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:01,209 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:01,275 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:02,199 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:29:03,200 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:03,539 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:29:03,540 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:29:05,201 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:06,125 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:06,178 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:06,262 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:07,260 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:07,260 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:29:11,149 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:11,200 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:11,283 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:11,285 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:12,283 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:29:13,283 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:15,284 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:16,169 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:16,220 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:16,304 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:17,302 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:17,302 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:29:19,026 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:29:19,028 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:29:21,067 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:21,119 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:21,205 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:21,304 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:21,304 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:29:23,304 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:25,305 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:26,024 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:26,077 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:26,160 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:26,305 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:29:27,306 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:28,306 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:28,516 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:29:30,869 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:30,921 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:31,004 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:31,307 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:31,307 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:29:32,307 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:33,308 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:34,195 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:29:34,196 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:29:35,308 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:35,674 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:35,727 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:35,811 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:36,309 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:29:37,309 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:38,310 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:39,310 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:40,419 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:40,473 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:40,558 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:41,311 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:41,311 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:29:42,311 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:44,312 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:45,041 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:45,096 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:45,218 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:45,312 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:29:46,312 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:48,313 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:49,270 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:29:49,271 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:29:49,718 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:49,774 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:49,858 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:50,314 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:50,314 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:29:52,314 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:54,259 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:54,311 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:54,341 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:54,399 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:55,341 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:29:56,341 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:58,342 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:29:58,809 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:29:58,874 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:29:58,963 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:29:59,010 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:29:59,342 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:00,343 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:02,343 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:03,160 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:03,208 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:03,293 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:03,344 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:04,317 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:30:04,318 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:30:04,344 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:06,345 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:07,428 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:07,480 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:07,562 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:08,345 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:08,346 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:10,346 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:11,541 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:11,593 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:11,676 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:12,347 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:12,347 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:13,347 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:14,347 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:15,572 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:15,624 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:15,709 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:16,348 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:17,348 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:19,349 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:19,370 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:30:19,371 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:30:19,457 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:19,474 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:19,554 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:20,350 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:21,350 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:23,125 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:23,177 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:23,262 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:23,351 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:23,351 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:25,351 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:26,702 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:26,756 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:26,840 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:27,352 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:27,352 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:29,353 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:29,495 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:30:30,024 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:30,079 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:30,165 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:30,353 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:31,353 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:33,128 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:33,181 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:33,263 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:33,354 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:33,354 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:34,651 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:30:34,652 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:30:35,355 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:35,973 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:36,026 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:36,110 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:36,355 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:37,356 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:38,470 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:38,525 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:38,605 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:39,356 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:39,357 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:40,657 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:40,710 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:40,795 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:41,357 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:41,358 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:42,515 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:42,570 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:42,652 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:43,358 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:43,358 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:44,113 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:44,166 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:44,247 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:44,358 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:45,359 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:46,159 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:46,331 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:46,413 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:47,411 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:47,412 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:49,812 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:30:49,813 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:30:51,413 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:52,395 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:52,448 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:52,533 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:53,449 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:53,449 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:57,450 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:30:58,235 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:30:58,289 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:30:58,373 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:30:58,451 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:30:59,451 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:00,009 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:31:00,451 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:03,452 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:04,057 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:31:04,133 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:31:04,218 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:31:04,453 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:31:05,282 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:31:05,284 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:31:05,453 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:06,453 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:07,454 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:09,939 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:31:09,994 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:31:10,077 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:31:10,455 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:31:11,455 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:12,456 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:14,456 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:15,608 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:31:15,662 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:31:15,744 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:31:16,457 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:31:18,458 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:20,364 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:31:20,365 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:31:20,458 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:21,313 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:31:21,367 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:31:21,453 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:31:21,459 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:31:22,459 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:24,460 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:26,460 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:27,027 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:31:27,099 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:31:27,185 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:31:27,461 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:31:28,461 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:30,494 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:31:32,463 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:32,709 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:31:32,764 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:31:32,848 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:31:33,463 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:31:34,463 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:35,412 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:31:35,414 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:31:36,464 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:38,287 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:31:38,344 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:31:38,432 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:31:38,465 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:31:40,465 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:42,466 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:43,799 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:31:43,854 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:31:43,939 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:31:44,467 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:44,467 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:31:46,468 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:48,468 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:49,290 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:31:49,341 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:31:49,429 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:31:49,469 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:31:50,469 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:50,684 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:31:50,686 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:31:53,470 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:54,743 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:31:54,799 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:31:54,884 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:31:55,471 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:31:56,471 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:57,471 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:31:59,472 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:00,134 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:32:00,189 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:32:00,273 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:32:00,472 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:00,472 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:32:01,007 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:32:01,473 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:05,474 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:05,584 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:32:05,635 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:32:05,721 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:32:05,813 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:32:05,814 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:32:06,474 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:32:07,475 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:09,475 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:10,897 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:32:10,952 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:32:11,039 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:32:11,476 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:32:13,477 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:15,477 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:16,310 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:32:16,363 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:32:16,447 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:32:16,478 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:32:17,478 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:19,479 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:20,976 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:32:20,977 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:32:21,659 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:32:21,712 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:32:21,798 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:32:22,480 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:32:23,480 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:25,481 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:26,939 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:32:26,993 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:32:27,078 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:32:27,482 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:27,482 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:32:29,482 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:30,483 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:31,454 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:32:32,212 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:32:32,265 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:32:32,351 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:32:32,483 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:32:33,484 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:34,484 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:36,029 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:32:36,030 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:32:36,485 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:37,441 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:32:37,494 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:32:37,579 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:32:38,495 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:38,495 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:32:39,495 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:42,496 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:42,633 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:32:42,684 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:32:42,768 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:32:43,497 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:43,497 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:32:44,497 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:46,498 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:47,710 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:32:47,765 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:32:47,850 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:32:48,499 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:48,499 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:32:49,499 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:51,092 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:32:51,093 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:32:52,500 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:52,911 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:32:52,992 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:32:53,081 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:32:53,500 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:53,501 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:32:54,501 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:56,501 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:32:58,080 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:32:58,134 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:32:58,278 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:32:58,502 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:32:59,502 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:00,503 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:02,059 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:33:03,159 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:33:03,211 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:33:03,294 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:33:03,504 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:03,504 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:33:04,504 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:05,505 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:06,288 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:33:06,290 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:33:07,505 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:08,097 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:33:08,148 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:33:08,229 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:33:08,506 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:33:09,506 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:10,507 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:11,507 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:13,108 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:33:13,161 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:33:13,247 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:33:13,508 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:33:14,508 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:15,509 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:17,509 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:18,043 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:33:18,096 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:33:18,183 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:33:18,510 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:33:19,510 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:20,510 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:21,511 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:21,539 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:33:21,540 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:33:23,022 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:33:23,076 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:33:23,160 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:33:23,511 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:33:24,512 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:25,512 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:27,513 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:27,930 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:33:27,983 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:33:28,066 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:33:28,513 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:28,513 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:33:29,513 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:31,514 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:32,593 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:33:32,741 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:33:32,794 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:33:32,881 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:33:33,515 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:33,515 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:33:34,515 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:36,516 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:36,642 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:33:36,643 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:33:37,465 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:33:37,520 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:33:37,605 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:33:38,553 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:38,553 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:33:40,553 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:42,201 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:33:42,257 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:33:42,340 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:33:42,554 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:42,554 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:33:43,554 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:44,555 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:46,555 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:46,918 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:33:46,966 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:33:47,053 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:33:47,556 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:33:48,556 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:49,557 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:50,557 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:51,436 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:33:51,491 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:33:51,579 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:33:51,766 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:33:51,767 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:33:52,578 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:52,578 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:33:53,578 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:54,578 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:55,932 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:33:55,985 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:33:56,068 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:33:56,579 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:56,579 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:33:57,579 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:33:58,580 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:00,317 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:00,370 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:00,455 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:00,580 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:00,581 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:01,581 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:03,111 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:34:04,589 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:04,643 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:04,728 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:05,582 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:05,583 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:07,068 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:34:07,069 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:34:07,583 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:08,759 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:08,814 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:08,904 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:09,584 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:09,584 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:11,584 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:12,773 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:12,827 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:12,912 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:13,585 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:13,585 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:15,586 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:16,546 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:16,599 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:16,685 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:17,610 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:17,611 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:19,611 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:20,067 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:20,121 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:20,207 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:20,611 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:21,612 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:22,276 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:34:22,277 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:34:23,337 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:23,389 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:23,476 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:23,613 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:23,613 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:25,613 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:26,376 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:26,428 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:26,515 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:26,613 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:27,614 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:29,183 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:29,236 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:29,321 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:29,615 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:29,615 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:31,615 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:31,715 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:31,784 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:31,869 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:32,615 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:33,616 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:33,635 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:34:34,030 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:34,082 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:34,167 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:34,616 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:35,616 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:36,004 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:36,056 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:36,140 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:36,617 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:37,380 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:34:37,382 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:34:37,617 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:37,718 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:37,773 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:37,859 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:38,618 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:39,618 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:39,734 DEBUG SenderThread:253308 [sender.py:send():235] send: history +2022-03-02 06:34:39,898 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:34:39,975 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:34:40,618 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:34:41,619 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:43,619 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:47,621 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:49,621 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:52,429 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:34:52,429 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:34:53,623 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:34:57,624 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:35:01,625 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:35:03,626 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:35:04,270 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:35:07,481 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:35:07,481 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:35:07,627 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:35:11,628 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:35:15,630 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:35:19,631 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:35:22,592 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:35:22,593 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:35:23,632 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:35:25,834 DEBUG SenderThread:253308 [sender.py:send():235] send: telemetry +2022-03-02 06:35:25,835 DEBUG SenderThread:253308 [sender.py:send():235] send: exit +2022-03-02 06:35:25,835 INFO SenderThread:253308 [sender.py:send_exit():371] handling exit code: 1 +2022-03-02 06:35:25,835 INFO SenderThread:253308 [sender.py:send_exit():373] handling runtime: 2368 +2022-03-02 06:35:25,835 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 06:35:25,886 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:35:25,886 INFO SenderThread:253308 [sender.py:send_exit():379] send defer +2022-03-02 06:35:25,886 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 06:35:25,887 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: defer +2022-03-02 06:35:25,887 INFO HandlerThread:253308 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-02 06:35:25,887 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: defer +2022-03-02 06:35:25,887 INFO SenderThread:253308 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-02 06:35:25,888 INFO SenderThread:253308 [sender.py:transition_state():392] send defer: 1 +2022-03-02 06:35:25,888 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: defer +2022-03-02 06:35:25,888 INFO HandlerThread:253308 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-02 06:35:25,915 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: defer +2022-03-02 06:35:25,915 INFO SenderThread:253308 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-02 06:35:25,915 INFO SenderThread:253308 [sender.py:transition_state():392] send defer: 2 +2022-03-02 06:35:25,916 DEBUG SenderThread:253308 [sender.py:send():235] send: stats +2022-03-02 06:35:25,916 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: defer +2022-03-02 06:35:25,917 INFO HandlerThread:253308 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-02 06:35:25,917 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: defer +2022-03-02 06:35:25,917 INFO SenderThread:253308 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-02 06:35:25,917 INFO SenderThread:253308 [sender.py:transition_state():392] send defer: 3 +2022-03-02 06:35:25,917 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: defer +2022-03-02 06:35:25,917 INFO HandlerThread:253308 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-02 06:35:25,975 DEBUG SenderThread:253308 [sender.py:send():235] send: summary +2022-03-02 06:35:25,993 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 06:35:26,059 INFO SenderThread:253308 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:35:26,059 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: defer +2022-03-02 06:35:26,059 INFO SenderThread:253308 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-02 06:35:26,059 INFO SenderThread:253308 [sender.py:transition_state():392] send defer: 4 +2022-03-02 06:35:26,059 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 06:35:26,060 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: defer +2022-03-02 06:35:26,060 INFO HandlerThread:253308 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-02 06:35:26,060 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: defer +2022-03-02 06:35:26,060 INFO SenderThread:253308 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-02 06:35:26,161 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 06:35:26,674 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:35:26,680 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:35:26,882 INFO SenderThread:253308 [sender.py:transition_state():392] send defer: 5 +2022-03-02 06:35:26,883 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 06:35:26,883 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: defer +2022-03-02 06:35:26,883 INFO HandlerThread:253308 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-02 06:35:26,884 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: defer +2022-03-02 06:35:26,884 INFO SenderThread:253308 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-02 06:35:26,884 INFO SenderThread:253308 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-02 06:35:26,984 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 06:35:27,665 INFO Thread-8 :253308 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/config.yaml +2022-03-02 06:35:27,665 INFO SenderThread:253308 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files +2022-03-02 06:35:27,665 INFO SenderThread:253308 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-metadata.json wandb-metadata.json +2022-03-02 06:35:27,665 INFO SenderThread:253308 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log output.log +2022-03-02 06:35:27,666 INFO SenderThread:253308 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json wandb-summary.json +2022-03-02 06:35:27,666 INFO SenderThread:253308 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/requirements.txt requirements.txt +2022-03-02 06:35:27,672 INFO SenderThread:253308 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/config.yaml config.yaml +2022-03-02 06:35:27,672 INFO SenderThread:253308 [sender.py:transition_state():392] send defer: 6 +2022-03-02 06:35:27,672 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 06:35:27,673 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: defer +2022-03-02 06:35:27,673 INFO HandlerThread:253308 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-02 06:35:27,676 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: defer +2022-03-02 06:35:27,676 INFO SenderThread:253308 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-02 06:35:27,676 INFO SenderThread:253308 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 06:35:27,774 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 06:35:27,776 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 06:35:27,877 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 06:35:27,877 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 06:35:27,973 INFO Thread-15 :253308 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/config.yaml +2022-03-02 06:35:27,974 INFO Thread-14 :253308 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/requirements.txt +2022-03-02 06:35:27,979 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 06:35:27,979 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 06:35:28,006 INFO Thread-12 :253308 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/output.log +2022-03-02 06:35:28,075 INFO Thread-13 :253308 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/files/wandb-summary.json +2022-03-02 06:35:28,081 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 06:35:28,081 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 06:35:28,182 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 06:35:28,182 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 06:35:28,276 INFO Thread-7 :253308 [sender.py:transition_state():392] send defer: 7 +2022-03-02 06:35:28,276 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: defer +2022-03-02 06:35:28,276 INFO HandlerThread:253308 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-02 06:35:28,276 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: defer +2022-03-02 06:35:28,276 INFO SenderThread:253308 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-02 06:35:28,284 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 06:35:29,489 INFO SenderThread:253308 [sender.py:transition_state():392] send defer: 8 +2022-03-02 06:35:29,489 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 06:35:29,490 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: defer +2022-03-02 06:35:29,490 INFO HandlerThread:253308 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-02 06:35:29,490 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: defer +2022-03-02 06:35:29,490 INFO SenderThread:253308 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-02 06:35:29,490 INFO SenderThread:253308 [sender.py:transition_state():392] send defer: 9 +2022-03-02 06:35:29,491 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: defer +2022-03-02 06:35:29,491 INFO HandlerThread:253308 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-02 06:35:29,491 DEBUG SenderThread:253308 [sender.py:send():235] send: final +2022-03-02 06:35:29,492 DEBUG SenderThread:253308 [sender.py:send():235] send: footer +2022-03-02 06:35:29,492 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: defer +2022-03-02 06:35:29,492 INFO SenderThread:253308 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-02 06:35:29,591 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 06:35:29,592 DEBUG SenderThread:253308 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 06:35:29,592 INFO SenderThread:253308 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 06:35:29,648 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: get_summary +2022-03-02 06:35:29,741 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-02 06:35:29,745 DEBUG HandlerThread:253308 [handler.py:handle_request():131] handle_request: shutdown +2022-03-02 06:35:29,745 INFO HandlerThread:253308 [handler.py:finish():739] shutting down handler +2022-03-02 06:35:30,492 INFO WriterThread:253308 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/run-ymuc7hv0.wandb +2022-03-02 06:35:30,647 INFO SenderThread:253308 [sender.py:finish():1075] shutting down sender +2022-03-02 06:35:30,647 INFO SenderThread:253308 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 06:35:30,647 INFO SenderThread:253308 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 06:35:30,654 INFO MainThread:253308 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220302_055556-ymuc7hv0/logs/debug.log b/wandb/run-20220302_055556-ymuc7hv0/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..ac029c73448dcfcf15a9694906920ca205b578e8 --- /dev/null +++ b/wandb/run-20220302_055556-ymuc7hv0/logs/debug.log @@ -0,0 +1,125 @@ +2022-03-02 05:55:56,180 INFO MainThread:253209 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-02 05:55:56,180 INFO MainThread:253209 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-02 05:55:56,180 INFO MainThread:253209 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-02 05:55:56,180 INFO MainThread:253209 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-02 05:55:56,180 INFO MainThread:253209 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/logs/debug.log +2022-03-02 05:55:56,180 INFO MainThread:253209 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_055556-ymuc7hv0/logs/debug-internal.log +2022-03-02 05:55:56,180 INFO MainThread:253209 [wandb_init.py:init():420] calling init triggers +2022-03-02 05:55:56,181 INFO MainThread:253209 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-02 05:55:56,181 INFO MainThread:253209 [wandb_init.py:init():471] starting backend +2022-03-02 05:55:56,181 INFO MainThread:253209 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-02 05:55:56,237 INFO MainThread:253209 [backend.py:ensure_launched():219] starting backend process... +2022-03-02 05:55:56,292 INFO MainThread:253209 [backend.py:ensure_launched():224] started backend process with pid: 253308 +2022-03-02 05:55:56,294 INFO MainThread:253209 [wandb_init.py:init():480] backend started and connected +2022-03-02 05:55:56,304 INFO MainThread:253209 [wandb_init.py:init():550] updated telemetry +2022-03-02 05:55:56,436 INFO MainThread:253209 [wandb_init.py:init():581] communicating current version +2022-03-02 05:55:57,175 INFO MainThread:253209 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-02 05:55:57,176 INFO MainThread:253209 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-02 05:55:57,276 INFO MainThread:253209 [wandb_init.py:init():624] starting run threads in backend +2022-03-02 05:55:57,386 INFO MainThread:253209 [wandb_run.py:_console_start():1827] atexit reg +2022-03-02 05:55:57,386 INFO MainThread:253209 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-02 05:55:57,387 INFO MainThread:253209 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-02 05:55:57,388 INFO MainThread:253209 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-02 05:55:57,389 INFO MainThread:253209 [wandb_init.py:init():651] run started, returning control to user process +2022-03-02 05:55:57,391 INFO MainThread:253209 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 14, 'per_device_eval_batch_size': 14, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar02_05-55-14_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 14, 'eval_batch_size': 14} +2022-03-02 05:55:57,394 INFO MainThread:253209 [wandb_watch.py:watch():43] Watching +2022-03-02 06:35:23,000 INFO MainThread:253209 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-02 06:35:23,001 INFO MainThread:253209 [wandb_run.py:_restore():1769] restore +2022-03-02 06:35:25,887 INFO MainThread:253209 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 06:35:26,060 INFO MainThread:253209 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 06:35:26,883 INFO MainThread:253209 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 06:35:27,673 INFO MainThread:253209 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 1796625 +} + +2022-03-02 06:35:27,776 INFO MainThread:253209 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 360250 + total_bytes: 2094199 +} + +2022-03-02 06:35:27,878 INFO MainThread:253209 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2094199 + total_bytes: 2094199 +} + +2022-03-02 06:35:27,980 INFO MainThread:253209 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2094199 + total_bytes: 2094199 +} + +2022-03-02 06:35:28,081 INFO MainThread:253209 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2094199 + total_bytes: 2094199 +} + +2022-03-02 06:35:28,183 INFO MainThread:253209 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2094199 + total_bytes: 2094199 +} + +2022-03-02 06:35:29,490 INFO MainThread:253209 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2094199 + total_bytes: 2094199 +} + +2022-03-02 06:35:29,647 INFO MainThread:253209 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2094199 + total_bytes: 2094199 +} +local_info { +} + +2022-03-02 06:35:30,792 INFO MainThread:253209 [wandb_run.py:_append_history():2144] rendering history +2022-03-02 06:35:30,792 INFO MainThread:253209 [wandb_run.py:_append_summary():2102] rendering summary +2022-03-02 06:35:30,793 INFO MainThread:253209 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220302_055556-ymuc7hv0/run-ymuc7hv0.wandb b/wandb/run-20220302_055556-ymuc7hv0/run-ymuc7hv0.wandb new file mode 100644 index 0000000000000000000000000000000000000000..395f10c98f808959201409d1304b231cc40e534a --- /dev/null +++ b/wandb/run-20220302_055556-ymuc7hv0/run-ymuc7hv0.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7d9238c321667ddae0d1b24fc1a82ac5c8fc5729064808fc88ea560fe53eb29 +size 17128618 diff --git a/wandb/run-20220302_063647-bmivw6vv/files/config.yaml b/wandb/run-20220302_063647-bmivw6vv/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ec14c206f9a8d5ca54e5087eb20e9a603377b49 --- /dev/null +++ b/wandb/run-20220302_063647-bmivw6vv/files/config.yaml @@ -0,0 +1,11321 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + - 1: train/loss + 5: 1 + 6: + - 1 + - 1: train/learning_rate + 5: 1 + 6: + - 1 + - 1: train/epoch + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + python_version: 3.9.5 + start_time: 1646203007 + t: + 1: + - 1 + - 5 + - 11 + 2: + - 1 + - 5 + - 11 + 3: + - 1 + - 7 + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 14 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 4 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0001 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar02_06-36-06_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 14 +per_device_train_batch_size: + desc: null + value: 14 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 14 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220302_063647-bmivw6vv/files/output.log b/wandb/run-20220302_063647-bmivw6vv/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7ef08437fdd28330a70fe05d84890132f1fca594 --- /dev/null +++ b/wandb/run-20220302_063647-bmivw6vv/files/output.log @@ -0,0 +1,2158 @@ + + + 0%| | 0/509 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:36:55,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:36:58,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7886, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:00,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 1/509 [00:12<1:47:21, 12.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:37:04,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:07,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:09,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9215, 'learning_rate': 2.0000000000000002e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:12,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 2/509 [00:24<1:44:28, 12.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:37:16,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:19,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:21,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8623, 'learning_rate': 4.0000000000000003e-07, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:24,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▍ | 3/509 [00:36<1:41:49, 12.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:37:27,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:30,606 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:33,440 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7976, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:36,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▋ | 4/509 [00:48<1:39:55, 11.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:37:39,323 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:42,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:45,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7759, 'learning_rate': 8.000000000000001e-07, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:47,868 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▊ | 5/509 [00:59<1:38:40, 11.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:37:50,868 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:53,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:56,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:37:59,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 6/509 [01:11<1:38:01, 11.69s/it] + + 1%|▉ | 6/509 [01:11<1:38:01, 11.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:38:02,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:05,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:08,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8042, 'learning_rate': 1.2000000000000002e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:10,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|█ | 7/509 [01:22<1:36:54, 11.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:38:13,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:16,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:19,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:22,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 8/509 [01:33<1:36:15, 11.53s/it] + + 2%|█▎ | 8/509 [01:33<1:36:15, 11.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:38:25,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:27,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:30,686 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:33,505 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 9/509 [01:45<1:35:25, 11.45s/it] + + 2%|█▍ | 9/509 [01:45<1:35:25, 11.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:38:36,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:39,164 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:41,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:44,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 10/509 [01:56<1:34:35, 11.37s/it] + + 2%|█▌ | 10/509 [01:56<1:34:35, 11.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:38:47,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:50,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:53,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:38:55,759 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 11/509 [02:07<1:33:35, 11.28s/it] + + 2%|█▋ | 11/509 [02:07<1:33:35, 11.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:38:58,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:01,263 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:03,997 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:06,686 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 12/509 [02:18<1:32:31, 11.17s/it] + + 2%|█▉ | 12/509 [02:18<1:32:31, 11.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:39:09,510 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:12,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:14,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:17,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██ | 13/509 [02:29<1:31:42, 11.09s/it] + + 3%|██ | 13/509 [02:29<1:31:42, 11.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:39:20,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:23,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:25,845 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:28,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 14/509 [02:40<1:31:01, 11.03s/it] + + 3%|██▏ | 14/509 [02:40<1:31:01, 11.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:39:31,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:33,995 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:36,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5477, 'learning_rate': 2.8000000000000003e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:39,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▎ | 15/509 [02:51<1:30:18, 10.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:39:42,032 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:44,707 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:47,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7113, 'learning_rate': 3e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:49,868 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▌ | 16/509 [03:01<1:29:05, 10.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:39:52,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:55,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:39:57,886 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6573, 'learning_rate': 3.2000000000000003e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:00,501 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▋ | 17/509 [03:12<1:28:23, 10.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:40:03,215 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:05,746 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:08,378 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:10,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 18/509 [03:22<1:27:23, 10.68s/it] + + 4%|██▊ | 18/509 [03:22<1:27:23, 10.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:40:13,648 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:16,218 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:18,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:21,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 19/509 [03:33<1:26:38, 10.61s/it] + + 4%|██▉ | 19/509 [03:33<1:26:38, 10.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:40:24,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:26,644 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:29,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:31,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 20/509 [03:43<1:25:53, 10.54s/it] + + 4%|███▏ | 20/509 [03:43<1:25:53, 10.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:40:34,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:36,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:39,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:42,011 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 21/509 [03:53<1:24:59, 10.45s/it] + + 4%|███▎ | 21/509 [03:53<1:24:59, 10.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:40:44,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:47,049 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:49,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:52,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 22/509 [04:03<1:23:57, 10.34s/it] + + 4%|███▍ | 22/509 [04:03<1:23:57, 10.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:40:54,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:57,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:40:59,707 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.56, 'learning_rate': 4.4e-06, 'epoch': 0.05} +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:02,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 5%|███▌ | 23/509 [04:13<1:23:07, 10.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:41:04,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:07,313 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:09,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6464, 'learning_rate': 4.6e-06, 'epoch': 0.05} +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:12,251 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 5%|███▊ | 24/509 [04:24<1:22:30, 10.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:41:14,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:17,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:19,661 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3654, 'learning_rate': 4.800000000000001e-06, 'epoch': 0.05} +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:22,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 25/509 [04:34<1:22:53, 10.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:41:25,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:27,759 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:41:25,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:30,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:41:25,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 26/509 [04:44<1:21:57, 10.18s/it]g-point operations will not be computed-02 06:41:25,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 26/509 [04:44<1:21:57, 10.18s/it]g-point operations will not be computed-02 06:41:25,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7374, 'learning_rate': 5e-06, 'epoch': 0.05} + 5%|████ | 26/509 [04:44<1:21:57, 10.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:41:35,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:37,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:41:35,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:40,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:41:35,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 27/509 [04:54<1:20:50, 10.06s/it]g-point operations will not be computed-02 06:41:35,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 27/509 [04:54<1:20:50, 10.06s/it]g-point operations will not be computed-02 06:41:35,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 27/509 [04:54<1:20:50, 10.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:41:45,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:47,443 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:41:45,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:49,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:41:45,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 28/509 [05:04<1:20:00, 9.98s/it]g-point operations will not be computed-02 06:41:45,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 28/509 [05:04<1:20:00, 9.98s/it]g-point operations will not be computed-02 06:41:45,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 28/509 [05:04<1:20:00, 9.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:41:54,693 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:57,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:41:54,693 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:59,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:41:54,693 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:41:59,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:41:54,693 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 29/509 [05:13<1:19:08, 9.89s/it]g-point operations will not be computed-02 06:41:54,693 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 29/509 [05:13<1:19:08, 9.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:42:04,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:06,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:04,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:09,121 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:04,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 30/509 [05:23<1:18:07, 9.79s/it]g-point operations will not be computed-02 06:42:04,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 30/509 [05:23<1:18:07, 9.79s/it]g-point operations will not be computed-02 06:42:04,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 30/509 [05:23<1:18:07, 9.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:42:13,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:16,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:13,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:18,563 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:13,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 31/509 [05:32<1:17:06, 9.68s/it]g-point operations will not be computed-02 06:42:13,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 31/509 [05:32<1:17:06, 9.68s/it]g-point operations will not be computed-02 06:42:13,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 31/509 [05:32<1:17:06, 9.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:42:23,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:25,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:23,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:28,015 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:23,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 32/509 [05:42<1:16:18, 9.60s/it]g-point operations will not be computed-02 06:42:23,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 32/509 [05:42<1:16:18, 9.60s/it]g-point operations will not be computed-02 06:42:23,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 32/509 [05:42<1:16:18, 9.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:42:32,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:34,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:32,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:37,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:32,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████▏ | 33/509 [05:51<1:15:17, 9.49s/it]g-point operations will not be computed-02 06:42:32,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████▏ | 33/509 [05:51<1:15:17, 9.49s/it]g-point operations will not be computed-02 06:42:32,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████▏ | 33/509 [05:51<1:15:17, 9.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:42:41,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:44,092 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:41,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:46,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:41,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:48,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:41,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:48,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:41,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 34/509 [06:00<1:14:07, 9.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:42:50,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:53,130 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:50,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:55,300 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:50,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:57,500 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:50,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:42:57,500 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:50,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 35/509 [06:09<1:12:52, 9.23s/it]g-point operations will not be computed-02 06:42:50,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 35/509 [06:09<1:12:52, 9.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:42:59,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:01,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:59,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:04,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:59,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:04,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:42:59,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 36/509 [06:17<1:11:19, 9.05s/it]g-point operations will not be computed-02 06:42:59,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 36/509 [06:17<1:11:19, 9.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:43:08,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:10,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:08,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:12,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:08,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:12,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:08,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 37/509 [06:26<1:10:01, 8.90s/it]g-point operations will not be computed-02 06:43:08,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 37/509 [06:26<1:10:01, 8.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:43:16,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:18,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:16,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:20,989 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:16,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:20,989 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:16,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 38/509 [06:34<1:08:32, 8.73s/it]g-point operations will not be computed-02 06:43:16,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 38/509 [06:34<1:08:32, 8.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:43:25,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:27,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:25,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:29,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:25,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:29,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:25,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 39/509 [06:42<1:06:33, 8.50s/it]g-point operations will not be computed-02 06:43:25,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 39/509 [06:42<1:06:33, 8.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:43:33,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:34,924 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:33,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:36,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:33,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:36,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:33,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 40/509 [06:50<1:04:26, 8.24s/it]g-point operations will not be computed-02 06:43:33,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 40/509 [06:50<1:04:26, 8.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:43:40,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:42,421 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:40,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:45,912 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:40,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:45,912 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:40,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 41/509 [06:57<1:02:02, 7.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:43:47,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:49,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:47,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:51,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:47,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:51,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:47,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▊ | 42/509 [07:04<59:16, 7.61s/it]g-point operations will not be computed-02 06:43:47,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:56,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:54,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:57,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:54,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:43:57,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:43:54,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▉ | 43/509 [07:10<56:03, 7.22s/it]g-point operations will not be computed-02 06:43:54,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:02,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:00,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:02,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:00,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:03,405 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3915, 'learning_rate': 8.599999999999999e-06, 'epoch': 0.09} +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:04,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 44/509 [07:16<52:27, 6.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:44:06,204 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:07,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:08,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:09,926 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 45/509 [07:21<48:39, 6.29s/it] + 9%|███████▏ | 45/509 [07:21<48:39, 6.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:44:11,206 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:12,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:14,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:11,206 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:14,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:11,206 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4925, 'learning_rate': 9e-06, 'epoch': 0.09} +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:17,612 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:11,206 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:17,612 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:11,206 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:20,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:11,206 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:20,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:11,206 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▋ | 48/509 [07:33<36:24, 4.74s/it]g-point operations will not be computed-02 06:44:11,206 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▋ | 48/509 [07:33<36:24, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:44:22,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▋ | 48/509 [07:33<36:24, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:44:22,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:25,146 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:22,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:27,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:22,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:27,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:22,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5262, 'learning_rate': 9.600000000000001e-06, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:34,399 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:22,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:34,399 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:22,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 06:44:22,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 06:44:22,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 51/509 [07:52<48:35, 6.37s/it]g-point operations will not be computed-02 06:44:22,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 51/509 [07:52<48:35, 6.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 51/509 [07:52<48:35, 6.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 51/509 [07:52<48:35, 6.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 51/509 [07:52<48:35, 6.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:51,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:51,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:44:51,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:00,691 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:00,691 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:00,691 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4211, 'learning_rate': 1.02e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:00,691 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:09,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:09,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:09,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 54/509 [08:26<1:13:57, 9.75s/it]g-point operations will not be computed-02 06:44:43,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 54/509 [08:26<1:13:57, 9.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 54/509 [08:26<1:13:57, 9.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 54/509 [08:26<1:13:57, 9.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:26,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:26,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1811, 'learning_rate': 1.06e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:26,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:26,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:35,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:35,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:35,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3785, 'learning_rate': 1.08e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:43,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:45:43,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 57/509 [09:00<1:21:11, 10.78s/it]g-point operations will not be computed-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 57/509 [09:00<1:21:11, 10.78s/it]g-point operations will not be computed-02 06:45:17,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 57/509 [09:00<1:21:11, 10.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 57/509 [09:00<1:21:11, 10.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 57/509 [09:00<1:21:11, 10.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:00,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:00,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5174, 'learning_rate': 1.1200000000000001e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:00,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:08,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:08,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:08,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.233, 'learning_rate': 1.1400000000000001e-05, 'epoch': 0.12} +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:16,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:16,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 60/509 [09:34<1:22:19, 11.00s/it]g-point operations will not be computed-02 06:45:51,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 60/509 [09:34<1:22:19, 11.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:46:25,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:27,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:25,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:30,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:25,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:30,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:25,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:33,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:25,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:33,316 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:46:36,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:38,847 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:36,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:41,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:36,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:41,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:36,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:44,261 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:36,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:44,261 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:46:47,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:49,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:47,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:52,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:47,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:55,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:47,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:55,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:47,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:46:55,070 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:46:57,865 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:00,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:57,865 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:03,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:57,865 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:03,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:57,865 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:05,815 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:46:57,865 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:05,815 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:47:08,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:11,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:08,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:13,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:08,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:13,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:08,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:16,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:08,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:16,517 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:47:19,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:21,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:19,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:24,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:19,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:24,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:19,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:26,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:19,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:26,983 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:47:29,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:32,333 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:29,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:34,976 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:29,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:37,543 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:29,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:37,543 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:29,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:37,543 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:47:40,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:42,803 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:40,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:45,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:40,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:47,966 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:40,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:47,966 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:40,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:47,966 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:47:50,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:53,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:50,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:55,863 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:50,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:58,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:50,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:58,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:47:50,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:47:58,482 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:48:01,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:03,739 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:01,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:06,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:01,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:08,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:01,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:08,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:01,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:08,819 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:48:11,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:14,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:11,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:16,554 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:11,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:19,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:11,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:19,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:11,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:19,024 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:48:21,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:24,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:21,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:26,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:21,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:26,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:21,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:29,231 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:21,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:29,231 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:48:31,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:34,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:31,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:36,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:31,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:36,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:31,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:39,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:31,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:39,390 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:48:42,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:44,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:42,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:47,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:42,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:47,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:42,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:49,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:42,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:49,549 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:48:52,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:54,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:52,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:56,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:52,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:56,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:52,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:59,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:48:52,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:48:59,942 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:49:02,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:04,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:02,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:07,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:02,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:07,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:02,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:09,875 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:02,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:09,875 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:49:12,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:14,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:12,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:17,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:12,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:19,621 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:12,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:19,621 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:12,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 77/509 [12:31<1:12:23, 10.05s/it]g-point operations will not be computed-02 06:49:12,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 77/509 [12:31<1:12:23, 10.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:49:22,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:24,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:22,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:26,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:22,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:29,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:22,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:29,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:22,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:29,270 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:49:31,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:34,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:31,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:36,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:31,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:38,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:31,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:38,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:31,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:38,747 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:49:41,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:43,558 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:41,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:45,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:41,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:48,259 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:41,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:48,259 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:41,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 80/509 [13:00<1:09:25, 9.71s/it]g-point operations will not be computed-02 06:49:41,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 80/509 [13:00<1:09:25, 9.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:49:50,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:53,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:50,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:55,543 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:50,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:57,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:50,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:57,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:49:50,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:49:57,866 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:50:00,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:02,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:00,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:04,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:00,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:04,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:00,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:07,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:00,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:07,112 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:50:09,496 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:11,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:09,496 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:14,006 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:09,496 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:16,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:09,496 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:16,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:09,496 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:16,302 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:50:18,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:20,870 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:18,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:23,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:18,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:25,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:18,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:25,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:18,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:25,327 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:50:27,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:29,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:27,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:31,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:27,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:31,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:27,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:34,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:27,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:34,152 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:50:36,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:40,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:36,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:42,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:36,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:42,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:36,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 86/509 [13:54<1:03:29, 9.01s/it]g-point operations will not be computed-02 06:50:36,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 86/509 [13:54<1:03:29, 9.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:50:45,049 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:47,154 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:45,049 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:49,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:45,049 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:49,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:45,049 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:51,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:45,049 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:51,352 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:50:53,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:55,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:53,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:57,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:53,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:57,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:53,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:59,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:50:53,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:50:59,616 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:51:01,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:03,683 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:01,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:05,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:01,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:05,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:01,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:07,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:01,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:07,535 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:51:09,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:13,296 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:09,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:13,296 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:09,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 90/509 [14:26<57:14, 8.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:51:17,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:18,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:17,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:20,513 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:17,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:20,513 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:17,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:22,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:17,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:22,191 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:51:23,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:25,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:23,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:28,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:23,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:28,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:23,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:28,788 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:51:30,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:31,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:30,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:34,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:30,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:34,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:30,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:34,927 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:51:36,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:37,872 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:36,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:40,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:36,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:40,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:36,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:40,593 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:51:41,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:44,491 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:41,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:44,491 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:41,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:44,491 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:51:46,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:48,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:46,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:48,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:46,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:50,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:46,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:52,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:46,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:52,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:46,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:51:54,474 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:46,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▊ | 98/509 [15:09<32:43, 4.78s/it]g-point operations will not be computed-02 06:51:46,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▊ | 98/509 [15:09<32:43, 4.78s/it]g-point operations will not be computed-02 06:51:46,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▊ | 98/509 [15:09<32:43, 4.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:51:59,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:00,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:59,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:00,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:51:59,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:00,605 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:52:02,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:00,605 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:52:02,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:04,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:02,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:04,402 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:52:07,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:10,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:07,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:13,491 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:07,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:13,491 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:07,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:16,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:07,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:16,390 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:52:19,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:22,314 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:19,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:25,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:19,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:28,046 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:19,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:28,046 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:19,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:28,046 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:52:31,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:33,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:31,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:36,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:31,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:39,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:31,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:39,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:31,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:39,650 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:52:42,510 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:45,405 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:42,510 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:48,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:42,510 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:48,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:42,510 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:51,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:42,510 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:51,126 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:52:54,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:56,873 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:54,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:52:59,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:54,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:02,540 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:54,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:02,540 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:52:54,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:02,540 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:53:05,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:08,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:05,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:11,128 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:05,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:11,128 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:05,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:13,895 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:05,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:13,895 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:53:16,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:19,518 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:16,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:22,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:16,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:22,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:16,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:25,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:16,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:25,113 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:53:27,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:30,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:27,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:33,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:27,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:33,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:27,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:36,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:27,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:36,288 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:53:39,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:41,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:39,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:44,513 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:39,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:47,251 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:39,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:47,251 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:39,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:47,251 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:53:50,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:52,824 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:50,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:55,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:50,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:58,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:50,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:58,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:53:50,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:53:58,375 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:54:01,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:03,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:01,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:06,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:01,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:09,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:01,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:09,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:01,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:09,364 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:54:12,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:14,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:12,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:17,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:12,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:20,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:12,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:20,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:12,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:20,272 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:54:23,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:25,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:23,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:28,381 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:23,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:31,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:23,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:31,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:23,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:31,036 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:54:33,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:36,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:33,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:39,092 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:33,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:41,765 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:33,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:41,765 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:33,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:41,765 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:54:44,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:47,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:44,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:49,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:44,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:52,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:44,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:52,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:44,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:52,547 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:54:55,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:54:57,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:55,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:00,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:55,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:00,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:55,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:03,235 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:54:55,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:03,235 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:55:06,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:08,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:06,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:11,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:06,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:13,913 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:06,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:13,913 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:06,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:13,913 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:55:16,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:19,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:16,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:21,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:16,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:21,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:16,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:24,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:16,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:24,447 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:55:27,066 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:29,687 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:27,066 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:32,226 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:27,066 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:32,226 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:27,066 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:34,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:27,066 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:34,853 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:55:37,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:40,042 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:37,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:42,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:37,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:42,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:37,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:45,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:37,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:45,194 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:55:47,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:50,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:47,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:52,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:47,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:52,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:47,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:55,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:47,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:55:55,504 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:55:58,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:00,660 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:58,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:03,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:58,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:03,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:58,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:05,664 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:55:58,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:05,664 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:56:08,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:10,815 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:08,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:13,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:08,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:13,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:08,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:15,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:08,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:15,657 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:56:18,254 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:20,732 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:18,254 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:23,206 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:18,254 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:23,206 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:18,254 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:25,700 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:18,254 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:25,700 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:56:28,204 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:30,669 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:28,204 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:33,149 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:28,204 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:33,149 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:28,204 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:36,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:28,204 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:36,063 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:56:38,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:41,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:38,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:43,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:38,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:46,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:38,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:46,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:38,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:46,019 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:56:48,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:50,932 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:48,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:53,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:48,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:55,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:48,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:55,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:48,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:56:55,713 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:56:58,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:00,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:58,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:03,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:58,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:05,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:58,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:05,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:56:58,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:05,426 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:57:07,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:10,260 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:07,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:12,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:07,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:14,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:07,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:14,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:07,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 129/509 [20:26<1:02:04, 9.80s/it]g-point operations will not be computed-02 06:57:07,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 129/509 [20:26<1:02:04, 9.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:57:17,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:19,693 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:17,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:22,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:17,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:24,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:17,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:24,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:17,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:24,370 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:57:26,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:29,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:26,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:31,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:26,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:31,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:26,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:33,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:26,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:33,706 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:57:36,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:38,432 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:36,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:40,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:36,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:43,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:36,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:43,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:36,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:43,077 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:57:45,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:47,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:45,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:49,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:45,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:52,236 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:45,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:52,236 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:45,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:52,236 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:57:54,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:56,759 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:54,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:57:58,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:54,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:01,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:54,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:01,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:57:54,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:01,148 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:58:03,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:05,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:03,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:07,805 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:03,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:10,001 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:03,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:10,001 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:03,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:10,001 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:58:12,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:14,415 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:12,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:16,501 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:12,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:16,501 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:12,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:18,635 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:12,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:18,635 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:58:20,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:22,888 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:20,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:24,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:20,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:26,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:20,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:26,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:20,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:26,981 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:58:29,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:31,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:29,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:33,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:29,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:35,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:29,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:35,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:29,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▉ | 138/509 [21:46<53:13, 8.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:58:37,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:39,168 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:37,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:41,123 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:37,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:41,123 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:37,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:43,015 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:37,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:43,015 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:58:45,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:46,875 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:45,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:48,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:45,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:48,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:45,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:50,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:45,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:50,511 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:58:52,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:54,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:52,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:57,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:52,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:57,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:52,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:58:57,616 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:58:59,425 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:01,088 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:59,425 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:02,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:59,425 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:02,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:59,425 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:04,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:59,425 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:07,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:59,425 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:09,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:59,425 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:09,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:59,425 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:10,493 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:58:59,425 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:10,493 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:59:12,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:14,796 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:12,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:14,796 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:12,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:16,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:12,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:18,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:12,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:18,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:12,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:21,226 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:12,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:21,226 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:59:22,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:24,734 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:22,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:24,734 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:22,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:24,734 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:59:26,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:28,833 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:26,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:28,833 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:26,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:28,833 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:59:30,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:28,833 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:59:30,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:32,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:30,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 149/509 [22:47<24:57, 4.16s/it]g-point operations will not be computed-02 06:59:30,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 149/509 [22:47<24:57, 4.16s/it]g-point operations will not be computed-02 06:59:30,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 149/509 [22:47<24:57, 4.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:59:36,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 149/509 [22:47<24:57, 4.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 06:59:36,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:39,254 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:36,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:39,254 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:59:42,571 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:45,575 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:42,571 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:48,572 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:42,571 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:48,572 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:42,571 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:51,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:42,571 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:51,479 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 06:59:54,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 06:59:57,404 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:54,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:00,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:54,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:00,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:54,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:03,182 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 06:59:54,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:03,182 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:00:06,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:08,949 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:06,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:11,847 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:06,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:11,847 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:06,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:14,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:06,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:14,692 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:00:17,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:20,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:17,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:23,293 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:17,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:23,293 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:17,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:26,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:17,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:26,148 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:00:29,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:31,904 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:29,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:34,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:29,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:37,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:29,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:37,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:29,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:37,598 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:00:40,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:43,344 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:40,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:46,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:40,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:46,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:40,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:48,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:40,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:48,986 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:00:51,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:54,540 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:51,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:00:57,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:51,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:00,061 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:51,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:00,061 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:00:51,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:00,061 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:01:02,984 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:05,690 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:02,984 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:08,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:02,984 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:08,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:02,984 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:11,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:02,984 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:11,181 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:01:14,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:16,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:14,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:19,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:14,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:22,261 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:14,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:22,261 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:14,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:22,261 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:01:25,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:27,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:25,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:30,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:25,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:33,303 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:25,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:33,303 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:25,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:33,303 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:01:36,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:38,877 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:36,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:41,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:36,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:44,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:36,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:44,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:36,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:44,347 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:01:47,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:49,789 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:47,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:52,465 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:47,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:55,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:47,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:55,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:47,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:01:55,136 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:01:57,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:00,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:57,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:03,185 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:57,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:05,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:57,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:05,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:01:57,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:05,837 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:02:08,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:11,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:08,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:13,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:08,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:13,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:08,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:16,508 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:08,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:16,508 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:02:19,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:21,821 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:19,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:24,491 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:19,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:27,146 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:19,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:27,146 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:19,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:27,146 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:02:29,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:32,610 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:29,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:35,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:29,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:35,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:29,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:37,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:29,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:37,830 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:02:40,505 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:43,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:40,505 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:45,739 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:40,505 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:45,739 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:40,505 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:48,385 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:40,505 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:48,385 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:02:51,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:53,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:51,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:56,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:51,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:56,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:51,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:58,863 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:02:51,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:02:58,863 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:03:01,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:04,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:01,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:06,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:01,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:06,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:01,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:09,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:01,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:09,306 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:03:11,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:14,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:11,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:17,015 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:11,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:17,015 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:11,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:19,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:11,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:19,605 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:03:22,232 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:24,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:22,232 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:27,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:22,232 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:27,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:22,232 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:29,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:22,232 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:29,797 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:03:32,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:34,895 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:32,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:37,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:32,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:37,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:32,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:39,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:32,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:39,986 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:03:42,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:45,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:42,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:47,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:42,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:47,526 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:42,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:49,949 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:42,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:49,949 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:03:52,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:54,966 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:52,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:57,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:52,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:57,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:52,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:59,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:03:52,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:03:59,940 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:04:02,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:04,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:02,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:07,356 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:02,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:07,356 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:02,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:10,344 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:02,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:10,344 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:04:12,913 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:15,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:12,913 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:17,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:12,913 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:17,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:12,913 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:20,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:12,913 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:20,190 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:04:22,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:25,114 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:22,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:27,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:22,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:29,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:22,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:29,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:22,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▏ | 177/509 [27:41<55:15, 9.99s/it]g-point operations will not be computed-02 07:04:22,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▏ | 177/509 [27:41<55:15, 9.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:04:32,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:34,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:32,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:37,133 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:32,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:37,133 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:32,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:39,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:32,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:39,507 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:04:42,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:44,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:42,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:46,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:42,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:46,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:42,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:49,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:42,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:49,103 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:04:51,573 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:53,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:51,573 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:56,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:51,573 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:58,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:51,573 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:58,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:04:51,573 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:04:58,457 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:05:00,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:03,130 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:00,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:05,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:00,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:05,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:00,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:07,758 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:00,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:07,758 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:05:10,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:12,496 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:10,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:14,738 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:10,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:16,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:10,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:16,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:10,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:16,982 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:05:19,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:21,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:19,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:23,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:19,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:23,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:19,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:25,902 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:19,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:25,902 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:05:28,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:30,354 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:28,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:32,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:28,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:32,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:28,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:34,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:28,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:34,677 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:05:36,936 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:39,061 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:36,936 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:41,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:36,936 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:41,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:36,936 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:43,286 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:36,936 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:43,286 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:05:45,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:47,642 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:45,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:49,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:45,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:49,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:45,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:51,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:45,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:51,840 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:05:54,014 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:56,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:54,014 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:58,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:54,014 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:05:58,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:54,014 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:00,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:05:54,014 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:00,107 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:06:02,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:04,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:02,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:06,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:02,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:06,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:02,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:08,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:02,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:08,161 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:06:10,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:12,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:10,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:14,052 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:10,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:14,052 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:10,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:15,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:10,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:15,942 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:06:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:19,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:21,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:21,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:23,332 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:23,332 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:06:25,163 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:28,576 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:25,163 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:28,576 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:25,163 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:30,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:25,163 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:30,227 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:06:31,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:33,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:31,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:33,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:31,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:36,764 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:31,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:36,764 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:06:38,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:39,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:38,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:39,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:38,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:41,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:38,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:41,449 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:06:44,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:45,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:44,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:45,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:44,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:48,503 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:44,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:48,503 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:52,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:52,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:53,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:55,971 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:55,971 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:06:58,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:00,261 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:00,261 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:02,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:03,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:03,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:05,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:05,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:08,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:10,048 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:10,048 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:11,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:06:49,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:11,696 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:07:14,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:17,932 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:14,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:20,939 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:14,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:23,897 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:14,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:23,897 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:14,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 201/509 [30:35<32:31, 6.34s/it]g-point operations will not be computed-02 07:07:14,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 201/509 [30:35<32:31, 6.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:07:26,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:29,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:26,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:32,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:26,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:35,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:26,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:35,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:26,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:35,670 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:07:38,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:41,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:38,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:44,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:38,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:44,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:38,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:47,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:38,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:47,257 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:07:50,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:53,060 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:50,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:55,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:50,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:58,642 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:50,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:58,642 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:07:50,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:07:58,642 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:08:01,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:04,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:01,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:07,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:01,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:10,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:01,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:10,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:01,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:10,050 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:08:12,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:15,746 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:12,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:18,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:12,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:21,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:12,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:21,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:12,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▊ | 206/509 [31:33<53:20, 10.56s/it]g-point operations will not be computed-02 07:08:12,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▊ | 206/509 [31:33<53:20, 10.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:08:24,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:27,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:24,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:29,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:24,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:32,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:24,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:32,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:24,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:32,568 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:08:35,467 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:38,226 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:35,467 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:40,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:35,467 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:43,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:35,467 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:43,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:35,467 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:43,727 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:08:46,614 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:49,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:46,614 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:52,081 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:46,614 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:52,081 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:46,614 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:54,764 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:46,614 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:08:54,764 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:08:57,625 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:00,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:57,625 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:03,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:57,625 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:05,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:57,625 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:05,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:08:57,625 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:05,744 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:09:08,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:11,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:08,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:13,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:08,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:16,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:08,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:16,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:08,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:16,632 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:09:19,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:22,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:19,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:24,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:19,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:24,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:19,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:27,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:19,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:27,394 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:09:30,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:32,884 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:30,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:35,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:30,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:35,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:30,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:38,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:30,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:38,311 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:09:41,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:43,758 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:41,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:46,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:41,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:46,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:41,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:49,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:41,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:49,058 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:09:51,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:54,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:51,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:57,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:51,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:59,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:51,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:59,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:09:51,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:09:59,671 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:10:02,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:05,022 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:02,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:07,621 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:02,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:10,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:02,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:10,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:02,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:10,209 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:10:12,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:15,609 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:12,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:18,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:12,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:20,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:12,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:20,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:12,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:20,797 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:10:23,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:26,061 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:23,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:28,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:23,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:31,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:23,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:31,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:23,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 218/509 [33:42<51:21, 10.59s/it]g-point operations will not be computed-02 07:10:23,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 218/509 [33:42<51:21, 10.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:10:33,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:36,406 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:33,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:38,991 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:33,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:41,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:33,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:41,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:33,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:41,536 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:10:44,232 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:46,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:44,232 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:49,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:44,232 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:51,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:44,232 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:51,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:44,232 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:51,843 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:10:54,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:56,958 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:54,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:10:59,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:54,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:01,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:54,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:01,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:10:54,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:01,985 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:11:04,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:07,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:04,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:09,559 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:04,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:12,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:04,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:12,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:04,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:12,027 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:11:14,631 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:17,137 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:14,631 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:19,658 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:14,631 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:22,129 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:14,631 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:22,129 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:14,631 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:22,129 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:11:24,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:27,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:24,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:29,630 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:24,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:32,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:24,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:32,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:24,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:32,103 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:11:34,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:37,069 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:34,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:39,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:34,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:42,559 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:34,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:42,559 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:34,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:42,559 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:11:45,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:47,628 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:45,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:50,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:45,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:50,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:45,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:52,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:45,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:52,512 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:11:55,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:57,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:55,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:59,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:55,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:11:59,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:55,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:02,371 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:11:55,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:02,371 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:12:04,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:07,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:04,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:09,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:04,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:12,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:04,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:12,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:04,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 228/509 [35:23<46:40, 9.97s/it]g-point operations will not be computed-02 07:12:04,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 228/509 [35:23<46:40, 9.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:12:14,622 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:16,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:14,622 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:19,296 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:14,622 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:19,296 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:14,622 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:21,707 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:14,622 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:21,707 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:12:24,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:26,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:24,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:28,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:24,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:28,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:24,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:31,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:24,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:31,315 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:12:33,751 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:36,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:33,751 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:38,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:33,751 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:40,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:33,751 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:40,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:33,751 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 231/509 [35:52<44:50, 9.68s/it]g-point operations will not be computed-02 07:12:33,751 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:45,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:43,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:47,809 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:43,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:47,809 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:43,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:50,092 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:43,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:50,092 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:12:52,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:54,831 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:52,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:57,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:52,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:59,433 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:52,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:59,433 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:12:52,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:12:59,433 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:13:01,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:04,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:01,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:06,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:01,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:08,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:01,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:08,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:01,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:08,486 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:13:10,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:12,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:10,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:15,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:10,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:17,332 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:10,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:17,332 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:10,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:17,332 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:13:19,619 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:21,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:19,619 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:23,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:19,619 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:23,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:19,619 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:26,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:19,619 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:26,077 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:13:28,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:30,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:28,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:32,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:28,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:32,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:28,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:32,495 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:13:36,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:38,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:36,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:40,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:36,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:42,746 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:36,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:42,746 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:36,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:42,746 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:13:44,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:46,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:44,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:48,752 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:44,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:50,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:44,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:50,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:44,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:50,689 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:13:52,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:54,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:52,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:56,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:52,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:58,236 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:52,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:58,236 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:13:52,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:13:58,236 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:14:00,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:01,996 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:00,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:03,757 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:00,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:03,757 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:00,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:05,513 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:00,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:05,513 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:14:07,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:10,678 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:07,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:12,300 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:07,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:12,300 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:07,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:12,300 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:14:14,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:15,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:14,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:15,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:14,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:18,620 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:14,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:18,620 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:14:20,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:21,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:20,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:21,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:20,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:24,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:20,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:24,512 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:14:26,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:28,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:26,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:28,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:26,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:29,906 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:26,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:32,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:26,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:32,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:26,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:34,741 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:26,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:36,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:26,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:36,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:26,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:38,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:26,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:38,021 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:14:40,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:38,021 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:14:40,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:40,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:40,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 249/509 [37:57<18:54, 4.36s/it]g-point operations will not be computed-02 07:14:40,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 249/509 [37:57<18:54, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:14:46,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 249/509 [37:57<18:54, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:14:46,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:48,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:46,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:48,876 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:14:52,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:55,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:52,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:58,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:52,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:14:58,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:52,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:00,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:14:52,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:00,954 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:15:03,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:06,792 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:03,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:09,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:03,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:12,523 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:03,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:12,523 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:03,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:12,523 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:15:15,455 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:18,346 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:15,455 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:21,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:15,455 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:21,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:15,455 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:24,001 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:15,455 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:24,001 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:15:26,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:29,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:26,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:32,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:26,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:35,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:26,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:35,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:26,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:35,321 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:15:38,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:41,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:38,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:43,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:38,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:46,559 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:38,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:46,559 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:38,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:46,559 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:15:49,441 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:52,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:49,441 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:54,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:49,441 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:57,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:49,441 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:57,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:15:49,441 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:15:57,729 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:16:00,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:03,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:00,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:06,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:00,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:08,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:00,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:08,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:00,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:08,806 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:16:11,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:14,429 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:11,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:17,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:11,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:17,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:11,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:19,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:11,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:19,893 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:16:22,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:25,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:22,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:28,125 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:22,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:28,125 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:22,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:30,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:22,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:30,861 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:16:33,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:36,389 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:33,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:39,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:33,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:39,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:33,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:41,851 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:33,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:41,851 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:16:44,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:47,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:44,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:49,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:44,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:49,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:44,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:52,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:44,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:52,624 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:16:55,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:16:58,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:55,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:00,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:55,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:00,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:55,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:03,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:16:55,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:03,370 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:17:06,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:08,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:06,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:11,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:06,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:14,040 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:06,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:14,040 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:06,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:14,040 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:17:16,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:19,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:16,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:22,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:16,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:22,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:16,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:24,822 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:16,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:24,822 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:17:27,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:30,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:27,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:32,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:27,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:32,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:27,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:35,446 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:27,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:35,446 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:17:38,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:40,814 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:38,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:43,443 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:38,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:43,443 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:38,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:46,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:38,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:46,017 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:17:48,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:51,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:48,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:53,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:48,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:56,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:48,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:56,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:48,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:17:56,512 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:17:59,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:01,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:59,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:04,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:59,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:07,007 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:59,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:07,007 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:17:59,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:07,007 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:18:09,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:12,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:09,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:14,765 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:09,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:17,332 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:09,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:17,332 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:09,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:17,332 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:18:19,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:22,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:19,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:25,106 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:19,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:27,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:19,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:27,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:19,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:27,668 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:18:30,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:32,840 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:30,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:35,314 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:30,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:37,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:30,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:37,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:30,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:37,843 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:18:40,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:42,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:40,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:45,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:40,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:47,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:40,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:47,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:40,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:47,919 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:18:50,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:53,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:50,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:55,473 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:50,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:55,473 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:50,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:57,972 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:18:50,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:18:57,972 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:19:00,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:03,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:00,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:05,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:00,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:07,884 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:00,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:07,884 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:00,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▌ | 274/509 [42:19<39:39, 10.12s/it]g-point operations will not be computed-02 07:19:00,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▌ | 274/509 [42:19<39:39, 10.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:19:10,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:12,865 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:10,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:15,331 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:10,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:15,331 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:10,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:18,294 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:10,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:18,294 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:19:20,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:23,304 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:20,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:25,691 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:20,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:28,090 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:20,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:28,090 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:20,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:28,090 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:19:30,620 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:33,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:30,620 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:35,473 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:30,620 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:37,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:30,620 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:37,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:30,620 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:37,852 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:19:40,371 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:42,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:40,371 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:45,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:40,371 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:45,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:40,371 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:47,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:40,371 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:47,438 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:19:49,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:52,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:49,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:49,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:56,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:49,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:56,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:49,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:19:56,940 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:19:59,415 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:01,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:59,415 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:04,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:59,415 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:06,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:59,415 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:06,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:19:59,415 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:06,483 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:20:08,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:11,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:08,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:13,625 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:08,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:15,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:08,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:15,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:08,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 281/509 [43:27<36:33, 9.62s/it]g-point operations will not be computed-02 07:20:08,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 281/509 [43:27<36:33, 9.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:20:18,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:20,652 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:18,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:22,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:18,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:25,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:18,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:25,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:18,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:25,237 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:20:27,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:29,869 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:27,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:32,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:27,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:34,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:27,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:34,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:27,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:34,352 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:20:36,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:38,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:36,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:41,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:36,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:43,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:36,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:43,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:36,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:43,219 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:20:45,480 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:47,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:45,480 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:49,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:45,480 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:52,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:45,480 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:52,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:45,480 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:52,019 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:20:54,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:56,357 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:54,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:58,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:54,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:20:58,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:54,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:00,669 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:20:54,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:00,669 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:21:02,880 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:04,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:02,880 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:07,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:02,880 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:09,157 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:02,880 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:09,157 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:02,880 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▋ | 287/509 [44:20<32:39, 8.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:21:11,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:13,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:11,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:15,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:11,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:17,378 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:11,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:17,378 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:11,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:17,378 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:21:19,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:21,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:19,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:23,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:19,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:23,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:19,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:25,154 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:19,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:25,154 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:21:27,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:28,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:27,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:30,739 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:27,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:30,739 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:27,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:32,477 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:27,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:32,477 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:21:34,303 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:37,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:34,303 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:37,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:34,303 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:39,416 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:34,303 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:39,416 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:21:41,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:42,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:41,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:42,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:41,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:44,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:41,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:44,424 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:21:47,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:49,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:47,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:49,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:47,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:50,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:47,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:50,584 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:21:53,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:54,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:53,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:54,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:53,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:57,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:53,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:21:57,327 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:21:58,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:58,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:00,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:21:58,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:00,965 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:22:03,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:05,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:03,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:05,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:03,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:05,306 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:22:07,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:05,306 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:22:07,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:09,204 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:07,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:09,204 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:22:11,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:09,204 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:22:11,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:12,665 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:11,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▌ | 299/509 [45:28<13:57, 3.99s/it]g-point operations will not be computed-02 07:22:11,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▌ | 299/509 [45:28<13:57, 3.99s/it]g-point operations will not be computed-02 07:22:11,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▌ | 299/509 [45:28<13:57, 3.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:22:17,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▌ | 299/509 [45:28<13:57, 3.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:22:17,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:19,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:17,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:19,297 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:22:22,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:25,503 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:22,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:28,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:22,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:28,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:22,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:31,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:22,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:31,348 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:22:34,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:37,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:34,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:39,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:34,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:42,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:34,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:42,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:34,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:42,766 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:22:45,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:48,577 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:45,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:51,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:45,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:51,374 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:45,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:54,236 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:45,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:54,236 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:22:57,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:22:59,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:57,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:02,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:57,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:05,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:57,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:05,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:22:57,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:05,565 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:23:08,518 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:11,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:08,518 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:14,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:08,518 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:14,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:08,518 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:16,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:08,518 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:16,860 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:23:19,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:22,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:19,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:25,371 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:19,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:28,131 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:19,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:28,131 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:19,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:28,131 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:23:31,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:33,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:31,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:36,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:31,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:36,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:31,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:39,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:31,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:39,343 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:23:42,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:44,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:42,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:47,672 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:42,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:50,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:42,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:50,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:42,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:50,409 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:23:53,313 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:55,996 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:53,313 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:23:58,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:53,313 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:01,432 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:53,313 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:01,432 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:23:53,313 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:01,432 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:24:04,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:07,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:04,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:09,770 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:04,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:12,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:04,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:12,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:04,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:12,424 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:24:15,236 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:17,902 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:15,236 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:20,609 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:15,236 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:23,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:15,236 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:23,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:15,236 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:23,337 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:24:26,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:28,786 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:26,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:31,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:26,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:34,121 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:26,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:34,121 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:26,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:34,121 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:24:36,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:39,481 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:36,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:42,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:36,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:42,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:36,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:44,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:36,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:44,819 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:24:47,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:50,182 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:47,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:47,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:52,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:47,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:55,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:47,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:24:55,502 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:24:58,263 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:00,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:58,263 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:03,660 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:58,263 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:03,660 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:58,263 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:06,312 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:24:58,263 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:06,312 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:25:09,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:11,580 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:09,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:14,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:09,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:14,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:09,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:16,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:09,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:16,788 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:25:19,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:22,066 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:19,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:24,610 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:19,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:24,610 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:19,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:27,224 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:19,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:27,224 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:25:29,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:32,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:29,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:35,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:29,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:37,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:29,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:37,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:29,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:37,601 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:25:40,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:42,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:40,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:45,415 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:40,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:47,958 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:40,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:47,958 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:40,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:47,958 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:25:50,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:53,078 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:50,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:55,628 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:50,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:58,114 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:50,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:58,114 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:25:50,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:25:58,114 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:26:00,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:03,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:00,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:05,789 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:00,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:08,283 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:00,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:08,283 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:00,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:08,283 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:26:10,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:13,425 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:10,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:15,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:10,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:18,410 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:10,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:18,410 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:10,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:18,410 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:26:20,976 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:23,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:20,976 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:25,950 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:20,976 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:28,368 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:20,976 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:28,368 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:20,976 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:28,368 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:26:30,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:33,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:30,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:35,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:30,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:38,408 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:30,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:38,408 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:30,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:38,408 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:26:41,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:43,408 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:41,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:45,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:41,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:48,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:41,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:48,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:41,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:48,813 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:26:51,421 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:53,851 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:51,421 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:56,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:51,421 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:58,630 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:51,421 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:58,630 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:26:51,421 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:26:58,630 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:27:01,174 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:03,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:01,174 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:05,976 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:01,174 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:05,976 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:01,174 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:08,361 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:01,174 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:08,361 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:27:10,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:13,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:10,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:15,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:10,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:15,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:10,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:17,958 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:10,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:17,958 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:27:20,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:22,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:20,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:25,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:20,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:25,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:20,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:27,618 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:20,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:27,618 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:27:30,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:32,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:30,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:34,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:30,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:34,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:30,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:37,187 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:30,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:37,187 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:27:39,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:41,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:39,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:44,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:39,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:44,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:39,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:46,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:39,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:46,650 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:27:49,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:51,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:49,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:53,759 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:49,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:53,759 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:49,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:56,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:49,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:27:56,056 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:27:58,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:00,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:58,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:02,924 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:58,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:05,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:58,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:05,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:27:58,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:05,180 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:28:07,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:09,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:07,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:11,884 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:07,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:11,884 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:07,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:14,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:07,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:14,064 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:28:16,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:18,552 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:16,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:20,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:16,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:20,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:16,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:22,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:16,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:22,921 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:28:25,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:27,410 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:25,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:29,569 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:25,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:31,726 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:25,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:31,726 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:25,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:31,726 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:28:33,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:36,096 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:33,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:40,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:33,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:40,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:33,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:40,262 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:28:42,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:44,595 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:42,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:46,672 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:42,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:48,719 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:42,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:48,719 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:42,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:48,719 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:28:50,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:52,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:50,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:54,868 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:50,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:56,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:50,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:56,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:50,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:28:56,867 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:28:58,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:00,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:58,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:02,786 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:58,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:02,786 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:58,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:04,625 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:28:58,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:04,625 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:29:06,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:08,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:06,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:10,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:06,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:10,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:06,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:11,904 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:06,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:15,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:06,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:17,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:06,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:17,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:06,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:18,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:06,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:18,787 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:29:20,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:22,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:20,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:22,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:20,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:25,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:20,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:25,064 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:29:26,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:29,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:26,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:29,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:26,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:30,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:26,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:30,823 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:29:32,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:34,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:32,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:34,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:32,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:34,736 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:29:37,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:38,320 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:37,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:38,320 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:37,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:40,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:37,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:42,643 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:37,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:42,643 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:37,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:44,543 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:37,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:47,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:37,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:47,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:37,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:47,270 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:29:49,007 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:47,270 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:29:49,007 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:29:51,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:49,007 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 350/509 [53:06<10:17, 3.88s/it]g-point operations will not be computed-02 07:29:49,007 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 350/509 [53:06<10:17, 3.88s/it]g-point operations will not be computed-02 07:29:49,007 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 350/509 [53:06<10:17, 3.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:29:57,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:00,674 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:57,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:03,618 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:57,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:06,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:57,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:06,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:29:57,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:06,598 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:30:09,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:12,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:09,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:15,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:09,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:18,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:09,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:18,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:09,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:18,265 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:30:21,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:24,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:21,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:26,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:21,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:26,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:21,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:29,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:21,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:29,788 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:30:32,732 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:35,610 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:32,732 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:38,412 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:32,732 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:41,185 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:32,732 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:41,185 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:32,732 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:41,185 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:30:44,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:46,937 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:44,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:49,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:44,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:52,417 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:44,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:52,417 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:44,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 355/509 [54:04<26:11, 10.20s/it]g-point operations will not be computed-02 07:30:44,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 355/509 [54:04<26:11, 10.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:30:55,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:30:58,114 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:55,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:00,888 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:55,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:03,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:55,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:03,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:30:55,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:03,677 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:31:06,577 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:09,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:06,577 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:12,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:06,577 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:14,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:06,577 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:14,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:06,577 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:14,853 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:31:17,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:20,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:17,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:23,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:17,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:25,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:17,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:25,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:17,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:25,970 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:31:28,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:31,528 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:28,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:34,245 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:28,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:36,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:28,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:36,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:28,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:36,944 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:31:39,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:42,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:39,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:45,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:39,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:48,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:39,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:48,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:39,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 360/509 [54:59<27:10, 10.94s/it]g-point operations will not be computed-02 07:31:39,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 360/509 [54:59<27:10, 10.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:31:50,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:53,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:50,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:56,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:50,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:56,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:50,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:58,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:31:50,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:31:58,811 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:32:01,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:04,285 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:01,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:06,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:01,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:06,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:01,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:09,691 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:01,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:09,691 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:32:12,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:15,198 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:12,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:17,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:12,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:20,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:12,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:20,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:12,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:20,459 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:32:23,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:25,889 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:23,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:28,456 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:23,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:31,011 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:23,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:31,011 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:23,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|█████████████████████████████████████████████████████████▉ | 364/509 [55:42<26:00, 10.76s/it]g-point operations will not be computed-02 07:32:23,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|█████████████████████████████████████████████████████████▉ | 364/509 [55:42<26:00, 10.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:32:33,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:36,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:33,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:33,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:41,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:33,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:41,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:33,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 365/509 [55:53<25:38, 10.68s/it]g-point operations will not be computed-02 07:32:33,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 365/509 [55:53<25:38, 10.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:32:44,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:46,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:44,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:49,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:44,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:49,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:44,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:52,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:44,217 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:52,017 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:32:54,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:57,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:54,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:59,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:54,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:32:59,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:54,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:02,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:32:54,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:02,391 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:33:05,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:07,595 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:05,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:10,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:05,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:12,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:05,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:12,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:05,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:12,718 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:33:15,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:17,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:15,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:20,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:15,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:23,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:15,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:23,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:15,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:23,068 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:33:25,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:28,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:25,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:30,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:25,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:33,312 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:25,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:33,312 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:25,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:33,312 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:33:35,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:38,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:35,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:40,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:35,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:43,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:35,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:43,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:35,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:43,438 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:33:46,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:48,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:46,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:51,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:46,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:53,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:46,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:53,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:46,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:53,536 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:33:56,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:33:58,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:56,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:01,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:56,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:01,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:56,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:03,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:33:56,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:03,604 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:34:06,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:08,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:06,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:11,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:06,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:13,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:06,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:13,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:06,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:13,560 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:34:16,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:18,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:16,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:21,015 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:16,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:24,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:16,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:24,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:16,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:24,000 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:34:26,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:28,948 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:26,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:31,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:26,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:33,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:26,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:33,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:26,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:33,787 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:34:36,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:38,732 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:36,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:41,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:36,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:41,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:36,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:43,580 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:36,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:43,580 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:34:46,080 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:48,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:46,080 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:50,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:46,080 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:50,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:46,080 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:53,243 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:46,080 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:53,243 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:34:55,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:34:58,090 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:55,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:00,432 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:55,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:02,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:55,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:02,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:34:55,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:02,812 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:35:05,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:07,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:05,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:10,032 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:05,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:12,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:05,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:12,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:05,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:12,345 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:35:14,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:17,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:14,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:19,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:14,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:21,762 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:14,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:21,762 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:14,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▋ | 381/509 [58:33<20:32, 9.63s/it]g-point operations will not be computed-02 07:35:14,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▋ | 381/509 [58:33<20:32, 9.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:35:24,224 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:26,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:24,224 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:28,882 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:24,224 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:31,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:24,224 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:31,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:24,224 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:31,228 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:35:33,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:35,868 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:33,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:38,146 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:33,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:40,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:33,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:40,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:33,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:40,461 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:35:42,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:45,084 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:42,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:47,303 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:42,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:49,523 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:42,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:49,523 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:42,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:49,523 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:35:51,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:54,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:51,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:54,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:51,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:58,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:35:51,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:35:58,454 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:36:00,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:02,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:00,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:05,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:00,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:07,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:00,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:07,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:00,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:07,292 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:36:09,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:11,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:09,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:13,822 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:09,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:15,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:09,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:15,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:09,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:15,905 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:36:18,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:20,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:18,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:22,226 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:18,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:22,226 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:18,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:24,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:18,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:24,305 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:36:26,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:28,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:26,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:30,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:26,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:30,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:26,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:32,397 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:26,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:32,397 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:36:34,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:36,417 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:34,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:38,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:34,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:38,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:34,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:40,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:34,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:40,292 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:36:42,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:44,149 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:42,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:46,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:42,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:46,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:42,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:47,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:42,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:47,830 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:36:49,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:51,505 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:49,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:51,505 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:49,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:54,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:49,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:54,827 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:36:56,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:58,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:56,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:59,700 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:56,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:36:59,700 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:56,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:01,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:56,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:04,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:56,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:04,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:56,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:05,706 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:36:56,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 394/509 [1:00:18<13:15, 6.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:37:08,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:09,870 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:08,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:09,870 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:08,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:12,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:08,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:12,413 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:37:13,719 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:12,413 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:37:13,719 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:16,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:13,719 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:16,033 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:37:18,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:20,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:18,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:20,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:18,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▌ | 397/509 [1:00:33<10:09, 5.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:37:22,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▌ | 397/509 [1:00:33<10:09, 5.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:37:22,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:24,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:22,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:24,359 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:37:26,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:24,359 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:37:26,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:28,308 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:26,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████ | 400/509 [1:00:43<07:12, 3.96s/it]g-point operations will not be computed-02 07:37:26,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████ | 400/509 [1:00:43<07:12, 3.96s/it]g-point operations will not be computed-02 07:37:26,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████ | 400/509 [1:00:43<07:12, 3.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:37:34,472 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:37,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:34,472 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:40,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:34,472 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:43,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:34,472 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:43,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:34,472 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:43,228 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:37:46,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:48,989 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:46,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:51,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:46,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:54,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:46,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:54,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:46,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:37:54,722 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:37:57,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:00,501 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:57,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:03,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:57,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:03,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:57,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:06,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:37:57,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:06,126 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:38:08,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:11,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:08,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:14,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:08,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:14,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:08,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:17,446 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:08,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:17,446 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:38:20,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:23,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:20,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:25,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:20,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:25,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:20,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:28,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:20,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:28,737 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:38:31,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:34,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:31,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:37,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:31,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:37,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:31,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:39,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:31,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:39,960 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:38:42,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:45,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:42,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:48,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:42,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:51,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:42,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:51,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:42,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:51,107 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:38:54,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:56,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:54,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:59,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:54,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:38:59,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:54,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:02,234 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:38:54,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:02,234 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:39:05,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:07,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:05,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:10,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:05,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:13,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:05,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:13,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:05,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:13,271 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:39:16,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:18,815 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:16,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:21,558 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:16,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:21,558 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:16,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:24,290 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:16,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:24,290 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:39:27,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:29,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:27,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:32,477 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:27,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:32,477 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:27,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:35,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:27,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:35,150 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:39:37,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:40,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:37,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:43,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:37,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:45,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:37,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:45,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:37,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:45,952 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:39:48,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:51,276 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:48,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:53,963 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:48,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:56,594 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:48,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:56,594 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:48,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:39:56,594 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:39:59,354 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:02,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:59,354 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:04,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:59,354 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:07,285 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:59,354 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:07,285 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:39:59,354 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:07,285 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:40:10,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:12,622 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:10,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:15,280 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:10,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:15,280 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:10,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:17,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:10,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:17,891 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:40:20,634 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:23,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:20,634 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:25,920 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:20,634 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:28,500 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:20,634 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:28,500 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:20,634 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:28,500 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:40:31,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:33,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:31,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:36,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:31,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:39,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:31,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:39,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:31,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:39,103 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:40:41,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:44,412 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:41,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:47,006 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:41,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:49,580 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:41,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:49,580 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:41,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:49,580 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:40:52,276 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:54,815 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:52,276 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:57,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:52,276 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:59,924 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:52,276 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:59,924 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:40:52,276 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:40:59,924 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:41:02,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:05,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:02,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:07,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:02,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:10,261 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:02,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:10,261 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:02,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:10,261 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:41:12,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:15,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:12,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:17,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:12,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:17,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:12,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:20,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:12,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:20,534 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:41:23,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:25,796 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:23,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:28,314 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:23,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:28,314 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:23,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:30,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:23,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:30,871 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:41:33,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:35,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:33,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:38,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:33,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:38,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:33,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:41,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:33,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:41,055 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:41:43,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:46,133 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:43,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:48,644 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:43,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:48,644 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:43,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:51,130 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:43,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:51,130 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:41:53,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:56,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:53,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:58,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:53,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:41:58,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:53,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:01,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:41:53,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:01,730 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:42:04,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:06,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:04,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:09,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:04,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:09,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:04,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:11,801 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:04,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:11,801 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:42:14,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:16,765 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:14,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:19,196 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:14,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:19,196 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:14,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:21,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:14,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:21,615 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:42:24,146 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:26,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:24,146 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:28,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:24,146 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:31,251 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:24,146 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:31,251 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:24,146 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:31,251 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:42:33,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:36,060 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:33,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:38,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:33,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:38,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:33,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:40,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:33,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:40,768 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:42:43,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:45,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:43,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:47,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:43,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:47,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:43,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:50,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:43,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:50,315 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:42:52,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:55,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:52,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:57,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:52,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:59,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:52,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:59,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:42:52,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:42:59,772 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:43:02,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:04,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:02,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:06,752 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:02,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:06,752 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:02,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:09,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:02,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:09,082 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:43:11,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:13,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:11,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:16,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:11,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:18,350 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:11,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:18,350 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:11,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:18,350 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:43:20,674 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:22,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:20,674 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:25,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:20,674 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:25,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:20,674 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:27,346 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:20,674 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:27,346 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:43:29,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:31,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:29,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:34,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:29,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:36,230 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:29,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:36,230 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:29,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:36,230 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:43:38,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:40,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:38,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:42,815 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:38,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:44,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:38,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:44,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:38,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:44,980 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:43:47,196 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:49,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:47,196 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:51,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:47,196 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:51,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:47,196 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:53,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:47,196 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:53,517 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:43:55,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:57,739 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:55,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:59,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:55,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:43:59,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:55,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:01,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:43:55,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:01,766 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:44:03,886 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:05,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:03,886 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:07,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:03,886 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:07,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:03,886 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:09,824 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:03,886 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:09,824 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:44:11,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:13,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:11,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:15,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:11,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:15,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:11,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:17,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:11,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:17,489 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:44:19,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:21,290 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:19,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:23,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:19,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:23,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:19,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:24,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:19,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:24,859 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:44:26,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:30,171 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:26,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:30,171 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:26,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:31,870 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:26,708 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:31,870 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:44:33,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:35,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:33,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:35,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:33,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:36,996 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:33,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:36,996 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:44:40,200 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:41,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:40,200 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:41,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:40,200 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:43,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:40,200 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:43,210 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:44:46,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:47,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:46,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:47,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:46,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:50,076 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:46,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:50,076 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:44:51,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:53,831 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:51,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:53,831 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:51,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:53,831 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:44:56,115 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:57,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:56,115 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:44:57,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:44:56,115 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 447/509 [1:08:10<05:39, 5.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:45:00,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:45:01,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:45:00,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:45:01,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:45:00,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:45:01,982 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:45:03,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:45:01,982 >> Could not estimate the number of tokens of the input, floatin[WARNING|modeling_utils.py:388] 2022-03-02 07:45:03,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:45:05,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:45:03,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:45:07,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:45:03,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:45:07,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:45:03,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 11.61 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 11.61 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 11.61 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF \ No newline at end of file diff --git a/wandb/run-20220302_063647-bmivw6vv/files/requirements.txt b/wandb/run-20220302_063647-bmivw6vv/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220302_063647-bmivw6vv/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220302_063647-bmivw6vv/files/wandb-metadata.json b/wandb/run-20220302_063647-bmivw6vv/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..35e0d5268d1948d211ee1eaf8817d59b514f6f1b --- /dev/null +++ b/wandb/run-20220302_063647-bmivw6vv/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-02T06:36:48.207194", + "startedAt": "2022-03-02T06:36:47.139630", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=14", + "--per_device_eval_batch_size=14", + "--gradient_accumulation_steps=4", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=1e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "622e8b594e8af8169be8cfe538228e49ae08c59d" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json b/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..108957f204b037b0189e166a36525843c7c9de7e --- /dev/null +++ b/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/loss": 4.25, "train/learning_rate": 8.960000000000001e-05, "train/epoch": 0.88, "train/global_step": 450, "_runtime": 4102, "_timestamp": 1646207109, "_step": 449, "gradients/decoder.transformer.ln_f.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 66.0, 572.0, 347.0, 29.0], "bins": [-787.8201904296875, -775.0996704101562, -762.379150390625, -749.65869140625, -736.9381713867188, -724.2176513671875, -711.4971923828125, -698.7766723632812, -686.05615234375, -673.3356323242188, -660.6151123046875, -647.8946533203125, -635.1741333007812, -622.45361328125, -609.733154296875, -597.0126342773438, -584.2921142578125, -571.5715942382812, -558.85107421875, -546.130615234375, -533.4100952148438, -520.6895751953125, -507.9690856933594, -495.24859619140625, -482.528076171875, -469.80755615234375, -457.0870666503906, -444.3665771484375, -431.64605712890625, -418.925537109375, -406.2050476074219, -393.48455810546875, -380.7640686035156, -368.0435791015625, -355.32305908203125, -342.6025390625, -329.8820495605469, -317.16156005859375, -304.4410400390625, -291.72052001953125, -279.0000305175781, -266.279541015625, -253.55902099609375, -240.83851623535156, -228.11801147460938, -215.3975067138672, -202.677001953125, -189.9564971923828, -177.23597717285156, -164.51547241210938, -151.7949676513672, -139.074462890625, -126.35395812988281, -113.63345336914062, -100.91294860839844, -88.19244384765625, -75.47193908691406, -62.751434326171875, -50.03092956542969, -37.3104248046875, -24.589920043945312, -11.869415283203125, 0.8510894775390625, 13.57159423828125, 26.292097091674805]}, "gradients/decoder.transformer.ln_f.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 2.0, 4.0, 4.0, 8.0, 6.0, 5.0, 6.0, 17.0, 20.0, 24.0, 21.0, 23.0, 24.0, 38.0, 26.0, 40.0, 46.0, 50.0, 47.0, 52.0, 47.0, 54.0, 46.0, 42.0, 34.0, 51.0, 38.0, 40.0, 26.0, 30.0, 21.0, 22.0, 13.0, 22.0, 12.0, 10.0, 11.0, 7.0, 11.0, 3.0, 4.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-106.69377136230469, -103.13261413574219, -99.57145690917969, -96.01029968261719, -92.44914245605469, -88.88798522949219, -85.32682800292969, -81.76567077636719, -78.20451354980469, -74.64335632324219, -71.08219909667969, -67.52104187011719, -63.95988464355469, -60.39872741699219, -56.83757019042969, -53.27641296386719, -49.71525192260742, -46.15409469604492, -42.59293746948242, -39.03178024291992, -35.47062301635742, -31.90946388244629, -28.34830665588379, -24.78714942932129, -21.22599220275879, -17.66483497619629, -14.103677749633789, -10.542519569396973, -6.981362342834473, -3.4202041625976562, 0.14095306396484375, 3.7021102905273438, 7.263267517089844, 10.824424743652344, 14.385581970214844, 17.946739196777344, 21.507896423339844, 25.069055557250977, 28.630212783813477, 32.191368103027344, 35.752525329589844, 39.313682556152344, 42.874839782714844, 46.435997009277344, 49.997154235839844, 53.558311462402344, 57.119468688964844, 60.680625915527344, 64.24179077148438, 67.80294799804688, 71.36410522460938, 74.92526245117188, 78.48641967773438, 82.04757690429688, 85.60873413085938, 89.16989135742188, 92.73104858398438, 96.29220581054688, 99.85336303710938, 103.41452026367188, 106.97567749023438, 110.53683471679688, 114.09799194335938, 117.65914916992188, 121.22030639648438]}, "gradients/decoder.transformer.h.23.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 0.0, 1.0, 5.0, 4.0, 4.0, 4.0, 6.0, 14.0, 20.0, 14.0, 17.0, 26.0, 13.0, 22.0, 36.0, 26.0, 34.0, 34.0, 45.0, 39.0, 42.0, 50.0, 63.0, 50.0, 35.0, 49.0, 52.0, 39.0, 34.0, 33.0, 30.0, 26.0, 25.0, 22.0, 14.0, 20.0, 10.0, 15.0, 10.0, 13.0, 6.0, 2.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-5.578125, -5.422119140625, -5.26611328125, -5.110107421875, -4.9541015625, -4.798095703125, -4.64208984375, -4.486083984375, -4.330078125, -4.174072265625, -4.01806640625, -3.862060546875, -3.7060546875, -3.550048828125, -3.39404296875, -3.238037109375, -3.08203125, -2.926025390625, -2.77001953125, -2.614013671875, -2.4580078125, -2.302001953125, -2.14599609375, -1.989990234375, -1.833984375, -1.677978515625, -1.52197265625, -1.365966796875, -1.2099609375, -1.053955078125, -0.89794921875, -0.741943359375, -0.5859375, -0.429931640625, -0.27392578125, -0.117919921875, 0.0380859375, 0.194091796875, 0.35009765625, 0.506103515625, 0.662109375, 0.818115234375, 0.97412109375, 1.130126953125, 1.2861328125, 1.442138671875, 1.59814453125, 1.754150390625, 1.91015625, 2.066162109375, 2.22216796875, 2.378173828125, 2.5341796875, 2.690185546875, 2.84619140625, 3.002197265625, 3.158203125, 3.314208984375, 3.47021484375, 3.626220703125, 3.7822265625, 3.938232421875, 4.09423828125, 4.250244140625, 4.40625]}, "gradients/decoder.transformer.h.23.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 2.0, 1.0, 4.0, 3.0, 10.0, 3.0, 10.0, 13.0, 20.0, 31.0, 28.0, 42.0, 83.0, 109.0, 141.0, 232.0, 367.0, 623.0, 1057.0, 1983.0, 4405.0, 12095.0, 51669.0, 489014.0, 3157381.0, 409059.0, 46142.0, 10964.0, 4160.0, 1936.0, 1010.0, 602.0, 320.0, 232.0, 163.0, 114.0, 74.0, 56.0, 40.0, 31.0, 18.0, 18.0, 13.0, 5.0, 2.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.359375, -19.6181640625, -18.876953125, -18.1357421875, -17.39453125, -16.6533203125, -15.912109375, -15.1708984375, -14.4296875, -13.6884765625, -12.947265625, -12.2060546875, -11.46484375, -10.7236328125, -9.982421875, -9.2412109375, -8.5, -7.7587890625, -7.017578125, -6.2763671875, -5.53515625, -4.7939453125, -4.052734375, -3.3115234375, -2.5703125, -1.8291015625, -1.087890625, -0.3466796875, 0.39453125, 1.1357421875, 1.876953125, 2.6181640625, 3.359375, 4.1005859375, 4.841796875, 5.5830078125, 6.32421875, 7.0654296875, 7.806640625, 8.5478515625, 9.2890625, 10.0302734375, 10.771484375, 11.5126953125, 12.25390625, 12.9951171875, 13.736328125, 14.4775390625, 15.21875, 15.9599609375, 16.701171875, 17.4423828125, 18.18359375, 18.9248046875, 19.666015625, 20.4072265625, 21.1484375, 21.8896484375, 22.630859375, 23.3720703125, 24.11328125, 24.8544921875, 25.595703125, 26.3369140625, 27.078125]}, "gradients/decoder.transformer.h.23.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 4.0, 6.0, 9.0, 17.0, 36.0, 70.0, 89.0, 129.0, 199.0, 353.0, 540.0, 666.0, 633.0, 442.0, 290.0, 213.0, 145.0, 78.0, 44.0, 39.0, 25.0, 13.0, 8.0, 9.0, 12.0, 5.0, 2.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.703125, -26.8828125, -26.0625, -25.2421875, -24.421875, -23.6015625, -22.78125, -21.9609375, -21.140625, -20.3203125, -19.5, -18.6796875, -17.859375, -17.0390625, -16.21875, -15.3984375, -14.578125, -13.7578125, -12.9375, -12.1171875, -11.296875, -10.4765625, -9.65625, -8.8359375, -8.015625, -7.1953125, -6.375, -5.5546875, -4.734375, -3.9140625, -3.09375, -2.2734375, -1.453125, -0.6328125, 0.1875, 1.0078125, 1.828125, 2.6484375, 3.46875, 4.2890625, 5.109375, 5.9296875, 6.75, 7.5703125, 8.390625, 9.2109375, 10.03125, 10.8515625, 11.671875, 12.4921875, 13.3125, 14.1328125, 14.953125, 15.7734375, 16.59375, 17.4140625, 18.234375, 19.0546875, 19.875, 20.6953125, 21.515625, 22.3359375, 23.15625, 23.9765625, 24.796875]}, "gradients/decoder.transformer.h.23.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 4.0, 4.0, 5.0, 6.0, 3.0, 6.0, 16.0, 30.0, 33.0, 69.0, 132.0, 205.0, 358.0, 681.0, 1499.0, 6302.0, 356888.0, 3780150.0, 42594.0, 2923.0, 1053.0, 539.0, 285.0, 191.0, 112.0, 64.0, 35.0, 22.0, 25.0, 14.0, 13.0, 10.0, 6.0, 5.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-82.3125, -79.763671875, -77.21484375, -74.666015625, -72.1171875, -69.568359375, -67.01953125, -64.470703125, -61.921875, -59.373046875, -56.82421875, -54.275390625, -51.7265625, -49.177734375, -46.62890625, -44.080078125, -41.53125, -38.982421875, -36.43359375, -33.884765625, -31.3359375, -28.787109375, -26.23828125, -23.689453125, -21.140625, -18.591796875, -16.04296875, -13.494140625, -10.9453125, -8.396484375, -5.84765625, -3.298828125, -0.75, 1.798828125, 4.34765625, 6.896484375, 9.4453125, 11.994140625, 14.54296875, 17.091796875, 19.640625, 22.189453125, 24.73828125, 27.287109375, 29.8359375, 32.384765625, 34.93359375, 37.482421875, 40.03125, 42.580078125, 45.12890625, 47.677734375, 50.2265625, 52.775390625, 55.32421875, 57.873046875, 60.421875, 62.970703125, 65.51953125, 68.068359375, 70.6171875, 73.166015625, 75.71484375, 78.263671875, 80.8125]}, "gradients/decoder.transformer.h.23.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 32.0, 662.0, 303.0, 15.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-717.9820556640625, -702.9147338867188, -687.8473510742188, -672.780029296875, -657.712646484375, -642.6453247070312, -627.5779418945312, -612.5106201171875, -597.4432373046875, -582.3759155273438, -567.3085327148438, -552.2412109375, -537.173828125, -522.1065063476562, -507.0391540527344, -491.9718017578125, -476.90447998046875, -461.8371276855469, -446.769775390625, -431.7024230957031, -416.63507080078125, -401.5677490234375, -386.5003967285156, -371.43304443359375, -356.3656921386719, -341.29833984375, -326.2309875488281, -311.16363525390625, -296.0963134765625, -281.0289306640625, -265.96160888671875, -250.89425659179688, -235.82688903808594, -220.75953674316406, -205.69219970703125, -190.62484741210938, -175.5574951171875, -160.49014282226562, -145.42279052734375, -130.35545349121094, -115.28810119628906, -100.22074890136719, -85.15340423583984, -70.0860595703125, -55.018707275390625, -39.95135498046875, -24.884010314941406, -9.816665649414062, 5.2506866455078125, 20.318035125732422, 35.38538360595703, 50.45273208618164, 65.52008056640625, 80.58743286132812, 95.65477752685547, 110.72212219238281, 125.78947448730469, 140.85682678222656, 155.92416381835938, 170.99151611328125, 186.05886840820312, 201.126220703125, 216.19357299804688, 231.2609100341797, 246.32826232910156]}, "gradients/decoder.transformer.h.23.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 6.0, 4.0, 7.0, 9.0, 9.0, 9.0, 19.0, 23.0, 22.0, 34.0, 32.0, 33.0, 36.0, 47.0, 53.0, 40.0, 60.0, 58.0, 71.0, 56.0, 41.0, 51.0, 40.0, 33.0, 41.0, 32.0, 31.0, 23.0, 17.0, 15.0, 15.0, 6.0, 11.0, 5.0, 7.0, 3.0, 1.0, 3.0, 2.0, 1.0, 2.0], "bins": [-140.79763793945312, -137.32191467285156, -133.84619140625, -130.37046813964844, -126.89473724365234, -123.41901397705078, -119.94328308105469, -116.46755981445312, -112.99183654785156, -109.51611328125, -106.04039001464844, -102.56465911865234, -99.08893585205078, -95.61321258544922, -92.13748168945312, -88.66175842285156, -85.18603515625, -81.71031188964844, -78.23458862304688, -74.75885772705078, -71.28313446044922, -67.80741119384766, -64.33168029785156, -60.85595703125, -57.38023376464844, -53.904510498046875, -50.42878341674805, -46.95305633544922, -43.477333068847656, -40.001609802246094, -36.525882720947266, -33.05015563964844, -29.574424743652344, -26.09869956970215, -22.622974395751953, -19.147249221801758, -15.671524047851562, -12.195798873901367, -8.720073699951172, -5.244348526000977, -1.7686233520507812, 1.707101821899414, 5.182826995849609, 8.658552169799805, 12.13427734375, 15.610002517700195, 19.08572769165039, 22.561452865600586, 26.03717803955078, 29.512903213500977, 32.98862838745117, 36.46435546875, 39.94007873535156, 43.415802001953125, 46.89152908325195, 50.36725616455078, 53.842979431152344, 57.318702697753906, 60.794429779052734, 64.27015686035156, 67.74588012695312, 71.22160339355469, 74.69732666015625, 78.17305755615234, 81.6487808227539]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 6.0, 5.0, 6.0, 9.0, 12.0, 12.0, 8.0, 14.0, 15.0, 15.0, 17.0, 22.0, 22.0, 23.0, 23.0, 27.0, 42.0, 36.0, 29.0, 42.0, 35.0, 40.0, 37.0, 43.0, 39.0, 43.0, 29.0, 35.0, 45.0, 21.0, 36.0, 32.0, 19.0, 21.0, 17.0, 21.0, 16.0, 9.0, 16.0, 10.0, 13.0, 12.0, 9.0, 5.0, 8.0, 4.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0], "bins": [-4.79296875, -4.6435546875, -4.494140625, -4.3447265625, -4.1953125, -4.0458984375, -3.896484375, -3.7470703125, -3.59765625, -3.4482421875, -3.298828125, -3.1494140625, -3.0, -2.8505859375, -2.701171875, -2.5517578125, -2.40234375, -2.2529296875, -2.103515625, -1.9541015625, -1.8046875, -1.6552734375, -1.505859375, -1.3564453125, -1.20703125, -1.0576171875, -0.908203125, -0.7587890625, -0.609375, -0.4599609375, -0.310546875, -0.1611328125, -0.01171875, 0.1376953125, 0.287109375, 0.4365234375, 0.5859375, 0.7353515625, 0.884765625, 1.0341796875, 1.18359375, 1.3330078125, 1.482421875, 1.6318359375, 1.78125, 1.9306640625, 2.080078125, 2.2294921875, 2.37890625, 2.5283203125, 2.677734375, 2.8271484375, 2.9765625, 3.1259765625, 3.275390625, 3.4248046875, 3.57421875, 3.7236328125, 3.873046875, 4.0224609375, 4.171875, 4.3212890625, 4.470703125, 4.6201171875, 4.76953125]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 4.0, 4.0, 1.0, 8.0, 12.0, 21.0, 27.0, 33.0, 62.0, 72.0, 124.0, 175.0, 245.0, 384.0, 537.0, 779.0, 1127.0, 1624.0, 2281.0, 3413.0, 5002.0, 7504.0, 11191.0, 16773.0, 25306.0, 38662.0, 58856.0, 87974.0, 124445.0, 161289.0, 152065.0, 114636.0, 78416.0, 52444.0, 34325.0, 22672.0, 14955.0, 9973.0, 6708.0, 4491.0, 3123.0, 2065.0, 1486.0, 1004.0, 724.0, 531.0, 313.0, 239.0, 157.0, 97.0, 69.0, 51.0, 34.0, 27.0, 9.0, 9.0, 5.0, 6.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.357421875, -0.3458404541015625, -0.334259033203125, -0.3226776123046875, -0.31109619140625, -0.2995147705078125, -0.287933349609375, -0.2763519287109375, -0.2647705078125, -0.2531890869140625, -0.241607666015625, -0.2300262451171875, -0.21844482421875, -0.2068634033203125, -0.195281982421875, -0.1837005615234375, -0.172119140625, -0.1605377197265625, -0.148956298828125, -0.1373748779296875, -0.12579345703125, -0.1142120361328125, -0.102630615234375, -0.0910491943359375, -0.0794677734375, -0.0678863525390625, -0.056304931640625, -0.0447235107421875, -0.03314208984375, -0.0215606689453125, -0.009979248046875, 0.0016021728515625, 0.01318359375, 0.0247650146484375, 0.036346435546875, 0.0479278564453125, 0.05950927734375, 0.0710906982421875, 0.082672119140625, 0.0942535400390625, 0.1058349609375, 0.1174163818359375, 0.128997802734375, 0.1405792236328125, 0.15216064453125, 0.1637420654296875, 0.175323486328125, 0.1869049072265625, 0.198486328125, 0.2100677490234375, 0.221649169921875, 0.2332305908203125, 0.24481201171875, 0.2563934326171875, 0.267974853515625, 0.2795562744140625, 0.2911376953125, 0.3027191162109375, 0.314300537109375, 0.3258819580078125, 0.33746337890625, 0.3490447998046875, 0.360626220703125, 0.3722076416015625, 0.3837890625]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 1.0, 1.0, 7.0, 6.0, 2.0, 5.0, 11.0, 10.0, 9.0, 10.0, 16.0, 20.0, 19.0, 25.0, 28.0, 23.0, 26.0, 33.0, 37.0, 41.0, 44.0, 48.0, 33.0, 1056.0, 42.0, 41.0, 39.0, 40.0, 34.0, 35.0, 29.0, 35.0, 35.0, 23.0, 11.0, 21.0, 22.0, 21.0, 15.0, 12.0, 9.0, 11.0, 12.0, 4.0, 6.0, 6.0, 7.0, 3.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.083984375, -2.984283447265625, -2.88458251953125, -2.784881591796875, -2.6851806640625, -2.585479736328125, -2.48577880859375, -2.386077880859375, -2.286376953125, -2.186676025390625, -2.08697509765625, -1.987274169921875, -1.8875732421875, -1.787872314453125, -1.68817138671875, -1.588470458984375, -1.48876953125, -1.389068603515625, -1.28936767578125, -1.189666748046875, -1.0899658203125, -0.990264892578125, -0.89056396484375, -0.790863037109375, -0.691162109375, -0.591461181640625, -0.49176025390625, -0.392059326171875, -0.2923583984375, -0.192657470703125, -0.09295654296875, 0.006744384765625, 0.1064453125, 0.206146240234375, 0.30584716796875, 0.405548095703125, 0.5052490234375, 0.604949951171875, 0.70465087890625, 0.804351806640625, 0.904052734375, 1.003753662109375, 1.10345458984375, 1.203155517578125, 1.3028564453125, 1.402557373046875, 1.50225830078125, 1.601959228515625, 1.70166015625, 1.801361083984375, 1.90106201171875, 2.000762939453125, 2.1004638671875, 2.200164794921875, 2.29986572265625, 2.399566650390625, 2.499267578125, 2.598968505859375, 2.69866943359375, 2.798370361328125, 2.8980712890625, 2.997772216796875, 3.09747314453125, 3.197174072265625, 3.296875]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 3.0, 2.0, 1.0, 6.0, 9.0, 7.0, 19.0, 33.0, 42.0, 65.0, 93.0, 152.0, 196.0, 317.0, 495.0, 712.0, 1139.0, 1678.0, 2633.0, 3947.0, 6174.0, 9022.0, 14139.0, 21474.0, 32713.0, 48330.0, 71075.0, 99637.0, 132923.0, 1178445.0, 142052.0, 103663.0, 74837.0, 51194.0, 34137.0, 22345.0, 15031.0, 9801.0, 6278.0, 4184.0, 2781.0, 1801.0, 1193.0, 786.0, 571.0, 337.0, 230.0, 159.0, 86.0, 65.0, 45.0, 29.0, 22.0, 18.0, 8.0, 7.0, 2.0, 2.0, 3.0], "bins": [-0.276611328125, -0.26861000061035156, -0.2606086730957031, -0.2526073455810547, -0.24460601806640625, -0.2366046905517578, -0.22860336303710938, -0.22060203552246094, -0.2126007080078125, -0.20459938049316406, -0.19659805297851562, -0.1885967254638672, -0.18059539794921875, -0.1725940704345703, -0.16459274291992188, -0.15659141540527344, -0.148590087890625, -0.14058876037597656, -0.13258743286132812, -0.12458610534667969, -0.11658477783203125, -0.10858345031738281, -0.10058212280273438, -0.09258079528808594, -0.0845794677734375, -0.07657814025878906, -0.06857681274414062, -0.06057548522949219, -0.05257415771484375, -0.04457283020019531, -0.036571502685546875, -0.028570175170898438, -0.02056884765625, -0.012567520141601562, -0.004566192626953125, 0.0034351348876953125, 0.01143646240234375, 0.019437789916992188, 0.027439117431640625, 0.03544044494628906, 0.0434417724609375, 0.05144309997558594, 0.059444427490234375, 0.06744575500488281, 0.07544708251953125, 0.08344841003417969, 0.09144973754882812, 0.09945106506347656, 0.107452392578125, 0.11545372009277344, 0.12345504760742188, 0.1314563751220703, 0.13945770263671875, 0.1474590301513672, 0.15546035766601562, 0.16346168518066406, 0.1714630126953125, 0.17946434020996094, 0.18746566772460938, 0.1954669952392578, 0.20346832275390625, 0.2114696502685547, 0.21947097778320312, 0.22747230529785156, 0.2354736328125]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 5.0, 1.0, 5.0, 3.0, 4.0, 2.0, 3.0, 4.0, 7.0, 7.0, 5.0, 9.0, 10.0, 13.0, 27.0, 22.0, 39.0, 38.0, 38.0, 42.0, 52.0, 67.0, 71.0, 97.0, 67.0, 80.0, 46.0, 50.0, 29.0, 23.0, 32.0, 16.0, 18.0, 14.0, 14.0, 9.0, 9.0, 5.0, 6.0, 3.0, 2.0, 5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005405426025390625, -0.005237162113189697, -0.0050688982009887695, -0.004900634288787842, -0.004732370376586914, -0.004564106464385986, -0.004395842552185059, -0.004227578639984131, -0.004059314727783203, -0.0038910508155822754, -0.0037227869033813477, -0.00355452299118042, -0.003386259078979492, -0.0032179951667785645, -0.0030497312545776367, -0.002881467342376709, -0.0027132034301757812, -0.0025449395179748535, -0.0023766756057739258, -0.002208411693572998, -0.0020401477813720703, -0.0018718838691711426, -0.0017036199569702148, -0.0015353560447692871, -0.0013670921325683594, -0.0011988282203674316, -0.001030564308166504, -0.0008623003959655762, -0.0006940364837646484, -0.0005257725715637207, -0.00035750865936279297, -0.00018924474716186523, -2.09808349609375e-05, 0.00014728307723999023, 0.00031554698944091797, 0.0004838109016418457, 0.0006520748138427734, 0.0008203387260437012, 0.000988602638244629, 0.0011568665504455566, 0.0013251304626464844, 0.0014933943748474121, 0.0016616582870483398, 0.0018299221992492676, 0.0019981861114501953, 0.002166450023651123, 0.0023347139358520508, 0.0025029778480529785, 0.0026712417602539062, 0.002839505672454834, 0.0030077695846557617, 0.0031760334968566895, 0.003344297409057617, 0.003512561321258545, 0.0036808252334594727, 0.0038490891456604004, 0.004017353057861328, 0.004185616970062256, 0.004353880882263184, 0.004522144794464111, 0.004690408706665039, 0.004858672618865967, 0.0050269365310668945, 0.005195200443267822, 0.00536346435546875]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 1.0, 3.0, 1.0, 2.0, 3.0, 6.0, 9.0, 9.0, 11.0, 9.0, 8.0, 6.0, 12.0, 17.0, 8.0, 23.0, 34.0, 45.0, 41.0, 57.0, 64.0, 100.0, 152.0, 166.0, 175.0, 328.0, 666.0, 23190.0, 1018495.0, 3289.0, 482.0, 280.0, 185.0, 131.0, 100.0, 95.0, 69.0, 54.0, 40.0, 33.0, 29.0, 24.0, 19.0, 15.0, 15.0, 13.0, 14.0, 7.0, 6.0, 5.0, 5.0, 2.0, 3.0, 3.0, 4.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.09375, -0.09075736999511719, -0.08776473999023438, -0.08477210998535156, -0.08177947998046875, -0.07878684997558594, -0.07579421997070312, -0.07280158996582031, -0.0698089599609375, -0.06681632995605469, -0.06382369995117188, -0.06083106994628906, -0.05783843994140625, -0.05484580993652344, -0.051853179931640625, -0.04886054992675781, -0.045867919921875, -0.04287528991699219, -0.039882659912109375, -0.03689002990722656, -0.03389739990234375, -0.030904769897460938, -0.027912139892578125, -0.024919509887695312, -0.0219268798828125, -0.018934249877929688, -0.015941619873046875, -0.012948989868164062, -0.00995635986328125, -0.0069637298583984375, -0.003971099853515625, -0.0009784698486328125, 0.00201416015625, 0.0050067901611328125, 0.007999420166015625, 0.010992050170898438, 0.01398468017578125, 0.016977310180664062, 0.019969940185546875, 0.022962570190429688, 0.0259552001953125, 0.028947830200195312, 0.031940460205078125, 0.03493309020996094, 0.03792572021484375, 0.04091835021972656, 0.043910980224609375, 0.04690361022949219, 0.049896240234375, 0.05288887023925781, 0.055881500244140625, 0.05887413024902344, 0.06186676025390625, 0.06485939025878906, 0.06785202026367188, 0.07084465026855469, 0.0738372802734375, 0.07682991027832031, 0.07982254028320312, 0.08281517028808594, 0.08580780029296875, 0.08880043029785156, 0.09179306030273438, 0.09478569030761719, 0.0977783203125]}, "gradients/decoder.transformer.h.23.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 29.0, 478.0, 468.0, 37.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.026848042383790016, -0.026341192424297333, -0.0258343443274498, -0.025327494367957115, -0.024820644408464432, -0.02431379444897175, -0.023806946352124214, -0.02330009639263153, -0.022793246433138847, -0.022286396473646164, -0.02177954837679863, -0.021272698417305946, -0.020765848457813263, -0.02025899849832058, -0.019752150401473045, -0.019245300441980362, -0.018738452345132828, -0.018231602385640144, -0.01772475428879261, -0.017217904329299927, -0.016711054369807243, -0.01620420441031456, -0.015697356313467026, -0.015190506353974342, -0.014683656394481659, -0.01417680736631155, -0.013669957406818867, -0.013163108378648758, -0.012656258419156075, -0.012149409390985966, -0.011642560362815857, -0.011135710403323174, -0.01062886044383049, -0.010122011415660381, -0.009615161456167698, -0.009108312427997589, -0.008601462468504906, -0.008094613440334797, -0.007587763946503401, -0.007080914452672005, -0.006574064493179321, -0.006067214999347925, -0.005560365505516529, -0.00505351647734642, -0.004546666517853737, -0.004039817489683628, -0.003532967995852232, -0.003026118502020836, -0.0025192690081894398, -0.0020124195143580437, -0.0015055701369419694, -0.0009987207595258951, -0.000491871265694499, 1.4978228136897087e-05, 0.0005218274891376495, 0.0010286769829690456, 0.0015355264768004417, 0.002042375970631838, 0.002549225464463234, 0.0030560747254639864, 0.0035629242192953825, 0.004069773480296135, 0.004576622974127531, 0.005083472467958927, 0.005590321961790323]}, "gradients/decoder.transformer.h.23.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 4.0, 3.0, 5.0, 4.0, 4.0, 6.0, 8.0, 7.0, 9.0, 10.0, 17.0, 12.0, 11.0, 24.0, 24.0, 20.0, 20.0, 26.0, 29.0, 42.0, 24.0, 31.0, 30.0, 38.0, 37.0, 37.0, 35.0, 44.0, 44.0, 39.0, 47.0, 35.0, 36.0, 32.0, 31.0, 26.0, 18.0, 15.0, 20.0, 23.0, 20.0, 7.0, 14.0, 8.0, 6.0, 9.0, 2.0, 5.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.002948284149169922, -0.0028636185452342033, -0.002778952941298485, -0.0026942873373627663, -0.0026096217334270477, -0.002524956129491329, -0.0024402905255556107, -0.002355624921619892, -0.0022709593176841736, -0.002186293713748455, -0.0021016281098127365, -0.002016962505877018, -0.0019322969019412994, -0.001847631298005581, -0.0017629656940698624, -0.0016783000901341438, -0.0015936344861984253, -0.0015089688822627068, -0.0014243032783269882, -0.0013396376743912697, -0.0012549720704555511, -0.0011703064665198326, -0.001085640862584114, -0.0010009752586483955, -0.000916309654712677, -0.0008316440507769585, -0.0007469784468412399, -0.0006623128429055214, -0.0005776472389698029, -0.0004929816350340843, -0.0004083160310983658, -0.00032365042716264725, -0.0002389848232269287, -0.00015431921929121017, -6.965361535549164e-05, 1.5011988580226898e-05, 9.967759251594543e-05, 0.00018434319645166397, 0.0002690088003873825, 0.00035367440432310104, 0.0004383400082588196, 0.0005230056121945381, 0.0006076712161302567, 0.0006923368200659752, 0.0007770024240016937, 0.0008616680279374123, 0.0009463336318731308, 0.0010309992358088493, 0.0011156648397445679, 0.0012003304436802864, 0.001284996047616005, 0.0013696616515517235, 0.001454327255487442, 0.0015389928594231606, 0.001623658463358879, 0.0017083240672945976, 0.0017929896712303162, 0.0018776552751660347, 0.0019623208791017532, 0.0020469864830374718, 0.0021316520869731903, 0.002216317690908909, 0.0023009832948446274, 0.002385648898780346, 0.0024703145027160645]}, "gradients/decoder.transformer.h.23.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 6.0, 5.0, 6.0, 9.0, 12.0, 12.0, 8.0, 14.0, 15.0, 15.0, 17.0, 22.0, 22.0, 23.0, 23.0, 27.0, 42.0, 36.0, 29.0, 42.0, 35.0, 40.0, 37.0, 43.0, 39.0, 43.0, 29.0, 35.0, 45.0, 21.0, 36.0, 32.0, 19.0, 21.0, 17.0, 21.0, 16.0, 9.0, 16.0, 10.0, 13.0, 12.0, 9.0, 5.0, 8.0, 4.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0], "bins": [-4.79296875, -4.6435546875, -4.494140625, -4.3447265625, -4.1953125, -4.0458984375, -3.896484375, -3.7470703125, -3.59765625, -3.4482421875, -3.298828125, -3.1494140625, -3.0, -2.8505859375, -2.701171875, -2.5517578125, -2.40234375, -2.2529296875, -2.103515625, -1.9541015625, -1.8046875, -1.6552734375, -1.505859375, -1.3564453125, -1.20703125, -1.0576171875, -0.908203125, -0.7587890625, -0.609375, -0.4599609375, -0.310546875, -0.1611328125, -0.01171875, 0.1376953125, 0.287109375, 0.4365234375, 0.5859375, 0.7353515625, 0.884765625, 1.0341796875, 1.18359375, 1.3330078125, 1.482421875, 1.6318359375, 1.78125, 1.9306640625, 2.080078125, 2.2294921875, 2.37890625, 2.5283203125, 2.677734375, 2.8271484375, 2.9765625, 3.1259765625, 3.275390625, 3.4248046875, 3.57421875, 3.7236328125, 3.873046875, 4.0224609375, 4.171875, 4.3212890625, 4.470703125, 4.6201171875, 4.76953125]}, "gradients/decoder.transformer.h.23.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 3.0, 6.0, 10.0, 12.0, 23.0, 25.0, 46.0, 58.0, 87.0, 123.0, 153.0, 260.0, 383.0, 545.0, 782.0, 1145.0, 1549.0, 2166.0, 3297.0, 4939.0, 7800.0, 13853.0, 30573.0, 96732.0, 425654.0, 325718.0, 73711.0, 25486.0, 12118.0, 6957.0, 4470.0, 3031.0, 2043.0, 1431.0, 1033.0, 745.0, 483.0, 337.0, 221.0, 175.0, 111.0, 78.0, 70.0, 42.0, 27.0, 21.0, 13.0, 9.0, 5.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-15.046875, -14.5894775390625, -14.132080078125, -13.6746826171875, -13.21728515625, -12.7598876953125, -12.302490234375, -11.8450927734375, -11.3876953125, -10.9302978515625, -10.472900390625, -10.0155029296875, -9.55810546875, -9.1007080078125, -8.643310546875, -8.1859130859375, -7.728515625, -7.2711181640625, -6.813720703125, -6.3563232421875, -5.89892578125, -5.4415283203125, -4.984130859375, -4.5267333984375, -4.0693359375, -3.6119384765625, -3.154541015625, -2.6971435546875, -2.23974609375, -1.7823486328125, -1.324951171875, -0.8675537109375, -0.41015625, 0.0472412109375, 0.504638671875, 0.9620361328125, 1.41943359375, 1.8768310546875, 2.334228515625, 2.7916259765625, 3.2490234375, 3.7064208984375, 4.163818359375, 4.6212158203125, 5.07861328125, 5.5360107421875, 5.993408203125, 6.4508056640625, 6.908203125, 7.3656005859375, 7.822998046875, 8.2803955078125, 8.73779296875, 9.1951904296875, 9.652587890625, 10.1099853515625, 10.5673828125, 11.0247802734375, 11.482177734375, 11.9395751953125, 12.39697265625, 12.8543701171875, 13.311767578125, 13.7691650390625, 14.2265625]}, "gradients/decoder.transformer.h.23.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 4.0, 3.0, 2.0, 6.0, 4.0, 13.0, 13.0, 12.0, 10.0, 19.0, 26.0, 27.0, 34.0, 42.0, 44.0, 54.0, 81.0, 93.0, 149.0, 1573.0, 369.0, 82.0, 70.0, 49.0, 59.0, 42.0, 38.0, 33.0, 34.0, 19.0, 14.0, 8.0, 7.0, 7.0, 4.0, 6.0, 6.0, 2.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.953125, -24.236328125, -23.51953125, -22.802734375, -22.0859375, -21.369140625, -20.65234375, -19.935546875, -19.21875, -18.501953125, -17.78515625, -17.068359375, -16.3515625, -15.634765625, -14.91796875, -14.201171875, -13.484375, -12.767578125, -12.05078125, -11.333984375, -10.6171875, -9.900390625, -9.18359375, -8.466796875, -7.75, -7.033203125, -6.31640625, -5.599609375, -4.8828125, -4.166015625, -3.44921875, -2.732421875, -2.015625, -1.298828125, -0.58203125, 0.134765625, 0.8515625, 1.568359375, 2.28515625, 3.001953125, 3.71875, 4.435546875, 5.15234375, 5.869140625, 6.5859375, 7.302734375, 8.01953125, 8.736328125, 9.453125, 10.169921875, 10.88671875, 11.603515625, 12.3203125, 13.037109375, 13.75390625, 14.470703125, 15.1875, 15.904296875, 16.62109375, 17.337890625, 18.0546875, 18.771484375, 19.48828125, 20.205078125, 20.921875]}, "gradients/decoder.transformer.h.23.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 4.0, 2.0, 5.0, 8.0, 8.0, 13.0, 15.0, 26.0, 21.0, 35.0, 35.0, 59.0, 75.0, 116.0, 323.0, 1770.0, 2951074.0, 190376.0, 1048.0, 276.0, 110.0, 76.0, 38.0, 44.0, 32.0, 31.0, 21.0, 8.0, 14.0, 11.0, 16.0, 5.0, 6.0, 2.0, 4.0, 3.0, 1.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-95.8125, -92.708984375, -89.60546875, -86.501953125, -83.3984375, -80.294921875, -77.19140625, -74.087890625, -70.984375, -67.880859375, -64.77734375, -61.673828125, -58.5703125, -55.466796875, -52.36328125, -49.259765625, -46.15625, -43.052734375, -39.94921875, -36.845703125, -33.7421875, -30.638671875, -27.53515625, -24.431640625, -21.328125, -18.224609375, -15.12109375, -12.017578125, -8.9140625, -5.810546875, -2.70703125, 0.396484375, 3.5, 6.603515625, 9.70703125, 12.810546875, 15.9140625, 19.017578125, 22.12109375, 25.224609375, 28.328125, 31.431640625, 34.53515625, 37.638671875, 40.7421875, 43.845703125, 46.94921875, 50.052734375, 53.15625, 56.259765625, 59.36328125, 62.466796875, 65.5703125, 68.673828125, 71.77734375, 74.880859375, 77.984375, 81.087890625, 84.19140625, 87.294921875, 90.3984375, 93.501953125, 96.60546875, 99.708984375, 102.8125]}, "gradients/decoder.transformer.h.23.ln_1.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 246.0, 762.0, 5.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-49.12577819824219, -39.5609130859375, -29.996047973632812, -20.431182861328125, -10.866317749023438, -1.30145263671875, 8.263412475585938, 17.828277587890625, 27.393142700195312, 36.9580078125, 46.52287292480469, 56.087738037109375, 65.65260314941406, 75.21746826171875, 84.78233337402344, 94.34719848632812, 103.91206359863281, 113.4769287109375, 123.04179382324219, 132.60665893554688, 142.17152404785156, 151.73638916015625, 161.30125427246094, 170.86611938476562, 180.4309844970703, 189.995849609375, 199.5607147216797, 209.12557983398438, 218.69044494628906, 228.25531005859375, 237.82017517089844, 247.38504028320312, 256.94989013671875, 266.5147705078125, 276.0796203613281, 285.64447021484375, 295.2093505859375, 304.77423095703125, 314.3390808105469, 323.9039306640625, 333.46881103515625, 343.03369140625, 352.5985412597656, 362.16339111328125, 371.728271484375, 381.29315185546875, 390.8580017089844, 400.4228515625, 409.98773193359375, 419.5526123046875, 429.1174621582031, 438.68231201171875, 448.2471923828125, 457.81207275390625, 467.3769226074219, 476.9417724609375, 486.50665283203125, 496.071533203125, 505.6363830566406, 515.2012329101562, 524.76611328125, 534.3309936523438, 543.8958740234375, 553.460693359375, 563.0255737304688]}, "gradients/decoder.transformer.h.23.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 7.0, 5.0, 5.0, 12.0, 9.0, 20.0, 15.0, 15.0, 18.0, 21.0, 16.0, 29.0, 25.0, 35.0, 36.0, 42.0, 45.0, 40.0, 39.0, 31.0, 32.0, 37.0, 42.0, 40.0, 45.0, 39.0, 29.0, 39.0, 36.0, 25.0, 21.0, 23.0, 22.0, 17.0, 16.0, 15.0, 15.0, 7.0, 8.0, 8.0, 6.0, 5.0, 4.0, 4.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0], "bins": [-54.32135009765625, -52.57880783081055, -50.83626174926758, -49.093719482421875, -47.351173400878906, -45.6086311340332, -43.866085052490234, -42.12354278564453, -40.38099670410156, -38.63845443725586, -36.89590835571289, -35.15336608886719, -33.41082000732422, -31.668277740478516, -29.925731658935547, -28.183189392089844, -26.440645217895508, -24.698101043701172, -22.955556869506836, -21.2130126953125, -19.470468521118164, -17.727924346923828, -15.985381126403809, -14.242836952209473, -12.500292778015137, -10.7577486038208, -9.015204429626465, -7.272660732269287, -5.530116558074951, -3.7875728607177734, -2.0450286865234375, -0.30248451232910156, 1.4400596618652344, 3.1826038360595703, 4.925148010253906, 6.667691707611084, 8.410236358642578, 10.152779579162598, 11.895323753356934, 13.63786792755127, 15.380412101745605, 17.122955322265625, 18.86549949645996, 20.608043670654297, 22.350587844848633, 24.09313201904297, 25.835676193237305, 27.57822036743164, 29.320764541625977, 31.063308715820312, 32.805850982666016, 34.548397064208984, 36.29093933105469, 38.033485412597656, 39.77602767944336, 41.51857376098633, 43.26111602783203, 45.003658294677734, 46.7462043762207, 48.488746643066406, 50.231292724609375, 51.97383499145508, 53.71638107299805, 55.45892333984375, 57.20146942138672]}, "gradients/decoder.transformer.h.22.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 5.0, 6.0, 14.0, 11.0, 7.0, 10.0, 9.0, 16.0, 21.0, 19.0, 21.0, 22.0, 23.0, 36.0, 29.0, 30.0, 37.0, 32.0, 41.0, 43.0, 37.0, 48.0, 47.0, 38.0, 43.0, 35.0, 40.0, 37.0, 28.0, 23.0, 30.0, 12.0, 28.0, 17.0, 18.0, 19.0, 11.0, 11.0, 11.0, 9.0, 10.0, 6.0, 5.0, 3.0, 4.0, 2.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.30859375, -5.14068603515625, -4.9727783203125, -4.80487060546875, -4.636962890625, -4.46905517578125, -4.3011474609375, -4.13323974609375, -3.96533203125, -3.79742431640625, -3.6295166015625, -3.46160888671875, -3.293701171875, -3.12579345703125, -2.9578857421875, -2.78997802734375, -2.6220703125, -2.45416259765625, -2.2862548828125, -2.11834716796875, -1.950439453125, -1.78253173828125, -1.6146240234375, -1.44671630859375, -1.27880859375, -1.11090087890625, -0.9429931640625, -0.77508544921875, -0.607177734375, -0.43927001953125, -0.2713623046875, -0.10345458984375, 0.064453125, 0.23236083984375, 0.4002685546875, 0.56817626953125, 0.736083984375, 0.90399169921875, 1.0718994140625, 1.23980712890625, 1.40771484375, 1.57562255859375, 1.7435302734375, 1.91143798828125, 2.079345703125, 2.24725341796875, 2.4151611328125, 2.58306884765625, 2.7509765625, 2.91888427734375, 3.0867919921875, 3.25469970703125, 3.422607421875, 3.59051513671875, 3.7584228515625, 3.92633056640625, 4.09423828125, 4.26214599609375, 4.4300537109375, 4.59796142578125, 4.765869140625, 4.93377685546875, 5.1016845703125, 5.26959228515625, 5.4375]}, "gradients/decoder.transformer.h.22.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 2.0, 8.0, 7.0, 10.0, 16.0, 17.0, 43.0, 36.0, 54.0, 80.0, 88.0, 126.0, 152.0, 222.0, 372.0, 587.0, 1363.0, 3971.0, 19785.0, 187525.0, 3074949.0, 839735.0, 52480.0, 8132.0, 2172.0, 840.0, 455.0, 259.0, 195.0, 137.0, 141.0, 83.0, 63.0, 40.0, 36.0, 28.0, 24.0, 12.0, 11.0, 8.0, 5.0, 4.0, 4.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.5625, -25.63818359375, -24.7138671875, -23.78955078125, -22.865234375, -21.94091796875, -21.0166015625, -20.09228515625, -19.16796875, -18.24365234375, -17.3193359375, -16.39501953125, -15.470703125, -14.54638671875, -13.6220703125, -12.69775390625, -11.7734375, -10.84912109375, -9.9248046875, -9.00048828125, -8.076171875, -7.15185546875, -6.2275390625, -5.30322265625, -4.37890625, -3.45458984375, -2.5302734375, -1.60595703125, -0.681640625, 0.24267578125, 1.1669921875, 2.09130859375, 3.015625, 3.93994140625, 4.8642578125, 5.78857421875, 6.712890625, 7.63720703125, 8.5615234375, 9.48583984375, 10.41015625, 11.33447265625, 12.2587890625, 13.18310546875, 14.107421875, 15.03173828125, 15.9560546875, 16.88037109375, 17.8046875, 18.72900390625, 19.6533203125, 20.57763671875, 21.501953125, 22.42626953125, 23.3505859375, 24.27490234375, 25.19921875, 26.12353515625, 27.0478515625, 27.97216796875, 28.896484375, 29.82080078125, 30.7451171875, 31.66943359375, 32.59375]}, "gradients/decoder.transformer.h.22.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 7.0, 10.0, 25.0, 40.0, 79.0, 172.0, 337.0, 559.0, 796.0, 814.0, 559.0, 328.0, 173.0, 87.0, 53.0, 21.0, 12.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.96875, -32.798828125, -31.62890625, -30.458984375, -29.2890625, -28.119140625, -26.94921875, -25.779296875, -24.609375, -23.439453125, -22.26953125, -21.099609375, -19.9296875, -18.759765625, -17.58984375, -16.419921875, -15.25, -14.080078125, -12.91015625, -11.740234375, -10.5703125, -9.400390625, -8.23046875, -7.060546875, -5.890625, -4.720703125, -3.55078125, -2.380859375, -1.2109375, -0.041015625, 1.12890625, 2.298828125, 3.46875, 4.638671875, 5.80859375, 6.978515625, 8.1484375, 9.318359375, 10.48828125, 11.658203125, 12.828125, 13.998046875, 15.16796875, 16.337890625, 17.5078125, 18.677734375, 19.84765625, 21.017578125, 22.1875, 23.357421875, 24.52734375, 25.697265625, 26.8671875, 28.037109375, 29.20703125, 30.376953125, 31.546875, 32.716796875, 33.88671875, 35.056640625, 36.2265625, 37.396484375, 38.56640625, 39.736328125, 40.90625]}, "gradients/decoder.transformer.h.22.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 3.0, 4.0, 0.0, 3.0, 6.0, 11.0, 26.0, 38.0, 81.0, 201.0, 567.0, 3088.0, 224214.0, 3934861.0, 28976.0, 1603.0, 361.0, 149.0, 52.0, 20.0, 12.0, 12.0, 7.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.84375, -44.50244140625, -41.1611328125, -37.81982421875, -34.478515625, -31.13720703125, -27.7958984375, -24.45458984375, -21.11328125, -17.77197265625, -14.4306640625, -11.08935546875, -7.748046875, -4.40673828125, -1.0654296875, 2.27587890625, 5.6171875, 8.95849609375, 12.2998046875, 15.64111328125, 18.982421875, 22.32373046875, 25.6650390625, 29.00634765625, 32.34765625, 35.68896484375, 39.0302734375, 42.37158203125, 45.712890625, 49.05419921875, 52.3955078125, 55.73681640625, 59.078125, 62.41943359375, 65.7607421875, 69.10205078125, 72.443359375, 75.78466796875, 79.1259765625, 82.46728515625, 85.80859375, 89.14990234375, 92.4912109375, 95.83251953125, 99.173828125, 102.51513671875, 105.8564453125, 109.19775390625, 112.5390625, 115.88037109375, 119.2216796875, 122.56298828125, 125.904296875, 129.24560546875, 132.5869140625, 135.92822265625, 139.26953125, 142.61083984375, 145.9521484375, 149.29345703125, 152.634765625, 155.97607421875, 159.3173828125, 162.65869140625, 166.0]}, "gradients/decoder.transformer.h.22.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 17.0, 289.0, 619.0, 86.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-78.49221801757812, -63.72492980957031, -48.957645416259766, -34.19036102294922, -19.423072814941406, -4.655784606933594, 10.111495971679688, 24.8787841796875, 39.64607238769531, 54.413360595703125, 69.18064880371094, 83.94792938232422, 98.71521759033203, 113.48250579833984, 128.24978637695312, 143.01707458496094, 157.78436279296875, 172.55165100097656, 187.31893920898438, 202.08621215820312, 216.853515625, 231.62078857421875, 246.38807678222656, 261.1553649902344, 275.92266845703125, 290.68994140625, 305.4572448730469, 320.2245178222656, 334.9918212890625, 349.75909423828125, 364.5263671875, 379.2936706542969, 394.0609130859375, 408.82818603515625, 423.5954895019531, 438.3627624511719, 453.13006591796875, 467.8973388671875, 482.66461181640625, 497.4319152832031, 512.19921875, 526.9664916992188, 541.7337646484375, 556.5010986328125, 571.2683715820312, 586.03564453125, 600.8029174804688, 615.5701904296875, 630.3375244140625, 645.1047973632812, 659.8720703125, 674.639404296875, 689.4066772460938, 704.1739501953125, 718.9412231445312, 733.70849609375, 748.4757690429688, 763.2430419921875, 778.0103149414062, 792.7776489257812, 807.544921875, 822.3121948242188, 837.0794677734375, 851.8468017578125, 866.6140747070312]}, "gradients/decoder.transformer.h.22.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 1.0, 2.0, 8.0, 7.0, 4.0, 6.0, 14.0, 16.0, 21.0, 16.0, 17.0, 22.0, 23.0, 31.0, 33.0, 33.0, 36.0, 33.0, 40.0, 39.0, 43.0, 25.0, 43.0, 44.0, 33.0, 41.0, 39.0, 49.0, 28.0, 32.0, 27.0, 33.0, 25.0, 15.0, 19.0, 24.0, 16.0, 12.0, 10.0, 9.0, 3.0, 10.0, 2.0, 4.0, 3.0, 3.0, 6.0, 0.0, 6.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-67.44198608398438, -65.38399505615234, -63.32600402832031, -61.26801681518555, -59.210025787353516, -57.152034759521484, -55.09404754638672, -53.03605651855469, -50.978065490722656, -48.920074462890625, -46.862083435058594, -44.80409622192383, -42.7461051940918, -40.688114166259766, -38.630126953125, -36.57213592529297, -34.51414489746094, -32.456153869628906, -30.398164749145508, -28.34017562866211, -26.282184600830078, -24.224193572998047, -22.16620445251465, -20.10821533203125, -18.05022430419922, -15.992234230041504, -13.934244155883789, -11.876254081726074, -9.81826400756836, -7.7602739334106445, -5.70228385925293, -3.644293785095215, -1.5863113403320312, 0.4716787338256836, 2.5296688079833984, 4.587658882141113, 6.645648956298828, 8.703639030456543, 10.761629104614258, 12.819619178771973, 14.877609252929688, 16.93560028076172, 18.993589401245117, 21.051578521728516, 23.109569549560547, 25.167560577392578, 27.225549697875977, 29.283538818359375, 31.341529846191406, 33.39952087402344, 35.45751190185547, 37.515499114990234, 39.573490142822266, 41.6314811706543, 43.68946838378906, 45.747459411621094, 47.805450439453125, 49.863441467285156, 51.92143249511719, 53.97941970825195, 56.037410736083984, 58.095401763916016, 60.15338897705078, 62.21138000488281, 64.26937103271484]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 5.0, 6.0, 6.0, 12.0, 8.0, 14.0, 9.0, 13.0, 15.0, 15.0, 16.0, 21.0, 22.0, 27.0, 41.0, 19.0, 30.0, 49.0, 36.0, 39.0, 44.0, 43.0, 37.0, 37.0, 40.0, 46.0, 38.0, 24.0, 30.0, 34.0, 28.0, 27.0, 23.0, 25.0, 21.0, 13.0, 12.0, 17.0, 18.0, 8.0, 12.0, 6.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0, 2.0, 2.0, 1.0], "bins": [-5.671875, -5.5020751953125, -5.332275390625, -5.1624755859375, -4.99267578125, -4.8228759765625, -4.653076171875, -4.4832763671875, -4.3134765625, -4.1436767578125, -3.973876953125, -3.8040771484375, -3.63427734375, -3.4644775390625, -3.294677734375, -3.1248779296875, -2.955078125, -2.7852783203125, -2.615478515625, -2.4456787109375, -2.27587890625, -2.1060791015625, -1.936279296875, -1.7664794921875, -1.5966796875, -1.4268798828125, -1.257080078125, -1.0872802734375, -0.91748046875, -0.7476806640625, -0.577880859375, -0.4080810546875, -0.23828125, -0.0684814453125, 0.101318359375, 0.2711181640625, 0.44091796875, 0.6107177734375, 0.780517578125, 0.9503173828125, 1.1201171875, 1.2899169921875, 1.459716796875, 1.6295166015625, 1.79931640625, 1.9691162109375, 2.138916015625, 2.3087158203125, 2.478515625, 2.6483154296875, 2.818115234375, 2.9879150390625, 3.15771484375, 3.3275146484375, 3.497314453125, 3.6671142578125, 3.8369140625, 4.0067138671875, 4.176513671875, 4.3463134765625, 4.51611328125, 4.6859130859375, 4.855712890625, 5.0255126953125, 5.1953125]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 5.0, 3.0, 7.0, 7.0, 16.0, 19.0, 32.0, 48.0, 97.0, 135.0, 208.0, 312.0, 460.0, 740.0, 1051.0, 1620.0, 2466.0, 3673.0, 5833.0, 9152.0, 14074.0, 22117.0, 35692.0, 58344.0, 93019.0, 139363.0, 178247.0, 163753.0, 116857.0, 75255.0, 46603.0, 28901.0, 18038.0, 11311.0, 7304.0, 4666.0, 3074.0, 2091.0, 1377.0, 889.0, 597.0, 384.0, 244.0, 188.0, 106.0, 67.0, 58.0, 27.0, 22.0, 8.0, 4.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0], "bins": [-0.482177734375, -0.4678993225097656, -0.45362091064453125, -0.4393424987792969, -0.4250640869140625, -0.4107856750488281, -0.39650726318359375, -0.3822288513183594, -0.367950439453125, -0.3536720275878906, -0.33939361572265625, -0.3251152038574219, -0.3108367919921875, -0.2965583801269531, -0.28227996826171875, -0.2680015563964844, -0.25372314453125, -0.23944473266601562, -0.22516632080078125, -0.21088790893554688, -0.1966094970703125, -0.18233108520507812, -0.16805267333984375, -0.15377426147460938, -0.139495849609375, -0.12521743774414062, -0.11093902587890625, -0.09666061401367188, -0.0823822021484375, -0.06810379028320312, -0.05382537841796875, -0.039546966552734375, -0.0252685546875, -0.010990142822265625, 0.00328826904296875, 0.017566680908203125, 0.0318450927734375, 0.046123504638671875, 0.06040191650390625, 0.07468032836914062, 0.088958740234375, 0.10323715209960938, 0.11751556396484375, 0.13179397583007812, 0.1460723876953125, 0.16035079956054688, 0.17462921142578125, 0.18890762329101562, 0.20318603515625, 0.21746444702148438, 0.23174285888671875, 0.24602127075195312, 0.2602996826171875, 0.2745780944824219, 0.28885650634765625, 0.3031349182128906, 0.317413330078125, 0.3316917419433594, 0.34597015380859375, 0.3602485656738281, 0.3745269775390625, 0.3888053894042969, 0.40308380126953125, 0.4173622131347656, 0.431640625]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 6.0, 0.0, 3.0, 3.0, 5.0, 3.0, 6.0, 10.0, 15.0, 20.0, 24.0, 17.0, 22.0, 27.0, 25.0, 33.0, 37.0, 39.0, 37.0, 52.0, 47.0, 39.0, 39.0, 1062.0, 53.0, 36.0, 34.0, 32.0, 31.0, 30.0, 31.0, 29.0, 30.0, 18.0, 19.0, 29.0, 15.0, 17.0, 11.0, 9.0, 10.0, 7.0, 8.0, 6.0, 8.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0], "bins": [-3.68359375, -3.5667724609375, -3.449951171875, -3.3331298828125, -3.21630859375, -3.0994873046875, -2.982666015625, -2.8658447265625, -2.7490234375, -2.6322021484375, -2.515380859375, -2.3985595703125, -2.28173828125, -2.1649169921875, -2.048095703125, -1.9312744140625, -1.814453125, -1.6976318359375, -1.580810546875, -1.4639892578125, -1.34716796875, -1.2303466796875, -1.113525390625, -0.9967041015625, -0.8798828125, -0.7630615234375, -0.646240234375, -0.5294189453125, -0.41259765625, -0.2957763671875, -0.178955078125, -0.0621337890625, 0.0546875, 0.1715087890625, 0.288330078125, 0.4051513671875, 0.52197265625, 0.6387939453125, 0.755615234375, 0.8724365234375, 0.9892578125, 1.1060791015625, 1.222900390625, 1.3397216796875, 1.45654296875, 1.5733642578125, 1.690185546875, 1.8070068359375, 1.923828125, 2.0406494140625, 2.157470703125, 2.2742919921875, 2.39111328125, 2.5079345703125, 2.624755859375, 2.7415771484375, 2.8583984375, 2.9752197265625, 3.092041015625, 3.2088623046875, 3.32568359375, 3.4425048828125, 3.559326171875, 3.6761474609375, 3.79296875]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 6.0, 5.0, 11.0, 11.0, 21.0, 29.0, 47.0, 68.0, 106.0, 142.0, 190.0, 294.0, 462.0, 649.0, 992.0, 1438.0, 2024.0, 3282.0, 4831.0, 7388.0, 11443.0, 17802.0, 27813.0, 42292.0, 63006.0, 92200.0, 124137.0, 1102220.0, 231806.0, 114915.0, 82857.0, 57197.0, 37912.0, 24475.0, 15659.0, 10151.0, 6425.0, 4278.0, 2831.0, 1853.0, 1235.0, 874.0, 546.0, 403.0, 285.0, 157.0, 113.0, 77.0, 59.0, 47.0, 20.0, 19.0, 16.0, 10.0, 2.0, 4.0, 2.0, 3.0, 2.0, 2.0, 2.0], "bins": [-0.28369140625, -0.27449798583984375, -0.2653045654296875, -0.25611114501953125, -0.246917724609375, -0.23772430419921875, -0.2285308837890625, -0.21933746337890625, -0.21014404296875, -0.20095062255859375, -0.1917572021484375, -0.18256378173828125, -0.173370361328125, -0.16417694091796875, -0.1549835205078125, -0.14579010009765625, -0.1365966796875, -0.12740325927734375, -0.1182098388671875, -0.10901641845703125, -0.099822998046875, -0.09062957763671875, -0.0814361572265625, -0.07224273681640625, -0.06304931640625, -0.05385589599609375, -0.0446624755859375, -0.03546905517578125, -0.026275634765625, -0.01708221435546875, -0.0078887939453125, 0.00130462646484375, 0.010498046875, 0.01969146728515625, 0.0288848876953125, 0.03807830810546875, 0.047271728515625, 0.05646514892578125, 0.0656585693359375, 0.07485198974609375, 0.08404541015625, 0.09323883056640625, 0.1024322509765625, 0.11162567138671875, 0.120819091796875, 0.13001251220703125, 0.1392059326171875, 0.14839935302734375, 0.1575927734375, 0.16678619384765625, 0.1759796142578125, 0.18517303466796875, 0.194366455078125, 0.20355987548828125, 0.2127532958984375, 0.22194671630859375, 0.23114013671875, 0.24033355712890625, 0.2495269775390625, 0.25872039794921875, 0.267913818359375, 0.27710723876953125, 0.2863006591796875, 0.29549407958984375, 0.3046875]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 0.0, 1.0, 2.0, 4.0, 9.0, 9.0, 7.0, 13.0, 18.0, 22.0, 18.0, 26.0, 33.0, 56.0, 72.0, 95.0, 107.0, 97.0, 97.0, 73.0, 61.0, 49.0, 33.0, 29.0, 25.0, 8.0, 12.0, 6.0, 3.0, 10.0, 6.0, 4.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00888824462890625, -0.008611202239990234, -0.008334159851074219, -0.008057117462158203, -0.0077800750732421875, -0.007503032684326172, -0.007225990295410156, -0.006948947906494141, -0.006671905517578125, -0.006394863128662109, -0.006117820739746094, -0.005840778350830078, -0.0055637359619140625, -0.005286693572998047, -0.005009651184082031, -0.004732608795166016, -0.00445556640625, -0.004178524017333984, -0.0039014816284179688, -0.003624439239501953, -0.0033473968505859375, -0.003070354461669922, -0.0027933120727539062, -0.0025162696838378906, -0.002239227294921875, -0.0019621849060058594, -0.0016851425170898438, -0.0014081001281738281, -0.0011310577392578125, -0.0008540153503417969, -0.0005769729614257812, -0.0002999305725097656, -2.288818359375e-05, 0.0002541542053222656, 0.0005311965942382812, 0.0008082389831542969, 0.0010852813720703125, 0.0013623237609863281, 0.0016393661499023438, 0.0019164085388183594, 0.002193450927734375, 0.0024704933166503906, 0.0027475357055664062, 0.003024578094482422, 0.0033016204833984375, 0.003578662872314453, 0.0038557052612304688, 0.004132747650146484, 0.0044097900390625, 0.004686832427978516, 0.004963874816894531, 0.005240917205810547, 0.0055179595947265625, 0.005795001983642578, 0.006072044372558594, 0.006349086761474609, 0.006626129150390625, 0.006903171539306641, 0.007180213928222656, 0.007457256317138672, 0.0077342987060546875, 0.008011341094970703, 0.008288383483886719, 0.008565425872802734, 0.00884246826171875]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 3.0, 7.0, 5.0, 10.0, 10.0, 13.0, 21.0, 26.0, 39.0, 65.0, 88.0, 103.0, 164.0, 251.0, 572.0, 12647.0, 1029633.0, 3703.0, 472.0, 214.0, 141.0, 97.0, 74.0, 57.0, 42.0, 34.0, 22.0, 10.0, 15.0, 8.0, 3.0, 1.0, 2.0, 1.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.1651611328125, -0.1600627899169922, -0.15496444702148438, -0.14986610412597656, -0.14476776123046875, -0.13966941833496094, -0.13457107543945312, -0.1294727325439453, -0.1243743896484375, -0.11927604675292969, -0.11417770385742188, -0.10907936096191406, -0.10398101806640625, -0.09888267517089844, -0.09378433227539062, -0.08868598937988281, -0.083587646484375, -0.07848930358886719, -0.07339096069335938, -0.06829261779785156, -0.06319427490234375, -0.05809593200683594, -0.052997589111328125, -0.04789924621582031, -0.0428009033203125, -0.03770256042480469, -0.032604217529296875, -0.027505874633789062, -0.02240753173828125, -0.017309188842773438, -0.012210845947265625, -0.0071125030517578125, -0.00201416015625, 0.0030841827392578125, 0.008182525634765625, 0.013280868530273438, 0.01837921142578125, 0.023477554321289062, 0.028575897216796875, 0.03367424011230469, 0.0387725830078125, 0.04387092590332031, 0.048969268798828125, 0.05406761169433594, 0.05916595458984375, 0.06426429748535156, 0.06936264038085938, 0.07446098327636719, 0.079559326171875, 0.08465766906738281, 0.08975601196289062, 0.09485435485839844, 0.09995269775390625, 0.10505104064941406, 0.11014938354492188, 0.11524772644042969, 0.1203460693359375, 0.1254444122314453, 0.13054275512695312, 0.13564109802246094, 0.14073944091796875, 0.14583778381347656, 0.15093612670898438, 0.1560344696044922, 0.1611328125]}, "gradients/decoder.transformer.h.22.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 9.0, 57.0, 365.0, 455.0, 116.0, 14.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005142928101122379, -0.004581226501613855, -0.004019524902105331, -0.00345782283693552, -0.0028961212374269962, -0.0023344196379184723, -0.0017727178055793047, -0.001211015973240137, -0.0006493143737316132, -8.761265780776739e-05, 0.0004740890581160784, 0.0010357907740399241, 0.00159749248996377, 0.002159194089472294, 0.0027208959218114614, 0.003282597754150629, 0.003844299353659153, 0.004406000953167677, 0.004967702552676201, 0.005529404617846012, 0.006091106217354536, 0.00665280781686306, 0.007214509882032871, 0.007776211481541395, 0.008337913081049919, 0.00889961514621973, 0.009461316280066967, 0.010023018345236778, 0.010584719479084015, 0.011146421544253826, 0.011708123609423637, 0.012269824743270874, 0.01283152587711811, 0.013393227942287922, 0.013954929076135159, 0.01451663114130497, 0.015078332275152206, 0.015640035271644592, 0.01620173640549183, 0.016763437539339066, 0.01732514053583145, 0.017886841669678688, 0.018448544666171074, 0.01901024580001831, 0.019571946933865547, 0.020133648067712784, 0.02069535106420517, 0.021257052198052406, 0.021818753331899643, 0.02238045446574688, 0.022942157462239265, 0.023503858596086502, 0.02406555972993374, 0.024627260863780975, 0.02518896386027336, 0.025750664994120598, 0.026312366127967834, 0.02687406726181507, 0.027435770258307457, 0.027997471392154694, 0.02855917252600193, 0.029120873659849167, 0.029682576656341553, 0.03024427779018879, 0.030805980786681175]}, "gradients/decoder.transformer.h.22.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 3.0, 1.0, 5.0, 11.0, 10.0, 15.0, 18.0, 10.0, 18.0, 18.0, 23.0, 22.0, 30.0, 25.0, 30.0, 31.0, 37.0, 27.0, 36.0, 33.0, 36.0, 46.0, 44.0, 51.0, 35.0, 44.0, 31.0, 38.0, 23.0, 27.0, 33.0, 31.0, 16.0, 17.0, 21.0, 18.0, 16.0, 17.0, 11.0, 16.0, 7.0, 9.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0032578706741333008, -0.0031544817611575127, -0.0030510928481817245, -0.0029477039352059364, -0.0028443150222301483, -0.00274092610925436, -0.002637537196278572, -0.002534148283302784, -0.002430759370326996, -0.0023273704573512077, -0.0022239815443754196, -0.0021205926313996315, -0.0020172037184238434, -0.0019138148054480553, -0.0018104258924722672, -0.001707036979496479, -0.001603648066520691, -0.0015002591535449028, -0.0013968702405691147, -0.0012934813275933266, -0.0011900924146175385, -0.0010867035016417503, -0.0009833145886659622, -0.0008799256756901741, -0.000776536762714386, -0.0006731478497385979, -0.0005697589367628098, -0.00046637002378702164, -0.0003629811108112335, -0.0002595921978354454, -0.0001562032848596573, -5.281437188386917e-05, 5.0574541091918945e-05, 0.00015396345406770706, 0.0002573523670434952, 0.0003607412800192833, 0.0004641301929950714, 0.0005675191059708595, 0.0006709080189466476, 0.0007742969319224358, 0.0008776858448982239, 0.000981074757874012, 0.0010844636708498001, 0.0011878525838255882, 0.0012912414968013763, 0.0013946304097771645, 0.0014980193227529526, 0.0016014082357287407, 0.0017047971487045288, 0.001808186061680317, 0.001911574974656105, 0.002014963887631893, 0.0021183528006076813, 0.0022217417135834694, 0.0023251306265592575, 0.0024285195395350456, 0.0025319084525108337, 0.002635297365486622, 0.00273868627846241, 0.002842075191438198, 0.002945464104413986, 0.0030488530173897743, 0.0031522419303655624, 0.0032556308433413506, 0.0033590197563171387]}, "gradients/decoder.transformer.h.22.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 5.0, 6.0, 6.0, 12.0, 8.0, 14.0, 9.0, 13.0, 15.0, 15.0, 16.0, 21.0, 22.0, 27.0, 41.0, 19.0, 30.0, 49.0, 36.0, 39.0, 44.0, 43.0, 37.0, 37.0, 40.0, 46.0, 38.0, 25.0, 29.0, 34.0, 28.0, 27.0, 23.0, 25.0, 21.0, 13.0, 12.0, 17.0, 18.0, 8.0, 12.0, 6.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0, 2.0, 2.0, 1.0], "bins": [-5.671875, -5.5020751953125, -5.332275390625, -5.1624755859375, -4.99267578125, -4.8228759765625, -4.653076171875, -4.4832763671875, -4.3134765625, -4.1436767578125, -3.973876953125, -3.8040771484375, -3.63427734375, -3.4644775390625, -3.294677734375, -3.1248779296875, -2.955078125, -2.7852783203125, -2.615478515625, -2.4456787109375, -2.27587890625, -2.1060791015625, -1.936279296875, -1.7664794921875, -1.5966796875, -1.4268798828125, -1.257080078125, -1.0872802734375, -0.91748046875, -0.7476806640625, -0.577880859375, -0.4080810546875, -0.23828125, -0.0684814453125, 0.101318359375, 0.2711181640625, 0.44091796875, 0.6107177734375, 0.780517578125, 0.9503173828125, 1.1201171875, 1.2899169921875, 1.459716796875, 1.6295166015625, 1.79931640625, 1.9691162109375, 2.138916015625, 2.3087158203125, 2.478515625, 2.6483154296875, 2.818115234375, 2.9879150390625, 3.15771484375, 3.3275146484375, 3.497314453125, 3.6671142578125, 3.8369140625, 4.0067138671875, 4.176513671875, 4.3463134765625, 4.51611328125, 4.6859130859375, 4.855712890625, 5.0255126953125, 5.1953125]}, "gradients/decoder.transformer.h.22.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 4.0, 4.0, 8.0, 11.0, 11.0, 14.0, 28.0, 24.0, 47.0, 52.0, 80.0, 109.0, 153.0, 186.0, 320.0, 501.0, 747.0, 1278.0, 2504.0, 4817.0, 10963.0, 28373.0, 83692.0, 273925.0, 418463.0, 144343.0, 46238.0, 16974.0, 6953.0, 3279.0, 1630.0, 957.0, 616.0, 379.0, 271.0, 148.0, 122.0, 103.0, 64.0, 54.0, 37.0, 34.0, 12.0, 9.0, 6.0, 4.0, 6.0, 5.0, 1.0, 2.0, 2.0, 2.0, 1.0, 3.0, 1.0], "bins": [-5.90625, -5.728515625, -5.55078125, -5.373046875, -5.1953125, -5.017578125, -4.83984375, -4.662109375, -4.484375, -4.306640625, -4.12890625, -3.951171875, -3.7734375, -3.595703125, -3.41796875, -3.240234375, -3.0625, -2.884765625, -2.70703125, -2.529296875, -2.3515625, -2.173828125, -1.99609375, -1.818359375, -1.640625, -1.462890625, -1.28515625, -1.107421875, -0.9296875, -0.751953125, -0.57421875, -0.396484375, -0.21875, -0.041015625, 0.13671875, 0.314453125, 0.4921875, 0.669921875, 0.84765625, 1.025390625, 1.203125, 1.380859375, 1.55859375, 1.736328125, 1.9140625, 2.091796875, 2.26953125, 2.447265625, 2.625, 2.802734375, 2.98046875, 3.158203125, 3.3359375, 3.513671875, 3.69140625, 3.869140625, 4.046875, 4.224609375, 4.40234375, 4.580078125, 4.7578125, 4.935546875, 5.11328125, 5.291015625, 5.46875]}, "gradients/decoder.transformer.h.22.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 3.0, 2.0, 5.0, 3.0, 5.0, 5.0, 7.0, 6.0, 16.0, 12.0, 14.0, 12.0, 22.0, 19.0, 28.0, 36.0, 28.0, 38.0, 44.0, 31.0, 46.0, 51.0, 94.0, 323.0, 1578.0, 146.0, 68.0, 43.0, 36.0, 31.0, 43.0, 36.0, 30.0, 27.0, 28.0, 19.0, 17.0, 21.0, 14.0, 14.0, 12.0, 11.0, 6.0, 5.0, 8.0, 3.0, 4.0, 1.0, 4.0, 4.0, 4.0, 1.0, 0.0, 1.0], "bins": [-20.8125, -20.203857421875, -19.59521484375, -18.986572265625, -18.3779296875, -17.769287109375, -17.16064453125, -16.552001953125, -15.943359375, -15.334716796875, -14.72607421875, -14.117431640625, -13.5087890625, -12.900146484375, -12.29150390625, -11.682861328125, -11.07421875, -10.465576171875, -9.85693359375, -9.248291015625, -8.6396484375, -8.031005859375, -7.42236328125, -6.813720703125, -6.205078125, -5.596435546875, -4.98779296875, -4.379150390625, -3.7705078125, -3.161865234375, -2.55322265625, -1.944580078125, -1.3359375, -0.727294921875, -0.11865234375, 0.489990234375, 1.0986328125, 1.707275390625, 2.31591796875, 2.924560546875, 3.533203125, 4.141845703125, 4.75048828125, 5.359130859375, 5.9677734375, 6.576416015625, 7.18505859375, 7.793701171875, 8.40234375, 9.010986328125, 9.61962890625, 10.228271484375, 10.8369140625, 11.445556640625, 12.05419921875, 12.662841796875, 13.271484375, 13.880126953125, 14.48876953125, 15.097412109375, 15.7060546875, 16.314697265625, 16.92333984375, 17.531982421875, 18.140625]}, "gradients/decoder.transformer.h.22.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 3.0, 3.0, 5.0, 4.0, 2.0, 1.0, 5.0, 8.0, 8.0, 8.0, 6.0, 14.0, 16.0, 17.0, 17.0, 23.0, 25.0, 34.0, 36.0, 39.0, 68.0, 75.0, 133.0, 232.0, 519.0, 1568.0, 15674.0, 3065634.0, 57782.0, 2382.0, 613.0, 252.0, 122.0, 89.0, 47.0, 45.0, 37.0, 31.0, 32.0, 18.0, 14.0, 16.0, 17.0, 11.0, 8.0, 3.0, 5.0, 4.0, 5.0, 3.0, 5.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.34375, -37.00439453125, -35.6650390625, -34.32568359375, -32.986328125, -31.64697265625, -30.3076171875, -28.96826171875, -27.62890625, -26.28955078125, -24.9501953125, -23.61083984375, -22.271484375, -20.93212890625, -19.5927734375, -18.25341796875, -16.9140625, -15.57470703125, -14.2353515625, -12.89599609375, -11.556640625, -10.21728515625, -8.8779296875, -7.53857421875, -6.19921875, -4.85986328125, -3.5205078125, -2.18115234375, -0.841796875, 0.49755859375, 1.8369140625, 3.17626953125, 4.515625, 5.85498046875, 7.1943359375, 8.53369140625, 9.873046875, 11.21240234375, 12.5517578125, 13.89111328125, 15.23046875, 16.56982421875, 17.9091796875, 19.24853515625, 20.587890625, 21.92724609375, 23.2666015625, 24.60595703125, 25.9453125, 27.28466796875, 28.6240234375, 29.96337890625, 31.302734375, 32.64208984375, 33.9814453125, 35.32080078125, 36.66015625, 37.99951171875, 39.3388671875, 40.67822265625, 42.017578125, 43.35693359375, 44.6962890625, 46.03564453125, 47.375]}, "gradients/decoder.transformer.h.22.ln_1.weight": {"_type": "histogram", "values": [120.0, 896.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.795110702514648, -2.379739761352539, 8.03563117980957, 18.45100212097168, 28.86637306213379, 39.28174591064453, 49.697113037109375, 60.11248016357422, 70.5278549194336, 80.94322204589844, 91.35859680175781, 101.77397155761719, 112.18933868408203, 122.60470581054688, 133.02008056640625, 143.43545532226562, 153.850830078125, 164.26620483398438, 174.68157958984375, 185.09693908691406, 195.51231384277344, 205.9276885986328, 216.34304809570312, 226.7584228515625, 237.17379760742188, 247.58917236328125, 258.0045471191406, 268.419921875, 278.83526611328125, 289.2506408691406, 299.666015625, 310.0813903808594, 320.49676513671875, 330.9121398925781, 341.3275146484375, 351.7428894042969, 362.15826416015625, 372.5736083984375, 382.9889831542969, 393.40435791015625, 403.8197326660156, 414.235107421875, 424.6504821777344, 435.06585693359375, 445.481201171875, 455.8965759277344, 466.31195068359375, 476.7273254394531, 487.1427001953125, 497.5580749511719, 507.97344970703125, 518.3887939453125, 528.80419921875, 539.2195434570312, 549.6349487304688, 560.05029296875, 570.4656982421875, 580.8810424804688, 591.2964477539062, 601.7117919921875, 612.127197265625, 622.5425415039062, 632.9579467773438, 643.373291015625, 653.7886352539062]}, "gradients/decoder.transformer.h.22.ln_1.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 4.0, 3.0, 3.0, 4.0, 3.0, 6.0, 6.0, 8.0, 10.0, 11.0, 12.0, 9.0, 18.0, 13.0, 21.0, 22.0, 25.0, 23.0, 30.0, 30.0, 19.0, 24.0, 30.0, 36.0, 36.0, 36.0, 34.0, 48.0, 36.0, 43.0, 33.0, 39.0, 24.0, 33.0, 31.0, 35.0, 31.0, 27.0, 18.0, 19.0, 18.0, 20.0, 12.0, 14.0, 10.0, 12.0, 8.0, 7.0, 7.0, 6.0, 7.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-58.83184051513672, -56.977657318115234, -55.12347412109375, -53.269290924072266, -51.41510772705078, -49.5609245300293, -47.70674133300781, -45.85255813598633, -43.998374938964844, -42.14419174194336, -40.290008544921875, -38.43582534790039, -36.581642150878906, -34.72745895385742, -32.87327575683594, -31.019092559814453, -29.16490936279297, -27.310726165771484, -25.45654296875, -23.602359771728516, -21.74817657470703, -19.893993377685547, -18.039810180664062, -16.185626983642578, -14.331443786621094, -12.47726058959961, -10.623077392578125, -8.76889419555664, -6.914710998535156, -5.060527801513672, -3.2063446044921875, -1.3521614074707031, 0.5020217895507812, 2.3562049865722656, 4.21038818359375, 6.064571380615234, 7.918754577636719, 9.772937774658203, 11.627120971679688, 13.481304168701172, 15.335487365722656, 17.18967056274414, 19.043853759765625, 20.89803695678711, 22.752220153808594, 24.606403350830078, 26.460586547851562, 28.314769744873047, 30.16895294189453, 32.023136138916016, 33.8773193359375, 35.731502532958984, 37.58568572998047, 39.43986892700195, 41.29405212402344, 43.14823532104492, 45.002418518066406, 46.85660171508789, 48.710784912109375, 50.56496810913086, 52.419151306152344, 54.27333450317383, 56.12751770019531, 57.9817008972168, 59.83588409423828]}, "gradients/decoder.transformer.h.21.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 4.0, 5.0, 9.0, 8.0, 13.0, 6.0, 11.0, 11.0, 16.0, 15.0, 18.0, 15.0, 17.0, 24.0, 30.0, 29.0, 28.0, 38.0, 35.0, 43.0, 33.0, 54.0, 33.0, 41.0, 33.0, 38.0, 52.0, 31.0, 26.0, 38.0, 28.0, 29.0, 24.0, 23.0, 26.0, 22.0, 10.0, 11.0, 18.0, 18.0, 8.0, 5.0, 12.0, 6.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 2.0, 2.0, 1.0], "bins": [-5.734375, -5.5615234375, -5.388671875, -5.2158203125, -5.04296875, -4.8701171875, -4.697265625, -4.5244140625, -4.3515625, -4.1787109375, -4.005859375, -3.8330078125, -3.66015625, -3.4873046875, -3.314453125, -3.1416015625, -2.96875, -2.7958984375, -2.623046875, -2.4501953125, -2.27734375, -2.1044921875, -1.931640625, -1.7587890625, -1.5859375, -1.4130859375, -1.240234375, -1.0673828125, -0.89453125, -0.7216796875, -0.548828125, -0.3759765625, -0.203125, -0.0302734375, 0.142578125, 0.3154296875, 0.48828125, 0.6611328125, 0.833984375, 1.0068359375, 1.1796875, 1.3525390625, 1.525390625, 1.6982421875, 1.87109375, 2.0439453125, 2.216796875, 2.3896484375, 2.5625, 2.7353515625, 2.908203125, 3.0810546875, 3.25390625, 3.4267578125, 3.599609375, 3.7724609375, 3.9453125, 4.1181640625, 4.291015625, 4.4638671875, 4.63671875, 4.8095703125, 4.982421875, 5.1552734375, 5.328125]}, "gradients/decoder.transformer.h.21.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 6.0, 6.0, 6.0, 10.0, 11.0, 11.0, 19.0, 24.0, 29.0, 26.0, 35.0, 39.0, 45.0, 62.0, 77.0, 112.0, 183.0, 384.0, 1013.0, 4329.0, 29695.0, 364511.0, 3191695.0, 554636.0, 39774.0, 5269.0, 1112.0, 408.0, 191.0, 136.0, 86.0, 54.0, 54.0, 52.0, 42.0, 26.0, 25.0, 25.0, 15.0, 9.0, 13.0, 11.0, 4.0, 2.0, 3.0, 3.0, 3.0, 4.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-23.921875, -23.1640625, -22.40625, -21.6484375, -20.890625, -20.1328125, -19.375, -18.6171875, -17.859375, -17.1015625, -16.34375, -15.5859375, -14.828125, -14.0703125, -13.3125, -12.5546875, -11.796875, -11.0390625, -10.28125, -9.5234375, -8.765625, -8.0078125, -7.25, -6.4921875, -5.734375, -4.9765625, -4.21875, -3.4609375, -2.703125, -1.9453125, -1.1875, -0.4296875, 0.328125, 1.0859375, 1.84375, 2.6015625, 3.359375, 4.1171875, 4.875, 5.6328125, 6.390625, 7.1484375, 7.90625, 8.6640625, 9.421875, 10.1796875, 10.9375, 11.6953125, 12.453125, 13.2109375, 13.96875, 14.7265625, 15.484375, 16.2421875, 17.0, 17.7578125, 18.515625, 19.2734375, 20.03125, 20.7890625, 21.546875, 22.3046875, 23.0625, 23.8203125, 24.578125]}, "gradients/decoder.transformer.h.21.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 5.0, 5.0, 9.0, 15.0, 20.0, 42.0, 49.0, 80.0, 114.0, 184.0, 257.0, 333.0, 450.0, 519.0, 534.0, 406.0, 362.0, 245.0, 138.0, 109.0, 78.0, 42.0, 28.0, 26.0, 13.0, 7.0, 5.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-27.90625, -27.189208984375, -26.47216796875, -25.755126953125, -25.0380859375, -24.321044921875, -23.60400390625, -22.886962890625, -22.169921875, -21.452880859375, -20.73583984375, -20.018798828125, -19.3017578125, -18.584716796875, -17.86767578125, -17.150634765625, -16.43359375, -15.716552734375, -14.99951171875, -14.282470703125, -13.5654296875, -12.848388671875, -12.13134765625, -11.414306640625, -10.697265625, -9.980224609375, -9.26318359375, -8.546142578125, -7.8291015625, -7.112060546875, -6.39501953125, -5.677978515625, -4.9609375, -4.243896484375, -3.52685546875, -2.809814453125, -2.0927734375, -1.375732421875, -0.65869140625, 0.058349609375, 0.775390625, 1.492431640625, 2.20947265625, 2.926513671875, 3.6435546875, 4.360595703125, 5.07763671875, 5.794677734375, 6.51171875, 7.228759765625, 7.94580078125, 8.662841796875, 9.3798828125, 10.096923828125, 10.81396484375, 11.531005859375, 12.248046875, 12.965087890625, 13.68212890625, 14.399169921875, 15.1162109375, 15.833251953125, 16.55029296875, 17.267333984375, 17.984375]}, "gradients/decoder.transformer.h.21.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 4.0, 3.0, 6.0, 3.0, 8.0, 19.0, 17.0, 39.0, 44.0, 91.0, 162.0, 278.0, 571.0, 1411.0, 4387.0, 21461.0, 173196.0, 2141329.0, 1690668.0, 136287.0, 18007.0, 3877.0, 1271.0, 526.0, 259.0, 161.0, 81.0, 50.0, 34.0, 17.0, 12.0, 5.0, 0.0, 2.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.25, -26.0625, -24.875, -23.6875, -22.5, -21.3125, -20.125, -18.9375, -17.75, -16.5625, -15.375, -14.1875, -13.0, -11.8125, -10.625, -9.4375, -8.25, -7.0625, -5.875, -4.6875, -3.5, -2.3125, -1.125, 0.0625, 1.25, 2.4375, 3.625, 4.8125, 6.0, 7.1875, 8.375, 9.5625, 10.75, 11.9375, 13.125, 14.3125, 15.5, 16.6875, 17.875, 19.0625, 20.25, 21.4375, 22.625, 23.8125, 25.0, 26.1875, 27.375, 28.5625, 29.75, 30.9375, 32.125, 33.3125, 34.5, 35.6875, 36.875, 38.0625, 39.25, 40.4375, 41.625, 42.8125, 44.0, 45.1875, 46.375, 47.5625, 48.75]}, "gradients/decoder.transformer.h.21.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 14.0, 28.0, 51.0, 102.0, 158.0, 204.0, 174.0, 134.0, 84.0, 43.0, 11.0, 5.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-63.90666961669922, -59.23860168457031, -54.57053756713867, -49.90247344970703, -45.234405517578125, -40.56633758544922, -35.89827346801758, -31.230209350585938, -26.56214141845703, -21.894075393676758, -17.226009368896484, -12.557943344116211, -7.8898773193359375, -3.221811294555664, 1.4462547302246094, 6.11431884765625, 10.782386779785156, 15.45045280456543, 20.118518829345703, 24.786584854125977, 29.45465087890625, 34.122718811035156, 38.7907829284668, 43.45884704589844, 48.126914978027344, 52.79498291015625, 57.46304702758789, 62.13111114501953, 66.79917907714844, 71.46724700927734, 76.13531494140625, 80.80337524414062, 85.47145080566406, 90.13951873779297, 94.80758666992188, 99.47564697265625, 104.14371490478516, 108.81178283691406, 113.47984313964844, 118.14791107177734, 122.81597900390625, 127.48404693603516, 132.15211486816406, 136.82017517089844, 141.48825073242188, 146.15631103515625, 150.82437133789062, 155.492431640625, 160.16050720214844, 164.8285675048828, 169.49664306640625, 174.16470336914062, 178.832763671875, 183.50083923339844, 188.1688995361328, 192.83697509765625, 197.50503540039062, 202.173095703125, 206.84117126464844, 211.5092315673828, 216.17730712890625, 220.84536743164062, 225.513427734375, 230.18148803710938, 234.8495635986328]}, "gradients/decoder.transformer.h.21.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 3.0, 5.0, 7.0, 6.0, 14.0, 8.0, 6.0, 11.0, 13.0, 19.0, 13.0, 21.0, 28.0, 24.0, 23.0, 26.0, 29.0, 37.0, 45.0, 35.0, 38.0, 38.0, 27.0, 42.0, 53.0, 34.0, 47.0, 29.0, 24.0, 41.0, 36.0, 33.0, 26.0, 25.0, 25.0, 16.0, 19.0, 13.0, 14.0, 9.0, 10.0, 7.0, 11.0, 11.0, 2.0, 3.0, 1.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.54420471191406, -46.789825439453125, -45.03544998168945, -43.281070709228516, -41.526695251464844, -39.772315979003906, -38.01793670654297, -36.2635612487793, -34.509185791015625, -32.75480651855469, -31.000431060791016, -29.246051788330078, -27.491676330566406, -25.73729705810547, -23.982919692993164, -22.22854232788086, -20.474163055419922, -18.719785690307617, -16.965408325195312, -15.211030006408691, -13.456652641296387, -11.702275276184082, -9.947896957397461, -8.193519592285156, -6.439142227172852, -4.684764862060547, -2.930387020111084, -1.176009178161621, 0.5783681869506836, 2.3327455520629883, 4.087123870849609, 5.841501235961914, 7.595878601074219, 9.350255966186523, 11.104633331298828, 12.85901165008545, 14.613389015197754, 16.367767333984375, 18.12214469909668, 19.876522064208984, 21.63089942932129, 23.385276794433594, 25.1396541595459, 26.894031524658203, 28.64841079711914, 30.402786254882812, 32.15716552734375, 33.91154479980469, 35.66592025756836, 37.4202995300293, 39.17467498779297, 40.929054260253906, 42.68342971801758, 44.437808990478516, 46.19218444824219, 47.946563720703125, 49.70094299316406, 51.455322265625, 53.20969772338867, 54.96407699584961, 56.71845245361328, 58.47283172607422, 60.227210998535156, 61.98158645629883, 63.7359619140625]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 2.0, 7.0, 10.0, 10.0, 12.0, 7.0, 10.0, 14.0, 17.0, 17.0, 13.0, 15.0, 25.0, 26.0, 33.0, 27.0, 40.0, 32.0, 41.0, 37.0, 34.0, 36.0, 37.0, 45.0, 43.0, 33.0, 33.0, 47.0, 42.0, 34.0, 25.0, 25.0, 21.0, 23.0, 23.0, 24.0, 12.0, 15.0, 10.0, 8.0, 12.0, 8.0, 8.0, 1.0, 5.0, 4.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 3.0], "bins": [-6.20703125, -6.02313232421875, -5.8392333984375, -5.65533447265625, -5.471435546875, -5.28753662109375, -5.1036376953125, -4.91973876953125, -4.73583984375, -4.55194091796875, -4.3680419921875, -4.18414306640625, -4.000244140625, -3.81634521484375, -3.6324462890625, -3.44854736328125, -3.2646484375, -3.08074951171875, -2.8968505859375, -2.71295166015625, -2.529052734375, -2.34515380859375, -2.1612548828125, -1.97735595703125, -1.79345703125, -1.60955810546875, -1.4256591796875, -1.24176025390625, -1.057861328125, -0.87396240234375, -0.6900634765625, -0.50616455078125, -0.322265625, -0.13836669921875, 0.0455322265625, 0.22943115234375, 0.413330078125, 0.59722900390625, 0.7811279296875, 0.96502685546875, 1.14892578125, 1.33282470703125, 1.5167236328125, 1.70062255859375, 1.884521484375, 2.06842041015625, 2.2523193359375, 2.43621826171875, 2.6201171875, 2.80401611328125, 2.9879150390625, 3.17181396484375, 3.355712890625, 3.53961181640625, 3.7235107421875, 3.90740966796875, 4.09130859375, 4.27520751953125, 4.4591064453125, 4.64300537109375, 4.826904296875, 5.01080322265625, 5.1947021484375, 5.37860107421875, 5.5625]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 9.0, 11.0, 9.0, 30.0, 33.0, 59.0, 66.0, 131.0, 193.0, 291.0, 472.0, 707.0, 1113.0, 1654.0, 2678.0, 4171.0, 6625.0, 10367.0, 16470.0, 26652.0, 43215.0, 69997.0, 112461.0, 163952.0, 188763.0, 145788.0, 96375.0, 59243.0, 36256.0, 22502.0, 13937.0, 8909.0, 5588.0, 3496.0, 2237.0, 1445.0, 926.0, 596.0, 438.0, 244.0, 162.0, 97.0, 62.0, 50.0, 32.0, 20.0, 13.0, 8.0, 9.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.51171875, -0.49588775634765625, -0.4800567626953125, -0.46422576904296875, -0.448394775390625, -0.43256378173828125, -0.4167327880859375, -0.40090179443359375, -0.38507080078125, -0.36923980712890625, -0.3534088134765625, -0.33757781982421875, -0.321746826171875, -0.30591583251953125, -0.2900848388671875, -0.27425384521484375, -0.2584228515625, -0.24259185791015625, -0.2267608642578125, -0.21092987060546875, -0.195098876953125, -0.17926788330078125, -0.1634368896484375, -0.14760589599609375, -0.13177490234375, -0.11594390869140625, -0.1001129150390625, -0.08428192138671875, -0.068450927734375, -0.05261993408203125, -0.0367889404296875, -0.02095794677734375, -0.005126953125, 0.01070404052734375, 0.0265350341796875, 0.04236602783203125, 0.058197021484375, 0.07402801513671875, 0.0898590087890625, 0.10569000244140625, 0.12152099609375, 0.13735198974609375, 0.1531829833984375, 0.16901397705078125, 0.184844970703125, 0.20067596435546875, 0.2165069580078125, 0.23233795166015625, 0.2481689453125, 0.26399993896484375, 0.2798309326171875, 0.29566192626953125, 0.311492919921875, 0.32732391357421875, 0.3431549072265625, 0.35898590087890625, 0.37481689453125, 0.39064788818359375, 0.4064788818359375, 0.42230987548828125, 0.438140869140625, 0.45397186279296875, 0.4698028564453125, 0.48563385009765625, 0.50146484375]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 6.0, 2.0, 7.0, 7.0, 16.0, 15.0, 11.0, 22.0, 27.0, 20.0, 31.0, 29.0, 28.0, 30.0, 50.0, 57.0, 62.0, 47.0, 1084.0, 48.0, 47.0, 41.0, 41.0, 33.0, 39.0, 37.0, 29.0, 28.0, 28.0, 24.0, 18.0, 18.0, 13.0, 13.0, 3.0, 9.0, 5.0, 4.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.28515625, -5.13720703125, -4.9892578125, -4.84130859375, -4.693359375, -4.54541015625, -4.3974609375, -4.24951171875, -4.1015625, -3.95361328125, -3.8056640625, -3.65771484375, -3.509765625, -3.36181640625, -3.2138671875, -3.06591796875, -2.91796875, -2.77001953125, -2.6220703125, -2.47412109375, -2.326171875, -2.17822265625, -2.0302734375, -1.88232421875, -1.734375, -1.58642578125, -1.4384765625, -1.29052734375, -1.142578125, -0.99462890625, -0.8466796875, -0.69873046875, -0.55078125, -0.40283203125, -0.2548828125, -0.10693359375, 0.041015625, 0.18896484375, 0.3369140625, 0.48486328125, 0.6328125, 0.78076171875, 0.9287109375, 1.07666015625, 1.224609375, 1.37255859375, 1.5205078125, 1.66845703125, 1.81640625, 1.96435546875, 2.1123046875, 2.26025390625, 2.408203125, 2.55615234375, 2.7041015625, 2.85205078125, 3.0, 3.14794921875, 3.2958984375, 3.44384765625, 3.591796875, 3.73974609375, 3.8876953125, 4.03564453125, 4.18359375]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 3.0, 7.0, 17.0, 20.0, 24.0, 40.0, 48.0, 85.0, 152.0, 210.0, 333.0, 495.0, 700.0, 1085.0, 1739.0, 2683.0, 4066.0, 6310.0, 9898.0, 15497.0, 23909.0, 36335.0, 54736.0, 79793.0, 111444.0, 164329.0, 1166204.0, 126609.0, 95001.0, 66759.0, 44392.0, 29457.0, 19282.0, 12450.0, 8052.0, 5173.0, 3382.0, 2260.0, 1463.0, 931.0, 594.0, 435.0, 247.0, 173.0, 124.0, 70.0, 45.0, 30.0, 18.0, 11.0, 9.0, 5.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.30859375, -0.2990264892578125, -0.289459228515625, -0.2798919677734375, -0.27032470703125, -0.2607574462890625, -0.251190185546875, -0.2416229248046875, -0.2320556640625, -0.2224884033203125, -0.212921142578125, -0.2033538818359375, -0.19378662109375, -0.1842193603515625, -0.174652099609375, -0.1650848388671875, -0.155517578125, -0.1459503173828125, -0.136383056640625, -0.1268157958984375, -0.11724853515625, -0.1076812744140625, -0.098114013671875, -0.0885467529296875, -0.0789794921875, -0.0694122314453125, -0.059844970703125, -0.0502777099609375, -0.04071044921875, -0.0311431884765625, -0.021575927734375, -0.0120086669921875, -0.00244140625, 0.0071258544921875, 0.016693115234375, 0.0262603759765625, 0.03582763671875, 0.0453948974609375, 0.054962158203125, 0.0645294189453125, 0.0740966796875, 0.0836639404296875, 0.093231201171875, 0.1027984619140625, 0.11236572265625, 0.1219329833984375, 0.131500244140625, 0.1410675048828125, 0.150634765625, 0.1602020263671875, 0.169769287109375, 0.1793365478515625, 0.18890380859375, 0.1984710693359375, 0.208038330078125, 0.2176055908203125, 0.2271728515625, 0.2367401123046875, 0.246307373046875, 0.2558746337890625, 0.26544189453125, 0.2750091552734375, 0.284576416015625, 0.2941436767578125, 0.3037109375]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 6.0, 2.0, 8.0, 4.0, 9.0, 4.0, 6.0, 8.0, 13.0, 19.0, 18.0, 19.0, 20.0, 31.0, 32.0, 36.0, 44.0, 54.0, 52.0, 51.0, 50.0, 71.0, 70.0, 44.0, 56.0, 45.0, 38.0, 37.0, 23.0, 23.0, 23.0, 17.0, 12.0, 9.0, 7.0, 10.0, 7.0, 9.0, 5.0, 4.0, 3.0, 5.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 4.0], "bins": [-0.006557464599609375, -0.006365478038787842, -0.006173491477966309, -0.005981504917144775, -0.005789518356323242, -0.005597531795501709, -0.005405545234680176, -0.005213558673858643, -0.005021572113037109, -0.004829585552215576, -0.004637598991394043, -0.00444561243057251, -0.0042536258697509766, -0.004061639308929443, -0.00386965274810791, -0.003677666187286377, -0.0034856796264648438, -0.0032936930656433105, -0.0031017065048217773, -0.002909719944000244, -0.002717733383178711, -0.0025257468223571777, -0.0023337602615356445, -0.0021417737007141113, -0.0019497871398925781, -0.001757800579071045, -0.0015658140182495117, -0.0013738274574279785, -0.0011818408966064453, -0.0009898543357849121, -0.0007978677749633789, -0.0006058812141418457, -0.0004138946533203125, -0.0002219080924987793, -2.9921531677246094e-05, 0.0001620650291442871, 0.0003540515899658203, 0.0005460381507873535, 0.0007380247116088867, 0.0009300112724304199, 0.0011219978332519531, 0.0013139843940734863, 0.0015059709548950195, 0.0016979575157165527, 0.001889944076538086, 0.002081930637359619, 0.0022739171981811523, 0.0024659037590026855, 0.0026578903198242188, 0.002849876880645752, 0.003041863441467285, 0.0032338500022888184, 0.0034258365631103516, 0.0036178231239318848, 0.003809809684753418, 0.004001796245574951, 0.004193782806396484, 0.004385769367218018, 0.004577755928039551, 0.004769742488861084, 0.004961729049682617, 0.00515371561050415, 0.005345702171325684, 0.005537688732147217, 0.00572967529296875]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 3.0, 0.0, 0.0, 2.0, 3.0, 3.0, 5.0, 6.0, 6.0, 3.0, 8.0, 9.0, 12.0, 21.0, 23.0, 22.0, 32.0, 38.0, 49.0, 83.0, 111.0, 135.0, 194.0, 310.0, 720.0, 15596.0, 1003730.0, 25527.0, 806.0, 320.0, 201.0, 140.0, 95.0, 73.0, 61.0, 43.0, 28.0, 35.0, 19.0, 22.0, 16.0, 13.0, 14.0, 5.0, 12.0, 6.0, 2.0, 2.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.111083984375, -0.10732460021972656, -0.10356521606445312, -0.09980583190917969, -0.09604644775390625, -0.09228706359863281, -0.08852767944335938, -0.08476829528808594, -0.0810089111328125, -0.07724952697753906, -0.07349014282226562, -0.06973075866699219, -0.06597137451171875, -0.06221199035644531, -0.058452606201171875, -0.05469322204589844, -0.050933837890625, -0.04717445373535156, -0.043415069580078125, -0.03965568542480469, -0.03589630126953125, -0.03213691711425781, -0.028377532958984375, -0.024618148803710938, -0.0208587646484375, -0.017099380493164062, -0.013339996337890625, -0.009580612182617188, -0.00582122802734375, -0.0020618438720703125, 0.001697540283203125, 0.0054569244384765625, 0.00921630859375, 0.012975692749023438, 0.016735076904296875, 0.020494461059570312, 0.02425384521484375, 0.028013229370117188, 0.031772613525390625, 0.03553199768066406, 0.0392913818359375, 0.04305076599121094, 0.046810150146484375, 0.05056953430175781, 0.05432891845703125, 0.05808830261230469, 0.061847686767578125, 0.06560707092285156, 0.069366455078125, 0.07312583923339844, 0.07688522338867188, 0.08064460754394531, 0.08440399169921875, 0.08816337585449219, 0.09192276000976562, 0.09568214416503906, 0.0994415283203125, 0.10320091247558594, 0.10696029663085938, 0.11071968078613281, 0.11447906494140625, 0.11823844909667969, 0.12199783325195312, 0.12575721740722656, 0.1295166015625]}, "gradients/decoder.transformer.h.21.ln_cross_attn.weight": {"_type": "histogram", "values": [8.0, 201.0, 700.0, 106.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0024994623381644487, -0.0014556304085999727, -0.0004117984790354967, 0.0006320334505289793, 0.0016758653800934553, 0.0027196973096579313, 0.0037635292392224073, 0.004807361401617527, 0.005851193331182003, 0.006895025260746479, 0.007938857190310955, 0.008982689119875431, 0.010026521049439907, 0.011070352979004383, 0.012114184908568859, 0.013158016838133335, 0.014201848767697811, 0.015245680697262287, 0.016289513558149338, 0.017333343625068665, 0.01837717741727829, 0.019421007484197617, 0.020464841276407242, 0.02150867134332657, 0.022552503272891045, 0.02359633520245552, 0.024640167132019997, 0.025683999061584473, 0.02672783099114895, 0.027771662920713425, 0.0288154948502779, 0.029859326779842377, 0.030903160572052002, 0.03194699436426163, 0.032990824431180954, 0.03403465449810028, 0.035078488290309906, 0.03612232208251953, 0.03716615214943886, 0.038209982216358185, 0.03925381600856781, 0.040297649800777435, 0.04134147986769676, 0.04238530993461609, 0.043429143726825714, 0.04447297751903534, 0.045516807585954666, 0.04656063765287399, 0.04760447144508362, 0.04864830523729324, 0.04969213530421257, 0.0507359653711319, 0.05177979916334152, 0.05282363295555115, 0.053867463022470474, 0.0549112930893898, 0.055955126881599426, 0.05699896067380905, 0.05804279074072838, 0.059086620807647705, 0.06013045459985733, 0.061174288392066956, 0.06221811845898628, 0.06326194852590561, 0.06430578231811523]}, "gradients/decoder.transformer.h.21.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 4.0, 4.0, 6.0, 8.0, 9.0, 7.0, 11.0, 17.0, 24.0, 20.0, 17.0, 33.0, 23.0, 32.0, 39.0, 30.0, 43.0, 39.0, 53.0, 51.0, 38.0, 43.0, 43.0, 44.0, 48.0, 44.0, 41.0, 36.0, 33.0, 22.0, 19.0, 21.0, 25.0, 14.0, 14.0, 11.0, 14.0, 5.0, 6.0, 7.0, 6.0, 0.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 3.0], "bins": [-0.00412142276763916, -0.004003624431788921, -0.0038858260959386826, -0.0037680277600884438, -0.003650229424238205, -0.003532431088387966, -0.0034146327525377274, -0.0032968344166874886, -0.0031790360808372498, -0.003061237744987011, -0.002943439409136772, -0.0028256410732865334, -0.0027078427374362946, -0.0025900444015860558, -0.002472246065735817, -0.002354447729885578, -0.0022366493940353394, -0.0021188510581851006, -0.0020010527223348618, -0.001883254386484623, -0.0017654560506343842, -0.0016476577147841454, -0.0015298593789339066, -0.0014120610430836678, -0.001294262707233429, -0.0011764643713831902, -0.0010586660355329514, -0.0009408676996827126, -0.0008230693638324738, -0.000705271027982235, -0.0005874726921319962, -0.00046967435628175735, -0.00035187602043151855, -0.00023407768458127975, -0.00011627934873104095, 1.5189871191978455e-06, 0.00011931732296943665, 0.00023711565881967545, 0.00035491399466991425, 0.00047271233052015305, 0.0005905106663703918, 0.0007083090022206306, 0.0008261073380708694, 0.0009439056739211082, 0.001061704009771347, 0.0011795023456215858, 0.0012973006814718246, 0.0014150990173220634, 0.0015328973531723022, 0.001650695689022541, 0.0017684940248727798, 0.0018862923607230186, 0.0020040906965732574, 0.0021218890324234962, 0.002239687368273735, 0.002357485704123974, 0.0024752840399742126, 0.0025930823758244514, 0.0027108807116746902, 0.002828679047524929, 0.002946477383375168, 0.0030642757192254066, 0.0031820740550756454, 0.0032998723909258842, 0.003417670726776123]}, "gradients/decoder.transformer.h.21.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 2.0, 7.0, 10.0, 10.0, 12.0, 7.0, 10.0, 14.0, 17.0, 17.0, 13.0, 15.0, 25.0, 26.0, 33.0, 27.0, 40.0, 32.0, 41.0, 37.0, 34.0, 36.0, 37.0, 45.0, 43.0, 33.0, 33.0, 47.0, 42.0, 34.0, 25.0, 25.0, 21.0, 23.0, 23.0, 24.0, 12.0, 15.0, 10.0, 8.0, 12.0, 8.0, 8.0, 1.0, 5.0, 4.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 3.0], "bins": [-6.20703125, -6.02313232421875, -5.8392333984375, -5.65533447265625, -5.471435546875, -5.28753662109375, -5.1036376953125, -4.91973876953125, -4.73583984375, -4.55194091796875, -4.3680419921875, -4.18414306640625, -4.000244140625, -3.81634521484375, -3.6324462890625, -3.44854736328125, -3.2646484375, -3.08074951171875, -2.8968505859375, -2.71295166015625, -2.529052734375, -2.34515380859375, -2.1612548828125, -1.97735595703125, -1.79345703125, -1.60955810546875, -1.4256591796875, -1.24176025390625, -1.057861328125, -0.87396240234375, -0.6900634765625, -0.50616455078125, -0.322265625, -0.13836669921875, 0.0455322265625, 0.22943115234375, 0.413330078125, 0.59722900390625, 0.7811279296875, 0.96502685546875, 1.14892578125, 1.33282470703125, 1.5167236328125, 1.70062255859375, 1.884521484375, 2.06842041015625, 2.2523193359375, 2.43621826171875, 2.6201171875, 2.80401611328125, 2.9879150390625, 3.17181396484375, 3.355712890625, 3.53961181640625, 3.7235107421875, 3.90740966796875, 4.09130859375, 4.27520751953125, 4.4591064453125, 4.64300537109375, 4.826904296875, 5.01080322265625, 5.1947021484375, 5.37860107421875, 5.5625]}, "gradients/decoder.transformer.h.21.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 5.0, 3.0, 0.0, 6.0, 9.0, 13.0, 15.0, 12.0, 19.0, 29.0, 39.0, 62.0, 99.0, 155.0, 231.0, 287.0, 528.0, 828.0, 1297.0, 2196.0, 3696.0, 6382.0, 11262.0, 21006.0, 41809.0, 87378.0, 210537.0, 350881.0, 163258.0, 70927.0, 34391.0, 17742.0, 9644.0, 5400.0, 3242.0, 1905.0, 1155.0, 736.0, 441.0, 312.0, 204.0, 136.0, 82.0, 69.0, 43.0, 27.0, 12.0, 29.0, 8.0, 2.0, 5.0, 3.0, 3.0, 2.0, 2.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0], "bins": [-4.02734375, -3.89471435546875, -3.7620849609375, -3.62945556640625, -3.496826171875, -3.36419677734375, -3.2315673828125, -3.09893798828125, -2.96630859375, -2.83367919921875, -2.7010498046875, -2.56842041015625, -2.435791015625, -2.30316162109375, -2.1705322265625, -2.03790283203125, -1.9052734375, -1.77264404296875, -1.6400146484375, -1.50738525390625, -1.374755859375, -1.24212646484375, -1.1094970703125, -0.97686767578125, -0.84423828125, -0.71160888671875, -0.5789794921875, -0.44635009765625, -0.313720703125, -0.18109130859375, -0.0484619140625, 0.08416748046875, 0.216796875, 0.34942626953125, 0.4820556640625, 0.61468505859375, 0.747314453125, 0.87994384765625, 1.0125732421875, 1.14520263671875, 1.27783203125, 1.41046142578125, 1.5430908203125, 1.67572021484375, 1.808349609375, 1.94097900390625, 2.0736083984375, 2.20623779296875, 2.3388671875, 2.47149658203125, 2.6041259765625, 2.73675537109375, 2.869384765625, 3.00201416015625, 3.1346435546875, 3.26727294921875, 3.39990234375, 3.53253173828125, 3.6651611328125, 3.79779052734375, 3.930419921875, 4.06304931640625, 4.1956787109375, 4.32830810546875, 4.4609375]}, "gradients/decoder.transformer.h.21.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 5.0, 5.0, 6.0, 2.0, 6.0, 8.0, 7.0, 5.0, 8.0, 15.0, 18.0, 20.0, 25.0, 22.0, 28.0, 28.0, 29.0, 34.0, 49.0, 52.0, 43.0, 78.0, 211.0, 1670.0, 157.0, 86.0, 63.0, 37.0, 48.0, 34.0, 39.0, 29.0, 29.0, 25.0, 20.0, 15.0, 24.0, 15.0, 13.0, 15.0, 10.0, 8.0, 2.0, 6.0, 2.0, 1.0, 1.0, 3.0, 0.0, 3.0, 0.0, 2.0, 2.0], "bins": [-24.578125, -23.864501953125, -23.15087890625, -22.437255859375, -21.7236328125, -21.010009765625, -20.29638671875, -19.582763671875, -18.869140625, -18.155517578125, -17.44189453125, -16.728271484375, -16.0146484375, -15.301025390625, -14.58740234375, -13.873779296875, -13.16015625, -12.446533203125, -11.73291015625, -11.019287109375, -10.3056640625, -9.592041015625, -8.87841796875, -8.164794921875, -7.451171875, -6.737548828125, -6.02392578125, -5.310302734375, -4.5966796875, -3.883056640625, -3.16943359375, -2.455810546875, -1.7421875, -1.028564453125, -0.31494140625, 0.398681640625, 1.1123046875, 1.825927734375, 2.53955078125, 3.253173828125, 3.966796875, 4.680419921875, 5.39404296875, 6.107666015625, 6.8212890625, 7.534912109375, 8.24853515625, 8.962158203125, 9.67578125, 10.389404296875, 11.10302734375, 11.816650390625, 12.5302734375, 13.243896484375, 13.95751953125, 14.671142578125, 15.384765625, 16.098388671875, 16.81201171875, 17.525634765625, 18.2392578125, 18.952880859375, 19.66650390625, 20.380126953125, 21.09375]}, "gradients/decoder.transformer.h.21.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 0.0, 2.0, 3.0, 4.0, 10.0, 15.0, 11.0, 13.0, 29.0, 16.0, 21.0, 30.0, 41.0, 48.0, 55.0, 90.0, 145.0, 246.0, 520.0, 1470.0, 10109.0, 1988976.0, 1132684.0, 8590.0, 1348.0, 492.0, 230.0, 140.0, 81.0, 54.0, 52.0, 22.0, 35.0, 25.0, 20.0, 12.0, 15.0, 9.0, 9.0, 6.0, 8.0, 3.0, 5.0, 2.0, 8.0, 2.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.96875, -34.7685546875, -33.568359375, -32.3681640625, -31.16796875, -29.9677734375, -28.767578125, -27.5673828125, -26.3671875, -25.1669921875, -23.966796875, -22.7666015625, -21.56640625, -20.3662109375, -19.166015625, -17.9658203125, -16.765625, -15.5654296875, -14.365234375, -13.1650390625, -11.96484375, -10.7646484375, -9.564453125, -8.3642578125, -7.1640625, -5.9638671875, -4.763671875, -3.5634765625, -2.36328125, -1.1630859375, 0.037109375, 1.2373046875, 2.4375, 3.6376953125, 4.837890625, 6.0380859375, 7.23828125, 8.4384765625, 9.638671875, 10.8388671875, 12.0390625, 13.2392578125, 14.439453125, 15.6396484375, 16.83984375, 18.0400390625, 19.240234375, 20.4404296875, 21.640625, 22.8408203125, 24.041015625, 25.2412109375, 26.44140625, 27.6416015625, 28.841796875, 30.0419921875, 31.2421875, 32.4423828125, 33.642578125, 34.8427734375, 36.04296875, 37.2431640625, 38.443359375, 39.6435546875, 40.84375]}, "gradients/decoder.transformer.h.21.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 13.0, 56.0, 178.0, 318.0, 300.0, 97.0, 43.0, 7.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-87.60333251953125, -85.28251647949219, -82.9616928100586, -80.640869140625, -78.32005310058594, -75.99923706054688, -73.67841339111328, -71.35758972167969, -69.03677368164062, -66.71595764160156, -64.39513397216797, -62.07431411743164, -59.75349426269531, -57.432674407958984, -55.111854553222656, -52.79103469848633, -50.47021484375, -48.14939498901367, -45.828575134277344, -43.507755279541016, -41.18693542480469, -38.86611557006836, -36.54529571533203, -34.2244758605957, -31.903656005859375, -29.582836151123047, -27.26201629638672, -24.94119644165039, -22.620376586914062, -20.299556732177734, -17.978736877441406, -15.657917022705078, -13.337089538574219, -11.01626968383789, -8.695449829101562, -6.374629974365234, -4.053810119628906, -1.7329902648925781, 0.58782958984375, 2.908649444580078, 5.229469299316406, 7.550289154052734, 9.871109008789062, 12.19192886352539, 14.512748718261719, 16.833568572998047, 19.154388427734375, 21.475208282470703, 23.79602813720703, 26.11684799194336, 28.437667846679688, 30.758487701416016, 33.079307556152344, 35.40012741088867, 37.720947265625, 40.04176712036133, 42.362586975097656, 44.683406829833984, 47.00422668457031, 49.32504653930664, 51.64586639404297, 53.9666862487793, 56.287506103515625, 58.60832595825195, 60.92914581298828]}, "gradients/decoder.transformer.h.21.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 1.0, 5.0, 3.0, 4.0, 8.0, 6.0, 10.0, 14.0, 16.0, 25.0, 28.0, 25.0, 35.0, 40.0, 28.0, 32.0, 32.0, 36.0, 53.0, 47.0, 46.0, 41.0, 52.0, 49.0, 36.0, 38.0, 30.0, 35.0, 28.0, 30.0, 28.0, 25.0, 23.0, 16.0, 18.0, 17.0, 8.0, 8.0, 11.0, 5.0, 8.0, 4.0, 2.0, 1.0, 0.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-78.84596252441406, -76.44082641601562, -74.03568267822266, -71.63054656982422, -69.22541046142578, -66.82026672363281, -64.41513061523438, -62.00999450683594, -59.604854583740234, -57.19971466064453, -54.794578552246094, -52.38943862915039, -49.98429870605469, -47.57916259765625, -45.17402267456055, -42.768882751464844, -40.363746643066406, -37.9586067199707, -35.553470611572266, -33.14833068847656, -30.743192672729492, -28.338054656982422, -25.93291473388672, -23.52777671813965, -21.122638702392578, -18.717500686645508, -16.312362670898438, -13.907222747802734, -11.502084732055664, -9.096946716308594, -6.691807746887207, -4.28666877746582, -1.8815383911132812, 0.5236001014709473, 2.928738594055176, 5.333877086639404, 7.739015579223633, 10.144153594970703, 12.54929256439209, 14.954431533813477, 17.359569549560547, 19.764707565307617, 22.169845581054688, 24.57498550415039, 26.98012351989746, 29.38526153564453, 31.790401458740234, 34.19554138183594, 36.600677490234375, 39.00581741333008, 41.410953521728516, 43.81609344482422, 46.221229553222656, 48.62636947631836, 51.03150939941406, 53.4366455078125, 55.8417854309082, 58.246925354003906, 60.652061462402344, 63.05720138549805, 65.46234130859375, 67.86747741699219, 70.27261352539062, 72.6777572631836, 75.08289337158203]}, "gradients/decoder.transformer.h.20.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 2.0, 4.0, 11.0, 12.0, 9.0, 10.0, 14.0, 13.0, 14.0, 14.0, 23.0, 27.0, 21.0, 24.0, 21.0, 47.0, 37.0, 40.0, 40.0, 41.0, 33.0, 47.0, 43.0, 39.0, 42.0, 44.0, 37.0, 47.0, 30.0, 36.0, 20.0, 30.0, 18.0, 21.0, 17.0, 13.0, 16.0, 8.0, 8.0, 13.0, 4.0, 3.0, 4.0, 6.0, 3.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-6.625, -6.423095703125, -6.22119140625, -6.019287109375, -5.8173828125, -5.615478515625, -5.41357421875, -5.211669921875, -5.009765625, -4.807861328125, -4.60595703125, -4.404052734375, -4.2021484375, -4.000244140625, -3.79833984375, -3.596435546875, -3.39453125, -3.192626953125, -2.99072265625, -2.788818359375, -2.5869140625, -2.385009765625, -2.18310546875, -1.981201171875, -1.779296875, -1.577392578125, -1.37548828125, -1.173583984375, -0.9716796875, -0.769775390625, -0.56787109375, -0.365966796875, -0.1640625, 0.037841796875, 0.23974609375, 0.441650390625, 0.6435546875, 0.845458984375, 1.04736328125, 1.249267578125, 1.451171875, 1.653076171875, 1.85498046875, 2.056884765625, 2.2587890625, 2.460693359375, 2.66259765625, 2.864501953125, 3.06640625, 3.268310546875, 3.47021484375, 3.672119140625, 3.8740234375, 4.075927734375, 4.27783203125, 4.479736328125, 4.681640625, 4.883544921875, 5.08544921875, 5.287353515625, 5.4892578125, 5.691162109375, 5.89306640625, 6.094970703125, 6.296875]}, "gradients/decoder.transformer.h.20.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 3.0, 0.0, 8.0, 2.0, 4.0, 8.0, 9.0, 8.0, 9.0, 17.0, 12.0, 22.0, 13.0, 27.0, 22.0, 30.0, 27.0, 62.0, 79.0, 143.0, 302.0, 994.0, 5362.0, 50413.0, 1202341.0, 2793520.0, 127477.0, 10765.0, 1634.0, 406.0, 165.0, 94.0, 63.0, 49.0, 37.0, 32.0, 18.0, 15.0, 19.0, 19.0, 12.0, 9.0, 6.0, 4.0, 8.0, 4.0, 3.0, 4.0, 3.0, 4.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-28.234375, -27.356689453125, -26.47900390625, -25.601318359375, -24.7236328125, -23.845947265625, -22.96826171875, -22.090576171875, -21.212890625, -20.335205078125, -19.45751953125, -18.579833984375, -17.7021484375, -16.824462890625, -15.94677734375, -15.069091796875, -14.19140625, -13.313720703125, -12.43603515625, -11.558349609375, -10.6806640625, -9.802978515625, -8.92529296875, -8.047607421875, -7.169921875, -6.292236328125, -5.41455078125, -4.536865234375, -3.6591796875, -2.781494140625, -1.90380859375, -1.026123046875, -0.1484375, 0.729248046875, 1.60693359375, 2.484619140625, 3.3623046875, 4.239990234375, 5.11767578125, 5.995361328125, 6.873046875, 7.750732421875, 8.62841796875, 9.506103515625, 10.3837890625, 11.261474609375, 12.13916015625, 13.016845703125, 13.89453125, 14.772216796875, 15.64990234375, 16.527587890625, 17.4052734375, 18.282958984375, 19.16064453125, 20.038330078125, 20.916015625, 21.793701171875, 22.67138671875, 23.549072265625, 24.4267578125, 25.304443359375, 26.18212890625, 27.059814453125, 27.9375]}, "gradients/decoder.transformer.h.20.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0, 11.0, 5.0, 9.0, 26.0, 24.0, 49.0, 71.0, 114.0, 169.0, 324.0, 443.0, 627.0, 673.0, 521.0, 374.0, 239.0, 155.0, 90.0, 65.0, 44.0, 21.0, 8.0, 10.0, 5.0, 3.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-34.71875, -33.89697265625, -33.0751953125, -32.25341796875, -31.431640625, -30.60986328125, -29.7880859375, -28.96630859375, -28.14453125, -27.32275390625, -26.5009765625, -25.67919921875, -24.857421875, -24.03564453125, -23.2138671875, -22.39208984375, -21.5703125, -20.74853515625, -19.9267578125, -19.10498046875, -18.283203125, -17.46142578125, -16.6396484375, -15.81787109375, -14.99609375, -14.17431640625, -13.3525390625, -12.53076171875, -11.708984375, -10.88720703125, -10.0654296875, -9.24365234375, -8.421875, -7.60009765625, -6.7783203125, -5.95654296875, -5.134765625, -4.31298828125, -3.4912109375, -2.66943359375, -1.84765625, -1.02587890625, -0.2041015625, 0.61767578125, 1.439453125, 2.26123046875, 3.0830078125, 3.90478515625, 4.7265625, 5.54833984375, 6.3701171875, 7.19189453125, 8.013671875, 8.83544921875, 9.6572265625, 10.47900390625, 11.30078125, 12.12255859375, 12.9443359375, 13.76611328125, 14.587890625, 15.40966796875, 16.2314453125, 17.05322265625, 17.875]}, "gradients/decoder.transformer.h.20.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 6.0, 7.0, 12.0, 24.0, 51.0, 65.0, 119.0, 185.0, 382.0, 901.0, 3887.0, 47458.0, 1947064.0, 2137218.0, 51071.0, 4039.0, 962.0, 380.0, 204.0, 91.0, 73.0, 28.0, 23.0, 21.0, 7.0, 6.0, 6.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.15625, -39.28662109375, -37.4169921875, -35.54736328125, -33.677734375, -31.80810546875, -29.9384765625, -28.06884765625, -26.19921875, -24.32958984375, -22.4599609375, -20.59033203125, -18.720703125, -16.85107421875, -14.9814453125, -13.11181640625, -11.2421875, -9.37255859375, -7.5029296875, -5.63330078125, -3.763671875, -1.89404296875, -0.0244140625, 1.84521484375, 3.71484375, 5.58447265625, 7.4541015625, 9.32373046875, 11.193359375, 13.06298828125, 14.9326171875, 16.80224609375, 18.671875, 20.54150390625, 22.4111328125, 24.28076171875, 26.150390625, 28.02001953125, 29.8896484375, 31.75927734375, 33.62890625, 35.49853515625, 37.3681640625, 39.23779296875, 41.107421875, 42.97705078125, 44.8466796875, 46.71630859375, 48.5859375, 50.45556640625, 52.3251953125, 54.19482421875, 56.064453125, 57.93408203125, 59.8037109375, 61.67333984375, 63.54296875, 65.41259765625, 67.2822265625, 69.15185546875, 71.021484375, 72.89111328125, 74.7607421875, 76.63037109375, 78.5]}, "gradients/decoder.transformer.h.20.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 4.0, 1.0, 7.0, 22.0, 31.0, 34.0, 55.0, 74.0, 93.0, 127.0, 113.0, 117.0, 111.0, 84.0, 48.0, 37.0, 29.0, 17.0, 9.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-62.38624954223633, -59.204132080078125, -56.02201461791992, -52.83989715576172, -49.65777587890625, -46.47566223144531, -43.293540954589844, -40.11142349243164, -36.92930603027344, -33.747188568115234, -30.56507110595703, -27.382951736450195, -24.200834274291992, -21.01871681213379, -17.836597442626953, -14.65447998046875, -11.472362518310547, -8.290245056152344, -5.108126640319824, -1.9260082244873047, 1.2561092376708984, 4.438226699829102, 7.6203460693359375, 10.80246353149414, 13.984580993652344, 17.166698455810547, 20.34881591796875, 23.530935287475586, 26.71305274963379, 29.895170211791992, 33.07728958129883, 36.25940704345703, 39.4415283203125, 42.6236457824707, 45.805763244628906, 48.987884521484375, 52.16999816894531, 55.35211944580078, 58.534236907958984, 61.71635437011719, 64.89846801757812, 68.0805892944336, 71.26270294189453, 74.44482421875, 77.62693786621094, 80.8090591430664, 83.99118041992188, 87.17329406738281, 90.35541534423828, 93.53753662109375, 96.71965026855469, 99.90177154541016, 103.0838851928711, 106.26600646972656, 109.4481201171875, 112.63024139404297, 115.81236267089844, 118.9944839477539, 122.17659759521484, 125.35871887207031, 128.54083251953125, 131.7229461669922, 134.9050750732422, 138.08718872070312, 141.26930236816406]}, "gradients/decoder.transformer.h.20.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 1.0, 2.0, 2.0, 4.0, 12.0, 5.0, 8.0, 11.0, 19.0, 17.0, 23.0, 26.0, 22.0, 33.0, 33.0, 35.0, 40.0, 45.0, 32.0, 50.0, 40.0, 38.0, 43.0, 38.0, 49.0, 48.0, 49.0, 40.0, 44.0, 34.0, 31.0, 20.0, 18.0, 12.0, 21.0, 18.0, 14.0, 8.0, 6.0, 2.0, 7.0, 4.0, 4.0, 0.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.27806091308594, -59.15653991699219, -57.03501892089844, -54.91349792480469, -52.79197692871094, -50.67045593261719, -48.54893112182617, -46.42741012573242, -44.30588912963867, -42.18436813354492, -40.06284713745117, -37.94132614135742, -35.819801330566406, -33.698280334472656, -31.576759338378906, -29.455238342285156, -27.333717346191406, -25.212196350097656, -23.090675354003906, -20.969152450561523, -18.847631454467773, -16.726110458374023, -14.604588508605957, -12.48306655883789, -10.36154556274414, -8.24002456665039, -6.118502616882324, -3.996981143951416, -1.8754596710205078, 0.2460613250732422, 2.3675832748413086, 4.489105224609375, 6.610626220703125, 8.732147216796875, 10.853669166564941, 12.975191116333008, 15.096712112426758, 17.218233108520508, 19.33975601196289, 21.46127700805664, 23.58279800415039, 25.70431900024414, 27.82583999633789, 29.947362899780273, 32.068885803222656, 34.190406799316406, 36.311927795410156, 38.433448791503906, 40.554969787597656, 42.676490783691406, 44.798011779785156, 46.919532775878906, 49.041053771972656, 51.162574768066406, 53.28409957885742, 55.40562057495117, 57.52714157104492, 59.64866256713867, 61.77018356323242, 63.89170455932617, 66.01322937011719, 68.13475036621094, 70.25627136230469, 72.37779235839844, 74.49931335449219]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 4.0, 2.0, 3.0, 11.0, 5.0, 12.0, 6.0, 11.0, 17.0, 13.0, 19.0, 10.0, 19.0, 21.0, 29.0, 38.0, 31.0, 49.0, 31.0, 31.0, 41.0, 53.0, 49.0, 37.0, 43.0, 45.0, 44.0, 44.0, 34.0, 44.0, 22.0, 41.0, 24.0, 12.0, 20.0, 20.0, 18.0, 13.0, 14.0, 5.0, 3.0, 6.0, 5.0, 7.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0], "bins": [-7.3515625, -7.12908935546875, -6.9066162109375, -6.68414306640625, -6.461669921875, -6.23919677734375, -6.0167236328125, -5.79425048828125, -5.57177734375, -5.34930419921875, -5.1268310546875, -4.90435791015625, -4.681884765625, -4.45941162109375, -4.2369384765625, -4.01446533203125, -3.7919921875, -3.56951904296875, -3.3470458984375, -3.12457275390625, -2.902099609375, -2.67962646484375, -2.4571533203125, -2.23468017578125, -2.01220703125, -1.78973388671875, -1.5672607421875, -1.34478759765625, -1.122314453125, -0.89984130859375, -0.6773681640625, -0.45489501953125, -0.232421875, -0.00994873046875, 0.2125244140625, 0.43499755859375, 0.657470703125, 0.87994384765625, 1.1024169921875, 1.32489013671875, 1.54736328125, 1.76983642578125, 1.9923095703125, 2.21478271484375, 2.437255859375, 2.65972900390625, 2.8822021484375, 3.10467529296875, 3.3271484375, 3.54962158203125, 3.7720947265625, 3.99456787109375, 4.217041015625, 4.43951416015625, 4.6619873046875, 4.88446044921875, 5.10693359375, 5.32940673828125, 5.5518798828125, 5.77435302734375, 5.996826171875, 6.21929931640625, 6.4417724609375, 6.66424560546875, 6.88671875]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 5.0, 3.0, 7.0, 12.0, 18.0, 20.0, 34.0, 56.0, 77.0, 101.0, 151.0, 226.0, 298.0, 435.0, 629.0, 986.0, 1307.0, 2054.0, 2942.0, 4247.0, 6533.0, 9684.0, 14507.0, 22290.0, 34438.0, 52621.0, 81522.0, 123308.0, 164869.0, 164973.0, 123337.0, 81960.0, 53355.0, 34516.0, 22324.0, 14705.0, 9612.0, 6516.0, 4398.0, 2969.0, 1988.0, 1314.0, 964.0, 697.0, 485.0, 350.0, 219.0, 142.0, 119.0, 77.0, 49.0, 42.0, 33.0, 16.0, 10.0, 7.0, 4.0, 4.0, 5.0, 1.0, 2.0], "bins": [-0.49365234375, -0.4782295227050781, -0.46280670166015625, -0.4473838806152344, -0.4319610595703125, -0.4165382385253906, -0.40111541748046875, -0.3856925964355469, -0.370269775390625, -0.3548469543457031, -0.33942413330078125, -0.3240013122558594, -0.3085784912109375, -0.2931556701660156, -0.27773284912109375, -0.2623100280761719, -0.24688720703125, -0.23146438598632812, -0.21604156494140625, -0.20061874389648438, -0.1851959228515625, -0.16977310180664062, -0.15435028076171875, -0.13892745971679688, -0.123504638671875, -0.10808181762695312, -0.09265899658203125, -0.07723617553710938, -0.0618133544921875, -0.046390533447265625, -0.03096771240234375, -0.015544891357421875, -0.0001220703125, 0.015300750732421875, 0.03072357177734375, 0.046146392822265625, 0.0615692138671875, 0.07699203491210938, 0.09241485595703125, 0.10783767700195312, 0.123260498046875, 0.13868331909179688, 0.15410614013671875, 0.16952896118164062, 0.1849517822265625, 0.20037460327148438, 0.21579742431640625, 0.23122024536132812, 0.24664306640625, 0.2620658874511719, 0.27748870849609375, 0.2929115295410156, 0.3083343505859375, 0.3237571716308594, 0.33917999267578125, 0.3546028137207031, 0.370025634765625, 0.3854484558105469, 0.40087127685546875, 0.4162940979003906, 0.4317169189453125, 0.4471397399902344, 0.46256256103515625, 0.4779853820800781, 0.493408203125]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 3.0, 7.0, 8.0, 10.0, 8.0, 10.0, 18.0, 12.0, 13.0, 19.0, 23.0, 24.0, 21.0, 33.0, 26.0, 35.0, 35.0, 29.0, 60.0, 39.0, 31.0, 45.0, 1066.0, 32.0, 35.0, 31.0, 41.0, 39.0, 31.0, 21.0, 40.0, 26.0, 25.0, 19.0, 16.0, 19.0, 10.0, 10.0, 11.0, 10.0, 6.0, 10.0, 6.0, 5.0, 4.0, 4.0, 2.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-4.25390625, -4.126953125, -4.0, -3.873046875, -3.74609375, -3.619140625, -3.4921875, -3.365234375, -3.23828125, -3.111328125, -2.984375, -2.857421875, -2.73046875, -2.603515625, -2.4765625, -2.349609375, -2.22265625, -2.095703125, -1.96875, -1.841796875, -1.71484375, -1.587890625, -1.4609375, -1.333984375, -1.20703125, -1.080078125, -0.953125, -0.826171875, -0.69921875, -0.572265625, -0.4453125, -0.318359375, -0.19140625, -0.064453125, 0.0625, 0.189453125, 0.31640625, 0.443359375, 0.5703125, 0.697265625, 0.82421875, 0.951171875, 1.078125, 1.205078125, 1.33203125, 1.458984375, 1.5859375, 1.712890625, 1.83984375, 1.966796875, 2.09375, 2.220703125, 2.34765625, 2.474609375, 2.6015625, 2.728515625, 2.85546875, 2.982421875, 3.109375, 3.236328125, 3.36328125, 3.490234375, 3.6171875, 3.744140625, 3.87109375]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 3.0, 3.0, 5.0, 5.0, 10.0, 15.0, 21.0, 26.0, 52.0, 74.0, 127.0, 178.0, 256.0, 414.0, 647.0, 1045.0, 1547.0, 2406.0, 3725.0, 5839.0, 8997.0, 14081.0, 21812.0, 33786.0, 51747.0, 76492.0, 108886.0, 151490.0, 1180465.0, 133673.0, 99613.0, 69343.0, 46528.0, 29829.0, 19200.0, 12395.0, 8039.0, 5169.0, 3215.0, 2104.0, 1351.0, 875.0, 527.0, 350.0, 246.0, 171.0, 134.0, 67.0, 53.0, 40.0, 21.0, 19.0, 12.0, 6.0, 3.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.328857421875, -0.3183708190917969, -0.30788421630859375, -0.2973976135253906, -0.2869110107421875, -0.2764244079589844, -0.26593780517578125, -0.2554512023925781, -0.244964599609375, -0.23447799682617188, -0.22399139404296875, -0.21350479125976562, -0.2030181884765625, -0.19253158569335938, -0.18204498291015625, -0.17155838012695312, -0.16107177734375, -0.15058517456054688, -0.14009857177734375, -0.12961196899414062, -0.1191253662109375, -0.10863876342773438, -0.09815216064453125, -0.08766555786132812, -0.077178955078125, -0.06669235229492188, -0.05620574951171875, -0.045719146728515625, -0.0352325439453125, -0.024745941162109375, -0.01425933837890625, -0.003772735595703125, 0.0067138671875, 0.017200469970703125, 0.02768707275390625, 0.038173675537109375, 0.0486602783203125, 0.059146881103515625, 0.06963348388671875, 0.08012008666992188, 0.090606689453125, 0.10109329223632812, 0.11157989501953125, 0.12206649780273438, 0.1325531005859375, 0.14303970336914062, 0.15352630615234375, 0.16401290893554688, 0.17449951171875, 0.18498611450195312, 0.19547271728515625, 0.20595932006835938, 0.2164459228515625, 0.22693252563476562, 0.23741912841796875, 0.24790573120117188, 0.258392333984375, 0.2688789367675781, 0.27936553955078125, 0.2898521423339844, 0.3003387451171875, 0.3108253479003906, 0.32131195068359375, 0.3317985534667969, 0.34228515625]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 5.0, 4.0, 6.0, 5.0, 14.0, 15.0, 26.0, 43.0, 83.0, 120.0, 189.0, 193.0, 125.0, 61.0, 45.0, 26.0, 14.0, 9.0, 7.0, 3.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0159759521484375, -0.015441179275512695, -0.01490640640258789, -0.014371633529663086, -0.013836860656738281, -0.013302087783813477, -0.012767314910888672, -0.012232542037963867, -0.011697769165039062, -0.011162996292114258, -0.010628223419189453, -0.010093450546264648, -0.009558677673339844, -0.009023904800415039, -0.008489131927490234, -0.00795435905456543, -0.007419586181640625, -0.00688481330871582, -0.006350040435791016, -0.005815267562866211, -0.005280494689941406, -0.0047457218170166016, -0.004210948944091797, -0.003676176071166992, -0.0031414031982421875, -0.002606630325317383, -0.002071857452392578, -0.0015370845794677734, -0.0010023117065429688, -0.00046753883361816406, 6.723403930664062e-05, 0.0006020069122314453, 0.00113677978515625, 0.0016715526580810547, 0.0022063255310058594, 0.002741098403930664, 0.0032758712768554688, 0.0038106441497802734, 0.004345417022705078, 0.004880189895629883, 0.0054149627685546875, 0.005949735641479492, 0.006484508514404297, 0.0070192813873291016, 0.007554054260253906, 0.008088827133178711, 0.008623600006103516, 0.00915837287902832, 0.009693145751953125, 0.01022791862487793, 0.010762691497802734, 0.011297464370727539, 0.011832237243652344, 0.012367010116577148, 0.012901782989501953, 0.013436555862426758, 0.013971328735351562, 0.014506101608276367, 0.015040874481201172, 0.015575647354125977, 0.01611042022705078, 0.016645193099975586, 0.01717996597290039, 0.017714738845825195, 0.01824951171875]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 7.0, 4.0, 2.0, 9.0, 14.0, 15.0, 26.0, 50.0, 94.0, 163.0, 373.0, 2179.0, 1037227.0, 7522.0, 431.0, 215.0, 89.0, 46.0, 29.0, 16.0, 9.0, 4.0, 11.0, 4.0, 3.0, 4.0, 6.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.3095703125, -0.3003578186035156, -0.29114532470703125, -0.2819328308105469, -0.2727203369140625, -0.2635078430175781, -0.25429534912109375, -0.24508285522460938, -0.235870361328125, -0.22665786743164062, -0.21744537353515625, -0.20823287963867188, -0.1990203857421875, -0.18980789184570312, -0.18059539794921875, -0.17138290405273438, -0.16217041015625, -0.15295791625976562, -0.14374542236328125, -0.13453292846679688, -0.1253204345703125, -0.11610794067382812, -0.10689544677734375, -0.09768295288085938, -0.088470458984375, -0.07925796508789062, -0.07004547119140625, -0.060832977294921875, -0.0516204833984375, -0.042407989501953125, -0.03319549560546875, -0.023983001708984375, -0.0147705078125, -0.005558013916015625, 0.00365447998046875, 0.012866973876953125, 0.0220794677734375, 0.031291961669921875, 0.04050445556640625, 0.049716949462890625, 0.058929443359375, 0.06814193725585938, 0.07735443115234375, 0.08656692504882812, 0.0957794189453125, 0.10499191284179688, 0.11420440673828125, 0.12341690063476562, 0.13262939453125, 0.14184188842773438, 0.15105438232421875, 0.16026687622070312, 0.1694793701171875, 0.17869186401367188, 0.18790435791015625, 0.19711685180664062, 0.206329345703125, 0.21554183959960938, 0.22475433349609375, 0.23396682739257812, 0.2431793212890625, 0.2523918151855469, 0.26160430908203125, 0.2708168029785156, 0.280029296875]}, "gradients/decoder.transformer.h.20.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 5.0, 127.0, 591.0, 268.0, 22.0], "bins": [-0.06963048130273819, -0.06850139051675797, -0.06737229973077774, -0.06624321639537811, -0.06511412560939789, -0.06398503482341766, -0.06285594403743744, -0.061726853251457214, -0.06059776619076729, -0.059468675404787064, -0.05833958834409714, -0.05721049755811691, -0.05608140677213669, -0.05495231971144676, -0.05382322892546654, -0.05269414186477661, -0.05156505107879639, -0.05043596029281616, -0.049306873232126236, -0.04817778244614601, -0.047048695385456085, -0.04591960459947586, -0.044790513813495636, -0.04366142302751541, -0.042532335966825485, -0.04140324518084526, -0.040274158120155334, -0.03914506733417511, -0.038015976548194885, -0.03688688948750496, -0.035757798701524734, -0.03462871164083481, -0.033499620854854584, -0.03237053006887436, -0.031241443008184433, -0.03011235222220421, -0.028983263298869133, -0.027854174375534058, -0.026725083589553833, -0.025595994666218758, -0.024466905742883682, -0.023337816819548607, -0.02220872789621353, -0.021079637110233307, -0.01995054818689823, -0.018821459263563156, -0.01769236847758293, -0.016563279554247856, -0.015434189699590206, -0.014305099844932556, -0.01317601092159748, -0.012046921998262405, -0.010917832143604755, -0.009788742288947105, -0.00865965336561203, -0.00753056351095438, -0.006401474587619305, -0.005272385198622942, -0.004143295809626579, -0.0030142064206302166, -0.001885117031633854, -0.0007560276426374912, 0.00037306174635887146, 0.0015021516010165215, 0.002631240524351597]}, "gradients/decoder.transformer.h.20.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 3.0, 4.0, 2.0, 3.0, 12.0, 10.0, 9.0, 8.0, 10.0, 17.0, 22.0, 27.0, 33.0, 31.0, 40.0, 29.0, 32.0, 46.0, 44.0, 42.0, 41.0, 45.0, 43.0, 42.0, 40.0, 43.0, 52.0, 34.0, 33.0, 29.0, 28.0, 15.0, 27.0, 23.0, 18.0, 22.0, 11.0, 9.0, 4.0, 8.0, 9.0, 5.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.0065416693687438965, -0.00637483224272728, -0.006207995116710663, -0.006041157990694046, -0.005874320864677429, -0.005707483738660812, -0.0055406466126441956, -0.005373809486627579, -0.005206972360610962, -0.005040135234594345, -0.004873298108577728, -0.0047064609825611115, -0.004539623856544495, -0.004372786730527878, -0.004205949604511261, -0.004039112478494644, -0.0038722753524780273, -0.0037054382264614105, -0.0035386011004447937, -0.003371763974428177, -0.00320492684841156, -0.0030380897223949432, -0.0028712525963783264, -0.0027044154703617096, -0.0025375783443450928, -0.002370741218328476, -0.002203904092311859, -0.0020370669662952423, -0.0018702298402786255, -0.0017033927142620087, -0.0015365555882453918, -0.001369718462228775, -0.0012028813362121582, -0.0010360442101955414, -0.0008692070841789246, -0.0007023699581623077, -0.0005355328321456909, -0.0003686957061290741, -0.00020185858011245728, -3.5021454095840454e-05, 0.00013181567192077637, 0.0002986527979373932, 0.00046548992395401, 0.0006323270499706268, 0.0007991641759872437, 0.0009660013020038605, 0.0011328384280204773, 0.0012996755540370941, 0.001466512680053711, 0.0016333498060703278, 0.0018001869320869446, 0.0019670240581035614, 0.0021338611841201782, 0.002300698310136795, 0.002467535436153412, 0.0026343725621700287, 0.0028012096881866455, 0.0029680468142032623, 0.003134883940219879, 0.003301721066236496, 0.003468558192253113, 0.0036353953182697296, 0.0038022324442863464, 0.003969069570302963, 0.00413590669631958]}, "gradients/decoder.transformer.h.20.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 4.0, 2.0, 3.0, 11.0, 5.0, 12.0, 6.0, 11.0, 17.0, 13.0, 19.0, 10.0, 19.0, 21.0, 29.0, 38.0, 31.0, 49.0, 31.0, 31.0, 41.0, 53.0, 49.0, 37.0, 43.0, 45.0, 44.0, 44.0, 34.0, 44.0, 22.0, 41.0, 24.0, 12.0, 20.0, 20.0, 18.0, 13.0, 14.0, 5.0, 3.0, 6.0, 5.0, 7.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0], "bins": [-7.3515625, -7.12908935546875, -6.9066162109375, -6.68414306640625, -6.461669921875, -6.23919677734375, -6.0167236328125, -5.79425048828125, -5.57177734375, -5.34930419921875, -5.1268310546875, -4.90435791015625, -4.681884765625, -4.45941162109375, -4.2369384765625, -4.01446533203125, -3.7919921875, -3.56951904296875, -3.3470458984375, -3.12457275390625, -2.902099609375, -2.67962646484375, -2.4571533203125, -2.23468017578125, -2.01220703125, -1.78973388671875, -1.5672607421875, -1.34478759765625, -1.122314453125, -0.89984130859375, -0.6773681640625, -0.45489501953125, -0.232421875, -0.00994873046875, 0.2125244140625, 0.43499755859375, 0.657470703125, 0.87994384765625, 1.1024169921875, 1.32489013671875, 1.54736328125, 1.76983642578125, 1.9923095703125, 2.21478271484375, 2.437255859375, 2.65972900390625, 2.8822021484375, 3.10467529296875, 3.3271484375, 3.54962158203125, 3.7720947265625, 3.99456787109375, 4.217041015625, 4.43951416015625, 4.6619873046875, 4.88446044921875, 5.10693359375, 5.32940673828125, 5.5518798828125, 5.77435302734375, 5.996826171875, 6.21929931640625, 6.4417724609375, 6.66424560546875, 6.88671875]}, "gradients/decoder.transformer.h.20.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 6.0, 6.0, 3.0, 11.0, 9.0, 17.0, 31.0, 38.0, 48.0, 80.0, 112.0, 169.0, 229.0, 313.0, 492.0, 883.0, 1367.0, 2311.0, 4296.0, 7964.0, 15826.0, 33149.0, 78689.0, 220297.0, 403425.0, 161946.0, 61294.0, 27051.0, 12922.0, 6675.0, 3513.0, 2069.0, 1176.0, 755.0, 443.0, 317.0, 181.0, 134.0, 93.0, 68.0, 49.0, 24.0, 25.0, 18.0, 14.0, 10.0, 8.0, 4.0, 3.0, 2.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.87109375, -3.7296142578125, -3.588134765625, -3.4466552734375, -3.30517578125, -3.1636962890625, -3.022216796875, -2.8807373046875, -2.7392578125, -2.5977783203125, -2.456298828125, -2.3148193359375, -2.17333984375, -2.0318603515625, -1.890380859375, -1.7489013671875, -1.607421875, -1.4659423828125, -1.324462890625, -1.1829833984375, -1.04150390625, -0.9000244140625, -0.758544921875, -0.6170654296875, -0.4755859375, -0.3341064453125, -0.192626953125, -0.0511474609375, 0.09033203125, 0.2318115234375, 0.373291015625, 0.5147705078125, 0.65625, 0.7977294921875, 0.939208984375, 1.0806884765625, 1.22216796875, 1.3636474609375, 1.505126953125, 1.6466064453125, 1.7880859375, 1.9295654296875, 2.071044921875, 2.2125244140625, 2.35400390625, 2.4954833984375, 2.636962890625, 2.7784423828125, 2.919921875, 3.0614013671875, 3.202880859375, 3.3443603515625, 3.48583984375, 3.6273193359375, 3.768798828125, 3.9102783203125, 4.0517578125, 4.1932373046875, 4.334716796875, 4.4761962890625, 4.61767578125, 4.7591552734375, 4.900634765625, 5.0421142578125, 5.18359375]}, "gradients/decoder.transformer.h.20.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 2.0, 4.0, 4.0, 6.0, 11.0, 6.0, 7.0, 11.0, 16.0, 17.0, 19.0, 19.0, 23.0, 26.0, 28.0, 29.0, 42.0, 39.0, 45.0, 47.0, 63.0, 227.0, 1777.0, 118.0, 64.0, 42.0, 46.0, 40.0, 43.0, 34.0, 31.0, 28.0, 30.0, 22.0, 23.0, 13.0, 12.0, 11.0, 9.0, 6.0, 5.0, 4.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 2.0], "bins": [-28.078125, -27.28271484375, -26.4873046875, -25.69189453125, -24.896484375, -24.10107421875, -23.3056640625, -22.51025390625, -21.71484375, -20.91943359375, -20.1240234375, -19.32861328125, -18.533203125, -17.73779296875, -16.9423828125, -16.14697265625, -15.3515625, -14.55615234375, -13.7607421875, -12.96533203125, -12.169921875, -11.37451171875, -10.5791015625, -9.78369140625, -8.98828125, -8.19287109375, -7.3974609375, -6.60205078125, -5.806640625, -5.01123046875, -4.2158203125, -3.42041015625, -2.625, -1.82958984375, -1.0341796875, -0.23876953125, 0.556640625, 1.35205078125, 2.1474609375, 2.94287109375, 3.73828125, 4.53369140625, 5.3291015625, 6.12451171875, 6.919921875, 7.71533203125, 8.5107421875, 9.30615234375, 10.1015625, 10.89697265625, 11.6923828125, 12.48779296875, 13.283203125, 14.07861328125, 14.8740234375, 15.66943359375, 16.46484375, 17.26025390625, 18.0556640625, 18.85107421875, 19.646484375, 20.44189453125, 21.2373046875, 22.03271484375, 22.828125]}, "gradients/decoder.transformer.h.20.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 2.0, 1.0, 5.0, 7.0, 9.0, 9.0, 14.0, 9.0, 24.0, 17.0, 30.0, 39.0, 40.0, 47.0, 69.0, 125.0, 213.0, 342.0, 909.0, 5298.0, 922357.0, 2207601.0, 6551.0, 996.0, 344.0, 186.0, 128.0, 78.0, 51.0, 43.0, 31.0, 24.0, 21.0, 16.0, 15.0, 11.0, 14.0, 10.0, 5.0, 6.0, 5.0, 3.0, 1.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.21875, -35.93701171875, -34.6552734375, -33.37353515625, -32.091796875, -30.81005859375, -29.5283203125, -28.24658203125, -26.96484375, -25.68310546875, -24.4013671875, -23.11962890625, -21.837890625, -20.55615234375, -19.2744140625, -17.99267578125, -16.7109375, -15.42919921875, -14.1474609375, -12.86572265625, -11.583984375, -10.30224609375, -9.0205078125, -7.73876953125, -6.45703125, -5.17529296875, -3.8935546875, -2.61181640625, -1.330078125, -0.04833984375, 1.2333984375, 2.51513671875, 3.796875, 5.07861328125, 6.3603515625, 7.64208984375, 8.923828125, 10.20556640625, 11.4873046875, 12.76904296875, 14.05078125, 15.33251953125, 16.6142578125, 17.89599609375, 19.177734375, 20.45947265625, 21.7412109375, 23.02294921875, 24.3046875, 25.58642578125, 26.8681640625, 28.14990234375, 29.431640625, 30.71337890625, 31.9951171875, 33.27685546875, 34.55859375, 35.84033203125, 37.1220703125, 38.40380859375, 39.685546875, 40.96728515625, 42.2490234375, 43.53076171875, 44.8125]}, "gradients/decoder.transformer.h.20.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 104.0, 741.0, 163.0, 8.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.85691261291504, -25.356592178344727, -19.856271743774414, -14.355951309204102, -8.855630874633789, -3.3553104400634766, 2.145009994506836, 7.645330429077148, 13.145650863647461, 18.645971298217773, 24.146291732788086, 29.6466121673584, 35.146934509277344, 40.647254943847656, 46.14757537841797, 51.64789581298828, 57.148216247558594, 62.648536682128906, 68.14885711669922, 73.64917755126953, 79.14949798583984, 84.64981842041016, 90.15013885498047, 95.65045928955078, 101.1507797241211, 106.6511001586914, 112.15142059326172, 117.65174102783203, 123.15206146240234, 128.65237426757812, 134.1527099609375, 139.65301513671875, 145.15335083007812, 150.65367126464844, 156.15399169921875, 161.65431213378906, 167.15463256835938, 172.6549530029297, 178.1552734375, 183.6555938720703, 189.15591430664062, 194.65623474121094, 200.15655517578125, 205.65687561035156, 211.15719604492188, 216.6575164794922, 222.1578369140625, 227.6581573486328, 233.15847778320312, 238.65879821777344, 244.15911865234375, 249.65943908691406, 255.15975952148438, 260.66009521484375, 266.160400390625, 271.66070556640625, 277.1610412597656, 282.661376953125, 288.16168212890625, 293.6619873046875, 299.1623229980469, 304.66265869140625, 310.1629638671875, 315.66326904296875, 321.1636047363281]}, "gradients/decoder.transformer.h.20.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 7.0, 7.0, 1.0, 8.0, 10.0, 5.0, 7.0, 12.0, 17.0, 19.0, 18.0, 23.0, 23.0, 34.0, 26.0, 26.0, 32.0, 37.0, 36.0, 31.0, 43.0, 30.0, 43.0, 40.0, 34.0, 37.0, 38.0, 23.0, 28.0, 30.0, 29.0, 40.0, 30.0, 24.0, 21.0, 19.0, 21.0, 16.0, 19.0, 12.0, 11.0, 8.0, 10.0, 5.0, 5.0, 2.0, 2.0, 5.0, 3.0, 1.0, 2.0, 2.0, 1.0, 1.0], "bins": [-73.73575592041016, -71.53121948242188, -69.3266830444336, -67.12214660644531, -64.91761779785156, -62.713077545166016, -60.508544921875, -58.30400848388672, -56.09947204589844, -53.894935607910156, -51.690399169921875, -49.48586654663086, -47.28133010864258, -45.0767936706543, -42.87226104736328, -40.667724609375, -38.46318817138672, -36.25865173339844, -34.054115295410156, -31.84958267211914, -29.64504623413086, -27.440509796142578, -25.23597526550293, -23.03144073486328, -20.826904296875, -18.62236785888672, -16.41783332824707, -14.213297843933105, -12.00876235961914, -9.804226875305176, -7.599691390991211, -5.395155906677246, -3.1906280517578125, -0.9860925674438477, 1.2184429168701172, 3.422978401184082, 5.627513885498047, 7.832049369812012, 10.036584854125977, 12.241120338439941, 14.445655822753906, 16.650192260742188, 18.854726791381836, 21.059261322021484, 23.263797760009766, 25.468334197998047, 27.672868728637695, 29.877403259277344, 32.081939697265625, 34.286476135253906, 36.49101257324219, 38.6955451965332, 40.900081634521484, 43.104618072509766, 45.30915069580078, 47.51368713378906, 49.718223571777344, 51.922760009765625, 54.127296447753906, 56.33182907104492, 58.5363655090332, 60.740901947021484, 62.9454345703125, 65.14997100830078, 67.35450744628906]}, "gradients/decoder.transformer.h.19.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 6.0, 6.0, 7.0, 7.0, 12.0, 13.0, 11.0, 14.0, 16.0, 15.0, 20.0, 31.0, 26.0, 34.0, 40.0, 32.0, 44.0, 39.0, 35.0, 50.0, 49.0, 35.0, 44.0, 41.0, 42.0, 45.0, 31.0, 39.0, 35.0, 27.0, 27.0, 25.0, 12.0, 14.0, 21.0, 18.0, 7.0, 10.0, 3.0, 7.0, 5.0, 4.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0], "bins": [-7.4609375, -7.23419189453125, -7.0074462890625, -6.78070068359375, -6.553955078125, -6.32720947265625, -6.1004638671875, -5.87371826171875, -5.64697265625, -5.42022705078125, -5.1934814453125, -4.96673583984375, -4.739990234375, -4.51324462890625, -4.2864990234375, -4.05975341796875, -3.8330078125, -3.60626220703125, -3.3795166015625, -3.15277099609375, -2.926025390625, -2.69927978515625, -2.4725341796875, -2.24578857421875, -2.01904296875, -1.79229736328125, -1.5655517578125, -1.33880615234375, -1.112060546875, -0.88531494140625, -0.6585693359375, -0.43182373046875, -0.205078125, 0.02166748046875, 0.2484130859375, 0.47515869140625, 0.701904296875, 0.92864990234375, 1.1553955078125, 1.38214111328125, 1.60888671875, 1.83563232421875, 2.0623779296875, 2.28912353515625, 2.515869140625, 2.74261474609375, 2.9693603515625, 3.19610595703125, 3.4228515625, 3.64959716796875, 3.8763427734375, 4.10308837890625, 4.329833984375, 4.55657958984375, 4.7833251953125, 5.01007080078125, 5.23681640625, 5.46356201171875, 5.6903076171875, 5.91705322265625, 6.143798828125, 6.37054443359375, 6.5972900390625, 6.82403564453125, 7.05078125]}, "gradients/decoder.transformer.h.19.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 4.0, 6.0, 3.0, 11.0, 12.0, 21.0, 25.0, 25.0, 45.0, 82.0, 113.0, 213.0, 384.0, 955.0, 2602.0, 8093.0, 31482.0, 152262.0, 1111396.0, 2331537.0, 455523.0, 74169.0, 17488.0, 4794.0, 1622.0, 636.0, 323.0, 168.0, 95.0, 58.0, 31.0, 33.0, 18.0, 11.0, 12.0, 5.0, 3.0, 9.0, 6.0, 5.0, 4.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-17.03125, -16.51953125, -16.0078125, -15.49609375, -14.984375, -14.47265625, -13.9609375, -13.44921875, -12.9375, -12.42578125, -11.9140625, -11.40234375, -10.890625, -10.37890625, -9.8671875, -9.35546875, -8.84375, -8.33203125, -7.8203125, -7.30859375, -6.796875, -6.28515625, -5.7734375, -5.26171875, -4.75, -4.23828125, -3.7265625, -3.21484375, -2.703125, -2.19140625, -1.6796875, -1.16796875, -0.65625, -0.14453125, 0.3671875, 0.87890625, 1.390625, 1.90234375, 2.4140625, 2.92578125, 3.4375, 3.94921875, 4.4609375, 4.97265625, 5.484375, 5.99609375, 6.5078125, 7.01953125, 7.53125, 8.04296875, 8.5546875, 9.06640625, 9.578125, 10.08984375, 10.6015625, 11.11328125, 11.625, 12.13671875, 12.6484375, 13.16015625, 13.671875, 14.18359375, 14.6953125, 15.20703125, 15.71875]}, "gradients/decoder.transformer.h.19.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 4.0, 6.0, 5.0, 4.0, 5.0, 14.0, 20.0, 28.0, 22.0, 36.0, 54.0, 66.0, 77.0, 100.0, 153.0, 152.0, 200.0, 285.0, 361.0, 343.0, 401.0, 333.0, 301.0, 244.0, 201.0, 165.0, 125.0, 88.0, 65.0, 49.0, 48.0, 30.0, 20.0, 11.0, 11.0, 15.0, 11.0, 9.0, 4.0, 4.0, 5.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.109375, -16.6241455078125, -16.138916015625, -15.6536865234375, -15.16845703125, -14.6832275390625, -14.197998046875, -13.7127685546875, -13.2275390625, -12.7423095703125, -12.257080078125, -11.7718505859375, -11.28662109375, -10.8013916015625, -10.316162109375, -9.8309326171875, -9.345703125, -8.8604736328125, -8.375244140625, -7.8900146484375, -7.40478515625, -6.9195556640625, -6.434326171875, -5.9490966796875, -5.4638671875, -4.9786376953125, -4.493408203125, -4.0081787109375, -3.52294921875, -3.0377197265625, -2.552490234375, -2.0672607421875, -1.58203125, -1.0968017578125, -0.611572265625, -0.1263427734375, 0.35888671875, 0.8441162109375, 1.329345703125, 1.8145751953125, 2.2998046875, 2.7850341796875, 3.270263671875, 3.7554931640625, 4.24072265625, 4.7259521484375, 5.211181640625, 5.6964111328125, 6.181640625, 6.6668701171875, 7.152099609375, 7.6373291015625, 8.12255859375, 8.6077880859375, 9.093017578125, 9.5782470703125, 10.0634765625, 10.5487060546875, 11.033935546875, 11.5191650390625, 12.00439453125, 12.4896240234375, 12.974853515625, 13.4600830078125, 13.9453125]}, "gradients/decoder.transformer.h.19.mlp.c_fc.weight": {"_type": "histogram", "values": [4.0, 3.0, 1.0, 1.0, 3.0, 1.0, 5.0, 12.0, 7.0, 20.0, 22.0, 36.0, 37.0, 83.0, 109.0, 262.0, 563.0, 1232.0, 3581.0, 14071.0, 74097.0, 576076.0, 2846242.0, 582781.0, 74792.0, 14167.0, 3711.0, 1227.0, 490.0, 258.0, 136.0, 76.0, 60.0, 36.0, 26.0, 18.0, 9.0, 9.0, 9.0, 7.0, 4.0, 4.0, 1.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-23.25, -22.216796875, -21.18359375, -20.150390625, -19.1171875, -18.083984375, -17.05078125, -16.017578125, -14.984375, -13.951171875, -12.91796875, -11.884765625, -10.8515625, -9.818359375, -8.78515625, -7.751953125, -6.71875, -5.685546875, -4.65234375, -3.619140625, -2.5859375, -1.552734375, -0.51953125, 0.513671875, 1.546875, 2.580078125, 3.61328125, 4.646484375, 5.6796875, 6.712890625, 7.74609375, 8.779296875, 9.8125, 10.845703125, 11.87890625, 12.912109375, 13.9453125, 14.978515625, 16.01171875, 17.044921875, 18.078125, 19.111328125, 20.14453125, 21.177734375, 22.2109375, 23.244140625, 24.27734375, 25.310546875, 26.34375, 27.376953125, 28.41015625, 29.443359375, 30.4765625, 31.509765625, 32.54296875, 33.576171875, 34.609375, 35.642578125, 36.67578125, 37.708984375, 38.7421875, 39.775390625, 40.80859375, 41.841796875, 42.875]}, "gradients/decoder.transformer.h.19.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 7.0, 14.0, 28.0, 48.0, 82.0, 122.0, 130.0, 160.0, 119.0, 119.0, 89.0, 38.0, 23.0, 11.0, 11.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-87.89315795898438, -83.9896011352539, -80.08605194091797, -76.1824951171875, -72.27893829345703, -68.37538146972656, -64.47183227539062, -60.568275451660156, -56.66472244262695, -52.76116943359375, -48.85761260986328, -44.95405960083008, -41.050506591796875, -37.146949768066406, -33.2433967590332, -29.339841842651367, -25.43628692626953, -21.532732009887695, -17.62917709350586, -13.725624084472656, -9.82206916809082, -5.918514251708984, -2.0149612426757812, 1.8885936737060547, 5.792148590087891, 9.695703506469727, 13.599257469177246, 17.502811431884766, 21.4063663482666, 25.309921264648438, 29.21347427368164, 33.117027282714844, 37.02058410644531, 40.924137115478516, 44.827693939208984, 48.73124694824219, 52.634803771972656, 56.53835678100586, 60.44190979003906, 64.34546661376953, 68.2490234375, 72.15258026123047, 76.0561294555664, 79.95968627929688, 83.86324310302734, 87.76679992675781, 91.67034912109375, 95.57390594482422, 99.47745513916016, 103.38101196289062, 107.28456115722656, 111.18811798095703, 115.0916748046875, 118.99522399902344, 122.8987808227539, 126.80233764648438, 130.7058868408203, 134.60943603515625, 138.51300048828125, 142.4165496826172, 146.32009887695312, 150.22366333007812, 154.12721252441406, 158.03076171875, 161.934326171875]}, "gradients/decoder.transformer.h.19.ln_2.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 3.0, 2.0, 4.0, 4.0, 3.0, 5.0, 10.0, 13.0, 11.0, 15.0, 15.0, 16.0, 12.0, 15.0, 29.0, 21.0, 28.0, 26.0, 32.0, 38.0, 36.0, 33.0, 30.0, 31.0, 30.0, 47.0, 41.0, 48.0, 38.0, 28.0, 27.0, 29.0, 31.0, 30.0, 28.0, 18.0, 27.0, 28.0, 15.0, 13.0, 11.0, 18.0, 17.0, 7.0, 10.0, 5.0, 6.0, 6.0, 5.0, 4.0, 4.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0], "bins": [-54.07867431640625, -52.431583404541016, -50.78449249267578, -49.13740158081055, -47.49031066894531, -45.84321594238281, -44.19612503051758, -42.549034118652344, -40.90194320678711, -39.254852294921875, -37.60776138305664, -35.960670471191406, -34.313575744628906, -32.66648864746094, -31.019393920898438, -29.372303009033203, -27.72521209716797, -26.078121185302734, -24.4310302734375, -22.783937454223633, -21.1368465423584, -19.489755630493164, -17.842662811279297, -16.195571899414062, -14.548480987548828, -12.901390075683594, -11.254298210144043, -9.607206344604492, -7.960115432739258, -6.313024520874023, -4.665932655334473, -3.018840789794922, -1.3717498779296875, 0.2753415107727051, 1.9224328994750977, 3.5695242881774902, 5.216615676879883, 6.863706588745117, 8.510798454284668, 10.157890319824219, 11.804981231689453, 13.452072143554688, 15.099164009094238, 16.74625587463379, 18.393346786499023, 20.040437698364258, 21.687530517578125, 23.33462142944336, 24.981712341308594, 26.628803253173828, 28.275894165039062, 29.92298698425293, 31.570077896118164, 33.21717071533203, 34.864261627197266, 36.5113525390625, 38.158443450927734, 39.80553436279297, 41.4526252746582, 43.09971618652344, 44.74681091308594, 46.393898010253906, 48.040992736816406, 49.68808364868164, 51.335174560546875]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 6.0, 6.0, 9.0, 4.0, 4.0, 4.0, 10.0, 8.0, 6.0, 16.0, 15.0, 14.0, 14.0, 23.0, 23.0, 34.0, 36.0, 34.0, 37.0, 26.0, 39.0, 45.0, 46.0, 37.0, 44.0, 43.0, 34.0, 51.0, 37.0, 39.0, 33.0, 26.0, 30.0, 29.0, 24.0, 21.0, 18.0, 13.0, 11.0, 6.0, 10.0, 9.0, 10.0, 5.0, 8.0, 3.0, 1.0, 5.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 1.0], "bins": [-7.35546875, -7.13275146484375, -6.9100341796875, -6.68731689453125, -6.464599609375, -6.24188232421875, -6.0191650390625, -5.79644775390625, -5.57373046875, -5.35101318359375, -5.1282958984375, -4.90557861328125, -4.682861328125, -4.46014404296875, -4.2374267578125, -4.01470947265625, -3.7919921875, -3.56927490234375, -3.3465576171875, -3.12384033203125, -2.901123046875, -2.67840576171875, -2.4556884765625, -2.23297119140625, -2.01025390625, -1.78753662109375, -1.5648193359375, -1.34210205078125, -1.119384765625, -0.89666748046875, -0.6739501953125, -0.45123291015625, -0.228515625, -0.00579833984375, 0.2169189453125, 0.43963623046875, 0.662353515625, 0.88507080078125, 1.1077880859375, 1.33050537109375, 1.55322265625, 1.77593994140625, 1.9986572265625, 2.22137451171875, 2.444091796875, 2.66680908203125, 2.8895263671875, 3.11224365234375, 3.3349609375, 3.55767822265625, 3.7803955078125, 4.00311279296875, 4.225830078125, 4.44854736328125, 4.6712646484375, 4.89398193359375, 5.11669921875, 5.33941650390625, 5.5621337890625, 5.78485107421875, 6.007568359375, 6.23028564453125, 6.4530029296875, 6.67572021484375, 6.8984375]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 4.0, 1.0, 11.0, 14.0, 14.0, 24.0, 47.0, 42.0, 75.0, 89.0, 152.0, 228.0, 328.0, 504.0, 734.0, 1073.0, 1530.0, 2178.0, 3389.0, 4977.0, 7556.0, 11238.0, 16924.0, 25896.0, 39683.0, 59701.0, 89308.0, 125731.0, 159288.0, 150508.0, 113988.0, 78668.0, 52652.0, 34570.0, 22522.0, 14874.0, 9817.0, 6563.0, 4363.0, 2942.0, 2013.0, 1363.0, 993.0, 612.0, 437.0, 292.0, 210.0, 137.0, 108.0, 56.0, 56.0, 22.0, 23.0, 15.0, 11.0, 5.0, 7.0, 5.0, 0.0, 2.0], "bins": [-0.51513671875, -0.49945068359375, -0.4837646484375, -0.46807861328125, -0.452392578125, -0.43670654296875, -0.4210205078125, -0.40533447265625, -0.3896484375, -0.37396240234375, -0.3582763671875, -0.34259033203125, -0.326904296875, -0.31121826171875, -0.2955322265625, -0.27984619140625, -0.26416015625, -0.24847412109375, -0.2327880859375, -0.21710205078125, -0.201416015625, -0.18572998046875, -0.1700439453125, -0.15435791015625, -0.138671875, -0.12298583984375, -0.1072998046875, -0.09161376953125, -0.075927734375, -0.06024169921875, -0.0445556640625, -0.02886962890625, -0.01318359375, 0.00250244140625, 0.0181884765625, 0.03387451171875, 0.049560546875, 0.06524658203125, 0.0809326171875, 0.09661865234375, 0.1123046875, 0.12799072265625, 0.1436767578125, 0.15936279296875, 0.175048828125, 0.19073486328125, 0.2064208984375, 0.22210693359375, 0.23779296875, 0.25347900390625, 0.2691650390625, 0.28485107421875, 0.300537109375, 0.31622314453125, 0.3319091796875, 0.34759521484375, 0.36328125, 0.37896728515625, 0.3946533203125, 0.41033935546875, 0.426025390625, 0.44171142578125, 0.4573974609375, 0.47308349609375, 0.48876953125]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 3.0, 6.0, 13.0, 4.0, 10.0, 15.0, 15.0, 22.0, 14.0, 13.0, 22.0, 19.0, 29.0, 28.0, 26.0, 31.0, 38.0, 30.0, 38.0, 35.0, 24.0, 34.0, 1077.0, 43.0, 45.0, 39.0, 42.0, 28.0, 33.0, 29.0, 27.0, 15.0, 20.0, 14.0, 18.0, 23.0, 15.0, 22.0, 11.0, 10.0, 14.0, 10.0, 9.0, 2.0, 3.0, 1.0, 1.0, 2.0, 4.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.9921875, -3.85650634765625, -3.7208251953125, -3.58514404296875, -3.449462890625, -3.31378173828125, -3.1781005859375, -3.04241943359375, -2.90673828125, -2.77105712890625, -2.6353759765625, -2.49969482421875, -2.364013671875, -2.22833251953125, -2.0926513671875, -1.95697021484375, -1.8212890625, -1.68560791015625, -1.5499267578125, -1.41424560546875, -1.278564453125, -1.14288330078125, -1.0072021484375, -0.87152099609375, -0.73583984375, -0.60015869140625, -0.4644775390625, -0.32879638671875, -0.193115234375, -0.05743408203125, 0.0782470703125, 0.21392822265625, 0.349609375, 0.48529052734375, 0.6209716796875, 0.75665283203125, 0.892333984375, 1.02801513671875, 1.1636962890625, 1.29937744140625, 1.43505859375, 1.57073974609375, 1.7064208984375, 1.84210205078125, 1.977783203125, 2.11346435546875, 2.2491455078125, 2.38482666015625, 2.5205078125, 2.65618896484375, 2.7918701171875, 2.92755126953125, 3.063232421875, 3.19891357421875, 3.3345947265625, 3.47027587890625, 3.60595703125, 3.74163818359375, 3.8773193359375, 4.01300048828125, 4.148681640625, 4.28436279296875, 4.4200439453125, 4.55572509765625, 4.69140625]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 5.0, 4.0, 8.0, 10.0, 29.0, 37.0, 48.0, 102.0, 149.0, 189.0, 299.0, 471.0, 730.0, 1200.0, 1955.0, 3039.0, 4810.0, 7704.0, 12317.0, 19660.0, 31202.0, 49694.0, 75387.0, 108689.0, 151303.0, 1192186.0, 139257.0, 102477.0, 70776.0, 45867.0, 28897.0, 18265.0, 11308.0, 7164.0, 4357.0, 2693.0, 1776.0, 1088.0, 739.0, 445.0, 286.0, 186.0, 102.0, 63.0, 50.0, 36.0, 19.0, 21.0, 18.0, 11.0, 7.0, 4.0, 3.0, 2.0, 0.0, 1.0], "bins": [-0.38671875, -0.37514495849609375, -0.3635711669921875, -0.35199737548828125, -0.340423583984375, -0.32884979248046875, -0.3172760009765625, -0.30570220947265625, -0.29412841796875, -0.28255462646484375, -0.2709808349609375, -0.25940704345703125, -0.247833251953125, -0.23625946044921875, -0.2246856689453125, -0.21311187744140625, -0.2015380859375, -0.18996429443359375, -0.1783905029296875, -0.16681671142578125, -0.155242919921875, -0.14366912841796875, -0.1320953369140625, -0.12052154541015625, -0.10894775390625, -0.09737396240234375, -0.0858001708984375, -0.07422637939453125, -0.062652587890625, -0.05107879638671875, -0.0395050048828125, -0.02793121337890625, -0.016357421875, -0.00478363037109375, 0.0067901611328125, 0.01836395263671875, 0.029937744140625, 0.04151153564453125, 0.0530853271484375, 0.06465911865234375, 0.07623291015625, 0.08780670166015625, 0.0993804931640625, 0.11095428466796875, 0.122528076171875, 0.13410186767578125, 0.1456756591796875, 0.15724945068359375, 0.1688232421875, 0.18039703369140625, 0.1919708251953125, 0.20354461669921875, 0.215118408203125, 0.22669219970703125, 0.2382659912109375, 0.24983978271484375, 0.26141357421875, 0.27298736572265625, 0.2845611572265625, 0.29613494873046875, 0.307708740234375, 0.31928253173828125, 0.3308563232421875, 0.34243011474609375, 0.35400390625]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 5.0, 1.0, 6.0, 6.0, 6.0, 7.0, 6.0, 2.0, 15.0, 15.0, 14.0, 14.0, 12.0, 31.0, 23.0, 30.0, 51.0, 62.0, 66.0, 71.0, 60.0, 68.0, 74.0, 56.0, 44.0, 43.0, 39.0, 34.0, 20.0, 23.0, 17.0, 18.0, 5.0, 12.0, 13.0, 10.0, 2.0, 6.0, 5.0, 3.0, 5.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.00901031494140625, -0.00873422622680664, -0.008458137512207031, -0.008182048797607422, -0.007905960083007812, -0.007629871368408203, -0.007353782653808594, -0.007077693939208984, -0.006801605224609375, -0.006525516510009766, -0.006249427795410156, -0.005973339080810547, -0.0056972503662109375, -0.005421161651611328, -0.005145072937011719, -0.004868984222412109, -0.0045928955078125, -0.004316806793212891, -0.004040718078613281, -0.003764629364013672, -0.0034885406494140625, -0.003212451934814453, -0.0029363632202148438, -0.0026602745056152344, -0.002384185791015625, -0.0021080970764160156, -0.0018320083618164062, -0.0015559196472167969, -0.0012798309326171875, -0.0010037422180175781, -0.0007276535034179688, -0.0004515647888183594, -0.00017547607421875, 0.00010061264038085938, 0.00037670135498046875, 0.0006527900695800781, 0.0009288787841796875, 0.0012049674987792969, 0.0014810562133789062, 0.0017571449279785156, 0.002033233642578125, 0.0023093223571777344, 0.0025854110717773438, 0.002861499786376953, 0.0031375885009765625, 0.003413677215576172, 0.0036897659301757812, 0.003965854644775391, 0.004241943359375, 0.004518032073974609, 0.004794120788574219, 0.005070209503173828, 0.0053462982177734375, 0.005622386932373047, 0.005898475646972656, 0.006174564361572266, 0.006450653076171875, 0.006726741790771484, 0.007002830505371094, 0.007278919219970703, 0.0075550079345703125, 0.007831096649169922, 0.008107185363769531, 0.00838327407836914, 0.00865936279296875]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 3.0, 2.0, 5.0, 2.0, 3.0, 3.0, 7.0, 10.0, 22.0, 13.0, 19.0, 30.0, 38.0, 45.0, 53.0, 79.0, 93.0, 141.0, 193.0, 338.0, 744.0, 9317.0, 907760.0, 125970.0, 2273.0, 482.0, 232.0, 187.0, 122.0, 81.0, 56.0, 49.0, 37.0, 35.0, 25.0, 16.0, 10.0, 11.0, 12.0, 9.0, 7.0, 9.0, 7.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 2.0, 1.0], "bins": [-0.1536865234375, -0.14899826049804688, -0.14430999755859375, -0.13962173461914062, -0.1349334716796875, -0.13024520874023438, -0.12555694580078125, -0.12086868286132812, -0.116180419921875, -0.11149215698242188, -0.10680389404296875, -0.10211563110351562, -0.0974273681640625, -0.09273910522460938, -0.08805084228515625, -0.08336257934570312, -0.07867431640625, -0.07398605346679688, -0.06929779052734375, -0.06460952758789062, -0.0599212646484375, -0.055233001708984375, -0.05054473876953125, -0.045856475830078125, -0.041168212890625, -0.036479949951171875, -0.03179168701171875, -0.027103424072265625, -0.0224151611328125, -0.017726898193359375, -0.01303863525390625, -0.008350372314453125, -0.003662109375, 0.001026153564453125, 0.00571441650390625, 0.010402679443359375, 0.0150909423828125, 0.019779205322265625, 0.02446746826171875, 0.029155731201171875, 0.033843994140625, 0.038532257080078125, 0.04322052001953125, 0.047908782958984375, 0.0525970458984375, 0.057285308837890625, 0.06197357177734375, 0.06666183471679688, 0.07135009765625, 0.07603836059570312, 0.08072662353515625, 0.08541488647460938, 0.0901031494140625, 0.09479141235351562, 0.09947967529296875, 0.10416793823242188, 0.108856201171875, 0.11354446411132812, 0.11823272705078125, 0.12292098999023438, 0.1276092529296875, 0.13229751586914062, 0.13698577880859375, 0.14167404174804688, 0.1463623046875]}, "gradients/decoder.transformer.h.19.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 108.0, 633.0, 264.0, 9.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05860211327672005, -0.05719451233744621, -0.05578691512346268, -0.05437931418418884, -0.05297171697020531, -0.05156411603093147, -0.05015651881694794, -0.0487489178776741, -0.04734132066369057, -0.04593371972441673, -0.0445261225104332, -0.04311852157115936, -0.04171092435717583, -0.04030332341790199, -0.03889572620391846, -0.03748812526464462, -0.03608052432537079, -0.034672923386096954, -0.03326532617211342, -0.031857725232839584, -0.03045012801885605, -0.029042528942227364, -0.02763492986559868, -0.026227328926324844, -0.02481973171234131, -0.023412132635712624, -0.02200453355908394, -0.020596934482455254, -0.01918933540582657, -0.017781736329197884, -0.0163741372525692, -0.014966537244617939, -0.013558939099311829, -0.012151340022683144, -0.010743740946054459, -0.009336141869425774, -0.007928542792797089, -0.006520943250507116, -0.005113343708217144, -0.003705744631588459, -0.002298145554959774, -0.0008905463619157672, 0.0005170528311282396, 0.0019246521405875683, 0.0033322512172162533, 0.004739850293844938, 0.006147449836134911, 0.007555048912763596, 0.00896264798939228, 0.010370247066020966, 0.01177784614264965, 0.013185445219278336, 0.01459304429590702, 0.016000643372535706, 0.01740824431180954, 0.018815841525793076, 0.02022344246506691, 0.021631041541695595, 0.02303864061832428, 0.024446239694952965, 0.02585383877158165, 0.027261437848210335, 0.02866903692483902, 0.030076637864112854, 0.03148423507809639]}, "gradients/decoder.transformer.h.19.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 1.0, 5.0, 2.0, 6.0, 7.0, 7.0, 7.0, 6.0, 15.0, 14.0, 14.0, 17.0, 14.0, 19.0, 21.0, 32.0, 26.0, 28.0, 32.0, 31.0, 28.0, 36.0, 44.0, 38.0, 42.0, 49.0, 33.0, 36.0, 52.0, 39.0, 34.0, 33.0, 30.0, 17.0, 26.0, 20.0, 28.0, 13.0, 22.0, 17.0, 16.0, 14.0, 10.0, 7.0, 4.0, 6.0, 2.0, 5.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.004636287689208984, -0.004491076804697514, -0.004345865920186043, -0.004200655035674572, -0.004055444151163101, -0.00391023326665163, -0.0037650223821401596, -0.003619811497628689, -0.003474600613117218, -0.0033293897286057472, -0.0031841788440942764, -0.0030389679595828056, -0.002893757075071335, -0.002748546190559864, -0.0026033353060483932, -0.0024581244215369225, -0.0023129135370254517, -0.002167702652513981, -0.00202249176800251, -0.0018772808834910393, -0.0017320699989795685, -0.0015868591144680977, -0.001441648229956627, -0.001296437345445156, -0.0011512264609336853, -0.0010060155764222145, -0.0008608046919107437, -0.0007155938073992729, -0.0005703829228878021, -0.00042517203837633133, -0.00027996115386486053, -0.00013475026935338974, 1.0460615158081055e-05, 0.00015567149966955185, 0.00030088238418102264, 0.00044609326869249344, 0.0005913041532039642, 0.000736515037715435, 0.0008817259222269058, 0.0010269368067383766, 0.0011721476912498474, 0.0013173585757613182, 0.001462569460272789, 0.0016077803447842598, 0.0017529912292957306, 0.0018982021138072014, 0.002043412998318672, 0.002188623882830143, 0.0023338347673416138, 0.0024790456518530846, 0.0026242565363645554, 0.002769467420876026, 0.002914678305387497, 0.0030598891898989677, 0.0032051000744104385, 0.0033503109589219093, 0.00349552184343338, 0.003640732727944851, 0.0037859436124563217, 0.0039311544969677925, 0.004076365381479263, 0.004221576265990734, 0.004366787150502205, 0.004511998035013676, 0.0046572089195251465]}, "gradients/decoder.transformer.h.19.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 6.0, 6.0, 9.0, 4.0, 4.0, 4.0, 10.0, 8.0, 6.0, 16.0, 15.0, 14.0, 14.0, 23.0, 23.0, 34.0, 36.0, 34.0, 37.0, 25.0, 40.0, 45.0, 46.0, 37.0, 44.0, 43.0, 34.0, 51.0, 37.0, 39.0, 33.0, 26.0, 30.0, 29.0, 24.0, 21.0, 18.0, 13.0, 11.0, 6.0, 10.0, 9.0, 10.0, 5.0, 8.0, 3.0, 1.0, 5.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 1.0], "bins": [-7.35546875, -7.13275146484375, -6.9100341796875, -6.68731689453125, -6.464599609375, -6.24188232421875, -6.0191650390625, -5.79644775390625, -5.57373046875, -5.35101318359375, -5.1282958984375, -4.90557861328125, -4.682861328125, -4.46014404296875, -4.2374267578125, -4.01470947265625, -3.7919921875, -3.56927490234375, -3.3465576171875, -3.12384033203125, -2.901123046875, -2.67840576171875, -2.4556884765625, -2.23297119140625, -2.01025390625, -1.78753662109375, -1.5648193359375, -1.34210205078125, -1.119384765625, -0.89666748046875, -0.6739501953125, -0.45123291015625, -0.228515625, -0.00579833984375, 0.2169189453125, 0.43963623046875, 0.662353515625, 0.88507080078125, 1.1077880859375, 1.33050537109375, 1.55322265625, 1.77593994140625, 1.9986572265625, 2.22137451171875, 2.444091796875, 2.66680908203125, 2.8895263671875, 3.11224365234375, 3.3349609375, 3.55767822265625, 3.7803955078125, 4.00311279296875, 4.225830078125, 4.44854736328125, 4.6712646484375, 4.89398193359375, 5.11669921875, 5.33941650390625, 5.5621337890625, 5.78485107421875, 6.007568359375, 6.23028564453125, 6.4530029296875, 6.67572021484375, 6.8984375]}, "gradients/decoder.transformer.h.19.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 1.0, 3.0, 5.0, 9.0, 7.0, 12.0, 5.0, 13.0, 15.0, 18.0, 35.0, 45.0, 83.0, 121.0, 203.0, 353.0, 638.0, 1017.0, 1738.0, 3031.0, 5467.0, 10320.0, 20907.0, 48918.0, 134121.0, 357145.0, 286598.0, 101926.0, 38505.0, 17507.0, 8833.0, 4644.0, 2648.0, 1448.0, 873.0, 525.0, 307.0, 175.0, 116.0, 70.0, 50.0, 22.0, 30.0, 14.0, 7.0, 7.0, 6.0, 10.0, 2.0, 4.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0], "bins": [-4.296875, -4.16204833984375, -4.0272216796875, -3.89239501953125, -3.757568359375, -3.62274169921875, -3.4879150390625, -3.35308837890625, -3.21826171875, -3.08343505859375, -2.9486083984375, -2.81378173828125, -2.678955078125, -2.54412841796875, -2.4093017578125, -2.27447509765625, -2.1396484375, -2.00482177734375, -1.8699951171875, -1.73516845703125, -1.600341796875, -1.46551513671875, -1.3306884765625, -1.19586181640625, -1.06103515625, -0.92620849609375, -0.7913818359375, -0.65655517578125, -0.521728515625, -0.38690185546875, -0.2520751953125, -0.11724853515625, 0.017578125, 0.15240478515625, 0.2872314453125, 0.42205810546875, 0.556884765625, 0.69171142578125, 0.8265380859375, 0.96136474609375, 1.09619140625, 1.23101806640625, 1.3658447265625, 1.50067138671875, 1.635498046875, 1.77032470703125, 1.9051513671875, 2.03997802734375, 2.1748046875, 2.30963134765625, 2.4444580078125, 2.57928466796875, 2.714111328125, 2.84893798828125, 2.9837646484375, 3.11859130859375, 3.25341796875, 3.38824462890625, 3.5230712890625, 3.65789794921875, 3.792724609375, 3.92755126953125, 4.0623779296875, 4.19720458984375, 4.33203125]}, "gradients/decoder.transformer.h.19.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 3.0, 1.0, 3.0, 4.0, 9.0, 6.0, 5.0, 10.0, 10.0, 20.0, 19.0, 25.0, 29.0, 31.0, 33.0, 37.0, 31.0, 44.0, 55.0, 65.0, 141.0, 1741.0, 251.0, 80.0, 61.0, 51.0, 44.0, 34.0, 28.0, 36.0, 24.0, 25.0, 20.0, 17.0, 13.0, 14.0, 12.0, 7.0, 10.0, 8.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-27.5, -26.573486328125, -25.64697265625, -24.720458984375, -23.7939453125, -22.867431640625, -21.94091796875, -21.014404296875, -20.087890625, -19.161376953125, -18.23486328125, -17.308349609375, -16.3818359375, -15.455322265625, -14.52880859375, -13.602294921875, -12.67578125, -11.749267578125, -10.82275390625, -9.896240234375, -8.9697265625, -8.043212890625, -7.11669921875, -6.190185546875, -5.263671875, -4.337158203125, -3.41064453125, -2.484130859375, -1.5576171875, -0.631103515625, 0.29541015625, 1.221923828125, 2.1484375, 3.074951171875, 4.00146484375, 4.927978515625, 5.8544921875, 6.781005859375, 7.70751953125, 8.634033203125, 9.560546875, 10.487060546875, 11.41357421875, 12.340087890625, 13.2666015625, 14.193115234375, 15.11962890625, 16.046142578125, 16.97265625, 17.899169921875, 18.82568359375, 19.752197265625, 20.6787109375, 21.605224609375, 22.53173828125, 23.458251953125, 24.384765625, 25.311279296875, 26.23779296875, 27.164306640625, 28.0908203125, 29.017333984375, 29.94384765625, 30.870361328125, 31.796875]}, "gradients/decoder.transformer.h.19.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 4.0, 6.0, 10.0, 6.0, 19.0, 13.0, 8.0, 21.0, 27.0, 33.0, 30.0, 46.0, 55.0, 112.0, 151.0, 273.0, 494.0, 1806.0, 96282.0, 3036736.0, 7757.0, 830.0, 359.0, 188.0, 105.0, 77.0, 66.0, 45.0, 40.0, 28.0, 16.0, 17.0, 11.0, 8.0, 9.0, 6.0, 8.0, 5.0, 4.0, 1.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-51.90625, -50.39111328125, -48.8759765625, -47.36083984375, -45.845703125, -44.33056640625, -42.8154296875, -41.30029296875, -39.78515625, -38.27001953125, -36.7548828125, -35.23974609375, -33.724609375, -32.20947265625, -30.6943359375, -29.17919921875, -27.6640625, -26.14892578125, -24.6337890625, -23.11865234375, -21.603515625, -20.08837890625, -18.5732421875, -17.05810546875, -15.54296875, -14.02783203125, -12.5126953125, -10.99755859375, -9.482421875, -7.96728515625, -6.4521484375, -4.93701171875, -3.421875, -1.90673828125, -0.3916015625, 1.12353515625, 2.638671875, 4.15380859375, 5.6689453125, 7.18408203125, 8.69921875, 10.21435546875, 11.7294921875, 13.24462890625, 14.759765625, 16.27490234375, 17.7900390625, 19.30517578125, 20.8203125, 22.33544921875, 23.8505859375, 25.36572265625, 26.880859375, 28.39599609375, 29.9111328125, 31.42626953125, 32.94140625, 34.45654296875, 35.9716796875, 37.48681640625, 39.001953125, 40.51708984375, 42.0322265625, 43.54736328125, 45.0625]}, "gradients/decoder.transformer.h.19.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 7.0, 68.0, 253.0, 427.0, 203.0, 48.0, 8.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-45.062313079833984, -42.47664260864258, -39.89097595214844, -37.30530548095703, -34.71963882446289, -32.133968353271484, -29.54829978942871, -26.962631225585938, -24.376962661743164, -21.79129409790039, -19.205625534057617, -16.619956970214844, -14.034287452697754, -11.44861888885498, -8.86294937133789, -6.277280807495117, -3.6916122436523438, -1.1059434413909912, 1.4797253608703613, 4.065394401550293, 6.651062965393066, 9.23673152923584, 11.82240104675293, 14.408069610595703, 16.993738174438477, 19.57940673828125, 22.165075302124023, 24.750743865966797, 27.336414337158203, 29.922080993652344, 32.50775146484375, 35.093421936035156, 37.67909240722656, 40.26476287841797, 42.85042953491211, 45.436100006103516, 48.021766662597656, 50.60743713378906, 53.19310760498047, 55.77877426147461, 58.36444091796875, 60.950111389160156, 63.5357780456543, 66.12144470214844, 68.70711517333984, 71.29278564453125, 73.87845611572266, 76.46412658691406, 79.04978942871094, 81.63545989990234, 84.22113037109375, 86.80679321289062, 89.39246368408203, 91.97813415527344, 94.56380462646484, 97.14947509765625, 99.73514556884766, 102.32081604003906, 104.90648651123047, 107.49214935302734, 110.07781982421875, 112.66349029541016, 115.24916076660156, 117.83482360839844, 120.42049407958984]}, "gradients/decoder.transformer.h.19.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 2.0, 3.0, 4.0, 5.0, 8.0, 12.0, 11.0, 20.0, 20.0, 22.0, 16.0, 29.0, 27.0, 38.0, 31.0, 29.0, 64.0, 38.0, 38.0, 55.0, 51.0, 54.0, 49.0, 41.0, 36.0, 36.0, 45.0, 35.0, 30.0, 20.0, 25.0, 22.0, 13.0, 12.0, 12.0, 14.0, 12.0, 8.0, 5.0, 5.0, 4.0, 5.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-82.09048461914062, -79.2146987915039, -76.33891296386719, -73.46312713623047, -70.58734130859375, -67.71156311035156, -64.83577728271484, -61.959991455078125, -59.084205627441406, -56.20841979980469, -53.33263397216797, -50.456851959228516, -47.5810661315918, -44.70528030395508, -41.829498291015625, -38.953712463378906, -36.07792663574219, -33.20214080810547, -30.326356887817383, -27.450572967529297, -24.574787139892578, -21.69900131225586, -18.823217391967773, -15.947433471679688, -13.071647644042969, -10.195862770080566, -7.320077896118164, -4.444293022155762, -1.5685081481933594, 1.307276725769043, 4.183061599731445, 7.058845520019531, 9.934623718261719, 12.810408592224121, 15.686193466186523, 18.56197738647461, 21.437763214111328, 24.313549041748047, 27.189332962036133, 30.06511688232422, 32.94090270996094, 35.816688537597656, 38.692474365234375, 41.56825637817383, 44.44404220581055, 47.319828033447266, 50.19561004638672, 53.07139587402344, 55.947181701660156, 58.822967529296875, 61.698753356933594, 64.57453918457031, 67.4503173828125, 70.32610321044922, 73.20188903808594, 76.07767486572266, 78.95346069335938, 81.8292465209961, 84.70503234863281, 87.58081817626953, 90.45660400390625, 93.33238220214844, 96.20816802978516, 99.08395385742188, 101.9597396850586]}, "gradients/decoder.transformer.h.18.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 6.0, 6.0, 4.0, 6.0, 6.0, 4.0, 5.0, 5.0, 7.0, 10.0, 13.0, 13.0, 11.0, 12.0, 25.0, 24.0, 25.0, 33.0, 30.0, 35.0, 34.0, 40.0, 34.0, 42.0, 41.0, 39.0, 41.0, 35.0, 38.0, 36.0, 47.0, 42.0, 27.0, 29.0, 32.0, 25.0, 18.0, 18.0, 23.0, 12.0, 16.0, 7.0, 10.0, 7.0, 8.0, 8.0, 8.0, 3.0, 4.0, 1.0, 3.0, 4.0, 0.0, 2.0, 0.0, 3.0, 2.0], "bins": [-7.2109375, -6.99176025390625, -6.7725830078125, -6.55340576171875, -6.334228515625, -6.11505126953125, -5.8958740234375, -5.67669677734375, -5.45751953125, -5.23834228515625, -5.0191650390625, -4.79998779296875, -4.580810546875, -4.36163330078125, -4.1424560546875, -3.92327880859375, -3.7041015625, -3.48492431640625, -3.2657470703125, -3.04656982421875, -2.827392578125, -2.60821533203125, -2.3890380859375, -2.16986083984375, -1.95068359375, -1.73150634765625, -1.5123291015625, -1.29315185546875, -1.073974609375, -0.85479736328125, -0.6356201171875, -0.41644287109375, -0.197265625, 0.02191162109375, 0.2410888671875, 0.46026611328125, 0.679443359375, 0.89862060546875, 1.1177978515625, 1.33697509765625, 1.55615234375, 1.77532958984375, 1.9945068359375, 2.21368408203125, 2.432861328125, 2.65203857421875, 2.8712158203125, 3.09039306640625, 3.3095703125, 3.52874755859375, 3.7479248046875, 3.96710205078125, 4.186279296875, 4.40545654296875, 4.6246337890625, 4.84381103515625, 5.06298828125, 5.28216552734375, 5.5013427734375, 5.72052001953125, 5.939697265625, 6.15887451171875, 6.3780517578125, 6.59722900390625, 6.81640625]}, "gradients/decoder.transformer.h.18.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 4.0, 3.0, 3.0, 1.0, 4.0, 7.0, 4.0, 8.0, 13.0, 6.0, 9.0, 8.0, 12.0, 16.0, 27.0, 32.0, 53.0, 78.0, 130.0, 243.0, 583.0, 1586.0, 4998.0, 21585.0, 129150.0, 1343797.0, 2359294.0, 282260.0, 38364.0, 8085.0, 2245.0, 827.0, 357.0, 169.0, 95.0, 48.0, 34.0, 36.0, 22.0, 14.0, 21.0, 9.0, 15.0, 9.0, 5.0, 5.0, 6.0, 3.0, 6.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-20.15625, -19.52978515625, -18.9033203125, -18.27685546875, -17.650390625, -17.02392578125, -16.3974609375, -15.77099609375, -15.14453125, -14.51806640625, -13.8916015625, -13.26513671875, -12.638671875, -12.01220703125, -11.3857421875, -10.75927734375, -10.1328125, -9.50634765625, -8.8798828125, -8.25341796875, -7.626953125, -7.00048828125, -6.3740234375, -5.74755859375, -5.12109375, -4.49462890625, -3.8681640625, -3.24169921875, -2.615234375, -1.98876953125, -1.3623046875, -0.73583984375, -0.109375, 0.51708984375, 1.1435546875, 1.77001953125, 2.396484375, 3.02294921875, 3.6494140625, 4.27587890625, 4.90234375, 5.52880859375, 6.1552734375, 6.78173828125, 7.408203125, 8.03466796875, 8.6611328125, 9.28759765625, 9.9140625, 10.54052734375, 11.1669921875, 11.79345703125, 12.419921875, 13.04638671875, 13.6728515625, 14.29931640625, 14.92578125, 15.55224609375, 16.1787109375, 16.80517578125, 17.431640625, 18.05810546875, 18.6845703125, 19.31103515625, 19.9375]}, "gradients/decoder.transformer.h.18.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 8.0, 3.0, 2.0, 7.0, 5.0, 3.0, 10.0, 14.0, 21.0, 22.0, 41.0, 52.0, 88.0, 89.0, 146.0, 184.0, 289.0, 376.0, 433.0, 495.0, 453.0, 353.0, 279.0, 219.0, 143.0, 98.0, 79.0, 49.0, 44.0, 20.0, 16.0, 16.0, 14.0, 8.0, 5.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.7734375, -14.1688232421875, -13.564208984375, -12.9595947265625, -12.35498046875, -11.7503662109375, -11.145751953125, -10.5411376953125, -9.9365234375, -9.3319091796875, -8.727294921875, -8.1226806640625, -7.51806640625, -6.9134521484375, -6.308837890625, -5.7042236328125, -5.099609375, -4.4949951171875, -3.890380859375, -3.2857666015625, -2.68115234375, -2.0765380859375, -1.471923828125, -0.8673095703125, -0.2626953125, 0.3419189453125, 0.946533203125, 1.5511474609375, 2.15576171875, 2.7603759765625, 3.364990234375, 3.9696044921875, 4.57421875, 5.1788330078125, 5.783447265625, 6.3880615234375, 6.99267578125, 7.5972900390625, 8.201904296875, 8.8065185546875, 9.4111328125, 10.0157470703125, 10.620361328125, 11.2249755859375, 11.82958984375, 12.4342041015625, 13.038818359375, 13.6434326171875, 14.248046875, 14.8526611328125, 15.457275390625, 16.0618896484375, 16.66650390625, 17.2711181640625, 17.875732421875, 18.4803466796875, 19.0849609375, 19.6895751953125, 20.294189453125, 20.8988037109375, 21.50341796875, 22.1080322265625, 22.712646484375, 23.3172607421875, 23.921875]}, "gradients/decoder.transformer.h.18.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 5.0, 4.0, 10.0, 7.0, 19.0, 30.0, 38.0, 44.0, 100.0, 116.0, 179.0, 351.0, 777.0, 2198.0, 8830.0, 51461.0, 435118.0, 2871416.0, 728128.0, 78305.0, 12385.0, 2817.0, 906.0, 388.0, 229.0, 132.0, 87.0, 70.0, 32.0, 28.0, 23.0, 15.0, 17.0, 9.0, 7.0, 8.0, 5.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.59375, -31.52880859375, -30.4638671875, -29.39892578125, -28.333984375, -27.26904296875, -26.2041015625, -25.13916015625, -24.07421875, -23.00927734375, -21.9443359375, -20.87939453125, -19.814453125, -18.74951171875, -17.6845703125, -16.61962890625, -15.5546875, -14.48974609375, -13.4248046875, -12.35986328125, -11.294921875, -10.22998046875, -9.1650390625, -8.10009765625, -7.03515625, -5.97021484375, -4.9052734375, -3.84033203125, -2.775390625, -1.71044921875, -0.6455078125, 0.41943359375, 1.484375, 2.54931640625, 3.6142578125, 4.67919921875, 5.744140625, 6.80908203125, 7.8740234375, 8.93896484375, 10.00390625, 11.06884765625, 12.1337890625, 13.19873046875, 14.263671875, 15.32861328125, 16.3935546875, 17.45849609375, 18.5234375, 19.58837890625, 20.6533203125, 21.71826171875, 22.783203125, 23.84814453125, 24.9130859375, 25.97802734375, 27.04296875, 28.10791015625, 29.1728515625, 30.23779296875, 31.302734375, 32.36767578125, 33.4326171875, 34.49755859375, 35.5625]}, "gradients/decoder.transformer.h.18.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 6.0, 4.0, 15.0, 19.0, 36.0, 44.0, 89.0, 98.0, 129.0, 133.0, 94.0, 114.0, 81.0, 66.0, 32.0, 28.0, 12.0, 9.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-96.24629211425781, -92.76132202148438, -89.2763442993164, -85.79137420654297, -82.306396484375, -78.82142639160156, -75.33645629882812, -71.85148620605469, -68.36650848388672, -64.88153839111328, -61.39656066894531, -57.911590576171875, -54.42661666870117, -50.94164276123047, -47.45667266845703, -43.97169876098633, -40.486724853515625, -37.00175094604492, -33.51677703857422, -30.03180694580078, -26.546833038330078, -23.061859130859375, -19.576887130737305, -16.091915130615234, -12.606941223144531, -9.121968269348145, -5.636995315551758, -2.152022361755371, 1.3329505920410156, 4.817924499511719, 8.302896499633789, 11.78786849975586, 15.272834777832031, 18.757808685302734, 22.242780685424805, 25.727752685546875, 29.212726593017578, 32.69770050048828, 36.18267059326172, 39.66764450073242, 43.152618408203125, 46.63759231567383, 50.12256622314453, 53.60753631591797, 57.09251022338867, 60.577484130859375, 64.06245422363281, 67.54742431640625, 71.03240203857422, 74.51737213134766, 78.00234985351562, 81.48731994628906, 84.9722900390625, 88.45726776123047, 91.9422378540039, 95.42721557617188, 98.91218566894531, 102.39715576171875, 105.88213348388672, 109.36710357666016, 112.85208129882812, 116.33705139160156, 119.822021484375, 123.30699157714844, 126.7919692993164]}, "gradients/decoder.transformer.h.18.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 1.0, 3.0, 4.0, 8.0, 8.0, 4.0, 5.0, 18.0, 19.0, 23.0, 30.0, 10.0, 28.0, 28.0, 24.0, 37.0, 37.0, 51.0, 42.0, 41.0, 39.0, 51.0, 37.0, 44.0, 36.0, 34.0, 32.0, 45.0, 39.0, 32.0, 32.0, 21.0, 25.0, 20.0, 11.0, 23.0, 16.0, 16.0, 5.0, 10.0, 7.0, 8.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-65.80789184570312, -63.88043212890625, -61.95297622680664, -60.02552032470703, -58.098060607910156, -56.17060089111328, -54.24314498901367, -52.31568908691406, -50.38822937011719, -48.46076965332031, -46.5333137512207, -44.605857849121094, -42.67839813232422, -40.750938415527344, -38.823482513427734, -36.896026611328125, -34.96856689453125, -33.041107177734375, -31.113651275634766, -29.186193466186523, -27.25873565673828, -25.33127784729004, -23.403820037841797, -21.476362228393555, -19.548904418945312, -17.62144660949707, -15.693988800048828, -13.766530990600586, -11.839073181152344, -9.911615371704102, -7.984157562255859, -6.056699752807617, -4.129241943359375, -2.201784133911133, -0.2743263244628906, 1.6531314849853516, 3.5805892944335938, 5.508047103881836, 7.435504913330078, 9.36296272277832, 11.290420532226562, 13.217878341674805, 15.145336151123047, 17.07279396057129, 19.00025177001953, 20.927709579467773, 22.855167388916016, 24.782625198364258, 26.7100830078125, 28.637540817260742, 30.564998626708984, 32.492454528808594, 34.41991424560547, 36.347373962402344, 38.27482986450195, 40.20228576660156, 42.12974548339844, 44.05720520019531, 45.98466110229492, 47.91211700439453, 49.839576721191406, 51.76703643798828, 53.69449234008789, 55.6219482421875, 57.549407958984375]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 3.0, 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, 7.0, 6.0, 9.0, 10.0, 7.0, 14.0, 15.0, 17.0, 10.0, 22.0, 28.0, 17.0, 31.0, 34.0, 37.0, 42.0, 32.0, 27.0, 53.0, 42.0, 42.0, 50.0, 42.0, 38.0, 33.0, 34.0, 32.0, 34.0, 23.0, 32.0, 27.0, 20.0, 22.0, 15.0, 19.0, 14.0, 13.0, 8.0, 7.0, 9.0, 6.0, 2.0, 4.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-7.296875, -7.06591796875, -6.8349609375, -6.60400390625, -6.373046875, -6.14208984375, -5.9111328125, -5.68017578125, -5.44921875, -5.21826171875, -4.9873046875, -4.75634765625, -4.525390625, -4.29443359375, -4.0634765625, -3.83251953125, -3.6015625, -3.37060546875, -3.1396484375, -2.90869140625, -2.677734375, -2.44677734375, -2.2158203125, -1.98486328125, -1.75390625, -1.52294921875, -1.2919921875, -1.06103515625, -0.830078125, -0.59912109375, -0.3681640625, -0.13720703125, 0.09375, 0.32470703125, 0.5556640625, 0.78662109375, 1.017578125, 1.24853515625, 1.4794921875, 1.71044921875, 1.94140625, 2.17236328125, 2.4033203125, 2.63427734375, 2.865234375, 3.09619140625, 3.3271484375, 3.55810546875, 3.7890625, 4.02001953125, 4.2509765625, 4.48193359375, 4.712890625, 4.94384765625, 5.1748046875, 5.40576171875, 5.63671875, 5.86767578125, 6.0986328125, 6.32958984375, 6.560546875, 6.79150390625, 7.0224609375, 7.25341796875, 7.484375]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 8.0, 11.0, 11.0, 20.0, 18.0, 36.0, 68.0, 81.0, 127.0, 164.0, 261.0, 387.0, 575.0, 832.0, 1132.0, 1774.0, 2611.0, 4083.0, 6021.0, 9658.0, 14698.0, 23312.0, 36591.0, 57164.0, 88719.0, 131944.0, 169400.0, 160801.0, 117976.0, 79129.0, 50871.0, 31920.0, 20270.0, 13003.0, 8400.0, 5492.0, 3607.0, 2457.0, 1563.0, 1069.0, 736.0, 485.0, 335.0, 257.0, 148.0, 113.0, 80.0, 50.0, 33.0, 21.0, 16.0, 10.0, 4.0, 5.0, 5.0, 1.0, 3.0], "bins": [-0.578125, -0.5610504150390625, -0.543975830078125, -0.5269012451171875, -0.50982666015625, -0.4927520751953125, -0.475677490234375, -0.4586029052734375, -0.4415283203125, -0.4244537353515625, -0.407379150390625, -0.3903045654296875, -0.37322998046875, -0.3561553955078125, -0.339080810546875, -0.3220062255859375, -0.304931640625, -0.2878570556640625, -0.270782470703125, -0.2537078857421875, -0.23663330078125, -0.2195587158203125, -0.202484130859375, -0.1854095458984375, -0.1683349609375, -0.1512603759765625, -0.134185791015625, -0.1171112060546875, -0.10003662109375, -0.0829620361328125, -0.065887451171875, -0.0488128662109375, -0.03173828125, -0.0146636962890625, 0.002410888671875, 0.0194854736328125, 0.03656005859375, 0.0536346435546875, 0.070709228515625, 0.0877838134765625, 0.1048583984375, 0.1219329833984375, 0.139007568359375, 0.1560821533203125, 0.17315673828125, 0.1902313232421875, 0.207305908203125, 0.2243804931640625, 0.241455078125, 0.2585296630859375, 0.275604248046875, 0.2926788330078125, 0.30975341796875, 0.3268280029296875, 0.343902587890625, 0.3609771728515625, 0.3780517578125, 0.3951263427734375, 0.412200927734375, 0.4292755126953125, 0.44635009765625, 0.4634246826171875, 0.480499267578125, 0.4975738525390625, 0.5146484375]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 2.0, 5.0, 5.0, 2.0, 8.0, 10.0, 9.0, 11.0, 11.0, 14.0, 13.0, 19.0, 18.0, 21.0, 26.0, 29.0, 33.0, 34.0, 36.0, 41.0, 39.0, 44.0, 39.0, 37.0, 1057.0, 34.0, 32.0, 32.0, 38.0, 40.0, 36.0, 32.0, 24.0, 24.0, 23.0, 14.0, 21.0, 19.0, 22.0, 16.0, 7.0, 15.0, 12.0, 5.0, 7.0, 4.0, 5.0, 1.0, 2.0, 3.0, 3.0, 1.0, 0.0, 1.0], "bins": [-4.875, -4.73162841796875, -4.5882568359375, -4.44488525390625, -4.301513671875, -4.15814208984375, -4.0147705078125, -3.87139892578125, -3.72802734375, -3.58465576171875, -3.4412841796875, -3.29791259765625, -3.154541015625, -3.01116943359375, -2.8677978515625, -2.72442626953125, -2.5810546875, -2.43768310546875, -2.2943115234375, -2.15093994140625, -2.007568359375, -1.86419677734375, -1.7208251953125, -1.57745361328125, -1.43408203125, -1.29071044921875, -1.1473388671875, -1.00396728515625, -0.860595703125, -0.71722412109375, -0.5738525390625, -0.43048095703125, -0.287109375, -0.14373779296875, -0.0003662109375, 0.14300537109375, 0.286376953125, 0.42974853515625, 0.5731201171875, 0.71649169921875, 0.85986328125, 1.00323486328125, 1.1466064453125, 1.28997802734375, 1.433349609375, 1.57672119140625, 1.7200927734375, 1.86346435546875, 2.0068359375, 2.15020751953125, 2.2935791015625, 2.43695068359375, 2.580322265625, 2.72369384765625, 2.8670654296875, 3.01043701171875, 3.15380859375, 3.29718017578125, 3.4405517578125, 3.58392333984375, 3.727294921875, 3.87066650390625, 4.0140380859375, 4.15740966796875, 4.30078125]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 0.0, 6.0, 3.0, 5.0, 7.0, 13.0, 13.0, 29.0, 31.0, 50.0, 94.0, 161.0, 216.0, 357.0, 558.0, 910.0, 1484.0, 2495.0, 4175.0, 6664.0, 11552.0, 19976.0, 33695.0, 57113.0, 93644.0, 140579.0, 1152440.0, 241378.0, 126725.0, 81843.0, 49338.0, 29297.0, 16928.0, 10016.0, 5929.0, 3668.0, 2161.0, 1350.0, 844.0, 521.0, 306.0, 186.0, 142.0, 80.0, 55.0, 29.0, 21.0, 24.0, 11.0, 7.0, 6.0, 0.0, 2.0, 2.0, 4.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.4306640625, -0.41670989990234375, -0.4027557373046875, -0.38880157470703125, -0.374847412109375, -0.36089324951171875, -0.3469390869140625, -0.33298492431640625, -0.31903076171875, -0.30507659912109375, -0.2911224365234375, -0.27716827392578125, -0.263214111328125, -0.24925994873046875, -0.2353057861328125, -0.22135162353515625, -0.2073974609375, -0.19344329833984375, -0.1794891357421875, -0.16553497314453125, -0.151580810546875, -0.13762664794921875, -0.1236724853515625, -0.10971832275390625, -0.09576416015625, -0.08180999755859375, -0.0678558349609375, -0.05390167236328125, -0.039947509765625, -0.02599334716796875, -0.0120391845703125, 0.00191497802734375, 0.015869140625, 0.02982330322265625, 0.0437774658203125, 0.05773162841796875, 0.071685791015625, 0.08563995361328125, 0.0995941162109375, 0.11354827880859375, 0.12750244140625, 0.14145660400390625, 0.1554107666015625, 0.16936492919921875, 0.183319091796875, 0.19727325439453125, 0.2112274169921875, 0.22518157958984375, 0.2391357421875, 0.25308990478515625, 0.2670440673828125, 0.28099822998046875, 0.294952392578125, 0.30890655517578125, 0.3228607177734375, 0.33681488037109375, 0.35076904296875, 0.36472320556640625, 0.3786773681640625, 0.39263153076171875, 0.406585693359375, 0.42053985595703125, 0.4344940185546875, 0.44844818115234375, 0.46240234375]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 4.0, 1.0, 1.0, 4.0, 2.0, 1.0, 6.0, 10.0, 14.0, 16.0, 7.0, 11.0, 24.0, 34.0, 44.0, 47.0, 66.0, 92.0, 119.0, 107.0, 93.0, 75.0, 60.0, 35.0, 31.0, 26.0, 21.0, 13.0, 10.0, 2.0, 10.0, 6.0, 4.0, 5.0, 4.0, 2.0, 5.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01184844970703125, -0.011489391326904297, -0.011130332946777344, -0.01077127456665039, -0.010412216186523438, -0.010053157806396484, -0.009694099426269531, -0.009335041046142578, -0.008975982666015625, -0.008616924285888672, -0.008257865905761719, -0.007898807525634766, -0.0075397491455078125, -0.007180690765380859, -0.006821632385253906, -0.006462574005126953, -0.006103515625, -0.005744457244873047, -0.005385398864746094, -0.005026340484619141, -0.0046672821044921875, -0.004308223724365234, -0.003949165344238281, -0.003590106964111328, -0.003231048583984375, -0.002871990203857422, -0.0025129318237304688, -0.0021538734436035156, -0.0017948150634765625, -0.0014357566833496094, -0.0010766983032226562, -0.0007176399230957031, -0.00035858154296875, 4.76837158203125e-07, 0.00035953521728515625, 0.0007185935974121094, 0.0010776519775390625, 0.0014367103576660156, 0.0017957687377929688, 0.002154827117919922, 0.002513885498046875, 0.002872943878173828, 0.0032320022583007812, 0.0035910606384277344, 0.0039501190185546875, 0.004309177398681641, 0.004668235778808594, 0.005027294158935547, 0.0053863525390625, 0.005745410919189453, 0.006104469299316406, 0.006463527679443359, 0.0068225860595703125, 0.007181644439697266, 0.007540702819824219, 0.007899761199951172, 0.008258819580078125, 0.008617877960205078, 0.008976936340332031, 0.009335994720458984, 0.009695053100585938, 0.01005411148071289, 0.010413169860839844, 0.010772228240966797, 0.01113128662109375]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 2.0, 2.0, 2.0, 7.0, 6.0, 8.0, 7.0, 10.0, 14.0, 16.0, 22.0, 42.0, 62.0, 79.0, 122.0, 209.0, 417.0, 2669.0, 806166.0, 236312.0, 1478.0, 365.0, 185.0, 100.0, 76.0, 48.0, 36.0, 21.0, 22.0, 13.0, 14.0, 7.0, 6.0, 6.0, 1.0, 5.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.2330322265625, -0.22635650634765625, -0.2196807861328125, -0.21300506591796875, -0.206329345703125, -0.19965362548828125, -0.1929779052734375, -0.18630218505859375, -0.17962646484375, -0.17295074462890625, -0.1662750244140625, -0.15959930419921875, -0.152923583984375, -0.14624786376953125, -0.1395721435546875, -0.13289642333984375, -0.126220703125, -0.11954498291015625, -0.1128692626953125, -0.10619354248046875, -0.099517822265625, -0.09284210205078125, -0.0861663818359375, -0.07949066162109375, -0.07281494140625, -0.06613922119140625, -0.0594635009765625, -0.05278778076171875, -0.046112060546875, -0.03943634033203125, -0.0327606201171875, -0.02608489990234375, -0.0194091796875, -0.01273345947265625, -0.0060577392578125, 0.00061798095703125, 0.007293701171875, 0.01396942138671875, 0.0206451416015625, 0.02732086181640625, 0.03399658203125, 0.04067230224609375, 0.0473480224609375, 0.05402374267578125, 0.060699462890625, 0.06737518310546875, 0.0740509033203125, 0.08072662353515625, 0.08740234375, 0.09407806396484375, 0.1007537841796875, 0.10742950439453125, 0.114105224609375, 0.12078094482421875, 0.1274566650390625, 0.13413238525390625, 0.14080810546875, 0.14748382568359375, 0.1541595458984375, 0.16083526611328125, 0.167510986328125, 0.17418670654296875, 0.1808624267578125, 0.18753814697265625, 0.1942138671875]}, "gradients/decoder.transformer.h.18.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 10.0, 172.0, 683.0, 140.0, 12.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08980203419923782, -0.0881827101111412, -0.08656337857246399, -0.08494405448436737, -0.08332473039627075, -0.08170539885759354, -0.08008607476949692, -0.0784667432308197, -0.07684741914272308, -0.07522809505462646, -0.07360876351594925, -0.07198943942785263, -0.07037010788917542, -0.0687507838010788, -0.06713145971298218, -0.06551212817430496, -0.06389280408620834, -0.062273476272821426, -0.06065414845943451, -0.05903482437133789, -0.057415496557950974, -0.055796168744564056, -0.05417684465646744, -0.05255751684308052, -0.050938189029693604, -0.049318861216306686, -0.04769953340291977, -0.04608020931482315, -0.044460881501436234, -0.042841553688049316, -0.0412222295999527, -0.03960290178656578, -0.037983573973178864, -0.036364246159791946, -0.03474491834640503, -0.03312559425830841, -0.031506266444921494, -0.029886938631534576, -0.02826761268079281, -0.02664828673005104, -0.025028957054018974, -0.023409631103277206, -0.02179030328989029, -0.020170975476503372, -0.018551649525761604, -0.016932323575019836, -0.01531299576163292, -0.013693668879568577, -0.012074341997504234, -0.010455015115439892, -0.00883568823337555, -0.007216361351311207, -0.005597034469246864, -0.003977707587182522, -0.0023583807051181793, -0.0007390538230538368, 0.0008802730590105057, 0.002499599941074848, 0.004118926823139191, 0.005738253705203533, 0.007357580587267876, 0.008976907469332218, 0.01059623435139656, 0.012215561233460903, 0.013834888115525246]}, "gradients/decoder.transformer.h.18.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 3.0, 3.0, 2.0, 5.0, 6.0, 9.0, 6.0, 12.0, 10.0, 17.0, 18.0, 30.0, 23.0, 28.0, 32.0, 25.0, 32.0, 32.0, 44.0, 38.0, 41.0, 50.0, 43.0, 43.0, 35.0, 34.0, 46.0, 34.0, 32.0, 44.0, 33.0, 30.0, 36.0, 19.0, 17.0, 18.0, 14.0, 15.0, 9.0, 8.0, 8.0, 7.0, 3.0, 2.0, 4.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.005070328712463379, -0.004917294718325138, -0.004764260724186897, -0.0046112267300486565, -0.004458192735910416, -0.004305158741772175, -0.004152124747633934, -0.003999090753495693, -0.0038460567593574524, -0.0036930227652192116, -0.0035399887710809708, -0.00338695477694273, -0.003233920782804489, -0.0030808867886662483, -0.0029278527945280075, -0.0027748188003897667, -0.002621784806251526, -0.002468750812113285, -0.0023157168179750443, -0.0021626828238368034, -0.0020096488296985626, -0.0018566148355603218, -0.001703580841422081, -0.0015505468472838402, -0.0013975128531455994, -0.0012444788590073586, -0.0010914448648691177, -0.0009384108707308769, -0.0007853768765926361, -0.0006323428824543953, -0.0004793088883161545, -0.00032627489417791367, -0.00017324090003967285, -2.0206905901432037e-05, 0.00013282708823680878, 0.0002858610823750496, 0.0004388950765132904, 0.0005919290706515312, 0.000744963064789772, 0.0008979970589280128, 0.0010510310530662537, 0.0012040650472044945, 0.0013570990413427353, 0.001510133035480976, 0.001663167029619217, 0.0018162010237574577, 0.0019692350178956985, 0.0021222690120339394, 0.00227530300617218, 0.002428337000310421, 0.002581370994448662, 0.0027344049885869026, 0.0028874389827251434, 0.0030404729768633842, 0.003193506971001625, 0.003346540965139866, 0.0034995749592781067, 0.0036526089534163475, 0.0038056429475545883, 0.003958676941692829, 0.00411171093583107, 0.004264744929969311, 0.004417778924107552, 0.004570812918245792, 0.004723846912384033]}, "gradients/decoder.transformer.h.18.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 3.0, 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, 7.0, 6.0, 9.0, 10.0, 7.0, 14.0, 15.0, 17.0, 10.0, 22.0, 28.0, 17.0, 31.0, 34.0, 37.0, 42.0, 32.0, 27.0, 53.0, 42.0, 42.0, 50.0, 42.0, 38.0, 33.0, 34.0, 32.0, 34.0, 23.0, 32.0, 27.0, 20.0, 22.0, 15.0, 19.0, 14.0, 13.0, 9.0, 6.0, 9.0, 6.0, 2.0, 4.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-7.296875, -7.06591796875, -6.8349609375, -6.60400390625, -6.373046875, -6.14208984375, -5.9111328125, -5.68017578125, -5.44921875, -5.21826171875, -4.9873046875, -4.75634765625, -4.525390625, -4.29443359375, -4.0634765625, -3.83251953125, -3.6015625, -3.37060546875, -3.1396484375, -2.90869140625, -2.677734375, -2.44677734375, -2.2158203125, -1.98486328125, -1.75390625, -1.52294921875, -1.2919921875, -1.06103515625, -0.830078125, -0.59912109375, -0.3681640625, -0.13720703125, 0.09375, 0.32470703125, 0.5556640625, 0.78662109375, 1.017578125, 1.24853515625, 1.4794921875, 1.71044921875, 1.94140625, 2.17236328125, 2.4033203125, 2.63427734375, 2.865234375, 3.09619140625, 3.3271484375, 3.55810546875, 3.7890625, 4.02001953125, 4.2509765625, 4.48193359375, 4.712890625, 4.94384765625, 5.1748046875, 5.40576171875, 5.63671875, 5.86767578125, 6.0986328125, 6.32958984375, 6.560546875, 6.79150390625, 7.0224609375, 7.25341796875, 7.484375]}, "gradients/decoder.transformer.h.18.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 3.0, 6.0, 5.0, 11.0, 11.0, 17.0, 16.0, 22.0, 49.0, 68.0, 90.0, 115.0, 199.0, 270.0, 407.0, 698.0, 997.0, 1558.0, 2465.0, 3840.0, 6382.0, 11881.0, 25796.0, 72131.0, 284989.0, 456657.0, 108332.0, 35648.0, 15370.0, 8069.0, 4412.0, 2816.0, 1802.0, 1159.0, 761.0, 505.0, 355.0, 235.0, 142.0, 90.0, 53.0, 36.0, 27.0, 25.0, 21.0, 8.0, 4.0, 4.0, 2.0, 1.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.42578125, -7.1949462890625, -6.964111328125, -6.7332763671875, -6.50244140625, -6.2716064453125, -6.040771484375, -5.8099365234375, -5.5791015625, -5.3482666015625, -5.117431640625, -4.8865966796875, -4.65576171875, -4.4249267578125, -4.194091796875, -3.9632568359375, -3.732421875, -3.5015869140625, -3.270751953125, -3.0399169921875, -2.80908203125, -2.5782470703125, -2.347412109375, -2.1165771484375, -1.8857421875, -1.6549072265625, -1.424072265625, -1.1932373046875, -0.96240234375, -0.7315673828125, -0.500732421875, -0.2698974609375, -0.0390625, 0.1917724609375, 0.422607421875, 0.6534423828125, 0.88427734375, 1.1151123046875, 1.345947265625, 1.5767822265625, 1.8076171875, 2.0384521484375, 2.269287109375, 2.5001220703125, 2.73095703125, 2.9617919921875, 3.192626953125, 3.4234619140625, 3.654296875, 3.8851318359375, 4.115966796875, 4.3468017578125, 4.57763671875, 4.8084716796875, 5.039306640625, 5.2701416015625, 5.5009765625, 5.7318115234375, 5.962646484375, 6.1934814453125, 6.42431640625, 6.6551513671875, 6.885986328125, 7.1168212890625, 7.34765625]}, "gradients/decoder.transformer.h.18.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 7.0, 4.0, 5.0, 1.0, 5.0, 2.0, 5.0, 11.0, 11.0, 10.0, 15.0, 16.0, 14.0, 24.0, 29.0, 25.0, 27.0, 34.0, 42.0, 50.0, 50.0, 72.0, 89.0, 457.0, 1492.0, 122.0, 66.0, 63.0, 30.0, 38.0, 45.0, 34.0, 32.0, 16.0, 25.0, 18.0, 16.0, 16.0, 12.0, 6.0, 5.0, 7.0, 1.0, 4.0, 0.0, 8.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.609375, -25.722900390625, -24.83642578125, -23.949951171875, -23.0634765625, -22.177001953125, -21.29052734375, -20.404052734375, -19.517578125, -18.631103515625, -17.74462890625, -16.858154296875, -15.9716796875, -15.085205078125, -14.19873046875, -13.312255859375, -12.42578125, -11.539306640625, -10.65283203125, -9.766357421875, -8.8798828125, -7.993408203125, -7.10693359375, -6.220458984375, -5.333984375, -4.447509765625, -3.56103515625, -2.674560546875, -1.7880859375, -0.901611328125, -0.01513671875, 0.871337890625, 1.7578125, 2.644287109375, 3.53076171875, 4.417236328125, 5.3037109375, 6.190185546875, 7.07666015625, 7.963134765625, 8.849609375, 9.736083984375, 10.62255859375, 11.509033203125, 12.3955078125, 13.281982421875, 14.16845703125, 15.054931640625, 15.94140625, 16.827880859375, 17.71435546875, 18.600830078125, 19.4873046875, 20.373779296875, 21.26025390625, 22.146728515625, 23.033203125, 23.919677734375, 24.80615234375, 25.692626953125, 26.5791015625, 27.465576171875, 28.35205078125, 29.238525390625, 30.125]}, "gradients/decoder.transformer.h.18.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 1.0, 7.0, 2.0, 6.0, 6.0, 7.0, 5.0, 11.0, 17.0, 21.0, 33.0, 40.0, 46.0, 68.0, 95.0, 133.0, 273.0, 501.0, 1874.0, 20264.0, 3030971.0, 85883.0, 3770.0, 681.0, 360.0, 175.0, 119.0, 84.0, 60.0, 51.0, 26.0, 22.0, 21.0, 13.0, 17.0, 12.0, 6.0, 4.0, 4.0, 7.0, 6.0, 3.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-61.5, -59.7265625, -57.953125, -56.1796875, -54.40625, -52.6328125, -50.859375, -49.0859375, -47.3125, -45.5390625, -43.765625, -41.9921875, -40.21875, -38.4453125, -36.671875, -34.8984375, -33.125, -31.3515625, -29.578125, -27.8046875, -26.03125, -24.2578125, -22.484375, -20.7109375, -18.9375, -17.1640625, -15.390625, -13.6171875, -11.84375, -10.0703125, -8.296875, -6.5234375, -4.75, -2.9765625, -1.203125, 0.5703125, 2.34375, 4.1171875, 5.890625, 7.6640625, 9.4375, 11.2109375, 12.984375, 14.7578125, 16.53125, 18.3046875, 20.078125, 21.8515625, 23.625, 25.3984375, 27.171875, 28.9453125, 30.71875, 32.4921875, 34.265625, 36.0390625, 37.8125, 39.5859375, 41.359375, 43.1328125, 44.90625, 46.6796875, 48.453125, 50.2265625, 52.0]}, "gradients/decoder.transformer.h.18.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 22.0, 624.0, 361.0, 11.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-76.44829559326172, -67.84322357177734, -59.23815155029297, -50.63307571411133, -42.02800369262695, -33.42293167114258, -24.817855834960938, -16.212783813476562, -7.6077117919921875, 0.9973611831665039, 9.602434158325195, 18.207508087158203, 26.812580108642578, 35.41765213012695, 44.022727966308594, 52.62779998779297, 61.232872009277344, 69.83794403076172, 78.4430160522461, 87.048095703125, 95.65316772460938, 104.25823974609375, 112.86331176757812, 121.4683837890625, 130.07345581054688, 138.67852783203125, 147.28359985351562, 155.888671875, 164.49374389648438, 173.09881591796875, 181.70388793945312, 190.3089599609375, 198.91403198242188, 207.51910400390625, 216.12417602539062, 224.729248046875, 233.33432006835938, 241.93939208984375, 250.54446411132812, 259.1495361328125, 267.7546081542969, 276.35968017578125, 284.9647521972656, 293.56982421875, 302.1748962402344, 310.77996826171875, 319.3850402832031, 327.9901123046875, 336.59521484375, 345.2002868652344, 353.80535888671875, 362.4104309082031, 371.0155029296875, 379.6205749511719, 388.22564697265625, 396.8307189941406, 405.435791015625, 414.0408630371094, 422.64593505859375, 431.2510070800781, 439.8560791015625, 448.4611511230469, 457.06622314453125, 465.6712951660156, 474.2763671875]}, "gradients/decoder.transformer.h.18.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 2.0, 3.0, 6.0, 3.0, 5.0, 6.0, 8.0, 9.0, 8.0, 15.0, 17.0, 15.0, 24.0, 19.0, 23.0, 21.0, 34.0, 31.0, 39.0, 30.0, 37.0, 37.0, 40.0, 30.0, 41.0, 34.0, 43.0, 38.0, 30.0, 32.0, 34.0, 26.0, 34.0, 33.0, 30.0, 27.0, 22.0, 24.0, 15.0, 17.0, 8.0, 13.0, 12.0, 9.0, 7.0, 4.0, 2.0, 7.0, 1.0, 2.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0], "bins": [-74.39491271972656, -72.0333480834961, -69.6717758178711, -67.31021118164062, -64.94864654541016, -62.58707809448242, -60.22550964355469, -57.86394500732422, -55.502376556396484, -53.14080810546875, -50.77924346923828, -48.41767501831055, -46.05610656738281, -43.694541931152344, -41.33297348022461, -38.971405029296875, -36.609840393066406, -34.24827194213867, -31.886707305908203, -29.52513885498047, -27.163572311401367, -24.802005767822266, -22.44043731689453, -20.07887077331543, -17.717304229736328, -15.355737686157227, -12.994170188903809, -10.63260269165039, -8.271036148071289, -5.9094696044921875, -3.5479021072387695, -1.1863346099853516, 1.1752243041992188, 3.5367913246154785, 5.898358345031738, 8.259925842285156, 10.621492385864258, 12.98305892944336, 15.344626426696777, 17.706193923950195, 20.067760467529297, 22.4293270111084, 24.7908935546875, 27.152462005615234, 29.514028549194336, 31.875595092773438, 34.23716354370117, 36.598731994628906, 38.960296630859375, 41.32186508178711, 43.68342971801758, 46.04499816894531, 48.40656280517578, 50.768131256103516, 53.12969970703125, 55.49126434326172, 57.85283279418945, 60.21440124511719, 62.575965881347656, 64.93753051757812, 67.29910278320312, 69.6606674194336, 72.02223205566406, 74.38380432128906, 76.74536895751953]}, "gradients/decoder.transformer.h.17.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 3.0, 1.0, 3.0, 4.0, 4.0, 6.0, 9.0, 7.0, 7.0, 9.0, 18.0, 13.0, 13.0, 10.0, 18.0, 29.0, 28.0, 38.0, 34.0, 31.0, 40.0, 37.0, 35.0, 36.0, 39.0, 50.0, 40.0, 40.0, 41.0, 44.0, 34.0, 30.0, 32.0, 31.0, 33.0, 22.0, 27.0, 18.0, 12.0, 10.0, 23.0, 11.0, 8.0, 6.0, 7.0, 5.0, 2.0, 2.0, 3.0, 1.0, 2.0, 0.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-7.60546875, -7.364013671875, -7.12255859375, -6.881103515625, -6.6396484375, -6.398193359375, -6.15673828125, -5.915283203125, -5.673828125, -5.432373046875, -5.19091796875, -4.949462890625, -4.7080078125, -4.466552734375, -4.22509765625, -3.983642578125, -3.7421875, -3.500732421875, -3.25927734375, -3.017822265625, -2.7763671875, -2.534912109375, -2.29345703125, -2.052001953125, -1.810546875, -1.569091796875, -1.32763671875, -1.086181640625, -0.8447265625, -0.603271484375, -0.36181640625, -0.120361328125, 0.12109375, 0.362548828125, 0.60400390625, 0.845458984375, 1.0869140625, 1.328369140625, 1.56982421875, 1.811279296875, 2.052734375, 2.294189453125, 2.53564453125, 2.777099609375, 3.0185546875, 3.260009765625, 3.50146484375, 3.742919921875, 3.984375, 4.225830078125, 4.46728515625, 4.708740234375, 4.9501953125, 5.191650390625, 5.43310546875, 5.674560546875, 5.916015625, 6.157470703125, 6.39892578125, 6.640380859375, 6.8818359375, 7.123291015625, 7.36474609375, 7.606201171875, 7.84765625]}, "gradients/decoder.transformer.h.17.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 4.0, 5.0, 5.0, 6.0, 6.0, 10.0, 14.0, 21.0, 41.0, 47.0, 73.0, 130.0, 192.0, 298.0, 540.0, 1009.0, 1987.0, 3881.0, 8435.0, 18866.0, 44693.0, 119238.0, 368308.0, 1062786.0, 1474788.0, 721273.0, 230729.0, 79526.0, 31275.0, 13562.0, 6160.0, 2980.0, 1525.0, 787.0, 418.0, 262.0, 156.0, 85.0, 54.0, 39.0, 29.0, 17.0, 12.0, 8.0, 6.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.75, -9.4481201171875, -9.146240234375, -8.8443603515625, -8.54248046875, -8.2406005859375, -7.938720703125, -7.6368408203125, -7.3349609375, -7.0330810546875, -6.731201171875, -6.4293212890625, -6.12744140625, -5.8255615234375, -5.523681640625, -5.2218017578125, -4.919921875, -4.6180419921875, -4.316162109375, -4.0142822265625, -3.71240234375, -3.4105224609375, -3.108642578125, -2.8067626953125, -2.5048828125, -2.2030029296875, -1.901123046875, -1.5992431640625, -1.29736328125, -0.9954833984375, -0.693603515625, -0.3917236328125, -0.08984375, 0.2120361328125, 0.513916015625, 0.8157958984375, 1.11767578125, 1.4195556640625, 1.721435546875, 2.0233154296875, 2.3251953125, 2.6270751953125, 2.928955078125, 3.2308349609375, 3.53271484375, 3.8345947265625, 4.136474609375, 4.4383544921875, 4.740234375, 5.0421142578125, 5.343994140625, 5.6458740234375, 5.94775390625, 6.2496337890625, 6.551513671875, 6.8533935546875, 7.1552734375, 7.4571533203125, 7.759033203125, 8.0609130859375, 8.36279296875, 8.6646728515625, 8.966552734375, 9.2684326171875, 9.5703125]}, "gradients/decoder.transformer.h.17.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 3.0, 5.0, 2.0, 4.0, 8.0, 12.0, 18.0, 16.0, 19.0, 39.0, 32.0, 48.0, 72.0, 96.0, 145.0, 185.0, 198.0, 272.0, 360.0, 423.0, 418.0, 403.0, 295.0, 234.0, 187.0, 140.0, 120.0, 81.0, 61.0, 56.0, 47.0, 21.0, 20.0, 14.0, 10.0, 5.0, 1.0, 5.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-20.6875, -20.1314697265625, -19.575439453125, -19.0194091796875, -18.46337890625, -17.9073486328125, -17.351318359375, -16.7952880859375, -16.2392578125, -15.6832275390625, -15.127197265625, -14.5711669921875, -14.01513671875, -13.4591064453125, -12.903076171875, -12.3470458984375, -11.791015625, -11.2349853515625, -10.678955078125, -10.1229248046875, -9.56689453125, -9.0108642578125, -8.454833984375, -7.8988037109375, -7.3427734375, -6.7867431640625, -6.230712890625, -5.6746826171875, -5.11865234375, -4.5626220703125, -4.006591796875, -3.4505615234375, -2.89453125, -2.3385009765625, -1.782470703125, -1.2264404296875, -0.67041015625, -0.1143798828125, 0.441650390625, 0.9976806640625, 1.5537109375, 2.1097412109375, 2.665771484375, 3.2218017578125, 3.77783203125, 4.3338623046875, 4.889892578125, 5.4459228515625, 6.001953125, 6.5579833984375, 7.114013671875, 7.6700439453125, 8.22607421875, 8.7821044921875, 9.338134765625, 9.8941650390625, 10.4501953125, 11.0062255859375, 11.562255859375, 12.1182861328125, 12.67431640625, 13.2303466796875, 13.786376953125, 14.3424072265625, 14.8984375]}, "gradients/decoder.transformer.h.17.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 5.0, 4.0, 9.0, 6.0, 3.0, 23.0, 27.0, 26.0, 30.0, 46.0, 76.0, 154.0, 256.0, 585.0, 1739.0, 6601.0, 33569.0, 237770.0, 2194158.0, 1529738.0, 158132.0, 23822.0, 4996.0, 1362.0, 476.0, 221.0, 123.0, 97.0, 64.0, 40.0, 31.0, 11.0, 24.0, 16.0, 12.0, 11.0, 3.0, 4.0, 5.0, 0.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.15625, -31.11572265625, -30.0751953125, -29.03466796875, -27.994140625, -26.95361328125, -25.9130859375, -24.87255859375, -23.83203125, -22.79150390625, -21.7509765625, -20.71044921875, -19.669921875, -18.62939453125, -17.5888671875, -16.54833984375, -15.5078125, -14.46728515625, -13.4267578125, -12.38623046875, -11.345703125, -10.30517578125, -9.2646484375, -8.22412109375, -7.18359375, -6.14306640625, -5.1025390625, -4.06201171875, -3.021484375, -1.98095703125, -0.9404296875, 0.10009765625, 1.140625, 2.18115234375, 3.2216796875, 4.26220703125, 5.302734375, 6.34326171875, 7.3837890625, 8.42431640625, 9.46484375, 10.50537109375, 11.5458984375, 12.58642578125, 13.626953125, 14.66748046875, 15.7080078125, 16.74853515625, 17.7890625, 18.82958984375, 19.8701171875, 20.91064453125, 21.951171875, 22.99169921875, 24.0322265625, 25.07275390625, 26.11328125, 27.15380859375, 28.1943359375, 29.23486328125, 30.275390625, 31.31591796875, 32.3564453125, 33.39697265625, 34.4375]}, "gradients/decoder.transformer.h.17.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 9.0, 14.0, 47.0, 85.0, 150.0, 213.0, 213.0, 110.0, 106.0, 43.0, 16.0, 6.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-280.083740234375, -274.2662658691406, -268.4487609863281, -262.63128662109375, -256.8138122558594, -250.99632263183594, -245.1788330078125, -239.36135864257812, -233.54388427734375, -227.7263946533203, -221.90892028808594, -216.0914306640625, -210.27395629882812, -204.4564666748047, -198.63897705078125, -192.82150268554688, -187.00401306152344, -181.1865234375, -175.36904907226562, -169.5515594482422, -163.7340850830078, -157.91659545898438, -152.09912109375, -146.28163146972656, -140.46414184570312, -134.6466522216797, -128.8291778564453, -123.01168823242188, -117.1942138671875, -111.37672424316406, -105.55924224853516, -99.74176025390625, -93.92427825927734, -88.10679626464844, -82.28931427001953, -76.47183227539062, -70.65434265136719, -64.83686828613281, -59.019378662109375, -53.20189666748047, -47.38441467285156, -41.566932678222656, -35.74945068359375, -29.931964874267578, -24.114482879638672, -18.297000885009766, -12.479515075683594, -6.6620330810546875, -0.8445510864257812, 4.972931861877441, 10.790414810180664, 16.607898712158203, 22.42538070678711, 28.242862701416016, 34.06034851074219, 39.877830505371094, 45.6953125, 51.512794494628906, 57.33027648925781, 63.147762298583984, 68.96524047851562, 74.78273010253906, 80.60021209716797, 86.41769409179688, 92.23517608642578]}, "gradients/decoder.transformer.h.17.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 5.0, 3.0, 7.0, 9.0, 8.0, 7.0, 15.0, 27.0, 15.0, 21.0, 19.0, 15.0, 31.0, 28.0, 51.0, 42.0, 38.0, 45.0, 46.0, 38.0, 56.0, 52.0, 48.0, 34.0, 42.0, 40.0, 28.0, 39.0, 29.0, 35.0, 30.0, 21.0, 13.0, 18.0, 14.0, 12.0, 8.0, 3.0, 5.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-69.48068237304688, -67.33245086669922, -65.18421936035156, -63.03598403930664, -60.887752532958984, -58.73952102661133, -56.591285705566406, -54.44305419921875, -52.294822692871094, -50.14659118652344, -47.99835968017578, -45.85012435913086, -43.7018928527832, -41.55366134643555, -39.405426025390625, -37.25719451904297, -35.10896301269531, -32.960731506347656, -30.812498092651367, -28.664264678955078, -26.516033172607422, -24.367801666259766, -22.219568252563477, -20.071334838867188, -17.92310333251953, -15.774870872497559, -13.626638412475586, -11.478405952453613, -9.33017349243164, -7.181941032409668, -5.033708572387695, -2.8854761123657227, -0.73724365234375, 1.4109888076782227, 3.5592212677001953, 5.707453727722168, 7.855686187744141, 10.003918647766113, 12.152151107788086, 14.300383567810059, 16.44861602783203, 18.596847534179688, 20.745080947875977, 22.893314361572266, 25.041545867919922, 27.189777374267578, 29.338010787963867, 31.486244201660156, 33.63447570800781, 35.78270721435547, 37.930938720703125, 40.07917404174805, 42.2274055480957, 44.37563705444336, 46.52387237548828, 48.67210388183594, 50.820335388183594, 52.96856689453125, 55.116798400878906, 57.26503372192383, 59.413265228271484, 61.56149673461914, 63.70973205566406, 65.85796356201172, 68.00619506835938]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 8.0, 4.0, 5.0, 7.0, 9.0, 4.0, 7.0, 14.0, 18.0, 16.0, 14.0, 21.0, 20.0, 21.0, 33.0, 40.0, 31.0, 42.0, 36.0, 24.0, 52.0, 41.0, 54.0, 49.0, 40.0, 50.0, 38.0, 35.0, 27.0, 32.0, 35.0, 23.0, 25.0, 26.0, 19.0, 16.0, 12.0, 12.0, 12.0, 6.0, 7.0, 3.0, 5.0, 3.0, 1.0, 3.0, 5.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.45703125, -7.20367431640625, -6.9503173828125, -6.69696044921875, -6.443603515625, -6.19024658203125, -5.9368896484375, -5.68353271484375, -5.43017578125, -5.17681884765625, -4.9234619140625, -4.67010498046875, -4.416748046875, -4.16339111328125, -3.9100341796875, -3.65667724609375, -3.4033203125, -3.14996337890625, -2.8966064453125, -2.64324951171875, -2.389892578125, -2.13653564453125, -1.8831787109375, -1.62982177734375, -1.37646484375, -1.12310791015625, -0.8697509765625, -0.61639404296875, -0.363037109375, -0.10968017578125, 0.1436767578125, 0.39703369140625, 0.650390625, 0.90374755859375, 1.1571044921875, 1.41046142578125, 1.663818359375, 1.91717529296875, 2.1705322265625, 2.42388916015625, 2.67724609375, 2.93060302734375, 3.1839599609375, 3.43731689453125, 3.690673828125, 3.94403076171875, 4.1973876953125, 4.45074462890625, 4.7041015625, 4.95745849609375, 5.2108154296875, 5.46417236328125, 5.717529296875, 5.97088623046875, 6.2242431640625, 6.47760009765625, 6.73095703125, 6.98431396484375, 7.2376708984375, 7.49102783203125, 7.744384765625, 7.99774169921875, 8.2510986328125, 8.50445556640625, 8.7578125]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 4.0, 7.0, 16.0, 27.0, 34.0, 56.0, 117.0, 162.0, 168.0, 318.0, 461.0, 681.0, 951.0, 1363.0, 1931.0, 2802.0, 3969.0, 6015.0, 8614.0, 12597.0, 18359.0, 27498.0, 39687.0, 59133.0, 87003.0, 121040.0, 149339.0, 144519.0, 112661.0, 79449.0, 53941.0, 36977.0, 24941.0, 16879.0, 11403.0, 7803.0, 5382.0, 3870.0, 2587.0, 1793.0, 1290.0, 873.0, 576.0, 405.0, 296.0, 197.0, 154.0, 89.0, 53.0, 31.0, 23.0, 8.0, 8.0, 4.0, 1.0, 2.0, 1.0], "bins": [-0.5595703125, -0.5430564880371094, -0.5265426635742188, -0.5100288391113281, -0.4935150146484375, -0.4770011901855469, -0.46048736572265625, -0.4439735412597656, -0.427459716796875, -0.4109458923339844, -0.39443206787109375, -0.3779182434082031, -0.3614044189453125, -0.3448905944824219, -0.32837677001953125, -0.3118629455566406, -0.29534912109375, -0.2788352966308594, -0.26232147216796875, -0.24580764770507812, -0.2292938232421875, -0.21277999877929688, -0.19626617431640625, -0.17975234985351562, -0.163238525390625, -0.14672470092773438, -0.13021087646484375, -0.11369705200195312, -0.0971832275390625, -0.08066940307617188, -0.06415557861328125, -0.047641754150390625, -0.0311279296875, -0.014614105224609375, 0.00189971923828125, 0.018413543701171875, 0.0349273681640625, 0.051441192626953125, 0.06795501708984375, 0.08446884155273438, 0.100982666015625, 0.11749649047851562, 0.13401031494140625, 0.15052413940429688, 0.1670379638671875, 0.18355178833007812, 0.20006561279296875, 0.21657943725585938, 0.23309326171875, 0.24960708618164062, 0.26612091064453125, 0.2826347351074219, 0.2991485595703125, 0.3156623840332031, 0.33217620849609375, 0.3486900329589844, 0.365203857421875, 0.3817176818847656, 0.39823150634765625, 0.4147453308105469, 0.4312591552734375, 0.4477729797363281, 0.46428680419921875, 0.4808006286621094, 0.497314453125]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 5.0, 3.0, 5.0, 3.0, 11.0, 3.0, 10.0, 15.0, 11.0, 13.0, 14.0, 16.0, 28.0, 25.0, 33.0, 33.0, 26.0, 40.0, 36.0, 36.0, 57.0, 55.0, 1070.0, 52.0, 50.0, 49.0, 40.0, 35.0, 31.0, 31.0, 23.0, 36.0, 29.0, 26.0, 17.0, 12.0, 14.0, 8.0, 7.0, 7.0, 4.0, 7.0, 2.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0], "bins": [-6.18359375, -6.01458740234375, -5.8455810546875, -5.67657470703125, -5.507568359375, -5.33856201171875, -5.1695556640625, -5.00054931640625, -4.83154296875, -4.66253662109375, -4.4935302734375, -4.32452392578125, -4.155517578125, -3.98651123046875, -3.8175048828125, -3.64849853515625, -3.4794921875, -3.31048583984375, -3.1414794921875, -2.97247314453125, -2.803466796875, -2.63446044921875, -2.4654541015625, -2.29644775390625, -2.12744140625, -1.95843505859375, -1.7894287109375, -1.62042236328125, -1.451416015625, -1.28240966796875, -1.1134033203125, -0.94439697265625, -0.775390625, -0.60638427734375, -0.4373779296875, -0.26837158203125, -0.099365234375, 0.06964111328125, 0.2386474609375, 0.40765380859375, 0.57666015625, 0.74566650390625, 0.9146728515625, 1.08367919921875, 1.252685546875, 1.42169189453125, 1.5906982421875, 1.75970458984375, 1.9287109375, 2.09771728515625, 2.2667236328125, 2.43572998046875, 2.604736328125, 2.77374267578125, 2.9427490234375, 3.11175537109375, 3.28076171875, 3.44976806640625, 3.6187744140625, 3.78778076171875, 3.956787109375, 4.12579345703125, 4.2947998046875, 4.46380615234375, 4.6328125]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 2.0, 3.0, 3.0, 6.0, 6.0, 5.0, 18.0, 24.0, 45.0, 43.0, 77.0, 108.0, 129.0, 223.0, 315.0, 530.0, 784.0, 1207.0, 1827.0, 2910.0, 4670.0, 7056.0, 11765.0, 18736.0, 29377.0, 46086.0, 70792.0, 102791.0, 139155.0, 1192414.0, 148541.0, 107813.0, 74735.0, 49143.0, 31318.0, 19846.0, 12652.0, 7885.0, 5083.0, 3112.0, 2028.0, 1335.0, 847.0, 535.0, 374.0, 231.0, 160.0, 132.0, 95.0, 57.0, 40.0, 24.0, 12.0, 12.0, 7.0, 8.0, 6.0, 1.0, 0.0, 4.0, 3.0], "bins": [-0.408203125, -0.3956718444824219, -0.38314056396484375, -0.3706092834472656, -0.3580780029296875, -0.3455467224121094, -0.33301544189453125, -0.3204841613769531, -0.307952880859375, -0.2954216003417969, -0.28289031982421875, -0.2703590393066406, -0.2578277587890625, -0.24529647827148438, -0.23276519775390625, -0.22023391723632812, -0.20770263671875, -0.19517135620117188, -0.18264007568359375, -0.17010879516601562, -0.1575775146484375, -0.14504623413085938, -0.13251495361328125, -0.11998367309570312, -0.107452392578125, -0.09492111206054688, -0.08238983154296875, -0.06985855102539062, -0.0573272705078125, -0.044795989990234375, -0.03226470947265625, -0.019733428955078125, -0.0072021484375, 0.005329132080078125, 0.01786041259765625, 0.030391693115234375, 0.0429229736328125, 0.055454254150390625, 0.06798553466796875, 0.08051681518554688, 0.093048095703125, 0.10557937622070312, 0.11811065673828125, 0.13064193725585938, 0.1431732177734375, 0.15570449829101562, 0.16823577880859375, 0.18076705932617188, 0.19329833984375, 0.20582962036132812, 0.21836090087890625, 0.23089218139648438, 0.2434234619140625, 0.2559547424316406, 0.26848602294921875, 0.2810173034667969, 0.293548583984375, 0.3060798645019531, 0.31861114501953125, 0.3311424255371094, 0.3436737060546875, 0.3562049865722656, 0.36873626708984375, 0.3812675476074219, 0.393798828125]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 2.0, 4.0, 3.0, 7.0, 5.0, 6.0, 5.0, 11.0, 14.0, 10.0, 15.0, 21.0, 26.0, 45.0, 31.0, 50.0, 64.0, 67.0, 91.0, 102.0, 83.0, 60.0, 51.0, 48.0, 36.0, 30.0, 25.0, 20.0, 12.0, 18.0, 9.0, 9.0, 7.0, 8.0, 2.0, 7.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00970458984375, -0.009337902069091797, -0.008971214294433594, -0.00860452651977539, -0.008237838745117188, -0.007871150970458984, -0.007504463195800781, -0.007137775421142578, -0.006771087646484375, -0.006404399871826172, -0.006037712097167969, -0.005671024322509766, -0.0053043365478515625, -0.004937648773193359, -0.004570960998535156, -0.004204273223876953, -0.00383758544921875, -0.003470897674560547, -0.0031042098999023438, -0.0027375221252441406, -0.0023708343505859375, -0.0020041465759277344, -0.0016374588012695312, -0.0012707710266113281, -0.000904083251953125, -0.0005373954772949219, -0.00017070770263671875, 0.00019598007202148438, 0.0005626678466796875, 0.0009293556213378906, 0.0012960433959960938, 0.0016627311706542969, 0.0020294189453125, 0.002396106719970703, 0.0027627944946289062, 0.0031294822692871094, 0.0034961700439453125, 0.0038628578186035156, 0.004229545593261719, 0.004596233367919922, 0.004962921142578125, 0.005329608917236328, 0.005696296691894531, 0.006062984466552734, 0.0064296722412109375, 0.006796360015869141, 0.007163047790527344, 0.007529735565185547, 0.00789642333984375, 0.008263111114501953, 0.008629798889160156, 0.00899648666381836, 0.009363174438476562, 0.009729862213134766, 0.010096549987792969, 0.010463237762451172, 0.010829925537109375, 0.011196613311767578, 0.011563301086425781, 0.011929988861083984, 0.012296676635742188, 0.01266336441040039, 0.013030052185058594, 0.013396739959716797, 0.013763427734375]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 7.0, 3.0, 4.0, 8.0, 10.0, 13.0, 16.0, 32.0, 45.0, 61.0, 88.0, 133.0, 234.0, 455.0, 2167.0, 571993.0, 470128.0, 2070.0, 430.0, 235.0, 133.0, 87.0, 69.0, 33.0, 25.0, 20.0, 16.0, 8.0, 6.0, 5.0, 8.0, 4.0, 6.0, 5.0, 2.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2432861328125, -0.2361278533935547, -0.22896957397460938, -0.22181129455566406, -0.21465301513671875, -0.20749473571777344, -0.20033645629882812, -0.1931781768798828, -0.1860198974609375, -0.1788616180419922, -0.17170333862304688, -0.16454505920410156, -0.15738677978515625, -0.15022850036621094, -0.14307022094726562, -0.1359119415283203, -0.128753662109375, -0.12159538269042969, -0.11443710327148438, -0.10727882385253906, -0.10012054443359375, -0.09296226501464844, -0.08580398559570312, -0.07864570617675781, -0.0714874267578125, -0.06432914733886719, -0.057170867919921875, -0.05001258850097656, -0.04285430908203125, -0.03569602966308594, -0.028537750244140625, -0.021379470825195312, -0.01422119140625, -0.0070629119873046875, 9.5367431640625e-05, 0.0072536468505859375, 0.01441192626953125, 0.021570205688476562, 0.028728485107421875, 0.03588676452636719, 0.0430450439453125, 0.05020332336425781, 0.057361602783203125, 0.06451988220214844, 0.07167816162109375, 0.07883644104003906, 0.08599472045898438, 0.09315299987792969, 0.100311279296875, 0.10746955871582031, 0.11462783813476562, 0.12178611755371094, 0.12894439697265625, 0.13610267639160156, 0.14326095581054688, 0.1504192352294922, 0.1575775146484375, 0.1647357940673828, 0.17189407348632812, 0.17905235290527344, 0.18621063232421875, 0.19336891174316406, 0.20052719116210938, 0.2076854705810547, 0.21484375]}, "gradients/decoder.transformer.h.17.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 40.0, 348.0, 496.0, 118.0, 12.0, 0.0, 1.0, 1.0], "bins": [-0.07492337375879288, -0.07363671809434891, -0.07235006242990494, -0.07106341421604156, -0.0697767585515976, -0.06849010288715363, -0.06720344722270966, -0.06591679155826569, -0.06463013589382172, -0.06334348022937775, -0.062056828290224075, -0.060770172625780106, -0.059483520686626434, -0.058196865022182465, -0.056910209357738495, -0.055623553693294525, -0.05433690547943115, -0.05305024981498718, -0.05176359787583351, -0.05047694221138954, -0.04919029027223587, -0.0479036346077919, -0.04661697894334793, -0.04533032327890396, -0.04404367133975029, -0.04275701567530632, -0.04147036373615265, -0.04018370807170868, -0.03889705240726471, -0.03761040046811104, -0.03632374480366707, -0.0350370928645134, -0.03375043720006943, -0.03246378153562546, -0.031177129596471786, -0.029890473932027817, -0.028603820130228996, -0.027317166328430176, -0.026030510663986206, -0.024743856862187386, -0.023457204923033714, -0.022170551121234894, -0.020883895456790924, -0.019597241654992104, -0.018310587853193283, -0.017023934051394463, -0.015737280249595642, -0.014450624585151672, -0.013163970783352852, -0.011877316981554031, -0.010590662248432636, -0.009304007515311241, -0.00801735371351242, -0.006730699446052313, -0.005444045178592205, -0.00415739044547081, -0.002870736178010702, -0.0015840819105505943, -0.0002974276430904865, 0.0009892266243696213, 0.002275880891829729, 0.003562535159289837, 0.004849189426749945, 0.00613584415987134, 0.00742249796167016]}, "gradients/decoder.transformer.h.17.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 2.0, 2.0, 7.0, 2.0, 3.0, 3.0, 3.0, 3.0, 12.0, 13.0, 9.0, 21.0, 16.0, 24.0, 17.0, 21.0, 21.0, 30.0, 36.0, 34.0, 30.0, 39.0, 45.0, 37.0, 40.0, 42.0, 37.0, 40.0, 33.0, 34.0, 44.0, 38.0, 24.0, 28.0, 32.0, 16.0, 25.0, 33.0, 17.0, 18.0, 18.0, 15.0, 8.0, 8.0, 5.0, 10.0, 7.0, 3.0, 3.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.005651652812957764, -0.005479979328811169, -0.005308305844664574, -0.005136632360517979, -0.004964958876371384, -0.004793285392224789, -0.004621611908078194, -0.004449938423931599, -0.004278264939785004, -0.004106591455638409, -0.003934917971491814, -0.0037632444873452187, -0.0035915710031986237, -0.0034198975190520287, -0.0032482240349054337, -0.0030765505507588387, -0.0029048770666122437, -0.0027332035824656487, -0.0025615300983190536, -0.0023898566141724586, -0.0022181831300258636, -0.0020465096458792686, -0.0018748361617326736, -0.0017031626775860786, -0.0015314891934394836, -0.0013598157092928886, -0.0011881422251462936, -0.0010164687409996986, -0.0008447952568531036, -0.0006731217727065086, -0.0005014482885599136, -0.00032977480441331863, -0.00015810132026672363, 1.3572163879871368e-05, 0.00018524564802646637, 0.00035691913217306137, 0.0005285926163196564, 0.0007002661004662514, 0.0008719395846128464, 0.0010436130687594414, 0.0012152865529060364, 0.0013869600370526314, 0.0015586335211992264, 0.0017303070053458214, 0.0019019804894924164, 0.0020736539736390114, 0.0022453274577856064, 0.0024170009419322014, 0.0025886744260787964, 0.0027603479102253914, 0.0029320213943719864, 0.0031036948785185814, 0.0032753683626651764, 0.0034470418468117714, 0.0036187153309583664, 0.0037903888151049614, 0.003962062299251556, 0.004133735783398151, 0.004305409267544746, 0.004477082751691341, 0.004648756235837936, 0.004820429719984531, 0.004992103204131126, 0.005163776688277721, 0.005335450172424316]}, "gradients/decoder.transformer.h.17.attn.c_proj.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 8.0, 4.0, 5.0, 7.0, 9.0, 4.0, 7.0, 14.0, 18.0, 16.0, 14.0, 21.0, 20.0, 21.0, 33.0, 40.0, 31.0, 42.0, 36.0, 24.0, 52.0, 41.0, 54.0, 49.0, 40.0, 50.0, 38.0, 35.0, 27.0, 32.0, 35.0, 23.0, 25.0, 26.0, 19.0, 15.0, 12.0, 13.0, 12.0, 6.0, 7.0, 3.0, 5.0, 3.0, 1.0, 3.0, 5.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.45703125, -7.20367431640625, -6.9503173828125, -6.69696044921875, -6.443603515625, -6.19024658203125, -5.9368896484375, -5.68353271484375, -5.43017578125, -5.17681884765625, -4.9234619140625, -4.67010498046875, -4.416748046875, -4.16339111328125, -3.9100341796875, -3.65667724609375, -3.4033203125, -3.14996337890625, -2.8966064453125, -2.64324951171875, -2.389892578125, -2.13653564453125, -1.8831787109375, -1.62982177734375, -1.37646484375, -1.12310791015625, -0.8697509765625, -0.61639404296875, -0.363037109375, -0.10968017578125, 0.1436767578125, 0.39703369140625, 0.650390625, 0.90374755859375, 1.1571044921875, 1.41046142578125, 1.663818359375, 1.91717529296875, 2.1705322265625, 2.42388916015625, 2.67724609375, 2.93060302734375, 3.1839599609375, 3.43731689453125, 3.690673828125, 3.94403076171875, 4.1973876953125, 4.45074462890625, 4.7041015625, 4.95745849609375, 5.2108154296875, 5.46417236328125, 5.717529296875, 5.97088623046875, 6.2242431640625, 6.47760009765625, 6.73095703125, 6.98431396484375, 7.2376708984375, 7.49102783203125, 7.744384765625, 7.99774169921875, 8.2510986328125, 8.50445556640625, 8.7578125]}, "gradients/decoder.transformer.h.17.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 2.0, 3.0, 2.0, 2.0, 5.0, 7.0, 6.0, 15.0, 14.0, 36.0, 39.0, 64.0, 79.0, 119.0, 162.0, 282.0, 404.0, 605.0, 1041.0, 1752.0, 2979.0, 5210.0, 9540.0, 18707.0, 41847.0, 113584.0, 365379.0, 314563.0, 97533.0, 36833.0, 16955.0, 8614.0, 4912.0, 2829.0, 1676.0, 1013.0, 613.0, 370.0, 232.0, 181.0, 111.0, 70.0, 52.0, 38.0, 25.0, 23.0, 12.0, 7.0, 5.0, 7.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-5.73828125, -5.54656982421875, -5.3548583984375, -5.16314697265625, -4.971435546875, -4.77972412109375, -4.5880126953125, -4.39630126953125, -4.20458984375, -4.01287841796875, -3.8211669921875, -3.62945556640625, -3.437744140625, -3.24603271484375, -3.0543212890625, -2.86260986328125, -2.6708984375, -2.47918701171875, -2.2874755859375, -2.09576416015625, -1.904052734375, -1.71234130859375, -1.5206298828125, -1.32891845703125, -1.13720703125, -0.94549560546875, -0.7537841796875, -0.56207275390625, -0.370361328125, -0.17864990234375, 0.0130615234375, 0.20477294921875, 0.396484375, 0.58819580078125, 0.7799072265625, 0.97161865234375, 1.163330078125, 1.35504150390625, 1.5467529296875, 1.73846435546875, 1.93017578125, 2.12188720703125, 2.3135986328125, 2.50531005859375, 2.697021484375, 2.88873291015625, 3.0804443359375, 3.27215576171875, 3.4638671875, 3.65557861328125, 3.8472900390625, 4.03900146484375, 4.230712890625, 4.42242431640625, 4.6141357421875, 4.80584716796875, 4.99755859375, 5.18927001953125, 5.3809814453125, 5.57269287109375, 5.764404296875, 5.95611572265625, 6.1478271484375, 6.33953857421875, 6.53125]}, "gradients/decoder.transformer.h.17.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 6.0, 5.0, 5.0, 5.0, 14.0, 13.0, 16.0, 9.0, 17.0, 21.0, 26.0, 33.0, 42.0, 41.0, 45.0, 64.0, 76.0, 293.0, 1702.0, 166.0, 76.0, 77.0, 56.0, 42.0, 32.0, 31.0, 26.0, 23.0, 24.0, 14.0, 13.0, 12.0, 5.0, 12.0, 5.0, 0.0, 1.0, 6.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-39.65625, -38.46337890625, -37.2705078125, -36.07763671875, -34.884765625, -33.69189453125, -32.4990234375, -31.30615234375, -30.11328125, -28.92041015625, -27.7275390625, -26.53466796875, -25.341796875, -24.14892578125, -22.9560546875, -21.76318359375, -20.5703125, -19.37744140625, -18.1845703125, -16.99169921875, -15.798828125, -14.60595703125, -13.4130859375, -12.22021484375, -11.02734375, -9.83447265625, -8.6416015625, -7.44873046875, -6.255859375, -5.06298828125, -3.8701171875, -2.67724609375, -1.484375, -0.29150390625, 0.9013671875, 2.09423828125, 3.287109375, 4.47998046875, 5.6728515625, 6.86572265625, 8.05859375, 9.25146484375, 10.4443359375, 11.63720703125, 12.830078125, 14.02294921875, 15.2158203125, 16.40869140625, 17.6015625, 18.79443359375, 19.9873046875, 21.18017578125, 22.373046875, 23.56591796875, 24.7587890625, 25.95166015625, 27.14453125, 28.33740234375, 29.5302734375, 30.72314453125, 31.916015625, 33.10888671875, 34.3017578125, 35.49462890625, 36.6875]}, "gradients/decoder.transformer.h.17.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 4.0, 2.0, 2.0, 6.0, 9.0, 9.0, 14.0, 17.0, 25.0, 30.0, 34.0, 62.0, 49.0, 100.0, 153.0, 225.0, 434.0, 1170.0, 26037.0, 3102686.0, 12690.0, 951.0, 353.0, 186.0, 112.0, 96.0, 63.0, 47.0, 39.0, 24.0, 14.0, 16.0, 11.0, 17.0, 7.0, 3.0, 6.0, 5.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-73.375, -70.9638671875, -68.552734375, -66.1416015625, -63.73046875, -61.3193359375, -58.908203125, -56.4970703125, -54.0859375, -51.6748046875, -49.263671875, -46.8525390625, -44.44140625, -42.0302734375, -39.619140625, -37.2080078125, -34.796875, -32.3857421875, -29.974609375, -27.5634765625, -25.15234375, -22.7412109375, -20.330078125, -17.9189453125, -15.5078125, -13.0966796875, -10.685546875, -8.2744140625, -5.86328125, -3.4521484375, -1.041015625, 1.3701171875, 3.78125, 6.1923828125, 8.603515625, 11.0146484375, 13.42578125, 15.8369140625, 18.248046875, 20.6591796875, 23.0703125, 25.4814453125, 27.892578125, 30.3037109375, 32.71484375, 35.1259765625, 37.537109375, 39.9482421875, 42.359375, 44.7705078125, 47.181640625, 49.5927734375, 52.00390625, 54.4150390625, 56.826171875, 59.2373046875, 61.6484375, 64.0595703125, 66.470703125, 68.8818359375, 71.29296875, 73.7041015625, 76.115234375, 78.5263671875, 80.9375]}, "gradients/decoder.transformer.h.17.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 12.0, 49.0, 123.0, 230.0, 270.0, 195.0, 96.0, 25.0, 14.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-77.53850555419922, -75.20618438720703, -72.87386322021484, -70.54153442382812, -68.20921325683594, -65.87689208984375, -63.54457092285156, -61.212249755859375, -58.87992477416992, -56.547603607177734, -54.21527862548828, -51.882957458496094, -49.550636291503906, -47.21831130981445, -44.885990142822266, -42.55366516113281, -40.221343994140625, -37.88902282714844, -35.556697845458984, -33.2243766784668, -30.892053604125977, -28.559730529785156, -26.22740936279297, -23.89508628845215, -21.562763214111328, -19.230440139770508, -16.898117065429688, -14.5657958984375, -12.23347282409668, -9.90114974975586, -7.5688276290893555, -5.236505508422852, -2.9041824340820312, -0.5718598365783691, 1.760462760925293, 4.092785358428955, 6.425107955932617, 8.757431030273438, 11.089753150939941, 13.422075271606445, 15.754398345947266, 18.086721420288086, 20.419044494628906, 22.751365661621094, 25.083688735961914, 27.416011810302734, 29.748332977294922, 32.080657958984375, 34.41297912597656, 36.74530029296875, 39.0776252746582, 41.40994644165039, 43.742271423339844, 46.07459259033203, 48.40691375732422, 50.739234924316406, 53.07155990600586, 55.40388107299805, 57.7362060546875, 60.06852722167969, 62.400848388671875, 64.73316955566406, 67.06549835205078, 69.39781951904297, 71.73014068603516]}, "gradients/decoder.transformer.h.17.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 3.0, 0.0, 3.0, 4.0, 9.0, 9.0, 4.0, 10.0, 12.0, 13.0, 16.0, 9.0, 18.0, 19.0, 26.0, 29.0, 34.0, 39.0, 38.0, 49.0, 35.0, 50.0, 38.0, 54.0, 44.0, 44.0, 55.0, 26.0, 42.0, 39.0, 30.0, 38.0, 29.0, 16.0, 24.0, 21.0, 22.0, 13.0, 9.0, 6.0, 11.0, 6.0, 2.0, 6.0, 3.0, 5.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-86.75184631347656, -84.26504516601562, -81.77825164794922, -79.29145050048828, -76.80464935302734, -74.31785583496094, -71.8310546875, -69.34425354003906, -66.85745239257812, -64.37065124511719, -61.883853912353516, -59.397056579589844, -56.910255432128906, -54.423458099365234, -51.93666076660156, -49.449859619140625, -46.96306610107422, -44.47626876831055, -41.98946762084961, -39.50267028808594, -37.015869140625, -34.52907180786133, -32.042274475097656, -29.55547523498535, -27.068675994873047, -24.581876754760742, -22.095077514648438, -19.608280181884766, -17.12148094177246, -14.634681701660156, -12.147883415222168, -9.66108512878418, -7.174293518066406, -4.68749475479126, -2.2006959915161133, 0.2861027717590332, 2.7729015350341797, 5.259700775146484, 7.746499061584473, 10.233297348022461, 12.720096588134766, 15.20689582824707, 17.693695068359375, 20.180492401123047, 22.66729164123535, 25.154090881347656, 27.640888214111328, 30.127687454223633, 32.61448669433594, 35.10128402709961, 37.58808517456055, 40.07488250732422, 42.561683654785156, 45.04848098754883, 47.5352783203125, 50.02207946777344, 52.50887680053711, 54.99567413330078, 57.48247528076172, 59.96927261352539, 62.45606994628906, 64.94287109375, 67.42967224121094, 69.91646575927734, 72.40326690673828]}, "gradients/decoder.transformer.h.16.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 5.0, 1.0, 2.0, 3.0, 8.0, 3.0, 7.0, 7.0, 8.0, 7.0, 7.0, 14.0, 15.0, 14.0, 28.0, 20.0, 14.0, 35.0, 35.0, 37.0, 34.0, 42.0, 43.0, 39.0, 34.0, 47.0, 51.0, 46.0, 49.0, 38.0, 44.0, 29.0, 40.0, 27.0, 27.0, 19.0, 26.0, 18.0, 23.0, 10.0, 10.0, 12.0, 13.0, 5.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.1953125, -7.92822265625, -7.6611328125, -7.39404296875, -7.126953125, -6.85986328125, -6.5927734375, -6.32568359375, -6.05859375, -5.79150390625, -5.5244140625, -5.25732421875, -4.990234375, -4.72314453125, -4.4560546875, -4.18896484375, -3.921875, -3.65478515625, -3.3876953125, -3.12060546875, -2.853515625, -2.58642578125, -2.3193359375, -2.05224609375, -1.78515625, -1.51806640625, -1.2509765625, -0.98388671875, -0.716796875, -0.44970703125, -0.1826171875, 0.08447265625, 0.3515625, 0.61865234375, 0.8857421875, 1.15283203125, 1.419921875, 1.68701171875, 1.9541015625, 2.22119140625, 2.48828125, 2.75537109375, 3.0224609375, 3.28955078125, 3.556640625, 3.82373046875, 4.0908203125, 4.35791015625, 4.625, 4.89208984375, 5.1591796875, 5.42626953125, 5.693359375, 5.96044921875, 6.2275390625, 6.49462890625, 6.76171875, 7.02880859375, 7.2958984375, 7.56298828125, 7.830078125, 8.09716796875, 8.3642578125, 8.63134765625, 8.8984375]}, "gradients/decoder.transformer.h.16.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 5.0, 1.0, 2.0, 4.0, 3.0, 4.0, 7.0, 8.0, 10.0, 9.0, 12.0, 15.0, 16.0, 10.0, 25.0, 28.0, 26.0, 34.0, 53.0, 88.0, 144.0, 267.0, 750.0, 3278.0, 33066.0, 942068.0, 3063033.0, 139806.0, 9218.0, 1261.0, 410.0, 170.0, 121.0, 70.0, 60.0, 31.0, 29.0, 31.0, 19.0, 16.0, 12.0, 17.0, 15.0, 9.0, 3.0, 2.0, 5.0, 4.0, 5.0, 1.0, 4.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-33.5625, -32.521484375, -31.48046875, -30.439453125, -29.3984375, -28.357421875, -27.31640625, -26.275390625, -25.234375, -24.193359375, -23.15234375, -22.111328125, -21.0703125, -20.029296875, -18.98828125, -17.947265625, -16.90625, -15.865234375, -14.82421875, -13.783203125, -12.7421875, -11.701171875, -10.66015625, -9.619140625, -8.578125, -7.537109375, -6.49609375, -5.455078125, -4.4140625, -3.373046875, -2.33203125, -1.291015625, -0.25, 0.791015625, 1.83203125, 2.873046875, 3.9140625, 4.955078125, 5.99609375, 7.037109375, 8.078125, 9.119140625, 10.16015625, 11.201171875, 12.2421875, 13.283203125, 14.32421875, 15.365234375, 16.40625, 17.447265625, 18.48828125, 19.529296875, 20.5703125, 21.611328125, 22.65234375, 23.693359375, 24.734375, 25.775390625, 26.81640625, 27.857421875, 28.8984375, 29.939453125, 30.98046875, 32.021484375, 33.0625]}, "gradients/decoder.transformer.h.16.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 4.0, 5.0, 2.0, 15.0, 15.0, 21.0, 31.0, 74.0, 90.0, 154.0, 216.0, 290.0, 446.0, 569.0, 575.0, 498.0, 342.0, 235.0, 183.0, 105.0, 72.0, 51.0, 27.0, 20.0, 13.0, 13.0, 2.0, 6.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-29.78125, -29.001708984375, -28.22216796875, -27.442626953125, -26.6630859375, -25.883544921875, -25.10400390625, -24.324462890625, -23.544921875, -22.765380859375, -21.98583984375, -21.206298828125, -20.4267578125, -19.647216796875, -18.86767578125, -18.088134765625, -17.30859375, -16.529052734375, -15.74951171875, -14.969970703125, -14.1904296875, -13.410888671875, -12.63134765625, -11.851806640625, -11.072265625, -10.292724609375, -9.51318359375, -8.733642578125, -7.9541015625, -7.174560546875, -6.39501953125, -5.615478515625, -4.8359375, -4.056396484375, -3.27685546875, -2.497314453125, -1.7177734375, -0.938232421875, -0.15869140625, 0.620849609375, 1.400390625, 2.179931640625, 2.95947265625, 3.739013671875, 4.5185546875, 5.298095703125, 6.07763671875, 6.857177734375, 7.63671875, 8.416259765625, 9.19580078125, 9.975341796875, 10.7548828125, 11.534423828125, 12.31396484375, 13.093505859375, 13.873046875, 14.652587890625, 15.43212890625, 16.211669921875, 16.9912109375, 17.770751953125, 18.55029296875, 19.329833984375, 20.109375]}, "gradients/decoder.transformer.h.16.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 3.0, 7.0, 2.0, 15.0, 15.0, 26.0, 44.0, 60.0, 96.0, 158.0, 307.0, 659.0, 1880.0, 16639.0, 769678.0, 3325593.0, 73264.0, 3899.0, 925.0, 433.0, 243.0, 134.0, 72.0, 49.0, 27.0, 13.0, 14.0, 9.0, 9.0, 9.0, 2.0, 1.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-46.6875, -44.6767578125, -42.666015625, -40.6552734375, -38.64453125, -36.6337890625, -34.623046875, -32.6123046875, -30.6015625, -28.5908203125, -26.580078125, -24.5693359375, -22.55859375, -20.5478515625, -18.537109375, -16.5263671875, -14.515625, -12.5048828125, -10.494140625, -8.4833984375, -6.47265625, -4.4619140625, -2.451171875, -0.4404296875, 1.5703125, 3.5810546875, 5.591796875, 7.6025390625, 9.61328125, 11.6240234375, 13.634765625, 15.6455078125, 17.65625, 19.6669921875, 21.677734375, 23.6884765625, 25.69921875, 27.7099609375, 29.720703125, 31.7314453125, 33.7421875, 35.7529296875, 37.763671875, 39.7744140625, 41.78515625, 43.7958984375, 45.806640625, 47.8173828125, 49.828125, 51.8388671875, 53.849609375, 55.8603515625, 57.87109375, 59.8818359375, 61.892578125, 63.9033203125, 65.9140625, 67.9248046875, 69.935546875, 71.9462890625, 73.95703125, 75.9677734375, 77.978515625, 79.9892578125, 82.0]}, "gradients/decoder.transformer.h.16.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 7.0, 7.0, 34.0, 82.0, 114.0, 178.0, 204.0, 172.0, 118.0, 50.0, 34.0, 11.0, 2.0, 2.0], "bins": [-317.98980712890625, -312.3635559082031, -306.7373352050781, -301.111083984375, -295.4848327636719, -289.85858154296875, -284.23236083984375, -278.6061096191406, -272.9798583984375, -267.3536071777344, -261.7273864746094, -256.10113525390625, -250.47488403320312, -244.84864807128906, -239.222412109375, -233.59616088867188, -227.96994018554688, -222.3437042236328, -216.7174530029297, -211.09121704101562, -205.4649658203125, -199.83872985839844, -194.21249389648438, -188.58624267578125, -182.95999145507812, -177.33375549316406, -171.70750427246094, -166.08126831054688, -160.45501708984375, -154.8287811279297, -149.20254516601562, -143.5762939453125, -137.95005798339844, -132.32382202148438, -126.69757080078125, -121.07133483886719, -115.4450912475586, -109.81884765625, -104.1926040649414, -98.56636047363281, -92.94012451171875, -87.31388092041016, -81.68763732910156, -76.0614013671875, -70.4351577758789, -64.80891418457031, -59.18267059326172, -53.55643081665039, -47.9301872253418, -42.3039436340332, -36.677703857421875, -31.05146026611328, -25.42521858215332, -19.79897689819336, -14.172733306884766, -8.546493530273438, -2.9202499389648438, 2.7059922218322754, 8.332234382629395, 13.958477020263672, 19.584718704223633, 25.210960388183594, 30.837203979492188, 36.463443756103516, 42.08968734741211]}, "gradients/decoder.transformer.h.16.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 4.0, 4.0, 6.0, 5.0, 9.0, 6.0, 6.0, 8.0, 16.0, 18.0, 28.0, 37.0, 34.0, 22.0, 39.0, 34.0, 32.0, 32.0, 32.0, 43.0, 32.0, 50.0, 37.0, 51.0, 40.0, 41.0, 32.0, 42.0, 38.0, 33.0, 29.0, 23.0, 19.0, 19.0, 22.0, 16.0, 11.0, 13.0, 9.0, 9.0, 4.0, 8.0, 2.0, 3.0, 2.0, 5.0, 4.0, 3.0, 1.0, 1.0], "bins": [-70.81964111328125, -68.85372924804688, -66.88780975341797, -64.9218978881836, -62.95598602294922, -60.99007034301758, -59.02415466308594, -57.05824279785156, -55.09232711791992, -53.12641143798828, -51.160499572753906, -49.194583892822266, -47.228668212890625, -45.26275634765625, -43.29684066772461, -41.33092498779297, -39.365013122558594, -37.39909744262695, -35.43318557739258, -33.46726989746094, -31.50135612487793, -29.535442352294922, -27.56952667236328, -25.603612899780273, -23.637699127197266, -21.671785354614258, -19.70587158203125, -17.73995590209961, -15.774042129516602, -13.808128356933594, -11.84221363067627, -9.876298904418945, -7.9103851318359375, -5.9444708824157715, -3.9785566329956055, -2.0126423835754395, -0.04672813415527344, 1.9191856384277344, 3.8851003646850586, 5.851015090942383, 7.816928863525391, 9.782842636108398, 11.748757362365723, 13.714672088623047, 15.680585861206055, 17.646499633789062, 19.612415313720703, 21.57832908630371, 23.54424285888672, 25.510156631469727, 27.476070404052734, 29.441986083984375, 31.407899856567383, 33.37381362915039, 35.33972930908203, 37.305641174316406, 39.27155685424805, 41.23747253417969, 43.20338439941406, 45.1693000793457, 47.135215759277344, 49.10112762451172, 51.06704330444336, 53.032958984375, 54.998870849609375]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 1.0, 5.0, 4.0, 3.0, 4.0, 8.0, 5.0, 7.0, 10.0, 14.0, 12.0, 14.0, 19.0, 16.0, 16.0, 38.0, 34.0, 29.0, 33.0, 35.0, 44.0, 47.0, 47.0, 47.0, 55.0, 42.0, 42.0, 56.0, 30.0, 41.0, 45.0, 30.0, 21.0, 24.0, 33.0, 15.0, 12.0, 9.0, 15.0, 14.0, 3.0, 8.0, 9.0, 5.0, 7.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.6640625, -7.3819580078125, -7.099853515625, -6.8177490234375, -6.53564453125, -6.2535400390625, -5.971435546875, -5.6893310546875, -5.4072265625, -5.1251220703125, -4.843017578125, -4.5609130859375, -4.27880859375, -3.9967041015625, -3.714599609375, -3.4324951171875, -3.150390625, -2.8682861328125, -2.586181640625, -2.3040771484375, -2.02197265625, -1.7398681640625, -1.457763671875, -1.1756591796875, -0.8935546875, -0.6114501953125, -0.329345703125, -0.0472412109375, 0.23486328125, 0.5169677734375, 0.799072265625, 1.0811767578125, 1.36328125, 1.6453857421875, 1.927490234375, 2.2095947265625, 2.49169921875, 2.7738037109375, 3.055908203125, 3.3380126953125, 3.6201171875, 3.9022216796875, 4.184326171875, 4.4664306640625, 4.74853515625, 5.0306396484375, 5.312744140625, 5.5948486328125, 5.876953125, 6.1590576171875, 6.441162109375, 6.7232666015625, 7.00537109375, 7.2874755859375, 7.569580078125, 7.8516845703125, 8.1337890625, 8.4158935546875, 8.697998046875, 8.9801025390625, 9.26220703125, 9.5443115234375, 9.826416015625, 10.1085205078125, 10.390625]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 6.0, 4.0, 7.0, 14.0, 16.0, 25.0, 25.0, 33.0, 65.0, 78.0, 140.0, 228.0, 291.0, 494.0, 771.0, 1263.0, 1926.0, 3244.0, 5211.0, 8594.0, 14321.0, 23709.0, 39956.0, 66383.0, 110922.0, 169920.0, 200021.0, 155427.0, 98108.0, 58926.0, 35146.0, 20907.0, 12541.0, 7617.0, 4617.0, 2792.0, 1781.0, 1101.0, 709.0, 417.0, 276.0, 180.0, 119.0, 89.0, 47.0, 35.0, 20.0, 17.0, 7.0, 6.0, 6.0, 4.0, 3.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.69921875, -0.6762008666992188, -0.6531829833984375, -0.6301651000976562, -0.607147216796875, -0.5841293334960938, -0.5611114501953125, -0.5380935668945312, -0.51507568359375, -0.49205780029296875, -0.4690399169921875, -0.44602203369140625, -0.423004150390625, -0.39998626708984375, -0.3769683837890625, -0.35395050048828125, -0.3309326171875, -0.30791473388671875, -0.2848968505859375, -0.26187896728515625, -0.238861083984375, -0.21584320068359375, -0.1928253173828125, -0.16980743408203125, -0.14678955078125, -0.12377166748046875, -0.1007537841796875, -0.07773590087890625, -0.054718017578125, -0.03170013427734375, -0.0086822509765625, 0.01433563232421875, 0.037353515625, 0.06037139892578125, 0.0833892822265625, 0.10640716552734375, 0.129425048828125, 0.15244293212890625, 0.1754608154296875, 0.19847869873046875, 0.22149658203125, 0.24451446533203125, 0.2675323486328125, 0.29055023193359375, 0.313568115234375, 0.33658599853515625, 0.3596038818359375, 0.38262176513671875, 0.4056396484375, 0.42865753173828125, 0.4516754150390625, 0.47469329833984375, 0.497711181640625, 0.5207290649414062, 0.5437469482421875, 0.5667648315429688, 0.58978271484375, 0.6128005981445312, 0.6358184814453125, 0.6588363647460938, 0.681854248046875, 0.7048721313476562, 0.7278900146484375, 0.7509078979492188, 0.77392578125]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 3.0, 0.0, 1.0, 5.0, 4.0, 8.0, 4.0, 10.0, 6.0, 6.0, 13.0, 7.0, 9.0, 11.0, 17.0, 20.0, 22.0, 24.0, 21.0, 24.0, 30.0, 39.0, 35.0, 36.0, 35.0, 43.0, 39.0, 34.0, 1059.0, 33.0, 48.0, 29.0, 25.0, 42.0, 28.0, 30.0, 21.0, 21.0, 25.0, 25.0, 21.0, 17.0, 23.0, 17.0, 12.0, 9.0, 7.0, 13.0, 4.0, 6.0, 2.0, 2.0, 3.0, 4.0, 3.0, 5.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.89453125, -4.7388916015625, -4.583251953125, -4.4276123046875, -4.27197265625, -4.1163330078125, -3.960693359375, -3.8050537109375, -3.6494140625, -3.4937744140625, -3.338134765625, -3.1824951171875, -3.02685546875, -2.8712158203125, -2.715576171875, -2.5599365234375, -2.404296875, -2.2486572265625, -2.093017578125, -1.9373779296875, -1.78173828125, -1.6260986328125, -1.470458984375, -1.3148193359375, -1.1591796875, -1.0035400390625, -0.847900390625, -0.6922607421875, -0.53662109375, -0.3809814453125, -0.225341796875, -0.0697021484375, 0.0859375, 0.2415771484375, 0.397216796875, 0.5528564453125, 0.70849609375, 0.8641357421875, 1.019775390625, 1.1754150390625, 1.3310546875, 1.4866943359375, 1.642333984375, 1.7979736328125, 1.95361328125, 2.1092529296875, 2.264892578125, 2.4205322265625, 2.576171875, 2.7318115234375, 2.887451171875, 3.0430908203125, 3.19873046875, 3.3543701171875, 3.510009765625, 3.6656494140625, 3.8212890625, 3.9769287109375, 4.132568359375, 4.2882080078125, 4.44384765625, 4.5994873046875, 4.755126953125, 4.9107666015625, 5.06640625]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 1.0, 4.0, 6.0, 6.0, 8.0, 14.0, 29.0, 31.0, 52.0, 62.0, 105.0, 140.0, 208.0, 349.0, 466.0, 775.0, 1184.0, 1804.0, 2818.0, 4558.0, 7284.0, 11733.0, 19077.0, 30787.0, 48858.0, 75388.0, 110795.0, 162844.0, 1189205.0, 142319.0, 101632.0, 68085.0, 43826.0, 27342.0, 16953.0, 10512.0, 6584.0, 3992.0, 2555.0, 1674.0, 1024.0, 673.0, 460.0, 313.0, 181.0, 138.0, 96.0, 59.0, 45.0, 32.0, 18.0, 11.0, 11.0, 2.0, 5.0, 6.0, 2.0, 1.0, 2.0, 3.0], "bins": [-0.47119140625, -0.4566383361816406, -0.44208526611328125, -0.4275321960449219, -0.4129791259765625, -0.3984260559082031, -0.38387298583984375, -0.3693199157714844, -0.354766845703125, -0.3402137756347656, -0.32566070556640625, -0.3111076354980469, -0.2965545654296875, -0.2820014953613281, -0.26744842529296875, -0.2528953552246094, -0.23834228515625, -0.22378921508789062, -0.20923614501953125, -0.19468307495117188, -0.1801300048828125, -0.16557693481445312, -0.15102386474609375, -0.13647079467773438, -0.121917724609375, -0.10736465454101562, -0.09281158447265625, -0.07825851440429688, -0.0637054443359375, -0.049152374267578125, -0.03459930419921875, -0.020046234130859375, -0.0054931640625, 0.009059906005859375, 0.02361297607421875, 0.038166046142578125, 0.0527191162109375, 0.06727218627929688, 0.08182525634765625, 0.09637832641601562, 0.110931396484375, 0.12548446655273438, 0.14003753662109375, 0.15459060668945312, 0.1691436767578125, 0.18369674682617188, 0.19824981689453125, 0.21280288696289062, 0.22735595703125, 0.24190902709960938, 0.25646209716796875, 0.2710151672363281, 0.2855682373046875, 0.3001213073730469, 0.31467437744140625, 0.3292274475097656, 0.343780517578125, 0.3583335876464844, 0.37288665771484375, 0.3874397277832031, 0.4019927978515625, 0.4165458679199219, 0.43109893798828125, 0.4456520080566406, 0.460205078125]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 4.0, 1.0, 6.0, 3.0, 5.0, 1.0, 10.0, 11.0, 13.0, 11.0, 9.0, 15.0, 17.0, 28.0, 33.0, 32.0, 53.0, 68.0, 89.0, 79.0, 110.0, 95.0, 80.0, 44.0, 40.0, 31.0, 20.0, 22.0, 15.0, 17.0, 12.0, 11.0, 3.0, 3.0, 5.0, 2.0, 5.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0199432373046875, -0.019369125366210938, -0.018795013427734375, -0.018220901489257812, -0.01764678955078125, -0.017072677612304688, -0.016498565673828125, -0.015924453735351562, -0.015350341796875, -0.014776229858398438, -0.014202117919921875, -0.013628005981445312, -0.01305389404296875, -0.012479782104492188, -0.011905670166015625, -0.011331558227539062, -0.0107574462890625, -0.010183334350585938, -0.009609222412109375, -0.009035110473632812, -0.00846099853515625, -0.007886886596679688, -0.007312774658203125, -0.0067386627197265625, -0.00616455078125, -0.0055904388427734375, -0.005016326904296875, -0.0044422149658203125, -0.00386810302734375, -0.0032939910888671875, -0.002719879150390625, -0.0021457672119140625, -0.0015716552734375, -0.0009975433349609375, -0.000423431396484375, 0.0001506805419921875, 0.00072479248046875, 0.0012989044189453125, 0.001873016357421875, 0.0024471282958984375, 0.003021240234375, 0.0035953521728515625, 0.004169464111328125, 0.0047435760498046875, 0.00531768798828125, 0.0058917999267578125, 0.006465911865234375, 0.0070400238037109375, 0.0076141357421875, 0.008188247680664062, 0.008762359619140625, 0.009336471557617188, 0.00991058349609375, 0.010484695434570312, 0.011058807373046875, 0.011632919311523438, 0.01220703125, 0.012781143188476562, 0.013355255126953125, 0.013929367065429688, 0.01450347900390625, 0.015077590942382812, 0.015651702880859375, 0.016225814819335938, 0.0167999267578125]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 5.0, 3.0, 4.0, 7.0, 11.0, 12.0, 22.0, 21.0, 29.0, 68.0, 75.0, 141.0, 205.0, 483.0, 4163.0, 990346.0, 51241.0, 988.0, 274.0, 151.0, 89.0, 59.0, 31.0, 35.0, 20.0, 14.0, 11.0, 11.0, 9.0, 6.0, 7.0, 5.0, 0.0, 2.0, 4.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.352783203125, -0.3413047790527344, -0.32982635498046875, -0.3183479309082031, -0.3068695068359375, -0.2953910827636719, -0.28391265869140625, -0.2724342346191406, -0.260955810546875, -0.24947738647460938, -0.23799896240234375, -0.22652053833007812, -0.2150421142578125, -0.20356369018554688, -0.19208526611328125, -0.18060684204101562, -0.16912841796875, -0.15764999389648438, -0.14617156982421875, -0.13469314575195312, -0.1232147216796875, -0.11173629760742188, -0.10025787353515625, -0.08877944946289062, -0.077301025390625, -0.06582260131835938, -0.05434417724609375, -0.042865753173828125, -0.0313873291015625, -0.019908905029296875, -0.00843048095703125, 0.003047943115234375, 0.0145263671875, 0.026004791259765625, 0.03748321533203125, 0.048961639404296875, 0.0604400634765625, 0.07191848754882812, 0.08339691162109375, 0.09487533569335938, 0.106353759765625, 0.11783218383789062, 0.12931060791015625, 0.14078903198242188, 0.1522674560546875, 0.16374588012695312, 0.17522430419921875, 0.18670272827148438, 0.19818115234375, 0.20965957641601562, 0.22113800048828125, 0.23261642456054688, 0.2440948486328125, 0.2555732727050781, 0.26705169677734375, 0.2785301208496094, 0.290008544921875, 0.3014869689941406, 0.31296539306640625, 0.3244438171386719, 0.3359222412109375, 0.3474006652832031, 0.35887908935546875, 0.3703575134277344, 0.3818359375]}, "gradients/decoder.transformer.h.16.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 23.0, 71.0, 178.0, 305.0, 268.0, 117.0, 31.0, 11.0, 8.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.0532086119055748, -0.05221925675868988, -0.05122990161180496, -0.050240546464920044, -0.049251195043325424, -0.048261839896440506, -0.04727248474955559, -0.04628312960267067, -0.04529377445578575, -0.04430441930890083, -0.043315064162015915, -0.042325712740421295, -0.04133635759353638, -0.04034700244665146, -0.03935764729976654, -0.03836829215288162, -0.037378937005996704, -0.036389581859111786, -0.03540022671222687, -0.03441087156534195, -0.03342152014374733, -0.03243216499686241, -0.03144280984997749, -0.030453454703092575, -0.029464103281497955, -0.028474748134613037, -0.027485394850373268, -0.02649603970348835, -0.02550668455660343, -0.024517331272363663, -0.023527976125478745, -0.022538620978593826, -0.02154926396906376, -0.02055990882217884, -0.01957055553793907, -0.018581200391054153, -0.017591845244169235, -0.016602490097284317, -0.015613136813044548, -0.01462378166615963, -0.013634427450597286, -0.012645073235034943, -0.011655718088150024, -0.01066636387258768, -0.009677009657025337, -0.008687654510140419, -0.007698300294578075, -0.0067089456133544445, -0.005719590932130814, -0.004730236250907183, -0.0037408818025141954, -0.002751527354121208, -0.0017621726728975773, -0.0007728179916739464, 0.00021653622388839722, 0.0012058909051120281, 0.002195245586335659, 0.00318460026755929, 0.004173954948782921, 0.0051633091643452644, 0.006152663845568895, 0.007142018526792526, 0.00813137274235487, 0.009120726957917213, 0.010110082104802132]}, "gradients/decoder.transformer.h.16.ln_cross_attn.bias": {"_type": "histogram", "values": [5.0, 0.0, 0.0, 2.0, 0.0, 2.0, 4.0, 6.0, 6.0, 4.0, 2.0, 11.0, 10.0, 9.0, 16.0, 18.0, 17.0, 19.0, 34.0, 21.0, 32.0, 38.0, 38.0, 39.0, 45.0, 45.0, 36.0, 37.0, 45.0, 37.0, 41.0, 38.0, 42.0, 54.0, 33.0, 31.0, 27.0, 29.0, 18.0, 19.0, 20.0, 13.0, 10.0, 13.0, 9.0, 16.0, 6.0, 5.0, 5.0, 5.0, 2.0, 1.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.007885932922363281, -0.007606863975524902, -0.0073277950286865234, -0.0070487260818481445, -0.006769657135009766, -0.006490588188171387, -0.006211519241333008, -0.005932450294494629, -0.00565338134765625, -0.005374312400817871, -0.005095243453979492, -0.004816174507141113, -0.004537105560302734, -0.0042580366134643555, -0.0039789676666259766, -0.0036998987197875977, -0.0034208297729492188, -0.00314176082611084, -0.002862691879272461, -0.002583622932434082, -0.002304553985595703, -0.0020254850387573242, -0.0017464160919189453, -0.0014673471450805664, -0.0011882781982421875, -0.0009092092514038086, -0.0006301403045654297, -0.0003510713577270508, -7.200241088867188e-05, 0.00020706653594970703, 0.00048613548278808594, 0.0007652044296264648, 0.0010442733764648438, 0.0013233423233032227, 0.0016024112701416016, 0.0018814802169799805, 0.0021605491638183594, 0.0024396181106567383, 0.002718687057495117, 0.002997756004333496, 0.003276824951171875, 0.003555893898010254, 0.003834962844848633, 0.004114031791687012, 0.004393100738525391, 0.0046721696853637695, 0.0049512386322021484, 0.005230307579040527, 0.005509376525878906, 0.005788445472717285, 0.006067514419555664, 0.006346583366394043, 0.006625652313232422, 0.006904721260070801, 0.00718379020690918, 0.007462859153747559, 0.0077419281005859375, 0.008020997047424316, 0.008300065994262695, 0.008579134941101074, 0.008858203887939453, 0.009137272834777832, 0.009416341781616211, 0.00969541072845459, 0.009974479675292969]}, "gradients/decoder.transformer.h.16.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 1.0, 5.0, 4.0, 3.0, 4.0, 8.0, 5.0, 7.0, 10.0, 14.0, 12.0, 14.0, 19.0, 16.0, 16.0, 38.0, 34.0, 29.0, 33.0, 35.0, 44.0, 47.0, 47.0, 47.0, 55.0, 42.0, 42.0, 56.0, 30.0, 41.0, 45.0, 30.0, 21.0, 24.0, 33.0, 15.0, 12.0, 9.0, 15.0, 14.0, 3.0, 8.0, 9.0, 5.0, 7.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.6640625, -7.3819580078125, -7.099853515625, -6.8177490234375, -6.53564453125, -6.2535400390625, -5.971435546875, -5.6893310546875, -5.4072265625, -5.1251220703125, -4.843017578125, -4.5609130859375, -4.27880859375, -3.9967041015625, -3.714599609375, -3.4324951171875, -3.150390625, -2.8682861328125, -2.586181640625, -2.3040771484375, -2.02197265625, -1.7398681640625, -1.457763671875, -1.1756591796875, -0.8935546875, -0.6114501953125, -0.329345703125, -0.0472412109375, 0.23486328125, 0.5169677734375, 0.799072265625, 1.0811767578125, 1.36328125, 1.6453857421875, 1.927490234375, 2.2095947265625, 2.49169921875, 2.7738037109375, 3.055908203125, 3.3380126953125, 3.6201171875, 3.9022216796875, 4.184326171875, 4.4664306640625, 4.74853515625, 5.0306396484375, 5.312744140625, 5.5948486328125, 5.876953125, 6.1590576171875, 6.441162109375, 6.7232666015625, 7.00537109375, 7.2874755859375, 7.569580078125, 7.8516845703125, 8.1337890625, 8.4158935546875, 8.697998046875, 8.9801025390625, 9.26220703125, 9.5443115234375, 9.826416015625, 10.1085205078125, 10.390625]}, "gradients/decoder.transformer.h.16.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 10.0, 7.0, 8.0, 23.0, 25.0, 39.0, 69.0, 113.0, 171.0, 304.0, 607.0, 990.0, 1919.0, 3655.0, 6938.0, 14079.0, 30093.0, 70928.0, 212224.0, 449762.0, 152606.0, 55486.0, 24375.0, 11608.0, 5821.0, 2969.0, 1683.0, 918.0, 482.0, 274.0, 141.0, 99.0, 52.0, 31.0, 21.0, 13.0, 5.0, 8.0, 4.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.63671875, -7.4078369140625, -7.178955078125, -6.9500732421875, -6.72119140625, -6.4923095703125, -6.263427734375, -6.0345458984375, -5.8056640625, -5.5767822265625, -5.347900390625, -5.1190185546875, -4.89013671875, -4.6612548828125, -4.432373046875, -4.2034912109375, -3.974609375, -3.7457275390625, -3.516845703125, -3.2879638671875, -3.05908203125, -2.8302001953125, -2.601318359375, -2.3724365234375, -2.1435546875, -1.9146728515625, -1.685791015625, -1.4569091796875, -1.22802734375, -0.9991455078125, -0.770263671875, -0.5413818359375, -0.3125, -0.0836181640625, 0.145263671875, 0.3741455078125, 0.60302734375, 0.8319091796875, 1.060791015625, 1.2896728515625, 1.5185546875, 1.7474365234375, 1.976318359375, 2.2052001953125, 2.43408203125, 2.6629638671875, 2.891845703125, 3.1207275390625, 3.349609375, 3.5784912109375, 3.807373046875, 4.0362548828125, 4.26513671875, 4.4940185546875, 4.722900390625, 4.9517822265625, 5.1806640625, 5.4095458984375, 5.638427734375, 5.8673095703125, 6.09619140625, 6.3250732421875, 6.553955078125, 6.7828369140625, 7.01171875]}, "gradients/decoder.transformer.h.16.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 1.0, 9.0, 5.0, 6.0, 9.0, 11.0, 9.0, 20.0, 21.0, 19.0, 25.0, 31.0, 26.0, 40.0, 50.0, 36.0, 58.0, 61.0, 96.0, 176.0, 1607.0, 174.0, 93.0, 67.0, 55.0, 52.0, 39.0, 35.0, 31.0, 32.0, 29.0, 26.0, 20.0, 17.0, 13.0, 11.0, 10.0, 9.0, 7.0, 5.0, 7.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-40.15625, -39.056396484375, -37.95654296875, -36.856689453125, -35.7568359375, -34.656982421875, -33.55712890625, -32.457275390625, -31.357421875, -30.257568359375, -29.15771484375, -28.057861328125, -26.9580078125, -25.858154296875, -24.75830078125, -23.658447265625, -22.55859375, -21.458740234375, -20.35888671875, -19.259033203125, -18.1591796875, -17.059326171875, -15.95947265625, -14.859619140625, -13.759765625, -12.659912109375, -11.56005859375, -10.460205078125, -9.3603515625, -8.260498046875, -7.16064453125, -6.060791015625, -4.9609375, -3.861083984375, -2.76123046875, -1.661376953125, -0.5615234375, 0.538330078125, 1.63818359375, 2.738037109375, 3.837890625, 4.937744140625, 6.03759765625, 7.137451171875, 8.2373046875, 9.337158203125, 10.43701171875, 11.536865234375, 12.63671875, 13.736572265625, 14.83642578125, 15.936279296875, 17.0361328125, 18.135986328125, 19.23583984375, 20.335693359375, 21.435546875, 22.535400390625, 23.63525390625, 24.735107421875, 25.8349609375, 26.934814453125, 28.03466796875, 29.134521484375, 30.234375]}, "gradients/decoder.transformer.h.16.attn.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 5.0, 11.0, 29.0, 40.0, 58.0, 66.0, 159.0, 261.0, 678.0, 5032.0, 3117462.0, 20080.0, 1107.0, 305.0, 159.0, 100.0, 65.0, 28.0, 24.0, 9.0, 6.0, 4.0, 2.0, 5.0, 3.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-123.5, -118.490234375, -113.48046875, -108.470703125, -103.4609375, -98.451171875, -93.44140625, -88.431640625, -83.421875, -78.412109375, -73.40234375, -68.392578125, -63.3828125, -58.373046875, -53.36328125, -48.353515625, -43.34375, -38.333984375, -33.32421875, -28.314453125, -23.3046875, -18.294921875, -13.28515625, -8.275390625, -3.265625, 1.744140625, 6.75390625, 11.763671875, 16.7734375, 21.783203125, 26.79296875, 31.802734375, 36.8125, 41.822265625, 46.83203125, 51.841796875, 56.8515625, 61.861328125, 66.87109375, 71.880859375, 76.890625, 81.900390625, 86.91015625, 91.919921875, 96.9296875, 101.939453125, 106.94921875, 111.958984375, 116.96875, 121.978515625, 126.98828125, 131.998046875, 137.0078125, 142.017578125, 147.02734375, 152.037109375, 157.046875, 162.056640625, 167.06640625, 172.076171875, 177.0859375, 182.095703125, 187.10546875, 192.115234375, 197.125]}, "gradients/decoder.transformer.h.16.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 267.0, 750.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-125.93272399902344, -95.29681396484375, -64.66090393066406, -34.024986267089844, -3.3890762329101562, 27.246841430664062, 57.88275146484375, 88.51866149902344, 119.15457153320312, 149.7904815673828, 180.4263916015625, 211.06231689453125, 241.69821166992188, 272.3341369628906, 302.97003173828125, 333.60595703125, 364.24188232421875, 394.8778076171875, 425.5137023925781, 456.1496276855469, 486.7855224609375, 517.4214477539062, 548.057373046875, 578.6932373046875, 609.3291625976562, 639.965087890625, 670.6010131835938, 701.2368774414062, 731.872802734375, 762.5087280273438, 793.1446533203125, 823.780517578125, 854.4164428710938, 885.0523681640625, 915.6882934570312, 946.3241577148438, 976.9600830078125, 1007.5960083007812, 1038.23193359375, 1068.8677978515625, 1099.5037841796875, 1130.1396484375, 1160.775634765625, 1191.4114990234375, 1222.0474853515625, 1252.683349609375, 1283.3192138671875, 1313.9552001953125, 1344.591064453125, 1375.2269287109375, 1405.8629150390625, 1436.498779296875, 1467.134765625, 1497.7706298828125, 1528.406494140625, 1559.04248046875, 1589.6783447265625, 1620.314208984375, 1650.9501953125, 1681.5860595703125, 1712.2220458984375, 1742.85791015625, 1773.4937744140625, 1804.1297607421875, 1834.765625]}, "gradients/decoder.transformer.h.16.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 3.0, 4.0, 4.0, 3.0, 3.0, 7.0, 6.0, 9.0, 16.0, 11.0, 15.0, 11.0, 28.0, 31.0, 18.0, 23.0, 37.0, 27.0, 35.0, 42.0, 34.0, 56.0, 39.0, 41.0, 54.0, 57.0, 44.0, 46.0, 34.0, 20.0, 33.0, 37.0, 33.0, 24.0, 31.0, 18.0, 25.0, 14.0, 10.0, 3.0, 8.0, 4.0, 4.0, 6.0, 2.0, 1.0, 0.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-93.95695495605469, -90.6125259399414, -87.2680892944336, -83.92366027832031, -80.57923126220703, -77.23479461669922, -73.89036560058594, -70.54592895507812, -67.20149993896484, -63.8570671081543, -60.512638092041016, -57.16820526123047, -53.82377243041992, -50.479339599609375, -47.134910583496094, -43.79047775268555, -40.446048736572266, -37.10161590576172, -33.75718688964844, -30.41275405883789, -27.068321228027344, -23.72389030456543, -20.379459381103516, -17.03502655029297, -13.690595626831055, -10.346163749694824, -7.001732349395752, -3.6573009490966797, -0.3128690719604492, 3.0315628051757812, 6.375993728637695, 9.720426559448242, 13.064857482910156, 16.40928840637207, 19.753721237182617, 23.09815216064453, 26.442584991455078, 29.787015914916992, 33.131446838378906, 36.47587966918945, 39.8203125, 43.16474533081055, 46.50917434692383, 49.853607177734375, 53.19804000854492, 56.54247283935547, 59.88690185546875, 63.2313346862793, 66.57575988769531, 69.9201889038086, 73.2646255493164, 76.60905456542969, 79.95348358154297, 83.29792022705078, 86.64234924316406, 89.98678588867188, 93.33121490478516, 96.67564392089844, 100.02008056640625, 103.36450958251953, 106.70893859863281, 110.05337524414062, 113.3978042602539, 116.74223327636719, 120.086669921875]}, "gradients/decoder.transformer.h.15.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 4.0, 3.0, 1.0, 2.0, 5.0, 8.0, 6.0, 7.0, 13.0, 9.0, 15.0, 16.0, 16.0, 20.0, 16.0, 14.0, 30.0, 35.0, 29.0, 46.0, 40.0, 44.0, 60.0, 40.0, 54.0, 46.0, 44.0, 43.0, 43.0, 31.0, 37.0, 33.0, 23.0, 22.0, 31.0, 16.0, 23.0, 17.0, 17.0, 8.0, 11.0, 11.0, 5.0, 8.0, 6.0, 3.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.625, -8.323974609375, -8.02294921875, -7.721923828125, -7.4208984375, -7.119873046875, -6.81884765625, -6.517822265625, -6.216796875, -5.915771484375, -5.61474609375, -5.313720703125, -5.0126953125, -4.711669921875, -4.41064453125, -4.109619140625, -3.80859375, -3.507568359375, -3.20654296875, -2.905517578125, -2.6044921875, -2.303466796875, -2.00244140625, -1.701416015625, -1.400390625, -1.099365234375, -0.79833984375, -0.497314453125, -0.1962890625, 0.104736328125, 0.40576171875, 0.706787109375, 1.0078125, 1.308837890625, 1.60986328125, 1.910888671875, 2.2119140625, 2.512939453125, 2.81396484375, 3.114990234375, 3.416015625, 3.717041015625, 4.01806640625, 4.319091796875, 4.6201171875, 4.921142578125, 5.22216796875, 5.523193359375, 5.82421875, 6.125244140625, 6.42626953125, 6.727294921875, 7.0283203125, 7.329345703125, 7.63037109375, 7.931396484375, 8.232421875, 8.533447265625, 8.83447265625, 9.135498046875, 9.4365234375, 9.737548828125, 10.03857421875, 10.339599609375, 10.640625]}, "gradients/decoder.transformer.h.15.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 6.0, 5.0, 7.0, 15.0, 13.0, 34.0, 26.0, 58.0, 87.0, 116.0, 170.0, 257.0, 479.0, 831.0, 1591.0, 2904.0, 6503.0, 14813.0, 37871.0, 114134.0, 412305.0, 1196930.0, 1497602.0, 635409.0, 179768.0, 54715.0, 20439.0, 8590.0, 4056.0, 1984.0, 1026.0, 567.0, 357.0, 183.0, 134.0, 98.0, 54.0, 35.0, 30.0, 32.0, 15.0, 10.0, 10.0, 5.0, 2.0, 3.0, 4.0, 0.0, 2.0, 1.0, 2.0, 2.0], "bins": [-12.1875, -11.8309326171875, -11.474365234375, -11.1177978515625, -10.76123046875, -10.4046630859375, -10.048095703125, -9.6915283203125, -9.3349609375, -8.9783935546875, -8.621826171875, -8.2652587890625, -7.90869140625, -7.5521240234375, -7.195556640625, -6.8389892578125, -6.482421875, -6.1258544921875, -5.769287109375, -5.4127197265625, -5.05615234375, -4.6995849609375, -4.343017578125, -3.9864501953125, -3.6298828125, -3.2733154296875, -2.916748046875, -2.5601806640625, -2.20361328125, -1.8470458984375, -1.490478515625, -1.1339111328125, -0.77734375, -0.4207763671875, -0.064208984375, 0.2923583984375, 0.64892578125, 1.0054931640625, 1.362060546875, 1.7186279296875, 2.0751953125, 2.4317626953125, 2.788330078125, 3.1448974609375, 3.50146484375, 3.8580322265625, 4.214599609375, 4.5711669921875, 4.927734375, 5.2843017578125, 5.640869140625, 5.9974365234375, 6.35400390625, 6.7105712890625, 7.067138671875, 7.4237060546875, 7.7802734375, 8.1368408203125, 8.493408203125, 8.8499755859375, 9.20654296875, 9.5631103515625, 9.919677734375, 10.2762451171875, 10.6328125]}, "gradients/decoder.transformer.h.15.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 2.0, 2.0, 6.0, 18.0, 7.0, 26.0, 27.0, 36.0, 88.0, 110.0, 153.0, 264.0, 410.0, 528.0, 586.0, 573.0, 416.0, 276.0, 200.0, 117.0, 73.0, 46.0, 41.0, 27.0, 20.0, 10.0, 6.0, 7.0, 4.0, 2.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.6875, -18.85302734375, -18.0185546875, -17.18408203125, -16.349609375, -15.51513671875, -14.6806640625, -13.84619140625, -13.01171875, -12.17724609375, -11.3427734375, -10.50830078125, -9.673828125, -8.83935546875, -8.0048828125, -7.17041015625, -6.3359375, -5.50146484375, -4.6669921875, -3.83251953125, -2.998046875, -2.16357421875, -1.3291015625, -0.49462890625, 0.33984375, 1.17431640625, 2.0087890625, 2.84326171875, 3.677734375, 4.51220703125, 5.3466796875, 6.18115234375, 7.015625, 7.85009765625, 8.6845703125, 9.51904296875, 10.353515625, 11.18798828125, 12.0224609375, 12.85693359375, 13.69140625, 14.52587890625, 15.3603515625, 16.19482421875, 17.029296875, 17.86376953125, 18.6982421875, 19.53271484375, 20.3671875, 21.20166015625, 22.0361328125, 22.87060546875, 23.705078125, 24.53955078125, 25.3740234375, 26.20849609375, 27.04296875, 27.87744140625, 28.7119140625, 29.54638671875, 30.380859375, 31.21533203125, 32.0498046875, 32.88427734375, 33.71875]}, "gradients/decoder.transformer.h.15.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 5.0, 2.0, 9.0, 6.0, 11.0, 27.0, 24.0, 46.0, 52.0, 85.0, 83.0, 132.0, 196.0, 302.0, 647.0, 1998.0, 9695.0, 76845.0, 1108642.0, 2749020.0, 218930.0, 21747.0, 3593.0, 956.0, 443.0, 221.0, 179.0, 100.0, 76.0, 54.0, 37.0, 41.0, 28.0, 15.0, 10.0, 14.0, 5.0, 3.0, 5.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-45.15625, -43.9091796875, -42.662109375, -41.4150390625, -40.16796875, -38.9208984375, -37.673828125, -36.4267578125, -35.1796875, -33.9326171875, -32.685546875, -31.4384765625, -30.19140625, -28.9443359375, -27.697265625, -26.4501953125, -25.203125, -23.9560546875, -22.708984375, -21.4619140625, -20.21484375, -18.9677734375, -17.720703125, -16.4736328125, -15.2265625, -13.9794921875, -12.732421875, -11.4853515625, -10.23828125, -8.9912109375, -7.744140625, -6.4970703125, -5.25, -4.0029296875, -2.755859375, -1.5087890625, -0.26171875, 0.9853515625, 2.232421875, 3.4794921875, 4.7265625, 5.9736328125, 7.220703125, 8.4677734375, 9.71484375, 10.9619140625, 12.208984375, 13.4560546875, 14.703125, 15.9501953125, 17.197265625, 18.4443359375, 19.69140625, 20.9384765625, 22.185546875, 23.4326171875, 24.6796875, 25.9267578125, 27.173828125, 28.4208984375, 29.66796875, 30.9150390625, 32.162109375, 33.4091796875, 34.65625]}, "gradients/decoder.transformer.h.15.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 18.0, 21.0, 44.0, 93.0, 114.0, 144.0, 160.0, 148.0, 121.0, 63.0, 34.0, 24.0, 12.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-159.43276977539062, -154.79037475585938, -150.14797973632812, -145.50558471679688, -140.86318969726562, -136.22079467773438, -131.57839965820312, -126.93600463867188, -122.29360961914062, -117.65121459960938, -113.00881958007812, -108.36642456054688, -103.72402954101562, -99.08163452148438, -94.43923950195312, -89.79684448242188, -85.15444946289062, -80.51205444335938, -75.86965942382812, -71.22726440429688, -66.58486938476562, -61.942474365234375, -57.300079345703125, -52.657684326171875, -48.015289306640625, -43.372894287109375, -38.730499267578125, -34.088104248046875, -29.445709228515625, -24.803314208984375, -20.160919189453125, -15.518524169921875, -10.876113891601562, -6.2337188720703125, -1.5913238525390625, 3.0510711669921875, 7.6934661865234375, 12.335861206054688, 16.978256225585938, 21.620651245117188, 26.263046264648438, 30.905441284179688, 35.54783630371094, 40.19023132324219, 44.83262634277344, 49.47502136230469, 54.11741638183594, 58.75981140136719, 63.40220642089844, 68.04460144042969, 72.68699645996094, 77.32939147949219, 81.97178649902344, 86.61418151855469, 91.25657653808594, 95.89897155761719, 100.54136657714844, 105.18376159667969, 109.82615661621094, 114.46855163574219, 119.11094665527344, 123.75334167480469, 128.39573669433594, 133.0381317138672, 137.68052673339844]}, "gradients/decoder.transformer.h.15.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 5.0, 4.0, 5.0, 2.0, 8.0, 9.0, 16.0, 15.0, 15.0, 15.0, 24.0, 38.0, 40.0, 47.0, 43.0, 36.0, 38.0, 50.0, 50.0, 46.0, 49.0, 58.0, 42.0, 42.0, 46.0, 41.0, 39.0, 36.0, 26.0, 30.0, 15.0, 27.0, 22.0, 9.0, 3.0, 7.0, 5.0, 1.0, 5.0, 3.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.790191650390625, -59.394737243652344, -56.9992790222168, -54.603824615478516, -52.20836639404297, -49.81291198730469, -47.417457580566406, -45.022003173828125, -42.62654495239258, -40.2310905456543, -37.83563232421875, -35.44017791748047, -33.04472351074219, -30.64926528930664, -28.25381088256836, -25.858354568481445, -23.46289825439453, -21.067441940307617, -18.671985626220703, -16.276531219482422, -13.881074905395508, -11.485618591308594, -9.090163230895996, -6.694707870483398, -4.299251556396484, -1.9037957191467285, 0.49166011810302734, 2.887115955352783, 5.282571792602539, 7.678028106689453, 10.07348346710205, 12.468938827514648, 14.864395141601562, 17.259851455688477, 19.65530776977539, 22.050762176513672, 24.446218490600586, 26.8416748046875, 29.23712921142578, 31.632585525512695, 34.02804183959961, 36.42349624633789, 38.81895446777344, 41.21440887451172, 43.60986328125, 46.00532150268555, 48.40077590942383, 50.796234130859375, 53.191688537597656, 55.58714294433594, 57.982601165771484, 60.378055572509766, 62.77351379394531, 65.1689682006836, 67.56442260742188, 69.95987701416016, 72.35533142089844, 74.75078582763672, 77.146240234375, 79.54170227050781, 81.9371566772461, 84.33261108398438, 86.72806549072266, 89.12351989746094, 91.51898193359375]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 7.0, 6.0, 9.0, 10.0, 8.0, 15.0, 19.0, 9.0, 20.0, 18.0, 21.0, 26.0, 33.0, 34.0, 39.0, 42.0, 37.0, 53.0, 39.0, 34.0, 38.0, 61.0, 48.0, 37.0, 42.0, 51.0, 33.0, 25.0, 35.0, 26.0, 33.0, 15.0, 8.0, 19.0, 10.0, 7.0, 11.0, 8.0, 9.0, 5.0, 3.0, 3.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.7265625, -8.4281005859375, -8.129638671875, -7.8311767578125, -7.53271484375, -7.2342529296875, -6.935791015625, -6.6373291015625, -6.3388671875, -6.0404052734375, -5.741943359375, -5.4434814453125, -5.14501953125, -4.8465576171875, -4.548095703125, -4.2496337890625, -3.951171875, -3.6527099609375, -3.354248046875, -3.0557861328125, -2.75732421875, -2.4588623046875, -2.160400390625, -1.8619384765625, -1.5634765625, -1.2650146484375, -0.966552734375, -0.6680908203125, -0.36962890625, -0.0711669921875, 0.227294921875, 0.5257568359375, 0.82421875, 1.1226806640625, 1.421142578125, 1.7196044921875, 2.01806640625, 2.3165283203125, 2.614990234375, 2.9134521484375, 3.2119140625, 3.5103759765625, 3.808837890625, 4.1072998046875, 4.40576171875, 4.7042236328125, 5.002685546875, 5.3011474609375, 5.599609375, 5.8980712890625, 6.196533203125, 6.4949951171875, 6.79345703125, 7.0919189453125, 7.390380859375, 7.6888427734375, 7.9873046875, 8.2857666015625, 8.584228515625, 8.8826904296875, 9.18115234375, 9.4796142578125, 9.778076171875, 10.0765380859375, 10.375]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 5.0, 3.0, 1.0, 3.0, 8.0, 7.0, 17.0, 22.0, 29.0, 53.0, 58.0, 89.0, 125.0, 206.0, 275.0, 440.0, 577.0, 893.0, 1348.0, 1991.0, 2894.0, 4225.0, 6057.0, 9475.0, 13748.0, 20324.0, 29870.0, 44357.0, 64939.0, 94786.0, 130810.0, 154723.0, 139053.0, 103717.0, 72053.0, 49097.0, 32945.0, 22494.0, 15138.0, 10158.0, 6901.0, 4620.0, 3145.0, 2199.0, 1510.0, 1003.0, 695.0, 474.0, 314.0, 216.0, 151.0, 113.0, 61.0, 41.0, 45.0, 17.0, 25.0, 11.0, 5.0, 7.0, 2.0, 5.0, 2.0], "bins": [-0.63427734375, -0.614837646484375, -0.59539794921875, -0.575958251953125, -0.5565185546875, -0.537078857421875, -0.51763916015625, -0.498199462890625, -0.478759765625, -0.459320068359375, -0.43988037109375, -0.420440673828125, -0.4010009765625, -0.381561279296875, -0.36212158203125, -0.342681884765625, -0.3232421875, -0.303802490234375, -0.28436279296875, -0.264923095703125, -0.2454833984375, -0.226043701171875, -0.20660400390625, -0.187164306640625, -0.167724609375, -0.148284912109375, -0.12884521484375, -0.109405517578125, -0.0899658203125, -0.070526123046875, -0.05108642578125, -0.031646728515625, -0.01220703125, 0.007232666015625, 0.02667236328125, 0.046112060546875, 0.0655517578125, 0.084991455078125, 0.10443115234375, 0.123870849609375, 0.143310546875, 0.162750244140625, 0.18218994140625, 0.201629638671875, 0.2210693359375, 0.240509033203125, 0.25994873046875, 0.279388427734375, 0.298828125, 0.318267822265625, 0.33770751953125, 0.357147216796875, 0.3765869140625, 0.396026611328125, 0.41546630859375, 0.434906005859375, 0.454345703125, 0.473785400390625, 0.49322509765625, 0.512664794921875, 0.5321044921875, 0.551544189453125, 0.57098388671875, 0.590423583984375, 0.60986328125]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 3.0, 5.0, 4.0, 5.0, 9.0, 8.0, 7.0, 9.0, 18.0, 14.0, 23.0, 27.0, 26.0, 41.0, 29.0, 46.0, 40.0, 36.0, 38.0, 51.0, 32.0, 1059.0, 44.0, 42.0, 42.0, 45.0, 39.0, 33.0, 40.0, 28.0, 26.0, 34.0, 20.0, 22.0, 22.0, 12.0, 13.0, 10.0, 14.0, 6.0, 3.0, 4.0, 1.0, 0.0, 1.0, 3.0, 2.0, 0.0, 1.0, 2.0], "bins": [-6.71875, -6.53179931640625, -6.3448486328125, -6.15789794921875, -5.970947265625, -5.78399658203125, -5.5970458984375, -5.41009521484375, -5.22314453125, -5.03619384765625, -4.8492431640625, -4.66229248046875, -4.475341796875, -4.28839111328125, -4.1014404296875, -3.91448974609375, -3.7275390625, -3.54058837890625, -3.3536376953125, -3.16668701171875, -2.979736328125, -2.79278564453125, -2.6058349609375, -2.41888427734375, -2.23193359375, -2.04498291015625, -1.8580322265625, -1.67108154296875, -1.484130859375, -1.29718017578125, -1.1102294921875, -0.92327880859375, -0.736328125, -0.54937744140625, -0.3624267578125, -0.17547607421875, 0.011474609375, 0.19842529296875, 0.3853759765625, 0.57232666015625, 0.75927734375, 0.94622802734375, 1.1331787109375, 1.32012939453125, 1.507080078125, 1.69403076171875, 1.8809814453125, 2.06793212890625, 2.2548828125, 2.44183349609375, 2.6287841796875, 2.81573486328125, 3.002685546875, 3.18963623046875, 3.3765869140625, 3.56353759765625, 3.75048828125, 3.93743896484375, 4.1243896484375, 4.31134033203125, 4.498291015625, 4.68524169921875, 4.8721923828125, 5.05914306640625, 5.24609375]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 5.0, 10.0, 16.0, 27.0, 38.0, 57.0, 92.0, 188.0, 294.0, 423.0, 792.0, 1450.0, 2352.0, 3917.0, 6674.0, 11205.0, 19251.0, 32316.0, 53299.0, 85624.0, 128299.0, 396966.0, 984090.0, 135279.0, 91720.0, 57649.0, 34869.0, 20749.0, 12139.0, 7271.0, 4145.0, 2406.0, 1449.0, 805.0, 493.0, 296.0, 206.0, 101.0, 66.0, 44.0, 23.0, 15.0, 6.0, 9.0, 4.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.50732421875, -0.490997314453125, -0.47467041015625, -0.458343505859375, -0.4420166015625, -0.425689697265625, -0.40936279296875, -0.393035888671875, -0.376708984375, -0.360382080078125, -0.34405517578125, -0.327728271484375, -0.3114013671875, -0.295074462890625, -0.27874755859375, -0.262420654296875, -0.24609375, -0.229766845703125, -0.21343994140625, -0.197113037109375, -0.1807861328125, -0.164459228515625, -0.14813232421875, -0.131805419921875, -0.115478515625, -0.099151611328125, -0.08282470703125, -0.066497802734375, -0.0501708984375, -0.033843994140625, -0.01751708984375, -0.001190185546875, 0.01513671875, 0.031463623046875, 0.04779052734375, 0.064117431640625, 0.0804443359375, 0.096771240234375, 0.11309814453125, 0.129425048828125, 0.145751953125, 0.162078857421875, 0.17840576171875, 0.194732666015625, 0.2110595703125, 0.227386474609375, 0.24371337890625, 0.260040283203125, 0.2763671875, 0.292694091796875, 0.30902099609375, 0.325347900390625, 0.3416748046875, 0.358001708984375, 0.37432861328125, 0.390655517578125, 0.406982421875, 0.423309326171875, 0.43963623046875, 0.455963134765625, 0.4722900390625, 0.488616943359375, 0.50494384765625, 0.521270751953125, 0.53759765625]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 0.0, 3.0, 3.0, 4.0, 5.0, 7.0, 6.0, 9.0, 11.0, 14.0, 17.0, 15.0, 16.0, 23.0, 22.0, 32.0, 41.0, 39.0, 60.0, 69.0, 82.0, 105.0, 77.0, 72.0, 53.0, 39.0, 18.0, 29.0, 22.0, 20.0, 23.0, 11.0, 12.0, 7.0, 10.0, 10.0, 8.0, 1.0, 3.0, 2.0, 0.0, 2.0, 1.0, 4.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0155792236328125, -0.015065908432006836, -0.014552593231201172, -0.014039278030395508, -0.013525962829589844, -0.01301264762878418, -0.012499332427978516, -0.011986017227172852, -0.011472702026367188, -0.010959386825561523, -0.01044607162475586, -0.009932756423950195, -0.009419441223144531, -0.008906126022338867, -0.008392810821533203, -0.007879495620727539, -0.007366180419921875, -0.006852865219116211, -0.006339550018310547, -0.005826234817504883, -0.005312919616699219, -0.004799604415893555, -0.004286289215087891, -0.0037729740142822266, -0.0032596588134765625, -0.0027463436126708984, -0.0022330284118652344, -0.0017197132110595703, -0.0012063980102539062, -0.0006930828094482422, -0.00017976760864257812, 0.00033354759216308594, 0.00084686279296875, 0.001360177993774414, 0.0018734931945800781, 0.002386808395385742, 0.0029001235961914062, 0.0034134387969970703, 0.003926753997802734, 0.0044400691986083984, 0.0049533843994140625, 0.0054666996002197266, 0.005980014801025391, 0.006493330001831055, 0.007006645202636719, 0.007519960403442383, 0.008033275604248047, 0.008546590805053711, 0.009059906005859375, 0.009573221206665039, 0.010086536407470703, 0.010599851608276367, 0.011113166809082031, 0.011626482009887695, 0.01213979721069336, 0.012653112411499023, 0.013166427612304688, 0.013679742813110352, 0.014193058013916016, 0.01470637321472168, 0.015219688415527344, 0.015733003616333008, 0.016246318817138672, 0.016759634017944336, 0.01727294921875]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 4.0, 1.0, 2.0, 3.0, 6.0, 3.0, 4.0, 9.0, 7.0, 12.0, 13.0, 16.0, 17.0, 17.0, 18.0, 31.0, 46.0, 65.0, 118.0, 225.0, 669.0, 8218.0, 899828.0, 135800.0, 2429.0, 441.0, 167.0, 98.0, 70.0, 41.0, 37.0, 15.0, 25.0, 24.0, 15.0, 3.0, 16.0, 8.0, 4.0, 5.0, 9.0, 7.0, 4.0, 5.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.339111328125, -0.3290824890136719, -0.31905364990234375, -0.3090248107910156, -0.2989959716796875, -0.2889671325683594, -0.27893829345703125, -0.2689094543457031, -0.258880615234375, -0.24885177612304688, -0.23882293701171875, -0.22879409790039062, -0.2187652587890625, -0.20873641967773438, -0.19870758056640625, -0.18867874145507812, -0.17864990234375, -0.16862106323242188, -0.15859222412109375, -0.14856338500976562, -0.1385345458984375, -0.12850570678710938, -0.11847686767578125, -0.10844802856445312, -0.098419189453125, -0.08839035034179688, -0.07836151123046875, -0.06833267211914062, -0.0583038330078125, -0.048274993896484375, -0.03824615478515625, -0.028217315673828125, -0.0181884765625, -0.008159637451171875, 0.00186920166015625, 0.011898040771484375, 0.0219268798828125, 0.031955718994140625, 0.04198455810546875, 0.052013397216796875, 0.062042236328125, 0.07207107543945312, 0.08209991455078125, 0.09212875366210938, 0.1021575927734375, 0.11218643188476562, 0.12221527099609375, 0.13224411010742188, 0.14227294921875, 0.15230178833007812, 0.16233062744140625, 0.17235946655273438, 0.1823883056640625, 0.19241714477539062, 0.20244598388671875, 0.21247482299804688, 0.222503662109375, 0.23253250122070312, 0.24256134033203125, 0.2525901794433594, 0.2626190185546875, 0.2726478576660156, 0.28267669677734375, 0.2927055358886719, 0.302734375]}, "gradients/decoder.transformer.h.15.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 26.0, 482.0, 483.0, 25.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.018005209043622017, -0.014414839446544647, -0.010824470780789852, -0.007234102115035057, -0.0036437325179576874, -5.336292088031769e-05, 0.0035370048135519028, 0.0071273744106292725, 0.010717744007706642, 0.014308113604784012, 0.017898481339216232, 0.021488850936293602, 0.02507922053337097, 0.02866959013044834, 0.03225995600223541, 0.03585032746195793, 0.03944069892168045, 0.04303106665611267, 0.04662143811583519, 0.05021180585026741, 0.05380217730998993, 0.05739254504442215, 0.06098291277885437, 0.06457328051328659, 0.06816364824771881, 0.07175401598215103, 0.07534438371658325, 0.07893475890159607, 0.08252512663602829, 0.08611549437046051, 0.08970586210489273, 0.09329622983932495, 0.09688660502433777, 0.10047697275876999, 0.10406734049320221, 0.10765771567821503, 0.11124808341264725, 0.11483845114707947, 0.11842881888151169, 0.12201918661594391, 0.12560956180095673, 0.12919993698596954, 0.13279029726982117, 0.13638067245483398, 0.1399710327386856, 0.14356140792369843, 0.14715176820755005, 0.15074214339256287, 0.15433251857757568, 0.1579228937625885, 0.16151325404644012, 0.16510362923145294, 0.16869398951530457, 0.17228436470031738, 0.1758747398853302, 0.17946510016918182, 0.18305546045303345, 0.18664583563804626, 0.1902361959218979, 0.1938265711069107, 0.19741693139076233, 0.20100730657577515, 0.20459768176078796, 0.2081880420446396, 0.2117784172296524]}, "gradients/decoder.transformer.h.15.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 2.0, 7.0, 6.0, 17.0, 28.0, 23.0, 26.0, 25.0, 32.0, 45.0, 36.0, 55.0, 45.0, 61.0, 59.0, 54.0, 60.0, 54.0, 55.0, 57.0, 48.0, 37.0, 45.0, 31.0, 22.0, 12.0, 22.0, 17.0, 16.0, 6.0, 3.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.013411343097686768, -0.012956619262695312, -0.012501895427703857, -0.012047171592712402, -0.011592447757720947, -0.011137723922729492, -0.010683000087738037, -0.010228276252746582, -0.009773552417755127, -0.009318828582763672, -0.008864104747772217, -0.008409380912780762, -0.007954657077789307, -0.0074999332427978516, -0.0070452094078063965, -0.006590485572814941, -0.006135761737823486, -0.005681037902832031, -0.005226314067840576, -0.004771590232849121, -0.004316866397857666, -0.003862142562866211, -0.003407418727874756, -0.0029526948928833008, -0.0024979710578918457, -0.0020432472229003906, -0.0015885233879089355, -0.0011337995529174805, -0.0006790757179260254, -0.0002243518829345703, 0.00023037195205688477, 0.0006850957870483398, 0.001139819622039795, 0.00159454345703125, 0.002049267292022705, 0.00250399112701416, 0.0029587149620056152, 0.0034134387969970703, 0.0038681626319885254, 0.0043228864669799805, 0.0047776103019714355, 0.005232334136962891, 0.005687057971954346, 0.006141781806945801, 0.006596505641937256, 0.007051229476928711, 0.007505953311920166, 0.007960677146911621, 0.008415400981903076, 0.008870124816894531, 0.009324848651885986, 0.009779572486877441, 0.010234296321868896, 0.010689020156860352, 0.011143743991851807, 0.011598467826843262, 0.012053191661834717, 0.012507915496826172, 0.012962639331817627, 0.013417363166809082, 0.013872087001800537, 0.014326810836791992, 0.014781534671783447, 0.015236258506774902, 0.015690982341766357]}, "gradients/decoder.transformer.h.15.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 7.0, 6.0, 9.0, 10.0, 8.0, 15.0, 19.0, 9.0, 20.0, 17.0, 22.0, 26.0, 33.0, 34.0, 39.0, 42.0, 37.0, 52.0, 40.0, 34.0, 38.0, 61.0, 48.0, 37.0, 42.0, 51.0, 33.0, 25.0, 35.0, 26.0, 33.0, 15.0, 8.0, 19.0, 10.0, 7.0, 11.0, 8.0, 9.0, 5.0, 3.0, 3.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.7265625, -8.4281005859375, -8.129638671875, -7.8311767578125, -7.53271484375, -7.2342529296875, -6.935791015625, -6.6373291015625, -6.3388671875, -6.0404052734375, -5.741943359375, -5.4434814453125, -5.14501953125, -4.8465576171875, -4.548095703125, -4.2496337890625, -3.951171875, -3.6527099609375, -3.354248046875, -3.0557861328125, -2.75732421875, -2.4588623046875, -2.160400390625, -1.8619384765625, -1.5634765625, -1.2650146484375, -0.966552734375, -0.6680908203125, -0.36962890625, -0.0711669921875, 0.227294921875, 0.5257568359375, 0.82421875, 1.1226806640625, 1.421142578125, 1.7196044921875, 2.01806640625, 2.3165283203125, 2.614990234375, 2.9134521484375, 3.2119140625, 3.5103759765625, 3.808837890625, 4.1072998046875, 4.40576171875, 4.7042236328125, 5.002685546875, 5.3011474609375, 5.599609375, 5.8980712890625, 6.196533203125, 6.4949951171875, 6.79345703125, 7.0919189453125, 7.390380859375, 7.6888427734375, 7.9873046875, 8.2857666015625, 8.584228515625, 8.8826904296875, 9.18115234375, 9.4796142578125, 9.778076171875, 10.0765380859375, 10.375]}, "gradients/decoder.transformer.h.15.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 6.0, 12.0, 19.0, 27.0, 41.0, 64.0, 103.0, 183.0, 265.0, 419.0, 800.0, 1461.0, 2711.0, 5525.0, 12650.0, 30805.0, 84744.0, 283809.0, 422149.0, 126928.0, 43626.0, 17270.0, 7394.0, 3439.0, 1745.0, 950.0, 552.0, 331.0, 191.0, 121.0, 78.0, 44.0, 31.0, 22.0, 16.0, 11.0, 7.0, 1.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-8.9140625, -8.65325927734375, -8.3924560546875, -8.13165283203125, -7.870849609375, -7.61004638671875, -7.3492431640625, -7.08843994140625, -6.82763671875, -6.56683349609375, -6.3060302734375, -6.04522705078125, -5.784423828125, -5.52362060546875, -5.2628173828125, -5.00201416015625, -4.7412109375, -4.48040771484375, -4.2196044921875, -3.95880126953125, -3.697998046875, -3.43719482421875, -3.1763916015625, -2.91558837890625, -2.65478515625, -2.39398193359375, -2.1331787109375, -1.87237548828125, -1.611572265625, -1.35076904296875, -1.0899658203125, -0.82916259765625, -0.568359375, -0.30755615234375, -0.0467529296875, 0.21405029296875, 0.474853515625, 0.73565673828125, 0.9964599609375, 1.25726318359375, 1.51806640625, 1.77886962890625, 2.0396728515625, 2.30047607421875, 2.561279296875, 2.82208251953125, 3.0828857421875, 3.34368896484375, 3.6044921875, 3.86529541015625, 4.1260986328125, 4.38690185546875, 4.647705078125, 4.90850830078125, 5.1693115234375, 5.43011474609375, 5.69091796875, 5.95172119140625, 6.2125244140625, 6.47332763671875, 6.734130859375, 6.99493408203125, 7.2557373046875, 7.51654052734375, 7.77734375]}, "gradients/decoder.transformer.h.15.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 6.0, 5.0, 7.0, 3.0, 13.0, 11.0, 17.0, 14.0, 15.0, 31.0, 38.0, 35.0, 32.0, 55.0, 67.0, 71.0, 132.0, 1641.0, 346.0, 110.0, 64.0, 67.0, 40.0, 38.0, 44.0, 28.0, 26.0, 27.0, 11.0, 18.0, 15.0, 8.0, 5.0, 6.0, 4.0, 1.0, 6.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-49.75, -48.32275390625, -46.8955078125, -45.46826171875, -44.041015625, -42.61376953125, -41.1865234375, -39.75927734375, -38.33203125, -36.90478515625, -35.4775390625, -34.05029296875, -32.623046875, -31.19580078125, -29.7685546875, -28.34130859375, -26.9140625, -25.48681640625, -24.0595703125, -22.63232421875, -21.205078125, -19.77783203125, -18.3505859375, -16.92333984375, -15.49609375, -14.06884765625, -12.6416015625, -11.21435546875, -9.787109375, -8.35986328125, -6.9326171875, -5.50537109375, -4.078125, -2.65087890625, -1.2236328125, 0.20361328125, 1.630859375, 3.05810546875, 4.4853515625, 5.91259765625, 7.33984375, 8.76708984375, 10.1943359375, 11.62158203125, 13.048828125, 14.47607421875, 15.9033203125, 17.33056640625, 18.7578125, 20.18505859375, 21.6123046875, 23.03955078125, 24.466796875, 25.89404296875, 27.3212890625, 28.74853515625, 30.17578125, 31.60302734375, 33.0302734375, 34.45751953125, 35.884765625, 37.31201171875, 38.7392578125, 40.16650390625, 41.59375]}, "gradients/decoder.transformer.h.15.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 3.0, 4.0, 10.0, 6.0, 9.0, 11.0, 23.0, 20.0, 35.0, 43.0, 64.0, 77.0, 136.0, 131.0, 224.0, 454.0, 1288.0, 63494.0, 3072843.0, 5184.0, 665.0, 317.0, 206.0, 110.0, 95.0, 78.0, 58.0, 25.0, 25.0, 22.0, 14.0, 11.0, 7.0, 9.0, 3.0, 6.0, 4.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-93.6875, -90.4853515625, -87.283203125, -84.0810546875, -80.87890625, -77.6767578125, -74.474609375, -71.2724609375, -68.0703125, -64.8681640625, -61.666015625, -58.4638671875, -55.26171875, -52.0595703125, -48.857421875, -45.6552734375, -42.453125, -39.2509765625, -36.048828125, -32.8466796875, -29.64453125, -26.4423828125, -23.240234375, -20.0380859375, -16.8359375, -13.6337890625, -10.431640625, -7.2294921875, -4.02734375, -0.8251953125, 2.376953125, 5.5791015625, 8.78125, 11.9833984375, 15.185546875, 18.3876953125, 21.58984375, 24.7919921875, 27.994140625, 31.1962890625, 34.3984375, 37.6005859375, 40.802734375, 44.0048828125, 47.20703125, 50.4091796875, 53.611328125, 56.8134765625, 60.015625, 63.2177734375, 66.419921875, 69.6220703125, 72.82421875, 76.0263671875, 79.228515625, 82.4306640625, 85.6328125, 88.8349609375, 92.037109375, 95.2392578125, 98.44140625, 101.6435546875, 104.845703125, 108.0478515625, 111.25]}, "gradients/decoder.transformer.h.15.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 29.0, 146.0, 385.0, 337.0, 104.0, 9.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-214.38636779785156, -210.08706665039062, -205.78775024414062, -201.4884490966797, -197.1891326904297, -192.88983154296875, -188.59051513671875, -184.2912139892578, -179.9918975830078, -175.69259643554688, -171.39328002929688, -167.09397888183594, -162.79466247558594, -158.495361328125, -154.196044921875, -149.89674377441406, -145.59744262695312, -141.2981414794922, -136.9988250732422, -132.69952392578125, -128.40020751953125, -124.10089874267578, -119.80158996582031, -115.50228881835938, -111.20297241210938, -106.9036636352539, -102.60435485839844, -98.30504608154297, -94.0057373046875, -89.70642852783203, -85.40711975097656, -81.10781860351562, -76.80850982666016, -72.50920104980469, -68.20989227294922, -63.91058349609375, -59.61127471923828, -55.31196594238281, -51.01266098022461, -46.71335220336914, -42.41404342651367, -38.1147346496582, -33.815425872802734, -29.5161190032959, -25.21681022644043, -20.91750144958496, -16.618194580078125, -12.318885803222656, -8.019577026367188, -3.720268726348877, 0.5790395736694336, 4.878347396850586, 9.177656173706055, 13.476964950561523, 17.77627182006836, 22.075580596923828, 26.374889373779297, 30.674198150634766, 34.973506927490234, 39.27281188964844, 43.572120666503906, 47.871429443359375, 52.170738220214844, 56.47004699707031, 60.76935577392578]}, "gradients/decoder.transformer.h.15.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 3.0, 6.0, 3.0, 9.0, 5.0, 7.0, 13.0, 13.0, 15.0, 17.0, 12.0, 23.0, 15.0, 24.0, 20.0, 29.0, 34.0, 40.0, 24.0, 34.0, 49.0, 42.0, 55.0, 40.0, 44.0, 30.0, 35.0, 30.0, 40.0, 25.0, 28.0, 33.0, 31.0, 24.0, 20.0, 16.0, 18.0, 19.0, 8.0, 6.0, 13.0, 10.0, 8.0, 8.0, 8.0, 6.0, 0.0, 2.0, 1.0, 4.0, 3.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0], "bins": [-75.65803527832031, -73.16386413574219, -70.6697006225586, -68.175537109375, -65.68136596679688, -63.187198638916016, -60.693031311035156, -58.1988639831543, -55.70469665527344, -53.21052932739258, -50.71636199951172, -48.22219467163086, -45.72802734375, -43.23386001586914, -40.73969268798828, -38.24552536010742, -35.75135803222656, -33.2571907043457, -30.763023376464844, -28.268856048583984, -25.774688720703125, -23.280521392822266, -20.786354064941406, -18.292186737060547, -15.798019409179688, -13.303852081298828, -10.809684753417969, -8.31551742553711, -5.82135009765625, -3.3271827697753906, -0.8330154418945312, 1.6611518859863281, 4.155311584472656, 6.649478912353516, 9.143646240234375, 11.637813568115234, 14.131980895996094, 16.626148223876953, 19.120315551757812, 21.614482879638672, 24.10865020751953, 26.60281753540039, 29.09698486328125, 31.59115219116211, 34.08531951904297, 36.57948684692383, 39.07365417480469, 41.56782150268555, 44.061988830566406, 46.556156158447266, 49.050323486328125, 51.544490814208984, 54.038658142089844, 56.5328254699707, 59.02699279785156, 61.52116012573242, 64.01532745361328, 66.50949096679688, 69.003662109375, 71.49783325195312, 73.99199676513672, 76.48616027832031, 78.98033142089844, 81.47450256347656, 83.96866607666016]}, "gradients/decoder.transformer.h.14.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 0.0, 2.0, 7.0, 9.0, 11.0, 6.0, 7.0, 11.0, 23.0, 17.0, 15.0, 15.0, 21.0, 30.0, 24.0, 28.0, 43.0, 42.0, 42.0, 36.0, 41.0, 43.0, 36.0, 42.0, 54.0, 36.0, 46.0, 46.0, 35.0, 32.0, 19.0, 32.0, 26.0, 29.0, 16.0, 13.0, 15.0, 9.0, 8.0, 7.0, 7.0, 7.0, 6.0, 9.0, 5.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.5390625, -8.2406005859375, -7.942138671875, -7.6436767578125, -7.34521484375, -7.0467529296875, -6.748291015625, -6.4498291015625, -6.1513671875, -5.8529052734375, -5.554443359375, -5.2559814453125, -4.95751953125, -4.6590576171875, -4.360595703125, -4.0621337890625, -3.763671875, -3.4652099609375, -3.166748046875, -2.8682861328125, -2.56982421875, -2.2713623046875, -1.972900390625, -1.6744384765625, -1.3759765625, -1.0775146484375, -0.779052734375, -0.4805908203125, -0.18212890625, 0.1163330078125, 0.414794921875, 0.7132568359375, 1.01171875, 1.3101806640625, 1.608642578125, 1.9071044921875, 2.20556640625, 2.5040283203125, 2.802490234375, 3.1009521484375, 3.3994140625, 3.6978759765625, 3.996337890625, 4.2947998046875, 4.59326171875, 4.8917236328125, 5.190185546875, 5.4886474609375, 5.787109375, 6.0855712890625, 6.384033203125, 6.6824951171875, 6.98095703125, 7.2794189453125, 7.577880859375, 7.8763427734375, 8.1748046875, 8.4732666015625, 8.771728515625, 9.0701904296875, 9.36865234375, 9.6671142578125, 9.965576171875, 10.2640380859375, 10.5625]}, "gradients/decoder.transformer.h.14.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 5.0, 5.0, 9.0, 8.0, 4.0, 9.0, 14.0, 17.0, 19.0, 18.0, 19.0, 26.0, 30.0, 36.0, 50.0, 107.0, 214.0, 534.0, 2474.0, 25353.0, 856934.0, 3161951.0, 136867.0, 7669.0, 1141.0, 321.0, 126.0, 85.0, 43.0, 26.0, 26.0, 24.0, 24.0, 19.0, 19.0, 12.0, 5.0, 11.0, 7.0, 3.0, 5.0, 6.0, 6.0, 7.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.15625, -38.984375, -37.8125, -36.640625, -35.46875, -34.296875, -33.125, -31.953125, -30.78125, -29.609375, -28.4375, -27.265625, -26.09375, -24.921875, -23.75, -22.578125, -21.40625, -20.234375, -19.0625, -17.890625, -16.71875, -15.546875, -14.375, -13.203125, -12.03125, -10.859375, -9.6875, -8.515625, -7.34375, -6.171875, -5.0, -3.828125, -2.65625, -1.484375, -0.3125, 0.859375, 2.03125, 3.203125, 4.375, 5.546875, 6.71875, 7.890625, 9.0625, 10.234375, 11.40625, 12.578125, 13.75, 14.921875, 16.09375, 17.265625, 18.4375, 19.609375, 20.78125, 21.953125, 23.125, 24.296875, 25.46875, 26.640625, 27.8125, 28.984375, 30.15625, 31.328125, 32.5, 33.671875, 34.84375]}, "gradients/decoder.transformer.h.14.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 3.0, 8.0, 6.0, 14.0, 22.0, 30.0, 39.0, 75.0, 96.0, 132.0, 193.0, 269.0, 378.0, 521.0, 544.0, 489.0, 364.0, 284.0, 171.0, 138.0, 98.0, 64.0, 40.0, 32.0, 24.0, 18.0, 11.0, 7.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-26.3125, -25.5908203125, -24.869140625, -24.1474609375, -23.42578125, -22.7041015625, -21.982421875, -21.2607421875, -20.5390625, -19.8173828125, -19.095703125, -18.3740234375, -17.65234375, -16.9306640625, -16.208984375, -15.4873046875, -14.765625, -14.0439453125, -13.322265625, -12.6005859375, -11.87890625, -11.1572265625, -10.435546875, -9.7138671875, -8.9921875, -8.2705078125, -7.548828125, -6.8271484375, -6.10546875, -5.3837890625, -4.662109375, -3.9404296875, -3.21875, -2.4970703125, -1.775390625, -1.0537109375, -0.33203125, 0.3896484375, 1.111328125, 1.8330078125, 2.5546875, 3.2763671875, 3.998046875, 4.7197265625, 5.44140625, 6.1630859375, 6.884765625, 7.6064453125, 8.328125, 9.0498046875, 9.771484375, 10.4931640625, 11.21484375, 11.9365234375, 12.658203125, 13.3798828125, 14.1015625, 14.8232421875, 15.544921875, 16.2666015625, 16.98828125, 17.7099609375, 18.431640625, 19.1533203125, 19.875]}, "gradients/decoder.transformer.h.14.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 8.0, 2.0, 3.0, 5.0, 9.0, 14.0, 16.0, 31.0, 33.0, 47.0, 60.0, 89.0, 113.0, 198.0, 350.0, 690.0, 1932.0, 10605.0, 171042.0, 3138810.0, 829476.0, 34835.0, 3725.0, 982.0, 434.0, 223.0, 163.0, 111.0, 81.0, 60.0, 38.0, 32.0, 16.0, 16.0, 10.0, 8.0, 7.0, 5.0, 5.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.0625, -40.5986328125, -39.134765625, -37.6708984375, -36.20703125, -34.7431640625, -33.279296875, -31.8154296875, -30.3515625, -28.8876953125, -27.423828125, -25.9599609375, -24.49609375, -23.0322265625, -21.568359375, -20.1044921875, -18.640625, -17.1767578125, -15.712890625, -14.2490234375, -12.78515625, -11.3212890625, -9.857421875, -8.3935546875, -6.9296875, -5.4658203125, -4.001953125, -2.5380859375, -1.07421875, 0.3896484375, 1.853515625, 3.3173828125, 4.78125, 6.2451171875, 7.708984375, 9.1728515625, 10.63671875, 12.1005859375, 13.564453125, 15.0283203125, 16.4921875, 17.9560546875, 19.419921875, 20.8837890625, 22.34765625, 23.8115234375, 25.275390625, 26.7392578125, 28.203125, 29.6669921875, 31.130859375, 32.5947265625, 34.05859375, 35.5224609375, 36.986328125, 38.4501953125, 39.9140625, 41.3779296875, 42.841796875, 44.3056640625, 45.76953125, 47.2333984375, 48.697265625, 50.1611328125, 51.625]}, "gradients/decoder.transformer.h.14.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 5.0, 4.0, 9.0, 16.0, 31.0, 30.0, 58.0, 71.0, 81.0, 108.0, 118.0, 121.0, 99.0, 86.0, 61.0, 50.0, 24.0, 19.0, 10.0, 11.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-97.73302459716797, -94.63463592529297, -91.53624725341797, -88.4378662109375, -85.3394775390625, -82.2410888671875, -79.1427001953125, -76.0443115234375, -72.9459228515625, -69.8475341796875, -66.7491455078125, -63.650760650634766, -60.55237579345703, -57.45398712158203, -54.35559844970703, -51.25720977783203, -48.15882873535156, -45.06044006347656, -41.96205520629883, -38.86366653442383, -35.765281677246094, -32.666893005371094, -29.568504333496094, -26.470117568969727, -23.37173080444336, -20.273344039916992, -17.174957275390625, -14.076568603515625, -10.978181838989258, -7.879795074462891, -4.781406402587891, -1.6830196380615234, 1.4153594970703125, 4.513746738433838, 7.612133979797363, 10.710521697998047, 13.808908462524414, 16.90729522705078, 20.00568389892578, 23.10407066345215, 26.202457427978516, 29.300844192504883, 32.39923095703125, 35.49761962890625, 38.59600830078125, 41.694393157958984, 44.792781829833984, 47.89116668701172, 50.98955535888672, 54.08794403076172, 57.18632888793945, 60.28471755981445, 63.38310241699219, 66.48149108886719, 69.57987976074219, 72.67826843261719, 75.77665710449219, 78.87504577636719, 81.97343444824219, 85.07182312011719, 88.17020416259766, 91.26859283447266, 94.36698150634766, 97.46537017822266, 100.56375122070312]}, "gradients/decoder.transformer.h.14.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 5.0, 4.0, 3.0, 6.0, 11.0, 13.0, 10.0, 17.0, 21.0, 18.0, 22.0, 21.0, 11.0, 27.0, 38.0, 39.0, 33.0, 41.0, 34.0, 37.0, 43.0, 42.0, 51.0, 40.0, 45.0, 24.0, 36.0, 42.0, 38.0, 39.0, 30.0, 18.0, 24.0, 19.0, 18.0, 15.0, 17.0, 8.0, 9.0, 11.0, 8.0, 7.0, 7.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-57.74072265625, -55.865501403808594, -53.99028015136719, -52.11505889892578, -50.239837646484375, -48.36461639404297, -46.48939514160156, -44.614173889160156, -42.73895263671875, -40.863731384277344, -38.98851013183594, -37.11328887939453, -35.238067626953125, -33.36284637451172, -31.487625122070312, -29.612403869628906, -27.7371826171875, -25.861961364746094, -23.986740112304688, -22.11151885986328, -20.236297607421875, -18.36107635498047, -16.485855102539062, -14.610633850097656, -12.73541259765625, -10.860191345214844, -8.984970092773438, -7.109748840332031, -5.234527587890625, -3.3593063354492188, -1.4840850830078125, 0.39113616943359375, 2.266357421875, 4.141578674316406, 6.0167999267578125, 7.892021179199219, 9.767242431640625, 11.642463684082031, 13.517684936523438, 15.392906188964844, 17.26812744140625, 19.143348693847656, 21.018569946289062, 22.89379119873047, 24.769012451171875, 26.64423370361328, 28.519454956054688, 30.394676208496094, 32.2698974609375, 34.145118713378906, 36.02033996582031, 37.89556121826172, 39.770782470703125, 41.64600372314453, 43.52122497558594, 45.396446228027344, 47.27166748046875, 49.146888732910156, 51.02210998535156, 52.89733123779297, 54.772552490234375, 56.64777374267578, 58.52299499511719, 60.398216247558594, 62.2734375]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 3.0, 2.0, 5.0, 4.0, 2.0, 6.0, 6.0, 4.0, 8.0, 12.0, 16.0, 8.0, 19.0, 16.0, 14.0, 23.0, 25.0, 21.0, 32.0, 28.0, 29.0, 39.0, 30.0, 31.0, 35.0, 46.0, 37.0, 32.0, 38.0, 31.0, 39.0, 42.0, 38.0, 31.0, 39.0, 25.0, 29.0, 24.0, 20.0, 16.0, 12.0, 19.0, 14.0, 13.0, 2.0, 8.0, 6.0, 8.0, 5.0, 8.0, 6.0, 4.0, 3.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-7.8515625, -7.592041015625, -7.33251953125, -7.072998046875, -6.8134765625, -6.553955078125, -6.29443359375, -6.034912109375, -5.775390625, -5.515869140625, -5.25634765625, -4.996826171875, -4.7373046875, -4.477783203125, -4.21826171875, -3.958740234375, -3.69921875, -3.439697265625, -3.18017578125, -2.920654296875, -2.6611328125, -2.401611328125, -2.14208984375, -1.882568359375, -1.623046875, -1.363525390625, -1.10400390625, -0.844482421875, -0.5849609375, -0.325439453125, -0.06591796875, 0.193603515625, 0.453125, 0.712646484375, 0.97216796875, 1.231689453125, 1.4912109375, 1.750732421875, 2.01025390625, 2.269775390625, 2.529296875, 2.788818359375, 3.04833984375, 3.307861328125, 3.5673828125, 3.826904296875, 4.08642578125, 4.345947265625, 4.60546875, 4.864990234375, 5.12451171875, 5.384033203125, 5.6435546875, 5.903076171875, 6.16259765625, 6.422119140625, 6.681640625, 6.941162109375, 7.20068359375, 7.460205078125, 7.7197265625, 7.979248046875, 8.23876953125, 8.498291015625, 8.7578125]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.weight": {"_type": "histogram", "values": [5.0, 2.0, 4.0, 6.0, 11.0, 15.0, 19.0, 38.0, 50.0, 78.0, 132.0, 178.0, 207.0, 397.0, 512.0, 725.0, 1023.0, 1550.0, 2104.0, 3060.0, 4386.0, 6306.0, 9110.0, 13149.0, 19396.0, 28282.0, 41670.0, 61046.0, 89038.0, 123889.0, 153194.0, 142285.0, 108273.0, 75224.0, 51640.0, 35003.0, 23787.0, 16223.0, 11062.0, 7784.0, 5352.0, 3737.0, 2600.0, 1834.0, 1341.0, 890.0, 576.0, 440.0, 364.0, 190.0, 133.0, 81.0, 51.0, 27.0, 31.0, 15.0, 21.0, 6.0, 9.0, 8.0, 1.0, 3.0, 0.0, 3.0], "bins": [-0.60107421875, -0.58154296875, -0.56201171875, -0.54248046875, -0.52294921875, -0.50341796875, -0.48388671875, -0.46435546875, -0.44482421875, -0.42529296875, -0.40576171875, -0.38623046875, -0.36669921875, -0.34716796875, -0.32763671875, -0.30810546875, -0.28857421875, -0.26904296875, -0.24951171875, -0.22998046875, -0.21044921875, -0.19091796875, -0.17138671875, -0.15185546875, -0.13232421875, -0.11279296875, -0.09326171875, -0.07373046875, -0.05419921875, -0.03466796875, -0.01513671875, 0.00439453125, 0.02392578125, 0.04345703125, 0.06298828125, 0.08251953125, 0.10205078125, 0.12158203125, 0.14111328125, 0.16064453125, 0.18017578125, 0.19970703125, 0.21923828125, 0.23876953125, 0.25830078125, 0.27783203125, 0.29736328125, 0.31689453125, 0.33642578125, 0.35595703125, 0.37548828125, 0.39501953125, 0.41455078125, 0.43408203125, 0.45361328125, 0.47314453125, 0.49267578125, 0.51220703125, 0.53173828125, 0.55126953125, 0.57080078125, 0.59033203125, 0.60986328125, 0.62939453125, 0.64892578125]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 4.0, 2.0, 7.0, 7.0, 9.0, 8.0, 6.0, 13.0, 11.0, 13.0, 23.0, 17.0, 24.0, 24.0, 26.0, 30.0, 32.0, 31.0, 48.0, 37.0, 44.0, 43.0, 35.0, 55.0, 1069.0, 43.0, 37.0, 32.0, 35.0, 38.0, 24.0, 24.0, 18.0, 29.0, 24.0, 20.0, 16.0, 14.0, 15.0, 6.0, 8.0, 3.0, 5.0, 6.0, 6.0, 8.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-5.953125, -5.77301025390625, -5.5928955078125, -5.41278076171875, -5.232666015625, -5.05255126953125, -4.8724365234375, -4.69232177734375, -4.51220703125, -4.33209228515625, -4.1519775390625, -3.97186279296875, -3.791748046875, -3.61163330078125, -3.4315185546875, -3.25140380859375, -3.0712890625, -2.89117431640625, -2.7110595703125, -2.53094482421875, -2.350830078125, -2.17071533203125, -1.9906005859375, -1.81048583984375, -1.63037109375, -1.45025634765625, -1.2701416015625, -1.09002685546875, -0.909912109375, -0.72979736328125, -0.5496826171875, -0.36956787109375, -0.189453125, -0.00933837890625, 0.1707763671875, 0.35089111328125, 0.531005859375, 0.71112060546875, 0.8912353515625, 1.07135009765625, 1.25146484375, 1.43157958984375, 1.6116943359375, 1.79180908203125, 1.971923828125, 2.15203857421875, 2.3321533203125, 2.51226806640625, 2.6923828125, 2.87249755859375, 3.0526123046875, 3.23272705078125, 3.412841796875, 3.59295654296875, 3.7730712890625, 3.95318603515625, 4.13330078125, 4.31341552734375, 4.4935302734375, 4.67364501953125, 4.853759765625, 5.03387451171875, 5.2139892578125, 5.39410400390625, 5.57421875]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.weight": {"_type": "histogram", "values": [4.0, 3.0, 8.0, 10.0, 20.0, 16.0, 21.0, 38.0, 48.0, 70.0, 98.0, 147.0, 226.0, 349.0, 485.0, 812.0, 1249.0, 1883.0, 2776.0, 4137.0, 6624.0, 10130.0, 15759.0, 24299.0, 37311.0, 57233.0, 83667.0, 117395.0, 326961.0, 1005747.0, 127258.0, 91139.0, 62533.0, 41448.0, 26951.0, 17462.0, 11507.0, 7389.0, 4812.0, 3086.0, 2014.0, 1360.0, 912.0, 566.0, 407.0, 253.0, 158.0, 121.0, 83.0, 52.0, 45.0, 28.0, 10.0, 6.0, 7.0, 6.0, 4.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.4091796875, -0.3951263427734375, -0.381072998046875, -0.3670196533203125, -0.35296630859375, -0.3389129638671875, -0.324859619140625, -0.3108062744140625, -0.2967529296875, -0.2826995849609375, -0.268646240234375, -0.2545928955078125, -0.24053955078125, -0.2264862060546875, -0.212432861328125, -0.1983795166015625, -0.184326171875, -0.1702728271484375, -0.156219482421875, -0.1421661376953125, -0.12811279296875, -0.1140594482421875, -0.100006103515625, -0.0859527587890625, -0.0718994140625, -0.0578460693359375, -0.043792724609375, -0.0297393798828125, -0.01568603515625, -0.0016326904296875, 0.012420654296875, 0.0264739990234375, 0.04052734375, 0.0545806884765625, 0.068634033203125, 0.0826873779296875, 0.09674072265625, 0.1107940673828125, 0.124847412109375, 0.1389007568359375, 0.1529541015625, 0.1670074462890625, 0.181060791015625, 0.1951141357421875, 0.20916748046875, 0.2232208251953125, 0.237274169921875, 0.2513275146484375, 0.265380859375, 0.2794342041015625, 0.293487548828125, 0.3075408935546875, 0.32159423828125, 0.3356475830078125, 0.349700927734375, 0.3637542724609375, 0.3778076171875, 0.3918609619140625, 0.405914306640625, 0.4199676513671875, 0.43402099609375, 0.4480743408203125, 0.462127685546875, 0.4761810302734375, 0.490234375]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 5.0, 2.0, 0.0, 3.0, 3.0, 3.0, 5.0, 2.0, 3.0, 12.0, 15.0, 14.0, 19.0, 29.0, 35.0, 25.0, 45.0, 35.0, 67.0, 65.0, 71.0, 101.0, 80.0, 56.0, 60.0, 47.0, 41.0, 28.0, 28.0, 19.0, 19.0, 12.0, 7.0, 9.0, 8.0, 6.0, 5.0, 5.0, 3.0, 2.0, 4.0, 1.0, 3.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0], "bins": [-0.020751953125, -0.02011251449584961, -0.01947307586669922, -0.018833637237548828, -0.018194198608398438, -0.017554759979248047, -0.016915321350097656, -0.016275882720947266, -0.015636444091796875, -0.014997005462646484, -0.014357566833496094, -0.013718128204345703, -0.013078689575195312, -0.012439250946044922, -0.011799812316894531, -0.01116037368774414, -0.01052093505859375, -0.00988149642944336, -0.009242057800292969, -0.008602619171142578, -0.007963180541992188, -0.007323741912841797, -0.006684303283691406, -0.006044864654541016, -0.005405426025390625, -0.004765987396240234, -0.004126548767089844, -0.003487110137939453, -0.0028476715087890625, -0.002208232879638672, -0.0015687942504882812, -0.0009293556213378906, -0.0002899169921875, 0.0003495216369628906, 0.0009889602661132812, 0.0016283988952636719, 0.0022678375244140625, 0.002907276153564453, 0.0035467147827148438, 0.004186153411865234, 0.004825592041015625, 0.005465030670166016, 0.006104469299316406, 0.006743907928466797, 0.0073833465576171875, 0.008022785186767578, 0.008662223815917969, 0.00930166244506836, 0.00994110107421875, 0.01058053970336914, 0.011219978332519531, 0.011859416961669922, 0.012498855590820312, 0.013138294219970703, 0.013777732849121094, 0.014417171478271484, 0.015056610107421875, 0.015696048736572266, 0.016335487365722656, 0.016974925994873047, 0.017614364624023438, 0.018253803253173828, 0.01889324188232422, 0.01953268051147461, 0.020172119140625]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 4.0, 3.0, 3.0, 5.0, 7.0, 12.0, 7.0, 13.0, 13.0, 21.0, 34.0, 45.0, 55.0, 106.0, 160.0, 353.0, 2078.0, 705780.0, 337431.0, 1580.0, 341.0, 177.0, 102.0, 48.0, 46.0, 31.0, 27.0, 30.0, 9.0, 8.0, 3.0, 4.0, 3.0, 4.0, 3.0, 1.0, 2.0, 0.0, 2.0, 5.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.464111328125, -0.4495887756347656, -0.43506622314453125, -0.4205436706542969, -0.4060211181640625, -0.3914985656738281, -0.37697601318359375, -0.3624534606933594, -0.347930908203125, -0.3334083557128906, -0.31888580322265625, -0.3043632507324219, -0.2898406982421875, -0.2753181457519531, -0.26079559326171875, -0.24627304077148438, -0.23175048828125, -0.21722793579101562, -0.20270538330078125, -0.18818283081054688, -0.1736602783203125, -0.15913772583007812, -0.14461517333984375, -0.13009262084960938, -0.115570068359375, -0.10104751586914062, -0.08652496337890625, -0.07200241088867188, -0.0574798583984375, -0.042957305908203125, -0.02843475341796875, -0.013912200927734375, 0.0006103515625, 0.015132904052734375, 0.02965545654296875, 0.044178009033203125, 0.0587005615234375, 0.07322311401367188, 0.08774566650390625, 0.10226821899414062, 0.116790771484375, 0.13131332397460938, 0.14583587646484375, 0.16035842895507812, 0.1748809814453125, 0.18940353393554688, 0.20392608642578125, 0.21844863891601562, 0.23297119140625, 0.24749374389648438, 0.26201629638671875, 0.2765388488769531, 0.2910614013671875, 0.3055839538574219, 0.32010650634765625, 0.3346290588378906, 0.349151611328125, 0.3636741638183594, 0.37819671630859375, 0.3927192687988281, 0.4072418212890625, 0.4217643737792969, 0.43628692626953125, 0.4508094787597656, 0.46533203125]}, "gradients/decoder.transformer.h.14.ln_cross_attn.weight": {"_type": "histogram", "values": [4.0, 39.0, 392.0, 526.0, 48.0, 9.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00971305277198553, -0.006579868495464325, -0.0034466846846044064, -0.00031350087374448776, 0.002819683402776718, 0.005952867679297924, 0.009086051024496555, 0.012219236232340336, 0.015352419577538967, 0.018485603854060173, 0.021618787199258804, 0.024751972407102585, 0.027885155752301216, 0.031018339097499847, 0.03415152430534363, 0.03728470951318741, 0.04041789099574089, 0.04355107620358467, 0.04668425768613815, 0.049817442893981934, 0.052950628101825714, 0.056083813309669495, 0.05921699479222298, 0.06235018000006676, 0.06548336148262024, 0.06861654669046402, 0.0717497318983078, 0.07488290965557098, 0.07801609486341476, 0.08114928007125854, 0.08428246527910233, 0.0874156504869461, 0.09054883569478989, 0.09368202090263367, 0.09681520611047745, 0.09994839131832123, 0.10308156907558441, 0.10621475428342819, 0.10934793949127197, 0.11248112469911575, 0.11561430990695953, 0.11874749511480331, 0.1218806803226471, 0.12501385807991028, 0.12814705073833466, 0.13128022849559784, 0.13441342115402222, 0.1375465989112854, 0.14067977666854858, 0.14381295442581177, 0.14694614708423615, 0.15007932484149933, 0.1532125174999237, 0.1563456952571869, 0.15947887301445007, 0.16261206567287445, 0.16574525833129883, 0.168878436088562, 0.1720116287469864, 0.17514480650424957, 0.17827799916267395, 0.18141117691993713, 0.18454435467720032, 0.1876775473356247, 0.19081072509288788]}, "gradients/decoder.transformer.h.14.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 2.0, 4.0, 8.0, 3.0, 11.0, 13.0, 19.0, 26.0, 22.0, 25.0, 31.0, 37.0, 52.0, 56.0, 61.0, 57.0, 62.0, 41.0, 59.0, 55.0, 55.0, 59.0, 54.0, 36.0, 35.0, 29.0, 25.0, 19.0, 20.0, 14.0, 8.0, 4.0, 4.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.019249439239501953, -0.01865636743605137, -0.018063295632600784, -0.0174702238291502, -0.016877152025699615, -0.01628408022224903, -0.015691008418798447, -0.015097936615347862, -0.014504864811897278, -0.013911793008446693, -0.013318721204996109, -0.012725649401545525, -0.01213257759809494, -0.011539505794644356, -0.010946433991193771, -0.010353362187743187, -0.009760290384292603, -0.009167218580842018, -0.008574146777391434, -0.00798107497394085, -0.007388003170490265, -0.0067949313670396805, -0.006201859563589096, -0.005608787760138512, -0.005015715956687927, -0.004422644153237343, -0.0038295723497867584, -0.003236500546336174, -0.0026434287428855896, -0.002050356939435005, -0.0014572851359844208, -0.0008642133325338364, -0.00027114152908325195, 0.00032193027436733246, 0.0009150020778179169, 0.0015080738812685013, 0.0021011456847190857, 0.00269421748816967, 0.0032872892916202545, 0.003880361095070839, 0.004473432898521423, 0.005066504701972008, 0.005659576505422592, 0.006252648308873177, 0.006845720112323761, 0.007438791915774345, 0.00803186371922493, 0.008624935522675514, 0.009218007326126099, 0.009811079129576683, 0.010404150933027267, 0.010997222736477852, 0.011590294539928436, 0.01218336634337902, 0.012776438146829605, 0.01336950995028019, 0.013962581753730774, 0.014555653557181358, 0.015148725360631943, 0.015741797164082527, 0.01633486896753311, 0.016927940770983696, 0.01752101257443428, 0.018114084377884865, 0.01870715618133545]}, "gradients/decoder.transformer.h.14.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 3.0, 2.0, 5.0, 4.0, 2.0, 6.0, 6.0, 4.0, 8.0, 12.0, 16.0, 8.0, 19.0, 16.0, 14.0, 23.0, 25.0, 21.0, 32.0, 28.0, 29.0, 39.0, 30.0, 31.0, 35.0, 46.0, 37.0, 32.0, 38.0, 31.0, 39.0, 42.0, 38.0, 31.0, 39.0, 25.0, 29.0, 24.0, 20.0, 16.0, 12.0, 19.0, 14.0, 13.0, 2.0, 8.0, 6.0, 8.0, 5.0, 8.0, 6.0, 4.0, 3.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-7.8515625, -7.592041015625, -7.33251953125, -7.072998046875, -6.8134765625, -6.553955078125, -6.29443359375, -6.034912109375, -5.775390625, -5.515869140625, -5.25634765625, -4.996826171875, -4.7373046875, -4.477783203125, -4.21826171875, -3.958740234375, -3.69921875, -3.439697265625, -3.18017578125, -2.920654296875, -2.6611328125, -2.401611328125, -2.14208984375, -1.882568359375, -1.623046875, -1.363525390625, -1.10400390625, -0.844482421875, -0.5849609375, -0.325439453125, -0.06591796875, 0.193603515625, 0.453125, 0.712646484375, 0.97216796875, 1.231689453125, 1.4912109375, 1.750732421875, 2.01025390625, 2.269775390625, 2.529296875, 2.788818359375, 3.04833984375, 3.307861328125, 3.5673828125, 3.826904296875, 4.08642578125, 4.345947265625, 4.60546875, 4.864990234375, 5.12451171875, 5.384033203125, 5.6435546875, 5.903076171875, 6.16259765625, 6.422119140625, 6.681640625, 6.941162109375, 7.20068359375, 7.460205078125, 7.7197265625, 7.979248046875, 8.23876953125, 8.498291015625, 8.7578125]}, "gradients/decoder.transformer.h.14.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 6.0, 8.0, 15.0, 13.0, 29.0, 38.0, 55.0, 83.0, 122.0, 162.0, 217.0, 406.0, 557.0, 845.0, 1220.0, 2056.0, 3181.0, 5138.0, 8558.0, 14320.0, 25783.0, 49401.0, 103767.0, 239588.0, 317889.0, 135930.0, 62868.0, 31952.0, 17452.0, 10228.0, 6082.0, 3759.0, 2468.0, 1463.0, 981.0, 626.0, 408.0, 262.0, 209.0, 124.0, 94.0, 67.0, 37.0, 30.0, 24.0, 17.0, 5.0, 8.0, 5.0, 1.0, 4.0, 1.0, 2.0], "bins": [-6.9765625, -6.7781982421875, -6.579833984375, -6.3814697265625, -6.18310546875, -5.9847412109375, -5.786376953125, -5.5880126953125, -5.3896484375, -5.1912841796875, -4.992919921875, -4.7945556640625, -4.59619140625, -4.3978271484375, -4.199462890625, -4.0010986328125, -3.802734375, -3.6043701171875, -3.406005859375, -3.2076416015625, -3.00927734375, -2.8109130859375, -2.612548828125, -2.4141845703125, -2.2158203125, -2.0174560546875, -1.819091796875, -1.6207275390625, -1.42236328125, -1.2239990234375, -1.025634765625, -0.8272705078125, -0.62890625, -0.4305419921875, -0.232177734375, -0.0338134765625, 0.16455078125, 0.3629150390625, 0.561279296875, 0.7596435546875, 0.9580078125, 1.1563720703125, 1.354736328125, 1.5531005859375, 1.75146484375, 1.9498291015625, 2.148193359375, 2.3465576171875, 2.544921875, 2.7432861328125, 2.941650390625, 3.1400146484375, 3.33837890625, 3.5367431640625, 3.735107421875, 3.9334716796875, 4.1318359375, 4.3302001953125, 4.528564453125, 4.7269287109375, 4.92529296875, 5.1236572265625, 5.322021484375, 5.5203857421875, 5.71875]}, "gradients/decoder.transformer.h.14.attn.c_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 8.0, 3.0, 4.0, 15.0, 13.0, 12.0, 12.0, 23.0, 13.0, 20.0, 21.0, 27.0, 29.0, 45.0, 36.0, 56.0, 52.0, 69.0, 179.0, 1657.0, 244.0, 97.0, 63.0, 40.0, 46.0, 42.0, 45.0, 32.0, 20.0, 28.0, 28.0, 10.0, 8.0, 14.0, 12.0, 8.0, 7.0, 4.0, 3.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-35.40625, -34.287109375, -33.16796875, -32.048828125, -30.9296875, -29.810546875, -28.69140625, -27.572265625, -26.453125, -25.333984375, -24.21484375, -23.095703125, -21.9765625, -20.857421875, -19.73828125, -18.619140625, -17.5, -16.380859375, -15.26171875, -14.142578125, -13.0234375, -11.904296875, -10.78515625, -9.666015625, -8.546875, -7.427734375, -6.30859375, -5.189453125, -4.0703125, -2.951171875, -1.83203125, -0.712890625, 0.40625, 1.525390625, 2.64453125, 3.763671875, 4.8828125, 6.001953125, 7.12109375, 8.240234375, 9.359375, 10.478515625, 11.59765625, 12.716796875, 13.8359375, 14.955078125, 16.07421875, 17.193359375, 18.3125, 19.431640625, 20.55078125, 21.669921875, 22.7890625, 23.908203125, 25.02734375, 26.146484375, 27.265625, 28.384765625, 29.50390625, 30.623046875, 31.7421875, 32.861328125, 33.98046875, 35.099609375, 36.21875]}, "gradients/decoder.transformer.h.14.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 5.0, 2.0, 2.0, 5.0, 7.0, 10.0, 12.0, 20.0, 24.0, 27.0, 62.0, 70.0, 101.0, 157.0, 208.0, 379.0, 903.0, 10545.0, 3097847.0, 32893.0, 1263.0, 405.0, 206.0, 155.0, 110.0, 81.0, 41.0, 49.0, 28.0, 22.0, 23.0, 20.0, 11.0, 9.0, 7.0, 2.0, 1.0, 2.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0], "bins": [-118.6875, -115.61328125, -112.5390625, -109.46484375, -106.390625, -103.31640625, -100.2421875, -97.16796875, -94.09375, -91.01953125, -87.9453125, -84.87109375, -81.796875, -78.72265625, -75.6484375, -72.57421875, -69.5, -66.42578125, -63.3515625, -60.27734375, -57.203125, -54.12890625, -51.0546875, -47.98046875, -44.90625, -41.83203125, -38.7578125, -35.68359375, -32.609375, -29.53515625, -26.4609375, -23.38671875, -20.3125, -17.23828125, -14.1640625, -11.08984375, -8.015625, -4.94140625, -1.8671875, 1.20703125, 4.28125, 7.35546875, 10.4296875, 13.50390625, 16.578125, 19.65234375, 22.7265625, 25.80078125, 28.875, 31.94921875, 35.0234375, 38.09765625, 41.171875, 44.24609375, 47.3203125, 50.39453125, 53.46875, 56.54296875, 59.6171875, 62.69140625, 65.765625, 68.83984375, 71.9140625, 74.98828125, 78.0625]}, "gradients/decoder.transformer.h.14.ln_1.weight": {"_type": "histogram", "values": [2.0, 21.0, 224.0, 627.0, 137.0, 7.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.91394805908203, -17.59988784790039, -10.285826683044434, -2.9717655181884766, 4.342294692993164, 11.656356811523438, 18.970417022705078, 26.28447723388672, 33.59853744506836, 40.91259765625, 48.22665786743164, 55.54071807861328, 62.85478210449219, 70.16883850097656, 77.48290252685547, 84.79696655273438, 92.11102294921875, 99.42508697509766, 106.73914337158203, 114.05320739746094, 121.36726379394531, 128.68133544921875, 135.99539184570312, 143.3094482421875, 150.62350463867188, 157.93756103515625, 165.2516326904297, 172.56568908691406, 179.87974548339844, 187.19381713867188, 194.50787353515625, 201.82192993164062, 209.13600158691406, 216.45005798339844, 223.76412963867188, 231.07818603515625, 238.39224243164062, 245.706298828125, 253.02037048339844, 260.33441162109375, 267.64849853515625, 274.9625549316406, 282.276611328125, 289.5906677246094, 296.9047546386719, 304.21881103515625, 311.5328674316406, 318.846923828125, 326.1609802246094, 333.47503662109375, 340.7890930175781, 348.1031494140625, 355.417236328125, 362.7312927246094, 370.04534912109375, 377.3594055175781, 384.6734619140625, 391.9875183105469, 399.30157470703125, 406.6156311035156, 413.9297180175781, 421.2437744140625, 428.5578308105469, 435.87188720703125, 443.1859436035156]}, "gradients/decoder.transformer.h.14.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 2.0, 1.0, 1.0, 5.0, 4.0, 5.0, 7.0, 7.0, 17.0, 23.0, 14.0, 20.0, 22.0, 17.0, 28.0, 36.0, 33.0, 40.0, 33.0, 39.0, 50.0, 49.0, 49.0, 43.0, 43.0, 42.0, 50.0, 38.0, 41.0, 32.0, 27.0, 21.0, 24.0, 20.0, 24.0, 23.0, 14.0, 13.0, 13.0, 8.0, 9.0, 8.0, 5.0, 1.0, 3.0, 2.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-89.97756958007812, -86.88311004638672, -83.78865814208984, -80.69419860839844, -77.59973907470703, -74.50528717041016, -71.41082763671875, -68.31637573242188, -65.22191619873047, -62.12746047973633, -59.03300094604492, -55.93854522705078, -52.84408950805664, -49.7496337890625, -46.655174255371094, -43.56071853637695, -40.46625900268555, -37.371803283691406, -34.27734375, -31.18288803100586, -28.08843231201172, -24.993974685668945, -21.899517059326172, -18.80506134033203, -15.710603713989258, -12.6161470413208, -9.521690368652344, -6.42723274230957, -3.3327760696411133, -0.23831939697265625, 2.856138229370117, 5.950593948364258, 9.045051574707031, 12.139508247375488, 15.233964920043945, 18.32842254638672, 21.42287826538086, 24.517335891723633, 27.611793518066406, 30.706249237060547, 33.80070495605469, 36.89516067504883, 39.989620208740234, 43.084075927734375, 46.178531646728516, 49.272987365722656, 52.36744689941406, 55.4619026184082, 58.55636215209961, 61.65081787109375, 64.74527740478516, 67.83973693847656, 70.93418884277344, 74.02864837646484, 77.12310791015625, 80.21755981445312, 83.31201934814453, 86.40647888183594, 89.50093078613281, 92.59539031982422, 95.68984985351562, 98.7843017578125, 101.8787612915039, 104.97322082519531, 108.06767272949219]}, "gradients/decoder.transformer.h.13.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 3.0, 3.0, 2.0, 3.0, 5.0, 4.0, 5.0, 3.0, 11.0, 10.0, 11.0, 15.0, 12.0, 16.0, 18.0, 19.0, 10.0, 40.0, 29.0, 25.0, 30.0, 31.0, 28.0, 26.0, 32.0, 31.0, 39.0, 43.0, 38.0, 31.0, 28.0, 45.0, 37.0, 36.0, 29.0, 35.0, 19.0, 38.0, 21.0, 19.0, 18.0, 14.0, 14.0, 13.0, 16.0, 8.0, 5.0, 3.0, 10.0, 9.0, 8.0, 3.0, 3.0, 2.0, 2.0, 3.0, 2.0, 3.0, 1.0, 0.0, 1.0], "bins": [-8.234375, -7.968505859375, -7.70263671875, -7.436767578125, -7.1708984375, -6.905029296875, -6.63916015625, -6.373291015625, -6.107421875, -5.841552734375, -5.57568359375, -5.309814453125, -5.0439453125, -4.778076171875, -4.51220703125, -4.246337890625, -3.98046875, -3.714599609375, -3.44873046875, -3.182861328125, -2.9169921875, -2.651123046875, -2.38525390625, -2.119384765625, -1.853515625, -1.587646484375, -1.32177734375, -1.055908203125, -0.7900390625, -0.524169921875, -0.25830078125, 0.007568359375, 0.2734375, 0.539306640625, 0.80517578125, 1.071044921875, 1.3369140625, 1.602783203125, 1.86865234375, 2.134521484375, 2.400390625, 2.666259765625, 2.93212890625, 3.197998046875, 3.4638671875, 3.729736328125, 3.99560546875, 4.261474609375, 4.52734375, 4.793212890625, 5.05908203125, 5.324951171875, 5.5908203125, 5.856689453125, 6.12255859375, 6.388427734375, 6.654296875, 6.920166015625, 7.18603515625, 7.451904296875, 7.7177734375, 7.983642578125, 8.24951171875, 8.515380859375, 8.78125]}, "gradients/decoder.transformer.h.13.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 3.0, 4.0, 6.0, 3.0, 6.0, 5.0, 10.0, 7.0, 14.0, 10.0, 12.0, 18.0, 17.0, 29.0, 18.0, 33.0, 40.0, 50.0, 114.0, 275.0, 815.0, 2726.0, 12968.0, 117933.0, 1744033.0, 2134721.0, 159468.0, 16060.0, 3242.0, 945.0, 308.0, 118.0, 49.0, 33.0, 32.0, 21.0, 26.0, 22.0, 18.0, 11.0, 11.0, 9.0, 5.0, 14.0, 6.0, 3.0, 9.0, 3.0, 2.0, 5.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-27.015625, -26.145751953125, -25.27587890625, -24.406005859375, -23.5361328125, -22.666259765625, -21.79638671875, -20.926513671875, -20.056640625, -19.186767578125, -18.31689453125, -17.447021484375, -16.5771484375, -15.707275390625, -14.83740234375, -13.967529296875, -13.09765625, -12.227783203125, -11.35791015625, -10.488037109375, -9.6181640625, -8.748291015625, -7.87841796875, -7.008544921875, -6.138671875, -5.268798828125, -4.39892578125, -3.529052734375, -2.6591796875, -1.789306640625, -0.91943359375, -0.049560546875, 0.8203125, 1.690185546875, 2.56005859375, 3.429931640625, 4.2998046875, 5.169677734375, 6.03955078125, 6.909423828125, 7.779296875, 8.649169921875, 9.51904296875, 10.388916015625, 11.2587890625, 12.128662109375, 12.99853515625, 13.868408203125, 14.73828125, 15.608154296875, 16.47802734375, 17.347900390625, 18.2177734375, 19.087646484375, 19.95751953125, 20.827392578125, 21.697265625, 22.567138671875, 23.43701171875, 24.306884765625, 25.1767578125, 26.046630859375, 26.91650390625, 27.786376953125, 28.65625]}, "gradients/decoder.transformer.h.13.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 3.0, 4.0, 5.0, 6.0, 13.0, 18.0, 22.0, 25.0, 33.0, 57.0, 83.0, 122.0, 139.0, 186.0, 246.0, 322.0, 431.0, 429.0, 398.0, 416.0, 306.0, 192.0, 161.0, 119.0, 102.0, 66.0, 44.0, 32.0, 32.0, 20.0, 16.0, 8.0, 5.0, 5.0, 5.0, 5.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.625, -21.00146484375, -20.3779296875, -19.75439453125, -19.130859375, -18.50732421875, -17.8837890625, -17.26025390625, -16.63671875, -16.01318359375, -15.3896484375, -14.76611328125, -14.142578125, -13.51904296875, -12.8955078125, -12.27197265625, -11.6484375, -11.02490234375, -10.4013671875, -9.77783203125, -9.154296875, -8.53076171875, -7.9072265625, -7.28369140625, -6.66015625, -6.03662109375, -5.4130859375, -4.78955078125, -4.166015625, -3.54248046875, -2.9189453125, -2.29541015625, -1.671875, -1.04833984375, -0.4248046875, 0.19873046875, 0.822265625, 1.44580078125, 2.0693359375, 2.69287109375, 3.31640625, 3.93994140625, 4.5634765625, 5.18701171875, 5.810546875, 6.43408203125, 7.0576171875, 7.68115234375, 8.3046875, 8.92822265625, 9.5517578125, 10.17529296875, 10.798828125, 11.42236328125, 12.0458984375, 12.66943359375, 13.29296875, 13.91650390625, 14.5400390625, 15.16357421875, 15.787109375, 16.41064453125, 17.0341796875, 17.65771484375, 18.28125]}, "gradients/decoder.transformer.h.13.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 2.0, 3.0, 4.0, 5.0, 11.0, 14.0, 23.0, 27.0, 32.0, 52.0, 66.0, 102.0, 125.0, 148.0, 201.0, 342.0, 721.0, 3528.0, 66226.0, 3195418.0, 905865.0, 18228.0, 1648.0, 473.0, 274.0, 196.0, 154.0, 123.0, 87.0, 52.0, 40.0, 32.0, 17.0, 12.0, 11.0, 4.0, 10.0, 4.0, 2.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-53.65625, -51.72705078125, -49.7978515625, -47.86865234375, -45.939453125, -44.01025390625, -42.0810546875, -40.15185546875, -38.22265625, -36.29345703125, -34.3642578125, -32.43505859375, -30.505859375, -28.57666015625, -26.6474609375, -24.71826171875, -22.7890625, -20.85986328125, -18.9306640625, -17.00146484375, -15.072265625, -13.14306640625, -11.2138671875, -9.28466796875, -7.35546875, -5.42626953125, -3.4970703125, -1.56787109375, 0.361328125, 2.29052734375, 4.2197265625, 6.14892578125, 8.078125, 10.00732421875, 11.9365234375, 13.86572265625, 15.794921875, 17.72412109375, 19.6533203125, 21.58251953125, 23.51171875, 25.44091796875, 27.3701171875, 29.29931640625, 31.228515625, 33.15771484375, 35.0869140625, 37.01611328125, 38.9453125, 40.87451171875, 42.8037109375, 44.73291015625, 46.662109375, 48.59130859375, 50.5205078125, 52.44970703125, 54.37890625, 56.30810546875, 58.2373046875, 60.16650390625, 62.095703125, 64.02490234375, 65.9541015625, 67.88330078125, 69.8125]}, "gradients/decoder.transformer.h.13.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 8.0, 17.0, 40.0, 113.0, 166.0, 208.0, 195.0, 136.0, 88.0, 24.0, 14.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-262.7371826171875, -257.108154296875, -251.4791259765625, -245.85009765625, -240.2210693359375, -234.59202575683594, -228.96299743652344, -223.33396911621094, -217.70494079589844, -212.07591247558594, -206.44688415527344, -200.81785583496094, -195.18881225585938, -189.55978393554688, -183.93075561523438, -178.30172729492188, -172.67269897460938, -167.04367065429688, -161.41464233398438, -155.78561401367188, -150.15658569335938, -144.5275421142578, -138.8985137939453, -133.2694854736328, -127.64045715332031, -122.01142883300781, -116.38240051269531, -110.75336456298828, -105.12433624267578, -99.49530792236328, -93.86627197265625, -88.23724365234375, -82.60820770263672, -76.97917938232422, -71.35014343261719, -65.72111511230469, -60.09208679199219, -54.46305847167969, -48.83402633666992, -43.204994201660156, -37.575965881347656, -31.946935653686523, -26.31790542602539, -20.688875198364258, -15.059844970703125, -9.430814743041992, -3.8017845153808594, 1.8272476196289062, 7.456275939941406, 13.085306167602539, 18.714336395263672, 24.343366622924805, 29.972396850585938, 35.60142517089844, 41.2304573059082, 46.85948944091797, 52.48851776123047, 58.11754608154297, 63.746578216552734, 69.3756103515625, 75.004638671875, 80.6336669921875, 86.2626953125, 91.89173126220703, 97.52075958251953]}, "gradients/decoder.transformer.h.13.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 1.0, 2.0, 4.0, 9.0, 5.0, 6.0, 8.0, 5.0, 16.0, 20.0, 17.0, 18.0, 23.0, 27.0, 35.0, 25.0, 30.0, 33.0, 40.0, 54.0, 49.0, 44.0, 50.0, 41.0, 38.0, 40.0, 35.0, 30.0, 46.0, 40.0, 30.0, 27.0, 27.0, 20.0, 23.0, 22.0, 14.0, 7.0, 14.0, 5.0, 6.0, 10.0, 4.0, 5.0, 2.0, 5.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-64.99034118652344, -62.831356048583984, -60.672367095947266, -58.51338195800781, -56.354393005371094, -54.19540786743164, -52.03642272949219, -49.87743377685547, -47.71844482421875, -45.5594596862793, -43.40047073364258, -41.241485595703125, -39.082496643066406, -36.92351150512695, -34.7645263671875, -32.60553741455078, -30.446552276611328, -28.287565231323242, -26.128578186035156, -23.969593048095703, -21.810604095458984, -19.65161895751953, -17.492631912231445, -15.33364486694336, -13.174657821655273, -11.015670776367188, -8.856683731079102, -6.697697639465332, -4.538710594177246, -2.37972354888916, -0.22073745727539062, 1.9382495880126953, 4.097236633300781, 6.256223678588867, 8.415210723876953, 10.574196815490723, 12.733183860778809, 14.892170906066895, 17.051156997680664, 19.21014404296875, 21.369131088256836, 23.528118133544922, 25.687105178833008, 27.846092224121094, 30.005077362060547, 32.164066314697266, 34.32305145263672, 36.48204040527344, 38.64102554321289, 40.800010681152344, 42.95899963378906, 45.117984771728516, 47.276973724365234, 49.43595886230469, 51.594947814941406, 53.75393295288086, 55.91291809082031, 58.071903228759766, 60.230892181396484, 62.38987731933594, 64.54886627197266, 66.70785522460938, 68.86683654785156, 71.02582550048828, 73.184814453125]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 2.0, 4.0, 1.0, 3.0, 1.0, 7.0, 3.0, 10.0, 9.0, 10.0, 10.0, 18.0, 19.0, 13.0, 20.0, 19.0, 21.0, 24.0, 27.0, 30.0, 42.0, 30.0, 35.0, 31.0, 35.0, 28.0, 46.0, 37.0, 33.0, 38.0, 32.0, 34.0, 25.0, 42.0, 26.0, 41.0, 27.0, 23.0, 24.0, 17.0, 18.0, 18.0, 16.0, 8.0, 15.0, 4.0, 2.0, 10.0, 3.0, 5.0, 3.0, 8.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-8.1015625, -7.8349609375, -7.568359375, -7.3017578125, -7.03515625, -6.7685546875, -6.501953125, -6.2353515625, -5.96875, -5.7021484375, -5.435546875, -5.1689453125, -4.90234375, -4.6357421875, -4.369140625, -4.1025390625, -3.8359375, -3.5693359375, -3.302734375, -3.0361328125, -2.76953125, -2.5029296875, -2.236328125, -1.9697265625, -1.703125, -1.4365234375, -1.169921875, -0.9033203125, -0.63671875, -0.3701171875, -0.103515625, 0.1630859375, 0.4296875, 0.6962890625, 0.962890625, 1.2294921875, 1.49609375, 1.7626953125, 2.029296875, 2.2958984375, 2.5625, 2.8291015625, 3.095703125, 3.3623046875, 3.62890625, 3.8955078125, 4.162109375, 4.4287109375, 4.6953125, 4.9619140625, 5.228515625, 5.4951171875, 5.76171875, 6.0283203125, 6.294921875, 6.5615234375, 6.828125, 7.0947265625, 7.361328125, 7.6279296875, 7.89453125, 8.1611328125, 8.427734375, 8.6943359375, 8.9609375]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 4.0, 5.0, 6.0, 5.0, 6.0, 11.0, 33.0, 41.0, 57.0, 57.0, 106.0, 156.0, 254.0, 407.0, 630.0, 1076.0, 1651.0, 2737.0, 4418.0, 7490.0, 12448.0, 20325.0, 34498.0, 59206.0, 101788.0, 165370.0, 212646.0, 169393.0, 104408.0, 60702.0, 35552.0, 20985.0, 12668.0, 7487.0, 4485.0, 2776.0, 1736.0, 1107.0, 661.0, 402.0, 255.0, 179.0, 129.0, 71.0, 55.0, 28.0, 18.0, 10.0, 11.0, 11.0, 6.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.96923828125, -0.9384918212890625, -0.907745361328125, -0.8769989013671875, -0.84625244140625, -0.8155059814453125, -0.784759521484375, -0.7540130615234375, -0.7232666015625, -0.6925201416015625, -0.661773681640625, -0.6310272216796875, -0.60028076171875, -0.5695343017578125, -0.538787841796875, -0.5080413818359375, -0.477294921875, -0.4465484619140625, -0.415802001953125, -0.3850555419921875, -0.35430908203125, -0.3235626220703125, -0.292816162109375, -0.2620697021484375, -0.2313232421875, -0.2005767822265625, -0.169830322265625, -0.1390838623046875, -0.10833740234375, -0.0775909423828125, -0.046844482421875, -0.0160980224609375, 0.0146484375, 0.0453948974609375, 0.076141357421875, 0.1068878173828125, 0.13763427734375, 0.1683807373046875, 0.199127197265625, 0.2298736572265625, 0.2606201171875, 0.2913665771484375, 0.322113037109375, 0.3528594970703125, 0.38360595703125, 0.4143524169921875, 0.445098876953125, 0.4758453369140625, 0.506591796875, 0.5373382568359375, 0.568084716796875, 0.5988311767578125, 0.62957763671875, 0.6603240966796875, 0.691070556640625, 0.7218170166015625, 0.7525634765625, 0.7833099365234375, 0.814056396484375, 0.8448028564453125, 0.87554931640625, 0.9062957763671875, 0.937042236328125, 0.9677886962890625, 0.99853515625]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 4.0, 6.0, 7.0, 9.0, 5.0, 12.0, 13.0, 14.0, 14.0, 20.0, 26.0, 28.0, 33.0, 34.0, 37.0, 46.0, 53.0, 45.0, 38.0, 54.0, 1077.0, 36.0, 48.0, 46.0, 51.0, 37.0, 36.0, 44.0, 41.0, 21.0, 16.0, 18.0, 19.0, 11.0, 18.0, 2.0, 1.0, 2.0, 5.0, 7.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-8.4765625, -8.24749755859375, -8.0184326171875, -7.78936767578125, -7.560302734375, -7.33123779296875, -7.1021728515625, -6.87310791015625, -6.64404296875, -6.41497802734375, -6.1859130859375, -5.95684814453125, -5.727783203125, -5.49871826171875, -5.2696533203125, -5.04058837890625, -4.8115234375, -4.58245849609375, -4.3533935546875, -4.12432861328125, -3.895263671875, -3.66619873046875, -3.4371337890625, -3.20806884765625, -2.97900390625, -2.74993896484375, -2.5208740234375, -2.29180908203125, -2.062744140625, -1.83367919921875, -1.6046142578125, -1.37554931640625, -1.146484375, -0.91741943359375, -0.6883544921875, -0.45928955078125, -0.230224609375, -0.00115966796875, 0.2279052734375, 0.45697021484375, 0.68603515625, 0.91510009765625, 1.1441650390625, 1.37322998046875, 1.602294921875, 1.83135986328125, 2.0604248046875, 2.28948974609375, 2.5185546875, 2.74761962890625, 2.9766845703125, 3.20574951171875, 3.434814453125, 3.66387939453125, 3.8929443359375, 4.12200927734375, 4.35107421875, 4.58013916015625, 4.8092041015625, 5.03826904296875, 5.267333984375, 5.49639892578125, 5.7254638671875, 5.95452880859375, 6.18359375]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.weight": {"_type": "histogram", "values": [4.0, 2.0, 5.0, 1.0, 5.0, 7.0, 4.0, 8.0, 8.0, 17.0, 27.0, 37.0, 53.0, 89.0, 121.0, 191.0, 254.0, 369.0, 551.0, 863.0, 1302.0, 2022.0, 3080.0, 5006.0, 7722.0, 12212.0, 19147.0, 30556.0, 47118.0, 72884.0, 108763.0, 166501.0, 1181748.0, 147341.0, 101549.0, 67632.0, 43817.0, 27852.0, 17490.0, 11001.0, 7060.0, 4383.0, 2839.0, 1885.0, 1215.0, 738.0, 532.0, 335.0, 254.0, 164.0, 110.0, 86.0, 63.0, 34.0, 29.0, 27.0, 13.0, 8.0, 7.0, 5.0, 3.0, 0.0, 2.0, 1.0], "bins": [-0.54638671875, -0.5295257568359375, -0.512664794921875, -0.4958038330078125, -0.47894287109375, -0.4620819091796875, -0.445220947265625, -0.4283599853515625, -0.4114990234375, -0.3946380615234375, -0.377777099609375, -0.3609161376953125, -0.34405517578125, -0.3271942138671875, -0.310333251953125, -0.2934722900390625, -0.276611328125, -0.2597503662109375, -0.242889404296875, -0.2260284423828125, -0.20916748046875, -0.1923065185546875, -0.175445556640625, -0.1585845947265625, -0.1417236328125, -0.1248626708984375, -0.108001708984375, -0.0911407470703125, -0.07427978515625, -0.0574188232421875, -0.040557861328125, -0.0236968994140625, -0.0068359375, 0.0100250244140625, 0.026885986328125, 0.0437469482421875, 0.06060791015625, 0.0774688720703125, 0.094329833984375, 0.1111907958984375, 0.1280517578125, 0.1449127197265625, 0.161773681640625, 0.1786346435546875, 0.19549560546875, 0.2123565673828125, 0.229217529296875, 0.2460784912109375, 0.262939453125, 0.2798004150390625, 0.296661376953125, 0.3135223388671875, 0.33038330078125, 0.3472442626953125, 0.364105224609375, 0.3809661865234375, 0.3978271484375, 0.4146881103515625, 0.431549072265625, 0.4484100341796875, 0.46527099609375, 0.4821319580078125, 0.498992919921875, 0.5158538818359375, 0.53271484375]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 5.0, 1.0, 3.0, 6.0, 4.0, 10.0, 13.0, 7.0, 11.0, 19.0, 11.0, 24.0, 32.0, 34.0, 33.0, 42.0, 60.0, 78.0, 85.0, 94.0, 65.0, 53.0, 55.0, 44.0, 39.0, 31.0, 30.0, 24.0, 14.0, 14.0, 20.0, 10.0, 6.0, 6.0, 9.0, 3.0, 4.0, 3.0, 0.0, 3.0, 0.0, 2.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.020721435546875, -0.020076513290405273, -0.019431591033935547, -0.01878666877746582, -0.018141746520996094, -0.017496824264526367, -0.01685190200805664, -0.016206979751586914, -0.015562057495117188, -0.014917135238647461, -0.014272212982177734, -0.013627290725708008, -0.012982368469238281, -0.012337446212768555, -0.011692523956298828, -0.011047601699829102, -0.010402679443359375, -0.009757757186889648, -0.009112834930419922, -0.008467912673950195, -0.007822990417480469, -0.007178068161010742, -0.006533145904541016, -0.005888223648071289, -0.0052433013916015625, -0.004598379135131836, -0.003953456878662109, -0.003308534622192383, -0.0026636123657226562, -0.0020186901092529297, -0.0013737678527832031, -0.0007288455963134766, -8.392333984375e-05, 0.0005609989166259766, 0.0012059211730957031, 0.0018508434295654297, 0.0024957656860351562, 0.003140687942504883, 0.0037856101989746094, 0.004430532455444336, 0.0050754547119140625, 0.005720376968383789, 0.006365299224853516, 0.007010221481323242, 0.007655143737792969, 0.008300065994262695, 0.008944988250732422, 0.009589910507202148, 0.010234832763671875, 0.010879755020141602, 0.011524677276611328, 0.012169599533081055, 0.012814521789550781, 0.013459444046020508, 0.014104366302490234, 0.014749288558959961, 0.015394210815429688, 0.016039133071899414, 0.01668405532836914, 0.017328977584838867, 0.017973899841308594, 0.01861882209777832, 0.019263744354248047, 0.019908666610717773, 0.0205535888671875]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 1.0, 2.0, 0.0, 5.0, 6.0, 5.0, 5.0, 7.0, 11.0, 15.0, 15.0, 18.0, 18.0, 34.0, 46.0, 57.0, 73.0, 134.0, 199.0, 565.0, 7809.0, 760551.0, 273648.0, 4339.0, 443.0, 172.0, 113.0, 61.0, 57.0, 30.0, 25.0, 12.0, 20.0, 12.0, 13.0, 6.0, 9.0, 5.0, 5.0, 5.0, 3.0, 2.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.402099609375, -0.3894996643066406, -0.37689971923828125, -0.3642997741699219, -0.3516998291015625, -0.3390998840332031, -0.32649993896484375, -0.3138999938964844, -0.301300048828125, -0.2887001037597656, -0.27610015869140625, -0.2635002136230469, -0.2509002685546875, -0.23830032348632812, -0.22570037841796875, -0.21310043334960938, -0.20050048828125, -0.18790054321289062, -0.17530059814453125, -0.16270065307617188, -0.1501007080078125, -0.13750076293945312, -0.12490081787109375, -0.11230087280273438, -0.099700927734375, -0.08710098266601562, -0.07450103759765625, -0.061901092529296875, -0.0493011474609375, -0.036701202392578125, -0.02410125732421875, -0.011501312255859375, 0.0010986328125, 0.013698577880859375, 0.02629852294921875, 0.038898468017578125, 0.0514984130859375, 0.06409835815429688, 0.07669830322265625, 0.08929824829101562, 0.101898193359375, 0.11449813842773438, 0.12709808349609375, 0.13969802856445312, 0.1522979736328125, 0.16489791870117188, 0.17749786376953125, 0.19009780883789062, 0.20269775390625, 0.21529769897460938, 0.22789764404296875, 0.24049758911132812, 0.2530975341796875, 0.2656974792480469, 0.27829742431640625, 0.2908973693847656, 0.303497314453125, 0.3160972595214844, 0.32869720458984375, 0.3412971496582031, 0.3538970947265625, 0.3664970397949219, 0.37909698486328125, 0.3916969299316406, 0.404296875]}, "gradients/decoder.transformer.h.13.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 13.0, 421.0, 553.0, 28.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.04627005383372307, -0.03971001133322716, -0.03314996883273125, -0.026589928194880486, -0.020029885694384575, -0.013469845056533813, -0.006909802556037903, -0.0003497600555419922, 0.0062102824449539185, 0.012770324945449829, 0.01933036744594574, 0.0258904080837965, 0.03245045244693756, 0.03901049122214317, 0.045570533722639084, 0.052130576223134995, 0.058690618723630905, 0.06525065749883652, 0.07181069999933243, 0.07837074249982834, 0.08493078500032425, 0.09149082750082016, 0.09805087000131607, 0.10461091250181198, 0.11117095500230789, 0.1177309975028038, 0.12429104000329971, 0.13085107505321503, 0.13741111755371094, 0.14397116005420685, 0.15053120255470276, 0.15709124505519867, 0.16365130245685577, 0.17021134495735168, 0.1767713874578476, 0.1833314299583435, 0.18989147245883942, 0.19645151495933533, 0.20301155745983124, 0.20957159996032715, 0.21613164246082306, 0.22269168496131897, 0.22925172746181488, 0.2358117699623108, 0.2423718124628067, 0.2489318549633026, 0.25549188256263733, 0.26205193996429443, 0.26861196756362915, 0.27517199516296387, 0.28173205256462097, 0.2882920801639557, 0.2948521375656128, 0.3014121651649475, 0.3079722225666046, 0.31453225016593933, 0.32109230756759644, 0.32765233516693115, 0.33421239256858826, 0.340772420167923, 0.3473324775695801, 0.3538925051689148, 0.3604525625705719, 0.3670125901699066, 0.3735726475715637]}, "gradients/decoder.transformer.h.13.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 5.0, 7.0, 12.0, 19.0, 23.0, 21.0, 39.0, 48.0, 58.0, 67.0, 70.0, 74.0, 74.0, 66.0, 81.0, 64.0, 62.0, 61.0, 47.0, 29.0, 22.0, 21.0, 15.0, 15.0, 5.0, 5.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.022348761558532715, -0.0216132253408432, -0.020877689123153687, -0.020142152905464172, -0.019406616687774658, -0.018671080470085144, -0.01793554425239563, -0.017200008034706116, -0.0164644718170166, -0.015728935599327087, -0.014993399381637573, -0.014257863163948059, -0.013522326946258545, -0.01278679072856903, -0.012051254510879517, -0.011315718293190002, -0.010580182075500488, -0.009844645857810974, -0.00910910964012146, -0.008373573422431946, -0.007638037204742432, -0.0069025009870529175, -0.006166964769363403, -0.005431428551673889, -0.004695892333984375, -0.003960356116294861, -0.0032248198986053467, -0.0024892836809158325, -0.0017537474632263184, -0.0010182112455368042, -0.00028267502784729004, 0.0004528611898422241, 0.0011883974075317383, 0.0019239336252212524, 0.0026594698429107666, 0.0033950060606002808, 0.004130542278289795, 0.004866078495979309, 0.005601614713668823, 0.006337150931358337, 0.0070726871490478516, 0.007808223366737366, 0.00854375958442688, 0.009279295802116394, 0.010014832019805908, 0.010750368237495422, 0.011485904455184937, 0.01222144067287445, 0.012956976890563965, 0.013692513108253479, 0.014428049325942993, 0.015163585543632507, 0.01589912176132202, 0.016634657979011536, 0.01737019419670105, 0.018105730414390564, 0.018841266632080078, 0.019576802849769592, 0.020312339067459106, 0.02104787528514862, 0.021783411502838135, 0.02251894772052765, 0.023254483938217163, 0.023990020155906677, 0.02472555637359619]}, "gradients/decoder.transformer.h.13.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 2.0, 4.0, 1.0, 3.0, 1.0, 7.0, 3.0, 10.0, 9.0, 10.0, 10.0, 18.0, 19.0, 13.0, 20.0, 19.0, 21.0, 24.0, 27.0, 30.0, 42.0, 30.0, 35.0, 31.0, 35.0, 28.0, 46.0, 37.0, 33.0, 38.0, 32.0, 34.0, 25.0, 42.0, 26.0, 40.0, 28.0, 23.0, 24.0, 17.0, 18.0, 18.0, 16.0, 8.0, 15.0, 4.0, 2.0, 10.0, 3.0, 5.0, 3.0, 8.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-8.1015625, -7.8349609375, -7.568359375, -7.3017578125, -7.03515625, -6.7685546875, -6.501953125, -6.2353515625, -5.96875, -5.7021484375, -5.435546875, -5.1689453125, -4.90234375, -4.6357421875, -4.369140625, -4.1025390625, -3.8359375, -3.5693359375, -3.302734375, -3.0361328125, -2.76953125, -2.5029296875, -2.236328125, -1.9697265625, -1.703125, -1.4365234375, -1.169921875, -0.9033203125, -0.63671875, -0.3701171875, -0.103515625, 0.1630859375, 0.4296875, 0.6962890625, 0.962890625, 1.2294921875, 1.49609375, 1.7626953125, 2.029296875, 2.2958984375, 2.5625, 2.8291015625, 3.095703125, 3.3623046875, 3.62890625, 3.8955078125, 4.162109375, 4.4287109375, 4.6953125, 4.9619140625, 5.228515625, 5.4951171875, 5.76171875, 6.0283203125, 6.294921875, 6.5615234375, 6.828125, 7.0947265625, 7.361328125, 7.6279296875, 7.89453125, 8.1611328125, 8.427734375, 8.6943359375, 8.9609375]}, "gradients/decoder.transformer.h.13.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 6.0, 2.0, 10.0, 17.0, 31.0, 40.0, 57.0, 76.0, 128.0, 182.0, 298.0, 479.0, 698.0, 1030.0, 1630.0, 2514.0, 3828.0, 5982.0, 9958.0, 18158.0, 37693.0, 103855.0, 346164.0, 333539.0, 101235.0, 36626.0, 17593.0, 9841.0, 5990.0, 3798.0, 2475.0, 1572.0, 1027.0, 664.0, 494.0, 279.0, 200.0, 128.0, 81.0, 65.0, 38.0, 21.0, 20.0, 13.0, 14.0, 7.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.06640625, -6.83074951171875, -6.5950927734375, -6.35943603515625, -6.123779296875, -5.88812255859375, -5.6524658203125, -5.41680908203125, -5.18115234375, -4.94549560546875, -4.7098388671875, -4.47418212890625, -4.238525390625, -4.00286865234375, -3.7672119140625, -3.53155517578125, -3.2958984375, -3.06024169921875, -2.8245849609375, -2.58892822265625, -2.353271484375, -2.11761474609375, -1.8819580078125, -1.64630126953125, -1.41064453125, -1.17498779296875, -0.9393310546875, -0.70367431640625, -0.468017578125, -0.23236083984375, 0.0032958984375, 0.23895263671875, 0.474609375, 0.71026611328125, 0.9459228515625, 1.18157958984375, 1.417236328125, 1.65289306640625, 1.8885498046875, 2.12420654296875, 2.35986328125, 2.59552001953125, 2.8311767578125, 3.06683349609375, 3.302490234375, 3.53814697265625, 3.7738037109375, 4.00946044921875, 4.2451171875, 4.48077392578125, 4.7164306640625, 4.95208740234375, 5.187744140625, 5.42340087890625, 5.6590576171875, 5.89471435546875, 6.13037109375, 6.36602783203125, 6.6016845703125, 6.83734130859375, 7.072998046875, 7.30865478515625, 7.5443115234375, 7.77996826171875, 8.015625]}, "gradients/decoder.transformer.h.13.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 7.0, 7.0, 12.0, 6.0, 6.0, 17.0, 18.0, 17.0, 24.0, 25.0, 42.0, 37.0, 34.0, 39.0, 65.0, 67.0, 90.0, 143.0, 1631.0, 272.0, 81.0, 60.0, 46.0, 53.0, 32.0, 30.0, 30.0, 27.0, 28.0, 26.0, 19.0, 13.0, 12.0, 9.0, 7.0, 7.0, 4.0, 6.0, 7.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-43.40625, -42.28369140625, -41.1611328125, -40.03857421875, -38.916015625, -37.79345703125, -36.6708984375, -35.54833984375, -34.42578125, -33.30322265625, -32.1806640625, -31.05810546875, -29.935546875, -28.81298828125, -27.6904296875, -26.56787109375, -25.4453125, -24.32275390625, -23.2001953125, -22.07763671875, -20.955078125, -19.83251953125, -18.7099609375, -17.58740234375, -16.46484375, -15.34228515625, -14.2197265625, -13.09716796875, -11.974609375, -10.85205078125, -9.7294921875, -8.60693359375, -7.484375, -6.36181640625, -5.2392578125, -4.11669921875, -2.994140625, -1.87158203125, -0.7490234375, 0.37353515625, 1.49609375, 2.61865234375, 3.7412109375, 4.86376953125, 5.986328125, 7.10888671875, 8.2314453125, 9.35400390625, 10.4765625, 11.59912109375, 12.7216796875, 13.84423828125, 14.966796875, 16.08935546875, 17.2119140625, 18.33447265625, 19.45703125, 20.57958984375, 21.7021484375, 22.82470703125, 23.947265625, 25.06982421875, 26.1923828125, 27.31494140625, 28.4375]}, "gradients/decoder.transformer.h.13.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 5.0, 6.0, 7.0, 12.0, 16.0, 23.0, 28.0, 53.0, 63.0, 112.0, 166.0, 232.0, 418.0, 1020.0, 76257.0, 3062770.0, 3077.0, 524.0, 286.0, 193.0, 141.0, 64.0, 79.0, 30.0, 42.0, 23.0, 15.0, 8.0, 8.0, 6.0, 2.0, 6.0, 4.0, 6.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-116.4375, -112.5830078125, -108.728515625, -104.8740234375, -101.01953125, -97.1650390625, -93.310546875, -89.4560546875, -85.6015625, -81.7470703125, -77.892578125, -74.0380859375, -70.18359375, -66.3291015625, -62.474609375, -58.6201171875, -54.765625, -50.9111328125, -47.056640625, -43.2021484375, -39.34765625, -35.4931640625, -31.638671875, -27.7841796875, -23.9296875, -20.0751953125, -16.220703125, -12.3662109375, -8.51171875, -4.6572265625, -0.802734375, 3.0517578125, 6.90625, 10.7607421875, 14.615234375, 18.4697265625, 22.32421875, 26.1787109375, 30.033203125, 33.8876953125, 37.7421875, 41.5966796875, 45.451171875, 49.3056640625, 53.16015625, 57.0146484375, 60.869140625, 64.7236328125, 68.578125, 72.4326171875, 76.287109375, 80.1416015625, 83.99609375, 87.8505859375, 91.705078125, 95.5595703125, 99.4140625, 103.2685546875, 107.123046875, 110.9775390625, 114.83203125, 118.6865234375, 122.541015625, 126.3955078125, 130.25]}, "gradients/decoder.transformer.h.13.ln_1.weight": {"_type": "histogram", "values": [9.0, 969.0, 42.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.82015037536621, -10.790372848510742, 7.239404678344727, 25.269182205200195, 43.29895782470703, 61.3287353515625, 79.35851287841797, 97.38829040527344, 115.4180679321289, 133.44784545898438, 151.47763061523438, 169.5074005126953, 187.53717041015625, 205.56695556640625, 223.59674072265625, 241.6265106201172, 259.6562805175781, 277.6860656738281, 295.7158203125, 313.74560546875, 331.775390625, 349.80517578125, 367.8349609375, 385.8647155761719, 403.8945007324219, 421.9242858886719, 439.95404052734375, 457.98382568359375, 476.01361083984375, 494.04339599609375, 512.0731811523438, 530.1029663085938, 548.1326904296875, 566.1624755859375, 584.1922607421875, 602.2220458984375, 620.2518310546875, 638.2815551757812, 656.3113403320312, 674.3411254882812, 692.3709106445312, 710.4006958007812, 728.4304809570312, 746.4602661132812, 764.489990234375, 782.519775390625, 800.549560546875, 818.579345703125, 836.609130859375, 854.638916015625, 872.668701171875, 890.698486328125, 908.728271484375, 926.7579956054688, 944.7877807617188, 962.8175659179688, 980.8473510742188, 998.8771362304688, 1016.9069213867188, 1034.9366455078125, 1052.9664306640625, 1070.9962158203125, 1089.0260009765625, 1107.0557861328125, 1125.0855712890625]}, "gradients/decoder.transformer.h.13.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 4.0, 2.0, 2.0, 11.0, 6.0, 8.0, 10.0, 7.0, 22.0, 19.0, 24.0, 16.0, 16.0, 25.0, 29.0, 33.0, 35.0, 31.0, 31.0, 42.0, 45.0, 43.0, 34.0, 39.0, 43.0, 56.0, 38.0, 38.0, 36.0, 28.0, 21.0, 31.0, 32.0, 22.0, 16.0, 15.0, 12.0, 13.0, 13.0, 8.0, 8.0, 3.0, 8.0, 7.0, 5.0, 5.0, 2.0, 6.0, 5.0, 3.0, 1.0, 1.0, 2.0], "bins": [-89.05794525146484, -86.40727996826172, -83.75660705566406, -81.10594177246094, -78.45527648925781, -75.80461120605469, -73.15394592285156, -70.5032730102539, -67.85260772705078, -65.20194244384766, -62.551273345947266, -59.900604248046875, -57.24993896484375, -54.599273681640625, -51.948604583740234, -49.297935485839844, -46.64727020263672, -43.996604919433594, -41.3459358215332, -38.69526672363281, -36.04460144042969, -33.39393615722656, -30.743267059326172, -28.092599868774414, -25.441932678222656, -22.7912654876709, -20.14059829711914, -17.489931106567383, -14.839263916015625, -12.188596725463867, -9.53792953491211, -6.887262344360352, -4.2365875244140625, -1.5859203338623047, 1.0647468566894531, 3.715414047241211, 6.366081237792969, 9.016748428344727, 11.667415618896484, 14.318082809448242, 16.96875, 19.619417190551758, 22.270084381103516, 24.920751571655273, 27.57141876220703, 30.22208595275879, 32.87275314331055, 35.52342224121094, 38.17408752441406, 40.82475280761719, 43.47542190551758, 46.12609100341797, 48.776756286621094, 51.42742156982422, 54.07809066772461, 56.728759765625, 59.379425048828125, 62.03009033203125, 64.68075561523438, 67.33142852783203, 69.98209381103516, 72.63275909423828, 75.28343200683594, 77.93409729003906, 80.58476257324219]}, "gradients/decoder.transformer.h.12.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 3.0, 1.0, 0.0, 3.0, 6.0, 4.0, 9.0, 5.0, 12.0, 7.0, 9.0, 7.0, 21.0, 13.0, 18.0, 31.0, 28.0, 31.0, 28.0, 31.0, 35.0, 29.0, 37.0, 28.0, 34.0, 44.0, 40.0, 34.0, 32.0, 37.0, 37.0, 25.0, 37.0, 44.0, 29.0, 27.0, 24.0, 26.0, 24.0, 16.0, 12.0, 19.0, 17.0, 13.0, 9.0, 7.0, 6.0, 1.0, 1.0, 5.0, 3.0, 3.0, 4.0, 6.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-8.6796875, -8.3896484375, -8.099609375, -7.8095703125, -7.51953125, -7.2294921875, -6.939453125, -6.6494140625, -6.359375, -6.0693359375, -5.779296875, -5.4892578125, -5.19921875, -4.9091796875, -4.619140625, -4.3291015625, -4.0390625, -3.7490234375, -3.458984375, -3.1689453125, -2.87890625, -2.5888671875, -2.298828125, -2.0087890625, -1.71875, -1.4287109375, -1.138671875, -0.8486328125, -0.55859375, -0.2685546875, 0.021484375, 0.3115234375, 0.6015625, 0.8916015625, 1.181640625, 1.4716796875, 1.76171875, 2.0517578125, 2.341796875, 2.6318359375, 2.921875, 3.2119140625, 3.501953125, 3.7919921875, 4.08203125, 4.3720703125, 4.662109375, 4.9521484375, 5.2421875, 5.5322265625, 5.822265625, 6.1123046875, 6.40234375, 6.6923828125, 6.982421875, 7.2724609375, 7.5625, 7.8525390625, 8.142578125, 8.4326171875, 8.72265625, 9.0126953125, 9.302734375, 9.5927734375, 9.8828125]}, "gradients/decoder.transformer.h.12.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 4.0, 5.0, 7.0, 4.0, 4.0, 8.0, 6.0, 8.0, 10.0, 17.0, 23.0, 21.0, 42.0, 40.0, 63.0, 109.0, 190.0, 399.0, 930.0, 2347.0, 7610.0, 32234.0, 214682.0, 1426774.0, 2031459.0, 405066.0, 55017.0, 11390.0, 3333.0, 1250.0, 523.0, 260.0, 141.0, 84.0, 63.0, 37.0, 24.0, 22.0, 14.0, 21.0, 16.0, 12.0, 7.0, 3.0, 1.0, 5.0, 1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-18.390625, -17.78076171875, -17.1708984375, -16.56103515625, -15.951171875, -15.34130859375, -14.7314453125, -14.12158203125, -13.51171875, -12.90185546875, -12.2919921875, -11.68212890625, -11.072265625, -10.46240234375, -9.8525390625, -9.24267578125, -8.6328125, -8.02294921875, -7.4130859375, -6.80322265625, -6.193359375, -5.58349609375, -4.9736328125, -4.36376953125, -3.75390625, -3.14404296875, -2.5341796875, -1.92431640625, -1.314453125, -0.70458984375, -0.0947265625, 0.51513671875, 1.125, 1.73486328125, 2.3447265625, 2.95458984375, 3.564453125, 4.17431640625, 4.7841796875, 5.39404296875, 6.00390625, 6.61376953125, 7.2236328125, 7.83349609375, 8.443359375, 9.05322265625, 9.6630859375, 10.27294921875, 10.8828125, 11.49267578125, 12.1025390625, 12.71240234375, 13.322265625, 13.93212890625, 14.5419921875, 15.15185546875, 15.76171875, 16.37158203125, 16.9814453125, 17.59130859375, 18.201171875, 18.81103515625, 19.4208984375, 20.03076171875, 20.640625]}, "gradients/decoder.transformer.h.12.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 6.0, 4.0, 9.0, 20.0, 25.0, 38.0, 63.0, 100.0, 149.0, 250.0, 408.0, 585.0, 741.0, 610.0, 433.0, 248.0, 151.0, 92.0, 66.0, 31.0, 19.0, 16.0, 10.0, 7.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.6875, -16.7314453125, -15.775390625, -14.8193359375, -13.86328125, -12.9072265625, -11.951171875, -10.9951171875, -10.0390625, -9.0830078125, -8.126953125, -7.1708984375, -6.21484375, -5.2587890625, -4.302734375, -3.3466796875, -2.390625, -1.4345703125, -0.478515625, 0.4775390625, 1.43359375, 2.3896484375, 3.345703125, 4.3017578125, 5.2578125, 6.2138671875, 7.169921875, 8.1259765625, 9.08203125, 10.0380859375, 10.994140625, 11.9501953125, 12.90625, 13.8623046875, 14.818359375, 15.7744140625, 16.73046875, 17.6865234375, 18.642578125, 19.5986328125, 20.5546875, 21.5107421875, 22.466796875, 23.4228515625, 24.37890625, 25.3349609375, 26.291015625, 27.2470703125, 28.203125, 29.1591796875, 30.115234375, 31.0712890625, 32.02734375, 32.9833984375, 33.939453125, 34.8955078125, 35.8515625, 36.8076171875, 37.763671875, 38.7197265625, 39.67578125, 40.6318359375, 41.587890625, 42.5439453125, 43.5]}, "gradients/decoder.transformer.h.12.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 7.0, 13.0, 19.0, 19.0, 41.0, 56.0, 77.0, 146.0, 204.0, 342.0, 618.0, 2055.0, 104236.0, 3996510.0, 86423.0, 1933.0, 620.0, 343.0, 224.0, 128.0, 96.0, 71.0, 38.0, 33.0, 12.0, 7.0, 6.0, 7.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-116.0, -113.26953125, -110.5390625, -107.80859375, -105.078125, -102.34765625, -99.6171875, -96.88671875, -94.15625, -91.42578125, -88.6953125, -85.96484375, -83.234375, -80.50390625, -77.7734375, -75.04296875, -72.3125, -69.58203125, -66.8515625, -64.12109375, -61.390625, -58.66015625, -55.9296875, -53.19921875, -50.46875, -47.73828125, -45.0078125, -42.27734375, -39.546875, -36.81640625, -34.0859375, -31.35546875, -28.625, -25.89453125, -23.1640625, -20.43359375, -17.703125, -14.97265625, -12.2421875, -9.51171875, -6.78125, -4.05078125, -1.3203125, 1.41015625, 4.140625, 6.87109375, 9.6015625, 12.33203125, 15.0625, 17.79296875, 20.5234375, 23.25390625, 25.984375, 28.71484375, 31.4453125, 34.17578125, 36.90625, 39.63671875, 42.3671875, 45.09765625, 47.828125, 50.55859375, 53.2890625, 56.01953125, 58.75]}, "gradients/decoder.transformer.h.12.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 5.0, 20.0, 66.0, 120.0, 183.0, 241.0, 186.0, 120.0, 50.0, 16.0, 8.0, 2.0, 1.0], "bins": [-347.1307678222656, -340.98828125, -334.84576416015625, -328.7032775878906, -322.560791015625, -316.41827392578125, -310.2757873535156, -304.1332702636719, -297.99078369140625, -291.8482971191406, -285.7057800292969, -279.56329345703125, -273.4207763671875, -267.2782897949219, -261.13580322265625, -254.99330139160156, -248.85079956054688, -242.7082977294922, -236.5657958984375, -230.42330932617188, -224.2808074951172, -218.1383056640625, -211.99581909179688, -205.8533172607422, -199.7108154296875, -193.5683135986328, -187.42581176757812, -181.2833251953125, -175.1408233642578, -168.99832153320312, -162.8558349609375, -156.7133331298828, -150.57083129882812, -144.42832946777344, -138.28582763671875, -132.14334106445312, -126.00083923339844, -119.85833740234375, -113.7158432006836, -107.57334899902344, -101.43085479736328, -95.28836059570312, -89.14585876464844, -83.00335693359375, -76.8608627319336, -70.71836853027344, -64.57586669921875, -58.43336868286133, -52.290870666503906, -46.148372650146484, -40.00587463378906, -33.86337661743164, -27.72087860107422, -21.578380584716797, -15.435882568359375, -9.293384552001953, -3.1508865356445312, 2.9916114807128906, 9.134109497070312, 15.276607513427734, 21.419105529785156, 27.561603546142578, 33.7041015625, 39.84659957885742, 45.989097595214844]}, "gradients/decoder.transformer.h.12.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 3.0, 3.0, 4.0, 1.0, 6.0, 10.0, 10.0, 5.0, 20.0, 13.0, 10.0, 14.0, 17.0, 24.0, 28.0, 30.0, 29.0, 22.0, 37.0, 22.0, 41.0, 38.0, 39.0, 43.0, 43.0, 51.0, 48.0, 31.0, 38.0, 23.0, 41.0, 41.0, 26.0, 22.0, 21.0, 19.0, 31.0, 12.0, 12.0, 13.0, 13.0, 12.0, 11.0, 6.0, 8.0, 7.0, 4.0, 4.0, 4.0, 2.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-54.06024169921875, -52.16075134277344, -50.26126480102539, -48.361778259277344, -46.46228790283203, -44.56279754638672, -42.66331100463867, -40.763824462890625, -38.86433410644531, -36.96484375, -35.06535720825195, -33.165870666503906, -31.266380310058594, -29.366891860961914, -27.467403411865234, -25.567914962768555, -23.668426513671875, -21.768938064575195, -19.869449615478516, -17.969961166381836, -16.070472717285156, -14.170984268188477, -12.271495819091797, -10.372007369995117, -8.472518920898438, -6.573030471801758, -4.673542022705078, -2.7740535736083984, -0.8745651245117188, 1.024923324584961, 2.9244117736816406, 4.82390022277832, 6.723388671875, 8.62287712097168, 10.52236557006836, 12.421854019165039, 14.321342468261719, 16.2208309173584, 18.120319366455078, 20.019807815551758, 21.919296264648438, 23.818784713745117, 25.718273162841797, 27.617761611938477, 29.517250061035156, 31.416738510131836, 33.316226959228516, 35.21571350097656, 37.115203857421875, 39.01469421386719, 40.914180755615234, 42.81366729736328, 44.713157653808594, 46.612648010253906, 48.51213455200195, 50.41162109375, 52.31111145019531, 54.210601806640625, 56.11008834838867, 58.00957489013672, 59.90906524658203, 61.808555603027344, 63.70804214477539, 65.60752868652344, 67.50701904296875]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 8.0, 2.0, 4.0, 4.0, 5.0, 9.0, 13.0, 11.0, 17.0, 11.0, 20.0, 18.0, 19.0, 27.0, 21.0, 25.0, 33.0, 28.0, 32.0, 45.0, 42.0, 37.0, 30.0, 29.0, 39.0, 42.0, 36.0, 31.0, 33.0, 40.0, 34.0, 35.0, 37.0, 23.0, 30.0, 11.0, 24.0, 18.0, 17.0, 15.0, 8.0, 9.0, 8.0, 7.0, 4.0, 8.0, 5.0, 3.0, 1.0, 3.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-8.890625, -8.606201171875, -8.32177734375, -8.037353515625, -7.7529296875, -7.468505859375, -7.18408203125, -6.899658203125, -6.615234375, -6.330810546875, -6.04638671875, -5.761962890625, -5.4775390625, -5.193115234375, -4.90869140625, -4.624267578125, -4.33984375, -4.055419921875, -3.77099609375, -3.486572265625, -3.2021484375, -2.917724609375, -2.63330078125, -2.348876953125, -2.064453125, -1.780029296875, -1.49560546875, -1.211181640625, -0.9267578125, -0.642333984375, -0.35791015625, -0.073486328125, 0.2109375, 0.495361328125, 0.77978515625, 1.064208984375, 1.3486328125, 1.633056640625, 1.91748046875, 2.201904296875, 2.486328125, 2.770751953125, 3.05517578125, 3.339599609375, 3.6240234375, 3.908447265625, 4.19287109375, 4.477294921875, 4.76171875, 5.046142578125, 5.33056640625, 5.614990234375, 5.8994140625, 6.183837890625, 6.46826171875, 6.752685546875, 7.037109375, 7.321533203125, 7.60595703125, 7.890380859375, 8.1748046875, 8.459228515625, 8.74365234375, 9.028076171875, 9.3125]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 9.0, 8.0, 17.0, 25.0, 39.0, 65.0, 79.0, 141.0, 210.0, 313.0, 540.0, 853.0, 1401.0, 2188.0, 3448.0, 5803.0, 9527.0, 15555.0, 26664.0, 45234.0, 77592.0, 130144.0, 195933.0, 199420.0, 135213.0, 81070.0, 47506.0, 27680.0, 16526.0, 9657.0, 5976.0, 3650.0, 2299.0, 1376.0, 874.0, 559.0, 363.0, 208.0, 141.0, 87.0, 69.0, 32.0, 27.0, 16.0, 10.0, 9.0, 6.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.92431640625, -0.8935470581054688, -0.8627777099609375, -0.8320083618164062, -0.801239013671875, -0.7704696655273438, -0.7397003173828125, -0.7089309692382812, -0.67816162109375, -0.6473922729492188, -0.6166229248046875, -0.5858535766601562, -0.555084228515625, -0.5243148803710938, -0.4935455322265625, -0.46277618408203125, -0.4320068359375, -0.40123748779296875, -0.3704681396484375, -0.33969879150390625, -0.308929443359375, -0.27816009521484375, -0.2473907470703125, -0.21662139892578125, -0.18585205078125, -0.15508270263671875, -0.1243133544921875, -0.09354400634765625, -0.062774658203125, -0.03200531005859375, -0.0012359619140625, 0.02953338623046875, 0.060302734375, 0.09107208251953125, 0.1218414306640625, 0.15261077880859375, 0.183380126953125, 0.21414947509765625, 0.2449188232421875, 0.27568817138671875, 0.30645751953125, 0.33722686767578125, 0.3679962158203125, 0.39876556396484375, 0.429534912109375, 0.46030426025390625, 0.4910736083984375, 0.5218429565429688, 0.5526123046875, 0.5833816528320312, 0.6141510009765625, 0.6449203491210938, 0.675689697265625, 0.7064590454101562, 0.7372283935546875, 0.7679977416992188, 0.79876708984375, 0.8295364379882812, 0.8603057861328125, 0.8910751342773438, 0.921844482421875, 0.9526138305664062, 0.9833831787109375, 1.0141525268554688, 1.044921875]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 6.0, 11.0, 13.0, 10.0, 7.0, 17.0, 20.0, 15.0, 24.0, 19.0, 22.0, 25.0, 33.0, 17.0, 43.0, 41.0, 31.0, 30.0, 52.0, 42.0, 1060.0, 38.0, 37.0, 31.0, 44.0, 37.0, 34.0, 37.0, 29.0, 25.0, 27.0, 22.0, 18.0, 24.0, 12.0, 13.0, 22.0, 4.0, 7.0, 3.0, 6.0, 4.0, 4.0, 4.0, 1.0, 2.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.0078125, -5.8194580078125, -5.631103515625, -5.4427490234375, -5.25439453125, -5.0660400390625, -4.877685546875, -4.6893310546875, -4.5009765625, -4.3126220703125, -4.124267578125, -3.9359130859375, -3.74755859375, -3.5592041015625, -3.370849609375, -3.1824951171875, -2.994140625, -2.8057861328125, -2.617431640625, -2.4290771484375, -2.24072265625, -2.0523681640625, -1.864013671875, -1.6756591796875, -1.4873046875, -1.2989501953125, -1.110595703125, -0.9222412109375, -0.73388671875, -0.5455322265625, -0.357177734375, -0.1688232421875, 0.01953125, 0.2078857421875, 0.396240234375, 0.5845947265625, 0.77294921875, 0.9613037109375, 1.149658203125, 1.3380126953125, 1.5263671875, 1.7147216796875, 1.903076171875, 2.0914306640625, 2.27978515625, 2.4681396484375, 2.656494140625, 2.8448486328125, 3.033203125, 3.2215576171875, 3.409912109375, 3.5982666015625, 3.78662109375, 3.9749755859375, 4.163330078125, 4.3516845703125, 4.5400390625, 4.7283935546875, 4.916748046875, 5.1051025390625, 5.29345703125, 5.4818115234375, 5.670166015625, 5.8585205078125, 6.046875]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 4.0, 7.0, 8.0, 19.0, 20.0, 30.0, 43.0, 83.0, 113.0, 218.0, 326.0, 502.0, 770.0, 1243.0, 2092.0, 3527.0, 6084.0, 10395.0, 18450.0, 32373.0, 57659.0, 99069.0, 162468.0, 1204434.0, 211875.0, 119944.0, 71528.0, 40323.0, 22927.0, 12653.0, 7287.0, 4193.0, 2539.0, 1423.0, 927.0, 582.0, 347.0, 203.0, 142.0, 118.0, 55.0, 39.0, 38.0, 19.0, 18.0, 8.0, 6.0, 3.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.68603515625, -0.6643829345703125, -0.642730712890625, -0.6210784912109375, -0.59942626953125, -0.5777740478515625, -0.556121826171875, -0.5344696044921875, -0.5128173828125, -0.4911651611328125, -0.469512939453125, -0.4478607177734375, -0.42620849609375, -0.4045562744140625, -0.382904052734375, -0.3612518310546875, -0.339599609375, -0.3179473876953125, -0.296295166015625, -0.2746429443359375, -0.25299072265625, -0.2313385009765625, -0.209686279296875, -0.1880340576171875, -0.1663818359375, -0.1447296142578125, -0.123077392578125, -0.1014251708984375, -0.07977294921875, -0.0581207275390625, -0.036468505859375, -0.0148162841796875, 0.0068359375, 0.0284881591796875, 0.050140380859375, 0.0717926025390625, 0.09344482421875, 0.1150970458984375, 0.136749267578125, 0.1584014892578125, 0.1800537109375, 0.2017059326171875, 0.223358154296875, 0.2450103759765625, 0.26666259765625, 0.2883148193359375, 0.309967041015625, 0.3316192626953125, 0.353271484375, 0.3749237060546875, 0.396575927734375, 0.4182281494140625, 0.43988037109375, 0.4615325927734375, 0.483184814453125, 0.5048370361328125, 0.5264892578125, 0.5481414794921875, 0.569793701171875, 0.5914459228515625, 0.61309814453125, 0.6347503662109375, 0.656402587890625, 0.6780548095703125, 0.69970703125]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0, 2.0, 1.0, 3.0, 7.0, 4.0, 6.0, 8.0, 4.0, 4.0, 6.0, 9.0, 8.0, 9.0, 16.0, 15.0, 17.0, 23.0, 24.0, 28.0, 40.0, 58.0, 74.0, 117.0, 106.0, 80.0, 64.0, 50.0, 32.0, 37.0, 22.0, 16.0, 12.0, 11.0, 11.0, 13.0, 7.0, 8.0, 9.0, 6.0, 5.0, 5.0, 9.0, 6.0, 3.0, 5.0, 5.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.032501220703125, -0.03148984909057617, -0.030478477478027344, -0.029467105865478516, -0.028455734252929688, -0.02744436264038086, -0.02643299102783203, -0.025421619415283203, -0.024410247802734375, -0.023398876190185547, -0.02238750457763672, -0.02137613296508789, -0.020364761352539062, -0.019353389739990234, -0.018342018127441406, -0.017330646514892578, -0.01631927490234375, -0.015307903289794922, -0.014296531677246094, -0.013285160064697266, -0.012273788452148438, -0.01126241683959961, -0.010251045227050781, -0.009239673614501953, -0.008228302001953125, -0.007216930389404297, -0.006205558776855469, -0.005194187164306641, -0.0041828155517578125, -0.0031714439392089844, -0.0021600723266601562, -0.0011487007141113281, -0.0001373291015625, 0.0008740425109863281, 0.0018854141235351562, 0.0028967857360839844, 0.0039081573486328125, 0.004919528961181641, 0.005930900573730469, 0.006942272186279297, 0.007953643798828125, 0.008965015411376953, 0.009976387023925781, 0.01098775863647461, 0.011999130249023438, 0.013010501861572266, 0.014021873474121094, 0.015033245086669922, 0.01604461669921875, 0.017055988311767578, 0.018067359924316406, 0.019078731536865234, 0.020090103149414062, 0.02110147476196289, 0.02211284637451172, 0.023124217987060547, 0.024135589599609375, 0.025146961212158203, 0.02615833282470703, 0.02716970443725586, 0.028181076049804688, 0.029192447662353516, 0.030203819274902344, 0.031215190887451172, 0.0322265625]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 7.0, 1.0, 7.0, 3.0, 5.0, 6.0, 10.0, 8.0, 4.0, 14.0, 14.0, 18.0, 23.0, 18.0, 22.0, 29.0, 22.0, 50.0, 73.0, 154.0, 234.0, 473.0, 3969.0, 949634.0, 91753.0, 1118.0, 328.0, 191.0, 107.0, 60.0, 38.0, 22.0, 28.0, 19.0, 24.0, 15.0, 11.0, 7.0, 7.0, 9.0, 3.0, 6.0, 5.0, 5.0, 2.0, 5.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.65625, -0.6362380981445312, -0.6162261962890625, -0.5962142944335938, -0.576202392578125, -0.5561904907226562, -0.5361785888671875, -0.5161666870117188, -0.49615478515625, -0.47614288330078125, -0.4561309814453125, -0.43611907958984375, -0.416107177734375, -0.39609527587890625, -0.3760833740234375, -0.35607147216796875, -0.3360595703125, -0.31604766845703125, -0.2960357666015625, -0.27602386474609375, -0.256011962890625, -0.23600006103515625, -0.2159881591796875, -0.19597625732421875, -0.17596435546875, -0.15595245361328125, -0.1359405517578125, -0.11592864990234375, -0.095916748046875, -0.07590484619140625, -0.0558929443359375, -0.03588104248046875, -0.015869140625, 0.00414276123046875, 0.0241546630859375, 0.04416656494140625, 0.064178466796875, 0.08419036865234375, 0.1042022705078125, 0.12421417236328125, 0.14422607421875, 0.16423797607421875, 0.1842498779296875, 0.20426177978515625, 0.224273681640625, 0.24428558349609375, 0.2642974853515625, 0.28430938720703125, 0.3043212890625, 0.32433319091796875, 0.3443450927734375, 0.36435699462890625, 0.384368896484375, 0.40438079833984375, 0.4243927001953125, 0.44440460205078125, 0.46441650390625, 0.48442840576171875, 0.5044403076171875, 0.5244522094726562, 0.544464111328125, 0.5644760131835938, 0.5844879150390625, 0.6044998168945312, 0.62451171875]}, "gradients/decoder.transformer.h.12.ln_cross_attn.weight": {"_type": "histogram", "values": [956.0, 59.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.011058940552175045, 0.004542824812233448, 0.020144589245319366, 0.03574635460972786, 0.05134811997413635, 0.06694988906383514, 0.08255165070295334, 0.09815341234207153, 0.11375518143177032, 0.12935695052146912, 0.1449587047100067, 0.1605604737997055, 0.1761622428894043, 0.1917640119791031, 0.20736578106880188, 0.22296753525733948, 0.23856930434703827, 0.25417107343673706, 0.26977282762527466, 0.28537461161613464, 0.30097636580467224, 0.3165781497955322, 0.3321799039840698, 0.3477816581726074, 0.3633834421634674, 0.378985196352005, 0.394586980342865, 0.4101887345314026, 0.4257904887199402, 0.44139227271080017, 0.45699402689933777, 0.47259581089019775, 0.48819756507873535, 0.503799319267273, 0.5194010734558105, 0.5350028872489929, 0.5506046414375305, 0.5662063956260681, 0.5818081498146057, 0.5974099040031433, 0.6130117177963257, 0.6286134719848633, 0.6442152261734009, 0.6598170399665833, 0.6754187941551208, 0.6910205483436584, 0.706622302532196, 0.7222240567207336, 0.7378258109092712, 0.7534275650978088, 0.7690293192863464, 0.7846311330795288, 0.8002328872680664, 0.815834641456604, 0.8314363956451416, 0.8470381498336792, 0.8626399040222168, 0.8782416582107544, 0.893843412399292, 0.9094452261924744, 0.925046980381012, 0.9406487345695496, 0.9562504887580872, 0.9718522429466248, 0.9874540567398071]}, "gradients/decoder.transformer.h.12.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 7.0, 10.0, 13.0, 12.0, 30.0, 34.0, 37.0, 70.0, 71.0, 76.0, 86.0, 77.0, 107.0, 74.0, 82.0, 50.0, 62.0, 33.0, 31.0, 23.0, 16.0, 4.0, 7.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05185270309448242, -0.05042874813079834, -0.04900478944182396, -0.04758083075284958, -0.0461568757891655, -0.044732920825481415, -0.043308962136507034, -0.041885003447532654, -0.04046104848384857, -0.03903709352016449, -0.03761313483119011, -0.03618917614221573, -0.03476522117853165, -0.033341266214847565, -0.031917307525873184, -0.030493350699543953, -0.02906939387321472, -0.02764543704688549, -0.02622148022055626, -0.024797523394227028, -0.023373566567897797, -0.021949609741568565, -0.020525652915239334, -0.019101696088910103, -0.01767773926258087, -0.01625378243625164, -0.014829825609922409, -0.013405868783593178, -0.011981911957263947, -0.010557955130934715, -0.009133998304605484, -0.007710041478276253, -0.0062860846519470215, -0.00486212782561779, -0.003438170999288559, -0.0020142141729593277, -0.0005902573466300964, 0.0008336994796991348, 0.002257656306028366, 0.0036816131323575974, 0.005105569958686829, 0.00652952678501606, 0.007953483611345291, 0.009377440437674522, 0.010801397264003754, 0.012225354090332985, 0.013649310916662216, 0.015073267742991447, 0.01649722456932068, 0.01792118139564991, 0.01934513822197914, 0.020769095048308372, 0.022193051874637604, 0.023617008700966835, 0.025040965527296066, 0.026464922353625298, 0.02788887917995453, 0.02931283600628376, 0.03073679283261299, 0.03216075152158737, 0.033584706485271454, 0.035008661448955536, 0.036432620137929916, 0.0378565788269043, 0.03928053379058838]}, "gradients/decoder.transformer.h.12.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 8.0, 2.0, 4.0, 4.0, 5.0, 9.0, 13.0, 11.0, 17.0, 11.0, 20.0, 18.0, 19.0, 27.0, 21.0, 25.0, 33.0, 28.0, 32.0, 45.0, 43.0, 36.0, 30.0, 29.0, 38.0, 43.0, 36.0, 31.0, 33.0, 40.0, 34.0, 35.0, 37.0, 23.0, 30.0, 11.0, 24.0, 18.0, 17.0, 15.0, 8.0, 9.0, 8.0, 7.0, 4.0, 8.0, 5.0, 3.0, 1.0, 3.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-8.890625, -8.606201171875, -8.32177734375, -8.037353515625, -7.7529296875, -7.468505859375, -7.18408203125, -6.899658203125, -6.615234375, -6.330810546875, -6.04638671875, -5.761962890625, -5.4775390625, -5.193115234375, -4.90869140625, -4.624267578125, -4.33984375, -4.055419921875, -3.77099609375, -3.486572265625, -3.2021484375, -2.917724609375, -2.63330078125, -2.348876953125, -2.064453125, -1.780029296875, -1.49560546875, -1.211181640625, -0.9267578125, -0.642333984375, -0.35791015625, -0.073486328125, 0.2109375, 0.495361328125, 0.77978515625, 1.064208984375, 1.3486328125, 1.633056640625, 1.91748046875, 2.201904296875, 2.486328125, 2.770751953125, 3.05517578125, 3.339599609375, 3.6240234375, 3.908447265625, 4.19287109375, 4.477294921875, 4.76171875, 5.046142578125, 5.33056640625, 5.614990234375, 5.8994140625, 6.183837890625, 6.46826171875, 6.752685546875, 7.037109375, 7.321533203125, 7.60595703125, 7.890380859375, 8.1748046875, 8.459228515625, 8.74365234375, 9.028076171875, 9.3125]}, "gradients/decoder.transformer.h.12.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 8.0, 4.0, 4.0, 5.0, 5.0, 15.0, 19.0, 17.0, 41.0, 38.0, 75.0, 115.0, 154.0, 257.0, 420.0, 591.0, 979.0, 1581.0, 2693.0, 4632.0, 9056.0, 20927.0, 65333.0, 258723.0, 486852.0, 130674.0, 36321.0, 13544.0, 6349.0, 3494.0, 2149.0, 1278.0, 797.0, 494.0, 317.0, 178.0, 146.0, 86.0, 72.0, 35.0, 18.0, 17.0, 13.0, 11.0, 5.0, 8.0, 6.0, 3.0, 1.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-8.40625, -8.1373291015625, -7.868408203125, -7.5994873046875, -7.33056640625, -7.0616455078125, -6.792724609375, -6.5238037109375, -6.2548828125, -5.9859619140625, -5.717041015625, -5.4481201171875, -5.17919921875, -4.9102783203125, -4.641357421875, -4.3724365234375, -4.103515625, -3.8345947265625, -3.565673828125, -3.2967529296875, -3.02783203125, -2.7589111328125, -2.489990234375, -2.2210693359375, -1.9521484375, -1.6832275390625, -1.414306640625, -1.1453857421875, -0.87646484375, -0.6075439453125, -0.338623046875, -0.0697021484375, 0.19921875, 0.4681396484375, 0.737060546875, 1.0059814453125, 1.27490234375, 1.5438232421875, 1.812744140625, 2.0816650390625, 2.3505859375, 2.6195068359375, 2.888427734375, 3.1573486328125, 3.42626953125, 3.6951904296875, 3.964111328125, 4.2330322265625, 4.501953125, 4.7708740234375, 5.039794921875, 5.3087158203125, 5.57763671875, 5.8465576171875, 6.115478515625, 6.3843994140625, 6.6533203125, 6.9222412109375, 7.191162109375, 7.4600830078125, 7.72900390625, 7.9979248046875, 8.266845703125, 8.5357666015625, 8.8046875]}, "gradients/decoder.transformer.h.12.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 0.0, 1.0, 3.0, 5.0, 7.0, 4.0, 10.0, 18.0, 16.0, 11.0, 10.0, 18.0, 19.0, 27.0, 23.0, 33.0, 33.0, 40.0, 38.0, 49.0, 69.0, 97.0, 1618.0, 368.0, 98.0, 59.0, 53.0, 37.0, 42.0, 38.0, 33.0, 35.0, 22.0, 18.0, 27.0, 17.0, 9.0, 15.0, 8.0, 1.0, 5.0, 9.0, 2.0, 6.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-34.25, -33.1416015625, -32.033203125, -30.9248046875, -29.81640625, -28.7080078125, -27.599609375, -26.4912109375, -25.3828125, -24.2744140625, -23.166015625, -22.0576171875, -20.94921875, -19.8408203125, -18.732421875, -17.6240234375, -16.515625, -15.4072265625, -14.298828125, -13.1904296875, -12.08203125, -10.9736328125, -9.865234375, -8.7568359375, -7.6484375, -6.5400390625, -5.431640625, -4.3232421875, -3.21484375, -2.1064453125, -0.998046875, 0.1103515625, 1.21875, 2.3271484375, 3.435546875, 4.5439453125, 5.65234375, 6.7607421875, 7.869140625, 8.9775390625, 10.0859375, 11.1943359375, 12.302734375, 13.4111328125, 14.51953125, 15.6279296875, 16.736328125, 17.8447265625, 18.953125, 20.0615234375, 21.169921875, 22.2783203125, 23.38671875, 24.4951171875, 25.603515625, 26.7119140625, 27.8203125, 28.9287109375, 30.037109375, 31.1455078125, 32.25390625, 33.3623046875, 34.470703125, 35.5791015625, 36.6875]}, "gradients/decoder.transformer.h.12.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 4.0, 3.0, 3.0, 8.0, 10.0, 11.0, 8.0, 25.0, 15.0, 42.0, 52.0, 79.0, 108.0, 176.0, 300.0, 509.0, 2751.0, 2857132.0, 281409.0, 1878.0, 478.0, 233.0, 154.0, 104.0, 71.0, 36.0, 26.0, 22.0, 29.0, 8.0, 8.0, 9.0, 1.0, 4.0, 4.0, 1.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-89.9375, -86.4677734375, -82.998046875, -79.5283203125, -76.05859375, -72.5888671875, -69.119140625, -65.6494140625, -62.1796875, -58.7099609375, -55.240234375, -51.7705078125, -48.30078125, -44.8310546875, -41.361328125, -37.8916015625, -34.421875, -30.9521484375, -27.482421875, -24.0126953125, -20.54296875, -17.0732421875, -13.603515625, -10.1337890625, -6.6640625, -3.1943359375, 0.275390625, 3.7451171875, 7.21484375, 10.6845703125, 14.154296875, 17.6240234375, 21.09375, 24.5634765625, 28.033203125, 31.5029296875, 34.97265625, 38.4423828125, 41.912109375, 45.3818359375, 48.8515625, 52.3212890625, 55.791015625, 59.2607421875, 62.73046875, 66.2001953125, 69.669921875, 73.1396484375, 76.609375, 80.0791015625, 83.548828125, 87.0185546875, 90.48828125, 93.9580078125, 97.427734375, 100.8974609375, 104.3671875, 107.8369140625, 111.306640625, 114.7763671875, 118.24609375, 121.7158203125, 125.185546875, 128.6552734375, 132.125]}, "gradients/decoder.transformer.h.12.ln_1.weight": {"_type": "histogram", "values": [1.0, 3.0, 11.0, 34.0, 89.0, 165.0, 237.0, 210.0, 146.0, 81.0, 28.0, 14.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.183643341064453, -10.430811882019043, -8.677980422973633, -6.925148010253906, -5.172316551208496, -3.419485092163086, -1.6666526794433594, 0.08617877960205078, 1.839010238647461, 3.59184193611145, 5.3446736335754395, 7.097505569458008, 8.850337028503418, 10.603168487548828, 12.356000900268555, 14.108832359313965, 15.861663818359375, 17.6144962310791, 19.367326736450195, 21.120159149169922, 22.872989654541016, 24.625822067260742, 26.37865447998047, 28.131484985351562, 29.88431739807129, 31.637149810791016, 33.38998031616211, 35.14281463623047, 36.89564514160156, 38.648475646972656, 40.40130615234375, 42.15414047241211, 43.90696716308594, 45.65979766845703, 47.41263198852539, 49.165462493896484, 50.91829299926758, 52.67112731933594, 54.42395782470703, 56.176788330078125, 57.92961883544922, 59.68244934082031, 61.43528366088867, 63.188114166259766, 64.94094848632812, 66.69377899169922, 68.44660949707031, 70.1994400024414, 71.9522705078125, 73.7051010131836, 75.45793151855469, 77.21076965332031, 78.9636001586914, 80.7164306640625, 82.4692611694336, 84.22209167480469, 85.97492980957031, 87.7277603149414, 89.4805908203125, 91.23342895507812, 92.98625946044922, 94.73908996582031, 96.4919204711914, 98.2447509765625, 99.9975814819336]}, "gradients/decoder.transformer.h.12.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 4.0, 0.0, 5.0, 1.0, 3.0, 2.0, 1.0, 5.0, 3.0, 6.0, 9.0, 10.0, 10.0, 13.0, 18.0, 24.0, 14.0, 20.0, 27.0, 22.0, 27.0, 31.0, 24.0, 32.0, 48.0, 32.0, 51.0, 41.0, 44.0, 37.0, 46.0, 42.0, 53.0, 36.0, 37.0, 19.0, 34.0, 24.0, 20.0, 30.0, 20.0, 11.0, 18.0, 16.0, 13.0, 6.0, 5.0, 6.0, 3.0, 0.0, 3.0, 2.0, 2.0, 1.0, 4.0, 4.0, 1.0, 0.0, 0.0, 2.0], "bins": [-84.25794219970703, -81.59794616699219, -78.93795013427734, -76.2779541015625, -73.61795806884766, -70.95796203613281, -68.29796600341797, -65.63796997070312, -62.97797393798828, -60.31797790527344, -57.657981872558594, -54.99798583984375, -52.337989807128906, -49.67799377441406, -47.01799774169922, -44.358001708984375, -41.698001861572266, -39.03800582885742, -36.37800979614258, -33.718013763427734, -31.05801773071289, -28.398021697998047, -25.73802375793457, -23.078027725219727, -20.418031692504883, -17.75803565979004, -15.098039627075195, -12.438042640686035, -9.778046607971191, -7.118050575256348, -4.4580535888671875, -1.7980575561523438, 0.8619384765625, 3.521934747695923, 6.181931018829346, 8.841927528381348, 11.501923561096191, 14.161919593811035, 16.821916580200195, 19.48191261291504, 22.141908645629883, 24.801904678344727, 27.46190071105957, 30.121898651123047, 32.78189468383789, 35.441890716552734, 38.10188674926758, 40.76188278198242, 43.421878814697266, 46.08187484741211, 48.74187088012695, 51.4018669128418, 54.06186294555664, 56.721858978271484, 59.381858825683594, 62.04185485839844, 64.70185089111328, 67.36184692382812, 70.02184295654297, 72.68183898925781, 75.34183502197266, 78.0018310546875, 80.66182708740234, 83.32182312011719, 85.98181915283203]}, "gradients/decoder.transformer.h.11.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 5.0, 3.0, 4.0, 5.0, 7.0, 3.0, 9.0, 7.0, 11.0, 17.0, 8.0, 13.0, 17.0, 28.0, 20.0, 26.0, 25.0, 35.0, 38.0, 24.0, 36.0, 50.0, 32.0, 40.0, 36.0, 42.0, 36.0, 33.0, 32.0, 28.0, 47.0, 44.0, 26.0, 34.0, 21.0, 21.0, 24.0, 25.0, 22.0, 16.0, 14.0, 10.0, 5.0, 10.0, 7.0, 6.0, 2.0, 4.0, 2.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-8.8671875, -8.5640869140625, -8.260986328125, -7.9578857421875, -7.65478515625, -7.3516845703125, -7.048583984375, -6.7454833984375, -6.4423828125, -6.1392822265625, -5.836181640625, -5.5330810546875, -5.22998046875, -4.9268798828125, -4.623779296875, -4.3206787109375, -4.017578125, -3.7144775390625, -3.411376953125, -3.1082763671875, -2.80517578125, -2.5020751953125, -2.198974609375, -1.8958740234375, -1.5927734375, -1.2896728515625, -0.986572265625, -0.6834716796875, -0.38037109375, -0.0772705078125, 0.225830078125, 0.5289306640625, 0.83203125, 1.1351318359375, 1.438232421875, 1.7413330078125, 2.04443359375, 2.3475341796875, 2.650634765625, 2.9537353515625, 3.2568359375, 3.5599365234375, 3.863037109375, 4.1661376953125, 4.46923828125, 4.7723388671875, 5.075439453125, 5.3785400390625, 5.681640625, 5.9847412109375, 6.287841796875, 6.5909423828125, 6.89404296875, 7.1971435546875, 7.500244140625, 7.8033447265625, 8.1064453125, 8.4095458984375, 8.712646484375, 9.0157470703125, 9.31884765625, 9.6219482421875, 9.925048828125, 10.2281494140625, 10.53125]}, "gradients/decoder.transformer.h.11.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 4.0, 1.0, 0.0, 3.0, 2.0, 11.0, 4.0, 16.0, 11.0, 13.0, 12.0, 15.0, 27.0, 21.0, 35.0, 41.0, 58.0, 64.0, 85.0, 140.0, 220.0, 381.0, 840.0, 2034.0, 6350.0, 27296.0, 178546.0, 1312902.0, 2140453.0, 449780.0, 57232.0, 11738.0, 3190.0, 1240.0, 581.0, 290.0, 152.0, 118.0, 78.0, 59.0, 64.0, 38.0, 36.0, 28.0, 15.0, 18.0, 15.0, 5.0, 4.0, 10.0, 7.0, 3.0, 2.0, 1.0, 5.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-19.3125, -18.69384765625, -18.0751953125, -17.45654296875, -16.837890625, -16.21923828125, -15.6005859375, -14.98193359375, -14.36328125, -13.74462890625, -13.1259765625, -12.50732421875, -11.888671875, -11.27001953125, -10.6513671875, -10.03271484375, -9.4140625, -8.79541015625, -8.1767578125, -7.55810546875, -6.939453125, -6.32080078125, -5.7021484375, -5.08349609375, -4.46484375, -3.84619140625, -3.2275390625, -2.60888671875, -1.990234375, -1.37158203125, -0.7529296875, -0.13427734375, 0.484375, 1.10302734375, 1.7216796875, 2.34033203125, 2.958984375, 3.57763671875, 4.1962890625, 4.81494140625, 5.43359375, 6.05224609375, 6.6708984375, 7.28955078125, 7.908203125, 8.52685546875, 9.1455078125, 9.76416015625, 10.3828125, 11.00146484375, 11.6201171875, 12.23876953125, 12.857421875, 13.47607421875, 14.0947265625, 14.71337890625, 15.33203125, 15.95068359375, 16.5693359375, 17.18798828125, 17.806640625, 18.42529296875, 19.0439453125, 19.66259765625, 20.28125]}, "gradients/decoder.transformer.h.11.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 1.0, 5.0, 9.0, 15.0, 15.0, 17.0, 19.0, 28.0, 37.0, 53.0, 83.0, 99.0, 135.0, 192.0, 259.0, 315.0, 368.0, 454.0, 471.0, 375.0, 288.0, 230.0, 146.0, 132.0, 83.0, 61.0, 50.0, 36.0, 26.0, 15.0, 13.0, 12.0, 9.0, 8.0, 4.0, 6.0, 5.0, 0.0, 2.0, 3.0, 2.0, 2.0], "bins": [-23.015625, -22.43701171875, -21.8583984375, -21.27978515625, -20.701171875, -20.12255859375, -19.5439453125, -18.96533203125, -18.38671875, -17.80810546875, -17.2294921875, -16.65087890625, -16.072265625, -15.49365234375, -14.9150390625, -14.33642578125, -13.7578125, -13.17919921875, -12.6005859375, -12.02197265625, -11.443359375, -10.86474609375, -10.2861328125, -9.70751953125, -9.12890625, -8.55029296875, -7.9716796875, -7.39306640625, -6.814453125, -6.23583984375, -5.6572265625, -5.07861328125, -4.5, -3.92138671875, -3.3427734375, -2.76416015625, -2.185546875, -1.60693359375, -1.0283203125, -0.44970703125, 0.12890625, 0.70751953125, 1.2861328125, 1.86474609375, 2.443359375, 3.02197265625, 3.6005859375, 4.17919921875, 4.7578125, 5.33642578125, 5.9150390625, 6.49365234375, 7.072265625, 7.65087890625, 8.2294921875, 8.80810546875, 9.38671875, 9.96533203125, 10.5439453125, 11.12255859375, 11.701171875, 12.27978515625, 12.8583984375, 13.43701171875, 14.015625]}, "gradients/decoder.transformer.h.11.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 4.0, 3.0, 0.0, 5.0, 5.0, 9.0, 5.0, 11.0, 16.0, 12.0, 17.0, 36.0, 48.0, 55.0, 69.0, 123.0, 172.0, 236.0, 379.0, 832.0, 5939.0, 882069.0, 3284911.0, 16780.0, 1138.0, 490.0, 261.0, 191.0, 135.0, 105.0, 72.0, 48.0, 29.0, 23.0, 19.0, 13.0, 13.0, 8.0, 7.0, 2.0, 4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-65.6875, -63.07421875, -60.4609375, -57.84765625, -55.234375, -52.62109375, -50.0078125, -47.39453125, -44.78125, -42.16796875, -39.5546875, -36.94140625, -34.328125, -31.71484375, -29.1015625, -26.48828125, -23.875, -21.26171875, -18.6484375, -16.03515625, -13.421875, -10.80859375, -8.1953125, -5.58203125, -2.96875, -0.35546875, 2.2578125, 4.87109375, 7.484375, 10.09765625, 12.7109375, 15.32421875, 17.9375, 20.55078125, 23.1640625, 25.77734375, 28.390625, 31.00390625, 33.6171875, 36.23046875, 38.84375, 41.45703125, 44.0703125, 46.68359375, 49.296875, 51.91015625, 54.5234375, 57.13671875, 59.75, 62.36328125, 64.9765625, 67.58984375, 70.203125, 72.81640625, 75.4296875, 78.04296875, 80.65625, 83.26953125, 85.8828125, 88.49609375, 91.109375, 93.72265625, 96.3359375, 98.94921875, 101.5625]}, "gradients/decoder.transformer.h.11.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 6.0, 29.0, 59.0, 104.0, 152.0, 194.0, 168.0, 143.0, 83.0, 41.0, 17.0, 14.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-109.11027526855469, -104.53605651855469, -99.96183776855469, -95.38761901855469, -90.81340026855469, -86.23918151855469, -81.66495513916016, -77.09073638916016, -72.51651763916016, -67.94229888916016, -63.368080139160156, -58.79385757446289, -54.21963882446289, -49.64542007446289, -45.071197509765625, -40.496978759765625, -35.922760009765625, -31.348541259765625, -26.774320602416992, -22.20009994506836, -17.62588119506836, -13.05166244506836, -8.477441787719727, -3.9032211303710938, 0.6709976196289062, 5.245217323303223, 9.819437026977539, 14.393656730651855, 18.967876434326172, 23.542095184326172, 28.116315841674805, 32.69053649902344, 37.2647705078125, 41.8389892578125, 46.4132080078125, 50.987430572509766, 55.561649322509766, 60.135868072509766, 64.71009063720703, 69.28430938720703, 73.85852813720703, 78.43274688720703, 83.00696563720703, 87.58118438720703, 92.15541076660156, 96.72962951660156, 101.30384826660156, 105.87806701660156, 110.45228576660156, 115.02650451660156, 119.60072326660156, 124.17494201660156, 128.74916076660156, 133.32337951660156, 137.89759826660156, 142.47183227539062, 147.04605102539062, 151.62026977539062, 156.19448852539062, 160.76870727539062, 165.34292602539062, 169.91714477539062, 174.49136352539062, 179.06558227539062, 183.63980102539062]}, "gradients/decoder.transformer.h.11.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 1.0, 2.0, 3.0, 6.0, 4.0, 10.0, 9.0, 15.0, 13.0, 14.0, 20.0, 21.0, 23.0, 32.0, 21.0, 26.0, 31.0, 30.0, 26.0, 43.0, 28.0, 38.0, 36.0, 44.0, 50.0, 48.0, 55.0, 51.0, 33.0, 44.0, 34.0, 25.0, 38.0, 27.0, 19.0, 21.0, 13.0, 12.0, 11.0, 11.0, 7.0, 5.0, 4.0, 4.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-64.7603759765625, -62.68702697753906, -60.613677978515625, -58.54032516479492, -56.466976165771484, -54.39362716674805, -52.320274353027344, -50.246925354003906, -48.17357635498047, -46.10022735595703, -44.026878356933594, -41.95352554321289, -39.88017654418945, -37.806827545166016, -35.73347473144531, -33.660125732421875, -31.586776733398438, -29.513427734375, -27.44007682800293, -25.36672592163086, -23.293376922607422, -21.220027923583984, -19.146677017211914, -17.073326110839844, -14.999977111816406, -12.926627159118652, -10.853277206420898, -8.779927253723145, -6.706577301025391, -4.633227348327637, -2.559877395629883, -0.4865274429321289, 1.586822509765625, 3.660172462463379, 5.733522415161133, 7.806872367858887, 9.88022232055664, 11.953572273254395, 14.026922225952148, 16.10027313232422, 18.173622131347656, 20.246971130371094, 22.320322036743164, 24.393672943115234, 26.467021942138672, 28.54037094116211, 30.61372184753418, 32.68707275390625, 34.76042175292969, 36.833770751953125, 38.90711975097656, 40.980472564697266, 43.0538215637207, 45.12717056274414, 47.200523376464844, 49.27387237548828, 51.34722137451172, 53.420570373535156, 55.493919372558594, 57.5672721862793, 59.640621185302734, 61.71397018432617, 63.787322998046875, 65.86067199707031, 67.93402099609375]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 2.0, 2.0, 2.0, 9.0, 4.0, 10.0, 14.0, 10.0, 12.0, 13.0, 15.0, 19.0, 23.0, 24.0, 22.0, 23.0, 27.0, 31.0, 34.0, 34.0, 41.0, 31.0, 36.0, 36.0, 33.0, 31.0, 22.0, 47.0, 45.0, 34.0, 21.0, 35.0, 24.0, 31.0, 31.0, 21.0, 22.0, 30.0, 12.0, 10.0, 19.0, 16.0, 8.0, 12.0, 6.0, 6.0, 3.0, 3.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-8.8984375, -8.6199951171875, -8.341552734375, -8.0631103515625, -7.78466796875, -7.5062255859375, -7.227783203125, -6.9493408203125, -6.6708984375, -6.3924560546875, -6.114013671875, -5.8355712890625, -5.55712890625, -5.2786865234375, -5.000244140625, -4.7218017578125, -4.443359375, -4.1649169921875, -3.886474609375, -3.6080322265625, -3.32958984375, -3.0511474609375, -2.772705078125, -2.4942626953125, -2.2158203125, -1.9373779296875, -1.658935546875, -1.3804931640625, -1.10205078125, -0.8236083984375, -0.545166015625, -0.2667236328125, 0.01171875, 0.2901611328125, 0.568603515625, 0.8470458984375, 1.12548828125, 1.4039306640625, 1.682373046875, 1.9608154296875, 2.2392578125, 2.5177001953125, 2.796142578125, 3.0745849609375, 3.35302734375, 3.6314697265625, 3.909912109375, 4.1883544921875, 4.466796875, 4.7452392578125, 5.023681640625, 5.3021240234375, 5.58056640625, 5.8590087890625, 6.137451171875, 6.4158935546875, 6.6943359375, 6.9727783203125, 7.251220703125, 7.5296630859375, 7.80810546875, 8.0865478515625, 8.364990234375, 8.6434326171875, 8.921875]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 3.0, 0.0, 10.0, 4.0, 11.0, 22.0, 24.0, 51.0, 58.0, 97.0, 158.0, 238.0, 364.0, 540.0, 767.0, 1109.0, 1760.0, 2628.0, 3951.0, 5874.0, 9026.0, 13912.0, 21021.0, 32258.0, 49882.0, 76009.0, 112694.0, 152450.0, 164224.0, 132193.0, 91781.0, 60455.0, 39663.0, 25960.0, 16782.0, 10970.0, 7310.0, 4851.0, 3201.0, 2065.0, 1430.0, 933.0, 646.0, 413.0, 254.0, 161.0, 120.0, 77.0, 56.0, 35.0, 28.0, 11.0, 9.0, 6.0, 5.0, 5.0, 2.0, 0.0, 1.0, 3.0], "bins": [-0.74169921875, -0.71868896484375, -0.6956787109375, -0.67266845703125, -0.649658203125, -0.62664794921875, -0.6036376953125, -0.58062744140625, -0.5576171875, -0.53460693359375, -0.5115966796875, -0.48858642578125, -0.465576171875, -0.44256591796875, -0.4195556640625, -0.39654541015625, -0.37353515625, -0.35052490234375, -0.3275146484375, -0.30450439453125, -0.281494140625, -0.25848388671875, -0.2354736328125, -0.21246337890625, -0.189453125, -0.16644287109375, -0.1434326171875, -0.12042236328125, -0.097412109375, -0.07440185546875, -0.0513916015625, -0.02838134765625, -0.00537109375, 0.01763916015625, 0.0406494140625, 0.06365966796875, 0.086669921875, 0.10968017578125, 0.1326904296875, 0.15570068359375, 0.1787109375, 0.20172119140625, 0.2247314453125, 0.24774169921875, 0.270751953125, 0.29376220703125, 0.3167724609375, 0.33978271484375, 0.36279296875, 0.38580322265625, 0.4088134765625, 0.43182373046875, 0.454833984375, 0.47784423828125, 0.5008544921875, 0.52386474609375, 0.546875, 0.56988525390625, 0.5928955078125, 0.61590576171875, 0.638916015625, 0.66192626953125, 0.6849365234375, 0.70794677734375, 0.73095703125]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0, 5.0, 0.0, 5.0, 2.0, 6.0, 6.0, 9.0, 9.0, 13.0, 12.0, 12.0, 22.0, 26.0, 28.0, 25.0, 23.0, 37.0, 33.0, 34.0, 42.0, 43.0, 42.0, 1071.0, 43.0, 33.0, 59.0, 34.0, 37.0, 33.0, 43.0, 35.0, 25.0, 27.0, 33.0, 29.0, 14.0, 15.0, 18.0, 15.0, 7.0, 7.0, 7.0, 1.0, 5.0, 4.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-6.9609375, -6.74285888671875, -6.5247802734375, -6.30670166015625, -6.088623046875, -5.87054443359375, -5.6524658203125, -5.43438720703125, -5.21630859375, -4.99822998046875, -4.7801513671875, -4.56207275390625, -4.343994140625, -4.12591552734375, -3.9078369140625, -3.68975830078125, -3.4716796875, -3.25360107421875, -3.0355224609375, -2.81744384765625, -2.599365234375, -2.38128662109375, -2.1632080078125, -1.94512939453125, -1.72705078125, -1.50897216796875, -1.2908935546875, -1.07281494140625, -0.854736328125, -0.63665771484375, -0.4185791015625, -0.20050048828125, 0.017578125, 0.23565673828125, 0.4537353515625, 0.67181396484375, 0.889892578125, 1.10797119140625, 1.3260498046875, 1.54412841796875, 1.76220703125, 1.98028564453125, 2.1983642578125, 2.41644287109375, 2.634521484375, 2.85260009765625, 3.0706787109375, 3.28875732421875, 3.5068359375, 3.72491455078125, 3.9429931640625, 4.16107177734375, 4.379150390625, 4.59722900390625, 4.8153076171875, 5.03338623046875, 5.25146484375, 5.46954345703125, 5.6876220703125, 5.90570068359375, 6.123779296875, 6.34185791015625, 6.5599365234375, 6.77801513671875, 6.99609375]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 3.0, 4.0, 8.0, 10.0, 13.0, 26.0, 24.0, 37.0, 63.0, 85.0, 135.0, 179.0, 286.0, 486.0, 647.0, 1012.0, 1501.0, 2201.0, 3396.0, 5180.0, 7823.0, 12285.0, 18631.0, 28664.0, 43884.0, 65643.0, 95401.0, 134552.0, 1119125.0, 207778.0, 112490.0, 79539.0, 53658.0, 35748.0, 22934.0, 15043.0, 9699.0, 6421.0, 4227.0, 2808.0, 1852.0, 1231.0, 778.0, 524.0, 342.0, 257.0, 167.0, 107.0, 65.0, 56.0, 40.0, 31.0, 18.0, 10.0, 8.0, 6.0, 4.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.50244140625, -0.4866180419921875, -0.470794677734375, -0.4549713134765625, -0.43914794921875, -0.4233245849609375, -0.407501220703125, -0.3916778564453125, -0.3758544921875, -0.3600311279296875, -0.344207763671875, -0.3283843994140625, -0.31256103515625, -0.2967376708984375, -0.280914306640625, -0.2650909423828125, -0.249267578125, -0.2334442138671875, -0.217620849609375, -0.2017974853515625, -0.18597412109375, -0.1701507568359375, -0.154327392578125, -0.1385040283203125, -0.1226806640625, -0.1068572998046875, -0.091033935546875, -0.0752105712890625, -0.05938720703125, -0.0435638427734375, -0.027740478515625, -0.0119171142578125, 0.00390625, 0.0197296142578125, 0.035552978515625, 0.0513763427734375, 0.06719970703125, 0.0830230712890625, 0.098846435546875, 0.1146697998046875, 0.1304931640625, 0.1463165283203125, 0.162139892578125, 0.1779632568359375, 0.19378662109375, 0.2096099853515625, 0.225433349609375, 0.2412567138671875, 0.257080078125, 0.2729034423828125, 0.288726806640625, 0.3045501708984375, 0.32037353515625, 0.3361968994140625, 0.352020263671875, 0.3678436279296875, 0.3836669921875, 0.3994903564453125, 0.415313720703125, 0.4311370849609375, 0.44696044921875, 0.4627838134765625, 0.478607177734375, 0.4944305419921875, 0.51025390625]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 2.0, 5.0, 6.0, 6.0, 7.0, 9.0, 3.0, 7.0, 9.0, 13.0, 14.0, 11.0, 12.0, 20.0, 18.0, 26.0, 48.0, 77.0, 86.0, 126.0, 126.0, 98.0, 75.0, 37.0, 22.0, 24.0, 15.0, 13.0, 11.0, 10.0, 15.0, 10.0, 9.0, 4.0, 3.0, 5.0, 9.0, 7.0, 5.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0281219482421875, -0.02724146842956543, -0.02636098861694336, -0.02548050880432129, -0.02460002899169922, -0.02371954917907715, -0.022839069366455078, -0.021958589553833008, -0.021078109741210938, -0.020197629928588867, -0.019317150115966797, -0.018436670303344727, -0.017556190490722656, -0.016675710678100586, -0.015795230865478516, -0.014914751052856445, -0.014034271240234375, -0.013153791427612305, -0.012273311614990234, -0.011392831802368164, -0.010512351989746094, -0.009631872177124023, -0.008751392364501953, -0.007870912551879883, -0.0069904327392578125, -0.006109952926635742, -0.005229473114013672, -0.0043489933013916016, -0.0034685134887695312, -0.002588033676147461, -0.0017075538635253906, -0.0008270740509033203, 5.340576171875e-05, 0.0009338855743408203, 0.0018143653869628906, 0.002694845199584961, 0.0035753250122070312, 0.0044558048248291016, 0.005336284637451172, 0.006216764450073242, 0.0070972442626953125, 0.007977724075317383, 0.008858203887939453, 0.009738683700561523, 0.010619163513183594, 0.011499643325805664, 0.012380123138427734, 0.013260602951049805, 0.014141082763671875, 0.015021562576293945, 0.015902042388916016, 0.016782522201538086, 0.017663002014160156, 0.018543481826782227, 0.019423961639404297, 0.020304441452026367, 0.021184921264648438, 0.022065401077270508, 0.022945880889892578, 0.02382636070251465, 0.02470684051513672, 0.02558732032775879, 0.02646780014038086, 0.02734827995300293, 0.028228759765625]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 9.0, 10.0, 4.0, 7.0, 7.0, 10.0, 15.0, 20.0, 21.0, 20.0, 30.0, 37.0, 48.0, 61.0, 143.0, 330.0, 1363.0, 522036.0, 522214.0, 1362.0, 337.0, 165.0, 62.0, 42.0, 39.0, 25.0, 20.0, 22.0, 24.0, 16.0, 10.0, 4.0, 12.0, 4.0, 12.0, 4.0, 6.0, 1.0, 4.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.583984375, -0.56573486328125, -0.5474853515625, -0.52923583984375, -0.510986328125, -0.49273681640625, -0.4744873046875, -0.45623779296875, -0.43798828125, -0.41973876953125, -0.4014892578125, -0.38323974609375, -0.364990234375, -0.34674072265625, -0.3284912109375, -0.31024169921875, -0.2919921875, -0.27374267578125, -0.2554931640625, -0.23724365234375, -0.218994140625, -0.20074462890625, -0.1824951171875, -0.16424560546875, -0.14599609375, -0.12774658203125, -0.1094970703125, -0.09124755859375, -0.072998046875, -0.05474853515625, -0.0364990234375, -0.01824951171875, 0.0, 0.01824951171875, 0.0364990234375, 0.05474853515625, 0.072998046875, 0.09124755859375, 0.1094970703125, 0.12774658203125, 0.14599609375, 0.16424560546875, 0.1824951171875, 0.20074462890625, 0.218994140625, 0.23724365234375, 0.2554931640625, 0.27374267578125, 0.2919921875, 0.31024169921875, 0.3284912109375, 0.34674072265625, 0.364990234375, 0.38323974609375, 0.4014892578125, 0.41973876953125, 0.43798828125, 0.45623779296875, 0.4744873046875, 0.49273681640625, 0.510986328125, 0.52923583984375, 0.5474853515625, 0.56573486328125, 0.583984375]}, "gradients/decoder.transformer.h.11.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 376.0, 638.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0387532003223896, -0.026014698669314384, -0.013276197016239166, -0.0005376972258090973, 0.01220080628991127, 0.024939309805631638, 0.03767780587077141, 0.050416309386491776, 0.06315481662750244, 0.07589332014322281, 0.08863182365894318, 0.10137031972408295, 0.11410882323980331, 0.12684732675552368, 0.13958582282066345, 0.15232431888580322, 0.1650628298521042, 0.17780132591724396, 0.19053983688354492, 0.2032783329486847, 0.21601682901382446, 0.22875533998012543, 0.2414938360452652, 0.25423234701156616, 0.26697084307670593, 0.2797093391418457, 0.2924478352069855, 0.30518633127212524, 0.3179248571395874, 0.3306633532047272, 0.34340184926986694, 0.3561403453350067, 0.3688788414001465, 0.38161733746528625, 0.394355833530426, 0.4070943593978882, 0.41983285546302795, 0.4325713515281677, 0.4453098475933075, 0.45804834365844727, 0.4707868695259094, 0.4835253655910492, 0.49626386165618896, 0.5090023875236511, 0.5217408537864685, 0.5344793796539307, 0.547217845916748, 0.5599563717842102, 0.5726948976516724, 0.5854334235191345, 0.5981718897819519, 0.6109104156494141, 0.6236488819122314, 0.6363874077796936, 0.6491259336471558, 0.6618643999099731, 0.6746028661727905, 0.6873413920402527, 0.7000798583030701, 0.7128183841705322, 0.7255568504333496, 0.7382953763008118, 0.7510339021682739, 0.7637723684310913, 0.7765108942985535]}, "gradients/decoder.transformer.h.11.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 4.0, 6.0, 16.0, 27.0, 28.0, 36.0, 53.0, 54.0, 79.0, 68.0, 78.0, 81.0, 99.0, 86.0, 75.0, 64.0, 50.0, 34.0, 23.0, 23.0, 13.0, 4.0, 6.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.037107884883880615, -0.03602931648492813, -0.03495074436068535, -0.033872172236442566, -0.03279360383749008, -0.0317150354385376, -0.030636463314294815, -0.02955789305269718, -0.02847932279109955, -0.027400752529501915, -0.02632218226790428, -0.025243612006306648, -0.024165041744709015, -0.02308647148311138, -0.022007901221513748, -0.020929330959916115, -0.01985076069831848, -0.018772190436720848, -0.017693620175123215, -0.01661504991352558, -0.015536479651927948, -0.014457909390330315, -0.013379339128732681, -0.012300768867135048, -0.011222198605537415, -0.010143628343939781, -0.009065058082342148, -0.007986487820744514, -0.006907917559146881, -0.005829347297549248, -0.004750777035951614, -0.003672206774353981, -0.0025936365127563477, -0.0015150662511587143, -0.00043649598956108093, 0.0006420742720365524, 0.0017206445336341858, 0.002799214795231819, 0.0038777850568294525, 0.004956355318427086, 0.006034925580024719, 0.007113495841622353, 0.008192066103219986, 0.00927063636481762, 0.010349206626415253, 0.011427776888012886, 0.01250634714961052, 0.013584917411208153, 0.014663487672805786, 0.01574205793440342, 0.016820628196001053, 0.017899198457598686, 0.01897776871919632, 0.020056338980793953, 0.021134909242391586, 0.02221347950398922, 0.023292049765586853, 0.024370620027184486, 0.02544919028878212, 0.026527760550379753, 0.027606330811977386, 0.02868490107357502, 0.029763471335172653, 0.030842041596770287, 0.03192061185836792]}, "gradients/decoder.transformer.h.11.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 2.0, 2.0, 2.0, 9.0, 4.0, 10.0, 14.0, 10.0, 12.0, 13.0, 15.0, 19.0, 23.0, 24.0, 22.0, 23.0, 27.0, 31.0, 33.0, 35.0, 41.0, 31.0, 35.0, 37.0, 32.0, 32.0, 22.0, 47.0, 45.0, 34.0, 21.0, 35.0, 24.0, 31.0, 31.0, 21.0, 22.0, 30.0, 12.0, 10.0, 19.0, 16.0, 8.0, 12.0, 6.0, 6.0, 3.0, 3.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-8.8984375, -8.6199951171875, -8.341552734375, -8.0631103515625, -7.78466796875, -7.5062255859375, -7.227783203125, -6.9493408203125, -6.6708984375, -6.3924560546875, -6.114013671875, -5.8355712890625, -5.55712890625, -5.2786865234375, -5.000244140625, -4.7218017578125, -4.443359375, -4.1649169921875, -3.886474609375, -3.6080322265625, -3.32958984375, -3.0511474609375, -2.772705078125, -2.4942626953125, -2.2158203125, -1.9373779296875, -1.658935546875, -1.3804931640625, -1.10205078125, -0.8236083984375, -0.545166015625, -0.2667236328125, 0.01171875, 0.2901611328125, 0.568603515625, 0.8470458984375, 1.12548828125, 1.4039306640625, 1.682373046875, 1.9608154296875, 2.2392578125, 2.5177001953125, 2.796142578125, 3.0745849609375, 3.35302734375, 3.6314697265625, 3.909912109375, 4.1883544921875, 4.466796875, 4.7452392578125, 5.023681640625, 5.3021240234375, 5.58056640625, 5.8590087890625, 6.137451171875, 6.4158935546875, 6.6943359375, 6.9727783203125, 7.251220703125, 7.5296630859375, 7.80810546875, 8.0865478515625, 8.364990234375, 8.6434326171875, 8.921875]}, "gradients/decoder.transformer.h.11.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 3.0, 5.0, 8.0, 8.0, 11.0, 19.0, 18.0, 25.0, 46.0, 59.0, 71.0, 120.0, 155.0, 204.0, 307.0, 493.0, 702.0, 1054.0, 1609.0, 2615.0, 4276.0, 7369.0, 12633.0, 23499.0, 49477.0, 122754.0, 309119.0, 293311.0, 117159.0, 47653.0, 23125.0, 12282.0, 6931.0, 4025.0, 2558.0, 1580.0, 1074.0, 693.0, 460.0, 323.0, 219.0, 148.0, 116.0, 81.0, 57.0, 37.0, 25.0, 15.0, 11.0, 6.0, 4.0, 9.0, 4.0, 6.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.3515625, -7.12164306640625, -6.8917236328125, -6.66180419921875, -6.431884765625, -6.20196533203125, -5.9720458984375, -5.74212646484375, -5.51220703125, -5.28228759765625, -5.0523681640625, -4.82244873046875, -4.592529296875, -4.36260986328125, -4.1326904296875, -3.90277099609375, -3.6728515625, -3.44293212890625, -3.2130126953125, -2.98309326171875, -2.753173828125, -2.52325439453125, -2.2933349609375, -2.06341552734375, -1.83349609375, -1.60357666015625, -1.3736572265625, -1.14373779296875, -0.913818359375, -0.68389892578125, -0.4539794921875, -0.22406005859375, 0.005859375, 0.23577880859375, 0.4656982421875, 0.69561767578125, 0.925537109375, 1.15545654296875, 1.3853759765625, 1.61529541015625, 1.84521484375, 2.07513427734375, 2.3050537109375, 2.53497314453125, 2.764892578125, 2.99481201171875, 3.2247314453125, 3.45465087890625, 3.6845703125, 3.91448974609375, 4.1444091796875, 4.37432861328125, 4.604248046875, 4.83416748046875, 5.0640869140625, 5.29400634765625, 5.52392578125, 5.75384521484375, 5.9837646484375, 6.21368408203125, 6.443603515625, 6.67352294921875, 6.9034423828125, 7.13336181640625, 7.36328125]}, "gradients/decoder.transformer.h.11.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 5.0, 2.0, 3.0, 5.0, 6.0, 7.0, 12.0, 13.0, 14.0, 20.0, 14.0, 19.0, 26.0, 29.0, 37.0, 39.0, 59.0, 42.0, 64.0, 88.0, 301.0, 1640.0, 131.0, 74.0, 56.0, 49.0, 48.0, 41.0, 30.0, 35.0, 24.0, 19.0, 18.0, 14.0, 19.0, 15.0, 9.0, 7.0, 6.0, 5.0, 6.0, 1.0, 3.0, 2.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-40.0625, -38.8583984375, -37.654296875, -36.4501953125, -35.24609375, -34.0419921875, -32.837890625, -31.6337890625, -30.4296875, -29.2255859375, -28.021484375, -26.8173828125, -25.61328125, -24.4091796875, -23.205078125, -22.0009765625, -20.796875, -19.5927734375, -18.388671875, -17.1845703125, -15.98046875, -14.7763671875, -13.572265625, -12.3681640625, -11.1640625, -9.9599609375, -8.755859375, -7.5517578125, -6.34765625, -5.1435546875, -3.939453125, -2.7353515625, -1.53125, -0.3271484375, 0.876953125, 2.0810546875, 3.28515625, 4.4892578125, 5.693359375, 6.8974609375, 8.1015625, 9.3056640625, 10.509765625, 11.7138671875, 12.91796875, 14.1220703125, 15.326171875, 16.5302734375, 17.734375, 18.9384765625, 20.142578125, 21.3466796875, 22.55078125, 23.7548828125, 24.958984375, 26.1630859375, 27.3671875, 28.5712890625, 29.775390625, 30.9794921875, 32.18359375, 33.3876953125, 34.591796875, 35.7958984375, 37.0]}, "gradients/decoder.transformer.h.11.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 5.0, 0.0, 0.0, 3.0, 0.0, 4.0, 3.0, 5.0, 10.0, 6.0, 14.0, 26.0, 23.0, 24.0, 24.0, 32.0, 50.0, 63.0, 108.0, 132.0, 162.0, 216.0, 341.0, 943.0, 10122.0, 3028891.0, 101140.0, 1921.0, 476.0, 265.0, 187.0, 131.0, 88.0, 70.0, 51.0, 32.0, 34.0, 20.0, 29.0, 20.0, 17.0, 8.0, 7.0, 6.0, 1.0, 5.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-87.875, -85.0146484375, -82.154296875, -79.2939453125, -76.43359375, -73.5732421875, -70.712890625, -67.8525390625, -64.9921875, -62.1318359375, -59.271484375, -56.4111328125, -53.55078125, -50.6904296875, -47.830078125, -44.9697265625, -42.109375, -39.2490234375, -36.388671875, -33.5283203125, -30.66796875, -27.8076171875, -24.947265625, -22.0869140625, -19.2265625, -16.3662109375, -13.505859375, -10.6455078125, -7.78515625, -4.9248046875, -2.064453125, 0.7958984375, 3.65625, 6.5166015625, 9.376953125, 12.2373046875, 15.09765625, 17.9580078125, 20.818359375, 23.6787109375, 26.5390625, 29.3994140625, 32.259765625, 35.1201171875, 37.98046875, 40.8408203125, 43.701171875, 46.5615234375, 49.421875, 52.2822265625, 55.142578125, 58.0029296875, 60.86328125, 63.7236328125, 66.583984375, 69.4443359375, 72.3046875, 75.1650390625, 78.025390625, 80.8857421875, 83.74609375, 86.6064453125, 89.466796875, 92.3271484375, 95.1875]}, "gradients/decoder.transformer.h.11.ln_1.weight": {"_type": "histogram", "values": [17.0, 405.0, 564.0, 32.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.939311981201172, -9.50727653503418, -1.0752410888671875, 7.356794357299805, 15.788829803466797, 24.220867156982422, 32.65290069580078, 41.08493423461914, 49.516971588134766, 57.949005126953125, 66.38104248046875, 74.81307983398438, 83.2451171875, 91.67715454101562, 100.10918426513672, 108.54121398925781, 116.97325134277344, 125.40528869628906, 133.8373260498047, 142.2693634033203, 150.70138549804688, 159.1334228515625, 167.56546020507812, 175.99749755859375, 184.42953491210938, 192.861572265625, 201.29360961914062, 209.72564697265625, 218.15768432617188, 226.5897216796875, 235.02174377441406, 243.4537811279297, 251.88580322265625, 260.3178405761719, 268.7498779296875, 277.1819152832031, 285.61395263671875, 294.0459899902344, 302.47802734375, 310.9100341796875, 319.34210205078125, 327.7741394042969, 336.2061767578125, 344.6382141113281, 353.07025146484375, 361.5022888183594, 369.934326171875, 378.3663330078125, 386.7983703613281, 395.23040771484375, 403.6624450683594, 412.094482421875, 420.5265197753906, 428.95855712890625, 437.3905944824219, 445.8226318359375, 454.254638671875, 462.6866760253906, 471.11871337890625, 479.5507507324219, 487.9827880859375, 496.4148254394531, 504.84686279296875, 513.2788696289062, 521.7109375]}, "gradients/decoder.transformer.h.11.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 3.0, 5.0, 3.0, 3.0, 12.0, 12.0, 13.0, 11.0, 13.0, 13.0, 19.0, 16.0, 26.0, 35.0, 32.0, 31.0, 38.0, 44.0, 46.0, 33.0, 39.0, 50.0, 46.0, 35.0, 56.0, 36.0, 39.0, 40.0, 34.0, 29.0, 27.0, 24.0, 19.0, 28.0, 23.0, 17.0, 14.0, 9.0, 13.0, 4.0, 5.0, 5.0, 3.0, 4.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-90.13233184814453, -87.25057220458984, -84.36881256103516, -81.48704528808594, -78.60528564453125, -75.72352600097656, -72.84176635742188, -69.96000671386719, -67.0782470703125, -64.19648742675781, -61.31472396850586, -58.43296432495117, -55.55120086669922, -52.66944122314453, -49.787681579589844, -46.905921936035156, -44.02415466308594, -41.14239501953125, -38.2606315612793, -35.37887191772461, -32.497108459472656, -29.61534881591797, -26.73358917236328, -23.85182762145996, -20.97006607055664, -18.08830451965332, -15.206543922424316, -12.324783325195312, -9.443021774291992, -6.561260223388672, -3.6795005798339844, -0.7977390289306641, 2.0840225219726562, 4.965783596038818, 7.8475446701049805, 10.729305267333984, 13.611066818237305, 16.492828369140625, 19.374588012695312, 22.256349563598633, 25.138111114501953, 28.019872665405273, 30.901634216308594, 33.78339385986328, 36.66515350341797, 39.54691696166992, 42.42867660522461, 45.31044006347656, 48.19219970703125, 51.07395935058594, 53.95572280883789, 56.83748245239258, 59.71924591064453, 62.60100555419922, 65.4827651977539, 68.3645248413086, 71.24629211425781, 74.1280517578125, 77.00981140136719, 79.89157104492188, 82.7733383178711, 85.65509796142578, 88.53685760498047, 91.41861724853516, 94.30037689208984]}, "gradients/decoder.transformer.h.10.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 3.0, 4.0, 4.0, 8.0, 5.0, 7.0, 9.0, 15.0, 13.0, 9.0, 21.0, 13.0, 22.0, 33.0, 25.0, 32.0, 30.0, 20.0, 37.0, 39.0, 37.0, 39.0, 44.0, 37.0, 31.0, 42.0, 43.0, 28.0, 32.0, 33.0, 34.0, 29.0, 32.0, 33.0, 32.0, 20.0, 17.0, 11.0, 18.0, 17.0, 17.0, 7.0, 5.0, 6.0, 4.0, 4.0, 5.0, 5.0, 2.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 1.0], "bins": [-10.0546875, -9.741943359375, -9.42919921875, -9.116455078125, -8.8037109375, -8.490966796875, -8.17822265625, -7.865478515625, -7.552734375, -7.239990234375, -6.92724609375, -6.614501953125, -6.3017578125, -5.989013671875, -5.67626953125, -5.363525390625, -5.05078125, -4.738037109375, -4.42529296875, -4.112548828125, -3.7998046875, -3.487060546875, -3.17431640625, -2.861572265625, -2.548828125, -2.236083984375, -1.92333984375, -1.610595703125, -1.2978515625, -0.985107421875, -0.67236328125, -0.359619140625, -0.046875, 0.265869140625, 0.57861328125, 0.891357421875, 1.2041015625, 1.516845703125, 1.82958984375, 2.142333984375, 2.455078125, 2.767822265625, 3.08056640625, 3.393310546875, 3.7060546875, 4.018798828125, 4.33154296875, 4.644287109375, 4.95703125, 5.269775390625, 5.58251953125, 5.895263671875, 6.2080078125, 6.520751953125, 6.83349609375, 7.146240234375, 7.458984375, 7.771728515625, 8.08447265625, 8.397216796875, 8.7099609375, 9.022705078125, 9.33544921875, 9.648193359375, 9.9609375]}, "gradients/decoder.transformer.h.10.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 0.0, 1.0, 4.0, 7.0, 1.0, 3.0, 6.0, 6.0, 9.0, 8.0, 9.0, 11.0, 16.0, 27.0, 27.0, 23.0, 37.0, 40.0, 43.0, 52.0, 98.0, 167.0, 388.0, 1062.0, 4135.0, 24780.0, 336664.0, 2843384.0, 919550.0, 53387.0, 7240.0, 1727.0, 632.0, 240.0, 117.0, 79.0, 63.0, 51.0, 38.0, 22.0, 24.0, 28.0, 12.0, 11.0, 14.0, 8.0, 9.0, 9.0, 6.0, 5.0, 1.0, 3.0, 1.0, 3.0, 3.0, 4.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-27.25, -26.361572265625, -25.47314453125, -24.584716796875, -23.6962890625, -22.807861328125, -21.91943359375, -21.031005859375, -20.142578125, -19.254150390625, -18.36572265625, -17.477294921875, -16.5888671875, -15.700439453125, -14.81201171875, -13.923583984375, -13.03515625, -12.146728515625, -11.25830078125, -10.369873046875, -9.4814453125, -8.593017578125, -7.70458984375, -6.816162109375, -5.927734375, -5.039306640625, -4.15087890625, -3.262451171875, -2.3740234375, -1.485595703125, -0.59716796875, 0.291259765625, 1.1796875, 2.068115234375, 2.95654296875, 3.844970703125, 4.7333984375, 5.621826171875, 6.51025390625, 7.398681640625, 8.287109375, 9.175537109375, 10.06396484375, 10.952392578125, 11.8408203125, 12.729248046875, 13.61767578125, 14.506103515625, 15.39453125, 16.282958984375, 17.17138671875, 18.059814453125, 18.9482421875, 19.836669921875, 20.72509765625, 21.613525390625, 22.501953125, 23.390380859375, 24.27880859375, 25.167236328125, 26.0556640625, 26.944091796875, 27.83251953125, 28.720947265625, 29.609375]}, "gradients/decoder.transformer.h.10.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 2.0, 3.0, 5.0, 6.0, 6.0, 13.0, 25.0, 34.0, 66.0, 89.0, 108.0, 188.0, 260.0, 425.0, 594.0, 580.0, 526.0, 364.0, 226.0, 184.0, 120.0, 77.0, 51.0, 41.0, 27.0, 22.0, 12.0, 6.0, 13.0, 9.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.203125, -28.4208984375, -27.638671875, -26.8564453125, -26.07421875, -25.2919921875, -24.509765625, -23.7275390625, -22.9453125, -22.1630859375, -21.380859375, -20.5986328125, -19.81640625, -19.0341796875, -18.251953125, -17.4697265625, -16.6875, -15.9052734375, -15.123046875, -14.3408203125, -13.55859375, -12.7763671875, -11.994140625, -11.2119140625, -10.4296875, -9.6474609375, -8.865234375, -8.0830078125, -7.30078125, -6.5185546875, -5.736328125, -4.9541015625, -4.171875, -3.3896484375, -2.607421875, -1.8251953125, -1.04296875, -0.2607421875, 0.521484375, 1.3037109375, 2.0859375, 2.8681640625, 3.650390625, 4.4326171875, 5.21484375, 5.9970703125, 6.779296875, 7.5615234375, 8.34375, 9.1259765625, 9.908203125, 10.6904296875, 11.47265625, 12.2548828125, 13.037109375, 13.8193359375, 14.6015625, 15.3837890625, 16.166015625, 16.9482421875, 17.73046875, 18.5126953125, 19.294921875, 20.0771484375, 20.859375]}, "gradients/decoder.transformer.h.10.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 5.0, 7.0, 3.0, 10.0, 12.0, 24.0, 29.0, 35.0, 47.0, 60.0, 83.0, 126.0, 207.0, 269.0, 463.0, 1130.0, 12744.0, 2952443.0, 1217224.0, 7190.0, 973.0, 409.0, 245.0, 167.0, 129.0, 87.0, 61.0, 38.0, 17.0, 19.0, 10.0, 8.0, 6.0, 4.0, 2.0, 2.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-73.125, -70.3037109375, -67.482421875, -64.6611328125, -61.83984375, -59.0185546875, -56.197265625, -53.3759765625, -50.5546875, -47.7333984375, -44.912109375, -42.0908203125, -39.26953125, -36.4482421875, -33.626953125, -30.8056640625, -27.984375, -25.1630859375, -22.341796875, -19.5205078125, -16.69921875, -13.8779296875, -11.056640625, -8.2353515625, -5.4140625, -2.5927734375, 0.228515625, 3.0498046875, 5.87109375, 8.6923828125, 11.513671875, 14.3349609375, 17.15625, 19.9775390625, 22.798828125, 25.6201171875, 28.44140625, 31.2626953125, 34.083984375, 36.9052734375, 39.7265625, 42.5478515625, 45.369140625, 48.1904296875, 51.01171875, 53.8330078125, 56.654296875, 59.4755859375, 62.296875, 65.1181640625, 67.939453125, 70.7607421875, 73.58203125, 76.4033203125, 79.224609375, 82.0458984375, 84.8671875, 87.6884765625, 90.509765625, 93.3310546875, 96.15234375, 98.9736328125, 101.794921875, 104.6162109375, 107.4375]}, "gradients/decoder.transformer.h.10.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 8.0, 17.0, 51.0, 91.0, 161.0, 213.0, 173.0, 132.0, 97.0, 46.0, 17.0, 5.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-78.01551055908203, -73.3468017578125, -68.67809295654297, -64.00938415527344, -59.34068298339844, -54.671974182128906, -50.003265380859375, -45.33456039428711, -40.66585159301758, -35.99714279174805, -31.32843780517578, -26.65972900390625, -21.99102210998535, -17.322315216064453, -12.653606414794922, -7.984901428222656, -3.316192626953125, 1.3525147438049316, 6.021222114562988, 10.689929962158203, 15.358636856079102, 20.02734375, 24.69605255126953, 29.364757537841797, 34.03346633911133, 38.70217514038086, 43.370880126953125, 48.039588928222656, 52.70829772949219, 57.37700271606445, 62.045711517333984, 66.71441650390625, 71.38311767578125, 76.05182647705078, 80.72053527832031, 85.38923645019531, 90.05794525146484, 94.72665405273438, 99.3953628540039, 104.06407165527344, 108.73277282714844, 113.40148162841797, 118.0701904296875, 122.7388916015625, 127.40760040283203, 132.07630920410156, 136.74502563476562, 141.41372680664062, 146.08242797851562, 150.75112915039062, 155.4198455810547, 160.0885467529297, 164.75726318359375, 169.42596435546875, 174.09466552734375, 178.7633819580078, 183.43209838867188, 188.10079956054688, 192.76951599121094, 197.43821716308594, 202.10693359375, 206.775634765625, 211.4443359375, 216.11305236816406, 220.78175354003906]}, "gradients/decoder.transformer.h.10.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 4.0, 1.0, 2.0, 2.0, 10.0, 8.0, 6.0, 5.0, 10.0, 17.0, 15.0, 23.0, 18.0, 14.0, 22.0, 20.0, 24.0, 23.0, 31.0, 28.0, 33.0, 43.0, 33.0, 37.0, 42.0, 46.0, 34.0, 39.0, 34.0, 34.0, 46.0, 31.0, 29.0, 25.0, 25.0, 22.0, 25.0, 20.0, 15.0, 13.0, 16.0, 12.0, 11.0, 12.0, 10.0, 8.0, 6.0, 4.0, 6.0, 3.0, 1.0, 2.0, 5.0, 0.0, 3.0, 3.0], "bins": [-59.52677917480469, -57.77389907836914, -56.021018981933594, -54.26813507080078, -52.515254974365234, -50.76237487792969, -49.00949478149414, -47.256614685058594, -45.50373077392578, -43.750850677490234, -41.99797058105469, -40.245086669921875, -38.49220657348633, -36.73932647705078, -34.986446380615234, -33.23356628417969, -31.48068618774414, -29.727806091308594, -27.974924087524414, -26.222043991088867, -24.469161987304688, -22.71628189086914, -20.963401794433594, -19.210521697998047, -17.457639694213867, -15.704758644104004, -13.95187759399414, -12.198997497558594, -10.44611644744873, -8.693235397338867, -6.94035530090332, -5.187474250793457, -3.4345932006835938, -1.6817123889923096, 0.07116842269897461, 1.8240489959716797, 3.576930046081543, 5.329811096191406, 7.082691192626953, 8.835572242736816, 10.58845329284668, 12.341334342956543, 14.094215393066406, 15.847095489501953, 17.5999755859375, 19.35285758972168, 21.105737686157227, 22.858619689941406, 24.611499786376953, 26.3643798828125, 28.11726188659668, 29.870141983032227, 31.623023986816406, 33.37590408325195, 35.1287841796875, 36.88166427612305, 38.634544372558594, 40.38742446899414, 42.14030456542969, 43.8931884765625, 45.64606857299805, 47.398948669433594, 49.15182876586914, 50.90470886230469, 52.6575927734375]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 7.0, 3.0, 2.0, 3.0, 6.0, 1.0, 11.0, 5.0, 3.0, 7.0, 17.0, 13.0, 19.0, 18.0, 22.0, 18.0, 32.0, 18.0, 25.0, 37.0, 26.0, 41.0, 37.0, 36.0, 34.0, 48.0, 51.0, 39.0, 34.0, 32.0, 36.0, 37.0, 40.0, 39.0, 33.0, 23.0, 20.0, 20.0, 21.0, 15.0, 12.0, 14.0, 6.0, 6.0, 5.0, 7.0, 11.0, 5.0, 6.0, 6.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0], "bins": [-10.2265625, -9.92724609375, -9.6279296875, -9.32861328125, -9.029296875, -8.72998046875, -8.4306640625, -8.13134765625, -7.83203125, -7.53271484375, -7.2333984375, -6.93408203125, -6.634765625, -6.33544921875, -6.0361328125, -5.73681640625, -5.4375, -5.13818359375, -4.8388671875, -4.53955078125, -4.240234375, -3.94091796875, -3.6416015625, -3.34228515625, -3.04296875, -2.74365234375, -2.4443359375, -2.14501953125, -1.845703125, -1.54638671875, -1.2470703125, -0.94775390625, -0.6484375, -0.34912109375, -0.0498046875, 0.24951171875, 0.548828125, 0.84814453125, 1.1474609375, 1.44677734375, 1.74609375, 2.04541015625, 2.3447265625, 2.64404296875, 2.943359375, 3.24267578125, 3.5419921875, 3.84130859375, 4.140625, 4.43994140625, 4.7392578125, 5.03857421875, 5.337890625, 5.63720703125, 5.9365234375, 6.23583984375, 6.53515625, 6.83447265625, 7.1337890625, 7.43310546875, 7.732421875, 8.03173828125, 8.3310546875, 8.63037109375, 8.9296875]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 4.0, 8.0, 6.0, 9.0, 9.0, 28.0, 25.0, 39.0, 51.0, 94.0, 132.0, 191.0, 307.0, 453.0, 605.0, 908.0, 1362.0, 2038.0, 2965.0, 4665.0, 6832.0, 10098.0, 15352.0, 23033.0, 35487.0, 54227.0, 82471.0, 120331.0, 156877.0, 158810.0, 122880.0, 84688.0, 56088.0, 36368.0, 23768.0, 15763.0, 10506.0, 6834.0, 4777.0, 3131.0, 2015.0, 1423.0, 926.0, 614.0, 429.0, 310.0, 196.0, 132.0, 119.0, 51.0, 38.0, 33.0, 20.0, 19.0, 14.0, 3.0, 1.0, 1.0, 2.0, 1.0, 3.0], "bins": [-0.72265625, -0.7000961303710938, -0.6775360107421875, -0.6549758911132812, -0.632415771484375, -0.6098556518554688, -0.5872955322265625, -0.5647354125976562, -0.54217529296875, -0.5196151733398438, -0.4970550537109375, -0.47449493408203125, -0.451934814453125, -0.42937469482421875, -0.4068145751953125, -0.38425445556640625, -0.3616943359375, -0.33913421630859375, -0.3165740966796875, -0.29401397705078125, -0.271453857421875, -0.24889373779296875, -0.2263336181640625, -0.20377349853515625, -0.18121337890625, -0.15865325927734375, -0.1360931396484375, -0.11353302001953125, -0.090972900390625, -0.06841278076171875, -0.0458526611328125, -0.02329254150390625, -0.000732421875, 0.02182769775390625, 0.0443878173828125, 0.06694793701171875, 0.089508056640625, 0.11206817626953125, 0.1346282958984375, 0.15718841552734375, 0.17974853515625, 0.20230865478515625, 0.2248687744140625, 0.24742889404296875, 0.269989013671875, 0.29254913330078125, 0.3151092529296875, 0.33766937255859375, 0.3602294921875, 0.38278961181640625, 0.4053497314453125, 0.42790985107421875, 0.450469970703125, 0.47303009033203125, 0.4955902099609375, 0.5181503295898438, 0.54071044921875, 0.5632705688476562, 0.5858306884765625, 0.6083908081054688, 0.630950927734375, 0.6535110473632812, 0.6760711669921875, 0.6986312866210938, 0.72119140625]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 5.0, 3.0, 7.0, 9.0, 5.0, 14.0, 4.0, 20.0, 17.0, 17.0, 20.0, 28.0, 19.0, 35.0, 30.0, 34.0, 45.0, 30.0, 34.0, 47.0, 38.0, 39.0, 1069.0, 39.0, 34.0, 30.0, 46.0, 36.0, 31.0, 34.0, 37.0, 28.0, 18.0, 19.0, 19.0, 26.0, 11.0, 16.0, 8.0, 7.0, 3.0, 9.0, 7.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.9921875, -5.78045654296875, -5.5687255859375, -5.35699462890625, -5.145263671875, -4.93353271484375, -4.7218017578125, -4.51007080078125, -4.29833984375, -4.08660888671875, -3.8748779296875, -3.66314697265625, -3.451416015625, -3.23968505859375, -3.0279541015625, -2.81622314453125, -2.6044921875, -2.39276123046875, -2.1810302734375, -1.96929931640625, -1.757568359375, -1.54583740234375, -1.3341064453125, -1.12237548828125, -0.91064453125, -0.69891357421875, -0.4871826171875, -0.27545166015625, -0.063720703125, 0.14801025390625, 0.3597412109375, 0.57147216796875, 0.783203125, 0.99493408203125, 1.2066650390625, 1.41839599609375, 1.630126953125, 1.84185791015625, 2.0535888671875, 2.26531982421875, 2.47705078125, 2.68878173828125, 2.9005126953125, 3.11224365234375, 3.323974609375, 3.53570556640625, 3.7474365234375, 3.95916748046875, 4.1708984375, 4.38262939453125, 4.5943603515625, 4.80609130859375, 5.017822265625, 5.22955322265625, 5.4412841796875, 5.65301513671875, 5.86474609375, 6.07647705078125, 6.2882080078125, 6.49993896484375, 6.711669921875, 6.92340087890625, 7.1351318359375, 7.34686279296875, 7.55859375]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 3.0, 4.0, 11.0, 16.0, 27.0, 37.0, 63.0, 107.0, 150.0, 244.0, 324.0, 594.0, 965.0, 1431.0, 2250.0, 3797.0, 6044.0, 9651.0, 15491.0, 25599.0, 42095.0, 66162.0, 100664.0, 145478.0, 1158324.0, 192850.0, 115920.0, 77638.0, 50158.0, 31040.0, 18987.0, 11714.0, 7248.0, 4583.0, 2763.0, 1739.0, 1123.0, 661.0, 379.0, 260.0, 190.0, 142.0, 71.0, 49.0, 36.0, 21.0, 12.0, 13.0, 9.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.60107421875, -0.5826873779296875, -0.564300537109375, -0.5459136962890625, -0.52752685546875, -0.5091400146484375, -0.490753173828125, -0.4723663330078125, -0.4539794921875, -0.4355926513671875, -0.417205810546875, -0.3988189697265625, -0.38043212890625, -0.3620452880859375, -0.343658447265625, -0.3252716064453125, -0.306884765625, -0.2884979248046875, -0.270111083984375, -0.2517242431640625, -0.23333740234375, -0.2149505615234375, -0.196563720703125, -0.1781768798828125, -0.1597900390625, -0.1414031982421875, -0.123016357421875, -0.1046295166015625, -0.08624267578125, -0.0678558349609375, -0.049468994140625, -0.0310821533203125, -0.0126953125, 0.0056915283203125, 0.024078369140625, 0.0424652099609375, 0.06085205078125, 0.0792388916015625, 0.097625732421875, 0.1160125732421875, 0.1343994140625, 0.1527862548828125, 0.171173095703125, 0.1895599365234375, 0.20794677734375, 0.2263336181640625, 0.244720458984375, 0.2631072998046875, 0.281494140625, 0.2998809814453125, 0.318267822265625, 0.3366546630859375, 0.35504150390625, 0.3734283447265625, 0.391815185546875, 0.4102020263671875, 0.4285888671875, 0.4469757080078125, 0.465362548828125, 0.4837493896484375, 0.50213623046875, 0.5205230712890625, 0.538909912109375, 0.5572967529296875, 0.57568359375]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 5.0, 1.0, 4.0, 4.0, 1.0, 2.0, 5.0, 6.0, 6.0, 8.0, 8.0, 11.0, 10.0, 9.0, 15.0, 17.0, 27.0, 40.0, 39.0, 47.0, 71.0, 104.0, 99.0, 88.0, 74.0, 75.0, 44.0, 33.0, 30.0, 27.0, 16.0, 18.0, 11.0, 6.0, 6.0, 9.0, 4.0, 4.0, 2.0, 3.0, 2.0, 6.0, 3.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.0266265869140625, -0.02572035789489746, -0.024814128875732422, -0.023907899856567383, -0.023001670837402344, -0.022095441818237305, -0.021189212799072266, -0.020282983779907227, -0.019376754760742188, -0.01847052574157715, -0.01756429672241211, -0.01665806770324707, -0.01575183868408203, -0.014845609664916992, -0.013939380645751953, -0.013033151626586914, -0.012126922607421875, -0.011220693588256836, -0.010314464569091797, -0.009408235549926758, -0.008502006530761719, -0.00759577751159668, -0.006689548492431641, -0.0057833194732666016, -0.0048770904541015625, -0.0039708614349365234, -0.0030646324157714844, -0.0021584033966064453, -0.0012521743774414062, -0.0003459453582763672, 0.0005602836608886719, 0.001466512680053711, 0.00237274169921875, 0.003278970718383789, 0.004185199737548828, 0.005091428756713867, 0.005997657775878906, 0.006903886795043945, 0.007810115814208984, 0.008716344833374023, 0.009622573852539062, 0.010528802871704102, 0.01143503189086914, 0.01234126091003418, 0.013247489929199219, 0.014153718948364258, 0.015059947967529297, 0.015966176986694336, 0.016872406005859375, 0.017778635025024414, 0.018684864044189453, 0.019591093063354492, 0.02049732208251953, 0.02140355110168457, 0.02230978012084961, 0.02321600914001465, 0.024122238159179688, 0.025028467178344727, 0.025934696197509766, 0.026840925216674805, 0.027747154235839844, 0.028653383255004883, 0.029559612274169922, 0.03046584129333496, 0.0313720703125]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 5.0, 5.0, 5.0, 6.0, 7.0, 4.0, 11.0, 10.0, 17.0, 17.0, 17.0, 28.0, 39.0, 46.0, 99.0, 117.0, 199.0, 336.0, 1174.0, 417350.0, 626916.0, 1214.0, 383.0, 163.0, 95.0, 62.0, 51.0, 33.0, 28.0, 17.0, 8.0, 25.0, 12.0, 9.0, 8.0, 4.0, 7.0, 3.0, 2.0, 2.0, 5.0, 2.0, 1.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.634765625, -0.61663818359375, -0.5985107421875, -0.58038330078125, -0.562255859375, -0.54412841796875, -0.5260009765625, -0.50787353515625, -0.48974609375, -0.47161865234375, -0.4534912109375, -0.43536376953125, -0.417236328125, -0.39910888671875, -0.3809814453125, -0.36285400390625, -0.3447265625, -0.32659912109375, -0.3084716796875, -0.29034423828125, -0.272216796875, -0.25408935546875, -0.2359619140625, -0.21783447265625, -0.19970703125, -0.18157958984375, -0.1634521484375, -0.14532470703125, -0.127197265625, -0.10906982421875, -0.0909423828125, -0.07281494140625, -0.0546875, -0.03656005859375, -0.0184326171875, -0.00030517578125, 0.017822265625, 0.03594970703125, 0.0540771484375, 0.07220458984375, 0.09033203125, 0.10845947265625, 0.1265869140625, 0.14471435546875, 0.162841796875, 0.18096923828125, 0.1990966796875, 0.21722412109375, 0.2353515625, 0.25347900390625, 0.2716064453125, 0.28973388671875, 0.307861328125, 0.32598876953125, 0.3441162109375, 0.36224365234375, 0.38037109375, 0.39849853515625, 0.4166259765625, 0.43475341796875, 0.452880859375, 0.47100830078125, 0.4891357421875, 0.50726318359375, 0.525390625]}, "gradients/decoder.transformer.h.10.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 970.0, 46.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.037553515285253525, -0.02384784072637558, -0.010142166167497635, 0.00356350839138031, 0.017269182950258255, 0.0309748537838459, 0.044680532068014145, 0.05838621035218239, 0.07209187746047974, 0.08579754829406738, 0.09950322657823563, 0.11320890486240387, 0.12691457569599152, 0.14062024652957916, 0.154325932264328, 0.16803160309791565, 0.1817372739315033, 0.19544294476509094, 0.2091486155986786, 0.22285430133342743, 0.23655997216701508, 0.2502656579017639, 0.26397132873535156, 0.2776769995689392, 0.29138267040252686, 0.3050883412361145, 0.31879401206970215, 0.3324996829032898, 0.34620535373687744, 0.3599110245704651, 0.3736167252063751, 0.38732239603996277, 0.4010280966758728, 0.41473376750946045, 0.4284394383430481, 0.44214510917663574, 0.4558507800102234, 0.46955645084381104, 0.48326215147972107, 0.4969678223133087, 0.5106735229492188, 0.5243791937828064, 0.538084864616394, 0.5517905354499817, 0.5654962062835693, 0.579201877117157, 0.5929075479507446, 0.606613278388977, 0.6203188896179199, 0.6340245604515076, 0.6477302312850952, 0.6614359021186829, 0.6751415729522705, 0.6888472437858582, 0.7025529146194458, 0.7162586450576782, 0.7299642562866211, 0.7436699271202087, 0.7573755979537964, 0.771081268787384, 0.7847869396209717, 0.7984926104545593, 0.812198281288147, 0.8259040117263794, 0.839609682559967]}, "gradients/decoder.transformer.h.10.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 7.0, 15.0, 23.0, 21.0, 44.0, 38.0, 55.0, 66.0, 72.0, 94.0, 87.0, 78.0, 66.0, 73.0, 76.0, 46.0, 45.0, 22.0, 27.0, 26.0, 11.0, 9.0, 3.0, 6.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03854191303253174, -0.0375216007232666, -0.036501288414001465, -0.03548097237944603, -0.03446066007018089, -0.033440347760915756, -0.03242003172636032, -0.031399719417095184, -0.030379407107830048, -0.02935909479856491, -0.028338780626654625, -0.02731846645474434, -0.026298154145479202, -0.025277841836214066, -0.02425752766430378, -0.023237213492393494, -0.022216901183128357, -0.02119658887386322, -0.020176274701952934, -0.01915596053004265, -0.01813564822077751, -0.017115335911512375, -0.01609502173960209, -0.015074708499014378, -0.014054395258426666, -0.013034082017838955, -0.012013768777251244, -0.010993455536663532, -0.009973142296075821, -0.00895282905548811, -0.007932515814900398, -0.006912202574312687, -0.005891889333724976, -0.004871576093137264, -0.003851262852549553, -0.0028309496119618416, -0.0018106363713741302, -0.0007903231307864189, 0.00022999010980129242, 0.0012503033503890038, 0.002270616590976715, 0.0032909298315644264, 0.004311243072152138, 0.005331556312739849, 0.00635186955332756, 0.007372182793915272, 0.008392496034502983, 0.009412809275090694, 0.010433122515678406, 0.011453435756266117, 0.012473748996853828, 0.01349406223744154, 0.014514375478029251, 0.015534688718616962, 0.016555001959204674, 0.01757531613111496, 0.018595628440380096, 0.019615940749645233, 0.02063625492155552, 0.021656569093465805, 0.022676881402730942, 0.02369719371199608, 0.024717507883906364, 0.02573782205581665, 0.026758134365081787]}, "gradients/decoder.transformer.h.10.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 7.0, 3.0, 2.0, 3.0, 6.0, 1.0, 11.0, 5.0, 3.0, 7.0, 17.0, 13.0, 19.0, 18.0, 22.0, 18.0, 32.0, 18.0, 25.0, 37.0, 26.0, 42.0, 36.0, 36.0, 33.0, 49.0, 51.0, 39.0, 34.0, 32.0, 36.0, 37.0, 40.0, 38.0, 34.0, 23.0, 20.0, 20.0, 21.0, 15.0, 12.0, 14.0, 6.0, 6.0, 5.0, 7.0, 11.0, 5.0, 6.0, 6.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0], "bins": [-10.2265625, -9.92724609375, -9.6279296875, -9.32861328125, -9.029296875, -8.72998046875, -8.4306640625, -8.13134765625, -7.83203125, -7.53271484375, -7.2333984375, -6.93408203125, -6.634765625, -6.33544921875, -6.0361328125, -5.73681640625, -5.4375, -5.13818359375, -4.8388671875, -4.53955078125, -4.240234375, -3.94091796875, -3.6416015625, -3.34228515625, -3.04296875, -2.74365234375, -2.4443359375, -2.14501953125, -1.845703125, -1.54638671875, -1.2470703125, -0.94775390625, -0.6484375, -0.34912109375, -0.0498046875, 0.24951171875, 0.548828125, 0.84814453125, 1.1474609375, 1.44677734375, 1.74609375, 2.04541015625, 2.3447265625, 2.64404296875, 2.943359375, 3.24267578125, 3.5419921875, 3.84130859375, 4.140625, 4.43994140625, 4.7392578125, 5.03857421875, 5.337890625, 5.63720703125, 5.9365234375, 6.23583984375, 6.53515625, 6.83447265625, 7.1337890625, 7.43310546875, 7.732421875, 8.03173828125, 8.3310546875, 8.63037109375, 8.9296875]}, "gradients/decoder.transformer.h.10.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 2.0, 4.0, 4.0, 2.0, 13.0, 10.0, 8.0, 9.0, 8.0, 26.0, 18.0, 30.0, 36.0, 29.0, 65.0, 70.0, 122.0, 163.0, 221.0, 326.0, 516.0, 948.0, 1714.0, 3020.0, 6001.0, 12297.0, 25261.0, 56314.0, 141862.0, 403124.0, 238474.0, 85509.0, 36660.0, 17436.0, 8478.0, 4194.0, 2223.0, 1245.0, 715.0, 419.0, 283.0, 173.0, 144.0, 91.0, 65.0, 53.0, 39.0, 37.0, 27.0, 16.0, 16.0, 12.0, 10.0, 6.0, 11.0, 1.0, 3.0, 4.0, 0.0, 3.0], "bins": [-9.1640625, -8.8924560546875, -8.620849609375, -8.3492431640625, -8.07763671875, -7.8060302734375, -7.534423828125, -7.2628173828125, -6.9912109375, -6.7196044921875, -6.447998046875, -6.1763916015625, -5.90478515625, -5.6331787109375, -5.361572265625, -5.0899658203125, -4.818359375, -4.5467529296875, -4.275146484375, -4.0035400390625, -3.73193359375, -3.4603271484375, -3.188720703125, -2.9171142578125, -2.6455078125, -2.3739013671875, -2.102294921875, -1.8306884765625, -1.55908203125, -1.2874755859375, -1.015869140625, -0.7442626953125, -0.47265625, -0.2010498046875, 0.070556640625, 0.3421630859375, 0.61376953125, 0.8853759765625, 1.156982421875, 1.4285888671875, 1.7001953125, 1.9718017578125, 2.243408203125, 2.5150146484375, 2.78662109375, 3.0582275390625, 3.329833984375, 3.6014404296875, 3.873046875, 4.1446533203125, 4.416259765625, 4.6878662109375, 4.95947265625, 5.2310791015625, 5.502685546875, 5.7742919921875, 6.0458984375, 6.3175048828125, 6.589111328125, 6.8607177734375, 7.13232421875, 7.4039306640625, 7.675537109375, 7.9471435546875, 8.21875]}, "gradients/decoder.transformer.h.10.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 4.0, 10.0, 6.0, 13.0, 9.0, 18.0, 13.0, 20.0, 35.0, 32.0, 40.0, 40.0, 64.0, 49.0, 63.0, 121.0, 1678.0, 357.0, 92.0, 74.0, 61.0, 51.0, 34.0, 32.0, 25.0, 24.0, 25.0, 21.0, 12.0, 13.0, 7.0, 7.0, 4.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.78125, -36.3193359375, -34.857421875, -33.3955078125, -31.93359375, -30.4716796875, -29.009765625, -27.5478515625, -26.0859375, -24.6240234375, -23.162109375, -21.7001953125, -20.23828125, -18.7763671875, -17.314453125, -15.8525390625, -14.390625, -12.9287109375, -11.466796875, -10.0048828125, -8.54296875, -7.0810546875, -5.619140625, -4.1572265625, -2.6953125, -1.2333984375, 0.228515625, 1.6904296875, 3.15234375, 4.6142578125, 6.076171875, 7.5380859375, 9.0, 10.4619140625, 11.923828125, 13.3857421875, 14.84765625, 16.3095703125, 17.771484375, 19.2333984375, 20.6953125, 22.1572265625, 23.619140625, 25.0810546875, 26.54296875, 28.0048828125, 29.466796875, 30.9287109375, 32.390625, 33.8525390625, 35.314453125, 36.7763671875, 38.23828125, 39.7001953125, 41.162109375, 42.6240234375, 44.0859375, 45.5478515625, 47.009765625, 48.4716796875, 49.93359375, 51.3955078125, 52.857421875, 54.3193359375, 55.78125]}, "gradients/decoder.transformer.h.10.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 1.0, 3.0, 2.0, 5.0, 2.0, 4.0, 4.0, 4.0, 9.0, 24.0, 41.0, 34.0, 57.0, 60.0, 82.0, 145.0, 198.0, 408.0, 1014.0, 10383.0, 3102942.0, 27782.0, 1247.0, 472.0, 237.0, 167.0, 107.0, 80.0, 50.0, 33.0, 36.0, 26.0, 13.0, 10.0, 7.0, 6.0, 5.0, 2.0, 1.0, 5.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0], "bins": [-120.125, -116.5498046875, -112.974609375, -109.3994140625, -105.82421875, -102.2490234375, -98.673828125, -95.0986328125, -91.5234375, -87.9482421875, -84.373046875, -80.7978515625, -77.22265625, -73.6474609375, -70.072265625, -66.4970703125, -62.921875, -59.3466796875, -55.771484375, -52.1962890625, -48.62109375, -45.0458984375, -41.470703125, -37.8955078125, -34.3203125, -30.7451171875, -27.169921875, -23.5947265625, -20.01953125, -16.4443359375, -12.869140625, -9.2939453125, -5.71875, -2.1435546875, 1.431640625, 5.0068359375, 8.58203125, 12.1572265625, 15.732421875, 19.3076171875, 22.8828125, 26.4580078125, 30.033203125, 33.6083984375, 37.18359375, 40.7587890625, 44.333984375, 47.9091796875, 51.484375, 55.0595703125, 58.634765625, 62.2099609375, 65.78515625, 69.3603515625, 72.935546875, 76.5107421875, 80.0859375, 83.6611328125, 87.236328125, 90.8115234375, 94.38671875, 97.9619140625, 101.537109375, 105.1123046875, 108.6875]}, "gradients/decoder.transformer.h.10.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 2.0, 32.0, 177.0, 382.0, 335.0, 81.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-224.1649627685547, -219.47555541992188, -214.7861328125, -210.0967254638672, -205.4073028564453, -200.7178955078125, -196.02847290039062, -191.3390655517578, -186.649658203125, -181.9602508544922, -177.2708282470703, -172.5814208984375, -167.89199829101562, -163.2025909423828, -158.51316833496094, -153.82376098632812, -149.13433837890625, -144.44493103027344, -139.75550842285156, -135.06610107421875, -130.37667846679688, -125.68727111816406, -120.99785614013672, -116.30844116210938, -111.61902618408203, -106.92961120605469, -102.24019622802734, -97.55078125, -92.86137390136719, -88.17195129394531, -83.4825439453125, -78.79312896728516, -74.10370635986328, -69.41429138183594, -64.7248764038086, -60.035465240478516, -55.34605026245117, -50.65663528442383, -45.96722412109375, -41.277809143066406, -36.58839416503906, -31.89897918701172, -27.209566116333008, -22.520153045654297, -17.830738067626953, -13.14132308959961, -8.451910018920898, -3.7624969482421875, 0.9269180297851562, 5.616332054138184, 10.305746078491211, 14.995160102844238, 19.684574127197266, 24.37398910522461, 29.06340217590332, 33.75281524658203, 38.442230224609375, 43.13164520263672, 47.82106018066406, 52.51047134399414, 57.199886322021484, 61.88930130004883, 66.5787124633789, 71.26812744140625, 75.9575424194336]}, "gradients/decoder.transformer.h.10.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 4.0, 1.0, 5.0, 4.0, 2.0, 9.0, 8.0, 11.0, 10.0, 11.0, 15.0, 19.0, 28.0, 17.0, 27.0, 29.0, 27.0, 26.0, 29.0, 37.0, 35.0, 43.0, 52.0, 38.0, 39.0, 38.0, 40.0, 42.0, 34.0, 37.0, 28.0, 25.0, 23.0, 34.0, 30.0, 31.0, 22.0, 19.0, 11.0, 10.0, 8.0, 10.0, 9.0, 7.0, 7.0, 3.0, 8.0, 3.0, 3.0, 1.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-85.33950805664062, -82.64139556884766, -79.94328308105469, -77.24516296386719, -74.54705047607422, -71.84893798828125, -69.15081787109375, -66.45270538330078, -63.75459289550781, -61.056480407714844, -58.35836410522461, -55.660247802734375, -52.962135314941406, -50.26402282714844, -47.5659065246582, -44.86779022216797, -42.169677734375, -39.47156524658203, -36.7734489440918, -34.07533264160156, -31.377220153808594, -28.679105758666992, -25.98099136352539, -23.28287696838379, -20.584762573242188, -17.886648178100586, -15.188533782958984, -12.490419387817383, -9.792304992675781, -7.09419059753418, -4.396076202392578, -1.6979618072509766, 1.000152587890625, 3.6982669830322266, 6.396381378173828, 9.09449577331543, 11.792610168457031, 14.490724563598633, 17.188838958740234, 19.886953353881836, 22.585067749023438, 25.28318214416504, 27.98129653930664, 30.679410934448242, 33.377525329589844, 36.07563781738281, 38.77375411987305, 41.47187042236328, 44.16998291015625, 46.86809539794922, 49.56621170043945, 52.26432800292969, 54.962440490722656, 57.660552978515625, 60.35866928100586, 63.056785583496094, 65.75489807128906, 68.45301055908203, 71.151123046875, 73.8492431640625, 76.54735565185547, 79.24546813964844, 81.94358825683594, 84.6417007446289, 87.33981323242188]}, "gradients/decoder.transformer.h.9.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0, 3.0, 4.0, 8.0, 9.0, 6.0, 4.0, 5.0, 10.0, 18.0, 13.0, 24.0, 19.0, 22.0, 29.0, 21.0, 27.0, 39.0, 31.0, 35.0, 33.0, 48.0, 35.0, 34.0, 39.0, 47.0, 40.0, 27.0, 26.0, 44.0, 37.0, 30.0, 34.0, 35.0, 25.0, 14.0, 20.0, 21.0, 13.0, 9.0, 8.0, 7.0, 10.0, 6.0, 10.0, 2.0, 5.0, 5.0, 5.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-10.140625, -9.8258056640625, -9.510986328125, -9.1961669921875, -8.88134765625, -8.5665283203125, -8.251708984375, -7.9368896484375, -7.6220703125, -7.3072509765625, -6.992431640625, -6.6776123046875, -6.36279296875, -6.0479736328125, -5.733154296875, -5.4183349609375, -5.103515625, -4.7886962890625, -4.473876953125, -4.1590576171875, -3.84423828125, -3.5294189453125, -3.214599609375, -2.8997802734375, -2.5849609375, -2.2701416015625, -1.955322265625, -1.6405029296875, -1.32568359375, -1.0108642578125, -0.696044921875, -0.3812255859375, -0.06640625, 0.2484130859375, 0.563232421875, 0.8780517578125, 1.19287109375, 1.5076904296875, 1.822509765625, 2.1373291015625, 2.4521484375, 2.7669677734375, 3.081787109375, 3.3966064453125, 3.71142578125, 4.0262451171875, 4.341064453125, 4.6558837890625, 4.970703125, 5.2855224609375, 5.600341796875, 5.9151611328125, 6.22998046875, 6.5447998046875, 6.859619140625, 7.1744384765625, 7.4892578125, 7.8040771484375, 8.118896484375, 8.4337158203125, 8.74853515625, 9.0633544921875, 9.378173828125, 9.6929931640625, 10.0078125]}, "gradients/decoder.transformer.h.9.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 3.0, 6.0, 5.0, 2.0, 6.0, 6.0, 9.0, 10.0, 11.0, 13.0, 18.0, 27.0, 24.0, 34.0, 35.0, 50.0, 94.0, 123.0, 219.0, 471.0, 1151.0, 4150.0, 22884.0, 325074.0, 3072030.0, 719425.0, 39679.0, 5826.0, 1536.0, 579.0, 254.0, 148.0, 98.0, 62.0, 51.0, 28.0, 26.0, 22.0, 13.0, 18.0, 11.0, 12.0, 9.0, 8.0, 9.0, 1.0, 7.0, 4.0, 2.0, 1.0, 1.0, 4.0, 1.0, 1.0, 2.0], "bins": [-32.84375, -31.866943359375, -30.89013671875, -29.913330078125, -28.9365234375, -27.959716796875, -26.98291015625, -26.006103515625, -25.029296875, -24.052490234375, -23.07568359375, -22.098876953125, -21.1220703125, -20.145263671875, -19.16845703125, -18.191650390625, -17.21484375, -16.238037109375, -15.26123046875, -14.284423828125, -13.3076171875, -12.330810546875, -11.35400390625, -10.377197265625, -9.400390625, -8.423583984375, -7.44677734375, -6.469970703125, -5.4931640625, -4.516357421875, -3.53955078125, -2.562744140625, -1.5859375, -0.609130859375, 0.36767578125, 1.344482421875, 2.3212890625, 3.298095703125, 4.27490234375, 5.251708984375, 6.228515625, 7.205322265625, 8.18212890625, 9.158935546875, 10.1357421875, 11.112548828125, 12.08935546875, 13.066162109375, 14.04296875, 15.019775390625, 15.99658203125, 16.973388671875, 17.9501953125, 18.927001953125, 19.90380859375, 20.880615234375, 21.857421875, 22.834228515625, 23.81103515625, 24.787841796875, 25.7646484375, 26.741455078125, 27.71826171875, 28.695068359375, 29.671875]}, "gradients/decoder.transformer.h.9.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 7.0, 5.0, 9.0, 19.0, 25.0, 26.0, 48.0, 58.0, 58.0, 104.0, 145.0, 240.0, 341.0, 493.0, 583.0, 543.0, 425.0, 268.0, 202.0, 138.0, 115.0, 85.0, 40.0, 27.0, 21.0, 21.0, 9.0, 9.0, 8.0, 3.0, 4.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-28.765625, -28.060546875, -27.35546875, -26.650390625, -25.9453125, -25.240234375, -24.53515625, -23.830078125, -23.125, -22.419921875, -21.71484375, -21.009765625, -20.3046875, -19.599609375, -18.89453125, -18.189453125, -17.484375, -16.779296875, -16.07421875, -15.369140625, -14.6640625, -13.958984375, -13.25390625, -12.548828125, -11.84375, -11.138671875, -10.43359375, -9.728515625, -9.0234375, -8.318359375, -7.61328125, -6.908203125, -6.203125, -5.498046875, -4.79296875, -4.087890625, -3.3828125, -2.677734375, -1.97265625, -1.267578125, -0.5625, 0.142578125, 0.84765625, 1.552734375, 2.2578125, 2.962890625, 3.66796875, 4.373046875, 5.078125, 5.783203125, 6.48828125, 7.193359375, 7.8984375, 8.603515625, 9.30859375, 10.013671875, 10.71875, 11.423828125, 12.12890625, 12.833984375, 13.5390625, 14.244140625, 14.94921875, 15.654296875, 16.359375]}, "gradients/decoder.transformer.h.9.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 4.0, 0.0, 3.0, 3.0, 3.0, 3.0, 7.0, 7.0, 14.0, 19.0, 24.0, 31.0, 50.0, 73.0, 108.0, 154.0, 205.0, 269.0, 506.0, 1551.0, 37557.0, 3937017.0, 211759.0, 3227.0, 610.0, 338.0, 235.0, 140.0, 103.0, 80.0, 53.0, 36.0, 26.0, 24.0, 28.0, 9.0, 9.0, 3.0, 4.0, 6.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-79.625, -76.5224609375, -73.419921875, -70.3173828125, -67.21484375, -64.1123046875, -61.009765625, -57.9072265625, -54.8046875, -51.7021484375, -48.599609375, -45.4970703125, -42.39453125, -39.2919921875, -36.189453125, -33.0869140625, -29.984375, -26.8818359375, -23.779296875, -20.6767578125, -17.57421875, -14.4716796875, -11.369140625, -8.2666015625, -5.1640625, -2.0615234375, 1.041015625, 4.1435546875, 7.24609375, 10.3486328125, 13.451171875, 16.5537109375, 19.65625, 22.7587890625, 25.861328125, 28.9638671875, 32.06640625, 35.1689453125, 38.271484375, 41.3740234375, 44.4765625, 47.5791015625, 50.681640625, 53.7841796875, 56.88671875, 59.9892578125, 63.091796875, 66.1943359375, 69.296875, 72.3994140625, 75.501953125, 78.6044921875, 81.70703125, 84.8095703125, 87.912109375, 91.0146484375, 94.1171875, 97.2197265625, 100.322265625, 103.4248046875, 106.52734375, 109.6298828125, 112.732421875, 115.8349609375, 118.9375]}, "gradients/decoder.transformer.h.9.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 3.0, 11.0, 17.0, 26.0, 54.0, 97.0, 135.0, 143.0, 164.0, 113.0, 94.0, 71.0, 31.0, 30.0, 16.0, 5.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-177.62815856933594, -174.0123748779297, -170.39659118652344, -166.7808074951172, -163.16502380371094, -159.5492401123047, -155.93345642089844, -152.3176727294922, -148.70188903808594, -145.0861053466797, -141.47032165527344, -137.8545379638672, -134.23875427246094, -130.6229705810547, -127.00718688964844, -123.39140319824219, -119.77562713623047, -116.15984344482422, -112.54405975341797, -108.92827606201172, -105.31249237060547, -101.69670867919922, -98.0809326171875, -94.46514892578125, -90.849365234375, -87.23358154296875, -83.6177978515625, -80.00201416015625, -76.38623046875, -72.77044677734375, -69.1546630859375, -65.53887939453125, -61.923099517822266, -58.307315826416016, -54.691532135009766, -51.07575225830078, -47.45996856689453, -43.84418487548828, -40.22840118408203, -36.61261749267578, -32.99683380126953, -29.38105010986328, -25.76526641845703, -22.149484634399414, -18.533700942993164, -14.917917251586914, -11.302135467529297, -7.686351776123047, -4.070568084716797, -0.4547848701477051, 3.1609983444213867, 6.77678108215332, 10.39256477355957, 14.00834846496582, 17.624130249023438, 21.239913940429688, 24.855697631835938, 28.471481323242188, 32.08726501464844, 35.70304870605469, 39.31883239746094, 42.93461608886719, 46.55039596557617, 50.16617965698242, 53.78196334838867]}, "gradients/decoder.transformer.h.9.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 7.0, 11.0, 10.0, 16.0, 19.0, 21.0, 24.0, 30.0, 28.0, 31.0, 39.0, 36.0, 39.0, 38.0, 19.0, 45.0, 31.0, 35.0, 33.0, 37.0, 34.0, 46.0, 34.0, 28.0, 29.0, 32.0, 24.0, 22.0, 20.0, 22.0, 23.0, 15.0, 16.0, 12.0, 15.0, 7.0, 11.0, 7.0, 2.0, 6.0, 4.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 2.0], "bins": [-54.229827880859375, -52.40794372558594, -50.586063385009766, -48.76417922973633, -46.942298889160156, -45.12041473388672, -43.29853057861328, -41.476646423339844, -39.65476608276367, -37.832881927490234, -36.01100158691406, -34.189117431640625, -32.36723327636719, -30.545352935791016, -28.723468780517578, -26.901586532592773, -25.07970428466797, -23.257822036743164, -21.43593978881836, -19.614055633544922, -17.792173385620117, -15.970291137695312, -14.148407936096191, -12.32652473449707, -10.504642486572266, -8.682760238647461, -6.86087703704834, -5.038994312286377, -3.217111587524414, -1.3952293395996094, 0.4266538619995117, 2.248537063598633, 4.0704193115234375, 5.8923020362854, 7.714184761047363, 9.536067962646484, 11.357950210571289, 13.179832458496094, 15.001715660095215, 16.823598861694336, 18.64548110961914, 20.467363357543945, 22.28924560546875, 24.111129760742188, 25.933012008666992, 27.754894256591797, 29.576778411865234, 31.39866065979004, 33.220542907714844, 35.04242706298828, 36.86430740356445, 38.68619155883789, 40.50807189941406, 42.3299560546875, 44.15184020996094, 45.973724365234375, 47.79560470581055, 49.617488861083984, 51.439369201660156, 53.261253356933594, 55.08313751220703, 56.9050178527832, 58.72690200805664, 60.54878234863281, 62.37066650390625]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 2.0, 7.0, 9.0, 9.0, 9.0, 16.0, 10.0, 11.0, 10.0, 16.0, 14.0, 19.0, 24.0, 24.0, 23.0, 33.0, 30.0, 31.0, 31.0, 32.0, 43.0, 48.0, 40.0, 28.0, 43.0, 33.0, 40.0, 39.0, 28.0, 26.0, 22.0, 25.0, 26.0, 33.0, 27.0, 17.0, 17.0, 14.0, 21.0, 16.0, 9.0, 10.0, 8.0, 10.0, 4.0, 5.0, 2.0, 1.0, 6.0, 2.0, 3.0, 3.0, 2.0], "bins": [-10.4921875, -10.1881103515625, -9.884033203125, -9.5799560546875, -9.27587890625, -8.9718017578125, -8.667724609375, -8.3636474609375, -8.0595703125, -7.7554931640625, -7.451416015625, -7.1473388671875, -6.84326171875, -6.5391845703125, -6.235107421875, -5.9310302734375, -5.626953125, -5.3228759765625, -5.018798828125, -4.7147216796875, -4.41064453125, -4.1065673828125, -3.802490234375, -3.4984130859375, -3.1943359375, -2.8902587890625, -2.586181640625, -2.2821044921875, -1.97802734375, -1.6739501953125, -1.369873046875, -1.0657958984375, -0.76171875, -0.4576416015625, -0.153564453125, 0.1505126953125, 0.45458984375, 0.7586669921875, 1.062744140625, 1.3668212890625, 1.6708984375, 1.9749755859375, 2.279052734375, 2.5831298828125, 2.88720703125, 3.1912841796875, 3.495361328125, 3.7994384765625, 4.103515625, 4.4075927734375, 4.711669921875, 5.0157470703125, 5.31982421875, 5.6239013671875, 5.927978515625, 6.2320556640625, 6.5361328125, 6.8402099609375, 7.144287109375, 7.4483642578125, 7.75244140625, 8.0565185546875, 8.360595703125, 8.6646728515625, 8.96875]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 4.0, 7.0, 15.0, 14.0, 26.0, 62.0, 83.0, 148.0, 207.0, 341.0, 525.0, 861.0, 1491.0, 2168.0, 3548.0, 5709.0, 9201.0, 14746.0, 23382.0, 38279.0, 61422.0, 98758.0, 149034.0, 185885.0, 160084.0, 109767.0, 69978.0, 42758.0, 26571.0, 16438.0, 10179.0, 6227.0, 3896.0, 2525.0, 1637.0, 979.0, 582.0, 401.0, 237.0, 144.0, 80.0, 53.0, 33.0, 27.0, 25.0, 12.0, 7.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.95263671875, -0.9243011474609375, -0.895965576171875, -0.8676300048828125, -0.83929443359375, -0.8109588623046875, -0.782623291015625, -0.7542877197265625, -0.7259521484375, -0.6976165771484375, -0.669281005859375, -0.6409454345703125, -0.61260986328125, -0.5842742919921875, -0.555938720703125, -0.5276031494140625, -0.499267578125, -0.4709320068359375, -0.442596435546875, -0.4142608642578125, -0.38592529296875, -0.3575897216796875, -0.329254150390625, -0.3009185791015625, -0.2725830078125, -0.2442474365234375, -0.215911865234375, -0.1875762939453125, -0.15924072265625, -0.1309051513671875, -0.102569580078125, -0.0742340087890625, -0.0458984375, -0.0175628662109375, 0.010772705078125, 0.0391082763671875, 0.06744384765625, 0.0957794189453125, 0.124114990234375, 0.1524505615234375, 0.1807861328125, 0.2091217041015625, 0.237457275390625, 0.2657928466796875, 0.29412841796875, 0.3224639892578125, 0.350799560546875, 0.3791351318359375, 0.407470703125, 0.4358062744140625, 0.464141845703125, 0.4924774169921875, 0.52081298828125, 0.5491485595703125, 0.577484130859375, 0.6058197021484375, 0.6341552734375, 0.6624908447265625, 0.690826416015625, 0.7191619873046875, 0.74749755859375, 0.7758331298828125, 0.804168701171875, 0.8325042724609375, 0.86083984375]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 9.0, 6.0, 14.0, 9.0, 11.0, 19.0, 16.0, 29.0, 13.0, 29.0, 28.0, 30.0, 37.0, 38.0, 41.0, 53.0, 33.0, 37.0, 1066.0, 45.0, 50.0, 56.0, 35.0, 61.0, 39.0, 23.0, 35.0, 28.0, 21.0, 22.0, 20.0, 17.0, 8.0, 8.0, 9.0, 6.0, 8.0, 8.0, 4.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.65234375, -6.40899658203125, -6.1656494140625, -5.92230224609375, -5.678955078125, -5.43560791015625, -5.1922607421875, -4.94891357421875, -4.70556640625, -4.46221923828125, -4.2188720703125, -3.97552490234375, -3.732177734375, -3.48883056640625, -3.2454833984375, -3.00213623046875, -2.7587890625, -2.51544189453125, -2.2720947265625, -2.02874755859375, -1.785400390625, -1.54205322265625, -1.2987060546875, -1.05535888671875, -0.81201171875, -0.56866455078125, -0.3253173828125, -0.08197021484375, 0.161376953125, 0.40472412109375, 0.6480712890625, 0.89141845703125, 1.134765625, 1.37811279296875, 1.6214599609375, 1.86480712890625, 2.108154296875, 2.35150146484375, 2.5948486328125, 2.83819580078125, 3.08154296875, 3.32489013671875, 3.5682373046875, 3.81158447265625, 4.054931640625, 4.29827880859375, 4.5416259765625, 4.78497314453125, 5.0283203125, 5.27166748046875, 5.5150146484375, 5.75836181640625, 6.001708984375, 6.24505615234375, 6.4884033203125, 6.73175048828125, 6.97509765625, 7.21844482421875, 7.4617919921875, 7.70513916015625, 7.948486328125, 8.19183349609375, 8.4351806640625, 8.67852783203125, 8.921875]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 6.0, 8.0, 15.0, 23.0, 35.0, 44.0, 68.0, 101.0, 158.0, 282.0, 455.0, 688.0, 1146.0, 1753.0, 2910.0, 4662.0, 7631.0, 12935.0, 21366.0, 36009.0, 59050.0, 94407.0, 142707.0, 1069644.0, 304076.0, 126247.0, 81954.0, 51015.0, 30837.0, 18244.0, 10993.0, 6767.0, 4101.0, 2563.0, 1596.0, 972.0, 594.0, 364.0, 254.0, 153.0, 106.0, 57.0, 46.0, 27.0, 27.0, 10.0, 7.0, 7.0, 6.0, 3.0, 6.0, 2.0, 1.0, 0.0, 2.0], "bins": [-0.658203125, -0.6381683349609375, -0.618133544921875, -0.5980987548828125, -0.57806396484375, -0.5580291748046875, -0.537994384765625, -0.5179595947265625, -0.4979248046875, -0.4778900146484375, -0.457855224609375, -0.4378204345703125, -0.41778564453125, -0.3977508544921875, -0.377716064453125, -0.3576812744140625, -0.337646484375, -0.3176116943359375, -0.297576904296875, -0.2775421142578125, -0.25750732421875, -0.2374725341796875, -0.217437744140625, -0.1974029541015625, -0.1773681640625, -0.1573333740234375, -0.137298583984375, -0.1172637939453125, -0.09722900390625, -0.0771942138671875, -0.057159423828125, -0.0371246337890625, -0.01708984375, 0.0029449462890625, 0.022979736328125, 0.0430145263671875, 0.06304931640625, 0.0830841064453125, 0.103118896484375, 0.1231536865234375, 0.1431884765625, 0.1632232666015625, 0.183258056640625, 0.2032928466796875, 0.22332763671875, 0.2433624267578125, 0.263397216796875, 0.2834320068359375, 0.303466796875, 0.3235015869140625, 0.343536376953125, 0.3635711669921875, 0.38360595703125, 0.4036407470703125, 0.423675537109375, 0.4437103271484375, 0.4637451171875, 0.4837799072265625, 0.503814697265625, 0.5238494873046875, 0.54388427734375, 0.5639190673828125, 0.583953857421875, 0.6039886474609375, 0.6240234375]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 0.0, 4.0, 3.0, 3.0, 7.0, 9.0, 8.0, 13.0, 14.0, 15.0, 12.0, 21.0, 16.0, 17.0, 13.0, 19.0, 18.0, 22.0, 34.0, 64.0, 95.0, 113.0, 97.0, 104.0, 59.0, 36.0, 17.0, 28.0, 14.0, 21.0, 15.0, 11.0, 13.0, 11.0, 14.0, 11.0, 12.0, 7.0, 3.0, 4.0, 3.0, 3.0, 2.0, 2.0, 5.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.036102294921875, -0.0350041389465332, -0.033905982971191406, -0.03280782699584961, -0.03170967102050781, -0.030611515045166016, -0.02951335906982422, -0.028415203094482422, -0.027317047119140625, -0.026218891143798828, -0.02512073516845703, -0.024022579193115234, -0.022924423217773438, -0.02182626724243164, -0.020728111267089844, -0.019629955291748047, -0.01853179931640625, -0.017433643341064453, -0.016335487365722656, -0.01523733139038086, -0.014139175415039062, -0.013041019439697266, -0.011942863464355469, -0.010844707489013672, -0.009746551513671875, -0.008648395538330078, -0.007550239562988281, -0.006452083587646484, -0.0053539276123046875, -0.004255771636962891, -0.0031576156616210938, -0.002059459686279297, -0.0009613037109375, 0.00013685226440429688, 0.0012350082397460938, 0.0023331642150878906, 0.0034313201904296875, 0.004529476165771484, 0.005627632141113281, 0.006725788116455078, 0.007823944091796875, 0.008922100067138672, 0.010020256042480469, 0.011118412017822266, 0.012216567993164062, 0.01331472396850586, 0.014412879943847656, 0.015511035919189453, 0.01660919189453125, 0.017707347869873047, 0.018805503845214844, 0.01990365982055664, 0.021001815795898438, 0.022099971771240234, 0.02319812774658203, 0.024296283721923828, 0.025394439697265625, 0.026492595672607422, 0.02759075164794922, 0.028688907623291016, 0.029787063598632812, 0.03088521957397461, 0.031983375549316406, 0.0330815315246582, 0.0341796875]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 6.0, 1.0, 2.0, 1.0, 5.0, 3.0, 2.0, 6.0, 8.0, 11.0, 16.0, 12.0, 17.0, 27.0, 27.0, 26.0, 28.0, 34.0, 53.0, 77.0, 122.0, 257.0, 506.0, 2630.0, 966184.0, 76551.0, 944.0, 381.0, 173.0, 87.0, 67.0, 52.0, 45.0, 31.0, 26.0, 25.0, 32.0, 15.0, 19.0, 17.0, 9.0, 4.0, 8.0, 9.0, 4.0, 4.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.69384765625, -0.672027587890625, -0.65020751953125, -0.628387451171875, -0.6065673828125, -0.584747314453125, -0.56292724609375, -0.541107177734375, -0.519287109375, -0.497467041015625, -0.47564697265625, -0.453826904296875, -0.4320068359375, -0.410186767578125, -0.38836669921875, -0.366546630859375, -0.3447265625, -0.322906494140625, -0.30108642578125, -0.279266357421875, -0.2574462890625, -0.235626220703125, -0.21380615234375, -0.191986083984375, -0.170166015625, -0.148345947265625, -0.12652587890625, -0.104705810546875, -0.0828857421875, -0.061065673828125, -0.03924560546875, -0.017425537109375, 0.00439453125, 0.026214599609375, 0.04803466796875, 0.069854736328125, 0.0916748046875, 0.113494873046875, 0.13531494140625, 0.157135009765625, 0.178955078125, 0.200775146484375, 0.22259521484375, 0.244415283203125, 0.2662353515625, 0.288055419921875, 0.30987548828125, 0.331695556640625, 0.353515625, 0.375335693359375, 0.39715576171875, 0.418975830078125, 0.4407958984375, 0.462615966796875, 0.48443603515625, 0.506256103515625, 0.528076171875, 0.549896240234375, 0.57171630859375, 0.593536376953125, 0.6153564453125, 0.637176513671875, 0.65899658203125, 0.680816650390625, 0.70263671875]}, "gradients/decoder.transformer.h.9.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 1001.0, 11.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06556237488985062, -0.046912312507629395, -0.028262246400117874, -0.009612180292606354, 0.009037882089614868, 0.02768794447183609, 0.04633801430463791, 0.06498806923627853, 0.08363813906908035, 0.10228820145130157, 0.1209382712841034, 0.13958832621574402, 0.15823839604854584, 0.17688846588134766, 0.19553852081298828, 0.2141885757446289, 0.23283866047859192, 0.25148871541023254, 0.27013880014419556, 0.2887888550758362, 0.3074389100074768, 0.32608896493911743, 0.34473904967308044, 0.36338910460472107, 0.3820391595363617, 0.4006892144680023, 0.41933929920196533, 0.43798935413360596, 0.4566394090652466, 0.4752894639968872, 0.4939395487308502, 0.5125895738601685, 0.5312396883964539, 0.5498897433280945, 0.5685397982597351, 0.5871899127960205, 0.6058399677276611, 0.6244900226593018, 0.6431400775909424, 0.661790132522583, 0.6804401874542236, 0.6990902423858643, 0.7177402973175049, 0.7363903522491455, 0.7550404667854309, 0.7736905217170715, 0.7923405766487122, 0.8109906315803528, 0.8296407461166382, 0.8482908010482788, 0.8669408559799194, 0.8855909109115601, 0.9042410254478455, 0.9228910803794861, 0.9415411353111267, 0.9601911902427673, 0.978841245174408, 0.9974913001060486, 1.016141414642334, 1.0347914695739746, 1.0534415245056152, 1.0720915794372559, 1.0907416343688965, 1.109391689300537, 1.1280417442321777]}, "gradients/decoder.transformer.h.9.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 5.0, 5.0, 12.0, 17.0, 22.0, 37.0, 44.0, 54.0, 68.0, 82.0, 84.0, 107.0, 86.0, 87.0, 75.0, 53.0, 53.0, 32.0, 37.0, 20.0, 10.0, 7.0, 7.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05605518817901611, -0.054511141031980515, -0.052967093884944916, -0.051423050463199615, -0.04987900331616402, -0.04833495616912842, -0.04679091274738312, -0.04524686560034752, -0.04370281845331192, -0.04215877130627632, -0.04061472415924072, -0.03907068073749542, -0.037526633590459824, -0.035982586443424225, -0.034438543021678925, -0.032894495874643326, -0.03135044872760773, -0.02980640158057213, -0.02826235629618168, -0.02671831101179123, -0.02517426386475563, -0.023630216717720032, -0.022086171433329582, -0.020542126148939133, -0.018998079001903534, -0.017454031854867935, -0.015909986570477486, -0.014365940354764462, -0.012821894139051437, -0.011277847923338413, -0.009733801707625389, -0.008189755491912365, -0.006645709276199341, -0.005101663060486317, -0.0035576168447732925, -0.0020135706290602684, -0.00046952441334724426, 0.0010745218023657799, 0.002618568018078804, 0.004162614233791828, 0.005706660449504852, 0.0072507066652178764, 0.0087947528809309, 0.010338799096643925, 0.011882845312356949, 0.013426891528069973, 0.014970937743782997, 0.016514983028173447, 0.018059030175209045, 0.019603077322244644, 0.021147122606635094, 0.022691167891025543, 0.024235215038061142, 0.02577926218509674, 0.02732330746948719, 0.02886735275387764, 0.03041139990091324, 0.03195544704794884, 0.033499494194984436, 0.035043537616729736, 0.036587584763765335, 0.038131631910800934, 0.039675675332546234, 0.04121972247958183, 0.04276376962661743]}, "gradients/decoder.transformer.h.9.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 2.0, 7.0, 8.0, 10.0, 9.0, 16.0, 10.0, 12.0, 9.0, 16.0, 14.0, 20.0, 23.0, 24.0, 23.0, 33.0, 30.0, 32.0, 30.0, 32.0, 43.0, 48.0, 40.0, 28.0, 43.0, 33.0, 40.0, 39.0, 28.0, 26.0, 22.0, 25.0, 25.0, 34.0, 27.0, 16.0, 18.0, 14.0, 21.0, 16.0, 9.0, 10.0, 8.0, 11.0, 3.0, 5.0, 2.0, 1.0, 6.0, 2.0, 3.0, 3.0, 2.0], "bins": [-10.4921875, -10.1881103515625, -9.884033203125, -9.5799560546875, -9.27587890625, -8.9718017578125, -8.667724609375, -8.3636474609375, -8.0595703125, -7.7554931640625, -7.451416015625, -7.1473388671875, -6.84326171875, -6.5391845703125, -6.235107421875, -5.9310302734375, -5.626953125, -5.3228759765625, -5.018798828125, -4.7147216796875, -4.41064453125, -4.1065673828125, -3.802490234375, -3.4984130859375, -3.1943359375, -2.8902587890625, -2.586181640625, -2.2821044921875, -1.97802734375, -1.6739501953125, -1.369873046875, -1.0657958984375, -0.76171875, -0.4576416015625, -0.153564453125, 0.1505126953125, 0.45458984375, 0.7586669921875, 1.062744140625, 1.3668212890625, 1.6708984375, 1.9749755859375, 2.279052734375, 2.5831298828125, 2.88720703125, 3.1912841796875, 3.495361328125, 3.7994384765625, 4.103515625, 4.4075927734375, 4.711669921875, 5.0157470703125, 5.31982421875, 5.6239013671875, 5.927978515625, 6.2320556640625, 6.5361328125, 6.8402099609375, 7.144287109375, 7.4483642578125, 7.75244140625, 8.0565185546875, 8.360595703125, 8.6646728515625, 8.96875]}, "gradients/decoder.transformer.h.9.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 5.0, 8.0, 13.0, 7.0, 12.0, 19.0, 16.0, 21.0, 25.0, 35.0, 51.0, 63.0, 99.0, 140.0, 176.0, 318.0, 537.0, 988.0, 1815.0, 3848.0, 9291.0, 29635.0, 121383.0, 663482.0, 160336.0, 36017.0, 11202.0, 4312.0, 1950.0, 1033.0, 597.0, 359.0, 208.0, 141.0, 102.0, 71.0, 55.0, 47.0, 35.0, 21.0, 17.0, 12.0, 14.0, 18.0, 5.0, 6.0, 2.0, 1.0, 6.0, 2.0, 3.0, 4.0, 1.0], "bins": [-18.546875, -18.010498046875, -17.47412109375, -16.937744140625, -16.4013671875, -15.864990234375, -15.32861328125, -14.792236328125, -14.255859375, -13.719482421875, -13.18310546875, -12.646728515625, -12.1103515625, -11.573974609375, -11.03759765625, -10.501220703125, -9.96484375, -9.428466796875, -8.89208984375, -8.355712890625, -7.8193359375, -7.282958984375, -6.74658203125, -6.210205078125, -5.673828125, -5.137451171875, -4.60107421875, -4.064697265625, -3.5283203125, -2.991943359375, -2.45556640625, -1.919189453125, -1.3828125, -0.846435546875, -0.31005859375, 0.226318359375, 0.7626953125, 1.299072265625, 1.83544921875, 2.371826171875, 2.908203125, 3.444580078125, 3.98095703125, 4.517333984375, 5.0537109375, 5.590087890625, 6.12646484375, 6.662841796875, 7.19921875, 7.735595703125, 8.27197265625, 8.808349609375, 9.3447265625, 9.881103515625, 10.41748046875, 10.953857421875, 11.490234375, 12.026611328125, 12.56298828125, 13.099365234375, 13.6357421875, 14.172119140625, 14.70849609375, 15.244873046875, 15.78125]}, "gradients/decoder.transformer.h.9.attn.c_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 7.0, 3.0, 8.0, 6.0, 12.0, 11.0, 7.0, 15.0, 22.0, 31.0, 20.0, 29.0, 33.0, 37.0, 33.0, 43.0, 50.0, 64.0, 122.0, 1729.0, 252.0, 81.0, 57.0, 44.0, 49.0, 45.0, 46.0, 28.0, 32.0, 26.0, 16.0, 23.0, 17.0, 11.0, 8.0, 9.0, 5.0, 6.0, 4.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-34.84375, -33.6318359375, -32.419921875, -31.2080078125, -29.99609375, -28.7841796875, -27.572265625, -26.3603515625, -25.1484375, -23.9365234375, -22.724609375, -21.5126953125, -20.30078125, -19.0888671875, -17.876953125, -16.6650390625, -15.453125, -14.2412109375, -13.029296875, -11.8173828125, -10.60546875, -9.3935546875, -8.181640625, -6.9697265625, -5.7578125, -4.5458984375, -3.333984375, -2.1220703125, -0.91015625, 0.3017578125, 1.513671875, 2.7255859375, 3.9375, 5.1494140625, 6.361328125, 7.5732421875, 8.78515625, 9.9970703125, 11.208984375, 12.4208984375, 13.6328125, 14.8447265625, 16.056640625, 17.2685546875, 18.48046875, 19.6923828125, 20.904296875, 22.1162109375, 23.328125, 24.5400390625, 25.751953125, 26.9638671875, 28.17578125, 29.3876953125, 30.599609375, 31.8115234375, 33.0234375, 34.2353515625, 35.447265625, 36.6591796875, 37.87109375, 39.0830078125, 40.294921875, 41.5068359375, 42.71875]}, "gradients/decoder.transformer.h.9.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 3.0, 3.0, 0.0, 4.0, 3.0, 9.0, 3.0, 10.0, 7.0, 12.0, 10.0, 28.0, 20.0, 20.0, 37.0, 45.0, 54.0, 58.0, 102.0, 131.0, 229.0, 724.0, 4418.0, 1112331.0, 2021371.0, 4603.0, 730.0, 250.0, 113.0, 76.0, 70.0, 50.0, 32.0, 28.0, 24.0, 30.0, 15.0, 13.0, 6.0, 8.0, 7.0, 5.0, 7.0, 6.0, 1.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0], "bins": [-123.5625, -120.033203125, -116.50390625, -112.974609375, -109.4453125, -105.916015625, -102.38671875, -98.857421875, -95.328125, -91.798828125, -88.26953125, -84.740234375, -81.2109375, -77.681640625, -74.15234375, -70.623046875, -67.09375, -63.564453125, -60.03515625, -56.505859375, -52.9765625, -49.447265625, -45.91796875, -42.388671875, -38.859375, -35.330078125, -31.80078125, -28.271484375, -24.7421875, -21.212890625, -17.68359375, -14.154296875, -10.625, -7.095703125, -3.56640625, -0.037109375, 3.4921875, 7.021484375, 10.55078125, 14.080078125, 17.609375, 21.138671875, 24.66796875, 28.197265625, 31.7265625, 35.255859375, 38.78515625, 42.314453125, 45.84375, 49.373046875, 52.90234375, 56.431640625, 59.9609375, 63.490234375, 67.01953125, 70.548828125, 74.078125, 77.607421875, 81.13671875, 84.666015625, 88.1953125, 91.724609375, 95.25390625, 98.783203125, 102.3125]}, "gradients/decoder.transformer.h.9.ln_1.weight": {"_type": "histogram", "values": [6.0, 24.0, 206.0, 567.0, 194.0, 20.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.761428833007812, -16.316307067871094, -9.871187210083008, -3.426067352294922, 3.019054412841797, 9.464176177978516, 15.909294128417969, 22.354415893554688, 28.799537658691406, 35.244659423828125, 41.689781188964844, 48.1348991394043, 54.580020904541016, 61.025142669677734, 67.47026062011719, 73.9153823852539, 80.36050415039062, 86.80562591552734, 93.25074768066406, 99.69586181640625, 106.1409912109375, 112.58610534667969, 119.0312271118164, 125.47634887695312, 131.92147827148438, 138.36659240722656, 144.8117218017578, 151.2568359375, 157.70196533203125, 164.14707946777344, 170.59219360351562, 177.03732299804688, 183.48245239257812, 189.9275665283203, 196.37269592285156, 202.81781005859375, 209.262939453125, 215.7080535888672, 222.15316772460938, 228.59829711914062, 235.04342651367188, 241.48854064941406, 247.9336700439453, 254.3787841796875, 260.82391357421875, 267.26904296875, 273.7141418457031, 280.1592712402344, 286.6043701171875, 293.04949951171875, 299.4945983886719, 305.9397277832031, 312.3848571777344, 318.8299865722656, 325.27508544921875, 331.72021484375, 338.16534423828125, 344.6104736328125, 351.0555725097656, 357.5007019042969, 363.9458312988281, 370.3909606933594, 376.8360595703125, 383.28118896484375, 389.726318359375]}, "gradients/decoder.transformer.h.9.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 7.0, 3.0, 5.0, 5.0, 11.0, 12.0, 13.0, 15.0, 26.0, 17.0, 22.0, 32.0, 31.0, 36.0, 40.0, 44.0, 48.0, 41.0, 43.0, 47.0, 41.0, 38.0, 38.0, 44.0, 50.0, 38.0, 22.0, 27.0, 30.0, 32.0, 22.0, 23.0, 23.0, 14.0, 13.0, 11.0, 9.0, 11.0, 9.0, 4.0, 4.0, 4.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-113.2823486328125, -109.82086181640625, -106.359375, -102.89788818359375, -99.4364013671875, -95.97491455078125, -92.513427734375, -89.05194091796875, -85.5904541015625, -82.12896728515625, -78.66748046875, -75.20599365234375, -71.7445068359375, -68.28302001953125, -64.821533203125, -61.360042572021484, -57.89855194091797, -54.43706512451172, -50.97557830810547, -47.51409149169922, -44.05260467529297, -40.59111785888672, -37.1296272277832, -33.66814041137695, -30.206653594970703, -26.745166778564453, -23.283679962158203, -19.82219123840332, -16.36070442199707, -12.89921760559082, -9.437728881835938, -5.9762420654296875, -2.5147628784179688, 0.9467244148254395, 4.408211708068848, 7.869699478149414, 11.331186294555664, 14.792673110961914, 18.254161834716797, 21.715648651123047, 25.177135467529297, 28.638622283935547, 32.1001091003418, 35.56159973144531, 39.02308654785156, 42.48457336425781, 45.94606018066406, 49.40754699707031, 52.86903381347656, 56.33052062988281, 59.79200744628906, 63.25349426269531, 66.71498107910156, 70.17646789550781, 73.63795471191406, 77.09944152832031, 80.56092834472656, 84.02241516113281, 87.48390197753906, 90.94538879394531, 94.40687561035156, 97.86836242675781, 101.32984924316406, 104.79133605957031, 108.2528305053711]}, "gradients/decoder.transformer.h.8.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 4.0, 4.0, 7.0, 11.0, 8.0, 5.0, 15.0, 11.0, 10.0, 11.0, 15.0, 22.0, 17.0, 24.0, 28.0, 29.0, 21.0, 24.0, 34.0, 37.0, 34.0, 26.0, 43.0, 33.0, 39.0, 39.0, 43.0, 32.0, 37.0, 35.0, 38.0, 20.0, 28.0, 21.0, 15.0, 28.0, 22.0, 16.0, 26.0, 17.0, 12.0, 12.0, 9.0, 6.0, 8.0, 6.0, 4.0, 6.0, 2.0, 4.0, 5.0, 2.0, 3.0, 0.0, 2.0], "bins": [-10.7890625, -10.4666748046875, -10.144287109375, -9.8218994140625, -9.49951171875, -9.1771240234375, -8.854736328125, -8.5323486328125, -8.2099609375, -7.8875732421875, -7.565185546875, -7.2427978515625, -6.92041015625, -6.5980224609375, -6.275634765625, -5.9532470703125, -5.630859375, -5.3084716796875, -4.986083984375, -4.6636962890625, -4.34130859375, -4.0189208984375, -3.696533203125, -3.3741455078125, -3.0517578125, -2.7293701171875, -2.406982421875, -2.0845947265625, -1.76220703125, -1.4398193359375, -1.117431640625, -0.7950439453125, -0.47265625, -0.1502685546875, 0.172119140625, 0.4945068359375, 0.81689453125, 1.1392822265625, 1.461669921875, 1.7840576171875, 2.1064453125, 2.4288330078125, 2.751220703125, 3.0736083984375, 3.39599609375, 3.7183837890625, 4.040771484375, 4.3631591796875, 4.685546875, 5.0079345703125, 5.330322265625, 5.6527099609375, 5.97509765625, 6.2974853515625, 6.619873046875, 6.9422607421875, 7.2646484375, 7.5870361328125, 7.909423828125, 8.2318115234375, 8.55419921875, 8.8765869140625, 9.198974609375, 9.5213623046875, 9.84375]}, "gradients/decoder.transformer.h.8.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 3.0, 2.0, 2.0, 1.0, 4.0, 4.0, 6.0, 16.0, 14.0, 17.0, 20.0, 31.0, 41.0, 52.0, 61.0, 111.0, 142.0, 225.0, 344.0, 613.0, 1017.0, 1889.0, 3692.0, 7855.0, 18585.0, 54549.0, 198061.0, 691037.0, 1473925.0, 1156428.0, 415178.0, 112195.0, 33789.0, 12705.0, 5335.0, 2668.0, 1403.0, 850.0, 488.0, 304.0, 169.0, 110.0, 93.0, 74.0, 50.0, 34.0, 22.0, 27.0, 10.0, 13.0, 10.0, 5.0, 6.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-13.6875, -13.2568359375, -12.826171875, -12.3955078125, -11.96484375, -11.5341796875, -11.103515625, -10.6728515625, -10.2421875, -9.8115234375, -9.380859375, -8.9501953125, -8.51953125, -8.0888671875, -7.658203125, -7.2275390625, -6.796875, -6.3662109375, -5.935546875, -5.5048828125, -5.07421875, -4.6435546875, -4.212890625, -3.7822265625, -3.3515625, -2.9208984375, -2.490234375, -2.0595703125, -1.62890625, -1.1982421875, -0.767578125, -0.3369140625, 0.09375, 0.5244140625, 0.955078125, 1.3857421875, 1.81640625, 2.2470703125, 2.677734375, 3.1083984375, 3.5390625, 3.9697265625, 4.400390625, 4.8310546875, 5.26171875, 5.6923828125, 6.123046875, 6.5537109375, 6.984375, 7.4150390625, 7.845703125, 8.2763671875, 8.70703125, 9.1376953125, 9.568359375, 9.9990234375, 10.4296875, 10.8603515625, 11.291015625, 11.7216796875, 12.15234375, 12.5830078125, 13.013671875, 13.4443359375, 13.875]}, "gradients/decoder.transformer.h.8.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 0.0, 4.0, 7.0, 5.0, 4.0, 11.0, 10.0, 26.0, 25.0, 55.0, 63.0, 93.0, 115.0, 195.0, 286.0, 391.0, 529.0, 543.0, 483.0, 362.0, 282.0, 179.0, 144.0, 82.0, 62.0, 27.0, 33.0, 22.0, 13.0, 6.0, 4.0, 11.0, 4.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.09375, -22.307373046875, -21.52099609375, -20.734619140625, -19.9482421875, -19.161865234375, -18.37548828125, -17.589111328125, -16.802734375, -16.016357421875, -15.22998046875, -14.443603515625, -13.6572265625, -12.870849609375, -12.08447265625, -11.298095703125, -10.51171875, -9.725341796875, -8.93896484375, -8.152587890625, -7.3662109375, -6.579833984375, -5.79345703125, -5.007080078125, -4.220703125, -3.434326171875, -2.64794921875, -1.861572265625, -1.0751953125, -0.288818359375, 0.49755859375, 1.283935546875, 2.0703125, 2.856689453125, 3.64306640625, 4.429443359375, 5.2158203125, 6.002197265625, 6.78857421875, 7.574951171875, 8.361328125, 9.147705078125, 9.93408203125, 10.720458984375, 11.5068359375, 12.293212890625, 13.07958984375, 13.865966796875, 14.65234375, 15.438720703125, 16.22509765625, 17.011474609375, 17.7978515625, 18.584228515625, 19.37060546875, 20.156982421875, 20.943359375, 21.729736328125, 22.51611328125, 23.302490234375, 24.0888671875, 24.875244140625, 25.66162109375, 26.447998046875, 27.234375]}, "gradients/decoder.transformer.h.8.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 4.0, 3.0, 12.0, 11.0, 15.0, 20.0, 31.0, 41.0, 59.0, 94.0, 147.0, 226.0, 366.0, 726.0, 2537.0, 52496.0, 3884136.0, 246231.0, 4943.0, 969.0, 439.0, 269.0, 174.0, 97.0, 85.0, 55.0, 38.0, 23.0, 14.0, 10.0, 4.0, 5.0, 4.0, 4.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-94.1875, -91.388671875, -88.58984375, -85.791015625, -82.9921875, -80.193359375, -77.39453125, -74.595703125, -71.796875, -68.998046875, -66.19921875, -63.400390625, -60.6015625, -57.802734375, -55.00390625, -52.205078125, -49.40625, -46.607421875, -43.80859375, -41.009765625, -38.2109375, -35.412109375, -32.61328125, -29.814453125, -27.015625, -24.216796875, -21.41796875, -18.619140625, -15.8203125, -13.021484375, -10.22265625, -7.423828125, -4.625, -1.826171875, 0.97265625, 3.771484375, 6.5703125, 9.369140625, 12.16796875, 14.966796875, 17.765625, 20.564453125, 23.36328125, 26.162109375, 28.9609375, 31.759765625, 34.55859375, 37.357421875, 40.15625, 42.955078125, 45.75390625, 48.552734375, 51.3515625, 54.150390625, 56.94921875, 59.748046875, 62.546875, 65.345703125, 68.14453125, 70.943359375, 73.7421875, 76.541015625, 79.33984375, 82.138671875, 84.9375]}, "gradients/decoder.transformer.h.8.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 32.0, 103.0, 200.0, 301.0, 221.0, 115.0, 30.0, 4.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-195.31478881835938, -187.926513671875, -180.5382537841797, -173.1499786376953, -165.76170349121094, -158.37344360351562, -150.98516845703125, -143.59689331054688, -136.20863342285156, -128.8203582763672, -121.43209075927734, -114.0438232421875, -106.65555572509766, -99.26728820800781, -91.87901306152344, -84.4907455444336, -77.10247039794922, -69.71420288085938, -62.325931549072266, -54.937660217285156, -47.54939270019531, -40.1611213684082, -32.772850036621094, -25.38458251953125, -17.99631118774414, -10.608041763305664, -3.219771385192871, 4.168498992919922, 11.556768417358398, 18.945037841796875, 26.333309173583984, 33.72157669067383, 41.10984802246094, 48.49811935424805, 55.88638687133789, 63.274658203125, 70.66292572021484, 78.05119323730469, 85.43946838378906, 92.8277359008789, 100.21600341796875, 107.6042709350586, 114.99254608154297, 122.38081359863281, 129.7690887451172, 137.1573486328125, 144.54562377929688, 151.93389892578125, 159.32217407226562, 166.71044921875, 174.0987091064453, 181.4869842529297, 188.87525939941406, 196.26351928710938, 203.65179443359375, 211.04006958007812, 218.42832946777344, 225.8166046142578, 233.20486450195312, 240.5931396484375, 247.98141479492188, 255.3696746826172, 262.7579345703125, 270.1462097167969, 277.53448486328125]}, "gradients/decoder.transformer.h.8.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 2.0, 3.0, 5.0, 5.0, 8.0, 10.0, 17.0, 19.0, 7.0, 14.0, 21.0, 31.0, 26.0, 35.0, 31.0, 28.0, 46.0, 38.0, 37.0, 39.0, 44.0, 45.0, 40.0, 52.0, 50.0, 30.0, 26.0, 42.0, 30.0, 31.0, 23.0, 28.0, 26.0, 14.0, 18.0, 18.0, 14.0, 12.0, 15.0, 5.0, 5.0, 5.0, 2.0, 4.0, 2.0, 0.0, 3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-60.4449462890625, -58.251590728759766, -56.05823516845703, -53.8648796081543, -51.67152404785156, -49.478172302246094, -47.28481674194336, -45.091461181640625, -42.89810562133789, -40.704750061035156, -38.51139450073242, -36.31803894042969, -34.12468719482422, -31.93132972717285, -29.73797607421875, -27.544620513916016, -25.35126495361328, -23.157909393310547, -20.964553833007812, -18.77120018005371, -16.577844619750977, -14.384489059448242, -12.191134452819824, -9.997779846191406, -7.804424285888672, -5.611069202423096, -3.4177141189575195, -1.2243590354919434, 0.9689960479736328, 3.162351608276367, 5.355706214904785, 7.549060821533203, 9.742416381835938, 11.935771942138672, 14.12912654876709, 16.322481155395508, 18.515836715698242, 20.709192276000977, 22.902545928955078, 25.095901489257812, 27.289257049560547, 29.48261260986328, 31.675968170166016, 33.86932373046875, 36.06267547607422, 38.25603485107422, 40.44938659667969, 42.64274215698242, 44.836097717285156, 47.02945327758789, 49.222808837890625, 51.41616439819336, 53.609519958496094, 55.80287170410156, 57.9962272644043, 60.18958282470703, 62.382938385009766, 64.5762939453125, 66.76964569091797, 68.96300506591797, 71.15635681152344, 73.34971618652344, 75.5430679321289, 77.73641967773438, 79.92977905273438]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 5.0, 6.0, 1.0, 6.0, 5.0, 4.0, 9.0, 11.0, 11.0, 7.0, 14.0, 15.0, 25.0, 23.0, 20.0, 22.0, 30.0, 31.0, 30.0, 42.0, 41.0, 36.0, 35.0, 37.0, 43.0, 40.0, 42.0, 50.0, 29.0, 40.0, 33.0, 36.0, 21.0, 19.0, 22.0, 16.0, 24.0, 19.0, 18.0, 20.0, 20.0, 11.0, 4.0, 9.0, 10.0, 4.0, 5.0, 2.0, 3.0, 0.0, 3.0, 3.0, 1.0, 1.0], "bins": [-11.7421875, -11.4044189453125, -11.066650390625, -10.7288818359375, -10.39111328125, -10.0533447265625, -9.715576171875, -9.3778076171875, -9.0400390625, -8.7022705078125, -8.364501953125, -8.0267333984375, -7.68896484375, -7.3511962890625, -7.013427734375, -6.6756591796875, -6.337890625, -6.0001220703125, -5.662353515625, -5.3245849609375, -4.98681640625, -4.6490478515625, -4.311279296875, -3.9735107421875, -3.6357421875, -3.2979736328125, -2.960205078125, -2.6224365234375, -2.28466796875, -1.9468994140625, -1.609130859375, -1.2713623046875, -0.93359375, -0.5958251953125, -0.258056640625, 0.0797119140625, 0.41748046875, 0.7552490234375, 1.093017578125, 1.4307861328125, 1.7685546875, 2.1063232421875, 2.444091796875, 2.7818603515625, 3.11962890625, 3.4573974609375, 3.795166015625, 4.1329345703125, 4.470703125, 4.8084716796875, 5.146240234375, 5.4840087890625, 5.82177734375, 6.1595458984375, 6.497314453125, 6.8350830078125, 7.1728515625, 7.5106201171875, 7.848388671875, 8.1861572265625, 8.52392578125, 8.8616943359375, 9.199462890625, 9.5372314453125, 9.875]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 5.0, 5.0, 7.0, 21.0, 15.0, 20.0, 37.0, 48.0, 73.0, 127.0, 152.0, 223.0, 332.0, 524.0, 744.0, 1120.0, 1583.0, 2354.0, 3309.0, 4829.0, 7164.0, 10470.0, 15703.0, 23708.0, 35778.0, 55189.0, 83800.0, 120954.0, 155934.0, 155330.0, 121076.0, 84124.0, 54938.0, 35896.0, 23955.0, 15697.0, 10547.0, 7028.0, 4962.0, 3288.0, 2351.0, 1652.0, 1107.0, 735.0, 494.0, 374.0, 260.0, 147.0, 137.0, 72.0, 55.0, 37.0, 27.0, 21.0, 14.0, 11.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.72900390625, -0.7054901123046875, -0.681976318359375, -0.6584625244140625, -0.63494873046875, -0.6114349365234375, -0.587921142578125, -0.5644073486328125, -0.5408935546875, -0.5173797607421875, -0.493865966796875, -0.4703521728515625, -0.44683837890625, -0.4233245849609375, -0.399810791015625, -0.3762969970703125, -0.352783203125, -0.3292694091796875, -0.305755615234375, -0.2822418212890625, -0.25872802734375, -0.2352142333984375, -0.211700439453125, -0.1881866455078125, -0.1646728515625, -0.1411590576171875, -0.117645263671875, -0.0941314697265625, -0.07061767578125, -0.0471038818359375, -0.023590087890625, -7.62939453125e-05, 0.0234375, 0.0469512939453125, 0.070465087890625, 0.0939788818359375, 0.11749267578125, 0.1410064697265625, 0.164520263671875, 0.1880340576171875, 0.2115478515625, 0.2350616455078125, 0.258575439453125, 0.2820892333984375, 0.30560302734375, 0.3291168212890625, 0.352630615234375, 0.3761444091796875, 0.399658203125, 0.4231719970703125, 0.446685791015625, 0.4701995849609375, 0.49371337890625, 0.5172271728515625, 0.540740966796875, 0.5642547607421875, 0.5877685546875, 0.6112823486328125, 0.634796142578125, 0.6583099365234375, 0.68182373046875, 0.7053375244140625, 0.728851318359375, 0.7523651123046875, 0.77587890625]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 3.0, 2.0, 3.0, 3.0, 4.0, 7.0, 4.0, 9.0, 10.0, 15.0, 8.0, 20.0, 19.0, 29.0, 27.0, 26.0, 34.0, 36.0, 19.0, 41.0, 55.0, 45.0, 28.0, 56.0, 1070.0, 40.0, 54.0, 44.0, 44.0, 37.0, 38.0, 32.0, 20.0, 18.0, 32.0, 13.0, 17.0, 23.0, 10.0, 9.0, 7.0, 13.0, 3.0, 1.0, 4.0, 3.0, 2.0, 1.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.0, -6.74560546875, -6.4912109375, -6.23681640625, -5.982421875, -5.72802734375, -5.4736328125, -5.21923828125, -4.96484375, -4.71044921875, -4.4560546875, -4.20166015625, -3.947265625, -3.69287109375, -3.4384765625, -3.18408203125, -2.9296875, -2.67529296875, -2.4208984375, -2.16650390625, -1.912109375, -1.65771484375, -1.4033203125, -1.14892578125, -0.89453125, -0.64013671875, -0.3857421875, -0.13134765625, 0.123046875, 0.37744140625, 0.6318359375, 0.88623046875, 1.140625, 1.39501953125, 1.6494140625, 1.90380859375, 2.158203125, 2.41259765625, 2.6669921875, 2.92138671875, 3.17578125, 3.43017578125, 3.6845703125, 3.93896484375, 4.193359375, 4.44775390625, 4.7021484375, 4.95654296875, 5.2109375, 5.46533203125, 5.7197265625, 5.97412109375, 6.228515625, 6.48291015625, 6.7373046875, 6.99169921875, 7.24609375, 7.50048828125, 7.7548828125, 8.00927734375, 8.263671875, 8.51806640625, 8.7724609375, 9.02685546875, 9.28125]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 3.0, 7.0, 7.0, 20.0, 22.0, 37.0, 55.0, 60.0, 108.0, 174.0, 270.0, 415.0, 589.0, 920.0, 1438.0, 2286.0, 3963.0, 6298.0, 10375.0, 17441.0, 28710.0, 46950.0, 74715.0, 111638.0, 170529.0, 1186586.0, 150491.0, 104453.0, 68432.0, 43180.0, 26064.0, 15918.0, 9740.0, 5863.0, 3580.0, 2132.0, 1311.0, 854.0, 496.0, 332.0, 251.0, 125.0, 80.0, 74.0, 47.0, 23.0, 30.0, 16.0, 9.0, 4.0, 2.0, 3.0, 2.0, 3.0, 3.0, 1.0], "bins": [-0.650390625, -0.63092041015625, -0.6114501953125, -0.59197998046875, -0.572509765625, -0.55303955078125, -0.5335693359375, -0.51409912109375, -0.49462890625, -0.47515869140625, -0.4556884765625, -0.43621826171875, -0.416748046875, -0.39727783203125, -0.3778076171875, -0.35833740234375, -0.3388671875, -0.31939697265625, -0.2999267578125, -0.28045654296875, -0.260986328125, -0.24151611328125, -0.2220458984375, -0.20257568359375, -0.18310546875, -0.16363525390625, -0.1441650390625, -0.12469482421875, -0.105224609375, -0.08575439453125, -0.0662841796875, -0.04681396484375, -0.02734375, -0.00787353515625, 0.0115966796875, 0.03106689453125, 0.050537109375, 0.07000732421875, 0.0894775390625, 0.10894775390625, 0.12841796875, 0.14788818359375, 0.1673583984375, 0.18682861328125, 0.206298828125, 0.22576904296875, 0.2452392578125, 0.26470947265625, 0.2841796875, 0.30364990234375, 0.3231201171875, 0.34259033203125, 0.362060546875, 0.38153076171875, 0.4010009765625, 0.42047119140625, 0.43994140625, 0.45941162109375, 0.4788818359375, 0.49835205078125, 0.517822265625, 0.53729248046875, 0.5567626953125, 0.57623291015625, 0.595703125]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 5.0, 4.0, 0.0, 4.0, 6.0, 5.0, 10.0, 9.0, 11.0, 3.0, 15.0, 18.0, 17.0, 21.0, 25.0, 42.0, 62.0, 76.0, 75.0, 105.0, 107.0, 98.0, 72.0, 42.0, 40.0, 21.0, 25.0, 22.0, 14.0, 12.0, 7.0, 10.0, 4.0, 9.0, 2.0, 4.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0293731689453125, -0.028477907180786133, -0.027582645416259766, -0.0266873836517334, -0.02579212188720703, -0.024896860122680664, -0.024001598358154297, -0.02310633659362793, -0.022211074829101562, -0.021315813064575195, -0.020420551300048828, -0.01952528953552246, -0.018630027770996094, -0.017734766006469727, -0.01683950424194336, -0.015944242477416992, -0.015048980712890625, -0.014153718948364258, -0.01325845718383789, -0.012363195419311523, -0.011467933654785156, -0.010572671890258789, -0.009677410125732422, -0.008782148361206055, -0.007886886596679688, -0.00699162483215332, -0.006096363067626953, -0.005201101303100586, -0.004305839538574219, -0.0034105777740478516, -0.0025153160095214844, -0.0016200542449951172, -0.00072479248046875, 0.0001704692840576172, 0.0010657310485839844, 0.0019609928131103516, 0.0028562545776367188, 0.003751516342163086, 0.004646778106689453, 0.00554203987121582, 0.0064373016357421875, 0.007332563400268555, 0.008227825164794922, 0.009123086929321289, 0.010018348693847656, 0.010913610458374023, 0.01180887222290039, 0.012704133987426758, 0.013599395751953125, 0.014494657516479492, 0.01538991928100586, 0.016285181045532227, 0.017180442810058594, 0.01807570457458496, 0.018970966339111328, 0.019866228103637695, 0.020761489868164062, 0.02165675163269043, 0.022552013397216797, 0.023447275161743164, 0.02434253692626953, 0.0252377986907959, 0.026133060455322266, 0.027028322219848633, 0.027923583984375]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 2.0, 6.0, 4.0, 3.0, 6.0, 13.0, 10.0, 15.0, 12.0, 25.0, 34.0, 42.0, 43.0, 79.0, 119.0, 224.0, 574.0, 18072.0, 1023958.0, 4301.0, 389.0, 219.0, 125.0, 76.0, 39.0, 23.0, 40.0, 19.0, 13.0, 13.0, 11.0, 15.0, 10.0, 8.0, 6.0, 5.0, 3.0, 3.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5732421875, -0.5543746948242188, -0.5355072021484375, -0.5166397094726562, -0.497772216796875, -0.47890472412109375, -0.4600372314453125, -0.44116973876953125, -0.42230224609375, -0.40343475341796875, -0.3845672607421875, -0.36569976806640625, -0.346832275390625, -0.32796478271484375, -0.3090972900390625, -0.29022979736328125, -0.2713623046875, -0.25249481201171875, -0.2336273193359375, -0.21475982666015625, -0.195892333984375, -0.17702484130859375, -0.1581573486328125, -0.13928985595703125, -0.12042236328125, -0.10155487060546875, -0.0826873779296875, -0.06381988525390625, -0.044952392578125, -0.02608489990234375, -0.0072174072265625, 0.01165008544921875, 0.030517578125, 0.04938507080078125, 0.0682525634765625, 0.08712005615234375, 0.105987548828125, 0.12485504150390625, 0.1437225341796875, 0.16259002685546875, 0.18145751953125, 0.20032501220703125, 0.2191925048828125, 0.23805999755859375, 0.256927490234375, 0.27579498291015625, 0.2946624755859375, 0.31352996826171875, 0.3323974609375, 0.35126495361328125, 0.3701324462890625, 0.38899993896484375, 0.407867431640625, 0.42673492431640625, 0.4456024169921875, 0.46446990966796875, 0.48333740234375, 0.5022048950195312, 0.5210723876953125, 0.5399398803710938, 0.558807373046875, 0.5776748657226562, 0.5965423583984375, 0.6154098510742188, 0.63427734375]}, "gradients/decoder.transformer.h.8.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 218.0, 775.0, 20.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.041298989206552505, -0.03557164967060089, -0.02984430640935898, -0.024116966873407364, -0.0183896254748106, -0.012662284076213837, -0.006934944540262222, -0.0012076012790203094, 0.004519738256931305, 0.010247079655528069, 0.015974421054124832, 0.021701760590076447, 0.02742910198867321, 0.033156443387269974, 0.03888378292322159, 0.0446111261844635, 0.050338465720415115, 0.05606580525636673, 0.06179314851760864, 0.06752048432826996, 0.07324782758951187, 0.07897517085075378, 0.0847025066614151, 0.09042985737323761, 0.09615719318389893, 0.10188453644514084, 0.10761187225580215, 0.11333921551704407, 0.11906655877828598, 0.1247939020395279, 0.1305212378501892, 0.13624858856201172, 0.14197592437267303, 0.14770326018333435, 0.15343061089515686, 0.15915794670581818, 0.1648852825164795, 0.170612633228302, 0.17633996903896332, 0.18206730484962463, 0.18779465556144714, 0.19352199137210846, 0.19924934208393097, 0.20497667789459229, 0.2107040137052536, 0.2164313644170761, 0.22215870022773743, 0.22788605093955994, 0.23361337184906006, 0.23934070765972137, 0.24506805837154388, 0.2507953941822052, 0.2565227448940277, 0.26225006580352783, 0.26797741651535034, 0.27370476722717285, 0.27943211793899536, 0.28515946865081787, 0.290886789560318, 0.2966141402721405, 0.302341490983963, 0.30806881189346313, 0.31379616260528564, 0.31952351331710815, 0.3252508342266083]}, "gradients/decoder.transformer.h.8.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 7.0, 4.0, 7.0, 8.0, 12.0, 13.0, 17.0, 19.0, 33.0, 28.0, 34.0, 51.0, 46.0, 53.0, 56.0, 57.0, 60.0, 53.0, 66.0, 51.0, 55.0, 54.0, 53.0, 34.0, 37.0, 25.0, 23.0, 13.0, 14.0, 9.0, 9.0, 5.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.02470952272415161, -0.02404044382274151, -0.023371364921331406, -0.022702286019921303, -0.0220332071185112, -0.021364128217101097, -0.020695049315690994, -0.02002597041428089, -0.01935689151287079, -0.018687812611460686, -0.018018733710050583, -0.01734965480864048, -0.016680575907230377, -0.016011497005820274, -0.015342418104410172, -0.014673339203000069, -0.014004260301589966, -0.013335181400179863, -0.01266610249876976, -0.011997023597359657, -0.011327944695949554, -0.010658865794539452, -0.009989786893129349, -0.009320707991719246, -0.008651629090309143, -0.00798255018889904, -0.007313471287488937, -0.0066443923860788345, -0.005975313484668732, -0.005306234583258629, -0.004637155681848526, -0.003968076780438423, -0.0032989978790283203, -0.0026299189776182175, -0.0019608400762081146, -0.0012917611747980118, -0.0006226822733879089, 4.639662802219391e-05, 0.0007154755294322968, 0.0013845544308423996, 0.0020536333322525024, 0.0027227122336626053, 0.003391791135072708, 0.004060870036482811, 0.004729948937892914, 0.005399027839303017, 0.0060681067407131195, 0.006737185642123222, 0.007406264543533325, 0.008075343444943428, 0.008744422346353531, 0.009413501247763634, 0.010082580149173737, 0.01075165905058384, 0.011420737951993942, 0.012089816853404045, 0.012758895754814148, 0.01342797465622425, 0.014097053557634354, 0.014766132459044456, 0.01543521136045456, 0.016104290261864662, 0.016773369163274765, 0.017442448064684868, 0.01811152696609497]}, "gradients/decoder.transformer.h.8.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 5.0, 6.0, 1.0, 6.0, 5.0, 4.0, 9.0, 11.0, 11.0, 7.0, 14.0, 15.0, 25.0, 23.0, 20.0, 22.0, 30.0, 31.0, 30.0, 42.0, 41.0, 36.0, 35.0, 37.0, 43.0, 40.0, 42.0, 50.0, 29.0, 40.0, 33.0, 36.0, 19.0, 20.0, 22.0, 17.0, 24.0, 18.0, 19.0, 20.0, 20.0, 11.0, 4.0, 9.0, 10.0, 4.0, 5.0, 2.0, 3.0, 0.0, 3.0, 3.0, 1.0, 1.0], "bins": [-11.7421875, -11.4044189453125, -11.066650390625, -10.7288818359375, -10.39111328125, -10.0533447265625, -9.715576171875, -9.3778076171875, -9.0400390625, -8.7022705078125, -8.364501953125, -8.0267333984375, -7.68896484375, -7.3511962890625, -7.013427734375, -6.6756591796875, -6.337890625, -6.0001220703125, -5.662353515625, -5.3245849609375, -4.98681640625, -4.6490478515625, -4.311279296875, -3.9735107421875, -3.6357421875, -3.2979736328125, -2.960205078125, -2.6224365234375, -2.28466796875, -1.9468994140625, -1.609130859375, -1.2713623046875, -0.93359375, -0.5958251953125, -0.258056640625, 0.0797119140625, 0.41748046875, 0.7552490234375, 1.093017578125, 1.4307861328125, 1.7685546875, 2.1063232421875, 2.444091796875, 2.7818603515625, 3.11962890625, 3.4573974609375, 3.795166015625, 4.1329345703125, 4.470703125, 4.8084716796875, 5.146240234375, 5.4840087890625, 5.82177734375, 6.1595458984375, 6.497314453125, 6.8350830078125, 7.1728515625, 7.5106201171875, 7.848388671875, 8.1861572265625, 8.52392578125, 8.8616943359375, 9.199462890625, 9.5372314453125, 9.875]}, "gradients/decoder.transformer.h.8.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 5.0, 2.0, 7.0, 5.0, 5.0, 8.0, 9.0, 9.0, 14.0, 27.0, 19.0, 51.0, 66.0, 116.0, 189.0, 267.0, 438.0, 786.0, 1343.0, 2528.0, 4683.0, 8834.0, 17090.0, 35437.0, 81461.0, 218451.0, 375839.0, 172606.0, 66879.0, 30055.0, 14734.0, 7369.0, 3939.0, 2171.0, 1204.0, 736.0, 416.0, 258.0, 177.0, 106.0, 71.0, 49.0, 35.0, 12.0, 16.0, 12.0, 10.0, 5.0, 5.0, 2.0, 3.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0], "bins": [-9.8828125, -9.5867919921875, -9.290771484375, -8.9947509765625, -8.69873046875, -8.4027099609375, -8.106689453125, -7.8106689453125, -7.5146484375, -7.2186279296875, -6.922607421875, -6.6265869140625, -6.33056640625, -6.0345458984375, -5.738525390625, -5.4425048828125, -5.146484375, -4.8504638671875, -4.554443359375, -4.2584228515625, -3.96240234375, -3.6663818359375, -3.370361328125, -3.0743408203125, -2.7783203125, -2.4822998046875, -2.186279296875, -1.8902587890625, -1.59423828125, -1.2982177734375, -1.002197265625, -0.7061767578125, -0.41015625, -0.1141357421875, 0.181884765625, 0.4779052734375, 0.77392578125, 1.0699462890625, 1.365966796875, 1.6619873046875, 1.9580078125, 2.2540283203125, 2.550048828125, 2.8460693359375, 3.14208984375, 3.4381103515625, 3.734130859375, 4.0301513671875, 4.326171875, 4.6221923828125, 4.918212890625, 5.2142333984375, 5.51025390625, 5.8062744140625, 6.102294921875, 6.3983154296875, 6.6943359375, 6.9903564453125, 7.286376953125, 7.5823974609375, 7.87841796875, 8.1744384765625, 8.470458984375, 8.7664794921875, 9.0625]}, "gradients/decoder.transformer.h.8.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 2.0, 7.0, 11.0, 12.0, 14.0, 10.0, 9.0, 15.0, 21.0, 36.0, 25.0, 32.0, 28.0, 35.0, 41.0, 44.0, 60.0, 74.0, 154.0, 1547.0, 341.0, 117.0, 57.0, 38.0, 34.0, 36.0, 34.0, 47.0, 20.0, 20.0, 27.0, 9.0, 20.0, 15.0, 10.0, 11.0, 10.0, 5.0, 3.0, 8.0, 6.0, 0.0, 2.0, 2.0, 1.0, 4.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-35.4375, -34.28857421875, -33.1396484375, -31.99072265625, -30.841796875, -29.69287109375, -28.5439453125, -27.39501953125, -26.24609375, -25.09716796875, -23.9482421875, -22.79931640625, -21.650390625, -20.50146484375, -19.3525390625, -18.20361328125, -17.0546875, -15.90576171875, -14.7568359375, -13.60791015625, -12.458984375, -11.31005859375, -10.1611328125, -9.01220703125, -7.86328125, -6.71435546875, -5.5654296875, -4.41650390625, -3.267578125, -2.11865234375, -0.9697265625, 0.17919921875, 1.328125, 2.47705078125, 3.6259765625, 4.77490234375, 5.923828125, 7.07275390625, 8.2216796875, 9.37060546875, 10.51953125, 11.66845703125, 12.8173828125, 13.96630859375, 15.115234375, 16.26416015625, 17.4130859375, 18.56201171875, 19.7109375, 20.85986328125, 22.0087890625, 23.15771484375, 24.306640625, 25.45556640625, 26.6044921875, 27.75341796875, 28.90234375, 30.05126953125, 31.2001953125, 32.34912109375, 33.498046875, 34.64697265625, 35.7958984375, 36.94482421875, 38.09375]}, "gradients/decoder.transformer.h.8.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 1.0, 2.0, 5.0, 6.0, 9.0, 8.0, 19.0, 23.0, 13.0, 20.0, 34.0, 29.0, 32.0, 71.0, 82.0, 115.0, 123.0, 191.0, 285.0, 524.0, 1928.0, 37743.0, 2987917.0, 110950.0, 3619.0, 696.0, 381.0, 229.0, 146.0, 121.0, 83.0, 72.0, 56.0, 35.0, 32.0, 17.0, 19.0, 17.0, 13.0, 15.0, 7.0, 6.0, 3.0, 4.0, 2.0, 0.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-61.78125, -59.8876953125, -57.994140625, -56.1005859375, -54.20703125, -52.3134765625, -50.419921875, -48.5263671875, -46.6328125, -44.7392578125, -42.845703125, -40.9521484375, -39.05859375, -37.1650390625, -35.271484375, -33.3779296875, -31.484375, -29.5908203125, -27.697265625, -25.8037109375, -23.91015625, -22.0166015625, -20.123046875, -18.2294921875, -16.3359375, -14.4423828125, -12.548828125, -10.6552734375, -8.76171875, -6.8681640625, -4.974609375, -3.0810546875, -1.1875, 0.7060546875, 2.599609375, 4.4931640625, 6.38671875, 8.2802734375, 10.173828125, 12.0673828125, 13.9609375, 15.8544921875, 17.748046875, 19.6416015625, 21.53515625, 23.4287109375, 25.322265625, 27.2158203125, 29.109375, 31.0029296875, 32.896484375, 34.7900390625, 36.68359375, 38.5771484375, 40.470703125, 42.3642578125, 44.2578125, 46.1513671875, 48.044921875, 49.9384765625, 51.83203125, 53.7255859375, 55.619140625, 57.5126953125, 59.40625]}, "gradients/decoder.transformer.h.8.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 34.0, 516.0, 441.0, 26.0, 1.0, 0.0, 1.0], "bins": [-482.9097595214844, -474.7137756347656, -466.5177917480469, -458.3218078613281, -450.1258239746094, -441.9298400878906, -433.7338562011719, -425.537841796875, -417.34185791015625, -409.1458740234375, -400.94989013671875, -392.75390625, -384.55792236328125, -376.3619384765625, -368.16595458984375, -359.969970703125, -351.77398681640625, -343.5780029296875, -335.38201904296875, -327.18603515625, -318.99005126953125, -310.7940673828125, -302.59808349609375, -294.402099609375, -286.20611572265625, -278.0101318359375, -269.81414794921875, -261.6181640625, -253.42218017578125, -245.2261962890625, -237.0301971435547, -228.83421325683594, -220.63818359375, -212.44219970703125, -204.2462158203125, -196.05023193359375, -187.854248046875, -179.65826416015625, -171.46226501464844, -163.2662811279297, -155.07029724121094, -146.8743133544922, -138.67832946777344, -130.4823455810547, -122.2863540649414, -114.09037017822266, -105.89437866210938, -97.69839477539062, -89.50241088867188, -81.30642700195312, -73.11044311523438, -64.9144515991211, -56.718467712402344, -48.522483825683594, -40.32649612426758, -32.13050842285156, -23.934524536132812, -15.73853874206543, -7.542552947998047, 0.6534328460693359, 8.849418640136719, 17.04540252685547, 25.241390228271484, 33.4373779296875, 41.63336181640625]}, "gradients/decoder.transformer.h.8.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 4.0, 3.0, 4.0, 6.0, 12.0, 13.0, 17.0, 16.0, 17.0, 18.0, 28.0, 32.0, 20.0, 39.0, 34.0, 38.0, 42.0, 36.0, 51.0, 41.0, 41.0, 35.0, 44.0, 42.0, 41.0, 41.0, 45.0, 39.0, 24.0, 24.0, 25.0, 22.0, 24.0, 17.0, 11.0, 12.0, 11.0, 8.0, 6.0, 3.0, 4.0, 7.0, 5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-84.49349975585938, -81.39781188964844, -78.30211639404297, -75.20642852783203, -72.1107406616211, -69.01504516601562, -65.91935729980469, -62.82366943359375, -59.72797775268555, -56.632286071777344, -53.536598205566406, -50.4409065246582, -47.34521484375, -44.24952697753906, -41.15383529663086, -38.058143615722656, -34.96245574951172, -31.86676597595215, -28.771076202392578, -25.675384521484375, -22.579694747924805, -19.484004974365234, -16.38831329345703, -13.292623519897461, -10.19693374633789, -7.101243495941162, -4.005553245544434, -0.9098625183105469, 2.1858272552490234, 5.281517028808594, 8.377208709716797, 11.472898483276367, 14.568588256835938, 17.664278030395508, 20.759967803955078, 23.85565948486328, 26.95134925842285, 30.047039031982422, 33.142730712890625, 36.23841857910156, 39.334110260009766, 42.42980194091797, 45.525489807128906, 48.62118148803711, 51.71687316894531, 54.81256103515625, 57.90825271606445, 61.003944396972656, 64.0996322631836, 67.19532012939453, 70.291015625, 73.38670349121094, 76.48239135742188, 79.57807922363281, 82.67377471923828, 85.76946258544922, 88.86515808105469, 91.96084594726562, 95.0565414428711, 98.15222930908203, 101.24791717529297, 104.34361267089844, 107.43930053710938, 110.53498840332031, 113.63067626953125]}, "gradients/decoder.transformer.h.7.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 3.0, 5.0, 9.0, 1.0, 5.0, 5.0, 14.0, 13.0, 18.0, 19.0, 25.0, 13.0, 18.0, 23.0, 23.0, 26.0, 42.0, 32.0, 37.0, 48.0, 37.0, 37.0, 44.0, 42.0, 50.0, 43.0, 39.0, 37.0, 36.0, 29.0, 31.0, 20.0, 24.0, 18.0, 22.0, 23.0, 13.0, 22.0, 10.0, 12.0, 7.0, 7.0, 4.0, 6.0, 3.0, 5.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-12.2734375, -11.921142578125, -11.56884765625, -11.216552734375, -10.8642578125, -10.511962890625, -10.15966796875, -9.807373046875, -9.455078125, -9.102783203125, -8.75048828125, -8.398193359375, -8.0458984375, -7.693603515625, -7.34130859375, -6.989013671875, -6.63671875, -6.284423828125, -5.93212890625, -5.579833984375, -5.2275390625, -4.875244140625, -4.52294921875, -4.170654296875, -3.818359375, -3.466064453125, -3.11376953125, -2.761474609375, -2.4091796875, -2.056884765625, -1.70458984375, -1.352294921875, -1.0, -0.647705078125, -0.29541015625, 0.056884765625, 0.4091796875, 0.761474609375, 1.11376953125, 1.466064453125, 1.818359375, 2.170654296875, 2.52294921875, 2.875244140625, 3.2275390625, 3.579833984375, 3.93212890625, 4.284423828125, 4.63671875, 4.989013671875, 5.34130859375, 5.693603515625, 6.0458984375, 6.398193359375, 6.75048828125, 7.102783203125, 7.455078125, 7.807373046875, 8.15966796875, 8.511962890625, 8.8642578125, 9.216552734375, 9.56884765625, 9.921142578125, 10.2734375]}, "gradients/decoder.transformer.h.7.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 4.0, 1.0, 2.0, 4.0, 5.0, 13.0, 5.0, 12.0, 9.0, 21.0, 26.0, 25.0, 29.0, 45.0, 49.0, 54.0, 112.0, 174.0, 383.0, 887.0, 2540.0, 9826.0, 55300.0, 546595.0, 2784951.0, 708648.0, 68423.0, 11345.0, 2812.0, 959.0, 410.0, 195.0, 105.0, 70.0, 56.0, 46.0, 30.0, 28.0, 16.0, 15.0, 6.0, 6.0, 9.0, 11.0, 11.0, 9.0, 2.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 2.0], "bins": [-28.5625, -27.73583984375, -26.9091796875, -26.08251953125, -25.255859375, -24.42919921875, -23.6025390625, -22.77587890625, -21.94921875, -21.12255859375, -20.2958984375, -19.46923828125, -18.642578125, -17.81591796875, -16.9892578125, -16.16259765625, -15.3359375, -14.50927734375, -13.6826171875, -12.85595703125, -12.029296875, -11.20263671875, -10.3759765625, -9.54931640625, -8.72265625, -7.89599609375, -7.0693359375, -6.24267578125, -5.416015625, -4.58935546875, -3.7626953125, -2.93603515625, -2.109375, -1.28271484375, -0.4560546875, 0.37060546875, 1.197265625, 2.02392578125, 2.8505859375, 3.67724609375, 4.50390625, 5.33056640625, 6.1572265625, 6.98388671875, 7.810546875, 8.63720703125, 9.4638671875, 10.29052734375, 11.1171875, 11.94384765625, 12.7705078125, 13.59716796875, 14.423828125, 15.25048828125, 16.0771484375, 16.90380859375, 17.73046875, 18.55712890625, 19.3837890625, 20.21044921875, 21.037109375, 21.86376953125, 22.6904296875, 23.51708984375, 24.34375]}, "gradients/decoder.transformer.h.7.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 9.0, 8.0, 15.0, 20.0, 29.0, 31.0, 54.0, 83.0, 124.0, 181.0, 277.0, 396.0, 549.0, 656.0, 534.0, 382.0, 237.0, 157.0, 120.0, 74.0, 40.0, 38.0, 26.0, 10.0, 10.0, 9.0, 7.0, 5.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.890625, -27.080322265625, -26.27001953125, -25.459716796875, -24.6494140625, -23.839111328125, -23.02880859375, -22.218505859375, -21.408203125, -20.597900390625, -19.78759765625, -18.977294921875, -18.1669921875, -17.356689453125, -16.54638671875, -15.736083984375, -14.92578125, -14.115478515625, -13.30517578125, -12.494873046875, -11.6845703125, -10.874267578125, -10.06396484375, -9.253662109375, -8.443359375, -7.633056640625, -6.82275390625, -6.012451171875, -5.2021484375, -4.391845703125, -3.58154296875, -2.771240234375, -1.9609375, -1.150634765625, -0.34033203125, 0.469970703125, 1.2802734375, 2.090576171875, 2.90087890625, 3.711181640625, 4.521484375, 5.331787109375, 6.14208984375, 6.952392578125, 7.7626953125, 8.572998046875, 9.38330078125, 10.193603515625, 11.00390625, 11.814208984375, 12.62451171875, 13.434814453125, 14.2451171875, 15.055419921875, 15.86572265625, 16.676025390625, 17.486328125, 18.296630859375, 19.10693359375, 19.917236328125, 20.7275390625, 21.537841796875, 22.34814453125, 23.158447265625, 23.96875]}, "gradients/decoder.transformer.h.7.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 6.0, 10.0, 4.0, 14.0, 13.0, 28.0, 38.0, 49.0, 64.0, 88.0, 140.0, 204.0, 280.0, 572.0, 1797.0, 17385.0, 1160744.0, 2975044.0, 33580.0, 2475.0, 716.0, 305.0, 222.0, 139.0, 86.0, 61.0, 71.0, 50.0, 40.0, 22.0, 19.0, 10.0, 3.0, 4.0, 1.0, 3.0, 1.0, 2.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-64.875, -62.3896484375, -59.904296875, -57.4189453125, -54.93359375, -52.4482421875, -49.962890625, -47.4775390625, -44.9921875, -42.5068359375, -40.021484375, -37.5361328125, -35.05078125, -32.5654296875, -30.080078125, -27.5947265625, -25.109375, -22.6240234375, -20.138671875, -17.6533203125, -15.16796875, -12.6826171875, -10.197265625, -7.7119140625, -5.2265625, -2.7412109375, -0.255859375, 2.2294921875, 4.71484375, 7.2001953125, 9.685546875, 12.1708984375, 14.65625, 17.1416015625, 19.626953125, 22.1123046875, 24.59765625, 27.0830078125, 29.568359375, 32.0537109375, 34.5390625, 37.0244140625, 39.509765625, 41.9951171875, 44.48046875, 46.9658203125, 49.451171875, 51.9365234375, 54.421875, 56.9072265625, 59.392578125, 61.8779296875, 64.36328125, 66.8486328125, 69.333984375, 71.8193359375, 74.3046875, 76.7900390625, 79.275390625, 81.7607421875, 84.24609375, 86.7314453125, 89.216796875, 91.7021484375, 94.1875]}, "gradients/decoder.transformer.h.7.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 20.0, 286.0, 593.0, 111.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-218.1983184814453, -201.80213928222656, -185.40594482421875, -169.009765625, -152.61358642578125, -136.21739196777344, -119.82121276855469, -103.4250259399414, -87.02883911132812, -70.63265228271484, -54.23646926879883, -37.84028625488281, -21.44409942626953, -5.04791259765625, 11.3482666015625, 27.74445343017578, 44.14064025878906, 60.536827087402344, 76.93301391601562, 93.32919311523438, 109.72537994384766, 126.12156677246094, 142.5177459716797, 158.9139404296875, 175.31011962890625, 191.706298828125, 208.1024932861328, 224.49867248535156, 240.89486694335938, 257.2910461425781, 273.6872253417969, 290.0834045410156, 306.4796142578125, 322.87579345703125, 339.27197265625, 355.66815185546875, 372.0643615722656, 388.4605407714844, 404.8567199707031, 421.2528991699219, 437.64910888671875, 454.0452880859375, 470.44146728515625, 486.837646484375, 503.2338562011719, 519.6300048828125, 536.0262451171875, 552.4224243164062, 568.818603515625, 585.2147827148438, 601.6109619140625, 618.0071411132812, 634.4033203125, 650.799560546875, 667.1956787109375, 683.5919189453125, 699.988037109375, 716.3842163085938, 732.7803955078125, 749.1765747070312, 765.57275390625, 781.968994140625, 798.3651123046875, 814.7613525390625, 831.1575317382812]}, "gradients/decoder.transformer.h.7.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 4.0, 6.0, 3.0, 4.0, 11.0, 10.0, 15.0, 12.0, 12.0, 22.0, 28.0, 31.0, 30.0, 26.0, 30.0, 35.0, 31.0, 39.0, 44.0, 39.0, 40.0, 55.0, 68.0, 46.0, 40.0, 41.0, 29.0, 39.0, 40.0, 25.0, 29.0, 23.0, 20.0, 21.0, 9.0, 12.0, 11.0, 7.0, 8.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-61.515350341796875, -59.27070999145508, -57.02606964111328, -54.78142547607422, -52.53678512573242, -50.292144775390625, -48.04750061035156, -45.802860260009766, -43.55821990966797, -41.31357955932617, -39.068939208984375, -36.82429504394531, -34.579654693603516, -32.33501434326172, -30.09037208557129, -27.84572982788086, -25.601089477539062, -23.356449127197266, -21.111806869506836, -18.867164611816406, -16.62252426147461, -14.377882957458496, -12.133241653442383, -9.88860034942627, -7.643959045410156, -5.399317741394043, -3.1546764373779297, -0.9100351333618164, 1.3346061706542969, 3.57924747467041, 5.823888778686523, 8.068530082702637, 10.31317138671875, 12.557812690734863, 14.802453994750977, 17.047096252441406, 19.291736602783203, 21.536376953125, 23.78101921081543, 26.02566146850586, 28.270301818847656, 30.514942169189453, 32.75958251953125, 35.00422668457031, 37.24886703491211, 39.493507385253906, 41.73815155029297, 43.982791900634766, 46.22743225097656, 48.47207260131836, 50.716712951660156, 52.96135711669922, 55.205997467041016, 57.45063781738281, 59.695281982421875, 61.93992233276367, 64.18456268310547, 66.42920684814453, 68.67384338378906, 70.91848754882812, 73.16313171386719, 75.40776824951172, 77.65241241455078, 79.89704895019531, 82.14169311523438]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 4.0, 3.0, 1.0, 7.0, 10.0, 4.0, 6.0, 9.0, 14.0, 19.0, 22.0, 33.0, 26.0, 15.0, 23.0, 25.0, 36.0, 21.0, 28.0, 49.0, 37.0, 46.0, 33.0, 43.0, 29.0, 51.0, 32.0, 42.0, 34.0, 37.0, 29.0, 31.0, 31.0, 26.0, 25.0, 15.0, 22.0, 14.0, 6.0, 15.0, 9.0, 11.0, 9.0, 8.0, 7.0, 6.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.890625, -10.5458984375, -10.201171875, -9.8564453125, -9.51171875, -9.1669921875, -8.822265625, -8.4775390625, -8.1328125, -7.7880859375, -7.443359375, -7.0986328125, -6.75390625, -6.4091796875, -6.064453125, -5.7197265625, -5.375, -5.0302734375, -4.685546875, -4.3408203125, -3.99609375, -3.6513671875, -3.306640625, -2.9619140625, -2.6171875, -2.2724609375, -1.927734375, -1.5830078125, -1.23828125, -0.8935546875, -0.548828125, -0.2041015625, 0.140625, 0.4853515625, 0.830078125, 1.1748046875, 1.51953125, 1.8642578125, 2.208984375, 2.5537109375, 2.8984375, 3.2431640625, 3.587890625, 3.9326171875, 4.27734375, 4.6220703125, 4.966796875, 5.3115234375, 5.65625, 6.0009765625, 6.345703125, 6.6904296875, 7.03515625, 7.3798828125, 7.724609375, 8.0693359375, 8.4140625, 8.7587890625, 9.103515625, 9.4482421875, 9.79296875, 10.1376953125, 10.482421875, 10.8271484375, 11.171875]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.weight": {"_type": "histogram", "values": [5.0, 2.0, 1.0, 7.0, 5.0, 10.0, 12.0, 21.0, 31.0, 41.0, 61.0, 104.0, 142.0, 198.0, 290.0, 404.0, 642.0, 912.0, 1328.0, 1965.0, 2776.0, 4201.0, 6165.0, 8943.0, 13220.0, 19696.0, 28906.0, 43865.0, 64265.0, 91162.0, 122845.0, 147451.0, 138285.0, 107829.0, 77764.0, 53971.0, 36209.0, 24360.0, 16211.0, 10922.0, 7330.0, 5120.0, 3334.0, 2357.0, 1657.0, 1090.0, 733.0, 539.0, 383.0, 235.0, 193.0, 121.0, 79.0, 55.0, 49.0, 19.0, 22.0, 13.0, 5.0, 4.0, 3.0, 5.0, 2.0, 1.0], "bins": [-0.7216796875, -0.6989593505859375, -0.676239013671875, -0.6535186767578125, -0.63079833984375, -0.6080780029296875, -0.585357666015625, -0.5626373291015625, -0.5399169921875, -0.5171966552734375, -0.494476318359375, -0.4717559814453125, -0.44903564453125, -0.4263153076171875, -0.403594970703125, -0.3808746337890625, -0.358154296875, -0.3354339599609375, -0.312713623046875, -0.2899932861328125, -0.26727294921875, -0.2445526123046875, -0.221832275390625, -0.1991119384765625, -0.1763916015625, -0.1536712646484375, -0.130950927734375, -0.1082305908203125, -0.08551025390625, -0.0627899169921875, -0.040069580078125, -0.0173492431640625, 0.00537109375, 0.0280914306640625, 0.050811767578125, 0.0735321044921875, 0.09625244140625, 0.1189727783203125, 0.141693115234375, 0.1644134521484375, 0.1871337890625, 0.2098541259765625, 0.232574462890625, 0.2552947998046875, 0.27801513671875, 0.3007354736328125, 0.323455810546875, 0.3461761474609375, 0.368896484375, 0.3916168212890625, 0.414337158203125, 0.4370574951171875, 0.45977783203125, 0.4824981689453125, 0.505218505859375, 0.5279388427734375, 0.5506591796875, 0.5733795166015625, 0.596099853515625, 0.6188201904296875, 0.64154052734375, 0.6642608642578125, 0.686981201171875, 0.7097015380859375, 0.732421875]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 4.0, 2.0, 5.0, 3.0, 7.0, 9.0, 14.0, 8.0, 13.0, 12.0, 11.0, 19.0, 21.0, 23.0, 33.0, 31.0, 39.0, 37.0, 38.0, 28.0, 40.0, 39.0, 34.0, 1069.0, 49.0, 53.0, 47.0, 46.0, 41.0, 20.0, 32.0, 23.0, 28.0, 23.0, 18.0, 16.0, 17.0, 10.0, 15.0, 8.0, 11.0, 12.0, 10.0, 6.0, 1.0, 4.0, 5.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.80859375, -7.562255859375, -7.31591796875, -7.069580078125, -6.8232421875, -6.576904296875, -6.33056640625, -6.084228515625, -5.837890625, -5.591552734375, -5.34521484375, -5.098876953125, -4.8525390625, -4.606201171875, -4.35986328125, -4.113525390625, -3.8671875, -3.620849609375, -3.37451171875, -3.128173828125, -2.8818359375, -2.635498046875, -2.38916015625, -2.142822265625, -1.896484375, -1.650146484375, -1.40380859375, -1.157470703125, -0.9111328125, -0.664794921875, -0.41845703125, -0.172119140625, 0.07421875, 0.320556640625, 0.56689453125, 0.813232421875, 1.0595703125, 1.305908203125, 1.55224609375, 1.798583984375, 2.044921875, 2.291259765625, 2.53759765625, 2.783935546875, 3.0302734375, 3.276611328125, 3.52294921875, 3.769287109375, 4.015625, 4.261962890625, 4.50830078125, 4.754638671875, 5.0009765625, 5.247314453125, 5.49365234375, 5.739990234375, 5.986328125, 6.232666015625, 6.47900390625, 6.725341796875, 6.9716796875, 7.218017578125, 7.46435546875, 7.710693359375, 7.95703125]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 4.0, 2.0, 2.0, 4.0, 10.0, 6.0, 19.0, 33.0, 52.0, 77.0, 134.0, 179.0, 319.0, 487.0, 851.0, 1322.0, 2164.0, 3688.0, 5878.0, 9609.0, 15912.0, 25330.0, 40164.0, 62846.0, 94193.0, 135710.0, 1100143.0, 250979.0, 120120.0, 82976.0, 53899.0, 34163.0, 21436.0, 13424.0, 8133.0, 4889.0, 3148.0, 1902.0, 1141.0, 707.0, 399.0, 268.0, 134.0, 93.0, 66.0, 43.0, 32.0, 15.0, 20.0, 8.0, 6.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.65576171875, -0.6363754272460938, -0.6169891357421875, -0.5976028442382812, -0.578216552734375, -0.5588302612304688, -0.5394439697265625, -0.5200576782226562, -0.50067138671875, -0.48128509521484375, -0.4618988037109375, -0.44251251220703125, -0.423126220703125, -0.40373992919921875, -0.3843536376953125, -0.36496734619140625, -0.3455810546875, -0.32619476318359375, -0.3068084716796875, -0.28742218017578125, -0.268035888671875, -0.24864959716796875, -0.2292633056640625, -0.20987701416015625, -0.19049072265625, -0.17110443115234375, -0.1517181396484375, -0.13233184814453125, -0.112945556640625, -0.09355926513671875, -0.0741729736328125, -0.05478668212890625, -0.035400390625, -0.01601409912109375, 0.0033721923828125, 0.02275848388671875, 0.042144775390625, 0.06153106689453125, 0.0809173583984375, 0.10030364990234375, 0.11968994140625, 0.13907623291015625, 0.1584625244140625, 0.17784881591796875, 0.197235107421875, 0.21662139892578125, 0.2360076904296875, 0.25539398193359375, 0.2747802734375, 0.29416656494140625, 0.3135528564453125, 0.33293914794921875, 0.352325439453125, 0.37171173095703125, 0.3910980224609375, 0.41048431396484375, 0.42987060546875, 0.44925689697265625, 0.4686431884765625, 0.48802947998046875, 0.507415771484375, 0.5268020629882812, 0.5461883544921875, 0.5655746459960938, 0.5849609375]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 5.0, 7.0, 8.0, 11.0, 7.0, 13.0, 12.0, 14.0, 20.0, 23.0, 22.0, 29.0, 35.0, 46.0, 90.0, 93.0, 110.0, 82.0, 81.0, 50.0, 41.0, 34.0, 25.0, 21.0, 15.0, 19.0, 12.0, 13.0, 17.0, 7.0, 10.0, 6.0, 7.0, 5.0, 6.0, 1.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.032470703125, -0.03144121170043945, -0.030411720275878906, -0.02938222885131836, -0.028352737426757812, -0.027323246002197266, -0.02629375457763672, -0.025264263153076172, -0.024234771728515625, -0.023205280303955078, -0.02217578887939453, -0.021146297454833984, -0.020116806030273438, -0.01908731460571289, -0.018057823181152344, -0.017028331756591797, -0.01599884033203125, -0.014969348907470703, -0.013939857482910156, -0.01291036605834961, -0.011880874633789062, -0.010851383209228516, -0.009821891784667969, -0.008792400360107422, -0.007762908935546875, -0.006733417510986328, -0.005703926086425781, -0.004674434661865234, -0.0036449432373046875, -0.0026154518127441406, -0.0015859603881835938, -0.0005564689636230469, 0.0004730224609375, 0.0015025138854980469, 0.0025320053100585938, 0.0035614967346191406, 0.0045909881591796875, 0.005620479583740234, 0.006649971008300781, 0.007679462432861328, 0.008708953857421875, 0.009738445281982422, 0.010767936706542969, 0.011797428131103516, 0.012826919555664062, 0.01385641098022461, 0.014885902404785156, 0.015915393829345703, 0.01694488525390625, 0.017974376678466797, 0.019003868103027344, 0.02003335952758789, 0.021062850952148438, 0.022092342376708984, 0.02312183380126953, 0.024151325225830078, 0.025180816650390625, 0.026210308074951172, 0.02723979949951172, 0.028269290924072266, 0.029298782348632812, 0.03032827377319336, 0.031357765197753906, 0.03238725662231445, 0.033416748046875]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 5.0, 1.0, 3.0, 2.0, 6.0, 4.0, 12.0, 10.0, 9.0, 14.0, 20.0, 24.0, 18.0, 34.0, 44.0, 39.0, 65.0, 91.0, 163.0, 329.0, 687.0, 34641.0, 1009761.0, 1539.0, 400.0, 210.0, 122.0, 70.0, 51.0, 35.0, 37.0, 26.0, 22.0, 16.0, 7.0, 13.0, 9.0, 10.0, 4.0, 3.0, 1.0, 3.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.67138671875, -0.6498870849609375, -0.628387451171875, -0.6068878173828125, -0.58538818359375, -0.5638885498046875, -0.542388916015625, -0.5208892822265625, -0.4993896484375, -0.4778900146484375, -0.456390380859375, -0.4348907470703125, -0.41339111328125, -0.3918914794921875, -0.370391845703125, -0.3488922119140625, -0.327392578125, -0.3058929443359375, -0.284393310546875, -0.2628936767578125, -0.24139404296875, -0.2198944091796875, -0.198394775390625, -0.1768951416015625, -0.1553955078125, -0.1338958740234375, -0.112396240234375, -0.0908966064453125, -0.06939697265625, -0.0478973388671875, -0.026397705078125, -0.0048980712890625, 0.0166015625, 0.0381011962890625, 0.059600830078125, 0.0811004638671875, 0.10260009765625, 0.1240997314453125, 0.145599365234375, 0.1670989990234375, 0.1885986328125, 0.2100982666015625, 0.231597900390625, 0.2530975341796875, 0.27459716796875, 0.2960968017578125, 0.317596435546875, 0.3390960693359375, 0.360595703125, 0.3820953369140625, 0.403594970703125, 0.4250946044921875, 0.44659423828125, 0.4680938720703125, 0.489593505859375, 0.5110931396484375, 0.5325927734375, 0.5540924072265625, 0.575592041015625, 0.5970916748046875, 0.61859130859375, 0.6400909423828125, 0.661590576171875, 0.6830902099609375, 0.70458984375]}, "gradients/decoder.transformer.h.7.ln_cross_attn.weight": {"_type": "histogram", "values": [3.0, 102.0, 910.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02921820804476738, -0.01572445034980774, -0.002230694517493248, 0.011263061314821243, 0.024756819009780884, 0.038250576704740524, 0.051744330674409866, 0.0652380883693695, 0.07873184978961945, 0.09222560375928879, 0.10571936517953873, 0.11921311914920807, 0.132706880569458, 0.14620062708854675, 0.1596943885087967, 0.17318814992904663, 0.18668189644813538, 0.20017565786838531, 0.21366940438747406, 0.227163165807724, 0.24065692722797394, 0.2541506886482239, 0.2676444351673126, 0.28113818168640137, 0.2946319580078125, 0.30812570452690125, 0.3216194808483124, 0.3351132273674011, 0.34860697388648987, 0.362100750207901, 0.37559449672698975, 0.3890882730484009, 0.4025820195674896, 0.41607576608657837, 0.4295695424079895, 0.44306328892707825, 0.456557035446167, 0.4700508117675781, 0.48354455828666687, 0.4970383048057556, 0.5105320811271667, 0.5240258574485779, 0.5375195741653442, 0.5510133504867554, 0.5645071268081665, 0.5780008435249329, 0.591494619846344, 0.6049883961677551, 0.6184821128845215, 0.6319758892059326, 0.645469605922699, 0.6589633822441101, 0.6724571585655212, 0.6859508752822876, 0.6994446516036987, 0.7129384279251099, 0.726432204246521, 0.7399259805679321, 0.7534196972846985, 0.7669134736061096, 0.7804072499275208, 0.7939009666442871, 0.8073947429656982, 0.8208885192871094, 0.8343822360038757]}, "gradients/decoder.transformer.h.7.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 5.0, 10.0, 30.0, 25.0, 45.0, 63.0, 75.0, 91.0, 103.0, 95.0, 100.0, 91.0, 74.0, 61.0, 47.0, 35.0, 24.0, 24.0, 10.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0597648024559021, -0.058217670768499374, -0.05667053908109665, -0.05512341111898422, -0.0535762794315815, -0.05202914774417877, -0.050482019782066345, -0.04893488809466362, -0.047387756407260895, -0.04584062471985817, -0.044293493032455444, -0.04274636507034302, -0.04119923338294029, -0.03965210169553757, -0.03810497373342514, -0.036557842046022415, -0.03501071035861969, -0.033463578671216965, -0.03191644698381424, -0.030369319021701813, -0.028822187334299088, -0.027275055646896362, -0.025727925822138786, -0.02418079599738121, -0.022633664309978485, -0.02108653262257576, -0.019539402797818184, -0.017992272973060608, -0.016445141285657883, -0.014898010529577732, -0.013350879773497581, -0.011803749017417431, -0.01025661826133728, -0.00870948750525713, -0.007162356749176979, -0.0056152259930968285, -0.004068095237016678, -0.0025209644809365273, -0.0009738337248563766, 0.000573297031223774, 0.0021204277873039246, 0.003667558543384075, 0.005214689299464226, 0.006761820055544376, 0.008308950811624527, 0.009856081567704678, 0.011403212323784828, 0.012950343079864979, 0.01449747383594513, 0.016044605523347855, 0.01759173534810543, 0.019138865172863007, 0.020685996860265732, 0.022233128547668457, 0.023780258372426033, 0.02532738819718361, 0.026874519884586334, 0.02842165157198906, 0.029968781396746635, 0.03151591122150421, 0.03306304290890694, 0.03461017459630966, 0.03615730255842209, 0.037704434245824814, 0.03925156593322754]}, "gradients/decoder.transformer.h.7.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 1.0, 4.0, 3.0, 1.0, 7.0, 10.0, 4.0, 6.0, 9.0, 14.0, 19.0, 22.0, 34.0, 25.0, 15.0, 23.0, 25.0, 36.0, 21.0, 28.0, 49.0, 37.0, 45.0, 32.0, 45.0, 30.0, 50.0, 32.0, 42.0, 34.0, 37.0, 29.0, 31.0, 30.0, 27.0, 25.0, 15.0, 22.0, 14.0, 6.0, 15.0, 9.0, 11.0, 9.0, 8.0, 7.0, 6.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.890625, -10.5458984375, -10.201171875, -9.8564453125, -9.51171875, -9.1669921875, -8.822265625, -8.4775390625, -8.1328125, -7.7880859375, -7.443359375, -7.0986328125, -6.75390625, -6.4091796875, -6.064453125, -5.7197265625, -5.375, -5.0302734375, -4.685546875, -4.3408203125, -3.99609375, -3.6513671875, -3.306640625, -2.9619140625, -2.6171875, -2.2724609375, -1.927734375, -1.5830078125, -1.23828125, -0.8935546875, -0.548828125, -0.2041015625, 0.140625, 0.4853515625, 0.830078125, 1.1748046875, 1.51953125, 1.8642578125, 2.208984375, 2.5537109375, 2.8984375, 3.2431640625, 3.587890625, 3.9326171875, 4.27734375, 4.6220703125, 4.966796875, 5.3115234375, 5.65625, 6.0009765625, 6.345703125, 6.6904296875, 7.03515625, 7.3798828125, 7.724609375, 8.0693359375, 8.4140625, 8.7587890625, 9.103515625, 9.4482421875, 9.79296875, 10.1376953125, 10.482421875, 10.8271484375, 11.171875]}, "gradients/decoder.transformer.h.7.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 5.0, 2.0, 3.0, 5.0, 10.0, 7.0, 9.0, 11.0, 15.0, 35.0, 36.0, 56.0, 56.0, 66.0, 87.0, 128.0, 211.0, 441.0, 795.0, 1727.0, 4423.0, 14056.0, 75831.0, 687454.0, 222577.0, 27996.0, 7366.0, 2621.0, 1088.0, 519.0, 260.0, 195.0, 127.0, 94.0, 49.0, 37.0, 39.0, 22.0, 16.0, 18.0, 12.0, 16.0, 9.0, 12.0, 7.0, 7.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.859375, -21.17041015625, -20.4814453125, -19.79248046875, -19.103515625, -18.41455078125, -17.7255859375, -17.03662109375, -16.34765625, -15.65869140625, -14.9697265625, -14.28076171875, -13.591796875, -12.90283203125, -12.2138671875, -11.52490234375, -10.8359375, -10.14697265625, -9.4580078125, -8.76904296875, -8.080078125, -7.39111328125, -6.7021484375, -6.01318359375, -5.32421875, -4.63525390625, -3.9462890625, -3.25732421875, -2.568359375, -1.87939453125, -1.1904296875, -0.50146484375, 0.1875, 0.87646484375, 1.5654296875, 2.25439453125, 2.943359375, 3.63232421875, 4.3212890625, 5.01025390625, 5.69921875, 6.38818359375, 7.0771484375, 7.76611328125, 8.455078125, 9.14404296875, 9.8330078125, 10.52197265625, 11.2109375, 11.89990234375, 12.5888671875, 13.27783203125, 13.966796875, 14.65576171875, 15.3447265625, 16.03369140625, 16.72265625, 17.41162109375, 18.1005859375, 18.78955078125, 19.478515625, 20.16748046875, 20.8564453125, 21.54541015625, 22.234375]}, "gradients/decoder.transformer.h.7.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 5.0, 3.0, 4.0, 5.0, 11.0, 7.0, 18.0, 11.0, 17.0, 18.0, 29.0, 24.0, 29.0, 36.0, 40.0, 37.0, 37.0, 47.0, 58.0, 151.0, 1589.0, 346.0, 105.0, 59.0, 53.0, 43.0, 30.0, 37.0, 43.0, 25.0, 31.0, 22.0, 18.0, 17.0, 6.0, 6.0, 6.0, 8.0, 4.0, 7.0, 6.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.4375, -35.255859375, -34.07421875, -32.892578125, -31.7109375, -30.529296875, -29.34765625, -28.166015625, -26.984375, -25.802734375, -24.62109375, -23.439453125, -22.2578125, -21.076171875, -19.89453125, -18.712890625, -17.53125, -16.349609375, -15.16796875, -13.986328125, -12.8046875, -11.623046875, -10.44140625, -9.259765625, -8.078125, -6.896484375, -5.71484375, -4.533203125, -3.3515625, -2.169921875, -0.98828125, 0.193359375, 1.375, 2.556640625, 3.73828125, 4.919921875, 6.1015625, 7.283203125, 8.46484375, 9.646484375, 10.828125, 12.009765625, 13.19140625, 14.373046875, 15.5546875, 16.736328125, 17.91796875, 19.099609375, 20.28125, 21.462890625, 22.64453125, 23.826171875, 25.0078125, 26.189453125, 27.37109375, 28.552734375, 29.734375, 30.916015625, 32.09765625, 33.279296875, 34.4609375, 35.642578125, 36.82421875, 38.005859375, 39.1875]}, "gradients/decoder.transformer.h.7.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 12.0, 2.0, 5.0, 10.0, 4.0, 10.0, 16.0, 18.0, 26.0, 34.0, 44.0, 41.0, 58.0, 83.0, 137.0, 215.0, 459.0, 1746.0, 43342.0, 3082396.0, 14929.0, 1116.0, 341.0, 191.0, 106.0, 92.0, 57.0, 45.0, 36.0, 26.0, 20.0, 19.0, 11.0, 19.0, 10.0, 9.0, 5.0, 5.0, 3.0, 5.0, 3.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-91.25, -88.515625, -85.78125, -83.046875, -80.3125, -77.578125, -74.84375, -72.109375, -69.375, -66.640625, -63.90625, -61.171875, -58.4375, -55.703125, -52.96875, -50.234375, -47.5, -44.765625, -42.03125, -39.296875, -36.5625, -33.828125, -31.09375, -28.359375, -25.625, -22.890625, -20.15625, -17.421875, -14.6875, -11.953125, -9.21875, -6.484375, -3.75, -1.015625, 1.71875, 4.453125, 7.1875, 9.921875, 12.65625, 15.390625, 18.125, 20.859375, 23.59375, 26.328125, 29.0625, 31.796875, 34.53125, 37.265625, 40.0, 42.734375, 45.46875, 48.203125, 50.9375, 53.671875, 56.40625, 59.140625, 61.875, 64.609375, 67.34375, 70.078125, 72.8125, 75.546875, 78.28125, 81.015625, 83.75]}, "gradients/decoder.transformer.h.7.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 8.0, 55.0, 164.0, 283.0, 312.0, 149.0, 32.0, 10.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.134422302246094, -25.317777633666992, -22.50113296508789, -19.684490203857422, -16.86784553527832, -14.051200866699219, -11.23455810546875, -8.417913436889648, -5.601268768310547, -2.7846245765686035, 0.032019615173339844, 2.848663330078125, 5.665307998657227, 8.481952667236328, 11.298595428466797, 14.115240097045898, 16.931884765625, 19.7485294342041, 22.565174102783203, 25.381816864013672, 28.198461532592773, 31.015106201171875, 33.831748962402344, 36.64839172363281, 39.46503829956055, 42.281681060791016, 45.09832763671875, 47.91497039794922, 50.73161315917969, 53.54825973510742, 56.36490249633789, 59.181549072265625, 61.998199462890625, 64.8148422241211, 67.63148498535156, 70.44813537597656, 73.26477813720703, 76.0814208984375, 78.89806365966797, 81.71470642089844, 84.53135681152344, 87.3479995727539, 90.16464233398438, 92.98129272460938, 95.79793548583984, 98.61457824707031, 101.43122100830078, 104.24786376953125, 107.06450653076172, 109.88114929199219, 112.69779205322266, 115.51443481445312, 118.33108520507812, 121.1477279663086, 123.96437072753906, 126.78101348876953, 129.59765625, 132.414306640625, 135.23094177246094, 138.04759216308594, 140.86422729492188, 143.68087768554688, 146.49752807617188, 149.3141632080078, 152.1308135986328]}, "gradients/decoder.transformer.h.7.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 4.0, 2.0, 2.0, 2.0, 8.0, 6.0, 8.0, 8.0, 13.0, 17.0, 17.0, 24.0, 19.0, 23.0, 24.0, 32.0, 30.0, 34.0, 36.0, 44.0, 56.0, 43.0, 37.0, 42.0, 61.0, 36.0, 41.0, 44.0, 37.0, 36.0, 37.0, 30.0, 28.0, 16.0, 22.0, 13.0, 22.0, 11.0, 5.0, 4.0, 8.0, 9.0, 4.0, 6.0, 3.0, 1.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-90.42059326171875, -87.60358428955078, -84.78656768798828, -81.96955871582031, -79.15254211425781, -76.33553314208984, -73.51852416992188, -70.70150756835938, -67.8844985961914, -65.06748962402344, -62.25047302246094, -59.43346405029297, -56.616451263427734, -53.7994384765625, -50.98242950439453, -48.1654167175293, -45.34840393066406, -42.53139114379883, -39.714378356933594, -36.897369384765625, -34.08035659790039, -31.263343811035156, -28.446332931518555, -25.629322052001953, -22.81230926513672, -19.995296478271484, -17.178285598754883, -14.361273765563965, -11.544261932373047, -8.727250099182129, -5.910238265991211, -3.0932273864746094, -0.27622222900390625, 2.5407896041870117, 5.35780143737793, 8.174813270568848, 10.991825103759766, 13.808836936950684, 16.6258487701416, 19.442859649658203, 22.259872436523438, 25.076885223388672, 27.893896102905273, 30.710906982421875, 33.52791976928711, 36.344932556152344, 39.16194152832031, 41.97895431518555, 44.79596710205078, 47.612979888916016, 50.42999267578125, 53.24700164794922, 56.06401443481445, 58.88102722167969, 61.698036193847656, 64.51504516601562, 67.33206176757812, 70.1490707397461, 72.9660873413086, 75.78309631347656, 78.60011291503906, 81.41712188720703, 84.234130859375, 87.0511474609375, 89.86815643310547]}, "gradients/decoder.transformer.h.6.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 2.0, 3.0, 7.0, 6.0, 9.0, 8.0, 8.0, 15.0, 13.0, 19.0, 17.0, 24.0, 22.0, 32.0, 26.0, 30.0, 30.0, 38.0, 35.0, 33.0, 37.0, 45.0, 36.0, 42.0, 30.0, 38.0, 38.0, 44.0, 36.0, 32.0, 27.0, 31.0, 26.0, 30.0, 18.0, 24.0, 13.0, 13.0, 13.0, 12.0, 5.0, 10.0, 6.0, 6.0, 7.0, 4.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-11.8125, -11.454833984375, -11.09716796875, -10.739501953125, -10.3818359375, -10.024169921875, -9.66650390625, -9.308837890625, -8.951171875, -8.593505859375, -8.23583984375, -7.878173828125, -7.5205078125, -7.162841796875, -6.80517578125, -6.447509765625, -6.08984375, -5.732177734375, -5.37451171875, -5.016845703125, -4.6591796875, -4.301513671875, -3.94384765625, -3.586181640625, -3.228515625, -2.870849609375, -2.51318359375, -2.155517578125, -1.7978515625, -1.440185546875, -1.08251953125, -0.724853515625, -0.3671875, -0.009521484375, 0.34814453125, 0.705810546875, 1.0634765625, 1.421142578125, 1.77880859375, 2.136474609375, 2.494140625, 2.851806640625, 3.20947265625, 3.567138671875, 3.9248046875, 4.282470703125, 4.64013671875, 4.997802734375, 5.35546875, 5.713134765625, 6.07080078125, 6.428466796875, 6.7861328125, 7.143798828125, 7.50146484375, 7.859130859375, 8.216796875, 8.574462890625, 8.93212890625, 9.289794921875, 9.6474609375, 10.005126953125, 10.36279296875, 10.720458984375, 11.078125]}, "gradients/decoder.transformer.h.6.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 4.0, 2.0, 2.0, 11.0, 9.0, 17.0, 12.0, 22.0, 21.0, 45.0, 42.0, 74.0, 111.0, 183.0, 261.0, 539.0, 951.0, 2039.0, 4840.0, 14323.0, 58004.0, 370185.0, 2078485.0, 1411243.0, 199150.0, 36435.0, 10171.0, 3654.0, 1608.0, 773.0, 417.0, 244.0, 121.0, 76.0, 49.0, 46.0, 31.0, 18.0, 9.0, 13.0, 10.0, 10.0, 5.0, 6.0, 4.0, 6.0, 7.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-23.78125, -23.07861328125, -22.3759765625, -21.67333984375, -20.970703125, -20.26806640625, -19.5654296875, -18.86279296875, -18.16015625, -17.45751953125, -16.7548828125, -16.05224609375, -15.349609375, -14.64697265625, -13.9443359375, -13.24169921875, -12.5390625, -11.83642578125, -11.1337890625, -10.43115234375, -9.728515625, -9.02587890625, -8.3232421875, -7.62060546875, -6.91796875, -6.21533203125, -5.5126953125, -4.81005859375, -4.107421875, -3.40478515625, -2.7021484375, -1.99951171875, -1.296875, -0.59423828125, 0.1083984375, 0.81103515625, 1.513671875, 2.21630859375, 2.9189453125, 3.62158203125, 4.32421875, 5.02685546875, 5.7294921875, 6.43212890625, 7.134765625, 7.83740234375, 8.5400390625, 9.24267578125, 9.9453125, 10.64794921875, 11.3505859375, 12.05322265625, 12.755859375, 13.45849609375, 14.1611328125, 14.86376953125, 15.56640625, 16.26904296875, 16.9716796875, 17.67431640625, 18.376953125, 19.07958984375, 19.7822265625, 20.48486328125, 21.1875]}, "gradients/decoder.transformer.h.6.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 4.0, 7.0, 9.0, 9.0, 16.0, 24.0, 33.0, 32.0, 43.0, 50.0, 98.0, 103.0, 156.0, 165.0, 245.0, 293.0, 408.0, 441.0, 443.0, 360.0, 278.0, 238.0, 147.0, 123.0, 81.0, 80.0, 48.0, 34.0, 28.0, 27.0, 11.0, 17.0, 8.0, 6.0, 4.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-19.234375, -18.680419921875, -18.12646484375, -17.572509765625, -17.0185546875, -16.464599609375, -15.91064453125, -15.356689453125, -14.802734375, -14.248779296875, -13.69482421875, -13.140869140625, -12.5869140625, -12.032958984375, -11.47900390625, -10.925048828125, -10.37109375, -9.817138671875, -9.26318359375, -8.709228515625, -8.1552734375, -7.601318359375, -7.04736328125, -6.493408203125, -5.939453125, -5.385498046875, -4.83154296875, -4.277587890625, -3.7236328125, -3.169677734375, -2.61572265625, -2.061767578125, -1.5078125, -0.953857421875, -0.39990234375, 0.154052734375, 0.7080078125, 1.261962890625, 1.81591796875, 2.369873046875, 2.923828125, 3.477783203125, 4.03173828125, 4.585693359375, 5.1396484375, 5.693603515625, 6.24755859375, 6.801513671875, 7.35546875, 7.909423828125, 8.46337890625, 9.017333984375, 9.5712890625, 10.125244140625, 10.67919921875, 11.233154296875, 11.787109375, 12.341064453125, 12.89501953125, 13.448974609375, 14.0029296875, 14.556884765625, 15.11083984375, 15.664794921875, 16.21875]}, "gradients/decoder.transformer.h.6.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 5.0, 2.0, 2.0, 4.0, 5.0, 3.0, 4.0, 6.0, 9.0, 14.0, 17.0, 29.0, 22.0, 27.0, 48.0, 56.0, 84.0, 102.0, 126.0, 184.0, 245.0, 453.0, 854.0, 2509.0, 11798.0, 96414.0, 1602714.0, 2322154.0, 135246.0, 15605.0, 3027.0, 997.0, 437.0, 277.0, 191.0, 126.0, 88.0, 87.0, 64.0, 68.0, 34.0, 46.0, 19.0, 18.0, 16.0, 13.0, 11.0, 5.0, 8.0, 6.0, 3.0, 1.0, 5.0, 4.0, 1.0, 1.0, 3.0, 1.0], "bins": [-43.9375, -42.60888671875, -41.2802734375, -39.95166015625, -38.623046875, -37.29443359375, -35.9658203125, -34.63720703125, -33.30859375, -31.97998046875, -30.6513671875, -29.32275390625, -27.994140625, -26.66552734375, -25.3369140625, -24.00830078125, -22.6796875, -21.35107421875, -20.0224609375, -18.69384765625, -17.365234375, -16.03662109375, -14.7080078125, -13.37939453125, -12.05078125, -10.72216796875, -9.3935546875, -8.06494140625, -6.736328125, -5.40771484375, -4.0791015625, -2.75048828125, -1.421875, -0.09326171875, 1.2353515625, 2.56396484375, 3.892578125, 5.22119140625, 6.5498046875, 7.87841796875, 9.20703125, 10.53564453125, 11.8642578125, 13.19287109375, 14.521484375, 15.85009765625, 17.1787109375, 18.50732421875, 19.8359375, 21.16455078125, 22.4931640625, 23.82177734375, 25.150390625, 26.47900390625, 27.8076171875, 29.13623046875, 30.46484375, 31.79345703125, 33.1220703125, 34.45068359375, 35.779296875, 37.10791015625, 38.4365234375, 39.76513671875, 41.09375]}, "gradients/decoder.transformer.h.6.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 12.0, 141.0, 470.0, 337.0, 50.0, 6.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-589.5473022460938, -577.4707641601562, -565.3941650390625, -553.317626953125, -541.2410278320312, -529.1644897460938, -517.087890625, -505.0113525390625, -492.934814453125, -480.8582458496094, -468.78167724609375, -456.70513916015625, -444.6285705566406, -432.552001953125, -420.4754333496094, -408.39886474609375, -396.3222961425781, -384.2457275390625, -372.1691589355469, -360.09259033203125, -348.01605224609375, -335.9394836425781, -323.8629150390625, -311.7863464355469, -299.70977783203125, -287.6332092285156, -275.556640625, -263.4801025390625, -251.40353393554688, -239.32696533203125, -227.25039672851562, -215.173828125, -203.0972900390625, -191.02072143554688, -178.9441680908203, -166.8675994873047, -154.79104614257812, -142.7144775390625, -130.63790893554688, -118.56134796142578, -106.48478698730469, -94.4082260131836, -82.3316650390625, -70.25509643554688, -58.17853546142578, -46.10197448730469, -34.02540588378906, -21.94884490966797, -9.872283935546875, 2.2042789459228516, 14.280841827392578, 26.357406616210938, 38.43396759033203, 50.510528564453125, 62.58709716796875, 74.66365814208984, 86.74021911621094, 98.81678009033203, 110.89334106445312, 122.96990966796875, 135.04647827148438, 147.12303161621094, 159.19960021972656, 171.27615356445312, 183.35272216796875]}, "gradients/decoder.transformer.h.6.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0, 3.0, 3.0, 5.0, 6.0, 8.0, 7.0, 12.0, 12.0, 13.0, 20.0, 23.0, 19.0, 26.0, 43.0, 36.0, 31.0, 42.0, 41.0, 47.0, 33.0, 49.0, 48.0, 67.0, 36.0, 45.0, 41.0, 37.0, 40.0, 28.0, 30.0, 27.0, 24.0, 20.0, 18.0, 19.0, 11.0, 6.0, 7.0, 7.0, 4.0, 6.0, 3.0, 4.0, 0.0, 0.0, 2.0, 1.0, 2.0], "bins": [-81.32589721679688, -79.14510345458984, -76.96430969238281, -74.78352355957031, -72.60272979736328, -70.42193603515625, -68.24114227294922, -66.06034851074219, -63.87955856323242, -61.69876480102539, -59.517974853515625, -57.337181091308594, -55.15638732910156, -52.9755973815918, -50.794803619384766, -48.614013671875, -46.43321990966797, -44.25242614746094, -42.07163619995117, -39.89084243774414, -37.710052490234375, -35.529258728027344, -33.34846496582031, -31.167673110961914, -28.986881256103516, -26.806089401245117, -24.62529754638672, -22.444503784179688, -20.26371192932129, -18.08292007446289, -15.902127265930176, -13.721334457397461, -11.540542602539062, -9.359750747680664, -7.178957939147949, -4.998165607452393, -2.817373275756836, -0.6365814208984375, 1.5442113876342773, 3.725004196166992, 5.905796051025391, 8.086587905883789, 10.267380714416504, 12.448173522949219, 14.628965377807617, 16.809757232666016, 18.990550994873047, 21.171342849731445, 23.352134704589844, 25.532926559448242, 27.71371841430664, 29.894512176513672, 32.07530212402344, 34.25609588623047, 36.4368896484375, 38.61768341064453, 40.7984733581543, 42.97926712036133, 45.160057067871094, 47.340850830078125, 49.521644592285156, 51.70243453979492, 53.88322830200195, 56.06401824951172, 58.24481201171875]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 1.0, 5.0, 3.0, 4.0, 9.0, 7.0, 7.0, 7.0, 11.0, 15.0, 15.0, 14.0, 28.0, 28.0, 19.0, 22.0, 29.0, 34.0, 35.0, 30.0, 42.0, 50.0, 35.0, 42.0, 55.0, 41.0, 39.0, 35.0, 34.0, 40.0, 30.0, 34.0, 26.0, 26.0, 18.0, 17.0, 16.0, 18.0, 20.0, 12.0, 12.0, 15.0, 6.0, 6.0, 7.0, 3.0, 3.0, 3.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-13.1953125, -12.8187255859375, -12.442138671875, -12.0655517578125, -11.68896484375, -11.3123779296875, -10.935791015625, -10.5592041015625, -10.1826171875, -9.8060302734375, -9.429443359375, -9.0528564453125, -8.67626953125, -8.2996826171875, -7.923095703125, -7.5465087890625, -7.169921875, -6.7933349609375, -6.416748046875, -6.0401611328125, -5.66357421875, -5.2869873046875, -4.910400390625, -4.5338134765625, -4.1572265625, -3.7806396484375, -3.404052734375, -3.0274658203125, -2.65087890625, -2.2742919921875, -1.897705078125, -1.5211181640625, -1.14453125, -0.7679443359375, -0.391357421875, -0.0147705078125, 0.36181640625, 0.7384033203125, 1.114990234375, 1.4915771484375, 1.8681640625, 2.2447509765625, 2.621337890625, 2.9979248046875, 3.37451171875, 3.7510986328125, 4.127685546875, 4.5042724609375, 4.880859375, 5.2574462890625, 5.634033203125, 6.0106201171875, 6.38720703125, 6.7637939453125, 7.140380859375, 7.5169677734375, 7.8935546875, 8.2701416015625, 8.646728515625, 9.0233154296875, 9.39990234375, 9.7764892578125, 10.153076171875, 10.5296630859375, 10.90625]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 5.0, 10.0, 10.0, 18.0, 22.0, 39.0, 59.0, 74.0, 125.0, 199.0, 276.0, 436.0, 688.0, 1164.0, 1725.0, 2825.0, 4342.0, 6954.0, 11448.0, 18401.0, 29855.0, 48016.0, 75324.0, 114047.0, 155156.0, 170827.0, 140865.0, 97641.0, 63341.0, 39509.0, 24821.0, 15110.0, 9416.0, 5826.0, 3546.0, 2371.0, 1387.0, 939.0, 597.0, 356.0, 270.0, 183.0, 110.0, 67.0, 50.0, 36.0, 30.0, 14.0, 12.0, 6.0, 5.0, 4.0, 4.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.8603515625, -0.8337173461914062, -0.8070831298828125, -0.7804489135742188, -0.753814697265625, -0.7271804809570312, -0.7005462646484375, -0.6739120483398438, -0.64727783203125, -0.6206436157226562, -0.5940093994140625, -0.5673751831054688, -0.540740966796875, -0.5141067504882812, -0.4874725341796875, -0.46083831787109375, -0.4342041015625, -0.40756988525390625, -0.3809356689453125, -0.35430145263671875, -0.327667236328125, -0.30103302001953125, -0.2743988037109375, -0.24776458740234375, -0.22113037109375, -0.19449615478515625, -0.1678619384765625, -0.14122772216796875, -0.114593505859375, -0.08795928955078125, -0.0613250732421875, -0.03469085693359375, -0.008056640625, 0.01857757568359375, 0.0452117919921875, 0.07184600830078125, 0.098480224609375, 0.12511444091796875, 0.1517486572265625, 0.17838287353515625, 0.20501708984375, 0.23165130615234375, 0.2582855224609375, 0.28491973876953125, 0.311553955078125, 0.33818817138671875, 0.3648223876953125, 0.39145660400390625, 0.4180908203125, 0.44472503662109375, 0.4713592529296875, 0.49799346923828125, 0.524627685546875, 0.5512619018554688, 0.5778961181640625, 0.6045303344726562, 0.63116455078125, 0.6577987670898438, 0.6844329833984375, 0.7110671997070312, 0.737701416015625, 0.7643356323242188, 0.7909698486328125, 0.8176040649414062, 0.84423828125]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 0.0, 3.0, 0.0, 3.0, 2.0, 3.0, 3.0, 7.0, 4.0, 5.0, 11.0, 7.0, 12.0, 7.0, 26.0, 27.0, 27.0, 17.0, 32.0, 16.0, 28.0, 30.0, 33.0, 37.0, 39.0, 30.0, 31.0, 39.0, 1061.0, 40.0, 35.0, 30.0, 39.0, 28.0, 33.0, 27.0, 31.0, 30.0, 28.0, 25.0, 20.0, 19.0, 14.0, 17.0, 18.0, 17.0, 10.0, 6.0, 5.0, 9.0, 5.0, 2.0, 3.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0], "bins": [-7.33984375, -7.11376953125, -6.8876953125, -6.66162109375, -6.435546875, -6.20947265625, -5.9833984375, -5.75732421875, -5.53125, -5.30517578125, -5.0791015625, -4.85302734375, -4.626953125, -4.40087890625, -4.1748046875, -3.94873046875, -3.72265625, -3.49658203125, -3.2705078125, -3.04443359375, -2.818359375, -2.59228515625, -2.3662109375, -2.14013671875, -1.9140625, -1.68798828125, -1.4619140625, -1.23583984375, -1.009765625, -0.78369140625, -0.5576171875, -0.33154296875, -0.10546875, 0.12060546875, 0.3466796875, 0.57275390625, 0.798828125, 1.02490234375, 1.2509765625, 1.47705078125, 1.703125, 1.92919921875, 2.1552734375, 2.38134765625, 2.607421875, 2.83349609375, 3.0595703125, 3.28564453125, 3.51171875, 3.73779296875, 3.9638671875, 4.18994140625, 4.416015625, 4.64208984375, 4.8681640625, 5.09423828125, 5.3203125, 5.54638671875, 5.7724609375, 5.99853515625, 6.224609375, 6.45068359375, 6.6767578125, 6.90283203125, 7.12890625]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 4.0, 4.0, 8.0, 11.0, 20.0, 38.0, 39.0, 59.0, 120.0, 138.0, 220.0, 318.0, 581.0, 985.0, 1612.0, 2624.0, 4177.0, 6874.0, 10952.0, 18170.0, 28486.0, 45229.0, 69643.0, 103279.0, 139800.0, 1195689.0, 151019.0, 109768.0, 75819.0, 48873.0, 31402.0, 19461.0, 12141.0, 7588.0, 4554.0, 2838.0, 1669.0, 1076.0, 665.0, 437.0, 257.0, 169.0, 100.0, 80.0, 48.0, 34.0, 21.0, 12.0, 8.0, 11.0, 5.0, 2.0, 3.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.59619140625, -0.5767059326171875, -0.557220458984375, -0.5377349853515625, -0.51824951171875, -0.4987640380859375, -0.479278564453125, -0.4597930908203125, -0.4403076171875, -0.4208221435546875, -0.401336669921875, -0.3818511962890625, -0.36236572265625, -0.3428802490234375, -0.323394775390625, -0.3039093017578125, -0.284423828125, -0.2649383544921875, -0.245452880859375, -0.2259674072265625, -0.20648193359375, -0.1869964599609375, -0.167510986328125, -0.1480255126953125, -0.1285400390625, -0.1090545654296875, -0.089569091796875, -0.0700836181640625, -0.05059814453125, -0.0311126708984375, -0.011627197265625, 0.0078582763671875, 0.02734375, 0.0468292236328125, 0.066314697265625, 0.0858001708984375, 0.10528564453125, 0.1247711181640625, 0.144256591796875, 0.1637420654296875, 0.1832275390625, 0.2027130126953125, 0.222198486328125, 0.2416839599609375, 0.26116943359375, 0.2806549072265625, 0.300140380859375, 0.3196258544921875, 0.339111328125, 0.3585968017578125, 0.378082275390625, 0.3975677490234375, 0.41705322265625, 0.4365386962890625, 0.456024169921875, 0.4755096435546875, 0.4949951171875, 0.5144805908203125, 0.533966064453125, 0.5534515380859375, 0.57293701171875, 0.5924224853515625, 0.611907958984375, 0.6313934326171875, 0.65087890625]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 2.0, 2.0, 4.0, 2.0, 2.0, 2.0, 6.0, 9.0, 5.0, 11.0, 12.0, 17.0, 14.0, 25.0, 33.0, 39.0, 39.0, 52.0, 54.0, 61.0, 82.0, 68.0, 76.0, 75.0, 53.0, 52.0, 32.0, 29.0, 24.0, 21.0, 14.0, 16.0, 14.0, 10.0, 5.0, 5.0, 12.0, 9.0, 3.0, 5.0, 4.0, 2.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.016021728515625, -0.0155181884765625, -0.0150146484375, -0.0145111083984375, -0.014007568359375, -0.0135040283203125, -0.01300048828125, -0.0124969482421875, -0.011993408203125, -0.0114898681640625, -0.010986328125, -0.0104827880859375, -0.009979248046875, -0.0094757080078125, -0.00897216796875, -0.0084686279296875, -0.007965087890625, -0.0074615478515625, -0.0069580078125, -0.0064544677734375, -0.005950927734375, -0.0054473876953125, -0.00494384765625, -0.0044403076171875, -0.003936767578125, -0.0034332275390625, -0.0029296875, -0.0024261474609375, -0.001922607421875, -0.0014190673828125, -0.00091552734375, -0.0004119873046875, 9.1552734375e-05, 0.0005950927734375, 0.0010986328125, 0.0016021728515625, 0.002105712890625, 0.0026092529296875, 0.00311279296875, 0.0036163330078125, 0.004119873046875, 0.0046234130859375, 0.005126953125, 0.0056304931640625, 0.006134033203125, 0.0066375732421875, 0.00714111328125, 0.0076446533203125, 0.008148193359375, 0.0086517333984375, 0.0091552734375, 0.0096588134765625, 0.010162353515625, 0.0106658935546875, 0.01116943359375, 0.0116729736328125, 0.012176513671875, 0.0126800537109375, 0.01318359375, 0.0136871337890625, 0.014190673828125, 0.0146942138671875, 0.01519775390625, 0.0157012939453125, 0.016204833984375]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 2.0, 6.0, 8.0, 4.0, 11.0, 10.0, 12.0, 8.0, 19.0, 24.0, 30.0, 34.0, 50.0, 60.0, 81.0, 148.0, 241.0, 669.0, 14343.0, 985081.0, 45709.0, 1058.0, 307.0, 171.0, 111.0, 88.0, 65.0, 45.0, 31.0, 24.0, 14.0, 20.0, 16.0, 7.0, 5.0, 5.0, 7.0, 7.0, 4.0, 5.0, 7.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.341064453125, -0.3306083679199219, -0.32015228271484375, -0.3096961975097656, -0.2992401123046875, -0.2887840270996094, -0.27832794189453125, -0.2678718566894531, -0.257415771484375, -0.24695968627929688, -0.23650360107421875, -0.22604751586914062, -0.2155914306640625, -0.20513534545898438, -0.19467926025390625, -0.18422317504882812, -0.17376708984375, -0.16331100463867188, -0.15285491943359375, -0.14239883422851562, -0.1319427490234375, -0.12148666381835938, -0.11103057861328125, -0.10057449340820312, -0.090118408203125, -0.07966232299804688, -0.06920623779296875, -0.058750152587890625, -0.0482940673828125, -0.037837982177734375, -0.02738189697265625, -0.016925811767578125, -0.0064697265625, 0.003986358642578125, 0.01444244384765625, 0.024898529052734375, 0.0353546142578125, 0.045810699462890625, 0.05626678466796875, 0.06672286987304688, 0.077178955078125, 0.08763504028320312, 0.09809112548828125, 0.10854721069335938, 0.1190032958984375, 0.12945938110351562, 0.13991546630859375, 0.15037155151367188, 0.16082763671875, 0.17128372192382812, 0.18173980712890625, 0.19219589233398438, 0.2026519775390625, 0.21310806274414062, 0.22356414794921875, 0.23402023315429688, 0.244476318359375, 0.2549324035644531, 0.26538848876953125, 0.2758445739746094, 0.2863006591796875, 0.2967567443847656, 0.30721282958984375, 0.3176689147949219, 0.328125]}, "gradients/decoder.transformer.h.6.ln_cross_attn.weight": {"_type": "histogram", "values": [44.0, 787.0, 184.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.007597451563924551, -0.0031124460510909557, 0.0013725594617426395, 0.005857564974576235, 0.010342570021748543, 0.014827575534582138, 0.019312581047415733, 0.023797588422894478, 0.028282592073082924, 0.03276759758591652, 0.037252604961395264, 0.04173760861158371, 0.046222612261772156, 0.0507076196372509, 0.055192627012729645, 0.05967763066291809, 0.06416263431310654, 0.06864763796329498, 0.07313264906406403, 0.07761765271425247, 0.08210265636444092, 0.08658766001462936, 0.09107266366481781, 0.09555767476558685, 0.1000426784157753, 0.10452768206596375, 0.10901269316673279, 0.11349769681692123, 0.11798270046710968, 0.12246770411729813, 0.12695270776748657, 0.13143771886825562, 0.13592272996902466, 0.1404077410697937, 0.14489273726940155, 0.1493777483701706, 0.15386274456977844, 0.15834775567054749, 0.16283276677131653, 0.16731777787208557, 0.17180277407169342, 0.17628778517246246, 0.1807727813720703, 0.18525779247283936, 0.1897428035736084, 0.19422779977321625, 0.1987128108739853, 0.20319780707359314, 0.20768281817436218, 0.21216782927513123, 0.21665282547473907, 0.22113783657550812, 0.22562283277511597, 0.230107843875885, 0.23459285497665405, 0.2390778660774231, 0.24356286227703094, 0.2480478733778, 0.25253286957740784, 0.2570178806781769, 0.2615028917789459, 0.26598790287971497, 0.2704728841781616, 0.27495789527893066, 0.2794429063796997]}, "gradients/decoder.transformer.h.6.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 5.0, 5.0, 3.0, 14.0, 17.0, 13.0, 27.0, 24.0, 21.0, 33.0, 34.0, 52.0, 45.0, 38.0, 48.0, 61.0, 60.0, 46.0, 68.0, 58.0, 42.0, 38.0, 43.0, 38.0, 40.0, 25.0, 27.0, 21.0, 21.0, 10.0, 10.0, 7.0, 4.0, 1.0, 4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.014806628227233887, -0.014411944895982742, -0.014017261564731598, -0.013622578233480453, -0.013227894902229309, -0.012833211570978165, -0.01243852823972702, -0.012043844908475876, -0.011649161577224731, -0.011254478245973587, -0.010859794914722443, -0.010465111583471298, -0.010070428252220154, -0.00967574492096901, -0.009281061589717865, -0.00888637825846672, -0.008491694927215576, -0.008097011595964432, -0.007702328264713287, -0.007307644933462143, -0.0069129616022109985, -0.006518278270959854, -0.00612359493970871, -0.005728911608457565, -0.005334228277206421, -0.0049395449459552765, -0.004544861614704132, -0.004150178283452988, -0.0037554949522018433, -0.003360811620950699, -0.0029661282896995544, -0.00257144495844841, -0.0021767616271972656, -0.0017820782959461212, -0.0013873949646949768, -0.0009927116334438324, -0.000598028302192688, -0.00020334497094154358, 0.00019133836030960083, 0.0005860216915607452, 0.0009807050228118896, 0.001375388354063034, 0.0017700716853141785, 0.002164755016565323, 0.0025594383478164673, 0.0029541216790676117, 0.003348805010318756, 0.0037434883415699005, 0.004138171672821045, 0.004532855004072189, 0.004927538335323334, 0.005322221666574478, 0.0057169049978256226, 0.006111588329076767, 0.006506271660327911, 0.006900954991579056, 0.0072956383228302, 0.007690321654081345, 0.008085004985332489, 0.008479688316583633, 0.008874371647834778, 0.009269054979085922, 0.009663738310337067, 0.010058421641588211, 0.010453104972839355]}, "gradients/decoder.transformer.h.6.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 1.0, 5.0, 3.0, 4.0, 9.0, 6.0, 8.0, 7.0, 11.0, 15.0, 15.0, 14.0, 28.0, 27.0, 20.0, 22.0, 29.0, 33.0, 36.0, 30.0, 42.0, 49.0, 36.0, 41.0, 56.0, 39.0, 40.0, 36.0, 33.0, 41.0, 30.0, 34.0, 26.0, 26.0, 18.0, 17.0, 16.0, 18.0, 20.0, 12.0, 12.0, 15.0, 6.0, 6.0, 7.0, 3.0, 3.0, 3.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-13.203125, -12.826416015625, -12.44970703125, -12.072998046875, -11.6962890625, -11.319580078125, -10.94287109375, -10.566162109375, -10.189453125, -9.812744140625, -9.43603515625, -9.059326171875, -8.6826171875, -8.305908203125, -7.92919921875, -7.552490234375, -7.17578125, -6.799072265625, -6.42236328125, -6.045654296875, -5.6689453125, -5.292236328125, -4.91552734375, -4.538818359375, -4.162109375, -3.785400390625, -3.40869140625, -3.031982421875, -2.6552734375, -2.278564453125, -1.90185546875, -1.525146484375, -1.1484375, -0.771728515625, -0.39501953125, -0.018310546875, 0.3583984375, 0.735107421875, 1.11181640625, 1.488525390625, 1.865234375, 2.241943359375, 2.61865234375, 2.995361328125, 3.3720703125, 3.748779296875, 4.12548828125, 4.502197265625, 4.87890625, 5.255615234375, 5.63232421875, 6.009033203125, 6.3857421875, 6.762451171875, 7.13916015625, 7.515869140625, 7.892578125, 8.269287109375, 8.64599609375, 9.022705078125, 9.3994140625, 9.776123046875, 10.15283203125, 10.529541015625, 10.90625]}, "gradients/decoder.transformer.h.6.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 7.0, 4.0, 5.0, 7.0, 8.0, 15.0, 14.0, 14.0, 16.0, 29.0, 38.0, 44.0, 60.0, 87.0, 134.0, 147.0, 216.0, 302.0, 442.0, 608.0, 1177.0, 2688.0, 8225.0, 34735.0, 410600.0, 534583.0, 39089.0, 8974.0, 2857.0, 1201.0, 684.0, 391.0, 307.0, 189.0, 163.0, 107.0, 77.0, 72.0, 55.0, 46.0, 37.0, 30.0, 23.0, 11.0, 12.0, 12.0, 6.0, 5.0, 5.0, 0.0, 2.0, 1.0, 1.0, 1.0], "bins": [-32.9375, -31.997314453125, -31.05712890625, -30.116943359375, -29.1767578125, -28.236572265625, -27.29638671875, -26.356201171875, -25.416015625, -24.475830078125, -23.53564453125, -22.595458984375, -21.6552734375, -20.715087890625, -19.77490234375, -18.834716796875, -17.89453125, -16.954345703125, -16.01416015625, -15.073974609375, -14.1337890625, -13.193603515625, -12.25341796875, -11.313232421875, -10.373046875, -9.432861328125, -8.49267578125, -7.552490234375, -6.6123046875, -5.672119140625, -4.73193359375, -3.791748046875, -2.8515625, -1.911376953125, -0.97119140625, -0.031005859375, 0.9091796875, 1.849365234375, 2.78955078125, 3.729736328125, 4.669921875, 5.610107421875, 6.55029296875, 7.490478515625, 8.4306640625, 9.370849609375, 10.31103515625, 11.251220703125, 12.19140625, 13.131591796875, 14.07177734375, 15.011962890625, 15.9521484375, 16.892333984375, 17.83251953125, 18.772705078125, 19.712890625, 20.653076171875, 21.59326171875, 22.533447265625, 23.4736328125, 24.413818359375, 25.35400390625, 26.294189453125, 27.234375]}, "gradients/decoder.transformer.h.6.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 3.0, 4.0, 3.0, 6.0, 8.0, 3.0, 8.0, 12.0, 10.0, 16.0, 18.0, 15.0, 19.0, 33.0, 18.0, 31.0, 45.0, 37.0, 37.0, 36.0, 61.0, 75.0, 285.0, 1661.0, 137.0, 78.0, 57.0, 34.0, 51.0, 36.0, 34.0, 28.0, 20.0, 32.0, 11.0, 14.0, 11.0, 15.0, 16.0, 9.0, 7.0, 7.0, 5.0, 0.0, 7.0, 2.0, 1.0, 4.0, 0.0, 2.0, 1.0], "bins": [-39.40625, -38.319091796875, -37.23193359375, -36.144775390625, -35.0576171875, -33.970458984375, -32.88330078125, -31.796142578125, -30.708984375, -29.621826171875, -28.53466796875, -27.447509765625, -26.3603515625, -25.273193359375, -24.18603515625, -23.098876953125, -22.01171875, -20.924560546875, -19.83740234375, -18.750244140625, -17.6630859375, -16.575927734375, -15.48876953125, -14.401611328125, -13.314453125, -12.227294921875, -11.14013671875, -10.052978515625, -8.9658203125, -7.878662109375, -6.79150390625, -5.704345703125, -4.6171875, -3.530029296875, -2.44287109375, -1.355712890625, -0.2685546875, 0.818603515625, 1.90576171875, 2.992919921875, 4.080078125, 5.167236328125, 6.25439453125, 7.341552734375, 8.4287109375, 9.515869140625, 10.60302734375, 11.690185546875, 12.77734375, 13.864501953125, 14.95166015625, 16.038818359375, 17.1259765625, 18.213134765625, 19.30029296875, 20.387451171875, 21.474609375, 22.561767578125, 23.64892578125, 24.736083984375, 25.8232421875, 26.910400390625, 27.99755859375, 29.084716796875, 30.171875]}, "gradients/decoder.transformer.h.6.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 2.0, 4.0, 5.0, 4.0, 1.0, 7.0, 8.0, 4.0, 10.0, 16.0, 15.0, 14.0, 18.0, 15.0, 42.0, 24.0, 35.0, 51.0, 77.0, 102.0, 149.0, 314.0, 956.0, 22321.0, 3098405.0, 21296.0, 949.0, 295.0, 132.0, 92.0, 67.0, 46.0, 43.0, 37.0, 32.0, 16.0, 20.0, 22.0, 16.0, 8.0, 8.0, 7.0, 7.0, 8.0, 5.0, 4.0, 3.0, 3.0, 4.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-82.1875, -79.1982421875, -76.208984375, -73.2197265625, -70.23046875, -67.2412109375, -64.251953125, -61.2626953125, -58.2734375, -55.2841796875, -52.294921875, -49.3056640625, -46.31640625, -43.3271484375, -40.337890625, -37.3486328125, -34.359375, -31.3701171875, -28.380859375, -25.3916015625, -22.40234375, -19.4130859375, -16.423828125, -13.4345703125, -10.4453125, -7.4560546875, -4.466796875, -1.4775390625, 1.51171875, 4.5009765625, 7.490234375, 10.4794921875, 13.46875, 16.4580078125, 19.447265625, 22.4365234375, 25.42578125, 28.4150390625, 31.404296875, 34.3935546875, 37.3828125, 40.3720703125, 43.361328125, 46.3505859375, 49.33984375, 52.3291015625, 55.318359375, 58.3076171875, 61.296875, 64.2861328125, 67.275390625, 70.2646484375, 73.25390625, 76.2431640625, 79.232421875, 82.2216796875, 85.2109375, 88.2001953125, 91.189453125, 94.1787109375, 97.16796875, 100.1572265625, 103.146484375, 106.1357421875, 109.125]}, "gradients/decoder.transformer.h.6.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 23.0, 354.0, 555.0, 80.0, 6.0], "bins": [-367.01678466796875, -361.0179443359375, -355.01910400390625, -349.0202331542969, -343.0213928222656, -337.0225524902344, -331.0237121582031, -325.0248718261719, -319.0260009765625, -313.02716064453125, -307.0283203125, -301.0294494628906, -295.0306091308594, -289.0317687988281, -283.0329284667969, -277.0340881347656, -271.0352478027344, -265.0364074707031, -259.0375671386719, -253.03871154785156, -247.03985595703125, -241.041015625, -235.04217529296875, -229.04331970214844, -223.04446411132812, -217.04562377929688, -211.04676818847656, -205.0479278564453, -199.049072265625, -193.05023193359375, -187.0513916015625, -181.0525360107422, -175.0537109375, -169.05487060546875, -163.05601501464844, -157.0571746826172, -151.05831909179688, -145.05947875976562, -139.06063842773438, -133.06178283691406, -127.06293487548828, -121.0640869140625, -115.06523895263672, -109.06639099121094, -103.06755065917969, -97.0687026977539, -91.06985473632812, -85.07101440429688, -79.07215881347656, -73.07331085205078, -67.074462890625, -61.075618743896484, -55.07677459716797, -49.07792663574219, -43.079078674316406, -37.08023452758789, -31.081388473510742, -25.082542419433594, -19.083694458007812, -13.084848403930664, -7.086002349853516, -1.0871562957763672, 4.911691665649414, 10.91053581237793, 16.90938377380371]}, "gradients/decoder.transformer.h.6.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 0.0, 4.0, 4.0, 7.0, 5.0, 15.0, 4.0, 7.0, 8.0, 13.0, 24.0, 26.0, 30.0, 29.0, 31.0, 35.0, 36.0, 50.0, 30.0, 45.0, 39.0, 32.0, 54.0, 57.0, 38.0, 30.0, 37.0, 34.0, 36.0, 26.0, 29.0, 27.0, 32.0, 28.0, 24.0, 19.0, 12.0, 14.0, 6.0, 7.0, 7.0, 8.0, 2.0, 5.0, 5.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-81.04960632324219, -78.2597427368164, -75.46987915039062, -72.68001556396484, -69.89015197753906, -67.10028839111328, -64.3104248046875, -61.520565032958984, -58.7307014465332, -55.94083786010742, -53.15097427368164, -50.361114501953125, -47.571250915527344, -44.78138732910156, -41.99152374267578, -39.20166015625, -36.41179656982422, -33.62193298339844, -30.832069396972656, -28.042207717895508, -25.252344131469727, -22.462480545043945, -19.672618865966797, -16.882755279541016, -14.092891693115234, -11.303028106689453, -8.513165473937988, -5.723302841186523, -2.933439254760742, -0.14357566833496094, 2.6462860107421875, 5.436149597167969, 8.22601318359375, 11.015876770019531, 13.805739402770996, 16.59560203552246, 19.385465621948242, 22.175329208374023, 24.965190887451172, 27.755054473876953, 30.544918060302734, 33.334781646728516, 36.1246452331543, 38.91450500488281, 41.704368591308594, 44.494232177734375, 47.284095764160156, 50.07395935058594, 52.86382293701172, 55.6536865234375, 58.44355010986328, 61.23341369628906, 64.02327728271484, 66.81314086914062, 69.60299682617188, 72.39286804199219, 75.18272399902344, 77.97258758544922, 80.762451171875, 83.55231475830078, 86.34217834472656, 89.13204193115234, 91.92190551757812, 94.71176147460938, 97.50163269042969]}, "gradients/decoder.transformer.h.5.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 3.0, 8.0, 7.0, 11.0, 7.0, 6.0, 18.0, 12.0, 21.0, 21.0, 19.0, 20.0, 25.0, 23.0, 29.0, 29.0, 45.0, 46.0, 39.0, 33.0, 53.0, 33.0, 43.0, 45.0, 46.0, 35.0, 37.0, 30.0, 35.0, 29.0, 29.0, 19.0, 20.0, 20.0, 20.0, 13.0, 16.0, 10.0, 7.0, 14.0, 12.0, 6.0, 1.0, 2.0, 5.0, 0.0, 2.0, 2.0, 0.0, 3.0], "bins": [-13.71875, -13.333740234375, -12.94873046875, -12.563720703125, -12.1787109375, -11.793701171875, -11.40869140625, -11.023681640625, -10.638671875, -10.253662109375, -9.86865234375, -9.483642578125, -9.0986328125, -8.713623046875, -8.32861328125, -7.943603515625, -7.55859375, -7.173583984375, -6.78857421875, -6.403564453125, -6.0185546875, -5.633544921875, -5.24853515625, -4.863525390625, -4.478515625, -4.093505859375, -3.70849609375, -3.323486328125, -2.9384765625, -2.553466796875, -2.16845703125, -1.783447265625, -1.3984375, -1.013427734375, -0.62841796875, -0.243408203125, 0.1416015625, 0.526611328125, 0.91162109375, 1.296630859375, 1.681640625, 2.066650390625, 2.45166015625, 2.836669921875, 3.2216796875, 3.606689453125, 3.99169921875, 4.376708984375, 4.76171875, 5.146728515625, 5.53173828125, 5.916748046875, 6.3017578125, 6.686767578125, 7.07177734375, 7.456787109375, 7.841796875, 8.226806640625, 8.61181640625, 8.996826171875, 9.3818359375, 9.766845703125, 10.15185546875, 10.536865234375, 10.921875]}, "gradients/decoder.transformer.h.5.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0, 3.0, 3.0, 3.0, 4.0, 9.0, 17.0, 18.0, 17.0, 18.0, 22.0, 42.0, 47.0, 80.0, 96.0, 124.0, 197.0, 326.0, 448.0, 765.0, 1243.0, 2285.0, 4802.0, 10500.0, 28105.0, 93399.0, 406766.0, 1473039.0, 1561323.0, 450829.0, 106752.0, 30732.0, 11321.0, 4928.0, 2471.0, 1315.0, 783.0, 461.0, 313.0, 205.0, 119.0, 97.0, 52.0, 54.0, 29.0, 23.0, 21.0, 19.0, 20.0, 14.0, 4.0, 12.0, 6.0, 5.0, 1.0, 4.0, 3.0], "bins": [-17.890625, -17.3800048828125, -16.869384765625, -16.3587646484375, -15.84814453125, -15.3375244140625, -14.826904296875, -14.3162841796875, -13.8056640625, -13.2950439453125, -12.784423828125, -12.2738037109375, -11.76318359375, -11.2525634765625, -10.741943359375, -10.2313232421875, -9.720703125, -9.2100830078125, -8.699462890625, -8.1888427734375, -7.67822265625, -7.1676025390625, -6.656982421875, -6.1463623046875, -5.6357421875, -5.1251220703125, -4.614501953125, -4.1038818359375, -3.59326171875, -3.0826416015625, -2.572021484375, -2.0614013671875, -1.55078125, -1.0401611328125, -0.529541015625, -0.0189208984375, 0.49169921875, 1.0023193359375, 1.512939453125, 2.0235595703125, 2.5341796875, 3.0447998046875, 3.555419921875, 4.0660400390625, 4.57666015625, 5.0872802734375, 5.597900390625, 6.1085205078125, 6.619140625, 7.1297607421875, 7.640380859375, 8.1510009765625, 8.66162109375, 9.1722412109375, 9.682861328125, 10.1934814453125, 10.7041015625, 11.2147216796875, 11.725341796875, 12.2359619140625, 12.74658203125, 13.2572021484375, 13.767822265625, 14.2784423828125, 14.7890625]}, "gradients/decoder.transformer.h.5.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 1.0, 4.0, 4.0, 4.0, 4.0, 8.0, 6.0, 18.0, 24.0, 24.0, 47.0, 59.0, 67.0, 85.0, 115.0, 132.0, 201.0, 248.0, 353.0, 399.0, 426.0, 378.0, 328.0, 277.0, 209.0, 156.0, 130.0, 86.0, 56.0, 54.0, 38.0, 33.0, 30.0, 13.0, 19.0, 10.0, 9.0, 6.0, 6.0, 4.0, 4.0, 2.0, 3.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-12.265625, -11.744140625, -11.22265625, -10.701171875, -10.1796875, -9.658203125, -9.13671875, -8.615234375, -8.09375, -7.572265625, -7.05078125, -6.529296875, -6.0078125, -5.486328125, -4.96484375, -4.443359375, -3.921875, -3.400390625, -2.87890625, -2.357421875, -1.8359375, -1.314453125, -0.79296875, -0.271484375, 0.25, 0.771484375, 1.29296875, 1.814453125, 2.3359375, 2.857421875, 3.37890625, 3.900390625, 4.421875, 4.943359375, 5.46484375, 5.986328125, 6.5078125, 7.029296875, 7.55078125, 8.072265625, 8.59375, 9.115234375, 9.63671875, 10.158203125, 10.6796875, 11.201171875, 11.72265625, 12.244140625, 12.765625, 13.287109375, 13.80859375, 14.330078125, 14.8515625, 15.373046875, 15.89453125, 16.416015625, 16.9375, 17.458984375, 17.98046875, 18.501953125, 19.0234375, 19.544921875, 20.06640625, 20.587890625, 21.109375]}, "gradients/decoder.transformer.h.5.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 8.0, 15.0, 20.0, 29.0, 34.0, 35.0, 53.0, 82.0, 101.0, 151.0, 239.0, 403.0, 805.0, 2849.0, 19451.0, 296060.0, 3511682.0, 335701.0, 21488.0, 2987.0, 881.0, 398.0, 220.0, 181.0, 110.0, 76.0, 67.0, 36.0, 29.0, 16.0, 19.0, 10.0, 12.0, 8.0, 6.0, 3.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-52.0, -50.40087890625, -48.8017578125, -47.20263671875, -45.603515625, -44.00439453125, -42.4052734375, -40.80615234375, -39.20703125, -37.60791015625, -36.0087890625, -34.40966796875, -32.810546875, -31.21142578125, -29.6123046875, -28.01318359375, -26.4140625, -24.81494140625, -23.2158203125, -21.61669921875, -20.017578125, -18.41845703125, -16.8193359375, -15.22021484375, -13.62109375, -12.02197265625, -10.4228515625, -8.82373046875, -7.224609375, -5.62548828125, -4.0263671875, -2.42724609375, -0.828125, 0.77099609375, 2.3701171875, 3.96923828125, 5.568359375, 7.16748046875, 8.7666015625, 10.36572265625, 11.96484375, 13.56396484375, 15.1630859375, 16.76220703125, 18.361328125, 19.96044921875, 21.5595703125, 23.15869140625, 24.7578125, 26.35693359375, 27.9560546875, 29.55517578125, 31.154296875, 32.75341796875, 34.3525390625, 35.95166015625, 37.55078125, 39.14990234375, 40.7490234375, 42.34814453125, 43.947265625, 45.54638671875, 47.1455078125, 48.74462890625, 50.34375]}, "gradients/decoder.transformer.h.5.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 4.0, 20.0, 235.0, 554.0, 187.0, 15.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-687.9711303710938, -675.7628173828125, -663.5545043945312, -651.34619140625, -639.1378784179688, -626.9295654296875, -614.7212524414062, -602.512939453125, -590.3046264648438, -578.0963134765625, -565.8880004882812, -553.6796875, -541.4713745117188, -529.2630615234375, -517.0547485351562, -504.846435546875, -492.63812255859375, -480.4298095703125, -468.22149658203125, -456.01318359375, -443.80487060546875, -431.5965576171875, -419.38824462890625, -407.179931640625, -394.9715881347656, -382.7632751464844, -370.5549621582031, -358.3466491699219, -346.1383361816406, -333.9300231933594, -321.7217102050781, -309.5133972167969, -297.3050537109375, -285.09674072265625, -272.888427734375, -260.68011474609375, -248.4718017578125, -236.26348876953125, -224.05517578125, -211.84686279296875, -199.6385498046875, -187.43023681640625, -175.221923828125, -163.01361083984375, -150.8052978515625, -138.59698486328125, -126.38866424560547, -114.18035125732422, -101.97203826904297, -89.76372528076172, -77.55541229248047, -65.34709167480469, -53.1387825012207, -40.93046951293945, -28.722152709960938, -16.513839721679688, -4.3055267333984375, 7.902787208557129, 20.111101150512695, 32.31941604614258, 44.52772903442383, 56.73604202270508, 68.9443588256836, 81.15267181396484, 93.3609848022461]}, "gradients/decoder.transformer.h.5.ln_2.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 4.0, 5.0, 0.0, 7.0, 5.0, 14.0, 11.0, 6.0, 17.0, 11.0, 14.0, 15.0, 15.0, 25.0, 17.0, 41.0, 24.0, 36.0, 31.0, 34.0, 34.0, 42.0, 43.0, 38.0, 29.0, 42.0, 40.0, 39.0, 30.0, 44.0, 35.0, 32.0, 30.0, 19.0, 27.0, 24.0, 12.0, 14.0, 10.0, 21.0, 11.0, 14.0, 12.0, 9.0, 8.0, 5.0, 3.0, 3.0, 4.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0], "bins": [-61.10502624511719, -59.18751525878906, -57.27000045776367, -55.35248947143555, -53.434974670410156, -51.51746368408203, -49.599952697753906, -47.682437896728516, -45.764923095703125, -43.847412109375, -41.92989730834961, -40.012386322021484, -38.094871520996094, -36.17736053466797, -34.259849548339844, -32.34233474731445, -30.424823760986328, -28.50731086730957, -26.589797973632812, -24.672286987304688, -22.754772186279297, -20.837261199951172, -18.919748306274414, -17.002235412597656, -15.084722518920898, -13.16720962524414, -11.249696731567383, -9.332184791564941, -7.414671897888184, -5.497159004211426, -3.5796470642089844, -1.6621341705322266, 0.25537872314453125, 2.17289137840271, 4.090404033660889, 6.007916450500488, 7.925429344177246, 9.842942237854004, 11.760454177856445, 13.677967071533203, 15.595479965209961, 17.51299285888672, 19.430505752563477, 21.348018646240234, 23.26552963256836, 25.18304443359375, 27.100555419921875, 29.018068313598633, 30.93558120727539, 32.853092193603516, 34.770606994628906, 36.68811798095703, 38.60563278198242, 40.52314376831055, 42.44065856933594, 44.35816955566406, 46.27568054199219, 48.19319152832031, 50.1107063293457, 52.02821731567383, 53.94573211669922, 55.863243103027344, 57.78075408935547, 59.69826889038086, 61.61578369140625]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 6.0, 4.0, 0.0, 7.0, 5.0, 6.0, 11.0, 12.0, 12.0, 15.0, 19.0, 18.0, 33.0, 18.0, 23.0, 30.0, 32.0, 44.0, 32.0, 46.0, 36.0, 39.0, 52.0, 48.0, 31.0, 42.0, 36.0, 29.0, 39.0, 33.0, 26.0, 23.0, 32.0, 24.0, 26.0, 20.0, 19.0, 19.0, 18.0, 17.0, 4.0, 6.0, 8.0, 4.0, 2.0, 3.0, 3.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.109375, -11.7120361328125, -11.314697265625, -10.9173583984375, -10.52001953125, -10.1226806640625, -9.725341796875, -9.3280029296875, -8.9306640625, -8.5333251953125, -8.135986328125, -7.7386474609375, -7.34130859375, -6.9439697265625, -6.546630859375, -6.1492919921875, -5.751953125, -5.3546142578125, -4.957275390625, -4.5599365234375, -4.16259765625, -3.7652587890625, -3.367919921875, -2.9705810546875, -2.5732421875, -2.1759033203125, -1.778564453125, -1.3812255859375, -0.98388671875, -0.5865478515625, -0.189208984375, 0.2081298828125, 0.60546875, 1.0028076171875, 1.400146484375, 1.7974853515625, 2.19482421875, 2.5921630859375, 2.989501953125, 3.3868408203125, 3.7841796875, 4.1815185546875, 4.578857421875, 4.9761962890625, 5.37353515625, 5.7708740234375, 6.168212890625, 6.5655517578125, 6.962890625, 7.3602294921875, 7.757568359375, 8.1549072265625, 8.55224609375, 8.9495849609375, 9.346923828125, 9.7442626953125, 10.1416015625, 10.5389404296875, 10.936279296875, 11.3336181640625, 11.73095703125, 12.1282958984375, 12.525634765625, 12.9229736328125, 13.3203125]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 4.0, 2.0, 4.0, 8.0, 20.0, 15.0, 32.0, 59.0, 73.0, 113.0, 151.0, 213.0, 329.0, 464.0, 667.0, 1016.0, 1528.0, 2156.0, 3040.0, 4775.0, 6824.0, 10245.0, 15372.0, 23140.0, 34889.0, 51994.0, 76524.0, 106587.0, 136150.0, 147000.0, 126619.0, 95173.0, 66710.0, 45156.0, 29931.0, 20370.0, 13392.0, 9096.0, 6021.0, 3972.0, 2743.0, 1928.0, 1232.0, 860.0, 621.0, 437.0, 306.0, 223.0, 136.0, 85.0, 55.0, 48.0, 24.0, 13.0, 9.0, 2.0, 5.0, 5.0, 3.0, 2.0, 1.0], "bins": [-0.7548828125, -0.7315292358398438, -0.7081756591796875, -0.6848220825195312, -0.661468505859375, -0.6381149291992188, -0.6147613525390625, -0.5914077758789062, -0.56805419921875, -0.5447006225585938, -0.5213470458984375, -0.49799346923828125, -0.474639892578125, -0.45128631591796875, -0.4279327392578125, -0.40457916259765625, -0.3812255859375, -0.35787200927734375, -0.3345184326171875, -0.31116485595703125, -0.287811279296875, -0.26445770263671875, -0.2411041259765625, -0.21775054931640625, -0.19439697265625, -0.17104339599609375, -0.1476898193359375, -0.12433624267578125, -0.100982666015625, -0.07762908935546875, -0.0542755126953125, -0.03092193603515625, -0.007568359375, 0.01578521728515625, 0.0391387939453125, 0.06249237060546875, 0.085845947265625, 0.10919952392578125, 0.1325531005859375, 0.15590667724609375, 0.17926025390625, 0.20261383056640625, 0.2259674072265625, 0.24932098388671875, 0.272674560546875, 0.29602813720703125, 0.3193817138671875, 0.34273529052734375, 0.3660888671875, 0.38944244384765625, 0.4127960205078125, 0.43614959716796875, 0.459503173828125, 0.48285675048828125, 0.5062103271484375, 0.5295639038085938, 0.55291748046875, 0.5762710571289062, 0.5996246337890625, 0.6229782104492188, 0.646331787109375, 0.6696853637695312, 0.6930389404296875, 0.7163925170898438, 0.73974609375]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 7.0, 6.0, 8.0, 2.0, 8.0, 6.0, 11.0, 11.0, 12.0, 17.0, 24.0, 21.0, 29.0, 36.0, 35.0, 27.0, 30.0, 27.0, 41.0, 44.0, 39.0, 38.0, 1075.0, 41.0, 38.0, 57.0, 33.0, 36.0, 20.0, 45.0, 23.0, 30.0, 27.0, 23.0, 22.0, 23.0, 8.0, 17.0, 5.0, 4.0, 8.0, 10.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-8.53125, -8.269775390625, -8.00830078125, -7.746826171875, -7.4853515625, -7.223876953125, -6.96240234375, -6.700927734375, -6.439453125, -6.177978515625, -5.91650390625, -5.655029296875, -5.3935546875, -5.132080078125, -4.87060546875, -4.609130859375, -4.34765625, -4.086181640625, -3.82470703125, -3.563232421875, -3.3017578125, -3.040283203125, -2.77880859375, -2.517333984375, -2.255859375, -1.994384765625, -1.73291015625, -1.471435546875, -1.2099609375, -0.948486328125, -0.68701171875, -0.425537109375, -0.1640625, 0.097412109375, 0.35888671875, 0.620361328125, 0.8818359375, 1.143310546875, 1.40478515625, 1.666259765625, 1.927734375, 2.189208984375, 2.45068359375, 2.712158203125, 2.9736328125, 3.235107421875, 3.49658203125, 3.758056640625, 4.01953125, 4.281005859375, 4.54248046875, 4.803955078125, 5.0654296875, 5.326904296875, 5.58837890625, 5.849853515625, 6.111328125, 6.372802734375, 6.63427734375, 6.895751953125, 7.1572265625, 7.418701171875, 7.68017578125, 7.941650390625, 8.203125]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 3.0, 6.0, 10.0, 11.0, 18.0, 18.0, 42.0, 42.0, 64.0, 120.0, 189.0, 273.0, 415.0, 708.0, 1165.0, 1905.0, 3087.0, 5387.0, 8808.0, 15093.0, 25300.0, 42376.0, 69904.0, 108399.0, 159896.0, 1205436.0, 161385.0, 109472.0, 70650.0, 43246.0, 25881.0, 15203.0, 8982.0, 5373.0, 3277.0, 1952.0, 1189.0, 644.0, 422.0, 302.0, 165.0, 111.0, 72.0, 48.0, 27.0, 26.0, 15.0, 7.0, 7.0, 1.0, 5.0, 5.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.70556640625, -0.6831741333007812, -0.6607818603515625, -0.6383895874023438, -0.615997314453125, -0.5936050415039062, -0.5712127685546875, -0.5488204956054688, -0.52642822265625, -0.5040359497070312, -0.4816436767578125, -0.45925140380859375, -0.436859130859375, -0.41446685791015625, -0.3920745849609375, -0.36968231201171875, -0.3472900390625, -0.32489776611328125, -0.3025054931640625, -0.28011322021484375, -0.257720947265625, -0.23532867431640625, -0.2129364013671875, -0.19054412841796875, -0.16815185546875, -0.14575958251953125, -0.1233673095703125, -0.10097503662109375, -0.078582763671875, -0.05619049072265625, -0.0337982177734375, -0.01140594482421875, 0.010986328125, 0.03337860107421875, 0.0557708740234375, 0.07816314697265625, 0.100555419921875, 0.12294769287109375, 0.1453399658203125, 0.16773223876953125, 0.19012451171875, 0.21251678466796875, 0.2349090576171875, 0.25730133056640625, 0.279693603515625, 0.30208587646484375, 0.3244781494140625, 0.34687042236328125, 0.3692626953125, 0.39165496826171875, 0.4140472412109375, 0.43643951416015625, 0.458831787109375, 0.48122406005859375, 0.5036163330078125, 0.5260086059570312, 0.54840087890625, 0.5707931518554688, 0.5931854248046875, 0.6155776977539062, 0.637969970703125, 0.6603622436523438, 0.6827545166015625, 0.7051467895507812, 0.7275390625]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 0.0, 2.0, 3.0, 8.0, 3.0, 10.0, 3.0, 5.0, 5.0, 8.0, 6.0, 10.0, 10.0, 9.0, 15.0, 23.0, 20.0, 26.0, 26.0, 34.0, 44.0, 38.0, 57.0, 74.0, 79.0, 76.0, 65.0, 53.0, 42.0, 32.0, 25.0, 26.0, 28.0, 18.0, 15.0, 20.0, 11.0, 16.0, 14.0, 6.0, 8.0, 11.0, 5.0, 6.0, 3.0, 3.0, 3.0, 1.0, 2.0, 2.0, 0.0, 2.0, 3.0, 0.0, 1.0], "bins": [-0.0266265869140625, -0.025838851928710938, -0.025051116943359375, -0.024263381958007812, -0.02347564697265625, -0.022687911987304688, -0.021900177001953125, -0.021112442016601562, -0.02032470703125, -0.019536972045898438, -0.018749237060546875, -0.017961502075195312, -0.01717376708984375, -0.016386032104492188, -0.015598297119140625, -0.014810562133789062, -0.0140228271484375, -0.013235092163085938, -0.012447357177734375, -0.011659622192382812, -0.01087188720703125, -0.010084152221679688, -0.009296417236328125, -0.008508682250976562, -0.007720947265625, -0.0069332122802734375, -0.006145477294921875, -0.0053577423095703125, -0.00457000732421875, -0.0037822723388671875, -0.002994537353515625, -0.0022068023681640625, -0.0014190673828125, -0.0006313323974609375, 0.000156402587890625, 0.0009441375732421875, 0.00173187255859375, 0.0025196075439453125, 0.003307342529296875, 0.0040950775146484375, 0.0048828125, 0.0056705474853515625, 0.006458282470703125, 0.0072460174560546875, 0.00803375244140625, 0.008821487426757812, 0.009609222412109375, 0.010396957397460938, 0.0111846923828125, 0.011972427368164062, 0.012760162353515625, 0.013547897338867188, 0.01433563232421875, 0.015123367309570312, 0.015911102294921875, 0.016698837280273438, 0.017486572265625, 0.018274307250976562, 0.019062042236328125, 0.019849777221679688, 0.02063751220703125, 0.021425247192382812, 0.022212982177734375, 0.023000717163085938, 0.0237884521484375]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 4.0, 5.0, 0.0, 6.0, 8.0, 10.0, 9.0, 16.0, 10.0, 16.0, 31.0, 20.0, 24.0, 30.0, 39.0, 64.0, 70.0, 101.0, 127.0, 191.0, 312.0, 588.0, 5432.0, 1000605.0, 38767.0, 834.0, 387.0, 210.0, 167.0, 115.0, 81.0, 44.0, 42.0, 40.0, 20.0, 21.0, 17.0, 8.0, 16.0, 20.0, 9.0, 8.0, 12.0, 7.0, 5.0, 5.0, 2.0, 2.0, 3.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.468505859375, -0.4527168273925781, -0.43692779541015625, -0.4211387634277344, -0.4053497314453125, -0.3895606994628906, -0.37377166748046875, -0.3579826354980469, -0.342193603515625, -0.3264045715332031, -0.31061553955078125, -0.2948265075683594, -0.2790374755859375, -0.2632484436035156, -0.24745941162109375, -0.23167037963867188, -0.21588134765625, -0.20009231567382812, -0.18430328369140625, -0.16851425170898438, -0.1527252197265625, -0.13693618774414062, -0.12114715576171875, -0.10535812377929688, -0.089569091796875, -0.07378005981445312, -0.05799102783203125, -0.042201995849609375, -0.0264129638671875, -0.010623931884765625, 0.00516510009765625, 0.020954132080078125, 0.0367431640625, 0.052532196044921875, 0.06832122802734375, 0.08411026000976562, 0.0998992919921875, 0.11568832397460938, 0.13147735595703125, 0.14726638793945312, 0.163055419921875, 0.17884445190429688, 0.19463348388671875, 0.21042251586914062, 0.2262115478515625, 0.24200057983398438, 0.25778961181640625, 0.2735786437988281, 0.28936767578125, 0.3051567077636719, 0.32094573974609375, 0.3367347717285156, 0.3525238037109375, 0.3683128356933594, 0.38410186767578125, 0.3998908996582031, 0.415679931640625, 0.4314689636230469, 0.44725799560546875, 0.4630470275878906, 0.4788360595703125, 0.4946250915527344, 0.5104141235351562, 0.5262031555175781, 0.5419921875]}, "gradients/decoder.transformer.h.5.ln_cross_attn.weight": {"_type": "histogram", "values": [327.0, 690.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.014319967478513718, -0.0008359970524907112, 0.012647973373532295, 0.026131942868232727, 0.03961591422557831, 0.05309988185763359, 0.06658385694026947, 0.08006782829761505, 0.09355179965496063, 0.10703577101230621, 0.1205197423696518, 0.13400371372699738, 0.14748768508434296, 0.16097164154052734, 0.17445561289787292, 0.1879395842552185, 0.2014235556125641, 0.21490752696990967, 0.22839149832725525, 0.24187546968460083, 0.2553594410419464, 0.268843412399292, 0.2823273837566376, 0.29581135511398315, 0.30929532647132874, 0.3227792978286743, 0.3362632691860199, 0.3497472405433655, 0.36323121190071106, 0.37671518325805664, 0.3901991546154022, 0.4036831259727478, 0.4171670973300934, 0.43065106868743896, 0.44413504004478455, 0.4576190114021301, 0.4711029827594757, 0.4845869541168213, 0.49807092547416687, 0.5115548968315125, 0.5250388383865356, 0.5385227799415588, 0.5520067811012268, 0.56549072265625, 0.578974723815918, 0.5924586653709412, 0.6059426665306091, 0.6194266080856323, 0.6329106092453003, 0.6463945508003235, 0.6598785519599915, 0.6733624935150146, 0.6868464946746826, 0.7003304362297058, 0.7138144373893738, 0.727298378944397, 0.7407823801040649, 0.7542663216590881, 0.7677503228187561, 0.7812342643737793, 0.7947182655334473, 0.8082022070884705, 0.8216862082481384, 0.8351701498031616, 0.8486541509628296]}, "gradients/decoder.transformer.h.5.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 2.0, 8.0, 9.0, 18.0, 22.0, 25.0, 37.0, 42.0, 53.0, 69.0, 79.0, 86.0, 94.0, 88.0, 60.0, 70.0, 60.0, 47.0, 39.0, 34.0, 21.0, 21.0, 12.0, 9.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.039733052253723145, -0.038698434829711914, -0.037663817405700684, -0.036629196256399155, -0.035594578832387924, -0.034559961408376694, -0.033525340259075165, -0.032490722835063934, -0.031456105411052704, -0.030421487987041473, -0.029386868700385094, -0.028352249413728714, -0.027317631989717484, -0.026283014565706253, -0.025248395279049873, -0.024213775992393494, -0.023179158568382263, -0.022144541144371033, -0.021109921857714653, -0.020075302571058273, -0.019040685147047043, -0.018006067723035812, -0.016971448436379433, -0.015936829149723053, -0.014902211725711823, -0.013867593370378017, -0.012832975015044212, -0.011798356659710407, -0.010763738304376602, -0.009729119949042797, -0.008694501593708992, -0.007659883238375187, -0.006625264883041382, -0.005590646527707577, -0.004556028172373772, -0.0035214098170399666, -0.0024867914617061615, -0.0014521731063723564, -0.00041755475103855133, 0.0006170636042952538, 0.0016516819596290588, 0.002686300314962864, 0.003720918670296669, 0.004755537025630474, 0.005790155380964279, 0.006824773736298084, 0.00785939209163189, 0.008894010446965694, 0.0099286288022995, 0.010963247157633305, 0.01199786551296711, 0.013032483868300915, 0.01406710222363472, 0.015101720578968525, 0.01613633893430233, 0.01717095822095871, 0.01820557564496994, 0.01924019306898117, 0.02027481235563755, 0.02130943164229393, 0.02234404906630516, 0.02337866649031639, 0.02441328577697277, 0.02544790506362915, 0.02648252248764038]}, "gradients/decoder.transformer.h.5.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 6.0, 4.0, 0.0, 7.0, 5.0, 6.0, 10.0, 13.0, 11.0, 16.0, 19.0, 18.0, 33.0, 18.0, 23.0, 30.0, 31.0, 45.0, 32.0, 46.0, 35.0, 40.0, 51.0, 49.0, 30.0, 42.0, 37.0, 29.0, 39.0, 33.0, 26.0, 23.0, 32.0, 23.0, 27.0, 20.0, 19.0, 19.0, 18.0, 17.0, 4.0, 6.0, 8.0, 4.0, 2.0, 3.0, 2.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.109375, -11.712158203125, -11.31494140625, -10.917724609375, -10.5205078125, -10.123291015625, -9.72607421875, -9.328857421875, -8.931640625, -8.534423828125, -8.13720703125, -7.739990234375, -7.3427734375, -6.945556640625, -6.54833984375, -6.151123046875, -5.75390625, -5.356689453125, -4.95947265625, -4.562255859375, -4.1650390625, -3.767822265625, -3.37060546875, -2.973388671875, -2.576171875, -2.178955078125, -1.78173828125, -1.384521484375, -0.9873046875, -0.590087890625, -0.19287109375, 0.204345703125, 0.6015625, 0.998779296875, 1.39599609375, 1.793212890625, 2.1904296875, 2.587646484375, 2.98486328125, 3.382080078125, 3.779296875, 4.176513671875, 4.57373046875, 4.970947265625, 5.3681640625, 5.765380859375, 6.16259765625, 6.559814453125, 6.95703125, 7.354248046875, 7.75146484375, 8.148681640625, 8.5458984375, 8.943115234375, 9.34033203125, 9.737548828125, 10.134765625, 10.531982421875, 10.92919921875, 11.326416015625, 11.7236328125, 12.120849609375, 12.51806640625, 12.915283203125, 13.3125]}, "gradients/decoder.transformer.h.5.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 4.0, 3.0, 6.0, 3.0, 13.0, 8.0, 12.0, 19.0, 24.0, 37.0, 49.0, 52.0, 75.0, 88.0, 147.0, 234.0, 310.0, 483.0, 893.0, 1596.0, 3018.0, 6179.0, 12802.0, 28352.0, 65796.0, 168410.0, 424052.0, 198642.0, 75742.0, 32274.0, 14606.0, 6871.0, 3385.0, 1797.0, 944.0, 541.0, 311.0, 230.0, 143.0, 80.0, 78.0, 66.0, 56.0, 35.0, 22.0, 24.0, 21.0, 10.0, 4.0, 10.0, 7.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-15.3828125, -14.8797607421875, -14.376708984375, -13.8736572265625, -13.37060546875, -12.8675537109375, -12.364501953125, -11.8614501953125, -11.3583984375, -10.8553466796875, -10.352294921875, -9.8492431640625, -9.34619140625, -8.8431396484375, -8.340087890625, -7.8370361328125, -7.333984375, -6.8309326171875, -6.327880859375, -5.8248291015625, -5.32177734375, -4.8187255859375, -4.315673828125, -3.8126220703125, -3.3095703125, -2.8065185546875, -2.303466796875, -1.8004150390625, -1.29736328125, -0.7943115234375, -0.291259765625, 0.2117919921875, 0.71484375, 1.2178955078125, 1.720947265625, 2.2239990234375, 2.72705078125, 3.2301025390625, 3.733154296875, 4.2362060546875, 4.7392578125, 5.2423095703125, 5.745361328125, 6.2484130859375, 6.75146484375, 7.2545166015625, 7.757568359375, 8.2606201171875, 8.763671875, 9.2667236328125, 9.769775390625, 10.2728271484375, 10.77587890625, 11.2789306640625, 11.781982421875, 12.2850341796875, 12.7880859375, 13.2911376953125, 13.794189453125, 14.2972412109375, 14.80029296875, 15.3033447265625, 15.806396484375, 16.3094482421875, 16.8125]}, "gradients/decoder.transformer.h.5.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 3.0, 3.0, 7.0, 4.0, 3.0, 13.0, 11.0, 14.0, 18.0, 22.0, 20.0, 26.0, 24.0, 29.0, 42.0, 40.0, 59.0, 55.0, 78.0, 111.0, 221.0, 1494.0, 227.0, 98.0, 77.0, 55.0, 40.0, 49.0, 37.0, 27.0, 24.0, 22.0, 15.0, 17.0, 11.0, 14.0, 9.0, 11.0, 6.0, 4.0, 4.0, 1.0, 4.0, 1.0, 3.0, 0.0, 4.0, 3.0, 0.0, 1.0], "bins": [-39.375, -38.262939453125, -37.15087890625, -36.038818359375, -34.9267578125, -33.814697265625, -32.70263671875, -31.590576171875, -30.478515625, -29.366455078125, -28.25439453125, -27.142333984375, -26.0302734375, -24.918212890625, -23.80615234375, -22.694091796875, -21.58203125, -20.469970703125, -19.35791015625, -18.245849609375, -17.1337890625, -16.021728515625, -14.90966796875, -13.797607421875, -12.685546875, -11.573486328125, -10.46142578125, -9.349365234375, -8.2373046875, -7.125244140625, -6.01318359375, -4.901123046875, -3.7890625, -2.677001953125, -1.56494140625, -0.452880859375, 0.6591796875, 1.771240234375, 2.88330078125, 3.995361328125, 5.107421875, 6.219482421875, 7.33154296875, 8.443603515625, 9.5556640625, 10.667724609375, 11.77978515625, 12.891845703125, 14.00390625, 15.115966796875, 16.22802734375, 17.340087890625, 18.4521484375, 19.564208984375, 20.67626953125, 21.788330078125, 22.900390625, 24.012451171875, 25.12451171875, 26.236572265625, 27.3486328125, 28.460693359375, 29.57275390625, 30.684814453125, 31.796875]}, "gradients/decoder.transformer.h.5.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 6.0, 2.0, 4.0, 2.0, 8.0, 15.0, 9.0, 12.0, 27.0, 35.0, 31.0, 46.0, 65.0, 92.0, 120.0, 181.0, 361.0, 841.0, 4297.0, 60593.0, 2777603.0, 287346.0, 11384.0, 1395.0, 432.0, 233.0, 156.0, 117.0, 84.0, 56.0, 39.0, 34.0, 18.0, 19.0, 13.0, 7.0, 8.0, 9.0, 5.0, 0.0, 4.0, 1.0, 3.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-50.59375, -48.76806640625, -46.9423828125, -45.11669921875, -43.291015625, -41.46533203125, -39.6396484375, -37.81396484375, -35.98828125, -34.16259765625, -32.3369140625, -30.51123046875, -28.685546875, -26.85986328125, -25.0341796875, -23.20849609375, -21.3828125, -19.55712890625, -17.7314453125, -15.90576171875, -14.080078125, -12.25439453125, -10.4287109375, -8.60302734375, -6.77734375, -4.95166015625, -3.1259765625, -1.30029296875, 0.525390625, 2.35107421875, 4.1767578125, 6.00244140625, 7.828125, 9.65380859375, 11.4794921875, 13.30517578125, 15.130859375, 16.95654296875, 18.7822265625, 20.60791015625, 22.43359375, 24.25927734375, 26.0849609375, 27.91064453125, 29.736328125, 31.56201171875, 33.3876953125, 35.21337890625, 37.0390625, 38.86474609375, 40.6904296875, 42.51611328125, 44.341796875, 46.16748046875, 47.9931640625, 49.81884765625, 51.64453125, 53.47021484375, 55.2958984375, 57.12158203125, 58.947265625, 60.77294921875, 62.5986328125, 64.42431640625, 66.25]}, "gradients/decoder.transformer.h.5.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 12.0, 89.0, 332.0, 393.0, 151.0, 36.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-46.53226089477539, -40.807098388671875, -35.081932067871094, -29.356769561767578, -23.631607055664062, -17.906444549560547, -12.181278228759766, -6.45611572265625, -0.7309532165527344, 4.994210243225098, 10.71937370300293, 16.444538116455078, 22.169700622558594, 27.89486312866211, 33.62002944946289, 39.345191955566406, 45.07035446166992, 50.79551696777344, 56.52068328857422, 62.245845794677734, 67.97100830078125, 73.6961669921875, 79.42134094238281, 85.14649963378906, 90.87165832519531, 96.5968246459961, 102.32198333740234, 108.04714965820312, 113.77230834960938, 119.49747467041016, 125.22264099121094, 130.9477996826172, 136.6729736328125, 142.39813232421875, 148.12330627441406, 153.8484649658203, 159.57362365722656, 165.29879760742188, 171.02395629882812, 176.74911499023438, 182.47427368164062, 188.19943237304688, 193.9246063232422, 199.64976501464844, 205.3749237060547, 211.10009765625, 216.82525634765625, 222.5504150390625, 228.2755889892578, 234.00074768066406, 239.72592163085938, 245.45108032226562, 251.17623901367188, 256.9013977050781, 262.6265869140625, 268.35174560546875, 274.076904296875, 279.80206298828125, 285.5272216796875, 291.25238037109375, 296.9775695800781, 302.7027282714844, 308.4278869628906, 314.1530456542969, 319.8782043457031]}, "gradients/decoder.transformer.h.5.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 4.0, 3.0, 3.0, 6.0, 5.0, 6.0, 9.0, 12.0, 12.0, 16.0, 11.0, 18.0, 18.0, 18.0, 19.0, 19.0, 43.0, 31.0, 30.0, 30.0, 29.0, 51.0, 25.0, 36.0, 45.0, 30.0, 27.0, 46.0, 40.0, 35.0, 34.0, 28.0, 31.0, 19.0, 36.0, 25.0, 27.0, 14.0, 21.0, 14.0, 17.0, 13.0, 12.0, 7.0, 9.0, 5.0, 5.0, 4.0, 7.0, 1.0, 2.0, 5.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-61.914642333984375, -59.86324691772461, -57.81185531616211, -55.760459899902344, -53.709068298339844, -51.65767288208008, -49.60627746582031, -47.55488586425781, -45.50349044799805, -43.45209503173828, -41.40070343017578, -39.349308013916016, -37.29791259765625, -35.24652099609375, -33.195125579833984, -31.14373207092285, -29.09233856201172, -27.040945053100586, -24.989551544189453, -22.938156127929688, -20.886762619018555, -18.835369110107422, -16.783973693847656, -14.732580184936523, -12.68118667602539, -10.629793167114258, -8.578398704528809, -6.527004718780518, -4.475610733032227, -2.4242172241210938, -0.37282276153564453, 1.6785717010498047, 3.7299652099609375, 5.7813591957092285, 7.8327531814575195, 9.884147644042969, 11.935541152954102, 13.986934661865234, 16.038330078125, 18.089723587036133, 20.141117095947266, 22.1925106048584, 24.24390411376953, 26.295299530029297, 28.34669303894043, 30.398086547851562, 32.44948196411133, 34.500877380371094, 36.552268981933594, 38.60366439819336, 40.65505599975586, 42.706451416015625, 44.757843017578125, 46.80923843383789, 48.860633850097656, 50.912025451660156, 52.96342086791992, 55.01481628417969, 57.06620788574219, 59.11760330200195, 61.16899871826172, 63.22039031982422, 65.27178192138672, 67.32318115234375, 69.37457275390625]}, "gradients/decoder.transformer.h.4.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 3.0, 2.0, 5.0, 2.0, 3.0, 9.0, 8.0, 11.0, 13.0, 18.0, 11.0, 18.0, 24.0, 29.0, 31.0, 29.0, 33.0, 31.0, 36.0, 38.0, 45.0, 33.0, 47.0, 51.0, 45.0, 51.0, 39.0, 37.0, 30.0, 34.0, 29.0, 26.0, 19.0, 24.0, 25.0, 22.0, 20.0, 14.0, 15.0, 12.0, 13.0, 4.0, 11.0, 4.0, 2.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-14.5, -14.080322265625, -13.66064453125, -13.240966796875, -12.8212890625, -12.401611328125, -11.98193359375, -11.562255859375, -11.142578125, -10.722900390625, -10.30322265625, -9.883544921875, -9.4638671875, -9.044189453125, -8.62451171875, -8.204833984375, -7.78515625, -7.365478515625, -6.94580078125, -6.526123046875, -6.1064453125, -5.686767578125, -5.26708984375, -4.847412109375, -4.427734375, -4.008056640625, -3.58837890625, -3.168701171875, -2.7490234375, -2.329345703125, -1.90966796875, -1.489990234375, -1.0703125, -0.650634765625, -0.23095703125, 0.188720703125, 0.6083984375, 1.028076171875, 1.44775390625, 1.867431640625, 2.287109375, 2.706787109375, 3.12646484375, 3.546142578125, 3.9658203125, 4.385498046875, 4.80517578125, 5.224853515625, 5.64453125, 6.064208984375, 6.48388671875, 6.903564453125, 7.3232421875, 7.742919921875, 8.16259765625, 8.582275390625, 9.001953125, 9.421630859375, 9.84130859375, 10.260986328125, 10.6806640625, 11.100341796875, 11.52001953125, 11.939697265625, 12.359375]}, "gradients/decoder.transformer.h.4.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 1.0, 1.0, 0.0, 5.0, 2.0, 7.0, 6.0, 4.0, 12.0, 14.0, 9.0, 16.0, 15.0, 28.0, 36.0, 44.0, 81.0, 129.0, 265.0, 510.0, 1032.0, 2466.0, 6713.0, 23219.0, 116923.0, 789428.0, 2293808.0, 806129.0, 117801.0, 23948.0, 6833.0, 2486.0, 1093.0, 510.0, 283.0, 163.0, 96.0, 42.0, 24.0, 20.0, 20.0, 16.0, 12.0, 5.0, 14.0, 3.0, 4.0, 3.0, 2.0, 2.0, 3.0, 6.0, 0.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-19.5, -18.839111328125, -18.17822265625, -17.517333984375, -16.8564453125, -16.195556640625, -15.53466796875, -14.873779296875, -14.212890625, -13.552001953125, -12.89111328125, -12.230224609375, -11.5693359375, -10.908447265625, -10.24755859375, -9.586669921875, -8.92578125, -8.264892578125, -7.60400390625, -6.943115234375, -6.2822265625, -5.621337890625, -4.96044921875, -4.299560546875, -3.638671875, -2.977783203125, -2.31689453125, -1.656005859375, -0.9951171875, -0.334228515625, 0.32666015625, 0.987548828125, 1.6484375, 2.309326171875, 2.97021484375, 3.631103515625, 4.2919921875, 4.952880859375, 5.61376953125, 6.274658203125, 6.935546875, 7.596435546875, 8.25732421875, 8.918212890625, 9.5791015625, 10.239990234375, 10.90087890625, 11.561767578125, 12.22265625, 12.883544921875, 13.54443359375, 14.205322265625, 14.8662109375, 15.527099609375, 16.18798828125, 16.848876953125, 17.509765625, 18.170654296875, 18.83154296875, 19.492431640625, 20.1533203125, 20.814208984375, 21.47509765625, 22.135986328125, 22.796875]}, "gradients/decoder.transformer.h.4.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 4.0, 7.0, 7.0, 16.0, 21.0, 18.0, 21.0, 38.0, 58.0, 85.0, 87.0, 138.0, 187.0, 256.0, 395.0, 480.0, 550.0, 510.0, 377.0, 263.0, 184.0, 101.0, 85.0, 72.0, 24.0, 26.0, 18.0, 14.0, 8.0, 7.0, 4.0, 4.0, 6.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.328125, -18.69482421875, -18.0615234375, -17.42822265625, -16.794921875, -16.16162109375, -15.5283203125, -14.89501953125, -14.26171875, -13.62841796875, -12.9951171875, -12.36181640625, -11.728515625, -11.09521484375, -10.4619140625, -9.82861328125, -9.1953125, -8.56201171875, -7.9287109375, -7.29541015625, -6.662109375, -6.02880859375, -5.3955078125, -4.76220703125, -4.12890625, -3.49560546875, -2.8623046875, -2.22900390625, -1.595703125, -0.96240234375, -0.3291015625, 0.30419921875, 0.9375, 1.57080078125, 2.2041015625, 2.83740234375, 3.470703125, 4.10400390625, 4.7373046875, 5.37060546875, 6.00390625, 6.63720703125, 7.2705078125, 7.90380859375, 8.537109375, 9.17041015625, 9.8037109375, 10.43701171875, 11.0703125, 11.70361328125, 12.3369140625, 12.97021484375, 13.603515625, 14.23681640625, 14.8701171875, 15.50341796875, 16.13671875, 16.77001953125, 17.4033203125, 18.03662109375, 18.669921875, 19.30322265625, 19.9365234375, 20.56982421875, 21.203125]}, "gradients/decoder.transformer.h.4.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 4.0, 9.0, 12.0, 16.0, 26.0, 35.0, 46.0, 64.0, 135.0, 277.0, 580.0, 1375.0, 4433.0, 18024.0, 100341.0, 1014939.0, 2742815.0, 261883.0, 36952.0, 8061.0, 2394.0, 853.0, 437.0, 210.0, 115.0, 72.0, 54.0, 34.0, 27.0, 18.0, 14.0, 5.0, 7.0, 10.0, 3.0, 3.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-35.90625, -34.857421875, -33.80859375, -32.759765625, -31.7109375, -30.662109375, -29.61328125, -28.564453125, -27.515625, -26.466796875, -25.41796875, -24.369140625, -23.3203125, -22.271484375, -21.22265625, -20.173828125, -19.125, -18.076171875, -17.02734375, -15.978515625, -14.9296875, -13.880859375, -12.83203125, -11.783203125, -10.734375, -9.685546875, -8.63671875, -7.587890625, -6.5390625, -5.490234375, -4.44140625, -3.392578125, -2.34375, -1.294921875, -0.24609375, 0.802734375, 1.8515625, 2.900390625, 3.94921875, 4.998046875, 6.046875, 7.095703125, 8.14453125, 9.193359375, 10.2421875, 11.291015625, 12.33984375, 13.388671875, 14.4375, 15.486328125, 16.53515625, 17.583984375, 18.6328125, 19.681640625, 20.73046875, 21.779296875, 22.828125, 23.876953125, 24.92578125, 25.974609375, 27.0234375, 28.072265625, 29.12109375, 30.169921875, 31.21875]}, "gradients/decoder.transformer.h.4.ln_2.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 28.0, 230.0, 511.0, 219.0, 26.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-140.8870849609375, -129.66104125976562, -118.43499755859375, -107.20895385742188, -95.98291015625, -84.75686645507812, -73.53083038330078, -62.304786682128906, -51.07874298095703, -39.852699279785156, -28.626657485961914, -17.400615692138672, -6.174571990966797, 5.051471710205078, 16.277511596679688, 27.503555297851562, 38.72959899902344, 49.95564270019531, 61.18168640136719, 72.40773010253906, 83.63377380371094, 94.85981750488281, 106.08585357666016, 117.31189727783203, 128.53793334960938, 139.76397705078125, 150.99002075195312, 162.216064453125, 173.44210815429688, 184.66815185546875, 195.89419555664062, 207.1202392578125, 218.34628295898438, 229.57232666015625, 240.79837036132812, 252.0244140625, 263.2504577636719, 274.47650146484375, 285.7025451660156, 296.9285888671875, 308.1546325683594, 319.38067626953125, 330.6067199707031, 341.832763671875, 353.0588073730469, 364.28485107421875, 375.5108947753906, 386.7369384765625, 397.96295166015625, 409.1889953613281, 420.4150390625, 431.6410827636719, 442.86712646484375, 454.0931701660156, 465.3192138671875, 476.5452575683594, 487.77130126953125, 498.9973449707031, 510.223388671875, 521.4494018554688, 532.6754760742188, 543.9014892578125, 555.1275634765625, 566.3535766601562, 577.5796508789062]}, "gradients/decoder.transformer.h.4.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 7.0, 4.0, 7.0, 4.0, 10.0, 9.0, 11.0, 9.0, 14.0, 15.0, 14.0, 15.0, 13.0, 24.0, 46.0, 37.0, 35.0, 56.0, 51.0, 34.0, 52.0, 34.0, 35.0, 47.0, 52.0, 30.0, 42.0, 35.0, 35.0, 30.0, 26.0, 33.0, 26.0, 23.0, 19.0, 19.0, 18.0, 10.0, 8.0, 7.0, 3.0, 2.0, 4.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-72.07998657226562, -69.99510192871094, -67.91021728515625, -65.82533264160156, -63.740447998046875, -61.65556335449219, -59.5706787109375, -57.48579406738281, -55.400909423828125, -53.31602478027344, -51.23114013671875, -49.14625549316406, -47.061370849609375, -44.97648620605469, -42.8916015625, -40.80671691894531, -38.721832275390625, -36.63694763183594, -34.55206298828125, -32.46717834472656, -30.382293701171875, -28.297409057617188, -26.2125244140625, -24.127639770507812, -22.042755126953125, -19.957870483398438, -17.87298583984375, -15.788101196289062, -13.703216552734375, -11.618331909179688, -9.533447265625, -7.4485626220703125, -5.363677978515625, -3.2787933349609375, -1.19390869140625, 0.8909759521484375, 2.975860595703125, 5.0607452392578125, 7.1456298828125, 9.230514526367188, 11.315399169921875, 13.400283813476562, 15.48516845703125, 17.570053100585938, 19.654937744140625, 21.739822387695312, 23.82470703125, 25.909591674804688, 27.994476318359375, 30.079360961914062, 32.16424560546875, 34.24913024902344, 36.334014892578125, 38.41889953613281, 40.5037841796875, 42.58866882324219, 44.673553466796875, 46.75843811035156, 48.84332275390625, 50.92820739746094, 53.013092041015625, 55.09797668457031, 57.182861328125, 59.26774597167969, 61.352630615234375]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 4.0, 2.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 5.0, 13.0, 11.0, 15.0, 9.0, 16.0, 18.0, 33.0, 26.0, 27.0, 30.0, 37.0, 32.0, 31.0, 25.0, 28.0, 43.0, 36.0, 55.0, 49.0, 43.0, 36.0, 46.0, 38.0, 29.0, 25.0, 22.0, 33.0, 26.0, 24.0, 20.0, 12.0, 21.0, 20.0, 13.0, 12.0, 11.0, 4.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-13.203125, -12.808349609375, -12.41357421875, -12.018798828125, -11.6240234375, -11.229248046875, -10.83447265625, -10.439697265625, -10.044921875, -9.650146484375, -9.25537109375, -8.860595703125, -8.4658203125, -8.071044921875, -7.67626953125, -7.281494140625, -6.88671875, -6.491943359375, -6.09716796875, -5.702392578125, -5.3076171875, -4.912841796875, -4.51806640625, -4.123291015625, -3.728515625, -3.333740234375, -2.93896484375, -2.544189453125, -2.1494140625, -1.754638671875, -1.35986328125, -0.965087890625, -0.5703125, -0.175537109375, 0.21923828125, 0.614013671875, 1.0087890625, 1.403564453125, 1.79833984375, 2.193115234375, 2.587890625, 2.982666015625, 3.37744140625, 3.772216796875, 4.1669921875, 4.561767578125, 4.95654296875, 5.351318359375, 5.74609375, 6.140869140625, 6.53564453125, 6.930419921875, 7.3251953125, 7.719970703125, 8.11474609375, 8.509521484375, 8.904296875, 9.299072265625, 9.69384765625, 10.088623046875, 10.4833984375, 10.878173828125, 11.27294921875, 11.667724609375, 12.0625]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 5.0, 4.0, 4.0, 12.0, 15.0, 32.0, 34.0, 55.0, 98.0, 117.0, 155.0, 267.0, 346.0, 491.0, 743.0, 1058.0, 1466.0, 2104.0, 3006.0, 4270.0, 6084.0, 8860.0, 13130.0, 19236.0, 27935.0, 42005.0, 62051.0, 88984.0, 120644.0, 145206.0, 139930.0, 110810.0, 79156.0, 54505.0, 36816.0, 24662.0, 16739.0, 11432.0, 7897.0, 5428.0, 3881.0, 2641.0, 1838.0, 1351.0, 924.0, 680.0, 430.0, 307.0, 221.0, 157.0, 117.0, 73.0, 57.0, 41.0, 22.0, 12.0, 6.0, 11.0, 5.0, 1.0, 3.0], "bins": [-0.79638671875, -0.7721328735351562, -0.7478790283203125, -0.7236251831054688, -0.699371337890625, -0.6751174926757812, -0.6508636474609375, -0.6266098022460938, -0.60235595703125, -0.5781021118164062, -0.5538482666015625, -0.5295944213867188, -0.505340576171875, -0.48108673095703125, -0.4568328857421875, -0.43257904052734375, -0.4083251953125, -0.38407135009765625, -0.3598175048828125, -0.33556365966796875, -0.311309814453125, -0.28705596923828125, -0.2628021240234375, -0.23854827880859375, -0.21429443359375, -0.19004058837890625, -0.1657867431640625, -0.14153289794921875, -0.117279052734375, -0.09302520751953125, -0.0687713623046875, -0.04451751708984375, -0.020263671875, 0.00399017333984375, 0.0282440185546875, 0.05249786376953125, 0.076751708984375, 0.10100555419921875, 0.1252593994140625, 0.14951324462890625, 0.17376708984375, 0.19802093505859375, 0.2222747802734375, 0.24652862548828125, 0.270782470703125, 0.29503631591796875, 0.3192901611328125, 0.34354400634765625, 0.3677978515625, 0.39205169677734375, 0.4163055419921875, 0.44055938720703125, 0.464813232421875, 0.48906707763671875, 0.5133209228515625, 0.5375747680664062, 0.56182861328125, 0.5860824584960938, 0.6103363037109375, 0.6345901489257812, 0.658843994140625, 0.6830978393554688, 0.7073516845703125, 0.7316055297851562, 0.755859375]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 3.0, 5.0, 5.0, 3.0, 6.0, 8.0, 10.0, 17.0, 14.0, 17.0, 20.0, 25.0, 22.0, 32.0, 28.0, 38.0, 35.0, 40.0, 43.0, 36.0, 41.0, 1074.0, 41.0, 52.0, 54.0, 34.0, 33.0, 45.0, 35.0, 33.0, 34.0, 31.0, 20.0, 15.0, 15.0, 14.0, 14.0, 13.0, 11.0, 5.0, 4.0, 3.0, 2.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-9.6953125, -9.4061279296875, -9.116943359375, -8.8277587890625, -8.53857421875, -8.2493896484375, -7.960205078125, -7.6710205078125, -7.3818359375, -7.0926513671875, -6.803466796875, -6.5142822265625, -6.22509765625, -5.9359130859375, -5.646728515625, -5.3575439453125, -5.068359375, -4.7791748046875, -4.489990234375, -4.2008056640625, -3.91162109375, -3.6224365234375, -3.333251953125, -3.0440673828125, -2.7548828125, -2.4656982421875, -2.176513671875, -1.8873291015625, -1.59814453125, -1.3089599609375, -1.019775390625, -0.7305908203125, -0.44140625, -0.1522216796875, 0.136962890625, 0.4261474609375, 0.71533203125, 1.0045166015625, 1.293701171875, 1.5828857421875, 1.8720703125, 2.1612548828125, 2.450439453125, 2.7396240234375, 3.02880859375, 3.3179931640625, 3.607177734375, 3.8963623046875, 4.185546875, 4.4747314453125, 4.763916015625, 5.0531005859375, 5.34228515625, 5.6314697265625, 5.920654296875, 6.2098388671875, 6.4990234375, 6.7882080078125, 7.077392578125, 7.3665771484375, 7.65576171875, 7.9449462890625, 8.234130859375, 8.5233154296875, 8.8125]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 5.0, 8.0, 13.0, 14.0, 12.0, 30.0, 45.0, 63.0, 95.0, 144.0, 223.0, 368.0, 550.0, 841.0, 1300.0, 2147.0, 3255.0, 5290.0, 8192.0, 13191.0, 20983.0, 33607.0, 53595.0, 81597.0, 116238.0, 185443.0, 1164962.0, 133073.0, 96207.0, 64617.0, 41367.0, 25982.0, 16175.0, 10012.0, 6289.0, 4052.0, 2531.0, 1586.0, 1029.0, 686.0, 451.0, 291.0, 190.0, 141.0, 77.0, 60.0, 40.0, 21.0, 15.0, 15.0, 6.0, 9.0, 4.0, 2.0, 1.0, 3.0, 2.0], "bins": [-0.69580078125, -0.6748580932617188, -0.6539154052734375, -0.6329727172851562, -0.612030029296875, -0.5910873413085938, -0.5701446533203125, -0.5492019653320312, -0.52825927734375, -0.5073165893554688, -0.4863739013671875, -0.46543121337890625, -0.444488525390625, -0.42354583740234375, -0.4026031494140625, -0.38166046142578125, -0.3607177734375, -0.33977508544921875, -0.3188323974609375, -0.29788970947265625, -0.276947021484375, -0.25600433349609375, -0.2350616455078125, -0.21411895751953125, -0.19317626953125, -0.17223358154296875, -0.1512908935546875, -0.13034820556640625, -0.109405517578125, -0.08846282958984375, -0.0675201416015625, -0.04657745361328125, -0.025634765625, -0.00469207763671875, 0.0162506103515625, 0.03719329833984375, 0.058135986328125, 0.07907867431640625, 0.1000213623046875, 0.12096405029296875, 0.14190673828125, 0.16284942626953125, 0.1837921142578125, 0.20473480224609375, 0.225677490234375, 0.24662017822265625, 0.2675628662109375, 0.28850555419921875, 0.3094482421875, 0.33039093017578125, 0.3513336181640625, 0.37227630615234375, 0.393218994140625, 0.41416168212890625, 0.4351043701171875, 0.45604705810546875, 0.47698974609375, 0.49793243408203125, 0.5188751220703125, 0.5398178100585938, 0.560760498046875, 0.5817031860351562, 0.6026458740234375, 0.6235885620117188, 0.64453125]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 4.0, 2.0, 4.0, 7.0, 13.0, 7.0, 8.0, 14.0, 20.0, 15.0, 35.0, 40.0, 60.0, 66.0, 76.0, 107.0, 113.0, 89.0, 84.0, 62.0, 52.0, 35.0, 26.0, 17.0, 16.0, 13.0, 9.0, 5.0, 2.0, 5.0, 4.0, 3.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.015411376953125, -0.014655113220214844, -0.013898849487304688, -0.013142585754394531, -0.012386322021484375, -0.011630058288574219, -0.010873794555664062, -0.010117530822753906, -0.00936126708984375, -0.008605003356933594, -0.007848739624023438, -0.007092475891113281, -0.006336212158203125, -0.005579948425292969, -0.0048236846923828125, -0.004067420959472656, -0.0033111572265625, -0.0025548934936523438, -0.0017986297607421875, -0.0010423660278320312, -0.000286102294921875, 0.00047016143798828125, 0.0012264251708984375, 0.0019826889038085938, 0.00273895263671875, 0.0034952163696289062, 0.0042514801025390625, 0.005007743835449219, 0.005764007568359375, 0.006520271301269531, 0.0072765350341796875, 0.008032798767089844, 0.0087890625, 0.009545326232910156, 0.010301589965820312, 0.011057853698730469, 0.011814117431640625, 0.012570381164550781, 0.013326644897460938, 0.014082908630371094, 0.01483917236328125, 0.015595436096191406, 0.016351699829101562, 0.01710796356201172, 0.017864227294921875, 0.01862049102783203, 0.019376754760742188, 0.020133018493652344, 0.0208892822265625, 0.021645545959472656, 0.022401809692382812, 0.02315807342529297, 0.023914337158203125, 0.02467060089111328, 0.025426864624023438, 0.026183128356933594, 0.02693939208984375, 0.027695655822753906, 0.028451919555664062, 0.02920818328857422, 0.029964447021484375, 0.03072071075439453, 0.03147697448730469, 0.032233238220214844, 0.032989501953125]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 7.0, 2.0, 3.0, 5.0, 12.0, 12.0, 14.0, 23.0, 26.0, 46.0, 59.0, 77.0, 159.0, 288.0, 589.0, 10730.0, 1027500.0, 7667.0, 556.0, 294.0, 162.0, 104.0, 55.0, 41.0, 32.0, 26.0, 20.0, 18.0, 11.0, 10.0, 6.0, 3.0, 4.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.66015625, -0.6449699401855469, -0.6297836303710938, -0.6145973205566406, -0.5994110107421875, -0.5842247009277344, -0.5690383911132812, -0.5538520812988281, -0.538665771484375, -0.5234794616699219, -0.5082931518554688, -0.4931068420410156, -0.4779205322265625, -0.4627342224121094, -0.44754791259765625, -0.4323616027832031, -0.41717529296875, -0.4019889831542969, -0.38680267333984375, -0.3716163635253906, -0.3564300537109375, -0.3412437438964844, -0.32605743408203125, -0.3108711242675781, -0.295684814453125, -0.2804985046386719, -0.26531219482421875, -0.2501258850097656, -0.2349395751953125, -0.21975326538085938, -0.20456695556640625, -0.18938064575195312, -0.1741943359375, -0.15900802612304688, -0.14382171630859375, -0.12863540649414062, -0.1134490966796875, -0.09826278686523438, -0.08307647705078125, -0.06789016723632812, -0.052703857421875, -0.037517547607421875, -0.02233123779296875, -0.007144927978515625, 0.0080413818359375, 0.023227691650390625, 0.03841400146484375, 0.053600311279296875, 0.06878662109375, 0.08397293090820312, 0.09915924072265625, 0.11434555053710938, 0.1295318603515625, 0.14471817016601562, 0.15990447998046875, 0.17509078979492188, 0.190277099609375, 0.20546340942382812, 0.22064971923828125, 0.23583602905273438, 0.2510223388671875, 0.2662086486816406, 0.28139495849609375, 0.2965812683105469, 0.311767578125]}, "gradients/decoder.transformer.h.4.ln_cross_attn.weight": {"_type": "histogram", "values": [20.0, 777.0, 217.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.008801125921308994, -0.003829937893897295, 0.0011412501335144043, 0.006112438626587391, 0.011083626188337803, 0.01605481281876564, 0.02102600410580635, 0.025997191667556763, 0.030968379229307175, 0.03593956679105759, 0.040910754352808, 0.04588194191455841, 0.05085313320159912, 0.055824317038059235, 0.060795508325099945, 0.06576669216156006, 0.07073788344860077, 0.07570907473564148, 0.08068025857210159, 0.0856514498591423, 0.09062263369560242, 0.09559382498264313, 0.10056501626968384, 0.10553620010614395, 0.11050738394260406, 0.11547857522964478, 0.12044975906610489, 0.125420942902565, 0.1303921341896057, 0.13536332547664642, 0.14033451676368713, 0.14530569314956665, 0.15027688443660736, 0.15524807572364807, 0.16021926701068878, 0.1651904433965683, 0.170161634683609, 0.17513282597064972, 0.18010401725769043, 0.18507519364356995, 0.19004638493061066, 0.19501757621765137, 0.19998876750469208, 0.2049599438905716, 0.2099311351776123, 0.21490232646465302, 0.21987351775169373, 0.22484469413757324, 0.22981590032577515, 0.23478709161281586, 0.23975828289985657, 0.24472945928573608, 0.2497006505727768, 0.2546718418598175, 0.259643018245697, 0.2646142244338989, 0.26958540081977844, 0.27455657720565796, 0.27952778339385986, 0.2844989597797394, 0.2894701659679413, 0.2944413423538208, 0.2994125187397003, 0.3043837249279022, 0.30935490131378174]}, "gradients/decoder.transformer.h.4.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 10.0, 2.0, 7.0, 8.0, 13.0, 13.0, 15.0, 23.0, 34.0, 40.0, 46.0, 46.0, 47.0, 58.0, 58.0, 71.0, 47.0, 53.0, 54.0, 56.0, 60.0, 63.0, 35.0, 27.0, 32.0, 19.0, 12.0, 17.0, 10.0, 6.0, 10.0, 8.0, 4.0, 4.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.015604794025421143, -0.015161249786615372, -0.0147177055478096, -0.01427416130900383, -0.013830617070198059, -0.013387072831392288, -0.012943528592586517, -0.012499984353780746, -0.012056440114974976, -0.011612895876169205, -0.011169351637363434, -0.010725807398557663, -0.010282263159751892, -0.009838718920946121, -0.00939517468214035, -0.00895163044333458, -0.008508086204528809, -0.008064541965723038, -0.007620997726917267, -0.007177453488111496, -0.006733909249305725, -0.006290365010499954, -0.005846820771694183, -0.0054032765328884125, -0.004959732294082642, -0.004516188055276871, -0.0040726438164711, -0.003629099577665329, -0.003185555338859558, -0.0027420111000537872, -0.0022984668612480164, -0.0018549226224422455, -0.0014113783836364746, -0.0009678341448307037, -0.0005242899060249329, -8.074566721916199e-05, 0.0003627985715866089, 0.0008063428103923798, 0.0012498870491981506, 0.0016934312880039215, 0.0021369755268096924, 0.0025805197656154633, 0.003024064004421234, 0.003467608243227005, 0.003911152482032776, 0.004354696720838547, 0.004798240959644318, 0.0052417851984500885, 0.005685329437255859, 0.00612887367606163, 0.006572417914867401, 0.007015962153673172, 0.007459506392478943, 0.007903050631284714, 0.008346594870090485, 0.008790139108896255, 0.009233683347702026, 0.009677227586507797, 0.010120771825313568, 0.010564316064119339, 0.01100786030292511, 0.01145140454173088, 0.011894948780536652, 0.012338493019342422, 0.012782037258148193]}, "gradients/decoder.transformer.h.4.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 4.0, 2.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 5.0, 13.0, 10.0, 16.0, 9.0, 16.0, 18.0, 33.0, 26.0, 27.0, 30.0, 37.0, 32.0, 31.0, 25.0, 28.0, 43.0, 36.0, 55.0, 49.0, 43.0, 36.0, 46.0, 38.0, 29.0, 25.0, 22.0, 33.0, 26.0, 24.0, 20.0, 12.0, 21.0, 20.0, 13.0, 12.0, 11.0, 4.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-13.203125, -12.808349609375, -12.41357421875, -12.018798828125, -11.6240234375, -11.229248046875, -10.83447265625, -10.439697265625, -10.044921875, -9.650146484375, -9.25537109375, -8.860595703125, -8.4658203125, -8.071044921875, -7.67626953125, -7.281494140625, -6.88671875, -6.491943359375, -6.09716796875, -5.702392578125, -5.3076171875, -4.912841796875, -4.51806640625, -4.123291015625, -3.728515625, -3.333740234375, -2.93896484375, -2.544189453125, -2.1494140625, -1.754638671875, -1.35986328125, -0.965087890625, -0.5703125, -0.175537109375, 0.21923828125, 0.614013671875, 1.0087890625, 1.403564453125, 1.79833984375, 2.193115234375, 2.587890625, 2.982666015625, 3.37744140625, 3.772216796875, 4.1669921875, 4.561767578125, 4.95654296875, 5.351318359375, 5.74609375, 6.140869140625, 6.53564453125, 6.930419921875, 7.3251953125, 7.719970703125, 8.11474609375, 8.509521484375, 8.904296875, 9.299072265625, 9.69384765625, 10.088623046875, 10.4833984375, 10.878173828125, 11.27294921875, 11.667724609375, 12.0625]}, "gradients/decoder.transformer.h.4.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 5.0, 5.0, 8.0, 7.0, 9.0, 6.0, 18.0, 21.0, 26.0, 46.0, 52.0, 77.0, 102.0, 146.0, 218.0, 360.0, 566.0, 1014.0, 1697.0, 3283.0, 6873.0, 14899.0, 34366.0, 86260.0, 249363.0, 391718.0, 154689.0, 57392.0, 23730.0, 10560.0, 4979.0, 2463.0, 1381.0, 809.0, 446.0, 328.0, 189.0, 123.0, 82.0, 71.0, 40.0, 38.0, 22.0, 19.0, 16.0, 14.0, 5.0, 8.0, 5.0, 4.0, 1.0, 4.0, 1.0, 3.0, 1.0, 1.0], "bins": [-17.40625, -16.88330078125, -16.3603515625, -15.83740234375, -15.314453125, -14.79150390625, -14.2685546875, -13.74560546875, -13.22265625, -12.69970703125, -12.1767578125, -11.65380859375, -11.130859375, -10.60791015625, -10.0849609375, -9.56201171875, -9.0390625, -8.51611328125, -7.9931640625, -7.47021484375, -6.947265625, -6.42431640625, -5.9013671875, -5.37841796875, -4.85546875, -4.33251953125, -3.8095703125, -3.28662109375, -2.763671875, -2.24072265625, -1.7177734375, -1.19482421875, -0.671875, -0.14892578125, 0.3740234375, 0.89697265625, 1.419921875, 1.94287109375, 2.4658203125, 2.98876953125, 3.51171875, 4.03466796875, 4.5576171875, 5.08056640625, 5.603515625, 6.12646484375, 6.6494140625, 7.17236328125, 7.6953125, 8.21826171875, 8.7412109375, 9.26416015625, 9.787109375, 10.31005859375, 10.8330078125, 11.35595703125, 11.87890625, 12.40185546875, 12.9248046875, 13.44775390625, 13.970703125, 14.49365234375, 15.0166015625, 15.53955078125, 16.0625]}, "gradients/decoder.transformer.h.4.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 7.0, 7.0, 2.0, 8.0, 5.0, 5.0, 8.0, 9.0, 13.0, 11.0, 9.0, 17.0, 17.0, 25.0, 21.0, 37.0, 46.0, 37.0, 49.0, 63.0, 74.0, 140.0, 230.0, 1436.0, 213.0, 125.0, 86.0, 54.0, 45.0, 44.0, 25.0, 32.0, 25.0, 26.0, 22.0, 9.0, 15.0, 12.0, 11.0, 14.0, 4.0, 6.0, 6.0, 0.0, 1.0, 4.0, 2.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-33.125, -32.10595703125, -31.0869140625, -30.06787109375, -29.048828125, -28.02978515625, -27.0107421875, -25.99169921875, -24.97265625, -23.95361328125, -22.9345703125, -21.91552734375, -20.896484375, -19.87744140625, -18.8583984375, -17.83935546875, -16.8203125, -15.80126953125, -14.7822265625, -13.76318359375, -12.744140625, -11.72509765625, -10.7060546875, -9.68701171875, -8.66796875, -7.64892578125, -6.6298828125, -5.61083984375, -4.591796875, -3.57275390625, -2.5537109375, -1.53466796875, -0.515625, 0.50341796875, 1.5224609375, 2.54150390625, 3.560546875, 4.57958984375, 5.5986328125, 6.61767578125, 7.63671875, 8.65576171875, 9.6748046875, 10.69384765625, 11.712890625, 12.73193359375, 13.7509765625, 14.77001953125, 15.7890625, 16.80810546875, 17.8271484375, 18.84619140625, 19.865234375, 20.88427734375, 21.9033203125, 22.92236328125, 23.94140625, 24.96044921875, 25.9794921875, 26.99853515625, 28.017578125, 29.03662109375, 30.0556640625, 31.07470703125, 32.09375]}, "gradients/decoder.transformer.h.4.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 7.0, 8.0, 10.0, 14.0, 13.0, 13.0, 18.0, 28.0, 44.0, 56.0, 91.0, 120.0, 183.0, 329.0, 721.0, 3848.0, 113189.0, 2952475.0, 69946.0, 2928.0, 638.0, 371.0, 197.0, 121.0, 82.0, 55.0, 58.0, 30.0, 28.0, 21.0, 18.0, 10.0, 14.0, 8.0, 4.0, 0.0, 5.0, 1.0, 2.0, 3.0, 0.0, 2.0, 3.0], "bins": [-81.875, -79.7451171875, -77.615234375, -75.4853515625, -73.35546875, -71.2255859375, -69.095703125, -66.9658203125, -64.8359375, -62.7060546875, -60.576171875, -58.4462890625, -56.31640625, -54.1865234375, -52.056640625, -49.9267578125, -47.796875, -45.6669921875, -43.537109375, -41.4072265625, -39.27734375, -37.1474609375, -35.017578125, -32.8876953125, -30.7578125, -28.6279296875, -26.498046875, -24.3681640625, -22.23828125, -20.1083984375, -17.978515625, -15.8486328125, -13.71875, -11.5888671875, -9.458984375, -7.3291015625, -5.19921875, -3.0693359375, -0.939453125, 1.1904296875, 3.3203125, 5.4501953125, 7.580078125, 9.7099609375, 11.83984375, 13.9697265625, 16.099609375, 18.2294921875, 20.359375, 22.4892578125, 24.619140625, 26.7490234375, 28.87890625, 31.0087890625, 33.138671875, 35.2685546875, 37.3984375, 39.5283203125, 41.658203125, 43.7880859375, 45.91796875, 48.0478515625, 50.177734375, 52.3076171875, 54.4375]}, "gradients/decoder.transformer.h.4.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 14.0, 320.0, 590.0, 87.0, 4.0, 0.0, 1.0, 1.0], "bins": [-557.5526123046875, -547.9741821289062, -538.3956909179688, -528.8172607421875, -519.23876953125, -509.66033935546875, -500.0818786621094, -490.50341796875, -480.9249572753906, -471.34649658203125, -461.7680358886719, -452.1895751953125, -442.61114501953125, -433.0326843261719, -423.4542236328125, -413.8757629394531, -404.29730224609375, -394.7188415527344, -385.140380859375, -375.5619201660156, -365.98345947265625, -356.405029296875, -346.8265686035156, -337.24810791015625, -327.6696472167969, -318.0911865234375, -308.5127258300781, -298.93426513671875, -289.3558349609375, -279.7773742675781, -270.19891357421875, -260.6204528808594, -251.0419921875, -241.46353149414062, -231.88507080078125, -222.30662536621094, -212.72816467285156, -203.1497039794922, -193.57125854492188, -183.9927978515625, -174.41433715820312, -164.83587646484375, -155.25741577148438, -145.67897033691406, -136.1005096435547, -126.52204895019531, -116.94359588623047, -107.36514282226562, -97.78668212890625, -88.20822143554688, -78.62976837158203, -69.05131530761719, -59.47285461425781, -49.8943977355957, -40.315940856933594, -30.73748779296875, -21.15903091430664, -11.580574035644531, -2.002117156982422, 7.5763397216796875, 17.154796600341797, 26.733253479003906, 36.311710357666016, 45.89016342163086, 55.468624114990234]}, "gradients/decoder.transformer.h.4.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 8.0, 5.0, 6.0, 10.0, 12.0, 9.0, 14.0, 14.0, 12.0, 19.0, 26.0, 37.0, 32.0, 38.0, 39.0, 41.0, 35.0, 41.0, 56.0, 51.0, 44.0, 41.0, 43.0, 41.0, 31.0, 31.0, 41.0, 30.0, 25.0, 32.0, 22.0, 15.0, 18.0, 19.0, 5.0, 10.0, 14.0, 9.0, 10.0, 4.0, 5.0, 7.0, 5.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-81.30076599121094, -79.00665283203125, -76.71253967285156, -74.4184341430664, -72.12432098388672, -69.83020782470703, -67.53610229492188, -65.24198913574219, -62.9478759765625, -60.65376281738281, -58.35965347290039, -56.06554412841797, -53.77143096923828, -51.477317810058594, -49.18320846557617, -46.88909912109375, -44.59498596191406, -42.300872802734375, -40.00676345825195, -37.71265411376953, -35.418540954589844, -33.124427795410156, -30.830318450927734, -28.53620719909668, -26.242095947265625, -23.94798469543457, -21.653873443603516, -19.35976219177246, -17.065650939941406, -14.771539688110352, -12.477428436279297, -10.183317184448242, -7.8892059326171875, -5.595094680786133, -3.300983428955078, -1.0068721771240234, 1.2872390747070312, 3.581350326538086, 5.875461578369141, 8.169572830200195, 10.46368408203125, 12.757795333862305, 15.05190658569336, 17.346017837524414, 19.64012908935547, 21.934240341186523, 24.228351593017578, 26.522462844848633, 28.816574096679688, 31.110685348510742, 33.4047966003418, 35.69890594482422, 37.993019104003906, 40.287132263183594, 42.581241607666016, 44.87535095214844, 47.169464111328125, 49.46357727050781, 51.757686614990234, 54.051795959472656, 56.345909118652344, 58.64002227783203, 60.93413162231445, 63.228240966796875, 65.52235412597656]}, "gradients/decoder.transformer.h.3.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 2.0, 7.0, 7.0, 5.0, 9.0, 14.0, 13.0, 23.0, 20.0, 23.0, 18.0, 20.0, 24.0, 25.0, 30.0, 33.0, 43.0, 32.0, 40.0, 39.0, 54.0, 35.0, 34.0, 52.0, 46.0, 45.0, 35.0, 33.0, 33.0, 31.0, 28.0, 18.0, 22.0, 20.0, 9.0, 17.0, 14.0, 12.0, 11.0, 7.0, 10.0, 4.0, 1.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-14.4765625, -14.0648193359375, -13.653076171875, -13.2413330078125, -12.82958984375, -12.4178466796875, -12.006103515625, -11.5943603515625, -11.1826171875, -10.7708740234375, -10.359130859375, -9.9473876953125, -9.53564453125, -9.1239013671875, -8.712158203125, -8.3004150390625, -7.888671875, -7.4769287109375, -7.065185546875, -6.6534423828125, -6.24169921875, -5.8299560546875, -5.418212890625, -5.0064697265625, -4.5947265625, -4.1829833984375, -3.771240234375, -3.3594970703125, -2.94775390625, -2.5360107421875, -2.124267578125, -1.7125244140625, -1.30078125, -0.8890380859375, -0.477294921875, -0.0655517578125, 0.34619140625, 0.7579345703125, 1.169677734375, 1.5814208984375, 1.9931640625, 2.4049072265625, 2.816650390625, 3.2283935546875, 3.64013671875, 4.0518798828125, 4.463623046875, 4.8753662109375, 5.287109375, 5.6988525390625, 6.110595703125, 6.5223388671875, 6.93408203125, 7.3458251953125, 7.757568359375, 8.1693115234375, 8.5810546875, 8.9927978515625, 9.404541015625, 9.8162841796875, 10.22802734375, 10.6397705078125, 11.051513671875, 11.4632568359375, 11.875]}, "gradients/decoder.transformer.h.3.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 6.0, 6.0, 5.0, 8.0, 6.0, 14.0, 13.0, 16.0, 15.0, 25.0, 33.0, 26.0, 48.0, 51.0, 70.0, 100.0, 130.0, 197.0, 224.0, 288.0, 486.0, 4284.0, 4175731.0, 10550.0, 587.0, 351.0, 222.0, 208.0, 143.0, 86.0, 75.0, 70.0, 43.0, 33.0, 20.0, 21.0, 17.0, 21.0, 16.0, 8.0, 7.0, 7.0, 5.0, 3.0, 3.0, 1.0, 4.0, 3.0, 1.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-170.375, -164.802734375, -159.23046875, -153.658203125, -148.0859375, -142.513671875, -136.94140625, -131.369140625, -125.796875, -120.224609375, -114.65234375, -109.080078125, -103.5078125, -97.935546875, -92.36328125, -86.791015625, -81.21875, -75.646484375, -70.07421875, -64.501953125, -58.9296875, -53.357421875, -47.78515625, -42.212890625, -36.640625, -31.068359375, -25.49609375, -19.923828125, -14.3515625, -8.779296875, -3.20703125, 2.365234375, 7.9375, 13.509765625, 19.08203125, 24.654296875, 30.2265625, 35.798828125, 41.37109375, 46.943359375, 52.515625, 58.087890625, 63.66015625, 69.232421875, 74.8046875, 80.376953125, 85.94921875, 91.521484375, 97.09375, 102.666015625, 108.23828125, 113.810546875, 119.3828125, 124.955078125, 130.52734375, 136.099609375, 141.671875, 147.244140625, 152.81640625, 158.388671875, 163.9609375, 169.533203125, 175.10546875, 180.677734375, 186.25]}, "gradients/decoder.transformer.h.3.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 5.0, 5.0, 8.0, 9.0, 8.0, 13.0, 13.0, 22.0, 22.0, 46.0, 47.0, 71.0, 113.0, 163.0, 241.0, 359.0, 512.0, 629.0, 595.0, 367.0, 243.0, 169.0, 106.0, 85.0, 61.0, 39.0, 40.0, 22.0, 19.0, 7.0, 8.0, 8.0, 10.0, 7.0, 5.0, 5.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.09375, -15.4404296875, -14.787109375, -14.1337890625, -13.48046875, -12.8271484375, -12.173828125, -11.5205078125, -10.8671875, -10.2138671875, -9.560546875, -8.9072265625, -8.25390625, -7.6005859375, -6.947265625, -6.2939453125, -5.640625, -4.9873046875, -4.333984375, -3.6806640625, -3.02734375, -2.3740234375, -1.720703125, -1.0673828125, -0.4140625, 0.2392578125, 0.892578125, 1.5458984375, 2.19921875, 2.8525390625, 3.505859375, 4.1591796875, 4.8125, 5.4658203125, 6.119140625, 6.7724609375, 7.42578125, 8.0791015625, 8.732421875, 9.3857421875, 10.0390625, 10.6923828125, 11.345703125, 11.9990234375, 12.65234375, 13.3056640625, 13.958984375, 14.6123046875, 15.265625, 15.9189453125, 16.572265625, 17.2255859375, 17.87890625, 18.5322265625, 19.185546875, 19.8388671875, 20.4921875, 21.1455078125, 21.798828125, 22.4521484375, 23.10546875, 23.7587890625, 24.412109375, 25.0654296875, 25.71875]}, "gradients/decoder.transformer.h.3.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 3.0, 4.0, 10.0, 10.0, 14.0, 19.0, 28.0, 28.0, 37.0, 41.0, 41.0, 80.0, 91.0, 282.0, 1878.0, 211897.0, 3964073.0, 14541.0, 670.0, 186.0, 91.0, 56.0, 38.0, 36.0, 26.0, 24.0, 19.0, 19.0, 20.0, 12.0, 6.0, 2.0, 2.0, 4.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-107.0, -103.4638671875, -99.927734375, -96.3916015625, -92.85546875, -89.3193359375, -85.783203125, -82.2470703125, -78.7109375, -75.1748046875, -71.638671875, -68.1025390625, -64.56640625, -61.0302734375, -57.494140625, -53.9580078125, -50.421875, -46.8857421875, -43.349609375, -39.8134765625, -36.27734375, -32.7412109375, -29.205078125, -25.6689453125, -22.1328125, -18.5966796875, -15.060546875, -11.5244140625, -7.98828125, -4.4521484375, -0.916015625, 2.6201171875, 6.15625, 9.6923828125, 13.228515625, 16.7646484375, 20.30078125, 23.8369140625, 27.373046875, 30.9091796875, 34.4453125, 37.9814453125, 41.517578125, 45.0537109375, 48.58984375, 52.1259765625, 55.662109375, 59.1982421875, 62.734375, 66.2705078125, 69.806640625, 73.3427734375, 76.87890625, 80.4150390625, 83.951171875, 87.4873046875, 91.0234375, 94.5595703125, 98.095703125, 101.6318359375, 105.16796875, 108.7041015625, 112.240234375, 115.7763671875, 119.3125]}, "gradients/decoder.transformer.h.3.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 5.0, 13.0, 113.0, 393.0, 371.0, 107.0, 15.0, 1.0, 1.0, 0.0, 1.0], "bins": [-506.3465270996094, -497.4556884765625, -488.5648498535156, -479.67401123046875, -470.7831726074219, -461.892333984375, -453.0014953613281, -444.11065673828125, -435.2198181152344, -426.3289794921875, -417.4381408691406, -408.54730224609375, -399.6564636230469, -390.765625, -381.8747863769531, -372.98394775390625, -364.09307861328125, -355.2022399902344, -346.3114013671875, -337.4205627441406, -328.52972412109375, -319.6388854980469, -310.748046875, -301.8572082519531, -292.96636962890625, -284.0755310058594, -275.1846923828125, -266.2938537597656, -257.40301513671875, -248.51217651367188, -239.621337890625, -230.73049926757812, -221.8396453857422, -212.9488067626953, -204.05796813964844, -195.16712951660156, -186.2762908935547, -177.3854522705078, -168.49459838867188, -159.603759765625, -150.71292114257812, -141.82208251953125, -132.93124389648438, -124.0404052734375, -115.14956665039062, -106.25872802734375, -97.36788177490234, -88.47704315185547, -79.58621215820312, -70.69537353515625, -61.804534912109375, -52.913692474365234, -44.02285385131836, -35.132015228271484, -26.241172790527344, -17.35033416748047, -8.459495544433594, 0.43134403228759766, 9.322183609008789, 18.213024139404297, 27.103862762451172, 35.99470138549805, 44.88554382324219, 53.77638244628906, 62.66722106933594]}, "gradients/decoder.transformer.h.3.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 4.0, 4.0, 7.0, 3.0, 13.0, 12.0, 10.0, 19.0, 17.0, 13.0, 36.0, 25.0, 28.0, 29.0, 41.0, 34.0, 34.0, 36.0, 36.0, 44.0, 27.0, 50.0, 39.0, 46.0, 56.0, 39.0, 38.0, 34.0, 34.0, 33.0, 24.0, 27.0, 22.0, 13.0, 15.0, 11.0, 13.0, 16.0, 5.0, 8.0, 2.0, 3.0, 4.0, 3.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-68.6031265258789, -66.5176010131836, -64.43206787109375, -62.34654235839844, -60.261016845703125, -58.17549133300781, -56.089962005615234, -54.004432678222656, -51.918907165527344, -49.83338165283203, -47.74785232543945, -45.662322998046875, -43.57679748535156, -41.49127197265625, -39.40574264526367, -37.320213317871094, -35.23468780517578, -33.14916229248047, -31.06363296508789, -28.978105545043945, -26.892578125, -24.807050704956055, -22.72152328491211, -20.635995864868164, -18.55046844482422, -16.464941024780273, -14.379413604736328, -12.293886184692383, -10.208358764648438, -8.122831344604492, -6.037303924560547, -3.9517765045166016, -1.866241455078125, 0.2192859649658203, 2.3048133850097656, 4.390340805053711, 6.475868225097656, 8.561395645141602, 10.646923065185547, 12.732450485229492, 14.817977905273438, 16.903505325317383, 18.989032745361328, 21.074560165405273, 23.16008758544922, 25.245615005493164, 27.33114242553711, 29.416669845581055, 31.502197265625, 33.58772277832031, 35.67325210571289, 37.75878143310547, 39.84430694580078, 41.929832458496094, 44.01536178588867, 46.10089111328125, 48.18641662597656, 50.271942138671875, 52.35747146606445, 54.44300079345703, 56.528526306152344, 58.614051818847656, 60.699581146240234, 62.78511047363281, 64.87063598632812]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 3.0, 1.0, 7.0, 7.0, 9.0, 9.0, 10.0, 19.0, 16.0, 16.0, 27.0, 18.0, 35.0, 37.0, 31.0, 32.0, 36.0, 39.0, 48.0, 40.0, 39.0, 50.0, 48.0, 57.0, 41.0, 46.0, 29.0, 37.0, 25.0, 31.0, 24.0, 25.0, 23.0, 21.0, 11.0, 11.0, 16.0, 6.0, 8.0, 4.0, 4.0, 6.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.3046875, -13.8851318359375, -13.465576171875, -13.0460205078125, -12.62646484375, -12.2069091796875, -11.787353515625, -11.3677978515625, -10.9482421875, -10.5286865234375, -10.109130859375, -9.6895751953125, -9.27001953125, -8.8504638671875, -8.430908203125, -8.0113525390625, -7.591796875, -7.1722412109375, -6.752685546875, -6.3331298828125, -5.91357421875, -5.4940185546875, -5.074462890625, -4.6549072265625, -4.2353515625, -3.8157958984375, -3.396240234375, -2.9766845703125, -2.55712890625, -2.1375732421875, -1.718017578125, -1.2984619140625, -0.87890625, -0.4593505859375, -0.039794921875, 0.3797607421875, 0.79931640625, 1.2188720703125, 1.638427734375, 2.0579833984375, 2.4775390625, 2.8970947265625, 3.316650390625, 3.7362060546875, 4.15576171875, 4.5753173828125, 4.994873046875, 5.4144287109375, 5.833984375, 6.2535400390625, 6.673095703125, 7.0926513671875, 7.51220703125, 7.9317626953125, 8.351318359375, 8.7708740234375, 9.1904296875, 9.6099853515625, 10.029541015625, 10.4490966796875, 10.86865234375, 11.2882080078125, 11.707763671875, 12.1273193359375, 12.546875]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 3.0, 5.0, 8.0, 12.0, 14.0, 22.0, 36.0, 55.0, 93.0, 143.0, 189.0, 289.0, 476.0, 678.0, 977.0, 1614.0, 2227.0, 3392.0, 4804.0, 7233.0, 10558.0, 15960.0, 23236.0, 34317.0, 50826.0, 72738.0, 101788.0, 132826.0, 147324.0, 127019.0, 95486.0, 68095.0, 47129.0, 32065.0, 21441.0, 14646.0, 10020.0, 6786.0, 4592.0, 3025.0, 2039.0, 1482.0, 1009.0, 607.0, 422.0, 271.0, 204.0, 130.0, 75.0, 66.0, 40.0, 29.0, 20.0, 11.0, 6.0, 3.0, 2.0, 4.0, 2.0, 2.0, 1.0, 1.0], "bins": [-0.69384765625, -0.6710281372070312, -0.6482086181640625, -0.6253890991210938, -0.602569580078125, -0.5797500610351562, -0.5569305419921875, -0.5341110229492188, -0.51129150390625, -0.48847198486328125, -0.4656524658203125, -0.44283294677734375, -0.420013427734375, -0.39719390869140625, -0.3743743896484375, -0.35155487060546875, -0.3287353515625, -0.30591583251953125, -0.2830963134765625, -0.26027679443359375, -0.237457275390625, -0.21463775634765625, -0.1918182373046875, -0.16899871826171875, -0.14617919921875, -0.12335968017578125, -0.1005401611328125, -0.07772064208984375, -0.054901123046875, -0.03208160400390625, -0.0092620849609375, 0.01355743408203125, 0.036376953125, 0.05919647216796875, 0.0820159912109375, 0.10483551025390625, 0.127655029296875, 0.15047454833984375, 0.1732940673828125, 0.19611358642578125, 0.21893310546875, 0.24175262451171875, 0.2645721435546875, 0.28739166259765625, 0.310211181640625, 0.33303070068359375, 0.3558502197265625, 0.37866973876953125, 0.4014892578125, 0.42430877685546875, 0.4471282958984375, 0.46994781494140625, 0.492767333984375, 0.5155868530273438, 0.5384063720703125, 0.5612258911132812, 0.58404541015625, 0.6068649291992188, 0.6296844482421875, 0.6525039672851562, 0.675323486328125, 0.6981430053710938, 0.7209625244140625, 0.7437820434570312, 0.7666015625]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 3.0, 5.0, 2.0, 6.0, 2.0, 4.0, 3.0, 6.0, 9.0, 9.0, 12.0, 7.0, 23.0, 15.0, 19.0, 20.0, 29.0, 28.0, 35.0, 40.0, 43.0, 44.0, 36.0, 43.0, 44.0, 1072.0, 51.0, 37.0, 35.0, 52.0, 33.0, 22.0, 36.0, 39.0, 30.0, 23.0, 20.0, 21.0, 20.0, 17.0, 15.0, 7.0, 3.0, 2.0, 7.0, 2.0, 2.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.9609375, -7.6923828125, -7.423828125, -7.1552734375, -6.88671875, -6.6181640625, -6.349609375, -6.0810546875, -5.8125, -5.5439453125, -5.275390625, -5.0068359375, -4.73828125, -4.4697265625, -4.201171875, -3.9326171875, -3.6640625, -3.3955078125, -3.126953125, -2.8583984375, -2.58984375, -2.3212890625, -2.052734375, -1.7841796875, -1.515625, -1.2470703125, -0.978515625, -0.7099609375, -0.44140625, -0.1728515625, 0.095703125, 0.3642578125, 0.6328125, 0.9013671875, 1.169921875, 1.4384765625, 1.70703125, 1.9755859375, 2.244140625, 2.5126953125, 2.78125, 3.0498046875, 3.318359375, 3.5869140625, 3.85546875, 4.1240234375, 4.392578125, 4.6611328125, 4.9296875, 5.1982421875, 5.466796875, 5.7353515625, 6.00390625, 6.2724609375, 6.541015625, 6.8095703125, 7.078125, 7.3466796875, 7.615234375, 7.8837890625, 8.15234375, 8.4208984375, 8.689453125, 8.9580078125, 9.2265625]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 5.0, 10.0, 15.0, 12.0, 31.0, 34.0, 76.0, 107.0, 158.0, 313.0, 553.0, 888.0, 1378.0, 2584.0, 4313.0, 7326.0, 12462.0, 21470.0, 36500.0, 60343.0, 98316.0, 146438.0, 1202939.0, 188697.0, 120728.0, 77948.0, 46552.0, 27914.0, 16066.0, 9476.0, 5455.0, 3310.0, 1940.0, 1064.0, 681.0, 366.0, 241.0, 148.0, 82.0, 63.0, 36.0, 33.0, 19.0, 14.0, 11.0, 11.0, 4.0, 4.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.68115234375, -0.6582489013671875, -0.635345458984375, -0.6124420166015625, -0.58953857421875, -0.5666351318359375, -0.543731689453125, -0.5208282470703125, -0.4979248046875, -0.4750213623046875, -0.452117919921875, -0.4292144775390625, -0.40631103515625, -0.3834075927734375, -0.360504150390625, -0.3376007080078125, -0.314697265625, -0.2917938232421875, -0.268890380859375, -0.2459869384765625, -0.22308349609375, -0.2001800537109375, -0.177276611328125, -0.1543731689453125, -0.1314697265625, -0.1085662841796875, -0.085662841796875, -0.0627593994140625, -0.03985595703125, -0.0169525146484375, 0.005950927734375, 0.0288543701171875, 0.0517578125, 0.0746612548828125, 0.097564697265625, 0.1204681396484375, 0.14337158203125, 0.1662750244140625, 0.189178466796875, 0.2120819091796875, 0.2349853515625, 0.2578887939453125, 0.280792236328125, 0.3036956787109375, 0.32659912109375, 0.3495025634765625, 0.372406005859375, 0.3953094482421875, 0.418212890625, 0.4411163330078125, 0.464019775390625, 0.4869232177734375, 0.50982666015625, 0.5327301025390625, 0.555633544921875, 0.5785369873046875, 0.6014404296875, 0.6243438720703125, 0.647247314453125, 0.6701507568359375, 0.69305419921875, 0.7159576416015625, 0.738861083984375, 0.7617645263671875, 0.78466796875]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 2.0, 3.0, 3.0, 5.0, 9.0, 6.0, 14.0, 17.0, 22.0, 17.0, 23.0, 25.0, 41.0, 45.0, 73.0, 59.0, 73.0, 84.0, 76.0, 73.0, 48.0, 50.0, 41.0, 35.0, 31.0, 26.0, 22.0, 17.0, 12.0, 6.0, 5.0, 7.0, 3.0, 11.0, 5.0, 5.0, 3.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.025299072265625, -0.024567604064941406, -0.023836135864257812, -0.02310466766357422, -0.022373199462890625, -0.02164173126220703, -0.020910263061523438, -0.020178794860839844, -0.01944732666015625, -0.018715858459472656, -0.017984390258789062, -0.01725292205810547, -0.016521453857421875, -0.01578998565673828, -0.015058517456054688, -0.014327049255371094, -0.0135955810546875, -0.012864112854003906, -0.012132644653320312, -0.011401176452636719, -0.010669708251953125, -0.009938240051269531, -0.009206771850585938, -0.008475303649902344, -0.00774383544921875, -0.007012367248535156, -0.0062808990478515625, -0.005549430847167969, -0.004817962646484375, -0.004086494445800781, -0.0033550262451171875, -0.0026235580444335938, -0.00189208984375, -0.0011606216430664062, -0.0004291534423828125, 0.00030231475830078125, 0.001033782958984375, 0.0017652511596679688, 0.0024967193603515625, 0.0032281875610351562, 0.00395965576171875, 0.004691123962402344, 0.0054225921630859375, 0.006154060363769531, 0.006885528564453125, 0.007616996765136719, 0.008348464965820312, 0.009079933166503906, 0.0098114013671875, 0.010542869567871094, 0.011274337768554688, 0.012005805969238281, 0.012737274169921875, 0.013468742370605469, 0.014200210571289062, 0.014931678771972656, 0.01566314697265625, 0.016394615173339844, 0.017126083374023438, 0.01785755157470703, 0.018589019775390625, 0.01932048797607422, 0.020051956176757812, 0.020783424377441406, 0.021514892578125]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 0.0, 3.0, 5.0, 6.0, 4.0, 9.0, 8.0, 14.0, 9.0, 16.0, 23.0, 15.0, 37.0, 41.0, 61.0, 88.0, 120.0, 170.0, 239.0, 430.0, 983.0, 64288.0, 978073.0, 2383.0, 592.0, 324.0, 155.0, 130.0, 77.0, 61.0, 38.0, 40.0, 24.0, 18.0, 21.0, 11.0, 9.0, 8.0, 2.0, 7.0, 5.0, 5.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-0.383056640625, -0.3699455261230469, -0.35683441162109375, -0.3437232971191406, -0.3306121826171875, -0.3175010681152344, -0.30438995361328125, -0.2912788391113281, -0.278167724609375, -0.2650566101074219, -0.25194549560546875, -0.23883438110351562, -0.2257232666015625, -0.21261215209960938, -0.19950103759765625, -0.18638992309570312, -0.17327880859375, -0.16016769409179688, -0.14705657958984375, -0.13394546508789062, -0.1208343505859375, -0.10772323608398438, -0.09461212158203125, -0.08150100708007812, -0.068389892578125, -0.055278778076171875, -0.04216766357421875, -0.029056549072265625, -0.0159454345703125, -0.002834320068359375, 0.01027679443359375, 0.023387908935546875, 0.0364990234375, 0.049610137939453125, 0.06272125244140625, 0.07583236694335938, 0.0889434814453125, 0.10205459594726562, 0.11516571044921875, 0.12827682495117188, 0.141387939453125, 0.15449905395507812, 0.16761016845703125, 0.18072128295898438, 0.1938323974609375, 0.20694351196289062, 0.22005462646484375, 0.23316574096679688, 0.24627685546875, 0.2593879699707031, 0.27249908447265625, 0.2856101989746094, 0.2987213134765625, 0.3118324279785156, 0.32494354248046875, 0.3380546569824219, 0.351165771484375, 0.3642768859863281, 0.37738800048828125, 0.3904991149902344, 0.4036102294921875, 0.4167213439941406, 0.42983245849609375, 0.4429435729980469, 0.4560546875]}, "gradients/decoder.transformer.h.3.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 9.0, 62.0, 265.0, 429.0, 195.0, 46.0, 6.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07442733645439148, -0.0729195699095726, -0.07141180336475372, -0.06990403681993484, -0.06839627027511597, -0.06688850373029709, -0.06538073718547821, -0.06387297064065933, -0.06236520782113075, -0.060857441276311874, -0.059349674731492996, -0.05784190818667412, -0.05633414164185524, -0.05482637882232666, -0.05331861227750778, -0.051810845732688904, -0.050303079187870026, -0.04879531264305115, -0.04728754609823227, -0.04577977955341339, -0.04427201300859451, -0.042764246463775635, -0.04125647991895676, -0.03974871709942818, -0.038240946829319, -0.03673318028450012, -0.035225413739681244, -0.033717647194862366, -0.03220988065004349, -0.03070211596786976, -0.02919434942305088, -0.02768658474087715, -0.026178820058703423, -0.024671053513884544, -0.023163286969065666, -0.021655522286891937, -0.02014775574207306, -0.01863998919725418, -0.017132222652435303, -0.015624457038939, -0.014116690494120121, -0.012608923949301243, -0.01110115833580494, -0.009593391790986061, -0.008085625246167183, -0.006577859632670879, -0.005070093087852001, -0.0035623274743556976, -0.0020545609295368195, -0.0005467947339639068, 0.0009609714616090059, 0.0024687377735972404, 0.003976503852754831, 0.005484269931912422, 0.0069920364767313, 0.008499802090227604, 0.010007568635046482, 0.01151533517986536, 0.013023100793361664, 0.014530867338180542, 0.01603863388299942, 0.0175464004278183, 0.019054166972637177, 0.020561931654810905, 0.022069698199629784]}, "gradients/decoder.transformer.h.3.ln_cross_attn.bias": {"_type": "histogram", "values": [4.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 4.0, 4.0, 3.0, 7.0, 10.0, 6.0, 14.0, 6.0, 15.0, 18.0, 13.0, 19.0, 18.0, 28.0, 28.0, 27.0, 31.0, 35.0, 26.0, 42.0, 40.0, 44.0, 44.0, 46.0, 56.0, 39.0, 33.0, 35.0, 36.0, 37.0, 33.0, 29.0, 34.0, 22.0, 16.0, 15.0, 21.0, 17.0, 12.0, 7.0, 6.0, 9.0, 7.0, 7.0, 5.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.010846078395843506, -0.010490799322724342, -0.010135520249605179, -0.009780241176486015, -0.009424962103366852, -0.009069683030247688, -0.008714403957128525, -0.008359124884009361, -0.008003845810890198, -0.007648566737771034, -0.007293287664651871, -0.006938008591532707, -0.006582729518413544, -0.00622745044529438, -0.005872171372175217, -0.005516892299056053, -0.00516161322593689, -0.004806334152817726, -0.004451055079698563, -0.004095776006579399, -0.0037404969334602356, -0.003385217860341072, -0.0030299387872219086, -0.002674659714102745, -0.0023193806409835815, -0.001964101567864418, -0.0016088224947452545, -0.001253543421626091, -0.0008982643485069275, -0.000542985275387764, -0.00018770620226860046, 0.00016757287085056305, 0.0005228519439697266, 0.0008781310170888901, 0.0012334100902080536, 0.001588689163327217, 0.0019439682364463806, 0.002299247309565544, 0.0026545263826847076, 0.003009805455803871, 0.0033650845289230347, 0.003720363602042198, 0.004075642675161362, 0.004430921748280525, 0.004786200821399689, 0.005141479894518852, 0.005496758967638016, 0.005852038040757179, 0.006207317113876343, 0.006562596186995506, 0.00691787526011467, 0.007273154333233833, 0.007628433406352997, 0.00798371247947216, 0.008338991552591324, 0.008694270625710487, 0.009049549698829651, 0.009404828771948814, 0.009760107845067978, 0.010115386918187141, 0.010470665991306305, 0.010825945064425468, 0.011181224137544632, 0.011536503210663795, 0.011891782283782959]}, "gradients/decoder.transformer.h.3.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 3.0, 1.0, 7.0, 7.0, 9.0, 9.0, 9.0, 20.0, 16.0, 16.0, 27.0, 18.0, 35.0, 37.0, 31.0, 32.0, 36.0, 39.0, 48.0, 39.0, 40.0, 49.0, 49.0, 56.0, 42.0, 44.0, 30.0, 37.0, 26.0, 30.0, 25.0, 25.0, 23.0, 21.0, 11.0, 11.0, 16.0, 6.0, 8.0, 4.0, 4.0, 6.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.3046875, -13.88525390625, -13.4658203125, -13.04638671875, -12.626953125, -12.20751953125, -11.7880859375, -11.36865234375, -10.94921875, -10.52978515625, -10.1103515625, -9.69091796875, -9.271484375, -8.85205078125, -8.4326171875, -8.01318359375, -7.59375, -7.17431640625, -6.7548828125, -6.33544921875, -5.916015625, -5.49658203125, -5.0771484375, -4.65771484375, -4.23828125, -3.81884765625, -3.3994140625, -2.97998046875, -2.560546875, -2.14111328125, -1.7216796875, -1.30224609375, -0.8828125, -0.46337890625, -0.0439453125, 0.37548828125, 0.794921875, 1.21435546875, 1.6337890625, 2.05322265625, 2.47265625, 2.89208984375, 3.3115234375, 3.73095703125, 4.150390625, 4.56982421875, 4.9892578125, 5.40869140625, 5.828125, 6.24755859375, 6.6669921875, 7.08642578125, 7.505859375, 7.92529296875, 8.3447265625, 8.76416015625, 9.18359375, 9.60302734375, 10.0224609375, 10.44189453125, 10.861328125, 11.28076171875, 11.7001953125, 12.11962890625, 12.5390625]}, "gradients/decoder.transformer.h.3.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 4.0, 2.0, 2.0, 4.0, 6.0, 2.0, 11.0, 12.0, 13.0, 24.0, 31.0, 40.0, 61.0, 76.0, 88.0, 131.0, 170.0, 241.0, 325.0, 414.0, 566.0, 769.0, 1108.0, 1765.0, 3329.0, 8623.0, 32425.0, 146019.0, 516007.0, 254557.0, 56636.0, 13814.0, 4524.0, 2110.0, 1320.0, 840.0, 635.0, 462.0, 344.0, 281.0, 208.0, 150.0, 103.0, 81.0, 68.0, 44.0, 34.0, 18.0, 18.0, 19.0, 10.0, 10.0, 5.0, 2.0, 2.0, 3.0, 1.0, 3.0, 1.0, 1.0], "bins": [-22.296875, -21.61474609375, -20.9326171875, -20.25048828125, -19.568359375, -18.88623046875, -18.2041015625, -17.52197265625, -16.83984375, -16.15771484375, -15.4755859375, -14.79345703125, -14.111328125, -13.42919921875, -12.7470703125, -12.06494140625, -11.3828125, -10.70068359375, -10.0185546875, -9.33642578125, -8.654296875, -7.97216796875, -7.2900390625, -6.60791015625, -5.92578125, -5.24365234375, -4.5615234375, -3.87939453125, -3.197265625, -2.51513671875, -1.8330078125, -1.15087890625, -0.46875, 0.21337890625, 0.8955078125, 1.57763671875, 2.259765625, 2.94189453125, 3.6240234375, 4.30615234375, 4.98828125, 5.67041015625, 6.3525390625, 7.03466796875, 7.716796875, 8.39892578125, 9.0810546875, 9.76318359375, 10.4453125, 11.12744140625, 11.8095703125, 12.49169921875, 13.173828125, 13.85595703125, 14.5380859375, 15.22021484375, 15.90234375, 16.58447265625, 17.2666015625, 17.94873046875, 18.630859375, 19.31298828125, 19.9951171875, 20.67724609375, 21.359375]}, "gradients/decoder.transformer.h.3.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 3.0, 8.0, 2.0, 10.0, 11.0, 11.0, 19.0, 13.0, 17.0, 27.0, 30.0, 33.0, 40.0, 49.0, 56.0, 51.0, 125.0, 474.0, 1517.0, 138.0, 67.0, 57.0, 53.0, 40.0, 42.0, 33.0, 28.0, 17.0, 16.0, 13.0, 11.0, 3.0, 10.0, 3.0, 11.0, 1.0, 3.0, 5.0, 2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 2.0], "bins": [-40.4375, -39.28369140625, -38.1298828125, -36.97607421875, -35.822265625, -34.66845703125, -33.5146484375, -32.36083984375, -31.20703125, -30.05322265625, -28.8994140625, -27.74560546875, -26.591796875, -25.43798828125, -24.2841796875, -23.13037109375, -21.9765625, -20.82275390625, -19.6689453125, -18.51513671875, -17.361328125, -16.20751953125, -15.0537109375, -13.89990234375, -12.74609375, -11.59228515625, -10.4384765625, -9.28466796875, -8.130859375, -6.97705078125, -5.8232421875, -4.66943359375, -3.515625, -2.36181640625, -1.2080078125, -0.05419921875, 1.099609375, 2.25341796875, 3.4072265625, 4.56103515625, 5.71484375, 6.86865234375, 8.0224609375, 9.17626953125, 10.330078125, 11.48388671875, 12.6376953125, 13.79150390625, 14.9453125, 16.09912109375, 17.2529296875, 18.40673828125, 19.560546875, 20.71435546875, 21.8681640625, 23.02197265625, 24.17578125, 25.32958984375, 26.4833984375, 27.63720703125, 28.791015625, 29.94482421875, 31.0986328125, 32.25244140625, 33.40625]}, "gradients/decoder.transformer.h.3.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 5.0, 2.0, 2.0, 6.0, 5.0, 4.0, 10.0, 12.0, 11.0, 20.0, 13.0, 23.0, 24.0, 55.0, 50.0, 90.0, 180.0, 305.0, 602.0, 1896.0, 2764949.0, 374669.0, 1497.0, 549.0, 251.0, 157.0, 87.0, 61.0, 38.0, 27.0, 21.0, 17.0, 20.0, 11.0, 10.0, 5.0, 4.0, 3.0, 7.0, 6.0, 3.0, 2.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-111.5625, -107.7060546875, -103.849609375, -99.9931640625, -96.13671875, -92.2802734375, -88.423828125, -84.5673828125, -80.7109375, -76.8544921875, -72.998046875, -69.1416015625, -65.28515625, -61.4287109375, -57.572265625, -53.7158203125, -49.859375, -46.0029296875, -42.146484375, -38.2900390625, -34.43359375, -30.5771484375, -26.720703125, -22.8642578125, -19.0078125, -15.1513671875, -11.294921875, -7.4384765625, -3.58203125, 0.2744140625, 4.130859375, 7.9873046875, 11.84375, 15.7001953125, 19.556640625, 23.4130859375, 27.26953125, 31.1259765625, 34.982421875, 38.8388671875, 42.6953125, 46.5517578125, 50.408203125, 54.2646484375, 58.12109375, 61.9775390625, 65.833984375, 69.6904296875, 73.546875, 77.4033203125, 81.259765625, 85.1162109375, 88.97265625, 92.8291015625, 96.685546875, 100.5419921875, 104.3984375, 108.2548828125, 112.111328125, 115.9677734375, 119.82421875, 123.6806640625, 127.537109375, 131.3935546875, 135.25]}, "gradients/decoder.transformer.h.3.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 6.0, 38.0, 145.0, 308.0, 335.0, 132.0, 44.0, 4.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-153.91552734375, -149.25157165527344, -144.5876007080078, -139.92364501953125, -135.25967407226562, -130.59571838378906, -125.9317626953125, -121.2677993774414, -116.60383605957031, -111.93987274169922, -107.27590942382812, -102.61195373535156, -97.94799041748047, -93.28402709960938, -88.62007141113281, -83.95610809326172, -79.29214477539062, -74.62818145751953, -69.96421813964844, -65.30026245117188, -60.63629913330078, -55.97233581542969, -51.30837631225586, -46.64441680908203, -41.98045349121094, -37.316490173339844, -32.652530670166016, -27.988569259643555, -23.324607849121094, -18.660646438598633, -13.996685028076172, -9.332725524902344, -4.6687774658203125, -0.0048160552978515625, 4.659145355224609, 9.32310676574707, 13.987068176269531, 18.651029586791992, 23.314990997314453, 27.97895050048828, 32.642913818359375, 37.30687713623047, 41.9708366394043, 46.634796142578125, 51.29875946044922, 55.96272277832031, 60.62668228149414, 65.29064178466797, 69.95460510253906, 74.61856842041016, 79.28253173828125, 83.94648742675781, 88.6104507446289, 93.2744140625, 97.93836975097656, 102.60233306884766, 107.26629638671875, 111.93025970458984, 116.59422302246094, 121.2581787109375, 125.9221420288086, 130.5861053466797, 135.25006103515625, 139.91403198242188, 144.57798767089844]}, "gradients/decoder.transformer.h.3.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 5.0, 2.0, 8.0, 7.0, 4.0, 9.0, 9.0, 11.0, 12.0, 18.0, 22.0, 13.0, 25.0, 24.0, 30.0, 33.0, 41.0, 38.0, 32.0, 47.0, 57.0, 50.0, 51.0, 39.0, 41.0, 29.0, 30.0, 36.0, 33.0, 35.0, 31.0, 29.0, 27.0, 16.0, 24.0, 14.0, 12.0, 9.0, 10.0, 10.0, 6.0, 6.0, 4.0, 4.0, 4.0, 3.0, 0.0, 6.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-81.78633117675781, -79.10333251953125, -76.42033386230469, -73.7373275756836, -71.05432891845703, -68.37133026123047, -65.68832397460938, -63.00532531738281, -60.32232666015625, -57.63932800292969, -54.95632553100586, -52.27332305908203, -49.59032440185547, -46.907325744628906, -44.22432327270508, -41.54132080078125, -38.85832214355469, -36.175323486328125, -33.4923210144043, -30.8093204498291, -28.126319885253906, -25.44331932067871, -22.760318756103516, -20.07731819152832, -17.394317626953125, -14.71131706237793, -12.028316497802734, -9.345315933227539, -6.662315368652344, -3.9793148040771484, -1.2963142395019531, 1.3866863250732422, 4.0696868896484375, 6.752687454223633, 9.435688018798828, 12.118688583374023, 14.801689147949219, 17.484689712524414, 20.16769027709961, 22.850690841674805, 25.53369140625, 28.216691970825195, 30.89969253540039, 33.58269500732422, 36.26569366455078, 38.948692321777344, 41.63169479370117, 44.314697265625, 46.99769592285156, 49.680694580078125, 52.36369705200195, 55.04669952392578, 57.729698181152344, 60.412696838378906, 63.095699310302734, 65.77870178222656, 68.46170043945312, 71.14469909667969, 73.82769775390625, 76.51070404052734, 79.1937026977539, 81.87670135498047, 84.55970764160156, 87.24270629882812, 89.92570495605469]}, "gradients/decoder.transformer.h.2.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 5.0, 3.0, 7.0, 7.0, 11.0, 11.0, 14.0, 10.0, 18.0, 23.0, 33.0, 20.0, 33.0, 34.0, 37.0, 32.0, 37.0, 49.0, 43.0, 32.0, 41.0, 49.0, 51.0, 58.0, 42.0, 40.0, 39.0, 33.0, 23.0, 26.0, 26.0, 25.0, 20.0, 16.0, 13.0, 9.0, 8.0, 8.0, 7.0, 6.0, 2.0, 3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.046875, -13.6156005859375, -13.184326171875, -12.7530517578125, -12.32177734375, -11.8905029296875, -11.459228515625, -11.0279541015625, -10.5966796875, -10.1654052734375, -9.734130859375, -9.3028564453125, -8.87158203125, -8.4403076171875, -8.009033203125, -7.5777587890625, -7.146484375, -6.7152099609375, -6.283935546875, -5.8526611328125, -5.42138671875, -4.9901123046875, -4.558837890625, -4.1275634765625, -3.6962890625, -3.2650146484375, -2.833740234375, -2.4024658203125, -1.97119140625, -1.5399169921875, -1.108642578125, -0.6773681640625, -0.24609375, 0.1851806640625, 0.616455078125, 1.0477294921875, 1.47900390625, 1.9102783203125, 2.341552734375, 2.7728271484375, 3.2041015625, 3.6353759765625, 4.066650390625, 4.4979248046875, 4.92919921875, 5.3604736328125, 5.791748046875, 6.2230224609375, 6.654296875, 7.0855712890625, 7.516845703125, 7.9481201171875, 8.37939453125, 8.8106689453125, 9.241943359375, 9.6732177734375, 10.1044921875, 10.5357666015625, 10.967041015625, 11.3983154296875, 11.82958984375, 12.2608642578125, 12.692138671875, 13.1234130859375, 13.5546875]}, "gradients/decoder.transformer.h.2.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 5.0, 3.0, 2.0, 7.0, 11.0, 6.0, 7.0, 20.0, 21.0, 18.0, 23.0, 43.0, 44.0, 49.0, 78.0, 113.0, 182.0, 291.0, 434.0, 679.0, 1371.0, 2460.0, 5207.0, 13236.0, 44537.0, 225093.0, 1164540.0, 1992854.0, 594021.0, 107458.0, 24727.0, 8398.0, 3757.0, 1861.0, 993.0, 601.0, 359.0, 242.0, 122.0, 113.0, 65.0, 50.0, 40.0, 35.0, 34.0, 17.0, 14.0, 14.0, 11.0, 7.0, 5.0, 9.0, 3.0, 0.0, 4.0, 0.0, 3.0], "bins": [-21.859375, -21.2216796875, -20.583984375, -19.9462890625, -19.30859375, -18.6708984375, -18.033203125, -17.3955078125, -16.7578125, -16.1201171875, -15.482421875, -14.8447265625, -14.20703125, -13.5693359375, -12.931640625, -12.2939453125, -11.65625, -11.0185546875, -10.380859375, -9.7431640625, -9.10546875, -8.4677734375, -7.830078125, -7.1923828125, -6.5546875, -5.9169921875, -5.279296875, -4.6416015625, -4.00390625, -3.3662109375, -2.728515625, -2.0908203125, -1.453125, -0.8154296875, -0.177734375, 0.4599609375, 1.09765625, 1.7353515625, 2.373046875, 3.0107421875, 3.6484375, 4.2861328125, 4.923828125, 5.5615234375, 6.19921875, 6.8369140625, 7.474609375, 8.1123046875, 8.75, 9.3876953125, 10.025390625, 10.6630859375, 11.30078125, 11.9384765625, 12.576171875, 13.2138671875, 13.8515625, 14.4892578125, 15.126953125, 15.7646484375, 16.40234375, 17.0400390625, 17.677734375, 18.3154296875, 18.953125]}, "gradients/decoder.transformer.h.2.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0, 3.0, 4.0, 4.0, 0.0, 5.0, 3.0, 10.0, 8.0, 4.0, 7.0, 16.0, 15.0, 27.0, 36.0, 33.0, 49.0, 59.0, 72.0, 109.0, 123.0, 137.0, 166.0, 237.0, 297.0, 405.0, 428.0, 371.0, 328.0, 238.0, 190.0, 117.0, 112.0, 108.0, 74.0, 52.0, 47.0, 56.0, 25.0, 19.0, 22.0, 18.0, 11.0, 7.0, 5.0, 4.0, 8.0, 2.0, 6.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-18.984375, -18.4488525390625, -17.913330078125, -17.3778076171875, -16.84228515625, -16.3067626953125, -15.771240234375, -15.2357177734375, -14.7001953125, -14.1646728515625, -13.629150390625, -13.0936279296875, -12.55810546875, -12.0225830078125, -11.487060546875, -10.9515380859375, -10.416015625, -9.8804931640625, -9.344970703125, -8.8094482421875, -8.27392578125, -7.7384033203125, -7.202880859375, -6.6673583984375, -6.1318359375, -5.5963134765625, -5.060791015625, -4.5252685546875, -3.98974609375, -3.4542236328125, -2.918701171875, -2.3831787109375, -1.84765625, -1.3121337890625, -0.776611328125, -0.2410888671875, 0.29443359375, 0.8299560546875, 1.365478515625, 1.9010009765625, 2.4365234375, 2.9720458984375, 3.507568359375, 4.0430908203125, 4.57861328125, 5.1141357421875, 5.649658203125, 6.1851806640625, 6.720703125, 7.2562255859375, 7.791748046875, 8.3272705078125, 8.86279296875, 9.3983154296875, 9.933837890625, 10.4693603515625, 11.0048828125, 11.5404052734375, 12.075927734375, 12.6114501953125, 13.14697265625, 13.6824951171875, 14.218017578125, 14.7535400390625, 15.2890625]}, "gradients/decoder.transformer.h.2.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 2.0, 7.0, 9.0, 16.0, 16.0, 26.0, 41.0, 65.0, 87.0, 134.0, 220.0, 383.0, 723.0, 1525.0, 4363.0, 22707.0, 236766.0, 3012930.0, 839000.0, 62167.0, 8438.0, 2376.0, 1040.0, 470.0, 251.0, 152.0, 134.0, 73.0, 55.0, 30.0, 18.0, 16.0, 12.0, 13.0, 9.0, 8.0, 3.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.03125, -40.85498046875, -39.6787109375, -38.50244140625, -37.326171875, -36.14990234375, -34.9736328125, -33.79736328125, -32.62109375, -31.44482421875, -30.2685546875, -29.09228515625, -27.916015625, -26.73974609375, -25.5634765625, -24.38720703125, -23.2109375, -22.03466796875, -20.8583984375, -19.68212890625, -18.505859375, -17.32958984375, -16.1533203125, -14.97705078125, -13.80078125, -12.62451171875, -11.4482421875, -10.27197265625, -9.095703125, -7.91943359375, -6.7431640625, -5.56689453125, -4.390625, -3.21435546875, -2.0380859375, -0.86181640625, 0.314453125, 1.49072265625, 2.6669921875, 3.84326171875, 5.01953125, 6.19580078125, 7.3720703125, 8.54833984375, 9.724609375, 10.90087890625, 12.0771484375, 13.25341796875, 14.4296875, 15.60595703125, 16.7822265625, 17.95849609375, 19.134765625, 20.31103515625, 21.4873046875, 22.66357421875, 23.83984375, 25.01611328125, 26.1923828125, 27.36865234375, 28.544921875, 29.72119140625, 30.8974609375, 32.07373046875, 33.25]}, "gradients/decoder.transformer.h.2.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 9.0, 133.0, 511.0, 320.0, 38.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-663.9436645507812, -650.0341186523438, -636.1245727539062, -622.215087890625, -608.3055419921875, -594.39599609375, -580.4864501953125, -566.576904296875, -552.6673583984375, -538.7578125, -524.8482666015625, -510.9387512207031, -497.0292053222656, -483.11968994140625, -469.21014404296875, -455.30059814453125, -441.3910827636719, -427.4815368652344, -413.572021484375, -399.6624755859375, -385.7529296875, -371.8433837890625, -357.9338684082031, -344.0243225097656, -330.11480712890625, -316.20526123046875, -302.2957458496094, -288.3861999511719, -274.4766540527344, -260.567138671875, -246.6575927734375, -232.748046875, -218.83847045898438, -204.92893981933594, -191.01939392089844, -177.10986328125, -163.2003173828125, -149.29078674316406, -135.38125610351562, -121.47171783447266, -107.56217956542969, -93.65264129638672, -79.74310302734375, -65.83357238769531, -51.924034118652344, -38.014495849609375, -24.104965209960938, -10.195426940917969, 3.714111328125, 17.623647689819336, 31.533184051513672, 45.442718505859375, 59.352256774902344, 73.26179504394531, 87.17132568359375, 101.08086395263672, 114.99040222167969, 128.89993286132812, 142.80947875976562, 156.71900939941406, 170.6285400390625, 184.5380859375, 198.44761657714844, 212.35714721679688, 226.26669311523438]}, "gradients/decoder.transformer.h.2.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 3.0, 3.0, 9.0, 1.0, 10.0, 10.0, 9.0, 11.0, 15.0, 10.0, 18.0, 26.0, 21.0, 22.0, 34.0, 31.0, 25.0, 37.0, 26.0, 38.0, 40.0, 52.0, 44.0, 39.0, 29.0, 34.0, 40.0, 40.0, 25.0, 28.0, 36.0, 24.0, 28.0, 23.0, 26.0, 15.0, 25.0, 12.0, 13.0, 14.0, 10.0, 12.0, 9.0, 6.0, 3.0, 2.0, 3.0, 2.0, 8.0, 6.0, 2.0, 5.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-70.35222625732422, -68.11717224121094, -65.88211059570312, -63.64705276489258, -61.41199493408203, -59.176937103271484, -56.94187927246094, -54.70682144165039, -52.471763610839844, -50.2367057800293, -48.00164794921875, -45.7665901184082, -43.531532287597656, -41.29647445678711, -39.06141662597656, -36.826358795166016, -34.59130096435547, -32.35624313354492, -30.121185302734375, -27.886127471923828, -25.65106964111328, -23.416011810302734, -21.180953979492188, -18.94589614868164, -16.710838317871094, -14.475780487060547, -12.24072265625, -10.005664825439453, -7.770606994628906, -5.535549163818359, -3.3004913330078125, -1.0654335021972656, 1.16961669921875, 3.404674530029297, 5.639732360839844, 7.874790191650391, 10.109848022460938, 12.344905853271484, 14.579963684082031, 16.815021514892578, 19.050079345703125, 21.285137176513672, 23.52019500732422, 25.755252838134766, 27.990310668945312, 30.22536849975586, 32.460426330566406, 34.69548416137695, 36.9305419921875, 39.16559982299805, 41.400657653808594, 43.63571548461914, 45.87077331542969, 48.105831146240234, 50.34088897705078, 52.57594680786133, 54.811004638671875, 57.04606246948242, 59.28112030029297, 61.516178131103516, 63.75123596191406, 65.98629760742188, 68.22135162353516, 70.45640563964844, 72.69146728515625]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 4.0, 7.0, 11.0, 6.0, 14.0, 5.0, 11.0, 17.0, 11.0, 12.0, 20.0, 16.0, 29.0, 22.0, 35.0, 32.0, 38.0, 40.0, 27.0, 40.0, 39.0, 43.0, 42.0, 43.0, 47.0, 33.0, 38.0, 33.0, 33.0, 50.0, 24.0, 26.0, 24.0, 24.0, 16.0, 17.0, 16.0, 11.0, 13.0, 6.0, 7.0, 3.0, 6.0, 4.0, 4.0, 4.0, 4.0, 4.0, 0.0, 2.0, 1.0, 0.0, 2.0], "bins": [-10.1484375, -9.8455810546875, -9.542724609375, -9.2398681640625, -8.93701171875, -8.6341552734375, -8.331298828125, -8.0284423828125, -7.7255859375, -7.4227294921875, -7.119873046875, -6.8170166015625, -6.51416015625, -6.2113037109375, -5.908447265625, -5.6055908203125, -5.302734375, -4.9998779296875, -4.697021484375, -4.3941650390625, -4.09130859375, -3.7884521484375, -3.485595703125, -3.1827392578125, -2.8798828125, -2.5770263671875, -2.274169921875, -1.9713134765625, -1.66845703125, -1.3656005859375, -1.062744140625, -0.7598876953125, -0.45703125, -0.1541748046875, 0.148681640625, 0.4515380859375, 0.75439453125, 1.0572509765625, 1.360107421875, 1.6629638671875, 1.9658203125, 2.2686767578125, 2.571533203125, 2.8743896484375, 3.17724609375, 3.4801025390625, 3.782958984375, 4.0858154296875, 4.388671875, 4.6915283203125, 4.994384765625, 5.2972412109375, 5.60009765625, 5.9029541015625, 6.205810546875, 6.5086669921875, 6.8115234375, 7.1143798828125, 7.417236328125, 7.7200927734375, 8.02294921875, 8.3258056640625, 8.628662109375, 8.9315185546875, 9.234375]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 3.0, 6.0, 12.0, 18.0, 17.0, 41.0, 63.0, 91.0, 123.0, 229.0, 329.0, 500.0, 832.0, 1240.0, 2051.0, 3318.0, 5622.0, 9364.0, 15534.0, 25984.0, 43913.0, 73218.0, 118270.0, 168825.0, 186316.0, 147945.0, 97566.0, 59219.0, 35046.0, 20976.0, 12575.0, 7562.0, 4531.0, 2738.0, 1663.0, 1035.0, 632.0, 397.0, 270.0, 183.0, 100.0, 70.0, 55.0, 27.0, 24.0, 14.0, 10.0, 7.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.8076171875, -0.783355712890625, -0.75909423828125, -0.734832763671875, -0.7105712890625, -0.686309814453125, -0.66204833984375, -0.637786865234375, -0.613525390625, -0.589263916015625, -0.56500244140625, -0.540740966796875, -0.5164794921875, -0.492218017578125, -0.46795654296875, -0.443695068359375, -0.41943359375, -0.395172119140625, -0.37091064453125, -0.346649169921875, -0.3223876953125, -0.298126220703125, -0.27386474609375, -0.249603271484375, -0.225341796875, -0.201080322265625, -0.17681884765625, -0.152557373046875, -0.1282958984375, -0.104034423828125, -0.07977294921875, -0.055511474609375, -0.03125, -0.006988525390625, 0.01727294921875, 0.041534423828125, 0.0657958984375, 0.090057373046875, 0.11431884765625, 0.138580322265625, 0.162841796875, 0.187103271484375, 0.21136474609375, 0.235626220703125, 0.2598876953125, 0.284149169921875, 0.30841064453125, 0.332672119140625, 0.35693359375, 0.381195068359375, 0.40545654296875, 0.429718017578125, 0.4539794921875, 0.478240966796875, 0.50250244140625, 0.526763916015625, 0.551025390625, 0.575286865234375, 0.59954833984375, 0.623809814453125, 0.6480712890625, 0.672332763671875, 0.69659423828125, 0.720855712890625, 0.7451171875]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 5.0, 2.0, 4.0, 6.0, 4.0, 8.0, 7.0, 12.0, 17.0, 9.0, 22.0, 17.0, 16.0, 36.0, 29.0, 37.0, 33.0, 42.0, 35.0, 42.0, 40.0, 38.0, 1064.0, 32.0, 45.0, 39.0, 32.0, 36.0, 35.0, 37.0, 38.0, 30.0, 21.0, 22.0, 19.0, 18.0, 15.0, 11.0, 16.0, 11.0, 9.0, 11.0, 6.0, 5.0, 1.0, 6.0, 3.0, 1.0, 4.0, 4.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-5.9296875, -5.7373046875, -5.544921875, -5.3525390625, -5.16015625, -4.9677734375, -4.775390625, -4.5830078125, -4.390625, -4.1982421875, -4.005859375, -3.8134765625, -3.62109375, -3.4287109375, -3.236328125, -3.0439453125, -2.8515625, -2.6591796875, -2.466796875, -2.2744140625, -2.08203125, -1.8896484375, -1.697265625, -1.5048828125, -1.3125, -1.1201171875, -0.927734375, -0.7353515625, -0.54296875, -0.3505859375, -0.158203125, 0.0341796875, 0.2265625, 0.4189453125, 0.611328125, 0.8037109375, 0.99609375, 1.1884765625, 1.380859375, 1.5732421875, 1.765625, 1.9580078125, 2.150390625, 2.3427734375, 2.53515625, 2.7275390625, 2.919921875, 3.1123046875, 3.3046875, 3.4970703125, 3.689453125, 3.8818359375, 4.07421875, 4.2666015625, 4.458984375, 4.6513671875, 4.84375, 5.0361328125, 5.228515625, 5.4208984375, 5.61328125, 5.8056640625, 5.998046875, 6.1904296875, 6.3828125]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 3.0, 1.0, 2.0, 7.0, 11.0, 17.0, 24.0, 45.0, 60.0, 88.0, 120.0, 183.0, 309.0, 458.0, 713.0, 1047.0, 1655.0, 2676.0, 4100.0, 6243.0, 9916.0, 15255.0, 23512.0, 36129.0, 54729.0, 79889.0, 110487.0, 175419.0, 1147219.0, 131496.0, 96894.0, 68146.0, 45572.0, 30229.0, 19329.0, 12382.0, 7985.0, 5218.0, 3396.0, 2169.0, 1356.0, 965.0, 561.0, 384.0, 255.0, 191.0, 111.0, 73.0, 42.0, 18.0, 18.0, 17.0, 10.0, 6.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.46875, -0.453765869140625, -0.43878173828125, -0.423797607421875, -0.4088134765625, -0.393829345703125, -0.37884521484375, -0.363861083984375, -0.348876953125, -0.333892822265625, -0.31890869140625, -0.303924560546875, -0.2889404296875, -0.273956298828125, -0.25897216796875, -0.243988037109375, -0.22900390625, -0.214019775390625, -0.19903564453125, -0.184051513671875, -0.1690673828125, -0.154083251953125, -0.13909912109375, -0.124114990234375, -0.109130859375, -0.094146728515625, -0.07916259765625, -0.064178466796875, -0.0491943359375, -0.034210205078125, -0.01922607421875, -0.004241943359375, 0.0107421875, 0.025726318359375, 0.04071044921875, 0.055694580078125, 0.0706787109375, 0.085662841796875, 0.10064697265625, 0.115631103515625, 0.130615234375, 0.145599365234375, 0.16058349609375, 0.175567626953125, 0.1905517578125, 0.205535888671875, 0.22052001953125, 0.235504150390625, 0.25048828125, 0.265472412109375, 0.28045654296875, 0.295440673828125, 0.3104248046875, 0.325408935546875, 0.34039306640625, 0.355377197265625, 0.370361328125, 0.385345458984375, 0.40032958984375, 0.415313720703125, 0.4302978515625, 0.445281982421875, 0.46026611328125, 0.475250244140625, 0.490234375]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 1.0, 2.0, 4.0, 3.0, 2.0, 2.0, 1.0, 5.0, 6.0, 3.0, 7.0, 7.0, 13.0, 8.0, 11.0, 21.0, 23.0, 26.0, 34.0, 44.0, 67.0, 93.0, 138.0, 119.0, 97.0, 81.0, 40.0, 34.0, 18.0, 24.0, 15.0, 8.0, 8.0, 8.0, 3.0, 10.0, 8.0, 4.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.0242767333984375, -0.0236358642578125, -0.0229949951171875, -0.0223541259765625, -0.0217132568359375, -0.0210723876953125, -0.0204315185546875, -0.0197906494140625, -0.0191497802734375, -0.0185089111328125, -0.0178680419921875, -0.0172271728515625, -0.0165863037109375, -0.0159454345703125, -0.0153045654296875, -0.0146636962890625, -0.0140228271484375, -0.0133819580078125, -0.0127410888671875, -0.0121002197265625, -0.0114593505859375, -0.0108184814453125, -0.0101776123046875, -0.0095367431640625, -0.0088958740234375, -0.0082550048828125, -0.0076141357421875, -0.0069732666015625, -0.0063323974609375, -0.0056915283203125, -0.0050506591796875, -0.0044097900390625, -0.0037689208984375, -0.0031280517578125, -0.0024871826171875, -0.0018463134765625, -0.0012054443359375, -0.0005645751953125, 7.62939453125e-05, 0.0007171630859375, 0.0013580322265625, 0.0019989013671875, 0.0026397705078125, 0.0032806396484375, 0.0039215087890625, 0.0045623779296875, 0.0052032470703125, 0.0058441162109375, 0.0064849853515625, 0.0071258544921875, 0.0077667236328125, 0.0084075927734375, 0.0090484619140625, 0.0096893310546875, 0.0103302001953125, 0.0109710693359375, 0.0116119384765625, 0.0122528076171875, 0.0128936767578125, 0.0135345458984375, 0.0141754150390625, 0.0148162841796875, 0.0154571533203125, 0.0160980224609375, 0.0167388916015625]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 1.0, 3.0, 1.0, 1.0, 3.0, 5.0, 6.0, 3.0, 11.0, 19.0, 20.0, 7.0, 21.0, 23.0, 21.0, 39.0, 68.0, 73.0, 105.0, 180.0, 332.0, 722.0, 11822.0, 1028823.0, 4686.0, 608.0, 286.0, 178.0, 105.0, 91.0, 64.0, 47.0, 32.0, 28.0, 27.0, 13.0, 14.0, 11.0, 9.0, 5.0, 7.0, 3.0, 4.0, 5.0, 3.0, 9.0, 5.0, 3.0, 2.0, 0.0, 1.0, 6.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 4.0], "bins": [-0.28466796875, -0.273895263671875, -0.26312255859375, -0.252349853515625, -0.2415771484375, -0.230804443359375, -0.22003173828125, -0.209259033203125, -0.198486328125, -0.187713623046875, -0.17694091796875, -0.166168212890625, -0.1553955078125, -0.144622802734375, -0.13385009765625, -0.123077392578125, -0.1123046875, -0.101531982421875, -0.09075927734375, -0.079986572265625, -0.0692138671875, -0.058441162109375, -0.04766845703125, -0.036895751953125, -0.026123046875, -0.015350341796875, -0.00457763671875, 0.006195068359375, 0.0169677734375, 0.027740478515625, 0.03851318359375, 0.049285888671875, 0.06005859375, 0.070831298828125, 0.08160400390625, 0.092376708984375, 0.1031494140625, 0.113922119140625, 0.12469482421875, 0.135467529296875, 0.146240234375, 0.157012939453125, 0.16778564453125, 0.178558349609375, 0.1893310546875, 0.200103759765625, 0.21087646484375, 0.221649169921875, 0.232421875, 0.243194580078125, 0.25396728515625, 0.264739990234375, 0.2755126953125, 0.286285400390625, 0.29705810546875, 0.307830810546875, 0.318603515625, 0.329376220703125, 0.34014892578125, 0.350921630859375, 0.3616943359375, 0.372467041015625, 0.38323974609375, 0.394012451171875, 0.40478515625]}, "gradients/decoder.transformer.h.2.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 491.0, 517.0, 5.0, 3.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03625253587961197, -0.031077347695827484, -0.025902159512043, -0.020726971328258514, -0.01555178314447403, -0.010376594960689545, -0.00520140677690506, -2.621859312057495e-05, 0.00514896959066391, 0.010324157774448395, 0.01549934595823288, 0.020674534142017365, 0.02584972232580185, 0.031024910509586334, 0.03620009869337082, 0.041375286877155304, 0.04655047506093979, 0.051725663244724274, 0.05690085142850876, 0.06207603961229324, 0.06725122779607773, 0.07242641597986221, 0.0776016041636467, 0.08277679234743118, 0.08795198053121567, 0.09312716871500015, 0.09830235689878464, 0.10347754508256912, 0.10865273326635361, 0.11382792145013809, 0.11900310963392258, 0.12417829781770706, 0.12935349345207214, 0.13452868163585663, 0.1397038698196411, 0.1448790580034256, 0.15005424618721008, 0.15522943437099457, 0.16040462255477905, 0.16557981073856354, 0.17075499892234802, 0.1759301871061325, 0.181105375289917, 0.18628056347370148, 0.19145575165748596, 0.19663093984127045, 0.20180612802505493, 0.20698131620883942, 0.2121565043926239, 0.2173316925764084, 0.22250688076019287, 0.22768206894397736, 0.23285725712776184, 0.23803244531154633, 0.2432076334953308, 0.2483828216791153, 0.2535580098628998, 0.25873321294784546, 0.26390838623046875, 0.26908355951309204, 0.2742587625980377, 0.2794339656829834, 0.2846091389656067, 0.28978431224823, 0.29495951533317566]}, "gradients/decoder.transformer.h.2.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 5.0, 5.0, 6.0, 3.0, 8.0, 20.0, 13.0, 14.0, 18.0, 22.0, 38.0, 45.0, 46.0, 40.0, 48.0, 60.0, 57.0, 66.0, 68.0, 48.0, 60.0, 42.0, 45.0, 32.0, 52.0, 32.0, 31.0, 18.0, 16.0, 13.0, 14.0, 10.0, 4.0, 1.0, 4.0, 5.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.018145620822906494, -0.01770263910293579, -0.017259659245610237, -0.016816679388284683, -0.01637369766831398, -0.015930715948343277, -0.015487736091017723, -0.015044755302369595, -0.014601774513721466, -0.014158793725073338, -0.013715812936425209, -0.01327283214777708, -0.012829851359128952, -0.012386870570480824, -0.011943889781832695, -0.011500908993184566, -0.011057928204536438, -0.01061494741588831, -0.010171966627240181, -0.009728985838592052, -0.009286005049943924, -0.008843024261295795, -0.008400043472647667, -0.007957062683999538, -0.00751408189535141, -0.007071101106703281, -0.006628120318055153, -0.006185139529407024, -0.005742158740758896, -0.005299177952110767, -0.004856197163462639, -0.00441321637481451, -0.003970235586166382, -0.0035272547975182533, -0.003084274008870125, -0.0026412932202219963, -0.002198312431573868, -0.0017553316429257393, -0.0013123508542776108, -0.0008693700656294823, -0.00042638927698135376, 1.659151166677475e-05, 0.00045957230031490326, 0.0009025530889630318, 0.0013455338776111603, 0.0017885146662592888, 0.0022314954549074173, 0.002674476243555546, 0.0031174570322036743, 0.003560437820851803, 0.004003418609499931, 0.00444639939814806, 0.004889380186796188, 0.005332360975444317, 0.005775341764092445, 0.006218322552740574, 0.006661303341388702, 0.007104284130036831, 0.007547264918684959, 0.007990245707333088, 0.008433226495981216, 0.008876207284629345, 0.009319188073277473, 0.009762168861925602, 0.01020514965057373]}, "gradients/decoder.transformer.h.2.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 4.0, 7.0, 11.0, 6.0, 14.0, 5.0, 11.0, 17.0, 11.0, 12.0, 20.0, 16.0, 29.0, 22.0, 35.0, 32.0, 38.0, 40.0, 27.0, 40.0, 39.0, 43.0, 41.0, 44.0, 47.0, 33.0, 38.0, 33.0, 33.0, 50.0, 24.0, 26.0, 24.0, 24.0, 16.0, 17.0, 16.0, 11.0, 13.0, 6.0, 7.0, 3.0, 6.0, 4.0, 4.0, 4.0, 4.0, 4.0, 0.0, 2.0, 1.0, 0.0, 2.0], "bins": [-10.1484375, -9.8455810546875, -9.542724609375, -9.2398681640625, -8.93701171875, -8.6341552734375, -8.331298828125, -8.0284423828125, -7.7255859375, -7.4227294921875, -7.119873046875, -6.8170166015625, -6.51416015625, -6.2113037109375, -5.908447265625, -5.6055908203125, -5.302734375, -4.9998779296875, -4.697021484375, -4.3941650390625, -4.09130859375, -3.7884521484375, -3.485595703125, -3.1827392578125, -2.8798828125, -2.5770263671875, -2.274169921875, -1.9713134765625, -1.66845703125, -1.3656005859375, -1.062744140625, -0.7598876953125, -0.45703125, -0.1541748046875, 0.148681640625, 0.4515380859375, 0.75439453125, 1.0572509765625, 1.360107421875, 1.6629638671875, 1.9658203125, 2.2686767578125, 2.571533203125, 2.8743896484375, 3.17724609375, 3.4801025390625, 3.782958984375, 4.0858154296875, 4.388671875, 4.6915283203125, 4.994384765625, 5.2972412109375, 5.60009765625, 5.9029541015625, 6.205810546875, 6.5086669921875, 6.8115234375, 7.1143798828125, 7.417236328125, 7.7200927734375, 8.02294921875, 8.3258056640625, 8.628662109375, 8.9315185546875, 9.234375]}, "gradients/decoder.transformer.h.2.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 3.0, 1.0, 3.0, 1.0, 4.0, 7.0, 4.0, 12.0, 10.0, 13.0, 14.0, 30.0, 35.0, 54.0, 69.0, 92.0, 116.0, 181.0, 208.0, 324.0, 462.0, 599.0, 873.0, 1351.0, 2220.0, 4027.0, 9173.0, 33588.0, 244075.0, 621341.0, 97281.0, 17598.0, 6159.0, 3077.0, 1707.0, 1136.0, 738.0, 504.0, 393.0, 304.0, 246.0, 128.0, 108.0, 68.0, 63.0, 39.0, 29.0, 31.0, 24.0, 15.0, 12.0, 4.0, 5.0, 5.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-22.625, -21.8779296875, -21.130859375, -20.3837890625, -19.63671875, -18.8896484375, -18.142578125, -17.3955078125, -16.6484375, -15.9013671875, -15.154296875, -14.4072265625, -13.66015625, -12.9130859375, -12.166015625, -11.4189453125, -10.671875, -9.9248046875, -9.177734375, -8.4306640625, -7.68359375, -6.9365234375, -6.189453125, -5.4423828125, -4.6953125, -3.9482421875, -3.201171875, -2.4541015625, -1.70703125, -0.9599609375, -0.212890625, 0.5341796875, 1.28125, 2.0283203125, 2.775390625, 3.5224609375, 4.26953125, 5.0166015625, 5.763671875, 6.5107421875, 7.2578125, 8.0048828125, 8.751953125, 9.4990234375, 10.24609375, 10.9931640625, 11.740234375, 12.4873046875, 13.234375, 13.9814453125, 14.728515625, 15.4755859375, 16.22265625, 16.9697265625, 17.716796875, 18.4638671875, 19.2109375, 19.9580078125, 20.705078125, 21.4521484375, 22.19921875, 22.9462890625, 23.693359375, 24.4404296875, 25.1875]}, "gradients/decoder.transformer.h.2.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 4.0, 7.0, 9.0, 11.0, 9.0, 12.0, 16.0, 24.0, 25.0, 19.0, 42.0, 43.0, 47.0, 74.0, 53.0, 111.0, 317.0, 1582.0, 162.0, 85.0, 65.0, 58.0, 46.0, 45.0, 30.0, 29.0, 31.0, 20.0, 17.0, 14.0, 16.0, 5.0, 6.0, 4.0, 5.0, 0.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-32.625, -31.750244140625, -30.87548828125, -30.000732421875, -29.1259765625, -28.251220703125, -27.37646484375, -26.501708984375, -25.626953125, -24.752197265625, -23.87744140625, -23.002685546875, -22.1279296875, -21.253173828125, -20.37841796875, -19.503662109375, -18.62890625, -17.754150390625, -16.87939453125, -16.004638671875, -15.1298828125, -14.255126953125, -13.38037109375, -12.505615234375, -11.630859375, -10.756103515625, -9.88134765625, -9.006591796875, -8.1318359375, -7.257080078125, -6.38232421875, -5.507568359375, -4.6328125, -3.758056640625, -2.88330078125, -2.008544921875, -1.1337890625, -0.259033203125, 0.61572265625, 1.490478515625, 2.365234375, 3.239990234375, 4.11474609375, 4.989501953125, 5.8642578125, 6.739013671875, 7.61376953125, 8.488525390625, 9.36328125, 10.238037109375, 11.11279296875, 11.987548828125, 12.8623046875, 13.737060546875, 14.61181640625, 15.486572265625, 16.361328125, 17.236083984375, 18.11083984375, 18.985595703125, 19.8603515625, 20.735107421875, 21.60986328125, 22.484619140625, 23.359375]}, "gradients/decoder.transformer.h.2.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 5.0, 8.0, 9.0, 3.0, 5.0, 15.0, 15.0, 19.0, 25.0, 43.0, 39.0, 56.0, 103.0, 138.0, 235.0, 328.0, 700.0, 1995.0, 641211.0, 2496677.0, 2282.0, 749.0, 364.0, 218.0, 136.0, 75.0, 76.0, 39.0, 31.0, 27.0, 20.0, 12.0, 14.0, 12.0, 8.0, 10.0, 5.0, 2.0, 5.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-66.875, -64.2041015625, -61.533203125, -58.8623046875, -56.19140625, -53.5205078125, -50.849609375, -48.1787109375, -45.5078125, -42.8369140625, -40.166015625, -37.4951171875, -34.82421875, -32.1533203125, -29.482421875, -26.8115234375, -24.140625, -21.4697265625, -18.798828125, -16.1279296875, -13.45703125, -10.7861328125, -8.115234375, -5.4443359375, -2.7734375, -0.1025390625, 2.568359375, 5.2392578125, 7.91015625, 10.5810546875, 13.251953125, 15.9228515625, 18.59375, 21.2646484375, 23.935546875, 26.6064453125, 29.27734375, 31.9482421875, 34.619140625, 37.2900390625, 39.9609375, 42.6318359375, 45.302734375, 47.9736328125, 50.64453125, 53.3154296875, 55.986328125, 58.6572265625, 61.328125, 63.9990234375, 66.669921875, 69.3408203125, 72.01171875, 74.6826171875, 77.353515625, 80.0244140625, 82.6953125, 85.3662109375, 88.037109375, 90.7080078125, 93.37890625, 96.0498046875, 98.720703125, 101.3916015625, 104.0625]}, "gradients/decoder.transformer.h.2.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 10.0, 40.0, 159.0, 447.0, 276.0, 71.0, 11.0, 2.0, 0.0, 2.0], "bins": [-359.7062072753906, -353.4726257324219, -347.239013671875, -341.00543212890625, -334.7718200683594, -328.5382385253906, -322.30462646484375, -316.071044921875, -309.8374328613281, -303.6038513183594, -297.3702392578125, -291.13665771484375, -284.9030456542969, -278.6694641113281, -272.43585205078125, -266.2022705078125, -259.96868896484375, -253.73509216308594, -247.50149536132812, -241.2678985595703, -235.0343017578125, -228.8007049560547, -222.56710815429688, -216.33352661132812, -210.09991455078125, -203.86631774902344, -197.63272094726562, -191.3991241455078, -185.16552734375, -178.9319305419922, -172.69833374023438, -166.46475219726562, -160.2311248779297, -153.99752807617188, -147.76393127441406, -141.53033447265625, -135.29673767089844, -129.06314086914062, -122.82955169677734, -116.59595489501953, -110.36236572265625, -104.12876892089844, -97.89517211914062, -91.66157531738281, -85.427978515625, -79.19438171386719, -72.9607925415039, -66.7271957397461, -60.493595123291016, -54.2599983215332, -48.026405334472656, -41.792808532714844, -35.55921173095703, -29.32561492919922, -23.092021942138672, -16.85842514038086, -10.624828338623047, -4.391232490539551, 1.8423633575439453, 8.075958251953125, 14.309555053710938, 20.54315185546875, 26.776744842529297, 33.01034164428711, 39.24393844604492]}, "gradients/decoder.transformer.h.2.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 5.0, 4.0, 0.0, 1.0, 5.0, 5.0, 5.0, 7.0, 6.0, 7.0, 7.0, 9.0, 4.0, 13.0, 14.0, 19.0, 23.0, 25.0, 14.0, 23.0, 22.0, 33.0, 35.0, 29.0, 40.0, 36.0, 39.0, 48.0, 44.0, 29.0, 29.0, 44.0, 41.0, 34.0, 40.0, 32.0, 24.0, 34.0, 25.0, 26.0, 28.0, 23.0, 11.0, 14.0, 5.0, 8.0, 13.0, 10.0, 6.0, 4.0, 2.0, 3.0, 7.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0], "bins": [-66.68267822265625, -64.56436157226562, -62.44605255126953, -60.32773971557617, -58.20942687988281, -56.09111022949219, -53.97279739379883, -51.85448455810547, -49.73617172241211, -47.61785888671875, -45.49954605102539, -43.38123321533203, -41.262916564941406, -39.14460754394531, -37.02629089355469, -34.90797805786133, -32.78966522216797, -30.67135238647461, -28.55303955078125, -26.434724807739258, -24.3164119720459, -22.19809913635254, -20.079784393310547, -17.961471557617188, -15.843158721923828, -13.724845886230469, -11.606532096862793, -9.488218307495117, -7.369905471801758, -5.251592636108398, -3.1332788467407227, -1.0149650573730469, 1.1033477783203125, 3.22166109085083, 5.339974403381348, 7.458287715911865, 9.576601028442383, 11.694913864135742, 13.813227653503418, 15.931541442871094, 18.049854278564453, 20.168167114257812, 22.286479949951172, 24.404794692993164, 26.523107528686523, 28.641420364379883, 30.759735107421875, 32.878047943115234, 34.996360778808594, 37.11467361450195, 39.23298645019531, 41.35129928588867, 43.46961212158203, 45.587928771972656, 47.706241607666016, 49.824554443359375, 51.942867279052734, 54.061180114746094, 56.17949295043945, 58.29780578613281, 60.41612243652344, 62.53443145751953, 64.65274810791016, 66.77105712890625, 68.88937377929688]}, "gradients/decoder.transformer.h.1.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 3.0, 6.0, 8.0, 5.0, 7.0, 6.0, 11.0, 6.0, 10.0, 20.0, 18.0, 18.0, 13.0, 22.0, 25.0, 34.0, 23.0, 21.0, 30.0, 34.0, 29.0, 36.0, 39.0, 38.0, 36.0, 42.0, 56.0, 32.0, 40.0, 29.0, 38.0, 24.0, 32.0, 27.0, 30.0, 29.0, 23.0, 22.0, 13.0, 12.0, 12.0, 7.0, 7.0, 13.0, 7.0, 4.0, 3.0, 4.0, 5.0, 3.0, 2.0, 0.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.15625, -8.8402099609375, -8.524169921875, -8.2081298828125, -7.89208984375, -7.5760498046875, -7.260009765625, -6.9439697265625, -6.6279296875, -6.3118896484375, -5.995849609375, -5.6798095703125, -5.36376953125, -5.0477294921875, -4.731689453125, -4.4156494140625, -4.099609375, -3.7835693359375, -3.467529296875, -3.1514892578125, -2.83544921875, -2.5194091796875, -2.203369140625, -1.8873291015625, -1.5712890625, -1.2552490234375, -0.939208984375, -0.6231689453125, -0.30712890625, 0.0089111328125, 0.324951171875, 0.6409912109375, 0.95703125, 1.2730712890625, 1.589111328125, 1.9051513671875, 2.22119140625, 2.5372314453125, 2.853271484375, 3.1693115234375, 3.4853515625, 3.8013916015625, 4.117431640625, 4.4334716796875, 4.74951171875, 5.0655517578125, 5.381591796875, 5.6976318359375, 6.013671875, 6.3297119140625, 6.645751953125, 6.9617919921875, 7.27783203125, 7.5938720703125, 7.909912109375, 8.2259521484375, 8.5419921875, 8.8580322265625, 9.174072265625, 9.4901123046875, 9.80615234375, 10.1221923828125, 10.438232421875, 10.7542724609375, 11.0703125]}, "gradients/decoder.transformer.h.1.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 3.0, 3.0, 4.0, 7.0, 8.0, 9.0, 13.0, 25.0, 33.0, 57.0, 59.0, 113.0, 134.0, 204.0, 353.0, 485.0, 814.0, 1287.0, 2138.0, 3500.0, 6419.0, 13086.0, 34882.0, 156665.0, 808675.0, 1953086.0, 942385.0, 198667.0, 39769.0, 14530.0, 7033.0, 3825.0, 2229.0, 1319.0, 853.0, 524.0, 361.0, 235.0, 147.0, 100.0, 87.0, 52.0, 38.0, 21.0, 13.0, 15.0, 7.0, 8.0, 3.0, 0.0, 5.0, 3.0, 3.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.6875, -13.208251953125, -12.72900390625, -12.249755859375, -11.7705078125, -11.291259765625, -10.81201171875, -10.332763671875, -9.853515625, -9.374267578125, -8.89501953125, -8.415771484375, -7.9365234375, -7.457275390625, -6.97802734375, -6.498779296875, -6.01953125, -5.540283203125, -5.06103515625, -4.581787109375, -4.1025390625, -3.623291015625, -3.14404296875, -2.664794921875, -2.185546875, -1.706298828125, -1.22705078125, -0.747802734375, -0.2685546875, 0.210693359375, 0.68994140625, 1.169189453125, 1.6484375, 2.127685546875, 2.60693359375, 3.086181640625, 3.5654296875, 4.044677734375, 4.52392578125, 5.003173828125, 5.482421875, 5.961669921875, 6.44091796875, 6.920166015625, 7.3994140625, 7.878662109375, 8.35791015625, 8.837158203125, 9.31640625, 9.795654296875, 10.27490234375, 10.754150390625, 11.2333984375, 11.712646484375, 12.19189453125, 12.671142578125, 13.150390625, 13.629638671875, 14.10888671875, 14.588134765625, 15.0673828125, 15.546630859375, 16.02587890625, 16.505126953125, 16.984375]}, "gradients/decoder.transformer.h.1.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 1.0, 6.0, 2.0, 4.0, 7.0, 15.0, 12.0, 26.0, 34.0, 41.0, 58.0, 97.0, 121.0, 178.0, 255.0, 354.0, 462.0, 601.0, 470.0, 411.0, 262.0, 199.0, 137.0, 96.0, 77.0, 50.0, 33.0, 25.0, 20.0, 9.0, 6.0, 4.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.15625, -18.552490234375, -17.94873046875, -17.344970703125, -16.7412109375, -16.137451171875, -15.53369140625, -14.929931640625, -14.326171875, -13.722412109375, -13.11865234375, -12.514892578125, -11.9111328125, -11.307373046875, -10.70361328125, -10.099853515625, -9.49609375, -8.892333984375, -8.28857421875, -7.684814453125, -7.0810546875, -6.477294921875, -5.87353515625, -5.269775390625, -4.666015625, -4.062255859375, -3.45849609375, -2.854736328125, -2.2509765625, -1.647216796875, -1.04345703125, -0.439697265625, 0.1640625, 0.767822265625, 1.37158203125, 1.975341796875, 2.5791015625, 3.182861328125, 3.78662109375, 4.390380859375, 4.994140625, 5.597900390625, 6.20166015625, 6.805419921875, 7.4091796875, 8.012939453125, 8.61669921875, 9.220458984375, 9.82421875, 10.427978515625, 11.03173828125, 11.635498046875, 12.2392578125, 12.843017578125, 13.44677734375, 14.050537109375, 14.654296875, 15.258056640625, 15.86181640625, 16.465576171875, 17.0693359375, 17.673095703125, 18.27685546875, 18.880615234375, 19.484375]}, "gradients/decoder.transformer.h.1.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 5.0, 3.0, 4.0, 8.0, 12.0, 17.0, 16.0, 19.0, 35.0, 49.0, 108.0, 166.0, 269.0, 467.0, 949.0, 2362.0, 13418.0, 645786.0, 3465996.0, 56864.0, 4602.0, 1479.0, 698.0, 355.0, 223.0, 107.0, 95.0, 56.0, 31.0, 30.0, 18.0, 11.0, 14.0, 6.0, 2.0, 4.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-42.375, -41.01806640625, -39.6611328125, -38.30419921875, -36.947265625, -35.59033203125, -34.2333984375, -32.87646484375, -31.51953125, -30.16259765625, -28.8056640625, -27.44873046875, -26.091796875, -24.73486328125, -23.3779296875, -22.02099609375, -20.6640625, -19.30712890625, -17.9501953125, -16.59326171875, -15.236328125, -13.87939453125, -12.5224609375, -11.16552734375, -9.80859375, -8.45166015625, -7.0947265625, -5.73779296875, -4.380859375, -3.02392578125, -1.6669921875, -0.31005859375, 1.046875, 2.40380859375, 3.7607421875, 5.11767578125, 6.474609375, 7.83154296875, 9.1884765625, 10.54541015625, 11.90234375, 13.25927734375, 14.6162109375, 15.97314453125, 17.330078125, 18.68701171875, 20.0439453125, 21.40087890625, 22.7578125, 24.11474609375, 25.4716796875, 26.82861328125, 28.185546875, 29.54248046875, 30.8994140625, 32.25634765625, 33.61328125, 34.97021484375, 36.3271484375, 37.68408203125, 39.041015625, 40.39794921875, 41.7548828125, 43.11181640625, 44.46875]}, "gradients/decoder.transformer.h.1.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 7.0, 3.0, 6.0, 18.0, 24.0, 63.0, 83.0, 96.0, 126.0, 135.0, 141.0, 114.0, 78.0, 44.0, 39.0, 14.0, 12.0, 1.0, 8.0, 1.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-97.60442352294922, -95.0711898803711, -92.5379638671875, -90.00473022460938, -87.47149658203125, -84.93826293945312, -82.405029296875, -79.8718032836914, -77.33856964111328, -74.80533599853516, -72.27210998535156, -69.73887634277344, -67.20564270019531, -64.67240905761719, -62.13917922973633, -59.60594940185547, -57.072715759277344, -54.53948211669922, -52.00625228881836, -49.4730224609375, -46.939788818359375, -44.40655517578125, -41.87332534790039, -39.34009552001953, -36.806861877441406, -34.27362823486328, -31.740398406982422, -29.20716667175293, -26.673934936523438, -24.140703201293945, -21.607471466064453, -19.07423973083496, -16.541000366210938, -14.007768630981445, -11.474536895751953, -8.941305160522461, -6.408073425292969, -3.8748416900634766, -1.3416099548339844, 1.1916217803955078, 3.724853515625, 6.258085250854492, 8.791316986083984, 11.324548721313477, 13.857780456542969, 16.39101219177246, 18.924243927001953, 21.457475662231445, 23.990707397460938, 26.52393913269043, 29.057170867919922, 31.590402603149414, 34.123634338378906, 36.65686798095703, 39.19009780883789, 41.72332763671875, 44.256561279296875, 46.789794921875, 49.32302474975586, 51.85625457763672, 54.389488220214844, 56.92272186279297, 59.45595169067383, 61.98918151855469, 64.52241516113281]}, "gradients/decoder.transformer.h.1.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 3.0, 3.0, 2.0, 5.0, 3.0, 12.0, 16.0, 15.0, 19.0, 18.0, 19.0, 16.0, 16.0, 18.0, 31.0, 21.0, 24.0, 35.0, 42.0, 43.0, 37.0, 27.0, 48.0, 32.0, 45.0, 41.0, 32.0, 31.0, 27.0, 35.0, 40.0, 30.0, 22.0, 24.0, 22.0, 19.0, 28.0, 22.0, 15.0, 7.0, 13.0, 9.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 3.0, 4.0, 1.0, 4.0, 0.0, 1.0, 2.0], "bins": [-59.865234375, -58.04966354370117, -56.23409652709961, -54.41852569580078, -52.60295486450195, -50.787384033203125, -48.97181701660156, -47.156246185302734, -45.340675354003906, -43.52510452270508, -41.709537506103516, -39.89396667480469, -38.07839584350586, -36.26282501220703, -34.44725799560547, -32.63168716430664, -30.816120147705078, -29.000551223754883, -27.184980392456055, -25.36941146850586, -23.55384063720703, -21.738271713256836, -19.92270278930664, -18.107131958007812, -16.291563034057617, -14.475993156433105, -12.660423278808594, -10.844854354858398, -9.029284477233887, -7.213714599609375, -5.39814567565918, -3.582575798034668, -1.7670059204101562, 0.04856371879577637, 1.864133358001709, 3.6797027587890625, 5.495272636413574, 7.310842514038086, 9.126411437988281, 10.941981315612793, 12.757551193237305, 14.573121070861816, 16.388690948486328, 18.204259872436523, 20.01982879638672, 21.835399627685547, 23.650968551635742, 25.466537475585938, 27.282108306884766, 29.09767723083496, 30.91324806213379, 32.728816986083984, 34.54438781738281, 36.359954833984375, 38.1755256652832, 39.99109649658203, 41.806663513183594, 43.62223434448242, 45.437801361083984, 47.25337219238281, 49.06894302368164, 50.88451385498047, 52.70008087158203, 54.51565170288086, 56.33122253417969]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 7.0, 4.0, 2.0, 2.0, 2.0, 6.0, 6.0, 8.0, 11.0, 10.0, 18.0, 15.0, 19.0, 16.0, 23.0, 19.0, 26.0, 17.0, 35.0, 37.0, 36.0, 34.0, 47.0, 49.0, 48.0, 43.0, 50.0, 31.0, 35.0, 40.0, 31.0, 31.0, 29.0, 38.0, 30.0, 27.0, 13.0, 19.0, 17.0, 12.0, 15.0, 14.0, 7.0, 10.0, 3.0, 6.0, 6.0, 3.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.1328125, -7.8729248046875, -7.613037109375, -7.3531494140625, -7.09326171875, -6.8333740234375, -6.573486328125, -6.3135986328125, -6.0537109375, -5.7938232421875, -5.533935546875, -5.2740478515625, -5.01416015625, -4.7542724609375, -4.494384765625, -4.2344970703125, -3.974609375, -3.7147216796875, -3.454833984375, -3.1949462890625, -2.93505859375, -2.6751708984375, -2.415283203125, -2.1553955078125, -1.8955078125, -1.6356201171875, -1.375732421875, -1.1158447265625, -0.85595703125, -0.5960693359375, -0.336181640625, -0.0762939453125, 0.18359375, 0.4434814453125, 0.703369140625, 0.9632568359375, 1.22314453125, 1.4830322265625, 1.742919921875, 2.0028076171875, 2.2626953125, 2.5225830078125, 2.782470703125, 3.0423583984375, 3.30224609375, 3.5621337890625, 3.822021484375, 4.0819091796875, 4.341796875, 4.6016845703125, 4.861572265625, 5.1214599609375, 5.38134765625, 5.6412353515625, 5.901123046875, 6.1610107421875, 6.4208984375, 6.6807861328125, 6.940673828125, 7.2005615234375, 7.46044921875, 7.7203369140625, 7.980224609375, 8.2401123046875, 8.5]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 7.0, 4.0, 4.0, 7.0, 10.0, 17.0, 19.0, 29.0, 55.0, 62.0, 107.0, 187.0, 259.0, 412.0, 575.0, 874.0, 1315.0, 2021.0, 3040.0, 4799.0, 7342.0, 11382.0, 18026.0, 28844.0, 45801.0, 71527.0, 106640.0, 146232.0, 168695.0, 142024.0, 102123.0, 67511.0, 43045.0, 27191.0, 17326.0, 11098.0, 7097.0, 4527.0, 2870.0, 1917.0, 1195.0, 787.0, 497.0, 348.0, 218.0, 142.0, 99.0, 86.0, 64.0, 28.0, 36.0, 15.0, 14.0, 4.0, 9.0, 2.0, 1.0, 2.0, 4.0], "bins": [-0.57861328125, -0.561309814453125, -0.54400634765625, -0.526702880859375, -0.5093994140625, -0.492095947265625, -0.47479248046875, -0.457489013671875, -0.440185546875, -0.422882080078125, -0.40557861328125, -0.388275146484375, -0.3709716796875, -0.353668212890625, -0.33636474609375, -0.319061279296875, -0.3017578125, -0.284454345703125, -0.26715087890625, -0.249847412109375, -0.2325439453125, -0.215240478515625, -0.19793701171875, -0.180633544921875, -0.163330078125, -0.146026611328125, -0.12872314453125, -0.111419677734375, -0.0941162109375, -0.076812744140625, -0.05950927734375, -0.042205810546875, -0.02490234375, -0.007598876953125, 0.00970458984375, 0.027008056640625, 0.0443115234375, 0.061614990234375, 0.07891845703125, 0.096221923828125, 0.113525390625, 0.130828857421875, 0.14813232421875, 0.165435791015625, 0.1827392578125, 0.200042724609375, 0.21734619140625, 0.234649658203125, 0.251953125, 0.269256591796875, 0.28656005859375, 0.303863525390625, 0.3211669921875, 0.338470458984375, 0.35577392578125, 0.373077392578125, 0.390380859375, 0.407684326171875, 0.42498779296875, 0.442291259765625, 0.4595947265625, 0.476898193359375, 0.49420166015625, 0.511505126953125, 0.52880859375]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 0.0, 2.0, 3.0, 1.0, 1.0, 1.0, 5.0, 2.0, 8.0, 6.0, 6.0, 5.0, 10.0, 13.0, 13.0, 16.0, 25.0, 23.0, 30.0, 37.0, 24.0, 36.0, 37.0, 41.0, 35.0, 52.0, 34.0, 34.0, 1056.0, 39.0, 36.0, 36.0, 47.0, 41.0, 28.0, 37.0, 21.0, 26.0, 29.0, 19.0, 23.0, 14.0, 13.0, 16.0, 9.0, 13.0, 7.0, 7.0, 5.0, 4.0, 5.0, 3.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.55859375, -5.384521484375, -5.21044921875, -5.036376953125, -4.8623046875, -4.688232421875, -4.51416015625, -4.340087890625, -4.166015625, -3.991943359375, -3.81787109375, -3.643798828125, -3.4697265625, -3.295654296875, -3.12158203125, -2.947509765625, -2.7734375, -2.599365234375, -2.42529296875, -2.251220703125, -2.0771484375, -1.903076171875, -1.72900390625, -1.554931640625, -1.380859375, -1.206787109375, -1.03271484375, -0.858642578125, -0.6845703125, -0.510498046875, -0.33642578125, -0.162353515625, 0.01171875, 0.185791015625, 0.35986328125, 0.533935546875, 0.7080078125, 0.882080078125, 1.05615234375, 1.230224609375, 1.404296875, 1.578369140625, 1.75244140625, 1.926513671875, 2.1005859375, 2.274658203125, 2.44873046875, 2.622802734375, 2.796875, 2.970947265625, 3.14501953125, 3.319091796875, 3.4931640625, 3.667236328125, 3.84130859375, 4.015380859375, 4.189453125, 4.363525390625, 4.53759765625, 4.711669921875, 4.8857421875, 5.059814453125, 5.23388671875, 5.407958984375, 5.58203125]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 6.0, 8.0, 8.0, 14.0, 22.0, 30.0, 54.0, 76.0, 111.0, 155.0, 259.0, 401.0, 664.0, 1092.0, 1645.0, 2718.0, 4228.0, 6792.0, 10954.0, 17249.0, 26937.0, 42072.0, 64359.0, 93989.0, 127659.0, 1160977.0, 182580.0, 115752.0, 82616.0, 55603.0, 35934.0, 23127.0, 14539.0, 9109.0, 5673.0, 3580.0, 2270.0, 1440.0, 885.0, 589.0, 360.0, 219.0, 140.0, 96.0, 56.0, 34.0, 18.0, 14.0, 13.0, 7.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.431396484375, -0.41783905029296875, -0.4042816162109375, -0.39072418212890625, -0.377166748046875, -0.36360931396484375, -0.3500518798828125, -0.33649444580078125, -0.32293701171875, -0.30937957763671875, -0.2958221435546875, -0.28226470947265625, -0.268707275390625, -0.25514984130859375, -0.2415924072265625, -0.22803497314453125, -0.2144775390625, -0.20092010498046875, -0.1873626708984375, -0.17380523681640625, -0.160247802734375, -0.14669036865234375, -0.1331329345703125, -0.11957550048828125, -0.10601806640625, -0.09246063232421875, -0.0789031982421875, -0.06534576416015625, -0.051788330078125, -0.03823089599609375, -0.0246734619140625, -0.01111602783203125, 0.00244140625, 0.01599884033203125, 0.0295562744140625, 0.04311370849609375, 0.056671142578125, 0.07022857666015625, 0.0837860107421875, 0.09734344482421875, 0.11090087890625, 0.12445831298828125, 0.1380157470703125, 0.15157318115234375, 0.165130615234375, 0.17868804931640625, 0.1922454833984375, 0.20580291748046875, 0.2193603515625, 0.23291778564453125, 0.2464752197265625, 0.26003265380859375, 0.273590087890625, 0.28714752197265625, 0.3007049560546875, 0.31426239013671875, 0.32781982421875, 0.34137725830078125, 0.3549346923828125, 0.36849212646484375, 0.382049560546875, 0.39560699462890625, 0.4091644287109375, 0.42272186279296875, 0.436279296875]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 0.0, 2.0, 6.0, 6.0, 3.0, 8.0, 7.0, 13.0, 11.0, 16.0, 23.0, 26.0, 39.0, 48.0, 66.0, 87.0, 90.0, 96.0, 101.0, 63.0, 68.0, 47.0, 34.0, 36.0, 19.0, 27.0, 8.0, 10.0, 12.0, 7.0, 7.0, 8.0, 4.0, 2.0, 2.0, 2.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.01265716552734375, -0.012302517890930176, -0.011947870254516602, -0.011593222618103027, -0.011238574981689453, -0.010883927345275879, -0.010529279708862305, -0.01017463207244873, -0.009819984436035156, -0.009465336799621582, -0.009110689163208008, -0.008756041526794434, -0.00840139389038086, -0.008046746253967285, -0.007692098617553711, -0.007337450981140137, -0.0069828033447265625, -0.006628155708312988, -0.006273508071899414, -0.00591886043548584, -0.005564212799072266, -0.005209565162658691, -0.004854917526245117, -0.004500269889831543, -0.004145622253417969, -0.0037909746170043945, -0.0034363269805908203, -0.003081679344177246, -0.002727031707763672, -0.0023723840713500977, -0.0020177364349365234, -0.0016630887985229492, -0.001308441162109375, -0.0009537935256958008, -0.0005991458892822266, -0.00024449825286865234, 0.00011014938354492188, 0.0004647970199584961, 0.0008194446563720703, 0.0011740922927856445, 0.0015287399291992188, 0.001883387565612793, 0.002238035202026367, 0.0025926828384399414, 0.0029473304748535156, 0.00330197811126709, 0.003656625747680664, 0.004011273384094238, 0.0043659210205078125, 0.004720568656921387, 0.005075216293334961, 0.005429863929748535, 0.005784511566162109, 0.006139159202575684, 0.006493806838989258, 0.006848454475402832, 0.007203102111816406, 0.0075577497482299805, 0.007912397384643555, 0.008267045021057129, 0.008621692657470703, 0.008976340293884277, 0.009330987930297852, 0.009685635566711426, 0.010040283203125]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 1.0, 4.0, 6.0, 5.0, 6.0, 7.0, 9.0, 18.0, 23.0, 25.0, 33.0, 42.0, 57.0, 79.0, 127.0, 159.0, 275.0, 455.0, 915.0, 29463.0, 1011736.0, 3261.0, 703.0, 390.0, 218.0, 161.0, 97.0, 77.0, 43.0, 50.0, 29.0, 24.0, 12.0, 10.0, 10.0, 7.0, 7.0, 1.0, 7.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.1566162109375, -0.15108489990234375, -0.1455535888671875, -0.14002227783203125, -0.134490966796875, -0.12895965576171875, -0.1234283447265625, -0.11789703369140625, -0.11236572265625, -0.10683441162109375, -0.1013031005859375, -0.09577178955078125, -0.090240478515625, -0.08470916748046875, -0.0791778564453125, -0.07364654541015625, -0.068115234375, -0.06258392333984375, -0.0570526123046875, -0.05152130126953125, -0.045989990234375, -0.04045867919921875, -0.0349273681640625, -0.02939605712890625, -0.02386474609375, -0.01833343505859375, -0.0128021240234375, -0.00727081298828125, -0.001739501953125, 0.00379180908203125, 0.0093231201171875, 0.01485443115234375, 0.0203857421875, 0.02591705322265625, 0.0314483642578125, 0.03697967529296875, 0.042510986328125, 0.04804229736328125, 0.0535736083984375, 0.05910491943359375, 0.06463623046875, 0.07016754150390625, 0.0756988525390625, 0.08123016357421875, 0.086761474609375, 0.09229278564453125, 0.0978240966796875, 0.10335540771484375, 0.10888671875, 0.11441802978515625, 0.1199493408203125, 0.12548065185546875, 0.131011962890625, 0.13654327392578125, 0.1420745849609375, 0.14760589599609375, 0.15313720703125, 0.15866851806640625, 0.1641998291015625, 0.16973114013671875, 0.175262451171875, 0.18079376220703125, 0.1863250732421875, 0.19185638427734375, 0.1973876953125]}, "gradients/decoder.transformer.h.1.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 94.0, 883.0, 35.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09171372652053833, -0.08999724686145782, -0.08828076720237732, -0.08656428009271622, -0.08484780043363571, -0.0831313207745552, -0.0814148336648941, -0.0796983540058136, -0.0779818743467331, -0.07626539468765259, -0.07454891502857208, -0.07283242791891098, -0.07111594825983047, -0.06939946860074997, -0.06768298149108887, -0.06596650183200836, -0.06425002217292786, -0.06253354251384735, -0.06081705912947655, -0.05910057574510574, -0.05738409608602524, -0.05566761642694473, -0.05395113304257393, -0.052234649658203125, -0.05051816999912262, -0.048801690340042114, -0.04708520695567131, -0.04536872357130051, -0.04365224391222, -0.041935764253139496, -0.04021928086876869, -0.03850279748439789, -0.03678631782531738, -0.03506983816623688, -0.033353354781866074, -0.03163687139749527, -0.029920391738414764, -0.02820391021668911, -0.026487428694963455, -0.0247709471732378, -0.023054463788866997, -0.021337982267141342, -0.019621500745415688, -0.017905019223690033, -0.01618853770196438, -0.014472056180238724, -0.01275557465851307, -0.011039093136787415, -0.00932261161506176, -0.007606130093336105, -0.005889648571610451, -0.004173167049884796, -0.0024566855281591415, -0.0007402040064334869, 0.0009762775152921677, 0.0026927590370178223, 0.004409240558743477, 0.0061257220804691315, 0.007842203602194786, 0.00955868512392044, 0.011275166645646095, 0.01299164816737175, 0.014708129689097404, 0.01642461121082306, 0.018141092732548714]}, "gradients/decoder.transformer.h.1.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 11.0, 6.0, 13.0, 8.0, 6.0, 23.0, 18.0, 27.0, 28.0, 23.0, 43.0, 58.0, 37.0, 48.0, 58.0, 46.0, 52.0, 58.0, 52.0, 41.0, 65.0, 58.0, 50.0, 31.0, 26.0, 36.0, 25.0, 21.0, 11.0, 5.0, 5.0, 9.0, 2.0, 2.0, 3.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005323052406311035, -0.005099687725305557, -0.004876323044300079, -0.0046529583632946014, -0.0044295936822891235, -0.004206229001283646, -0.003982864320278168, -0.00375949963927269, -0.003536134958267212, -0.003312770277261734, -0.003089405596256256, -0.002866040915250778, -0.0026426762342453003, -0.0024193115532398224, -0.0021959468722343445, -0.0019725821912288666, -0.0017492175102233887, -0.0015258528292179108, -0.0013024881482124329, -0.001079123467206955, -0.000855758786201477, -0.0006323941051959991, -0.00040902942419052124, -0.00018566474318504333, 3.769993782043457e-05, 0.0002610646188259125, 0.0004844292998313904, 0.0007077939808368683, 0.0009311586618423462, 0.001154523342847824, 0.001377888023853302, 0.00160125270485878, 0.0018246173858642578, 0.0020479820668697357, 0.0022713467478752136, 0.0024947114288806915, 0.0027180761098861694, 0.0029414407908916473, 0.0031648054718971252, 0.003388170152902603, 0.003611534833908081, 0.003834899514913559, 0.004058264195919037, 0.004281628876924515, 0.004504993557929993, 0.004728358238935471, 0.0049517229199409485, 0.005175087600946426, 0.005398452281951904, 0.005621816962957382, 0.00584518164396286, 0.006068546324968338, 0.006291911005973816, 0.006515275686979294, 0.006738640367984772, 0.00696200504899025, 0.0071853697299957275, 0.0074087344110012054, 0.007632099092006683, 0.007855463773012161, 0.00807882845401764, 0.008302193135023117, 0.008525557816028595, 0.008748922497034073, 0.00897228717803955]}, "gradients/decoder.transformer.h.1.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 7.0, 4.0, 2.0, 2.0, 2.0, 6.0, 6.0, 8.0, 11.0, 9.0, 19.0, 15.0, 19.0, 16.0, 23.0, 19.0, 26.0, 17.0, 35.0, 37.0, 36.0, 34.0, 47.0, 49.0, 48.0, 43.0, 50.0, 31.0, 35.0, 40.0, 31.0, 31.0, 29.0, 38.0, 30.0, 27.0, 13.0, 19.0, 17.0, 12.0, 15.0, 14.0, 7.0, 10.0, 3.0, 6.0, 6.0, 3.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.1328125, -7.8729248046875, -7.613037109375, -7.3531494140625, -7.09326171875, -6.8333740234375, -6.573486328125, -6.3135986328125, -6.0537109375, -5.7938232421875, -5.533935546875, -5.2740478515625, -5.01416015625, -4.7542724609375, -4.494384765625, -4.2344970703125, -3.974609375, -3.7147216796875, -3.454833984375, -3.1949462890625, -2.93505859375, -2.6751708984375, -2.415283203125, -2.1553955078125, -1.8955078125, -1.6356201171875, -1.375732421875, -1.1158447265625, -0.85595703125, -0.5960693359375, -0.336181640625, -0.0762939453125, 0.18359375, 0.4434814453125, 0.703369140625, 0.9632568359375, 1.22314453125, 1.4830322265625, 1.742919921875, 2.0028076171875, 2.2626953125, 2.5225830078125, 2.782470703125, 3.0423583984375, 3.30224609375, 3.5621337890625, 3.822021484375, 4.0819091796875, 4.341796875, 4.6016845703125, 4.861572265625, 5.1214599609375, 5.38134765625, 5.6412353515625, 5.901123046875, 6.1610107421875, 6.4208984375, 6.6807861328125, 6.940673828125, 7.2005615234375, 7.46044921875, 7.7203369140625, 7.980224609375, 8.2401123046875, 8.5]}, "gradients/decoder.transformer.h.1.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 3.0, 0.0, 4.0, 9.0, 2.0, 9.0, 8.0, 7.0, 13.0, 16.0, 17.0, 20.0, 31.0, 39.0, 48.0, 73.0, 106.0, 140.0, 187.0, 204.0, 357.0, 483.0, 777.0, 1119.0, 1955.0, 4183.0, 11665.0, 42597.0, 226110.0, 588922.0, 125876.0, 27205.0, 8241.0, 3244.0, 1675.0, 932.0, 658.0, 448.0, 330.0, 211.0, 163.0, 104.0, 97.0, 61.0, 50.0, 42.0, 21.0, 34.0, 20.0, 10.0, 14.0, 8.0, 7.0, 2.0, 5.0, 2.0, 1.0, 4.0, 0.0, 2.0, 1.0], "bins": [-13.6796875, -13.2567138671875, -12.833740234375, -12.4107666015625, -11.98779296875, -11.5648193359375, -11.141845703125, -10.7188720703125, -10.2958984375, -9.8729248046875, -9.449951171875, -9.0269775390625, -8.60400390625, -8.1810302734375, -7.758056640625, -7.3350830078125, -6.912109375, -6.4891357421875, -6.066162109375, -5.6431884765625, -5.22021484375, -4.7972412109375, -4.374267578125, -3.9512939453125, -3.5283203125, -3.1053466796875, -2.682373046875, -2.2593994140625, -1.83642578125, -1.4134521484375, -0.990478515625, -0.5675048828125, -0.14453125, 0.2784423828125, 0.701416015625, 1.1243896484375, 1.54736328125, 1.9703369140625, 2.393310546875, 2.8162841796875, 3.2392578125, 3.6622314453125, 4.085205078125, 4.5081787109375, 4.93115234375, 5.3541259765625, 5.777099609375, 6.2000732421875, 6.623046875, 7.0460205078125, 7.468994140625, 7.8919677734375, 8.31494140625, 8.7379150390625, 9.160888671875, 9.5838623046875, 10.0068359375, 10.4298095703125, 10.852783203125, 11.2757568359375, 11.69873046875, 12.1217041015625, 12.544677734375, 12.9676513671875, 13.390625]}, "gradients/decoder.transformer.h.1.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 3.0, 6.0, 5.0, 5.0, 12.0, 13.0, 17.0, 13.0, 21.0, 29.0, 34.0, 31.0, 44.0, 55.0, 60.0, 104.0, 322.0, 1701.0, 178.0, 81.0, 45.0, 56.0, 40.0, 34.0, 38.0, 23.0, 15.0, 10.0, 10.0, 12.0, 6.0, 5.0, 3.0, 9.0, 4.0, 5.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-28.859375, -27.991943359375, -27.12451171875, -26.257080078125, -25.3896484375, -24.522216796875, -23.65478515625, -22.787353515625, -21.919921875, -21.052490234375, -20.18505859375, -19.317626953125, -18.4501953125, -17.582763671875, -16.71533203125, -15.847900390625, -14.98046875, -14.113037109375, -13.24560546875, -12.378173828125, -11.5107421875, -10.643310546875, -9.77587890625, -8.908447265625, -8.041015625, -7.173583984375, -6.30615234375, -5.438720703125, -4.5712890625, -3.703857421875, -2.83642578125, -1.968994140625, -1.1015625, -0.234130859375, 0.63330078125, 1.500732421875, 2.3681640625, 3.235595703125, 4.10302734375, 4.970458984375, 5.837890625, 6.705322265625, 7.57275390625, 8.440185546875, 9.3076171875, 10.175048828125, 11.04248046875, 11.909912109375, 12.77734375, 13.644775390625, 14.51220703125, 15.379638671875, 16.2470703125, 17.114501953125, 17.98193359375, 18.849365234375, 19.716796875, 20.584228515625, 21.45166015625, 22.319091796875, 23.1865234375, 24.053955078125, 24.92138671875, 25.788818359375, 26.65625]}, "gradients/decoder.transformer.h.1.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 4.0, 5.0, 2.0, 5.0, 4.0, 12.0, 12.0, 9.0, 17.0, 21.0, 31.0, 57.0, 95.0, 129.0, 217.0, 482.0, 1201.0, 185061.0, 2955607.0, 1473.0, 540.0, 293.0, 158.0, 83.0, 57.0, 27.0, 26.0, 27.0, 12.0, 9.0, 12.0, 6.0, 4.0, 1.0, 7.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-69.125, -66.7490234375, -64.373046875, -61.9970703125, -59.62109375, -57.2451171875, -54.869140625, -52.4931640625, -50.1171875, -47.7412109375, -45.365234375, -42.9892578125, -40.61328125, -38.2373046875, -35.861328125, -33.4853515625, -31.109375, -28.7333984375, -26.357421875, -23.9814453125, -21.60546875, -19.2294921875, -16.853515625, -14.4775390625, -12.1015625, -9.7255859375, -7.349609375, -4.9736328125, -2.59765625, -0.2216796875, 2.154296875, 4.5302734375, 6.90625, 9.2822265625, 11.658203125, 14.0341796875, 16.41015625, 18.7861328125, 21.162109375, 23.5380859375, 25.9140625, 28.2900390625, 30.666015625, 33.0419921875, 35.41796875, 37.7939453125, 40.169921875, 42.5458984375, 44.921875, 47.2978515625, 49.673828125, 52.0498046875, 54.42578125, 56.8017578125, 59.177734375, 61.5537109375, 63.9296875, 66.3056640625, 68.681640625, 71.0576171875, 73.43359375, 75.8095703125, 78.185546875, 80.5615234375, 82.9375]}, "gradients/decoder.transformer.h.1.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 6.0, 18.0, 31.0, 74.0, 142.0, 220.0, 212.0, 142.0, 90.0, 48.0, 18.0, 6.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.91883850097656, -46.516788482666016, -45.114742279052734, -43.71269226074219, -42.310646057128906, -40.90859603881836, -39.50654602050781, -38.10449981689453, -36.70245361328125, -35.3004035949707, -33.89835739135742, -32.496307373046875, -31.094261169433594, -29.692211151123047, -28.290163040161133, -26.88811492919922, -25.486064910888672, -24.084016799926758, -22.681968688964844, -21.279918670654297, -19.877872467041016, -18.47582244873047, -17.073774337768555, -15.67172622680664, -14.269678115844727, -12.867630004882812, -11.465581893920898, -10.063532829284668, -8.661484718322754, -7.25943660736084, -5.857387542724609, -4.455339431762695, -3.053295135498047, -1.6512467861175537, -0.24919843673706055, 1.1528501510620117, 2.554898262023926, 3.95694637298584, 5.35899543762207, 6.761043548583984, 8.163091659545898, 9.565139770507812, 10.967187881469727, 12.369236946105957, 13.771285057067871, 15.173333168029785, 16.575382232666016, 17.97743034362793, 19.379478454589844, 20.781526565551758, 22.183574676513672, 23.58562469482422, 24.9876708984375, 26.389720916748047, 27.79176902770996, 29.193817138671875, 30.59586524963379, 31.997913360595703, 33.39996337890625, 34.80200958251953, 36.20405960083008, 37.60610580444336, 39.008155822753906, 40.41020202636719, 41.812252044677734]}, "gradients/decoder.transformer.h.1.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 3.0, 6.0, 5.0, 3.0, 6.0, 9.0, 4.0, 7.0, 17.0, 11.0, 18.0, 20.0, 14.0, 30.0, 33.0, 23.0, 33.0, 32.0, 39.0, 39.0, 40.0, 52.0, 37.0, 44.0, 49.0, 48.0, 34.0, 33.0, 38.0, 41.0, 30.0, 28.0, 33.0, 21.0, 26.0, 20.0, 12.0, 20.0, 14.0, 11.0, 3.0, 5.0, 4.0, 4.0, 5.0, 1.0, 4.0, 2.0, 0.0, 3.0, 0.0, 2.0], "bins": [-53.2913818359375, -51.778995513916016, -50.26660919189453, -48.75422286987305, -47.24183654785156, -45.72945022583008, -44.217063903808594, -42.704681396484375, -41.192291259765625, -39.67990493774414, -38.167518615722656, -36.65513229370117, -35.14274597167969, -33.6303596496582, -32.11797332763672, -30.605588912963867, -29.093204498291016, -27.58081817626953, -26.068431854248047, -24.556045532226562, -23.043659210205078, -21.531272888183594, -20.018888473510742, -18.506502151489258, -16.994115829467773, -15.481729507446289, -13.969343185424805, -12.456957817077637, -10.944571495056152, -9.432185173034668, -7.9197998046875, -6.407413482666016, -4.895027160644531, -3.382641077041626, -1.8702549934387207, -0.35786914825439453, 1.1545171737670898, 2.666903495788574, 4.179288864135742, 5.691675186157227, 7.204061508178711, 8.716447830200195, 10.22883415222168, 11.741219520568848, 13.253605842590332, 14.765992164611816, 16.278377532958984, 17.79076385498047, 19.303150177001953, 20.815536499023438, 22.327922821044922, 23.840309143066406, 25.35269546508789, 26.865081787109375, 28.377466201782227, 29.88985252380371, 31.402238845825195, 32.91462326049805, 34.42700958251953, 35.939395904541016, 37.4517822265625, 38.964168548583984, 40.47655487060547, 41.98894119262695, 43.50132751464844]}, "gradients/decoder.transformer.h.0.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 5.0, 3.0, 3.0, 2.0, 3.0, 4.0, 1.0, 2.0, 10.0, 7.0, 8.0, 7.0, 9.0, 14.0, 15.0, 21.0, 17.0, 23.0, 21.0, 25.0, 21.0, 29.0, 23.0, 28.0, 29.0, 40.0, 43.0, 40.0, 39.0, 54.0, 40.0, 29.0, 39.0, 36.0, 31.0, 35.0, 30.0, 29.0, 31.0, 19.0, 28.0, 21.0, 17.0, 6.0, 8.0, 15.0, 11.0, 6.0, 8.0, 8.0, 7.0, 3.0, 5.0, 3.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0], "bins": [-8.78125, -8.50732421875, -8.2333984375, -7.95947265625, -7.685546875, -7.41162109375, -7.1376953125, -6.86376953125, -6.58984375, -6.31591796875, -6.0419921875, -5.76806640625, -5.494140625, -5.22021484375, -4.9462890625, -4.67236328125, -4.3984375, -4.12451171875, -3.8505859375, -3.57666015625, -3.302734375, -3.02880859375, -2.7548828125, -2.48095703125, -2.20703125, -1.93310546875, -1.6591796875, -1.38525390625, -1.111328125, -0.83740234375, -0.5634765625, -0.28955078125, -0.015625, 0.25830078125, 0.5322265625, 0.80615234375, 1.080078125, 1.35400390625, 1.6279296875, 1.90185546875, 2.17578125, 2.44970703125, 2.7236328125, 2.99755859375, 3.271484375, 3.54541015625, 3.8193359375, 4.09326171875, 4.3671875, 4.64111328125, 4.9150390625, 5.18896484375, 5.462890625, 5.73681640625, 6.0107421875, 6.28466796875, 6.55859375, 6.83251953125, 7.1064453125, 7.38037109375, 7.654296875, 7.92822265625, 8.2021484375, 8.47607421875, 8.75]}, "gradients/decoder.transformer.h.0.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 2.0, 8.0, 5.0, 2.0, 10.0, 7.0, 12.0, 17.0, 23.0, 33.0, 39.0, 66.0, 78.0, 118.0, 144.0, 189.0, 289.0, 374.0, 555.0, 933.0, 2130.0, 14457.0, 1092811.0, 3034865.0, 40524.0, 3216.0, 1170.0, 663.0, 456.0, 278.0, 217.0, 166.0, 133.0, 82.0, 63.0, 41.0, 26.0, 19.0, 27.0, 13.0, 9.0, 8.0, 4.0, 3.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-55.78125, -53.99072265625, -52.2001953125, -50.40966796875, -48.619140625, -46.82861328125, -45.0380859375, -43.24755859375, -41.45703125, -39.66650390625, -37.8759765625, -36.08544921875, -34.294921875, -32.50439453125, -30.7138671875, -28.92333984375, -27.1328125, -25.34228515625, -23.5517578125, -21.76123046875, -19.970703125, -18.18017578125, -16.3896484375, -14.59912109375, -12.80859375, -11.01806640625, -9.2275390625, -7.43701171875, -5.646484375, -3.85595703125, -2.0654296875, -0.27490234375, 1.515625, 3.30615234375, 5.0966796875, 6.88720703125, 8.677734375, 10.46826171875, 12.2587890625, 14.04931640625, 15.83984375, 17.63037109375, 19.4208984375, 21.21142578125, 23.001953125, 24.79248046875, 26.5830078125, 28.37353515625, 30.1640625, 31.95458984375, 33.7451171875, 35.53564453125, 37.326171875, 39.11669921875, 40.9072265625, 42.69775390625, 44.48828125, 46.27880859375, 48.0693359375, 49.85986328125, 51.650390625, 53.44091796875, 55.2314453125, 57.02197265625, 58.8125]}, "gradients/decoder.transformer.h.0.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 5.0, 8.0, 11.0, 22.0, 28.0, 31.0, 61.0, 84.0, 130.0, 202.0, 334.0, 542.0, 673.0, 620.0, 439.0, 304.0, 205.0, 135.0, 91.0, 46.0, 39.0, 17.0, 18.0, 11.0, 13.0, 7.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-29.265625, -28.5667724609375, -27.867919921875, -27.1690673828125, -26.47021484375, -25.7713623046875, -25.072509765625, -24.3736572265625, -23.6748046875, -22.9759521484375, -22.277099609375, -21.5782470703125, -20.87939453125, -20.1805419921875, -19.481689453125, -18.7828369140625, -18.083984375, -17.3851318359375, -16.686279296875, -15.9874267578125, -15.28857421875, -14.5897216796875, -13.890869140625, -13.1920166015625, -12.4931640625, -11.7943115234375, -11.095458984375, -10.3966064453125, -9.69775390625, -8.9989013671875, -8.300048828125, -7.6011962890625, -6.90234375, -6.2034912109375, -5.504638671875, -4.8057861328125, -4.10693359375, -3.4080810546875, -2.709228515625, -2.0103759765625, -1.3115234375, -0.6126708984375, 0.086181640625, 0.7850341796875, 1.48388671875, 2.1827392578125, 2.881591796875, 3.5804443359375, 4.279296875, 4.9781494140625, 5.677001953125, 6.3758544921875, 7.07470703125, 7.7735595703125, 8.472412109375, 9.1712646484375, 9.8701171875, 10.5689697265625, 11.267822265625, 11.9666748046875, 12.66552734375, 13.3643798828125, 14.063232421875, 14.7620849609375, 15.4609375]}, "gradients/decoder.transformer.h.0.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 2.0, 2.0, 6.0, 7.0, 6.0, 9.0, 10.0, 9.0, 32.0, 26.0, 56.0, 40.0, 84.0, 125.0, 229.0, 377.0, 843.0, 1739.0, 4419.0, 13050.0, 50213.0, 269913.0, 2048250.0, 1545099.0, 202154.0, 39776.0, 11033.0, 3628.0, 1548.0, 706.0, 334.0, 171.0, 120.0, 74.0, 49.0, 29.0, 29.0, 21.0, 20.0, 12.0, 8.0, 7.0, 6.0, 5.0, 3.0, 3.0, 2.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.0, -16.4501953125, -15.900390625, -15.3505859375, -14.80078125, -14.2509765625, -13.701171875, -13.1513671875, -12.6015625, -12.0517578125, -11.501953125, -10.9521484375, -10.40234375, -9.8525390625, -9.302734375, -8.7529296875, -8.203125, -7.6533203125, -7.103515625, -6.5537109375, -6.00390625, -5.4541015625, -4.904296875, -4.3544921875, -3.8046875, -3.2548828125, -2.705078125, -2.1552734375, -1.60546875, -1.0556640625, -0.505859375, 0.0439453125, 0.59375, 1.1435546875, 1.693359375, 2.2431640625, 2.79296875, 3.3427734375, 3.892578125, 4.4423828125, 4.9921875, 5.5419921875, 6.091796875, 6.6416015625, 7.19140625, 7.7412109375, 8.291015625, 8.8408203125, 9.390625, 9.9404296875, 10.490234375, 11.0400390625, 11.58984375, 12.1396484375, 12.689453125, 13.2392578125, 13.7890625, 14.3388671875, 14.888671875, 15.4384765625, 15.98828125, 16.5380859375, 17.087890625, 17.6376953125, 18.1875]}, "gradients/decoder.transformer.h.0.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 2.0, 3.0, 2.0, 7.0, 6.0, 8.0, 18.0, 12.0, 19.0, 18.0, 25.0, 34.0, 34.0, 34.0, 56.0, 56.0, 63.0, 56.0, 54.0, 65.0, 55.0, 56.0, 38.0, 46.0, 43.0, 37.0, 31.0, 25.0, 16.0, 26.0, 8.0, 14.0, 7.0, 9.0, 8.0, 3.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-65.71598815917969, -63.41459655761719, -61.11320495605469, -58.81181335449219, -56.51042175292969, -54.20903015136719, -51.90763473510742, -49.60624313354492, -47.30485153198242, -45.00345993041992, -42.70206832885742, -40.40067672729492, -38.099281311035156, -35.797889709472656, -33.496498107910156, -31.195106506347656, -28.893714904785156, -26.592323303222656, -24.290931701660156, -21.989538192749023, -19.688146591186523, -17.386754989624023, -15.085362434387207, -12.78396987915039, -10.48257827758789, -8.18118667602539, -5.879794120788574, -3.578402042388916, -1.2770099639892578, 1.0243816375732422, 3.3257741928100586, 5.627166748046875, 7.928565979003906, 10.229957580566406, 12.531350135803223, 14.832742691040039, 17.13413429260254, 19.43552589416504, 21.736919403076172, 24.038311004638672, 26.339702606201172, 28.641094207763672, 30.942485809326172, 33.24387741088867, 35.54527282714844, 37.84666442871094, 40.14805603027344, 42.44944763183594, 44.75083923339844, 47.05223083496094, 49.35362243652344, 51.65501403808594, 53.95640563964844, 56.25779724121094, 58.5591926574707, 60.8605842590332, 63.1619758605957, 65.46337127685547, 67.76476287841797, 70.06615447998047, 72.36754608154297, 74.66893768310547, 76.97032928466797, 79.27172088623047, 81.57311248779297]}, "gradients/decoder.transformer.h.0.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 4.0, 3.0, 2.0, 8.0, 4.0, 4.0, 11.0, 17.0, 6.0, 10.0, 11.0, 23.0, 16.0, 25.0, 35.0, 39.0, 21.0, 31.0, 50.0, 30.0, 39.0, 43.0, 34.0, 59.0, 49.0, 39.0, 41.0, 37.0, 43.0, 30.0, 35.0, 34.0, 23.0, 26.0, 18.0, 19.0, 30.0, 14.0, 9.0, 10.0, 8.0, 5.0, 8.0, 6.0, 0.0, 6.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-55.457855224609375, -53.38108825683594, -51.3043212890625, -49.22755432128906, -47.15079116821289, -45.07402420043945, -42.997257232666016, -40.92049026489258, -38.843727111816406, -36.76696014404297, -34.69019317626953, -32.613426208496094, -30.536663055419922, -28.459896087646484, -26.383129119873047, -24.30636215209961, -22.229595184326172, -20.152828216552734, -18.07606315612793, -15.999296188354492, -13.922530174255371, -11.84576416015625, -9.768997192382812, -7.692231178283691, -5.61546516418457, -3.53869891166687, -1.46193265914917, 0.6148338317871094, 2.6915998458862305, 4.768365859985352, 6.845132827758789, 8.92189884185791, 10.998664855957031, 13.075430870056152, 15.152196884155273, 17.22896385192871, 19.305728912353516, 21.382495880126953, 23.45926284790039, 25.536029815673828, 27.612794876098633, 29.68956184387207, 31.766326904296875, 33.84309387207031, 35.91986083984375, 37.99662780761719, 40.073394775390625, 42.1501579284668, 44.226924896240234, 46.30369186401367, 48.38045883178711, 50.45722198486328, 52.53398895263672, 54.610755920410156, 56.687522888183594, 58.76428985595703, 60.84105682373047, 62.917823791503906, 64.99459075927734, 67.07135772705078, 69.14812469482422, 71.22488403320312, 73.30165100097656, 75.37841796875, 77.45518493652344]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 7.0, 6.0, 10.0, 8.0, 9.0, 12.0, 16.0, 16.0, 25.0, 25.0, 29.0, 39.0, 41.0, 31.0, 36.0, 48.0, 45.0, 40.0, 41.0, 61.0, 44.0, 46.0, 50.0, 48.0, 30.0, 43.0, 39.0, 22.0, 19.0, 24.0, 25.0, 18.0, 14.0, 8.0, 7.0, 5.0, 2.0, 8.0, 6.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-105.25, -101.341796875, -97.43359375, -93.525390625, -89.6171875, -85.708984375, -81.80078125, -77.892578125, -73.984375, -70.076171875, -66.16796875, -62.259765625, -58.3515625, -54.443359375, -50.53515625, -46.626953125, -42.71875, -38.810546875, -34.90234375, -30.994140625, -27.0859375, -23.177734375, -19.26953125, -15.361328125, -11.453125, -7.544921875, -3.63671875, 0.271484375, 4.1796875, 8.087890625, 11.99609375, 15.904296875, 19.8125, 23.720703125, 27.62890625, 31.537109375, 35.4453125, 39.353515625, 43.26171875, 47.169921875, 51.078125, 54.986328125, 58.89453125, 62.802734375, 66.7109375, 70.619140625, 74.52734375, 78.435546875, 82.34375, 86.251953125, 90.16015625, 94.068359375, 97.9765625, 101.884765625, 105.79296875, 109.701171875, 113.609375, 117.517578125, 121.42578125, 125.333984375, 129.2421875, 133.150390625, 137.05859375, 140.966796875, 144.875]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 10.0, 7.0, 8.0, 11.0, 22.0, 40.0, 45.0, 62.0, 93.0, 135.0, 207.0, 331.0, 447.0, 705.0, 1167.0, 1666.0, 2651.0, 4014.0, 6151.0, 9403.0, 14552.0, 22584.0, 34995.0, 54011.0, 80974.0, 114739.0, 148466.0, 155678.0, 126948.0, 91477.0, 62187.0, 40349.0, 26237.0, 16736.0, 10914.0, 7128.0, 4640.0, 2957.0, 1907.0, 1325.0, 882.0, 605.0, 324.0, 266.0, 168.0, 95.0, 92.0, 53.0, 33.0, 22.0, 14.0, 12.0, 8.0, 7.0, 4.0, 4.0, 0.0, 0.0, 3.0], "bins": [-6.36328125, -6.16546630859375, -5.9676513671875, -5.76983642578125, -5.572021484375, -5.37420654296875, -5.1763916015625, -4.97857666015625, -4.78076171875, -4.58294677734375, -4.3851318359375, -4.18731689453125, -3.989501953125, -3.79168701171875, -3.5938720703125, -3.39605712890625, -3.1982421875, -3.00042724609375, -2.8026123046875, -2.60479736328125, -2.406982421875, -2.20916748046875, -2.0113525390625, -1.81353759765625, -1.61572265625, -1.41790771484375, -1.2200927734375, -1.02227783203125, -0.824462890625, -0.62664794921875, -0.4288330078125, -0.23101806640625, -0.033203125, 0.16461181640625, 0.3624267578125, 0.56024169921875, 0.758056640625, 0.95587158203125, 1.1536865234375, 1.35150146484375, 1.54931640625, 1.74713134765625, 1.9449462890625, 2.14276123046875, 2.340576171875, 2.53839111328125, 2.7362060546875, 2.93402099609375, 3.1318359375, 3.32965087890625, 3.5274658203125, 3.72528076171875, 3.923095703125, 4.12091064453125, 4.3187255859375, 4.51654052734375, 4.71435546875, 4.91217041015625, 5.1099853515625, 5.30780029296875, 5.505615234375, 5.70343017578125, 5.9012451171875, 6.09906005859375, 6.296875]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 3.0, 3.0, 4.0, 2.0, 8.0, 12.0, 9.0, 9.0, 7.0, 10.0, 12.0, 14.0, 24.0, 21.0, 20.0, 26.0, 22.0, 25.0, 31.0, 35.0, 38.0, 39.0, 26.0, 34.0, 30.0, 29.0, 1061.0, 41.0, 44.0, 34.0, 25.0, 35.0, 39.0, 22.0, 38.0, 27.0, 24.0, 23.0, 23.0, 10.0, 9.0, 19.0, 11.0, 8.0, 11.0, 6.0, 6.0, 8.0, 5.0, 6.0, 5.0, 5.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-59.03125, -57.1435546875, -55.255859375, -53.3681640625, -51.48046875, -49.5927734375, -47.705078125, -45.8173828125, -43.9296875, -42.0419921875, -40.154296875, -38.2666015625, -36.37890625, -34.4912109375, -32.603515625, -30.7158203125, -28.828125, -26.9404296875, -25.052734375, -23.1650390625, -21.27734375, -19.3896484375, -17.501953125, -15.6142578125, -13.7265625, -11.8388671875, -9.951171875, -8.0634765625, -6.17578125, -4.2880859375, -2.400390625, -0.5126953125, 1.375, 3.2626953125, 5.150390625, 7.0380859375, 8.92578125, 10.8134765625, 12.701171875, 14.5888671875, 16.4765625, 18.3642578125, 20.251953125, 22.1396484375, 24.02734375, 25.9150390625, 27.802734375, 29.6904296875, 31.578125, 33.4658203125, 35.353515625, 37.2412109375, 39.12890625, 41.0166015625, 42.904296875, 44.7919921875, 46.6796875, 48.5673828125, 50.455078125, 52.3427734375, 54.23046875, 56.1181640625, 58.005859375, 59.8935546875, 61.78125]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 6.0, 3.0, 10.0, 16.0, 18.0, 28.0, 52.0, 82.0, 99.0, 195.0, 259.0, 375.0, 627.0, 965.0, 1642.0, 2425.0, 3828.0, 6160.0, 9830.0, 15452.0, 24417.0, 37814.0, 56982.0, 84487.0, 117598.0, 285885.0, 1056751.0, 123947.0, 91366.0, 62040.0, 41091.0, 26742.0, 16749.0, 10733.0, 6769.0, 4201.0, 2674.0, 1745.0, 1056.0, 715.0, 458.0, 298.0, 210.0, 111.0, 69.0, 50.0, 38.0, 29.0, 12.0, 10.0, 6.0, 7.0, 2.0, 3.0, 2.0, 1.0, 2.0], "bins": [-5.6484375, -5.477783203125, -5.30712890625, -5.136474609375, -4.9658203125, -4.795166015625, -4.62451171875, -4.453857421875, -4.283203125, -4.112548828125, -3.94189453125, -3.771240234375, -3.6005859375, -3.429931640625, -3.25927734375, -3.088623046875, -2.91796875, -2.747314453125, -2.57666015625, -2.406005859375, -2.2353515625, -2.064697265625, -1.89404296875, -1.723388671875, -1.552734375, -1.382080078125, -1.21142578125, -1.040771484375, -0.8701171875, -0.699462890625, -0.52880859375, -0.358154296875, -0.1875, -0.016845703125, 0.15380859375, 0.324462890625, 0.4951171875, 0.665771484375, 0.83642578125, 1.007080078125, 1.177734375, 1.348388671875, 1.51904296875, 1.689697265625, 1.8603515625, 2.031005859375, 2.20166015625, 2.372314453125, 2.54296875, 2.713623046875, 2.88427734375, 3.054931640625, 3.2255859375, 3.396240234375, 3.56689453125, 3.737548828125, 3.908203125, 4.078857421875, 4.24951171875, 4.420166015625, 4.5908203125, 4.761474609375, 4.93212890625, 5.102783203125, 5.2734375]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 4.0, 4.0, 1.0, 1.0, 9.0, 5.0, 14.0, 11.0, 16.0, 19.0, 23.0, 37.0, 30.0, 50.0, 52.0, 91.0, 85.0, 93.0, 84.0, 70.0, 55.0, 50.0, 38.0, 33.0, 22.0, 18.0, 16.0, 16.0, 16.0, 10.0, 12.0, 2.0, 5.0, 3.0, 5.0, 4.0, 2.0, 3.0, 4.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1236572265625, -0.11906051635742188, -0.11446380615234375, -0.10986709594726562, -0.1052703857421875, -0.10067367553710938, -0.09607696533203125, -0.09148025512695312, -0.086883544921875, -0.08228683471679688, -0.07769012451171875, -0.07309341430664062, -0.0684967041015625, -0.06389999389648438, -0.05930328369140625, -0.054706573486328125, -0.05010986328125, -0.045513153076171875, -0.04091644287109375, -0.036319732666015625, -0.0317230224609375, -0.027126312255859375, -0.02252960205078125, -0.017932891845703125, -0.013336181640625, -0.008739471435546875, -0.00414276123046875, 0.000453948974609375, 0.0050506591796875, 0.009647369384765625, 0.01424407958984375, 0.018840789794921875, 0.0234375, 0.028034210205078125, 0.03263092041015625, 0.037227630615234375, 0.0418243408203125, 0.046421051025390625, 0.05101776123046875, 0.055614471435546875, 0.060211181640625, 0.06480789184570312, 0.06940460205078125, 0.07400131225585938, 0.0785980224609375, 0.08319473266601562, 0.08779144287109375, 0.09238815307617188, 0.09698486328125, 0.10158157348632812, 0.10617828369140625, 0.11077499389648438, 0.1153717041015625, 0.11996841430664062, 0.12456512451171875, 0.12916183471679688, 0.133758544921875, 0.13835525512695312, 0.14295196533203125, 0.14754867553710938, 0.1521453857421875, 0.15674209594726562, 0.16133880615234375, 0.16593551635742188, 0.1705322265625]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 4.0, 10.0, 5.0, 8.0, 24.0, 27.0, 45.0, 59.0, 81.0, 119.0, 212.0, 329.0, 551.0, 1055.0, 2157.0, 4567.0, 10691.0, 28856.0, 98780.0, 475260.0, 319603.0, 68197.0, 21872.0, 8474.0, 3700.0, 1737.0, 920.0, 505.0, 255.0, 156.0, 106.0, 73.0, 44.0, 26.0, 15.0, 8.0, 9.0, 8.0, 5.0, 4.0, 0.0, 2.0, 0.0, 4.0, 0.0, 2.0], "bins": [-1.0107421875, -0.9840469360351562, -0.9573516845703125, -0.9306564331054688, -0.903961181640625, -0.8772659301757812, -0.8505706787109375, -0.8238754272460938, -0.79718017578125, -0.7704849243164062, -0.7437896728515625, -0.7170944213867188, -0.690399169921875, -0.6637039184570312, -0.6370086669921875, -0.6103134155273438, -0.5836181640625, -0.5569229125976562, -0.5302276611328125, -0.5035324096679688, -0.476837158203125, -0.45014190673828125, -0.4234466552734375, -0.39675140380859375, -0.37005615234375, -0.34336090087890625, -0.3166656494140625, -0.28997039794921875, -0.263275146484375, -0.23657989501953125, -0.2098846435546875, -0.18318939208984375, -0.156494140625, -0.12979888916015625, -0.1031036376953125, -0.07640838623046875, -0.049713134765625, -0.02301788330078125, 0.0036773681640625, 0.03037261962890625, 0.05706787109375, 0.08376312255859375, 0.1104583740234375, 0.13715362548828125, 0.163848876953125, 0.19054412841796875, 0.2172393798828125, 0.24393463134765625, 0.2706298828125, 0.29732513427734375, 0.3240203857421875, 0.35071563720703125, 0.377410888671875, 0.40410614013671875, 0.4308013916015625, 0.45749664306640625, 0.48419189453125, 0.5108871459960938, 0.5375823974609375, 0.5642776489257812, 0.590972900390625, 0.6176681518554688, 0.6443634033203125, 0.6710586547851562, 0.69775390625]}, "gradients/decoder.transformer.h.0.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 6.0, 0.0, 6.0, 9.0, 14.0, 26.0, 40.0, 61.0, 75.0, 123.0, 144.0, 151.0, 121.0, 98.0, 50.0, 38.0, 21.0, 12.0, 7.0, 3.0, 5.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.25400230288505554, -0.24652716517448425, -0.23905202746391296, -0.23157688975334167, -0.22410175204277039, -0.2166266143321991, -0.2091514617204666, -0.20167632400989532, -0.19420118629932404, -0.18672604858875275, -0.17925091087818146, -0.17177577316761017, -0.16430062055587769, -0.1568254828453064, -0.1493503451347351, -0.14187520742416382, -0.13440006971359253, -0.12692493200302124, -0.11944979429244995, -0.11197464913129807, -0.10449951142072678, -0.09702437371015549, -0.0895492285490036, -0.08207409083843231, -0.07459895312786102, -0.06712381541728973, -0.059648673981428146, -0.05217353254556656, -0.04469839483499527, -0.03722325712442398, -0.029748115688562393, -0.022272974252700806, -0.01479785144329071, -0.007322711870074272, 0.00015242770314216614, 0.007627567276358604, 0.015102706849575043, 0.022577844560146332, 0.03005298599600792, 0.03752812743186951, 0.045003265142440796, 0.052478402853012085, 0.05995354428887367, 0.06742868572473526, 0.07490382343530655, 0.08237896114587784, 0.08985410630702972, 0.09732924401760101, 0.1048043817281723, 0.11227951943874359, 0.11975465714931488, 0.12722979485988617, 0.13470494747161865, 0.14218008518218994, 0.14965522289276123, 0.15713036060333252, 0.1646054983139038, 0.1720806360244751, 0.1795557737350464, 0.18703091144561768, 0.19450604915618896, 0.20198118686676025, 0.20945633947849274, 0.21693147718906403, 0.22440661489963531]}, "gradients/decoder.transformer.h.0.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 5.0, 3.0, 3.0, 3.0, 7.0, 4.0, 7.0, 5.0, 7.0, 12.0, 8.0, 14.0, 25.0, 18.0, 31.0, 22.0, 27.0, 26.0, 42.0, 34.0, 39.0, 37.0, 27.0, 40.0, 40.0, 39.0, 39.0, 29.0, 37.0, 44.0, 40.0, 38.0, 33.0, 34.0, 26.0, 13.0, 15.0, 24.0, 22.0, 17.0, 12.0, 14.0, 10.0, 9.0, 7.0, 2.0, 6.0, 2.0, 6.0, 5.0, 1.0, 3.0, 1.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.06296664476394653, -0.06081600859761238, -0.05866537243127823, -0.056514739990234375, -0.05436410382390022, -0.05221346765756607, -0.05006283521652222, -0.047912199050188065, -0.04576156288385391, -0.04361092671751976, -0.04146029055118561, -0.039309658110141754, -0.0371590219438076, -0.03500838577747345, -0.032857753336429596, -0.030707117170095444, -0.02855648100376129, -0.02640584483742714, -0.024255210533738136, -0.022104576230049133, -0.01995394006371498, -0.01780330389738083, -0.015652669593691826, -0.013502034358680248, -0.01135139912366867, -0.009200763888657093, -0.0070501286536455154, -0.004899493418633938, -0.0027488581836223602, -0.0005982229486107826, 0.001552412286400795, 0.0037030475214123726, 0.00585368275642395, 0.008004317991435528, 0.010154953226447105, 0.012305588461458683, 0.01445622369647026, 0.016606859862804413, 0.018757494166493416, 0.02090812847018242, 0.02305876463651657, 0.025209400802850723, 0.027360035106539726, 0.02951066941022873, 0.03166130557656288, 0.033811941742897034, 0.03596257418394089, 0.03811321035027504, 0.04026384651660919, 0.042414482682943344, 0.044565118849277496, 0.04671575129032135, 0.0488663874566555, 0.051017023622989655, 0.05316765606403351, 0.05531829223036766, 0.05746892839670181, 0.059619564563035965, 0.06177020072937012, 0.06392083317041397, 0.06607146561145782, 0.06822210550308228, 0.07037273794412613, 0.07252337783575058, 0.07467401027679443]}, "gradients/decoder.transformer.h.0.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 8.0, 6.0, 10.0, 8.0, 9.0, 12.0, 15.0, 16.0, 26.0, 24.0, 30.0, 39.0, 40.0, 32.0, 36.0, 44.0, 49.0, 40.0, 39.0, 60.0, 45.0, 48.0, 51.0, 46.0, 29.0, 46.0, 37.0, 22.0, 20.0, 24.0, 24.0, 19.0, 14.0, 9.0, 6.0, 5.0, 2.0, 8.0, 6.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-105.4375, -101.5244140625, -97.611328125, -93.6982421875, -89.78515625, -85.8720703125, -81.958984375, -78.0458984375, -74.1328125, -70.2197265625, -66.306640625, -62.3935546875, -58.48046875, -54.5673828125, -50.654296875, -46.7412109375, -42.828125, -38.9150390625, -35.001953125, -31.0888671875, -27.17578125, -23.2626953125, -19.349609375, -15.4365234375, -11.5234375, -7.6103515625, -3.697265625, 0.2158203125, 4.12890625, 8.0419921875, 11.955078125, 15.8681640625, 19.78125, 23.6943359375, 27.607421875, 31.5205078125, 35.43359375, 39.3466796875, 43.259765625, 47.1728515625, 51.0859375, 54.9990234375, 58.912109375, 62.8251953125, 66.73828125, 70.6513671875, 74.564453125, 78.4775390625, 82.390625, 86.3037109375, 90.216796875, 94.1298828125, 98.04296875, 101.9560546875, 105.869140625, 109.7822265625, 113.6953125, 117.6083984375, 121.521484375, 125.4345703125, 129.34765625, 133.2607421875, 137.173828125, 141.0869140625, 145.0]}, "gradients/decoder.transformer.h.0.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 5.0, 4.0, 7.0, 5.0, 9.0, 17.0, 13.0, 19.0, 31.0, 46.0, 67.0, 109.0, 123.0, 218.0, 321.0, 512.0, 913.0, 1771.0, 3699.0, 8973.0, 28409.0, 126839.0, 501044.0, 288566.0, 59813.0, 15473.0, 5734.0, 2580.0, 1325.0, 740.0, 416.0, 251.0, 169.0, 106.0, 72.0, 50.0, 35.0, 28.0, 14.0, 10.0, 9.0, 9.0, 5.0, 1.0, 0.0, 3.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.40625, -19.671142578125, -18.93603515625, -18.200927734375, -17.4658203125, -16.730712890625, -15.99560546875, -15.260498046875, -14.525390625, -13.790283203125, -13.05517578125, -12.320068359375, -11.5849609375, -10.849853515625, -10.11474609375, -9.379638671875, -8.64453125, -7.909423828125, -7.17431640625, -6.439208984375, -5.7041015625, -4.968994140625, -4.23388671875, -3.498779296875, -2.763671875, -2.028564453125, -1.29345703125, -0.558349609375, 0.1767578125, 0.911865234375, 1.64697265625, 2.382080078125, 3.1171875, 3.852294921875, 4.58740234375, 5.322509765625, 6.0576171875, 6.792724609375, 7.52783203125, 8.262939453125, 8.998046875, 9.733154296875, 10.46826171875, 11.203369140625, 11.9384765625, 12.673583984375, 13.40869140625, 14.143798828125, 14.87890625, 15.614013671875, 16.34912109375, 17.084228515625, 17.8193359375, 18.554443359375, 19.28955078125, 20.024658203125, 20.759765625, 21.494873046875, 22.22998046875, 22.965087890625, 23.7001953125, 24.435302734375, 25.17041015625, 25.905517578125, 26.640625]}, "gradients/decoder.transformer.h.0.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 5.0, 5.0, 4.0, 9.0, 12.0, 14.0, 22.0, 19.0, 32.0, 31.0, 41.0, 46.0, 70.0, 55.0, 78.0, 2076.0, 116.0, 68.0, 70.0, 66.0, 49.0, 47.0, 26.0, 28.0, 14.0, 8.0, 5.0, 9.0, 5.0, 5.0, 7.0, 1.0, 1.0, 2.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-259.75, -251.84375, -243.9375, -236.03125, -228.125, -220.21875, -212.3125, -204.40625, -196.5, -188.59375, -180.6875, -172.78125, -164.875, -156.96875, -149.0625, -141.15625, -133.25, -125.34375, -117.4375, -109.53125, -101.625, -93.71875, -85.8125, -77.90625, -70.0, -62.09375, -54.1875, -46.28125, -38.375, -30.46875, -22.5625, -14.65625, -6.75, 1.15625, 9.0625, 16.96875, 24.875, 32.78125, 40.6875, 48.59375, 56.5, 64.40625, 72.3125, 80.21875, 88.125, 96.03125, 103.9375, 111.84375, 119.75, 127.65625, 135.5625, 143.46875, 151.375, 159.28125, 167.1875, 175.09375, 183.0, 190.90625, 198.8125, 206.71875, 214.625, 222.53125, 230.4375, 238.34375, 246.25]}, "gradients/decoder.transformer.h.0.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 6.0, 6.0, 3.0, 8.0, 13.0, 16.0, 12.0, 16.0, 20.0, 31.0, 40.0, 69.0, 80.0, 110.0, 163.0, 346.0, 577.0, 1211.0, 3801.0, 22786.0, 2628299.0, 463884.0, 18429.0, 3215.0, 1079.0, 542.0, 292.0, 190.0, 142.0, 88.0, 59.0, 41.0, 33.0, 24.0, 16.0, 16.0, 16.0, 8.0, 6.0, 8.0, 2.0, 1.0, 3.0, 4.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-53.46875, -51.84619140625, -50.2236328125, -48.60107421875, -46.978515625, -45.35595703125, -43.7333984375, -42.11083984375, -40.48828125, -38.86572265625, -37.2431640625, -35.62060546875, -33.998046875, -32.37548828125, -30.7529296875, -29.13037109375, -27.5078125, -25.88525390625, -24.2626953125, -22.64013671875, -21.017578125, -19.39501953125, -17.7724609375, -16.14990234375, -14.52734375, -12.90478515625, -11.2822265625, -9.65966796875, -8.037109375, -6.41455078125, -4.7919921875, -3.16943359375, -1.546875, 0.07568359375, 1.6982421875, 3.32080078125, 4.943359375, 6.56591796875, 8.1884765625, 9.81103515625, 11.43359375, 13.05615234375, 14.6787109375, 16.30126953125, 17.923828125, 19.54638671875, 21.1689453125, 22.79150390625, 24.4140625, 26.03662109375, 27.6591796875, 29.28173828125, 30.904296875, 32.52685546875, 34.1494140625, 35.77197265625, 37.39453125, 39.01708984375, 40.6396484375, 42.26220703125, 43.884765625, 45.50732421875, 47.1298828125, 48.75244140625, 50.375]}, "gradients/decoder.transformer.h.0.ln_1.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 6.0, 4.0, 4.0, 11.0, 24.0, 34.0, 90.0, 204.0, 235.0, 191.0, 100.0, 48.0, 20.0, 18.0, 6.0, 4.0, 5.0, 3.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-205.1308135986328, -191.84591674804688, -178.56101989746094, -165.276123046875, -151.99124145507812, -138.70632934570312, -125.42144775390625, -112.13655090332031, -98.85165405273438, -85.56675720214844, -72.2818603515625, -58.996971130371094, -45.712074279785156, -32.42717742919922, -19.142288208007812, -5.857391357421875, 7.4275054931640625, 20.712400436401367, 33.99729537963867, 47.282188415527344, 60.56708526611328, 73.85198211669922, 87.13687133789062, 100.42176818847656, 113.7066650390625, 126.99156188964844, 140.27645874023438, 153.56134033203125, 166.84625244140625, 180.13113403320312, 193.41603088378906, 206.700927734375, 219.98583984375, 233.27073669433594, 246.55563354492188, 259.84051513671875, 273.12542724609375, 286.4103088378906, 299.6951904296875, 312.9801025390625, 326.2650146484375, 339.5498962402344, 352.8348083496094, 366.11968994140625, 379.40460205078125, 392.6894836425781, 405.974365234375, 419.25927734375, 432.5441589355469, 445.82904052734375, 459.11395263671875, 472.3988342285156, 485.6837463378906, 498.9686279296875, 512.2535400390625, 525.5384521484375, 538.8233032226562, 552.1082153320312, 565.39306640625, 578.677978515625, 591.962890625, 605.247802734375, 618.5326538085938, 631.8175659179688, 645.1024780273438]}, "gradients/decoder.transformer.h.0.ln_1.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 11.0, 7.0, 4.0, 16.0, 13.0, 13.0, 17.0, 17.0, 21.0, 30.0, 37.0, 32.0, 39.0, 36.0, 49.0, 47.0, 36.0, 46.0, 40.0, 42.0, 49.0, 49.0, 32.0, 36.0, 40.0, 29.0, 34.0, 17.0, 27.0, 18.0, 12.0, 13.0, 20.0, 14.0, 11.0, 8.0, 11.0, 0.0, 4.0, 3.0, 3.0, 3.0, 3.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-182.3821258544922, -175.43370056152344, -168.48526000976562, -161.53683471679688, -154.58839416503906, -147.6399688720703, -140.6915283203125, -133.74310302734375, -126.79467010498047, -119.84623718261719, -112.8978042602539, -105.94937133789062, -99.00094604492188, -92.05250549316406, -85.10408020019531, -78.15564727783203, -71.20721435546875, -64.25878143310547, -57.31034851074219, -50.36191940307617, -43.41348648071289, -36.46505355834961, -29.516624450683594, -22.568191528320312, -15.619758605957031, -8.671326637268066, -1.7228946685791016, 5.225536346435547, 12.173969268798828, 19.12240219116211, 26.070831298828125, 33.019264221191406, 39.967681884765625, 46.916114807128906, 53.86454772949219, 60.8129768371582, 67.76141357421875, 74.7098388671875, 81.65827178955078, 88.60670471191406, 95.55513763427734, 102.50357055664062, 109.4520034790039, 116.40043640136719, 123.34886169433594, 130.29730224609375, 137.2457275390625, 144.19415283203125, 151.14259338378906, 158.0910186767578, 165.03945922851562, 171.98788452148438, 178.9363250732422, 185.88475036621094, 192.83319091796875, 199.7816162109375, 206.73004150390625, 213.678466796875, 220.6269073486328, 227.57533264160156, 234.52377319335938, 241.47219848632812, 248.42062377929688, 255.3690643310547, 262.3175048828125]}, "gradients/decoder.transformer.wpe.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 1.0, 2.0, 2.0, 1.0, 4.0, 4.0, 12.0, 15.0, 13.0, 16.0, 30.0, 28.0, 39.0, 45.0, 76.0, 107.0, 131.0, 184.0, 284.0, 364.0, 520.0, 652.0, 910.0, 1040166.0, 1661.0, 853.0, 651.0, 457.0, 365.0, 249.0, 190.0, 153.0, 93.0, 67.0, 55.0, 45.0, 35.0, 15.0, 25.0, 15.0, 9.0, 8.0, 6.0, 7.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-71.66717529296875, -69.10254669189453, -66.53791809082031, -63.97329330444336, -61.40866470336914, -58.84403610229492, -56.27941131591797, -53.71478271484375, -51.15015411376953, -48.58552551269531, -46.020896911621094, -43.45627212524414, -40.89164352416992, -38.3270149230957, -35.76239013671875, -33.19776153564453, -30.633132934570312, -28.068504333496094, -25.503877639770508, -22.939250946044922, -20.374622344970703, -17.809993743896484, -15.245367050170898, -12.680740356445312, -10.116111755371094, -7.551484107971191, -4.986856460571289, -2.4222288131713867, 0.14239883422851562, 2.707026481628418, 5.27165412902832, 7.836280822753906, 10.400909423828125, 12.965537071228027, 15.53016471862793, 18.094791412353516, 20.659420013427734, 23.224048614501953, 25.78867530822754, 28.353302001953125, 30.917930603027344, 33.48255920410156, 36.04718780517578, 38.611812591552734, 41.17644119262695, 43.74106979370117, 46.305694580078125, 48.870323181152344, 51.43495178222656, 53.99958038330078, 56.564208984375, 59.12883377075195, 61.69346237182617, 64.25808715820312, 66.82271575927734, 69.38734436035156, 71.95197296142578, 74.5166015625, 77.08123016357422, 79.64585876464844, 82.21047973632812, 84.77510833740234, 87.33973693847656, 89.90436553955078, 92.468994140625]}, "gradients/decoder.transformer.wte.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 6.0, 3.0, 4.0, 4.0, 3.0, 3.0, 7.0, 10.0, 22.0, 26.0, 62.0, 219.0, 51459000.0, 3579.0, 81.0, 44.0, 21.0, 19.0, 5.0, 6.0, 7.0, 1.0, 5.0, 4.0, 5.0, 4.0, 1.0, 2.0, 3.0, 8.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-4572.0, -4371.81298828125, -4171.6259765625, -3971.43896484375, -3771.251953125, -3571.06494140625, -3370.8779296875, -3170.69091796875, -2970.50390625, -2770.31689453125, -2570.1298828125, -2369.94287109375, -2169.755859375, -1969.56884765625, -1769.3817138671875, -1569.1947021484375, -1369.007568359375, -1168.820556640625, -968.633544921875, -768.4464721679688, -568.2594604492188, -368.07244873046875, -167.8853759765625, 32.3016357421875, 232.4886474609375, 432.6756591796875, 632.8626708984375, 833.0497436523438, 1033.23681640625, 1233.423828125, 1433.61083984375, 1633.7978515625, 1833.98486328125, 2034.171875, 2234.35888671875, 2434.5458984375, 2634.73291015625, 2834.919921875, 3035.10693359375, 3235.2939453125, 3435.48095703125, 3635.66796875, 3835.85498046875, 4036.0419921875, 4236.22900390625, 4436.416015625, 4636.60302734375, 4836.7900390625, 5036.9775390625, 5237.16455078125, 5437.3515625, 5637.53857421875, 5837.7255859375, 6037.91259765625, 6238.099609375, 6438.28662109375, 6638.4736328125, 6838.66064453125, 7038.84765625, 7239.03466796875, 7439.2216796875, 7639.40869140625, 7839.595703125, 8039.78271484375, 8239.9697265625]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [1.0, 0.0, 6.0, 3.0, 9.0, 18.0, 21.0, 29.0, 50.0, 80.0, 124.0, 203.0, 288.0, 508.0, 761.0, 1108.0, 1729.0, 2827.0, 4411.0, 6534.0, 10192.0, 15574.0, 23487.0, 36097.0, 54458.0, 81677.0, 122500.0, 176319.0, 246769.0, 333585.0, 808138.0, 3041394.0, 399738.0, 283676.0, 206709.0, 143841.0, 97701.0, 65945.0, 43217.0, 28139.0, 18762.0, 11923.0, 7902.0, 5333.0, 3420.0, 2149.0, 1390.0, 949.0, 642.0, 394.0, 252.0, 170.0, 93.0, 81.0, 52.0, 30.0, 16.0, 12.0, 7.0, 4.0, 5.0, 3.0, 0.0, 1.0], "bins": [-1.9912109375, -1.9274444580078125, -1.863677978515625, -1.7999114990234375, -1.73614501953125, -1.6723785400390625, -1.608612060546875, -1.5448455810546875, -1.4810791015625, -1.4173126220703125, -1.353546142578125, -1.2897796630859375, -1.22601318359375, -1.1622467041015625, -1.098480224609375, -1.0347137451171875, -0.970947265625, -0.9071807861328125, -0.843414306640625, -0.7796478271484375, -0.71588134765625, -0.6521148681640625, -0.588348388671875, -0.5245819091796875, -0.4608154296875, -0.3970489501953125, -0.333282470703125, -0.2695159912109375, -0.20574951171875, -0.1419830322265625, -0.078216552734375, -0.0144500732421875, 0.04931640625, 0.1130828857421875, 0.176849365234375, 0.2406158447265625, 0.30438232421875, 0.3681488037109375, 0.431915283203125, 0.4956817626953125, 0.5594482421875, 0.6232147216796875, 0.686981201171875, 0.7507476806640625, 0.81451416015625, 0.8782806396484375, 0.942047119140625, 1.0058135986328125, 1.069580078125, 1.1333465576171875, 1.197113037109375, 1.2608795166015625, 1.32464599609375, 1.3884124755859375, 1.452178955078125, 1.5159454345703125, 1.5797119140625, 1.6434783935546875, 1.707244873046875, 1.7710113525390625, 1.83477783203125, 1.8985443115234375, 1.962310791015625, 2.0260772705078125, 2.08984375]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 6.0, 4.0, 4.0, 11.0, 11.0, 10.0, 9.0, 18.0, 19.0, 21.0, 33.0, 27.0, 41.0, 32.0, 45.0, 42.0, 39.0, 35.0, 34.0, 41.0, 945.0, 180.0, 38.0, 41.0, 33.0, 35.0, 29.0, 29.0, 29.0, 37.0, 24.0, 22.0, 20.0, 14.0, 12.0, 12.0, 9.0, 10.0, 10.0, 5.0, 5.0, 3.0, 3.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.265625, -29.247802734375, -28.22998046875, -27.212158203125, -26.1943359375, -25.176513671875, -24.15869140625, -23.140869140625, -22.123046875, -21.105224609375, -20.08740234375, -19.069580078125, -18.0517578125, -17.033935546875, -16.01611328125, -14.998291015625, -13.98046875, -12.962646484375, -11.94482421875, -10.927001953125, -9.9091796875, -8.891357421875, -7.87353515625, -6.855712890625, -5.837890625, -4.820068359375, -3.80224609375, -2.784423828125, -1.7666015625, -0.748779296875, 0.26904296875, 1.286865234375, 2.3046875, 3.322509765625, 4.34033203125, 5.358154296875, 6.3759765625, 7.393798828125, 8.41162109375, 9.429443359375, 10.447265625, 11.465087890625, 12.48291015625, 13.500732421875, 14.5185546875, 15.536376953125, 16.55419921875, 17.572021484375, 18.58984375, 19.607666015625, 20.62548828125, 21.643310546875, 22.6611328125, 23.678955078125, 24.69677734375, 25.714599609375, 26.732421875, 27.750244140625, 28.76806640625, 29.785888671875, 30.8037109375, 31.821533203125, 32.83935546875, 33.857177734375, 34.875]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [2.0, 4.0, 16.0, 17.0, 20.0, 32.0, 53.0, 66.0, 72.0, 128.0, 208.0, 275.0, 421.0, 626.0, 971.0, 1424.0, 2259.0, 3407.0, 5308.0, 8227.0, 13049.0, 20508.0, 32120.0, 50952.0, 80054.0, 125089.0, 192419.0, 285647.0, 425331.0, 2627330.0, 1284892.0, 381605.0, 261464.0, 175272.0, 113483.0, 71961.0, 45899.0, 29099.0, 18359.0, 11792.0, 7519.0, 4993.0, 3062.0, 2079.0, 1379.0, 786.0, 598.0, 381.0, 268.0, 168.0, 121.0, 81.0, 57.0, 27.0, 22.0, 15.0, 21.0, 3.0, 3.0, 4.0, 0.0, 2.0, 3.0, 3.0], "bins": [-2.27734375, -2.201141357421875, -2.12493896484375, -2.048736572265625, -1.9725341796875, -1.896331787109375, -1.82012939453125, -1.743927001953125, -1.667724609375, -1.591522216796875, -1.51531982421875, -1.439117431640625, -1.3629150390625, -1.286712646484375, -1.21051025390625, -1.134307861328125, -1.05810546875, -0.981903076171875, -0.90570068359375, -0.829498291015625, -0.7532958984375, -0.677093505859375, -0.60089111328125, -0.524688720703125, -0.448486328125, -0.372283935546875, -0.29608154296875, -0.219879150390625, -0.1436767578125, -0.067474365234375, 0.00872802734375, 0.084930419921875, 0.1611328125, 0.237335205078125, 0.31353759765625, 0.389739990234375, 0.4659423828125, 0.542144775390625, 0.61834716796875, 0.694549560546875, 0.770751953125, 0.846954345703125, 0.92315673828125, 0.999359130859375, 1.0755615234375, 1.151763916015625, 1.22796630859375, 1.304168701171875, 1.38037109375, 1.456573486328125, 1.53277587890625, 1.608978271484375, 1.6851806640625, 1.761383056640625, 1.83758544921875, 1.913787841796875, 1.989990234375, 2.066192626953125, 2.14239501953125, 2.218597412109375, 2.2947998046875, 2.371002197265625, 2.44720458984375, 2.523406982421875, 2.599609375]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 2.0, 5.0, 5.0, 8.0, 6.0, 5.0, 8.0, 6.0, 15.0, 7.0, 21.0, 12.0, 22.0, 22.0, 33.0, 27.0, 31.0, 37.0, 40.0, 30.0, 42.0, 36.0, 49.0, 746.0, 358.0, 43.0, 38.0, 41.0, 33.0, 29.0, 36.0, 31.0, 24.0, 30.0, 24.0, 21.0, 19.0, 13.0, 17.0, 9.0, 11.0, 15.0, 6.0, 12.0, 7.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-29.6875, -28.784912109375, -27.88232421875, -26.979736328125, -26.0771484375, -25.174560546875, -24.27197265625, -23.369384765625, -22.466796875, -21.564208984375, -20.66162109375, -19.759033203125, -18.8564453125, -17.953857421875, -17.05126953125, -16.148681640625, -15.24609375, -14.343505859375, -13.44091796875, -12.538330078125, -11.6357421875, -10.733154296875, -9.83056640625, -8.927978515625, -8.025390625, -7.122802734375, -6.22021484375, -5.317626953125, -4.4150390625, -3.512451171875, -2.60986328125, -1.707275390625, -0.8046875, 0.097900390625, 1.00048828125, 1.903076171875, 2.8056640625, 3.708251953125, 4.61083984375, 5.513427734375, 6.416015625, 7.318603515625, 8.22119140625, 9.123779296875, 10.0263671875, 10.928955078125, 11.83154296875, 12.734130859375, 13.63671875, 14.539306640625, 15.44189453125, 16.344482421875, 17.2470703125, 18.149658203125, 19.05224609375, 19.954833984375, 20.857421875, 21.760009765625, 22.66259765625, 23.565185546875, 24.4677734375, 25.370361328125, 26.27294921875, 27.175537109375, 28.078125]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 6.0, 10.0, 5.0, 2.0, 2.0, 7.0, 4.0, 10.0, 13.0, 24.0, 24.0, 42.0, 64.0, 108.0, 133.0, 169.0, 261.0, 415.0, 586.0, 942.0, 1592.0, 2746.0, 5581.0, 13349.0, 42607.0, 191923.0, 5855519.0, 123374.0, 30480.0, 10595.0, 4600.0, 2336.0, 1431.0, 847.0, 526.0, 358.0, 250.0, 136.0, 99.0, 76.0, 58.0, 34.0, 19.0, 20.0, 11.0, 16.0, 9.0, 14.0, 4.0, 3.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-16.75, -16.1796875, -15.609375, -15.0390625, -14.46875, -13.8984375, -13.328125, -12.7578125, -12.1875, -11.6171875, -11.046875, -10.4765625, -9.90625, -9.3359375, -8.765625, -8.1953125, -7.625, -7.0546875, -6.484375, -5.9140625, -5.34375, -4.7734375, -4.203125, -3.6328125, -3.0625, -2.4921875, -1.921875, -1.3515625, -0.78125, -0.2109375, 0.359375, 0.9296875, 1.5, 2.0703125, 2.640625, 3.2109375, 3.78125, 4.3515625, 4.921875, 5.4921875, 6.0625, 6.6328125, 7.203125, 7.7734375, 8.34375, 8.9140625, 9.484375, 10.0546875, 10.625, 11.1953125, 11.765625, 12.3359375, 12.90625, 13.4765625, 14.046875, 14.6171875, 15.1875, 15.7578125, 16.328125, 16.8984375, 17.46875, 18.0390625, 18.609375, 19.1796875, 19.75]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 4.0, 3.0, 6.0, 8.0, 6.0, 8.0, 7.0, 17.0, 14.0, 16.0, 10.0, 22.0, 22.0, 15.0, 30.0, 29.0, 32.0, 26.0, 32.0, 32.0, 43.0, 33.0, 33.0, 61.0, 825.0, 245.0, 39.0, 37.0, 40.0, 34.0, 32.0, 32.0, 35.0, 10.0, 29.0, 16.0, 17.0, 16.0, 20.0, 15.0, 12.0, 6.0, 9.0, 13.0, 9.0, 8.0, 6.0, 2.0, 3.0, 5.0, 3.0, 2.0, 0.0, 2.0, 2.0], "bins": [-24.453125, -23.7294921875, -23.005859375, -22.2822265625, -21.55859375, -20.8349609375, -20.111328125, -19.3876953125, -18.6640625, -17.9404296875, -17.216796875, -16.4931640625, -15.76953125, -15.0458984375, -14.322265625, -13.5986328125, -12.875, -12.1513671875, -11.427734375, -10.7041015625, -9.98046875, -9.2568359375, -8.533203125, -7.8095703125, -7.0859375, -6.3623046875, -5.638671875, -4.9150390625, -4.19140625, -3.4677734375, -2.744140625, -2.0205078125, -1.296875, -0.5732421875, 0.150390625, 0.8740234375, 1.59765625, 2.3212890625, 3.044921875, 3.7685546875, 4.4921875, 5.2158203125, 5.939453125, 6.6630859375, 7.38671875, 8.1103515625, 8.833984375, 9.5576171875, 10.28125, 11.0048828125, 11.728515625, 12.4521484375, 13.17578125, 13.8994140625, 14.623046875, 15.3466796875, 16.0703125, 16.7939453125, 17.517578125, 18.2412109375, 18.96484375, 19.6884765625, 20.412109375, 21.1357421875, 21.859375]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 5.0, 9.0, 21.0, 22.0, 59.0, 85.0, 197.0, 263.0, 176.0, 67.0, 53.0, 14.0, 16.0, 11.0, 5.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-116.6446762084961, -113.8871841430664, -111.12968444824219, -108.3721923828125, -105.61470031738281, -102.8572006225586, -100.0997085571289, -97.34220886230469, -94.584716796875, -91.82722473144531, -89.0697250366211, -86.3122329711914, -83.55473327636719, -80.7972412109375, -78.03974914550781, -75.28225708007812, -72.5247573852539, -69.76726531982422, -67.009765625, -64.25227355957031, -61.49477767944336, -58.737281799316406, -55.97978973388672, -53.222293853759766, -50.46479797363281, -47.70730209350586, -44.949806213378906, -42.19231414794922, -39.434818267822266, -36.67732238769531, -33.919830322265625, -31.162334442138672, -28.40484619140625, -25.647350311279297, -22.889856338500977, -20.132362365722656, -17.374866485595703, -14.617371559143066, -11.85987663269043, -9.10238265991211, -6.344886779785156, -3.5873918533325195, -0.8298969268798828, 1.927597999572754, 4.685092926025391, 7.442587852478027, 10.200082778930664, 12.957576751708984, 15.715072631835938, 18.47256851196289, 21.23006248474121, 23.98755645751953, 26.745052337646484, 29.502548217773438, 32.260040283203125, 35.01753616333008, 37.77503204345703, 40.532527923583984, 43.29002380371094, 46.047515869140625, 48.80501174926758, 51.56250762939453, 54.31999969482422, 57.07749557495117, 59.834991455078125]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 8.0, 5.0, 20.0, 33.0, 48.0, 57.0, 53.0, 81.0, 83.0, 81.0, 89.0, 76.0, 77.0, 75.0, 55.0, 59.0, 47.0, 23.0, 15.0, 16.0, 8.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-169.69186401367188, -165.88055419921875, -162.06924438476562, -158.2579345703125, -154.44662475585938, -150.63531494140625, -146.82400512695312, -143.0126953125, -139.20138549804688, -135.39007568359375, -131.57876586914062, -127.7674560546875, -123.95614624023438, -120.14483642578125, -116.33352661132812, -112.522216796875, -108.71090698242188, -104.89959716796875, -101.08828735351562, -97.2769775390625, -93.46566772460938, -89.65435791015625, -85.84304809570312, -82.03173828125, -78.22042846679688, -74.40911865234375, -70.59780883789062, -66.7864990234375, -62.975189208984375, -59.16387939453125, -55.352569580078125, -51.541259765625, -47.729949951171875, -43.91864013671875, -40.107330322265625, -36.2960205078125, -32.484710693359375, -28.67340087890625, -24.862091064453125, -21.05078125, -17.239471435546875, -13.42816162109375, -9.616851806640625, -5.8055419921875, -1.994232177734375, 1.81707763671875, 5.628387451171875, 9.439697265625, 13.251007080078125, 17.06231689453125, 20.873626708984375, 24.6849365234375, 28.496246337890625, 32.30755615234375, 36.118865966796875, 39.93017578125, 43.741485595703125, 47.55279541015625, 51.364105224609375, 55.1754150390625, 58.986724853515625, 62.79803466796875, 66.60934448242188, 70.420654296875, 74.23196411132812]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 0.0, 1.0, 6.0, 2.0, 3.0, 1.0, 2.0, 4.0, 8.0, 6.0, 16.0, 8.0, 11.0, 10.0, 19.0, 23.0, 19.0, 29.0, 44.0, 48.0, 69.0, 113.0, 185.0, 298.0, 508.0, 1046.0, 2349.0, 5823.0, 23612.0, 4058896.0, 84904.0, 10177.0, 3308.0, 1309.0, 631.0, 303.0, 188.0, 122.0, 63.0, 46.0, 28.0, 18.0, 11.0, 8.0, 6.0, 2.0, 3.0, 2.0], "bins": [-2.69921875, -2.6402130126953125, -2.581207275390625, -2.5222015380859375, -2.46319580078125, -2.4041900634765625, -2.345184326171875, -2.2861785888671875, -2.2271728515625, -2.1681671142578125, -2.109161376953125, -2.0501556396484375, -1.99114990234375, -1.9321441650390625, -1.873138427734375, -1.8141326904296875, -1.755126953125, -1.6961212158203125, -1.637115478515625, -1.5781097412109375, -1.51910400390625, -1.4600982666015625, -1.401092529296875, -1.3420867919921875, -1.2830810546875, -1.2240753173828125, -1.165069580078125, -1.1060638427734375, -1.04705810546875, -0.9880523681640625, -0.929046630859375, -0.8700408935546875, -0.81103515625, -0.7520294189453125, -0.693023681640625, -0.6340179443359375, -0.57501220703125, -0.5160064697265625, -0.457000732421875, -0.3979949951171875, -0.3389892578125, -0.2799835205078125, -0.220977783203125, -0.1619720458984375, -0.10296630859375, -0.0439605712890625, 0.015045166015625, 0.0740509033203125, 0.133056640625, 0.1920623779296875, 0.251068115234375, 0.3100738525390625, 0.36907958984375, 0.4280853271484375, 0.487091064453125, 0.5460968017578125, 0.6051025390625, 0.6641082763671875, 0.723114013671875, 0.7821197509765625, 0.84112548828125, 0.9001312255859375, 0.959136962890625, 1.0181427001953125, 1.0771484375]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 4.0, 5.0, 8.0, 13.0, 10.0, 29.0, 36.0, 795.0, 31.0, 32.0, 19.0, 8.0, 10.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.8837890625, -0.8643226623535156, -0.8448562622070312, -0.8253898620605469, -0.8059234619140625, -0.7864570617675781, -0.7669906616210938, -0.7475242614746094, -0.728057861328125, -0.7085914611816406, -0.6891250610351562, -0.6696586608886719, -0.6501922607421875, -0.6307258605957031, -0.6112594604492188, -0.5917930603027344, -0.57232666015625, -0.5528602600097656, -0.5333938598632812, -0.5139274597167969, -0.4944610595703125, -0.4749946594238281, -0.45552825927734375, -0.4360618591308594, -0.416595458984375, -0.3971290588378906, -0.37766265869140625, -0.3581962585449219, -0.3387298583984375, -0.3192634582519531, -0.29979705810546875, -0.2803306579589844, -0.2608642578125, -0.24139785766601562, -0.22193145751953125, -0.20246505737304688, -0.1829986572265625, -0.16353225708007812, -0.14406585693359375, -0.12459945678710938, -0.105133056640625, -0.08566665649414062, -0.06620025634765625, -0.046733856201171875, -0.0272674560546875, -0.007801055908203125, 0.01166534423828125, 0.031131744384765625, 0.05059814453125, 0.07006454467773438, 0.08953094482421875, 0.10899734497070312, 0.1284637451171875, 0.14793014526367188, 0.16739654541015625, 0.18686294555664062, 0.206329345703125, 0.22579574584960938, 0.24526214599609375, 0.2647285461425781, 0.2841949462890625, 0.3036613464355469, 0.32312774658203125, 0.3425941467285156, 0.362060546875]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 5.0, 18.0, 10.0, 14.0, 20.0, 21.0, 30.0, 49.0, 72.0, 127.0, 176.0, 288.0, 501.0, 974.0, 1920.0, 4314.0, 10652.0, 32099.0, 146876.0, 3491009.0, 419056.0, 57066.0, 16462.0, 6344.0, 2817.0, 1422.0, 758.0, 394.0, 232.0, 143.0, 121.0, 66.0, 58.0, 42.0, 25.0, 25.0, 18.0, 9.0, 14.0, 6.0, 10.0, 5.0, 6.0, 2.0, 2.0, 5.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-1.107421875, -1.0735626220703125, -1.039703369140625, -1.0058441162109375, -0.97198486328125, -0.9381256103515625, -0.904266357421875, -0.8704071044921875, -0.8365478515625, -0.8026885986328125, -0.768829345703125, -0.7349700927734375, -0.70111083984375, -0.6672515869140625, -0.633392333984375, -0.5995330810546875, -0.565673828125, -0.5318145751953125, -0.497955322265625, -0.4640960693359375, -0.43023681640625, -0.3963775634765625, -0.362518310546875, -0.3286590576171875, -0.2947998046875, -0.2609405517578125, -0.227081298828125, -0.1932220458984375, -0.15936279296875, -0.1255035400390625, -0.091644287109375, -0.0577850341796875, -0.02392578125, 0.0099334716796875, 0.043792724609375, 0.0776519775390625, 0.11151123046875, 0.1453704833984375, 0.179229736328125, 0.2130889892578125, 0.2469482421875, 0.2808074951171875, 0.314666748046875, 0.3485260009765625, 0.38238525390625, 0.4162445068359375, 0.450103759765625, 0.4839630126953125, 0.517822265625, 0.5516815185546875, 0.585540771484375, 0.6194000244140625, 0.65325927734375, 0.6871185302734375, 0.720977783203125, 0.7548370361328125, 0.7886962890625, 0.8225555419921875, 0.856414794921875, 0.8902740478515625, 0.92413330078125, 0.9579925537109375, 0.991851806640625, 1.0257110595703125, 1.0595703125]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 4.0, 0.0, 0.0, 2.0, 4.0, 1.0, 7.0, 3.0, 4.0, 10.0, 15.0, 15.0, 19.0, 25.0, 39.0, 53.0, 83.0, 146.0, 407.0, 1766.0, 802.0, 252.0, 132.0, 97.0, 50.0, 40.0, 25.0, 18.0, 15.0, 11.0, 10.0, 2.0, 4.0, 1.0, 5.0, 2.0, 5.0, 2.0, 3.0, 0.0, 4.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.47705078125, -0.4633216857910156, -0.44959259033203125, -0.4358634948730469, -0.4221343994140625, -0.4084053039550781, -0.39467620849609375, -0.3809471130371094, -0.367218017578125, -0.3534889221191406, -0.33975982666015625, -0.3260307312011719, -0.3123016357421875, -0.2985725402832031, -0.28484344482421875, -0.2711143493652344, -0.25738525390625, -0.24365615844726562, -0.22992706298828125, -0.21619796752929688, -0.2024688720703125, -0.18873977661132812, -0.17501068115234375, -0.16128158569335938, -0.147552490234375, -0.13382339477539062, -0.12009429931640625, -0.10636520385742188, -0.0926361083984375, -0.07890701293945312, -0.06517791748046875, -0.051448822021484375, -0.0377197265625, -0.023990631103515625, -0.01026153564453125, 0.003467559814453125, 0.0171966552734375, 0.030925750732421875, 0.04465484619140625, 0.058383941650390625, 0.072113037109375, 0.08584213256835938, 0.09957122802734375, 0.11330032348632812, 0.1270294189453125, 0.14075851440429688, 0.15448760986328125, 0.16821670532226562, 0.18194580078125, 0.19567489624023438, 0.20940399169921875, 0.22313308715820312, 0.2368621826171875, 0.2505912780761719, 0.26432037353515625, 0.2780494689941406, 0.291778564453125, 0.3055076599121094, 0.31923675537109375, 0.3329658508300781, 0.3466949462890625, 0.3604240417480469, 0.37415313720703125, 0.3878822326660156, 0.401611328125]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 1.0, 4.0, 11.0, 15.0, 68.0, 238.0, 351.0, 216.0, 64.0, 22.0, 12.0, 6.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2942087650299072, -1.1551530361175537, -1.0160973072052002, -0.8770414590835571, -0.7379857301712036, -0.5989300012588501, -0.4598742127418518, -0.3208184242248535, -0.1817626953125, -0.0427069365978241, 0.0963488221168518, 0.2354045808315277, 0.3744603395462036, 0.5135160684585571, 0.6525718569755554, 0.7916276454925537, 0.9306833744049072, 1.0697391033172607, 1.2087948322296143, 1.3478506803512573, 1.4869064092636108, 1.6259621381759644, 1.7650179862976074, 1.904073715209961, 2.0431294441223145, 2.182185173034668, 2.3212409019470215, 2.460296630859375, 2.5993523597717285, 2.738408088684082, 2.8774640560150146, 3.016519784927368, 3.1555752754211426, 3.294631004333496, 3.4336867332458496, 3.572742462158203, 3.7117981910705566, 3.85085391998291, 3.9899098873138428, 4.128965377807617, 4.268021583557129, 4.407077312469482, 4.546133041381836, 4.6851887702941895, 4.824244499206543, 4.9633002281188965, 5.10235595703125, 5.241412162780762, 5.380467414855957, 5.5195231437683105, 5.658578872680664, 5.797634601593018, 5.936690330505371, 6.075746059417725, 6.214801788330078, 6.35385799407959, 6.492913246154785, 6.631968975067139, 6.771024703979492, 6.910080432891846, 7.049136161804199, 7.188191890716553, 7.327247619628906, 7.466303825378418, 7.6053595542907715]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 7.0, 12.0, 15.0, 24.0, 40.0, 49.0, 75.0, 94.0, 89.0, 101.0, 101.0, 91.0, 53.0, 76.0, 61.0, 40.0, 26.0, 12.0, 12.0, 6.0, 8.0, 6.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5944769382476807, -1.5182729959487915, -1.4420689344406128, -1.3658649921417236, -1.289660930633545, -1.2134569883346558, -1.1372530460357666, -1.061048984527588, -0.9848450422286987, -0.9086410403251648, -0.8324370384216309, -0.7562330961227417, -0.6800290942192078, -0.6038250923156738, -0.5276211500167847, -0.45141714811325073, -0.3752131462097168, -0.29900914430618286, -0.2228051722049713, -0.14660118520259857, -0.07039719820022583, 0.0058068037033081055, 0.08201077580451965, 0.1582147479057312, 0.23441874980926514, 0.3106227517127991, 0.3868267238140106, 0.46303069591522217, 0.5392346978187561, 0.61543869972229, 0.6916426420211792, 0.7678466439247131, 0.8440508842468262, 0.9202548861503601, 0.996458888053894, 1.0726628303527832, 1.148866891860962, 1.225070834159851, 1.3012747764587402, 1.377478837966919, 1.453682780265808, 1.5298867225646973, 1.606090784072876, 1.6822947263717651, 1.7584986686706543, 1.834702730178833, 1.9109066724777222, 1.9871106147766113, 2.06331467628479, 2.1395187377929688, 2.2157225608825684, 2.291926622390747, 2.368130683898926, 2.4443345069885254, 2.520538568496704, 2.596742630004883, 2.6729464530944824, 2.749150514602661, 2.8253543376922607, 2.9015583992004395, 2.977762460708618, 3.053966522216797, 3.1301703453063965, 3.206374406814575, 3.282578468322754]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 5.0, 4.0, 1.0, 3.0, 6.0, 6.0, 7.0, 9.0, 6.0, 17.0, 22.0, 23.0, 26.0, 41.0, 57.0, 87.0, 153.0, 301.0, 516.0, 1226.0, 3286.0, 10223.0, 49757.0, 938304.0, 31933.0, 7914.0, 2530.0, 980.0, 430.0, 224.0, 129.0, 85.0, 61.0, 38.0, 38.0, 30.0, 18.0, 11.0, 13.0, 10.0, 10.0, 7.0, 4.0, 4.0, 7.0, 0.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.39453125, -3.295806884765625, -3.19708251953125, -3.098358154296875, -2.9996337890625, -2.900909423828125, -2.80218505859375, -2.703460693359375, -2.604736328125, -2.506011962890625, -2.40728759765625, -2.308563232421875, -2.2098388671875, -2.111114501953125, -2.01239013671875, -1.913665771484375, -1.81494140625, -1.716217041015625, -1.61749267578125, -1.518768310546875, -1.4200439453125, -1.321319580078125, -1.22259521484375, -1.123870849609375, -1.025146484375, -0.926422119140625, -0.82769775390625, -0.728973388671875, -0.6302490234375, -0.531524658203125, -0.43280029296875, -0.334075927734375, -0.2353515625, -0.136627197265625, -0.03790283203125, 0.060821533203125, 0.1595458984375, 0.258270263671875, 0.35699462890625, 0.455718994140625, 0.554443359375, 0.653167724609375, 0.75189208984375, 0.850616455078125, 0.9493408203125, 1.048065185546875, 1.14678955078125, 1.245513916015625, 1.34423828125, 1.442962646484375, 1.54168701171875, 1.640411376953125, 1.7391357421875, 1.837860107421875, 1.93658447265625, 2.035308837890625, 2.134033203125, 2.232757568359375, 2.33148193359375, 2.430206298828125, 2.5289306640625, 2.627655029296875, 2.72637939453125, 2.825103759765625, 2.923828125]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 4.0, 3.0, 4.0, 6.0, 9.0, 12.0, 18.0, 26.0, 220.0, 602.0, 33.0, 32.0, 13.0, 4.0, 14.0, 4.0, 2.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7236328125, -0.706878662109375, -0.69012451171875, -0.673370361328125, -0.6566162109375, -0.639862060546875, -0.62310791015625, -0.606353759765625, -0.589599609375, -0.572845458984375, -0.55609130859375, -0.539337158203125, -0.5225830078125, -0.505828857421875, -0.48907470703125, -0.472320556640625, -0.45556640625, -0.438812255859375, -0.42205810546875, -0.405303955078125, -0.3885498046875, -0.371795654296875, -0.35504150390625, -0.338287353515625, -0.321533203125, -0.304779052734375, -0.28802490234375, -0.271270751953125, -0.2545166015625, -0.237762451171875, -0.22100830078125, -0.204254150390625, -0.1875, -0.170745849609375, -0.15399169921875, -0.137237548828125, -0.1204833984375, -0.103729248046875, -0.08697509765625, -0.070220947265625, -0.053466796875, -0.036712646484375, -0.01995849609375, -0.003204345703125, 0.0135498046875, 0.030303955078125, 0.04705810546875, 0.063812255859375, 0.08056640625, 0.097320556640625, 0.11407470703125, 0.130828857421875, 0.1475830078125, 0.164337158203125, 0.18109130859375, 0.197845458984375, 0.214599609375, 0.231353759765625, 0.24810791015625, 0.264862060546875, 0.2816162109375, 0.298370361328125, 0.31512451171875, 0.331878662109375, 0.3486328125]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 4.0, 3.0, 2.0, 3.0, 6.0, 11.0, 15.0, 22.0, 31.0, 52.0, 68.0, 84.0, 146.0, 260.0, 427.0, 809.0, 1575.0, 3864.0, 11038.0, 46920.0, 308835.0, 564139.0, 83206.0, 17404.0, 5313.0, 2004.0, 977.0, 519.0, 298.0, 178.0, 103.0, 80.0, 53.0, 33.0, 20.0, 22.0, 11.0, 6.0, 6.0, 4.0, 0.0, 3.0, 2.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.57421875, -1.523712158203125, -1.47320556640625, -1.422698974609375, -1.3721923828125, -1.321685791015625, -1.27117919921875, -1.220672607421875, -1.170166015625, -1.119659423828125, -1.06915283203125, -1.018646240234375, -0.9681396484375, -0.917633056640625, -0.86712646484375, -0.816619873046875, -0.76611328125, -0.715606689453125, -0.66510009765625, -0.614593505859375, -0.5640869140625, -0.513580322265625, -0.46307373046875, -0.412567138671875, -0.362060546875, -0.311553955078125, -0.26104736328125, -0.210540771484375, -0.1600341796875, -0.109527587890625, -0.05902099609375, -0.008514404296875, 0.0419921875, 0.092498779296875, 0.14300537109375, 0.193511962890625, 0.2440185546875, 0.294525146484375, 0.34503173828125, 0.395538330078125, 0.446044921875, 0.496551513671875, 0.54705810546875, 0.597564697265625, 0.6480712890625, 0.698577880859375, 0.74908447265625, 0.799591064453125, 0.85009765625, 0.900604248046875, 0.95111083984375, 1.001617431640625, 1.0521240234375, 1.102630615234375, 1.15313720703125, 1.203643798828125, 1.254150390625, 1.304656982421875, 1.35516357421875, 1.405670166015625, 1.4561767578125, 1.506683349609375, 1.55718994140625, 1.607696533203125, 1.658203125]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 2.0, 0.0, 2.0, 4.0, 4.0, 8.0, 4.0, 5.0, 5.0, 8.0, 10.0, 6.0, 16.0, 10.0, 22.0, 21.0, 19.0, 22.0, 30.0, 21.0, 21.0, 31.0, 33.0, 38.0, 34.0, 50.0, 36.0, 45.0, 47.0, 41.0, 48.0, 40.0, 40.0, 30.0, 41.0, 23.0, 34.0, 28.0, 18.0, 16.0, 24.0, 14.0, 13.0, 13.0, 6.0, 5.0, 2.0, 6.0, 2.0, 5.0, 4.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-1.0859375, -1.051116943359375, -1.01629638671875, -0.981475830078125, -0.9466552734375, -0.911834716796875, -0.87701416015625, -0.842193603515625, -0.807373046875, -0.772552490234375, -0.73773193359375, -0.702911376953125, -0.6680908203125, -0.633270263671875, -0.59844970703125, -0.563629150390625, -0.52880859375, -0.493988037109375, -0.45916748046875, -0.424346923828125, -0.3895263671875, -0.354705810546875, -0.31988525390625, -0.285064697265625, -0.250244140625, -0.215423583984375, -0.18060302734375, -0.145782470703125, -0.1109619140625, -0.076141357421875, -0.04132080078125, -0.006500244140625, 0.0283203125, 0.063140869140625, 0.09796142578125, 0.132781982421875, 0.1676025390625, 0.202423095703125, 0.23724365234375, 0.272064208984375, 0.306884765625, 0.341705322265625, 0.37652587890625, 0.411346435546875, 0.4461669921875, 0.480987548828125, 0.51580810546875, 0.550628662109375, 0.58544921875, 0.620269775390625, 0.65509033203125, 0.689910888671875, 0.7247314453125, 0.759552001953125, 0.79437255859375, 0.829193115234375, 0.864013671875, 0.898834228515625, 0.93365478515625, 0.968475341796875, 1.0032958984375, 1.038116455078125, 1.07293701171875, 1.107757568359375, 1.142578125]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 2.0, 1.0, 5.0, 5.0, 11.0, 14.0, 13.0, 20.0, 38.0, 50.0, 97.0, 181.0, 428.0, 1322.0, 6623.0, 406011.0, 624120.0, 7250.0, 1408.0, 439.0, 236.0, 96.0, 69.0, 43.0, 23.0, 20.0, 11.0, 9.0, 2.0, 1.0, 4.0, 0.0, 4.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.484375, -4.352569580078125, -4.22076416015625, -4.088958740234375, -3.9571533203125, -3.825347900390625, -3.69354248046875, -3.561737060546875, -3.429931640625, -3.298126220703125, -3.16632080078125, -3.034515380859375, -2.9027099609375, -2.770904541015625, -2.63909912109375, -2.507293701171875, -2.37548828125, -2.243682861328125, -2.11187744140625, -1.980072021484375, -1.8482666015625, -1.716461181640625, -1.58465576171875, -1.452850341796875, -1.321044921875, -1.189239501953125, -1.05743408203125, -0.925628662109375, -0.7938232421875, -0.662017822265625, -0.53021240234375, -0.398406982421875, -0.2666015625, -0.134796142578125, -0.00299072265625, 0.128814697265625, 0.2606201171875, 0.392425537109375, 0.52423095703125, 0.656036376953125, 0.787841796875, 0.919647216796875, 1.05145263671875, 1.183258056640625, 1.3150634765625, 1.446868896484375, 1.57867431640625, 1.710479736328125, 1.84228515625, 1.974090576171875, 2.10589599609375, 2.237701416015625, 2.3695068359375, 2.501312255859375, 2.63311767578125, 2.764923095703125, 2.896728515625, 3.028533935546875, 3.16033935546875, 3.292144775390625, 3.4239501953125, 3.555755615234375, 3.68756103515625, 3.819366455078125, 3.951171875]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 0.0, 5.0, 8.0, 8.0, 9.0, 16.0, 19.0, 21.0, 44.0, 48.0, 81.0, 132.0, 149.0, 127.0, 108.0, 92.0, 50.0, 30.0, 17.0, 10.0, 10.0, 5.0, 3.0, 3.0, 3.0, 5.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.200241088867188e-05, -6.867572665214539e-05, -6.53490424156189e-05, -6.202235817909241e-05, -5.869567394256592e-05, -5.536898970603943e-05, -5.204230546951294e-05, -4.871562123298645e-05, -4.538893699645996e-05, -4.206225275993347e-05, -3.873556852340698e-05, -3.540888428688049e-05, -3.2082200050354004e-05, -2.8755515813827515e-05, -2.5428831577301025e-05, -2.2102147340774536e-05, -1.8775463104248047e-05, -1.5448778867721558e-05, -1.2122094631195068e-05, -8.795410394668579e-06, -5.46872615814209e-06, -2.1420419216156006e-06, 1.1846423149108887e-06, 4.511326551437378e-06, 7.838010787963867e-06, 1.1164695024490356e-05, 1.4491379261016846e-05, 1.7818063497543335e-05, 2.1144747734069824e-05, 2.4471431970596313e-05, 2.7798116207122803e-05, 3.112480044364929e-05, 3.445148468017578e-05, 3.777816891670227e-05, 4.110485315322876e-05, 4.443153738975525e-05, 4.775822162628174e-05, 5.108490586280823e-05, 5.441159009933472e-05, 5.7738274335861206e-05, 6.10649585723877e-05, 6.439164280891418e-05, 6.771832704544067e-05, 7.104501128196716e-05, 7.437169551849365e-05, 7.769837975502014e-05, 8.102506399154663e-05, 8.435174822807312e-05, 8.767843246459961e-05, 9.10051167011261e-05, 9.433180093765259e-05, 9.765848517417908e-05, 0.00010098516941070557, 0.00010431185364723206, 0.00010763853788375854, 0.00011096522212028503, 0.00011429190635681152, 0.00011761859059333801, 0.0001209452748298645, 0.000124271959066391, 0.00012759864330291748, 0.00013092532753944397, 0.00013425201177597046, 0.00013757869601249695, 0.00014090538024902344]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 5.0, 8.0, 8.0, 9.0, 20.0, 33.0, 38.0, 53.0, 114.0, 190.0, 384.0, 830.0, 2316.0, 8876.0, 103814.0, 889124.0, 34619.0, 5173.0, 1626.0, 648.0, 274.0, 152.0, 86.0, 59.0, 25.0, 26.0, 14.0, 16.0, 9.0, 2.0, 1.0, 5.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.111328125, -3.02325439453125, -2.9351806640625, -2.84710693359375, -2.759033203125, -2.67095947265625, -2.5828857421875, -2.49481201171875, -2.40673828125, -2.31866455078125, -2.2305908203125, -2.14251708984375, -2.054443359375, -1.96636962890625, -1.8782958984375, -1.79022216796875, -1.7021484375, -1.61407470703125, -1.5260009765625, -1.43792724609375, -1.349853515625, -1.26177978515625, -1.1737060546875, -1.08563232421875, -0.99755859375, -0.90948486328125, -0.8214111328125, -0.73333740234375, -0.645263671875, -0.55718994140625, -0.4691162109375, -0.38104248046875, -0.29296875, -0.20489501953125, -0.1168212890625, -0.02874755859375, 0.059326171875, 0.14739990234375, 0.2354736328125, 0.32354736328125, 0.41162109375, 0.49969482421875, 0.5877685546875, 0.67584228515625, 0.763916015625, 0.85198974609375, 0.9400634765625, 1.02813720703125, 1.1162109375, 1.20428466796875, 1.2923583984375, 1.38043212890625, 1.468505859375, 1.55657958984375, 1.6446533203125, 1.73272705078125, 1.82080078125, 1.90887451171875, 1.9969482421875, 2.08502197265625, 2.173095703125, 2.26116943359375, 2.3492431640625, 2.43731689453125, 2.525390625]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 6.0, 2.0, 7.0, 8.0, 11.0, 23.0, 23.0, 38.0, 65.0, 85.0, 138.0, 149.0, 135.0, 121.0, 68.0, 44.0, 31.0, 29.0, 5.0, 9.0, 5.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5458984375, -1.4880523681640625, -1.430206298828125, -1.3723602294921875, -1.31451416015625, -1.2566680908203125, -1.198822021484375, -1.1409759521484375, -1.0831298828125, -1.0252838134765625, -0.967437744140625, -0.9095916748046875, -0.85174560546875, -0.7938995361328125, -0.736053466796875, -0.6782073974609375, -0.620361328125, -0.5625152587890625, -0.504669189453125, -0.4468231201171875, -0.38897705078125, -0.3311309814453125, -0.273284912109375, -0.2154388427734375, -0.1575927734375, -0.0997467041015625, -0.041900634765625, 0.0159454345703125, 0.07379150390625, 0.1316375732421875, 0.189483642578125, 0.2473297119140625, 0.30517578125, 0.3630218505859375, 0.420867919921875, 0.4787139892578125, 0.53656005859375, 0.5944061279296875, 0.652252197265625, 0.7100982666015625, 0.7679443359375, 0.8257904052734375, 0.883636474609375, 0.9414825439453125, 0.99932861328125, 1.0571746826171875, 1.115020751953125, 1.1728668212890625, 1.230712890625, 1.2885589599609375, 1.346405029296875, 1.4042510986328125, 1.46209716796875, 1.5199432373046875, 1.577789306640625, 1.6356353759765625, 1.6934814453125, 1.7513275146484375, 1.809173583984375, 1.8670196533203125, 1.92486572265625, 1.9827117919921875, 2.040557861328125, 2.0984039306640625, 2.15625]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 6.0, 8.0, 41.0, 259.0, 588.0, 79.0, 19.0, 4.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.32780838012695, -45.94390106201172, -44.55999755859375, -43.176090240478516, -41.79218292236328, -40.40827941894531, -39.02437210083008, -37.640464782714844, -36.256561279296875, -34.87265396118164, -33.48875045776367, -32.10484313964844, -30.720935821533203, -29.3370304107666, -27.953125, -26.569217681884766, -25.18531036376953, -23.80140495300293, -22.417497634887695, -21.033592224121094, -19.64968490600586, -18.265779495239258, -16.881874084472656, -15.497967720031738, -14.11406135559082, -12.730154991149902, -11.346248626708984, -9.962343215942383, -8.578436851501465, -7.194530487060547, -5.810625076293945, -4.426718711853027, -3.042816162109375, -1.6589100360870361, -0.27500391006469727, 1.1089019775390625, 2.4928083419799805, 3.8767147064208984, 5.2606201171875, 6.644526481628418, 8.028432846069336, 9.412339210510254, 10.796245574951172, 12.180150985717773, 13.564057350158691, 14.94796371459961, 16.33186912536621, 17.715774536132812, 19.099681854248047, 20.48358726501465, 21.867494583129883, 23.251399993896484, 24.63530731201172, 26.01921272277832, 27.403118133544922, 28.787025451660156, 30.170930862426758, 31.55483627319336, 32.938743591308594, 34.32264709472656, 35.7065544128418, 37.09046173095703, 38.474365234375, 39.858272552490234, 41.24217987060547]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 2.0, 3.0, 1.0, 1.0, 2.0, 5.0, 5.0, 6.0, 10.0, 6.0, 15.0, 20.0, 37.0, 57.0, 87.0, 118.0, 135.0, 146.0, 111.0, 79.0, 71.0, 26.0, 15.0, 11.0, 11.0, 3.0, 8.0, 4.0, 3.0, 4.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.342493057250977, -22.585756301879883, -21.829021453857422, -21.072284698486328, -20.315549850463867, -19.558813095092773, -18.802078247070312, -18.04534149169922, -17.288604736328125, -16.53186798095703, -15.77513313293457, -15.018396377563477, -14.261661529541016, -13.504924774169922, -12.748188972473145, -11.991453170776367, -11.234718322753906, -10.477982521057129, -9.721246719360352, -8.964509963989258, -8.207775115966797, -7.451038837432861, -6.694302558898926, -5.937566757202148, -5.180830955505371, -4.424095153808594, -3.6673591136932373, -2.910623073577881, -2.1538872718811035, -1.3971514701843262, -0.6404151916503906, 0.11632061004638672, 0.8730564117431641, 1.629792332649231, 2.386528253555298, 3.1432642936706543, 3.9000000953674316, 4.656735897064209, 5.4134721755981445, 6.170207977294922, 6.926943778991699, 7.683679580688477, 8.440415382385254, 9.197151184082031, 9.953887939453125, 10.710622787475586, 11.46735954284668, 12.224095344543457, 12.980831146240234, 13.737566947937012, 14.494302749633789, 15.251039505004883, 16.007774353027344, 16.764511108398438, 17.52124786376953, 18.277982711791992, 19.034717559814453, 19.791454315185547, 20.548189163208008, 21.3049259185791, 22.061660766601562, 22.818397521972656, 23.57513427734375, 24.33186912536621, 25.088605880737305]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 6.0, 10.0, 14.0, 37.0, 65.0, 120.0, 344.0, 1512.0, 9045.0, 3846589.0, 329982.0, 5370.0, 882.0, 205.0, 58.0, 29.0, 13.0, 4.0, 4.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-16.6875, -16.353271484375, -16.01904296875, -15.684814453125, -15.3505859375, -15.016357421875, -14.68212890625, -14.347900390625, -14.013671875, -13.679443359375, -13.34521484375, -13.010986328125, -12.6767578125, -12.342529296875, -12.00830078125, -11.674072265625, -11.33984375, -11.005615234375, -10.67138671875, -10.337158203125, -10.0029296875, -9.668701171875, -9.33447265625, -9.000244140625, -8.666015625, -8.331787109375, -7.99755859375, -7.663330078125, -7.3291015625, -6.994873046875, -6.66064453125, -6.326416015625, -5.9921875, -5.657958984375, -5.32373046875, -4.989501953125, -4.6552734375, -4.321044921875, -3.98681640625, -3.652587890625, -3.318359375, -2.984130859375, -2.64990234375, -2.315673828125, -1.9814453125, -1.647216796875, -1.31298828125, -0.978759765625, -0.64453125, -0.310302734375, 0.02392578125, 0.358154296875, 0.6923828125, 1.026611328125, 1.36083984375, 1.695068359375, 2.029296875, 2.363525390625, 2.69775390625, 3.031982421875, 3.3662109375, 3.700439453125, 4.03466796875, 4.368896484375, 4.703125]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 5.0, 8.0, 5.0, 4.0, 7.0, 9.0, 11.0, 13.0, 16.0, 35.0, 83.0, 191.0, 307.0, 153.0, 60.0, 31.0, 16.0, 9.0, 15.0, 6.0, 8.0, 1.0, 3.0, 5.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.3408203125, -0.33235931396484375, -0.3238983154296875, -0.31543731689453125, -0.306976318359375, -0.29851531982421875, -0.2900543212890625, -0.28159332275390625, -0.27313232421875, -0.26467132568359375, -0.2562103271484375, -0.24774932861328125, -0.239288330078125, -0.23082733154296875, -0.2223663330078125, -0.21390533447265625, -0.2054443359375, -0.19698333740234375, -0.1885223388671875, -0.18006134033203125, -0.171600341796875, -0.16313934326171875, -0.1546783447265625, -0.14621734619140625, -0.13775634765625, -0.12929534912109375, -0.1208343505859375, -0.11237335205078125, -0.103912353515625, -0.09545135498046875, -0.0869903564453125, -0.07852935791015625, -0.070068359375, -0.06160736083984375, -0.0531463623046875, -0.04468536376953125, -0.036224365234375, -0.02776336669921875, -0.0193023681640625, -0.01084136962890625, -0.00238037109375, 0.00608062744140625, 0.0145416259765625, 0.02300262451171875, 0.031463623046875, 0.03992462158203125, 0.0483856201171875, 0.05684661865234375, 0.0653076171875, 0.07376861572265625, 0.0822296142578125, 0.09069061279296875, 0.099151611328125, 0.10761260986328125, 0.1160736083984375, 0.12453460693359375, 0.13299560546875, 0.14145660400390625, 0.1499176025390625, 0.15837860107421875, 0.166839599609375, 0.17530059814453125, 0.1837615966796875, 0.19222259521484375, 0.20068359375]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 5.0, 4.0, 9.0, 10.0, 14.0, 34.0, 61.0, 125.0, 253.0, 654.0, 2148.0, 9659.0, 94355.0, 3936005.0, 134627.0, 12373.0, 2549.0, 823.0, 324.0, 130.0, 61.0, 33.0, 17.0, 5.0, 8.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.3046875, -4.186920166015625, -4.06915283203125, -3.951385498046875, -3.8336181640625, -3.715850830078125, -3.59808349609375, -3.480316162109375, -3.362548828125, -3.244781494140625, -3.12701416015625, -3.009246826171875, -2.8914794921875, -2.773712158203125, -2.65594482421875, -2.538177490234375, -2.42041015625, -2.302642822265625, -2.18487548828125, -2.067108154296875, -1.9493408203125, -1.831573486328125, -1.71380615234375, -1.596038818359375, -1.478271484375, -1.360504150390625, -1.24273681640625, -1.124969482421875, -1.0072021484375, -0.889434814453125, -0.77166748046875, -0.653900146484375, -0.5361328125, -0.418365478515625, -0.30059814453125, -0.182830810546875, -0.0650634765625, 0.052703857421875, 0.17047119140625, 0.288238525390625, 0.406005859375, 0.523773193359375, 0.64154052734375, 0.759307861328125, 0.8770751953125, 0.994842529296875, 1.11260986328125, 1.230377197265625, 1.34814453125, 1.465911865234375, 1.58367919921875, 1.701446533203125, 1.8192138671875, 1.936981201171875, 2.05474853515625, 2.172515869140625, 2.290283203125, 2.408050537109375, 2.52581787109375, 2.643585205078125, 2.7613525390625, 2.879119873046875, 2.99688720703125, 3.114654541015625, 3.232421875]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 1.0, 3.0, 2.0, 5.0, 4.0, 5.0, 7.0, 8.0, 15.0, 8.0, 11.0, 26.0, 39.0, 42.0, 66.0, 85.0, 160.0, 438.0, 1453.0, 962.0, 302.0, 137.0, 71.0, 57.0, 35.0, 36.0, 31.0, 13.0, 13.0, 9.0, 11.0, 6.0, 5.0, 4.0, 1.0, 2.0, 2.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.34765625, -0.3373756408691406, -0.32709503173828125, -0.3168144226074219, -0.3065338134765625, -0.2962532043457031, -0.28597259521484375, -0.2756919860839844, -0.265411376953125, -0.2551307678222656, -0.24485015869140625, -0.23456954956054688, -0.2242889404296875, -0.21400833129882812, -0.20372772216796875, -0.19344711303710938, -0.18316650390625, -0.17288589477539062, -0.16260528564453125, -0.15232467651367188, -0.1420440673828125, -0.13176345825195312, -0.12148284912109375, -0.11120223999023438, -0.100921630859375, -0.09064102172851562, -0.08036041259765625, -0.07007980346679688, -0.0597991943359375, -0.049518585205078125, -0.03923797607421875, -0.028957366943359375, -0.0186767578125, -0.008396148681640625, 0.00188446044921875, 0.012165069580078125, 0.0224456787109375, 0.032726287841796875, 0.04300689697265625, 0.053287506103515625, 0.063568115234375, 0.07384872436523438, 0.08412933349609375, 0.09440994262695312, 0.1046905517578125, 0.11497116088867188, 0.12525177001953125, 0.13553237915039062, 0.14581298828125, 0.15609359741210938, 0.16637420654296875, 0.17665481567382812, 0.1869354248046875, 0.19721603393554688, 0.20749664306640625, 0.21777725219726562, 0.228057861328125, 0.23833847045898438, 0.24861907958984375, 0.2588996887207031, 0.2691802978515625, 0.2794609069824219, 0.28974151611328125, 0.3000221252441406, 0.310302734375]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 4.0, 21.0, 144.0, 719.0, 103.0, 11.0, 6.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.988882541656494, -7.617837429046631, -7.246792316436768, -6.875747203826904, -6.504702091217041, -6.133656978607178, -5.762612342834473, -5.391567230224609, -5.020522117614746, -4.649477005004883, -4.2784318923950195, -3.9073867797851562, -3.536341667175293, -3.1652965545654297, -2.7942516803741455, -2.4232065677642822, -2.05216121673584, -1.6811161041259766, -1.3100709915161133, -0.9390259981155396, -0.5679808855056763, -0.196935772895813, 0.17410922050476074, 0.545154333114624, 0.9161994457244873, 1.2872445583343506, 1.6582896709442139, 2.029334545135498, 2.4003796577453613, 2.7714247703552246, 3.142469882965088, 3.513514995574951, 3.8845605850219727, 4.255605697631836, 4.626650810241699, 4.9976959228515625, 5.368741035461426, 5.739786148071289, 6.110831260681152, 6.481876373291016, 6.852921485900879, 7.223966598510742, 7.5950117111206055, 7.966056823730469, 8.337101936340332, 8.708147048950195, 9.079192161560059, 9.450237274169922, 9.821281433105469, 10.192326545715332, 10.563371658325195, 10.934416770935059, 11.305461883544922, 11.676506996154785, 12.047552108764648, 12.418597221374512, 12.789642333984375, 13.160687446594238, 13.531732559204102, 13.902777671813965, 14.273822784423828, 14.644867897033691, 15.015913009643555, 15.386958122253418, 15.758003234863281]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 4.0, 0.0, 2.0, 1.0, 4.0, 3.0, 4.0, 2.0, 7.0, 7.0, 7.0, 10.0, 16.0, 36.0, 22.0, 20.0, 53.0, 50.0, 79.0, 68.0, 78.0, 59.0, 75.0, 91.0, 63.0, 50.0, 42.0, 38.0, 22.0, 22.0, 23.0, 17.0, 7.0, 10.0, 5.0, 1.0, 2.0, 3.0, 1.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.8769762516021729, -1.81574547290802, -1.7545145750045776, -1.6932837963104248, -1.6320528984069824, -1.5708221197128296, -1.5095913410186768, -1.4483604431152344, -1.3871296644210815, -1.3258988857269287, -1.2646679878234863, -1.2034372091293335, -1.1422063112258911, -1.0809755325317383, -1.019744634628296, -0.9585138559341431, -0.8972830176353455, -0.8360521793365479, -0.7748213410377502, -0.7135905027389526, -0.6523597240447998, -0.5911288857460022, -0.5298980474472046, -0.46866723895072937, -0.40743640065193176, -0.34620556235313416, -0.28497475385665894, -0.22374391555786133, -0.16251309216022491, -0.1012822687625885, -0.040051430463790894, 0.021179378032684326, 0.08241021633148193, 0.14364103972911835, 0.20487186312675476, 0.26610270142555237, 0.3273335099220276, 0.3885643482208252, 0.4497951865196228, 0.5110260248184204, 0.5722568035125732, 0.6334876418113708, 0.6947184801101685, 0.7559492588043213, 0.8171800971031189, 0.8784109354019165, 0.9396417737007141, 1.0008726119995117, 1.062103509902954, 1.123334288597107, 1.1845651865005493, 1.2457959651947021, 1.3070268630981445, 1.3682576417922974, 1.4294884204864502, 1.4907193183898926, 1.5519500970840454, 1.6131808757781982, 1.6744117736816406, 1.7356425523757935, 1.7968734502792358, 1.8581042289733887, 1.919335126876831, 1.9805659055709839, 2.0417966842651367]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 1.0, 1.0, 5.0, 11.0, 7.0, 8.0, 14.0, 22.0, 38.0, 41.0, 81.0, 122.0, 211.0, 520.0, 1474.0, 5030.0, 26348.0, 661655.0, 327686.0, 19040.0, 3937.0, 1253.0, 496.0, 214.0, 125.0, 62.0, 35.0, 15.0, 21.0, 19.0, 6.0, 11.0, 9.0, 10.0, 3.0, 2.0, 7.0, 4.0, 1.0, 0.0, 3.0, 4.0, 1.0, 0.0, 1.0, 2.0, 2.0], "bins": [-3.40234375, -3.304901123046875, -3.20745849609375, -3.110015869140625, -3.0125732421875, -2.915130615234375, -2.81768798828125, -2.720245361328125, -2.622802734375, -2.525360107421875, -2.42791748046875, -2.330474853515625, -2.2330322265625, -2.135589599609375, -2.03814697265625, -1.940704345703125, -1.84326171875, -1.745819091796875, -1.64837646484375, -1.550933837890625, -1.4534912109375, -1.356048583984375, -1.25860595703125, -1.161163330078125, -1.063720703125, -0.966278076171875, -0.86883544921875, -0.771392822265625, -0.6739501953125, -0.576507568359375, -0.47906494140625, -0.381622314453125, -0.2841796875, -0.186737060546875, -0.08929443359375, 0.008148193359375, 0.1055908203125, 0.203033447265625, 0.30047607421875, 0.397918701171875, 0.495361328125, 0.592803955078125, 0.69024658203125, 0.787689208984375, 0.8851318359375, 0.982574462890625, 1.08001708984375, 1.177459716796875, 1.27490234375, 1.372344970703125, 1.46978759765625, 1.567230224609375, 1.6646728515625, 1.762115478515625, 1.85955810546875, 1.957000732421875, 2.054443359375, 2.151885986328125, 2.24932861328125, 2.346771240234375, 2.4442138671875, 2.541656494140625, 2.63909912109375, 2.736541748046875, 2.833984375]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 5.0, 3.0, 3.0, 2.0, 7.0, 5.0, 12.0, 21.0, 28.0, 48.0, 84.0, 166.0, 233.0, 178.0, 109.0, 48.0, 19.0, 12.0, 8.0, 14.0, 3.0, 1.0, 3.0, 1.0, 0.0, 5.0, 1.0], "bins": [-0.45068359375, -0.4415550231933594, -0.43242645263671875, -0.4232978820800781, -0.4141693115234375, -0.4050407409667969, -0.39591217041015625, -0.3867835998535156, -0.377655029296875, -0.3685264587402344, -0.35939788818359375, -0.3502693176269531, -0.3411407470703125, -0.3320121765136719, -0.32288360595703125, -0.3137550354003906, -0.30462646484375, -0.2954978942871094, -0.28636932373046875, -0.2772407531738281, -0.2681121826171875, -0.2589836120605469, -0.24985504150390625, -0.24072647094726562, -0.231597900390625, -0.22246932983398438, -0.21334075927734375, -0.20421218872070312, -0.1950836181640625, -0.18595504760742188, -0.17682647705078125, -0.16769790649414062, -0.1585693359375, -0.14944076538085938, -0.14031219482421875, -0.13118362426757812, -0.1220550537109375, -0.11292648315429688, -0.10379791259765625, -0.09466934204101562, -0.085540771484375, -0.07641220092773438, -0.06728363037109375, -0.058155059814453125, -0.0490264892578125, -0.039897918701171875, -0.03076934814453125, -0.021640777587890625, -0.01251220703125, -0.003383636474609375, 0.00574493408203125, 0.014873504638671875, 0.0240020751953125, 0.033130645751953125, 0.04225921630859375, 0.051387786865234375, 0.060516357421875, 0.06964492797851562, 0.07877349853515625, 0.08790206909179688, 0.0970306396484375, 0.10615921020507812, 0.11528778076171875, 0.12441635131835938, 0.133544921875]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 2.0, 2.0, 3.0, 3.0, 5.0, 4.0, 6.0, 6.0, 6.0, 5.0, 10.0, 74.0, 429.0, 9713.0, 1019463.0, 18027.0, 603.0, 89.0, 31.0, 13.0, 7.0, 8.0, 6.0, 3.0, 6.0, 4.0, 6.0, 5.0, 3.0, 0.0, 3.0, 0.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.5078125, -5.30792236328125, -5.1080322265625, -4.90814208984375, -4.708251953125, -4.50836181640625, -4.3084716796875, -4.10858154296875, -3.90869140625, -3.70880126953125, -3.5089111328125, -3.30902099609375, -3.109130859375, -2.90924072265625, -2.7093505859375, -2.50946044921875, -2.3095703125, -2.10968017578125, -1.9097900390625, -1.70989990234375, -1.510009765625, -1.31011962890625, -1.1102294921875, -0.91033935546875, -0.71044921875, -0.51055908203125, -0.3106689453125, -0.11077880859375, 0.089111328125, 0.28900146484375, 0.4888916015625, 0.68878173828125, 0.888671875, 1.08856201171875, 1.2884521484375, 1.48834228515625, 1.688232421875, 1.88812255859375, 2.0880126953125, 2.28790283203125, 2.48779296875, 2.68768310546875, 2.8875732421875, 3.08746337890625, 3.287353515625, 3.48724365234375, 3.6871337890625, 3.88702392578125, 4.0869140625, 4.28680419921875, 4.4866943359375, 4.68658447265625, 4.886474609375, 5.08636474609375, 5.2862548828125, 5.48614501953125, 5.68603515625, 5.88592529296875, 6.0858154296875, 6.28570556640625, 6.485595703125, 6.68548583984375, 6.8853759765625, 7.08526611328125, 7.28515625]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 5.0, 7.0, 4.0, 1.0, 7.0, 9.0, 7.0, 7.0, 13.0, 10.0, 15.0, 23.0, 23.0, 26.0, 22.0, 30.0, 43.0, 32.0, 35.0, 35.0, 48.0, 46.0, 43.0, 47.0, 44.0, 41.0, 41.0, 34.0, 44.0, 35.0, 31.0, 28.0, 24.0, 29.0, 29.0, 14.0, 10.0, 19.0, 11.0, 11.0, 4.0, 5.0, 5.0, 1.0, 3.0, 3.0, 3.0, 2.0, 0.0, 1.0, 4.0, 0.0, 0.0, 1.0], "bins": [-0.61083984375, -0.5923233032226562, -0.5738067626953125, -0.5552902221679688, -0.536773681640625, -0.5182571411132812, -0.4997406005859375, -0.48122406005859375, -0.46270751953125, -0.44419097900390625, -0.4256744384765625, -0.40715789794921875, -0.388641357421875, -0.37012481689453125, -0.3516082763671875, -0.33309173583984375, -0.3145751953125, -0.29605865478515625, -0.2775421142578125, -0.25902557373046875, -0.240509033203125, -0.22199249267578125, -0.2034759521484375, -0.18495941162109375, -0.16644287109375, -0.14792633056640625, -0.1294097900390625, -0.11089324951171875, -0.092376708984375, -0.07386016845703125, -0.0553436279296875, -0.03682708740234375, -0.018310546875, 0.00020599365234375, 0.0187225341796875, 0.03723907470703125, 0.055755615234375, 0.07427215576171875, 0.0927886962890625, 0.11130523681640625, 0.12982177734375, 0.14833831787109375, 0.1668548583984375, 0.18537139892578125, 0.203887939453125, 0.22240447998046875, 0.2409210205078125, 0.25943756103515625, 0.2779541015625, 0.29647064208984375, 0.3149871826171875, 0.33350372314453125, 0.352020263671875, 0.37053680419921875, 0.3890533447265625, 0.40756988525390625, 0.42608642578125, 0.44460296630859375, 0.4631195068359375, 0.48163604736328125, 0.500152587890625, 0.5186691284179688, 0.5371856689453125, 0.5557022094726562, 0.57421875]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 2.0, 6.0, 6.0, 21.0, 75.0, 461.0, 1043842.0, 3958.0, 135.0, 19.0, 15.0, 8.0, 4.0, 3.0, 6.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.171875, -25.070556640625, -23.96923828125, -22.867919921875, -21.7666015625, -20.665283203125, -19.56396484375, -18.462646484375, -17.361328125, -16.260009765625, -15.15869140625, -14.057373046875, -12.9560546875, -11.854736328125, -10.75341796875, -9.652099609375, -8.55078125, -7.449462890625, -6.34814453125, -5.246826171875, -4.1455078125, -3.044189453125, -1.94287109375, -0.841552734375, 0.259765625, 1.361083984375, 2.46240234375, 3.563720703125, 4.6650390625, 5.766357421875, 6.86767578125, 7.968994140625, 9.0703125, 10.171630859375, 11.27294921875, 12.374267578125, 13.4755859375, 14.576904296875, 15.67822265625, 16.779541015625, 17.880859375, 18.982177734375, 20.08349609375, 21.184814453125, 22.2861328125, 23.387451171875, 24.48876953125, 25.590087890625, 26.69140625, 27.792724609375, 28.89404296875, 29.995361328125, 31.0966796875, 32.197998046875, 33.29931640625, 34.400634765625, 35.501953125, 36.603271484375, 37.70458984375, 38.805908203125, 39.9072265625, 41.008544921875, 42.10986328125, 43.211181640625, 44.3125]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 3.0, 9.0, 3.0, 6.0, 13.0, 20.0, 18.0, 40.0, 59.0, 77.0, 102.0, 130.0, 123.0, 107.0, 84.0, 65.0, 46.0, 28.0, 19.0, 15.0, 10.0, 4.0, 9.0, 8.0, 6.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00010526180267333984, -0.00010225921869277954, -9.925663471221924e-05, -9.625405073165894e-05, -9.325146675109863e-05, -9.024888277053833e-05, -8.724629878997803e-05, -8.424371480941772e-05, -8.124113082885742e-05, -7.823854684829712e-05, -7.523596286773682e-05, -7.223337888717651e-05, -6.923079490661621e-05, -6.622821092605591e-05, -6.32256269454956e-05, -6.02230429649353e-05, -5.7220458984375e-05, -5.42178750038147e-05, -5.1215291023254395e-05, -4.821270704269409e-05, -4.521012306213379e-05, -4.2207539081573486e-05, -3.9204955101013184e-05, -3.620237112045288e-05, -3.319978713989258e-05, -3.0197203159332275e-05, -2.7194619178771973e-05, -2.419203519821167e-05, -2.1189451217651367e-05, -1.8186867237091064e-05, -1.5184283256530762e-05, -1.2181699275970459e-05, -9.179115295410156e-06, -6.1765313148498535e-06, -3.1739473342895508e-06, -1.7136335372924805e-07, 2.8312206268310547e-06, 5.833804607391357e-06, 8.83638858795166e-06, 1.1838972568511963e-05, 1.4841556549072266e-05, 1.784414052963257e-05, 2.084672451019287e-05, 2.3849308490753174e-05, 2.6851892471313477e-05, 2.985447645187378e-05, 3.285706043243408e-05, 3.5859644412994385e-05, 3.886222839355469e-05, 4.186481237411499e-05, 4.486739635467529e-05, 4.7869980335235596e-05, 5.08725643157959e-05, 5.38751482963562e-05, 5.6877732276916504e-05, 5.988031625747681e-05, 6.288290023803711e-05, 6.588548421859741e-05, 6.888806819915771e-05, 7.189065217971802e-05, 7.489323616027832e-05, 7.789582014083862e-05, 8.089840412139893e-05, 8.390098810195923e-05, 8.690357208251953e-05]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 8.0, 11.0, 17.0, 27.0, 38.0, 76.0, 104.0, 229.0, 804.0, 5921.0, 941538.0, 96125.0, 2822.0, 520.0, 151.0, 66.0, 45.0, 18.0, 18.0, 11.0, 3.0, 6.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.59375, -5.42333984375, -5.2529296875, -5.08251953125, -4.912109375, -4.74169921875, -4.5712890625, -4.40087890625, -4.23046875, -4.06005859375, -3.8896484375, -3.71923828125, -3.548828125, -3.37841796875, -3.2080078125, -3.03759765625, -2.8671875, -2.69677734375, -2.5263671875, -2.35595703125, -2.185546875, -2.01513671875, -1.8447265625, -1.67431640625, -1.50390625, -1.33349609375, -1.1630859375, -0.99267578125, -0.822265625, -0.65185546875, -0.4814453125, -0.31103515625, -0.140625, 0.02978515625, 0.2001953125, 0.37060546875, 0.541015625, 0.71142578125, 0.8818359375, 1.05224609375, 1.22265625, 1.39306640625, 1.5634765625, 1.73388671875, 1.904296875, 2.07470703125, 2.2451171875, 2.41552734375, 2.5859375, 2.75634765625, 2.9267578125, 3.09716796875, 3.267578125, 3.43798828125, 3.6083984375, 3.77880859375, 3.94921875, 4.11962890625, 4.2900390625, 4.46044921875, 4.630859375, 4.80126953125, 4.9716796875, 5.14208984375, 5.3125]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 4.0, 4.0, 3.0, 6.0, 8.0, 49.0, 185.0, 381.0, 247.0, 73.0, 26.0, 8.0, 5.0, 3.0, 4.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-4.03125, -3.9448089599609375, -3.858367919921875, -3.7719268798828125, -3.68548583984375, -3.5990447998046875, -3.512603759765625, -3.4261627197265625, -3.3397216796875, -3.2532806396484375, -3.166839599609375, -3.0803985595703125, -2.99395751953125, -2.9075164794921875, -2.821075439453125, -2.7346343994140625, -2.648193359375, -2.5617523193359375, -2.475311279296875, -2.3888702392578125, -2.30242919921875, -2.2159881591796875, -2.129547119140625, -2.0431060791015625, -1.9566650390625, -1.8702239990234375, -1.783782958984375, -1.6973419189453125, -1.61090087890625, -1.5244598388671875, -1.438018798828125, -1.3515777587890625, -1.26513671875, -1.1786956787109375, -1.092254638671875, -1.0058135986328125, -0.91937255859375, -0.8329315185546875, -0.746490478515625, -0.6600494384765625, -0.5736083984375, -0.4871673583984375, -0.400726318359375, -0.3142852783203125, -0.22784423828125, -0.1414031982421875, -0.054962158203125, 0.0314788818359375, 0.117919921875, 0.2043609619140625, 0.290802001953125, 0.3772430419921875, 0.46368408203125, 0.5501251220703125, 0.636566162109375, 0.7230072021484375, 0.8094482421875, 0.8958892822265625, 0.982330322265625, 1.0687713623046875, 1.15521240234375, 1.2416534423828125, 1.328094482421875, 1.4145355224609375, 1.5009765625]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 10.0, 16.0, 72.0, 590.0, 299.0, 18.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-58.646339416503906, -57.53834533691406, -56.430355072021484, -55.32236099243164, -54.21437072753906, -53.10637664794922, -51.998382568359375, -50.8903923034668, -49.78239822387695, -48.67440414428711, -47.56641387939453, -46.45841979980469, -45.35042953491211, -44.242435455322266, -43.13444519042969, -42.026451110839844, -40.91845703125, -39.810462951660156, -38.70247268676758, -37.594478607177734, -36.486488342285156, -35.37849426269531, -34.27050018310547, -33.16250991821289, -32.05451965332031, -30.9465274810791, -29.83853530883789, -28.730541229248047, -27.622549057006836, -26.514556884765625, -25.406564712524414, -24.298572540283203, -23.190580368041992, -22.08258819580078, -20.97459602355957, -19.86660385131836, -18.758609771728516, -17.650617599487305, -16.542625427246094, -15.434633255004883, -14.326640129089355, -13.218647956848145, -12.110654830932617, -11.002662658691406, -9.894670486450195, -8.786677360534668, -7.678685188293457, -6.570692539215088, -5.462699890136719, -4.35470724105835, -3.2467148303985596, -2.1387224197387695, -1.0307297706604004, 0.07726287841796875, 1.1852550506591797, 2.293247699737549, 3.401240348815918, 4.509232997894287, 5.617225646972656, 6.725217819213867, 7.833210468292236, 8.941203117370605, 10.049195289611816, 11.157188415527344, 12.265180587768555]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 4.0, 4.0, 5.0, 7.0, 10.0, 9.0, 6.0, 19.0, 17.0, 43.0, 35.0, 59.0, 53.0, 81.0, 72.0, 91.0, 79.0, 73.0, 63.0, 52.0, 61.0, 38.0, 30.0, 28.0, 24.0, 6.0, 11.0, 14.0, 3.0, 2.0, 3.0, 3.0, 0.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.4281415939331055, -7.18755578994751, -6.946969509124756, -6.70638370513916, -6.465797424316406, -6.2252116203308105, -5.984625816345215, -5.744039535522461, -5.503453731536865, -5.2628679275512695, -5.022281646728516, -4.78169584274292, -4.541110038757324, -4.30052375793457, -4.059937953948975, -3.8193519115448, -3.578765869140625, -3.33817982673645, -3.0975937843322754, -2.8570079803466797, -2.616421937942505, -2.37583589553833, -2.1352500915527344, -1.8946640491485596, -1.6540780067443848, -1.41349196434021, -1.1729060411453247, -0.9323200583457947, -0.6917340755462646, -0.45114803314208984, -0.2105621099472046, 0.030023813247680664, 0.27060937881469727, 0.5111953616142273, 0.7517813444137573, 0.9923673272132874, 1.2329533100128174, 1.4735393524169922, 1.7141252756118774, 1.9547111988067627, 2.1952972412109375, 2.4358832836151123, 2.676469326019287, 2.917055130004883, 3.1576411724090576, 3.3982272148132324, 3.638813018798828, 3.879399061203003, 4.119985103607178, 4.360570907592773, 4.601157188415527, 4.841742992401123, 5.082328796386719, 5.322915077209473, 5.563500881195068, 5.804086685180664, 6.044672966003418, 6.285258769989014, 6.525845050811768, 6.766430854797363, 7.007017135620117, 7.247602939605713, 7.488188743591309, 7.7287750244140625, 7.969360828399658]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 4.0, 5.0, 4.0, 6.0, 13.0, 11.0, 17.0, 23.0, 24.0, 34.0, 39.0, 55.0, 70.0, 83.0, 107.0, 145.0, 213.0, 348.0, 486.0, 789.0, 1263.0, 2377.0, 4719.0, 12087.0, 51772.0, 3801387.0, 278237.0, 24519.0, 7886.0, 3176.0, 1562.0, 932.0, 562.0, 374.0, 300.0, 191.0, 145.0, 95.0, 55.0, 57.0, 27.0, 30.0, 16.0, 10.0, 9.0, 9.0, 8.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 2.0], "bins": [-3.44921875, -3.352813720703125, -3.25640869140625, -3.160003662109375, -3.0635986328125, -2.967193603515625, -2.87078857421875, -2.774383544921875, -2.677978515625, -2.581573486328125, -2.48516845703125, -2.388763427734375, -2.2923583984375, -2.195953369140625, -2.09954833984375, -2.003143310546875, -1.90673828125, -1.810333251953125, -1.71392822265625, -1.617523193359375, -1.5211181640625, -1.424713134765625, -1.32830810546875, -1.231903076171875, -1.135498046875, -1.039093017578125, -0.94268798828125, -0.846282958984375, -0.7498779296875, -0.653472900390625, -0.55706787109375, -0.460662841796875, -0.3642578125, -0.267852783203125, -0.17144775390625, -0.075042724609375, 0.0213623046875, 0.117767333984375, 0.21417236328125, 0.310577392578125, 0.406982421875, 0.503387451171875, 0.59979248046875, 0.696197509765625, 0.7926025390625, 0.889007568359375, 0.98541259765625, 1.081817626953125, 1.17822265625, 1.274627685546875, 1.37103271484375, 1.467437744140625, 1.5638427734375, 1.660247802734375, 1.75665283203125, 1.853057861328125, 1.949462890625, 2.045867919921875, 2.14227294921875, 2.238677978515625, 2.3350830078125, 2.431488037109375, 2.52789306640625, 2.624298095703125, 2.720703125]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 3.0, 4.0, 4.0, 6.0, 6.0, 8.0, 11.0, 17.0, 20.0, 14.0, 38.0, 44.0, 79.0, 106.0, 120.0, 114.0, 94.0, 88.0, 55.0, 53.0, 29.0, 23.0, 17.0, 10.0, 5.0, 4.0, 3.0, 8.0, 4.0, 3.0, 2.0, 3.0, 4.0, 1.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.1702880859375, -0.16558456420898438, -0.16088104248046875, -0.15617752075195312, -0.1514739990234375, -0.14677047729492188, -0.14206695556640625, -0.13736343383789062, -0.132659912109375, -0.12795639038085938, -0.12325286865234375, -0.11854934692382812, -0.1138458251953125, -0.10914230346679688, -0.10443878173828125, -0.09973526000976562, -0.09503173828125, -0.09032821655273438, -0.08562469482421875, -0.08092117309570312, -0.0762176513671875, -0.07151412963867188, -0.06681060791015625, -0.062107086181640625, -0.057403564453125, -0.052700042724609375, -0.04799652099609375, -0.043292999267578125, -0.0385894775390625, -0.033885955810546875, -0.02918243408203125, -0.024478912353515625, -0.019775390625, -0.015071868896484375, -0.01036834716796875, -0.005664825439453125, -0.0009613037109375, 0.003742218017578125, 0.00844573974609375, 0.013149261474609375, 0.017852783203125, 0.022556304931640625, 0.02725982666015625, 0.031963348388671875, 0.0366668701171875, 0.041370391845703125, 0.04607391357421875, 0.050777435302734375, 0.05548095703125, 0.060184478759765625, 0.06488800048828125, 0.06959152221679688, 0.0742950439453125, 0.07899856567382812, 0.08370208740234375, 0.08840560913085938, 0.093109130859375, 0.09781265258789062, 0.10251617431640625, 0.10721969604492188, 0.1119232177734375, 0.11662673950195312, 0.12133026123046875, 0.12603378295898438, 0.1307373046875]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 3.0, 4.0, 1.0, 4.0, 9.0, 6.0, 7.0, 19.0, 31.0, 41.0, 56.0, 134.0, 721.0, 22410.0, 4157879.0, 12233.0, 491.0, 102.0, 46.0, 31.0, 15.0, 11.0, 9.0, 6.0, 4.0, 4.0, 3.0, 2.0, 1.0, 2.0, 3.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-11.765625, -11.47467041015625, -11.1837158203125, -10.89276123046875, -10.601806640625, -10.31085205078125, -10.0198974609375, -9.72894287109375, -9.43798828125, -9.14703369140625, -8.8560791015625, -8.56512451171875, -8.274169921875, -7.98321533203125, -7.6922607421875, -7.40130615234375, -7.1103515625, -6.81939697265625, -6.5284423828125, -6.23748779296875, -5.946533203125, -5.65557861328125, -5.3646240234375, -5.07366943359375, -4.78271484375, -4.49176025390625, -4.2008056640625, -3.90985107421875, -3.618896484375, -3.32794189453125, -3.0369873046875, -2.74603271484375, -2.455078125, -2.16412353515625, -1.8731689453125, -1.58221435546875, -1.291259765625, -1.00030517578125, -0.7093505859375, -0.41839599609375, -0.12744140625, 0.16351318359375, 0.4544677734375, 0.74542236328125, 1.036376953125, 1.32733154296875, 1.6182861328125, 1.90924072265625, 2.2001953125, 2.49114990234375, 2.7821044921875, 3.07305908203125, 3.364013671875, 3.65496826171875, 3.9459228515625, 4.23687744140625, 4.52783203125, 4.81878662109375, 5.1097412109375, 5.40069580078125, 5.691650390625, 5.98260498046875, 6.2735595703125, 6.56451416015625, 6.85546875]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 3.0, 4.0, 8.0, 3.0, 6.0, 9.0, 5.0, 15.0, 12.0, 25.0, 31.0, 44.0, 74.0, 129.0, 245.0, 880.0, 1456.0, 508.0, 227.0, 131.0, 68.0, 59.0, 35.0, 22.0, 17.0, 12.0, 4.0, 9.0, 0.0, 5.0, 6.0, 2.0, 4.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.277099609375, -0.26849365234375, -0.2598876953125, -0.25128173828125, -0.24267578125, -0.23406982421875, -0.2254638671875, -0.21685791015625, -0.208251953125, -0.19964599609375, -0.1910400390625, -0.18243408203125, -0.173828125, -0.16522216796875, -0.1566162109375, -0.14801025390625, -0.139404296875, -0.13079833984375, -0.1221923828125, -0.11358642578125, -0.10498046875, -0.09637451171875, -0.0877685546875, -0.07916259765625, -0.070556640625, -0.06195068359375, -0.0533447265625, -0.04473876953125, -0.0361328125, -0.02752685546875, -0.0189208984375, -0.01031494140625, -0.001708984375, 0.00689697265625, 0.0155029296875, 0.02410888671875, 0.03271484375, 0.04132080078125, 0.0499267578125, 0.05853271484375, 0.067138671875, 0.07574462890625, 0.0843505859375, 0.09295654296875, 0.1015625, 0.11016845703125, 0.1187744140625, 0.12738037109375, 0.135986328125, 0.14459228515625, 0.1531982421875, 0.16180419921875, 0.17041015625, 0.17901611328125, 0.1876220703125, 0.19622802734375, 0.204833984375, 0.21343994140625, 0.2220458984375, 0.23065185546875, 0.2392578125, 0.24786376953125, 0.2564697265625, 0.26507568359375, 0.273681640625]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 19.0, 547.0, 434.0, 15.0, 2.0, 4.0], "bins": [-28.242290496826172, -27.77063751220703, -27.298982620239258, -26.827329635620117, -26.355674743652344, -25.884021759033203, -25.412368774414062, -24.94071388244629, -24.46906089782715, -23.997407913208008, -23.525753021240234, -23.054100036621094, -22.58244514465332, -22.11079216003418, -21.63913917541504, -21.167484283447266, -20.695831298828125, -20.224178314208984, -19.75252342224121, -19.28087043762207, -18.809215545654297, -18.337562561035156, -17.865909576416016, -17.394254684448242, -16.9226016998291, -16.45094871520996, -15.979293823242188, -15.507640838623047, -15.03598690032959, -14.564332962036133, -14.092679023742676, -13.621025085449219, -13.149371147155762, -12.677717208862305, -12.206063270568848, -11.734410285949707, -11.26275634765625, -10.791102409362793, -10.319448471069336, -9.847795486450195, -9.376141548156738, -8.904487609863281, -8.432833671569824, -7.961180210113525, -7.489526748657227, -7.0178728103637695, -6.5462188720703125, -6.074565410614014, -5.602911949157715, -5.131258010864258, -4.659604549407959, -4.187950611114502, -3.716297149658203, -3.244643211364746, -2.772989511489868, -2.3013358116149902, -1.8296818733215332, -1.3580281734466553, -0.8863744139671326, -0.41472065448760986, 0.056933045387268066, 0.5285868644714355, 1.0002405643463135, 1.4718942642211914, 1.9435479640960693]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 9.0, 5.0, 13.0, 16.0, 52.0, 101.0, 164.0, 233.0, 202.0, 109.0, 59.0, 34.0, 7.0, 6.0, 3.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.082638740539551, -4.971395015716553, -4.860151290893555, -4.748907566070557, -4.637663841247559, -4.5264201164245605, -4.4151763916015625, -4.3039326667785645, -4.192688941955566, -4.081445217132568, -3.9702014923095703, -3.8589577674865723, -3.747714042663574, -3.636470317840576, -3.525226593017578, -3.41398286819458, -3.302739143371582, -3.191495418548584, -3.080251693725586, -2.969007968902588, -2.85776424407959, -2.746520519256592, -2.6352767944335938, -2.5240330696105957, -2.4127893447875977, -2.3015456199645996, -2.1903018951416016, -2.0790581703186035, -1.9678144454956055, -1.8565707206726074, -1.7453269958496094, -1.6340832710266113, -1.5228395462036133, -1.4115958213806152, -1.3003520965576172, -1.1891083717346191, -1.077864646911621, -0.966620922088623, -0.855377197265625, -0.744133472442627, -0.6328897476196289, -0.5216460227966309, -0.4104022979736328, -0.29915857315063477, -0.18791484832763672, -0.07667112350463867, 0.034572601318359375, 0.14581632614135742, 0.25706005096435547, 0.3683037757873535, 0.47954750061035156, 0.5907912254333496, 0.7020349502563477, 0.8132786750793457, 0.9245223999023438, 1.0357661247253418, 1.1470098495483398, 1.258253574371338, 1.369497299194336, 1.480741024017334, 1.591984748840332, 1.70322847366333, 1.8144721984863281, 1.9257159233093262, 2.036959648132324]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 1.0, 4.0, 1.0, 3.0, 2.0, 1.0, 6.0, 4.0, 10.0, 11.0, 5.0, 13.0, 16.0, 20.0, 17.0, 26.0, 30.0, 36.0, 47.0, 68.0, 116.0, 756.0, 245010.0, 800609.0, 1252.0, 119.0, 72.0, 53.0, 36.0, 43.0, 30.0, 29.0, 23.0, 18.0, 15.0, 17.0, 12.0, 7.0, 3.0, 6.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.7578125, -7.5081787109375, -7.258544921875, -7.0089111328125, -6.75927734375, -6.5096435546875, -6.260009765625, -6.0103759765625, -5.7607421875, -5.5111083984375, -5.261474609375, -5.0118408203125, -4.76220703125, -4.5125732421875, -4.262939453125, -4.0133056640625, -3.763671875, -3.5140380859375, -3.264404296875, -3.0147705078125, -2.76513671875, -2.5155029296875, -2.265869140625, -2.0162353515625, -1.7666015625, -1.5169677734375, -1.267333984375, -1.0177001953125, -0.76806640625, -0.5184326171875, -0.268798828125, -0.0191650390625, 0.23046875, 0.4801025390625, 0.729736328125, 0.9793701171875, 1.22900390625, 1.4786376953125, 1.728271484375, 1.9779052734375, 2.2275390625, 2.4771728515625, 2.726806640625, 2.9764404296875, 3.22607421875, 3.4757080078125, 3.725341796875, 3.9749755859375, 4.224609375, 4.4742431640625, 4.723876953125, 4.9735107421875, 5.22314453125, 5.4727783203125, 5.722412109375, 5.9720458984375, 6.2216796875, 6.4713134765625, 6.720947265625, 6.9705810546875, 7.22021484375, 7.4698486328125, 7.719482421875, 7.9691162109375, 8.21875]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 9.0, 53.0, 231.0, 478.0, 196.0, 35.0, 4.0, 6.0, 2.0, 1.0], "bins": [-1.27734375, -1.2550792694091797, -1.2328147888183594, -1.210550308227539, -1.1882858276367188, -1.1660213470458984, -1.1437568664550781, -1.1214923858642578, -1.0992279052734375, -1.0769634246826172, -1.0546989440917969, -1.0324344635009766, -1.0101699829101562, -0.9879055023193359, -0.9656410217285156, -0.9433765411376953, -0.921112060546875, -0.8988475799560547, -0.8765830993652344, -0.8543186187744141, -0.8320541381835938, -0.8097896575927734, -0.7875251770019531, -0.7652606964111328, -0.7429962158203125, -0.7207317352294922, -0.6984672546386719, -0.6762027740478516, -0.6539382934570312, -0.6316738128662109, -0.6094093322753906, -0.5871448516845703, -0.56488037109375, -0.5426158905029297, -0.5203514099121094, -0.49808692932128906, -0.47582244873046875, -0.45355796813964844, -0.4312934875488281, -0.4090290069580078, -0.3867645263671875, -0.3645000457763672, -0.3422355651855469, -0.31997108459472656, -0.29770660400390625, -0.27544212341308594, -0.2531776428222656, -0.2309131622314453, -0.208648681640625, -0.1863842010498047, -0.16411972045898438, -0.14185523986816406, -0.11959075927734375, -0.09732627868652344, -0.07506179809570312, -0.05279731750488281, -0.0305328369140625, -0.008268356323242188, 0.013996124267578125, 0.03626060485839844, 0.05852508544921875, 0.08078956604003906, 0.10305404663085938, 0.1253185272216797, 0.1475830078125]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 8.0, 5.0, 7.0, 11.0, 18.0, 17.0, 28.0, 28.0, 48.0, 62.0, 93.0, 123.0, 189.0, 275.0, 517.0, 992.0, 1985.0, 4495.0, 12779.0, 40388.0, 147559.0, 431883.0, 290302.0, 79580.0, 23140.0, 7725.0, 3013.0, 1361.0, 704.0, 421.0, 234.0, 149.0, 137.0, 86.0, 50.0, 33.0, 26.0, 20.0, 20.0, 15.0, 7.0, 3.0, 6.0, 6.0, 7.0, 3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-1.4794921875, -1.4343414306640625, -1.389190673828125, -1.3440399169921875, -1.29888916015625, -1.2537384033203125, -1.208587646484375, -1.1634368896484375, -1.1182861328125, -1.0731353759765625, -1.027984619140625, -0.9828338623046875, -0.93768310546875, -0.8925323486328125, -0.847381591796875, -0.8022308349609375, -0.757080078125, -0.7119293212890625, -0.666778564453125, -0.6216278076171875, -0.57647705078125, -0.5313262939453125, -0.486175537109375, -0.4410247802734375, -0.3958740234375, -0.3507232666015625, -0.305572509765625, -0.2604217529296875, -0.21527099609375, -0.1701202392578125, -0.124969482421875, -0.0798187255859375, -0.03466796875, 0.0104827880859375, 0.055633544921875, 0.1007843017578125, 0.14593505859375, 0.1910858154296875, 0.236236572265625, 0.2813873291015625, 0.3265380859375, 0.3716888427734375, 0.416839599609375, 0.4619903564453125, 0.50714111328125, 0.5522918701171875, 0.597442626953125, 0.6425933837890625, 0.687744140625, 0.7328948974609375, 0.778045654296875, 0.8231964111328125, 0.86834716796875, 0.9134979248046875, 0.958648681640625, 1.0037994384765625, 1.0489501953125, 1.0941009521484375, 1.139251708984375, 1.1844024658203125, 1.22955322265625, 1.2747039794921875, 1.319854736328125, 1.3650054931640625, 1.41015625]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 2.0, 5.0, 1.0, 7.0, 9.0, 7.0, 12.0, 16.0, 11.0, 28.0, 22.0, 23.0, 36.0, 37.0, 50.0, 53.0, 42.0, 52.0, 52.0, 63.0, 61.0, 50.0, 40.0, 45.0, 45.0, 32.0, 36.0, 31.0, 22.0, 22.0, 20.0, 14.0, 18.0, 12.0, 8.0, 8.0, 5.0, 7.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.93408203125, -0.9005355834960938, -0.8669891357421875, -0.8334426879882812, -0.799896240234375, -0.7663497924804688, -0.7328033447265625, -0.6992568969726562, -0.66571044921875, -0.6321640014648438, -0.5986175537109375, -0.5650711059570312, -0.531524658203125, -0.49797821044921875, -0.4644317626953125, -0.43088531494140625, -0.3973388671875, -0.36379241943359375, -0.3302459716796875, -0.29669952392578125, -0.263153076171875, -0.22960662841796875, -0.1960601806640625, -0.16251373291015625, -0.12896728515625, -0.09542083740234375, -0.0618743896484375, -0.02832794189453125, 0.005218505859375, 0.03876495361328125, 0.0723114013671875, 0.10585784912109375, 0.139404296875, 0.17295074462890625, 0.2064971923828125, 0.24004364013671875, 0.273590087890625, 0.30713653564453125, 0.3406829833984375, 0.37422943115234375, 0.40777587890625, 0.44132232666015625, 0.4748687744140625, 0.5084152221679688, 0.541961669921875, 0.5755081176757812, 0.6090545654296875, 0.6426010131835938, 0.6761474609375, 0.7096939086914062, 0.7432403564453125, 0.7767868041992188, 0.810333251953125, 0.8438796997070312, 0.8774261474609375, 0.9109725952148438, 0.94451904296875, 0.9780654907226562, 1.0116119384765625, 1.0451583862304688, 1.078704833984375, 1.1122512817382812, 1.1457977294921875, 1.1793441772460938, 1.212890625]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 8.0, 8.0, 5.0, 12.0, 13.0, 14.0, 30.0, 41.0, 43.0, 109.0, 162.0, 331.0, 822.0, 2208.0, 8584.0, 81433.0, 888181.0, 55884.0, 7248.0, 1967.0, 719.0, 317.0, 165.0, 81.0, 49.0, 41.0, 23.0, 9.0, 18.0, 9.0, 3.0, 4.0, 5.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.740234375, -3.62127685546875, -3.5023193359375, -3.38336181640625, -3.264404296875, -3.14544677734375, -3.0264892578125, -2.90753173828125, -2.78857421875, -2.66961669921875, -2.5506591796875, -2.43170166015625, -2.312744140625, -2.19378662109375, -2.0748291015625, -1.95587158203125, -1.8369140625, -1.71795654296875, -1.5989990234375, -1.48004150390625, -1.361083984375, -1.24212646484375, -1.1231689453125, -1.00421142578125, -0.88525390625, -0.76629638671875, -0.6473388671875, -0.52838134765625, -0.409423828125, -0.29046630859375, -0.1715087890625, -0.05255126953125, 0.06640625, 0.18536376953125, 0.3043212890625, 0.42327880859375, 0.542236328125, 0.66119384765625, 0.7801513671875, 0.89910888671875, 1.01806640625, 1.13702392578125, 1.2559814453125, 1.37493896484375, 1.493896484375, 1.61285400390625, 1.7318115234375, 1.85076904296875, 1.9697265625, 2.08868408203125, 2.2076416015625, 2.32659912109375, 2.445556640625, 2.56451416015625, 2.6834716796875, 2.80242919921875, 2.92138671875, 3.04034423828125, 3.1593017578125, 3.27825927734375, 3.397216796875, 3.51617431640625, 3.6351318359375, 3.75408935546875, 3.873046875]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 4.0, 0.0, 4.0, 6.0, 6.0, 6.0, 14.0, 9.0, 14.0, 42.0, 62.0, 151.0, 223.0, 202.0, 123.0, 58.0, 31.0, 23.0, 11.0, 7.0, 4.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005292892456054688, -0.0005163475871086121, -0.0005034059286117554, -0.0004904642701148987, -0.000477522611618042, -0.0004645809531211853, -0.0004516392946243286, -0.0004386976361274719, -0.00042575597763061523, -0.00041281431913375854, -0.00039987266063690186, -0.00038693100214004517, -0.0003739893436431885, -0.0003610476851463318, -0.0003481060266494751, -0.0003351643681526184, -0.0003222227096557617, -0.00030928105115890503, -0.00029633939266204834, -0.00028339773416519165, -0.00027045607566833496, -0.00025751441717147827, -0.0002445727586746216, -0.0002316311001777649, -0.0002186894416809082, -0.00020574778318405151, -0.00019280612468719482, -0.00017986446619033813, -0.00016692280769348145, -0.00015398114919662476, -0.00014103949069976807, -0.00012809783220291138, -0.00011515617370605469, -0.000102214515209198, -8.927285671234131e-05, -7.633119821548462e-05, -6.338953971862793e-05, -5.044788122177124e-05, -3.750622272491455e-05, -2.456456422805786e-05, -1.1622905731201172e-05, 1.3187527656555176e-06, 1.4260411262512207e-05, 2.7202069759368896e-05, 4.0143728256225586e-05, 5.3085386753082275e-05, 6.602704524993896e-05, 7.896870374679565e-05, 9.191036224365234e-05, 0.00010485202074050903, 0.00011779367923736572, 0.0001307353377342224, 0.0001436769962310791, 0.0001566186547279358, 0.00016956031322479248, 0.00018250197172164917, 0.00019544363021850586, 0.00020838528871536255, 0.00022132694721221924, 0.00023426860570907593, 0.0002472102642059326, 0.0002601519227027893, 0.000273093581199646, 0.0002860352396965027, 0.0002989768981933594]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 4.0, 1.0, 4.0, 5.0, 9.0, 9.0, 24.0, 41.0, 63.0, 96.0, 229.0, 570.0, 1546.0, 5988.0, 152720.0, 873650.0, 9924.0, 2215.0, 740.0, 374.0, 159.0, 84.0, 49.0, 24.0, 10.0, 6.0, 6.0, 2.0, 4.0, 0.0, 2.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0], "bins": [-11.453125, -11.167724609375, -10.88232421875, -10.596923828125, -10.3115234375, -10.026123046875, -9.74072265625, -9.455322265625, -9.169921875, -8.884521484375, -8.59912109375, -8.313720703125, -8.0283203125, -7.742919921875, -7.45751953125, -7.172119140625, -6.88671875, -6.601318359375, -6.31591796875, -6.030517578125, -5.7451171875, -5.459716796875, -5.17431640625, -4.888916015625, -4.603515625, -4.318115234375, -4.03271484375, -3.747314453125, -3.4619140625, -3.176513671875, -2.89111328125, -2.605712890625, -2.3203125, -2.034912109375, -1.74951171875, -1.464111328125, -1.1787109375, -0.893310546875, -0.60791015625, -0.322509765625, -0.037109375, 0.248291015625, 0.53369140625, 0.819091796875, 1.1044921875, 1.389892578125, 1.67529296875, 1.960693359375, 2.24609375, 2.531494140625, 2.81689453125, 3.102294921875, 3.3876953125, 3.673095703125, 3.95849609375, 4.243896484375, 4.529296875, 4.814697265625, 5.10009765625, 5.385498046875, 5.6708984375, 5.956298828125, 6.24169921875, 6.527099609375, 6.8125]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 6.0, 2.0, 1.0, 4.0, 12.0, 5.0, 9.0, 20.0, 42.0, 87.0, 245.0, 286.0, 161.0, 57.0, 20.0, 14.0, 8.0, 7.0, 1.0, 8.0, 6.0, 1.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.83984375, -1.75701904296875, -1.6741943359375, -1.59136962890625, -1.508544921875, -1.42572021484375, -1.3428955078125, -1.26007080078125, -1.17724609375, -1.09442138671875, -1.0115966796875, -0.92877197265625, -0.845947265625, -0.76312255859375, -0.6802978515625, -0.59747314453125, -0.5146484375, -0.43182373046875, -0.3489990234375, -0.26617431640625, -0.183349609375, -0.10052490234375, -0.0177001953125, 0.06512451171875, 0.14794921875, 0.23077392578125, 0.3135986328125, 0.39642333984375, 0.479248046875, 0.56207275390625, 0.6448974609375, 0.72772216796875, 0.810546875, 0.89337158203125, 0.9761962890625, 1.05902099609375, 1.141845703125, 1.22467041015625, 1.3074951171875, 1.39031982421875, 1.47314453125, 1.55596923828125, 1.6387939453125, 1.72161865234375, 1.804443359375, 1.88726806640625, 1.9700927734375, 2.05291748046875, 2.1357421875, 2.21856689453125, 2.3013916015625, 2.38421630859375, 2.467041015625, 2.54986572265625, 2.6326904296875, 2.71551513671875, 2.79833984375, 2.88116455078125, 2.9639892578125, 3.04681396484375, 3.129638671875, 3.21246337890625, 3.2952880859375, 3.37811279296875, 3.4609375]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 85.0, 836.0, 73.0, 12.0, 6.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-155.31600952148438, -152.4038848876953, -149.49176025390625, -146.57962036132812, -143.66749572753906, -140.75537109375, -137.84324645996094, -134.9311065673828, -132.01898193359375, -129.1068572998047, -126.1947250366211, -123.28260040283203, -120.37046813964844, -117.45834350585938, -114.54621124267578, -111.63408660888672, -108.72195434570312, -105.80982971191406, -102.89769744873047, -99.9855728149414, -97.07344055175781, -94.16131591796875, -91.24918365478516, -88.3370590209961, -85.42493438720703, -82.51280975341797, -79.60067749023438, -76.68855285644531, -73.77642059326172, -70.86429595947266, -67.95216369628906, -65.0400390625, -62.12791442871094, -59.21578598022461, -56.30365753173828, -53.39152908325195, -50.479400634765625, -47.5672721862793, -44.65514373779297, -41.743019104003906, -38.83088684082031, -35.918758392333984, -33.006629943847656, -30.094501495361328, -27.182373046875, -24.270244598388672, -21.358118057250977, -18.44598960876465, -15.53386116027832, -12.621732711791992, -9.709604263305664, -6.797476768493652, -3.885348320007324, -0.9732198715209961, 1.9389076232910156, 4.851036071777344, 7.763164520263672, 10.67529296875, 13.587421417236328, 16.499549865722656, 19.411678314208984, 22.323806762695312, 25.235933303833008, 28.148061752319336, 31.060190200805664]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 6.0, 6.0, 11.0, 26.0, 47.0, 69.0, 76.0, 116.0, 112.0, 109.0, 115.0, 94.0, 69.0, 59.0, 33.0, 24.0, 21.0, 12.0, 2.0, 3.0, 1.0, 4.0, 0.0, 0.0, 0.0, 1.0], "bins": [-25.51825714111328, -25.00495719909668, -24.49165916442871, -23.97835922241211, -23.465059280395508, -22.95176124572754, -22.438461303710938, -21.925161361694336, -21.411863327026367, -20.898563385009766, -20.385265350341797, -19.871965408325195, -19.358665466308594, -18.845367431640625, -18.332067489624023, -17.818767547607422, -17.305469512939453, -16.79216957092285, -16.278871536254883, -15.765571594238281, -15.252272605895996, -14.738973617553711, -14.22567367553711, -13.712374687194824, -13.199073791503906, -12.685774803161621, -12.17247486114502, -11.659175872802734, -11.14587688446045, -10.632577896118164, -10.119277954101562, -9.605978965759277, -9.092679977416992, -8.579380989074707, -8.066081047058105, -7.55278205871582, -7.039483070373535, -6.526183605194092, -6.012884140014648, -5.499585151672363, -4.986285209655762, -4.472985744476318, -3.959686756134033, -3.44638729095459, -2.9330880641937256, -2.4197888374328613, -1.906489372253418, -1.3931901454925537, -0.8798909187316895, -0.3665916323661804, 0.1467076539993286, 0.6600069999694824, 1.1733062267303467, 1.686605453491211, 2.1999049186706543, 2.7132041454315186, 3.226503372192383, 3.739802598953247, 4.253101825714111, 4.766401290893555, 5.27970027923584, 5.792999744415283, 6.306299209594727, 6.819598197937012, 7.332897663116455]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 11.0, 12.0, 19.0, 43.0, 62.0, 104.0, 172.0, 456.0, 46070.0, 4146892.0, 333.0, 86.0, 25.0, 5.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-62.875, -61.713134765625, -60.55126953125, -59.389404296875, -58.2275390625, -57.065673828125, -55.90380859375, -54.741943359375, -53.580078125, -52.418212890625, -51.25634765625, -50.094482421875, -48.9326171875, -47.770751953125, -46.60888671875, -45.447021484375, -44.28515625, -43.123291015625, -41.96142578125, -40.799560546875, -39.6376953125, -38.475830078125, -37.31396484375, -36.152099609375, -34.990234375, -33.828369140625, -32.66650390625, -31.504638671875, -30.3427734375, -29.180908203125, -28.01904296875, -26.857177734375, -25.6953125, -24.533447265625, -23.37158203125, -22.209716796875, -21.0478515625, -19.885986328125, -18.72412109375, -17.562255859375, -16.400390625, -15.238525390625, -14.07666015625, -12.914794921875, -11.7529296875, -10.591064453125, -9.42919921875, -8.267333984375, -7.10546875, -5.943603515625, -4.78173828125, -3.619873046875, -2.4580078125, -1.296142578125, -0.13427734375, 1.027587890625, 2.189453125, 3.351318359375, 4.51318359375, 5.675048828125, 6.8369140625, 7.998779296875, 9.16064453125, 10.322509765625, 11.484375]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 9.0, 23.0, 106.0, 253.0, 330.0, 177.0, 87.0, 21.0, 5.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.4853515625, -1.4580039978027344, -1.4306564331054688, -1.4033088684082031, -1.3759613037109375, -1.3486137390136719, -1.3212661743164062, -1.2939186096191406, -1.266571044921875, -1.2392234802246094, -1.2118759155273438, -1.1845283508300781, -1.1571807861328125, -1.1298332214355469, -1.1024856567382812, -1.0751380920410156, -1.04779052734375, -1.0204429626464844, -0.9930953979492188, -0.9657478332519531, -0.9384002685546875, -0.9110527038574219, -0.8837051391601562, -0.8563575744628906, -0.829010009765625, -0.8016624450683594, -0.7743148803710938, -0.7469673156738281, -0.7196197509765625, -0.6922721862792969, -0.6649246215820312, -0.6375770568847656, -0.6102294921875, -0.5828819274902344, -0.5555343627929688, -0.5281867980957031, -0.5008392333984375, -0.4734916687011719, -0.44614410400390625, -0.4187965393066406, -0.391448974609375, -0.3641014099121094, -0.33675384521484375, -0.3094062805175781, -0.2820587158203125, -0.2547111511230469, -0.22736358642578125, -0.20001602172851562, -0.17266845703125, -0.14532089233398438, -0.11797332763671875, -0.09062576293945312, -0.0632781982421875, -0.035930633544921875, -0.00858306884765625, 0.018764495849609375, 0.046112060546875, 0.07345962524414062, 0.10080718994140625, 0.12815475463867188, 0.1555023193359375, 0.18284988403320312, 0.21019744873046875, 0.23754501342773438, 0.264892578125]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 5.0, 3.0, 9.0, 23.0, 37.0, 90.0, 151.0, 344.0, 8762.0, 4183929.0, 646.0, 162.0, 73.0, 34.0, 13.0, 8.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-41.75, -40.2158203125, -38.681640625, -37.1474609375, -35.61328125, -34.0791015625, -32.544921875, -31.0107421875, -29.4765625, -27.9423828125, -26.408203125, -24.8740234375, -23.33984375, -21.8056640625, -20.271484375, -18.7373046875, -17.203125, -15.6689453125, -14.134765625, -12.6005859375, -11.06640625, -9.5322265625, -7.998046875, -6.4638671875, -4.9296875, -3.3955078125, -1.861328125, -0.3271484375, 1.20703125, 2.7412109375, 4.275390625, 5.8095703125, 7.34375, 8.8779296875, 10.412109375, 11.9462890625, 13.48046875, 15.0146484375, 16.548828125, 18.0830078125, 19.6171875, 21.1513671875, 22.685546875, 24.2197265625, 25.75390625, 27.2880859375, 28.822265625, 30.3564453125, 31.890625, 33.4248046875, 34.958984375, 36.4931640625, 38.02734375, 39.5615234375, 41.095703125, 42.6298828125, 44.1640625, 45.6982421875, 47.232421875, 48.7666015625, 50.30078125, 51.8349609375, 53.369140625, 54.9033203125, 56.4375]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 10.0, 34.0, 333.0, 3464.0, 193.0, 35.0, 11.0, 6.0, 3.0, 2.0, 0.0, 1.0, 1.0], "bins": [-6.125, -6.012298583984375, -5.89959716796875, -5.786895751953125, -5.6741943359375, -5.561492919921875, -5.44879150390625, -5.336090087890625, -5.223388671875, -5.110687255859375, -4.99798583984375, -4.885284423828125, -4.7725830078125, -4.659881591796875, -4.54718017578125, -4.434478759765625, -4.32177734375, -4.209075927734375, -4.09637451171875, -3.983673095703125, -3.8709716796875, -3.758270263671875, -3.64556884765625, -3.532867431640625, -3.420166015625, -3.307464599609375, -3.19476318359375, -3.082061767578125, -2.9693603515625, -2.856658935546875, -2.74395751953125, -2.631256103515625, -2.5185546875, -2.405853271484375, -2.29315185546875, -2.180450439453125, -2.0677490234375, -1.955047607421875, -1.84234619140625, -1.729644775390625, -1.616943359375, -1.504241943359375, -1.39154052734375, -1.278839111328125, -1.1661376953125, -1.053436279296875, -0.94073486328125, -0.828033447265625, -0.71533203125, -0.602630615234375, -0.48992919921875, -0.377227783203125, -0.2645263671875, -0.151824951171875, -0.03912353515625, 0.073577880859375, 0.186279296875, 0.298980712890625, 0.41168212890625, 0.524383544921875, 0.6370849609375, 0.749786376953125, 0.86248779296875, 0.975189208984375, 1.087890625]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 11.0, 40.0, 411.0, 465.0, 60.0, 13.0, 6.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-48.2281379699707, -47.28314971923828, -46.33816146850586, -45.39317321777344, -44.448184967041016, -43.503196716308594, -42.55820846557617, -41.61322021484375, -40.668235778808594, -39.72324752807617, -38.77825927734375, -37.83327102661133, -36.888282775878906, -35.943294525146484, -34.99830627441406, -34.053321838378906, -33.10832977294922, -32.1633415222168, -31.218353271484375, -30.273365020751953, -29.32837677001953, -28.38338851928711, -27.43840217590332, -26.4934139251709, -25.548425674438477, -24.603437423706055, -23.658449172973633, -22.71346092224121, -21.768474578857422, -20.823486328125, -19.878498077392578, -18.933509826660156, -17.988521575927734, -17.043533325195312, -16.09854507446289, -15.153557777404785, -14.208569526672363, -13.263581275939941, -12.318593978881836, -11.373605728149414, -10.428617477416992, -9.48362922668457, -8.538640975952148, -7.593653678894043, -6.648665428161621, -5.703677177429199, -4.7586894035339355, -3.813701629638672, -2.86871337890625, -1.9237253665924072, -0.9787373542785645, -0.03374934196472168, 0.9112386703491211, 1.856226921081543, 2.8012146949768066, 3.7462024688720703, 4.691190719604492, 5.636178970336914, 6.581166744232178, 7.526154518127441, 8.471142768859863, 9.416131019592285, 10.36111831665039, 11.306106567382812, 12.251094818115234]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 1.0, 0.0, 10.0, 9.0, 30.0, 39.0, 55.0, 103.0, 100.0, 143.0, 143.0, 124.0, 85.0, 70.0, 45.0, 22.0, 18.0, 5.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-15.258219718933105, -14.819241523742676, -14.380263328552246, -13.941285133361816, -13.502307891845703, -13.063329696655273, -12.624351501464844, -12.185373306274414, -11.746395111083984, -11.307416915893555, -10.868438720703125, -10.429460525512695, -9.990482330322266, -9.551504135131836, -9.112526893615723, -8.673548698425293, -8.234570503234863, -7.795592308044434, -7.356614112854004, -6.917636394500732, -6.478658199310303, -6.039680004119873, -5.600702285766602, -5.161724090576172, -4.722745895385742, -4.2837677001953125, -3.844789743423462, -3.4058117866516113, -2.9668335914611816, -2.527855396270752, -2.0888774394989014, -1.6498994827270508, -1.2109203338623047, -0.7719422578811646, -0.3329641819000244, 0.10601389408111572, 0.5449919700622559, 0.9839701652526855, 1.4229481220245361, 1.8619260787963867, 2.3009042739868164, 2.739882469177246, 3.1788604259490967, 3.6178383827209473, 4.056816577911377, 4.495794773101807, 4.934772491455078, 5.373750686645508, 5.8127288818359375, 6.251707077026367, 6.690685272216797, 7.129662990570068, 7.568641185760498, 8.007619857788086, 8.4465970993042, 8.885575294494629, 9.324553489685059, 9.763531684875488, 10.202509880065918, 10.641488075256348, 11.080465316772461, 11.51944351196289, 11.95842170715332, 12.39739990234375, 12.83637809753418]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 4.0, 1.0, 1.0, 4.0, 4.0, 6.0, 7.0, 9.0, 23.0, 18.0, 25.0, 31.0, 32.0, 67.0, 104.0, 202.0, 768.0, 5290.0, 208218.0, 818575.0, 13110.0, 1376.0, 312.0, 121.0, 70.0, 40.0, 34.0, 25.0, 21.0, 14.0, 14.0, 11.0, 10.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.9765625, -9.6458740234375, -9.315185546875, -8.9844970703125, -8.65380859375, -8.3231201171875, -7.992431640625, -7.6617431640625, -7.3310546875, -7.0003662109375, -6.669677734375, -6.3389892578125, -6.00830078125, -5.6776123046875, -5.346923828125, -5.0162353515625, -4.685546875, -4.3548583984375, -4.024169921875, -3.6934814453125, -3.36279296875, -3.0321044921875, -2.701416015625, -2.3707275390625, -2.0400390625, -1.7093505859375, -1.378662109375, -1.0479736328125, -0.71728515625, -0.3865966796875, -0.055908203125, 0.2747802734375, 0.60546875, 0.9361572265625, 1.266845703125, 1.5975341796875, 1.92822265625, 2.2589111328125, 2.589599609375, 2.9202880859375, 3.2509765625, 3.5816650390625, 3.912353515625, 4.2430419921875, 4.57373046875, 4.9044189453125, 5.235107421875, 5.5657958984375, 5.896484375, 6.2271728515625, 6.557861328125, 6.8885498046875, 7.21923828125, 7.5499267578125, 7.880615234375, 8.2113037109375, 8.5419921875, 8.8726806640625, 9.203369140625, 9.5340576171875, 9.86474609375, 10.1954345703125, 10.526123046875, 10.8568115234375, 11.1875]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 11.0, 12.0, 24.0, 80.0, 129.0, 186.0, 213.0, 170.0, 103.0, 55.0, 17.0, 7.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.765625, -1.7297210693359375, -1.693817138671875, -1.6579132080078125, -1.62200927734375, -1.5861053466796875, -1.550201416015625, -1.5142974853515625, -1.4783935546875, -1.4424896240234375, -1.406585693359375, -1.3706817626953125, -1.33477783203125, -1.2988739013671875, -1.262969970703125, -1.2270660400390625, -1.191162109375, -1.1552581787109375, -1.119354248046875, -1.0834503173828125, -1.04754638671875, -1.0116424560546875, -0.975738525390625, -0.9398345947265625, -0.9039306640625, -0.8680267333984375, -0.832122802734375, -0.7962188720703125, -0.76031494140625, -0.7244110107421875, -0.688507080078125, -0.6526031494140625, -0.61669921875, -0.5807952880859375, -0.544891357421875, -0.5089874267578125, -0.47308349609375, -0.4371795654296875, -0.401275634765625, -0.3653717041015625, -0.3294677734375, -0.2935638427734375, -0.257659912109375, -0.2217559814453125, -0.18585205078125, -0.1499481201171875, -0.114044189453125, -0.0781402587890625, -0.042236328125, -0.0063323974609375, 0.029571533203125, 0.0654754638671875, 0.10137939453125, 0.1372833251953125, 0.173187255859375, 0.2090911865234375, 0.2449951171875, 0.2808990478515625, 0.316802978515625, 0.3527069091796875, 0.38861083984375, 0.4245147705078125, 0.460418701171875, 0.4963226318359375, 0.5322265625]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 5.0, 9.0, 12.0, 9.0, 18.0, 26.0, 32.0, 55.0, 66.0, 95.0, 173.0, 292.0, 515.0, 1134.0, 3181.0, 14872.0, 129260.0, 719751.0, 156305.0, 16786.0, 3510.0, 1164.0, 526.0, 273.0, 165.0, 118.0, 65.0, 43.0, 29.0, 23.0, 13.0, 8.0, 9.0, 6.0, 4.0, 5.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-3.775390625, -3.66607666015625, -3.5567626953125, -3.44744873046875, -3.338134765625, -3.22882080078125, -3.1195068359375, -3.01019287109375, -2.90087890625, -2.79156494140625, -2.6822509765625, -2.57293701171875, -2.463623046875, -2.35430908203125, -2.2449951171875, -2.13568115234375, -2.0263671875, -1.91705322265625, -1.8077392578125, -1.69842529296875, -1.589111328125, -1.47979736328125, -1.3704833984375, -1.26116943359375, -1.15185546875, -1.04254150390625, -0.9332275390625, -0.82391357421875, -0.714599609375, -0.60528564453125, -0.4959716796875, -0.38665771484375, -0.27734375, -0.16802978515625, -0.0587158203125, 0.05059814453125, 0.159912109375, 0.26922607421875, 0.3785400390625, 0.48785400390625, 0.59716796875, 0.70648193359375, 0.8157958984375, 0.92510986328125, 1.034423828125, 1.14373779296875, 1.2530517578125, 1.36236572265625, 1.4716796875, 1.58099365234375, 1.6903076171875, 1.79962158203125, 1.908935546875, 2.01824951171875, 2.1275634765625, 2.23687744140625, 2.34619140625, 2.45550537109375, 2.5648193359375, 2.67413330078125, 2.783447265625, 2.89276123046875, 3.0020751953125, 3.11138916015625, 3.220703125]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 4.0, 6.0, 1.0, 3.0, 10.0, 8.0, 6.0, 12.0, 16.0, 16.0, 25.0, 29.0, 31.0, 39.0, 37.0, 51.0, 55.0, 64.0, 61.0, 70.0, 63.0, 67.0, 42.0, 43.0, 44.0, 40.0, 29.0, 28.0, 20.0, 27.0, 13.0, 10.0, 7.0, 11.0, 7.0, 4.0, 1.0, 2.0, 2.0, 4.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-1.8857421875, -1.8341064453125, -1.782470703125, -1.7308349609375, -1.67919921875, -1.6275634765625, -1.575927734375, -1.5242919921875, -1.47265625, -1.4210205078125, -1.369384765625, -1.3177490234375, -1.26611328125, -1.2144775390625, -1.162841796875, -1.1112060546875, -1.0595703125, -1.0079345703125, -0.956298828125, -0.9046630859375, -0.85302734375, -0.8013916015625, -0.749755859375, -0.6981201171875, -0.646484375, -0.5948486328125, -0.543212890625, -0.4915771484375, -0.43994140625, -0.3883056640625, -0.336669921875, -0.2850341796875, -0.2333984375, -0.1817626953125, -0.130126953125, -0.0784912109375, -0.02685546875, 0.0247802734375, 0.076416015625, 0.1280517578125, 0.1796875, 0.2313232421875, 0.282958984375, 0.3345947265625, 0.38623046875, 0.4378662109375, 0.489501953125, 0.5411376953125, 0.5927734375, 0.6444091796875, 0.696044921875, 0.7476806640625, 0.79931640625, 0.8509521484375, 0.902587890625, 0.9542236328125, 1.005859375, 1.0574951171875, 1.109130859375, 1.1607666015625, 1.21240234375, 1.2640380859375, 1.315673828125, 1.3673095703125, 1.4189453125]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 5.0, 3.0, 3.0, 7.0, 17.0, 34.0, 40.0, 65.0, 166.0, 466.0, 2426.0, 63968.0, 967812.0, 11883.0, 1100.0, 290.0, 140.0, 57.0, 34.0, 19.0, 7.0, 5.0, 6.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.171875, -5.95379638671875, -5.7357177734375, -5.51763916015625, -5.299560546875, -5.08148193359375, -4.8634033203125, -4.64532470703125, -4.42724609375, -4.20916748046875, -3.9910888671875, -3.77301025390625, -3.554931640625, -3.33685302734375, -3.1187744140625, -2.90069580078125, -2.6826171875, -2.46453857421875, -2.2464599609375, -2.02838134765625, -1.810302734375, -1.59222412109375, -1.3741455078125, -1.15606689453125, -0.93798828125, -0.71990966796875, -0.5018310546875, -0.28375244140625, -0.065673828125, 0.15240478515625, 0.3704833984375, 0.58856201171875, 0.806640625, 1.02471923828125, 1.2427978515625, 1.46087646484375, 1.678955078125, 1.89703369140625, 2.1151123046875, 2.33319091796875, 2.55126953125, 2.76934814453125, 2.9874267578125, 3.20550537109375, 3.423583984375, 3.64166259765625, 3.8597412109375, 4.07781982421875, 4.2958984375, 4.51397705078125, 4.7320556640625, 4.95013427734375, 5.168212890625, 5.38629150390625, 5.6043701171875, 5.82244873046875, 6.04052734375, 6.25860595703125, 6.4766845703125, 6.69476318359375, 6.912841796875, 7.13092041015625, 7.3489990234375, 7.56707763671875, 7.78515625]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 0.0, 2.0, 2.0, 2.0, 3.0, 3.0, 5.0, 5.0, 3.0, 8.0, 16.0, 25.0, 58.0, 82.0, 159.0, 150.0, 185.0, 115.0, 70.0, 36.0, 22.0, 18.0, 11.0, 10.0, 6.0, 5.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00034737586975097656, -0.0003344342112541199, -0.0003214925527572632, -0.0003085508942604065, -0.0002956092357635498, -0.0002826675772666931, -0.0002697259187698364, -0.00025678426027297974, -0.00024384260177612305, -0.00023090094327926636, -0.00021795928478240967, -0.00020501762628555298, -0.0001920759677886963, -0.0001791343092918396, -0.0001661926507949829, -0.00015325099229812622, -0.00014030933380126953, -0.00012736767530441284, -0.00011442601680755615, -0.00010148435831069946, -8.854269981384277e-05, -7.560104131698608e-05, -6.26593828201294e-05, -4.9717724323272705e-05, -3.6776065826416016e-05, -2.3834407329559326e-05, -1.0892748832702637e-05, 2.0489096641540527e-06, 1.4990568161010742e-05, 2.793222665786743e-05, 4.087388515472412e-05, 5.381554365158081e-05, 6.67572021484375e-05, 7.969886064529419e-05, 9.264051914215088e-05, 0.00010558217763900757, 0.00011852383613586426, 0.00013146549463272095, 0.00014440715312957764, 0.00015734881162643433, 0.00017029047012329102, 0.0001832321286201477, 0.0001961737871170044, 0.00020911544561386108, 0.00022205710411071777, 0.00023499876260757446, 0.00024794042110443115, 0.00026088207960128784, 0.00027382373809814453, 0.0002867653965950012, 0.0002997070550918579, 0.0003126487135887146, 0.0003255903720855713, 0.000338532030582428, 0.00035147368907928467, 0.00036441534757614136, 0.00037735700607299805, 0.00039029866456985474, 0.0004032403230667114, 0.0004161819815635681, 0.0004291236400604248, 0.0004420652985572815, 0.0004550069570541382, 0.0004679486155509949, 0.00048089027404785156]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 3.0, 1.0, 3.0, 7.0, 6.0, 10.0, 13.0, 12.0, 18.0, 37.0, 75.0, 112.0, 233.0, 586.0, 1652.0, 6135.0, 47723.0, 891242.0, 89081.0, 8425.0, 2003.0, 630.0, 248.0, 122.0, 65.0, 45.0, 19.0, 15.0, 10.0, 10.0, 6.0, 8.0, 5.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-4.87109375, -4.74151611328125, -4.6119384765625, -4.48236083984375, -4.352783203125, -4.22320556640625, -4.0936279296875, -3.96405029296875, -3.83447265625, -3.70489501953125, -3.5753173828125, -3.44573974609375, -3.316162109375, -3.18658447265625, -3.0570068359375, -2.92742919921875, -2.7978515625, -2.66827392578125, -2.5386962890625, -2.40911865234375, -2.279541015625, -2.14996337890625, -2.0203857421875, -1.89080810546875, -1.76123046875, -1.63165283203125, -1.5020751953125, -1.37249755859375, -1.242919921875, -1.11334228515625, -0.9837646484375, -0.85418701171875, -0.724609375, -0.59503173828125, -0.4654541015625, -0.33587646484375, -0.206298828125, -0.07672119140625, 0.0528564453125, 0.18243408203125, 0.31201171875, 0.44158935546875, 0.5711669921875, 0.70074462890625, 0.830322265625, 0.95989990234375, 1.0894775390625, 1.21905517578125, 1.3486328125, 1.47821044921875, 1.6077880859375, 1.73736572265625, 1.866943359375, 1.99652099609375, 2.1260986328125, 2.25567626953125, 2.38525390625, 2.51483154296875, 2.6444091796875, 2.77398681640625, 2.903564453125, 3.03314208984375, 3.1627197265625, 3.29229736328125, 3.421875]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 3.0, 2.0, 6.0, 7.0, 9.0, 16.0, 26.0, 47.0, 95.0, 130.0, 193.0, 173.0, 114.0, 68.0, 35.0, 26.0, 19.0, 12.0, 6.0, 5.0, 6.0, 1.0, 3.0, 4.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-2.58203125, -2.5256805419921875, -2.469329833984375, -2.4129791259765625, -2.35662841796875, -2.3002777099609375, -2.243927001953125, -2.1875762939453125, -2.1312255859375, -2.0748748779296875, -2.018524169921875, -1.9621734619140625, -1.90582275390625, -1.8494720458984375, -1.793121337890625, -1.7367706298828125, -1.680419921875, -1.6240692138671875, -1.567718505859375, -1.5113677978515625, -1.45501708984375, -1.3986663818359375, -1.342315673828125, -1.2859649658203125, -1.2296142578125, -1.1732635498046875, -1.116912841796875, -1.0605621337890625, -1.00421142578125, -0.9478607177734375, -0.891510009765625, -0.8351593017578125, -0.77880859375, -0.7224578857421875, -0.666107177734375, -0.6097564697265625, -0.55340576171875, -0.4970550537109375, -0.440704345703125, -0.3843536376953125, -0.3280029296875, -0.2716522216796875, -0.215301513671875, -0.1589508056640625, -0.10260009765625, -0.0462493896484375, 0.010101318359375, 0.0664520263671875, 0.122802734375, 0.1791534423828125, 0.235504150390625, 0.2918548583984375, 0.34820556640625, 0.4045562744140625, 0.460906982421875, 0.5172576904296875, 0.5736083984375, 0.6299591064453125, 0.686309814453125, 0.7426605224609375, 0.79901123046875, 0.8553619384765625, 0.911712646484375, 0.9680633544921875, 1.0244140625]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 8.0, 17.0, 124.0, 517.0, 292.0, 45.0, 5.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.78473663330078, -39.19800567626953, -37.611270904541016, -36.024539947509766, -34.43780517578125, -32.85107421875, -31.264341354370117, -29.677608489990234, -28.09087562561035, -26.50414276123047, -24.917409896850586, -23.330677032470703, -21.743946075439453, -20.157211303710938, -18.570480346679688, -16.983747482299805, -15.397014617919922, -13.810281753540039, -12.223548889160156, -10.63681697845459, -9.050084114074707, -7.463351249694824, -5.876619338989258, -4.289886474609375, -2.703153610229492, -1.1164209842681885, 0.47031164169311523, 2.05704402923584, 3.6437768936157227, 5.2305097579956055, 6.817241668701172, 8.403974533081055, 9.990707397460938, 11.57744026184082, 13.164173126220703, 14.75090503692627, 16.33763885498047, 17.92436981201172, 19.5111026763916, 21.097835540771484, 22.684568405151367, 24.27130126953125, 25.858034133911133, 27.444766998291016, 29.031497955322266, 30.61823272705078, 32.20496368408203, 33.79169464111328, 35.3784294128418, 36.96516036987305, 38.55189514160156, 40.13862609863281, 41.72536087036133, 43.31209182739258, 44.898826599121094, 46.485557556152344, 48.072288513183594, 49.659019470214844, 51.24575424194336, 52.83248519897461, 54.419219970703125, 56.005950927734375, 57.592681884765625, 59.17941665649414, 60.766151428222656]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 1.0, 4.0, 2.0, 1.0, 10.0, 17.0, 13.0, 24.0, 36.0, 20.0, 44.0, 44.0, 46.0, 43.0, 54.0, 57.0, 68.0, 46.0, 57.0, 59.0, 53.0, 55.0, 46.0, 43.0, 38.0, 29.0, 25.0, 12.0, 15.0, 14.0, 8.0, 7.0, 5.0, 5.0, 2.0, 1.0, 0.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-10.869195938110352, -10.525186538696289, -10.18117618560791, -9.837166786193848, -9.493157386779785, -9.149147033691406, -8.805137634277344, -8.461128234863281, -8.117118835449219, -7.773108959197998, -7.4290995597839355, -7.085089683532715, -6.741080284118652, -6.397070407867432, -6.053060531616211, -5.709051132202148, -5.3650407791137695, -5.021030902862549, -4.677021503448486, -4.333011627197266, -3.989001989364624, -3.6449923515319824, -3.3009824752807617, -2.95697283744812, -2.6129631996154785, -2.268953561782837, -1.9249438047409058, -1.5809340476989746, -1.236924409866333, -0.8929147720336914, -0.5489048957824707, -0.2048952579498291, 0.1391143798828125, 0.4831240773200989, 0.8271337747573853, 1.1711435317993164, 1.515153169631958, 1.8591628074645996, 2.2031726837158203, 2.547182321548462, 2.8911919593811035, 3.235201597213745, 3.5792112350463867, 3.9232211112976074, 4.267230987548828, 4.611240386962891, 4.955250263214111, 5.299260139465332, 5.6432695388793945, 5.987279415130615, 6.331288814544678, 6.675298690795898, 7.019308090209961, 7.363317966461182, 7.707327842712402, 8.051337242126465, 8.395347595214844, 8.739356994628906, 9.083367347717285, 9.427376747131348, 9.77138614654541, 10.115396499633789, 10.459405899047852, 10.803415298461914, 11.147424697875977]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 7.0, 2.0, 2.0, 2.0, 7.0, 3.0, 7.0, 15.0, 5.0, 17.0, 21.0, 13.0, 20.0, 12.0, 30.0, 39.0, 32.0, 38.0, 46.0, 67.0, 95.0, 181.0, 425.0, 1324.0, 5536.0, 51309.0, 3996843.0, 126989.0, 9217.0, 1495.0, 314.0, 99.0, 37.0, 14.0, 7.0, 6.0, 8.0, 2.0, 1.0, 1.0], "bins": [-14.296875, -14.014495849609375, -13.73211669921875, -13.449737548828125, -13.1673583984375, -12.884979248046875, -12.60260009765625, -12.320220947265625, -12.037841796875, -11.755462646484375, -11.47308349609375, -11.190704345703125, -10.9083251953125, -10.625946044921875, -10.34356689453125, -10.061187744140625, -9.77880859375, -9.496429443359375, -9.21405029296875, -8.931671142578125, -8.6492919921875, -8.366912841796875, -8.08453369140625, -7.802154541015625, -7.519775390625, -7.237396240234375, -6.95501708984375, -6.672637939453125, -6.3902587890625, -6.107879638671875, -5.82550048828125, -5.543121337890625, -5.2607421875, -4.978363037109375, -4.69598388671875, -4.413604736328125, -4.1312255859375, -3.848846435546875, -3.56646728515625, -3.284088134765625, -3.001708984375, -2.719329833984375, -2.43695068359375, -2.154571533203125, -1.8721923828125, -1.589813232421875, -1.30743408203125, -1.025054931640625, -0.74267578125, -0.460296630859375, -0.17791748046875, 0.104461669921875, 0.3868408203125, 0.669219970703125, 0.95159912109375, 1.233978271484375, 1.516357421875, 1.798736572265625, 2.08111572265625, 2.363494873046875, 2.6458740234375, 2.928253173828125, 3.21063232421875, 3.493011474609375, 3.775390625]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 5.0, 12.0, 18.0, 46.0, 81.0, 117.0, 169.0, 176.0, 157.0, 103.0, 71.0, 25.0, 21.0, 10.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-1.712890625, -1.6801109313964844, -1.6473312377929688, -1.6145515441894531, -1.5817718505859375, -1.5489921569824219, -1.5162124633789062, -1.4834327697753906, -1.450653076171875, -1.4178733825683594, -1.3850936889648438, -1.3523139953613281, -1.3195343017578125, -1.2867546081542969, -1.2539749145507812, -1.2211952209472656, -1.18841552734375, -1.1556358337402344, -1.1228561401367188, -1.0900764465332031, -1.0572967529296875, -1.0245170593261719, -0.9917373657226562, -0.9589576721191406, -0.926177978515625, -0.8933982849121094, -0.8606185913085938, -0.8278388977050781, -0.7950592041015625, -0.7622795104980469, -0.7294998168945312, -0.6967201232910156, -0.6639404296875, -0.6311607360839844, -0.5983810424804688, -0.5656013488769531, -0.5328216552734375, -0.5000419616699219, -0.46726226806640625, -0.4344825744628906, -0.401702880859375, -0.3689231872558594, -0.33614349365234375, -0.3033638000488281, -0.2705841064453125, -0.23780441284179688, -0.20502471923828125, -0.17224502563476562, -0.13946533203125, -0.10668563842773438, -0.07390594482421875, -0.041126251220703125, -0.0083465576171875, 0.024433135986328125, 0.05721282958984375, 0.08999252319335938, 0.122772216796875, 0.15555191040039062, 0.18833160400390625, 0.22111129760742188, 0.2538909912109375, 0.2866706848144531, 0.31945037841796875, 0.3522300720214844, 0.385009765625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 4.0, 8.0, 6.0, 12.0, 25.0, 45.0, 191.0, 596.0, 3472.0, 3840929.0, 346626.0, 1897.0, 322.0, 93.0, 33.0, 11.0, 5.0, 5.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.875, -27.26806640625, -26.6611328125, -26.05419921875, -25.447265625, -24.84033203125, -24.2333984375, -23.62646484375, -23.01953125, -22.41259765625, -21.8056640625, -21.19873046875, -20.591796875, -19.98486328125, -19.3779296875, -18.77099609375, -18.1640625, -17.55712890625, -16.9501953125, -16.34326171875, -15.736328125, -15.12939453125, -14.5224609375, -13.91552734375, -13.30859375, -12.70166015625, -12.0947265625, -11.48779296875, -10.880859375, -10.27392578125, -9.6669921875, -9.06005859375, -8.453125, -7.84619140625, -7.2392578125, -6.63232421875, -6.025390625, -5.41845703125, -4.8115234375, -4.20458984375, -3.59765625, -2.99072265625, -2.3837890625, -1.77685546875, -1.169921875, -0.56298828125, 0.0439453125, 0.65087890625, 1.2578125, 1.86474609375, 2.4716796875, 3.07861328125, 3.685546875, 4.29248046875, 4.8994140625, 5.50634765625, 6.11328125, 6.72021484375, 7.3271484375, 7.93408203125, 8.541015625, 9.14794921875, 9.7548828125, 10.36181640625, 10.96875]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 3.0, 5.0, 5.0, 18.0, 28.0, 34.0, 96.0, 271.0, 1788.0, 1441.0, 232.0, 86.0, 27.0, 16.0, 11.0, 6.0, 3.0, 3.0, 3.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-2.55859375, -2.5063552856445312, -2.4541168212890625, -2.4018783569335938, -2.349639892578125, -2.2974014282226562, -2.2451629638671875, -2.1929244995117188, -2.14068603515625, -2.0884475708007812, -2.0362091064453125, -1.9839706420898438, -1.931732177734375, -1.8794937133789062, -1.8272552490234375, -1.7750167846679688, -1.7227783203125, -1.6705398559570312, -1.6183013916015625, -1.5660629272460938, -1.513824462890625, -1.4615859985351562, -1.4093475341796875, -1.3571090698242188, -1.30487060546875, -1.2526321411132812, -1.2003936767578125, -1.1481552124023438, -1.095916748046875, -1.0436782836914062, -0.9914398193359375, -0.9392013549804688, -0.886962890625, -0.8347244262695312, -0.7824859619140625, -0.7302474975585938, -0.678009033203125, -0.6257705688476562, -0.5735321044921875, -0.5212936401367188, -0.46905517578125, -0.41681671142578125, -0.3645782470703125, -0.31233978271484375, -0.260101318359375, -0.20786285400390625, -0.1556243896484375, -0.10338592529296875, -0.0511474609375, 0.00109100341796875, 0.0533294677734375, 0.10556793212890625, 0.157806396484375, 0.21004486083984375, 0.2622833251953125, 0.31452178955078125, 0.36676025390625, 0.41899871826171875, 0.4712371826171875, 0.5234756469726562, 0.575714111328125, 0.6279525756835938, 0.6801910400390625, 0.7324295043945312, 0.78466796875]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 7.0, 27.0, 100.0, 369.0, 388.0, 89.0, 12.0, 4.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.176610946655273, -19.61725616455078, -19.057903289794922, -18.49854850769043, -17.93919563293457, -17.379840850830078, -16.82048797607422, -16.261133193969727, -15.701780319213867, -15.142426490783691, -14.583072662353516, -14.02371883392334, -13.464365005493164, -12.905011177062988, -12.345657348632812, -11.78630256652832, -11.226948738098145, -10.667594909667969, -10.108241081237793, -9.548887252807617, -8.989533424377441, -8.430179595947266, -7.870825290679932, -7.311471462249756, -6.75211763381958, -6.192763805389404, -5.6334099769592285, -5.0740556716918945, -4.514701843261719, -3.955348253250122, -3.395994186401367, -2.8366403579711914, -2.2772865295410156, -1.7179327011108398, -1.1585787534713745, -0.5992248058319092, -0.0398709774017334, 0.5194828510284424, 1.0788369178771973, 1.638190746307373, 2.197544574737549, 2.7568984031677246, 3.3162522315979004, 3.8756062984466553, 4.43496036529541, 4.994314193725586, 5.553668022155762, 6.1130218505859375, 6.672375679016113, 7.231729507446289, 7.791083335876465, 8.35043716430664, 8.909790992736816, 9.469144821166992, 10.028499603271484, 10.587852478027344, 11.147207260131836, 11.706561088562012, 12.265914916992188, 12.825268745422363, 13.384622573852539, 13.943976402282715, 14.50333023071289, 15.062685012817383, 15.622037887573242]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 15.0, 14.0, 10.0, 21.0, 25.0, 46.0, 54.0, 68.0, 71.0, 74.0, 73.0, 84.0, 78.0, 79.0, 80.0, 52.0, 46.0, 30.0, 27.0, 23.0, 10.0, 7.0, 6.0, 4.0, 4.0, 2.0, 3.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.320610046386719, -8.096750259399414, -7.872891426086426, -7.649032115936279, -7.425172805786133, -7.201313495635986, -6.97745418548584, -6.753594398498535, -6.529735565185547, -6.3058762550354, -6.082016944885254, -5.858157634735107, -5.634298324584961, -5.4104390144348145, -5.186579704284668, -4.962719917297363, -4.738860607147217, -4.51500129699707, -4.291141986846924, -4.067282676696777, -3.843423366546631, -3.6195640563964844, -3.395704507827759, -3.1718451976776123, -2.947985887527466, -2.7241265773773193, -2.500267267227173, -2.2764077186584473, -2.052548408508301, -1.8286892175674438, -1.6048297882080078, -1.3809704780578613, -1.157111644744873, -0.9332523345947266, -0.7093929648399353, -0.48553359508514404, -0.26167428493499756, -0.037814974784851074, 0.18604445457458496, 0.40990376472473145, 0.6337630748748779, 0.8576223850250244, 1.081481695175171, 1.305341124534607, 1.5292004346847534, 1.7530597448349, 1.976919174194336, 2.2007784843444824, 2.424637794494629, 2.6484971046447754, 2.872356414794922, 3.0962157249450684, 3.320075035095215, 3.5439343452453613, 3.767793893814087, 3.9916532039642334, 4.215512275695801, 4.439371585845947, 4.663230895996094, 4.88709020614624, 5.110949516296387, 5.334808826446533, 5.55866813659668, 5.782527923583984, 6.006387233734131]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 4.0, 3.0, 3.0, 1.0, 5.0, 8.0, 17.0, 13.0, 19.0, 26.0, 39.0, 51.0, 84.0, 165.0, 281.0, 854.0, 3713.0, 34815.0, 696021.0, 294499.0, 14599.0, 2133.0, 593.0, 240.0, 122.0, 70.0, 49.0, 36.0, 33.0, 19.0, 15.0, 11.0, 8.0, 4.0, 4.0, 3.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.765625, -7.56036376953125, -7.3551025390625, -7.14984130859375, -6.944580078125, -6.73931884765625, -6.5340576171875, -6.32879638671875, -6.12353515625, -5.91827392578125, -5.7130126953125, -5.50775146484375, -5.302490234375, -5.09722900390625, -4.8919677734375, -4.68670654296875, -4.4814453125, -4.27618408203125, -4.0709228515625, -3.86566162109375, -3.660400390625, -3.45513916015625, -3.2498779296875, -3.04461669921875, -2.83935546875, -2.63409423828125, -2.4288330078125, -2.22357177734375, -2.018310546875, -1.81304931640625, -1.6077880859375, -1.40252685546875, -1.197265625, -0.99200439453125, -0.7867431640625, -0.58148193359375, -0.376220703125, -0.17095947265625, 0.0343017578125, 0.23956298828125, 0.44482421875, 0.65008544921875, 0.8553466796875, 1.06060791015625, 1.265869140625, 1.47113037109375, 1.6763916015625, 1.88165283203125, 2.0869140625, 2.29217529296875, 2.4974365234375, 2.70269775390625, 2.907958984375, 3.11322021484375, 3.3184814453125, 3.52374267578125, 3.72900390625, 3.93426513671875, 4.1395263671875, 4.34478759765625, 4.550048828125, 4.75531005859375, 4.9605712890625, 5.16583251953125, 5.37109375]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 9.0, 15.0, 22.0, 35.0, 62.0, 90.0, 106.0, 152.0, 132.0, 125.0, 100.0, 73.0, 35.0, 25.0, 12.0, 6.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.5654296875, -1.5314102172851562, -1.4973907470703125, -1.4633712768554688, -1.429351806640625, -1.3953323364257812, -1.3613128662109375, -1.3272933959960938, -1.29327392578125, -1.2592544555664062, -1.2252349853515625, -1.1912155151367188, -1.157196044921875, -1.1231765747070312, -1.0891571044921875, -1.0551376342773438, -1.0211181640625, -0.9870986938476562, -0.9530792236328125, -0.9190597534179688, -0.885040283203125, -0.8510208129882812, -0.8170013427734375, -0.7829818725585938, -0.74896240234375, -0.7149429321289062, -0.6809234619140625, -0.6469039916992188, -0.612884521484375, -0.5788650512695312, -0.5448455810546875, -0.5108261108398438, -0.476806640625, -0.44278717041015625, -0.4087677001953125, -0.37474822998046875, -0.340728759765625, -0.30670928955078125, -0.2726898193359375, -0.23867034912109375, -0.20465087890625, -0.17063140869140625, -0.1366119384765625, -0.10259246826171875, -0.068572998046875, -0.03455352783203125, -0.0005340576171875, 0.03348541259765625, 0.0675048828125, 0.10152435302734375, 0.1355438232421875, 0.16956329345703125, 0.203582763671875, 0.23760223388671875, 0.2716217041015625, 0.30564117431640625, 0.33966064453125, 0.37368011474609375, 0.4076995849609375, 0.44171905517578125, 0.475738525390625, 0.5097579956054688, 0.5437774658203125, 0.5777969360351562, 0.61181640625]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 3.0, 5.0, 1.0, 4.0, 6.0, 10.0, 10.0, 12.0, 26.0, 24.0, 42.0, 52.0, 59.0, 92.0, 171.0, 231.0, 439.0, 823.0, 2062.0, 5877.0, 21159.0, 104115.0, 494979.0, 337172.0, 60286.0, 13615.0, 4047.0, 1497.0, 689.0, 353.0, 216.0, 128.0, 116.0, 58.0, 43.0, 29.0, 29.0, 19.0, 12.0, 11.0, 6.0, 6.0, 6.0, 4.0, 2.0, 6.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0], "bins": [-2.517578125, -2.435943603515625, -2.35430908203125, -2.272674560546875, -2.1910400390625, -2.109405517578125, -2.02777099609375, -1.946136474609375, -1.864501953125, -1.782867431640625, -1.70123291015625, -1.619598388671875, -1.5379638671875, -1.456329345703125, -1.37469482421875, -1.293060302734375, -1.21142578125, -1.129791259765625, -1.04815673828125, -0.966522216796875, -0.8848876953125, -0.803253173828125, -0.72161865234375, -0.639984130859375, -0.558349609375, -0.476715087890625, -0.39508056640625, -0.313446044921875, -0.2318115234375, -0.150177001953125, -0.06854248046875, 0.013092041015625, 0.0947265625, 0.176361083984375, 0.25799560546875, 0.339630126953125, 0.4212646484375, 0.502899169921875, 0.58453369140625, 0.666168212890625, 0.747802734375, 0.829437255859375, 0.91107177734375, 0.992706298828125, 1.0743408203125, 1.155975341796875, 1.23760986328125, 1.319244384765625, 1.40087890625, 1.482513427734375, 1.56414794921875, 1.645782470703125, 1.7274169921875, 1.809051513671875, 1.89068603515625, 1.972320556640625, 2.053955078125, 2.135589599609375, 2.21722412109375, 2.298858642578125, 2.3804931640625, 2.462127685546875, 2.54376220703125, 2.625396728515625, 2.70703125]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 1.0, 0.0, 8.0, 8.0, 12.0, 7.0, 13.0, 9.0, 9.0, 17.0, 22.0, 27.0, 24.0, 46.0, 42.0, 55.0, 40.0, 62.0, 60.0, 72.0, 56.0, 57.0, 55.0, 56.0, 41.0, 29.0, 30.0, 28.0, 24.0, 16.0, 16.0, 13.0, 15.0, 14.0, 6.0, 3.0, 3.0, 4.0, 3.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8994140625, -1.8348846435546875, -1.770355224609375, -1.7058258056640625, -1.64129638671875, -1.5767669677734375, -1.512237548828125, -1.4477081298828125, -1.3831787109375, -1.3186492919921875, -1.254119873046875, -1.1895904541015625, -1.12506103515625, -1.0605316162109375, -0.996002197265625, -0.9314727783203125, -0.866943359375, -0.8024139404296875, -0.737884521484375, -0.6733551025390625, -0.60882568359375, -0.5442962646484375, -0.479766845703125, -0.4152374267578125, -0.3507080078125, -0.2861785888671875, -0.221649169921875, -0.1571197509765625, -0.09259033203125, -0.0280609130859375, 0.036468505859375, 0.1009979248046875, 0.16552734375, 0.2300567626953125, 0.294586181640625, 0.3591156005859375, 0.42364501953125, 0.4881744384765625, 0.552703857421875, 0.6172332763671875, 0.6817626953125, 0.7462921142578125, 0.810821533203125, 0.8753509521484375, 0.93988037109375, 1.0044097900390625, 1.068939208984375, 1.1334686279296875, 1.197998046875, 1.2625274658203125, 1.327056884765625, 1.3915863037109375, 1.45611572265625, 1.5206451416015625, 1.585174560546875, 1.6497039794921875, 1.7142333984375, 1.7787628173828125, 1.843292236328125, 1.9078216552734375, 1.97235107421875, 2.0368804931640625, 2.101409912109375, 2.1659393310546875, 2.23046875]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 1.0, 1.0, 4.0, 1.0, 4.0, 2.0, 1.0, 7.0, 7.0, 14.0, 8.0, 21.0, 26.0, 53.0, 82.0, 155.0, 339.0, 879.0, 2682.0, 9913.0, 64032.0, 661177.0, 274849.0, 26316.0, 5317.0, 1581.0, 524.0, 274.0, 108.0, 64.0, 39.0, 24.0, 11.0, 9.0, 10.0, 6.0, 9.0, 3.0, 2.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.70703125, -1.6533203125, -1.599609375, -1.5458984375, -1.4921875, -1.4384765625, -1.384765625, -1.3310546875, -1.27734375, -1.2236328125, -1.169921875, -1.1162109375, -1.0625, -1.0087890625, -0.955078125, -0.9013671875, -0.84765625, -0.7939453125, -0.740234375, -0.6865234375, -0.6328125, -0.5791015625, -0.525390625, -0.4716796875, -0.41796875, -0.3642578125, -0.310546875, -0.2568359375, -0.203125, -0.1494140625, -0.095703125, -0.0419921875, 0.01171875, 0.0654296875, 0.119140625, 0.1728515625, 0.2265625, 0.2802734375, 0.333984375, 0.3876953125, 0.44140625, 0.4951171875, 0.548828125, 0.6025390625, 0.65625, 0.7099609375, 0.763671875, 0.8173828125, 0.87109375, 0.9248046875, 0.978515625, 1.0322265625, 1.0859375, 1.1396484375, 1.193359375, 1.2470703125, 1.30078125, 1.3544921875, 1.408203125, 1.4619140625, 1.515625, 1.5693359375, 1.623046875, 1.6767578125, 1.73046875]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 6.0, 1.0, 6.0, 8.0, 5.0, 22.0, 14.0, 26.0, 41.0, 50.0, 75.0, 104.0, 94.0, 97.0, 101.0, 84.0, 70.0, 48.0, 47.0, 40.0, 22.0, 19.0, 7.0, 5.0, 9.0, 3.0, 1.0, 1.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002434253692626953, -0.00023712962865829468, -0.00023083388805389404, -0.0002245381474494934, -0.00021824240684509277, -0.00021194666624069214, -0.0002056509256362915, -0.00019935518503189087, -0.00019305944442749023, -0.0001867637038230896, -0.00018046796321868896, -0.00017417222261428833, -0.0001678764820098877, -0.00016158074140548706, -0.00015528500080108643, -0.0001489892601966858, -0.00014269351959228516, -0.00013639777898788452, -0.0001301020383834839, -0.00012380629777908325, -0.00011751055717468262, -0.00011121481657028198, -0.00010491907596588135, -9.862333536148071e-05, -9.232759475708008e-05, -8.603185415267944e-05, -7.973611354827881e-05, -7.344037294387817e-05, -6.714463233947754e-05, -6.0848891735076904e-05, -5.455315113067627e-05, -4.8257410526275635e-05, -4.1961669921875e-05, -3.5665929317474365e-05, -2.937018871307373e-05, -2.3074448108673096e-05, -1.677870750427246e-05, -1.0482966899871826e-05, -4.187226295471191e-06, 2.1085143089294434e-06, 8.404254913330078e-06, 1.4699995517730713e-05, 2.0995736122131348e-05, 2.7291476726531982e-05, 3.358721733093262e-05, 3.988295793533325e-05, 4.617869853973389e-05, 5.247443914413452e-05, 5.8770179748535156e-05, 6.506592035293579e-05, 7.136166095733643e-05, 7.765740156173706e-05, 8.39531421661377e-05, 9.024888277053833e-05, 9.654462337493896e-05, 0.0001028403639793396, 0.00010913610458374023, 0.00011543184518814087, 0.0001217275857925415, 0.00012802332639694214, 0.00013431906700134277, 0.0001406148076057434, 0.00014691054821014404, 0.00015320628881454468, 0.0001595020294189453]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 4.0, 4.0, 3.0, 9.0, 4.0, 10.0, 19.0, 18.0, 23.0, 41.0, 48.0, 74.0, 146.0, 251.0, 437.0, 856.0, 1770.0, 4112.0, 12147.0, 53046.0, 532934.0, 386768.0, 39328.0, 9640.0, 3497.0, 1583.0, 792.0, 376.0, 232.0, 123.0, 66.0, 52.0, 34.0, 28.0, 16.0, 17.0, 9.0, 8.0, 9.0, 4.0, 3.0, 3.0, 5.0, 0.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-1.6142578125, -1.565216064453125, -1.51617431640625, -1.467132568359375, -1.4180908203125, -1.369049072265625, -1.32000732421875, -1.270965576171875, -1.221923828125, -1.172882080078125, -1.12384033203125, -1.074798583984375, -1.0257568359375, -0.976715087890625, -0.92767333984375, -0.878631591796875, -0.82958984375, -0.780548095703125, -0.73150634765625, -0.682464599609375, -0.6334228515625, -0.584381103515625, -0.53533935546875, -0.486297607421875, -0.437255859375, -0.388214111328125, -0.33917236328125, -0.290130615234375, -0.2410888671875, -0.192047119140625, -0.14300537109375, -0.093963623046875, -0.044921875, 0.004119873046875, 0.05316162109375, 0.102203369140625, 0.1512451171875, 0.200286865234375, 0.24932861328125, 0.298370361328125, 0.347412109375, 0.396453857421875, 0.44549560546875, 0.494537353515625, 0.5435791015625, 0.592620849609375, 0.64166259765625, 0.690704345703125, 0.73974609375, 0.788787841796875, 0.83782958984375, 0.886871337890625, 0.9359130859375, 0.984954833984375, 1.03399658203125, 1.083038330078125, 1.132080078125, 1.181121826171875, 1.23016357421875, 1.279205322265625, 1.3282470703125, 1.377288818359375, 1.42633056640625, 1.475372314453125, 1.5244140625]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 4.0, 8.0, 9.0, 18.0, 34.0, 49.0, 79.0, 108.0, 128.0, 164.0, 118.0, 103.0, 61.0, 41.0, 18.0, 16.0, 11.0, 8.0, 8.0, 4.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0], "bins": [-1.4150390625, -1.377227783203125, -1.33941650390625, -1.301605224609375, -1.2637939453125, -1.225982666015625, -1.18817138671875, -1.150360107421875, -1.112548828125, -1.074737548828125, -1.03692626953125, -0.999114990234375, -0.9613037109375, -0.923492431640625, -0.88568115234375, -0.847869873046875, -0.81005859375, -0.772247314453125, -0.73443603515625, -0.696624755859375, -0.6588134765625, -0.621002197265625, -0.58319091796875, -0.545379638671875, -0.507568359375, -0.469757080078125, -0.43194580078125, -0.394134521484375, -0.3563232421875, -0.318511962890625, -0.28070068359375, -0.242889404296875, -0.205078125, -0.167266845703125, -0.12945556640625, -0.091644287109375, -0.0538330078125, -0.016021728515625, 0.02178955078125, 0.059600830078125, 0.097412109375, 0.135223388671875, 0.17303466796875, 0.210845947265625, 0.2486572265625, 0.286468505859375, 0.32427978515625, 0.362091064453125, 0.39990234375, 0.437713623046875, 0.47552490234375, 0.513336181640625, 0.5511474609375, 0.588958740234375, 0.62677001953125, 0.664581298828125, 0.702392578125, 0.740203857421875, 0.77801513671875, 0.815826416015625, 0.8536376953125, 0.891448974609375, 0.92926025390625, 0.967071533203125, 1.0048828125]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 5.0, 13.0, 44.0, 88.0, 177.0, 241.0, 200.0, 120.0, 53.0, 28.0, 10.0, 8.0, 3.0, 4.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-21.825170516967773, -21.15288734436035, -20.48060417175293, -19.808319091796875, -19.136035919189453, -18.46375274658203, -17.79146957397461, -17.119186401367188, -16.446903228759766, -15.774620056152344, -15.102335929870605, -14.430052757263184, -13.757768630981445, -13.085485458374023, -12.413202285766602, -11.74091911315918, -11.068634033203125, -10.396350860595703, -9.724066734313965, -9.051783561706543, -8.379499435424805, -7.707216262817383, -7.034933090209961, -6.362649440765381, -5.690365791320801, -5.018082141876221, -4.345798492431641, -3.6735153198242188, -3.0012316703796387, -2.3289480209350586, -1.6566648483276367, -0.9843811988830566, -0.3120994567871094, 0.36018407344818115, 1.0324676036834717, 1.7047510147094727, 2.3770346641540527, 3.049318313598633, 3.7216014862060547, 4.393885135650635, 5.066168785095215, 5.738452434539795, 6.410736083984375, 7.083019256591797, 7.755302906036377, 8.427586555480957, 9.099869728088379, 9.772153854370117, 10.444437026977539, 11.116720199584961, 11.7890043258667, 12.461287498474121, 13.13357162475586, 13.805854797363281, 14.478137969970703, 15.150421142578125, 15.822705268859863, 16.4949893951416, 17.167272567749023, 17.839555740356445, 18.511838912963867, 19.184123992919922, 19.856407165527344, 20.528690338134766, 21.200973510742188]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 5.0, 3.0, 1.0, 7.0, 4.0, 6.0, 6.0, 7.0, 13.0, 18.0, 25.0, 32.0, 25.0, 34.0, 28.0, 33.0, 36.0, 44.0, 46.0, 48.0, 59.0, 43.0, 42.0, 52.0, 49.0, 39.0, 43.0, 44.0, 33.0, 26.0, 21.0, 24.0, 28.0, 19.0, 10.0, 16.0, 15.0, 10.0, 7.0, 3.0, 5.0, 0.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-11.449064254760742, -11.130510330200195, -10.811956405639648, -10.493403434753418, -10.174849510192871, -9.856295585632324, -9.537741661071777, -9.219188690185547, -8.900634765625, -8.582080841064453, -8.263526916503906, -7.944973468780518, -7.626420021057129, -7.307866096496582, -6.989312171936035, -6.6707587242126465, -6.3522047996521, -6.033650875091553, -5.715097427368164, -5.396543502807617, -5.0779900550842285, -4.759436130523682, -4.440882682800293, -4.122328758239746, -3.8037750720977783, -3.4852213859558105, -3.1666676998138428, -2.848114013671875, -2.529560089111328, -2.2110066413879395, -1.8924527168273926, -1.5738990306854248, -1.255345344543457, -0.9367916584014893, -0.6182379126548767, -0.29968416690826416, 0.018869519233703613, 0.3374232053756714, 0.6559770107269287, 0.9745306968688965, 1.2930843830108643, 1.611638069152832, 1.9301917552947998, 2.2487454414367676, 2.5672993659973145, 2.885852813720703, 3.20440673828125, 3.5229604244232178, 3.8415141105651855, 4.160068035125732, 4.478621482849121, 4.797175407409668, 5.115728855133057, 5.4342827796936035, 5.752836227416992, 6.071390151977539, 6.389944076538086, 6.708498001098633, 7.0270514488220215, 7.345605373382568, 7.664158821105957, 7.982712745666504, 8.30126667022705, 8.619819641113281, 8.938373565673828]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, 7.0, 4.0, 13.0, 7.0, 9.0, 8.0, 17.0, 17.0, 25.0, 15.0, 35.0, 36.0, 39.0, 79.0, 137.0, 182.0, 310.0, 680.0, 1722.0, 6498.0, 37899.0, 3642669.0, 473459.0, 23714.0, 4513.0, 1285.0, 490.0, 206.0, 92.0, 35.0, 29.0, 11.0, 15.0, 10.0, 3.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-8.171875, -7.985626220703125, -7.79937744140625, -7.613128662109375, -7.4268798828125, -7.240631103515625, -7.05438232421875, -6.868133544921875, -6.681884765625, -6.495635986328125, -6.30938720703125, -6.123138427734375, -5.9368896484375, -5.750640869140625, -5.56439208984375, -5.378143310546875, -5.19189453125, -5.005645751953125, -4.81939697265625, -4.633148193359375, -4.4468994140625, -4.260650634765625, -4.07440185546875, -3.888153076171875, -3.701904296875, -3.515655517578125, -3.32940673828125, -3.143157958984375, -2.9569091796875, -2.770660400390625, -2.58441162109375, -2.398162841796875, -2.2119140625, -2.025665283203125, -1.83941650390625, -1.653167724609375, -1.4669189453125, -1.280670166015625, -1.09442138671875, -0.908172607421875, -0.721923828125, -0.535675048828125, -0.34942626953125, -0.163177490234375, 0.0230712890625, 0.209320068359375, 0.39556884765625, 0.581817626953125, 0.76806640625, 0.954315185546875, 1.14056396484375, 1.326812744140625, 1.5130615234375, 1.699310302734375, 1.88555908203125, 2.071807861328125, 2.258056640625, 2.444305419921875, 2.63055419921875, 2.816802978515625, 3.0030517578125, 3.189300537109375, 3.37554931640625, 3.561798095703125, 3.748046875]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 5.0, 5.0, 6.0, 9.0, 20.0, 31.0, 42.0, 76.0, 95.0, 107.0, 127.0, 124.0, 106.0, 86.0, 58.0, 44.0, 41.0, 9.0, 9.0, 7.0, 2.0, 1.0, 0.0, 2.0, 5.0, 1.0], "bins": [-1.5869140625, -1.5543861389160156, -1.5218582153320312, -1.4893302917480469, -1.4568023681640625, -1.4242744445800781, -1.3917465209960938, -1.3592185974121094, -1.326690673828125, -1.2941627502441406, -1.2616348266601562, -1.2291069030761719, -1.1965789794921875, -1.1640510559082031, -1.1315231323242188, -1.0989952087402344, -1.06646728515625, -1.0339393615722656, -1.0014114379882812, -0.9688835144042969, -0.9363555908203125, -0.9038276672363281, -0.8712997436523438, -0.8387718200683594, -0.806243896484375, -0.7737159729003906, -0.7411880493164062, -0.7086601257324219, -0.6761322021484375, -0.6436042785644531, -0.6110763549804688, -0.5785484313964844, -0.5460205078125, -0.5134925842285156, -0.48096466064453125, -0.4484367370605469, -0.4159088134765625, -0.3833808898925781, -0.35085296630859375, -0.3183250427246094, -0.285797119140625, -0.2532691955566406, -0.22074127197265625, -0.18821334838867188, -0.1556854248046875, -0.12315750122070312, -0.09062957763671875, -0.058101654052734375, -0.02557373046875, 0.006954193115234375, 0.03948211669921875, 0.07201004028320312, 0.1045379638671875, 0.13706588745117188, 0.16959381103515625, 0.20212173461914062, 0.234649658203125, 0.2671775817871094, 0.29970550537109375, 0.3322334289550781, 0.3647613525390625, 0.3972892761230469, 0.42981719970703125, 0.4623451232910156, 0.494873046875]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 5.0, 6.0, 12.0, 16.0, 32.0, 33.0, 78.0, 92.0, 167.0, 594.0, 4297.0, 4128943.0, 58235.0, 1247.0, 285.0, 106.0, 51.0, 43.0, 18.0, 12.0, 7.0, 4.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.96875, -18.4383544921875, -17.907958984375, -17.3775634765625, -16.84716796875, -16.3167724609375, -15.786376953125, -15.2559814453125, -14.7255859375, -14.1951904296875, -13.664794921875, -13.1343994140625, -12.60400390625, -12.0736083984375, -11.543212890625, -11.0128173828125, -10.482421875, -9.9520263671875, -9.421630859375, -8.8912353515625, -8.36083984375, -7.8304443359375, -7.300048828125, -6.7696533203125, -6.2392578125, -5.7088623046875, -5.178466796875, -4.6480712890625, -4.11767578125, -3.5872802734375, -3.056884765625, -2.5264892578125, -1.99609375, -1.4656982421875, -0.935302734375, -0.4049072265625, 0.12548828125, 0.6558837890625, 1.186279296875, 1.7166748046875, 2.2470703125, 2.7774658203125, 3.307861328125, 3.8382568359375, 4.36865234375, 4.8990478515625, 5.429443359375, 5.9598388671875, 6.490234375, 7.0206298828125, 7.551025390625, 8.0814208984375, 8.61181640625, 9.1422119140625, 9.672607421875, 10.2030029296875, 10.7333984375, 11.2637939453125, 11.794189453125, 12.3245849609375, 12.85498046875, 13.3853759765625, 13.915771484375, 14.4461669921875, 14.9765625]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 5.0, 3.0, 9.0, 48.0, 146.0, 2306.0, 1412.0, 109.0, 23.0, 13.0, 9.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.21484375, -4.1231231689453125, -4.031402587890625, -3.9396820068359375, -3.84796142578125, -3.7562408447265625, -3.664520263671875, -3.5727996826171875, -3.4810791015625, -3.3893585205078125, -3.297637939453125, -3.2059173583984375, -3.11419677734375, -3.0224761962890625, -2.930755615234375, -2.8390350341796875, -2.747314453125, -2.6555938720703125, -2.563873291015625, -2.4721527099609375, -2.38043212890625, -2.2887115478515625, -2.196990966796875, -2.1052703857421875, -2.0135498046875, -1.9218292236328125, -1.830108642578125, -1.7383880615234375, -1.64666748046875, -1.5549468994140625, -1.463226318359375, -1.3715057373046875, -1.27978515625, -1.1880645751953125, -1.096343994140625, -1.0046234130859375, -0.91290283203125, -0.8211822509765625, -0.729461669921875, -0.6377410888671875, -0.5460205078125, -0.4542999267578125, -0.362579345703125, -0.2708587646484375, -0.17913818359375, -0.0874176025390625, 0.004302978515625, 0.0960235595703125, 0.187744140625, 0.2794647216796875, 0.371185302734375, 0.4629058837890625, 0.55462646484375, 0.6463470458984375, 0.738067626953125, 0.8297882080078125, 0.9215087890625, 1.0132293701171875, 1.104949951171875, 1.1966705322265625, 1.28839111328125, 1.3801116943359375, 1.471832275390625, 1.5635528564453125, 1.6552734375]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 4.0, 3.0, 16.0, 48.0, 92.0, 163.0, 222.0, 221.0, 116.0, 54.0, 24.0, 14.0, 10.0, 4.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-11.490737915039062, -11.239913940429688, -10.989089965820312, -10.738265991210938, -10.487442016601562, -10.236618041992188, -9.985794067382812, -9.734970092773438, -9.484146118164062, -9.233322143554688, -8.982498168945312, -8.731674194335938, -8.480850219726562, -8.230026245117188, -7.9792022705078125, -7.7283782958984375, -7.4775543212890625, -7.2267303466796875, -6.9759063720703125, -6.7250823974609375, -6.4742584228515625, -6.2234344482421875, -5.9726104736328125, -5.7217864990234375, -5.4709625244140625, -5.2201385498046875, -4.9693145751953125, -4.7184906005859375, -4.4676666259765625, -4.2168426513671875, -3.9660186767578125, -3.7151947021484375, -3.4643712043762207, -3.2135472297668457, -2.9627232551574707, -2.7118992805480957, -2.4610753059387207, -2.2102513313293457, -1.9594273567199707, -1.7086033821105957, -1.4577794075012207, -1.2069554328918457, -0.9561314582824707, -0.7053074836730957, -0.4544835090637207, -0.2036595344543457, 0.0471644401550293, 0.2979884147644043, 0.5488123893737793, 0.7996363639831543, 1.0504603385925293, 1.3012843132019043, 1.5521082878112793, 1.8029322624206543, 2.0537562370300293, 2.3045802116394043, 2.5554041862487793, 2.8062281608581543, 3.0570521354675293, 3.3078761100769043, 3.5587000846862793, 3.8095240592956543, 4.060348033905029, 4.311172008514404, 4.561995983123779]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 6.0, 11.0, 16.0, 19.0, 18.0, 33.0, 53.0, 68.0, 77.0, 70.0, 98.0, 91.0, 94.0, 93.0, 82.0, 53.0, 38.0, 31.0, 21.0, 14.0, 9.0, 12.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.294525146484375, -6.098509311676025, -5.902493000030518, -5.706477165222168, -5.510461330413818, -5.314445495605469, -5.118429183959961, -4.922413349151611, -4.726397514343262, -4.530381679534912, -4.334365367889404, -4.138349533081055, -3.942333698272705, -3.7463176250457764, -3.5503015518188477, -3.354285717010498, -3.1582694053649902, -2.9622533321380615, -2.766237497329712, -2.570221424102783, -2.3742055892944336, -2.178189516067505, -1.9821734428405762, -1.786157488822937, -1.5901415348052979, -1.3941255807876587, -1.1981096267700195, -1.0020935535430908, -0.8060775995254517, -0.6100616455078125, -0.4140455722808838, -0.21802961826324463, -0.022013187408447266, 0.17400279641151428, 0.37001878023147583, 0.5660347938537598, 0.7620507478713989, 0.9580667018890381, 1.1540827751159668, 1.350098729133606, 1.5461146831512451, 1.7421306371688843, 1.9381465911865234, 2.134162664413452, 2.330178737640381, 2.5261945724487305, 2.722210645675659, 2.918226718902588, 3.1142425537109375, 3.310258626937866, 3.506274461746216, 3.7022905349731445, 3.898306369781494, 4.094322204589844, 4.290338516235352, 4.486354351043701, 4.682370185852051, 4.8783860206604, 5.074402332305908, 5.270418167114258, 5.466434001922607, 5.662449836730957, 5.858466148376465, 6.0544819831848145, 6.250498294830322]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 5.0, 6.0, 14.0, 13.0, 28.0, 27.0, 38.0, 87.0, 164.0, 282.0, 762.0, 2514.0, 15368.0, 355346.0, 642420.0, 26371.0, 3350.0, 1007.0, 342.0, 158.0, 89.0, 51.0, 33.0, 28.0, 14.0, 11.0, 8.0, 7.0, 8.0, 5.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.40625, -6.169921875, -5.93359375, -5.697265625, -5.4609375, -5.224609375, -4.98828125, -4.751953125, -4.515625, -4.279296875, -4.04296875, -3.806640625, -3.5703125, -3.333984375, -3.09765625, -2.861328125, -2.625, -2.388671875, -2.15234375, -1.916015625, -1.6796875, -1.443359375, -1.20703125, -0.970703125, -0.734375, -0.498046875, -0.26171875, -0.025390625, 0.2109375, 0.447265625, 0.68359375, 0.919921875, 1.15625, 1.392578125, 1.62890625, 1.865234375, 2.1015625, 2.337890625, 2.57421875, 2.810546875, 3.046875, 3.283203125, 3.51953125, 3.755859375, 3.9921875, 4.228515625, 4.46484375, 4.701171875, 4.9375, 5.173828125, 5.41015625, 5.646484375, 5.8828125, 6.119140625, 6.35546875, 6.591796875, 6.828125, 7.064453125, 7.30078125, 7.537109375, 7.7734375, 8.009765625, 8.24609375, 8.482421875, 8.71875]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 5.0, 3.0, 3.0, 7.0, 18.0, 27.0, 45.0, 56.0, 75.0, 108.0, 90.0, 122.0, 111.0, 106.0, 62.0, 57.0, 39.0, 39.0, 17.0, 5.0, 6.0, 5.0, 0.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0], "bins": [-1.572265625, -1.5384063720703125, -1.504547119140625, -1.4706878662109375, -1.43682861328125, -1.4029693603515625, -1.369110107421875, -1.3352508544921875, -1.3013916015625, -1.2675323486328125, -1.233673095703125, -1.1998138427734375, -1.16595458984375, -1.1320953369140625, -1.098236083984375, -1.0643768310546875, -1.030517578125, -0.9966583251953125, -0.962799072265625, -0.9289398193359375, -0.89508056640625, -0.8612213134765625, -0.827362060546875, -0.7935028076171875, -0.7596435546875, -0.7257843017578125, -0.691925048828125, -0.6580657958984375, -0.62420654296875, -0.5903472900390625, -0.556488037109375, -0.5226287841796875, -0.48876953125, -0.4549102783203125, -0.421051025390625, -0.3871917724609375, -0.35333251953125, -0.3194732666015625, -0.285614013671875, -0.2517547607421875, -0.2178955078125, -0.1840362548828125, -0.150177001953125, -0.1163177490234375, -0.08245849609375, -0.0485992431640625, -0.014739990234375, 0.0191192626953125, 0.052978515625, 0.0868377685546875, 0.120697021484375, 0.1545562744140625, 0.18841552734375, 0.2222747802734375, 0.256134033203125, 0.2899932861328125, 0.3238525390625, 0.3577117919921875, 0.391571044921875, 0.4254302978515625, 0.45928955078125, 0.4931488037109375, 0.527008056640625, 0.5608673095703125, 0.5947265625]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 5.0, 5.0, 4.0, 5.0, 8.0, 16.0, 21.0, 29.0, 42.0, 68.0, 105.0, 232.0, 421.0, 1087.0, 3324.0, 16299.0, 169329.0, 745556.0, 96431.0, 11273.0, 2595.0, 869.0, 378.0, 181.0, 103.0, 61.0, 27.0, 25.0, 15.0, 13.0, 7.0, 9.0, 3.0, 3.0, 4.0, 2.0, 4.0, 0.0, 1.0, 2.0], "bins": [-6.1953125, -6.045654296875, -5.89599609375, -5.746337890625, -5.5966796875, -5.447021484375, -5.29736328125, -5.147705078125, -4.998046875, -4.848388671875, -4.69873046875, -4.549072265625, -4.3994140625, -4.249755859375, -4.10009765625, -3.950439453125, -3.80078125, -3.651123046875, -3.50146484375, -3.351806640625, -3.2021484375, -3.052490234375, -2.90283203125, -2.753173828125, -2.603515625, -2.453857421875, -2.30419921875, -2.154541015625, -2.0048828125, -1.855224609375, -1.70556640625, -1.555908203125, -1.40625, -1.256591796875, -1.10693359375, -0.957275390625, -0.8076171875, -0.657958984375, -0.50830078125, -0.358642578125, -0.208984375, -0.059326171875, 0.09033203125, 0.239990234375, 0.3896484375, 0.539306640625, 0.68896484375, 0.838623046875, 0.98828125, 1.137939453125, 1.28759765625, 1.437255859375, 1.5869140625, 1.736572265625, 1.88623046875, 2.035888671875, 2.185546875, 2.335205078125, 2.48486328125, 2.634521484375, 2.7841796875, 2.933837890625, 3.08349609375, 3.233154296875, 3.3828125]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 1.0, 3.0, 3.0, 4.0, 6.0, 11.0, 11.0, 13.0, 24.0, 23.0, 37.0, 34.0, 45.0, 53.0, 62.0, 58.0, 63.0, 87.0, 72.0, 77.0, 56.0, 44.0, 49.0, 34.0, 26.0, 24.0, 21.0, 15.0, 18.0, 7.0, 5.0, 6.0, 5.0, 4.0, 0.0, 7.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.685546875, -3.584869384765625, -3.48419189453125, -3.383514404296875, -3.2828369140625, -3.182159423828125, -3.08148193359375, -2.980804443359375, -2.880126953125, -2.779449462890625, -2.67877197265625, -2.578094482421875, -2.4774169921875, -2.376739501953125, -2.27606201171875, -2.175384521484375, -2.07470703125, -1.974029541015625, -1.87335205078125, -1.772674560546875, -1.6719970703125, -1.571319580078125, -1.47064208984375, -1.369964599609375, -1.269287109375, -1.168609619140625, -1.06793212890625, -0.967254638671875, -0.8665771484375, -0.765899658203125, -0.66522216796875, -0.564544677734375, -0.4638671875, -0.363189697265625, -0.26251220703125, -0.161834716796875, -0.0611572265625, 0.039520263671875, 0.14019775390625, 0.240875244140625, 0.341552734375, 0.442230224609375, 0.54290771484375, 0.643585205078125, 0.7442626953125, 0.844940185546875, 0.94561767578125, 1.046295166015625, 1.14697265625, 1.247650146484375, 1.34832763671875, 1.449005126953125, 1.5496826171875, 1.650360107421875, 1.75103759765625, 1.851715087890625, 1.952392578125, 2.053070068359375, 2.15374755859375, 2.254425048828125, 2.3551025390625, 2.455780029296875, 2.55645751953125, 2.657135009765625, 2.7578125]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 4.0, 1.0, 9.0, 9.0, 20.0, 37.0, 42.0, 75.0, 129.0, 247.0, 815.0, 4066.0, 57531.0, 922957.0, 57189.0, 4002.0, 834.0, 267.0, 133.0, 59.0, 57.0, 28.0, 17.0, 12.0, 4.0, 4.0, 5.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.912109375, -2.80621337890625, -2.7003173828125, -2.59442138671875, -2.488525390625, -2.38262939453125, -2.2767333984375, -2.17083740234375, -2.06494140625, -1.95904541015625, -1.8531494140625, -1.74725341796875, -1.641357421875, -1.53546142578125, -1.4295654296875, -1.32366943359375, -1.2177734375, -1.11187744140625, -1.0059814453125, -0.90008544921875, -0.794189453125, -0.68829345703125, -0.5823974609375, -0.47650146484375, -0.37060546875, -0.26470947265625, -0.1588134765625, -0.05291748046875, 0.052978515625, 0.15887451171875, 0.2647705078125, 0.37066650390625, 0.4765625, 0.58245849609375, 0.6883544921875, 0.79425048828125, 0.900146484375, 1.00604248046875, 1.1119384765625, 1.21783447265625, 1.32373046875, 1.42962646484375, 1.5355224609375, 1.64141845703125, 1.747314453125, 1.85321044921875, 1.9591064453125, 2.06500244140625, 2.1708984375, 2.27679443359375, 2.3826904296875, 2.48858642578125, 2.594482421875, 2.70037841796875, 2.8062744140625, 2.91217041015625, 3.01806640625, 3.12396240234375, 3.2298583984375, 3.33575439453125, 3.441650390625, 3.54754638671875, 3.6534423828125, 3.75933837890625, 3.865234375]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 9.0, 8.0, 6.0, 11.0, 22.0, 30.0, 51.0, 70.0, 120.0, 173.0, 157.0, 117.0, 88.0, 54.0, 44.0, 18.0, 11.0, 9.0, 7.0, 5.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006299018859863281, -0.0006162114441394806, -0.0006025210022926331, -0.0005888305604457855, -0.000575140118598938, -0.0005614496767520905, -0.0005477592349052429, -0.0005340687930583954, -0.0005203783512115479, -0.0005066879093647003, -0.0004929974675178528, -0.00047930702567100525, -0.0004656165838241577, -0.0004519261419773102, -0.00043823570013046265, -0.0004245452582836151, -0.0004108548164367676, -0.00039716437458992004, -0.0003834739327430725, -0.000369783490896225, -0.00035609304904937744, -0.0003424026072025299, -0.0003287121653556824, -0.00031502172350883484, -0.0003013312816619873, -0.00028764083981513977, -0.00027395039796829224, -0.0002602599561214447, -0.00024656951427459717, -0.00023287907242774963, -0.0002191886305809021, -0.00020549818873405457, -0.00019180774688720703, -0.0001781173050403595, -0.00016442686319351196, -0.00015073642134666443, -0.0001370459794998169, -0.00012335553765296936, -0.00010966509580612183, -9.597465395927429e-05, -8.228421211242676e-05, -6.859377026557922e-05, -5.490332841873169e-05, -4.1212886571884155e-05, -2.752244472503662e-05, -1.3832002878189087e-05, -1.4156103134155273e-07, 1.3548880815505981e-05, 2.7239322662353516e-05, 4.092976450920105e-05, 5.4620206356048584e-05, 6.831064820289612e-05, 8.200109004974365e-05, 9.569153189659119e-05, 0.00010938197374343872, 0.00012307241559028625, 0.0001367628574371338, 0.00015045329928398132, 0.00016414374113082886, 0.0001778341829776764, 0.00019152462482452393, 0.00020521506667137146, 0.000218905508518219, 0.00023259595036506653, 0.00024628639221191406]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 5.0, 6.0, 8.0, 14.0, 30.0, 37.0, 91.0, 129.0, 275.0, 731.0, 2291.0, 13091.0, 293762.0, 704816.0, 28059.0, 3496.0, 916.0, 364.0, 184.0, 96.0, 74.0, 37.0, 22.0, 6.0, 4.0, 4.0, 2.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3.21875, -3.13653564453125, -3.0543212890625, -2.97210693359375, -2.889892578125, -2.80767822265625, -2.7254638671875, -2.64324951171875, -2.56103515625, -2.47882080078125, -2.3966064453125, -2.31439208984375, -2.232177734375, -2.14996337890625, -2.0677490234375, -1.98553466796875, -1.9033203125, -1.82110595703125, -1.7388916015625, -1.65667724609375, -1.574462890625, -1.49224853515625, -1.4100341796875, -1.32781982421875, -1.24560546875, -1.16339111328125, -1.0811767578125, -0.99896240234375, -0.916748046875, -0.83453369140625, -0.7523193359375, -0.67010498046875, -0.587890625, -0.50567626953125, -0.4234619140625, -0.34124755859375, -0.259033203125, -0.17681884765625, -0.0946044921875, -0.01239013671875, 0.06982421875, 0.15203857421875, 0.2342529296875, 0.31646728515625, 0.398681640625, 0.48089599609375, 0.5631103515625, 0.64532470703125, 0.7275390625, 0.80975341796875, 0.8919677734375, 0.97418212890625, 1.056396484375, 1.13861083984375, 1.2208251953125, 1.30303955078125, 1.38525390625, 1.46746826171875, 1.5496826171875, 1.63189697265625, 1.714111328125, 1.79632568359375, 1.8785400390625, 1.96075439453125, 2.04296875]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 4.0, 7.0, 6.0, 14.0, 21.0, 37.0, 54.0, 80.0, 110.0, 156.0, 139.0, 124.0, 91.0, 59.0, 42.0, 31.0, 11.0, 11.0, 6.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0546875, -1.0140838623046875, -0.973480224609375, -0.9328765869140625, -0.89227294921875, -0.8516693115234375, -0.811065673828125, -0.7704620361328125, -0.7298583984375, -0.6892547607421875, -0.648651123046875, -0.6080474853515625, -0.56744384765625, -0.5268402099609375, -0.486236572265625, -0.4456329345703125, -0.405029296875, -0.3644256591796875, -0.323822021484375, -0.2832183837890625, -0.24261474609375, -0.2020111083984375, -0.161407470703125, -0.1208038330078125, -0.0802001953125, -0.0395965576171875, 0.001007080078125, 0.0416107177734375, 0.08221435546875, 0.1228179931640625, 0.163421630859375, 0.2040252685546875, 0.24462890625, 0.2852325439453125, 0.325836181640625, 0.3664398193359375, 0.40704345703125, 0.4476470947265625, 0.488250732421875, 0.5288543701171875, 0.5694580078125, 0.6100616455078125, 0.650665283203125, 0.6912689208984375, 0.73187255859375, 0.7724761962890625, 0.813079833984375, 0.8536834716796875, 0.894287109375, 0.9348907470703125, 0.975494384765625, 1.0160980224609375, 1.05670166015625, 1.0973052978515625, 1.137908935546875, 1.1785125732421875, 1.2191162109375, 1.2597198486328125, 1.300323486328125, 1.3409271240234375, 1.38153076171875, 1.4221343994140625, 1.462738037109375, 1.5033416748046875, 1.5439453125]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 10.0, 16.0, 70.0, 159.0, 315.0, 245.0, 113.0, 42.0, 16.0, 12.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-57.54263687133789, -56.40836715698242, -55.27410125732422, -54.13983154296875, -53.00556182861328, -51.87129592895508, -50.73702621459961, -49.602760314941406, -48.46849060058594, -47.33422088623047, -46.199954986572266, -45.0656852722168, -43.931419372558594, -42.797149658203125, -41.662879943847656, -40.52861404418945, -39.394344329833984, -38.260074615478516, -37.12580871582031, -35.991539001464844, -34.857269287109375, -33.72300338745117, -32.5887336730957, -31.454465866088867, -30.32019805908203, -29.185930252075195, -28.05166244506836, -26.91739273071289, -25.783124923706055, -24.64885711669922, -23.51458740234375, -22.380319595336914, -21.246051788330078, -20.111783981323242, -18.977516174316406, -17.843246459960938, -16.7089786529541, -15.574710845947266, -14.440442085266113, -13.306173324584961, -12.171904563903809, -11.037635803222656, -9.90336799621582, -8.769100189208984, -7.634831428527832, -6.500563144683838, -5.366294860839844, -4.23202657699585, -3.0977582931518555, -1.9634900093078613, -0.8292217254638672, 0.30504655838012695, 1.439314842224121, 2.5735831260681152, 3.7078514099121094, 4.8421196937561035, 5.976387977600098, 7.110656261444092, 8.244924545288086, 9.379192352294922, 10.513461112976074, 11.647729873657227, 12.781997680664062, 13.916265487670898, 15.05053424835205]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 0.0, 6.0, 6.0, 4.0, 10.0, 11.0, 4.0, 15.0, 27.0, 17.0, 25.0, 29.0, 37.0, 39.0, 37.0, 51.0, 57.0, 60.0, 50.0, 56.0, 67.0, 59.0, 45.0, 50.0, 33.0, 41.0, 34.0, 25.0, 38.0, 16.0, 12.0, 12.0, 6.0, 9.0, 9.0, 2.0, 4.0, 7.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-16.690654754638672, -16.20299530029297, -15.715333938598633, -15.22767448425293, -14.74001407623291, -14.25235366821289, -13.764693260192871, -13.277032852172852, -12.789373397827148, -12.301712989807129, -11.81405258178711, -11.326393127441406, -10.838732719421387, -10.351072311401367, -9.863411903381348, -9.375751495361328, -8.888091087341309, -8.400430679321289, -7.912770748138428, -7.425110340118408, -6.937450408935547, -6.449790000915527, -5.962129592895508, -5.474469184875488, -4.986809253692627, -4.499148845672607, -4.011488914489746, -3.5238285064697266, -3.036168336868286, -2.5485081672668457, -2.060847759246826, -1.5731875896453857, -1.0855283737182617, -0.5978681445121765, -0.11020791530609131, 0.37745237350463867, 0.8651125431060791, 1.3527727127075195, 1.840433120727539, 2.3280932903289795, 2.81575345993042, 3.3034136295318604, 3.791073799133301, 4.27873420715332, 4.76639461517334, 5.254054546356201, 5.741714954376221, 6.229374885559082, 6.717035293579102, 7.204695701599121, 7.692355632781982, 8.180015563964844, 8.667675971984863, 9.155336380004883, 9.642996788024902, 10.130657196044922, 10.618316650390625, 11.105977058410645, 11.593637466430664, 12.081296920776367, 12.568957328796387, 13.056617736816406, 13.544278144836426, 14.031938552856445, 14.519598960876465]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 6.0, 2.0, 1.0, 5.0, 6.0, 5.0, 5.0, 8.0, 15.0, 9.0, 22.0, 8.0, 12.0, 22.0, 32.0, 28.0, 34.0, 41.0, 47.0, 72.0, 80.0, 137.0, 195.0, 294.0, 490.0, 987.0, 2402.0, 6854.0, 25448.0, 198892.0, 3799736.0, 128417.0, 20562.0, 5727.0, 2036.0, 837.0, 401.0, 216.0, 89.0, 55.0, 34.0, 7.0, 5.0, 7.0, 4.0, 2.0, 1.0], "bins": [-8.828125, -8.641937255859375, -8.45574951171875, -8.269561767578125, -8.0833740234375, -7.897186279296875, -7.71099853515625, -7.524810791015625, -7.338623046875, -7.152435302734375, -6.96624755859375, -6.780059814453125, -6.5938720703125, -6.407684326171875, -6.22149658203125, -6.035308837890625, -5.84912109375, -5.662933349609375, -5.47674560546875, -5.290557861328125, -5.1043701171875, -4.918182373046875, -4.73199462890625, -4.545806884765625, -4.359619140625, -4.173431396484375, -3.98724365234375, -3.801055908203125, -3.6148681640625, -3.428680419921875, -3.24249267578125, -3.056304931640625, -2.8701171875, -2.683929443359375, -2.49774169921875, -2.311553955078125, -2.1253662109375, -1.939178466796875, -1.75299072265625, -1.566802978515625, -1.380615234375, -1.194427490234375, -1.00823974609375, -0.822052001953125, -0.6358642578125, -0.449676513671875, -0.26348876953125, -0.077301025390625, 0.10888671875, 0.295074462890625, 0.48126220703125, 0.667449951171875, 0.8536376953125, 1.039825439453125, 1.22601318359375, 1.412200927734375, 1.598388671875, 1.784576416015625, 1.97076416015625, 2.156951904296875, 2.3431396484375, 2.529327392578125, 2.71551513671875, 2.901702880859375, 3.087890625]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 5.0, 5.0, 13.0, 14.0, 27.0, 46.0, 50.0, 79.0, 79.0, 83.0, 117.0, 106.0, 91.0, 76.0, 66.0, 56.0, 38.0, 21.0, 12.0, 13.0, 5.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0], "bins": [-1.5615234375, -1.5288467407226562, -1.4961700439453125, -1.4634933471679688, -1.430816650390625, -1.3981399536132812, -1.3654632568359375, -1.3327865600585938, -1.30010986328125, -1.2674331665039062, -1.2347564697265625, -1.2020797729492188, -1.169403076171875, -1.1367263793945312, -1.1040496826171875, -1.0713729858398438, -1.0386962890625, -1.0060195922851562, -0.9733428955078125, -0.9406661987304688, -0.907989501953125, -0.8753128051757812, -0.8426361083984375, -0.8099594116210938, -0.77728271484375, -0.7446060180664062, -0.7119293212890625, -0.6792526245117188, -0.646575927734375, -0.6138992309570312, -0.5812225341796875, -0.5485458374023438, -0.515869140625, -0.48319244384765625, -0.4505157470703125, -0.41783905029296875, -0.385162353515625, -0.35248565673828125, -0.3198089599609375, -0.28713226318359375, -0.25445556640625, -0.22177886962890625, -0.1891021728515625, -0.15642547607421875, -0.123748779296875, -0.09107208251953125, -0.0583953857421875, -0.02571868896484375, 0.0069580078125, 0.03963470458984375, 0.0723114013671875, 0.10498809814453125, 0.137664794921875, 0.17034149169921875, 0.2030181884765625, 0.23569488525390625, 0.26837158203125, 0.30104827880859375, 0.3337249755859375, 0.36640167236328125, 0.399078369140625, 0.43175506591796875, 0.4644317626953125, 0.49710845947265625, 0.52978515625]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 4.0, 3.0, 4.0, 14.0, 11.0, 12.0, 34.0, 50.0, 78.0, 171.0, 342.0, 654.0, 1677.0, 4494.0, 27414.0, 4005446.0, 142847.0, 7488.0, 2007.0, 803.0, 357.0, 166.0, 76.0, 52.0, 28.0, 17.0, 11.0, 4.0, 5.0, 3.0, 3.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-14.0625, -13.70849609375, -13.3544921875, -13.00048828125, -12.646484375, -12.29248046875, -11.9384765625, -11.58447265625, -11.23046875, -10.87646484375, -10.5224609375, -10.16845703125, -9.814453125, -9.46044921875, -9.1064453125, -8.75244140625, -8.3984375, -8.04443359375, -7.6904296875, -7.33642578125, -6.982421875, -6.62841796875, -6.2744140625, -5.92041015625, -5.56640625, -5.21240234375, -4.8583984375, -4.50439453125, -4.150390625, -3.79638671875, -3.4423828125, -3.08837890625, -2.734375, -2.38037109375, -2.0263671875, -1.67236328125, -1.318359375, -0.96435546875, -0.6103515625, -0.25634765625, 0.09765625, 0.45166015625, 0.8056640625, 1.15966796875, 1.513671875, 1.86767578125, 2.2216796875, 2.57568359375, 2.9296875, 3.28369140625, 3.6376953125, 3.99169921875, 4.345703125, 4.69970703125, 5.0537109375, 5.40771484375, 5.76171875, 6.11572265625, 6.4697265625, 6.82373046875, 7.177734375, 7.53173828125, 7.8857421875, 8.23974609375, 8.59375]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 1.0, 0.0, 4.0, 4.0, 2.0, 8.0, 4.0, 12.0, 19.0, 37.0, 85.0, 254.0, 2199.0, 1188.0, 160.0, 49.0, 23.0, 9.0, 10.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.7578125, -4.6385498046875, -4.519287109375, -4.4000244140625, -4.28076171875, -4.1614990234375, -4.042236328125, -3.9229736328125, -3.8037109375, -3.6844482421875, -3.565185546875, -3.4459228515625, -3.32666015625, -3.2073974609375, -3.088134765625, -2.9688720703125, -2.849609375, -2.7303466796875, -2.611083984375, -2.4918212890625, -2.37255859375, -2.2532958984375, -2.134033203125, -2.0147705078125, -1.8955078125, -1.7762451171875, -1.656982421875, -1.5377197265625, -1.41845703125, -1.2991943359375, -1.179931640625, -1.0606689453125, -0.94140625, -0.8221435546875, -0.702880859375, -0.5836181640625, -0.46435546875, -0.3450927734375, -0.225830078125, -0.1065673828125, 0.0126953125, 0.1319580078125, 0.251220703125, 0.3704833984375, 0.48974609375, 0.6090087890625, 0.728271484375, 0.8475341796875, 0.966796875, 1.0860595703125, 1.205322265625, 1.3245849609375, 1.44384765625, 1.5631103515625, 1.682373046875, 1.8016357421875, 1.9208984375, 2.0401611328125, 2.159423828125, 2.2786865234375, 2.39794921875, 2.5172119140625, 2.636474609375, 2.7557373046875, 2.875]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 2.0, 14.0, 50.0, 161.0, 389.0, 291.0, 75.0, 13.0, 3.0, 3.0, 3.0, 1.0, 1.0, 2.0], "bins": [-49.422332763671875, -48.52080154418945, -47.6192741394043, -46.717742919921875, -45.81621170043945, -44.91468048095703, -44.013153076171875, -43.11162185668945, -42.21009063720703, -41.30855941772461, -40.40703201293945, -39.50550079345703, -38.60396957397461, -37.70243835449219, -36.80091094970703, -35.89937973022461, -34.99784851074219, -34.096317291259766, -33.19478988647461, -32.29325866699219, -31.391727447509766, -30.490198135375977, -29.588668823242188, -28.687137603759766, -27.78561019897461, -26.88408088684082, -25.9825496673584, -25.08102035522461, -24.179489135742188, -23.2779598236084, -22.37643051147461, -21.474899291992188, -20.573368072509766, -19.671838760375977, -18.770307540893555, -17.868778228759766, -16.967247009277344, -16.065717697143555, -15.16418743133545, -14.262657165527344, -13.361126899719238, -12.459596633911133, -11.558066368103027, -10.656536102294922, -9.755006790161133, -8.853475570678711, -7.951946258544922, -7.050415992736816, -6.148885726928711, -5.2473554611206055, -4.3458251953125, -3.4442954063415527, -2.5427651405334473, -1.6412348747253418, -0.7397050857543945, 0.16182518005371094, 1.0633554458618164, 1.9648855924606323, 2.8664157390594482, 3.7679457664489746, 4.66947603225708, 5.5710062980651855, 6.472536087036133, 7.374066352844238, 8.275596618652344]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 5.0, 5.0, 18.0, 27.0, 39.0, 76.0, 142.0, 162.0, 172.0, 141.0, 106.0, 59.0, 24.0, 23.0, 6.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.877960205078125, -17.244781494140625, -16.611602783203125, -15.978422164916992, -15.345243453979492, -14.712064743041992, -14.078885078430176, -13.44570541381836, -12.81252670288086, -12.17934799194336, -11.546168327331543, -10.912988662719727, -10.279809951782227, -9.646631240844727, -9.01345157623291, -8.380271911621094, -7.747093200683594, -7.1139140129089355, -6.480734825134277, -5.847555637359619, -5.214376449584961, -4.581197261810303, -3.9480180740356445, -3.3148388862609863, -2.681659698486328, -2.04848051071167, -1.4153013229370117, -0.7821221351623535, -0.1489429473876953, 0.4842362403869629, 1.117415428161621, 1.7505946159362793, 2.3837757110595703, 3.0169548988342285, 3.6501340866088867, 4.283313274383545, 4.916492462158203, 5.549671649932861, 6.1828508377075195, 6.816030025482178, 7.449209213256836, 8.082387924194336, 8.715567588806152, 9.348747253417969, 9.981925964355469, 10.615104675292969, 11.248284339904785, 11.881464004516602, 12.514642715454102, 13.147821426391602, 13.781001091003418, 14.414180755615234, 15.047359466552734, 15.680538177490234, 16.313716888427734, 16.946897506713867, 17.580076217651367, 18.213254928588867, 18.846435546875, 19.4796142578125, 20.11279296875, 20.7459716796875, 21.379150390625, 22.012331008911133, 22.645509719848633]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 4.0, 7.0, 7.0, 10.0, 12.0, 18.0, 29.0, 37.0, 70.0, 116.0, 211.0, 413.0, 945.0, 2594.0, 13119.0, 191050.0, 772143.0, 58391.0, 6363.0, 1602.0, 680.0, 290.0, 159.0, 101.0, 62.0, 42.0, 26.0, 17.0, 13.0, 8.0, 6.0, 6.0, 3.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.859375, -7.63677978515625, -7.4141845703125, -7.19158935546875, -6.968994140625, -6.74639892578125, -6.5238037109375, -6.30120849609375, -6.07861328125, -5.85601806640625, -5.6334228515625, -5.41082763671875, -5.188232421875, -4.96563720703125, -4.7430419921875, -4.52044677734375, -4.2978515625, -4.07525634765625, -3.8526611328125, -3.63006591796875, -3.407470703125, -3.18487548828125, -2.9622802734375, -2.73968505859375, -2.51708984375, -2.29449462890625, -2.0718994140625, -1.84930419921875, -1.626708984375, -1.40411376953125, -1.1815185546875, -0.95892333984375, -0.736328125, -0.51373291015625, -0.2911376953125, -0.06854248046875, 0.154052734375, 0.37664794921875, 0.5992431640625, 0.82183837890625, 1.04443359375, 1.26702880859375, 1.4896240234375, 1.71221923828125, 1.934814453125, 2.15740966796875, 2.3800048828125, 2.60260009765625, 2.8251953125, 3.04779052734375, 3.2703857421875, 3.49298095703125, 3.715576171875, 3.93817138671875, 4.1607666015625, 4.38336181640625, 4.60595703125, 4.82855224609375, 5.0511474609375, 5.27374267578125, 5.496337890625, 5.71893310546875, 5.9415283203125, 6.16412353515625, 6.38671875]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 8.0, 5.0, 15.0, 31.0, 30.0, 81.0, 91.0, 109.0, 143.0, 118.0, 120.0, 83.0, 68.0, 35.0, 28.0, 16.0, 15.0, 5.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.87109375, -1.8275375366210938, -1.7839813232421875, -1.7404251098632812, -1.696868896484375, -1.6533126831054688, -1.6097564697265625, -1.5662002563476562, -1.52264404296875, -1.4790878295898438, -1.4355316162109375, -1.3919754028320312, -1.348419189453125, -1.3048629760742188, -1.2613067626953125, -1.2177505493164062, -1.1741943359375, -1.1306381225585938, -1.0870819091796875, -1.0435256958007812, -0.999969482421875, -0.9564132690429688, -0.9128570556640625, -0.8693008422851562, -0.82574462890625, -0.7821884155273438, -0.7386322021484375, -0.6950759887695312, -0.651519775390625, -0.6079635620117188, -0.5644073486328125, -0.5208511352539062, -0.477294921875, -0.43373870849609375, -0.3901824951171875, -0.34662628173828125, -0.303070068359375, -0.25951385498046875, -0.2159576416015625, -0.17240142822265625, -0.12884521484375, -0.08528900146484375, -0.0417327880859375, 0.00182342529296875, 0.045379638671875, 0.08893585205078125, 0.1324920654296875, 0.17604827880859375, 0.2196044921875, 0.26316070556640625, 0.3067169189453125, 0.35027313232421875, 0.393829345703125, 0.43738555908203125, 0.4809417724609375, 0.5244979858398438, 0.56805419921875, 0.6116104125976562, 0.6551666259765625, 0.6987228393554688, 0.742279052734375, 0.7858352661132812, 0.8293914794921875, 0.8729476928710938, 0.91650390625]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 2.0, 4.0, 7.0, 8.0, 13.0, 29.0, 41.0, 61.0, 111.0, 241.0, 521.0, 1327.0, 5045.0, 27439.0, 230456.0, 641208.0, 121133.0, 15805.0, 3302.0, 981.0, 386.0, 187.0, 102.0, 53.0, 33.0, 25.0, 10.0, 8.0, 4.0, 8.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.435546875, -3.318511962890625, -3.20147705078125, -3.084442138671875, -2.9674072265625, -2.850372314453125, -2.73333740234375, -2.616302490234375, -2.499267578125, -2.382232666015625, -2.26519775390625, -2.148162841796875, -2.0311279296875, -1.914093017578125, -1.79705810546875, -1.680023193359375, -1.56298828125, -1.445953369140625, -1.32891845703125, -1.211883544921875, -1.0948486328125, -0.977813720703125, -0.86077880859375, -0.743743896484375, -0.626708984375, -0.509674072265625, -0.39263916015625, -0.275604248046875, -0.1585693359375, -0.041534423828125, 0.07550048828125, 0.192535400390625, 0.3095703125, 0.426605224609375, 0.54364013671875, 0.660675048828125, 0.7777099609375, 0.894744873046875, 1.01177978515625, 1.128814697265625, 1.245849609375, 1.362884521484375, 1.47991943359375, 1.596954345703125, 1.7139892578125, 1.831024169921875, 1.94805908203125, 2.065093994140625, 2.18212890625, 2.299163818359375, 2.41619873046875, 2.533233642578125, 2.6502685546875, 2.767303466796875, 2.88433837890625, 3.001373291015625, 3.118408203125, 3.235443115234375, 3.35247802734375, 3.469512939453125, 3.5865478515625, 3.703582763671875, 3.82061767578125, 3.937652587890625, 4.0546875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 4.0, 4.0, 4.0, 10.0, 11.0, 17.0, 14.0, 22.0, 21.0, 25.0, 32.0, 33.0, 43.0, 45.0, 45.0, 46.0, 48.0, 54.0, 49.0, 47.0, 43.0, 48.0, 46.0, 38.0, 43.0, 27.0, 27.0, 36.0, 16.0, 24.0, 9.0, 17.0, 13.0, 6.0, 6.0, 8.0, 7.0, 9.0, 0.0, 3.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.4453125, -2.369781494140625, -2.29425048828125, -2.218719482421875, -2.1431884765625, -2.067657470703125, -1.99212646484375, -1.916595458984375, -1.841064453125, -1.765533447265625, -1.69000244140625, -1.614471435546875, -1.5389404296875, -1.463409423828125, -1.38787841796875, -1.312347412109375, -1.23681640625, -1.161285400390625, -1.08575439453125, -1.010223388671875, -0.9346923828125, -0.859161376953125, -0.78363037109375, -0.708099365234375, -0.632568359375, -0.557037353515625, -0.48150634765625, -0.405975341796875, -0.3304443359375, -0.254913330078125, -0.17938232421875, -0.103851318359375, -0.0283203125, 0.047210693359375, 0.12274169921875, 0.198272705078125, 0.2738037109375, 0.349334716796875, 0.42486572265625, 0.500396728515625, 0.575927734375, 0.651458740234375, 0.72698974609375, 0.802520751953125, 0.8780517578125, 0.953582763671875, 1.02911376953125, 1.104644775390625, 1.18017578125, 1.255706787109375, 1.33123779296875, 1.406768798828125, 1.4822998046875, 1.557830810546875, 1.63336181640625, 1.708892822265625, 1.784423828125, 1.859954833984375, 1.93548583984375, 2.011016845703125, 2.0865478515625, 2.162078857421875, 2.23760986328125, 2.313140869140625, 2.388671875]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 7.0, 8.0, 13.0, 25.0, 52.0, 65.0, 118.0, 234.0, 394.0, 905.0, 2430.0, 7676.0, 34192.0, 292483.0, 609831.0, 80000.0, 13714.0, 3782.0, 1412.0, 579.0, 270.0, 154.0, 84.0, 43.0, 28.0, 12.0, 11.0, 6.0, 8.0, 8.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.474609375, -1.4223480224609375, -1.370086669921875, -1.3178253173828125, -1.26556396484375, -1.2133026123046875, -1.161041259765625, -1.1087799072265625, -1.0565185546875, -1.0042572021484375, -0.951995849609375, -0.8997344970703125, -0.84747314453125, -0.7952117919921875, -0.742950439453125, -0.6906890869140625, -0.638427734375, -0.5861663818359375, -0.533905029296875, -0.4816436767578125, -0.42938232421875, -0.3771209716796875, -0.324859619140625, -0.2725982666015625, -0.2203369140625, -0.1680755615234375, -0.115814208984375, -0.0635528564453125, -0.01129150390625, 0.0409698486328125, 0.093231201171875, 0.1454925537109375, 0.19775390625, 0.2500152587890625, 0.302276611328125, 0.3545379638671875, 0.40679931640625, 0.4590606689453125, 0.511322021484375, 0.5635833740234375, 0.6158447265625, 0.6681060791015625, 0.720367431640625, 0.7726287841796875, 0.82489013671875, 0.8771514892578125, 0.929412841796875, 0.9816741943359375, 1.033935546875, 1.0861968994140625, 1.138458251953125, 1.1907196044921875, 1.24298095703125, 1.2952423095703125, 1.347503662109375, 1.3997650146484375, 1.4520263671875, 1.5042877197265625, 1.556549072265625, 1.6088104248046875, 1.66107177734375, 1.7133331298828125, 1.765594482421875, 1.8178558349609375, 1.8701171875]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 8.0, 4.0, 8.0, 10.0, 11.0, 34.0, 18.0, 38.0, 51.0, 66.0, 75.0, 69.0, 83.0, 76.0, 101.0, 83.0, 53.0, 48.0, 37.0, 28.0, 22.0, 27.0, 13.0, 11.0, 6.0, 11.0, 4.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002548694610595703, -0.00024819374084472656, -0.0002415180206298828, -0.00023484230041503906, -0.0002281665802001953, -0.00022149085998535156, -0.0002148151397705078, -0.00020813941955566406, -0.0002014636993408203, -0.00019478797912597656, -0.0001881122589111328, -0.00018143653869628906, -0.0001747608184814453, -0.00016808509826660156, -0.0001614093780517578, -0.00015473365783691406, -0.0001480579376220703, -0.00014138221740722656, -0.0001347064971923828, -0.00012803077697753906, -0.00012135505676269531, -0.00011467933654785156, -0.00010800361633300781, -0.00010132789611816406, -9.465217590332031e-05, -8.797645568847656e-05, -8.130073547363281e-05, -7.462501525878906e-05, -6.794929504394531e-05, -6.127357482910156e-05, -5.459785461425781e-05, -4.792213439941406e-05, -4.124641418457031e-05, -3.457069396972656e-05, -2.7894973754882812e-05, -2.1219253540039062e-05, -1.4543533325195312e-05, -7.867813110351562e-06, -1.1920928955078125e-06, 5.4836273193359375e-06, 1.2159347534179688e-05, 1.8835067749023438e-05, 2.5510787963867188e-05, 3.218650817871094e-05, 3.886222839355469e-05, 4.553794860839844e-05, 5.221366882324219e-05, 5.888938903808594e-05, 6.556510925292969e-05, 7.224082946777344e-05, 7.891654968261719e-05, 8.559226989746094e-05, 9.226799011230469e-05, 9.894371032714844e-05, 0.00010561943054199219, 0.00011229515075683594, 0.00011897087097167969, 0.00012564659118652344, 0.0001323223114013672, 0.00013899803161621094, 0.0001456737518310547, 0.00015234947204589844, 0.0001590251922607422, 0.00016570091247558594, 0.0001723766326904297]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 4.0, 7.0, 6.0, 12.0, 24.0, 18.0, 31.0, 52.0, 76.0, 124.0, 212.0, 383.0, 772.0, 1728.0, 4912.0, 20036.0, 145354.0, 694600.0, 150919.0, 20935.0, 4899.0, 1684.0, 807.0, 388.0, 217.0, 116.0, 77.0, 49.0, 35.0, 26.0, 17.0, 15.0, 1.0, 7.0, 9.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.5048828125, -1.453887939453125, -1.40289306640625, -1.351898193359375, -1.3009033203125, -1.249908447265625, -1.19891357421875, -1.147918701171875, -1.096923828125, -1.045928955078125, -0.99493408203125, -0.943939208984375, -0.8929443359375, -0.841949462890625, -0.79095458984375, -0.739959716796875, -0.68896484375, -0.637969970703125, -0.58697509765625, -0.535980224609375, -0.4849853515625, -0.433990478515625, -0.38299560546875, -0.332000732421875, -0.281005859375, -0.230010986328125, -0.17901611328125, -0.128021240234375, -0.0770263671875, -0.026031494140625, 0.02496337890625, 0.075958251953125, 0.126953125, 0.177947998046875, 0.22894287109375, 0.279937744140625, 0.3309326171875, 0.381927490234375, 0.43292236328125, 0.483917236328125, 0.534912109375, 0.585906982421875, 0.63690185546875, 0.687896728515625, 0.7388916015625, 0.789886474609375, 0.84088134765625, 0.891876220703125, 0.94287109375, 0.993865966796875, 1.04486083984375, 1.095855712890625, 1.1468505859375, 1.197845458984375, 1.24884033203125, 1.299835205078125, 1.350830078125, 1.401824951171875, 1.45281982421875, 1.503814697265625, 1.5548095703125, 1.605804443359375, 1.65679931640625, 1.707794189453125, 1.7587890625]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 6.0, 1.0, 9.0, 11.0, 20.0, 21.0, 42.0, 51.0, 60.0, 82.0, 95.0, 126.0, 113.0, 88.0, 58.0, 65.0, 50.0, 33.0, 24.0, 15.0, 4.0, 9.0, 13.0, 5.0, 3.0, 1.0, 1.0, 2.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.87060546875, -0.8366470336914062, -0.8026885986328125, -0.7687301635742188, -0.734771728515625, -0.7008132934570312, -0.6668548583984375, -0.6328964233398438, -0.59893798828125, -0.5649795532226562, -0.5310211181640625, -0.49706268310546875, -0.463104248046875, -0.42914581298828125, -0.3951873779296875, -0.36122894287109375, -0.3272705078125, -0.29331207275390625, -0.2593536376953125, -0.22539520263671875, -0.191436767578125, -0.15747833251953125, -0.1235198974609375, -0.08956146240234375, -0.05560302734375, -0.02164459228515625, 0.0123138427734375, 0.04627227783203125, 0.080230712890625, 0.11418914794921875, 0.1481475830078125, 0.18210601806640625, 0.216064453125, 0.25002288818359375, 0.2839813232421875, 0.31793975830078125, 0.351898193359375, 0.38585662841796875, 0.4198150634765625, 0.45377349853515625, 0.48773193359375, 0.5216903686523438, 0.5556488037109375, 0.5896072387695312, 0.623565673828125, 0.6575241088867188, 0.6914825439453125, 0.7254409790039062, 0.7593994140625, 0.7933578491210938, 0.8273162841796875, 0.8612747192382812, 0.895233154296875, 0.9291915893554688, 0.9631500244140625, 0.9971084594726562, 1.03106689453125, 1.0650253295898438, 1.0989837646484375, 1.1329421997070312, 1.166900634765625, 1.2008590698242188, 1.2348175048828125, 1.2687759399414062, 1.302734375]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 15.0, 11.0, 46.0, 88.0, 124.0, 189.0, 192.0, 151.0, 86.0, 50.0, 24.0, 13.0, 9.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-23.194259643554688, -22.491138458251953, -21.788015365600586, -21.08489418029785, -20.381772994995117, -19.678651809692383, -18.975528717041016, -18.27240753173828, -17.569286346435547, -16.866165161132812, -16.163042068481445, -15.459920883178711, -14.756799697875977, -14.053677558898926, -13.350555419921875, -12.64743423461914, -11.94431209564209, -11.241189956665039, -10.538068771362305, -9.834946632385254, -9.13182544708252, -8.428703308105469, -7.725581645965576, -7.022459983825684, -6.319338321685791, -5.616216659545898, -4.913094997406006, -4.209973335266113, -3.5068514347076416, -2.803729772567749, -2.1006078720092773, -1.3974862098693848, -0.6943645477294922, 0.008757174015045166, 0.7118788957595825, 1.4150006771087646, 2.1181223392486572, 2.82124400138855, 3.5243659019470215, 4.227487564086914, 4.930609226226807, 5.633730888366699, 6.336852550506592, 7.039974212646484, 7.743096351623535, 8.44621753692627, 9.14933967590332, 9.852460861206055, 10.555583000183105, 11.258705139160156, 11.96182632446289, 12.664948463439941, 13.368069648742676, 14.071191787719727, 14.774312973022461, 15.477435111999512, 16.180557250976562, 16.883678436279297, 17.586801528930664, 18.2899227142334, 18.993043899536133, 19.696165084838867, 20.399288177490234, 21.10240936279297, 21.805530548095703]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 4.0, 3.0, 9.0, 6.0, 10.0, 10.0, 11.0, 22.0, 16.0, 22.0, 25.0, 22.0, 30.0, 30.0, 34.0, 36.0, 48.0, 43.0, 39.0, 58.0, 54.0, 48.0, 57.0, 50.0, 35.0, 28.0, 30.0, 26.0, 38.0, 26.0, 21.0, 19.0, 15.0, 16.0, 14.0, 11.0, 7.0, 8.0, 9.0, 6.0, 4.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-14.616498947143555, -14.191328048706055, -13.766157150268555, -13.340985298156738, -12.915814399719238, -12.490643501281738, -12.065471649169922, -11.640300750732422, -11.215129852294922, -10.789958953857422, -10.364788055419922, -9.939616203308105, -9.514445304870605, -9.089274406433105, -8.664102554321289, -8.238931655883789, -7.813760757446289, -7.388589859008789, -6.963418483734131, -6.538247108459473, -6.113076210021973, -5.687905311584473, -5.2627339363098145, -4.837562561035156, -4.412391662597656, -3.987220525741577, -3.562049388885498, -3.136878252029419, -2.71170711517334, -2.2865359783172607, -1.8613648414611816, -1.4361937046051025, -1.0110235214233398, -0.5858523845672607, -0.16068124771118164, 0.26448988914489746, 0.6896610260009766, 1.1148321628570557, 1.5400032997131348, 1.9651744365692139, 2.390345573425293, 2.815516710281372, 3.240687847137451, 3.6658589839935303, 4.091030120849609, 4.516201019287109, 4.941372394561768, 5.366543769836426, 5.791714668273926, 6.216885566711426, 6.642056941986084, 7.067228317260742, 7.492399215698242, 7.917570114135742, 8.342741012573242, 8.767912864685059, 9.193083763122559, 9.618254661560059, 10.043426513671875, 10.468597412109375, 10.893768310546875, 11.318939208984375, 11.744110107421875, 12.169281959533691, 12.594452857971191]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 5.0, 2.0, 1.0, 1.0, 2.0, 4.0, 7.0, 4.0, 7.0, 14.0, 14.0, 20.0, 18.0, 39.0, 45.0, 59.0, 103.0, 127.0, 243.0, 373.0, 783.0, 1970.0, 6302.0, 36076.0, 3431352.0, 681953.0, 26540.0, 5107.0, 1685.0, 657.0, 318.0, 197.0, 88.0, 56.0, 25.0, 22.0, 21.0, 15.0, 10.0, 9.0, 4.0, 6.0, 3.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.85546875, -5.6776123046875, -5.499755859375, -5.3218994140625, -5.14404296875, -4.9661865234375, -4.788330078125, -4.6104736328125, -4.4326171875, -4.2547607421875, -4.076904296875, -3.8990478515625, -3.72119140625, -3.5433349609375, -3.365478515625, -3.1876220703125, -3.009765625, -2.8319091796875, -2.654052734375, -2.4761962890625, -2.29833984375, -2.1204833984375, -1.942626953125, -1.7647705078125, -1.5869140625, -1.4090576171875, -1.231201171875, -1.0533447265625, -0.87548828125, -0.6976318359375, -0.519775390625, -0.3419189453125, -0.1640625, 0.0137939453125, 0.191650390625, 0.3695068359375, 0.54736328125, 0.7252197265625, 0.903076171875, 1.0809326171875, 1.2587890625, 1.4366455078125, 1.614501953125, 1.7923583984375, 1.97021484375, 2.1480712890625, 2.325927734375, 2.5037841796875, 2.681640625, 2.8594970703125, 3.037353515625, 3.2152099609375, 3.39306640625, 3.5709228515625, 3.748779296875, 3.9266357421875, 4.1044921875, 4.2823486328125, 4.460205078125, 4.6380615234375, 4.81591796875, 4.9937744140625, 5.171630859375, 5.3494873046875, 5.52734375]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 7.0, 5.0, 12.0, 32.0, 53.0, 58.0, 78.0, 89.0, 112.0, 103.0, 118.0, 103.0, 75.0, 59.0, 43.0, 24.0, 16.0, 5.0, 6.0, 5.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.8779296875, -1.833648681640625, -1.78936767578125, -1.745086669921875, -1.7008056640625, -1.656524658203125, -1.61224365234375, -1.567962646484375, -1.523681640625, -1.479400634765625, -1.43511962890625, -1.390838623046875, -1.3465576171875, -1.302276611328125, -1.25799560546875, -1.213714599609375, -1.16943359375, -1.125152587890625, -1.08087158203125, -1.036590576171875, -0.9923095703125, -0.948028564453125, -0.90374755859375, -0.859466552734375, -0.815185546875, -0.770904541015625, -0.72662353515625, -0.682342529296875, -0.6380615234375, -0.593780517578125, -0.54949951171875, -0.505218505859375, -0.4609375, -0.416656494140625, -0.37237548828125, -0.328094482421875, -0.2838134765625, -0.239532470703125, -0.19525146484375, -0.150970458984375, -0.106689453125, -0.062408447265625, -0.01812744140625, 0.026153564453125, 0.0704345703125, 0.114715576171875, 0.15899658203125, 0.203277587890625, 0.24755859375, 0.291839599609375, 0.33612060546875, 0.380401611328125, 0.4246826171875, 0.468963623046875, 0.51324462890625, 0.557525634765625, 0.601806640625, 0.646087646484375, 0.69036865234375, 0.734649658203125, 0.7789306640625, 0.823211669921875, 0.86749267578125, 0.911773681640625, 0.9560546875]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 5.0, 4.0, 8.0, 9.0, 19.0, 21.0, 39.0, 58.0, 90.0, 173.0, 355.0, 1129.0, 6606.0, 120752.0, 4008797.0, 50355.0, 4257.0, 916.0, 332.0, 145.0, 88.0, 45.0, 31.0, 22.0, 13.0, 10.0, 6.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.2265625, -7.9940185546875, -7.761474609375, -7.5289306640625, -7.29638671875, -7.0638427734375, -6.831298828125, -6.5987548828125, -6.3662109375, -6.1336669921875, -5.901123046875, -5.6685791015625, -5.43603515625, -5.2034912109375, -4.970947265625, -4.7384033203125, -4.505859375, -4.2733154296875, -4.040771484375, -3.8082275390625, -3.57568359375, -3.3431396484375, -3.110595703125, -2.8780517578125, -2.6455078125, -2.4129638671875, -2.180419921875, -1.9478759765625, -1.71533203125, -1.4827880859375, -1.250244140625, -1.0177001953125, -0.78515625, -0.5526123046875, -0.320068359375, -0.0875244140625, 0.14501953125, 0.3775634765625, 0.610107421875, 0.8426513671875, 1.0751953125, 1.3077392578125, 1.540283203125, 1.7728271484375, 2.00537109375, 2.2379150390625, 2.470458984375, 2.7030029296875, 2.935546875, 3.1680908203125, 3.400634765625, 3.6331787109375, 3.86572265625, 4.0982666015625, 4.330810546875, 4.5633544921875, 4.7958984375, 5.0284423828125, 5.260986328125, 5.4935302734375, 5.72607421875, 5.9586181640625, 6.191162109375, 6.4237060546875, 6.65625]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 2.0, 3.0, 8.0, 12.0, 20.0, 29.0, 52.0, 87.0, 191.0, 660.0, 2237.0, 415.0, 164.0, 68.0, 53.0, 24.0, 22.0, 11.0, 10.0, 3.0, 1.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.740234375, -1.6880035400390625, -1.635772705078125, -1.5835418701171875, -1.53131103515625, -1.4790802001953125, -1.426849365234375, -1.3746185302734375, -1.3223876953125, -1.2701568603515625, -1.217926025390625, -1.1656951904296875, -1.11346435546875, -1.0612335205078125, -1.009002685546875, -0.9567718505859375, -0.904541015625, -0.8523101806640625, -0.800079345703125, -0.7478485107421875, -0.69561767578125, -0.6433868408203125, -0.591156005859375, -0.5389251708984375, -0.4866943359375, -0.4344635009765625, -0.382232666015625, -0.3300018310546875, -0.27777099609375, -0.2255401611328125, -0.173309326171875, -0.1210784912109375, -0.06884765625, -0.0166168212890625, 0.035614013671875, 0.0878448486328125, 0.14007568359375, 0.1923065185546875, 0.244537353515625, 0.2967681884765625, 0.3489990234375, 0.4012298583984375, 0.453460693359375, 0.5056915283203125, 0.55792236328125, 0.6101531982421875, 0.662384033203125, 0.7146148681640625, 0.766845703125, 0.8190765380859375, 0.871307373046875, 0.9235382080078125, 0.97576904296875, 1.0279998779296875, 1.080230712890625, 1.1324615478515625, 1.1846923828125, 1.2369232177734375, 1.289154052734375, 1.3413848876953125, 1.39361572265625, 1.4458465576171875, 1.498077392578125, 1.5503082275390625, 1.6025390625]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 11.0, 17.0, 39.0, 124.0, 200.0, 235.0, 211.0, 89.0, 41.0, 19.0, 5.0, 5.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-10.390524864196777, -10.022367477416992, -9.654211044311523, -9.286053657531738, -8.917896270751953, -8.549738883972168, -8.181581497192383, -7.813425064086914, -7.445267677307129, -7.077110290527344, -6.708953380584717, -6.34079647064209, -5.972639083862305, -5.6044816970825195, -5.236324787139893, -4.868167877197266, -4.5000104904174805, -4.131853103637695, -3.7636961936950684, -3.3955390453338623, -3.0273818969726562, -2.65922474861145, -2.291067600250244, -1.922910451889038, -1.554753303527832, -1.186596155166626, -0.8184390068054199, -0.45028185844421387, -0.08212471008300781, 0.28603243827819824, 0.6541895866394043, 1.0223467350006104, 1.3905029296875, 1.758660078048706, 2.126817226409912, 2.494974374771118, 2.863131523132324, 3.2312886714935303, 3.5994458198547363, 3.9676029682159424, 4.335760116577148, 4.703917503356934, 5.0720744132995605, 5.4402313232421875, 5.808388710021973, 6.176546096801758, 6.544703006744385, 6.912859916687012, 7.281017303466797, 7.649174690246582, 8.017332077026367, 8.385488510131836, 8.753645896911621, 9.121803283691406, 9.489959716796875, 9.85811710357666, 10.226274490356445, 10.59443187713623, 10.962589263916016, 11.330745697021484, 11.69890308380127, 12.067060470581055, 12.435216903686523, 12.803374290466309, 13.171531677246094]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 7.0, 5.0, 8.0, 7.0, 10.0, 8.0, 10.0, 8.0, 17.0, 14.0, 18.0, 16.0, 30.0, 28.0, 40.0, 35.0, 43.0, 56.0, 42.0, 44.0, 44.0, 47.0, 40.0, 42.0, 48.0, 33.0, 32.0, 44.0, 30.0, 29.0, 19.0, 26.0, 20.0, 18.0, 20.0, 12.0, 13.0, 8.0, 7.0, 9.0, 3.0, 6.0, 2.0, 4.0, 3.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.454960584640503, -3.342747449874878, -3.230534076690674, -3.118320941925049, -3.006107807159424, -2.893894672393799, -2.7816812992095947, -2.6694681644439697, -2.5572547912597656, -2.4450416564941406, -2.3328282833099365, -2.2206151485443115, -2.1084020137786865, -1.996188759803772, -1.8839755058288574, -1.7717623710632324, -1.6595492362976074, -1.5473359823226929, -1.4351228475570679, -1.3229095935821533, -1.2106964588165283, -1.0984832048416138, -0.9862699508666992, -0.8740567564964294, -0.7618435621261597, -0.6496303677558899, -0.5374171733856201, -0.42520391941070557, -0.3129907250404358, -0.20077753067016602, -0.08856427669525146, 0.02364891767501831, 0.13586211204528809, 0.24807532131671906, 0.36028853058815, 0.4725017547607422, 0.584714949131012, 0.6969281435012817, 0.8091413974761963, 0.9213545918464661, 1.0335677862167358, 1.1457810401916504, 1.2579941749572754, 1.37020742893219, 1.4824206829071045, 1.5946338176727295, 1.706847071647644, 1.8190603256225586, 1.9312734603881836, 2.0434865951538086, 2.1556999683380127, 2.2679131031036377, 2.3801262378692627, 2.492339611053467, 2.604552745819092, 2.716765880584717, 2.828979015350342, 2.941192150115967, 3.053405523300171, 3.165618658065796, 3.277831792831421, 3.390045166015625, 3.50225830078125, 3.614471435546875, 3.726684808731079]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 6.0, 10.0, 12.0, 12.0, 25.0, 27.0, 58.0, 91.0, 216.0, 428.0, 1129.0, 3863.0, 27473.0, 491954.0, 490046.0, 27379.0, 3809.0, 1112.0, 452.0, 194.0, 97.0, 53.0, 41.0, 33.0, 14.0, 9.0, 5.0, 6.0, 6.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.078125, -6.84979248046875, -6.6214599609375, -6.39312744140625, -6.164794921875, -5.93646240234375, -5.7081298828125, -5.47979736328125, -5.25146484375, -5.02313232421875, -4.7947998046875, -4.56646728515625, -4.338134765625, -4.10980224609375, -3.8814697265625, -3.65313720703125, -3.4248046875, -3.19647216796875, -2.9681396484375, -2.73980712890625, -2.511474609375, -2.28314208984375, -2.0548095703125, -1.82647705078125, -1.59814453125, -1.36981201171875, -1.1414794921875, -0.91314697265625, -0.684814453125, -0.45648193359375, -0.2281494140625, 0.00018310546875, 0.228515625, 0.45684814453125, 0.6851806640625, 0.91351318359375, 1.141845703125, 1.37017822265625, 1.5985107421875, 1.82684326171875, 2.05517578125, 2.28350830078125, 2.5118408203125, 2.74017333984375, 2.968505859375, 3.19683837890625, 3.4251708984375, 3.65350341796875, 3.8818359375, 4.11016845703125, 4.3385009765625, 4.56683349609375, 4.795166015625, 5.02349853515625, 5.2518310546875, 5.48016357421875, 5.70849609375, 5.93682861328125, 6.1651611328125, 6.39349365234375, 6.621826171875, 6.85015869140625, 7.0784912109375, 7.30682373046875, 7.53515625]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 4.0, 7.0, 10.0, 36.0, 45.0, 55.0, 90.0, 88.0, 94.0, 125.0, 109.0, 92.0, 82.0, 61.0, 45.0, 30.0, 12.0, 6.0, 1.0, 3.0, 4.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.8310546875, -1.78790283203125, -1.7447509765625, -1.70159912109375, -1.658447265625, -1.61529541015625, -1.5721435546875, -1.52899169921875, -1.48583984375, -1.44268798828125, -1.3995361328125, -1.35638427734375, -1.313232421875, -1.27008056640625, -1.2269287109375, -1.18377685546875, -1.140625, -1.09747314453125, -1.0543212890625, -1.01116943359375, -0.968017578125, -0.92486572265625, -0.8817138671875, -0.83856201171875, -0.79541015625, -0.75225830078125, -0.7091064453125, -0.66595458984375, -0.622802734375, -0.57965087890625, -0.5364990234375, -0.49334716796875, -0.4501953125, -0.40704345703125, -0.3638916015625, -0.32073974609375, -0.277587890625, -0.23443603515625, -0.1912841796875, -0.14813232421875, -0.10498046875, -0.06182861328125, -0.0186767578125, 0.02447509765625, 0.067626953125, 0.11077880859375, 0.1539306640625, 0.19708251953125, 0.240234375, 0.28338623046875, 0.3265380859375, 0.36968994140625, 0.412841796875, 0.45599365234375, 0.4991455078125, 0.54229736328125, 0.58544921875, 0.62860107421875, 0.6717529296875, 0.71490478515625, 0.758056640625, 0.80120849609375, 0.8443603515625, 0.88751220703125, 0.9306640625]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 2.0, 0.0, 4.0, 3.0, 4.0, 7.0, 3.0, 5.0, 12.0, 19.0, 17.0, 24.0, 33.0, 47.0, 58.0, 112.0, 155.0, 253.0, 488.0, 913.0, 2083.0, 4831.0, 13814.0, 42697.0, 151138.0, 422199.0, 287255.0, 83293.0, 24698.0, 8237.0, 3184.0, 1348.0, 667.0, 360.0, 200.0, 117.0, 67.0, 65.0, 40.0, 30.0, 24.0, 17.0, 7.0, 8.0, 8.0, 1.0, 7.0, 3.0, 4.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-2.171875, -2.098907470703125, -2.02593994140625, -1.952972412109375, -1.8800048828125, -1.807037353515625, -1.73406982421875, -1.661102294921875, -1.588134765625, -1.515167236328125, -1.44219970703125, -1.369232177734375, -1.2962646484375, -1.223297119140625, -1.15032958984375, -1.077362060546875, -1.00439453125, -0.931427001953125, -0.85845947265625, -0.785491943359375, -0.7125244140625, -0.639556884765625, -0.56658935546875, -0.493621826171875, -0.420654296875, -0.347686767578125, -0.27471923828125, -0.201751708984375, -0.1287841796875, -0.055816650390625, 0.01715087890625, 0.090118408203125, 0.1630859375, 0.236053466796875, 0.30902099609375, 0.381988525390625, 0.4549560546875, 0.527923583984375, 0.60089111328125, 0.673858642578125, 0.746826171875, 0.819793701171875, 0.89276123046875, 0.965728759765625, 1.0386962890625, 1.111663818359375, 1.18463134765625, 1.257598876953125, 1.33056640625, 1.403533935546875, 1.47650146484375, 1.549468994140625, 1.6224365234375, 1.695404052734375, 1.76837158203125, 1.841339111328125, 1.914306640625, 1.987274169921875, 2.06024169921875, 2.133209228515625, 2.2061767578125, 2.279144287109375, 2.35211181640625, 2.425079345703125, 2.498046875]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 4.0, 3.0, 4.0, 1.0, 4.0, 6.0, 10.0, 6.0, 12.0, 7.0, 22.0, 18.0, 16.0, 29.0, 23.0, 16.0, 26.0, 32.0, 37.0, 34.0, 41.0, 39.0, 34.0, 41.0, 45.0, 25.0, 43.0, 42.0, 28.0, 34.0, 35.0, 35.0, 34.0, 30.0, 29.0, 24.0, 22.0, 20.0, 16.0, 14.0, 17.0, 14.0, 10.0, 6.0, 6.0, 7.0, 4.0, 4.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-2.03125, -1.962860107421875, -1.89447021484375, -1.826080322265625, -1.7576904296875, -1.689300537109375, -1.62091064453125, -1.552520751953125, -1.484130859375, -1.415740966796875, -1.34735107421875, -1.278961181640625, -1.2105712890625, -1.142181396484375, -1.07379150390625, -1.005401611328125, -0.93701171875, -0.868621826171875, -0.80023193359375, -0.731842041015625, -0.6634521484375, -0.595062255859375, -0.52667236328125, -0.458282470703125, -0.389892578125, -0.321502685546875, -0.25311279296875, -0.184722900390625, -0.1163330078125, -0.047943115234375, 0.02044677734375, 0.088836669921875, 0.1572265625, 0.225616455078125, 0.29400634765625, 0.362396240234375, 0.4307861328125, 0.499176025390625, 0.56756591796875, 0.635955810546875, 0.704345703125, 0.772735595703125, 0.84112548828125, 0.909515380859375, 0.9779052734375, 1.046295166015625, 1.11468505859375, 1.183074951171875, 1.25146484375, 1.319854736328125, 1.38824462890625, 1.456634521484375, 1.5250244140625, 1.593414306640625, 1.66180419921875, 1.730194091796875, 1.798583984375, 1.866973876953125, 1.93536376953125, 2.003753662109375, 2.0721435546875, 2.140533447265625, 2.20892333984375, 2.277313232421875, 2.345703125]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 5.0, 2.0, 5.0, 8.0, 9.0, 13.0, 15.0, 31.0, 49.0, 103.0, 180.0, 450.0, 1558.0, 11014.0, 981648.0, 49522.0, 2765.0, 642.0, 247.0, 111.0, 70.0, 35.0, 28.0, 10.0, 7.0, 10.0, 8.0, 4.0, 5.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.33203125, -6.12567138671875, -5.9193115234375, -5.71295166015625, -5.506591796875, -5.30023193359375, -5.0938720703125, -4.88751220703125, -4.68115234375, -4.47479248046875, -4.2684326171875, -4.06207275390625, -3.855712890625, -3.64935302734375, -3.4429931640625, -3.23663330078125, -3.0302734375, -2.82391357421875, -2.6175537109375, -2.41119384765625, -2.204833984375, -1.99847412109375, -1.7921142578125, -1.58575439453125, -1.37939453125, -1.17303466796875, -0.9666748046875, -0.76031494140625, -0.553955078125, -0.34759521484375, -0.1412353515625, 0.06512451171875, 0.271484375, 0.47784423828125, 0.6842041015625, 0.89056396484375, 1.096923828125, 1.30328369140625, 1.5096435546875, 1.71600341796875, 1.92236328125, 2.12872314453125, 2.3350830078125, 2.54144287109375, 2.747802734375, 2.95416259765625, 3.1605224609375, 3.36688232421875, 3.5732421875, 3.77960205078125, 3.9859619140625, 4.19232177734375, 4.398681640625, 4.60504150390625, 4.8114013671875, 5.01776123046875, 5.22412109375, 5.43048095703125, 5.6368408203125, 5.84320068359375, 6.049560546875, 6.25592041015625, 6.4622802734375, 6.66864013671875, 6.875]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 13.0, 35.0, 137.0, 449.0, 243.0, 75.0, 22.0, 12.0, 3.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006699562072753906, -0.0006321743130683899, -0.0005943924188613892, -0.0005566105246543884, -0.0005188286304473877, -0.00048104673624038696, -0.00044326484203338623, -0.0004054829478263855, -0.00036770105361938477, -0.00032991915941238403, -0.0002921372652053833, -0.00025435537099838257, -0.00021657347679138184, -0.0001787915825843811, -0.00014100968837738037, -0.00010322779417037964, -6.54458999633789e-05, -2.7664005756378174e-05, 1.0117888450622559e-05, 4.789978265762329e-05, 8.568167686462402e-05, 0.00012346357107162476, 0.0001612454652786255, 0.00019902735948562622, 0.00023680925369262695, 0.0002745911478996277, 0.0003123730421066284, 0.00035015493631362915, 0.0003879368305206299, 0.0004257187247276306, 0.00046350061893463135, 0.0005012825131416321, 0.0005390644073486328, 0.0005768463015556335, 0.0006146281957626343, 0.000652410089969635, 0.0006901919841766357, 0.0007279738783836365, 0.0007657557725906372, 0.0008035376667976379, 0.0008413195610046387, 0.0008791014552116394, 0.0009168833494186401, 0.0009546652436256409, 0.0009924471378326416, 0.0010302290320396423, 0.001068010926246643, 0.0011057928204536438, 0.0011435747146606445, 0.0011813566088676453, 0.001219138503074646, 0.0012569203972816467, 0.0012947022914886475, 0.0013324841856956482, 0.001370266079902649, 0.0014080479741096497, 0.0014458298683166504, 0.0014836117625236511, 0.0015213936567306519, 0.0015591755509376526, 0.0015969574451446533, 0.001634739339351654, 0.0016725212335586548, 0.0017103031277656555, 0.0017480850219726562]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 7.0, 10.0, 13.0, 22.0, 32.0, 56.0, 84.0, 172.0, 404.0, 1626.0, 16409.0, 997679.0, 29120.0, 2072.0, 429.0, 154.0, 87.0, 54.0, 38.0, 22.0, 15.0, 19.0, 16.0, 4.0, 5.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0], "bins": [-7.515625, -7.32568359375, -7.1357421875, -6.94580078125, -6.755859375, -6.56591796875, -6.3759765625, -6.18603515625, -5.99609375, -5.80615234375, -5.6162109375, -5.42626953125, -5.236328125, -5.04638671875, -4.8564453125, -4.66650390625, -4.4765625, -4.28662109375, -4.0966796875, -3.90673828125, -3.716796875, -3.52685546875, -3.3369140625, -3.14697265625, -2.95703125, -2.76708984375, -2.5771484375, -2.38720703125, -2.197265625, -2.00732421875, -1.8173828125, -1.62744140625, -1.4375, -1.24755859375, -1.0576171875, -0.86767578125, -0.677734375, -0.48779296875, -0.2978515625, -0.10791015625, 0.08203125, 0.27197265625, 0.4619140625, 0.65185546875, 0.841796875, 1.03173828125, 1.2216796875, 1.41162109375, 1.6015625, 1.79150390625, 1.9814453125, 2.17138671875, 2.361328125, 2.55126953125, 2.7412109375, 2.93115234375, 3.12109375, 3.31103515625, 3.5009765625, 3.69091796875, 3.880859375, 4.07080078125, 4.2607421875, 4.45068359375, 4.640625]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 2.0, 1.0, 6.0, 5.0, 13.0, 14.0, 31.0, 61.0, 116.0, 184.0, 228.0, 176.0, 93.0, 41.0, 22.0, 4.0, 7.0, 3.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.384765625, -1.304840087890625, -1.22491455078125, -1.144989013671875, -1.0650634765625, -0.985137939453125, -0.90521240234375, -0.825286865234375, -0.745361328125, -0.665435791015625, -0.58551025390625, -0.505584716796875, -0.4256591796875, -0.345733642578125, -0.26580810546875, -0.185882568359375, -0.10595703125, -0.026031494140625, 0.05389404296875, 0.133819580078125, 0.2137451171875, 0.293670654296875, 0.37359619140625, 0.453521728515625, 0.533447265625, 0.613372802734375, 0.69329833984375, 0.773223876953125, 0.8531494140625, 0.933074951171875, 1.01300048828125, 1.092926025390625, 1.1728515625, 1.252777099609375, 1.33270263671875, 1.412628173828125, 1.4925537109375, 1.572479248046875, 1.65240478515625, 1.732330322265625, 1.812255859375, 1.892181396484375, 1.97210693359375, 2.052032470703125, 2.1319580078125, 2.211883544921875, 2.29180908203125, 2.371734619140625, 2.45166015625, 2.531585693359375, 2.61151123046875, 2.691436767578125, 2.7713623046875, 2.851287841796875, 2.93121337890625, 3.011138916015625, 3.091064453125, 3.170989990234375, 3.25091552734375, 3.330841064453125, 3.4107666015625, 3.490692138671875, 3.57061767578125, 3.650543212890625, 3.73046875]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 12.0, 21.0, 47.0, 157.0, 274.0, 253.0, 155.0, 57.0, 18.0, 7.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.04229736328125, -25.812040328979492, -24.581783294677734, -23.351524353027344, -22.121267318725586, -20.891010284423828, -19.660751342773438, -18.43049430847168, -17.200237274169922, -15.969980239868164, -14.73972225189209, -13.509464263916016, -12.279207229614258, -11.0489501953125, -9.818692207336426, -8.588434219360352, -7.358177185058594, -6.127919673919678, -4.897662162780762, -3.6674046516418457, -2.4371471405029297, -1.2068896293640137, 0.023367881774902344, 1.2536258697509766, 2.4838829040527344, 3.7141404151916504, 4.944397926330566, 6.174655437469482, 7.404912948608398, 8.635169982910156, 9.86542797088623, 11.095685958862305, 12.325946807861328, 13.556203842163086, 14.78646183013916, 16.016719818115234, 17.246976852416992, 18.47723388671875, 19.70749282836914, 20.9377498626709, 22.168006896972656, 23.398263931274414, 24.628520965576172, 25.858779907226562, 27.08903694152832, 28.319293975830078, 29.54955291748047, 30.779809951782227, 32.010066986083984, 33.240325927734375, 34.4705810546875, 35.70083999633789, 36.93109893798828, 38.161354064941406, 39.3916130065918, 40.62187194824219, 41.85212707519531, 43.0823860168457, 44.31264114379883, 45.54290008544922, 46.773155212402344, 48.003414154052734, 49.233673095703125, 50.46392822265625, 51.69418716430664]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 4.0, 6.0, 2.0, 5.0, 7.0, 8.0, 4.0, 6.0, 14.0, 8.0, 20.0, 15.0, 22.0, 21.0, 36.0, 20.0, 38.0, 39.0, 40.0, 33.0, 37.0, 36.0, 41.0, 42.0, 44.0, 41.0, 46.0, 29.0, 33.0, 26.0, 35.0, 25.0, 26.0, 33.0, 30.0, 25.0, 16.0, 12.0, 16.0, 19.0, 15.0, 9.0, 5.0, 5.0, 7.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-13.662406921386719, -13.273585319519043, -12.88476276397705, -12.495941162109375, -12.1071195602417, -11.718297958374023, -11.329475402832031, -10.940653800964355, -10.55183219909668, -10.163010597229004, -9.774188041687012, -9.385366439819336, -8.99654483795166, -8.607723236083984, -8.218900680541992, -7.830079078674316, -7.441256523132324, -7.05243444442749, -6.6636128425598145, -6.2747907638549805, -5.885969161987305, -5.497147083282471, -5.108325004577637, -4.719503402709961, -4.330681324005127, -3.941859483718872, -3.553037643432617, -3.164215564727783, -2.7753937244415283, -2.3865718841552734, -1.9977498054504395, -1.6089279651641846, -1.2201061248779297, -0.83128422498703, -0.44246232509613037, -0.05364036560058594, 0.33518147468566895, 0.7240033149719238, 1.1128253936767578, 1.5016472339630127, 1.8904690742492676, 2.2792909145355225, 2.6681127548217773, 3.0569348335266113, 3.445756673812866, 3.834578514099121, 4.223400592803955, 4.612222671508789, 5.001044273376465, 5.389866352081299, 5.778687953948975, 6.167510032653809, 6.556331634521484, 6.945153713226318, 7.333975791931152, 7.722797393798828, 8.11161994934082, 8.500441551208496, 8.889264106750488, 9.278085708618164, 9.66690731048584, 10.055728912353516, 10.444551467895508, 10.833373069763184, 11.22219467163086]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0, 4.0, 4.0, 5.0, 2.0, 3.0, 6.0, 8.0, 12.0, 18.0, 26.0, 33.0, 49.0, 104.0, 126.0, 194.0, 447.0, 1062.0, 2758.0, 11342.0, 132594.0, 3977784.0, 57063.0, 7170.0, 2016.0, 727.0, 324.0, 176.0, 90.0, 53.0, 32.0, 18.0, 17.0, 6.0, 3.0, 8.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-9.8671875, -9.62274169921875, -9.3782958984375, -9.13385009765625, -8.889404296875, -8.64495849609375, -8.4005126953125, -8.15606689453125, -7.91162109375, -7.66717529296875, -7.4227294921875, -7.17828369140625, -6.933837890625, -6.68939208984375, -6.4449462890625, -6.20050048828125, -5.9560546875, -5.71160888671875, -5.4671630859375, -5.22271728515625, -4.978271484375, -4.73382568359375, -4.4893798828125, -4.24493408203125, -4.00048828125, -3.75604248046875, -3.5115966796875, -3.26715087890625, -3.022705078125, -2.77825927734375, -2.5338134765625, -2.28936767578125, -2.044921875, -1.80047607421875, -1.5560302734375, -1.31158447265625, -1.067138671875, -0.82269287109375, -0.5782470703125, -0.33380126953125, -0.08935546875, 0.15509033203125, 0.3995361328125, 0.64398193359375, 0.888427734375, 1.13287353515625, 1.3773193359375, 1.62176513671875, 1.8662109375, 2.11065673828125, 2.3551025390625, 2.59954833984375, 2.843994140625, 3.08843994140625, 3.3328857421875, 3.57733154296875, 3.82177734375, 4.06622314453125, 4.3106689453125, 4.55511474609375, 4.799560546875, 5.04400634765625, 5.2884521484375, 5.53289794921875, 5.77734375]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 5.0, 6.0, 11.0, 18.0, 21.0, 37.0, 43.0, 47.0, 66.0, 75.0, 99.0, 69.0, 110.0, 98.0, 77.0, 65.0, 51.0, 43.0, 22.0, 11.0, 14.0, 9.0, 4.0, 2.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-1.7119140625, -1.671844482421875, -1.63177490234375, -1.591705322265625, -1.5516357421875, -1.511566162109375, -1.47149658203125, -1.431427001953125, -1.391357421875, -1.351287841796875, -1.31121826171875, -1.271148681640625, -1.2310791015625, -1.191009521484375, -1.15093994140625, -1.110870361328125, -1.07080078125, -1.030731201171875, -0.99066162109375, -0.950592041015625, -0.9105224609375, -0.870452880859375, -0.83038330078125, -0.790313720703125, -0.750244140625, -0.710174560546875, -0.67010498046875, -0.630035400390625, -0.5899658203125, -0.549896240234375, -0.50982666015625, -0.469757080078125, -0.4296875, -0.389617919921875, -0.34954833984375, -0.309478759765625, -0.2694091796875, -0.229339599609375, -0.18927001953125, -0.149200439453125, -0.109130859375, -0.069061279296875, -0.02899169921875, 0.011077880859375, 0.0511474609375, 0.091217041015625, 0.13128662109375, 0.171356201171875, 0.21142578125, 0.251495361328125, 0.29156494140625, 0.331634521484375, 0.3717041015625, 0.411773681640625, 0.45184326171875, 0.491912841796875, 0.531982421875, 0.572052001953125, 0.61212158203125, 0.652191162109375, 0.6922607421875, 0.732330322265625, 0.77239990234375, 0.812469482421875, 0.8525390625]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 3.0, 4.0, 7.0, 20.0, 26.0, 31.0, 39.0, 79.0, 142.0, 259.0, 546.0, 1655.0, 6683.0, 45476.0, 3299172.0, 803336.0, 29295.0, 5169.0, 1403.0, 474.0, 199.0, 99.0, 62.0, 39.0, 31.0, 17.0, 5.0, 7.0, 3.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.72265625, -5.5374755859375, -5.352294921875, -5.1671142578125, -4.98193359375, -4.7967529296875, -4.611572265625, -4.4263916015625, -4.2412109375, -4.0560302734375, -3.870849609375, -3.6856689453125, -3.50048828125, -3.3153076171875, -3.130126953125, -2.9449462890625, -2.759765625, -2.5745849609375, -2.389404296875, -2.2042236328125, -2.01904296875, -1.8338623046875, -1.648681640625, -1.4635009765625, -1.2783203125, -1.0931396484375, -0.907958984375, -0.7227783203125, -0.53759765625, -0.3524169921875, -0.167236328125, 0.0179443359375, 0.203125, 0.3883056640625, 0.573486328125, 0.7586669921875, 0.94384765625, 1.1290283203125, 1.314208984375, 1.4993896484375, 1.6845703125, 1.8697509765625, 2.054931640625, 2.2401123046875, 2.42529296875, 2.6104736328125, 2.795654296875, 2.9808349609375, 3.166015625, 3.3511962890625, 3.536376953125, 3.7215576171875, 3.90673828125, 4.0919189453125, 4.277099609375, 4.4622802734375, 4.6474609375, 4.8326416015625, 5.017822265625, 5.2030029296875, 5.38818359375, 5.5733642578125, 5.758544921875, 5.9437255859375, 6.12890625]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 2.0, 2.0, 4.0, 3.0, 2.0, 5.0, 12.0, 11.0, 20.0, 16.0, 27.0, 47.0, 73.0, 121.0, 242.0, 730.0, 1768.0, 494.0, 213.0, 116.0, 55.0, 34.0, 19.0, 12.0, 10.0, 11.0, 12.0, 3.0, 5.0, 3.0, 5.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.794921875, -1.7426605224609375, -1.690399169921875, -1.6381378173828125, -1.58587646484375, -1.5336151123046875, -1.481353759765625, -1.4290924072265625, -1.3768310546875, -1.3245697021484375, -1.272308349609375, -1.2200469970703125, -1.16778564453125, -1.1155242919921875, -1.063262939453125, -1.0110015869140625, -0.958740234375, -0.9064788818359375, -0.854217529296875, -0.8019561767578125, -0.74969482421875, -0.6974334716796875, -0.645172119140625, -0.5929107666015625, -0.5406494140625, -0.4883880615234375, -0.436126708984375, -0.3838653564453125, -0.33160400390625, -0.2793426513671875, -0.227081298828125, -0.1748199462890625, -0.12255859375, -0.0702972412109375, -0.018035888671875, 0.0342254638671875, 0.08648681640625, 0.1387481689453125, 0.191009521484375, 0.2432708740234375, 0.2955322265625, 0.3477935791015625, 0.400054931640625, 0.4523162841796875, 0.50457763671875, 0.5568389892578125, 0.609100341796875, 0.6613616943359375, 0.713623046875, 0.7658843994140625, 0.818145751953125, 0.8704071044921875, 0.92266845703125, 0.9749298095703125, 1.027191162109375, 1.0794525146484375, 1.1317138671875, 1.1839752197265625, 1.236236572265625, 1.2884979248046875, 1.34075927734375, 1.3930206298828125, 1.445281982421875, 1.4975433349609375, 1.5498046875]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 0.0, 2.0, 4.0, 9.0, 28.0, 83.0, 150.0, 275.0, 227.0, 126.0, 48.0, 26.0, 17.0, 2.0, 5.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-19.287155151367188, -18.804149627685547, -18.321142196655273, -17.838134765625, -17.35512924194336, -16.87212371826172, -16.389116287231445, -15.906109809875488, -15.423103332519531, -14.940096855163574, -14.457090377807617, -13.97408390045166, -13.491077423095703, -13.008070945739746, -12.525064468383789, -12.042057991027832, -11.559051513671875, -11.076045036315918, -10.593038558959961, -10.110032081604004, -9.627025604248047, -9.14401912689209, -8.661012649536133, -8.178006172180176, -7.694999694824219, -7.211993217468262, -6.728986740112305, -6.245980262756348, -5.762973785400391, -5.279967308044434, -4.796960830688477, -4.3139543533325195, -3.830946922302246, -3.347940444946289, -2.864933967590332, -2.381927490234375, -1.898921012878418, -1.415914535522461, -0.9329080581665039, -0.4499015808105469, 0.033104896545410156, 0.5161113739013672, 0.9991178512573242, 1.4821243286132812, 1.9651308059692383, 2.4481372833251953, 2.9311437606811523, 3.4141502380371094, 3.8971567153930664, 4.380163192749023, 4.8631696701049805, 5.3461761474609375, 5.8291826248168945, 6.312189102172852, 6.795195579528809, 7.278202056884766, 7.761208534240723, 8.24421501159668, 8.727221488952637, 9.210227966308594, 9.69323444366455, 10.176240921020508, 10.659247398376465, 11.142253875732422, 11.625260353088379]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 4.0, 7.0, 10.0, 9.0, 9.0, 14.0, 8.0, 29.0, 29.0, 25.0, 34.0, 39.0, 33.0, 35.0, 61.0, 42.0, 37.0, 54.0, 40.0, 36.0, 54.0, 44.0, 59.0, 41.0, 31.0, 30.0, 40.0, 34.0, 21.0, 23.0, 13.0, 10.0, 6.0, 10.0, 7.0, 7.0, 6.0, 5.0, 5.0, 2.0, 4.0, 6.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.219407081604004, -5.058233261108398, -4.897059440612793, -4.735886096954346, -4.57471227645874, -4.413538455963135, -4.2523651123046875, -4.091191291809082, -3.9300174713134766, -3.768843650817871, -3.6076700687408447, -3.4464964866638184, -3.285322666168213, -3.1241488456726074, -2.962975263595581, -2.8018016815185547, -2.640627861022949, -2.4794540405273438, -2.3182804584503174, -2.157106876373291, -1.9959330558776855, -1.8347593545913696, -1.6735856533050537, -1.5124119520187378, -1.3512382507324219, -1.190064549446106, -1.02889084815979, -0.8677171468734741, -0.7065434455871582, -0.5453697443008423, -0.38419604301452637, -0.22302234172821045, -0.061849117279052734, 0.09932458400726318, 0.2604982852935791, 0.421671986579895, 0.5828456878662109, 0.7440193891525269, 0.9051930904388428, 1.0663667917251587, 1.2275404930114746, 1.3887141942977905, 1.5498878955841064, 1.7110615968704224, 1.8722352981567383, 2.0334091186523438, 2.19458270072937, 2.3557562828063965, 2.516930103302002, 2.6781039237976074, 2.839277505874634, 3.00045108795166, 3.1616249084472656, 3.322798728942871, 3.4839723110198975, 3.645145893096924, 3.8063197135925293, 3.9674935340881348, 4.128666877746582, 4.2898406982421875, 4.451014518737793, 4.612188339233398, 4.773362159729004, 4.934535503387451, 5.095709323883057]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 5.0, 2.0, 7.0, 16.0, 15.0, 35.0, 42.0, 69.0, 129.0, 252.0, 615.0, 1739.0, 6324.0, 39882.0, 604331.0, 360631.0, 27096.0, 4881.0, 1392.0, 555.0, 226.0, 127.0, 62.0, 35.0, 20.0, 21.0, 15.0, 9.0, 9.0, 5.0, 5.0, 3.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.26953125, -6.07305908203125, -5.8765869140625, -5.68011474609375, -5.483642578125, -5.28717041015625, -5.0906982421875, -4.89422607421875, -4.69775390625, -4.50128173828125, -4.3048095703125, -4.10833740234375, -3.911865234375, -3.71539306640625, -3.5189208984375, -3.32244873046875, -3.1259765625, -2.92950439453125, -2.7330322265625, -2.53656005859375, -2.340087890625, -2.14361572265625, -1.9471435546875, -1.75067138671875, -1.55419921875, -1.35772705078125, -1.1612548828125, -0.96478271484375, -0.768310546875, -0.57183837890625, -0.3753662109375, -0.17889404296875, 0.017578125, 0.21405029296875, 0.4105224609375, 0.60699462890625, 0.803466796875, 0.99993896484375, 1.1964111328125, 1.39288330078125, 1.58935546875, 1.78582763671875, 1.9822998046875, 2.17877197265625, 2.375244140625, 2.57171630859375, 2.7681884765625, 2.96466064453125, 3.1611328125, 3.35760498046875, 3.5540771484375, 3.75054931640625, 3.947021484375, 4.14349365234375, 4.3399658203125, 4.53643798828125, 4.73291015625, 4.92938232421875, 5.1258544921875, 5.32232666015625, 5.518798828125, 5.71527099609375, 5.9117431640625, 6.10821533203125, 6.3046875]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 7.0, 7.0, 7.0, 21.0, 23.0, 22.0, 39.0, 63.0, 76.0, 94.0, 90.0, 94.0, 91.0, 79.0, 75.0, 75.0, 59.0, 23.0, 26.0, 14.0, 11.0, 3.0, 4.0, 0.0, 1.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-1.796875, -1.7546463012695312, -1.7124176025390625, -1.6701889038085938, -1.627960205078125, -1.5857315063476562, -1.5435028076171875, -1.5012741088867188, -1.45904541015625, -1.4168167114257812, -1.3745880126953125, -1.3323593139648438, -1.290130615234375, -1.2479019165039062, -1.2056732177734375, -1.1634445190429688, -1.1212158203125, -1.0789871215820312, -1.0367584228515625, -0.9945297241210938, -0.952301025390625, -0.9100723266601562, -0.8678436279296875, -0.8256149291992188, -0.78338623046875, -0.7411575317382812, -0.6989288330078125, -0.6567001342773438, -0.614471435546875, -0.5722427368164062, -0.5300140380859375, -0.48778533935546875, -0.445556640625, -0.40332794189453125, -0.3610992431640625, -0.31887054443359375, -0.276641845703125, -0.23441314697265625, -0.1921844482421875, -0.14995574951171875, -0.10772705078125, -0.06549835205078125, -0.0232696533203125, 0.01895904541015625, 0.061187744140625, 0.10341644287109375, 0.1456451416015625, 0.18787384033203125, 0.2301025390625, 0.27233123779296875, 0.3145599365234375, 0.35678863525390625, 0.399017333984375, 0.44124603271484375, 0.4834747314453125, 0.5257034301757812, 0.56793212890625, 0.6101608276367188, 0.6523895263671875, 0.6946182250976562, 0.736846923828125, 0.7790756225585938, 0.8213043212890625, 0.8635330200195312, 0.90576171875]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 3.0, 5.0, 3.0, 5.0, 9.0, 8.0, 17.0, 28.0, 26.0, 35.0, 54.0, 85.0, 99.0, 193.0, 339.0, 781.0, 2118.0, 7405.0, 35451.0, 272763.0, 624784.0, 83813.0, 14298.0, 3692.0, 1285.0, 543.0, 267.0, 146.0, 78.0, 54.0, 38.0, 36.0, 23.0, 18.0, 15.0, 6.0, 7.0, 7.0, 3.0, 5.0, 4.0, 5.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.359375, -3.2403564453125, -3.121337890625, -3.0023193359375, -2.88330078125, -2.7642822265625, -2.645263671875, -2.5262451171875, -2.4072265625, -2.2882080078125, -2.169189453125, -2.0501708984375, -1.93115234375, -1.8121337890625, -1.693115234375, -1.5740966796875, -1.455078125, -1.3360595703125, -1.217041015625, -1.0980224609375, -0.97900390625, -0.8599853515625, -0.740966796875, -0.6219482421875, -0.5029296875, -0.3839111328125, -0.264892578125, -0.1458740234375, -0.02685546875, 0.0921630859375, 0.211181640625, 0.3302001953125, 0.44921875, 0.5682373046875, 0.687255859375, 0.8062744140625, 0.92529296875, 1.0443115234375, 1.163330078125, 1.2823486328125, 1.4013671875, 1.5203857421875, 1.639404296875, 1.7584228515625, 1.87744140625, 1.9964599609375, 2.115478515625, 2.2344970703125, 2.353515625, 2.4725341796875, 2.591552734375, 2.7105712890625, 2.82958984375, 2.9486083984375, 3.067626953125, 3.1866455078125, 3.3056640625, 3.4246826171875, 3.543701171875, 3.6627197265625, 3.78173828125, 3.9007568359375, 4.019775390625, 4.1387939453125, 4.2578125]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 4.0, 7.0, 9.0, 9.0, 5.0, 10.0, 31.0, 19.0, 24.0, 29.0, 30.0, 32.0, 36.0, 33.0, 33.0, 56.0, 38.0, 41.0, 52.0, 36.0, 56.0, 44.0, 47.0, 45.0, 47.0, 39.0, 36.0, 37.0, 16.0, 16.0, 16.0, 18.0, 12.0, 13.0, 10.0, 2.0, 7.0, 2.0, 4.0, 2.0, 2.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.517578125, -2.42388916015625, -2.3302001953125, -2.23651123046875, -2.142822265625, -2.04913330078125, -1.9554443359375, -1.86175537109375, -1.76806640625, -1.67437744140625, -1.5806884765625, -1.48699951171875, -1.393310546875, -1.29962158203125, -1.2059326171875, -1.11224365234375, -1.0185546875, -0.92486572265625, -0.8311767578125, -0.73748779296875, -0.643798828125, -0.55010986328125, -0.4564208984375, -0.36273193359375, -0.26904296875, -0.17535400390625, -0.0816650390625, 0.01202392578125, 0.105712890625, 0.19940185546875, 0.2930908203125, 0.38677978515625, 0.48046875, 0.57415771484375, 0.6678466796875, 0.76153564453125, 0.855224609375, 0.94891357421875, 1.0426025390625, 1.13629150390625, 1.22998046875, 1.32366943359375, 1.4173583984375, 1.51104736328125, 1.604736328125, 1.69842529296875, 1.7921142578125, 1.88580322265625, 1.9794921875, 2.07318115234375, 2.1668701171875, 2.26055908203125, 2.354248046875, 2.44793701171875, 2.5416259765625, 2.63531494140625, 2.72900390625, 2.82269287109375, 2.9163818359375, 3.01007080078125, 3.103759765625, 3.19744873046875, 3.2911376953125, 3.38482666015625, 3.478515625]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 3.0, 3.0, 5.0, 5.0, 9.0, 7.0, 13.0, 17.0, 24.0, 52.0, 112.0, 232.0, 685.0, 2036.0, 8225.0, 67299.0, 871434.0, 85538.0, 9240.0, 2369.0, 723.0, 262.0, 116.0, 51.0, 23.0, 23.0, 18.0, 6.0, 9.0, 7.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.099609375, -2.028533935546875, -1.95745849609375, -1.886383056640625, -1.8153076171875, -1.744232177734375, -1.67315673828125, -1.602081298828125, -1.531005859375, -1.459930419921875, -1.38885498046875, -1.317779541015625, -1.2467041015625, -1.175628662109375, -1.10455322265625, -1.033477783203125, -0.96240234375, -0.891326904296875, -0.82025146484375, -0.749176025390625, -0.6781005859375, -0.607025146484375, -0.53594970703125, -0.464874267578125, -0.393798828125, -0.322723388671875, -0.25164794921875, -0.180572509765625, -0.1094970703125, -0.038421630859375, 0.03265380859375, 0.103729248046875, 0.1748046875, 0.245880126953125, 0.31695556640625, 0.388031005859375, 0.4591064453125, 0.530181884765625, 0.60125732421875, 0.672332763671875, 0.743408203125, 0.814483642578125, 0.88555908203125, 0.956634521484375, 1.0277099609375, 1.098785400390625, 1.16986083984375, 1.240936279296875, 1.31201171875, 1.383087158203125, 1.45416259765625, 1.525238037109375, 1.5963134765625, 1.667388916015625, 1.73846435546875, 1.809539794921875, 1.880615234375, 1.951690673828125, 2.02276611328125, 2.093841552734375, 2.1649169921875, 2.235992431640625, 2.30706787109375, 2.378143310546875, 2.44921875]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 7.0, 7.0, 10.0, 14.0, 28.0, 29.0, 54.0, 98.0, 181.0, 207.0, 141.0, 80.0, 54.0, 28.0, 17.0, 10.0, 15.0, 7.0, 6.0, 5.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002503395080566406, -0.00023960694670677185, -0.00022887438535690308, -0.0002181418240070343, -0.00020740926265716553, -0.00019667670130729675, -0.00018594413995742798, -0.0001752115786075592, -0.00016447901725769043, -0.00015374645590782166, -0.00014301389455795288, -0.0001322813332080841, -0.00012154877185821533, -0.00011081621050834656, -0.00010008364915847778, -8.935108780860901e-05, -7.861852645874023e-05, -6.788596510887146e-05, -5.7153403759002686e-05, -4.642084240913391e-05, -3.568828105926514e-05, -2.4955719709396362e-05, -1.4223158359527588e-05, -3.4905970096588135e-06, 7.241964340209961e-06, 1.7974525690078735e-05, 2.870708703994751e-05, 3.9439648389816284e-05, 5.017220973968506e-05, 6.090477108955383e-05, 7.163733243942261e-05, 8.236989378929138e-05, 9.310245513916016e-05, 0.00010383501648902893, 0.0001145675778388977, 0.00012530013918876648, 0.00013603270053863525, 0.00014676526188850403, 0.0001574978232383728, 0.00016823038458824158, 0.00017896294593811035, 0.00018969550728797913, 0.0002004280686378479, 0.00021116062998771667, 0.00022189319133758545, 0.00023262575268745422, 0.000243358314037323, 0.00025409087538719177, 0.00026482343673706055, 0.0002755559980869293, 0.0002862885594367981, 0.00029702112078666687, 0.00030775368213653564, 0.0003184862434864044, 0.0003292188048362732, 0.00033995136618614197, 0.00035068392753601074, 0.0003614164888858795, 0.0003721490502357483, 0.00038288161158561707, 0.00039361417293548584, 0.0004043467342853546, 0.0004150792956352234, 0.00042581185698509216, 0.00043654441833496094]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 4.0, 6.0, 9.0, 1.0, 10.0, 17.0, 14.0, 28.0, 53.0, 91.0, 120.0, 278.0, 612.0, 1851.0, 9024.0, 81355.0, 867634.0, 76208.0, 8227.0, 1759.0, 630.0, 258.0, 137.0, 73.0, 61.0, 36.0, 23.0, 14.0, 14.0, 8.0, 1.0, 4.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.9208984375, -1.8534698486328125, -1.786041259765625, -1.7186126708984375, -1.65118408203125, -1.5837554931640625, -1.516326904296875, -1.4488983154296875, -1.3814697265625, -1.3140411376953125, -1.246612548828125, -1.1791839599609375, -1.11175537109375, -1.0443267822265625, -0.976898193359375, -0.9094696044921875, -0.842041015625, -0.7746124267578125, -0.707183837890625, -0.6397552490234375, -0.57232666015625, -0.5048980712890625, -0.437469482421875, -0.3700408935546875, -0.3026123046875, -0.2351837158203125, -0.167755126953125, -0.1003265380859375, -0.03289794921875, 0.0345306396484375, 0.101959228515625, 0.1693878173828125, 0.23681640625, 0.3042449951171875, 0.371673583984375, 0.4391021728515625, 0.50653076171875, 0.5739593505859375, 0.641387939453125, 0.7088165283203125, 0.7762451171875, 0.8436737060546875, 0.911102294921875, 0.9785308837890625, 1.04595947265625, 1.1133880615234375, 1.180816650390625, 1.2482452392578125, 1.315673828125, 1.3831024169921875, 1.450531005859375, 1.5179595947265625, 1.58538818359375, 1.6528167724609375, 1.720245361328125, 1.7876739501953125, 1.8551025390625, 1.9225311279296875, 1.989959716796875, 2.0573883056640625, 2.12481689453125, 2.1922454833984375, 2.259674072265625, 2.3271026611328125, 2.39453125]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 5.0, 3.0, 6.0, 9.0, 3.0, 8.0, 13.0, 20.0, 24.0, 26.0, 29.0, 44.0, 75.0, 116.0, 129.0, 126.0, 111.0, 79.0, 51.0, 27.0, 30.0, 22.0, 15.0, 10.0, 5.0, 3.0, 12.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-1.49609375, -1.4611434936523438, -1.4261932373046875, -1.3912429809570312, -1.356292724609375, -1.3213424682617188, -1.2863922119140625, -1.2514419555664062, -1.21649169921875, -1.1815414428710938, -1.1465911865234375, -1.1116409301757812, -1.076690673828125, -1.0417404174804688, -1.0067901611328125, -0.9718399047851562, -0.9368896484375, -0.9019393920898438, -0.8669891357421875, -0.8320388793945312, -0.797088623046875, -0.7621383666992188, -0.7271881103515625, -0.6922378540039062, -0.65728759765625, -0.6223373413085938, -0.5873870849609375, -0.5524368286132812, -0.517486572265625, -0.48253631591796875, -0.4475860595703125, -0.41263580322265625, -0.377685546875, -0.34273529052734375, -0.3077850341796875, -0.27283477783203125, -0.237884521484375, -0.20293426513671875, -0.1679840087890625, -0.13303375244140625, -0.09808349609375, -0.06313323974609375, -0.0281829833984375, 0.00676727294921875, 0.041717529296875, 0.07666778564453125, 0.1116180419921875, 0.14656829833984375, 0.1815185546875, 0.21646881103515625, 0.2514190673828125, 0.28636932373046875, 0.321319580078125, 0.35626983642578125, 0.3912200927734375, 0.42617034912109375, 0.46112060546875, 0.49607086181640625, 0.5310211181640625, 0.5659713745117188, 0.600921630859375, 0.6358718872070312, 0.6708221435546875, 0.7057723999023438, 0.74072265625]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 0.0, 4.0, 3.0, 29.0, 64.0, 175.0, 275.0, 224.0, 129.0, 65.0, 19.0, 10.0, 5.0, 3.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.359935760498047, -23.333763122558594, -22.30759048461914, -21.281415939331055, -20.2552433013916, -19.22907066345215, -18.202896118164062, -17.17672348022461, -16.150550842285156, -15.124378204345703, -14.098204612731934, -13.072031021118164, -12.045858383178711, -11.019685745239258, -9.993512153625488, -8.967338562011719, -7.941165924072266, -6.914992809295654, -5.888819694519043, -4.862646579742432, -3.8364734649658203, -2.810300350189209, -1.7841272354125977, -0.7579541206359863, 0.268218994140625, 1.2943921089172363, 2.3205652236938477, 3.346738338470459, 4.37291145324707, 5.399084568023682, 6.425257682800293, 7.451430797576904, 8.477607727050781, 9.503780364990234, 10.529953956604004, 11.556127548217773, 12.582300186157227, 13.60847282409668, 14.63464641571045, 15.660820007324219, 16.686992645263672, 17.713165283203125, 18.739337921142578, 19.765512466430664, 20.791685104370117, 21.81785774230957, 22.844032287597656, 23.87020492553711, 24.896377563476562, 25.922550201416016, 26.94872283935547, 27.974897384643555, 29.001070022583008, 30.02724266052246, 31.053417205810547, 32.07958984375, 33.10576248168945, 34.131935119628906, 35.15810775756836, 36.18428039550781, 37.21045684814453, 38.236629486083984, 39.26280212402344, 40.28897476196289, 41.315147399902344]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 5.0, 1.0, 4.0, 1.0, 4.0, 1.0, 5.0, 5.0, 7.0, 7.0, 11.0, 14.0, 14.0, 17.0, 22.0, 21.0, 16.0, 17.0, 16.0, 28.0, 27.0, 32.0, 27.0, 47.0, 37.0, 52.0, 38.0, 36.0, 36.0, 38.0, 47.0, 37.0, 36.0, 31.0, 25.0, 30.0, 30.0, 34.0, 22.0, 19.0, 18.0, 15.0, 13.0, 13.0, 14.0, 10.0, 6.0, 5.0, 3.0, 1.0, 3.0, 6.0, 1.0, 1.0, 4.0, 3.0, 3.0, 1.0, 1.0, 1.0], "bins": [-12.554769515991211, -12.159811019897461, -11.764853477478027, -11.369894981384277, -10.974937438964844, -10.579978942871094, -10.18502140045166, -9.79006290435791, -9.395105361938477, -9.000146865844727, -8.605189323425293, -8.210230827331543, -7.815273284912109, -7.420315265655518, -7.025357246398926, -6.630398750305176, -6.235440731048584, -5.840482711791992, -5.4455246925354, -5.050566673278809, -4.655608654022217, -4.260650634765625, -3.865692377090454, -3.4707343578338623, -3.0757763385772705, -2.6808183193206787, -2.285860300064087, -1.8909021615982056, -1.4959441423416138, -1.100986123085022, -0.7060279846191406, -0.31106996536254883, 0.08388805389404297, 0.47884610295295715, 0.8738041520118713, 1.268762230873108, 1.6637202501296997, 2.058678150177002, 2.453636407852173, 2.8485944271087646, 3.2435524463653564, 3.6385104656219482, 4.033468723297119, 4.428426742553711, 4.823384761810303, 5.2183427810668945, 5.613300800323486, 6.008258819580078, 6.40321683883667, 6.798174858093262, 7.1931328773498535, 7.588090896606445, 7.983048915863037, 8.378006935119629, 8.772965431213379, 9.167922973632812, 9.562881469726562, 9.957839965820312, 10.352797508239746, 10.747756004333496, 11.14271354675293, 11.53767204284668, 11.932629585266113, 12.327588081359863, 12.722545623779297]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 3.0, 2.0, 4.0, 9.0, 14.0, 8.0, 20.0, 21.0, 29.0, 36.0, 54.0, 117.0, 165.0, 343.0, 687.0, 1796.0, 5623.0, 27338.0, 1009159.0, 3108117.0, 31400.0, 6080.0, 1867.0, 712.0, 293.0, 161.0, 97.0, 49.0, 27.0, 18.0, 13.0, 9.0, 5.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-6.82421875, -6.644775390625, -6.46533203125, -6.285888671875, -6.1064453125, -5.927001953125, -5.74755859375, -5.568115234375, -5.388671875, -5.209228515625, -5.02978515625, -4.850341796875, -4.6708984375, -4.491455078125, -4.31201171875, -4.132568359375, -3.953125, -3.773681640625, -3.59423828125, -3.414794921875, -3.2353515625, -3.055908203125, -2.87646484375, -2.697021484375, -2.517578125, -2.338134765625, -2.15869140625, -1.979248046875, -1.7998046875, -1.620361328125, -1.44091796875, -1.261474609375, -1.08203125, -0.902587890625, -0.72314453125, -0.543701171875, -0.3642578125, -0.184814453125, -0.00537109375, 0.174072265625, 0.353515625, 0.532958984375, 0.71240234375, 0.891845703125, 1.0712890625, 1.250732421875, 1.43017578125, 1.609619140625, 1.7890625, 1.968505859375, 2.14794921875, 2.327392578125, 2.5068359375, 2.686279296875, 2.86572265625, 3.045166015625, 3.224609375, 3.404052734375, 3.58349609375, 3.762939453125, 3.9423828125, 4.121826171875, 4.30126953125, 4.480712890625, 4.66015625]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 1.0, 9.0, 5.0, 9.0, 18.0, 17.0, 26.0, 38.0, 55.0, 61.0, 76.0, 80.0, 76.0, 81.0, 82.0, 69.0, 76.0, 64.0, 48.0, 43.0, 32.0, 13.0, 9.0, 7.0, 2.0, 6.0, 1.0, 1.0, 3.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.8076171875, -1.7658462524414062, -1.7240753173828125, -1.6823043823242188, -1.640533447265625, -1.5987625122070312, -1.5569915771484375, -1.5152206420898438, -1.47344970703125, -1.4316787719726562, -1.3899078369140625, -1.3481369018554688, -1.306365966796875, -1.2645950317382812, -1.2228240966796875, -1.1810531616210938, -1.1392822265625, -1.0975112915039062, -1.0557403564453125, -1.0139694213867188, -0.972198486328125, -0.9304275512695312, -0.8886566162109375, -0.8468856811523438, -0.80511474609375, -0.7633438110351562, -0.7215728759765625, -0.6798019409179688, -0.638031005859375, -0.5962600708007812, -0.5544891357421875, -0.5127182006835938, -0.470947265625, -0.42917633056640625, -0.3874053955078125, -0.34563446044921875, -0.303863525390625, -0.26209259033203125, -0.2203216552734375, -0.17855072021484375, -0.13677978515625, -0.09500885009765625, -0.0532379150390625, -0.01146697998046875, 0.030303955078125, 0.07207489013671875, 0.1138458251953125, 0.15561676025390625, 0.1973876953125, 0.23915863037109375, 0.2809295654296875, 0.32270050048828125, 0.364471435546875, 0.40624237060546875, 0.4480133056640625, 0.48978424072265625, 0.53155517578125, 0.5733261108398438, 0.6150970458984375, 0.6568679809570312, 0.698638916015625, 0.7404098510742188, 0.7821807861328125, 0.8239517211914062, 0.86572265625]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 8.0, 5.0, 5.0, 17.0, 30.0, 49.0, 55.0, 106.0, 148.0, 287.0, 539.0, 1374.0, 4325.0, 25901.0, 3239776.0, 894705.0, 20795.0, 3918.0, 1203.0, 503.0, 234.0, 127.0, 66.0, 45.0, 34.0, 15.0, 6.0, 2.0, 6.0, 3.0, 3.0, 0.0, 1.0, 2.0, 1.0], "bins": [-9.1875, -8.97845458984375, -8.7694091796875, -8.56036376953125, -8.351318359375, -8.14227294921875, -7.9332275390625, -7.72418212890625, -7.51513671875, -7.30609130859375, -7.0970458984375, -6.88800048828125, -6.678955078125, -6.46990966796875, -6.2608642578125, -6.05181884765625, -5.8427734375, -5.63372802734375, -5.4246826171875, -5.21563720703125, -5.006591796875, -4.79754638671875, -4.5885009765625, -4.37945556640625, -4.17041015625, -3.96136474609375, -3.7523193359375, -3.54327392578125, -3.334228515625, -3.12518310546875, -2.9161376953125, -2.70709228515625, -2.498046875, -2.28900146484375, -2.0799560546875, -1.87091064453125, -1.661865234375, -1.45281982421875, -1.2437744140625, -1.03472900390625, -0.82568359375, -0.61663818359375, -0.4075927734375, -0.19854736328125, 0.010498046875, 0.21954345703125, 0.4285888671875, 0.63763427734375, 0.8466796875, 1.05572509765625, 1.2647705078125, 1.47381591796875, 1.682861328125, 1.89190673828125, 2.1009521484375, 2.30999755859375, 2.51904296875, 2.72808837890625, 2.9371337890625, 3.14617919921875, 3.355224609375, 3.56427001953125, 3.7733154296875, 3.98236083984375, 4.19140625]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 7.0, 11.0, 22.0, 21.0, 48.0, 132.0, 482.0, 2773.0, 373.0, 100.0, 43.0, 24.0, 14.0, 11.0, 5.0, 3.0, 3.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.513671875, -3.432830810546875, -3.35198974609375, -3.271148681640625, -3.1903076171875, -3.109466552734375, -3.02862548828125, -2.947784423828125, -2.866943359375, -2.786102294921875, -2.70526123046875, -2.624420166015625, -2.5435791015625, -2.462738037109375, -2.38189697265625, -2.301055908203125, -2.22021484375, -2.139373779296875, -2.05853271484375, -1.977691650390625, -1.8968505859375, -1.816009521484375, -1.73516845703125, -1.654327392578125, -1.573486328125, -1.492645263671875, -1.41180419921875, -1.330963134765625, -1.2501220703125, -1.169281005859375, -1.08843994140625, -1.007598876953125, -0.9267578125, -0.845916748046875, -0.76507568359375, -0.684234619140625, -0.6033935546875, -0.522552490234375, -0.44171142578125, -0.360870361328125, -0.280029296875, -0.199188232421875, -0.11834716796875, -0.037506103515625, 0.0433349609375, 0.124176025390625, 0.20501708984375, 0.285858154296875, 0.36669921875, 0.447540283203125, 0.52838134765625, 0.609222412109375, 0.6900634765625, 0.770904541015625, 0.85174560546875, 0.932586669921875, 1.013427734375, 1.094268798828125, 1.17510986328125, 1.255950927734375, 1.3367919921875, 1.417633056640625, 1.49847412109375, 1.579315185546875, 1.66015625]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 6.0, 9.0, 15.0, 46.0, 129.0, 277.0, 292.0, 139.0, 63.0, 19.0, 6.0, 8.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-26.064916610717773, -25.56854820251465, -25.072181701660156, -24.57581329345703, -24.079444885253906, -23.583078384399414, -23.08670997619629, -22.590341567993164, -22.093975067138672, -21.597606658935547, -21.101240158081055, -20.60487174987793, -20.108503341674805, -19.612136840820312, -19.115768432617188, -18.619400024414062, -18.123031616210938, -17.626663208007812, -17.13029670715332, -16.633928298950195, -16.13755989074707, -15.641193389892578, -15.144824981689453, -14.648457527160645, -14.152090072631836, -13.655722618103027, -13.159354209899902, -12.662986755371094, -12.166619300842285, -11.670251846313477, -11.173883438110352, -10.677515983581543, -10.181148529052734, -9.684781074523926, -9.1884126663208, -8.692045211791992, -8.195677757263184, -7.699309825897217, -7.20294189453125, -6.706574440002441, -6.210206031799316, -5.71383810043335, -5.217470645904541, -4.721102714538574, -4.224735260009766, -3.728367328643799, -3.231999397277832, -2.7356317043304443, -2.2392640113830566, -1.742896318435669, -1.2465285062789917, -0.7501606941223145, -0.25379300117492676, 0.24257469177246094, 0.7389426231384277, 1.2353103160858154, 1.7316780090332031, 2.228045701980591, 2.7244133949279785, 3.2207813262939453, 3.717149019241333, 4.213516712188721, 4.7098846435546875, 5.206252098083496, 5.702620029449463]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 2.0, 4.0, 7.0, 19.0, 19.0, 34.0, 29.0, 53.0, 78.0, 83.0, 98.0, 110.0, 82.0, 88.0, 73.0, 90.0, 40.0, 36.0, 22.0, 15.0, 13.0, 12.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.387171745300293, -11.07668685913086, -10.766201972961426, -10.455717086791992, -10.145232200622559, -9.834747314453125, -9.524262428283691, -9.213777542114258, -8.903291702270508, -8.592806816101074, -8.28232192993164, -7.971837043762207, -7.661352157592773, -7.35086727142334, -7.040381908416748, -6.7298970222473145, -6.419412612915039, -6.1089277267456055, -5.798442840576172, -5.487957954406738, -5.177473068237305, -4.866988182067871, -4.556502819061279, -4.246017932891846, -3.935533046722412, -3.6250481605529785, -3.314563274383545, -3.0040781497955322, -2.6935932636260986, -2.383108377456665, -2.0726232528686523, -1.7621383666992188, -1.4516544342041016, -1.141169548034668, -0.8306845426559448, -0.5201995968818665, -0.20971465110778809, 0.10077023506164551, 0.41125524044036865, 0.7217402458190918, 1.0322251319885254, 1.342710018157959, 1.6531950235366821, 1.9636800289154053, 2.274164915084839, 2.5846498012542725, 2.895134925842285, 3.2056198120117188, 3.5161046981811523, 3.826589584350586, 4.1370744705200195, 4.447559356689453, 4.758044242858887, 5.06852912902832, 5.379014492034912, 5.689499378204346, 5.999984264373779, 6.310469150543213, 6.6209540367126465, 6.93143892288208, 7.241924285888672, 7.5524091720581055, 7.862894058227539, 8.173378944396973, 8.483863830566406]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 5.0, 12.0, 13.0, 22.0, 48.0, 132.0, 282.0, 1213.0, 10947.0, 625040.0, 401102.0, 8301.0, 939.0, 296.0, 105.0, 44.0, 26.0, 11.0, 9.0, 6.0, 5.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.3515625, -14.0250244140625, -13.698486328125, -13.3719482421875, -13.04541015625, -12.7188720703125, -12.392333984375, -12.0657958984375, -11.7392578125, -11.4127197265625, -11.086181640625, -10.7596435546875, -10.43310546875, -10.1065673828125, -9.780029296875, -9.4534912109375, -9.126953125, -8.8004150390625, -8.473876953125, -8.1473388671875, -7.82080078125, -7.4942626953125, -7.167724609375, -6.8411865234375, -6.5146484375, -6.1881103515625, -5.861572265625, -5.5350341796875, -5.20849609375, -4.8819580078125, -4.555419921875, -4.2288818359375, -3.90234375, -3.5758056640625, -3.249267578125, -2.9227294921875, -2.59619140625, -2.2696533203125, -1.943115234375, -1.6165771484375, -1.2900390625, -0.9635009765625, -0.636962890625, -0.3104248046875, 0.01611328125, 0.3426513671875, 0.669189453125, 0.9957275390625, 1.322265625, 1.6488037109375, 1.975341796875, 2.3018798828125, 2.62841796875, 2.9549560546875, 3.281494140625, 3.6080322265625, 3.9345703125, 4.2611083984375, 4.587646484375, 4.9141845703125, 5.24072265625, 5.5672607421875, 5.893798828125, 6.2203369140625, 6.546875]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 6.0, 4.0, 5.0, 13.0, 16.0, 14.0, 40.0, 46.0, 63.0, 60.0, 78.0, 78.0, 89.0, 111.0, 73.0, 80.0, 66.0, 61.0, 32.0, 33.0, 17.0, 10.0, 6.0, 1.0, 1.0, 5.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0], "bins": [-2.07421875, -2.02777099609375, -1.9813232421875, -1.93487548828125, -1.888427734375, -1.84197998046875, -1.7955322265625, -1.74908447265625, -1.70263671875, -1.65618896484375, -1.6097412109375, -1.56329345703125, -1.516845703125, -1.47039794921875, -1.4239501953125, -1.37750244140625, -1.3310546875, -1.28460693359375, -1.2381591796875, -1.19171142578125, -1.145263671875, -1.09881591796875, -1.0523681640625, -1.00592041015625, -0.95947265625, -0.91302490234375, -0.8665771484375, -0.82012939453125, -0.773681640625, -0.72723388671875, -0.6807861328125, -0.63433837890625, -0.587890625, -0.54144287109375, -0.4949951171875, -0.44854736328125, -0.402099609375, -0.35565185546875, -0.3092041015625, -0.26275634765625, -0.21630859375, -0.16986083984375, -0.1234130859375, -0.07696533203125, -0.030517578125, 0.01593017578125, 0.0623779296875, 0.10882568359375, 0.1552734375, 0.20172119140625, 0.2481689453125, 0.29461669921875, 0.341064453125, 0.38751220703125, 0.4339599609375, 0.48040771484375, 0.52685546875, 0.57330322265625, 0.6197509765625, 0.66619873046875, 0.712646484375, 0.75909423828125, 0.8055419921875, 0.85198974609375, 0.8984375]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 6.0, 2.0, 2.0, 7.0, 12.0, 9.0, 15.0, 17.0, 27.0, 36.0, 44.0, 78.0, 99.0, 132.0, 292.0, 559.0, 1538.0, 6038.0, 29710.0, 179908.0, 651940.0, 145598.0, 24655.0, 5150.0, 1363.0, 528.0, 254.0, 172.0, 90.0, 63.0, 48.0, 41.0, 39.0, 26.0, 18.0, 10.0, 11.0, 5.0, 6.0, 5.0, 3.0, 5.0, 1.0, 2.0, 1.0], "bins": [-4.46484375, -4.351654052734375, -4.23846435546875, -4.125274658203125, -4.0120849609375, -3.898895263671875, -3.78570556640625, -3.672515869140625, -3.559326171875, -3.446136474609375, -3.33294677734375, -3.219757080078125, -3.1065673828125, -2.993377685546875, -2.88018798828125, -2.766998291015625, -2.65380859375, -2.540618896484375, -2.42742919921875, -2.314239501953125, -2.2010498046875, -2.087860107421875, -1.97467041015625, -1.861480712890625, -1.748291015625, -1.635101318359375, -1.52191162109375, -1.408721923828125, -1.2955322265625, -1.182342529296875, -1.06915283203125, -0.955963134765625, -0.8427734375, -0.729583740234375, -0.61639404296875, -0.503204345703125, -0.3900146484375, -0.276824951171875, -0.16363525390625, -0.050445556640625, 0.062744140625, 0.175933837890625, 0.28912353515625, 0.402313232421875, 0.5155029296875, 0.628692626953125, 0.74188232421875, 0.855072021484375, 0.96826171875, 1.081451416015625, 1.19464111328125, 1.307830810546875, 1.4210205078125, 1.534210205078125, 1.64739990234375, 1.760589599609375, 1.873779296875, 1.986968994140625, 2.10015869140625, 2.213348388671875, 2.3265380859375, 2.439727783203125, 2.55291748046875, 2.666107177734375, 2.779296875]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 0.0, 3.0, 2.0, 2.0, 5.0, 5.0, 9.0, 8.0, 11.0, 15.0, 15.0, 14.0, 17.0, 20.0, 20.0, 26.0, 29.0, 33.0, 34.0, 35.0, 41.0, 44.0, 46.0, 44.0, 46.0, 54.0, 41.0, 34.0, 38.0, 35.0, 43.0, 44.0, 33.0, 27.0, 22.0, 23.0, 11.0, 22.0, 15.0, 9.0, 11.0, 7.0, 5.0, 5.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3.259765625, -3.166748046875, -3.07373046875, -2.980712890625, -2.8876953125, -2.794677734375, -2.70166015625, -2.608642578125, -2.515625, -2.422607421875, -2.32958984375, -2.236572265625, -2.1435546875, -2.050537109375, -1.95751953125, -1.864501953125, -1.771484375, -1.678466796875, -1.58544921875, -1.492431640625, -1.3994140625, -1.306396484375, -1.21337890625, -1.120361328125, -1.02734375, -0.934326171875, -0.84130859375, -0.748291015625, -0.6552734375, -0.562255859375, -0.46923828125, -0.376220703125, -0.283203125, -0.190185546875, -0.09716796875, -0.004150390625, 0.0888671875, 0.181884765625, 0.27490234375, 0.367919921875, 0.4609375, 0.553955078125, 0.64697265625, 0.739990234375, 0.8330078125, 0.926025390625, 1.01904296875, 1.112060546875, 1.205078125, 1.298095703125, 1.39111328125, 1.484130859375, 1.5771484375, 1.670166015625, 1.76318359375, 1.856201171875, 1.94921875, 2.042236328125, 2.13525390625, 2.228271484375, 2.3212890625, 2.414306640625, 2.50732421875, 2.600341796875, 2.693359375]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, 4.0, 9.0, 10.0, 7.0, 18.0, 23.0, 43.0, 66.0, 122.0, 177.0, 375.0, 810.0, 2081.0, 6570.0, 27282.0, 144842.0, 648490.0, 174832.0, 31316.0, 7423.0, 2271.0, 833.0, 400.0, 216.0, 120.0, 63.0, 40.0, 29.0, 22.0, 18.0, 8.0, 6.0, 6.0, 4.0, 2.0, 1.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.03125, -0.9995651245117188, -0.9678802490234375, -0.9361953735351562, -0.904510498046875, -0.8728256225585938, -0.8411407470703125, -0.8094558715820312, -0.77777099609375, -0.7460861206054688, -0.7144012451171875, -0.6827163696289062, -0.651031494140625, -0.6193466186523438, -0.5876617431640625, -0.5559768676757812, -0.5242919921875, -0.49260711669921875, -0.4609222412109375, -0.42923736572265625, -0.397552490234375, -0.36586761474609375, -0.3341827392578125, -0.30249786376953125, -0.27081298828125, -0.23912811279296875, -0.2074432373046875, -0.17575836181640625, -0.144073486328125, -0.11238861083984375, -0.0807037353515625, -0.04901885986328125, -0.017333984375, 0.01435089111328125, 0.0460357666015625, 0.07772064208984375, 0.109405517578125, 0.14109039306640625, 0.1727752685546875, 0.20446014404296875, 0.23614501953125, 0.26782989501953125, 0.2995147705078125, 0.33119964599609375, 0.362884521484375, 0.39456939697265625, 0.4262542724609375, 0.45793914794921875, 0.4896240234375, 0.5213088989257812, 0.5529937744140625, 0.5846786499023438, 0.616363525390625, 0.6480484008789062, 0.6797332763671875, 0.7114181518554688, 0.74310302734375, 0.7747879028320312, 0.8064727783203125, 0.8381576538085938, 0.869842529296875, 0.9015274047851562, 0.9332122802734375, 0.9648971557617188, 0.99658203125]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 4.0, 1.0, 3.0, 5.0, 4.0, 9.0, 11.0, 9.0, 15.0, 45.0, 55.0, 81.0, 194.0, 193.0, 166.0, 77.0, 53.0, 22.0, 21.0, 8.0, 8.0, 4.0, 6.0, 2.0, 2.0, 2.0, 4.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.000438690185546875, -0.00042694807052612305, -0.0004152059555053711, -0.00040346384048461914, -0.0003917217254638672, -0.00037997961044311523, -0.0003682374954223633, -0.00035649538040161133, -0.0003447532653808594, -0.0003330111503601074, -0.00032126903533935547, -0.0003095269203186035, -0.00029778480529785156, -0.0002860426902770996, -0.00027430057525634766, -0.0002625584602355957, -0.00025081634521484375, -0.0002390742301940918, -0.00022733211517333984, -0.0002155900001525879, -0.00020384788513183594, -0.00019210577011108398, -0.00018036365509033203, -0.00016862154006958008, -0.00015687942504882812, -0.00014513731002807617, -0.00013339519500732422, -0.00012165307998657227, -0.00010991096496582031, -9.816884994506836e-05, -8.64267349243164e-05, -7.468461990356445e-05, -6.29425048828125e-05, -5.120038986206055e-05, -3.9458274841308594e-05, -2.771615982055664e-05, -1.5974044799804688e-05, -4.231929779052734e-06, 7.510185241699219e-06, 1.9252300262451172e-05, 3.0994415283203125e-05, 4.273653030395508e-05, 5.447864532470703e-05, 6.622076034545898e-05, 7.796287536621094e-05, 8.970499038696289e-05, 0.00010144710540771484, 0.0001131892204284668, 0.00012493133544921875, 0.0001366734504699707, 0.00014841556549072266, 0.0001601576805114746, 0.00017189979553222656, 0.00018364191055297852, 0.00019538402557373047, 0.00020712614059448242, 0.00021886825561523438, 0.00023061037063598633, 0.00024235248565673828, 0.00025409460067749023, 0.0002658367156982422, 0.00027757883071899414, 0.0002893209457397461, 0.00030106306076049805, 0.00031280517578125]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 5.0, 5.0, 9.0, 9.0, 11.0, 24.0, 26.0, 40.0, 92.0, 132.0, 227.0, 489.0, 1126.0, 3228.0, 14842.0, 108146.0, 701571.0, 187910.0, 23533.0, 4546.0, 1362.0, 562.0, 277.0, 138.0, 80.0, 52.0, 34.0, 32.0, 11.0, 11.0, 5.0, 7.0, 6.0, 5.0, 5.0, 1.0, 2.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.91748046875, -0.8816757202148438, -0.8458709716796875, -0.8100662231445312, -0.774261474609375, -0.7384567260742188, -0.7026519775390625, -0.6668472290039062, -0.63104248046875, -0.5952377319335938, -0.5594329833984375, -0.5236282348632812, -0.487823486328125, -0.45201873779296875, -0.4162139892578125, -0.38040924072265625, -0.3446044921875, -0.30879974365234375, -0.2729949951171875, -0.23719024658203125, -0.201385498046875, -0.16558074951171875, -0.1297760009765625, -0.09397125244140625, -0.05816650390625, -0.02236175537109375, 0.0134429931640625, 0.04924774169921875, 0.085052490234375, 0.12085723876953125, 0.1566619873046875, 0.19246673583984375, 0.228271484375, 0.26407623291015625, 0.2998809814453125, 0.33568572998046875, 0.371490478515625, 0.40729522705078125, 0.4430999755859375, 0.47890472412109375, 0.51470947265625, 0.5505142211914062, 0.5863189697265625, 0.6221237182617188, 0.657928466796875, 0.6937332153320312, 0.7295379638671875, 0.7653427124023438, 0.8011474609375, 0.8369522094726562, 0.8727569580078125, 0.9085617065429688, 0.944366455078125, 0.9801712036132812, 1.0159759521484375, 1.0517807006835938, 1.08758544921875, 1.1233901977539062, 1.1591949462890625, 1.1949996948242188, 1.230804443359375, 1.2666091918945312, 1.3024139404296875, 1.3382186889648438, 1.3740234375]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 4.0, 5.0, 8.0, 12.0, 9.0, 20.0, 27.0, 44.0, 57.0, 63.0, 121.0, 145.0, 139.0, 82.0, 78.0, 57.0, 44.0, 30.0, 13.0, 20.0, 12.0, 6.0, 3.0, 4.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.76953125, -0.7410125732421875, -0.712493896484375, -0.6839752197265625, -0.65545654296875, -0.6269378662109375, -0.598419189453125, -0.5699005126953125, -0.5413818359375, -0.5128631591796875, -0.484344482421875, -0.4558258056640625, -0.42730712890625, -0.3987884521484375, -0.370269775390625, -0.3417510986328125, -0.313232421875, -0.2847137451171875, -0.256195068359375, -0.2276763916015625, -0.19915771484375, -0.1706390380859375, -0.142120361328125, -0.1136016845703125, -0.0850830078125, -0.0565643310546875, -0.028045654296875, 0.0004730224609375, 0.02899169921875, 0.0575103759765625, 0.086029052734375, 0.1145477294921875, 0.14306640625, 0.1715850830078125, 0.200103759765625, 0.2286224365234375, 0.25714111328125, 0.2856597900390625, 0.314178466796875, 0.3426971435546875, 0.3712158203125, 0.3997344970703125, 0.428253173828125, 0.4567718505859375, 0.48529052734375, 0.5138092041015625, 0.542327880859375, 0.5708465576171875, 0.599365234375, 0.6278839111328125, 0.656402587890625, 0.6849212646484375, 0.71343994140625, 0.7419586181640625, 0.770477294921875, 0.7989959716796875, 0.8275146484375, 0.8560333251953125, 0.884552001953125, 0.9130706787109375, 0.94158935546875, 0.9701080322265625, 0.998626708984375, 1.0271453857421875, 1.0556640625]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 7.0, 10.0, 23.0, 86.0, 202.0, 283.0, 254.0, 81.0, 44.0, 15.0, 8.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.402024269104004, -10.21059799194336, -9.019170761108398, -7.827744007110596, -6.636317253112793, -5.44489049911499, -4.2534637451171875, -3.062037467956543, -1.870610237121582, -0.6791834831237793, 0.5122432708740234, 1.7036700248718262, 2.895096778869629, 4.086523532867432, 5.277950286865234, 6.469376564025879, 7.66080379486084, 8.852230072021484, 10.043657302856445, 11.235084533691406, 12.42651081085205, 13.617937088012695, 14.809364318847656, 16.000789642333984, 17.192218780517578, 18.38364601135254, 19.5750732421875, 20.766498565673828, 21.95792579650879, 23.14935302734375, 24.340778350830078, 25.53220558166504, 26.7236328125, 27.91506004333496, 29.106487274169922, 30.29791259765625, 31.48933982849121, 32.68076705932617, 33.8721923828125, 35.063621520996094, 36.25504684448242, 37.44647216796875, 38.637901306152344, 39.82932662963867, 41.020751953125, 42.212181091308594, 43.40360641479492, 44.59503173828125, 45.786460876464844, 46.97788619995117, 48.169315338134766, 49.360740661621094, 50.55216979980469, 51.743595123291016, 52.935020446777344, 54.12644958496094, 55.317874908447266, 56.509300231933594, 57.70072937011719, 58.892154693603516, 60.083580017089844, 61.27500915527344, 62.466434478759766, 63.657859802246094, 64.84928894042969]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 5.0, 4.0, 5.0, 7.0, 6.0, 7.0, 24.0, 8.0, 20.0, 25.0, 25.0, 29.0, 46.0, 27.0, 51.0, 51.0, 46.0, 43.0, 62.0, 55.0, 51.0, 50.0, 47.0, 36.0, 30.0, 45.0, 27.0, 35.0, 29.0, 29.0, 24.0, 12.0, 12.0, 6.0, 8.0, 12.0, 3.0, 5.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.992293357849121, -15.482062339782715, -14.971830368041992, -14.461599349975586, -13.951367378234863, -13.441136360168457, -12.930904388427734, -12.420673370361328, -11.910442352294922, -11.400211334228516, -10.889979362487793, -10.379748344421387, -9.869516372680664, -9.359285354614258, -8.849054336547852, -8.338822364807129, -7.828590393066406, -7.318358898162842, -6.808127403259277, -6.297896385192871, -5.787664413452148, -5.277433395385742, -4.767201900482178, -4.256970405578613, -3.746738910675049, -3.2365074157714844, -2.72627592086792, -2.2160446643829346, -1.7058131694793701, -1.1955816745758057, -0.6853504180908203, -0.17511892318725586, 0.3351116180419922, 0.8453430533409119, 1.3555744886398315, 1.8658058643341064, 2.376037359237671, 2.8862688541412354, 3.3965001106262207, 3.906731605529785, 4.41696310043335, 4.927194595336914, 5.4374260902404785, 5.947657585144043, 6.457888603210449, 6.968120574951172, 7.478351593017578, 7.988583087921143, 8.498814582824707, 9.009045600891113, 9.519277572631836, 10.029508590698242, 10.539740562438965, 11.049971580505371, 11.560203552246094, 12.0704345703125, 12.580665588378906, 13.090896606445312, 13.601128578186035, 14.111359596252441, 14.621591567993164, 15.13182258605957, 15.642053604125977, 16.152286529541016, 16.662517547607422]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 8.0, 9.0, 10.0, 15.0, 19.0, 23.0, 35.0, 46.0, 73.0, 123.0, 175.0, 297.0, 584.0, 1131.0, 2652.0, 7302.0, 30275.0, 510423.0, 3583442.0, 42442.0, 9484.0, 3093.0, 1295.0, 587.0, 319.0, 155.0, 91.0, 48.0, 40.0, 27.0, 21.0, 10.0, 5.0, 4.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0], "bins": [-8.3515625, -8.15313720703125, -7.9547119140625, -7.75628662109375, -7.557861328125, -7.35943603515625, -7.1610107421875, -6.96258544921875, -6.76416015625, -6.56573486328125, -6.3673095703125, -6.16888427734375, -5.970458984375, -5.77203369140625, -5.5736083984375, -5.37518310546875, -5.1767578125, -4.97833251953125, -4.7799072265625, -4.58148193359375, -4.383056640625, -4.18463134765625, -3.9862060546875, -3.78778076171875, -3.58935546875, -3.39093017578125, -3.1925048828125, -2.99407958984375, -2.795654296875, -2.59722900390625, -2.3988037109375, -2.20037841796875, -2.001953125, -1.80352783203125, -1.6051025390625, -1.40667724609375, -1.208251953125, -1.00982666015625, -0.8114013671875, -0.61297607421875, -0.41455078125, -0.21612548828125, -0.0177001953125, 0.18072509765625, 0.379150390625, 0.57757568359375, 0.7760009765625, 0.97442626953125, 1.1728515625, 1.37127685546875, 1.5697021484375, 1.76812744140625, 1.966552734375, 2.16497802734375, 2.3634033203125, 2.56182861328125, 2.76025390625, 2.95867919921875, 3.1571044921875, 3.35552978515625, 3.553955078125, 3.75238037109375, 3.9508056640625, 4.14923095703125, 4.34765625]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 1.0, 1.0, 3.0, 4.0, 2.0, 8.0, 13.0, 11.0, 20.0, 22.0, 40.0, 45.0, 71.0, 61.0, 59.0, 74.0, 87.0, 86.0, 80.0, 61.0, 60.0, 58.0, 43.0, 37.0, 22.0, 13.0, 11.0, 9.0, 2.0, 0.0, 2.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-1.873046875, -1.8291244506835938, -1.7852020263671875, -1.7412796020507812, -1.697357177734375, -1.6534347534179688, -1.6095123291015625, -1.5655899047851562, -1.52166748046875, -1.4777450561523438, -1.4338226318359375, -1.3899002075195312, -1.345977783203125, -1.3020553588867188, -1.2581329345703125, -1.2142105102539062, -1.1702880859375, -1.1263656616210938, -1.0824432373046875, -1.0385208129882812, -0.994598388671875, -0.9506759643554688, -0.9067535400390625, -0.8628311157226562, -0.81890869140625, -0.7749862670898438, -0.7310638427734375, -0.6871414184570312, -0.643218994140625, -0.5992965698242188, -0.5553741455078125, -0.5114517211914062, -0.467529296875, -0.42360687255859375, -0.3796844482421875, -0.33576202392578125, -0.291839599609375, -0.24791717529296875, -0.2039947509765625, -0.16007232666015625, -0.11614990234375, -0.07222747802734375, -0.0283050537109375, 0.01561737060546875, 0.059539794921875, 0.10346221923828125, 0.1473846435546875, 0.19130706787109375, 0.2352294921875, 0.27915191650390625, 0.3230743408203125, 0.36699676513671875, 0.410919189453125, 0.45484161376953125, 0.4987640380859375, 0.5426864624023438, 0.58660888671875, 0.6305313110351562, 0.6744537353515625, 0.7183761596679688, 0.762298583984375, 0.8062210083007812, 0.8501434326171875, 0.8940658569335938, 0.93798828125]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 3.0, 2.0, 6.0, 5.0, 11.0, 13.0, 20.0, 29.0, 39.0, 73.0, 119.0, 193.0, 325.0, 650.0, 1493.0, 4748.0, 29092.0, 3659930.0, 472522.0, 18510.0, 3852.0, 1279.0, 611.0, 306.0, 154.0, 124.0, 61.0, 37.0, 23.0, 13.0, 16.0, 7.0, 5.0, 7.0, 8.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.453125, -9.2100830078125, -8.967041015625, -8.7239990234375, -8.48095703125, -8.2379150390625, -7.994873046875, -7.7518310546875, -7.5087890625, -7.2657470703125, -7.022705078125, -6.7796630859375, -6.53662109375, -6.2935791015625, -6.050537109375, -5.8074951171875, -5.564453125, -5.3214111328125, -5.078369140625, -4.8353271484375, -4.59228515625, -4.3492431640625, -4.106201171875, -3.8631591796875, -3.6201171875, -3.3770751953125, -3.134033203125, -2.8909912109375, -2.64794921875, -2.4049072265625, -2.161865234375, -1.9188232421875, -1.67578125, -1.4327392578125, -1.189697265625, -0.9466552734375, -0.70361328125, -0.4605712890625, -0.217529296875, 0.0255126953125, 0.2685546875, 0.5115966796875, 0.754638671875, 0.9976806640625, 1.24072265625, 1.4837646484375, 1.726806640625, 1.9698486328125, 2.212890625, 2.4559326171875, 2.698974609375, 2.9420166015625, 3.18505859375, 3.4281005859375, 3.671142578125, 3.9141845703125, 4.1572265625, 4.4002685546875, 4.643310546875, 4.8863525390625, 5.12939453125, 5.3724365234375, 5.615478515625, 5.8585205078125, 6.1015625]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 7.0, 5.0, 9.0, 21.0, 55.0, 109.0, 630.0, 2856.0, 207.0, 83.0, 32.0, 26.0, 14.0, 5.0, 8.0, 3.0, 4.0, 3.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-4.046875, -3.943695068359375, -3.84051513671875, -3.737335205078125, -3.6341552734375, -3.530975341796875, -3.42779541015625, -3.324615478515625, -3.221435546875, -3.118255615234375, -3.01507568359375, -2.911895751953125, -2.8087158203125, -2.705535888671875, -2.60235595703125, -2.499176025390625, -2.39599609375, -2.292816162109375, -2.18963623046875, -2.086456298828125, -1.9832763671875, -1.880096435546875, -1.77691650390625, -1.673736572265625, -1.570556640625, -1.467376708984375, -1.36419677734375, -1.261016845703125, -1.1578369140625, -1.054656982421875, -0.95147705078125, -0.848297119140625, -0.7451171875, -0.641937255859375, -0.53875732421875, -0.435577392578125, -0.3323974609375, -0.229217529296875, -0.12603759765625, -0.022857666015625, 0.080322265625, 0.183502197265625, 0.28668212890625, 0.389862060546875, 0.4930419921875, 0.596221923828125, 0.69940185546875, 0.802581787109375, 0.90576171875, 1.008941650390625, 1.11212158203125, 1.215301513671875, 1.3184814453125, 1.421661376953125, 1.52484130859375, 1.628021240234375, 1.731201171875, 1.834381103515625, 1.93756103515625, 2.040740966796875, 2.1439208984375, 2.247100830078125, 2.35028076171875, 2.453460693359375, 2.556640625]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 9.0, 66.0, 717.0, 188.0, 25.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.32317352294922, -15.828619956970215, -14.334067344665527, -12.839513778686523, -11.344961166381836, -9.850407600402832, -8.355854034423828, -6.861301422119141, -5.366747856140137, -3.872194766998291, -2.377641439437866, -0.8830881118774414, 0.6114649772644043, 2.10601806640625, 3.600571632385254, 5.095124244689941, 6.589677810668945, 8.08423137664795, 9.578783988952637, 11.07333755493164, 12.567890167236328, 14.062443733215332, 15.556997299194336, 17.051549911499023, 18.546104431152344, 20.04065704345703, 21.53521156311035, 23.02976417541504, 24.524316787719727, 26.018871307373047, 27.513423919677734, 29.007976531982422, 30.50252914428711, 31.997081756591797, 33.491634368896484, 34.98619079589844, 36.480743408203125, 37.97529602050781, 39.4698486328125, 40.96440124511719, 42.458953857421875, 43.95350646972656, 45.44805908203125, 46.94261169433594, 48.43716812133789, 49.93172073364258, 51.426273345947266, 52.92082595825195, 54.415382385253906, 55.909934997558594, 57.40448760986328, 58.89904022216797, 60.39359664916992, 61.88814926147461, 63.3827018737793, 64.87725830078125, 66.3718032836914, 67.8663558959961, 69.36090850830078, 70.85546112060547, 72.35001373291016, 73.84456634521484, 75.33912658691406, 76.83367919921875, 78.32823181152344]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 7.0, 2.0, 5.0, 14.0, 20.0, 23.0, 26.0, 46.0, 48.0, 62.0, 55.0, 66.0, 89.0, 67.0, 67.0, 73.0, 69.0, 57.0, 49.0, 54.0, 25.0, 25.0, 13.0, 19.0, 6.0, 8.0, 6.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.762116432189941, -7.51392126083374, -7.265726089477539, -7.01753044128418, -6.7693352699279785, -6.521140098571777, -6.272944927215576, -6.024749755859375, -5.776554107666016, -5.5283589363098145, -5.280163764953613, -5.031968116760254, -4.783772945404053, -4.535577774047852, -4.28738260269165, -4.039187431335449, -3.790992021560669, -3.5427968502044678, -3.2946014404296875, -3.0464062690734863, -2.798210859298706, -2.550015687942505, -2.3018202781677246, -2.0536251068115234, -1.8054298162460327, -1.557234525680542, -1.3090392351150513, -1.0608439445495605, -0.8126487135887146, -0.5644534826278687, -0.31625819206237793, -0.06806290149688721, 0.18013238906860352, 0.42832767963409424, 0.676522970199585, 0.9247182011604309, 1.1729135513305664, 1.4211087226867676, 1.6693040132522583, 1.917499303817749, 2.1656947135925293, 2.4138898849487305, 2.6620852947235107, 2.910280466079712, 3.158475875854492, 3.4066710472106934, 3.6548662185668945, 3.903061628341675, 4.151256561279297, 4.399451732635498, 4.647646903991699, 4.895842552185059, 5.14403772354126, 5.392232894897461, 5.640428066253662, 5.888623237609863, 6.136818885803223, 6.385014057159424, 6.633209228515625, 6.881404876708984, 7.1296000480651855, 7.377795219421387, 7.625990390777588, 7.874185562133789, 8.122381210327148]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 6.0, 6.0, 5.0, 22.0, 23.0, 30.0, 36.0, 116.0, 198.0, 382.0, 808.0, 2350.0, 10356.0, 73634.0, 592878.0, 323461.0, 35370.0, 6009.0, 1616.0, 612.0, 279.0, 153.0, 73.0, 50.0, 34.0, 15.0, 11.0, 9.0, 5.0, 5.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.89453125, -4.70501708984375, -4.5155029296875, -4.32598876953125, -4.136474609375, -3.94696044921875, -3.7574462890625, -3.56793212890625, -3.37841796875, -3.18890380859375, -2.9993896484375, -2.80987548828125, -2.620361328125, -2.43084716796875, -2.2413330078125, -2.05181884765625, -1.8623046875, -1.67279052734375, -1.4832763671875, -1.29376220703125, -1.104248046875, -0.91473388671875, -0.7252197265625, -0.53570556640625, -0.34619140625, -0.15667724609375, 0.0328369140625, 0.22235107421875, 0.411865234375, 0.60137939453125, 0.7908935546875, 0.98040771484375, 1.169921875, 1.35943603515625, 1.5489501953125, 1.73846435546875, 1.927978515625, 2.11749267578125, 2.3070068359375, 2.49652099609375, 2.68603515625, 2.87554931640625, 3.0650634765625, 3.25457763671875, 3.444091796875, 3.63360595703125, 3.8231201171875, 4.01263427734375, 4.2021484375, 4.39166259765625, 4.5811767578125, 4.77069091796875, 4.960205078125, 5.14971923828125, 5.3392333984375, 5.52874755859375, 5.71826171875, 5.90777587890625, 6.0972900390625, 6.28680419921875, 6.476318359375, 6.66583251953125, 6.8553466796875, 7.04486083984375, 7.234375]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 3.0, 3.0, 6.0, 10.0, 16.0, 19.0, 20.0, 28.0, 36.0, 49.0, 70.0, 82.0, 76.0, 86.0, 65.0, 89.0, 86.0, 63.0, 50.0, 43.0, 37.0, 21.0, 16.0, 11.0, 10.0, 2.0, 2.0, 1.0, 4.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-1.9326171875, -1.88653564453125, -1.8404541015625, -1.79437255859375, -1.748291015625, -1.70220947265625, -1.6561279296875, -1.61004638671875, -1.56396484375, -1.51788330078125, -1.4718017578125, -1.42572021484375, -1.379638671875, -1.33355712890625, -1.2874755859375, -1.24139404296875, -1.1953125, -1.14923095703125, -1.1031494140625, -1.05706787109375, -1.010986328125, -0.96490478515625, -0.9188232421875, -0.87274169921875, -0.82666015625, -0.78057861328125, -0.7344970703125, -0.68841552734375, -0.642333984375, -0.59625244140625, -0.5501708984375, -0.50408935546875, -0.4580078125, -0.41192626953125, -0.3658447265625, -0.31976318359375, -0.273681640625, -0.22760009765625, -0.1815185546875, -0.13543701171875, -0.08935546875, -0.04327392578125, 0.0028076171875, 0.04888916015625, 0.094970703125, 0.14105224609375, 0.1871337890625, 0.23321533203125, 0.279296875, 0.32537841796875, 0.3714599609375, 0.41754150390625, 0.463623046875, 0.50970458984375, 0.5557861328125, 0.60186767578125, 0.64794921875, 0.69403076171875, 0.7401123046875, 0.78619384765625, 0.832275390625, 0.87835693359375, 0.9244384765625, 0.97052001953125, 1.0166015625]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 4.0, 7.0, 4.0, 9.0, 10.0, 12.0, 19.0, 38.0, 66.0, 104.0, 193.0, 440.0, 974.0, 2976.0, 12632.0, 82757.0, 544433.0, 346704.0, 45649.0, 7832.0, 2081.0, 746.0, 376.0, 218.0, 108.0, 57.0, 36.0, 23.0, 16.0, 7.0, 8.0, 7.0, 5.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.5234375, -4.3857421875, -4.248046875, -4.1103515625, -3.97265625, -3.8349609375, -3.697265625, -3.5595703125, -3.421875, -3.2841796875, -3.146484375, -3.0087890625, -2.87109375, -2.7333984375, -2.595703125, -2.4580078125, -2.3203125, -2.1826171875, -2.044921875, -1.9072265625, -1.76953125, -1.6318359375, -1.494140625, -1.3564453125, -1.21875, -1.0810546875, -0.943359375, -0.8056640625, -0.66796875, -0.5302734375, -0.392578125, -0.2548828125, -0.1171875, 0.0205078125, 0.158203125, 0.2958984375, 0.43359375, 0.5712890625, 0.708984375, 0.8466796875, 0.984375, 1.1220703125, 1.259765625, 1.3974609375, 1.53515625, 1.6728515625, 1.810546875, 1.9482421875, 2.0859375, 2.2236328125, 2.361328125, 2.4990234375, 2.63671875, 2.7744140625, 2.912109375, 3.0498046875, 3.1875, 3.3251953125, 3.462890625, 3.6005859375, 3.73828125, 3.8759765625, 4.013671875, 4.1513671875, 4.2890625]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 3.0, 3.0, 8.0, 7.0, 8.0, 14.0, 14.0, 28.0, 30.0, 32.0, 33.0, 49.0, 49.0, 64.0, 47.0, 60.0, 72.0, 70.0, 66.0, 57.0, 53.0, 38.0, 41.0, 38.0, 33.0, 21.0, 19.0, 12.0, 8.0, 9.0, 3.0, 6.0, 4.0, 5.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.265625, -4.0994873046875, -3.933349609375, -3.7672119140625, -3.60107421875, -3.4349365234375, -3.268798828125, -3.1026611328125, -2.9365234375, -2.7703857421875, -2.604248046875, -2.4381103515625, -2.27197265625, -2.1058349609375, -1.939697265625, -1.7735595703125, -1.607421875, -1.4412841796875, -1.275146484375, -1.1090087890625, -0.94287109375, -0.7767333984375, -0.610595703125, -0.4444580078125, -0.2783203125, -0.1121826171875, 0.053955078125, 0.2200927734375, 0.38623046875, 0.5523681640625, 0.718505859375, 0.8846435546875, 1.05078125, 1.2169189453125, 1.383056640625, 1.5491943359375, 1.71533203125, 1.8814697265625, 2.047607421875, 2.2137451171875, 2.3798828125, 2.5460205078125, 2.712158203125, 2.8782958984375, 3.04443359375, 3.2105712890625, 3.376708984375, 3.5428466796875, 3.708984375, 3.8751220703125, 4.041259765625, 4.2073974609375, 4.37353515625, 4.5396728515625, 4.705810546875, 4.8719482421875, 5.0380859375, 5.2042236328125, 5.370361328125, 5.5364990234375, 5.70263671875, 5.8687744140625, 6.034912109375, 6.2010498046875, 6.3671875]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 3.0, 6.0, 8.0, 8.0, 24.0, 60.0, 140.0, 392.0, 1184.0, 8490.0, 544926.0, 483666.0, 7849.0, 1195.0, 335.0, 166.0, 59.0, 23.0, 9.0, 10.0, 6.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.490234375, -3.350555419921875, -3.21087646484375, -3.071197509765625, -2.9315185546875, -2.791839599609375, -2.65216064453125, -2.512481689453125, -2.372802734375, -2.233123779296875, -2.09344482421875, -1.953765869140625, -1.8140869140625, -1.674407958984375, -1.53472900390625, -1.395050048828125, -1.25537109375, -1.115692138671875, -0.97601318359375, -0.836334228515625, -0.6966552734375, -0.556976318359375, -0.41729736328125, -0.277618408203125, -0.137939453125, 0.001739501953125, 0.14141845703125, 0.281097412109375, 0.4207763671875, 0.560455322265625, 0.70013427734375, 0.839813232421875, 0.9794921875, 1.119171142578125, 1.25885009765625, 1.398529052734375, 1.5382080078125, 1.677886962890625, 1.81756591796875, 1.957244873046875, 2.096923828125, 2.236602783203125, 2.37628173828125, 2.515960693359375, 2.6556396484375, 2.795318603515625, 2.93499755859375, 3.074676513671875, 3.21435546875, 3.354034423828125, 3.49371337890625, 3.633392333984375, 3.7730712890625, 3.912750244140625, 4.05242919921875, 4.192108154296875, 4.331787109375, 4.471466064453125, 4.61114501953125, 4.750823974609375, 4.8905029296875, 5.030181884765625, 5.16986083984375, 5.309539794921875, 5.44921875]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 3.0, 1.0, 2.0, 3.0, 7.0, 10.0, 7.0, 10.0, 16.0, 22.0, 29.0, 46.0, 57.0, 96.0, 107.0, 134.0, 122.0, 86.0, 56.0, 55.0, 31.0, 33.0, 26.0, 12.0, 10.0, 5.0, 4.0, 4.0, 4.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00025534629821777344, -0.00024670735001564026, -0.00023806840181350708, -0.0002294294536113739, -0.00022079050540924072, -0.00021215155720710754, -0.00020351260900497437, -0.0001948736608028412, -0.000186234712600708, -0.00017759576439857483, -0.00016895681619644165, -0.00016031786799430847, -0.0001516789197921753, -0.00014303997159004211, -0.00013440102338790894, -0.00012576207518577576, -0.00011712312698364258, -0.0001084841787815094, -9.984523057937622e-05, -9.120628237724304e-05, -8.256733417510986e-05, -7.392838597297668e-05, -6.52894377708435e-05, -5.665048956871033e-05, -4.801154136657715e-05, -3.937259316444397e-05, -3.073364496231079e-05, -2.2094696760177612e-05, -1.3455748558044434e-05, -4.816800355911255e-06, 3.822147846221924e-06, 1.2461096048355103e-05, 2.110004425048828e-05, 2.973899245262146e-05, 3.837794065475464e-05, 4.701688885688782e-05, 5.5655837059020996e-05, 6.429478526115417e-05, 7.293373346328735e-05, 8.157268166542053e-05, 9.021162986755371e-05, 9.885057806968689e-05, 0.00010748952627182007, 0.00011612847447395325, 0.00012476742267608643, 0.0001334063708782196, 0.00014204531908035278, 0.00015068426728248596, 0.00015932321548461914, 0.00016796216368675232, 0.0001766011118888855, 0.00018524006009101868, 0.00019387900829315186, 0.00020251795649528503, 0.0002111569046974182, 0.0002197958528995514, 0.00022843480110168457, 0.00023707374930381775, 0.00024571269750595093, 0.0002543516457080841, 0.0002629905939102173, 0.00027162954211235046, 0.00028026849031448364, 0.0002889074385166168, 0.00029754638671875]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 4.0, 11.0, 20.0, 18.0, 39.0, 57.0, 76.0, 146.0, 300.0, 755.0, 3476.0, 42081.0, 848365.0, 143814.0, 7275.0, 1237.0, 384.0, 200.0, 115.0, 70.0, 44.0, 26.0, 15.0, 17.0, 5.0, 3.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.884765625, -3.7694091796875, -3.654052734375, -3.5386962890625, -3.42333984375, -3.3079833984375, -3.192626953125, -3.0772705078125, -2.9619140625, -2.8465576171875, -2.731201171875, -2.6158447265625, -2.50048828125, -2.3851318359375, -2.269775390625, -2.1544189453125, -2.0390625, -1.9237060546875, -1.808349609375, -1.6929931640625, -1.57763671875, -1.4622802734375, -1.346923828125, -1.2315673828125, -1.1162109375, -1.0008544921875, -0.885498046875, -0.7701416015625, -0.65478515625, -0.5394287109375, -0.424072265625, -0.3087158203125, -0.193359375, -0.0780029296875, 0.037353515625, 0.1527099609375, 0.26806640625, 0.3834228515625, 0.498779296875, 0.6141357421875, 0.7294921875, 0.8448486328125, 0.960205078125, 1.0755615234375, 1.19091796875, 1.3062744140625, 1.421630859375, 1.5369873046875, 1.65234375, 1.7677001953125, 1.883056640625, 1.9984130859375, 2.11376953125, 2.2291259765625, 2.344482421875, 2.4598388671875, 2.5751953125, 2.6905517578125, 2.805908203125, 2.9212646484375, 3.03662109375, 3.1519775390625, 3.267333984375, 3.3826904296875, 3.498046875]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [3.0, 7.0, 4.0, 5.0, 3.0, 17.0, 16.0, 24.0, 37.0, 78.0, 97.0, 168.0, 151.0, 143.0, 87.0, 61.0, 35.0, 36.0, 16.0, 11.0, 9.0, 5.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1357421875, -1.0433502197265625, -0.950958251953125, -0.8585662841796875, -0.76617431640625, -0.6737823486328125, -0.581390380859375, -0.4889984130859375, -0.3966064453125, -0.3042144775390625, -0.211822509765625, -0.1194305419921875, -0.02703857421875, 0.0653533935546875, 0.157745361328125, 0.2501373291015625, 0.342529296875, 0.4349212646484375, 0.527313232421875, 0.6197052001953125, 0.71209716796875, 0.8044891357421875, 0.896881103515625, 0.9892730712890625, 1.0816650390625, 1.1740570068359375, 1.266448974609375, 1.3588409423828125, 1.45123291015625, 1.5436248779296875, 1.636016845703125, 1.7284088134765625, 1.82080078125, 1.9131927490234375, 2.005584716796875, 2.0979766845703125, 2.19036865234375, 2.2827606201171875, 2.375152587890625, 2.4675445556640625, 2.5599365234375, 2.6523284912109375, 2.744720458984375, 2.8371124267578125, 2.92950439453125, 3.0218963623046875, 3.114288330078125, 3.2066802978515625, 3.299072265625, 3.3914642333984375, 3.483856201171875, 3.5762481689453125, 3.66864013671875, 3.7610321044921875, 3.853424072265625, 3.9458160400390625, 4.0382080078125, 4.1305999755859375, 4.222991943359375, 4.3153839111328125, 4.40777587890625, 4.5001678466796875, 4.592559814453125, 4.6849517822265625, 4.77734375]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 10.0, 58.0, 173.0, 380.0, 281.0, 86.0, 16.0, 8.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.46617126464844, -31.035585403442383, -28.604999542236328, -26.17441177368164, -23.743825912475586, -21.31324005126953, -18.882652282714844, -16.45206642150879, -14.021480560302734, -11.59089469909668, -9.160307884216309, -6.729721546173096, -4.299135208129883, -1.8685493469238281, 0.562037467956543, 2.992624282836914, 5.423210144042969, 7.853796482086182, 10.284382820129395, 12.714969635009766, 15.14555549621582, 17.576141357421875, 20.006729125976562, 22.437314987182617, 24.867900848388672, 27.298486709594727, 29.72907257080078, 32.15966033935547, 34.590248107910156, 37.02083206176758, 39.451419830322266, 41.88200378417969, 44.312591552734375, 46.74317932128906, 49.173763275146484, 51.60435104370117, 54.034934997558594, 56.46552276611328, 58.89611053466797, 61.326698303222656, 63.75728225708008, 66.1878662109375, 68.61845397949219, 71.04904174804688, 73.47962951660156, 75.91021728515625, 78.3407974243164, 80.7713851928711, 83.20197296142578, 85.63256072998047, 88.06314849853516, 90.49372863769531, 92.92431640625, 95.35490417480469, 97.78549194335938, 100.21607971191406, 102.64666748046875, 105.07725524902344, 107.50784301757812, 109.93842315673828, 112.36901092529297, 114.79959869384766, 117.23018646240234, 119.66077423095703, 122.09135437011719]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 4.0, 4.0, 3.0, 5.0, 8.0, 13.0, 6.0, 12.0, 15.0, 11.0, 13.0, 18.0, 20.0, 18.0, 28.0, 19.0, 28.0, 27.0, 25.0, 34.0, 29.0, 34.0, 43.0, 43.0, 28.0, 44.0, 46.0, 32.0, 49.0, 44.0, 34.0, 35.0, 29.0, 26.0, 19.0, 23.0, 15.0, 20.0, 13.0, 10.0, 13.0, 10.0, 11.0, 10.0, 5.0, 6.0, 7.0, 6.0, 5.0, 2.0, 3.0, 5.0, 1.0, 4.0, 0.0, 0.0, 2.0], "bins": [-20.601903915405273, -19.95659828186035, -19.311290740966797, -18.665985107421875, -18.020679473876953, -17.3753719329834, -16.730066299438477, -16.084758758544922, -15.439453125, -14.794146537780762, -14.148839950561523, -13.503534317016602, -12.858227729797363, -12.212921142578125, -11.567615509033203, -10.922308921813965, -10.277002334594727, -9.631695747375488, -8.98638916015625, -8.341083526611328, -7.69577693939209, -7.050470352172852, -6.4051642417907715, -5.759858131408691, -5.114551544189453, -4.469244956970215, -3.8239388465881348, -3.1786324977874756, -2.5333261489868164, -1.8880198001861572, -1.242713451385498, -0.597407341003418, 0.0478973388671875, 0.6932036876678467, 1.3385100364685059, 1.983816385269165, 2.629122734069824, 3.2744290828704834, 3.9197354316711426, 4.565041542053223, 5.210348129272461, 5.855654716491699, 6.500960826873779, 7.146266937255859, 7.791573524475098, 8.436880111694336, 9.082185745239258, 9.727492332458496, 10.372798919677734, 11.018105506896973, 11.663412094116211, 12.308717727661133, 12.954024314880371, 13.59933090209961, 14.244636535644531, 14.88994312286377, 15.535249710083008, 16.18055534362793, 16.825862884521484, 17.471168518066406, 18.116474151611328, 18.761781692504883, 19.407087326049805, 20.05239486694336, 20.69770050048828]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 2.0, 3.0, 0.0, 2.0, 5.0, 2.0, 2.0, 4.0, 2.0, 6.0, 6.0, 10.0, 11.0, 10.0, 15.0, 17.0, 27.0, 31.0, 43.0, 69.0, 78.0, 123.0, 210.0, 359.0, 669.0, 1446.0, 3103.0, 7406.0, 21046.0, 92568.0, 3570029.0, 426619.0, 46893.0, 13451.0, 5309.0, 2272.0, 1121.0, 548.0, 330.0, 151.0, 106.0, 61.0, 46.0, 23.0, 24.0, 12.0, 11.0, 4.0, 3.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.734375, -5.569580078125, -5.40478515625, -5.239990234375, -5.0751953125, -4.910400390625, -4.74560546875, -4.580810546875, -4.416015625, -4.251220703125, -4.08642578125, -3.921630859375, -3.7568359375, -3.592041015625, -3.42724609375, -3.262451171875, -3.09765625, -2.932861328125, -2.76806640625, -2.603271484375, -2.4384765625, -2.273681640625, -2.10888671875, -1.944091796875, -1.779296875, -1.614501953125, -1.44970703125, -1.284912109375, -1.1201171875, -0.955322265625, -0.79052734375, -0.625732421875, -0.4609375, -0.296142578125, -0.13134765625, 0.033447265625, 0.1982421875, 0.363037109375, 0.52783203125, 0.692626953125, 0.857421875, 1.022216796875, 1.18701171875, 1.351806640625, 1.5166015625, 1.681396484375, 1.84619140625, 2.010986328125, 2.17578125, 2.340576171875, 2.50537109375, 2.670166015625, 2.8349609375, 2.999755859375, 3.16455078125, 3.329345703125, 3.494140625, 3.658935546875, 3.82373046875, 3.988525390625, 4.1533203125, 4.318115234375, 4.48291015625, 4.647705078125, 4.8125]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 3.0, 5.0, 11.0, 7.0, 14.0, 17.0, 28.0, 28.0, 28.0, 40.0, 47.0, 50.0, 59.0, 74.0, 64.0, 58.0, 83.0, 69.0, 49.0, 66.0, 39.0, 41.0, 29.0, 20.0, 24.0, 7.0, 21.0, 8.0, 2.0, 5.0, 5.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8681640625, -1.8194732666015625, -1.770782470703125, -1.7220916748046875, -1.67340087890625, -1.6247100830078125, -1.576019287109375, -1.5273284912109375, -1.4786376953125, -1.4299468994140625, -1.381256103515625, -1.3325653076171875, -1.28387451171875, -1.2351837158203125, -1.186492919921875, -1.1378021240234375, -1.089111328125, -1.0404205322265625, -0.991729736328125, -0.9430389404296875, -0.89434814453125, -0.8456573486328125, -0.796966552734375, -0.7482757568359375, -0.6995849609375, -0.6508941650390625, -0.602203369140625, -0.5535125732421875, -0.50482177734375, -0.4561309814453125, -0.407440185546875, -0.3587493896484375, -0.31005859375, -0.2613677978515625, -0.212677001953125, -0.1639862060546875, -0.11529541015625, -0.0666046142578125, -0.017913818359375, 0.0307769775390625, 0.0794677734375, 0.1281585693359375, 0.176849365234375, 0.2255401611328125, 0.27423095703125, 0.3229217529296875, 0.371612548828125, 0.4203033447265625, 0.468994140625, 0.5176849365234375, 0.566375732421875, 0.6150665283203125, 0.66375732421875, 0.7124481201171875, 0.761138916015625, 0.8098297119140625, 0.8585205078125, 0.9072113037109375, 0.955902099609375, 1.0045928955078125, 1.05328369140625, 1.1019744873046875, 1.150665283203125, 1.1993560791015625, 1.248046875]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 4.0, 3.0, 5.0, 6.0, 6.0, 13.0, 9.0, 28.0, 29.0, 45.0, 73.0, 112.0, 219.0, 399.0, 838.0, 1612.0, 3819.0, 9868.0, 30825.0, 147577.0, 3527160.0, 393714.0, 53047.0, 14817.0, 5495.0, 2304.0, 1066.0, 510.0, 297.0, 165.0, 75.0, 44.0, 27.0, 33.0, 18.0, 11.0, 11.0, 5.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.9296875, -4.791595458984375, -4.65350341796875, -4.515411376953125, -4.3773193359375, -4.239227294921875, -4.10113525390625, -3.963043212890625, -3.824951171875, -3.686859130859375, -3.54876708984375, -3.410675048828125, -3.2725830078125, -3.134490966796875, -2.99639892578125, -2.858306884765625, -2.72021484375, -2.582122802734375, -2.44403076171875, -2.305938720703125, -2.1678466796875, -2.029754638671875, -1.89166259765625, -1.753570556640625, -1.615478515625, -1.477386474609375, -1.33929443359375, -1.201202392578125, -1.0631103515625, -0.925018310546875, -0.78692626953125, -0.648834228515625, -0.5107421875, -0.372650146484375, -0.23455810546875, -0.096466064453125, 0.0416259765625, 0.179718017578125, 0.31781005859375, 0.455902099609375, 0.593994140625, 0.732086181640625, 0.87017822265625, 1.008270263671875, 1.1463623046875, 1.284454345703125, 1.42254638671875, 1.560638427734375, 1.69873046875, 1.836822509765625, 1.97491455078125, 2.113006591796875, 2.2510986328125, 2.389190673828125, 2.52728271484375, 2.665374755859375, 2.803466796875, 2.941558837890625, 3.07965087890625, 3.217742919921875, 3.3558349609375, 3.493927001953125, 3.63201904296875, 3.770111083984375, 3.908203125]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 5.0, 3.0, 0.0, 4.0, 9.0, 10.0, 9.0, 11.0, 15.0, 16.0, 25.0, 45.0, 39.0, 62.0, 139.0, 275.0, 1252.0, 1426.0, 298.0, 151.0, 73.0, 50.0, 32.0, 29.0, 24.0, 19.0, 7.0, 12.0, 6.0, 5.0, 4.0, 5.0, 4.0, 2.0, 2.0, 3.0, 0.0, 2.0, 2.0, 2.0, 2.0], "bins": [-2.630859375, -2.56512451171875, -2.4993896484375, -2.43365478515625, -2.367919921875, -2.30218505859375, -2.2364501953125, -2.17071533203125, -2.10498046875, -2.03924560546875, -1.9735107421875, -1.90777587890625, -1.842041015625, -1.77630615234375, -1.7105712890625, -1.64483642578125, -1.5791015625, -1.51336669921875, -1.4476318359375, -1.38189697265625, -1.316162109375, -1.25042724609375, -1.1846923828125, -1.11895751953125, -1.05322265625, -0.98748779296875, -0.9217529296875, -0.85601806640625, -0.790283203125, -0.72454833984375, -0.6588134765625, -0.59307861328125, -0.52734375, -0.46160888671875, -0.3958740234375, -0.33013916015625, -0.264404296875, -0.19866943359375, -0.1329345703125, -0.06719970703125, -0.00146484375, 0.06427001953125, 0.1300048828125, 0.19573974609375, 0.261474609375, 0.32720947265625, 0.3929443359375, 0.45867919921875, 0.5244140625, 0.59014892578125, 0.6558837890625, 0.72161865234375, 0.787353515625, 0.85308837890625, 0.9188232421875, 0.98455810546875, 1.05029296875, 1.11602783203125, 1.1817626953125, 1.24749755859375, 1.313232421875, 1.37896728515625, 1.4447021484375, 1.51043701171875, 1.576171875]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 9.0, 25.0, 144.0, 410.0, 295.0, 86.0, 24.0, 7.0, 5.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.44594955444336, -27.330509185791016, -26.215070724487305, -25.099632263183594, -23.98419189453125, -22.868751525878906, -21.753313064575195, -20.637874603271484, -19.52243423461914, -18.406993865966797, -17.291555404663086, -16.176116943359375, -15.060676574707031, -13.945237159729004, -12.829797744750977, -11.71435832977295, -10.598918914794922, -9.483479499816895, -8.368040084838867, -7.25260066986084, -6.1371612548828125, -5.021721839904785, -3.906282424926758, -2.7908430099487305, -1.6754035949707031, -0.5599641799926758, 0.5554752349853516, 1.670914649963379, 2.7863540649414062, 3.9017934799194336, 5.017232894897461, 6.132672309875488, 7.24810791015625, 8.363547325134277, 9.478986740112305, 10.594426155090332, 11.70986557006836, 12.825304985046387, 13.940744400024414, 15.056183815002441, 16.17162322998047, 17.287063598632812, 18.402502059936523, 19.517940521240234, 20.633380889892578, 21.748821258544922, 22.864259719848633, 23.979698181152344, 25.095138549804688, 26.21057891845703, 27.326017379760742, 28.441455841064453, 29.556896209716797, 30.67233657836914, 31.78777503967285, 32.90321350097656, 34.018653869628906, 35.13409423828125, 36.249534606933594, 37.36497116088867, 38.480411529541016, 39.59585189819336, 40.71128845214844, 41.82672882080078, 42.942169189453125]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 6.0, 8.0, 12.0, 14.0, 12.0, 11.0, 16.0, 31.0, 43.0, 37.0, 40.0, 43.0, 51.0, 77.0, 53.0, 66.0, 48.0, 66.0, 47.0, 45.0, 41.0, 36.0, 35.0, 33.0, 28.0, 28.0, 16.0, 14.0, 13.0, 11.0, 6.0, 6.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0], "bins": [-11.73771858215332, -11.449079513549805, -11.160440444946289, -10.871801376342773, -10.583162307739258, -10.294523239135742, -10.005884170532227, -9.717246055603027, -9.428606986999512, -9.139967918395996, -8.85132884979248, -8.562689781188965, -8.27405071258545, -7.985412120819092, -7.696773052215576, -7.408134460449219, -7.119494915008545, -6.830855846405029, -6.542216777801514, -6.253578186035156, -5.964939117431641, -5.676300048828125, -5.387660980224609, -5.099021911621094, -4.810382843017578, -4.5217437744140625, -4.233104705810547, -3.9444658756256104, -3.655827045440674, -3.367187976837158, -3.0785489082336426, -2.789910078048706, -2.5012712478637695, -2.212632179260254, -1.9239933490753174, -1.6353542804718018, -1.3467153310775757, -1.0580763816833496, -0.769437313079834, -0.48079848289489746, -0.19215941429138184, 0.09647956490516663, 0.3851185441017151, 0.6737575531005859, 0.962396502494812, 1.251035451889038, 1.5396745204925537, 1.8283133506774902, 2.116952419281006, 2.4055914878845215, 2.694230318069458, 2.9828693866729736, 3.27150821685791, 3.560147285461426, 3.8487863540649414, 4.137425422668457, 4.426064491271973, 4.714703559875488, 5.003342628479004, 5.2919816970825195, 5.580620288848877, 5.869259357452393, 6.157898426055908, 6.446537017822266, 6.735176086425781]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 3.0, 10.0, 7.0, 8.0, 14.0, 15.0, 17.0, 37.0, 56.0, 97.0, 180.0, 217.0, 416.0, 927.0, 2127.0, 5582.0, 19696.0, 87092.0, 396062.0, 412371.0, 92575.0, 20870.0, 5937.0, 2192.0, 937.0, 457.0, 253.0, 149.0, 71.0, 63.0, 32.0, 25.0, 21.0, 10.0, 9.0, 7.0, 4.0, 6.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-5.8984375, -5.73468017578125, -5.5709228515625, -5.40716552734375, -5.243408203125, -5.07965087890625, -4.9158935546875, -4.75213623046875, -4.58837890625, -4.42462158203125, -4.2608642578125, -4.09710693359375, -3.933349609375, -3.76959228515625, -3.6058349609375, -3.44207763671875, -3.2783203125, -3.11456298828125, -2.9508056640625, -2.78704833984375, -2.623291015625, -2.45953369140625, -2.2957763671875, -2.13201904296875, -1.96826171875, -1.80450439453125, -1.6407470703125, -1.47698974609375, -1.313232421875, -1.14947509765625, -0.9857177734375, -0.82196044921875, -0.658203125, -0.49444580078125, -0.3306884765625, -0.16693115234375, -0.003173828125, 0.16058349609375, 0.3243408203125, 0.48809814453125, 0.65185546875, 0.81561279296875, 0.9793701171875, 1.14312744140625, 1.306884765625, 1.47064208984375, 1.6343994140625, 1.79815673828125, 1.9619140625, 2.12567138671875, 2.2894287109375, 2.45318603515625, 2.616943359375, 2.78070068359375, 2.9444580078125, 3.10821533203125, 3.27197265625, 3.43572998046875, 3.5994873046875, 3.76324462890625, 3.927001953125, 4.09075927734375, 4.2545166015625, 4.41827392578125, 4.58203125]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 7.0, 5.0, 6.0, 11.0, 19.0, 18.0, 22.0, 23.0, 33.0, 57.0, 59.0, 57.0, 68.0, 74.0, 70.0, 81.0, 71.0, 53.0, 58.0, 46.0, 39.0, 26.0, 32.0, 25.0, 17.0, 12.0, 6.0, 8.0, 4.0, 4.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.048828125, -1.99578857421875, -1.9427490234375, -1.88970947265625, -1.836669921875, -1.78363037109375, -1.7305908203125, -1.67755126953125, -1.62451171875, -1.57147216796875, -1.5184326171875, -1.46539306640625, -1.412353515625, -1.35931396484375, -1.3062744140625, -1.25323486328125, -1.2001953125, -1.14715576171875, -1.0941162109375, -1.04107666015625, -0.988037109375, -0.93499755859375, -0.8819580078125, -0.82891845703125, -0.77587890625, -0.72283935546875, -0.6697998046875, -0.61676025390625, -0.563720703125, -0.51068115234375, -0.4576416015625, -0.40460205078125, -0.3515625, -0.29852294921875, -0.2454833984375, -0.19244384765625, -0.139404296875, -0.08636474609375, -0.0333251953125, 0.01971435546875, 0.07275390625, 0.12579345703125, 0.1788330078125, 0.23187255859375, 0.284912109375, 0.33795166015625, 0.3909912109375, 0.44403076171875, 0.4970703125, 0.55010986328125, 0.6031494140625, 0.65618896484375, 0.709228515625, 0.76226806640625, 0.8153076171875, 0.86834716796875, 0.92138671875, 0.97442626953125, 1.0274658203125, 1.08050537109375, 1.133544921875, 1.18658447265625, 1.2396240234375, 1.29266357421875, 1.345703125]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 1.0, 1.0, 4.0, 3.0, 1.0, 3.0, 8.0, 1.0, 3.0, 8.0, 11.0, 8.0, 13.0, 16.0, 17.0, 41.0, 51.0, 68.0, 116.0, 205.0, 406.0, 742.0, 1670.0, 4147.0, 11935.0, 43059.0, 197930.0, 535032.0, 192162.0, 41792.0, 11754.0, 4077.0, 1628.0, 687.0, 372.0, 201.0, 122.0, 85.0, 49.0, 41.0, 19.0, 17.0, 14.0, 13.0, 7.0, 4.0, 6.0, 7.0, 4.0, 4.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.18359375, -4.04638671875, -3.9091796875, -3.77197265625, -3.634765625, -3.49755859375, -3.3603515625, -3.22314453125, -3.0859375, -2.94873046875, -2.8115234375, -2.67431640625, -2.537109375, -2.39990234375, -2.2626953125, -2.12548828125, -1.98828125, -1.85107421875, -1.7138671875, -1.57666015625, -1.439453125, -1.30224609375, -1.1650390625, -1.02783203125, -0.890625, -0.75341796875, -0.6162109375, -0.47900390625, -0.341796875, -0.20458984375, -0.0673828125, 0.06982421875, 0.20703125, 0.34423828125, 0.4814453125, 0.61865234375, 0.755859375, 0.89306640625, 1.0302734375, 1.16748046875, 1.3046875, 1.44189453125, 1.5791015625, 1.71630859375, 1.853515625, 1.99072265625, 2.1279296875, 2.26513671875, 2.40234375, 2.53955078125, 2.6767578125, 2.81396484375, 2.951171875, 3.08837890625, 3.2255859375, 3.36279296875, 3.5, 3.63720703125, 3.7744140625, 3.91162109375, 4.048828125, 4.18603515625, 4.3232421875, 4.46044921875, 4.59765625]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 5.0, 1.0, 3.0, 8.0, 6.0, 5.0, 11.0, 15.0, 18.0, 18.0, 17.0, 21.0, 24.0, 35.0, 28.0, 41.0, 50.0, 30.0, 50.0, 56.0, 53.0, 57.0, 55.0, 51.0, 52.0, 50.0, 43.0, 33.0, 24.0, 32.0, 24.0, 22.0, 12.0, 14.0, 9.0, 9.0, 7.0, 0.0, 7.0, 2.0, 6.0, 3.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-7.3359375, -7.12139892578125, -6.9068603515625, -6.69232177734375, -6.477783203125, -6.26324462890625, -6.0487060546875, -5.83416748046875, -5.61962890625, -5.40509033203125, -5.1905517578125, -4.97601318359375, -4.761474609375, -4.54693603515625, -4.3323974609375, -4.11785888671875, -3.9033203125, -3.68878173828125, -3.4742431640625, -3.25970458984375, -3.045166015625, -2.83062744140625, -2.6160888671875, -2.40155029296875, -2.18701171875, -1.97247314453125, -1.7579345703125, -1.54339599609375, -1.328857421875, -1.11431884765625, -0.8997802734375, -0.68524169921875, -0.470703125, -0.25616455078125, -0.0416259765625, 0.17291259765625, 0.387451171875, 0.60198974609375, 0.8165283203125, 1.03106689453125, 1.24560546875, 1.46014404296875, 1.6746826171875, 1.88922119140625, 2.103759765625, 2.31829833984375, 2.5328369140625, 2.74737548828125, 2.9619140625, 3.17645263671875, 3.3909912109375, 3.60552978515625, 3.820068359375, 4.03460693359375, 4.2491455078125, 4.46368408203125, 4.67822265625, 4.89276123046875, 5.1072998046875, 5.32183837890625, 5.536376953125, 5.75091552734375, 5.9654541015625, 6.17999267578125, 6.39453125]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 4.0, 0.0, 4.0, 1.0, 3.0, 9.0, 15.0, 21.0, 24.0, 26.0, 42.0, 64.0, 98.0, 187.0, 268.0, 523.0, 1167.0, 3032.0, 9397.0, 42616.0, 351901.0, 549174.0, 69702.0, 13322.0, 3989.0, 1463.0, 665.0, 322.0, 195.0, 99.0, 75.0, 44.0, 30.0, 25.0, 15.0, 7.0, 12.0, 6.0, 4.0, 3.0, 2.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-2.265625, -2.1904296875, -2.115234375, -2.0400390625, -1.96484375, -1.8896484375, -1.814453125, -1.7392578125, -1.6640625, -1.5888671875, -1.513671875, -1.4384765625, -1.36328125, -1.2880859375, -1.212890625, -1.1376953125, -1.0625, -0.9873046875, -0.912109375, -0.8369140625, -0.76171875, -0.6865234375, -0.611328125, -0.5361328125, -0.4609375, -0.3857421875, -0.310546875, -0.2353515625, -0.16015625, -0.0849609375, -0.009765625, 0.0654296875, 0.140625, 0.2158203125, 0.291015625, 0.3662109375, 0.44140625, 0.5166015625, 0.591796875, 0.6669921875, 0.7421875, 0.8173828125, 0.892578125, 0.9677734375, 1.04296875, 1.1181640625, 1.193359375, 1.2685546875, 1.34375, 1.4189453125, 1.494140625, 1.5693359375, 1.64453125, 1.7197265625, 1.794921875, 1.8701171875, 1.9453125, 2.0205078125, 2.095703125, 2.1708984375, 2.24609375, 2.3212890625, 2.396484375, 2.4716796875, 2.546875]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 1.0, 12.0, 5.0, 18.0, 25.0, 40.0, 51.0, 51.0, 106.0, 124.0, 117.0, 111.0, 88.0, 74.0, 58.0, 40.0, 29.0, 9.0, 13.0, 5.0, 5.0, 6.0, 3.0, 1.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003104209899902344, -0.0002987682819366455, -0.00028711557388305664, -0.0002754628658294678, -0.0002638101577758789, -0.00025215744972229004, -0.00024050474166870117, -0.0002288520336151123, -0.00021719932556152344, -0.00020554661750793457, -0.0001938939094543457, -0.00018224120140075684, -0.00017058849334716797, -0.0001589357852935791, -0.00014728307723999023, -0.00013563036918640137, -0.0001239776611328125, -0.00011232495307922363, -0.00010067224502563477, -8.90195369720459e-05, -7.736682891845703e-05, -6.571412086486816e-05, -5.40614128112793e-05, -4.240870475769043e-05, -3.075599670410156e-05, -1.9103288650512695e-05, -7.450580596923828e-06, 4.202127456665039e-06, 1.5854835510253906e-05, 2.7507543563842773e-05, 3.916025161743164e-05, 5.081295967102051e-05, 6.246566772460938e-05, 7.411837577819824e-05, 8.577108383178711e-05, 9.742379188537598e-05, 0.00010907649993896484, 0.00012072920799255371, 0.00013238191604614258, 0.00014403462409973145, 0.0001556873321533203, 0.00016734004020690918, 0.00017899274826049805, 0.00019064545631408691, 0.00020229816436767578, 0.00021395087242126465, 0.00022560358047485352, 0.00023725628852844238, 0.00024890899658203125, 0.0002605617046356201, 0.000272214412689209, 0.00028386712074279785, 0.0002955198287963867, 0.0003071725368499756, 0.00031882524490356445, 0.0003304779529571533, 0.0003421306610107422, 0.00035378336906433105, 0.0003654360771179199, 0.0003770887851715088, 0.00038874149322509766, 0.0004003942012786865, 0.0004120469093322754, 0.00042369961738586426, 0.0004353523254394531]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 5.0, 6.0, 5.0, 6.0, 9.0, 6.0, 28.0, 20.0, 59.0, 72.0, 72.0, 148.0, 220.0, 378.0, 653.0, 1270.0, 2626.0, 6430.0, 19761.0, 81436.0, 353747.0, 433874.0, 108305.0, 25332.0, 7790.0, 3013.0, 1451.0, 738.0, 389.0, 236.0, 143.0, 99.0, 62.0, 43.0, 38.0, 20.0, 21.0, 13.0, 6.0, 5.0, 7.0, 6.0, 6.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-1.671875, -1.619781494140625, -1.56768798828125, -1.515594482421875, -1.4635009765625, -1.411407470703125, -1.35931396484375, -1.307220458984375, -1.255126953125, -1.203033447265625, -1.15093994140625, -1.098846435546875, -1.0467529296875, -0.994659423828125, -0.94256591796875, -0.890472412109375, -0.83837890625, -0.786285400390625, -0.73419189453125, -0.682098388671875, -0.6300048828125, -0.577911376953125, -0.52581787109375, -0.473724365234375, -0.421630859375, -0.369537353515625, -0.31744384765625, -0.265350341796875, -0.2132568359375, -0.161163330078125, -0.10906982421875, -0.056976318359375, -0.0048828125, 0.047210693359375, 0.09930419921875, 0.151397705078125, 0.2034912109375, 0.255584716796875, 0.30767822265625, 0.359771728515625, 0.411865234375, 0.463958740234375, 0.51605224609375, 0.568145751953125, 0.6202392578125, 0.672332763671875, 0.72442626953125, 0.776519775390625, 0.82861328125, 0.880706787109375, 0.93280029296875, 0.984893798828125, 1.0369873046875, 1.089080810546875, 1.14117431640625, 1.193267822265625, 1.245361328125, 1.297454833984375, 1.34954833984375, 1.401641845703125, 1.4537353515625, 1.505828857421875, 1.55792236328125, 1.610015869140625, 1.662109375]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 0.0, 9.0, 5.0, 14.0, 18.0, 31.0, 34.0, 52.0, 62.0, 84.0, 92.0, 128.0, 110.0, 111.0, 68.0, 55.0, 41.0, 28.0, 11.0, 10.0, 7.0, 8.0, 5.0, 5.0, 4.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4375, -2.352203369140625, -2.26690673828125, -2.181610107421875, -2.0963134765625, -2.011016845703125, -1.92572021484375, -1.840423583984375, -1.755126953125, -1.669830322265625, -1.58453369140625, -1.499237060546875, -1.4139404296875, -1.328643798828125, -1.24334716796875, -1.158050537109375, -1.07275390625, -0.987457275390625, -0.90216064453125, -0.816864013671875, -0.7315673828125, -0.646270751953125, -0.56097412109375, -0.475677490234375, -0.390380859375, -0.305084228515625, -0.21978759765625, -0.134490966796875, -0.0491943359375, 0.036102294921875, 0.12139892578125, 0.206695556640625, 0.2919921875, 0.377288818359375, 0.46258544921875, 0.547882080078125, 0.6331787109375, 0.718475341796875, 0.80377197265625, 0.889068603515625, 0.974365234375, 1.059661865234375, 1.14495849609375, 1.230255126953125, 1.3155517578125, 1.400848388671875, 1.48614501953125, 1.571441650390625, 1.65673828125, 1.742034912109375, 1.82733154296875, 1.912628173828125, 1.9979248046875, 2.083221435546875, 2.16851806640625, 2.253814697265625, 2.339111328125, 2.424407958984375, 2.50970458984375, 2.595001220703125, 2.6802978515625, 2.765594482421875, 2.85089111328125, 2.936187744140625, 3.021484375]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 14.0, 28.0, 151.0, 348.0, 330.0, 103.0, 29.0, 5.0, 3.0, 2.0, 1.0, 1.0], "bins": [-182.0247039794922, -178.7698516845703, -175.51498413085938, -172.2601318359375, -169.00527954101562, -165.7504119873047, -162.4955596923828, -159.24070739746094, -155.98583984375, -152.73098754882812, -149.4761199951172, -146.2212677001953, -142.96641540527344, -139.7115478515625, -136.45669555664062, -133.20184326171875, -129.94699096679688, -126.69213104248047, -123.4372787475586, -120.18241882324219, -116.92755889892578, -113.6727066040039, -110.4178466796875, -107.16299438476562, -103.90812683105469, -100.65326690673828, -97.3984146118164, -94.1435546875, -90.8886947631836, -87.63384246826172, -84.37898254394531, -81.12413024902344, -77.8692626953125, -74.6144027709961, -71.35955047607422, -68.10469055175781, -64.8498306274414, -61.59497833251953, -58.340118408203125, -55.085262298583984, -51.830406188964844, -48.5755500793457, -45.3206901550293, -42.065834045410156, -38.810977935791016, -35.556121826171875, -32.30126190185547, -29.046405792236328, -25.791547775268555, -22.53668975830078, -19.28183364868164, -16.026975631713867, -12.77211856842041, -9.517261505126953, -6.26240348815918, -3.007547378540039, 0.24731063842773438, 3.5021679401397705, 6.757025241851807, 10.011882781982422, 13.266739845275879, 16.521596908569336, 19.77645492553711, 23.03131103515625, 26.286169052124023]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 1.0, 0.0, 2.0, 6.0, 3.0, 2.0, 6.0, 13.0, 8.0, 15.0, 18.0, 25.0, 21.0, 38.0, 44.0, 40.0, 45.0, 53.0, 51.0, 52.0, 50.0, 59.0, 58.0, 45.0, 52.0, 49.0, 45.0, 34.0, 33.0, 20.0, 25.0, 15.0, 12.0, 13.0, 12.0, 9.0, 6.0, 9.0, 8.0, 2.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.51097106933594, -39.196617126464844, -37.88226318359375, -36.56790542602539, -35.2535514831543, -33.9391975402832, -32.62484359741211, -31.310489654541016, -29.99613380432129, -28.681779861450195, -27.36742401123047, -26.053070068359375, -24.73871612548828, -23.424360275268555, -22.11000633239746, -20.795650482177734, -19.48129653930664, -18.166942596435547, -16.85258674621582, -15.538232803344727, -14.223877906799316, -12.909523010253906, -11.595169067382812, -10.280814170837402, -8.966459274291992, -7.652104377746582, -6.33774995803833, -5.023395538330078, -3.709040641784668, -2.394685745239258, -1.0803313255310059, 0.2340230941772461, 1.5483779907226562, 2.8627326488494873, 4.177087306976318, 5.49144172668457, 6.8057966232299805, 8.12015151977539, 9.434505462646484, 10.748860359191895, 12.063215255737305, 13.377570152282715, 14.691925048828125, 16.00627899169922, 17.320632934570312, 18.63498878479004, 19.949342727661133, 21.26369857788086, 22.578052520751953, 23.892406463623047, 25.206762313842773, 26.521116256713867, 27.835472106933594, 29.149826049804688, 30.46417999267578, 31.778533935546875, 33.09288787841797, 34.40724182128906, 35.721595764160156, 37.035953521728516, 38.35030746459961, 39.6646614074707, 40.9790153503418, 42.29336929321289, 43.60772705078125]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 2.0, 6.0, 6.0, 3.0, 4.0, 12.0, 23.0, 20.0, 28.0, 55.0, 78.0, 174.0, 273.0, 587.0, 1568.0, 4684.0, 21416.0, 4036986.0, 112457.0, 10980.0, 2911.0, 1039.0, 474.0, 212.0, 111.0, 66.0, 41.0, 21.0, 9.0, 13.0, 5.0, 6.0, 2.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-11.734375, -11.4239501953125, -11.113525390625, -10.8031005859375, -10.49267578125, -10.1822509765625, -9.871826171875, -9.5614013671875, -9.2509765625, -8.9405517578125, -8.630126953125, -8.3197021484375, -8.00927734375, -7.6988525390625, -7.388427734375, -7.0780029296875, -6.767578125, -6.4571533203125, -6.146728515625, -5.8363037109375, -5.52587890625, -5.2154541015625, -4.905029296875, -4.5946044921875, -4.2841796875, -3.9737548828125, -3.663330078125, -3.3529052734375, -3.04248046875, -2.7320556640625, -2.421630859375, -2.1112060546875, -1.80078125, -1.4903564453125, -1.179931640625, -0.8695068359375, -0.55908203125, -0.2486572265625, 0.061767578125, 0.3721923828125, 0.6826171875, 0.9930419921875, 1.303466796875, 1.6138916015625, 1.92431640625, 2.2347412109375, 2.545166015625, 2.8555908203125, 3.166015625, 3.4764404296875, 3.786865234375, 4.0972900390625, 4.40771484375, 4.7181396484375, 5.028564453125, 5.3389892578125, 5.6494140625, 5.9598388671875, 6.270263671875, 6.5806884765625, 6.89111328125, 7.2015380859375, 7.511962890625, 7.8223876953125, 8.1328125]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 5.0, 4.0, 7.0, 7.0, 8.0, 8.0, 17.0, 20.0, 15.0, 26.0, 27.0, 39.0, 31.0, 55.0, 55.0, 55.0, 51.0, 41.0, 54.0, 57.0, 33.0, 42.0, 57.0, 48.0, 47.0, 34.0, 33.0, 19.0, 26.0, 19.0, 24.0, 4.0, 8.0, 7.0, 5.0, 3.0, 3.0, 3.0, 1.0, 4.0, 1.0, 4.0, 0.0, 1.0, 1.0, 1.0], "bins": [-2.021484375, -1.9661102294921875, -1.910736083984375, -1.8553619384765625, -1.79998779296875, -1.7446136474609375, -1.689239501953125, -1.6338653564453125, -1.5784912109375, -1.5231170654296875, -1.467742919921875, -1.4123687744140625, -1.35699462890625, -1.3016204833984375, -1.246246337890625, -1.1908721923828125, -1.135498046875, -1.0801239013671875, -1.024749755859375, -0.9693756103515625, -0.91400146484375, -0.8586273193359375, -0.803253173828125, -0.7478790283203125, -0.6925048828125, -0.6371307373046875, -0.581756591796875, -0.5263824462890625, -0.47100830078125, -0.4156341552734375, -0.360260009765625, -0.3048858642578125, -0.24951171875, -0.1941375732421875, -0.138763427734375, -0.0833892822265625, -0.02801513671875, 0.0273590087890625, 0.082733154296875, 0.1381072998046875, 0.1934814453125, 0.2488555908203125, 0.304229736328125, 0.3596038818359375, 0.41497802734375, 0.4703521728515625, 0.525726318359375, 0.5811004638671875, 0.636474609375, 0.6918487548828125, 0.747222900390625, 0.8025970458984375, 0.85797119140625, 0.9133453369140625, 0.968719482421875, 1.0240936279296875, 1.0794677734375, 1.1348419189453125, 1.190216064453125, 1.2455902099609375, 1.30096435546875, 1.3563385009765625, 1.411712646484375, 1.4670867919921875, 1.5224609375]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 5.0, 5.0, 5.0, 5.0, 2.0, 8.0, 20.0, 29.0, 36.0, 51.0, 74.0, 107.0, 179.0, 251.0, 403.0, 590.0, 980.0, 1685.0, 3300.0, 7583.0, 21100.0, 107061.0, 3931740.0, 86680.0, 18406.0, 6771.0, 2916.0, 1579.0, 948.0, 565.0, 389.0, 260.0, 156.0, 113.0, 79.0, 68.0, 40.0, 24.0, 26.0, 14.0, 8.0, 6.0, 10.0, 7.0, 1.0, 2.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.96875, -4.80560302734375, -4.6424560546875, -4.47930908203125, -4.316162109375, -4.15301513671875, -3.9898681640625, -3.82672119140625, -3.66357421875, -3.50042724609375, -3.3372802734375, -3.17413330078125, -3.010986328125, -2.84783935546875, -2.6846923828125, -2.52154541015625, -2.3583984375, -2.19525146484375, -2.0321044921875, -1.86895751953125, -1.705810546875, -1.54266357421875, -1.3795166015625, -1.21636962890625, -1.05322265625, -0.89007568359375, -0.7269287109375, -0.56378173828125, -0.400634765625, -0.23748779296875, -0.0743408203125, 0.08880615234375, 0.251953125, 0.41510009765625, 0.5782470703125, 0.74139404296875, 0.904541015625, 1.06768798828125, 1.2308349609375, 1.39398193359375, 1.55712890625, 1.72027587890625, 1.8834228515625, 2.04656982421875, 2.209716796875, 2.37286376953125, 2.5360107421875, 2.69915771484375, 2.8623046875, 3.02545166015625, 3.1885986328125, 3.35174560546875, 3.514892578125, 3.67803955078125, 3.8411865234375, 4.00433349609375, 4.16748046875, 4.33062744140625, 4.4937744140625, 4.65692138671875, 4.820068359375, 4.98321533203125, 5.1463623046875, 5.30950927734375, 5.47265625]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 13.0, 13.0, 20.0, 27.0, 46.0, 104.0, 617.0, 3016.0, 100.0, 50.0, 22.0, 11.0, 7.0, 5.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2236328125, -1.1728057861328125, -1.121978759765625, -1.0711517333984375, -1.02032470703125, -0.9694976806640625, -0.918670654296875, -0.8678436279296875, -0.8170166015625, -0.7661895751953125, -0.715362548828125, -0.6645355224609375, -0.61370849609375, -0.5628814697265625, -0.512054443359375, -0.4612274169921875, -0.410400390625, -0.3595733642578125, -0.308746337890625, -0.2579193115234375, -0.20709228515625, -0.1562652587890625, -0.105438232421875, -0.0546112060546875, -0.0037841796875, 0.0470428466796875, 0.097869873046875, 0.1486968994140625, 0.19952392578125, 0.2503509521484375, 0.301177978515625, 0.3520050048828125, 0.40283203125, 0.4536590576171875, 0.504486083984375, 0.5553131103515625, 0.60614013671875, 0.6569671630859375, 0.707794189453125, 0.7586212158203125, 0.8094482421875, 0.8602752685546875, 0.911102294921875, 0.9619293212890625, 1.01275634765625, 1.0635833740234375, 1.114410400390625, 1.1652374267578125, 1.216064453125, 1.2668914794921875, 1.317718505859375, 1.3685455322265625, 1.41937255859375, 1.4701995849609375, 1.521026611328125, 1.5718536376953125, 1.6226806640625, 1.6735076904296875, 1.724334716796875, 1.7751617431640625, 1.82598876953125, 1.8768157958984375, 1.927642822265625, 1.9784698486328125, 2.029296875]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 3.0, 8.0, 12.0, 27.0, 45.0, 97.0, 165.0, 239.0, 198.0, 137.0, 63.0, 9.0, 7.0, 3.0, 2.0], "bins": [-15.506940841674805, -15.235616683959961, -14.9642915725708, -14.69296646118164, -14.421642303466797, -14.150318145751953, -13.878993034362793, -13.607667922973633, -13.336343765258789, -13.065019607543945, -12.793694496154785, -12.522369384765625, -12.251045227050781, -11.979721069335938, -11.708395957946777, -11.437070846557617, -11.165746688842773, -10.89442253112793, -10.62309741973877, -10.35177230834961, -10.080448150634766, -9.809123992919922, -9.537798881530762, -9.266473770141602, -8.995149612426758, -8.723825454711914, -8.452500343322754, -8.181175231933594, -7.90985107421875, -7.638526439666748, -7.367201805114746, -7.095877170562744, -6.824552059173584, -6.553227424621582, -6.28190279006958, -6.010578155517578, -5.739253520965576, -5.467928886413574, -5.196604251861572, -4.92527961730957, -4.653954982757568, -4.382630348205566, -4.1113057136535645, -3.8399810791015625, -3.5686564445495605, -3.2973318099975586, -3.0260071754455566, -2.7546825408935547, -2.4833579063415527, -2.212033271789551, -1.9407086372375488, -1.6693840026855469, -1.398059368133545, -1.126734733581543, -0.855410099029541, -0.5840854644775391, -0.3127608299255371, -0.041436195373535156, 0.2298884391784668, 0.5012130737304688, 0.7725377082824707, 1.0438623428344727, 1.3151869773864746, 1.5865116119384766, 1.8578362464904785]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 3.0, 0.0, 1.0, 2.0, 4.0, 1.0, 4.0, 7.0, 8.0, 7.0, 8.0, 15.0, 16.0, 15.0, 22.0, 27.0, 25.0, 22.0, 32.0, 30.0, 36.0, 28.0, 29.0, 38.0, 31.0, 29.0, 31.0, 42.0, 45.0, 31.0, 41.0, 42.0, 42.0, 42.0, 28.0, 26.0, 35.0, 19.0, 20.0, 11.0, 20.0, 22.0, 12.0, 13.0, 8.0, 7.0, 8.0, 7.0, 7.0, 5.0, 5.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0], "bins": [-2.7228174209594727, -2.630297899246216, -2.537778377532959, -2.4452590942382812, -2.3527395725250244, -2.2602200508117676, -2.1677005290985107, -2.075181007385254, -1.9826616048812866, -1.8901420831680298, -1.7976226806640625, -1.7051031589508057, -1.6125836372375488, -1.5200642347335815, -1.4275447130203247, -1.3350253105163574, -1.2425057888031006, -1.1499862670898438, -1.0574668645858765, -0.9649473428726196, -0.8724278807640076, -0.7799084186553955, -0.6873888969421387, -0.5948694348335266, -0.5023499727249146, -0.4098305106163025, -0.31731101870536804, -0.2247915267944336, -0.13227206468582153, -0.03975260257720947, 0.05276691913604736, 0.14528638124465942, 0.23780584335327148, 0.33032530546188354, 0.422844797372818, 0.5153642892837524, 0.6078837513923645, 0.7004032135009766, 0.7929227352142334, 0.8854421973228455, 0.9779616594314575, 1.0704811811447144, 1.1630005836486816, 1.2555201053619385, 1.3480396270751953, 1.4405590295791626, 1.5330785512924194, 1.6255979537963867, 1.7181174755096436, 1.8106369972229004, 1.9031563997268677, 1.9956759214401245, 2.088195323944092, 2.1807148456573486, 2.2732343673706055, 2.3657538890838623, 2.458273410797119, 2.550792932510376, 2.643312454223633, 2.7358317375183105, 2.8283512592315674, 2.920870780944824, 3.013390302658081, 3.105909824371338, 3.1984291076660156]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 4.0, 7.0, 12.0, 13.0, 26.0, 30.0, 59.0, 99.0, 198.0, 488.0, 1133.0, 3154.0, 10739.0, 43677.0, 200392.0, 506163.0, 217457.0, 47675.0, 11597.0, 3460.0, 1225.0, 474.0, 204.0, 120.0, 55.0, 42.0, 20.0, 11.0, 9.0, 6.0, 9.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.1484375, -3.96429443359375, -3.7801513671875, -3.59600830078125, -3.411865234375, -3.22772216796875, -3.0435791015625, -2.85943603515625, -2.67529296875, -2.49114990234375, -2.3070068359375, -2.12286376953125, -1.938720703125, -1.75457763671875, -1.5704345703125, -1.38629150390625, -1.2021484375, -1.01800537109375, -0.8338623046875, -0.64971923828125, -0.465576171875, -0.28143310546875, -0.0972900390625, 0.08685302734375, 0.27099609375, 0.45513916015625, 0.6392822265625, 0.82342529296875, 1.007568359375, 1.19171142578125, 1.3758544921875, 1.55999755859375, 1.744140625, 1.92828369140625, 2.1124267578125, 2.29656982421875, 2.480712890625, 2.66485595703125, 2.8489990234375, 3.03314208984375, 3.21728515625, 3.40142822265625, 3.5855712890625, 3.76971435546875, 3.953857421875, 4.13800048828125, 4.3221435546875, 4.50628662109375, 4.6904296875, 4.87457275390625, 5.0587158203125, 5.24285888671875, 5.427001953125, 5.61114501953125, 5.7952880859375, 5.97943115234375, 6.16357421875, 6.34771728515625, 6.5318603515625, 6.71600341796875, 6.900146484375, 7.08428955078125, 7.2684326171875, 7.45257568359375, 7.63671875]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 2.0, 2.0, 2.0, 7.0, 2.0, 7.0, 4.0, 9.0, 13.0, 16.0, 12.0, 23.0, 18.0, 26.0, 40.0, 43.0, 44.0, 41.0, 45.0, 55.0, 52.0, 51.0, 59.0, 43.0, 43.0, 54.0, 43.0, 37.0, 36.0, 36.0, 26.0, 29.0, 19.0, 18.0, 10.0, 9.0, 8.0, 8.0, 7.0, 3.0, 0.0, 2.0, 2.0, 0.0, 0.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.037109375, -1.97845458984375, -1.9197998046875, -1.86114501953125, -1.802490234375, -1.74383544921875, -1.6851806640625, -1.62652587890625, -1.56787109375, -1.50921630859375, -1.4505615234375, -1.39190673828125, -1.333251953125, -1.27459716796875, -1.2159423828125, -1.15728759765625, -1.0986328125, -1.03997802734375, -0.9813232421875, -0.92266845703125, -0.864013671875, -0.80535888671875, -0.7467041015625, -0.68804931640625, -0.62939453125, -0.57073974609375, -0.5120849609375, -0.45343017578125, -0.394775390625, -0.33612060546875, -0.2774658203125, -0.21881103515625, -0.16015625, -0.10150146484375, -0.0428466796875, 0.01580810546875, 0.074462890625, 0.13311767578125, 0.1917724609375, 0.25042724609375, 0.30908203125, 0.36773681640625, 0.4263916015625, 0.48504638671875, 0.543701171875, 0.60235595703125, 0.6610107421875, 0.71966552734375, 0.7783203125, 0.83697509765625, 0.8956298828125, 0.95428466796875, 1.012939453125, 1.07159423828125, 1.1302490234375, 1.18890380859375, 1.24755859375, 1.30621337890625, 1.3648681640625, 1.42352294921875, 1.482177734375, 1.54083251953125, 1.5994873046875, 1.65814208984375, 1.716796875]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 6.0, 3.0, 3.0, 5.0, 9.0, 14.0, 14.0, 28.0, 26.0, 46.0, 73.0, 73.0, 149.0, 249.0, 471.0, 1070.0, 2916.0, 10023.0, 50844.0, 348329.0, 531400.0, 81313.0, 14841.0, 3900.0, 1346.0, 563.0, 301.0, 188.0, 108.0, 71.0, 55.0, 43.0, 24.0, 19.0, 10.0, 4.0, 7.0, 4.0, 3.0, 4.0, 3.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.78125, -6.56341552734375, -6.3455810546875, -6.12774658203125, -5.909912109375, -5.69207763671875, -5.4742431640625, -5.25640869140625, -5.03857421875, -4.82073974609375, -4.6029052734375, -4.38507080078125, -4.167236328125, -3.94940185546875, -3.7315673828125, -3.51373291015625, -3.2958984375, -3.07806396484375, -2.8602294921875, -2.64239501953125, -2.424560546875, -2.20672607421875, -1.9888916015625, -1.77105712890625, -1.55322265625, -1.33538818359375, -1.1175537109375, -0.89971923828125, -0.681884765625, -0.46405029296875, -0.2462158203125, -0.02838134765625, 0.189453125, 0.40728759765625, 0.6251220703125, 0.84295654296875, 1.060791015625, 1.27862548828125, 1.4964599609375, 1.71429443359375, 1.93212890625, 2.14996337890625, 2.3677978515625, 2.58563232421875, 2.803466796875, 3.02130126953125, 3.2391357421875, 3.45697021484375, 3.6748046875, 3.89263916015625, 4.1104736328125, 4.32830810546875, 4.546142578125, 4.76397705078125, 4.9818115234375, 5.19964599609375, 5.41748046875, 5.63531494140625, 5.8531494140625, 6.07098388671875, 6.288818359375, 6.50665283203125, 6.7244873046875, 6.94232177734375, 7.16015625]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 3.0, 2.0, 5.0, 3.0, 7.0, 9.0, 16.0, 20.0, 30.0, 37.0, 50.0, 37.0, 59.0, 50.0, 68.0, 63.0, 83.0, 80.0, 61.0, 66.0, 53.0, 51.0, 34.0, 24.0, 29.0, 25.0, 9.0, 9.0, 7.0, 5.0, 5.0, 3.0, 1.0, 9.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.421875, -8.963623046875, -8.50537109375, -8.047119140625, -7.5888671875, -7.130615234375, -6.67236328125, -6.214111328125, -5.755859375, -5.297607421875, -4.83935546875, -4.381103515625, -3.9228515625, -3.464599609375, -3.00634765625, -2.548095703125, -2.08984375, -1.631591796875, -1.17333984375, -0.715087890625, -0.2568359375, 0.201416015625, 0.65966796875, 1.117919921875, 1.576171875, 2.034423828125, 2.49267578125, 2.950927734375, 3.4091796875, 3.867431640625, 4.32568359375, 4.783935546875, 5.2421875, 5.700439453125, 6.15869140625, 6.616943359375, 7.0751953125, 7.533447265625, 7.99169921875, 8.449951171875, 8.908203125, 9.366455078125, 9.82470703125, 10.282958984375, 10.7412109375, 11.199462890625, 11.65771484375, 12.115966796875, 12.57421875, 13.032470703125, 13.49072265625, 13.948974609375, 14.4072265625, 14.865478515625, 15.32373046875, 15.781982421875, 16.240234375, 16.698486328125, 17.15673828125, 17.614990234375, 18.0732421875, 18.531494140625, 18.98974609375, 19.447998046875, 19.90625]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 6.0, 4.0, 9.0, 12.0, 12.0, 40.0, 74.0, 144.0, 375.0, 1302.0, 11043.0, 1007979.0, 24841.0, 1897.0, 470.0, 176.0, 83.0, 42.0, 21.0, 15.0, 7.0, 6.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.40625, -9.9998779296875, -9.593505859375, -9.1871337890625, -8.78076171875, -8.3743896484375, -7.968017578125, -7.5616455078125, -7.1552734375, -6.7489013671875, -6.342529296875, -5.9361572265625, -5.52978515625, -5.1234130859375, -4.717041015625, -4.3106689453125, -3.904296875, -3.4979248046875, -3.091552734375, -2.6851806640625, -2.27880859375, -1.8724365234375, -1.466064453125, -1.0596923828125, -0.6533203125, -0.2469482421875, 0.159423828125, 0.5657958984375, 0.97216796875, 1.3785400390625, 1.784912109375, 2.1912841796875, 2.59765625, 3.0040283203125, 3.410400390625, 3.8167724609375, 4.22314453125, 4.6295166015625, 5.035888671875, 5.4422607421875, 5.8486328125, 6.2550048828125, 6.661376953125, 7.0677490234375, 7.47412109375, 7.8804931640625, 8.286865234375, 8.6932373046875, 9.099609375, 9.5059814453125, 9.912353515625, 10.3187255859375, 10.72509765625, 11.1314697265625, 11.537841796875, 11.9442138671875, 12.3505859375, 12.7569580078125, 13.163330078125, 13.5697021484375, 13.97607421875, 14.3824462890625, 14.788818359375, 15.1951904296875, 15.6015625]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 3.0, 4.0, 10.0, 15.0, 17.0, 44.0, 69.0, 87.0, 143.0, 178.0, 172.0, 106.0, 74.0, 32.0, 21.0, 11.0, 9.0, 4.0, 7.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0007047653198242188, -0.000681854784488678, -0.0006589442491531372, -0.0006360337138175964, -0.0006131231784820557, -0.0005902126431465149, -0.0005673021078109741, -0.0005443915724754333, -0.0005214810371398926, -0.0004985705018043518, -0.00047565996646881104, -0.00045274943113327026, -0.0004298388957977295, -0.0004069283604621887, -0.00038401782512664795, -0.0003611072897911072, -0.0003381967544555664, -0.00031528621912002563, -0.00029237568378448486, -0.0002694651484489441, -0.0002465546131134033, -0.00022364407777786255, -0.00020073354244232178, -0.000177823007106781, -0.00015491247177124023, -0.00013200193643569946, -0.00010909140110015869, -8.618086576461792e-05, -6.327033042907715e-05, -4.035979509353638e-05, -1.7449259757995605e-05, 5.461275577545166e-06, 2.8371810913085938e-05, 5.128234624862671e-05, 7.419288158416748e-05, 9.710341691970825e-05, 0.00012001395225524902, 0.0001429244875907898, 0.00016583502292633057, 0.00018874555826187134, 0.0002116560935974121, 0.00023456662893295288, 0.00025747716426849365, 0.0002803876996040344, 0.0003032982349395752, 0.00032620877027511597, 0.00034911930561065674, 0.0003720298409461975, 0.0003949403762817383, 0.00041785091161727905, 0.0004407614469528198, 0.0004636719822883606, 0.00048658251762390137, 0.0005094930529594421, 0.0005324035882949829, 0.0005553141236305237, 0.0005782246589660645, 0.0006011351943016052, 0.000624045729637146, 0.0006469562649726868, 0.0006698668003082275, 0.0006927773356437683, 0.0007156878709793091, 0.0007385984063148499, 0.0007615089416503906]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 13.0, 14.0, 33.0, 48.0, 116.0, 213.0, 578.0, 1553.0, 5818.0, 56257.0, 898065.0, 76184.0, 6777.0, 1791.0, 634.0, 234.0, 103.0, 54.0, 21.0, 18.0, 10.0, 6.0, 4.0, 2.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.86328125, -3.6839599609375, -3.504638671875, -3.3253173828125, -3.14599609375, -2.9666748046875, -2.787353515625, -2.6080322265625, -2.4287109375, -2.2493896484375, -2.070068359375, -1.8907470703125, -1.71142578125, -1.5321044921875, -1.352783203125, -1.1734619140625, -0.994140625, -0.8148193359375, -0.635498046875, -0.4561767578125, -0.27685546875, -0.0975341796875, 0.081787109375, 0.2611083984375, 0.4404296875, 0.6197509765625, 0.799072265625, 0.9783935546875, 1.15771484375, 1.3370361328125, 1.516357421875, 1.6956787109375, 1.875, 2.0543212890625, 2.233642578125, 2.4129638671875, 2.59228515625, 2.7716064453125, 2.950927734375, 3.1302490234375, 3.3095703125, 3.4888916015625, 3.668212890625, 3.8475341796875, 4.02685546875, 4.2061767578125, 4.385498046875, 4.5648193359375, 4.744140625, 4.9234619140625, 5.102783203125, 5.2821044921875, 5.46142578125, 5.6407470703125, 5.820068359375, 5.9993896484375, 6.1787109375, 6.3580322265625, 6.537353515625, 6.7166748046875, 6.89599609375, 7.0753173828125, 7.254638671875, 7.4339599609375, 7.61328125]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 6.0, 12.0, 20.0, 33.0, 77.0, 158.0, 245.0, 216.0, 102.0, 68.0, 21.0, 12.0, 7.0, 2.0, 5.0, 4.0, 1.0, 1.0, 0.0, 4.0, 3.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.5390625, -8.3128662109375, -8.086669921875, -7.8604736328125, -7.63427734375, -7.4080810546875, -7.181884765625, -6.9556884765625, -6.7294921875, -6.5032958984375, -6.277099609375, -6.0509033203125, -5.82470703125, -5.5985107421875, -5.372314453125, -5.1461181640625, -4.919921875, -4.6937255859375, -4.467529296875, -4.2413330078125, -4.01513671875, -3.7889404296875, -3.562744140625, -3.3365478515625, -3.1103515625, -2.8841552734375, -2.657958984375, -2.4317626953125, -2.20556640625, -1.9793701171875, -1.753173828125, -1.5269775390625, -1.30078125, -1.0745849609375, -0.848388671875, -0.6221923828125, -0.39599609375, -0.1697998046875, 0.056396484375, 0.2825927734375, 0.5087890625, 0.7349853515625, 0.961181640625, 1.1873779296875, 1.41357421875, 1.6397705078125, 1.865966796875, 2.0921630859375, 2.318359375, 2.5445556640625, 2.770751953125, 2.9969482421875, 3.22314453125, 3.4493408203125, 3.675537109375, 3.9017333984375, 4.1279296875, 4.3541259765625, 4.580322265625, 4.8065185546875, 5.03271484375, 5.2589111328125, 5.485107421875, 5.7113037109375, 5.9375]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 1.0, 3.0, 13.0, 31.0, 65.0, 102.0, 171.0, 206.0, 186.0, 121.0, 52.0, 33.0, 13.0, 9.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-63.882904052734375, -61.46776580810547, -59.05262756347656, -56.63749313354492, -54.222354888916016, -51.80721664428711, -49.39208221435547, -46.97694396972656, -44.561805725097656, -42.14666748046875, -39.731529235839844, -37.3163948059082, -34.9012565612793, -32.48611831665039, -30.070981979370117, -27.655845642089844, -25.240707397460938, -22.82556915283203, -20.410432815551758, -17.995296478271484, -15.580158233642578, -13.165020942687988, -10.749883651733398, -8.334747314453125, -5.919609069824219, -3.504471778869629, -1.089334487915039, 1.3258028030395508, 3.7409400939941406, 6.1560773849487305, 8.57121467590332, 10.986351013183594, 13.4014892578125, 15.81662654876709, 18.23176383972168, 20.646900177001953, 23.06203842163086, 25.477176666259766, 27.89231300354004, 30.307449340820312, 32.72258758544922, 35.137725830078125, 37.55286407470703, 39.96799850463867, 42.38313674926758, 44.798274993896484, 47.213409423828125, 49.62854766845703, 52.04368591308594, 54.458824157714844, 56.87396240234375, 59.28909683227539, 61.7042350769043, 64.11936950683594, 66.53450775146484, 68.94964599609375, 71.36478424072266, 73.77992248535156, 76.19506072998047, 78.61019897460938, 81.02532958984375, 83.44046783447266, 85.85560607910156, 88.27074432373047, 90.68588256835938]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 3.0, 2.0, 3.0, 8.0, 7.0, 7.0, 8.0, 10.0, 12.0, 16.0, 21.0, 17.0, 16.0, 32.0, 18.0, 42.0, 26.0, 40.0, 49.0, 45.0, 50.0, 62.0, 56.0, 45.0, 72.0, 46.0, 40.0, 43.0, 37.0, 34.0, 28.0, 18.0, 20.0, 9.0, 15.0, 14.0, 9.0, 5.0, 11.0, 2.0, 4.0, 4.0, 3.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.304649353027344, -46.701942443847656, -45.099239349365234, -43.49653244018555, -41.89382553100586, -40.29112243652344, -38.68841552734375, -37.08570861816406, -35.483001708984375, -33.88029479980469, -32.277591705322266, -30.674884796142578, -29.07217788696289, -27.469472885131836, -25.86676788330078, -24.264060974121094, -22.661357879638672, -21.058652877807617, -19.45594596862793, -17.853240966796875, -16.250534057617188, -14.647829055786133, -13.045124053955078, -11.442418098449707, -9.839712142944336, -8.237006187438965, -6.634300708770752, -5.031595230102539, -3.428889274597168, -1.8261833190917969, -0.2234783172607422, 1.379227638244629, 2.98193359375, 4.584639549255371, 6.187345027923584, 7.790050506591797, 9.392756462097168, 10.995462417602539, 12.598167419433594, 14.200873374938965, 15.803579330444336, 17.40628433227539, 19.008991241455078, 20.611696243286133, 22.214401245117188, 23.817108154296875, 25.41981315612793, 27.022518157958984, 28.625225067138672, 30.227930068969727, 31.830636978149414, 33.43334197998047, 35.036048889160156, 36.638755798339844, 38.241458892822266, 39.84416580200195, 41.446868896484375, 43.04957580566406, 44.652278900146484, 46.25498580932617, 47.85769271850586, 49.46039581298828, 51.06310272216797, 52.665809631347656, 54.268516540527344]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 7.0, 2.0, 7.0, 7.0, 7.0, 7.0, 20.0, 18.0, 26.0, 22.0, 33.0, 79.0, 101.0, 140.0, 238.0, 372.0, 702.0, 1388.0, 3089.0, 8263.0, 31888.0, 3951734.0, 166322.0, 19150.0, 5782.0, 2449.0, 1091.0, 588.0, 317.0, 175.0, 91.0, 57.0, 39.0, 31.0, 14.0, 14.0, 6.0, 5.0, 7.0, 3.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.09375, -7.86785888671875, -7.6419677734375, -7.41607666015625, -7.190185546875, -6.96429443359375, -6.7384033203125, -6.51251220703125, -6.28662109375, -6.06072998046875, -5.8348388671875, -5.60894775390625, -5.383056640625, -5.15716552734375, -4.9312744140625, -4.70538330078125, -4.4794921875, -4.25360107421875, -4.0277099609375, -3.80181884765625, -3.575927734375, -3.35003662109375, -3.1241455078125, -2.89825439453125, -2.67236328125, -2.44647216796875, -2.2205810546875, -1.99468994140625, -1.768798828125, -1.54290771484375, -1.3170166015625, -1.09112548828125, -0.865234375, -0.63934326171875, -0.4134521484375, -0.18756103515625, 0.038330078125, 0.26422119140625, 0.4901123046875, 0.71600341796875, 0.94189453125, 1.16778564453125, 1.3936767578125, 1.61956787109375, 1.845458984375, 2.07135009765625, 2.2972412109375, 2.52313232421875, 2.7490234375, 2.97491455078125, 3.2008056640625, 3.42669677734375, 3.652587890625, 3.87847900390625, 4.1043701171875, 4.33026123046875, 4.55615234375, 4.78204345703125, 5.0079345703125, 5.23382568359375, 5.459716796875, 5.68560791015625, 5.9114990234375, 6.13739013671875, 6.36328125]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 4.0, 3.0, 11.0, 7.0, 6.0, 8.0, 17.0, 24.0, 24.0, 31.0, 41.0, 45.0, 63.0, 58.0, 67.0, 80.0, 67.0, 54.0, 53.0, 61.0, 63.0, 38.0, 35.0, 37.0, 21.0, 17.0, 15.0, 12.0, 14.0, 6.0, 5.0, 2.0, 3.0, 0.0, 4.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.654296875, -2.57440185546875, -2.4945068359375, -2.41461181640625, -2.334716796875, -2.25482177734375, -2.1749267578125, -2.09503173828125, -2.01513671875, -1.93524169921875, -1.8553466796875, -1.77545166015625, -1.695556640625, -1.61566162109375, -1.5357666015625, -1.45587158203125, -1.3759765625, -1.29608154296875, -1.2161865234375, -1.13629150390625, -1.056396484375, -0.97650146484375, -0.8966064453125, -0.81671142578125, -0.73681640625, -0.65692138671875, -0.5770263671875, -0.49713134765625, -0.417236328125, -0.33734130859375, -0.2574462890625, -0.17755126953125, -0.09765625, -0.01776123046875, 0.0621337890625, 0.14202880859375, 0.221923828125, 0.30181884765625, 0.3817138671875, 0.46160888671875, 0.54150390625, 0.62139892578125, 0.7012939453125, 0.78118896484375, 0.861083984375, 0.94097900390625, 1.0208740234375, 1.10076904296875, 1.1806640625, 1.26055908203125, 1.3404541015625, 1.42034912109375, 1.500244140625, 1.58013916015625, 1.6600341796875, 1.73992919921875, 1.81982421875, 1.89971923828125, 1.9796142578125, 2.05950927734375, 2.139404296875, 2.21929931640625, 2.2991943359375, 2.37908935546875, 2.458984375]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 2.0, 5.0, 8.0, 8.0, 14.0, 12.0, 15.0, 28.0, 49.0, 40.0, 72.0, 94.0, 109.0, 163.0, 222.0, 292.0, 419.0, 592.0, 959.0, 1579.0, 2812.0, 5242.0, 11080.0, 27956.0, 115780.0, 3822055.0, 146919.0, 32301.0, 11869.0, 5735.0, 2949.0, 1696.0, 978.0, 621.0, 435.0, 311.0, 213.0, 154.0, 113.0, 101.0, 61.0, 54.0, 49.0, 34.0, 23.0, 12.0, 16.0, 12.0, 12.0, 9.0, 2.0, 7.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.8984375, -4.73272705078125, -4.5670166015625, -4.40130615234375, -4.235595703125, -4.06988525390625, -3.9041748046875, -3.73846435546875, -3.57275390625, -3.40704345703125, -3.2413330078125, -3.07562255859375, -2.909912109375, -2.74420166015625, -2.5784912109375, -2.41278076171875, -2.2470703125, -2.08135986328125, -1.9156494140625, -1.74993896484375, -1.584228515625, -1.41851806640625, -1.2528076171875, -1.08709716796875, -0.92138671875, -0.75567626953125, -0.5899658203125, -0.42425537109375, -0.258544921875, -0.09283447265625, 0.0728759765625, 0.23858642578125, 0.404296875, 0.57000732421875, 0.7357177734375, 0.90142822265625, 1.067138671875, 1.23284912109375, 1.3985595703125, 1.56427001953125, 1.72998046875, 1.89569091796875, 2.0614013671875, 2.22711181640625, 2.392822265625, 2.55853271484375, 2.7242431640625, 2.88995361328125, 3.0556640625, 3.22137451171875, 3.3870849609375, 3.55279541015625, 3.718505859375, 3.88421630859375, 4.0499267578125, 4.21563720703125, 4.38134765625, 4.54705810546875, 4.7127685546875, 4.87847900390625, 5.044189453125, 5.20989990234375, 5.3756103515625, 5.54132080078125, 5.70703125]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 3.0, 10.0, 11.0, 20.0, 22.0, 43.0, 95.0, 332.0, 3253.0, 146.0, 61.0, 32.0, 20.0, 8.0, 6.0, 2.0, 4.0, 4.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.15234375, -3.067718505859375, -2.98309326171875, -2.898468017578125, -2.8138427734375, -2.729217529296875, -2.64459228515625, -2.559967041015625, -2.475341796875, -2.390716552734375, -2.30609130859375, -2.221466064453125, -2.1368408203125, -2.052215576171875, -1.96759033203125, -1.882965087890625, -1.79833984375, -1.713714599609375, -1.62908935546875, -1.544464111328125, -1.4598388671875, -1.375213623046875, -1.29058837890625, -1.205963134765625, -1.121337890625, -1.036712646484375, -0.95208740234375, -0.867462158203125, -0.7828369140625, -0.698211669921875, -0.61358642578125, -0.528961181640625, -0.4443359375, -0.359710693359375, -0.27508544921875, -0.190460205078125, -0.1058349609375, -0.021209716796875, 0.06341552734375, 0.148040771484375, 0.232666015625, 0.317291259765625, 0.40191650390625, 0.486541748046875, 0.5711669921875, 0.655792236328125, 0.74041748046875, 0.825042724609375, 0.90966796875, 0.994293212890625, 1.07891845703125, 1.163543701171875, 1.2481689453125, 1.332794189453125, 1.41741943359375, 1.502044677734375, 1.586669921875, 1.671295166015625, 1.75592041015625, 1.840545654296875, 1.9251708984375, 2.009796142578125, 2.09442138671875, 2.179046630859375, 2.263671875]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 8.0, 17.0, 22.0, 48.0, 96.0, 130.0, 187.0, 162.0, 144.0, 98.0, 48.0, 31.0, 12.0, 4.0, 1.0, 2.0, 1.0, 1.0], "bins": [-18.860492706298828, -18.50800895690918, -18.15552520751953, -17.803041458129883, -17.450557708740234, -17.098072052001953, -16.745588302612305, -16.393104553222656, -16.040620803833008, -15.68813705444336, -15.335653305053711, -14.983168601989746, -14.630684852600098, -14.27820110321045, -13.9257173538208, -13.573232650756836, -13.220748901367188, -12.868265151977539, -12.51578140258789, -12.163296699523926, -11.810812950134277, -11.458329200744629, -11.10584545135498, -10.753360748291016, -10.400877952575684, -10.048394203186035, -9.695910453796387, -9.343425750732422, -8.990942001342773, -8.638458251953125, -8.285974502563477, -7.93349027633667, -7.581006050109863, -7.228522300720215, -6.876038074493408, -6.52355432510376, -6.171070098876953, -5.818586349487305, -5.466102600097656, -5.11361837387085, -4.761134147644043, -4.4086503982543945, -4.056166172027588, -3.7036824226379395, -3.351198196411133, -2.9987144470214844, -2.646230459213257, -2.2937464714050293, -1.9412624835968018, -1.5887784957885742, -1.2362945079803467, -0.8838106393814087, -0.5313266515731812, -0.1788426637649536, 0.17364120483398438, 0.5261251926422119, 0.8786091804504395, 1.231093168258667, 1.5835771560668945, 1.9360610246658325, 2.2885451316833496, 2.641028881072998, 2.9935128688812256, 3.345996856689453, 3.6984808444976807]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 0.0, 1.0, 6.0, 10.0, 4.0, 4.0, 12.0, 12.0, 16.0, 19.0, 22.0, 31.0, 27.0, 24.0, 34.0, 25.0, 18.0, 41.0, 46.0, 51.0, 46.0, 53.0, 54.0, 37.0, 38.0, 48.0, 32.0, 41.0, 38.0, 30.0, 36.0, 22.0, 23.0, 25.0, 18.0, 10.0, 13.0, 12.0, 8.0, 2.0, 7.0, 5.0, 5.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-4.86547327041626, -4.707979679107666, -4.550486087799072, -4.3929924964904785, -4.235498905181885, -4.078005313873291, -3.920511484146118, -3.7630178928375244, -3.6055243015289307, -3.448030710220337, -3.290537118911743, -3.1330435276031494, -2.9755496978759766, -2.818056106567383, -2.660562515258789, -2.5030689239501953, -2.3455753326416016, -2.188081741333008, -2.030588150024414, -1.8730944395065308, -1.715600848197937, -1.5581072568893433, -1.40061354637146, -1.2431199550628662, -1.0856263637542725, -0.9281327724456787, -0.7706391215324402, -0.6131454706192017, -0.4556518793106079, -0.29815828800201416, -0.14066463708877563, 0.01682901382446289, 0.17432212829589844, 0.3318157494068146, 0.4893093705177307, 0.6468030214309692, 0.804296612739563, 0.9617902040481567, 1.11928391456604, 1.2767775058746338, 1.4342710971832275, 1.5917646884918213, 1.749258279800415, 1.9067519903182983, 2.0642457008361816, 2.2217392921447754, 2.379232883453369, 2.536726474761963, 2.6942200660705566, 2.8517136573791504, 3.009207248687744, 3.166700839996338, 3.3241944313049316, 3.4816880226135254, 3.6391818523406982, 3.796675443649292, 3.9541690349578857, 4.111662864685059, 4.269156455993652, 4.426650047302246, 4.58414363861084, 4.741637229919434, 4.899130821228027, 5.056624412536621, 5.214118003845215]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 1.0, 2.0, 4.0, 2.0, 4.0, 11.0, 13.0, 27.0, 33.0, 68.0, 89.0, 203.0, 322.0, 684.0, 1502.0, 3894.0, 11464.0, 37997.0, 146228.0, 437250.0, 297743.0, 77868.0, 21289.0, 7180.0, 2532.0, 1063.0, 473.0, 247.0, 148.0, 77.0, 46.0, 25.0, 26.0, 17.0, 11.0, 7.0, 5.0, 6.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.1015625, -3.9483642578125, -3.795166015625, -3.6419677734375, -3.48876953125, -3.3355712890625, -3.182373046875, -3.0291748046875, -2.8759765625, -2.7227783203125, -2.569580078125, -2.4163818359375, -2.26318359375, -2.1099853515625, -1.956787109375, -1.8035888671875, -1.650390625, -1.4971923828125, -1.343994140625, -1.1907958984375, -1.03759765625, -0.8843994140625, -0.731201171875, -0.5780029296875, -0.4248046875, -0.2716064453125, -0.118408203125, 0.0347900390625, 0.18798828125, 0.3411865234375, 0.494384765625, 0.6475830078125, 0.80078125, 0.9539794921875, 1.107177734375, 1.2603759765625, 1.41357421875, 1.5667724609375, 1.719970703125, 1.8731689453125, 2.0263671875, 2.1795654296875, 2.332763671875, 2.4859619140625, 2.63916015625, 2.7923583984375, 2.945556640625, 3.0987548828125, 3.251953125, 3.4051513671875, 3.558349609375, 3.7115478515625, 3.86474609375, 4.0179443359375, 4.171142578125, 4.3243408203125, 4.4775390625, 4.6307373046875, 4.783935546875, 4.9371337890625, 5.09033203125, 5.2435302734375, 5.396728515625, 5.5499267578125, 5.703125]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 3.0, 2.0, 8.0, 12.0, 4.0, 7.0, 12.0, 11.0, 28.0, 25.0, 33.0, 50.0, 39.0, 58.0, 63.0, 78.0, 74.0, 78.0, 60.0, 68.0, 53.0, 44.0, 48.0, 30.0, 28.0, 26.0, 16.0, 14.0, 12.0, 5.0, 5.0, 2.0, 5.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.00390625, -2.91522216796875, -2.8265380859375, -2.73785400390625, -2.649169921875, -2.56048583984375, -2.4718017578125, -2.38311767578125, -2.29443359375, -2.20574951171875, -2.1170654296875, -2.02838134765625, -1.939697265625, -1.85101318359375, -1.7623291015625, -1.67364501953125, -1.5849609375, -1.49627685546875, -1.4075927734375, -1.31890869140625, -1.230224609375, -1.14154052734375, -1.0528564453125, -0.96417236328125, -0.87548828125, -0.78680419921875, -0.6981201171875, -0.60943603515625, -0.520751953125, -0.43206787109375, -0.3433837890625, -0.25469970703125, -0.166015625, -0.07733154296875, 0.0113525390625, 0.10003662109375, 0.188720703125, 0.27740478515625, 0.3660888671875, 0.45477294921875, 0.54345703125, 0.63214111328125, 0.7208251953125, 0.80950927734375, 0.898193359375, 0.98687744140625, 1.0755615234375, 1.16424560546875, 1.2529296875, 1.34161376953125, 1.4302978515625, 1.51898193359375, 1.607666015625, 1.69635009765625, 1.7850341796875, 1.87371826171875, 1.96240234375, 2.05108642578125, 2.1397705078125, 2.22845458984375, 2.317138671875, 2.40582275390625, 2.4945068359375, 2.58319091796875, 2.671875]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 3.0, 3.0, 5.0, 10.0, 8.0, 11.0, 14.0, 26.0, 46.0, 54.0, 104.0, 151.0, 486.0, 1495.0, 7990.0, 101113.0, 850146.0, 77757.0, 6899.0, 1396.0, 398.0, 178.0, 92.0, 54.0, 35.0, 23.0, 13.0, 12.0, 7.0, 10.0, 8.0, 5.0, 5.0, 3.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.625, -15.2078857421875, -14.790771484375, -14.3736572265625, -13.95654296875, -13.5394287109375, -13.122314453125, -12.7052001953125, -12.2880859375, -11.8709716796875, -11.453857421875, -11.0367431640625, -10.61962890625, -10.2025146484375, -9.785400390625, -9.3682861328125, -8.951171875, -8.5340576171875, -8.116943359375, -7.6998291015625, -7.28271484375, -6.8656005859375, -6.448486328125, -6.0313720703125, -5.6142578125, -5.1971435546875, -4.780029296875, -4.3629150390625, -3.94580078125, -3.5286865234375, -3.111572265625, -2.6944580078125, -2.27734375, -1.8602294921875, -1.443115234375, -1.0260009765625, -0.60888671875, -0.1917724609375, 0.225341796875, 0.6424560546875, 1.0595703125, 1.4766845703125, 1.893798828125, 2.3109130859375, 2.72802734375, 3.1451416015625, 3.562255859375, 3.9793701171875, 4.396484375, 4.8135986328125, 5.230712890625, 5.6478271484375, 6.06494140625, 6.4820556640625, 6.899169921875, 7.3162841796875, 7.7333984375, 8.1505126953125, 8.567626953125, 8.9847412109375, 9.40185546875, 9.8189697265625, 10.236083984375, 10.6531982421875, 11.0703125]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 6.0, 3.0, 2.0, 12.0, 3.0, 12.0, 8.0, 11.0, 14.0, 11.0, 18.0, 29.0, 36.0, 39.0, 41.0, 57.0, 53.0, 53.0, 60.0, 60.0, 57.0, 47.0, 60.0, 41.0, 70.0, 43.0, 28.0, 19.0, 22.0, 24.0, 10.0, 12.0, 11.0, 7.0, 5.0, 3.0, 5.0, 5.0, 4.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-13.4375, -13.0316162109375, -12.625732421875, -12.2198486328125, -11.81396484375, -11.4080810546875, -11.002197265625, -10.5963134765625, -10.1904296875, -9.7845458984375, -9.378662109375, -8.9727783203125, -8.56689453125, -8.1610107421875, -7.755126953125, -7.3492431640625, -6.943359375, -6.5374755859375, -6.131591796875, -5.7257080078125, -5.31982421875, -4.9139404296875, -4.508056640625, -4.1021728515625, -3.6962890625, -3.2904052734375, -2.884521484375, -2.4786376953125, -2.07275390625, -1.6668701171875, -1.260986328125, -0.8551025390625, -0.44921875, -0.0433349609375, 0.362548828125, 0.7684326171875, 1.17431640625, 1.5802001953125, 1.986083984375, 2.3919677734375, 2.7978515625, 3.2037353515625, 3.609619140625, 4.0155029296875, 4.42138671875, 4.8272705078125, 5.233154296875, 5.6390380859375, 6.044921875, 6.4508056640625, 6.856689453125, 7.2625732421875, 7.66845703125, 8.0743408203125, 8.480224609375, 8.8861083984375, 9.2919921875, 9.6978759765625, 10.103759765625, 10.5096435546875, 10.91552734375, 11.3214111328125, 11.727294921875, 12.1331787109375, 12.5390625]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 2.0, 4.0, 5.0, 7.0, 7.0, 26.0, 74.0, 175.0, 827.0, 12834.0, 1020562.0, 12995.0, 768.0, 173.0, 58.0, 18.0, 21.0, 3.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.0625, -18.5673828125, -18.072265625, -17.5771484375, -17.08203125, -16.5869140625, -16.091796875, -15.5966796875, -15.1015625, -14.6064453125, -14.111328125, -13.6162109375, -13.12109375, -12.6259765625, -12.130859375, -11.6357421875, -11.140625, -10.6455078125, -10.150390625, -9.6552734375, -9.16015625, -8.6650390625, -8.169921875, -7.6748046875, -7.1796875, -6.6845703125, -6.189453125, -5.6943359375, -5.19921875, -4.7041015625, -4.208984375, -3.7138671875, -3.21875, -2.7236328125, -2.228515625, -1.7333984375, -1.23828125, -0.7431640625, -0.248046875, 0.2470703125, 0.7421875, 1.2373046875, 1.732421875, 2.2275390625, 2.72265625, 3.2177734375, 3.712890625, 4.2080078125, 4.703125, 5.1982421875, 5.693359375, 6.1884765625, 6.68359375, 7.1787109375, 7.673828125, 8.1689453125, 8.6640625, 9.1591796875, 9.654296875, 10.1494140625, 10.64453125, 11.1396484375, 11.634765625, 12.1298828125, 12.625]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 5.0, 7.0, 10.0, 14.0, 32.0, 56.0, 73.0, 130.0, 179.0, 166.0, 136.0, 76.0, 56.0, 18.0, 12.0, 17.0, 9.0, 5.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0007848739624023438, -0.0007567405700683594, -0.000728607177734375, -0.0007004737854003906, -0.0006723403930664062, -0.0006442070007324219, -0.0006160736083984375, -0.0005879402160644531, -0.0005598068237304688, -0.0005316734313964844, -0.0005035400390625, -0.0004754066467285156, -0.00044727325439453125, -0.0004191398620605469, -0.0003910064697265625, -0.0003628730773925781, -0.00033473968505859375, -0.0003066062927246094, -0.000278472900390625, -0.0002503395080566406, -0.00022220611572265625, -0.00019407272338867188, -0.0001659393310546875, -0.00013780593872070312, -0.00010967254638671875, -8.153915405273438e-05, -5.340576171875e-05, -2.5272369384765625e-05, 2.86102294921875e-06, 3.0994415283203125e-05, 5.91278076171875e-05, 8.726119995117188e-05, 0.00011539459228515625, 0.00014352798461914062, 0.000171661376953125, 0.00019979476928710938, 0.00022792816162109375, 0.0002560615539550781, 0.0002841949462890625, 0.0003123283386230469, 0.00034046173095703125, 0.0003685951232910156, 0.000396728515625, 0.0004248619079589844, 0.00045299530029296875, 0.0004811286926269531, 0.0005092620849609375, 0.0005373954772949219, 0.0005655288696289062, 0.0005936622619628906, 0.000621795654296875, 0.0006499290466308594, 0.0006780624389648438, 0.0007061958312988281, 0.0007343292236328125, 0.0007624626159667969, 0.0007905960083007812, 0.0008187294006347656, 0.00084686279296875, 0.0008749961853027344, 0.0009031295776367188, 0.0009312629699707031, 0.0009593963623046875, 0.0009875297546386719, 0.0010156631469726562]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 8.0, 16.0, 23.0, 71.0, 238.0, 2588.0, 993560.0, 50999.0, 829.0, 132.0, 54.0, 22.0, 9.0, 6.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.65625, -17.063720703125, -16.47119140625, -15.878662109375, -15.2861328125, -14.693603515625, -14.10107421875, -13.508544921875, -12.916015625, -12.323486328125, -11.73095703125, -11.138427734375, -10.5458984375, -9.953369140625, -9.36083984375, -8.768310546875, -8.17578125, -7.583251953125, -6.99072265625, -6.398193359375, -5.8056640625, -5.213134765625, -4.62060546875, -4.028076171875, -3.435546875, -2.843017578125, -2.25048828125, -1.657958984375, -1.0654296875, -0.472900390625, 0.11962890625, 0.712158203125, 1.3046875, 1.897216796875, 2.48974609375, 3.082275390625, 3.6748046875, 4.267333984375, 4.85986328125, 5.452392578125, 6.044921875, 6.637451171875, 7.22998046875, 7.822509765625, 8.4150390625, 9.007568359375, 9.60009765625, 10.192626953125, 10.78515625, 11.377685546875, 11.97021484375, 12.562744140625, 13.1552734375, 13.747802734375, 14.34033203125, 14.932861328125, 15.525390625, 16.117919921875, 16.71044921875, 17.302978515625, 17.8955078125, 18.488037109375, 19.08056640625, 19.673095703125, 20.265625]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 4.0, 3.0, 4.0, 5.0, 5.0, 32.0, 91.0, 226.0, 323.0, 196.0, 74.0, 15.0, 10.0, 10.0, 6.0, 2.0, 5.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.748046875, -3.420440673828125, -3.09283447265625, -2.765228271484375, -2.4376220703125, -2.110015869140625, -1.78240966796875, -1.454803466796875, -1.127197265625, -0.799591064453125, -0.47198486328125, -0.144378662109375, 0.1832275390625, 0.510833740234375, 0.83843994140625, 1.166046142578125, 1.49365234375, 1.821258544921875, 2.14886474609375, 2.476470947265625, 2.8040771484375, 3.131683349609375, 3.45928955078125, 3.786895751953125, 4.114501953125, 4.442108154296875, 4.76971435546875, 5.097320556640625, 5.4249267578125, 5.752532958984375, 6.08013916015625, 6.407745361328125, 6.7353515625, 7.062957763671875, 7.39056396484375, 7.718170166015625, 8.0457763671875, 8.373382568359375, 8.70098876953125, 9.028594970703125, 9.356201171875, 9.683807373046875, 10.01141357421875, 10.339019775390625, 10.6666259765625, 10.994232177734375, 11.32183837890625, 11.649444580078125, 11.97705078125, 12.304656982421875, 12.63226318359375, 12.959869384765625, 13.2874755859375, 13.615081787109375, 13.94268798828125, 14.270294189453125, 14.597900390625, 14.925506591796875, 15.25311279296875, 15.580718994140625, 15.9083251953125, 16.235931396484375, 16.56353759765625, 16.891143798828125, 17.21875]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 8.0, 17.0, 73.0, 163.0, 308.0, 257.0, 105.0, 38.0, 19.0, 10.0, 6.0, 1.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-156.6019744873047, -152.19078063964844, -147.77960205078125, -143.368408203125, -138.95721435546875, -134.5460205078125, -130.13482666015625, -125.72364807128906, -121.31245422363281, -116.90126037597656, -112.49007415771484, -108.07888793945312, -103.66769409179688, -99.25650024414062, -94.8453140258789, -90.43412780761719, -86.02293395996094, -81.61174011230469, -77.20055389404297, -72.78936767578125, -68.378173828125, -63.966983795166016, -59.55579376220703, -55.14460372924805, -50.73341369628906, -46.32222366333008, -41.911033630371094, -37.49984359741211, -33.088653564453125, -28.67746353149414, -24.266273498535156, -19.855083465576172, -15.44390869140625, -11.032718658447266, -6.621528625488281, -2.210338592529297, 2.2008514404296875, 6.612041473388672, 11.023231506347656, 15.43442153930664, 19.845611572265625, 24.25680160522461, 28.667991638183594, 33.07918167114258, 37.49037170410156, 41.90156173706055, 46.31275177001953, 50.723941802978516, 55.1351318359375, 59.546321868896484, 63.95751190185547, 68.36869812011719, 72.77989196777344, 77.19108581542969, 81.6022720336914, 86.01345825195312, 90.42465209960938, 94.83584594726562, 99.24703216552734, 103.65821838378906, 108.06941223144531, 112.48060607910156, 116.89179229736328, 121.302978515625, 125.71417236328125]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 3.0, 3.0, 3.0, 3.0, 6.0, 4.0, 13.0, 9.0, 16.0, 20.0, 19.0, 18.0, 24.0, 48.0, 36.0, 49.0, 48.0, 44.0, 71.0, 43.0, 68.0, 70.0, 39.0, 53.0, 44.0, 44.0, 51.0, 29.0, 34.0, 19.0, 17.0, 15.0, 6.0, 14.0, 7.0, 8.0, 4.0, 2.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-59.44290542602539, -57.641380310058594, -55.83985137939453, -54.038326263427734, -52.23680114746094, -50.43527603149414, -48.633750915527344, -46.83222198486328, -45.030696868896484, -43.22917175292969, -41.427642822265625, -39.62611770629883, -37.82459259033203, -36.023067474365234, -34.22154235839844, -32.420013427734375, -30.618488311767578, -28.81696319580078, -27.01543617248535, -25.213909149169922, -23.412384033203125, -21.610858917236328, -19.8093318939209, -18.00780487060547, -16.206279754638672, -14.404753684997559, -12.603227615356445, -10.801701545715332, -9.000175476074219, -7.1986494064331055, -5.397123336791992, -3.595597267150879, -1.7940673828125, 0.007458686828613281, 1.8089847564697266, 3.61051082611084, 5.412036895751953, 7.213562965393066, 9.01508903503418, 10.816615104675293, 12.618141174316406, 14.41966724395752, 16.221193313598633, 18.022720336914062, 19.82424545288086, 21.625770568847656, 23.427297592163086, 25.228824615478516, 27.030349731445312, 28.83187484741211, 30.63340187072754, 32.43492889404297, 34.236454010009766, 36.03797912597656, 37.839508056640625, 39.64103317260742, 41.44255828857422, 43.244083404541016, 45.04560852050781, 46.847137451171875, 48.64866256713867, 50.45018768310547, 52.25171661376953, 54.05324172973633, 55.854766845703125]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 5.0, 14.0, 19.0, 24.0, 29.0, 44.0, 101.0, 155.0, 246.0, 542.0, 1171.0, 3373.0, 16257.0, 4103154.0, 58975.0, 6726.0, 1895.0, 800.0, 338.0, 159.0, 88.0, 45.0, 31.0, 25.0, 17.0, 7.0, 7.0, 7.0, 4.0, 3.0, 3.0, 2.0, 1.0, 2.0, 1.0], "bins": [-13.703125, -13.38250732421875, -13.0618896484375, -12.74127197265625, -12.420654296875, -12.10003662109375, -11.7794189453125, -11.45880126953125, -11.13818359375, -10.81756591796875, -10.4969482421875, -10.17633056640625, -9.855712890625, -9.53509521484375, -9.2144775390625, -8.89385986328125, -8.5732421875, -8.25262451171875, -7.9320068359375, -7.61138916015625, -7.290771484375, -6.97015380859375, -6.6495361328125, -6.32891845703125, -6.00830078125, -5.68768310546875, -5.3670654296875, -5.04644775390625, -4.725830078125, -4.40521240234375, -4.0845947265625, -3.76397705078125, -3.443359375, -3.12274169921875, -2.8021240234375, -2.48150634765625, -2.160888671875, -1.84027099609375, -1.5196533203125, -1.19903564453125, -0.87841796875, -0.55780029296875, -0.2371826171875, 0.08343505859375, 0.404052734375, 0.72467041015625, 1.0452880859375, 1.36590576171875, 1.6865234375, 2.00714111328125, 2.3277587890625, 2.64837646484375, 2.968994140625, 3.28961181640625, 3.6102294921875, 3.93084716796875, 4.25146484375, 4.57208251953125, 4.8927001953125, 5.21331787109375, 5.533935546875, 5.85455322265625, 6.1751708984375, 6.49578857421875, 6.81640625]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 1.0, 4.0, 2.0, 4.0, 3.0, 5.0, 9.0, 5.0, 11.0, 6.0, 15.0, 27.0, 33.0, 44.0, 53.0, 48.0, 82.0, 53.0, 67.0, 97.0, 78.0, 65.0, 65.0, 57.0, 36.0, 30.0, 24.0, 20.0, 17.0, 12.0, 7.0, 6.0, 5.0, 4.0, 4.0, 4.0, 3.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0], "bins": [-4.22265625, -4.11444091796875, -4.0062255859375, -3.89801025390625, -3.789794921875, -3.68157958984375, -3.5733642578125, -3.46514892578125, -3.35693359375, -3.24871826171875, -3.1405029296875, -3.03228759765625, -2.924072265625, -2.81585693359375, -2.7076416015625, -2.59942626953125, -2.4912109375, -2.38299560546875, -2.2747802734375, -2.16656494140625, -2.058349609375, -1.95013427734375, -1.8419189453125, -1.73370361328125, -1.62548828125, -1.51727294921875, -1.4090576171875, -1.30084228515625, -1.192626953125, -1.08441162109375, -0.9761962890625, -0.86798095703125, -0.759765625, -0.65155029296875, -0.5433349609375, -0.43511962890625, -0.326904296875, -0.21868896484375, -0.1104736328125, -0.00225830078125, 0.10595703125, 0.21417236328125, 0.3223876953125, 0.43060302734375, 0.538818359375, 0.64703369140625, 0.7552490234375, 0.86346435546875, 0.9716796875, 1.07989501953125, 1.1881103515625, 1.29632568359375, 1.404541015625, 1.51275634765625, 1.6209716796875, 1.72918701171875, 1.83740234375, 1.94561767578125, 2.0538330078125, 2.16204833984375, 2.270263671875, 2.37847900390625, 2.4866943359375, 2.59490966796875, 2.703125]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 4.0, 1.0, 1.0, 3.0, 4.0, 4.0, 13.0, 5.0, 12.0, 20.0, 34.0, 41.0, 65.0, 96.0, 119.0, 216.0, 305.0, 403.0, 622.0, 985.0, 1707.0, 3193.0, 6406.0, 17721.0, 141559.0, 3977910.0, 25532.0, 8244.0, 3825.0, 1916.0, 1148.0, 722.0, 451.0, 306.0, 200.0, 140.0, 115.0, 78.0, 55.0, 34.0, 23.0, 16.0, 7.0, 9.0, 12.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.2265625, -8.909912109375, -8.59326171875, -8.276611328125, -7.9599609375, -7.643310546875, -7.32666015625, -7.010009765625, -6.693359375, -6.376708984375, -6.06005859375, -5.743408203125, -5.4267578125, -5.110107421875, -4.79345703125, -4.476806640625, -4.16015625, -3.843505859375, -3.52685546875, -3.210205078125, -2.8935546875, -2.576904296875, -2.26025390625, -1.943603515625, -1.626953125, -1.310302734375, -0.99365234375, -0.677001953125, -0.3603515625, -0.043701171875, 0.27294921875, 0.589599609375, 0.90625, 1.222900390625, 1.53955078125, 1.856201171875, 2.1728515625, 2.489501953125, 2.80615234375, 3.122802734375, 3.439453125, 3.756103515625, 4.07275390625, 4.389404296875, 4.7060546875, 5.022705078125, 5.33935546875, 5.656005859375, 5.97265625, 6.289306640625, 6.60595703125, 6.922607421875, 7.2392578125, 7.555908203125, 7.87255859375, 8.189208984375, 8.505859375, 8.822509765625, 9.13916015625, 9.455810546875, 9.7724609375, 10.089111328125, 10.40576171875, 10.722412109375, 11.0390625]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 10.0, 12.0, 12.0, 39.0, 69.0, 3617.0, 204.0, 48.0, 22.0, 4.0, 10.0, 8.0, 5.0, 3.0, 2.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.65625, -2.55352783203125, -2.4508056640625, -2.34808349609375, -2.245361328125, -2.14263916015625, -2.0399169921875, -1.93719482421875, -1.83447265625, -1.73175048828125, -1.6290283203125, -1.52630615234375, -1.423583984375, -1.32086181640625, -1.2181396484375, -1.11541748046875, -1.0126953125, -0.90997314453125, -0.8072509765625, -0.70452880859375, -0.601806640625, -0.49908447265625, -0.3963623046875, -0.29364013671875, -0.19091796875, -0.08819580078125, 0.0145263671875, 0.11724853515625, 0.219970703125, 0.32269287109375, 0.4254150390625, 0.52813720703125, 0.630859375, 0.73358154296875, 0.8363037109375, 0.93902587890625, 1.041748046875, 1.14447021484375, 1.2471923828125, 1.34991455078125, 1.45263671875, 1.55535888671875, 1.6580810546875, 1.76080322265625, 1.863525390625, 1.96624755859375, 2.0689697265625, 2.17169189453125, 2.2744140625, 2.37713623046875, 2.4798583984375, 2.58258056640625, 2.685302734375, 2.78802490234375, 2.8907470703125, 2.99346923828125, 3.09619140625, 3.19891357421875, 3.3016357421875, 3.40435791015625, 3.507080078125, 3.60980224609375, 3.7125244140625, 3.81524658203125, 3.91796875]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 2.0, 4.0, 9.0, 10.0, 15.0, 19.0, 38.0, 42.0, 67.0, 78.0, 98.0, 123.0, 108.0, 115.0, 86.0, 59.0, 43.0, 34.0, 21.0, 6.0, 12.0, 3.0, 4.0, 1.0, 3.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.610106468200684, -8.326332092285156, -8.042557716369629, -7.758783340454102, -7.475008964538574, -7.191234588623047, -6.9074602127075195, -6.623685836791992, -6.339911460876465, -6.0561370849609375, -5.77236270904541, -5.488588333129883, -5.2048139572143555, -4.921039581298828, -4.637265205383301, -4.353490829467773, -4.069716453552246, -3.7859420776367188, -3.5021677017211914, -3.218393325805664, -2.9346189498901367, -2.6508445739746094, -2.367070198059082, -2.0832958221435547, -1.7995214462280273, -1.5157470703125, -1.2319726943969727, -0.9481983184814453, -0.664423942565918, -0.3806495666503906, -0.09687519073486328, 0.18689918518066406, 0.4706745147705078, 0.7544488906860352, 1.0382232666015625, 1.3219976425170898, 1.6057720184326172, 1.8895463943481445, 2.173320770263672, 2.457095146179199, 2.7408695220947266, 3.024643898010254, 3.3084182739257812, 3.5921926498413086, 3.875967025756836, 4.159741401672363, 4.443515777587891, 4.727290153503418, 5.011064529418945, 5.294838905334473, 5.57861328125, 5.862387657165527, 6.146162033081055, 6.429936408996582, 6.713710784912109, 6.997485160827637, 7.281259536743164, 7.565033912658691, 7.848808288574219, 8.132582664489746, 8.416357040405273, 8.7001314163208, 8.983905792236328, 9.267680168151855, 9.551454544067383]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 3.0, 4.0, 9.0, 9.0, 10.0, 10.0, 19.0, 17.0, 20.0, 25.0, 28.0, 30.0, 33.0, 46.0, 56.0, 39.0, 54.0, 54.0, 50.0, 58.0, 55.0, 61.0, 44.0, 42.0, 33.0, 41.0, 26.0, 24.0, 19.0, 22.0, 13.0, 15.0, 8.0, 10.0, 4.0, 4.0, 5.0, 5.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.873305320739746, -5.667628765106201, -5.461951732635498, -5.256275177001953, -5.05059814453125, -4.844921588897705, -4.63924503326416, -4.433568000793457, -4.227890968322754, -4.022214412689209, -3.816537380218506, -3.610860824584961, -3.405183792114258, -3.199507236480713, -2.993830442428589, -2.788153648376465, -2.58247709274292, -2.376800298690796, -2.171123504638672, -1.9654468297958374, -1.7597700357437134, -1.5540932416915894, -1.3484165668487549, -1.1427397727966309, -0.9370629787445068, -0.7313861846923828, -0.5257094502449036, -0.3200327157974243, -0.11435592174530029, 0.09132087230682373, 0.2969975471496582, 0.5026743412017822, 0.7083511352539062, 0.9140279293060303, 1.1197047233581543, 1.3253813982009888, 1.5310581922531128, 1.7367349863052368, 1.9424116611480713, 2.1480884552001953, 2.3537652492523193, 2.5594420433044434, 2.7651188373565674, 2.9707956314086914, 3.1764721870422363, 3.3821492195129395, 3.5878257751464844, 3.7935025691986084, 3.9991793632507324, 4.204855918884277, 4.4105329513549805, 4.616209506988525, 4.8218865394592285, 5.027563095092773, 5.233240127563477, 5.4389166831970215, 5.644593238830566, 5.850269794464111, 6.0559468269348145, 6.261623382568359, 6.4673004150390625, 6.672976970672607, 6.878653526306152, 7.0843305587768555, 7.290007591247559]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 6.0, 7.0, 9.0, 20.0, 18.0, 43.0, 53.0, 79.0, 113.0, 149.0, 232.0, 382.0, 626.0, 968.0, 1818.0, 3249.0, 6495.0, 13583.0, 31615.0, 82264.0, 218596.0, 370335.0, 192532.0, 71682.0, 28390.0, 12307.0, 5738.0, 3049.0, 1646.0, 933.0, 557.0, 384.0, 241.0, 142.0, 96.0, 60.0, 34.0, 28.0, 26.0, 15.0, 9.0, 11.0, 3.0, 2.0, 3.0, 7.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.6484375, -4.50054931640625, -4.3526611328125, -4.20477294921875, -4.056884765625, -3.90899658203125, -3.7611083984375, -3.61322021484375, -3.46533203125, -3.31744384765625, -3.1695556640625, -3.02166748046875, -2.873779296875, -2.72589111328125, -2.5780029296875, -2.43011474609375, -2.2822265625, -2.13433837890625, -1.9864501953125, -1.83856201171875, -1.690673828125, -1.54278564453125, -1.3948974609375, -1.24700927734375, -1.09912109375, -0.95123291015625, -0.8033447265625, -0.65545654296875, -0.507568359375, -0.35968017578125, -0.2117919921875, -0.06390380859375, 0.083984375, 0.23187255859375, 0.3797607421875, 0.52764892578125, 0.675537109375, 0.82342529296875, 0.9713134765625, 1.11920166015625, 1.26708984375, 1.41497802734375, 1.5628662109375, 1.71075439453125, 1.858642578125, 2.00653076171875, 2.1544189453125, 2.30230712890625, 2.4501953125, 2.59808349609375, 2.7459716796875, 2.89385986328125, 3.041748046875, 3.18963623046875, 3.3375244140625, 3.48541259765625, 3.63330078125, 3.78118896484375, 3.9290771484375, 4.07696533203125, 4.224853515625, 4.37274169921875, 4.5206298828125, 4.66851806640625, 4.81640625]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 2.0, 7.0, 2.0, 4.0, 7.0, 8.0, 12.0, 16.0, 16.0, 28.0, 29.0, 36.0, 61.0, 56.0, 71.0, 59.0, 74.0, 80.0, 61.0, 67.0, 67.0, 47.0, 38.0, 28.0, 29.0, 34.0, 16.0, 10.0, 10.0, 4.0, 4.0, 8.0, 3.0, 3.0, 5.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.265625, -4.153839111328125, -4.04205322265625, -3.930267333984375, -3.8184814453125, -3.706695556640625, -3.59490966796875, -3.483123779296875, -3.371337890625, -3.259552001953125, -3.14776611328125, -3.035980224609375, -2.9241943359375, -2.812408447265625, -2.70062255859375, -2.588836669921875, -2.47705078125, -2.365264892578125, -2.25347900390625, -2.141693115234375, -2.0299072265625, -1.918121337890625, -1.80633544921875, -1.694549560546875, -1.582763671875, -1.470977783203125, -1.35919189453125, -1.247406005859375, -1.1356201171875, -1.023834228515625, -0.91204833984375, -0.800262451171875, -0.6884765625, -0.576690673828125, -0.46490478515625, -0.353118896484375, -0.2413330078125, -0.129547119140625, -0.01776123046875, 0.094024658203125, 0.205810546875, 0.317596435546875, 0.42938232421875, 0.541168212890625, 0.6529541015625, 0.764739990234375, 0.87652587890625, 0.988311767578125, 1.10009765625, 1.211883544921875, 1.32366943359375, 1.435455322265625, 1.5472412109375, 1.659027099609375, 1.77081298828125, 1.882598876953125, 1.994384765625, 2.106170654296875, 2.21795654296875, 2.329742431640625, 2.4415283203125, 2.553314208984375, 2.66510009765625, 2.776885986328125, 2.888671875]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 2.0, 4.0, 7.0, 7.0, 11.0, 9.0, 25.0, 22.0, 39.0, 76.0, 124.0, 248.0, 540.0, 1156.0, 3600.0, 19061.0, 254297.0, 709075.0, 50176.0, 6827.0, 1860.0, 655.0, 307.0, 167.0, 83.0, 63.0, 37.0, 16.0, 17.0, 12.0, 12.0, 6.0, 8.0, 2.0, 4.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-15.9453125, -15.516845703125, -15.08837890625, -14.659912109375, -14.2314453125, -13.802978515625, -13.37451171875, -12.946044921875, -12.517578125, -12.089111328125, -11.66064453125, -11.232177734375, -10.8037109375, -10.375244140625, -9.94677734375, -9.518310546875, -9.08984375, -8.661376953125, -8.23291015625, -7.804443359375, -7.3759765625, -6.947509765625, -6.51904296875, -6.090576171875, -5.662109375, -5.233642578125, -4.80517578125, -4.376708984375, -3.9482421875, -3.519775390625, -3.09130859375, -2.662841796875, -2.234375, -1.805908203125, -1.37744140625, -0.948974609375, -0.5205078125, -0.092041015625, 0.33642578125, 0.764892578125, 1.193359375, 1.621826171875, 2.05029296875, 2.478759765625, 2.9072265625, 3.335693359375, 3.76416015625, 4.192626953125, 4.62109375, 5.049560546875, 5.47802734375, 5.906494140625, 6.3349609375, 6.763427734375, 7.19189453125, 7.620361328125, 8.048828125, 8.477294921875, 8.90576171875, 9.334228515625, 9.7626953125, 10.191162109375, 10.61962890625, 11.048095703125, 11.4765625]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 5.0, 2.0, 2.0, 7.0, 4.0, 4.0, 8.0, 11.0, 18.0, 8.0, 20.0, 13.0, 20.0, 30.0, 22.0, 38.0, 40.0, 56.0, 56.0, 44.0, 51.0, 66.0, 55.0, 56.0, 58.0, 43.0, 51.0, 41.0, 31.0, 30.0, 20.0, 16.0, 15.0, 15.0, 13.0, 11.0, 12.0, 4.0, 4.0, 5.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-13.4609375, -12.9945068359375, -12.528076171875, -12.0616455078125, -11.59521484375, -11.1287841796875, -10.662353515625, -10.1959228515625, -9.7294921875, -9.2630615234375, -8.796630859375, -8.3302001953125, -7.86376953125, -7.3973388671875, -6.930908203125, -6.4644775390625, -5.998046875, -5.5316162109375, -5.065185546875, -4.5987548828125, -4.13232421875, -3.6658935546875, -3.199462890625, -2.7330322265625, -2.2666015625, -1.8001708984375, -1.333740234375, -0.8673095703125, -0.40087890625, 0.0655517578125, 0.531982421875, 0.9984130859375, 1.46484375, 1.9312744140625, 2.397705078125, 2.8641357421875, 3.33056640625, 3.7969970703125, 4.263427734375, 4.7298583984375, 5.1962890625, 5.6627197265625, 6.129150390625, 6.5955810546875, 7.06201171875, 7.5284423828125, 7.994873046875, 8.4613037109375, 8.927734375, 9.3941650390625, 9.860595703125, 10.3270263671875, 10.79345703125, 11.2598876953125, 11.726318359375, 12.1927490234375, 12.6591796875, 13.1256103515625, 13.592041015625, 14.0584716796875, 14.52490234375, 14.9913330078125, 15.457763671875, 15.9241943359375, 16.390625]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 6.0, 1.0, 5.0, 1.0, 3.0, 18.0, 18.0, 19.0, 24.0, 52.0, 111.0, 169.0, 502.0, 1566.0, 10858.0, 612636.0, 411610.0, 8787.0, 1360.0, 433.0, 169.0, 94.0, 54.0, 31.0, 14.0, 7.0, 6.0, 2.0, 4.0, 4.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-11.8671875, -11.5843505859375, -11.301513671875, -11.0186767578125, -10.73583984375, -10.4530029296875, -10.170166015625, -9.8873291015625, -9.6044921875, -9.3216552734375, -9.038818359375, -8.7559814453125, -8.47314453125, -8.1903076171875, -7.907470703125, -7.6246337890625, -7.341796875, -7.0589599609375, -6.776123046875, -6.4932861328125, -6.21044921875, -5.9276123046875, -5.644775390625, -5.3619384765625, -5.0791015625, -4.7962646484375, -4.513427734375, -4.2305908203125, -3.94775390625, -3.6649169921875, -3.382080078125, -3.0992431640625, -2.81640625, -2.5335693359375, -2.250732421875, -1.9678955078125, -1.68505859375, -1.4022216796875, -1.119384765625, -0.8365478515625, -0.5537109375, -0.2708740234375, 0.011962890625, 0.2947998046875, 0.57763671875, 0.8604736328125, 1.143310546875, 1.4261474609375, 1.708984375, 1.9918212890625, 2.274658203125, 2.5574951171875, 2.84033203125, 3.1231689453125, 3.406005859375, 3.6888427734375, 3.9716796875, 4.2545166015625, 4.537353515625, 4.8201904296875, 5.10302734375, 5.3858642578125, 5.668701171875, 5.9515380859375, 6.234375]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 4.0, 2.0, 6.0, 6.0, 12.0, 5.0, 23.0, 22.0, 32.0, 29.0, 53.0, 70.0, 73.0, 111.0, 91.0, 79.0, 99.0, 71.0, 51.0, 35.0, 40.0, 26.0, 21.0, 15.0, 8.0, 5.0, 5.0, 4.0, 5.0, 2.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.0007143020629882812, -0.0006958246231079102, -0.0006773471832275391, -0.000658869743347168, -0.0006403923034667969, -0.0006219148635864258, -0.0006034374237060547, -0.0005849599838256836, -0.0005664825439453125, -0.0005480051040649414, -0.0005295276641845703, -0.0005110502243041992, -0.0004925727844238281, -0.00047409534454345703, -0.00045561790466308594, -0.00043714046478271484, -0.00041866302490234375, -0.00040018558502197266, -0.00038170814514160156, -0.00036323070526123047, -0.0003447532653808594, -0.0003262758255004883, -0.0003077983856201172, -0.0002893209457397461, -0.000270843505859375, -0.0002523660659790039, -0.0002338886260986328, -0.00021541118621826172, -0.00019693374633789062, -0.00017845630645751953, -0.00015997886657714844, -0.00014150142669677734, -0.00012302398681640625, -0.00010454654693603516, -8.606910705566406e-05, -6.759166717529297e-05, -4.9114227294921875e-05, -3.063678741455078e-05, -1.2159347534179688e-05, 6.318092346191406e-06, 2.47955322265625e-05, 4.3272972106933594e-05, 6.175041198730469e-05, 8.022785186767578e-05, 9.870529174804688e-05, 0.00011718273162841797, 0.00013566017150878906, 0.00015413761138916016, 0.00017261505126953125, 0.00019109249114990234, 0.00020956993103027344, 0.00022804737091064453, 0.0002465248107910156, 0.0002650022506713867, 0.0002834796905517578, 0.0003019571304321289, 0.0003204345703125, 0.0003389120101928711, 0.0003573894500732422, 0.0003758668899536133, 0.0003943443298339844, 0.00041282176971435547, 0.00043129920959472656, 0.00044977664947509766, 0.00046825408935546875]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 6.0, 2.0, 8.0, 8.0, 17.0, 23.0, 35.0, 52.0, 100.0, 191.0, 396.0, 1170.0, 4598.0, 41298.0, 805507.0, 181552.0, 10535.0, 1922.0, 576.0, 250.0, 119.0, 77.0, 41.0, 24.0, 19.0, 10.0, 7.0, 4.0, 5.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 3.0, 1.0], "bins": [-8.65625, -8.44879150390625, -8.2413330078125, -8.03387451171875, -7.826416015625, -7.61895751953125, -7.4114990234375, -7.20404052734375, -6.99658203125, -6.78912353515625, -6.5816650390625, -6.37420654296875, -6.166748046875, -5.95928955078125, -5.7518310546875, -5.54437255859375, -5.3369140625, -5.12945556640625, -4.9219970703125, -4.71453857421875, -4.507080078125, -4.29962158203125, -4.0921630859375, -3.88470458984375, -3.67724609375, -3.46978759765625, -3.2623291015625, -3.05487060546875, -2.847412109375, -2.63995361328125, -2.4324951171875, -2.22503662109375, -2.017578125, -1.81011962890625, -1.6026611328125, -1.39520263671875, -1.187744140625, -0.98028564453125, -0.7728271484375, -0.56536865234375, -0.35791015625, -0.15045166015625, 0.0570068359375, 0.26446533203125, 0.471923828125, 0.67938232421875, 0.8868408203125, 1.09429931640625, 1.3017578125, 1.50921630859375, 1.7166748046875, 1.92413330078125, 2.131591796875, 2.33905029296875, 2.5465087890625, 2.75396728515625, 2.96142578125, 3.16888427734375, 3.3763427734375, 3.58380126953125, 3.791259765625, 3.99871826171875, 4.2061767578125, 4.41363525390625, 4.62109375]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 3.0, 4.0, 3.0, 9.0, 11.0, 16.0, 27.0, 32.0, 53.0, 76.0, 101.0, 113.0, 137.0, 136.0, 99.0, 64.0, 39.0, 35.0, 20.0, 9.0, 5.0, 6.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.3984375, -8.2083740234375, -8.018310546875, -7.8282470703125, -7.63818359375, -7.4481201171875, -7.258056640625, -7.0679931640625, -6.8779296875, -6.6878662109375, -6.497802734375, -6.3077392578125, -6.11767578125, -5.9276123046875, -5.737548828125, -5.5474853515625, -5.357421875, -5.1673583984375, -4.977294921875, -4.7872314453125, -4.59716796875, -4.4071044921875, -4.217041015625, -4.0269775390625, -3.8369140625, -3.6468505859375, -3.456787109375, -3.2667236328125, -3.07666015625, -2.8865966796875, -2.696533203125, -2.5064697265625, -2.31640625, -2.1263427734375, -1.936279296875, -1.7462158203125, -1.55615234375, -1.3660888671875, -1.176025390625, -0.9859619140625, -0.7958984375, -0.6058349609375, -0.415771484375, -0.2257080078125, -0.03564453125, 0.1544189453125, 0.344482421875, 0.5345458984375, 0.724609375, 0.9146728515625, 1.104736328125, 1.2947998046875, 1.48486328125, 1.6749267578125, 1.864990234375, 2.0550537109375, 2.2451171875, 2.4351806640625, 2.625244140625, 2.8153076171875, 3.00537109375, 3.1954345703125, 3.385498046875, 3.5755615234375, 3.765625]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 5.0, 9.0, 20.0, 30.0, 109.0, 196.0, 258.0, 211.0, 103.0, 50.0, 14.0, 4.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-221.7382354736328, -217.44447326660156, -213.1507110595703, -208.85693359375, -204.56317138671875, -200.2694091796875, -195.97564697265625, -191.681884765625, -187.3881072998047, -183.09434509277344, -178.8005828857422, -174.50680541992188, -170.21304321289062, -165.91928100585938, -161.62551879882812, -157.33175659179688, -153.03799438476562, -148.74423217773438, -144.45046997070312, -140.1566925048828, -135.86293029785156, -131.5691680908203, -127.27540588378906, -122.98163604736328, -118.6878662109375, -114.39410400390625, -110.10033416748047, -105.80657196044922, -101.51280212402344, -97.21903991699219, -92.92527770996094, -88.63150787353516, -84.33773803710938, -80.04397583007812, -75.75020599365234, -71.4564437866211, -67.16267395019531, -62.86891174316406, -58.57514572143555, -54.28137969970703, -49.98761749267578, -45.693851470947266, -41.40008544921875, -37.1063232421875, -32.81255340576172, -28.518789291381836, -24.225025177001953, -19.931259155273438, -15.637493133544922, -11.343727111816406, -7.049962043762207, -2.756196975708008, 1.5375690460205078, 5.831335067749023, 10.125099182128906, 14.418865203857422, 18.712631225585938, 23.006397247314453, 27.30016326904297, 31.59392738342285, 35.8876953125, 40.18145751953125, 44.475223541259766, 48.76898956298828, 53.0627555847168]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 8.0, 4.0, 6.0, 7.0, 11.0, 15.0, 4.0, 16.0, 16.0, 19.0, 23.0, 28.0, 36.0, 41.0, 42.0, 43.0, 62.0, 72.0, 72.0, 51.0, 62.0, 47.0, 45.0, 56.0, 42.0, 40.0, 24.0, 22.0, 18.0, 13.0, 15.0, 13.0, 6.0, 13.0, 9.0, 5.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0], "bins": [-91.30396270751953, -89.07377624511719, -86.84359741210938, -84.61341094970703, -82.38323211669922, -80.15304565429688, -77.92286682128906, -75.69268035888672, -73.46249389648438, -71.23230743408203, -69.00212860107422, -66.77194213867188, -64.54176330566406, -62.31157684326172, -60.08139419555664, -57.85121154785156, -55.62103271484375, -53.39085006713867, -51.160667419433594, -48.930484771728516, -46.70030212402344, -44.470115661621094, -42.239933013916016, -40.00975036621094, -37.77956771850586, -35.54938507080078, -33.3192024230957, -31.089017868041992, -28.858835220336914, -26.628652572631836, -24.398468017578125, -22.168285369873047, -19.9381103515625, -17.707927703857422, -15.477744102478027, -13.247560501098633, -11.017377853393555, -8.787195205688477, -6.557011604309082, -4.3268280029296875, -2.0966453552246094, 0.13353776931762695, 2.3637208938598633, 4.5939040184021, 6.824087142944336, 9.054269790649414, 11.284453392028809, 13.514636993408203, 15.744819641113281, 17.97500228881836, 20.205184936523438, 22.43536949157715, 24.665552139282227, 26.895734786987305, 29.125919342041016, 31.356101989746094, 33.58628463745117, 35.81646728515625, 38.04664993286133, 40.276832580566406, 42.50701904296875, 44.73719787597656, 46.967384338378906, 49.197566986083984, 51.42774963378906]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 2.0, 4.0, 11.0, 9.0, 11.0, 9.0, 10.0, 20.0, 25.0, 33.0, 44.0, 65.0, 117.0, 182.0, 347.0, 666.0, 1638.0, 4433.0, 20373.0, 4026224.0, 124270.0, 10675.0, 2803.0, 1171.0, 523.0, 244.0, 145.0, 93.0, 50.0, 29.0, 21.0, 16.0, 7.0, 6.0, 2.0, 2.0, 3.0, 0.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.1640625, -14.7630615234375, -14.362060546875, -13.9610595703125, -13.56005859375, -13.1590576171875, -12.758056640625, -12.3570556640625, -11.9560546875, -11.5550537109375, -11.154052734375, -10.7530517578125, -10.35205078125, -9.9510498046875, -9.550048828125, -9.1490478515625, -8.748046875, -8.3470458984375, -7.946044921875, -7.5450439453125, -7.14404296875, -6.7430419921875, -6.342041015625, -5.9410400390625, -5.5400390625, -5.1390380859375, -4.738037109375, -4.3370361328125, -3.93603515625, -3.5350341796875, -3.134033203125, -2.7330322265625, -2.33203125, -1.9310302734375, -1.530029296875, -1.1290283203125, -0.72802734375, -0.3270263671875, 0.073974609375, 0.4749755859375, 0.8759765625, 1.2769775390625, 1.677978515625, 2.0789794921875, 2.47998046875, 2.8809814453125, 3.281982421875, 3.6829833984375, 4.083984375, 4.4849853515625, 4.885986328125, 5.2869873046875, 5.68798828125, 6.0889892578125, 6.489990234375, 6.8909912109375, 7.2919921875, 7.6929931640625, 8.093994140625, 8.4949951171875, 8.89599609375, 9.2969970703125, 9.697998046875, 10.0989990234375, 10.5]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 0.0, 2.0, 4.0, 0.0, 6.0, 6.0, 10.0, 12.0, 18.0, 14.0, 34.0, 49.0, 69.0, 96.0, 91.0, 118.0, 115.0, 83.0, 80.0, 54.0, 40.0, 33.0, 23.0, 20.0, 4.0, 7.0, 3.0, 5.0, 2.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.70703125, -6.53717041015625, -6.3673095703125, -6.19744873046875, -6.027587890625, -5.85772705078125, -5.6878662109375, -5.51800537109375, -5.34814453125, -5.17828369140625, -5.0084228515625, -4.83856201171875, -4.668701171875, -4.49884033203125, -4.3289794921875, -4.15911865234375, -3.9892578125, -3.81939697265625, -3.6495361328125, -3.47967529296875, -3.309814453125, -3.13995361328125, -2.9700927734375, -2.80023193359375, -2.63037109375, -2.46051025390625, -2.2906494140625, -2.12078857421875, -1.950927734375, -1.78106689453125, -1.6112060546875, -1.44134521484375, -1.271484375, -1.10162353515625, -0.9317626953125, -0.76190185546875, -0.592041015625, -0.42218017578125, -0.2523193359375, -0.08245849609375, 0.08740234375, 0.25726318359375, 0.4271240234375, 0.59698486328125, 0.766845703125, 0.93670654296875, 1.1065673828125, 1.27642822265625, 1.4462890625, 1.61614990234375, 1.7860107421875, 1.95587158203125, 2.125732421875, 2.29559326171875, 2.4654541015625, 2.63531494140625, 2.80517578125, 2.97503662109375, 3.1448974609375, 3.31475830078125, 3.484619140625, 3.65447998046875, 3.8243408203125, 3.99420166015625, 4.1640625]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 5.0, 2.0, 1.0, 4.0, 3.0, 8.0, 7.0, 12.0, 29.0, 37.0, 37.0, 69.0, 111.0, 170.0, 290.0, 448.0, 741.0, 1500.0, 3509.0, 11471.0, 97178.0, 4025778.0, 39849.0, 7533.0, 2635.0, 1222.0, 613.0, 387.0, 241.0, 143.0, 93.0, 56.0, 38.0, 22.0, 13.0, 13.0, 10.0, 8.0, 2.0, 4.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.4765625, -14.0289306640625, -13.581298828125, -13.1336669921875, -12.68603515625, -12.2384033203125, -11.790771484375, -11.3431396484375, -10.8955078125, -10.4478759765625, -10.000244140625, -9.5526123046875, -9.10498046875, -8.6573486328125, -8.209716796875, -7.7620849609375, -7.314453125, -6.8668212890625, -6.419189453125, -5.9715576171875, -5.52392578125, -5.0762939453125, -4.628662109375, -4.1810302734375, -3.7333984375, -3.2857666015625, -2.838134765625, -2.3905029296875, -1.94287109375, -1.4952392578125, -1.047607421875, -0.5999755859375, -0.15234375, 0.2952880859375, 0.742919921875, 1.1905517578125, 1.63818359375, 2.0858154296875, 2.533447265625, 2.9810791015625, 3.4287109375, 3.8763427734375, 4.323974609375, 4.7716064453125, 5.21923828125, 5.6668701171875, 6.114501953125, 6.5621337890625, 7.009765625, 7.4573974609375, 7.905029296875, 8.3526611328125, 8.80029296875, 9.2479248046875, 9.695556640625, 10.1431884765625, 10.5908203125, 11.0384521484375, 11.486083984375, 11.9337158203125, 12.38134765625, 12.8289794921875, 13.276611328125, 13.7242431640625, 14.171875]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 4.0, 2.0, 6.0, 9.0, 6.0, 15.0, 25.0, 66.0, 182.0, 3385.0, 224.0, 71.0, 33.0, 19.0, 14.0, 6.0, 4.0, 1.0, 3.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.06640625, -5.8543701171875, -5.642333984375, -5.4302978515625, -5.21826171875, -5.0062255859375, -4.794189453125, -4.5821533203125, -4.3701171875, -4.1580810546875, -3.946044921875, -3.7340087890625, -3.52197265625, -3.3099365234375, -3.097900390625, -2.8858642578125, -2.673828125, -2.4617919921875, -2.249755859375, -2.0377197265625, -1.82568359375, -1.6136474609375, -1.401611328125, -1.1895751953125, -0.9775390625, -0.7655029296875, -0.553466796875, -0.3414306640625, -0.12939453125, 0.0826416015625, 0.294677734375, 0.5067138671875, 0.71875, 0.9307861328125, 1.142822265625, 1.3548583984375, 1.56689453125, 1.7789306640625, 1.990966796875, 2.2030029296875, 2.4150390625, 2.6270751953125, 2.839111328125, 3.0511474609375, 3.26318359375, 3.4752197265625, 3.687255859375, 3.8992919921875, 4.111328125, 4.3233642578125, 4.535400390625, 4.7474365234375, 4.95947265625, 5.1715087890625, 5.383544921875, 5.5955810546875, 5.8076171875, 6.0196533203125, 6.231689453125, 6.4437255859375, 6.65576171875, 6.8677978515625, 7.079833984375, 7.2918701171875, 7.50390625]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 6.0, 1.0, 3.0, 2.0, 7.0, 10.0, 9.0, 10.0, 14.0, 28.0, 26.0, 48.0, 91.0, 83.0, 125.0, 143.0, 133.0, 90.0, 57.0, 54.0, 21.0, 17.0, 12.0, 7.0, 5.0, 5.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-25.181058883666992, -24.566375732421875, -23.95169448852539, -23.337011337280273, -22.72233009338379, -22.107646942138672, -21.492965698242188, -20.87828254699707, -20.263599395751953, -19.648916244506836, -19.03423500061035, -18.419551849365234, -17.80487060546875, -17.190187454223633, -16.575504302978516, -15.960823059082031, -15.34614086151123, -14.73145866394043, -14.116776466369629, -13.502094268798828, -12.887411117553711, -12.27272891998291, -11.65804672241211, -11.043363571166992, -10.428682327270508, -9.814000129699707, -9.199317932128906, -8.584634780883789, -7.969952583312988, -7.3552703857421875, -6.740588188171387, -6.125905513763428, -5.511222839355469, -4.896540641784668, -4.281857967376709, -3.667175769805908, -3.0524933338165283, -2.4378108978271484, -1.8231287002563477, -1.2084460258483887, -0.5937638282775879, 0.020918548107147217, 0.6356009244918823, 1.2502832412719727, 1.8649656772613525, 2.4796481132507324, 3.094330310821533, 3.709012985229492, 4.323695182800293, 4.938377380371094, 5.553060054779053, 6.1677422523498535, 6.7824249267578125, 7.397107124328613, 8.011789321899414, 8.626472473144531, 9.241153717041016, 9.855835914611816, 10.470518112182617, 11.085201263427734, 11.699883460998535, 12.314565658569336, 12.929247856140137, 13.543930053710938, 14.158613204956055]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 3.0, 2.0, 5.0, 6.0, 6.0, 6.0, 9.0, 12.0, 24.0, 23.0, 27.0, 26.0, 26.0, 30.0, 33.0, 32.0, 44.0, 43.0, 45.0, 36.0, 54.0, 36.0, 51.0, 48.0, 45.0, 48.0, 34.0, 36.0, 29.0, 34.0, 28.0, 25.0, 14.0, 14.0, 9.0, 13.0, 5.0, 12.0, 12.0, 5.0, 8.0, 2.0, 3.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-11.813680648803711, -11.383864402770996, -10.954048156738281, -10.524232864379883, -10.094416618347168, -9.664600372314453, -9.234784126281738, -8.804967880249023, -8.375152587890625, -7.94533634185791, -7.5155205726623535, -7.085704326629639, -6.655888557434082, -6.226072311401367, -5.796256065368652, -5.366440296173096, -4.936624050140381, -4.506807804107666, -4.076992034912109, -3.6471757888793945, -3.217360019683838, -2.787543773651123, -2.3577277660369873, -1.9279117584228516, -1.4980957508087158, -1.06827974319458, -0.6384636759757996, -0.20864760875701904, 0.2211683988571167, 0.650984525680542, 1.0808005332946777, 1.5106165409088135, 1.9404325485229492, 2.370248556137085, 2.8000645637512207, 3.2298808097839355, 3.659696578979492, 4.089512825012207, 4.519329071044922, 4.9491448402404785, 5.378960609436035, 5.80877685546875, 6.238592624664307, 6.6684088706970215, 7.098224639892578, 7.528040885925293, 7.957857131958008, 8.387672424316406, 8.817489624023438, 9.247305870056152, 9.677122116088867, 10.106937408447266, 10.53675365447998, 10.966569900512695, 11.39638614654541, 11.826202392578125, 12.256017684936523, 12.685833930969238, 13.115650177001953, 13.545465469360352, 13.975281715393066, 14.405097961425781, 14.834914207458496, 15.264730453491211, 15.69454574584961]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 7.0, 9.0, 15.0, 35.0, 60.0, 126.0, 280.0, 795.0, 2357.0, 10621.0, 85296.0, 697423.0, 224015.0, 21455.0, 4162.0, 1144.0, 424.0, 167.0, 68.0, 39.0, 22.0, 18.0, 5.0, 6.0, 5.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.9296875, -9.4920654296875, -9.054443359375, -8.6168212890625, -8.17919921875, -7.7415771484375, -7.303955078125, -6.8663330078125, -6.4287109375, -5.9910888671875, -5.553466796875, -5.1158447265625, -4.67822265625, -4.2406005859375, -3.802978515625, -3.3653564453125, -2.927734375, -2.4901123046875, -2.052490234375, -1.6148681640625, -1.17724609375, -0.7396240234375, -0.302001953125, 0.1356201171875, 0.5732421875, 1.0108642578125, 1.448486328125, 1.8861083984375, 2.32373046875, 2.7613525390625, 3.198974609375, 3.6365966796875, 4.07421875, 4.5118408203125, 4.949462890625, 5.3870849609375, 5.82470703125, 6.2623291015625, 6.699951171875, 7.1375732421875, 7.5751953125, 8.0128173828125, 8.450439453125, 8.8880615234375, 9.32568359375, 9.7633056640625, 10.200927734375, 10.6385498046875, 11.076171875, 11.5137939453125, 11.951416015625, 12.3890380859375, 12.82666015625, 13.2642822265625, 13.701904296875, 14.1395263671875, 14.5771484375, 15.0147705078125, 15.452392578125, 15.8900146484375, 16.32763671875, 16.7652587890625, 17.202880859375, 17.6405029296875, 18.078125]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 4.0, 3.0, 5.0, 3.0, 4.0, 6.0, 7.0, 10.0, 17.0, 25.0, 25.0, 30.0, 41.0, 33.0, 56.0, 55.0, 65.0, 68.0, 75.0, 71.0, 64.0, 61.0, 56.0, 41.0, 43.0, 32.0, 21.0, 18.0, 15.0, 12.0, 5.0, 9.0, 5.0, 4.0, 3.0, 2.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-4.59765625, -4.47406005859375, -4.3504638671875, -4.22686767578125, -4.103271484375, -3.97967529296875, -3.8560791015625, -3.73248291015625, -3.60888671875, -3.48529052734375, -3.3616943359375, -3.23809814453125, -3.114501953125, -2.99090576171875, -2.8673095703125, -2.74371337890625, -2.6201171875, -2.49652099609375, -2.3729248046875, -2.24932861328125, -2.125732421875, -2.00213623046875, -1.8785400390625, -1.75494384765625, -1.63134765625, -1.50775146484375, -1.3841552734375, -1.26055908203125, -1.136962890625, -1.01336669921875, -0.8897705078125, -0.76617431640625, -0.642578125, -0.51898193359375, -0.3953857421875, -0.27178955078125, -0.148193359375, -0.02459716796875, 0.0989990234375, 0.22259521484375, 0.34619140625, 0.46978759765625, 0.5933837890625, 0.71697998046875, 0.840576171875, 0.96417236328125, 1.0877685546875, 1.21136474609375, 1.3349609375, 1.45855712890625, 1.5821533203125, 1.70574951171875, 1.829345703125, 1.95294189453125, 2.0765380859375, 2.20013427734375, 2.32373046875, 2.44732666015625, 2.5709228515625, 2.69451904296875, 2.818115234375, 2.94171142578125, 3.0653076171875, 3.18890380859375, 3.3125]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 3.0, 5.0, 9.0, 4.0, 13.0, 13.0, 21.0, 43.0, 46.0, 63.0, 129.0, 249.0, 510.0, 1196.0, 3268.0, 12793.0, 94489.0, 725717.0, 182705.0, 19970.0, 4441.0, 1548.0, 595.0, 312.0, 139.0, 83.0, 56.0, 43.0, 31.0, 10.0, 18.0, 9.0, 8.0, 5.0, 5.0, 0.0, 0.0, 5.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-13.7109375, -13.30322265625, -12.8955078125, -12.48779296875, -12.080078125, -11.67236328125, -11.2646484375, -10.85693359375, -10.44921875, -10.04150390625, -9.6337890625, -9.22607421875, -8.818359375, -8.41064453125, -8.0029296875, -7.59521484375, -7.1875, -6.77978515625, -6.3720703125, -5.96435546875, -5.556640625, -5.14892578125, -4.7412109375, -4.33349609375, -3.92578125, -3.51806640625, -3.1103515625, -2.70263671875, -2.294921875, -1.88720703125, -1.4794921875, -1.07177734375, -0.6640625, -0.25634765625, 0.1513671875, 0.55908203125, 0.966796875, 1.37451171875, 1.7822265625, 2.18994140625, 2.59765625, 3.00537109375, 3.4130859375, 3.82080078125, 4.228515625, 4.63623046875, 5.0439453125, 5.45166015625, 5.859375, 6.26708984375, 6.6748046875, 7.08251953125, 7.490234375, 7.89794921875, 8.3056640625, 8.71337890625, 9.12109375, 9.52880859375, 9.9365234375, 10.34423828125, 10.751953125, 11.15966796875, 11.5673828125, 11.97509765625, 12.3828125]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 4.0, 2.0, 4.0, 4.0, 2.0, 9.0, 15.0, 8.0, 11.0, 13.0, 17.0, 23.0, 31.0, 22.0, 31.0, 40.0, 53.0, 35.0, 38.0, 49.0, 63.0, 54.0, 51.0, 58.0, 40.0, 37.0, 35.0, 46.0, 39.0, 31.0, 26.0, 21.0, 17.0, 17.0, 11.0, 8.0, 14.0, 9.0, 6.0, 7.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0], "bins": [-16.15625, -15.7005615234375, -15.244873046875, -14.7891845703125, -14.33349609375, -13.8778076171875, -13.422119140625, -12.9664306640625, -12.5107421875, -12.0550537109375, -11.599365234375, -11.1436767578125, -10.68798828125, -10.2322998046875, -9.776611328125, -9.3209228515625, -8.865234375, -8.4095458984375, -7.953857421875, -7.4981689453125, -7.04248046875, -6.5867919921875, -6.131103515625, -5.6754150390625, -5.2197265625, -4.7640380859375, -4.308349609375, -3.8526611328125, -3.39697265625, -2.9412841796875, -2.485595703125, -2.0299072265625, -1.57421875, -1.1185302734375, -0.662841796875, -0.2071533203125, 0.24853515625, 0.7042236328125, 1.159912109375, 1.6156005859375, 2.0712890625, 2.5269775390625, 2.982666015625, 3.4383544921875, 3.89404296875, 4.3497314453125, 4.805419921875, 5.2611083984375, 5.716796875, 6.1724853515625, 6.628173828125, 7.0838623046875, 7.53955078125, 7.9952392578125, 8.450927734375, 8.9066162109375, 9.3623046875, 9.8179931640625, 10.273681640625, 10.7293701171875, 11.18505859375, 11.6407470703125, 12.096435546875, 12.5521240234375, 13.0078125]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 2.0, 3.0, 3.0, 1.0, 2.0, 6.0, 1.0, 8.0, 12.0, 18.0, 31.0, 32.0, 58.0, 76.0, 150.0, 274.0, 558.0, 1529.0, 6622.0, 90667.0, 884506.0, 56434.0, 5099.0, 1308.0, 524.0, 257.0, 134.0, 79.0, 58.0, 38.0, 18.0, 16.0, 9.0, 11.0, 6.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.15234375, -5.95660400390625, -5.7608642578125, -5.56512451171875, -5.369384765625, -5.17364501953125, -4.9779052734375, -4.78216552734375, -4.58642578125, -4.39068603515625, -4.1949462890625, -3.99920654296875, -3.803466796875, -3.60772705078125, -3.4119873046875, -3.21624755859375, -3.0205078125, -2.82476806640625, -2.6290283203125, -2.43328857421875, -2.237548828125, -2.04180908203125, -1.8460693359375, -1.65032958984375, -1.45458984375, -1.25885009765625, -1.0631103515625, -0.86737060546875, -0.671630859375, -0.47589111328125, -0.2801513671875, -0.08441162109375, 0.111328125, 0.30706787109375, 0.5028076171875, 0.69854736328125, 0.894287109375, 1.09002685546875, 1.2857666015625, 1.48150634765625, 1.67724609375, 1.87298583984375, 2.0687255859375, 2.26446533203125, 2.460205078125, 2.65594482421875, 2.8516845703125, 3.04742431640625, 3.2431640625, 3.43890380859375, 3.6346435546875, 3.83038330078125, 4.026123046875, 4.22186279296875, 4.4176025390625, 4.61334228515625, 4.80908203125, 5.00482177734375, 5.2005615234375, 5.39630126953125, 5.592041015625, 5.78778076171875, 5.9835205078125, 6.17926025390625, 6.375]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 9.0, 10.0, 12.0, 44.0, 58.0, 135.0, 188.0, 246.0, 138.0, 84.0, 40.0, 15.0, 14.0, 9.0, 5.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00211334228515625, -0.002069428563117981, -0.002025514841079712, -0.001981601119041443, -0.0019376873970031738, -0.0018937736749649048, -0.0018498599529266357, -0.0018059462308883667, -0.0017620325088500977, -0.0017181187868118286, -0.0016742050647735596, -0.0016302913427352905, -0.0015863776206970215, -0.0015424638986587524, -0.0014985501766204834, -0.0014546364545822144, -0.0014107227325439453, -0.0013668090105056763, -0.0013228952884674072, -0.0012789815664291382, -0.0012350678443908691, -0.0011911541223526, -0.001147240400314331, -0.001103326678276062, -0.001059412956237793, -0.001015499234199524, -0.0009715855121612549, -0.0009276717901229858, -0.0008837580680847168, -0.0008398443460464478, -0.0007959306240081787, -0.0007520169019699097, -0.0007081031799316406, -0.0006641894578933716, -0.0006202757358551025, -0.0005763620138168335, -0.0005324482917785645, -0.0004885345697402954, -0.00044462084770202637, -0.0004007071256637573, -0.0003567934036254883, -0.00031287968158721924, -0.0002689659595489502, -0.00022505223751068115, -0.0001811385154724121, -0.00013722479343414307, -9.331107139587402e-05, -4.939734935760498e-05, -5.4836273193359375e-06, 3.8430094718933105e-05, 8.234381675720215e-05, 0.0001262575387954712, 0.00017017126083374023, 0.00021408498287200928, 0.0002579987049102783, 0.00030191242694854736, 0.0003458261489868164, 0.00038973987102508545, 0.0004336535930633545, 0.00047756731510162354, 0.0005214810371398926, 0.0005653947591781616, 0.0006093084812164307, 0.0006532222032546997, 0.0006971359252929688]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 1.0, 0.0, 3.0, 1.0, 3.0, 8.0, 6.0, 11.0, 18.0, 26.0, 48.0, 95.0, 138.0, 269.0, 598.0, 1578.0, 6099.0, 56740.0, 853405.0, 117199.0, 8854.0, 1997.0, 724.0, 330.0, 167.0, 94.0, 49.0, 30.0, 22.0, 17.0, 12.0, 7.0, 2.0, 6.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-6.0859375, -5.91009521484375, -5.7342529296875, -5.55841064453125, -5.382568359375, -5.20672607421875, -5.0308837890625, -4.85504150390625, -4.67919921875, -4.50335693359375, -4.3275146484375, -4.15167236328125, -3.975830078125, -3.79998779296875, -3.6241455078125, -3.44830322265625, -3.2724609375, -3.09661865234375, -2.9207763671875, -2.74493408203125, -2.569091796875, -2.39324951171875, -2.2174072265625, -2.04156494140625, -1.86572265625, -1.68988037109375, -1.5140380859375, -1.33819580078125, -1.162353515625, -0.98651123046875, -0.8106689453125, -0.63482666015625, -0.458984375, -0.28314208984375, -0.1072998046875, 0.06854248046875, 0.244384765625, 0.42022705078125, 0.5960693359375, 0.77191162109375, 0.94775390625, 1.12359619140625, 1.2994384765625, 1.47528076171875, 1.651123046875, 1.82696533203125, 2.0028076171875, 2.17864990234375, 2.3544921875, 2.53033447265625, 2.7061767578125, 2.88201904296875, 3.057861328125, 3.23370361328125, 3.4095458984375, 3.58538818359375, 3.76123046875, 3.93707275390625, 4.1129150390625, 4.28875732421875, 4.464599609375, 4.64044189453125, 4.8162841796875, 4.99212646484375, 5.16796875]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 5.0, 4.0, 4.0, 8.0, 7.0, 11.0, 12.0, 8.0, 23.0, 35.0, 43.0, 77.0, 71.0, 111.0, 107.0, 118.0, 103.0, 79.0, 60.0, 32.0, 30.0, 30.0, 13.0, 10.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.70703125, -4.56365966796875, -4.4202880859375, -4.27691650390625, -4.133544921875, -3.99017333984375, -3.8468017578125, -3.70343017578125, -3.56005859375, -3.41668701171875, -3.2733154296875, -3.12994384765625, -2.986572265625, -2.84320068359375, -2.6998291015625, -2.55645751953125, -2.4130859375, -2.26971435546875, -2.1263427734375, -1.98297119140625, -1.839599609375, -1.69622802734375, -1.5528564453125, -1.40948486328125, -1.26611328125, -1.12274169921875, -0.9793701171875, -0.83599853515625, -0.692626953125, -0.54925537109375, -0.4058837890625, -0.26251220703125, -0.119140625, 0.02423095703125, 0.1676025390625, 0.31097412109375, 0.454345703125, 0.59771728515625, 0.7410888671875, 0.88446044921875, 1.02783203125, 1.17120361328125, 1.3145751953125, 1.45794677734375, 1.601318359375, 1.74468994140625, 1.8880615234375, 2.03143310546875, 2.1748046875, 2.31817626953125, 2.4615478515625, 2.60491943359375, 2.748291015625, 2.89166259765625, 3.0350341796875, 3.17840576171875, 3.32177734375, 3.46514892578125, 3.6085205078125, 3.75189208984375, 3.895263671875, 4.03863525390625, 4.1820068359375, 4.32537841796875, 4.46875]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 9.0, 15.0, 46.0, 87.0, 173.0, 270.0, 206.0, 112.0, 52.0, 26.0, 9.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-178.2339324951172, -174.05209350585938, -169.8702392578125, -165.6884002685547, -161.50656127929688, -157.32470703125, -153.1428680419922, -148.9610137939453, -144.7791748046875, -140.5973358154297, -136.4154815673828, -132.233642578125, -128.05178833007812, -123.86994934082031, -119.6881103515625, -115.50626373291016, -111.32441711425781, -107.14257049560547, -102.96072387695312, -98.77888488769531, -94.59703826904297, -90.41519165039062, -86.23335266113281, -82.05150604248047, -77.86965942382812, -73.68781280517578, -69.50596618652344, -65.32412719726562, -61.14228057861328, -56.96043395996094, -52.77859115600586, -48.59674835205078, -44.4149169921875, -40.233070373535156, -36.05122756958008, -31.869382858276367, -27.687538146972656, -23.505693435668945, -19.323848724365234, -15.142004013061523, -10.960159301757812, -6.778314590454102, -2.5964698791503906, 1.5853748321533203, 5.767219543457031, 9.949064254760742, 14.130908966064453, 18.312753677368164, 22.494598388671875, 26.676443099975586, 30.858287811279297, 35.040130615234375, 39.22197723388672, 43.40382385253906, 47.58566665649414, 51.76750946044922, 55.94935607910156, 60.131202697753906, 64.31304931640625, 68.49488830566406, 72.6767349243164, 76.85858154296875, 81.04042053222656, 85.2222671508789, 89.40411376953125]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 3.0, 2.0, 1.0, 4.0, 8.0, 5.0, 14.0, 9.0, 10.0, 18.0, 17.0, 21.0, 28.0, 24.0, 38.0, 36.0, 47.0, 44.0, 37.0, 48.0, 57.0, 48.0, 69.0, 58.0, 37.0, 54.0, 50.0, 38.0, 31.0, 29.0, 29.0, 17.0, 16.0, 17.0, 14.0, 9.0, 7.0, 3.0, 7.0, 2.0, 2.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-50.782127380371094, -48.71908950805664, -46.65605163574219, -44.593013763427734, -42.52997589111328, -40.46693801879883, -38.403900146484375, -36.34086227416992, -34.27782440185547, -32.214786529541016, -30.151748657226562, -28.08871078491211, -26.025672912597656, -23.962635040283203, -21.89959716796875, -19.836559295654297, -17.773521423339844, -15.71048355102539, -13.647445678710938, -11.584407806396484, -9.521369934082031, -7.458332061767578, -5.395294189453125, -3.332256317138672, -1.2692184448242188, 0.7938194274902344, 2.8568572998046875, 4.919895172119141, 6.982933044433594, 9.045970916748047, 11.1090087890625, 13.172046661376953, 15.235076904296875, 17.298114776611328, 19.36115264892578, 21.424190521240234, 23.487228393554688, 25.55026626586914, 27.613304138183594, 29.676342010498047, 31.7393798828125, 33.80241775512695, 35.865455627441406, 37.92849349975586, 39.99153137207031, 42.054569244384766, 44.11760711669922, 46.18064498901367, 48.243682861328125, 50.30672073364258, 52.36975860595703, 54.432796478271484, 56.49583435058594, 58.55887222290039, 60.621910095214844, 62.6849479675293, 64.74798583984375, 66.81101989746094, 68.87406158447266, 70.93710327148438, 73.00013732910156, 75.06317138671875, 77.12621307373047, 79.18925476074219, 81.25228881835938]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 0.0, 2.0, 1.0, 4.0, 3.0, 3.0, 3.0, 4.0, 9.0, 12.0, 16.0, 22.0, 32.0, 67.0, 86.0, 195.0, 487.0, 1349.0, 4831.0, 28553.0, 1505011.0, 2610299.0, 35585.0, 5659.0, 1342.0, 415.0, 182.0, 56.0, 32.0, 14.0, 9.0, 2.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-15.890625, -15.53778076171875, -15.1849365234375, -14.83209228515625, -14.479248046875, -14.12640380859375, -13.7735595703125, -13.42071533203125, -13.06787109375, -12.71502685546875, -12.3621826171875, -12.00933837890625, -11.656494140625, -11.30364990234375, -10.9508056640625, -10.59796142578125, -10.2451171875, -9.89227294921875, -9.5394287109375, -9.18658447265625, -8.833740234375, -8.48089599609375, -8.1280517578125, -7.77520751953125, -7.42236328125, -7.06951904296875, -6.7166748046875, -6.36383056640625, -6.010986328125, -5.65814208984375, -5.3052978515625, -4.95245361328125, -4.599609375, -4.24676513671875, -3.8939208984375, -3.54107666015625, -3.188232421875, -2.83538818359375, -2.4825439453125, -2.12969970703125, -1.77685546875, -1.42401123046875, -1.0711669921875, -0.71832275390625, -0.365478515625, -0.01263427734375, 0.3402099609375, 0.69305419921875, 1.0458984375, 1.39874267578125, 1.7515869140625, 2.10443115234375, 2.457275390625, 2.81011962890625, 3.1629638671875, 3.51580810546875, 3.86865234375, 4.22149658203125, 4.5743408203125, 4.92718505859375, 5.280029296875, 5.63287353515625, 5.9857177734375, 6.33856201171875, 6.69140625]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 8.0, 9.0, 19.0, 19.0, 21.0, 41.0, 61.0, 72.0, 77.0, 90.0, 93.0, 94.0, 93.0, 88.0, 53.0, 60.0, 37.0, 30.0, 18.0, 8.0, 7.0, 5.0, 4.0, 2.0, 2.0, 1.0, 2.0], "bins": [-7.453125, -7.2972412109375, -7.141357421875, -6.9854736328125, -6.82958984375, -6.6737060546875, -6.517822265625, -6.3619384765625, -6.2060546875, -6.0501708984375, -5.894287109375, -5.7384033203125, -5.58251953125, -5.4266357421875, -5.270751953125, -5.1148681640625, -4.958984375, -4.8031005859375, -4.647216796875, -4.4913330078125, -4.33544921875, -4.1795654296875, -4.023681640625, -3.8677978515625, -3.7119140625, -3.5560302734375, -3.400146484375, -3.2442626953125, -3.08837890625, -2.9324951171875, -2.776611328125, -2.6207275390625, -2.46484375, -2.3089599609375, -2.153076171875, -1.9971923828125, -1.84130859375, -1.6854248046875, -1.529541015625, -1.3736572265625, -1.2177734375, -1.0618896484375, -0.906005859375, -0.7501220703125, -0.59423828125, -0.4383544921875, -0.282470703125, -0.1265869140625, 0.029296875, 0.1851806640625, 0.341064453125, 0.4969482421875, 0.65283203125, 0.8087158203125, 0.964599609375, 1.1204833984375, 1.2763671875, 1.4322509765625, 1.588134765625, 1.7440185546875, 1.89990234375, 2.0557861328125, 2.211669921875, 2.3675537109375, 2.5234375]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 2.0, 4.0, 3.0, 6.0, 4.0, 13.0, 19.0, 33.0, 36.0, 108.0, 192.0, 477.0, 1475.0, 4973.0, 23655.0, 279534.0, 3764339.0, 101253.0, 13559.0, 3043.0, 903.0, 313.0, 155.0, 86.0, 42.0, 26.0, 9.0, 9.0, 8.0, 2.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-13.1953125, -12.8321533203125, -12.468994140625, -12.1058349609375, -11.74267578125, -11.3795166015625, -11.016357421875, -10.6531982421875, -10.2900390625, -9.9268798828125, -9.563720703125, -9.2005615234375, -8.83740234375, -8.4742431640625, -8.111083984375, -7.7479248046875, -7.384765625, -7.0216064453125, -6.658447265625, -6.2952880859375, -5.93212890625, -5.5689697265625, -5.205810546875, -4.8426513671875, -4.4794921875, -4.1163330078125, -3.753173828125, -3.3900146484375, -3.02685546875, -2.6636962890625, -2.300537109375, -1.9373779296875, -1.57421875, -1.2110595703125, -0.847900390625, -0.4847412109375, -0.12158203125, 0.2415771484375, 0.604736328125, 0.9678955078125, 1.3310546875, 1.6942138671875, 2.057373046875, 2.4205322265625, 2.78369140625, 3.1468505859375, 3.510009765625, 3.8731689453125, 4.236328125, 4.5994873046875, 4.962646484375, 5.3258056640625, 5.68896484375, 6.0521240234375, 6.415283203125, 6.7784423828125, 7.1416015625, 7.5047607421875, 7.867919921875, 8.2310791015625, 8.59423828125, 8.9573974609375, 9.320556640625, 9.6837158203125, 10.046875]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 2.0, 5.0, 7.0, 8.0, 2.0, 10.0, 9.0, 13.0, 14.0, 24.0, 37.0, 42.0, 67.0, 111.0, 171.0, 350.0, 844.0, 1146.0, 521.0, 259.0, 135.0, 79.0, 44.0, 45.0, 35.0, 18.0, 20.0, 12.0, 15.0, 10.0, 4.0, 3.0, 1.0, 6.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.5546875, -7.30517578125, -7.0556640625, -6.80615234375, -6.556640625, -6.30712890625, -6.0576171875, -5.80810546875, -5.55859375, -5.30908203125, -5.0595703125, -4.81005859375, -4.560546875, -4.31103515625, -4.0615234375, -3.81201171875, -3.5625, -3.31298828125, -3.0634765625, -2.81396484375, -2.564453125, -2.31494140625, -2.0654296875, -1.81591796875, -1.56640625, -1.31689453125, -1.0673828125, -0.81787109375, -0.568359375, -0.31884765625, -0.0693359375, 0.18017578125, 0.4296875, 0.67919921875, 0.9287109375, 1.17822265625, 1.427734375, 1.67724609375, 1.9267578125, 2.17626953125, 2.42578125, 2.67529296875, 2.9248046875, 3.17431640625, 3.423828125, 3.67333984375, 3.9228515625, 4.17236328125, 4.421875, 4.67138671875, 4.9208984375, 5.17041015625, 5.419921875, 5.66943359375, 5.9189453125, 6.16845703125, 6.41796875, 6.66748046875, 6.9169921875, 7.16650390625, 7.416015625, 7.66552734375, 7.9150390625, 8.16455078125, 8.4140625]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 2.0, 0.0, 4.0, 17.0, 10.0, 27.0, 62.0, 148.0, 213.0, 211.0, 165.0, 64.0, 32.0, 26.0, 7.0, 4.0, 2.0, 7.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-76.34654235839844, -73.52115631103516, -70.69577026367188, -67.87037658691406, -65.04499053955078, -62.2196044921875, -59.39421844482422, -56.56883239746094, -53.74344253540039, -50.91805648803711, -48.09266662597656, -45.26728057861328, -42.44189453125, -39.61650466918945, -36.79111862182617, -33.965728759765625, -31.140342712402344, -28.31495475769043, -25.489566802978516, -22.664180755615234, -19.83879280090332, -17.013404846191406, -14.188018798828125, -11.362630844116211, -8.537242889404297, -5.711855411529541, -2.886467933654785, -0.0610809326171875, 2.7643070220947266, 5.589694976806641, 8.415081024169922, 11.240468978881836, 14.065864562988281, 16.891252517700195, 19.71664047241211, 22.54202651977539, 25.367414474487305, 28.19280242919922, 31.0181884765625, 33.84357452392578, 36.66896438598633, 39.49435043334961, 42.319740295410156, 45.14512634277344, 47.97051239013672, 50.795902252197266, 53.62128829956055, 56.446678161621094, 59.272064208984375, 62.097450256347656, 64.92283630371094, 67.74822998046875, 70.57361602783203, 73.39900207519531, 76.2243881225586, 79.04977416992188, 81.87516784667969, 84.70055389404297, 87.52593994140625, 90.35133361816406, 93.17671966552734, 96.00210571289062, 98.8274917602539, 101.65287780761719, 104.47826385498047]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 5.0, 1.0, 2.0, 4.0, 8.0, 6.0, 13.0, 18.0, 12.0, 13.0, 16.0, 16.0, 24.0, 31.0, 30.0, 44.0, 37.0, 46.0, 46.0, 50.0, 44.0, 54.0, 49.0, 39.0, 36.0, 48.0, 32.0, 41.0, 38.0, 33.0, 26.0, 23.0, 27.0, 18.0, 22.0, 9.0, 10.0, 6.0, 9.0, 5.0, 6.0, 4.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.3267936706543, -31.235261917114258, -30.14373016357422, -29.052196502685547, -27.960664749145508, -26.86913299560547, -25.777599334716797, -24.686067581176758, -23.59453582763672, -22.50300407409668, -21.41147232055664, -20.31993865966797, -19.22840690612793, -18.13687515258789, -17.04534149169922, -15.95380973815918, -14.86227798461914, -13.770746231079102, -12.679213523864746, -11.58768081665039, -10.496149063110352, -9.404617309570312, -8.313084602355957, -7.22155237197876, -6.1300201416015625, -5.038487911224365, -3.946955680847168, -2.8554234504699707, -1.7638912200927734, -0.6723589897155762, 0.4191732406616211, 1.5107054710388184, 2.6022377014160156, 3.693769931793213, 4.78530216217041, 5.876834392547607, 6.968366622924805, 8.059898376464844, 9.1514310836792, 10.242963790893555, 11.334495544433594, 12.426027297973633, 13.517560005187988, 14.609092712402344, 15.700624465942383, 16.792156219482422, 17.883689880371094, 18.975221633911133, 20.066753387451172, 21.15828514099121, 22.24981689453125, 23.341350555419922, 24.43288230895996, 25.5244140625, 26.615947723388672, 27.70747947692871, 28.79901123046875, 29.89054298400879, 30.982074737548828, 32.0736083984375, 33.165138244628906, 34.25667190551758, 35.34820556640625, 36.439735412597656, 37.53126907348633]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 4.0, 7.0, 4.0, 8.0, 18.0, 24.0, 33.0, 34.0, 47.0, 110.0, 179.0, 287.0, 457.0, 837.0, 1569.0, 2975.0, 5919.0, 12229.0, 28188.0, 69237.0, 173111.0, 350700.0, 237048.0, 94605.0, 38703.0, 16588.0, 7503.0, 3719.0, 1967.0, 1004.0, 586.0, 318.0, 190.0, 133.0, 73.0, 42.0, 31.0, 29.0, 14.0, 11.0, 9.0, 4.0, 2.0, 5.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-4.88671875, -4.74578857421875, -4.6048583984375, -4.46392822265625, -4.322998046875, -4.18206787109375, -4.0411376953125, -3.90020751953125, -3.75927734375, -3.61834716796875, -3.4774169921875, -3.33648681640625, -3.195556640625, -3.05462646484375, -2.9136962890625, -2.77276611328125, -2.6318359375, -2.49090576171875, -2.3499755859375, -2.20904541015625, -2.068115234375, -1.92718505859375, -1.7862548828125, -1.64532470703125, -1.50439453125, -1.36346435546875, -1.2225341796875, -1.08160400390625, -0.940673828125, -0.79974365234375, -0.6588134765625, -0.51788330078125, -0.376953125, -0.23602294921875, -0.0950927734375, 0.04583740234375, 0.186767578125, 0.32769775390625, 0.4686279296875, 0.60955810546875, 0.75048828125, 0.89141845703125, 1.0323486328125, 1.17327880859375, 1.314208984375, 1.45513916015625, 1.5960693359375, 1.73699951171875, 1.8779296875, 2.01885986328125, 2.1597900390625, 2.30072021484375, 2.441650390625, 2.58258056640625, 2.7235107421875, 2.86444091796875, 3.00537109375, 3.14630126953125, 3.2872314453125, 3.42816162109375, 3.569091796875, 3.71002197265625, 3.8509521484375, 3.99188232421875, 4.1328125]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 2.0, 2.0, 8.0, 3.0, 8.0, 13.0, 22.0, 17.0, 31.0, 27.0, 31.0, 31.0, 44.0, 44.0, 58.0, 56.0, 73.0, 62.0, 59.0, 60.0, 55.0, 45.0, 46.0, 47.0, 34.0, 34.0, 13.0, 27.0, 14.0, 13.0, 7.0, 7.0, 7.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.603515625, -3.49896240234375, -3.3944091796875, -3.28985595703125, -3.185302734375, -3.08074951171875, -2.9761962890625, -2.87164306640625, -2.76708984375, -2.66253662109375, -2.5579833984375, -2.45343017578125, -2.348876953125, -2.24432373046875, -2.1397705078125, -2.03521728515625, -1.9306640625, -1.82611083984375, -1.7215576171875, -1.61700439453125, -1.512451171875, -1.40789794921875, -1.3033447265625, -1.19879150390625, -1.09423828125, -0.98968505859375, -0.8851318359375, -0.78057861328125, -0.676025390625, -0.57147216796875, -0.4669189453125, -0.36236572265625, -0.2578125, -0.15325927734375, -0.0487060546875, 0.05584716796875, 0.160400390625, 0.26495361328125, 0.3695068359375, 0.47406005859375, 0.57861328125, 0.68316650390625, 0.7877197265625, 0.89227294921875, 0.996826171875, 1.10137939453125, 1.2059326171875, 1.31048583984375, 1.4150390625, 1.51959228515625, 1.6241455078125, 1.72869873046875, 1.833251953125, 1.93780517578125, 2.0423583984375, 2.14691162109375, 2.25146484375, 2.35601806640625, 2.4605712890625, 2.56512451171875, 2.669677734375, 2.77423095703125, 2.8787841796875, 2.98333740234375, 3.087890625]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 8.0, 2.0, 7.0, 8.0, 8.0, 16.0, 22.0, 31.0, 46.0, 76.0, 106.0, 170.0, 278.0, 408.0, 696.0, 1196.0, 2337.0, 5742.0, 20876.0, 117176.0, 654661.0, 199181.0, 31414.0, 7778.0, 2837.0, 1417.0, 849.0, 430.0, 295.0, 176.0, 95.0, 84.0, 52.0, 22.0, 18.0, 10.0, 10.0, 7.0, 7.0, 5.0, 3.0, 3.0, 1.0, 2.0], "bins": [-11.34375, -11.0645751953125, -10.785400390625, -10.5062255859375, -10.22705078125, -9.9478759765625, -9.668701171875, -9.3895263671875, -9.1103515625, -8.8311767578125, -8.552001953125, -8.2728271484375, -7.99365234375, -7.7144775390625, -7.435302734375, -7.1561279296875, -6.876953125, -6.5977783203125, -6.318603515625, -6.0394287109375, -5.76025390625, -5.4810791015625, -5.201904296875, -4.9227294921875, -4.6435546875, -4.3643798828125, -4.085205078125, -3.8060302734375, -3.52685546875, -3.2476806640625, -2.968505859375, -2.6893310546875, -2.41015625, -2.1309814453125, -1.851806640625, -1.5726318359375, -1.29345703125, -1.0142822265625, -0.735107421875, -0.4559326171875, -0.1767578125, 0.1024169921875, 0.381591796875, 0.6607666015625, 0.93994140625, 1.2191162109375, 1.498291015625, 1.7774658203125, 2.056640625, 2.3358154296875, 2.614990234375, 2.8941650390625, 3.17333984375, 3.4525146484375, 3.731689453125, 4.0108642578125, 4.2900390625, 4.5692138671875, 4.848388671875, 5.1275634765625, 5.40673828125, 5.6859130859375, 5.965087890625, 6.2442626953125, 6.5234375]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 5.0, 2.0, 13.0, 8.0, 10.0, 13.0, 16.0, 22.0, 22.0, 18.0, 26.0, 31.0, 33.0, 46.0, 44.0, 48.0, 44.0, 43.0, 46.0, 61.0, 48.0, 50.0, 41.0, 54.0, 24.0, 31.0, 29.0, 29.0, 31.0, 15.0, 14.0, 14.0, 14.0, 16.0, 6.0, 13.0, 5.0, 1.0, 6.0, 4.0, 1.0, 5.0, 0.0, 5.0, 2.0], "bins": [-15.7265625, -15.3177490234375, -14.908935546875, -14.5001220703125, -14.09130859375, -13.6824951171875, -13.273681640625, -12.8648681640625, -12.4560546875, -12.0472412109375, -11.638427734375, -11.2296142578125, -10.82080078125, -10.4119873046875, -10.003173828125, -9.5943603515625, -9.185546875, -8.7767333984375, -8.367919921875, -7.9591064453125, -7.55029296875, -7.1414794921875, -6.732666015625, -6.3238525390625, -5.9150390625, -5.5062255859375, -5.097412109375, -4.6885986328125, -4.27978515625, -3.8709716796875, -3.462158203125, -3.0533447265625, -2.64453125, -2.2357177734375, -1.826904296875, -1.4180908203125, -1.00927734375, -0.6004638671875, -0.191650390625, 0.2171630859375, 0.6259765625, 1.0347900390625, 1.443603515625, 1.8524169921875, 2.26123046875, 2.6700439453125, 3.078857421875, 3.4876708984375, 3.896484375, 4.3052978515625, 4.714111328125, 5.1229248046875, 5.53173828125, 5.9405517578125, 6.349365234375, 6.7581787109375, 7.1669921875, 7.5758056640625, 7.984619140625, 8.3934326171875, 8.80224609375, 9.2110595703125, 9.619873046875, 10.0286865234375, 10.4375]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 6.0, 7.0, 12.0, 11.0, 21.0, 44.0, 49.0, 112.0, 255.0, 590.0, 2068.0, 12500.0, 254487.0, 743270.0, 29970.0, 3609.0, 897.0, 313.0, 134.0, 77.0, 46.0, 28.0, 12.0, 17.0, 6.0, 8.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.91015625, -4.7474365234375, -4.584716796875, -4.4219970703125, -4.25927734375, -4.0965576171875, -3.933837890625, -3.7711181640625, -3.6083984375, -3.4456787109375, -3.282958984375, -3.1202392578125, -2.95751953125, -2.7947998046875, -2.632080078125, -2.4693603515625, -2.306640625, -2.1439208984375, -1.981201171875, -1.8184814453125, -1.65576171875, -1.4930419921875, -1.330322265625, -1.1676025390625, -1.0048828125, -0.8421630859375, -0.679443359375, -0.5167236328125, -0.35400390625, -0.1912841796875, -0.028564453125, 0.1341552734375, 0.296875, 0.4595947265625, 0.622314453125, 0.7850341796875, 0.94775390625, 1.1104736328125, 1.273193359375, 1.4359130859375, 1.5986328125, 1.7613525390625, 1.924072265625, 2.0867919921875, 2.24951171875, 2.4122314453125, 2.574951171875, 2.7376708984375, 2.900390625, 3.0631103515625, 3.225830078125, 3.3885498046875, 3.55126953125, 3.7139892578125, 3.876708984375, 4.0394287109375, 4.2021484375, 4.3648681640625, 4.527587890625, 4.6903076171875, 4.85302734375, 5.0157470703125, 5.178466796875, 5.3411865234375, 5.50390625]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 4.0, 2.0, 5.0, 5.0, 9.0, 10.0, 9.0, 16.0, 11.0, 22.0, 30.0, 36.0, 47.0, 47.0, 68.0, 79.0, 84.0, 82.0, 89.0, 65.0, 57.0, 31.0, 42.0, 41.0, 22.0, 22.0, 15.0, 12.0, 13.0, 9.0, 5.0, 4.0, 4.0, 1.0, 3.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004367828369140625, -0.0004205256700515747, -0.0004042685031890869, -0.0003880113363265991, -0.00037175416946411133, -0.00035549700260162354, -0.00033923983573913574, -0.00032298266887664795, -0.00030672550201416016, -0.00029046833515167236, -0.00027421116828918457, -0.0002579540014266968, -0.00024169683456420898, -0.0002254396677017212, -0.0002091825008392334, -0.0001929253339767456, -0.0001766681671142578, -0.00016041100025177002, -0.00014415383338928223, -0.00012789666652679443, -0.00011163949966430664, -9.538233280181885e-05, -7.912516593933105e-05, -6.286799907684326e-05, -4.661083221435547e-05, -3.0353665351867676e-05, -1.4096498489379883e-05, 2.16066837310791e-06, 1.8417835235595703e-05, 3.4675002098083496e-05, 5.093216896057129e-05, 6.718933582305908e-05, 8.344650268554688e-05, 9.970366954803467e-05, 0.00011596083641052246, 0.00013221800327301025, 0.00014847517013549805, 0.00016473233699798584, 0.00018098950386047363, 0.00019724667072296143, 0.00021350383758544922, 0.000229761004447937, 0.0002460181713104248, 0.0002622753381729126, 0.0002785325050354004, 0.0002947896718978882, 0.000311046838760376, 0.00032730400562286377, 0.00034356117248535156, 0.00035981833934783936, 0.00037607550621032715, 0.00039233267307281494, 0.00040858983993530273, 0.00042484700679779053, 0.0004411041736602783, 0.0004573613405227661, 0.0004736185073852539, 0.0004898756742477417, 0.0005061328411102295, 0.0005223900079727173, 0.0005386471748352051, 0.0005549043416976929, 0.0005711615085601807, 0.0005874186754226685, 0.0006036758422851562]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 6.0, 4.0, 8.0, 7.0, 21.0, 48.0, 64.0, 155.0, 323.0, 879.0, 2846.0, 15471.0, 284235.0, 704982.0, 32954.0, 4526.0, 1223.0, 425.0, 165.0, 95.0, 50.0, 19.0, 20.0, 14.0, 9.0, 3.0, 4.0, 3.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.30078125, -5.1541748046875, -5.007568359375, -4.8609619140625, -4.71435546875, -4.5677490234375, -4.421142578125, -4.2745361328125, -4.1279296875, -3.9813232421875, -3.834716796875, -3.6881103515625, -3.54150390625, -3.3948974609375, -3.248291015625, -3.1016845703125, -2.955078125, -2.8084716796875, -2.661865234375, -2.5152587890625, -2.36865234375, -2.2220458984375, -2.075439453125, -1.9288330078125, -1.7822265625, -1.6356201171875, -1.489013671875, -1.3424072265625, -1.19580078125, -1.0491943359375, -0.902587890625, -0.7559814453125, -0.609375, -0.4627685546875, -0.316162109375, -0.1695556640625, -0.02294921875, 0.1236572265625, 0.270263671875, 0.4168701171875, 0.5634765625, 0.7100830078125, 0.856689453125, 1.0032958984375, 1.14990234375, 1.2965087890625, 1.443115234375, 1.5897216796875, 1.736328125, 1.8829345703125, 2.029541015625, 2.1761474609375, 2.32275390625, 2.4693603515625, 2.615966796875, 2.7625732421875, 2.9091796875, 3.0557861328125, 3.202392578125, 3.3489990234375, 3.49560546875, 3.6422119140625, 3.788818359375, 3.9354248046875, 4.08203125]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 5.0, 2.0, 6.0, 6.0, 10.0, 6.0, 15.0, 21.0, 29.0, 28.0, 49.0, 55.0, 63.0, 59.0, 72.0, 84.0, 77.0, 78.0, 69.0, 60.0, 40.0, 43.0, 34.0, 33.0, 23.0, 9.0, 9.0, 5.0, 10.0, 8.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.888671875, -2.795196533203125, -2.70172119140625, -2.608245849609375, -2.5147705078125, -2.421295166015625, -2.32781982421875, -2.234344482421875, -2.140869140625, -2.047393798828125, -1.95391845703125, -1.860443115234375, -1.7669677734375, -1.673492431640625, -1.58001708984375, -1.486541748046875, -1.39306640625, -1.299591064453125, -1.20611572265625, -1.112640380859375, -1.0191650390625, -0.925689697265625, -0.83221435546875, -0.738739013671875, -0.645263671875, -0.551788330078125, -0.45831298828125, -0.364837646484375, -0.2713623046875, -0.177886962890625, -0.08441162109375, 0.009063720703125, 0.1025390625, 0.196014404296875, 0.28948974609375, 0.382965087890625, 0.4764404296875, 0.569915771484375, 0.66339111328125, 0.756866455078125, 0.850341796875, 0.943817138671875, 1.03729248046875, 1.130767822265625, 1.2242431640625, 1.317718505859375, 1.41119384765625, 1.504669189453125, 1.59814453125, 1.691619873046875, 1.78509521484375, 1.878570556640625, 1.9720458984375, 2.065521240234375, 2.15899658203125, 2.252471923828125, 2.345947265625, 2.439422607421875, 2.53289794921875, 2.626373291015625, 2.7198486328125, 2.813323974609375, 2.90679931640625, 3.000274658203125, 3.09375]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 3.0, 1.0, 3.0, 2.0, 6.0, 7.0, 7.0, 12.0, 22.0, 47.0, 84.0, 135.0, 199.0, 192.0, 141.0, 76.0, 35.0, 18.0, 4.0, 7.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-61.604705810546875, -59.14763641357422, -56.69056701660156, -54.233497619628906, -51.77642822265625, -49.319358825683594, -46.8622932434082, -44.40522384643555, -41.94815444946289, -39.491085052490234, -37.03401565551758, -34.57694625854492, -32.11988067626953, -29.662809371948242, -27.20574188232422, -24.748672485351562, -22.291603088378906, -19.83453369140625, -17.377464294433594, -14.92039680480957, -12.463327407836914, -10.006258010864258, -7.549189567565918, -5.092121124267578, -2.635051727294922, -0.17798280715942383, 2.279086112976074, 4.736155033111572, 7.19322395324707, 9.650293350219727, 12.107361793518066, 14.564430236816406, 17.02149200439453, 19.478561401367188, 21.935630798339844, 24.392698287963867, 26.849767684936523, 29.30683708190918, 31.763904571533203, 34.22097396850586, 36.678043365478516, 39.13511276245117, 41.59218215942383, 44.049251556396484, 46.506317138671875, 48.96338653564453, 51.42045593261719, 53.877525329589844, 56.3345947265625, 58.791664123535156, 61.24873352050781, 63.70580291748047, 66.16287231445312, 68.61994171142578, 71.07701110839844, 73.53407287597656, 75.99114990234375, 78.4482192993164, 80.90528869628906, 83.36235809326172, 85.81942749023438, 88.27649688720703, 90.73356628417969, 93.19062805175781, 95.64769744873047]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 4.0, 5.0, 8.0, 9.0, 12.0, 17.0, 15.0, 19.0, 21.0, 25.0, 26.0, 23.0, 32.0, 26.0, 30.0, 33.0, 39.0, 46.0, 52.0, 45.0, 55.0, 66.0, 42.0, 41.0, 39.0, 32.0, 42.0, 23.0, 30.0, 24.0, 8.0, 20.0, 17.0, 14.0, 11.0, 10.0, 10.0, 11.0, 6.0, 4.0, 2.0, 2.0, 1.0, 3.0, 5.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-46.37994384765625, -44.93372344970703, -43.48750686645508, -42.04128646850586, -40.595069885253906, -39.14884948730469, -37.70262908935547, -36.256412506103516, -34.8101921081543, -33.36397171020508, -31.917755126953125, -30.471534729003906, -29.02531623840332, -27.579097747802734, -26.13287925720215, -24.686660766601562, -23.240442276000977, -21.79422378540039, -20.348005294799805, -18.90178680419922, -17.45556640625, -16.009347915649414, -14.563129425048828, -13.116909980773926, -11.67069149017334, -10.224472999572754, -8.778253555297852, -7.332035064697266, -5.8858160972595215, -4.439597129821777, -2.9933786392211914, -1.547159194946289, -0.10094070434570312, 1.3452781438827515, 2.791496992111206, 4.237715721130371, 5.683934688568115, 7.130153656005859, 8.576372146606445, 10.022591590881348, 11.468810081481934, 12.91502857208252, 14.361248016357422, 15.807466506958008, 17.253684997558594, 18.699905395507812, 20.146121978759766, 21.592342376708984, 23.03856086730957, 24.484779357910156, 25.930997848510742, 27.377216339111328, 28.823436737060547, 30.269655227661133, 31.71587371826172, 33.16209411621094, 34.60831069946289, 36.05453109741211, 37.50074768066406, 38.94696807861328, 40.393184661865234, 41.83940505981445, 43.285621643066406, 44.731842041015625, 46.178062438964844]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 4.0, 3.0, 0.0, 3.0, 7.0, 6.0, 3.0, 12.0, 12.0, 18.0, 33.0, 37.0, 47.0, 78.0, 120.0, 180.0, 372.0, 962.0, 3483.0, 18902.0, 525415.0, 3582161.0, 52806.0, 6990.0, 1609.0, 488.0, 203.0, 102.0, 80.0, 41.0, 34.0, 20.0, 14.0, 13.0, 6.0, 10.0, 7.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.3984375, -9.1153564453125, -8.832275390625, -8.5491943359375, -8.26611328125, -7.9830322265625, -7.699951171875, -7.4168701171875, -7.1337890625, -6.8507080078125, -6.567626953125, -6.2845458984375, -6.00146484375, -5.7183837890625, -5.435302734375, -5.1522216796875, -4.869140625, -4.5860595703125, -4.302978515625, -4.0198974609375, -3.73681640625, -3.4537353515625, -3.170654296875, -2.8875732421875, -2.6044921875, -2.3214111328125, -2.038330078125, -1.7552490234375, -1.47216796875, -1.1890869140625, -0.906005859375, -0.6229248046875, -0.33984375, -0.0567626953125, 0.226318359375, 0.5093994140625, 0.79248046875, 1.0755615234375, 1.358642578125, 1.6417236328125, 1.9248046875, 2.2078857421875, 2.490966796875, 2.7740478515625, 3.05712890625, 3.3402099609375, 3.623291015625, 3.9063720703125, 4.189453125, 4.4725341796875, 4.755615234375, 5.0386962890625, 5.32177734375, 5.6048583984375, 5.887939453125, 6.1710205078125, 6.4541015625, 6.7371826171875, 7.020263671875, 7.3033447265625, 7.58642578125, 7.8695068359375, 8.152587890625, 8.4356689453125, 8.71875]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 4.0, 2.0, 2.0, 9.0, 15.0, 24.0, 32.0, 35.0, 56.0, 69.0, 88.0, 99.0, 101.0, 108.0, 93.0, 74.0, 59.0, 52.0, 23.0, 31.0, 14.0, 11.0, 5.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.0546875, -6.896209716796875, -6.73773193359375, -6.579254150390625, -6.4207763671875, -6.262298583984375, -6.10382080078125, -5.945343017578125, -5.786865234375, -5.628387451171875, -5.46990966796875, -5.311431884765625, -5.1529541015625, -4.994476318359375, -4.83599853515625, -4.677520751953125, -4.51904296875, -4.360565185546875, -4.20208740234375, -4.043609619140625, -3.8851318359375, -3.726654052734375, -3.56817626953125, -3.409698486328125, -3.251220703125, -3.092742919921875, -2.93426513671875, -2.775787353515625, -2.6173095703125, -2.458831787109375, -2.30035400390625, -2.141876220703125, -1.9833984375, -1.824920654296875, -1.66644287109375, -1.507965087890625, -1.3494873046875, -1.191009521484375, -1.03253173828125, -0.874053955078125, -0.715576171875, -0.557098388671875, -0.39862060546875, -0.240142822265625, -0.0816650390625, 0.076812744140625, 0.23529052734375, 0.393768310546875, 0.55224609375, 0.710723876953125, 0.86920166015625, 1.027679443359375, 1.1861572265625, 1.344635009765625, 1.50311279296875, 1.661590576171875, 1.820068359375, 1.978546142578125, 2.13702392578125, 2.295501708984375, 2.4539794921875, 2.612457275390625, 2.77093505859375, 2.929412841796875, 3.087890625]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 7.0, 6.0, 7.0, 12.0, 25.0, 39.0, 60.0, 89.0, 162.0, 292.0, 617.0, 1528.0, 4463.0, 15850.0, 85045.0, 2623962.0, 1376225.0, 66378.0, 13550.0, 3683.0, 1253.0, 497.0, 260.0, 134.0, 57.0, 35.0, 22.0, 13.0, 6.0, 7.0, 2.0, 2.0, 2.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.359375, -8.126708984375, -7.89404296875, -7.661376953125, -7.4287109375, -7.196044921875, -6.96337890625, -6.730712890625, -6.498046875, -6.265380859375, -6.03271484375, -5.800048828125, -5.5673828125, -5.334716796875, -5.10205078125, -4.869384765625, -4.63671875, -4.404052734375, -4.17138671875, -3.938720703125, -3.7060546875, -3.473388671875, -3.24072265625, -3.008056640625, -2.775390625, -2.542724609375, -2.31005859375, -2.077392578125, -1.8447265625, -1.612060546875, -1.37939453125, -1.146728515625, -0.9140625, -0.681396484375, -0.44873046875, -0.216064453125, 0.0166015625, 0.249267578125, 0.48193359375, 0.714599609375, 0.947265625, 1.179931640625, 1.41259765625, 1.645263671875, 1.8779296875, 2.110595703125, 2.34326171875, 2.575927734375, 2.80859375, 3.041259765625, 3.27392578125, 3.506591796875, 3.7392578125, 3.971923828125, 4.20458984375, 4.437255859375, 4.669921875, 4.902587890625, 5.13525390625, 5.367919921875, 5.6005859375, 5.833251953125, 6.06591796875, 6.298583984375, 6.53125]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 5.0, 6.0, 5.0, 7.0, 13.0, 13.0, 24.0, 32.0, 47.0, 86.0, 134.0, 403.0, 915.0, 1186.0, 588.0, 255.0, 128.0, 75.0, 45.0, 44.0, 19.0, 19.0, 11.0, 7.0, 4.0, 4.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.0234375, -9.76788330078125, -9.5123291015625, -9.25677490234375, -9.001220703125, -8.74566650390625, -8.4901123046875, -8.23455810546875, -7.97900390625, -7.72344970703125, -7.4678955078125, -7.21234130859375, -6.956787109375, -6.70123291015625, -6.4456787109375, -6.19012451171875, -5.9345703125, -5.67901611328125, -5.4234619140625, -5.16790771484375, -4.912353515625, -4.65679931640625, -4.4012451171875, -4.14569091796875, -3.89013671875, -3.63458251953125, -3.3790283203125, -3.12347412109375, -2.867919921875, -2.61236572265625, -2.3568115234375, -2.10125732421875, -1.845703125, -1.59014892578125, -1.3345947265625, -1.07904052734375, -0.823486328125, -0.56793212890625, -0.3123779296875, -0.05682373046875, 0.19873046875, 0.45428466796875, 0.7098388671875, 0.96539306640625, 1.220947265625, 1.47650146484375, 1.7320556640625, 1.98760986328125, 2.2431640625, 2.49871826171875, 2.7542724609375, 3.00982666015625, 3.265380859375, 3.52093505859375, 3.7764892578125, 4.03204345703125, 4.28759765625, 4.54315185546875, 4.7987060546875, 5.05426025390625, 5.309814453125, 5.56536865234375, 5.8209228515625, 6.07647705078125, 6.33203125]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 3.0, 3.0, 17.0, 63.0, 281.0, 380.0, 177.0, 45.0, 16.0, 6.0, 4.0, 2.0, 4.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-75.29899597167969, -71.90971374511719, -68.52043914794922, -65.13115692138672, -61.74187469482422, -58.352596282958984, -54.96331787109375, -51.57403564453125, -48.18475341796875, -44.795475006103516, -41.406192779541016, -38.01691436767578, -34.62763214111328, -31.238353729248047, -27.84907341003418, -24.459793090820312, -21.070514678955078, -17.68123435974121, -14.291954040527344, -10.902674674987793, -7.513394355773926, -4.124114990234375, -0.7348346710205078, 2.6544456481933594, 6.043725967407227, 9.433006286621094, 12.822286605834961, 16.211566925048828, 19.600845336914062, 22.99012565612793, 26.379405975341797, 29.768686294555664, 33.15796661376953, 36.547245025634766, 39.936527252197266, 43.3258056640625, 46.715087890625, 50.104366302490234, 53.49364471435547, 56.88292694091797, 60.27220916748047, 63.6614875793457, 67.05076599121094, 70.44004821777344, 73.82933044433594, 77.21861267089844, 80.6078872680664, 83.9971694946289, 87.38644409179688, 90.77572631835938, 94.16500091552734, 97.55428314208984, 100.94356536865234, 104.33283996582031, 107.72212219238281, 111.11140441894531, 114.50068664550781, 117.88996887207031, 121.27924346923828, 124.66852569580078, 128.05780029296875, 131.44708251953125, 134.83636474609375, 138.22564697265625, 141.61492919921875]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 5.0, 2.0, 2.0, 3.0, 4.0, 2.0, 6.0, 8.0, 17.0, 10.0, 18.0, 21.0, 23.0, 23.0, 25.0, 32.0, 44.0, 37.0, 35.0, 34.0, 37.0, 54.0, 45.0, 46.0, 62.0, 52.0, 51.0, 45.0, 39.0, 27.0, 32.0, 40.0, 25.0, 27.0, 22.0, 11.0, 7.0, 12.0, 9.0, 9.0, 2.0, 5.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.39168930053711, -22.540687561035156, -21.68968391418457, -20.838682174682617, -19.98767852783203, -19.136676788330078, -18.285675048828125, -17.434673309326172, -16.583669662475586, -15.732666969299316, -14.881664276123047, -14.030662536621094, -13.179659843444824, -12.328657150268555, -11.477655410766602, -10.626652717590332, -9.775650024414062, -8.924647331237793, -8.073644638061523, -7.22264289855957, -6.371640205383301, -5.520637512207031, -4.66963529586792, -3.8186330795288086, -2.967630386352539, -2.1166279315948486, -1.2656254768371582, -0.4146230220794678, 0.43637943267822266, 1.2873821258544922, 2.1383843421936035, 2.989386558532715, 3.8403892517089844, 4.691391944885254, 5.542394161224365, 6.393396377563477, 7.244399070739746, 8.095401763916016, 8.946403503417969, 9.797406196594238, 10.648408889770508, 11.499411582946777, 12.350414276123047, 13.201416015625, 14.05241870880127, 14.903421401977539, 15.754423141479492, 16.605426788330078, 17.45642852783203, 18.307430267333984, 19.15843391418457, 20.009435653686523, 20.86043930053711, 21.711441040039062, 22.562442779541016, 23.41344451904297, 24.264448165893555, 25.115449905395508, 25.966453552246094, 26.817455291748047, 27.66845703125, 28.519460678100586, 29.37046241760254, 30.221466064453125, 31.072467803955078]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 9.0, 6.0, 14.0, 16.0, 12.0, 23.0, 42.0, 67.0, 108.0, 177.0, 317.0, 481.0, 822.0, 1452.0, 2775.0, 5243.0, 10785.0, 23157.0, 53552.0, 129573.0, 311975.0, 295027.0, 120449.0, 49451.0, 21753.0, 10165.0, 4984.0, 2613.0, 1422.0, 817.0, 498.0, 294.0, 183.0, 109.0, 64.0, 42.0, 17.0, 24.0, 17.0, 12.0, 6.0, 4.0, 0.0, 3.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-4.4296875, -4.2952880859375, -4.160888671875, -4.0264892578125, -3.89208984375, -3.7576904296875, -3.623291015625, -3.4888916015625, -3.3544921875, -3.2200927734375, -3.085693359375, -2.9512939453125, -2.81689453125, -2.6824951171875, -2.548095703125, -2.4136962890625, -2.279296875, -2.1448974609375, -2.010498046875, -1.8760986328125, -1.74169921875, -1.6072998046875, -1.472900390625, -1.3385009765625, -1.2041015625, -1.0697021484375, -0.935302734375, -0.8009033203125, -0.66650390625, -0.5321044921875, -0.397705078125, -0.2633056640625, -0.12890625, 0.0054931640625, 0.139892578125, 0.2742919921875, 0.40869140625, 0.5430908203125, 0.677490234375, 0.8118896484375, 0.9462890625, 1.0806884765625, 1.215087890625, 1.3494873046875, 1.48388671875, 1.6182861328125, 1.752685546875, 1.8870849609375, 2.021484375, 2.1558837890625, 2.290283203125, 2.4246826171875, 2.55908203125, 2.6934814453125, 2.827880859375, 2.9622802734375, 3.0966796875, 3.2310791015625, 3.365478515625, 3.4998779296875, 3.63427734375, 3.7686767578125, 3.903076171875, 4.0374755859375, 4.171875]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 5.0, 6.0, 6.0, 13.0, 8.0, 11.0, 11.0, 22.0, 40.0, 32.0, 46.0, 47.0, 45.0, 60.0, 64.0, 80.0, 79.0, 53.0, 66.0, 66.0, 56.0, 32.0, 37.0, 29.0, 28.0, 14.0, 20.0, 10.0, 2.0, 9.0, 5.0, 7.0, 6.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.86328125, -4.747772216796875, -4.63226318359375, -4.516754150390625, -4.4012451171875, -4.285736083984375, -4.17022705078125, -4.054718017578125, -3.939208984375, -3.823699951171875, -3.70819091796875, -3.592681884765625, -3.4771728515625, -3.361663818359375, -3.24615478515625, -3.130645751953125, -3.01513671875, -2.899627685546875, -2.78411865234375, -2.668609619140625, -2.5531005859375, -2.437591552734375, -2.32208251953125, -2.206573486328125, -2.091064453125, -1.975555419921875, -1.86004638671875, -1.744537353515625, -1.6290283203125, -1.513519287109375, -1.39801025390625, -1.282501220703125, -1.1669921875, -1.051483154296875, -0.93597412109375, -0.820465087890625, -0.7049560546875, -0.589447021484375, -0.47393798828125, -0.358428955078125, -0.242919921875, -0.127410888671875, -0.01190185546875, 0.103607177734375, 0.2191162109375, 0.334625244140625, 0.45013427734375, 0.565643310546875, 0.68115234375, 0.796661376953125, 0.91217041015625, 1.027679443359375, 1.1431884765625, 1.258697509765625, 1.37420654296875, 1.489715576171875, 1.605224609375, 1.720733642578125, 1.83624267578125, 1.951751708984375, 2.0672607421875, 2.182769775390625, 2.29827880859375, 2.413787841796875, 2.529296875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 6.0, 5.0, 8.0, 7.0, 17.0, 12.0, 13.0, 32.0, 46.0, 66.0, 91.0, 152.0, 240.0, 389.0, 713.0, 1468.0, 3804.0, 12449.0, 57378.0, 397385.0, 489588.0, 63705.0, 13513.0, 4017.0, 1608.0, 742.0, 403.0, 237.0, 134.0, 96.0, 53.0, 42.0, 35.0, 29.0, 10.0, 16.0, 9.0, 8.0, 7.0, 5.0, 3.0, 0.0, 3.0, 5.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-8.734375, -8.461669921875, -8.18896484375, -7.916259765625, -7.6435546875, -7.370849609375, -7.09814453125, -6.825439453125, -6.552734375, -6.280029296875, -6.00732421875, -5.734619140625, -5.4619140625, -5.189208984375, -4.91650390625, -4.643798828125, -4.37109375, -4.098388671875, -3.82568359375, -3.552978515625, -3.2802734375, -3.007568359375, -2.73486328125, -2.462158203125, -2.189453125, -1.916748046875, -1.64404296875, -1.371337890625, -1.0986328125, -0.825927734375, -0.55322265625, -0.280517578125, -0.0078125, 0.264892578125, 0.53759765625, 0.810302734375, 1.0830078125, 1.355712890625, 1.62841796875, 1.901123046875, 2.173828125, 2.446533203125, 2.71923828125, 2.991943359375, 3.2646484375, 3.537353515625, 3.81005859375, 4.082763671875, 4.35546875, 4.628173828125, 4.90087890625, 5.173583984375, 5.4462890625, 5.718994140625, 5.99169921875, 6.264404296875, 6.537109375, 6.809814453125, 7.08251953125, 7.355224609375, 7.6279296875, 7.900634765625, 8.17333984375, 8.446044921875, 8.71875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 5.0, 2.0, 5.0, 1.0, 4.0, 9.0, 11.0, 14.0, 12.0, 21.0, 20.0, 20.0, 23.0, 42.0, 35.0, 38.0, 54.0, 56.0, 43.0, 66.0, 45.0, 49.0, 44.0, 45.0, 42.0, 49.0, 33.0, 38.0, 34.0, 21.0, 14.0, 21.0, 15.0, 8.0, 14.0, 11.0, 10.0, 9.0, 7.0, 4.0, 2.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0], "bins": [-11.09375, -10.7310791015625, -10.368408203125, -10.0057373046875, -9.64306640625, -9.2803955078125, -8.917724609375, -8.5550537109375, -8.1923828125, -7.8297119140625, -7.467041015625, -7.1043701171875, -6.74169921875, -6.3790283203125, -6.016357421875, -5.6536865234375, -5.291015625, -4.9283447265625, -4.565673828125, -4.2030029296875, -3.84033203125, -3.4776611328125, -3.114990234375, -2.7523193359375, -2.3896484375, -2.0269775390625, -1.664306640625, -1.3016357421875, -0.93896484375, -0.5762939453125, -0.213623046875, 0.1490478515625, 0.51171875, 0.8743896484375, 1.237060546875, 1.5997314453125, 1.96240234375, 2.3250732421875, 2.687744140625, 3.0504150390625, 3.4130859375, 3.7757568359375, 4.138427734375, 4.5010986328125, 4.86376953125, 5.2264404296875, 5.589111328125, 5.9517822265625, 6.314453125, 6.6771240234375, 7.039794921875, 7.4024658203125, 7.76513671875, 8.1278076171875, 8.490478515625, 8.8531494140625, 9.2158203125, 9.5784912109375, 9.941162109375, 10.3038330078125, 10.66650390625, 11.0291748046875, 11.391845703125, 11.7545166015625, 12.1171875]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 5.0, 4.0, 5.0, 4.0, 4.0, 1.0, 7.0, 11.0, 11.0, 16.0, 18.0, 28.0, 40.0, 84.0, 140.0, 295.0, 651.0, 1927.0, 9834.0, 155340.0, 829057.0, 44020.0, 4759.0, 1272.0, 466.0, 222.0, 136.0, 60.0, 45.0, 32.0, 20.0, 11.0, 10.0, 11.0, 3.0, 4.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.55078125, -4.3900146484375, -4.229248046875, -4.0684814453125, -3.90771484375, -3.7469482421875, -3.586181640625, -3.4254150390625, -3.2646484375, -3.1038818359375, -2.943115234375, -2.7823486328125, -2.62158203125, -2.4608154296875, -2.300048828125, -2.1392822265625, -1.978515625, -1.8177490234375, -1.656982421875, -1.4962158203125, -1.33544921875, -1.1746826171875, -1.013916015625, -0.8531494140625, -0.6923828125, -0.5316162109375, -0.370849609375, -0.2100830078125, -0.04931640625, 0.1114501953125, 0.272216796875, 0.4329833984375, 0.59375, 0.7545166015625, 0.915283203125, 1.0760498046875, 1.23681640625, 1.3975830078125, 1.558349609375, 1.7191162109375, 1.8798828125, 2.0406494140625, 2.201416015625, 2.3621826171875, 2.52294921875, 2.6837158203125, 2.844482421875, 3.0052490234375, 3.166015625, 3.3267822265625, 3.487548828125, 3.6483154296875, 3.80908203125, 3.9698486328125, 4.130615234375, 4.2913818359375, 4.4521484375, 4.6129150390625, 4.773681640625, 4.9344482421875, 5.09521484375, 5.2559814453125, 5.416748046875, 5.5775146484375, 5.73828125]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 0.0, 3.0, 3.0, 2.0, 5.0, 1.0, 7.0, 7.0, 10.0, 9.0, 12.0, 15.0, 16.0, 28.0, 32.0, 28.0, 49.0, 65.0, 74.0, 74.0, 71.0, 77.0, 81.0, 60.0, 53.0, 47.0, 38.0, 25.0, 23.0, 23.0, 9.0, 14.0, 8.0, 8.0, 11.0, 4.0, 4.0, 4.0, 2.0, 1.0, 5.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004949569702148438, -0.00048040226101875305, -0.00046584755182266235, -0.00045129284262657166, -0.00043673813343048096, -0.00042218342423439026, -0.00040762871503829956, -0.00039307400584220886, -0.00037851929664611816, -0.00036396458745002747, -0.00034940987825393677, -0.00033485516905784607, -0.00032030045986175537, -0.00030574575066566467, -0.000291191041469574, -0.0002766363322734833, -0.0002620816230773926, -0.0002475269138813019, -0.00023297220468521118, -0.00021841749548912048, -0.00020386278629302979, -0.0001893080770969391, -0.0001747533679008484, -0.0001601986587047577, -0.000145643949508667, -0.0001310892403125763, -0.0001165345311164856, -0.0001019798219203949, -8.74251127243042e-05, -7.28704035282135e-05, -5.83156943321228e-05, -4.3760985136032104e-05, -2.9206275939941406e-05, -1.4651566743850708e-05, -9.685754776000977e-08, 1.4457851648330688e-05, 2.9012560844421387e-05, 4.3567270040512085e-05, 5.812197923660278e-05, 7.267668843269348e-05, 8.723139762878418e-05, 0.00010178610682487488, 0.00011634081602096558, 0.00013089552521705627, 0.00014545023441314697, 0.00016000494360923767, 0.00017455965280532837, 0.00018911436200141907, 0.00020366907119750977, 0.00021822378039360046, 0.00023277848958969116, 0.00024733319878578186, 0.00026188790798187256, 0.00027644261717796326, 0.00029099732637405396, 0.00030555203557014465, 0.00032010674476623535, 0.00033466145396232605, 0.00034921616315841675, 0.00036377087235450745, 0.00037832558155059814, 0.00039288029074668884, 0.00040743499994277954, 0.00042198970913887024, 0.00043654441833496094]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 3.0, 1.0, 4.0, 2.0, 2.0, 3.0, 5.0, 8.0, 4.0, 10.0, 14.0, 15.0, 23.0, 29.0, 35.0, 54.0, 77.0, 125.0, 194.0, 292.0, 490.0, 924.0, 1775.0, 4157.0, 12754.0, 59123.0, 373165.0, 500495.0, 71104.0, 14718.0, 4666.0, 1906.0, 933.0, 502.0, 317.0, 185.0, 136.0, 91.0, 69.0, 42.0, 22.0, 25.0, 16.0, 10.0, 10.0, 3.0, 7.0, 7.0, 3.0, 4.0, 2.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.37890625, -2.29974365234375, -2.2205810546875, -2.14141845703125, -2.062255859375, -1.98309326171875, -1.9039306640625, -1.82476806640625, -1.74560546875, -1.66644287109375, -1.5872802734375, -1.50811767578125, -1.428955078125, -1.34979248046875, -1.2706298828125, -1.19146728515625, -1.1123046875, -1.03314208984375, -0.9539794921875, -0.87481689453125, -0.795654296875, -0.71649169921875, -0.6373291015625, -0.55816650390625, -0.47900390625, -0.39984130859375, -0.3206787109375, -0.24151611328125, -0.162353515625, -0.08319091796875, -0.0040283203125, 0.07513427734375, 0.154296875, 0.23345947265625, 0.3126220703125, 0.39178466796875, 0.470947265625, 0.55010986328125, 0.6292724609375, 0.70843505859375, 0.78759765625, 0.86676025390625, 0.9459228515625, 1.02508544921875, 1.104248046875, 1.18341064453125, 1.2625732421875, 1.34173583984375, 1.4208984375, 1.50006103515625, 1.5792236328125, 1.65838623046875, 1.737548828125, 1.81671142578125, 1.8958740234375, 1.97503662109375, 2.05419921875, 2.13336181640625, 2.2125244140625, 2.29168701171875, 2.370849609375, 2.45001220703125, 2.5291748046875, 2.60833740234375, 2.6875]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 6.0, 5.0, 7.0, 8.0, 13.0, 18.0, 31.0, 34.0, 36.0, 58.0, 67.0, 65.0, 103.0, 98.0, 86.0, 80.0, 80.0, 49.0, 35.0, 33.0, 25.0, 21.0, 14.0, 9.0, 9.0, 5.0, 6.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-4.02734375, -3.927490234375, -3.82763671875, -3.727783203125, -3.6279296875, -3.528076171875, -3.42822265625, -3.328369140625, -3.228515625, -3.128662109375, -3.02880859375, -2.928955078125, -2.8291015625, -2.729248046875, -2.62939453125, -2.529541015625, -2.4296875, -2.329833984375, -2.22998046875, -2.130126953125, -2.0302734375, -1.930419921875, -1.83056640625, -1.730712890625, -1.630859375, -1.531005859375, -1.43115234375, -1.331298828125, -1.2314453125, -1.131591796875, -1.03173828125, -0.931884765625, -0.83203125, -0.732177734375, -0.63232421875, -0.532470703125, -0.4326171875, -0.332763671875, -0.23291015625, -0.133056640625, -0.033203125, 0.066650390625, 0.16650390625, 0.266357421875, 0.3662109375, 0.466064453125, 0.56591796875, 0.665771484375, 0.765625, 0.865478515625, 0.96533203125, 1.065185546875, 1.1650390625, 1.264892578125, 1.36474609375, 1.464599609375, 1.564453125, 1.664306640625, 1.76416015625, 1.864013671875, 1.9638671875, 2.063720703125, 2.16357421875, 2.263427734375, 2.36328125]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 4.0, 1.0, 3.0, 1.0, 3.0, 15.0, 16.0, 43.0, 90.0, 176.0, 238.0, 200.0, 113.0, 49.0, 31.0, 13.0, 7.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-88.62583923339844, -86.14532470703125, -83.66480255126953, -81.18428802490234, -78.70377349853516, -76.22325134277344, -73.74273681640625, -71.26222229003906, -68.78170013427734, -66.30118560791016, -63.8206672668457, -61.34014892578125, -58.8596305847168, -56.379112243652344, -53.898597717285156, -51.4180793762207, -48.937564849853516, -46.45704650878906, -43.976531982421875, -41.49601364135742, -39.01549530029297, -36.53498077392578, -34.05446243286133, -31.573944091796875, -29.093427658081055, -26.612911224365234, -24.13239288330078, -21.65187644958496, -19.17136001586914, -16.690841674804688, -14.210325241088867, -11.729806900024414, -9.249290466308594, -6.768773078918457, -4.2882561683654785, -1.8077392578125, 0.6727781295776367, 3.1532955169677734, 5.633811950683594, 8.114330291748047, 10.594846725463867, 13.075364112854004, 15.55588150024414, 18.03639793395996, 20.51691436767578, 22.997432708740234, 25.477949142456055, 27.958467483520508, 30.438983917236328, 32.91950225830078, 35.40001678466797, 37.88053512573242, 40.361053466796875, 42.84156799316406, 45.322086334228516, 47.80260467529297, 50.283119201660156, 52.76363754272461, 55.2441520690918, 57.72467041015625, 60.2051887512207, 62.685707092285156, 65.16622161865234, 67.64674377441406, 70.12725830078125]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0, 7.0, 2.0, 9.0, 14.0, 8.0, 16.0, 23.0, 31.0, 32.0, 29.0, 45.0, 41.0, 64.0, 79.0, 90.0, 81.0, 85.0, 62.0, 51.0, 60.0, 51.0, 35.0, 26.0, 18.0, 12.0, 12.0, 9.0, 9.0, 4.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-79.71756744384766, -77.67255401611328, -75.6275405883789, -73.58252716064453, -71.53751373291016, -69.49250030517578, -67.4474868774414, -65.4024658203125, -63.35745620727539, -61.312442779541016, -59.26742935180664, -57.222415924072266, -55.177398681640625, -53.13238525390625, -51.087371826171875, -49.0423583984375, -46.997344970703125, -44.95233154296875, -42.907318115234375, -40.8623046875, -38.817291259765625, -36.77227783203125, -34.72726058959961, -32.682247161865234, -30.63723373413086, -28.592220306396484, -26.54720687866211, -24.5021915435791, -22.457178115844727, -20.41216468811035, -18.367149353027344, -16.32213592529297, -14.277130126953125, -12.23211669921875, -10.187102317810059, -8.142087936401367, -6.097074508666992, -4.052061080932617, -2.007046699523926, 0.037967681884765625, 2.0829811096191406, 4.127995014190674, 6.173008918762207, 8.218023300170898, 10.263036727905273, 12.308050155639648, 14.35306453704834, 16.39807891845703, 18.443092346191406, 20.48810577392578, 22.533119201660156, 24.578134536743164, 26.62314796447754, 28.668161392211914, 30.713176727294922, 32.7581901550293, 34.80320358276367, 36.84821701049805, 38.89323043823242, 40.9382438659668, 42.98326110839844, 45.02827453613281, 47.07328796386719, 49.11830139160156, 51.16331481933594]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 2.0, 1.0, 4.0, 3.0, 6.0, 2.0, 3.0, 10.0, 11.0, 10.0, 16.0, 33.0, 71.0, 108.0, 238.0, 769.0, 2658.0, 11199.0, 93501.0, 2808926.0, 1214785.0, 49895.0, 8456.0, 2391.0, 788.0, 263.0, 80.0, 26.0, 17.0, 7.0, 7.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.640625, -7.448974609375, -7.25732421875, -7.065673828125, -6.8740234375, -6.682373046875, -6.49072265625, -6.299072265625, -6.107421875, -5.915771484375, -5.72412109375, -5.532470703125, -5.3408203125, -5.149169921875, -4.95751953125, -4.765869140625, -4.57421875, -4.382568359375, -4.19091796875, -3.999267578125, -3.8076171875, -3.615966796875, -3.42431640625, -3.232666015625, -3.041015625, -2.849365234375, -2.65771484375, -2.466064453125, -2.2744140625, -2.082763671875, -1.89111328125, -1.699462890625, -1.5078125, -1.316162109375, -1.12451171875, -0.932861328125, -0.7412109375, -0.549560546875, -0.35791015625, -0.166259765625, 0.025390625, 0.217041015625, 0.40869140625, 0.600341796875, 0.7919921875, 0.983642578125, 1.17529296875, 1.366943359375, 1.55859375, 1.750244140625, 1.94189453125, 2.133544921875, 2.3251953125, 2.516845703125, 2.70849609375, 2.900146484375, 3.091796875, 3.283447265625, 3.47509765625, 3.666748046875, 3.8583984375, 4.050048828125, 4.24169921875, 4.433349609375, 4.625]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 10.0, 14.0, 20.0, 26.0, 50.0, 66.0, 86.0, 99.0, 119.0, 93.0, 110.0, 85.0, 80.0, 56.0, 47.0, 23.0, 15.0, 11.0, 4.0, 4.0, 1.0], "bins": [-9.0, -8.827606201171875, -8.65521240234375, -8.482818603515625, -8.3104248046875, -8.138031005859375, -7.96563720703125, -7.793243408203125, -7.620849609375, -7.448455810546875, -7.27606201171875, -7.103668212890625, -6.9312744140625, -6.758880615234375, -6.58648681640625, -6.414093017578125, -6.24169921875, -6.069305419921875, -5.89691162109375, -5.724517822265625, -5.5521240234375, -5.379730224609375, -5.20733642578125, -5.034942626953125, -4.862548828125, -4.690155029296875, -4.51776123046875, -4.345367431640625, -4.1729736328125, -4.000579833984375, -3.82818603515625, -3.655792236328125, -3.4833984375, -3.311004638671875, -3.13861083984375, -2.966217041015625, -2.7938232421875, -2.621429443359375, -2.44903564453125, -2.276641845703125, -2.104248046875, -1.931854248046875, -1.75946044921875, -1.587066650390625, -1.4146728515625, -1.242279052734375, -1.06988525390625, -0.897491455078125, -0.72509765625, -0.552703857421875, -0.38031005859375, -0.207916259765625, -0.0355224609375, 0.136871337890625, 0.30926513671875, 0.481658935546875, 0.654052734375, 0.826446533203125, 0.99884033203125, 1.171234130859375, 1.3436279296875, 1.516021728515625, 1.68841552734375, 1.860809326171875, 2.033203125]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 7.0, 4.0, 4.0, 4.0, 9.0, 5.0, 19.0, 20.0, 43.0, 44.0, 83.0, 130.0, 280.0, 442.0, 974.0, 2000.0, 5491.0, 18611.0, 94008.0, 1909949.0, 2042175.0, 95746.0, 16935.0, 4397.0, 1554.0, 591.0, 323.0, 168.0, 111.0, 63.0, 32.0, 27.0, 19.0, 10.0, 1.0, 5.0, 4.0, 2.0, 0.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.69140625, -7.48358154296875, -7.2757568359375, -7.06793212890625, -6.860107421875, -6.65228271484375, -6.4444580078125, -6.23663330078125, -6.02880859375, -5.82098388671875, -5.6131591796875, -5.40533447265625, -5.197509765625, -4.98968505859375, -4.7818603515625, -4.57403564453125, -4.3662109375, -4.15838623046875, -3.9505615234375, -3.74273681640625, -3.534912109375, -3.32708740234375, -3.1192626953125, -2.91143798828125, -2.70361328125, -2.49578857421875, -2.2879638671875, -2.08013916015625, -1.872314453125, -1.66448974609375, -1.4566650390625, -1.24884033203125, -1.041015625, -0.83319091796875, -0.6253662109375, -0.41754150390625, -0.209716796875, -0.00189208984375, 0.2059326171875, 0.41375732421875, 0.62158203125, 0.82940673828125, 1.0372314453125, 1.24505615234375, 1.452880859375, 1.66070556640625, 1.8685302734375, 2.07635498046875, 2.2841796875, 2.49200439453125, 2.6998291015625, 2.90765380859375, 3.115478515625, 3.32330322265625, 3.5311279296875, 3.73895263671875, 3.94677734375, 4.15460205078125, 4.3624267578125, 4.57025146484375, 4.778076171875, 4.98590087890625, 5.1937255859375, 5.40155029296875, 5.609375]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 5.0, 2.0, 7.0, 7.0, 5.0, 3.0, 10.0, 9.0, 17.0, 20.0, 29.0, 51.0, 59.0, 110.0, 187.0, 303.0, 697.0, 1007.0, 711.0, 319.0, 196.0, 115.0, 70.0, 47.0, 32.0, 19.0, 17.0, 7.0, 7.0, 8.0, 3.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-10.78125, -10.521728515625, -10.26220703125, -10.002685546875, -9.7431640625, -9.483642578125, -9.22412109375, -8.964599609375, -8.705078125, -8.445556640625, -8.18603515625, -7.926513671875, -7.6669921875, -7.407470703125, -7.14794921875, -6.888427734375, -6.62890625, -6.369384765625, -6.10986328125, -5.850341796875, -5.5908203125, -5.331298828125, -5.07177734375, -4.812255859375, -4.552734375, -4.293212890625, -4.03369140625, -3.774169921875, -3.5146484375, -3.255126953125, -2.99560546875, -2.736083984375, -2.4765625, -2.217041015625, -1.95751953125, -1.697998046875, -1.4384765625, -1.178955078125, -0.91943359375, -0.659912109375, -0.400390625, -0.140869140625, 0.11865234375, 0.378173828125, 0.6376953125, 0.897216796875, 1.15673828125, 1.416259765625, 1.67578125, 1.935302734375, 2.19482421875, 2.454345703125, 2.7138671875, 2.973388671875, 3.23291015625, 3.492431640625, 3.751953125, 4.011474609375, 4.27099609375, 4.530517578125, 4.7900390625, 5.049560546875, 5.30908203125, 5.568603515625, 5.828125]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 6.0, 7.0, 23.0, 41.0, 107.0, 212.0, 224.0, 186.0, 95.0, 44.0, 22.0, 12.0, 6.0, 4.0, 3.0, 2.0, 4.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-54.46714401245117, -52.03287124633789, -49.598602294921875, -47.164329528808594, -44.73005676269531, -42.29578399658203, -39.86151123046875, -37.427242279052734, -34.99296951293945, -32.55869674682617, -30.124425888061523, -27.690155029296875, -25.255882263183594, -22.821609497070312, -20.387338638305664, -17.953067779541016, -15.518795013427734, -13.08452320098877, -10.650251388549805, -8.21597957611084, -5.781707763671875, -3.34743595123291, -0.9131641387939453, 1.5211067199707031, 3.9553794860839844, 6.389651298522949, 8.823923110961914, 11.258194923400879, 13.692466735839844, 16.126739501953125, 18.561010360717773, 20.995281219482422, 23.429550170898438, 25.86382293701172, 28.298093795776367, 30.732364654541016, 33.1666374206543, 35.60091018676758, 38.035179138183594, 40.469451904296875, 42.903724670410156, 45.33799743652344, 47.77227020263672, 50.206539154052734, 52.640811920166016, 55.0750846862793, 57.50935363769531, 59.943626403808594, 62.377899169921875, 64.81217193603516, 67.24644470214844, 69.68071746826172, 72.114990234375, 74.54925537109375, 76.98352813720703, 79.41780090332031, 81.8520736694336, 84.28634643554688, 86.72061920166016, 89.15489196777344, 91.58915710449219, 94.02342987060547, 96.45770263671875, 98.89197540283203, 101.32624816894531]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 3.0, 4.0, 2.0, 4.0, 8.0, 12.0, 11.0, 10.0, 21.0, 27.0, 40.0, 32.0, 49.0, 65.0, 65.0, 66.0, 52.0, 63.0, 63.0, 54.0, 56.0, 53.0, 50.0, 54.0, 30.0, 28.0, 23.0, 16.0, 19.0, 10.0, 3.0, 9.0, 6.0, 2.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.337627410888672, -22.94782829284668, -21.558029174804688, -20.168228149414062, -18.77842903137207, -17.388629913330078, -15.99882984161377, -14.609029769897461, -13.219230651855469, -11.829431533813477, -10.439631462097168, -9.04983139038086, -7.660032272338867, -6.270232677459717, -4.880433082580566, -3.490633010864258, -2.1008338928222656, -0.7110342979431152, 0.6787652969360352, 2.0685648918151855, 3.458364486694336, 4.848164081573486, 6.237963676452637, 7.627763748168945, 9.017562866210938, 10.40736198425293, 11.797162055969238, 13.186962127685547, 14.576761245727539, 15.966560363769531, 17.356361389160156, 18.74616050720215, 20.135955810546875, 21.525754928588867, 22.91555404663086, 24.305355072021484, 25.695154190063477, 27.08495330810547, 28.474754333496094, 29.864553451538086, 31.254352569580078, 32.6441535949707, 34.03395080566406, 35.42375183105469, 36.81355285644531, 38.20335006713867, 39.5931510925293, 40.982948303222656, 42.37274932861328, 43.762550354003906, 45.152347564697266, 46.54214859008789, 47.93194580078125, 49.321746826171875, 50.7115478515625, 52.101348876953125, 53.491146087646484, 54.88094711303711, 56.27074432373047, 57.660545349121094, 59.05034637451172, 60.44014358520508, 61.8299446105957, 63.21974182128906, 64.60954284667969]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 2.0, 1.0, 5.0, 6.0, 9.0, 15.0, 25.0, 37.0, 67.0, 98.0, 152.0, 255.0, 427.0, 893.0, 1796.0, 3879.0, 8885.0, 20418.0, 49583.0, 115667.0, 249243.0, 307357.0, 164525.0, 71653.0, 30142.0, 12834.0, 5450.0, 2504.0, 1197.0, 587.0, 345.0, 188.0, 123.0, 74.0, 36.0, 27.0, 16.0, 17.0, 10.0, 3.0, 4.0, 1.0, 5.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.21875, -4.102264404296875, -3.98577880859375, -3.869293212890625, -3.7528076171875, -3.636322021484375, -3.51983642578125, -3.403350830078125, -3.286865234375, -3.170379638671875, -3.05389404296875, -2.937408447265625, -2.8209228515625, -2.704437255859375, -2.58795166015625, -2.471466064453125, -2.35498046875, -2.238494873046875, -2.12200927734375, -2.005523681640625, -1.8890380859375, -1.772552490234375, -1.65606689453125, -1.539581298828125, -1.423095703125, -1.306610107421875, -1.19012451171875, -1.073638916015625, -0.9571533203125, -0.840667724609375, -0.72418212890625, -0.607696533203125, -0.4912109375, -0.374725341796875, -0.25823974609375, -0.141754150390625, -0.0252685546875, 0.091217041015625, 0.20770263671875, 0.324188232421875, 0.440673828125, 0.557159423828125, 0.67364501953125, 0.790130615234375, 0.9066162109375, 1.023101806640625, 1.13958740234375, 1.256072998046875, 1.37255859375, 1.489044189453125, 1.60552978515625, 1.722015380859375, 1.8385009765625, 1.954986572265625, 2.07147216796875, 2.187957763671875, 2.304443359375, 2.420928955078125, 2.53741455078125, 2.653900146484375, 2.7703857421875, 2.886871337890625, 3.00335693359375, 3.119842529296875, 3.236328125]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 3.0, 3.0, 6.0, 4.0, 7.0, 8.0, 8.0, 6.0, 15.0, 15.0, 18.0, 25.0, 32.0, 33.0, 28.0, 32.0, 31.0, 33.0, 49.0, 53.0, 57.0, 40.0, 46.0, 40.0, 52.0, 59.0, 35.0, 31.0, 48.0, 32.0, 31.0, 27.0, 21.0, 17.0, 14.0, 15.0, 6.0, 10.0, 6.0, 7.0, 3.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0], "bins": [-2.7578125, -2.683013916015625, -2.60821533203125, -2.533416748046875, -2.4586181640625, -2.383819580078125, -2.30902099609375, -2.234222412109375, -2.159423828125, -2.084625244140625, -2.00982666015625, -1.935028076171875, -1.8602294921875, -1.785430908203125, -1.71063232421875, -1.635833740234375, -1.56103515625, -1.486236572265625, -1.41143798828125, -1.336639404296875, -1.2618408203125, -1.187042236328125, -1.11224365234375, -1.037445068359375, -0.962646484375, -0.887847900390625, -0.81304931640625, -0.738250732421875, -0.6634521484375, -0.588653564453125, -0.51385498046875, -0.439056396484375, -0.3642578125, -0.289459228515625, -0.21466064453125, -0.139862060546875, -0.0650634765625, 0.009735107421875, 0.08453369140625, 0.159332275390625, 0.234130859375, 0.308929443359375, 0.38372802734375, 0.458526611328125, 0.5333251953125, 0.608123779296875, 0.68292236328125, 0.757720947265625, 0.83251953125, 0.907318115234375, 0.98211669921875, 1.056915283203125, 1.1317138671875, 1.206512451171875, 1.28131103515625, 1.356109619140625, 1.430908203125, 1.505706787109375, 1.58050537109375, 1.655303955078125, 1.7301025390625, 1.804901123046875, 1.87969970703125, 1.954498291015625, 2.029296875]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 2.0, 1.0, 1.0, 2.0, 4.0, 3.0, 12.0, 13.0, 9.0, 19.0, 24.0, 34.0, 33.0, 62.0, 98.0, 140.0, 215.0, 304.0, 467.0, 838.0, 1613.0, 3669.0, 12913.0, 96128.0, 735207.0, 168263.0, 19238.0, 4725.0, 1892.0, 962.0, 565.0, 336.0, 245.0, 167.0, 113.0, 80.0, 51.0, 36.0, 28.0, 14.0, 11.0, 4.0, 7.0, 5.0, 4.0, 1.0, 1.0, 1.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.84765625, -7.57342529296875, -7.2991943359375, -7.02496337890625, -6.750732421875, -6.47650146484375, -6.2022705078125, -5.92803955078125, -5.65380859375, -5.37957763671875, -5.1053466796875, -4.83111572265625, -4.556884765625, -4.28265380859375, -4.0084228515625, -3.73419189453125, -3.4599609375, -3.18572998046875, -2.9114990234375, -2.63726806640625, -2.363037109375, -2.08880615234375, -1.8145751953125, -1.54034423828125, -1.26611328125, -0.99188232421875, -0.7176513671875, -0.44342041015625, -0.169189453125, 0.10504150390625, 0.3792724609375, 0.65350341796875, 0.927734375, 1.20196533203125, 1.4761962890625, 1.75042724609375, 2.024658203125, 2.29888916015625, 2.5731201171875, 2.84735107421875, 3.12158203125, 3.39581298828125, 3.6700439453125, 3.94427490234375, 4.218505859375, 4.49273681640625, 4.7669677734375, 5.04119873046875, 5.3154296875, 5.58966064453125, 5.8638916015625, 6.13812255859375, 6.412353515625, 6.68658447265625, 6.9608154296875, 7.23504638671875, 7.50927734375, 7.78350830078125, 8.0577392578125, 8.33197021484375, 8.606201171875, 8.88043212890625, 9.1546630859375, 9.42889404296875, 9.703125]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 5.0, 9.0, 13.0, 12.0, 25.0, 19.0, 35.0, 43.0, 46.0, 77.0, 60.0, 73.0, 58.0, 79.0, 64.0, 74.0, 61.0, 57.0, 46.0, 34.0, 20.0, 28.0, 14.0, 14.0, 17.0, 12.0, 4.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.0234375, -11.5648193359375, -11.106201171875, -10.6475830078125, -10.18896484375, -9.7303466796875, -9.271728515625, -8.8131103515625, -8.3544921875, -7.8958740234375, -7.437255859375, -6.9786376953125, -6.52001953125, -6.0614013671875, -5.602783203125, -5.1441650390625, -4.685546875, -4.2269287109375, -3.768310546875, -3.3096923828125, -2.85107421875, -2.3924560546875, -1.933837890625, -1.4752197265625, -1.0166015625, -0.5579833984375, -0.099365234375, 0.3592529296875, 0.81787109375, 1.2764892578125, 1.735107421875, 2.1937255859375, 2.65234375, 3.1109619140625, 3.569580078125, 4.0281982421875, 4.48681640625, 4.9454345703125, 5.404052734375, 5.8626708984375, 6.3212890625, 6.7799072265625, 7.238525390625, 7.6971435546875, 8.15576171875, 8.6143798828125, 9.072998046875, 9.5316162109375, 9.990234375, 10.4488525390625, 10.907470703125, 11.3660888671875, 11.82470703125, 12.2833251953125, 12.741943359375, 13.2005615234375, 13.6591796875, 14.1177978515625, 14.576416015625, 15.0350341796875, 15.49365234375, 15.9522705078125, 16.410888671875, 16.8695068359375, 17.328125]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 5.0, 0.0, 1.0, 6.0, 3.0, 9.0, 19.0, 28.0, 42.0, 79.0, 169.0, 397.0, 1063.0, 4706.0, 51401.0, 887357.0, 94378.0, 6709.0, 1352.0, 438.0, 196.0, 93.0, 37.0, 25.0, 21.0, 15.0, 8.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.53515625, -4.400146484375, -4.26513671875, -4.130126953125, -3.9951171875, -3.860107421875, -3.72509765625, -3.590087890625, -3.455078125, -3.320068359375, -3.18505859375, -3.050048828125, -2.9150390625, -2.780029296875, -2.64501953125, -2.510009765625, -2.375, -2.239990234375, -2.10498046875, -1.969970703125, -1.8349609375, -1.699951171875, -1.56494140625, -1.429931640625, -1.294921875, -1.159912109375, -1.02490234375, -0.889892578125, -0.7548828125, -0.619873046875, -0.48486328125, -0.349853515625, -0.21484375, -0.079833984375, 0.05517578125, 0.190185546875, 0.3251953125, 0.460205078125, 0.59521484375, 0.730224609375, 0.865234375, 1.000244140625, 1.13525390625, 1.270263671875, 1.4052734375, 1.540283203125, 1.67529296875, 1.810302734375, 1.9453125, 2.080322265625, 2.21533203125, 2.350341796875, 2.4853515625, 2.620361328125, 2.75537109375, 2.890380859375, 3.025390625, 3.160400390625, 3.29541015625, 3.430419921875, 3.5654296875, 3.700439453125, 3.83544921875, 3.970458984375, 4.10546875]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 3.0, 3.0, 3.0, 3.0, 11.0, 11.0, 11.0, 21.0, 17.0, 25.0, 22.0, 27.0, 48.0, 65.0, 75.0, 92.0, 92.0, 95.0, 76.0, 71.0, 45.0, 43.0, 33.0, 30.0, 17.0, 18.0, 8.0, 8.0, 7.0, 6.0, 2.0, 9.0, 1.0, 3.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004858970642089844, -0.0004694536328315735, -0.0004530102014541626, -0.0004365667700767517, -0.0004201233386993408, -0.00040367990732192993, -0.00038723647594451904, -0.00037079304456710815, -0.00035434961318969727, -0.0003379061818122864, -0.0003214627504348755, -0.0003050193190574646, -0.0002885758876800537, -0.0002721324563026428, -0.00025568902492523193, -0.00023924559354782104, -0.00022280216217041016, -0.00020635873079299927, -0.00018991529941558838, -0.0001734718680381775, -0.0001570284366607666, -0.0001405850052833557, -0.00012414157390594482, -0.00010769814252853394, -9.125471115112305e-05, -7.481127977371216e-05, -5.836784839630127e-05, -4.192441701889038e-05, -2.5480985641479492e-05, -9.037554264068604e-06, 7.405877113342285e-06, 2.3849308490753174e-05, 4.029273986816406e-05, 5.673617124557495e-05, 7.317960262298584e-05, 8.962303400039673e-05, 0.00010606646537780762, 0.0001225098967552185, 0.0001389533281326294, 0.00015539675951004028, 0.00017184019088745117, 0.00018828362226486206, 0.00020472705364227295, 0.00022117048501968384, 0.00023761391639709473, 0.0002540573477745056, 0.0002705007791519165, 0.0002869442105293274, 0.0003033876419067383, 0.00031983107328414917, 0.00033627450466156006, 0.00035271793603897095, 0.00036916136741638184, 0.0003856047987937927, 0.0004020482301712036, 0.0004184916615486145, 0.0004349350929260254, 0.0004513785243034363, 0.00046782195568084717, 0.00048426538705825806, 0.0005007088184356689, 0.0005171522498130798, 0.0005335956811904907, 0.0005500391125679016, 0.0005664825439453125]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 4.0, 2.0, 1.0, 1.0, 6.0, 5.0, 5.0, 9.0, 14.0, 24.0, 26.0, 48.0, 81.0, 109.0, 163.0, 296.0, 486.0, 1026.0, 2368.0, 6404.0, 27467.0, 233250.0, 690413.0, 67493.0, 12075.0, 3652.0, 1434.0, 668.0, 369.0, 230.0, 139.0, 101.0, 74.0, 29.0, 24.0, 21.0, 11.0, 8.0, 4.0, 5.0, 3.0, 4.0, 0.0, 6.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-2.4375, -2.36187744140625, -2.2862548828125, -2.21063232421875, -2.135009765625, -2.05938720703125, -1.9837646484375, -1.90814208984375, -1.83251953125, -1.75689697265625, -1.6812744140625, -1.60565185546875, -1.530029296875, -1.45440673828125, -1.3787841796875, -1.30316162109375, -1.2275390625, -1.15191650390625, -1.0762939453125, -1.00067138671875, -0.925048828125, -0.84942626953125, -0.7738037109375, -0.69818115234375, -0.62255859375, -0.54693603515625, -0.4713134765625, -0.39569091796875, -0.320068359375, -0.24444580078125, -0.1688232421875, -0.09320068359375, -0.017578125, 0.05804443359375, 0.1336669921875, 0.20928955078125, 0.284912109375, 0.36053466796875, 0.4361572265625, 0.51177978515625, 0.58740234375, 0.66302490234375, 0.7386474609375, 0.81427001953125, 0.889892578125, 0.96551513671875, 1.0411376953125, 1.11676025390625, 1.1923828125, 1.26800537109375, 1.3436279296875, 1.41925048828125, 1.494873046875, 1.57049560546875, 1.6461181640625, 1.72174072265625, 1.79736328125, 1.87298583984375, 1.9486083984375, 2.02423095703125, 2.099853515625, 2.17547607421875, 2.2510986328125, 2.32672119140625, 2.40234375]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 2.0, 2.0, 6.0, 1.0, 7.0, 7.0, 7.0, 7.0, 12.0, 19.0, 20.0, 29.0, 38.0, 31.0, 59.0, 53.0, 70.0, 92.0, 84.0, 87.0, 71.0, 60.0, 44.0, 37.0, 31.0, 27.0, 20.0, 15.0, 17.0, 12.0, 8.0, 9.0, 5.0, 4.0, 4.0, 1.0, 4.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-2.3515625, -2.286773681640625, -2.22198486328125, -2.157196044921875, -2.0924072265625, -2.027618408203125, -1.96282958984375, -1.898040771484375, -1.833251953125, -1.768463134765625, -1.70367431640625, -1.638885498046875, -1.5740966796875, -1.509307861328125, -1.44451904296875, -1.379730224609375, -1.31494140625, -1.250152587890625, -1.18536376953125, -1.120574951171875, -1.0557861328125, -0.990997314453125, -0.92620849609375, -0.861419677734375, -0.796630859375, -0.731842041015625, -0.66705322265625, -0.602264404296875, -0.5374755859375, -0.472686767578125, -0.40789794921875, -0.343109130859375, -0.2783203125, -0.213531494140625, -0.14874267578125, -0.083953857421875, -0.0191650390625, 0.045623779296875, 0.11041259765625, 0.175201416015625, 0.239990234375, 0.304779052734375, 0.36956787109375, 0.434356689453125, 0.4991455078125, 0.563934326171875, 0.62872314453125, 0.693511962890625, 0.75830078125, 0.823089599609375, 0.88787841796875, 0.952667236328125, 1.0174560546875, 1.082244873046875, 1.14703369140625, 1.211822509765625, 1.276611328125, 1.341400146484375, 1.40618896484375, 1.470977783203125, 1.5357666015625, 1.600555419921875, 1.66534423828125, 1.730133056640625, 1.794921875]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 7.0, 6.0, 11.0, 26.0, 55.0, 83.0, 135.0, 241.0, 175.0, 108.0, 71.0, 37.0, 25.0, 13.0, 3.0, 4.0, 4.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-49.515403747558594, -47.71651077270508, -45.91761779785156, -44.11872100830078, -42.319828033447266, -40.52093505859375, -38.722042083740234, -36.92314910888672, -35.12425231933594, -33.32535934448242, -31.526464462280273, -29.727571487426758, -27.92867660522461, -26.129783630371094, -24.330890655517578, -22.53199577331543, -20.733102798461914, -18.9342098236084, -17.13531494140625, -15.336421966552734, -13.537527084350586, -11.73863410949707, -9.939740180969238, -8.140846252441406, -6.341952323913574, -4.543058395385742, -2.7441647052764893, -0.9452710151672363, 0.8536229133605957, 2.6525163650512695, 4.451410293579102, 6.250304222106934, 8.049198150634766, 9.848092079162598, 11.64698600769043, 13.445878982543945, 15.244773864746094, 17.04366683959961, 18.842559814453125, 20.641454696655273, 22.440349578857422, 24.239242553710938, 26.038137435913086, 27.8370304107666, 29.63592529296875, 31.434818267822266, 33.23371124267578, 35.03260803222656, 36.83149719238281, 38.63039016723633, 40.429283142089844, 42.228179931640625, 44.02707290649414, 45.825965881347656, 47.62485885620117, 49.42375183105469, 51.22264862060547, 53.021541595458984, 54.8204345703125, 56.61933135986328, 58.4182243347168, 60.21711730957031, 62.01601028442383, 63.814903259277344, 65.61380004882812]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 7.0, 5.0, 5.0, 7.0, 10.0, 14.0, 9.0, 20.0, 18.0, 18.0, 18.0, 23.0, 27.0, 36.0, 24.0, 39.0, 47.0, 58.0, 71.0, 76.0, 77.0, 54.0, 48.0, 38.0, 39.0, 30.0, 37.0, 26.0, 16.0, 17.0, 15.0, 18.0, 13.0, 9.0, 9.0, 6.0, 7.0, 8.0, 4.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-35.38645935058594, -34.184654235839844, -32.982845306396484, -31.78104019165039, -30.579235076904297, -29.37742805480957, -28.175621032714844, -26.97381591796875, -25.772008895874023, -24.570201873779297, -23.368396759033203, -22.166589736938477, -20.96478271484375, -19.762977600097656, -18.56117057800293, -17.359363555908203, -16.15755844116211, -14.9557523727417, -13.753946304321289, -12.552139282226562, -11.350333213806152, -10.148527145385742, -8.946720123291016, -7.7449140548706055, -6.543107986450195, -5.341301918029785, -4.139495372772217, -2.9376890659332275, -1.7358827590942383, -0.5340766906738281, 0.6677298545837402, 1.8695363998413086, 3.0713424682617188, 4.273148536682129, 5.474955081939697, 6.676761627197266, 7.878567695617676, 9.080373764038086, 10.282180786132812, 11.483986854553223, 12.685792922973633, 13.887598991394043, 15.089405059814453, 16.29121208190918, 17.493019104003906, 18.69482421875, 19.896631240844727, 21.098438262939453, 22.300243377685547, 23.502050399780273, 24.703855514526367, 25.905662536621094, 27.107467651367188, 28.309274673461914, 29.51108169555664, 30.712886810302734, 31.91469383239746, 33.11650085449219, 34.31830596923828, 35.520111083984375, 36.721920013427734, 37.92372512817383, 39.12553024291992, 40.32733917236328, 41.529144287109375]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 4.0, 7.0, 5.0, 7.0, 17.0, 26.0, 26.0, 32.0, 51.0, 73.0, 127.0, 187.0, 334.0, 541.0, 958.0, 1804.0, 3450.0, 6995.0, 14549.0, 34507.0, 96531.0, 324965.0, 1147290.0, 1697423.0, 597470.0, 168106.0, 56030.0, 22292.0, 10041.0, 4768.0, 2518.0, 1323.0, 761.0, 384.0, 268.0, 136.0, 85.0, 52.0, 44.0, 26.0, 18.0, 16.0, 8.0, 11.0, 6.0, 5.0, 2.0, 4.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.919921875, -1.8603668212890625, -1.800811767578125, -1.7412567138671875, -1.68170166015625, -1.6221466064453125, -1.562591552734375, -1.5030364990234375, -1.4434814453125, -1.3839263916015625, -1.324371337890625, -1.2648162841796875, -1.20526123046875, -1.1457061767578125, -1.086151123046875, -1.0265960693359375, -0.967041015625, -0.9074859619140625, -0.847930908203125, -0.7883758544921875, -0.72882080078125, -0.6692657470703125, -0.609710693359375, -0.5501556396484375, -0.4906005859375, -0.4310455322265625, -0.371490478515625, -0.3119354248046875, -0.25238037109375, -0.1928253173828125, -0.133270263671875, -0.0737152099609375, -0.01416015625, 0.0453948974609375, 0.104949951171875, 0.1645050048828125, 0.22406005859375, 0.2836151123046875, 0.343170166015625, 0.4027252197265625, 0.4622802734375, 0.5218353271484375, 0.581390380859375, 0.6409454345703125, 0.70050048828125, 0.7600555419921875, 0.819610595703125, 0.8791656494140625, 0.938720703125, 0.9982757568359375, 1.057830810546875, 1.1173858642578125, 1.17694091796875, 1.2364959716796875, 1.296051025390625, 1.3556060791015625, 1.4151611328125, 1.4747161865234375, 1.534271240234375, 1.5938262939453125, 1.65338134765625, 1.7129364013671875, 1.772491455078125, 1.8320465087890625, 1.8916015625]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 5.0, 7.0, 8.0, 6.0, 7.0, 5.0, 11.0, 11.0, 18.0, 20.0, 26.0, 33.0, 30.0, 44.0, 42.0, 36.0, 33.0, 49.0, 48.0, 50.0, 55.0, 45.0, 59.0, 41.0, 41.0, 34.0, 38.0, 34.0, 26.0, 29.0, 26.0, 23.0, 13.0, 15.0, 9.0, 9.0, 6.0, 8.0, 3.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.525390625, -2.451385498046875, -2.37738037109375, -2.303375244140625, -2.2293701171875, -2.155364990234375, -2.08135986328125, -2.007354736328125, -1.933349609375, -1.859344482421875, -1.78533935546875, -1.711334228515625, -1.6373291015625, -1.563323974609375, -1.48931884765625, -1.415313720703125, -1.34130859375, -1.267303466796875, -1.19329833984375, -1.119293212890625, -1.0452880859375, -0.971282958984375, -0.89727783203125, -0.823272705078125, -0.749267578125, -0.675262451171875, -0.60125732421875, -0.527252197265625, -0.4532470703125, -0.379241943359375, -0.30523681640625, -0.231231689453125, -0.1572265625, -0.083221435546875, -0.00921630859375, 0.064788818359375, 0.1387939453125, 0.212799072265625, 0.28680419921875, 0.360809326171875, 0.434814453125, 0.508819580078125, 0.58282470703125, 0.656829833984375, 0.7308349609375, 0.804840087890625, 0.87884521484375, 0.952850341796875, 1.02685546875, 1.100860595703125, 1.17486572265625, 1.248870849609375, 1.3228759765625, 1.396881103515625, 1.47088623046875, 1.544891357421875, 1.618896484375, 1.692901611328125, 1.76690673828125, 1.840911865234375, 1.9149169921875, 1.988922119140625, 2.06292724609375, 2.136932373046875, 2.2109375]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 4.0, 2.0, 8.0, 2.0, 7.0, 7.0, 20.0, 26.0, 30.0, 58.0, 60.0, 121.0, 158.0, 262.0, 571.0, 1409.0, 3625.0, 12009.0, 56665.0, 554888.0, 3317659.0, 210244.0, 27012.0, 5878.0, 1930.0, 747.0, 399.0, 200.0, 110.0, 61.0, 34.0, 24.0, 23.0, 8.0, 6.0, 7.0, 7.0, 3.0, 1.0, 2.0, 3.0, 3.0, 1.0, 1.0], "bins": [-6.80078125, -6.632171630859375, -6.46356201171875, -6.294952392578125, -6.1263427734375, -5.957733154296875, -5.78912353515625, -5.620513916015625, -5.451904296875, -5.283294677734375, -5.11468505859375, -4.946075439453125, -4.7774658203125, -4.608856201171875, -4.44024658203125, -4.271636962890625, -4.10302734375, -3.934417724609375, -3.76580810546875, -3.597198486328125, -3.4285888671875, -3.259979248046875, -3.09136962890625, -2.922760009765625, -2.754150390625, -2.585540771484375, -2.41693115234375, -2.248321533203125, -2.0797119140625, -1.911102294921875, -1.74249267578125, -1.573883056640625, -1.4052734375, -1.236663818359375, -1.06805419921875, -0.899444580078125, -0.7308349609375, -0.562225341796875, -0.39361572265625, -0.225006103515625, -0.056396484375, 0.112213134765625, 0.28082275390625, 0.449432373046875, 0.6180419921875, 0.786651611328125, 0.95526123046875, 1.123870849609375, 1.29248046875, 1.461090087890625, 1.62969970703125, 1.798309326171875, 1.9669189453125, 2.135528564453125, 2.30413818359375, 2.472747802734375, 2.641357421875, 2.809967041015625, 2.97857666015625, 3.147186279296875, 3.3157958984375, 3.484405517578125, 3.65301513671875, 3.821624755859375, 3.990234375]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 6.0, 2.0, 4.0, 9.0, 7.0, 15.0, 21.0, 27.0, 41.0, 69.0, 111.0, 166.0, 378.0, 821.0, 1126.0, 663.0, 272.0, 157.0, 70.0, 44.0, 25.0, 28.0, 10.0, 4.0, 2.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-10.859375, -10.596435546875, -10.33349609375, -10.070556640625, -9.8076171875, -9.544677734375, -9.28173828125, -9.018798828125, -8.755859375, -8.492919921875, -8.22998046875, -7.967041015625, -7.7041015625, -7.441162109375, -7.17822265625, -6.915283203125, -6.65234375, -6.389404296875, -6.12646484375, -5.863525390625, -5.6005859375, -5.337646484375, -5.07470703125, -4.811767578125, -4.548828125, -4.285888671875, -4.02294921875, -3.760009765625, -3.4970703125, -3.234130859375, -2.97119140625, -2.708251953125, -2.4453125, -2.182373046875, -1.91943359375, -1.656494140625, -1.3935546875, -1.130615234375, -0.86767578125, -0.604736328125, -0.341796875, -0.078857421875, 0.18408203125, 0.447021484375, 0.7099609375, 0.972900390625, 1.23583984375, 1.498779296875, 1.76171875, 2.024658203125, 2.28759765625, 2.550537109375, 2.8134765625, 3.076416015625, 3.33935546875, 3.602294921875, 3.865234375, 4.128173828125, 4.39111328125, 4.654052734375, 4.9169921875, 5.179931640625, 5.44287109375, 5.705810546875, 5.96875]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 4.0, 2.0, 5.0, 3.0, 6.0, 13.0, 24.0, 93.0, 194.0, 312.0, 200.0, 96.0, 30.0, 15.0, 4.0, 4.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-65.0848388671875, -62.64365768432617, -60.202476501464844, -57.76129913330078, -55.32011795043945, -52.878936767578125, -50.43775939941406, -47.996578216552734, -45.555397033691406, -43.11421585083008, -40.67303466796875, -38.23185729980469, -35.79067611694336, -33.34949493408203, -30.908315658569336, -28.46713638305664, -26.025955200195312, -23.584774017333984, -21.14359474182129, -18.702415466308594, -16.261234283447266, -13.820054054260254, -11.378873825073242, -8.937694549560547, -6.496513366699219, -4.055333137512207, -1.6141529083251953, 0.8270273208618164, 3.268207550048828, 5.70938777923584, 8.150568008422852, 10.591747283935547, 13.032928466796875, 15.474108695983887, 17.9152889251709, 20.356468200683594, 22.797649383544922, 25.23883056640625, 27.680009841918945, 30.12118911743164, 32.56237030029297, 35.0035514831543, 37.444732666015625, 39.88591003417969, 42.327091217041016, 44.768272399902344, 47.209449768066406, 49.650630950927734, 52.09181213378906, 54.53299331665039, 56.97417449951172, 59.41535186767578, 61.85653305053711, 64.29771423339844, 66.7388916015625, 69.18006896972656, 71.62125396728516, 74.06243133544922, 76.50361633300781, 78.94479370117188, 81.38597106933594, 83.82715606689453, 86.2683334350586, 88.70951843261719, 91.15069580078125]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 7.0, 4.0, 4.0, 6.0, 4.0, 7.0, 11.0, 9.0, 9.0, 20.0, 19.0, 23.0, 18.0, 27.0, 28.0, 30.0, 28.0, 50.0, 43.0, 39.0, 44.0, 41.0, 41.0, 43.0, 44.0, 41.0, 31.0, 36.0, 35.0, 33.0, 29.0, 24.0, 29.0, 22.0, 25.0, 21.0, 18.0, 12.0, 13.0, 11.0, 5.0, 1.0, 2.0, 4.0, 2.0, 7.0, 5.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-22.15709686279297, -21.41603660583496, -20.674978256225586, -19.933917999267578, -19.192859649658203, -18.451799392700195, -17.710739135742188, -16.969680786132812, -16.228620529174805, -15.487561225891113, -14.746501922607422, -14.005441665649414, -13.264382362365723, -12.523323059082031, -11.782262802124023, -11.041203498840332, -10.30014419555664, -9.55908489227295, -8.818025588989258, -8.07696533203125, -7.335906028747559, -6.594846725463867, -5.853786945343018, -5.112727165222168, -4.371667861938477, -3.630608320236206, -2.8895487785339355, -2.148489236831665, -1.4074296951293945, -0.666370153427124, 0.07468938827514648, 0.8157491683959961, 1.5568084716796875, 2.297868013381958, 3.0389275550842285, 3.779987096786499, 4.5210466384887695, 5.262105941772461, 6.0031657218933105, 6.74422550201416, 7.485284805297852, 8.226344108581543, 8.967403411865234, 9.708463668823242, 10.449522972106934, 11.190582275390625, 11.931642532348633, 12.672701835632324, 13.413761138916016, 14.154820442199707, 14.895879745483398, 15.636940002441406, 16.37799835205078, 17.11905860900879, 17.860118865966797, 18.601177215576172, 19.34223747253418, 20.083297729492188, 20.824356079101562, 21.56541633605957, 22.306476593017578, 23.047534942626953, 23.78859519958496, 24.52965545654297, 25.270713806152344]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 2.0, 1.0, 6.0, 1.0, 2.0, 11.0, 9.0, 29.0, 31.0, 53.0, 77.0, 104.0, 170.0, 249.0, 345.0, 526.0, 859.0, 1366.0, 2251.0, 3927.0, 6365.0, 11472.0, 21189.0, 40431.0, 75584.0, 144374.0, 238038.0, 224963.0, 129172.0, 67445.0, 35161.0, 19248.0, 10336.0, 5715.0, 3461.0, 1977.0, 1267.0, 782.0, 527.0, 358.0, 227.0, 164.0, 87.0, 64.0, 50.0, 33.0, 22.0, 13.0, 7.0, 8.0, 7.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.52734375, -2.442840576171875, -2.35833740234375, -2.273834228515625, -2.1893310546875, -2.104827880859375, -2.02032470703125, -1.935821533203125, -1.851318359375, -1.766815185546875, -1.68231201171875, -1.597808837890625, -1.5133056640625, -1.428802490234375, -1.34429931640625, -1.259796142578125, -1.17529296875, -1.090789794921875, -1.00628662109375, -0.921783447265625, -0.8372802734375, -0.752777099609375, -0.66827392578125, -0.583770751953125, -0.499267578125, -0.414764404296875, -0.33026123046875, -0.245758056640625, -0.1612548828125, -0.076751708984375, 0.00775146484375, 0.092254638671875, 0.1767578125, 0.261260986328125, 0.34576416015625, 0.430267333984375, 0.5147705078125, 0.599273681640625, 0.68377685546875, 0.768280029296875, 0.852783203125, 0.937286376953125, 1.02178955078125, 1.106292724609375, 1.1907958984375, 1.275299072265625, 1.35980224609375, 1.444305419921875, 1.52880859375, 1.613311767578125, 1.69781494140625, 1.782318115234375, 1.8668212890625, 1.951324462890625, 2.03582763671875, 2.120330810546875, 2.204833984375, 2.289337158203125, 2.37384033203125, 2.458343505859375, 2.5428466796875, 2.627349853515625, 2.71185302734375, 2.796356201171875, 2.880859375]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 2.0, 3.0, 7.0, 4.0, 1.0, 6.0, 7.0, 9.0, 18.0, 22.0, 24.0, 20.0, 22.0, 29.0, 30.0, 36.0, 37.0, 36.0, 43.0, 42.0, 40.0, 55.0, 46.0, 49.0, 57.0, 45.0, 27.0, 37.0, 35.0, 30.0, 25.0, 23.0, 36.0, 22.0, 18.0, 12.0, 15.0, 6.0, 11.0, 7.0, 4.0, 4.0, 3.0, 1.0, 0.0, 4.0, 3.0, 3.0], "bins": [-2.63671875, -2.567718505859375, -2.49871826171875, -2.429718017578125, -2.3607177734375, -2.291717529296875, -2.22271728515625, -2.153717041015625, -2.084716796875, -2.015716552734375, -1.94671630859375, -1.877716064453125, -1.8087158203125, -1.739715576171875, -1.67071533203125, -1.601715087890625, -1.53271484375, -1.463714599609375, -1.39471435546875, -1.325714111328125, -1.2567138671875, -1.187713623046875, -1.11871337890625, -1.049713134765625, -0.980712890625, -0.911712646484375, -0.84271240234375, -0.773712158203125, -0.7047119140625, -0.635711669921875, -0.56671142578125, -0.497711181640625, -0.4287109375, -0.359710693359375, -0.29071044921875, -0.221710205078125, -0.1527099609375, -0.083709716796875, -0.01470947265625, 0.054290771484375, 0.123291015625, 0.192291259765625, 0.26129150390625, 0.330291748046875, 0.3992919921875, 0.468292236328125, 0.53729248046875, 0.606292724609375, 0.67529296875, 0.744293212890625, 0.81329345703125, 0.882293701171875, 0.9512939453125, 1.020294189453125, 1.08929443359375, 1.158294677734375, 1.227294921875, 1.296295166015625, 1.36529541015625, 1.434295654296875, 1.5032958984375, 1.572296142578125, 1.64129638671875, 1.710296630859375, 1.779296875]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 6.0, 10.0, 4.0, 19.0, 14.0, 27.0, 28.0, 26.0, 61.0, 100.0, 107.0, 176.0, 228.0, 381.0, 539.0, 929.0, 1710.0, 3896.0, 12626.0, 65361.0, 570371.0, 326689.0, 48043.0, 9738.0, 3385.0, 1501.0, 817.0, 557.0, 330.0, 238.0, 176.0, 115.0, 98.0, 67.0, 43.0, 32.0, 25.0, 20.0, 17.0, 15.0, 6.0, 9.0, 4.0, 6.0, 4.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.3828125, -7.15850830078125, -6.9342041015625, -6.70989990234375, -6.485595703125, -6.26129150390625, -6.0369873046875, -5.81268310546875, -5.58837890625, -5.36407470703125, -5.1397705078125, -4.91546630859375, -4.691162109375, -4.46685791015625, -4.2425537109375, -4.01824951171875, -3.7939453125, -3.56964111328125, -3.3453369140625, -3.12103271484375, -2.896728515625, -2.67242431640625, -2.4481201171875, -2.22381591796875, -1.99951171875, -1.77520751953125, -1.5509033203125, -1.32659912109375, -1.102294921875, -0.87799072265625, -0.6536865234375, -0.42938232421875, -0.205078125, 0.01922607421875, 0.2435302734375, 0.46783447265625, 0.692138671875, 0.91644287109375, 1.1407470703125, 1.36505126953125, 1.58935546875, 1.81365966796875, 2.0379638671875, 2.26226806640625, 2.486572265625, 2.71087646484375, 2.9351806640625, 3.15948486328125, 3.3837890625, 3.60809326171875, 3.8323974609375, 4.05670166015625, 4.281005859375, 4.50531005859375, 4.7296142578125, 4.95391845703125, 5.17822265625, 5.40252685546875, 5.6268310546875, 5.85113525390625, 6.075439453125, 6.29974365234375, 6.5240478515625, 6.74835205078125, 6.97265625]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 7.0, 4.0, 5.0, 12.0, 9.0, 16.0, 21.0, 24.0, 35.0, 26.0, 38.0, 34.0, 61.0, 42.0, 42.0, 47.0, 57.0, 55.0, 55.0, 64.0, 64.0, 37.0, 39.0, 37.0, 16.0, 28.0, 26.0, 16.0, 24.0, 16.0, 11.0, 9.0, 10.0, 7.0, 8.0, 3.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.13671875, -6.84820556640625, -6.5596923828125, -6.27117919921875, -5.982666015625, -5.69415283203125, -5.4056396484375, -5.11712646484375, -4.82861328125, -4.54010009765625, -4.2515869140625, -3.96307373046875, -3.674560546875, -3.38604736328125, -3.0975341796875, -2.80902099609375, -2.5205078125, -2.23199462890625, -1.9434814453125, -1.65496826171875, -1.366455078125, -1.07794189453125, -0.7894287109375, -0.50091552734375, -0.21240234375, 0.07611083984375, 0.3646240234375, 0.65313720703125, 0.941650390625, 1.23016357421875, 1.5186767578125, 1.80718994140625, 2.095703125, 2.38421630859375, 2.6727294921875, 2.96124267578125, 3.249755859375, 3.53826904296875, 3.8267822265625, 4.11529541015625, 4.40380859375, 4.69232177734375, 4.9808349609375, 5.26934814453125, 5.557861328125, 5.84637451171875, 6.1348876953125, 6.42340087890625, 6.7119140625, 7.00042724609375, 7.2889404296875, 7.57745361328125, 7.865966796875, 8.15447998046875, 8.4429931640625, 8.73150634765625, 9.02001953125, 9.30853271484375, 9.5970458984375, 9.88555908203125, 10.174072265625, 10.46258544921875, 10.7510986328125, 11.03961181640625, 11.328125]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 4.0, 2.0, 1.0, 5.0, 6.0, 7.0, 14.0, 35.0, 53.0, 101.0, 279.0, 881.0, 4456.0, 72147.0, 896729.0, 68151.0, 4359.0, 858.0, 240.0, 116.0, 51.0, 23.0, 20.0, 5.0, 3.0, 8.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8203125, -2.6890869140625, -2.557861328125, -2.4266357421875, -2.29541015625, -2.1641845703125, -2.032958984375, -1.9017333984375, -1.7705078125, -1.6392822265625, -1.508056640625, -1.3768310546875, -1.24560546875, -1.1143798828125, -0.983154296875, -0.8519287109375, -0.720703125, -0.5894775390625, -0.458251953125, -0.3270263671875, -0.19580078125, -0.0645751953125, 0.066650390625, 0.1978759765625, 0.3291015625, 0.4603271484375, 0.591552734375, 0.7227783203125, 0.85400390625, 0.9852294921875, 1.116455078125, 1.2476806640625, 1.37890625, 1.5101318359375, 1.641357421875, 1.7725830078125, 1.90380859375, 2.0350341796875, 2.166259765625, 2.2974853515625, 2.4287109375, 2.5599365234375, 2.691162109375, 2.8223876953125, 2.95361328125, 3.0848388671875, 3.216064453125, 3.3472900390625, 3.478515625, 3.6097412109375, 3.740966796875, 3.8721923828125, 4.00341796875, 4.1346435546875, 4.265869140625, 4.3970947265625, 4.5283203125, 4.6595458984375, 4.790771484375, 4.9219970703125, 5.05322265625, 5.1844482421875, 5.315673828125, 5.4468994140625, 5.578125]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 6.0, 3.0, 4.0, 6.0, 7.0, 8.0, 13.0, 16.0, 13.0, 17.0, 21.0, 36.0, 36.0, 53.0, 51.0, 71.0, 74.0, 76.0, 62.0, 91.0, 65.0, 60.0, 43.0, 43.0, 30.0, 24.0, 10.0, 16.0, 14.0, 8.0, 5.0, 9.0, 4.0, 3.0, 2.0, 3.0, 5.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.0006165504455566406, -0.0005993470549583435, -0.0005821436643600464, -0.0005649402737617493, -0.0005477368831634521, -0.000530533492565155, -0.0005133301019668579, -0.0004961267113685608, -0.00047892332077026367, -0.00046171993017196655, -0.00044451653957366943, -0.0004273131489753723, -0.0004101097583770752, -0.0003929063677787781, -0.00037570297718048096, -0.00035849958658218384, -0.0003412961959838867, -0.0003240928053855896, -0.0003068894147872925, -0.00028968602418899536, -0.00027248263359069824, -0.0002552792429924011, -0.000238075852394104, -0.00022087246179580688, -0.00020366907119750977, -0.00018646568059921265, -0.00016926229000091553, -0.0001520588994026184, -0.0001348555088043213, -0.00011765211820602417, -0.00010044872760772705, -8.324533700942993e-05, -6.604194641113281e-05, -4.883855581283569e-05, -3.1635165214538574e-05, -1.4431774616241455e-05, 2.771615982055664e-06, 1.9975006580352783e-05, 3.71783971786499e-05, 5.438178777694702e-05, 7.158517837524414e-05, 8.878856897354126e-05, 0.00010599195957183838, 0.0001231953501701355, 0.00014039874076843262, 0.00015760213136672974, 0.00017480552196502686, 0.00019200891256332397, 0.0002092123031616211, 0.0002264156937599182, 0.00024361908435821533, 0.00026082247495651245, 0.00027802586555480957, 0.0002952292561531067, 0.0003124326467514038, 0.00032963603734970093, 0.00034683942794799805, 0.00036404281854629517, 0.0003812462091445923, 0.0003984495997428894, 0.0004156529903411865, 0.00043285638093948364, 0.00045005977153778076, 0.0004672631621360779, 0.000484466552734375]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 4.0, 3.0, 5.0, 1.0, 8.0, 17.0, 11.0, 20.0, 33.0, 51.0, 94.0, 135.0, 241.0, 413.0, 788.0, 1548.0, 3780.0, 12479.0, 71792.0, 596433.0, 303248.0, 42882.0, 8724.0, 3041.0, 1268.0, 656.0, 345.0, 193.0, 127.0, 78.0, 53.0, 37.0, 24.0, 15.0, 8.0, 4.0, 5.0, 2.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.34765625, -2.2856597900390625, -2.223663330078125, -2.1616668701171875, -2.09967041015625, -2.0376739501953125, -1.975677490234375, -1.9136810302734375, -1.8516845703125, -1.7896881103515625, -1.727691650390625, -1.6656951904296875, -1.60369873046875, -1.5417022705078125, -1.479705810546875, -1.4177093505859375, -1.355712890625, -1.2937164306640625, -1.231719970703125, -1.1697235107421875, -1.10772705078125, -1.0457305908203125, -0.983734130859375, -0.9217376708984375, -0.8597412109375, -0.7977447509765625, -0.735748291015625, -0.6737518310546875, -0.61175537109375, -0.5497589111328125, -0.487762451171875, -0.4257659912109375, -0.36376953125, -0.3017730712890625, -0.239776611328125, -0.1777801513671875, -0.11578369140625, -0.0537872314453125, 0.008209228515625, 0.0702056884765625, 0.1322021484375, 0.1941986083984375, 0.256195068359375, 0.3181915283203125, 0.38018798828125, 0.4421844482421875, 0.504180908203125, 0.5661773681640625, 0.628173828125, 0.6901702880859375, 0.752166748046875, 0.8141632080078125, 0.87615966796875, 0.9381561279296875, 1.000152587890625, 1.0621490478515625, 1.1241455078125, 1.1861419677734375, 1.248138427734375, 1.3101348876953125, 1.37213134765625, 1.4341278076171875, 1.496124267578125, 1.5581207275390625, 1.6201171875]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 6.0, 2.0, 2.0, 5.0, 6.0, 5.0, 12.0, 5.0, 12.0, 18.0, 18.0, 8.0, 29.0, 25.0, 37.0, 35.0, 39.0, 38.0, 49.0, 57.0, 53.0, 49.0, 48.0, 46.0, 56.0, 54.0, 34.0, 37.0, 26.0, 37.0, 26.0, 30.0, 24.0, 14.0, 15.0, 7.0, 14.0, 8.0, 7.0, 6.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.3359375, -1.29400634765625, -1.2520751953125, -1.21014404296875, -1.168212890625, -1.12628173828125, -1.0843505859375, -1.04241943359375, -1.00048828125, -0.95855712890625, -0.9166259765625, -0.87469482421875, -0.832763671875, -0.79083251953125, -0.7489013671875, -0.70697021484375, -0.6650390625, -0.62310791015625, -0.5811767578125, -0.53924560546875, -0.497314453125, -0.45538330078125, -0.4134521484375, -0.37152099609375, -0.32958984375, -0.28765869140625, -0.2457275390625, -0.20379638671875, -0.161865234375, -0.11993408203125, -0.0780029296875, -0.03607177734375, 0.005859375, 0.04779052734375, 0.0897216796875, 0.13165283203125, 0.173583984375, 0.21551513671875, 0.2574462890625, 0.29937744140625, 0.34130859375, 0.38323974609375, 0.4251708984375, 0.46710205078125, 0.509033203125, 0.55096435546875, 0.5928955078125, 0.63482666015625, 0.6767578125, 0.71868896484375, 0.7606201171875, 0.80255126953125, 0.844482421875, 0.88641357421875, 0.9283447265625, 0.97027587890625, 1.01220703125, 1.05413818359375, 1.0960693359375, 1.13800048828125, 1.179931640625, 1.22186279296875, 1.2637939453125, 1.30572509765625, 1.34765625]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 7.0, 25.0, 77.0, 284.0, 402.0, 148.0, 33.0, 12.0, 9.0, 2.0, 0.0, 4.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-84.11856842041016, -80.77030944824219, -77.42205047607422, -74.07379150390625, -70.72554016113281, -67.37728118896484, -64.02902221679688, -60.680763244628906, -57.3325080871582, -53.984249114990234, -50.63599395751953, -47.28773498535156, -43.939476013183594, -40.59122085571289, -37.24296188354492, -33.89470672607422, -30.54644775390625, -27.198190689086914, -23.849933624267578, -20.50167465209961, -17.153417587280273, -13.805160522460938, -10.456901550292969, -7.108644485473633, -3.760387420654297, -0.41212987899780273, 2.9361276626586914, 6.284385681152344, 9.63264274597168, 12.980899810791016, 16.329158782958984, 19.67741584777832, 23.025680541992188, 26.373937606811523, 29.72219467163086, 33.07045364379883, 36.41870880126953, 39.7669677734375, 43.11522674560547, 46.46348571777344, 49.81174087524414, 53.15999984741211, 56.50825500488281, 59.85651397705078, 63.20477294921875, 66.55302429199219, 69.90129089355469, 73.24954223632812, 76.5978012084961, 79.94606018066406, 83.29431915283203, 86.642578125, 89.99082946777344, 93.3390884399414, 96.68734741210938, 100.03560638427734, 103.38386535644531, 106.73212432861328, 110.08038330078125, 113.42863464355469, 116.77689361572266, 120.12515258789062, 123.4734115600586, 126.82167053222656, 130.169921875]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 4.0, 5.0, 4.0, 4.0, 6.0, 11.0, 11.0, 13.0, 8.0, 24.0, 17.0, 21.0, 23.0, 21.0, 31.0, 26.0, 28.0, 49.0, 45.0, 63.0, 70.0, 74.0, 61.0, 57.0, 31.0, 27.0, 37.0, 33.0, 27.0, 30.0, 29.0, 20.0, 22.0, 13.0, 9.0, 12.0, 8.0, 8.0, 6.0, 8.0, 4.0, 6.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-30.632476806640625, -29.653396606445312, -28.674314498901367, -27.695232391357422, -26.71615219116211, -25.737071990966797, -24.75798988342285, -23.778907775878906, -22.799827575683594, -21.82074737548828, -20.841665267944336, -19.86258316040039, -18.883502960205078, -17.904422760009766, -16.92534065246582, -15.946259498596191, -14.967178344726562, -13.988097190856934, -13.009016036987305, -12.029934883117676, -11.050853729248047, -10.071772575378418, -9.092691421508789, -8.11361026763916, -7.134529113769531, -6.155447959899902, -5.176366806030273, -4.1972856521606445, -3.2182044982910156, -2.2391233444213867, -1.2600421905517578, -0.2809610366821289, 0.6981201171875, 1.677201271057129, 2.656282424926758, 3.6353635787963867, 4.614444732666016, 5.5935258865356445, 6.572607040405273, 7.551688194274902, 8.530769348144531, 9.50985050201416, 10.488931655883789, 11.468012809753418, 12.447093963623047, 13.426175117492676, 14.405256271362305, 15.384337425231934, 16.363418579101562, 17.342498779296875, 18.32158088684082, 19.300662994384766, 20.279743194580078, 21.25882339477539, 22.237905502319336, 23.21698760986328, 24.196067810058594, 25.175148010253906, 26.15423011779785, 27.133312225341797, 28.11239242553711, 29.091472625732422, 30.070554733276367, 31.049636840820312, 32.028717041015625]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 3.0, 5.0, 3.0, 8.0, 14.0, 15.0, 24.0, 32.0, 53.0, 73.0, 153.0, 255.0, 447.0, 860.0, 1688.0, 3739.0, 9156.0, 27326.0, 112407.0, 593988.0, 2147590.0, 1041733.0, 189736.0, 41608.0, 13214.0, 5259.0, 2308.0, 1159.0, 579.0, 340.0, 184.0, 123.0, 69.0, 43.0, 24.0, 23.0, 10.0, 13.0, 5.0, 7.0, 4.0, 4.0, 4.0, 1.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.67578125, -1.60784912109375, -1.5399169921875, -1.47198486328125, -1.404052734375, -1.33612060546875, -1.2681884765625, -1.20025634765625, -1.13232421875, -1.06439208984375, -0.9964599609375, -0.92852783203125, -0.860595703125, -0.79266357421875, -0.7247314453125, -0.65679931640625, -0.5888671875, -0.52093505859375, -0.4530029296875, -0.38507080078125, -0.317138671875, -0.24920654296875, -0.1812744140625, -0.11334228515625, -0.04541015625, 0.02252197265625, 0.0904541015625, 0.15838623046875, 0.226318359375, 0.29425048828125, 0.3621826171875, 0.43011474609375, 0.498046875, 0.56597900390625, 0.6339111328125, 0.70184326171875, 0.769775390625, 0.83770751953125, 0.9056396484375, 0.97357177734375, 1.04150390625, 1.10943603515625, 1.1773681640625, 1.24530029296875, 1.313232421875, 1.38116455078125, 1.4490966796875, 1.51702880859375, 1.5849609375, 1.65289306640625, 1.7208251953125, 1.78875732421875, 1.856689453125, 1.92462158203125, 1.9925537109375, 2.06048583984375, 2.12841796875, 2.19635009765625, 2.2642822265625, 2.33221435546875, 2.400146484375, 2.46807861328125, 2.5360107421875, 2.60394287109375, 2.671875]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 3.0, 4.0, 2.0, 2.0, 8.0, 10.0, 10.0, 27.0, 14.0, 25.0, 23.0, 34.0, 42.0, 43.0, 41.0, 45.0, 52.0, 49.0, 47.0, 57.0, 57.0, 45.0, 53.0, 45.0, 47.0, 36.0, 33.0, 34.0, 27.0, 20.0, 18.0, 12.0, 9.0, 5.0, 12.0, 10.0, 6.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.54296875, -2.468963623046875, -2.39495849609375, -2.320953369140625, -2.2469482421875, -2.172943115234375, -2.09893798828125, -2.024932861328125, -1.950927734375, -1.876922607421875, -1.80291748046875, -1.728912353515625, -1.6549072265625, -1.580902099609375, -1.50689697265625, -1.432891845703125, -1.35888671875, -1.284881591796875, -1.21087646484375, -1.136871337890625, -1.0628662109375, -0.988861083984375, -0.91485595703125, -0.840850830078125, -0.766845703125, -0.692840576171875, -0.61883544921875, -0.544830322265625, -0.4708251953125, -0.396820068359375, -0.32281494140625, -0.248809814453125, -0.1748046875, -0.100799560546875, -0.02679443359375, 0.047210693359375, 0.1212158203125, 0.195220947265625, 0.26922607421875, 0.343231201171875, 0.417236328125, 0.491241455078125, 0.56524658203125, 0.639251708984375, 0.7132568359375, 0.787261962890625, 0.86126708984375, 0.935272216796875, 1.00927734375, 1.083282470703125, 1.15728759765625, 1.231292724609375, 1.3052978515625, 1.379302978515625, 1.45330810546875, 1.527313232421875, 1.601318359375, 1.675323486328125, 1.74932861328125, 1.823333740234375, 1.8973388671875, 1.971343994140625, 2.04534912109375, 2.119354248046875, 2.193359375]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 1.0, 0.0, 4.0, 3.0, 8.0, 12.0, 11.0, 23.0, 52.0, 103.0, 239.0, 515.0, 1623.0, 9362.0, 245970.0, 3877120.0, 52687.0, 4470.0, 1154.0, 468.0, 216.0, 113.0, 51.0, 35.0, 17.0, 13.0, 11.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.953125, -9.6712646484375, -9.389404296875, -9.1075439453125, -8.82568359375, -8.5438232421875, -8.261962890625, -7.9801025390625, -7.6982421875, -7.4163818359375, -7.134521484375, -6.8526611328125, -6.57080078125, -6.2889404296875, -6.007080078125, -5.7252197265625, -5.443359375, -5.1614990234375, -4.879638671875, -4.5977783203125, -4.31591796875, -4.0340576171875, -3.752197265625, -3.4703369140625, -3.1884765625, -2.9066162109375, -2.624755859375, -2.3428955078125, -2.06103515625, -1.7791748046875, -1.497314453125, -1.2154541015625, -0.93359375, -0.6517333984375, -0.369873046875, -0.0880126953125, 0.19384765625, 0.4757080078125, 0.757568359375, 1.0394287109375, 1.3212890625, 1.6031494140625, 1.885009765625, 2.1668701171875, 2.44873046875, 2.7305908203125, 3.012451171875, 3.2943115234375, 3.576171875, 3.8580322265625, 4.139892578125, 4.4217529296875, 4.70361328125, 4.9854736328125, 5.267333984375, 5.5491943359375, 5.8310546875, 6.1129150390625, 6.394775390625, 6.6766357421875, 6.95849609375, 7.2403564453125, 7.522216796875, 7.8040771484375, 8.0859375]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 5.0, 5.0, 7.0, 14.0, 18.0, 35.0, 57.0, 118.0, 270.0, 766.0, 1331.0, 840.0, 336.0, 120.0, 58.0, 38.0, 25.0, 11.0, 9.0, 10.0, 2.0, 5.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.38671875, -7.12823486328125, -6.8697509765625, -6.61126708984375, -6.352783203125, -6.09429931640625, -5.8358154296875, -5.57733154296875, -5.31884765625, -5.06036376953125, -4.8018798828125, -4.54339599609375, -4.284912109375, -4.02642822265625, -3.7679443359375, -3.50946044921875, -3.2509765625, -2.99249267578125, -2.7340087890625, -2.47552490234375, -2.217041015625, -1.95855712890625, -1.7000732421875, -1.44158935546875, -1.18310546875, -0.92462158203125, -0.6661376953125, -0.40765380859375, -0.149169921875, 0.10931396484375, 0.3677978515625, 0.62628173828125, 0.884765625, 1.14324951171875, 1.4017333984375, 1.66021728515625, 1.918701171875, 2.17718505859375, 2.4356689453125, 2.69415283203125, 2.95263671875, 3.21112060546875, 3.4696044921875, 3.72808837890625, 3.986572265625, 4.24505615234375, 4.5035400390625, 4.76202392578125, 5.0205078125, 5.27899169921875, 5.5374755859375, 5.79595947265625, 6.054443359375, 6.31292724609375, 6.5714111328125, 6.82989501953125, 7.08837890625, 7.34686279296875, 7.6053466796875, 7.86383056640625, 8.122314453125, 8.38079833984375, 8.6392822265625, 8.89776611328125, 9.15625]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 9.0, 22.0, 383.0, 507.0, 58.0, 17.0, 4.0, 4.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-92.98286437988281, -87.84374237060547, -82.70462036132812, -77.56549835205078, -72.42637634277344, -67.2872543334961, -62.14813232421875, -57.009010314941406, -51.86988830566406, -46.73076629638672, -41.591644287109375, -36.45252227783203, -31.313400268554688, -26.174278259277344, -21.03515625, -15.896034240722656, -10.756912231445312, -5.617790222167969, -0.478668212890625, 4.660453796386719, 9.799575805664062, 14.938697814941406, 20.07781982421875, 25.216941833496094, 30.356063842773438, 35.49518585205078, 40.634307861328125, 45.77342987060547, 50.91255187988281, 56.051673889160156, 61.1907958984375, 66.32991790771484, 71.46902465820312, 76.60814666748047, 81.74726867675781, 86.88639068603516, 92.0255126953125, 97.16463470458984, 102.30375671386719, 107.44287872314453, 112.58200073242188, 117.72112274169922, 122.86024475097656, 127.9993667602539, 133.13848876953125, 138.27761840820312, 143.41673278808594, 148.55584716796875, 153.69497680664062, 158.8341064453125, 163.9732208251953, 169.11233520507812, 174.25146484375, 179.39059448242188, 184.5297088623047, 189.6688232421875, 194.80795288085938, 199.94708251953125, 205.08619689941406, 210.22531127929688, 215.36444091796875, 220.50357055664062, 225.64268493652344, 230.78179931640625, 235.92092895507812]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 5.0, 3.0, 6.0, 21.0, 18.0, 9.0, 17.0, 29.0, 34.0, 55.0, 58.0, 75.0, 73.0, 92.0, 75.0, 91.0, 80.0, 53.0, 60.0, 46.0, 33.0, 22.0, 16.0, 15.0, 9.0, 7.0, 6.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.274860382080078, -28.04176139831543, -26.80866241455078, -25.575563430786133, -24.342464447021484, -23.10936737060547, -21.87626838684082, -20.643169403076172, -19.410070419311523, -18.176971435546875, -16.943872451782227, -15.710774421691895, -14.477675437927246, -13.244576454162598, -12.011478424072266, -10.778379440307617, -9.545280456542969, -8.31218147277832, -7.07908296585083, -5.84598445892334, -4.612885475158691, -3.379786491394043, -2.1466879844665527, -0.9135894775390625, 0.31950950622558594, 1.5526082515716553, 2.7857069969177246, 4.018805503845215, 5.251904487609863, 6.485003471374512, 7.718101978302002, 8.951200485229492, 10.18429946899414, 11.417398452758789, 12.650497436523438, 13.88359546661377, 15.116694450378418, 16.34979248046875, 17.5828914642334, 18.815990447998047, 20.049089431762695, 21.282188415527344, 22.515287399291992, 23.74838638305664, 24.981483459472656, 26.214584350585938, 27.447681427001953, 28.6807804107666, 29.91387939453125, 31.1469783782959, 32.38007736206055, 33.61317443847656, 34.846275329589844, 36.07937240600586, 37.31247329711914, 38.545570373535156, 39.77867126464844, 41.01176834106445, 42.244869232177734, 43.47796630859375, 44.71106719970703, 45.94416427612305, 47.17726516723633, 48.410362243652344, 49.64345932006836]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 6.0, 4.0, 10.0, 4.0, 20.0, 22.0, 24.0, 57.0, 119.0, 149.0, 251.0, 401.0, 664.0, 1136.0, 2138.0, 3986.0, 8149.0, 16581.0, 35494.0, 78201.0, 163021.0, 260150.0, 234040.0, 130073.0, 59929.0, 27156.0, 13034.0, 6394.0, 3260.0, 1655.0, 1011.0, 549.0, 339.0, 201.0, 120.0, 71.0, 55.0, 36.0, 18.0, 14.0, 15.0, 5.0, 2.0, 2.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.080078125, -1.9927978515625, -1.905517578125, -1.8182373046875, -1.73095703125, -1.6436767578125, -1.556396484375, -1.4691162109375, -1.3818359375, -1.2945556640625, -1.207275390625, -1.1199951171875, -1.03271484375, -0.9454345703125, -0.858154296875, -0.7708740234375, -0.68359375, -0.5963134765625, -0.509033203125, -0.4217529296875, -0.33447265625, -0.2471923828125, -0.159912109375, -0.0726318359375, 0.0146484375, 0.1019287109375, 0.189208984375, 0.2764892578125, 0.36376953125, 0.4510498046875, 0.538330078125, 0.6256103515625, 0.712890625, 0.8001708984375, 0.887451171875, 0.9747314453125, 1.06201171875, 1.1492919921875, 1.236572265625, 1.3238525390625, 1.4111328125, 1.4984130859375, 1.585693359375, 1.6729736328125, 1.76025390625, 1.8475341796875, 1.934814453125, 2.0220947265625, 2.109375, 2.1966552734375, 2.283935546875, 2.3712158203125, 2.45849609375, 2.5457763671875, 2.633056640625, 2.7203369140625, 2.8076171875, 2.8948974609375, 2.982177734375, 3.0694580078125, 3.15673828125, 3.2440185546875, 3.331298828125, 3.4185791015625, 3.505859375]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 6.0, 5.0, 6.0, 12.0, 7.0, 14.0, 18.0, 11.0, 16.0, 26.0, 23.0, 37.0, 37.0, 43.0, 39.0, 48.0, 52.0, 47.0, 43.0, 39.0, 48.0, 52.0, 32.0, 43.0, 39.0, 36.0, 38.0, 27.0, 22.0, 28.0, 17.0, 18.0, 15.0, 15.0, 7.0, 14.0, 11.0, 7.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.517578125, -2.448455810546875, -2.37933349609375, -2.310211181640625, -2.2410888671875, -2.171966552734375, -2.10284423828125, -2.033721923828125, -1.964599609375, -1.895477294921875, -1.82635498046875, -1.757232666015625, -1.6881103515625, -1.618988037109375, -1.54986572265625, -1.480743408203125, -1.41162109375, -1.342498779296875, -1.27337646484375, -1.204254150390625, -1.1351318359375, -1.066009521484375, -0.99688720703125, -0.927764892578125, -0.858642578125, -0.789520263671875, -0.72039794921875, -0.651275634765625, -0.5821533203125, -0.513031005859375, -0.44390869140625, -0.374786376953125, -0.3056640625, -0.236541748046875, -0.16741943359375, -0.098297119140625, -0.0291748046875, 0.039947509765625, 0.10906982421875, 0.178192138671875, 0.247314453125, 0.316436767578125, 0.38555908203125, 0.454681396484375, 0.5238037109375, 0.592926025390625, 0.66204833984375, 0.731170654296875, 0.80029296875, 0.869415283203125, 0.93853759765625, 1.007659912109375, 1.0767822265625, 1.145904541015625, 1.21502685546875, 1.284149169921875, 1.353271484375, 1.422393798828125, 1.49151611328125, 1.560638427734375, 1.6297607421875, 1.698883056640625, 1.76800537109375, 1.837127685546875, 1.90625]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 5.0, 1.0, 5.0, 6.0, 6.0, 7.0, 18.0, 22.0, 29.0, 27.0, 38.0, 46.0, 61.0, 105.0, 200.0, 312.0, 673.0, 1589.0, 4740.0, 18763.0, 105875.0, 634696.0, 230904.0, 37720.0, 8288.0, 2395.0, 889.0, 437.0, 228.0, 140.0, 88.0, 63.0, 43.0, 36.0, 27.0, 14.0, 13.0, 13.0, 7.0, 9.0, 3.0, 4.0, 4.0, 5.0, 1.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-6.26953125, -6.077880859375, -5.88623046875, -5.694580078125, -5.5029296875, -5.311279296875, -5.11962890625, -4.927978515625, -4.736328125, -4.544677734375, -4.35302734375, -4.161376953125, -3.9697265625, -3.778076171875, -3.58642578125, -3.394775390625, -3.203125, -3.011474609375, -2.81982421875, -2.628173828125, -2.4365234375, -2.244873046875, -2.05322265625, -1.861572265625, -1.669921875, -1.478271484375, -1.28662109375, -1.094970703125, -0.9033203125, -0.711669921875, -0.52001953125, -0.328369140625, -0.13671875, 0.054931640625, 0.24658203125, 0.438232421875, 0.6298828125, 0.821533203125, 1.01318359375, 1.204833984375, 1.396484375, 1.588134765625, 1.77978515625, 1.971435546875, 2.1630859375, 2.354736328125, 2.54638671875, 2.738037109375, 2.9296875, 3.121337890625, 3.31298828125, 3.504638671875, 3.6962890625, 3.887939453125, 4.07958984375, 4.271240234375, 4.462890625, 4.654541015625, 4.84619140625, 5.037841796875, 5.2294921875, 5.421142578125, 5.61279296875, 5.804443359375, 5.99609375]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 5.0, 4.0, 10.0, 9.0, 16.0, 17.0, 17.0, 28.0, 48.0, 50.0, 42.0, 51.0, 68.0, 60.0, 74.0, 67.0, 64.0, 72.0, 45.0, 67.0, 35.0, 29.0, 31.0, 26.0, 22.0, 10.0, 14.0, 6.0, 8.0, 2.0, 3.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.0234375, -10.690185546875, -10.35693359375, -10.023681640625, -9.6904296875, -9.357177734375, -9.02392578125, -8.690673828125, -8.357421875, -8.024169921875, -7.69091796875, -7.357666015625, -7.0244140625, -6.691162109375, -6.35791015625, -6.024658203125, -5.69140625, -5.358154296875, -5.02490234375, -4.691650390625, -4.3583984375, -4.025146484375, -3.69189453125, -3.358642578125, -3.025390625, -2.692138671875, -2.35888671875, -2.025634765625, -1.6923828125, -1.359130859375, -1.02587890625, -0.692626953125, -0.359375, -0.026123046875, 0.30712890625, 0.640380859375, 0.9736328125, 1.306884765625, 1.64013671875, 1.973388671875, 2.306640625, 2.639892578125, 2.97314453125, 3.306396484375, 3.6396484375, 3.972900390625, 4.30615234375, 4.639404296875, 4.97265625, 5.305908203125, 5.63916015625, 5.972412109375, 6.3056640625, 6.638916015625, 6.97216796875, 7.305419921875, 7.638671875, 7.971923828125, 8.30517578125, 8.638427734375, 8.9716796875, 9.304931640625, 9.63818359375, 9.971435546875, 10.3046875]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 9.0, 7.0, 7.0, 20.0, 44.0, 61.0, 75.0, 144.0, 264.0, 534.0, 1058.0, 2441.0, 6505.0, 19642.0, 72991.0, 304922.0, 515451.0, 88860.0, 22906.0, 7337.0, 2775.0, 1261.0, 594.0, 257.0, 164.0, 74.0, 49.0, 36.0, 23.0, 19.0, 10.0, 10.0, 3.0, 3.0, 3.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.755859375, -1.704315185546875, -1.65277099609375, -1.601226806640625, -1.5496826171875, -1.498138427734375, -1.44659423828125, -1.395050048828125, -1.343505859375, -1.291961669921875, -1.24041748046875, -1.188873291015625, -1.1373291015625, -1.085784912109375, -1.03424072265625, -0.982696533203125, -0.93115234375, -0.879608154296875, -0.82806396484375, -0.776519775390625, -0.7249755859375, -0.673431396484375, -0.62188720703125, -0.570343017578125, -0.518798828125, -0.467254638671875, -0.41571044921875, -0.364166259765625, -0.3126220703125, -0.261077880859375, -0.20953369140625, -0.157989501953125, -0.1064453125, -0.054901123046875, -0.00335693359375, 0.048187255859375, 0.0997314453125, 0.151275634765625, 0.20281982421875, 0.254364013671875, 0.305908203125, 0.357452392578125, 0.40899658203125, 0.460540771484375, 0.5120849609375, 0.563629150390625, 0.61517333984375, 0.666717529296875, 0.71826171875, 0.769805908203125, 0.82135009765625, 0.872894287109375, 0.9244384765625, 0.975982666015625, 1.02752685546875, 1.079071044921875, 1.130615234375, 1.182159423828125, 1.23370361328125, 1.285247802734375, 1.3367919921875, 1.388336181640625, 1.43988037109375, 1.491424560546875, 1.54296875]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 4.0, 3.0, 6.0, 12.0, 9.0, 19.0, 22.0, 45.0, 73.0, 103.0, 156.0, 151.0, 124.0, 103.0, 59.0, 43.0, 21.0, 20.0, 10.0, 7.0, 7.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00115966796875, -0.0011286139488220215, -0.001097559928894043, -0.0010665059089660645, -0.001035451889038086, -0.0010043978691101074, -0.0009733438491821289, -0.0009422898292541504, -0.0009112358093261719, -0.0008801817893981934, -0.0008491277694702148, -0.0008180737495422363, -0.0007870197296142578, -0.0007559657096862793, -0.0007249116897583008, -0.0006938576698303223, -0.0006628036499023438, -0.0006317496299743652, -0.0006006956100463867, -0.0005696415901184082, -0.0005385875701904297, -0.0005075335502624512, -0.00047647953033447266, -0.00044542551040649414, -0.0004143714904785156, -0.0003833174705505371, -0.0003522634506225586, -0.0003212094306945801, -0.00029015541076660156, -0.00025910139083862305, -0.00022804737091064453, -0.00019699335098266602, -0.0001659393310546875, -0.00013488531112670898, -0.00010383129119873047, -7.277727127075195e-05, -4.172325134277344e-05, -1.0669231414794922e-05, 2.0384788513183594e-05, 5.143880844116211e-05, 8.249282836914062e-05, 0.00011354684829711914, 0.00014460086822509766, 0.00017565488815307617, 0.0002067089080810547, 0.0002377629280090332, 0.0002688169479370117, 0.00029987096786499023, 0.00033092498779296875, 0.00036197900772094727, 0.0003930330276489258, 0.0004240870475769043, 0.0004551410675048828, 0.00048619508743286133, 0.0005172491073608398, 0.0005483031272888184, 0.0005793571472167969, 0.0006104111671447754, 0.0006414651870727539, 0.0006725192070007324, 0.0007035732269287109, 0.0007346272468566895, 0.000765681266784668, 0.0007967352867126465, 0.000827789306640625]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 8.0, 6.0, 5.0, 7.0, 14.0, 18.0, 28.0, 53.0, 102.0, 145.0, 234.0, 475.0, 983.0, 2501.0, 7950.0, 30130.0, 144794.0, 614840.0, 191737.0, 39347.0, 9770.0, 3138.0, 1108.0, 524.0, 257.0, 146.0, 74.0, 43.0, 33.0, 34.0, 11.0, 9.0, 11.0, 8.0, 4.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0], "bins": [-2.119140625, -2.0642242431640625, -2.009307861328125, -1.9543914794921875, -1.89947509765625, -1.8445587158203125, -1.789642333984375, -1.7347259521484375, -1.6798095703125, -1.6248931884765625, -1.569976806640625, -1.5150604248046875, -1.46014404296875, -1.4052276611328125, -1.350311279296875, -1.2953948974609375, -1.240478515625, -1.1855621337890625, -1.130645751953125, -1.0757293701171875, -1.02081298828125, -0.9658966064453125, -0.910980224609375, -0.8560638427734375, -0.8011474609375, -0.7462310791015625, -0.691314697265625, -0.6363983154296875, -0.58148193359375, -0.5265655517578125, -0.471649169921875, -0.4167327880859375, -0.36181640625, -0.3069000244140625, -0.251983642578125, -0.1970672607421875, -0.14215087890625, -0.0872344970703125, -0.032318115234375, 0.0225982666015625, 0.0775146484375, 0.1324310302734375, 0.187347412109375, 0.2422637939453125, 0.29718017578125, 0.3520965576171875, 0.407012939453125, 0.4619293212890625, 0.516845703125, 0.5717620849609375, 0.626678466796875, 0.6815948486328125, 0.73651123046875, 0.7914276123046875, 0.846343994140625, 0.9012603759765625, 0.9561767578125, 1.0110931396484375, 1.066009521484375, 1.1209259033203125, 1.17584228515625, 1.2307586669921875, 1.285675048828125, 1.3405914306640625, 1.3955078125]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 4.0, 5.0, 5.0, 17.0, 13.0, 14.0, 24.0, 31.0, 43.0, 51.0, 52.0, 56.0, 80.0, 74.0, 82.0, 87.0, 66.0, 64.0, 44.0, 45.0, 29.0, 26.0, 28.0, 19.0, 16.0, 12.0, 6.0, 2.0, 5.0, 4.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-2.498046875, -2.43719482421875, -2.3763427734375, -2.31549072265625, -2.254638671875, -2.19378662109375, -2.1329345703125, -2.07208251953125, -2.01123046875, -1.95037841796875, -1.8895263671875, -1.82867431640625, -1.767822265625, -1.70697021484375, -1.6461181640625, -1.58526611328125, -1.5244140625, -1.46356201171875, -1.4027099609375, -1.34185791015625, -1.281005859375, -1.22015380859375, -1.1593017578125, -1.09844970703125, -1.03759765625, -0.97674560546875, -0.9158935546875, -0.85504150390625, -0.794189453125, -0.73333740234375, -0.6724853515625, -0.61163330078125, -0.55078125, -0.48992919921875, -0.4290771484375, -0.36822509765625, -0.307373046875, -0.24652099609375, -0.1856689453125, -0.12481689453125, -0.06396484375, -0.00311279296875, 0.0577392578125, 0.11859130859375, 0.179443359375, 0.24029541015625, 0.3011474609375, 0.36199951171875, 0.4228515625, 0.48370361328125, 0.5445556640625, 0.60540771484375, 0.666259765625, 0.72711181640625, 0.7879638671875, 0.84881591796875, 0.90966796875, 0.97052001953125, 1.0313720703125, 1.09222412109375, 1.153076171875, 1.21392822265625, 1.2747802734375, 1.33563232421875, 1.396484375]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 7.0, 12.0, 17.0, 35.0, 57.0, 123.0, 248.0, 264.0, 105.0, 70.0, 32.0, 20.0, 8.0, 6.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-74.70728302001953, -72.79360961914062, -70.87992858886719, -68.96625518798828, -67.05257415771484, -65.13890075683594, -63.225223541259766, -61.311546325683594, -59.39786911010742, -57.48419189453125, -55.57051467895508, -53.656837463378906, -51.7431640625, -49.82948303222656, -47.915809631347656, -46.002132415771484, -44.08845520019531, -42.17477798461914, -40.26110076904297, -38.3474235534668, -36.433746337890625, -34.52007293701172, -32.60639572143555, -30.692718505859375, -28.779041290283203, -26.86536407470703, -24.95168685913086, -23.03801155090332, -21.12433433532715, -19.210657119750977, -17.296981811523438, -15.383304595947266, -13.469627380371094, -11.555950164794922, -9.642273902893066, -7.728597164154053, -5.814920425415039, -3.901243209838867, -1.9875669479370117, -0.07389068603515625, 1.8397865295410156, 3.7534632682800293, 5.667140007019043, 7.580816745758057, 9.49449348449707, 11.408170700073242, 13.321846961975098, 15.235523223876953, 17.149200439453125, 19.062877655029297, 20.97655487060547, 22.890230178833008, 24.80390739440918, 26.71758460998535, 28.63125991821289, 30.544937133789062, 32.458614349365234, 34.372291564941406, 36.28596878051758, 38.19964599609375, 40.113319396972656, 42.027000427246094, 43.940673828125, 45.85435104370117, 47.768028259277344]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 2.0, 5.0, 5.0, 5.0, 6.0, 3.0, 7.0, 10.0, 17.0, 16.0, 19.0, 18.0, 19.0, 23.0, 23.0, 41.0, 37.0, 41.0, 26.0, 53.0, 87.0, 96.0, 68.0, 63.0, 42.0, 38.0, 34.0, 31.0, 31.0, 24.0, 24.0, 10.0, 21.0, 15.0, 5.0, 8.0, 11.0, 8.0, 2.0, 8.0, 3.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.77023696899414, -32.593143463134766, -31.416048049926758, -30.23895263671875, -29.061859130859375, -27.884763717651367, -26.70766830444336, -25.530574798583984, -24.353479385375977, -23.17638397216797, -21.999290466308594, -20.822195053100586, -19.645099639892578, -18.468006134033203, -17.290910720825195, -16.113815307617188, -14.936721801757812, -13.759627342224121, -12.58253288269043, -11.405437469482422, -10.22834300994873, -9.051248550415039, -7.8741536140441895, -6.69705867767334, -5.519964218139648, -4.342869758605957, -3.1657748222351074, -1.988680124282837, -0.8115854263305664, 0.365509033203125, 1.5426039695739746, 2.719698905944824, 3.89678955078125, 5.073884010314941, 6.250978946685791, 7.428073883056641, 8.605168342590332, 9.782262802124023, 10.959358215332031, 12.136452674865723, 13.313547134399414, 14.490641593933105, 15.667736053466797, 16.844831466674805, 18.021926879882812, 19.199020385742188, 20.376115798950195, 21.553211212158203, 22.730304718017578, 23.907400131225586, 25.08449363708496, 26.26158905029297, 27.438682556152344, 28.61577796936035, 29.79287338256836, 30.969966888427734, 32.147064208984375, 33.32415771484375, 34.50125503540039, 35.678348541259766, 36.85544204711914, 38.03253936767578, 39.209632873535156, 40.38672637939453, 41.563819885253906]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 0.0, 3.0, 3.0, 4.0, 4.0, 8.0, 7.0, 3.0, 15.0, 18.0, 25.0, 34.0, 27.0, 44.0, 90.0, 107.0, 164.0, 358.0, 687.0, 1568.0, 4333.0, 13252.0, 53521.0, 274154.0, 1391495.0, 1891861.0, 446760.0, 85252.0, 20256.0, 6007.0, 2278.0, 904.0, 425.0, 228.0, 110.0, 75.0, 52.0, 41.0, 33.0, 23.0, 14.0, 9.0, 9.0, 6.0, 6.0, 5.0, 6.0, 1.0, 3.0, 0.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.40234375, -2.327606201171875, -2.25286865234375, -2.178131103515625, -2.1033935546875, -2.028656005859375, -1.95391845703125, -1.879180908203125, -1.804443359375, -1.729705810546875, -1.65496826171875, -1.580230712890625, -1.5054931640625, -1.430755615234375, -1.35601806640625, -1.281280517578125, -1.20654296875, -1.131805419921875, -1.05706787109375, -0.982330322265625, -0.9075927734375, -0.832855224609375, -0.75811767578125, -0.683380126953125, -0.608642578125, -0.533905029296875, -0.45916748046875, -0.384429931640625, -0.3096923828125, -0.234954833984375, -0.16021728515625, -0.085479736328125, -0.0107421875, 0.063995361328125, 0.13873291015625, 0.213470458984375, 0.2882080078125, 0.362945556640625, 0.43768310546875, 0.512420654296875, 0.587158203125, 0.661895751953125, 0.73663330078125, 0.811370849609375, 0.8861083984375, 0.960845947265625, 1.03558349609375, 1.110321044921875, 1.18505859375, 1.259796142578125, 1.33453369140625, 1.409271240234375, 1.4840087890625, 1.558746337890625, 1.63348388671875, 1.708221435546875, 1.782958984375, 1.857696533203125, 1.93243408203125, 2.007171630859375, 2.0819091796875, 2.156646728515625, 2.23138427734375, 2.306121826171875, 2.380859375]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 2.0, 5.0, 7.0, 8.0, 7.0, 13.0, 8.0, 21.0, 25.0, 19.0, 30.0, 39.0, 37.0, 41.0, 48.0, 38.0, 57.0, 66.0, 56.0, 50.0, 61.0, 46.0, 40.0, 54.0, 38.0, 40.0, 30.0, 21.0, 24.0, 18.0, 15.0, 11.0, 11.0, 9.0, 5.0, 6.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.720703125, -2.637176513671875, -2.55364990234375, -2.470123291015625, -2.3865966796875, -2.303070068359375, -2.21954345703125, -2.136016845703125, -2.052490234375, -1.968963623046875, -1.88543701171875, -1.801910400390625, -1.7183837890625, -1.634857177734375, -1.55133056640625, -1.467803955078125, -1.38427734375, -1.300750732421875, -1.21722412109375, -1.133697509765625, -1.0501708984375, -0.966644287109375, -0.88311767578125, -0.799591064453125, -0.716064453125, -0.632537841796875, -0.54901123046875, -0.465484619140625, -0.3819580078125, -0.298431396484375, -0.21490478515625, -0.131378173828125, -0.0478515625, 0.035675048828125, 0.11920166015625, 0.202728271484375, 0.2862548828125, 0.369781494140625, 0.45330810546875, 0.536834716796875, 0.620361328125, 0.703887939453125, 0.78741455078125, 0.870941162109375, 0.9544677734375, 1.037994384765625, 1.12152099609375, 1.205047607421875, 1.28857421875, 1.372100830078125, 1.45562744140625, 1.539154052734375, 1.6226806640625, 1.706207275390625, 1.78973388671875, 1.873260498046875, 1.956787109375, 2.040313720703125, 2.12384033203125, 2.207366943359375, 2.2908935546875, 2.374420166015625, 2.45794677734375, 2.541473388671875, 2.625]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 5.0, 4.0, 6.0, 10.0, 9.0, 23.0, 26.0, 50.0, 93.0, 154.0, 276.0, 618.0, 2039.0, 11298.0, 173000.0, 3722859.0, 265012.0, 14837.0, 2505.0, 758.0, 313.0, 150.0, 104.0, 48.0, 34.0, 19.0, 9.0, 9.0, 5.0, 5.0, 6.0, 2.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.703125, -6.5091552734375, -6.315185546875, -6.1212158203125, -5.92724609375, -5.7332763671875, -5.539306640625, -5.3453369140625, -5.1513671875, -4.9573974609375, -4.763427734375, -4.5694580078125, -4.37548828125, -4.1815185546875, -3.987548828125, -3.7935791015625, -3.599609375, -3.4056396484375, -3.211669921875, -3.0177001953125, -2.82373046875, -2.6297607421875, -2.435791015625, -2.2418212890625, -2.0478515625, -1.8538818359375, -1.659912109375, -1.4659423828125, -1.27197265625, -1.0780029296875, -0.884033203125, -0.6900634765625, -0.49609375, -0.3021240234375, -0.108154296875, 0.0858154296875, 0.27978515625, 0.4737548828125, 0.667724609375, 0.8616943359375, 1.0556640625, 1.2496337890625, 1.443603515625, 1.6375732421875, 1.83154296875, 2.0255126953125, 2.219482421875, 2.4134521484375, 2.607421875, 2.8013916015625, 2.995361328125, 3.1893310546875, 3.38330078125, 3.5772705078125, 3.771240234375, 3.9652099609375, 4.1591796875, 4.3531494140625, 4.547119140625, 4.7410888671875, 4.93505859375, 5.1290283203125, 5.322998046875, 5.5169677734375, 5.7109375]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 4.0, 5.0, 7.0, 10.0, 29.0, 36.0, 51.0, 70.0, 100.0, 221.0, 360.0, 611.0, 780.0, 679.0, 433.0, 270.0, 162.0, 93.0, 53.0, 40.0, 18.0, 9.0, 11.0, 9.0, 5.0, 3.0, 6.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.1796875, -6.0162353515625, -5.852783203125, -5.6893310546875, -5.52587890625, -5.3624267578125, -5.198974609375, -5.0355224609375, -4.8720703125, -4.7086181640625, -4.545166015625, -4.3817138671875, -4.21826171875, -4.0548095703125, -3.891357421875, -3.7279052734375, -3.564453125, -3.4010009765625, -3.237548828125, -3.0740966796875, -2.91064453125, -2.7471923828125, -2.583740234375, -2.4202880859375, -2.2568359375, -2.0933837890625, -1.929931640625, -1.7664794921875, -1.60302734375, -1.4395751953125, -1.276123046875, -1.1126708984375, -0.94921875, -0.7857666015625, -0.622314453125, -0.4588623046875, -0.29541015625, -0.1319580078125, 0.031494140625, 0.1949462890625, 0.3583984375, 0.5218505859375, 0.685302734375, 0.8487548828125, 1.01220703125, 1.1756591796875, 1.339111328125, 1.5025634765625, 1.666015625, 1.8294677734375, 1.992919921875, 2.1563720703125, 2.31982421875, 2.4832763671875, 2.646728515625, 2.8101806640625, 2.9736328125, 3.1370849609375, 3.300537109375, 3.4639892578125, 3.62744140625, 3.7908935546875, 3.954345703125, 4.1177978515625, 4.28125]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 3.0, 5.0, 4.0, 25.0, 102.0, 347.0, 367.0, 108.0, 34.0, 11.0, 3.0, 2.0, 1.0, 1.0, 0.0, 3.0], "bins": [-141.87994384765625, -139.25193786621094, -136.62393188476562, -133.9959259033203, -131.36793518066406, -128.73992919921875, -126.11192321777344, -123.48391723632812, -120.85591125488281, -118.2279052734375, -115.59990692138672, -112.9719009399414, -110.3438949584961, -107.71589660644531, -105.087890625, -102.45988464355469, -99.8318862915039, -97.2038803100586, -94.57588195800781, -91.9478759765625, -89.31986999511719, -86.69186401367188, -84.0638656616211, -81.43585968017578, -78.807861328125, -76.17985534667969, -73.5518569946289, -70.9238510131836, -68.29584503173828, -65.6678466796875, -63.03984069824219, -60.411834716796875, -57.78383255004883, -55.15583038330078, -52.52782440185547, -49.89982223510742, -47.271820068359375, -44.64381408691406, -42.015811920166016, -39.38780975341797, -36.759803771972656, -34.13180160522461, -31.503795623779297, -28.87579345703125, -26.24778938293457, -23.61978530883789, -20.991783142089844, -18.363779067993164, -15.735774993896484, -13.107770919799805, -10.479767799377441, -7.85176420211792, -5.223760604858398, -2.5957565307617188, 0.03224658966064453, 2.660249710083008, 5.2882537841796875, 7.916257381439209, 10.54426097869873, 13.172264099121094, 15.800268173217773, 18.428272247314453, 21.0562744140625, 23.68427848815918, 26.31228256225586]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 5.0, 2.0, 2.0, 5.0, 4.0, 10.0, 7.0, 8.0, 8.0, 9.0, 13.0, 16.0, 20.0, 26.0, 39.0, 27.0, 25.0, 38.0, 31.0, 41.0, 36.0, 31.0, 39.0, 37.0, 32.0, 40.0, 38.0, 42.0, 47.0, 30.0, 43.0, 36.0, 22.0, 33.0, 21.0, 21.0, 17.0, 17.0, 17.0, 12.0, 16.0, 7.0, 8.0, 9.0, 5.0, 7.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-17.322376251220703, -16.767051696777344, -16.211727142333984, -15.656401634216309, -15.101076126098633, -14.545751571655273, -13.990427017211914, -13.435101509094238, -12.879776000976562, -12.324451446533203, -11.769125938415527, -11.213801383972168, -10.658475875854492, -10.103151321411133, -9.547826766967773, -8.992501258850098, -8.437176704406738, -7.881851673126221, -7.326526641845703, -6.771202087402344, -6.215876579284668, -5.660552024841309, -5.105226993560791, -4.549901962280273, -3.994576930999756, -3.4392518997192383, -2.8839268684387207, -2.3286020755767822, -1.7732770442962646, -1.217952013015747, -0.6626272201538086, -0.10730218887329102, 0.44802284240722656, 1.0033478736877441, 1.5586727857589722, 2.1139976978302, 2.6693227291107178, 3.2246477603912354, 3.779972553253174, 4.335297584533691, 4.890622615814209, 5.445947647094727, 6.001272678375244, 6.556597709655762, 7.111922264099121, 7.667247772216797, 8.222572326660156, 8.777896881103516, 9.333222389221191, 9.88854694366455, 10.443872451782227, 10.999197006225586, 11.554522514343262, 12.109847068786621, 12.665172576904297, 13.220497131347656, 13.775821685791016, 14.331146240234375, 14.88647174835205, 15.44179630279541, 15.997121810913086, 16.552446365356445, 17.107770919799805, 17.663097381591797, 18.218421936035156]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 3.0, 2.0, 2.0, 3.0, 6.0, 11.0, 11.0, 25.0, 24.0, 39.0, 62.0, 88.0, 129.0, 163.0, 252.0, 417.0, 591.0, 923.0, 1544.0, 2569.0, 4518.0, 7621.0, 13656.0, 24427.0, 44357.0, 78863.0, 140192.0, 210912.0, 206588.0, 136566.0, 76868.0, 42226.0, 23543.0, 13028.0, 7308.0, 4306.0, 2609.0, 1446.0, 913.0, 570.0, 378.0, 234.0, 179.0, 121.0, 79.0, 67.0, 44.0, 39.0, 13.0, 11.0, 8.0, 1.0, 5.0, 4.0, 4.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.763671875, -1.702789306640625, -1.64190673828125, -1.581024169921875, -1.5201416015625, -1.459259033203125, -1.39837646484375, -1.337493896484375, -1.276611328125, -1.215728759765625, -1.15484619140625, -1.093963623046875, -1.0330810546875, -0.972198486328125, -0.91131591796875, -0.850433349609375, -0.78955078125, -0.728668212890625, -0.66778564453125, -0.606903076171875, -0.5460205078125, -0.485137939453125, -0.42425537109375, -0.363372802734375, -0.302490234375, -0.241607666015625, -0.18072509765625, -0.119842529296875, -0.0589599609375, 0.001922607421875, 0.06280517578125, 0.123687744140625, 0.1845703125, 0.245452880859375, 0.30633544921875, 0.367218017578125, 0.4281005859375, 0.488983154296875, 0.54986572265625, 0.610748291015625, 0.671630859375, 0.732513427734375, 0.79339599609375, 0.854278564453125, 0.9151611328125, 0.976043701171875, 1.03692626953125, 1.097808837890625, 1.15869140625, 1.219573974609375, 1.28045654296875, 1.341339111328125, 1.4022216796875, 1.463104248046875, 1.52398681640625, 1.584869384765625, 1.645751953125, 1.706634521484375, 1.76751708984375, 1.828399658203125, 1.8892822265625, 1.950164794921875, 2.01104736328125, 2.071929931640625, 2.1328125]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 5.0, 4.0, 10.0, 8.0, 5.0, 16.0, 13.0, 16.0, 12.0, 28.0, 33.0, 39.0, 43.0, 44.0, 40.0, 50.0, 36.0, 42.0, 48.0, 54.0, 54.0, 56.0, 55.0, 38.0, 41.0, 55.0, 40.0, 22.0, 25.0, 14.0, 10.0, 8.0, 5.0, 15.0, 9.0, 6.0, 9.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.75, -2.6708984375, -2.591796875, -2.5126953125, -2.43359375, -2.3544921875, -2.275390625, -2.1962890625, -2.1171875, -2.0380859375, -1.958984375, -1.8798828125, -1.80078125, -1.7216796875, -1.642578125, -1.5634765625, -1.484375, -1.4052734375, -1.326171875, -1.2470703125, -1.16796875, -1.0888671875, -1.009765625, -0.9306640625, -0.8515625, -0.7724609375, -0.693359375, -0.6142578125, -0.53515625, -0.4560546875, -0.376953125, -0.2978515625, -0.21875, -0.1396484375, -0.060546875, 0.0185546875, 0.09765625, 0.1767578125, 0.255859375, 0.3349609375, 0.4140625, 0.4931640625, 0.572265625, 0.6513671875, 0.73046875, 0.8095703125, 0.888671875, 0.9677734375, 1.046875, 1.1259765625, 1.205078125, 1.2841796875, 1.36328125, 1.4423828125, 1.521484375, 1.6005859375, 1.6796875, 1.7587890625, 1.837890625, 1.9169921875, 1.99609375, 2.0751953125, 2.154296875, 2.2333984375, 2.3125]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 2.0, 5.0, 7.0, 8.0, 9.0, 14.0, 17.0, 20.0, 35.0, 56.0, 74.0, 134.0, 221.0, 310.0, 526.0, 996.0, 2454.0, 9323.0, 57389.0, 644622.0, 286915.0, 34908.0, 6432.0, 1925.0, 830.0, 477.0, 292.0, 185.0, 125.0, 71.0, 43.0, 43.0, 28.0, 17.0, 12.0, 13.0, 6.0, 7.0, 6.0, 3.0, 2.0, 2.0, 2.0], "bins": [-8.328125, -8.1243896484375, -7.920654296875, -7.7169189453125, -7.51318359375, -7.3094482421875, -7.105712890625, -6.9019775390625, -6.6982421875, -6.4945068359375, -6.290771484375, -6.0870361328125, -5.88330078125, -5.6795654296875, -5.475830078125, -5.2720947265625, -5.068359375, -4.8646240234375, -4.660888671875, -4.4571533203125, -4.25341796875, -4.0496826171875, -3.845947265625, -3.6422119140625, -3.4384765625, -3.2347412109375, -3.031005859375, -2.8272705078125, -2.62353515625, -2.4197998046875, -2.216064453125, -2.0123291015625, -1.80859375, -1.6048583984375, -1.401123046875, -1.1973876953125, -0.99365234375, -0.7899169921875, -0.586181640625, -0.3824462890625, -0.1787109375, 0.0250244140625, 0.228759765625, 0.4324951171875, 0.63623046875, 0.8399658203125, 1.043701171875, 1.2474365234375, 1.451171875, 1.6549072265625, 1.858642578125, 2.0623779296875, 2.26611328125, 2.4698486328125, 2.673583984375, 2.8773193359375, 3.0810546875, 3.2847900390625, 3.488525390625, 3.6922607421875, 3.89599609375, 4.0997314453125, 4.303466796875, 4.5072021484375, 4.7109375]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 12.0, 10.0, 18.0, 18.0, 26.0, 24.0, 36.0, 30.0, 31.0, 43.0, 67.0, 76.0, 65.0, 77.0, 68.0, 49.0, 69.0, 53.0, 35.0, 35.0, 30.0, 23.0, 24.0, 18.0, 19.0, 12.0, 7.0, 8.0, 2.0, 5.0, 4.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.734375, -10.41552734375, -10.0966796875, -9.77783203125, -9.458984375, -9.14013671875, -8.8212890625, -8.50244140625, -8.18359375, -7.86474609375, -7.5458984375, -7.22705078125, -6.908203125, -6.58935546875, -6.2705078125, -5.95166015625, -5.6328125, -5.31396484375, -4.9951171875, -4.67626953125, -4.357421875, -4.03857421875, -3.7197265625, -3.40087890625, -3.08203125, -2.76318359375, -2.4443359375, -2.12548828125, -1.806640625, -1.48779296875, -1.1689453125, -0.85009765625, -0.53125, -0.21240234375, 0.1064453125, 0.42529296875, 0.744140625, 1.06298828125, 1.3818359375, 1.70068359375, 2.01953125, 2.33837890625, 2.6572265625, 2.97607421875, 3.294921875, 3.61376953125, 3.9326171875, 4.25146484375, 4.5703125, 4.88916015625, 5.2080078125, 5.52685546875, 5.845703125, 6.16455078125, 6.4833984375, 6.80224609375, 7.12109375, 7.43994140625, 7.7587890625, 8.07763671875, 8.396484375, 8.71533203125, 9.0341796875, 9.35302734375, 9.671875]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 5.0, 1.0, 5.0, 4.0, 8.0, 8.0, 13.0, 13.0, 28.0, 33.0, 50.0, 67.0, 88.0, 112.0, 183.0, 299.0, 537.0, 907.0, 1783.0, 3736.0, 8627.0, 22687.0, 66053.0, 229698.0, 555027.0, 102880.0, 32852.0, 12311.0, 5155.0, 2372.0, 1168.0, 693.0, 430.0, 208.0, 139.0, 105.0, 72.0, 46.0, 48.0, 26.0, 25.0, 13.0, 13.0, 10.0, 3.0, 7.0, 6.0, 1.0, 4.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0869140625, -1.05206298828125, -1.0172119140625, -0.98236083984375, -0.947509765625, -0.91265869140625, -0.8778076171875, -0.84295654296875, -0.80810546875, -0.77325439453125, -0.7384033203125, -0.70355224609375, -0.668701171875, -0.63385009765625, -0.5989990234375, -0.56414794921875, -0.529296875, -0.49444580078125, -0.4595947265625, -0.42474365234375, -0.389892578125, -0.35504150390625, -0.3201904296875, -0.28533935546875, -0.25048828125, -0.21563720703125, -0.1807861328125, -0.14593505859375, -0.111083984375, -0.07623291015625, -0.0413818359375, -0.00653076171875, 0.0283203125, 0.06317138671875, 0.0980224609375, 0.13287353515625, 0.167724609375, 0.20257568359375, 0.2374267578125, 0.27227783203125, 0.30712890625, 0.34197998046875, 0.3768310546875, 0.41168212890625, 0.446533203125, 0.48138427734375, 0.5162353515625, 0.55108642578125, 0.5859375, 0.62078857421875, 0.6556396484375, 0.69049072265625, 0.725341796875, 0.76019287109375, 0.7950439453125, 0.82989501953125, 0.86474609375, 0.89959716796875, 0.9344482421875, 0.96929931640625, 1.004150390625, 1.03900146484375, 1.0738525390625, 1.10870361328125, 1.1435546875]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 5.0, 5.0, 5.0, 4.0, 9.0, 7.0, 15.0, 14.0, 18.0, 30.0, 17.0, 49.0, 48.0, 78.0, 92.0, 99.0, 98.0, 83.0, 70.0, 57.0, 45.0, 38.0, 28.0, 16.0, 15.0, 16.0, 9.0, 9.0, 2.0, 7.0, 3.0, 5.0, 4.0, 2.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005331039428710938, -0.000517476350069046, -0.0005018487572669983, -0.00048622116446495056, -0.00047059357166290283, -0.0004549659788608551, -0.0004393383860588074, -0.00042371079325675964, -0.0004080832004547119, -0.0003924556076526642, -0.00037682801485061646, -0.0003612004220485687, -0.000345572829246521, -0.00032994523644447327, -0.00031431764364242554, -0.0002986900508403778, -0.0002830624580383301, -0.00026743486523628235, -0.0002518072724342346, -0.0002361796796321869, -0.00022055208683013916, -0.00020492449402809143, -0.0001892969012260437, -0.00017366930842399597, -0.00015804171562194824, -0.0001424141228199005, -0.00012678653001785278, -0.00011115893721580505, -9.553134441375732e-05, -7.99037516117096e-05, -6.427615880966187e-05, -4.8648566007614136e-05, -3.3020973205566406e-05, -1.7393380403518677e-05, -1.7657876014709473e-06, 1.3861805200576782e-05, 2.9489398002624512e-05, 4.511699080467224e-05, 6.074458360671997e-05, 7.63721764087677e-05, 9.199976921081543e-05, 0.00010762736201286316, 0.0001232549548149109, 0.00013888254761695862, 0.00015451014041900635, 0.00017013773322105408, 0.0001857653260231018, 0.00020139291882514954, 0.00021702051162719727, 0.000232648104429245, 0.0002482756972312927, 0.00026390329003334045, 0.0002795308828353882, 0.0002951584756374359, 0.00031078606843948364, 0.00032641366124153137, 0.0003420412540435791, 0.00035766884684562683, 0.00037329643964767456, 0.0003889240324497223, 0.00040455162525177, 0.00042017921805381775, 0.0004358068108558655, 0.0004514344036579132, 0.00046706199645996094]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 7.0, 6.0, 11.0, 15.0, 18.0, 15.0, 48.0, 56.0, 75.0, 118.0, 164.0, 260.0, 505.0, 980.0, 1986.0, 4697.0, 13034.0, 40416.0, 148313.0, 611522.0, 160091.0, 43048.0, 13650.0, 5046.0, 2086.0, 1040.0, 501.0, 287.0, 210.0, 122.0, 77.0, 38.0, 36.0, 26.0, 11.0, 12.0, 10.0, 13.0, 5.0, 3.0, 1.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.99267578125, -0.9552536010742188, -0.9178314208984375, -0.8804092407226562, -0.842987060546875, -0.8055648803710938, -0.7681427001953125, -0.7307205200195312, -0.69329833984375, -0.6558761596679688, -0.6184539794921875, -0.5810317993164062, -0.543609619140625, -0.5061874389648438, -0.4687652587890625, -0.43134307861328125, -0.3939208984375, -0.35649871826171875, -0.3190765380859375, -0.28165435791015625, -0.244232177734375, -0.20680999755859375, -0.1693878173828125, -0.13196563720703125, -0.09454345703125, -0.05712127685546875, -0.0196990966796875, 0.01772308349609375, 0.055145263671875, 0.09256744384765625, 0.1299896240234375, 0.16741180419921875, 0.204833984375, 0.24225616455078125, 0.2796783447265625, 0.31710052490234375, 0.354522705078125, 0.39194488525390625, 0.4293670654296875, 0.46678924560546875, 0.50421142578125, 0.5416336059570312, 0.5790557861328125, 0.6164779663085938, 0.653900146484375, 0.6913223266601562, 0.7287445068359375, 0.7661666870117188, 0.8035888671875, 0.8410110473632812, 0.8784332275390625, 0.9158554077148438, 0.953277587890625, 0.9906997680664062, 1.0281219482421875, 1.0655441284179688, 1.10296630859375, 1.1403884887695312, 1.1778106689453125, 1.2152328491210938, 1.252655029296875, 1.2900772094726562, 1.3274993896484375, 1.3649215698242188, 1.40234375]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 5.0, 3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 11.0, 15.0, 21.0, 23.0, 36.0, 38.0, 59.0, 61.0, 73.0, 77.0, 81.0, 73.0, 72.0, 54.0, 61.0, 41.0, 38.0, 40.0, 24.0, 16.0, 20.0, 10.0, 7.0, 8.0, 4.0, 7.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-1.87890625, -1.832489013671875, -1.78607177734375, -1.739654541015625, -1.6932373046875, -1.646820068359375, -1.60040283203125, -1.553985595703125, -1.507568359375, -1.461151123046875, -1.41473388671875, -1.368316650390625, -1.3218994140625, -1.275482177734375, -1.22906494140625, -1.182647705078125, -1.13623046875, -1.089813232421875, -1.04339599609375, -0.996978759765625, -0.9505615234375, -0.904144287109375, -0.85772705078125, -0.811309814453125, -0.764892578125, -0.718475341796875, -0.67205810546875, -0.625640869140625, -0.5792236328125, -0.532806396484375, -0.48638916015625, -0.439971923828125, -0.3935546875, -0.347137451171875, -0.30072021484375, -0.254302978515625, -0.2078857421875, -0.161468505859375, -0.11505126953125, -0.068634033203125, -0.022216796875, 0.024200439453125, 0.07061767578125, 0.117034912109375, 0.1634521484375, 0.209869384765625, 0.25628662109375, 0.302703857421875, 0.34912109375, 0.395538330078125, 0.44195556640625, 0.488372802734375, 0.5347900390625, 0.581207275390625, 0.62762451171875, 0.674041748046875, 0.720458984375, 0.766876220703125, 0.81329345703125, 0.859710693359375, 0.9061279296875, 0.952545166015625, 0.99896240234375, 1.045379638671875, 1.091796875]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 4.0, 4.0, 19.0, 35.0, 93.0, 191.0, 361.0, 157.0, 77.0, 36.0, 21.0, 3.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-65.68341064453125, -63.874114990234375, -62.0648193359375, -60.255523681640625, -58.44622802734375, -56.636932373046875, -54.82763671875, -53.018341064453125, -51.20904541015625, -49.399749755859375, -47.5904541015625, -45.781158447265625, -43.97186279296875, -42.162567138671875, -40.353271484375, -38.543975830078125, -36.734676361083984, -34.92538070678711, -33.116085052490234, -31.30678939819336, -29.497493743896484, -27.68819808959961, -25.8789005279541, -24.069604873657227, -22.26030921936035, -20.451013565063477, -18.6417179107666, -16.832420349121094, -15.023125648498535, -13.21382999420166, -11.404533386230469, -9.595237731933594, -7.785942077636719, -5.976646423339844, -4.1673502922058105, -2.3580541610717773, -0.5487585067749023, 1.2605371475219727, 3.069833755493164, 4.879129409790039, 6.688425064086914, 8.497720718383789, 10.307016372680664, 12.116312980651855, 13.92560863494873, 15.734904289245605, 17.544200897216797, 19.353496551513672, 21.162792205810547, 22.972087860107422, 24.781383514404297, 26.590679168701172, 28.399974822998047, 30.209270477294922, 32.01856994628906, 33.82786560058594, 35.63716125488281, 37.44645690917969, 39.25575256347656, 41.06504821777344, 42.87434387207031, 44.68363952636719, 46.49293518066406, 48.30223083496094, 50.11152648925781]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 4.0, 0.0, 3.0, 2.0, 5.0, 6.0, 13.0, 8.0, 9.0, 17.0, 15.0, 21.0, 22.0, 28.0, 22.0, 26.0, 29.0, 33.0, 42.0, 54.0, 104.0, 121.0, 66.0, 55.0, 32.0, 39.0, 33.0, 30.0, 22.0, 23.0, 22.0, 10.0, 18.0, 12.0, 12.0, 15.0, 6.0, 11.0, 2.0, 5.0, 9.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-33.92973327636719, -32.90850830078125, -31.887279510498047, -30.866052627563477, -29.844825744628906, -28.823598861694336, -27.802371978759766, -26.781147003173828, -25.759918212890625, -24.738691329956055, -23.717464447021484, -22.696237564086914, -21.675010681152344, -20.653783798217773, -19.632556915283203, -18.611331939697266, -17.590105056762695, -16.568878173828125, -15.547651290893555, -14.526424407958984, -13.505197525024414, -12.483970642089844, -11.46274471282959, -10.44151782989502, -9.42029094696045, -8.399064064025879, -7.377837181091309, -6.3566107749938965, -5.335383892059326, -4.314157009124756, -3.2929306030273438, -2.2717037200927734, -1.2504749298095703, -0.22924816608428955, 0.7919785976409912, 1.8132052421569824, 2.8344321250915527, 3.855659008026123, 4.876885414123535, 5.8981122970581055, 6.919339179992676, 7.940566062927246, 8.961792945861816, 9.98301887512207, 11.00424575805664, 12.025472640991211, 13.046699523925781, 14.067926406860352, 15.089153289794922, 16.110380172729492, 17.131607055664062, 18.152833938598633, 19.174060821533203, 20.195287704467773, 21.216514587402344, 22.23773956298828, 23.258968353271484, 24.280195236206055, 25.301422119140625, 26.322649002075195, 27.343875885009766, 28.365102767944336, 29.386329650878906, 30.407554626464844, 31.428781509399414]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 4.0, 8.0, 4.0, 9.0, 11.0, 11.0, 25.0, 24.0, 39.0, 43.0, 76.0, 128.0, 203.0, 462.0, 977.0, 2488.0, 6905.0, 23785.0, 118012.0, 708735.0, 2169764.0, 942975.0, 171949.0, 33236.0, 8912.0, 3075.0, 1184.0, 559.0, 277.0, 143.0, 75.0, 53.0, 37.0, 25.0, 14.0, 15.0, 17.0, 9.0, 6.0, 4.0, 3.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.0], "bins": [-2.76953125, -2.69384765625, -2.6181640625, -2.54248046875, -2.466796875, -2.39111328125, -2.3154296875, -2.23974609375, -2.1640625, -2.08837890625, -2.0126953125, -1.93701171875, -1.861328125, -1.78564453125, -1.7099609375, -1.63427734375, -1.55859375, -1.48291015625, -1.4072265625, -1.33154296875, -1.255859375, -1.18017578125, -1.1044921875, -1.02880859375, -0.953125, -0.87744140625, -0.8017578125, -0.72607421875, -0.650390625, -0.57470703125, -0.4990234375, -0.42333984375, -0.34765625, -0.27197265625, -0.1962890625, -0.12060546875, -0.044921875, 0.03076171875, 0.1064453125, 0.18212890625, 0.2578125, 0.33349609375, 0.4091796875, 0.48486328125, 0.560546875, 0.63623046875, 0.7119140625, 0.78759765625, 0.86328125, 0.93896484375, 1.0146484375, 1.09033203125, 1.166015625, 1.24169921875, 1.3173828125, 1.39306640625, 1.46875, 1.54443359375, 1.6201171875, 1.69580078125, 1.771484375, 1.84716796875, 1.9228515625, 1.99853515625, 2.07421875]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 5.0, 7.0, 7.0, 6.0, 6.0, 9.0, 14.0, 19.0, 31.0, 32.0, 38.0, 36.0, 55.0, 47.0, 50.0, 49.0, 59.0, 66.0, 66.0, 71.0, 68.0, 52.0, 48.0, 37.0, 36.0, 27.0, 22.0, 13.0, 8.0, 10.0, 13.0, 5.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.01953125, -2.9312744140625, -2.843017578125, -2.7547607421875, -2.66650390625, -2.5782470703125, -2.489990234375, -2.4017333984375, -2.3134765625, -2.2252197265625, -2.136962890625, -2.0487060546875, -1.96044921875, -1.8721923828125, -1.783935546875, -1.6956787109375, -1.607421875, -1.5191650390625, -1.430908203125, -1.3426513671875, -1.25439453125, -1.1661376953125, -1.077880859375, -0.9896240234375, -0.9013671875, -0.8131103515625, -0.724853515625, -0.6365966796875, -0.54833984375, -0.4600830078125, -0.371826171875, -0.2835693359375, -0.1953125, -0.1070556640625, -0.018798828125, 0.0694580078125, 0.15771484375, 0.2459716796875, 0.334228515625, 0.4224853515625, 0.5107421875, 0.5989990234375, 0.687255859375, 0.7755126953125, 0.86376953125, 0.9520263671875, 1.040283203125, 1.1285400390625, 1.216796875, 1.3050537109375, 1.393310546875, 1.4815673828125, 1.56982421875, 1.6580810546875, 1.746337890625, 1.8345947265625, 1.9228515625, 2.0111083984375, 2.099365234375, 2.1876220703125, 2.27587890625, 2.3641357421875, 2.452392578125, 2.5406494140625, 2.62890625]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 5.0, 3.0, 2.0, 3.0, 0.0, 1.0, 3.0, 10.0, 4.0, 12.0, 20.0, 28.0, 44.0, 47.0, 97.0, 162.0, 243.0, 485.0, 1168.0, 3630.0, 24422.0, 656107.0, 3401470.0, 94073.0, 8493.0, 1974.0, 792.0, 406.0, 210.0, 125.0, 82.0, 57.0, 32.0, 27.0, 19.0, 7.0, 9.0, 6.0, 3.0, 5.0, 4.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.32421875, -5.14215087890625, -4.9600830078125, -4.77801513671875, -4.595947265625, -4.41387939453125, -4.2318115234375, -4.04974365234375, -3.86767578125, -3.68560791015625, -3.5035400390625, -3.32147216796875, -3.139404296875, -2.95733642578125, -2.7752685546875, -2.59320068359375, -2.4111328125, -2.22906494140625, -2.0469970703125, -1.86492919921875, -1.682861328125, -1.50079345703125, -1.3187255859375, -1.13665771484375, -0.95458984375, -0.77252197265625, -0.5904541015625, -0.40838623046875, -0.226318359375, -0.04425048828125, 0.1378173828125, 0.31988525390625, 0.501953125, 0.68402099609375, 0.8660888671875, 1.04815673828125, 1.230224609375, 1.41229248046875, 1.5943603515625, 1.77642822265625, 1.95849609375, 2.14056396484375, 2.3226318359375, 2.50469970703125, 2.686767578125, 2.86883544921875, 3.0509033203125, 3.23297119140625, 3.4150390625, 3.59710693359375, 3.7791748046875, 3.96124267578125, 4.143310546875, 4.32537841796875, 4.5074462890625, 4.68951416015625, 4.87158203125, 5.05364990234375, 5.2357177734375, 5.41778564453125, 5.599853515625, 5.78192138671875, 5.9639892578125, 6.14605712890625, 6.328125]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 5.0, 3.0, 7.0, 11.0, 15.0, 26.0, 53.0, 88.0, 170.0, 342.0, 575.0, 861.0, 778.0, 511.0, 247.0, 154.0, 104.0, 51.0, 33.0, 17.0, 9.0, 7.0, 7.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.728515625, -3.530853271484375, -3.33319091796875, -3.135528564453125, -2.9378662109375, -2.740203857421875, -2.54254150390625, -2.344879150390625, -2.147216796875, -1.949554443359375, -1.75189208984375, -1.554229736328125, -1.3565673828125, -1.158905029296875, -0.96124267578125, -0.763580322265625, -0.56591796875, -0.368255615234375, -0.17059326171875, 0.027069091796875, 0.2247314453125, 0.422393798828125, 0.62005615234375, 0.817718505859375, 1.015380859375, 1.213043212890625, 1.41070556640625, 1.608367919921875, 1.8060302734375, 2.003692626953125, 2.20135498046875, 2.399017333984375, 2.5966796875, 2.794342041015625, 2.99200439453125, 3.189666748046875, 3.3873291015625, 3.584991455078125, 3.78265380859375, 3.980316162109375, 4.177978515625, 4.375640869140625, 4.57330322265625, 4.770965576171875, 4.9686279296875, 5.166290283203125, 5.36395263671875, 5.561614990234375, 5.75927734375, 5.956939697265625, 6.15460205078125, 6.352264404296875, 6.5499267578125, 6.747589111328125, 6.94525146484375, 7.142913818359375, 7.340576171875, 7.538238525390625, 7.73590087890625, 7.933563232421875, 8.1312255859375, 8.328887939453125, 8.52655029296875, 8.724212646484375, 8.921875]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 7.0, 11.0, 9.0, 23.0, 75.0, 153.0, 253.0, 237.0, 119.0, 62.0, 28.0, 12.0, 4.0, 5.0, 3.0, 1.0, 4.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-46.820858001708984, -45.06989288330078, -43.318931579589844, -41.56796646118164, -39.8170051574707, -38.0660400390625, -36.31507873535156, -34.56411361694336, -32.813148498535156, -31.062185287475586, -29.311222076416016, -27.560256958007812, -25.809295654296875, -24.058330535888672, -22.3073673248291, -20.55640411376953, -18.805442810058594, -17.054479598999023, -15.303516387939453, -13.552552223205566, -11.801589012145996, -10.050625801086426, -8.299661636352539, -6.548698425292969, -4.797735214233398, -3.046771764755249, -1.2958083152770996, 0.4551553726196289, 2.206118583679199, 3.9570817947387695, 5.708045959472656, 7.459009170532227, 9.209968566894531, 10.960931777954102, 12.711894989013672, 14.462859153747559, 16.213821411132812, 17.964786529541016, 19.715749740600586, 21.466712951660156, 23.217676162719727, 24.968639373779297, 26.719602584838867, 28.470565795898438, 30.22153091430664, 31.972492218017578, 33.72345733642578, 35.47441864013672, 37.22538375854492, 38.976348876953125, 40.72731018066406, 42.478275299072266, 44.2292366027832, 45.980201721191406, 47.731163024902344, 49.48212814331055, 51.23309326171875, 52.98405838012695, 54.73501968383789, 56.485984802246094, 58.23694610595703, 59.987911224365234, 61.73887634277344, 63.489837646484375, 65.24079895019531]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 4.0, 8.0, 6.0, 9.0, 7.0, 12.0, 8.0, 22.0, 15.0, 39.0, 32.0, 35.0, 46.0, 47.0, 52.0, 56.0, 46.0, 50.0, 50.0, 56.0, 46.0, 61.0, 40.0, 43.0, 40.0, 38.0, 24.0, 28.0, 17.0, 17.0, 12.0, 10.0, 11.0, 8.0, 6.0, 4.0, 5.0, 1.0, 2.0, 3.0], "bins": [-32.34654235839844, -31.590980529785156, -30.835418701171875, -30.079858779907227, -29.324296951293945, -28.568735122680664, -27.813173294067383, -27.057613372802734, -26.302051544189453, -25.546489715576172, -24.79092788696289, -24.035367965698242, -23.27980613708496, -22.52424430847168, -21.7686824798584, -21.01312255859375, -20.25756072998047, -19.501998901367188, -18.746437072753906, -17.990877151489258, -17.235315322875977, -16.479753494262695, -15.724191665649414, -14.96863079071045, -14.213068008422852, -13.45750617980957, -12.701945304870605, -11.946383476257324, -11.19082260131836, -10.435260772705078, -9.679698944091797, -8.924138069152832, -8.168577194213867, -7.413015842437744, -6.657454490661621, -5.90189266204834, -5.146331787109375, -4.390769958496094, -3.6352086067199707, -2.8796472549438477, -2.1240859031677246, -1.3685245513916016, -0.612963080406189, 0.14259839057922363, 0.8981597423553467, 1.6537210941314697, 2.409282684326172, 3.164844036102295, 3.920405387878418, 4.675966739654541, 5.431528091430664, 6.187089920043945, 6.94265079498291, 7.698212623596191, 8.453773498535156, 9.209335327148438, 9.964897155761719, 10.720458984375, 11.476019859313965, 12.231581687927246, 12.987142562866211, 13.742704391479492, 14.498266220092773, 15.253827095031738, 16.009387969970703]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 4.0, 5.0, 10.0, 12.0, 12.0, 19.0, 29.0, 50.0, 50.0, 112.0, 193.0, 262.0, 465.0, 859.0, 1387.0, 2696.0, 5062.0, 9946.0, 20972.0, 45735.0, 109176.0, 252419.0, 315840.0, 159730.0, 66093.0, 28963.0, 13559.0, 6961.0, 3536.0, 1876.0, 1002.0, 596.0, 345.0, 210.0, 125.0, 93.0, 59.0, 33.0, 22.0, 12.0, 13.0, 6.0, 9.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.953125, -2.8585205078125, -2.763916015625, -2.6693115234375, -2.57470703125, -2.4801025390625, -2.385498046875, -2.2908935546875, -2.1962890625, -2.1016845703125, -2.007080078125, -1.9124755859375, -1.81787109375, -1.7232666015625, -1.628662109375, -1.5340576171875, -1.439453125, -1.3448486328125, -1.250244140625, -1.1556396484375, -1.06103515625, -0.9664306640625, -0.871826171875, -0.7772216796875, -0.6826171875, -0.5880126953125, -0.493408203125, -0.3988037109375, -0.30419921875, -0.2095947265625, -0.114990234375, -0.0203857421875, 0.07421875, 0.1688232421875, 0.263427734375, 0.3580322265625, 0.45263671875, 0.5472412109375, 0.641845703125, 0.7364501953125, 0.8310546875, 0.9256591796875, 1.020263671875, 1.1148681640625, 1.20947265625, 1.3040771484375, 1.398681640625, 1.4932861328125, 1.587890625, 1.6824951171875, 1.777099609375, 1.8717041015625, 1.96630859375, 2.0609130859375, 2.155517578125, 2.2501220703125, 2.3447265625, 2.4393310546875, 2.533935546875, 2.6285400390625, 2.72314453125, 2.8177490234375, 2.912353515625, 3.0069580078125, 3.1015625]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 2.0, 3.0, 8.0, 5.0, 8.0, 7.0, 15.0, 20.0, 17.0, 21.0, 29.0, 41.0, 47.0, 48.0, 49.0, 57.0, 63.0, 64.0, 56.0, 66.0, 71.0, 42.0, 58.0, 43.0, 41.0, 24.0, 27.0, 19.0, 25.0, 13.0, 4.0, 12.0, 7.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.005859375, -2.917144775390625, -2.82843017578125, -2.739715576171875, -2.6510009765625, -2.562286376953125, -2.47357177734375, -2.384857177734375, -2.296142578125, -2.207427978515625, -2.11871337890625, -2.029998779296875, -1.9412841796875, -1.852569580078125, -1.76385498046875, -1.675140380859375, -1.58642578125, -1.497711181640625, -1.40899658203125, -1.320281982421875, -1.2315673828125, -1.142852783203125, -1.05413818359375, -0.965423583984375, -0.876708984375, -0.787994384765625, -0.69927978515625, -0.610565185546875, -0.5218505859375, -0.433135986328125, -0.34442138671875, -0.255706787109375, -0.1669921875, -0.078277587890625, 0.01043701171875, 0.099151611328125, 0.1878662109375, 0.276580810546875, 0.36529541015625, 0.454010009765625, 0.542724609375, 0.631439208984375, 0.72015380859375, 0.808868408203125, 0.8975830078125, 0.986297607421875, 1.07501220703125, 1.163726806640625, 1.25244140625, 1.341156005859375, 1.42987060546875, 1.518585205078125, 1.6072998046875, 1.696014404296875, 1.78472900390625, 1.873443603515625, 1.962158203125, 2.050872802734375, 2.13958740234375, 2.228302001953125, 2.3170166015625, 2.405731201171875, 2.49444580078125, 2.583160400390625, 2.671875]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 9.0, 16.0, 18.0, 24.0, 43.0, 50.0, 54.0, 64.0, 96.0, 114.0, 200.0, 285.0, 369.0, 560.0, 866.0, 1526.0, 3475.0, 11150.0, 57387.0, 651509.0, 267821.0, 37971.0, 8329.0, 2758.0, 1346.0, 752.0, 481.0, 345.0, 239.0, 182.0, 117.0, 94.0, 71.0, 60.0, 35.0, 30.0, 29.0, 18.0, 13.0, 8.0, 5.0, 6.0, 4.0, 6.0, 1.0, 2.0, 0.0, 0.0, 3.0], "bins": [-6.11328125, -5.93804931640625, -5.7628173828125, -5.58758544921875, -5.412353515625, -5.23712158203125, -5.0618896484375, -4.88665771484375, -4.71142578125, -4.53619384765625, -4.3609619140625, -4.18572998046875, -4.010498046875, -3.83526611328125, -3.6600341796875, -3.48480224609375, -3.3095703125, -3.13433837890625, -2.9591064453125, -2.78387451171875, -2.608642578125, -2.43341064453125, -2.2581787109375, -2.08294677734375, -1.90771484375, -1.73248291015625, -1.5572509765625, -1.38201904296875, -1.206787109375, -1.03155517578125, -0.8563232421875, -0.68109130859375, -0.505859375, -0.33062744140625, -0.1553955078125, 0.01983642578125, 0.195068359375, 0.37030029296875, 0.5455322265625, 0.72076416015625, 0.89599609375, 1.07122802734375, 1.2464599609375, 1.42169189453125, 1.596923828125, 1.77215576171875, 1.9473876953125, 2.12261962890625, 2.2978515625, 2.47308349609375, 2.6483154296875, 2.82354736328125, 2.998779296875, 3.17401123046875, 3.3492431640625, 3.52447509765625, 3.69970703125, 3.87493896484375, 4.0501708984375, 4.22540283203125, 4.400634765625, 4.57586669921875, 4.7510986328125, 4.92633056640625, 5.1015625]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 4.0, 5.0, 7.0, 3.0, 2.0, 4.0, 14.0, 10.0, 15.0, 22.0, 27.0, 23.0, 32.0, 29.0, 45.0, 46.0, 61.0, 61.0, 67.0, 60.0, 69.0, 61.0, 60.0, 59.0, 35.0, 34.0, 31.0, 22.0, 17.0, 19.0, 13.0, 10.0, 14.0, 5.0, 4.0, 9.0, 2.0, 4.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.3671875, -9.078857421875, -8.79052734375, -8.502197265625, -8.2138671875, -7.925537109375, -7.63720703125, -7.348876953125, -7.060546875, -6.772216796875, -6.48388671875, -6.195556640625, -5.9072265625, -5.618896484375, -5.33056640625, -5.042236328125, -4.75390625, -4.465576171875, -4.17724609375, -3.888916015625, -3.6005859375, -3.312255859375, -3.02392578125, -2.735595703125, -2.447265625, -2.158935546875, -1.87060546875, -1.582275390625, -1.2939453125, -1.005615234375, -0.71728515625, -0.428955078125, -0.140625, 0.147705078125, 0.43603515625, 0.724365234375, 1.0126953125, 1.301025390625, 1.58935546875, 1.877685546875, 2.166015625, 2.454345703125, 2.74267578125, 3.031005859375, 3.3193359375, 3.607666015625, 3.89599609375, 4.184326171875, 4.47265625, 4.760986328125, 5.04931640625, 5.337646484375, 5.6259765625, 5.914306640625, 6.20263671875, 6.490966796875, 6.779296875, 7.067626953125, 7.35595703125, 7.644287109375, 7.9326171875, 8.220947265625, 8.50927734375, 8.797607421875, 9.0859375]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 3.0, 1.0, 9.0, 7.0, 13.0, 12.0, 15.0, 26.0, 35.0, 62.0, 110.0, 216.0, 493.0, 1289.0, 3971.0, 16528.0, 116361.0, 801208.0, 89085.0, 13638.0, 3428.0, 1110.0, 439.0, 187.0, 114.0, 72.0, 25.0, 25.0, 16.0, 11.0, 13.0, 7.0, 5.0, 5.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.833984375, -1.7774505615234375, -1.720916748046875, -1.6643829345703125, -1.60784912109375, -1.5513153076171875, -1.494781494140625, -1.4382476806640625, -1.3817138671875, -1.3251800537109375, -1.268646240234375, -1.2121124267578125, -1.15557861328125, -1.0990447998046875, -1.042510986328125, -0.9859771728515625, -0.929443359375, -0.8729095458984375, -0.816375732421875, -0.7598419189453125, -0.70330810546875, -0.6467742919921875, -0.590240478515625, -0.5337066650390625, -0.4771728515625, -0.4206390380859375, -0.364105224609375, -0.3075714111328125, -0.25103759765625, -0.1945037841796875, -0.137969970703125, -0.0814361572265625, -0.02490234375, 0.0316314697265625, 0.088165283203125, 0.1446990966796875, 0.20123291015625, 0.2577667236328125, 0.314300537109375, 0.3708343505859375, 0.4273681640625, 0.4839019775390625, 0.540435791015625, 0.5969696044921875, 0.65350341796875, 0.7100372314453125, 0.766571044921875, 0.8231048583984375, 0.879638671875, 0.9361724853515625, 0.992706298828125, 1.0492401123046875, 1.10577392578125, 1.1623077392578125, 1.218841552734375, 1.2753753662109375, 1.3319091796875, 1.3884429931640625, 1.444976806640625, 1.5015106201171875, 1.55804443359375, 1.6145782470703125, 1.671112060546875, 1.7276458740234375, 1.7841796875]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 1.0, 6.0, 8.0, 10.0, 6.0, 6.0, 10.0, 21.0, 18.0, 30.0, 42.0, 65.0, 65.0, 92.0, 99.0, 109.0, 92.0, 60.0, 70.0, 59.0, 40.0, 27.0, 19.0, 11.0, 8.0, 9.0, 6.0, 6.0, 1.0, 1.0, 2.0, 5.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004982948303222656, -0.000482410192489624, -0.0004665255546569824, -0.0004506409168243408, -0.0004347562789916992, -0.0004188716411590576, -0.000402987003326416, -0.0003871023654937744, -0.0003712177276611328, -0.0003553330898284912, -0.0003394484519958496, -0.000323563814163208, -0.0003076791763305664, -0.0002917945384979248, -0.0002759099006652832, -0.0002600252628326416, -0.000244140625, -0.0002282559871673584, -0.0002123713493347168, -0.0001964867115020752, -0.0001806020736694336, -0.000164717435836792, -0.0001488327980041504, -0.0001329481601715088, -0.00011706352233886719, -0.00010117888450622559, -8.529424667358398e-05, -6.940960884094238e-05, -5.352497100830078e-05, -3.764033317565918e-05, -2.1755695343017578e-05, -5.8710575103759766e-06, 1.0013580322265625e-05, 2.5898218154907227e-05, 4.178285598754883e-05, 5.766749382019043e-05, 7.355213165283203e-05, 8.943676948547363e-05, 0.00010532140731811523, 0.00012120604515075684, 0.00013709068298339844, 0.00015297532081604004, 0.00016885995864868164, 0.00018474459648132324, 0.00020062923431396484, 0.00021651387214660645, 0.00023239850997924805, 0.00024828314781188965, 0.00026416778564453125, 0.00028005242347717285, 0.00029593706130981445, 0.00031182169914245605, 0.00032770633697509766, 0.00034359097480773926, 0.00035947561264038086, 0.00037536025047302246, 0.00039124488830566406, 0.00040712952613830566, 0.00042301416397094727, 0.00043889880180358887, 0.00045478343963623047, 0.00047066807746887207, 0.00048655271530151367, 0.0005024373531341553, 0.0005183219909667969]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 7.0, 4.0, 3.0, 10.0, 17.0, 18.0, 21.0, 38.0, 60.0, 82.0, 174.0, 301.0, 566.0, 1249.0, 3301.0, 12821.0, 81895.0, 769105.0, 150831.0, 20115.0, 4817.0, 1543.0, 718.0, 346.0, 192.0, 114.0, 54.0, 52.0, 28.0, 18.0, 12.0, 12.0, 8.0, 10.0, 5.0, 5.0, 3.0, 0.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.703125, -1.652496337890625, -1.60186767578125, -1.551239013671875, -1.5006103515625, -1.449981689453125, -1.39935302734375, -1.348724365234375, -1.298095703125, -1.247467041015625, -1.19683837890625, -1.146209716796875, -1.0955810546875, -1.044952392578125, -0.99432373046875, -0.943695068359375, -0.89306640625, -0.842437744140625, -0.79180908203125, -0.741180419921875, -0.6905517578125, -0.639923095703125, -0.58929443359375, -0.538665771484375, -0.488037109375, -0.437408447265625, -0.38677978515625, -0.336151123046875, -0.2855224609375, -0.234893798828125, -0.18426513671875, -0.133636474609375, -0.0830078125, -0.032379150390625, 0.01824951171875, 0.068878173828125, 0.1195068359375, 0.170135498046875, 0.22076416015625, 0.271392822265625, 0.322021484375, 0.372650146484375, 0.42327880859375, 0.473907470703125, 0.5245361328125, 0.575164794921875, 0.62579345703125, 0.676422119140625, 0.72705078125, 0.777679443359375, 0.82830810546875, 0.878936767578125, 0.9295654296875, 0.980194091796875, 1.03082275390625, 1.081451416015625, 1.132080078125, 1.182708740234375, 1.23333740234375, 1.283966064453125, 1.3345947265625, 1.385223388671875, 1.43585205078125, 1.486480712890625, 1.537109375]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 1.0, 3.0, 3.0, 5.0, 7.0, 7.0, 8.0, 10.0, 9.0, 19.0, 29.0, 19.0, 37.0, 44.0, 52.0, 50.0, 77.0, 105.0, 96.0, 74.0, 62.0, 68.0, 44.0, 36.0, 19.0, 24.0, 16.0, 14.0, 19.0, 12.0, 7.0, 5.0, 8.0, 3.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.1728515625, -1.13238525390625, -1.0919189453125, -1.05145263671875, -1.010986328125, -0.97052001953125, -0.9300537109375, -0.88958740234375, -0.84912109375, -0.80865478515625, -0.7681884765625, -0.72772216796875, -0.687255859375, -0.64678955078125, -0.6063232421875, -0.56585693359375, -0.525390625, -0.48492431640625, -0.4444580078125, -0.40399169921875, -0.363525390625, -0.32305908203125, -0.2825927734375, -0.24212646484375, -0.20166015625, -0.16119384765625, -0.1207275390625, -0.08026123046875, -0.039794921875, 0.00067138671875, 0.0411376953125, 0.08160400390625, 0.1220703125, 0.16253662109375, 0.2030029296875, 0.24346923828125, 0.283935546875, 0.32440185546875, 0.3648681640625, 0.40533447265625, 0.44580078125, 0.48626708984375, 0.5267333984375, 0.56719970703125, 0.607666015625, 0.64813232421875, 0.6885986328125, 0.72906494140625, 0.76953125, 0.80999755859375, 0.8504638671875, 0.89093017578125, 0.931396484375, 0.97186279296875, 1.0123291015625, 1.05279541015625, 1.09326171875, 1.13372802734375, 1.1741943359375, 1.21466064453125, 1.255126953125, 1.29559326171875, 1.3360595703125, 1.37652587890625, 1.4169921875]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 4.0, 2.0, 4.0, 21.0, 45.0, 107.0, 384.0, 278.0, 101.0, 37.0, 15.0, 4.0, 2.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-86.36686706542969, -84.25753021240234, -82.148193359375, -80.03885650634766, -77.92951965332031, -75.82018280029297, -73.71084594726562, -71.60150146484375, -69.49217224121094, -67.3828353881836, -65.27349853515625, -63.164161682128906, -61.05482482910156, -58.94548797607422, -56.83614730834961, -54.726810455322266, -52.617469787597656, -50.50813293457031, -48.39879608154297, -46.289459228515625, -44.18012237548828, -42.07078552246094, -39.96144485473633, -37.852108001708984, -35.74277114868164, -33.6334342956543, -31.524097442626953, -29.414758682250977, -27.305421829223633, -25.19608497619629, -23.086746215820312, -20.97740936279297, -18.868072509765625, -16.75873565673828, -14.649397850036621, -12.540060043334961, -10.430723190307617, -8.321386337280273, -6.212048530578613, -4.102710723876953, -1.9933738708496094, 0.11596345901489258, 2.2253007888793945, 4.3346381187438965, 6.443975448608398, 8.553312301635742, 10.662650108337402, 12.771987915039062, 14.881324768066406, 16.99066162109375, 19.099998474121094, 21.20933723449707, 23.318674087524414, 25.428010940551758, 27.537349700927734, 29.646686553955078, 31.756023406982422, 33.865360260009766, 35.97469711303711, 38.08403396606445, 40.19337463378906, 42.302711486816406, 44.41204833984375, 46.521385192871094, 48.63072204589844]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 2.0, 3.0, 8.0, 11.0, 15.0, 9.0, 7.0, 14.0, 15.0, 21.0, 25.0, 24.0, 31.0, 28.0, 41.0, 45.0, 55.0, 131.0, 157.0, 65.0, 32.0, 35.0, 33.0, 33.0, 17.0, 27.0, 24.0, 14.0, 13.0, 12.0, 12.0, 10.0, 9.0, 6.0, 2.0, 4.0, 6.0, 3.0, 1.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-33.65927505493164, -32.641571044921875, -31.623870849609375, -30.606168746948242, -29.58846664428711, -28.570762634277344, -27.55306053161621, -26.535358428955078, -25.517656326293945, -24.499954223632812, -23.48225212097168, -22.464550018310547, -21.44684600830078, -20.42914581298828, -19.411441802978516, -18.393739700317383, -17.37603759765625, -16.358335494995117, -15.340633392333984, -14.322930335998535, -13.305228233337402, -12.28752613067627, -11.26982307434082, -10.252120971679688, -9.234418869018555, -8.216716766357422, -7.199014186859131, -6.18131160736084, -5.163609504699707, -4.145907402038574, -3.128204822540283, -2.110502243041992, -1.0927982330322266, -0.07509589195251465, 0.9426064491271973, 1.9603087902069092, 2.978011131286621, 3.995713233947754, 5.013415813446045, 6.031118392944336, 7.048820495605469, 8.066522598266602, 9.084224700927734, 10.101927757263184, 11.119629859924316, 12.13733196258545, 13.155035018920898, 14.172737121582031, 15.190439224243164, 16.208141326904297, 17.22584342956543, 18.243545532226562, 19.261249542236328, 20.278949737548828, 21.296653747558594, 22.314355850219727, 23.33205795288086, 24.349760055541992, 25.367462158203125, 26.385164260864258, 27.40286636352539, 28.420570373535156, 29.43827247619629, 30.455974578857422, 31.473676681518555]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 3.0, 0.0, 4.0, 4.0, 9.0, 11.0, 14.0, 14.0, 20.0, 34.0, 46.0, 55.0, 76.0, 119.0, 171.0, 207.0, 331.0, 501.0, 704.0, 1175.0, 1766.0, 3249.0, 6118.0, 12939.0, 30286.0, 84066.0, 246068.0, 640854.0, 1218764.0, 1101075.0, 525050.0, 198834.0, 70403.0, 26405.0, 11622.0, 5553.0, 2816.0, 1728.0, 1107.0, 672.0, 455.0, 283.0, 206.0, 137.0, 84.0, 75.0, 33.0, 40.0, 31.0, 15.0, 17.0, 14.0, 10.0, 3.0, 10.0, 5.0, 2.0, 3.0], "bins": [-2.099609375, -2.0394439697265625, -1.979278564453125, -1.9191131591796875, -1.85894775390625, -1.7987823486328125, -1.738616943359375, -1.6784515380859375, -1.6182861328125, -1.5581207275390625, -1.497955322265625, -1.4377899169921875, -1.37762451171875, -1.3174591064453125, -1.257293701171875, -1.1971282958984375, -1.136962890625, -1.0767974853515625, -1.016632080078125, -0.9564666748046875, -0.89630126953125, -0.8361358642578125, -0.775970458984375, -0.7158050537109375, -0.6556396484375, -0.5954742431640625, -0.535308837890625, -0.4751434326171875, -0.41497802734375, -0.3548126220703125, -0.294647216796875, -0.2344818115234375, -0.17431640625, -0.1141510009765625, -0.053985595703125, 0.0061798095703125, 0.06634521484375, 0.1265106201171875, 0.186676025390625, 0.2468414306640625, 0.3070068359375, 0.3671722412109375, 0.427337646484375, 0.4875030517578125, 0.54766845703125, 0.6078338623046875, 0.667999267578125, 0.7281646728515625, 0.788330078125, 0.8484954833984375, 0.908660888671875, 0.9688262939453125, 1.02899169921875, 1.0891571044921875, 1.149322509765625, 1.2094879150390625, 1.2696533203125, 1.3298187255859375, 1.389984130859375, 1.4501495361328125, 1.51031494140625, 1.5704803466796875, 1.630645751953125, 1.6908111572265625, 1.7509765625]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 1.0, 2.0, 7.0, 11.0, 9.0, 7.0, 14.0, 11.0, 20.0, 15.0, 28.0, 31.0, 37.0, 41.0, 46.0, 46.0, 59.0, 53.0, 39.0, 45.0, 58.0, 52.0, 48.0, 52.0, 41.0, 39.0, 33.0, 27.0, 20.0, 26.0, 19.0, 17.0, 15.0, 9.0, 7.0, 4.0, 4.0, 4.0, 6.0, 1.0, 4.0, 0.0, 0.0, 1.0], "bins": [-2.71875, -2.6495208740234375, -2.580291748046875, -2.5110626220703125, -2.44183349609375, -2.3726043701171875, -2.303375244140625, -2.2341461181640625, -2.1649169921875, -2.0956878662109375, -2.026458740234375, -1.9572296142578125, -1.88800048828125, -1.8187713623046875, -1.749542236328125, -1.6803131103515625, -1.611083984375, -1.5418548583984375, -1.472625732421875, -1.4033966064453125, -1.33416748046875, -1.2649383544921875, -1.195709228515625, -1.1264801025390625, -1.0572509765625, -0.9880218505859375, -0.918792724609375, -0.8495635986328125, -0.78033447265625, -0.7111053466796875, -0.641876220703125, -0.5726470947265625, -0.50341796875, -0.4341888427734375, -0.364959716796875, -0.2957305908203125, -0.22650146484375, -0.1572723388671875, -0.088043212890625, -0.0188140869140625, 0.0504150390625, 0.1196441650390625, 0.188873291015625, 0.2581024169921875, 0.32733154296875, 0.3965606689453125, 0.465789794921875, 0.5350189208984375, 0.604248046875, 0.6734771728515625, 0.742706298828125, 0.8119354248046875, 0.88116455078125, 0.9503936767578125, 1.019622802734375, 1.0888519287109375, 1.1580810546875, 1.2273101806640625, 1.296539306640625, 1.3657684326171875, 1.43499755859375, 1.5042266845703125, 1.573455810546875, 1.6426849365234375, 1.7119140625]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 6.0, 2.0, 9.0, 15.0, 21.0, 37.0, 55.0, 129.0, 182.0, 438.0, 965.0, 2824.0, 18566.0, 3732975.0, 427862.0, 7130.0, 1668.0, 729.0, 325.0, 147.0, 78.0, 56.0, 29.0, 14.0, 10.0, 11.0, 3.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.8515625, -11.425537109375, -10.99951171875, -10.573486328125, -10.1474609375, -9.721435546875, -9.29541015625, -8.869384765625, -8.443359375, -8.017333984375, -7.59130859375, -7.165283203125, -6.7392578125, -6.313232421875, -5.88720703125, -5.461181640625, -5.03515625, -4.609130859375, -4.18310546875, -3.757080078125, -3.3310546875, -2.905029296875, -2.47900390625, -2.052978515625, -1.626953125, -1.200927734375, -0.77490234375, -0.348876953125, 0.0771484375, 0.503173828125, 0.92919921875, 1.355224609375, 1.78125, 2.207275390625, 2.63330078125, 3.059326171875, 3.4853515625, 3.911376953125, 4.33740234375, 4.763427734375, 5.189453125, 5.615478515625, 6.04150390625, 6.467529296875, 6.8935546875, 7.319580078125, 7.74560546875, 8.171630859375, 8.59765625, 9.023681640625, 9.44970703125, 9.875732421875, 10.3017578125, 10.727783203125, 11.15380859375, 11.579833984375, 12.005859375, 12.431884765625, 12.85791015625, 13.283935546875, 13.7099609375, 14.135986328125, 14.56201171875, 14.988037109375, 15.4140625]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 2.0, 3.0, 4.0, 6.0, 4.0, 8.0, 5.0, 13.0, 28.0, 32.0, 55.0, 83.0, 121.0, 175.0, 265.0, 366.0, 470.0, 567.0, 545.0, 439.0, 289.0, 204.0, 124.0, 91.0, 63.0, 41.0, 37.0, 17.0, 9.0, 6.0, 2.0, 5.0, 4.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.64453125, -4.43145751953125, -4.2183837890625, -4.00531005859375, -3.792236328125, -3.57916259765625, -3.3660888671875, -3.15301513671875, -2.93994140625, -2.72686767578125, -2.5137939453125, -2.30072021484375, -2.087646484375, -1.87457275390625, -1.6614990234375, -1.44842529296875, -1.2353515625, -1.02227783203125, -0.8092041015625, -0.59613037109375, -0.383056640625, -0.16998291015625, 0.0430908203125, 0.25616455078125, 0.46923828125, 0.68231201171875, 0.8953857421875, 1.10845947265625, 1.321533203125, 1.53460693359375, 1.7476806640625, 1.96075439453125, 2.173828125, 2.38690185546875, 2.5999755859375, 2.81304931640625, 3.026123046875, 3.23919677734375, 3.4522705078125, 3.66534423828125, 3.87841796875, 4.09149169921875, 4.3045654296875, 4.51763916015625, 4.730712890625, 4.94378662109375, 5.1568603515625, 5.36993408203125, 5.5830078125, 5.79608154296875, 6.0091552734375, 6.22222900390625, 6.435302734375, 6.64837646484375, 6.8614501953125, 7.07452392578125, 7.28759765625, 7.50067138671875, 7.7137451171875, 7.92681884765625, 8.139892578125, 8.35296630859375, 8.5660400390625, 8.77911376953125, 8.9921875]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 2.0, 6.0, 9.0, 8.0, 24.0, 49.0, 145.0, 288.0, 266.0, 112.0, 53.0, 20.0, 9.0, 4.0, 4.0, 0.0, 3.0, 2.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-164.75033569335938, -161.0089111328125, -157.26747131347656, -153.5260467529297, -149.78460693359375, -146.04318237304688, -142.3017578125, -138.56031799316406, -134.8188934326172, -131.0774688720703, -127.33602905273438, -123.5946044921875, -119.8531723022461, -116.11174011230469, -112.37030792236328, -108.62887573242188, -104.88744354248047, -101.14601135253906, -97.40457916259766, -93.66314697265625, -89.92172241210938, -86.18029022216797, -82.43885803222656, -78.69742584228516, -74.95599365234375, -71.21456146240234, -67.47312927246094, -63.7317008972168, -59.990272521972656, -56.24884033203125, -52.507408142089844, -48.76597595214844, -45.02455139160156, -41.283119201660156, -37.541690826416016, -33.80025863647461, -30.058828353881836, -26.317398071289062, -22.575965881347656, -18.834535598754883, -15.09310531616211, -11.351675033569336, -7.610243797302246, -3.8688125610351562, -0.1273822784423828, 3.6140480041503906, 7.355480194091797, 11.09691047668457, 14.838340759277344, 18.579771041870117, 22.32120132446289, 26.062633514404297, 29.80406379699707, 33.545494079589844, 37.28692626953125, 41.028358459472656, 44.7697868347168, 48.5112190246582, 52.252647399902344, 55.99407958984375, 59.735511779785156, 63.4769401550293, 67.21836853027344, 70.95980072021484, 74.70123291015625]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 5.0, 2.0, 3.0, 7.0, 10.0, 10.0, 11.0, 20.0, 26.0, 27.0, 33.0, 34.0, 40.0, 39.0, 40.0, 51.0, 51.0, 68.0, 61.0, 44.0, 45.0, 46.0, 54.0, 34.0, 33.0, 29.0, 32.0, 29.0, 19.0, 19.0, 13.0, 22.0, 11.0, 10.0, 5.0, 7.0, 7.0, 4.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-33.44635772705078, -32.323265075683594, -31.200172424316406, -30.07707977294922, -28.95398712158203, -27.830894470214844, -26.70780372619629, -25.5847110748291, -24.461618423461914, -23.338525772094727, -22.21543312072754, -21.09234046936035, -19.969249725341797, -18.84615707397461, -17.723064422607422, -16.599971771240234, -15.476879119873047, -14.35378646850586, -13.230693817138672, -12.1076021194458, -10.984509468078613, -9.861416816711426, -8.738325119018555, -7.615232467651367, -6.49213981628418, -5.369047164916992, -4.245954990386963, -3.1228625774383545, -1.999770164489746, -0.8766775131225586, 0.2464146614074707, 1.3695068359375, 2.4925994873046875, 3.615691900253296, 4.738784313201904, 5.861876487731934, 6.984969139099121, 8.108061790466309, 9.23115348815918, 10.354246139526367, 11.477338790893555, 12.600431442260742, 13.72352409362793, 14.8466157913208, 15.969708442687988, 17.09280014038086, 18.215892791748047, 19.338985443115234, 20.462078094482422, 21.58517074584961, 22.708263397216797, 23.831356048583984, 24.954448699951172, 26.07754135131836, 27.200632095336914, 28.3237247467041, 29.44681739807129, 30.569910049438477, 31.693002700805664, 32.81609344482422, 33.939186096191406, 35.062278747558594, 36.18537139892578, 37.30846405029297, 38.431556701660156]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 14.0, 16.0, 14.0, 18.0, 35.0, 45.0, 77.0, 115.0, 179.0, 295.0, 638.0, 1190.0, 2313.0, 5169.0, 13390.0, 39746.0, 150191.0, 593470.0, 172259.0, 43957.0, 14446.0, 5642.0, 2535.0, 1238.0, 675.0, 345.0, 217.0, 111.0, 73.0, 44.0, 31.0, 14.0, 13.0, 12.0, 8.0, 8.0, 3.0, 2.0, 3.0, 2.0, 1.0, 0.0, 3.0, 2.0, 0.0, 1.0, 1.0], "bins": [-4.4296875, -4.30145263671875, -4.1732177734375, -4.04498291015625, -3.916748046875, -3.78851318359375, -3.6602783203125, -3.53204345703125, -3.40380859375, -3.27557373046875, -3.1473388671875, -3.01910400390625, -2.890869140625, -2.76263427734375, -2.6343994140625, -2.50616455078125, -2.3779296875, -2.24969482421875, -2.1214599609375, -1.99322509765625, -1.864990234375, -1.73675537109375, -1.6085205078125, -1.48028564453125, -1.35205078125, -1.22381591796875, -1.0955810546875, -0.96734619140625, -0.839111328125, -0.71087646484375, -0.5826416015625, -0.45440673828125, -0.326171875, -0.19793701171875, -0.0697021484375, 0.05853271484375, 0.186767578125, 0.31500244140625, 0.4432373046875, 0.57147216796875, 0.69970703125, 0.82794189453125, 0.9561767578125, 1.08441162109375, 1.212646484375, 1.34088134765625, 1.4691162109375, 1.59735107421875, 1.7255859375, 1.85382080078125, 1.9820556640625, 2.11029052734375, 2.238525390625, 2.36676025390625, 2.4949951171875, 2.62322998046875, 2.75146484375, 2.87969970703125, 3.0079345703125, 3.13616943359375, 3.264404296875, 3.39263916015625, 3.5208740234375, 3.64910888671875, 3.77734375]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 6.0, 3.0, 13.0, 21.0, 21.0, 16.0, 40.0, 39.0, 67.0, 50.0, 65.0, 77.0, 85.0, 67.0, 77.0, 76.0, 53.0, 57.0, 43.0, 35.0, 19.0, 29.0, 18.0, 14.0, 6.0, 5.0, 5.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.466796875, -3.35174560546875, -3.2366943359375, -3.12164306640625, -3.006591796875, -2.89154052734375, -2.7764892578125, -2.66143798828125, -2.54638671875, -2.43133544921875, -2.3162841796875, -2.20123291015625, -2.086181640625, -1.97113037109375, -1.8560791015625, -1.74102783203125, -1.6259765625, -1.51092529296875, -1.3958740234375, -1.28082275390625, -1.165771484375, -1.05072021484375, -0.9356689453125, -0.82061767578125, -0.70556640625, -0.59051513671875, -0.4754638671875, -0.36041259765625, -0.245361328125, -0.13031005859375, -0.0152587890625, 0.09979248046875, 0.21484375, 0.32989501953125, 0.4449462890625, 0.55999755859375, 0.675048828125, 0.79010009765625, 0.9051513671875, 1.02020263671875, 1.13525390625, 1.25030517578125, 1.3653564453125, 1.48040771484375, 1.595458984375, 1.71051025390625, 1.8255615234375, 1.94061279296875, 2.0556640625, 2.17071533203125, 2.2857666015625, 2.40081787109375, 2.515869140625, 2.63092041015625, 2.7459716796875, 2.86102294921875, 2.97607421875, 3.09112548828125, 3.2061767578125, 3.32122802734375, 3.436279296875, 3.55133056640625, 3.6663818359375, 3.78143310546875, 3.896484375]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 7.0, 7.0, 4.0, 6.0, 11.0, 20.0, 30.0, 26.0, 45.0, 70.0, 90.0, 126.0, 221.0, 392.0, 858.0, 2014.0, 7679.0, 56057.0, 899784.0, 68423.0, 8768.0, 2168.0, 812.0, 382.0, 212.0, 120.0, 58.0, 55.0, 32.0, 23.0, 13.0, 9.0, 10.0, 6.0, 6.0, 5.0, 4.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.30078125, -6.10101318359375, -5.9012451171875, -5.70147705078125, -5.501708984375, -5.30194091796875, -5.1021728515625, -4.90240478515625, -4.70263671875, -4.50286865234375, -4.3031005859375, -4.10333251953125, -3.903564453125, -3.70379638671875, -3.5040283203125, -3.30426025390625, -3.1044921875, -2.90472412109375, -2.7049560546875, -2.50518798828125, -2.305419921875, -2.10565185546875, -1.9058837890625, -1.70611572265625, -1.50634765625, -1.30657958984375, -1.1068115234375, -0.90704345703125, -0.707275390625, -0.50750732421875, -0.3077392578125, -0.10797119140625, 0.091796875, 0.29156494140625, 0.4913330078125, 0.69110107421875, 0.890869140625, 1.09063720703125, 1.2904052734375, 1.49017333984375, 1.68994140625, 1.88970947265625, 2.0894775390625, 2.28924560546875, 2.489013671875, 2.68878173828125, 2.8885498046875, 3.08831787109375, 3.2880859375, 3.48785400390625, 3.6876220703125, 3.88739013671875, 4.087158203125, 4.28692626953125, 4.4866943359375, 4.68646240234375, 4.88623046875, 5.08599853515625, 5.2857666015625, 5.48553466796875, 5.685302734375, 5.88507080078125, 6.0848388671875, 6.28460693359375, 6.484375]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 4.0, 3.0, 2.0, 7.0, 3.0, 4.0, 7.0, 4.0, 14.0, 2.0, 12.0, 9.0, 16.0, 15.0, 18.0, 29.0, 18.0, 27.0, 26.0, 39.0, 51.0, 52.0, 61.0, 74.0, 77.0, 64.0, 65.0, 53.0, 50.0, 37.0, 35.0, 23.0, 20.0, 10.0, 14.0, 9.0, 12.0, 7.0, 5.0, 7.0, 5.0, 7.0, 2.0, 3.0, 5.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.8828125, -8.591064453125, -8.29931640625, -8.007568359375, -7.7158203125, -7.424072265625, -7.13232421875, -6.840576171875, -6.548828125, -6.257080078125, -5.96533203125, -5.673583984375, -5.3818359375, -5.090087890625, -4.79833984375, -4.506591796875, -4.21484375, -3.923095703125, -3.63134765625, -3.339599609375, -3.0478515625, -2.756103515625, -2.46435546875, -2.172607421875, -1.880859375, -1.589111328125, -1.29736328125, -1.005615234375, -0.7138671875, -0.422119140625, -0.13037109375, 0.161376953125, 0.453125, 0.744873046875, 1.03662109375, 1.328369140625, 1.6201171875, 1.911865234375, 2.20361328125, 2.495361328125, 2.787109375, 3.078857421875, 3.37060546875, 3.662353515625, 3.9541015625, 4.245849609375, 4.53759765625, 4.829345703125, 5.12109375, 5.412841796875, 5.70458984375, 5.996337890625, 6.2880859375, 6.579833984375, 6.87158203125, 7.163330078125, 7.455078125, 7.746826171875, 8.03857421875, 8.330322265625, 8.6220703125, 8.913818359375, 9.20556640625, 9.497314453125, 9.7890625]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 5.0, 12.0, 16.0, 24.0, 31.0, 44.0, 53.0, 78.0, 126.0, 172.0, 293.0, 507.0, 1004.0, 2065.0, 4888.0, 13962.0, 52763.0, 816453.0, 119711.0, 22745.0, 7507.0, 2954.0, 1369.0, 690.0, 386.0, 214.0, 151.0, 88.0, 69.0, 45.0, 28.0, 26.0, 19.0, 6.0, 11.0, 5.0, 6.0, 7.0, 5.0, 2.0, 6.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.001953125, -0.97222900390625, -0.9425048828125, -0.91278076171875, -0.883056640625, -0.85333251953125, -0.8236083984375, -0.79388427734375, -0.76416015625, -0.73443603515625, -0.7047119140625, -0.67498779296875, -0.645263671875, -0.61553955078125, -0.5858154296875, -0.55609130859375, -0.5263671875, -0.49664306640625, -0.4669189453125, -0.43719482421875, -0.407470703125, -0.37774658203125, -0.3480224609375, -0.31829833984375, -0.28857421875, -0.25885009765625, -0.2291259765625, -0.19940185546875, -0.169677734375, -0.13995361328125, -0.1102294921875, -0.08050537109375, -0.05078125, -0.02105712890625, 0.0086669921875, 0.03839111328125, 0.068115234375, 0.09783935546875, 0.1275634765625, 0.15728759765625, 0.18701171875, 0.21673583984375, 0.2464599609375, 0.27618408203125, 0.305908203125, 0.33563232421875, 0.3653564453125, 0.39508056640625, 0.4248046875, 0.45452880859375, 0.4842529296875, 0.51397705078125, 0.543701171875, 0.57342529296875, 0.6031494140625, 0.63287353515625, 0.66259765625, 0.69232177734375, 0.7220458984375, 0.75177001953125, 0.781494140625, 0.81121826171875, 0.8409423828125, 0.87066650390625, 0.900390625]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 2.0, 5.0, 7.0, 7.0, 12.0, 15.0, 17.0, 29.0, 26.0, 34.0, 38.0, 61.0, 91.0, 133.0, 122.0, 109.0, 71.0, 55.0, 41.0, 33.0, 21.0, 20.0, 11.0, 11.0, 8.0, 9.0, 6.0, 5.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0005426406860351562, -0.0005280636250972748, -0.0005134865641593933, -0.0004989095032215118, -0.00048433244228363037, -0.0004697553813457489, -0.00045517832040786743, -0.00044060125946998596, -0.0004260241985321045, -0.000411447137594223, -0.00039687007665634155, -0.0003822930157184601, -0.0003677159547805786, -0.00035313889384269714, -0.0003385618329048157, -0.0003239847719669342, -0.00030940771102905273, -0.00029483065009117126, -0.0002802535891532898, -0.0002656765282154083, -0.00025109946727752686, -0.00023652240633964539, -0.00022194534540176392, -0.00020736828446388245, -0.00019279122352600098, -0.0001782141625881195, -0.00016363710165023804, -0.00014906004071235657, -0.0001344829797744751, -0.00011990591883659363, -0.00010532885789871216, -9.075179696083069e-05, -7.617473602294922e-05, -6.159767508506775e-05, -4.702061414718628e-05, -3.244355320930481e-05, -1.786649227142334e-05, -3.28943133354187e-06, 1.12876296043396e-05, 2.586469054222107e-05, 4.044175148010254e-05, 5.501881241798401e-05, 6.959587335586548e-05, 8.417293429374695e-05, 9.874999523162842e-05, 0.00011332705616950989, 0.00012790411710739136, 0.00014248117804527283, 0.0001570582389831543, 0.00017163529992103577, 0.00018621236085891724, 0.0002007894217967987, 0.00021536648273468018, 0.00022994354367256165, 0.0002445206046104431, 0.0002590976655483246, 0.00027367472648620605, 0.0002882517874240875, 0.000302828848361969, 0.00031740590929985046, 0.00033198297023773193, 0.0003465600311756134, 0.0003611370921134949, 0.00037571415305137634, 0.0003902912139892578]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 10.0, 4.0, 11.0, 15.0, 18.0, 26.0, 21.0, 58.0, 84.0, 109.0, 169.0, 264.0, 448.0, 714.0, 1296.0, 2518.0, 5460.0, 12868.0, 36071.0, 161801.0, 732239.0, 60105.0, 19017.0, 7596.0, 3470.0, 1772.0, 949.0, 543.0, 303.0, 183.0, 127.0, 90.0, 50.0, 49.0, 30.0, 22.0, 18.0, 5.0, 9.0, 6.0, 8.0, 0.0, 5.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.78955078125, -0.7683258056640625, -0.747100830078125, -0.7258758544921875, -0.70465087890625, -0.6834259033203125, -0.662200927734375, -0.6409759521484375, -0.6197509765625, -0.5985260009765625, -0.577301025390625, -0.5560760498046875, -0.53485107421875, -0.5136260986328125, -0.492401123046875, -0.4711761474609375, -0.449951171875, -0.4287261962890625, -0.407501220703125, -0.3862762451171875, -0.36505126953125, -0.3438262939453125, -0.322601318359375, -0.3013763427734375, -0.2801513671875, -0.2589263916015625, -0.237701416015625, -0.2164764404296875, -0.19525146484375, -0.1740264892578125, -0.152801513671875, -0.1315765380859375, -0.1103515625, -0.0891265869140625, -0.067901611328125, -0.0466766357421875, -0.02545166015625, -0.0042266845703125, 0.016998291015625, 0.0382232666015625, 0.0594482421875, 0.0806732177734375, 0.101898193359375, 0.1231231689453125, 0.14434814453125, 0.1655731201171875, 0.186798095703125, 0.2080230712890625, 0.229248046875, 0.2504730224609375, 0.271697998046875, 0.2929229736328125, 0.31414794921875, 0.3353729248046875, 0.356597900390625, 0.3778228759765625, 0.3990478515625, 0.4202728271484375, 0.441497802734375, 0.4627227783203125, 0.48394775390625, 0.5051727294921875, 0.526397705078125, 0.5476226806640625, 0.56884765625]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 2.0, 3.0, 1.0, 9.0, 8.0, 6.0, 9.0, 9.0, 14.0, 17.0, 25.0, 46.0, 51.0, 83.0, 91.0, 128.0, 142.0, 93.0, 71.0, 41.0, 24.0, 35.0, 20.0, 13.0, 11.0, 12.0, 12.0, 5.0, 6.0, 2.0, 6.0, 1.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.3447265625, -1.3036956787109375, -1.262664794921875, -1.2216339111328125, -1.18060302734375, -1.1395721435546875, -1.098541259765625, -1.0575103759765625, -1.0164794921875, -0.9754486083984375, -0.934417724609375, -0.8933868408203125, -0.85235595703125, -0.8113250732421875, -0.770294189453125, -0.7292633056640625, -0.688232421875, -0.6472015380859375, -0.606170654296875, -0.5651397705078125, -0.52410888671875, -0.4830780029296875, -0.442047119140625, -0.4010162353515625, -0.3599853515625, -0.3189544677734375, -0.277923583984375, -0.2368927001953125, -0.19586181640625, -0.1548309326171875, -0.113800048828125, -0.0727691650390625, -0.03173828125, 0.0092926025390625, 0.050323486328125, 0.0913543701171875, 0.13238525390625, 0.1734161376953125, 0.214447021484375, 0.2554779052734375, 0.2965087890625, 0.3375396728515625, 0.378570556640625, 0.4196014404296875, 0.46063232421875, 0.5016632080078125, 0.542694091796875, 0.5837249755859375, 0.624755859375, 0.6657867431640625, 0.706817626953125, 0.7478485107421875, 0.78887939453125, 0.8299102783203125, 0.870941162109375, 0.9119720458984375, 0.9530029296875, 0.9940338134765625, 1.035064697265625, 1.0760955810546875, 1.11712646484375, 1.1581573486328125, 1.199188232421875, 1.2402191162109375, 1.28125]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 2.0, 0.0, 3.0, 2.0, 1.0, 3.0, 3.0, 8.0, 9.0, 15.0, 23.0, 39.0, 54.0, 98.0, 498.0, 113.0, 61.0, 34.0, 16.0, 9.0, 3.0, 5.0, 2.0, 8.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-43.201515197753906, -41.98759841918945, -40.773681640625, -39.55976867675781, -38.34585189819336, -37.131935119628906, -35.91801834106445, -34.7041015625, -33.49018859863281, -32.27627182006836, -31.06235694885254, -29.848440170288086, -28.634525299072266, -27.420608520507812, -26.20669174194336, -24.992774963378906, -23.778858184814453, -22.56494140625, -21.35102653503418, -20.137109756469727, -18.923194885253906, -17.709278106689453, -16.495361328125, -15.281445503234863, -14.067529678344727, -12.85361385345459, -11.639698028564453, -10.42578125, -9.211865425109863, -7.997949600219727, -6.784033298492432, -5.570116996765137, -4.356201171875, -3.142285108566284, -1.9283690452575684, -0.7144529819488525, 0.4994630813598633, 1.71337890625, 2.927295207977295, 4.14121150970459, 5.355127334594727, 6.569043159484863, 7.782959461212158, 8.996875762939453, 10.21079158782959, 11.424707412719727, 12.63862419128418, 13.852540016174316, 15.066455841064453, 16.280372619628906, 17.494287490844727, 18.70820426940918, 19.922119140625, 21.136035919189453, 22.349952697753906, 23.56386947631836, 24.77778434753418, 25.991701126098633, 27.205615997314453, 28.419532775878906, 29.63344955444336, 30.84736442565918, 32.061279296875, 33.27519607543945, 34.489112854003906]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 3.0, 3.0, 3.0, 5.0, 1.0, 2.0, 5.0, 13.0, 7.0, 6.0, 5.0, 10.0, 8.0, 11.0, 16.0, 20.0, 21.0, 21.0, 29.0, 24.0, 19.0, 30.0, 72.0, 258.0, 159.0, 42.0, 25.0, 28.0, 20.0, 22.0, 13.0, 17.0, 15.0, 12.0, 11.0, 10.0, 5.0, 9.0, 6.0, 2.0, 6.0, 4.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.954437255859375, -21.240957260131836, -20.52747917175293, -19.81399917602539, -19.10051918029785, -18.387041091918945, -17.673561096191406, -16.9600830078125, -16.24660301208496, -15.533123970031738, -14.8196439743042, -14.106164932250977, -13.392685890197754, -12.679206848144531, -11.965726852416992, -11.25224781036377, -10.53876781463623, -9.825288772583008, -9.111808776855469, -8.398329734802246, -7.684850692749023, -6.971371173858643, -6.257891654968262, -5.544412612915039, -4.830933094024658, -4.117453575134277, -3.4039745330810547, -2.690495014190674, -1.977015733718872, -1.2635364532470703, -0.5500569343566895, 0.1634221076965332, 0.8769016265869141, 1.5903809070587158, 2.3038601875305176, 3.0173397064208984, 3.7308189868927, 4.444298267364502, 5.157777786254883, 5.8712568283081055, 6.584736347198486, 7.298215866088867, 8.01169490814209, 8.725173950195312, 9.438653945922852, 10.152132987976074, 10.865612030029297, 11.579092025756836, 12.292571067810059, 13.006050109863281, 13.71953010559082, 14.433009147644043, 15.146488189697266, 15.859968185424805, 16.573448181152344, 17.28692626953125, 18.00040626525879, 18.713886260986328, 19.427364349365234, 20.140844345092773, 20.854324340820312, 21.56780242919922, 22.281282424926758, 22.994762420654297, 23.708240509033203]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 4.0, 2.0, 4.0, 4.0, 5.0, 9.0, 16.0, 13.0, 15.0, 12.0, 26.0, 17.0, 23.0, 31.0, 34.0, 48.0, 132.0, 245.0, 88.0, 45.0, 36.0, 33.0, 29.0, 18.0, 25.0, 22.0, 12.0, 9.0, 10.0, 14.0, 4.0, 5.0, 5.0, 6.0, 4.0, 2.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.486328125, -2.407073974609375, -2.32781982421875, -2.248565673828125, -2.1693115234375, -2.090057373046875, -2.01080322265625, -1.931549072265625, -1.852294921875, -1.773040771484375, -1.69378662109375, -1.614532470703125, -1.5352783203125, -1.456024169921875, -1.37677001953125, -1.297515869140625, -1.21826171875, -1.139007568359375, -1.05975341796875, -0.980499267578125, -0.9012451171875, -0.821990966796875, -0.74273681640625, -0.663482666015625, -0.584228515625, -0.504974365234375, -0.42572021484375, -0.346466064453125, -0.2672119140625, -0.187957763671875, -0.10870361328125, -0.029449462890625, 0.0498046875, 0.129058837890625, 0.20831298828125, 0.287567138671875, 0.3668212890625, 0.446075439453125, 0.52532958984375, 0.604583740234375, 0.683837890625, 0.763092041015625, 0.84234619140625, 0.921600341796875, 1.0008544921875, 1.080108642578125, 1.15936279296875, 1.238616943359375, 1.31787109375, 1.397125244140625, 1.47637939453125, 1.555633544921875, 1.6348876953125, 1.714141845703125, 1.79339599609375, 1.872650146484375, 1.951904296875, 2.031158447265625, 2.11041259765625, 2.189666748046875, 2.2689208984375, 2.348175048828125, 2.42742919921875, 2.506683349609375, 2.5859375]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 4.0, 4.0, 0.0, 0.0, 0.0, 2.0, 5.0, 4.0, 5.0, 2.0, 7.0, 13.0, 33.0, 24.0, 30.0, 58.0, 49.0, 113.0, 225.0, 635.0, 2216.0, 15102.0, 8354459.0, 12634.0, 1943.0, 519.0, 225.0, 104.0, 43.0, 21.0, 22.0, 20.0, 20.0, 7.0, 5.0, 10.0, 5.0, 4.0, 2.0, 2.0, 2.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0], "bins": [-29.67580223083496, -28.761919021606445, -27.84803581237793, -26.934152603149414, -26.0202693939209, -25.106386184692383, -24.192502975463867, -23.27861976623535, -22.364736557006836, -21.45085334777832, -20.536970138549805, -19.62308692932129, -18.709203720092773, -17.795320510864258, -16.881437301635742, -15.967554092407227, -15.053671836853027, -14.139788627624512, -13.225905418395996, -12.31202220916748, -11.398138999938965, -10.484256744384766, -9.57037353515625, -8.656490325927734, -7.7426066398620605, -6.828723430633545, -5.914840221405029, -5.000957489013672, -4.087074279785156, -3.1731910705566406, -2.259307861328125, -1.3454246520996094, -0.43154144287109375, 0.4823417067527771, 1.396224856376648, 2.310107946395874, 3.2239911556243896, 4.137874126434326, 5.051757335662842, 5.965640544891357, 6.879523754119873, 7.793406963348389, 8.707289695739746, 9.621172904968262, 10.535056114196777, 11.448939323425293, 12.362822532653809, 13.276705741882324, 14.19058895111084, 15.104472160339355, 16.018354415893555, 16.93223762512207, 17.846120834350586, 18.7600040435791, 19.673887252807617, 20.587770462036133, 21.50165367126465, 22.415536880493164, 23.32942008972168, 24.243303298950195, 25.15718650817871, 26.071069717407227, 26.984952926635742, 27.898836135864258, 28.812719345092773]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 2.0, 0.0, 3.0, 2.0, 5.0, 2.0, 4.0, 3.0, 8.0, 5.0, 1.0, 3.0, 2.0, 3.0, 1.0, 4.0, 4.0, 2.0, 4.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 3.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-21.674331665039062, -20.998470306396484, -20.322607040405273, -19.646745681762695, -18.970884323120117, -18.295021057128906, -17.619159698486328, -16.94329833984375, -16.267436981201172, -15.591574668884277, -14.9157133102417, -14.239850997924805, -13.563989639282227, -12.888127326965332, -12.212265014648438, -11.53640365600586, -10.860540390014648, -10.184678077697754, -9.508816719055176, -8.832954406738281, -8.157093048095703, -7.481230735778809, -6.805368423461914, -6.129506587982178, -5.453644752502441, -4.777782917022705, -4.101921081542969, -3.426058769226074, -2.750196933746338, -2.0743350982666016, -1.398472785949707, -0.7226109504699707, -0.04674720764160156, 0.6291147470474243, 1.3049767017364502, 1.9808387756347656, 2.656700611114502, 3.3325624465942383, 4.008424758911133, 4.684286594390869, 5.3601484298706055, 6.036010265350342, 6.711872100830078, 7.387734413146973, 8.063596725463867, 8.739458084106445, 9.41532039642334, 10.091182708740234, 10.767044067382812, 11.442906379699707, 12.118767738342285, 12.79463005065918, 13.470491409301758, 14.146353721618652, 14.822216033935547, 15.498077392578125, 16.173938751220703, 16.84980010986328, 17.525663375854492, 18.20152473449707, 18.87738609313965, 19.55324935913086, 20.229110717773438, 20.904972076416016, 21.580835342407227]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 9.0, 5.0, 8.0, 11.0, 12.0, 8.0, 19.0, 37.0, 30.0, 39.0, 80.0, 96.0, 143.0, 216.0, 375.0, 767.0, 1754.0, 4839.0, 13939.0, 42735.0, 140101.0, 207724.0, 75123.0, 23374.0, 7757.0, 2671.0, 1080.0, 488.0, 265.0, 165.0, 114.0, 76.0, 50.0, 38.0, 26.0, 29.0, 19.0, 8.0, 15.0, 8.0, 6.0, 4.0, 2.0, 4.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-20.734375, -20.110595703125, -19.48681640625, -18.863037109375, -18.2392578125, -17.615478515625, -16.99169921875, -16.367919921875, -15.744140625, -15.120361328125, -14.49658203125, -13.872802734375, -13.2490234375, -12.625244140625, -12.00146484375, -11.377685546875, -10.75390625, -10.130126953125, -9.50634765625, -8.882568359375, -8.2587890625, -7.635009765625, -7.01123046875, -6.387451171875, -5.763671875, -5.139892578125, -4.51611328125, -3.892333984375, -3.2685546875, -2.644775390625, -2.02099609375, -1.397216796875, -0.7734375, -0.149658203125, 0.47412109375, 1.097900390625, 1.7216796875, 2.345458984375, 2.96923828125, 3.593017578125, 4.216796875, 4.840576171875, 5.46435546875, 6.088134765625, 6.7119140625, 7.335693359375, 7.95947265625, 8.583251953125, 9.20703125, 9.830810546875, 10.45458984375, 11.078369140625, 11.7021484375, 12.325927734375, 12.94970703125, 13.573486328125, 14.197265625, 14.821044921875, 15.44482421875, 16.068603515625, 16.6923828125, 17.316162109375, 17.93994140625, 18.563720703125, 19.1875]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 5.0, 4.0, 7.0, 7.0, 9.0, 23.0, 22.0, 22.0, 36.0, 56.0, 49.0, 79.0, 84.0, 103.0, 101.0, 90.0, 74.0, 67.0, 42.0, 35.0, 32.0, 16.0, 15.0, 8.0, 7.0, 4.0, 4.0, 3.0, 4.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.314453125, -3.196685791015625, -3.07891845703125, -2.961151123046875, -2.8433837890625, -2.725616455078125, -2.60784912109375, -2.490081787109375, -2.372314453125, -2.254547119140625, -2.13677978515625, -2.019012451171875, -1.9012451171875, -1.783477783203125, -1.66571044921875, -1.547943115234375, -1.43017578125, -1.312408447265625, -1.19464111328125, -1.076873779296875, -0.9591064453125, -0.841339111328125, -0.72357177734375, -0.605804443359375, -0.488037109375, -0.370269775390625, -0.25250244140625, -0.134735107421875, -0.0169677734375, 0.100799560546875, 0.21856689453125, 0.336334228515625, 0.4541015625, 0.571868896484375, 0.68963623046875, 0.807403564453125, 0.9251708984375, 1.042938232421875, 1.16070556640625, 1.278472900390625, 1.396240234375, 1.514007568359375, 1.63177490234375, 1.749542236328125, 1.8673095703125, 1.985076904296875, 2.10284423828125, 2.220611572265625, 2.33837890625, 2.456146240234375, 2.57391357421875, 2.691680908203125, 2.8094482421875, 2.927215576171875, 3.04498291015625, 3.162750244140625, 3.280517578125, 3.398284912109375, 3.51605224609375, 3.633819580078125, 3.7515869140625, 3.869354248046875, 3.98712158203125, 4.104888916015625, 4.22265625]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 2.0, 4.0, 6.0, 6.0, 5.0, 4.0, 11.0, 14.0, 23.0, 35.0, 60.0, 68.0, 78.0, 57.0, 31.0, 35.0, 16.0, 13.0, 5.0, 5.0, 4.0, 0.0, 1.0, 4.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.380627632141113, -13.786942481994629, -13.193258285522461, -12.599573135375977, -12.005888938903809, -11.412203788757324, -10.818519592285156, -10.224834442138672, -9.631149291992188, -9.037464141845703, -8.443779945373535, -7.850094795227051, -7.256410598754883, -6.662725448608398, -6.069040775299072, -5.475356101989746, -4.881671905517578, -4.287987232208252, -3.694302558898926, -3.1006176471710205, -2.5069329738616943, -1.9132483005523682, -1.319563388824463, -0.7258787155151367, -0.13219404220581055, 0.4614906907081604, 1.0551754236221313, 1.648860216140747, 2.2425448894500732, 2.8362295627593994, 3.4299144744873047, 4.023599147796631, 4.617284774780273, 5.2109694480896, 5.804654121398926, 6.39833927154541, 6.992023468017578, 7.5857086181640625, 8.179393768310547, 8.773077964782715, 9.366762161254883, 9.960447311401367, 10.554131507873535, 11.14781665802002, 11.741500854492188, 12.335186004638672, 12.928871154785156, 13.522555351257324, 14.116240501403809, 14.709925651550293, 15.303609848022461, 15.897294998168945, 16.49098014831543, 17.08466339111328, 17.678348541259766, 18.27203369140625, 18.865718841552734, 19.45940399169922, 20.053089141845703, 20.646772384643555, 21.24045753479004, 21.834142684936523, 22.427827835083008, 23.02151107788086, 23.615196228027344]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 5.0, 4.0, 3.0, 3.0, 4.0, 6.0, 8.0, 8.0, 10.0, 22.0, 70.0, 110.0, 109.0, 49.0, 25.0, 14.0, 5.0, 9.0, 3.0, 5.0, 8.0, 1.0, 6.0, 1.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.935245513916016, -16.340185165405273, -15.745122909545898, -15.150062561035156, -14.555001258850098, -13.959939956665039, -13.364879608154297, -12.769818305969238, -12.17475700378418, -11.579695701599121, -10.984634399414062, -10.38957405090332, -9.794512748718262, -9.199451446533203, -8.604391098022461, -8.009329795837402, -7.414268493652344, -6.819207191467285, -6.224146366119385, -5.629085540771484, -5.034024238586426, -4.438962936401367, -3.843902111053467, -3.2488412857055664, -2.653779983520508, -2.0587189197540283, -1.4636578559875488, -0.8685967922210693, -0.27353572845458984, 0.32152533531188965, 0.9165863990783691, 1.5116472244262695, 2.106710433959961, 2.7017714977264404, 3.29683256149292, 3.8918936252593994, 4.486954689025879, 5.0820159912109375, 5.677076816558838, 6.272137641906738, 6.867198944091797, 7.4622602462768555, 8.057321548461914, 8.652381896972656, 9.247443199157715, 9.842504501342773, 10.437564849853516, 11.032626152038574, 11.627687454223633, 12.222748756408691, 12.81781005859375, 13.412870407104492, 14.00793170928955, 14.60299301147461, 15.198053359985352, 15.79311466217041, 16.38817596435547, 16.98323631286621, 17.578298568725586, 18.173358917236328, 18.768421173095703, 19.363481521606445, 19.958541870117188, 20.553604125976562, 21.148664474487305]}, "_wandb": {"runtime": 4105}} \ No newline at end of file diff --git a/wandb/run-20220302_063647-bmivw6vv/logs/debug-internal.log b/wandb/run-20220302_063647-bmivw6vv/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..ddcf303e3e5f226f87bb08088c387b220bbf02e7 --- /dev/null +++ b/wandb/run-20220302_063647-bmivw6vv/logs/debug-internal.log @@ -0,0 +1,7188 @@ +2022-03-02 06:36:48,035 INFO MainThread:253545 [internal.py:wandb_internal():89] W&B internal server running at pid: 253545, started at: 2022-03-02 06:36:48.035693 +2022-03-02 06:36:48,038 INFO WriterThread:253545 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/run-bmivw6vv.wandb +2022-03-02 06:36:48,038 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: check_version +2022-03-02 06:36:48,040 DEBUG SenderThread:253545 [sender.py:send():235] send: header +2022-03-02 06:36:48,040 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: check_version +2022-03-02 06:36:48,105 DEBUG SenderThread:253545 [sender.py:send():235] send: run +2022-03-02 06:36:48,200 INFO SenderThread:253545 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files +2022-03-02 06:36:48,201 INFO SenderThread:253545 [sender.py:_start_run_threads():809] run started: bmivw6vv with start time 1646203007 +2022-03-02 06:36:48,201 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:36:48,201 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:36:48,202 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: run_start +2022-03-02 06:36:48,207 DEBUG HandlerThread:253545 [meta.py:__init__():36] meta init +2022-03-02 06:36:48,207 DEBUG HandlerThread:253545 [meta.py:__init__():50] meta init done +2022-03-02 06:36:48,207 DEBUG HandlerThread:253545 [meta.py:probe():210] probe +2022-03-02 06:36:48,213 DEBUG HandlerThread:253545 [meta.py:_setup_git():200] setup git +2022-03-02 06:36:48,228 DEBUG HandlerThread:253545 [meta.py:_setup_git():207] setup git done +2022-03-02 06:36:48,228 DEBUG HandlerThread:253545 [meta.py:_save_pip():54] save pip +2022-03-02 06:36:48,228 DEBUG HandlerThread:253545 [meta.py:_save_pip():68] save pip done +2022-03-02 06:36:48,228 DEBUG HandlerThread:253545 [meta.py:probe():248] probe done +2022-03-02 06:36:48,305 DEBUG SenderThread:253545 [sender.py:send():235] send: files +2022-03-02 06:36:48,305 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-02 06:36:48,310 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:36:48,310 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:36:48,347 DEBUG SenderThread:253545 [sender.py:send():235] send: config +2022-03-02 06:36:48,348 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:36:48,348 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:36:48,348 WARNING SenderThread:253545 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-02 06:36:48,549 INFO Thread-11 :253545 [upload_job.py:push():137] Uploaded file /tmp/tmpfgm9d4hlwandb/u73odpet-wandb-metadata.json +2022-03-02 06:36:49,203 INFO Thread-8 :253545 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:36:49,203 INFO Thread-8 :253545 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/requirements.txt +2022-03-02 06:36:49,203 INFO Thread-8 :253545 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-metadata.json +2022-03-02 06:36:49,203 INFO Thread-8 :253545 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:36:51,202 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:36:53,203 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:36:57,204 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:01,013 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:37:01,014 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:37:01,014 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:37:01,014 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:37:01,014 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:37:01,014 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:37:01,205 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:37:01,206 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:03,206 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:03,756 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:37:03,757 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:37:05,207 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:09,208 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:11,209 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:13,157 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:37:13,157 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:37:13,159 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:37:13,210 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:37:15,210 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:16,654 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:37:17,211 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:18,798 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:37:18,798 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:37:19,212 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/config.yaml +2022-03-02 06:37:21,212 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:23,213 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:24,885 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:37:24,886 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:37:24,886 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:37:25,214 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:37:27,214 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:28,215 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:30,215 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:32,216 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:33,970 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:37:33,971 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:37:36,217 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:36,446 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:37:36,446 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:37:36,447 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:37:37,218 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:37:37,218 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:38,218 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:42,220 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:44,220 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:46,221 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:47,001 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:37:47,971 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:37:47,972 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:37:47,972 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:37:48,222 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:37:49,211 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:37:49,211 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:37:50,222 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:52,223 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:56,224 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:58,225 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:37:59,562 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:37:59,563 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:37:59,564 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:38:00,225 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:38:00,226 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:02,226 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:04,301 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:38:04,302 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:38:05,227 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:07,228 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:09,228 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:10,918 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:38:10,919 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:38:10,919 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:38:11,229 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:38:12,230 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:13,230 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:15,231 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:17,338 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:38:19,232 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:19,342 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:38:19,342 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:38:21,233 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:22,326 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:38:22,326 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:38:22,327 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:38:23,234 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:38:23,234 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:24,234 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:27,235 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:29,236 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:33,237 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:33,610 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:38:33,610 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:38:33,611 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:38:34,237 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:38:34,525 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:38:34,526 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:38:35,238 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:36,238 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:37,238 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:42,240 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:44,240 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:44,811 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:38:44,812 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:38:44,813 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:38:45,241 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:38:46,241 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:47,689 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:38:49,691 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:38:49,692 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:38:50,243 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:52,243 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:54,244 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:38:55,864 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:38:55,865 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:38:55,865 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:38:56,245 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:38:58,245 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:00,246 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:02,247 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:04,248 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:04,821 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:39:04,822 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:39:06,791 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:39:06,791 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:39:06,792 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:39:07,249 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:39:08,249 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:11,250 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:15,252 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:17,252 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:17,708 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:39:17,708 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:39:17,709 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:39:18,036 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:39:18,253 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:39:19,253 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:19,987 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:39:19,987 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:39:21,254 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:23,255 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:25,255 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:27,256 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:28,603 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:39:28,603 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:39:28,604 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:39:29,257 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:39:29,257 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:31,258 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:33,258 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:35,139 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:39:35,139 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:39:35,259 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:39,260 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:39,421 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:39:39,421 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:39:39,422 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:39:40,260 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:39:41,261 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:43,261 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:46,262 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:48,372 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:39:49,975 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:39:49,975 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:39:49,976 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:39:50,208 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:39:50,208 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:39:50,264 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:39:50,264 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:51,264 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:52,264 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:54,265 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:39:56,266 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:00,267 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:00,605 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:40:00,605 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:40:00,605 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:40:01,267 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:40:01,268 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:02,268 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:04,268 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:05,457 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:40:05,457 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:40:06,269 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:11,049 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:40:11,050 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:40:11,051 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:40:11,270 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:40:11,271 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:13,271 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:14,271 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:15,272 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:17,272 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:18,712 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:40:20,508 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:40:20,509 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:40:21,274 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:21,493 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:40:21,494 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:40:21,494 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:40:22,274 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:40:23,275 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:24,275 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:25,276 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:29,277 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:31,278 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:31,873 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:40:31,874 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:40:31,874 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:40:32,278 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:40:33,279 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:34,279 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:35,280 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:35,595 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:40:35,596 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:40:39,281 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:41,281 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:42,114 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:40:42,114 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:40:42,115 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:40:42,282 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:40:43,282 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:44,283 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:46,283 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:48,284 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:49,067 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:40:50,691 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:40:50,691 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:40:52,213 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:40:52,214 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:40:52,214 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:40:52,285 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:40:52,285 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:54,286 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:56,287 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:40:58,287 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:02,283 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:41:02,283 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:41:02,283 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:41:02,288 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:41:02,289 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:04,289 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:05,866 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:41:05,866 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:41:06,290 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:08,290 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:11,291 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:12,359 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:41:12,359 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:41:12,361 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:41:13,292 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:41:14,292 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:15,293 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:17,293 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:19,294 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:19,428 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:41:20,952 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:41:20,952 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:41:21,295 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:22,903 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,909 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,914 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,914 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,914 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,914 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,914 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,920 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,920 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,920 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,920 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,921 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,921 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,921 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,921 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,927 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,927 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,927 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,927 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,927 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,927 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,927 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,927 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,927 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,927 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,928 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,928 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,928 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,928 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,928 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,928 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,928 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,928 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,928 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,929 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,929 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,929 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,929 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,929 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,929 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,929 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,929 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,929 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,929 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,930 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,930 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,930 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,930 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,930 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,930 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,930 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,930 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,930 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,936 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,936 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,936 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,936 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,936 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,936 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,936 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,936 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,936 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,942 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,942 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,942 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,942 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,942 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,942 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,942 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,942 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,943 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,943 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,943 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,943 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,943 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,943 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,943 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,943 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,943 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,943 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,944 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,944 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,944 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,944 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,944 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,944 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,944 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,944 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,944 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,944 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,944 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,945 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,945 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,945 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,945 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,945 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,945 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,945 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,945 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,945 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,945 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,946 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,946 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,946 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,946 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,946 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,946 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,946 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,952 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,965 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,965 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,965 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,965 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,965 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,965 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,965 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,965 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,966 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,966 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,966 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,966 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,966 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,966 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,966 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,966 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,971 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,972 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,972 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,972 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,972 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,972 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,972 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,972 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,972 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,977 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,978 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,978 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,978 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,978 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,978 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,978 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,978 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,978 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,978 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,979 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,979 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,979 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,979 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,979 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,979 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,979 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,979 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,979 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,979 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,979 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,980 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,980 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,980 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,985 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,990 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,996 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,996 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,996 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,996 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,996 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,996 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,996 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,996 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,997 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,997 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,997 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,997 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:22,997 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,002 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,008 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,008 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,008 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,008 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,008 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,008 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,008 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,008 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,008 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,009 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,009 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,009 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,009 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,009 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,009 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,009 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,009 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,009 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,009 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,010 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,010 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,010 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,010 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,010 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,010 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,010 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,010 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,021 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,021 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,021 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,021 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,021 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,021 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,021 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,021 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,022 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,022 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,027 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,032 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,038 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,038 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,038 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,038 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,038 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,038 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,038 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,038 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,044 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,044 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,044 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,044 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,044 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,044 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,044 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,044 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,044 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,044 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,045 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,045 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,050 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,050 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,050 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,050 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,050 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,050 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,051 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,056 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,056 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,056 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,056 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,056 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,056 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,056 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,057 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,057 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,057 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,062 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,067 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,067 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,067 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,067 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,067 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,067 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,067 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,067 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,068 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,068 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,068 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,068 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,068 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,068 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,068 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,068 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,068 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,068 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,068 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,068 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,069 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,069 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,069 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,069 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,069 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,069 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,069 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,069 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,069 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,069 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,069 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,069 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,070 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,071 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,072 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,073 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,074 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,074 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,074 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,074 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,074 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,074 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,074 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,074 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,074 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,074 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,074 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,074 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,075 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,076 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,077 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,078 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,079 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,080 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,081 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,082 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,083 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,084 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,086 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,086 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,087 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,088 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,089 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,090 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,091 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,092 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,093 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,094 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,095 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,096 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,097 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,098 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,099 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,100 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,101 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,102 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,103 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,104 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,105 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,106 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,107 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,108 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,109 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,110 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,111 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,112 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,113 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,114 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,115 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,116 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,117 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,118 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,119 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,120 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,121 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,122 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,123 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,124 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,125 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,126 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,127 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,128 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,129 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,130 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,131 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,132 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,133 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,134 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,136 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,137 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,138 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,139 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,140 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,141 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,142 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,143 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,144 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,145 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,146 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,147 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,148 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,149 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,150 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,151 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,152 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,153 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,154 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,155 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,156 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,157 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,158 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,159 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,160 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,161 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,162 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,163 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,164 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,165 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,166 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,167 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,168 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,169 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,170 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,171 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,172 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,173 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,174 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,175 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,176 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,177 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,179 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,180 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,181 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,182 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,183 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,184 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,185 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,187 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,188 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,189 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,190 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,191 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,192 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,193 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,194 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,195 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,196 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,197 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,198 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,199 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,200 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,201 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,202 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,203 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,204 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,205 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,206 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,207 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,208 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,209 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,210 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,211 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,212 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,213 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,214 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,215 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,216 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,216 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,216 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,216 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,216 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,216 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,216 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,216 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,216 DEBUG SenderThread:253545 [sender.py:send():235] send: metric +2022-03-02 06:41:23,216 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:41:23,304 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:41:23,401 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:41:24,400 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:41:24,400 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:25,401 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:27,401 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:29,402 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:31,403 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:32,763 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:41:32,816 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:41:32,904 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:41:33,403 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:41:33,404 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:34,406 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:35,407 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:36,055 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:41:36,056 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:41:37,407 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:39,408 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:41,409 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:42,552 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:41:42,607 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:41:42,701 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:41:43,410 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:41:43,410 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:45,410 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:46,411 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:49,835 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:41:50,412 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:51,226 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:41:51,663 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:41:52,339 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:41:52,392 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:41:52,475 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/config.yaml +2022-03-02 06:41:52,475 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:52,476 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:41:53,475 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:41:54,475 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:55,476 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:56,476 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:41:58,477 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:00,477 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:02,024 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:42:02,076 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:42:02,158 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:42:02,478 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:42:03,478 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:04,479 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:06,479 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:06,712 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:42:06,713 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:42:08,480 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:10,481 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:11,560 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:42:11,610 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:42:11,693 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:42:12,481 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:42:12,482 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:13,482 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:16,483 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:18,483 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:20,418 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:42:20,484 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:20,992 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:42:21,043 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:42:21,131 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:42:21,484 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:42:21,914 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:42:21,916 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:42:22,485 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:23,485 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:24,486 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:26,486 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:29,487 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:30,399 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:42:30,453 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:42:30,540 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:42:31,538 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:42:31,538 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:35,540 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:37,011 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:42:37,012 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:42:37,540 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:39,541 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:39,641 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:42:39,693 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:42:39,778 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:42:40,541 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:42:41,542 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:43,542 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:45,543 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:47,544 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:48,707 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:42:48,760 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:42:48,847 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:42:49,544 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:42:49,544 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:50,904 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:42:51,545 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:52,124 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:42:52,124 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:42:54,546 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:56,547 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:42:57,610 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:42:57,664 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:42:57,749 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:42:58,548 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:42:58,548 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:00,548 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:02,549 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:04,550 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:06,244 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:43:06,296 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:43:06,380 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:43:06,550 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:43:06,551 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:07,178 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:43:07,180 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:43:08,551 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:10,552 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:12,553 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:14,553 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:14,803 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:43:14,854 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:43:14,936 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:43:15,554 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:43:16,554 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:18,555 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:20,555 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:21,289 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:43:22,260 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:43:22,260 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:43:22,556 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:23,135 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:43:23,186 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:43:23,270 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:43:23,556 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:43:24,557 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:26,557 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:28,558 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:30,559 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:31,088 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:43:31,140 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:43:31,225 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:43:31,559 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:43:32,559 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:34,560 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:36,561 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:37,358 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:43:37,359 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:43:38,561 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:38,742 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:43:38,808 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:43:38,894 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:43:39,562 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:43:40,562 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:42,563 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:44,564 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:46,021 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:43:46,074 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:43:46,155 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:43:46,564 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:43:46,565 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:48,565 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:50,566 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:51,679 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:43:52,415 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:43:52,415 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:43:52,566 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:52,843 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:43:52,895 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:43:52,977 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:43:53,567 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:43:54,567 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:56,568 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:58,568 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:43:59,134 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:43:59,185 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:43:59,268 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:43:59,568 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:44:00,569 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:02,569 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:04,570 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:04,859 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:44:04,936 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:44:05,020 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:44:05,570 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:44:06,571 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:07,468 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:44:07,469 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:44:09,572 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:10,034 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:44:10,085 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:44:10,166 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:44:10,572 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:44:11,572 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:12,573 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:13,573 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:14,629 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:44:14,681 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:44:14,764 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:44:15,574 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:44:15,574 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:16,574 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:17,574 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:18,664 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:44:18,726 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:44:18,806 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:44:19,575 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:44:19,575 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:20,575 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:21,576 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:22,073 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:44:22,191 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:44:22,243 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:44:22,326 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:44:22,576 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:44:22,577 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:44:22,579 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:44:22,579 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:23,579 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:25,254 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:44:25,306 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:44:25,385 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:44:25,580 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:44:25,580 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:27,581 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:28,392 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:44:28,560 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:44:28,645 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:44:29,644 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:44:29,644 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:31,644 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:33,645 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:35,646 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:37,848 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:44:37,849 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:44:39,647 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:40,464 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:44:40,516 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:44:40,599 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:44:40,647 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:44:41,648 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:43,648 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:45,649 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:47,650 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:51,651 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:52,068 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:44:52,120 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:44:52,201 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:44:52,550 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:44:52,651 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:44:53,182 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:44:53,184 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:44:53,652 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:55,652 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:44:59,654 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:01,654 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:03,592 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:45:03,666 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:45:03,748 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:45:04,677 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:45:04,678 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:05,678 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:06,678 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:08,301 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:45:08,303 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:45:08,679 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:10,679 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:14,681 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:15,081 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:45:15,125 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:45:15,227 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:45:15,681 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:45:15,681 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:16,681 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:18,682 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:22,683 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:22,930 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:45:23,351 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:45:23,352 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:45:24,684 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:26,600 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:45:26,652 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:45:26,734 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:45:27,733 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:45:27,733 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:29,734 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:31,735 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:33,735 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:37,737 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:37,919 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:45:37,990 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:45:38,074 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:45:38,426 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:45:38,427 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:45:38,737 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:45:39,737 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:40,738 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:41,738 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:45,739 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:47,740 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:49,119 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:45:49,170 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:45:49,250 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:45:49,741 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:45:49,741 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:50,741 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:53,340 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:45:53,594 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:45:53,595 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:45:53,742 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:45:55,743 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:00,348 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:46:00,401 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:46:00,485 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:46:00,745 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:46:00,745 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:02,745 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:04,746 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:08,747 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:08,767 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:46:08,769 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:46:10,748 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:11,416 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:46:11,469 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:46:11,550 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:46:11,748 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:46:12,749 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:14,749 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:18,751 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:21,752 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:22,515 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:46:22,566 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:46:22,647 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:46:22,752 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:46:23,718 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:46:23,752 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:24,085 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:46:24,086 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:46:24,753 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:25,753 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:27,754 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:29,755 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:31,755 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:33,424 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:46:33,477 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:46:33,557 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:46:33,756 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:46:34,756 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:35,757 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:37,757 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:39,253 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:46:39,255 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:46:39,758 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:43,760 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:44,369 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:46:44,422 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:46:44,506 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:46:44,760 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:46:44,760 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:45,761 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:49,762 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:51,763 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:53,764 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:54,130 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:46:54,586 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:46:54,587 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:46:55,178 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:46:55,229 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:46:55,308 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:46:55,764 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:46:55,764 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:57,765 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:46:59,765 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:01,766 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:04,767 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:05,926 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:47:05,975 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:47:06,054 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:47:06,768 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:47:07,768 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:08,769 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:09,743 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:47:09,745 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:47:10,769 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:12,770 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:16,628 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:47:16,680 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:47:16,761 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:47:16,771 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:47:16,772 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:17,772 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:18,772 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:20,773 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:22,774 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:24,491 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:47:24,964 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:47:24,965 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:47:26,775 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:27,092 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:47:27,142 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:47:27,264 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:47:27,776 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:47:27,776 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:28,776 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:30,777 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:32,777 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:36,779 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:37,654 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:47:37,706 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:47:37,790 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:47:38,789 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:47:38,789 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:40,130 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:47:40,131 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:47:40,790 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:41,790 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:45,791 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:47,792 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:48,078 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:47:48,129 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:47:48,211 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:47:48,792 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:47:49,793 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:50,793 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:51,793 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:54,919 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:47:55,173 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:47:55,174 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:47:55,795 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:57,795 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:47:58,590 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:47:58,641 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:47:58,724 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:47:58,796 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:47:59,796 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:00,796 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:01,797 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:05,798 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:07,799 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:08,931 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:48:08,982 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:48:09,061 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:48:09,800 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:48:09,800 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:10,246 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:48:10,248 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:48:10,800 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:12,801 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:16,802 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:18,803 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:19,132 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:48:19,182 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:48:19,261 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:48:19,803 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:48:20,803 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:22,804 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:25,321 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:48:25,322 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:48:25,376 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:48:26,805 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:28,806 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:29,342 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:48:29,392 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:48:29,477 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:48:29,806 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:48:30,807 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:32,808 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:36,809 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:38,810 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:39,500 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:48:39,553 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:48:39,635 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:48:39,810 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:48:40,572 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:48:40,574 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:48:40,811 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:41,811 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:43,812 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:45,812 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:49,659 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:48:49,714 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:48:49,799 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:48:49,814 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:48:49,814 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:50,814 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:51,815 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:53,815 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:55,663 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:48:55,665 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:48:55,816 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:48:55,903 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:48:57,817 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:00,157 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:49:00,353 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:49:00,433 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:49:00,818 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:49:01,818 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:03,819 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:05,820 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:09,821 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:09,985 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:49:10,040 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:49:10,130 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:49:10,821 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:49:10,945 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:49:10,946 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:49:11,822 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:12,822 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:14,823 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:16,824 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:18,824 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:19,736 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:49:19,784 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:49:19,865 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:49:20,864 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:49:20,864 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:21,864 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:22,865 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:24,865 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:26,035 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:49:26,037 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:49:26,364 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:49:26,866 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:28,867 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:29,379 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:49:29,434 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:49:29,521 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:49:29,867 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:49:30,867 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:31,868 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:32,868 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:34,869 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:38,858 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:49:38,895 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:38,915 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:49:38,999 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:49:39,886 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:49:40,886 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:41,086 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:49:41,088 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:49:41,886 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:42,887 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:44,887 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:46,888 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:48,367 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:49:48,420 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:49:48,508 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:49:48,889 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:49:48,889 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:49,889 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:50,889 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:53,890 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:55,891 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:56,232 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:49:56,233 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:49:56,756 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:49:57,892 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:49:58,115 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:49:58,166 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:49:58,252 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:49:58,892 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:49:59,892 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:01,893 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:03,894 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:05,894 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:07,220 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:50:07,272 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:50:07,354 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:50:07,895 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:50:07,895 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:09,896 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:11,307 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:50:11,309 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:50:11,896 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:13,897 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:15,898 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:16,414 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:50:16,467 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:50:16,551 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:50:16,898 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:50:17,898 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:18,899 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:19,899 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:21,900 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:23,900 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:25,438 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:50:25,491 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:50:25,578 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:50:25,901 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:50:26,378 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:50:26,380 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:50:26,901 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:27,125 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:50:28,902 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:32,904 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:34,262 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:50:34,314 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:50:34,399 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:50:34,904 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:50:34,905 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:36,905 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:38,906 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:40,906 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:41,547 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:50:41,548 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:50:42,890 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:50:42,942 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:50:43,057 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:50:43,943 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:50:43,943 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:44,943 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:45,943 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:47,944 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:49,945 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:51,462 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:50:51,536 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:50:51,620 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:50:51,945 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:50:51,946 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:52,946 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:53,946 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:55,947 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:56,643 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:50:56,645 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:50:57,507 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:50:57,947 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:50:59,727 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:50:59,787 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:50:59,878 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:50:59,948 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:50:59,948 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:00,948 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:01,949 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:03,949 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:05,950 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:07,646 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:51:07,700 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:51:07,789 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:51:07,951 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:51:07,951 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:08,951 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:09,952 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:11,725 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:51:11,727 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:51:11,952 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:15,227 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:51:15,279 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:51:15,362 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:51:15,953 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:51:15,954 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:16,954 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:17,954 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:19,955 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:21,955 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:22,300 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:51:22,353 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:51:22,437 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:51:22,956 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:51:22,956 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:23,956 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:25,957 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:27,000 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:51:27,001 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:51:28,051 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:51:28,898 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:51:28,955 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:51:29,041 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:29,043 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:51:30,042 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:51:30,042 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:31,042 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:32,043 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:35,038 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:51:35,079 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:35,092 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:51:35,199 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:51:36,070 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:51:37,070 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:39,071 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:40,702 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:51:40,756 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:51:40,837 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:51:41,071 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:51:41,072 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:42,091 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:51:42,093 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:51:43,072 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:45,073 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:45,798 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:51:45,842 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:51:45,926 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:51:46,073 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:51:47,074 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:49,074 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:50,447 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:51:50,500 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:51:50,606 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:51:51,075 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:51:51,075 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:53,075 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:54,584 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:51:54,636 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:51:54,719 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:51:55,076 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:51:55,076 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:57,077 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:51:57,204 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:51:57,205 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:51:58,255 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:51:58,315 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:51:58,401 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:51:58,532 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:51:59,077 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:51:59,078 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:01,078 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:01,421 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:52:01,475 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:52:01,562 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:52:02,078 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:52:03,079 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:04,615 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:52:04,785 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:52:04,865 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:52:05,080 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:52:05,080 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:06,080 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:07,080 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:09,081 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:12,291 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:52:12,292 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:52:13,082 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:15,083 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:16,509 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:52:16,551 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:52:16,631 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:52:17,083 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:52:18,084 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:19,084 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:21,085 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:23,086 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:27,087 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:27,348 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:52:27,349 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:52:28,156 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:52:28,208 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:52:28,304 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:52:29,088 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:52:29,088 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:29,227 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:52:30,088 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:33,089 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:35,090 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:39,091 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:39,760 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:52:39,812 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:52:39,896 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:52:40,091 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:52:41,092 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:42,092 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:42,408 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:52:42,410 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:52:44,093 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:48,094 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:50,095 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:51,238 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:52:51,294 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:52:51,381 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:52:52,095 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:52:52,096 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:54,096 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:56,097 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:57,543 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:52:57,544 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:52:58,097 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:52:59,604 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:53:02,099 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:02,649 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:53:02,704 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:53:02,788 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:53:03,099 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:53:04,099 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:06,100 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:10,101 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:12,102 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:12,807 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:53:12,809 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:53:14,006 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:53:14,058 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:53:14,180 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:53:15,179 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:53:15,179 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:19,180 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:21,181 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:23,182 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:25,225 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:53:25,277 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:53:25,360 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:53:26,183 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:53:27,183 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:27,850 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:53:27,852 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:53:29,184 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:29,983 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:53:33,185 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:35,186 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:36,397 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:53:36,448 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:53:36,532 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:53:37,186 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:53:37,187 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:38,187 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:40,187 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:42,915 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:53:42,916 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:53:44,189 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:46,189 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:47,359 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:53:47,413 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:53:47,494 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:53:48,190 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:53:48,190 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:49,190 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:52,191 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:54,192 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:58,013 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:53:58,015 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:53:58,193 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:53:58,485 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:53:58,539 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:53:58,623 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:53:59,194 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:54:00,194 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:00,363 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:54:01,194 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:02,195 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:06,196 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:08,197 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:09,475 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:54:09,527 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:54:09,612 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:54:10,197 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:54:11,198 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:13,071 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:54:13,073 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:54:13,198 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:17,200 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:19,200 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:20,384 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:54:20,439 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:54:20,526 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:54:21,201 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:54:21,201 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:25,202 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:27,203 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:28,240 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:54:28,241 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:54:29,204 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:30,788 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:54:31,147 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:54:31,200 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:54:31,283 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:54:32,282 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:54:33,282 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:34,282 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:35,283 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:37,283 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:41,285 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:41,876 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:54:41,928 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:54:42,008 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:54:42,285 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:54:43,285 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:43,286 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:54:43,287 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:54:44,286 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:46,287 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:50,288 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:52,289 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:52,656 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:54:52,710 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:54:52,792 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:54:53,289 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:54:54,289 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:58,291 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:54:58,470 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:54:58,471 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:55:00,291 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:01,157 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:55:02,292 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:03,351 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:55:03,397 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:55:03,481 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:55:04,293 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:55:04,293 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:06,294 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:08,294 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:10,295 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:12,296 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:13,628 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:55:13,630 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:55:14,023 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:55:14,077 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:55:14,158 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:55:14,296 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:55:16,297 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:18,298 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:20,299 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:22,299 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:24,558 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:55:24,609 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:55:24,692 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:55:25,300 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:55:26,301 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:28,301 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:28,673 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:55:28,675 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:55:31,303 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:31,509 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:55:34,963 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:55:35,014 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:55:35,098 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:55:35,304 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:55:35,304 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:37,305 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:39,305 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:41,306 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:43,789 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:55:43,791 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:55:45,302 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:55:45,354 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:45,354 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:55:45,438 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:55:46,349 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:55:47,349 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:49,350 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:51,351 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:55,352 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:55,615 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:55:55,668 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:55:55,756 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:55:56,352 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:55:57,353 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:55:59,025 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:55:59,027 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:55:59,353 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:01,354 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:01,868 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:56:05,355 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:05,773 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:56:05,826 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:56:05,909 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:56:06,356 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:56:07,356 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:10,357 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:12,358 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:14,279 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:56:14,281 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:56:14,358 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:15,765 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:56:15,841 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:56:15,920 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:56:16,359 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:56:17,359 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:18,360 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:20,361 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:22,361 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:24,362 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:25,810 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:56:25,862 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:56:25,942 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:56:26,363 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:56:27,363 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:28,363 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:29,405 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:56:29,406 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:56:30,364 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:32,242 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:56:32,365 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:34,366 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:36,347 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:56:36,529 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:56:36,615 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:56:37,411 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:56:37,411 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:38,411 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:40,412 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:42,412 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:44,413 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:44,600 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:56:44,601 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:56:46,130 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:56:46,182 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:56:46,266 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:56:46,414 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:56:46,414 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:47,414 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:51,415 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:53,416 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:55,417 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:55,824 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:56:55,877 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:56:55,964 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:56:56,418 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:56:57,418 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:58,418 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:59,419 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:56:59,692 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:56:59,694 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:57:02,661 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:57:03,420 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:05,421 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:05,534 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:57:05,588 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:57:05,676 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:57:06,421 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:57:07,421 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:08,422 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:09,422 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:11,423 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:13,424 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:14,737 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:57:14,739 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:57:15,047 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:57:15,104 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:57:15,190 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:57:15,424 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:57:15,424 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:16,425 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:17,425 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:19,426 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:21,426 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:23,427 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:24,480 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:57:24,535 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:57:24,626 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:57:25,428 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:57:25,428 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:26,428 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:27,428 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:29,925 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:57:29,927 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:57:32,430 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:33,046 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:57:33,817 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:57:33,891 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:57:33,978 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:57:34,431 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:57:34,431 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:36,431 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:38,432 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:40,432 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:42,433 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:43,189 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:57:43,242 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:57:43,327 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:57:43,434 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:57:44,434 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:45,073 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:57:45,074 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:57:46,435 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:48,435 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:52,345 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:57:52,399 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:57:52,481 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:52,483 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:57:53,481 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:57:54,481 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:56,482 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:57:58,483 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:00,120 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:58:00,121 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:58:00,483 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:01,259 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:58:01,337 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:58:01,424 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:58:01,484 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:58:02,484 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:03,481 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:58:04,490 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:06,491 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:09,492 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:10,112 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:58:10,165 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:58:10,253 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:58:10,492 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:58:11,493 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:13,493 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:15,226 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:58:15,228 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:58:15,494 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:17,495 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:18,745 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:58:18,800 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:58:18,890 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:58:19,496 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:58:19,496 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:21,497 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:24,498 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:26,498 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:27,092 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:58:27,144 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:58:27,229 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:58:27,499 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:58:28,499 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:29,499 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:30,302 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:58:30,304 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:58:30,500 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:32,500 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:33,929 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:58:34,501 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:35,258 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:58:35,313 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:58:35,401 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:58:35,501 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:58:36,502 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:38,503 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:40,503 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:42,504 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:43,125 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:58:43,180 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:58:43,264 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:58:43,504 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:58:44,505 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:45,423 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:58:45,424 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:58:46,505 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:48,506 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:50,507 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:50,622 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:58:50,677 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:58:50,764 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:58:51,507 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:58:52,508 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:54,508 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:56,509 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:58:57,735 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:58:57,786 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:58:57,875 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:58:58,510 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:58:58,510 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:00,510 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:00,536 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:59:00,537 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:59:02,511 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:04,373 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:59:04,447 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:59:04,504 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:59:04,591 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:04,594 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:59:05,539 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:59:06,540 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:08,540 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:10,541 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:10,604 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:59:10,658 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:59:10,746 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:59:11,541 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:59:12,542 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:14,542 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:15,584 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:59:15,586 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:59:16,228 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:59:16,282 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:59:16,368 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:59:16,543 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:59:16,543 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:18,544 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:19,544 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:21,339 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:59:21,393 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:59:21,481 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:59:21,545 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:59:21,545 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:22,545 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:23,545 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:25,546 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:25,910 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:59:25,964 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:59:26,053 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:59:26,547 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:59:27,547 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:28,547 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:29,548 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:29,860 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:59:29,915 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:59:30,003 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:59:30,548 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:59:30,646 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:59:30,647 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:59:31,548 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:32,549 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:33,312 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:59:33,367 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:59:33,493 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:59:33,549 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:59:33,549 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:34,549 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:34,928 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 06:59:35,550 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:36,301 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:59:36,356 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:59:36,443 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:59:36,550 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:59:37,551 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:38,551 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:39,566 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:59:39,623 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:39,743 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:59:39,825 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:59:40,613 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:59:40,613 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:41,613 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:45,615 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:45,844 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 06:59:45,846 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 06:59:47,615 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:49,616 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:51,589 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 06:59:51,643 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 06:59:51,729 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 06:59:52,644 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 06:59:53,644 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:55,645 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 06:59:59,646 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:01,354 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:00:01,355 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:00:01,647 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:03,292 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:00:03,345 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:00:03,427 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:00:03,648 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:00:05,467 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:00:05,648 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:07,649 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:10,650 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:14,651 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:14,805 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:00:14,856 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:00:14,961 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:00:15,651 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:00:15,652 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:16,571 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:00:16,573 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:00:16,652 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:18,652 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:22,654 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:24,654 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:26,258 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:00:26,308 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:00:26,392 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:00:26,655 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:00:27,655 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:28,656 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:30,656 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:31,622 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:00:31,624 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:00:32,657 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:35,827 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:00:37,659 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:37,706 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:00:37,758 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:00:37,843 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:00:38,659 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:00:39,659 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:41,660 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:45,661 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:46,687 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:00:46,689 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:00:47,662 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:49,095 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:00:49,149 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:00:49,233 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:00:49,663 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:00:50,663 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:51,663 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:53,664 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:55,665 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:00:59,666 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:00,174 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:01:00,225 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:01:00,312 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:01:00,666 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:01:01,667 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:01,858 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:01:01,860 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:01:02,667 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:05,668 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:06,194 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:01:07,669 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:09,670 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:11,292 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:01:11,348 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:01:11,433 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:01:11,670 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:01:12,671 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:13,671 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:15,671 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:17,167 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:01:17,169 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:01:17,672 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:21,673 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:22,372 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:01:22,452 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:01:22,538 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:01:22,674 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:01:23,674 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:24,674 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:26,675 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:30,676 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:32,329 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:01:32,330 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:01:32,677 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:33,413 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:01:33,467 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:01:33,550 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:01:33,677 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:01:34,677 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:36,716 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:01:38,679 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:40,680 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:42,680 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:44,459 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:01:44,512 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:01:44,597 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:01:44,681 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:01:46,682 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:47,413 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:01:47,414 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:01:47,682 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:48,683 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:50,683 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:54,685 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:55,246 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:01:55,298 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:01:55,386 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:01:55,685 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:01:56,685 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:57,686 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:01:58,686 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:02,649 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:02:02,650 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:02:03,688 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:05,688 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:05,945 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:02:05,999 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:02:06,084 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:02:06,689 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:02:07,137 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:02:07,689 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:09,690 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:13,691 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:15,692 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:16,618 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:02:16,670 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:02:16,755 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:02:17,754 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:02:17,754 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:17,827 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:02:17,828 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:02:21,756 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:23,756 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:25,757 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:27,256 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:02:27,311 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:02:27,399 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:02:27,758 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:02:27,758 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:29,758 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:31,759 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:32,927 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:02:32,929 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:02:34,760 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:36,761 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:37,543 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:02:37,941 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:02:37,993 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:02:38,077 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:02:38,762 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:02:39,762 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:40,762 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:42,763 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:44,764 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:48,313 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:02:48,315 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:02:48,496 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:02:48,551 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:02:48,636 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:02:48,765 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:02:48,765 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:49,765 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:50,766 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:52,767 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:54,767 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:58,769 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:02:58,975 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:02:59,029 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:02:59,113 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:02:59,769 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:03:00,770 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:02,770 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:03,397 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:03:03,398 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:03:04,771 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:07,994 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:03:08,772 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:09,416 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:03:09,470 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:03:09,559 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:03:09,773 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:03:10,773 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:12,774 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:15,775 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:18,477 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:03:18,479 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:03:19,714 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:03:19,768 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:03:19,848 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:19,851 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:03:20,848 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:03:20,849 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:21,849 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:23,850 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:27,851 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:29,852 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:29,911 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:03:29,966 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:03:30,050 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:03:30,852 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:03:30,852 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:31,852 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:33,555 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:03:33,556 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:03:33,853 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:35,854 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:38,381 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:03:39,855 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:40,095 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:03:40,148 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:03:40,232 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:03:40,856 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:03:41,856 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:43,857 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:45,857 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:48,670 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:03:48,671 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:03:49,859 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:50,060 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:03:50,114 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:03:50,201 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:03:50,859 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:03:51,860 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:53,860 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:55,861 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:03:58,862 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:00,052 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:04:00,121 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:04:00,205 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:04:00,863 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:04:01,863 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:02,864 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:03,759 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:04:03,760 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:04:04,864 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:06,865 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:08,827 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:04:08,866 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:10,559 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:04:10,733 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:04:10,820 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:04:10,866 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:04:11,867 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:12,867 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:14,868 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:16,869 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:18,848 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:04:18,848 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:04:18,869 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:20,301 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:04:20,356 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:04:20,441 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:04:20,870 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:04:21,870 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:22,871 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:24,871 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:26,872 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:28,873 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:29,991 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:04:30,043 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:04:30,125 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:04:30,874 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:04:30,874 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:31,874 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:32,874 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:33,955 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:04:33,956 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:04:34,875 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:36,876 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:38,876 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:39,272 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:04:39,617 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:04:39,668 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:04:39,754 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:04:39,877 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:04:39,877 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:40,877 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:42,878 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:45,879 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:47,879 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:49,032 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:04:49,034 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:04:49,211 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:04:49,263 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:04:49,346 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:04:49,880 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:04:50,881 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:51,881 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:53,882 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:55,882 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:57,883 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:04:58,568 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:04:58,620 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:04:58,699 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:04:58,883 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:04:59,884 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:00,884 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:01,884 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:03,885 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:04,102 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:05:04,103 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:05:05,886 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:07,869 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:05:07,922 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:05:08,025 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:05:08,923 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:05:08,923 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:09,661 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:05:10,924 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:12,924 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:14,925 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:16,926 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:17,092 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:05:17,145 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:05:17,231 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:05:17,926 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:05:18,926 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:19,201 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:05:19,203 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:05:20,927 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:22,928 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:24,929 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:26,012 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:05:26,063 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:05:26,150 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:05:26,929 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:05:26,929 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:28,930 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:30,930 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:32,931 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:34,272 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:05:34,274 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:05:34,787 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:05:34,842 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:05:34,926 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:05:34,932 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:05:36,933 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:38,933 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:40,022 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:05:40,934 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:42,935 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:43,397 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:05:43,447 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:05:43,533 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:05:43,935 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:05:44,935 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:46,936 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:48,937 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:49,323 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:05:49,324 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:05:50,938 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:51,949 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:05:52,002 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:05:52,085 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:05:52,938 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:05:52,939 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:54,939 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:57,940 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:05:59,941 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:00,217 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:06:00,271 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:06:00,357 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:06:00,941 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:06:00,941 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:01,942 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:03,942 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:04,488 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:06:04,489 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:06:05,943 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:07,944 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:08,270 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:06:08,323 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:06:08,441 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:06:08,944 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:06:09,944 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:10,532 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:06:11,945 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:13,946 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:15,947 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:16,054 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:06:16,108 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:06:16,195 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:06:16,948 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:06:17,948 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:19,664 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:06:19,665 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:06:19,949 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:21,949 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:23,443 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:06:23,497 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:06:23,584 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:06:23,950 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:06:23,950 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:25,951 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:27,951 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:29,952 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:30,336 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:06:30,387 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:06:30,473 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:06:30,952 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:06:31,953 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:33,953 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:34,711 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:06:34,712 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:06:35,954 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:36,876 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:06:36,932 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:06:37,018 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:06:38,017 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:06:38,017 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:40,018 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:40,920 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:06:42,018 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:43,015 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:06:43,072 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:06:43,161 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:06:44,052 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:06:44,052 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:46,053 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:48,053 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:48,614 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:06:48,666 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:06:48,750 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:06:49,054 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:06:49,759 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:06:49,761 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:06:50,055 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:52,055 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:53,723 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:06:53,774 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:06:53,864 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:06:54,056 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:06:54,056 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:56,057 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:58,057 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:06:58,270 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:06:58,324 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:06:58,407 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:06:59,058 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:07:00,058 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:02,059 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:02,242 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:07:02,295 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:07:02,379 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:07:03,059 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:07:04,059 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:04,935 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:07:04,936 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:07:05,779 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:07:05,832 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:07:05,917 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:07:06,060 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:07:06,060 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:08,060 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:08,795 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:07:08,846 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:07:08,931 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:07:09,061 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:07:10,061 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:11,410 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:07:11,912 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:07:12,078 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:07:12,157 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:12,160 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:07:13,105 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:07:14,105 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:17,106 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:20,040 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:07:20,042 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:07:21,108 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:23,109 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:24,008 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:07:24,061 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:07:24,148 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:07:25,146 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:07:25,147 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:26,147 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:27,147 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:29,148 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:31,149 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:35,084 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:07:35,085 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:07:35,150 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:35,782 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:07:35,837 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:07:35,918 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:07:36,150 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:07:37,151 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:38,151 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:41,152 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:41,829 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:07:43,153 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:45,153 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:47,367 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:07:47,423 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:07:47,511 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:07:48,154 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:07:48,155 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:49,155 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:50,147 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:07:50,148 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:07:51,155 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:55,157 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:58,158 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:07:58,750 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:07:58,829 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:07:58,914 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:07:59,158 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:08:00,159 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:01,159 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:04,160 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:05,429 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:08:05,430 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:08:06,161 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:10,160 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:08:10,208 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:10,214 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:08:10,300 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:08:11,198 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:08:12,198 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:12,357 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:08:13,199 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:14,199 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:18,200 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:20,201 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:20,665 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:08:20,667 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:08:21,458 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:08:21,512 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:08:21,600 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:08:22,202 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:08:22,202 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:23,202 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:24,203 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:26,203 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:28,204 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:32,206 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:32,681 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:08:32,735 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:08:32,823 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:08:33,206 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:08:34,206 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:35,207 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:35,804 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:08:35,805 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:08:36,207 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:41,209 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:42,758 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:08:43,210 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:43,837 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:08:43,895 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:08:43,988 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:08:44,210 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:08:45,211 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:49,212 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:50,971 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:08:50,972 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:08:51,213 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:53,213 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:54,874 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:08:54,947 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:08:55,033 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:08:55,214 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:08:55,214 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:57,215 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:08:59,215 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:01,216 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:05,217 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:05,855 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:09:05,909 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:09:05,992 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:09:06,218 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:09:06,252 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:09:06,254 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:09:07,218 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:08,218 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:09,219 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:13,298 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:09:14,220 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:16,221 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:16,742 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:09:16,794 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:09:16,878 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:09:17,221 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:09:18,222 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:21,334 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:09:21,335 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:09:22,223 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:24,224 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:26,224 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:27,505 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:09:27,560 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:09:27,645 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:09:28,225 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:09:28,225 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:30,226 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:32,227 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:34,227 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:36,228 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:36,481 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:09:36,483 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:09:38,507 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:09:38,562 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:09:38,650 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:09:39,229 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:09:40,229 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:41,230 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:43,231 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:43,845 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:09:45,231 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:49,167 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:09:49,219 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:09:49,305 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:49,306 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:09:50,305 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:09:50,305 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:51,305 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:51,728 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:09:51,729 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:09:53,306 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:57,307 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:59,308 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:09:59,781 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:09:59,835 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:09:59,920 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:10:00,308 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:10:01,309 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:02,309 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:03,309 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:06,785 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:10:06,786 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:10:07,311 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:09,311 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:10,320 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:10:10,374 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:10:10,460 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:10:11,312 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:10:11,312 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:12,312 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:14,220 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:10:15,313 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:17,314 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:19,315 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:20,918 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:10:20,961 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:10:21,052 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:10:21,315 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:10:21,992 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:10:21,994 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:10:22,316 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:26,317 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:28,318 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:30,318 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:31,315 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:10:31,369 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:10:31,454 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:10:32,369 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:10:32,369 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:33,369 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:34,370 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:36,371 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:37,347 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:10:37,348 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:10:38,371 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:40,372 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:41,646 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:10:41,699 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:10:41,785 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:10:42,373 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:10:42,373 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:43,373 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:44,639 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:10:46,374 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:48,375 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:50,375 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:51,952 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:10:52,007 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:10:52,094 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:10:52,376 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:10:52,376 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:52,544 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:10:52,545 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:10:53,376 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:57,378 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:10:59,378 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:01,379 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:02,098 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:11:02,149 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:11:02,233 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:11:02,379 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:11:03,380 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:07,381 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:07,660 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:11:07,661 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:11:09,382 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:11,383 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:12,136 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:11:12,188 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:11:12,275 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:11:12,383 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:11:13,384 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:15,031 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:11:17,385 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:19,386 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:21,386 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:22,239 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:11:22,292 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:11:22,375 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:11:22,387 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:11:22,824 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:11:22,826 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:11:23,387 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:27,388 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:29,389 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:31,389 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:32,216 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:11:32,264 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:11:32,347 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:11:32,390 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:11:33,390 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:34,390 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:35,391 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:37,391 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:38,301 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:11:38,302 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:11:42,393 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:42,783 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:11:42,970 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:11:43,061 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:11:43,393 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:11:44,394 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:45,516 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:11:46,395 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:50,396 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:52,397 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:52,622 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:11:52,676 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:11:52,762 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:11:53,397 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:11:53,498 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:11:53,500 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:11:54,397 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:56,398 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:11:58,399 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:02,400 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:02,482 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:12:02,536 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:12:02,622 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:12:03,401 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:12:04,401 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:06,402 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:08,402 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:08,656 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:12:08,658 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:12:10,403 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:12,220 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:12:12,274 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:12:12,402 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:12:12,404 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:12:13,404 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:14,404 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:15,405 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:15,892 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:12:17,405 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:19,406 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:21,407 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:21,818 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:12:21,871 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:12:21,955 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:12:22,407 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:12:22,407 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:23,407 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:23,711 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:12:23,712 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:12:25,408 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:27,409 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:31,410 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:31,425 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:12:31,480 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:12:31,565 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:12:32,410 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:12:32,411 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:33,411 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:35,411 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:37,412 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:38,813 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:12:38,814 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:12:39,413 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:40,862 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:12:40,917 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:12:41,003 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:12:41,413 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:12:41,414 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:43,414 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:44,414 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:46,288 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:12:48,416 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:50,205 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:12:50,258 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:12:50,346 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:12:50,416 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:12:50,417 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:51,417 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:52,417 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:54,025 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:12:54,026 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:12:54,418 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:56,418 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:58,419 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:12:59,610 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:12:59,662 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:12:59,748 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:13:00,420 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:13:00,420 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:01,420 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:04,421 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:06,422 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:08,423 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:08,599 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:13:08,655 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:13:08,743 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:13:09,184 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:13:09,186 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:13:09,423 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:13:10,423 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:11,424 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:12,424 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:14,425 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:16,425 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:16,670 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:13:17,442 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:13:17,495 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:13:17,580 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:13:18,426 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:13:18,426 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:19,426 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:20,427 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:22,427 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:24,440 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:13:24,441 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:13:25,428 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:26,187 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:13:26,242 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:13:26,326 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:13:26,429 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:13:27,429 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:29,430 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:31,430 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:33,431 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:34,673 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:13:34,725 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:13:34,812 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:13:35,432 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:13:35,432 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:37,433 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:39,433 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:39,731 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:13:39,732 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:13:41,434 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:42,856 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:13:42,910 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:13:42,995 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:13:43,435 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:13:43,435 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:44,435 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:46,436 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:47,084 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:13:48,436 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:50,437 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:50,802 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:13:50,856 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:13:50,944 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:13:51,437 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:13:52,438 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:54,438 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:54,882 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:13:54,884 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:13:56,439 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:58,346 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:13:58,400 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:13:58,482 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:13:58,484 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:13:59,482 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:14:00,483 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:02,483 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:04,484 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:05,624 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:14:05,679 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:14:05,767 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:14:06,485 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:14:06,485 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:08,485 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:10,047 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:14:10,048 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:14:10,486 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:12,414 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:14:12,470 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:14:12,555 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:12,557 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:14:13,554 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:14:14,555 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:16,555 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:17,533 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:14:18,556 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:18,730 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:14:18,783 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:14:18,868 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:14:19,556 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:14:20,557 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:22,557 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:24,558 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:24,622 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:14:24,696 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:14:24,779 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:14:25,118 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:14:25,120 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:14:25,558 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:14:26,559 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:28,560 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:30,019 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:14:30,073 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:14:30,157 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:14:30,560 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:14:30,561 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:32,561 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:34,562 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:34,850 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:14:34,904 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:14:34,990 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:14:35,562 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:14:36,562 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:38,563 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:39,145 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:14:39,198 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:14:39,282 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:14:39,563 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:14:40,250 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:14:40,252 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:14:40,564 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:42,564 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:42,765 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:14:42,821 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:14:42,903 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:14:43,565 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:14:44,565 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:45,914 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:14:45,968 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:14:46,053 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:14:46,566 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:14:46,566 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:48,109 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:14:48,567 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:49,090 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:14:49,263 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:14:49,341 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:14:49,567 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:14:50,567 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:52,568 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:55,510 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:14:55,512 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:14:57,570 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:14:59,570 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:01,063 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:15:01,112 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:15:01,195 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:15:01,571 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:15:02,571 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:03,571 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:05,572 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:07,573 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:10,798 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:15:10,800 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:15:11,574 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:12,635 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:15:12,685 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:15:12,767 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:15:13,575 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:15:13,575 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:14,575 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:17,576 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:18,551 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:15:19,576 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:22,578 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:24,111 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:15:24,162 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:15:24,247 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:15:24,578 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:15:25,579 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:25,901 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:15:25,902 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:15:26,579 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:28,580 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:30,580 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:34,582 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:35,430 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:15:35,481 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:15:35,566 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:15:35,582 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:15:36,582 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:37,583 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:40,584 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:40,957 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:15:40,958 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:15:42,584 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:44,585 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:46,669 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:15:46,722 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:15:46,802 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:15:47,586 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:15:48,967 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:15:49,587 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:51,587 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:55,589 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:56,095 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:15:56,097 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:15:57,590 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:15:57,839 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:15:57,910 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:15:57,993 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:15:58,590 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:15:59,590 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:01,591 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:05,592 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:07,593 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:08,918 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:16:08,970 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:16:09,053 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:16:09,594 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:16:09,594 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:11,199 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:16:11,201 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:16:11,594 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:13,595 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:15,596 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:19,396 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:16:19,597 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:20,004 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:16:20,060 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:16:20,141 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:16:20,597 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:16:21,598 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:23,598 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:26,551 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:16:26,552 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:16:26,599 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:30,600 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:30,972 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:16:31,024 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:16:31,102 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:16:31,601 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:16:31,601 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:32,601 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:36,603 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:38,603 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:40,604 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:41,669 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:16:41,671 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:16:41,961 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:16:42,012 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:16:42,093 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:16:42,605 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:16:44,605 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:46,606 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:48,607 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:49,838 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:16:50,608 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:52,737 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:16:52,789 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:16:52,874 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:16:53,609 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:16:54,609 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:56,610 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:16:56,742 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:16:56,744 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:17:00,611 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:02,612 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:03,479 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:17:03,530 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:17:03,610 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:17:03,612 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:17:04,612 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:07,613 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:11,614 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:12,000 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:17:12,002 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:17:13,615 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:14,151 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:17:14,203 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:17:14,288 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:17:14,616 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:17:15,616 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:16,616 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:19,617 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:20,368 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:17:21,618 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:23,619 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:24,932 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:17:24,985 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:17:25,069 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:17:25,619 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:17:26,620 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:27,308 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:17:27,310 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:17:27,620 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:29,620 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:31,621 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:33,622 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:35,556 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:17:35,608 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:17:35,691 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:17:36,690 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:17:36,690 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:37,690 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:39,691 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:41,691 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:42,375 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:17:42,377 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:17:46,125 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:17:46,176 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:17:46,258 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:17:46,693 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:17:46,693 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:47,693 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:48,694 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:50,695 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:50,837 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:17:52,695 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:56,634 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:17:56,674 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:17:56,754 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:56,756 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:17:57,552 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:17:57,554 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:17:57,755 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:17:58,755 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:17:59,755 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:00,756 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:04,757 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:06,758 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:07,122 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:18:07,168 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:18:07,248 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:18:07,758 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:18:08,759 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:09,759 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:10,759 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:12,775 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:18:12,776 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:18:14,761 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:16,761 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:17,442 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:18:17,493 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:18:17,616 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:18:17,762 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:18:18,762 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:19,762 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:20,763 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:21,231 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:18:24,764 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:26,765 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:27,779 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:18:27,832 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:18:27,917 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:18:27,999 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:18:28,001 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:18:28,766 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:18:28,766 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:29,766 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:31,767 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:35,768 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:37,769 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:37,955 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:18:38,007 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:18:38,087 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:18:38,769 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:18:39,770 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:41,770 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:43,207 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:18:43,209 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:18:43,771 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:47,772 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:48,030 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:18:48,082 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:18:48,160 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:18:48,773 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:18:49,773 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:51,630 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:18:51,774 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:53,775 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:58,084 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:18:58,136 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:18:58,220 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:18:58,466 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:18:58,468 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:18:58,776 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:18:58,776 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:18:59,777 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:00,777 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:02,778 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:04,778 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:06,779 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:07,995 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:19:08,046 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:19:08,127 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:19:08,780 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:19:08,780 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:10,780 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:12,781 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:13,825 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:19:13,826 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:19:14,782 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:16,782 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:18,510 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:19:18,682 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:19:18,767 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:19:18,783 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:19:20,784 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:22,026 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:19:22,784 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:24,785 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:26,786 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:28,201 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:19:28,253 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:19:28,336 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:19:28,786 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:19:28,787 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:28,988 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:19:28,990 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:19:30,787 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:33,788 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:35,789 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:37,790 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:37,963 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:19:38,018 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:19:38,105 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:19:38,790 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:19:39,790 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:40,791 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:43,792 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:44,130 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:19:44,132 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:19:45,793 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:47,549 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:19:47,602 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:19:47,688 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:19:47,793 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:19:47,794 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:48,794 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:49,794 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:51,795 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:52,465 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:19:53,795 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:55,796 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:57,053 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:19:57,105 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:19:57,192 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:19:57,797 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:19:57,797 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:58,797 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:19:59,184 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:19:59,185 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:20:01,798 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:03,799 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:05,799 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:06,593 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:20:06,645 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:20:06,734 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:20:06,800 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:20:07,800 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:08,800 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:09,801 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:12,802 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:14,308 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:20:14,310 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:20:14,803 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:16,051 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:20:16,103 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:20:16,187 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:20:16,803 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:20:16,803 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:18,804 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:20,805 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:22,806 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:22,986 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:20:24,806 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:25,349 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:20:25,401 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:20:25,487 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:20:25,807 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:20:26,807 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:28,808 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:29,512 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:20:29,513 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:20:30,808 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:32,809 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:34,462 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:20:34,517 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:20:34,600 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:20:34,810 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:20:37,811 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:39,812 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:41,812 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:43,330 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:20:43,384 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:20:43,469 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:20:43,813 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:20:43,813 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:44,563 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:20:44,565 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:20:45,814 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:47,814 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:49,815 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:51,816 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:52,129 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:20:52,183 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:20:52,267 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:20:52,816 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:20:53,407 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:20:53,816 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:55,817 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:57,818 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:20:59,627 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:20:59,628 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:20:59,818 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:00,777 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:21:00,834 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:21:00,920 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:21:01,845 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:21:01,845 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:03,846 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:05,846 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:08,847 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:09,269 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:21:09,324 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:21:09,412 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:21:09,848 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:21:10,848 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:11,849 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:12,849 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:14,693 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:21:14,694 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:21:14,850 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:16,850 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:17,487 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:21:17,541 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:21:17,626 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:21:17,851 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:21:18,851 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:19,851 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:20,852 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:22,852 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:23,821 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:21:24,853 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:25,264 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:21:25,318 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:21:25,405 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:21:25,853 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:21:26,854 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:28,854 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:29,781 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:21:29,782 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:21:30,855 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:32,587 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:21:32,641 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:21:32,728 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:21:32,856 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:21:32,856 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:34,857 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:36,858 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:38,858 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:39,528 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:21:39,580 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:21:39,672 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:21:39,859 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:21:40,859 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:42,860 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:44,861 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:44,945 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:21:44,946 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:21:46,117 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:21:46,170 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:21:46,279 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:21:46,861 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:21:46,862 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:48,862 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:50,863 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:52,105 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:21:52,157 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:21:52,243 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:21:52,863 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:21:52,864 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:54,302 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:21:54,864 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:56,865 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:21:57,437 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:21:57,489 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:21:57,572 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:21:57,865 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:21:58,865 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:00,064 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:22:00,065 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:22:00,866 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:02,181 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:22:02,234 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:22:02,321 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:22:02,867 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:22:02,867 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:04,867 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:06,405 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:22:06,459 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:22:06,544 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:22:06,868 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:22:06,868 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:08,869 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:10,185 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:22:10,237 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:22:10,319 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:22:10,870 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:22:10,870 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:12,871 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:13,554 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:22:13,611 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:22:13,694 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:22:13,871 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:22:14,871 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:15,222 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:22:15,224 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:22:15,872 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:16,419 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:22:16,470 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:22:16,555 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:22:16,872 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:22:17,872 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:18,873 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:19,602 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:22:19,761 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:22:19,840 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:22:19,873 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:22:19,873 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:20,873 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:21,874 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:24,920 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:22:25,875 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:27,876 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:29,877 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:30,286 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:22:30,287 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:22:31,458 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:22:31,510 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:22:31,591 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:22:31,877 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:22:33,878 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:35,879 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:39,880 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:41,881 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:42,878 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:22:42,932 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:22:43,012 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:22:43,928 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:22:43,928 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:45,332 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:22:45,333 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:22:45,929 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:47,929 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:49,930 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:51,931 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:54,346 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:22:54,396 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:22:54,474 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:22:54,932 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:22:55,307 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:22:55,932 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:22:58,933 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:00,466 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:23:00,468 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:23:02,935 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:04,935 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:05,675 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:23:05,728 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:23:05,809 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:23:05,936 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:23:06,936 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:07,937 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:10,938 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:12,938 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:14,939 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:15,628 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:23:15,629 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:23:16,972 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:23:17,023 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:23:17,104 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:23:17,940 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:23:17,940 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:18,940 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:20,941 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:22,942 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:25,660 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:23:26,943 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:28,242 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:23:28,292 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:23:28,371 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:23:28,944 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:23:28,944 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:30,717 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:23:30,718 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:23:30,945 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:33,946 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:35,947 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:39,464 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:23:39,506 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:23:39,588 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:23:39,948 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:23:39,948 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:40,948 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:41,949 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:43,949 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:45,852 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:23:45,854 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:23:45,950 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:49,951 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:50,521 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:23:50,576 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:23:50,656 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:23:50,952 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:23:51,952 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:52,952 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:55,953 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:56,034 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:23:57,954 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:23:59,955 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:01,092 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:24:01,094 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:24:01,542 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:24:01,597 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:24:01,715 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:24:01,955 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:24:01,956 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:02,956 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:06,957 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:08,958 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:10,959 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:12,535 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:24:12,604 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:24:12,682 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:24:12,960 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:24:14,960 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:16,364 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:24:16,365 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:24:16,961 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:18,962 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:22,963 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:23,446 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:24:23,499 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:24:23,577 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:24:23,963 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:24:24,964 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:26,454 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:24:26,964 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:30,966 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:31,513 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:24:31,514 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:24:32,967 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:34,232 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:24:34,282 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:24:34,360 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:24:34,967 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:24:34,967 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:38,969 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:40,969 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:42,970 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:44,930 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:24:44,978 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:24:45,059 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:24:45,989 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:24:46,839 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:24:46,840 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:24:46,989 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:48,990 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:50,991 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:55,613 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:24:55,663 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:24:55,746 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:24:55,993 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:24:55,993 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:56,885 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:24:56,993 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:57,994 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:24:59,994 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:02,019 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:25:02,021 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:25:03,996 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:05,997 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:06,503 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:25:06,555 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:25:06,634 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:25:06,997 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:25:07,998 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:09,998 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:14,000 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:16,001 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:16,897 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:25:16,946 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:25:17,025 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:25:17,271 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:25:17,272 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:25:18,024 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:25:18,024 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:20,025 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:24,026 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:26,027 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:27,269 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:25:27,333 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:25:27,382 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:25:27,460 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:25:28,028 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:25:28,028 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:29,028 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:31,029 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:32,640 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:25:32,642 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:25:35,030 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:37,031 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:37,711 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:25:37,766 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:25:37,846 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:25:38,031 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:25:39,032 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:40,032 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:43,033 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:45,034 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:47,035 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:47,795 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:25:47,797 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:25:48,068 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:25:48,119 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:25:48,198 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:25:49,035 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:25:49,036 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:50,036 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:51,036 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:55,038 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:57,038 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:25:57,727 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:25:58,224 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:25:58,273 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:25:58,348 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:25:59,039 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:25:59,039 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:00,039 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:02,856 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:26:02,858 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:26:03,040 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:05,041 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:07,042 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:08,393 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:26:08,446 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:26:08,527 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:26:09,042 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:26:10,043 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:14,044 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:16,045 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:17,903 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:26:17,905 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:26:18,046 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:18,520 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:26:18,572 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:26:18,650 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:26:19,046 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:26:20,046 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:22,047 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:26,048 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:28,049 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:28,091 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:26:28,480 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:26:28,529 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:26:28,637 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:26:29,050 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:26:30,050 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:32,963 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:26:32,964 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:26:34,051 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:36,052 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:38,053 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:38,517 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:26:38,570 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:26:38,652 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:26:39,053 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:26:40,053 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:41,054 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:42,054 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:44,055 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:48,018 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:26:48,020 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:26:48,056 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:49,034 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:26:49,222 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:26:49,306 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:26:50,094 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:26:50,094 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:51,094 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:52,095 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:57,096 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:26:58,525 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:26:58,738 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:26:58,789 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:26:58,872 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:26:59,097 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:26:59,097 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:01,098 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:03,069 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:27:03,071 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:27:03,098 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:05,099 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:07,100 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:08,471 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:27:08,525 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:27:08,610 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:27:09,100 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:27:11,101 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:13,102 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:15,102 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:17,103 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:18,070 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:27:18,124 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:27:18,209 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:27:18,209 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:27:18,211 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:27:19,129 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:27:19,130 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:21,130 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:25,132 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:27,132 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:27,729 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:27:27,783 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:27:27,892 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:27:28,132 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:27:29,028 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:27:29,133 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:31,133 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:33,454 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:27:33,455 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:27:34,135 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:37,303 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:27:37,351 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:27:37,438 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:27:38,136 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:27:38,136 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:39,136 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:40,137 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:42,138 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:44,138 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:46,139 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:46,762 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:27:46,816 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:27:46,906 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:27:47,139 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:27:47,139 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:48,140 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:48,555 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:27:48,557 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:27:50,140 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:54,142 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:56,142 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:56,166 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:27:56,220 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:27:56,307 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:27:57,143 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:27:57,143 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:58,143 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:27:59,404 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:28:00,144 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:02,145 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:03,819 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:28:03,820 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:28:04,145 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:05,290 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:28:05,344 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:28:05,431 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:28:06,146 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:28:06,146 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:08,147 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:10,147 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:12,148 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:14,148 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:14,175 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:28:14,227 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:28:14,311 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:28:15,149 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:28:16,149 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:18,150 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:18,957 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:28:18,959 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:28:20,151 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:22,151 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:23,031 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:28:23,085 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:28:23,169 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:28:24,168 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:28:24,168 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:26,169 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:29,170 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:29,806 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:28:31,170 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:31,837 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:28:31,888 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:28:31,976 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:28:32,171 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:28:33,171 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:34,171 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:34,217 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:28:34,218 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:28:35,172 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:39,173 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:40,370 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:28:40,444 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:28:40,529 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:28:41,174 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:28:41,174 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:42,174 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:43,175 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:45,175 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:48,176 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:48,830 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:28:48,885 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:28:48,973 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:28:49,177 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:28:49,626 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:28:49,627 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:28:50,177 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:52,178 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:54,178 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:56,179 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:28:56,979 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:28:57,035 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:28:57,123 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:28:57,179 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:28:58,180 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:00,180 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:00,225 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:29:02,181 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:04,182 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:04,710 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:29:04,711 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:29:04,790 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:29:04,791 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:29:04,877 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:29:05,182 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:29:06,182 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:08,183 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:10,184 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:12,016 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:29:12,071 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:29:12,156 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:29:12,184 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:29:12,184 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:14,185 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:16,186 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:18,186 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:18,898 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:29:18,950 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:29:19,033 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:29:19,187 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:29:19,898 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:29:19,900 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:29:20,187 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:23,188 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:25,174 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:29:25,228 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:25,228 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:29:25,314 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:29:26,220 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:29:26,220 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:27,220 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:29,221 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:30,611 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:29:30,935 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:29:30,988 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:29:31,074 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:29:31,221 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:29:31,222 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:32,222 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:33,222 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:35,009 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:29:35,010 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:29:35,223 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:36,034 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:29:36,093 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:29:36,181 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:29:36,223 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:29:37,223 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:38,224 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:39,224 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:40,597 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:29:40,649 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:29:40,731 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:29:41,225 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:29:41,225 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:42,225 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:43,226 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:44,653 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:29:44,707 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:29:44,792 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:29:45,227 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:29:45,227 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:46,227 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:47,227 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:48,213 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:29:48,267 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:29:48,350 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:29:49,268 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:29:49,268 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:50,181 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:29:50,183 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:29:50,268 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:51,268 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:51,299 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:29:51,347 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:29:51,428 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:29:52,269 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:29:52,269 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:53,269 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:54,420 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:29:54,633 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:29:54,717 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:29:55,270 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:29:55,270 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:56,270 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:29:59,271 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:01,127 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:30:01,272 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:05,246 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:30:05,246 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:30:05,273 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:06,709 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:30:06,761 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:30:06,845 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:30:07,274 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:30:07,274 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:08,274 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:11,275 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:14,276 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:18,278 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:18,377 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:30:18,429 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:30:18,512 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:30:19,278 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:30:20,279 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:20,320 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:30:20,321 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:30:21,279 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:24,280 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:26,281 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:28,282 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:29,899 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:30:29,952 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:30:30,035 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:30:30,282 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:30:31,283 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:31,484 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:30:32,283 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:34,284 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:35,364 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:30:35,366 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:30:36,284 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:40,286 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:41,295 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:30:41,346 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:30:41,450 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:30:42,286 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:30:42,287 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:43,287 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:47,288 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:49,289 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:50,658 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:30:50,660 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:30:51,289 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:52,526 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:30:52,578 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:30:52,658 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:30:53,290 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:30:53,290 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:55,291 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:57,291 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:30:59,292 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:01,883 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:31:03,293 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:03,789 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:31:03,841 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:31:03,927 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:31:04,294 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:31:05,294 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:05,739 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:31:05,741 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:31:06,295 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:09,296 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:11,296 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:13,297 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:14,964 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:31:15,017 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:31:15,100 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:31:15,298 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:31:15,298 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:16,298 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:19,299 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:20,863 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:31:20,864 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:31:22,300 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:24,301 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:26,082 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:31:26,135 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:31:26,218 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:31:26,301 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:31:28,302 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:30,302 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:32,319 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:31:34,304 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:35,946 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:31:35,946 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:31:36,305 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:37,055 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:31:37,111 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:31:37,198 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:31:37,305 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:31:38,305 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:42,307 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:44,308 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:46,308 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:48,153 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:31:48,204 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:31:48,287 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:31:48,309 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:31:49,309 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:50,310 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:51,105 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:31:51,105 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:31:51,310 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:53,311 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:55,311 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:57,312 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:31:58,922 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:31:58,975 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:31:59,057 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:31:59,313 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:32:00,313 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:01,314 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:02,812 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:32:03,314 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:06,347 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:32:06,348 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:32:07,316 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:09,316 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:09,801 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:32:09,854 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:32:09,938 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:32:10,317 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:32:10,317 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:11,317 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:13,318 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:17,319 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:19,320 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:20,574 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:32:20,629 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:32:20,712 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:32:21,320 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:32:21,321 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:21,529 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:32:21,530 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:32:23,321 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:24,321 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:28,323 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:30,323 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:31,121 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:32:31,173 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:32:31,257 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:32:31,324 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:32:32,324 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:33,312 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:32:33,324 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:34,325 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:36,325 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:36,869 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:32:36,871 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:32:38,326 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:40,327 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:41,625 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:32:41,676 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:32:41,758 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:32:42,327 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:32:42,328 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:43,328 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:44,328 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:46,329 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:48,329 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:50,330 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:52,129 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:32:52,180 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:32:52,207 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:32:52,262 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:32:52,263 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:32:52,331 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:32:53,331 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:54,331 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:56,332 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:32:58,333 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:01,334 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:02,502 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:33:02,554 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:33:02,635 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:33:03,334 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:33:03,335 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:03,710 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:33:05,335 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:07,336 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:07,435 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:33:07,436 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:33:09,336 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:11,337 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:12,829 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:33:12,882 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:33:12,965 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:33:13,338 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:33:15,338 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:17,339 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:19,340 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:21,340 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:22,482 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:33:22,483 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:33:23,178 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:33:23,230 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:33:23,311 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:33:23,341 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:33:25,342 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:27,342 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:29,343 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:33,344 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:33,422 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:33:33,474 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:33:33,556 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:33:34,090 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:33:34,345 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:33:35,345 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:37,346 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:37,551 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:33:37,553 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:33:39,346 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:43,348 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:43,545 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:33:43,595 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:33:43,678 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:33:44,348 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:33:45,348 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:46,349 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:48,349 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:50,350 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:52,351 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:52,597 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:33:52,598 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:33:53,646 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:33:53,698 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:33:53,781 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:33:54,352 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:33:56,352 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:33:58,353 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:00,354 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:02,354 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:03,712 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:34:03,763 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:34:03,848 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:34:04,355 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:34:04,482 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:34:06,356 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:07,643 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:34:07,644 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:34:08,356 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:10,357 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:12,358 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:13,667 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:34:13,718 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:34:13,802 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:34:14,358 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:34:14,359 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:16,359 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:18,360 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:20,360 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:22,361 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:22,703 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:34:22,704 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:34:24,210 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:34:24,388 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:34:24,471 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:34:25,389 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:34:25,389 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:26,389 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:29,391 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:31,391 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:33,392 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:33,899 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:34:33,953 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:34:34,038 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:34:34,392 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:34:34,913 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:34:35,393 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:36,393 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:37,759 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:34:37,760 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:34:39,394 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:41,395 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:43,396 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:43,706 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:34:43,746 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:34:43,835 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:34:44,396 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:34:44,396 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:45,396 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:47,397 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:49,398 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:52,808 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:34:52,810 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:34:53,353 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:34:53,408 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:34:53,424 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:53,494 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:34:54,414 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:34:54,414 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:55,414 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:57,415 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:34:59,416 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:01,417 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:02,923 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:35:02,976 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:35:03,058 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:35:03,417 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:35:03,417 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:04,417 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:05,315 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:35:07,952 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:35:07,954 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:35:08,419 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:10,419 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:12,420 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:12,457 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:35:12,513 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:35:12,599 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:35:13,420 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:35:14,421 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:15,421 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:16,422 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:18,422 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:20,423 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:21,874 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:35:21,929 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:35:22,013 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:35:22,424 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:35:22,424 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:23,092 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:35:23,093 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:35:23,424 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:24,424 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:26,425 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:28,426 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:30,427 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:31,338 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:35:31,391 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:35:31,478 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:35:32,477 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:35:32,477 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:33,477 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:34,477 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:35,693 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:35:38,135 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:35:38,136 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:35:38,479 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:40,479 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:40,572 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:35:40,626 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:35:40,711 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:35:41,480 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:35:42,480 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:43,480 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:44,481 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:46,481 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:48,482 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:49,631 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:35:49,684 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:35:49,768 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:35:50,483 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:35:50,483 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:51,483 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:53,221 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:35:53,223 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:35:55,485 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:57,485 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:35:58,565 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:35:58,620 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:35:58,709 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:35:59,486 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:35:59,486 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:01,487 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:03,487 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:05,488 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:06,116 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:36:07,403 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:36:07,455 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:36:07,542 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:36:07,542 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:07,543 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:36:08,321 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:36:08,323 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:36:09,543 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:11,543 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:13,544 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:15,545 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:16,015 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:36:16,068 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:36:16,151 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:36:16,545 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:36:17,546 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:19,546 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:21,547 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:23,414 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:36:23,415 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:36:23,548 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:24,414 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:36:24,464 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:36:24,548 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:36:24,549 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:36:25,548 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:27,549 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:29,549 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:31,550 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:32,507 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:36:32,562 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:36:32,649 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:36:33,578 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:36:33,578 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:35,579 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:36,521 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:36:37,579 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:38,513 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:36:38,515 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:36:39,580 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:40,401 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:36:40,455 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:36:40,547 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:36:40,580 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:36:41,581 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:43,581 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:45,582 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:47,583 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:47,942 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:36:47,995 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:36:48,117 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:36:48,583 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:36:49,583 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:51,584 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:53,584 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:53,719 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:36:53,720 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:36:54,943 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:36:54,993 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:36:55,076 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:36:55,585 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:36:55,585 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:56,586 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:36:58,586 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:00,587 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:01,341 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:37:01,393 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:37:01,476 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:37:01,587 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:37:02,588 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:03,588 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:04,588 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:06,589 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:06,956 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:37:07,212 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:37:07,265 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:37:07,351 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:37:07,589 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:37:08,589 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:08,944 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:37:08,946 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:37:10,590 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:12,525 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:37:12,578 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:37:12,662 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:12,664 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:37:13,663 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:37:14,663 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:16,664 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:17,279 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:37:17,326 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:37:17,409 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:37:17,664 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:37:18,665 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:20,665 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:21,576 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:37:21,628 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:37:21,717 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:37:22,715 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:37:22,716 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:24,059 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:37:24,060 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:37:24,716 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:25,333 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:37:25,387 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:37:25,471 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:37:25,716 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:37:26,717 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:28,427 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:37:28,471 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:37:28,553 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:37:28,717 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:37:28,718 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:30,718 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:31,507 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:37:31,673 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:37:31,756 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:37:32,755 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:37:32,755 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:36,756 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:37,433 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:37:38,757 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:39,107 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:37:39,107 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:37:42,758 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:43,347 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:37:43,393 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:37:43,476 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:37:43,759 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:37:44,759 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:46,760 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:50,761 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:52,762 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:54,149 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:37:54,150 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:37:54,833 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:37:54,888 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:37:54,972 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:37:55,763 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:37:56,763 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:37:58,764 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:01,764 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:05,766 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:06,236 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:38:06,286 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:38:06,368 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:38:06,766 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:38:07,767 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:07,841 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:38:09,229 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:38:09,230 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:38:11,768 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:13,769 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:15,770 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:17,554 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:38:17,603 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:38:17,685 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:38:17,770 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:38:19,771 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:21,772 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:24,438 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:38:24,439 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:38:25,773 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:27,774 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:28,845 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:38:28,896 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:38:28,981 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:38:29,775 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:38:29,775 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:33,776 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:35,777 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:37,777 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:38,212 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:38:39,510 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:38:39,512 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:38:40,072 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:38:40,124 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:38:40,207 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:38:40,778 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:38:41,779 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:44,780 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:48,781 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:50,782 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:51,222 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:38:51,275 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:38:51,358 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:38:51,782 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:38:52,783 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:53,783 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:54,650 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:38:54,651 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:38:56,784 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:38:58,785 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:00,786 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:02,343 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:39:02,397 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:39:02,481 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:39:02,787 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:39:03,787 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:04,787 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:06,788 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:08,579 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:39:08,789 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:09,895 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:39:09,897 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:39:12,790 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:13,380 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:39:13,432 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:39:13,515 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:39:13,791 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:39:14,791 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:15,791 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:16,792 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:20,793 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:22,794 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:24,401 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:39:24,454 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:39:24,536 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:39:24,795 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:39:24,962 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:39:24,964 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:39:26,795 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:27,796 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:29,796 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:31,797 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:33,798 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:35,260 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:39:35,312 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:39:35,391 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:39:35,798 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:39:36,799 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:37,799 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:38,956 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:39:39,800 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:40,205 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:39:40,207 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:39:41,801 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:45,802 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:46,060 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:39:46,111 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:39:46,196 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:39:46,802 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:39:47,803 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:48,803 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:49,803 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:53,805 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:55,267 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:39:55,269 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:39:55,805 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:56,705 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:39:56,756 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:39:56,835 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:39:57,834 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:39:57,834 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:39:58,835 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:01,836 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:03,836 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:05,837 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:07,406 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:40:07,446 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:40:07,531 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:40:07,838 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:40:07,838 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:08,838 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:09,367 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:40:10,361 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:40:10,363 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:40:11,839 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:13,840 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:16,841 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:18,002 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:40:18,054 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:40:18,137 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:40:18,841 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:40:18,842 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:20,842 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:22,843 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:24,843 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:25,673 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:40:25,674 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:40:28,611 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:40:28,665 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:40:28,748 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:40:28,845 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:40:28,845 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:30,846 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:32,846 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:34,847 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:38,848 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:39,213 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:40:39,266 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:40:39,349 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:40:39,732 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:40:39,849 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:40:40,849 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:41,005 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:40:41,006 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:40:42,850 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:46,851 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:48,851 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:49,689 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:40:49,741 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:40:49,827 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:40:49,852 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:40:50,852 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:51,853 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:53,853 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:56,064 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:40:56,066 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:40:57,855 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:40:59,855 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:00,036 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:41:00,089 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:41:00,172 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:41:00,856 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:41:01,856 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:03,857 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:07,858 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:09,859 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:10,091 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:41:10,372 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:41:10,424 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:41:10,506 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:41:10,859 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:41:11,134 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:41:11,135 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:41:11,860 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:13,860 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:17,862 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:19,863 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:20,646 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:41:20,724 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:41:20,807 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:41:20,863 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:41:21,863 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:23,864 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:26,551 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:41:26,552 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:41:27,865 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:29,866 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:30,980 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:41:31,035 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:41:31,114 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:41:31,867 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:41:31,867 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:34,868 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:38,870 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:40,456 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:41:40,870 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:41,166 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:41:41,215 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:41:41,295 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:41:41,746 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:41:41,747 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:41:41,871 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:41:42,871 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:44,872 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:46,873 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:50,874 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:51,243 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:41:51,296 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:41:51,380 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:41:51,874 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:41:52,875 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:54,876 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:56,876 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:41:56,972 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:41:56,973 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:42:00,878 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:01,946 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:42:02,115 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:42:02,197 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:42:02,878 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:42:02,879 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:03,879 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:05,879 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:09,881 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:10,923 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:42:11,881 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:11,911 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:42:11,963 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:42:12,049 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:42:12,196 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:42:12,197 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:42:12,882 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:42:12,882 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:13,882 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:15,883 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:17,884 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:21,726 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:42:21,782 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:42:21,866 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:42:21,885 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:42:21,885 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:22,885 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:23,886 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:25,886 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:27,438 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:42:27,440 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:42:27,887 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:29,888 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:31,361 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:42:31,415 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:42:31,496 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:42:31,888 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:42:31,889 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:32,889 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:35,890 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:37,890 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:39,891 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:40,880 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:42:40,933 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:42:41,039 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:42:41,367 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:42:41,933 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:42:41,934 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:42,527 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:42:42,529 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:42:44,934 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:48,936 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:50,426 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:42:50,479 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:42:50,610 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:42:50,937 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:42:50,937 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:51,937 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:52,937 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:54,938 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:56,939 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:57,753 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:42:57,754 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:42:58,939 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:42:59,884 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:42:59,937 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:43:00,019 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:43:01,017 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:43:01,018 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:02,018 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:03,018 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:07,020 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:09,020 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:09,195 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:43:09,246 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:43:09,328 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:43:10,021 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:43:10,021 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:11,021 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:11,783 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:43:12,823 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:43:12,824 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:43:13,022 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:15,022 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:17,023 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:18,459 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:43:18,509 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:43:18,589 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:43:19,024 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:43:19,024 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:20,024 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:22,025 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:24,026 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:26,027 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:27,456 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:43:27,511 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:43:27,596 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:43:27,925 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:43:27,926 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:43:28,027 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:43:28,027 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:30,028 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:32,028 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:34,029 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:36,030 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:36,339 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:43:36,393 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:43:36,477 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:43:37,030 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:43:38,031 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:40,032 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:42,032 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:42,190 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:43:43,148 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:43:43,149 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:43:44,033 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:45,091 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:43:45,145 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:43:45,229 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:43:46,034 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:43:46,034 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:48,035 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:51,036 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:53,037 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:53,633 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:43:53,678 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:43:53,758 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:43:54,037 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:43:55,038 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:57,038 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:43:58,356 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:43:58,358 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:43:59,039 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:01,040 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:01,875 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:44:01,928 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:44:02,034 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:44:02,041 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:44:03,041 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:05,042 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:07,042 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:09,043 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:09,942 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:44:09,989 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:44:10,073 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:44:11,072 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:44:11,073 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:12,626 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:44:13,073 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:13,607 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:44:13,608 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:44:15,074 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:17,074 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:17,600 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:44:17,652 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:44:17,738 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:44:18,075 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:44:19,075 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:21,076 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:23,076 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:24,978 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:44:25,024 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:44:25,105 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:25,107 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:44:26,106 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:44:27,106 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:28,699 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:44:28,701 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:44:29,107 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:31,107 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:31,980 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:44:32,034 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:44:32,117 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:44:33,116 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:44:33,117 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:35,117 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:37,118 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:38,674 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:44:38,727 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:44:38,807 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:44:39,118 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:44:39,119 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:40,119 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:42,119 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:43,085 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:44:43,962 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:44:43,963 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:44:44,120 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:44,766 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:44:44,817 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:44:44,899 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:44:45,120 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:44:45,121 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:46,121 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:48,122 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:50,122 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:50,190 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:44:50,244 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:44:50,329 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:44:51,123 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:44:51,123 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:52,123 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:54,124 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:55,071 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:44:55,123 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:44:55,209 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:44:56,208 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:44:56,208 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:57,208 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:58,208 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:44:59,013 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:44:59,015 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:44:59,241 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:44:59,294 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:44:59,377 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:45:00,209 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:45:00,209 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:45:02,210 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:45:03,034 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:45:03,086 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:45:03,165 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:45:03,210 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:45:04,210 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:45:06,211 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:45:06,237 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:45:06,286 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:45:06,366 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:45:07,212 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:45:08,212 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:45:09,504 DEBUG SenderThread:253545 [sender.py:send():235] send: history +2022-03-02 07:45:09,666 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:45:09,746 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:45:10,213 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:45:10,213 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:45:12,213 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:45:13,471 DEBUG SenderThread:253545 [sender.py:send():235] send: telemetry +2022-03-02 07:45:13,475 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:13,477 DEBUG SenderThread:253545 [sender.py:send():235] send: exit +2022-03-02 07:45:13,477 INFO SenderThread:253545 [sender.py:send_exit():371] handling exit code: 1 +2022-03-02 07:45:13,477 INFO SenderThread:253545 [sender.py:send_exit():373] handling runtime: 4105 +2022-03-02 07:45:13,530 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:45:13,530 INFO SenderThread:253545 [sender.py:send_exit():379] send defer +2022-03-02 07:45:13,530 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:13,531 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: defer +2022-03-02 07:45:13,531 INFO HandlerThread:253545 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-02 07:45:13,531 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: defer +2022-03-02 07:45:13,531 INFO SenderThread:253545 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-02 07:45:13,531 INFO SenderThread:253545 [sender.py:transition_state():392] send defer: 1 +2022-03-02 07:45:13,532 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: defer +2022-03-02 07:45:13,532 INFO HandlerThread:253545 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-02 07:45:13,646 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:13,646 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: defer +2022-03-02 07:45:13,647 INFO SenderThread:253545 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-02 07:45:13,647 INFO SenderThread:253545 [sender.py:transition_state():392] send defer: 2 +2022-03-02 07:45:13,647 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:13,647 DEBUG SenderThread:253545 [sender.py:send():235] send: stats +2022-03-02 07:45:13,648 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: defer +2022-03-02 07:45:13,648 INFO HandlerThread:253545 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-02 07:45:13,648 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: defer +2022-03-02 07:45:13,648 INFO SenderThread:253545 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-02 07:45:13,648 INFO SenderThread:253545 [sender.py:transition_state():392] send defer: 3 +2022-03-02 07:45:13,648 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: defer +2022-03-02 07:45:13,648 INFO HandlerThread:253545 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-02 07:45:13,706 DEBUG SenderThread:253545 [sender.py:send():235] send: summary +2022-03-02 07:45:13,787 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:13,790 INFO SenderThread:253545 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:45:13,790 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: defer +2022-03-02 07:45:13,790 INFO SenderThread:253545 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-02 07:45:13,790 INFO SenderThread:253545 [sender.py:transition_state():392] send defer: 4 +2022-03-02 07:45:13,790 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:13,791 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: defer +2022-03-02 07:45:13,791 INFO HandlerThread:253545 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-02 07:45:13,791 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: defer +2022-03-02 07:45:13,791 INFO SenderThread:253545 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-02 07:45:13,892 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:14,214 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:45:14,214 INFO Thread-8 :253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:45:15,312 INFO SenderThread:253545 [sender.py:transition_state():392] send defer: 5 +2022-03-02 07:45:15,313 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:15,313 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: defer +2022-03-02 07:45:15,313 INFO HandlerThread:253545 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-02 07:45:15,314 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: defer +2022-03-02 07:45:15,314 INFO SenderThread:253545 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-02 07:45:15,314 INFO SenderThread:253545 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-02 07:45:15,415 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:16,220 INFO SenderThread:253545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/config.yaml +2022-03-02 07:45:16,220 INFO SenderThread:253545 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files +2022-03-02 07:45:16,220 INFO SenderThread:253545 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-metadata.json wandb-metadata.json +2022-03-02 07:45:16,220 INFO SenderThread:253545 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log output.log +2022-03-02 07:45:16,220 INFO SenderThread:253545 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json wandb-summary.json +2022-03-02 07:45:16,221 INFO SenderThread:253545 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/requirements.txt requirements.txt +2022-03-02 07:45:16,223 INFO SenderThread:253545 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/config.yaml config.yaml +2022-03-02 07:45:16,229 INFO SenderThread:253545 [sender.py:transition_state():392] send defer: 6 +2022-03-02 07:45:16,229 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:16,232 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: defer +2022-03-02 07:45:16,232 INFO HandlerThread:253545 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-02 07:45:16,233 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: defer +2022-03-02 07:45:16,233 INFO SenderThread:253545 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-02 07:45:16,233 INFO SenderThread:253545 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 07:45:16,331 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:16,331 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:16,433 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:16,433 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:16,533 INFO Thread-14 :253545 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/requirements.txt +2022-03-02 07:45:16,534 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:16,534 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:16,554 INFO Thread-12 :253545 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/output.log +2022-03-02 07:45:16,594 INFO Thread-15 :253545 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/config.yaml +2022-03-02 07:45:16,636 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:16,636 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:16,667 INFO Thread-13 :253545 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/files/wandb-summary.json +2022-03-02 07:45:16,737 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:16,737 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:16,838 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:16,839 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:16,868 INFO Thread-7 :253545 [sender.py:transition_state():392] send defer: 7 +2022-03-02 07:45:16,868 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: defer +2022-03-02 07:45:16,869 INFO HandlerThread:253545 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-02 07:45:16,869 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: defer +2022-03-02 07:45:16,869 INFO SenderThread:253545 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-02 07:45:16,940 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:18,393 INFO SenderThread:253545 [sender.py:transition_state():392] send defer: 8 +2022-03-02 07:45:18,393 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:18,394 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: defer +2022-03-02 07:45:18,394 INFO HandlerThread:253545 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-02 07:45:18,394 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: defer +2022-03-02 07:45:18,394 INFO SenderThread:253545 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-02 07:45:18,394 INFO SenderThread:253545 [sender.py:transition_state():392] send defer: 9 +2022-03-02 07:45:18,396 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: defer +2022-03-02 07:45:18,396 DEBUG SenderThread:253545 [sender.py:send():235] send: final +2022-03-02 07:45:18,396 INFO HandlerThread:253545 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-02 07:45:18,396 DEBUG SenderThread:253545 [sender.py:send():235] send: footer +2022-03-02 07:45:18,397 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: defer +2022-03-02 07:45:18,397 INFO SenderThread:253545 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-02 07:45:18,495 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 07:45:18,495 DEBUG SenderThread:253545 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 07:45:18,495 INFO SenderThread:253545 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 07:45:18,558 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: get_summary +2022-03-02 07:45:18,652 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-02 07:45:18,654 DEBUG HandlerThread:253545 [handler.py:handle_request():131] handle_request: shutdown +2022-03-02 07:45:18,655 INFO HandlerThread:253545 [handler.py:finish():739] shutting down handler +2022-03-02 07:45:19,396 INFO WriterThread:253545 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/run-bmivw6vv.wandb +2022-03-02 07:45:19,557 INFO SenderThread:253545 [sender.py:finish():1075] shutting down sender +2022-03-02 07:45:19,557 INFO SenderThread:253545 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 07:45:19,557 INFO SenderThread:253545 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 07:45:19,564 INFO MainThread:253545 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220302_063647-bmivw6vv/logs/debug.log b/wandb/run-20220302_063647-bmivw6vv/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..02bd281ddc5de8cd7ceeaaaa1a69779e2d526b25 --- /dev/null +++ b/wandb/run-20220302_063647-bmivw6vv/logs/debug.log @@ -0,0 +1,141 @@ +2022-03-02 06:36:47,141 INFO MainThread:253446 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-02 06:36:47,141 INFO MainThread:253446 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-02 06:36:47,141 INFO MainThread:253446 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-02 06:36:47,141 INFO MainThread:253446 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-02 06:36:47,141 INFO MainThread:253446 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/logs/debug.log +2022-03-02 06:36:47,141 INFO MainThread:253446 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_063647-bmivw6vv/logs/debug-internal.log +2022-03-02 06:36:47,141 INFO MainThread:253446 [wandb_init.py:init():420] calling init triggers +2022-03-02 06:36:47,141 INFO MainThread:253446 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-02 06:36:47,141 INFO MainThread:253446 [wandb_init.py:init():471] starting backend +2022-03-02 06:36:47,141 INFO MainThread:253446 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-02 06:36:47,197 INFO MainThread:253446 [backend.py:ensure_launched():219] starting backend process... +2022-03-02 06:36:47,251 INFO MainThread:253446 [backend.py:ensure_launched():224] started backend process with pid: 253545 +2022-03-02 06:36:47,254 INFO MainThread:253446 [wandb_init.py:init():480] backend started and connected +2022-03-02 06:36:47,263 INFO MainThread:253446 [wandb_init.py:init():550] updated telemetry +2022-03-02 06:36:47,391 INFO MainThread:253446 [wandb_init.py:init():581] communicating current version +2022-03-02 06:36:48,104 INFO MainThread:253446 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-02 06:36:48,104 INFO MainThread:253446 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-02 06:36:48,202 INFO MainThread:253446 [wandb_init.py:init():624] starting run threads in backend +2022-03-02 06:36:48,309 INFO MainThread:253446 [wandb_run.py:_console_start():1827] atexit reg +2022-03-02 06:36:48,309 INFO MainThread:253446 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-02 06:36:48,310 INFO MainThread:253446 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-02 06:36:48,312 INFO MainThread:253446 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-02 06:36:48,312 INFO MainThread:253446 [wandb_init.py:init():651] run started, returning control to user process +2022-03-02 06:36:48,314 INFO MainThread:253446 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 14, 'per_device_eval_batch_size': 14, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar02_06-36-06_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 14, 'eval_batch_size': 14} +2022-03-02 06:36:48,317 INFO MainThread:253446 [wandb_watch.py:watch():43] Watching +2022-03-02 07:45:11,103 INFO MainThread:253446 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-02 07:45:11,105 INFO MainThread:253446 [wandb_run.py:_restore():1769] restore +2022-03-02 07:45:13,531 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 07:45:13,648 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 07:45:13,791 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 07:45:15,313 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 07:45:16,230 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2345838 +} + +2022-03-02 07:45:16,332 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 889444 + total_bytes: 2345838 +} + +2022-03-02 07:45:16,433 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2345838 + total_bytes: 2345838 +} + +2022-03-02 07:45:16,535 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2345838 + total_bytes: 2345838 +} + +2022-03-02 07:45:16,636 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2345838 + total_bytes: 2345838 +} + +2022-03-02 07:45:16,738 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2345838 + total_bytes: 2345838 +} + +2022-03-02 07:45:16,839 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2345838 + total_bytes: 2345838 +} + +2022-03-02 07:45:18,394 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2345838 + total_bytes: 2345838 +} + +2022-03-02 07:45:18,557 INFO MainThread:253446 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2345838 + total_bytes: 2345838 +} +local_info { +} + +2022-03-02 07:45:19,706 INFO MainThread:253446 [wandb_run.py:_append_history():2144] rendering history +2022-03-02 07:45:19,707 INFO MainThread:253446 [wandb_run.py:_append_summary():2102] rendering summary +2022-03-02 07:45:19,708 INFO MainThread:253446 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220302_063647-bmivw6vv/run-bmivw6vv.wandb b/wandb/run-20220302_063647-bmivw6vv/run-bmivw6vv.wandb new file mode 100644 index 0000000000000000000000000000000000000000..cbda68507116193db15b3934383ec07cca2ec986 --- /dev/null +++ b/wandb/run-20220302_063647-bmivw6vv/run-bmivw6vv.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95a27d78f42670f0237b947ed81744c2bc569e3ecc5ba6f04e6ff4143e1036d1 +size 29743466 diff --git a/wandb/run-20220302_074637-35y19oi2/files/config.yaml b/wandb/run-20220302_074637-35y19oi2/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7d9786cb1d155c5b5d53ac01c38bde19f829efb2 --- /dev/null +++ b/wandb/run-20220302_074637-35y19oi2/files/config.yaml @@ -0,0 +1,11321 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + - 1: train/loss + 5: 1 + 6: + - 1 + - 1: train/learning_rate + 5: 1 + 6: + - 1 + - 1: train/epoch + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + python_version: 3.9.5 + start_time: 1646207197 + t: + 1: + - 1 + - 5 + - 11 + 2: + - 1 + - 5 + - 11 + 3: + - 1 + - 7 + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 14 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 8 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0001 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar02_07-45-55_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 14 +per_device_train_batch_size: + desc: null + value: 14 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 14 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220302_074637-35y19oi2/files/output.log b/wandb/run-20220302_074637-35y19oi2/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..578dff01ce8aa6c935d849e04e30f64d74c00cdb --- /dev/null +++ b/wandb/run-20220302_074637-35y19oi2/files/output.log @@ -0,0 +1,1694 @@ + + + 0%| | 0/254 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:46:45,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:46:48,163 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:46:51,202 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:46:54,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:46:57,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:00,202 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7871, 'learning_rate': 2.0000000000000002e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:03,120 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 1/254 [00:25<1:45:26, 25.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:47:06,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:09,393 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:12,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:15,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:18,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:21,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:23,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:26,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7899, 'learning_rate': 4.0000000000000003e-07, 'epoch': 0.01} + + 1%|▋ | 2/254 [00:48<1:41:18, 24.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:47:29,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:32,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:35,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:38,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:41,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:44,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:47,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9306, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:50,013 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▉ | 3/254 [01:11<1:39:09, 23.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:47:53,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:55,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:47:58,686 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:01,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:04,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:07,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:10,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8908, 'learning_rate': 8.000000000000001e-07, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:12,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▎ | 4/254 [01:34<1:37:26, 23.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:48:15,907 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:18,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:21,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:24,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:27,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:29,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:32,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:35,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 5/254 [01:57<1:35:37, 23.04s/it] + + 2%|█▌ | 5/254 [01:57<1:35:37, 23.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:48:38,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:41,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:43,895 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:46,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:49,434 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:52,259 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:55,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:48:57,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 6/254 [02:19<1:34:27, 22.85s/it] + + 2%|█▉ | 6/254 [02:19<1:34:27, 22.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:49:00,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:03,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:06,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:09,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:11,913 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:14,726 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:17,493 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:20,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 7/254 [02:41<1:33:31, 22.72s/it] + + 3%|██▏ | 7/254 [02:41<1:33:31, 22.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:49:23,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:26,020 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:28,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:31,625 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:34,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:37,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:39,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9372, 'learning_rate': 1.4000000000000001e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:42,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▌ | 8/254 [03:04<1:32:45, 22.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:49:45,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:48,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:51,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:53,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:56,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:49:59,503 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:02,263 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.844, 'learning_rate': 1.6000000000000001e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:05,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|██▊ | 9/254 [03:26<1:31:58, 22.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:50:07,895 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:10,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:13,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:16,199 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:18,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:21,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:24,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:27,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 10/254 [03:48<1:31:07, 22.41s/it] + + 4%|███▏ | 10/254 [03:48<1:31:07, 22.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:50:30,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:32,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:35,380 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:38,094 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:40,789 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:43,465 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:46,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:48,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 11/254 [04:10<1:29:58, 22.22s/it] + + 4%|███▍ | 11/254 [04:10<1:29:58, 22.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:50:51,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:54,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:57,198 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:50:59,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:02,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:05,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:07,939 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8076, 'learning_rate': 2.2e-06, 'epoch': 0.05} +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:10,661 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 5%|███▊ | 12/254 [04:32<1:29:00, 22.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:51:13,389 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:16,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:18,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:22,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:24,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:27,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:30,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:32,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 13/254 [04:54<1:28:43, 22.09s/it] + 5%|████ | 13/254 [04:54<1:28:43, 22.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:51:35,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 13/254 [04:54<1:28:43, 22.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:51:35,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:41,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:51:35,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:41,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:51:35,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:46,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:51:35,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:46,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:51:35,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:51:51,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:51:35,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:16<1:27:42, 21.93s/it]g-point operations will not be computed-02 07:51:35,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:16<1:27:42, 21.93s/it]g-point operations will not be computed-02 07:51:35,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:16<1:27:42, 21.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:51:57,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:16<1:27:42, 21.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:51:57,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:02,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:51:57,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:02,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:51:57,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:07,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:51:57,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:07,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:51:57,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:12,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:51:57,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:12,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:51:57,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:37<1:26:22, 21.68s/it]g-point operations will not be computed-02 07:51:57,075 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:37<1:26:22, 21.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:52:18,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:37<1:26:22, 21.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:52:18,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:23,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:52:18,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:23,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:52:18,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:28,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:52:18,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:28,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:52:18,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:33,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:52:18,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [05:58<1:25:09, 21.47s/it]g-point operations will not be computed-02 07:52:18,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [05:58<1:25:09, 21.47s/it]g-point operations will not be computed-02 07:52:18,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [05:58<1:25:09, 21.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:52:39,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [05:58<1:25:09, 21.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:52:39,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:44,411 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:52:39,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:44,411 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:52:39,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:49,667 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:52:39,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:49,667 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:52:39,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:54,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:52:39,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:54,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:52:39,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:52:54,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:52:39,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:19<1:24:20, 21.35s/it]g-point operations will not be computed-02 07:52:39,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:19<1:24:20, 21.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:53:00,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:19<1:24:20, 21.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:53:00,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:05,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:00,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:05,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:00,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:10,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:00,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:10,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:00,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:15,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:00,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:15,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:00,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:15,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:00,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 18/254 [06:40<1:23:21, 21.19s/it]g-point operations will not be computed-02 07:53:00,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 18/254 [06:40<1:23:21, 21.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:53:21,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 18/254 [06:40<1:23:21, 21.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:53:21,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:26,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:21,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:26,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:21,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:31,405 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:21,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:31,405 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:21,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:36,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:21,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:36,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:21,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:00<1:22:29, 21.06s/it]g-point operations will not be computed-02 07:53:21,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:00<1:22:29, 21.06s/it]g-point operations will not be computed-02 07:53:21,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:00<1:22:29, 21.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:53:41,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:00<1:22:29, 21.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:53:41,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:46,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:41,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:46,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:41,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:52,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:41,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:52,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:41,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:57,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:41,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:57,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:41,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:53:57,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:53:41,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 20/254 [07:21<1:21:35, 20.92s/it]g-point operations will not be computed-02 07:53:41,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 20/254 [07:21<1:21:35, 20.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:54:02,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 20/254 [07:21<1:21:35, 20.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:54:02,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:07,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:02,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:07,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:02,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:12,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:02,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:12,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:02,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:17,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:02,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:17,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:02,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:17,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:02,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:41<1:20:35, 20.75s/it]g-point operations will not be computed-02 07:54:02,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:41<1:20:35, 20.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:54:22,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:41<1:20:35, 20.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:54:22,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:27,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:22,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:27,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:22,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:32,783 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:22,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:32,783 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:22,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:37,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:22,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:01<1:19:35, 20.59s/it]g-point operations will not be computed-02 07:54:22,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:01<1:19:35, 20.59s/it]g-point operations will not be computed-02 07:54:22,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:01<1:19:35, 20.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:54:42,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:01<1:19:35, 20.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:54:42,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:47,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:42,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:47,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:42,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:52,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:42,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:52,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:42,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:54:57,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:54:42,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:22<1:18:39, 20.43s/it]g-point operations will not be computed-02 07:54:42,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:22<1:18:39, 20.43s/it]g-point operations will not be computed-02 07:54:42,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:22<1:18:39, 20.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:02,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:22<1:18:39, 20.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:02,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:55:07,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:55:02,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:55:07,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:55:02,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:55:12,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:55:02,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:55:12,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:55:02,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:55:17,869 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:55:02,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:55:17,869 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:55:02,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:42<1:17:53, 20.32s/it]g-point operations will not be computed-02 07:55:02,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:42<1:17:53, 20.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:42<1:17:53, 20.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:42<1:17:53, 20.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:42<1:17:53, 20.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:42<1:17:53, 20.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:42<1:17:53, 20.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:42<1:17:53, 20.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:02<1:17:19, 20.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:02<1:17:19, 20.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4866, 'learning_rate': 4.800000000000001e-06, 'epoch': 0.1} + 10%|███████▊ | 25/254 [09:02<1:17:19, 20.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:02<1:17:19, 20.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:02<1:17:19, 20.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:02<1:17:19, 20.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:02<1:17:19, 20.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:02<1:17:19, 20.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:02<1:17:19, 20.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:22<1:16:32, 20.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:22<1:16:32, 20.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:22<1:16:32, 20.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:22<1:16:32, 20.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:22<1:16:32, 20.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:22<1:16:32, 20.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:22<1:16:32, 20.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:22<1:16:32, 20.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:22<1:16:32, 20.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:41<1:15:25, 19.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:41<1:15:25, 19.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:41<1:15:25, 19.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:41<1:15:25, 19.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:41<1:15:25, 19.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:41<1:15:25, 19.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:41<1:15:25, 19.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:41<1:15:25, 19.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:00<1:14:24, 19.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:00<1:14:24, 19.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4714, 'learning_rate': 5.4e-06, 'epoch': 0.11} + 11%|████████▊ | 28/254 [10:00<1:14:24, 19.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:00<1:14:24, 19.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:00<1:14:24, 19.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:00<1:14:24, 19.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:00<1:14:24, 19.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:00<1:14:24, 19.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:20<1:13:35, 19.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:20<1:13:35, 19.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3393, 'learning_rate': 5.600000000000001e-06, 'epoch': 0.11} + 11%|█████████▏ | 29/254 [10:20<1:13:35, 19.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:20<1:13:35, 19.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:20<1:13:35, 19.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:20<1:13:35, 19.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:20<1:13:35, 19.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:20<1:13:35, 19.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:39<1:12:36, 19.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:39<1:12:36, 19.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5015, 'learning_rate': 5.8e-06, 'epoch': 0.12} + 12%|█████████▍ | 30/254 [10:39<1:12:36, 19.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:39<1:12:36, 19.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:39<1:12:36, 19.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:39<1:12:36, 19.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:39<1:12:36, 19.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:39<1:12:36, 19.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:39<1:12:36, 19.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [10:57<1:11:28, 19.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [10:57<1:11:28, 19.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [10:57<1:11:28, 19.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [10:57<1:11:28, 19.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [10:57<1:11:28, 19.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [10:57<1:11:28, 19.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [10:57<1:11:28, 19.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [10:57<1:11:28, 19.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [10:57<1:11:28, 19.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:16<1:10:33, 19.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:16<1:10:33, 19.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:16<1:10:33, 19.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:16<1:10:33, 19.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:16<1:10:33, 19.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:16<1:10:33, 19.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:16<1:10:33, 19.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:16<1:10:33, 19.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:16<1:10:33, 19.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:55:22,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:35<1:09:30, 18.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:35<1:09:30, 18.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:35<1:09:30, 18.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:35<1:09:30, 18.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:35<1:09:30, 18.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:35<1:09:30, 18.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:35<1:09:30, 18.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [11:52<1:08:10, 18.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [11:52<1:08:10, 18.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4072, 'learning_rate': 6.6e-06, 'epoch': 0.13} + 13%|██████████▋ | 34/254 [11:52<1:08:10, 18.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [11:52<1:08:10, 18.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [11:52<1:08:10, 18.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [11:52<1:08:10, 18.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [11:52<1:08:10, 18.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:58:48,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:58:48,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3752, 'learning_rate': 6.800000000000001e-06, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 07:58:48,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:58:48,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:58:48,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:58:48,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:58:48,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:58:48,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 07:58:48,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 36/254 [12:27<1:05:26, 18.01s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 36/254 [12:27<1:05:26, 18.01s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 36/254 [12:27<1:05:26, 18.01s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 36/254 [12:27<1:05:26, 18.01s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 36/254 [12:27<1:05:26, 18.01s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 36/254 [12:27<1:05:26, 18.01s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 36/254 [12:27<1:05:26, 18.01s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 36/254 [12:27<1:05:26, 18.01s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:44<1:04:03, 17.71s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:44<1:04:03, 17.71s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4767, 'learning_rate': 7.2e-06, 'epoch': 0.15} + 15%|███████████▋ | 37/254 [12:44<1:04:03, 17.71s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:44<1:04:03, 17.71s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:44<1:04:03, 17.71s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:44<1:04:03, 17.71s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:44<1:04:03, 17.71s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:44<1:04:03, 17.71s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:02<1:02:58, 17.49s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:02<1:02:58, 17.49s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.403, 'learning_rate': 7.4e-06, 'epoch': 0.15} + 15%|███████████▉ | 38/254 [13:02<1:02:58, 17.49s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:02<1:02:58, 17.49s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:02<1:02:58, 17.49s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:02<1:02:58, 17.49s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:02<1:02:58, 17.49s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:02<1:02:58, 17.49s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:02<1:02:58, 17.49s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:17<1:01:01, 17.03s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:17<1:01:01, 17.03s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:17<1:01:01, 17.03s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:17<1:01:01, 17.03s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:17<1:01:01, 17.03s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:17<1:01:01, 17.03s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:17<1:01:01, 17.03s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:17<1:01:01, 17.03s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:17<1:01:01, 17.03s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 40/254 [13:33<58:46, 16.48s/it]g-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:15,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:15,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:15,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:15,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:15,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:15,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:15,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 07:58:15,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [13:47<56:13, 15.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [13:47<56:13, 15.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [13:47<56:13, 15.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [13:47<56:13, 15.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [13:47<56:13, 15.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:37,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 42/254 [14:00<53:25, 15.12s/it]g-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 42/254 [14:00<53:25, 15.12s/it]g-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4575, 'learning_rate': 8.200000000000001e-06, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:43,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:43,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:43,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:50,093 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 43/254 [14:13<50:20, 14.31s/it]g-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 43/254 [14:13<50:20, 14.31s/it]g-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3135, 'learning_rate': 8.400000000000001e-06, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:56,121 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:00:56,121 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:00,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:00,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:00,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:00:27,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████▏ | 44/254 [14:24<47:03, 13.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:01:04,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████▏ | 44/254 [14:24<47:03, 13.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:01:04,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:08,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:04,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:08,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:04,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:12,069 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:04,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 45/254 [14:35<43:31, 12.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 45/254 [14:35<43:31, 12.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4926, 'learning_rate': 8.8e-06, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:18,125 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:20,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:22,576 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:22,576 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:24,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:26,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:28,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:30,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:30,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:32,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:34,415 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:36,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:37,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:37,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:41,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:42,582 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:44,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:44,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:46,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:47,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:49,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:49,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3864, 'learning_rate': 9.800000000000001e-06, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:55,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:01:55,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.311, 'learning_rate': 1e-05, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:02:01,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [15:58<53:13, 15.81s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [15:58<53:13, 15.81s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [15:58<53:13, 15.81s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [15:58<53:13, 15.81s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [15:58<53:13, 15.81s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [15:58<53:13, 15.81s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [15:58<53:13, 15.81s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [15:58<53:13, 15.81s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [15:58<53:13, 15.81s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:21<1:00:16, 17.99s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:21<1:00:16, 17.99s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:21<1:00:16, 17.99s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:21<1:00:16, 17.99s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:21<1:00:16, 17.99s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:21<1:00:16, 17.99s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:21<1:00:16, 17.99s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:21<1:00:16, 17.99s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:44<1:04:47, 19.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:44<1:04:47, 19.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2127, 'learning_rate': 1.06e-05, 'epoch': 0.21} + 21%|█████████████████ | 54/254 [16:44<1:04:47, 19.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:44<1:04:47, 19.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:44<1:04:47, 19.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:44<1:04:47, 19.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:44<1:04:47, 19.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:44<1:04:47, 19.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:07<1:07:48, 20.45s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:07<1:07:48, 20.45s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.128, 'learning_rate': 1.08e-05, 'epoch': 0.22} + 22%|█████████████████▎ | 55/254 [17:07<1:07:48, 20.45s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:07<1:07:48, 20.45s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:07<1:07:48, 20.45s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:07<1:07:48, 20.45s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:07<1:07:48, 20.45s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:07<1:07:48, 20.45s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:30<1:09:38, 21.11s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:30<1:09:38, 21.11s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1871, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.22} + 22%|█████████████████▋ | 56/254 [17:30<1:09:38, 21.11s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:30<1:09:38, 21.11s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:30<1:09:38, 21.11s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:30<1:09:38, 21.11s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:30<1:09:38, 21.11s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:30<1:09:38, 21.11s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:52<1:10:26, 21.46s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:52<1:10:26, 21.46s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2409, 'learning_rate': 1.1200000000000001e-05, 'epoch': 0.22} + 22%|█████████████████▉ | 57/254 [17:52<1:10:26, 21.46s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:52<1:10:26, 21.46s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:52<1:10:26, 21.46s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:52<1:10:26, 21.46s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:52<1:10:26, 21.46s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:52<1:10:26, 21.46s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:52<1:10:26, 21.46s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:14<1:11:06, 21.77s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:14<1:11:06, 21.77s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:14<1:11:06, 21.77s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:14<1:11:06, 21.77s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:14<1:11:06, 21.77s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:14<1:11:06, 21.77s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:14<1:11:06, 21.77s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:14<1:11:06, 21.77s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:14<1:11:06, 21.77s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:37<1:11:08, 21.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:37<1:11:08, 21.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:37<1:11:08, 21.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:37<1:11:08, 21.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:37<1:11:08, 21.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:37<1:11:08, 21.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:37<1:11:08, 21.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:37<1:11:08, 21.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:37<1:11:08, 21.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [18:59<1:11:02, 21.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [18:59<1:11:02, 21.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [18:59<1:11:02, 21.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [18:59<1:11:02, 21.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [18:59<1:11:02, 21.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [18:59<1:11:02, 21.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [18:59<1:11:02, 21.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [18:59<1:11:02, 21.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:21<1:10:42, 21.98s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:21<1:10:42, 21.98s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.278, 'learning_rate': 1.2e-05, 'epoch': 0.24} + 24%|███████████████████▏ | 61/254 [19:21<1:10:42, 21.98s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:21<1:10:42, 21.98s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:21<1:10:42, 21.98s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:21<1:10:42, 21.98s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:21<1:10:42, 21.98s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:21<1:10:42, 21.98s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [19:42<1:10:09, 21.93s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [19:42<1:10:09, 21.93s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1953, 'learning_rate': 1.22e-05, 'epoch': 0.24} + 24%|███████████████████▌ | 62/254 [19:42<1:10:09, 21.93s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [19:42<1:10:09, 21.93s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [19:42<1:10:09, 21.93s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [19:42<1:10:09, 21.93s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [19:42<1:10:09, 21.93s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [19:42<1:10:09, 21.93s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [19:42<1:10:09, 21.93s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:05<1:10:09, 22.04s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:05<1:10:09, 22.04s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:05<1:10:09, 22.04s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:05<1:10:09, 22.04s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:05<1:10:09, 22.04s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:05<1:10:09, 22.04s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:05<1:10:09, 22.04s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:05<1:10:09, 22.04s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:05<1:10:09, 22.04s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:27<1:09:31, 21.95s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:27<1:09:31, 21.95s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:27<1:09:31, 21.95s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:27<1:09:31, 21.95s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:27<1:09:31, 21.95s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:27<1:09:31, 21.95s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:27<1:09:31, 21.95s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:27<1:09:31, 21.95s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:27<1:09:31, 21.95s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:48<1:08:39, 21.80s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:48<1:08:39, 21.80s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:48<1:08:39, 21.80s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:48<1:08:39, 21.80s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:48<1:08:39, 21.80s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:48<1:08:39, 21.80s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:48<1:08:39, 21.80s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:48<1:08:39, 21.80s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:09<1:07:47, 21.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:09<1:07:47, 21.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2909, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.26} + 26%|████████████████████▊ | 66/254 [21:09<1:07:47, 21.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:09<1:07:47, 21.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:09<1:07:47, 21.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:09<1:07:47, 21.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:09<1:07:47, 21.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:09<1:07:47, 21.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:30<1:06:57, 21.48s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:30<1:06:57, 21.48s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.224, 'learning_rate': 1.32e-05, 'epoch': 0.26} + 26%|█████████████████████ | 67/254 [21:30<1:06:57, 21.48s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:30<1:06:57, 21.48s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:30<1:06:57, 21.48s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:30<1:06:57, 21.48s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:30<1:06:57, 21.48s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:30<1:06:57, 21.48s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:51<1:06:05, 21.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:51<1:06:05, 21.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2762, 'learning_rate': 1.3400000000000002e-05, 'epoch': 0.27} + 27%|█████████████████████▍ | 68/254 [21:51<1:06:05, 21.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:51<1:06:05, 21.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:51<1:06:05, 21.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:51<1:06:05, 21.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:51<1:06:05, 21.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:51<1:06:05, 21.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:51<1:06:05, 21.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:12<1:05:18, 21.18s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:12<1:05:18, 21.18s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:12<1:05:18, 21.18s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:12<1:05:18, 21.18s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:12<1:05:18, 21.18s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:12<1:05:18, 21.18s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:12<1:05:18, 21.18s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:12<1:05:18, 21.18s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:12<1:05:18, 21.18s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2109, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.28} + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:33<1:04:25, 21.01s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:14<1:02:45, 20.69s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:14<1:02:45, 20.69s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2057, 'learning_rate': 1.42e-05, 'epoch': 0.28} + 28%|██████████████████████▋ | 72/254 [23:14<1:02:45, 20.69s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:14<1:02:45, 20.69s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:14<1:02:45, 20.69s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:14<1:02:45, 20.69s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:14<1:02:45, 20.69s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:14<1:02:45, 20.69s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:14<1:02:45, 20.69s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:34<1:01:57, 20.54s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:34<1:01:57, 20.54s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:34<1:01:57, 20.54s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:34<1:01:57, 20.54s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:34<1:01:57, 20.54s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:34<1:01:57, 20.54s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:34<1:01:57, 20.54s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:34<1:01:57, 20.54s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:34<1:01:57, 20.54s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [23:54<1:01:20, 20.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [23:54<1:01:20, 20.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [23:54<1:01:20, 20.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [23:54<1:01:20, 20.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [23:54<1:01:20, 20.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [23:54<1:01:20, 20.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [23:54<1:01:20, 20.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [23:54<1:01:20, 20.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [23:54<1:01:20, 20.44s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:14<1:00:55, 20.42s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:14<1:00:55, 20.42s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:14<1:00:55, 20.42s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:14<1:00:55, 20.42s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:14<1:00:55, 20.42s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:14<1:00:55, 20.42s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:14<1:00:55, 20.42s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:14<1:00:55, 20.42s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:34<1:00:01, 20.23s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:34<1:00:01, 20.23s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2807, 'learning_rate': 1.5e-05, 'epoch': 0.3} + 30%|███████████████████████▉ | 76/254 [24:34<1:00:01, 20.23s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:34<1:00:01, 20.23s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:34<1:00:01, 20.23s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:34<1:00:01, 20.23s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:34<1:00:01, 20.23s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:34<1:00:01, 20.23s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [24:54<59:08, 20.05s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [24:54<59:08, 20.05s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2235, 'learning_rate': 1.52e-05, 'epoch': 0.3} + 30%|████████████████████████▊ | 77/254 [24:54<59:08, 20.05s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [24:54<59:08, 20.05s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [24:54<59:08, 20.05s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [24:54<59:08, 20.05s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [24:54<59:08, 20.05s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [24:54<59:08, 20.05s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:13<58:18, 19.88s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:13<58:18, 19.88s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2436, 'learning_rate': 1.54e-05, 'epoch': 0.31} + 31%|█████████████████████████▏ | 78/254 [25:13<58:18, 19.88s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:13<58:18, 19.88s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:13<58:18, 19.88s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:13<58:18, 19.88s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:13<58:18, 19.88s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:13<58:18, 19.88s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:32<57:15, 19.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:32<57:15, 19.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1341, 'learning_rate': 1.56e-05, 'epoch': 0.31} + 31%|█████████████████████████▌ | 79/254 [25:32<57:15, 19.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:32<57:15, 19.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:32<57:15, 19.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:32<57:15, 19.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:32<57:15, 19.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:32<57:15, 19.63s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:51<56:11, 19.38s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:51<56:11, 19.38s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2765, 'learning_rate': 1.58e-05, 'epoch': 0.31} + 31%|█████████████████████████▊ | 80/254 [25:51<56:11, 19.38s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:51<56:11, 19.38s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:51<56:11, 19.38s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:51<56:11, 19.38s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:51<56:11, 19.38s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:51<56:11, 19.38s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:51<56:11, 19.38s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:10<55:19, 19.19s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:10<55:19, 19.19s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:10<55:19, 19.19s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:10<55:19, 19.19s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:10<55:19, 19.19s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:10<55:19, 19.19s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:10<55:19, 19.19s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:10<55:19, 19.19s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:10<55:19, 19.19s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:28<54:22, 18.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:28<54:22, 18.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:28<54:22, 18.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:28<54:22, 18.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:28<54:22, 18.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:28<54:22, 18.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:28<54:22, 18.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:28<54:22, 18.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:28<54:22, 18.97s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:47<53:28, 18.76s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:47<53:28, 18.76s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:47<53:28, 18.76s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:47<53:28, 18.76s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:47<53:28, 18.76s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:47<53:28, 18.76s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:47<53:28, 18.76s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:47<53:28, 18.76s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:04<52:23, 18.49s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:04<52:23, 18.49s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2277, 'learning_rate': 1.66e-05, 'epoch': 0.33} + 33%|███████████████████████████ | 84/254 [27:04<52:23, 18.49s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:04<52:23, 18.49s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:04<52:23, 18.49s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:04<52:23, 18.49s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:04<52:23, 18.49s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:04<52:23, 18.49s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:22<51:16, 18.20s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:22<51:16, 18.20s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3309, 'learning_rate': 1.6800000000000002e-05, 'epoch': 0.33} + 33%|███████████████████████████▍ | 85/254 [27:22<51:16, 18.20s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:22<51:16, 18.20s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:22<51:16, 18.20s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:22<51:16, 18.20s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:22<51:16, 18.20s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:22<51:16, 18.20s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [27:39<50:04, 17.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [27:39<50:04, 17.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1866, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.34} + 34%|███████████████████████████▊ | 86/254 [27:39<50:04, 17.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [27:39<50:04, 17.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [27:39<50:04, 17.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [27:39<50:04, 17.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [27:39<50:04, 17.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [27:39<50:04, 17.89s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [27:56<48:42, 17.50s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [27:56<48:42, 17.50s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2429, 'learning_rate': 1.7199999999999998e-05, 'epoch': 0.34} + 34%|████████████████████████████ | 87/254 [27:56<48:42, 17.50s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [27:56<48:42, 17.50s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [27:56<48:42, 17.50s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [27:56<48:42, 17.50s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [27:56<48:42, 17.50s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [27:56<48:42, 17.50s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:13<47:55, 17.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:13<47:55, 17.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2578, 'learning_rate': 1.74e-05, 'epoch': 0.35} + 35%|████████████████████████████▍ | 88/254 [28:13<47:55, 17.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:13<47:55, 17.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:13<47:55, 17.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:13<47:55, 17.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:13<47:55, 17.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:13<47:55, 17.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:13<47:55, 17.32s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:28<46:18, 16.84s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:28<46:18, 16.84s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:28<46:18, 16.84s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:28<46:18, 16.84s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:28<46:18, 16.84s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:28<46:18, 16.84s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:28<46:18, 16.84s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:28<46:18, 16.84s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:28<46:18, 16.84s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [28:43<44:32, 16.29s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:25,765 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:25,765 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:25,765 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:25,765 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:25,765 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:36,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:36,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2666, 'learning_rate': 1.8e-05, 'epoch': 0.36} +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:36,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:36,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:44,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:44,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:44,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:44,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▋ | 92/254 [29:10<40:06, 14.85s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:52,412 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:52,412 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:52,412 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:58,419 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:58,419 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:15:58,419 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 93/254 [29:23<37:38, 14.03s/it]g-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:04,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:04,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:08,481 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:08,481 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:12,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:12,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3208, 'learning_rate': 1.86e-05, 'epoch': 0.37} +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:16,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:18,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:18,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:22,555 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:22,555 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:24,964 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:24,964 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:28,285 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:30,434 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:30,434 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:01:14,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▉ | 96/254 [29:53<29:35, 11.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:16:32,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:34,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:32,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:36,510 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:32,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:38,433 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:32,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:38,433 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:32,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████▎ | 97/254 [30:01<26:47, 10.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:16:40,429 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:42,224 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:40,429 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:43,939 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:40,429 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:43,939 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:40,429 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 98/254 [30:08<24:05, 9.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:16:47,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:48,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:47,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:50,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:47,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:50,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:47,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 99/254 [30:14<21:21, 8.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:16:53,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:55,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:53,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:56,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:53,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:16:56,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:16:53,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 100/254 [30:19<19:07, 7.45s/it]g-point operations will not be computed-02 08:16:53,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 100/254 [30:19<19:07, 7.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 100/254 [30:19<19:07, 7.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:07,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:07,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:13,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:13,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:13,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:13,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:13,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2785, 'learning_rate': 2e-05, 'epoch': 0.4} +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:13,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:13,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:13,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:13,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:13,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:13,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:17:13,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:06<39:44, 15.69s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:06<39:44, 15.69s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1837, 'learning_rate': 2.0200000000000003e-05, 'epoch': 0.4} + 40%|████████████████████████████████▌ | 102/254 [31:06<39:44, 15.69s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:06<39:44, 15.69s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:06<39:44, 15.69s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:06<39:44, 15.69s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:06<39:44, 15.69s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:06<39:44, 15.69s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:06<39:44, 15.69s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:29<44:51, 17.83s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:29<44:51, 17.83s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:29<44:51, 17.83s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:29<44:51, 17.83s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:29<44:51, 17.83s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:29<44:51, 17.83s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:29<44:51, 17.83s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:29<44:51, 17.83s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:29<44:51, 17.83s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2698, 'learning_rate': 2.08e-05, 'epoch': 0.41} + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:52<48:10, 19.27s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:37<51:35, 20.92s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:37<51:35, 20.92s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:37<51:35, 20.92s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:37<51:35, 20.92s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:37<51:35, 20.92s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:37<51:35, 20.92s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:37<51:35, 20.92s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:37<51:35, 20.92s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [32:59<52:16, 21.34s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [32:59<52:16, 21.34s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1013, 'learning_rate': 2.12e-05, 'epoch': 0.42} + 42%|██████████████████████████████████ | 107/254 [32:59<52:16, 21.34s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [32:59<52:16, 21.34s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [32:59<52:16, 21.34s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [32:59<52:16, 21.34s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [32:59<52:16, 21.34s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [32:59<52:16, 21.34s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:21<52:37, 21.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:21<52:37, 21.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2109, 'learning_rate': 2.1400000000000002e-05, 'epoch': 0.42} + 43%|██████████████████████████████████▍ | 108/254 [33:21<52:37, 21.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:21<52:37, 21.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:21<52:37, 21.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:21<52:37, 21.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:21<52:37, 21.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:21<52:37, 21.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:44<52:34, 21.76s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:44<52:34, 21.76s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2275, 'learning_rate': 2.16e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▊ | 109/254 [33:44<52:34, 21.76s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:44<52:34, 21.76s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:44<52:34, 21.76s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:44<52:34, 21.76s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:44<52:34, 21.76s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:44<52:34, 21.76s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:06<52:25, 21.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:06<52:25, 21.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2312, 'learning_rate': 2.18e-05, 'epoch': 0.43} + 43%|███████████████████████████████████ | 110/254 [34:06<52:25, 21.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:06<52:25, 21.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:06<52:25, 21.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:06<52:25, 21.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:06<52:25, 21.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:06<52:25, 21.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:06<52:25, 21.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:27<51:50, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:27<51:50, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:27<51:50, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:27<51:50, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:27<51:50, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:27<51:50, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:27<51:50, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:27<51:50, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:27<51:50, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:49<51:28, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:49<51:28, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:49<51:28, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:49<51:28, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:49<51:28, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:49<51:28, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:49<51:28, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:49<51:28, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:49<51:28, 21.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:11<51:25, 21.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:11<51:25, 21.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:11<51:25, 21.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:11<51:25, 21.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:11<51:25, 21.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:11<51:25, 21.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:11<51:25, 21.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:11<51:25, 21.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:33<50:54, 21.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:33<50:54, 21.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2024, 'learning_rate': 2.26e-05, 'epoch': 0.45} + 45%|████████████████████████████████████▎ | 114/254 [35:33<50:54, 21.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:33<50:54, 21.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:33<50:54, 21.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:33<50:54, 21.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:33<50:54, 21.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:33<50:54, 21.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:33<50:54, 21.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:54<50:10, 21.66s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:54<50:10, 21.66s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:54<50:10, 21.66s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:54<50:10, 21.66s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:54<50:10, 21.66s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:54<50:10, 21.66s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:54<50:10, 21.66s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:54<50:10, 21.66s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:15<49:28, 21.51s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:15<49:28, 21.51s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1406, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.46} + 46%|████████████████████████████████████▉ | 116/254 [36:15<49:28, 21.51s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:15<49:28, 21.51s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:15<49:28, 21.51s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:15<49:28, 21.51s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:15<49:28, 21.51s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:15<49:28, 21.51s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:36<48:52, 21.40s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:36<48:52, 21.40s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2267, 'learning_rate': 2.32e-05, 'epoch': 0.46} + 46%|█████████████████████████████████████▎ | 117/254 [36:36<48:52, 21.40s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:36<48:52, 21.40s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:36<48:52, 21.40s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:36<48:52, 21.40s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:36<48:52, 21.40s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:36<48:52, 21.40s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:36<48:52, 21.40s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1685, 'learning_rate': 2.36e-05, 'epoch': 0.47} + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2016, 'learning_rate': 2.38e-05, 'epoch': 0.47} + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [36:57<48:05, 21.22s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [37:59<46:08, 20.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [37:59<46:08, 20.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1813, 'learning_rate': 2.4e-05, 'epoch': 0.47} + 48%|██████████████████████████████████████▌ | 121/254 [37:59<46:08, 20.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [37:59<46:08, 20.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [37:59<46:08, 20.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [37:59<46:08, 20.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [37:59<46:08, 20.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [37:59<46:08, 20.82s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:19<45:23, 20.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:19<45:23, 20.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.251, 'learning_rate': 2.4200000000000002e-05, 'epoch': 0.48} + 48%|██████████████████████████████████████▉ | 122/254 [38:19<45:23, 20.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:19<45:23, 20.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:19<45:23, 20.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:19<45:23, 20.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:19<45:23, 20.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:19<45:23, 20.63s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:39<44:39, 20.45s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:39<44:39, 20.45s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1699, 'learning_rate': 2.44e-05, 'epoch': 0.48} + 48%|███████████████████████████████████████▏ | 123/254 [38:39<44:39, 20.45s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:39<44:39, 20.45s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:39<44:39, 20.45s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:39<44:39, 20.45s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:39<44:39, 20.45s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:39<44:39, 20.45s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:39<44:39, 20.45s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [38:59<43:59, 20.31s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [38:59<43:59, 20.31s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [38:59<43:59, 20.31s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [38:59<43:59, 20.31s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [38:59<43:59, 20.31s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [38:59<43:59, 20.31s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [38:59<43:59, 20.31s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [38:59<43:59, 20.31s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [38:59<43:59, 20.31s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:20<43:40, 20.32s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:20<43:40, 20.32s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:20<43:40, 20.32s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:20<43:40, 20.32s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:20<43:40, 20.32s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:20<43:40, 20.32s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:20<43:40, 20.32s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:20<43:40, 20.32s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:39<42:52, 20.10s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:39<42:52, 20.10s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1442, 'learning_rate': 2.5e-05, 'epoch': 0.49} + 50%|████████████████████████████████████████▏ | 126/254 [39:39<42:52, 20.10s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:39<42:52, 20.10s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:39<42:52, 20.10s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:39<42:52, 20.10s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:39<42:52, 20.10s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:39<42:52, 20.10s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:39<42:52, 20.10s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [39:59<42:04, 19.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [39:59<42:04, 19.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [39:59<42:04, 19.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [39:59<42:04, 19.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [39:59<42:04, 19.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [39:59<42:04, 19.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [39:59<42:04, 19.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [39:59<42:04, 19.88s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:18<41:23, 19.71s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:18<41:23, 19.71s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1569, 'learning_rate': 2.54e-05, 'epoch': 0.5} + 50%|████████████████████████████████████████▊ | 128/254 [40:18<41:23, 19.71s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:18<41:23, 19.71s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:18<41:23, 19.71s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:18<41:23, 19.71s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:18<41:23, 19.71s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:18<41:23, 19.71s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:37<40:37, 19.50s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:37<40:37, 19.50s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2225, 'learning_rate': 2.5600000000000002e-05, 'epoch': 0.51} + 51%|█████████████████████████████████████████▏ | 129/254 [40:37<40:37, 19.50s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:37<40:37, 19.50s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:37<40:37, 19.50s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:37<40:37, 19.50s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:37<40:37, 19.50s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:37<40:37, 19.50s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [40:56<39:57, 19.33s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [40:56<39:57, 19.33s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2638, 'learning_rate': 2.58e-05, 'epoch': 0.51} + 51%|█████████████████████████████████████████▍ | 130/254 [40:56<39:57, 19.33s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [40:56<39:57, 19.33s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [40:56<39:57, 19.33s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [40:56<39:57, 19.33s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [40:56<39:57, 19.33s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [40:56<39:57, 19.33s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:14<39:11, 19.12s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:14<39:11, 19.12s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2403, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.51} + 52%|█████████████████████████████████████████▊ | 131/254 [41:14<39:11, 19.12s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:14<39:11, 19.12s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:14<39:11, 19.12s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:14<39:11, 19.12s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:14<39:11, 19.12s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:14<39:11, 19.12s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:33<38:27, 18.91s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:33<38:27, 18.91s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1364, 'learning_rate': 2.6200000000000003e-05, 'epoch': 0.52} + 52%|██████████████████████████████████████████ | 132/254 [41:33<38:27, 18.91s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:33<38:27, 18.91s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:33<38:27, 18.91s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:33<38:27, 18.91s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:33<38:27, 18.91s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:33<38:27, 18.91s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [41:51<37:44, 18.72s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [41:51<37:44, 18.72s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1834, 'learning_rate': 2.64e-05, 'epoch': 0.52} + 52%|██████████████████████████████████████████▍ | 133/254 [41:51<37:44, 18.72s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [41:51<37:44, 18.72s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:28:40,988 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:28:40,988 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:28:40,988 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:09<36:47, 18.39s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:09<36:47, 18.39s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0975, 'learning_rate': 2.6600000000000003e-05, 'epoch': 0.53} + 53%|██████████████████████████████████████████▋ | 134/254 [42:09<36:47, 18.39s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:09<36:47, 18.39s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:09<36:47, 18.39s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:09<36:47, 18.39s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:09<36:47, 18.39s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:09<36:47, 18.39s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [42:26<35:59, 18.14s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [42:26<35:59, 18.14s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.296, 'learning_rate': 2.6800000000000004e-05, 'epoch': 0.53} +[WARNING|modeling_utils.py:388] 2022-03-02 08:29:11,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:29:11,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:29:11,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:29:11,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:29:11,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:43<35:04, 17.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:43<35:04, 17.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2294, 'learning_rate': 2.7000000000000002e-05, 'epoch': 0.53} + 54%|███████████████████████████████████████████▎ | 136/254 [42:43<35:04, 17.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:43<35:04, 17.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:43<35:04, 17.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:43<35:04, 17.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:43<35:04, 17.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:43<35:04, 17.84s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:00<34:04, 17.47s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:00<34:04, 17.47s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1809, 'learning_rate': 2.7200000000000004e-05, 'epoch': 0.54} + 54%|███████████████████████████████████████████▋ | 137/254 [43:00<34:04, 17.47s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:00<34:04, 17.47s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:00<34:04, 17.47s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:00<34:04, 17.47s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:00<34:04, 17.47s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:00<34:04, 17.47s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:17<33:20, 17.24s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:17<33:20, 17.24s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.056, 'learning_rate': 2.7400000000000002e-05, 'epoch': 0.54} + 54%|████████████████████████████████████████████ | 138/254 [43:17<33:20, 17.24s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:17<33:20, 17.24s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:17<33:20, 17.24s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:17<33:20, 17.24s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:17<33:20, 17.24s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:17<33:20, 17.24s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:17<33:20, 17.24s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [43:32<32:05, 16.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [43:32<32:05, 16.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [43:32<32:05, 16.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [43:32<32:05, 16.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [43:32<32:05, 16.75s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:22,328 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:22,328 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:22,328 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 140/254 [43:47<30:40, 16.15s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 140/254 [43:47<30:40, 16.15s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 140/254 [43:47<30:40, 16.15s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 140/254 [43:47<30:40, 16.15s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 140/254 [43:47<30:40, 16.15s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:36,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:36,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:36,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|████████████████████████████████████████████▉ | 141/254 [44:01<29:10, 15.49s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|████████████████████████████████████████████▉ | 141/254 [44:01<29:10, 15.49s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|████████████████████████████████████████████▉ | 141/254 [44:01<29:10, 15.49s/it]g-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:46,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:46,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:46,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:52,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:52,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2078, 'learning_rate': 2.8199999999999998e-05, 'epoch': 0.56} +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:52,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:30:52,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:00,567 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:00,567 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:04,964 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:04,964 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3606, 'learning_rate': 2.84e-05, 'epoch': 0.56} +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:04,964 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:10,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:10,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:14,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:14,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:17:01,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 144/254 [44:37<24:00, 13.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:31:17,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 144/254 [44:37<24:00, 13.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:31:17,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:21,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:17,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:21,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:17,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:24,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:17,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:24,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:17,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▏ | 145/254 [44:47<22:02, 12.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:29,421 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:31,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:33,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:33,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▌ | 146/254 [44:56<20:02, 11.13s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:36,868 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:38,870 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:40,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:40,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:42,664 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:44,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:46,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:49,575 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:49,575 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:51,236 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:52,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:55,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:55,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:56,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:59,290 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:31:59,290 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:32:00,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:32:00,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:32:07,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:32:07,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:32:13,417 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:32:13,417 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:32:19,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:32:19,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:32:19,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:46<21:23, 12.46s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:46<21:23, 12.46s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:46<21:23, 12.46s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:46<21:23, 12.46s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:46<21:23, 12.46s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:46<21:23, 12.46s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:46<21:23, 12.46s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:46<21:23, 12.46s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:10<26:38, 15.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:10<26:38, 15.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2034, 'learning_rate': 3.02e-05, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▍ | 152/254 [46:10<26:38, 15.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:10<26:38, 15.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:10<26:38, 15.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:10<26:38, 15.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:10<26:38, 15.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:10<26:38, 15.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:10<26:38, 15.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:33<30:03, 17.85s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:33<30:03, 17.85s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:33<30:03, 17.85s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:33<30:03, 17.85s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:33<30:03, 17.85s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:33<30:03, 17.85s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:33<30:03, 17.85s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:33<30:03, 17.85s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:33<30:03, 17.85s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [46:55<32:09, 19.29s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [46:55<32:09, 19.29s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [46:55<32:09, 19.29s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [46:55<32:09, 19.29s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [46:55<32:09, 19.29s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [46:55<32:09, 19.29s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [46:55<32:09, 19.29s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [46:55<32:09, 19.29s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [46:55<32:09, 19.29s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:18<33:23, 20.24s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:18<33:23, 20.24s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:18<33:23, 20.24s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:18<33:23, 20.24s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:18<33:23, 20.24s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:18<33:23, 20.24s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:18<33:23, 20.24s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:18<33:23, 20.24s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:18<33:23, 20.24s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:40<34:06, 20.88s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:40<34:06, 20.88s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:40<34:06, 20.88s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:40<34:06, 20.88s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:40<34:06, 20.88s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:40<34:06, 20.88s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:40<34:06, 20.88s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:40<34:06, 20.88s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:40<34:06, 20.88s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1791, 'learning_rate': 3.1400000000000004e-05, 'epoch': 0.62} + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:02<34:23, 21.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [48:46<34:15, 21.63s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [48:46<34:15, 21.63s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [48:46<34:15, 21.63s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [48:46<34:15, 21.63s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [48:46<34:15, 21.63s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [48:46<34:15, 21.63s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [48:46<34:15, 21.63s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [48:46<34:15, 21.63s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [48:46<34:15, 21.63s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:08<33:57, 21.67s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:08<33:57, 21.67s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:08<33:57, 21.67s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:08<33:57, 21.67s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:08<33:57, 21.67s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:08<33:57, 21.67s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:08<33:57, 21.67s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:08<33:57, 21.67s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:08<33:57, 21.67s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:29<33:29, 21.61s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:29<33:29, 21.61s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:29<33:29, 21.61s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:29<33:29, 21.61s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:29<33:29, 21.61s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:29<33:29, 21.61s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:29<33:29, 21.61s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:29<33:29, 21.61s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:51<33:02, 21.55s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:51<33:02, 21.55s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1827, 'learning_rate': 3.2200000000000003e-05, 'epoch': 0.64} + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:51<33:02, 21.55s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:51<33:02, 21.55s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:51<33:02, 21.55s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:51<33:02, 21.55s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:51<33:02, 21.55s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:51<33:02, 21.55s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:13<32:52, 21.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:13<32:52, 21.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2159, 'learning_rate': 3.24e-05, 'epoch': 0.64} + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:13<32:52, 21.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:13<32:52, 21.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:13<32:52, 21.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:13<32:52, 21.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:13<32:52, 21.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:13<32:52, 21.68s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:34<32:16, 21.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:34<32:16, 21.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1203, 'learning_rate': 3.26e-05, 'epoch': 0.64} + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:34<32:16, 21.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:34<32:16, 21.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:34<32:16, 21.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:34<32:16, 21.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:34<32:16, 21.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:34<32:16, 21.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:34<32:16, 21.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [50:55<31:42, 21.38s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [50:55<31:42, 21.38s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [50:55<31:42, 21.38s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [50:55<31:42, 21.38s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [50:55<31:42, 21.38s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [50:55<31:42, 21.38s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [50:55<31:42, 21.38s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [50:55<31:42, 21.38s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [50:55<31:42, 21.38s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [51:16<31:07, 21.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [51:16<31:07, 21.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [51:16<31:07, 21.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [51:16<31:07, 21.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [51:16<31:07, 21.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [51:16<31:07, 21.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [51:16<31:07, 21.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [51:16<31:07, 21.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [51:16<31:07, 21.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:37<30:33, 21.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:37<30:33, 21.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:37<30:33, 21.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:37<30:33, 21.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:37<30:33, 21.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:37<30:33, 21.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:37<30:33, 21.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:37<30:33, 21.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:37<30:33, 21.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [51:57<29:58, 20.91s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [51:57<29:58, 20.91s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [51:57<29:58, 20.91s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [51:57<29:58, 20.91s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [51:57<29:58, 20.91s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [51:57<29:58, 20.91s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [51:57<29:58, 20.91s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [51:57<29:58, 20.91s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:18<29:26, 20.78s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:18<29:26, 20.78s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1565, 'learning_rate': 3.3600000000000004e-05, 'epoch': 0.66} + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:18<29:26, 20.78s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:18<29:26, 20.78s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:18<29:26, 20.78s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:18<29:26, 20.78s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:18<29:26, 20.78s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:18<29:26, 20.78s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:38<28:54, 20.65s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:38<28:54, 20.65s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1505, 'learning_rate': 3.38e-05, 'epoch': 0.67} + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:38<28:54, 20.65s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:38<28:54, 20.65s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:38<28:54, 20.65s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:38<28:54, 20.65s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:38<28:54, 20.65s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:38<28:54, 20.65s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0987, 'learning_rate': 3.4000000000000007e-05, 'epoch': 0.67} + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0598, 'learning_rate': 3.4200000000000005e-05, 'epoch': 0.67} + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [52:58<28:23, 20.52s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:38<27:17, 20.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:38<27:17, 20.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:38<27:17, 20.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:38<27:17, 20.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:38<27:17, 20.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:38<27:17, 20.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:38<27:17, 20.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:38<27:17, 20.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:38<27:17, 20.22s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1869, 'learning_rate': 3.48e-05, 'epoch': 0.69} + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [53:58<26:49, 20.11s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:38<25:57, 19.96s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:38<25:57, 19.96s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1173, 'learning_rate': 3.5e-05, 'epoch': 0.69} + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:38<25:57, 19.96s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:38<25:57, 19.96s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:38<25:57, 19.96s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:38<25:57, 19.96s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:38<25:57, 19.96s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:38<25:57, 19.96s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [54:57<25:22, 19.77s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [54:57<25:22, 19.77s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1547, 'learning_rate': 3.52e-05, 'epoch': 0.69} + 70%|████████████████████████████████████████████████████████▍ | 177/254 [54:57<25:22, 19.77s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [54:57<25:22, 19.77s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [54:57<25:22, 19.77s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [54:57<25:22, 19.77s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [54:57<25:22, 19.77s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [54:57<25:22, 19.77s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [54:57<25:22, 19.77s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:16<24:49, 19.60s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:16<24:49, 19.60s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:16<24:49, 19.60s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:16<24:49, 19.60s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:16<24:49, 19.60s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:16<24:49, 19.60s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:16<24:49, 19.60s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:16<24:49, 19.60s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:16<24:49, 19.60s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:35<24:16, 19.43s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:35<24:16, 19.43s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:35<24:16, 19.43s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:35<24:16, 19.43s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:35<24:16, 19.43s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:35<24:16, 19.43s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:35<24:16, 19.43s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:35<24:16, 19.43s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [55:54<23:45, 19.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [55:54<23:45, 19.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2217, 'learning_rate': 3.58e-05, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [55:54<23:45, 19.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [55:54<23:45, 19.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [55:54<23:45, 19.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [55:54<23:45, 19.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [55:54<23:45, 19.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [55:54<23:45, 19.27s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:13<23:16, 19.13s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:13<23:16, 19.13s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2581, 'learning_rate': 3.6e-05, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:13<23:16, 19.13s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:13<23:16, 19.13s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:13<23:16, 19.13s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:13<23:16, 19.13s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:13<23:16, 19.13s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:13<23:16, 19.13s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:32<22:44, 18.95s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:32<22:44, 18.95s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0438, 'learning_rate': 3.62e-05, 'epoch': 0.71} + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:32<22:44, 18.95s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:32<22:44, 18.95s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:32<22:44, 18.95s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:32<22:44, 18.95s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:32<22:44, 18.95s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:32<22:44, 18.95s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:50<22:07, 18.69s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:50<22:07, 18.69s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1206, 'learning_rate': 3.6400000000000004e-05, 'epoch': 0.72} + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:50<22:07, 18.69s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:50<22:07, 18.69s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:50<22:07, 18.69s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:50<22:07, 18.69s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:50<22:07, 18.69s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:50<22:07, 18.69s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:50<22:07, 18.69s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [57:08<21:35, 18.50s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [57:08<21:35, 18.50s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [57:08<21:35, 18.50s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [57:08<21:35, 18.50s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [57:08<21:35, 18.50s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [57:08<21:35, 18.50s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [57:08<21:35, 18.50s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [57:08<21:35, 18.50s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [57:08<21:35, 18.50s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [57:25<21:00, 18.26s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:44:08,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:44:08,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:44:08,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:44:08,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:44:08,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:44:08,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:44:08,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:43<20:22, 17.97s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:43<20:22, 17.97s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:43<20:22, 17.97s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:43<20:22, 17.97s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:43<20:22, 17.97s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:43<20:22, 17.97s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:43<20:22, 17.97s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:43<20:22, 17.97s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:43<20:22, 17.97s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:00<19:42, 17.66s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:00<19:42, 17.66s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:00<19:42, 17.66s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:00<19:42, 17.66s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:44:49,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:44:49,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:44:49,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [58:17<19:14, 17.49s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [58:17<19:14, 17.49s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3035, 'learning_rate': 3.74e-05, 'epoch': 0.74} + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [58:17<19:14, 17.49s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [58:17<19:14, 17.49s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [58:17<19:14, 17.49s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [58:17<19:14, 17.49s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [58:17<19:14, 17.49s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [58:17<19:14, 17.49s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:33<18:29, 17.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:33<18:29, 17.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.114, 'learning_rate': 3.76e-05, 'epoch': 0.74} + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:33<18:29, 17.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:33<18:29, 17.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:33<18:29, 17.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:33<18:29, 17.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:33<18:29, 17.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:33<18:29, 17.07s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▌ | 190/254 [58:48<17:41, 16.58s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▌ | 190/254 [58:48<17:41, 16.58s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.193, 'learning_rate': 3.7800000000000004e-05, 'epoch': 0.75} + 75%|████████████████████████████████████████████████████████████▌ | 190/254 [58:48<17:41, 16.58s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:45:34,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:45:34,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:45:34,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:45:34,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:45:34,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▉ | 191/254 [59:03<16:48, 16.00s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▉ | 191/254 [59:03<16:48, 16.00s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▉ | 191/254 [59:03<16:48, 16.00s/it]g-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:45:48,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:45:48,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:45:48,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:45:55,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:45:55,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.281, 'learning_rate': 3.82e-05, 'epoch': 0.75} +[WARNING|modeling_utils.py:388] 2022-03-02 08:45:55,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:45:55,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:03,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:03,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:03,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:03,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:31:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▌ | 193/254 [59:29<14:38, 14.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▌ | 193/254 [59:29<14:38, 14.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▌ | 193/254 [59:29<14:38, 14.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:14,803 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:14,803 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:14,803 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:18,882 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:18,882 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:22,917 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:25,428 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:25,428 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:25,428 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:29,094 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:31,539 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:33,815 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:33,815 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:37,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:37,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:09,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 196/254 [1:00:00<11:08, 11.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:46:39,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:41,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:39,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:43,652 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:39,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:45,600 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:39,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:45,600 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:39,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▎ | 197/254 [1:00:08<10:00, 10.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:46:47,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:49,455 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:47,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:51,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:47,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▌ | 198/254 [1:00:15<08:53, 9.53s/it]g-point operations will not be computed-02 08:46:47,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▌ | 198/254 [1:00:15<08:53, 9.53s/it]g-point operations will not be computed-02 08:46:47,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:56,164 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:54,656 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:46:57,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:46:54,656 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 199/254 [1:00:21<07:45, 8.47s/it]g-point operations will not be computed-02 08:46:54,656 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 199/254 [1:00:21<07:45, 8.47s/it]g-point operations will not be computed-02 08:46:54,656 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:47:01,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:47:00,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:47:04,098 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:47:00,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 200/254 [1:00:26<06:48, 7.57s/it]g-point operations will not be computed-02 08:47:00,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 200/254 [1:00:26<06:48, 7.57s/it]g-point operations will not be computed-02 08:47:00,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)6<06:48, 7.57s/it]Traceback (most recent call last):puted-02 08:47:00,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)6<06:48, 7.57s/it]Traceback (most recent call last):puted-02 08:47:00,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)6<06:48, 7.57s/it]Traceback (most recent call last):puted-02 08:47:00,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed \ No newline at end of file diff --git a/wandb/run-20220302_074637-35y19oi2/files/requirements.txt b/wandb/run-20220302_074637-35y19oi2/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220302_074637-35y19oi2/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220302_074637-35y19oi2/files/wandb-metadata.json b/wandb/run-20220302_074637-35y19oi2/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9998c570754c0983977cf4d1450728373dad39b9 --- /dev/null +++ b/wandb/run-20220302_074637-35y19oi2/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-02T07:46:38.278448", + "startedAt": "2022-03-02T07:46:37.113392", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=14", + "--per_device_eval_batch_size=14", + "--gradient_accumulation_steps=8", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=1e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "622e8b594e8af8169be8cfe538228e49ae08c59d" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json b/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..8e9e2d1e0f00673bb2002537e4a370f248fd893b --- /dev/null +++ b/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/loss": 4.3938, "train/learning_rate": 3.9800000000000005e-05, "train/epoch": 0.78, "train/global_step": 200, "_runtime": 3628, "_timestamp": 1646210825, "_step": 199, "gradients/decoder.transformer.ln_f.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 583.0, 429.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-214.20204162597656, -183.53944396972656, -152.87684631347656, -122.2142562866211, -91.5516586303711, -60.889068603515625, -30.226470947265625, 0.436126708984375, 31.098724365234375, 61.761322021484375, 92.42391967773438, 123.08650970458984, 153.74911499023438, 184.4116973876953, 215.0742950439453, 245.7368927001953, 276.39947509765625, 307.06207275390625, 337.72467041015625, 368.38726806640625, 399.04986572265625, 429.71246337890625, 460.37506103515625, 491.03765869140625, 521.7002563476562, 552.3628540039062, 583.0254516601562, 613.6880493164062, 644.3506469726562, 675.0132446289062, 705.6758422851562, 736.3384399414062, 767.0010375976562, 797.6636352539062, 828.3262329101562, 858.9888305664062, 889.6514282226562, 920.3140258789062, 950.9766235351562, 981.6392211914062, 1012.3018188476562, 1042.96435546875, 1073.626953125, 1104.28955078125, 1134.9521484375, 1165.61474609375, 1196.27734375, 1226.93994140625, 1257.6025390625, 1288.26513671875, 1318.927734375, 1349.59033203125, 1380.2529296875, 1410.91552734375, 1441.578125, 1472.24072265625, 1502.9033203125, 1533.56591796875, 1564.228515625, 1594.89111328125, 1625.5537109375, 1656.21630859375, 1686.87890625, 1717.54150390625, 1748.2041015625]}, "gradients/decoder.transformer.ln_f.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 5.0, 2.0, 7.0, 7.0, 8.0, 8.0, 14.0, 11.0, 18.0, 16.0, 24.0, 37.0, 40.0, 39.0, 33.0, 57.0, 44.0, 50.0, 56.0, 43.0, 46.0, 63.0, 54.0, 42.0, 57.0, 42.0, 23.0, 36.0, 23.0, 19.0, 17.0, 13.0, 12.0, 5.0, 9.0, 4.0, 7.0, 6.0, 3.0, 4.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-116.32231140136719, -112.09468078613281, -107.86705017089844, -103.63941192626953, -99.41178131103516, -95.18415069580078, -90.95651245117188, -86.7288818359375, -82.50125122070312, -78.27362060546875, -74.04598999023438, -69.81835174560547, -65.5907211303711, -61.36309051513672, -57.13545608520508, -52.90782165527344, -48.68019104003906, -44.45256042480469, -40.22492599487305, -35.997291564941406, -31.76966094970703, -27.542028427124023, -23.314395904541016, -19.086763381958008, -14.859130859375, -10.631498336791992, -6.403865814208984, -2.1762332916259766, 2.0513992309570312, 6.279031753540039, 10.506664276123047, 14.734296798706055, 18.961944580078125, 23.189577102661133, 27.41720962524414, 31.64484214782715, 35.872474670410156, 40.10010528564453, 44.32773971557617, 48.55537414550781, 52.78300476074219, 57.01063537597656, 61.2382698059082, 65.46590423583984, 69.69353485107422, 73.9211654663086, 78.1488037109375, 82.37643432617188, 86.60406494140625, 90.83169555664062, 95.059326171875, 99.2869644165039, 103.51459503173828, 107.74222564697266, 111.96986389160156, 116.19749450683594, 120.42512512207031, 124.65275573730469, 128.88038635253906, 133.10801696777344, 137.33566284179688, 141.56329345703125, 145.79092407226562, 150.0185546875, 154.24618530273438]}, "gradients/decoder.transformer.h.23.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 2.0, 2.0, 3.0, 12.0, 7.0, 14.0, 17.0, 15.0, 9.0, 30.0, 46.0, 30.0, 43.0, 50.0, 40.0, 44.0, 43.0, 56.0, 54.0, 48.0, 55.0, 63.0, 45.0, 45.0, 42.0, 31.0, 34.0, 22.0, 19.0, 20.0, 15.0, 11.0, 6.0, 9.0, 4.0, 8.0, 1.0, 5.0, 2.0, 2.0, 0.0, 3.0, 1.0, 1.0], "bins": [-10.2578125, -9.99139404296875, -9.7249755859375, -9.45855712890625, -9.192138671875, -8.92572021484375, -8.6593017578125, -8.39288330078125, -8.12646484375, -7.86004638671875, -7.5936279296875, -7.32720947265625, -7.060791015625, -6.79437255859375, -6.5279541015625, -6.26153564453125, -5.9951171875, -5.72869873046875, -5.4622802734375, -5.19586181640625, -4.929443359375, -4.66302490234375, -4.3966064453125, -4.13018798828125, -3.86376953125, -3.59735107421875, -3.3309326171875, -3.06451416015625, -2.798095703125, -2.53167724609375, -2.2652587890625, -1.99884033203125, -1.732421875, -1.46600341796875, -1.1995849609375, -0.93316650390625, -0.666748046875, -0.40032958984375, -0.1339111328125, 0.13250732421875, 0.39892578125, 0.66534423828125, 0.9317626953125, 1.19818115234375, 1.464599609375, 1.73101806640625, 1.9974365234375, 2.26385498046875, 2.5302734375, 2.79669189453125, 3.0631103515625, 3.32952880859375, 3.595947265625, 3.86236572265625, 4.1287841796875, 4.39520263671875, 4.66162109375, 4.92803955078125, 5.1944580078125, 5.46087646484375, 5.727294921875, 5.99371337890625, 6.2601318359375, 6.52655029296875, 6.79296875]}, "gradients/decoder.transformer.h.23.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 2.0, 3.0, 0.0, 3.0, 7.0, 6.0, 7.0, 16.0, 15.0, 25.0, 53.0, 57.0, 105.0, 165.0, 292.0, 474.0, 770.0, 1503.0, 3132.0, 7687.0, 26624.0, 170288.0, 2646197.0, 1227234.0, 82122.0, 16824.0, 5668.0, 2336.0, 1130.0, 655.0, 309.0, 216.0, 155.0, 78.0, 50.0, 28.0, 23.0, 11.0, 10.0, 7.0, 5.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-25.171875, -24.233642578125, -23.29541015625, -22.357177734375, -21.4189453125, -20.480712890625, -19.54248046875, -18.604248046875, -17.666015625, -16.727783203125, -15.78955078125, -14.851318359375, -13.9130859375, -12.974853515625, -12.03662109375, -11.098388671875, -10.16015625, -9.221923828125, -8.28369140625, -7.345458984375, -6.4072265625, -5.468994140625, -4.53076171875, -3.592529296875, -2.654296875, -1.716064453125, -0.77783203125, 0.160400390625, 1.0986328125, 2.036865234375, 2.97509765625, 3.913330078125, 4.8515625, 5.789794921875, 6.72802734375, 7.666259765625, 8.6044921875, 9.542724609375, 10.48095703125, 11.419189453125, 12.357421875, 13.295654296875, 14.23388671875, 15.172119140625, 16.1103515625, 17.048583984375, 17.98681640625, 18.925048828125, 19.86328125, 20.801513671875, 21.73974609375, 22.677978515625, 23.6162109375, 24.554443359375, 25.49267578125, 26.430908203125, 27.369140625, 28.307373046875, 29.24560546875, 30.183837890625, 31.1220703125, 32.060302734375, 32.99853515625, 33.936767578125, 34.875]}, "gradients/decoder.transformer.h.23.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 5.0, 9.0, 14.0, 33.0, 39.0, 63.0, 108.0, 164.0, 344.0, 652.0, 845.0, 726.0, 450.0, 243.0, 166.0, 86.0, 45.0, 29.0, 16.0, 10.0, 13.0, 4.0, 1.0, 5.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.375, -35.9697265625, -34.564453125, -33.1591796875, -31.75390625, -30.3486328125, -28.943359375, -27.5380859375, -26.1328125, -24.7275390625, -23.322265625, -21.9169921875, -20.51171875, -19.1064453125, -17.701171875, -16.2958984375, -14.890625, -13.4853515625, -12.080078125, -10.6748046875, -9.26953125, -7.8642578125, -6.458984375, -5.0537109375, -3.6484375, -2.2431640625, -0.837890625, 0.5673828125, 1.97265625, 3.3779296875, 4.783203125, 6.1884765625, 7.59375, 8.9990234375, 10.404296875, 11.8095703125, 13.21484375, 14.6201171875, 16.025390625, 17.4306640625, 18.8359375, 20.2412109375, 21.646484375, 23.0517578125, 24.45703125, 25.8623046875, 27.267578125, 28.6728515625, 30.078125, 31.4833984375, 32.888671875, 34.2939453125, 35.69921875, 37.1044921875, 38.509765625, 39.9150390625, 41.3203125, 42.7255859375, 44.130859375, 45.5361328125, 46.94140625, 48.3466796875, 49.751953125, 51.1572265625, 52.5625]}, "gradients/decoder.transformer.h.23.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 2.0, 2.0, 2.0, 5.0, 7.0, 13.0, 14.0, 18.0, 45.0, 99.0, 209.0, 430.0, 1069.0, 4562.0, 1943743.0, 2237006.0, 4944.0, 1199.0, 464.0, 193.0, 111.0, 61.0, 34.0, 16.0, 8.0, 12.0, 5.0, 3.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-180.375, -174.91015625, -169.4453125, -163.98046875, -158.515625, -153.05078125, -147.5859375, -142.12109375, -136.65625, -131.19140625, -125.7265625, -120.26171875, -114.796875, -109.33203125, -103.8671875, -98.40234375, -92.9375, -87.47265625, -82.0078125, -76.54296875, -71.078125, -65.61328125, -60.1484375, -54.68359375, -49.21875, -43.75390625, -38.2890625, -32.82421875, -27.359375, -21.89453125, -16.4296875, -10.96484375, -5.5, -0.03515625, 5.4296875, 10.89453125, 16.359375, 21.82421875, 27.2890625, 32.75390625, 38.21875, 43.68359375, 49.1484375, 54.61328125, 60.078125, 65.54296875, 71.0078125, 76.47265625, 81.9375, 87.40234375, 92.8671875, 98.33203125, 103.796875, 109.26171875, 114.7265625, 120.19140625, 125.65625, 131.12109375, 136.5859375, 142.05078125, 147.515625, 152.98046875, 158.4453125, 163.91015625, 169.375]}, "gradients/decoder.transformer.h.23.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 23.0, 781.0, 206.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1118.4102783203125, -1094.9495849609375, -1071.4888916015625, -1048.0281982421875, -1024.5675048828125, -1001.1068725585938, -977.6461791992188, -954.1854858398438, -930.724853515625, -907.26416015625, -883.803466796875, -860.3427734375, -836.8821411132812, -813.4214477539062, -789.9607543945312, -766.5000610351562, -743.0393676757812, -719.5786743164062, -696.1179809570312, -672.6573486328125, -649.1966552734375, -625.7359619140625, -602.2752685546875, -578.8145751953125, -555.3538818359375, -531.8931884765625, -508.4325256347656, -484.9718322753906, -461.51116943359375, -438.05047607421875, -414.58978271484375, -391.12908935546875, -367.66851806640625, -344.20782470703125, -320.7471618652344, -297.2864685058594, -273.8258056640625, -250.3651123046875, -226.9044189453125, -203.44374084472656, -179.98306274414062, -156.5223846435547, -133.06170654296875, -109.60101318359375, -86.14033508300781, -62.679656982421875, -39.218963623046875, -15.758285522460938, 7.702392578125, 31.163074493408203, 54.623756408691406, 78.08444213867188, 101.54512023925781, 125.00579833984375, 148.46649169921875, 171.9271697998047, 195.38784790039062, 218.84852600097656, 242.3092041015625, 265.7698974609375, 289.2305908203125, 312.6912536621094, 336.1519470214844, 359.61260986328125, 383.07330322265625]}, "gradients/decoder.transformer.h.23.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 4.0, 3.0, 14.0, 5.0, 14.0, 18.0, 19.0, 19.0, 22.0, 33.0, 21.0, 33.0, 27.0, 41.0, 40.0, 50.0, 50.0, 54.0, 40.0, 50.0, 50.0, 50.0, 48.0, 49.0, 45.0, 28.0, 35.0, 28.0, 16.0, 18.0, 10.0, 20.0, 11.0, 16.0, 8.0, 7.0, 6.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-133.56314086914062, -129.90260314941406, -126.2420654296875, -122.58152770996094, -118.92098999023438, -115.26045227050781, -111.59991455078125, -107.93937683105469, -104.27883911132812, -100.61830139160156, -96.957763671875, -93.29722595214844, -89.63668823242188, -85.97615051269531, -82.31561279296875, -78.65507507324219, -74.99454498291016, -71.3340072631836, -67.67346954345703, -64.01293182373047, -60.352394104003906, -56.691856384277344, -53.03132247924805, -49.370784759521484, -45.71024703979492, -42.04970932006836, -38.3891716003418, -34.7286376953125, -31.068098068237305, -27.407560348510742, -23.747024536132812, -20.08648681640625, -16.42595672607422, -12.765419006347656, -9.10488224029541, -5.444345474243164, -1.7838077545166016, 1.876729965209961, 5.537265777587891, 9.197803497314453, 12.858341217041016, 16.518878936767578, 20.17941665649414, 23.83995246887207, 27.500490188598633, 31.161027908325195, 34.821563720703125, 38.48210144042969, 42.14263916015625, 45.80317687988281, 49.463714599609375, 53.12425231933594, 56.7847900390625, 60.44532775878906, 64.10586547851562, 67.76640319824219, 71.42694091796875, 75.08747863769531, 78.74801635742188, 82.40855407714844, 86.069091796875, 89.72962951660156, 93.39016723632812, 97.05070495605469, 100.71123504638672]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 3.0, 8.0, 7.0, 7.0, 5.0, 13.0, 21.0, 11.0, 21.0, 19.0, 22.0, 33.0, 38.0, 29.0, 29.0, 49.0, 43.0, 48.0, 50.0, 48.0, 56.0, 51.0, 44.0, 47.0, 37.0, 33.0, 38.0, 24.0, 23.0, 29.0, 19.0, 25.0, 13.0, 8.0, 9.0, 13.0, 6.0, 12.0, 5.0, 3.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.28125, -8.0213623046875, -7.761474609375, -7.5015869140625, -7.24169921875, -6.9818115234375, -6.721923828125, -6.4620361328125, -6.2021484375, -5.9422607421875, -5.682373046875, -5.4224853515625, -5.16259765625, -4.9027099609375, -4.642822265625, -4.3829345703125, -4.123046875, -3.8631591796875, -3.603271484375, -3.3433837890625, -3.08349609375, -2.8236083984375, -2.563720703125, -2.3038330078125, -2.0439453125, -1.7840576171875, -1.524169921875, -1.2642822265625, -1.00439453125, -0.7445068359375, -0.484619140625, -0.2247314453125, 0.03515625, 0.2950439453125, 0.554931640625, 0.8148193359375, 1.07470703125, 1.3345947265625, 1.594482421875, 1.8543701171875, 2.1142578125, 2.3741455078125, 2.634033203125, 2.8939208984375, 3.15380859375, 3.4136962890625, 3.673583984375, 3.9334716796875, 4.193359375, 4.4532470703125, 4.713134765625, 4.9730224609375, 5.23291015625, 5.4927978515625, 5.752685546875, 6.0125732421875, 6.2724609375, 6.5323486328125, 6.792236328125, 7.0521240234375, 7.31201171875, 7.5718994140625, 7.831787109375, 8.0916748046875, 8.3515625]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 6.0, 5.0, 9.0, 13.0, 12.0, 28.0, 39.0, 52.0, 71.0, 136.0, 182.0, 304.0, 549.0, 720.0, 1089.0, 1787.0, 2881.0, 4592.0, 7319.0, 11702.0, 18498.0, 29483.0, 46885.0, 74587.0, 118110.0, 189724.0, 200551.0, 125900.0, 78712.0, 50013.0, 31632.0, 19521.0, 12382.0, 7880.0, 4758.0, 3048.0, 2014.0, 1184.0, 750.0, 511.0, 312.0, 210.0, 150.0, 90.0, 45.0, 31.0, 28.0, 24.0, 16.0, 6.0, 6.0, 2.0, 4.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.947265625, -0.9186172485351562, -0.8899688720703125, -0.8613204956054688, -0.832672119140625, -0.8040237426757812, -0.7753753662109375, -0.7467269897460938, -0.71807861328125, -0.6894302368164062, -0.6607818603515625, -0.6321334838867188, -0.603485107421875, -0.5748367309570312, -0.5461883544921875, -0.5175399780273438, -0.4888916015625, -0.46024322509765625, -0.4315948486328125, -0.40294647216796875, -0.374298095703125, -0.34564971923828125, -0.3170013427734375, -0.28835296630859375, -0.25970458984375, -0.23105621337890625, -0.2024078369140625, -0.17375946044921875, -0.145111083984375, -0.11646270751953125, -0.0878143310546875, -0.05916595458984375, -0.030517578125, -0.00186920166015625, 0.0267791748046875, 0.05542755126953125, 0.084075927734375, 0.11272430419921875, 0.1413726806640625, 0.17002105712890625, 0.19866943359375, 0.22731781005859375, 0.2559661865234375, 0.28461456298828125, 0.313262939453125, 0.34191131591796875, 0.3705596923828125, 0.39920806884765625, 0.4278564453125, 0.45650482177734375, 0.4851531982421875, 0.5138015747070312, 0.542449951171875, 0.5710983276367188, 0.5997467041015625, 0.6283950805664062, 0.65704345703125, 0.6856918334960938, 0.7143402099609375, 0.7429885864257812, 0.771636962890625, 0.8002853393554688, 0.8289337158203125, 0.8575820922851562, 0.88623046875]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 4.0, 3.0, 6.0, 6.0, 9.0, 8.0, 7.0, 11.0, 20.0, 19.0, 20.0, 25.0, 19.0, 29.0, 44.0, 37.0, 32.0, 42.0, 54.0, 34.0, 38.0, 50.0, 1067.0, 36.0, 40.0, 40.0, 45.0, 41.0, 31.0, 31.0, 32.0, 19.0, 20.0, 16.0, 20.0, 9.0, 15.0, 8.0, 10.0, 15.0, 6.0, 8.0, 2.0, 3.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.63671875, -4.47735595703125, -4.3179931640625, -4.15863037109375, -3.999267578125, -3.83990478515625, -3.6805419921875, -3.52117919921875, -3.36181640625, -3.20245361328125, -3.0430908203125, -2.88372802734375, -2.724365234375, -2.56500244140625, -2.4056396484375, -2.24627685546875, -2.0869140625, -1.92755126953125, -1.7681884765625, -1.60882568359375, -1.449462890625, -1.29010009765625, -1.1307373046875, -0.97137451171875, -0.81201171875, -0.65264892578125, -0.4932861328125, -0.33392333984375, -0.174560546875, -0.01519775390625, 0.1441650390625, 0.30352783203125, 0.462890625, 0.62225341796875, 0.7816162109375, 0.94097900390625, 1.100341796875, 1.25970458984375, 1.4190673828125, 1.57843017578125, 1.73779296875, 1.89715576171875, 2.0565185546875, 2.21588134765625, 2.375244140625, 2.53460693359375, 2.6939697265625, 2.85333251953125, 3.0126953125, 3.17205810546875, 3.3314208984375, 3.49078369140625, 3.650146484375, 3.80950927734375, 3.9688720703125, 4.12823486328125, 4.28759765625, 4.44696044921875, 4.6063232421875, 4.76568603515625, 4.925048828125, 5.08441162109375, 5.2437744140625, 5.40313720703125, 5.5625]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 4.0, 5.0, 10.0, 20.0, 27.0, 58.0, 68.0, 106.0, 159.0, 226.0, 339.0, 517.0, 871.0, 1326.0, 1920.0, 2963.0, 4378.0, 6751.0, 10170.0, 15807.0, 23631.0, 36379.0, 55610.0, 87691.0, 138618.0, 1247443.0, 165827.0, 104650.0, 66652.0, 42764.0, 28157.0, 18597.0, 12103.0, 7983.0, 5194.0, 3518.0, 2197.0, 1464.0, 1031.0, 643.0, 440.0, 307.0, 208.0, 96.0, 75.0, 60.0, 28.0, 17.0, 13.0, 7.0, 8.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.833984375, -0.8084793090820312, -0.7829742431640625, -0.7574691772460938, -0.731964111328125, -0.7064590454101562, -0.6809539794921875, -0.6554489135742188, -0.62994384765625, -0.6044387817382812, -0.5789337158203125, -0.5534286499023438, -0.527923583984375, -0.5024185180664062, -0.4769134521484375, -0.45140838623046875, -0.4259033203125, -0.40039825439453125, -0.3748931884765625, -0.34938812255859375, -0.323883056640625, -0.29837799072265625, -0.2728729248046875, -0.24736785888671875, -0.22186279296875, -0.19635772705078125, -0.1708526611328125, -0.14534759521484375, -0.119842529296875, -0.09433746337890625, -0.0688323974609375, -0.04332733154296875, -0.017822265625, 0.00768280029296875, 0.0331878662109375, 0.05869293212890625, 0.084197998046875, 0.10970306396484375, 0.1352081298828125, 0.16071319580078125, 0.18621826171875, 0.21172332763671875, 0.2372283935546875, 0.26273345947265625, 0.288238525390625, 0.31374359130859375, 0.3392486572265625, 0.36475372314453125, 0.3902587890625, 0.41576385498046875, 0.4412689208984375, 0.46677398681640625, 0.492279052734375, 0.5177841186523438, 0.5432891845703125, 0.5687942504882812, 0.59429931640625, 0.6198043823242188, 0.6453094482421875, 0.6708145141601562, 0.696319580078125, 0.7218246459960938, 0.7473297119140625, 0.7728347778320312, 0.79833984375]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 4.0, 4.0, 4.0, 5.0, 13.0, 6.0, 6.0, 11.0, 14.0, 18.0, 24.0, 32.0, 45.0, 40.0, 56.0, 57.0, 83.0, 83.0, 90.0, 83.0, 70.0, 57.0, 44.0, 41.0, 32.0, 17.0, 14.0, 8.0, 9.0, 13.0, 5.0, 4.0, 3.0, 2.0, 5.0, 1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0016450881958007812, -0.00158749520778656, -0.0015299022197723389, -0.0014723092317581177, -0.0014147162437438965, -0.0013571232557296753, -0.001299530267715454, -0.001241937279701233, -0.0011843442916870117, -0.0011267513036727905, -0.0010691583156585693, -0.0010115653276443481, -0.000953972339630127, -0.0008963793516159058, -0.0008387863636016846, -0.0007811933755874634, -0.0007236003875732422, -0.000666007399559021, -0.0006084144115447998, -0.0005508214235305786, -0.0004932284355163574, -0.00043563544750213623, -0.00037804245948791504, -0.00032044947147369385, -0.00026285648345947266, -0.00020526349544525146, -0.00014767050743103027, -9.007751941680908e-05, -3.248453140258789e-05, 2.51084566116333e-05, 8.270144462585449e-05, 0.00014029443264007568, 0.00019788742065429688, 0.00025548040866851807, 0.00031307339668273926, 0.00037066638469696045, 0.00042825937271118164, 0.00048585236072540283, 0.000543445348739624, 0.0006010383367538452, 0.0006586313247680664, 0.0007162243127822876, 0.0007738173007965088, 0.00083141028881073, 0.0008890032768249512, 0.0009465962648391724, 0.0010041892528533936, 0.0010617822408676147, 0.001119375228881836, 0.0011769682168960571, 0.0012345612049102783, 0.0012921541929244995, 0.0013497471809387207, 0.001407340168952942, 0.001464933156967163, 0.0015225261449813843, 0.0015801191329956055, 0.0016377121210098267, 0.0016953051090240479, 0.001752898097038269, 0.0018104910850524902, 0.0018680840730667114, 0.0019256770610809326, 0.001983270049095154, 0.002040863037109375]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 4.0, 0.0, 0.0, 0.0, 4.0, 1.0, 5.0, 7.0, 9.0, 12.0, 23.0, 15.0, 24.0, 22.0, 25.0, 32.0, 63.0, 60.0, 107.0, 148.0, 206.0, 286.0, 509.0, 1076.0, 955967.0, 87623.0, 848.0, 467.0, 300.0, 184.0, 166.0, 103.0, 64.0, 50.0, 32.0, 22.0, 18.0, 13.0, 16.0, 11.0, 10.0, 8.0, 4.0, 7.0, 1.0, 0.0, 1.0, 4.0, 4.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0300750732421875, -0.02903437614440918, -0.02799367904663086, -0.02695298194885254, -0.02591228485107422, -0.0248715877532959, -0.023830890655517578, -0.022790193557739258, -0.021749496459960938, -0.020708799362182617, -0.019668102264404297, -0.018627405166625977, -0.017586708068847656, -0.016546010971069336, -0.015505313873291016, -0.014464616775512695, -0.013423919677734375, -0.012383222579956055, -0.011342525482177734, -0.010301828384399414, -0.009261131286621094, -0.008220434188842773, -0.007179737091064453, -0.006139039993286133, -0.0050983428955078125, -0.004057645797729492, -0.003016948699951172, -0.0019762516021728516, -0.0009355545043945312, 0.00010514259338378906, 0.0011458396911621094, 0.0021865367889404297, 0.00322723388671875, 0.00426793098449707, 0.005308628082275391, 0.006349325180053711, 0.007390022277832031, 0.008430719375610352, 0.009471416473388672, 0.010512113571166992, 0.011552810668945312, 0.012593507766723633, 0.013634204864501953, 0.014674901962280273, 0.015715599060058594, 0.016756296157836914, 0.017796993255615234, 0.018837690353393555, 0.019878387451171875, 0.020919084548950195, 0.021959781646728516, 0.023000478744506836, 0.024041175842285156, 0.025081872940063477, 0.026122570037841797, 0.027163267135620117, 0.028203964233398438, 0.029244661331176758, 0.030285358428955078, 0.0313260555267334, 0.03236675262451172, 0.03340744972229004, 0.03444814682006836, 0.03548884391784668, 0.036529541015625]}, "gradients/decoder.transformer.h.23.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 12.0, 52.0, 120.0, 304.0, 331.0, 135.0, 40.0, 12.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006757056689821184, -0.0006336048245429993, -0.0005915040383115411, -0.0005494032520800829, -0.0005073024076409638, -0.00046520159230567515, -0.0004231007769703865, -0.00038099996163509786, -0.0003388991462998092, -0.0002967983309645206, -0.00025469751562923193, -0.00021259670029394329, -0.00017049588495865464, -0.000128395069623366, -8.629425428807735e-05, -4.419343895278871e-05, -2.0926236175000668e-06, 4.000819171778858e-05, 8.210900705307722e-05, 0.00012420982238836586, 0.0001663106377236545, 0.00020841145305894315, 0.0002505122683942318, 0.00029261308372952044, 0.0003347138990648091, 0.00037681471440009773, 0.00041891552973538637, 0.000461016345070675, 0.0005031171604059637, 0.0005452180048450828, 0.000587318791076541, 0.0006294195773079991, 0.0006715203635394573, 0.0007136211497709155, 0.0007557219942100346, 0.0007978228386491537, 0.0008399236248806119, 0.0008820244111120701, 0.0009241252555511892, 0.0009662260999903083, 0.0010083268862217665, 0.0010504276724532247, 0.0010925284586846828, 0.0011346293613314629, 0.001176730147562921, 0.0012188309337943792, 0.0012609318364411592, 0.0013030326226726174, 0.0013451334089040756, 0.0013872341951355338, 0.001429334981366992, 0.001471435884013772, 0.0015135366702452302, 0.0015556374564766884, 0.0015977383591234684, 0.0016398391453549266, 0.0016819399315863848, 0.001724040717817843, 0.0017661415040493011, 0.0018082424066960812, 0.0018503431929275393, 0.0018924439791589975, 0.0019345448818057775, 0.001976645551621914, 0.002018746454268694]}, "gradients/decoder.transformer.h.23.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 0.0, 4.0, 3.0, 4.0, 8.0, 5.0, 8.0, 10.0, 13.0, 14.0, 13.0, 14.0, 25.0, 25.0, 28.0, 21.0, 34.0, 34.0, 26.0, 31.0, 47.0, 47.0, 59.0, 46.0, 45.0, 34.0, 41.0, 48.0, 27.0, 40.0, 32.0, 26.0, 23.0, 17.0, 27.0, 22.0, 16.0, 15.0, 13.0, 10.0, 13.0, 11.0, 8.0, 10.0, 3.0, 2.0, 1.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0], "bins": [-0.000709235668182373, -0.0006841253489255905, -0.000659015029668808, -0.0006339047104120255, -0.0006087943911552429, -0.0005836840718984604, -0.0005585737526416779, -0.0005334634333848953, -0.0005083531141281128, -0.00048324279487133026, -0.00045813247561454773, -0.0004330221563577652, -0.00040791183710098267, -0.00038280151784420013, -0.0003576911985874176, -0.00033258087933063507, -0.00030747056007385254, -0.00028236024081707, -0.0002572499215602875, -0.00023213960230350494, -0.0002070292830467224, -0.00018191896378993988, -0.00015680864453315735, -0.00013169832527637482, -0.00010658800601959229, -8.147768676280975e-05, -5.636736750602722e-05, -3.125704824924469e-05, -6.146728992462158e-06, 1.8963590264320374e-05, 4.4073909521102905e-05, 6.918422877788544e-05, 9.429454803466797e-05, 0.0001194048672914505, 0.00014451518654823303, 0.00016962550580501556, 0.0001947358250617981, 0.00021984614431858063, 0.00024495646357536316, 0.0002700667828321457, 0.0002951771020889282, 0.00032028742134571075, 0.0003453977406024933, 0.0003705080598592758, 0.00039561837911605835, 0.0004207286983728409, 0.0004458390176296234, 0.00047094933688640594, 0.0004960596561431885, 0.000521169975399971, 0.0005462802946567535, 0.0005713906139135361, 0.0005965009331703186, 0.0006216112524271011, 0.0006467215716838837, 0.0006718318909406662, 0.0006969422101974487, 0.0007220525294542313, 0.0007471628487110138, 0.0007722731679677963, 0.0007973834872245789, 0.0008224938064813614, 0.0008476041257381439, 0.0008727144449949265, 0.000897824764251709]}, "gradients/decoder.transformer.h.23.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 3.0, 8.0, 7.0, 7.0, 5.0, 13.0, 21.0, 11.0, 21.0, 19.0, 22.0, 33.0, 38.0, 29.0, 29.0, 49.0, 43.0, 48.0, 50.0, 48.0, 56.0, 51.0, 45.0, 46.0, 37.0, 33.0, 38.0, 24.0, 23.0, 29.0, 19.0, 25.0, 13.0, 8.0, 9.0, 13.0, 6.0, 12.0, 5.0, 3.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.28125, -8.0213623046875, -7.761474609375, -7.5015869140625, -7.24169921875, -6.9818115234375, -6.721923828125, -6.4620361328125, -6.2021484375, -5.9422607421875, -5.682373046875, -5.4224853515625, -5.16259765625, -4.9027099609375, -4.642822265625, -4.3829345703125, -4.123046875, -3.8631591796875, -3.603271484375, -3.3433837890625, -3.08349609375, -2.8236083984375, -2.563720703125, -2.3038330078125, -2.0439453125, -1.7840576171875, -1.524169921875, -1.2642822265625, -1.00439453125, -0.7445068359375, -0.484619140625, -0.2247314453125, 0.03515625, 0.2950439453125, 0.554931640625, 0.8148193359375, 1.07470703125, 1.3345947265625, 1.594482421875, 1.8543701171875, 2.1142578125, 2.3741455078125, 2.634033203125, 2.8939208984375, 3.15380859375, 3.4136962890625, 3.673583984375, 3.9334716796875, 4.193359375, 4.4532470703125, 4.713134765625, 4.9730224609375, 5.23291015625, 5.4927978515625, 5.752685546875, 6.0125732421875, 6.2724609375, 6.5323486328125, 6.792236328125, 7.0521240234375, 7.31201171875, 7.5718994140625, 7.831787109375, 8.0916748046875, 8.3515625]}, "gradients/decoder.transformer.h.23.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 1.0, 8.0, 8.0, 7.0, 13.0, 15.0, 38.0, 38.0, 41.0, 70.0, 93.0, 134.0, 177.0, 244.0, 358.0, 484.0, 703.0, 1006.0, 1350.0, 2184.0, 3409.0, 6057.0, 12554.0, 44455.0, 780508.0, 153144.0, 20454.0, 8289.0, 4379.0, 2637.0, 1718.0, 1137.0, 796.0, 594.0, 403.0, 297.0, 208.0, 153.0, 107.0, 81.0, 55.0, 42.0, 31.0, 29.0, 12.0, 18.0, 7.0, 5.0, 4.0, 2.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-49.28125, -47.77587890625, -46.2705078125, -44.76513671875, -43.259765625, -41.75439453125, -40.2490234375, -38.74365234375, -37.23828125, -35.73291015625, -34.2275390625, -32.72216796875, -31.216796875, -29.71142578125, -28.2060546875, -26.70068359375, -25.1953125, -23.68994140625, -22.1845703125, -20.67919921875, -19.173828125, -17.66845703125, -16.1630859375, -14.65771484375, -13.15234375, -11.64697265625, -10.1416015625, -8.63623046875, -7.130859375, -5.62548828125, -4.1201171875, -2.61474609375, -1.109375, 0.39599609375, 1.9013671875, 3.40673828125, 4.912109375, 6.41748046875, 7.9228515625, 9.42822265625, 10.93359375, 12.43896484375, 13.9443359375, 15.44970703125, 16.955078125, 18.46044921875, 19.9658203125, 21.47119140625, 22.9765625, 24.48193359375, 25.9873046875, 27.49267578125, 28.998046875, 30.50341796875, 32.0087890625, 33.51416015625, 35.01953125, 36.52490234375, 38.0302734375, 39.53564453125, 41.041015625, 42.54638671875, 44.0517578125, 45.55712890625, 47.0625]}, "gradients/decoder.transformer.h.23.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 8.0, 4.0, 9.0, 10.0, 14.0, 11.0, 14.0, 24.0, 18.0, 30.0, 19.0, 36.0, 41.0, 39.0, 48.0, 53.0, 68.0, 96.0, 242.0, 1539.0, 191.0, 89.0, 64.0, 51.0, 40.0, 35.0, 26.0, 36.0, 31.0, 22.0, 21.0, 21.0, 15.0, 19.0, 15.0, 10.0, 5.0, 5.0, 5.0, 3.0, 3.0, 8.0, 2.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-23.84375, -23.108642578125, -22.37353515625, -21.638427734375, -20.9033203125, -20.168212890625, -19.43310546875, -18.697998046875, -17.962890625, -17.227783203125, -16.49267578125, -15.757568359375, -15.0224609375, -14.287353515625, -13.55224609375, -12.817138671875, -12.08203125, -11.346923828125, -10.61181640625, -9.876708984375, -9.1416015625, -8.406494140625, -7.67138671875, -6.936279296875, -6.201171875, -5.466064453125, -4.73095703125, -3.995849609375, -3.2607421875, -2.525634765625, -1.79052734375, -1.055419921875, -0.3203125, 0.414794921875, 1.14990234375, 1.885009765625, 2.6201171875, 3.355224609375, 4.09033203125, 4.825439453125, 5.560546875, 6.295654296875, 7.03076171875, 7.765869140625, 8.5009765625, 9.236083984375, 9.97119140625, 10.706298828125, 11.44140625, 12.176513671875, 12.91162109375, 13.646728515625, 14.3818359375, 15.116943359375, 15.85205078125, 16.587158203125, 17.322265625, 18.057373046875, 18.79248046875, 19.527587890625, 20.2626953125, 20.997802734375, 21.73291015625, 22.468017578125, 23.203125]}, "gradients/decoder.transformer.h.23.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 3.0, 2.0, 5.0, 5.0, 8.0, 10.0, 16.0, 8.0, 14.0, 24.0, 21.0, 24.0, 29.0, 34.0, 52.0, 52.0, 80.0, 163.0, 407.0, 1929.0, 2545150.0, 595146.0, 1575.0, 393.0, 174.0, 83.0, 61.0, 47.0, 29.0, 24.0, 29.0, 24.0, 12.0, 11.0, 12.0, 14.0, 12.0, 7.0, 2.0, 5.0, 4.0, 3.0, 2.0, 1.0, 4.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-115.4375, -111.9345703125, -108.431640625, -104.9287109375, -101.42578125, -97.9228515625, -94.419921875, -90.9169921875, -87.4140625, -83.9111328125, -80.408203125, -76.9052734375, -73.40234375, -69.8994140625, -66.396484375, -62.8935546875, -59.390625, -55.8876953125, -52.384765625, -48.8818359375, -45.37890625, -41.8759765625, -38.373046875, -34.8701171875, -31.3671875, -27.8642578125, -24.361328125, -20.8583984375, -17.35546875, -13.8525390625, -10.349609375, -6.8466796875, -3.34375, 0.1591796875, 3.662109375, 7.1650390625, 10.66796875, 14.1708984375, 17.673828125, 21.1767578125, 24.6796875, 28.1826171875, 31.685546875, 35.1884765625, 38.69140625, 42.1943359375, 45.697265625, 49.2001953125, 52.703125, 56.2060546875, 59.708984375, 63.2119140625, 66.71484375, 70.2177734375, 73.720703125, 77.2236328125, 80.7265625, 84.2294921875, 87.732421875, 91.2353515625, 94.73828125, 98.2412109375, 101.744140625, 105.2470703125, 108.75]}, "gradients/decoder.transformer.h.23.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 139.0, 870.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-221.09609985351562, -209.5765380859375, -198.05697631835938, -186.53741455078125, -175.01785278320312, -163.498291015625, -151.97874450683594, -140.4591827392578, -128.9396209716797, -117.42005920410156, -105.90049743652344, -94.38094329833984, -82.86138153076172, -71.3418197631836, -59.822265625, -48.302703857421875, -36.78314208984375, -25.263582229614258, -13.744022369384766, -2.2244644165039062, 9.295097351074219, 20.814659118652344, 32.33421325683594, 43.85377502441406, 55.37333679199219, 66.89289855957031, 78.41246032714844, 89.93201446533203, 101.45157623291016, 112.97113800048828, 124.49069213867188, 136.01025390625, 147.52978515625, 159.04934692382812, 170.56890869140625, 182.08847045898438, 193.6080322265625, 205.12759399414062, 216.6471405029297, 228.1667022705078, 239.68626403808594, 251.20582580566406, 262.7253723144531, 274.24493408203125, 285.7644958496094, 297.2840576171875, 308.8036193847656, 320.32318115234375, 331.8427429199219, 343.3623046875, 354.8818664550781, 366.40142822265625, 377.9209899902344, 389.4405517578125, 400.9600830078125, 412.47967529296875, 423.99920654296875, 435.5187683105469, 447.038330078125, 458.5578918457031, 470.07745361328125, 481.5970153808594, 493.1165771484375, 504.6361083984375, 516.1557006835938]}, "gradients/decoder.transformer.h.23.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 4.0, 3.0, 10.0, 5.0, 8.0, 7.0, 14.0, 14.0, 22.0, 16.0, 23.0, 26.0, 29.0, 29.0, 25.0, 18.0, 36.0, 33.0, 32.0, 28.0, 45.0, 36.0, 44.0, 36.0, 40.0, 39.0, 43.0, 40.0, 41.0, 31.0, 39.0, 20.0, 18.0, 29.0, 20.0, 30.0, 14.0, 17.0, 8.0, 9.0, 7.0, 6.0, 5.0, 4.0, 5.0, 5.0], "bins": [-83.70170593261719, -81.44474792480469, -79.18778228759766, -76.93082427978516, -74.67386627197266, -72.41690826416016, -70.15994262695312, -67.90298461914062, -65.64602661132812, -63.38906478881836, -61.13210678100586, -58.875144958496094, -56.618186950683594, -54.36122512817383, -52.10426330566406, -49.84730529785156, -47.5903434753418, -45.33338165283203, -43.07642364501953, -40.819461822509766, -38.562503814697266, -36.3055419921875, -34.048583984375, -31.791622161865234, -29.5346622467041, -27.27770233154297, -25.020742416381836, -22.763782501220703, -20.506820678710938, -18.249862670898438, -15.992900848388672, -13.735940933227539, -11.478981018066406, -9.222021102905273, -6.965060710906982, -4.708100318908691, -2.4511404037475586, -0.19418048858642578, 2.0627803802490234, 4.319740295410156, 6.576700210571289, 8.833660125732422, 11.090620040893555, 13.347580909729004, 15.604540824890137, 17.861499786376953, 20.11846160888672, 22.37542152404785, 24.632381439208984, 26.889341354370117, 29.14630126953125, 31.403263092041016, 33.660221099853516, 35.91718292236328, 38.17414093017578, 40.43110275268555, 42.68806457519531, 44.94502639770508, 47.20198440551758, 49.458946228027344, 51.715904235839844, 53.97286605834961, 56.229827880859375, 58.486785888671875, 60.743743896484375]}, "gradients/decoder.transformer.h.22.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 4.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 5.0, 9.0, 4.0, 10.0, 6.0, 10.0, 14.0, 10.0, 20.0, 12.0, 23.0, 18.0, 37.0, 24.0, 30.0, 31.0, 29.0, 40.0, 49.0, 47.0, 49.0, 44.0, 52.0, 54.0, 49.0, 33.0, 27.0, 47.0, 26.0, 21.0, 20.0, 31.0, 24.0, 17.0, 15.0, 10.0, 8.0, 13.0, 10.0, 9.0, 5.0, 5.0, 2.0, 6.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-7.83203125, -7.57476806640625, -7.3175048828125, -7.06024169921875, -6.802978515625, -6.54571533203125, -6.2884521484375, -6.03118896484375, -5.77392578125, -5.51666259765625, -5.2593994140625, -5.00213623046875, -4.744873046875, -4.48760986328125, -4.2303466796875, -3.97308349609375, -3.7158203125, -3.45855712890625, -3.2012939453125, -2.94403076171875, -2.686767578125, -2.42950439453125, -2.1722412109375, -1.91497802734375, -1.65771484375, -1.40045166015625, -1.1431884765625, -0.88592529296875, -0.628662109375, -0.37139892578125, -0.1141357421875, 0.14312744140625, 0.400390625, 0.65765380859375, 0.9149169921875, 1.17218017578125, 1.429443359375, 1.68670654296875, 1.9439697265625, 2.20123291015625, 2.45849609375, 2.71575927734375, 2.9730224609375, 3.23028564453125, 3.487548828125, 3.74481201171875, 4.0020751953125, 4.25933837890625, 4.5166015625, 4.77386474609375, 5.0311279296875, 5.28839111328125, 5.545654296875, 5.80291748046875, 6.0601806640625, 6.31744384765625, 6.57470703125, 6.83197021484375, 7.0892333984375, 7.34649658203125, 7.603759765625, 7.86102294921875, 8.1182861328125, 8.37554931640625, 8.6328125]}, "gradients/decoder.transformer.h.22.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 5.0, 5.0, 9.0, 11.0, 10.0, 10.0, 17.0, 27.0, 37.0, 52.0, 80.0, 133.0, 184.0, 307.0, 537.0, 975.0, 2103.0, 4577.0, 11377.0, 39610.0, 417727.0, 3485306.0, 186577.0, 27679.0, 9098.0, 3852.0, 1773.0, 942.0, 477.0, 260.0, 171.0, 107.0, 68.0, 58.0, 28.0, 28.0, 20.0, 16.0, 12.0, 9.0, 5.0, 5.0, 3.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-50.46875, -48.91015625, -47.3515625, -45.79296875, -44.234375, -42.67578125, -41.1171875, -39.55859375, -38.0, -36.44140625, -34.8828125, -33.32421875, -31.765625, -30.20703125, -28.6484375, -27.08984375, -25.53125, -23.97265625, -22.4140625, -20.85546875, -19.296875, -17.73828125, -16.1796875, -14.62109375, -13.0625, -11.50390625, -9.9453125, -8.38671875, -6.828125, -5.26953125, -3.7109375, -2.15234375, -0.59375, 0.96484375, 2.5234375, 4.08203125, 5.640625, 7.19921875, 8.7578125, 10.31640625, 11.875, 13.43359375, 14.9921875, 16.55078125, 18.109375, 19.66796875, 21.2265625, 22.78515625, 24.34375, 25.90234375, 27.4609375, 29.01953125, 30.578125, 32.13671875, 33.6953125, 35.25390625, 36.8125, 38.37109375, 39.9296875, 41.48828125, 43.046875, 44.60546875, 46.1640625, 47.72265625, 49.28125]}, "gradients/decoder.transformer.h.22.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 1.0, 11.0, 5.0, 8.0, 14.0, 15.0, 22.0, 39.0, 37.0, 67.0, 131.0, 173.0, 276.0, 425.0, 640.0, 735.0, 520.0, 325.0, 204.0, 126.0, 97.0, 67.0, 45.0, 25.0, 19.0, 20.0, 10.0, 10.0, 9.0, 2.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-57.03125, -55.537109375, -54.04296875, -52.548828125, -51.0546875, -49.560546875, -48.06640625, -46.572265625, -45.078125, -43.583984375, -42.08984375, -40.595703125, -39.1015625, -37.607421875, -36.11328125, -34.619140625, -33.125, -31.630859375, -30.13671875, -28.642578125, -27.1484375, -25.654296875, -24.16015625, -22.666015625, -21.171875, -19.677734375, -18.18359375, -16.689453125, -15.1953125, -13.701171875, -12.20703125, -10.712890625, -9.21875, -7.724609375, -6.23046875, -4.736328125, -3.2421875, -1.748046875, -0.25390625, 1.240234375, 2.734375, 4.228515625, 5.72265625, 7.216796875, 8.7109375, 10.205078125, 11.69921875, 13.193359375, 14.6875, 16.181640625, 17.67578125, 19.169921875, 20.6640625, 22.158203125, 23.65234375, 25.146484375, 26.640625, 28.134765625, 29.62890625, 31.123046875, 32.6171875, 34.111328125, 35.60546875, 37.099609375, 38.59375]}, "gradients/decoder.transformer.h.22.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 3.0, 5.0, 5.0, 9.0, 9.0, 13.0, 29.0, 32.0, 53.0, 83.0, 120.0, 201.0, 407.0, 2137.0, 3956621.0, 232759.0, 1053.0, 304.0, 160.0, 105.0, 55.0, 42.0, 33.0, 17.0, 13.0, 4.0, 12.0, 3.0, 2.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-188.375, -180.478515625, -172.58203125, -164.685546875, -156.7890625, -148.892578125, -140.99609375, -133.099609375, -125.203125, -117.306640625, -109.41015625, -101.513671875, -93.6171875, -85.720703125, -77.82421875, -69.927734375, -62.03125, -54.134765625, -46.23828125, -38.341796875, -30.4453125, -22.548828125, -14.65234375, -6.755859375, 1.140625, 9.037109375, 16.93359375, 24.830078125, 32.7265625, 40.623046875, 48.51953125, 56.416015625, 64.3125, 72.208984375, 80.10546875, 88.001953125, 95.8984375, 103.794921875, 111.69140625, 119.587890625, 127.484375, 135.380859375, 143.27734375, 151.173828125, 159.0703125, 166.966796875, 174.86328125, 182.759765625, 190.65625, 198.552734375, 206.44921875, 214.345703125, 222.2421875, 230.138671875, 238.03515625, 245.931640625, 253.828125, 261.724609375, 269.62109375, 277.517578125, 285.4140625, 293.310546875, 301.20703125, 309.103515625, 317.0]}, "gradients/decoder.transformer.h.22.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 89.0, 873.0, 52.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-258.8080139160156, -234.0271759033203, -209.24635314941406, -184.46551513671875, -159.6846923828125, -134.9038543701172, -110.12301635742188, -85.34219360351562, -60.56135559082031, -35.78052520751953, -10.999691009521484, 13.781143188476562, 38.561973571777344, 63.342803955078125, 88.12364196777344, 112.90446472167969, 137.685302734375, 162.4661407470703, 187.24696350097656, 212.02780151367188, 236.80862426757812, 261.5894775390625, 286.37030029296875, 311.151123046875, 335.93194580078125, 360.7127685546875, 385.4936218261719, 410.2744445800781, 435.0552673339844, 459.83612060546875, 484.616943359375, 509.39776611328125, 534.1786499023438, 558.95947265625, 583.7402954101562, 608.5211181640625, 633.302001953125, 658.0828247070312, 682.8636474609375, 707.6444702148438, 732.42529296875, 757.2061157226562, 781.9869384765625, 806.767822265625, 831.5486450195312, 856.3294677734375, 881.1102905273438, 905.89111328125, 930.6719970703125, 955.4528198242188, 980.233642578125, 1005.0145263671875, 1029.7952880859375, 1054.576171875, 1079.35693359375, 1104.1378173828125, 1128.9185791015625, 1153.699462890625, 1178.480224609375, 1203.2611083984375, 1228.0418701171875, 1252.82275390625, 1277.603515625, 1302.3843994140625, 1327.165283203125]}, "gradients/decoder.transformer.h.22.ln_2.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 2.0, 2.0, 6.0, 1.0, 5.0, 10.0, 7.0, 16.0, 11.0, 18.0, 13.0, 16.0, 16.0, 21.0, 32.0, 25.0, 39.0, 34.0, 30.0, 47.0, 37.0, 46.0, 42.0, 46.0, 33.0, 46.0, 37.0, 41.0, 33.0, 39.0, 43.0, 28.0, 33.0, 31.0, 12.0, 19.0, 16.0, 15.0, 12.0, 16.0, 11.0, 6.0, 7.0, 3.0, 6.0, 3.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-102.78587341308594, -98.97459411621094, -95.16332244873047, -91.35204315185547, -87.540771484375, -83.7294921875, -79.918212890625, -76.10694122314453, -72.29566192626953, -68.48438262939453, -64.67311096191406, -60.86183166503906, -57.05055618286133, -53.239280700683594, -49.42800521850586, -45.616729736328125, -41.80545425415039, -37.994178771972656, -34.18290328979492, -30.371625900268555, -26.560348510742188, -22.749073028564453, -18.93779754638672, -15.126520156860352, -11.315244674682617, -7.503968238830566, -3.692692279815674, 0.11858367919921875, 3.9298601150512695, 7.74113655090332, 11.552412033081055, 15.363689422607422, 19.174964904785156, 22.98624038696289, 26.797517776489258, 30.608793258666992, 34.42007064819336, 38.231346130371094, 42.04262161254883, 45.85389709472656, 49.66517639160156, 53.4764518737793, 57.28772735595703, 61.09900665283203, 64.9102783203125, 68.7215576171875, 72.5328369140625, 76.34410858154297, 80.15538024902344, 83.96665954589844, 87.7779312133789, 91.5892105102539, 95.40048217773438, 99.21176147460938, 103.02304077148438, 106.83431243896484, 110.64559173583984, 114.45687103271484, 118.26814270019531, 122.07942199707031, 125.89069366455078, 129.70196533203125, 133.51324462890625, 137.32452392578125, 141.13580322265625]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 5.0, 7.0, 9.0, 10.0, 9.0, 6.0, 9.0, 15.0, 10.0, 14.0, 22.0, 25.0, 26.0, 31.0, 30.0, 28.0, 43.0, 35.0, 36.0, 48.0, 45.0, 49.0, 39.0, 52.0, 39.0, 35.0, 33.0, 36.0, 34.0, 35.0, 31.0, 21.0, 25.0, 19.0, 16.0, 15.0, 11.0, 12.0, 7.0, 10.0, 9.0, 8.0, 4.0, 3.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-7.875, -7.6202392578125, -7.365478515625, -7.1107177734375, -6.85595703125, -6.6011962890625, -6.346435546875, -6.0916748046875, -5.8369140625, -5.5821533203125, -5.327392578125, -5.0726318359375, -4.81787109375, -4.5631103515625, -4.308349609375, -4.0535888671875, -3.798828125, -3.5440673828125, -3.289306640625, -3.0345458984375, -2.77978515625, -2.5250244140625, -2.270263671875, -2.0155029296875, -1.7607421875, -1.5059814453125, -1.251220703125, -0.9964599609375, -0.74169921875, -0.4869384765625, -0.232177734375, 0.0225830078125, 0.27734375, 0.5321044921875, 0.786865234375, 1.0416259765625, 1.29638671875, 1.5511474609375, 1.805908203125, 2.0606689453125, 2.3154296875, 2.5701904296875, 2.824951171875, 3.0797119140625, 3.33447265625, 3.5892333984375, 3.843994140625, 4.0987548828125, 4.353515625, 4.6082763671875, 4.863037109375, 5.1177978515625, 5.37255859375, 5.6273193359375, 5.882080078125, 6.1368408203125, 6.3916015625, 6.6463623046875, 6.901123046875, 7.1558837890625, 7.41064453125, 7.6654052734375, 7.920166015625, 8.1749267578125, 8.4296875]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 4.0, 4.0, 10.0, 11.0, 9.0, 11.0, 28.0, 40.0, 51.0, 72.0, 135.0, 201.0, 276.0, 397.0, 607.0, 884.0, 1320.0, 1931.0, 2799.0, 4114.0, 6215.0, 9040.0, 13444.0, 19705.0, 29006.0, 43404.0, 63666.0, 93346.0, 140635.0, 190304.0, 138980.0, 92004.0, 63270.0, 43096.0, 28978.0, 19457.0, 13161.0, 8993.0, 6068.0, 4116.0, 2879.0, 1939.0, 1287.0, 839.0, 592.0, 390.0, 279.0, 199.0, 125.0, 75.0, 60.0, 30.0, 28.0, 18.0, 13.0, 8.0, 8.0, 5.0, 2.0, 4.0], "bins": [-0.87939453125, -0.8531341552734375, -0.826873779296875, -0.8006134033203125, -0.77435302734375, -0.7480926513671875, -0.721832275390625, -0.6955718994140625, -0.6693115234375, -0.6430511474609375, -0.616790771484375, -0.5905303955078125, -0.56427001953125, -0.5380096435546875, -0.511749267578125, -0.4854888916015625, -0.459228515625, -0.4329681396484375, -0.406707763671875, -0.3804473876953125, -0.35418701171875, -0.3279266357421875, -0.301666259765625, -0.2754058837890625, -0.2491455078125, -0.2228851318359375, -0.196624755859375, -0.1703643798828125, -0.14410400390625, -0.1178436279296875, -0.091583251953125, -0.0653228759765625, -0.0390625, -0.0128021240234375, 0.013458251953125, 0.0397186279296875, 0.06597900390625, 0.0922393798828125, 0.118499755859375, 0.1447601318359375, 0.1710205078125, 0.1972808837890625, 0.223541259765625, 0.2498016357421875, 0.27606201171875, 0.3023223876953125, 0.328582763671875, 0.3548431396484375, 0.381103515625, 0.4073638916015625, 0.433624267578125, 0.4598846435546875, 0.48614501953125, 0.5124053955078125, 0.538665771484375, 0.5649261474609375, 0.5911865234375, 0.6174468994140625, 0.643707275390625, 0.6699676513671875, 0.69622802734375, 0.7224884033203125, 0.748748779296875, 0.7750091552734375, 0.80126953125]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 4.0, 2.0, 5.0, 6.0, 6.0, 6.0, 7.0, 9.0, 11.0, 12.0, 14.0, 23.0, 19.0, 36.0, 30.0, 33.0, 42.0, 24.0, 54.0, 38.0, 41.0, 53.0, 40.0, 1053.0, 43.0, 32.0, 46.0, 29.0, 38.0, 32.0, 25.0, 23.0, 35.0, 31.0, 15.0, 19.0, 20.0, 15.0, 12.0, 9.0, 12.0, 4.0, 5.0, 8.0, 4.0, 2.0, 4.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.73828125, -4.57525634765625, -4.4122314453125, -4.24920654296875, -4.086181640625, -3.92315673828125, -3.7601318359375, -3.59710693359375, -3.43408203125, -3.27105712890625, -3.1080322265625, -2.94500732421875, -2.781982421875, -2.61895751953125, -2.4559326171875, -2.29290771484375, -2.1298828125, -1.96685791015625, -1.8038330078125, -1.64080810546875, -1.477783203125, -1.31475830078125, -1.1517333984375, -0.98870849609375, -0.82568359375, -0.66265869140625, -0.4996337890625, -0.33660888671875, -0.173583984375, -0.01055908203125, 0.1524658203125, 0.31549072265625, 0.478515625, 0.64154052734375, 0.8045654296875, 0.96759033203125, 1.130615234375, 1.29364013671875, 1.4566650390625, 1.61968994140625, 1.78271484375, 1.94573974609375, 2.1087646484375, 2.27178955078125, 2.434814453125, 2.59783935546875, 2.7608642578125, 2.92388916015625, 3.0869140625, 3.24993896484375, 3.4129638671875, 3.57598876953125, 3.739013671875, 3.90203857421875, 4.0650634765625, 4.22808837890625, 4.39111328125, 4.55413818359375, 4.7171630859375, 4.88018798828125, 5.043212890625, 5.20623779296875, 5.3692626953125, 5.53228759765625, 5.6953125]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 5.0, 8.0, 17.0, 21.0, 27.0, 63.0, 69.0, 123.0, 177.0, 273.0, 373.0, 654.0, 941.0, 1396.0, 2173.0, 3414.0, 5157.0, 8119.0, 12447.0, 19537.0, 30645.0, 47440.0, 74372.0, 117101.0, 187868.0, 1240804.0, 124222.0, 78955.0, 50214.0, 32094.0, 20719.0, 13158.0, 8628.0, 5532.0, 3681.0, 2374.0, 1494.0, 1020.0, 617.0, 392.0, 298.0, 198.0, 106.0, 73.0, 48.0, 36.0, 23.0, 12.0, 8.0, 4.0, 5.0, 4.0, 2.0, 2.0, 0.0, 1.0], "bins": [-0.93994140625, -0.9115142822265625, -0.883087158203125, -0.8546600341796875, -0.82623291015625, -0.7978057861328125, -0.769378662109375, -0.7409515380859375, -0.7125244140625, -0.6840972900390625, -0.655670166015625, -0.6272430419921875, -0.59881591796875, -0.5703887939453125, -0.541961669921875, -0.5135345458984375, -0.485107421875, -0.4566802978515625, -0.428253173828125, -0.3998260498046875, -0.37139892578125, -0.3429718017578125, -0.314544677734375, -0.2861175537109375, -0.2576904296875, -0.2292633056640625, -0.200836181640625, -0.1724090576171875, -0.14398193359375, -0.1155548095703125, -0.087127685546875, -0.0587005615234375, -0.0302734375, -0.0018463134765625, 0.026580810546875, 0.0550079345703125, 0.08343505859375, 0.1118621826171875, 0.140289306640625, 0.1687164306640625, 0.1971435546875, 0.2255706787109375, 0.253997802734375, 0.2824249267578125, 0.31085205078125, 0.3392791748046875, 0.367706298828125, 0.3961334228515625, 0.424560546875, 0.4529876708984375, 0.481414794921875, 0.5098419189453125, 0.53826904296875, 0.5666961669921875, 0.595123291015625, 0.6235504150390625, 0.6519775390625, 0.6804046630859375, 0.708831787109375, 0.7372589111328125, 0.76568603515625, 0.7941131591796875, 0.822540283203125, 0.8509674072265625, 0.87939453125]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 4.0, 6.0, 11.0, 13.0, 13.0, 7.0, 11.0, 10.0, 19.0, 19.0, 31.0, 27.0, 30.0, 40.0, 40.0, 49.0, 50.0, 69.0, 55.0, 51.0, 54.0, 56.0, 49.0, 44.0, 33.0, 34.0, 39.0, 22.0, 23.0, 19.0, 10.0, 11.0, 10.0, 9.0, 9.0, 5.0, 7.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.001800537109375, -0.0017470866441726685, -0.001693636178970337, -0.0016401857137680054, -0.0015867352485656738, -0.0015332847833633423, -0.0014798343181610107, -0.0014263838529586792, -0.0013729333877563477, -0.0013194829225540161, -0.0012660324573516846, -0.001212581992149353, -0.0011591315269470215, -0.00110568106174469, -0.0010522305965423584, -0.0009987801313400269, -0.0009453296661376953, -0.0008918792009353638, -0.0008384287357330322, -0.0007849782705307007, -0.0007315278053283691, -0.0006780773401260376, -0.0006246268749237061, -0.0005711764097213745, -0.000517725944519043, -0.0004642754793167114, -0.0004108250141143799, -0.00035737454891204834, -0.0003039240837097168, -0.00025047361850738525, -0.0001970231533050537, -0.00014357268810272217, -9.012222290039062e-05, -3.667175769805908e-05, 1.677870750427246e-05, 7.0229172706604e-05, 0.00012367963790893555, 0.0001771301031112671, 0.00023058056831359863, 0.0002840310335159302, 0.0003374814987182617, 0.00039093196392059326, 0.0004443824291229248, 0.0004978328943252563, 0.0005512833595275879, 0.0006047338247299194, 0.000658184289932251, 0.0007116347551345825, 0.0007650852203369141, 0.0008185356855392456, 0.0008719861507415771, 0.0009254366159439087, 0.0009788870811462402, 0.0010323375463485718, 0.0010857880115509033, 0.0011392384767532349, 0.0011926889419555664, 0.001246139407157898, 0.0012995898723602295, 0.001353040337562561, 0.0014064908027648926, 0.0014599412679672241, 0.0015133917331695557, 0.0015668421983718872, 0.0016202926635742188]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.weight": {"_type": "histogram", "values": [4.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 12.0, 13.0, 27.0, 24.0, 35.0, 39.0, 80.0, 77.0, 90.0, 130.0, 183.0, 307.0, 786.0, 20123.0, 1023975.0, 1215.0, 480.0, 281.0, 177.0, 110.0, 92.0, 63.0, 62.0, 30.0, 27.0, 20.0, 19.0, 14.0, 9.0, 11.0, 5.0, 2.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03619384765625, -0.03491067886352539, -0.03362751007080078, -0.03234434127807617, -0.031061172485351562, -0.029778003692626953, -0.028494834899902344, -0.027211666107177734, -0.025928497314453125, -0.024645328521728516, -0.023362159729003906, -0.022078990936279297, -0.020795822143554688, -0.019512653350830078, -0.01822948455810547, -0.01694631576538086, -0.01566314697265625, -0.01437997817993164, -0.013096809387207031, -0.011813640594482422, -0.010530471801757812, -0.009247303009033203, -0.007964134216308594, -0.006680965423583984, -0.005397796630859375, -0.004114627838134766, -0.0028314590454101562, -0.0015482902526855469, -0.0002651214599609375, 0.0010180473327636719, 0.0023012161254882812, 0.0035843849182128906, 0.0048675537109375, 0.006150722503662109, 0.007433891296386719, 0.008717060089111328, 0.010000228881835938, 0.011283397674560547, 0.012566566467285156, 0.013849735260009766, 0.015132904052734375, 0.016416072845458984, 0.017699241638183594, 0.018982410430908203, 0.020265579223632812, 0.021548748016357422, 0.02283191680908203, 0.02411508560180664, 0.02539825439453125, 0.02668142318725586, 0.02796459197998047, 0.029247760772705078, 0.030530929565429688, 0.0318140983581543, 0.033097267150878906, 0.034380435943603516, 0.035663604736328125, 0.036946773529052734, 0.038229942321777344, 0.03951311111450195, 0.04079627990722656, 0.04207944869995117, 0.04336261749267578, 0.04464578628540039, 0.045928955078125]}, "gradients/decoder.transformer.h.22.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 78.0, 881.0, 55.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00650307722389698, -0.006308676674962044, -0.00611427566036582, -0.005919875111430883, -0.005725474562495947, -0.005531073547899723, -0.0053366729989647865, -0.00514227245002985, -0.0049478719010949135, -0.004753471352159977, -0.004559070337563753, -0.004364669788628817, -0.00417026923969388, -0.003975868225097656, -0.0037814676761627197, -0.003587067127227783, -0.0033926661126315594, -0.003198265330865979, -0.0030038647819310427, -0.0028094640001654625, -0.002615063451230526, -0.002420662669464946, -0.0022262618876993656, -0.002031861338764429, -0.001837460556998849, -0.0016430598916485906, -0.0014486592262983322, -0.001254258444532752, -0.0010598577791824937, -0.0008654571138322353, -0.0006710563320666552, -0.0004766556667163968, -0.00028225453570485115, -8.785384125076234e-05, 0.00010654685320332646, 0.00030094757676124573, 0.0004953482421115041, 0.0006897489074617624, 0.0008841496892273426, 0.001078550354577601, 0.0012729510199278593, 0.0014673516852781177, 0.001661752350628376, 0.0018561531323939562, 0.0020505539141595364, 0.002244954463094473, 0.002439355244860053, 0.0026337560266256332, 0.0028281565755605698, 0.00302255735732615, 0.0032169579062610865, 0.0034113586880266666, 0.003605759236961603, 0.0038001600187271833, 0.0039945608004927635, 0.0041889613494277, 0.0043833618983626366, 0.004577762447297573, 0.004772163461893797, 0.0049665640108287334, 0.00516096455976367, 0.005355365574359894, 0.00554976612329483, 0.005744166672229767, 0.005938567686825991]}, "gradients/decoder.transformer.h.22.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 1.0, 1.0, 4.0, 2.0, 2.0, 8.0, 7.0, 5.0, 14.0, 14.0, 14.0, 10.0, 12.0, 13.0, 21.0, 28.0, 27.0, 39.0, 34.0, 33.0, 46.0, 50.0, 40.0, 51.0, 47.0, 39.0, 41.0, 37.0, 45.0, 32.0, 39.0, 36.0, 32.0, 37.0, 23.0, 17.0, 16.0, 15.0, 12.0, 9.0, 22.0, 6.0, 8.0, 6.0, 6.0, 7.0, 2.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0009483098983764648, -0.0009173713624477386, -0.0008864328265190125, -0.0008554942905902863, -0.0008245557546615601, -0.0007936172187328339, -0.0007626786828041077, -0.0007317401468753815, -0.0007008016109466553, -0.0006698630750179291, -0.0006389245390892029, -0.0006079860031604767, -0.0005770474672317505, -0.0005461089313030243, -0.0005151703953742981, -0.0004842318594455719, -0.0004532933235168457, -0.0004223547875881195, -0.0003914162516593933, -0.0003604777157306671, -0.0003295391798019409, -0.0002986006438732147, -0.0002676621079444885, -0.00023672357201576233, -0.00020578503608703613, -0.00017484650015830994, -0.00014390796422958374, -0.00011296942830085754, -8.203089237213135e-05, -5.109235644340515e-05, -2.0153820514678955e-05, 1.0784715414047241e-05, 4.172325134277344e-05, 7.266178727149963e-05, 0.00010360032320022583, 0.00013453885912895203, 0.00016547739505767822, 0.00019641593098640442, 0.00022735446691513062, 0.0002582930028438568, 0.000289231538772583, 0.0003201700747013092, 0.0003511086106300354, 0.0003820471465587616, 0.0004129856824874878, 0.000443924218416214, 0.0004748627543449402, 0.0005058012902736664, 0.0005367398262023926, 0.0005676783621311188, 0.000598616898059845, 0.0006295554339885712, 0.0006604939699172974, 0.0006914325058460236, 0.0007223710417747498, 0.000753309577703476, 0.0007842481136322021, 0.0008151866495609283, 0.0008461251854896545, 0.0008770637214183807, 0.0009080022573471069, 0.0009389407932758331, 0.0009698793292045593, 0.0010008178651332855, 0.0010317564010620117]}, "gradients/decoder.transformer.h.22.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 5.0, 7.0, 9.0, 10.0, 9.0, 6.0, 9.0, 15.0, 10.0, 14.0, 22.0, 25.0, 26.0, 31.0, 30.0, 28.0, 43.0, 35.0, 36.0, 48.0, 45.0, 49.0, 39.0, 52.0, 39.0, 35.0, 33.0, 36.0, 34.0, 35.0, 31.0, 21.0, 25.0, 19.0, 16.0, 15.0, 11.0, 12.0, 7.0, 10.0, 9.0, 8.0, 4.0, 3.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-7.875, -7.6202392578125, -7.365478515625, -7.1107177734375, -6.85595703125, -6.6011962890625, -6.346435546875, -6.0916748046875, -5.8369140625, -5.5821533203125, -5.327392578125, -5.0726318359375, -4.81787109375, -4.5631103515625, -4.308349609375, -4.0535888671875, -3.798828125, -3.5440673828125, -3.289306640625, -3.0345458984375, -2.77978515625, -2.5250244140625, -2.270263671875, -2.0155029296875, -1.7607421875, -1.5059814453125, -1.251220703125, -0.9964599609375, -0.74169921875, -0.4869384765625, -0.232177734375, 0.0225830078125, 0.27734375, 0.5321044921875, 0.786865234375, 1.0416259765625, 1.29638671875, 1.5511474609375, 1.805908203125, 2.0606689453125, 2.3154296875, 2.5701904296875, 2.824951171875, 3.0797119140625, 3.33447265625, 3.5892333984375, 3.843994140625, 4.0987548828125, 4.353515625, 4.6082763671875, 4.863037109375, 5.1177978515625, 5.37255859375, 5.6273193359375, 5.882080078125, 6.1368408203125, 6.3916015625, 6.6463623046875, 6.901123046875, 7.1558837890625, 7.41064453125, 7.6654052734375, 7.920166015625, 8.1749267578125, 8.4296875]}, "gradients/decoder.transformer.h.22.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 3.0, 2.0, 13.0, 6.0, 12.0, 8.0, 17.0, 26.0, 33.0, 58.0, 48.0, 75.0, 88.0, 139.0, 218.0, 265.0, 464.0, 722.0, 1268.0, 2554.0, 5513.0, 14596.0, 43417.0, 151043.0, 460625.0, 257565.0, 71460.0, 22387.0, 8153.0, 3497.0, 1655.0, 878.0, 598.0, 348.0, 225.0, 146.0, 117.0, 79.0, 62.0, 43.0, 37.0, 26.0, 26.0, 13.0, 10.0, 7.0, 5.0, 3.0, 3.0, 2.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-8.0625, -7.7999267578125, -7.537353515625, -7.2747802734375, -7.01220703125, -6.7496337890625, -6.487060546875, -6.2244873046875, -5.9619140625, -5.6993408203125, -5.436767578125, -5.1741943359375, -4.91162109375, -4.6490478515625, -4.386474609375, -4.1239013671875, -3.861328125, -3.5987548828125, -3.336181640625, -3.0736083984375, -2.81103515625, -2.5484619140625, -2.285888671875, -2.0233154296875, -1.7607421875, -1.4981689453125, -1.235595703125, -0.9730224609375, -0.71044921875, -0.4478759765625, -0.185302734375, 0.0772705078125, 0.33984375, 0.6024169921875, 0.864990234375, 1.1275634765625, 1.39013671875, 1.6527099609375, 1.915283203125, 2.1778564453125, 2.4404296875, 2.7030029296875, 2.965576171875, 3.2281494140625, 3.49072265625, 3.7532958984375, 4.015869140625, 4.2784423828125, 4.541015625, 4.8035888671875, 5.066162109375, 5.3287353515625, 5.59130859375, 5.8538818359375, 6.116455078125, 6.3790283203125, 6.6416015625, 6.9041748046875, 7.166748046875, 7.4293212890625, 7.69189453125, 7.9544677734375, 8.217041015625, 8.4796142578125, 8.7421875]}, "gradients/decoder.transformer.h.22.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 4.0, 3.0, 1.0, 0.0, 0.0, 8.0, 1.0, 4.0, 4.0, 10.0, 8.0, 12.0, 14.0, 18.0, 23.0, 24.0, 29.0, 34.0, 35.0, 38.0, 35.0, 42.0, 44.0, 40.0, 79.0, 252.0, 1681.0, 158.0, 70.0, 49.0, 51.0, 33.0, 42.0, 37.0, 24.0, 23.0, 20.0, 13.0, 22.0, 15.0, 10.0, 13.0, 9.0, 10.0, 3.0, 2.0, 6.0, 3.0, 2.0, 4.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.296875, -27.363525390625, -26.43017578125, -25.496826171875, -24.5634765625, -23.630126953125, -22.69677734375, -21.763427734375, -20.830078125, -19.896728515625, -18.96337890625, -18.030029296875, -17.0966796875, -16.163330078125, -15.22998046875, -14.296630859375, -13.36328125, -12.429931640625, -11.49658203125, -10.563232421875, -9.6298828125, -8.696533203125, -7.76318359375, -6.829833984375, -5.896484375, -4.963134765625, -4.02978515625, -3.096435546875, -2.1630859375, -1.229736328125, -0.29638671875, 0.636962890625, 1.5703125, 2.503662109375, 3.43701171875, 4.370361328125, 5.3037109375, 6.237060546875, 7.17041015625, 8.103759765625, 9.037109375, 9.970458984375, 10.90380859375, 11.837158203125, 12.7705078125, 13.703857421875, 14.63720703125, 15.570556640625, 16.50390625, 17.437255859375, 18.37060546875, 19.303955078125, 20.2373046875, 21.170654296875, 22.10400390625, 23.037353515625, 23.970703125, 24.904052734375, 25.83740234375, 26.770751953125, 27.7041015625, 28.637451171875, 29.57080078125, 30.504150390625, 31.4375]}, "gradients/decoder.transformer.h.22.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 5.0, 3.0, 1.0, 6.0, 2.0, 5.0, 10.0, 10.0, 11.0, 11.0, 20.0, 16.0, 22.0, 17.0, 34.0, 39.0, 48.0, 80.0, 105.0, 160.0, 396.0, 1199.0, 27698.0, 3105627.0, 8385.0, 879.0, 314.0, 155.0, 97.0, 70.0, 45.0, 54.0, 41.0, 27.0, 25.0, 17.0, 19.0, 14.0, 10.0, 9.0, 6.0, 8.0, 0.0, 3.0, 5.0, 1.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-71.9375, -69.783203125, -67.62890625, -65.474609375, -63.3203125, -61.166015625, -59.01171875, -56.857421875, -54.703125, -52.548828125, -50.39453125, -48.240234375, -46.0859375, -43.931640625, -41.77734375, -39.623046875, -37.46875, -35.314453125, -33.16015625, -31.005859375, -28.8515625, -26.697265625, -24.54296875, -22.388671875, -20.234375, -18.080078125, -15.92578125, -13.771484375, -11.6171875, -9.462890625, -7.30859375, -5.154296875, -3.0, -0.845703125, 1.30859375, 3.462890625, 5.6171875, 7.771484375, 9.92578125, 12.080078125, 14.234375, 16.388671875, 18.54296875, 20.697265625, 22.8515625, 25.005859375, 27.16015625, 29.314453125, 31.46875, 33.623046875, 35.77734375, 37.931640625, 40.0859375, 42.240234375, 44.39453125, 46.548828125, 48.703125, 50.857421875, 53.01171875, 55.166015625, 57.3203125, 59.474609375, 61.62890625, 63.783203125, 65.9375]}, "gradients/decoder.transformer.h.22.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 13.0, 977.0, 25.0, 2.0], "bins": [-599.9486694335938, -590.1962280273438, -580.4437255859375, -570.6912841796875, -560.9387817382812, -551.1863403320312, -541.433837890625, -531.681396484375, -521.9288940429688, -512.1764526367188, -502.4239501953125, -492.6714782714844, -482.91900634765625, -473.1665344238281, -463.4140625, -453.66162109375, -443.9091491699219, -434.15667724609375, -424.4042053222656, -414.6517333984375, -404.8992614746094, -395.14678955078125, -385.3943176269531, -375.641845703125, -365.889404296875, -356.1369323730469, -346.38446044921875, -336.6319885253906, -326.8795166015625, -317.1270446777344, -307.37457275390625, -297.62213134765625, -287.86962890625, -278.1171569824219, -268.36468505859375, -258.6122131347656, -248.8597412109375, -239.10726928710938, -229.3548126220703, -219.6023406982422, -209.849853515625, -200.09738159179688, -190.34490966796875, -180.59243774414062, -170.8399658203125, -161.08749389648438, -151.3350372314453, -141.5825653076172, -131.83009338378906, -122.07762145996094, -112.32514953613281, -102.57268524169922, -92.8202133178711, -83.06774139404297, -73.31527709960938, -63.56280517578125, -53.810333251953125, -44.057861328125, -34.30539321899414, -24.55292320251465, -14.800453186035156, -5.047981262207031, 4.704486846923828, 14.456954956054688, 24.209426879882812]}, "gradients/decoder.transformer.h.22.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 4.0, 2.0, 5.0, 2.0, 6.0, 2.0, 2.0, 6.0, 8.0, 4.0, 22.0, 17.0, 17.0, 17.0, 18.0, 27.0, 38.0, 25.0, 31.0, 35.0, 38.0, 40.0, 43.0, 49.0, 42.0, 42.0, 40.0, 42.0, 44.0, 26.0, 45.0, 47.0, 25.0, 31.0, 28.0, 21.0, 25.0, 20.0, 13.0, 12.0, 11.0, 11.0, 9.0, 3.0, 7.0, 4.0, 2.0, 4.0, 0.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-85.84872436523438, -82.8845443725586, -79.92037200927734, -76.95619201660156, -73.99201202392578, -71.02783966064453, -68.06365966796875, -65.0994873046875, -62.13530731201172, -59.1711311340332, -56.20695114135742, -53.242774963378906, -50.27859878540039, -47.314422607421875, -44.350242614746094, -41.38606643676758, -38.4218864440918, -35.45771026611328, -32.4935302734375, -29.529354095458984, -26.56517791748047, -23.60099983215332, -20.636821746826172, -17.672645568847656, -14.708467483520508, -11.744290351867676, -8.780113220214844, -5.815935134887695, -2.8517580032348633, 0.11241912841796875, 3.076597213745117, 6.040773391723633, 9.004951477050781, 11.969128608703613, 14.933305740356445, 17.897483825683594, 20.86166000366211, 23.825838088989258, 26.790016174316406, 29.754192352294922, 32.71836853027344, 35.68254470825195, 38.646724700927734, 41.61090087890625, 44.575077056884766, 47.53925323486328, 50.50343322753906, 53.46760940551758, 56.43178939819336, 59.395965576171875, 62.360145568847656, 65.32432556152344, 68.28849792480469, 71.25267791748047, 74.21685791015625, 77.1810302734375, 80.14521026611328, 83.10939025878906, 86.07356262207031, 89.0377426147461, 92.00192260742188, 94.96609497070312, 97.9302749633789, 100.89445495605469, 103.85862731933594]}, "gradients/decoder.transformer.h.21.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 10.0, 12.0, 9.0, 9.0, 9.0, 11.0, 7.0, 18.0, 21.0, 26.0, 24.0, 34.0, 31.0, 30.0, 34.0, 39.0, 44.0, 42.0, 51.0, 48.0, 41.0, 44.0, 46.0, 31.0, 41.0, 35.0, 46.0, 25.0, 30.0, 23.0, 25.0, 22.0, 17.0, 10.0, 11.0, 9.0, 13.0, 8.0, 5.0, 6.0, 2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-8.5390625, -8.2703857421875, -8.001708984375, -7.7330322265625, -7.46435546875, -7.1956787109375, -6.927001953125, -6.6583251953125, -6.3896484375, -6.1209716796875, -5.852294921875, -5.5836181640625, -5.31494140625, -5.0462646484375, -4.777587890625, -4.5089111328125, -4.240234375, -3.9715576171875, -3.702880859375, -3.4342041015625, -3.16552734375, -2.8968505859375, -2.628173828125, -2.3594970703125, -2.0908203125, -1.8221435546875, -1.553466796875, -1.2847900390625, -1.01611328125, -0.7474365234375, -0.478759765625, -0.2100830078125, 0.05859375, 0.3272705078125, 0.595947265625, 0.8646240234375, 1.13330078125, 1.4019775390625, 1.670654296875, 1.9393310546875, 2.2080078125, 2.4766845703125, 2.745361328125, 3.0140380859375, 3.28271484375, 3.5513916015625, 3.820068359375, 4.0887451171875, 4.357421875, 4.6260986328125, 4.894775390625, 5.1634521484375, 5.43212890625, 5.7008056640625, 5.969482421875, 6.2381591796875, 6.5068359375, 6.7755126953125, 7.044189453125, 7.3128662109375, 7.58154296875, 7.8502197265625, 8.118896484375, 8.3875732421875, 8.65625]}, "gradients/decoder.transformer.h.21.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 6.0, 4.0, 7.0, 9.0, 10.0, 16.0, 19.0, 33.0, 35.0, 46.0, 59.0, 77.0, 124.0, 200.0, 356.0, 900.0, 2883.0, 15464.0, 158522.0, 2604358.0, 1331113.0, 68053.0, 8621.0, 1898.0, 624.0, 286.0, 147.0, 119.0, 61.0, 56.0, 39.0, 34.0, 24.0, 29.0, 11.0, 11.0, 6.0, 5.0, 6.0, 6.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-29.71875, -28.755126953125, -27.79150390625, -26.827880859375, -25.8642578125, -24.900634765625, -23.93701171875, -22.973388671875, -22.009765625, -21.046142578125, -20.08251953125, -19.118896484375, -18.1552734375, -17.191650390625, -16.22802734375, -15.264404296875, -14.30078125, -13.337158203125, -12.37353515625, -11.409912109375, -10.4462890625, -9.482666015625, -8.51904296875, -7.555419921875, -6.591796875, -5.628173828125, -4.66455078125, -3.700927734375, -2.7373046875, -1.773681640625, -0.81005859375, 0.153564453125, 1.1171875, 2.080810546875, 3.04443359375, 4.008056640625, 4.9716796875, 5.935302734375, 6.89892578125, 7.862548828125, 8.826171875, 9.789794921875, 10.75341796875, 11.717041015625, 12.6806640625, 13.644287109375, 14.60791015625, 15.571533203125, 16.53515625, 17.498779296875, 18.46240234375, 19.426025390625, 20.3896484375, 21.353271484375, 22.31689453125, 23.280517578125, 24.244140625, 25.207763671875, 26.17138671875, 27.135009765625, 28.0986328125, 29.062255859375, 30.02587890625, 30.989501953125, 31.953125]}, "gradients/decoder.transformer.h.21.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 1.0, 7.0, 5.0, 7.0, 8.0, 23.0, 40.0, 80.0, 112.0, 187.0, 335.0, 502.0, 786.0, 784.0, 483.0, 306.0, 156.0, 102.0, 75.0, 37.0, 20.0, 13.0, 8.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-62.71875, -61.10595703125, -59.4931640625, -57.88037109375, -56.267578125, -54.65478515625, -53.0419921875, -51.42919921875, -49.81640625, -48.20361328125, -46.5908203125, -44.97802734375, -43.365234375, -41.75244140625, -40.1396484375, -38.52685546875, -36.9140625, -35.30126953125, -33.6884765625, -32.07568359375, -30.462890625, -28.85009765625, -27.2373046875, -25.62451171875, -24.01171875, -22.39892578125, -20.7861328125, -19.17333984375, -17.560546875, -15.94775390625, -14.3349609375, -12.72216796875, -11.109375, -9.49658203125, -7.8837890625, -6.27099609375, -4.658203125, -3.04541015625, -1.4326171875, 0.18017578125, 1.79296875, 3.40576171875, 5.0185546875, 6.63134765625, 8.244140625, 9.85693359375, 11.4697265625, 13.08251953125, 14.6953125, 16.30810546875, 17.9208984375, 19.53369140625, 21.146484375, 22.75927734375, 24.3720703125, 25.98486328125, 27.59765625, 29.21044921875, 30.8232421875, 32.43603515625, 34.048828125, 35.66162109375, 37.2744140625, 38.88720703125, 40.5]}, "gradients/decoder.transformer.h.21.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 8.0, 5.0, 10.0, 16.0, 39.0, 54.0, 83.0, 145.0, 278.0, 610.0, 5542.0, 3836707.0, 348027.0, 1822.0, 416.0, 217.0, 117.0, 69.0, 42.0, 26.0, 27.0, 9.0, 4.0, 6.0, 1.0, 5.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-146.75, -141.66015625, -136.5703125, -131.48046875, -126.390625, -121.30078125, -116.2109375, -111.12109375, -106.03125, -100.94140625, -95.8515625, -90.76171875, -85.671875, -80.58203125, -75.4921875, -70.40234375, -65.3125, -60.22265625, -55.1328125, -50.04296875, -44.953125, -39.86328125, -34.7734375, -29.68359375, -24.59375, -19.50390625, -14.4140625, -9.32421875, -4.234375, 0.85546875, 5.9453125, 11.03515625, 16.125, 21.21484375, 26.3046875, 31.39453125, 36.484375, 41.57421875, 46.6640625, 51.75390625, 56.84375, 61.93359375, 67.0234375, 72.11328125, 77.203125, 82.29296875, 87.3828125, 92.47265625, 97.5625, 102.65234375, 107.7421875, 112.83203125, 117.921875, 123.01171875, 128.1015625, 133.19140625, 138.28125, 143.37109375, 148.4609375, 153.55078125, 158.640625, 163.73046875, 168.8203125, 173.91015625, 179.0]}, "gradients/decoder.transformer.h.21.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 20.0, 90.0, 372.0, 389.0, 124.0, 16.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-438.0901184082031, -426.88385009765625, -415.6775817871094, -404.4713134765625, -393.2650146484375, -382.0587463378906, -370.85247802734375, -359.6462097167969, -348.43994140625, -337.2336730957031, -326.02740478515625, -314.8211364746094, -303.6148681640625, -292.4085693359375, -281.2023010253906, -269.99603271484375, -258.7897644042969, -247.58349609375, -236.37722778320312, -225.1709442138672, -213.9646759033203, -202.75840759277344, -191.5521240234375, -180.34585571289062, -169.13958740234375, -157.93331909179688, -146.72705078125, -135.52076721191406, -124.31449890136719, -113.10823059082031, -101.9019546508789, -90.6956787109375, -79.48941040039062, -68.28314208984375, -57.076866149902344, -45.8705940246582, -34.66432189941406, -23.458049774169922, -12.251777648925781, -1.045501708984375, 10.1607666015625, 21.36703872680664, 32.57331085205078, 43.77958297729492, 54.98585510253906, 66.19212341308594, 77.39839935302734, 88.60467529296875, 99.81094360351562, 111.0172119140625, 122.2234878540039, 133.4297637939453, 144.6360321044922, 155.84230041503906, 167.048583984375, 178.25485229492188, 189.46112060546875, 200.66738891601562, 211.8736572265625, 223.07994079589844, 234.2862091064453, 245.4924774169922, 256.6987609863281, 267.905029296875, 279.1112976074219]}, "gradients/decoder.transformer.h.21.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 5.0, 5.0, 7.0, 8.0, 6.0, 6.0, 18.0, 23.0, 14.0, 19.0, 22.0, 27.0, 31.0, 27.0, 23.0, 33.0, 34.0, 32.0, 54.0, 50.0, 43.0, 34.0, 47.0, 38.0, 45.0, 50.0, 28.0, 30.0, 25.0, 27.0, 25.0, 22.0, 28.0, 15.0, 23.0, 21.0, 14.0, 10.0, 10.0, 11.0, 1.0, 6.0, 3.0, 0.0, 2.0, 4.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-107.93976593017578, -104.61256408691406, -101.28536224365234, -97.95816040039062, -94.6309585571289, -91.30375671386719, -87.97655487060547, -84.64935302734375, -81.32215118408203, -77.99494934082031, -74.6677474975586, -71.34054565429688, -68.01334381103516, -64.68614196777344, -61.35894012451172, -58.03173828125, -54.70453643798828, -51.37733459472656, -48.050132751464844, -44.722930908203125, -41.395729064941406, -38.06852722167969, -34.74132537841797, -31.41412353515625, -28.08692169189453, -24.759719848632812, -21.432518005371094, -18.105316162109375, -14.778114318847656, -11.450912475585938, -8.123710632324219, -4.7965087890625, -1.46929931640625, 1.8579025268554688, 5.1851043701171875, 8.512306213378906, 11.839508056640625, 15.166709899902344, 18.493911743164062, 21.82111358642578, 25.1483154296875, 28.47551727294922, 31.802719116210938, 35.129920959472656, 38.457122802734375, 41.784324645996094, 45.11152648925781, 48.43872833251953, 51.76593017578125, 55.09313201904297, 58.42033386230469, 61.747535705566406, 65.07473754882812, 68.40193939208984, 71.72914123535156, 75.05634307861328, 78.383544921875, 81.71074676513672, 85.03794860839844, 88.36515045166016, 91.69235229492188, 95.0195541381836, 98.34675598144531, 101.67395782470703, 105.00115966796875]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 4.0, 2.0, 8.0, 6.0, 7.0, 6.0, 9.0, 13.0, 13.0, 11.0, 20.0, 18.0, 25.0, 28.0, 29.0, 31.0, 34.0, 37.0, 32.0, 51.0, 44.0, 53.0, 41.0, 43.0, 47.0, 42.0, 39.0, 37.0, 29.0, 43.0, 35.0, 26.0, 29.0, 15.0, 16.0, 21.0, 13.0, 12.0, 14.0, 3.0, 3.0, 6.0, 7.0, 1.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.4375, -8.160888671875, -7.88427734375, -7.607666015625, -7.3310546875, -7.054443359375, -6.77783203125, -6.501220703125, -6.224609375, -5.947998046875, -5.67138671875, -5.394775390625, -5.1181640625, -4.841552734375, -4.56494140625, -4.288330078125, -4.01171875, -3.735107421875, -3.45849609375, -3.181884765625, -2.9052734375, -2.628662109375, -2.35205078125, -2.075439453125, -1.798828125, -1.522216796875, -1.24560546875, -0.968994140625, -0.6923828125, -0.415771484375, -0.13916015625, 0.137451171875, 0.4140625, 0.690673828125, 0.96728515625, 1.243896484375, 1.5205078125, 1.797119140625, 2.07373046875, 2.350341796875, 2.626953125, 2.903564453125, 3.18017578125, 3.456787109375, 3.7333984375, 4.010009765625, 4.28662109375, 4.563232421875, 4.83984375, 5.116455078125, 5.39306640625, 5.669677734375, 5.9462890625, 6.222900390625, 6.49951171875, 6.776123046875, 7.052734375, 7.329345703125, 7.60595703125, 7.882568359375, 8.1591796875, 8.435791015625, 8.71240234375, 8.989013671875, 9.265625]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 5.0, 3.0, 5.0, 7.0, 12.0, 30.0, 36.0, 71.0, 71.0, 114.0, 171.0, 207.0, 336.0, 417.0, 662.0, 910.0, 1375.0, 1893.0, 2903.0, 4232.0, 6077.0, 8927.0, 13013.0, 19571.0, 28855.0, 41779.0, 60924.0, 88154.0, 129652.0, 180205.0, 146203.0, 97402.0, 67441.0, 46670.0, 32130.0, 21690.0, 14777.0, 10050.0, 6933.0, 4539.0, 3085.0, 2189.0, 1527.0, 1009.0, 704.0, 501.0, 331.0, 248.0, 159.0, 129.0, 82.0, 49.0, 35.0, 23.0, 12.0, 15.0, 6.0, 7.0, 4.0, 1.0, 2.0, 3.0], "bins": [-0.83740234375, -0.8109359741210938, -0.7844696044921875, -0.7580032348632812, -0.731536865234375, -0.7050704956054688, -0.6786041259765625, -0.6521377563476562, -0.62567138671875, -0.5992050170898438, -0.5727386474609375, -0.5462722778320312, -0.519805908203125, -0.49333953857421875, -0.4668731689453125, -0.44040679931640625, -0.4139404296875, -0.38747406005859375, -0.3610076904296875, -0.33454132080078125, -0.308074951171875, -0.28160858154296875, -0.2551422119140625, -0.22867584228515625, -0.20220947265625, -0.17574310302734375, -0.1492767333984375, -0.12281036376953125, -0.096343994140625, -0.06987762451171875, -0.0434112548828125, -0.01694488525390625, 0.009521484375, 0.03598785400390625, 0.0624542236328125, 0.08892059326171875, 0.115386962890625, 0.14185333251953125, 0.1683197021484375, 0.19478607177734375, 0.22125244140625, 0.24771881103515625, 0.2741851806640625, 0.30065155029296875, 0.327117919921875, 0.35358428955078125, 0.3800506591796875, 0.40651702880859375, 0.4329833984375, 0.45944976806640625, 0.4859161376953125, 0.5123825073242188, 0.538848876953125, 0.5653152465820312, 0.5917816162109375, 0.6182479858398438, 0.64471435546875, 0.6711807250976562, 0.6976470947265625, 0.7241134643554688, 0.750579833984375, 0.7770462036132812, 0.8035125732421875, 0.8299789428710938, 0.8564453125]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 2.0, 7.0, 6.0, 10.0, 9.0, 5.0, 12.0, 13.0, 18.0, 22.0, 19.0, 31.0, 33.0, 31.0, 36.0, 34.0, 36.0, 35.0, 56.0, 47.0, 41.0, 1059.0, 47.0, 41.0, 28.0, 37.0, 30.0, 33.0, 35.0, 36.0, 21.0, 23.0, 27.0, 12.0, 10.0, 16.0, 11.0, 12.0, 10.0, 9.0, 10.0, 3.0, 5.0, 7.0, 4.0, 2.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.55859375, -5.38665771484375, -5.2147216796875, -5.04278564453125, -4.870849609375, -4.69891357421875, -4.5269775390625, -4.35504150390625, -4.18310546875, -4.01116943359375, -3.8392333984375, -3.66729736328125, -3.495361328125, -3.32342529296875, -3.1514892578125, -2.97955322265625, -2.8076171875, -2.63568115234375, -2.4637451171875, -2.29180908203125, -2.119873046875, -1.94793701171875, -1.7760009765625, -1.60406494140625, -1.43212890625, -1.26019287109375, -1.0882568359375, -0.91632080078125, -0.744384765625, -0.57244873046875, -0.4005126953125, -0.22857666015625, -0.056640625, 0.11529541015625, 0.2872314453125, 0.45916748046875, 0.631103515625, 0.80303955078125, 0.9749755859375, 1.14691162109375, 1.31884765625, 1.49078369140625, 1.6627197265625, 1.83465576171875, 2.006591796875, 2.17852783203125, 2.3504638671875, 2.52239990234375, 2.6943359375, 2.86627197265625, 3.0382080078125, 3.21014404296875, 3.382080078125, 3.55401611328125, 3.7259521484375, 3.89788818359375, 4.06982421875, 4.24176025390625, 4.4136962890625, 4.58563232421875, 4.757568359375, 4.92950439453125, 5.1014404296875, 5.27337646484375, 5.4453125]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 6.0, 3.0, 13.0, 8.0, 18.0, 31.0, 36.0, 59.0, 95.0, 140.0, 212.0, 360.0, 491.0, 820.0, 1245.0, 1807.0, 2848.0, 4376.0, 6639.0, 10405.0, 16168.0, 25620.0, 40513.0, 63814.0, 102028.0, 163117.0, 1258102.0, 147703.0, 91539.0, 57841.0, 36547.0, 22934.0, 14644.0, 9679.0, 6114.0, 3909.0, 2587.0, 1670.0, 1031.0, 650.0, 468.0, 297.0, 224.0, 107.0, 83.0, 50.0, 36.0, 25.0, 13.0, 8.0, 5.0, 3.0, 4.0, 2.0, 1.0, 2.0], "bins": [-1.01953125, -0.9898834228515625, -0.960235595703125, -0.9305877685546875, -0.90093994140625, -0.8712921142578125, -0.841644287109375, -0.8119964599609375, -0.7823486328125, -0.7527008056640625, -0.723052978515625, -0.6934051513671875, -0.66375732421875, -0.6341094970703125, -0.604461669921875, -0.5748138427734375, -0.545166015625, -0.5155181884765625, -0.485870361328125, -0.4562225341796875, -0.42657470703125, -0.3969268798828125, -0.367279052734375, -0.3376312255859375, -0.3079833984375, -0.2783355712890625, -0.248687744140625, -0.2190399169921875, -0.18939208984375, -0.1597442626953125, -0.130096435546875, -0.1004486083984375, -0.07080078125, -0.0411529541015625, -0.011505126953125, 0.0181427001953125, 0.04779052734375, 0.0774383544921875, 0.107086181640625, 0.1367340087890625, 0.1663818359375, 0.1960296630859375, 0.225677490234375, 0.2553253173828125, 0.28497314453125, 0.3146209716796875, 0.344268798828125, 0.3739166259765625, 0.403564453125, 0.4332122802734375, 0.462860107421875, 0.4925079345703125, 0.52215576171875, 0.5518035888671875, 0.581451416015625, 0.6110992431640625, 0.6407470703125, 0.6703948974609375, 0.700042724609375, 0.7296905517578125, 0.75933837890625, 0.7889862060546875, 0.818634033203125, 0.8482818603515625, 0.8779296875]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 0.0, 2.0, 3.0, 4.0, 0.0, 3.0, 6.0, 0.0, 3.0, 4.0, 3.0, 7.0, 6.0, 10.0, 12.0, 5.0, 13.0, 17.0, 24.0, 23.0, 29.0, 36.0, 41.0, 37.0, 58.0, 61.0, 69.0, 58.0, 51.0, 55.0, 54.0, 54.0, 46.0, 44.0, 27.0, 26.0, 22.0, 9.0, 15.0, 12.0, 10.0, 6.0, 7.0, 4.0, 10.0, 2.0, 3.0, 4.0, 2.0, 3.0, 2.0, 4.0, 1.0, 4.0, 4.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.00142669677734375, -0.0013808012008666992, -0.0013349056243896484, -0.0012890100479125977, -0.0012431144714355469, -0.001197218894958496, -0.0011513233184814453, -0.0011054277420043945, -0.0010595321655273438, -0.001013636589050293, -0.0009677410125732422, -0.0009218454360961914, -0.0008759498596191406, -0.0008300542831420898, -0.0007841587066650391, -0.0007382631301879883, -0.0006923675537109375, -0.0006464719772338867, -0.0006005764007568359, -0.0005546808242797852, -0.0005087852478027344, -0.0004628896713256836, -0.0004169940948486328, -0.00037109851837158203, -0.00032520294189453125, -0.00027930736541748047, -0.0002334117889404297, -0.0001875162124633789, -0.00014162063598632812, -9.572505950927734e-05, -4.982948303222656e-05, -3.933906555175781e-06, 4.1961669921875e-05, 8.785724639892578e-05, 0.00013375282287597656, 0.00017964839935302734, 0.00022554397583007812, 0.0002714395523071289, 0.0003173351287841797, 0.00036323070526123047, 0.00040912628173828125, 0.00045502185821533203, 0.0005009174346923828, 0.0005468130111694336, 0.0005927085876464844, 0.0006386041641235352, 0.0006844997406005859, 0.0007303953170776367, 0.0007762908935546875, 0.0008221864700317383, 0.0008680820465087891, 0.0009139776229858398, 0.0009598731994628906, 0.0010057687759399414, 0.0010516643524169922, 0.001097559928894043, 0.0011434555053710938, 0.0011893510818481445, 0.0012352466583251953, 0.001281142234802246, 0.0013270378112792969, 0.0013729333877563477, 0.0014188289642333984, 0.0014647245407104492, 0.0015106201171875]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 4.0, 8.0, 4.0, 3.0, 2.0, 7.0, 2.0, 12.0, 14.0, 12.0, 23.0, 35.0, 32.0, 55.0, 61.0, 96.0, 128.0, 216.0, 264.0, 666.0, 7048.0, 1036687.0, 1814.0, 473.0, 272.0, 183.0, 105.0, 73.0, 61.0, 39.0, 30.0, 33.0, 14.0, 19.0, 8.0, 13.0, 6.0, 4.0, 3.0, 8.0, 6.0, 5.0, 2.0, 4.0, 1.0, 0.0, 1.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.037139892578125, -0.03595590591430664, -0.03477191925048828, -0.03358793258666992, -0.03240394592285156, -0.031219959259033203, -0.030035972595214844, -0.028851985931396484, -0.027667999267578125, -0.026484012603759766, -0.025300025939941406, -0.024116039276123047, -0.022932052612304688, -0.021748065948486328, -0.02056407928466797, -0.01938009262084961, -0.01819610595703125, -0.01701211929321289, -0.01582813262939453, -0.014644145965576172, -0.013460159301757812, -0.012276172637939453, -0.011092185974121094, -0.009908199310302734, -0.008724212646484375, -0.007540225982666016, -0.006356239318847656, -0.005172252655029297, -0.0039882659912109375, -0.002804279327392578, -0.0016202926635742188, -0.0004363059997558594, 0.0007476806640625, 0.0019316673278808594, 0.0031156539916992188, 0.004299640655517578, 0.0054836273193359375, 0.006667613983154297, 0.007851600646972656, 0.009035587310791016, 0.010219573974609375, 0.011403560638427734, 0.012587547302246094, 0.013771533966064453, 0.014955520629882812, 0.016139507293701172, 0.01732349395751953, 0.01850748062133789, 0.01969146728515625, 0.02087545394897461, 0.02205944061279297, 0.023243427276611328, 0.024427413940429688, 0.025611400604248047, 0.026795387268066406, 0.027979373931884766, 0.029163360595703125, 0.030347347259521484, 0.031531333923339844, 0.0327153205871582, 0.03389930725097656, 0.03508329391479492, 0.03626728057861328, 0.03745126724243164, 0.03863525390625]}, "gradients/decoder.transformer.h.21.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 10.0, 33.0, 165.0, 434.0, 290.0, 68.0, 14.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.003726032329723239, -0.0036445967853069305, -0.003563161240890622, -0.00348172546364367, -0.0034002899192273617, -0.0033188543748110533, -0.003237418830394745, -0.003155983053147793, -0.0030745475087314844, -0.002993111964315176, -0.0029116764198988676, -0.0028302406426519156, -0.002748805098235607, -0.0026673695538192987, -0.0025859340094029903, -0.0025044982321560383, -0.0024230629205703735, -0.002341627376154065, -0.0022601918317377567, -0.0021787560544908047, -0.0020973205100744963, -0.002015884965658188, -0.0019344494212418795, -0.0018530137604102492, -0.001771578099578619, -0.0016901425551623106, -0.0016087068943306804, -0.001527271349914372, -0.0014458356890827417, -0.0013644001446664333, -0.001282964600250125, -0.0012015289394184947, -0.0011200933950021863, -0.001038657850585878, -0.0009572221897542477, -0.0008757866453379393, -0.000794350984506309, -0.0007129154400900006, -0.0006314798374660313, -0.000550044234842062, -0.0004686086322180927, -0.00038717302959412336, -0.00030573742697015405, -0.0002243018534500152, -0.00014286625082604587, -6.143064820207655e-05, 2.0004925318062305e-05, 0.00010144052794203162, 0.00018287613056600094, 0.00026431173318997025, 0.00034574733581393957, 0.00042718290933407843, 0.0005086185410618782, 0.0005900540854781866, 0.0006714896881021559, 0.0007529252907261252, 0.0008343608933500946, 0.0009157964959740639, 0.0009972320403903723, 0.0010786677012220025, 0.001160103245638311, 0.0012415389064699411, 0.0013229744508862495, 0.001404409995302558, 0.0014858456561341882]}, "gradients/decoder.transformer.h.21.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 2.0, 3.0, 0.0, 3.0, 5.0, 5.0, 7.0, 7.0, 6.0, 8.0, 19.0, 10.0, 16.0, 22.0, 26.0, 18.0, 33.0, 24.0, 32.0, 34.0, 50.0, 31.0, 43.0, 38.0, 49.0, 36.0, 51.0, 28.0, 46.0, 44.0, 28.0, 28.0, 38.0, 35.0, 26.0, 26.0, 27.0, 15.0, 16.0, 17.0, 13.0, 10.0, 3.0, 9.0, 7.0, 5.0, 5.0, 2.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.0008644461631774902, -0.0008378894999623299, -0.0008113328367471695, -0.0007847761735320091, -0.0007582195103168488, -0.0007316628471016884, -0.000705106183886528, -0.0006785495206713676, -0.0006519928574562073, -0.0006254361942410469, -0.0005988795310258865, -0.0005723228678107262, -0.0005457662045955658, -0.0005192095413804054, -0.0004926528781652451, -0.0004660962149500847, -0.0004395395517349243, -0.00041298288851976395, -0.0003864262253046036, -0.0003598695620894432, -0.00033331289887428284, -0.00030675623565912247, -0.0002801995724439621, -0.00025364290922880173, -0.00022708624601364136, -0.000200529582798481, -0.00017397291958332062, -0.00014741625636816025, -0.00012085959315299988, -9.430292993783951e-05, -6.774626672267914e-05, -4.118960350751877e-05, -1.4632940292358398e-05, 1.1923722922801971e-05, 3.848038613796234e-05, 6.503704935312271e-05, 9.159371256828308e-05, 0.00011815037578344345, 0.00014470703899860382, 0.0001712637022137642, 0.00019782036542892456, 0.00022437702864408493, 0.0002509336918592453, 0.00027749035507440567, 0.00030404701828956604, 0.0003306036815047264, 0.0003571603447198868, 0.00038371700793504715, 0.0004102736711502075, 0.0004368303343653679, 0.00046338699758052826, 0.0004899436607956886, 0.000516500324010849, 0.0005430569872260094, 0.0005696136504411697, 0.0005961703136563301, 0.0006227269768714905, 0.0006492836400866508, 0.0006758403033018112, 0.0007023969665169716, 0.000728953629732132, 0.0007555102929472923, 0.0007820669561624527, 0.0008086236193776131, 0.0008351802825927734]}, "gradients/decoder.transformer.h.21.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 4.0, 2.0, 8.0, 6.0, 7.0, 6.0, 9.0, 13.0, 13.0, 11.0, 20.0, 18.0, 25.0, 28.0, 29.0, 31.0, 34.0, 37.0, 32.0, 51.0, 44.0, 53.0, 41.0, 43.0, 47.0, 42.0, 39.0, 37.0, 29.0, 43.0, 35.0, 26.0, 29.0, 15.0, 16.0, 21.0, 13.0, 12.0, 14.0, 3.0, 3.0, 6.0, 7.0, 1.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.4375, -8.160888671875, -7.88427734375, -7.607666015625, -7.3310546875, -7.054443359375, -6.77783203125, -6.501220703125, -6.224609375, -5.947998046875, -5.67138671875, -5.394775390625, -5.1181640625, -4.841552734375, -4.56494140625, -4.288330078125, -4.01171875, -3.735107421875, -3.45849609375, -3.181884765625, -2.9052734375, -2.628662109375, -2.35205078125, -2.075439453125, -1.798828125, -1.522216796875, -1.24560546875, -0.968994140625, -0.6923828125, -0.415771484375, -0.13916015625, 0.137451171875, 0.4140625, 0.690673828125, 0.96728515625, 1.243896484375, 1.5205078125, 1.797119140625, 2.07373046875, 2.350341796875, 2.626953125, 2.903564453125, 3.18017578125, 3.456787109375, 3.7333984375, 4.010009765625, 4.28662109375, 4.563232421875, 4.83984375, 5.116455078125, 5.39306640625, 5.669677734375, 5.9462890625, 6.222900390625, 6.49951171875, 6.776123046875, 7.052734375, 7.329345703125, 7.60595703125, 7.882568359375, 8.1591796875, 8.435791015625, 8.71240234375, 8.989013671875, 9.265625]}, "gradients/decoder.transformer.h.21.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 7.0, 2.0, 4.0, 3.0, 9.0, 15.0, 14.0, 18.0, 26.0, 57.0, 72.0, 84.0, 149.0, 253.0, 306.0, 507.0, 753.0, 1148.0, 1745.0, 2988.0, 5232.0, 9780.0, 21820.0, 58882.0, 221468.0, 506256.0, 139483.0, 41409.0, 16579.0, 8125.0, 4273.0, 2573.0, 1562.0, 967.0, 639.0, 471.0, 279.0, 190.0, 123.0, 87.0, 68.0, 49.0, 27.0, 22.0, 15.0, 8.0, 2.0, 6.0, 4.0, 3.0, 6.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.6484375, -7.3963623046875, -7.144287109375, -6.8922119140625, -6.64013671875, -6.3880615234375, -6.135986328125, -5.8839111328125, -5.6318359375, -5.3797607421875, -5.127685546875, -4.8756103515625, -4.62353515625, -4.3714599609375, -4.119384765625, -3.8673095703125, -3.615234375, -3.3631591796875, -3.111083984375, -2.8590087890625, -2.60693359375, -2.3548583984375, -2.102783203125, -1.8507080078125, -1.5986328125, -1.3465576171875, -1.094482421875, -0.8424072265625, -0.59033203125, -0.3382568359375, -0.086181640625, 0.1658935546875, 0.41796875, 0.6700439453125, 0.922119140625, 1.1741943359375, 1.42626953125, 1.6783447265625, 1.930419921875, 2.1824951171875, 2.4345703125, 2.6866455078125, 2.938720703125, 3.1907958984375, 3.44287109375, 3.6949462890625, 3.947021484375, 4.1990966796875, 4.451171875, 4.7032470703125, 4.955322265625, 5.2073974609375, 5.45947265625, 5.7115478515625, 5.963623046875, 6.2156982421875, 6.4677734375, 6.7198486328125, 6.971923828125, 7.2239990234375, 7.47607421875, 7.7281494140625, 7.980224609375, 8.2322998046875, 8.484375]}, "gradients/decoder.transformer.h.21.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 4.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 2.0, 10.0, 9.0, 9.0, 9.0, 11.0, 17.0, 12.0, 19.0, 12.0, 14.0, 27.0, 30.0, 34.0, 26.0, 34.0, 41.0, 46.0, 46.0, 45.0, 69.0, 203.0, 1738.0, 89.0, 57.0, 46.0, 39.0, 40.0, 37.0, 37.0, 33.0, 31.0, 27.0, 23.0, 32.0, 19.0, 15.0, 8.0, 9.0, 5.0, 4.0, 5.0, 10.0, 3.0, 6.0, 1.0, 4.0, 2.0, 2.0, 2.0, 3.0], "bins": [-30.71875, -29.848388671875, -28.97802734375, -28.107666015625, -27.2373046875, -26.366943359375, -25.49658203125, -24.626220703125, -23.755859375, -22.885498046875, -22.01513671875, -21.144775390625, -20.2744140625, -19.404052734375, -18.53369140625, -17.663330078125, -16.79296875, -15.922607421875, -15.05224609375, -14.181884765625, -13.3115234375, -12.441162109375, -11.57080078125, -10.700439453125, -9.830078125, -8.959716796875, -8.08935546875, -7.218994140625, -6.3486328125, -5.478271484375, -4.60791015625, -3.737548828125, -2.8671875, -1.996826171875, -1.12646484375, -0.256103515625, 0.6142578125, 1.484619140625, 2.35498046875, 3.225341796875, 4.095703125, 4.966064453125, 5.83642578125, 6.706787109375, 7.5771484375, 8.447509765625, 9.31787109375, 10.188232421875, 11.05859375, 11.928955078125, 12.79931640625, 13.669677734375, 14.5400390625, 15.410400390625, 16.28076171875, 17.151123046875, 18.021484375, 18.891845703125, 19.76220703125, 20.632568359375, 21.5029296875, 22.373291015625, 23.24365234375, 24.114013671875, 24.984375]}, "gradients/decoder.transformer.h.21.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 0.0, 4.0, 7.0, 2.0, 6.0, 5.0, 6.0, 6.0, 7.0, 10.0, 13.0, 11.0, 23.0, 22.0, 32.0, 40.0, 43.0, 70.0, 75.0, 117.0, 172.0, 315.0, 749.0, 2736.0, 52890.0, 3039456.0, 44498.0, 2657.0, 721.0, 342.0, 170.0, 112.0, 74.0, 46.0, 51.0, 39.0, 32.0, 22.0, 22.0, 12.0, 25.0, 15.0, 15.0, 8.0, 9.0, 9.0, 4.0, 4.0, 2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0], "bins": [-45.125, -43.59423828125, -42.0634765625, -40.53271484375, -39.001953125, -37.47119140625, -35.9404296875, -34.40966796875, -32.87890625, -31.34814453125, -29.8173828125, -28.28662109375, -26.755859375, -25.22509765625, -23.6943359375, -22.16357421875, -20.6328125, -19.10205078125, -17.5712890625, -16.04052734375, -14.509765625, -12.97900390625, -11.4482421875, -9.91748046875, -8.38671875, -6.85595703125, -5.3251953125, -3.79443359375, -2.263671875, -0.73291015625, 0.7978515625, 2.32861328125, 3.859375, 5.39013671875, 6.9208984375, 8.45166015625, 9.982421875, 11.51318359375, 13.0439453125, 14.57470703125, 16.10546875, 17.63623046875, 19.1669921875, 20.69775390625, 22.228515625, 23.75927734375, 25.2900390625, 26.82080078125, 28.3515625, 29.88232421875, 31.4130859375, 32.94384765625, 34.474609375, 36.00537109375, 37.5361328125, 39.06689453125, 40.59765625, 42.12841796875, 43.6591796875, 45.18994140625, 46.720703125, 48.25146484375, 49.7822265625, 51.31298828125, 52.84375]}, "gradients/decoder.transformer.h.21.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 10.0, 1008.0, 0.0, 1.0], "bins": [-1240.0467529296875, -1219.8519287109375, -1199.656982421875, -1179.462158203125, -1159.267333984375, -1139.0723876953125, -1118.8775634765625, -1098.6826171875, -1078.48779296875, -1058.29296875, -1038.0980224609375, -1017.9031982421875, -997.7083129882812, -977.513427734375, -957.318603515625, -937.1237182617188, -916.9288330078125, -896.7339477539062, -876.5390625, -856.34423828125, -836.1493530273438, -815.9544677734375, -795.7596435546875, -775.5647583007812, -755.369873046875, -735.1749877929688, -714.9801025390625, -694.7852783203125, -674.5903930664062, -654.3955078125, -634.20068359375, -614.0057983398438, -593.8109130859375, -573.6160278320312, -553.421142578125, -533.226318359375, -513.0314331054688, -492.8365478515625, -472.6416931152344, -452.44683837890625, -432.251953125, -412.05706787109375, -391.8622131347656, -371.6673583984375, -351.47247314453125, -331.277587890625, -311.0827331542969, -290.88787841796875, -270.6929931640625, -250.4981231689453, -230.30325317382812, -210.10838317871094, -189.91351318359375, -169.71864318847656, -149.52377319335938, -129.3289031982422, -109.13404083251953, -88.93917083740234, -68.74430084228516, -48.54943084716797, -28.35456085205078, -8.159690856933594, 12.035179138183594, 32.23004913330078, 52.42491912841797]}, "gradients/decoder.transformer.h.21.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, 2.0, 2.0, 3.0, 6.0, 14.0, 7.0, 8.0, 16.0, 20.0, 19.0, 34.0, 22.0, 26.0, 31.0, 20.0, 28.0, 27.0, 36.0, 38.0, 34.0, 43.0, 35.0, 36.0, 25.0, 48.0, 28.0, 35.0, 47.0, 34.0, 28.0, 23.0, 31.0, 19.0, 24.0, 23.0, 22.0, 27.0, 20.0, 15.0, 10.0, 6.0, 7.0, 1.0, 3.0, 2.0, 1.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0], "bins": [-91.7229232788086, -88.89141082763672, -86.05990600585938, -83.2283935546875, -80.39688110351562, -77.56536865234375, -74.7338638305664, -71.90235137939453, -69.07084655761719, -66.23933410644531, -63.4078254699707, -60.576316833496094, -57.74480438232422, -54.91329574584961, -52.081787109375, -49.250274658203125, -46.41876220703125, -43.58725357055664, -40.755741119384766, -37.924232482910156, -35.09272003173828, -32.26121139526367, -29.429702758789062, -26.59819221496582, -23.766681671142578, -20.935171127319336, -18.103660583496094, -15.272151947021484, -12.440641403198242, -9.609130859375, -6.777622222900391, -3.9461116790771484, -1.1146087646484375, 1.7169013023376465, 4.5484113693237305, 7.379920959472656, 10.211431503295898, 13.04294204711914, 15.87445068359375, 18.705961227416992, 21.537471771240234, 24.368982315063477, 27.20049285888672, 30.032001495361328, 32.86351013183594, 35.69502258300781, 38.52653121948242, 41.35803985595703, 44.189552307128906, 47.021060943603516, 49.85257339477539, 52.68408203125, 55.515594482421875, 58.347103118896484, 61.178611755371094, 64.01012420654297, 66.84162902832031, 69.67314147949219, 72.50464630126953, 75.3361587524414, 78.16767120361328, 80.99917602539062, 83.8306884765625, 86.66220092773438, 89.49371337890625]}, "gradients/decoder.transformer.h.20.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 2.0, 5.0, 8.0, 10.0, 5.0, 10.0, 8.0, 12.0, 18.0, 18.0, 18.0, 25.0, 22.0, 31.0, 36.0, 38.0, 28.0, 49.0, 43.0, 45.0, 48.0, 49.0, 49.0, 43.0, 47.0, 39.0, 34.0, 44.0, 38.0, 27.0, 26.0, 19.0, 24.0, 21.0, 18.0, 9.0, 11.0, 7.0, 4.0, 6.0, 6.0, 2.0, 4.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.3046875, -9.007080078125, -8.70947265625, -8.411865234375, -8.1142578125, -7.816650390625, -7.51904296875, -7.221435546875, -6.923828125, -6.626220703125, -6.32861328125, -6.031005859375, -5.7333984375, -5.435791015625, -5.13818359375, -4.840576171875, -4.54296875, -4.245361328125, -3.94775390625, -3.650146484375, -3.3525390625, -3.054931640625, -2.75732421875, -2.459716796875, -2.162109375, -1.864501953125, -1.56689453125, -1.269287109375, -0.9716796875, -0.674072265625, -0.37646484375, -0.078857421875, 0.21875, 0.516357421875, 0.81396484375, 1.111572265625, 1.4091796875, 1.706787109375, 2.00439453125, 2.302001953125, 2.599609375, 2.897216796875, 3.19482421875, 3.492431640625, 3.7900390625, 4.087646484375, 4.38525390625, 4.682861328125, 4.98046875, 5.278076171875, 5.57568359375, 5.873291015625, 6.1708984375, 6.468505859375, 6.76611328125, 7.063720703125, 7.361328125, 7.658935546875, 7.95654296875, 8.254150390625, 8.5517578125, 8.849365234375, 9.14697265625, 9.444580078125, 9.7421875]}, "gradients/decoder.transformer.h.20.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 5.0, 4.0, 6.0, 2.0, 7.0, 2.0, 7.0, 14.0, 14.0, 10.0, 21.0, 19.0, 18.0, 27.0, 35.0, 33.0, 47.0, 55.0, 112.0, 327.0, 1723.0, 26262.0, 2539522.0, 1606848.0, 17244.0, 1255.0, 245.0, 95.0, 58.0, 39.0, 32.0, 38.0, 33.0, 24.0, 14.0, 17.0, 13.0, 10.0, 9.0, 11.0, 8.0, 9.0, 3.0, 2.0, 5.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-51.46875, -49.80517578125, -48.1416015625, -46.47802734375, -44.814453125, -43.15087890625, -41.4873046875, -39.82373046875, -38.16015625, -36.49658203125, -34.8330078125, -33.16943359375, -31.505859375, -29.84228515625, -28.1787109375, -26.51513671875, -24.8515625, -23.18798828125, -21.5244140625, -19.86083984375, -18.197265625, -16.53369140625, -14.8701171875, -13.20654296875, -11.54296875, -9.87939453125, -8.2158203125, -6.55224609375, -4.888671875, -3.22509765625, -1.5615234375, 0.10205078125, 1.765625, 3.42919921875, 5.0927734375, 6.75634765625, 8.419921875, 10.08349609375, 11.7470703125, 13.41064453125, 15.07421875, 16.73779296875, 18.4013671875, 20.06494140625, 21.728515625, 23.39208984375, 25.0556640625, 26.71923828125, 28.3828125, 30.04638671875, 31.7099609375, 33.37353515625, 35.037109375, 36.70068359375, 38.3642578125, 40.02783203125, 41.69140625, 43.35498046875, 45.0185546875, 46.68212890625, 48.345703125, 50.00927734375, 51.6728515625, 53.33642578125, 55.0]}, "gradients/decoder.transformer.h.20.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 5.0, 9.0, 13.0, 23.0, 38.0, 56.0, 85.0, 129.0, 190.0, 329.0, 453.0, 635.0, 714.0, 474.0, 322.0, 233.0, 135.0, 79.0, 60.0, 40.0, 22.0, 15.0, 9.0, 9.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.71875, -36.46337890625, -35.2080078125, -33.95263671875, -32.697265625, -31.44189453125, -30.1865234375, -28.93115234375, -27.67578125, -26.42041015625, -25.1650390625, -23.90966796875, -22.654296875, -21.39892578125, -20.1435546875, -18.88818359375, -17.6328125, -16.37744140625, -15.1220703125, -13.86669921875, -12.611328125, -11.35595703125, -10.1005859375, -8.84521484375, -7.58984375, -6.33447265625, -5.0791015625, -3.82373046875, -2.568359375, -1.31298828125, -0.0576171875, 1.19775390625, 2.453125, 3.70849609375, 4.9638671875, 6.21923828125, 7.474609375, 8.72998046875, 9.9853515625, 11.24072265625, 12.49609375, 13.75146484375, 15.0068359375, 16.26220703125, 17.517578125, 18.77294921875, 20.0283203125, 21.28369140625, 22.5390625, 23.79443359375, 25.0498046875, 26.30517578125, 27.560546875, 28.81591796875, 30.0712890625, 31.32666015625, 32.58203125, 33.83740234375, 35.0927734375, 36.34814453125, 37.603515625, 38.85888671875, 40.1142578125, 41.36962890625, 42.625]}, "gradients/decoder.transformer.h.20.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 5.0, 4.0, 16.0, 18.0, 29.0, 47.0, 71.0, 100.0, 186.0, 385.0, 1035.0, 97999.0, 4087978.0, 5239.0, 520.0, 273.0, 164.0, 88.0, 62.0, 32.0, 17.0, 12.0, 5.0, 6.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-190.25, -184.533203125, -178.81640625, -173.099609375, -167.3828125, -161.666015625, -155.94921875, -150.232421875, -144.515625, -138.798828125, -133.08203125, -127.365234375, -121.6484375, -115.931640625, -110.21484375, -104.498046875, -98.78125, -93.064453125, -87.34765625, -81.630859375, -75.9140625, -70.197265625, -64.48046875, -58.763671875, -53.046875, -47.330078125, -41.61328125, -35.896484375, -30.1796875, -24.462890625, -18.74609375, -13.029296875, -7.3125, -1.595703125, 4.12109375, 9.837890625, 15.5546875, 21.271484375, 26.98828125, 32.705078125, 38.421875, 44.138671875, 49.85546875, 55.572265625, 61.2890625, 67.005859375, 72.72265625, 78.439453125, 84.15625, 89.873046875, 95.58984375, 101.306640625, 107.0234375, 112.740234375, 118.45703125, 124.173828125, 129.890625, 135.607421875, 141.32421875, 147.041015625, 152.7578125, 158.474609375, 164.19140625, 169.908203125, 175.625]}, "gradients/decoder.transformer.h.20.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 16.0, 85.0, 184.0, 298.0, 261.0, 109.0, 46.0, 11.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-399.3083190917969, -391.5912170410156, -383.87408447265625, -376.156982421875, -368.43988037109375, -360.7227478027344, -353.0056457519531, -345.2885437011719, -337.5714111328125, -329.85430908203125, -322.1371765136719, -314.4200744628906, -306.7029724121094, -298.98583984375, -291.26873779296875, -283.5516357421875, -275.83453369140625, -268.117431640625, -260.4002990722656, -252.68319702148438, -244.96607971191406, -237.2489776611328, -229.5318603515625, -221.81475830078125, -214.09762573242188, -206.38050842285156, -198.6634063720703, -190.9462890625, -183.2291717529297, -175.51206970214844, -167.79495239257812, -160.07785034179688, -152.3607177734375, -144.6436004638672, -136.92649841308594, -129.20938110351562, -121.49227142333984, -113.77516174316406, -106.05804443359375, -98.34093475341797, -90.62382507324219, -82.9067153930664, -75.1895980834961, -67.47248840332031, -59.75537872314453, -52.038265228271484, -44.32115173339844, -36.604042053222656, -28.886932373046875, -21.16982078552246, -13.45270824432373, -5.735595703125, 1.981515884399414, 9.698627471923828, 17.415740966796875, 25.132850646972656, 32.8499641418457, 40.56707763671875, 48.28418731689453, 56.00130081176758, 63.718414306640625, 71.4355239868164, 79.15263366699219, 86.8697509765625, 94.58686065673828]}, "gradients/decoder.transformer.h.20.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 3.0, 5.0, 6.0, 9.0, 8.0, 8.0, 16.0, 22.0, 22.0, 24.0, 29.0, 41.0, 45.0, 31.0, 46.0, 46.0, 46.0, 40.0, 34.0, 45.0, 50.0, 50.0, 43.0, 40.0, 39.0, 42.0, 31.0, 28.0, 27.0, 21.0, 21.0, 16.0, 16.0, 13.0, 18.0, 11.0, 4.0, 7.0, 5.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-95.91848754882812, -92.65423583984375, -89.38998413085938, -86.125732421875, -82.86148071289062, -79.59722900390625, -76.33297729492188, -73.0687255859375, -69.80447387695312, -66.54022216796875, -63.275970458984375, -60.01171875, -56.747467041015625, -53.48321533203125, -50.21895980834961, -46.954708099365234, -43.690452575683594, -40.42620086669922, -37.161949157714844, -33.89769744873047, -30.63344383239746, -27.369192123413086, -24.104938507080078, -20.840686798095703, -17.576435089111328, -14.312183380126953, -11.047930717468262, -7.78367805480957, -4.519426345825195, -1.2551746368408203, 2.0090789794921875, 5.2733306884765625, 8.537582397460938, 11.801834106445312, 15.066086769104004, 18.330339431762695, 21.59459114074707, 24.858842849731445, 28.123096466064453, 31.387348175048828, 34.6515998840332, 37.91585159301758, 41.18010330200195, 44.444358825683594, 47.70861053466797, 50.972862243652344, 54.23711395263672, 57.501365661621094, 60.76561737060547, 64.02986907958984, 67.29412078857422, 70.5583724975586, 73.82262420654297, 77.08687591552734, 80.35113525390625, 83.61538696289062, 86.879638671875, 90.14389038085938, 93.40814208984375, 96.67239379882812, 99.9366455078125, 103.20089721679688, 106.46514892578125, 109.72940063476562, 112.99365234375]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 4.0, 9.0, 4.0, 5.0, 4.0, 6.0, 15.0, 5.0, 10.0, 12.0, 21.0, 26.0, 20.0, 37.0, 25.0, 35.0, 37.0, 42.0, 46.0, 36.0, 53.0, 42.0, 35.0, 38.0, 54.0, 37.0, 35.0, 31.0, 40.0, 37.0, 26.0, 25.0, 29.0, 21.0, 22.0, 20.0, 9.0, 8.0, 16.0, 5.0, 6.0, 8.0, 3.0, 3.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.046875, -8.758056640625, -8.46923828125, -8.180419921875, -7.8916015625, -7.602783203125, -7.31396484375, -7.025146484375, -6.736328125, -6.447509765625, -6.15869140625, -5.869873046875, -5.5810546875, -5.292236328125, -5.00341796875, -4.714599609375, -4.42578125, -4.136962890625, -3.84814453125, -3.559326171875, -3.2705078125, -2.981689453125, -2.69287109375, -2.404052734375, -2.115234375, -1.826416015625, -1.53759765625, -1.248779296875, -0.9599609375, -0.671142578125, -0.38232421875, -0.093505859375, 0.1953125, 0.484130859375, 0.77294921875, 1.061767578125, 1.3505859375, 1.639404296875, 1.92822265625, 2.217041015625, 2.505859375, 2.794677734375, 3.08349609375, 3.372314453125, 3.6611328125, 3.949951171875, 4.23876953125, 4.527587890625, 4.81640625, 5.105224609375, 5.39404296875, 5.682861328125, 5.9716796875, 6.260498046875, 6.54931640625, 6.838134765625, 7.126953125, 7.415771484375, 7.70458984375, 7.993408203125, 8.2822265625, 8.571044921875, 8.85986328125, 9.148681640625, 9.4375]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 4.0, 4.0, 10.0, 15.0, 12.0, 22.0, 26.0, 56.0, 78.0, 113.0, 171.0, 263.0, 372.0, 557.0, 791.0, 1244.0, 1735.0, 2689.0, 4046.0, 5964.0, 8652.0, 12841.0, 19322.0, 28485.0, 42159.0, 60807.0, 89174.0, 135686.0, 187533.0, 146340.0, 95435.0, 65242.0, 44524.0, 30752.0, 20937.0, 13933.0, 9385.0, 6229.0, 4196.0, 2847.0, 1865.0, 1291.0, 878.0, 610.0, 418.0, 252.0, 198.0, 132.0, 88.0, 57.0, 51.0, 30.0, 24.0, 9.0, 5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.91845703125, -0.8893814086914062, -0.8603057861328125, -0.8312301635742188, -0.802154541015625, -0.7730789184570312, -0.7440032958984375, -0.7149276733398438, -0.68585205078125, -0.6567764282226562, -0.6277008056640625, -0.5986251831054688, -0.569549560546875, -0.5404739379882812, -0.5113983154296875, -0.48232269287109375, -0.4532470703125, -0.42417144775390625, -0.3950958251953125, -0.36602020263671875, -0.336944580078125, -0.30786895751953125, -0.2787933349609375, -0.24971771240234375, -0.22064208984375, -0.19156646728515625, -0.1624908447265625, -0.13341522216796875, -0.104339599609375, -0.07526397705078125, -0.0461883544921875, -0.01711273193359375, 0.011962890625, 0.04103851318359375, 0.0701141357421875, 0.09918975830078125, 0.128265380859375, 0.15734100341796875, 0.1864166259765625, 0.21549224853515625, 0.24456787109375, 0.27364349365234375, 0.3027191162109375, 0.33179473876953125, 0.360870361328125, 0.38994598388671875, 0.4190216064453125, 0.44809722900390625, 0.4771728515625, 0.5062484741210938, 0.5353240966796875, 0.5643997192382812, 0.593475341796875, 0.6225509643554688, 0.6516265869140625, 0.6807022094726562, 0.70977783203125, 0.7388534545898438, 0.7679290771484375, 0.7970046997070312, 0.826080322265625, 0.8551559448242188, 0.8842315673828125, 0.9133071899414062, 0.9423828125]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 7.0, 9.0, 5.0, 7.0, 13.0, 9.0, 12.0, 20.0, 18.0, 15.0, 25.0, 30.0, 23.0, 24.0, 29.0, 31.0, 34.0, 33.0, 37.0, 44.0, 41.0, 1059.0, 48.0, 47.0, 37.0, 42.0, 33.0, 38.0, 32.0, 23.0, 28.0, 20.0, 23.0, 16.0, 17.0, 12.0, 13.0, 9.0, 8.0, 11.0, 11.0, 6.0, 11.0, 4.0, 5.0, 5.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0], "bins": [-5.0703125, -4.90484619140625, -4.7393798828125, -4.57391357421875, -4.408447265625, -4.24298095703125, -4.0775146484375, -3.91204833984375, -3.74658203125, -3.58111572265625, -3.4156494140625, -3.25018310546875, -3.084716796875, -2.91925048828125, -2.7537841796875, -2.58831787109375, -2.4228515625, -2.25738525390625, -2.0919189453125, -1.92645263671875, -1.760986328125, -1.59552001953125, -1.4300537109375, -1.26458740234375, -1.09912109375, -0.93365478515625, -0.7681884765625, -0.60272216796875, -0.437255859375, -0.27178955078125, -0.1063232421875, 0.05914306640625, 0.224609375, 0.39007568359375, 0.5555419921875, 0.72100830078125, 0.886474609375, 1.05194091796875, 1.2174072265625, 1.38287353515625, 1.54833984375, 1.71380615234375, 1.8792724609375, 2.04473876953125, 2.210205078125, 2.37567138671875, 2.5411376953125, 2.70660400390625, 2.8720703125, 3.03753662109375, 3.2030029296875, 3.36846923828125, 3.533935546875, 3.69940185546875, 3.8648681640625, 4.03033447265625, 4.19580078125, 4.36126708984375, 4.5267333984375, 4.69219970703125, 4.857666015625, 5.02313232421875, 5.1885986328125, 5.35406494140625, 5.51953125]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 6.0, 2.0, 11.0, 9.0, 13.0, 28.0, 45.0, 74.0, 108.0, 148.0, 192.0, 283.0, 429.0, 620.0, 866.0, 1316.0, 1999.0, 3087.0, 4616.0, 7003.0, 10408.0, 16379.0, 24687.0, 37591.0, 58675.0, 92622.0, 148704.0, 1260285.0, 155088.0, 96466.0, 61335.0, 39197.0, 25512.0, 16896.0, 10818.0, 7292.0, 4756.0, 3199.0, 2173.0, 1408.0, 963.0, 600.0, 398.0, 273.0, 183.0, 132.0, 80.0, 56.0, 39.0, 29.0, 19.0, 9.0, 6.0, 6.0, 3.0, 4.0, 2.0, 1.0], "bins": [-0.99755859375, -0.9678192138671875, -0.938079833984375, -0.9083404541015625, -0.87860107421875, -0.8488616943359375, -0.819122314453125, -0.7893829345703125, -0.7596435546875, -0.7299041748046875, -0.700164794921875, -0.6704254150390625, -0.64068603515625, -0.6109466552734375, -0.581207275390625, -0.5514678955078125, -0.521728515625, -0.4919891357421875, -0.462249755859375, -0.4325103759765625, -0.40277099609375, -0.3730316162109375, -0.343292236328125, -0.3135528564453125, -0.2838134765625, -0.2540740966796875, -0.224334716796875, -0.1945953369140625, -0.16485595703125, -0.1351165771484375, -0.105377197265625, -0.0756378173828125, -0.0458984375, -0.0161590576171875, 0.013580322265625, 0.0433197021484375, 0.07305908203125, 0.1027984619140625, 0.132537841796875, 0.1622772216796875, 0.1920166015625, 0.2217559814453125, 0.251495361328125, 0.2812347412109375, 0.31097412109375, 0.3407135009765625, 0.370452880859375, 0.4001922607421875, 0.429931640625, 0.4596710205078125, 0.489410400390625, 0.5191497802734375, 0.54888916015625, 0.5786285400390625, 0.608367919921875, 0.6381072998046875, 0.6678466796875, 0.6975860595703125, 0.727325439453125, 0.7570648193359375, 0.78680419921875, 0.8165435791015625, 0.846282958984375, 0.8760223388671875, 0.90576171875]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 3.0, 1.0, 3.0, 5.0, 4.0, 8.0, 10.0, 14.0, 22.0, 24.0, 30.0, 31.0, 53.0, 42.0, 59.0, 65.0, 64.0, 75.0, 66.0, 66.0, 59.0, 65.0, 45.0, 39.0, 34.0, 26.0, 25.0, 18.0, 21.0, 8.0, 6.0, 1.0, 4.0, 5.0, 6.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.002208709716796875, -0.002143755555152893, -0.002078801393508911, -0.002013847231864929, -0.0019488930702209473, -0.0018839389085769653, -0.0018189847469329834, -0.0017540305852890015, -0.0016890764236450195, -0.0016241222620010376, -0.0015591681003570557, -0.0014942139387130737, -0.0014292597770690918, -0.0013643056154251099, -0.001299351453781128, -0.001234397292137146, -0.001169443130493164, -0.0011044889688491821, -0.0010395348072052002, -0.0009745806455612183, -0.0009096264839172363, -0.0008446723222732544, -0.0007797181606292725, -0.0007147639989852905, -0.0006498098373413086, -0.0005848556756973267, -0.0005199015140533447, -0.0004549473524093628, -0.00038999319076538086, -0.0003250390291213989, -0.000260084867477417, -0.00019513070583343506, -0.00013017654418945312, -6.522238254547119e-05, -2.682209014892578e-07, 6.468594074249268e-05, 0.0001296401023864746, 0.00019459426403045654, 0.0002595484256744385, 0.0003245025873184204, 0.00038945674896240234, 0.0004544109106063843, 0.0005193650722503662, 0.0005843192338943481, 0.0006492733955383301, 0.000714227557182312, 0.0007791817188262939, 0.0008441358804702759, 0.0009090900421142578, 0.0009740442037582397, 0.0010389983654022217, 0.0011039525270462036, 0.0011689066886901855, 0.0012338608503341675, 0.0012988150119781494, 0.0013637691736221313, 0.0014287233352661133, 0.0014936774969100952, 0.0015586316585540771, 0.001623585820198059, 0.001688539981842041, 0.001753494143486023, 0.0018184483051300049, 0.0018834024667739868, 0.0019483566284179688]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 1.0, 6.0, 0.0, 2.0, 6.0, 3.0, 11.0, 15.0, 14.0, 23.0, 27.0, 37.0, 51.0, 53.0, 90.0, 142.0, 236.0, 403.0, 1101.0, 969208.0, 75417.0, 758.0, 347.0, 182.0, 124.0, 80.0, 52.0, 44.0, 33.0, 18.0, 25.0, 15.0, 10.0, 7.0, 3.0, 2.0, 7.0, 3.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0511474609375, -0.049680233001708984, -0.04821300506591797, -0.04674577713012695, -0.04527854919433594, -0.04381132125854492, -0.042344093322753906, -0.04087686538696289, -0.039409637451171875, -0.03794240951538086, -0.036475181579589844, -0.03500795364379883, -0.03354072570800781, -0.0320734977722168, -0.03060626983642578, -0.029139041900634766, -0.02767181396484375, -0.026204586029052734, -0.02473735809326172, -0.023270130157470703, -0.021802902221679688, -0.020335674285888672, -0.018868446350097656, -0.01740121841430664, -0.015933990478515625, -0.01446676254272461, -0.012999534606933594, -0.011532306671142578, -0.010065078735351562, -0.008597850799560547, -0.007130622863769531, -0.005663394927978516, -0.0041961669921875, -0.0027289390563964844, -0.0012617111206054688, 0.00020551681518554688, 0.0016727447509765625, 0.003139972686767578, 0.004607200622558594, 0.006074428558349609, 0.007541656494140625, 0.00900888442993164, 0.010476112365722656, 0.011943340301513672, 0.013410568237304688, 0.014877796173095703, 0.01634502410888672, 0.017812252044677734, 0.01927947998046875, 0.020746707916259766, 0.02221393585205078, 0.023681163787841797, 0.025148391723632812, 0.026615619659423828, 0.028082847595214844, 0.02955007553100586, 0.031017303466796875, 0.03248453140258789, 0.033951759338378906, 0.03541898727416992, 0.03688621520996094, 0.03835344314575195, 0.03982067108154297, 0.041287899017333984, 0.042755126953125]}, "gradients/decoder.transformer.h.20.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 50.0, 318.0, 515.0, 125.0, 8.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0052827103063464165, -0.005151478108018637, -0.005020245909690857, -0.00488901324570179, -0.00475778104737401, -0.00462654884904623, -0.0044953166507184505, -0.004364084452390671, -0.004232851788401604, -0.004101619590073824, -0.003970387391746044, -0.0038391549605876207, -0.0037079225294291973, -0.0035766903311014175, -0.0034454581327736378, -0.003314225934445858, -0.0031829937361180782, -0.0030517615377902985, -0.002920529106631875, -0.0027892969083040953, -0.002658064477145672, -0.002526832278817892, -0.0023956000804901123, -0.0022643678821623325, -0.002133135451003909, -0.0020019032526761293, -0.001870670821517706, -0.0017394386231899261, -0.0016082063084468246, -0.001476973993703723, -0.0013457417953759432, -0.0012145094806328416, -0.0010832776315510273, -0.0009520453168079257, -0.000820813060272485, -0.0006895808037370443, -0.0005583484889939427, -0.00042711617425084114, -0.00029588391771540046, -0.00016465166117995977, -3.341934643685818e-05, 9.781293920241296e-05, 0.0002290452248416841, 0.00036027751048095524, 0.0004915097961202264, 0.000622742110863328, 0.0007539743673987687, 0.0008852066239342093, 0.001016438938677311, 0.0011476712534204125, 0.0012789035681635141, 0.001410135766491294, 0.0015413680812343955, 0.001672600395977497, 0.0018038325943052769, 0.0019350649090483785, 0.00206629722379148, 0.00219752942211926, 0.0023287618532776833, 0.002459994051605463, 0.002591226249933243, 0.0027224586810916662, 0.002853690879419446, 0.0029849233105778694, 0.003116155508905649]}, "gradients/decoder.transformer.h.20.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 5.0, 4.0, 5.0, 3.0, 4.0, 11.0, 1.0, 9.0, 10.0, 15.0, 20.0, 19.0, 30.0, 14.0, 17.0, 27.0, 17.0, 22.0, 27.0, 38.0, 35.0, 32.0, 30.0, 36.0, 43.0, 38.0, 30.0, 34.0, 30.0, 38.0, 43.0, 31.0, 22.0, 22.0, 30.0, 30.0, 14.0, 20.0, 23.0, 22.0, 12.0, 21.0, 13.0, 10.0, 6.0, 7.0, 7.0, 4.0, 6.0, 2.0, 5.0, 5.0, 1.0, 2.0, 0.0, 4.0, 4.0], "bins": [-0.0008194446563720703, -0.000794626772403717, -0.0007698088884353638, -0.0007449910044670105, -0.0007201731204986572, -0.000695355236530304, -0.0006705373525619507, -0.0006457194685935974, -0.0006209015846252441, -0.0005960837006568909, -0.0005712658166885376, -0.0005464479327201843, -0.0005216300487518311, -0.0004968121647834778, -0.0004719942808151245, -0.00044717639684677124, -0.00042235851287841797, -0.0003975406289100647, -0.0003727227449417114, -0.00034790486097335815, -0.0003230869770050049, -0.0002982690930366516, -0.00027345120906829834, -0.00024863332509994507, -0.0002238154411315918, -0.00019899755716323853, -0.00017417967319488525, -0.00014936178922653198, -0.0001245439052581787, -9.972602128982544e-05, -7.490813732147217e-05, -5.0090253353118896e-05, -2.5272369384765625e-05, -4.544854164123535e-07, 2.4363398551940918e-05, 4.918128252029419e-05, 7.399916648864746e-05, 9.881705045700073e-05, 0.000123634934425354, 0.00014845281839370728, 0.00017327070236206055, 0.00019808858633041382, 0.0002229064702987671, 0.00024772435426712036, 0.00027254223823547363, 0.0002973601222038269, 0.0003221780061721802, 0.00034699589014053345, 0.0003718137741088867, 0.00039663165807724, 0.00042144954204559326, 0.00044626742601394653, 0.0004710853099822998, 0.0004959031939506531, 0.0005207210779190063, 0.0005455389618873596, 0.0005703568458557129, 0.0005951747298240662, 0.0006199926137924194, 0.0006448104977607727, 0.000669628381729126, 0.0006944462656974792, 0.0007192641496658325, 0.0007440820336341858, 0.0007688999176025391]}, "gradients/decoder.transformer.h.20.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 4.0, 9.0, 4.0, 5.0, 4.0, 6.0, 15.0, 5.0, 10.0, 12.0, 21.0, 26.0, 20.0, 37.0, 25.0, 35.0, 37.0, 42.0, 46.0, 36.0, 53.0, 42.0, 35.0, 38.0, 54.0, 37.0, 35.0, 31.0, 40.0, 37.0, 26.0, 25.0, 29.0, 21.0, 22.0, 20.0, 9.0, 8.0, 16.0, 5.0, 6.0, 8.0, 3.0, 3.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.046875, -8.758056640625, -8.46923828125, -8.180419921875, -7.8916015625, -7.602783203125, -7.31396484375, -7.025146484375, -6.736328125, -6.447509765625, -6.15869140625, -5.869873046875, -5.5810546875, -5.292236328125, -5.00341796875, -4.714599609375, -4.42578125, -4.136962890625, -3.84814453125, -3.559326171875, -3.2705078125, -2.981689453125, -2.69287109375, -2.404052734375, -2.115234375, -1.826416015625, -1.53759765625, -1.248779296875, -0.9599609375, -0.671142578125, -0.38232421875, -0.093505859375, 0.1953125, 0.484130859375, 0.77294921875, 1.061767578125, 1.3505859375, 1.639404296875, 1.92822265625, 2.217041015625, 2.505859375, 2.794677734375, 3.08349609375, 3.372314453125, 3.6611328125, 3.949951171875, 4.23876953125, 4.527587890625, 4.81640625, 5.105224609375, 5.39404296875, 5.682861328125, 5.9716796875, 6.260498046875, 6.54931640625, 6.838134765625, 7.126953125, 7.415771484375, 7.70458984375, 7.993408203125, 8.2822265625, 8.571044921875, 8.85986328125, 9.148681640625, 9.4375]}, "gradients/decoder.transformer.h.20.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 4.0, 4.0, 3.0, 9.0, 9.0, 13.0, 17.0, 23.0, 36.0, 50.0, 83.0, 109.0, 170.0, 214.0, 356.0, 556.0, 985.0, 1795.0, 3604.0, 7959.0, 24015.0, 106702.0, 585630.0, 248270.0, 44527.0, 12522.0, 5034.0, 2490.0, 1313.0, 709.0, 467.0, 302.0, 185.0, 117.0, 88.0, 65.0, 36.0, 30.0, 23.0, 13.0, 11.0, 5.0, 5.0, 3.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.7421875, -7.4525146484375, -7.162841796875, -6.8731689453125, -6.58349609375, -6.2938232421875, -6.004150390625, -5.7144775390625, -5.4248046875, -5.1351318359375, -4.845458984375, -4.5557861328125, -4.26611328125, -3.9764404296875, -3.686767578125, -3.3970947265625, -3.107421875, -2.8177490234375, -2.528076171875, -2.2384033203125, -1.94873046875, -1.6590576171875, -1.369384765625, -1.0797119140625, -0.7900390625, -0.5003662109375, -0.210693359375, 0.0789794921875, 0.36865234375, 0.6583251953125, 0.947998046875, 1.2376708984375, 1.52734375, 1.8170166015625, 2.106689453125, 2.3963623046875, 2.68603515625, 2.9757080078125, 3.265380859375, 3.5550537109375, 3.8447265625, 4.1343994140625, 4.424072265625, 4.7137451171875, 5.00341796875, 5.2930908203125, 5.582763671875, 5.8724365234375, 6.162109375, 6.4517822265625, 6.741455078125, 7.0311279296875, 7.32080078125, 7.6104736328125, 7.900146484375, 8.1898193359375, 8.4794921875, 8.7691650390625, 9.058837890625, 9.3485107421875, 9.63818359375, 9.9278564453125, 10.217529296875, 10.5072021484375, 10.796875]}, "gradients/decoder.transformer.h.20.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 1.0, 3.0, 1.0, 2.0, 2.0, 5.0, 7.0, 9.0, 10.0, 10.0, 12.0, 16.0, 12.0, 16.0, 22.0, 25.0, 26.0, 27.0, 38.0, 38.0, 30.0, 30.0, 44.0, 70.0, 101.0, 1696.0, 276.0, 94.0, 67.0, 47.0, 41.0, 41.0, 29.0, 32.0, 35.0, 18.0, 13.0, 21.0, 9.0, 9.0, 14.0, 9.0, 9.0, 10.0, 6.0, 4.0, 4.0, 2.0, 4.0, 2.0, 1.0, 5.0, 1.0, 2.0, 0.0, 0.0, 3.0], "bins": [-32.40625, -31.418212890625, -30.43017578125, -29.442138671875, -28.4541015625, -27.466064453125, -26.47802734375, -25.489990234375, -24.501953125, -23.513916015625, -22.52587890625, -21.537841796875, -20.5498046875, -19.561767578125, -18.57373046875, -17.585693359375, -16.59765625, -15.609619140625, -14.62158203125, -13.633544921875, -12.6455078125, -11.657470703125, -10.66943359375, -9.681396484375, -8.693359375, -7.705322265625, -6.71728515625, -5.729248046875, -4.7412109375, -3.753173828125, -2.76513671875, -1.777099609375, -0.7890625, 0.198974609375, 1.18701171875, 2.175048828125, 3.1630859375, 4.151123046875, 5.13916015625, 6.127197265625, 7.115234375, 8.103271484375, 9.09130859375, 10.079345703125, 11.0673828125, 12.055419921875, 13.04345703125, 14.031494140625, 15.01953125, 16.007568359375, 16.99560546875, 17.983642578125, 18.9716796875, 19.959716796875, 20.94775390625, 21.935791015625, 22.923828125, 23.911865234375, 24.89990234375, 25.887939453125, 26.8759765625, 27.864013671875, 28.85205078125, 29.840087890625, 30.828125]}, "gradients/decoder.transformer.h.20.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 1.0, 3.0, 4.0, 3.0, 5.0, 6.0, 9.0, 7.0, 14.0, 13.0, 11.0, 13.0, 23.0, 17.0, 31.0, 41.0, 56.0, 66.0, 109.0, 161.0, 269.0, 615.0, 2445.0, 42182.0, 3069248.0, 27000.0, 1894.0, 595.0, 243.0, 158.0, 117.0, 77.0, 51.0, 37.0, 43.0, 24.0, 21.0, 12.0, 14.0, 13.0, 14.0, 6.0, 11.0, 7.0, 6.0, 4.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0], "bins": [-51.21875, -49.59033203125, -47.9619140625, -46.33349609375, -44.705078125, -43.07666015625, -41.4482421875, -39.81982421875, -38.19140625, -36.56298828125, -34.9345703125, -33.30615234375, -31.677734375, -30.04931640625, -28.4208984375, -26.79248046875, -25.1640625, -23.53564453125, -21.9072265625, -20.27880859375, -18.650390625, -17.02197265625, -15.3935546875, -13.76513671875, -12.13671875, -10.50830078125, -8.8798828125, -7.25146484375, -5.623046875, -3.99462890625, -2.3662109375, -0.73779296875, 0.890625, 2.51904296875, 4.1474609375, 5.77587890625, 7.404296875, 9.03271484375, 10.6611328125, 12.28955078125, 13.91796875, 15.54638671875, 17.1748046875, 18.80322265625, 20.431640625, 22.06005859375, 23.6884765625, 25.31689453125, 26.9453125, 28.57373046875, 30.2021484375, 31.83056640625, 33.458984375, 35.08740234375, 36.7158203125, 38.34423828125, 39.97265625, 41.60107421875, 43.2294921875, 44.85791015625, 46.486328125, 48.11474609375, 49.7431640625, 51.37158203125, 53.0]}, "gradients/decoder.transformer.h.20.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 183.0, 829.0, 5.0], "bins": [-734.0596313476562, -722.255126953125, -710.4505615234375, -698.6460571289062, -686.841552734375, -675.0369873046875, -663.2324829101562, -651.4279174804688, -639.6234130859375, -627.8189086914062, -616.0143432617188, -604.2098388671875, -592.4052734375, -580.6007690429688, -568.7962646484375, -556.99169921875, -545.1871948242188, -533.3826904296875, -521.578125, -509.77362060546875, -497.9690856933594, -486.16455078125, -474.36004638671875, -462.5555114746094, -450.7509765625, -438.9464416503906, -427.14190673828125, -415.33740234375, -403.5328674316406, -391.72833251953125, -379.923828125, -368.1192932128906, -356.3147277832031, -344.51019287109375, -332.7056884765625, -320.9011535644531, -309.09661865234375, -297.2920837402344, -285.487548828125, -273.68304443359375, -261.8785095214844, -250.073974609375, -238.2694549560547, -226.46493530273438, -214.660400390625, -202.85586547851562, -191.0513458251953, -179.246826171875, -167.44229125976562, -155.63775634765625, -143.83323669433594, -132.02871704101562, -120.22418212890625, -108.4196548461914, -96.61512756347656, -84.81060028076172, -73.0060806274414, -61.20155334472656, -49.39702606201172, -37.592498779296875, -25.78797149658203, -13.983444213867188, -2.1789169311523438, 9.6256103515625, 21.430137634277344]}, "gradients/decoder.transformer.h.20.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 0.0, 2.0, 7.0, 8.0, 6.0, 7.0, 13.0, 9.0, 12.0, 18.0, 18.0, 16.0, 19.0, 18.0, 20.0, 23.0, 30.0, 32.0, 26.0, 40.0, 34.0, 33.0, 36.0, 39.0, 47.0, 44.0, 34.0, 43.0, 41.0, 36.0, 40.0, 30.0, 25.0, 29.0, 23.0, 18.0, 24.0, 14.0, 11.0, 10.0, 15.0, 15.0, 10.0, 12.0, 7.0, 6.0, 4.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-100.83224487304688, -97.67776489257812, -94.52328491210938, -91.36880493164062, -88.21432495117188, -85.05984497070312, -81.90536499023438, -78.7508773803711, -75.59639739990234, -72.4419174194336, -69.28743743896484, -66.1329574584961, -62.97847366333008, -59.82399368286133, -56.66951370239258, -53.51502990722656, -50.36055374145508, -47.20607376098633, -44.05159378051758, -40.89710998535156, -37.74263000488281, -34.58815002441406, -31.433670043945312, -28.27918815612793, -25.12470817565918, -21.97022819519043, -18.815746307373047, -15.661266326904297, -12.50678539276123, -9.352304458618164, -6.197824478149414, -3.0433425903320312, 0.11113739013671875, 3.265618085861206, 6.420098781585693, 9.574579238891602, 12.729060173034668, 15.883541107177734, 19.038021087646484, 22.192502975463867, 25.346982955932617, 28.501462936401367, 31.65594482421875, 34.8104248046875, 37.96490478515625, 41.119384765625, 44.27386474609375, 47.428348541259766, 50.582828521728516, 53.737308502197266, 56.891788482666016, 60.04627227783203, 63.20075225830078, 66.35523223876953, 69.50971221923828, 72.66419219970703, 75.81867218017578, 78.97315216064453, 82.12763214111328, 85.28211212158203, 88.43659210205078, 91.59107971191406, 94.74555969238281, 97.90003967285156, 101.05451965332031]}, "gradients/decoder.transformer.h.19.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 3.0, 6.0, 6.0, 8.0, 4.0, 0.0, 9.0, 9.0, 10.0, 12.0, 14.0, 20.0, 21.0, 23.0, 25.0, 28.0, 35.0, 38.0, 45.0, 43.0, 44.0, 44.0, 46.0, 32.0, 38.0, 47.0, 31.0, 44.0, 42.0, 30.0, 34.0, 28.0, 20.0, 30.0, 35.0, 18.0, 16.0, 15.0, 10.0, 11.0, 9.0, 7.0, 2.0, 6.0, 5.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.1640625, -8.873046875, -8.58203125, -8.291015625, -8.0, -7.708984375, -7.41796875, -7.126953125, -6.8359375, -6.544921875, -6.25390625, -5.962890625, -5.671875, -5.380859375, -5.08984375, -4.798828125, -4.5078125, -4.216796875, -3.92578125, -3.634765625, -3.34375, -3.052734375, -2.76171875, -2.470703125, -2.1796875, -1.888671875, -1.59765625, -1.306640625, -1.015625, -0.724609375, -0.43359375, -0.142578125, 0.1484375, 0.439453125, 0.73046875, 1.021484375, 1.3125, 1.603515625, 1.89453125, 2.185546875, 2.4765625, 2.767578125, 3.05859375, 3.349609375, 3.640625, 3.931640625, 4.22265625, 4.513671875, 4.8046875, 5.095703125, 5.38671875, 5.677734375, 5.96875, 6.259765625, 6.55078125, 6.841796875, 7.1328125, 7.423828125, 7.71484375, 8.005859375, 8.296875, 8.587890625, 8.87890625, 9.169921875, 9.4609375]}, "gradients/decoder.transformer.h.19.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 2.0, 0.0, 1.0, 2.0, 2.0, 6.0, 8.0, 3.0, 5.0, 11.0, 10.0, 11.0, 15.0, 24.0, 34.0, 53.0, 61.0, 85.0, 144.0, 264.0, 566.0, 1344.0, 3917.0, 13828.0, 64074.0, 510883.0, 2425170.0, 1022268.0, 120576.0, 22006.0, 5667.0, 1744.0, 669.0, 291.0, 169.0, 110.0, 71.0, 51.0, 39.0, 26.0, 23.0, 19.0, 11.0, 8.0, 6.0, 6.0, 5.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-19.234375, -18.5859375, -17.9375, -17.2890625, -16.640625, -15.9921875, -15.34375, -14.6953125, -14.046875, -13.3984375, -12.75, -12.1015625, -11.453125, -10.8046875, -10.15625, -9.5078125, -8.859375, -8.2109375, -7.5625, -6.9140625, -6.265625, -5.6171875, -4.96875, -4.3203125, -3.671875, -3.0234375, -2.375, -1.7265625, -1.078125, -0.4296875, 0.21875, 0.8671875, 1.515625, 2.1640625, 2.8125, 3.4609375, 4.109375, 4.7578125, 5.40625, 6.0546875, 6.703125, 7.3515625, 8.0, 8.6484375, 9.296875, 9.9453125, 10.59375, 11.2421875, 11.890625, 12.5390625, 13.1875, 13.8359375, 14.484375, 15.1328125, 15.78125, 16.4296875, 17.078125, 17.7265625, 18.375, 19.0234375, 19.671875, 20.3203125, 20.96875, 21.6171875, 22.265625]}, "gradients/decoder.transformer.h.19.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 2.0, 2.0, 3.0, 8.0, 3.0, 9.0, 12.0, 22.0, 27.0, 33.0, 53.0, 67.0, 88.0, 137.0, 166.0, 264.0, 321.0, 444.0, 539.0, 507.0, 373.0, 287.0, 192.0, 147.0, 121.0, 64.0, 55.0, 35.0, 25.0, 20.0, 18.0, 8.0, 9.0, 6.0, 5.0, 7.0, 0.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-34.0625, -33.134765625, -32.20703125, -31.279296875, -30.3515625, -29.423828125, -28.49609375, -27.568359375, -26.640625, -25.712890625, -24.78515625, -23.857421875, -22.9296875, -22.001953125, -21.07421875, -20.146484375, -19.21875, -18.291015625, -17.36328125, -16.435546875, -15.5078125, -14.580078125, -13.65234375, -12.724609375, -11.796875, -10.869140625, -9.94140625, -9.013671875, -8.0859375, -7.158203125, -6.23046875, -5.302734375, -4.375, -3.447265625, -2.51953125, -1.591796875, -0.6640625, 0.263671875, 1.19140625, 2.119140625, 3.046875, 3.974609375, 4.90234375, 5.830078125, 6.7578125, 7.685546875, 8.61328125, 9.541015625, 10.46875, 11.396484375, 12.32421875, 13.251953125, 14.1796875, 15.107421875, 16.03515625, 16.962890625, 17.890625, 18.818359375, 19.74609375, 20.673828125, 21.6015625, 22.529296875, 23.45703125, 24.384765625, 25.3125]}, "gradients/decoder.transformer.h.19.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 6.0, 7.0, 12.0, 6.0, 20.0, 24.0, 25.0, 49.0, 56.0, 96.0, 135.0, 224.0, 515.0, 2631.0, 314692.0, 3863149.0, 10883.0, 921.0, 295.0, 191.0, 102.0, 75.0, 46.0, 42.0, 22.0, 23.0, 12.0, 12.0, 12.0, 0.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-123.0, -118.7890625, -114.578125, -110.3671875, -106.15625, -101.9453125, -97.734375, -93.5234375, -89.3125, -85.1015625, -80.890625, -76.6796875, -72.46875, -68.2578125, -64.046875, -59.8359375, -55.625, -51.4140625, -47.203125, -42.9921875, -38.78125, -34.5703125, -30.359375, -26.1484375, -21.9375, -17.7265625, -13.515625, -9.3046875, -5.09375, -0.8828125, 3.328125, 7.5390625, 11.75, 15.9609375, 20.171875, 24.3828125, 28.59375, 32.8046875, 37.015625, 41.2265625, 45.4375, 49.6484375, 53.859375, 58.0703125, 62.28125, 66.4921875, 70.703125, 74.9140625, 79.125, 83.3359375, 87.546875, 91.7578125, 95.96875, 100.1796875, 104.390625, 108.6015625, 112.8125, 117.0234375, 121.234375, 125.4453125, 129.65625, 133.8671875, 138.078125, 142.2890625, 146.5]}, "gradients/decoder.transformer.h.19.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 48.0, 192.0, 402.0, 283.0, 74.0, 11.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-225.11192321777344, -213.69786071777344, -202.28379821777344, -190.8697509765625, -179.4556884765625, -168.0416259765625, -156.6275634765625, -145.2135009765625, -133.7994384765625, -122.3853759765625, -110.9713134765625, -99.55725860595703, -88.14319610595703, -76.72913360595703, -65.31507873535156, -53.90101623535156, -42.48695373535156, -31.072893142700195, -19.658832550048828, -8.244773864746094, 3.1692886352539062, 14.583351135253906, 25.997406005859375, 37.411468505859375, 48.825531005859375, 60.239593505859375, 71.65365600585938, 83.06771087646484, 94.48177337646484, 105.89583587646484, 117.30989074707031, 128.7239532470703, 140.13803100585938, 151.55209350585938, 162.96615600585938, 174.38021850585938, 185.79428100585938, 197.20834350585938, 208.6223907470703, 220.0364532470703, 231.4505157470703, 242.8645782470703, 254.2786407470703, 265.69268798828125, 277.10675048828125, 288.52081298828125, 299.93487548828125, 311.34893798828125, 322.76300048828125, 334.17706298828125, 345.59112548828125, 357.00518798828125, 368.41925048828125, 379.83331298828125, 391.24737548828125, 402.66143798828125, 414.07550048828125, 425.48956298828125, 436.90362548828125, 448.31768798828125, 459.73175048828125, 471.14581298828125, 482.55987548828125, 493.97393798828125, 505.3879699707031]}, "gradients/decoder.transformer.h.19.ln_2.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 4.0, 10.0, 6.0, 4.0, 8.0, 12.0, 10.0, 20.0, 13.0, 11.0, 16.0, 16.0, 21.0, 30.0, 26.0, 26.0, 23.0, 33.0, 30.0, 25.0, 30.0, 39.0, 40.0, 34.0, 40.0, 36.0, 34.0, 37.0, 35.0, 36.0, 29.0, 25.0, 27.0, 23.0, 24.0, 23.0, 27.0, 14.0, 11.0, 15.0, 11.0, 11.0, 9.0, 9.0, 11.0, 7.0, 6.0, 5.0, 5.0, 2.0, 6.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-79.11333465576172, -76.68402099609375, -74.25470733642578, -71.82539367675781, -69.39608001708984, -66.96676635742188, -64.5374526977539, -62.1081428527832, -59.678829193115234, -57.249515533447266, -54.8202018737793, -52.39088821411133, -49.961578369140625, -47.532264709472656, -45.10295104980469, -42.67363739013672, -40.24432373046875, -37.81501007080078, -35.38569641113281, -32.956382751464844, -30.527070999145508, -28.09775733947754, -25.668445587158203, -23.239131927490234, -20.809818267822266, -18.380504608154297, -15.951191902160645, -13.521879196166992, -11.092565536499023, -8.663251876831055, -6.233939170837402, -3.80462646484375, -1.3753204345703125, 1.053992748260498, 3.4833059310913086, 5.912619113922119, 8.34193229675293, 10.771245956420898, 13.20055866241455, 15.629871368408203, 18.059185028076172, 20.48849868774414, 22.91781234741211, 25.347124099731445, 27.776437759399414, 30.205751419067383, 32.63506317138672, 35.06437683105469, 37.493690490722656, 39.923004150390625, 42.352317810058594, 44.78163146972656, 47.21094512939453, 49.6402587890625, 52.0695686340332, 54.49888229370117, 56.92819595336914, 59.35750961303711, 61.78682327270508, 64.21613311767578, 66.64544677734375, 69.07476043701172, 71.50407409667969, 73.93338775634766, 76.36270141601562]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 2.0, 5.0, 3.0, 4.0, 6.0, 7.0, 8.0, 12.0, 11.0, 17.0, 22.0, 12.0, 12.0, 22.0, 27.0, 37.0, 32.0, 32.0, 47.0, 25.0, 45.0, 44.0, 47.0, 37.0, 45.0, 32.0, 40.0, 41.0, 31.0, 40.0, 34.0, 30.0, 34.0, 20.0, 14.0, 20.0, 32.0, 16.0, 15.0, 9.0, 6.0, 6.0, 6.0, 7.0, 6.0, 2.0, 1.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.5546875, -9.2618408203125, -8.968994140625, -8.6761474609375, -8.38330078125, -8.0904541015625, -7.797607421875, -7.5047607421875, -7.2119140625, -6.9190673828125, -6.626220703125, -6.3333740234375, -6.04052734375, -5.7476806640625, -5.454833984375, -5.1619873046875, -4.869140625, -4.5762939453125, -4.283447265625, -3.9906005859375, -3.69775390625, -3.4049072265625, -3.112060546875, -2.8192138671875, -2.5263671875, -2.2335205078125, -1.940673828125, -1.6478271484375, -1.35498046875, -1.0621337890625, -0.769287109375, -0.4764404296875, -0.18359375, 0.1092529296875, 0.402099609375, 0.6949462890625, 0.98779296875, 1.2806396484375, 1.573486328125, 1.8663330078125, 2.1591796875, 2.4520263671875, 2.744873046875, 3.0377197265625, 3.33056640625, 3.6234130859375, 3.916259765625, 4.2091064453125, 4.501953125, 4.7947998046875, 5.087646484375, 5.3804931640625, 5.67333984375, 5.9661865234375, 6.259033203125, 6.5518798828125, 6.8447265625, 7.1375732421875, 7.430419921875, 7.7232666015625, 8.01611328125, 8.3089599609375, 8.601806640625, 8.8946533203125, 9.1875]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 4.0, 6.0, 11.0, 16.0, 21.0, 29.0, 49.0, 78.0, 103.0, 142.0, 225.0, 307.0, 438.0, 649.0, 950.0, 1345.0, 1932.0, 2838.0, 4080.0, 5794.0, 8621.0, 12511.0, 18160.0, 25893.0, 38557.0, 56351.0, 82567.0, 120799.0, 169766.0, 156241.0, 106870.0, 73473.0, 50231.0, 34358.0, 23206.0, 16131.0, 11183.0, 7595.0, 5325.0, 3651.0, 2506.0, 1713.0, 1211.0, 791.0, 611.0, 396.0, 239.0, 208.0, 137.0, 84.0, 63.0, 31.0, 28.0, 17.0, 8.0, 10.0, 4.0, 5.0, 0.0, 2.0], "bins": [-0.94775390625, -0.9188995361328125, -0.890045166015625, -0.8611907958984375, -0.83233642578125, -0.8034820556640625, -0.774627685546875, -0.7457733154296875, -0.7169189453125, -0.6880645751953125, -0.659210205078125, -0.6303558349609375, -0.60150146484375, -0.5726470947265625, -0.543792724609375, -0.5149383544921875, -0.486083984375, -0.4572296142578125, -0.428375244140625, -0.3995208740234375, -0.37066650390625, -0.3418121337890625, -0.312957763671875, -0.2841033935546875, -0.2552490234375, -0.2263946533203125, -0.197540283203125, -0.1686859130859375, -0.13983154296875, -0.1109771728515625, -0.082122802734375, -0.0532684326171875, -0.0244140625, 0.0044403076171875, 0.033294677734375, 0.0621490478515625, 0.09100341796875, 0.1198577880859375, 0.148712158203125, 0.1775665283203125, 0.2064208984375, 0.2352752685546875, 0.264129638671875, 0.2929840087890625, 0.32183837890625, 0.3506927490234375, 0.379547119140625, 0.4084014892578125, 0.437255859375, 0.4661102294921875, 0.494964599609375, 0.5238189697265625, 0.55267333984375, 0.5815277099609375, 0.610382080078125, 0.6392364501953125, 0.6680908203125, 0.6969451904296875, 0.725799560546875, 0.7546539306640625, 0.78350830078125, 0.8123626708984375, 0.841217041015625, 0.8700714111328125, 0.89892578125]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 8.0, 9.0, 17.0, 8.0, 14.0, 16.0, 17.0, 20.0, 29.0, 27.0, 36.0, 32.0, 37.0, 41.0, 38.0, 48.0, 30.0, 53.0, 1076.0, 42.0, 46.0, 44.0, 36.0, 38.0, 33.0, 22.0, 33.0, 22.0, 22.0, 24.0, 17.0, 10.0, 15.0, 20.0, 8.0, 5.0, 8.0, 10.0, 3.0, 4.0, 3.0, 7.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-6.9296875, -6.72760009765625, -6.5255126953125, -6.32342529296875, -6.121337890625, -5.91925048828125, -5.7171630859375, -5.51507568359375, -5.31298828125, -5.11090087890625, -4.9088134765625, -4.70672607421875, -4.504638671875, -4.30255126953125, -4.1004638671875, -3.89837646484375, -3.6962890625, -3.49420166015625, -3.2921142578125, -3.09002685546875, -2.887939453125, -2.68585205078125, -2.4837646484375, -2.28167724609375, -2.07958984375, -1.87750244140625, -1.6754150390625, -1.47332763671875, -1.271240234375, -1.06915283203125, -0.8670654296875, -0.66497802734375, -0.462890625, -0.26080322265625, -0.0587158203125, 0.14337158203125, 0.345458984375, 0.54754638671875, 0.7496337890625, 0.95172119140625, 1.15380859375, 1.35589599609375, 1.5579833984375, 1.76007080078125, 1.962158203125, 2.16424560546875, 2.3663330078125, 2.56842041015625, 2.7705078125, 2.97259521484375, 3.1746826171875, 3.37677001953125, 3.578857421875, 3.78094482421875, 3.9830322265625, 4.18511962890625, 4.38720703125, 4.58929443359375, 4.7913818359375, 4.99346923828125, 5.195556640625, 5.39764404296875, 5.5997314453125, 5.80181884765625, 6.00390625]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 4.0, 4.0, 4.0, 9.0, 4.0, 12.0, 26.0, 45.0, 52.0, 102.0, 136.0, 271.0, 341.0, 541.0, 793.0, 1268.0, 2006.0, 3188.0, 5018.0, 7824.0, 12401.0, 20142.0, 31996.0, 51218.0, 83057.0, 140001.0, 1266897.0, 182766.0, 110152.0, 66142.0, 41294.0, 25725.0, 16086.0, 10133.0, 6321.0, 4072.0, 2557.0, 1616.0, 1029.0, 682.0, 443.0, 277.0, 175.0, 100.0, 73.0, 47.0, 36.0, 17.0, 19.0, 11.0, 7.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0419921875, -1.006988525390625, -0.97198486328125, -0.936981201171875, -0.9019775390625, -0.866973876953125, -0.83197021484375, -0.796966552734375, -0.761962890625, -0.726959228515625, -0.69195556640625, -0.656951904296875, -0.6219482421875, -0.586944580078125, -0.55194091796875, -0.516937255859375, -0.48193359375, -0.446929931640625, -0.41192626953125, -0.376922607421875, -0.3419189453125, -0.306915283203125, -0.27191162109375, -0.236907958984375, -0.201904296875, -0.166900634765625, -0.13189697265625, -0.096893310546875, -0.0618896484375, -0.026885986328125, 0.00811767578125, 0.043121337890625, 0.078125, 0.113128662109375, 0.14813232421875, 0.183135986328125, 0.2181396484375, 0.253143310546875, 0.28814697265625, 0.323150634765625, 0.358154296875, 0.393157958984375, 0.42816162109375, 0.463165283203125, 0.4981689453125, 0.533172607421875, 0.56817626953125, 0.603179931640625, 0.63818359375, 0.673187255859375, 0.70819091796875, 0.743194580078125, 0.7781982421875, 0.813201904296875, 0.84820556640625, 0.883209228515625, 0.918212890625, 0.953216552734375, 0.98822021484375, 1.023223876953125, 1.0582275390625, 1.093231201171875, 1.12823486328125, 1.163238525390625, 1.1982421875]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 4.0, 0.0, 7.0, 7.0, 5.0, 15.0, 8.0, 10.0, 13.0, 15.0, 22.0, 28.0, 35.0, 45.0, 43.0, 53.0, 70.0, 70.0, 75.0, 75.0, 53.0, 57.0, 59.0, 45.0, 36.0, 34.0, 26.0, 29.0, 10.0, 15.0, 12.0, 10.0, 4.0, 1.0, 1.0, 0.0, 5.0, 3.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.002468109130859375, -0.002392321825027466, -0.0023165345191955566, -0.0022407472133636475, -0.0021649599075317383, -0.002089172601699829, -0.00201338529586792, -0.0019375979900360107, -0.0018618106842041016, -0.0017860233783721924, -0.0017102360725402832, -0.001634448766708374, -0.0015586614608764648, -0.0014828741550445557, -0.0014070868492126465, -0.0013312995433807373, -0.0012555122375488281, -0.001179724931716919, -0.0011039376258850098, -0.0010281503200531006, -0.0009523630142211914, -0.0008765757083892822, -0.000800788402557373, -0.0007250010967254639, -0.0006492137908935547, -0.0005734264850616455, -0.0004976391792297363, -0.00042185187339782715, -0.00034606456756591797, -0.0002702772617340088, -0.0001944899559020996, -0.00011870265007019043, -4.291534423828125e-05, 3.287196159362793e-05, 0.00010865926742553711, 0.0001844465732574463, 0.00026023387908935547, 0.00033602118492126465, 0.00041180849075317383, 0.000487595796585083, 0.0005633831024169922, 0.0006391704082489014, 0.0007149577140808105, 0.0007907450199127197, 0.0008665323257446289, 0.0009423196315765381, 0.0010181069374084473, 0.0010938942432403564, 0.0011696815490722656, 0.0012454688549041748, 0.001321256160736084, 0.0013970434665679932, 0.0014728307723999023, 0.0015486180782318115, 0.0016244053840637207, 0.0017001926898956299, 0.001775979995727539, 0.0018517673015594482, 0.0019275546073913574, 0.0020033419132232666, 0.0020791292190551758, 0.002154916524887085, 0.002230703830718994, 0.0023064911365509033, 0.0023822784423828125]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 4.0, 1.0, 4.0, 2.0, 3.0, 6.0, 2.0, 8.0, 21.0, 22.0, 29.0, 30.0, 39.0, 47.0, 78.0, 101.0, 205.0, 334.0, 890.0, 521007.0, 523917.0, 916.0, 324.0, 194.0, 106.0, 80.0, 44.0, 36.0, 28.0, 15.0, 13.0, 8.0, 10.0, 7.0, 9.0, 4.0, 4.0, 1.0, 3.0, 2.0, 1.0, 1.0, 3.0, 2.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0584716796875, -0.056644439697265625, -0.05481719970703125, -0.052989959716796875, -0.0511627197265625, -0.049335479736328125, -0.04750823974609375, -0.045680999755859375, -0.043853759765625, -0.042026519775390625, -0.04019927978515625, -0.038372039794921875, -0.0365447998046875, -0.034717559814453125, -0.03289031982421875, -0.031063079833984375, -0.02923583984375, -0.027408599853515625, -0.02558135986328125, -0.023754119873046875, -0.0219268798828125, -0.020099639892578125, -0.01827239990234375, -0.016445159912109375, -0.014617919921875, -0.012790679931640625, -0.01096343994140625, -0.009136199951171875, -0.0073089599609375, -0.005481719970703125, -0.00365447998046875, -0.001827239990234375, 0.0, 0.001827239990234375, 0.00365447998046875, 0.005481719970703125, 0.0073089599609375, 0.009136199951171875, 0.01096343994140625, 0.012790679931640625, 0.014617919921875, 0.016445159912109375, 0.01827239990234375, 0.020099639892578125, 0.0219268798828125, 0.023754119873046875, 0.02558135986328125, 0.027408599853515625, 0.02923583984375, 0.031063079833984375, 0.03289031982421875, 0.034717559814453125, 0.0365447998046875, 0.038372039794921875, 0.04019927978515625, 0.042026519775390625, 0.043853759765625, 0.045680999755859375, 0.04750823974609375, 0.049335479736328125, 0.0511627197265625, 0.052989959716796875, 0.05481719970703125, 0.056644439697265625, 0.0584716796875]}, "gradients/decoder.transformer.h.19.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 28.0, 279.0, 607.0, 93.0, 6.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0027603129856288433, -0.0025665361899882555, -0.0023727596271783113, -0.0021789828315377235, -0.0019852062687277794, -0.0017914294730871916, -0.0015976526774466038, -0.0014038759982213378, -0.0012100993189960718, -0.0010163226397708058, -0.0008225459023378789, -0.000628769164904952, -0.00043499248567968607, -0.0002412158064544201, -4.743901081383228e-05, 0.0001463376684114337, 0.0003401143476366997, 0.0005338910268619657, 0.0007276677642948925, 0.0009214445017278194, 0.0011152211809530854, 0.0013089978601783514, 0.0015027746558189392, 0.0016965513350442052, 0.0018903280142694712, 0.002084104809910059, 0.002277881372720003, 0.002471658168360591, 0.0026654349640011787, 0.002859211526811123, 0.0030529883224517107, 0.0032467651180922985, 0.0034405412152409554, 0.003634318010881543, 0.0038280945736914873, 0.004021871369332075, 0.004215647932142019, 0.004409424960613251, 0.004603201523423195, 0.004796978086233139, 0.004990754649043083, 0.005184531211853027, 0.005378308240324259, 0.005572084803134203, 0.005765861365944147, 0.005959638394415379, 0.006153414957225323, 0.006347191520035267, 0.006540968548506498, 0.0067347451113164425, 0.006928522139787674, 0.007122298702597618, 0.007316075265407562, 0.007509851828217506, 0.007703628856688738, 0.00789740588515997, 0.008091182447969913, 0.008284959010779858, 0.008478735573589802, 0.00867251306772232, 0.008866289630532265, 0.009060066193342209, 0.009253842756152153, 0.009447619318962097, 0.009641395881772041]}, "gradients/decoder.transformer.h.19.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 7.0, 4.0, 7.0, 3.0, 9.0, 16.0, 7.0, 10.0, 8.0, 9.0, 20.0, 25.0, 14.0, 23.0, 33.0, 36.0, 23.0, 34.0, 35.0, 41.0, 37.0, 37.0, 45.0, 41.0, 44.0, 37.0, 43.0, 43.0, 31.0, 26.0, 32.0, 33.0, 20.0, 26.0, 18.0, 26.0, 15.0, 12.0, 18.0, 14.0, 6.0, 9.0, 4.0, 9.0, 4.0, 5.0, 5.0, 1.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.001100778579711914, -0.001066124066710472, -0.0010314695537090302, -0.0009968150407075882, -0.0009621605277061462, -0.0009275060147047043, -0.0008928515017032623, -0.0008581969887018204, -0.0008235424757003784, -0.0007888879626989365, -0.0007542334496974945, -0.0007195789366960526, -0.0006849244236946106, -0.0006502699106931686, -0.0006156153976917267, -0.0005809608846902847, -0.0005463063716888428, -0.0005116518586874008, -0.00047699734568595886, -0.0004423428326845169, -0.00040768831968307495, -0.000373033806681633, -0.00033837929368019104, -0.0003037247806787491, -0.00026907026767730713, -0.00023441575467586517, -0.00019976124167442322, -0.00016510672867298126, -0.0001304522156715393, -9.579770267009735e-05, -6.11431896686554e-05, -2.648867666721344e-05, 8.165836334228516e-06, 4.282034933567047e-05, 7.747486233711243e-05, 0.00011212937533855438, 0.00014678388833999634, 0.0001814384013414383, 0.00021609291434288025, 0.0002507474273443222, 0.00028540194034576416, 0.0003200564533472061, 0.00035471096634864807, 0.00038936547935009, 0.000424019992351532, 0.00045867450535297394, 0.0004933290183544159, 0.0005279835313558578, 0.0005626380443572998, 0.0005972925573587418, 0.0006319470703601837, 0.0006666015833616257, 0.0007012560963630676, 0.0007359106093645096, 0.0007705651223659515, 0.0008052196353673935, 0.0008398741483688354, 0.0008745286613702774, 0.0009091831743717194, 0.0009438376873731613, 0.0009784922003746033, 0.0010131467133760452, 0.0010478012263774872, 0.0010824557393789291, 0.001117110252380371]}, "gradients/decoder.transformer.h.19.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 2.0, 5.0, 3.0, 4.0, 6.0, 7.0, 8.0, 12.0, 11.0, 17.0, 22.0, 12.0, 12.0, 22.0, 27.0, 37.0, 32.0, 32.0, 48.0, 24.0, 45.0, 44.0, 47.0, 37.0, 46.0, 31.0, 41.0, 40.0, 31.0, 41.0, 35.0, 28.0, 34.0, 20.0, 14.0, 20.0, 33.0, 16.0, 14.0, 9.0, 6.0, 6.0, 7.0, 6.0, 6.0, 2.0, 1.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.5546875, -9.26171875, -8.96875, -8.67578125, -8.3828125, -8.08984375, -7.796875, -7.50390625, -7.2109375, -6.91796875, -6.625, -6.33203125, -6.0390625, -5.74609375, -5.453125, -5.16015625, -4.8671875, -4.57421875, -4.28125, -3.98828125, -3.6953125, -3.40234375, -3.109375, -2.81640625, -2.5234375, -2.23046875, -1.9375, -1.64453125, -1.3515625, -1.05859375, -0.765625, -0.47265625, -0.1796875, 0.11328125, 0.40625, 0.69921875, 0.9921875, 1.28515625, 1.578125, 1.87109375, 2.1640625, 2.45703125, 2.75, 3.04296875, 3.3359375, 3.62890625, 3.921875, 4.21484375, 4.5078125, 4.80078125, 5.09375, 5.38671875, 5.6796875, 5.97265625, 6.265625, 6.55859375, 6.8515625, 7.14453125, 7.4375, 7.73046875, 8.0234375, 8.31640625, 8.609375, 8.90234375, 9.1953125]}, "gradients/decoder.transformer.h.19.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 4.0, 1.0, 3.0, 10.0, 12.0, 16.0, 21.0, 35.0, 45.0, 72.0, 98.0, 154.0, 217.0, 293.0, 433.0, 693.0, 1020.0, 1576.0, 2622.0, 4616.0, 8802.0, 18525.0, 46576.0, 143571.0, 398867.0, 275266.0, 85650.0, 30200.0, 13036.0, 6580.0, 3584.0, 2053.0, 1296.0, 810.0, 569.0, 388.0, 242.0, 215.0, 112.0, 92.0, 65.0, 42.0, 25.0, 17.0, 19.0, 10.0, 3.0, 1.0, 4.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.24609375, -6.0555419921875, -5.864990234375, -5.6744384765625, -5.48388671875, -5.2933349609375, -5.102783203125, -4.9122314453125, -4.7216796875, -4.5311279296875, -4.340576171875, -4.1500244140625, -3.95947265625, -3.7689208984375, -3.578369140625, -3.3878173828125, -3.197265625, -3.0067138671875, -2.816162109375, -2.6256103515625, -2.43505859375, -2.2445068359375, -2.053955078125, -1.8634033203125, -1.6728515625, -1.4822998046875, -1.291748046875, -1.1011962890625, -0.91064453125, -0.7200927734375, -0.529541015625, -0.3389892578125, -0.1484375, 0.0421142578125, 0.232666015625, 0.4232177734375, 0.61376953125, 0.8043212890625, 0.994873046875, 1.1854248046875, 1.3759765625, 1.5665283203125, 1.757080078125, 1.9476318359375, 2.13818359375, 2.3287353515625, 2.519287109375, 2.7098388671875, 2.900390625, 3.0909423828125, 3.281494140625, 3.4720458984375, 3.66259765625, 3.8531494140625, 4.043701171875, 4.2342529296875, 4.4248046875, 4.6153564453125, 4.805908203125, 4.9964599609375, 5.18701171875, 5.3775634765625, 5.568115234375, 5.7586669921875, 5.94921875]}, "gradients/decoder.transformer.h.19.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 3.0, 2.0, 7.0, 11.0, 9.0, 14.0, 12.0, 12.0, 22.0, 17.0, 23.0, 24.0, 31.0, 24.0, 29.0, 33.0, 37.0, 33.0, 51.0, 57.0, 115.0, 1701.0, 281.0, 85.0, 55.0, 47.0, 49.0, 37.0, 31.0, 27.0, 30.0, 17.0, 18.0, 18.0, 14.0, 16.0, 7.0, 9.0, 7.0, 6.0, 10.0, 8.0, 4.0, 4.0, 4.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-31.75, -30.68359375, -29.6171875, -28.55078125, -27.484375, -26.41796875, -25.3515625, -24.28515625, -23.21875, -22.15234375, -21.0859375, -20.01953125, -18.953125, -17.88671875, -16.8203125, -15.75390625, -14.6875, -13.62109375, -12.5546875, -11.48828125, -10.421875, -9.35546875, -8.2890625, -7.22265625, -6.15625, -5.08984375, -4.0234375, -2.95703125, -1.890625, -0.82421875, 0.2421875, 1.30859375, 2.375, 3.44140625, 4.5078125, 5.57421875, 6.640625, 7.70703125, 8.7734375, 9.83984375, 10.90625, 11.97265625, 13.0390625, 14.10546875, 15.171875, 16.23828125, 17.3046875, 18.37109375, 19.4375, 20.50390625, 21.5703125, 22.63671875, 23.703125, 24.76953125, 25.8359375, 26.90234375, 27.96875, 29.03515625, 30.1015625, 31.16796875, 32.234375, 33.30078125, 34.3671875, 35.43359375, 36.5]}, "gradients/decoder.transformer.h.19.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 4.0, 2.0, 4.0, 5.0, 9.0, 10.0, 7.0, 7.0, 6.0, 9.0, 15.0, 16.0, 23.0, 20.0, 23.0, 47.0, 50.0, 87.0, 101.0, 153.0, 242.0, 576.0, 2035.0, 54223.0, 3071162.0, 14386.0, 1233.0, 427.0, 240.0, 147.0, 101.0, 69.0, 56.0, 40.0, 34.0, 22.0, 21.0, 19.0, 17.0, 15.0, 10.0, 12.0, 9.0, 6.0, 6.0, 4.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-58.78125, -57.01904296875, -55.2568359375, -53.49462890625, -51.732421875, -49.97021484375, -48.2080078125, -46.44580078125, -44.68359375, -42.92138671875, -41.1591796875, -39.39697265625, -37.634765625, -35.87255859375, -34.1103515625, -32.34814453125, -30.5859375, -28.82373046875, -27.0615234375, -25.29931640625, -23.537109375, -21.77490234375, -20.0126953125, -18.25048828125, -16.48828125, -14.72607421875, -12.9638671875, -11.20166015625, -9.439453125, -7.67724609375, -5.9150390625, -4.15283203125, -2.390625, -0.62841796875, 1.1337890625, 2.89599609375, 4.658203125, 6.42041015625, 8.1826171875, 9.94482421875, 11.70703125, 13.46923828125, 15.2314453125, 16.99365234375, 18.755859375, 20.51806640625, 22.2802734375, 24.04248046875, 25.8046875, 27.56689453125, 29.3291015625, 31.09130859375, 32.853515625, 34.61572265625, 36.3779296875, 38.14013671875, 39.90234375, 41.66455078125, 43.4267578125, 45.18896484375, 46.951171875, 48.71337890625, 50.4755859375, 52.23779296875, 54.0]}, "gradients/decoder.transformer.h.19.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 10.0, 64.0, 306.0, 440.0, 162.0, 30.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-40.47980499267578, -37.180206298828125, -33.8806037902832, -30.581003189086914, -27.281402587890625, -23.981801986694336, -20.682201385498047, -17.382600784301758, -14.083000183105469, -10.78339958190918, -7.483798980712891, -4.184198379516602, -0.8845977783203125, 2.4150028228759766, 5.714603424072266, 9.014204025268555, 12.313804626464844, 15.613405227661133, 18.913005828857422, 22.21260643005371, 25.51220703125, 28.81180763244629, 32.11140823364258, 35.4110107421875, 38.710609436035156, 42.01020812988281, 45.309810638427734, 48.609413146972656, 51.90901184082031, 55.20861053466797, 58.50821304321289, 61.80781555175781, 65.10740661621094, 68.4070053100586, 71.70660400390625, 75.00621032714844, 78.3058090209961, 81.60540771484375, 84.90501403808594, 88.2046127319336, 91.50421142578125, 94.8038101196289, 98.10340881347656, 101.40301513671875, 104.7026138305664, 108.00221252441406, 111.30181884765625, 114.6014175415039, 117.90101623535156, 121.20061492919922, 124.50021362304688, 127.79981994628906, 131.09942626953125, 134.39901733398438, 137.69862365722656, 140.9982147216797, 144.29782104492188, 147.59742736816406, 150.8970184326172, 154.19662475585938, 157.4962158203125, 160.7958221435547, 164.09542846679688, 167.39501953125, 170.6946258544922]}, "gradients/decoder.transformer.h.19.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 3.0, 2.0, 4.0, 2.0, 6.0, 5.0, 8.0, 8.0, 20.0, 20.0, 20.0, 17.0, 16.0, 41.0, 32.0, 30.0, 41.0, 47.0, 31.0, 49.0, 49.0, 54.0, 51.0, 51.0, 51.0, 44.0, 41.0, 38.0, 35.0, 25.0, 29.0, 23.0, 26.0, 18.0, 13.0, 8.0, 12.0, 9.0, 10.0, 6.0, 5.0, 5.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-140.05441284179688, -136.0807647705078, -132.1071014404297, -128.13345336914062, -124.15980529785156, -120.18614959716797, -116.21249389648438, -112.23884582519531, -108.26519012451172, -104.29153442382812, -100.31788635253906, -96.34423065185547, -92.3705825805664, -88.39692687988281, -84.42327880859375, -80.44962310791016, -76.47596740722656, -72.50231170654297, -68.5286636352539, -64.55500793457031, -60.581356048583984, -56.607704162597656, -52.63405227661133, -48.660400390625, -44.68675231933594, -40.71310043334961, -36.73944854736328, -32.76579284667969, -28.79214096069336, -24.81848907470703, -20.844837188720703, -16.871183395385742, -12.897529602050781, -8.923876762390137, -4.95022439956665, -0.9765720367431641, 2.9970808029174805, 6.970733642578125, 10.944385528564453, 14.918039321899414, 18.891691207885742, 22.86534309387207, 26.83899688720703, 30.81264877319336, 34.78630065917969, 38.75995635986328, 42.733604431152344, 46.70726013183594, 50.680912017822266, 54.654563903808594, 58.62821578979492, 62.60186767578125, 66.57552337646484, 70.54917907714844, 74.5228271484375, 78.4964828491211, 82.47013092041016, 86.44378662109375, 90.41743469238281, 94.3910903930664, 98.36473846435547, 102.33839416503906, 106.31204223632812, 110.28569793701172, 114.25935363769531]}, "gradients/decoder.transformer.h.18.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 5.0, 2.0, 1.0, 4.0, 6.0, 2.0, 3.0, 5.0, 9.0, 11.0, 11.0, 9.0, 24.0, 14.0, 22.0, 16.0, 20.0, 31.0, 35.0, 39.0, 26.0, 43.0, 35.0, 45.0, 46.0, 47.0, 41.0, 41.0, 33.0, 34.0, 36.0, 35.0, 44.0, 33.0, 27.0, 25.0, 21.0, 15.0, 27.0, 26.0, 10.0, 12.0, 6.0, 5.0, 10.0, 6.0, 5.0, 6.0, 4.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.9375, -9.6373291015625, -9.337158203125, -9.0369873046875, -8.73681640625, -8.4366455078125, -8.136474609375, -7.8363037109375, -7.5361328125, -7.2359619140625, -6.935791015625, -6.6356201171875, -6.33544921875, -6.0352783203125, -5.735107421875, -5.4349365234375, -5.134765625, -4.8345947265625, -4.534423828125, -4.2342529296875, -3.93408203125, -3.6339111328125, -3.333740234375, -3.0335693359375, -2.7333984375, -2.4332275390625, -2.133056640625, -1.8328857421875, -1.53271484375, -1.2325439453125, -0.932373046875, -0.6322021484375, -0.33203125, -0.0318603515625, 0.268310546875, 0.5684814453125, 0.86865234375, 1.1688232421875, 1.468994140625, 1.7691650390625, 2.0693359375, 2.3695068359375, 2.669677734375, 2.9698486328125, 3.27001953125, 3.5701904296875, 3.870361328125, 4.1705322265625, 4.470703125, 4.7708740234375, 5.071044921875, 5.3712158203125, 5.67138671875, 5.9715576171875, 6.271728515625, 6.5718994140625, 6.8720703125, 7.1722412109375, 7.472412109375, 7.7725830078125, 8.07275390625, 8.3729248046875, 8.673095703125, 8.9732666015625, 9.2734375]}, "gradients/decoder.transformer.h.18.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 4.0, 5.0, 1.0, 1.0, 8.0, 6.0, 9.0, 12.0, 8.0, 17.0, 19.0, 15.0, 20.0, 43.0, 58.0, 70.0, 122.0, 188.0, 417.0, 1097.0, 4249.0, 24331.0, 293518.0, 2963479.0, 844751.0, 51468.0, 7459.0, 1680.0, 522.0, 227.0, 135.0, 111.0, 49.0, 37.0, 28.0, 25.0, 21.0, 13.0, 14.0, 13.0, 11.0, 7.0, 6.0, 8.0, 3.0, 0.0, 1.0, 5.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-31.796875, -30.8525390625, -29.908203125, -28.9638671875, -28.01953125, -27.0751953125, -26.130859375, -25.1865234375, -24.2421875, -23.2978515625, -22.353515625, -21.4091796875, -20.46484375, -19.5205078125, -18.576171875, -17.6318359375, -16.6875, -15.7431640625, -14.798828125, -13.8544921875, -12.91015625, -11.9658203125, -11.021484375, -10.0771484375, -9.1328125, -8.1884765625, -7.244140625, -6.2998046875, -5.35546875, -4.4111328125, -3.466796875, -2.5224609375, -1.578125, -0.6337890625, 0.310546875, 1.2548828125, 2.19921875, 3.1435546875, 4.087890625, 5.0322265625, 5.9765625, 6.9208984375, 7.865234375, 8.8095703125, 9.75390625, 10.6982421875, 11.642578125, 12.5869140625, 13.53125, 14.4755859375, 15.419921875, 16.3642578125, 17.30859375, 18.2529296875, 19.197265625, 20.1416015625, 21.0859375, 22.0302734375, 22.974609375, 23.9189453125, 24.86328125, 25.8076171875, 26.751953125, 27.6962890625, 28.640625]}, "gradients/decoder.transformer.h.18.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 5.0, 7.0, 10.0, 11.0, 14.0, 26.0, 30.0, 48.0, 61.0, 81.0, 118.0, 153.0, 273.0, 348.0, 424.0, 548.0, 469.0, 400.0, 295.0, 193.0, 164.0, 117.0, 85.0, 60.0, 39.0, 27.0, 31.0, 12.0, 9.0, 9.0, 4.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.0625, -31.112548828125, -30.16259765625, -29.212646484375, -28.2626953125, -27.312744140625, -26.36279296875, -25.412841796875, -24.462890625, -23.512939453125, -22.56298828125, -21.613037109375, -20.6630859375, -19.713134765625, -18.76318359375, -17.813232421875, -16.86328125, -15.913330078125, -14.96337890625, -14.013427734375, -13.0634765625, -12.113525390625, -11.16357421875, -10.213623046875, -9.263671875, -8.313720703125, -7.36376953125, -6.413818359375, -5.4638671875, -4.513916015625, -3.56396484375, -2.614013671875, -1.6640625, -0.714111328125, 0.23583984375, 1.185791015625, 2.1357421875, 3.085693359375, 4.03564453125, 4.985595703125, 5.935546875, 6.885498046875, 7.83544921875, 8.785400390625, 9.7353515625, 10.685302734375, 11.63525390625, 12.585205078125, 13.53515625, 14.485107421875, 15.43505859375, 16.385009765625, 17.3349609375, 18.284912109375, 19.23486328125, 20.184814453125, 21.134765625, 22.084716796875, 23.03466796875, 23.984619140625, 24.9345703125, 25.884521484375, 26.83447265625, 27.784423828125, 28.734375]}, "gradients/decoder.transformer.h.18.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 2.0, 5.0, 4.0, 3.0, 3.0, 13.0, 10.0, 26.0, 30.0, 51.0, 59.0, 89.0, 139.0, 232.0, 441.0, 995.0, 7063.0, 1483539.0, 2689938.0, 9347.0, 1150.0, 467.0, 237.0, 150.0, 92.0, 67.0, 41.0, 30.0, 15.0, 12.0, 15.0, 8.0, 11.0, 2.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-94.6875, -91.0517578125, -87.416015625, -83.7802734375, -80.14453125, -76.5087890625, -72.873046875, -69.2373046875, -65.6015625, -61.9658203125, -58.330078125, -54.6943359375, -51.05859375, -47.4228515625, -43.787109375, -40.1513671875, -36.515625, -32.8798828125, -29.244140625, -25.6083984375, -21.97265625, -18.3369140625, -14.701171875, -11.0654296875, -7.4296875, -3.7939453125, -0.158203125, 3.4775390625, 7.11328125, 10.7490234375, 14.384765625, 18.0205078125, 21.65625, 25.2919921875, 28.927734375, 32.5634765625, 36.19921875, 39.8349609375, 43.470703125, 47.1064453125, 50.7421875, 54.3779296875, 58.013671875, 61.6494140625, 65.28515625, 68.9208984375, 72.556640625, 76.1923828125, 79.828125, 83.4638671875, 87.099609375, 90.7353515625, 94.37109375, 98.0068359375, 101.642578125, 105.2783203125, 108.9140625, 112.5498046875, 116.185546875, 119.8212890625, 123.45703125, 127.0927734375, 130.728515625, 134.3642578125, 138.0]}, "gradients/decoder.transformer.h.18.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 3.0, 1.0, 10.0, 15.0, 42.0, 80.0, 135.0, 200.0, 204.0, 155.0, 73.0, 57.0, 20.0, 11.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-112.28968811035156, -106.3785629272461, -100.46743774414062, -94.55632019042969, -88.64519500732422, -82.73406982421875, -76.82295227050781, -70.91182708740234, -65.00070190429688, -59.089576721191406, -53.1784553527832, -47.267333984375, -41.35620880126953, -35.44508361816406, -29.53396224975586, -23.622840881347656, -17.711715698242188, -11.800592422485352, -5.889469146728516, 0.021654129028320312, 5.932777404785156, 11.843900680541992, 17.755023956298828, 23.66614532470703, 29.5772705078125, 35.48839569091797, 41.39951705932617, 47.310638427734375, 53.221763610839844, 59.13288879394531, 65.04400634765625, 70.95513153076172, 76.86624145507812, 82.7773666381836, 88.68849182128906, 94.599609375, 100.51073455810547, 106.42185974121094, 112.33297729492188, 118.24410247802734, 124.15522766113281, 130.06634521484375, 135.97747802734375, 141.8885955810547, 147.79971313476562, 153.71084594726562, 159.62196350097656, 165.5330810546875, 171.4442138671875, 177.35533142089844, 183.26646423339844, 189.17758178710938, 195.08871459960938, 200.9998321533203, 206.91094970703125, 212.82208251953125, 218.7332000732422, 224.64431762695312, 230.55545043945312, 236.46656799316406, 242.377685546875, 248.288818359375, 254.19993591308594, 260.1110534667969, 266.0221862792969]}, "gradients/decoder.transformer.h.18.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 5.0, 4.0, 3.0, 5.0, 6.0, 5.0, 9.0, 6.0, 23.0, 7.0, 19.0, 12.0, 20.0, 17.0, 34.0, 33.0, 36.0, 45.0, 36.0, 44.0, 33.0, 39.0, 48.0, 40.0, 43.0, 47.0, 30.0, 43.0, 30.0, 36.0, 49.0, 29.0, 28.0, 14.0, 30.0, 17.0, 12.0, 7.0, 8.0, 11.0, 13.0, 8.0, 6.0, 2.0, 5.0, 3.0, 2.0, 4.0, 0.0, 2.0, 3.0, 1.0, 1.0], "bins": [-98.94590759277344, -96.07748413085938, -93.20905303955078, -90.34062957763672, -87.47219848632812, -84.60377502441406, -81.7353515625, -78.86692810058594, -75.99849700927734, -73.13007354736328, -70.26164245605469, -67.39321899414062, -64.52479553222656, -61.65636444091797, -58.787940979003906, -55.91951370239258, -53.05108642578125, -50.18265914916992, -47.314231872558594, -44.44580841064453, -41.5773811340332, -38.708953857421875, -35.84053039550781, -32.972103118896484, -30.103675842285156, -27.235248565673828, -24.366823196411133, -21.498397827148438, -18.62997055053711, -15.761544227600098, -12.893117904663086, -10.02469253540039, -7.1562652587890625, -4.287838935852051, -1.419412612915039, 1.4490137100219727, 4.317440032958984, 7.185866355895996, 10.054292678833008, 12.922718048095703, 15.791145324707031, 18.65957260131836, 21.527997970581055, 24.39642333984375, 27.264850616455078, 30.133277893066406, 33.00170135498047, 35.8701286315918, 38.738555908203125, 41.60698318481445, 44.47541046142578, 47.343833923339844, 50.21226119995117, 53.0806884765625, 55.94911193847656, 58.81753921508789, 61.68596649169922, 64.55438995361328, 67.42282104492188, 70.29124450683594, 73.15966796875, 76.0280990600586, 78.89652252197266, 81.76495361328125, 84.63337707519531]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 5.0, 5.0, 5.0, 3.0, 7.0, 11.0, 13.0, 6.0, 4.0, 12.0, 14.0, 29.0, 18.0, 13.0, 45.0, 31.0, 28.0, 42.0, 40.0, 37.0, 31.0, 44.0, 54.0, 44.0, 47.0, 32.0, 37.0, 38.0, 36.0, 36.0, 37.0, 27.0, 20.0, 19.0, 23.0, 26.0, 16.0, 17.0, 10.0, 14.0, 5.0, 10.0, 5.0, 6.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-10.75, -10.4354248046875, -10.120849609375, -9.8062744140625, -9.49169921875, -9.1771240234375, -8.862548828125, -8.5479736328125, -8.2333984375, -7.9188232421875, -7.604248046875, -7.2896728515625, -6.97509765625, -6.6605224609375, -6.345947265625, -6.0313720703125, -5.716796875, -5.4022216796875, -5.087646484375, -4.7730712890625, -4.45849609375, -4.1439208984375, -3.829345703125, -3.5147705078125, -3.2001953125, -2.8856201171875, -2.571044921875, -2.2564697265625, -1.94189453125, -1.6273193359375, -1.312744140625, -0.9981689453125, -0.68359375, -0.3690185546875, -0.054443359375, 0.2601318359375, 0.57470703125, 0.8892822265625, 1.203857421875, 1.5184326171875, 1.8330078125, 2.1475830078125, 2.462158203125, 2.7767333984375, 3.09130859375, 3.4058837890625, 3.720458984375, 4.0350341796875, 4.349609375, 4.6641845703125, 4.978759765625, 5.2933349609375, 5.60791015625, 5.9224853515625, 6.237060546875, 6.5516357421875, 6.8662109375, 7.1807861328125, 7.495361328125, 7.8099365234375, 8.12451171875, 8.4390869140625, 8.753662109375, 9.0682373046875, 9.3828125]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 6.0, 16.0, 20.0, 23.0, 26.0, 56.0, 66.0, 106.0, 144.0, 220.0, 327.0, 513.0, 828.0, 1216.0, 1777.0, 2580.0, 3832.0, 5814.0, 8481.0, 12775.0, 18458.0, 27173.0, 39387.0, 57793.0, 85766.0, 127131.0, 180532.0, 154089.0, 102445.0, 69165.0, 47144.0, 32448.0, 22216.0, 15057.0, 9991.0, 6831.0, 4618.0, 3081.0, 2026.0, 1385.0, 920.0, 697.0, 438.0, 299.0, 233.0, 139.0, 109.0, 48.0, 41.0, 25.0, 20.0, 14.0, 7.0, 6.0, 6.0, 3.0, 0.0, 1.0], "bins": [-1.0400390625, -1.0082626342773438, -0.9764862060546875, -0.9447097778320312, -0.912933349609375, -0.8811569213867188, -0.8493804931640625, -0.8176040649414062, -0.78582763671875, -0.7540512084960938, -0.7222747802734375, -0.6904983520507812, -0.658721923828125, -0.6269454956054688, -0.5951690673828125, -0.5633926391601562, -0.5316162109375, -0.49983978271484375, -0.4680633544921875, -0.43628692626953125, -0.404510498046875, -0.37273406982421875, -0.3409576416015625, -0.30918121337890625, -0.27740478515625, -0.24562835693359375, -0.2138519287109375, -0.18207550048828125, -0.150299072265625, -0.11852264404296875, -0.0867462158203125, -0.05496978759765625, -0.023193359375, 0.00858306884765625, 0.0403594970703125, 0.07213592529296875, 0.103912353515625, 0.13568878173828125, 0.1674652099609375, 0.19924163818359375, 0.23101806640625, 0.26279449462890625, 0.2945709228515625, 0.32634735107421875, 0.358123779296875, 0.38990020751953125, 0.4216766357421875, 0.45345306396484375, 0.4852294921875, 0.5170059204101562, 0.5487823486328125, 0.5805587768554688, 0.612335205078125, 0.6441116333007812, 0.6758880615234375, 0.7076644897460938, 0.73944091796875, 0.7712173461914062, 0.8029937744140625, 0.8347702026367188, 0.866546630859375, 0.8983230590820312, 0.9300994873046875, 0.9618759155273438, 0.99365234375]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0, 2.0, 6.0, 5.0, 8.0, 14.0, 9.0, 10.0, 20.0, 28.0, 18.0, 36.0, 21.0, 29.0, 38.0, 39.0, 37.0, 55.0, 35.0, 64.0, 27.0, 1060.0, 44.0, 37.0, 51.0, 45.0, 37.0, 32.0, 37.0, 19.0, 22.0, 34.0, 20.0, 16.0, 17.0, 17.0, 9.0, 12.0, 9.0, 3.0, 1.0, 4.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-6.94140625, -6.7247314453125, -6.508056640625, -6.2913818359375, -6.07470703125, -5.8580322265625, -5.641357421875, -5.4246826171875, -5.2080078125, -4.9913330078125, -4.774658203125, -4.5579833984375, -4.34130859375, -4.1246337890625, -3.907958984375, -3.6912841796875, -3.474609375, -3.2579345703125, -3.041259765625, -2.8245849609375, -2.60791015625, -2.3912353515625, -2.174560546875, -1.9578857421875, -1.7412109375, -1.5245361328125, -1.307861328125, -1.0911865234375, -0.87451171875, -0.6578369140625, -0.441162109375, -0.2244873046875, -0.0078125, 0.2088623046875, 0.425537109375, 0.6422119140625, 0.85888671875, 1.0755615234375, 1.292236328125, 1.5089111328125, 1.7255859375, 1.9422607421875, 2.158935546875, 2.3756103515625, 2.59228515625, 2.8089599609375, 3.025634765625, 3.2423095703125, 3.458984375, 3.6756591796875, 3.892333984375, 4.1090087890625, 4.32568359375, 4.5423583984375, 4.759033203125, 4.9757080078125, 5.1923828125, 5.4090576171875, 5.625732421875, 5.8424072265625, 6.05908203125, 6.2757568359375, 6.492431640625, 6.7091064453125, 6.92578125]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 8.0, 8.0, 19.0, 29.0, 34.0, 43.0, 60.0, 101.0, 161.0, 257.0, 349.0, 496.0, 781.0, 1287.0, 1908.0, 3037.0, 4620.0, 7308.0, 11706.0, 18598.0, 29776.0, 48119.0, 77305.0, 126279.0, 1078181.0, 357875.0, 124513.0, 77037.0, 47588.0, 29412.0, 18265.0, 11544.0, 7299.0, 4649.0, 2996.0, 1931.0, 1222.0, 774.0, 513.0, 379.0, 224.0, 150.0, 86.0, 63.0, 50.0, 37.0, 19.0, 16.0, 8.0, 5.0, 7.0, 3.0, 1.0, 1.0, 0.0, 2.0], "bins": [-1.171875, -1.1363525390625, -1.100830078125, -1.0653076171875, -1.02978515625, -0.9942626953125, -0.958740234375, -0.9232177734375, -0.8876953125, -0.8521728515625, -0.816650390625, -0.7811279296875, -0.74560546875, -0.7100830078125, -0.674560546875, -0.6390380859375, -0.603515625, -0.5679931640625, -0.532470703125, -0.4969482421875, -0.46142578125, -0.4259033203125, -0.390380859375, -0.3548583984375, -0.3193359375, -0.2838134765625, -0.248291015625, -0.2127685546875, -0.17724609375, -0.1417236328125, -0.106201171875, -0.0706787109375, -0.03515625, 0.0003662109375, 0.035888671875, 0.0714111328125, 0.10693359375, 0.1424560546875, 0.177978515625, 0.2135009765625, 0.2490234375, 0.2845458984375, 0.320068359375, 0.3555908203125, 0.39111328125, 0.4266357421875, 0.462158203125, 0.4976806640625, 0.533203125, 0.5687255859375, 0.604248046875, 0.6397705078125, 0.67529296875, 0.7108154296875, 0.746337890625, 0.7818603515625, 0.8173828125, 0.8529052734375, 0.888427734375, 0.9239501953125, 0.95947265625, 0.9949951171875, 1.030517578125, 1.0660400390625, 1.1015625]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 2.0, 1.0, 4.0, 5.0, 3.0, 14.0, 11.0, 17.0, 18.0, 15.0, 27.0, 32.0, 47.0, 34.0, 58.0, 54.0, 54.0, 72.0, 75.0, 64.0, 72.0, 59.0, 48.0, 44.0, 34.0, 24.0, 26.0, 20.0, 18.0, 13.0, 9.0, 9.0, 7.0, 1.0, 6.0, 2.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0025463104248046875, -0.0024705231189727783, -0.002394735813140869, -0.00231894850730896, -0.0022431612014770508, -0.0021673738956451416, -0.0020915865898132324, -0.0020157992839813232, -0.001940011978149414, -0.0018642246723175049, -0.0017884373664855957, -0.0017126500606536865, -0.0016368627548217773, -0.0015610754489898682, -0.001485288143157959, -0.0014095008373260498, -0.0013337135314941406, -0.0012579262256622314, -0.0011821389198303223, -0.001106351613998413, -0.001030564308166504, -0.0009547770023345947, -0.0008789896965026855, -0.0008032023906707764, -0.0007274150848388672, -0.000651627779006958, -0.0005758404731750488, -0.0005000531673431396, -0.00042426586151123047, -0.0003484785556793213, -0.0002726912498474121, -0.00019690394401550293, -0.00012111663818359375, -4.532933235168457e-05, 3.045797348022461e-05, 0.00010624527931213379, 0.00018203258514404297, 0.00025781989097595215, 0.00033360719680786133, 0.0004093945026397705, 0.0004851818084716797, 0.0005609691143035889, 0.000636756420135498, 0.0007125437259674072, 0.0007883310317993164, 0.0008641183376312256, 0.0009399056434631348, 0.001015692949295044, 0.0010914802551269531, 0.0011672675609588623, 0.0012430548667907715, 0.0013188421726226807, 0.0013946294784545898, 0.001470416784286499, 0.0015462040901184082, 0.0016219913959503174, 0.0016977787017822266, 0.0017735660076141357, 0.001849353313446045, 0.001925140619277954, 0.0020009279251098633, 0.0020767152309417725, 0.0021525025367736816, 0.002228289842605591, 0.0023040771484375]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 4.0, 9.0, 6.0, 10.0, 16.0, 14.0, 17.0, 19.0, 27.0, 26.0, 42.0, 53.0, 85.0, 113.0, 167.0, 282.0, 528.0, 1956.0, 941703.0, 101103.0, 1126.0, 442.0, 235.0, 138.0, 119.0, 67.0, 53.0, 51.0, 25.0, 20.0, 28.0, 19.0, 10.0, 12.0, 6.0, 5.0, 6.0, 6.0, 2.0, 2.0, 2.0, 0.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.04296875, -0.04147911071777344, -0.039989471435546875, -0.03849983215332031, -0.03701019287109375, -0.03552055358886719, -0.034030914306640625, -0.03254127502441406, -0.0310516357421875, -0.029561996459960938, -0.028072357177734375, -0.026582717895507812, -0.02509307861328125, -0.023603439331054688, -0.022113800048828125, -0.020624160766601562, -0.019134521484375, -0.017644882202148438, -0.016155242919921875, -0.014665603637695312, -0.01317596435546875, -0.011686325073242188, -0.010196685791015625, -0.008707046508789062, -0.0072174072265625, -0.0057277679443359375, -0.004238128662109375, -0.0027484893798828125, -0.00125885009765625, 0.0002307891845703125, 0.001720428466796875, 0.0032100677490234375, 0.00469970703125, 0.0061893463134765625, 0.007678985595703125, 0.009168624877929688, 0.01065826416015625, 0.012147903442382812, 0.013637542724609375, 0.015127182006835938, 0.0166168212890625, 0.018106460571289062, 0.019596099853515625, 0.021085739135742188, 0.02257537841796875, 0.024065017700195312, 0.025554656982421875, 0.027044296264648438, 0.028533935546875, 0.030023574829101562, 0.031513214111328125, 0.03300285339355469, 0.03449249267578125, 0.03598213195800781, 0.037471771240234375, 0.03896141052246094, 0.0404510498046875, 0.04194068908691406, 0.043430328369140625, 0.04491996765136719, 0.04640960693359375, 0.04789924621582031, 0.049388885498046875, 0.05087852478027344, 0.0523681640625]}, "gradients/decoder.transformer.h.18.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 7.0, 61.0, 385.0, 440.0, 107.0, 9.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.006576722953468561, -0.0064084515906870365, -0.006240180693566799, -0.0060719093307852745, -0.00590363796800375, -0.005735366605222225, -0.0055670952424407005, -0.005398824345320463, -0.0052305529825389385, -0.005062281619757414, -0.0048940107226371765, -0.004725739359855652, -0.004557467997074127, -0.0043891966342926025, -0.004220925271511078, -0.0040526543743908405, -0.003884383011609316, -0.003716111648827791, -0.00354784051887691, -0.003379569388926029, -0.0032112980261445045, -0.00304302666336298, -0.002874755533412099, -0.002706484403461218, -0.0025382130406796932, -0.0023699416778981686, -0.0022016705479472876, -0.0020333994179964066, -0.001865128055214882, -0.001696856808848679, -0.0015285855624824762, -0.0013603143161162734, -0.0011920435354113579, -0.001023772289045155, -0.0008555010426789522, -0.0006872297963127494, -0.0005189585499465466, -0.0003506873035803437, -0.0001824160572141409, -1.414481084793806e-05, 0.00015412643551826477, 0.0003223976818844676, 0.0004906689282506704, 0.0006589401746168733, 0.0008272114209830761, 0.000995482667349279, 0.0011637539137154818, 0.0013320251600816846, 0.0015002964064478874, 0.0016685676528140903, 0.001836838899180293, 0.002005110029131174, 0.0021733813919126987, 0.0023416527546942234, 0.0025099238846451044, 0.0026781950145959854, 0.00284646637737751, 0.0030147377401590347, 0.0031830088701099157, 0.0033512800000607967, 0.0035195513628423214, 0.003687822725623846, 0.003856093855574727, 0.004024364985525608, 0.004192636348307133]}, "gradients/decoder.transformer.h.18.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 4.0, 5.0, 5.0, 4.0, 9.0, 8.0, 6.0, 14.0, 23.0, 14.0, 17.0, 23.0, 25.0, 26.0, 24.0, 39.0, 33.0, 47.0, 61.0, 43.0, 33.0, 41.0, 35.0, 41.0, 45.0, 48.0, 41.0, 29.0, 30.0, 41.0, 26.0, 31.0, 12.0, 27.0, 27.0, 17.0, 11.0, 10.0, 13.0, 8.0, 4.0, 1.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0010998845100402832, -0.0010632015764713287, -0.0010265186429023743, -0.0009898357093334198, -0.0009531527757644653, -0.0009164698421955109, -0.0008797869086265564, -0.0008431039750576019, -0.0008064210414886475, -0.000769738107919693, -0.0007330551743507385, -0.0006963722407817841, -0.0006596893072128296, -0.0006230063736438751, -0.0005863234400749207, -0.0005496405065059662, -0.0005129575729370117, -0.00047627463936805725, -0.0004395917057991028, -0.0004029087722301483, -0.00036622583866119385, -0.0003295429050922394, -0.0002928599715232849, -0.00025617703795433044, -0.00021949410438537598, -0.0001828111708164215, -0.00014612823724746704, -0.00010944530367851257, -7.27623701095581e-05, -3.607943654060364e-05, 6.034970283508301e-07, 3.72864305973053e-05, 7.396936416625977e-05, 0.00011065229773521423, 0.0001473352313041687, 0.00018401816487312317, 0.00022070109844207764, 0.0002573840320110321, 0.00029406696557998657, 0.00033074989914894104, 0.0003674328327178955, 0.00040411576628685, 0.00044079869985580444, 0.0004774816334247589, 0.0005141645669937134, 0.0005508475005626678, 0.0005875304341316223, 0.0006242133677005768, 0.0006608963012695312, 0.0006975792348384857, 0.0007342621684074402, 0.0007709451019763947, 0.0008076280355453491, 0.0008443109691143036, 0.0008809939026832581, 0.0009176768362522125, 0.000954359769821167, 0.0009910427033901215, 0.001027725636959076, 0.0010644085705280304, 0.0011010915040969849, 0.0011377744376659393, 0.0011744573712348938, 0.0012111403048038483, 0.0012478232383728027]}, "gradients/decoder.transformer.h.18.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 5.0, 5.0, 5.0, 3.0, 7.0, 11.0, 13.0, 6.0, 4.0, 12.0, 14.0, 29.0, 18.0, 13.0, 45.0, 31.0, 28.0, 42.0, 40.0, 37.0, 31.0, 44.0, 54.0, 44.0, 47.0, 32.0, 37.0, 38.0, 36.0, 36.0, 37.0, 27.0, 20.0, 19.0, 23.0, 26.0, 16.0, 17.0, 10.0, 14.0, 5.0, 10.0, 5.0, 6.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-10.75, -10.4354248046875, -10.120849609375, -9.8062744140625, -9.49169921875, -9.1771240234375, -8.862548828125, -8.5479736328125, -8.2333984375, -7.9188232421875, -7.604248046875, -7.2896728515625, -6.97509765625, -6.6605224609375, -6.345947265625, -6.0313720703125, -5.716796875, -5.4022216796875, -5.087646484375, -4.7730712890625, -4.45849609375, -4.1439208984375, -3.829345703125, -3.5147705078125, -3.2001953125, -2.8856201171875, -2.571044921875, -2.2564697265625, -1.94189453125, -1.6273193359375, -1.312744140625, -0.9981689453125, -0.68359375, -0.3690185546875, -0.054443359375, 0.2601318359375, 0.57470703125, 0.8892822265625, 1.203857421875, 1.5184326171875, 1.8330078125, 2.1475830078125, 2.462158203125, 2.7767333984375, 3.09130859375, 3.4058837890625, 3.720458984375, 4.0350341796875, 4.349609375, 4.6641845703125, 4.978759765625, 5.2933349609375, 5.60791015625, 5.9224853515625, 6.237060546875, 6.5516357421875, 6.8662109375, 7.1807861328125, 7.495361328125, 7.8099365234375, 8.12451171875, 8.4390869140625, 8.753662109375, 9.0682373046875, 9.3828125]}, "gradients/decoder.transformer.h.18.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 2.0, 2.0, 9.0, 6.0, 5.0, 10.0, 9.0, 10.0, 25.0, 26.0, 34.0, 50.0, 73.0, 78.0, 132.0, 187.0, 267.0, 438.0, 688.0, 1082.0, 1798.0, 3057.0, 5130.0, 9492.0, 19378.0, 47111.0, 159332.0, 565161.0, 149337.0, 45013.0, 18648.0, 9139.0, 5043.0, 2890.0, 1773.0, 1071.0, 684.0, 419.0, 298.0, 186.0, 114.0, 110.0, 48.0, 61.0, 34.0, 35.0, 10.0, 18.0, 8.0, 8.0, 4.0, 6.0, 4.0, 5.0, 4.0, 2.0, 2.0], "bins": [-10.65625, -10.34716796875, -10.0380859375, -9.72900390625, -9.419921875, -9.11083984375, -8.8017578125, -8.49267578125, -8.18359375, -7.87451171875, -7.5654296875, -7.25634765625, -6.947265625, -6.63818359375, -6.3291015625, -6.02001953125, -5.7109375, -5.40185546875, -5.0927734375, -4.78369140625, -4.474609375, -4.16552734375, -3.8564453125, -3.54736328125, -3.23828125, -2.92919921875, -2.6201171875, -2.31103515625, -2.001953125, -1.69287109375, -1.3837890625, -1.07470703125, -0.765625, -0.45654296875, -0.1474609375, 0.16162109375, 0.470703125, 0.77978515625, 1.0888671875, 1.39794921875, 1.70703125, 2.01611328125, 2.3251953125, 2.63427734375, 2.943359375, 3.25244140625, 3.5615234375, 3.87060546875, 4.1796875, 4.48876953125, 4.7978515625, 5.10693359375, 5.416015625, 5.72509765625, 6.0341796875, 6.34326171875, 6.65234375, 6.96142578125, 7.2705078125, 7.57958984375, 7.888671875, 8.19775390625, 8.5068359375, 8.81591796875, 9.125]}, "gradients/decoder.transformer.h.18.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 6.0, 3.0, 1.0, 9.0, 6.0, 6.0, 7.0, 8.0, 9.0, 5.0, 20.0, 23.0, 17.0, 20.0, 15.0, 28.0, 33.0, 31.0, 33.0, 33.0, 44.0, 48.0, 72.0, 113.0, 1751.0, 224.0, 74.0, 50.0, 44.0, 39.0, 36.0, 45.0, 25.0, 28.0, 18.0, 23.0, 26.0, 8.0, 15.0, 10.0, 12.0, 11.0, 8.0, 3.0, 4.0, 6.0, 6.0, 5.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-37.78125, -36.66064453125, -35.5400390625, -34.41943359375, -33.298828125, -32.17822265625, -31.0576171875, -29.93701171875, -28.81640625, -27.69580078125, -26.5751953125, -25.45458984375, -24.333984375, -23.21337890625, -22.0927734375, -20.97216796875, -19.8515625, -18.73095703125, -17.6103515625, -16.48974609375, -15.369140625, -14.24853515625, -13.1279296875, -12.00732421875, -10.88671875, -9.76611328125, -8.6455078125, -7.52490234375, -6.404296875, -5.28369140625, -4.1630859375, -3.04248046875, -1.921875, -0.80126953125, 0.3193359375, 1.43994140625, 2.560546875, 3.68115234375, 4.8017578125, 5.92236328125, 7.04296875, 8.16357421875, 9.2841796875, 10.40478515625, 11.525390625, 12.64599609375, 13.7666015625, 14.88720703125, 16.0078125, 17.12841796875, 18.2490234375, 19.36962890625, 20.490234375, 21.61083984375, 22.7314453125, 23.85205078125, 24.97265625, 26.09326171875, 27.2138671875, 28.33447265625, 29.455078125, 30.57568359375, 31.6962890625, 32.81689453125, 33.9375]}, "gradients/decoder.transformer.h.18.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 9.0, 14.0, 9.0, 17.0, 27.0, 23.0, 44.0, 60.0, 103.0, 145.0, 258.0, 708.0, 5052.0, 3046025.0, 90369.0, 1778.0, 444.0, 194.0, 144.0, 82.0, 62.0, 44.0, 32.0, 20.0, 8.0, 10.0, 11.0, 6.0, 3.0, 3.0, 3.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-117.3125, -113.9404296875, -110.568359375, -107.1962890625, -103.82421875, -100.4521484375, -97.080078125, -93.7080078125, -90.3359375, -86.9638671875, -83.591796875, -80.2197265625, -76.84765625, -73.4755859375, -70.103515625, -66.7314453125, -63.359375, -59.9873046875, -56.615234375, -53.2431640625, -49.87109375, -46.4990234375, -43.126953125, -39.7548828125, -36.3828125, -33.0107421875, -29.638671875, -26.2666015625, -22.89453125, -19.5224609375, -16.150390625, -12.7783203125, -9.40625, -6.0341796875, -2.662109375, 0.7099609375, 4.08203125, 7.4541015625, 10.826171875, 14.1982421875, 17.5703125, 20.9423828125, 24.314453125, 27.6865234375, 31.05859375, 34.4306640625, 37.802734375, 41.1748046875, 44.546875, 47.9189453125, 51.291015625, 54.6630859375, 58.03515625, 61.4072265625, 64.779296875, 68.1513671875, 71.5234375, 74.8955078125, 78.267578125, 81.6396484375, 85.01171875, 88.3837890625, 91.755859375, 95.1279296875, 98.5]}, "gradients/decoder.transformer.h.18.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 15.0, 219.0, 634.0, 136.0, 9.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-326.3973693847656, -319.3601379394531, -312.3228759765625, -305.28564453125, -298.2483825683594, -291.2111511230469, -284.17388916015625, -277.13665771484375, -270.09942626953125, -263.06219482421875, -256.0249328613281, -248.98770141601562, -241.95045471191406, -234.9132080078125, -227.87596130371094, -220.83871459960938, -213.8014678955078, -206.76422119140625, -199.7269744873047, -192.68972778320312, -185.65249633789062, -178.61524963378906, -171.5780029296875, -164.54075622558594, -157.50350952148438, -150.4662628173828, -143.42901611328125, -136.39178466796875, -129.3545379638672, -122.31729125976562, -115.28004455566406, -108.2427978515625, -101.20557403564453, -94.16832733154297, -87.13108825683594, -80.09384155273438, -73.05659484863281, -66.01935577392578, -58.98210906982422, -51.94486618041992, -44.907623291015625, -37.87038040161133, -30.8331356048584, -23.79589080810547, -16.758647918701172, -9.721405029296875, -2.6841583251953125, 4.353084564208984, 11.390327453613281, 18.427570343017578, 25.464815139770508, 32.50205993652344, 39.539302825927734, 46.57654571533203, 53.613792419433594, 60.65103530883789, 67.68827819824219, 74.72552490234375, 81.76276397705078, 88.80001068115234, 95.83724975585938, 102.87449645996094, 109.9117431640625, 116.94898986816406, 123.9862289428711]}, "gradients/decoder.transformer.h.18.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 5.0, 9.0, 4.0, 9.0, 7.0, 11.0, 12.0, 12.0, 20.0, 21.0, 22.0, 16.0, 21.0, 23.0, 34.0, 37.0, 19.0, 35.0, 44.0, 38.0, 39.0, 42.0, 46.0, 36.0, 39.0, 25.0, 44.0, 32.0, 39.0, 31.0, 28.0, 29.0, 25.0, 23.0, 27.0, 19.0, 8.0, 12.0, 15.0, 9.0, 7.0, 6.0, 6.0, 6.0, 4.0, 4.0, 1.0, 5.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-99.8073959350586, -96.61431121826172, -93.42121887207031, -90.22813415527344, -87.03504180908203, -83.84195709228516, -80.64886474609375, -77.45578002929688, -74.2626953125, -71.06961059570312, -67.87651824951172, -64.68343353271484, -61.49034118652344, -58.29725646972656, -55.10416793823242, -51.91107940673828, -48.717987060546875, -45.524898529052734, -42.331809997558594, -39.13872528076172, -35.94563293457031, -32.75254821777344, -29.559459686279297, -26.366371154785156, -23.173282623291016, -19.980194091796875, -16.787105560302734, -13.594018936157227, -10.400930404663086, -7.207841873168945, -4.0147552490234375, -0.8216667175292969, 2.371429443359375, 5.564517498016357, 8.75760555267334, 11.950693130493164, 15.143781661987305, 18.336870193481445, 21.529956817626953, 24.723045349121094, 27.916133880615234, 31.109222412109375, 34.302310943603516, 37.495399475097656, 40.68848419189453, 43.88157653808594, 47.07466125488281, 50.26774978637695, 53.460838317871094, 56.653926849365234, 59.847015380859375, 63.04010009765625, 66.23319244384766, 69.42627716064453, 72.61936950683594, 75.81245422363281, 79.00553894042969, 82.19862365722656, 85.39171600341797, 88.58480072021484, 91.77789306640625, 94.97097778320312, 98.1640625, 101.3571548461914, 104.55024719238281]}, "gradients/decoder.transformer.h.17.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 4.0, 4.0, 5.0, 6.0, 10.0, 9.0, 11.0, 9.0, 15.0, 13.0, 16.0, 18.0, 22.0, 32.0, 37.0, 34.0, 40.0, 45.0, 38.0, 31.0, 46.0, 51.0, 47.0, 46.0, 38.0, 37.0, 36.0, 30.0, 41.0, 27.0, 32.0, 22.0, 26.0, 26.0, 16.0, 16.0, 20.0, 11.0, 7.0, 8.0, 7.0, 3.0, 5.0, 7.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-11.2421875, -10.9129638671875, -10.583740234375, -10.2545166015625, -9.92529296875, -9.5960693359375, -9.266845703125, -8.9376220703125, -8.6083984375, -8.2791748046875, -7.949951171875, -7.6207275390625, -7.29150390625, -6.9622802734375, -6.633056640625, -6.3038330078125, -5.974609375, -5.6453857421875, -5.316162109375, -4.9869384765625, -4.65771484375, -4.3284912109375, -3.999267578125, -3.6700439453125, -3.3408203125, -3.0115966796875, -2.682373046875, -2.3531494140625, -2.02392578125, -1.6947021484375, -1.365478515625, -1.0362548828125, -0.70703125, -0.3778076171875, -0.048583984375, 0.2806396484375, 0.60986328125, 0.9390869140625, 1.268310546875, 1.5975341796875, 1.9267578125, 2.2559814453125, 2.585205078125, 2.9144287109375, 3.24365234375, 3.5728759765625, 3.902099609375, 4.2313232421875, 4.560546875, 4.8897705078125, 5.218994140625, 5.5482177734375, 5.87744140625, 6.2066650390625, 6.535888671875, 6.8651123046875, 7.1943359375, 7.5235595703125, 7.852783203125, 8.1820068359375, 8.51123046875, 8.8404541015625, 9.169677734375, 9.4989013671875, 9.828125]}, "gradients/decoder.transformer.h.17.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 5.0, 9.0, 9.0, 16.0, 19.0, 22.0, 36.0, 54.0, 63.0, 92.0, 146.0, 222.0, 398.0, 727.0, 1334.0, 2598.0, 5513.0, 12935.0, 32930.0, 103668.0, 395661.0, 1208945.0, 1531387.0, 637945.0, 174378.0, 51265.0, 18713.0, 7815.0, 3436.0, 1748.0, 891.0, 461.0, 276.0, 196.0, 114.0, 76.0, 51.0, 41.0, 34.0, 13.0, 16.0, 11.0, 6.0, 3.0, 7.0, 6.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.9453125, -12.543212890625, -12.14111328125, -11.739013671875, -11.3369140625, -10.934814453125, -10.53271484375, -10.130615234375, -9.728515625, -9.326416015625, -8.92431640625, -8.522216796875, -8.1201171875, -7.718017578125, -7.31591796875, -6.913818359375, -6.51171875, -6.109619140625, -5.70751953125, -5.305419921875, -4.9033203125, -4.501220703125, -4.09912109375, -3.697021484375, -3.294921875, -2.892822265625, -2.49072265625, -2.088623046875, -1.6865234375, -1.284423828125, -0.88232421875, -0.480224609375, -0.078125, 0.323974609375, 0.72607421875, 1.128173828125, 1.5302734375, 1.932373046875, 2.33447265625, 2.736572265625, 3.138671875, 3.540771484375, 3.94287109375, 4.344970703125, 4.7470703125, 5.149169921875, 5.55126953125, 5.953369140625, 6.35546875, 6.757568359375, 7.15966796875, 7.561767578125, 7.9638671875, 8.365966796875, 8.76806640625, 9.170166015625, 9.572265625, 9.974365234375, 10.37646484375, 10.778564453125, 11.1806640625, 11.582763671875, 11.98486328125, 12.386962890625, 12.7890625]}, "gradients/decoder.transformer.h.17.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 3.0, 0.0, 2.0, 6.0, 2.0, 8.0, 10.0, 14.0, 21.0, 23.0, 33.0, 30.0, 44.0, 68.0, 85.0, 103.0, 140.0, 183.0, 210.0, 286.0, 367.0, 384.0, 429.0, 390.0, 267.0, 233.0, 172.0, 136.0, 98.0, 81.0, 69.0, 43.0, 37.0, 28.0, 29.0, 8.0, 10.0, 10.0, 10.0, 5.0, 2.0, 2.0, 2.0, 1.0], "bins": [-31.828125, -31.070068359375, -30.31201171875, -29.553955078125, -28.7958984375, -28.037841796875, -27.27978515625, -26.521728515625, -25.763671875, -25.005615234375, -24.24755859375, -23.489501953125, -22.7314453125, -21.973388671875, -21.21533203125, -20.457275390625, -19.69921875, -18.941162109375, -18.18310546875, -17.425048828125, -16.6669921875, -15.908935546875, -15.15087890625, -14.392822265625, -13.634765625, -12.876708984375, -12.11865234375, -11.360595703125, -10.6025390625, -9.844482421875, -9.08642578125, -8.328369140625, -7.5703125, -6.812255859375, -6.05419921875, -5.296142578125, -4.5380859375, -3.780029296875, -3.02197265625, -2.263916015625, -1.505859375, -0.747802734375, 0.01025390625, 0.768310546875, 1.5263671875, 2.284423828125, 3.04248046875, 3.800537109375, 4.55859375, 5.316650390625, 6.07470703125, 6.832763671875, 7.5908203125, 8.348876953125, 9.10693359375, 9.864990234375, 10.623046875, 11.381103515625, 12.13916015625, 12.897216796875, 13.6552734375, 14.413330078125, 15.17138671875, 15.929443359375, 16.6875]}, "gradients/decoder.transformer.h.17.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 3.0, 3.0, 3.0, 6.0, 8.0, 15.0, 11.0, 12.0, 28.0, 43.0, 41.0, 54.0, 64.0, 105.0, 148.0, 228.0, 429.0, 1569.0, 31187.0, 3752280.0, 401057.0, 5264.0, 734.0, 325.0, 174.0, 124.0, 94.0, 69.0, 42.0, 36.0, 37.0, 23.0, 14.0, 18.0, 14.0, 7.0, 6.0, 5.0, 2.0, 5.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-81.4375, -78.275390625, -75.11328125, -71.951171875, -68.7890625, -65.626953125, -62.46484375, -59.302734375, -56.140625, -52.978515625, -49.81640625, -46.654296875, -43.4921875, -40.330078125, -37.16796875, -34.005859375, -30.84375, -27.681640625, -24.51953125, -21.357421875, -18.1953125, -15.033203125, -11.87109375, -8.708984375, -5.546875, -2.384765625, 0.77734375, 3.939453125, 7.1015625, 10.263671875, 13.42578125, 16.587890625, 19.75, 22.912109375, 26.07421875, 29.236328125, 32.3984375, 35.560546875, 38.72265625, 41.884765625, 45.046875, 48.208984375, 51.37109375, 54.533203125, 57.6953125, 60.857421875, 64.01953125, 67.181640625, 70.34375, 73.505859375, 76.66796875, 79.830078125, 82.9921875, 86.154296875, 89.31640625, 92.478515625, 95.640625, 98.802734375, 101.96484375, 105.126953125, 108.2890625, 111.451171875, 114.61328125, 117.775390625, 120.9375]}, "gradients/decoder.transformer.h.17.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 7.0, 22.0, 76.0, 190.0, 319.0, 248.0, 112.0, 33.0, 6.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-313.3824768066406, -303.8031311035156, -294.2237854003906, -284.6444091796875, -275.0650634765625, -265.4857177734375, -255.9063720703125, -246.3270263671875, -236.74766540527344, -227.16831970214844, -217.58895874023438, -208.00961303710938, -198.43026733398438, -188.8509063720703, -179.2715606689453, -169.69219970703125, -160.11285400390625, -150.53350830078125, -140.9541473388672, -131.3748016357422, -121.79544830322266, -112.21609497070312, -102.63674926757812, -93.0573959350586, -83.47804260253906, -73.89868927001953, -64.3193359375, -54.739990234375, -45.16063690185547, -35.58128356933594, -26.001934051513672, -16.422584533691406, -6.84326171875, 2.7360897064208984, 12.315441131591797, 21.894792556762695, 31.474143981933594, 41.053497314453125, 50.63284683227539, 60.212196350097656, 69.79154968261719, 79.37090301513672, 88.95025634765625, 98.52960205078125, 108.10895538330078, 117.68830871582031, 127.26765441894531, 136.84701538085938, 146.42636108398438, 156.00570678710938, 165.58506774902344, 175.16441345214844, 184.7437744140625, 194.3231201171875, 203.9024658203125, 213.4818115234375, 223.06117248535156, 232.64051818847656, 242.21987915039062, 251.79922485351562, 261.3785705566406, 270.95794677734375, 280.53729248046875, 290.11663818359375, 299.69598388671875]}, "gradients/decoder.transformer.h.17.ln_2.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 5.0, 3.0, 9.0, 4.0, 8.0, 6.0, 12.0, 16.0, 23.0, 21.0, 21.0, 20.0, 20.0, 31.0, 28.0, 33.0, 42.0, 51.0, 35.0, 33.0, 37.0, 44.0, 51.0, 40.0, 40.0, 33.0, 35.0, 34.0, 34.0, 34.0, 24.0, 23.0, 18.0, 21.0, 25.0, 18.0, 9.0, 12.0, 11.0, 11.0, 6.0, 5.0, 5.0, 4.0, 3.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-82.7083740234375, -79.97003173828125, -77.23168182373047, -74.49333953857422, -71.75499725341797, -69.01664733886719, -66.27830505371094, -63.53996276855469, -60.80161666870117, -58.063270568847656, -55.324928283691406, -52.58658218383789, -49.848236083984375, -47.109893798828125, -44.37154769897461, -41.633201599121094, -38.894859313964844, -36.15651321411133, -33.41817092895508, -30.679824829101562, -27.94148063659668, -25.203136444091797, -22.46479034423828, -19.7264461517334, -16.988101959228516, -14.249757766723633, -11.511412620544434, -8.773067474365234, -6.034723281860352, -3.2963790893554688, -0.5580329895019531, 2.1803112030029297, 4.918663024902344, 7.657007694244385, 10.395352363586426, 13.133697509765625, 15.872041702270508, 18.61038589477539, 21.348731994628906, 24.08707618713379, 26.825420379638672, 29.563764572143555, 32.30210876464844, 35.04045486450195, 37.77880096435547, 40.51714324951172, 43.255489349365234, 45.99383544921875, 48.732177734375, 51.470523834228516, 54.208866119384766, 56.94721221923828, 59.68555450439453, 62.42390060424805, 65.16224670410156, 67.90058898925781, 70.63893127441406, 73.37727355957031, 76.1156234741211, 78.85396575927734, 81.5923080444336, 84.33065795898438, 87.06900024414062, 89.80734252929688, 92.54569244384766]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 1.0, 6.0, 5.0, 7.0, 9.0, 7.0, 8.0, 9.0, 13.0, 14.0, 22.0, 21.0, 23.0, 22.0, 34.0, 32.0, 41.0, 39.0, 34.0, 39.0, 42.0, 50.0, 46.0, 41.0, 45.0, 28.0, 41.0, 36.0, 40.0, 31.0, 38.0, 29.0, 22.0, 19.0, 21.0, 12.0, 16.0, 16.0, 15.0, 9.0, 5.0, 5.0, 4.0, 5.0, 2.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-11.5390625, -11.1961669921875, -10.853271484375, -10.5103759765625, -10.16748046875, -9.8245849609375, -9.481689453125, -9.1387939453125, -8.7958984375, -8.4530029296875, -8.110107421875, -7.7672119140625, -7.42431640625, -7.0814208984375, -6.738525390625, -6.3956298828125, -6.052734375, -5.7098388671875, -5.366943359375, -5.0240478515625, -4.68115234375, -4.3382568359375, -3.995361328125, -3.6524658203125, -3.3095703125, -2.9666748046875, -2.623779296875, -2.2808837890625, -1.93798828125, -1.5950927734375, -1.252197265625, -0.9093017578125, -0.56640625, -0.2235107421875, 0.119384765625, 0.4622802734375, 0.80517578125, 1.1480712890625, 1.490966796875, 1.8338623046875, 2.1767578125, 2.5196533203125, 2.862548828125, 3.2054443359375, 3.54833984375, 3.8912353515625, 4.234130859375, 4.5770263671875, 4.919921875, 5.2628173828125, 5.605712890625, 5.9486083984375, 6.29150390625, 6.6343994140625, 6.977294921875, 7.3201904296875, 7.6630859375, 8.0059814453125, 8.348876953125, 8.6917724609375, 9.03466796875, 9.3775634765625, 9.720458984375, 10.0633544921875, 10.40625]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 4.0, 4.0, 6.0, 12.0, 22.0, 33.0, 37.0, 76.0, 95.0, 159.0, 208.0, 320.0, 485.0, 709.0, 976.0, 1583.0, 2367.0, 3427.0, 5207.0, 7868.0, 11451.0, 17088.0, 25878.0, 39433.0, 60666.0, 93376.0, 141719.0, 189255.0, 151423.0, 101000.0, 65657.0, 43045.0, 28234.0, 18932.0, 12358.0, 8333.0, 5591.0, 3841.0, 2536.0, 1695.0, 1089.0, 791.0, 533.0, 338.0, 239.0, 150.0, 122.0, 64.0, 46.0, 31.0, 15.0, 14.0, 9.0, 5.0, 2.0, 1.0, 9.0, 0.0, 0.0, 2.0], "bins": [-1.072265625, -1.038330078125, -1.00439453125, -0.970458984375, -0.9365234375, -0.902587890625, -0.86865234375, -0.834716796875, -0.80078125, -0.766845703125, -0.73291015625, -0.698974609375, -0.6650390625, -0.631103515625, -0.59716796875, -0.563232421875, -0.529296875, -0.495361328125, -0.46142578125, -0.427490234375, -0.3935546875, -0.359619140625, -0.32568359375, -0.291748046875, -0.2578125, -0.223876953125, -0.18994140625, -0.156005859375, -0.1220703125, -0.088134765625, -0.05419921875, -0.020263671875, 0.013671875, 0.047607421875, 0.08154296875, 0.115478515625, 0.1494140625, 0.183349609375, 0.21728515625, 0.251220703125, 0.28515625, 0.319091796875, 0.35302734375, 0.386962890625, 0.4208984375, 0.454833984375, 0.48876953125, 0.522705078125, 0.556640625, 0.590576171875, 0.62451171875, 0.658447265625, 0.6923828125, 0.726318359375, 0.76025390625, 0.794189453125, 0.828125, 0.862060546875, 0.89599609375, 0.929931640625, 0.9638671875, 0.997802734375, 1.03173828125, 1.065673828125, 1.099609375]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.bias": {"_type": "histogram", "values": [5.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 4.0, 3.0, 3.0, 3.0, 2.0, 7.0, 11.0, 9.0, 8.0, 14.0, 13.0, 14.0, 15.0, 26.0, 24.0, 18.0, 25.0, 30.0, 27.0, 36.0, 29.0, 39.0, 37.0, 40.0, 35.0, 33.0, 1052.0, 35.0, 24.0, 37.0, 45.0, 29.0, 18.0, 34.0, 32.0, 20.0, 22.0, 28.0, 25.0, 13.0, 18.0, 15.0, 10.0, 5.0, 10.0, 15.0, 8.0, 3.0, 4.0, 7.0, 3.0, 4.0, 3.0, 3.0, 3.0, 0.0, 2.0], "bins": [-6.08984375, -5.9058837890625, -5.721923828125, -5.5379638671875, -5.35400390625, -5.1700439453125, -4.986083984375, -4.8021240234375, -4.6181640625, -4.4342041015625, -4.250244140625, -4.0662841796875, -3.88232421875, -3.6983642578125, -3.514404296875, -3.3304443359375, -3.146484375, -2.9625244140625, -2.778564453125, -2.5946044921875, -2.41064453125, -2.2266845703125, -2.042724609375, -1.8587646484375, -1.6748046875, -1.4908447265625, -1.306884765625, -1.1229248046875, -0.93896484375, -0.7550048828125, -0.571044921875, -0.3870849609375, -0.203125, -0.0191650390625, 0.164794921875, 0.3487548828125, 0.53271484375, 0.7166748046875, 0.900634765625, 1.0845947265625, 1.2685546875, 1.4525146484375, 1.636474609375, 1.8204345703125, 2.00439453125, 2.1883544921875, 2.372314453125, 2.5562744140625, 2.740234375, 2.9241943359375, 3.108154296875, 3.2921142578125, 3.47607421875, 3.6600341796875, 3.843994140625, 4.0279541015625, 4.2119140625, 4.3958740234375, 4.579833984375, 4.7637939453125, 4.94775390625, 5.1317138671875, 5.315673828125, 5.4996337890625, 5.68359375]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 2.0, 1.0, 3.0, 7.0, 12.0, 18.0, 17.0, 48.0, 48.0, 86.0, 122.0, 160.0, 243.0, 368.0, 551.0, 801.0, 1237.0, 1803.0, 2587.0, 4060.0, 5945.0, 8960.0, 13562.0, 20833.0, 31800.0, 48952.0, 75438.0, 117791.0, 195848.0, 1214538.0, 123064.0, 79173.0, 51398.0, 33458.0, 21910.0, 14210.0, 9377.0, 6110.0, 4214.0, 2760.0, 1766.0, 1236.0, 835.0, 575.0, 395.0, 264.0, 176.0, 118.0, 112.0, 53.0, 35.0, 22.0, 11.0, 12.0, 7.0, 4.0, 6.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0], "bins": [-1.0166015625, -0.9827728271484375, -0.948944091796875, -0.9151153564453125, -0.88128662109375, -0.8474578857421875, -0.813629150390625, -0.7798004150390625, -0.7459716796875, -0.7121429443359375, -0.678314208984375, -0.6444854736328125, -0.61065673828125, -0.5768280029296875, -0.542999267578125, -0.5091705322265625, -0.475341796875, -0.4415130615234375, -0.407684326171875, -0.3738555908203125, -0.34002685546875, -0.3061981201171875, -0.272369384765625, -0.2385406494140625, -0.2047119140625, -0.1708831787109375, -0.137054443359375, -0.1032257080078125, -0.06939697265625, -0.0355682373046875, -0.001739501953125, 0.0320892333984375, 0.06591796875, 0.0997467041015625, 0.133575439453125, 0.1674041748046875, 0.20123291015625, 0.2350616455078125, 0.268890380859375, 0.3027191162109375, 0.3365478515625, 0.3703765869140625, 0.404205322265625, 0.4380340576171875, 0.47186279296875, 0.5056915283203125, 0.539520263671875, 0.5733489990234375, 0.607177734375, 0.6410064697265625, 0.674835205078125, 0.7086639404296875, 0.74249267578125, 0.7763214111328125, 0.810150146484375, 0.8439788818359375, 0.8778076171875, 0.9116363525390625, 0.945465087890625, 0.9792938232421875, 1.01312255859375, 1.0469512939453125, 1.080780029296875, 1.1146087646484375, 1.1484375]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 4.0, 2.0, 6.0, 4.0, 6.0, 4.0, 8.0, 7.0, 11.0, 18.0, 17.0, 26.0, 30.0, 24.0, 41.0, 56.0, 62.0, 76.0, 93.0, 78.0, 78.0, 44.0, 48.0, 40.0, 43.0, 48.0, 16.0, 27.0, 17.0, 15.0, 17.0, 7.0, 12.0, 1.0, 2.0, 3.0, 2.0, 0.0, 3.0, 1.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0033626556396484375, -0.0032573938369750977, -0.003152132034301758, -0.003046870231628418, -0.002941608428955078, -0.0028363466262817383, -0.0027310848236083984, -0.0026258230209350586, -0.0025205612182617188, -0.002415299415588379, -0.002310037612915039, -0.0022047758102416992, -0.0020995140075683594, -0.0019942522048950195, -0.0018889904022216797, -0.0017837285995483398, -0.001678466796875, -0.0015732049942016602, -0.0014679431915283203, -0.0013626813888549805, -0.0012574195861816406, -0.0011521577835083008, -0.001046895980834961, -0.0009416341781616211, -0.0008363723754882812, -0.0007311105728149414, -0.0006258487701416016, -0.0005205869674682617, -0.0004153251647949219, -0.00031006336212158203, -0.0002048015594482422, -9.953975677490234e-05, 5.7220458984375e-06, 0.00011098384857177734, 0.0002162456512451172, 0.00032150745391845703, 0.0004267692565917969, 0.0005320310592651367, 0.0006372928619384766, 0.0007425546646118164, 0.0008478164672851562, 0.0009530782699584961, 0.001058340072631836, 0.0011636018753051758, 0.0012688636779785156, 0.0013741254806518555, 0.0014793872833251953, 0.0015846490859985352, 0.001689910888671875, 0.0017951726913452148, 0.0019004344940185547, 0.0020056962966918945, 0.0021109580993652344, 0.0022162199020385742, 0.002321481704711914, 0.002426743507385254, 0.0025320053100585938, 0.0026372671127319336, 0.0027425289154052734, 0.0028477907180786133, 0.002953052520751953, 0.003058314323425293, 0.003163576126098633, 0.0032688379287719727, 0.0033740997314453125]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 4.0, 4.0, 1.0, 5.0, 7.0, 4.0, 13.0, 15.0, 14.0, 26.0, 30.0, 40.0, 66.0, 106.0, 151.0, 316.0, 736.0, 29301.0, 1015381.0, 1332.0, 443.0, 211.0, 107.0, 67.0, 40.0, 39.0, 21.0, 11.0, 9.0, 9.0, 9.0, 9.0, 3.0, 6.0, 5.0, 4.0, 4.0, 1.0, 0.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.071044921875, -0.06852531433105469, -0.06600570678710938, -0.06348609924316406, -0.06096649169921875, -0.05844688415527344, -0.055927276611328125, -0.05340766906738281, -0.0508880615234375, -0.04836845397949219, -0.045848846435546875, -0.04332923889160156, -0.04080963134765625, -0.03829002380371094, -0.035770416259765625, -0.03325080871582031, -0.030731201171875, -0.028211593627929688, -0.025691986083984375, -0.023172378540039062, -0.02065277099609375, -0.018133163452148438, -0.015613555908203125, -0.013093948364257812, -0.0105743408203125, -0.008054733276367188, -0.005535125732421875, -0.0030155181884765625, -0.00049591064453125, 0.0020236968994140625, 0.004543304443359375, 0.0070629119873046875, 0.00958251953125, 0.012102127075195312, 0.014621734619140625, 0.017141342163085938, 0.01966094970703125, 0.022180557250976562, 0.024700164794921875, 0.027219772338867188, 0.0297393798828125, 0.03225898742675781, 0.034778594970703125, 0.03729820251464844, 0.03981781005859375, 0.04233741760253906, 0.044857025146484375, 0.04737663269042969, 0.049896240234375, 0.05241584777832031, 0.054935455322265625, 0.05745506286621094, 0.05997467041015625, 0.06249427795410156, 0.06501388549804688, 0.06753349304199219, 0.0700531005859375, 0.07257270812988281, 0.07509231567382812, 0.07761192321777344, 0.08013153076171875, 0.08265113830566406, 0.08517074584960938, 0.08769035339355469, 0.0902099609375]}, "gradients/decoder.transformer.h.17.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 13.0, 260.0, 688.0, 52.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004649254493415356, -0.004322778899222612, -0.003996303305029869, -0.0036698277108371258, -0.0033433521166443825, -0.003016876522451639, -0.002690400928258896, -0.0023639253340661526, -0.0020374497398734093, -0.001710974145680666, -0.0013844985514879227, -0.0010580229572951794, -0.0007315473631024361, -0.00040507176890969276, -7.859617471694946e-05, 0.00024787941947579384, 0.0005743550136685371, 0.0009008306078612804, 0.0012273062020540237, 0.001553781796246767, 0.0018802573904395103, 0.0022067329846322536, 0.002533208578824997, 0.0028596841730177402, 0.0031861597672104836, 0.003512635361403227, 0.00383911095559597, 0.0041655865497887135, 0.004492062143981457, 0.0048185377381742, 0.005145013332366943, 0.005471488926559687, 0.005797963589429855, 0.006124439183622599, 0.006450914777815342, 0.006777390372008085, 0.0071038659662008286, 0.007430341560393572, 0.007756817154586315, 0.008083293214440346, 0.008409768342971802, 0.008736243471503258, 0.009062719531357288, 0.009389195591211319, 0.009715670719742775, 0.010042145848274231, 0.010368621908128262, 0.010695097967982292, 0.011021573096513748, 0.011348048225045204, 0.011674524284899235, 0.012001000344753265, 0.012327475473284721, 0.012653950601816177, 0.012980426661670208, 0.013306902721524239, 0.013633377850055695, 0.01395985297858715, 0.014286329038441181, 0.014612805098295212, 0.014939280226826668, 0.015265755355358124, 0.015592231415212154, 0.015918707475066185, 0.01624518260359764]}, "gradients/decoder.transformer.h.17.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0, 2.0, 7.0, 5.0, 10.0, 10.0, 8.0, 16.0, 21.0, 18.0, 14.0, 26.0, 18.0, 33.0, 37.0, 38.0, 50.0, 34.0, 27.0, 43.0, 29.0, 40.0, 34.0, 49.0, 45.0, 45.0, 37.0, 39.0, 28.0, 28.0, 27.0, 31.0, 14.0, 29.0, 20.0, 21.0, 18.0, 5.0, 13.0, 7.0, 9.0, 5.0, 5.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.001759648323059082, -0.0017096661031246185, -0.001659683883190155, -0.0016097016632556915, -0.001559719443321228, -0.0015097372233867645, -0.001459755003452301, -0.0014097727835178375, -0.001359790563583374, -0.0013098083436489105, -0.001259826123714447, -0.0012098439037799835, -0.00115986168384552, -0.0011098794639110565, -0.001059897243976593, -0.0010099150240421295, -0.000959932804107666, -0.0009099505841732025, -0.000859968364238739, -0.0008099861443042755, -0.000760003924369812, -0.0007100217044353485, -0.000660039484500885, -0.0006100572645664215, -0.000560075044631958, -0.0005100928246974945, -0.000460110604763031, -0.0004101283848285675, -0.000360146164894104, -0.0003101639449596405, -0.000260181725025177, -0.0002101995050907135, -0.00016021728515625, -0.0001102350652217865, -6.0252845287323e-05, -1.0270625352859497e-05, 3.9711594581604004e-05, 8.96938145160675e-05, 0.000139676034450531, 0.0001896582543849945, 0.000239640474319458, 0.0002896226942539215, 0.000339604914188385, 0.0003895871341228485, 0.000439569354057312, 0.0004895515739917755, 0.000539533793926239, 0.0005895160138607025, 0.000639498233795166, 0.0006894804537296295, 0.000739462673664093, 0.0007894448935985565, 0.00083942711353302, 0.0008894093334674835, 0.000939391553401947, 0.0009893737733364105, 0.001039355993270874, 0.0010893382132053375, 0.001139320433139801, 0.0011893026530742645, 0.001239284873008728, 0.0012892670929431915, 0.001339249312877655, 0.0013892315328121185, 0.001439213752746582]}, "gradients/decoder.transformer.h.17.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 1.0, 6.0, 5.0, 7.0, 9.0, 7.0, 8.0, 9.0, 13.0, 14.0, 22.0, 21.0, 23.0, 22.0, 34.0, 32.0, 41.0, 39.0, 34.0, 39.0, 42.0, 49.0, 47.0, 41.0, 45.0, 28.0, 41.0, 36.0, 40.0, 31.0, 38.0, 29.0, 22.0, 19.0, 21.0, 12.0, 16.0, 16.0, 15.0, 9.0, 5.0, 5.0, 4.0, 5.0, 2.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-11.5390625, -11.1961669921875, -10.853271484375, -10.5103759765625, -10.16748046875, -9.8245849609375, -9.481689453125, -9.1387939453125, -8.7958984375, -8.4530029296875, -8.110107421875, -7.7672119140625, -7.42431640625, -7.0814208984375, -6.738525390625, -6.3956298828125, -6.052734375, -5.7098388671875, -5.366943359375, -5.0240478515625, -4.68115234375, -4.3382568359375, -3.995361328125, -3.6524658203125, -3.3095703125, -2.9666748046875, -2.623779296875, -2.2808837890625, -1.93798828125, -1.5950927734375, -1.252197265625, -0.9093017578125, -0.56640625, -0.2235107421875, 0.119384765625, 0.4622802734375, 0.80517578125, 1.1480712890625, 1.490966796875, 1.8338623046875, 2.1767578125, 2.5196533203125, 2.862548828125, 3.2054443359375, 3.54833984375, 3.8912353515625, 4.234130859375, 4.5770263671875, 4.919921875, 5.2628173828125, 5.605712890625, 5.9486083984375, 6.29150390625, 6.6343994140625, 6.977294921875, 7.3201904296875, 7.6630859375, 8.0059814453125, 8.348876953125, 8.6917724609375, 9.03466796875, 9.3775634765625, 9.720458984375, 10.0633544921875, 10.40625]}, "gradients/decoder.transformer.h.17.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 5.0, 5.0, 10.0, 10.0, 12.0, 23.0, 23.0, 29.0, 47.0, 88.0, 121.0, 244.0, 397.0, 689.0, 1288.0, 2656.0, 5321.0, 11175.0, 25841.0, 69926.0, 262464.0, 468814.0, 126430.0, 41232.0, 16384.0, 7595.0, 3605.0, 1865.0, 967.0, 533.0, 298.0, 161.0, 86.0, 60.0, 52.0, 28.0, 30.0, 13.0, 10.0, 7.0, 8.0, 5.0, 3.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.7890625, -9.4854736328125, -9.181884765625, -8.8782958984375, -8.57470703125, -8.2711181640625, -7.967529296875, -7.6639404296875, -7.3603515625, -7.0567626953125, -6.753173828125, -6.4495849609375, -6.14599609375, -5.8424072265625, -5.538818359375, -5.2352294921875, -4.931640625, -4.6280517578125, -4.324462890625, -4.0208740234375, -3.71728515625, -3.4136962890625, -3.110107421875, -2.8065185546875, -2.5029296875, -2.1993408203125, -1.895751953125, -1.5921630859375, -1.28857421875, -0.9849853515625, -0.681396484375, -0.3778076171875, -0.07421875, 0.2293701171875, 0.532958984375, 0.8365478515625, 1.14013671875, 1.4437255859375, 1.747314453125, 2.0509033203125, 2.3544921875, 2.6580810546875, 2.961669921875, 3.2652587890625, 3.56884765625, 3.8724365234375, 4.176025390625, 4.4796142578125, 4.783203125, 5.0867919921875, 5.390380859375, 5.6939697265625, 5.99755859375, 6.3011474609375, 6.604736328125, 6.9083251953125, 7.2119140625, 7.5155029296875, 7.819091796875, 8.1226806640625, 8.42626953125, 8.7298583984375, 9.033447265625, 9.3370361328125, 9.640625]}, "gradients/decoder.transformer.h.17.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 1.0, 2.0, 7.0, 2.0, 7.0, 7.0, 11.0, 6.0, 12.0, 16.0, 15.0, 17.0, 18.0, 30.0, 35.0, 48.0, 49.0, 51.0, 53.0, 57.0, 75.0, 173.0, 1651.0, 191.0, 69.0, 59.0, 53.0, 45.0, 55.0, 39.0, 28.0, 26.0, 19.0, 21.0, 24.0, 19.0, 7.0, 19.0, 13.0, 6.0, 5.0, 5.0, 8.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-46.59375, -45.31640625, -44.0390625, -42.76171875, -41.484375, -40.20703125, -38.9296875, -37.65234375, -36.375, -35.09765625, -33.8203125, -32.54296875, -31.265625, -29.98828125, -28.7109375, -27.43359375, -26.15625, -24.87890625, -23.6015625, -22.32421875, -21.046875, -19.76953125, -18.4921875, -17.21484375, -15.9375, -14.66015625, -13.3828125, -12.10546875, -10.828125, -9.55078125, -8.2734375, -6.99609375, -5.71875, -4.44140625, -3.1640625, -1.88671875, -0.609375, 0.66796875, 1.9453125, 3.22265625, 4.5, 5.77734375, 7.0546875, 8.33203125, 9.609375, 10.88671875, 12.1640625, 13.44140625, 14.71875, 15.99609375, 17.2734375, 18.55078125, 19.828125, 21.10546875, 22.3828125, 23.66015625, 24.9375, 26.21484375, 27.4921875, 28.76953125, 30.046875, 31.32421875, 32.6015625, 33.87890625, 35.15625]}, "gradients/decoder.transformer.h.17.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 7.0, 9.0, 6.0, 14.0, 19.0, 19.0, 23.0, 31.0, 38.0, 41.0, 60.0, 95.0, 147.0, 174.0, 269.0, 499.0, 1844.0, 43413.0, 3080469.0, 15937.0, 1321.0, 412.0, 248.0, 171.0, 115.0, 82.0, 46.0, 43.0, 34.0, 28.0, 27.0, 13.0, 13.0, 7.0, 7.0, 10.0, 7.0, 0.0, 4.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-89.625, -86.7705078125, -83.916015625, -81.0615234375, -78.20703125, -75.3525390625, -72.498046875, -69.6435546875, -66.7890625, -63.9345703125, -61.080078125, -58.2255859375, -55.37109375, -52.5166015625, -49.662109375, -46.8076171875, -43.953125, -41.0986328125, -38.244140625, -35.3896484375, -32.53515625, -29.6806640625, -26.826171875, -23.9716796875, -21.1171875, -18.2626953125, -15.408203125, -12.5537109375, -9.69921875, -6.8447265625, -3.990234375, -1.1357421875, 1.71875, 4.5732421875, 7.427734375, 10.2822265625, 13.13671875, 15.9912109375, 18.845703125, 21.7001953125, 24.5546875, 27.4091796875, 30.263671875, 33.1181640625, 35.97265625, 38.8271484375, 41.681640625, 44.5361328125, 47.390625, 50.2451171875, 53.099609375, 55.9541015625, 58.80859375, 61.6630859375, 64.517578125, 67.3720703125, 70.2265625, 73.0810546875, 75.935546875, 78.7900390625, 81.64453125, 84.4990234375, 87.353515625, 90.2080078125, 93.0625]}, "gradients/decoder.transformer.h.17.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 7.0, 19.0, 108.0, 332.0, 367.0, 151.0, 26.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-133.08236694335938, -129.04515075683594, -125.00794219970703, -120.9707260131836, -116.93351745605469, -112.89630126953125, -108.85908508300781, -104.8218765258789, -100.78466796875, -96.74745178222656, -92.71024322509766, -88.67302703857422, -84.63581848144531, -80.59860229492188, -76.56138610839844, -72.52417755126953, -68.4869613647461, -64.44974517822266, -60.41253662109375, -56.37532043457031, -52.338111877441406, -48.30089569091797, -44.2636833190918, -40.226470947265625, -36.18925857543945, -32.15204620361328, -28.11483383178711, -24.077619552612305, -20.040407180786133, -16.00319480895996, -11.965980529785156, -7.928768157958984, -3.8915481567382812, 0.14566469192504883, 4.182877540588379, 8.220090866088867, 12.257303237915039, 16.29451560974121, 20.331729888916016, 24.368942260742188, 28.40615463256836, 32.44336700439453, 36.4805793762207, 40.517791748046875, 44.55500793457031, 48.59221649169922, 52.629432678222656, 56.66664505004883, 60.703857421875, 64.74107360839844, 68.77828216552734, 72.81549835205078, 76.85270690917969, 80.88992309570312, 84.92713928222656, 88.96434783935547, 93.00155639648438, 97.03877258300781, 101.07598114013672, 105.11319732666016, 109.15040588378906, 113.1876220703125, 117.22483825683594, 121.26204681396484, 125.29926300048828]}, "gradients/decoder.transformer.h.17.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 5.0, 4.0, 4.0, 7.0, 4.0, 5.0, 11.0, 14.0, 6.0, 12.0, 18.0, 23.0, 25.0, 23.0, 22.0, 28.0, 44.0, 27.0, 33.0, 39.0, 44.0, 42.0, 33.0, 32.0, 32.0, 36.0, 43.0, 30.0, 35.0, 28.0, 39.0, 33.0, 39.0, 23.0, 21.0, 18.0, 22.0, 12.0, 14.0, 14.0, 14.0, 9.0, 6.0, 7.0, 8.0, 5.0, 5.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 1.0, 2.0, 1.0], "bins": [-85.38558197021484, -82.61650848388672, -79.84744262695312, -77.078369140625, -74.30929565429688, -71.54022979736328, -68.77115631103516, -66.00209045410156, -63.23301696777344, -60.46394729614258, -57.69487762451172, -54.925804138183594, -52.156734466552734, -49.387664794921875, -46.61859130859375, -43.84952163696289, -41.08045196533203, -38.31138229370117, -35.54231262207031, -32.77323913574219, -30.004169464111328, -27.23509979248047, -24.466028213500977, -21.696956634521484, -18.927886962890625, -16.158817291259766, -13.389745712280273, -10.620675086975098, -7.851604461669922, -5.082533836364746, -2.3134632110595703, 0.4556083679199219, 3.2246856689453125, 5.993756294250488, 8.762826919555664, 11.53189754486084, 14.300968170166016, 17.070037841796875, 19.839109420776367, 22.60818099975586, 25.37725067138672, 28.146320343017578, 30.91539192199707, 33.68446350097656, 36.45353317260742, 39.22260284423828, 41.991676330566406, 44.760746002197266, 47.529815673828125, 50.298885345458984, 53.067955017089844, 55.83702850341797, 58.60609817504883, 61.37516784667969, 64.14424133300781, 66.91331481933594, 69.68238067626953, 72.45145416259766, 75.22052001953125, 77.98959350585938, 80.7586669921875, 83.5277328491211, 86.29680633544922, 89.06587219238281, 91.83494567871094]}, "gradients/decoder.transformer.h.16.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 8.0, 5.0, 4.0, 7.0, 7.0, 9.0, 7.0, 13.0, 14.0, 22.0, 21.0, 18.0, 27.0, 27.0, 34.0, 34.0, 36.0, 38.0, 41.0, 42.0, 44.0, 42.0, 45.0, 46.0, 36.0, 35.0, 36.0, 38.0, 44.0, 36.0, 26.0, 25.0, 22.0, 18.0, 23.0, 13.0, 9.0, 20.0, 12.0, 7.0, 5.0, 5.0, 3.0, 4.0, 0.0, 0.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.734375, -11.3800048828125, -11.025634765625, -10.6712646484375, -10.31689453125, -9.9625244140625, -9.608154296875, -9.2537841796875, -8.8994140625, -8.5450439453125, -8.190673828125, -7.8363037109375, -7.48193359375, -7.1275634765625, -6.773193359375, -6.4188232421875, -6.064453125, -5.7100830078125, -5.355712890625, -5.0013427734375, -4.64697265625, -4.2926025390625, -3.938232421875, -3.5838623046875, -3.2294921875, -2.8751220703125, -2.520751953125, -2.1663818359375, -1.81201171875, -1.4576416015625, -1.103271484375, -0.7489013671875, -0.39453125, -0.0401611328125, 0.314208984375, 0.6685791015625, 1.02294921875, 1.3773193359375, 1.731689453125, 2.0860595703125, 2.4404296875, 2.7947998046875, 3.149169921875, 3.5035400390625, 3.85791015625, 4.2122802734375, 4.566650390625, 4.9210205078125, 5.275390625, 5.6297607421875, 5.984130859375, 6.3385009765625, 6.69287109375, 7.0472412109375, 7.401611328125, 7.7559814453125, 8.1103515625, 8.4647216796875, 8.819091796875, 9.1734619140625, 9.52783203125, 9.8822021484375, 10.236572265625, 10.5909423828125, 10.9453125]}, "gradients/decoder.transformer.h.16.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 6.0, 3.0, 4.0, 7.0, 2.0, 5.0, 10.0, 6.0, 14.0, 15.0, 17.0, 9.0, 26.0, 30.0, 25.0, 49.0, 53.0, 57.0, 89.0, 103.0, 174.0, 385.0, 1779.0, 26958.0, 2401920.0, 1740852.0, 19218.0, 1443.0, 376.0, 160.0, 94.0, 66.0, 59.0, 41.0, 42.0, 38.0, 20.0, 21.0, 27.0, 24.0, 9.0, 4.0, 15.0, 7.0, 7.0, 5.0, 5.0, 2.0, 3.0, 2.0, 1.0, 3.0, 1.0, 1.0, 3.0], "bins": [-58.71875, -56.9892578125, -55.259765625, -53.5302734375, -51.80078125, -50.0712890625, -48.341796875, -46.6123046875, -44.8828125, -43.1533203125, -41.423828125, -39.6943359375, -37.96484375, -36.2353515625, -34.505859375, -32.7763671875, -31.046875, -29.3173828125, -27.587890625, -25.8583984375, -24.12890625, -22.3994140625, -20.669921875, -18.9404296875, -17.2109375, -15.4814453125, -13.751953125, -12.0224609375, -10.29296875, -8.5634765625, -6.833984375, -5.1044921875, -3.375, -1.6455078125, 0.083984375, 1.8134765625, 3.54296875, 5.2724609375, 7.001953125, 8.7314453125, 10.4609375, 12.1904296875, 13.919921875, 15.6494140625, 17.37890625, 19.1083984375, 20.837890625, 22.5673828125, 24.296875, 26.0263671875, 27.755859375, 29.4853515625, 31.21484375, 32.9443359375, 34.673828125, 36.4033203125, 38.1328125, 39.8623046875, 41.591796875, 43.3212890625, 45.05078125, 46.7802734375, 48.509765625, 50.2392578125, 51.96875]}, "gradients/decoder.transformer.h.16.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 5.0, 4.0, 8.0, 9.0, 10.0, 19.0, 37.0, 36.0, 42.0, 82.0, 85.0, 151.0, 223.0, 286.0, 444.0, 529.0, 552.0, 436.0, 326.0, 236.0, 177.0, 105.0, 82.0, 75.0, 35.0, 25.0, 15.0, 15.0, 10.0, 8.0, 5.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.125, -32.15478515625, -31.1845703125, -30.21435546875, -29.244140625, -28.27392578125, -27.3037109375, -26.33349609375, -25.36328125, -24.39306640625, -23.4228515625, -22.45263671875, -21.482421875, -20.51220703125, -19.5419921875, -18.57177734375, -17.6015625, -16.63134765625, -15.6611328125, -14.69091796875, -13.720703125, -12.75048828125, -11.7802734375, -10.81005859375, -9.83984375, -8.86962890625, -7.8994140625, -6.92919921875, -5.958984375, -4.98876953125, -4.0185546875, -3.04833984375, -2.078125, -1.10791015625, -0.1376953125, 0.83251953125, 1.802734375, 2.77294921875, 3.7431640625, 4.71337890625, 5.68359375, 6.65380859375, 7.6240234375, 8.59423828125, 9.564453125, 10.53466796875, 11.5048828125, 12.47509765625, 13.4453125, 14.41552734375, 15.3857421875, 16.35595703125, 17.326171875, 18.29638671875, 19.2666015625, 20.23681640625, 21.20703125, 22.17724609375, 23.1474609375, 24.11767578125, 25.087890625, 26.05810546875, 27.0283203125, 27.99853515625, 28.96875]}, "gradients/decoder.transformer.h.16.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 5.0, 1.0, 6.0, 8.0, 9.0, 12.0, 12.0, 18.0, 20.0, 30.0, 40.0, 44.0, 46.0, 59.0, 115.0, 119.0, 160.0, 249.0, 404.0, 865.0, 3182.0, 77996.0, 3888551.0, 214867.0, 4991.0, 932.0, 464.0, 269.0, 212.0, 118.0, 97.0, 86.0, 74.0, 53.0, 39.0, 31.0, 17.0, 21.0, 16.0, 16.0, 8.0, 9.0, 1.0, 3.0, 3.0, 4.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0], "bins": [-83.4375, -80.619140625, -77.80078125, -74.982421875, -72.1640625, -69.345703125, -66.52734375, -63.708984375, -60.890625, -58.072265625, -55.25390625, -52.435546875, -49.6171875, -46.798828125, -43.98046875, -41.162109375, -38.34375, -35.525390625, -32.70703125, -29.888671875, -27.0703125, -24.251953125, -21.43359375, -18.615234375, -15.796875, -12.978515625, -10.16015625, -7.341796875, -4.5234375, -1.705078125, 1.11328125, 3.931640625, 6.75, 9.568359375, 12.38671875, 15.205078125, 18.0234375, 20.841796875, 23.66015625, 26.478515625, 29.296875, 32.115234375, 34.93359375, 37.751953125, 40.5703125, 43.388671875, 46.20703125, 49.025390625, 51.84375, 54.662109375, 57.48046875, 60.298828125, 63.1171875, 65.935546875, 68.75390625, 71.572265625, 74.390625, 77.208984375, 80.02734375, 82.845703125, 85.6640625, 88.482421875, 91.30078125, 94.119140625, 96.9375]}, "gradients/decoder.transformer.h.16.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 6.0, 2.0, 19.0, 49.0, 79.0, 141.0, 183.0, 191.0, 146.0, 87.0, 68.0, 25.0, 7.0, 3.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-76.84791564941406, -71.0132064819336, -65.17849731445312, -59.343780517578125, -53.509071350097656, -47.67436218261719, -41.83964920043945, -36.00493621826172, -30.17022705078125, -24.33551597595215, -18.500804901123047, -12.666093826293945, -6.831382751464844, -0.9966716766357422, 4.838039398193359, 10.672752380371094, 16.507461547851562, 22.342172622680664, 28.176883697509766, 34.0115966796875, 39.84630584716797, 45.68101501464844, 51.51572799682617, 57.350440979003906, 63.185150146484375, 69.01985931396484, 74.85456848144531, 80.68928527832031, 86.52399444580078, 92.35870361328125, 98.19342041015625, 104.02812957763672, 109.86285400390625, 115.69756317138672, 121.53227233886719, 127.36698913574219, 133.20169067382812, 139.03640747070312, 144.87112426757812, 150.70584106445312, 156.54054260253906, 162.37525939941406, 168.2099609375, 174.044677734375, 179.87939453125, 185.71409606933594, 191.54881286621094, 197.38351440429688, 203.21823120117188, 209.05294799804688, 214.8876495361328, 220.7223663330078, 226.55706787109375, 232.39178466796875, 238.22650146484375, 244.06121826171875, 249.8959197998047, 255.7306365966797, 261.5653381347656, 267.4000549316406, 273.2347717285156, 279.0694885253906, 284.9041748046875, 290.7388916015625, 296.5736083984375]}, "gradients/decoder.transformer.h.16.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 7.0, 3.0, 1.0, 8.0, 4.0, 16.0, 10.0, 10.0, 24.0, 18.0, 18.0, 25.0, 31.0, 26.0, 29.0, 26.0, 44.0, 45.0, 29.0, 38.0, 38.0, 35.0, 35.0, 40.0, 44.0, 40.0, 35.0, 32.0, 34.0, 43.0, 31.0, 24.0, 20.0, 19.0, 21.0, 20.0, 14.0, 10.0, 14.0, 8.0, 8.0, 6.0, 7.0, 3.0, 4.0, 6.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 1.0], "bins": [-91.09634399414062, -88.38265228271484, -85.6689682006836, -82.95527648925781, -80.24159240722656, -77.52790069580078, -74.814208984375, -72.10052490234375, -69.38683319091797, -66.67314147949219, -63.95945739746094, -61.245765686035156, -58.53207778930664, -55.818389892578125, -53.104698181152344, -50.39101028442383, -47.67732238769531, -44.9636344909668, -42.24994659423828, -39.5362548828125, -36.822566986083984, -34.10887908935547, -31.39518928527832, -28.681499481201172, -25.967811584472656, -23.25412368774414, -20.540433883666992, -17.826744079589844, -15.113056182861328, -12.399367332458496, -9.685678482055664, -6.971988677978516, -4.25830078125, -1.544611930847168, 1.169076919555664, 3.882765769958496, 6.596454620361328, 9.31014347076416, 12.023832321166992, 14.73752212524414, 17.451210021972656, 20.164897918701172, 22.87858772277832, 25.59227752685547, 28.305965423583984, 31.0196533203125, 33.73334503173828, 36.4470329284668, 39.16072082519531, 41.87440872192383, 44.588096618652344, 47.301788330078125, 50.01547622680664, 52.729164123535156, 55.44285583496094, 58.15654373168945, 60.87023162841797, 63.583919525146484, 66.297607421875, 69.01129913330078, 71.72499084472656, 74.43867492675781, 77.1523666381836, 79.86605834960938, 82.57974243164062]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 3.0, 1.0, 6.0, 10.0, 11.0, 14.0, 13.0, 10.0, 12.0, 19.0, 18.0, 29.0, 30.0, 36.0, 33.0, 37.0, 45.0, 53.0, 42.0, 43.0, 39.0, 48.0, 40.0, 51.0, 39.0, 40.0, 37.0, 37.0, 36.0, 22.0, 24.0, 24.0, 15.0, 20.0, 13.0, 11.0, 15.0, 8.0, 6.0, 3.0, 3.0, 3.0, 4.0, 1.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-12.40625, -12.0289306640625, -11.651611328125, -11.2742919921875, -10.89697265625, -10.5196533203125, -10.142333984375, -9.7650146484375, -9.3876953125, -9.0103759765625, -8.633056640625, -8.2557373046875, -7.87841796875, -7.5010986328125, -7.123779296875, -6.7464599609375, -6.369140625, -5.9918212890625, -5.614501953125, -5.2371826171875, -4.85986328125, -4.4825439453125, -4.105224609375, -3.7279052734375, -3.3505859375, -2.9732666015625, -2.595947265625, -2.2186279296875, -1.84130859375, -1.4639892578125, -1.086669921875, -0.7093505859375, -0.33203125, 0.0452880859375, 0.422607421875, 0.7999267578125, 1.17724609375, 1.5545654296875, 1.931884765625, 2.3092041015625, 2.6865234375, 3.0638427734375, 3.441162109375, 3.8184814453125, 4.19580078125, 4.5731201171875, 4.950439453125, 5.3277587890625, 5.705078125, 6.0823974609375, 6.459716796875, 6.8370361328125, 7.21435546875, 7.5916748046875, 7.968994140625, 8.3463134765625, 8.7236328125, 9.1009521484375, 9.478271484375, 9.8555908203125, 10.23291015625, 10.6102294921875, 10.987548828125, 11.3648681640625, 11.7421875]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 3.0, 2.0, 2.0, 5.0, 8.0, 16.0, 21.0, 27.0, 54.0, 71.0, 93.0, 135.0, 220.0, 300.0, 489.0, 779.0, 1073.0, 1655.0, 2362.0, 3494.0, 5196.0, 7752.0, 11590.0, 17188.0, 25858.0, 38429.0, 56418.0, 83575.0, 123385.0, 172497.0, 159196.0, 109571.0, 73311.0, 50281.0, 34192.0, 22614.0, 15492.0, 10227.0, 7042.0, 4585.0, 3076.0, 2028.0, 1380.0, 911.0, 641.0, 423.0, 283.0, 199.0, 122.0, 93.0, 77.0, 49.0, 28.0, 17.0, 13.0, 9.0, 7.0, 4.0, 2.0, 0.0, 2.0], "bins": [-1.1572265625, -1.1219940185546875, -1.086761474609375, -1.0515289306640625, -1.01629638671875, -0.9810638427734375, -0.945831298828125, -0.9105987548828125, -0.8753662109375, -0.8401336669921875, -0.804901123046875, -0.7696685791015625, -0.73443603515625, -0.6992034912109375, -0.663970947265625, -0.6287384033203125, -0.593505859375, -0.5582733154296875, -0.523040771484375, -0.4878082275390625, -0.45257568359375, -0.4173431396484375, -0.382110595703125, -0.3468780517578125, -0.3116455078125, -0.2764129638671875, -0.241180419921875, -0.2059478759765625, -0.17071533203125, -0.1354827880859375, -0.100250244140625, -0.0650177001953125, -0.02978515625, 0.0054473876953125, 0.040679931640625, 0.0759124755859375, 0.11114501953125, 0.1463775634765625, 0.181610107421875, 0.2168426513671875, 0.2520751953125, 0.2873077392578125, 0.322540283203125, 0.3577728271484375, 0.39300537109375, 0.4282379150390625, 0.463470458984375, 0.4987030029296875, 0.533935546875, 0.5691680908203125, 0.604400634765625, 0.6396331787109375, 0.67486572265625, 0.7100982666015625, 0.745330810546875, 0.7805633544921875, 0.8157958984375, 0.8510284423828125, 0.886260986328125, 0.9214935302734375, 0.95672607421875, 0.9919586181640625, 1.027191162109375, 1.0624237060546875, 1.09765625]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 5.0, 6.0, 3.0, 7.0, 10.0, 3.0, 6.0, 23.0, 11.0, 15.0, 17.0, 17.0, 25.0, 32.0, 28.0, 23.0, 37.0, 30.0, 44.0, 44.0, 47.0, 34.0, 1069.0, 38.0, 39.0, 39.0, 42.0, 34.0, 42.0, 23.0, 23.0, 35.0, 27.0, 33.0, 22.0, 20.0, 15.0, 13.0, 14.0, 6.0, 5.0, 7.0, 7.0, 5.0, 3.0, 4.0, 2.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.92578125, -6.7069091796875, -6.488037109375, -6.2691650390625, -6.05029296875, -5.8314208984375, -5.612548828125, -5.3936767578125, -5.1748046875, -4.9559326171875, -4.737060546875, -4.5181884765625, -4.29931640625, -4.0804443359375, -3.861572265625, -3.6427001953125, -3.423828125, -3.2049560546875, -2.986083984375, -2.7672119140625, -2.54833984375, -2.3294677734375, -2.110595703125, -1.8917236328125, -1.6728515625, -1.4539794921875, -1.235107421875, -1.0162353515625, -0.79736328125, -0.5784912109375, -0.359619140625, -0.1407470703125, 0.078125, 0.2969970703125, 0.515869140625, 0.7347412109375, 0.95361328125, 1.1724853515625, 1.391357421875, 1.6102294921875, 1.8291015625, 2.0479736328125, 2.266845703125, 2.4857177734375, 2.70458984375, 2.9234619140625, 3.142333984375, 3.3612060546875, 3.580078125, 3.7989501953125, 4.017822265625, 4.2366943359375, 4.45556640625, 4.6744384765625, 4.893310546875, 5.1121826171875, 5.3310546875, 5.5499267578125, 5.768798828125, 5.9876708984375, 6.20654296875, 6.4254150390625, 6.644287109375, 6.8631591796875, 7.08203125]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 7.0, 10.0, 10.0, 22.0, 35.0, 54.0, 83.0, 116.0, 179.0, 293.0, 424.0, 596.0, 967.0, 1446.0, 2224.0, 3340.0, 5193.0, 7817.0, 12277.0, 19065.0, 29730.0, 46332.0, 71979.0, 114149.0, 180754.0, 1244428.0, 128798.0, 81209.0, 51558.0, 33560.0, 21395.0, 13626.0, 8895.0, 5642.0, 3816.0, 2502.0, 1542.0, 1001.0, 730.0, 464.0, 297.0, 212.0, 115.0, 95.0, 57.0, 31.0, 25.0, 13.0, 10.0, 5.0, 5.0, 4.0, 1.0, 3.0, 0.0, 1.0], "bins": [-1.2021484375, -1.165863037109375, -1.12957763671875, -1.093292236328125, -1.0570068359375, -1.020721435546875, -0.98443603515625, -0.948150634765625, -0.911865234375, -0.875579833984375, -0.83929443359375, -0.803009033203125, -0.7667236328125, -0.730438232421875, -0.69415283203125, -0.657867431640625, -0.62158203125, -0.585296630859375, -0.54901123046875, -0.512725830078125, -0.4764404296875, -0.440155029296875, -0.40386962890625, -0.367584228515625, -0.331298828125, -0.295013427734375, -0.25872802734375, -0.222442626953125, -0.1861572265625, -0.149871826171875, -0.11358642578125, -0.077301025390625, -0.041015625, -0.004730224609375, 0.03155517578125, 0.067840576171875, 0.1041259765625, 0.140411376953125, 0.17669677734375, 0.212982177734375, 0.249267578125, 0.285552978515625, 0.32183837890625, 0.358123779296875, 0.3944091796875, 0.430694580078125, 0.46697998046875, 0.503265380859375, 0.53955078125, 0.575836181640625, 0.61212158203125, 0.648406982421875, 0.6846923828125, 0.720977783203125, 0.75726318359375, 0.793548583984375, 0.829833984375, 0.866119384765625, 0.90240478515625, 0.938690185546875, 0.9749755859375, 1.011260986328125, 1.04754638671875, 1.083831787109375, 1.1201171875]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 5.0, 1.0, 6.0, 8.0, 7.0, 12.0, 10.0, 18.0, 19.0, 36.0, 42.0, 54.0, 70.0, 98.0, 106.0, 92.0, 102.0, 75.0, 71.0, 51.0, 19.0, 19.0, 22.0, 17.0, 14.0, 5.0, 9.0, 7.0, 1.0, 5.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.003173828125, -0.0030379891395568848, -0.0029021501541137695, -0.0027663111686706543, -0.002630472183227539, -0.002494633197784424, -0.0023587942123413086, -0.0022229552268981934, -0.002087116241455078, -0.0019512772560119629, -0.0018154382705688477, -0.0016795992851257324, -0.0015437602996826172, -0.001407921314239502, -0.0012720823287963867, -0.0011362433433532715, -0.0010004043579101562, -0.000864565372467041, -0.0007287263870239258, -0.0005928874015808105, -0.0004570484161376953, -0.0003212094306945801, -0.00018537044525146484, -4.953145980834961e-05, 8.630752563476562e-05, 0.00022214651107788086, 0.0003579854965209961, 0.0004938244819641113, 0.0006296634674072266, 0.0007655024528503418, 0.000901341438293457, 0.0010371804237365723, 0.0011730194091796875, 0.0013088583946228027, 0.001444697380065918, 0.0015805363655090332, 0.0017163753509521484, 0.0018522143363952637, 0.001988053321838379, 0.002123892307281494, 0.0022597312927246094, 0.0023955702781677246, 0.00253140926361084, 0.002667248249053955, 0.0028030872344970703, 0.0029389262199401855, 0.0030747652053833008, 0.003210604190826416, 0.0033464431762695312, 0.0034822821617126465, 0.0036181211471557617, 0.003753960132598877, 0.003889799118041992, 0.004025638103485107, 0.004161477088928223, 0.004297316074371338, 0.004433155059814453, 0.004568994045257568, 0.004704833030700684, 0.004840672016143799, 0.004976511001586914, 0.005112349987030029, 0.0052481889724731445, 0.00538402795791626, 0.005519866943359375]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 4.0, 2.0, 3.0, 2.0, 5.0, 7.0, 15.0, 11.0, 13.0, 26.0, 25.0, 41.0, 81.0, 168.0, 438.0, 2024.0, 1042066.0, 2700.0, 483.0, 168.0, 90.0, 57.0, 29.0, 21.0, 18.0, 17.0, 10.0, 7.0, 5.0, 6.0, 8.0, 1.0, 3.0, 5.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.1259765625, -0.12279033660888672, -0.11960411071777344, -0.11641788482666016, -0.11323165893554688, -0.1100454330444336, -0.10685920715332031, -0.10367298126220703, -0.10048675537109375, -0.09730052947998047, -0.09411430358886719, -0.0909280776977539, -0.08774185180664062, -0.08455562591552734, -0.08136940002441406, -0.07818317413330078, -0.0749969482421875, -0.07181072235107422, -0.06862449645996094, -0.06543827056884766, -0.062252044677734375, -0.059065818786621094, -0.05587959289550781, -0.05269336700439453, -0.04950714111328125, -0.04632091522216797, -0.04313468933105469, -0.039948463439941406, -0.036762237548828125, -0.033576011657714844, -0.030389785766601562, -0.02720355987548828, -0.024017333984375, -0.02083110809326172, -0.017644882202148438, -0.014458656311035156, -0.011272430419921875, -0.008086204528808594, -0.0048999786376953125, -0.0017137527465820312, 0.00147247314453125, 0.004658699035644531, 0.007844924926757812, 0.011031150817871094, 0.014217376708984375, 0.017403602600097656, 0.020589828491210938, 0.02377605438232422, 0.0269622802734375, 0.03014850616455078, 0.03333473205566406, 0.036520957946777344, 0.039707183837890625, 0.042893409729003906, 0.04607963562011719, 0.04926586151123047, 0.05245208740234375, 0.05563831329345703, 0.05882453918457031, 0.062010765075683594, 0.06519699096679688, 0.06838321685791016, 0.07156944274902344, 0.07475566864013672, 0.07794189453125]}, "gradients/decoder.transformer.h.16.ln_cross_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 17.0, 138.0, 520.0, 294.0, 42.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0011775626335293055, -0.0009695462067611516, -0.0007615297799929976, -0.0005535132950171828, -0.0003454968682490289, -0.00013748044148087502, 7.05360434949398e-05, 0.0002785524120554328, 0.0004865688970312476, 0.0006945853237994015, 0.0009026017505675554, 0.0011106182355433702, 0.001318634720519185, 0.001526651089079678, 0.0017346675740554929, 0.0019426839426159859, 0.0021507004275918007, 0.0023587169125676155, 0.0025667333975434303, 0.002774749882519245, 0.0029827661346644163, 0.003190782619640231, 0.003398799104616046, 0.003606815356761217, 0.003814831841737032, 0.004022848326712847, 0.004230864811688662, 0.004438881296664476, 0.004646897781640291, 0.004854913800954819, 0.0050629302859306335, 0.005270946770906448, 0.0054789637215435505, 0.005686980206519365, 0.00589499669149518, 0.006103013176470995, 0.00631102966144681, 0.006519045680761337, 0.006727062165737152, 0.006935078650712967, 0.007143095135688782, 0.0073511116206645966, 0.007559128105640411, 0.007767144590616226, 0.007975161075592041, 0.008183177560567856, 0.00839119404554367, 0.008599210530519485, 0.0088072270154953, 0.009015243500471115, 0.00922325998544693, 0.009431276470422745, 0.00963929295539856, 0.009847309440374374, 0.01005532592535019, 0.010263342410326004, 0.010471357963979244, 0.010679374448955059, 0.010887390933930874, 0.011095407418906689, 0.011303423903882504, 0.011511440388858318, 0.011719456873834133, 0.011927473358809948, 0.012135489843785763]}, "gradients/decoder.transformer.h.16.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 4.0, 4.0, 1.0, 1.0, 3.0, 9.0, 10.0, 5.0, 11.0, 10.0, 17.0, 19.0, 14.0, 18.0, 21.0, 27.0, 34.0, 40.0, 34.0, 41.0, 45.0, 27.0, 27.0, 54.0, 39.0, 52.0, 40.0, 41.0, 48.0, 33.0, 37.0, 29.0, 34.0, 23.0, 25.0, 27.0, 19.0, 13.0, 14.0, 8.0, 15.0, 10.0, 7.0, 1.0, 4.0, 3.0, 6.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.001589357852935791, -0.001538855955004692, -0.0014883540570735931, -0.0014378521591424942, -0.0013873502612113953, -0.0013368483632802963, -0.0012863464653491974, -0.0012358445674180984, -0.0011853426694869995, -0.0011348407715559006, -0.0010843388736248016, -0.0010338369756937027, -0.0009833350777626038, -0.0009328331798315048, -0.0008823312819004059, -0.000831829383969307, -0.000781327486038208, -0.0007308255881071091, -0.0006803236901760101, -0.0006298217922449112, -0.0005793198943138123, -0.0005288179963827133, -0.0004783160984516144, -0.00042781420052051544, -0.0003773123025894165, -0.00032681040465831757, -0.00027630850672721863, -0.0002258066087961197, -0.00017530471086502075, -0.00012480281293392181, -7.430091500282288e-05, -2.3799017071723938e-05, 2.6702880859375e-05, 7.720477879047394e-05, 0.00012770667672157288, 0.00017820857465267181, 0.00022871047258377075, 0.0002792123705148697, 0.00032971426844596863, 0.00038021616637706757, 0.0004307180643081665, 0.00048121996223926544, 0.0005317218601703644, 0.0005822237581014633, 0.0006327256560325623, 0.0006832275539636612, 0.0007337294518947601, 0.0007842313498258591, 0.000834733247756958, 0.000885235145688057, 0.0009357370436191559, 0.0009862389415502548, 0.0010367408394813538, 0.0010872427374124527, 0.0011377446353435516, 0.0011882465332746506, 0.0012387484312057495, 0.0012892503291368484, 0.0013397522270679474, 0.0013902541249990463, 0.0014407560229301453, 0.0014912579208612442, 0.0015417598187923431, 0.001592261716723442, 0.001642763614654541]}, "gradients/decoder.transformer.h.16.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 3.0, 1.0, 6.0, 10.0, 11.0, 14.0, 13.0, 10.0, 12.0, 19.0, 18.0, 29.0, 30.0, 36.0, 33.0, 37.0, 45.0, 53.0, 42.0, 43.0, 39.0, 48.0, 40.0, 51.0, 39.0, 40.0, 37.0, 37.0, 36.0, 22.0, 24.0, 24.0, 15.0, 20.0, 13.0, 11.0, 15.0, 8.0, 6.0, 3.0, 3.0, 3.0, 4.0, 1.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-12.40625, -12.0289306640625, -11.651611328125, -11.2742919921875, -10.89697265625, -10.5196533203125, -10.142333984375, -9.7650146484375, -9.3876953125, -9.0103759765625, -8.633056640625, -8.2557373046875, -7.87841796875, -7.5010986328125, -7.123779296875, -6.7464599609375, -6.369140625, -5.9918212890625, -5.614501953125, -5.2371826171875, -4.85986328125, -4.4825439453125, -4.105224609375, -3.7279052734375, -3.3505859375, -2.9732666015625, -2.595947265625, -2.2186279296875, -1.84130859375, -1.4639892578125, -1.086669921875, -0.7093505859375, -0.33203125, 0.0452880859375, 0.422607421875, 0.7999267578125, 1.17724609375, 1.5545654296875, 1.931884765625, 2.3092041015625, 2.6865234375, 3.0638427734375, 3.441162109375, 3.8184814453125, 4.19580078125, 4.5731201171875, 4.950439453125, 5.3277587890625, 5.705078125, 6.0823974609375, 6.459716796875, 6.8370361328125, 7.21435546875, 7.5916748046875, 7.968994140625, 8.3463134765625, 8.7236328125, 9.1009521484375, 9.478271484375, 9.8555908203125, 10.23291015625, 10.6102294921875, 10.987548828125, 11.3648681640625, 11.7421875]}, "gradients/decoder.transformer.h.16.attn.c_proj.weight": {"_type": "histogram", "values": [3.0, 4.0, 4.0, 2.0, 7.0, 10.0, 11.0, 15.0, 33.0, 47.0, 67.0, 102.0, 160.0, 240.0, 349.0, 546.0, 768.0, 1217.0, 1848.0, 2863.0, 4464.0, 7312.0, 12092.0, 21638.0, 42392.0, 95768.0, 258673.0, 353672.0, 127246.0, 53429.0, 26165.0, 14362.0, 8379.0, 5241.0, 3185.0, 2155.0, 1398.0, 886.0, 634.0, 409.0, 259.0, 174.0, 110.0, 89.0, 45.0, 37.0, 20.0, 15.0, 9.0, 9.0, 7.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.50390625, -5.30120849609375, -5.0985107421875, -4.89581298828125, -4.693115234375, -4.49041748046875, -4.2877197265625, -4.08502197265625, -3.88232421875, -3.67962646484375, -3.4769287109375, -3.27423095703125, -3.071533203125, -2.86883544921875, -2.6661376953125, -2.46343994140625, -2.2607421875, -2.05804443359375, -1.8553466796875, -1.65264892578125, -1.449951171875, -1.24725341796875, -1.0445556640625, -0.84185791015625, -0.63916015625, -0.43646240234375, -0.2337646484375, -0.03106689453125, 0.171630859375, 0.37432861328125, 0.5770263671875, 0.77972412109375, 0.982421875, 1.18511962890625, 1.3878173828125, 1.59051513671875, 1.793212890625, 1.99591064453125, 2.1986083984375, 2.40130615234375, 2.60400390625, 2.80670166015625, 3.0093994140625, 3.21209716796875, 3.414794921875, 3.61749267578125, 3.8201904296875, 4.02288818359375, 4.2255859375, 4.42828369140625, 4.6309814453125, 4.83367919921875, 5.036376953125, 5.23907470703125, 5.4417724609375, 5.64447021484375, 5.84716796875, 6.04986572265625, 6.2525634765625, 6.45526123046875, 6.657958984375, 6.86065673828125, 7.0633544921875, 7.26605224609375, 7.46875]}, "gradients/decoder.transformer.h.16.attn.c_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 7.0, 6.0, 4.0, 10.0, 8.0, 14.0, 8.0, 16.0, 18.0, 19.0, 23.0, 26.0, 39.0, 38.0, 47.0, 43.0, 43.0, 52.0, 105.0, 323.0, 1615.0, 124.0, 65.0, 45.0, 46.0, 49.0, 34.0, 40.0, 29.0, 25.0, 20.0, 32.0, 15.0, 16.0, 7.0, 9.0, 7.0, 7.0, 6.0, 2.0, 10.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.78125, -36.3359375, -34.890625, -33.4453125, -32.0, -30.5546875, -29.109375, -27.6640625, -26.21875, -24.7734375, -23.328125, -21.8828125, -20.4375, -18.9921875, -17.546875, -16.1015625, -14.65625, -13.2109375, -11.765625, -10.3203125, -8.875, -7.4296875, -5.984375, -4.5390625, -3.09375, -1.6484375, -0.203125, 1.2421875, 2.6875, 4.1328125, 5.578125, 7.0234375, 8.46875, 9.9140625, 11.359375, 12.8046875, 14.25, 15.6953125, 17.140625, 18.5859375, 20.03125, 21.4765625, 22.921875, 24.3671875, 25.8125, 27.2578125, 28.703125, 30.1484375, 31.59375, 33.0390625, 34.484375, 35.9296875, 37.375, 38.8203125, 40.265625, 41.7109375, 43.15625, 44.6015625, 46.046875, 47.4921875, 48.9375, 50.3828125, 51.828125, 53.2734375, 54.71875]}, "gradients/decoder.transformer.h.16.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 6.0, 4.0, 7.0, 14.0, 18.0, 14.0, 30.0, 40.0, 62.0, 81.0, 113.0, 177.0, 339.0, 644.0, 4747.0, 3108542.0, 28899.0, 920.0, 400.0, 227.0, 143.0, 75.0, 62.0, 35.0, 29.0, 22.0, 16.0, 13.0, 7.0, 5.0, 5.0, 6.0, 3.0, 1.0, 4.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-136.625, -132.443359375, -128.26171875, -124.080078125, -119.8984375, -115.716796875, -111.53515625, -107.353515625, -103.171875, -98.990234375, -94.80859375, -90.626953125, -86.4453125, -82.263671875, -78.08203125, -73.900390625, -69.71875, -65.537109375, -61.35546875, -57.173828125, -52.9921875, -48.810546875, -44.62890625, -40.447265625, -36.265625, -32.083984375, -27.90234375, -23.720703125, -19.5390625, -15.357421875, -11.17578125, -6.994140625, -2.8125, 1.369140625, 5.55078125, 9.732421875, 13.9140625, 18.095703125, 22.27734375, 26.458984375, 30.640625, 34.822265625, 39.00390625, 43.185546875, 47.3671875, 51.548828125, 55.73046875, 59.912109375, 64.09375, 68.275390625, 72.45703125, 76.638671875, 80.8203125, 85.001953125, 89.18359375, 93.365234375, 97.546875, 101.728515625, 105.91015625, 110.091796875, 114.2734375, 118.455078125, 122.63671875, 126.818359375, 131.0]}, "gradients/decoder.transformer.h.16.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 9.0, 19.0, 46.0, 63.0, 94.0, 134.0, 165.0, 155.0, 127.0, 105.0, 45.0, 21.0, 14.0, 7.0, 2.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.19178009033203, -45.420387268066406, -43.64899444580078, -41.87759780883789, -40.106204986572266, -38.33481216430664, -36.56341552734375, -34.792022705078125, -33.0206298828125, -31.249237060546875, -29.477842330932617, -27.70644760131836, -25.935054779052734, -24.16366195678711, -22.39226722717285, -20.620872497558594, -18.84947967529297, -17.078086853027344, -15.306692123413086, -13.535298347473145, -11.763904571533203, -9.992510795593262, -8.22111701965332, -6.449723243713379, -4.6783294677734375, -2.906935691833496, -1.1355419158935547, 0.6358518600463867, 2.407245635986328, 4.1786394119262695, 5.950033187866211, 7.721426963806152, 9.492820739746094, 11.264214515686035, 13.035608291625977, 14.807002067565918, 16.57839584350586, 18.349788665771484, 20.121183395385742, 21.892578125, 23.663970947265625, 25.43536376953125, 27.206758499145508, 28.978153228759766, 30.74954605102539, 32.520938873291016, 34.292335510253906, 36.06372833251953, 37.835121154785156, 39.60651397705078, 41.377906799316406, 43.1493034362793, 44.92069625854492, 46.69208908081055, 48.46348571777344, 50.23487854003906, 52.00627136230469, 53.77766418457031, 55.54905700683594, 57.32045364379883, 59.09184646606445, 60.86323928833008, 62.63463592529297, 64.4060287475586, 66.17742156982422]}, "gradients/decoder.transformer.h.16.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 4.0, 5.0, 4.0, 9.0, 11.0, 10.0, 11.0, 11.0, 17.0, 14.0, 20.0, 31.0, 34.0, 27.0, 32.0, 35.0, 35.0, 35.0, 32.0, 40.0, 38.0, 43.0, 44.0, 36.0, 47.0, 32.0, 49.0, 41.0, 24.0, 34.0, 26.0, 27.0, 22.0, 19.0, 17.0, 15.0, 14.0, 14.0, 11.0, 8.0, 4.0, 8.0, 2.0, 3.0, 4.0, 2.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-121.63163757324219, -117.97769927978516, -114.32376098632812, -110.66981506347656, -107.01587677001953, -103.3619384765625, -99.70800018310547, -96.05406188964844, -92.40011596679688, -88.74617767333984, -85.09223937988281, -81.43829345703125, -77.78435516357422, -74.13041687011719, -70.47647857666016, -66.82254028320312, -63.16859817504883, -59.5146598815918, -55.8607177734375, -52.20677947998047, -48.55283737182617, -44.89889907836914, -41.244956970214844, -37.59101867675781, -33.93708038330078, -30.283140182495117, -26.629199981689453, -22.975261688232422, -19.321319580078125, -15.667381286621094, -12.01344108581543, -8.359500885009766, -4.705558776855469, -1.0516188144683838, 2.602321147918701, 6.256260871887207, 9.910201072692871, 13.564140319824219, 17.218080520629883, 20.872020721435547, 24.52596092224121, 28.179901123046875, 31.83384132385254, 35.4877815246582, 39.141719818115234, 42.79566192626953, 46.44960021972656, 50.103538513183594, 53.75748062133789, 57.41141891479492, 61.06536102294922, 64.71929931640625, 68.37323760986328, 72.02717590332031, 75.68112182617188, 79.3350601196289, 82.98899841308594, 86.64293670654297, 90.296875, 93.95082092285156, 97.6047592163086, 101.25869750976562, 104.91263580322266, 108.56657409667969, 112.22052001953125]}, "gradients/decoder.transformer.h.15.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 3.0, 4.0, 3.0, 10.0, 10.0, 10.0, 17.0, 14.0, 9.0, 19.0, 23.0, 28.0, 31.0, 35.0, 38.0, 31.0, 33.0, 37.0, 48.0, 48.0, 45.0, 43.0, 42.0, 42.0, 38.0, 45.0, 35.0, 36.0, 39.0, 23.0, 25.0, 18.0, 20.0, 15.0, 18.0, 21.0, 14.0, 5.0, 11.0, 3.0, 5.0, 3.0, 3.0, 3.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-12.328125, -11.9447021484375, -11.561279296875, -11.1778564453125, -10.79443359375, -10.4110107421875, -10.027587890625, -9.6441650390625, -9.2607421875, -8.8773193359375, -8.493896484375, -8.1104736328125, -7.72705078125, -7.3436279296875, -6.960205078125, -6.5767822265625, -6.193359375, -5.8099365234375, -5.426513671875, -5.0430908203125, -4.65966796875, -4.2762451171875, -3.892822265625, -3.5093994140625, -3.1259765625, -2.7425537109375, -2.359130859375, -1.9757080078125, -1.59228515625, -1.2088623046875, -0.825439453125, -0.4420166015625, -0.05859375, 0.3248291015625, 0.708251953125, 1.0916748046875, 1.47509765625, 1.8585205078125, 2.241943359375, 2.6253662109375, 3.0087890625, 3.3922119140625, 3.775634765625, 4.1590576171875, 4.54248046875, 4.9259033203125, 5.309326171875, 5.6927490234375, 6.076171875, 6.4595947265625, 6.843017578125, 7.2264404296875, 7.60986328125, 7.9932861328125, 8.376708984375, 8.7601318359375, 9.1435546875, 9.5269775390625, 9.910400390625, 10.2938232421875, 10.67724609375, 11.0606689453125, 11.444091796875, 11.8275146484375, 12.2109375]}, "gradients/decoder.transformer.h.15.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 1.0, 4.0, 5.0, 5.0, 8.0, 10.0, 7.0, 17.0, 26.0, 18.0, 22.0, 25.0, 30.0, 42.0, 57.0, 74.0, 124.0, 172.0, 241.0, 474.0, 838.0, 1964.0, 5176.0, 17032.0, 75139.0, 459146.0, 1718141.0, 1495289.0, 340831.0, 57638.0, 13743.0, 4455.0, 1599.0, 741.0, 379.0, 213.0, 142.0, 110.0, 87.0, 56.0, 56.0, 41.0, 28.0, 18.0, 20.0, 7.0, 14.0, 5.0, 4.0, 5.0, 1.0, 4.0, 2.0, 4.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0], "bins": [-18.0625, -17.478271484375, -16.89404296875, -16.309814453125, -15.7255859375, -15.141357421875, -14.55712890625, -13.972900390625, -13.388671875, -12.804443359375, -12.22021484375, -11.635986328125, -11.0517578125, -10.467529296875, -9.88330078125, -9.299072265625, -8.71484375, -8.130615234375, -7.54638671875, -6.962158203125, -6.3779296875, -5.793701171875, -5.20947265625, -4.625244140625, -4.041015625, -3.456787109375, -2.87255859375, -2.288330078125, -1.7041015625, -1.119873046875, -0.53564453125, 0.048583984375, 0.6328125, 1.217041015625, 1.80126953125, 2.385498046875, 2.9697265625, 3.553955078125, 4.13818359375, 4.722412109375, 5.306640625, 5.890869140625, 6.47509765625, 7.059326171875, 7.6435546875, 8.227783203125, 8.81201171875, 9.396240234375, 9.98046875, 10.564697265625, 11.14892578125, 11.733154296875, 12.3173828125, 12.901611328125, 13.48583984375, 14.070068359375, 14.654296875, 15.238525390625, 15.82275390625, 16.406982421875, 16.9912109375, 17.575439453125, 18.15966796875, 18.743896484375, 19.328125]}, "gradients/decoder.transformer.h.15.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 5.0, 18.0, 19.0, 32.0, 55.0, 127.0, 192.0, 311.0, 604.0, 811.0, 796.0, 492.0, 283.0, 161.0, 75.0, 57.0, 27.0, 14.0, 4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.21875, -46.6552734375, -45.091796875, -43.5283203125, -41.96484375, -40.4013671875, -38.837890625, -37.2744140625, -35.7109375, -34.1474609375, -32.583984375, -31.0205078125, -29.45703125, -27.8935546875, -26.330078125, -24.7666015625, -23.203125, -21.6396484375, -20.076171875, -18.5126953125, -16.94921875, -15.3857421875, -13.822265625, -12.2587890625, -10.6953125, -9.1318359375, -7.568359375, -6.0048828125, -4.44140625, -2.8779296875, -1.314453125, 0.2490234375, 1.8125, 3.3759765625, 4.939453125, 6.5029296875, 8.06640625, 9.6298828125, 11.193359375, 12.7568359375, 14.3203125, 15.8837890625, 17.447265625, 19.0107421875, 20.57421875, 22.1376953125, 23.701171875, 25.2646484375, 26.828125, 28.3916015625, 29.955078125, 31.5185546875, 33.08203125, 34.6455078125, 36.208984375, 37.7724609375, 39.3359375, 40.8994140625, 42.462890625, 44.0263671875, 45.58984375, 47.1533203125, 48.716796875, 50.2802734375, 51.84375]}, "gradients/decoder.transformer.h.15.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 1.0, 5.0, 12.0, 6.0, 10.0, 19.0, 24.0, 31.0, 37.0, 47.0, 82.0, 129.0, 178.0, 348.0, 668.0, 2993.0, 509907.0, 3666063.0, 11549.0, 994.0, 419.0, 235.0, 147.0, 124.0, 68.0, 59.0, 26.0, 25.0, 20.0, 24.0, 9.0, 11.0, 7.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-114.75, -111.1845703125, -107.619140625, -104.0537109375, -100.48828125, -96.9228515625, -93.357421875, -89.7919921875, -86.2265625, -82.6611328125, -79.095703125, -75.5302734375, -71.96484375, -68.3994140625, -64.833984375, -61.2685546875, -57.703125, -54.1376953125, -50.572265625, -47.0068359375, -43.44140625, -39.8759765625, -36.310546875, -32.7451171875, -29.1796875, -25.6142578125, -22.048828125, -18.4833984375, -14.91796875, -11.3525390625, -7.787109375, -4.2216796875, -0.65625, 2.9091796875, 6.474609375, 10.0400390625, 13.60546875, 17.1708984375, 20.736328125, 24.3017578125, 27.8671875, 31.4326171875, 34.998046875, 38.5634765625, 42.12890625, 45.6943359375, 49.259765625, 52.8251953125, 56.390625, 59.9560546875, 63.521484375, 67.0869140625, 70.65234375, 74.2177734375, 77.783203125, 81.3486328125, 84.9140625, 88.4794921875, 92.044921875, 95.6103515625, 99.17578125, 102.7412109375, 106.306640625, 109.8720703125, 113.4375]}, "gradients/decoder.transformer.h.15.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 8.0, 13.0, 41.0, 65.0, 87.0, 152.0, 177.0, 186.0, 130.0, 77.0, 46.0, 14.0, 10.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-133.42098999023438, -128.0428924560547, -122.66481018066406, -117.28671264648438, -111.90862274169922, -106.53053283691406, -101.15243530273438, -95.77434539794922, -90.39625549316406, -85.0181655883789, -79.64007568359375, -74.26197814941406, -68.8838882446289, -63.50579833984375, -58.12770462036133, -52.749610900878906, -47.37152099609375, -41.993431091308594, -36.61533737182617, -31.237245559692383, -25.859153747558594, -20.481061935424805, -15.102970123291016, -9.724876403808594, -4.3467864990234375, 1.0313053131103516, 6.409397125244141, 11.78748893737793, 17.16558074951172, 22.543672561645508, 27.921764373779297, 33.29985809326172, 38.677947998046875, 44.05603790283203, 49.43413162231445, 54.812225341796875, 60.19031524658203, 65.56840515136719, 70.94650268554688, 76.32459259033203, 81.70268249511719, 87.08077239990234, 92.4588623046875, 97.83695983886719, 103.21504974365234, 108.5931396484375, 113.97123718261719, 119.34932708740234, 124.7274169921875, 130.1055145263672, 135.4835968017578, 140.8616943359375, 146.23977661132812, 151.6178741455078, 156.9959716796875, 162.37405395507812, 167.7521514892578, 173.1302490234375, 178.50833129882812, 183.8864288330078, 189.2645263671875, 194.64260864257812, 200.0207061767578, 205.3988037109375, 210.77688598632812]}, "gradients/decoder.transformer.h.15.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 3.0, 2.0, 5.0, 1.0, 5.0, 5.0, 14.0, 9.0, 14.0, 19.0, 11.0, 15.0, 26.0, 20.0, 34.0, 28.0, 33.0, 43.0, 30.0, 30.0, 35.0, 31.0, 30.0, 47.0, 25.0, 34.0, 42.0, 39.0, 31.0, 42.0, 38.0, 38.0, 33.0, 25.0, 25.0, 19.0, 23.0, 24.0, 12.0, 12.0, 12.0, 7.0, 7.0, 5.0, 7.0, 4.0, 3.0, 6.0, 6.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-87.049072265625, -84.43451690673828, -81.81996154785156, -79.20540618896484, -76.59085083007812, -73.9762954711914, -71.36174011230469, -68.74717712402344, -66.13262939453125, -63.51807403564453, -60.90351867675781, -58.288963317871094, -55.674407958984375, -53.059852600097656, -50.44529342651367, -47.83073806762695, -45.21617889404297, -42.60162353515625, -39.98706817626953, -37.37251281738281, -34.757957458496094, -32.143402099609375, -29.52884292602539, -26.914287567138672, -24.299732208251953, -21.685176849365234, -19.070621490478516, -16.456064224243164, -13.841508865356445, -11.226953506469727, -8.612397193908691, -5.997840881347656, -3.3832855224609375, -0.7687296867370605, 1.8458261489868164, 4.460381984710693, 7.07493782043457, 9.689493179321289, 12.304049491882324, 14.91860580444336, 17.533161163330078, 20.147716522216797, 22.762271881103516, 25.376829147338867, 27.991384506225586, 30.605939865112305, 33.220497131347656, 35.835052490234375, 38.449607849121094, 41.06416320800781, 43.67871856689453, 46.29327392578125, 48.90782928466797, 51.52238464355469, 54.13694381713867, 56.75149917602539, 59.36605453491211, 61.98060989379883, 64.59516906738281, 67.20972442626953, 69.82427978515625, 72.43883514404297, 75.05339050292969, 77.6679458618164, 80.28250122070312]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 0.0, 3.0, 2.0, 4.0, 6.0, 9.0, 5.0, 14.0, 12.0, 10.0, 15.0, 20.0, 26.0, 17.0, 31.0, 37.0, 28.0, 39.0, 39.0, 30.0, 42.0, 33.0, 33.0, 51.0, 52.0, 45.0, 57.0, 27.0, 41.0, 36.0, 33.0, 23.0, 21.0, 26.0, 24.0, 23.0, 13.0, 12.0, 12.0, 12.0, 12.0, 9.0, 7.0, 4.0, 5.0, 6.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-11.578125, -11.2012939453125, -10.824462890625, -10.4476318359375, -10.07080078125, -9.6939697265625, -9.317138671875, -8.9403076171875, -8.5634765625, -8.1866455078125, -7.809814453125, -7.4329833984375, -7.05615234375, -6.6793212890625, -6.302490234375, -5.9256591796875, -5.548828125, -5.1719970703125, -4.795166015625, -4.4183349609375, -4.04150390625, -3.6646728515625, -3.287841796875, -2.9110107421875, -2.5341796875, -2.1573486328125, -1.780517578125, -1.4036865234375, -1.02685546875, -0.6500244140625, -0.273193359375, 0.1036376953125, 0.48046875, 0.8572998046875, 1.234130859375, 1.6109619140625, 1.98779296875, 2.3646240234375, 2.741455078125, 3.1182861328125, 3.4951171875, 3.8719482421875, 4.248779296875, 4.6256103515625, 5.00244140625, 5.3792724609375, 5.756103515625, 6.1329345703125, 6.509765625, 6.8865966796875, 7.263427734375, 7.6402587890625, 8.01708984375, 8.3939208984375, 8.770751953125, 9.1475830078125, 9.5244140625, 9.9012451171875, 10.278076171875, 10.6549072265625, 11.03173828125, 11.4085693359375, 11.785400390625, 12.1622314453125, 12.5390625]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 5.0, 3.0, 0.0, 10.0, 11.0, 19.0, 30.0, 33.0, 67.0, 90.0, 124.0, 218.0, 297.0, 407.0, 658.0, 936.0, 1386.0, 2084.0, 3008.0, 4596.0, 6560.0, 10045.0, 14944.0, 22406.0, 33577.0, 50603.0, 74544.0, 108419.0, 159325.0, 176213.0, 121529.0, 83699.0, 57415.0, 38308.0, 25434.0, 17227.0, 11369.0, 7503.0, 4881.0, 3413.0, 2310.0, 1589.0, 1064.0, 733.0, 481.0, 341.0, 200.0, 161.0, 112.0, 55.0, 38.0, 35.0, 22.0, 16.0, 6.0, 7.0, 5.0, 2.0, 1.0], "bins": [-1.23046875, -1.1944427490234375, -1.158416748046875, -1.1223907470703125, -1.08636474609375, -1.0503387451171875, -1.014312744140625, -0.9782867431640625, -0.9422607421875, -0.9062347412109375, -0.870208740234375, -0.8341827392578125, -0.79815673828125, -0.7621307373046875, -0.726104736328125, -0.6900787353515625, -0.654052734375, -0.6180267333984375, -0.582000732421875, -0.5459747314453125, -0.50994873046875, -0.4739227294921875, -0.437896728515625, -0.4018707275390625, -0.3658447265625, -0.3298187255859375, -0.293792724609375, -0.2577667236328125, -0.22174072265625, -0.1857147216796875, -0.149688720703125, -0.1136627197265625, -0.07763671875, -0.0416107177734375, -0.005584716796875, 0.0304412841796875, 0.06646728515625, 0.1024932861328125, 0.138519287109375, 0.1745452880859375, 0.2105712890625, 0.2465972900390625, 0.282623291015625, 0.3186492919921875, 0.35467529296875, 0.3907012939453125, 0.426727294921875, 0.4627532958984375, 0.498779296875, 0.5348052978515625, 0.570831298828125, 0.6068572998046875, 0.64288330078125, 0.6789093017578125, 0.714935302734375, 0.7509613037109375, 0.7869873046875, 0.8230133056640625, 0.859039306640625, 0.8950653076171875, 0.93109130859375, 0.9671173095703125, 1.003143310546875, 1.0391693115234375, 1.0751953125]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 1.0, 3.0, 6.0, 7.0, 10.0, 5.0, 10.0, 13.0, 10.0, 21.0, 16.0, 30.0, 22.0, 22.0, 39.0, 29.0, 37.0, 42.0, 48.0, 46.0, 46.0, 30.0, 1064.0, 37.0, 38.0, 48.0, 38.0, 34.0, 38.0, 24.0, 24.0, 31.0, 29.0, 15.0, 13.0, 22.0, 18.0, 16.0, 12.0, 8.0, 7.0, 4.0, 5.0, 2.0, 4.0, 0.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.2265625, -6.9814453125, -6.736328125, -6.4912109375, -6.24609375, -6.0009765625, -5.755859375, -5.5107421875, -5.265625, -5.0205078125, -4.775390625, -4.5302734375, -4.28515625, -4.0400390625, -3.794921875, -3.5498046875, -3.3046875, -3.0595703125, -2.814453125, -2.5693359375, -2.32421875, -2.0791015625, -1.833984375, -1.5888671875, -1.34375, -1.0986328125, -0.853515625, -0.6083984375, -0.36328125, -0.1181640625, 0.126953125, 0.3720703125, 0.6171875, 0.8623046875, 1.107421875, 1.3525390625, 1.59765625, 1.8427734375, 2.087890625, 2.3330078125, 2.578125, 2.8232421875, 3.068359375, 3.3134765625, 3.55859375, 3.8037109375, 4.048828125, 4.2939453125, 4.5390625, 4.7841796875, 5.029296875, 5.2744140625, 5.51953125, 5.7646484375, 6.009765625, 6.2548828125, 6.5, 6.7451171875, 6.990234375, 7.2353515625, 7.48046875, 7.7255859375, 7.970703125, 8.2158203125, 8.4609375]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 6.0, 2.0, 10.0, 13.0, 26.0, 27.0, 33.0, 67.0, 99.0, 151.0, 220.0, 333.0, 475.0, 787.0, 1135.0, 1795.0, 2762.0, 4161.0, 6584.0, 10478.0, 16319.0, 26066.0, 40940.0, 65247.0, 104939.0, 167810.0, 1253693.0, 147139.0, 90923.0, 56745.0, 35675.0, 22475.0, 14433.0, 9236.0, 5786.0, 3673.0, 2340.0, 1505.0, 1032.0, 716.0, 446.0, 269.0, 170.0, 133.0, 101.0, 58.0, 39.0, 24.0, 15.0, 13.0, 7.0, 4.0, 7.0, 1.0, 1.0, 1.0, 1.0], "bins": [-1.3642578125, -1.3233489990234375, -1.282440185546875, -1.2415313720703125, -1.20062255859375, -1.1597137451171875, -1.118804931640625, -1.0778961181640625, -1.0369873046875, -0.9960784912109375, -0.955169677734375, -0.9142608642578125, -0.87335205078125, -0.8324432373046875, -0.791534423828125, -0.7506256103515625, -0.709716796875, -0.6688079833984375, -0.627899169921875, -0.5869903564453125, -0.54608154296875, -0.5051727294921875, -0.464263916015625, -0.4233551025390625, -0.3824462890625, -0.3415374755859375, -0.300628662109375, -0.2597198486328125, -0.21881103515625, -0.1779022216796875, -0.136993408203125, -0.0960845947265625, -0.05517578125, -0.0142669677734375, 0.026641845703125, 0.0675506591796875, 0.10845947265625, 0.1493682861328125, 0.190277099609375, 0.2311859130859375, 0.2720947265625, 0.3130035400390625, 0.353912353515625, 0.3948211669921875, 0.43572998046875, 0.4766387939453125, 0.517547607421875, 0.5584564208984375, 0.599365234375, 0.6402740478515625, 0.681182861328125, 0.7220916748046875, 0.76300048828125, 0.8039093017578125, 0.844818115234375, 0.8857269287109375, 0.9266357421875, 0.9675445556640625, 1.008453369140625, 1.0493621826171875, 1.09027099609375, 1.1311798095703125, 1.172088623046875, 1.2129974365234375, 1.25390625]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 2.0, 8.0, 6.0, 10.0, 11.0, 17.0, 15.0, 30.0, 30.0, 29.0, 40.0, 36.0, 53.0, 51.0, 44.0, 57.0, 65.0, 65.0, 67.0, 52.0, 41.0, 47.0, 40.0, 34.0, 19.0, 23.0, 22.0, 16.0, 8.0, 19.0, 10.0, 5.0, 6.0, 4.0, 4.0, 3.0, 2.0, 4.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.0020923614501953125, -0.00202980637550354, -0.0019672513008117676, -0.0019046962261199951, -0.0018421411514282227, -0.0017795860767364502, -0.0017170310020446777, -0.0016544759273529053, -0.0015919208526611328, -0.0015293657779693604, -0.0014668107032775879, -0.0014042556285858154, -0.001341700553894043, -0.0012791454792022705, -0.001216590404510498, -0.0011540353298187256, -0.0010914802551269531, -0.0010289251804351807, -0.0009663701057434082, -0.0009038150310516357, -0.0008412599563598633, -0.0007787048816680908, -0.0007161498069763184, -0.0006535947322845459, -0.0005910396575927734, -0.000528484582901001, -0.0004659295082092285, -0.00040337443351745605, -0.0003408193588256836, -0.00027826428413391113, -0.00021570920944213867, -0.0001531541347503662, -9.059906005859375e-05, -2.804398536682129e-05, 3.451108932495117e-05, 9.706616401672363e-05, 0.0001596212387084961, 0.00022217631340026855, 0.000284731388092041, 0.0003472864627838135, 0.00040984153747558594, 0.0004723966121673584, 0.0005349516868591309, 0.0005975067615509033, 0.0006600618362426758, 0.0007226169109344482, 0.0007851719856262207, 0.0008477270603179932, 0.0009102821350097656, 0.0009728372097015381, 0.0010353922843933105, 0.001097947359085083, 0.0011605024337768555, 0.001223057508468628, 0.0012856125831604004, 0.0013481676578521729, 0.0014107227325439453, 0.0014732778072357178, 0.0015358328819274902, 0.0015983879566192627, 0.0016609430313110352, 0.0017234981060028076, 0.00178605318069458, 0.0018486082553863525, 0.001911163330078125]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 5.0, 4.0, 1.0, 5.0, 9.0, 10.0, 5.0, 14.0, 13.0, 17.0, 25.0, 29.0, 44.0, 51.0, 92.0, 145.0, 264.0, 665.0, 4362.0, 1032502.0, 8723.0, 800.0, 294.0, 143.0, 101.0, 59.0, 37.0, 31.0, 22.0, 14.0, 22.0, 10.0, 8.0, 9.0, 5.0, 3.0, 8.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.04803466796875, -0.0464630126953125, -0.044891357421875, -0.0433197021484375, -0.041748046875, -0.0401763916015625, -0.038604736328125, -0.0370330810546875, -0.03546142578125, -0.0338897705078125, -0.032318115234375, -0.0307464599609375, -0.0291748046875, -0.0276031494140625, -0.026031494140625, -0.0244598388671875, -0.02288818359375, -0.0213165283203125, -0.019744873046875, -0.0181732177734375, -0.0166015625, -0.0150299072265625, -0.013458251953125, -0.0118865966796875, -0.01031494140625, -0.0087432861328125, -0.007171630859375, -0.0055999755859375, -0.0040283203125, -0.0024566650390625, -0.000885009765625, 0.0006866455078125, 0.00225830078125, 0.0038299560546875, 0.005401611328125, 0.0069732666015625, 0.008544921875, 0.0101165771484375, 0.011688232421875, 0.0132598876953125, 0.01483154296875, 0.0164031982421875, 0.017974853515625, 0.0195465087890625, 0.0211181640625, 0.0226898193359375, 0.024261474609375, 0.0258331298828125, 0.02740478515625, 0.0289764404296875, 0.030548095703125, 0.0321197509765625, 0.03369140625, 0.0352630615234375, 0.036834716796875, 0.0384063720703125, 0.03997802734375, 0.0415496826171875, 0.043121337890625, 0.0446929931640625, 0.0462646484375, 0.0478363037109375, 0.049407958984375, 0.0509796142578125, 0.05255126953125]}, "gradients/decoder.transformer.h.15.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 6.0, 15.0, 20.0, 39.0, 71.0, 120.0, 142.0, 146.0, 134.0, 122.0, 94.0, 44.0, 26.0, 16.0, 8.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0016483803046867251, -0.0015979146119207144, -0.0015474490355700254, -0.0014969834592193365, -0.0014465177664533257, -0.001396052073687315, -0.001345586497336626, -0.0012951209209859371, -0.0012446552282199264, -0.0011941895354539156, -0.0011437239591032267, -0.0010932583827525377, -0.001042792689986527, -0.0009923269972205162, -0.0009418614208698273, -0.0008913957863114774, -0.0008409301517531276, -0.0007904645171947777, -0.0007399988826364279, -0.000689533248078078, -0.0006390676135197282, -0.0005886019789613783, -0.0005381363444030285, -0.00048767070984467864, -0.0004372050752863288, -0.00038673944072797894, -0.0003362738061696291, -0.00028580817161127925, -0.0002353425370529294, -0.00018487690249457955, -0.0001344112679362297, -8.394563337787986e-05, -3.347988240420818e-05, 1.6985752154141665e-05, 6.745138671249151e-05, 0.00011791702127084136, 0.0001683826558291912, 0.00021884829038754106, 0.0002693139249458909, 0.00031977955950424075, 0.0003702451940625906, 0.00042071082862094045, 0.0004711764631792903, 0.0005216420977376401, 0.00057210773229599, 0.0006225733668543398, 0.0006730390014126897, 0.0007235046359710395, 0.0007739702705293894, 0.0008244359050877392, 0.0008749015396460891, 0.0009253671742044389, 0.0009758328087627888, 0.0010262983851134777, 0.0010767640778794885, 0.0011272297706454992, 0.0011776953469961882, 0.001228160923346877, 0.0012786266161128879, 0.0013290923088788986, 0.0013795578852295876, 0.0014300234615802765, 0.0014804891543462873, 0.001530954847112298, 0.001581420423462987]}, "gradients/decoder.transformer.h.15.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 4.0, 9.0, 2.0, 9.0, 17.0, 8.0, 17.0, 15.0, 15.0, 23.0, 22.0, 23.0, 21.0, 22.0, 38.0, 27.0, 39.0, 36.0, 40.0, 40.0, 37.0, 26.0, 37.0, 33.0, 42.0, 27.0, 44.0, 30.0, 39.0, 31.0, 34.0, 22.0, 20.0, 13.0, 27.0, 17.0, 22.0, 13.0, 8.0, 11.0, 9.0, 9.0, 5.0, 9.0, 4.0, 5.0, 4.0, 2.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0008800029754638672, -0.0008500795811414719, -0.0008201561868190765, -0.0007902327924966812, -0.0007603093981742859, -0.0007303860038518906, -0.0007004626095294952, -0.0006705392152070999, -0.0006406158208847046, -0.0006106924265623093, -0.0005807690322399139, -0.0005508456379175186, -0.0005209222435951233, -0.000490998849272728, -0.00046107545495033264, -0.0004311520606279373, -0.000401228666305542, -0.00037130527198314667, -0.00034138187766075134, -0.000311458483338356, -0.0002815350890159607, -0.00025161169469356537, -0.00022168830037117004, -0.00019176490604877472, -0.0001618415117263794, -0.00013191811740398407, -0.00010199472308158875, -7.207132875919342e-05, -4.2147934436798096e-05, -1.2224540114402771e-05, 1.7698854207992554e-05, 4.762224853038788e-05, 7.75456428527832e-05, 0.00010746903717517853, 0.00013739243149757385, 0.00016731582581996918, 0.0001972392201423645, 0.00022716261446475983, 0.00025708600878715515, 0.0002870094031095505, 0.0003169327974319458, 0.0003468561917543411, 0.00037677958607673645, 0.0004067029803991318, 0.0004366263747215271, 0.0004665497690439224, 0.0004964731633663177, 0.0005263965576887131, 0.0005563199520111084, 0.0005862433463335037, 0.000616166740655899, 0.0006460901349782944, 0.0006760135293006897, 0.000705936923623085, 0.0007358603179454803, 0.0007657837122678757, 0.000795707106590271, 0.0008256305009126663, 0.0008555538952350616, 0.000885477289557457, 0.0009154006838798523, 0.0009453240782022476, 0.0009752474725246429, 0.0010051708668470383, 0.0010350942611694336]}, "gradients/decoder.transformer.h.15.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 0.0, 3.0, 2.0, 4.0, 6.0, 9.0, 5.0, 14.0, 12.0, 10.0, 15.0, 20.0, 26.0, 17.0, 31.0, 37.0, 29.0, 39.0, 38.0, 30.0, 42.0, 33.0, 33.0, 54.0, 49.0, 46.0, 56.0, 27.0, 41.0, 36.0, 33.0, 23.0, 21.0, 26.0, 24.0, 23.0, 13.0, 12.0, 12.0, 12.0, 12.0, 9.0, 7.0, 4.0, 5.0, 6.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-11.578125, -11.201171875, -10.82421875, -10.447265625, -10.0703125, -9.693359375, -9.31640625, -8.939453125, -8.5625, -8.185546875, -7.80859375, -7.431640625, -7.0546875, -6.677734375, -6.30078125, -5.923828125, -5.546875, -5.169921875, -4.79296875, -4.416015625, -4.0390625, -3.662109375, -3.28515625, -2.908203125, -2.53125, -2.154296875, -1.77734375, -1.400390625, -1.0234375, -0.646484375, -0.26953125, 0.107421875, 0.484375, 0.861328125, 1.23828125, 1.615234375, 1.9921875, 2.369140625, 2.74609375, 3.123046875, 3.5, 3.876953125, 4.25390625, 4.630859375, 5.0078125, 5.384765625, 5.76171875, 6.138671875, 6.515625, 6.892578125, 7.26953125, 7.646484375, 8.0234375, 8.400390625, 8.77734375, 9.154296875, 9.53125, 9.908203125, 10.28515625, 10.662109375, 11.0390625, 11.416015625, 11.79296875, 12.169921875, 12.546875]}, "gradients/decoder.transformer.h.15.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 5.0, 3.0, 6.0, 7.0, 9.0, 17.0, 16.0, 23.0, 30.0, 37.0, 44.0, 60.0, 114.0, 121.0, 180.0, 283.0, 410.0, 595.0, 1074.0, 1826.0, 3328.0, 6147.0, 12050.0, 24951.0, 52409.0, 116662.0, 262604.0, 302353.0, 140756.0, 62540.0, 29294.0, 14103.0, 7215.0, 3858.0, 2075.0, 1213.0, 708.0, 413.0, 296.0, 208.0, 135.0, 86.0, 83.0, 69.0, 35.0, 26.0, 25.0, 15.0, 18.0, 6.0, 7.0, 8.0, 4.0, 3.0, 1.0, 2.0, 2.0, 2.0, 1.0], "bins": [-7.1875, -6.96368408203125, -6.7398681640625, -6.51605224609375, -6.292236328125, -6.06842041015625, -5.8446044921875, -5.62078857421875, -5.39697265625, -5.17315673828125, -4.9493408203125, -4.72552490234375, -4.501708984375, -4.27789306640625, -4.0540771484375, -3.83026123046875, -3.6064453125, -3.38262939453125, -3.1588134765625, -2.93499755859375, -2.711181640625, -2.48736572265625, -2.2635498046875, -2.03973388671875, -1.81591796875, -1.59210205078125, -1.3682861328125, -1.14447021484375, -0.920654296875, -0.69683837890625, -0.4730224609375, -0.24920654296875, -0.025390625, 0.19842529296875, 0.4222412109375, 0.64605712890625, 0.869873046875, 1.09368896484375, 1.3175048828125, 1.54132080078125, 1.76513671875, 1.98895263671875, 2.2127685546875, 2.43658447265625, 2.660400390625, 2.88421630859375, 3.1080322265625, 3.33184814453125, 3.5556640625, 3.77947998046875, 4.0032958984375, 4.22711181640625, 4.450927734375, 4.67474365234375, 4.8985595703125, 5.12237548828125, 5.34619140625, 5.57000732421875, 5.7938232421875, 6.01763916015625, 6.241455078125, 6.46527099609375, 6.6890869140625, 6.91290283203125, 7.13671875]}, "gradients/decoder.transformer.h.15.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 4.0, 2.0, 2.0, 6.0, 3.0, 6.0, 6.0, 9.0, 11.0, 10.0, 19.0, 13.0, 23.0, 30.0, 24.0, 36.0, 51.0, 58.0, 53.0, 74.0, 310.0, 1730.0, 125.0, 79.0, 62.0, 57.0, 53.0, 47.0, 39.0, 25.0, 29.0, 21.0, 7.0, 11.0, 3.0, 4.0, 10.0, 2.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0], "bins": [-75.625, -73.6962890625, -71.767578125, -69.8388671875, -67.91015625, -65.9814453125, -64.052734375, -62.1240234375, -60.1953125, -58.2666015625, -56.337890625, -54.4091796875, -52.48046875, -50.5517578125, -48.623046875, -46.6943359375, -44.765625, -42.8369140625, -40.908203125, -38.9794921875, -37.05078125, -35.1220703125, -33.193359375, -31.2646484375, -29.3359375, -27.4072265625, -25.478515625, -23.5498046875, -21.62109375, -19.6923828125, -17.763671875, -15.8349609375, -13.90625, -11.9775390625, -10.048828125, -8.1201171875, -6.19140625, -4.2626953125, -2.333984375, -0.4052734375, 1.5234375, 3.4521484375, 5.380859375, 7.3095703125, 9.23828125, 11.1669921875, 13.095703125, 15.0244140625, 16.953125, 18.8818359375, 20.810546875, 22.7392578125, 24.66796875, 26.5966796875, 28.525390625, 30.4541015625, 32.3828125, 34.3115234375, 36.240234375, 38.1689453125, 40.09765625, 42.0263671875, 43.955078125, 45.8837890625, 47.8125]}, "gradients/decoder.transformer.h.15.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 4.0, 9.0, 5.0, 7.0, 12.0, 14.0, 24.0, 30.0, 48.0, 74.0, 99.0, 126.0, 196.0, 303.0, 578.0, 2800.0, 3053944.0, 85124.0, 1156.0, 376.0, 234.0, 156.0, 99.0, 85.0, 46.0, 42.0, 39.0, 16.0, 13.0, 13.0, 8.0, 3.0, 9.0, 2.0, 4.0, 5.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-108.4375, -104.0673828125, -99.697265625, -95.3271484375, -90.95703125, -86.5869140625, -82.216796875, -77.8466796875, -73.4765625, -69.1064453125, -64.736328125, -60.3662109375, -55.99609375, -51.6259765625, -47.255859375, -42.8857421875, -38.515625, -34.1455078125, -29.775390625, -25.4052734375, -21.03515625, -16.6650390625, -12.294921875, -7.9248046875, -3.5546875, 0.8154296875, 5.185546875, 9.5556640625, 13.92578125, 18.2958984375, 22.666015625, 27.0361328125, 31.40625, 35.7763671875, 40.146484375, 44.5166015625, 48.88671875, 53.2568359375, 57.626953125, 61.9970703125, 66.3671875, 70.7373046875, 75.107421875, 79.4775390625, 83.84765625, 88.2177734375, 92.587890625, 96.9580078125, 101.328125, 105.6982421875, 110.068359375, 114.4384765625, 118.80859375, 123.1787109375, 127.548828125, 131.9189453125, 136.2890625, 140.6591796875, 145.029296875, 149.3994140625, 153.76953125, 158.1396484375, 162.509765625, 166.8798828125, 171.25]}, "gradients/decoder.transformer.h.15.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 51.0, 323.0, 474.0, 145.0, 16.0, 4.0], "bins": [-308.0945129394531, -302.9800720214844, -297.86566162109375, -292.751220703125, -287.63677978515625, -282.5223693847656, -277.4079284667969, -272.2934875488281, -267.1790771484375, -262.06463623046875, -256.9502258300781, -251.83578491210938, -246.7213592529297, -241.60693359375, -236.49249267578125, -231.37806701660156, -226.2636260986328, -221.14920043945312, -216.03475952148438, -210.9203338623047, -205.805908203125, -200.69146728515625, -195.57704162597656, -190.46261596679688, -185.34817504882812, -180.23374938964844, -175.1193084716797, -170.0048828125, -164.8904571533203, -159.77603149414062, -154.66159057617188, -149.5471649169922, -144.4327392578125, -139.3183135986328, -134.20387268066406, -129.08944702148438, -123.97502136230469, -118.86058807373047, -113.74615478515625, -108.63172912597656, -103.51729583740234, -98.40286254882812, -93.28843688964844, -88.17400360107422, -83.0595703125, -77.94514465332031, -72.8307113647461, -67.71627807617188, -62.60185241699219, -57.487422943115234, -52.37299346923828, -47.25856018066406, -42.14413070678711, -37.029701232910156, -31.91526985168457, -26.800838470458984, -21.68640899658203, -16.571979522705078, -11.457548141479492, -6.343117713928223, -1.2286872863769531, 3.8857421875, 9.000173568725586, 14.114604949951172, 19.229034423828125]}, "gradients/decoder.transformer.h.15.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 3.0, 8.0, 4.0, 5.0, 2.0, 11.0, 12.0, 12.0, 12.0, 20.0, 19.0, 28.0, 30.0, 29.0, 27.0, 31.0, 39.0, 39.0, 39.0, 52.0, 41.0, 28.0, 43.0, 43.0, 38.0, 49.0, 43.0, 37.0, 25.0, 29.0, 32.0, 31.0, 27.0, 22.0, 27.0, 10.0, 10.0, 12.0, 8.0, 5.0, 8.0, 7.0, 2.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-106.20925903320312, -102.42353820800781, -98.63782501220703, -94.85210418701172, -91.06639099121094, -87.28067016601562, -83.49494934082031, -79.70923614501953, -75.92352294921875, -72.13780212402344, -68.35208892822266, -64.56636810302734, -60.78065490722656, -56.99493408203125, -53.2092170715332, -49.423500061035156, -45.637779235839844, -41.8520622253418, -38.06634521484375, -34.28062438964844, -30.494909286499023, -26.709192276000977, -22.923473358154297, -19.13775634765625, -15.352039337158203, -11.566322326660156, -7.780604362487793, -3.9948863983154297, -0.2091693878173828, 3.576547622680664, 7.362266540527344, 11.14798355102539, 14.933700561523438, 18.719417572021484, 22.50513458251953, 26.29085350036621, 30.076570510864258, 33.86228942871094, 37.648006439208984, 41.43372344970703, 45.21944046020508, 49.005157470703125, 52.79087448120117, 56.57659149169922, 60.36231231689453, 64.14802551269531, 67.93374633789062, 71.71946716308594, 75.50518035888672, 79.29090118408203, 83.07661437988281, 86.86233520507812, 90.6480484008789, 94.43376922607422, 98.219482421875, 102.00520324707031, 105.79092407226562, 109.57664489746094, 113.36235809326172, 117.14807891845703, 120.93379211425781, 124.71951293945312, 128.50523376464844, 132.29095458984375, 136.07666015625]}, "gradients/decoder.transformer.h.14.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 2.0, 3.0, 2.0, 3.0, 4.0, 10.0, 6.0, 12.0, 14.0, 13.0, 15.0, 21.0, 25.0, 27.0, 26.0, 34.0, 43.0, 33.0, 39.0, 21.0, 45.0, 44.0, 37.0, 40.0, 65.0, 37.0, 41.0, 47.0, 45.0, 31.0, 26.0, 23.0, 27.0, 25.0, 19.0, 23.0, 12.0, 15.0, 7.0, 9.0, 14.0, 5.0, 4.0, 6.0, 4.0, 6.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-12.4296875, -12.0328369140625, -11.635986328125, -11.2391357421875, -10.84228515625, -10.4454345703125, -10.048583984375, -9.6517333984375, -9.2548828125, -8.8580322265625, -8.461181640625, -8.0643310546875, -7.66748046875, -7.2706298828125, -6.873779296875, -6.4769287109375, -6.080078125, -5.6832275390625, -5.286376953125, -4.8895263671875, -4.49267578125, -4.0958251953125, -3.698974609375, -3.3021240234375, -2.9052734375, -2.5084228515625, -2.111572265625, -1.7147216796875, -1.31787109375, -0.9210205078125, -0.524169921875, -0.1273193359375, 0.26953125, 0.6663818359375, 1.063232421875, 1.4600830078125, 1.85693359375, 2.2537841796875, 2.650634765625, 3.0474853515625, 3.4443359375, 3.8411865234375, 4.238037109375, 4.6348876953125, 5.03173828125, 5.4285888671875, 5.825439453125, 6.2222900390625, 6.619140625, 7.0159912109375, 7.412841796875, 7.8096923828125, 8.20654296875, 8.6033935546875, 9.000244140625, 9.3970947265625, 9.7939453125, 10.1907958984375, 10.587646484375, 10.9844970703125, 11.38134765625, 11.7781982421875, 12.175048828125, 12.5718994140625, 12.96875]}, "gradients/decoder.transformer.h.14.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 2.0, 2.0, 4.0, 2.0, 7.0, 7.0, 7.0, 11.0, 8.0, 13.0, 13.0, 22.0, 22.0, 23.0, 34.0, 37.0, 51.0, 62.0, 114.0, 228.0, 711.0, 4520.0, 97244.0, 3316008.0, 759414.0, 13481.0, 1412.0, 342.0, 135.0, 88.0, 42.0, 42.0, 23.0, 20.0, 29.0, 20.0, 17.0, 18.0, 9.0, 10.0, 10.0, 7.0, 6.0, 6.0, 5.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-51.71875, -50.13818359375, -48.5576171875, -46.97705078125, -45.396484375, -43.81591796875, -42.2353515625, -40.65478515625, -39.07421875, -37.49365234375, -35.9130859375, -34.33251953125, -32.751953125, -31.17138671875, -29.5908203125, -28.01025390625, -26.4296875, -24.84912109375, -23.2685546875, -21.68798828125, -20.107421875, -18.52685546875, -16.9462890625, -15.36572265625, -13.78515625, -12.20458984375, -10.6240234375, -9.04345703125, -7.462890625, -5.88232421875, -4.3017578125, -2.72119140625, -1.140625, 0.43994140625, 2.0205078125, 3.60107421875, 5.181640625, 6.76220703125, 8.3427734375, 9.92333984375, 11.50390625, 13.08447265625, 14.6650390625, 16.24560546875, 17.826171875, 19.40673828125, 20.9873046875, 22.56787109375, 24.1484375, 25.72900390625, 27.3095703125, 28.89013671875, 30.470703125, 32.05126953125, 33.6318359375, 35.21240234375, 36.79296875, 38.37353515625, 39.9541015625, 41.53466796875, 43.115234375, 44.69580078125, 46.2763671875, 47.85693359375, 49.4375]}, "gradients/decoder.transformer.h.14.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 3.0, 5.0, 13.0, 23.0, 25.0, 44.0, 98.0, 146.0, 230.0, 383.0, 582.0, 724.0, 689.0, 446.0, 266.0, 156.0, 96.0, 57.0, 38.0, 21.0, 15.0, 8.0, 6.0, 6.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.59375, -47.26708984375, -45.9404296875, -44.61376953125, -43.287109375, -41.96044921875, -40.6337890625, -39.30712890625, -37.98046875, -36.65380859375, -35.3271484375, -34.00048828125, -32.673828125, -31.34716796875, -30.0205078125, -28.69384765625, -27.3671875, -26.04052734375, -24.7138671875, -23.38720703125, -22.060546875, -20.73388671875, -19.4072265625, -18.08056640625, -16.75390625, -15.42724609375, -14.1005859375, -12.77392578125, -11.447265625, -10.12060546875, -8.7939453125, -7.46728515625, -6.140625, -4.81396484375, -3.4873046875, -2.16064453125, -0.833984375, 0.49267578125, 1.8193359375, 3.14599609375, 4.47265625, 5.79931640625, 7.1259765625, 8.45263671875, 9.779296875, 11.10595703125, 12.4326171875, 13.75927734375, 15.0859375, 16.41259765625, 17.7392578125, 19.06591796875, 20.392578125, 21.71923828125, 23.0458984375, 24.37255859375, 25.69921875, 27.02587890625, 28.3525390625, 29.67919921875, 31.005859375, 32.33251953125, 33.6591796875, 34.98583984375, 36.3125]}, "gradients/decoder.transformer.h.14.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 10.0, 6.0, 11.0, 13.0, 19.0, 26.0, 34.0, 57.0, 80.0, 116.0, 148.0, 238.0, 525.0, 1448.0, 39333.0, 4101588.0, 47867.0, 1489.0, 481.0, 275.0, 144.0, 88.0, 65.0, 64.0, 52.0, 37.0, 17.0, 14.0, 9.0, 12.0, 7.0, 6.0, 4.0, 0.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-134.125, -130.123046875, -126.12109375, -122.119140625, -118.1171875, -114.115234375, -110.11328125, -106.111328125, -102.109375, -98.107421875, -94.10546875, -90.103515625, -86.1015625, -82.099609375, -78.09765625, -74.095703125, -70.09375, -66.091796875, -62.08984375, -58.087890625, -54.0859375, -50.083984375, -46.08203125, -42.080078125, -38.078125, -34.076171875, -30.07421875, -26.072265625, -22.0703125, -18.068359375, -14.06640625, -10.064453125, -6.0625, -2.060546875, 1.94140625, 5.943359375, 9.9453125, 13.947265625, 17.94921875, 21.951171875, 25.953125, 29.955078125, 33.95703125, 37.958984375, 41.9609375, 45.962890625, 49.96484375, 53.966796875, 57.96875, 61.970703125, 65.97265625, 69.974609375, 73.9765625, 77.978515625, 81.98046875, 85.982421875, 89.984375, 93.986328125, 97.98828125, 101.990234375, 105.9921875, 109.994140625, 113.99609375, 117.998046875, 122.0]}, "gradients/decoder.transformer.h.14.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 19.0, 125.0, 356.0, 372.0, 116.0, 25.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-473.57769775390625, -460.76361083984375, -447.94952392578125, -435.13543701171875, -422.32135009765625, -409.50726318359375, -396.69317626953125, -383.87908935546875, -371.06500244140625, -358.25091552734375, -345.43682861328125, -332.62274169921875, -319.80865478515625, -306.99456787109375, -294.18048095703125, -281.36639404296875, -268.5523376464844, -255.73825073242188, -242.92416381835938, -230.11007690429688, -217.29598999023438, -204.48190307617188, -191.66783142089844, -178.85374450683594, -166.03965759277344, -153.22557067871094, -140.41148376464844, -127.59740447998047, -114.78331756591797, -101.96923065185547, -89.1551513671875, -76.341064453125, -63.527008056640625, -50.712921142578125, -37.89883804321289, -25.084754943847656, -12.270668029785156, 0.5434188842773438, 13.357498168945312, 26.171585083007812, 38.98567199707031, 51.79975891113281, 64.61384582519531, 77.42792510986328, 90.24201202392578, 103.05609893798828, 115.87017822265625, 128.68426513671875, 141.49835205078125, 154.31243896484375, 167.12652587890625, 179.94061279296875, 192.75469970703125, 205.56878662109375, 218.3828582763672, 231.1969451904297, 244.0110321044922, 256.8251037597656, 269.6391906738281, 282.4532775878906, 295.2673645019531, 308.0814514160156, 320.8955383300781, 333.7096252441406, 346.5237121582031]}, "gradients/decoder.transformer.h.14.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 5.0, 3.0, 6.0, 5.0, 8.0, 8.0, 7.0, 8.0, 15.0, 18.0, 10.0, 19.0, 28.0, 19.0, 23.0, 18.0, 25.0, 30.0, 27.0, 36.0, 24.0, 40.0, 35.0, 43.0, 34.0, 31.0, 31.0, 43.0, 40.0, 35.0, 34.0, 31.0, 44.0, 34.0, 20.0, 13.0, 17.0, 23.0, 15.0, 18.0, 12.0, 10.0, 9.0, 13.0, 9.0, 8.0, 7.0, 5.0, 5.0, 1.0, 3.0, 1.0, 6.0, 1.0, 0.0, 1.0, 1.0, 3.0], "bins": [-76.84820556640625, -74.42475891113281, -72.00131225585938, -69.57787322998047, -67.15442657470703, -64.7309799194336, -62.307533264160156, -59.88408660888672, -57.46064376831055, -55.03719711303711, -52.61375427246094, -50.1903076171875, -47.76686096191406, -45.34341812133789, -42.91997146606445, -40.49652862548828, -38.073081970214844, -35.649635314941406, -33.226192474365234, -30.802745819091797, -28.379301071166992, -25.955856323242188, -23.53240966796875, -21.108964920043945, -18.68552017211914, -16.262075424194336, -13.838629722595215, -11.415184020996094, -8.991739273071289, -6.568294525146484, -4.144848823547363, -1.7214031219482422, 0.7020416259765625, 3.1254868507385254, 5.548932075500488, 7.972377300262451, 10.395822525024414, 12.819267272949219, 15.24271297454834, 17.66615867614746, 20.089603424072266, 22.51304817199707, 24.936492919921875, 27.359939575195312, 29.783384323120117, 32.20682907104492, 34.63027572631836, 37.05371856689453, 39.47716522216797, 41.900611877441406, 44.32405471801758, 46.747501373291016, 49.17094421386719, 51.594390869140625, 54.01783752441406, 56.4412841796875, 58.86472702026367, 61.28817367553711, 63.71161651611328, 66.13506317138672, 68.55850982666016, 70.98194885253906, 73.4053955078125, 75.82884216308594, 78.25228881835938]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 5.0, 3.0, 6.0, 6.0, 12.0, 11.0, 9.0, 15.0, 19.0, 26.0, 24.0, 34.0, 37.0, 27.0, 28.0, 35.0, 40.0, 37.0, 51.0, 45.0, 43.0, 57.0, 52.0, 45.0, 41.0, 39.0, 40.0, 23.0, 27.0, 23.0, 22.0, 23.0, 28.0, 21.0, 12.0, 15.0, 6.0, 5.0, 7.0, 6.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.7109375, -12.2952880859375, -11.879638671875, -11.4639892578125, -11.04833984375, -10.6326904296875, -10.217041015625, -9.8013916015625, -9.3857421875, -8.9700927734375, -8.554443359375, -8.1387939453125, -7.72314453125, -7.3074951171875, -6.891845703125, -6.4761962890625, -6.060546875, -5.6448974609375, -5.229248046875, -4.8135986328125, -4.39794921875, -3.9822998046875, -3.566650390625, -3.1510009765625, -2.7353515625, -2.3197021484375, -1.904052734375, -1.4884033203125, -1.07275390625, -0.6571044921875, -0.241455078125, 0.1741943359375, 0.58984375, 1.0054931640625, 1.421142578125, 1.8367919921875, 2.25244140625, 2.6680908203125, 3.083740234375, 3.4993896484375, 3.9150390625, 4.3306884765625, 4.746337890625, 5.1619873046875, 5.57763671875, 5.9932861328125, 6.408935546875, 6.8245849609375, 7.240234375, 7.6558837890625, 8.071533203125, 8.4871826171875, 8.90283203125, 9.3184814453125, 9.734130859375, 10.1497802734375, 10.5654296875, 10.9810791015625, 11.396728515625, 11.8123779296875, 12.22802734375, 12.6436767578125, 13.059326171875, 13.4749755859375, 13.890625]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 5.0, 6.0, 10.0, 15.0, 31.0, 47.0, 79.0, 105.0, 189.0, 285.0, 409.0, 749.0, 1110.0, 1784.0, 2835.0, 4491.0, 7158.0, 11356.0, 18292.0, 28540.0, 44680.0, 69459.0, 106582.0, 161643.0, 196285.0, 138879.0, 90902.0, 59384.0, 38239.0, 23945.0, 15183.0, 9607.0, 6023.0, 3738.0, 2428.0, 1482.0, 965.0, 627.0, 373.0, 224.0, 158.0, 100.0, 68.0, 37.0, 26.0, 16.0, 6.0, 1.0, 5.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.3974609375, -1.3555145263671875, -1.313568115234375, -1.2716217041015625, -1.22967529296875, -1.1877288818359375, -1.145782470703125, -1.1038360595703125, -1.0618896484375, -1.0199432373046875, -0.977996826171875, -0.9360504150390625, -0.89410400390625, -0.8521575927734375, -0.810211181640625, -0.7682647705078125, -0.726318359375, -0.6843719482421875, -0.642425537109375, -0.6004791259765625, -0.55853271484375, -0.5165863037109375, -0.474639892578125, -0.4326934814453125, -0.3907470703125, -0.3488006591796875, -0.306854248046875, -0.2649078369140625, -0.22296142578125, -0.1810150146484375, -0.139068603515625, -0.0971221923828125, -0.05517578125, -0.0132293701171875, 0.028717041015625, 0.0706634521484375, 0.11260986328125, 0.1545562744140625, 0.196502685546875, 0.2384490966796875, 0.2803955078125, 0.3223419189453125, 0.364288330078125, 0.4062347412109375, 0.44818115234375, 0.4901275634765625, 0.532073974609375, 0.5740203857421875, 0.615966796875, 0.6579132080078125, 0.699859619140625, 0.7418060302734375, 0.78375244140625, 0.8256988525390625, 0.867645263671875, 0.9095916748046875, 0.9515380859375, 0.9934844970703125, 1.035430908203125, 1.0773773193359375, 1.11932373046875, 1.1612701416015625, 1.203216552734375, 1.2451629638671875, 1.287109375]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 1.0, 1.0, 1.0, 3.0, 4.0, 4.0, 5.0, 12.0, 7.0, 13.0, 19.0, 13.0, 16.0, 22.0, 29.0, 22.0, 26.0, 31.0, 35.0, 29.0, 47.0, 26.0, 36.0, 54.0, 38.0, 1059.0, 46.0, 45.0, 32.0, 45.0, 34.0, 35.0, 26.0, 26.0, 31.0, 21.0, 19.0, 25.0, 15.0, 14.0, 13.0, 11.0, 14.0, 9.0, 4.0, 5.0, 2.0, 7.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.5625, -7.326904296875, -7.09130859375, -6.855712890625, -6.6201171875, -6.384521484375, -6.14892578125, -5.913330078125, -5.677734375, -5.442138671875, -5.20654296875, -4.970947265625, -4.7353515625, -4.499755859375, -4.26416015625, -4.028564453125, -3.79296875, -3.557373046875, -3.32177734375, -3.086181640625, -2.8505859375, -2.614990234375, -2.37939453125, -2.143798828125, -1.908203125, -1.672607421875, -1.43701171875, -1.201416015625, -0.9658203125, -0.730224609375, -0.49462890625, -0.259033203125, -0.0234375, 0.212158203125, 0.44775390625, 0.683349609375, 0.9189453125, 1.154541015625, 1.39013671875, 1.625732421875, 1.861328125, 2.096923828125, 2.33251953125, 2.568115234375, 2.8037109375, 3.039306640625, 3.27490234375, 3.510498046875, 3.74609375, 3.981689453125, 4.21728515625, 4.452880859375, 4.6884765625, 4.924072265625, 5.15966796875, 5.395263671875, 5.630859375, 5.866455078125, 6.10205078125, 6.337646484375, 6.5732421875, 6.808837890625, 7.04443359375, 7.280029296875, 7.515625]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 4.0, 3.0, 2.0, 3.0, 19.0, 17.0, 30.0, 26.0, 69.0, 77.0, 110.0, 179.0, 294.0, 457.0, 698.0, 997.0, 1477.0, 2257.0, 3339.0, 5059.0, 7751.0, 11645.0, 17947.0, 26704.0, 40513.0, 61160.0, 93937.0, 145612.0, 1243961.0, 149845.0, 96871.0, 63250.0, 41103.0, 27704.0, 18125.0, 12155.0, 8048.0, 5313.0, 3471.0, 2365.0, 1534.0, 995.0, 712.0, 480.0, 273.0, 203.0, 104.0, 83.0, 51.0, 46.0, 29.0, 18.0, 6.0, 5.0, 4.0, 0.0, 2.0, 3.0, 3.0, 1.0], "bins": [-1.216796875, -1.1793975830078125, -1.141998291015625, -1.1045989990234375, -1.06719970703125, -1.0298004150390625, -0.992401123046875, -0.9550018310546875, -0.9176025390625, -0.8802032470703125, -0.842803955078125, -0.8054046630859375, -0.76800537109375, -0.7306060791015625, -0.693206787109375, -0.6558074951171875, -0.618408203125, -0.5810089111328125, -0.543609619140625, -0.5062103271484375, -0.46881103515625, -0.4314117431640625, -0.394012451171875, -0.3566131591796875, -0.3192138671875, -0.2818145751953125, -0.244415283203125, -0.2070159912109375, -0.16961669921875, -0.1322174072265625, -0.094818115234375, -0.0574188232421875, -0.02001953125, 0.0173797607421875, 0.054779052734375, 0.0921783447265625, 0.12957763671875, 0.1669769287109375, 0.204376220703125, 0.2417755126953125, 0.2791748046875, 0.3165740966796875, 0.353973388671875, 0.3913726806640625, 0.42877197265625, 0.4661712646484375, 0.503570556640625, 0.5409698486328125, 0.578369140625, 0.6157684326171875, 0.653167724609375, 0.6905670166015625, 0.72796630859375, 0.7653656005859375, 0.802764892578125, 0.8401641845703125, 0.8775634765625, 0.9149627685546875, 0.952362060546875, 0.9897613525390625, 1.02716064453125, 1.0645599365234375, 1.101959228515625, 1.1393585205078125, 1.1767578125]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 5.0, 4.0, 9.0, 11.0, 11.0, 16.0, 16.0, 18.0, 22.0, 24.0, 18.0, 27.0, 39.0, 45.0, 49.0, 56.0, 64.0, 64.0, 75.0, 65.0, 47.0, 55.0, 37.0, 49.0, 34.0, 26.0, 27.0, 22.0, 16.0, 11.0, 8.0, 5.0, 7.0, 7.0, 5.0, 5.0, 2.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.0038356781005859375, -0.003734409809112549, -0.00363314151763916, -0.0035318732261657715, -0.003430604934692383, -0.003329336643218994, -0.0032280683517456055, -0.003126800060272217, -0.003025531768798828, -0.0029242634773254395, -0.0028229951858520508, -0.002721726894378662, -0.0026204586029052734, -0.0025191903114318848, -0.002417922019958496, -0.0023166537284851074, -0.0022153854370117188, -0.00211411714553833, -0.0020128488540649414, -0.0019115805625915527, -0.001810312271118164, -0.0017090439796447754, -0.0016077756881713867, -0.001506507396697998, -0.0014052391052246094, -0.0013039708137512207, -0.001202702522277832, -0.0011014342308044434, -0.0010001659393310547, -0.000898897647857666, -0.0007976293563842773, -0.0006963610649108887, -0.0005950927734375, -0.0004938244819641113, -0.00039255619049072266, -0.000291287899017334, -0.0001900196075439453, -8.875131607055664e-05, 1.2516975402832031e-05, 0.0001137852668762207, 0.00021505355834960938, 0.00031632184982299805, 0.0004175901412963867, 0.0005188584327697754, 0.0006201267242431641, 0.0007213950157165527, 0.0008226633071899414, 0.0009239315986633301, 0.0010251998901367188, 0.0011264681816101074, 0.001227736473083496, 0.0013290047645568848, 0.0014302730560302734, 0.0015315413475036621, 0.0016328096389770508, 0.0017340779304504395, 0.0018353462219238281, 0.0019366145133972168, 0.0020378828048706055, 0.002139151096343994, 0.002240419387817383, 0.0023416876792907715, 0.00244295597076416, 0.002544224262237549, 0.0026454925537109375]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.weight": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 1.0, 1.0, 4.0, 5.0, 6.0, 6.0, 6.0, 4.0, 6.0, 7.0, 18.0, 17.0, 22.0, 34.0, 32.0, 41.0, 74.0, 96.0, 156.0, 255.0, 688.0, 6251.0, 1031618.0, 7664.0, 769.0, 269.0, 136.0, 98.0, 55.0, 47.0, 27.0, 23.0, 29.0, 23.0, 14.0, 11.0, 13.0, 8.0, 11.0, 11.0, 2.0, 5.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.057769775390625, -0.055506229400634766, -0.05324268341064453, -0.0509791374206543, -0.04871559143066406, -0.04645204544067383, -0.044188499450683594, -0.04192495346069336, -0.039661407470703125, -0.03739786148071289, -0.035134315490722656, -0.03287076950073242, -0.030607223510742188, -0.028343677520751953, -0.02608013153076172, -0.023816585540771484, -0.02155303955078125, -0.019289493560791016, -0.01702594757080078, -0.014762401580810547, -0.012498855590820312, -0.010235309600830078, -0.007971763610839844, -0.005708217620849609, -0.003444671630859375, -0.0011811256408691406, 0.0010824203491210938, 0.003345966339111328, 0.0056095123291015625, 0.007873058319091797, 0.010136604309082031, 0.012400150299072266, 0.0146636962890625, 0.016927242279052734, 0.01919078826904297, 0.021454334259033203, 0.023717880249023438, 0.025981426239013672, 0.028244972229003906, 0.03050851821899414, 0.032772064208984375, 0.03503561019897461, 0.037299156188964844, 0.03956270217895508, 0.04182624816894531, 0.04408979415893555, 0.04635334014892578, 0.048616886138916016, 0.05088043212890625, 0.053143978118896484, 0.05540752410888672, 0.05767107009887695, 0.05993461608886719, 0.06219816207885742, 0.06446170806884766, 0.06672525405883789, 0.06898880004882812, 0.07125234603881836, 0.0735158920288086, 0.07577943801879883, 0.07804298400878906, 0.0803065299987793, 0.08257007598876953, 0.08483362197875977, 0.08709716796875]}, "gradients/decoder.transformer.h.14.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 58.0, 545.0, 384.0, 25.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0028951370622962713, -0.002528177807107568, -0.0021612183190882206, -0.001794259063899517, -0.0014272998087108135, -0.00106034055352211, -0.0006933810655027628, -0.00032642181031405926, 4.053744487464428e-05, 0.00040749675827100873, 0.0007744560716673732, 0.0011414154432713985, 0.001508374698460102, 0.0018753339536488056, 0.002242293441668153, 0.0026092526968568563, 0.00297621195204556, 0.0033431712072342634, 0.0037101306952536106, 0.004077089950442314, 0.004444049205631018, 0.004811008460819721, 0.005177968181669712, 0.005544926971197128, 0.005911886692047119, 0.006278845947235823, 0.006645805202424526, 0.007012764923274517, 0.007379723712801933, 0.007746683433651924, 0.008113643154501915, 0.008480601944029331, 0.008847560733556747, 0.009214520454406738, 0.009581479243934155, 0.009948438964784145, 0.010315397754311562, 0.010682357475161552, 0.011049317196011543, 0.01141627598553896, 0.011783234775066376, 0.012150194495916367, 0.012517153285443783, 0.012884113006293774, 0.01325107179582119, 0.01361803151667118, 0.013984991237521172, 0.014351950027048588, 0.014718909747898579, 0.01508586946874857, 0.015452828258275986, 0.015819787979125977, 0.016186747699975967, 0.01655370555818081, 0.0169206652790308, 0.01728762499988079, 0.01765458472073078, 0.018021544441580772, 0.018388504162430763, 0.018755462020635605, 0.019122421741485596, 0.019489381462335587, 0.019856341183185577, 0.020223300904035568, 0.02059025876224041]}, "gradients/decoder.transformer.h.14.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 4.0, 1.0, 6.0, 1.0, 7.0, 5.0, 7.0, 10.0, 15.0, 11.0, 10.0, 17.0, 22.0, 18.0, 27.0, 34.0, 38.0, 24.0, 30.0, 29.0, 26.0, 34.0, 45.0, 51.0, 45.0, 32.0, 36.0, 33.0, 30.0, 28.0, 31.0, 51.0, 27.0, 28.0, 29.0, 33.0, 23.0, 23.0, 17.0, 8.0, 13.0, 13.0, 9.0, 5.0, 7.0, 7.0, 3.0, 6.0, 3.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0014243125915527344, -0.0013732602819800377, -0.001322207972407341, -0.0012711556628346443, -0.0012201033532619476, -0.001169051043689251, -0.0011179987341165543, -0.0010669464245438576, -0.0010158941149711609, -0.0009648418053984642, -0.0009137894958257675, -0.0008627371862530708, -0.0008116848766803741, -0.0007606325671076775, -0.0007095802575349808, -0.0006585279479622841, -0.0006074756383895874, -0.0005564233288168907, -0.000505371019244194, -0.00045431870967149734, -0.00040326640009880066, -0.000352214090526104, -0.0003011617809534073, -0.0002501094713807106, -0.00019905716180801392, -0.00014800485223531723, -9.695254266262054e-05, -4.590023308992386e-05, 5.152076482772827e-06, 5.620438605546951e-05, 0.0001072566956281662, 0.00015830900520086288, 0.00020936131477355957, 0.00026041362434625626, 0.00031146593391895294, 0.00036251824349164963, 0.0004135705530643463, 0.000464622862637043, 0.0005156751722097397, 0.0005667274817824364, 0.0006177797913551331, 0.0006688321009278297, 0.0007198844105005264, 0.0007709367200732231, 0.0008219890296459198, 0.0008730413392186165, 0.0009240936487913132, 0.0009751459583640099, 0.0010261982679367065, 0.0010772505775094032, 0.0011283028870821, 0.0011793551966547966, 0.0012304075062274933, 0.00128145981580019, 0.0013325121253728867, 0.0013835644349455833, 0.00143461674451828, 0.0014856690540909767, 0.0015367213636636734, 0.00158777367323637, 0.0016388259828090668, 0.0016898782923817635, 0.0017409306019544601, 0.0017919829115271568, 0.0018430352210998535]}, "gradients/decoder.transformer.h.14.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 5.0, 3.0, 6.0, 6.0, 12.0, 11.0, 9.0, 15.0, 19.0, 26.0, 24.0, 34.0, 37.0, 27.0, 28.0, 35.0, 40.0, 37.0, 51.0, 45.0, 43.0, 57.0, 52.0, 45.0, 41.0, 39.0, 40.0, 23.0, 27.0, 23.0, 22.0, 23.0, 28.0, 21.0, 12.0, 15.0, 6.0, 5.0, 7.0, 6.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.7109375, -12.2952880859375, -11.879638671875, -11.4639892578125, -11.04833984375, -10.6326904296875, -10.217041015625, -9.8013916015625, -9.3857421875, -8.9700927734375, -8.554443359375, -8.1387939453125, -7.72314453125, -7.3074951171875, -6.891845703125, -6.4761962890625, -6.060546875, -5.6448974609375, -5.229248046875, -4.8135986328125, -4.39794921875, -3.9822998046875, -3.566650390625, -3.1510009765625, -2.7353515625, -2.3197021484375, -1.904052734375, -1.4884033203125, -1.07275390625, -0.6571044921875, -0.241455078125, 0.1741943359375, 0.58984375, 1.0054931640625, 1.421142578125, 1.8367919921875, 2.25244140625, 2.6680908203125, 3.083740234375, 3.4993896484375, 3.9150390625, 4.3306884765625, 4.746337890625, 5.1619873046875, 5.57763671875, 5.9932861328125, 6.408935546875, 6.8245849609375, 7.240234375, 7.6558837890625, 8.071533203125, 8.4871826171875, 8.90283203125, 9.3184814453125, 9.734130859375, 10.1497802734375, 10.5654296875, 10.9810791015625, 11.396728515625, 11.8123779296875, 12.22802734375, 12.6436767578125, 13.059326171875, 13.4749755859375, 13.890625]}, "gradients/decoder.transformer.h.14.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 5.0, 7.0, 4.0, 15.0, 9.0, 17.0, 30.0, 41.0, 32.0, 65.0, 83.0, 91.0, 130.0, 181.0, 260.0, 366.0, 568.0, 895.0, 1665.0, 3184.0, 6844.0, 16959.0, 45408.0, 129816.0, 459896.0, 250378.0, 80537.0, 29081.0, 11097.0, 4865.0, 2389.0, 1290.0, 795.0, 428.0, 282.0, 220.0, 150.0, 113.0, 93.0, 69.0, 51.0, 35.0, 37.0, 25.0, 21.0, 11.0, 8.0, 4.0, 6.0, 0.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-10.703125, -10.3553466796875, -10.007568359375, -9.6597900390625, -9.31201171875, -8.9642333984375, -8.616455078125, -8.2686767578125, -7.9208984375, -7.5731201171875, -7.225341796875, -6.8775634765625, -6.52978515625, -6.1820068359375, -5.834228515625, -5.4864501953125, -5.138671875, -4.7908935546875, -4.443115234375, -4.0953369140625, -3.74755859375, -3.3997802734375, -3.052001953125, -2.7042236328125, -2.3564453125, -2.0086669921875, -1.660888671875, -1.3131103515625, -0.96533203125, -0.6175537109375, -0.269775390625, 0.0780029296875, 0.42578125, 0.7735595703125, 1.121337890625, 1.4691162109375, 1.81689453125, 2.1646728515625, 2.512451171875, 2.8602294921875, 3.2080078125, 3.5557861328125, 3.903564453125, 4.2513427734375, 4.59912109375, 4.9468994140625, 5.294677734375, 5.6424560546875, 5.990234375, 6.3380126953125, 6.685791015625, 7.0335693359375, 7.38134765625, 7.7291259765625, 8.076904296875, 8.4246826171875, 8.7724609375, 9.1202392578125, 9.468017578125, 9.8157958984375, 10.16357421875, 10.5113525390625, 10.859130859375, 11.2069091796875, 11.5546875]}, "gradients/decoder.transformer.h.14.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 3.0, 1.0, 0.0, 4.0, 5.0, 7.0, 9.0, 12.0, 19.0, 17.0, 14.0, 28.0, 20.0, 27.0, 24.0, 34.0, 43.0, 34.0, 51.0, 50.0, 62.0, 121.0, 1572.0, 341.0, 104.0, 63.0, 50.0, 51.0, 49.0, 30.0, 37.0, 31.0, 24.0, 21.0, 20.0, 10.0, 15.0, 13.0, 9.0, 5.0, 7.0, 5.0, 5.0, 2.0, 2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 3.0, 1.0, 1.0], "bins": [-44.875, -43.55078125, -42.2265625, -40.90234375, -39.578125, -38.25390625, -36.9296875, -35.60546875, -34.28125, -32.95703125, -31.6328125, -30.30859375, -28.984375, -27.66015625, -26.3359375, -25.01171875, -23.6875, -22.36328125, -21.0390625, -19.71484375, -18.390625, -17.06640625, -15.7421875, -14.41796875, -13.09375, -11.76953125, -10.4453125, -9.12109375, -7.796875, -6.47265625, -5.1484375, -3.82421875, -2.5, -1.17578125, 0.1484375, 1.47265625, 2.796875, 4.12109375, 5.4453125, 6.76953125, 8.09375, 9.41796875, 10.7421875, 12.06640625, 13.390625, 14.71484375, 16.0390625, 17.36328125, 18.6875, 20.01171875, 21.3359375, 22.66015625, 23.984375, 25.30859375, 26.6328125, 27.95703125, 29.28125, 30.60546875, 31.9296875, 33.25390625, 34.578125, 35.90234375, 37.2265625, 38.55078125, 39.875]}, "gradients/decoder.transformer.h.14.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 8.0, 3.0, 4.0, 7.0, 18.0, 24.0, 45.0, 59.0, 106.0, 163.0, 297.0, 551.0, 3451.0, 3132244.0, 7274.0, 671.0, 318.0, 176.0, 105.0, 69.0, 60.0, 28.0, 16.0, 8.0, 3.0, 5.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-182.75, -176.962890625, -171.17578125, -165.388671875, -159.6015625, -153.814453125, -148.02734375, -142.240234375, -136.453125, -130.666015625, -124.87890625, -119.091796875, -113.3046875, -107.517578125, -101.73046875, -95.943359375, -90.15625, -84.369140625, -78.58203125, -72.794921875, -67.0078125, -61.220703125, -55.43359375, -49.646484375, -43.859375, -38.072265625, -32.28515625, -26.498046875, -20.7109375, -14.923828125, -9.13671875, -3.349609375, 2.4375, 8.224609375, 14.01171875, 19.798828125, 25.5859375, 31.373046875, 37.16015625, 42.947265625, 48.734375, 54.521484375, 60.30859375, 66.095703125, 71.8828125, 77.669921875, 83.45703125, 89.244140625, 95.03125, 100.818359375, 106.60546875, 112.392578125, 118.1796875, 123.966796875, 129.75390625, 135.541015625, 141.328125, 147.115234375, 152.90234375, 158.689453125, 164.4765625, 170.263671875, 176.05078125, 181.837890625, 187.625]}, "gradients/decoder.transformer.h.14.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 6.0, 617.0, 389.0, 5.0, 2.0], "bins": [-958.7362060546875, -942.990478515625, -927.2448120117188, -911.4990844726562, -895.75341796875, -880.0076904296875, -864.2620239257812, -848.5162963867188, -832.7706298828125, -817.02490234375, -801.2792358398438, -785.5335083007812, -769.787841796875, -754.0421142578125, -738.2964477539062, -722.5507202148438, -706.8050537109375, -691.059326171875, -675.3136596679688, -659.5679321289062, -643.822265625, -628.0765380859375, -612.3308715820312, -596.5851440429688, -580.8394165039062, -565.0936889648438, -549.3480224609375, -533.602294921875, -517.8566284179688, -502.1109313964844, -486.365234375, -470.6195068359375, -454.87384033203125, -439.1281433105469, -423.3824462890625, -407.6367492675781, -391.89105224609375, -376.1453552246094, -360.399658203125, -344.6539306640625, -328.9082336425781, -313.16253662109375, -297.4168395996094, -281.671142578125, -265.9254455566406, -250.17974853515625, -234.4340362548828, -218.68833923339844, -202.94264221191406, -187.1969451904297, -171.4512481689453, -155.70553588867188, -139.9598388671875, -124.21414947509766, -108.46844482421875, -92.72274780273438, -76.97705078125, -61.231353759765625, -45.485652923583984, -29.739952087402344, -13.994255065917969, 1.7514419555664062, 17.497146606445312, 33.24284362792969, 48.9885368347168]}, "gradients/decoder.transformer.h.14.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 4.0, 6.0, 8.0, 5.0, 10.0, 10.0, 10.0, 13.0, 13.0, 13.0, 19.0, 18.0, 22.0, 32.0, 35.0, 18.0, 29.0, 37.0, 40.0, 42.0, 44.0, 58.0, 41.0, 45.0, 38.0, 45.0, 42.0, 34.0, 36.0, 28.0, 32.0, 22.0, 33.0, 24.0, 22.0, 13.0, 9.0, 11.0, 8.0, 9.0, 5.0, 4.0, 9.0, 4.0, 3.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-114.29349517822266, -110.54558563232422, -106.79767608642578, -103.04976654052734, -99.30184936523438, -95.55393981933594, -91.8060302734375, -88.05812072753906, -84.31021118164062, -80.56230163574219, -76.81439208984375, -73.06648254394531, -69.31857299804688, -65.57066345214844, -61.82274627685547, -58.07483673095703, -54.326927185058594, -50.579017639160156, -46.83110809326172, -43.083194732666016, -39.33528518676758, -35.58737564086914, -31.83946418762207, -28.091552734375, -24.343643188476562, -20.595733642578125, -16.847822189331055, -13.0999116897583, -9.352001190185547, -5.604091644287109, -1.856180191040039, 1.8917312622070312, 5.6396484375, 9.387558937072754, 13.135469436645508, 16.883380889892578, 20.631290435791016, 24.379199981689453, 28.127111434936523, 31.875022888183594, 35.62293243408203, 39.37084197998047, 43.118751525878906, 46.86666488647461, 50.61457443237305, 54.362483978271484, 58.11039733886719, 61.858306884765625, 65.60621643066406, 69.3541259765625, 73.10203552246094, 76.84994506835938, 80.59785461425781, 84.34576416015625, 88.09368133544922, 91.84159088134766, 95.5895004272461, 99.33740997314453, 103.08531951904297, 106.8332290649414, 110.58114624023438, 114.32905578613281, 118.07696533203125, 121.82487487792969, 125.57278442382812]}, "gradients/decoder.transformer.h.13.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 2.0, 1.0, 3.0, 2.0, 5.0, 8.0, 8.0, 12.0, 10.0, 7.0, 21.0, 11.0, 32.0, 27.0, 33.0, 30.0, 31.0, 28.0, 38.0, 45.0, 31.0, 57.0, 57.0, 37.0, 50.0, 52.0, 47.0, 47.0, 31.0, 41.0, 19.0, 21.0, 24.0, 21.0, 24.0, 26.0, 16.0, 17.0, 10.0, 7.0, 5.0, 6.0, 6.0, 4.0, 5.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.84375, -13.4000244140625, -12.956298828125, -12.5125732421875, -12.06884765625, -11.6251220703125, -11.181396484375, -10.7376708984375, -10.2939453125, -9.8502197265625, -9.406494140625, -8.9627685546875, -8.51904296875, -8.0753173828125, -7.631591796875, -7.1878662109375, -6.744140625, -6.3004150390625, -5.856689453125, -5.4129638671875, -4.96923828125, -4.5255126953125, -4.081787109375, -3.6380615234375, -3.1943359375, -2.7506103515625, -2.306884765625, -1.8631591796875, -1.41943359375, -0.9757080078125, -0.531982421875, -0.0882568359375, 0.35546875, 0.7991943359375, 1.242919921875, 1.6866455078125, 2.13037109375, 2.5740966796875, 3.017822265625, 3.4615478515625, 3.9052734375, 4.3489990234375, 4.792724609375, 5.2364501953125, 5.68017578125, 6.1239013671875, 6.567626953125, 7.0113525390625, 7.455078125, 7.8988037109375, 8.342529296875, 8.7862548828125, 9.22998046875, 9.6737060546875, 10.117431640625, 10.5611572265625, 11.0048828125, 11.4486083984375, 11.892333984375, 12.3360595703125, 12.77978515625, 13.2235107421875, 13.667236328125, 14.1109619140625, 14.5546875]}, "gradients/decoder.transformer.h.13.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 2.0, 7.0, 3.0, 8.0, 8.0, 10.0, 9.0, 12.0, 17.0, 16.0, 21.0, 22.0, 24.0, 30.0, 35.0, 39.0, 79.0, 137.0, 454.0, 1794.0, 19137.0, 1103636.0, 2986106.0, 77615.0, 3796.0, 692.0, 209.0, 85.0, 42.0, 32.0, 25.0, 25.0, 19.0, 26.0, 15.0, 27.0, 16.0, 12.0, 16.0, 6.0, 8.0, 8.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.34375, -45.77587890625, -44.2080078125, -42.64013671875, -41.072265625, -39.50439453125, -37.9365234375, -36.36865234375, -34.80078125, -33.23291015625, -31.6650390625, -30.09716796875, -28.529296875, -26.96142578125, -25.3935546875, -23.82568359375, -22.2578125, -20.68994140625, -19.1220703125, -17.55419921875, -15.986328125, -14.41845703125, -12.8505859375, -11.28271484375, -9.71484375, -8.14697265625, -6.5791015625, -5.01123046875, -3.443359375, -1.87548828125, -0.3076171875, 1.26025390625, 2.828125, 4.39599609375, 5.9638671875, 7.53173828125, 9.099609375, 10.66748046875, 12.2353515625, 13.80322265625, 15.37109375, 16.93896484375, 18.5068359375, 20.07470703125, 21.642578125, 23.21044921875, 24.7783203125, 26.34619140625, 27.9140625, 29.48193359375, 31.0498046875, 32.61767578125, 34.185546875, 35.75341796875, 37.3212890625, 38.88916015625, 40.45703125, 42.02490234375, 43.5927734375, 45.16064453125, 46.728515625, 48.29638671875, 49.8642578125, 51.43212890625, 53.0]}, "gradients/decoder.transformer.h.13.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 12.0, 5.0, 8.0, 13.0, 25.0, 27.0, 36.0, 54.0, 71.0, 81.0, 106.0, 173.0, 202.0, 276.0, 418.0, 437.0, 474.0, 398.0, 335.0, 245.0, 204.0, 117.0, 78.0, 74.0, 48.0, 44.0, 33.0, 23.0, 12.0, 11.0, 4.0, 5.0, 6.0, 1.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-27.234375, -26.41455078125, -25.5947265625, -24.77490234375, -23.955078125, -23.13525390625, -22.3154296875, -21.49560546875, -20.67578125, -19.85595703125, -19.0361328125, -18.21630859375, -17.396484375, -16.57666015625, -15.7568359375, -14.93701171875, -14.1171875, -13.29736328125, -12.4775390625, -11.65771484375, -10.837890625, -10.01806640625, -9.1982421875, -8.37841796875, -7.55859375, -6.73876953125, -5.9189453125, -5.09912109375, -4.279296875, -3.45947265625, -2.6396484375, -1.81982421875, -1.0, -0.18017578125, 0.6396484375, 1.45947265625, 2.279296875, 3.09912109375, 3.9189453125, 4.73876953125, 5.55859375, 6.37841796875, 7.1982421875, 8.01806640625, 8.837890625, 9.65771484375, 10.4775390625, 11.29736328125, 12.1171875, 12.93701171875, 13.7568359375, 14.57666015625, 15.396484375, 16.21630859375, 17.0361328125, 17.85595703125, 18.67578125, 19.49560546875, 20.3154296875, 21.13525390625, 21.955078125, 22.77490234375, 23.5947265625, 24.41455078125, 25.234375]}, "gradients/decoder.transformer.h.13.mlp.c_fc.weight": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 10.0, 8.0, 12.0, 11.0, 11.0, 16.0, 24.0, 29.0, 37.0, 45.0, 72.0, 83.0, 102.0, 133.0, 148.0, 230.0, 362.0, 605.0, 2983.0, 157010.0, 3976423.0, 52178.0, 1844.0, 559.0, 300.0, 210.0, 174.0, 124.0, 131.0, 85.0, 59.0, 48.0, 53.0, 43.0, 30.0, 22.0, 11.0, 12.0, 11.0, 16.0, 7.0, 4.0, 2.0, 5.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-89.75, -86.697265625, -83.64453125, -80.591796875, -77.5390625, -74.486328125, -71.43359375, -68.380859375, -65.328125, -62.275390625, -59.22265625, -56.169921875, -53.1171875, -50.064453125, -47.01171875, -43.958984375, -40.90625, -37.853515625, -34.80078125, -31.748046875, -28.6953125, -25.642578125, -22.58984375, -19.537109375, -16.484375, -13.431640625, -10.37890625, -7.326171875, -4.2734375, -1.220703125, 1.83203125, 4.884765625, 7.9375, 10.990234375, 14.04296875, 17.095703125, 20.1484375, 23.201171875, 26.25390625, 29.306640625, 32.359375, 35.412109375, 38.46484375, 41.517578125, 44.5703125, 47.623046875, 50.67578125, 53.728515625, 56.78125, 59.833984375, 62.88671875, 65.939453125, 68.9921875, 72.044921875, 75.09765625, 78.150390625, 81.203125, 84.255859375, 87.30859375, 90.361328125, 93.4140625, 96.466796875, 99.51953125, 102.572265625, 105.625]}, "gradients/decoder.transformer.h.13.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 9.0, 44.0, 188.0, 391.0, 290.0, 80.0, 12.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-503.3935241699219, -491.60693359375, -479.82037353515625, -468.0337829589844, -456.2471923828125, -444.46063232421875, -432.6740417480469, -420.887451171875, -409.10089111328125, -397.3143005371094, -385.5277404785156, -373.74114990234375, -361.9545593261719, -350.1679992675781, -338.38140869140625, -326.5948486328125, -314.8082275390625, -303.0216369628906, -291.2350769042969, -279.448486328125, -267.6618957519531, -255.87533569335938, -244.0887451171875, -232.3021697998047, -220.51559448242188, -208.72901916503906, -196.9424285888672, -185.15585327148438, -173.36927795410156, -161.58270263671875, -149.79611206054688, -138.00953674316406, -126.22294616699219, -114.43636322021484, -102.64978790283203, -90.86320495605469, -79.07662963867188, -67.29004669189453, -55.50346374511719, -43.716888427734375, -31.93030548095703, -20.143726348876953, -8.357145309448242, 3.4294357299804688, 15.216014862060547, 27.002593994140625, 38.78917694091797, 50.57575225830078, 62.362335205078125, 74.14891815185547, 85.93549346923828, 97.72207641601562, 109.50865173339844, 121.29523468017578, 133.08181762695312, 144.86839294433594, 156.65496826171875, 168.44154357910156, 180.22813415527344, 192.01470947265625, 203.80128479003906, 215.58786010742188, 227.37445068359375, 239.16102600097656, 250.94761657714844]}, "gradients/decoder.transformer.h.13.ln_2.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 7.0, 6.0, 2.0, 14.0, 9.0, 11.0, 11.0, 9.0, 18.0, 21.0, 19.0, 16.0, 30.0, 22.0, 29.0, 39.0, 45.0, 31.0, 35.0, 38.0, 41.0, 54.0, 43.0, 38.0, 42.0, 41.0, 29.0, 38.0, 27.0, 35.0, 31.0, 20.0, 27.0, 20.0, 20.0, 11.0, 18.0, 10.0, 12.0, 11.0, 14.0, 6.0, 5.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-85.47265625, -82.67277526855469, -79.87289428710938, -77.0730209350586, -74.27313995361328, -71.47325897216797, -68.67337799072266, -65.87350463867188, -63.07362365722656, -60.27374267578125, -57.4738655090332, -54.67398452758789, -51.874107360839844, -49.07422637939453, -46.27434539794922, -43.47446823120117, -40.67458724975586, -37.87470626831055, -35.0748291015625, -32.27494812011719, -29.47507095336914, -26.675189971923828, -23.87531089782715, -21.07543182373047, -18.27555274963379, -15.47567367553711, -12.67579460144043, -9.875914573669434, -7.076035499572754, -4.276156425476074, -1.4762763977050781, 1.3236026763916016, 4.123481750488281, 6.923360824584961, 9.72323989868164, 12.523119926452637, 15.322999000549316, 18.122879028320312, 20.922758102416992, 23.722637176513672, 26.52251625061035, 29.32239532470703, 32.122276306152344, 34.92215347290039, 37.7220344543457, 40.52191162109375, 43.32179260253906, 46.121673583984375, 48.92155075073242, 51.721431732177734, 54.52130889892578, 57.321189880371094, 60.12106704711914, 62.92094802856445, 65.7208251953125, 68.52070617675781, 71.32058715820312, 74.12046813964844, 76.92034912109375, 79.72022247314453, 82.52010345458984, 85.31998443603516, 88.11986541748047, 90.91973876953125, 93.71961975097656]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 2.0, 4.0, 3.0, 6.0, 9.0, 13.0, 22.0, 19.0, 13.0, 18.0, 22.0, 32.0, 35.0, 37.0, 29.0, 38.0, 43.0, 39.0, 36.0, 38.0, 50.0, 35.0, 50.0, 40.0, 42.0, 41.0, 28.0, 31.0, 32.0, 33.0, 24.0, 15.0, 17.0, 19.0, 23.0, 18.0, 11.0, 7.0, 9.0, 5.0, 4.0, 4.0, 2.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-12.7265625, -12.3148193359375, -11.903076171875, -11.4913330078125, -11.07958984375, -10.6678466796875, -10.256103515625, -9.8443603515625, -9.4326171875, -9.0208740234375, -8.609130859375, -8.1973876953125, -7.78564453125, -7.3739013671875, -6.962158203125, -6.5504150390625, -6.138671875, -5.7269287109375, -5.315185546875, -4.9034423828125, -4.49169921875, -4.0799560546875, -3.668212890625, -3.2564697265625, -2.8447265625, -2.4329833984375, -2.021240234375, -1.6094970703125, -1.19775390625, -0.7860107421875, -0.374267578125, 0.0374755859375, 0.44921875, 0.8609619140625, 1.272705078125, 1.6844482421875, 2.09619140625, 2.5079345703125, 2.919677734375, 3.3314208984375, 3.7431640625, 4.1549072265625, 4.566650390625, 4.9783935546875, 5.39013671875, 5.8018798828125, 6.213623046875, 6.6253662109375, 7.037109375, 7.4488525390625, 7.860595703125, 8.2723388671875, 8.68408203125, 9.0958251953125, 9.507568359375, 9.9193115234375, 10.3310546875, 10.7427978515625, 11.154541015625, 11.5662841796875, 11.97802734375, 12.3897705078125, 12.801513671875, 13.2132568359375, 13.625]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.weight": {"_type": "histogram", "values": [4.0, 3.0, 4.0, 7.0, 5.0, 16.0, 13.0, 37.0, 41.0, 69.0, 125.0, 146.0, 233.0, 314.0, 488.0, 730.0, 1092.0, 1664.0, 2514.0, 3622.0, 5608.0, 7980.0, 12251.0, 17684.0, 26340.0, 38053.0, 55742.0, 79479.0, 115094.0, 159325.0, 157473.0, 113157.0, 78003.0, 54340.0, 37439.0, 25744.0, 17468.0, 11943.0, 7985.0, 5334.0, 3632.0, 2424.0, 1638.0, 1093.0, 715.0, 488.0, 339.0, 222.0, 153.0, 105.0, 58.0, 44.0, 37.0, 21.0, 16.0, 6.0, 5.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.1416015625, -1.103515625, -1.0654296875, -1.02734375, -0.9892578125, -0.951171875, -0.9130859375, -0.875, -0.8369140625, -0.798828125, -0.7607421875, -0.72265625, -0.6845703125, -0.646484375, -0.6083984375, -0.5703125, -0.5322265625, -0.494140625, -0.4560546875, -0.41796875, -0.3798828125, -0.341796875, -0.3037109375, -0.265625, -0.2275390625, -0.189453125, -0.1513671875, -0.11328125, -0.0751953125, -0.037109375, 0.0009765625, 0.0390625, 0.0771484375, 0.115234375, 0.1533203125, 0.19140625, 0.2294921875, 0.267578125, 0.3056640625, 0.34375, 0.3818359375, 0.419921875, 0.4580078125, 0.49609375, 0.5341796875, 0.572265625, 0.6103515625, 0.6484375, 0.6865234375, 0.724609375, 0.7626953125, 0.80078125, 0.8388671875, 0.876953125, 0.9150390625, 0.953125, 0.9912109375, 1.029296875, 1.0673828125, 1.10546875, 1.1435546875, 1.181640625, 1.2197265625, 1.2578125, 1.2958984375]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 3.0, 3.0, 3.0, 8.0, 3.0, 7.0, 18.0, 13.0, 8.0, 16.0, 26.0, 11.0, 17.0, 26.0, 36.0, 24.0, 36.0, 32.0, 35.0, 37.0, 35.0, 49.0, 38.0, 1067.0, 36.0, 45.0, 42.0, 42.0, 37.0, 39.0, 30.0, 26.0, 32.0, 21.0, 27.0, 15.0, 15.0, 13.0, 10.0, 12.0, 4.0, 11.0, 5.0, 6.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.1171875, -8.85052490234375, -8.5838623046875, -8.31719970703125, -8.050537109375, -7.78387451171875, -7.5172119140625, -7.25054931640625, -6.98388671875, -6.71722412109375, -6.4505615234375, -6.18389892578125, -5.917236328125, -5.65057373046875, -5.3839111328125, -5.11724853515625, -4.8505859375, -4.58392333984375, -4.3172607421875, -4.05059814453125, -3.783935546875, -3.51727294921875, -3.2506103515625, -2.98394775390625, -2.71728515625, -2.45062255859375, -2.1839599609375, -1.91729736328125, -1.650634765625, -1.38397216796875, -1.1173095703125, -0.85064697265625, -0.583984375, -0.31732177734375, -0.0506591796875, 0.21600341796875, 0.482666015625, 0.74932861328125, 1.0159912109375, 1.28265380859375, 1.54931640625, 1.81597900390625, 2.0826416015625, 2.34930419921875, 2.615966796875, 2.88262939453125, 3.1492919921875, 3.41595458984375, 3.6826171875, 3.94927978515625, 4.2159423828125, 4.48260498046875, 4.749267578125, 5.01593017578125, 5.2825927734375, 5.54925537109375, 5.81591796875, 6.08258056640625, 6.3492431640625, 6.61590576171875, 6.882568359375, 7.14923095703125, 7.4158935546875, 7.68255615234375, 7.94921875]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 10.0, 29.0, 27.0, 61.0, 55.0, 124.0, 149.0, 260.0, 340.0, 515.0, 770.0, 1145.0, 1731.0, 2545.0, 3930.0, 5892.0, 8806.0, 13401.0, 20150.0, 30946.0, 47681.0, 73647.0, 117275.0, 186466.0, 1234562.0, 123423.0, 77543.0, 50100.0, 32549.0, 21399.0, 14147.0, 9132.0, 6079.0, 4119.0, 2699.0, 1706.0, 1185.0, 833.0, 537.0, 385.0, 264.0, 176.0, 100.0, 82.0, 38.0, 40.0, 25.0, 19.0, 16.0, 5.0, 7.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-1.3564453125, -1.3141326904296875, -1.271820068359375, -1.2295074462890625, -1.18719482421875, -1.1448822021484375, -1.102569580078125, -1.0602569580078125, -1.0179443359375, -0.9756317138671875, -0.933319091796875, -0.8910064697265625, -0.84869384765625, -0.8063812255859375, -0.764068603515625, -0.7217559814453125, -0.679443359375, -0.6371307373046875, -0.594818115234375, -0.5525054931640625, -0.51019287109375, -0.4678802490234375, -0.425567626953125, -0.3832550048828125, -0.3409423828125, -0.2986297607421875, -0.256317138671875, -0.2140045166015625, -0.17169189453125, -0.1293792724609375, -0.087066650390625, -0.0447540283203125, -0.00244140625, 0.0398712158203125, 0.082183837890625, 0.1244964599609375, 0.16680908203125, 0.2091217041015625, 0.251434326171875, 0.2937469482421875, 0.3360595703125, 0.3783721923828125, 0.420684814453125, 0.4629974365234375, 0.50531005859375, 0.5476226806640625, 0.589935302734375, 0.6322479248046875, 0.674560546875, 0.7168731689453125, 0.759185791015625, 0.8014984130859375, 0.84381103515625, 0.8861236572265625, 0.928436279296875, 0.9707489013671875, 1.0130615234375, 1.0553741455078125, 1.097686767578125, 1.1399993896484375, 1.18231201171875, 1.2246246337890625, 1.266937255859375, 1.3092498779296875, 1.3515625]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 4.0, 0.0, 3.0, 3.0, 5.0, 5.0, 3.0, 14.0, 10.0, 8.0, 22.0, 22.0, 23.0, 29.0, 37.0, 50.0, 53.0, 43.0, 73.0, 68.0, 54.0, 61.0, 61.0, 72.0, 50.0, 51.0, 38.0, 37.0, 19.0, 21.0, 17.0, 15.0, 11.0, 7.0, 6.0, 4.0, 4.0, 7.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0019989013671875, -0.0019109547138214111, -0.0018230080604553223, -0.0017350614070892334, -0.0016471147537231445, -0.0015591681003570557, -0.0014712214469909668, -0.001383274793624878, -0.001295328140258789, -0.0012073814868927002, -0.0011194348335266113, -0.0010314881801605225, -0.0009435415267944336, -0.0008555948734283447, -0.0007676482200622559, -0.000679701566696167, -0.0005917549133300781, -0.0005038082599639893, -0.0004158616065979004, -0.0003279149532318115, -0.00023996829986572266, -0.0001520216464996338, -6.407499313354492e-05, 2.3871660232543945e-05, 0.00011181831359863281, 0.00019976496696472168, 0.00028771162033081055, 0.0003756582736968994, 0.0004636049270629883, 0.0005515515804290771, 0.000639498233795166, 0.0007274448871612549, 0.0008153915405273438, 0.0009033381938934326, 0.0009912848472595215, 0.0010792315006256104, 0.0011671781539916992, 0.001255124807357788, 0.001343071460723877, 0.0014310181140899658, 0.0015189647674560547, 0.0016069114208221436, 0.0016948580741882324, 0.0017828047275543213, 0.0018707513809204102, 0.001958698034286499, 0.002046644687652588, 0.0021345913410186768, 0.0022225379943847656, 0.0023104846477508545, 0.0023984313011169434, 0.0024863779544830322, 0.002574324607849121, 0.00266227126121521, 0.002750217914581299, 0.0028381645679473877, 0.0029261112213134766, 0.0030140578746795654, 0.0031020045280456543, 0.003189951181411743, 0.003277897834777832, 0.003365844488143921, 0.0034537911415100098, 0.0035417377948760986, 0.0036296844482421875]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 3.0, 5.0, 6.0, 4.0, 4.0, 6.0, 9.0, 10.0, 16.0, 15.0, 27.0, 23.0, 41.0, 57.0, 86.0, 114.0, 184.0, 537.0, 2724.0, 1029329.0, 13835.0, 802.0, 252.0, 126.0, 72.0, 61.0, 51.0, 35.0, 30.0, 28.0, 14.0, 9.0, 14.0, 10.0, 4.0, 5.0, 4.0, 5.0, 4.0, 3.0, 0.0, 1.0, 2.0], "bins": [-0.08477783203125, -0.08269214630126953, -0.08060646057128906, -0.0785207748413086, -0.07643508911132812, -0.07434940338134766, -0.07226371765136719, -0.07017803192138672, -0.06809234619140625, -0.06600666046142578, -0.06392097473144531, -0.061835289001464844, -0.059749603271484375, -0.057663917541503906, -0.05557823181152344, -0.05349254608154297, -0.0514068603515625, -0.04932117462158203, -0.04723548889160156, -0.045149803161621094, -0.043064117431640625, -0.040978431701660156, -0.03889274597167969, -0.03680706024169922, -0.03472137451171875, -0.03263568878173828, -0.030550003051757812, -0.028464317321777344, -0.026378631591796875, -0.024292945861816406, -0.022207260131835938, -0.02012157440185547, -0.018035888671875, -0.01595020294189453, -0.013864517211914062, -0.011778831481933594, -0.009693145751953125, -0.007607460021972656, -0.0055217742919921875, -0.0034360885620117188, -0.00135040283203125, 0.0007352828979492188, 0.0028209686279296875, 0.004906654357910156, 0.006992340087890625, 0.009078025817871094, 0.011163711547851562, 0.013249397277832031, 0.0153350830078125, 0.01742076873779297, 0.019506454467773438, 0.021592140197753906, 0.023677825927734375, 0.025763511657714844, 0.027849197387695312, 0.02993488311767578, 0.03202056884765625, 0.03410625457763672, 0.03619194030761719, 0.038277626037597656, 0.040363311767578125, 0.042448997497558594, 0.04453468322753906, 0.04662036895751953, 0.0487060546875]}, "gradients/decoder.transformer.h.13.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 92.0, 723.0, 188.0, 9.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.002585046924650669, -0.0022451633121818304, -0.0019052796997129917, -0.001565396087244153, -0.0012255124747753143, -0.0008856288623064756, -0.000545745249837637, -0.00020586163736879826, 0.00013402197510004044, 0.00047390558756887913, 0.0008137892000377178, 0.0011536728125065565, 0.0014935564249753952, 0.001833440037444234, 0.0021733236499130726, 0.0025132072623819113, 0.00285309087485075, 0.0031929744873195887, 0.0035328580997884274, 0.003872741712257266, 0.004212625324726105, 0.0045525087043643, 0.004892392549663782, 0.0052322763949632645, 0.0055721597746014595, 0.0059120431542396545, 0.006251926999539137, 0.006591810844838619, 0.006931694224476814, 0.007271577604115009, 0.007611461449414492, 0.007951345294713974, 0.008291229605674744, 0.008631112985312939, 0.008970996364951134, 0.009310880675911903, 0.009650764055550098, 0.009990647435188293, 0.010330531746149063, 0.010670415125787258, 0.011010298505425453, 0.011350181885063648, 0.011690065264701843, 0.012029949575662613, 0.012369832955300808, 0.012709716334939003, 0.013049600645899773, 0.013389484025537968, 0.013729367405176163, 0.014069250784814358, 0.014409134164452553, 0.014749018475413322, 0.015088901855051517, 0.015428785234689713, 0.015768669545650482, 0.016108551993966103, 0.016448436304926872, 0.016788320615887642, 0.017128203064203262, 0.017468087375164032, 0.0178079716861248, 0.018147854134440422, 0.01848773844540119, 0.018827620893716812, 0.019167505204677582]}, "gradients/decoder.transformer.h.13.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 1.0, 4.0, 5.0, 1.0, 4.0, 8.0, 7.0, 8.0, 11.0, 11.0, 15.0, 19.0, 16.0, 21.0, 15.0, 28.0, 27.0, 35.0, 41.0, 33.0, 39.0, 41.0, 34.0, 29.0, 30.0, 43.0, 32.0, 41.0, 47.0, 32.0, 35.0, 16.0, 29.0, 35.0, 30.0, 21.0, 25.0, 16.0, 23.0, 22.0, 15.0, 12.0, 13.0, 5.0, 3.0, 7.0, 4.0, 6.0, 3.0, 3.0, 3.0, 4.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.0011688470840454102, -0.001131356693804264, -0.001093866303563118, -0.001056375913321972, -0.0010188855230808258, -0.0009813951328396797, -0.0009439047425985336, -0.0009064143523573875, -0.0008689239621162415, -0.0008314335718750954, -0.0007939431816339493, -0.0007564527913928032, -0.0007189624011516571, -0.000681472010910511, -0.0006439816206693649, -0.0006064912304282188, -0.0005690008401870728, -0.0005315104499459267, -0.0004940200597047806, -0.0004565296694636345, -0.0004190392792224884, -0.0003815488889813423, -0.00034405849874019623, -0.00030656810849905014, -0.00026907771825790405, -0.00023158732801675797, -0.00019409693777561188, -0.0001566065475344658, -0.0001191161572933197, -8.162576705217361e-05, -4.413537681102753e-05, -6.644986569881439e-06, 3.084540367126465e-05, 6.833579391241074e-05, 0.00010582618415355682, 0.0001433165743947029, 0.000180806964635849, 0.0002182973548769951, 0.0002557877451181412, 0.00029327813535928726, 0.00033076852560043335, 0.00036825891584157944, 0.0004057493060827255, 0.0004432396963238716, 0.0004807300865650177, 0.0005182204768061638, 0.0005557108670473099, 0.000593201257288456, 0.000630691647529602, 0.0006681820377707481, 0.0007056724280118942, 0.0007431628182530403, 0.0007806532084941864, 0.0008181435987353325, 0.0008556339889764786, 0.0008931243792176247, 0.0009306147694587708, 0.0009681051596999168, 0.001005595549941063, 0.001043085940182209, 0.001080576330423355, 0.0011180667206645012, 0.0011555571109056473, 0.0011930475011467934, 0.0012305378913879395]}, "gradients/decoder.transformer.h.13.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 2.0, 4.0, 3.0, 6.0, 9.0, 13.0, 22.0, 19.0, 13.0, 18.0, 22.0, 32.0, 35.0, 37.0, 29.0, 38.0, 43.0, 39.0, 36.0, 38.0, 50.0, 35.0, 50.0, 40.0, 42.0, 41.0, 28.0, 31.0, 32.0, 33.0, 24.0, 15.0, 17.0, 19.0, 23.0, 18.0, 11.0, 7.0, 9.0, 5.0, 4.0, 4.0, 2.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-12.7265625, -12.3148193359375, -11.903076171875, -11.4913330078125, -11.07958984375, -10.6678466796875, -10.256103515625, -9.8443603515625, -9.4326171875, -9.0208740234375, -8.609130859375, -8.1973876953125, -7.78564453125, -7.3739013671875, -6.962158203125, -6.5504150390625, -6.138671875, -5.7269287109375, -5.315185546875, -4.9034423828125, -4.49169921875, -4.0799560546875, -3.668212890625, -3.2564697265625, -2.8447265625, -2.4329833984375, -2.021240234375, -1.6094970703125, -1.19775390625, -0.7860107421875, -0.374267578125, 0.0374755859375, 0.44921875, 0.8609619140625, 1.272705078125, 1.6844482421875, 2.09619140625, 2.5079345703125, 2.919677734375, 3.3314208984375, 3.7431640625, 4.1549072265625, 4.566650390625, 4.9783935546875, 5.39013671875, 5.8018798828125, 6.213623046875, 6.6253662109375, 7.037109375, 7.4488525390625, 7.860595703125, 8.2723388671875, 8.68408203125, 9.0958251953125, 9.507568359375, 9.9193115234375, 10.3310546875, 10.7427978515625, 11.154541015625, 11.5662841796875, 11.97802734375, 12.3897705078125, 12.801513671875, 13.2132568359375, 13.625]}, "gradients/decoder.transformer.h.13.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 4.0, 4.0, 3.0, 2.0, 10.0, 11.0, 15.0, 21.0, 36.0, 36.0, 52.0, 58.0, 83.0, 124.0, 181.0, 236.0, 318.0, 491.0, 677.0, 1168.0, 1973.0, 3333.0, 5758.0, 10739.0, 20041.0, 42329.0, 99552.0, 247033.0, 345755.0, 147596.0, 60859.0, 27643.0, 13905.0, 7532.0, 4155.0, 2487.0, 1473.0, 907.0, 593.0, 411.0, 258.0, 192.0, 133.0, 87.0, 75.0, 38.0, 39.0, 27.0, 35.0, 19.0, 12.0, 8.0, 9.0, 8.0, 6.0, 6.0, 3.0, 3.0, 4.0, 1.0, 2.0], "bins": [-7.578125, -7.33538818359375, -7.0926513671875, -6.84991455078125, -6.607177734375, -6.36444091796875, -6.1217041015625, -5.87896728515625, -5.63623046875, -5.39349365234375, -5.1507568359375, -4.90802001953125, -4.665283203125, -4.42254638671875, -4.1798095703125, -3.93707275390625, -3.6943359375, -3.45159912109375, -3.2088623046875, -2.96612548828125, -2.723388671875, -2.48065185546875, -2.2379150390625, -1.99517822265625, -1.75244140625, -1.50970458984375, -1.2669677734375, -1.02423095703125, -0.781494140625, -0.53875732421875, -0.2960205078125, -0.05328369140625, 0.189453125, 0.43218994140625, 0.6749267578125, 0.91766357421875, 1.160400390625, 1.40313720703125, 1.6458740234375, 1.88861083984375, 2.13134765625, 2.37408447265625, 2.6168212890625, 2.85955810546875, 3.102294921875, 3.34503173828125, 3.5877685546875, 3.83050537109375, 4.0732421875, 4.31597900390625, 4.5587158203125, 4.80145263671875, 5.044189453125, 5.28692626953125, 5.5296630859375, 5.77239990234375, 6.01513671875, 6.25787353515625, 6.5006103515625, 6.74334716796875, 6.986083984375, 7.22882080078125, 7.4715576171875, 7.71429443359375, 7.95703125]}, "gradients/decoder.transformer.h.13.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 7.0, 6.0, 7.0, 5.0, 4.0, 6.0, 14.0, 14.0, 16.0, 24.0, 22.0, 24.0, 28.0, 22.0, 34.0, 41.0, 56.0, 44.0, 70.0, 128.0, 1589.0, 343.0, 111.0, 55.0, 48.0, 52.0, 41.0, 34.0, 34.0, 30.0, 22.0, 27.0, 13.0, 15.0, 17.0, 9.0, 9.0, 8.0, 9.0, 3.0, 4.0, 4.0, 1.0, 1.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-49.125, -47.67578125, -46.2265625, -44.77734375, -43.328125, -41.87890625, -40.4296875, -38.98046875, -37.53125, -36.08203125, -34.6328125, -33.18359375, -31.734375, -30.28515625, -28.8359375, -27.38671875, -25.9375, -24.48828125, -23.0390625, -21.58984375, -20.140625, -18.69140625, -17.2421875, -15.79296875, -14.34375, -12.89453125, -11.4453125, -9.99609375, -8.546875, -7.09765625, -5.6484375, -4.19921875, -2.75, -1.30078125, 0.1484375, 1.59765625, 3.046875, 4.49609375, 5.9453125, 7.39453125, 8.84375, 10.29296875, 11.7421875, 13.19140625, 14.640625, 16.08984375, 17.5390625, 18.98828125, 20.4375, 21.88671875, 23.3359375, 24.78515625, 26.234375, 27.68359375, 29.1328125, 30.58203125, 32.03125, 33.48046875, 34.9296875, 36.37890625, 37.828125, 39.27734375, 40.7265625, 42.17578125, 43.625]}, "gradients/decoder.transformer.h.13.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 5.0, 5.0, 3.0, 6.0, 10.0, 23.0, 26.0, 24.0, 51.0, 48.0, 77.0, 94.0, 141.0, 188.0, 257.0, 413.0, 1039.0, 66156.0, 3072259.0, 3214.0, 560.0, 328.0, 184.0, 162.0, 127.0, 75.0, 58.0, 43.0, 36.0, 18.0, 16.0, 14.0, 13.0, 15.0, 6.0, 2.0, 1.0, 5.0, 2.0, 6.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-128.25, -124.271484375, -120.29296875, -116.314453125, -112.3359375, -108.357421875, -104.37890625, -100.400390625, -96.421875, -92.443359375, -88.46484375, -84.486328125, -80.5078125, -76.529296875, -72.55078125, -68.572265625, -64.59375, -60.615234375, -56.63671875, -52.658203125, -48.6796875, -44.701171875, -40.72265625, -36.744140625, -32.765625, -28.787109375, -24.80859375, -20.830078125, -16.8515625, -12.873046875, -8.89453125, -4.916015625, -0.9375, 3.041015625, 7.01953125, 10.998046875, 14.9765625, 18.955078125, 22.93359375, 26.912109375, 30.890625, 34.869140625, 38.84765625, 42.826171875, 46.8046875, 50.783203125, 54.76171875, 58.740234375, 62.71875, 66.697265625, 70.67578125, 74.654296875, 78.6328125, 82.611328125, 86.58984375, 90.568359375, 94.546875, 98.525390625, 102.50390625, 106.482421875, 110.4609375, 114.439453125, 118.41796875, 122.396484375, 126.375]}, "gradients/decoder.transformer.h.13.ln_1.weight": {"_type": "histogram", "values": [73.0, 656.0, 281.0, 11.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.60875129699707, -6.226749420166016, 2.155252456665039, 10.537254333496094, 18.91925621032715, 27.301259994506836, 35.683258056640625, 44.06526184082031, 52.447265625, 60.82926940917969, 69.21127319335938, 77.59326934814453, 85.97527313232422, 94.3572769165039, 102.73927307128906, 111.12127685546875, 119.50328063964844, 127.88528442382812, 136.2672882080078, 144.6492919921875, 153.03128051757812, 161.41329956054688, 169.7952880859375, 178.1772918701172, 186.55929565429688, 194.94129943847656, 203.32330322265625, 211.70530700683594, 220.08731079101562, 228.46929931640625, 236.85130310058594, 245.23330688476562, 253.61529541015625, 261.9972839355469, 270.3793029785156, 278.76129150390625, 287.143310546875, 295.5252990722656, 303.9073181152344, 312.289306640625, 320.67132568359375, 329.0533142089844, 337.4353332519531, 345.81732177734375, 354.1993408203125, 362.5813293457031, 370.9633483886719, 379.3453369140625, 387.7273254394531, 396.10931396484375, 404.4913330078125, 412.8733215332031, 421.2553405761719, 429.6373291015625, 438.01934814453125, 446.4013366699219, 454.7833251953125, 463.1653137207031, 471.5473327636719, 479.9293212890625, 488.31134033203125, 496.6933288574219, 505.0753479003906, 513.4573364257812, 521.83935546875]}, "gradients/decoder.transformer.h.13.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 4.0, 5.0, 4.0, 7.0, 7.0, 11.0, 10.0, 7.0, 19.0, 19.0, 25.0, 26.0, 23.0, 24.0, 28.0, 34.0, 37.0, 27.0, 31.0, 40.0, 36.0, 32.0, 38.0, 31.0, 45.0, 35.0, 42.0, 42.0, 36.0, 41.0, 29.0, 31.0, 23.0, 16.0, 22.0, 20.0, 15.0, 12.0, 16.0, 8.0, 9.0, 6.0, 5.0, 7.0, 3.0, 7.0, 7.0, 1.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-107.27496337890625, -103.82782745361328, -100.38069152832031, -96.93354797363281, -93.48641204833984, -90.03927612304688, -86.5921401977539, -83.14500427246094, -79.69786834716797, -76.250732421875, -72.80359649658203, -69.35646057128906, -65.90931701660156, -62.462181091308594, -59.015045166015625, -55.567909240722656, -52.12076950073242, -48.67363357543945, -45.22649383544922, -41.77935791015625, -38.33222198486328, -34.88508605957031, -31.437946319580078, -27.99081039428711, -24.543672561645508, -21.096534729003906, -17.649398803710938, -14.202260971069336, -10.75512409210205, -7.307987213134766, -3.860849380493164, -0.4137134552001953, 3.0334243774414062, 6.480561256408691, 9.927698135375977, 13.374835968017578, 16.821971893310547, 20.26910972595215, 23.71624755859375, 27.16338348388672, 30.61052131652832, 34.05765914916992, 37.50479507446289, 40.951934814453125, 44.399070739746094, 47.84620666503906, 51.29334259033203, 54.740478515625, 58.187618255615234, 61.6347541809082, 65.08189392089844, 68.5290298461914, 71.97616577148438, 75.42330169677734, 78.87043762207031, 82.31758117675781, 85.76471710205078, 89.21185302734375, 92.65898895263672, 96.10612487792969, 99.55326843261719, 103.00040435791016, 106.44754028320312, 109.8946762084961, 113.34181213378906]}, "gradients/decoder.transformer.h.12.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 3.0, 1.0, 3.0, 8.0, 8.0, 11.0, 13.0, 19.0, 18.0, 21.0, 19.0, 25.0, 41.0, 24.0, 32.0, 47.0, 42.0, 34.0, 38.0, 48.0, 36.0, 49.0, 40.0, 47.0, 44.0, 28.0, 44.0, 29.0, 33.0, 24.0, 27.0, 26.0, 20.0, 19.0, 24.0, 11.0, 9.0, 7.0, 6.0, 10.0, 4.0, 4.0, 5.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-13.09375, -12.6597900390625, -12.225830078125, -11.7918701171875, -11.35791015625, -10.9239501953125, -10.489990234375, -10.0560302734375, -9.6220703125, -9.1881103515625, -8.754150390625, -8.3201904296875, -7.88623046875, -7.4522705078125, -7.018310546875, -6.5843505859375, -6.150390625, -5.7164306640625, -5.282470703125, -4.8485107421875, -4.41455078125, -3.9805908203125, -3.546630859375, -3.1126708984375, -2.6787109375, -2.2447509765625, -1.810791015625, -1.3768310546875, -0.94287109375, -0.5089111328125, -0.074951171875, 0.3590087890625, 0.79296875, 1.2269287109375, 1.660888671875, 2.0948486328125, 2.52880859375, 2.9627685546875, 3.396728515625, 3.8306884765625, 4.2646484375, 4.6986083984375, 5.132568359375, 5.5665283203125, 6.00048828125, 6.4344482421875, 6.868408203125, 7.3023681640625, 7.736328125, 8.1702880859375, 8.604248046875, 9.0382080078125, 9.47216796875, 9.9061279296875, 10.340087890625, 10.7740478515625, 11.2080078125, 11.6419677734375, 12.075927734375, 12.5098876953125, 12.94384765625, 13.3778076171875, 13.811767578125, 14.2457275390625, 14.6796875]}, "gradients/decoder.transformer.h.12.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 3.0, 1.0, 4.0, 2.0, 3.0, 9.0, 8.0, 5.0, 9.0, 14.0, 16.0, 25.0, 21.0, 20.0, 31.0, 38.0, 71.0, 85.0, 140.0, 269.0, 700.0, 2839.0, 24632.0, 609720.0, 3059120.0, 472515.0, 20218.0, 2437.0, 627.0, 229.0, 133.0, 74.0, 56.0, 45.0, 33.0, 26.0, 18.0, 19.0, 14.0, 10.0, 16.0, 8.0, 6.0, 2.0, 9.0, 8.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-34.96875, -33.82080078125, -32.6728515625, -31.52490234375, -30.376953125, -29.22900390625, -28.0810546875, -26.93310546875, -25.78515625, -24.63720703125, -23.4892578125, -22.34130859375, -21.193359375, -20.04541015625, -18.8974609375, -17.74951171875, -16.6015625, -15.45361328125, -14.3056640625, -13.15771484375, -12.009765625, -10.86181640625, -9.7138671875, -8.56591796875, -7.41796875, -6.27001953125, -5.1220703125, -3.97412109375, -2.826171875, -1.67822265625, -0.5302734375, 0.61767578125, 1.765625, 2.91357421875, 4.0615234375, 5.20947265625, 6.357421875, 7.50537109375, 8.6533203125, 9.80126953125, 10.94921875, 12.09716796875, 13.2451171875, 14.39306640625, 15.541015625, 16.68896484375, 17.8369140625, 18.98486328125, 20.1328125, 21.28076171875, 22.4287109375, 23.57666015625, 24.724609375, 25.87255859375, 27.0205078125, 28.16845703125, 29.31640625, 30.46435546875, 31.6123046875, 32.76025390625, 33.908203125, 35.05615234375, 36.2041015625, 37.35205078125, 38.5]}, "gradients/decoder.transformer.h.12.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 7.0, 7.0, 3.0, 9.0, 18.0, 16.0, 26.0, 29.0, 44.0, 72.0, 90.0, 115.0, 164.0, 239.0, 342.0, 455.0, 529.0, 523.0, 415.0, 299.0, 204.0, 136.0, 92.0, 61.0, 56.0, 39.0, 31.0, 15.0, 13.0, 8.0, 8.0, 6.0, 0.0, 4.0, 2.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.71875, -32.802734375, -31.88671875, -30.970703125, -30.0546875, -29.138671875, -28.22265625, -27.306640625, -26.390625, -25.474609375, -24.55859375, -23.642578125, -22.7265625, -21.810546875, -20.89453125, -19.978515625, -19.0625, -18.146484375, -17.23046875, -16.314453125, -15.3984375, -14.482421875, -13.56640625, -12.650390625, -11.734375, -10.818359375, -9.90234375, -8.986328125, -8.0703125, -7.154296875, -6.23828125, -5.322265625, -4.40625, -3.490234375, -2.57421875, -1.658203125, -0.7421875, 0.173828125, 1.08984375, 2.005859375, 2.921875, 3.837890625, 4.75390625, 5.669921875, 6.5859375, 7.501953125, 8.41796875, 9.333984375, 10.25, 11.166015625, 12.08203125, 12.998046875, 13.9140625, 14.830078125, 15.74609375, 16.662109375, 17.578125, 18.494140625, 19.41015625, 20.326171875, 21.2421875, 22.158203125, 23.07421875, 23.990234375, 24.90625]}, "gradients/decoder.transformer.h.12.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 4.0, 0.0, 3.0, 2.0, 1.0, 4.0, 11.0, 9.0, 16.0, 7.0, 14.0, 16.0, 20.0, 27.0, 41.0, 51.0, 72.0, 75.0, 101.0, 135.0, 175.0, 261.0, 388.0, 941.0, 18434.0, 3963438.0, 206287.0, 1971.0, 491.0, 323.0, 220.0, 150.0, 131.0, 104.0, 78.0, 70.0, 56.0, 34.0, 34.0, 28.0, 16.0, 14.0, 11.0, 9.0, 8.0, 2.0, 4.0, 1.0, 3.0, 0.0, 5.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-99.75, -96.2744140625, -92.798828125, -89.3232421875, -85.84765625, -82.3720703125, -78.896484375, -75.4208984375, -71.9453125, -68.4697265625, -64.994140625, -61.5185546875, -58.04296875, -54.5673828125, -51.091796875, -47.6162109375, -44.140625, -40.6650390625, -37.189453125, -33.7138671875, -30.23828125, -26.7626953125, -23.287109375, -19.8115234375, -16.3359375, -12.8603515625, -9.384765625, -5.9091796875, -2.43359375, 1.0419921875, 4.517578125, 7.9931640625, 11.46875, 14.9443359375, 18.419921875, 21.8955078125, 25.37109375, 28.8466796875, 32.322265625, 35.7978515625, 39.2734375, 42.7490234375, 46.224609375, 49.7001953125, 53.17578125, 56.6513671875, 60.126953125, 63.6025390625, 67.078125, 70.5537109375, 74.029296875, 77.5048828125, 80.98046875, 84.4560546875, 87.931640625, 91.4072265625, 94.8828125, 98.3583984375, 101.833984375, 105.3095703125, 108.78515625, 112.2607421875, 115.736328125, 119.2119140625, 122.6875]}, "gradients/decoder.transformer.h.12.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 7.0, 13.0, 16.0, 35.0, 56.0, 88.0, 114.0, 145.0, 165.0, 124.0, 101.0, 78.0, 41.0, 20.0, 3.0, 5.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-92.79792022705078, -88.779296875, -84.76066589355469, -80.7420425415039, -76.72341918945312, -72.70479583740234, -68.68617248535156, -64.66754150390625, -60.64891815185547, -56.63029479980469, -52.61166763305664, -48.593040466308594, -44.57441711425781, -40.55579376220703, -36.537166595458984, -32.51853942871094, -28.499916076660156, -24.481290817260742, -20.462665557861328, -16.444040298461914, -12.4254150390625, -8.406789779663086, -4.388164520263672, -0.3695392608642578, 3.6490859985351562, 7.66771125793457, 11.686336517333984, 15.704961776733398, 19.723587036132812, 23.742212295532227, 27.76083755493164, 31.779462814331055, 35.79808044433594, 39.81670379638672, 43.835330963134766, 47.85395812988281, 51.872581481933594, 55.891204833984375, 59.90983200073242, 63.92845916748047, 67.94708251953125, 71.96570587158203, 75.98432922363281, 80.00296020507812, 84.0215835571289, 88.04020690917969, 92.058837890625, 96.07746124267578, 100.09608459472656, 104.11470794677734, 108.13333129882812, 112.15196228027344, 116.17058563232422, 120.189208984375, 124.20783996582031, 128.22645568847656, 132.24508666992188, 136.2637176513672, 140.28233337402344, 144.30096435546875, 148.319580078125, 152.3382110595703, 156.35684204101562, 160.37545776367188, 164.3940887451172]}, "gradients/decoder.transformer.h.12.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 6.0, 6.0, 6.0, 15.0, 19.0, 18.0, 15.0, 24.0, 19.0, 21.0, 23.0, 44.0, 42.0, 36.0, 38.0, 33.0, 46.0, 40.0, 41.0, 35.0, 38.0, 45.0, 33.0, 37.0, 32.0, 26.0, 38.0, 35.0, 30.0, 28.0, 19.0, 11.0, 24.0, 19.0, 6.0, 13.0, 9.0, 7.0, 5.0, 9.0, 4.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-85.63916015625, -83.06629943847656, -80.4934310913086, -77.92057037353516, -75.34770202636719, -72.77484130859375, -70.20198059082031, -67.62911987304688, -65.0562515258789, -62.4833869934082, -59.9105224609375, -57.33766174316406, -54.76479721069336, -52.191932678222656, -49.61907196044922, -47.046207427978516, -44.47334289550781, -41.90047836303711, -39.327613830566406, -36.75475311279297, -34.181888580322266, -31.609024047851562, -29.036161422729492, -26.463298797607422, -23.89043426513672, -21.317569732666016, -18.744707107543945, -16.171844482421875, -13.598979949951172, -11.026116371154785, -8.453252792358398, -5.880390167236328, -3.307525634765625, -0.7346620559692383, 1.8382015228271484, 4.411065101623535, 6.983928680419922, 9.556792259216309, 12.129655838012695, 14.702518463134766, 17.27538299560547, 19.848247528076172, 22.421110153198242, 24.993972778320312, 27.566837310791016, 30.13970184326172, 32.712562561035156, 35.28542709350586, 37.85829162597656, 40.431156158447266, 43.00402069091797, 45.576881408691406, 48.14974594116211, 50.72261047363281, 53.29547119140625, 55.86833572387695, 58.441200256347656, 61.01406478881836, 63.58692932128906, 66.1597900390625, 68.73265075683594, 71.3055191040039, 73.87837982177734, 76.45124816894531, 79.02410888671875]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 3.0, 1.0, 3.0, 12.0, 12.0, 13.0, 15.0, 15.0, 16.0, 23.0, 22.0, 23.0, 30.0, 24.0, 35.0, 31.0, 37.0, 38.0, 44.0, 46.0, 39.0, 43.0, 38.0, 33.0, 33.0, 32.0, 45.0, 32.0, 25.0, 33.0, 21.0, 33.0, 21.0, 26.0, 15.0, 12.0, 21.0, 10.0, 7.0, 7.0, 12.0, 6.0, 6.0, 3.0, 2.0, 5.0, 0.0, 2.0, 1.0, 0.0, 0.0, 4.0], "bins": [-13.125, -12.73583984375, -12.3466796875, -11.95751953125, -11.568359375, -11.17919921875, -10.7900390625, -10.40087890625, -10.01171875, -9.62255859375, -9.2333984375, -8.84423828125, -8.455078125, -8.06591796875, -7.6767578125, -7.28759765625, -6.8984375, -6.50927734375, -6.1201171875, -5.73095703125, -5.341796875, -4.95263671875, -4.5634765625, -4.17431640625, -3.78515625, -3.39599609375, -3.0068359375, -2.61767578125, -2.228515625, -1.83935546875, -1.4501953125, -1.06103515625, -0.671875, -0.28271484375, 0.1064453125, 0.49560546875, 0.884765625, 1.27392578125, 1.6630859375, 2.05224609375, 2.44140625, 2.83056640625, 3.2197265625, 3.60888671875, 3.998046875, 4.38720703125, 4.7763671875, 5.16552734375, 5.5546875, 5.94384765625, 6.3330078125, 6.72216796875, 7.111328125, 7.50048828125, 7.8896484375, 8.27880859375, 8.66796875, 9.05712890625, 9.4462890625, 9.83544921875, 10.224609375, 10.61376953125, 11.0029296875, 11.39208984375, 11.78125]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 4.0, 7.0, 7.0, 17.0, 23.0, 26.0, 49.0, 72.0, 106.0, 167.0, 211.0, 353.0, 518.0, 769.0, 1194.0, 1804.0, 2675.0, 4141.0, 6095.0, 9253.0, 13695.0, 20168.0, 29775.0, 43730.0, 64622.0, 97528.0, 148555.0, 192997.0, 135876.0, 89682.0, 59596.0, 40627.0, 27630.0, 18647.0, 12771.0, 8410.0, 5710.0, 3768.0, 2468.0, 1603.0, 1062.0, 733.0, 461.0, 322.0, 191.0, 156.0, 111.0, 57.0, 46.0, 34.0, 17.0, 9.0, 8.0, 4.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.1865234375, -1.1461639404296875, -1.105804443359375, -1.0654449462890625, -1.02508544921875, -0.9847259521484375, -0.944366455078125, -0.9040069580078125, -0.8636474609375, -0.8232879638671875, -0.782928466796875, -0.7425689697265625, -0.70220947265625, -0.6618499755859375, -0.621490478515625, -0.5811309814453125, -0.540771484375, -0.5004119873046875, -0.460052490234375, -0.4196929931640625, -0.37933349609375, -0.3389739990234375, -0.298614501953125, -0.2582550048828125, -0.2178955078125, -0.1775360107421875, -0.137176513671875, -0.0968170166015625, -0.05645751953125, -0.0160980224609375, 0.024261474609375, 0.0646209716796875, 0.10498046875, 0.1453399658203125, 0.185699462890625, 0.2260589599609375, 0.26641845703125, 0.3067779541015625, 0.347137451171875, 0.3874969482421875, 0.4278564453125, 0.4682159423828125, 0.508575439453125, 0.5489349365234375, 0.58929443359375, 0.6296539306640625, 0.670013427734375, 0.7103729248046875, 0.750732421875, 0.7910919189453125, 0.831451416015625, 0.8718109130859375, 0.91217041015625, 0.9525299072265625, 0.992889404296875, 1.0332489013671875, 1.0736083984375, 1.1139678955078125, 1.154327392578125, 1.1946868896484375, 1.23504638671875, 1.2754058837890625, 1.315765380859375, 1.3561248779296875, 1.396484375]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 4.0, 5.0, 6.0, 3.0, 4.0, 3.0, 8.0, 8.0, 9.0, 14.0, 10.0, 15.0, 16.0, 19.0, 20.0, 19.0, 18.0, 31.0, 26.0, 35.0, 33.0, 39.0, 24.0, 25.0, 37.0, 39.0, 1072.0, 35.0, 43.0, 49.0, 40.0, 32.0, 42.0, 32.0, 23.0, 23.0, 26.0, 13.0, 17.0, 25.0, 12.0, 11.0, 17.0, 9.0, 4.0, 8.0, 6.0, 4.0, 7.0, 3.0, 4.0, 1.0, 2.0, 1.0, 5.0, 2.0, 2.0, 2.0], "bins": [-7.60546875, -7.3714599609375, -7.137451171875, -6.9034423828125, -6.66943359375, -6.4354248046875, -6.201416015625, -5.9674072265625, -5.7333984375, -5.4993896484375, -5.265380859375, -5.0313720703125, -4.79736328125, -4.5633544921875, -4.329345703125, -4.0953369140625, -3.861328125, -3.6273193359375, -3.393310546875, -3.1593017578125, -2.92529296875, -2.6912841796875, -2.457275390625, -2.2232666015625, -1.9892578125, -1.7552490234375, -1.521240234375, -1.2872314453125, -1.05322265625, -0.8192138671875, -0.585205078125, -0.3511962890625, -0.1171875, 0.1168212890625, 0.350830078125, 0.5848388671875, 0.81884765625, 1.0528564453125, 1.286865234375, 1.5208740234375, 1.7548828125, 1.9888916015625, 2.222900390625, 2.4569091796875, 2.69091796875, 2.9249267578125, 3.158935546875, 3.3929443359375, 3.626953125, 3.8609619140625, 4.094970703125, 4.3289794921875, 4.56298828125, 4.7969970703125, 5.031005859375, 5.2650146484375, 5.4990234375, 5.7330322265625, 5.967041015625, 6.2010498046875, 6.43505859375, 6.6690673828125, 6.903076171875, 7.1370849609375, 7.37109375]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 4.0, 2.0, 3.0, 6.0, 12.0, 15.0, 24.0, 33.0, 33.0, 74.0, 126.0, 123.0, 200.0, 316.0, 432.0, 613.0, 918.0, 1445.0, 2074.0, 3020.0, 4581.0, 6853.0, 10344.0, 15519.0, 23491.0, 36068.0, 54937.0, 84824.0, 134353.0, 1238839.0, 167265.0, 108384.0, 69196.0, 45111.0, 29700.0, 19406.0, 12916.0, 8425.0, 5782.0, 3773.0, 2569.0, 1670.0, 1121.0, 816.0, 589.0, 341.0, 250.0, 165.0, 131.0, 84.0, 60.0, 36.0, 26.0, 23.0, 10.0, 6.0, 3.0, 3.0, 3.0, 2.0, 1.0], "bins": [-1.3125, -1.2724456787109375, -1.232391357421875, -1.1923370361328125, -1.15228271484375, -1.1122283935546875, -1.072174072265625, -1.0321197509765625, -0.9920654296875, -0.9520111083984375, -0.911956787109375, -0.8719024658203125, -0.83184814453125, -0.7917938232421875, -0.751739501953125, -0.7116851806640625, -0.671630859375, -0.6315765380859375, -0.591522216796875, -0.5514678955078125, -0.51141357421875, -0.4713592529296875, -0.431304931640625, -0.3912506103515625, -0.3511962890625, -0.3111419677734375, -0.271087646484375, -0.2310333251953125, -0.19097900390625, -0.1509246826171875, -0.110870361328125, -0.0708160400390625, -0.03076171875, 0.0092926025390625, 0.049346923828125, 0.0894012451171875, 0.12945556640625, 0.1695098876953125, 0.209564208984375, 0.2496185302734375, 0.2896728515625, 0.3297271728515625, 0.369781494140625, 0.4098358154296875, 0.44989013671875, 0.4899444580078125, 0.529998779296875, 0.5700531005859375, 0.610107421875, 0.6501617431640625, 0.690216064453125, 0.7302703857421875, 0.77032470703125, 0.8103790283203125, 0.850433349609375, 0.8904876708984375, 0.9305419921875, 0.9705963134765625, 1.010650634765625, 1.0507049560546875, 1.09075927734375, 1.1308135986328125, 1.170867919921875, 1.2109222412109375, 1.2509765625]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0, 5.0, 2.0, 2.0, 8.0, 5.0, 7.0, 7.0, 6.0, 17.0, 17.0, 23.0, 22.0, 28.0, 30.0, 35.0, 41.0, 46.0, 66.0, 61.0, 69.0, 55.0, 70.0, 56.0, 56.0, 47.0, 39.0, 38.0, 24.0, 23.0, 12.0, 19.0, 18.0, 12.0, 8.0, 5.0, 3.0, 5.0, 2.0, 5.0, 6.0, 2.0, 2.0, 4.0, 1.0, 0.0, 1.0, 1.0, 2.0], "bins": [-0.003932952880859375, -0.0038246214389801025, -0.00371628999710083, -0.0036079585552215576, -0.003499627113342285, -0.0033912956714630127, -0.0032829642295837402, -0.0031746327877044678, -0.0030663013458251953, -0.002957969903945923, -0.0028496384620666504, -0.002741307020187378, -0.0026329755783081055, -0.002524644136428833, -0.0024163126945495605, -0.002307981252670288, -0.0021996498107910156, -0.002091318368911743, -0.0019829869270324707, -0.0018746554851531982, -0.0017663240432739258, -0.0016579926013946533, -0.0015496611595153809, -0.0014413297176361084, -0.001332998275756836, -0.0012246668338775635, -0.001116335391998291, -0.0010080039501190186, -0.0008996725082397461, -0.0007913410663604736, -0.0006830096244812012, -0.0005746781826019287, -0.00046634674072265625, -0.0003580152988433838, -0.00024968385696411133, -0.00014135241508483887, -3.3020973205566406e-05, 7.531046867370605e-05, 0.00018364191055297852, 0.000291973352432251, 0.00040030479431152344, 0.0005086362361907959, 0.0006169676780700684, 0.0007252991199493408, 0.0008336305618286133, 0.0009419620037078857, 0.0010502934455871582, 0.0011586248874664307, 0.0012669563293457031, 0.0013752877712249756, 0.001483619213104248, 0.0015919506549835205, 0.001700282096862793, 0.0018086135387420654, 0.0019169449806213379, 0.0020252764225006104, 0.002133607864379883, 0.0022419393062591553, 0.0023502707481384277, 0.0024586021900177, 0.0025669336318969727, 0.002675265073776245, 0.0027835965156555176, 0.00289192795753479, 0.0030002593994140625]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 3.0, 1.0, 4.0, 4.0, 3.0, 9.0, 6.0, 8.0, 10.0, 9.0, 22.0, 25.0, 34.0, 36.0, 54.0, 72.0, 130.0, 247.0, 792.0, 75769.0, 969297.0, 1167.0, 364.0, 130.0, 92.0, 69.0, 47.0, 31.0, 28.0, 30.0, 17.0, 6.0, 10.0, 6.0, 5.0, 4.0, 3.0, 7.0, 5.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.081787109375, -0.07887840270996094, -0.07596969604492188, -0.07306098937988281, -0.07015228271484375, -0.06724357604980469, -0.06433486938476562, -0.06142616271972656, -0.0585174560546875, -0.05560874938964844, -0.052700042724609375, -0.04979133605957031, -0.04688262939453125, -0.04397392272949219, -0.041065216064453125, -0.03815650939941406, -0.035247802734375, -0.03233909606933594, -0.029430389404296875, -0.026521682739257812, -0.02361297607421875, -0.020704269409179688, -0.017795562744140625, -0.014886856079101562, -0.0119781494140625, -0.009069442749023438, -0.006160736083984375, -0.0032520294189453125, -0.00034332275390625, 0.0025653839111328125, 0.005474090576171875, 0.008382797241210938, 0.01129150390625, 0.014200210571289062, 0.017108917236328125, 0.020017623901367188, 0.02292633056640625, 0.025835037231445312, 0.028743743896484375, 0.03165245056152344, 0.0345611572265625, 0.03746986389160156, 0.040378570556640625, 0.04328727722167969, 0.04619598388671875, 0.04910469055175781, 0.052013397216796875, 0.05492210388183594, 0.057830810546875, 0.06073951721191406, 0.06364822387695312, 0.06655693054199219, 0.06946563720703125, 0.07237434387207031, 0.07528305053710938, 0.07819175720214844, 0.0811004638671875, 0.08400917053222656, 0.08691787719726562, 0.08982658386230469, 0.09273529052734375, 0.09564399719238281, 0.09855270385742188, 0.10146141052246094, 0.1043701171875]}, "gradients/decoder.transformer.h.12.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 5.0, 13.0, 45.0, 122.0, 277.0, 304.0, 168.0, 58.0, 7.0, 10.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.003455569501966238, -0.0033405963331460953, -0.0032256231643259525, -0.00311064999550581, -0.0029956770595163107, -0.002880703890696168, -0.002765730721876025, -0.0026507575530558825, -0.0025357843842357397, -0.002420811215415597, -0.002305838046595454, -0.002190865110605955, -0.0020758919417858124, -0.0019609187729656696, -0.0018459456041455269, -0.0017309724353253841, -0.0016159993829205632, -0.0015010262141004205, -0.0013860531616955996, -0.0012710799928754568, -0.001156106824055314, -0.0010411336552351713, -0.0009261606028303504, -0.0008111874340102077, -0.0006962143233977258, -0.000581241212785244, -0.00046626804396510124, -0.0003512949333526194, -0.00023632179363630712, -0.00012134865391999483, -6.375543307512999e-06, 0.00010859762551262975, 0.00022357073612511158, 0.00033854387584142387, 0.00045351701555773616, 0.000568490126170218, 0.0006834632949903607, 0.0007984364056028426, 0.0009134095162153244, 0.0010283826850354671, 0.00114335585385561, 0.0012583290226757526, 0.0013733020750805736, 0.0014882752439007163, 0.001603248412720859, 0.00171822146512568, 0.0018331946339458227, 0.0019481678027659655, 0.0020631407387554646, 0.0021781139075756073, 0.00229308707639575, 0.002408060245215893, 0.002523033181205392, 0.0026380063500255346, 0.0027529795188456774, 0.00286795268766582, 0.002982925856485963, 0.0030978990253061056, 0.0032128721941262484, 0.0033278451301157475, 0.00344281829893589, 0.003557791467756033, 0.0036727646365761757, 0.0037877378053963184, 0.003902710974216461]}, "gradients/decoder.transformer.h.12.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 2.0, 3.0, 0.0, 5.0, 9.0, 10.0, 8.0, 11.0, 13.0, 13.0, 22.0, 19.0, 31.0, 37.0, 36.0, 37.0, 22.0, 43.0, 43.0, 48.0, 46.0, 45.0, 39.0, 47.0, 57.0, 41.0, 36.0, 40.0, 32.0, 29.0, 23.0, 31.0, 23.0, 16.0, 11.0, 14.0, 23.0, 9.0, 13.0, 6.0, 10.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.001829385757446289, -0.0017669163644313812, -0.0017044469714164734, -0.0016419775784015656, -0.0015795081853866577, -0.0015170387923717499, -0.001454569399356842, -0.0013921000063419342, -0.0013296306133270264, -0.0012671612203121185, -0.0012046918272972107, -0.0011422224342823029, -0.001079753041267395, -0.0010172836482524872, -0.0009548142552375793, -0.0008923448622226715, -0.0008298754692077637, -0.0007674060761928558, -0.000704936683177948, -0.0006424672901630402, -0.0005799978971481323, -0.0005175285041332245, -0.00045505911111831665, -0.0003925897181034088, -0.000330120325088501, -0.00026765093207359314, -0.0002051815390586853, -0.00014271214604377747, -8.024275302886963e-05, -1.7773360013961792e-05, 4.4696033000946045e-05, 0.00010716542601585388, 0.00016963481903076172, 0.00023210421204566956, 0.0002945736050605774, 0.00035704299807548523, 0.00041951239109039307, 0.0004819817841053009, 0.0005444511771202087, 0.0006069205701351166, 0.0006693899631500244, 0.0007318593561649323, 0.0007943287491798401, 0.0008567981421947479, 0.0009192675352096558, 0.0009817369282245636, 0.0010442063212394714, 0.0011066757142543793, 0.0011691451072692871, 0.001231614500284195, 0.0012940838932991028, 0.0013565532863140106, 0.0014190226793289185, 0.0014814920723438263, 0.0015439614653587341, 0.001606430858373642, 0.0016689002513885498, 0.0017313696444034576, 0.0017938390374183655, 0.0018563084304332733, 0.0019187778234481812, 0.001981247216463089, 0.002043716609477997, 0.0021061860024929047, 0.0021686553955078125]}, "gradients/decoder.transformer.h.12.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 3.0, 1.0, 3.0, 12.0, 12.0, 13.0, 15.0, 15.0, 16.0, 23.0, 22.0, 23.0, 30.0, 24.0, 35.0, 31.0, 37.0, 38.0, 44.0, 46.0, 39.0, 43.0, 38.0, 33.0, 33.0, 32.0, 45.0, 32.0, 25.0, 33.0, 21.0, 33.0, 21.0, 26.0, 15.0, 12.0, 21.0, 10.0, 7.0, 7.0, 12.0, 6.0, 6.0, 3.0, 2.0, 5.0, 0.0, 2.0, 1.0, 0.0, 0.0, 4.0], "bins": [-13.125, -12.73583984375, -12.3466796875, -11.95751953125, -11.568359375, -11.17919921875, -10.7900390625, -10.40087890625, -10.01171875, -9.62255859375, -9.2333984375, -8.84423828125, -8.455078125, -8.06591796875, -7.6767578125, -7.28759765625, -6.8984375, -6.50927734375, -6.1201171875, -5.73095703125, -5.341796875, -4.95263671875, -4.5634765625, -4.17431640625, -3.78515625, -3.39599609375, -3.0068359375, -2.61767578125, -2.228515625, -1.83935546875, -1.4501953125, -1.06103515625, -0.671875, -0.28271484375, 0.1064453125, 0.49560546875, 0.884765625, 1.27392578125, 1.6630859375, 2.05224609375, 2.44140625, 2.83056640625, 3.2197265625, 3.60888671875, 3.998046875, 4.38720703125, 4.7763671875, 5.16552734375, 5.5546875, 5.94384765625, 6.3330078125, 6.72216796875, 7.111328125, 7.50048828125, 7.8896484375, 8.27880859375, 8.66796875, 9.05712890625, 9.4462890625, 9.83544921875, 10.224609375, 10.61376953125, 11.0029296875, 11.39208984375, 11.78125]}, "gradients/decoder.transformer.h.12.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 6.0, 2.0, 3.0, 15.0, 13.0, 16.0, 26.0, 24.0, 31.0, 53.0, 66.0, 98.0, 157.0, 252.0, 421.0, 846.0, 1870.0, 4100.0, 10766.0, 33324.0, 128890.0, 524250.0, 253101.0, 60941.0, 17708.0, 6239.0, 2617.0, 1182.0, 606.0, 332.0, 170.0, 129.0, 65.0, 68.0, 40.0, 22.0, 33.0, 15.0, 8.0, 11.0, 17.0, 6.0, 6.0, 3.0, 2.0, 5.0, 0.0, 2.0, 1.0, 0.0, 0.0, 4.0], "bins": [-12.7109375, -12.3341064453125, -11.957275390625, -11.5804443359375, -11.20361328125, -10.8267822265625, -10.449951171875, -10.0731201171875, -9.6962890625, -9.3194580078125, -8.942626953125, -8.5657958984375, -8.18896484375, -7.8121337890625, -7.435302734375, -7.0584716796875, -6.681640625, -6.3048095703125, -5.927978515625, -5.5511474609375, -5.17431640625, -4.7974853515625, -4.420654296875, -4.0438232421875, -3.6669921875, -3.2901611328125, -2.913330078125, -2.5364990234375, -2.15966796875, -1.7828369140625, -1.406005859375, -1.0291748046875, -0.65234375, -0.2755126953125, 0.101318359375, 0.4781494140625, 0.85498046875, 1.2318115234375, 1.608642578125, 1.9854736328125, 2.3623046875, 2.7391357421875, 3.115966796875, 3.4927978515625, 3.86962890625, 4.2464599609375, 4.623291015625, 5.0001220703125, 5.376953125, 5.7537841796875, 6.130615234375, 6.5074462890625, 6.88427734375, 7.2611083984375, 7.637939453125, 8.0147705078125, 8.3916015625, 8.7684326171875, 9.145263671875, 9.5220947265625, 9.89892578125, 10.2757568359375, 10.652587890625, 11.0294189453125, 11.40625]}, "gradients/decoder.transformer.h.12.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 4.0, 2.0, 3.0, 2.0, 5.0, 4.0, 10.0, 10.0, 6.0, 12.0, 15.0, 27.0, 23.0, 31.0, 24.0, 40.0, 43.0, 38.0, 47.0, 40.0, 58.0, 167.0, 1866.0, 133.0, 61.0, 44.0, 47.0, 54.0, 41.0, 34.0, 24.0, 23.0, 24.0, 19.0, 25.0, 9.0, 8.0, 7.0, 7.0, 8.0, 6.0, 4.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-39.46875, -37.9716796875, -36.474609375, -34.9775390625, -33.48046875, -31.9833984375, -30.486328125, -28.9892578125, -27.4921875, -25.9951171875, -24.498046875, -23.0009765625, -21.50390625, -20.0068359375, -18.509765625, -17.0126953125, -15.515625, -14.0185546875, -12.521484375, -11.0244140625, -9.52734375, -8.0302734375, -6.533203125, -5.0361328125, -3.5390625, -2.0419921875, -0.544921875, 0.9521484375, 2.44921875, 3.9462890625, 5.443359375, 6.9404296875, 8.4375, 9.9345703125, 11.431640625, 12.9287109375, 14.42578125, 15.9228515625, 17.419921875, 18.9169921875, 20.4140625, 21.9111328125, 23.408203125, 24.9052734375, 26.40234375, 27.8994140625, 29.396484375, 30.8935546875, 32.390625, 33.8876953125, 35.384765625, 36.8818359375, 38.37890625, 39.8759765625, 41.373046875, 42.8701171875, 44.3671875, 45.8642578125, 47.361328125, 48.8583984375, 50.35546875, 51.8525390625, 53.349609375, 54.8466796875, 56.34375]}, "gradients/decoder.transformer.h.12.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 6.0, 5.0, 7.0, 13.0, 10.0, 6.0, 19.0, 12.0, 29.0, 36.0, 43.0, 51.0, 75.0, 112.0, 175.0, 232.0, 375.0, 928.0, 14898.0, 3120005.0, 6882.0, 680.0, 351.0, 232.0, 141.0, 100.0, 77.0, 54.0, 40.0, 35.0, 20.0, 8.0, 14.0, 16.0, 3.0, 6.0, 4.0, 4.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-124.25, -120.6396484375, -117.029296875, -113.4189453125, -109.80859375, -106.1982421875, -102.587890625, -98.9775390625, -95.3671875, -91.7568359375, -88.146484375, -84.5361328125, -80.92578125, -77.3154296875, -73.705078125, -70.0947265625, -66.484375, -62.8740234375, -59.263671875, -55.6533203125, -52.04296875, -48.4326171875, -44.822265625, -41.2119140625, -37.6015625, -33.9912109375, -30.380859375, -26.7705078125, -23.16015625, -19.5498046875, -15.939453125, -12.3291015625, -8.71875, -5.1083984375, -1.498046875, 2.1123046875, 5.72265625, 9.3330078125, 12.943359375, 16.5537109375, 20.1640625, 23.7744140625, 27.384765625, 30.9951171875, 34.60546875, 38.2158203125, 41.826171875, 45.4365234375, 49.046875, 52.6572265625, 56.267578125, 59.8779296875, 63.48828125, 67.0986328125, 70.708984375, 74.3193359375, 77.9296875, 81.5400390625, 85.150390625, 88.7607421875, 92.37109375, 95.9814453125, 99.591796875, 103.2021484375, 106.8125]}, "gradients/decoder.transformer.h.12.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 30.0, 592.0, 389.0, 9.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-352.5506896972656, -345.6189270019531, -338.68719482421875, -331.75543212890625, -324.8236999511719, -317.8919372558594, -310.9601745605469, -304.0284423828125, -297.0966796875, -290.1649169921875, -283.2331848144531, -276.3014221191406, -269.36968994140625, -262.43792724609375, -255.5061798095703, -248.57443237304688, -241.64266967773438, -234.71092224121094, -227.7791748046875, -220.847412109375, -213.91566467285156, -206.98391723632812, -200.0521697998047, -193.12042236328125, -186.1886749267578, -179.25692749023438, -172.32518005371094, -165.39341735839844, -158.461669921875, -151.52992248535156, -144.59817504882812, -137.66641235351562, -130.73464965820312, -123.80290222167969, -116.87114715576172, -109.93939971923828, -103.00764465332031, -96.07589721679688, -89.14414978027344, -82.21239471435547, -75.28065490722656, -68.34890747070312, -61.417152404785156, -54.48540496826172, -47.55364990234375, -40.62190246582031, -33.69015121459961, -26.758399963378906, -19.826644897460938, -12.894893646240234, -5.963143348693848, 0.9686069488525391, 7.900358200073242, 14.832107543945312, 21.763858795166016, 28.69561004638672, 35.62736129760742, 42.559112548828125, 49.49086380004883, 56.42261505126953, 63.35436248779297, 70.28611755371094, 77.21786499023438, 84.14961242675781, 91.08136749267578]}, "gradients/decoder.transformer.h.12.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 5.0, 4.0, 7.0, 5.0, 10.0, 12.0, 27.0, 17.0, 20.0, 21.0, 28.0, 21.0, 27.0, 40.0, 42.0, 45.0, 41.0, 42.0, 35.0, 42.0, 39.0, 47.0, 43.0, 50.0, 45.0, 29.0, 34.0, 32.0, 18.0, 27.0, 31.0, 24.0, 20.0, 11.0, 13.0, 8.0, 12.0, 7.0, 7.0, 6.0, 4.0, 4.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-108.52529907226562, -105.02540588378906, -101.52552032470703, -98.025634765625, -94.52574157714844, -91.02584838867188, -87.52596282958984, -84.02607727050781, -80.52618408203125, -77.02629089355469, -73.52640533447266, -70.02651977539062, -66.52662658691406, -63.026737213134766, -59.52684783935547, -56.02695846557617, -52.527069091796875, -49.02717971801758, -45.52729034423828, -42.027400970458984, -38.52751159667969, -35.02762222290039, -31.527732849121094, -28.027843475341797, -24.5279541015625, -21.028064727783203, -17.528175354003906, -14.02828598022461, -10.528396606445312, -7.028507232666016, -3.5286178588867188, -0.028728485107421875, 3.4711685180664062, 6.971057891845703, 10.470947265625, 13.970836639404297, 17.470726013183594, 20.97061538696289, 24.470504760742188, 27.970394134521484, 31.47028350830078, 34.97017288208008, 38.470062255859375, 41.96995162963867, 45.46984100341797, 48.969730377197266, 52.46961975097656, 55.96950912475586, 59.469398498535156, 62.96928787231445, 66.46917724609375, 69.96907043457031, 73.46895599365234, 76.96884155273438, 80.46873474121094, 83.9686279296875, 87.46851348876953, 90.96839904785156, 94.46829223632812, 97.96818542480469, 101.46807098388672, 104.96795654296875, 108.46784973144531, 111.96774291992188, 115.4676284790039]}, "gradients/decoder.transformer.h.11.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 3.0, 4.0, 2.0, 10.0, 11.0, 15.0, 10.0, 23.0, 15.0, 19.0, 25.0, 29.0, 17.0, 38.0, 37.0, 21.0, 42.0, 47.0, 49.0, 43.0, 39.0, 38.0, 25.0, 49.0, 39.0, 37.0, 41.0, 32.0, 25.0, 23.0, 28.0, 21.0, 25.0, 20.0, 15.0, 17.0, 17.0, 7.0, 7.0, 13.0, 6.0, 5.0, 5.0, 3.0, 3.0, 1.0, 5.0, 0.0, 0.0, 1.0, 3.0], "bins": [-14.0546875, -13.6458740234375, -13.237060546875, -12.8282470703125, -12.41943359375, -12.0106201171875, -11.601806640625, -11.1929931640625, -10.7841796875, -10.3753662109375, -9.966552734375, -9.5577392578125, -9.14892578125, -8.7401123046875, -8.331298828125, -7.9224853515625, -7.513671875, -7.1048583984375, -6.696044921875, -6.2872314453125, -5.87841796875, -5.4696044921875, -5.060791015625, -4.6519775390625, -4.2431640625, -3.8343505859375, -3.425537109375, -3.0167236328125, -2.60791015625, -2.1990966796875, -1.790283203125, -1.3814697265625, -0.97265625, -0.5638427734375, -0.155029296875, 0.2537841796875, 0.66259765625, 1.0714111328125, 1.480224609375, 1.8890380859375, 2.2978515625, 2.7066650390625, 3.115478515625, 3.5242919921875, 3.93310546875, 4.3419189453125, 4.750732421875, 5.1595458984375, 5.568359375, 5.9771728515625, 6.385986328125, 6.7947998046875, 7.20361328125, 7.6124267578125, 8.021240234375, 8.4300537109375, 8.8388671875, 9.2476806640625, 9.656494140625, 10.0653076171875, 10.47412109375, 10.8829345703125, 11.291748046875, 11.7005615234375, 12.109375]}, "gradients/decoder.transformer.h.11.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 7.0, 3.0, 3.0, 3.0, 2.0, 11.0, 11.0, 14.0, 21.0, 18.0, 27.0, 33.0, 34.0, 47.0, 53.0, 84.0, 76.0, 111.0, 143.0, 245.0, 424.0, 916.0, 3269.0, 20120.0, 355780.0, 2687969.0, 1060154.0, 55697.0, 6051.0, 1350.0, 532.0, 269.0, 163.0, 139.0, 95.0, 100.0, 63.0, 37.0, 37.0, 36.0, 22.0, 24.0, 25.0, 12.0, 18.0, 13.0, 10.0, 3.0, 6.0, 4.0, 1.0, 4.0, 3.0, 4.0], "bins": [-36.125, -35.113037109375, -34.10107421875, -33.089111328125, -32.0771484375, -31.065185546875, -30.05322265625, -29.041259765625, -28.029296875, -27.017333984375, -26.00537109375, -24.993408203125, -23.9814453125, -22.969482421875, -21.95751953125, -20.945556640625, -19.93359375, -18.921630859375, -17.90966796875, -16.897705078125, -15.8857421875, -14.873779296875, -13.86181640625, -12.849853515625, -11.837890625, -10.825927734375, -9.81396484375, -8.802001953125, -7.7900390625, -6.778076171875, -5.76611328125, -4.754150390625, -3.7421875, -2.730224609375, -1.71826171875, -0.706298828125, 0.3056640625, 1.317626953125, 2.32958984375, 3.341552734375, 4.353515625, 5.365478515625, 6.37744140625, 7.389404296875, 8.4013671875, 9.413330078125, 10.42529296875, 11.437255859375, 12.44921875, 13.461181640625, 14.47314453125, 15.485107421875, 16.4970703125, 17.509033203125, 18.52099609375, 19.532958984375, 20.544921875, 21.556884765625, 22.56884765625, 23.580810546875, 24.5927734375, 25.604736328125, 26.61669921875, 27.628662109375, 28.640625]}, "gradients/decoder.transformer.h.11.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 4.0, 1.0, 7.0, 4.0, 9.0, 14.0, 16.0, 18.0, 25.0, 45.0, 68.0, 104.0, 131.0, 186.0, 272.0, 364.0, 509.0, 581.0, 487.0, 389.0, 271.0, 158.0, 137.0, 78.0, 61.0, 39.0, 33.0, 23.0, 23.0, 8.0, 6.0, 5.0, 7.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.765625, -29.8466796875, -28.927734375, -28.0087890625, -27.08984375, -26.1708984375, -25.251953125, -24.3330078125, -23.4140625, -22.4951171875, -21.576171875, -20.6572265625, -19.73828125, -18.8193359375, -17.900390625, -16.9814453125, -16.0625, -15.1435546875, -14.224609375, -13.3056640625, -12.38671875, -11.4677734375, -10.548828125, -9.6298828125, -8.7109375, -7.7919921875, -6.873046875, -5.9541015625, -5.03515625, -4.1162109375, -3.197265625, -2.2783203125, -1.359375, -0.4404296875, 0.478515625, 1.3974609375, 2.31640625, 3.2353515625, 4.154296875, 5.0732421875, 5.9921875, 6.9111328125, 7.830078125, 8.7490234375, 9.66796875, 10.5869140625, 11.505859375, 12.4248046875, 13.34375, 14.2626953125, 15.181640625, 16.1005859375, 17.01953125, 17.9384765625, 18.857421875, 19.7763671875, 20.6953125, 21.6142578125, 22.533203125, 23.4521484375, 24.37109375, 25.2900390625, 26.208984375, 27.1279296875, 28.046875]}, "gradients/decoder.transformer.h.11.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 6.0, 4.0, 10.0, 9.0, 25.0, 37.0, 48.0, 55.0, 72.0, 89.0, 151.0, 211.0, 284.0, 415.0, 1044.0, 69654.0, 4112391.0, 7761.0, 715.0, 344.0, 258.0, 177.0, 149.0, 108.0, 82.0, 60.0, 31.0, 33.0, 18.0, 12.0, 12.0, 7.0, 6.0, 2.0, 5.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-138.375, -133.814453125, -129.25390625, -124.693359375, -120.1328125, -115.572265625, -111.01171875, -106.451171875, -101.890625, -97.330078125, -92.76953125, -88.208984375, -83.6484375, -79.087890625, -74.52734375, -69.966796875, -65.40625, -60.845703125, -56.28515625, -51.724609375, -47.1640625, -42.603515625, -38.04296875, -33.482421875, -28.921875, -24.361328125, -19.80078125, -15.240234375, -10.6796875, -6.119140625, -1.55859375, 3.001953125, 7.5625, 12.123046875, 16.68359375, 21.244140625, 25.8046875, 30.365234375, 34.92578125, 39.486328125, 44.046875, 48.607421875, 53.16796875, 57.728515625, 62.2890625, 66.849609375, 71.41015625, 75.970703125, 80.53125, 85.091796875, 89.65234375, 94.212890625, 98.7734375, 103.333984375, 107.89453125, 112.455078125, 117.015625, 121.576171875, 126.13671875, 130.697265625, 135.2578125, 139.818359375, 144.37890625, 148.939453125, 153.5]}, "gradients/decoder.transformer.h.11.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 113.0, 648.0, 234.0, 18.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-977.7388916015625, -959.5175170898438, -941.296142578125, -923.0747680664062, -904.8533935546875, -886.6320190429688, -868.41064453125, -850.1892700195312, -831.9678955078125, -813.7465209960938, -795.525146484375, -777.3037719726562, -759.0823974609375, -740.8610229492188, -722.6396484375, -704.4182739257812, -686.1968994140625, -667.9755249023438, -649.754150390625, -631.5327758789062, -613.3114013671875, -595.0900268554688, -576.86865234375, -558.6472778320312, -540.4259643554688, -522.20458984375, -503.98321533203125, -485.7618408203125, -467.54046630859375, -449.319091796875, -431.09771728515625, -412.8763427734375, -394.65496826171875, -376.43359375, -358.21221923828125, -339.9908447265625, -321.76947021484375, -303.548095703125, -285.32672119140625, -267.1053466796875, -248.88397216796875, -230.66259765625, -212.44122314453125, -194.2198486328125, -175.99847412109375, -157.777099609375, -139.5557403564453, -121.33436584472656, -103.11299133300781, -84.89161682128906, -66.67024230957031, -48.448875427246094, -30.227500915527344, -12.006126403808594, 6.215240478515625, 24.436614990234375, 42.657989501953125, 60.879364013671875, 79.10073852539062, 97.32210540771484, 115.5434799194336, 133.76486206054688, 151.98622131347656, 170.2075958251953, 188.42897033691406]}, "gradients/decoder.transformer.h.11.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 1.0, 2.0, 5.0, 10.0, 4.0, 8.0, 10.0, 8.0, 12.0, 18.0, 18.0, 20.0, 19.0, 19.0, 18.0, 31.0, 38.0, 31.0, 32.0, 36.0, 43.0, 43.0, 43.0, 49.0, 40.0, 56.0, 36.0, 33.0, 20.0, 30.0, 32.0, 31.0, 27.0, 25.0, 21.0, 22.0, 20.0, 19.0, 18.0, 12.0, 9.0, 11.0, 5.0, 5.0, 5.0, 2.0, 5.0, 4.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-79.32612609863281, -76.93557739257812, -74.54502868652344, -72.15448760986328, -69.7639389038086, -67.3733901977539, -64.98284149169922, -62.5922966003418, -60.201751708984375, -57.81120300292969, -55.420658111572266, -53.03010940551758, -50.639564514160156, -48.24901580810547, -45.85846710205078, -43.46792221069336, -41.07737350463867, -38.686824798583984, -36.29627990722656, -33.905731201171875, -31.515186309814453, -29.124637603759766, -26.73409080505371, -24.343544006347656, -21.9529972076416, -19.562450408935547, -17.171903610229492, -14.781355857849121, -12.390809059143066, -10.000262260437012, -7.609714508056641, -5.219167709350586, -2.8286209106445312, -0.43807387351989746, 1.9524731636047363, 4.343020439147949, 6.733567237854004, 9.124114036560059, 11.51466178894043, 13.905208587646484, 16.29575538635254, 18.686302185058594, 21.07684898376465, 23.467395782470703, 25.85794448852539, 28.248489379882812, 30.6390380859375, 33.02958679199219, 35.42013168334961, 37.8106803894043, 40.20122528076172, 42.591773986816406, 44.98231887817383, 47.372867584228516, 49.76341247558594, 52.153961181640625, 54.54450988769531, 56.93505859375, 59.32560348510742, 61.71615219116211, 64.10669708251953, 66.49724578857422, 68.8877944946289, 71.27833557128906, 73.66888427734375]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 8.0, 8.0, 4.0, 11.0, 17.0, 16.0, 19.0, 21.0, 11.0, 26.0, 20.0, 30.0, 28.0, 35.0, 36.0, 42.0, 44.0, 35.0, 45.0, 40.0, 44.0, 52.0, 41.0, 37.0, 40.0, 33.0, 32.0, 28.0, 25.0, 22.0, 24.0, 19.0, 18.0, 13.0, 13.0, 11.0, 13.0, 8.0, 7.0, 5.0, 10.0, 6.0, 4.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-12.9609375, -12.5570068359375, -12.153076171875, -11.7491455078125, -11.34521484375, -10.9412841796875, -10.537353515625, -10.1334228515625, -9.7294921875, -9.3255615234375, -8.921630859375, -8.5177001953125, -8.11376953125, -7.7098388671875, -7.305908203125, -6.9019775390625, -6.498046875, -6.0941162109375, -5.690185546875, -5.2862548828125, -4.88232421875, -4.4783935546875, -4.074462890625, -3.6705322265625, -3.2666015625, -2.8626708984375, -2.458740234375, -2.0548095703125, -1.65087890625, -1.2469482421875, -0.843017578125, -0.4390869140625, -0.03515625, 0.3687744140625, 0.772705078125, 1.1766357421875, 1.58056640625, 1.9844970703125, 2.388427734375, 2.7923583984375, 3.1962890625, 3.6002197265625, 4.004150390625, 4.4080810546875, 4.81201171875, 5.2159423828125, 5.619873046875, 6.0238037109375, 6.427734375, 6.8316650390625, 7.235595703125, 7.6395263671875, 8.04345703125, 8.4473876953125, 8.851318359375, 9.2552490234375, 9.6591796875, 10.0631103515625, 10.467041015625, 10.8709716796875, 11.27490234375, 11.6788330078125, 12.082763671875, 12.4866943359375, 12.890625]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 5.0, 8.0, 10.0, 24.0, 25.0, 26.0, 47.0, 67.0, 132.0, 140.0, 228.0, 352.0, 487.0, 790.0, 1119.0, 1751.0, 2650.0, 4057.0, 6327.0, 9524.0, 14745.0, 22743.0, 34273.0, 51943.0, 79520.0, 121503.0, 181205.0, 176133.0, 116358.0, 76493.0, 50264.0, 33275.0, 21696.0, 14011.0, 9182.0, 5941.0, 3972.0, 2552.0, 1663.0, 1058.0, 785.0, 513.0, 312.0, 217.0, 142.0, 98.0, 65.0, 41.0, 38.0, 22.0, 12.0, 14.0, 2.0, 6.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2216796875, -1.1808929443359375, -1.140106201171875, -1.0993194580078125, -1.05853271484375, -1.0177459716796875, -0.976959228515625, -0.9361724853515625, -0.8953857421875, -0.8545989990234375, -0.813812255859375, -0.7730255126953125, -0.73223876953125, -0.6914520263671875, -0.650665283203125, -0.6098785400390625, -0.569091796875, -0.5283050537109375, -0.487518310546875, -0.4467315673828125, -0.40594482421875, -0.3651580810546875, -0.324371337890625, -0.2835845947265625, -0.2427978515625, -0.2020111083984375, -0.161224365234375, -0.1204376220703125, -0.07965087890625, -0.0388641357421875, 0.001922607421875, 0.0427093505859375, 0.08349609375, 0.1242828369140625, 0.165069580078125, 0.2058563232421875, 0.24664306640625, 0.2874298095703125, 0.328216552734375, 0.3690032958984375, 0.4097900390625, 0.4505767822265625, 0.491363525390625, 0.5321502685546875, 0.57293701171875, 0.6137237548828125, 0.654510498046875, 0.6952972412109375, 0.736083984375, 0.7768707275390625, 0.817657470703125, 0.8584442138671875, 0.89923095703125, 0.9400177001953125, 0.980804443359375, 1.0215911865234375, 1.0623779296875, 1.1031646728515625, 1.143951416015625, 1.1847381591796875, 1.22552490234375, 1.2663116455078125, 1.307098388671875, 1.3478851318359375, 1.388671875]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 5.0, 4.0, 3.0, 6.0, 8.0, 11.0, 12.0, 12.0, 18.0, 18.0, 19.0, 16.0, 27.0, 37.0, 34.0, 36.0, 30.0, 34.0, 36.0, 41.0, 63.0, 42.0, 1062.0, 39.0, 51.0, 26.0, 40.0, 32.0, 38.0, 26.0, 25.0, 30.0, 24.0, 22.0, 23.0, 15.0, 13.0, 13.0, 7.0, 8.0, 8.0, 10.0, 3.0, 1.0, 2.0, 2.0, 2.0, 4.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.765625, -7.501953125, -7.23828125, -6.974609375, -6.7109375, -6.447265625, -6.18359375, -5.919921875, -5.65625, -5.392578125, -5.12890625, -4.865234375, -4.6015625, -4.337890625, -4.07421875, -3.810546875, -3.546875, -3.283203125, -3.01953125, -2.755859375, -2.4921875, -2.228515625, -1.96484375, -1.701171875, -1.4375, -1.173828125, -0.91015625, -0.646484375, -0.3828125, -0.119140625, 0.14453125, 0.408203125, 0.671875, 0.935546875, 1.19921875, 1.462890625, 1.7265625, 1.990234375, 2.25390625, 2.517578125, 2.78125, 3.044921875, 3.30859375, 3.572265625, 3.8359375, 4.099609375, 4.36328125, 4.626953125, 4.890625, 5.154296875, 5.41796875, 5.681640625, 5.9453125, 6.208984375, 6.47265625, 6.736328125, 7.0, 7.263671875, 7.52734375, 7.791015625, 8.0546875, 8.318359375, 8.58203125, 8.845703125, 9.109375]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 11.0, 11.0, 28.0, 37.0, 44.0, 81.0, 111.0, 146.0, 241.0, 352.0, 498.0, 762.0, 1207.0, 1793.0, 2676.0, 4238.0, 6634.0, 10294.0, 15939.0, 24927.0, 39621.0, 63883.0, 102565.0, 169214.0, 1263718.0, 146726.0, 89584.0, 55146.0, 35077.0, 21869.0, 13983.0, 8917.0, 5851.0, 3824.0, 2420.0, 1588.0, 1082.0, 681.0, 479.0, 283.0, 206.0, 111.0, 93.0, 63.0, 45.0, 25.0, 14.0, 9.0, 9.0, 9.0, 6.0, 0.0, 2.0, 1.0, 2.0], "bins": [-1.4306640625, -1.3877716064453125, -1.344879150390625, -1.3019866943359375, -1.25909423828125, -1.2162017822265625, -1.173309326171875, -1.1304168701171875, -1.0875244140625, -1.0446319580078125, -1.001739501953125, -0.9588470458984375, -0.91595458984375, -0.8730621337890625, -0.830169677734375, -0.7872772216796875, -0.744384765625, -0.7014923095703125, -0.658599853515625, -0.6157073974609375, -0.57281494140625, -0.5299224853515625, -0.487030029296875, -0.4441375732421875, -0.4012451171875, -0.3583526611328125, -0.315460205078125, -0.2725677490234375, -0.22967529296875, -0.1867828369140625, -0.143890380859375, -0.1009979248046875, -0.05810546875, -0.0152130126953125, 0.027679443359375, 0.0705718994140625, 0.11346435546875, 0.1563568115234375, 0.199249267578125, 0.2421417236328125, 0.2850341796875, 0.3279266357421875, 0.370819091796875, 0.4137115478515625, 0.45660400390625, 0.4994964599609375, 0.542388916015625, 0.5852813720703125, 0.628173828125, 0.6710662841796875, 0.713958740234375, 0.7568511962890625, 0.79974365234375, 0.8426361083984375, 0.885528564453125, 0.9284210205078125, 0.9713134765625, 1.0142059326171875, 1.057098388671875, 1.0999908447265625, 1.14288330078125, 1.1857757568359375, 1.228668212890625, 1.2715606689453125, 1.314453125]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 5.0, 2.0, 3.0, 2.0, 6.0, 5.0, 7.0, 10.0, 7.0, 10.0, 10.0, 29.0, 21.0, 41.0, 32.0, 47.0, 63.0, 84.0, 65.0, 75.0, 83.0, 82.0, 85.0, 54.0, 45.0, 31.0, 26.0, 19.0, 10.0, 8.0, 6.0, 7.0, 5.0, 3.0, 5.0, 5.0, 5.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0034961700439453125, -0.0033777058124542236, -0.0032592415809631348, -0.003140777349472046, -0.003022313117980957, -0.002903848886489868, -0.0027853846549987793, -0.0026669204235076904, -0.0025484561920166016, -0.0024299919605255127, -0.002311527729034424, -0.002193063497543335, -0.002074599266052246, -0.0019561350345611572, -0.0018376708030700684, -0.0017192065715789795, -0.0016007423400878906, -0.0014822781085968018, -0.0013638138771057129, -0.001245349645614624, -0.0011268854141235352, -0.0010084211826324463, -0.0008899569511413574, -0.0007714927196502686, -0.0006530284881591797, -0.0005345642566680908, -0.00041610002517700195, -0.0002976357936859131, -0.00017917156219482422, -6.070733070373535e-05, 5.7756900787353516e-05, 0.00017622113227844238, 0.00029468536376953125, 0.0004131495952606201, 0.000531613826751709, 0.0006500780582427979, 0.0007685422897338867, 0.0008870065212249756, 0.0010054707527160645, 0.0011239349842071533, 0.0012423992156982422, 0.001360863447189331, 0.00147932767868042, 0.0015977919101715088, 0.0017162561416625977, 0.0018347203731536865, 0.0019531846046447754, 0.0020716488361358643, 0.002190113067626953, 0.002308577299118042, 0.002427041530609131, 0.0025455057621002197, 0.0026639699935913086, 0.0027824342250823975, 0.0029008984565734863, 0.003019362688064575, 0.003137826919555664, 0.003256291151046753, 0.003374755382537842, 0.0034932196140289307, 0.0036116838455200195, 0.0037301480770111084, 0.0038486123085021973, 0.003967076539993286, 0.004085540771484375]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 5.0, 3.0, 4.0, 5.0, 5.0, 6.0, 5.0, 10.0, 6.0, 13.0, 19.0, 23.0, 37.0, 47.0, 55.0, 107.0, 205.0, 477.0, 6743.0, 1038206.0, 1743.0, 334.0, 156.0, 86.0, 53.0, 41.0, 44.0, 21.0, 26.0, 16.0, 13.0, 4.0, 11.0, 7.0, 4.0, 5.0, 3.0, 4.0, 1.0, 3.0, 4.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.09490966796875, -0.0921478271484375, -0.089385986328125, -0.0866241455078125, -0.0838623046875, -0.0811004638671875, -0.078338623046875, -0.0755767822265625, -0.07281494140625, -0.0700531005859375, -0.067291259765625, -0.0645294189453125, -0.061767578125, -0.0590057373046875, -0.056243896484375, -0.0534820556640625, -0.05072021484375, -0.0479583740234375, -0.045196533203125, -0.0424346923828125, -0.0396728515625, -0.0369110107421875, -0.034149169921875, -0.0313873291015625, -0.02862548828125, -0.0258636474609375, -0.023101806640625, -0.0203399658203125, -0.017578125, -0.0148162841796875, -0.012054443359375, -0.0092926025390625, -0.00653076171875, -0.0037689208984375, -0.001007080078125, 0.0017547607421875, 0.0045166015625, 0.0072784423828125, 0.010040283203125, 0.0128021240234375, 0.01556396484375, 0.0183258056640625, 0.021087646484375, 0.0238494873046875, 0.026611328125, 0.0293731689453125, 0.032135009765625, 0.0348968505859375, 0.03765869140625, 0.0404205322265625, 0.043182373046875, 0.0459442138671875, 0.0487060546875, 0.0514678955078125, 0.054229736328125, 0.0569915771484375, 0.05975341796875, 0.0625152587890625, 0.065277099609375, 0.0680389404296875, 0.07080078125, 0.0735626220703125, 0.076324462890625, 0.0790863037109375, 0.08184814453125]}, "gradients/decoder.transformer.h.11.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 7.0, 59.0, 341.0, 488.0, 106.0, 13.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0021171297412365675, -0.0019076892640441656, -0.0016982487868517637, -0.00148880819324404, -0.0012793677160516381, -0.0010699272388592362, -0.0008604866452515125, -0.0006510461680591106, -0.00044160569086670876, -0.0002321651845704764, -2.272467827424407e-05, 0.00018671585712581873, 0.0003961563343182206, 0.0006055968115106225, 0.0008150374051183462, 0.001024477882310748, 0.00123391835950315, 0.0014433588366955519, 0.0016527993138879538, 0.0018622399074956775, 0.002071680501103401, 0.002281120978295803, 0.002490561455488205, 0.002700001932680607, 0.0029094424098730087, 0.0031188828870654106, 0.0033283233642578125, 0.0035377638414502144, 0.0037472043186426163, 0.003956644795835018, 0.00416608527302742, 0.004375525750219822, 0.004584966227412224, 0.004794406704604626, 0.005003847181797028, 0.0052132876589894295, 0.005422728136181831, 0.005632168613374233, 0.005841609090566635, 0.006051049567759037, 0.006260490044951439, 0.006469930522143841, 0.006679370999336243, 0.0068888114765286446, 0.0070982519537210464, 0.007307692430913448, 0.00751713290810585, 0.007726573385298252, 0.007936013862490654, 0.008145454339683056, 0.008354894816875458, 0.00856433529406786, 0.008773775771260262, 0.008983216248452663, 0.009192656725645065, 0.009402097202837467, 0.009611538611352444, 0.009820979088544846, 0.010030419565737247, 0.01023986004292965, 0.010449300520122051, 0.010658740997314453, 0.010868181474506855, 0.011077621951699257, 0.011287062428891659]}, "gradients/decoder.transformer.h.11.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 3.0, 2.0, 5.0, 4.0, 6.0, 6.0, 9.0, 3.0, 10.0, 15.0, 17.0, 21.0, 17.0, 15.0, 14.0, 25.0, 21.0, 36.0, 25.0, 36.0, 25.0, 33.0, 37.0, 29.0, 47.0, 44.0, 35.0, 42.0, 40.0, 39.0, 38.0, 38.0, 22.0, 36.0, 28.0, 27.0, 22.0, 17.0, 18.0, 22.0, 13.0, 13.0, 10.0, 13.0, 7.0, 8.0, 9.0, 3.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0014216899871826172, -0.0013732193037867546, -0.001324748620390892, -0.0012762779369950294, -0.0012278072535991669, -0.0011793365702033043, -0.0011308658868074417, -0.0010823952034115791, -0.0010339245200157166, -0.000985453836619854, -0.0009369831532239914, -0.0008885124698281288, -0.0008400417864322662, -0.0007915711030364037, -0.0007431004196405411, -0.0006946297362446785, -0.0006461590528488159, -0.0005976883694529533, -0.0005492176860570908, -0.0005007470026612282, -0.0004522763192653656, -0.000403805635869503, -0.00035533495247364044, -0.00030686426907777786, -0.0002583935856819153, -0.0002099229022860527, -0.00016145221889019012, -0.00011298153549432755, -6.451085209846497e-05, -1.6040168702602386e-05, 3.243051469326019e-05, 8.090119808912277e-05, 0.00012937188148498535, 0.00017784256488084793, 0.0002263132482767105, 0.0002747839316725731, 0.00032325461506843567, 0.00037172529846429825, 0.00042019598186016083, 0.0004686666652560234, 0.000517137348651886, 0.0005656080320477486, 0.0006140787154436111, 0.0006625493988394737, 0.0007110200822353363, 0.0007594907656311989, 0.0008079614490270615, 0.000856432132422924, 0.0009049028158187866, 0.0009533734992146492, 0.0010018441826105118, 0.0010503148660063744, 0.001098785549402237, 0.0011472562327980995, 0.001195726916193962, 0.0012441975995898247, 0.0012926682829856873, 0.0013411389663815498, 0.0013896096497774124, 0.001438080333173275, 0.0014865510165691376, 0.0015350216999650002, 0.0015834923833608627, 0.0016319630667567253, 0.0016804337501525879]}, "gradients/decoder.transformer.h.11.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 8.0, 8.0, 4.0, 11.0, 17.0, 16.0, 19.0, 21.0, 11.0, 26.0, 20.0, 30.0, 28.0, 35.0, 36.0, 42.0, 44.0, 35.0, 45.0, 40.0, 44.0, 52.0, 41.0, 37.0, 40.0, 33.0, 32.0, 28.0, 25.0, 22.0, 24.0, 19.0, 18.0, 13.0, 13.0, 12.0, 12.0, 8.0, 7.0, 5.0, 10.0, 6.0, 4.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-12.9609375, -12.5570068359375, -12.153076171875, -11.7491455078125, -11.34521484375, -10.9412841796875, -10.537353515625, -10.1334228515625, -9.7294921875, -9.3255615234375, -8.921630859375, -8.5177001953125, -8.11376953125, -7.7098388671875, -7.305908203125, -6.9019775390625, -6.498046875, -6.0941162109375, -5.690185546875, -5.2862548828125, -4.88232421875, -4.4783935546875, -4.074462890625, -3.6705322265625, -3.2666015625, -2.8626708984375, -2.458740234375, -2.0548095703125, -1.65087890625, -1.2469482421875, -0.843017578125, -0.4390869140625, -0.03515625, 0.3687744140625, 0.772705078125, 1.1766357421875, 1.58056640625, 1.9844970703125, 2.388427734375, 2.7923583984375, 3.1962890625, 3.6002197265625, 4.004150390625, 4.4080810546875, 4.81201171875, 5.2159423828125, 5.619873046875, 6.0238037109375, 6.427734375, 6.8316650390625, 7.235595703125, 7.6395263671875, 8.04345703125, 8.4473876953125, 8.851318359375, 9.2552490234375, 9.6591796875, 10.0631103515625, 10.467041015625, 10.8709716796875, 11.27490234375, 11.6788330078125, 12.082763671875, 12.4866943359375, 12.890625]}, "gradients/decoder.transformer.h.11.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 2.0, 5.0, 4.0, 7.0, 9.0, 16.0, 15.0, 17.0, 20.0, 33.0, 50.0, 85.0, 141.0, 252.0, 477.0, 889.0, 1764.0, 3654.0, 7354.0, 15216.0, 33192.0, 79677.0, 222485.0, 417672.0, 156461.0, 59582.0, 25777.0, 11995.0, 5749.0, 2832.0, 1420.0, 720.0, 388.0, 230.0, 101.0, 80.0, 59.0, 34.0, 18.0, 20.0, 19.0, 17.0, 5.0, 4.0, 3.0, 0.0, 6.0, 3.0, 1.0, 0.0, 0.0, 4.0, 1.0, 1.0, 1.0], "bins": [-8.5390625, -8.2750244140625, -8.010986328125, -7.7469482421875, -7.48291015625, -7.2188720703125, -6.954833984375, -6.6907958984375, -6.4267578125, -6.1627197265625, -5.898681640625, -5.6346435546875, -5.37060546875, -5.1065673828125, -4.842529296875, -4.5784912109375, -4.314453125, -4.0504150390625, -3.786376953125, -3.5223388671875, -3.25830078125, -2.9942626953125, -2.730224609375, -2.4661865234375, -2.2021484375, -1.9381103515625, -1.674072265625, -1.4100341796875, -1.14599609375, -0.8819580078125, -0.617919921875, -0.3538818359375, -0.08984375, 0.1741943359375, 0.438232421875, 0.7022705078125, 0.96630859375, 1.2303466796875, 1.494384765625, 1.7584228515625, 2.0224609375, 2.2864990234375, 2.550537109375, 2.8145751953125, 3.07861328125, 3.3426513671875, 3.606689453125, 3.8707275390625, 4.134765625, 4.3988037109375, 4.662841796875, 4.9268798828125, 5.19091796875, 5.4549560546875, 5.718994140625, 5.9830322265625, 6.2470703125, 6.5111083984375, 6.775146484375, 7.0391845703125, 7.30322265625, 7.5672607421875, 7.831298828125, 8.0953369140625, 8.359375]}, "gradients/decoder.transformer.h.11.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 2.0, 4.0, 5.0, 1.0, 6.0, 7.0, 8.0, 11.0, 12.0, 11.0, 9.0, 21.0, 27.0, 22.0, 35.0, 43.0, 46.0, 40.0, 36.0, 55.0, 81.0, 307.0, 1676.0, 119.0, 69.0, 61.0, 45.0, 44.0, 38.0, 34.0, 30.0, 27.0, 22.0, 24.0, 15.0, 14.0, 11.0, 8.0, 9.0, 4.0, 5.0, 3.0, 3.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-53.25, -51.6484375, -50.046875, -48.4453125, -46.84375, -45.2421875, -43.640625, -42.0390625, -40.4375, -38.8359375, -37.234375, -35.6328125, -34.03125, -32.4296875, -30.828125, -29.2265625, -27.625, -26.0234375, -24.421875, -22.8203125, -21.21875, -19.6171875, -18.015625, -16.4140625, -14.8125, -13.2109375, -11.609375, -10.0078125, -8.40625, -6.8046875, -5.203125, -3.6015625, -2.0, -0.3984375, 1.203125, 2.8046875, 4.40625, 6.0078125, 7.609375, 9.2109375, 10.8125, 12.4140625, 14.015625, 15.6171875, 17.21875, 18.8203125, 20.421875, 22.0234375, 23.625, 25.2265625, 26.828125, 28.4296875, 30.03125, 31.6328125, 33.234375, 34.8359375, 36.4375, 38.0390625, 39.640625, 41.2421875, 42.84375, 44.4453125, 46.046875, 47.6484375, 49.25]}, "gradients/decoder.transformer.h.11.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 2.0, 2.0, 1.0, 2.0, 7.0, 6.0, 10.0, 10.0, 16.0, 18.0, 26.0, 39.0, 39.0, 44.0, 82.0, 93.0, 128.0, 172.0, 256.0, 327.0, 813.0, 15049.0, 3118245.0, 8408.0, 732.0, 356.0, 202.0, 155.0, 108.0, 96.0, 54.0, 48.0, 37.0, 28.0, 23.0, 12.0, 14.0, 10.0, 10.0, 7.0, 5.0, 4.0, 1.0, 6.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-122.6875, -118.7861328125, -114.884765625, -110.9833984375, -107.08203125, -103.1806640625, -99.279296875, -95.3779296875, -91.4765625, -87.5751953125, -83.673828125, -79.7724609375, -75.87109375, -71.9697265625, -68.068359375, -64.1669921875, -60.265625, -56.3642578125, -52.462890625, -48.5615234375, -44.66015625, -40.7587890625, -36.857421875, -32.9560546875, -29.0546875, -25.1533203125, -21.251953125, -17.3505859375, -13.44921875, -9.5478515625, -5.646484375, -1.7451171875, 2.15625, 6.0576171875, 9.958984375, 13.8603515625, 17.76171875, 21.6630859375, 25.564453125, 29.4658203125, 33.3671875, 37.2685546875, 41.169921875, 45.0712890625, 48.97265625, 52.8740234375, 56.775390625, 60.6767578125, 64.578125, 68.4794921875, 72.380859375, 76.2822265625, 80.18359375, 84.0849609375, 87.986328125, 91.8876953125, 95.7890625, 99.6904296875, 103.591796875, 107.4931640625, 111.39453125, 115.2958984375, 119.197265625, 123.0986328125, 127.0]}, "gradients/decoder.transformer.h.11.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 15.0, 40.0, 119.0, 201.0, 240.0, 223.0, 112.0, 41.0, 17.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-60.34346008300781, -58.22357177734375, -56.10367965698242, -53.98379135131836, -51.86389923095703, -49.74401092529297, -47.624122619628906, -45.50423049926758, -43.38433837890625, -41.26445007324219, -39.14455795288086, -37.0246696472168, -34.90477752685547, -32.784889221191406, -30.66499900817871, -28.545108795166016, -26.425220489501953, -24.305330276489258, -22.185440063476562, -20.0655517578125, -17.945659637451172, -15.825770378112793, -13.705881118774414, -11.585990905761719, -9.466100692749023, -7.346210479736328, -5.226320743560791, -3.106431007385254, -0.9865407943725586, 1.1333494186401367, 3.2532386779785156, 5.373128890991211, 7.493019104003906, 9.612909317016602, 11.732799530029297, 13.852688789367676, 15.972579002380371, 18.09246826171875, 20.212358474731445, 22.33224868774414, 24.452138900756836, 26.57202911376953, 28.691919326782227, 30.811809539794922, 32.931697845458984, 35.05158996582031, 37.171478271484375, 39.29136657714844, 41.411258697509766, 43.53114700317383, 45.651039123535156, 47.77092742919922, 49.89081954956055, 52.01070785522461, 54.13059997558594, 56.25048828125, 58.37037658691406, 60.490264892578125, 62.61015701293945, 64.73004913330078, 66.84993743896484, 68.9698257446289, 71.08971405029297, 73.20960998535156, 75.32949829101562]}, "gradients/decoder.transformer.h.11.ln_1.bias": {"_type": "histogram", "values": [3.0, 0.0, 3.0, 3.0, 0.0, 3.0, 2.0, 1.0, 3.0, 2.0, 5.0, 4.0, 8.0, 15.0, 14.0, 6.0, 13.0, 16.0, 19.0, 22.0, 26.0, 19.0, 18.0, 19.0, 30.0, 29.0, 27.0, 30.0, 26.0, 42.0, 38.0, 34.0, 28.0, 41.0, 43.0, 40.0, 35.0, 31.0, 22.0, 29.0, 28.0, 24.0, 24.0, 30.0, 20.0, 16.0, 22.0, 23.0, 12.0, 11.0, 10.0, 13.0, 8.0, 7.0, 6.0, 2.0, 4.0, 2.0, 4.0, 4.0, 1.0, 0.0, 3.0, 1.0], "bins": [-103.20541381835938, -100.02490234375, -96.84439849853516, -93.66388702392578, -90.4833755493164, -87.30287170410156, -84.12236022949219, -80.94184875488281, -77.76133728027344, -74.58082580566406, -71.40032196044922, -68.21981048583984, -65.03929901123047, -61.85879135131836, -58.67828369140625, -55.497772216796875, -52.31726837158203, -49.13676071166992, -45.95624923706055, -42.77574157714844, -39.59523010253906, -36.41472244262695, -33.234214782714844, -30.0537052154541, -26.87319564819336, -23.692686080932617, -20.512176513671875, -17.331668853759766, -14.151159286499023, -10.970649719238281, -7.790142059326172, -4.60963249206543, -1.4291229248046875, 1.7513861656188965, 4.9318952560424805, 8.112403869628906, 11.292913436889648, 14.47342300415039, 17.6539306640625, 20.834440231323242, 24.014949798583984, 27.195459365844727, 30.37596893310547, 33.55647659301758, 36.73698425292969, 39.91749572753906, 43.09800338745117, 46.27851104736328, 49.459022521972656, 52.639530181884766, 55.82004165649414, 59.00054931640625, 62.181060791015625, 65.361572265625, 68.54207611083984, 71.72258758544922, 74.90309143066406, 78.08360290527344, 81.26410675048828, 84.44461822509766, 87.62512969970703, 90.80563354492188, 93.98614501953125, 97.16665649414062, 100.34716796875]}, "gradients/decoder.transformer.h.10.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 4.0, 4.0, 7.0, 8.0, 11.0, 12.0, 18.0, 10.0, 16.0, 15.0, 21.0, 26.0, 23.0, 24.0, 31.0, 35.0, 32.0, 37.0, 42.0, 37.0, 45.0, 44.0, 40.0, 38.0, 35.0, 41.0, 41.0, 46.0, 27.0, 24.0, 29.0, 28.0, 27.0, 18.0, 14.0, 14.0, 9.0, 12.0, 12.0, 10.0, 7.0, 9.0, 7.0, 4.0, 5.0, 3.0, 1.0, 1.0, 4.0, 0.0, 1.0, 2.0, 2.0], "bins": [-13.4140625, -13.0093994140625, -12.604736328125, -12.2000732421875, -11.79541015625, -11.3907470703125, -10.986083984375, -10.5814208984375, -10.1767578125, -9.7720947265625, -9.367431640625, -8.9627685546875, -8.55810546875, -8.1534423828125, -7.748779296875, -7.3441162109375, -6.939453125, -6.5347900390625, -6.130126953125, -5.7254638671875, -5.32080078125, -4.9161376953125, -4.511474609375, -4.1068115234375, -3.7021484375, -3.2974853515625, -2.892822265625, -2.4881591796875, -2.08349609375, -1.6788330078125, -1.274169921875, -0.8695068359375, -0.46484375, -0.0601806640625, 0.344482421875, 0.7491455078125, 1.15380859375, 1.5584716796875, 1.963134765625, 2.3677978515625, 2.7724609375, 3.1771240234375, 3.581787109375, 3.9864501953125, 4.39111328125, 4.7957763671875, 5.200439453125, 5.6051025390625, 6.009765625, 6.4144287109375, 6.819091796875, 7.2237548828125, 7.62841796875, 8.0330810546875, 8.437744140625, 8.8424072265625, 9.2470703125, 9.6517333984375, 10.056396484375, 10.4610595703125, 10.86572265625, 11.2703857421875, 11.675048828125, 12.0797119140625, 12.484375]}, "gradients/decoder.transformer.h.10.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 5.0, 5.0, 6.0, 9.0, 15.0, 20.0, 10.0, 15.0, 13.0, 21.0, 20.0, 26.0, 43.0, 56.0, 84.0, 153.0, 318.0, 860.0, 4433.0, 67469.0, 2500117.0, 1582538.0, 33878.0, 2754.0, 677.0, 276.0, 124.0, 75.0, 54.0, 36.0, 31.0, 18.0, 21.0, 16.0, 14.0, 15.0, 11.0, 11.0, 9.0, 7.0, 6.0, 4.0, 2.0, 5.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-47.1875, -45.75341796875, -44.3193359375, -42.88525390625, -41.451171875, -40.01708984375, -38.5830078125, -37.14892578125, -35.71484375, -34.28076171875, -32.8466796875, -31.41259765625, -29.978515625, -28.54443359375, -27.1103515625, -25.67626953125, -24.2421875, -22.80810546875, -21.3740234375, -19.93994140625, -18.505859375, -17.07177734375, -15.6376953125, -14.20361328125, -12.76953125, -11.33544921875, -9.9013671875, -8.46728515625, -7.033203125, -5.59912109375, -4.1650390625, -2.73095703125, -1.296875, 0.13720703125, 1.5712890625, 3.00537109375, 4.439453125, 5.87353515625, 7.3076171875, 8.74169921875, 10.17578125, 11.60986328125, 13.0439453125, 14.47802734375, 15.912109375, 17.34619140625, 18.7802734375, 20.21435546875, 21.6484375, 23.08251953125, 24.5166015625, 25.95068359375, 27.384765625, 28.81884765625, 30.2529296875, 31.68701171875, 33.12109375, 34.55517578125, 35.9892578125, 37.42333984375, 38.857421875, 40.29150390625, 41.7255859375, 43.15966796875, 44.59375]}, "gradients/decoder.transformer.h.10.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 4.0, 4.0, 8.0, 7.0, 9.0, 11.0, 14.0, 18.0, 28.0, 24.0, 51.0, 56.0, 86.0, 101.0, 154.0, 178.0, 235.0, 284.0, 368.0, 411.0, 418.0, 381.0, 304.0, 222.0, 157.0, 123.0, 94.0, 76.0, 56.0, 44.0, 28.0, 25.0, 15.0, 23.0, 20.0, 9.0, 6.0, 5.0, 6.0, 5.0, 3.0, 3.0, 0.0, 0.0, 4.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0], "bins": [-21.875, -21.2119140625, -20.548828125, -19.8857421875, -19.22265625, -18.5595703125, -17.896484375, -17.2333984375, -16.5703125, -15.9072265625, -15.244140625, -14.5810546875, -13.91796875, -13.2548828125, -12.591796875, -11.9287109375, -11.265625, -10.6025390625, -9.939453125, -9.2763671875, -8.61328125, -7.9501953125, -7.287109375, -6.6240234375, -5.9609375, -5.2978515625, -4.634765625, -3.9716796875, -3.30859375, -2.6455078125, -1.982421875, -1.3193359375, -0.65625, 0.0068359375, 0.669921875, 1.3330078125, 1.99609375, 2.6591796875, 3.322265625, 3.9853515625, 4.6484375, 5.3115234375, 5.974609375, 6.6376953125, 7.30078125, 7.9638671875, 8.626953125, 9.2900390625, 9.953125, 10.6162109375, 11.279296875, 11.9423828125, 12.60546875, 13.2685546875, 13.931640625, 14.5947265625, 15.2578125, 15.9208984375, 16.583984375, 17.2470703125, 17.91015625, 18.5732421875, 19.236328125, 19.8994140625, 20.5625]}, "gradients/decoder.transformer.h.10.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 0.0, 6.0, 3.0, 6.0, 5.0, 6.0, 9.0, 12.0, 11.0, 18.0, 24.0, 26.0, 38.0, 34.0, 39.0, 56.0, 67.0, 74.0, 111.0, 130.0, 153.0, 181.0, 276.0, 396.0, 973.0, 15758.0, 3856875.0, 314365.0, 2575.0, 520.0, 326.0, 236.0, 203.0, 140.0, 138.0, 105.0, 82.0, 55.0, 53.0, 37.0, 36.0, 24.0, 18.0, 22.0, 21.0, 12.0, 14.0, 7.0, 8.0, 3.0, 0.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-97.8125, -94.6318359375, -91.451171875, -88.2705078125, -85.08984375, -81.9091796875, -78.728515625, -75.5478515625, -72.3671875, -69.1865234375, -66.005859375, -62.8251953125, -59.64453125, -56.4638671875, -53.283203125, -50.1025390625, -46.921875, -43.7412109375, -40.560546875, -37.3798828125, -34.19921875, -31.0185546875, -27.837890625, -24.6572265625, -21.4765625, -18.2958984375, -15.115234375, -11.9345703125, -8.75390625, -5.5732421875, -2.392578125, 0.7880859375, 3.96875, 7.1494140625, 10.330078125, 13.5107421875, 16.69140625, 19.8720703125, 23.052734375, 26.2333984375, 29.4140625, 32.5947265625, 35.775390625, 38.9560546875, 42.13671875, 45.3173828125, 48.498046875, 51.6787109375, 54.859375, 58.0400390625, 61.220703125, 64.4013671875, 67.58203125, 70.7626953125, 73.943359375, 77.1240234375, 80.3046875, 83.4853515625, 86.666015625, 89.8466796875, 93.02734375, 96.2080078125, 99.388671875, 102.5693359375, 105.75]}, "gradients/decoder.transformer.h.10.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 29.0, 328.0, 537.0, 118.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-436.3837890625, -421.9413757324219, -407.4989318847656, -393.0565185546875, -378.6141052246094, -364.1716613769531, -349.729248046875, -335.28680419921875, -320.8443908691406, -306.4019775390625, -291.95953369140625, -277.5171203613281, -263.07470703125, -248.63226318359375, -234.18984985351562, -219.74742126464844, -205.3050079345703, -190.86257934570312, -176.420166015625, -161.9777374267578, -147.53530883789062, -133.0928955078125, -118.65046691894531, -104.20803833007812, -89.76561737060547, -75.32319641113281, -60.880767822265625, -46.43834686279297, -31.995922088623047, -17.553497314453125, -3.1110763549804688, 11.331352233886719, 25.773773193359375, 40.2161979675293, 54.65862274169922, 69.10104370117188, 83.54347229003906, 97.98589324951172, 112.42831420898438, 126.87074279785156, 141.31317138671875, 155.75559997558594, 170.19801330566406, 184.64044189453125, 199.08287048339844, 213.52529907226562, 227.96771240234375, 242.41014099121094, 256.8525390625, 271.2949523925781, 285.7373962402344, 300.1798095703125, 314.6222229003906, 329.0646667480469, 343.507080078125, 357.94952392578125, 372.3919372558594, 386.8343505859375, 401.27679443359375, 415.7192077636719, 430.16162109375, 444.60406494140625, 459.0464782714844, 473.4888916015625, 487.93133544921875]}, "gradients/decoder.transformer.h.10.ln_2.bias": {"_type": "histogram", "values": [4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 5.0, 13.0, 5.0, 5.0, 7.0, 9.0, 10.0, 12.0, 17.0, 18.0, 17.0, 21.0, 21.0, 24.0, 32.0, 25.0, 31.0, 24.0, 31.0, 30.0, 40.0, 44.0, 44.0, 44.0, 37.0, 30.0, 30.0, 38.0, 31.0, 20.0, 32.0, 37.0, 28.0, 21.0, 31.0, 13.0, 13.0, 22.0, 16.0, 11.0, 11.0, 13.0, 9.0, 3.0, 2.0, 13.0, 4.0, 3.0, 2.0, 5.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-69.1353759765625, -66.96306610107422, -64.79075622558594, -62.618446350097656, -60.446136474609375, -58.273826599121094, -56.10151672363281, -53.92920684814453, -51.75689697265625, -49.58458709716797, -47.41227722167969, -45.239967346191406, -43.067657470703125, -40.895347595214844, -38.72303771972656, -36.55072784423828, -34.37841796875, -32.20610809326172, -30.033798217773438, -27.861488342285156, -25.689178466796875, -23.516868591308594, -21.344558715820312, -19.17224884033203, -16.99993896484375, -14.827629089355469, -12.655319213867188, -10.483009338378906, -8.310699462890625, -6.138389587402344, -3.9660797119140625, -1.7937698364257812, 0.3785400390625, 2.5508499145507812, 4.7231597900390625, 6.895469665527344, 9.067779541015625, 11.240089416503906, 13.412399291992188, 15.584709167480469, 17.75701904296875, 19.92932891845703, 22.101638793945312, 24.273948669433594, 26.446258544921875, 28.618568420410156, 30.790878295898438, 32.96318817138672, 35.135498046875, 37.30780792236328, 39.48011779785156, 41.652427673339844, 43.824737548828125, 45.997047424316406, 48.16935729980469, 50.34166717529297, 52.51397705078125, 54.68628692626953, 56.85859680175781, 59.030906677246094, 61.203216552734375, 63.375526428222656, 65.54783630371094, 67.72014617919922, 69.8924560546875]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 3.0, 1.0, 5.0, 7.0, 4.0, 7.0, 7.0, 14.0, 16.0, 17.0, 7.0, 19.0, 21.0, 22.0, 22.0, 28.0, 27.0, 39.0, 27.0, 34.0, 33.0, 40.0, 47.0, 42.0, 45.0, 49.0, 43.0, 31.0, 36.0, 40.0, 30.0, 37.0, 34.0, 25.0, 17.0, 21.0, 19.0, 10.0, 12.0, 12.0, 12.0, 13.0, 9.0, 8.0, 3.0, 3.0, 6.0, 3.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-12.71875, -12.3233642578125, -11.927978515625, -11.5325927734375, -11.13720703125, -10.7418212890625, -10.346435546875, -9.9510498046875, -9.5556640625, -9.1602783203125, -8.764892578125, -8.3695068359375, -7.97412109375, -7.5787353515625, -7.183349609375, -6.7879638671875, -6.392578125, -5.9971923828125, -5.601806640625, -5.2064208984375, -4.81103515625, -4.4156494140625, -4.020263671875, -3.6248779296875, -3.2294921875, -2.8341064453125, -2.438720703125, -2.0433349609375, -1.64794921875, -1.2525634765625, -0.857177734375, -0.4617919921875, -0.06640625, 0.3289794921875, 0.724365234375, 1.1197509765625, 1.51513671875, 1.9105224609375, 2.305908203125, 2.7012939453125, 3.0966796875, 3.4920654296875, 3.887451171875, 4.2828369140625, 4.67822265625, 5.0736083984375, 5.468994140625, 5.8643798828125, 6.259765625, 6.6551513671875, 7.050537109375, 7.4459228515625, 7.84130859375, 8.2366943359375, 8.632080078125, 9.0274658203125, 9.4228515625, 9.8182373046875, 10.213623046875, 10.6090087890625, 11.00439453125, 11.3997802734375, 11.795166015625, 12.1905517578125, 12.5859375]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 7.0, 6.0, 3.0, 10.0, 14.0, 18.0, 37.0, 58.0, 81.0, 91.0, 161.0, 260.0, 354.0, 619.0, 966.0, 1432.0, 2381.0, 3702.0, 5904.0, 9080.0, 14620.0, 23678.0, 38278.0, 61334.0, 97455.0, 155719.0, 213661.0, 156526.0, 98200.0, 61379.0, 38412.0, 23832.0, 14857.0, 9324.0, 5924.0, 3727.0, 2312.0, 1480.0, 938.0, 615.0, 385.0, 253.0, 177.0, 101.0, 57.0, 42.0, 32.0, 21.0, 14.0, 17.0, 6.0, 3.0, 2.0, 3.0, 0.0, 0.0, 1.0, 2.0], "bins": [-1.5048828125, -1.458587646484375, -1.41229248046875, -1.365997314453125, -1.3197021484375, -1.273406982421875, -1.22711181640625, -1.180816650390625, -1.134521484375, -1.088226318359375, -1.04193115234375, -0.995635986328125, -0.9493408203125, -0.903045654296875, -0.85675048828125, -0.810455322265625, -0.76416015625, -0.717864990234375, -0.67156982421875, -0.625274658203125, -0.5789794921875, -0.532684326171875, -0.48638916015625, -0.440093994140625, -0.393798828125, -0.347503662109375, -0.30120849609375, -0.254913330078125, -0.2086181640625, -0.162322998046875, -0.11602783203125, -0.069732666015625, -0.0234375, 0.022857666015625, 0.06915283203125, 0.115447998046875, 0.1617431640625, 0.208038330078125, 0.25433349609375, 0.300628662109375, 0.346923828125, 0.393218994140625, 0.43951416015625, 0.485809326171875, 0.5321044921875, 0.578399658203125, 0.62469482421875, 0.670989990234375, 0.71728515625, 0.763580322265625, 0.80987548828125, 0.856170654296875, 0.9024658203125, 0.948760986328125, 0.99505615234375, 1.041351318359375, 1.087646484375, 1.133941650390625, 1.18023681640625, 1.226531982421875, 1.2728271484375, 1.319122314453125, 1.36541748046875, 1.411712646484375, 1.4580078125]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 1.0, 3.0, 2.0, 9.0, 8.0, 6.0, 15.0, 17.0, 23.0, 19.0, 24.0, 23.0, 29.0, 33.0, 32.0, 33.0, 36.0, 33.0, 35.0, 39.0, 50.0, 1073.0, 31.0, 39.0, 38.0, 34.0, 29.0, 30.0, 39.0, 29.0, 31.0, 24.0, 24.0, 23.0, 22.0, 15.0, 11.0, 19.0, 13.0, 6.0, 5.0, 6.0, 5.0, 5.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-8.109375, -7.853759765625, -7.59814453125, -7.342529296875, -7.0869140625, -6.831298828125, -6.57568359375, -6.320068359375, -6.064453125, -5.808837890625, -5.55322265625, -5.297607421875, -5.0419921875, -4.786376953125, -4.53076171875, -4.275146484375, -4.01953125, -3.763916015625, -3.50830078125, -3.252685546875, -2.9970703125, -2.741455078125, -2.48583984375, -2.230224609375, -1.974609375, -1.718994140625, -1.46337890625, -1.207763671875, -0.9521484375, -0.696533203125, -0.44091796875, -0.185302734375, 0.0703125, 0.325927734375, 0.58154296875, 0.837158203125, 1.0927734375, 1.348388671875, 1.60400390625, 1.859619140625, 2.115234375, 2.370849609375, 2.62646484375, 2.882080078125, 3.1376953125, 3.393310546875, 3.64892578125, 3.904541015625, 4.16015625, 4.415771484375, 4.67138671875, 4.927001953125, 5.1826171875, 5.438232421875, 5.69384765625, 5.949462890625, 6.205078125, 6.460693359375, 6.71630859375, 6.971923828125, 7.2275390625, 7.483154296875, 7.73876953125, 7.994384765625, 8.25]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 5.0, 7.0, 10.0, 10.0, 13.0, 30.0, 39.0, 51.0, 108.0, 141.0, 223.0, 356.0, 612.0, 1009.0, 1497.0, 2445.0, 3900.0, 6331.0, 10435.0, 17106.0, 27743.0, 45247.0, 74043.0, 123632.0, 233277.0, 1223987.0, 127329.0, 76641.0, 46414.0, 28955.0, 17510.0, 10765.0, 6598.0, 4001.0, 2538.0, 1505.0, 964.0, 617.0, 405.0, 231.0, 138.0, 100.0, 57.0, 37.0, 27.0, 17.0, 12.0, 14.0, 4.0, 5.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5126953125, -1.4654693603515625, -1.418243408203125, -1.3710174560546875, -1.32379150390625, -1.2765655517578125, -1.229339599609375, -1.1821136474609375, -1.1348876953125, -1.0876617431640625, -1.040435791015625, -0.9932098388671875, -0.94598388671875, -0.8987579345703125, -0.851531982421875, -0.8043060302734375, -0.757080078125, -0.7098541259765625, -0.662628173828125, -0.6154022216796875, -0.56817626953125, -0.5209503173828125, -0.473724365234375, -0.4264984130859375, -0.3792724609375, -0.3320465087890625, -0.284820556640625, -0.2375946044921875, -0.19036865234375, -0.1431427001953125, -0.095916748046875, -0.0486907958984375, -0.00146484375, 0.0457611083984375, 0.092987060546875, 0.1402130126953125, 0.18743896484375, 0.2346649169921875, 0.281890869140625, 0.3291168212890625, 0.3763427734375, 0.4235687255859375, 0.470794677734375, 0.5180206298828125, 0.56524658203125, 0.6124725341796875, 0.659698486328125, 0.7069244384765625, 0.754150390625, 0.8013763427734375, 0.848602294921875, 0.8958282470703125, 0.94305419921875, 0.9902801513671875, 1.037506103515625, 1.0847320556640625, 1.1319580078125, 1.1791839599609375, 1.226409912109375, 1.2736358642578125, 1.32086181640625, 1.3680877685546875, 1.415313720703125, 1.4625396728515625, 1.509765625]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 0.0, 3.0, 2.0, 7.0, 4.0, 9.0, 10.0, 4.0, 12.0, 7.0, 11.0, 11.0, 27.0, 29.0, 31.0, 50.0, 73.0, 72.0, 77.0, 72.0, 76.0, 66.0, 61.0, 63.0, 52.0, 30.0, 27.0, 21.0, 19.0, 17.0, 13.0, 13.0, 9.0, 11.0, 2.0, 4.0, 1.0, 5.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0032958984375, -0.003184497356414795, -0.00307309627532959, -0.0029616951942443848, -0.0028502941131591797, -0.0027388930320739746, -0.0026274919509887695, -0.0025160908699035645, -0.0024046897888183594, -0.0022932887077331543, -0.0021818876266479492, -0.002070486545562744, -0.001959085464477539, -0.001847684383392334, -0.001736283302307129, -0.0016248822212219238, -0.0015134811401367188, -0.0014020800590515137, -0.0012906789779663086, -0.0011792778968811035, -0.0010678768157958984, -0.0009564757347106934, -0.0008450746536254883, -0.0007336735725402832, -0.0006222724914550781, -0.000510871410369873, -0.00039947032928466797, -0.0002880692481994629, -0.0001766681671142578, -6.526708602905273e-05, 4.6133995056152344e-05, 0.00015753507614135742, 0.0002689361572265625, 0.0003803372383117676, 0.0004917383193969727, 0.0006031394004821777, 0.0007145404815673828, 0.0008259415626525879, 0.000937342643737793, 0.001048743724822998, 0.0011601448059082031, 0.0012715458869934082, 0.0013829469680786133, 0.0014943480491638184, 0.0016057491302490234, 0.0017171502113342285, 0.0018285512924194336, 0.0019399523735046387, 0.0020513534545898438, 0.002162754535675049, 0.002274155616760254, 0.002385556697845459, 0.002496957778930664, 0.002608358860015869, 0.0027197599411010742, 0.0028311610221862793, 0.0029425621032714844, 0.0030539631843566895, 0.0031653642654418945, 0.0032767653465270996, 0.0033881664276123047, 0.0034995675086975098, 0.003610968589782715, 0.00372236967086792, 0.003833770751953125]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 4.0, 0.0, 6.0, 5.0, 13.0, 13.0, 16.0, 16.0, 30.0, 23.0, 44.0, 58.0, 89.0, 149.0, 306.0, 991.0, 895111.0, 150326.0, 741.0, 249.0, 105.0, 78.0, 45.0, 33.0, 25.0, 13.0, 12.0, 8.0, 9.0, 9.0, 3.0, 6.0, 4.0, 5.0, 3.0, 3.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.0987548828125, -0.09584331512451172, -0.09293174743652344, -0.09002017974853516, -0.08710861206054688, -0.0841970443725586, -0.08128547668457031, -0.07837390899658203, -0.07546234130859375, -0.07255077362060547, -0.06963920593261719, -0.0667276382446289, -0.06381607055664062, -0.060904502868652344, -0.05799293518066406, -0.05508136749267578, -0.0521697998046875, -0.04925823211669922, -0.04634666442871094, -0.043435096740722656, -0.040523529052734375, -0.037611961364746094, -0.03470039367675781, -0.03178882598876953, -0.02887725830078125, -0.02596569061279297, -0.023054122924804688, -0.020142555236816406, -0.017230987548828125, -0.014319419860839844, -0.011407852172851562, -0.008496284484863281, -0.005584716796875, -0.0026731491088867188, 0.0002384185791015625, 0.0031499862670898438, 0.006061553955078125, 0.008973121643066406, 0.011884689331054688, 0.014796257019042969, 0.01770782470703125, 0.02061939239501953, 0.023530960083007812, 0.026442527770996094, 0.029354095458984375, 0.032265663146972656, 0.03517723083496094, 0.03808879852294922, 0.0410003662109375, 0.04391193389892578, 0.04682350158691406, 0.049735069274902344, 0.052646636962890625, 0.055558204650878906, 0.05846977233886719, 0.06138134002685547, 0.06429290771484375, 0.06720447540283203, 0.07011604309082031, 0.0730276107788086, 0.07593917846679688, 0.07885074615478516, 0.08176231384277344, 0.08467388153076172, 0.08758544921875]}, "gradients/decoder.transformer.h.10.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 10.0, 63.0, 441.0, 420.0, 68.0, 11.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0019389829831197858, -0.0016962802037596703, -0.0014535775408148766, -0.0012108748778700829, -0.0009681720985099673, -0.0007254694355651736, -0.0004827666562050581, -0.00024006387684494257, 2.6387860998511314e-06, 0.00024534150725230575, 0.00048804422840476036, 0.000730746949557215, 0.0009734496707096696, 0.0012161523336544633, 0.0014588551130145788, 0.0017015578923746943, 0.001944260555319488, 0.0021869633346796036, 0.0024296659976243973, 0.002672368660569191, 0.0029150713235139847, 0.0031577739864587784, 0.0034004768822342157, 0.003643179778009653, 0.003885882440954447, 0.0041285851038992405, 0.004371287766844034, 0.004613990429788828, 0.004856693558394909, 0.005099396221339703, 0.005342098884284496, 0.00558480154722929, 0.005827504210174084, 0.006070206873118877, 0.006312909536063671, 0.006555612199008465, 0.0067983148619532585, 0.007041017524898052, 0.007283720653504133, 0.007526423316448927, 0.007769125979393721, 0.008011829107999802, 0.008254531770944595, 0.008497234433889389, 0.008739937096834183, 0.008982639759778976, 0.00922534242272377, 0.009468045085668564, 0.009710747748613358, 0.009953450411558151, 0.010196153074502945, 0.010438855737447739, 0.010681558400392532, 0.010924261063337326, 0.01116696372628212, 0.011409666389226913, 0.011652369052171707, 0.0118950717151165, 0.012137774378061295, 0.012380477041006088, 0.012623179703950882, 0.012865882366895676, 0.01310858502984047, 0.013351287692785263, 0.013593991287052631]}, "gradients/decoder.transformer.h.10.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 4.0, 8.0, 8.0, 2.0, 7.0, 8.0, 13.0, 14.0, 16.0, 11.0, 29.0, 23.0, 28.0, 33.0, 27.0, 33.0, 47.0, 39.0, 43.0, 41.0, 32.0, 40.0, 55.0, 41.0, 54.0, 44.0, 47.0, 34.0, 41.0, 27.0, 24.0, 23.0, 28.0, 7.0, 11.0, 11.0, 17.0, 6.0, 12.0, 10.0, 1.0, 4.0, 6.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00161820650100708, -0.0015616370365023613, -0.0015050675719976425, -0.0014484981074929237, -0.001391928642988205, -0.0013353591784834862, -0.0012787897139787674, -0.0012222202494740486, -0.0011656507849693298, -0.001109081320464611, -0.0010525118559598923, -0.0009959423914551735, -0.0009393729269504547, -0.0008828034624457359, -0.0008262339979410172, -0.0007696645334362984, -0.0007130950689315796, -0.0006565256044268608, -0.000599956139922142, -0.0005433866754174232, -0.00048681721091270447, -0.0004302477464079857, -0.0003736782819032669, -0.0003171088173985481, -0.00026053935289382935, -0.00020396988838911057, -0.00014740042388439178, -9.0830959379673e-05, -3.4261494874954224e-05, 2.2307969629764557e-05, 7.887743413448334e-05, 0.00013544689863920212, 0.0001920163631439209, 0.0002485858276486397, 0.00030515529215335846, 0.00036172475665807724, 0.000418294221162796, 0.0004748636856675148, 0.0005314331501722336, 0.0005880026146769524, 0.0006445720791816711, 0.0007011415436863899, 0.0007577110081911087, 0.0008142804726958275, 0.0008708499372005463, 0.000927419401705265, 0.0009839888662099838, 0.0010405583307147026, 0.0010971277952194214, 0.0011536972597241402, 0.001210266724228859, 0.0012668361887335777, 0.0013234056532382965, 0.0013799751177430153, 0.001436544582247734, 0.0014931140467524529, 0.0015496835112571716, 0.0016062529757618904, 0.0016628224402666092, 0.001719391904771328, 0.0017759613692760468, 0.0018325308337807655, 0.0018891002982854843, 0.001945669762790203, 0.002002239227294922]}, "gradients/decoder.transformer.h.10.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 3.0, 1.0, 5.0, 7.0, 4.0, 7.0, 7.0, 14.0, 16.0, 17.0, 7.0, 19.0, 21.0, 22.0, 22.0, 28.0, 27.0, 39.0, 27.0, 34.0, 33.0, 40.0, 47.0, 42.0, 45.0, 49.0, 43.0, 31.0, 36.0, 40.0, 30.0, 37.0, 34.0, 25.0, 17.0, 21.0, 19.0, 10.0, 12.0, 12.0, 12.0, 13.0, 9.0, 8.0, 3.0, 3.0, 6.0, 3.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-12.71875, -12.3233642578125, -11.927978515625, -11.5325927734375, -11.13720703125, -10.7418212890625, -10.346435546875, -9.9510498046875, -9.5556640625, -9.1602783203125, -8.764892578125, -8.3695068359375, -7.97412109375, -7.5787353515625, -7.183349609375, -6.7879638671875, -6.392578125, -5.9971923828125, -5.601806640625, -5.2064208984375, -4.81103515625, -4.4156494140625, -4.020263671875, -3.6248779296875, -3.2294921875, -2.8341064453125, -2.438720703125, -2.0433349609375, -1.64794921875, -1.2525634765625, -0.857177734375, -0.4617919921875, -0.06640625, 0.3289794921875, 0.724365234375, 1.1197509765625, 1.51513671875, 1.9105224609375, 2.305908203125, 2.7012939453125, 3.0966796875, 3.4920654296875, 3.887451171875, 4.2828369140625, 4.67822265625, 5.0736083984375, 5.468994140625, 5.8643798828125, 6.259765625, 6.6551513671875, 7.050537109375, 7.4459228515625, 7.84130859375, 8.2366943359375, 8.632080078125, 9.0274658203125, 9.4228515625, 9.8182373046875, 10.213623046875, 10.6090087890625, 11.00439453125, 11.3997802734375, 11.795166015625, 12.1905517578125, 12.5859375]}, "gradients/decoder.transformer.h.10.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 5.0, 2.0, 6.0, 6.0, 5.0, 10.0, 14.0, 20.0, 24.0, 32.0, 30.0, 50.0, 74.0, 94.0, 120.0, 187.0, 270.0, 466.0, 900.0, 1942.0, 5159.0, 14265.0, 41359.0, 131818.0, 494734.0, 250386.0, 69305.0, 22974.0, 8185.0, 3137.0, 1276.0, 614.0, 325.0, 214.0, 151.0, 96.0, 74.0, 59.0, 45.0, 26.0, 18.0, 22.0, 17.0, 12.0, 8.0, 9.0, 3.0, 10.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0], "bins": [-13.984375, -13.55712890625, -13.1298828125, -12.70263671875, -12.275390625, -11.84814453125, -11.4208984375, -10.99365234375, -10.56640625, -10.13916015625, -9.7119140625, -9.28466796875, -8.857421875, -8.43017578125, -8.0029296875, -7.57568359375, -7.1484375, -6.72119140625, -6.2939453125, -5.86669921875, -5.439453125, -5.01220703125, -4.5849609375, -4.15771484375, -3.73046875, -3.30322265625, -2.8759765625, -2.44873046875, -2.021484375, -1.59423828125, -1.1669921875, -0.73974609375, -0.3125, 0.11474609375, 0.5419921875, 0.96923828125, 1.396484375, 1.82373046875, 2.2509765625, 2.67822265625, 3.10546875, 3.53271484375, 3.9599609375, 4.38720703125, 4.814453125, 5.24169921875, 5.6689453125, 6.09619140625, 6.5234375, 6.95068359375, 7.3779296875, 7.80517578125, 8.232421875, 8.65966796875, 9.0869140625, 9.51416015625, 9.94140625, 10.36865234375, 10.7958984375, 11.22314453125, 11.650390625, 12.07763671875, 12.5048828125, 12.93212890625, 13.359375]}, "gradients/decoder.transformer.h.10.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 4.0, 3.0, 4.0, 5.0, 4.0, 7.0, 6.0, 14.0, 9.0, 11.0, 12.0, 19.0, 19.0, 27.0, 24.0, 30.0, 24.0, 36.0, 37.0, 29.0, 48.0, 39.0, 86.0, 202.0, 1608.0, 230.0, 91.0, 55.0, 41.0, 34.0, 30.0, 39.0, 32.0, 25.0, 31.0, 19.0, 15.0, 13.0, 18.0, 11.0, 13.0, 11.0, 4.0, 11.0, 7.0, 5.0, 4.0, 2.0, 2.0, 3.0, 5.0, 1.0, 0.0, 0.0, 3.0, 1.0], "bins": [-38.40625, -37.22509765625, -36.0439453125, -34.86279296875, -33.681640625, -32.50048828125, -31.3193359375, -30.13818359375, -28.95703125, -27.77587890625, -26.5947265625, -25.41357421875, -24.232421875, -23.05126953125, -21.8701171875, -20.68896484375, -19.5078125, -18.32666015625, -17.1455078125, -15.96435546875, -14.783203125, -13.60205078125, -12.4208984375, -11.23974609375, -10.05859375, -8.87744140625, -7.6962890625, -6.51513671875, -5.333984375, -4.15283203125, -2.9716796875, -1.79052734375, -0.609375, 0.57177734375, 1.7529296875, 2.93408203125, 4.115234375, 5.29638671875, 6.4775390625, 7.65869140625, 8.83984375, 10.02099609375, 11.2021484375, 12.38330078125, 13.564453125, 14.74560546875, 15.9267578125, 17.10791015625, 18.2890625, 19.47021484375, 20.6513671875, 21.83251953125, 23.013671875, 24.19482421875, 25.3759765625, 26.55712890625, 27.73828125, 28.91943359375, 30.1005859375, 31.28173828125, 32.462890625, 33.64404296875, 34.8251953125, 36.00634765625, 37.1875]}, "gradients/decoder.transformer.h.10.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 4.0, 5.0, 6.0, 10.0, 12.0, 13.0, 31.0, 35.0, 47.0, 59.0, 81.0, 138.0, 214.0, 365.0, 839.0, 10266.0, 3123882.0, 7921.0, 790.0, 345.0, 195.0, 149.0, 92.0, 58.0, 47.0, 38.0, 19.0, 17.0, 10.0, 8.0, 5.0, 7.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-146.0, -141.50390625, -137.0078125, -132.51171875, -128.015625, -123.51953125, -119.0234375, -114.52734375, -110.03125, -105.53515625, -101.0390625, -96.54296875, -92.046875, -87.55078125, -83.0546875, -78.55859375, -74.0625, -69.56640625, -65.0703125, -60.57421875, -56.078125, -51.58203125, -47.0859375, -42.58984375, -38.09375, -33.59765625, -29.1015625, -24.60546875, -20.109375, -15.61328125, -11.1171875, -6.62109375, -2.125, 2.37109375, 6.8671875, 11.36328125, 15.859375, 20.35546875, 24.8515625, 29.34765625, 33.84375, 38.33984375, 42.8359375, 47.33203125, 51.828125, 56.32421875, 60.8203125, 65.31640625, 69.8125, 74.30859375, 78.8046875, 83.30078125, 87.796875, 92.29296875, 96.7890625, 101.28515625, 105.78125, 110.27734375, 114.7734375, 119.26953125, 123.765625, 128.26171875, 132.7578125, 137.25390625, 141.75]}, "gradients/decoder.transformer.h.10.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 15.0, 136.0, 398.0, 352.0, 103.0, 10.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-157.9329071044922, -153.5359344482422, -149.13894653320312, -144.74197387695312, -140.34500122070312, -135.94802856445312, -131.55104064941406, -127.15406799316406, -122.75708770751953, -118.360107421875, -113.963134765625, -109.56615447998047, -105.16917419433594, -100.77220153808594, -96.3752212524414, -91.97824096679688, -87.58126831054688, -83.18428802490234, -78.78731536865234, -74.39033508300781, -69.99336242675781, -65.59638214111328, -61.19940185546875, -56.802425384521484, -52.40544891357422, -48.00847244262695, -43.61149597167969, -39.214515686035156, -34.81753921508789, -30.420562744140625, -26.023584365844727, -21.626605987548828, -17.229629516601562, -12.83265209197998, -8.435674667358398, -4.038697242736816, 0.3582801818847656, 4.755256652832031, 9.15223503112793, 13.549213409423828, 17.946189880371094, 22.34316635131836, 26.740144729614258, 31.137123107910156, 35.53409957885742, 39.93107604980469, 44.32805633544922, 48.725032806396484, 53.12200927734375, 57.518985748291016, 61.91596221923828, 66.31294250488281, 70.70991516113281, 75.10689544677734, 79.50387573242188, 83.90084838867188, 88.2978286743164, 92.69480895996094, 97.09178161621094, 101.48876190185547, 105.8857421875, 110.28271484375, 114.67969512939453, 119.07667541503906, 123.47364807128906]}, "gradients/decoder.transformer.h.10.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 0.0, 3.0, 7.0, 2.0, 5.0, 5.0, 8.0, 11.0, 9.0, 17.0, 15.0, 17.0, 19.0, 14.0, 33.0, 23.0, 25.0, 28.0, 34.0, 37.0, 47.0, 36.0, 35.0, 42.0, 39.0, 36.0, 39.0, 32.0, 34.0, 33.0, 35.0, 33.0, 32.0, 32.0, 26.0, 26.0, 19.0, 18.0, 12.0, 11.0, 13.0, 11.0, 6.0, 10.0, 10.0, 6.0, 7.0, 2.0, 1.0, 4.0, 2.0, 3.0, 4.0, 3.0], "bins": [-102.71715545654297, -99.72340393066406, -96.72965240478516, -93.73590087890625, -90.74214935302734, -87.74839782714844, -84.754638671875, -81.76089477539062, -78.76713562011719, -75.77338409423828, -72.77963256835938, -69.78588104248047, -66.79212951660156, -63.798377990722656, -60.804622650146484, -57.81087112426758, -54.81712341308594, -51.82337188720703, -48.829620361328125, -45.83586883544922, -42.84211730957031, -39.848365783691406, -36.854610443115234, -33.86085891723633, -30.867107391357422, -27.873355865478516, -24.87960433959961, -21.88585090637207, -18.892099380493164, -15.898347854614258, -12.904594421386719, -9.910842895507812, -6.917083740234375, -3.9233317375183105, -0.9295797348022461, 2.0641727447509766, 5.057924270629883, 8.051675796508789, 11.045429229736328, 14.039180755615234, 17.03293228149414, 20.026683807373047, 23.020435333251953, 26.014188766479492, 29.0079402923584, 32.00169372558594, 34.995445251464844, 37.98919677734375, 40.982948303222656, 43.97669982910156, 46.97045135498047, 49.964202880859375, 52.95795440673828, 55.95170593261719, 58.94546127319336, 61.939212799072266, 64.93296813964844, 67.92671966552734, 70.92047119140625, 73.91422271728516, 76.90797424316406, 79.90172576904297, 82.89547729492188, 85.88923645019531, 88.88298034667969]}, "gradients/decoder.transformer.h.9.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 1.0, 3.0, 4.0, 7.0, 6.0, 8.0, 11.0, 13.0, 14.0, 19.0, 19.0, 14.0, 23.0, 33.0, 30.0, 26.0, 30.0, 44.0, 38.0, 28.0, 39.0, 41.0, 39.0, 56.0, 50.0, 39.0, 44.0, 42.0, 45.0, 26.0, 31.0, 35.0, 21.0, 21.0, 10.0, 17.0, 13.0, 13.0, 12.0, 11.0, 10.0, 8.0, 5.0, 2.0, 4.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-14.4453125, -14.005126953125, -13.56494140625, -13.124755859375, -12.6845703125, -12.244384765625, -11.80419921875, -11.364013671875, -10.923828125, -10.483642578125, -10.04345703125, -9.603271484375, -9.1630859375, -8.722900390625, -8.28271484375, -7.842529296875, -7.40234375, -6.962158203125, -6.52197265625, -6.081787109375, -5.6416015625, -5.201416015625, -4.76123046875, -4.321044921875, -3.880859375, -3.440673828125, -3.00048828125, -2.560302734375, -2.1201171875, -1.679931640625, -1.23974609375, -0.799560546875, -0.359375, 0.080810546875, 0.52099609375, 0.961181640625, 1.4013671875, 1.841552734375, 2.28173828125, 2.721923828125, 3.162109375, 3.602294921875, 4.04248046875, 4.482666015625, 4.9228515625, 5.363037109375, 5.80322265625, 6.243408203125, 6.68359375, 7.123779296875, 7.56396484375, 8.004150390625, 8.4443359375, 8.884521484375, 9.32470703125, 9.764892578125, 10.205078125, 10.645263671875, 11.08544921875, 11.525634765625, 11.9658203125, 12.406005859375, 12.84619140625, 13.286376953125, 13.7265625]}, "gradients/decoder.transformer.h.9.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 5.0, 1.0, 5.0, 8.0, 12.0, 8.0, 10.0, 17.0, 14.0, 14.0, 22.0, 34.0, 48.0, 51.0, 60.0, 94.0, 136.0, 206.0, 384.0, 965.0, 5142.0, 92173.0, 2966208.0, 1101889.0, 23001.0, 2279.0, 599.0, 287.0, 157.0, 111.0, 87.0, 59.0, 42.0, 32.0, 26.0, 22.0, 20.0, 18.0, 6.0, 10.0, 8.0, 3.0, 8.0, 6.0, 1.0, 1.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-50.6875, -49.23095703125, -47.7744140625, -46.31787109375, -44.861328125, -43.40478515625, -41.9482421875, -40.49169921875, -39.03515625, -37.57861328125, -36.1220703125, -34.66552734375, -33.208984375, -31.75244140625, -30.2958984375, -28.83935546875, -27.3828125, -25.92626953125, -24.4697265625, -23.01318359375, -21.556640625, -20.10009765625, -18.6435546875, -17.18701171875, -15.73046875, -14.27392578125, -12.8173828125, -11.36083984375, -9.904296875, -8.44775390625, -6.9912109375, -5.53466796875, -4.078125, -2.62158203125, -1.1650390625, 0.29150390625, 1.748046875, 3.20458984375, 4.6611328125, 6.11767578125, 7.57421875, 9.03076171875, 10.4873046875, 11.94384765625, 13.400390625, 14.85693359375, 16.3134765625, 17.77001953125, 19.2265625, 20.68310546875, 22.1396484375, 23.59619140625, 25.052734375, 26.50927734375, 27.9658203125, 29.42236328125, 30.87890625, 32.33544921875, 33.7919921875, 35.24853515625, 36.705078125, 38.16162109375, 39.6181640625, 41.07470703125, 42.53125]}, "gradients/decoder.transformer.h.9.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 5.0, 4.0, 4.0, 9.0, 12.0, 29.0, 40.0, 55.0, 111.0, 140.0, 278.0, 482.0, 774.0, 876.0, 521.0, 301.0, 179.0, 117.0, 49.0, 33.0, 26.0, 12.0, 10.0, 5.0, 4.0, 2.0, 5.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.46875, -26.1669921875, -24.865234375, -23.5634765625, -22.26171875, -20.9599609375, -19.658203125, -18.3564453125, -17.0546875, -15.7529296875, -14.451171875, -13.1494140625, -11.84765625, -10.5458984375, -9.244140625, -7.9423828125, -6.640625, -5.3388671875, -4.037109375, -2.7353515625, -1.43359375, -0.1318359375, 1.169921875, 2.4716796875, 3.7734375, 5.0751953125, 6.376953125, 7.6787109375, 8.98046875, 10.2822265625, 11.583984375, 12.8857421875, 14.1875, 15.4892578125, 16.791015625, 18.0927734375, 19.39453125, 20.6962890625, 21.998046875, 23.2998046875, 24.6015625, 25.9033203125, 27.205078125, 28.5068359375, 29.80859375, 31.1103515625, 32.412109375, 33.7138671875, 35.015625, 36.3173828125, 37.619140625, 38.9208984375, 40.22265625, 41.5244140625, 42.826171875, 44.1279296875, 45.4296875, 46.7314453125, 48.033203125, 49.3349609375, 50.63671875, 51.9384765625, 53.240234375, 54.5419921875, 55.84375]}, "gradients/decoder.transformer.h.9.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 4.0, 2.0, 7.0, 14.0, 21.0, 37.0, 58.0, 94.0, 132.0, 247.0, 354.0, 670.0, 10504.0, 4177181.0, 3444.0, 590.0, 302.0, 230.0, 165.0, 98.0, 57.0, 23.0, 31.0, 10.0, 5.0, 4.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-315.5, -308.060546875, -300.62109375, -293.181640625, -285.7421875, -278.302734375, -270.86328125, -263.423828125, -255.984375, -248.544921875, -241.10546875, -233.666015625, -226.2265625, -218.787109375, -211.34765625, -203.908203125, -196.46875, -189.029296875, -181.58984375, -174.150390625, -166.7109375, -159.271484375, -151.83203125, -144.392578125, -136.953125, -129.513671875, -122.07421875, -114.634765625, -107.1953125, -99.755859375, -92.31640625, -84.876953125, -77.4375, -69.998046875, -62.55859375, -55.119140625, -47.6796875, -40.240234375, -32.80078125, -25.361328125, -17.921875, -10.482421875, -3.04296875, 4.396484375, 11.8359375, 19.275390625, 26.71484375, 34.154296875, 41.59375, 49.033203125, 56.47265625, 63.912109375, 71.3515625, 78.791015625, 86.23046875, 93.669921875, 101.109375, 108.548828125, 115.98828125, 123.427734375, 130.8671875, 138.306640625, 145.74609375, 153.185546875, 160.625]}, "gradients/decoder.transformer.h.9.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 10.0, 24.0, 34.0, 46.0, 94.0, 127.0, 138.0, 152.0, 120.0, 101.0, 72.0, 45.0, 17.0, 14.0, 8.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-125.55813598632812, -122.10962677001953, -118.6611099243164, -115.21260070800781, -111.76408386230469, -108.3155746459961, -104.8670654296875, -101.41854858398438, -97.97003173828125, -94.52152252197266, -91.07300567626953, -87.62449645996094, -84.17597961425781, -80.72747039794922, -77.27896118164062, -73.8304443359375, -70.3819351196289, -66.93342590332031, -63.48490905761719, -60.036399841308594, -56.58788299560547, -53.139373779296875, -49.690860748291016, -46.242347717285156, -42.7938346862793, -39.34532165527344, -35.89680862426758, -32.44829559326172, -28.999784469604492, -25.551271438598633, -22.102760314941406, -18.654247283935547, -15.205726623535156, -11.757213592529297, -8.308701515197754, -4.860189437866211, -1.4116764068603516, 2.036836624145508, 5.485347747802734, 8.933860778808594, 12.382373809814453, 15.830886840820312, 19.279399871826172, 22.7279109954834, 26.176424026489258, 29.624937057495117, 33.073448181152344, 36.5219612121582, 39.97047424316406, 43.41898727416992, 46.86750030517578, 50.316009521484375, 53.7645263671875, 57.213035583496094, 60.66154861450195, 64.11006164550781, 67.55857849121094, 71.00708770751953, 74.45560455322266, 77.90411376953125, 81.35263061523438, 84.80113983154297, 88.24964904785156, 91.69816589355469, 95.14667510986328]}, "gradients/decoder.transformer.h.9.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 5.0, 2.0, 4.0, 4.0, 8.0, 13.0, 6.0, 11.0, 14.0, 18.0, 25.0, 21.0, 18.0, 29.0, 27.0, 34.0, 36.0, 43.0, 36.0, 37.0, 41.0, 42.0, 56.0, 41.0, 39.0, 42.0, 34.0, 34.0, 46.0, 29.0, 32.0, 19.0, 28.0, 28.0, 20.0, 16.0, 14.0, 15.0, 8.0, 9.0, 7.0, 6.0, 8.0, 2.0, 0.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-73.1434326171875, -70.69056701660156, -68.23770141601562, -65.78483581542969, -63.33197021484375, -60.87910461425781, -58.426239013671875, -55.97337341308594, -53.5205078125, -51.06764221191406, -48.614776611328125, -46.16191101074219, -43.70904541015625, -41.25617980957031, -38.803314208984375, -36.35044860839844, -33.897579193115234, -31.444713592529297, -28.99184799194336, -26.538982391357422, -24.086116790771484, -21.633251190185547, -19.180383682250977, -16.72751808166504, -14.274652481079102, -11.821786880493164, -9.368921279907227, -6.916054725646973, -4.463189125061035, -2.0103235244750977, 0.44254302978515625, 2.8954086303710938, 5.348274230957031, 7.801139831542969, 10.254005432128906, 12.70687198638916, 15.159737586975098, 17.61260223388672, 20.06546974182129, 22.518335342407227, 24.971200942993164, 27.4240665435791, 29.87693214416504, 32.32979965209961, 34.78266525268555, 37.235530853271484, 39.68839645385742, 42.14126205444336, 44.5941276550293, 47.046993255615234, 49.49985885620117, 51.95272445678711, 54.40559005737305, 56.858455657958984, 59.31132507324219, 61.764190673828125, 64.21705627441406, 66.669921875, 69.12278747558594, 71.57565307617188, 74.02851867675781, 76.48138427734375, 78.93424987792969, 81.38711547851562, 83.83998107910156]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 5.0, 7.0, 9.0, 5.0, 14.0, 18.0, 13.0, 18.0, 12.0, 26.0, 24.0, 35.0, 34.0, 37.0, 37.0, 38.0, 41.0, 40.0, 57.0, 33.0, 46.0, 47.0, 45.0, 43.0, 38.0, 36.0, 28.0, 30.0, 29.0, 26.0, 25.0, 20.0, 15.0, 11.0, 14.0, 14.0, 2.0, 5.0, 8.0, 7.0, 9.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.046875, -13.5921630859375, -13.137451171875, -12.6827392578125, -12.22802734375, -11.7733154296875, -11.318603515625, -10.8638916015625, -10.4091796875, -9.9544677734375, -9.499755859375, -9.0450439453125, -8.59033203125, -8.1356201171875, -7.680908203125, -7.2261962890625, -6.771484375, -6.3167724609375, -5.862060546875, -5.4073486328125, -4.95263671875, -4.4979248046875, -4.043212890625, -3.5885009765625, -3.1337890625, -2.6790771484375, -2.224365234375, -1.7696533203125, -1.31494140625, -0.8602294921875, -0.405517578125, 0.0491943359375, 0.50390625, 0.9586181640625, 1.413330078125, 1.8680419921875, 2.32275390625, 2.7774658203125, 3.232177734375, 3.6868896484375, 4.1416015625, 4.5963134765625, 5.051025390625, 5.5057373046875, 5.96044921875, 6.4151611328125, 6.869873046875, 7.3245849609375, 7.779296875, 8.2340087890625, 8.688720703125, 9.1434326171875, 9.59814453125, 10.0528564453125, 10.507568359375, 10.9622802734375, 11.4169921875, 11.8717041015625, 12.326416015625, 12.7811279296875, 13.23583984375, 13.6905517578125, 14.145263671875, 14.5999755859375, 15.0546875]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 4.0, 8.0, 9.0, 16.0, 13.0, 29.0, 34.0, 65.0, 102.0, 144.0, 200.0, 280.0, 386.0, 602.0, 768.0, 1146.0, 1614.0, 2339.0, 3410.0, 5082.0, 7212.0, 10347.0, 15100.0, 22265.0, 32702.0, 49073.0, 72423.0, 107703.0, 160847.0, 175581.0, 123090.0, 82638.0, 55587.0, 37407.0, 25311.0, 17077.0, 11569.0, 8120.0, 5563.0, 3816.0, 2676.0, 1819.0, 1365.0, 976.0, 577.0, 484.0, 285.0, 210.0, 168.0, 106.0, 71.0, 47.0, 34.0, 25.0, 20.0, 11.0, 5.0, 3.0, 2.0, 3.0, 2.0], "bins": [-1.2568359375, -1.2177581787109375, -1.178680419921875, -1.1396026611328125, -1.10052490234375, -1.0614471435546875, -1.022369384765625, -0.9832916259765625, -0.9442138671875, -0.9051361083984375, -0.866058349609375, -0.8269805908203125, -0.78790283203125, -0.7488250732421875, -0.709747314453125, -0.6706695556640625, -0.631591796875, -0.5925140380859375, -0.553436279296875, -0.5143585205078125, -0.47528076171875, -0.4362030029296875, -0.397125244140625, -0.3580474853515625, -0.3189697265625, -0.2798919677734375, -0.240814208984375, -0.2017364501953125, -0.16265869140625, -0.1235809326171875, -0.084503173828125, -0.0454254150390625, -0.00634765625, 0.0327301025390625, 0.071807861328125, 0.1108856201171875, 0.14996337890625, 0.1890411376953125, 0.228118896484375, 0.2671966552734375, 0.3062744140625, 0.3453521728515625, 0.384429931640625, 0.4235076904296875, 0.46258544921875, 0.5016632080078125, 0.540740966796875, 0.5798187255859375, 0.618896484375, 0.6579742431640625, 0.697052001953125, 0.7361297607421875, 0.77520751953125, 0.8142852783203125, 0.853363037109375, 0.8924407958984375, 0.9315185546875, 0.9705963134765625, 1.009674072265625, 1.0487518310546875, 1.08782958984375, 1.1269073486328125, 1.165985107421875, 1.2050628662109375, 1.244140625]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 5.0, 6.0, 6.0, 8.0, 8.0, 6.0, 12.0, 16.0, 20.0, 14.0, 24.0, 23.0, 28.0, 33.0, 25.0, 34.0, 41.0, 53.0, 33.0, 50.0, 33.0, 1065.0, 60.0, 43.0, 29.0, 31.0, 35.0, 28.0, 34.0, 26.0, 27.0, 20.0, 23.0, 28.0, 16.0, 9.0, 15.0, 12.0, 11.0, 12.0, 7.0, 7.0, 8.0, 2.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.0703125, -8.79248046875, -8.5146484375, -8.23681640625, -7.958984375, -7.68115234375, -7.4033203125, -7.12548828125, -6.84765625, -6.56982421875, -6.2919921875, -6.01416015625, -5.736328125, -5.45849609375, -5.1806640625, -4.90283203125, -4.625, -4.34716796875, -4.0693359375, -3.79150390625, -3.513671875, -3.23583984375, -2.9580078125, -2.68017578125, -2.40234375, -2.12451171875, -1.8466796875, -1.56884765625, -1.291015625, -1.01318359375, -0.7353515625, -0.45751953125, -0.1796875, 0.09814453125, 0.3759765625, 0.65380859375, 0.931640625, 1.20947265625, 1.4873046875, 1.76513671875, 2.04296875, 2.32080078125, 2.5986328125, 2.87646484375, 3.154296875, 3.43212890625, 3.7099609375, 3.98779296875, 4.265625, 4.54345703125, 4.8212890625, 5.09912109375, 5.376953125, 5.65478515625, 5.9326171875, 6.21044921875, 6.48828125, 6.76611328125, 7.0439453125, 7.32177734375, 7.599609375, 7.87744140625, 8.1552734375, 8.43310546875, 8.7109375]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 3.0, 4.0, 2.0, 12.0, 12.0, 23.0, 31.0, 36.0, 54.0, 95.0, 124.0, 229.0, 296.0, 487.0, 675.0, 1060.0, 1642.0, 2415.0, 3572.0, 5348.0, 8152.0, 12737.0, 19654.0, 30416.0, 47592.0, 72686.0, 114818.0, 176645.0, 1238749.0, 127625.0, 81772.0, 53147.0, 33959.0, 21779.0, 14309.0, 9365.0, 6004.0, 3873.0, 2607.0, 1719.0, 1096.0, 740.0, 521.0, 312.0, 234.0, 190.0, 114.0, 73.0, 49.0, 31.0, 17.0, 19.0, 10.0, 4.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-1.2998046875, -1.25665283203125, -1.2135009765625, -1.17034912109375, -1.127197265625, -1.08404541015625, -1.0408935546875, -0.99774169921875, -0.95458984375, -0.91143798828125, -0.8682861328125, -0.82513427734375, -0.781982421875, -0.73883056640625, -0.6956787109375, -0.65252685546875, -0.609375, -0.56622314453125, -0.5230712890625, -0.47991943359375, -0.436767578125, -0.39361572265625, -0.3504638671875, -0.30731201171875, -0.26416015625, -0.22100830078125, -0.1778564453125, -0.13470458984375, -0.091552734375, -0.04840087890625, -0.0052490234375, 0.03790283203125, 0.0810546875, 0.12420654296875, 0.1673583984375, 0.21051025390625, 0.253662109375, 0.29681396484375, 0.3399658203125, 0.38311767578125, 0.42626953125, 0.46942138671875, 0.5125732421875, 0.55572509765625, 0.598876953125, 0.64202880859375, 0.6851806640625, 0.72833251953125, 0.771484375, 0.81463623046875, 0.8577880859375, 0.90093994140625, 0.944091796875, 0.98724365234375, 1.0303955078125, 1.07354736328125, 1.11669921875, 1.15985107421875, 1.2030029296875, 1.24615478515625, 1.289306640625, 1.33245849609375, 1.3756103515625, 1.41876220703125, 1.4619140625]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 3.0, 1.0, 2.0, 3.0, 3.0, 6.0, 5.0, 12.0, 8.0, 9.0, 8.0, 17.0, 22.0, 16.0, 32.0, 38.0, 54.0, 44.0, 76.0, 71.0, 83.0, 82.0, 79.0, 63.0, 62.0, 45.0, 32.0, 37.0, 27.0, 15.0, 17.0, 6.0, 13.0, 4.0, 5.0, 1.0, 3.0, 6.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00377655029296875, -0.00364530086517334, -0.0035140514373779297, -0.0033828020095825195, -0.0032515525817871094, -0.0031203031539916992, -0.002989053726196289, -0.002857804298400879, -0.0027265548706054688, -0.0025953054428100586, -0.0024640560150146484, -0.0023328065872192383, -0.002201557159423828, -0.002070307731628418, -0.0019390583038330078, -0.0018078088760375977, -0.0016765594482421875, -0.0015453100204467773, -0.0014140605926513672, -0.001282811164855957, -0.0011515617370605469, -0.0010203123092651367, -0.0008890628814697266, -0.0007578134536743164, -0.0006265640258789062, -0.0004953145980834961, -0.00036406517028808594, -0.00023281574249267578, -0.00010156631469726562, 2.968311309814453e-05, 0.0001609325408935547, 0.00029218196868896484, 0.000423431396484375, 0.0005546808242797852, 0.0006859302520751953, 0.0008171796798706055, 0.0009484291076660156, 0.0010796785354614258, 0.001210927963256836, 0.001342177391052246, 0.0014734268188476562, 0.0016046762466430664, 0.0017359256744384766, 0.0018671751022338867, 0.001998424530029297, 0.002129673957824707, 0.002260923385620117, 0.0023921728134155273, 0.0025234222412109375, 0.0026546716690063477, 0.002785921096801758, 0.002917170524597168, 0.003048419952392578, 0.0031796693801879883, 0.0033109188079833984, 0.0034421682357788086, 0.0035734176635742188, 0.003704667091369629, 0.003835916519165039, 0.003967165946960449, 0.004098415374755859, 0.0042296648025512695, 0.00436091423034668, 0.00449216365814209, 0.0046234130859375]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 6.0, 5.0, 2.0, 3.0, 7.0, 6.0, 12.0, 11.0, 24.0, 27.0, 36.0, 46.0, 56.0, 89.0, 108.0, 262.0, 644.0, 59958.0, 985485.0, 989.0, 302.0, 136.0, 78.0, 67.0, 52.0, 32.0, 25.0, 18.0, 16.0, 14.0, 13.0, 5.0, 11.0, 5.0, 4.0, 3.0, 0.0, 4.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.109130859375, -0.10602474212646484, -0.10291862487792969, -0.09981250762939453, -0.09670639038085938, -0.09360027313232422, -0.09049415588378906, -0.0873880386352539, -0.08428192138671875, -0.0811758041381836, -0.07806968688964844, -0.07496356964111328, -0.07185745239257812, -0.06875133514404297, -0.06564521789550781, -0.06253910064697266, -0.0594329833984375, -0.056326866149902344, -0.05322074890136719, -0.05011463165283203, -0.047008514404296875, -0.04390239715576172, -0.04079627990722656, -0.037690162658691406, -0.03458404541015625, -0.031477928161621094, -0.028371810913085938, -0.02526569366455078, -0.022159576416015625, -0.01905345916748047, -0.015947341918945312, -0.012841224670410156, -0.009735107421875, -0.006628990173339844, -0.0035228729248046875, -0.00041675567626953125, 0.002689361572265625, 0.005795478820800781, 0.008901596069335938, 0.012007713317871094, 0.01511383056640625, 0.018219947814941406, 0.021326065063476562, 0.02443218231201172, 0.027538299560546875, 0.03064441680908203, 0.03375053405761719, 0.036856651306152344, 0.0399627685546875, 0.043068885803222656, 0.04617500305175781, 0.04928112030029297, 0.052387237548828125, 0.05549335479736328, 0.05859947204589844, 0.061705589294433594, 0.06481170654296875, 0.0679178237915039, 0.07102394104003906, 0.07413005828857422, 0.07723617553710938, 0.08034229278564453, 0.08344841003417969, 0.08655452728271484, 0.08966064453125]}, "gradients/decoder.transformer.h.9.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 9.0, 39.0, 106.0, 268.0, 334.0, 167.0, 65.0, 14.0, 6.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0014605483738705516, -0.0013296190882101655, -0.0011986899189651012, -0.0010677606333047152, -0.0009368313476443291, -0.000805902061983943, -0.0006749728345312178, -0.0005440436070784926, -0.00041311432141810656, -0.00028218506486155093, -0.0001512558083049953, -2.032655174843967e-05, 0.00011060270480811596, 0.00024153199046850204, 0.0003724612179212272, 0.0005033904453739524, 0.0006343197310343385, 0.0007652490166947246, 0.0008961782441474497, 0.001027107471600175, 0.001158036757260561, 0.001288966042920947, 0.0014198953285813332, 0.0015508244978263974, 0.0016817537834867835, 0.0018126830691471696, 0.0019436122383922338, 0.00207454152405262, 0.002205470809713006, 0.002336400095373392, 0.002467329381033778, 0.0025982586666941643, 0.002729188185185194, 0.00286011747084558, 0.002991046756505966, 0.0031219760421663523, 0.0032529053278267384, 0.003383834380656481, 0.003514763666316867, 0.003645692951977253, 0.003776622237637639, 0.003907551523298025, 0.0040384805761277676, 0.004169410094618797, 0.00430033914744854, 0.0044312686659395695, 0.004562197718769312, 0.004693127237260342, 0.004824056290090084, 0.0049549853429198265, 0.005085914861410856, 0.005216843914240599, 0.005347773432731628, 0.005478702485561371, 0.005609632004052401, 0.005740561056882143, 0.005871490575373173, 0.006002419628202915, 0.006133349146693945, 0.006264278199523687, 0.006395207718014717, 0.0065261367708444595, 0.006657066289335489, 0.006787995342165232, 0.006918924394994974]}, "gradients/decoder.transformer.h.9.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 3.0, 2.0, 1.0, 2.0, 4.0, 3.0, 7.0, 16.0, 6.0, 5.0, 17.0, 26.0, 21.0, 13.0, 27.0, 32.0, 25.0, 26.0, 16.0, 27.0, 38.0, 20.0, 37.0, 49.0, 43.0, 24.0, 38.0, 29.0, 43.0, 29.0, 43.0, 20.0, 32.0, 34.0, 25.0, 25.0, 23.0, 23.0, 12.0, 20.0, 21.0, 18.0, 14.0, 15.0, 5.0, 12.0, 7.0, 7.0, 6.0, 7.0, 6.0, 2.0, 3.0, 0.0, 1.0, 4.0, 2.0], "bins": [-0.001606285572052002, -0.0015572886914014816, -0.0015082918107509613, -0.001459294930100441, -0.0014102980494499207, -0.0013613011687994003, -0.00131230428814888, -0.0012633074074983597, -0.0012143105268478394, -0.001165313646197319, -0.0011163167655467987, -0.0010673198848962784, -0.001018323004245758, -0.0009693261235952377, -0.0009203292429447174, -0.0008713323622941971, -0.0008223354816436768, -0.0007733386009931564, -0.0007243417203426361, -0.0006753448396921158, -0.0006263479590415955, -0.0005773510783910751, -0.0005283541977405548, -0.0004793573170900345, -0.00043036043643951416, -0.00038136355578899384, -0.0003323666751384735, -0.0002833697944879532, -0.00023437291383743286, -0.00018537603318691254, -0.0001363791525363922, -8.738227188587189e-05, -3.838539123535156e-05, 1.0611489415168762e-05, 5.960837006568909e-05, 0.00010860525071620941, 0.00015760213136672974, 0.00020659901201725006, 0.0002555958926677704, 0.0003045927733182907, 0.00035358965396881104, 0.00040258653461933136, 0.0004515834152698517, 0.000500580295920372, 0.0005495771765708923, 0.0005985740572214127, 0.000647570937871933, 0.0006965678185224533, 0.0007455646991729736, 0.000794561579823494, 0.0008435584604740143, 0.0008925553411245346, 0.0009415522217750549, 0.0009905491024255753, 0.0010395459830760956, 0.001088542863726616, 0.0011375397443771362, 0.0011865366250276566, 0.0012355335056781769, 0.0012845303863286972, 0.0013335272669792175, 0.0013825241476297379, 0.0014315210282802582, 0.0014805179089307785, 0.0015295147895812988]}, "gradients/decoder.transformer.h.9.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 5.0, 7.0, 9.0, 5.0, 14.0, 18.0, 13.0, 18.0, 12.0, 26.0, 24.0, 35.0, 34.0, 37.0, 37.0, 38.0, 41.0, 40.0, 57.0, 33.0, 46.0, 47.0, 45.0, 43.0, 38.0, 36.0, 28.0, 30.0, 29.0, 27.0, 24.0, 20.0, 15.0, 11.0, 14.0, 14.0, 2.0, 5.0, 8.0, 7.0, 9.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.046875, -13.5921630859375, -13.137451171875, -12.6827392578125, -12.22802734375, -11.7733154296875, -11.318603515625, -10.8638916015625, -10.4091796875, -9.9544677734375, -9.499755859375, -9.0450439453125, -8.59033203125, -8.1356201171875, -7.680908203125, -7.2261962890625, -6.771484375, -6.3167724609375, -5.862060546875, -5.4073486328125, -4.95263671875, -4.4979248046875, -4.043212890625, -3.5885009765625, -3.1337890625, -2.6790771484375, -2.224365234375, -1.7696533203125, -1.31494140625, -0.8602294921875, -0.405517578125, 0.0491943359375, 0.50390625, 0.9586181640625, 1.413330078125, 1.8680419921875, 2.32275390625, 2.7774658203125, 3.232177734375, 3.6868896484375, 4.1416015625, 4.5963134765625, 5.051025390625, 5.5057373046875, 5.96044921875, 6.4151611328125, 6.869873046875, 7.3245849609375, 7.779296875, 8.2340087890625, 8.688720703125, 9.1434326171875, 9.59814453125, 10.0528564453125, 10.507568359375, 10.9622802734375, 11.4169921875, 11.8717041015625, 12.326416015625, 12.7811279296875, 13.23583984375, 13.6905517578125, 14.145263671875, 14.5999755859375, 15.0546875]}, "gradients/decoder.transformer.h.9.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 5.0, 3.0, 8.0, 13.0, 8.0, 13.0, 27.0, 29.0, 37.0, 32.0, 59.0, 76.0, 123.0, 154.0, 222.0, 374.0, 634.0, 1259.0, 4733.0, 35766.0, 632701.0, 342190.0, 23804.0, 3553.0, 1108.0, 528.0, 355.0, 210.0, 130.0, 105.0, 69.0, 56.0, 35.0, 34.0, 34.0, 16.0, 13.0, 6.0, 9.0, 9.0, 7.0, 8.0, 4.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.375, -25.521728515625, -24.66845703125, -23.815185546875, -22.9619140625, -22.108642578125, -21.25537109375, -20.402099609375, -19.548828125, -18.695556640625, -17.84228515625, -16.989013671875, -16.1357421875, -15.282470703125, -14.42919921875, -13.575927734375, -12.72265625, -11.869384765625, -11.01611328125, -10.162841796875, -9.3095703125, -8.456298828125, -7.60302734375, -6.749755859375, -5.896484375, -5.043212890625, -4.18994140625, -3.336669921875, -2.4833984375, -1.630126953125, -0.77685546875, 0.076416015625, 0.9296875, 1.782958984375, 2.63623046875, 3.489501953125, 4.3427734375, 5.196044921875, 6.04931640625, 6.902587890625, 7.755859375, 8.609130859375, 9.46240234375, 10.315673828125, 11.1689453125, 12.022216796875, 12.87548828125, 13.728759765625, 14.58203125, 15.435302734375, 16.28857421875, 17.141845703125, 17.9951171875, 18.848388671875, 19.70166015625, 20.554931640625, 21.408203125, 22.261474609375, 23.11474609375, 23.968017578125, 24.8212890625, 25.674560546875, 26.52783203125, 27.381103515625, 28.234375]}, "gradients/decoder.transformer.h.9.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 3.0, 1.0, 3.0, 5.0, 2.0, 2.0, 4.0, 6.0, 6.0, 13.0, 10.0, 9.0, 19.0, 14.0, 23.0, 23.0, 26.0, 35.0, 35.0, 42.0, 42.0, 47.0, 69.0, 88.0, 408.0, 1606.0, 95.0, 45.0, 45.0, 36.0, 41.0, 37.0, 38.0, 21.0, 26.0, 21.0, 23.0, 18.0, 12.0, 17.0, 18.0, 4.0, 9.0, 4.0, 3.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-50.15625, -48.63818359375, -47.1201171875, -45.60205078125, -44.083984375, -42.56591796875, -41.0478515625, -39.52978515625, -38.01171875, -36.49365234375, -34.9755859375, -33.45751953125, -31.939453125, -30.42138671875, -28.9033203125, -27.38525390625, -25.8671875, -24.34912109375, -22.8310546875, -21.31298828125, -19.794921875, -18.27685546875, -16.7587890625, -15.24072265625, -13.72265625, -12.20458984375, -10.6865234375, -9.16845703125, -7.650390625, -6.13232421875, -4.6142578125, -3.09619140625, -1.578125, -0.06005859375, 1.4580078125, 2.97607421875, 4.494140625, 6.01220703125, 7.5302734375, 9.04833984375, 10.56640625, 12.08447265625, 13.6025390625, 15.12060546875, 16.638671875, 18.15673828125, 19.6748046875, 21.19287109375, 22.7109375, 24.22900390625, 25.7470703125, 27.26513671875, 28.783203125, 30.30126953125, 31.8193359375, 33.33740234375, 34.85546875, 36.37353515625, 37.8916015625, 39.40966796875, 40.927734375, 42.44580078125, 43.9638671875, 45.48193359375, 47.0]}, "gradients/decoder.transformer.h.9.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 4.0, 1.0, 3.0, 7.0, 7.0, 16.0, 13.0, 19.0, 18.0, 26.0, 21.0, 26.0, 28.0, 47.0, 58.0, 76.0, 102.0, 223.0, 401.0, 1219.0, 2732779.0, 408549.0, 1036.0, 342.0, 210.0, 123.0, 68.0, 64.0, 38.0, 38.0, 26.0, 28.0, 13.0, 21.0, 7.0, 10.0, 12.0, 9.0, 4.0, 4.0, 2.0, 0.0, 6.0, 4.0, 0.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0], "bins": [-141.625, -137.048828125, -132.47265625, -127.896484375, -123.3203125, -118.744140625, -114.16796875, -109.591796875, -105.015625, -100.439453125, -95.86328125, -91.287109375, -86.7109375, -82.134765625, -77.55859375, -72.982421875, -68.40625, -63.830078125, -59.25390625, -54.677734375, -50.1015625, -45.525390625, -40.94921875, -36.373046875, -31.796875, -27.220703125, -22.64453125, -18.068359375, -13.4921875, -8.916015625, -4.33984375, 0.236328125, 4.8125, 9.388671875, 13.96484375, 18.541015625, 23.1171875, 27.693359375, 32.26953125, 36.845703125, 41.421875, 45.998046875, 50.57421875, 55.150390625, 59.7265625, 64.302734375, 68.87890625, 73.455078125, 78.03125, 82.607421875, 87.18359375, 91.759765625, 96.3359375, 100.912109375, 105.48828125, 110.064453125, 114.640625, 119.216796875, 123.79296875, 128.369140625, 132.9453125, 137.521484375, 142.09765625, 146.673828125, 151.25]}, "gradients/decoder.transformer.h.9.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 6.0, 18.0, 77.0, 181.0, 285.0, 249.0, 141.0, 43.0, 13.0, 6.0], "bins": [-121.4266586303711, -119.35983276367188, -117.29300689697266, -115.22618103027344, -113.15935516357422, -111.092529296875, -109.02570343017578, -106.95887756347656, -104.89205932617188, -102.82523345947266, -100.75840759277344, -98.69158172607422, -96.624755859375, -94.55792999267578, -92.49110412597656, -90.42428588867188, -88.35745239257812, -86.2906265258789, -84.22380065917969, -82.15697479248047, -80.09014892578125, -78.02332305908203, -75.95649719238281, -73.88967895507812, -71.82284545898438, -69.75601959228516, -67.68919372558594, -65.62236785888672, -63.5555419921875, -61.48871612548828, -59.42189407348633, -57.35506820678711, -55.288246154785156, -53.22142028808594, -51.15459442138672, -49.0877685546875, -47.02094268798828, -44.95411682128906, -42.88729476928711, -40.82046890258789, -38.75364303588867, -36.68681716918945, -34.619991302490234, -32.553165435791016, -30.48634147644043, -28.41951560974121, -26.352691650390625, -24.285865783691406, -22.219039916992188, -20.15221405029297, -18.08538818359375, -16.018564224243164, -13.951738357543945, -11.884912490844727, -9.818087577819824, -7.751262664794922, -5.6844377517700195, -3.617612361907959, -1.5507869720458984, 0.5160384178161621, 2.5828638076782227, 4.649689674377441, 6.716514587402344, 8.783339500427246, 10.850165367126465]}, "gradients/decoder.transformer.h.9.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 3.0, 3.0, 4.0, 3.0, 7.0, 10.0, 10.0, 8.0, 15.0, 15.0, 14.0, 23.0, 14.0, 21.0, 23.0, 34.0, 36.0, 42.0, 37.0, 32.0, 42.0, 41.0, 46.0, 46.0, 35.0, 32.0, 39.0, 34.0, 24.0, 37.0, 36.0, 16.0, 24.0, 24.0, 35.0, 18.0, 15.0, 17.0, 18.0, 16.0, 20.0, 9.0, 9.0, 6.0, 2.0, 5.0, 3.0, 2.0, 5.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-119.99846649169922, -115.9561767578125, -111.91388702392578, -107.87159729003906, -103.82930755615234, -99.78701782226562, -95.74473571777344, -91.70243835449219, -87.66015625, -83.61786651611328, -79.57557678222656, -75.53328704833984, -71.49099731445312, -67.4487075805664, -63.40642166137695, -59.364131927490234, -55.32183837890625, -51.27954864501953, -47.23725891113281, -43.194969177246094, -39.152679443359375, -35.110389709472656, -31.068103790283203, -27.025814056396484, -22.983524322509766, -18.941234588623047, -14.898945808410645, -10.856657028198242, -6.814367294311523, -2.7720775604248047, 1.2702102661132812, 5.3125, 9.35479736328125, 13.397087097167969, 17.439376831054688, 21.481664657592773, 25.523954391479492, 29.56624412536621, 33.6085319519043, 37.650821685791016, 41.693111419677734, 45.73540115356445, 49.77769088745117, 53.819976806640625, 57.862266540527344, 61.90455627441406, 65.94684600830078, 69.9891357421875, 74.03142547607422, 78.07371520996094, 82.11600494384766, 86.15829467773438, 90.2005844116211, 94.24287414550781, 98.28515625, 102.32745361328125, 106.36973571777344, 110.41202545166016, 114.45431518554688, 118.4966049194336, 122.53889465332031, 126.58118438720703, 130.62347412109375, 134.66575622558594, 138.7080535888672]}, "gradients/decoder.transformer.h.8.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 6.0, 1.0, 7.0, 5.0, 17.0, 14.0, 12.0, 17.0, 21.0, 18.0, 21.0, 24.0, 34.0, 35.0, 28.0, 40.0, 44.0, 40.0, 51.0, 40.0, 35.0, 51.0, 45.0, 42.0, 40.0, 30.0, 36.0, 32.0, 32.0, 26.0, 16.0, 26.0, 22.0, 16.0, 14.0, 15.0, 10.0, 10.0, 6.0, 4.0, 5.0, 8.0, 9.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.3046875, -13.8402099609375, -13.375732421875, -12.9112548828125, -12.44677734375, -11.9822998046875, -11.517822265625, -11.0533447265625, -10.5888671875, -10.1243896484375, -9.659912109375, -9.1954345703125, -8.73095703125, -8.2664794921875, -7.802001953125, -7.3375244140625, -6.873046875, -6.4085693359375, -5.944091796875, -5.4796142578125, -5.01513671875, -4.5506591796875, -4.086181640625, -3.6217041015625, -3.1572265625, -2.6927490234375, -2.228271484375, -1.7637939453125, -1.29931640625, -0.8348388671875, -0.370361328125, 0.0941162109375, 0.55859375, 1.0230712890625, 1.487548828125, 1.9520263671875, 2.41650390625, 2.8809814453125, 3.345458984375, 3.8099365234375, 4.2744140625, 4.7388916015625, 5.203369140625, 5.6678466796875, 6.13232421875, 6.5968017578125, 7.061279296875, 7.5257568359375, 7.990234375, 8.4547119140625, 8.919189453125, 9.3836669921875, 9.84814453125, 10.3126220703125, 10.777099609375, 11.2415771484375, 11.7060546875, 12.1705322265625, 12.635009765625, 13.0994873046875, 13.56396484375, 14.0284423828125, 14.492919921875, 14.9573974609375, 15.421875]}, "gradients/decoder.transformer.h.8.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 0.0, 4.0, 5.0, 3.0, 3.0, 8.0, 11.0, 14.0, 20.0, 23.0, 28.0, 46.0, 58.0, 81.0, 107.0, 127.0, 197.0, 347.0, 574.0, 1025.0, 2098.0, 4867.0, 13543.0, 47498.0, 202285.0, 766236.0, 1537291.0, 1134144.0, 365005.0, 84302.0, 21332.0, 7020.0, 2787.0, 1292.0, 709.0, 380.0, 224.0, 164.0, 97.0, 98.0, 66.0, 45.0, 34.0, 24.0, 17.0, 14.0, 11.0, 10.0, 6.0, 7.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-16.109375, -15.584716796875, -15.06005859375, -14.535400390625, -14.0107421875, -13.486083984375, -12.96142578125, -12.436767578125, -11.912109375, -11.387451171875, -10.86279296875, -10.338134765625, -9.8134765625, -9.288818359375, -8.76416015625, -8.239501953125, -7.71484375, -7.190185546875, -6.66552734375, -6.140869140625, -5.6162109375, -5.091552734375, -4.56689453125, -4.042236328125, -3.517578125, -2.992919921875, -2.46826171875, -1.943603515625, -1.4189453125, -0.894287109375, -0.36962890625, 0.155029296875, 0.6796875, 1.204345703125, 1.72900390625, 2.253662109375, 2.7783203125, 3.302978515625, 3.82763671875, 4.352294921875, 4.876953125, 5.401611328125, 5.92626953125, 6.450927734375, 6.9755859375, 7.500244140625, 8.02490234375, 8.549560546875, 9.07421875, 9.598876953125, 10.12353515625, 10.648193359375, 11.1728515625, 11.697509765625, 12.22216796875, 12.746826171875, 13.271484375, 13.796142578125, 14.32080078125, 14.845458984375, 15.3701171875, 15.894775390625, 16.41943359375, 16.944091796875, 17.46875]}, "gradients/decoder.transformer.h.8.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 3.0, 2.0, 6.0, 11.0, 18.0, 21.0, 34.0, 47.0, 89.0, 116.0, 163.0, 266.0, 428.0, 525.0, 652.0, 556.0, 389.0, 253.0, 182.0, 113.0, 78.0, 52.0, 37.0, 18.0, 10.0, 5.0, 2.0, 3.0, 3.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-49.8125, -48.641357421875, -47.47021484375, -46.299072265625, -45.1279296875, -43.956787109375, -42.78564453125, -41.614501953125, -40.443359375, -39.272216796875, -38.10107421875, -36.929931640625, -35.7587890625, -34.587646484375, -33.41650390625, -32.245361328125, -31.07421875, -29.903076171875, -28.73193359375, -27.560791015625, -26.3896484375, -25.218505859375, -24.04736328125, -22.876220703125, -21.705078125, -20.533935546875, -19.36279296875, -18.191650390625, -17.0205078125, -15.849365234375, -14.67822265625, -13.507080078125, -12.3359375, -11.164794921875, -9.99365234375, -8.822509765625, -7.6513671875, -6.480224609375, -5.30908203125, -4.137939453125, -2.966796875, -1.795654296875, -0.62451171875, 0.546630859375, 1.7177734375, 2.888916015625, 4.06005859375, 5.231201171875, 6.40234375, 7.573486328125, 8.74462890625, 9.915771484375, 11.0869140625, 12.258056640625, 13.42919921875, 14.600341796875, 15.771484375, 16.942626953125, 18.11376953125, 19.284912109375, 20.4560546875, 21.627197265625, 22.79833984375, 23.969482421875, 25.140625]}, "gradients/decoder.transformer.h.8.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 5.0, 5.0, 14.0, 8.0, 13.0, 28.0, 47.0, 50.0, 85.0, 134.0, 175.0, 203.0, 317.0, 530.0, 1369.0, 49474.0, 4127564.0, 11573.0, 1091.0, 503.0, 316.0, 210.0, 159.0, 117.0, 104.0, 48.0, 47.0, 31.0, 22.0, 23.0, 10.0, 10.0, 6.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-115.0, -110.470703125, -105.94140625, -101.412109375, -96.8828125, -92.353515625, -87.82421875, -83.294921875, -78.765625, -74.236328125, -69.70703125, -65.177734375, -60.6484375, -56.119140625, -51.58984375, -47.060546875, -42.53125, -38.001953125, -33.47265625, -28.943359375, -24.4140625, -19.884765625, -15.35546875, -10.826171875, -6.296875, -1.767578125, 2.76171875, 7.291015625, 11.8203125, 16.349609375, 20.87890625, 25.408203125, 29.9375, 34.466796875, 38.99609375, 43.525390625, 48.0546875, 52.583984375, 57.11328125, 61.642578125, 66.171875, 70.701171875, 75.23046875, 79.759765625, 84.2890625, 88.818359375, 93.34765625, 97.876953125, 102.40625, 106.935546875, 111.46484375, 115.994140625, 120.5234375, 125.052734375, 129.58203125, 134.111328125, 138.640625, 143.169921875, 147.69921875, 152.228515625, 156.7578125, 161.287109375, 165.81640625, 170.345703125, 174.875]}, "gradients/decoder.transformer.h.8.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 5.0, 35.0, 155.0, 385.0, 318.0, 102.0, 13.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-511.6800231933594, -502.3577575683594, -493.0354919433594, -483.7132263183594, -474.3909606933594, -465.0686950683594, -455.7464294433594, -446.42413330078125, -437.10186767578125, -427.77960205078125, -418.45733642578125, -409.13507080078125, -399.81280517578125, -390.49053955078125, -381.16827392578125, -371.84600830078125, -362.52374267578125, -353.20147705078125, -343.87921142578125, -334.55694580078125, -325.23468017578125, -315.91241455078125, -306.59014892578125, -297.26788330078125, -287.94561767578125, -278.62335205078125, -269.30108642578125, -259.97882080078125, -250.65655517578125, -241.33428955078125, -232.0120086669922, -222.6897430419922, -213.367431640625, -204.045166015625, -194.722900390625, -185.400634765625, -176.078369140625, -166.756103515625, -157.43382263183594, -148.11155700683594, -138.789306640625, -129.467041015625, -120.144775390625, -110.82250213623047, -101.50023651123047, -92.17797088623047, -82.85569763183594, -73.53343200683594, -64.2111587524414, -54.888893127441406, -45.56662368774414, -36.244354248046875, -26.922088623046875, -17.599822998046875, -8.27755355834961, 1.0447158813476562, 10.366981506347656, 19.68924903869629, 29.011516571044922, 38.33378601074219, 47.65605163574219, 56.97831726074219, 66.30058288574219, 75.62285614013672, 84.94512176513672]}, "gradients/decoder.transformer.h.8.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 7.0, 5.0, 11.0, 10.0, 9.0, 17.0, 14.0, 15.0, 22.0, 14.0, 18.0, 18.0, 31.0, 30.0, 31.0, 41.0, 35.0, 39.0, 47.0, 34.0, 43.0, 37.0, 44.0, 42.0, 42.0, 48.0, 33.0, 29.0, 34.0, 43.0, 25.0, 24.0, 26.0, 19.0, 10.0, 13.0, 13.0, 8.0, 8.0, 9.0, 4.0, 2.0, 0.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-95.5745849609375, -92.62934875488281, -89.68411254882812, -86.73887634277344, -83.79363250732422, -80.84839630126953, -77.90316009521484, -74.95792388916016, -72.01268005371094, -69.06744384765625, -66.12220764160156, -63.17696762084961, -60.231727600097656, -57.28649139404297, -54.34125518798828, -51.396018981933594, -48.450782775878906, -45.50554656982422, -42.560306549072266, -39.61507034301758, -36.669830322265625, -33.72459411621094, -30.77935791015625, -27.83411979675293, -24.88888168334961, -21.94364356994629, -18.99840545654297, -16.05316925048828, -13.107931137084961, -10.16269302368164, -7.217456817626953, -4.272218704223633, -1.3269805908203125, 1.6182570457458496, 4.563494682312012, 7.508731842041016, 10.453969955444336, 13.399208068847656, 16.344444274902344, 19.289682388305664, 22.234920501708984, 25.180158615112305, 28.125396728515625, 31.070632934570312, 34.015869140625, 36.96110916137695, 39.90634536743164, 42.851585388183594, 45.79682159423828, 48.74205780029297, 51.68729782104492, 54.63253402709961, 57.57777404785156, 60.52301025390625, 63.46824645996094, 66.41348266601562, 69.35871887207031, 72.303955078125, 75.24919128417969, 78.19442749023438, 81.1396713256836, 84.08490753173828, 87.03014373779297, 89.97537994384766, 92.92062377929688]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 2.0, 4.0, 6.0, 10.0, 8.0, 14.0, 9.0, 12.0, 21.0, 14.0, 21.0, 31.0, 28.0, 32.0, 36.0, 42.0, 42.0, 39.0, 39.0, 40.0, 64.0, 34.0, 47.0, 58.0, 38.0, 41.0, 34.0, 36.0, 29.0, 23.0, 20.0, 20.0, 9.0, 23.0, 14.0, 18.0, 14.0, 11.0, 6.0, 7.0, 4.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.6015625, -13.1275634765625, -12.653564453125, -12.1795654296875, -11.70556640625, -11.2315673828125, -10.757568359375, -10.2835693359375, -9.8095703125, -9.3355712890625, -8.861572265625, -8.3875732421875, -7.91357421875, -7.4395751953125, -6.965576171875, -6.4915771484375, -6.017578125, -5.5435791015625, -5.069580078125, -4.5955810546875, -4.12158203125, -3.6475830078125, -3.173583984375, -2.6995849609375, -2.2255859375, -1.7515869140625, -1.277587890625, -0.8035888671875, -0.32958984375, 0.1444091796875, 0.618408203125, 1.0924072265625, 1.56640625, 2.0404052734375, 2.514404296875, 2.9884033203125, 3.46240234375, 3.9364013671875, 4.410400390625, 4.8843994140625, 5.3583984375, 5.8323974609375, 6.306396484375, 6.7803955078125, 7.25439453125, 7.7283935546875, 8.202392578125, 8.6763916015625, 9.150390625, 9.6243896484375, 10.098388671875, 10.5723876953125, 11.04638671875, 11.5203857421875, 11.994384765625, 12.4683837890625, 12.9423828125, 13.4163818359375, 13.890380859375, 14.3643798828125, 14.83837890625, 15.3123779296875, 15.786376953125, 16.2603759765625, 16.734375]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 5.0, 8.0, 8.0, 11.0, 24.0, 29.0, 37.0, 65.0, 89.0, 161.0, 241.0, 368.0, 543.0, 831.0, 1295.0, 1920.0, 2972.0, 4525.0, 6953.0, 10709.0, 16182.0, 26049.0, 40733.0, 65245.0, 103418.0, 163862.0, 208581.0, 144366.0, 91295.0, 57318.0, 36019.0, 22635.0, 14910.0, 9433.0, 6117.0, 4009.0, 2669.0, 1724.0, 1097.0, 719.0, 462.0, 307.0, 226.0, 125.0, 86.0, 65.0, 46.0, 33.0, 12.0, 9.0, 7.0, 8.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.513671875, -1.4654083251953125, -1.417144775390625, -1.3688812255859375, -1.32061767578125, -1.2723541259765625, -1.224090576171875, -1.1758270263671875, -1.1275634765625, -1.0792999267578125, -1.031036376953125, -0.9827728271484375, -0.93450927734375, -0.8862457275390625, -0.837982177734375, -0.7897186279296875, -0.741455078125, -0.6931915283203125, -0.644927978515625, -0.5966644287109375, -0.54840087890625, -0.5001373291015625, -0.451873779296875, -0.4036102294921875, -0.3553466796875, -0.3070831298828125, -0.258819580078125, -0.2105560302734375, -0.16229248046875, -0.1140289306640625, -0.065765380859375, -0.0175018310546875, 0.03076171875, 0.0790252685546875, 0.127288818359375, 0.1755523681640625, 0.22381591796875, 0.2720794677734375, 0.320343017578125, 0.3686065673828125, 0.4168701171875, 0.4651336669921875, 0.513397216796875, 0.5616607666015625, 0.60992431640625, 0.6581878662109375, 0.706451416015625, 0.7547149658203125, 0.802978515625, 0.8512420654296875, 0.899505615234375, 0.9477691650390625, 0.99603271484375, 1.0442962646484375, 1.092559814453125, 1.1408233642578125, 1.1890869140625, 1.2373504638671875, 1.285614013671875, 1.3338775634765625, 1.38214111328125, 1.4304046630859375, 1.478668212890625, 1.5269317626953125, 1.5751953125]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 4.0, 7.0, 7.0, 5.0, 5.0, 6.0, 7.0, 8.0, 12.0, 19.0, 20.0, 20.0, 21.0, 20.0, 26.0, 20.0, 32.0, 17.0, 26.0, 25.0, 29.0, 34.0, 34.0, 30.0, 33.0, 31.0, 1063.0, 38.0, 34.0, 35.0, 37.0, 24.0, 36.0, 25.0, 11.0, 34.0, 22.0, 21.0, 21.0, 19.0, 20.0, 13.0, 10.0, 9.0, 11.0, 7.0, 8.0, 5.0, 10.0, 6.0, 8.0, 6.0, 3.0, 2.0, 1.0, 2.0], "bins": [-8.0390625, -7.80316162109375, -7.5672607421875, -7.33135986328125, -7.095458984375, -6.85955810546875, -6.6236572265625, -6.38775634765625, -6.15185546875, -5.91595458984375, -5.6800537109375, -5.44415283203125, -5.208251953125, -4.97235107421875, -4.7364501953125, -4.50054931640625, -4.2646484375, -4.02874755859375, -3.7928466796875, -3.55694580078125, -3.321044921875, -3.08514404296875, -2.8492431640625, -2.61334228515625, -2.37744140625, -2.14154052734375, -1.9056396484375, -1.66973876953125, -1.433837890625, -1.19793701171875, -0.9620361328125, -0.72613525390625, -0.490234375, -0.25433349609375, -0.0184326171875, 0.21746826171875, 0.453369140625, 0.68927001953125, 0.9251708984375, 1.16107177734375, 1.39697265625, 1.63287353515625, 1.8687744140625, 2.10467529296875, 2.340576171875, 2.57647705078125, 2.8123779296875, 3.04827880859375, 3.2841796875, 3.52008056640625, 3.7559814453125, 3.99188232421875, 4.227783203125, 4.46368408203125, 4.6995849609375, 4.93548583984375, 5.17138671875, 5.40728759765625, 5.6431884765625, 5.87908935546875, 6.114990234375, 6.35089111328125, 6.5867919921875, 6.82269287109375, 7.05859375]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 6.0, 6.0, 10.0, 18.0, 18.0, 48.0, 62.0, 93.0, 106.0, 182.0, 292.0, 397.0, 637.0, 956.0, 1290.0, 1893.0, 2801.0, 3996.0, 5724.0, 8515.0, 12503.0, 18458.0, 27521.0, 40009.0, 59729.0, 89267.0, 135297.0, 1234717.0, 149805.0, 99014.0, 66080.0, 43628.0, 30054.0, 20434.0, 13803.0, 9391.0, 6388.0, 4382.0, 3105.0, 2013.0, 1368.0, 997.0, 696.0, 449.0, 314.0, 225.0, 155.0, 108.0, 66.0, 53.0, 25.0, 11.0, 11.0, 4.0, 6.0, 5.0, 0.0, 3.0, 0.0, 2.0], "bins": [-1.2744140625, -1.2341156005859375, -1.193817138671875, -1.1535186767578125, -1.11322021484375, -1.0729217529296875, -1.032623291015625, -0.9923248291015625, -0.9520263671875, -0.9117279052734375, -0.871429443359375, -0.8311309814453125, -0.79083251953125, -0.7505340576171875, -0.710235595703125, -0.6699371337890625, -0.629638671875, -0.5893402099609375, -0.549041748046875, -0.5087432861328125, -0.46844482421875, -0.4281463623046875, -0.387847900390625, -0.3475494384765625, -0.3072509765625, -0.2669525146484375, -0.226654052734375, -0.1863555908203125, -0.14605712890625, -0.1057586669921875, -0.065460205078125, -0.0251617431640625, 0.01513671875, 0.0554351806640625, 0.095733642578125, 0.1360321044921875, 0.17633056640625, 0.2166290283203125, 0.256927490234375, 0.2972259521484375, 0.3375244140625, 0.3778228759765625, 0.418121337890625, 0.4584197998046875, 0.49871826171875, 0.5390167236328125, 0.579315185546875, 0.6196136474609375, 0.659912109375, 0.7002105712890625, 0.740509033203125, 0.7808074951171875, 0.82110595703125, 0.8614044189453125, 0.901702880859375, 0.9420013427734375, 0.9822998046875, 1.0225982666015625, 1.062896728515625, 1.1031951904296875, 1.14349365234375, 1.1837921142578125, 1.224090576171875, 1.2643890380859375, 1.3046875]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 3.0, 3.0, 8.0, 4.0, 11.0, 14.0, 11.0, 19.0, 24.0, 53.0, 46.0, 50.0, 62.0, 116.0, 101.0, 105.0, 68.0, 80.0, 62.0, 44.0, 34.0, 25.0, 15.0, 15.0, 11.0, 3.0, 8.0, 3.0, 2.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.003940582275390625, -0.003789365291595459, -0.003638148307800293, -0.003486931324005127, -0.003335714340209961, -0.003184497356414795, -0.003033280372619629, -0.002882063388824463, -0.002730846405029297, -0.002579629421234131, -0.002428412437438965, -0.002277195453643799, -0.002125978469848633, -0.001974761486053467, -0.0018235445022583008, -0.0016723275184631348, -0.0015211105346679688, -0.0013698935508728027, -0.0012186765670776367, -0.0010674595832824707, -0.0009162425994873047, -0.0007650256156921387, -0.0006138086318969727, -0.00046259164810180664, -0.0003113746643066406, -0.0001601576805114746, -8.940696716308594e-06, 0.00014227628707885742, 0.00029349327087402344, 0.00044471025466918945, 0.0005959272384643555, 0.0007471442222595215, 0.0008983612060546875, 0.0010495781898498535, 0.0012007951736450195, 0.0013520121574401855, 0.0015032291412353516, 0.0016544461250305176, 0.0018056631088256836, 0.0019568800926208496, 0.0021080970764160156, 0.0022593140602111816, 0.0024105310440063477, 0.0025617480278015137, 0.0027129650115966797, 0.0028641819953918457, 0.0030153989791870117, 0.0031666159629821777, 0.0033178329467773438, 0.0034690499305725098, 0.0036202669143676758, 0.003771483898162842, 0.003922700881958008, 0.004073917865753174, 0.00422513484954834, 0.004376351833343506, 0.004527568817138672, 0.004678785800933838, 0.004830002784729004, 0.00498121976852417, 0.005132436752319336, 0.005283653736114502, 0.005434870719909668, 0.005586087703704834, 0.0057373046875]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 3.0, 1.0, 2.0, 3.0, 5.0, 6.0, 15.0, 12.0, 34.0, 26.0, 58.0, 98.0, 161.0, 319.0, 1652.0, 1043676.0, 1649.0, 384.0, 155.0, 96.0, 59.0, 51.0, 29.0, 15.0, 13.0, 9.0, 7.0, 1.0, 9.0, 6.0, 2.0, 2.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1435546875, -0.1397256851196289, -0.1358966827392578, -0.13206768035888672, -0.12823867797851562, -0.12440967559814453, -0.12058067321777344, -0.11675167083740234, -0.11292266845703125, -0.10909366607666016, -0.10526466369628906, -0.10143566131591797, -0.09760665893554688, -0.09377765655517578, -0.08994865417480469, -0.0861196517944336, -0.0822906494140625, -0.0784616470336914, -0.07463264465332031, -0.07080364227294922, -0.06697463989257812, -0.06314563751220703, -0.05931663513183594, -0.055487632751464844, -0.05165863037109375, -0.047829627990722656, -0.04400062561035156, -0.04017162322998047, -0.036342620849609375, -0.03251361846923828, -0.028684616088867188, -0.024855613708496094, -0.021026611328125, -0.017197608947753906, -0.013368606567382812, -0.009539604187011719, -0.005710601806640625, -0.0018815994262695312, 0.0019474029541015625, 0.005776405334472656, 0.00960540771484375, 0.013434410095214844, 0.017263412475585938, 0.02109241485595703, 0.024921417236328125, 0.02875041961669922, 0.03257942199707031, 0.036408424377441406, 0.0402374267578125, 0.044066429138183594, 0.04789543151855469, 0.05172443389892578, 0.055553436279296875, 0.05938243865966797, 0.06321144104003906, 0.06704044342041016, 0.07086944580078125, 0.07469844818115234, 0.07852745056152344, 0.08235645294189453, 0.08618545532226562, 0.09001445770263672, 0.09384346008300781, 0.0976724624633789, 0.10150146484375]}, "gradients/decoder.transformer.h.8.ln_cross_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 1.0, 20.0, 691.0, 300.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.002911006798967719, -0.002484816126525402, -0.002058625454083085, -0.0016324346652254462, -0.0012062439927831292, -0.0007800532039254904, -0.00035386253148317337, 7.232814095914364e-05, 0.0004985188134014606, 0.0009247094858437777, 0.0013509001582860947, 0.0017770909471437335, 0.0022032815031707287, 0.0026294724084436893, 0.0030556630808860064, 0.0034818537533283234, 0.003908044658601284, 0.004334235563874245, 0.004760426003485918, 0.005186616908758879, 0.005612807348370552, 0.006038998253643513, 0.006465189158916473, 0.006891379598528147, 0.00731757003813982, 0.007743760943412781, 0.008169951848685741, 0.008596141822636127, 0.009022332727909088, 0.009448523633182049, 0.00987471453845501, 0.010300904512405396, 0.010727095417678356, 0.011153286322951317, 0.011579477228224277, 0.012005667202174664, 0.012431858107447624, 0.012858049012720585, 0.013284239917993546, 0.013710429891943932, 0.014136620797216892, 0.014562811702489853, 0.014989002607762814, 0.0154151925817132, 0.01584138348698616, 0.016267575323581696, 0.01669376529753208, 0.017119955271482468, 0.017546147108078003, 0.01797233708202839, 0.018398528918623924, 0.01882471889257431, 0.019250910729169846, 0.01967710070312023, 0.020103290677070618, 0.020529482513666153, 0.02095567248761654, 0.021381862461566925, 0.02180805429816246, 0.022234244272112846, 0.02266043610870838, 0.023086626082658768, 0.023512816056609154, 0.02393900789320469, 0.024365197867155075]}, "gradients/decoder.transformer.h.8.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 7.0, 9.0, 7.0, 7.0, 8.0, 13.0, 13.0, 25.0, 15.0, 13.0, 19.0, 30.0, 25.0, 24.0, 31.0, 41.0, 36.0, 36.0, 37.0, 33.0, 37.0, 39.0, 28.0, 39.0, 36.0, 35.0, 42.0, 29.0, 30.0, 27.0, 31.0, 21.0, 22.0, 18.0, 24.0, 23.0, 12.0, 17.0, 10.0, 11.0, 7.0, 10.0, 2.0, 8.0, 4.0, 2.0, 3.0, 4.0, 3.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.001588284969329834, -0.0015390831977128983, -0.0014898814260959625, -0.0014406796544790268, -0.001391477882862091, -0.0013422761112451553, -0.0012930743396282196, -0.0012438725680112839, -0.0011946707963943481, -0.0011454690247774124, -0.0010962672531604767, -0.001047065481543541, -0.0009978637099266052, -0.0009486619383096695, -0.0008994601666927338, -0.000850258395075798, -0.0008010566234588623, -0.0007518548518419266, -0.0007026530802249908, -0.0006534513086080551, -0.0006042495369911194, -0.0005550477653741837, -0.0005058459937572479, -0.0004566442221403122, -0.00040744245052337646, -0.00035824067890644073, -0.000309038907289505, -0.0002598371356725693, -0.00021063536405563354, -0.00016143359243869781, -0.00011223182082176208, -6.303004920482635e-05, -1.3828277587890625e-05, 3.5373494029045105e-05, 8.457526564598083e-05, 0.00013377703726291656, 0.0001829788088798523, 0.00023218058049678802, 0.00028138235211372375, 0.0003305841237306595, 0.0003797858953475952, 0.00042898766696453094, 0.0004781894385814667, 0.0005273912101984024, 0.0005765929818153381, 0.0006257947534322739, 0.0006749965250492096, 0.0007241982966661453, 0.0007734000682830811, 0.0008226018399000168, 0.0008718036115169525, 0.0009210053831338882, 0.000970207154750824, 0.0010194089263677597, 0.0010686106979846954, 0.0011178124696016312, 0.001167014241218567, 0.0012162160128355026, 0.0012654177844524384, 0.001314619556069374, 0.0013638213276863098, 0.0014130230993032455, 0.0014622248709201813, 0.001511426642537117, 0.0015606284141540527]}, "gradients/decoder.transformer.h.8.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 2.0, 4.0, 6.0, 10.0, 8.0, 14.0, 9.0, 12.0, 21.0, 14.0, 21.0, 31.0, 28.0, 32.0, 36.0, 42.0, 42.0, 39.0, 40.0, 39.0, 64.0, 34.0, 47.0, 58.0, 38.0, 41.0, 34.0, 36.0, 29.0, 23.0, 20.0, 20.0, 9.0, 23.0, 14.0, 18.0, 14.0, 11.0, 6.0, 7.0, 4.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.6015625, -13.1275634765625, -12.653564453125, -12.1795654296875, -11.70556640625, -11.2315673828125, -10.757568359375, -10.2835693359375, -9.8095703125, -9.3355712890625, -8.861572265625, -8.3875732421875, -7.91357421875, -7.4395751953125, -6.965576171875, -6.4915771484375, -6.017578125, -5.5435791015625, -5.069580078125, -4.5955810546875, -4.12158203125, -3.6475830078125, -3.173583984375, -2.6995849609375, -2.2255859375, -1.7515869140625, -1.277587890625, -0.8035888671875, -0.32958984375, 0.1444091796875, 0.618408203125, 1.0924072265625, 1.56640625, 2.0404052734375, 2.514404296875, 2.9884033203125, 3.46240234375, 3.9364013671875, 4.410400390625, 4.8843994140625, 5.3583984375, 5.8323974609375, 6.306396484375, 6.7803955078125, 7.25439453125, 7.7283935546875, 8.202392578125, 8.6763916015625, 9.150390625, 9.6243896484375, 10.098388671875, 10.5723876953125, 11.04638671875, 11.5203857421875, 11.994384765625, 12.4683837890625, 12.9423828125, 13.4163818359375, 13.890380859375, 14.3643798828125, 14.83837890625, 15.3123779296875, 15.786376953125, 16.2603759765625, 16.734375]}, "gradients/decoder.transformer.h.8.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 4.0, 5.0, 1.0, 2.0, 3.0, 5.0, 12.0, 14.0, 13.0, 15.0, 23.0, 32.0, 34.0, 37.0, 83.0, 150.0, 244.0, 520.0, 1293.0, 3021.0, 7427.0, 18473.0, 48062.0, 140511.0, 419270.0, 273377.0, 84306.0, 30889.0, 12164.0, 4823.0, 1975.0, 855.0, 375.0, 187.0, 110.0, 62.0, 44.0, 36.0, 28.0, 25.0, 17.0, 15.0, 7.0, 10.0, 4.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.9765625, -11.560302734375, -11.14404296875, -10.727783203125, -10.3115234375, -9.895263671875, -9.47900390625, -9.062744140625, -8.646484375, -8.230224609375, -7.81396484375, -7.397705078125, -6.9814453125, -6.565185546875, -6.14892578125, -5.732666015625, -5.31640625, -4.900146484375, -4.48388671875, -4.067626953125, -3.6513671875, -3.235107421875, -2.81884765625, -2.402587890625, -1.986328125, -1.570068359375, -1.15380859375, -0.737548828125, -0.3212890625, 0.094970703125, 0.51123046875, 0.927490234375, 1.34375, 1.760009765625, 2.17626953125, 2.592529296875, 3.0087890625, 3.425048828125, 3.84130859375, 4.257568359375, 4.673828125, 5.090087890625, 5.50634765625, 5.922607421875, 6.3388671875, 6.755126953125, 7.17138671875, 7.587646484375, 8.00390625, 8.420166015625, 8.83642578125, 9.252685546875, 9.6689453125, 10.085205078125, 10.50146484375, 10.917724609375, 11.333984375, 11.750244140625, 12.16650390625, 12.582763671875, 12.9990234375, 13.415283203125, 13.83154296875, 14.247802734375, 14.6640625]}, "gradients/decoder.transformer.h.8.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 5.0, 3.0, 7.0, 1.0, 6.0, 8.0, 9.0, 7.0, 11.0, 16.0, 11.0, 13.0, 18.0, 17.0, 25.0, 25.0, 26.0, 30.0, 27.0, 32.0, 33.0, 39.0, 58.0, 87.0, 256.0, 1556.0, 208.0, 91.0, 51.0, 42.0, 37.0, 36.0, 31.0, 32.0, 24.0, 28.0, 18.0, 19.0, 18.0, 13.0, 17.0, 11.0, 14.0, 4.0, 8.0, 3.0, 5.0, 6.0, 5.0, 7.0, 2.0, 1.0, 0.0, 3.0, 2.0, 0.0, 2.0], "bins": [-41.09375, -39.828125, -38.5625, -37.296875, -36.03125, -34.765625, -33.5, -32.234375, -30.96875, -29.703125, -28.4375, -27.171875, -25.90625, -24.640625, -23.375, -22.109375, -20.84375, -19.578125, -18.3125, -17.046875, -15.78125, -14.515625, -13.25, -11.984375, -10.71875, -9.453125, -8.1875, -6.921875, -5.65625, -4.390625, -3.125, -1.859375, -0.59375, 0.671875, 1.9375, 3.203125, 4.46875, 5.734375, 7.0, 8.265625, 9.53125, 10.796875, 12.0625, 13.328125, 14.59375, 15.859375, 17.125, 18.390625, 19.65625, 20.921875, 22.1875, 23.453125, 24.71875, 25.984375, 27.25, 28.515625, 29.78125, 31.046875, 32.3125, 33.578125, 34.84375, 36.109375, 37.375, 38.640625, 39.90625]}, "gradients/decoder.transformer.h.8.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 6.0, 2.0, 10.0, 8.0, 5.0, 9.0, 12.0, 25.0, 19.0, 19.0, 27.0, 38.0, 39.0, 66.0, 79.0, 110.0, 123.0, 204.0, 272.0, 416.0, 1029.0, 11944.0, 2887215.0, 238631.0, 3445.0, 681.0, 341.0, 218.0, 143.0, 133.0, 105.0, 69.0, 58.0, 43.0, 29.0, 20.0, 26.0, 18.0, 17.0, 10.0, 12.0, 7.0, 4.0, 4.0, 8.0, 8.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0], "bins": [-79.5, -77.2158203125, -74.931640625, -72.6474609375, -70.36328125, -68.0791015625, -65.794921875, -63.5107421875, -61.2265625, -58.9423828125, -56.658203125, -54.3740234375, -52.08984375, -49.8056640625, -47.521484375, -45.2373046875, -42.953125, -40.6689453125, -38.384765625, -36.1005859375, -33.81640625, -31.5322265625, -29.248046875, -26.9638671875, -24.6796875, -22.3955078125, -20.111328125, -17.8271484375, -15.54296875, -13.2587890625, -10.974609375, -8.6904296875, -6.40625, -4.1220703125, -1.837890625, 0.4462890625, 2.73046875, 5.0146484375, 7.298828125, 9.5830078125, 11.8671875, 14.1513671875, 16.435546875, 18.7197265625, 21.00390625, 23.2880859375, 25.572265625, 27.8564453125, 30.140625, 32.4248046875, 34.708984375, 36.9931640625, 39.27734375, 41.5615234375, 43.845703125, 46.1298828125, 48.4140625, 50.6982421875, 52.982421875, 55.2666015625, 57.55078125, 59.8349609375, 62.119140625, 64.4033203125, 66.6875]}, "gradients/decoder.transformer.h.8.ln_1.weight": {"_type": "histogram", "values": [514.0, 504.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.902969360351562, -0.14458084106445312, 17.613807678222656, 35.372196197509766, 53.130584716796875, 70.88897705078125, 88.6473617553711, 106.40574645996094, 124.16413879394531, 141.9225311279297, 159.680908203125, 177.43930053710938, 195.19769287109375, 212.95608520507812, 230.7144775390625, 248.4728546142578, 266.23126220703125, 283.9896545410156, 301.748046875, 319.5064392089844, 337.26483154296875, 355.023193359375, 372.7815856933594, 390.53997802734375, 408.29833984375, 426.0567321777344, 443.81512451171875, 461.5735168457031, 479.3319091796875, 497.09027099609375, 514.8486938476562, 532.6070556640625, 550.365478515625, 568.1238403320312, 585.8822631835938, 603.640625, 621.3990478515625, 639.1574096679688, 656.9158325195312, 674.6741943359375, 692.4326171875, 710.1909790039062, 727.9494018554688, 745.707763671875, 763.4661865234375, 781.2245483398438, 798.9829711914062, 816.7413330078125, 834.4996948242188, 852.258056640625, 870.0164794921875, 887.7748413085938, 905.5332641601562, 923.2916259765625, 941.050048828125, 958.8084106445312, 976.5667724609375, 994.3251342773438, 1012.0835571289062, 1029.8419189453125, 1047.600341796875, 1065.3587646484375, 1083.1170654296875, 1100.87548828125, 1118.6339111328125]}, "gradients/decoder.transformer.h.8.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 2.0, 7.0, 2.0, 8.0, 12.0, 12.0, 15.0, 10.0, 17.0, 16.0, 18.0, 28.0, 23.0, 35.0, 29.0, 46.0, 42.0, 38.0, 35.0, 33.0, 44.0, 44.0, 53.0, 50.0, 41.0, 33.0, 39.0, 37.0, 31.0, 28.0, 22.0, 30.0, 20.0, 20.0, 14.0, 13.0, 10.0, 15.0, 4.0, 2.0, 7.0, 8.0, 6.0, 4.0, 1.0, 1.0, 4.0, 1.0, 1.0, 0.0, 1.0], "bins": [-121.51361083984375, -117.98515319824219, -114.4566879272461, -110.92822265625, -107.39976501464844, -103.87130737304688, -100.34284210205078, -96.81437683105469, -93.28591918945312, -89.75746154785156, -86.22899627685547, -82.70053100585938, -79.17207336425781, -75.64361572265625, -72.11515045166016, -68.58668518066406, -65.0582275390625, -61.52976608276367, -58.001304626464844, -54.472843170166016, -50.94438171386719, -47.41592025756836, -43.88745880126953, -40.3589973449707, -36.830535888671875, -33.30207443237305, -29.77361297607422, -26.24515151977539, -22.716690063476562, -19.188228607177734, -15.659767150878906, -12.131305694580078, -8.60284423828125, -5.074382781982422, -1.5459213256835938, 1.9825401306152344, 5.5110015869140625, 9.03946304321289, 12.567924499511719, 16.096385955810547, 19.624847412109375, 23.153308868408203, 26.68177032470703, 30.21023178100586, 33.73869323730469, 37.267154693603516, 40.795616149902344, 44.32407760620117, 47.8525390625, 51.38100051879883, 54.909461975097656, 58.437923431396484, 61.96638488769531, 65.49484252929688, 69.02330780029297, 72.55177307128906, 76.08023071289062, 79.60868835449219, 83.13715362548828, 86.66561889648438, 90.19407653808594, 93.7225341796875, 97.2509994506836, 100.77946472167969, 104.30792236328125]}, "gradients/decoder.transformer.h.7.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 6.0, 3.0, 3.0, 3.0, 4.0, 12.0, 13.0, 11.0, 12.0, 16.0, 21.0, 25.0, 22.0, 35.0, 31.0, 36.0, 40.0, 37.0, 39.0, 45.0, 48.0, 53.0, 47.0, 46.0, 50.0, 41.0, 35.0, 42.0, 42.0, 27.0, 21.0, 25.0, 16.0, 17.0, 17.0, 14.0, 16.0, 15.0, 10.0, 6.0, 3.0, 3.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.578125, -14.07177734375, -13.5654296875, -13.05908203125, -12.552734375, -12.04638671875, -11.5400390625, -11.03369140625, -10.52734375, -10.02099609375, -9.5146484375, -9.00830078125, -8.501953125, -7.99560546875, -7.4892578125, -6.98291015625, -6.4765625, -5.97021484375, -5.4638671875, -4.95751953125, -4.451171875, -3.94482421875, -3.4384765625, -2.93212890625, -2.42578125, -1.91943359375, -1.4130859375, -0.90673828125, -0.400390625, 0.10595703125, 0.6123046875, 1.11865234375, 1.625, 2.13134765625, 2.6376953125, 3.14404296875, 3.650390625, 4.15673828125, 4.6630859375, 5.16943359375, 5.67578125, 6.18212890625, 6.6884765625, 7.19482421875, 7.701171875, 8.20751953125, 8.7138671875, 9.22021484375, 9.7265625, 10.23291015625, 10.7392578125, 11.24560546875, 11.751953125, 12.25830078125, 12.7646484375, 13.27099609375, 13.77734375, 14.28369140625, 14.7900390625, 15.29638671875, 15.802734375, 16.30908203125, 16.8154296875, 17.32177734375, 17.828125]}, "gradients/decoder.transformer.h.7.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 0.0, 5.0, 6.0, 5.0, 6.0, 4.0, 10.0, 12.0, 9.0, 14.0, 24.0, 27.0, 22.0, 43.0, 45.0, 57.0, 67.0, 85.0, 113.0, 221.0, 564.0, 4313.0, 153846.0, 3594068.0, 431292.0, 7741.0, 839.0, 265.0, 139.0, 95.0, 71.0, 55.0, 44.0, 21.0, 33.0, 34.0, 15.0, 12.0, 20.0, 13.0, 7.0, 6.0, 6.0, 9.0, 3.0, 2.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-46.25, -44.57470703125, -42.8994140625, -41.22412109375, -39.548828125, -37.87353515625, -36.1982421875, -34.52294921875, -32.84765625, -31.17236328125, -29.4970703125, -27.82177734375, -26.146484375, -24.47119140625, -22.7958984375, -21.12060546875, -19.4453125, -17.77001953125, -16.0947265625, -14.41943359375, -12.744140625, -11.06884765625, -9.3935546875, -7.71826171875, -6.04296875, -4.36767578125, -2.6923828125, -1.01708984375, 0.658203125, 2.33349609375, 4.0087890625, 5.68408203125, 7.359375, 9.03466796875, 10.7099609375, 12.38525390625, 14.060546875, 15.73583984375, 17.4111328125, 19.08642578125, 20.76171875, 22.43701171875, 24.1123046875, 25.78759765625, 27.462890625, 29.13818359375, 30.8134765625, 32.48876953125, 34.1640625, 35.83935546875, 37.5146484375, 39.18994140625, 40.865234375, 42.54052734375, 44.2158203125, 45.89111328125, 47.56640625, 49.24169921875, 50.9169921875, 52.59228515625, 54.267578125, 55.94287109375, 57.6181640625, 59.29345703125, 60.96875]}, "gradients/decoder.transformer.h.7.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 6.0, 2.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 15.0, 16.0, 29.0, 46.0, 72.0, 82.0, 98.0, 132.0, 203.0, 288.0, 383.0, 512.0, 502.0, 474.0, 325.0, 243.0, 162.0, 121.0, 90.0, 64.0, 56.0, 30.0, 25.0, 13.0, 15.0, 14.0, 7.0, 4.0, 3.0, 3.0, 4.0, 3.0, 3.0, 2.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.53125, -26.703125, -25.875, -25.046875, -24.21875, -23.390625, -22.5625, -21.734375, -20.90625, -20.078125, -19.25, -18.421875, -17.59375, -16.765625, -15.9375, -15.109375, -14.28125, -13.453125, -12.625, -11.796875, -10.96875, -10.140625, -9.3125, -8.484375, -7.65625, -6.828125, -6.0, -5.171875, -4.34375, -3.515625, -2.6875, -1.859375, -1.03125, -0.203125, 0.625, 1.453125, 2.28125, 3.109375, 3.9375, 4.765625, 5.59375, 6.421875, 7.25, 8.078125, 8.90625, 9.734375, 10.5625, 11.390625, 12.21875, 13.046875, 13.875, 14.703125, 15.53125, 16.359375, 17.1875, 18.015625, 18.84375, 19.671875, 20.5, 21.328125, 22.15625, 22.984375, 23.8125, 24.640625, 25.46875]}, "gradients/decoder.transformer.h.7.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 1.0, 0.0, 4.0, 10.0, 9.0, 16.0, 14.0, 21.0, 23.0, 25.0, 42.0, 65.0, 83.0, 123.0, 146.0, 198.0, 304.0, 582.0, 2134.0, 268496.0, 3912577.0, 7329.0, 770.0, 370.0, 246.0, 180.0, 103.0, 103.0, 81.0, 57.0, 50.0, 17.0, 29.0, 19.0, 11.0, 9.0, 10.0, 3.0, 8.0, 7.0, 4.0, 1.0, 4.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-135.0, -130.673828125, -126.34765625, -122.021484375, -117.6953125, -113.369140625, -109.04296875, -104.716796875, -100.390625, -96.064453125, -91.73828125, -87.412109375, -83.0859375, -78.759765625, -74.43359375, -70.107421875, -65.78125, -61.455078125, -57.12890625, -52.802734375, -48.4765625, -44.150390625, -39.82421875, -35.498046875, -31.171875, -26.845703125, -22.51953125, -18.193359375, -13.8671875, -9.541015625, -5.21484375, -0.888671875, 3.4375, 7.763671875, 12.08984375, 16.416015625, 20.7421875, 25.068359375, 29.39453125, 33.720703125, 38.046875, 42.373046875, 46.69921875, 51.025390625, 55.3515625, 59.677734375, 64.00390625, 68.330078125, 72.65625, 76.982421875, 81.30859375, 85.634765625, 89.9609375, 94.287109375, 98.61328125, 102.939453125, 107.265625, 111.591796875, 115.91796875, 120.244140625, 124.5703125, 128.896484375, 133.22265625, 137.548828125, 141.875]}, "gradients/decoder.transformer.h.7.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 14.0, 100.0, 296.0, 331.0, 204.0, 53.0, 15.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-224.96385192871094, -217.29205322265625, -209.62026977539062, -201.94847106933594, -194.27667236328125, -186.60487365722656, -178.93307495117188, -171.26129150390625, -163.58949279785156, -155.91769409179688, -148.24591064453125, -140.57411193847656, -132.90231323242188, -125.23051452636719, -117.55872344970703, -109.88693237304688, -102.21513366699219, -94.5433349609375, -86.87154388427734, -79.19975280761719, -71.5279541015625, -63.85615921020508, -56.184364318847656, -48.512569427490234, -40.84077453613281, -33.16897964477539, -25.49718475341797, -17.825389862060547, -10.153594970703125, -2.481800079345703, 5.189994812011719, 12.86178970336914, 20.5335693359375, 28.205364227294922, 35.877159118652344, 43.548954010009766, 51.22074890136719, 58.89254379272461, 66.56433868408203, 74.23612976074219, 81.90792846679688, 89.57972717285156, 97.25151824951172, 104.92330932617188, 112.59510803222656, 120.26690673828125, 127.9386978149414, 135.61048889160156, 143.28228759765625, 150.95408630371094, 158.62588500976562, 166.29766845703125, 173.96946716308594, 181.64126586914062, 189.31304931640625, 196.98484802246094, 204.65664672851562, 212.3284454345703, 220.000244140625, 227.67202758789062, 235.3438262939453, 243.015625, 250.68740844726562, 258.3592224121094, 266.031005859375]}, "gradients/decoder.transformer.h.7.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 4.0, 0.0, 1.0, 2.0, 3.0, 4.0, 4.0, 4.0, 12.0, 9.0, 10.0, 14.0, 18.0, 17.0, 28.0, 26.0, 27.0, 22.0, 32.0, 49.0, 49.0, 51.0, 48.0, 56.0, 50.0, 41.0, 35.0, 41.0, 46.0, 42.0, 34.0, 37.0, 29.0, 25.0, 21.0, 23.0, 23.0, 15.0, 15.0, 11.0, 14.0, 8.0, 4.0, 4.0, 7.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-85.64910888671875, -82.68594360351562, -79.7227783203125, -76.75960540771484, -73.79644012451172, -70.8332748413086, -67.87010192871094, -64.90693664550781, -61.94377136230469, -58.98060607910156, -56.01743698120117, -53.05426788330078, -50.091102600097656, -47.12793731689453, -44.16476821899414, -41.20159912109375, -38.238433837890625, -35.2752685546875, -32.31209945678711, -29.34893226623535, -26.385765075683594, -23.422597885131836, -20.459430694580078, -17.49626350402832, -14.533096313476562, -11.569929122924805, -8.606761932373047, -5.643594741821289, -2.6804275512695312, 0.28273963928222656, 3.2459068298339844, 6.209074020385742, 9.1722412109375, 12.135408401489258, 15.098575592041016, 18.061742782592773, 21.02490997314453, 23.98807716369629, 26.951244354248047, 29.914411544799805, 32.87757873535156, 35.84074401855469, 38.80391311645508, 41.76708221435547, 44.730247497558594, 47.69341278076172, 50.65658187866211, 53.6197509765625, 56.582916259765625, 59.54608154296875, 62.50925064086914, 65.47241973876953, 68.43558502197266, 71.39875030517578, 74.36192321777344, 77.32508850097656, 80.28825378417969, 83.25141906738281, 86.21458435058594, 89.1777572631836, 92.14092254638672, 95.10408782958984, 98.0672607421875, 101.03042602539062, 103.99359130859375]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 5.0, 3.0, 5.0, 3.0, 6.0, 8.0, 16.0, 19.0, 16.0, 18.0, 20.0, 12.0, 30.0, 21.0, 38.0, 34.0, 48.0, 25.0, 50.0, 48.0, 32.0, 52.0, 46.0, 49.0, 43.0, 42.0, 51.0, 36.0, 30.0, 27.0, 26.0, 28.0, 19.0, 20.0, 19.0, 11.0, 11.0, 6.0, 12.0, 9.0, 7.0, 5.0, 5.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.640625, -14.14990234375, -13.6591796875, -13.16845703125, -12.677734375, -12.18701171875, -11.6962890625, -11.20556640625, -10.71484375, -10.22412109375, -9.7333984375, -9.24267578125, -8.751953125, -8.26123046875, -7.7705078125, -7.27978515625, -6.7890625, -6.29833984375, -5.8076171875, -5.31689453125, -4.826171875, -4.33544921875, -3.8447265625, -3.35400390625, -2.86328125, -2.37255859375, -1.8818359375, -1.39111328125, -0.900390625, -0.40966796875, 0.0810546875, 0.57177734375, 1.0625, 1.55322265625, 2.0439453125, 2.53466796875, 3.025390625, 3.51611328125, 4.0068359375, 4.49755859375, 4.98828125, 5.47900390625, 5.9697265625, 6.46044921875, 6.951171875, 7.44189453125, 7.9326171875, 8.42333984375, 8.9140625, 9.40478515625, 9.8955078125, 10.38623046875, 10.876953125, 11.36767578125, 11.8583984375, 12.34912109375, 12.83984375, 13.33056640625, 13.8212890625, 14.31201171875, 14.802734375, 15.29345703125, 15.7841796875, 16.27490234375, 16.765625]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 2.0, 4.0, 5.0, 5.0, 9.0, 9.0, 21.0, 15.0, 36.0, 69.0, 116.0, 161.0, 244.0, 365.0, 580.0, 925.0, 1437.0, 2307.0, 3721.0, 5851.0, 9431.0, 14997.0, 24130.0, 39073.0, 64010.0, 107684.0, 180950.0, 224266.0, 144993.0, 86568.0, 51984.0, 31749.0, 19860.0, 12423.0, 7586.0, 4726.0, 3026.0, 1946.0, 1191.0, 746.0, 475.0, 301.0, 194.0, 135.0, 73.0, 64.0, 48.0, 23.0, 11.0, 10.0, 4.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6328125, -1.5789031982421875, -1.524993896484375, -1.4710845947265625, -1.41717529296875, -1.3632659912109375, -1.309356689453125, -1.2554473876953125, -1.2015380859375, -1.1476287841796875, -1.093719482421875, -1.0398101806640625, -0.98590087890625, -0.9319915771484375, -0.878082275390625, -0.8241729736328125, -0.770263671875, -0.7163543701171875, -0.662445068359375, -0.6085357666015625, -0.55462646484375, -0.5007171630859375, -0.446807861328125, -0.3928985595703125, -0.3389892578125, -0.2850799560546875, -0.231170654296875, -0.1772613525390625, -0.12335205078125, -0.0694427490234375, -0.015533447265625, 0.0383758544921875, 0.09228515625, 0.1461944580078125, 0.200103759765625, 0.2540130615234375, 0.30792236328125, 0.3618316650390625, 0.415740966796875, 0.4696502685546875, 0.5235595703125, 0.5774688720703125, 0.631378173828125, 0.6852874755859375, 0.73919677734375, 0.7931060791015625, 0.847015380859375, 0.9009246826171875, 0.954833984375, 1.0087432861328125, 1.062652587890625, 1.1165618896484375, 1.17047119140625, 1.2243804931640625, 1.278289794921875, 1.3321990966796875, 1.3861083984375, 1.4400177001953125, 1.493927001953125, 1.5478363037109375, 1.60174560546875, 1.6556549072265625, 1.709564208984375, 1.7634735107421875, 1.8173828125]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 5.0, 5.0, 3.0, 4.0, 7.0, 12.0, 8.0, 12.0, 14.0, 17.0, 23.0, 17.0, 20.0, 35.0, 27.0, 28.0, 32.0, 40.0, 36.0, 28.0, 41.0, 32.0, 1073.0, 39.0, 38.0, 31.0, 39.0, 37.0, 37.0, 34.0, 31.0, 31.0, 23.0, 20.0, 19.0, 18.0, 27.0, 18.0, 13.0, 11.0, 10.0, 10.0, 7.0, 4.0, 4.0, 3.0, 4.0, 5.0, 1.0, 3.0, 3.0, 0.0, 0.0, 1.0], "bins": [-8.9765625, -8.7012939453125, -8.426025390625, -8.1507568359375, -7.87548828125, -7.6002197265625, -7.324951171875, -7.0496826171875, -6.7744140625, -6.4991455078125, -6.223876953125, -5.9486083984375, -5.67333984375, -5.3980712890625, -5.122802734375, -4.8475341796875, -4.572265625, -4.2969970703125, -4.021728515625, -3.7464599609375, -3.47119140625, -3.1959228515625, -2.920654296875, -2.6453857421875, -2.3701171875, -2.0948486328125, -1.819580078125, -1.5443115234375, -1.26904296875, -0.9937744140625, -0.718505859375, -0.4432373046875, -0.16796875, 0.1072998046875, 0.382568359375, 0.6578369140625, 0.93310546875, 1.2083740234375, 1.483642578125, 1.7589111328125, 2.0341796875, 2.3094482421875, 2.584716796875, 2.8599853515625, 3.13525390625, 3.4105224609375, 3.685791015625, 3.9610595703125, 4.236328125, 4.5115966796875, 4.786865234375, 5.0621337890625, 5.33740234375, 5.6126708984375, 5.887939453125, 6.1632080078125, 6.4384765625, 6.7137451171875, 6.989013671875, 7.2642822265625, 7.53955078125, 7.8148193359375, 8.090087890625, 8.3653564453125, 8.640625]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 8.0, 9.0, 18.0, 19.0, 24.0, 46.0, 74.0, 110.0, 179.0, 253.0, 407.0, 556.0, 829.0, 1345.0, 2043.0, 2969.0, 4746.0, 7159.0, 11378.0, 17388.0, 27025.0, 41798.0, 64877.0, 104149.0, 163074.0, 1248262.0, 145120.0, 90844.0, 57698.0, 37029.0, 24026.0, 15488.0, 10000.0, 6198.0, 4293.0, 2654.0, 1760.0, 1115.0, 727.0, 503.0, 323.0, 205.0, 140.0, 85.0, 58.0, 44.0, 39.0, 16.0, 18.0, 1.0, 4.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.4384765625, -1.392608642578125, -1.34674072265625, -1.300872802734375, -1.2550048828125, -1.209136962890625, -1.16326904296875, -1.117401123046875, -1.071533203125, -1.025665283203125, -0.97979736328125, -0.933929443359375, -0.8880615234375, -0.842193603515625, -0.79632568359375, -0.750457763671875, -0.70458984375, -0.658721923828125, -0.61285400390625, -0.566986083984375, -0.5211181640625, -0.475250244140625, -0.42938232421875, -0.383514404296875, -0.337646484375, -0.291778564453125, -0.24591064453125, -0.200042724609375, -0.1541748046875, -0.108306884765625, -0.06243896484375, -0.016571044921875, 0.029296875, 0.075164794921875, 0.12103271484375, 0.166900634765625, 0.2127685546875, 0.258636474609375, 0.30450439453125, 0.350372314453125, 0.396240234375, 0.442108154296875, 0.48797607421875, 0.533843994140625, 0.5797119140625, 0.625579833984375, 0.67144775390625, 0.717315673828125, 0.76318359375, 0.809051513671875, 0.85491943359375, 0.900787353515625, 0.9466552734375, 0.992523193359375, 1.03839111328125, 1.084259033203125, 1.130126953125, 1.175994873046875, 1.22186279296875, 1.267730712890625, 1.3135986328125, 1.359466552734375, 1.40533447265625, 1.451202392578125, 1.4970703125]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 7.0, 5.0, 6.0, 11.0, 11.0, 12.0, 21.0, 25.0, 24.0, 38.0, 54.0, 59.0, 65.0, 75.0, 74.0, 81.0, 89.0, 61.0, 57.0, 48.0, 42.0, 20.0, 25.0, 28.0, 14.0, 15.0, 8.0, 10.0, 5.0, 4.0, 1.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00310516357421875, -0.0029949545860290527, -0.0028847455978393555, -0.002774536609649658, -0.002664327621459961, -0.0025541186332702637, -0.0024439096450805664, -0.002333700656890869, -0.002223491668701172, -0.0021132826805114746, -0.0020030736923217773, -0.00189286470413208, -0.0017826557159423828, -0.0016724467277526855, -0.0015622377395629883, -0.001452028751373291, -0.0013418197631835938, -0.0012316107749938965, -0.0011214017868041992, -0.001011192798614502, -0.0009009838104248047, -0.0007907748222351074, -0.0006805658340454102, -0.0005703568458557129, -0.0004601478576660156, -0.00034993886947631836, -0.0002397298812866211, -0.00012952089309692383, -1.9311904907226562e-05, 9.08970832824707e-05, 0.00020110607147216797, 0.00031131505966186523, 0.0004215240478515625, 0.0005317330360412598, 0.000641942024230957, 0.0007521510124206543, 0.0008623600006103516, 0.0009725689888000488, 0.001082777976989746, 0.0011929869651794434, 0.0013031959533691406, 0.0014134049415588379, 0.0015236139297485352, 0.0016338229179382324, 0.0017440319061279297, 0.001854240894317627, 0.0019644498825073242, 0.0020746588706970215, 0.0021848678588867188, 0.002295076847076416, 0.0024052858352661133, 0.0025154948234558105, 0.002625703811645508, 0.002735912799835205, 0.0028461217880249023, 0.0029563307762145996, 0.003066539764404297, 0.003176748752593994, 0.0032869577407836914, 0.0033971667289733887, 0.003507375717163086, 0.003617584705352783, 0.0037277936935424805, 0.0038380026817321777, 0.003948211669921875]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 3.0, 4.0, 1.0, 5.0, 9.0, 9.0, 15.0, 17.0, 29.0, 19.0, 36.0, 79.0, 101.0, 173.0, 417.0, 2185.0, 1042543.0, 1967.0, 415.0, 189.0, 98.0, 60.0, 54.0, 33.0, 28.0, 23.0, 13.0, 5.0, 4.0, 6.0, 7.0, 5.0, 3.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.10467529296875, -0.10172557830810547, -0.09877586364746094, -0.0958261489868164, -0.09287643432617188, -0.08992671966552734, -0.08697700500488281, -0.08402729034423828, -0.08107757568359375, -0.07812786102294922, -0.07517814636230469, -0.07222843170166016, -0.06927871704101562, -0.0663290023803711, -0.06337928771972656, -0.06042957305908203, -0.0574798583984375, -0.05453014373779297, -0.05158042907714844, -0.048630714416503906, -0.045680999755859375, -0.042731285095214844, -0.03978157043457031, -0.03683185577392578, -0.03388214111328125, -0.03093242645263672, -0.027982711791992188, -0.025032997131347656, -0.022083282470703125, -0.019133567810058594, -0.016183853149414062, -0.013234138488769531, -0.010284423828125, -0.007334709167480469, -0.0043849945068359375, -0.0014352798461914062, 0.001514434814453125, 0.004464149475097656, 0.0074138641357421875, 0.010363578796386719, 0.01331329345703125, 0.01626300811767578, 0.019212722778320312, 0.022162437438964844, 0.025112152099609375, 0.028061866760253906, 0.031011581420898438, 0.03396129608154297, 0.0369110107421875, 0.03986072540283203, 0.04281044006347656, 0.045760154724121094, 0.048709869384765625, 0.051659584045410156, 0.05460929870605469, 0.05755901336669922, 0.06050872802734375, 0.06345844268798828, 0.06640815734863281, 0.06935787200927734, 0.07230758666992188, 0.0752573013305664, 0.07820701599121094, 0.08115673065185547, 0.0841064453125]}, "gradients/decoder.transformer.h.7.ln_cross_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 20.0, 267.0, 618.0, 102.0, 8.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0018993988633155823, -0.0016395584680140018, -0.0013797181891277432, -0.0011198779102414846, -0.0008600375149399042, -0.0006001971196383238, -0.0003403568407520652, -8.051656186580658e-05, 0.00017932383343577385, 0.00043916417052969337, 0.0006990045076236129, 0.0009588448447175324, 0.001218685181811452, 0.0014785255771130323, 0.001738365855999291, 0.0019982061348855495, 0.00225804653018713, 0.0025178869254887104, 0.002777727320790291, 0.0030375674832612276, 0.003297407878562808, 0.0035572482738643885, 0.0038170884363353252, 0.004076928831636906, 0.004336769226938486, 0.0045966096222400665, 0.004856450017541647, 0.005116290412843227, 0.0053761303424835205, 0.005635971203446388, 0.005895811133086681, 0.006155651528388262, 0.006415490992367268, 0.006675331387668848, 0.0069351717829704285, 0.007195012178272009, 0.007454852573573589, 0.0077146925032138824, 0.00797453336417675, 0.008234373293817043, 0.008494213223457336, 0.00875405315309763, 0.009013894014060497, 0.00927373394370079, 0.009533574804663658, 0.009793414734303951, 0.010053255595266819, 0.010313095524907112, 0.01057293638586998, 0.010832776315510273, 0.01109261717647314, 0.011352457106113434, 0.011612297967076302, 0.011872137896716595, 0.012131978757679462, 0.012391818687319756, 0.012651659548282623, 0.012911499477922916, 0.013171340338885784, 0.013431180268526077, 0.013691021129488945, 0.013950861059129238, 0.014210701920092106, 0.014470541849732399, 0.014730381779372692]}, "gradients/decoder.transformer.h.7.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 4.0, 3.0, 4.0, 9.0, 5.0, 5.0, 8.0, 8.0, 9.0, 13.0, 16.0, 13.0, 11.0, 23.0, 24.0, 28.0, 26.0, 30.0, 30.0, 31.0, 42.0, 44.0, 45.0, 33.0, 49.0, 32.0, 37.0, 40.0, 40.0, 33.0, 32.0, 27.0, 29.0, 32.0, 28.0, 26.0, 21.0, 17.0, 18.0, 18.0, 12.0, 8.0, 3.0, 13.0, 10.0, 6.0, 6.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-0.001431882381439209, -0.0013854308053851128, -0.0013389792293310165, -0.0012925276532769203, -0.001246076077222824, -0.0011996245011687279, -0.0011531729251146317, -0.0011067213490605354, -0.0010602697730064392, -0.001013818196952343, -0.0009673666208982468, -0.0009209150448441505, -0.0008744634687900543, -0.0008280118927359581, -0.0007815603166818619, -0.0007351087406277657, -0.0006886571645736694, -0.0006422055885195732, -0.000595754012465477, -0.0005493024364113808, -0.0005028508603572845, -0.0004563992843031883, -0.0004099477082490921, -0.0003634961321949959, -0.00031704455614089966, -0.00027059298008680344, -0.00022414140403270721, -0.000177689827978611, -0.00013123825192451477, -8.478667587041855e-05, -3.833509981632233e-05, 8.116476237773895e-06, 5.456805229187012e-05, 0.00010101962834596634, 0.00014747120440006256, 0.00019392278045415878, 0.000240374356508255, 0.0002868259325623512, 0.00033327750861644745, 0.00037972908467054367, 0.0004261806607246399, 0.0004726322367787361, 0.0005190838128328323, 0.0005655353888869286, 0.0006119869649410248, 0.000658438540995121, 0.0007048901170492172, 0.0007513416931033134, 0.0007977932691574097, 0.0008442448452115059, 0.0008906964212656021, 0.0009371479973196983, 0.0009835995733737946, 0.0010300511494278908, 0.001076502725481987, 0.0011229543015360832, 0.0011694058775901794, 0.0012158574536442757, 0.0012623090296983719, 0.0013087606057524681, 0.0013552121818065643, 0.0014016637578606606, 0.0014481153339147568, 0.001494566909968853, 0.0015410184860229492]}, "gradients/decoder.transformer.h.7.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 5.0, 3.0, 5.0, 3.0, 6.0, 8.0, 16.0, 19.0, 16.0, 18.0, 20.0, 12.0, 30.0, 21.0, 38.0, 34.0, 48.0, 25.0, 50.0, 48.0, 32.0, 52.0, 46.0, 49.0, 43.0, 42.0, 51.0, 36.0, 30.0, 27.0, 26.0, 28.0, 19.0, 20.0, 19.0, 11.0, 11.0, 6.0, 12.0, 9.0, 7.0, 5.0, 5.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.640625, -14.14990234375, -13.6591796875, -13.16845703125, -12.677734375, -12.18701171875, -11.6962890625, -11.20556640625, -10.71484375, -10.22412109375, -9.7333984375, -9.24267578125, -8.751953125, -8.26123046875, -7.7705078125, -7.27978515625, -6.7890625, -6.29833984375, -5.8076171875, -5.31689453125, -4.826171875, -4.33544921875, -3.8447265625, -3.35400390625, -2.86328125, -2.37255859375, -1.8818359375, -1.39111328125, -0.900390625, -0.40966796875, 0.0810546875, 0.57177734375, 1.0625, 1.55322265625, 2.0439453125, 2.53466796875, 3.025390625, 3.51611328125, 4.0068359375, 4.49755859375, 4.98828125, 5.47900390625, 5.9697265625, 6.46044921875, 6.951171875, 7.44189453125, 7.9326171875, 8.42333984375, 8.9140625, 9.40478515625, 9.8955078125, 10.38623046875, 10.876953125, 11.36767578125, 11.8583984375, 12.34912109375, 12.83984375, 13.33056640625, 13.8212890625, 14.31201171875, 14.802734375, 15.29345703125, 15.7841796875, 16.27490234375, 16.765625]}, "gradients/decoder.transformer.h.7.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 4.0, 4.0, 5.0, 8.0, 15.0, 15.0, 23.0, 20.0, 34.0, 38.0, 37.0, 76.0, 68.0, 159.0, 200.0, 347.0, 653.0, 1910.0, 10639.0, 82681.0, 713854.0, 208948.0, 23166.0, 3507.0, 913.0, 415.0, 243.0, 167.0, 95.0, 83.0, 55.0, 39.0, 33.0, 25.0, 16.0, 13.0, 8.0, 8.0, 13.0, 10.0, 6.0, 3.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.296875, -27.343994140625, -26.39111328125, -25.438232421875, -24.4853515625, -23.532470703125, -22.57958984375, -21.626708984375, -20.673828125, -19.720947265625, -18.76806640625, -17.815185546875, -16.8623046875, -15.909423828125, -14.95654296875, -14.003662109375, -13.05078125, -12.097900390625, -11.14501953125, -10.192138671875, -9.2392578125, -8.286376953125, -7.33349609375, -6.380615234375, -5.427734375, -4.474853515625, -3.52197265625, -2.569091796875, -1.6162109375, -0.663330078125, 0.28955078125, 1.242431640625, 2.1953125, 3.148193359375, 4.10107421875, 5.053955078125, 6.0068359375, 6.959716796875, 7.91259765625, 8.865478515625, 9.818359375, 10.771240234375, 11.72412109375, 12.677001953125, 13.6298828125, 14.582763671875, 15.53564453125, 16.488525390625, 17.44140625, 18.394287109375, 19.34716796875, 20.300048828125, 21.2529296875, 22.205810546875, 23.15869140625, 24.111572265625, 25.064453125, 26.017333984375, 26.97021484375, 27.923095703125, 28.8759765625, 29.828857421875, 30.78173828125, 31.734619140625, 32.6875]}, "gradients/decoder.transformer.h.7.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 2.0, 5.0, 3.0, 4.0, 3.0, 5.0, 11.0, 8.0, 12.0, 14.0, 11.0, 19.0, 20.0, 18.0, 40.0, 36.0, 45.0, 36.0, 58.0, 59.0, 73.0, 140.0, 1484.0, 419.0, 109.0, 58.0, 48.0, 34.0, 40.0, 20.0, 33.0, 30.0, 31.0, 24.0, 18.0, 28.0, 14.0, 9.0, 4.0, 6.0, 7.0, 7.0, 6.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.71875, -47.14599609375, -45.5732421875, -44.00048828125, -42.427734375, -40.85498046875, -39.2822265625, -37.70947265625, -36.13671875, -34.56396484375, -32.9912109375, -31.41845703125, -29.845703125, -28.27294921875, -26.7001953125, -25.12744140625, -23.5546875, -21.98193359375, -20.4091796875, -18.83642578125, -17.263671875, -15.69091796875, -14.1181640625, -12.54541015625, -10.97265625, -9.39990234375, -7.8271484375, -6.25439453125, -4.681640625, -3.10888671875, -1.5361328125, 0.03662109375, 1.609375, 3.18212890625, 4.7548828125, 6.32763671875, 7.900390625, 9.47314453125, 11.0458984375, 12.61865234375, 14.19140625, 15.76416015625, 17.3369140625, 18.90966796875, 20.482421875, 22.05517578125, 23.6279296875, 25.20068359375, 26.7734375, 28.34619140625, 29.9189453125, 31.49169921875, 33.064453125, 34.63720703125, 36.2099609375, 37.78271484375, 39.35546875, 40.92822265625, 42.5009765625, 44.07373046875, 45.646484375, 47.21923828125, 48.7919921875, 50.36474609375, 51.9375]}, "gradients/decoder.transformer.h.7.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0, 2.0, 2.0, 4.0, 7.0, 12.0, 12.0, 8.0, 7.0, 26.0, 26.0, 43.0, 57.0, 53.0, 60.0, 103.0, 131.0, 278.0, 613.0, 3430.0, 1991797.0, 1144329.0, 3171.0, 671.0, 262.0, 170.0, 107.0, 90.0, 62.0, 46.0, 24.0, 22.0, 16.0, 20.0, 6.0, 12.0, 7.0, 7.0, 2.0, 4.0, 7.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-140.25, -136.123046875, -131.99609375, -127.869140625, -123.7421875, -119.615234375, -115.48828125, -111.361328125, -107.234375, -103.107421875, -98.98046875, -94.853515625, -90.7265625, -86.599609375, -82.47265625, -78.345703125, -74.21875, -70.091796875, -65.96484375, -61.837890625, -57.7109375, -53.583984375, -49.45703125, -45.330078125, -41.203125, -37.076171875, -32.94921875, -28.822265625, -24.6953125, -20.568359375, -16.44140625, -12.314453125, -8.1875, -4.060546875, 0.06640625, 4.193359375, 8.3203125, 12.447265625, 16.57421875, 20.701171875, 24.828125, 28.955078125, 33.08203125, 37.208984375, 41.3359375, 45.462890625, 49.58984375, 53.716796875, 57.84375, 61.970703125, 66.09765625, 70.224609375, 74.3515625, 78.478515625, 82.60546875, 86.732421875, 90.859375, 94.986328125, 99.11328125, 103.240234375, 107.3671875, 111.494140625, 115.62109375, 119.748046875, 123.875]}, "gradients/decoder.transformer.h.7.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 419.0, 574.0, 19.0], "bins": [-936.6903076171875, -921.5911865234375, -906.4920654296875, -891.3929443359375, -876.2937622070312, -861.1946411132812, -846.0955200195312, -830.9963989257812, -815.8972778320312, -800.7981567382812, -785.6990356445312, -770.599853515625, -755.500732421875, -740.401611328125, -725.302490234375, -710.203369140625, -695.104248046875, -680.005126953125, -664.906005859375, -649.806884765625, -634.7077026367188, -619.6085815429688, -604.5094604492188, -589.4103393554688, -574.3111572265625, -559.2120361328125, -544.1129150390625, -529.0137939453125, -513.9146118164062, -498.81549072265625, -483.71636962890625, -468.61724853515625, -453.51812744140625, -438.41900634765625, -423.3198547363281, -408.2207336425781, -393.1216125488281, -378.0224609375, -362.92333984375, -347.82421875, -332.7250671386719, -317.6259460449219, -302.52679443359375, -287.42767333984375, -272.32855224609375, -257.22943115234375, -242.13027954101562, -227.03115844726562, -211.93203735351562, -196.83290100097656, -181.73377990722656, -166.6346435546875, -151.5355224609375, -136.43638610839844, -121.33724975585938, -106.23812103271484, -91.13898468017578, -76.03985595703125, -60.94072341918945, -45.841590881347656, -30.742462158203125, -15.643333435058594, -0.5441970825195312, 14.554931640625, 29.6540584564209]}, "gradients/decoder.transformer.h.7.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 3.0, 5.0, 3.0, 6.0, 11.0, 10.0, 5.0, 9.0, 14.0, 25.0, 19.0, 25.0, 21.0, 18.0, 24.0, 33.0, 33.0, 39.0, 36.0, 38.0, 46.0, 47.0, 42.0, 47.0, 54.0, 33.0, 32.0, 39.0, 40.0, 32.0, 21.0, 22.0, 30.0, 27.0, 19.0, 18.0, 15.0, 5.0, 11.0, 14.0, 8.0, 5.0, 5.0, 5.0, 6.0, 3.0, 5.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-113.52316284179688, -109.79229736328125, -106.0614242553711, -102.33055877685547, -98.59968566894531, -94.86882019042969, -91.13795471191406, -87.40708923339844, -83.67621612548828, -79.94535064697266, -76.2144775390625, -72.48361206054688, -68.75274658203125, -65.0218734741211, -61.29100799560547, -57.56013870239258, -53.82926940917969, -50.0984001159668, -46.367530822753906, -42.63666534423828, -38.90579605102539, -35.1749267578125, -31.444059371948242, -27.713191986083984, -23.982322692871094, -20.251453399658203, -16.520586013793945, -12.789717674255371, -9.058849334716797, -5.327980041503906, -1.5971126556396484, 2.1337547302246094, 5.8646240234375, 9.595492362976074, 13.326360702514648, 17.057228088378906, 20.788097381591797, 24.518966674804688, 28.249834060668945, 31.980701446533203, 35.711570739746094, 39.442440032958984, 43.173309326171875, 46.9041748046875, 50.63504409790039, 54.36591339111328, 58.096778869628906, 61.8276481628418, 65.55851745605469, 69.28938293457031, 73.02025604248047, 76.7511215209961, 80.48199462890625, 84.21286010742188, 87.9437255859375, 91.67459106445312, 95.40546417236328, 99.1363296508789, 102.86720275878906, 106.59806823730469, 110.32893371582031, 114.05980682373047, 117.7906723022461, 121.52154541015625, 125.25241088867188]}, "gradients/decoder.transformer.h.6.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 6.0, 4.0, 10.0, 3.0, 8.0, 8.0, 14.0, 16.0, 18.0, 14.0, 20.0, 20.0, 21.0, 27.0, 25.0, 39.0, 34.0, 32.0, 49.0, 51.0, 36.0, 39.0, 34.0, 47.0, 43.0, 44.0, 39.0, 32.0, 33.0, 22.0, 31.0, 32.0, 20.0, 14.0, 17.0, 14.0, 19.0, 19.0, 10.0, 7.0, 13.0, 7.0, 5.0, 6.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-15.046875, -14.579833984375, -14.11279296875, -13.645751953125, -13.1787109375, -12.711669921875, -12.24462890625, -11.777587890625, -11.310546875, -10.843505859375, -10.37646484375, -9.909423828125, -9.4423828125, -8.975341796875, -8.50830078125, -8.041259765625, -7.57421875, -7.107177734375, -6.64013671875, -6.173095703125, -5.7060546875, -5.239013671875, -4.77197265625, -4.304931640625, -3.837890625, -3.370849609375, -2.90380859375, -2.436767578125, -1.9697265625, -1.502685546875, -1.03564453125, -0.568603515625, -0.1015625, 0.365478515625, 0.83251953125, 1.299560546875, 1.7666015625, 2.233642578125, 2.70068359375, 3.167724609375, 3.634765625, 4.101806640625, 4.56884765625, 5.035888671875, 5.5029296875, 5.969970703125, 6.43701171875, 6.904052734375, 7.37109375, 7.838134765625, 8.30517578125, 8.772216796875, 9.2392578125, 9.706298828125, 10.17333984375, 10.640380859375, 11.107421875, 11.574462890625, 12.04150390625, 12.508544921875, 12.9755859375, 13.442626953125, 13.90966796875, 14.376708984375, 14.84375]}, "gradients/decoder.transformer.h.6.mlp.c_proj.weight": {"_type": "histogram", "values": [5.0, 0.0, 3.0, 1.0, 3.0, 2.0, 2.0, 8.0, 7.0, 9.0, 9.0, 8.0, 13.0, 9.0, 17.0, 18.0, 31.0, 27.0, 34.0, 50.0, 50.0, 84.0, 111.0, 165.0, 279.0, 505.0, 1383.0, 7863.0, 163736.0, 2728247.0, 1243195.0, 42947.0, 3503.0, 800.0, 359.0, 207.0, 152.0, 95.0, 86.0, 48.0, 39.0, 27.0, 28.0, 29.0, 25.0, 18.0, 5.0, 12.0, 13.0, 12.0, 6.0, 4.0, 1.0, 5.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-38.125, -36.845703125, -35.56640625, -34.287109375, -33.0078125, -31.728515625, -30.44921875, -29.169921875, -27.890625, -26.611328125, -25.33203125, -24.052734375, -22.7734375, -21.494140625, -20.21484375, -18.935546875, -17.65625, -16.376953125, -15.09765625, -13.818359375, -12.5390625, -11.259765625, -9.98046875, -8.701171875, -7.421875, -6.142578125, -4.86328125, -3.583984375, -2.3046875, -1.025390625, 0.25390625, 1.533203125, 2.8125, 4.091796875, 5.37109375, 6.650390625, 7.9296875, 9.208984375, 10.48828125, 11.767578125, 13.046875, 14.326171875, 15.60546875, 16.884765625, 18.1640625, 19.443359375, 20.72265625, 22.001953125, 23.28125, 24.560546875, 25.83984375, 27.119140625, 28.3984375, 29.677734375, 30.95703125, 32.236328125, 33.515625, 34.794921875, 36.07421875, 37.353515625, 38.6328125, 39.912109375, 41.19140625, 42.470703125, 43.75]}, "gradients/decoder.transformer.h.6.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 1.0, 0.0, 2.0, 2.0, 6.0, 6.0, 6.0, 8.0, 12.0, 33.0, 32.0, 41.0, 61.0, 98.0, 143.0, 187.0, 287.0, 416.0, 578.0, 628.0, 499.0, 342.0, 242.0, 165.0, 103.0, 60.0, 38.0, 32.0, 19.0, 9.0, 7.0, 9.0, 4.0, 3.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-35.625, -34.64013671875, -33.6552734375, -32.67041015625, -31.685546875, -30.70068359375, -29.7158203125, -28.73095703125, -27.74609375, -26.76123046875, -25.7763671875, -24.79150390625, -23.806640625, -22.82177734375, -21.8369140625, -20.85205078125, -19.8671875, -18.88232421875, -17.8974609375, -16.91259765625, -15.927734375, -14.94287109375, -13.9580078125, -12.97314453125, -11.98828125, -11.00341796875, -10.0185546875, -9.03369140625, -8.048828125, -7.06396484375, -6.0791015625, -5.09423828125, -4.109375, -3.12451171875, -2.1396484375, -1.15478515625, -0.169921875, 0.81494140625, 1.7998046875, 2.78466796875, 3.76953125, 4.75439453125, 5.7392578125, 6.72412109375, 7.708984375, 8.69384765625, 9.6787109375, 10.66357421875, 11.6484375, 12.63330078125, 13.6181640625, 14.60302734375, 15.587890625, 16.57275390625, 17.5576171875, 18.54248046875, 19.52734375, 20.51220703125, 21.4970703125, 22.48193359375, 23.466796875, 24.45166015625, 25.4365234375, 26.42138671875, 27.40625]}, "gradients/decoder.transformer.h.6.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 5.0, 3.0, 6.0, 7.0, 12.0, 13.0, 14.0, 20.0, 22.0, 26.0, 48.0, 43.0, 72.0, 109.0, 125.0, 213.0, 315.0, 657.0, 2239.0, 58287.0, 4067723.0, 60332.0, 2269.0, 609.0, 303.0, 183.0, 163.0, 108.0, 77.0, 68.0, 43.0, 37.0, 36.0, 26.0, 26.0, 11.0, 11.0, 5.0, 5.0, 8.0, 5.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0], "bins": [-107.4375, -104.02734375, -100.6171875, -97.20703125, -93.796875, -90.38671875, -86.9765625, -83.56640625, -80.15625, -76.74609375, -73.3359375, -69.92578125, -66.515625, -63.10546875, -59.6953125, -56.28515625, -52.875, -49.46484375, -46.0546875, -42.64453125, -39.234375, -35.82421875, -32.4140625, -29.00390625, -25.59375, -22.18359375, -18.7734375, -15.36328125, -11.953125, -8.54296875, -5.1328125, -1.72265625, 1.6875, 5.09765625, 8.5078125, 11.91796875, 15.328125, 18.73828125, 22.1484375, 25.55859375, 28.96875, 32.37890625, 35.7890625, 39.19921875, 42.609375, 46.01953125, 49.4296875, 52.83984375, 56.25, 59.66015625, 63.0703125, 66.48046875, 69.890625, 73.30078125, 76.7109375, 80.12109375, 83.53125, 86.94140625, 90.3515625, 93.76171875, 97.171875, 100.58203125, 103.9921875, 107.40234375, 110.8125]}, "gradients/decoder.transformer.h.6.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 26.0, 462.0, 500.0, 28.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-705.7628173828125, -687.2308959960938, -668.698974609375, -650.1670532226562, -631.6351318359375, -613.1032104492188, -594.5712890625, -576.039306640625, -557.5074462890625, -538.9755249023438, -520.443603515625, -501.91168212890625, -483.3797607421875, -464.84783935546875, -446.3158874511719, -427.7839660644531, -409.25201416015625, -390.7200927734375, -372.18817138671875, -353.65625, -335.12432861328125, -316.5924072265625, -298.0604553222656, -279.5285339355469, -260.9966125488281, -242.46469116210938, -223.93276977539062, -205.4008331298828, -186.86891174316406, -168.3369903564453, -149.8050537109375, -131.27313232421875, -112.74114990234375, -94.209228515625, -75.67729949951172, -57.1453742980957, -38.61344909667969, -20.081527709960938, -1.5495986938476562, 16.982330322265625, 35.514251708984375, 54.04617691040039, 72.5781021118164, 91.11003112792969, 109.64195251464844, 128.1738739013672, 146.705810546875, 165.23773193359375, 183.7696533203125, 202.30157470703125, 220.83349609375, 239.3654327392578, 257.8973388671875, 276.42926025390625, 294.9612121582031, 313.4931335449219, 332.0250549316406, 350.5569763183594, 369.0888977050781, 387.6208190917969, 406.15277099609375, 424.6846923828125, 443.21661376953125, 461.74853515625, 480.28045654296875]}, "gradients/decoder.transformer.h.6.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 2.0, 2.0, 4.0, 7.0, 7.0, 4.0, 9.0, 11.0, 6.0, 20.0, 13.0, 13.0, 21.0, 21.0, 20.0, 20.0, 32.0, 26.0, 24.0, 42.0, 43.0, 39.0, 42.0, 41.0, 40.0, 34.0, 39.0, 43.0, 33.0, 39.0, 34.0, 30.0, 41.0, 29.0, 17.0, 26.0, 32.0, 11.0, 18.0, 18.0, 16.0, 9.0, 10.0, 4.0, 4.0, 5.0, 3.0, 2.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-85.25485229492188, -82.76126861572266, -80.26768493652344, -77.77409362792969, -75.28050994873047, -72.78692626953125, -70.29334259033203, -67.79975891113281, -65.30616760253906, -62.812583923339844, -60.31899642944336, -57.82541275024414, -55.331825256347656, -52.83824157714844, -50.34465789794922, -47.85107421875, -45.35749053955078, -42.86390686035156, -40.37031936645508, -37.87673568725586, -35.383148193359375, -32.889564514160156, -30.395980834960938, -27.902395248413086, -25.408809661865234, -22.915224075317383, -20.42163848876953, -17.928054809570312, -15.434469223022461, -12.94088363647461, -10.447299003601074, -7.953714370727539, -5.4601287841796875, -2.966543674468994, -0.4729585647583008, 2.0206265449523926, 4.514211654663086, 7.0077972412109375, 9.501381874084473, 11.994966506958008, 14.48855209350586, 16.98213768005371, 19.475723266601562, 21.96930694580078, 24.462892532348633, 26.956478118896484, 29.450061798095703, 31.943647384643555, 34.437232971191406, 36.930816650390625, 39.42440414428711, 41.91798782348633, 44.41157531738281, 46.90515899658203, 49.39874267578125, 51.89232635498047, 54.38591384887695, 56.87949752807617, 59.373085021972656, 61.866668701171875, 64.3602523803711, 66.85383605957031, 69.34742736816406, 71.84101104736328, 74.3345947265625]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 4.0, 7.0, 5.0, 6.0, 9.0, 14.0, 12.0, 19.0, 18.0, 16.0, 18.0, 28.0, 22.0, 31.0, 41.0, 39.0, 48.0, 60.0, 51.0, 43.0, 45.0, 47.0, 50.0, 42.0, 53.0, 37.0, 29.0, 40.0, 26.0, 27.0, 20.0, 21.0, 16.0, 18.0, 12.0, 13.0, 8.0, 5.0, 4.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.515625, -16.95458984375, -16.3935546875, -15.83251953125, -15.271484375, -14.71044921875, -14.1494140625, -13.58837890625, -13.02734375, -12.46630859375, -11.9052734375, -11.34423828125, -10.783203125, -10.22216796875, -9.6611328125, -9.10009765625, -8.5390625, -7.97802734375, -7.4169921875, -6.85595703125, -6.294921875, -5.73388671875, -5.1728515625, -4.61181640625, -4.05078125, -3.48974609375, -2.9287109375, -2.36767578125, -1.806640625, -1.24560546875, -0.6845703125, -0.12353515625, 0.4375, 0.99853515625, 1.5595703125, 2.12060546875, 2.681640625, 3.24267578125, 3.8037109375, 4.36474609375, 4.92578125, 5.48681640625, 6.0478515625, 6.60888671875, 7.169921875, 7.73095703125, 8.2919921875, 8.85302734375, 9.4140625, 9.97509765625, 10.5361328125, 11.09716796875, 11.658203125, 12.21923828125, 12.7802734375, 13.34130859375, 13.90234375, 14.46337890625, 15.0244140625, 15.58544921875, 16.146484375, 16.70751953125, 17.2685546875, 17.82958984375, 18.390625]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 12.0, 14.0, 12.0, 26.0, 29.0, 51.0, 90.0, 138.0, 218.0, 374.0, 590.0, 982.0, 1610.0, 2680.0, 4675.0, 7759.0, 13201.0, 22280.0, 38258.0, 65577.0, 116072.0, 203380.0, 235035.0, 142815.0, 80378.0, 46038.0, 27056.0, 15873.0, 9286.0, 5607.0, 3366.0, 2003.0, 1238.0, 700.0, 443.0, 257.0, 183.0, 102.0, 58.0, 33.0, 30.0, 11.0, 10.0, 1.0, 4.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.013671875, -1.9510955810546875, -1.888519287109375, -1.8259429931640625, -1.76336669921875, -1.7007904052734375, -1.638214111328125, -1.5756378173828125, -1.5130615234375, -1.4504852294921875, -1.387908935546875, -1.3253326416015625, -1.26275634765625, -1.2001800537109375, -1.137603759765625, -1.0750274658203125, -1.012451171875, -0.9498748779296875, -0.887298583984375, -0.8247222900390625, -0.76214599609375, -0.6995697021484375, -0.636993408203125, -0.5744171142578125, -0.5118408203125, -0.4492645263671875, -0.386688232421875, -0.3241119384765625, -0.26153564453125, -0.1989593505859375, -0.136383056640625, -0.0738067626953125, -0.01123046875, 0.0513458251953125, 0.113922119140625, 0.1764984130859375, 0.23907470703125, 0.3016510009765625, 0.364227294921875, 0.4268035888671875, 0.4893798828125, 0.5519561767578125, 0.614532470703125, 0.6771087646484375, 0.73968505859375, 0.8022613525390625, 0.864837646484375, 0.9274139404296875, 0.989990234375, 1.0525665283203125, 1.115142822265625, 1.1777191162109375, 1.24029541015625, 1.3028717041015625, 1.365447998046875, 1.4280242919921875, 1.4906005859375, 1.5531768798828125, 1.615753173828125, 1.6783294677734375, 1.74090576171875, 1.8034820556640625, 1.866058349609375, 1.9286346435546875, 1.9912109375]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 6.0, 4.0, 7.0, 8.0, 5.0, 9.0, 14.0, 17.0, 22.0, 19.0, 27.0, 30.0, 29.0, 42.0, 41.0, 39.0, 49.0, 49.0, 42.0, 56.0, 1073.0, 45.0, 37.0, 38.0, 46.0, 38.0, 32.0, 24.0, 25.0, 23.0, 30.0, 24.0, 26.0, 16.0, 10.0, 6.0, 6.0, 6.0, 6.0, 3.0, 6.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.6953125, -10.3193359375, -9.943359375, -9.5673828125, -9.19140625, -8.8154296875, -8.439453125, -8.0634765625, -7.6875, -7.3115234375, -6.935546875, -6.5595703125, -6.18359375, -5.8076171875, -5.431640625, -5.0556640625, -4.6796875, -4.3037109375, -3.927734375, -3.5517578125, -3.17578125, -2.7998046875, -2.423828125, -2.0478515625, -1.671875, -1.2958984375, -0.919921875, -0.5439453125, -0.16796875, 0.2080078125, 0.583984375, 0.9599609375, 1.3359375, 1.7119140625, 2.087890625, 2.4638671875, 2.83984375, 3.2158203125, 3.591796875, 3.9677734375, 4.34375, 4.7197265625, 5.095703125, 5.4716796875, 5.84765625, 6.2236328125, 6.599609375, 6.9755859375, 7.3515625, 7.7275390625, 8.103515625, 8.4794921875, 8.85546875, 9.2314453125, 9.607421875, 9.9833984375, 10.359375, 10.7353515625, 11.111328125, 11.4873046875, 11.86328125, 12.2392578125, 12.615234375, 12.9912109375, 13.3671875]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 6.0, 9.0, 4.0, 11.0, 24.0, 25.0, 34.0, 67.0, 106.0, 157.0, 288.0, 460.0, 742.0, 1250.0, 2003.0, 3390.0, 5574.0, 9516.0, 16154.0, 27514.0, 47523.0, 83003.0, 146358.0, 1287623.0, 197822.0, 112953.0, 64167.0, 37146.0, 21821.0, 12541.0, 7502.0, 4434.0, 2698.0, 1663.0, 964.0, 612.0, 368.0, 224.0, 148.0, 93.0, 57.0, 34.0, 24.0, 13.0, 3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.05078125, -1.9882049560546875, -1.925628662109375, -1.8630523681640625, -1.80047607421875, -1.7378997802734375, -1.675323486328125, -1.6127471923828125, -1.5501708984375, -1.4875946044921875, -1.425018310546875, -1.3624420166015625, -1.29986572265625, -1.2372894287109375, -1.174713134765625, -1.1121368408203125, -1.049560546875, -0.9869842529296875, -0.924407958984375, -0.8618316650390625, -0.79925537109375, -0.7366790771484375, -0.674102783203125, -0.6115264892578125, -0.5489501953125, -0.4863739013671875, -0.423797607421875, -0.3612213134765625, -0.29864501953125, -0.2360687255859375, -0.173492431640625, -0.1109161376953125, -0.04833984375, 0.0142364501953125, 0.076812744140625, 0.1393890380859375, 0.20196533203125, 0.2645416259765625, 0.327117919921875, 0.3896942138671875, 0.4522705078125, 0.5148468017578125, 0.577423095703125, 0.6399993896484375, 0.70257568359375, 0.7651519775390625, 0.827728271484375, 0.8903045654296875, 0.952880859375, 1.0154571533203125, 1.078033447265625, 1.1406097412109375, 1.20318603515625, 1.2657623291015625, 1.328338623046875, 1.3909149169921875, 1.4534912109375, 1.5160675048828125, 1.578643798828125, 1.6412200927734375, 1.70379638671875, 1.7663726806640625, 1.828948974609375, 1.8915252685546875, 1.9541015625]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 2.0, 2.0, 1.0, 5.0, 3.0, 11.0, 17.0, 14.0, 23.0, 27.0, 39.0, 37.0, 43.0, 78.0, 63.0, 87.0, 86.0, 66.0, 80.0, 52.0, 57.0, 44.0, 26.0, 37.0, 19.0, 11.0, 13.0, 12.0, 9.0, 6.0, 11.0, 5.0, 4.0, 4.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.003604888916015625, -0.0034918487071990967, -0.0033788084983825684, -0.00326576828956604, -0.0031527280807495117, -0.0030396878719329834, -0.002926647663116455, -0.0028136074542999268, -0.0027005672454833984, -0.00258752703666687, -0.002474486827850342, -0.0023614466190338135, -0.002248406410217285, -0.002135366201400757, -0.0020223259925842285, -0.0019092857837677002, -0.0017962455749511719, -0.0016832053661346436, -0.0015701651573181152, -0.001457124948501587, -0.0013440847396850586, -0.0012310445308685303, -0.001118004322052002, -0.0010049641132354736, -0.0008919239044189453, -0.000778883695602417, -0.0006658434867858887, -0.0005528032779693604, -0.00043976306915283203, -0.0003267228603363037, -0.0002136826515197754, -0.00010064244270324707, 1.239776611328125e-05, 0.00012543797492980957, 0.0002384781837463379, 0.0003515183925628662, 0.00046455860137939453, 0.0005775988101959229, 0.0006906390190124512, 0.0008036792278289795, 0.0009167194366455078, 0.0010297596454620361, 0.0011427998542785645, 0.0012558400630950928, 0.001368880271911621, 0.0014819204807281494, 0.0015949606895446777, 0.001708000898361206, 0.0018210411071777344, 0.0019340813159942627, 0.002047121524810791, 0.0021601617336273193, 0.0022732019424438477, 0.002386242151260376, 0.0024992823600769043, 0.0026123225688934326, 0.002725362777709961, 0.0028384029865264893, 0.0029514431953430176, 0.003064483404159546, 0.0031775236129760742, 0.0032905638217926025, 0.003403604030609131, 0.003516644239425659, 0.0036296844482421875]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 5.0, 1.0, 6.0, 5.0, 2.0, 7.0, 8.0, 11.0, 9.0, 14.0, 17.0, 27.0, 42.0, 68.0, 58.0, 88.0, 129.0, 321.0, 762.0, 64108.0, 980605.0, 1221.0, 389.0, 203.0, 121.0, 89.0, 66.0, 40.0, 30.0, 24.0, 18.0, 19.0, 11.0, 9.0, 3.0, 0.0, 1.0, 6.0, 5.0, 3.0, 5.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.07977294921875, -0.07729148864746094, -0.07481002807617188, -0.07232856750488281, -0.06984710693359375, -0.06736564636230469, -0.06488418579101562, -0.06240272521972656, -0.0599212646484375, -0.05743980407714844, -0.054958343505859375, -0.05247688293457031, -0.04999542236328125, -0.04751396179199219, -0.045032501220703125, -0.04255104064941406, -0.040069580078125, -0.03758811950683594, -0.035106658935546875, -0.03262519836425781, -0.03014373779296875, -0.027662277221679688, -0.025180816650390625, -0.022699356079101562, -0.0202178955078125, -0.017736434936523438, -0.015254974365234375, -0.012773513793945312, -0.01029205322265625, -0.0078105926513671875, -0.005329132080078125, -0.0028476715087890625, -0.0003662109375, 0.0021152496337890625, 0.004596710205078125, 0.0070781707763671875, 0.00955963134765625, 0.012041091918945312, 0.014522552490234375, 0.017004013061523438, 0.0194854736328125, 0.021966934204101562, 0.024448394775390625, 0.026929855346679688, 0.02941131591796875, 0.03189277648925781, 0.034374237060546875, 0.03685569763183594, 0.039337158203125, 0.04181861877441406, 0.044300079345703125, 0.04678153991699219, 0.04926300048828125, 0.05174446105957031, 0.054225921630859375, 0.05670738220214844, 0.0591888427734375, 0.06167030334472656, 0.06415176391601562, 0.06663322448730469, 0.06911468505859375, 0.07159614562988281, 0.07407760620117188, 0.07655906677246094, 0.07904052734375]}, "gradients/decoder.transformer.h.6.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 24.0, 99.0, 311.0, 391.0, 154.0, 23.0, 7.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.007985332980751991, -0.00782609824091196, -0.007666863966733217, -0.007507629226893187, -0.007348394487053156, -0.0071891602128744125, -0.007029925473034382, -0.006870690733194351, -0.0067114559933543205, -0.00655222125351429, -0.0063929869793355465, -0.006233752239495516, -0.006074517499655485, -0.005915283225476742, -0.005756048485636711, -0.0055968137457966805, -0.005437579471617937, -0.005278344731777906, -0.005119110457599163, -0.004959875717759132, -0.004800640977919102, -0.004641406238079071, -0.004482171963900328, -0.004322937224060297, -0.004163702949881554, -0.004004468210041523, -0.003845233703032136, -0.003685999196022749, -0.0035267644561827183, -0.0033675299491733313, -0.0032082954421639442, -0.0030490607023239136, -0.002889825962483883, -0.002730591455474496, -0.0025713567156344652, -0.002412122208625078, -0.0022528874687850475, -0.0020936529617756605, -0.0019344184547662735, -0.0017751838313415647, -0.0016159492079168558, -0.001456714584492147, -0.0012974799610674381, -0.0011382454540580511, -0.0009790108306333423, -0.0008197762072086334, -0.0006605416419915855, -0.0005013070767745376, -0.0003420724533498287, -0.00018283785902895033, -2.3603264708071947e-05, 0.00013563132961280644, 0.0002948659239336848, 0.00045410054735839367, 0.0006133351125754416, 0.0007725696777924895, 0.0009318043012171984, 0.0010910389246419072, 0.001250273548066616, 0.001409508055076003, 0.001568742678500712, 0.0017279773019254208, 0.0018872118089348078, 0.002046446315944195, 0.0022056810557842255]}, "gradients/decoder.transformer.h.6.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 2.0, 4.0, 0.0, 3.0, 1.0, 8.0, 11.0, 8.0, 11.0, 15.0, 11.0, 10.0, 16.0, 32.0, 26.0, 28.0, 22.0, 28.0, 36.0, 40.0, 39.0, 24.0, 32.0, 43.0, 36.0, 28.0, 36.0, 50.0, 30.0, 32.0, 28.0, 33.0, 40.0, 28.0, 22.0, 18.0, 23.0, 18.0, 18.0, 13.0, 19.0, 17.0, 11.0, 12.0, 5.0, 9.0, 10.0, 6.0, 5.0, 7.0, 1.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0, 3.0], "bins": [-0.0013865232467651367, -0.0013429252430796623, -0.001299327239394188, -0.0012557292357087135, -0.0012121312320232391, -0.0011685332283377647, -0.0011249352246522903, -0.001081337220966816, -0.0010377392172813416, -0.0009941412135958672, -0.0009505432099103928, -0.0009069452062249184, -0.000863347202539444, -0.0008197491988539696, -0.0007761511951684952, -0.0007325531914830208, -0.0006889551877975464, -0.000645357184112072, -0.0006017591804265976, -0.0005581611767411232, -0.0005145631730556488, -0.0004709651693701744, -0.0004273671656847, -0.0003837691619992256, -0.0003401711583137512, -0.0002965731546282768, -0.00025297515094280243, -0.00020937714725732803, -0.00016577914357185364, -0.00012218113988637924, -7.858313620090485e-05, -3.498513251543045e-05, 8.612871170043945e-06, 5.221087485551834e-05, 9.580887854099274e-05, 0.00013940688222646713, 0.00018300488591194153, 0.00022660288959741592, 0.0002702008932828903, 0.0003137988969683647, 0.0003573969006538391, 0.0004009949043393135, 0.0004445929080247879, 0.0004881909117102623, 0.0005317889153957367, 0.0005753869190812111, 0.0006189849227666855, 0.0006625829264521599, 0.0007061809301376343, 0.0007497789338231087, 0.0007933769375085831, 0.0008369749411940575, 0.0008805729448795319, 0.0009241709485650063, 0.0009677689522504807, 0.001011366955935955, 0.0010549649596214294, 0.0010985629633069038, 0.0011421609669923782, 0.0011857589706778526, 0.001229356974363327, 0.0012729549780488014, 0.0013165529817342758, 0.0013601509854197502, 0.0014037489891052246]}, "gradients/decoder.transformer.h.6.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 4.0, 7.0, 5.0, 6.0, 9.0, 14.0, 12.0, 19.0, 18.0, 16.0, 18.0, 28.0, 22.0, 31.0, 41.0, 39.0, 48.0, 60.0, 51.0, 43.0, 45.0, 47.0, 50.0, 42.0, 53.0, 37.0, 29.0, 40.0, 26.0, 27.0, 20.0, 21.0, 16.0, 18.0, 12.0, 13.0, 8.0, 5.0, 4.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.515625, -16.95458984375, -16.3935546875, -15.83251953125, -15.271484375, -14.71044921875, -14.1494140625, -13.58837890625, -13.02734375, -12.46630859375, -11.9052734375, -11.34423828125, -10.783203125, -10.22216796875, -9.6611328125, -9.10009765625, -8.5390625, -7.97802734375, -7.4169921875, -6.85595703125, -6.294921875, -5.73388671875, -5.1728515625, -4.61181640625, -4.05078125, -3.48974609375, -2.9287109375, -2.36767578125, -1.806640625, -1.24560546875, -0.6845703125, -0.12353515625, 0.4375, 0.99853515625, 1.5595703125, 2.12060546875, 2.681640625, 3.24267578125, 3.8037109375, 4.36474609375, 4.92578125, 5.48681640625, 6.0478515625, 6.60888671875, 7.169921875, 7.73095703125, 8.2919921875, 8.85302734375, 9.4140625, 9.97509765625, 10.5361328125, 11.09716796875, 11.658203125, 12.21923828125, 12.7802734375, 13.34130859375, 13.90234375, 14.46337890625, 15.0244140625, 15.58544921875, 16.146484375, 16.70751953125, 17.2685546875, 17.82958984375, 18.390625]}, "gradients/decoder.transformer.h.6.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 7.0, 6.0, 11.0, 9.0, 11.0, 24.0, 25.0, 24.0, 47.0, 46.0, 58.0, 90.0, 120.0, 183.0, 293.0, 404.0, 555.0, 829.0, 1982.0, 11593.0, 161240.0, 829147.0, 34214.0, 4248.0, 1189.0, 625.0, 438.0, 322.0, 243.0, 161.0, 108.0, 83.0, 55.0, 49.0, 43.0, 23.0, 20.0, 14.0, 9.0, 6.0, 6.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-44.40625, -42.98486328125, -41.5634765625, -40.14208984375, -38.720703125, -37.29931640625, -35.8779296875, -34.45654296875, -33.03515625, -31.61376953125, -30.1923828125, -28.77099609375, -27.349609375, -25.92822265625, -24.5068359375, -23.08544921875, -21.6640625, -20.24267578125, -18.8212890625, -17.39990234375, -15.978515625, -14.55712890625, -13.1357421875, -11.71435546875, -10.29296875, -8.87158203125, -7.4501953125, -6.02880859375, -4.607421875, -3.18603515625, -1.7646484375, -0.34326171875, 1.078125, 2.49951171875, 3.9208984375, 5.34228515625, 6.763671875, 8.18505859375, 9.6064453125, 11.02783203125, 12.44921875, 13.87060546875, 15.2919921875, 16.71337890625, 18.134765625, 19.55615234375, 20.9775390625, 22.39892578125, 23.8203125, 25.24169921875, 26.6630859375, 28.08447265625, 29.505859375, 30.92724609375, 32.3486328125, 33.77001953125, 35.19140625, 36.61279296875, 38.0341796875, 39.45556640625, 40.876953125, 42.29833984375, 43.7197265625, 45.14111328125, 46.5625]}, "gradients/decoder.transformer.h.6.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 6.0, 2.0, 4.0, 12.0, 7.0, 11.0, 9.0, 12.0, 17.0, 19.0, 23.0, 34.0, 35.0, 32.0, 35.0, 39.0, 42.0, 34.0, 44.0, 95.0, 379.0, 1606.0, 119.0, 60.0, 45.0, 49.0, 43.0, 38.0, 30.0, 28.0, 20.0, 16.0, 17.0, 16.0, 15.0, 15.0, 13.0, 10.0, 10.0, 6.0, 5.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-52.53125, -50.99072265625, -49.4501953125, -47.90966796875, -46.369140625, -44.82861328125, -43.2880859375, -41.74755859375, -40.20703125, -38.66650390625, -37.1259765625, -35.58544921875, -34.044921875, -32.50439453125, -30.9638671875, -29.42333984375, -27.8828125, -26.34228515625, -24.8017578125, -23.26123046875, -21.720703125, -20.18017578125, -18.6396484375, -17.09912109375, -15.55859375, -14.01806640625, -12.4775390625, -10.93701171875, -9.396484375, -7.85595703125, -6.3154296875, -4.77490234375, -3.234375, -1.69384765625, -0.1533203125, 1.38720703125, 2.927734375, 4.46826171875, 6.0087890625, 7.54931640625, 9.08984375, 10.63037109375, 12.1708984375, 13.71142578125, 15.251953125, 16.79248046875, 18.3330078125, 19.87353515625, 21.4140625, 22.95458984375, 24.4951171875, 26.03564453125, 27.576171875, 29.11669921875, 30.6572265625, 32.19775390625, 33.73828125, 35.27880859375, 36.8193359375, 38.35986328125, 39.900390625, 41.44091796875, 42.9814453125, 44.52197265625, 46.0625]}, "gradients/decoder.transformer.h.6.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 7.0, 4.0, 7.0, 14.0, 14.0, 21.0, 13.0, 19.0, 18.0, 24.0, 22.0, 30.0, 32.0, 60.0, 82.0, 138.0, 211.0, 453.0, 2120.0, 2468107.0, 671462.0, 1693.0, 473.0, 183.0, 115.0, 78.0, 55.0, 45.0, 45.0, 24.0, 32.0, 23.0, 14.0, 11.0, 12.0, 13.0, 5.0, 12.0, 8.0, 2.0, 5.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-127.75, -123.486328125, -119.22265625, -114.958984375, -110.6953125, -106.431640625, -102.16796875, -97.904296875, -93.640625, -89.376953125, -85.11328125, -80.849609375, -76.5859375, -72.322265625, -68.05859375, -63.794921875, -59.53125, -55.267578125, -51.00390625, -46.740234375, -42.4765625, -38.212890625, -33.94921875, -29.685546875, -25.421875, -21.158203125, -16.89453125, -12.630859375, -8.3671875, -4.103515625, 0.16015625, 4.423828125, 8.6875, 12.951171875, 17.21484375, 21.478515625, 25.7421875, 30.005859375, 34.26953125, 38.533203125, 42.796875, 47.060546875, 51.32421875, 55.587890625, 59.8515625, 64.115234375, 68.37890625, 72.642578125, 76.90625, 81.169921875, 85.43359375, 89.697265625, 93.9609375, 98.224609375, 102.48828125, 106.751953125, 111.015625, 115.279296875, 119.54296875, 123.806640625, 128.0703125, 132.333984375, 136.59765625, 140.861328125, 145.125]}, "gradients/decoder.transformer.h.6.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 44.0, 595.0, 356.0, 23.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.111148834228516, -27.849672317504883, -20.58819580078125, -13.32672119140625, -6.065244674682617, 1.1962318420410156, 8.457706451416016, 15.719184875488281, 22.98065948486328, 30.242136001586914, 37.50361251831055, 44.76508712768555, 52.02656555175781, 59.28804016113281, 66.54951477050781, 73.81098937988281, 81.07246398925781, 88.33393859863281, 95.59541320800781, 102.85688781738281, 110.11837005615234, 117.37984466552734, 124.64131927490234, 131.90280151367188, 139.16427612304688, 146.42575073242188, 153.68722534179688, 160.94869995117188, 168.21017456054688, 175.47164916992188, 182.73312377929688, 189.99461364746094, 197.25608825683594, 204.51756286621094, 211.77903747558594, 219.04051208496094, 226.30198669433594, 233.5634765625, 240.824951171875, 248.08642578125, 255.347900390625, 262.609375, 269.870849609375, 277.13232421875, 284.393798828125, 291.6552734375, 298.916748046875, 306.17822265625, 313.439697265625, 320.701171875, 327.962646484375, 335.22412109375, 342.485595703125, 349.7470703125, 357.008544921875, 364.27001953125, 371.5315246582031, 378.7929992675781, 386.0544738769531, 393.3159484863281, 400.5774230957031, 407.8388977050781, 415.1003723144531, 422.3618469238281, 429.6233215332031]}, "gradients/decoder.transformer.h.6.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 2.0, 3.0, 0.0, 4.0, 3.0, 2.0, 5.0, 9.0, 16.0, 13.0, 13.0, 12.0, 20.0, 16.0, 27.0, 16.0, 22.0, 27.0, 26.0, 22.0, 30.0, 27.0, 30.0, 35.0, 44.0, 36.0, 40.0, 31.0, 31.0, 41.0, 40.0, 42.0, 27.0, 26.0, 33.0, 29.0, 29.0, 29.0, 28.0, 20.0, 17.0, 17.0, 8.0, 16.0, 9.0, 4.0, 10.0, 6.0, 3.0, 9.0, 3.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0], "bins": [-105.90666198730469, -102.45897674560547, -99.01129913330078, -95.56361389160156, -92.11593627929688, -88.66825103759766, -85.22056579589844, -81.77288818359375, -78.32521057128906, -74.87752532958984, -71.42984771728516, -67.98216247558594, -64.53448486328125, -61.08679962158203, -57.63911819458008, -54.191436767578125, -50.743751525878906, -47.29607009887695, -43.848388671875, -40.40070343017578, -36.953025817871094, -33.505340576171875, -30.057659149169922, -26.60997772216797, -23.162296295166016, -19.714614868164062, -16.26693344116211, -12.819250106811523, -9.37156867980957, -5.923887252807617, -2.4762039184570312, 0.9714775085449219, 4.419158935546875, 7.866840839385986, 11.314522743225098, 14.762205123901367, 18.20988655090332, 21.657567977905273, 25.10525131225586, 28.552932739257812, 32.000614166259766, 35.44829559326172, 38.89597702026367, 42.343658447265625, 45.791343688964844, 49.23902130126953, 52.68670654296875, 56.1343879699707, 59.582069396972656, 63.02975082397461, 66.47743225097656, 69.92511749267578, 73.37279510498047, 76.82048034667969, 80.26815795898438, 83.7158432006836, 87.16352844238281, 90.61121368408203, 94.05889129638672, 97.50657653808594, 100.95425415039062, 104.40193939208984, 107.84962463378906, 111.29730224609375, 114.74497985839844]}, "gradients/decoder.transformer.h.5.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0, 12.0, 16.0, 9.0, 14.0, 18.0, 17.0, 23.0, 33.0, 18.0, 30.0, 33.0, 53.0, 41.0, 49.0, 50.0, 51.0, 44.0, 60.0, 50.0, 33.0, 47.0, 42.0, 34.0, 43.0, 31.0, 25.0, 23.0, 16.0, 23.0, 9.0, 14.0, 13.0, 5.0, 6.0, 1.0, 3.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.203125, -18.6259765625, -18.048828125, -17.4716796875, -16.89453125, -16.3173828125, -15.740234375, -15.1630859375, -14.5859375, -14.0087890625, -13.431640625, -12.8544921875, -12.27734375, -11.7001953125, -11.123046875, -10.5458984375, -9.96875, -9.3916015625, -8.814453125, -8.2373046875, -7.66015625, -7.0830078125, -6.505859375, -5.9287109375, -5.3515625, -4.7744140625, -4.197265625, -3.6201171875, -3.04296875, -2.4658203125, -1.888671875, -1.3115234375, -0.734375, -0.1572265625, 0.419921875, 0.9970703125, 1.57421875, 2.1513671875, 2.728515625, 3.3056640625, 3.8828125, 4.4599609375, 5.037109375, 5.6142578125, 6.19140625, 6.7685546875, 7.345703125, 7.9228515625, 8.5, 9.0771484375, 9.654296875, 10.2314453125, 10.80859375, 11.3857421875, 11.962890625, 12.5400390625, 13.1171875, 13.6943359375, 14.271484375, 14.8486328125, 15.42578125, 16.0029296875, 16.580078125, 17.1572265625, 17.734375]}, "gradients/decoder.transformer.h.5.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 6.0, 8.0, 7.0, 5.0, 10.0, 14.0, 15.0, 26.0, 37.0, 44.0, 39.0, 52.0, 81.0, 93.0, 146.0, 168.0, 248.0, 283.0, 496.0, 873.0, 1998.0, 7635.0, 67189.0, 1127248.0, 2616715.0, 341439.0, 22095.0, 3818.0, 1314.0, 642.0, 417.0, 282.0, 196.0, 155.0, 126.0, 84.0, 72.0, 40.0, 42.0, 25.0, 31.0, 30.0, 9.0, 13.0, 8.0, 9.0, 5.0, 1.0, 2.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-34.875, -33.79443359375, -32.7138671875, -31.63330078125, -30.552734375, -29.47216796875, -28.3916015625, -27.31103515625, -26.23046875, -25.14990234375, -24.0693359375, -22.98876953125, -21.908203125, -20.82763671875, -19.7470703125, -18.66650390625, -17.5859375, -16.50537109375, -15.4248046875, -14.34423828125, -13.263671875, -12.18310546875, -11.1025390625, -10.02197265625, -8.94140625, -7.86083984375, -6.7802734375, -5.69970703125, -4.619140625, -3.53857421875, -2.4580078125, -1.37744140625, -0.296875, 0.78369140625, 1.8642578125, 2.94482421875, 4.025390625, 5.10595703125, 6.1865234375, 7.26708984375, 8.34765625, 9.42822265625, 10.5087890625, 11.58935546875, 12.669921875, 13.75048828125, 14.8310546875, 15.91162109375, 16.9921875, 18.07275390625, 19.1533203125, 20.23388671875, 21.314453125, 22.39501953125, 23.4755859375, 24.55615234375, 25.63671875, 26.71728515625, 27.7978515625, 28.87841796875, 29.958984375, 31.03955078125, 32.1201171875, 33.20068359375, 34.28125]}, "gradients/decoder.transformer.h.5.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0, 5.0, 9.0, 14.0, 22.0, 42.0, 57.0, 128.0, 223.0, 346.0, 647.0, 876.0, 720.0, 434.0, 229.0, 144.0, 74.0, 41.0, 28.0, 20.0, 9.0, 5.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-54.125, -52.72314453125, -51.3212890625, -49.91943359375, -48.517578125, -47.11572265625, -45.7138671875, -44.31201171875, -42.91015625, -41.50830078125, -40.1064453125, -38.70458984375, -37.302734375, -35.90087890625, -34.4990234375, -33.09716796875, -31.6953125, -30.29345703125, -28.8916015625, -27.48974609375, -26.087890625, -24.68603515625, -23.2841796875, -21.88232421875, -20.48046875, -19.07861328125, -17.6767578125, -16.27490234375, -14.873046875, -13.47119140625, -12.0693359375, -10.66748046875, -9.265625, -7.86376953125, -6.4619140625, -5.06005859375, -3.658203125, -2.25634765625, -0.8544921875, 0.54736328125, 1.94921875, 3.35107421875, 4.7529296875, 6.15478515625, 7.556640625, 8.95849609375, 10.3603515625, 11.76220703125, 13.1640625, 14.56591796875, 15.9677734375, 17.36962890625, 18.771484375, 20.17333984375, 21.5751953125, 22.97705078125, 24.37890625, 25.78076171875, 27.1826171875, 28.58447265625, 29.986328125, 31.38818359375, 32.7900390625, 34.19189453125, 35.59375]}, "gradients/decoder.transformer.h.5.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 2.0, 4.0, 5.0, 15.0, 15.0, 26.0, 30.0, 39.0, 74.0, 112.0, 175.0, 325.0, 663.0, 2159.0, 49478.0, 4107845.0, 30050.0, 1879.0, 607.0, 283.0, 153.0, 91.0, 68.0, 58.0, 35.0, 26.0, 22.0, 12.0, 7.0, 8.0, 5.0, 3.0, 3.0, 2.0, 3.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-133.625, -129.5078125, -125.390625, -121.2734375, -117.15625, -113.0390625, -108.921875, -104.8046875, -100.6875, -96.5703125, -92.453125, -88.3359375, -84.21875, -80.1015625, -75.984375, -71.8671875, -67.75, -63.6328125, -59.515625, -55.3984375, -51.28125, -47.1640625, -43.046875, -38.9296875, -34.8125, -30.6953125, -26.578125, -22.4609375, -18.34375, -14.2265625, -10.109375, -5.9921875, -1.875, 2.2421875, 6.359375, 10.4765625, 14.59375, 18.7109375, 22.828125, 26.9453125, 31.0625, 35.1796875, 39.296875, 43.4140625, 47.53125, 51.6484375, 55.765625, 59.8828125, 64.0, 68.1171875, 72.234375, 76.3515625, 80.46875, 84.5859375, 88.703125, 92.8203125, 96.9375, 101.0546875, 105.171875, 109.2890625, 113.40625, 117.5234375, 121.640625, 125.7578125, 129.875]}, "gradients/decoder.transformer.h.5.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 37.0, 343.0, 504.0, 118.0, 8.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-424.31805419921875, -411.1553039550781, -397.9925842285156, -384.829833984375, -371.6670837402344, -358.5043640136719, -345.34161376953125, -332.17889404296875, -319.0161437988281, -305.8533935546875, -292.690673828125, -279.5279235839844, -266.36517333984375, -253.20245361328125, -240.03970336914062, -226.87696838378906, -213.71421813964844, -200.55148315429688, -187.38873291015625, -174.2259979248047, -161.06326293945312, -147.9005126953125, -134.73777770996094, -121.57504272460938, -108.41230010986328, -95.24955749511719, -82.08682250976562, -68.92407989501953, -55.7613410949707, -42.598602294921875, -29.43585968017578, -16.27312469482422, -3.110382080078125, 10.05235767364502, 23.215097427368164, 36.377838134765625, 49.54057693481445, 62.70331573486328, 75.86605834960938, 89.02879333496094, 102.19153594970703, 115.35427856445312, 128.5170135498047, 141.67974853515625, 154.84249877929688, 168.00523376464844, 181.16796875, 194.33071899414062, 207.4934539794922, 220.65618896484375, 233.81893920898438, 246.98167419433594, 260.1444091796875, 273.3071594238281, 286.46990966796875, 299.63262939453125, 312.7953796386719, 325.9581298828125, 339.120849609375, 352.2835998535156, 365.44635009765625, 378.60906982421875, 391.7718200683594, 404.9345703125, 418.0972900390625]}, "gradients/decoder.transformer.h.5.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 6.0, 2.0, 1.0, 9.0, 6.0, 7.0, 10.0, 12.0, 12.0, 11.0, 18.0, 32.0, 19.0, 30.0, 33.0, 30.0, 41.0, 38.0, 43.0, 50.0, 45.0, 48.0, 42.0, 46.0, 52.0, 39.0, 43.0, 44.0, 35.0, 46.0, 27.0, 28.0, 23.0, 17.0, 13.0, 14.0, 8.0, 9.0, 10.0, 5.0, 3.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-106.2860107421875, -103.18708801269531, -100.0881576538086, -96.9892349243164, -93.89030456542969, -90.7913818359375, -87.69245910644531, -84.59353637695312, -81.4946060180664, -78.39568328857422, -75.2967529296875, -72.19783020019531, -69.09890747070312, -65.9999771118164, -62.90105438232422, -59.802127838134766, -56.70320129394531, -53.60427474975586, -50.505348205566406, -47.40642547607422, -44.307498931884766, -41.20857238769531, -38.109649658203125, -35.01072311401367, -31.91179656982422, -28.812870025634766, -25.713945388793945, -22.615020751953125, -19.516094207763672, -16.41716766357422, -13.318243026733398, -10.219318389892578, -7.120391845703125, -4.021466255187988, -0.9225406646728516, 2.176384925842285, 5.275310516357422, 8.374236106872559, 11.473161697387695, 14.572086334228516, 17.67101287841797, 20.769939422607422, 23.868864059448242, 26.967788696289062, 30.066715240478516, 33.16564178466797, 36.264564514160156, 39.36349105834961, 42.46241760253906, 45.561344146728516, 48.66027069091797, 51.759193420410156, 54.85811996459961, 57.95704650878906, 61.05596923828125, 64.15489196777344, 67.25382232666016, 70.35274505615234, 73.45167541503906, 76.55059814453125, 79.64952087402344, 82.74845123291016, 85.84737396240234, 88.94630432128906, 92.04522705078125]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 2.0, 5.0, 2.0, 5.0, 4.0, 5.0, 10.0, 15.0, 3.0, 12.0, 10.0, 22.0, 17.0, 25.0, 25.0, 32.0, 32.0, 34.0, 47.0, 39.0, 32.0, 32.0, 51.0, 48.0, 41.0, 37.0, 46.0, 39.0, 52.0, 36.0, 30.0, 22.0, 29.0, 38.0, 20.0, 27.0, 14.0, 19.0, 14.0, 4.0, 10.0, 6.0, 4.0, 6.0, 2.0, 3.0, 5.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.0390625, -14.5018310546875, -13.964599609375, -13.4273681640625, -12.89013671875, -12.3529052734375, -11.815673828125, -11.2784423828125, -10.7412109375, -10.2039794921875, -9.666748046875, -9.1295166015625, -8.59228515625, -8.0550537109375, -7.517822265625, -6.9805908203125, -6.443359375, -5.9061279296875, -5.368896484375, -4.8316650390625, -4.29443359375, -3.7572021484375, -3.219970703125, -2.6827392578125, -2.1455078125, -1.6082763671875, -1.071044921875, -0.5338134765625, 0.00341796875, 0.5406494140625, 1.077880859375, 1.6151123046875, 2.15234375, 2.6895751953125, 3.226806640625, 3.7640380859375, 4.30126953125, 4.8385009765625, 5.375732421875, 5.9129638671875, 6.4501953125, 6.9874267578125, 7.524658203125, 8.0618896484375, 8.59912109375, 9.1363525390625, 9.673583984375, 10.2108154296875, 10.748046875, 11.2852783203125, 11.822509765625, 12.3597412109375, 12.89697265625, 13.4342041015625, 13.971435546875, 14.5086669921875, 15.0458984375, 15.5831298828125, 16.120361328125, 16.6575927734375, 17.19482421875, 17.7320556640625, 18.269287109375, 18.8065185546875, 19.34375]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 8.0, 4.0, 15.0, 15.0, 23.0, 48.0, 56.0, 98.0, 138.0, 202.0, 280.0, 457.0, 739.0, 1114.0, 1727.0, 2794.0, 4190.0, 6734.0, 10512.0, 16966.0, 27119.0, 43475.0, 71623.0, 117262.0, 187117.0, 206074.0, 134453.0, 81857.0, 49969.0, 31130.0, 19308.0, 12110.0, 7470.0, 4955.0, 3014.0, 1913.0, 1236.0, 806.0, 540.0, 341.0, 218.0, 144.0, 98.0, 71.0, 59.0, 25.0, 17.0, 18.0, 8.0, 2.0, 5.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0], "bins": [-1.80859375, -1.7523193359375, -1.696044921875, -1.6397705078125, -1.58349609375, -1.5272216796875, -1.470947265625, -1.4146728515625, -1.3583984375, -1.3021240234375, -1.245849609375, -1.1895751953125, -1.13330078125, -1.0770263671875, -1.020751953125, -0.9644775390625, -0.908203125, -0.8519287109375, -0.795654296875, -0.7393798828125, -0.68310546875, -0.6268310546875, -0.570556640625, -0.5142822265625, -0.4580078125, -0.4017333984375, -0.345458984375, -0.2891845703125, -0.23291015625, -0.1766357421875, -0.120361328125, -0.0640869140625, -0.0078125, 0.0484619140625, 0.104736328125, 0.1610107421875, 0.21728515625, 0.2735595703125, 0.329833984375, 0.3861083984375, 0.4423828125, 0.4986572265625, 0.554931640625, 0.6112060546875, 0.66748046875, 0.7237548828125, 0.780029296875, 0.8363037109375, 0.892578125, 0.9488525390625, 1.005126953125, 1.0614013671875, 1.11767578125, 1.1739501953125, 1.230224609375, 1.2864990234375, 1.3427734375, 1.3990478515625, 1.455322265625, 1.5115966796875, 1.56787109375, 1.6241455078125, 1.680419921875, 1.7366943359375, 1.79296875]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 2.0, 6.0, 7.0, 13.0, 8.0, 14.0, 13.0, 21.0, 16.0, 20.0, 27.0, 14.0, 29.0, 29.0, 34.0, 27.0, 42.0, 33.0, 34.0, 48.0, 34.0, 1073.0, 38.0, 40.0, 38.0, 44.0, 39.0, 38.0, 32.0, 32.0, 40.0, 22.0, 18.0, 17.0, 15.0, 13.0, 11.0, 9.0, 8.0, 8.0, 6.0, 8.0, 5.0, 3.0, 2.0, 2.0, 0.0, 3.0, 1.0, 2.0], "bins": [-11.4296875, -11.1068115234375, -10.783935546875, -10.4610595703125, -10.13818359375, -9.8153076171875, -9.492431640625, -9.1695556640625, -8.8466796875, -8.5238037109375, -8.200927734375, -7.8780517578125, -7.55517578125, -7.2322998046875, -6.909423828125, -6.5865478515625, -6.263671875, -5.9407958984375, -5.617919921875, -5.2950439453125, -4.97216796875, -4.6492919921875, -4.326416015625, -4.0035400390625, -3.6806640625, -3.3577880859375, -3.034912109375, -2.7120361328125, -2.38916015625, -2.0662841796875, -1.743408203125, -1.4205322265625, -1.09765625, -0.7747802734375, -0.451904296875, -0.1290283203125, 0.19384765625, 0.5167236328125, 0.839599609375, 1.1624755859375, 1.4853515625, 1.8082275390625, 2.131103515625, 2.4539794921875, 2.77685546875, 3.0997314453125, 3.422607421875, 3.7454833984375, 4.068359375, 4.3912353515625, 4.714111328125, 5.0369873046875, 5.35986328125, 5.6827392578125, 6.005615234375, 6.3284912109375, 6.6513671875, 6.9742431640625, 7.297119140625, 7.6199951171875, 7.94287109375, 8.2657470703125, 8.588623046875, 8.9114990234375, 9.234375]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 0.0, 3.0, 11.0, 7.0, 17.0, 33.0, 41.0, 82.0, 86.0, 140.0, 188.0, 331.0, 496.0, 767.0, 1155.0, 1639.0, 2700.0, 4338.0, 6668.0, 10415.0, 16903.0, 26943.0, 43073.0, 69662.0, 114047.0, 183837.0, 1255546.0, 137160.0, 83340.0, 51211.0, 31884.0, 19930.0, 12355.0, 7954.0, 4983.0, 3332.0, 2075.0, 1347.0, 840.0, 549.0, 331.0, 243.0, 155.0, 119.0, 61.0, 40.0, 35.0, 16.0, 21.0, 17.0, 6.0, 4.0, 2.0, 2.0, 1.0, 0.0, 2.0], "bins": [-1.78125, -1.727569580078125, -1.67388916015625, -1.620208740234375, -1.5665283203125, -1.512847900390625, -1.45916748046875, -1.405487060546875, -1.351806640625, -1.298126220703125, -1.24444580078125, -1.190765380859375, -1.1370849609375, -1.083404541015625, -1.02972412109375, -0.976043701171875, -0.92236328125, -0.868682861328125, -0.81500244140625, -0.761322021484375, -0.7076416015625, -0.653961181640625, -0.60028076171875, -0.546600341796875, -0.492919921875, -0.439239501953125, -0.38555908203125, -0.331878662109375, -0.2781982421875, -0.224517822265625, -0.17083740234375, -0.117156982421875, -0.0634765625, -0.009796142578125, 0.04388427734375, 0.097564697265625, 0.1512451171875, 0.204925537109375, 0.25860595703125, 0.312286376953125, 0.365966796875, 0.419647216796875, 0.47332763671875, 0.527008056640625, 0.5806884765625, 0.634368896484375, 0.68804931640625, 0.741729736328125, 0.79541015625, 0.849090576171875, 0.90277099609375, 0.956451416015625, 1.0101318359375, 1.063812255859375, 1.11749267578125, 1.171173095703125, 1.224853515625, 1.278533935546875, 1.33221435546875, 1.385894775390625, 1.4395751953125, 1.493255615234375, 1.54693603515625, 1.600616455078125, 1.654296875]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 3.0, 5.0, 2.0, 6.0, 12.0, 10.0, 15.0, 18.0, 28.0, 40.0, 37.0, 61.0, 66.0, 74.0, 91.0, 87.0, 90.0, 85.0, 70.0, 55.0, 49.0, 28.0, 19.0, 14.0, 10.0, 11.0, 7.0, 6.0, 3.0, 3.0, 3.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004901885986328125, -0.004735291004180908, -0.004568696022033691, -0.004402101039886475, -0.004235506057739258, -0.004068911075592041, -0.0039023160934448242, -0.0037357211112976074, -0.0035691261291503906, -0.003402531147003174, -0.003235936164855957, -0.0030693411827087402, -0.0029027462005615234, -0.0027361512184143066, -0.00256955623626709, -0.002402961254119873, -0.0022363662719726562, -0.0020697712898254395, -0.0019031763076782227, -0.0017365813255310059, -0.001569986343383789, -0.0014033913612365723, -0.0012367963790893555, -0.0010702013969421387, -0.0009036064147949219, -0.0007370114326477051, -0.0005704164505004883, -0.0004038214683532715, -0.0002372264862060547, -7.063150405883789e-05, 9.59634780883789e-05, 0.0002625584602355957, 0.0004291534423828125, 0.0005957484245300293, 0.0007623434066772461, 0.0009289383888244629, 0.0010955333709716797, 0.0012621283531188965, 0.0014287233352661133, 0.00159531831741333, 0.0017619132995605469, 0.0019285082817077637, 0.0020951032638549805, 0.0022616982460021973, 0.002428293228149414, 0.002594888210296631, 0.0027614831924438477, 0.0029280781745910645, 0.0030946731567382812, 0.003261268138885498, 0.003427863121032715, 0.0035944581031799316, 0.0037610530853271484, 0.003927648067474365, 0.004094243049621582, 0.004260838031768799, 0.004427433013916016, 0.004594027996063232, 0.004760622978210449, 0.004927217960357666, 0.005093812942504883, 0.0052604079246521, 0.005427002906799316, 0.005593597888946533, 0.00576019287109375]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 4.0, 3.0, 2.0, 7.0, 4.0, 10.0, 16.0, 12.0, 25.0, 31.0, 48.0, 62.0, 139.0, 202.0, 449.0, 1669.0, 1042054.0, 2712.0, 484.0, 213.0, 135.0, 94.0, 44.0, 53.0, 20.0, 19.0, 13.0, 17.0, 9.0, 3.0, 2.0, 4.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1278076171875, -0.12411022186279297, -0.12041282653808594, -0.1167154312133789, -0.11301803588867188, -0.10932064056396484, -0.10562324523925781, -0.10192584991455078, -0.09822845458984375, -0.09453105926513672, -0.09083366394042969, -0.08713626861572266, -0.08343887329101562, -0.0797414779663086, -0.07604408264160156, -0.07234668731689453, -0.0686492919921875, -0.06495189666748047, -0.06125450134277344, -0.057557106018066406, -0.053859710693359375, -0.050162315368652344, -0.04646492004394531, -0.04276752471923828, -0.03907012939453125, -0.03537273406982422, -0.03167533874511719, -0.027977943420410156, -0.024280548095703125, -0.020583152770996094, -0.016885757446289062, -0.013188362121582031, -0.009490966796875, -0.005793571472167969, -0.0020961761474609375, 0.0016012191772460938, 0.005298614501953125, 0.008996009826660156, 0.012693405151367188, 0.01639080047607422, 0.02008819580078125, 0.02378559112548828, 0.027482986450195312, 0.031180381774902344, 0.034877777099609375, 0.038575172424316406, 0.04227256774902344, 0.04596996307373047, 0.0496673583984375, 0.05336475372314453, 0.05706214904785156, 0.060759544372558594, 0.06445693969726562, 0.06815433502197266, 0.07185173034667969, 0.07554912567138672, 0.07924652099609375, 0.08294391632080078, 0.08664131164550781, 0.09033870697021484, 0.09403610229492188, 0.0977334976196289, 0.10143089294433594, 0.10512828826904297, 0.10882568359375]}, "gradients/decoder.transformer.h.5.ln_cross_attn.weight": {"_type": "histogram", "values": [108.0, 827.0, 80.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0007027438259683549, -0.00020967214368283749, 0.00028339953860267997, 0.0007764711626805365, 0.001269542844966054, 0.0017626145854592323, 0.002255686093121767, 0.0027487578336149454, 0.0032418295741081238, 0.003734901314601302, 0.0042279730550944805, 0.004721044562757015, 0.005214116536080837, 0.005707188043743372, 0.006200259551405907, 0.006693331524729729, 0.007186403032392263, 0.007679474540054798, 0.00817254651337862, 0.008665617555379868, 0.00915868952870369, 0.009651761502027512, 0.010144833475351334, 0.010637904517352581, 0.011130976490676403, 0.011624048464000225, 0.012117119506001472, 0.012610191479325294, 0.013103263452649117, 0.013596335425972939, 0.014089406467974186, 0.014582478441298008, 0.015075549483299255, 0.015568621456623077, 0.0160616934299469, 0.01655476540327072, 0.017047835513949394, 0.017540907487273216, 0.01803397946059704, 0.01852705143392086, 0.019020123407244682, 0.019513195380568504, 0.020006267353892326, 0.020499337464571, 0.02099240943789482, 0.021485481411218643, 0.021978553384542465, 0.022471625357866287, 0.02296469733119011, 0.02345776930451393, 0.023950841277837753, 0.024443913251161575, 0.024936983361840248, 0.02543005533516407, 0.025923127308487892, 0.026416199281811714, 0.026909269392490387, 0.02740234136581421, 0.02789541333913803, 0.028388485312461853, 0.028881555423140526, 0.029374627396464348, 0.02986769936978817, 0.030360771343111992, 0.030853843316435814]}, "gradients/decoder.transformer.h.5.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 4.0, 2.0, 2.0, 8.0, 7.0, 10.0, 10.0, 10.0, 12.0, 7.0, 20.0, 17.0, 21.0, 26.0, 21.0, 13.0, 33.0, 27.0, 28.0, 35.0, 36.0, 38.0, 44.0, 45.0, 46.0, 45.0, 33.0, 33.0, 38.0, 31.0, 30.0, 36.0, 34.0, 33.0, 24.0, 40.0, 18.0, 14.0, 11.0, 9.0, 14.0, 9.0, 9.0, 12.0, 4.0, 3.0, 3.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.0020775198936462402, -0.0020182253792881966, -0.001958930864930153, -0.0018996363505721092, -0.0018403418362140656, -0.0017810473218560219, -0.0017217528074979782, -0.0016624582931399345, -0.0016031637787818909, -0.0015438692644238472, -0.0014845747500658035, -0.0014252802357077599, -0.0013659857213497162, -0.0013066912069916725, -0.0012473966926336288, -0.0011881021782755852, -0.0011288076639175415, -0.0010695131495594978, -0.0010102186352014542, -0.0009509241208434105, -0.0008916296064853668, -0.0008323350921273232, -0.0007730405777692795, -0.0007137460634112358, -0.0006544515490531921, -0.0005951570346951485, -0.0005358625203371048, -0.0004765680059790611, -0.00041727349162101746, -0.0003579789772629738, -0.0002986844629049301, -0.00023938994854688644, -0.00018009543418884277, -0.0001208009198307991, -6.150640547275543e-05, -2.2118911147117615e-06, 5.708262324333191e-05, 0.00011637713760137558, 0.00017567165195941925, 0.00023496616631746292, 0.0002942606806755066, 0.00035355519503355026, 0.00041284970939159393, 0.0004721442237496376, 0.0005314387381076813, 0.0005907332524657249, 0.0006500277668237686, 0.0007093222811818123, 0.000768616795539856, 0.0008279113098978996, 0.0008872058242559433, 0.000946500338613987, 0.0010057948529720306, 0.0010650893673300743, 0.001124383881688118, 0.0011836783960461617, 0.0012429729104042053, 0.001302267424762249, 0.0013615619391202927, 0.0014208564534783363, 0.00148015096783638, 0.0015394454821944237, 0.0015987399965524673, 0.001658034510910511, 0.0017173290252685547]}, "gradients/decoder.transformer.h.5.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 2.0, 5.0, 2.0, 5.0, 4.0, 5.0, 10.0, 15.0, 3.0, 12.0, 10.0, 22.0, 17.0, 25.0, 25.0, 32.0, 32.0, 34.0, 47.0, 39.0, 32.0, 32.0, 51.0, 48.0, 41.0, 37.0, 46.0, 39.0, 52.0, 36.0, 30.0, 22.0, 29.0, 38.0, 20.0, 27.0, 14.0, 19.0, 14.0, 4.0, 10.0, 6.0, 4.0, 6.0, 2.0, 3.0, 5.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.0390625, -14.5018310546875, -13.964599609375, -13.4273681640625, -12.89013671875, -12.3529052734375, -11.815673828125, -11.2784423828125, -10.7412109375, -10.2039794921875, -9.666748046875, -9.1295166015625, -8.59228515625, -8.0550537109375, -7.517822265625, -6.9805908203125, -6.443359375, -5.9061279296875, -5.368896484375, -4.8316650390625, -4.29443359375, -3.7572021484375, -3.219970703125, -2.6827392578125, -2.1455078125, -1.6082763671875, -1.071044921875, -0.5338134765625, 0.00341796875, 0.5406494140625, 1.077880859375, 1.6151123046875, 2.15234375, 2.6895751953125, 3.226806640625, 3.7640380859375, 4.30126953125, 4.8385009765625, 5.375732421875, 5.9129638671875, 6.4501953125, 6.9874267578125, 7.524658203125, 8.0618896484375, 8.59912109375, 9.1363525390625, 9.673583984375, 10.2108154296875, 10.748046875, 11.2852783203125, 11.822509765625, 12.3597412109375, 12.89697265625, 13.4342041015625, 13.971435546875, 14.5086669921875, 15.0458984375, 15.5831298828125, 16.120361328125, 16.6575927734375, 17.19482421875, 17.7320556640625, 18.269287109375, 18.8065185546875, 19.34375]}, "gradients/decoder.transformer.h.5.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 3.0, 6.0, 3.0, 9.0, 6.0, 14.0, 14.0, 21.0, 22.0, 32.0, 41.0, 54.0, 67.0, 77.0, 125.0, 182.0, 282.0, 514.0, 1060.0, 2580.0, 7346.0, 23675.0, 88336.0, 371354.0, 412772.0, 99860.0, 26638.0, 7984.0, 2852.0, 1105.0, 525.0, 320.0, 178.0, 149.0, 88.0, 69.0, 44.0, 43.0, 27.0, 21.0, 17.0, 14.0, 8.0, 10.0, 3.0, 5.0, 4.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.59375, -18.89501953125, -18.1962890625, -17.49755859375, -16.798828125, -16.10009765625, -15.4013671875, -14.70263671875, -14.00390625, -13.30517578125, -12.6064453125, -11.90771484375, -11.208984375, -10.51025390625, -9.8115234375, -9.11279296875, -8.4140625, -7.71533203125, -7.0166015625, -6.31787109375, -5.619140625, -4.92041015625, -4.2216796875, -3.52294921875, -2.82421875, -2.12548828125, -1.4267578125, -0.72802734375, -0.029296875, 0.66943359375, 1.3681640625, 2.06689453125, 2.765625, 3.46435546875, 4.1630859375, 4.86181640625, 5.560546875, 6.25927734375, 6.9580078125, 7.65673828125, 8.35546875, 9.05419921875, 9.7529296875, 10.45166015625, 11.150390625, 11.84912109375, 12.5478515625, 13.24658203125, 13.9453125, 14.64404296875, 15.3427734375, 16.04150390625, 16.740234375, 17.43896484375, 18.1376953125, 18.83642578125, 19.53515625, 20.23388671875, 20.9326171875, 21.63134765625, 22.330078125, 23.02880859375, 23.7275390625, 24.42626953125, 25.125]}, "gradients/decoder.transformer.h.5.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 1.0, 2.0, 4.0, 5.0, 1.0, 5.0, 8.0, 13.0, 7.0, 24.0, 17.0, 14.0, 23.0, 27.0, 34.0, 35.0, 30.0, 31.0, 48.0, 50.0, 54.0, 90.0, 116.0, 209.0, 1470.0, 177.0, 88.0, 64.0, 54.0, 50.0, 38.0, 46.0, 29.0, 30.0, 28.0, 20.0, 16.0, 17.0, 23.0, 10.0, 6.0, 11.0, 16.0, 0.0, 5.0, 1.0, 4.0, 0.0, 3.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-41.3125, -39.90234375, -38.4921875, -37.08203125, -35.671875, -34.26171875, -32.8515625, -31.44140625, -30.03125, -28.62109375, -27.2109375, -25.80078125, -24.390625, -22.98046875, -21.5703125, -20.16015625, -18.75, -17.33984375, -15.9296875, -14.51953125, -13.109375, -11.69921875, -10.2890625, -8.87890625, -7.46875, -6.05859375, -4.6484375, -3.23828125, -1.828125, -0.41796875, 0.9921875, 2.40234375, 3.8125, 5.22265625, 6.6328125, 8.04296875, 9.453125, 10.86328125, 12.2734375, 13.68359375, 15.09375, 16.50390625, 17.9140625, 19.32421875, 20.734375, 22.14453125, 23.5546875, 24.96484375, 26.375, 27.78515625, 29.1953125, 30.60546875, 32.015625, 33.42578125, 34.8359375, 36.24609375, 37.65625, 39.06640625, 40.4765625, 41.88671875, 43.296875, 44.70703125, 46.1171875, 47.52734375, 48.9375]}, "gradients/decoder.transformer.h.5.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 8.0, 0.0, 1.0, 4.0, 4.0, 9.0, 8.0, 13.0, 15.0, 14.0, 34.0, 41.0, 43.0, 67.0, 87.0, 122.0, 179.0, 246.0, 436.0, 850.0, 4000.0, 96791.0, 2953506.0, 83441.0, 3658.0, 812.0, 418.0, 235.0, 188.0, 136.0, 96.0, 67.0, 45.0, 41.0, 31.0, 16.0, 18.0, 8.0, 7.0, 5.0, 4.0, 4.0, 5.0, 1.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0], "bins": [-63.96875, -61.64111328125, -59.3134765625, -56.98583984375, -54.658203125, -52.33056640625, -50.0029296875, -47.67529296875, -45.34765625, -43.02001953125, -40.6923828125, -38.36474609375, -36.037109375, -33.70947265625, -31.3818359375, -29.05419921875, -26.7265625, -24.39892578125, -22.0712890625, -19.74365234375, -17.416015625, -15.08837890625, -12.7607421875, -10.43310546875, -8.10546875, -5.77783203125, -3.4501953125, -1.12255859375, 1.205078125, 3.53271484375, 5.8603515625, 8.18798828125, 10.515625, 12.84326171875, 15.1708984375, 17.49853515625, 19.826171875, 22.15380859375, 24.4814453125, 26.80908203125, 29.13671875, 31.46435546875, 33.7919921875, 36.11962890625, 38.447265625, 40.77490234375, 43.1025390625, 45.43017578125, 47.7578125, 50.08544921875, 52.4130859375, 54.74072265625, 57.068359375, 59.39599609375, 61.7236328125, 64.05126953125, 66.37890625, 68.70654296875, 71.0341796875, 73.36181640625, 75.689453125, 78.01708984375, 80.3447265625, 82.67236328125, 85.0]}, "gradients/decoder.transformer.h.5.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 8.0, 29.0, 65.0, 130.0, 225.0, 231.0, 177.0, 72.0, 43.0, 19.0, 8.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-91.59333801269531, -88.8408432006836, -86.0883560180664, -83.33586120605469, -80.5833740234375, -77.83087921142578, -75.0783920288086, -72.32589721679688, -69.57341003417969, -66.82091522216797, -64.06842803955078, -61.31593704223633, -58.563446044921875, -55.81095504760742, -53.05846405029297, -50.30596923828125, -47.5534782409668, -44.800987243652344, -42.04849624633789, -39.29600524902344, -36.543514251708984, -33.79102325439453, -31.038530349731445, -28.286039352416992, -25.53354835510254, -22.781057357788086, -20.028566360473633, -17.276073455810547, -14.52358341217041, -11.771092414855957, -9.018600463867188, -6.266109466552734, -3.5136184692382812, -0.761127233505249, 1.9913640022277832, 4.7438554763793945, 7.496346473693848, 10.2488374710083, 13.00132942199707, 15.753820419311523, 18.506311416625977, 21.25880241394043, 24.011293411254883, 26.76378631591797, 29.516277313232422, 32.268768310546875, 35.02125930786133, 37.77375030517578, 40.526241302490234, 43.27873229980469, 46.03122329711914, 48.783714294433594, 51.53620529174805, 54.2886962890625, 57.04119110107422, 59.793678283691406, 62.546173095703125, 65.29866790771484, 68.05115509033203, 70.80364990234375, 73.55613708496094, 76.30863189697266, 79.06111907958984, 81.81361389160156, 84.56610107421875]}, "gradients/decoder.transformer.h.5.ln_1.bias": {"_type": "histogram", "values": [1.0, 5.0, 1.0, 3.0, 1.0, 1.0, 2.0, 2.0, 4.0, 7.0, 3.0, 11.0, 9.0, 7.0, 10.0, 8.0, 13.0, 16.0, 19.0, 17.0, 16.0, 19.0, 32.0, 28.0, 28.0, 26.0, 25.0, 34.0, 27.0, 35.0, 33.0, 46.0, 36.0, 40.0, 34.0, 32.0, 41.0, 28.0, 39.0, 26.0, 24.0, 29.0, 27.0, 25.0, 23.0, 18.0, 17.0, 16.0, 9.0, 14.0, 10.0, 5.0, 4.0, 7.0, 7.0, 6.0, 3.0, 1.0, 4.0, 1.0, 3.0, 0.0, 3.0, 3.0], "bins": [-100.16975402832031, -97.17277526855469, -94.1758041381836, -91.17882537841797, -88.18185424804688, -85.18487548828125, -82.18790435791016, -79.19092559814453, -76.19395446777344, -73.19697570800781, -70.20000457763672, -67.2030258178711, -64.2060546875, -61.20907974243164, -58.21210479736328, -55.215126037597656, -52.2181510925293, -49.22117614746094, -46.22420120239258, -43.22722625732422, -40.23025131225586, -37.2332763671875, -34.236297607421875, -31.23932456970215, -28.24234962463379, -25.24537467956543, -22.24839973449707, -19.251422882080078, -16.25444793701172, -13.257473945617676, -10.260498046875, -7.263523101806641, -4.266548156738281, -1.2695729732513428, 1.7274022102355957, 4.724377632141113, 7.721352577209473, 10.718327522277832, 13.715303421020508, 16.712278366088867, 19.709253311157227, 22.706228256225586, 25.703203201293945, 28.700180053710938, 31.697154998779297, 34.694129943847656, 37.691104888916016, 40.688079833984375, 43.685054779052734, 46.682029724121094, 49.67900466918945, 52.67597961425781, 55.67295455932617, 58.66992950439453, 61.666908264160156, 64.66387939453125, 67.66085815429688, 70.6578369140625, 73.6548080444336, 76.65178680419922, 79.64875793457031, 82.64573669433594, 85.64270782470703, 88.63968658447266, 91.63665771484375]}, "gradients/decoder.transformer.h.4.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 4.0, 2.0, 2.0, 4.0, 6.0, 2.0, 9.0, 11.0, 16.0, 9.0, 12.0, 16.0, 22.0, 25.0, 24.0, 17.0, 27.0, 30.0, 35.0, 35.0, 35.0, 39.0, 46.0, 33.0, 43.0, 35.0, 59.0, 39.0, 25.0, 47.0, 35.0, 34.0, 27.0, 29.0, 28.0, 28.0, 25.0, 14.0, 20.0, 10.0, 10.0, 7.0, 9.0, 6.0, 6.0, 5.0, 2.0, 1.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0], "bins": [-16.046875, -15.526611328125, -15.00634765625, -14.486083984375, -13.9658203125, -13.445556640625, -12.92529296875, -12.405029296875, -11.884765625, -11.364501953125, -10.84423828125, -10.323974609375, -9.8037109375, -9.283447265625, -8.76318359375, -8.242919921875, -7.72265625, -7.202392578125, -6.68212890625, -6.161865234375, -5.6416015625, -5.121337890625, -4.60107421875, -4.080810546875, -3.560546875, -3.040283203125, -2.52001953125, -1.999755859375, -1.4794921875, -0.959228515625, -0.43896484375, 0.081298828125, 0.6015625, 1.121826171875, 1.64208984375, 2.162353515625, 2.6826171875, 3.202880859375, 3.72314453125, 4.243408203125, 4.763671875, 5.283935546875, 5.80419921875, 6.324462890625, 6.8447265625, 7.364990234375, 7.88525390625, 8.405517578125, 8.92578125, 9.446044921875, 9.96630859375, 10.486572265625, 11.0068359375, 11.527099609375, 12.04736328125, 12.567626953125, 13.087890625, 13.608154296875, 14.12841796875, 14.648681640625, 15.1689453125, 15.689208984375, 16.20947265625, 16.729736328125, 17.25]}, "gradients/decoder.transformer.h.4.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 4.0, 2.0, 3.0, 4.0, 7.0, 15.0, 9.0, 7.0, 14.0, 8.0, 13.0, 9.0, 20.0, 23.0, 28.0, 30.0, 41.0, 55.0, 104.0, 159.0, 303.0, 782.0, 3461.0, 38287.0, 1175189.0, 2793801.0, 171675.0, 7754.0, 1331.0, 483.0, 201.0, 124.0, 83.0, 56.0, 34.0, 28.0, 24.0, 14.0, 12.0, 15.0, 14.0, 5.0, 11.0, 11.0, 7.0, 8.0, 8.0, 1.0, 5.0, 5.0, 0.0, 2.0, 2.0, 2.0, 0.0, 2.0, 0.0, 2.0], "bins": [-44.21875, -42.80224609375, -41.3857421875, -39.96923828125, -38.552734375, -37.13623046875, -35.7197265625, -34.30322265625, -32.88671875, -31.47021484375, -30.0537109375, -28.63720703125, -27.220703125, -25.80419921875, -24.3876953125, -22.97119140625, -21.5546875, -20.13818359375, -18.7216796875, -17.30517578125, -15.888671875, -14.47216796875, -13.0556640625, -11.63916015625, -10.22265625, -8.80615234375, -7.3896484375, -5.97314453125, -4.556640625, -3.14013671875, -1.7236328125, -0.30712890625, 1.109375, 2.52587890625, 3.9423828125, 5.35888671875, 6.775390625, 8.19189453125, 9.6083984375, 11.02490234375, 12.44140625, 13.85791015625, 15.2744140625, 16.69091796875, 18.107421875, 19.52392578125, 20.9404296875, 22.35693359375, 23.7734375, 25.18994140625, 26.6064453125, 28.02294921875, 29.439453125, 30.85595703125, 32.2724609375, 33.68896484375, 35.10546875, 36.52197265625, 37.9384765625, 39.35498046875, 40.771484375, 42.18798828125, 43.6044921875, 45.02099609375, 46.4375]}, "gradients/decoder.transformer.h.4.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 3.0, 2.0, 1.0, 3.0, 5.0, 12.0, 14.0, 22.0, 34.0, 40.0, 67.0, 101.0, 153.0, 251.0, 394.0, 525.0, 624.0, 564.0, 405.0, 279.0, 181.0, 130.0, 71.0, 54.0, 38.0, 26.0, 15.0, 20.0, 12.0, 5.0, 2.0, 7.0, 9.0, 1.0, 3.0, 0.0, 2.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.546875, -25.706298828125, -24.86572265625, -24.025146484375, -23.1845703125, -22.343994140625, -21.50341796875, -20.662841796875, -19.822265625, -18.981689453125, -18.14111328125, -17.300537109375, -16.4599609375, -15.619384765625, -14.77880859375, -13.938232421875, -13.09765625, -12.257080078125, -11.41650390625, -10.575927734375, -9.7353515625, -8.894775390625, -8.05419921875, -7.213623046875, -6.373046875, -5.532470703125, -4.69189453125, -3.851318359375, -3.0107421875, -2.170166015625, -1.32958984375, -0.489013671875, 0.3515625, 1.192138671875, 2.03271484375, 2.873291015625, 3.7138671875, 4.554443359375, 5.39501953125, 6.235595703125, 7.076171875, 7.916748046875, 8.75732421875, 9.597900390625, 10.4384765625, 11.279052734375, 12.11962890625, 12.960205078125, 13.80078125, 14.641357421875, 15.48193359375, 16.322509765625, 17.1630859375, 18.003662109375, 18.84423828125, 19.684814453125, 20.525390625, 21.365966796875, 22.20654296875, 23.047119140625, 23.8876953125, 24.728271484375, 25.56884765625, 26.409423828125, 27.25]}, "gradients/decoder.transformer.h.4.mlp.c_fc.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 3.0, 4.0, 1.0, 2.0, 4.0, 7.0, 12.0, 12.0, 17.0, 6.0, 34.0, 21.0, 31.0, 59.0, 66.0, 136.0, 210.0, 459.0, 948.0, 3058.0, 19211.0, 378256.0, 3614195.0, 161897.0, 11734.0, 2257.0, 720.0, 403.0, 186.0, 91.0, 66.0, 46.0, 30.0, 22.0, 16.0, 18.0, 7.0, 9.0, 6.0, 8.0, 6.0, 8.0, 3.0, 4.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-44.4375, -42.6865234375, -40.935546875, -39.1845703125, -37.43359375, -35.6826171875, -33.931640625, -32.1806640625, -30.4296875, -28.6787109375, -26.927734375, -25.1767578125, -23.42578125, -21.6748046875, -19.923828125, -18.1728515625, -16.421875, -14.6708984375, -12.919921875, -11.1689453125, -9.41796875, -7.6669921875, -5.916015625, -4.1650390625, -2.4140625, -0.6630859375, 1.087890625, 2.8388671875, 4.58984375, 6.3408203125, 8.091796875, 9.8427734375, 11.59375, 13.3447265625, 15.095703125, 16.8466796875, 18.59765625, 20.3486328125, 22.099609375, 23.8505859375, 25.6015625, 27.3525390625, 29.103515625, 30.8544921875, 32.60546875, 34.3564453125, 36.107421875, 37.8583984375, 39.609375, 41.3603515625, 43.111328125, 44.8623046875, 46.61328125, 48.3642578125, 50.115234375, 51.8662109375, 53.6171875, 55.3681640625, 57.119140625, 58.8701171875, 60.62109375, 62.3720703125, 64.123046875, 65.8740234375, 67.625]}, "gradients/decoder.transformer.h.4.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 59.0, 761.0, 193.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1019.1182250976562, -998.5784301757812, -978.0386352539062, -957.4988403320312, -936.9590454101562, -916.4192504882812, -895.8794555664062, -875.339599609375, -854.7998046875, -834.260009765625, -813.72021484375, -793.180419921875, -772.640625, -752.100830078125, -731.56103515625, -711.021240234375, -690.4814453125, -669.941650390625, -649.40185546875, -628.862060546875, -608.322265625, -587.782470703125, -567.24267578125, -546.702880859375, -526.1630859375, -505.623291015625, -485.08349609375, -464.543701171875, -444.00390625, -423.464111328125, -402.9242858886719, -382.3844909667969, -361.8446350097656, -341.3048400878906, -320.7650451660156, -300.2252502441406, -279.6854248046875, -259.1456298828125, -238.6058349609375, -218.0660400390625, -197.5262451171875, -176.9864501953125, -156.4466552734375, -135.90684509277344, -115.36705017089844, -94.82725524902344, -74.2874526977539, -53.747650146484375, -33.207855224609375, -12.66805648803711, 7.871742248535156, 28.411540985107422, 48.95133972167969, 69.49113464355469, 90.03093719482422, 110.57073974609375, 131.11053466796875, 151.65032958984375, 172.19012451171875, 192.7299346923828, 213.2697296142578, 233.8095245361328, 254.34933471679688, 274.8891296386719, 295.4289245605469]}, "gradients/decoder.transformer.h.4.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 3.0, 2.0, 2.0, 5.0, 8.0, 6.0, 5.0, 4.0, 11.0, 7.0, 13.0, 14.0, 24.0, 23.0, 33.0, 30.0, 35.0, 39.0, 42.0, 40.0, 52.0, 32.0, 50.0, 32.0, 49.0, 55.0, 45.0, 41.0, 35.0, 27.0, 36.0, 27.0, 35.0, 22.0, 37.0, 19.0, 15.0, 19.0, 9.0, 3.0, 4.0, 7.0, 3.0, 6.0, 2.0, 6.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-79.953857421875, -77.24220275878906, -74.53054809570312, -71.81889343261719, -69.10723876953125, -66.39558410644531, -63.68393325805664, -60.9722785949707, -58.260623931884766, -55.54896926879883, -52.83731460571289, -50.12566375732422, -47.41400909423828, -44.702354431152344, -41.990699768066406, -39.27904510498047, -36.56739044189453, -33.855735778808594, -31.144081115722656, -28.43242835998535, -25.720773696899414, -23.009119033813477, -20.297466278076172, -17.585811614990234, -14.874156951904297, -12.16250228881836, -9.450848579406738, -6.739194869995117, -4.02754020690918, -1.3158855438232422, 1.3957672119140625, 4.107421875, 6.8190765380859375, 9.530731201171875, 12.242384910583496, 14.954038619995117, 17.665693283081055, 20.377347946166992, 23.089000701904297, 25.800655364990234, 28.512310028076172, 31.22396469116211, 33.93561935424805, 36.64727020263672, 39.358924865722656, 42.070579528808594, 44.78223419189453, 47.49388885498047, 50.205543518066406, 52.917198181152344, 55.62885284423828, 58.34050750732422, 61.052162170410156, 63.763816833496094, 66.4754638671875, 69.18711853027344, 71.89877319335938, 74.61042785644531, 77.32208251953125, 80.03373718261719, 82.74539184570312, 85.45704650878906, 88.168701171875, 90.88035583496094, 93.59201049804688]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 0.0, 4.0, 3.0, 1.0, 6.0, 3.0, 7.0, 10.0, 10.0, 14.0, 16.0, 18.0, 19.0, 20.0, 28.0, 27.0, 23.0, 32.0, 30.0, 34.0, 44.0, 50.0, 44.0, 40.0, 47.0, 54.0, 47.0, 31.0, 31.0, 38.0, 41.0, 36.0, 25.0, 39.0, 27.0, 25.0, 20.0, 16.0, 9.0, 10.0, 7.0, 5.0, 3.0, 4.0, 5.0, 6.0, 3.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.6015625, -15.0321044921875, -14.462646484375, -13.8931884765625, -13.32373046875, -12.7542724609375, -12.184814453125, -11.6153564453125, -11.0458984375, -10.4764404296875, -9.906982421875, -9.3375244140625, -8.76806640625, -8.1986083984375, -7.629150390625, -7.0596923828125, -6.490234375, -5.9207763671875, -5.351318359375, -4.7818603515625, -4.21240234375, -3.6429443359375, -3.073486328125, -2.5040283203125, -1.9345703125, -1.3651123046875, -0.795654296875, -0.2261962890625, 0.34326171875, 0.9127197265625, 1.482177734375, 2.0516357421875, 2.62109375, 3.1905517578125, 3.760009765625, 4.3294677734375, 4.89892578125, 5.4683837890625, 6.037841796875, 6.6072998046875, 7.1767578125, 7.7462158203125, 8.315673828125, 8.8851318359375, 9.45458984375, 10.0240478515625, 10.593505859375, 11.1629638671875, 11.732421875, 12.3018798828125, 12.871337890625, 13.4407958984375, 14.01025390625, 14.5797119140625, 15.149169921875, 15.7186279296875, 16.2880859375, 16.8575439453125, 17.427001953125, 17.9964599609375, 18.56591796875, 19.1353759765625, 19.704833984375, 20.2742919921875, 20.84375]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 1.0, 3.0, 4.0, 5.0, 6.0, 13.0, 18.0, 29.0, 41.0, 52.0, 97.0, 136.0, 195.0, 287.0, 477.0, 757.0, 1226.0, 1944.0, 3019.0, 4914.0, 7651.0, 12494.0, 19813.0, 32059.0, 50939.0, 82554.0, 135408.0, 207521.0, 185569.0, 114507.0, 70118.0, 43938.0, 27316.0, 17107.0, 10636.0, 6531.0, 4010.0, 2643.0, 1630.0, 1017.0, 672.0, 443.0, 264.0, 155.0, 106.0, 98.0, 43.0, 40.0, 23.0, 18.0, 5.0, 6.0, 3.0, 6.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.79296875, -1.7348175048828125, -1.676666259765625, -1.6185150146484375, -1.56036376953125, -1.5022125244140625, -1.444061279296875, -1.3859100341796875, -1.3277587890625, -1.2696075439453125, -1.211456298828125, -1.1533050537109375, -1.09515380859375, -1.0370025634765625, -0.978851318359375, -0.9207000732421875, -0.862548828125, -0.8043975830078125, -0.746246337890625, -0.6880950927734375, -0.62994384765625, -0.5717926025390625, -0.513641357421875, -0.4554901123046875, -0.3973388671875, -0.3391876220703125, -0.281036376953125, -0.2228851318359375, -0.16473388671875, -0.1065826416015625, -0.048431396484375, 0.0097198486328125, 0.06787109375, 0.1260223388671875, 0.184173583984375, 0.2423248291015625, 0.30047607421875, 0.3586273193359375, 0.416778564453125, 0.4749298095703125, 0.5330810546875, 0.5912322998046875, 0.649383544921875, 0.7075347900390625, 0.76568603515625, 0.8238372802734375, 0.881988525390625, 0.9401397705078125, 0.998291015625, 1.0564422607421875, 1.114593505859375, 1.1727447509765625, 1.23089599609375, 1.2890472412109375, 1.347198486328125, 1.4053497314453125, 1.4635009765625, 1.5216522216796875, 1.579803466796875, 1.6379547119140625, 1.69610595703125, 1.7542572021484375, 1.812408447265625, 1.8705596923828125, 1.9287109375]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 3.0, 1.0, 2.0, 3.0, 4.0, 9.0, 7.0, 9.0, 14.0, 4.0, 19.0, 17.0, 17.0, 20.0, 25.0, 30.0, 24.0, 32.0, 31.0, 28.0, 35.0, 37.0, 44.0, 38.0, 1069.0, 48.0, 41.0, 50.0, 44.0, 42.0, 32.0, 37.0, 22.0, 27.0, 19.0, 14.0, 20.0, 15.0, 24.0, 12.0, 17.0, 10.0, 12.0, 8.0, 4.0, 3.0, 2.0, 4.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-11.5390625, -11.1849365234375, -10.830810546875, -10.4766845703125, -10.12255859375, -9.7684326171875, -9.414306640625, -9.0601806640625, -8.7060546875, -8.3519287109375, -7.997802734375, -7.6436767578125, -7.28955078125, -6.9354248046875, -6.581298828125, -6.2271728515625, -5.873046875, -5.5189208984375, -5.164794921875, -4.8106689453125, -4.45654296875, -4.1024169921875, -3.748291015625, -3.3941650390625, -3.0400390625, -2.6859130859375, -2.331787109375, -1.9776611328125, -1.62353515625, -1.2694091796875, -0.915283203125, -0.5611572265625, -0.20703125, 0.1470947265625, 0.501220703125, 0.8553466796875, 1.20947265625, 1.5635986328125, 1.917724609375, 2.2718505859375, 2.6259765625, 2.9801025390625, 3.334228515625, 3.6883544921875, 4.04248046875, 4.3966064453125, 4.750732421875, 5.1048583984375, 5.458984375, 5.8131103515625, 6.167236328125, 6.5213623046875, 6.87548828125, 7.2296142578125, 7.583740234375, 7.9378662109375, 8.2919921875, 8.6461181640625, 9.000244140625, 9.3543701171875, 9.70849609375, 10.0626220703125, 10.416748046875, 10.7708740234375, 11.125]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 4.0, 3.0, 7.0, 6.0, 8.0, 15.0, 22.0, 36.0, 54.0, 84.0, 114.0, 150.0, 245.0, 353.0, 518.0, 770.0, 1116.0, 1608.0, 2449.0, 3673.0, 5714.0, 8738.0, 13518.0, 20695.0, 32456.0, 50593.0, 81968.0, 133400.0, 1251260.0, 181547.0, 113372.0, 69447.0, 43763.0, 27603.0, 17788.0, 11434.0, 7704.0, 4922.0, 3400.0, 2141.0, 1428.0, 946.0, 677.0, 433.0, 300.0, 191.0, 143.0, 106.0, 59.0, 49.0, 36.0, 35.0, 10.0, 13.0, 4.0, 3.0, 8.0, 1.0, 2.0, 2.0, 1.0], "bins": [-1.7646484375, -1.7092132568359375, -1.653778076171875, -1.5983428955078125, -1.54290771484375, -1.4874725341796875, -1.432037353515625, -1.3766021728515625, -1.3211669921875, -1.2657318115234375, -1.210296630859375, -1.1548614501953125, -1.09942626953125, -1.0439910888671875, -0.988555908203125, -0.9331207275390625, -0.877685546875, -0.8222503662109375, -0.766815185546875, -0.7113800048828125, -0.65594482421875, -0.6005096435546875, -0.545074462890625, -0.4896392822265625, -0.4342041015625, -0.3787689208984375, -0.323333740234375, -0.2678985595703125, -0.21246337890625, -0.1570281982421875, -0.101593017578125, -0.0461578369140625, 0.00927734375, 0.0647125244140625, 0.120147705078125, 0.1755828857421875, 0.23101806640625, 0.2864532470703125, 0.341888427734375, 0.3973236083984375, 0.4527587890625, 0.5081939697265625, 0.563629150390625, 0.6190643310546875, 0.67449951171875, 0.7299346923828125, 0.785369873046875, 0.8408050537109375, 0.896240234375, 0.9516754150390625, 1.007110595703125, 1.0625457763671875, 1.11798095703125, 1.1734161376953125, 1.228851318359375, 1.2842864990234375, 1.3397216796875, 1.3951568603515625, 1.450592041015625, 1.5060272216796875, 1.56146240234375, 1.6168975830078125, 1.672332763671875, 1.7277679443359375, 1.783203125]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 5.0, 6.0, 5.0, 5.0, 9.0, 4.0, 4.0, 7.0, 12.0, 23.0, 12.0, 9.0, 19.0, 20.0, 30.0, 35.0, 36.0, 51.0, 40.0, 51.0, 49.0, 55.0, 39.0, 49.0, 63.0, 42.0, 60.0, 37.0, 33.0, 37.0, 39.0, 21.0, 17.0, 20.0, 12.0, 10.0, 9.0, 9.0, 4.0, 8.0, 4.0, 5.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0033779144287109375, -0.0032731294631958008, -0.003168344497680664, -0.0030635595321655273, -0.0029587745666503906, -0.002853989601135254, -0.002749204635620117, -0.0026444196701049805, -0.0025396347045898438, -0.002434849739074707, -0.0023300647735595703, -0.0022252798080444336, -0.002120494842529297, -0.00201570987701416, -0.0019109249114990234, -0.0018061399459838867, -0.00170135498046875, -0.0015965700149536133, -0.0014917850494384766, -0.0013870000839233398, -0.0012822151184082031, -0.0011774301528930664, -0.0010726451873779297, -0.000967860221862793, -0.0008630752563476562, -0.0007582902908325195, -0.0006535053253173828, -0.0005487203598022461, -0.0004439353942871094, -0.00033915042877197266, -0.00023436546325683594, -0.00012958049774169922, -2.47955322265625e-05, 7.998943328857422e-05, 0.00018477439880371094, 0.00028955936431884766, 0.0003943443298339844, 0.0004991292953491211, 0.0006039142608642578, 0.0007086992263793945, 0.0008134841918945312, 0.000918269157409668, 0.0010230541229248047, 0.0011278390884399414, 0.0012326240539550781, 0.0013374090194702148, 0.0014421939849853516, 0.0015469789505004883, 0.001651763916015625, 0.0017565488815307617, 0.0018613338470458984, 0.001966118812561035, 0.002070903778076172, 0.0021756887435913086, 0.0022804737091064453, 0.002385258674621582, 0.0024900436401367188, 0.0025948286056518555, 0.002699613571166992, 0.002804398536682129, 0.0029091835021972656, 0.0030139684677124023, 0.003118753433227539, 0.0032235383987426758, 0.0033283233642578125]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 5.0, 3.0, 8.0, 4.0, 6.0, 9.0, 8.0, 10.0, 17.0, 24.0, 27.0, 26.0, 37.0, 30.0, 68.0, 74.0, 104.0, 154.0, 219.0, 347.0, 710.0, 4900.0, 1029167.0, 10646.0, 743.0, 363.0, 229.0, 155.0, 103.0, 62.0, 53.0, 50.0, 44.0, 29.0, 17.0, 22.0, 20.0, 17.0, 8.0, 7.0, 5.0, 4.0, 4.0, 6.0, 2.0, 9.0, 2.0, 3.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0], "bins": [-0.072021484375, -0.0698099136352539, -0.06759834289550781, -0.06538677215576172, -0.06317520141601562, -0.06096363067626953, -0.05875205993652344, -0.056540489196777344, -0.05432891845703125, -0.052117347717285156, -0.04990577697753906, -0.04769420623779297, -0.045482635498046875, -0.04327106475830078, -0.04105949401855469, -0.038847923278808594, -0.0366363525390625, -0.034424781799316406, -0.03221321105957031, -0.03000164031982422, -0.027790069580078125, -0.02557849884033203, -0.023366928100585938, -0.021155357360839844, -0.01894378662109375, -0.016732215881347656, -0.014520645141601562, -0.012309074401855469, -0.010097503662109375, -0.007885932922363281, -0.0056743621826171875, -0.0034627914428710938, -0.001251220703125, 0.0009603500366210938, 0.0031719207763671875, 0.005383491516113281, 0.007595062255859375, 0.009806632995605469, 0.012018203735351562, 0.014229774475097656, 0.01644134521484375, 0.018652915954589844, 0.020864486694335938, 0.02307605743408203, 0.025287628173828125, 0.02749919891357422, 0.029710769653320312, 0.031922340393066406, 0.0341339111328125, 0.036345481872558594, 0.03855705261230469, 0.04076862335205078, 0.042980194091796875, 0.04519176483154297, 0.04740333557128906, 0.049614906311035156, 0.05182647705078125, 0.054038047790527344, 0.05624961853027344, 0.05846118927001953, 0.060672760009765625, 0.06288433074951172, 0.06509590148925781, 0.0673074722290039, 0.06951904296875]}, "gradients/decoder.transformer.h.4.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 6.0, 1.0, 4.0, 12.0, 33.0, 38.0, 71.0, 141.0, 181.0, 207.0, 135.0, 83.0, 46.0, 23.0, 13.0, 10.0, 4.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.002235602820292115, -0.0021529877558350563, -0.0020703724585473537, -0.001987757394090295, -0.001905142213217914, -0.0018225270323455334, -0.0017399118514731526, -0.001657296670600772, -0.001574681606143713, -0.0014920664252713323, -0.0014094512443989515, -0.0013268361799418926, -0.0012442209990695119, -0.0011616058181971312, -0.0010789906373247504, -0.0009963754564523697, -0.000913760275579989, -0.0008311450947076082, -0.0007485299720428884, -0.0006659147911705077, -0.0005832996685057878, -0.0005006844876334071, -0.0004180693067610264, -0.00033545418409630656, -0.00025283900322392583, -0.00017022385145537555, -8.760868513491005e-05, -4.993518814444542e-06, 7.762163295410573e-05, 0.000160236784722656, 0.00024285196559503675, 0.00032546708825975657, 0.0004080822691321373, 0.000490697450004518, 0.0005733125726692379, 0.0006559277535416186, 0.0007385428762063384, 0.0008211580570787191, 0.0009037732379510999, 0.0009863884188234806, 0.0010690034832805395, 0.0011516186641529202, 0.001234233845025301, 0.0013168489094823599, 0.0013994640903547406, 0.0014820792712271214, 0.001564694452099502, 0.0016473096329718828, 0.0017299248138442636, 0.0018125399947166443, 0.001895155175589025, 0.0019777703564614058, 0.0020603854209184647, 0.0021430007182061672, 0.002225615782663226, 0.002308230847120285, 0.0023908461444079876, 0.0024734612088650465, 0.002556076506152749, 0.002638691570609808, 0.0027213068678975105, 0.0028039219323545694, 0.0028865369968116283, 0.002969152294099331, 0.00305176735855639]}, "gradients/decoder.transformer.h.4.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 3.0, 8.0, 11.0, 5.0, 13.0, 14.0, 21.0, 13.0, 23.0, 31.0, 23.0, 36.0, 26.0, 36.0, 39.0, 40.0, 42.0, 40.0, 39.0, 36.0, 56.0, 31.0, 49.0, 44.0, 45.0, 34.0, 30.0, 33.0, 31.0, 21.0, 23.0, 17.0, 14.0, 14.0, 12.0, 11.0, 7.0, 6.0, 4.0, 7.0, 5.0, 5.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0018064379692077637, -0.0017452612519264221, -0.0016840845346450806, -0.001622907817363739, -0.0015617311000823975, -0.001500554382801056, -0.0014393776655197144, -0.0013782009482383728, -0.0013170242309570312, -0.0012558475136756897, -0.0011946707963943481, -0.0011334940791130066, -0.001072317361831665, -0.0010111406445503235, -0.0009499639272689819, -0.0008887872099876404, -0.0008276104927062988, -0.0007664337754249573, -0.0007052570581436157, -0.0006440803408622742, -0.0005829036235809326, -0.0005217269062995911, -0.0004605501890182495, -0.00039937347173690796, -0.0003381967544555664, -0.00027702003717422485, -0.0002158433198928833, -0.00015466660261154175, -9.34898853302002e-05, -3.231316804885864e-05, 2.886354923248291e-05, 9.004026651382446e-05, 0.00015121698379516602, 0.00021239370107650757, 0.0002735704183578491, 0.0003347471356391907, 0.0003959238529205322, 0.0004571005702018738, 0.0005182772874832153, 0.0005794540047645569, 0.0006406307220458984, 0.00070180743932724, 0.0007629841566085815, 0.0008241608738899231, 0.0008853375911712646, 0.0009465143084526062, 0.0010076910257339478, 0.0010688677430152893, 0.0011300444602966309, 0.0011912211775779724, 0.001252397894859314, 0.0013135746121406555, 0.001374751329421997, 0.0014359280467033386, 0.0014971047639846802, 0.0015582814812660217, 0.0016194581985473633, 0.0016806349158287048, 0.0017418116331100464, 0.001802988350391388, 0.0018641650676727295, 0.001925341784954071, 0.0019865185022354126, 0.002047695219516754, 0.0021088719367980957]}, "gradients/decoder.transformer.h.4.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 0.0, 4.0, 3.0, 1.0, 6.0, 3.0, 7.0, 10.0, 10.0, 14.0, 16.0, 18.0, 19.0, 20.0, 28.0, 27.0, 23.0, 32.0, 30.0, 34.0, 44.0, 50.0, 44.0, 40.0, 47.0, 54.0, 47.0, 31.0, 31.0, 38.0, 41.0, 36.0, 25.0, 39.0, 27.0, 25.0, 20.0, 16.0, 9.0, 10.0, 7.0, 5.0, 3.0, 4.0, 5.0, 6.0, 3.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.6015625, -15.0321044921875, -14.462646484375, -13.8931884765625, -13.32373046875, -12.7542724609375, -12.184814453125, -11.6153564453125, -11.0458984375, -10.4764404296875, -9.906982421875, -9.3375244140625, -8.76806640625, -8.1986083984375, -7.629150390625, -7.0596923828125, -6.490234375, -5.9207763671875, -5.351318359375, -4.7818603515625, -4.21240234375, -3.6429443359375, -3.073486328125, -2.5040283203125, -1.9345703125, -1.3651123046875, -0.795654296875, -0.2261962890625, 0.34326171875, 0.9127197265625, 1.482177734375, 2.0516357421875, 2.62109375, 3.1905517578125, 3.760009765625, 4.3294677734375, 4.89892578125, 5.4683837890625, 6.037841796875, 6.6072998046875, 7.1767578125, 7.7462158203125, 8.315673828125, 8.8851318359375, 9.45458984375, 10.0240478515625, 10.593505859375, 11.1629638671875, 11.732421875, 12.3018798828125, 12.871337890625, 13.4407958984375, 14.01025390625, 14.5797119140625, 15.149169921875, 15.7186279296875, 16.2880859375, 16.8575439453125, 17.427001953125, 17.9964599609375, 18.56591796875, 19.1353759765625, 19.704833984375, 20.2742919921875, 20.84375]}, "gradients/decoder.transformer.h.4.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 5.0, 4.0, 10.0, 15.0, 13.0, 19.0, 21.0, 45.0, 49.0, 67.0, 119.0, 148.0, 224.0, 291.0, 441.0, 746.0, 1417.0, 2995.0, 8406.0, 29566.0, 114989.0, 415936.0, 346452.0, 90317.0, 23543.0, 6829.0, 2567.0, 1244.0, 659.0, 446.0, 297.0, 182.0, 142.0, 97.0, 70.0, 52.0, 36.0, 34.0, 25.0, 16.0, 9.0, 10.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-24.265625, -23.570556640625, -22.87548828125, -22.180419921875, -21.4853515625, -20.790283203125, -20.09521484375, -19.400146484375, -18.705078125, -18.010009765625, -17.31494140625, -16.619873046875, -15.9248046875, -15.229736328125, -14.53466796875, -13.839599609375, -13.14453125, -12.449462890625, -11.75439453125, -11.059326171875, -10.3642578125, -9.669189453125, -8.97412109375, -8.279052734375, -7.583984375, -6.888916015625, -6.19384765625, -5.498779296875, -4.8037109375, -4.108642578125, -3.41357421875, -2.718505859375, -2.0234375, -1.328369140625, -0.63330078125, 0.061767578125, 0.7568359375, 1.451904296875, 2.14697265625, 2.842041015625, 3.537109375, 4.232177734375, 4.92724609375, 5.622314453125, 6.3173828125, 7.012451171875, 7.70751953125, 8.402587890625, 9.09765625, 9.792724609375, 10.48779296875, 11.182861328125, 11.8779296875, 12.572998046875, 13.26806640625, 13.963134765625, 14.658203125, 15.353271484375, 16.04833984375, 16.743408203125, 17.4384765625, 18.133544921875, 18.82861328125, 19.523681640625, 20.21875]}, "gradients/decoder.transformer.h.4.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 5.0, 6.0, 11.0, 6.0, 6.0, 14.0, 14.0, 14.0, 25.0, 19.0, 25.0, 41.0, 38.0, 45.0, 49.0, 68.0, 81.0, 156.0, 1514.0, 349.0, 147.0, 77.0, 65.0, 57.0, 47.0, 34.0, 20.0, 26.0, 23.0, 17.0, 11.0, 19.0, 9.0, 4.0, 5.0, 2.0, 3.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-67.4375, -65.560546875, -63.68359375, -61.806640625, -59.9296875, -58.052734375, -56.17578125, -54.298828125, -52.421875, -50.544921875, -48.66796875, -46.791015625, -44.9140625, -43.037109375, -41.16015625, -39.283203125, -37.40625, -35.529296875, -33.65234375, -31.775390625, -29.8984375, -28.021484375, -26.14453125, -24.267578125, -22.390625, -20.513671875, -18.63671875, -16.759765625, -14.8828125, -13.005859375, -11.12890625, -9.251953125, -7.375, -5.498046875, -3.62109375, -1.744140625, 0.1328125, 2.009765625, 3.88671875, 5.763671875, 7.640625, 9.517578125, 11.39453125, 13.271484375, 15.1484375, 17.025390625, 18.90234375, 20.779296875, 22.65625, 24.533203125, 26.41015625, 28.287109375, 30.1640625, 32.041015625, 33.91796875, 35.794921875, 37.671875, 39.548828125, 41.42578125, 43.302734375, 45.1796875, 47.056640625, 48.93359375, 50.810546875, 52.6875]}, "gradients/decoder.transformer.h.4.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 8.0, 6.0, 7.0, 15.0, 13.0, 27.0, 22.0, 42.0, 48.0, 80.0, 87.0, 157.0, 184.0, 310.0, 589.0, 1714.0, 67706.0, 3043156.0, 28638.0, 1336.0, 511.0, 312.0, 203.0, 138.0, 111.0, 64.0, 61.0, 47.0, 28.0, 23.0, 20.0, 12.0, 15.0, 11.0, 4.0, 4.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-84.8125, -82.0244140625, -79.236328125, -76.4482421875, -73.66015625, -70.8720703125, -68.083984375, -65.2958984375, -62.5078125, -59.7197265625, -56.931640625, -54.1435546875, -51.35546875, -48.5673828125, -45.779296875, -42.9912109375, -40.203125, -37.4150390625, -34.626953125, -31.8388671875, -29.05078125, -26.2626953125, -23.474609375, -20.6865234375, -17.8984375, -15.1103515625, -12.322265625, -9.5341796875, -6.74609375, -3.9580078125, -1.169921875, 1.6181640625, 4.40625, 7.1943359375, 9.982421875, 12.7705078125, 15.55859375, 18.3466796875, 21.134765625, 23.9228515625, 26.7109375, 29.4990234375, 32.287109375, 35.0751953125, 37.86328125, 40.6513671875, 43.439453125, 46.2275390625, 49.015625, 51.8037109375, 54.591796875, 57.3798828125, 60.16796875, 62.9560546875, 65.744140625, 68.5322265625, 71.3203125, 74.1083984375, 76.896484375, 79.6845703125, 82.47265625, 85.2607421875, 88.048828125, 90.8369140625, 93.625]}, "gradients/decoder.transformer.h.4.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 463.0, 554.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-455.4273986816406, -425.091064453125, -394.7547302246094, -364.41839599609375, -334.08209228515625, -303.7457580566406, -273.409423828125, -243.07310485839844, -212.7367706298828, -182.4004364013672, -152.06411743164062, -121.727783203125, -91.3914566040039, -61.05513000488281, -30.718795776367188, -0.382476806640625, 29.953857421875, 60.290184020996094, 90.62651062011719, 120.96284484863281, 151.29916381835938, 181.635498046875, 211.97183227539062, 242.3081512451172, 272.64447021484375, 302.9808044433594, 333.317138671875, 363.6534423828125, 393.9897766113281, 424.32611083984375, 454.6624450683594, 484.998779296875, 515.3351440429688, 545.6714477539062, 576.0078125, 606.3441162109375, 636.6804809570312, 667.0167846679688, 697.3531494140625, 727.689453125, 758.0257568359375, 788.362060546875, 818.6984252929688, 849.0347290039062, 879.37109375, 909.7073974609375, 940.043701171875, 970.3800659179688, 1000.7164306640625, 1031.052734375, 1061.3890380859375, 1091.7254638671875, 1122.061767578125, 1152.3980712890625, 1182.734375, 1213.0706787109375, 1243.406982421875, 1273.7432861328125, 1304.07958984375, 1334.416015625, 1364.7523193359375, 1395.088623046875, 1425.4249267578125, 1455.76123046875, 1486.09765625]}, "gradients/decoder.transformer.h.4.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 6.0, 3.0, 6.0, 9.0, 7.0, 17.0, 21.0, 22.0, 29.0, 18.0, 23.0, 32.0, 28.0, 44.0, 30.0, 39.0, 38.0, 56.0, 32.0, 40.0, 33.0, 46.0, 48.0, 33.0, 47.0, 30.0, 37.0, 37.0, 26.0, 24.0, 24.0, 20.0, 23.0, 17.0, 11.0, 10.0, 5.0, 10.0, 4.0, 11.0, 3.0, 5.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0], "bins": [-144.21868896484375, -140.26858520507812, -136.3184814453125, -132.36837768554688, -128.41827392578125, -124.46817016601562, -120.51805877685547, -116.56795501708984, -112.61785125732422, -108.6677474975586, -104.71764373779297, -100.76753997802734, -96.81742858886719, -92.86732482910156, -88.91722106933594, -84.96711730957031, -81.01701354980469, -77.06690979003906, -73.11680603027344, -69.16670227050781, -65.21659851074219, -61.2664909362793, -57.316383361816406, -53.36627960205078, -49.416175842285156, -45.46607208251953, -41.515968322753906, -37.565860748291016, -33.61575698852539, -29.665653228759766, -25.715547561645508, -21.76544189453125, -17.815338134765625, -13.865233421325684, -9.915128707885742, -5.965023994445801, -2.0149192810058594, 1.9351844787597656, 5.885290145874023, 9.835395812988281, 13.785499572753906, 17.73560333251953, 21.68570899963379, 25.635814666748047, 29.585918426513672, 33.5360221862793, 37.48612976074219, 41.43623352050781, 45.38633728027344, 49.33644104003906, 53.28654479980469, 57.23665237426758, 61.1867561340332, 65.13685607910156, 69.08696746826172, 73.03707122802734, 76.98717498779297, 80.9372787475586, 84.88738250732422, 88.83748626708984, 92.78759765625, 96.73770141601562, 100.68780517578125, 104.63790893554688, 108.5880126953125]}, "gradients/decoder.transformer.h.3.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 5.0, 0.0, 5.0, 4.0, 10.0, 4.0, 9.0, 8.0, 8.0, 15.0, 8.0, 20.0, 16.0, 21.0, 24.0, 24.0, 26.0, 25.0, 33.0, 29.0, 26.0, 35.0, 33.0, 52.0, 34.0, 41.0, 40.0, 33.0, 54.0, 34.0, 37.0, 36.0, 24.0, 27.0, 34.0, 30.0, 24.0, 30.0, 19.0, 13.0, 13.0, 15.0, 9.0, 6.0, 2.0, 4.0, 7.0, 1.0, 1.0, 3.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.6953125, -14.1815185546875, -13.667724609375, -13.1539306640625, -12.64013671875, -12.1263427734375, -11.612548828125, -11.0987548828125, -10.5849609375, -10.0711669921875, -9.557373046875, -9.0435791015625, -8.52978515625, -8.0159912109375, -7.502197265625, -6.9884033203125, -6.474609375, -5.9608154296875, -5.447021484375, -4.9332275390625, -4.41943359375, -3.9056396484375, -3.391845703125, -2.8780517578125, -2.3642578125, -1.8504638671875, -1.336669921875, -0.8228759765625, -0.30908203125, 0.2047119140625, 0.718505859375, 1.2322998046875, 1.74609375, 2.2598876953125, 2.773681640625, 3.2874755859375, 3.80126953125, 4.3150634765625, 4.828857421875, 5.3426513671875, 5.8564453125, 6.3702392578125, 6.884033203125, 7.3978271484375, 7.91162109375, 8.4254150390625, 8.939208984375, 9.4530029296875, 9.966796875, 10.4805908203125, 10.994384765625, 11.5081787109375, 12.02197265625, 12.5357666015625, 13.049560546875, 13.5633544921875, 14.0771484375, 14.5909423828125, 15.104736328125, 15.6185302734375, 16.13232421875, 16.6461181640625, 17.159912109375, 17.6737060546875, 18.1875]}, "gradients/decoder.transformer.h.3.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 2.0, 3.0, 5.0, 7.0, 6.0, 14.0, 8.0, 12.0, 15.0, 13.0, 16.0, 25.0, 31.0, 43.0, 41.0, 58.0, 80.0, 110.0, 157.0, 176.0, 224.0, 306.0, 383.0, 713.0, 2102068.0, 2087353.0, 662.0, 405.0, 318.0, 212.0, 174.0, 152.0, 119.0, 66.0, 72.0, 46.0, 44.0, 23.0, 22.0, 16.0, 15.0, 13.0, 11.0, 15.0, 9.0, 6.0, 5.0, 4.0, 5.0, 1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0], "bins": [-379.25, -367.3984375, -355.546875, -343.6953125, -331.84375, -319.9921875, -308.140625, -296.2890625, -284.4375, -272.5859375, -260.734375, -248.8828125, -237.03125, -225.1796875, -213.328125, -201.4765625, -189.625, -177.7734375, -165.921875, -154.0703125, -142.21875, -130.3671875, -118.515625, -106.6640625, -94.8125, -82.9609375, -71.109375, -59.2578125, -47.40625, -35.5546875, -23.703125, -11.8515625, 0.0, 11.8515625, 23.703125, 35.5546875, 47.40625, 59.2578125, 71.109375, 82.9609375, 94.8125, 106.6640625, 118.515625, 130.3671875, 142.21875, 154.0703125, 165.921875, 177.7734375, 189.625, 201.4765625, 213.328125, 225.1796875, 237.03125, 248.8828125, 260.734375, 272.5859375, 284.4375, 296.2890625, 308.140625, 319.9921875, 331.84375, 343.6953125, 355.546875, 367.3984375, 379.25]}, "gradients/decoder.transformer.h.3.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 5.0, 2.0, 2.0, 8.0, 5.0, 20.0, 25.0, 25.0, 39.0, 67.0, 103.0, 166.0, 253.0, 498.0, 801.0, 767.0, 527.0, 313.0, 153.0, 98.0, 66.0, 44.0, 32.0, 16.0, 16.0, 8.0, 7.0, 7.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-34.90625, -33.7431640625, -32.580078125, -31.4169921875, -30.25390625, -29.0908203125, -27.927734375, -26.7646484375, -25.6015625, -24.4384765625, -23.275390625, -22.1123046875, -20.94921875, -19.7861328125, -18.623046875, -17.4599609375, -16.296875, -15.1337890625, -13.970703125, -12.8076171875, -11.64453125, -10.4814453125, -9.318359375, -8.1552734375, -6.9921875, -5.8291015625, -4.666015625, -3.5029296875, -2.33984375, -1.1767578125, -0.013671875, 1.1494140625, 2.3125, 3.4755859375, 4.638671875, 5.8017578125, 6.96484375, 8.1279296875, 9.291015625, 10.4541015625, 11.6171875, 12.7802734375, 13.943359375, 15.1064453125, 16.26953125, 17.4326171875, 18.595703125, 19.7587890625, 20.921875, 22.0849609375, 23.248046875, 24.4111328125, 25.57421875, 26.7373046875, 27.900390625, 29.0634765625, 30.2265625, 31.3896484375, 32.552734375, 33.7158203125, 34.87890625, 36.0419921875, 37.205078125, 38.3681640625, 39.53125]}, "gradients/decoder.transformer.h.3.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 7.0, 5.0, 10.0, 7.0, 12.0, 14.0, 13.0, 21.0, 28.0, 29.0, 30.0, 39.0, 41.0, 55.0, 79.0, 121.0, 474.0, 6109.0, 4128247.0, 57385.0, 896.0, 200.0, 88.0, 54.0, 49.0, 44.0, 35.0, 22.0, 22.0, 24.0, 20.0, 19.0, 13.0, 4.0, 12.0, 9.0, 10.0, 5.0, 6.0, 0.0, 5.0, 3.0, 4.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-178.625, -172.607421875, -166.58984375, -160.572265625, -154.5546875, -148.537109375, -142.51953125, -136.501953125, -130.484375, -124.466796875, -118.44921875, -112.431640625, -106.4140625, -100.396484375, -94.37890625, -88.361328125, -82.34375, -76.326171875, -70.30859375, -64.291015625, -58.2734375, -52.255859375, -46.23828125, -40.220703125, -34.203125, -28.185546875, -22.16796875, -16.150390625, -10.1328125, -4.115234375, 1.90234375, 7.919921875, 13.9375, 19.955078125, 25.97265625, 31.990234375, 38.0078125, 44.025390625, 50.04296875, 56.060546875, 62.078125, 68.095703125, 74.11328125, 80.130859375, 86.1484375, 92.166015625, 98.18359375, 104.201171875, 110.21875, 116.236328125, 122.25390625, 128.271484375, 134.2890625, 140.306640625, 146.32421875, 152.341796875, 158.359375, 164.376953125, 170.39453125, 176.412109375, 182.4296875, 188.447265625, 194.46484375, 200.482421875, 206.5]}, "gradients/decoder.transformer.h.3.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 11.0, 338.0, 623.0, 40.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1047.48828125, -1027.42041015625, -1007.3526611328125, -987.2847900390625, -967.2169799804688, -947.149169921875, -927.0813598632812, -907.0135498046875, -886.9456787109375, -866.8778686523438, -846.81005859375, -826.7421875, -806.6743774414062, -786.6065673828125, -766.5387573242188, -746.470947265625, -726.403076171875, -706.3352661132812, -686.2674560546875, -666.1995849609375, -646.1317749023438, -626.06396484375, -605.9961547851562, -585.9283447265625, -565.8605346679688, -545.792724609375, -525.7249145507812, -505.6570739746094, -485.5892333984375, -465.52142333984375, -445.45361328125, -425.3857727050781, -405.31793212890625, -385.2501220703125, -365.1822814941406, -345.1144714355469, -325.046630859375, -304.97882080078125, -284.9110107421875, -264.8431701660156, -244.77536010742188, -224.70753479003906, -204.63970947265625, -184.5718994140625, -164.50405883789062, -144.43624877929688, -124.36842346191406, -104.30059814453125, -84.23277282714844, -64.16494750976562, -44.09712600708008, -24.02930450439453, -3.9614791870117188, 16.106346130371094, 36.174163818359375, 56.24198913574219, 76.309814453125, 96.37763977050781, 116.44546508789062, 136.51327514648438, 156.58111572265625, 176.64892578125, 196.7167510986328, 216.78457641601562, 236.85240173339844]}, "gradients/decoder.transformer.h.3.ln_2.bias": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 1.0, 2.0, 2.0, 3.0, 7.0, 8.0, 3.0, 7.0, 7.0, 12.0, 25.0, 17.0, 14.0, 22.0, 13.0, 23.0, 25.0, 33.0, 21.0, 33.0, 28.0, 40.0, 30.0, 29.0, 32.0, 37.0, 37.0, 42.0, 41.0, 31.0, 40.0, 34.0, 40.0, 38.0, 37.0, 20.0, 23.0, 14.0, 26.0, 18.0, 20.0, 12.0, 10.0, 15.0, 6.0, 6.0, 5.0, 6.0, 5.0, 5.0, 3.0, 3.0, 1.0, 0.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-78.3214340209961, -75.7307357788086, -73.14002990722656, -70.54933166503906, -67.95863342285156, -65.36792755126953, -62.77722930908203, -60.186527252197266, -57.5958251953125, -55.005123138427734, -52.41442108154297, -49.82372283935547, -47.2330207824707, -44.64231872558594, -42.05162048339844, -39.46091842651367, -36.870216369628906, -34.27951431274414, -31.688814163208008, -29.098114013671875, -26.50741195678711, -23.916709899902344, -21.32600975036621, -18.735309600830078, -16.144607543945312, -13.553906440734863, -10.963205337524414, -8.372504234313965, -5.781803131103516, -3.1911020278930664, -0.6004009246826172, 1.9902992248535156, 4.58099365234375, 7.171694755554199, 9.762395858764648, 12.353096961975098, 14.943798065185547, 17.534500122070312, 20.125200271606445, 22.715900421142578, 25.306602478027344, 27.89730453491211, 30.488004684448242, 33.078704833984375, 35.66940689086914, 38.260108947753906, 40.850807189941406, 43.44150924682617, 46.03221130371094, 48.6229133605957, 51.21361541748047, 53.80431365966797, 56.395015716552734, 58.9857177734375, 61.576416015625, 64.1671142578125, 66.75782012939453, 69.34851837158203, 71.93922424316406, 74.52992248535156, 77.12062072753906, 79.7113265991211, 82.3020248413086, 84.89273071289062, 87.48342895507812]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 6.0, 1.0, 4.0, 3.0, 9.0, 9.0, 8.0, 16.0, 14.0, 13.0, 13.0, 26.0, 13.0, 11.0, 25.0, 30.0, 36.0, 31.0, 20.0, 34.0, 40.0, 41.0, 40.0, 52.0, 51.0, 42.0, 47.0, 40.0, 38.0, 45.0, 22.0, 33.0, 32.0, 22.0, 25.0, 17.0, 16.0, 16.0, 16.0, 18.0, 5.0, 7.0, 7.0, 5.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-15.3671875, -14.8199462890625, -14.272705078125, -13.7254638671875, -13.17822265625, -12.6309814453125, -12.083740234375, -11.5364990234375, -10.9892578125, -10.4420166015625, -9.894775390625, -9.3475341796875, -8.80029296875, -8.2530517578125, -7.705810546875, -7.1585693359375, -6.611328125, -6.0640869140625, -5.516845703125, -4.9696044921875, -4.42236328125, -3.8751220703125, -3.327880859375, -2.7806396484375, -2.2333984375, -1.6861572265625, -1.138916015625, -0.5916748046875, -0.04443359375, 0.5028076171875, 1.050048828125, 1.5972900390625, 2.14453125, 2.6917724609375, 3.239013671875, 3.7862548828125, 4.33349609375, 4.8807373046875, 5.427978515625, 5.9752197265625, 6.5224609375, 7.0697021484375, 7.616943359375, 8.1641845703125, 8.71142578125, 9.2586669921875, 9.805908203125, 10.3531494140625, 10.900390625, 11.4476318359375, 11.994873046875, 12.5421142578125, 13.08935546875, 13.6365966796875, 14.183837890625, 14.7310791015625, 15.2783203125, 15.8255615234375, 16.372802734375, 16.9200439453125, 17.46728515625, 18.0145263671875, 18.561767578125, 19.1090087890625, 19.65625]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 2.0, 4.0, 3.0, 2.0, 7.0, 10.0, 12.0, 33.0, 34.0, 44.0, 94.0, 110.0, 197.0, 348.0, 555.0, 909.0, 1525.0, 2519.0, 4152.0, 6896.0, 11399.0, 18994.0, 31915.0, 55294.0, 96999.0, 177853.0, 260383.0, 163667.0, 89685.0, 51210.0, 29557.0, 17569.0, 10411.0, 6389.0, 3793.0, 2321.0, 1327.0, 904.0, 510.0, 321.0, 198.0, 164.0, 88.0, 52.0, 42.0, 21.0, 21.0, 9.0, 5.0, 1.0, 3.0, 4.0, 3.0], "bins": [-2.6328125, -2.562469482421875, -2.49212646484375, -2.421783447265625, -2.3514404296875, -2.281097412109375, -2.21075439453125, -2.140411376953125, -2.070068359375, -1.999725341796875, -1.92938232421875, -1.859039306640625, -1.7886962890625, -1.718353271484375, -1.64801025390625, -1.577667236328125, -1.50732421875, -1.436981201171875, -1.36663818359375, -1.296295166015625, -1.2259521484375, -1.155609130859375, -1.08526611328125, -1.014923095703125, -0.944580078125, -0.874237060546875, -0.80389404296875, -0.733551025390625, -0.6632080078125, -0.592864990234375, -0.52252197265625, -0.452178955078125, -0.3818359375, -0.311492919921875, -0.24114990234375, -0.170806884765625, -0.1004638671875, -0.030120849609375, 0.04022216796875, 0.110565185546875, 0.180908203125, 0.251251220703125, 0.32159423828125, 0.391937255859375, 0.4622802734375, 0.532623291015625, 0.60296630859375, 0.673309326171875, 0.74365234375, 0.813995361328125, 0.88433837890625, 0.954681396484375, 1.0250244140625, 1.095367431640625, 1.16571044921875, 1.236053466796875, 1.306396484375, 1.376739501953125, 1.44708251953125, 1.517425537109375, 1.5877685546875, 1.658111572265625, 1.72845458984375, 1.798797607421875, 1.869140625]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 3.0, 9.0, 2.0, 8.0, 12.0, 13.0, 11.0, 6.0, 14.0, 18.0, 11.0, 20.0, 18.0, 32.0, 32.0, 27.0, 28.0, 31.0, 36.0, 45.0, 51.0, 51.0, 35.0, 1061.0, 40.0, 36.0, 43.0, 34.0, 27.0, 38.0, 21.0, 29.0, 18.0, 23.0, 26.0, 17.0, 22.0, 17.0, 21.0, 10.0, 11.0, 3.0, 2.0, 3.0, 6.0, 4.0, 1.0, 1.0, 5.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-10.2109375, -9.8736572265625, -9.536376953125, -9.1990966796875, -8.86181640625, -8.5245361328125, -8.187255859375, -7.8499755859375, -7.5126953125, -7.1754150390625, -6.838134765625, -6.5008544921875, -6.16357421875, -5.8262939453125, -5.489013671875, -5.1517333984375, -4.814453125, -4.4771728515625, -4.139892578125, -3.8026123046875, -3.46533203125, -3.1280517578125, -2.790771484375, -2.4534912109375, -2.1162109375, -1.7789306640625, -1.441650390625, -1.1043701171875, -0.76708984375, -0.4298095703125, -0.092529296875, 0.2447509765625, 0.58203125, 0.9193115234375, 1.256591796875, 1.5938720703125, 1.93115234375, 2.2684326171875, 2.605712890625, 2.9429931640625, 3.2802734375, 3.6175537109375, 3.954833984375, 4.2921142578125, 4.62939453125, 4.9666748046875, 5.303955078125, 5.6412353515625, 5.978515625, 6.3157958984375, 6.653076171875, 6.9903564453125, 7.32763671875, 7.6649169921875, 8.002197265625, 8.3394775390625, 8.6767578125, 9.0140380859375, 9.351318359375, 9.6885986328125, 10.02587890625, 10.3631591796875, 10.700439453125, 11.0377197265625, 11.375]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 6.0, 5.0, 4.0, 6.0, 15.0, 21.0, 40.0, 57.0, 77.0, 138.0, 193.0, 268.0, 433.0, 695.0, 1003.0, 1523.0, 2464.0, 3794.0, 6051.0, 9529.0, 14653.0, 22967.0, 36276.0, 58010.0, 93580.0, 154476.0, 1265854.0, 162086.0, 97685.0, 60487.0, 38058.0, 24026.0, 15273.0, 9950.0, 6044.0, 4058.0, 2600.0, 1699.0, 1029.0, 719.0, 449.0, 277.0, 212.0, 104.0, 79.0, 56.0, 40.0, 26.0, 16.0, 12.0, 7.0, 6.0, 4.0, 5.0, 1.0, 0.0, 1.0], "bins": [-1.9267578125, -1.86932373046875, -1.8118896484375, -1.75445556640625, -1.697021484375, -1.63958740234375, -1.5821533203125, -1.52471923828125, -1.46728515625, -1.40985107421875, -1.3524169921875, -1.29498291015625, -1.237548828125, -1.18011474609375, -1.1226806640625, -1.06524658203125, -1.0078125, -0.95037841796875, -0.8929443359375, -0.83551025390625, -0.778076171875, -0.72064208984375, -0.6632080078125, -0.60577392578125, -0.54833984375, -0.49090576171875, -0.4334716796875, -0.37603759765625, -0.318603515625, -0.26116943359375, -0.2037353515625, -0.14630126953125, -0.0888671875, -0.03143310546875, 0.0260009765625, 0.08343505859375, 0.140869140625, 0.19830322265625, 0.2557373046875, 0.31317138671875, 0.37060546875, 0.42803955078125, 0.4854736328125, 0.54290771484375, 0.600341796875, 0.65777587890625, 0.7152099609375, 0.77264404296875, 0.830078125, 0.88751220703125, 0.9449462890625, 1.00238037109375, 1.059814453125, 1.11724853515625, 1.1746826171875, 1.23211669921875, 1.28955078125, 1.34698486328125, 1.4044189453125, 1.46185302734375, 1.519287109375, 1.57672119140625, 1.6341552734375, 1.69158935546875, 1.7490234375]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 5.0, 4.0, 7.0, 6.0, 5.0, 7.0, 9.0, 15.0, 18.0, 20.0, 25.0, 20.0, 31.0, 40.0, 53.0, 71.0, 88.0, 93.0, 72.0, 58.0, 67.0, 60.0, 42.0, 38.0, 24.0, 25.0, 22.0, 18.0, 11.0, 14.0, 8.0, 4.0, 11.0, 6.0, 1.0, 4.0, 4.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004520416259765625, -0.004369974136352539, -0.004219532012939453, -0.004069089889526367, -0.003918647766113281, -0.0037682056427001953, -0.0036177635192871094, -0.0034673213958740234, -0.0033168792724609375, -0.0031664371490478516, -0.0030159950256347656, -0.0028655529022216797, -0.0027151107788085938, -0.002564668655395508, -0.002414226531982422, -0.002263784408569336, -0.00211334228515625, -0.001962900161743164, -0.0018124580383300781, -0.0016620159149169922, -0.0015115737915039062, -0.0013611316680908203, -0.0012106895446777344, -0.0010602474212646484, -0.0009098052978515625, -0.0007593631744384766, -0.0006089210510253906, -0.0004584789276123047, -0.00030803680419921875, -0.0001575946807861328, -7.152557373046875e-06, 0.00014328956604003906, 0.000293731689453125, 0.00044417381286621094, 0.0005946159362792969, 0.0007450580596923828, 0.0008955001831054688, 0.0010459423065185547, 0.0011963844299316406, 0.0013468265533447266, 0.0014972686767578125, 0.0016477108001708984, 0.0017981529235839844, 0.0019485950469970703, 0.0020990371704101562, 0.002249479293823242, 0.002399921417236328, 0.002550363540649414, 0.0027008056640625, 0.002851247787475586, 0.003001689910888672, 0.003152132034301758, 0.0033025741577148438, 0.0034530162811279297, 0.0036034584045410156, 0.0037539005279541016, 0.0039043426513671875, 0.0040547847747802734, 0.004205226898193359, 0.004355669021606445, 0.004506111145019531, 0.004656553268432617, 0.004806995391845703, 0.004957437515258789, 0.005107879638671875]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 3.0, 1.0, 3.0, 5.0, 6.0, 3.0, 6.0, 2.0, 11.0, 13.0, 18.0, 17.0, 27.0, 34.0, 53.0, 65.0, 99.0, 112.0, 201.0, 334.0, 538.0, 1192.0, 875687.0, 167682.0, 1067.0, 507.0, 279.0, 171.0, 109.0, 81.0, 65.0, 37.0, 35.0, 21.0, 22.0, 9.0, 7.0, 10.0, 8.0, 8.0, 5.0, 3.0, 4.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09857177734375, -0.09566593170166016, -0.09276008605957031, -0.08985424041748047, -0.08694839477539062, -0.08404254913330078, -0.08113670349121094, -0.0782308578491211, -0.07532501220703125, -0.0724191665649414, -0.06951332092285156, -0.06660747528076172, -0.06370162963867188, -0.06079578399658203, -0.05788993835449219, -0.054984092712402344, -0.0520782470703125, -0.049172401428222656, -0.04626655578613281, -0.04336071014404297, -0.040454864501953125, -0.03754901885986328, -0.03464317321777344, -0.031737327575683594, -0.02883148193359375, -0.025925636291503906, -0.023019790649414062, -0.02011394500732422, -0.017208099365234375, -0.014302253723144531, -0.011396408081054688, -0.008490562438964844, -0.005584716796875, -0.0026788711547851562, 0.0002269744873046875, 0.0031328201293945312, 0.006038665771484375, 0.008944511413574219, 0.011850357055664062, 0.014756202697753906, 0.01766204833984375, 0.020567893981933594, 0.023473739624023438, 0.02637958526611328, 0.029285430908203125, 0.03219127655029297, 0.03509712219238281, 0.038002967834472656, 0.0409088134765625, 0.043814659118652344, 0.04672050476074219, 0.04962635040283203, 0.052532196044921875, 0.05543804168701172, 0.05834388732910156, 0.061249732971191406, 0.06415557861328125, 0.0670614242553711, 0.06996726989746094, 0.07287311553955078, 0.07577896118164062, 0.07868480682373047, 0.08159065246582031, 0.08449649810791016, 0.08740234375]}, "gradients/decoder.transformer.h.3.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 12.0, 27.0, 64.0, 159.0, 303.0, 250.0, 129.0, 38.0, 19.0, 1.0, 3.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0023831615690141916, -0.0022629122249782085, -0.002142663113772869, -0.0020224140025675297, -0.0019021646585315466, -0.0017819154309108853, -0.001661666203290224, -0.0015414169756695628, -0.0014211677480489016, -0.0013009185204282403, -0.001180669292807579, -0.0010604200651869178, -0.0009401708375662565, -0.0008199216099455953, -0.000699672382324934, -0.0005794231547042727, -0.0004591739270836115, -0.00033892469946295023, -0.00021867547184228897, -9.842624422162771e-05, 2.1822983399033546e-05, 0.0001420722110196948, 0.00026232143864035606, 0.0003825706662610173, 0.0005028198938816786, 0.0006230691215023398, 0.0007433183491230011, 0.0008635675767436624, 0.0009838168043643236, 0.0011040660319849849, 0.0012243152596056461, 0.0013445644872263074, 0.001464813482016325, 0.0015850627096369863, 0.0017053119372576475, 0.0018255611648783088, 0.00194581039249897, 0.0020660595037043095, 0.0021863088477402925, 0.0023065581917762756, 0.002426807302981615, 0.0025470564141869545, 0.0026673057582229376, 0.0027875551022589207, 0.00290780421346426, 0.0030280533246695995, 0.0031483026687055826, 0.0032685520127415657, 0.003388801123946905, 0.0035090502351522446, 0.0036292995791882277, 0.0037495489232242107, 0.00386979803442955, 0.00399004714563489, 0.004110296256840229, 0.004230545833706856, 0.004350794944912195, 0.004471044056117535, 0.004591293632984161, 0.004711542744189501, 0.00483179185539484, 0.00495204096660018, 0.005072290077805519, 0.005192539654672146, 0.005312788765877485]}, "gradients/decoder.transformer.h.3.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 5.0, 4.0, 7.0, 3.0, 2.0, 11.0, 9.0, 10.0, 13.0, 14.0, 19.0, 22.0, 24.0, 33.0, 36.0, 36.0, 39.0, 43.0, 42.0, 56.0, 38.0, 37.0, 35.0, 49.0, 51.0, 44.0, 37.0, 39.0, 37.0, 34.0, 29.0, 22.0, 23.0, 21.0, 13.0, 14.0, 18.0, 11.0, 8.0, 8.0, 4.0, 5.0, 5.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.0024521350860595703, -0.002376510761678219, -0.0023008864372968674, -0.002225262112915516, -0.0021496377885341644, -0.002074013464152813, -0.0019983891397714615, -0.00192276481539011, -0.0018471404910087585, -0.001771516166627407, -0.0016958918422460556, -0.0016202675178647041, -0.0015446431934833527, -0.0014690188691020012, -0.0013933945447206497, -0.0013177702203392982, -0.0012421458959579468, -0.0011665215715765953, -0.0010908972471952438, -0.0010152729228138924, -0.0009396485984325409, -0.0008640242740511894, -0.000788399949669838, -0.0007127756252884865, -0.000637151300907135, -0.0005615269765257835, -0.00048590265214443207, -0.0004102783277630806, -0.0003346540033817291, -0.00025902967900037766, -0.00018340535461902618, -0.00010778103023767471, -3.215670585632324e-05, 4.346761852502823e-05, 0.0001190919429063797, 0.00019471626728773117, 0.00027034059166908264, 0.0003459649160504341, 0.0004215892404317856, 0.0004972135648131371, 0.0005728378891944885, 0.00064846221357584, 0.0007240865379571915, 0.0007997108623385429, 0.0008753351867198944, 0.0009509595111012459, 0.0010265838354825974, 0.0011022081598639488, 0.0011778324842453003, 0.0012534568086266518, 0.0013290811330080032, 0.0014047054573893547, 0.0014803297817707062, 0.0015559541061520576, 0.0016315784305334091, 0.0017072027549147606, 0.001782827079296112, 0.0018584514036774635, 0.001934075728058815, 0.0020097000524401665, 0.002085324376821518, 0.0021609487012028694, 0.002236573025584221, 0.0023121973499655724, 0.002387821674346924]}, "gradients/decoder.transformer.h.3.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 6.0, 1.0, 4.0, 3.0, 9.0, 9.0, 8.0, 16.0, 14.0, 13.0, 13.0, 26.0, 13.0, 11.0, 25.0, 30.0, 36.0, 31.0, 20.0, 34.0, 40.0, 41.0, 40.0, 52.0, 51.0, 42.0, 47.0, 40.0, 38.0, 45.0, 22.0, 33.0, 32.0, 22.0, 25.0, 17.0, 16.0, 16.0, 16.0, 18.0, 5.0, 7.0, 7.0, 5.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-15.3671875, -14.8199462890625, -14.272705078125, -13.7254638671875, -13.17822265625, -12.6309814453125, -12.083740234375, -11.5364990234375, -10.9892578125, -10.4420166015625, -9.894775390625, -9.3475341796875, -8.80029296875, -8.2530517578125, -7.705810546875, -7.1585693359375, -6.611328125, -6.0640869140625, -5.516845703125, -4.9696044921875, -4.42236328125, -3.8751220703125, -3.327880859375, -2.7806396484375, -2.2333984375, -1.6861572265625, -1.138916015625, -0.5916748046875, -0.04443359375, 0.5028076171875, 1.050048828125, 1.5972900390625, 2.14453125, 2.6917724609375, 3.239013671875, 3.7862548828125, 4.33349609375, 4.8807373046875, 5.427978515625, 5.9752197265625, 6.5224609375, 7.0697021484375, 7.616943359375, 8.1641845703125, 8.71142578125, 9.2586669921875, 9.805908203125, 10.3531494140625, 10.900390625, 11.4476318359375, 11.994873046875, 12.5421142578125, 13.08935546875, 13.6365966796875, 14.183837890625, 14.7310791015625, 15.2783203125, 15.8255615234375, 16.372802734375, 16.9200439453125, 17.46728515625, 18.0145263671875, 18.561767578125, 19.1090087890625, 19.65625]}, "gradients/decoder.transformer.h.3.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 7.0, 9.0, 3.0, 10.0, 26.0, 27.0, 35.0, 49.0, 66.0, 91.0, 147.0, 185.0, 208.0, 294.0, 416.0, 521.0, 692.0, 886.0, 1257.0, 2200.0, 6319.0, 34176.0, 239808.0, 598574.0, 132994.0, 19023.0, 4206.0, 1818.0, 1081.0, 844.0, 663.0, 487.0, 393.0, 282.0, 221.0, 136.0, 121.0, 96.0, 49.0, 44.0, 32.0, 16.0, 10.0, 11.0, 14.0, 6.0, 4.0, 5.0, 3.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-29.1875, -28.25634765625, -27.3251953125, -26.39404296875, -25.462890625, -24.53173828125, -23.6005859375, -22.66943359375, -21.73828125, -20.80712890625, -19.8759765625, -18.94482421875, -18.013671875, -17.08251953125, -16.1513671875, -15.22021484375, -14.2890625, -13.35791015625, -12.4267578125, -11.49560546875, -10.564453125, -9.63330078125, -8.7021484375, -7.77099609375, -6.83984375, -5.90869140625, -4.9775390625, -4.04638671875, -3.115234375, -2.18408203125, -1.2529296875, -0.32177734375, 0.609375, 1.54052734375, 2.4716796875, 3.40283203125, 4.333984375, 5.26513671875, 6.1962890625, 7.12744140625, 8.05859375, 8.98974609375, 9.9208984375, 10.85205078125, 11.783203125, 12.71435546875, 13.6455078125, 14.57666015625, 15.5078125, 16.43896484375, 17.3701171875, 18.30126953125, 19.232421875, 20.16357421875, 21.0947265625, 22.02587890625, 22.95703125, 23.88818359375, 24.8193359375, 25.75048828125, 26.681640625, 27.61279296875, 28.5439453125, 29.47509765625, 30.40625]}, "gradients/decoder.transformer.h.3.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 1.0, 3.0, 4.0, 4.0, 8.0, 5.0, 4.0, 12.0, 8.0, 10.0, 7.0, 12.0, 15.0, 18.0, 9.0, 36.0, 25.0, 34.0, 33.0, 31.0, 42.0, 36.0, 54.0, 76.0, 189.0, 1556.0, 280.0, 104.0, 67.0, 53.0, 46.0, 33.0, 32.0, 32.0, 29.0, 17.0, 20.0, 16.0, 18.0, 13.0, 10.0, 10.0, 8.0, 6.0, 4.0, 6.0, 8.0, 2.0, 4.0, 2.0, 0.0, 2.0, 0.0, 2.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-42.03125, -40.61865234375, -39.2060546875, -37.79345703125, -36.380859375, -34.96826171875, -33.5556640625, -32.14306640625, -30.73046875, -29.31787109375, -27.9052734375, -26.49267578125, -25.080078125, -23.66748046875, -22.2548828125, -20.84228515625, -19.4296875, -18.01708984375, -16.6044921875, -15.19189453125, -13.779296875, -12.36669921875, -10.9541015625, -9.54150390625, -8.12890625, -6.71630859375, -5.3037109375, -3.89111328125, -2.478515625, -1.06591796875, 0.3466796875, 1.75927734375, 3.171875, 4.58447265625, 5.9970703125, 7.40966796875, 8.822265625, 10.23486328125, 11.6474609375, 13.06005859375, 14.47265625, 15.88525390625, 17.2978515625, 18.71044921875, 20.123046875, 21.53564453125, 22.9482421875, 24.36083984375, 25.7734375, 27.18603515625, 28.5986328125, 30.01123046875, 31.423828125, 32.83642578125, 34.2490234375, 35.66162109375, 37.07421875, 38.48681640625, 39.8994140625, 41.31201171875, 42.724609375, 44.13720703125, 45.5498046875, 46.96240234375, 48.375]}, "gradients/decoder.transformer.h.3.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 3.0, 1.0, 8.0, 7.0, 5.0, 15.0, 15.0, 12.0, 17.0, 17.0, 18.0, 30.0, 29.0, 44.0, 75.0, 83.0, 152.0, 222.0, 397.0, 865.0, 3850.0, 3118596.0, 18645.0, 1191.0, 503.0, 269.0, 162.0, 118.0, 89.0, 63.0, 53.0, 27.0, 27.0, 16.0, 18.0, 15.0, 9.0, 10.0, 8.0, 6.0, 4.0, 4.0, 6.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-153.375, -148.81640625, -144.2578125, -139.69921875, -135.140625, -130.58203125, -126.0234375, -121.46484375, -116.90625, -112.34765625, -107.7890625, -103.23046875, -98.671875, -94.11328125, -89.5546875, -84.99609375, -80.4375, -75.87890625, -71.3203125, -66.76171875, -62.203125, -57.64453125, -53.0859375, -48.52734375, -43.96875, -39.41015625, -34.8515625, -30.29296875, -25.734375, -21.17578125, -16.6171875, -12.05859375, -7.5, -2.94140625, 1.6171875, 6.17578125, 10.734375, 15.29296875, 19.8515625, 24.41015625, 28.96875, 33.52734375, 38.0859375, 42.64453125, 47.203125, 51.76171875, 56.3203125, 60.87890625, 65.4375, 69.99609375, 74.5546875, 79.11328125, 83.671875, 88.23046875, 92.7890625, 97.34765625, 101.90625, 106.46484375, 111.0234375, 115.58203125, 120.140625, 124.69921875, 129.2578125, 133.81640625, 138.375]}, "gradients/decoder.transformer.h.3.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 15.0, 670.0, 331.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-98.5048828125, -81.67568969726562, -64.84650421142578, -48.01731491088867, -31.188125610351562, -14.358932495117188, 2.4702529907226562, 19.2994384765625, 36.128631591796875, 52.957820892333984, 69.7870101928711, 86.61619567871094, 103.44538879394531, 120.27458190917969, 137.103759765625, 153.93295288085938, 170.76214599609375, 187.59133911132812, 204.4205322265625, 221.2497100830078, 238.0789031982422, 254.90809631347656, 271.7372741699219, 288.56646728515625, 305.3956604003906, 322.224853515625, 339.0540466308594, 355.88323974609375, 372.71240234375, 389.5416259765625, 406.37078857421875, 423.1999816894531, 440.0291748046875, 456.8583679199219, 473.68756103515625, 490.5167541503906, 507.345947265625, 524.1751098632812, 541.0043334960938, 557.83349609375, 574.6627197265625, 591.4918823242188, 608.3211059570312, 625.1502685546875, 641.9794921875, 658.8086547851562, 675.6378784179688, 692.467041015625, 709.2962036132812, 726.1253662109375, 742.95458984375, 759.7837524414062, 776.6129760742188, 793.442138671875, 810.2713623046875, 827.1005249023438, 843.9296875, 860.7588500976562, 877.5880737304688, 894.417236328125, 911.2464599609375, 928.0756225585938, 944.9048461914062, 961.7340087890625, 978.563232421875]}, "gradients/decoder.transformer.h.3.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 4.0, 6.0, 3.0, 10.0, 16.0, 9.0, 12.0, 17.0, 11.0, 21.0, 29.0, 25.0, 17.0, 31.0, 38.0, 30.0, 40.0, 37.0, 47.0, 52.0, 43.0, 38.0, 48.0, 36.0, 50.0, 35.0, 34.0, 31.0, 31.0, 30.0, 17.0, 23.0, 17.0, 21.0, 13.0, 12.0, 13.0, 19.0, 7.0, 5.0, 9.0, 4.0, 3.0, 5.0, 7.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-126.76602172851562, -122.73422241210938, -118.70243072509766, -114.6706314086914, -110.63883209228516, -106.60704040527344, -102.57524108886719, -98.54344177246094, -94.51164245605469, -90.47984313964844, -86.44805145263672, -82.41625213623047, -78.38445281982422, -74.3526611328125, -70.32086181640625, -66.2890625, -62.25727081298828, -58.2254753112793, -54.19367599487305, -50.16188049316406, -46.13008117675781, -42.09828567504883, -38.066490173339844, -34.034690856933594, -30.00289535522461, -25.971097946166992, -21.939300537109375, -17.90750503540039, -13.875707626342773, -9.843910217285156, -5.812114715576172, -1.7803173065185547, 2.2514801025390625, 6.2832770347595215, 10.31507396697998, 14.346870422363281, 18.3786678314209, 22.410465240478516, 26.4422607421875, 30.474058151245117, 34.505855560302734, 38.53765106201172, 42.56945037841797, 46.60124588012695, 50.63304138183594, 54.66484069824219, 58.69663619995117, 62.728431701660156, 66.7602310180664, 70.79203033447266, 74.82382202148438, 78.85562133789062, 82.88742065429688, 86.91921997070312, 90.95101165771484, 94.9828109741211, 99.01460266113281, 103.04640197753906, 107.07819366455078, 111.10999298095703, 115.14179229736328, 119.173583984375, 123.20538330078125, 127.2371826171875, 131.26898193359375]}, "gradients/decoder.transformer.h.2.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 2.0, 6.0, 3.0, 12.0, 6.0, 7.0, 9.0, 12.0, 12.0, 17.0, 23.0, 22.0, 21.0, 32.0, 26.0, 19.0, 34.0, 23.0, 35.0, 34.0, 43.0, 58.0, 48.0, 39.0, 37.0, 45.0, 45.0, 35.0, 42.0, 40.0, 29.0, 25.0, 31.0, 22.0, 23.0, 16.0, 20.0, 12.0, 12.0, 11.0, 3.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-15.125, -14.567626953125, -14.01025390625, -13.452880859375, -12.8955078125, -12.338134765625, -11.78076171875, -11.223388671875, -10.666015625, -10.108642578125, -9.55126953125, -8.993896484375, -8.4365234375, -7.879150390625, -7.32177734375, -6.764404296875, -6.20703125, -5.649658203125, -5.09228515625, -4.534912109375, -3.9775390625, -3.420166015625, -2.86279296875, -2.305419921875, -1.748046875, -1.190673828125, -0.63330078125, -0.075927734375, 0.4814453125, 1.038818359375, 1.59619140625, 2.153564453125, 2.7109375, 3.268310546875, 3.82568359375, 4.383056640625, 4.9404296875, 5.497802734375, 6.05517578125, 6.612548828125, 7.169921875, 7.727294921875, 8.28466796875, 8.842041015625, 9.3994140625, 9.956787109375, 10.51416015625, 11.071533203125, 11.62890625, 12.186279296875, 12.74365234375, 13.301025390625, 13.8583984375, 14.415771484375, 14.97314453125, 15.530517578125, 16.087890625, 16.645263671875, 17.20263671875, 17.760009765625, 18.3173828125, 18.874755859375, 19.43212890625, 19.989501953125, 20.546875]}, "gradients/decoder.transformer.h.2.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, 10.0, 3.0, 13.0, 17.0, 14.0, 17.0, 23.0, 29.0, 39.0, 37.0, 58.0, 67.0, 85.0, 103.0, 133.0, 191.0, 334.0, 708.0, 1916.0, 6766.0, 59868.0, 1325606.0, 2618113.0, 163197.0, 11827.0, 2769.0, 933.0, 436.0, 231.0, 175.0, 110.0, 98.0, 64.0, 75.0, 45.0, 42.0, 34.0, 18.0, 19.0, 15.0, 15.0, 4.0, 8.0, 5.0, 5.0, 3.0, 2.0, 1.0, 1.0, 5.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-43.46875, -41.978515625, -40.48828125, -38.998046875, -37.5078125, -36.017578125, -34.52734375, -33.037109375, -31.546875, -30.056640625, -28.56640625, -27.076171875, -25.5859375, -24.095703125, -22.60546875, -21.115234375, -19.625, -18.134765625, -16.64453125, -15.154296875, -13.6640625, -12.173828125, -10.68359375, -9.193359375, -7.703125, -6.212890625, -4.72265625, -3.232421875, -1.7421875, -0.251953125, 1.23828125, 2.728515625, 4.21875, 5.708984375, 7.19921875, 8.689453125, 10.1796875, 11.669921875, 13.16015625, 14.650390625, 16.140625, 17.630859375, 19.12109375, 20.611328125, 22.1015625, 23.591796875, 25.08203125, 26.572265625, 28.0625, 29.552734375, 31.04296875, 32.533203125, 34.0234375, 35.513671875, 37.00390625, 38.494140625, 39.984375, 41.474609375, 42.96484375, 44.455078125, 45.9453125, 47.435546875, 48.92578125, 50.416015625, 51.90625]}, "gradients/decoder.transformer.h.2.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 5.0, 6.0, 7.0, 8.0, 16.0, 13.0, 31.0, 40.0, 49.0, 82.0, 83.0, 102.0, 147.0, 239.0, 283.0, 422.0, 525.0, 536.0, 418.0, 277.0, 182.0, 146.0, 100.0, 81.0, 65.0, 55.0, 42.0, 26.0, 22.0, 17.0, 13.0, 10.0, 9.0, 4.0, 6.0, 0.0, 5.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-30.53125, -29.5732421875, -28.615234375, -27.6572265625, -26.69921875, -25.7412109375, -24.783203125, -23.8251953125, -22.8671875, -21.9091796875, -20.951171875, -19.9931640625, -19.03515625, -18.0771484375, -17.119140625, -16.1611328125, -15.203125, -14.2451171875, -13.287109375, -12.3291015625, -11.37109375, -10.4130859375, -9.455078125, -8.4970703125, -7.5390625, -6.5810546875, -5.623046875, -4.6650390625, -3.70703125, -2.7490234375, -1.791015625, -0.8330078125, 0.125, 1.0830078125, 2.041015625, 2.9990234375, 3.95703125, 4.9150390625, 5.873046875, 6.8310546875, 7.7890625, 8.7470703125, 9.705078125, 10.6630859375, 11.62109375, 12.5791015625, 13.537109375, 14.4951171875, 15.453125, 16.4111328125, 17.369140625, 18.3271484375, 19.28515625, 20.2431640625, 21.201171875, 22.1591796875, 23.1171875, 24.0751953125, 25.033203125, 25.9912109375, 26.94921875, 27.9072265625, 28.865234375, 29.8232421875, 30.78125]}, "gradients/decoder.transformer.h.2.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 8.0, 5.0, 3.0, 3.0, 16.0, 16.0, 25.0, 33.0, 56.0, 80.0, 101.0, 196.0, 346.0, 724.0, 2134.0, 15826.0, 2744705.0, 1414876.0, 11788.0, 1921.0, 666.0, 290.0, 162.0, 94.0, 69.0, 37.0, 26.0, 17.0, 20.0, 10.0, 8.0, 11.0, 4.0, 2.0, 5.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-106.6875, -103.7197265625, -100.751953125, -97.7841796875, -94.81640625, -91.8486328125, -88.880859375, -85.9130859375, -82.9453125, -79.9775390625, -77.009765625, -74.0419921875, -71.07421875, -68.1064453125, -65.138671875, -62.1708984375, -59.203125, -56.2353515625, -53.267578125, -50.2998046875, -47.33203125, -44.3642578125, -41.396484375, -38.4287109375, -35.4609375, -32.4931640625, -29.525390625, -26.5576171875, -23.58984375, -20.6220703125, -17.654296875, -14.6865234375, -11.71875, -8.7509765625, -5.783203125, -2.8154296875, 0.15234375, 3.1201171875, 6.087890625, 9.0556640625, 12.0234375, 14.9912109375, 17.958984375, 20.9267578125, 23.89453125, 26.8623046875, 29.830078125, 32.7978515625, 35.765625, 38.7333984375, 41.701171875, 44.6689453125, 47.63671875, 50.6044921875, 53.572265625, 56.5400390625, 59.5078125, 62.4755859375, 65.443359375, 68.4111328125, 71.37890625, 74.3466796875, 77.314453125, 80.2822265625, 83.25]}, "gradients/decoder.transformer.h.2.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 66.0, 936.0, 13.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1059.983642578125, -1016.536376953125, -973.089111328125, -929.641845703125, -886.194580078125, -842.747314453125, -799.2999877929688, -755.8527221679688, -712.4054565429688, -668.9581909179688, -625.5109252929688, -582.0635986328125, -538.6163330078125, -495.1690979003906, -451.7218017578125, -408.2745361328125, -364.8272705078125, -321.3800048828125, -277.9327392578125, -234.48544311523438, -191.03817749023438, -147.59091186523438, -104.14361572265625, -60.69635009765625, -17.24908447265625, 26.19818878173828, 69.64546203613281, 113.09274291992188, 156.54000854492188, 199.98727416992188, 243.4345703125, 286.8818359375, 330.3289794921875, 373.7762451171875, 417.2235107421875, 460.6708068847656, 504.1180725097656, 547.5653076171875, 591.0126342773438, 634.4598999023438, 677.9071655273438, 721.3544311523438, 764.8016967773438, 808.2490234375, 851.6962890625, 895.1435546875, 938.5908203125, 982.0380859375, 1025.4853515625, 1068.9326171875, 1112.3798828125, 1155.8271484375, 1199.2744140625, 1242.7216796875, 1286.1689453125, 1329.6162109375, 1373.0634765625, 1416.5107421875, 1459.9580078125, 1503.4052734375, 1546.8525390625, 1590.2998046875, 1633.7470703125, 1677.1943359375, 1720.6417236328125]}, "gradients/decoder.transformer.h.2.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 3.0, 7.0, 4.0, 6.0, 8.0, 7.0, 17.0, 19.0, 13.0, 15.0, 16.0, 23.0, 35.0, 31.0, 22.0, 27.0, 32.0, 34.0, 47.0, 37.0, 57.0, 38.0, 39.0, 40.0, 34.0, 42.0, 27.0, 46.0, 42.0, 21.0, 34.0, 28.0, 26.0, 22.0, 22.0, 20.0, 12.0, 12.0, 11.0, 9.0, 7.0, 2.0, 8.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-115.52212524414062, -111.89141082763672, -108.26069641113281, -104.6299819946289, -100.999267578125, -97.3685531616211, -93.73783874511719, -90.10713195800781, -86.47640991210938, -82.84569549560547, -79.21498107910156, -75.58426666259766, -71.95355224609375, -68.32283782958984, -64.69212341308594, -61.0614128112793, -57.430702209472656, -53.79998779296875, -50.169273376464844, -46.53855895996094, -42.90784454345703, -39.277130126953125, -35.646419525146484, -32.01570510864258, -28.384990692138672, -24.754276275634766, -21.12356185913086, -17.492849349975586, -13.86213493347168, -10.231420516967773, -6.6007080078125, -2.9699935913085938, 0.6607208251953125, 4.2914347648620605, 7.922148704528809, 11.552862167358398, 15.183576583862305, 18.81429100036621, 22.445003509521484, 26.07571792602539, 29.706432342529297, 33.3371467590332, 36.96786117553711, 40.59857177734375, 44.229286193847656, 47.86000061035156, 51.49071502685547, 55.121429443359375, 58.75214385986328, 62.38285827636719, 66.0135726928711, 69.644287109375, 73.2750015258789, 76.90571594238281, 80.53642272949219, 84.16714477539062, 87.7978515625, 91.4285659790039, 95.05928039550781, 98.68999481201172, 102.32070922851562, 105.95142364501953, 109.58213806152344, 113.21284484863281, 116.84356689453125]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 5.0, 5.0, 9.0, 5.0, 5.0, 12.0, 20.0, 12.0, 16.0, 26.0, 26.0, 32.0, 32.0, 36.0, 27.0, 35.0, 35.0, 39.0, 39.0, 36.0, 43.0, 47.0, 55.0, 34.0, 43.0, 36.0, 33.0, 28.0, 30.0, 30.0, 18.0, 28.0, 17.0, 20.0, 17.0, 12.0, 14.0, 10.0, 10.0, 8.0, 4.0, 2.0, 5.0, 5.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0], "bins": [-15.046875, -14.5791015625, -14.111328125, -13.6435546875, -13.17578125, -12.7080078125, -12.240234375, -11.7724609375, -11.3046875, -10.8369140625, -10.369140625, -9.9013671875, -9.43359375, -8.9658203125, -8.498046875, -8.0302734375, -7.5625, -7.0947265625, -6.626953125, -6.1591796875, -5.69140625, -5.2236328125, -4.755859375, -4.2880859375, -3.8203125, -3.3525390625, -2.884765625, -2.4169921875, -1.94921875, -1.4814453125, -1.013671875, -0.5458984375, -0.078125, 0.3896484375, 0.857421875, 1.3251953125, 1.79296875, 2.2607421875, 2.728515625, 3.1962890625, 3.6640625, 4.1318359375, 4.599609375, 5.0673828125, 5.53515625, 6.0029296875, 6.470703125, 6.9384765625, 7.40625, 7.8740234375, 8.341796875, 8.8095703125, 9.27734375, 9.7451171875, 10.212890625, 10.6806640625, 11.1484375, 11.6162109375, 12.083984375, 12.5517578125, 13.01953125, 13.4873046875, 13.955078125, 14.4228515625, 14.890625]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 13.0, 10.0, 6.0, 12.0, 14.0, 40.0, 51.0, 77.0, 94.0, 164.0, 235.0, 371.0, 549.0, 860.0, 1448.0, 2141.0, 3235.0, 5136.0, 7978.0, 12549.0, 19479.0, 30640.0, 49134.0, 79749.0, 132205.0, 212423.0, 189780.0, 113598.0, 69255.0, 42839.0, 26797.0, 16986.0, 10969.0, 6960.0, 4416.0, 2906.0, 1844.0, 1143.0, 799.0, 548.0, 350.0, 247.0, 170.0, 108.0, 73.0, 50.0, 36.0, 27.0, 13.0, 14.0, 6.0, 4.0, 4.0, 3.0, 3.0, 6.0, 1.0], "bins": [-1.65234375, -1.60205078125, -1.5517578125, -1.50146484375, -1.451171875, -1.40087890625, -1.3505859375, -1.30029296875, -1.25, -1.19970703125, -1.1494140625, -1.09912109375, -1.048828125, -0.99853515625, -0.9482421875, -0.89794921875, -0.84765625, -0.79736328125, -0.7470703125, -0.69677734375, -0.646484375, -0.59619140625, -0.5458984375, -0.49560546875, -0.4453125, -0.39501953125, -0.3447265625, -0.29443359375, -0.244140625, -0.19384765625, -0.1435546875, -0.09326171875, -0.04296875, 0.00732421875, 0.0576171875, 0.10791015625, 0.158203125, 0.20849609375, 0.2587890625, 0.30908203125, 0.359375, 0.40966796875, 0.4599609375, 0.51025390625, 0.560546875, 0.61083984375, 0.6611328125, 0.71142578125, 0.76171875, 0.81201171875, 0.8623046875, 0.91259765625, 0.962890625, 1.01318359375, 1.0634765625, 1.11376953125, 1.1640625, 1.21435546875, 1.2646484375, 1.31494140625, 1.365234375, 1.41552734375, 1.4658203125, 1.51611328125, 1.56640625]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 0.0, 3.0, 1.0, 3.0, 3.0, 3.0, 6.0, 5.0, 6.0, 12.0, 10.0, 10.0, 16.0, 22.0, 22.0, 28.0, 17.0, 31.0, 28.0, 30.0, 39.0, 39.0, 38.0, 39.0, 45.0, 45.0, 1070.0, 36.0, 39.0, 32.0, 43.0, 25.0, 37.0, 27.0, 33.0, 26.0, 24.0, 19.0, 23.0, 20.0, 13.0, 12.0, 13.0, 11.0, 9.0, 6.0, 5.0, 1.0, 3.0, 4.0, 2.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-8.7109375, -8.415771484375, -8.12060546875, -7.825439453125, -7.5302734375, -7.235107421875, -6.93994140625, -6.644775390625, -6.349609375, -6.054443359375, -5.75927734375, -5.464111328125, -5.1689453125, -4.873779296875, -4.57861328125, -4.283447265625, -3.98828125, -3.693115234375, -3.39794921875, -3.102783203125, -2.8076171875, -2.512451171875, -2.21728515625, -1.922119140625, -1.626953125, -1.331787109375, -1.03662109375, -0.741455078125, -0.4462890625, -0.151123046875, 0.14404296875, 0.439208984375, 0.734375, 1.029541015625, 1.32470703125, 1.619873046875, 1.9150390625, 2.210205078125, 2.50537109375, 2.800537109375, 3.095703125, 3.390869140625, 3.68603515625, 3.981201171875, 4.2763671875, 4.571533203125, 4.86669921875, 5.161865234375, 5.45703125, 5.752197265625, 6.04736328125, 6.342529296875, 6.6376953125, 6.932861328125, 7.22802734375, 7.523193359375, 7.818359375, 8.113525390625, 8.40869140625, 8.703857421875, 8.9990234375, 9.294189453125, 9.58935546875, 9.884521484375, 10.1796875]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 4.0, 2.0, 3.0, 3.0, 5.0, 8.0, 13.0, 23.0, 26.0, 51.0, 64.0, 74.0, 102.0, 170.0, 296.0, 387.0, 583.0, 897.0, 1398.0, 2134.0, 3092.0, 4906.0, 7882.0, 12403.0, 19653.0, 31679.0, 51712.0, 84439.0, 140826.0, 1267912.0, 182339.0, 108909.0, 66259.0, 40785.0, 24958.0, 15495.0, 9874.0, 6188.0, 4020.0, 2507.0, 1691.0, 1145.0, 722.0, 488.0, 324.0, 214.0, 177.0, 98.0, 64.0, 48.0, 26.0, 23.0, 22.0, 7.0, 8.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.5029296875, -1.4540557861328125, -1.405181884765625, -1.3563079833984375, -1.30743408203125, -1.2585601806640625, -1.209686279296875, -1.1608123779296875, -1.1119384765625, -1.0630645751953125, -1.014190673828125, -0.9653167724609375, -0.91644287109375, -0.8675689697265625, -0.818695068359375, -0.7698211669921875, -0.720947265625, -0.6720733642578125, -0.623199462890625, -0.5743255615234375, -0.52545166015625, -0.4765777587890625, -0.427703857421875, -0.3788299560546875, -0.3299560546875, -0.2810821533203125, -0.232208251953125, -0.1833343505859375, -0.13446044921875, -0.0855865478515625, -0.036712646484375, 0.0121612548828125, 0.06103515625, 0.1099090576171875, 0.158782958984375, 0.2076568603515625, 0.25653076171875, 0.3054046630859375, 0.354278564453125, 0.4031524658203125, 0.4520263671875, 0.5009002685546875, 0.549774169921875, 0.5986480712890625, 0.64752197265625, 0.6963958740234375, 0.745269775390625, 0.7941436767578125, 0.843017578125, 0.8918914794921875, 0.940765380859375, 0.9896392822265625, 1.03851318359375, 1.0873870849609375, 1.136260986328125, 1.1851348876953125, 1.2340087890625, 1.2828826904296875, 1.331756591796875, 1.3806304931640625, 1.42950439453125, 1.4783782958984375, 1.527252197265625, 1.5761260986328125, 1.625]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 7.0, 7.0, 5.0, 11.0, 6.0, 17.0, 8.0, 15.0, 17.0, 29.0, 36.0, 37.0, 41.0, 49.0, 76.0, 69.0, 86.0, 79.0, 69.0, 62.0, 53.0, 50.0, 36.0, 27.0, 19.0, 17.0, 17.0, 11.0, 9.0, 11.0, 6.0, 7.0, 3.0, 1.0, 4.0, 1.0, 2.0, 2.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0047149658203125, -0.004561722278594971, -0.004408478736877441, -0.004255235195159912, -0.004101991653442383, -0.0039487481117248535, -0.0037955045700073242, -0.003642261028289795, -0.0034890174865722656, -0.0033357739448547363, -0.003182530403137207, -0.0030292868614196777, -0.0028760433197021484, -0.002722799777984619, -0.00256955623626709, -0.0024163126945495605, -0.0022630691528320312, -0.002109825611114502, -0.0019565820693969727, -0.0018033385276794434, -0.001650094985961914, -0.0014968514442443848, -0.0013436079025268555, -0.0011903643608093262, -0.0010371208190917969, -0.0008838772773742676, -0.0007306337356567383, -0.000577390193939209, -0.0004241466522216797, -0.0002709031105041504, -0.0001176595687866211, 3.55839729309082e-05, 0.0001888275146484375, 0.0003420710563659668, 0.0004953145980834961, 0.0006485581398010254, 0.0008018016815185547, 0.000955045223236084, 0.0011082887649536133, 0.0012615323066711426, 0.0014147758483886719, 0.0015680193901062012, 0.0017212629318237305, 0.0018745064735412598, 0.002027750015258789, 0.0021809935569763184, 0.0023342370986938477, 0.002487480640411377, 0.0026407241821289062, 0.0027939677238464355, 0.002947211265563965, 0.003100454807281494, 0.0032536983489990234, 0.0034069418907165527, 0.003560185432434082, 0.0037134289741516113, 0.0038666725158691406, 0.00401991605758667, 0.004173159599304199, 0.0043264031410217285, 0.004479646682739258, 0.004632890224456787, 0.004786133766174316, 0.004939377307891846, 0.005092620849609375]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 5.0, 3.0, 1.0, 3.0, 2.0, 3.0, 5.0, 4.0, 3.0, 6.0, 8.0, 16.0, 14.0, 31.0, 16.0, 32.0, 47.0, 61.0, 75.0, 115.0, 175.0, 233.0, 474.0, 895.0, 21519.0, 1021111.0, 1974.0, 643.0, 336.0, 209.0, 146.0, 93.0, 73.0, 64.0, 36.0, 21.0, 25.0, 16.0, 15.0, 12.0, 10.0, 12.0, 6.0, 3.0, 4.0, 2.0, 3.0, 1.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.09234619140625, -0.0895700454711914, -0.08679389953613281, -0.08401775360107422, -0.08124160766601562, -0.07846546173095703, -0.07568931579589844, -0.07291316986083984, -0.07013702392578125, -0.06736087799072266, -0.06458473205566406, -0.06180858612060547, -0.059032440185546875, -0.05625629425048828, -0.05348014831542969, -0.050704002380371094, -0.0479278564453125, -0.045151710510253906, -0.04237556457519531, -0.03959941864013672, -0.036823272705078125, -0.03404712677001953, -0.03127098083496094, -0.028494834899902344, -0.02571868896484375, -0.022942543029785156, -0.020166397094726562, -0.01739025115966797, -0.014614105224609375, -0.011837959289550781, -0.009061813354492188, -0.006285667419433594, -0.003509521484375, -0.0007333755493164062, 0.0020427703857421875, 0.004818916320800781, 0.007595062255859375, 0.010371208190917969, 0.013147354125976562, 0.015923500061035156, 0.01869964599609375, 0.021475791931152344, 0.024251937866210938, 0.02702808380126953, 0.029804229736328125, 0.03258037567138672, 0.03535652160644531, 0.038132667541503906, 0.0409088134765625, 0.043684959411621094, 0.04646110534667969, 0.04923725128173828, 0.052013397216796875, 0.05478954315185547, 0.05756568908691406, 0.060341835021972656, 0.06311798095703125, 0.06589412689208984, 0.06867027282714844, 0.07144641876220703, 0.07422256469726562, 0.07699871063232422, 0.07977485656738281, 0.0825510025024414, 0.0853271484375]}, "gradients/decoder.transformer.h.2.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 11.0, 710.0, 288.0, 9.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.006648569367825985, -0.006035926286131144, -0.005423282738775015, -0.0048106396570801735, -0.004197996109724045, -0.0035853530280292034, -0.002972709946334362, -0.0023600663989782333, -0.001747423317283392, -0.001134780002757907, -0.0005221368046477437, 9.050639346241951e-05, 0.0007031497079879045, 0.0013157930225133896, 0.001928436104208231, 0.0025410796515643597, 0.003153722733259201, 0.003766366047784686, 0.004379009362310171, 0.0049916524440050125, 0.005604295991361141, 0.006216939073055983, 0.006829582154750824, 0.007442225702106953, 0.008054869249463081, 0.00866751279681921, 0.009280155412852764, 0.009892798960208893, 0.010505442507565022, 0.01111808605492115, 0.011730728670954704, 0.012343372218310833, 0.012956013903021812, 0.013568657450377941, 0.014181300066411495, 0.014793943613767624, 0.015406587161123753, 0.01601923070847988, 0.016631873324513435, 0.01724451780319214, 0.017857160419225693, 0.018469803035259247, 0.01908244751393795, 0.019695090129971504, 0.02030773274600506, 0.02092037722468376, 0.021533019840717316, 0.02214566245675087, 0.022758305072784424, 0.023370947688817978, 0.02398359216749668, 0.024596234783530235, 0.02520887739956379, 0.025821521878242493, 0.026434164494276047, 0.0270468071103096, 0.027659451588988304, 0.028272094205021858, 0.02888473868370056, 0.029497381299734116, 0.03011002391576767, 0.030722668394446373, 0.03133530914783478, 0.03194795548915863, 0.032560598105192184]}, "gradients/decoder.transformer.h.2.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 5.0, 1.0, 5.0, 7.0, 7.0, 10.0, 16.0, 11.0, 11.0, 23.0, 19.0, 23.0, 17.0, 19.0, 27.0, 29.0, 27.0, 41.0, 35.0, 38.0, 44.0, 45.0, 49.0, 46.0, 50.0, 43.0, 32.0, 35.0, 37.0, 45.0, 30.0, 21.0, 29.0, 13.0, 12.0, 13.0, 19.0, 18.0, 17.0, 7.0, 14.0, 3.0, 7.0, 6.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0021241307258605957, -0.0020545776933431625, -0.0019850246608257294, -0.0019154716283082962, -0.001845918595790863, -0.0017763655632734299, -0.0017068125307559967, -0.0016372594982385635, -0.0015677064657211304, -0.0014981534332036972, -0.001428600400686264, -0.0013590473681688309, -0.0012894943356513977, -0.0012199413031339645, -0.0011503882706165314, -0.0010808352380990982, -0.001011282205581665, -0.0009417291730642319, -0.0008721761405467987, -0.0008026231080293655, -0.0007330700755119324, -0.0006635170429944992, -0.000593964010477066, -0.0005244109779596329, -0.0004548579454421997, -0.00038530491292476654, -0.0003157518804073334, -0.0002461988478899002, -0.00017664581537246704, -0.00010709278285503387, -3.753975033760071e-05, 3.201328217983246e-05, 0.00010156631469726562, 0.0001711193472146988, 0.00024067237973213196, 0.0003102254122495651, 0.0003797784447669983, 0.00044933147728443146, 0.0005188845098018646, 0.0005884375423192978, 0.000657990574836731, 0.0007275436073541641, 0.0007970966398715973, 0.0008666496723890305, 0.0009362027049064636, 0.0010057557374238968, 0.00107530876994133, 0.0011448618024587631, 0.0012144148349761963, 0.0012839678674936295, 0.0013535209000110626, 0.0014230739325284958, 0.001492626965045929, 0.0015621799975633621, 0.0016317330300807953, 0.0017012860625982285, 0.0017708390951156616, 0.0018403921276330948, 0.001909945160150528, 0.001979498192667961, 0.0020490512251853943, 0.0021186042577028275, 0.0021881572902202606, 0.002257710322737694, 0.002327263355255127]}, "gradients/decoder.transformer.h.2.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 5.0, 5.0, 9.0, 5.0, 5.0, 12.0, 20.0, 12.0, 16.0, 26.0, 26.0, 32.0, 32.0, 36.0, 27.0, 35.0, 35.0, 39.0, 39.0, 36.0, 43.0, 47.0, 55.0, 34.0, 43.0, 36.0, 33.0, 28.0, 30.0, 30.0, 18.0, 28.0, 17.0, 20.0, 17.0, 12.0, 14.0, 10.0, 10.0, 8.0, 4.0, 2.0, 5.0, 5.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0], "bins": [-15.046875, -14.5791015625, -14.111328125, -13.6435546875, -13.17578125, -12.7080078125, -12.240234375, -11.7724609375, -11.3046875, -10.8369140625, -10.369140625, -9.9013671875, -9.43359375, -8.9658203125, -8.498046875, -8.0302734375, -7.5625, -7.0947265625, -6.626953125, -6.1591796875, -5.69140625, -5.2236328125, -4.755859375, -4.2880859375, -3.8203125, -3.3525390625, -2.884765625, -2.4169921875, -1.94921875, -1.4814453125, -1.013671875, -0.5458984375, -0.078125, 0.3896484375, 0.857421875, 1.3251953125, 1.79296875, 2.2607421875, 2.728515625, 3.1962890625, 3.6640625, 4.1318359375, 4.599609375, 5.0673828125, 5.53515625, 6.0029296875, 6.470703125, 6.9384765625, 7.40625, 7.8740234375, 8.341796875, 8.8095703125, 9.27734375, 9.7451171875, 10.212890625, 10.6806640625, 11.1484375, 11.6162109375, 12.083984375, 12.5517578125, 13.01953125, 13.4873046875, 13.955078125, 14.4228515625, 14.890625]}, "gradients/decoder.transformer.h.2.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 12.0, 3.0, 10.0, 11.0, 12.0, 23.0, 28.0, 32.0, 40.0, 82.0, 114.0, 147.0, 183.0, 271.0, 420.0, 575.0, 890.0, 1348.0, 2151.0, 3860.0, 10560.0, 76334.0, 744086.0, 178192.0, 16976.0, 4882.0, 2518.0, 1541.0, 1002.0, 715.0, 463.0, 325.0, 217.0, 161.0, 101.0, 83.0, 52.0, 35.0, 27.0, 21.0, 20.0, 17.0, 9.0, 3.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-41.375, -40.10791015625, -38.8408203125, -37.57373046875, -36.306640625, -35.03955078125, -33.7724609375, -32.50537109375, -31.23828125, -29.97119140625, -28.7041015625, -27.43701171875, -26.169921875, -24.90283203125, -23.6357421875, -22.36865234375, -21.1015625, -19.83447265625, -18.5673828125, -17.30029296875, -16.033203125, -14.76611328125, -13.4990234375, -12.23193359375, -10.96484375, -9.69775390625, -8.4306640625, -7.16357421875, -5.896484375, -4.62939453125, -3.3623046875, -2.09521484375, -0.828125, 0.43896484375, 1.7060546875, 2.97314453125, 4.240234375, 5.50732421875, 6.7744140625, 8.04150390625, 9.30859375, 10.57568359375, 11.8427734375, 13.10986328125, 14.376953125, 15.64404296875, 16.9111328125, 18.17822265625, 19.4453125, 20.71240234375, 21.9794921875, 23.24658203125, 24.513671875, 25.78076171875, 27.0478515625, 28.31494140625, 29.58203125, 30.84912109375, 32.1162109375, 33.38330078125, 34.650390625, 35.91748046875, 37.1845703125, 38.45166015625, 39.71875]}, "gradients/decoder.transformer.h.2.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 3.0, 3.0, 5.0, 5.0, 8.0, 8.0, 9.0, 20.0, 25.0, 24.0, 21.0, 29.0, 52.0, 57.0, 53.0, 77.0, 91.0, 1581.0, 514.0, 78.0, 71.0, 54.0, 45.0, 45.0, 31.0, 27.0, 24.0, 23.0, 17.0, 9.0, 11.0, 8.0, 8.0, 3.0, 7.0, 5.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-59.03125, -57.34375, -55.65625, -53.96875, -52.28125, -50.59375, -48.90625, -47.21875, -45.53125, -43.84375, -42.15625, -40.46875, -38.78125, -37.09375, -35.40625, -33.71875, -32.03125, -30.34375, -28.65625, -26.96875, -25.28125, -23.59375, -21.90625, -20.21875, -18.53125, -16.84375, -15.15625, -13.46875, -11.78125, -10.09375, -8.40625, -6.71875, -5.03125, -3.34375, -1.65625, 0.03125, 1.71875, 3.40625, 5.09375, 6.78125, 8.46875, 10.15625, 11.84375, 13.53125, 15.21875, 16.90625, 18.59375, 20.28125, 21.96875, 23.65625, 25.34375, 27.03125, 28.71875, 30.40625, 32.09375, 33.78125, 35.46875, 37.15625, 38.84375, 40.53125, 42.21875, 43.90625, 45.59375, 47.28125, 48.96875]}, "gradients/decoder.transformer.h.2.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 4.0, 4.0, 5.0, 8.0, 12.0, 18.0, 19.0, 22.0, 39.0, 72.0, 68.0, 115.0, 159.0, 260.0, 607.0, 2609.0, 3132676.0, 7250.0, 859.0, 320.0, 185.0, 126.0, 85.0, 62.0, 34.0, 27.0, 23.0, 14.0, 9.0, 8.0, 8.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-167.5, -162.033203125, -156.56640625, -151.099609375, -145.6328125, -140.166015625, -134.69921875, -129.232421875, -123.765625, -118.298828125, -112.83203125, -107.365234375, -101.8984375, -96.431640625, -90.96484375, -85.498046875, -80.03125, -74.564453125, -69.09765625, -63.630859375, -58.1640625, -52.697265625, -47.23046875, -41.763671875, -36.296875, -30.830078125, -25.36328125, -19.896484375, -14.4296875, -8.962890625, -3.49609375, 1.970703125, 7.4375, 12.904296875, 18.37109375, 23.837890625, 29.3046875, 34.771484375, 40.23828125, 45.705078125, 51.171875, 56.638671875, 62.10546875, 67.572265625, 73.0390625, 78.505859375, 83.97265625, 89.439453125, 94.90625, 100.373046875, 105.83984375, 111.306640625, 116.7734375, 122.240234375, 127.70703125, 133.173828125, 138.640625, 144.107421875, 149.57421875, 155.041015625, 160.5078125, 165.974609375, 171.44140625, 176.908203125, 182.375]}, "gradients/decoder.transformer.h.2.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 12.0, 61.0, 303.0, 456.0, 150.0, 21.0, 9.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-220.38963317871094, -213.33377075195312, -206.2779083251953, -199.2220458984375, -192.16616821289062, -185.11032104492188, -178.054443359375, -170.9985809326172, -163.94271850585938, -156.88685607910156, -149.83099365234375, -142.77513122558594, -135.71926879882812, -128.66339111328125, -121.60752868652344, -114.55166625976562, -107.49580383300781, -100.43994140625, -93.38407897949219, -86.32820892333984, -79.27234649658203, -72.21648406982422, -65.16061401367188, -58.10475158691406, -51.04888916015625, -43.99302673339844, -36.93716049194336, -29.881296157836914, -22.82543182373047, -15.769569396972656, -8.713703155517578, -1.6578369140625, 5.398040771484375, 12.45390510559082, 19.509769439697266, 26.56563377380371, 33.621498107910156, 40.67736053466797, 47.73322677612305, 54.789093017578125, 61.84495544433594, 68.90081787109375, 75.95668029785156, 83.0125503540039, 90.06841278076172, 97.12427520751953, 104.18014526367188, 111.23600769042969, 118.2918701171875, 125.34773254394531, 132.40359497070312, 139.45945739746094, 146.51531982421875, 153.57119750976562, 160.62705993652344, 167.68292236328125, 174.73878479003906, 181.79464721679688, 188.8505096435547, 195.9063720703125, 202.96224975585938, 210.01809692382812, 217.073974609375, 224.1298370361328, 231.18569946289062]}, "gradients/decoder.transformer.h.2.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 1.0, 5.0, 5.0, 3.0, 9.0, 6.0, 7.0, 13.0, 12.0, 26.0, 24.0, 21.0, 29.0, 28.0, 35.0, 37.0, 35.0, 56.0, 56.0, 57.0, 63.0, 50.0, 49.0, 42.0, 47.0, 45.0, 42.0, 44.0, 26.0, 20.0, 23.0, 14.0, 12.0, 13.0, 8.0, 13.0, 10.0, 5.0, 2.0, 6.0, 3.0, 0.0, 4.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-161.30824279785156, -156.46434020996094, -151.62045288085938, -146.77655029296875, -141.93264770507812, -137.08876037597656, -132.24485778808594, -127.40096282958984, -122.55706787109375, -117.71317291259766, -112.86927795410156, -108.02537536621094, -103.18148040771484, -98.33758544921875, -93.49368286132812, -88.64978790283203, -83.80589294433594, -78.96199798583984, -74.11810302734375, -69.27420043945312, -64.43030548095703, -59.58641052246094, -54.74251174926758, -49.89861297607422, -45.054718017578125, -40.21082305908203, -35.36692428588867, -30.523027420043945, -25.67913055419922, -20.835233688354492, -15.991336822509766, -11.147438049316406, -6.303558349609375, -1.4596614837646484, 3.384235382080078, 8.228132247924805, 13.072029113769531, 17.915925979614258, 22.759822845458984, 27.603721618652344, 32.44761657714844, 37.29151153564453, 42.13541030883789, 46.97930908203125, 51.823204040527344, 56.66709899902344, 61.5109977722168, 66.35489654541016, 71.19879150390625, 76.04268646240234, 80.88658142089844, 85.73048400878906, 90.57437896728516, 95.41827392578125, 100.26217651367188, 105.10607147216797, 109.94996643066406, 114.79386138916016, 119.63775634765625, 124.48165893554688, 129.3255615234375, 134.16944885253906, 139.0133514404297, 143.85723876953125, 148.70114135742188]}, "gradients/decoder.transformer.h.1.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 7.0, 8.0, 7.0, 3.0, 12.0, 16.0, 9.0, 14.0, 24.0, 19.0, 22.0, 20.0, 29.0, 36.0, 45.0, 40.0, 33.0, 41.0, 46.0, 42.0, 46.0, 43.0, 37.0, 47.0, 37.0, 45.0, 35.0, 28.0, 29.0, 25.0, 28.0, 22.0, 22.0, 19.0, 13.0, 9.0, 12.0, 10.0, 8.0, 4.0, 6.0, 4.0, 5.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-15.3125, -14.79833984375, -14.2841796875, -13.77001953125, -13.255859375, -12.74169921875, -12.2275390625, -11.71337890625, -11.19921875, -10.68505859375, -10.1708984375, -9.65673828125, -9.142578125, -8.62841796875, -8.1142578125, -7.60009765625, -7.0859375, -6.57177734375, -6.0576171875, -5.54345703125, -5.029296875, -4.51513671875, -4.0009765625, -3.48681640625, -2.97265625, -2.45849609375, -1.9443359375, -1.43017578125, -0.916015625, -0.40185546875, 0.1123046875, 0.62646484375, 1.140625, 1.65478515625, 2.1689453125, 2.68310546875, 3.197265625, 3.71142578125, 4.2255859375, 4.73974609375, 5.25390625, 5.76806640625, 6.2822265625, 6.79638671875, 7.310546875, 7.82470703125, 8.3388671875, 8.85302734375, 9.3671875, 9.88134765625, 10.3955078125, 10.90966796875, 11.423828125, 11.93798828125, 12.4521484375, 12.96630859375, 13.48046875, 13.99462890625, 14.5087890625, 15.02294921875, 15.537109375, 16.05126953125, 16.5654296875, 17.07958984375, 17.59375]}, "gradients/decoder.transformer.h.1.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 3.0, 2.0, 5.0, 12.0, 18.0, 14.0, 24.0, 43.0, 54.0, 64.0, 121.0, 135.0, 210.0, 346.0, 531.0, 959.0, 1880.0, 4789.0, 16119.0, 96358.0, 898240.0, 2497501.0, 596902.0, 60462.0, 11735.0, 3879.0, 1608.0, 829.0, 487.0, 316.0, 200.0, 122.0, 104.0, 53.0, 41.0, 33.0, 25.0, 21.0, 12.0, 7.0, 13.0, 3.0, 2.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-29.859375, -28.908447265625, -27.95751953125, -27.006591796875, -26.0556640625, -25.104736328125, -24.15380859375, -23.202880859375, -22.251953125, -21.301025390625, -20.35009765625, -19.399169921875, -18.4482421875, -17.497314453125, -16.54638671875, -15.595458984375, -14.64453125, -13.693603515625, -12.74267578125, -11.791748046875, -10.8408203125, -9.889892578125, -8.93896484375, -7.988037109375, -7.037109375, -6.086181640625, -5.13525390625, -4.184326171875, -3.2333984375, -2.282470703125, -1.33154296875, -0.380615234375, 0.5703125, 1.521240234375, 2.47216796875, 3.423095703125, 4.3740234375, 5.324951171875, 6.27587890625, 7.226806640625, 8.177734375, 9.128662109375, 10.07958984375, 11.030517578125, 11.9814453125, 12.932373046875, 13.88330078125, 14.834228515625, 15.78515625, 16.736083984375, 17.68701171875, 18.637939453125, 19.5888671875, 20.539794921875, 21.49072265625, 22.441650390625, 23.392578125, 24.343505859375, 25.29443359375, 26.245361328125, 27.1962890625, 28.147216796875, 29.09814453125, 30.049072265625, 31.0]}, "gradients/decoder.transformer.h.1.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 6.0, 11.0, 4.0, 14.0, 28.0, 40.0, 56.0, 87.0, 141.0, 236.0, 316.0, 472.0, 659.0, 657.0, 454.0, 265.0, 224.0, 156.0, 86.0, 51.0, 40.0, 19.0, 22.0, 15.0, 2.0, 4.0, 1.0, 4.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-47.21875, -46.03955078125, -44.8603515625, -43.68115234375, -42.501953125, -41.32275390625, -40.1435546875, -38.96435546875, -37.78515625, -36.60595703125, -35.4267578125, -34.24755859375, -33.068359375, -31.88916015625, -30.7099609375, -29.53076171875, -28.3515625, -27.17236328125, -25.9931640625, -24.81396484375, -23.634765625, -22.45556640625, -21.2763671875, -20.09716796875, -18.91796875, -17.73876953125, -16.5595703125, -15.38037109375, -14.201171875, -13.02197265625, -11.8427734375, -10.66357421875, -9.484375, -8.30517578125, -7.1259765625, -5.94677734375, -4.767578125, -3.58837890625, -2.4091796875, -1.22998046875, -0.05078125, 1.12841796875, 2.3076171875, 3.48681640625, 4.666015625, 5.84521484375, 7.0244140625, 8.20361328125, 9.3828125, 10.56201171875, 11.7412109375, 12.92041015625, 14.099609375, 15.27880859375, 16.4580078125, 17.63720703125, 18.81640625, 19.99560546875, 21.1748046875, 22.35400390625, 23.533203125, 24.71240234375, 25.8916015625, 27.07080078125, 28.25]}, "gradients/decoder.transformer.h.1.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 6.0, 7.0, 6.0, 8.0, 16.0, 30.0, 40.0, 67.0, 137.0, 233.0, 518.0, 1378.0, 5332.0, 3771802.0, 409143.0, 3573.0, 1062.0, 444.0, 210.0, 109.0, 42.0, 33.0, 25.0, 21.0, 11.0, 8.0, 7.0, 6.0, 2.0, 2.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-110.625, -106.79296875, -102.9609375, -99.12890625, -95.296875, -91.46484375, -87.6328125, -83.80078125, -79.96875, -76.13671875, -72.3046875, -68.47265625, -64.640625, -60.80859375, -56.9765625, -53.14453125, -49.3125, -45.48046875, -41.6484375, -37.81640625, -33.984375, -30.15234375, -26.3203125, -22.48828125, -18.65625, -14.82421875, -10.9921875, -7.16015625, -3.328125, 0.50390625, 4.3359375, 8.16796875, 12.0, 15.83203125, 19.6640625, 23.49609375, 27.328125, 31.16015625, 34.9921875, 38.82421875, 42.65625, 46.48828125, 50.3203125, 54.15234375, 57.984375, 61.81640625, 65.6484375, 69.48046875, 73.3125, 77.14453125, 80.9765625, 84.80859375, 88.640625, 92.47265625, 96.3046875, 100.13671875, 103.96875, 107.80078125, 111.6328125, 115.46484375, 119.296875, 123.12890625, 126.9609375, 130.79296875, 134.625]}, "gradients/decoder.transformer.h.1.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 10.0, 105.0, 517.0, 336.0, 43.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-546.1066284179688, -533.6357421875, -521.164794921875, -508.69390869140625, -496.2229919433594, -483.7521057128906, -471.28118896484375, -458.810302734375, -446.3393859863281, -433.86846923828125, -421.3975830078125, -408.9266662597656, -396.45574951171875, -383.98486328125, -371.5139465332031, -359.04302978515625, -346.5721435546875, -334.1012268066406, -321.6303405761719, -309.159423828125, -296.6885070800781, -284.2176208496094, -271.7467041015625, -259.27581787109375, -246.80487060546875, -234.33396911621094, -221.86305236816406, -209.39215087890625, -196.92124938964844, -184.45034790039062, -171.97943115234375, -159.50852966308594, -147.03762817382812, -134.5667266845703, -122.09581756591797, -109.62490844726562, -97.15400695800781, -84.68309783935547, -72.21218872070312, -59.74128723144531, -47.27037811279297, -34.79947280883789, -22.32856559753418, -9.857658386230469, 2.6132469177246094, 15.084152221679688, 27.55506134033203, 40.025962829589844, 52.49687194824219, 64.96778106689453, 77.43868255615234, 89.90959167480469, 102.3804931640625, 114.85140228271484, 127.32231140136719, 139.793212890625, 152.26412963867188, 164.7350311279297, 177.20594787597656, 189.67684936523438, 202.1477508544922, 214.61865234375, 227.08956909179688, 239.5604705810547, 252.0313720703125]}, "gradients/decoder.transformer.h.1.ln_2.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 3.0, 4.0, 3.0, 2.0, 8.0, 7.0, 4.0, 10.0, 10.0, 11.0, 10.0, 19.0, 12.0, 26.0, 23.0, 24.0, 36.0, 23.0, 22.0, 31.0, 28.0, 34.0, 37.0, 34.0, 32.0, 41.0, 25.0, 40.0, 44.0, 27.0, 28.0, 24.0, 34.0, 37.0, 21.0, 27.0, 26.0, 25.0, 26.0, 18.0, 11.0, 13.0, 18.0, 12.0, 9.0, 9.0, 8.0, 12.0, 7.0, 3.0, 7.0, 3.0, 1.0, 4.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-90.83758544921875, -87.79442596435547, -84.75126647949219, -81.70811462402344, -78.66495513916016, -75.62179565429688, -72.5786361694336, -69.53547668457031, -66.49231719970703, -63.44915771484375, -60.406002044677734, -57.36284255981445, -54.31968307495117, -51.276527404785156, -48.233367919921875, -45.190208435058594, -42.14705276489258, -39.1038932800293, -36.06073760986328, -33.017578125, -29.97441864013672, -26.93126106262207, -23.888103485107422, -20.84494400024414, -17.801786422729492, -14.758627891540527, -11.715469360351562, -8.672311782836914, -5.629153251647949, -2.5859947204589844, 0.45716285705566406, 3.5003223419189453, 6.543479919433594, 9.586638450622559, 12.629796981811523, 15.672954559326172, 18.716114044189453, 21.7592716217041, 24.80242919921875, 27.84558868408203, 30.88874626159668, 33.93190383911133, 36.97506332397461, 40.018218994140625, 43.061378479003906, 46.10453796386719, 49.14769744873047, 52.19085693359375, 55.234012603759766, 58.27717208862305, 61.32032775878906, 64.36348724365234, 67.40664672851562, 70.4498062133789, 73.49296569824219, 76.53611755371094, 79.57927703857422, 82.6224365234375, 85.66559600830078, 88.70875549316406, 91.75190734863281, 94.7950668334961, 97.83822631835938, 100.88138580322266, 103.92454528808594]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 5.0, 5.0, 5.0, 5.0, 8.0, 8.0, 8.0, 13.0, 4.0, 14.0, 6.0, 18.0, 8.0, 22.0, 36.0, 27.0, 14.0, 36.0, 21.0, 40.0, 32.0, 30.0, 39.0, 48.0, 30.0, 32.0, 40.0, 42.0, 42.0, 39.0, 27.0, 34.0, 30.0, 32.0, 19.0, 24.0, 22.0, 16.0, 15.0, 9.0, 25.0, 18.0, 13.0, 9.0, 8.0, 9.0, 5.0, 5.0, 6.0, 4.0, 1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 2.0, 1.0], "bins": [-10.734375, -10.39599609375, -10.0576171875, -9.71923828125, -9.380859375, -9.04248046875, -8.7041015625, -8.36572265625, -8.02734375, -7.68896484375, -7.3505859375, -7.01220703125, -6.673828125, -6.33544921875, -5.9970703125, -5.65869140625, -5.3203125, -4.98193359375, -4.6435546875, -4.30517578125, -3.966796875, -3.62841796875, -3.2900390625, -2.95166015625, -2.61328125, -2.27490234375, -1.9365234375, -1.59814453125, -1.259765625, -0.92138671875, -0.5830078125, -0.24462890625, 0.09375, 0.43212890625, 0.7705078125, 1.10888671875, 1.447265625, 1.78564453125, 2.1240234375, 2.46240234375, 2.80078125, 3.13916015625, 3.4775390625, 3.81591796875, 4.154296875, 4.49267578125, 4.8310546875, 5.16943359375, 5.5078125, 5.84619140625, 6.1845703125, 6.52294921875, 6.861328125, 7.19970703125, 7.5380859375, 7.87646484375, 8.21484375, 8.55322265625, 8.8916015625, 9.22998046875, 9.568359375, 9.90673828125, 10.2451171875, 10.58349609375, 10.921875]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 3.0, 7.0, 15.0, 22.0, 32.0, 53.0, 51.0, 128.0, 160.0, 250.0, 385.0, 560.0, 792.0, 1167.0, 1773.0, 2739.0, 3824.0, 5678.0, 8509.0, 12702.0, 18318.0, 27327.0, 41680.0, 61798.0, 91752.0, 135780.0, 185990.0, 146699.0, 98430.0, 66519.0, 44133.0, 29700.0, 19942.0, 13629.0, 9079.0, 6059.0, 4232.0, 2787.0, 1907.0, 1308.0, 872.0, 579.0, 405.0, 284.0, 180.0, 104.0, 82.0, 42.0, 26.0, 33.0, 14.0, 9.0, 8.0, 2.0, 3.0, 2.0, 2.0], "bins": [-1.1962890625, -1.160675048828125, -1.12506103515625, -1.089447021484375, -1.0538330078125, -1.018218994140625, -0.98260498046875, -0.946990966796875, -0.911376953125, -0.875762939453125, -0.84014892578125, -0.804534912109375, -0.7689208984375, -0.733306884765625, -0.69769287109375, -0.662078857421875, -0.62646484375, -0.590850830078125, -0.55523681640625, -0.519622802734375, -0.4840087890625, -0.448394775390625, -0.41278076171875, -0.377166748046875, -0.341552734375, -0.305938720703125, -0.27032470703125, -0.234710693359375, -0.1990966796875, -0.163482666015625, -0.12786865234375, -0.092254638671875, -0.056640625, -0.021026611328125, 0.01458740234375, 0.050201416015625, 0.0858154296875, 0.121429443359375, 0.15704345703125, 0.192657470703125, 0.228271484375, 0.263885498046875, 0.29949951171875, 0.335113525390625, 0.3707275390625, 0.406341552734375, 0.44195556640625, 0.477569580078125, 0.51318359375, 0.548797607421875, 0.58441162109375, 0.620025634765625, 0.6556396484375, 0.691253662109375, 0.72686767578125, 0.762481689453125, 0.798095703125, 0.833709716796875, 0.86932373046875, 0.904937744140625, 0.9405517578125, 0.976165771484375, 1.01177978515625, 1.047393798828125, 1.0830078125]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 3.0, 7.0, 10.0, 10.0, 8.0, 14.0, 12.0, 15.0, 17.0, 17.0, 14.0, 20.0, 18.0, 28.0, 26.0, 24.0, 37.0, 32.0, 27.0, 38.0, 50.0, 27.0, 1063.0, 42.0, 42.0, 39.0, 40.0, 28.0, 31.0, 25.0, 30.0, 34.0, 29.0, 29.0, 21.0, 15.0, 10.0, 16.0, 7.0, 15.0, 10.0, 11.0, 12.0, 7.0, 5.0, 2.0, 3.0, 0.0, 2.0, 3.0, 2.0, 1.0, 2.0], "bins": [-7.48828125, -7.26727294921875, -7.0462646484375, -6.82525634765625, -6.604248046875, -6.38323974609375, -6.1622314453125, -5.94122314453125, -5.72021484375, -5.49920654296875, -5.2781982421875, -5.05718994140625, -4.836181640625, -4.61517333984375, -4.3941650390625, -4.17315673828125, -3.9521484375, -3.73114013671875, -3.5101318359375, -3.28912353515625, -3.068115234375, -2.84710693359375, -2.6260986328125, -2.40509033203125, -2.18408203125, -1.96307373046875, -1.7420654296875, -1.52105712890625, -1.300048828125, -1.07904052734375, -0.8580322265625, -0.63702392578125, -0.416015625, -0.19500732421875, 0.0260009765625, 0.24700927734375, 0.468017578125, 0.68902587890625, 0.9100341796875, 1.13104248046875, 1.35205078125, 1.57305908203125, 1.7940673828125, 2.01507568359375, 2.236083984375, 2.45709228515625, 2.6781005859375, 2.89910888671875, 3.1201171875, 3.34112548828125, 3.5621337890625, 3.78314208984375, 4.004150390625, 4.22515869140625, 4.4461669921875, 4.66717529296875, 4.88818359375, 5.10919189453125, 5.3302001953125, 5.55120849609375, 5.772216796875, 5.99322509765625, 6.2142333984375, 6.43524169921875, 6.65625]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 4.0, 6.0, 16.0, 9.0, 23.0, 34.0, 42.0, 56.0, 95.0, 96.0, 184.0, 295.0, 451.0, 635.0, 888.0, 1307.0, 1844.0, 2758.0, 4046.0, 6047.0, 8815.0, 13287.0, 20032.0, 30825.0, 46464.0, 71766.0, 111284.0, 175402.0, 1239328.0, 125875.0, 80472.0, 52343.0, 34263.0, 22273.0, 14947.0, 9922.0, 6746.0, 4617.0, 3027.0, 2082.0, 1352.0, 934.0, 720.0, 482.0, 302.0, 239.0, 180.0, 101.0, 84.0, 48.0, 48.0, 15.0, 7.0, 8.0, 3.0, 3.0, 5.0, 3.0, 1.0, 2.0, 3.0], "bins": [-1.12109375, -1.0850830078125, -1.049072265625, -1.0130615234375, -0.97705078125, -0.9410400390625, -0.905029296875, -0.8690185546875, -0.8330078125, -0.7969970703125, -0.760986328125, -0.7249755859375, -0.68896484375, -0.6529541015625, -0.616943359375, -0.5809326171875, -0.544921875, -0.5089111328125, -0.472900390625, -0.4368896484375, -0.40087890625, -0.3648681640625, -0.328857421875, -0.2928466796875, -0.2568359375, -0.2208251953125, -0.184814453125, -0.1488037109375, -0.11279296875, -0.0767822265625, -0.040771484375, -0.0047607421875, 0.03125, 0.0672607421875, 0.103271484375, 0.1392822265625, 0.17529296875, 0.2113037109375, 0.247314453125, 0.2833251953125, 0.3193359375, 0.3553466796875, 0.391357421875, 0.4273681640625, 0.46337890625, 0.4993896484375, 0.535400390625, 0.5714111328125, 0.607421875, 0.6434326171875, 0.679443359375, 0.7154541015625, 0.75146484375, 0.7874755859375, 0.823486328125, 0.8594970703125, 0.8955078125, 0.9315185546875, 0.967529296875, 1.0035400390625, 1.03955078125, 1.0755615234375, 1.111572265625, 1.1475830078125, 1.18359375]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 5.0, 8.0, 9.0, 13.0, 18.0, 22.0, 24.0, 42.0, 50.0, 69.0, 95.0, 115.0, 96.0, 83.0, 85.0, 60.0, 58.0, 43.0, 30.0, 17.0, 16.0, 7.0, 9.0, 7.0, 3.0, 10.0, 1.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004001617431640625, -0.0038559436798095703, -0.0037102699279785156, -0.003564596176147461, -0.0034189224243164062, -0.0032732486724853516, -0.003127574920654297, -0.002981901168823242, -0.0028362274169921875, -0.002690553665161133, -0.002544879913330078, -0.0023992061614990234, -0.0022535324096679688, -0.002107858657836914, -0.0019621849060058594, -0.0018165111541748047, -0.00167083740234375, -0.0015251636505126953, -0.0013794898986816406, -0.001233816146850586, -0.0010881423950195312, -0.0009424686431884766, -0.0007967948913574219, -0.0006511211395263672, -0.0005054473876953125, -0.0003597736358642578, -0.00021409988403320312, -6.842613220214844e-05, 7.724761962890625e-05, 0.00022292137145996094, 0.0003685951232910156, 0.0005142688751220703, 0.000659942626953125, 0.0008056163787841797, 0.0009512901306152344, 0.001096963882446289, 0.0012426376342773438, 0.0013883113861083984, 0.0015339851379394531, 0.0016796588897705078, 0.0018253326416015625, 0.001971006393432617, 0.002116680145263672, 0.0022623538970947266, 0.0024080276489257812, 0.002553701400756836, 0.0026993751525878906, 0.0028450489044189453, 0.00299072265625, 0.0031363964080810547, 0.0032820701599121094, 0.003427743911743164, 0.0035734176635742188, 0.0037190914154052734, 0.003864765167236328, 0.004010438919067383, 0.0041561126708984375, 0.004301786422729492, 0.004447460174560547, 0.0045931339263916016, 0.004738807678222656, 0.004884481430053711, 0.005030155181884766, 0.00517582893371582, 0.005321502685546875]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 7.0, 3.0, 5.0, 7.0, 16.0, 9.0, 18.0, 23.0, 40.0, 47.0, 96.0, 116.0, 165.0, 340.0, 580.0, 1283.0, 1033782.0, 10017.0, 875.0, 417.0, 233.0, 142.0, 105.0, 74.0, 49.0, 38.0, 22.0, 16.0, 10.0, 6.0, 4.0, 5.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0943603515625, -0.09192609786987305, -0.0894918441772461, -0.08705759048461914, -0.08462333679199219, -0.08218908309936523, -0.07975482940673828, -0.07732057571411133, -0.07488632202148438, -0.07245206832885742, -0.07001781463623047, -0.06758356094360352, -0.06514930725097656, -0.06271505355834961, -0.060280799865722656, -0.0578465461730957, -0.05541229248046875, -0.0529780387878418, -0.050543785095214844, -0.04810953140258789, -0.04567527770996094, -0.043241024017333984, -0.04080677032470703, -0.03837251663208008, -0.035938262939453125, -0.03350400924682617, -0.03106975555419922, -0.028635501861572266, -0.026201248168945312, -0.02376699447631836, -0.021332740783691406, -0.018898487091064453, -0.0164642333984375, -0.014029979705810547, -0.011595726013183594, -0.00916147232055664, -0.0067272186279296875, -0.004292964935302734, -0.0018587112426757812, 0.0005755424499511719, 0.003009796142578125, 0.005444049835205078, 0.007878303527832031, 0.010312557220458984, 0.012746810913085938, 0.01518106460571289, 0.017615318298339844, 0.020049571990966797, 0.02248382568359375, 0.024918079376220703, 0.027352333068847656, 0.02978658676147461, 0.03222084045410156, 0.034655094146728516, 0.03708934783935547, 0.03952360153198242, 0.041957855224609375, 0.04439210891723633, 0.04682636260986328, 0.049260616302490234, 0.05169486999511719, 0.05412912368774414, 0.056563377380371094, 0.05899763107299805, 0.061431884765625]}, "gradients/decoder.transformer.h.1.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 8.0, 300.0, 678.0, 27.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005797862075269222, -0.0055241514928638935, -0.005250440910458565, -0.004976730793714523, -0.0047030202113091946, -0.004429309628903866, -0.004155599046498537, -0.0038818884640932083, -0.003608178114518523, -0.0033344675321131945, -0.0030607571825385094, -0.0027870466001331806, -0.002513336017727852, -0.0022396256681531668, -0.001965915085747838, -0.001692204619757831, -0.0014184941537678242, -0.0011447836877778172, -0.0008710731635801494, -0.0005973626393824816, -0.00032365217339247465, -4.994170740246773e-05, 0.00022376887500286102, 0.000497479340992868, 0.0007711898069828749, 0.0010449002729728818, 0.0013186107389628887, 0.0015923213213682175, 0.0018660317873582244, 0.0021397422533482313, 0.00241345283575356, 0.002687163185328245, 0.002960873767733574, 0.0032345843501389027, 0.0035082946997135878, 0.0037820052821189165, 0.004055715631693602, 0.00432942621409893, 0.004603136796504259, 0.004876847378909588, 0.005150557495653629, 0.005424268078058958, 0.005697978660464287, 0.005971688777208328, 0.006245399359613657, 0.006519109942018986, 0.0067928205244243145, 0.007066531106829643, 0.007340241689234972, 0.007613952271640301, 0.007887662388384342, 0.008161373436450958, 0.008435083553195, 0.008708793669939041, 0.008982504718005657, 0.009256214834749699, 0.009529925882816315, 0.009803635999560356, 0.010077347047626972, 0.010351057164371014, 0.01062476821243763, 0.010898478329181671, 0.011172188445925713, 0.011445899493992329, 0.01171960961073637]}, "gradients/decoder.transformer.h.1.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 1.0, 3.0, 11.0, 7.0, 7.0, 7.0, 6.0, 9.0, 12.0, 21.0, 19.0, 30.0, 26.0, 39.0, 28.0, 33.0, 23.0, 29.0, 38.0, 40.0, 55.0, 37.0, 45.0, 36.0, 32.0, 41.0, 36.0, 42.0, 29.0, 34.0, 35.0, 27.0, 21.0, 22.0, 13.0, 22.0, 22.0, 14.0, 13.0, 9.0, 6.0, 7.0, 6.0, 4.0, 2.0, 5.0, 0.0, 1.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00160294771194458, -0.0015491126105189323, -0.0014952775090932846, -0.0014414424076676369, -0.0013876073062419891, -0.0013337722048163414, -0.0012799371033906937, -0.001226102001965046, -0.0011722669005393982, -0.0011184317991137505, -0.0010645966976881027, -0.001010761596262455, -0.0009569264948368073, -0.0009030913934111595, -0.0008492562919855118, -0.000795421190559864, -0.0007415860891342163, -0.0006877509877085686, -0.0006339158862829208, -0.0005800807848572731, -0.0005262456834316254, -0.00047241058200597763, -0.0004185754805803299, -0.00036474037915468216, -0.0003109052777290344, -0.0002570701763033867, -0.00020323507487773895, -0.00014939997345209122, -9.556487202644348e-05, -4.1729770600795746e-05, 1.210533082485199e-05, 6.594043225049973e-05, 0.00011977553367614746, 0.0001736106351017952, 0.00022744573652744293, 0.00028128083795309067, 0.0003351159393787384, 0.00038895104080438614, 0.0004427861422300339, 0.0004966212436556816, 0.0005504563450813293, 0.0006042914465069771, 0.0006581265479326248, 0.0007119616493582726, 0.0007657967507839203, 0.000819631852209568, 0.0008734669536352158, 0.0009273020550608635, 0.0009811371564865112, 0.001034972257912159, 0.0010888073593378067, 0.0011426424607634544, 0.0011964775621891022, 0.00125031266361475, 0.0013041477650403976, 0.0013579828664660454, 0.0014118179678916931, 0.0014656530693173409, 0.0015194881707429886, 0.0015733232721686363, 0.001627158373594284, 0.0016809934750199318, 0.0017348285764455795, 0.0017886636778712273, 0.001842498779296875]}, "gradients/decoder.transformer.h.1.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 5.0, 5.0, 5.0, 5.0, 8.0, 8.0, 8.0, 13.0, 4.0, 14.0, 6.0, 18.0, 8.0, 22.0, 36.0, 27.0, 14.0, 36.0, 21.0, 40.0, 32.0, 30.0, 40.0, 47.0, 30.0, 32.0, 40.0, 41.0, 43.0, 39.0, 27.0, 34.0, 30.0, 32.0, 19.0, 24.0, 22.0, 16.0, 15.0, 9.0, 25.0, 18.0, 13.0, 9.0, 8.0, 9.0, 5.0, 5.0, 6.0, 4.0, 1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 2.0, 1.0], "bins": [-10.734375, -10.39599609375, -10.0576171875, -9.71923828125, -9.380859375, -9.04248046875, -8.7041015625, -8.36572265625, -8.02734375, -7.68896484375, -7.3505859375, -7.01220703125, -6.673828125, -6.33544921875, -5.9970703125, -5.65869140625, -5.3203125, -4.98193359375, -4.6435546875, -4.30517578125, -3.966796875, -3.62841796875, -3.2900390625, -2.95166015625, -2.61328125, -2.27490234375, -1.9365234375, -1.59814453125, -1.259765625, -0.92138671875, -0.5830078125, -0.24462890625, 0.09375, 0.43212890625, 0.7705078125, 1.10888671875, 1.447265625, 1.78564453125, 2.1240234375, 2.46240234375, 2.80078125, 3.13916015625, 3.4775390625, 3.81591796875, 4.154296875, 4.49267578125, 4.8310546875, 5.16943359375, 5.5078125, 5.84619140625, 6.1845703125, 6.52294921875, 6.861328125, 7.19970703125, 7.5380859375, 7.87646484375, 8.21484375, 8.55322265625, 8.8916015625, 9.22998046875, 9.568359375, 9.90673828125, 10.2451171875, 10.58349609375, 10.921875]}, "gradients/decoder.transformer.h.1.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 2.0, 5.0, 1.0, 7.0, 12.0, 11.0, 9.0, 21.0, 20.0, 34.0, 34.0, 41.0, 67.0, 81.0, 103.0, 150.0, 241.0, 327.0, 470.0, 715.0, 1142.0, 2129.0, 4439.0, 10828.0, 32401.0, 130003.0, 590026.0, 204592.0, 44410.0, 14303.0, 5481.0, 2482.0, 1411.0, 834.0, 496.0, 319.0, 256.0, 166.0, 107.0, 90.0, 89.0, 49.0, 42.0, 27.0, 16.0, 15.0, 14.0, 13.0, 10.0, 4.0, 5.0, 7.0, 2.0, 4.0, 1.0, 3.0, 3.0, 0.0, 0.0, 1.0], "bins": [-20.25, -19.5888671875, -18.927734375, -18.2666015625, -17.60546875, -16.9443359375, -16.283203125, -15.6220703125, -14.9609375, -14.2998046875, -13.638671875, -12.9775390625, -12.31640625, -11.6552734375, -10.994140625, -10.3330078125, -9.671875, -9.0107421875, -8.349609375, -7.6884765625, -7.02734375, -6.3662109375, -5.705078125, -5.0439453125, -4.3828125, -3.7216796875, -3.060546875, -2.3994140625, -1.73828125, -1.0771484375, -0.416015625, 0.2451171875, 0.90625, 1.5673828125, 2.228515625, 2.8896484375, 3.55078125, 4.2119140625, 4.873046875, 5.5341796875, 6.1953125, 6.8564453125, 7.517578125, 8.1787109375, 8.83984375, 9.5009765625, 10.162109375, 10.8232421875, 11.484375, 12.1455078125, 12.806640625, 13.4677734375, 14.12890625, 14.7900390625, 15.451171875, 16.1123046875, 16.7734375, 17.4345703125, 18.095703125, 18.7568359375, 19.41796875, 20.0791015625, 20.740234375, 21.4013671875, 22.0625]}, "gradients/decoder.transformer.h.1.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 3.0, 2.0, 9.0, 6.0, 9.0, 11.0, 14.0, 12.0, 21.0, 16.0, 20.0, 17.0, 41.0, 24.0, 41.0, 41.0, 56.0, 80.0, 94.0, 258.0, 1680.0, 153.0, 75.0, 65.0, 59.0, 41.0, 25.0, 35.0, 30.0, 20.0, 20.0, 11.0, 15.0, 9.0, 13.0, 9.0, 5.0, 4.0, 3.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-37.5, -36.375, -35.25, -34.125, -33.0, -31.875, -30.75, -29.625, -28.5, -27.375, -26.25, -25.125, -24.0, -22.875, -21.75, -20.625, -19.5, -18.375, -17.25, -16.125, -15.0, -13.875, -12.75, -11.625, -10.5, -9.375, -8.25, -7.125, -6.0, -4.875, -3.75, -2.625, -1.5, -0.375, 0.75, 1.875, 3.0, 4.125, 5.25, 6.375, 7.5, 8.625, 9.75, 10.875, 12.0, 13.125, 14.25, 15.375, 16.5, 17.625, 18.75, 19.875, 21.0, 22.125, 23.25, 24.375, 25.5, 26.625, 27.75, 28.875, 30.0, 31.125, 32.25, 33.375, 34.5]}, "gradients/decoder.transformer.h.1.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 6.0, 8.0, 8.0, 9.0, 12.0, 13.0, 12.0, 29.0, 32.0, 42.0, 38.0, 68.0, 144.0, 257.0, 529.0, 1158.0, 76885.0, 3062774.0, 2128.0, 656.0, 337.0, 183.0, 107.0, 53.0, 50.0, 31.0, 28.0, 18.0, 13.0, 17.0, 13.0, 13.0, 10.0, 4.0, 4.0, 3.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0], "bins": [-108.375, -105.0048828125, -101.634765625, -98.2646484375, -94.89453125, -91.5244140625, -88.154296875, -84.7841796875, -81.4140625, -78.0439453125, -74.673828125, -71.3037109375, -67.93359375, -64.5634765625, -61.193359375, -57.8232421875, -54.453125, -51.0830078125, -47.712890625, -44.3427734375, -40.97265625, -37.6025390625, -34.232421875, -30.8623046875, -27.4921875, -24.1220703125, -20.751953125, -17.3818359375, -14.01171875, -10.6416015625, -7.271484375, -3.9013671875, -0.53125, 2.8388671875, 6.208984375, 9.5791015625, 12.94921875, 16.3193359375, 19.689453125, 23.0595703125, 26.4296875, 29.7998046875, 33.169921875, 36.5400390625, 39.91015625, 43.2802734375, 46.650390625, 50.0205078125, 53.390625, 56.7607421875, 60.130859375, 63.5009765625, 66.87109375, 70.2412109375, 73.611328125, 76.9814453125, 80.3515625, 83.7216796875, 87.091796875, 90.4619140625, 93.83203125, 97.2021484375, 100.572265625, 103.9423828125, 107.3125]}, "gradients/decoder.transformer.h.1.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 121.0, 878.0, 15.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-455.6017761230469, -444.0265808105469, -432.45135498046875, -420.87615966796875, -409.30096435546875, -397.72576904296875, -386.15057373046875, -374.5753479003906, -363.0001525878906, -351.4249572753906, -339.8497314453125, -328.2745361328125, -316.6993408203125, -305.1241455078125, -293.5489501953125, -281.9737243652344, -270.3985290527344, -258.8233337402344, -247.2481231689453, -235.67291259765625, -224.09771728515625, -212.52252197265625, -200.9473114013672, -189.37210083007812, -177.79690551757812, -166.22171020507812, -154.64649963378906, -143.0712890625, -131.49609375, -119.92089080810547, -108.34568786621094, -96.7704849243164, -85.19525146484375, -73.62004852294922, -62.04484558105469, -50.469642639160156, -38.894439697265625, -27.319236755371094, -15.744033813476562, -4.168830871582031, 7.4063720703125, 18.98157501220703, 30.556777954101562, 42.131980895996094, 53.707183837890625, 65.28238677978516, 76.85758972167969, 88.43279266357422, 100.00799560546875, 111.58319854736328, 123.15840148925781, 134.73361206054688, 146.30880737304688, 157.88400268554688, 169.45921325683594, 181.034423828125, 192.609619140625, 204.184814453125, 215.76002502441406, 227.33523559570312, 238.91043090820312, 250.48562622070312, 262.06085205078125, 273.63604736328125, 285.21124267578125]}, "gradients/decoder.transformer.h.1.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 4.0, 4.0, 7.0, 5.0, 5.0, 7.0, 8.0, 22.0, 13.0, 25.0, 22.0, 18.0, 21.0, 24.0, 33.0, 33.0, 30.0, 36.0, 40.0, 35.0, 46.0, 43.0, 43.0, 45.0, 35.0, 31.0, 41.0, 40.0, 31.0, 44.0, 29.0, 39.0, 19.0, 20.0, 17.0, 15.0, 18.0, 15.0, 7.0, 10.0, 7.0, 6.0, 2.0, 6.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-88.51687622070312, -85.93806457519531, -83.3592529296875, -80.78043365478516, -78.20162200927734, -75.62281036376953, -73.04399871826172, -70.46517944335938, -67.88636779785156, -65.30755615234375, -62.72874069213867, -60.14992904663086, -57.57111358642578, -54.99230194091797, -52.413490295410156, -49.83467483520508, -47.255863189697266, -44.67705154418945, -42.098236083984375, -39.51942443847656, -36.940608978271484, -34.36179733276367, -31.782983779907227, -29.20417022705078, -26.625356674194336, -24.04654312133789, -21.467729568481445, -18.888916015625, -16.310104370117188, -13.731289863586426, -11.152477264404297, -8.573663711547852, -5.994850158691406, -3.41603684425354, -0.8372235298156738, 1.7415895462036133, 4.320403099060059, 6.899216651916504, 9.478029251098633, 12.056842803955078, 14.635656356811523, 17.21446990966797, 19.793283462524414, 22.37209701538086, 24.950908660888672, 27.52972412109375, 30.108535766601562, 32.687347412109375, 35.26616287231445, 37.844974517822266, 40.423789978027344, 43.002601623535156, 45.581417083740234, 48.16022872924805, 50.739044189453125, 53.31785583496094, 55.89666748046875, 58.47547912597656, 61.05429458618164, 63.63310623168945, 66.21192169189453, 68.79073333740234, 71.36954498291016, 73.9483642578125, 76.52717590332031]}, "gradients/decoder.transformer.h.0.mlp.c_proj.bias": {"_type": "histogram", "values": [4.0, 2.0, 2.0, 1.0, 3.0, 6.0, 4.0, 3.0, 8.0, 11.0, 10.0, 7.0, 17.0, 12.0, 17.0, 22.0, 17.0, 26.0, 26.0, 35.0, 29.0, 34.0, 44.0, 41.0, 40.0, 50.0, 34.0, 43.0, 48.0, 37.0, 42.0, 35.0, 34.0, 37.0, 26.0, 22.0, 28.0, 23.0, 26.0, 14.0, 14.0, 11.0, 16.0, 13.0, 11.0, 7.0, 5.0, 7.0, 5.0, 2.0, 1.0, 4.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.1484375, -11.7032470703125, -11.258056640625, -10.8128662109375, -10.36767578125, -9.9224853515625, -9.477294921875, -9.0321044921875, -8.5869140625, -8.1417236328125, -7.696533203125, -7.2513427734375, -6.80615234375, -6.3609619140625, -5.915771484375, -5.4705810546875, -5.025390625, -4.5802001953125, -4.135009765625, -3.6898193359375, -3.24462890625, -2.7994384765625, -2.354248046875, -1.9090576171875, -1.4638671875, -1.0186767578125, -0.573486328125, -0.1282958984375, 0.31689453125, 0.7620849609375, 1.207275390625, 1.6524658203125, 2.09765625, 2.5428466796875, 2.988037109375, 3.4332275390625, 3.87841796875, 4.3236083984375, 4.768798828125, 5.2139892578125, 5.6591796875, 6.1043701171875, 6.549560546875, 6.9947509765625, 7.43994140625, 7.8851318359375, 8.330322265625, 8.7755126953125, 9.220703125, 9.6658935546875, 10.111083984375, 10.5562744140625, 11.00146484375, 11.4466552734375, 11.891845703125, 12.3370361328125, 12.7822265625, 13.2274169921875, 13.672607421875, 14.1177978515625, 14.56298828125, 15.0081787109375, 15.453369140625, 15.8985595703125, 16.34375]}, "gradients/decoder.transformer.h.0.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 8.0, 7.0, 12.0, 11.0, 17.0, 23.0, 40.0, 34.0, 46.0, 59.0, 61.0, 85.0, 116.0, 176.0, 225.0, 318.0, 469.0, 880.0, 2177.0, 15473.0, 3759030.0, 407319.0, 4312.0, 1344.0, 606.0, 381.0, 250.0, 200.0, 126.0, 90.0, 73.0, 65.0, 55.0, 48.0, 29.0, 30.0, 21.0, 12.0, 20.0, 10.0, 7.0, 9.0, 5.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-94.6875, -91.3935546875, -88.099609375, -84.8056640625, -81.51171875, -78.2177734375, -74.923828125, -71.6298828125, -68.3359375, -65.0419921875, -61.748046875, -58.4541015625, -55.16015625, -51.8662109375, -48.572265625, -45.2783203125, -41.984375, -38.6904296875, -35.396484375, -32.1025390625, -28.80859375, -25.5146484375, -22.220703125, -18.9267578125, -15.6328125, -12.3388671875, -9.044921875, -5.7509765625, -2.45703125, 0.8369140625, 4.130859375, 7.4248046875, 10.71875, 14.0126953125, 17.306640625, 20.6005859375, 23.89453125, 27.1884765625, 30.482421875, 33.7763671875, 37.0703125, 40.3642578125, 43.658203125, 46.9521484375, 50.24609375, 53.5400390625, 56.833984375, 60.1279296875, 63.421875, 66.7158203125, 70.009765625, 73.3037109375, 76.59765625, 79.8916015625, 83.185546875, 86.4794921875, 89.7734375, 93.0673828125, 96.361328125, 99.6552734375, 102.94921875, 106.2431640625, 109.537109375, 112.8310546875, 116.125]}, "gradients/decoder.transformer.h.0.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 3.0, 6.0, 6.0, 6.0, 12.0, 29.0, 52.0, 84.0, 173.0, 409.0, 817.0, 1036.0, 739.0, 365.0, 168.0, 69.0, 40.0, 23.0, 13.0, 8.0, 8.0, 5.0, 4.0, 0.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-43.625, -42.31982421875, -41.0146484375, -39.70947265625, -38.404296875, -37.09912109375, -35.7939453125, -34.48876953125, -33.18359375, -31.87841796875, -30.5732421875, -29.26806640625, -27.962890625, -26.65771484375, -25.3525390625, -24.04736328125, -22.7421875, -21.43701171875, -20.1318359375, -18.82666015625, -17.521484375, -16.21630859375, -14.9111328125, -13.60595703125, -12.30078125, -10.99560546875, -9.6904296875, -8.38525390625, -7.080078125, -5.77490234375, -4.4697265625, -3.16455078125, -1.859375, -0.55419921875, 0.7509765625, 2.05615234375, 3.361328125, 4.66650390625, 5.9716796875, 7.27685546875, 8.58203125, 9.88720703125, 11.1923828125, 12.49755859375, 13.802734375, 15.10791015625, 16.4130859375, 17.71826171875, 19.0234375, 20.32861328125, 21.6337890625, 22.93896484375, 24.244140625, 25.54931640625, 26.8544921875, 28.15966796875, 29.46484375, 30.77001953125, 32.0751953125, 33.38037109375, 34.685546875, 35.99072265625, 37.2958984375, 38.60107421875, 39.90625]}, "gradients/decoder.transformer.h.0.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 8.0, 7.0, 10.0, 15.0, 18.0, 42.0, 78.0, 121.0, 302.0, 699.0, 2094.0, 10026.0, 189680.0, 3810705.0, 167694.0, 9514.0, 2002.0, 659.0, 288.0, 148.0, 65.0, 34.0, 21.0, 14.0, 9.0, 6.0, 4.0, 7.0, 6.0, 2.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-42.5, -41.05859375, -39.6171875, -38.17578125, -36.734375, -35.29296875, -33.8515625, -32.41015625, -30.96875, -29.52734375, -28.0859375, -26.64453125, -25.203125, -23.76171875, -22.3203125, -20.87890625, -19.4375, -17.99609375, -16.5546875, -15.11328125, -13.671875, -12.23046875, -10.7890625, -9.34765625, -7.90625, -6.46484375, -5.0234375, -3.58203125, -2.140625, -0.69921875, 0.7421875, 2.18359375, 3.625, 5.06640625, 6.5078125, 7.94921875, 9.390625, 10.83203125, 12.2734375, 13.71484375, 15.15625, 16.59765625, 18.0390625, 19.48046875, 20.921875, 22.36328125, 23.8046875, 25.24609375, 26.6875, 28.12890625, 29.5703125, 31.01171875, 32.453125, 33.89453125, 35.3359375, 36.77734375, 38.21875, 39.66015625, 41.1015625, 42.54296875, 43.984375, 45.42578125, 46.8671875, 48.30859375, 49.75]}, "gradients/decoder.transformer.h.0.ln_2.weight": {"_type": "histogram", "values": [5.0, 3.0, 6.0, 15.0, 14.0, 48.0, 85.0, 160.0, 191.0, 173.0, 139.0, 105.0, 40.0, 19.0, 5.0, 5.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-67.97997283935547, -60.34026336669922, -52.70055389404297, -45.06084060668945, -37.4211311340332, -29.781421661376953, -22.141708374023438, -14.501998901367188, -6.8622894287109375, 0.7774209976196289, 8.417131423950195, 16.056842803955078, 23.696552276611328, 31.336261749267578, 38.975975036621094, 46.615684509277344, 54.255393981933594, 61.895103454589844, 69.5348129272461, 77.17453002929688, 84.81423950195312, 92.45394897460938, 100.09365844726562, 107.73336791992188, 115.37307739257812, 123.01278686523438, 130.65249633789062, 138.29220581054688, 145.93191528320312, 153.57162475585938, 161.21133422851562, 168.85104370117188, 176.49075317382812, 184.13046264648438, 191.77017211914062, 199.40988159179688, 207.04959106445312, 214.68930053710938, 222.32901000976562, 229.96871948242188, 237.60842895507812, 245.24813842773438, 252.88784790039062, 260.5275573730469, 268.1672668457031, 275.8069763183594, 283.4466857910156, 291.0863952636719, 298.72613525390625, 306.3658447265625, 314.00555419921875, 321.645263671875, 329.28497314453125, 336.9246826171875, 344.56439208984375, 352.2041015625, 359.84381103515625, 367.4835205078125, 375.12322998046875, 382.762939453125, 390.40264892578125, 398.0423583984375, 405.68206787109375, 413.32177734375, 420.96148681640625]}, "gradients/decoder.transformer.h.0.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 6.0, 3.0, 7.0, 6.0, 11.0, 14.0, 6.0, 13.0, 17.0, 21.0, 21.0, 36.0, 28.0, 42.0, 35.0, 38.0, 42.0, 47.0, 42.0, 36.0, 44.0, 43.0, 39.0, 49.0, 33.0, 41.0, 47.0, 38.0, 21.0, 33.0, 26.0, 17.0, 16.0, 19.0, 16.0, 8.0, 16.0, 8.0, 7.0, 4.0, 3.0, 2.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-76.54390716552734, -73.96456909179688, -71.38522338867188, -68.80587768554688, -66.2265396118164, -63.64719772338867, -61.06785583496094, -58.4885139465332, -55.90917205810547, -53.329830169677734, -50.75048828125, -48.171146392822266, -45.59180450439453, -43.0124626159668, -40.43312072753906, -37.85377883911133, -35.274436950683594, -32.69509506225586, -30.115753173828125, -27.53641128540039, -24.957069396972656, -22.377727508544922, -19.798385620117188, -17.219043731689453, -14.639701843261719, -12.060359954833984, -9.48101806640625, -6.901676177978516, -4.322334289550781, -1.7429924011230469, 0.8363494873046875, 3.415691375732422, 5.995025634765625, 8.57436752319336, 11.153709411621094, 13.733051300048828, 16.312393188476562, 18.891735076904297, 21.47107696533203, 24.050418853759766, 26.6297607421875, 29.209102630615234, 31.78844451904297, 34.3677864074707, 36.94712829589844, 39.52647018432617, 42.105812072753906, 44.68515396118164, 47.264495849609375, 49.84383773803711, 52.423179626464844, 55.00252151489258, 57.58186340332031, 60.16120529174805, 62.74054718017578, 65.31988525390625, 67.89923095703125, 70.47857666015625, 73.05791473388672, 75.63725280761719, 78.21659851074219, 80.79594421386719, 83.37528228759766, 85.95462036132812, 88.53396606445312]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 7.0, 8.0, 12.0, 6.0, 11.0, 13.0, 13.0, 11.0, 17.0, 21.0, 27.0, 35.0, 45.0, 26.0, 43.0, 51.0, 55.0, 35.0, 51.0, 42.0, 40.0, 43.0, 52.0, 40.0, 45.0, 33.0, 32.0, 29.0, 19.0, 28.0, 21.0, 24.0, 9.0, 15.0, 13.0, 8.0, 7.0, 6.0, 6.0, 4.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-125.625, -121.380859375, -117.13671875, -112.892578125, -108.6484375, -104.404296875, -100.16015625, -95.916015625, -91.671875, -87.427734375, -83.18359375, -78.939453125, -74.6953125, -70.451171875, -66.20703125, -61.962890625, -57.71875, -53.474609375, -49.23046875, -44.986328125, -40.7421875, -36.498046875, -32.25390625, -28.009765625, -23.765625, -19.521484375, -15.27734375, -11.033203125, -6.7890625, -2.544921875, 1.69921875, 5.943359375, 10.1875, 14.431640625, 18.67578125, 22.919921875, 27.1640625, 31.408203125, 35.65234375, 39.896484375, 44.140625, 48.384765625, 52.62890625, 56.873046875, 61.1171875, 65.361328125, 69.60546875, 73.849609375, 78.09375, 82.337890625, 86.58203125, 90.826171875, 95.0703125, 99.314453125, 103.55859375, 107.802734375, 112.046875, 116.291015625, 120.53515625, 124.779296875, 129.0234375, 133.267578125, 137.51171875, 141.755859375, 146.0]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 2.0, 3.0, 6.0, 5.0, 11.0, 17.0, 17.0, 41.0, 44.0, 78.0, 123.0, 199.0, 344.0, 573.0, 824.0, 1356.0, 2245.0, 3641.0, 5944.0, 9996.0, 16073.0, 27057.0, 45498.0, 78062.0, 134245.0, 224577.0, 205247.0, 119846.0, 69856.0, 41082.0, 24129.0, 14488.0, 8839.0, 5400.0, 3308.0, 2066.0, 1215.0, 783.0, 488.0, 317.0, 199.0, 113.0, 66.0, 52.0, 35.0, 18.0, 16.0, 6.0, 3.0, 8.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-18.0625, -17.513427734375, -16.96435546875, -16.415283203125, -15.8662109375, -15.317138671875, -14.76806640625, -14.218994140625, -13.669921875, -13.120849609375, -12.57177734375, -12.022705078125, -11.4736328125, -10.924560546875, -10.37548828125, -9.826416015625, -9.27734375, -8.728271484375, -8.17919921875, -7.630126953125, -7.0810546875, -6.531982421875, -5.98291015625, -5.433837890625, -4.884765625, -4.335693359375, -3.78662109375, -3.237548828125, -2.6884765625, -2.139404296875, -1.59033203125, -1.041259765625, -0.4921875, 0.056884765625, 0.60595703125, 1.155029296875, 1.7041015625, 2.253173828125, 2.80224609375, 3.351318359375, 3.900390625, 4.449462890625, 4.99853515625, 5.547607421875, 6.0966796875, 6.645751953125, 7.19482421875, 7.743896484375, 8.29296875, 8.842041015625, 9.39111328125, 9.940185546875, 10.4892578125, 11.038330078125, 11.58740234375, 12.136474609375, 12.685546875, 13.234619140625, 13.78369140625, 14.332763671875, 14.8818359375, 15.430908203125, 15.97998046875, 16.529052734375, 17.078125]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 5.0, 2.0, 3.0, 2.0, 6.0, 9.0, 12.0, 13.0, 14.0, 21.0, 22.0, 22.0, 24.0, 35.0, 29.0, 35.0, 32.0, 36.0, 45.0, 43.0, 27.0, 37.0, 1070.0, 37.0, 42.0, 38.0, 39.0, 36.0, 34.0, 34.0, 31.0, 20.0, 31.0, 16.0, 18.0, 14.0, 19.0, 18.0, 12.0, 7.0, 15.0, 2.0, 4.0, 6.0, 4.0, 4.0, 2.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-75.125, -72.7548828125, -70.384765625, -68.0146484375, -65.64453125, -63.2744140625, -60.904296875, -58.5341796875, -56.1640625, -53.7939453125, -51.423828125, -49.0537109375, -46.68359375, -44.3134765625, -41.943359375, -39.5732421875, -37.203125, -34.8330078125, -32.462890625, -30.0927734375, -27.72265625, -25.3525390625, -22.982421875, -20.6123046875, -18.2421875, -15.8720703125, -13.501953125, -11.1318359375, -8.76171875, -6.3916015625, -4.021484375, -1.6513671875, 0.71875, 3.0888671875, 5.458984375, 7.8291015625, 10.19921875, 12.5693359375, 14.939453125, 17.3095703125, 19.6796875, 22.0498046875, 24.419921875, 26.7900390625, 29.16015625, 31.5302734375, 33.900390625, 36.2705078125, 38.640625, 41.0107421875, 43.380859375, 45.7509765625, 48.12109375, 50.4912109375, 52.861328125, 55.2314453125, 57.6015625, 59.9716796875, 62.341796875, 64.7119140625, 67.08203125, 69.4521484375, 71.822265625, 74.1923828125, 76.5625]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 4.0, 4.0, 5.0, 7.0, 17.0, 30.0, 35.0, 53.0, 90.0, 151.0, 183.0, 311.0, 469.0, 549.0, 898.0, 1287.0, 1900.0, 2835.0, 4134.0, 6415.0, 9610.0, 14561.0, 21894.0, 33684.0, 51765.0, 81799.0, 130350.0, 1246862.0, 175825.0, 111310.0, 69969.0, 44599.0, 28595.0, 19052.0, 12756.0, 8246.0, 5420.0, 3725.0, 2505.0, 1725.0, 1119.0, 805.0, 533.0, 333.0, 215.0, 170.0, 119.0, 73.0, 56.0, 26.0, 20.0, 19.0, 13.0, 6.0, 4.0, 3.0, 3.0, 0.0, 1.0, 2.0], "bins": [-12.8515625, -12.44775390625, -12.0439453125, -11.64013671875, -11.236328125, -10.83251953125, -10.4287109375, -10.02490234375, -9.62109375, -9.21728515625, -8.8134765625, -8.40966796875, -8.005859375, -7.60205078125, -7.1982421875, -6.79443359375, -6.390625, -5.98681640625, -5.5830078125, -5.17919921875, -4.775390625, -4.37158203125, -3.9677734375, -3.56396484375, -3.16015625, -2.75634765625, -2.3525390625, -1.94873046875, -1.544921875, -1.14111328125, -0.7373046875, -0.33349609375, 0.0703125, 0.47412109375, 0.8779296875, 1.28173828125, 1.685546875, 2.08935546875, 2.4931640625, 2.89697265625, 3.30078125, 3.70458984375, 4.1083984375, 4.51220703125, 4.916015625, 5.31982421875, 5.7236328125, 6.12744140625, 6.53125, 6.93505859375, 7.3388671875, 7.74267578125, 8.146484375, 8.55029296875, 8.9541015625, 9.35791015625, 9.76171875, 10.16552734375, 10.5693359375, 10.97314453125, 11.376953125, 11.78076171875, 12.1845703125, 12.58837890625, 12.9921875]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 1.0, 6.0, 4.0, 6.0, 8.0, 8.0, 17.0, 17.0, 30.0, 42.0, 54.0, 62.0, 61.0, 83.0, 87.0, 86.0, 81.0, 69.0, 63.0, 53.0, 42.0, 34.0, 24.0, 15.0, 8.0, 9.0, 9.0, 4.0, 7.0, 4.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.035980224609375, -0.03482484817504883, -0.033669471740722656, -0.032514095306396484, -0.03135871887207031, -0.03020334243774414, -0.02904796600341797, -0.027892589569091797, -0.026737213134765625, -0.025581836700439453, -0.02442646026611328, -0.02327108383178711, -0.022115707397460938, -0.020960330963134766, -0.019804954528808594, -0.018649578094482422, -0.01749420166015625, -0.016338825225830078, -0.015183448791503906, -0.014028072357177734, -0.012872695922851562, -0.01171731948852539, -0.010561943054199219, -0.009406566619873047, -0.008251190185546875, -0.007095813751220703, -0.005940437316894531, -0.004785060882568359, -0.0036296844482421875, -0.0024743080139160156, -0.0013189315795898438, -0.00016355514526367188, 0.0009918212890625, 0.002147197723388672, 0.0033025741577148438, 0.004457950592041016, 0.0056133270263671875, 0.006768703460693359, 0.007924079895019531, 0.009079456329345703, 0.010234832763671875, 0.011390209197998047, 0.012545585632324219, 0.01370096206665039, 0.014856338500976562, 0.016011714935302734, 0.017167091369628906, 0.018322467803955078, 0.01947784423828125, 0.020633220672607422, 0.021788597106933594, 0.022943973541259766, 0.024099349975585938, 0.02525472640991211, 0.02641010284423828, 0.027565479278564453, 0.028720855712890625, 0.029876232147216797, 0.03103160858154297, 0.03218698501586914, 0.03334236145019531, 0.034497737884521484, 0.035653114318847656, 0.03680849075317383, 0.0379638671875]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 8.0, 8.0, 7.0, 14.0, 23.0, 21.0, 27.0, 44.0, 66.0, 86.0, 147.0, 255.0, 476.0, 870.0, 1908.0, 4954.0, 22237.0, 294574.0, 655178.0, 54127.0, 8208.0, 2661.0, 1214.0, 549.0, 326.0, 198.0, 108.0, 67.0, 45.0, 32.0, 33.0, 25.0, 13.0, 12.0, 5.0, 5.0, 5.0, 4.0, 3.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.298095703125, -0.28911590576171875, -0.2801361083984375, -0.27115631103515625, -0.262176513671875, -0.25319671630859375, -0.2442169189453125, -0.23523712158203125, -0.22625732421875, -0.21727752685546875, -0.2082977294921875, -0.19931793212890625, -0.190338134765625, -0.18135833740234375, -0.1723785400390625, -0.16339874267578125, -0.1544189453125, -0.14543914794921875, -0.1364593505859375, -0.12747955322265625, -0.118499755859375, -0.10951995849609375, -0.1005401611328125, -0.09156036376953125, -0.08258056640625, -0.07360076904296875, -0.0646209716796875, -0.05564117431640625, -0.046661376953125, -0.03768157958984375, -0.0287017822265625, -0.01972198486328125, -0.0107421875, -0.00176239013671875, 0.0072174072265625, 0.01619720458984375, 0.025177001953125, 0.03415679931640625, 0.0431365966796875, 0.05211639404296875, 0.06109619140625, 0.07007598876953125, 0.0790557861328125, 0.08803558349609375, 0.097015380859375, 0.10599517822265625, 0.1149749755859375, 0.12395477294921875, 0.1329345703125, 0.14191436767578125, 0.1508941650390625, 0.15987396240234375, 0.168853759765625, 0.17783355712890625, 0.1868133544921875, 0.19579315185546875, 0.20477294921875, 0.21375274658203125, 0.2227325439453125, 0.23171234130859375, 0.240692138671875, 0.24967193603515625, 0.2586517333984375, 0.26763153076171875, 0.276611328125]}, "gradients/decoder.transformer.h.0.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 10.0, 13.0, 40.0, 99.0, 135.0, 232.0, 195.0, 120.0, 72.0, 37.0, 20.0, 14.0, 9.0, 3.0, 3.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.025282271206378937, -0.023666420951485634, -0.02205057069659233, -0.020434722304344177, -0.018818870186805725, -0.01720302179455757, -0.015587171539664268, -0.013971321284770966, -0.012355471029877663, -0.01073962077498436, -0.009123770520091057, -0.0075079211965203285, -0.005892070941627026, -0.004276220686733723, -0.0026603713631629944, -0.0010445211082696915, 0.0005713291466236115, 0.0021871791686862707, 0.00380302919074893, 0.005418878979980946, 0.0070347292348742485, 0.008650579489767551, 0.01026642881333828, 0.011882279068231583, 0.013498129323124886, 0.015113979578018188, 0.01672982983291149, 0.018345680087804794, 0.019961528480052948, 0.0215773805975914, 0.023193228989839554, 0.024809079244732857, 0.02642492949962616, 0.028040779754519463, 0.029656630009412766, 0.03127247840166092, 0.03288833051919937, 0.034504178911447525, 0.03612002730369568, 0.03773587942123413, 0.03935173153877258, 0.04096757993102074, 0.04258343204855919, 0.04419928044080734, 0.045815132558345795, 0.04743098095059395, 0.0490468293428421, 0.050662681460380554, 0.05227852985262871, 0.05389437824487686, 0.055510230362415314, 0.05712607875466347, 0.05874193087220192, 0.06035777926445007, 0.061973631381988525, 0.06358948349952698, 0.06520532816648483, 0.06682118028402328, 0.06843702495098114, 0.07005287706851959, 0.07166872918605804, 0.0732845813035965, 0.07490042597055435, 0.0765162780880928, 0.07813213020563126]}, "gradients/decoder.transformer.h.0.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 4.0, 5.0, 5.0, 9.0, 8.0, 5.0, 7.0, 8.0, 11.0, 9.0, 24.0, 17.0, 18.0, 25.0, 32.0, 34.0, 32.0, 41.0, 30.0, 44.0, 41.0, 44.0, 43.0, 37.0, 44.0, 29.0, 39.0, 37.0, 41.0, 37.0, 34.0, 27.0, 37.0, 22.0, 21.0, 18.0, 19.0, 17.0, 13.0, 9.0, 7.0, 6.0, 2.0, 6.0, 2.0, 4.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.015754342079162598, -0.01525197084993124, -0.014749599620699883, -0.014247228391468525, -0.013744857162237167, -0.01324248593300581, -0.012740114703774452, -0.012237743474543095, -0.011735372245311737, -0.01123300101608038, -0.010730629786849022, -0.010228258557617664, -0.009725887328386307, -0.00922351609915495, -0.008721144869923592, -0.008218773640692234, -0.0077164024114608765, -0.007214031182229519, -0.006711659952998161, -0.006209288723766804, -0.005706917494535446, -0.005204546265304089, -0.004702175036072731, -0.0041998038068413734, -0.003697432577610016, -0.0031950613483786583, -0.0026926901191473007, -0.002190318889915943, -0.0016879476606845856, -0.001185576431453228, -0.0006832052022218704, -0.00018083397299051285, 0.0003215372562408447, 0.0008239084854722023, 0.0013262797147035599, 0.0018286509439349174, 0.002331022173166275, 0.0028333934023976326, 0.00333576463162899, 0.0038381358608603477, 0.004340507090091705, 0.004842878319323063, 0.0053452495485544205, 0.005847620777785778, 0.006349992007017136, 0.006852363236248493, 0.007354734465479851, 0.007857105694711208, 0.008359476923942566, 0.008861848153173923, 0.009364219382405281, 0.009866590611636639, 0.010368961840867996, 0.010871333070099354, 0.011373704299330711, 0.011876075528562069, 0.012378446757793427, 0.012880817987024784, 0.013383189216256142, 0.0138855604454875, 0.014387931674718857, 0.014890302903950214, 0.015392674133181572, 0.015895046293735504, 0.016397416591644287]}, "gradients/decoder.transformer.h.0.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 7.0, 9.0, 10.0, 6.0, 12.0, 13.0, 13.0, 11.0, 17.0, 21.0, 29.0, 33.0, 45.0, 26.0, 43.0, 51.0, 54.0, 36.0, 52.0, 41.0, 40.0, 43.0, 50.0, 42.0, 45.0, 32.0, 33.0, 29.0, 19.0, 28.0, 21.0, 24.0, 9.0, 15.0, 13.0, 8.0, 7.0, 6.0, 6.0, 4.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-125.6875, -121.4423828125, -117.197265625, -112.9521484375, -108.70703125, -104.4619140625, -100.216796875, -95.9716796875, -91.7265625, -87.4814453125, -83.236328125, -78.9912109375, -74.74609375, -70.5009765625, -66.255859375, -62.0107421875, -57.765625, -53.5205078125, -49.275390625, -45.0302734375, -40.78515625, -36.5400390625, -32.294921875, -28.0498046875, -23.8046875, -19.5595703125, -15.314453125, -11.0693359375, -6.82421875, -2.5791015625, 1.666015625, 5.9111328125, 10.15625, 14.4013671875, 18.646484375, 22.8916015625, 27.13671875, 31.3818359375, 35.626953125, 39.8720703125, 44.1171875, 48.3623046875, 52.607421875, 56.8525390625, 61.09765625, 65.3427734375, 69.587890625, 73.8330078125, 78.078125, 82.3232421875, 86.568359375, 90.8134765625, 95.05859375, 99.3037109375, 103.548828125, 107.7939453125, 112.0390625, 116.2841796875, 120.529296875, 124.7744140625, 129.01953125, 133.2646484375, 137.509765625, 141.7548828125, 146.0]}, "gradients/decoder.transformer.h.0.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 8.0, 7.0, 8.0, 12.0, 14.0, 16.0, 20.0, 41.0, 45.0, 60.0, 115.0, 159.0, 257.0, 514.0, 910.0, 1521.0, 3244.0, 7414.0, 21610.0, 101059.0, 591186.0, 256729.0, 42256.0, 11784.0, 4543.0, 2235.0, 1166.0, 616.0, 341.0, 216.0, 141.0, 85.0, 58.0, 52.0, 24.0, 20.0, 19.0, 12.0, 16.0, 8.0, 4.0, 6.0, 2.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.0, -34.86572265625, -33.7314453125, -32.59716796875, -31.462890625, -30.32861328125, -29.1943359375, -28.06005859375, -26.92578125, -25.79150390625, -24.6572265625, -23.52294921875, -22.388671875, -21.25439453125, -20.1201171875, -18.98583984375, -17.8515625, -16.71728515625, -15.5830078125, -14.44873046875, -13.314453125, -12.18017578125, -11.0458984375, -9.91162109375, -8.77734375, -7.64306640625, -6.5087890625, -5.37451171875, -4.240234375, -3.10595703125, -1.9716796875, -0.83740234375, 0.296875, 1.43115234375, 2.5654296875, 3.69970703125, 4.833984375, 5.96826171875, 7.1025390625, 8.23681640625, 9.37109375, 10.50537109375, 11.6396484375, 12.77392578125, 13.908203125, 15.04248046875, 16.1767578125, 17.31103515625, 18.4453125, 19.57958984375, 20.7138671875, 21.84814453125, 22.982421875, 24.11669921875, 25.2509765625, 26.38525390625, 27.51953125, 28.65380859375, 29.7880859375, 30.92236328125, 32.056640625, 33.19091796875, 34.3251953125, 35.45947265625, 36.59375]}, "gradients/decoder.transformer.h.0.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 7.0, 7.0, 12.0, 15.0, 19.0, 30.0, 37.0, 56.0, 68.0, 81.0, 91.0, 2133.0, 119.0, 76.0, 80.0, 61.0, 51.0, 27.0, 27.0, 29.0, 10.0, 4.0, 5.0, 7.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-451.75, -439.46484375, -427.1796875, -414.89453125, -402.609375, -390.32421875, -378.0390625, -365.75390625, -353.46875, -341.18359375, -328.8984375, -316.61328125, -304.328125, -292.04296875, -279.7578125, -267.47265625, -255.1875, -242.90234375, -230.6171875, -218.33203125, -206.046875, -193.76171875, -181.4765625, -169.19140625, -156.90625, -144.62109375, -132.3359375, -120.05078125, -107.765625, -95.48046875, -83.1953125, -70.91015625, -58.625, -46.33984375, -34.0546875, -21.76953125, -9.484375, 2.80078125, 15.0859375, 27.37109375, 39.65625, 51.94140625, 64.2265625, 76.51171875, 88.796875, 101.08203125, 113.3671875, 125.65234375, 137.9375, 150.22265625, 162.5078125, 174.79296875, 187.078125, 199.36328125, 211.6484375, 223.93359375, 236.21875, 248.50390625, 260.7890625, 273.07421875, 285.359375, 297.64453125, 309.9296875, 322.21484375, 334.5]}, "gradients/decoder.transformer.h.0.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 9.0, 8.0, 21.0, 37.0, 101.0, 221.0, 413.0, 1165.0, 5408.0, 2800223.0, 332367.0, 4076.0, 933.0, 359.0, 198.0, 75.0, 43.0, 23.0, 10.0, 5.0, 4.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-135.375, -131.3857421875, -127.396484375, -123.4072265625, -119.41796875, -115.4287109375, -111.439453125, -107.4501953125, -103.4609375, -99.4716796875, -95.482421875, -91.4931640625, -87.50390625, -83.5146484375, -79.525390625, -75.5361328125, -71.546875, -67.5576171875, -63.568359375, -59.5791015625, -55.58984375, -51.6005859375, -47.611328125, -43.6220703125, -39.6328125, -35.6435546875, -31.654296875, -27.6650390625, -23.67578125, -19.6865234375, -15.697265625, -11.7080078125, -7.71875, -3.7294921875, 0.259765625, 4.2490234375, 8.23828125, 12.2275390625, 16.216796875, 20.2060546875, 24.1953125, 28.1845703125, 32.173828125, 36.1630859375, 40.15234375, 44.1416015625, 48.130859375, 52.1201171875, 56.109375, 60.0986328125, 64.087890625, 68.0771484375, 72.06640625, 76.0556640625, 80.044921875, 84.0341796875, 88.0234375, 92.0126953125, 96.001953125, 99.9912109375, 103.98046875, 107.9697265625, 111.958984375, 115.9482421875, 119.9375]}, "gradients/decoder.transformer.h.0.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 4.0, 4.0, 8.0, 8.0, 23.0, 48.0, 105.0, 198.0, 232.0, 177.0, 83.0, 46.0, 27.0, 13.0, 8.0, 7.0, 4.0, 2.0, 4.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-252.2066650390625, -237.6568603515625, -223.1070556640625, -208.55726623535156, -194.00746154785156, -179.45765686035156, -164.90786743164062, -150.35806274414062, -135.80825805664062, -121.25845336914062, -106.70865631103516, -92.15885925292969, -77.60905456542969, -63.05924987792969, -48.50945281982422, -33.95965576171875, -19.40985107421875, -4.860050201416016, 9.689750671386719, 24.239551544189453, 38.78935241699219, 53.33915710449219, 67.88895416259766, 82.43875122070312, 96.98855590820312, 111.53836059570312, 126.0881576538086, 140.63795471191406, 155.18775939941406, 169.73756408691406, 184.287353515625, 198.837158203125, 213.386962890625, 227.936767578125, 242.486572265625, 257.036376953125, 271.586181640625, 286.135986328125, 300.6857604980469, 315.2355651855469, 329.7853698730469, 344.3351745605469, 358.8849792480469, 373.4347839355469, 387.98455810546875, 402.53436279296875, 417.08416748046875, 431.63397216796875, 446.18377685546875, 460.73358154296875, 475.28338623046875, 489.83319091796875, 504.38299560546875, 518.9328002929688, 533.4826049804688, 548.0323486328125, 562.5821533203125, 577.1319580078125, 591.6817626953125, 606.2315673828125, 620.7813720703125, 635.3311767578125, 649.8809814453125, 664.4307861328125, 678.9805908203125]}, "gradients/decoder.transformer.h.0.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 5.0, 3.0, 4.0, 5.0, 7.0, 8.0, 10.0, 5.0, 9.0, 13.0, 8.0, 15.0, 16.0, 14.0, 17.0, 26.0, 32.0, 29.0, 40.0, 37.0, 50.0, 41.0, 47.0, 47.0, 49.0, 53.0, 44.0, 42.0, 41.0, 37.0, 40.0, 29.0, 26.0, 20.0, 19.0, 20.0, 18.0, 20.0, 14.0, 6.0, 7.0, 10.0, 11.0, 3.0, 6.0, 5.0, 4.0, 2.0, 1.0, 1.0, 2.0], "bins": [-336.05377197265625, -327.0004577636719, -317.9471435546875, -308.893798828125, -299.8404846191406, -290.78717041015625, -281.73382568359375, -272.6805114746094, -263.627197265625, -254.57388305664062, -245.5205535888672, -236.46722412109375, -227.41390991210938, -218.360595703125, -209.30726623535156, -200.25393676757812, -191.20062255859375, -182.14730834960938, -173.09397888183594, -164.0406494140625, -154.98733520507812, -145.93402099609375, -136.8806915283203, -127.8273696899414, -118.7740478515625, -109.7207260131836, -100.66740417480469, -91.61408233642578, -82.56076049804688, -73.50743865966797, -64.45411682128906, -55.400794982910156, -46.34747314453125, -37.294151306152344, -28.240829467773438, -19.18750762939453, -10.134185791015625, -1.0808639526367188, 7.9724578857421875, 17.025779724121094, 26.0791015625, 35.132423400878906, 44.18574523925781, 53.23906707763672, 62.292388916015625, 71.34571075439453, 80.39903259277344, 89.45235443115234, 98.50567626953125, 107.55899810791016, 116.61231994628906, 125.66564178466797, 134.71896362304688, 143.77227783203125, 152.8256072998047, 161.87893676757812, 170.9322509765625, 179.98556518554688, 189.0388946533203, 198.09222412109375, 207.14553833007812, 216.1988525390625, 225.25218200683594, 234.30551147460938, 243.35882568359375]}, "gradients/decoder.transformer.wpe.weight": {"_type": "histogram", "values": [4.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 6.0, 7.0, 6.0, 5.0, 12.0, 10.0, 10.0, 19.0, 28.0, 32.0, 34.0, 52.0, 40.0, 61.0, 86.0, 115.0, 141.0, 167.0, 187.0, 219.0, 280.0, 318.0, 387.0, 509.0, 697.0, 1040771.0, 1127.0, 611.0, 485.0, 389.0, 338.0, 246.0, 212.0, 165.0, 133.0, 123.0, 108.0, 79.0, 68.0, 59.0, 42.0, 49.0, 20.0, 21.0, 19.0, 10.0, 12.0, 11.0, 9.0, 6.0, 3.0, 6.0, 5.0, 2.0, 1.0, 4.0], "bins": [-71.16878509521484, -69.06757354736328, -66.96636199951172, -64.86515045166016, -62.76394271850586, -60.6627311706543, -58.561519622802734, -56.46030807495117, -54.359100341796875, -52.25788879394531, -50.15667724609375, -48.05546569824219, -45.95425796508789, -43.85304641723633, -41.751834869384766, -39.6506233215332, -37.54941177368164, -35.44820022583008, -33.346988677978516, -31.245779037475586, -29.144569396972656, -27.043357849121094, -24.94214630126953, -22.84093475341797, -20.73972511291504, -18.638513565063477, -16.537303924560547, -14.436092376708984, -12.334881782531738, -10.233671188354492, -8.13245964050293, -6.031249046325684, -3.930034637451172, -1.8288238048553467, 0.2723870277404785, 2.373598098754883, 4.474808692932129, 6.576019287109375, 8.677230834960938, 10.778441429138184, 12.87965202331543, 14.980862617492676, 17.082073211669922, 19.183284759521484, 21.284496307373047, 23.385705947875977, 25.48691749572754, 27.58812713623047, 29.68933868408203, 31.790550231933594, 33.891761779785156, 35.99297332763672, 38.094181060791016, 40.19539260864258, 42.29660415649414, 44.3978157043457, 46.4990234375, 48.60023498535156, 50.701446533203125, 52.80265808105469, 54.903865814208984, 57.00507736206055, 59.10628890991211, 61.20750045776367, 63.308712005615234]}, "gradients/decoder.transformer.wte.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 6.0, 5.0, 5.0, 10.0, 9.0, 2.0, 3.0, 6.0, 5.0, 14.0, 24.0, 44.0, 106.0, 2091.0, 51460616.0, 116.0, 50.0, 22.0, 11.0, 17.0, 7.0, 1.0, 2.0, 3.0, 1.0, 4.0, 1.0, 6.0, 2.0, 4.0, 7.0, 3.0, 1.0, 2.0, 3.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-9416.0, -9009.373046875, -8602.74609375, -8196.119140625, -7789.4921875, -7382.865234375, -6976.23876953125, -6569.61181640625, -6162.98486328125, -5756.35791015625, -5349.73095703125, -4943.1044921875, -4536.4775390625, -4129.8505859375, -3723.2236328125, -3316.5966796875, -2909.9697265625, -2503.3427734375, -2096.7158203125, -1690.089111328125, -1283.462158203125, -876.835205078125, -470.20849609375, -63.58154296875, 343.04541015625, 749.6723022460938, 1156.2991943359375, 1562.926025390625, 1969.552978515625, 2376.179931640625, 2782.806640625, 3189.43359375, 3596.060546875, 4002.6875, 4409.314453125, 4815.94140625, 5222.568359375, 5629.1953125, 6035.82177734375, 6442.44873046875, 6849.07568359375, 7255.70263671875, 7662.32958984375, 8068.9560546875, 8475.5830078125, 8882.2099609375, 9288.8369140625, 9695.4638671875, 10102.0908203125, 10508.7177734375, 10915.3447265625, 11321.9716796875, 11728.5986328125, 12135.2255859375, 12541.8515625, 12948.478515625, 13355.10546875, 13761.732421875, 14168.359375, 14574.986328125, 14981.61328125, 15388.240234375, 15794.8671875, 16201.494140625, 16608.12109375]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 1.0, 4.0, 2.0, 4.0, 4.0, 14.0, 18.0, 39.0, 55.0, 66.0, 107.0, 170.0, 225.0, 357.0, 541.0, 792.0, 1218.0, 2009.0, 3039.0, 4664.0, 7516.0, 11551.0, 18736.0, 30368.0, 49329.0, 80966.0, 136573.0, 235494.0, 436492.0, 3765087.0, 737497.0, 317174.0, 178811.0, 105802.0, 63129.0, 38739.0, 23930.0, 14989.0, 9205.0, 6019.0, 3718.0, 2438.0, 1599.0, 932.0, 700.0, 462.0, 284.0, 185.0, 132.0, 105.0, 55.0, 37.0, 28.0, 12.0, 8.0, 11.0, 2.0, 2.0, 2.0, 2.0], "bins": [-8.6953125, -8.43768310546875, -8.1800537109375, -7.92242431640625, -7.664794921875, -7.40716552734375, -7.1495361328125, -6.89190673828125, -6.63427734375, -6.37664794921875, -6.1190185546875, -5.86138916015625, -5.603759765625, -5.34613037109375, -5.0885009765625, -4.83087158203125, -4.5732421875, -4.31561279296875, -4.0579833984375, -3.80035400390625, -3.542724609375, -3.28509521484375, -3.0274658203125, -2.76983642578125, -2.51220703125, -2.25457763671875, -1.9969482421875, -1.73931884765625, -1.481689453125, -1.22406005859375, -0.9664306640625, -0.70880126953125, -0.451171875, -0.19354248046875, 0.0640869140625, 0.32171630859375, 0.579345703125, 0.83697509765625, 1.0946044921875, 1.35223388671875, 1.60986328125, 1.86749267578125, 2.1251220703125, 2.38275146484375, 2.640380859375, 2.89801025390625, 3.1556396484375, 3.41326904296875, 3.6708984375, 3.92852783203125, 4.1861572265625, 4.44378662109375, 4.701416015625, 4.95904541015625, 5.2166748046875, 5.47430419921875, 5.73193359375, 5.98956298828125, 6.2471923828125, 6.50482177734375, 6.762451171875, 7.02008056640625, 7.2777099609375, 7.53533935546875, 7.79296875]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 4.0, 2.0, 4.0, 9.0, 9.0, 14.0, 8.0, 15.0, 19.0, 16.0, 22.0, 19.0, 30.0, 37.0, 30.0, 36.0, 34.0, 39.0, 34.0, 42.0, 48.0, 173.0, 782.0, 167.0, 48.0, 26.0, 30.0, 32.0, 37.0, 27.0, 25.0, 29.0, 29.0, 20.0, 25.0, 15.0, 18.0, 13.0, 10.0, 13.0, 10.0, 11.0, 3.0, 4.0, 4.0, 2.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-26.8125, -25.96240234375, -25.1123046875, -24.26220703125, -23.412109375, -22.56201171875, -21.7119140625, -20.86181640625, -20.01171875, -19.16162109375, -18.3115234375, -17.46142578125, -16.611328125, -15.76123046875, -14.9111328125, -14.06103515625, -13.2109375, -12.36083984375, -11.5107421875, -10.66064453125, -9.810546875, -8.96044921875, -8.1103515625, -7.26025390625, -6.41015625, -5.56005859375, -4.7099609375, -3.85986328125, -3.009765625, -2.15966796875, -1.3095703125, -0.45947265625, 0.390625, 1.24072265625, 2.0908203125, 2.94091796875, 3.791015625, 4.64111328125, 5.4912109375, 6.34130859375, 7.19140625, 8.04150390625, 8.8916015625, 9.74169921875, 10.591796875, 11.44189453125, 12.2919921875, 13.14208984375, 13.9921875, 14.84228515625, 15.6923828125, 16.54248046875, 17.392578125, 18.24267578125, 19.0927734375, 19.94287109375, 20.79296875, 21.64306640625, 22.4931640625, 23.34326171875, 24.193359375, 25.04345703125, 25.8935546875, 26.74365234375, 27.59375]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 2.0, 1.0, 0.0, 8.0, 6.0, 4.0, 15.0, 16.0, 23.0, 35.0, 70.0, 82.0, 128.0, 199.0, 285.0, 402.0, 609.0, 989.0, 1502.0, 2251.0, 3626.0, 5521.0, 9114.0, 14801.0, 24229.0, 41236.0, 72733.0, 137289.0, 285355.0, 892716.0, 3962053.0, 420631.0, 186384.0, 96383.0, 53539.0, 30770.0, 17919.0, 11265.0, 6885.0, 4264.0, 2748.0, 1897.0, 1188.0, 766.0, 495.0, 342.0, 207.0, 140.0, 89.0, 81.0, 49.0, 48.0, 25.0, 14.0, 6.0, 3.0, 4.0, 5.0, 3.0, 0.0, 3.0], "bins": [-14.1328125, -13.7073974609375, -13.281982421875, -12.8565673828125, -12.43115234375, -12.0057373046875, -11.580322265625, -11.1549072265625, -10.7294921875, -10.3040771484375, -9.878662109375, -9.4532470703125, -9.02783203125, -8.6024169921875, -8.177001953125, -7.7515869140625, -7.326171875, -6.9007568359375, -6.475341796875, -6.0499267578125, -5.62451171875, -5.1990966796875, -4.773681640625, -4.3482666015625, -3.9228515625, -3.4974365234375, -3.072021484375, -2.6466064453125, -2.22119140625, -1.7957763671875, -1.370361328125, -0.9449462890625, -0.51953125, -0.0941162109375, 0.331298828125, 0.7567138671875, 1.18212890625, 1.6075439453125, 2.032958984375, 2.4583740234375, 2.8837890625, 3.3092041015625, 3.734619140625, 4.1600341796875, 4.58544921875, 5.0108642578125, 5.436279296875, 5.8616943359375, 6.287109375, 6.7125244140625, 7.137939453125, 7.5633544921875, 7.98876953125, 8.4141845703125, 8.839599609375, 9.2650146484375, 9.6904296875, 10.1158447265625, 10.541259765625, 10.9666748046875, 11.39208984375, 11.8175048828125, 12.242919921875, 12.6683349609375, 13.09375]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 1.0, 0.0, 3.0, 4.0, 2.0, 4.0, 6.0, 4.0, 8.0, 5.0, 10.0, 13.0, 9.0, 12.0, 22.0, 14.0, 25.0, 32.0, 28.0, 36.0, 39.0, 44.0, 43.0, 85.0, 297.0, 697.0, 138.0, 71.0, 52.0, 49.0, 33.0, 29.0, 42.0, 35.0, 24.0, 24.0, 20.0, 17.0, 16.0, 13.0, 5.0, 8.0, 5.0, 2.0, 7.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.75, -24.028076171875, -23.30615234375, -22.584228515625, -21.8623046875, -21.140380859375, -20.41845703125, -19.696533203125, -18.974609375, -18.252685546875, -17.53076171875, -16.808837890625, -16.0869140625, -15.364990234375, -14.64306640625, -13.921142578125, -13.19921875, -12.477294921875, -11.75537109375, -11.033447265625, -10.3115234375, -9.589599609375, -8.86767578125, -8.145751953125, -7.423828125, -6.701904296875, -5.97998046875, -5.258056640625, -4.5361328125, -3.814208984375, -3.09228515625, -2.370361328125, -1.6484375, -0.926513671875, -0.20458984375, 0.517333984375, 1.2392578125, 1.961181640625, 2.68310546875, 3.405029296875, 4.126953125, 4.848876953125, 5.57080078125, 6.292724609375, 7.0146484375, 7.736572265625, 8.45849609375, 9.180419921875, 9.90234375, 10.624267578125, 11.34619140625, 12.068115234375, 12.7900390625, 13.511962890625, 14.23388671875, 14.955810546875, 15.677734375, 16.399658203125, 17.12158203125, 17.843505859375, 18.5654296875, 19.287353515625, 20.00927734375, 20.731201171875, 21.453125]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [6.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 6.0, 10.0, 8.0, 2.0, 15.0, 19.0, 41.0, 25.0, 59.0, 71.0, 89.0, 172.0, 248.0, 359.0, 439.0, 793.0, 1410.0, 2301.0, 4399.0, 9550.0, 28217.0, 162152.0, 5983911.0, 64973.0, 17090.0, 6908.0, 3226.0, 1832.0, 1059.0, 640.0, 434.0, 319.0, 249.0, 142.0, 69.0, 35.0, 47.0, 37.0, 26.0, 25.0, 17.0, 6.0, 2.0, 0.0, 0.0, 3.0, 7.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-60.125, -58.1396484375, -56.154296875, -54.1689453125, -52.18359375, -50.1982421875, -48.212890625, -46.2275390625, -44.2421875, -42.2568359375, -40.271484375, -38.2861328125, -36.30078125, -34.3154296875, -32.330078125, -30.3447265625, -28.359375, -26.3740234375, -24.388671875, -22.4033203125, -20.41796875, -18.4326171875, -16.447265625, -14.4619140625, -12.4765625, -10.4912109375, -8.505859375, -6.5205078125, -4.53515625, -2.5498046875, -0.564453125, 1.4208984375, 3.40625, 5.3916015625, 7.376953125, 9.3623046875, 11.34765625, 13.3330078125, 15.318359375, 17.3037109375, 19.2890625, 21.2744140625, 23.259765625, 25.2451171875, 27.23046875, 29.2158203125, 31.201171875, 33.1865234375, 35.171875, 37.1572265625, 39.142578125, 41.1279296875, 43.11328125, 45.0986328125, 47.083984375, 49.0693359375, 51.0546875, 53.0400390625, 55.025390625, 57.0107421875, 58.99609375, 60.9814453125, 62.966796875, 64.9521484375, 66.9375]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 2.0, 3.0, 7.0, 7.0, 18.0, 23.0, 17.0, 21.0, 26.0, 34.0, 38.0, 47.0, 68.0, 68.0, 133.0, 357.0, 567.0, 153.0, 90.0, 68.0, 51.0, 36.0, 39.0, 42.0, 24.0, 26.0, 17.0, 13.0, 9.0, 6.0, 8.0, 3.0, 5.0, 3.0, 0.0, 5.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.328125, -20.59814453125, -19.8681640625, -19.13818359375, -18.408203125, -17.67822265625, -16.9482421875, -16.21826171875, -15.48828125, -14.75830078125, -14.0283203125, -13.29833984375, -12.568359375, -11.83837890625, -11.1083984375, -10.37841796875, -9.6484375, -8.91845703125, -8.1884765625, -7.45849609375, -6.728515625, -5.99853515625, -5.2685546875, -4.53857421875, -3.80859375, -3.07861328125, -2.3486328125, -1.61865234375, -0.888671875, -0.15869140625, 0.5712890625, 1.30126953125, 2.03125, 2.76123046875, 3.4912109375, 4.22119140625, 4.951171875, 5.68115234375, 6.4111328125, 7.14111328125, 7.87109375, 8.60107421875, 9.3310546875, 10.06103515625, 10.791015625, 11.52099609375, 12.2509765625, 12.98095703125, 13.7109375, 14.44091796875, 15.1708984375, 15.90087890625, 16.630859375, 17.36083984375, 18.0908203125, 18.82080078125, 19.55078125, 20.28076171875, 21.0107421875, 21.74072265625, 22.470703125, 23.20068359375, 23.9306640625, 24.66064453125, 25.390625]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 6.0, 17.0, 24.0, 50.0, 136.0, 490.0, 190.0, 56.0, 21.0, 11.0, 6.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-86.84056854248047, -83.31663513183594, -79.79270935058594, -76.26878356933594, -72.7448501586914, -69.22091674804688, -65.69699096679688, -62.17306137084961, -58.649131774902344, -55.12520217895508, -51.60127258300781, -48.07734298706055, -44.55341339111328, -41.029483795166016, -37.50555419921875, -33.981624603271484, -30.45769500732422, -26.933765411376953, -23.409835815429688, -19.885906219482422, -16.361976623535156, -12.83804702758789, -9.314117431640625, -5.790187835693359, -2.2662582397460938, 1.2576713562011719, 4.7816009521484375, 8.305530548095703, 11.829460144042969, 15.353389739990234, 18.8773193359375, 22.401248931884766, 25.9251708984375, 29.449100494384766, 32.97303009033203, 36.4969596862793, 40.02088928222656, 43.54481887817383, 47.068748474121094, 50.59267807006836, 54.116607666015625, 57.64053726196289, 61.164466857910156, 64.68840026855469, 68.21232604980469, 71.73625183105469, 75.26018524169922, 78.78411865234375, 82.30804443359375, 85.83197021484375, 89.35590362548828, 92.87983703613281, 96.40376281738281, 99.92768859863281, 103.45162200927734, 106.97555541992188, 110.49948120117188, 114.02340698242188, 117.5473403930664, 121.07127380371094, 124.59519958496094, 128.11912536621094, 131.64306640625, 135.1669921875, 138.69091796875]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 7.0, 9.0, 13.0, 25.0, 31.0, 45.0, 62.0, 65.0, 81.0, 91.0, 99.0, 94.0, 74.0, 75.0, 72.0, 51.0, 48.0, 30.0, 16.0, 13.0, 6.0, 5.0, 1.0, 2.0], "bins": [-86.64862060546875, -84.94123840332031, -83.2338638305664, -81.52648162841797, -79.81910705566406, -78.11172485351562, -76.40434265136719, -74.69696807861328, -72.98958587646484, -71.2822036743164, -69.5748291015625, -67.86744689941406, -66.16007232666016, -64.45269012451172, -62.74531173706055, -61.037933349609375, -59.3305549621582, -57.62317657470703, -55.91579818725586, -54.20841979980469, -52.50103759765625, -50.79365921020508, -49.086280822753906, -47.378902435302734, -45.67152404785156, -43.96414566040039, -42.25676727294922, -40.54938507080078, -38.84200668334961, -37.13462829589844, -35.427249908447266, -33.719871520996094, -32.012489318847656, -30.305110931396484, -28.59773063659668, -26.890352249145508, -25.182971954345703, -23.47559356689453, -21.76821517944336, -20.060836791992188, -18.35345458984375, -16.646076202392578, -14.938695907592773, -13.231317520141602, -11.523938179016113, -9.816558837890625, -8.109180450439453, -6.401801109313965, -4.694421768188477, -2.9870426654815674, -1.2796635627746582, 0.4277153015136719, 2.13509464263916, 3.8424739837646484, 5.54985237121582, 7.257231712341309, 8.964611053466797, 10.671990394592285, 12.379369735717773, 14.086748123168945, 15.794127464294434, 17.501506805419922, 19.208885192871094, 20.916263580322266, 22.62364387512207]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 3.0, 4.0, 6.0, 4.0, 2.0, 8.0, 8.0, 10.0, 8.0, 9.0, 12.0, 26.0, 26.0, 51.0, 67.0, 90.0, 133.0, 284.0, 451.0, 836.0, 1662.0, 3409.0, 7225.0, 17039.0, 48186.0, 236670.0, 3775263.0, 64561.0, 21015.0, 8858.0, 4071.0, 2075.0, 1045.0, 558.0, 286.0, 157.0, 83.0, 37.0, 20.0, 11.0, 8.0, 3.0, 2.0], "bins": [-0.2493896484375, -0.24410247802734375, -0.2388153076171875, -0.23352813720703125, -0.228240966796875, -0.22295379638671875, -0.2176666259765625, -0.21237945556640625, -0.20709228515625, -0.20180511474609375, -0.1965179443359375, -0.19123077392578125, -0.185943603515625, -0.18065643310546875, -0.1753692626953125, -0.17008209228515625, -0.164794921875, -0.15950775146484375, -0.1542205810546875, -0.14893341064453125, -0.143646240234375, -0.13835906982421875, -0.1330718994140625, -0.12778472900390625, -0.12249755859375, -0.11721038818359375, -0.1119232177734375, -0.10663604736328125, -0.101348876953125, -0.09606170654296875, -0.0907745361328125, -0.08548736572265625, -0.0802001953125, -0.07491302490234375, -0.0696258544921875, -0.06433868408203125, -0.059051513671875, -0.05376434326171875, -0.0484771728515625, -0.04319000244140625, -0.03790283203125, -0.03261566162109375, -0.0273284912109375, -0.02204132080078125, -0.016754150390625, -0.01146697998046875, -0.0061798095703125, -0.00089263916015625, 0.00439453125, 0.00968170166015625, 0.0149688720703125, 0.02025604248046875, 0.025543212890625, 0.03083038330078125, 0.0361175537109375, 0.04140472412109375, 0.04669189453125, 0.05197906494140625, 0.0572662353515625, 0.06255340576171875, 0.067840576171875, 0.07312774658203125, 0.0784149169921875, 0.08370208740234375, 0.0889892578125]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 8.0, 4.0, 5.0, 10.0, 6.0, 6.0, 14.0, 7.0, 11.0, 15.0, 22.0, 262.0, 541.0, 15.0, 14.0, 10.0, 7.0, 10.0, 7.0, 7.0, 6.0, 7.0, 1.0, 1.0, 3.0, 4.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.026611328125, -0.025960803031921387, -0.025310277938842773, -0.02465975284576416, -0.024009227752685547, -0.023358702659606934, -0.02270817756652832, -0.022057652473449707, -0.021407127380371094, -0.02075660228729248, -0.020106077194213867, -0.019455552101135254, -0.01880502700805664, -0.018154501914978027, -0.017503976821899414, -0.0168534517288208, -0.016202926635742188, -0.015552401542663574, -0.014901876449584961, -0.014251351356506348, -0.013600826263427734, -0.012950301170349121, -0.012299776077270508, -0.011649250984191895, -0.010998725891113281, -0.010348200798034668, -0.009697675704956055, -0.009047150611877441, -0.008396625518798828, -0.007746100425720215, -0.0070955753326416016, -0.006445050239562988, -0.005794525146484375, -0.005144000053405762, -0.0044934749603271484, -0.003842949867248535, -0.003192424774169922, -0.0025418996810913086, -0.0018913745880126953, -0.001240849494934082, -0.0005903244018554688, 6.020069122314453e-05, 0.0007107257843017578, 0.001361250877380371, 0.0020117759704589844, 0.0026623010635375977, 0.003312826156616211, 0.003963351249694824, 0.0046138763427734375, 0.005264401435852051, 0.005914926528930664, 0.006565451622009277, 0.007215976715087891, 0.007866501808166504, 0.008517026901245117, 0.00916755199432373, 0.009818077087402344, 0.010468602180480957, 0.01111912727355957, 0.011769652366638184, 0.012420177459716797, 0.01307070255279541, 0.013721227645874023, 0.014371752738952637, 0.01502227783203125]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 6.0, 6.0, 9.0, 9.0, 14.0, 18.0, 30.0, 52.0, 65.0, 101.0, 137.0, 237.0, 459.0, 916.0, 2090.0, 6167.0, 22902.0, 111523.0, 845616.0, 2820121.0, 313083.0, 52060.0, 12178.0, 3617.0, 1346.0, 655.0, 341.0, 179.0, 115.0, 77.0, 46.0, 40.0, 27.0, 16.0, 13.0, 9.0, 4.0, 3.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1529541015625, -0.14806365966796875, -0.1431732177734375, -0.13828277587890625, -0.133392333984375, -0.12850189208984375, -0.1236114501953125, -0.11872100830078125, -0.11383056640625, -0.10894012451171875, -0.1040496826171875, -0.09915924072265625, -0.094268798828125, -0.08937835693359375, -0.0844879150390625, -0.07959747314453125, -0.07470703125, -0.06981658935546875, -0.0649261474609375, -0.06003570556640625, -0.055145263671875, -0.05025482177734375, -0.0453643798828125, -0.04047393798828125, -0.03558349609375, -0.03069305419921875, -0.0258026123046875, -0.02091217041015625, -0.016021728515625, -0.01113128662109375, -0.0062408447265625, -0.00135040283203125, 0.0035400390625, 0.00843048095703125, 0.0133209228515625, 0.01821136474609375, 0.023101806640625, 0.02799224853515625, 0.0328826904296875, 0.03777313232421875, 0.04266357421875, 0.04755401611328125, 0.0524444580078125, 0.05733489990234375, 0.062225341796875, 0.06711578369140625, 0.0720062255859375, 0.07689666748046875, 0.081787109375, 0.08667755126953125, 0.0915679931640625, 0.09645843505859375, 0.101348876953125, 0.10623931884765625, 0.1111297607421875, 0.11602020263671875, 0.12091064453125, 0.12580108642578125, 0.1306915283203125, 0.13558197021484375, 0.140472412109375, 0.14536285400390625, 0.1502532958984375, 0.15514373779296875, 0.1600341796875]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 8.0, 6.0, 3.0, 7.0, 1.0, 10.0, 6.0, 15.0, 15.0, 23.0, 25.0, 37.0, 33.0, 55.0, 60.0, 72.0, 84.0, 113.0, 109.0, 159.0, 179.0, 218.0, 287.0, 362.0, 499.0, 304.0, 289.0, 194.0, 167.0, 108.0, 104.0, 99.0, 81.0, 68.0, 56.0, 48.0, 36.0, 35.0, 14.0, 22.0, 21.0, 14.0, 11.0, 6.0, 3.0, 4.0, 2.0, 5.0, 0.0, 1.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0224151611328125, -0.02170085906982422, -0.020986557006835938, -0.020272254943847656, -0.019557952880859375, -0.018843650817871094, -0.018129348754882812, -0.01741504669189453, -0.01670074462890625, -0.01598644256591797, -0.015272140502929688, -0.014557838439941406, -0.013843536376953125, -0.013129234313964844, -0.012414932250976562, -0.011700630187988281, -0.010986328125, -0.010272026062011719, -0.009557723999023438, -0.008843421936035156, -0.008129119873046875, -0.007414817810058594, -0.0067005157470703125, -0.005986213684082031, -0.00527191162109375, -0.004557609558105469, -0.0038433074951171875, -0.0031290054321289062, -0.002414703369140625, -0.0017004013061523438, -0.0009860992431640625, -0.00027179718017578125, 0.0004425048828125, 0.0011568069458007812, 0.0018711090087890625, 0.0025854110717773438, 0.003299713134765625, 0.004014015197753906, 0.0047283172607421875, 0.005442619323730469, 0.00615692138671875, 0.006871223449707031, 0.0075855255126953125, 0.008299827575683594, 0.009014129638671875, 0.009728431701660156, 0.010442733764648438, 0.011157035827636719, 0.011871337890625, 0.012585639953613281, 0.013299942016601562, 0.014014244079589844, 0.014728546142578125, 0.015442848205566406, 0.016157150268554688, 0.01687145233154297, 0.01758575439453125, 0.01830005645751953, 0.019014358520507812, 0.019728660583496094, 0.020442962646484375, 0.021157264709472656, 0.021871566772460938, 0.02258586883544922, 0.0233001708984375]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 6.0, 6.0, 10.0, 25.0, 28.0, 44.0, 69.0, 115.0, 161.0, 185.0, 111.0, 105.0, 59.0, 44.0, 16.0, 12.0, 4.0, 3.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.44241875410079956, -0.4314727485179901, -0.42052674293518066, -0.4095807373523712, -0.39863473176956177, -0.3876887559890747, -0.37674272060394287, -0.3657967448234558, -0.35485073924064636, -0.3439047336578369, -0.33295872807502747, -0.322012722492218, -0.31106671690940857, -0.3001207113265991, -0.28917473554611206, -0.2782287299633026, -0.26728272438049316, -0.2563367187976837, -0.24539071321487427, -0.23444470763206482, -0.22349871695041656, -0.21255271136760712, -0.20160670578479767, -0.19066071510314941, -0.17971467971801758, -0.16876867413520813, -0.15782266855239868, -0.14687666296958923, -0.13593067228794098, -0.12498466670513153, -0.11403866112232208, -0.10309266299009323, -0.09214666485786438, -0.08120065927505493, -0.07025466114282608, -0.05930865556001663, -0.04836265370249748, -0.03741665184497833, -0.026470646262168884, -0.015524648129940033, -0.004578642547130585, 0.00636736024171114, 0.017313363030552864, 0.028259366750717163, 0.03920536860823631, 0.05015137046575546, 0.06109737604856491, 0.07204337418079376, 0.08298937976360321, 0.09393538534641266, 0.10488138347864151, 0.11582738906145096, 0.1267733871936798, 0.13771939277648926, 0.1486653983592987, 0.15961140394210815, 0.1705574095249176, 0.18150341510772705, 0.1924494206905365, 0.20339542627334595, 0.2143414169549942, 0.22528742253780365, 0.2362334281206131, 0.24717941880226135, 0.2581254243850708]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 3.0, 4.0, 3.0, 3.0, 8.0, 9.0, 9.0, 11.0, 11.0, 13.0, 17.0, 31.0, 31.0, 37.0, 34.0, 38.0, 51.0, 43.0, 40.0, 65.0, 46.0, 46.0, 54.0, 46.0, 52.0, 31.0, 37.0, 35.0, 30.0, 28.0, 22.0, 28.0, 13.0, 15.0, 19.0, 14.0, 5.0, 7.0, 5.0, 3.0, 4.0, 5.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12418943643569946, -0.11976609379053116, -0.11534275859594345, -0.11091941595077515, -0.10649608075618744, -0.10207273811101913, -0.09764939546585083, -0.09322606027126312, -0.08880271762609482, -0.08437937498092651, -0.0799560397863388, -0.0755326971411705, -0.0711093544960022, -0.06668601930141449, -0.062262676656246185, -0.05783933773636818, -0.05341599881649017, -0.04899265989661217, -0.04456932097673416, -0.04014597833156586, -0.03572263941168785, -0.031299300491809845, -0.02687595970928669, -0.022452618926763535, -0.01802928000688553, -0.013605940155684948, -0.009182600304484367, -0.004759260453283787, -0.0003359206020832062, 0.0040874183177948, 0.008510759100317955, 0.01293409988284111, 0.017357438802719116, 0.021780777722597122, 0.026204118505120277, 0.030627459287643433, 0.03505079820752144, 0.039474137127399445, 0.04389747977256775, 0.048320818692445755, 0.05274415761232376, 0.05716749653220177, 0.06159083545207977, 0.06601417809724808, 0.07043752074241638, 0.07486085593700409, 0.0792841985821724, 0.0837075412273407, 0.0881308764219284, 0.09255421906709671, 0.09697755426168442, 0.10140089690685272, 0.10582423210144043, 0.11024757474660873, 0.11467091739177704, 0.11909425258636475, 0.12351759523153305, 0.12794093787670135, 0.13236427307128906, 0.13678760826587677, 0.14121095836162567, 0.14563429355621338, 0.1500576287508011, 0.15448097884655, 0.1589043140411377]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 5.0, 4.0, 0.0, 7.0, 13.0, 9.0, 20.0, 28.0, 31.0, 48.0, 67.0, 95.0, 202.0, 365.0, 727.0, 1535.0, 3980.0, 11664.0, 61092.0, 914371.0, 39034.0, 9368.0, 3246.0, 1286.0, 586.0, 305.0, 162.0, 104.0, 59.0, 45.0, 29.0, 19.0, 12.0, 13.0, 8.0, 4.0, 1.0, 3.0, 4.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.257568359375, -0.2496185302734375, -0.241668701171875, -0.2337188720703125, -0.22576904296875, -0.2178192138671875, -0.209869384765625, -0.2019195556640625, -0.1939697265625, -0.1860198974609375, -0.178070068359375, -0.1701202392578125, -0.16217041015625, -0.1542205810546875, -0.146270751953125, -0.1383209228515625, -0.13037109375, -0.1224212646484375, -0.114471435546875, -0.1065216064453125, -0.09857177734375, -0.0906219482421875, -0.082672119140625, -0.0747222900390625, -0.0667724609375, -0.0588226318359375, -0.050872802734375, -0.0429229736328125, -0.03497314453125, -0.0270233154296875, -0.019073486328125, -0.0111236572265625, -0.003173828125, 0.0047760009765625, 0.012725830078125, 0.0206756591796875, 0.02862548828125, 0.0365753173828125, 0.044525146484375, 0.0524749755859375, 0.0604248046875, 0.0683746337890625, 0.076324462890625, 0.0842742919921875, 0.09222412109375, 0.1001739501953125, 0.108123779296875, 0.1160736083984375, 0.1240234375, 0.1319732666015625, 0.139923095703125, 0.1478729248046875, 0.15582275390625, 0.1637725830078125, 0.171722412109375, 0.1796722412109375, 0.1876220703125, 0.1955718994140625, 0.203521728515625, 0.2114715576171875, 0.21942138671875, 0.2273712158203125, 0.235321044921875, 0.2432708740234375, 0.251220703125]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 8.0, 4.0, 6.0, 5.0, 7.0, 11.0, 11.0, 16.0, 19.0, 58.0, 160.0, 320.0, 200.0, 83.0, 34.0, 8.0, 13.0, 6.0, 13.0, 3.0, 7.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.0243072509765625, -0.023676156997680664, -0.023045063018798828, -0.022413969039916992, -0.021782875061035156, -0.02115178108215332, -0.020520687103271484, -0.01988959312438965, -0.019258499145507812, -0.018627405166625977, -0.01799631118774414, -0.017365217208862305, -0.01673412322998047, -0.016103029251098633, -0.015471935272216797, -0.014840841293334961, -0.014209747314453125, -0.013578653335571289, -0.012947559356689453, -0.012316465377807617, -0.011685371398925781, -0.011054277420043945, -0.01042318344116211, -0.009792089462280273, -0.009160995483398438, -0.008529901504516602, -0.007898807525634766, -0.00726771354675293, -0.006636619567871094, -0.006005525588989258, -0.005374431610107422, -0.004743337631225586, -0.00411224365234375, -0.003481149673461914, -0.002850055694580078, -0.002218961715698242, -0.0015878677368164062, -0.0009567737579345703, -0.0003256797790527344, 0.00030541419982910156, 0.0009365081787109375, 0.0015676021575927734, 0.0021986961364746094, 0.0028297901153564453, 0.0034608840942382812, 0.004091978073120117, 0.004723072052001953, 0.005354166030883789, 0.005985260009765625, 0.006616353988647461, 0.007247447967529297, 0.007878541946411133, 0.008509635925292969, 0.009140729904174805, 0.00977182388305664, 0.010402917861938477, 0.011034011840820312, 0.011665105819702148, 0.012296199798583984, 0.01292729377746582, 0.013558387756347656, 0.014189481735229492, 0.014820575714111328, 0.015451669692993164, 0.016082763671875]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 2.0, 7.0, 4.0, 9.0, 6.0, 13.0, 14.0, 27.0, 34.0, 55.0, 70.0, 102.0, 138.0, 233.0, 479.0, 793.0, 1443.0, 3257.0, 10335.0, 79332.0, 837200.0, 96593.0, 11329.0, 3538.0, 1527.0, 794.0, 434.0, 242.0, 189.0, 119.0, 83.0, 49.0, 35.0, 23.0, 14.0, 11.0, 9.0, 4.0, 7.0, 0.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.23779296875, -0.229156494140625, -0.22052001953125, -0.211883544921875, -0.2032470703125, -0.194610595703125, -0.18597412109375, -0.177337646484375, -0.168701171875, -0.160064697265625, -0.15142822265625, -0.142791748046875, -0.1341552734375, -0.125518798828125, -0.11688232421875, -0.108245849609375, -0.099609375, -0.090972900390625, -0.08233642578125, -0.073699951171875, -0.0650634765625, -0.056427001953125, -0.04779052734375, -0.039154052734375, -0.030517578125, -0.021881103515625, -0.01324462890625, -0.004608154296875, 0.0040283203125, 0.012664794921875, 0.02130126953125, 0.029937744140625, 0.03857421875, 0.047210693359375, 0.05584716796875, 0.064483642578125, 0.0731201171875, 0.081756591796875, 0.09039306640625, 0.099029541015625, 0.107666015625, 0.116302490234375, 0.12493896484375, 0.133575439453125, 0.1422119140625, 0.150848388671875, 0.15948486328125, 0.168121337890625, 0.1767578125, 0.185394287109375, 0.19403076171875, 0.202667236328125, 0.2113037109375, 0.219940185546875, 0.22857666015625, 0.237213134765625, 0.245849609375, 0.254486083984375, 0.26312255859375, 0.271759033203125, 0.2803955078125, 0.289031982421875, 0.29766845703125, 0.306304931640625, 0.31494140625]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 5.0, 2.0, 2.0, 6.0, 7.0, 7.0, 9.0, 5.0, 14.0, 14.0, 14.0, 22.0, 16.0, 17.0, 26.0, 34.0, 23.0, 31.0, 26.0, 32.0, 30.0, 41.0, 34.0, 42.0, 29.0, 44.0, 48.0, 45.0, 45.0, 22.0, 35.0, 31.0, 40.0, 29.0, 22.0, 18.0, 26.0, 18.0, 14.0, 18.0, 14.0, 12.0, 11.0, 6.0, 6.0, 8.0, 2.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0640869140625, -0.062012672424316406, -0.05993843078613281, -0.05786418914794922, -0.055789947509765625, -0.05371570587158203, -0.05164146423339844, -0.049567222595214844, -0.04749298095703125, -0.045418739318847656, -0.04334449768066406, -0.04127025604248047, -0.039196014404296875, -0.03712177276611328, -0.03504753112792969, -0.032973289489746094, -0.0308990478515625, -0.028824806213378906, -0.026750564575195312, -0.02467632293701172, -0.022602081298828125, -0.02052783966064453, -0.018453598022460938, -0.016379356384277344, -0.01430511474609375, -0.012230873107910156, -0.010156631469726562, -0.008082389831542969, -0.006008148193359375, -0.003933906555175781, -0.0018596649169921875, 0.00021457672119140625, 0.002288818359375, 0.004363059997558594, 0.0064373016357421875, 0.008511543273925781, 0.010585784912109375, 0.012660026550292969, 0.014734268188476562, 0.016808509826660156, 0.01888275146484375, 0.020956993103027344, 0.023031234741210938, 0.02510547637939453, 0.027179718017578125, 0.02925395965576172, 0.03132820129394531, 0.033402442932128906, 0.0354766845703125, 0.037550926208496094, 0.03962516784667969, 0.04169940948486328, 0.043773651123046875, 0.04584789276123047, 0.04792213439941406, 0.049996376037597656, 0.05207061767578125, 0.054144859313964844, 0.05621910095214844, 0.05829334259033203, 0.060367584228515625, 0.06244182586669922, 0.06451606750488281, 0.0665903091430664, 0.06866455078125]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 4.0, 5.0, 5.0, 6.0, 11.0, 13.0, 30.0, 46.0, 68.0, 130.0, 291.0, 567.0, 1780.0, 8300.0, 198659.0, 819255.0, 15406.0, 2537.0, 769.0, 305.0, 145.0, 78.0, 63.0, 32.0, 19.0, 11.0, 7.0, 11.0, 3.0, 6.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.3955078125, -0.3861236572265625, -0.376739501953125, -0.3673553466796875, -0.35797119140625, -0.3485870361328125, -0.339202880859375, -0.3298187255859375, -0.3204345703125, -0.3110504150390625, -0.301666259765625, -0.2922821044921875, -0.28289794921875, -0.2735137939453125, -0.264129638671875, -0.2547454833984375, -0.245361328125, -0.2359771728515625, -0.226593017578125, -0.2172088623046875, -0.20782470703125, -0.1984405517578125, -0.189056396484375, -0.1796722412109375, -0.1702880859375, -0.1609039306640625, -0.151519775390625, -0.1421356201171875, -0.13275146484375, -0.1233673095703125, -0.113983154296875, -0.1045989990234375, -0.09521484375, -0.0858306884765625, -0.076446533203125, -0.0670623779296875, -0.05767822265625, -0.0482940673828125, -0.038909912109375, -0.0295257568359375, -0.0201416015625, -0.0107574462890625, -0.001373291015625, 0.0080108642578125, 0.01739501953125, 0.0267791748046875, 0.036163330078125, 0.0455474853515625, 0.054931640625, 0.0643157958984375, 0.073699951171875, 0.0830841064453125, 0.09246826171875, 0.1018524169921875, 0.111236572265625, 0.1206207275390625, 0.1300048828125, 0.1393890380859375, 0.148773193359375, 0.1581573486328125, 0.16754150390625, 0.1769256591796875, 0.186309814453125, 0.1956939697265625, 0.205078125]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 0.0, 6.0, 3.0, 1.0, 8.0, 20.0, 11.0, 16.0, 16.0, 30.0, 40.0, 56.0, 78.0, 80.0, 120.0, 75.0, 103.0, 68.0, 74.0, 55.0, 41.0, 30.0, 24.0, 10.0, 14.0, 10.0, 9.0, 7.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.4662742614746094e-05, -1.4225021004676819e-05, -1.3787299394607544e-05, -1.3349577784538269e-05, -1.2911856174468994e-05, -1.247413456439972e-05, -1.2036412954330444e-05, -1.159869134426117e-05, -1.1160969734191895e-05, -1.072324812412262e-05, -1.0285526514053345e-05, -9.84780490398407e-06, -9.410083293914795e-06, -8.97236168384552e-06, -8.534640073776245e-06, -8.09691846370697e-06, -7.659196853637695e-06, -7.22147524356842e-06, -6.7837536334991455e-06, -6.346032023429871e-06, -5.908310413360596e-06, -5.470588803291321e-06, -5.032867193222046e-06, -4.595145583152771e-06, -4.157423973083496e-06, -3.719702363014221e-06, -3.2819807529449463e-06, -2.8442591428756714e-06, -2.4065375328063965e-06, -1.9688159227371216e-06, -1.5310943126678467e-06, -1.0933727025985718e-06, -6.556510925292969e-07, -2.1792948246002197e-07, 2.1979212760925293e-07, 6.575137376785278e-07, 1.0952353477478027e-06, 1.5329569578170776e-06, 1.9706785678863525e-06, 2.4084001779556274e-06, 2.8461217880249023e-06, 3.2838433980941772e-06, 3.721565008163452e-06, 4.159286618232727e-06, 4.597008228302002e-06, 5.034729838371277e-06, 5.472451448440552e-06, 5.910173058509827e-06, 6.3478946685791016e-06, 6.7856162786483765e-06, 7.223337888717651e-06, 7.661059498786926e-06, 8.098781108856201e-06, 8.536502718925476e-06, 8.974224328994751e-06, 9.411945939064026e-06, 9.8496675491333e-06, 1.0287389159202576e-05, 1.072511076927185e-05, 1.1162832379341125e-05, 1.16005539894104e-05, 1.2038275599479675e-05, 1.247599720954895e-05, 1.2913718819618225e-05, 1.33514404296875e-05]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 5.0, 4.0, 5.0, 9.0, 7.0, 8.0, 8.0, 23.0, 22.0, 59.0, 77.0, 156.0, 234.0, 427.0, 830.0, 1797.0, 5329.0, 32105.0, 819716.0, 169549.0, 12343.0, 3216.0, 1277.0, 554.0, 330.0, 164.0, 99.0, 61.0, 42.0, 47.0, 20.0, 19.0, 6.0, 4.0, 5.0, 2.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.24365234375, -0.23455047607421875, -0.2254486083984375, -0.21634674072265625, -0.207244873046875, -0.19814300537109375, -0.1890411376953125, -0.17993927001953125, -0.17083740234375, -0.16173553466796875, -0.1526336669921875, -0.14353179931640625, -0.134429931640625, -0.12532806396484375, -0.1162261962890625, -0.10712432861328125, -0.0980224609375, -0.08892059326171875, -0.0798187255859375, -0.07071685791015625, -0.061614990234375, -0.05251312255859375, -0.0434112548828125, -0.03430938720703125, -0.02520751953125, -0.01610565185546875, -0.0070037841796875, 0.00209808349609375, 0.011199951171875, 0.02030181884765625, 0.0294036865234375, 0.03850555419921875, 0.047607421875, 0.05670928955078125, 0.0658111572265625, 0.07491302490234375, 0.084014892578125, 0.09311676025390625, 0.1022186279296875, 0.11132049560546875, 0.12042236328125, 0.12952423095703125, 0.1386260986328125, 0.14772796630859375, 0.156829833984375, 0.16593170166015625, 0.1750335693359375, 0.18413543701171875, 0.1932373046875, 0.20233917236328125, 0.2114410400390625, 0.22054290771484375, 0.229644775390625, 0.23874664306640625, 0.2478485107421875, 0.25695037841796875, 0.26605224609375, 0.27515411376953125, 0.2842559814453125, 0.29335784912109375, 0.302459716796875, 0.31156158447265625, 0.3206634521484375, 0.32976531982421875, 0.3388671875]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 4.0, 4.0, 4.0, 3.0, 2.0, 11.0, 8.0, 9.0, 18.0, 24.0, 27.0, 50.0, 73.0, 96.0, 124.0, 162.0, 111.0, 67.0, 45.0, 37.0, 20.0, 24.0, 21.0, 17.0, 10.0, 5.0, 8.0, 9.0, 4.0, 7.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1119384765625, -0.1082305908203125, -0.104522705078125, -0.1008148193359375, -0.09710693359375, -0.0933990478515625, -0.089691162109375, -0.0859832763671875, -0.082275390625, -0.0785675048828125, -0.074859619140625, -0.0711517333984375, -0.06744384765625, -0.0637359619140625, -0.060028076171875, -0.0563201904296875, -0.0526123046875, -0.0489044189453125, -0.045196533203125, -0.0414886474609375, -0.03778076171875, -0.0340728759765625, -0.030364990234375, -0.0266571044921875, -0.02294921875, -0.0192413330078125, -0.015533447265625, -0.0118255615234375, -0.00811767578125, -0.0044097900390625, -0.000701904296875, 0.0030059814453125, 0.0067138671875, 0.0104217529296875, 0.014129638671875, 0.0178375244140625, 0.02154541015625, 0.0252532958984375, 0.028961181640625, 0.0326690673828125, 0.036376953125, 0.0400848388671875, 0.043792724609375, 0.0475006103515625, 0.05120849609375, 0.0549163818359375, 0.058624267578125, 0.0623321533203125, 0.0660400390625, 0.0697479248046875, 0.073455810546875, 0.0771636962890625, 0.08087158203125, 0.0845794677734375, 0.088287353515625, 0.0919952392578125, 0.095703125, 0.0994110107421875, 0.103118896484375, 0.1068267822265625, 0.11053466796875, 0.1142425537109375, 0.117950439453125, 0.1216583251953125, 0.1253662109375]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 1.0, 4.0, 8.0, 3.0, 18.0, 38.0, 175.0, 478.0, 172.0, 48.0, 19.0, 16.0, 13.0, 7.0, 5.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.4274253845214844, -3.3486196994781494, -3.2698142528533936, -3.1910085678100586, -3.1122031211853027, -3.0333974361419678, -2.954591751098633, -2.875786304473877, -2.796980619430542, -2.718174934387207, -2.639369487762451, -2.560563802719116, -2.4817583560943604, -2.4029526710510254, -2.3241472244262695, -2.2453415393829346, -2.1665358543395996, -2.0877301692962646, -2.008924722671509, -1.9301190376281738, -1.8513134717941284, -1.772507905960083, -1.6937023401260376, -1.6148967742919922, -1.5360913276672363, -1.457285761833191, -1.3784801959991455, -1.2996745109558105, -1.2208689451217651, -1.1420633792877197, -1.0632578134536743, -0.9844521880149841, -0.9056466817855835, -0.8268411159515381, -0.7480354905128479, -0.6692299246788025, -0.5904242992401123, -0.5116187334060669, -0.4328131675720215, -0.3540075421333313, -0.2752019762992859, -0.1963963806629181, -0.11759079992771149, -0.03878521919250488, 0.040020376443862915, 0.11882597208023071, 0.19763153791427612, 0.2764371633529663, 0.3552427291870117, 0.4340483248233795, 0.5128539204597473, 0.5916594862937927, 0.6704651117324829, 0.7492706775665283, 0.8280762434005737, 0.9068818688392639, 0.9856874346733093, 1.0644930601119995, 1.143298625946045, 1.2221041917800903, 1.3009097576141357, 1.3797154426574707, 1.4585208892822266, 1.5373265743255615, 1.616132140159607]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 2.0, 4.0, 2.0, 4.0, 7.0, 7.0, 10.0, 15.0, 12.0, 31.0, 59.0, 78.0, 133.0, 137.0, 127.0, 103.0, 67.0, 59.0, 35.0, 23.0, 18.0, 12.0, 8.0, 4.0, 4.0, 4.0, 6.0, 6.0, 5.0, 4.0, 1.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-1.4160118103027344, -1.3756730556488037, -1.335334300994873, -1.2949955463409424, -1.2546567916870117, -1.214318037033081, -1.1739792823791504, -1.1336405277252197, -1.093301773071289, -1.0529630184173584, -1.0126242637634277, -0.9722855091094971, -0.9319467544555664, -0.8916079998016357, -0.8512692451477051, -0.8109304904937744, -0.7705917358398438, -0.7302529811859131, -0.6899142265319824, -0.6495754718780518, -0.6092367172241211, -0.5688979625701904, -0.5285592079162598, -0.4882204532623291, -0.44788169860839844, -0.4075429439544678, -0.3672041893005371, -0.32686543464660645, -0.2865266799926758, -0.24618792533874512, -0.20584917068481445, -0.1655104160308838, -0.12517166137695312, -0.08483290672302246, -0.0444941520690918, -0.004155397415161133, 0.03618335723876953, 0.0765221118927002, 0.11686086654663086, 0.15719962120056152, 0.1975383758544922, 0.23787713050842285, 0.2782158851623535, 0.3185546398162842, 0.35889339447021484, 0.3992321491241455, 0.43957090377807617, 0.47990965843200684, 0.5202484130859375, 0.5605871677398682, 0.6009259223937988, 0.6412646770477295, 0.6816034317016602, 0.7219421863555908, 0.7622809410095215, 0.8026196956634521, 0.8429584503173828, 0.8832972049713135, 0.9236359596252441, 0.9639747142791748, 1.0043134689331055, 1.0446522235870361, 1.0849909782409668, 1.1253297328948975, 1.1656684875488281]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 6.0, 2.0, 4.0, 13.0, 19.0, 21.0, 74.0, 224.0, 933.0, 4184023.0, 8764.0, 102.0, 44.0, 27.0, 11.0, 12.0, 5.0, 3.0, 3.0, 1.0, 4.0, 0.0, 0.0, 1.0], "bins": [-28.546875, -27.9732666015625, -27.399658203125, -26.8260498046875, -26.25244140625, -25.6788330078125, -25.105224609375, -24.5316162109375, -23.9580078125, -23.3843994140625, -22.810791015625, -22.2371826171875, -21.66357421875, -21.0899658203125, -20.516357421875, -19.9427490234375, -19.369140625, -18.7955322265625, -18.221923828125, -17.6483154296875, -17.07470703125, -16.5010986328125, -15.927490234375, -15.3538818359375, -14.7802734375, -14.2066650390625, -13.633056640625, -13.0594482421875, -12.48583984375, -11.9122314453125, -11.338623046875, -10.7650146484375, -10.19140625, -9.6177978515625, -9.044189453125, -8.4705810546875, -7.89697265625, -7.3233642578125, -6.749755859375, -6.1761474609375, -5.6025390625, -5.0289306640625, -4.455322265625, -3.8817138671875, -3.30810546875, -2.7344970703125, -2.160888671875, -1.5872802734375, -1.013671875, -0.4400634765625, 0.133544921875, 0.7071533203125, 1.28076171875, 1.8543701171875, 2.427978515625, 3.0015869140625, 3.5751953125, 4.1488037109375, 4.722412109375, 5.2960205078125, 5.86962890625, 6.4432373046875, 7.016845703125, 7.5904541015625, 8.1640625]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 5.0, 1.0, 7.0, 7.0, 12.0, 8.0, 22.0, 16.0, 20.0, 35.0, 93.0, 186.0, 234.0, 148.0, 87.0, 44.0, 24.0, 22.0, 7.0, 10.0, 6.0, 4.0, 4.0, 4.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02313232421875, -0.022584915161132812, -0.022037506103515625, -0.021490097045898438, -0.02094268798828125, -0.020395278930664062, -0.019847869873046875, -0.019300460815429688, -0.0187530517578125, -0.018205642700195312, -0.017658233642578125, -0.017110824584960938, -0.01656341552734375, -0.016016006469726562, -0.015468597412109375, -0.014921188354492188, -0.014373779296875, -0.013826370239257812, -0.013278961181640625, -0.012731552124023438, -0.01218414306640625, -0.011636734008789062, -0.011089324951171875, -0.010541915893554688, -0.0099945068359375, -0.009447097778320312, -0.008899688720703125, -0.008352279663085938, -0.00780487060546875, -0.0072574615478515625, -0.006710052490234375, -0.0061626434326171875, -0.005615234375, -0.0050678253173828125, -0.004520416259765625, -0.0039730072021484375, -0.00342559814453125, -0.0028781890869140625, -0.002330780029296875, -0.0017833709716796875, -0.0012359619140625, -0.0006885528564453125, -0.000141143798828125, 0.0004062652587890625, 0.00095367431640625, 0.0015010833740234375, 0.002048492431640625, 0.0025959014892578125, 0.003143310546875, 0.0036907196044921875, 0.004238128662109375, 0.0047855377197265625, 0.00533294677734375, 0.0058803558349609375, 0.006427764892578125, 0.0069751739501953125, 0.0075225830078125, 0.008069992065429688, 0.008617401123046875, 0.009164810180664062, 0.00971221923828125, 0.010259628295898438, 0.010807037353515625, 0.011354446411132812, 0.01190185546875]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 4.0, 11.0, 26.0, 66.0, 143.0, 383.0, 3570998.0, 621271.0, 512.0, 311.0, 198.0, 133.0, 97.0, 57.0, 40.0, 24.0, 7.0, 4.0, 4.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4248046875, -1.2819976806640625, -1.139190673828125, -0.9963836669921875, -0.85357666015625, -0.7107696533203125, -0.567962646484375, -0.4251556396484375, -0.2823486328125, -0.1395416259765625, 0.003265380859375, 0.1460723876953125, 0.28887939453125, 0.4316864013671875, 0.574493408203125, 0.7173004150390625, 0.860107421875, 1.0029144287109375, 1.145721435546875, 1.2885284423828125, 1.43133544921875, 1.5741424560546875, 1.716949462890625, 1.8597564697265625, 2.0025634765625, 2.1453704833984375, 2.288177490234375, 2.4309844970703125, 2.57379150390625, 2.7165985107421875, 2.859405517578125, 3.0022125244140625, 3.14501953125, 3.2878265380859375, 3.430633544921875, 3.5734405517578125, 3.71624755859375, 3.8590545654296875, 4.001861572265625, 4.1446685791015625, 4.2874755859375, 4.4302825927734375, 4.573089599609375, 4.7158966064453125, 4.85870361328125, 5.0015106201171875, 5.144317626953125, 5.2871246337890625, 5.429931640625, 5.5727386474609375, 5.715545654296875, 5.8583526611328125, 6.00115966796875, 6.1439666748046875, 6.286773681640625, 6.4295806884765625, 6.5723876953125, 6.7151947021484375, 6.858001708984375, 7.0008087158203125, 7.14361572265625, 7.2864227294921875, 7.429229736328125, 7.5720367431640625, 7.71484375]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 5.0, 7.0, 13.0, 20.0, 45.0, 71.0, 160.0, 260.0, 581.0, 1242.0, 545.0, 407.0, 267.0, 173.0, 125.0, 70.0, 48.0, 21.0, 17.0, 3.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.026275634765625, -0.024927616119384766, -0.02357959747314453, -0.022231578826904297, -0.020883560180664062, -0.019535541534423828, -0.018187522888183594, -0.01683950424194336, -0.015491485595703125, -0.01414346694946289, -0.012795448303222656, -0.011447429656982422, -0.010099411010742188, -0.008751392364501953, -0.007403373718261719, -0.006055355072021484, -0.00470733642578125, -0.0033593177795410156, -0.0020112991333007812, -0.0006632804870605469, 0.0006847381591796875, 0.002032756805419922, 0.0033807754516601562, 0.004728794097900391, 0.006076812744140625, 0.007424831390380859, 0.008772850036621094, 0.010120868682861328, 0.011468887329101562, 0.012816905975341797, 0.014164924621582031, 0.015512943267822266, 0.0168609619140625, 0.018208980560302734, 0.01955699920654297, 0.020905017852783203, 0.022253036499023438, 0.023601055145263672, 0.024949073791503906, 0.02629709243774414, 0.027645111083984375, 0.02899312973022461, 0.030341148376464844, 0.03168916702270508, 0.03303718566894531, 0.03438520431518555, 0.03573322296142578, 0.037081241607666016, 0.03842926025390625, 0.039777278900146484, 0.04112529754638672, 0.04247331619262695, 0.04382133483886719, 0.04516935348510742, 0.046517372131347656, 0.04786539077758789, 0.049213409423828125, 0.05056142807006836, 0.051909446716308594, 0.05325746536254883, 0.05460548400878906, 0.0559535026550293, 0.05730152130126953, 0.058649539947509766, 0.05999755859375]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [18.0, 982.0, 18.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.676487922668457, -0.08301091194152832, 0.5104660987854004, 1.103943109512329, 1.6974201202392578, 2.2908971309661865, 2.8843741416931152, 3.477850914001465, 4.071328163146973, 4.6648054122924805, 5.25828218460083, 5.85175895690918, 6.4452362060546875, 7.038713455200195, 7.632190227508545, 8.225666999816895, 8.819144248962402, 9.41262149810791, 10.006097793579102, 10.59957504272461, 11.193052291870117, 11.786529541015625, 12.380006790161133, 12.973483085632324, 13.566960334777832, 14.16043758392334, 14.753913879394531, 15.347391128540039, 15.940868377685547, 16.534345626831055, 17.127822875976562, 17.721298217773438, 18.314777374267578, 18.908254623413086, 19.501731872558594, 20.0952091217041, 20.68868637084961, 21.282161712646484, 21.875638961791992, 22.4691162109375, 23.062593460083008, 23.656070709228516, 24.249547958374023, 24.84302520751953, 25.436500549316406, 26.029977798461914, 26.623455047607422, 27.21693229675293, 27.810409545898438, 28.403886795043945, 28.997364044189453, 29.59084129333496, 30.18431854248047, 30.777793884277344, 31.37127113342285, 31.96474838256836, 32.5582275390625, 33.151702880859375, 33.745182037353516, 34.33865737915039, 34.93213653564453, 35.525611877441406, 36.11909103393555, 36.71256637573242, 37.3060417175293]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 8.0, 19.0, 24.0, 65.0, 74.0, 146.0, 189.0, 169.0, 140.0, 91.0, 38.0, 26.0, 12.0, 6.0, 6.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5328328013420105, -0.4855237305164337, -0.43821465969085693, -0.39090561866760254, -0.34359651803970337, -0.296287477016449, -0.2489784061908722, -0.2016693353652954, -0.15436026453971863, -0.10705119371414185, -0.05974213033914566, -0.012433066964149475, 0.03487600386142731, 0.08218507468700409, 0.12949413061141968, 0.17680320143699646, 0.22411227226257324, 0.27142134308815, 0.3187304139137268, 0.3660394549369812, 0.41334855556488037, 0.46065759658813477, 0.5079666376113892, 0.5552757382392883, 0.6025848388671875, 0.6498938798904419, 0.6972029805183411, 0.7445120215415955, 0.7918211221694946, 0.839130163192749, 0.8864392042160034, 0.9337483048439026, 0.9810572862625122, 1.0283663272857666, 1.075675368309021, 1.122984528541565, 1.1702935695648193, 1.2176026105880737, 1.2649116516113281, 1.312220811843872, 1.3595298528671265, 1.4068388938903809, 1.4541479349136353, 1.5014570951461792, 1.5487661361694336, 1.596075177192688, 1.6433842182159424, 1.6906933784484863, 1.7380023002624512, 1.7853113412857056, 1.83262038230896, 1.879929542541504, 1.9272385835647583, 1.9745476245880127, 2.0218567848205566, 2.0691657066345215, 2.1164748668670654, 2.1637840270996094, 2.211092948913574, 2.258402109146118, 2.305711030960083, 2.353020191192627, 2.400329351425171, 2.4476382732391357, 2.4949474334716797]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 5.0, 4.0, 2.0, 1.0, 7.0, 2.0, 5.0, 3.0, 11.0, 23.0, 43.0, 44.0, 52.0, 115.0, 165.0, 306.0, 498.0, 842.0, 1413.0, 2563.0, 4591.0, 8547.0, 17937.0, 52617.0, 317689.0, 514145.0, 80543.0, 23123.0, 10474.0, 5686.0, 3007.0, 1606.0, 959.0, 613.0, 338.0, 215.0, 138.0, 75.0, 51.0, 30.0, 15.0, 15.0, 12.0, 11.0, 4.0, 5.0, 7.0, 1.0, 6.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.1463623046875, -0.14166259765625, -0.136962890625, -0.13226318359375, -0.1275634765625, -0.12286376953125, -0.1181640625, -0.11346435546875, -0.1087646484375, -0.10406494140625, -0.099365234375, -0.09466552734375, -0.0899658203125, -0.08526611328125, -0.08056640625, -0.07586669921875, -0.0711669921875, -0.06646728515625, -0.061767578125, -0.05706787109375, -0.0523681640625, -0.04766845703125, -0.04296875, -0.03826904296875, -0.0335693359375, -0.02886962890625, -0.024169921875, -0.01947021484375, -0.0147705078125, -0.01007080078125, -0.00537109375, -0.00067138671875, 0.0040283203125, 0.00872802734375, 0.013427734375, 0.01812744140625, 0.0228271484375, 0.02752685546875, 0.0322265625, 0.03692626953125, 0.0416259765625, 0.04632568359375, 0.051025390625, 0.05572509765625, 0.0604248046875, 0.06512451171875, 0.06982421875, 0.07452392578125, 0.0792236328125, 0.08392333984375, 0.088623046875, 0.09332275390625, 0.0980224609375, 0.10272216796875, 0.107421875, 0.11212158203125, 0.1168212890625, 0.12152099609375, 0.126220703125, 0.13092041015625, 0.1356201171875, 0.14031982421875, 0.14501953125, 0.14971923828125, 0.1544189453125]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 4.0, 4.0, 1.0, 5.0, 2.0, 2.0, 3.0, 12.0, 6.0, 5.0, 7.0, 12.0, 11.0, 26.0, 21.0, 27.0, 45.0, 71.0, 103.0, 100.0, 126.0, 107.0, 76.0, 66.0, 37.0, 34.0, 17.0, 20.0, 10.0, 7.0, 7.0, 12.0, 3.0, 3.0, 5.0, 3.0, 5.0, 1.0, 1.0, 3.0, 2.0, 3.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.008148193359375, -0.007812857627868652, -0.007477521896362305, -0.007142186164855957, -0.006806850433349609, -0.006471514701843262, -0.006136178970336914, -0.005800843238830566, -0.005465507507324219, -0.005130171775817871, -0.0047948360443115234, -0.004459500312805176, -0.004124164581298828, -0.0037888288497924805, -0.003453493118286133, -0.003118157386779785, -0.0027828216552734375, -0.00244748592376709, -0.002112150192260742, -0.0017768144607543945, -0.0014414787292480469, -0.0011061429977416992, -0.0007708072662353516, -0.0004354715347290039, -0.00010013580322265625, 0.0002351999282836914, 0.0005705356597900391, 0.0009058713912963867, 0.0012412071228027344, 0.001576542854309082, 0.0019118785858154297, 0.0022472143173217773, 0.002582550048828125, 0.0029178857803344727, 0.0032532215118408203, 0.003588557243347168, 0.003923892974853516, 0.004259228706359863, 0.004594564437866211, 0.004929900169372559, 0.005265235900878906, 0.005600571632385254, 0.0059359073638916016, 0.006271243095397949, 0.006606578826904297, 0.0069419145584106445, 0.007277250289916992, 0.00761258602142334, 0.007947921752929688, 0.008283257484436035, 0.008618593215942383, 0.00895392894744873, 0.009289264678955078, 0.009624600410461426, 0.009959936141967773, 0.010295271873474121, 0.010630607604980469, 0.010965943336486816, 0.011301279067993164, 0.011636614799499512, 0.01197195053100586, 0.012307286262512207, 0.012642621994018555, 0.012977957725524902, 0.01331329345703125]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 3.0, 4.0, 5.0, 1.0, 10.0, 9.0, 6.0, 5.0, 18.0, 16.0, 28.0, 26.0, 26.0, 29.0, 38.0, 42.0, 55.0, 83.0, 303.0, 3673.0, 353625.0, 684262.0, 5509.0, 394.0, 97.0, 49.0, 45.0, 35.0, 28.0, 29.0, 25.0, 12.0, 20.0, 10.0, 8.0, 12.0, 4.0, 4.0, 6.0, 6.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.482666015625, -0.4676170349121094, -0.45256805419921875, -0.4375190734863281, -0.4224700927734375, -0.4074211120605469, -0.39237213134765625, -0.3773231506347656, -0.362274169921875, -0.3472251892089844, -0.33217620849609375, -0.3171272277832031, -0.3020782470703125, -0.2870292663574219, -0.27198028564453125, -0.2569313049316406, -0.24188232421875, -0.22683334350585938, -0.21178436279296875, -0.19673538208007812, -0.1816864013671875, -0.16663742065429688, -0.15158843994140625, -0.13653945922851562, -0.121490478515625, -0.10644149780273438, -0.09139251708984375, -0.07634353637695312, -0.0612945556640625, -0.046245574951171875, -0.03119659423828125, -0.016147613525390625, -0.0010986328125, 0.013950347900390625, 0.02899932861328125, 0.044048309326171875, 0.0590972900390625, 0.07414627075195312, 0.08919525146484375, 0.10424423217773438, 0.119293212890625, 0.13434219360351562, 0.14939117431640625, 0.16444015502929688, 0.1794891357421875, 0.19453811645507812, 0.20958709716796875, 0.22463607788085938, 0.23968505859375, 0.2547340393066406, 0.26978302001953125, 0.2848320007324219, 0.2998809814453125, 0.3149299621582031, 0.32997894287109375, 0.3450279235839844, 0.360076904296875, 0.3751258850097656, 0.39017486572265625, 0.4052238464355469, 0.4202728271484375, 0.4353218078613281, 0.45037078857421875, 0.4654197692871094, 0.48046875]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 0.0, 4.0, 6.0, 3.0, 7.0, 10.0, 12.0, 8.0, 14.0, 12.0, 16.0, 14.0, 17.0, 20.0, 34.0, 34.0, 29.0, 30.0, 38.0, 29.0, 40.0, 30.0, 39.0, 44.0, 43.0, 37.0, 42.0, 36.0, 45.0, 33.0, 41.0, 32.0, 30.0, 34.0, 23.0, 26.0, 11.0, 16.0, 15.0, 12.0, 12.0, 3.0, 4.0, 5.0, 4.0, 7.0, 4.0, 3.0, 2.0, 1.0, 2.0, 1.0, 2.0], "bins": [-0.043853759765625, -0.04261302947998047, -0.04137229919433594, -0.040131568908691406, -0.038890838623046875, -0.037650108337402344, -0.03640937805175781, -0.03516864776611328, -0.03392791748046875, -0.03268718719482422, -0.03144645690917969, -0.030205726623535156, -0.028964996337890625, -0.027724266052246094, -0.026483535766601562, -0.02524280548095703, -0.0240020751953125, -0.02276134490966797, -0.021520614624023438, -0.020279884338378906, -0.019039154052734375, -0.017798423767089844, -0.016557693481445312, -0.015316963195800781, -0.01407623291015625, -0.012835502624511719, -0.011594772338867188, -0.010354042053222656, -0.009113311767578125, -0.007872581481933594, -0.0066318511962890625, -0.005391120910644531, -0.004150390625, -0.0029096603393554688, -0.0016689300537109375, -0.00042819976806640625, 0.000812530517578125, 0.0020532608032226562, 0.0032939910888671875, 0.004534721374511719, 0.00577545166015625, 0.007016181945800781, 0.008256912231445312, 0.009497642517089844, 0.010738372802734375, 0.011979103088378906, 0.013219833374023438, 0.014460563659667969, 0.0157012939453125, 0.01694202423095703, 0.018182754516601562, 0.019423484802246094, 0.020664215087890625, 0.021904945373535156, 0.023145675659179688, 0.02438640594482422, 0.02562713623046875, 0.02686786651611328, 0.028108596801757812, 0.029349327087402344, 0.030590057373046875, 0.031830787658691406, 0.03307151794433594, 0.03431224822998047, 0.035552978515625]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 3.0, 1.0, 6.0, 2.0, 5.0, 4.0, 7.0, 8.0, 23.0, 70.0, 269.0, 3488.0, 1041157.0, 3147.0, 238.0, 60.0, 29.0, 14.0, 2.0, 4.0, 6.0, 4.0, 2.0, 5.0, 5.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5390625, -1.48095703125, -1.4228515625, -1.36474609375, -1.306640625, -1.24853515625, -1.1904296875, -1.13232421875, -1.07421875, -1.01611328125, -0.9580078125, -0.89990234375, -0.841796875, -0.78369140625, -0.7255859375, -0.66748046875, -0.609375, -0.55126953125, -0.4931640625, -0.43505859375, -0.376953125, -0.31884765625, -0.2607421875, -0.20263671875, -0.14453125, -0.08642578125, -0.0283203125, 0.02978515625, 0.087890625, 0.14599609375, 0.2041015625, 0.26220703125, 0.3203125, 0.37841796875, 0.4365234375, 0.49462890625, 0.552734375, 0.61083984375, 0.6689453125, 0.72705078125, 0.78515625, 0.84326171875, 0.9013671875, 0.95947265625, 1.017578125, 1.07568359375, 1.1337890625, 1.19189453125, 1.25, 1.30810546875, 1.3662109375, 1.42431640625, 1.482421875, 1.54052734375, 1.5986328125, 1.65673828125, 1.71484375, 1.77294921875, 1.8310546875, 1.88916015625, 1.947265625, 2.00537109375, 2.0634765625, 2.12158203125, 2.1796875]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 4.0, 2.0, 7.0, 9.0, 16.0, 29.0, 39.0, 153.0, 341.0, 231.0, 81.0, 23.0, 25.0, 10.0, 6.0, 8.0, 7.0, 5.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00011467933654785156, -0.00011094287037849426, -0.00010720640420913696, -0.00010346993803977966, -9.973347187042236e-05, -9.599700570106506e-05, -9.226053953170776e-05, -8.852407336235046e-05, -8.478760719299316e-05, -8.105114102363586e-05, -7.731467485427856e-05, -7.357820868492126e-05, -6.984174251556396e-05, -6.610527634620667e-05, -6.236881017684937e-05, -5.8632344007492065e-05, -5.4895877838134766e-05, -5.1159411668777466e-05, -4.7422945499420166e-05, -4.3686479330062866e-05, -3.9950013160705566e-05, -3.6213546991348267e-05, -3.247708082199097e-05, -2.8740614652633667e-05, -2.5004148483276367e-05, -2.1267682313919067e-05, -1.7531216144561768e-05, -1.3794749975204468e-05, -1.0058283805847168e-05, -6.321817636489868e-06, -2.5853514671325684e-06, 1.1511147022247314e-06, 4.887580871582031e-06, 8.624047040939331e-06, 1.2360513210296631e-05, 1.609697937965393e-05, 1.983344554901123e-05, 2.356991171836853e-05, 2.730637788772583e-05, 3.104284405708313e-05, 3.477931022644043e-05, 3.851577639579773e-05, 4.225224256515503e-05, 4.598870873451233e-05, 4.972517490386963e-05, 5.346164107322693e-05, 5.719810724258423e-05, 6.093457341194153e-05, 6.467103958129883e-05, 6.840750575065613e-05, 7.214397192001343e-05, 7.588043808937073e-05, 7.961690425872803e-05, 8.335337042808533e-05, 8.708983659744263e-05, 9.082630276679993e-05, 9.456276893615723e-05, 9.829923510551453e-05, 0.00010203570127487183, 0.00010577216744422913, 0.00010950863361358643, 0.00011324509978294373, 0.00011698156595230103, 0.00012071803212165833, 0.00012445449829101562]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 5.0, 6.0, 2.0, 8.0, 27.0, 55.0, 137.0, 371.0, 1263.0, 9100.0, 871133.0, 160478.0, 4669.0, 821.0, 295.0, 105.0, 42.0, 20.0, 4.0, 4.0, 3.0, 7.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.81884765625, -0.7923202514648438, -0.7657928466796875, -0.7392654418945312, -0.712738037109375, -0.6862106323242188, -0.6596832275390625, -0.6331558227539062, -0.60662841796875, -0.5801010131835938, -0.5535736083984375, -0.5270462036132812, -0.500518798828125, -0.47399139404296875, -0.4474639892578125, -0.42093658447265625, -0.3944091796875, -0.36788177490234375, -0.3413543701171875, -0.31482696533203125, -0.288299560546875, -0.26177215576171875, -0.2352447509765625, -0.20871734619140625, -0.18218994140625, -0.15566253662109375, -0.1291351318359375, -0.10260772705078125, -0.076080322265625, -0.04955291748046875, -0.0230255126953125, 0.00350189208984375, 0.030029296875, 0.05655670166015625, 0.0830841064453125, 0.10961151123046875, 0.136138916015625, 0.16266632080078125, 0.1891937255859375, 0.21572113037109375, 0.24224853515625, 0.26877593994140625, 0.2953033447265625, 0.32183074951171875, 0.348358154296875, 0.37488555908203125, 0.4014129638671875, 0.42794036865234375, 0.4544677734375, 0.48099517822265625, 0.5075225830078125, 0.5340499877929688, 0.560577392578125, 0.5871047973632812, 0.6136322021484375, 0.6401596069335938, 0.66668701171875, 0.6932144165039062, 0.7197418212890625, 0.7462692260742188, 0.772796630859375, 0.7993240356445312, 0.8258514404296875, 0.8523788452148438, 0.87890625]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 3.0, 4.0, 7.0, 2.0, 9.0, 7.0, 12.0, 14.0, 44.0, 54.0, 147.0, 219.0, 217.0, 121.0, 55.0, 28.0, 21.0, 14.0, 8.0, 5.0, 4.0, 1.0, 2.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.1956787109375, -0.1903705596923828, -0.18506240844726562, -0.17975425720214844, -0.17444610595703125, -0.16913795471191406, -0.16382980346679688, -0.1585216522216797, -0.1532135009765625, -0.1479053497314453, -0.14259719848632812, -0.13728904724121094, -0.13198089599609375, -0.12667274475097656, -0.12136459350585938, -0.11605644226074219, -0.110748291015625, -0.10544013977050781, -0.10013198852539062, -0.09482383728027344, -0.08951568603515625, -0.08420753479003906, -0.07889938354492188, -0.07359123229980469, -0.0682830810546875, -0.06297492980957031, -0.057666778564453125, -0.05235862731933594, -0.04705047607421875, -0.04174232482910156, -0.036434173583984375, -0.031126022338867188, -0.02581787109375, -0.020509719848632812, -0.015201568603515625, -0.009893417358398438, -0.00458526611328125, 0.0007228851318359375, 0.006031036376953125, 0.011339187622070312, 0.0166473388671875, 0.021955490112304688, 0.027263641357421875, 0.03257179260253906, 0.03787994384765625, 0.04318809509277344, 0.048496246337890625, 0.05380439758300781, 0.059112548828125, 0.06442070007324219, 0.06972885131835938, 0.07503700256347656, 0.08034515380859375, 0.08565330505371094, 0.09096145629882812, 0.09626960754394531, 0.1015777587890625, 0.10688591003417969, 0.11219406127929688, 0.11750221252441406, 0.12281036376953125, 0.12811851501464844, 0.13342666625976562, 0.1387348175048828, 0.14404296875]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 5.0, 11.0, 23.0, 61.0, 131.0, 214.0, 249.0, 150.0, 77.0, 35.0, 23.0, 8.0, 9.0, 5.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1968357563018799, -1.1637367010116577, -1.130637764930725, -1.097538709640503, -1.0644397735595703, -1.0313407182693481, -0.9982417821884155, -0.9651427268981934, -0.9320437908172607, -0.8989447951316833, -0.865845799446106, -0.8327468037605286, -0.7996478080749512, -0.7665488123893738, -0.7334498167037964, -0.7003507614135742, -0.6672517657279968, -0.6341527700424194, -0.601053774356842, -0.5679547786712646, -0.5348557829856873, -0.5017567873001099, -0.4686577618122101, -0.4355587661266327, -0.4024597704410553, -0.3693607747554779, -0.3362617790699005, -0.30316275358200073, -0.27006375789642334, -0.23696477711200714, -0.20386576652526855, -0.17076677083969116, -0.13766777515411377, -0.10456877946853638, -0.07146977633237839, -0.0383707731962204, -0.005271777510643005, 0.027827218174934387, 0.060926228761672974, 0.09402522444725037, 0.12712422013282776, 0.16022321581840515, 0.19332221150398254, 0.22642122209072113, 0.2595202326774597, 0.2926192283630371, 0.3257182240486145, 0.3588172197341919, 0.3919162154197693, 0.4250152111053467, 0.4581142067909241, 0.49121320247650146, 0.5243121981620789, 0.5574111938476562, 0.5905102491378784, 0.623609185218811, 0.6567082405090332, 0.6898072361946106, 0.722906231880188, 0.7560052275657654, 0.7891042232513428, 0.8222032189369202, 0.8553022146224976, 0.8884012699127197, 0.9215002059936523]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 5.0, 4.0, 3.0, 2.0, 11.0, 10.0, 16.0, 8.0, 30.0, 37.0, 60.0, 59.0, 79.0, 69.0, 102.0, 90.0, 88.0, 66.0, 64.0, 62.0, 34.0, 22.0, 18.0, 15.0, 10.0, 6.0, 10.0, 7.0, 4.0, 1.0, 3.0, 3.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5755372047424316, -0.5581703186035156, -0.5408034324645996, -0.5234366059303284, -0.5060697197914124, -0.48870283365249634, -0.4713359475135803, -0.4539690613746643, -0.4366022050380707, -0.41923531889915466, -0.40186846256256104, -0.384501576423645, -0.367134690284729, -0.3497678339481354, -0.33240094780921936, -0.31503409147262573, -0.2976672053337097, -0.2803003191947937, -0.2629334628582001, -0.24556657671928406, -0.22819970548152924, -0.21083283424377441, -0.1934659481048584, -0.17609907686710358, -0.15873220562934875, -0.14136533439159393, -0.12399845570325851, -0.1066315770149231, -0.08926470577716827, -0.07189783453941345, -0.05453095585107803, -0.037164077162742615, -0.019797146320343018, -0.0024302713572978973, 0.014936603605747223, 0.03230347856879234, 0.04967035353183746, 0.06703722476959229, 0.0844041034579277, 0.10177098214626312, 0.11913785338401794, 0.13650472462177277, 0.1538715958595276, 0.1712384819984436, 0.18860535323619843, 0.20597222447395325, 0.22333911061286926, 0.24070598185062408, 0.2580728530883789, 0.2754397392272949, 0.29280659556388855, 0.31017348170280457, 0.3275403380393982, 0.3449072241783142, 0.3622741103172302, 0.37964099645614624, 0.39700785279273987, 0.4143747389316559, 0.4317415952682495, 0.4491084814071655, 0.46647536754608154, 0.48384222388267517, 0.5012090802192688, 0.5185759663581848, 0.5359428524971008]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 10.0, 6.0, 2.0, 6.0, 2.0, 8.0, 8.0, 8.0, 8.0, 6.0, 20.0, 10.0, 18.0, 10.0, 22.0, 16.0, 24.0, 82.0, 276.0, 4188237.0, 4958.0, 297.0, 76.0, 34.0, 22.0, 18.0, 20.0, 10.0, 6.0, 12.0, 2.0, 12.0, 6.0, 12.0, 2.0, 4.0, 0.0, 4.0, 4.0, 2.0, 2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 4.0], "bins": [-5.953125, -5.77166748046875, -5.5902099609375, -5.40875244140625, -5.227294921875, -5.04583740234375, -4.8643798828125, -4.68292236328125, -4.50146484375, -4.32000732421875, -4.1385498046875, -3.95709228515625, -3.775634765625, -3.59417724609375, -3.4127197265625, -3.23126220703125, -3.0498046875, -2.86834716796875, -2.6868896484375, -2.50543212890625, -2.323974609375, -2.14251708984375, -1.9610595703125, -1.77960205078125, -1.59814453125, -1.41668701171875, -1.2352294921875, -1.05377197265625, -0.872314453125, -0.69085693359375, -0.5093994140625, -0.32794189453125, -0.146484375, 0.03497314453125, 0.2164306640625, 0.39788818359375, 0.579345703125, 0.76080322265625, 0.9422607421875, 1.12371826171875, 1.30517578125, 1.48663330078125, 1.6680908203125, 1.84954833984375, 2.031005859375, 2.21246337890625, 2.3939208984375, 2.57537841796875, 2.7568359375, 2.93829345703125, 3.1197509765625, 3.30120849609375, 3.482666015625, 3.66412353515625, 3.8455810546875, 4.02703857421875, 4.20849609375, 4.38995361328125, 4.5714111328125, 4.75286865234375, 4.934326171875, 5.11578369140625, 5.2972412109375, 5.47869873046875, 5.66015625]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 5.0, 2.0, 13.0, 13.0, 16.0, 20.0, 43.0, 88.0, 127.0, 178.0, 153.0, 114.0, 112.0, 48.0, 21.0, 16.0, 12.0, 12.0, 5.0, 6.0, 4.0, 5.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01171875, -0.011137008666992188, -0.010555267333984375, -0.009973526000976562, -0.00939178466796875, -0.008810043334960938, -0.008228302001953125, -0.0076465606689453125, -0.0070648193359375, -0.0064830780029296875, -0.005901336669921875, -0.0053195953369140625, -0.00473785400390625, -0.0041561126708984375, -0.003574371337890625, -0.0029926300048828125, -0.002410888671875, -0.0018291473388671875, -0.001247406005859375, -0.0006656646728515625, -8.392333984375e-05, 0.0004978179931640625, 0.001079559326171875, 0.0016613006591796875, 0.0022430419921875, 0.0028247833251953125, 0.003406524658203125, 0.0039882659912109375, 0.00457000732421875, 0.0051517486572265625, 0.005733489990234375, 0.0063152313232421875, 0.00689697265625, 0.0074787139892578125, 0.008060455322265625, 0.008642196655273438, 0.00922393798828125, 0.009805679321289062, 0.010387420654296875, 0.010969161987304688, 0.0115509033203125, 0.012132644653320312, 0.012714385986328125, 0.013296127319335938, 0.01387786865234375, 0.014459609985351562, 0.015041351318359375, 0.015623092651367188, 0.016204833984375, 0.016786575317382812, 0.017368316650390625, 0.017950057983398438, 0.01853179931640625, 0.019113540649414062, 0.019695281982421875, 0.020277023315429688, 0.0208587646484375, 0.021440505981445312, 0.022022247314453125, 0.022603988647460938, 0.02318572998046875, 0.023767471313476562, 0.024349212646484375, 0.024930953979492188, 0.0255126953125]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 26.0, 333.0, 4193482.0, 435.0, 17.0, 3.0], "bins": [-22.09375, -21.7288818359375, -21.364013671875, -20.9991455078125, -20.63427734375, -20.2694091796875, -19.904541015625, -19.5396728515625, -19.1748046875, -18.8099365234375, -18.445068359375, -18.0802001953125, -17.71533203125, -17.3504638671875, -16.985595703125, -16.6207275390625, -16.255859375, -15.8909912109375, -15.526123046875, -15.1612548828125, -14.79638671875, -14.4315185546875, -14.066650390625, -13.7017822265625, -13.3369140625, -12.9720458984375, -12.607177734375, -12.2423095703125, -11.87744140625, -11.5125732421875, -11.147705078125, -10.7828369140625, -10.41796875, -10.0531005859375, -9.688232421875, -9.3233642578125, -8.95849609375, -8.5936279296875, -8.228759765625, -7.8638916015625, -7.4990234375, -7.1341552734375, -6.769287109375, -6.4044189453125, -6.03955078125, -5.6746826171875, -5.309814453125, -4.9449462890625, -4.580078125, -4.2152099609375, -3.850341796875, -3.4854736328125, -3.12060546875, -2.7557373046875, -2.390869140625, -2.0260009765625, -1.6611328125, -1.2962646484375, -0.931396484375, -0.5665283203125, -0.20166015625, 0.1632080078125, 0.528076171875, 0.8929443359375, 1.2578125]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 9.0, 16.0, 64.0, 228.0, 1068.0, 2296.0, 282.0, 71.0, 28.0, 10.0, 5.0, 4.0, 3.0, 2.0], "bins": [-0.1671142578125, -0.16408300399780273, -0.16105175018310547, -0.1580204963684082, -0.15498924255371094, -0.15195798873901367, -0.1489267349243164, -0.14589548110961914, -0.14286422729492188, -0.1398329734802246, -0.13680171966552734, -0.13377046585083008, -0.1307392120361328, -0.12770795822143555, -0.12467670440673828, -0.12164545059204102, -0.11861419677734375, -0.11558294296264648, -0.11255168914794922, -0.10952043533325195, -0.10648918151855469, -0.10345792770385742, -0.10042667388916016, -0.09739542007446289, -0.09436416625976562, -0.09133291244506836, -0.0883016586303711, -0.08527040481567383, -0.08223915100097656, -0.0792078971862793, -0.07617664337158203, -0.07314538955688477, -0.0701141357421875, -0.06708288192749023, -0.06405162811279297, -0.0610203742980957, -0.05798912048339844, -0.05495786666870117, -0.051926612854003906, -0.04889535903930664, -0.045864105224609375, -0.04283285140991211, -0.039801597595214844, -0.03677034378051758, -0.03373908996582031, -0.030707836151123047, -0.02767658233642578, -0.024645328521728516, -0.02161407470703125, -0.018582820892333984, -0.015551567077636719, -0.012520313262939453, -0.009489059448242188, -0.006457805633544922, -0.0034265518188476562, -0.0003952980041503906, 0.002635955810546875, 0.005667209625244141, 0.008698463439941406, 0.011729717254638672, 0.014760971069335938, 0.017792224884033203, 0.02082347869873047, 0.023854732513427734, 0.026885986328125]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 943.0, 78.0], "bins": [-53.487030029296875, -52.636539459228516, -51.786048889160156, -50.9355583190918, -50.08506774902344, -49.23457717895508, -48.38408660888672, -47.53359603881836, -46.68310546875, -45.83261489868164, -44.98212432861328, -44.13163375854492, -43.28114318847656, -42.4306526184082, -41.580162048339844, -40.729671478271484, -39.879180908203125, -39.028690338134766, -38.178199768066406, -37.32770919799805, -36.47721862792969, -35.62672805786133, -34.77623748779297, -33.92574691772461, -33.075260162353516, -32.224769592285156, -31.374279022216797, -30.523788452148438, -29.673297882080078, -28.82280731201172, -27.97231674194336, -27.121826171875, -26.27133560180664, -25.42084503173828, -24.570354461669922, -23.719863891601562, -22.869373321533203, -22.018882751464844, -21.168392181396484, -20.317901611328125, -19.4674129486084, -18.61692237854004, -17.76643180847168, -16.91594123840332, -16.06545066833496, -15.214960098266602, -14.364470481872559, -13.5139799118042, -12.66348934173584, -11.81299877166748, -10.962508201599121, -10.112018585205078, -9.261528015136719, -8.41103744506836, -7.560546875, -6.710056304931641, -5.859565734863281, -5.009075164794922, -4.1585845947265625, -3.3080945014953613, -2.457603931427002, -1.6071133613586426, -0.7566232681274414, 0.09386730194091797, 0.9443579912185669]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 5.0, 10.0, 25.0, 77.0, 174.0, 277.0, 226.0, 124.0, 60.0, 25.0, 10.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.495227336883545, -3.42746901512146, -3.359710693359375, -3.291952133178711, -3.224193811416626, -3.156435489654541, -3.088677167892456, -3.020918846130371, -2.953160285949707, -2.885401964187622, -2.817643642425537, -2.749885082244873, -2.682126760482788, -2.614368438720703, -2.546610116958618, -2.478851795196533, -2.4110934734344482, -2.3433351516723633, -2.2755768299102783, -2.2078182697296143, -2.1400599479675293, -2.0723016262054443, -2.0045433044433594, -1.9367849826812744, -1.8690265417099, -1.801268219947815, -1.7335097789764404, -1.6657514572143555, -1.5979931354522705, -1.530234694480896, -1.462476372718811, -1.3947179317474365, -1.3269596099853516, -1.2592012882232666, -1.191442847251892, -1.1236845254898071, -1.0559260845184326, -0.9881677627563477, -0.9204094409942627, -0.852651059627533, -0.7848926782608032, -0.7171342968940735, -0.6493759155273438, -0.5816175937652588, -0.513859212398529, -0.4461008310317993, -0.37834247946739197, -0.3105841279029846, -0.24282574653625488, -0.17506738007068634, -0.1073090136051178, -0.039550647139549255, 0.028207719326019287, 0.09596610069274902, 0.16372445225715637, 0.23148280382156372, 0.29924118518829346, 0.3669995665550232, 0.43475791811943054, 0.5025162696838379, 0.5702746510505676, 0.6380330324172974, 0.7057913541793823, 0.7735497355461121, 0.8413081169128418]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 0.0, 5.0, 2.0, 2.0, 6.0, 3.0, 4.0, 3.0, 12.0, 14.0, 16.0, 12.0, 23.0, 17.0, 25.0, 33.0, 23.0, 25.0, 38.0, 30.0, 41.0, 41.0, 43.0, 55.0, 1049.0, 1045394.0, 1218.0, 50.0, 39.0, 40.0, 40.0, 33.0, 34.0, 25.0, 34.0, 22.0, 17.0, 14.0, 12.0, 16.0, 9.0, 11.0, 4.0, 10.0, 5.0, 4.0, 4.0, 5.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.14453125, -4.01702880859375, -3.8895263671875, -3.76202392578125, -3.634521484375, -3.50701904296875, -3.3795166015625, -3.25201416015625, -3.12451171875, -2.99700927734375, -2.8695068359375, -2.74200439453125, -2.614501953125, -2.48699951171875, -2.3594970703125, -2.23199462890625, -2.1044921875, -1.97698974609375, -1.8494873046875, -1.72198486328125, -1.594482421875, -1.46697998046875, -1.3394775390625, -1.21197509765625, -1.08447265625, -0.95697021484375, -0.8294677734375, -0.70196533203125, -0.574462890625, -0.44696044921875, -0.3194580078125, -0.19195556640625, -0.064453125, 0.06304931640625, 0.1905517578125, 0.31805419921875, 0.445556640625, 0.57305908203125, 0.7005615234375, 0.82806396484375, 0.95556640625, 1.08306884765625, 1.2105712890625, 1.33807373046875, 1.465576171875, 1.59307861328125, 1.7205810546875, 1.84808349609375, 1.9755859375, 2.10308837890625, 2.2305908203125, 2.35809326171875, 2.485595703125, 2.61309814453125, 2.7406005859375, 2.86810302734375, 2.99560546875, 3.12310791015625, 3.2506103515625, 3.37811279296875, 3.505615234375, 3.63311767578125, 3.7606201171875, 3.88812255859375, 4.015625]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 15.0, 114.0, 576.0, 289.0, 26.0, 1.0, 1.0, 1.0], "bins": [-0.77685546875, -0.763606071472168, -0.7503566741943359, -0.7371072769165039, -0.7238578796386719, -0.7106084823608398, -0.6973590850830078, -0.6841096878051758, -0.6708602905273438, -0.6576108932495117, -0.6443614959716797, -0.6311120986938477, -0.6178627014160156, -0.6046133041381836, -0.5913639068603516, -0.5781145095825195, -0.5648651123046875, -0.5516157150268555, -0.5383663177490234, -0.5251169204711914, -0.5118675231933594, -0.49861812591552734, -0.4853687286376953, -0.4721193313598633, -0.45886993408203125, -0.4456205368041992, -0.4323711395263672, -0.41912174224853516, -0.4058723449707031, -0.3926229476928711, -0.37937355041503906, -0.36612415313720703, -0.352874755859375, -0.33962535858154297, -0.32637596130371094, -0.3131265640258789, -0.2998771667480469, -0.28662776947021484, -0.2733783721923828, -0.2601289749145508, -0.24687957763671875, -0.23363018035888672, -0.2203807830810547, -0.20713138580322266, -0.19388198852539062, -0.1806325912475586, -0.16738319396972656, -0.15413379669189453, -0.1408843994140625, -0.12763500213623047, -0.11438560485839844, -0.1011362075805664, -0.08788681030273438, -0.07463741302490234, -0.06138801574707031, -0.04813861846923828, -0.03488922119140625, -0.02163982391357422, -0.008390426635742188, 0.004858970642089844, 0.018108367919921875, 0.031357765197753906, 0.04460716247558594, 0.05785655975341797, 0.07110595703125]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 3.0, 4.0, 7.0, 1.0, 7.0, 12.0, 5.0, 8.0, 12.0, 23.0, 27.0, 36.0, 64.0, 103.0, 215.0, 480.0, 1236.0, 3944.0, 18119.0, 136785.0, 783718.0, 85659.0, 13193.0, 3076.0, 965.0, 390.0, 164.0, 106.0, 53.0, 48.0, 26.0, 17.0, 15.0, 10.0, 7.0, 5.0, 4.0, 6.0, 2.0, 4.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2841796875, -1.2389678955078125, -1.193756103515625, -1.1485443115234375, -1.10333251953125, -1.0581207275390625, -1.012908935546875, -0.9676971435546875, -0.9224853515625, -0.8772735595703125, -0.832061767578125, -0.7868499755859375, -0.74163818359375, -0.6964263916015625, -0.651214599609375, -0.6060028076171875, -0.560791015625, -0.5155792236328125, -0.470367431640625, -0.4251556396484375, -0.37994384765625, -0.3347320556640625, -0.289520263671875, -0.2443084716796875, -0.1990966796875, -0.1538848876953125, -0.108673095703125, -0.0634613037109375, -0.01824951171875, 0.0269622802734375, 0.072174072265625, 0.1173858642578125, 0.16259765625, 0.2078094482421875, 0.253021240234375, 0.2982330322265625, 0.34344482421875, 0.3886566162109375, 0.433868408203125, 0.4790802001953125, 0.5242919921875, 0.5695037841796875, 0.614715576171875, 0.6599273681640625, 0.70513916015625, 0.7503509521484375, 0.795562744140625, 0.8407745361328125, 0.885986328125, 0.9311981201171875, 0.976409912109375, 1.0216217041015625, 1.06683349609375, 1.1120452880859375, 1.157257080078125, 1.2024688720703125, 1.2476806640625, 1.2928924560546875, 1.338104248046875, 1.3833160400390625, 1.42852783203125, 1.4737396240234375, 1.518951416015625, 1.5641632080078125, 1.609375]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0, 2.0, 10.0, 6.0, 8.0, 18.0, 12.0, 10.0, 24.0, 25.0, 36.0, 51.0, 46.0, 68.0, 65.0, 66.0, 79.0, 69.0, 66.0, 65.0, 55.0, 49.0, 46.0, 29.0, 27.0, 12.0, 20.0, 12.0, 7.0, 9.0, 3.0, 2.0, 2.0, 1.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5859375, -0.5654525756835938, -0.5449676513671875, -0.5244827270507812, -0.503997802734375, -0.48351287841796875, -0.4630279541015625, -0.44254302978515625, -0.42205810546875, -0.40157318115234375, -0.3810882568359375, -0.36060333251953125, -0.340118408203125, -0.31963348388671875, -0.2991485595703125, -0.27866363525390625, -0.2581787109375, -0.23769378662109375, -0.2172088623046875, -0.19672393798828125, -0.176239013671875, -0.15575408935546875, -0.1352691650390625, -0.11478424072265625, -0.09429931640625, -0.07381439208984375, -0.0533294677734375, -0.03284454345703125, -0.012359619140625, 0.00812530517578125, 0.0286102294921875, 0.04909515380859375, 0.069580078125, 0.09006500244140625, 0.1105499267578125, 0.13103485107421875, 0.151519775390625, 0.17200469970703125, 0.1924896240234375, 0.21297454833984375, 0.23345947265625, 0.25394439697265625, 0.2744293212890625, 0.29491424560546875, 0.315399169921875, 0.33588409423828125, 0.3563690185546875, 0.37685394287109375, 0.3973388671875, 0.41782379150390625, 0.4383087158203125, 0.45879364013671875, 0.479278564453125, 0.49976348876953125, 0.5202484130859375, 0.5407333374023438, 0.56121826171875, 0.5817031860351562, 0.6021881103515625, 0.6226730346679688, 0.643157958984375, 0.6636428833007812, 0.6841278076171875, 0.7046127319335938, 0.72509765625]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 4.0, 3.0, 2.0, 1.0, 9.0, 2.0, 6.0, 5.0, 14.0, 10.0, 7.0, 15.0, 25.0, 30.0, 53.0, 71.0, 151.0, 388.0, 1095.0, 4833.0, 45314.0, 900062.0, 87285.0, 6769.0, 1506.0, 466.0, 169.0, 81.0, 46.0, 37.0, 21.0, 16.0, 13.0, 13.0, 7.0, 6.0, 5.0, 6.0, 4.0, 1.0, 5.0, 3.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.73046875, -1.6719970703125, -1.613525390625, -1.5550537109375, -1.49658203125, -1.4381103515625, -1.379638671875, -1.3211669921875, -1.2626953125, -1.2042236328125, -1.145751953125, -1.0872802734375, -1.02880859375, -0.9703369140625, -0.911865234375, -0.8533935546875, -0.794921875, -0.7364501953125, -0.677978515625, -0.6195068359375, -0.56103515625, -0.5025634765625, -0.444091796875, -0.3856201171875, -0.3271484375, -0.2686767578125, -0.210205078125, -0.1517333984375, -0.09326171875, -0.0347900390625, 0.023681640625, 0.0821533203125, 0.140625, 0.1990966796875, 0.257568359375, 0.3160400390625, 0.37451171875, 0.4329833984375, 0.491455078125, 0.5499267578125, 0.6083984375, 0.6668701171875, 0.725341796875, 0.7838134765625, 0.84228515625, 0.9007568359375, 0.959228515625, 1.0177001953125, 1.076171875, 1.1346435546875, 1.193115234375, 1.2515869140625, 1.31005859375, 1.3685302734375, 1.427001953125, 1.4854736328125, 1.5439453125, 1.6024169921875, 1.660888671875, 1.7193603515625, 1.77783203125, 1.8363037109375, 1.894775390625, 1.9532470703125, 2.01171875]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 5.0, 0.0, 0.0, 7.0, 5.0, 6.0, 16.0, 19.0, 22.0, 37.0, 39.0, 51.0, 64.0, 115.0, 115.0, 121.0, 99.0, 82.0, 72.0, 41.0, 24.0, 19.0, 13.0, 13.0, 3.0, 5.0, 5.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.636713027954102e-05, -8.303392678499222e-05, -7.970072329044342e-05, -7.636751979589462e-05, -7.303431630134583e-05, -6.970111280679703e-05, -6.636790931224823e-05, -6.303470581769943e-05, -5.9701502323150635e-05, -5.636829882860184e-05, -5.303509533405304e-05, -4.970189183950424e-05, -4.6368688344955444e-05, -4.303548485040665e-05, -3.970228135585785e-05, -3.636907786130905e-05, -3.3035874366760254e-05, -2.9702670872211456e-05, -2.636946737766266e-05, -2.303626388311386e-05, -1.9703060388565063e-05, -1.6369856894016266e-05, -1.3036653399467468e-05, -9.70344990491867e-06, -6.370246410369873e-06, -3.0370429158210754e-06, 2.9616057872772217e-07, 3.6293640732765198e-06, 6.962567567825317e-06, 1.0295771062374115e-05, 1.3628974556922913e-05, 1.696217805147171e-05, 2.0295381546020508e-05, 2.3628585040569305e-05, 2.6961788535118103e-05, 3.02949920296669e-05, 3.36281955242157e-05, 3.6961399018764496e-05, 4.0294602513313293e-05, 4.362780600786209e-05, 4.696100950241089e-05, 5.0294212996959686e-05, 5.3627416491508484e-05, 5.696061998605728e-05, 6.029382348060608e-05, 6.362702697515488e-05, 6.696023046970367e-05, 7.029343396425247e-05, 7.362663745880127e-05, 7.695984095335007e-05, 8.029304444789886e-05, 8.362624794244766e-05, 8.695945143699646e-05, 9.029265493154526e-05, 9.362585842609406e-05, 9.695906192064285e-05, 0.00010029226541519165, 0.00010362546890974045, 0.00010695867240428925, 0.00011029187589883804, 0.00011362507939338684, 0.00011695828288793564, 0.00012029148638248444, 0.00012362468987703323, 0.00012695789337158203]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 5.0, 5.0, 3.0, 4.0, 8.0, 6.0, 8.0, 12.0, 23.0, 19.0, 56.0, 96.0, 292.0, 799.0, 3221.0, 18739.0, 495501.0, 505544.0, 19326.0, 3435.0, 903.0, 304.0, 108.0, 52.0, 26.0, 16.0, 16.0, 3.0, 2.0, 6.0, 4.0, 6.0, 6.0, 0.0, 1.0, 1.0, 1.0, 6.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.814453125, -1.75189208984375, -1.6893310546875, -1.62677001953125, -1.564208984375, -1.50164794921875, -1.4390869140625, -1.37652587890625, -1.31396484375, -1.25140380859375, -1.1888427734375, -1.12628173828125, -1.063720703125, -1.00115966796875, -0.9385986328125, -0.87603759765625, -0.8134765625, -0.75091552734375, -0.6883544921875, -0.62579345703125, -0.563232421875, -0.50067138671875, -0.4381103515625, -0.37554931640625, -0.31298828125, -0.25042724609375, -0.1878662109375, -0.12530517578125, -0.062744140625, -0.00018310546875, 0.0623779296875, 0.12493896484375, 0.1875, 0.25006103515625, 0.3126220703125, 0.37518310546875, 0.437744140625, 0.50030517578125, 0.5628662109375, 0.62542724609375, 0.68798828125, 0.75054931640625, 0.8131103515625, 0.87567138671875, 0.938232421875, 1.00079345703125, 1.0633544921875, 1.12591552734375, 1.1884765625, 1.25103759765625, 1.3135986328125, 1.37615966796875, 1.438720703125, 1.50128173828125, 1.5638427734375, 1.62640380859375, 1.68896484375, 1.75152587890625, 1.8140869140625, 1.87664794921875, 1.939208984375, 2.00177001953125, 2.0643310546875, 2.12689208984375, 2.189453125]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 4.0, 4.0, 5.0, 2.0, 6.0, 6.0, 12.0, 10.0, 16.0, 24.0, 32.0, 49.0, 79.0, 127.0, 162.0, 152.0, 108.0, 70.0, 38.0, 25.0, 16.0, 11.0, 7.0, 5.0, 8.0, 7.0, 6.0, 4.0, 0.0, 4.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6005859375, -0.5773391723632812, -0.5540924072265625, -0.5308456420898438, -0.507598876953125, -0.48435211181640625, -0.4611053466796875, -0.43785858154296875, -0.41461181640625, -0.39136505126953125, -0.3681182861328125, -0.34487152099609375, -0.321624755859375, -0.29837799072265625, -0.2751312255859375, -0.25188446044921875, -0.2286376953125, -0.20539093017578125, -0.1821441650390625, -0.15889739990234375, -0.135650634765625, -0.11240386962890625, -0.0891571044921875, -0.06591033935546875, -0.04266357421875, -0.01941680908203125, 0.0038299560546875, 0.02707672119140625, 0.050323486328125, 0.07357025146484375, 0.0968170166015625, 0.12006378173828125, 0.143310546875, 0.16655731201171875, 0.1898040771484375, 0.21305084228515625, 0.236297607421875, 0.25954437255859375, 0.2827911376953125, 0.30603790283203125, 0.32928466796875, 0.35253143310546875, 0.3757781982421875, 0.39902496337890625, 0.422271728515625, 0.44551849365234375, 0.4687652587890625, 0.49201202392578125, 0.5152587890625, 0.5385055541992188, 0.5617523193359375, 0.5849990844726562, 0.608245849609375, 0.6314926147460938, 0.6547393798828125, 0.6779861450195312, 0.70123291015625, 0.7244796752929688, 0.7477264404296875, 0.7709732055664062, 0.794219970703125, 0.8174667358398438, 0.8407135009765625, 0.8639602661132812, 0.88720703125]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 524.0, 481.0, 9.0, 2.0, 1.0, 1.0], "bins": [-175.54312133789062, -172.56423950195312, -169.58535766601562, -166.60647583007812, -163.62759399414062, -160.64871215820312, -157.66983032226562, -154.69094848632812, -151.71206665039062, -148.73318481445312, -145.75430297851562, -142.77542114257812, -139.79653930664062, -136.81765747070312, -133.83877563476562, -130.85989379882812, -127.88101959228516, -124.90213775634766, -121.92325592041016, -118.94437408447266, -115.96549224853516, -112.98661041259766, -110.00773620605469, -107.02885437011719, -104.04997253417969, -101.07109069824219, -98.09220886230469, -95.11332702636719, -92.13444519042969, -89.15556335449219, -86.17668151855469, -83.19779968261719, -80.21891784667969, -77.24003601074219, -74.26115417480469, -71.28227233886719, -68.30339050292969, -65.32450866699219, -62.34563064575195, -59.36674880981445, -56.38787078857422, -53.40898895263672, -50.43010711669922, -47.45122528076172, -44.47234344482422, -41.49346160888672, -38.514583587646484, -35.535701751708984, -32.556819915771484, -29.577938079833984, -26.599056243896484, -23.620176315307617, -20.641294479370117, -17.662412643432617, -14.68353271484375, -11.70465087890625, -8.72576904296875, -5.746887683868408, -2.7680063247680664, 0.2108745574951172, 3.189756393432617, 6.168638229370117, 9.147518157958984, 12.126399993896484, 15.105281829833984]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 8.0, 15.0, 31.0, 42.0, 56.0, 88.0, 94.0, 126.0, 119.0, 116.0, 99.0, 79.0, 45.0, 22.0, 34.0, 19.0, 8.0, 7.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.14976692199707, -11.889132499694824, -11.628498077392578, -11.367863655090332, -11.107229232788086, -10.84659481048584, -10.585960388183594, -10.325325965881348, -10.064691543579102, -9.804057121276855, -9.54342269897461, -9.282788276672363, -9.022153854370117, -8.761519432067871, -8.500885009765625, -8.240250587463379, -7.979615688323975, -7.7189812660217285, -7.458346843719482, -7.197712421417236, -6.93707799911499, -6.676443576812744, -6.41580867767334, -6.155174255371094, -5.894539833068848, -5.633905410766602, -5.3732709884643555, -5.112636566162109, -4.852002143859863, -4.591367721557617, -4.330733299255371, -4.070098876953125, -3.809464454650879, -3.548830032348633, -3.2881956100463867, -3.0275611877441406, -2.7669267654418945, -2.5062923431396484, -2.2456576824188232, -1.9850232601165771, -1.724388837814331, -1.463754415512085, -1.2031199932098389, -0.9424854516983032, -0.6818510293960571, -0.42121660709381104, -0.1605820655822754, 0.1000523567199707, 0.3606867790222168, 0.6213212013244629, 0.8819556832313538, 1.1425901651382446, 1.4032245874404907, 1.6638590097427368, 1.9244935512542725, 2.1851279735565186, 2.4457623958587646, 2.7063968181610107, 2.967031240463257, 3.227665901184082, 3.488300323486328, 3.748934745788574, 4.00956916809082, 4.270203590393066, 4.5308380126953125]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 5.0, 4.0, 16.0, 19.0, 32.0, 51.0, 78.0, 165.0, 428.0, 319793.0, 3873320.0, 259.0, 66.0, 27.0, 16.0, 8.0, 6.0, 1.0, 2.0], "bins": [-33.96875, -33.35137939453125, -32.7340087890625, -32.11663818359375, -31.499267578125, -30.88189697265625, -30.2645263671875, -29.64715576171875, -29.02978515625, -28.41241455078125, -27.7950439453125, -27.17767333984375, -26.560302734375, -25.94293212890625, -25.3255615234375, -24.70819091796875, -24.0908203125, -23.47344970703125, -22.8560791015625, -22.23870849609375, -21.621337890625, -21.00396728515625, -20.3865966796875, -19.76922607421875, -19.15185546875, -18.53448486328125, -17.9171142578125, -17.29974365234375, -16.682373046875, -16.06500244140625, -15.4476318359375, -14.83026123046875, -14.212890625, -13.59552001953125, -12.9781494140625, -12.36077880859375, -11.743408203125, -11.12603759765625, -10.5086669921875, -9.89129638671875, -9.27392578125, -8.65655517578125, -8.0391845703125, -7.42181396484375, -6.804443359375, -6.18707275390625, -5.5697021484375, -4.95233154296875, -4.3349609375, -3.71759033203125, -3.1002197265625, -2.48284912109375, -1.865478515625, -1.24810791015625, -0.6307373046875, -0.01336669921875, 0.60400390625, 1.22137451171875, 1.8387451171875, 2.45611572265625, 3.073486328125, 3.69085693359375, 4.3082275390625, 4.92559814453125, 5.54296875]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 19.0, 80.0, 269.0, 382.0, 177.0, 76.0, 12.0, 4.0], "bins": [-0.9345703125, -0.918818473815918, -0.9030666351318359, -0.8873147964477539, -0.8715629577636719, -0.8558111190795898, -0.8400592803955078, -0.8243074417114258, -0.8085556030273438, -0.7928037643432617, -0.7770519256591797, -0.7613000869750977, -0.7455482482910156, -0.7297964096069336, -0.7140445709228516, -0.6982927322387695, -0.6825408935546875, -0.6667890548706055, -0.6510372161865234, -0.6352853775024414, -0.6195335388183594, -0.6037817001342773, -0.5880298614501953, -0.5722780227661133, -0.5565261840820312, -0.5407743453979492, -0.5250225067138672, -0.5092706680297852, -0.4935188293457031, -0.4777669906616211, -0.46201515197753906, -0.44626331329345703, -0.430511474609375, -0.41475963592529297, -0.39900779724121094, -0.3832559585571289, -0.3675041198730469, -0.35175228118896484, -0.3360004425048828, -0.3202486038208008, -0.30449676513671875, -0.2887449264526367, -0.2729930877685547, -0.25724124908447266, -0.24148941040039062, -0.2257375717163086, -0.20998573303222656, -0.19423389434814453, -0.1784820556640625, -0.16273021697998047, -0.14697837829589844, -0.1312265396118164, -0.11547470092773438, -0.09972286224365234, -0.08397102355957031, -0.06821918487548828, -0.05246734619140625, -0.03671550750732422, -0.020963668823242188, -0.005211830139160156, 0.010540008544921875, 0.026291847229003906, 0.04204368591308594, 0.05779552459716797, 0.07354736328125]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 5.0, 11.0, 35.0, 108.0, 235.0, 445.0, 474826.0, 3717958.0, 472.0, 140.0, 44.0, 9.0, 5.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0], "bins": [-52.34375, -51.297119140625, -50.25048828125, -49.203857421875, -48.1572265625, -47.110595703125, -46.06396484375, -45.017333984375, -43.970703125, -42.924072265625, -41.87744140625, -40.830810546875, -39.7841796875, -38.737548828125, -37.69091796875, -36.644287109375, -35.59765625, -34.551025390625, -33.50439453125, -32.457763671875, -31.4111328125, -30.364501953125, -29.31787109375, -28.271240234375, -27.224609375, -26.177978515625, -25.13134765625, -24.084716796875, -23.0380859375, -21.991455078125, -20.94482421875, -19.898193359375, -18.8515625, -17.804931640625, -16.75830078125, -15.711669921875, -14.6650390625, -13.618408203125, -12.57177734375, -11.525146484375, -10.478515625, -9.431884765625, -8.38525390625, -7.338623046875, -6.2919921875, -5.245361328125, -4.19873046875, -3.152099609375, -2.10546875, -1.058837890625, -0.01220703125, 1.034423828125, 2.0810546875, 3.127685546875, 4.17431640625, 5.220947265625, 6.267578125, 7.314208984375, 8.36083984375, 9.407470703125, 10.4541015625, 11.500732421875, 12.54736328125, 13.593994140625, 14.640625]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 326.0, 3659.0, 79.0, 15.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.359375, -4.2670440673828125, -4.174713134765625, -4.0823822021484375, -3.99005126953125, -3.8977203369140625, -3.805389404296875, -3.7130584716796875, -3.6207275390625, -3.5283966064453125, -3.436065673828125, -3.3437347412109375, -3.25140380859375, -3.1590728759765625, -3.066741943359375, -2.9744110107421875, -2.882080078125, -2.7897491455078125, -2.697418212890625, -2.6050872802734375, -2.51275634765625, -2.4204254150390625, -2.328094482421875, -2.2357635498046875, -2.1434326171875, -2.0511016845703125, -1.958770751953125, -1.8664398193359375, -1.77410888671875, -1.6817779541015625, -1.589447021484375, -1.4971160888671875, -1.40478515625, -1.3124542236328125, -1.220123291015625, -1.1277923583984375, -1.03546142578125, -0.9431304931640625, -0.850799560546875, -0.7584686279296875, -0.6661376953125, -0.5738067626953125, -0.481475830078125, -0.3891448974609375, -0.29681396484375, -0.2044830322265625, -0.112152099609375, -0.0198211669921875, 0.072509765625, 0.1648406982421875, 0.257171630859375, 0.3495025634765625, 0.44183349609375, 0.5341644287109375, 0.626495361328125, 0.7188262939453125, 0.8111572265625, 0.9034881591796875, 0.995819091796875, 1.0881500244140625, 1.18048095703125, 1.2728118896484375, 1.365142822265625, 1.4574737548828125, 1.5498046875]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 10.0, 257.0, 715.0, 27.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-64.99972534179688, -63.67671203613281, -62.35369873046875, -61.03068542480469, -59.707672119140625, -58.38465881347656, -57.0616455078125, -55.73863220214844, -54.415618896484375, -53.09260559082031, -51.76959228515625, -50.44657897949219, -49.123565673828125, -47.80055236816406, -46.4775390625, -45.15452575683594, -43.83150863647461, -42.50849533081055, -41.185482025146484, -39.86246871948242, -38.53945541381836, -37.2164421081543, -35.893428802490234, -34.570411682128906, -33.247398376464844, -31.92438507080078, -30.60137176513672, -29.278358459472656, -27.955345153808594, -26.63233184814453, -25.309316635131836, -23.986303329467773, -22.663288116455078, -21.340274810791016, -20.017261505126953, -18.69424819946289, -17.371234893798828, -16.048221588134766, -14.72520637512207, -13.402193069458008, -12.079179763793945, -10.756166458129883, -9.43315315246582, -8.110138893127441, -6.787125587463379, -5.464112281799316, -4.141098499298096, -2.818084716796875, -1.4950714111328125, -0.1720578670501709, 1.1509556770324707, 2.4739692211151123, 3.796982765197754, 5.119996070861816, 6.443009853363037, 7.766023635864258, 9.08903694152832, 10.412050247192383, 11.735063552856445, 13.058077812194824, 14.381091117858887, 15.70410442352295, 17.027118682861328, 18.35013198852539, 19.673145294189453]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 1.0, 4.0, 2.0, 19.0, 31.0, 51.0, 86.0, 141.0, 150.0, 158.0, 133.0, 95.0, 70.0, 34.0, 17.0, 9.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-15.002391815185547, -14.516193389892578, -14.029995918273926, -13.543798446655273, -13.057600021362305, -12.571401596069336, -12.085204124450684, -11.599006652832031, -11.112808227539062, -10.626609802246094, -10.140412330627441, -9.654214859008789, -9.16801643371582, -8.681818008422852, -8.1956205368042, -7.709422588348389, -7.223224639892578, -6.737026691436768, -6.250828742980957, -5.7646307945251465, -5.278432846069336, -4.792234897613525, -4.306036949157715, -3.8198390007019043, -3.3336410522460938, -2.847443103790283, -2.3612451553344727, -1.875047206878662, -1.3888492584228516, -0.902651309967041, -0.41645336151123047, 0.06974458694458008, 0.5559425354003906, 1.0421404838562012, 1.5283384323120117, 2.0145363807678223, 2.500734329223633, 2.9869322776794434, 3.473130226135254, 3.9593281745910645, 4.445526123046875, 4.9317240715026855, 5.417922019958496, 5.904119968414307, 6.390317916870117, 6.876515865325928, 7.362713813781738, 7.848911762237549, 8.33510971069336, 8.821308135986328, 9.30750560760498, 9.793703079223633, 10.279901504516602, 10.76609992980957, 11.252297401428223, 11.738494873046875, 12.224693298339844, 12.710891723632812, 13.197089195251465, 13.683286666870117, 14.169485092163086, 14.655683517456055, 15.141880989074707, 15.62807846069336, 16.114276885986328]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 14.0, 15.0, 16.0, 22.0, 38.0, 53.0, 73.0, 94.0, 190.0, 662.0, 3291.0, 92251.0, 937413.0, 12052.0, 1584.0, 376.0, 144.0, 78.0, 59.0, 38.0, 28.0, 18.0, 10.0, 12.0, 7.0, 7.0, 4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.8671875, -7.63763427734375, -7.4080810546875, -7.17852783203125, -6.948974609375, -6.71942138671875, -6.4898681640625, -6.26031494140625, -6.03076171875, -5.80120849609375, -5.5716552734375, -5.34210205078125, -5.112548828125, -4.88299560546875, -4.6534423828125, -4.42388916015625, -4.1943359375, -3.96478271484375, -3.7352294921875, -3.50567626953125, -3.276123046875, -3.04656982421875, -2.8170166015625, -2.58746337890625, -2.35791015625, -2.12835693359375, -1.8988037109375, -1.66925048828125, -1.439697265625, -1.21014404296875, -0.9805908203125, -0.75103759765625, -0.521484375, -0.29193115234375, -0.0623779296875, 0.16717529296875, 0.396728515625, 0.62628173828125, 0.8558349609375, 1.08538818359375, 1.31494140625, 1.54449462890625, 1.7740478515625, 2.00360107421875, 2.233154296875, 2.46270751953125, 2.6922607421875, 2.92181396484375, 3.1513671875, 3.38092041015625, 3.6104736328125, 3.84002685546875, 4.069580078125, 4.29913330078125, 4.5286865234375, 4.75823974609375, 4.98779296875, 5.21734619140625, 5.4468994140625, 5.67645263671875, 5.906005859375, 6.13555908203125, 6.3651123046875, 6.59466552734375, 6.82421875]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 7.0, 32.0, 39.0, 82.0, 122.0, 171.0, 184.0, 155.0, 82.0, 79.0, 27.0, 15.0, 4.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.3486328125, -1.3194122314453125, -1.290191650390625, -1.2609710693359375, -1.23175048828125, -1.2025299072265625, -1.173309326171875, -1.1440887451171875, -1.1148681640625, -1.0856475830078125, -1.056427001953125, -1.0272064208984375, -0.99798583984375, -0.9687652587890625, -0.939544677734375, -0.9103240966796875, -0.881103515625, -0.8518829345703125, -0.822662353515625, -0.7934417724609375, -0.76422119140625, -0.7350006103515625, -0.705780029296875, -0.6765594482421875, -0.6473388671875, -0.6181182861328125, -0.588897705078125, -0.5596771240234375, -0.53045654296875, -0.5012359619140625, -0.472015380859375, -0.4427947998046875, -0.41357421875, -0.3843536376953125, -0.355133056640625, -0.3259124755859375, -0.29669189453125, -0.2674713134765625, -0.238250732421875, -0.2090301513671875, -0.1798095703125, -0.1505889892578125, -0.121368408203125, -0.0921478271484375, -0.06292724609375, -0.0337066650390625, -0.004486083984375, 0.0247344970703125, 0.053955078125, 0.0831756591796875, 0.112396240234375, 0.1416168212890625, 0.17083740234375, 0.2000579833984375, 0.229278564453125, 0.2584991455078125, 0.2877197265625, 0.3169403076171875, 0.346160888671875, 0.3753814697265625, 0.40460205078125, 0.4338226318359375, 0.463043212890625, 0.4922637939453125, 0.521484375]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 6.0, 1.0, 6.0, 4.0, 13.0, 11.0, 8.0, 9.0, 22.0, 18.0, 34.0, 51.0, 71.0, 130.0, 369.0, 1454.0, 28794.0, 980652.0, 34574.0, 1605.0, 348.0, 125.0, 69.0, 43.0, 31.0, 22.0, 20.0, 26.0, 9.0, 5.0, 9.0, 11.0, 6.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.99609375, -4.84246826171875, -4.6888427734375, -4.53521728515625, -4.381591796875, -4.22796630859375, -4.0743408203125, -3.92071533203125, -3.76708984375, -3.61346435546875, -3.4598388671875, -3.30621337890625, -3.152587890625, -2.99896240234375, -2.8453369140625, -2.69171142578125, -2.5380859375, -2.38446044921875, -2.2308349609375, -2.07720947265625, -1.923583984375, -1.76995849609375, -1.6163330078125, -1.46270751953125, -1.30908203125, -1.15545654296875, -1.0018310546875, -0.84820556640625, -0.694580078125, -0.54095458984375, -0.3873291015625, -0.23370361328125, -0.080078125, 0.07354736328125, 0.2271728515625, 0.38079833984375, 0.534423828125, 0.68804931640625, 0.8416748046875, 0.99530029296875, 1.14892578125, 1.30255126953125, 1.4561767578125, 1.60980224609375, 1.763427734375, 1.91705322265625, 2.0706787109375, 2.22430419921875, 2.3779296875, 2.53155517578125, 2.6851806640625, 2.83880615234375, 2.992431640625, 3.14605712890625, 3.2996826171875, 3.45330810546875, 3.60693359375, 3.76055908203125, 3.9141845703125, 4.06781005859375, 4.221435546875, 4.37506103515625, 4.5286865234375, 4.68231201171875, 4.8359375]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 3.0, 6.0, 7.0, 9.0, 8.0, 11.0, 10.0, 15.0, 21.0, 26.0, 36.0, 44.0, 36.0, 52.0, 67.0, 55.0, 49.0, 59.0, 67.0, 55.0, 56.0, 50.0, 42.0, 34.0, 38.0, 26.0, 24.0, 23.0, 18.0, 12.0, 12.0, 12.0, 6.0, 7.0, 4.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5546875, -1.505615234375, -1.45654296875, -1.407470703125, -1.3583984375, -1.309326171875, -1.26025390625, -1.211181640625, -1.162109375, -1.113037109375, -1.06396484375, -1.014892578125, -0.9658203125, -0.916748046875, -0.86767578125, -0.818603515625, -0.76953125, -0.720458984375, -0.67138671875, -0.622314453125, -0.5732421875, -0.524169921875, -0.47509765625, -0.426025390625, -0.376953125, -0.327880859375, -0.27880859375, -0.229736328125, -0.1806640625, -0.131591796875, -0.08251953125, -0.033447265625, 0.015625, 0.064697265625, 0.11376953125, 0.162841796875, 0.2119140625, 0.260986328125, 0.31005859375, 0.359130859375, 0.408203125, 0.457275390625, 0.50634765625, 0.555419921875, 0.6044921875, 0.653564453125, 0.70263671875, 0.751708984375, 0.80078125, 0.849853515625, 0.89892578125, 0.947998046875, 0.9970703125, 1.046142578125, 1.09521484375, 1.144287109375, 1.193359375, 1.242431640625, 1.29150390625, 1.340576171875, 1.3896484375, 1.438720703125, 1.48779296875, 1.536865234375, 1.5859375]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 2.0, 9.0, 6.0, 10.0, 11.0, 41.0, 60.0, 170.0, 634.0, 6002.0, 1005879.0, 33976.0, 1243.0, 304.0, 104.0, 45.0, 27.0, 9.0, 9.0, 8.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.91796875, -5.76885986328125, -5.6197509765625, -5.47064208984375, -5.321533203125, -5.17242431640625, -5.0233154296875, -4.87420654296875, -4.72509765625, -4.57598876953125, -4.4268798828125, -4.27777099609375, -4.128662109375, -3.97955322265625, -3.8304443359375, -3.68133544921875, -3.5322265625, -3.38311767578125, -3.2340087890625, -3.08489990234375, -2.935791015625, -2.78668212890625, -2.6375732421875, -2.48846435546875, -2.33935546875, -2.19024658203125, -2.0411376953125, -1.89202880859375, -1.742919921875, -1.59381103515625, -1.4447021484375, -1.29559326171875, -1.146484375, -0.99737548828125, -0.8482666015625, -0.69915771484375, -0.550048828125, -0.40093994140625, -0.2518310546875, -0.10272216796875, 0.04638671875, 0.19549560546875, 0.3446044921875, 0.49371337890625, 0.642822265625, 0.79193115234375, 0.9410400390625, 1.09014892578125, 1.2392578125, 1.38836669921875, 1.5374755859375, 1.68658447265625, 1.835693359375, 1.98480224609375, 2.1339111328125, 2.28302001953125, 2.43212890625, 2.58123779296875, 2.7303466796875, 2.87945556640625, 3.028564453125, 3.17767333984375, 3.3267822265625, 3.47589111328125, 3.625]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 6.0, 3.0, 2.0, 2.0, 4.0, 4.0, 16.0, 14.0, 35.0, 40.0, 71.0, 76.0, 147.0, 163.0, 145.0, 79.0, 67.0, 46.0, 32.0, 20.0, 11.0, 6.0, 7.0, 7.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00021076202392578125, -0.00020498409867286682, -0.0001992061734199524, -0.00019342824816703796, -0.00018765032291412354, -0.0001818723976612091, -0.00017609447240829468, -0.00017031654715538025, -0.00016453862190246582, -0.0001587606966495514, -0.00015298277139663696, -0.00014720484614372253, -0.0001414269208908081, -0.00013564899563789368, -0.00012987107038497925, -0.00012409314513206482, -0.00011831521987915039, -0.00011253729462623596, -0.00010675936937332153, -0.0001009814441204071, -9.520351886749268e-05, -8.942559361457825e-05, -8.364766836166382e-05, -7.786974310874939e-05, -7.209181785583496e-05, -6.631389260292053e-05, -6.0535967350006104e-05, -5.4758042097091675e-05, -4.8980116844177246e-05, -4.320219159126282e-05, -3.742426633834839e-05, -3.164634108543396e-05, -2.586841583251953e-05, -2.0090490579605103e-05, -1.4312565326690674e-05, -8.534640073776245e-06, -2.7567148208618164e-06, 3.0212104320526123e-06, 8.799135684967041e-06, 1.457706093788147e-05, 2.03549861907959e-05, 2.6132911443710327e-05, 3.1910836696624756e-05, 3.7688761949539185e-05, 4.346668720245361e-05, 4.924461245536804e-05, 5.502253770828247e-05, 6.08004629611969e-05, 6.657838821411133e-05, 7.235631346702576e-05, 7.813423871994019e-05, 8.391216397285461e-05, 8.969008922576904e-05, 9.546801447868347e-05, 0.0001012459397315979, 0.00010702386498451233, 0.00011280179023742676, 0.00011857971549034119, 0.00012435764074325562, 0.00013013556599617004, 0.00013591349124908447, 0.0001416914165019989, 0.00014746934175491333, 0.00015324726700782776, 0.0001590251922607422]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 3.0, 8.0, 12.0, 13.0, 18.0, 36.0, 42.0, 95.0, 237.0, 883.0, 6724.0, 766624.0, 268551.0, 4182.0, 737.0, 201.0, 81.0, 36.0, 24.0, 12.0, 8.0, 8.0, 8.0, 2.0, 3.0, 3.0, 3.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.484375, -3.381622314453125, -3.27886962890625, -3.176116943359375, -3.0733642578125, -2.970611572265625, -2.86785888671875, -2.765106201171875, -2.662353515625, -2.559600830078125, -2.45684814453125, -2.354095458984375, -2.2513427734375, -2.148590087890625, -2.04583740234375, -1.943084716796875, -1.84033203125, -1.737579345703125, -1.63482666015625, -1.532073974609375, -1.4293212890625, -1.326568603515625, -1.22381591796875, -1.121063232421875, -1.018310546875, -0.915557861328125, -0.81280517578125, -0.710052490234375, -0.6072998046875, -0.504547119140625, -0.40179443359375, -0.299041748046875, -0.1962890625, -0.093536376953125, 0.00921630859375, 0.111968994140625, 0.2147216796875, 0.317474365234375, 0.42022705078125, 0.522979736328125, 0.625732421875, 0.728485107421875, 0.83123779296875, 0.933990478515625, 1.0367431640625, 1.139495849609375, 1.24224853515625, 1.345001220703125, 1.44775390625, 1.550506591796875, 1.65325927734375, 1.756011962890625, 1.8587646484375, 1.961517333984375, 2.06427001953125, 2.167022705078125, 2.269775390625, 2.372528076171875, 2.47528076171875, 2.578033447265625, 2.6807861328125, 2.783538818359375, 2.88629150390625, 2.989044189453125, 3.091796875]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 4.0, 2.0, 7.0, 5.0, 14.0, 7.0, 22.0, 37.0, 84.0, 122.0, 209.0, 218.0, 112.0, 67.0, 35.0, 17.0, 12.0, 11.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.150390625, -1.11322021484375, -1.0760498046875, -1.03887939453125, -1.001708984375, -0.96453857421875, -0.9273681640625, -0.89019775390625, -0.85302734375, -0.81585693359375, -0.7786865234375, -0.74151611328125, -0.704345703125, -0.66717529296875, -0.6300048828125, -0.59283447265625, -0.5556640625, -0.51849365234375, -0.4813232421875, -0.44415283203125, -0.406982421875, -0.36981201171875, -0.3326416015625, -0.29547119140625, -0.25830078125, -0.22113037109375, -0.1839599609375, -0.14678955078125, -0.109619140625, -0.07244873046875, -0.0352783203125, 0.00189208984375, 0.0390625, 0.07623291015625, 0.1134033203125, 0.15057373046875, 0.187744140625, 0.22491455078125, 0.2620849609375, 0.29925537109375, 0.33642578125, 0.37359619140625, 0.4107666015625, 0.44793701171875, 0.485107421875, 0.52227783203125, 0.5594482421875, 0.59661865234375, 0.6337890625, 0.67095947265625, 0.7081298828125, 0.74530029296875, 0.782470703125, 0.81964111328125, 0.8568115234375, 0.89398193359375, 0.93115234375, 0.96832275390625, 1.0054931640625, 1.04266357421875, 1.079833984375, 1.11700439453125, 1.1541748046875, 1.19134521484375, 1.228515625]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 8.0, 53.0, 419.0, 413.0, 89.0, 14.0, 7.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-26.389394760131836, -25.473508834838867, -24.5576229095459, -23.64173698425293, -22.72585105895996, -21.809965133666992, -20.89407730102539, -19.978191375732422, -19.062305450439453, -18.146419525146484, -17.230533599853516, -16.314647674560547, -15.398761749267578, -14.48287582397461, -13.566988945007324, -12.651103019714355, -11.735218048095703, -10.819332122802734, -9.903446197509766, -8.987560272216797, -8.071674346923828, -7.155787944793701, -6.239901542663574, -5.3240156173706055, -4.408129692077637, -3.492243766784668, -2.57635760307312, -1.6604714393615723, -0.7445855140686035, 0.17130041122436523, 1.0871868133544922, 2.003072738647461, 2.9189605712890625, 3.8348464965820312, 4.750732421875, 5.666618824005127, 6.582504749298096, 7.4983906745910645, 8.414277076721191, 9.33016300201416, 10.246048927307129, 11.161934852600098, 12.077820777893066, 12.993707656860352, 13.90959358215332, 14.825479507446289, 15.741365432739258, 16.657251358032227, 17.573137283325195, 18.489023208618164, 19.404909133911133, 20.3207950592041, 21.23668098449707, 22.15256690979004, 23.06845474243164, 23.98434066772461, 24.900226593017578, 25.816112518310547, 26.731998443603516, 27.647884368896484, 28.563770294189453, 29.479656219482422, 30.39554214477539, 31.31142807006836, 32.22731399536133]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 2.0, 8.0, 8.0, 7.0, 10.0, 17.0, 15.0, 19.0, 20.0, 37.0, 39.0, 45.0, 48.0, 47.0, 75.0, 56.0, 56.0, 71.0, 49.0, 49.0, 57.0, 46.0, 37.0, 35.0, 27.0, 27.0, 20.0, 21.0, 11.0, 9.0, 12.0, 7.0, 6.0, 1.0, 3.0, 1.0, 0.0, 2.0, 2.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-9.838167190551758, -9.514019966125488, -9.189872741699219, -8.86572551727295, -8.54157829284668, -8.217430114746094, -7.893282890319824, -7.569135665893555, -7.244988441467285, -6.920841217041016, -6.596693992614746, -6.272546291351318, -5.948399066925049, -5.624251842498779, -5.300104141235352, -4.975956916809082, -4.6518096923828125, -4.327662467956543, -4.003515243530273, -3.6793675422668457, -3.355220317840576, -3.0310730934143066, -2.706925630569458, -2.3827781677246094, -2.05863094329834, -1.7344835996627808, -1.4103362560272217, -1.0861889123916626, -0.7620415687561035, -0.43789422512054443, -0.11374688148498535, 0.21040058135986328, 0.5345468521118164, 0.8586941957473755, 1.1828415393829346, 1.5069888830184937, 1.8311362266540527, 2.1552834510803223, 2.479430913925171, 2.8035783767700195, 3.127725601196289, 3.4518728256225586, 3.7760202884674072, 4.100167751312256, 4.424314975738525, 4.748462200164795, 5.072609901428223, 5.396757125854492, 5.720904350280762, 6.045051574707031, 6.369198799133301, 6.6933465003967285, 7.017493724822998, 7.341640949249268, 7.665788650512695, 7.989935874938965, 8.314083099365234, 8.638230323791504, 8.962377548217773, 9.286524772644043, 9.610671997070312, 9.934820175170898, 10.258967399597168, 10.583114624023438, 10.907261848449707]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 8.0, 6.0, 4.0, 6.0, 14.0, 15.0, 21.0, 20.0, 32.0, 48.0, 68.0, 70.0, 129.0, 212.0, 518.0, 1811.0, 17013.0, 4011289.0, 157567.0, 4485.0, 679.0, 164.0, 56.0, 29.0, 8.0, 5.0, 4.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-13.125, -12.8502197265625, -12.575439453125, -12.3006591796875, -12.02587890625, -11.7510986328125, -11.476318359375, -11.2015380859375, -10.9267578125, -10.6519775390625, -10.377197265625, -10.1024169921875, -9.82763671875, -9.5528564453125, -9.278076171875, -9.0032958984375, -8.728515625, -8.4537353515625, -8.178955078125, -7.9041748046875, -7.62939453125, -7.3546142578125, -7.079833984375, -6.8050537109375, -6.5302734375, -6.2554931640625, -5.980712890625, -5.7059326171875, -5.43115234375, -5.1563720703125, -4.881591796875, -4.6068115234375, -4.33203125, -4.0572509765625, -3.782470703125, -3.5076904296875, -3.23291015625, -2.9581298828125, -2.683349609375, -2.4085693359375, -2.1337890625, -1.8590087890625, -1.584228515625, -1.3094482421875, -1.03466796875, -0.7598876953125, -0.485107421875, -0.2103271484375, 0.064453125, 0.3392333984375, 0.614013671875, 0.8887939453125, 1.16357421875, 1.4383544921875, 1.713134765625, 1.9879150390625, 2.2626953125, 2.5374755859375, 2.812255859375, 3.0870361328125, 3.36181640625, 3.6365966796875, 3.911376953125, 4.1861572265625, 4.4609375]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 4.0, 3.0, 4.0, 10.0, 17.0, 40.0, 64.0, 99.0, 125.0, 171.0, 160.0, 113.0, 84.0, 51.0, 34.0, 20.0, 7.0, 5.0, 1.0, 2.0, 2.0, 2.0], "bins": [-1.435546875, -1.407806396484375, -1.38006591796875, -1.352325439453125, -1.3245849609375, -1.296844482421875, -1.26910400390625, -1.241363525390625, -1.213623046875, -1.185882568359375, -1.15814208984375, -1.130401611328125, -1.1026611328125, -1.074920654296875, -1.04718017578125, -1.019439697265625, -0.99169921875, -0.963958740234375, -0.93621826171875, -0.908477783203125, -0.8807373046875, -0.852996826171875, -0.82525634765625, -0.797515869140625, -0.769775390625, -0.742034912109375, -0.71429443359375, -0.686553955078125, -0.6588134765625, -0.631072998046875, -0.60333251953125, -0.575592041015625, -0.5478515625, -0.520111083984375, -0.49237060546875, -0.464630126953125, -0.4368896484375, -0.409149169921875, -0.38140869140625, -0.353668212890625, -0.325927734375, -0.298187255859375, -0.27044677734375, -0.242706298828125, -0.2149658203125, -0.187225341796875, -0.15948486328125, -0.131744384765625, -0.10400390625, -0.076263427734375, -0.04852294921875, -0.020782470703125, 0.0069580078125, 0.034698486328125, 0.06243896484375, 0.090179443359375, 0.117919921875, 0.145660400390625, 0.17340087890625, 0.201141357421875, 0.2288818359375, 0.256622314453125, 0.28436279296875, 0.312103271484375, 0.33984375]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 5.0, 4.0, 33.0, 70.0, 210.0, 1081.0, 4188066.0, 4516.0, 206.0, 66.0, 20.0, 8.0, 1.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-64.4375, -63.167724609375, -61.89794921875, -60.628173828125, -59.3583984375, -58.088623046875, -56.81884765625, -55.549072265625, -54.279296875, -53.009521484375, -51.73974609375, -50.469970703125, -49.2001953125, -47.930419921875, -46.66064453125, -45.390869140625, -44.12109375, -42.851318359375, -41.58154296875, -40.311767578125, -39.0419921875, -37.772216796875, -36.50244140625, -35.232666015625, -33.962890625, -32.693115234375, -31.42333984375, -30.153564453125, -28.8837890625, -27.614013671875, -26.34423828125, -25.074462890625, -23.8046875, -22.534912109375, -21.26513671875, -19.995361328125, -18.7255859375, -17.455810546875, -16.18603515625, -14.916259765625, -13.646484375, -12.376708984375, -11.10693359375, -9.837158203125, -8.5673828125, -7.297607421875, -6.02783203125, -4.758056640625, -3.48828125, -2.218505859375, -0.94873046875, 0.321044921875, 1.5908203125, 2.860595703125, 4.13037109375, 5.400146484375, 6.669921875, 7.939697265625, 9.20947265625, 10.479248046875, 11.7490234375, 13.018798828125, 14.28857421875, 15.558349609375, 16.828125]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 17.0, 50.0, 458.0, 3389.0, 109.0, 32.0, 18.0, 1.0, 2.0, 3.0, 0.0, 1.0], "bins": [-6.02734375, -5.918327331542969, -5.8093109130859375, -5.700294494628906, -5.591278076171875, -5.482261657714844, -5.3732452392578125, -5.264228820800781, -5.15521240234375, -5.046195983886719, -4.9371795654296875, -4.828163146972656, -4.719146728515625, -4.610130310058594, -4.5011138916015625, -4.392097473144531, -4.2830810546875, -4.174064636230469, -4.0650482177734375, -3.9560317993164062, -3.847015380859375, -3.7379989624023438, -3.6289825439453125, -3.5199661254882812, -3.41094970703125, -3.3019332885742188, -3.1929168701171875, -3.0839004516601562, -2.974884033203125, -2.8658676147460938, -2.7568511962890625, -2.6478347778320312, -2.538818359375, -2.4298019409179688, -2.3207855224609375, -2.2117691040039062, -2.102752685546875, -1.9937362670898438, -1.8847198486328125, -1.7757034301757812, -1.66668701171875, -1.5576705932617188, -1.4486541748046875, -1.3396377563476562, -1.230621337890625, -1.1216049194335938, -1.0125885009765625, -0.9035720825195312, -0.7945556640625, -0.6855392456054688, -0.5765228271484375, -0.46750640869140625, -0.358489990234375, -0.24947357177734375, -0.1404571533203125, -0.03144073486328125, 0.07757568359375, 0.18659210205078125, 0.2956085205078125, 0.40462493896484375, 0.513641357421875, 0.6226577758789062, 0.7316741943359375, 0.8406906127929688, 0.94970703125]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 8.0, 13.0, 95.0, 846.0, 42.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-31.04470443725586, -29.828142166137695, -28.61157989501953, -27.395017623901367, -26.178455352783203, -24.96189308166504, -23.745330810546875, -22.52876853942871, -21.312206268310547, -20.095643997192383, -18.87908172607422, -17.662519454956055, -16.44595718383789, -15.229394912719727, -14.012832641601562, -12.796270370483398, -11.579708099365234, -10.36314582824707, -9.146583557128906, -7.930021286010742, -6.713459014892578, -5.496896743774414, -4.28033447265625, -3.063772201538086, -1.8472099304199219, -0.6306476593017578, 0.5859146118164062, 1.8024768829345703, 3.0190391540527344, 4.235601425170898, 5.4521636962890625, 6.668725967407227, 7.885284423828125, 9.101846694946289, 10.318408966064453, 11.534971237182617, 12.751533508300781, 13.968095779418945, 15.18465805053711, 16.401220321655273, 17.617782592773438, 18.8343448638916, 20.050907135009766, 21.26746940612793, 22.484031677246094, 23.700593948364258, 24.917156219482422, 26.133718490600586, 27.35028076171875, 28.566843032836914, 29.783405303955078, 30.999967575073242, 32.216529846191406, 33.43309020996094, 34.649654388427734, 35.86621856689453, 37.08277893066406, 38.299339294433594, 39.51590347290039, 40.73246765136719, 41.94902801513672, 43.16558837890625, 44.38215255737305, 45.598716735839844, 46.815277099609375]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 4.0, 8.0, 9.0, 27.0, 40.0, 60.0, 76.0, 93.0, 117.0, 127.0, 100.0, 98.0, 88.0, 59.0, 45.0, 20.0, 18.0, 7.0, 5.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.386377334594727, -13.97288990020752, -13.559402465820312, -13.145914077758789, -12.732426643371582, -12.318939208984375, -11.905450820922852, -11.491963386535645, -11.078475952148438, -10.66498851776123, -10.251501083374023, -9.8380126953125, -9.424525260925293, -9.011037826538086, -8.597549438476562, -8.184062004089355, -7.770574569702148, -7.357087135314941, -6.943599224090576, -6.530111312866211, -6.116623878479004, -5.703136444091797, -5.289648532867432, -4.876160621643066, -4.462673187255859, -4.049185752868652, -3.635697841644287, -3.222210168838501, -2.808722496032715, -2.3952348232269287, -1.9817471504211426, -1.5682594776153564, -1.1547718048095703, -0.7412841320037842, -0.32779645919799805, 0.08569121360778809, 0.4991788864135742, 0.9126665592193604, 1.3261542320251465, 1.7396419048309326, 2.1531295776367188, 2.566617250442505, 2.980104923248291, 3.393592596054077, 3.8070802688598633, 4.22056770324707, 4.6340556144714355, 5.047543525695801, 5.461030960083008, 5.874518394470215, 6.28800630569458, 6.701494216918945, 7.114981651306152, 7.528469085693359, 7.941956996917725, 8.35544490814209, 8.768932342529297, 9.182419776916504, 9.595907211303711, 10.009395599365234, 10.422883033752441, 10.836370468139648, 11.249858856201172, 11.663346290588379, 12.076833724975586]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 5.0, 6.0, 4.0, 7.0, 11.0, 9.0, 15.0, 19.0, 27.0, 43.0, 50.0, 79.0, 123.0, 186.0, 324.0, 634.0, 1592.0, 6850.0, 55422.0, 745943.0, 214990.0, 16898.0, 3124.0, 1015.0, 469.0, 267.0, 148.0, 94.0, 68.0, 48.0, 28.0, 19.0, 14.0, 14.0, 3.0, 7.0, 4.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.8828125, -4.73809814453125, -4.5933837890625, -4.44866943359375, -4.303955078125, -4.15924072265625, -4.0145263671875, -3.86981201171875, -3.72509765625, -3.58038330078125, -3.4356689453125, -3.29095458984375, -3.146240234375, -3.00152587890625, -2.8568115234375, -2.71209716796875, -2.5673828125, -2.42266845703125, -2.2779541015625, -2.13323974609375, -1.988525390625, -1.84381103515625, -1.6990966796875, -1.55438232421875, -1.40966796875, -1.26495361328125, -1.1202392578125, -0.97552490234375, -0.830810546875, -0.68609619140625, -0.5413818359375, -0.39666748046875, -0.251953125, -0.10723876953125, 0.0374755859375, 0.18218994140625, 0.326904296875, 0.47161865234375, 0.6163330078125, 0.76104736328125, 0.90576171875, 1.05047607421875, 1.1951904296875, 1.33990478515625, 1.484619140625, 1.62933349609375, 1.7740478515625, 1.91876220703125, 2.0634765625, 2.20819091796875, 2.3529052734375, 2.49761962890625, 2.642333984375, 2.78704833984375, 2.9317626953125, 3.07647705078125, 3.22119140625, 3.36590576171875, 3.5106201171875, 3.65533447265625, 3.800048828125, 3.94476318359375, 4.0894775390625, 4.23419189453125, 4.37890625]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 5.0, 6.0, 11.0, 22.0, 33.0, 34.0, 60.0, 87.0, 99.0, 96.0, 115.0, 99.0, 92.0, 89.0, 51.0, 38.0, 36.0, 14.0, 10.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.369140625, -1.3360214233398438, -1.3029022216796875, -1.2697830200195312, -1.236663818359375, -1.2035446166992188, -1.1704254150390625, -1.1373062133789062, -1.10418701171875, -1.0710678100585938, -1.0379486083984375, -1.0048294067382812, -0.971710205078125, -0.9385910034179688, -0.9054718017578125, -0.8723526000976562, -0.8392333984375, -0.8061141967773438, -0.7729949951171875, -0.7398757934570312, -0.706756591796875, -0.6736373901367188, -0.6405181884765625, -0.6073989868164062, -0.57427978515625, -0.5411605834960938, -0.5080413818359375, -0.47492218017578125, -0.441802978515625, -0.40868377685546875, -0.3755645751953125, -0.34244537353515625, -0.309326171875, -0.27620697021484375, -0.2430877685546875, -0.20996856689453125, -0.176849365234375, -0.14373016357421875, -0.1106109619140625, -0.07749176025390625, -0.04437255859375, -0.01125335693359375, 0.0218658447265625, 0.05498504638671875, 0.088104248046875, 0.12122344970703125, 0.1543426513671875, 0.18746185302734375, 0.2205810546875, 0.25370025634765625, 0.2868194580078125, 0.31993865966796875, 0.353057861328125, 0.38617706298828125, 0.4192962646484375, 0.45241546630859375, 0.48553466796875, 0.5186538696289062, 0.5517730712890625, 0.5848922729492188, 0.618011474609375, 0.6511306762695312, 0.6842498779296875, 0.7173690795898438, 0.75048828125]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 6.0, 5.0, 7.0, 10.0, 18.0, 16.0, 20.0, 41.0, 46.0, 74.0, 119.0, 241.0, 657.0, 2597.0, 28157.0, 832524.0, 174766.0, 7215.0, 1180.0, 379.0, 163.0, 83.0, 64.0, 42.0, 36.0, 22.0, 27.0, 9.0, 9.0, 5.0, 5.0, 6.0, 1.0, 4.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-4.203125, -4.06182861328125, -3.9205322265625, -3.77923583984375, -3.637939453125, -3.49664306640625, -3.3553466796875, -3.21405029296875, -3.07275390625, -2.93145751953125, -2.7901611328125, -2.64886474609375, -2.507568359375, -2.36627197265625, -2.2249755859375, -2.08367919921875, -1.9423828125, -1.80108642578125, -1.6597900390625, -1.51849365234375, -1.377197265625, -1.23590087890625, -1.0946044921875, -0.95330810546875, -0.81201171875, -0.67071533203125, -0.5294189453125, -0.38812255859375, -0.246826171875, -0.10552978515625, 0.0357666015625, 0.17706298828125, 0.318359375, 0.45965576171875, 0.6009521484375, 0.74224853515625, 0.883544921875, 1.02484130859375, 1.1661376953125, 1.30743408203125, 1.44873046875, 1.59002685546875, 1.7313232421875, 1.87261962890625, 2.013916015625, 2.15521240234375, 2.2965087890625, 2.43780517578125, 2.5791015625, 2.72039794921875, 2.8616943359375, 3.00299072265625, 3.144287109375, 3.28558349609375, 3.4268798828125, 3.56817626953125, 3.70947265625, 3.85076904296875, 3.9920654296875, 4.13336181640625, 4.274658203125, 4.41595458984375, 4.5572509765625, 4.69854736328125, 4.83984375]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 7.0, 4.0, 4.0, 7.0, 5.0, 10.0, 16.0, 23.0, 25.0, 33.0, 38.0, 41.0, 56.0, 55.0, 46.0, 59.0, 67.0, 68.0, 62.0, 65.0, 50.0, 52.0, 46.0, 39.0, 25.0, 26.0, 18.0, 20.0, 10.0, 9.0, 4.0, 6.0, 4.0, 0.0, 4.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-2.626953125, -2.539031982421875, -2.45111083984375, -2.363189697265625, -2.2752685546875, -2.187347412109375, -2.09942626953125, -2.011505126953125, -1.923583984375, -1.835662841796875, -1.74774169921875, -1.659820556640625, -1.5718994140625, -1.483978271484375, -1.39605712890625, -1.308135986328125, -1.22021484375, -1.132293701171875, -1.04437255859375, -0.956451416015625, -0.8685302734375, -0.780609130859375, -0.69268798828125, -0.604766845703125, -0.516845703125, -0.428924560546875, -0.34100341796875, -0.253082275390625, -0.1651611328125, -0.077239990234375, 0.01068115234375, 0.098602294921875, 0.1865234375, 0.274444580078125, 0.36236572265625, 0.450286865234375, 0.5382080078125, 0.626129150390625, 0.71405029296875, 0.801971435546875, 0.889892578125, 0.977813720703125, 1.06573486328125, 1.153656005859375, 1.2415771484375, 1.329498291015625, 1.41741943359375, 1.505340576171875, 1.59326171875, 1.681182861328125, 1.76910400390625, 1.857025146484375, 1.9449462890625, 2.032867431640625, 2.12078857421875, 2.208709716796875, 2.296630859375, 2.384552001953125, 2.47247314453125, 2.560394287109375, 2.6483154296875, 2.736236572265625, 2.82415771484375, 2.912078857421875, 3.0]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 2.0, 2.0, 1.0, 4.0, 7.0, 4.0, 5.0, 10.0, 12.0, 11.0, 14.0, 18.0, 18.0, 49.0, 63.0, 77.0, 137.0, 206.0, 336.0, 593.0, 1032.0, 1967.0, 4228.0, 10341.0, 35416.0, 203334.0, 621505.0, 127609.0, 25477.0, 8418.0, 3585.0, 1750.0, 926.0, 516.0, 320.0, 170.0, 111.0, 76.0, 62.0, 38.0, 30.0, 16.0, 20.0, 6.0, 10.0, 4.0, 4.0, 7.0, 4.0, 5.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0], "bins": [-0.80224609375, -0.7789154052734375, -0.755584716796875, -0.7322540283203125, -0.70892333984375, -0.6855926513671875, -0.662261962890625, -0.6389312744140625, -0.6156005859375, -0.5922698974609375, -0.568939208984375, -0.5456085205078125, -0.52227783203125, -0.4989471435546875, -0.475616455078125, -0.4522857666015625, -0.428955078125, -0.4056243896484375, -0.382293701171875, -0.3589630126953125, -0.33563232421875, -0.3123016357421875, -0.288970947265625, -0.2656402587890625, -0.2423095703125, -0.2189788818359375, -0.195648193359375, -0.1723175048828125, -0.14898681640625, -0.1256561279296875, -0.102325439453125, -0.0789947509765625, -0.0556640625, -0.0323333740234375, -0.009002685546875, 0.0143280029296875, 0.03765869140625, 0.0609893798828125, 0.084320068359375, 0.1076507568359375, 0.1309814453125, 0.1543121337890625, 0.177642822265625, 0.2009735107421875, 0.22430419921875, 0.2476348876953125, 0.270965576171875, 0.2942962646484375, 0.317626953125, 0.3409576416015625, 0.364288330078125, 0.3876190185546875, 0.41094970703125, 0.4342803955078125, 0.457611083984375, 0.4809417724609375, 0.5042724609375, 0.5276031494140625, 0.550933837890625, 0.5742645263671875, 0.59759521484375, 0.6209259033203125, 0.644256591796875, 0.6675872802734375, 0.69091796875]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 2.0, 0.0, 2.0, 3.0, 1.0, 6.0, 4.0, 9.0, 8.0, 11.0, 23.0, 25.0, 26.0, 48.0, 72.0, 111.0, 129.0, 174.0, 124.0, 78.0, 51.0, 31.0, 12.0, 19.0, 15.0, 7.0, 3.0, 4.0, 0.0, 3.0, 1.0, 2.0, 2.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001704692840576172, -0.00016530044376850128, -0.00016013160347938538, -0.00015496276319026947, -0.00014979392290115356, -0.00014462508261203766, -0.00013945624232292175, -0.00013428740203380585, -0.00012911856174468994, -0.00012394972145557404, -0.00011878088116645813, -0.00011361204087734222, -0.00010844320058822632, -0.00010327436029911041, -9.810552000999451e-05, -9.29366797208786e-05, -8.77678394317627e-05, -8.259899914264679e-05, -7.743015885353088e-05, -7.226131856441498e-05, -6.709247827529907e-05, -6.192363798618317e-05, -5.675479769706726e-05, -5.1585957407951355e-05, -4.641711711883545e-05, -4.1248276829719543e-05, -3.607943654060364e-05, -3.091059625148773e-05, -2.5741755962371826e-05, -2.057291567325592e-05, -1.5404075384140015e-05, -1.0235235095024109e-05, -5.066394805908203e-06, 1.0244548320770264e-07, 5.271285772323608e-06, 1.0440126061439514e-05, 1.560896635055542e-05, 2.0777806639671326e-05, 2.594664692878723e-05, 3.111548721790314e-05, 3.628432750701904e-05, 4.145316779613495e-05, 4.6622008085250854e-05, 5.179084837436676e-05, 5.6959688663482666e-05, 6.212852895259857e-05, 6.729736924171448e-05, 7.246620953083038e-05, 7.763504981994629e-05, 8.28038901090622e-05, 8.79727303981781e-05, 9.3141570687294e-05, 9.831041097640991e-05, 0.00010347925126552582, 0.00010864809155464172, 0.00011381693184375763, 0.00011898577213287354, 0.00012415461242198944, 0.00012932345271110535, 0.00013449229300022125, 0.00013966113328933716, 0.00014482997357845306, 0.00014999881386756897, 0.00015516765415668488, 0.00016033649444580078]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 2.0, 2.0, 1.0, 5.0, 2.0, 3.0, 3.0, 4.0, 13.0, 16.0, 31.0, 28.0, 43.0, 63.0, 100.0, 152.0, 305.0, 496.0, 1011.0, 2012.0, 4510.0, 11595.0, 50170.0, 519574.0, 400299.0, 40123.0, 10050.0, 3991.0, 1793.0, 929.0, 526.0, 283.0, 138.0, 94.0, 74.0, 32.0, 38.0, 16.0, 8.0, 9.0, 9.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.8876953125, -0.8590011596679688, -0.8303070068359375, -0.8016128540039062, -0.772918701171875, -0.7442245483398438, -0.7155303955078125, -0.6868362426757812, -0.65814208984375, -0.6294479370117188, -0.6007537841796875, -0.5720596313476562, -0.543365478515625, -0.5146713256835938, -0.4859771728515625, -0.45728302001953125, -0.4285888671875, -0.39989471435546875, -0.3712005615234375, -0.34250640869140625, -0.313812255859375, -0.28511810302734375, -0.2564239501953125, -0.22772979736328125, -0.19903564453125, -0.17034149169921875, -0.1416473388671875, -0.11295318603515625, -0.084259033203125, -0.05556488037109375, -0.0268707275390625, 0.00182342529296875, 0.030517578125, 0.05921173095703125, 0.0879058837890625, 0.11660003662109375, 0.145294189453125, 0.17398834228515625, 0.2026824951171875, 0.23137664794921875, 0.26007080078125, 0.28876495361328125, 0.3174591064453125, 0.34615325927734375, 0.374847412109375, 0.40354156494140625, 0.4322357177734375, 0.46092987060546875, 0.4896240234375, 0.5183181762695312, 0.5470123291015625, 0.5757064819335938, 0.604400634765625, 0.6330947875976562, 0.6617889404296875, 0.6904830932617188, 0.71917724609375, 0.7478713989257812, 0.7765655517578125, 0.8052597045898438, 0.833953857421875, 0.8626480102539062, 0.8913421630859375, 0.9200363159179688, 0.94873046875]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 2.0, 3.0, 5.0, 1.0, 3.0, 7.0, 12.0, 17.0, 24.0, 32.0, 35.0, 57.0, 69.0, 85.0, 126.0, 115.0, 87.0, 95.0, 69.0, 50.0, 35.0, 20.0, 17.0, 13.0, 8.0, 4.0, 5.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.646484375, -0.62579345703125, -0.6051025390625, -0.58441162109375, -0.563720703125, -0.54302978515625, -0.5223388671875, -0.50164794921875, -0.48095703125, -0.46026611328125, -0.4395751953125, -0.41888427734375, -0.398193359375, -0.37750244140625, -0.3568115234375, -0.33612060546875, -0.3154296875, -0.29473876953125, -0.2740478515625, -0.25335693359375, -0.232666015625, -0.21197509765625, -0.1912841796875, -0.17059326171875, -0.14990234375, -0.12921142578125, -0.1085205078125, -0.08782958984375, -0.067138671875, -0.04644775390625, -0.0257568359375, -0.00506591796875, 0.015625, 0.03631591796875, 0.0570068359375, 0.07769775390625, 0.098388671875, 0.11907958984375, 0.1397705078125, 0.16046142578125, 0.18115234375, 0.20184326171875, 0.2225341796875, 0.24322509765625, 0.263916015625, 0.28460693359375, 0.3052978515625, 0.32598876953125, 0.3466796875, 0.36737060546875, 0.3880615234375, 0.40875244140625, 0.429443359375, 0.45013427734375, 0.4708251953125, 0.49151611328125, 0.51220703125, 0.53289794921875, 0.5535888671875, 0.57427978515625, 0.594970703125, 0.61566162109375, 0.6363525390625, 0.65704345703125, 0.677734375]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 9.0, 48.0, 419.0, 447.0, 66.0, 14.0, 6.0, 4.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-43.79530715942383, -42.38101577758789, -40.96672821044922, -39.55243682861328, -38.138145446777344, -36.723854064941406, -35.309566497802734, -33.8952751159668, -32.480987548828125, -31.06669807434082, -29.652406692504883, -28.238117218017578, -26.82382583618164, -25.409536361694336, -23.99524688720703, -22.580955505371094, -21.166664123535156, -19.75237464904785, -18.338083267211914, -16.92379379272461, -15.509503364562988, -14.095212936401367, -12.680923461914062, -11.266633033752441, -9.85234260559082, -8.4380521774292, -7.023762226104736, -5.609472274780273, -4.195181846618652, -2.7808914184570312, -1.3666019439697266, 0.04768848419189453, 1.46197509765625, 2.876265287399292, 4.290555477142334, 5.704845428466797, 7.119135856628418, 8.533426284790039, 9.947715759277344, 11.362006187438965, 12.776296615600586, 14.190587043762207, 15.604877471923828, 17.019166946411133, 18.433456420898438, 19.847747802734375, 21.26203727722168, 22.676326751708984, 24.090618133544922, 25.504907608032227, 26.919198989868164, 28.33348846435547, 29.747779846191406, 31.16206932067871, 32.576358795166016, 33.99065017700195, 35.404937744140625, 36.81922912597656, 38.233516693115234, 39.64780807495117, 41.06209945678711, 42.47638702392578, 43.89067840576172, 45.304969787597656, 46.719261169433594]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 1.0, 4.0, 1.0, 4.0, 6.0, 9.0, 11.0, 10.0, 16.0, 15.0, 28.0, 29.0, 24.0, 40.0, 41.0, 33.0, 38.0, 40.0, 63.0, 43.0, 62.0, 54.0, 61.0, 44.0, 47.0, 47.0, 41.0, 36.0, 29.0, 27.0, 17.0, 16.0, 15.0, 12.0, 8.0, 8.0, 0.0, 2.0, 10.0, 5.0, 5.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-13.20128345489502, -12.788833618164062, -12.376384735107422, -11.963934898376465, -11.551485061645508, -11.13903522491455, -10.726585388183594, -10.314136505126953, -9.901686668395996, -9.489236831665039, -9.076787948608398, -8.664338111877441, -8.251888275146484, -7.839438438415527, -7.4269890785217285, -7.01453971862793, -6.602089881896973, -6.189640045166016, -5.777190685272217, -5.364741325378418, -4.952291488647461, -4.539841651916504, -4.127392292022705, -3.714942693710327, -3.302493095397949, -2.8900434970855713, -2.4775938987731934, -2.0651443004608154, -1.6526947021484375, -1.2402451038360596, -0.8277955055236816, -0.4153459072113037, -0.0028972625732421875, 0.40955233573913574, 0.8220019340515137, 1.2344515323638916, 1.6469011306762695, 2.0593507289886475, 2.4718003273010254, 2.8842499256134033, 3.2966995239257812, 3.709149122238159, 4.121598720550537, 4.534048080444336, 4.946497917175293, 5.35894775390625, 5.771397113800049, 6.183846473693848, 6.596296310424805, 7.008746147155762, 7.4211955070495605, 7.833644866943359, 8.246094703674316, 8.658544540405273, 9.070993423461914, 9.483443260192871, 9.895893096923828, 10.308342933654785, 10.720792770385742, 11.133241653442383, 11.54569149017334, 11.958141326904297, 12.370590209960938, 12.783040046691895, 13.195489883422852]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 4.0, 0.0, 6.0, 0.0, 2.0, 2.0, 4.0, 3.0, 6.0, 4.0, 11.0, 11.0, 8.0, 20.0, 28.0, 54.0, 63.0, 132.0, 181.0, 342.0, 647.0, 1552.0, 6168.0, 56015.0, 4030286.0, 88696.0, 7306.0, 1631.0, 596.0, 246.0, 117.0, 44.0, 34.0, 23.0, 8.0, 13.0, 7.0, 11.0, 4.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.33203125, -6.15399169921875, -5.9759521484375, -5.79791259765625, -5.619873046875, -5.44183349609375, -5.2637939453125, -5.08575439453125, -4.90771484375, -4.72967529296875, -4.5516357421875, -4.37359619140625, -4.195556640625, -4.01751708984375, -3.8394775390625, -3.66143798828125, -3.4833984375, -3.30535888671875, -3.1273193359375, -2.94927978515625, -2.771240234375, -2.59320068359375, -2.4151611328125, -2.23712158203125, -2.05908203125, -1.88104248046875, -1.7030029296875, -1.52496337890625, -1.346923828125, -1.16888427734375, -0.9908447265625, -0.81280517578125, -0.634765625, -0.45672607421875, -0.2786865234375, -0.10064697265625, 0.077392578125, 0.25543212890625, 0.4334716796875, 0.61151123046875, 0.78955078125, 0.96759033203125, 1.1456298828125, 1.32366943359375, 1.501708984375, 1.67974853515625, 1.8577880859375, 2.03582763671875, 2.2138671875, 2.39190673828125, 2.5699462890625, 2.74798583984375, 2.926025390625, 3.10406494140625, 3.2821044921875, 3.46014404296875, 3.63818359375, 3.81622314453125, 3.9942626953125, 4.17230224609375, 4.350341796875, 4.52838134765625, 4.7064208984375, 4.88446044921875, 5.0625]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 3.0, 4.0, 12.0, 16.0, 32.0, 52.0, 40.0, 87.0, 107.0, 89.0, 90.0, 105.0, 85.0, 76.0, 69.0, 48.0, 32.0, 25.0, 13.0, 5.0, 3.0, 3.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 2.0], "bins": [-1.3955078125, -1.3634490966796875, -1.331390380859375, -1.2993316650390625, -1.26727294921875, -1.2352142333984375, -1.203155517578125, -1.1710968017578125, -1.1390380859375, -1.1069793701171875, -1.074920654296875, -1.0428619384765625, -1.01080322265625, -0.9787445068359375, -0.946685791015625, -0.9146270751953125, -0.882568359375, -0.8505096435546875, -0.818450927734375, -0.7863922119140625, -0.75433349609375, -0.7222747802734375, -0.690216064453125, -0.6581573486328125, -0.6260986328125, -0.5940399169921875, -0.561981201171875, -0.5299224853515625, -0.49786376953125, -0.4658050537109375, -0.433746337890625, -0.4016876220703125, -0.36962890625, -0.3375701904296875, -0.305511474609375, -0.2734527587890625, -0.24139404296875, -0.2093353271484375, -0.177276611328125, -0.1452178955078125, -0.1131591796875, -0.0811004638671875, -0.049041748046875, -0.0169830322265625, 0.01507568359375, 0.0471343994140625, 0.079193115234375, 0.1112518310546875, 0.143310546875, 0.1753692626953125, 0.207427978515625, 0.2394866943359375, 0.27154541015625, 0.3036041259765625, 0.335662841796875, 0.3677215576171875, 0.3997802734375, 0.4318389892578125, 0.463897705078125, 0.4959564208984375, 0.52801513671875, 0.5600738525390625, 0.592132568359375, 0.6241912841796875, 0.65625]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 15.0, 53.0, 125.0, 478.0, 6900.0, 4183738.0, 2437.0, 351.0, 133.0, 38.0, 13.0, 5.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.265625, -18.651611328125, -18.03759765625, -17.423583984375, -16.8095703125, -16.195556640625, -15.58154296875, -14.967529296875, -14.353515625, -13.739501953125, -13.12548828125, -12.511474609375, -11.8974609375, -11.283447265625, -10.66943359375, -10.055419921875, -9.44140625, -8.827392578125, -8.21337890625, -7.599365234375, -6.9853515625, -6.371337890625, -5.75732421875, -5.143310546875, -4.529296875, -3.915283203125, -3.30126953125, -2.687255859375, -2.0732421875, -1.459228515625, -0.84521484375, -0.231201171875, 0.3828125, 0.996826171875, 1.61083984375, 2.224853515625, 2.8388671875, 3.452880859375, 4.06689453125, 4.680908203125, 5.294921875, 5.908935546875, 6.52294921875, 7.136962890625, 7.7509765625, 8.364990234375, 8.97900390625, 9.593017578125, 10.20703125, 10.821044921875, 11.43505859375, 12.049072265625, 12.6630859375, 13.277099609375, 13.89111328125, 14.505126953125, 15.119140625, 15.733154296875, 16.34716796875, 16.961181640625, 17.5751953125, 18.189208984375, 18.80322265625, 19.417236328125, 20.03125]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 9.0, 46.0, 214.0, 3263.0, 464.0, 53.0, 20.0, 8.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.08203125, -2.99322509765625, -2.9044189453125, -2.81561279296875, -2.726806640625, -2.63800048828125, -2.5491943359375, -2.46038818359375, -2.37158203125, -2.28277587890625, -2.1939697265625, -2.10516357421875, -2.016357421875, -1.92755126953125, -1.8387451171875, -1.74993896484375, -1.6611328125, -1.57232666015625, -1.4835205078125, -1.39471435546875, -1.305908203125, -1.21710205078125, -1.1282958984375, -1.03948974609375, -0.95068359375, -0.86187744140625, -0.7730712890625, -0.68426513671875, -0.595458984375, -0.50665283203125, -0.4178466796875, -0.32904052734375, -0.240234375, -0.15142822265625, -0.0626220703125, 0.02618408203125, 0.114990234375, 0.20379638671875, 0.2926025390625, 0.38140869140625, 0.47021484375, 0.55902099609375, 0.6478271484375, 0.73663330078125, 0.825439453125, 0.91424560546875, 1.0030517578125, 1.09185791015625, 1.1806640625, 1.26947021484375, 1.3582763671875, 1.44708251953125, 1.535888671875, 1.62469482421875, 1.7135009765625, 1.80230712890625, 1.89111328125, 1.97991943359375, 2.0687255859375, 2.15753173828125, 2.246337890625, 2.33514404296875, 2.4239501953125, 2.51275634765625, 2.6015625]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 4.0, 40.0, 948.0, 17.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-59.586273193359375, -58.275943756103516, -56.965614318847656, -55.6552848815918, -54.34495544433594, -53.03462600708008, -51.72429656982422, -50.413970947265625, -49.1036376953125, -47.79330825805664, -46.48297882080078, -45.17264938354492, -43.86231994628906, -42.5519905090332, -41.241661071777344, -39.93133544921875, -38.62100601196289, -37.31067657470703, -36.00034713745117, -34.69001770019531, -33.37968826293945, -32.069358825683594, -30.759031295776367, -29.448701858520508, -28.13837242126465, -26.82804298400879, -25.51771354675293, -24.20738410949707, -22.897056579589844, -21.586727142333984, -20.276397705078125, -18.966068267822266, -17.655742645263672, -16.345413208007812, -15.035083770751953, -13.72475528717041, -12.41442584991455, -11.104096412658691, -9.793767929077148, -8.483438491821289, -7.17310905456543, -5.86277961730957, -4.552450656890869, -3.242121458053589, -1.9317922592163086, -0.6214628219604492, 0.688866138458252, 1.9991950988769531, 3.3095245361328125, 4.619853973388672, 5.930182933807373, 7.240511894226074, 8.550841331481934, 9.861170768737793, 11.171499252319336, 12.481828689575195, 13.792158126831055, 15.102487564086914, 16.412817001342773, 17.723146438598633, 19.03347396850586, 20.34380340576172, 21.654132843017578, 22.964462280273438, 24.274791717529297]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 4.0, 4.0, 4.0, 3.0, 5.0, 14.0, 24.0, 18.0, 36.0, 30.0, 58.0, 43.0, 71.0, 75.0, 75.0, 81.0, 63.0, 77.0, 71.0, 54.0, 58.0, 41.0, 31.0, 23.0, 15.0, 5.0, 4.0, 5.0, 5.0, 4.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.693879127502441, -4.563742637634277, -4.433605670928955, -4.303469181060791, -4.173332214355469, -4.043195724487305, -3.9130592346191406, -3.7829225063323975, -3.6527857780456543, -3.522649049758911, -3.392512321472168, -3.262375831604004, -3.1322391033172607, -3.0021023750305176, -2.8719658851623535, -2.7418291568756104, -2.611692428588867, -2.481555700302124, -2.351418972015381, -2.221282482147217, -2.0911457538604736, -1.9610090255737305, -1.8308724164962769, -1.7007358074188232, -1.57059907913208, -1.440462350845337, -1.3103257417678833, -1.1801891326904297, -1.0500524044036865, -0.9199157357215881, -0.7897790670394897, -0.6596423983573914, -0.5295052528381348, -0.3993685841560364, -0.269231915473938, -0.1390952467918396, -0.008958578109741211, 0.12117809057235718, 0.25131475925445557, 0.38145142793655396, 0.5115880966186523, 0.6417247653007507, 0.7718614339828491, 0.9019981026649475, 1.032134771347046, 1.162271499633789, 1.2924081087112427, 1.4225447177886963, 1.5526814460754395, 1.6828181743621826, 1.8129547834396362, 1.9430913925170898, 2.073228120803833, 2.203364849090576, 2.3335013389587402, 2.4636380672454834, 2.5937747955322266, 2.7239115238189697, 2.854048252105713, 2.984184741973877, 3.11432147026062, 3.2444581985473633, 3.3745946884155273, 3.5047314167022705, 3.6348681449890137]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 4.0, 1.0, 3.0, 5.0, 5.0, 3.0, 8.0, 16.0, 14.0, 23.0, 43.0, 50.0, 72.0, 118.0, 179.0, 355.0, 659.0, 1521.0, 4683.0, 26590.0, 297056.0, 643384.0, 61152.0, 8432.0, 2241.0, 897.0, 420.0, 201.0, 151.0, 91.0, 52.0, 34.0, 31.0, 19.0, 16.0, 9.0, 5.0, 7.0, 4.0, 7.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-4.13671875, -4.025482177734375, -3.91424560546875, -3.803009033203125, -3.6917724609375, -3.580535888671875, -3.46929931640625, -3.358062744140625, -3.246826171875, -3.135589599609375, -3.02435302734375, -2.913116455078125, -2.8018798828125, -2.690643310546875, -2.57940673828125, -2.468170166015625, -2.35693359375, -2.245697021484375, -2.13446044921875, -2.023223876953125, -1.9119873046875, -1.800750732421875, -1.68951416015625, -1.578277587890625, -1.467041015625, -1.355804443359375, -1.24456787109375, -1.133331298828125, -1.0220947265625, -0.910858154296875, -0.79962158203125, -0.688385009765625, -0.5771484375, -0.465911865234375, -0.35467529296875, -0.243438720703125, -0.1322021484375, -0.020965576171875, 0.09027099609375, 0.201507568359375, 0.312744140625, 0.423980712890625, 0.53521728515625, 0.646453857421875, 0.7576904296875, 0.868927001953125, 0.98016357421875, 1.091400146484375, 1.20263671875, 1.313873291015625, 1.42510986328125, 1.536346435546875, 1.6475830078125, 1.758819580078125, 1.87005615234375, 1.981292724609375, 2.092529296875, 2.203765869140625, 2.31500244140625, 2.426239013671875, 2.5374755859375, 2.648712158203125, 2.75994873046875, 2.871185302734375, 2.982421875]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 5.0, 3.0, 6.0, 6.0, 23.0, 17.0, 46.0, 51.0, 67.0, 55.0, 84.0, 95.0, 105.0, 87.0, 76.0, 83.0, 48.0, 42.0, 41.0, 25.0, 19.0, 8.0, 3.0, 4.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.224609375, -1.1949844360351562, -1.1653594970703125, -1.1357345581054688, -1.106109619140625, -1.0764846801757812, -1.0468597412109375, -1.0172348022460938, -0.98760986328125, -0.9579849243164062, -0.9283599853515625, -0.8987350463867188, -0.869110107421875, -0.8394851684570312, -0.8098602294921875, -0.7802352905273438, -0.7506103515625, -0.7209854125976562, -0.6913604736328125, -0.6617355346679688, -0.632110595703125, -0.6024856567382812, -0.5728607177734375, -0.5432357788085938, -0.51361083984375, -0.48398590087890625, -0.4543609619140625, -0.42473602294921875, -0.395111083984375, -0.36548614501953125, -0.3358612060546875, -0.30623626708984375, -0.276611328125, -0.24698638916015625, -0.2173614501953125, -0.18773651123046875, -0.158111572265625, -0.12848663330078125, -0.0988616943359375, -0.06923675537109375, -0.03961181640625, -0.00998687744140625, 0.0196380615234375, 0.04926300048828125, 0.078887939453125, 0.10851287841796875, 0.1381378173828125, 0.16776275634765625, 0.1973876953125, 0.22701263427734375, 0.2566375732421875, 0.28626251220703125, 0.315887451171875, 0.34551239013671875, 0.3751373291015625, 0.40476226806640625, 0.43438720703125, 0.46401214599609375, 0.4936370849609375, 0.5232620239257812, 0.552886962890625, 0.5825119018554688, 0.6121368408203125, 0.6417617797851562, 0.67138671875]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 8.0, 5.0, 7.0, 10.0, 16.0, 25.0, 51.0, 65.0, 122.0, 241.0, 783.0, 3119.0, 35558.0, 894609.0, 106496.0, 5637.0, 1089.0, 364.0, 134.0, 71.0, 51.0, 26.0, 18.0, 18.0, 9.0, 6.0, 6.0, 4.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-6.4375, -6.27093505859375, -6.1043701171875, -5.93780517578125, -5.771240234375, -5.60467529296875, -5.4381103515625, -5.27154541015625, -5.10498046875, -4.93841552734375, -4.7718505859375, -4.60528564453125, -4.438720703125, -4.27215576171875, -4.1055908203125, -3.93902587890625, -3.7724609375, -3.60589599609375, -3.4393310546875, -3.27276611328125, -3.106201171875, -2.93963623046875, -2.7730712890625, -2.60650634765625, -2.43994140625, -2.27337646484375, -2.1068115234375, -1.94024658203125, -1.773681640625, -1.60711669921875, -1.4405517578125, -1.27398681640625, -1.107421875, -0.94085693359375, -0.7742919921875, -0.60772705078125, -0.441162109375, -0.27459716796875, -0.1080322265625, 0.05853271484375, 0.22509765625, 0.39166259765625, 0.5582275390625, 0.72479248046875, 0.891357421875, 1.05792236328125, 1.2244873046875, 1.39105224609375, 1.5576171875, 1.72418212890625, 1.8907470703125, 2.05731201171875, 2.223876953125, 2.39044189453125, 2.5570068359375, 2.72357177734375, 2.89013671875, 3.05670166015625, 3.2232666015625, 3.38983154296875, 3.556396484375, 3.72296142578125, 3.8895263671875, 4.05609130859375, 4.22265625]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 10.0, 3.0, 7.0, 7.0, 11.0, 18.0, 31.0, 39.0, 46.0, 44.0, 63.0, 90.0, 96.0, 81.0, 69.0, 83.0, 86.0, 51.0, 49.0, 27.0, 24.0, 15.0, 19.0, 8.0, 8.0, 5.0, 6.0, 2.0, 0.0, 2.0, 3.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.85546875, -4.72930908203125, -4.6031494140625, -4.47698974609375, -4.350830078125, -4.22467041015625, -4.0985107421875, -3.97235107421875, -3.84619140625, -3.72003173828125, -3.5938720703125, -3.46771240234375, -3.341552734375, -3.21539306640625, -3.0892333984375, -2.96307373046875, -2.8369140625, -2.71075439453125, -2.5845947265625, -2.45843505859375, -2.332275390625, -2.20611572265625, -2.0799560546875, -1.95379638671875, -1.82763671875, -1.70147705078125, -1.5753173828125, -1.44915771484375, -1.322998046875, -1.19683837890625, -1.0706787109375, -0.94451904296875, -0.818359375, -0.69219970703125, -0.5660400390625, -0.43988037109375, -0.313720703125, -0.18756103515625, -0.0614013671875, 0.06475830078125, 0.19091796875, 0.31707763671875, 0.4432373046875, 0.56939697265625, 0.695556640625, 0.82171630859375, 0.9478759765625, 1.07403564453125, 1.2001953125, 1.32635498046875, 1.4525146484375, 1.57867431640625, 1.704833984375, 1.83099365234375, 1.9571533203125, 2.08331298828125, 2.20947265625, 2.33563232421875, 2.4617919921875, 2.58795166015625, 2.714111328125, 2.84027099609375, 2.9664306640625, 3.09259033203125, 3.21875]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 8.0, 8.0, 6.0, 15.0, 17.0, 34.0, 46.0, 67.0, 129.0, 222.0, 374.0, 808.0, 1716.0, 4650.0, 17660.0, 113284.0, 688008.0, 186710.0, 24740.0, 6069.0, 2089.0, 867.0, 443.0, 223.0, 136.0, 76.0, 46.0, 40.0, 16.0, 16.0, 8.0, 8.0, 3.0, 3.0, 3.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.80615234375, -0.7789230346679688, -0.7516937255859375, -0.7244644165039062, -0.697235107421875, -0.6700057983398438, -0.6427764892578125, -0.6155471801757812, -0.58831787109375, -0.5610885620117188, -0.5338592529296875, -0.5066299438476562, -0.479400634765625, -0.45217132568359375, -0.4249420166015625, -0.39771270751953125, -0.3704833984375, -0.34325408935546875, -0.3160247802734375, -0.28879547119140625, -0.261566162109375, -0.23433685302734375, -0.2071075439453125, -0.17987823486328125, -0.15264892578125, -0.12541961669921875, -0.0981903076171875, -0.07096099853515625, -0.043731689453125, -0.01650238037109375, 0.0107269287109375, 0.03795623779296875, 0.065185546875, 0.09241485595703125, 0.1196441650390625, 0.14687347412109375, 0.174102783203125, 0.20133209228515625, 0.2285614013671875, 0.25579071044921875, 0.28302001953125, 0.31024932861328125, 0.3374786376953125, 0.36470794677734375, 0.391937255859375, 0.41916656494140625, 0.4463958740234375, 0.47362518310546875, 0.5008544921875, 0.5280838012695312, 0.5553131103515625, 0.5825424194335938, 0.609771728515625, 0.6370010375976562, 0.6642303466796875, 0.6914596557617188, 0.71868896484375, 0.7459182739257812, 0.7731475830078125, 0.8003768920898438, 0.827606201171875, 0.8548355102539062, 0.8820648193359375, 0.9092941284179688, 0.9365234375]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 4.0, 6.0, 4.0, 7.0, 14.0, 10.0, 13.0, 29.0, 49.0, 51.0, 100.0, 121.0, 142.0, 135.0, 102.0, 65.0, 54.0, 35.0, 17.0, 7.0, 9.0, 5.0, 7.0, 6.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-0.0002319812774658203, -0.00022643059492111206, -0.0002208799123764038, -0.00021532922983169556, -0.0002097785472869873, -0.00020422786474227905, -0.0001986771821975708, -0.00019312649965286255, -0.0001875758171081543, -0.00018202513456344604, -0.0001764744520187378, -0.00017092376947402954, -0.0001653730869293213, -0.00015982240438461304, -0.00015427172183990479, -0.00014872103929519653, -0.00014317035675048828, -0.00013761967420578003, -0.00013206899166107178, -0.00012651830911636353, -0.00012096762657165527, -0.00011541694402694702, -0.00010986626148223877, -0.00010431557893753052, -9.876489639282227e-05, -9.321421384811401e-05, -8.766353130340576e-05, -8.211284875869751e-05, -7.656216621398926e-05, -7.1011483669281e-05, -6.546080112457275e-05, -5.99101185798645e-05, -5.435943603515625e-05, -4.8808753490448e-05, -4.3258070945739746e-05, -3.7707388401031494e-05, -3.215670585632324e-05, -2.660602331161499e-05, -2.1055340766906738e-05, -1.5504658222198486e-05, -9.953975677490234e-06, -4.403293132781982e-06, 1.1473894119262695e-06, 6.6980719566345215e-06, 1.2248754501342773e-05, 1.7799437046051025e-05, 2.3350119590759277e-05, 2.890080213546753e-05, 3.445148468017578e-05, 4.000216722488403e-05, 4.5552849769592285e-05, 5.110353231430054e-05, 5.665421485900879e-05, 6.220489740371704e-05, 6.775557994842529e-05, 7.330626249313354e-05, 7.88569450378418e-05, 8.440762758255005e-05, 8.99583101272583e-05, 9.550899267196655e-05, 0.0001010596752166748, 0.00010661035776138306, 0.00011216104030609131, 0.00011771172285079956, 0.0001232624053955078]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 6.0, 7.0, 11.0, 9.0, 14.0, 16.0, 31.0, 44.0, 89.0, 178.0, 408.0, 941.0, 2321.0, 8719.0, 48029.0, 480228.0, 450407.0, 44653.0, 8417.0, 2345.0, 855.0, 370.0, 185.0, 97.0, 54.0, 35.0, 22.0, 15.0, 3.0, 10.0, 7.0, 7.0, 1.0, 6.0, 6.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.94287109375, -0.91339111328125, -0.8839111328125, -0.85443115234375, -0.824951171875, -0.79547119140625, -0.7659912109375, -0.73651123046875, -0.70703125, -0.67755126953125, -0.6480712890625, -0.61859130859375, -0.589111328125, -0.55963134765625, -0.5301513671875, -0.50067138671875, -0.47119140625, -0.44171142578125, -0.4122314453125, -0.38275146484375, -0.353271484375, -0.32379150390625, -0.2943115234375, -0.26483154296875, -0.2353515625, -0.20587158203125, -0.1763916015625, -0.14691162109375, -0.117431640625, -0.08795166015625, -0.0584716796875, -0.02899169921875, 0.00048828125, 0.02996826171875, 0.0594482421875, 0.08892822265625, 0.118408203125, 0.14788818359375, 0.1773681640625, 0.20684814453125, 0.236328125, 0.26580810546875, 0.2952880859375, 0.32476806640625, 0.354248046875, 0.38372802734375, 0.4132080078125, 0.44268798828125, 0.47216796875, 0.50164794921875, 0.5311279296875, 0.56060791015625, 0.590087890625, 0.61956787109375, 0.6490478515625, 0.67852783203125, 0.7080078125, 0.73748779296875, 0.7669677734375, 0.79644775390625, 0.825927734375, 0.85540771484375, 0.8848876953125, 0.91436767578125, 0.94384765625]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 2.0, 10.0, 13.0, 23.0, 29.0, 41.0, 61.0, 88.0, 110.0, 129.0, 151.0, 87.0, 70.0, 63.0, 35.0, 22.0, 21.0, 10.0, 9.0, 7.0, 2.0, 6.0, 2.0, 2.0, 0.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.78662109375, -0.7618637084960938, -0.7371063232421875, -0.7123489379882812, -0.687591552734375, -0.6628341674804688, -0.6380767822265625, -0.6133193969726562, -0.58856201171875, -0.5638046264648438, -0.5390472412109375, -0.5142898559570312, -0.489532470703125, -0.46477508544921875, -0.4400177001953125, -0.41526031494140625, -0.3905029296875, -0.36574554443359375, -0.3409881591796875, -0.31623077392578125, -0.291473388671875, -0.26671600341796875, -0.2419586181640625, -0.21720123291015625, -0.19244384765625, -0.16768646240234375, -0.1429290771484375, -0.11817169189453125, -0.093414306640625, -0.06865692138671875, -0.0438995361328125, -0.01914215087890625, 0.005615234375, 0.03037261962890625, 0.0551300048828125, 0.07988739013671875, 0.104644775390625, 0.12940216064453125, 0.1541595458984375, 0.17891693115234375, 0.20367431640625, 0.22843170166015625, 0.2531890869140625, 0.27794647216796875, 0.302703857421875, 0.32746124267578125, 0.3522186279296875, 0.37697601318359375, 0.4017333984375, 0.42649078369140625, 0.4512481689453125, 0.47600555419921875, 0.500762939453125, 0.5255203247070312, 0.5502777099609375, 0.5750350952148438, 0.59979248046875, 0.6245498657226562, 0.6493072509765625, 0.6740646362304688, 0.698822021484375, 0.7235794067382812, 0.7483367919921875, 0.7730941772460938, 0.7978515625]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 8.0, 26.0, 205.0, 619.0, 128.0, 24.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-92.25709533691406, -90.52912902832031, -88.80116271972656, -87.07319641113281, -85.34523010253906, -83.61726379394531, -81.88929748535156, -80.16133880615234, -78.4333724975586, -76.70540618896484, -74.9774398803711, -73.24947357177734, -71.5215072631836, -69.79354858398438, -68.06558227539062, -66.33761596679688, -64.60964965820312, -62.881683349609375, -61.153717041015625, -59.425750732421875, -57.69778823852539, -55.96982192993164, -54.24185562133789, -52.51388931274414, -50.785919189453125, -49.057952880859375, -47.329986572265625, -45.602020263671875, -43.87405776977539, -42.14609146118164, -40.41812515258789, -38.69015884399414, -36.96219253540039, -35.23422622680664, -33.50625991821289, -31.778295516967773, -30.050331115722656, -28.322364807128906, -26.594398498535156, -24.866432189941406, -23.13846778869629, -21.41050148010254, -19.682537078857422, -17.954570770263672, -16.226604461669922, -14.498640060424805, -12.770673751831055, -11.042708396911621, -9.314743041992188, -7.586777687072754, -5.858811855316162, -4.13084602355957, -2.4028806686401367, -0.6749153137207031, 1.0530509948730469, 2.7810163497924805, 4.508981704711914, 6.236947059631348, 7.9649128913879395, 9.692878723144531, 11.420844078063965, 13.148809432983398, 14.876775741577148, 16.604740142822266, 18.332706451416016]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 6.0, 6.0, 1.0, 10.0, 10.0, 16.0, 26.0, 25.0, 24.0, 27.0, 31.0, 36.0, 46.0, 59.0, 44.0, 47.0, 50.0, 49.0, 52.0, 45.0, 37.0, 45.0, 34.0, 36.0, 33.0, 40.0, 30.0, 29.0, 22.0, 19.0, 14.0, 8.0, 8.0, 10.0, 5.0, 6.0, 3.0, 7.0, 2.0, 2.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-16.424535751342773, -15.943592071533203, -15.462648391723633, -14.981704711914062, -14.500761032104492, -14.019817352294922, -13.538874626159668, -13.057930946350098, -12.576987266540527, -12.096043586730957, -11.615099906921387, -11.134156227111816, -10.653213500976562, -10.172269821166992, -9.691326141357422, -9.210382461547852, -8.729438781738281, -8.248495101928711, -7.767551422119141, -7.2866082191467285, -6.805664539337158, -6.324720859527588, -5.843777656555176, -5.3628339767456055, -4.881890296936035, -4.400946617126465, -3.9200031757354736, -3.4390597343444824, -2.958116054534912, -2.477172374725342, -1.9962289333343506, -1.5152854919433594, -1.0343427658081055, -0.5533992052078247, -0.07245564460754395, 0.4084879159927368, 0.8894314765930176, 1.370375156402588, 1.851318597793579, 2.3322620391845703, 2.8132057189941406, 3.294149398803711, 3.775092840194702, 4.256036281585693, 4.736979961395264, 5.217923641204834, 5.698866844177246, 6.179810523986816, 6.660754203796387, 7.141697883605957, 7.622641563415527, 8.103585243225098, 8.584527969360352, 9.065471649169922, 9.546415328979492, 10.027359008789062, 10.508302688598633, 10.989246368408203, 11.470190048217773, 11.951133728027344, 12.432077407836914, 12.913021087646484, 13.393963813781738, 13.874907493591309, 14.355851173400879]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 5.0, 4.0, 4.0, 4.0, 16.0, 7.0, 14.0, 8.0, 13.0, 28.0, 46.0, 53.0, 60.0, 125.0, 204.0, 312.0, 674.0, 1212.0, 3293.0, 10912.0, 70392.0, 3836395.0, 240988.0, 21253.0, 4988.0, 1650.0, 749.0, 356.0, 215.0, 95.0, 77.0, 49.0, 34.0, 13.0, 10.0, 12.0, 3.0, 7.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-6.19140625, -6.03546142578125, -5.8795166015625, -5.72357177734375, -5.567626953125, -5.41168212890625, -5.2557373046875, -5.09979248046875, -4.94384765625, -4.78790283203125, -4.6319580078125, -4.47601318359375, -4.320068359375, -4.16412353515625, -4.0081787109375, -3.85223388671875, -3.6962890625, -3.54034423828125, -3.3843994140625, -3.22845458984375, -3.072509765625, -2.91656494140625, -2.7606201171875, -2.60467529296875, -2.44873046875, -2.29278564453125, -2.1368408203125, -1.98089599609375, -1.824951171875, -1.66900634765625, -1.5130615234375, -1.35711669921875, -1.201171875, -1.04522705078125, -0.8892822265625, -0.73333740234375, -0.577392578125, -0.42144775390625, -0.2655029296875, -0.10955810546875, 0.04638671875, 0.20233154296875, 0.3582763671875, 0.51422119140625, 0.670166015625, 0.82611083984375, 0.9820556640625, 1.13800048828125, 1.2939453125, 1.44989013671875, 1.6058349609375, 1.76177978515625, 1.917724609375, 2.07366943359375, 2.2296142578125, 2.38555908203125, 2.54150390625, 2.69744873046875, 2.8533935546875, 3.00933837890625, 3.165283203125, 3.32122802734375, 3.4771728515625, 3.63311767578125, 3.7890625]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 7.0, 4.0, 5.0, 6.0, 10.0, 26.0, 32.0, 39.0, 43.0, 52.0, 89.0, 80.0, 85.0, 81.0, 82.0, 88.0, 65.0, 60.0, 43.0, 35.0, 20.0, 18.0, 12.0, 12.0, 9.0, 1.0, 3.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.2265625, -1.1968536376953125, -1.167144775390625, -1.1374359130859375, -1.10772705078125, -1.0780181884765625, -1.048309326171875, -1.0186004638671875, -0.9888916015625, -0.9591827392578125, -0.929473876953125, -0.8997650146484375, -0.87005615234375, -0.8403472900390625, -0.810638427734375, -0.7809295654296875, -0.751220703125, -0.7215118408203125, -0.691802978515625, -0.6620941162109375, -0.63238525390625, -0.6026763916015625, -0.572967529296875, -0.5432586669921875, -0.5135498046875, -0.4838409423828125, -0.454132080078125, -0.4244232177734375, -0.39471435546875, -0.3650054931640625, -0.335296630859375, -0.3055877685546875, -0.27587890625, -0.2461700439453125, -0.216461181640625, -0.1867523193359375, -0.15704345703125, -0.1273345947265625, -0.097625732421875, -0.0679168701171875, -0.0382080078125, -0.0084991455078125, 0.021209716796875, 0.0509185791015625, 0.08062744140625, 0.1103363037109375, 0.140045166015625, 0.1697540283203125, 0.199462890625, 0.2291717529296875, 0.258880615234375, 0.2885894775390625, 0.31829833984375, 0.3480072021484375, 0.377716064453125, 0.4074249267578125, 0.4371337890625, 0.4668426513671875, 0.496551513671875, 0.5262603759765625, 0.55596923828125, 0.5856781005859375, 0.615386962890625, 0.6450958251953125, 0.6748046875]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 2.0, 3.0, 6.0, 8.0, 14.0, 18.0, 32.0, 60.0, 107.0, 233.0, 677.0, 2732.0, 27643.0, 4137535.0, 22249.0, 2191.0, 471.0, 137.0, 74.0, 41.0, 28.0, 11.0, 8.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.546875, -12.2120361328125, -11.877197265625, -11.5423583984375, -11.20751953125, -10.8726806640625, -10.537841796875, -10.2030029296875, -9.8681640625, -9.5333251953125, -9.198486328125, -8.8636474609375, -8.52880859375, -8.1939697265625, -7.859130859375, -7.5242919921875, -7.189453125, -6.8546142578125, -6.519775390625, -6.1849365234375, -5.85009765625, -5.5152587890625, -5.180419921875, -4.8455810546875, -4.5107421875, -4.1759033203125, -3.841064453125, -3.5062255859375, -3.17138671875, -2.8365478515625, -2.501708984375, -2.1668701171875, -1.83203125, -1.4971923828125, -1.162353515625, -0.8275146484375, -0.49267578125, -0.1578369140625, 0.177001953125, 0.5118408203125, 0.8466796875, 1.1815185546875, 1.516357421875, 1.8511962890625, 2.18603515625, 2.5208740234375, 2.855712890625, 3.1905517578125, 3.525390625, 3.8602294921875, 4.195068359375, 4.5299072265625, 4.86474609375, 5.1995849609375, 5.534423828125, 5.8692626953125, 6.2041015625, 6.5389404296875, 6.873779296875, 7.2086181640625, 7.54345703125, 7.8782958984375, 8.213134765625, 8.5479736328125, 8.8828125]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 5.0, 0.0, 6.0, 2.0, 6.0, 5.0, 8.0, 16.0, 39.0, 93.0, 303.0, 2730.0, 614.0, 149.0, 53.0, 14.0, 14.0, 9.0, 7.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-4.15625, -4.0652618408203125, -3.974273681640625, -3.8832855224609375, -3.79229736328125, -3.7013092041015625, -3.610321044921875, -3.5193328857421875, -3.4283447265625, -3.3373565673828125, -3.246368408203125, -3.1553802490234375, -3.06439208984375, -2.9734039306640625, -2.882415771484375, -2.7914276123046875, -2.700439453125, -2.6094512939453125, -2.518463134765625, -2.4274749755859375, -2.33648681640625, -2.2454986572265625, -2.154510498046875, -2.0635223388671875, -1.9725341796875, -1.8815460205078125, -1.790557861328125, -1.6995697021484375, -1.60858154296875, -1.5175933837890625, -1.426605224609375, -1.3356170654296875, -1.24462890625, -1.1536407470703125, -1.062652587890625, -0.9716644287109375, -0.88067626953125, -0.7896881103515625, -0.698699951171875, -0.6077117919921875, -0.5167236328125, -0.4257354736328125, -0.334747314453125, -0.2437591552734375, -0.15277099609375, -0.0617828369140625, 0.029205322265625, 0.1201934814453125, 0.211181640625, 0.3021697998046875, 0.393157958984375, 0.4841461181640625, 0.57513427734375, 0.6661224365234375, 0.757110595703125, 0.8480987548828125, 0.9390869140625, 1.0300750732421875, 1.121063232421875, 1.2120513916015625, 1.30303955078125, 1.3940277099609375, 1.485015869140625, 1.5760040283203125, 1.6669921875]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 2.0, 1.0, 8.0, 64.0, 327.0, 476.0, 99.0, 24.0, 4.0, 1.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-31.391414642333984, -30.68013572692871, -29.968856811523438, -29.257577896118164, -28.54629898071289, -27.835020065307617, -27.123741149902344, -26.41246223449707, -25.701183319091797, -24.989904403686523, -24.27862548828125, -23.567346572875977, -22.856067657470703, -22.14478874206543, -21.433509826660156, -20.722230911254883, -20.01095199584961, -19.299673080444336, -18.588394165039062, -17.87711524963379, -17.165836334228516, -16.454557418823242, -15.743278503417969, -15.031999588012695, -14.320720672607422, -13.609441757202148, -12.898162841796875, -12.186883926391602, -11.475605010986328, -10.764326095581055, -10.053047180175781, -9.341768264770508, -8.63049030303955, -7.919211387634277, -7.207932472229004, -6.4966535568237305, -5.785374641418457, -5.074095726013184, -4.36281681060791, -3.6515378952026367, -2.9402589797973633, -2.22898006439209, -1.5177011489868164, -0.806422233581543, -0.09514331817626953, 0.6161355972290039, 1.3274145126342773, 2.038693428039551, 2.749972343444824, 3.4612512588500977, 4.172530174255371, 4.8838090896606445, 5.595088005065918, 6.306366920471191, 7.017645835876465, 7.728924751281738, 8.440203666687012, 9.151482582092285, 9.862761497497559, 10.574040412902832, 11.285319328308105, 11.996598243713379, 12.707877159118652, 13.419156074523926, 14.1304349899292]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 5.0, 5.0, 8.0, 23.0, 28.0, 62.0, 80.0, 116.0, 109.0, 112.0, 130.0, 112.0, 80.0, 52.0, 29.0, 30.0, 10.0, 7.0, 8.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.645078659057617, -11.257071495056152, -10.869063377380371, -10.481056213378906, -10.093048095703125, -9.70504093170166, -9.317033767700195, -8.929025650024414, -8.541017532348633, -8.153010368347168, -7.765002250671387, -7.376995086669922, -6.988986968994141, -6.600979804992676, -6.212972164154053, -5.82496452331543, -5.436957359313965, -5.048949718475342, -4.660942077636719, -4.272934913635254, -3.8849270343780518, -3.4969193935394287, -3.1089119911193848, -2.7209043502807617, -2.3328967094421387, -1.9448890686035156, -1.5568815469741821, -1.1688740253448486, -0.7808663845062256, -0.39285874366760254, -0.004851341247558594, 0.38315629959106445, 0.7711639404296875, 1.1591715812683105, 1.547179102897644, 1.9351866245269775, 2.3231942653656006, 2.7112019062042236, 3.0992093086242676, 3.4872169494628906, 3.8752245903015137, 4.263232231140137, 4.65123987197876, 5.039247512817383, 5.427254676818848, 5.815262794494629, 6.203269958496094, 6.591277599334717, 6.97928524017334, 7.367292881011963, 7.755300521850586, 8.14330768585205, 8.531315803527832, 8.919322967529297, 9.307331085205078, 9.695338249206543, 10.083345413208008, 10.471352577209473, 10.859360694885254, 11.247367858886719, 11.6353759765625, 12.023383140563965, 12.41139030456543, 12.799398422241211, 13.187406539916992]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 6.0, 9.0, 7.0, 15.0, 20.0, 17.0, 20.0, 46.0, 59.0, 91.0, 130.0, 222.0, 383.0, 744.0, 1806.0, 6341.0, 42496.0, 434925.0, 500157.0, 50044.0, 7157.0, 2010.0, 789.0, 391.0, 239.0, 138.0, 108.0, 55.0, 38.0, 26.0, 16.0, 21.0, 6.0, 8.0, 9.0, 5.0, 3.0, 2.0, 1.0, 0.0, 3.0, 1.0, 1.0], "bins": [-4.35546875, -4.243896484375, -4.13232421875, -4.020751953125, -3.9091796875, -3.797607421875, -3.68603515625, -3.574462890625, -3.462890625, -3.351318359375, -3.23974609375, -3.128173828125, -3.0166015625, -2.905029296875, -2.79345703125, -2.681884765625, -2.5703125, -2.458740234375, -2.34716796875, -2.235595703125, -2.1240234375, -2.012451171875, -1.90087890625, -1.789306640625, -1.677734375, -1.566162109375, -1.45458984375, -1.343017578125, -1.2314453125, -1.119873046875, -1.00830078125, -0.896728515625, -0.78515625, -0.673583984375, -0.56201171875, -0.450439453125, -0.3388671875, -0.227294921875, -0.11572265625, -0.004150390625, 0.107421875, 0.218994140625, 0.33056640625, 0.442138671875, 0.5537109375, 0.665283203125, 0.77685546875, 0.888427734375, 1.0, 1.111572265625, 1.22314453125, 1.334716796875, 1.4462890625, 1.557861328125, 1.66943359375, 1.781005859375, 1.892578125, 2.004150390625, 2.11572265625, 2.227294921875, 2.3388671875, 2.450439453125, 2.56201171875, 2.673583984375, 2.78515625]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 9.0, 14.0, 26.0, 54.0, 94.0, 94.0, 134.0, 133.0, 127.0, 117.0, 81.0, 44.0, 34.0, 17.0, 11.0, 8.0, 4.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.91015625, -1.8662567138671875, -1.822357177734375, -1.7784576416015625, -1.73455810546875, -1.6906585693359375, -1.646759033203125, -1.6028594970703125, -1.5589599609375, -1.5150604248046875, -1.471160888671875, -1.4272613525390625, -1.38336181640625, -1.3394622802734375, -1.295562744140625, -1.2516632080078125, -1.207763671875, -1.1638641357421875, -1.119964599609375, -1.0760650634765625, -1.03216552734375, -0.9882659912109375, -0.944366455078125, -0.9004669189453125, -0.8565673828125, -0.8126678466796875, -0.768768310546875, -0.7248687744140625, -0.68096923828125, -0.6370697021484375, -0.593170166015625, -0.5492706298828125, -0.50537109375, -0.4614715576171875, -0.417572021484375, -0.3736724853515625, -0.32977294921875, -0.2858734130859375, -0.241973876953125, -0.1980743408203125, -0.1541748046875, -0.1102752685546875, -0.066375732421875, -0.0224761962890625, 0.02142333984375, 0.0653228759765625, 0.109222412109375, 0.1531219482421875, 0.197021484375, 0.2409210205078125, 0.284820556640625, 0.3287200927734375, 0.37261962890625, 0.4165191650390625, 0.460418701171875, 0.5043182373046875, 0.5482177734375, 0.5921173095703125, 0.636016845703125, 0.6799163818359375, 0.72381591796875, 0.7677154541015625, 0.811614990234375, 0.8555145263671875, 0.8994140625]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 9.0, 5.0, 7.0, 13.0, 25.0, 20.0, 43.0, 58.0, 107.0, 129.0, 190.0, 320.0, 602.0, 1357.0, 3897.0, 13943.0, 59027.0, 274863.0, 507997.0, 142187.0, 31472.0, 7882.0, 2353.0, 881.0, 456.0, 229.0, 133.0, 99.0, 78.0, 60.0, 34.0, 32.0, 18.0, 10.0, 8.0, 8.0, 6.0, 1.0, 5.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.9228515625, -1.8667755126953125, -1.810699462890625, -1.7546234130859375, -1.69854736328125, -1.6424713134765625, -1.586395263671875, -1.5303192138671875, -1.4742431640625, -1.4181671142578125, -1.362091064453125, -1.3060150146484375, -1.24993896484375, -1.1938629150390625, -1.137786865234375, -1.0817108154296875, -1.025634765625, -0.9695587158203125, -0.913482666015625, -0.8574066162109375, -0.80133056640625, -0.7452545166015625, -0.689178466796875, -0.6331024169921875, -0.5770263671875, -0.5209503173828125, -0.464874267578125, -0.4087982177734375, -0.35272216796875, -0.2966461181640625, -0.240570068359375, -0.1844940185546875, -0.12841796875, -0.0723419189453125, -0.016265869140625, 0.0398101806640625, 0.09588623046875, 0.1519622802734375, 0.208038330078125, 0.2641143798828125, 0.3201904296875, 0.3762664794921875, 0.432342529296875, 0.4884185791015625, 0.54449462890625, 0.6005706787109375, 0.656646728515625, 0.7127227783203125, 0.768798828125, 0.8248748779296875, 0.880950927734375, 0.9370269775390625, 0.99310302734375, 1.0491790771484375, 1.105255126953125, 1.1613311767578125, 1.2174072265625, 1.2734832763671875, 1.329559326171875, 1.3856353759765625, 1.44171142578125, 1.4977874755859375, 1.553863525390625, 1.6099395751953125, 1.666015625]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 15.0, 4.0, 8.0, 12.0, 17.0, 20.0, 27.0, 27.0, 30.0, 47.0, 46.0, 38.0, 66.0, 67.0, 53.0, 64.0, 68.0, 46.0, 60.0, 54.0, 43.0, 30.0, 44.0, 28.0, 21.0, 21.0, 13.0, 12.0, 12.0, 5.0, 2.0, 4.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.16796875, -2.077850341796875, -1.98773193359375, -1.897613525390625, -1.8074951171875, -1.717376708984375, -1.62725830078125, -1.537139892578125, -1.447021484375, -1.356903076171875, -1.26678466796875, -1.176666259765625, -1.0865478515625, -0.996429443359375, -0.90631103515625, -0.816192626953125, -0.72607421875, -0.635955810546875, -0.54583740234375, -0.455718994140625, -0.3656005859375, -0.275482177734375, -0.18536376953125, -0.095245361328125, -0.005126953125, 0.084991455078125, 0.17510986328125, 0.265228271484375, 0.3553466796875, 0.445465087890625, 0.53558349609375, 0.625701904296875, 0.7158203125, 0.805938720703125, 0.89605712890625, 0.986175537109375, 1.0762939453125, 1.166412353515625, 1.25653076171875, 1.346649169921875, 1.436767578125, 1.526885986328125, 1.61700439453125, 1.707122802734375, 1.7972412109375, 1.887359619140625, 1.97747802734375, 2.067596435546875, 2.15771484375, 2.247833251953125, 2.33795166015625, 2.428070068359375, 2.5181884765625, 2.608306884765625, 2.69842529296875, 2.788543701171875, 2.878662109375, 2.968780517578125, 3.05889892578125, 3.149017333984375, 3.2391357421875, 3.329254150390625, 3.41937255859375, 3.509490966796875, 3.599609375]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 5.0, 8.0, 14.0, 17.0, 34.0, 38.0, 60.0, 133.0, 225.0, 407.0, 947.0, 2389.0, 7407.0, 29683.0, 187791.0, 590449.0, 187432.0, 29974.0, 7220.0, 2414.0, 987.0, 419.0, 228.0, 116.0, 59.0, 41.0, 23.0, 16.0, 12.0, 1.0, 4.0, 5.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6533203125, -0.6295623779296875, -0.605804443359375, -0.5820465087890625, -0.55828857421875, -0.5345306396484375, -0.510772705078125, -0.4870147705078125, -0.4632568359375, -0.4394989013671875, -0.415740966796875, -0.3919830322265625, -0.36822509765625, -0.3444671630859375, -0.320709228515625, -0.2969512939453125, -0.273193359375, -0.2494354248046875, -0.225677490234375, -0.2019195556640625, -0.17816162109375, -0.1544036865234375, -0.130645751953125, -0.1068878173828125, -0.0831298828125, -0.0593719482421875, -0.035614013671875, -0.0118560791015625, 0.01190185546875, 0.0356597900390625, 0.059417724609375, 0.0831756591796875, 0.10693359375, 0.1306915283203125, 0.154449462890625, 0.1782073974609375, 0.20196533203125, 0.2257232666015625, 0.249481201171875, 0.2732391357421875, 0.2969970703125, 0.3207550048828125, 0.344512939453125, 0.3682708740234375, 0.39202880859375, 0.4157867431640625, 0.439544677734375, 0.4633026123046875, 0.487060546875, 0.5108184814453125, 0.534576416015625, 0.5583343505859375, 0.58209228515625, 0.6058502197265625, 0.629608154296875, 0.6533660888671875, 0.6771240234375, 0.7008819580078125, 0.724639892578125, 0.7483978271484375, 0.77215576171875, 0.7959136962890625, 0.819671630859375, 0.8434295654296875, 0.8671875]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 2.0, 4.0, 2.0, 2.0, 6.0, 5.0, 3.0, 6.0, 12.0, 14.0, 27.0, 31.0, 46.0, 63.0, 75.0, 99.0, 110.0, 104.0, 73.0, 82.0, 70.0, 43.0, 32.0, 25.0, 19.0, 13.0, 7.0, 13.0, 4.0, 4.0, 5.0, 5.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00011485815048217773, -0.00011061038821935654, -0.00010636262595653534, -0.00010211486369371414, -9.786710143089294e-05, -9.361933916807175e-05, -8.937157690525055e-05, -8.512381464242935e-05, -8.087605237960815e-05, -7.662829011678696e-05, -7.238052785396576e-05, -6.813276559114456e-05, -6.388500332832336e-05, -5.963724106550217e-05, -5.538947880268097e-05, -5.114171653985977e-05, -4.6893954277038574e-05, -4.264619201421738e-05, -3.839842975139618e-05, -3.415066748857498e-05, -2.9902905225753784e-05, -2.5655142962932587e-05, -2.140738070011139e-05, -1.715961843729019e-05, -1.2911856174468994e-05, -8.664093911647797e-06, -4.416331648826599e-06, -1.685693860054016e-07, 4.079192876815796e-06, 8.326955139636993e-06, 1.2574717402458191e-05, 1.682247966527939e-05, 2.1070241928100586e-05, 2.5318004190921783e-05, 2.956576645374298e-05, 3.381352871656418e-05, 3.8061290979385376e-05, 4.2309053242206573e-05, 4.655681550502777e-05, 5.080457776784897e-05, 5.5052340030670166e-05, 5.9300102293491364e-05, 6.354786455631256e-05, 6.779562681913376e-05, 7.204338908195496e-05, 7.629115134477615e-05, 8.053891360759735e-05, 8.478667587041855e-05, 8.903443813323975e-05, 9.328220039606094e-05, 9.752996265888214e-05, 0.00010177772492170334, 0.00010602548718452454, 0.00011027324944734573, 0.00011452101171016693, 0.00011876877397298813, 0.00012301653623580933, 0.00012726429849863052, 0.00013151206076145172, 0.00013575982302427292, 0.00014000758528709412, 0.00014425534754991531, 0.0001485031098127365, 0.0001527508720755577, 0.0001569986343383789]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 7.0, 5.0, 3.0, 5.0, 17.0, 17.0, 38.0, 46.0, 114.0, 208.0, 515.0, 1702.0, 7608.0, 47275.0, 403242.0, 508845.0, 65697.0, 9887.0, 2184.0, 628.0, 237.0, 112.0, 77.0, 31.0, 25.0, 11.0, 11.0, 3.0, 2.0, 7.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.99658203125, -0.968170166015625, -0.93975830078125, -0.911346435546875, -0.8829345703125, -0.854522705078125, -0.82611083984375, -0.797698974609375, -0.769287109375, -0.740875244140625, -0.71246337890625, -0.684051513671875, -0.6556396484375, -0.627227783203125, -0.59881591796875, -0.570404052734375, -0.5419921875, -0.513580322265625, -0.48516845703125, -0.456756591796875, -0.4283447265625, -0.399932861328125, -0.37152099609375, -0.343109130859375, -0.314697265625, -0.286285400390625, -0.25787353515625, -0.229461669921875, -0.2010498046875, -0.172637939453125, -0.14422607421875, -0.115814208984375, -0.08740234375, -0.058990478515625, -0.03057861328125, -0.002166748046875, 0.0262451171875, 0.054656982421875, 0.08306884765625, 0.111480712890625, 0.139892578125, 0.168304443359375, 0.19671630859375, 0.225128173828125, 0.2535400390625, 0.281951904296875, 0.31036376953125, 0.338775634765625, 0.3671875, 0.395599365234375, 0.42401123046875, 0.452423095703125, 0.4808349609375, 0.509246826171875, 0.53765869140625, 0.566070556640625, 0.594482421875, 0.622894287109375, 0.65130615234375, 0.679718017578125, 0.7081298828125, 0.736541748046875, 0.76495361328125, 0.793365478515625, 0.82177734375]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 3.0, 5.0, 6.0, 9.0, 10.0, 16.0, 19.0, 40.0, 47.0, 61.0, 83.0, 71.0, 84.0, 111.0, 96.0, 90.0, 58.0, 56.0, 40.0, 30.0, 24.0, 15.0, 11.0, 8.0, 3.0, 4.0, 6.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.83203125, -0.8114700317382812, -0.7909088134765625, -0.7703475952148438, -0.749786376953125, -0.7292251586914062, -0.7086639404296875, -0.6881027221679688, -0.66754150390625, -0.6469802856445312, -0.6264190673828125, -0.6058578491210938, -0.585296630859375, -0.5647354125976562, -0.5441741943359375, -0.5236129760742188, -0.5030517578125, -0.48249053955078125, -0.4619293212890625, -0.44136810302734375, -0.420806884765625, -0.40024566650390625, -0.3796844482421875, -0.35912322998046875, -0.33856201171875, -0.31800079345703125, -0.2974395751953125, -0.27687835693359375, -0.256317138671875, -0.23575592041015625, -0.2151947021484375, -0.19463348388671875, -0.174072265625, -0.15351104736328125, -0.1329498291015625, -0.11238861083984375, -0.091827392578125, -0.07126617431640625, -0.0507049560546875, -0.03014373779296875, -0.00958251953125, 0.01097869873046875, 0.0315399169921875, 0.05210113525390625, 0.072662353515625, 0.09322357177734375, 0.1137847900390625, 0.13434600830078125, 0.1549072265625, 0.17546844482421875, 0.1960296630859375, 0.21659088134765625, 0.237152099609375, 0.25771331787109375, 0.2782745361328125, 0.29883575439453125, 0.31939697265625, 0.33995819091796875, 0.3605194091796875, 0.38108062744140625, 0.401641845703125, 0.42220306396484375, 0.4427642822265625, 0.46332550048828125, 0.48388671875]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 23.0, 86.0, 280.0, 413.0, 152.0, 39.0, 12.0, 7.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.636882781982422, -19.616531372070312, -18.596179962158203, -17.57582664489746, -16.55547523498535, -15.535123825073242, -14.514771461486816, -13.49441909790039, -12.474067687988281, -11.453716278076172, -10.433363914489746, -9.41301155090332, -8.392660140991211, -7.372308254241943, -6.351956367492676, -5.331604480743408, -4.311252593994141, -3.290900707244873, -2.2705488204956055, -1.250196933746338, -0.2298450469970703, 0.7905068397521973, 1.8108587265014648, 2.8312106132507324, 3.8515625, 4.871914386749268, 5.892266273498535, 6.912618160247803, 7.93297004699707, 8.95332145690918, 9.973673820495605, 10.994026184082031, 12.014381408691406, 13.034732818603516, 14.055085182189941, 15.075437545776367, 16.095788955688477, 17.116140365600586, 18.136493682861328, 19.156845092773438, 20.177196502685547, 21.197547912597656, 22.217899322509766, 23.238252639770508, 24.258604049682617, 25.278955459594727, 26.29930877685547, 27.319660186767578, 28.340011596679688, 29.360363006591797, 30.380714416503906, 31.40106773376465, 32.421417236328125, 33.4417724609375, 34.46212387084961, 35.48247528076172, 36.50282669067383, 37.52317810058594, 38.54352951049805, 39.563880920410156, 40.58423614501953, 41.60458755493164, 42.62493896484375, 43.64529037475586, 44.66564178466797]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 5.0, 11.0, 7.0, 11.0, 18.0, 23.0, 20.0, 25.0, 37.0, 35.0, 41.0, 50.0, 50.0, 52.0, 56.0, 73.0, 51.0, 55.0, 59.0, 44.0, 48.0, 38.0, 34.0, 31.0, 27.0, 37.0, 17.0, 10.0, 5.0, 6.0, 7.0, 8.0, 8.0, 1.0, 3.0, 3.0, 1.0, 3.0, 0.0, 0.0, 1.0], "bins": [-19.901290893554688, -19.38831329345703, -18.875337600708008, -18.36236000061035, -17.849384307861328, -17.336406707763672, -16.82343101501465, -16.310453414916992, -15.797476768493652, -15.284500122070312, -14.771523475646973, -14.258546829223633, -13.745569229125977, -13.232593536376953, -12.719615936279297, -12.206639289855957, -11.693662643432617, -11.180685997009277, -10.667709350585938, -10.154732704162598, -9.641756057739258, -9.128778457641602, -8.615801811218262, -8.102825164794922, -7.589848518371582, -7.076871871948242, -6.563895225524902, -6.050918102264404, -5.5379414558410645, -5.024964809417725, -4.511987686157227, -3.9990110397338867, -3.486034393310547, -2.973057746887207, -2.460080862045288, -1.9471040964126587, -1.4341273307800293, -0.9211506843566895, -0.4081737995147705, 0.10480308532714844, 0.6177797317504883, 1.1307564973831177, 1.643733263015747, 2.156710147857666, 2.669686794281006, 3.1826634407043457, 3.6956403255462646, 4.208617210388184, 4.721593856811523, 5.234570503234863, 5.747547149658203, 6.260524272918701, 6.773500919342041, 7.286477565765381, 7.799454689025879, 8.312431335449219, 8.825407981872559, 9.338384628295898, 9.851361274719238, 10.364337921142578, 10.877315521240234, 11.390291213989258, 11.903268814086914, 12.416245460510254, 12.929222106933594]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 1.0, 0.0, 5.0, 2.0, 6.0, 0.0, 7.0, 13.0, 6.0, 16.0, 20.0, 25.0, 28.0, 39.0, 77.0, 104.0, 209.0, 359.0, 785.0, 2275.0, 9637.0, 100682.0, 4004320.0, 65089.0, 7537.0, 1867.0, 580.0, 270.0, 129.0, 74.0, 48.0, 26.0, 14.0, 14.0, 6.0, 10.0, 0.0, 2.0, 5.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.328125, -3.2110595703125, -3.093994140625, -2.9769287109375, -2.85986328125, -2.7427978515625, -2.625732421875, -2.5086669921875, -2.3916015625, -2.2745361328125, -2.157470703125, -2.0404052734375, -1.92333984375, -1.8062744140625, -1.689208984375, -1.5721435546875, -1.455078125, -1.3380126953125, -1.220947265625, -1.1038818359375, -0.98681640625, -0.8697509765625, -0.752685546875, -0.6356201171875, -0.5185546875, -0.4014892578125, -0.284423828125, -0.1673583984375, -0.05029296875, 0.0667724609375, 0.183837890625, 0.3009033203125, 0.41796875, 0.5350341796875, 0.652099609375, 0.7691650390625, 0.88623046875, 1.0032958984375, 1.120361328125, 1.2374267578125, 1.3544921875, 1.4715576171875, 1.588623046875, 1.7056884765625, 1.82275390625, 1.9398193359375, 2.056884765625, 2.1739501953125, 2.291015625, 2.4080810546875, 2.525146484375, 2.6422119140625, 2.75927734375, 2.8763427734375, 2.993408203125, 3.1104736328125, 3.2275390625, 3.3446044921875, 3.461669921875, 3.5787353515625, 3.69580078125, 3.8128662109375, 3.929931640625, 4.0469970703125, 4.1640625]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 7.0, 4.0, 5.0, 17.0, 18.0, 34.0, 52.0, 91.0, 95.0, 87.0, 113.0, 116.0, 105.0, 89.0, 64.0, 45.0, 23.0, 20.0, 10.0, 7.0, 5.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.765625, -1.7243270874023438, -1.6830291748046875, -1.6417312622070312, -1.600433349609375, -1.5591354370117188, -1.5178375244140625, -1.4765396118164062, -1.43524169921875, -1.3939437866210938, -1.3526458740234375, -1.3113479614257812, -1.270050048828125, -1.2287521362304688, -1.1874542236328125, -1.1461563110351562, -1.1048583984375, -1.0635604858398438, -1.0222625732421875, -0.9809646606445312, -0.939666748046875, -0.8983688354492188, -0.8570709228515625, -0.8157730102539062, -0.77447509765625, -0.7331771850585938, -0.6918792724609375, -0.6505813598632812, -0.609283447265625, -0.5679855346679688, -0.5266876220703125, -0.48538970947265625, -0.444091796875, -0.40279388427734375, -0.3614959716796875, -0.32019805908203125, -0.278900146484375, -0.23760223388671875, -0.1963043212890625, -0.15500640869140625, -0.11370849609375, -0.07241058349609375, -0.0311126708984375, 0.01018524169921875, 0.051483154296875, 0.09278106689453125, 0.1340789794921875, 0.17537689208984375, 0.2166748046875, 0.25797271728515625, 0.2992706298828125, 0.34056854248046875, 0.381866455078125, 0.42316436767578125, 0.4644622802734375, 0.5057601928710938, 0.54705810546875, 0.5883560180664062, 0.6296539306640625, 0.6709518432617188, 0.712249755859375, 0.7535476684570312, 0.7948455810546875, 0.8361434936523438, 0.87744140625]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 11.0, 8.0, 14.0, 25.0, 25.0, 55.0, 159.0, 446.0, 1577.0, 7731.0, 82462.0, 4018249.0, 74430.0, 6957.0, 1413.0, 425.0, 154.0, 71.0, 31.0, 11.0, 7.0, 4.0, 5.0, 6.0, 1.0, 4.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.7265625, -4.60980224609375, -4.4930419921875, -4.37628173828125, -4.259521484375, -4.14276123046875, -4.0260009765625, -3.90924072265625, -3.79248046875, -3.67572021484375, -3.5589599609375, -3.44219970703125, -3.325439453125, -3.20867919921875, -3.0919189453125, -2.97515869140625, -2.8583984375, -2.74163818359375, -2.6248779296875, -2.50811767578125, -2.391357421875, -2.27459716796875, -2.1578369140625, -2.04107666015625, -1.92431640625, -1.80755615234375, -1.6907958984375, -1.57403564453125, -1.457275390625, -1.34051513671875, -1.2237548828125, -1.10699462890625, -0.990234375, -0.87347412109375, -0.7567138671875, -0.63995361328125, -0.523193359375, -0.40643310546875, -0.2896728515625, -0.17291259765625, -0.05615234375, 0.06060791015625, 0.1773681640625, 0.29412841796875, 0.410888671875, 0.52764892578125, 0.6444091796875, 0.76116943359375, 0.8779296875, 0.99468994140625, 1.1114501953125, 1.22821044921875, 1.344970703125, 1.46173095703125, 1.5784912109375, 1.69525146484375, 1.81201171875, 1.92877197265625, 2.0455322265625, 2.16229248046875, 2.279052734375, 2.39581298828125, 2.5125732421875, 2.62933349609375, 2.74609375]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 0.0, 4.0, 3.0, 1.0, 1.0, 5.0, 4.0, 10.0, 10.0, 21.0, 20.0, 36.0, 62.0, 136.0, 274.0, 1508.0, 1332.0, 296.0, 130.0, 79.0, 50.0, 25.0, 17.0, 12.0, 14.0, 6.0, 8.0, 3.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.76171875, -0.7354202270507812, -0.7091217041015625, -0.6828231811523438, -0.656524658203125, -0.6302261352539062, -0.6039276123046875, -0.5776290893554688, -0.55133056640625, -0.5250320434570312, -0.4987335205078125, -0.47243499755859375, -0.446136474609375, -0.41983795166015625, -0.3935394287109375, -0.36724090576171875, -0.3409423828125, -0.31464385986328125, -0.2883453369140625, -0.26204681396484375, -0.235748291015625, -0.20944976806640625, -0.1831512451171875, -0.15685272216796875, -0.13055419921875, -0.10425567626953125, -0.0779571533203125, -0.05165863037109375, -0.025360107421875, 0.00093841552734375, 0.0272369384765625, 0.05353546142578125, 0.079833984375, 0.10613250732421875, 0.1324310302734375, 0.15872955322265625, 0.185028076171875, 0.21132659912109375, 0.2376251220703125, 0.26392364501953125, 0.29022216796875, 0.31652069091796875, 0.3428192138671875, 0.36911773681640625, 0.395416259765625, 0.42171478271484375, 0.4480133056640625, 0.47431182861328125, 0.5006103515625, 0.5269088745117188, 0.5532073974609375, 0.5795059204101562, 0.605804443359375, 0.6321029663085938, 0.6584014892578125, 0.6847000122070312, 0.71099853515625, 0.7372970581054688, 0.7635955810546875, 0.7898941040039062, 0.816192626953125, 0.8424911499023438, 0.8687896728515625, 0.8950881958007812, 0.92138671875]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 4.0, 23.0, 122.0, 429.0, 339.0, 72.0, 12.0, 7.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.01996374130249, -5.636176586151123, -5.252388954162598, -4.8686017990112305, -4.484814643859863, -4.101027488708496, -3.7172398567199707, -3.3334527015686035, -2.9496653079986572, -2.565877914428711, -2.1820907592773438, -1.7983033657073975, -1.4145160913467407, -1.030728816986084, -0.6469414234161377, -0.2631542682647705, 0.12063312530517578, 0.5044203996658325, 0.888207733631134, 1.2719950675964355, 1.6557823419570923, 2.039569616317749, 2.4233570098876953, 2.8071441650390625, 3.190931558609009, 3.574718952178955, 3.9585061073303223, 4.342293739318848, 4.726080894470215, 5.109868049621582, 5.493655204772949, 5.877442359924316, 6.261229515075684, 6.645016670227051, 7.028804302215576, 7.412591457366943, 7.7963786125183105, 8.180166244506836, 8.563953399658203, 8.94774055480957, 9.331527709960938, 9.715314865112305, 10.099102020263672, 10.482889175415039, 10.866677284240723, 11.25046443939209, 11.634251594543457, 12.018038749694824, 12.401826858520508, 12.785614013671875, 13.169401168823242, 13.55318832397461, 13.936976432800293, 14.32076358795166, 14.704550743103027, 15.088337898254395, 15.472125053405762, 15.855912208557129, 16.239700317382812, 16.62348747253418, 17.007274627685547, 17.391061782836914, 17.77484893798828, 18.15863609313965, 18.542423248291016]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 6.0, 5.0, 6.0, 6.0, 9.0, 16.0, 31.0, 34.0, 27.0, 48.0, 41.0, 65.0, 57.0, 78.0, 67.0, 65.0, 69.0, 81.0, 59.0, 47.0, 47.0, 35.0, 27.0, 24.0, 20.0, 19.0, 4.0, 7.0, 3.0, 3.0, 1.0, 2.0, 5.0, 0.0, 0.0, 1.0], "bins": [-5.130592346191406, -5.011147975921631, -4.891704082489014, -4.772259712219238, -4.652815818786621, -4.533371448516846, -4.41392707824707, -4.294483184814453, -4.175038814544678, -4.055594444274902, -3.936150550842285, -3.8167061805725098, -3.6972620487213135, -3.577817916870117, -3.458373785018921, -3.3389296531677246, -3.219485282897949, -3.100041151046753, -2.9805970191955566, -2.8611526489257812, -2.741708517074585, -2.6222643852233887, -2.5028202533721924, -2.383376121520996, -2.2639319896698, -2.1444878578186035, -2.0250437259674072, -1.9055994749069214, -1.7861552238464355, -1.6667110919952393, -1.547266960144043, -1.4278227090835571, -1.3083784580230713, -1.188934326171875, -1.0694900751113892, -0.9500459432601929, -0.830601692199707, -0.7111575603485107, -0.5917133688926697, -0.4722691774368286, -0.35282498598098755, -0.23338079452514648, -0.11393661797046661, 0.005507558584213257, 0.12495175004005432, 0.244395911693573, 0.36384010314941406, 0.4832842946052551, 0.6027284860610962, 0.7221726775169373, 0.8416168689727783, 0.9610610008239746, 1.0805052518844604, 1.1999493837356567, 1.3193936347961426, 1.4388377666473389, 1.5582818984985352, 1.6777260303497314, 1.7971702814102173, 1.9166144132614136, 2.0360586643218994, 2.1555027961730957, 2.274946928024292, 2.3943910598754883, 2.5138354301452637]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 4.0, 1.0, 8.0, 4.0, 11.0, 9.0, 25.0, 15.0, 28.0, 47.0, 79.0, 93.0, 191.0, 324.0, 600.0, 1533.0, 4626.0, 21424.0, 152296.0, 647990.0, 185257.0, 25635.0, 5225.0, 1645.0, 623.0, 367.0, 175.0, 103.0, 61.0, 39.0, 28.0, 27.0, 19.0, 14.0, 7.0, 7.0, 6.0, 7.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.3984375, -2.311370849609375, -2.22430419921875, -2.137237548828125, -2.0501708984375, -1.963104248046875, -1.87603759765625, -1.788970947265625, -1.701904296875, -1.614837646484375, -1.52777099609375, -1.440704345703125, -1.3536376953125, -1.266571044921875, -1.17950439453125, -1.092437744140625, -1.00537109375, -0.918304443359375, -0.83123779296875, -0.744171142578125, -0.6571044921875, -0.570037841796875, -0.48297119140625, -0.395904541015625, -0.308837890625, -0.221771240234375, -0.13470458984375, -0.047637939453125, 0.0394287109375, 0.126495361328125, 0.21356201171875, 0.300628662109375, 0.3876953125, 0.474761962890625, 0.56182861328125, 0.648895263671875, 0.7359619140625, 0.823028564453125, 0.91009521484375, 0.997161865234375, 1.084228515625, 1.171295166015625, 1.25836181640625, 1.345428466796875, 1.4324951171875, 1.519561767578125, 1.60662841796875, 1.693695068359375, 1.78076171875, 1.867828369140625, 1.95489501953125, 2.041961669921875, 2.1290283203125, 2.216094970703125, 2.30316162109375, 2.390228271484375, 2.477294921875, 2.564361572265625, 2.65142822265625, 2.738494873046875, 2.8255615234375, 2.912628173828125, 2.99969482421875, 3.086761474609375, 3.173828125]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 5.0, 3.0, 7.0, 12.0, 33.0, 46.0, 59.0, 89.0, 111.0, 114.0, 107.0, 104.0, 96.0, 86.0, 62.0, 33.0, 14.0, 13.0, 8.0, 5.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7744140625, -1.7313995361328125, -1.688385009765625, -1.6453704833984375, -1.60235595703125, -1.5593414306640625, -1.516326904296875, -1.4733123779296875, -1.4302978515625, -1.3872833251953125, -1.344268798828125, -1.3012542724609375, -1.25823974609375, -1.2152252197265625, -1.172210693359375, -1.1291961669921875, -1.086181640625, -1.0431671142578125, -1.000152587890625, -0.9571380615234375, -0.91412353515625, -0.8711090087890625, -0.828094482421875, -0.7850799560546875, -0.7420654296875, -0.6990509033203125, -0.656036376953125, -0.6130218505859375, -0.57000732421875, -0.5269927978515625, -0.483978271484375, -0.4409637451171875, -0.39794921875, -0.3549346923828125, -0.311920166015625, -0.2689056396484375, -0.22589111328125, -0.1828765869140625, -0.139862060546875, -0.0968475341796875, -0.0538330078125, -0.0108184814453125, 0.032196044921875, 0.0752105712890625, 0.11822509765625, 0.1612396240234375, 0.204254150390625, 0.2472686767578125, 0.290283203125, 0.3332977294921875, 0.376312255859375, 0.4193267822265625, 0.46234130859375, 0.5053558349609375, 0.548370361328125, 0.5913848876953125, 0.6343994140625, 0.6774139404296875, 0.720428466796875, 0.7634429931640625, 0.80645751953125, 0.8494720458984375, 0.892486572265625, 0.9355010986328125, 0.978515625]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 3.0, 5.0, 7.0, 2.0, 6.0, 12.0, 19.0, 32.0, 23.0, 35.0, 77.0, 91.0, 151.0, 174.0, 314.0, 539.0, 1066.0, 2489.0, 6939.0, 22554.0, 81943.0, 334064.0, 434992.0, 116576.0, 30536.0, 9500.0, 3261.0, 1376.0, 657.0, 353.0, 225.0, 170.0, 118.0, 81.0, 52.0, 30.0, 21.0, 15.0, 17.0, 12.0, 10.0, 12.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.75, -1.7001800537109375, -1.650360107421875, -1.6005401611328125, -1.55072021484375, -1.5009002685546875, -1.451080322265625, -1.4012603759765625, -1.3514404296875, -1.3016204833984375, -1.251800537109375, -1.2019805908203125, -1.15216064453125, -1.1023406982421875, -1.052520751953125, -1.0027008056640625, -0.952880859375, -0.9030609130859375, -0.853240966796875, -0.8034210205078125, -0.75360107421875, -0.7037811279296875, -0.653961181640625, -0.6041412353515625, -0.5543212890625, -0.5045013427734375, -0.454681396484375, -0.4048614501953125, -0.35504150390625, -0.3052215576171875, -0.255401611328125, -0.2055816650390625, -0.15576171875, -0.1059417724609375, -0.056121826171875, -0.0063018798828125, 0.04351806640625, 0.0933380126953125, 0.143157958984375, 0.1929779052734375, 0.2427978515625, 0.2926177978515625, 0.342437744140625, 0.3922576904296875, 0.44207763671875, 0.4918975830078125, 0.541717529296875, 0.5915374755859375, 0.641357421875, 0.6911773681640625, 0.740997314453125, 0.7908172607421875, 0.84063720703125, 0.8904571533203125, 0.940277099609375, 0.9900970458984375, 1.0399169921875, 1.0897369384765625, 1.139556884765625, 1.1893768310546875, 1.23919677734375, 1.2890167236328125, 1.338836669921875, 1.3886566162109375, 1.4384765625]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 3.0, 1.0, 3.0, 3.0, 4.0, 1.0, 4.0, 6.0, 15.0, 4.0, 11.0, 12.0, 14.0, 14.0, 20.0, 10.0, 14.0, 23.0, 15.0, 40.0, 31.0, 54.0, 44.0, 37.0, 42.0, 37.0, 48.0, 39.0, 39.0, 50.0, 44.0, 39.0, 37.0, 36.0, 29.0, 31.0, 23.0, 22.0, 17.0, 19.0, 12.0, 15.0, 10.0, 15.0, 3.0, 7.0, 3.0, 6.0, 6.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.1640625, -2.093475341796875, -2.02288818359375, -1.952301025390625, -1.8817138671875, -1.811126708984375, -1.74053955078125, -1.669952392578125, -1.599365234375, -1.528778076171875, -1.45819091796875, -1.387603759765625, -1.3170166015625, -1.246429443359375, -1.17584228515625, -1.105255126953125, -1.03466796875, -0.964080810546875, -0.89349365234375, -0.822906494140625, -0.7523193359375, -0.681732177734375, -0.61114501953125, -0.540557861328125, -0.469970703125, -0.399383544921875, -0.32879638671875, -0.258209228515625, -0.1876220703125, -0.117034912109375, -0.04644775390625, 0.024139404296875, 0.0947265625, 0.165313720703125, 0.23590087890625, 0.306488037109375, 0.3770751953125, 0.447662353515625, 0.51824951171875, 0.588836669921875, 0.659423828125, 0.730010986328125, 0.80059814453125, 0.871185302734375, 0.9417724609375, 1.012359619140625, 1.08294677734375, 1.153533935546875, 1.22412109375, 1.294708251953125, 1.36529541015625, 1.435882568359375, 1.5064697265625, 1.577056884765625, 1.64764404296875, 1.718231201171875, 1.788818359375, 1.859405517578125, 1.92999267578125, 2.000579833984375, 2.0711669921875, 2.141754150390625, 2.21234130859375, 2.282928466796875, 2.353515625]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 5.0, 2.0, 7.0, 9.0, 9.0, 26.0, 38.0, 61.0, 87.0, 175.0, 371.0, 775.0, 2197.0, 8097.0, 52628.0, 435551.0, 475769.0, 59971.0, 8769.0, 2337.0, 809.0, 390.0, 201.0, 103.0, 59.0, 37.0, 28.0, 17.0, 12.0, 5.0, 5.0, 2.0, 0.0, 4.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6904296875, -0.664886474609375, -0.63934326171875, -0.613800048828125, -0.5882568359375, -0.562713623046875, -0.53717041015625, -0.511627197265625, -0.486083984375, -0.460540771484375, -0.43499755859375, -0.409454345703125, -0.3839111328125, -0.358367919921875, -0.33282470703125, -0.307281494140625, -0.28173828125, -0.256195068359375, -0.23065185546875, -0.205108642578125, -0.1795654296875, -0.154022216796875, -0.12847900390625, -0.102935791015625, -0.077392578125, -0.051849365234375, -0.02630615234375, -0.000762939453125, 0.0247802734375, 0.050323486328125, 0.07586669921875, 0.101409912109375, 0.126953125, 0.152496337890625, 0.17803955078125, 0.203582763671875, 0.2291259765625, 0.254669189453125, 0.28021240234375, 0.305755615234375, 0.331298828125, 0.356842041015625, 0.38238525390625, 0.407928466796875, 0.4334716796875, 0.459014892578125, 0.48455810546875, 0.510101318359375, 0.53564453125, 0.561187744140625, 0.58673095703125, 0.612274169921875, 0.6378173828125, 0.663360595703125, 0.68890380859375, 0.714447021484375, 0.739990234375, 0.765533447265625, 0.79107666015625, 0.816619873046875, 0.8421630859375, 0.867706298828125, 0.89324951171875, 0.918792724609375, 0.9443359375]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 5.0, 3.0, 5.0, 7.0, 8.0, 12.0, 12.0, 17.0, 21.0, 43.0, 49.0, 72.0, 73.0, 99.0, 86.0, 106.0, 72.0, 87.0, 55.0, 36.0, 35.0, 32.0, 13.0, 16.0, 9.0, 9.0, 5.0, 3.0, 4.0, 2.0, 4.0, 5.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00014591217041015625, -0.00014194101095199585, -0.00013796985149383545, -0.00013399869203567505, -0.00013002753257751465, -0.00012605637311935425, -0.00012208521366119385, -0.00011811405420303345, -0.00011414289474487305, -0.00011017173528671265, -0.00010620057582855225, -0.00010222941637039185, -9.825825691223145e-05, -9.428709745407104e-05, -9.031593799591064e-05, -8.634477853775024e-05, -8.237361907958984e-05, -7.840245962142944e-05, -7.443130016326904e-05, -7.046014070510864e-05, -6.648898124694824e-05, -6.251782178878784e-05, -5.854666233062744e-05, -5.457550287246704e-05, -5.060434341430664e-05, -4.663318395614624e-05, -4.266202449798584e-05, -3.869086503982544e-05, -3.471970558166504e-05, -3.074854612350464e-05, -2.6777386665344238e-05, -2.2806227207183838e-05, -1.8835067749023438e-05, -1.4863908290863037e-05, -1.0892748832702637e-05, -6.921589374542236e-06, -2.950429916381836e-06, 1.0207295417785645e-06, 4.991888999938965e-06, 8.963048458099365e-06, 1.2934207916259766e-05, 1.6905367374420166e-05, 2.0876526832580566e-05, 2.4847686290740967e-05, 2.8818845748901367e-05, 3.279000520706177e-05, 3.676116466522217e-05, 4.073232412338257e-05, 4.470348358154297e-05, 4.867464303970337e-05, 5.264580249786377e-05, 5.661696195602417e-05, 6.058812141418457e-05, 6.455928087234497e-05, 6.853044033050537e-05, 7.250159978866577e-05, 7.647275924682617e-05, 8.044391870498657e-05, 8.441507816314697e-05, 8.838623762130737e-05, 9.235739707946777e-05, 9.632855653762817e-05, 0.00010029971599578857, 0.00010427087545394897, 0.00010824203491210938]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 0.0, 4.0, 2.0, 5.0, 7.0, 9.0, 11.0, 6.0, 19.0, 16.0, 23.0, 58.0, 77.0, 147.0, 254.0, 530.0, 1372.0, 3986.0, 14725.0, 67266.0, 330356.0, 472593.0, 123111.0, 24297.0, 6198.0, 1923.0, 786.0, 345.0, 155.0, 91.0, 41.0, 47.0, 25.0, 18.0, 13.0, 13.0, 9.0, 6.0, 4.0, 6.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.7080078125, -0.6878814697265625, -0.667755126953125, -0.6476287841796875, -0.62750244140625, -0.6073760986328125, -0.587249755859375, -0.5671234130859375, -0.5469970703125, -0.5268707275390625, -0.506744384765625, -0.4866180419921875, -0.46649169921875, -0.4463653564453125, -0.426239013671875, -0.4061126708984375, -0.385986328125, -0.3658599853515625, -0.345733642578125, -0.3256072998046875, -0.30548095703125, -0.2853546142578125, -0.265228271484375, -0.2451019287109375, -0.2249755859375, -0.2048492431640625, -0.184722900390625, -0.1645965576171875, -0.14447021484375, -0.1243438720703125, -0.104217529296875, -0.0840911865234375, -0.06396484375, -0.0438385009765625, -0.023712158203125, -0.0035858154296875, 0.01654052734375, 0.0366668701171875, 0.056793212890625, 0.0769195556640625, 0.0970458984375, 0.1171722412109375, 0.137298583984375, 0.1574249267578125, 0.17755126953125, 0.1976776123046875, 0.217803955078125, 0.2379302978515625, 0.258056640625, 0.2781829833984375, 0.298309326171875, 0.3184356689453125, 0.33856201171875, 0.3586883544921875, 0.378814697265625, 0.3989410400390625, 0.4190673828125, 0.4391937255859375, 0.459320068359375, 0.4794464111328125, 0.49957275390625, 0.5196990966796875, 0.539825439453125, 0.5599517822265625, 0.580078125]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 3.0, 2.0, 1.0, 9.0, 8.0, 8.0, 9.0, 10.0, 21.0, 26.0, 35.0, 36.0, 33.0, 65.0, 64.0, 82.0, 80.0, 74.0, 69.0, 62.0, 50.0, 58.0, 41.0, 31.0, 33.0, 23.0, 19.0, 16.0, 8.0, 5.0, 7.0, 4.0, 4.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.47265625, -0.45670318603515625, -0.4407501220703125, -0.42479705810546875, -0.408843994140625, -0.39289093017578125, -0.3769378662109375, -0.36098480224609375, -0.34503173828125, -0.32907867431640625, -0.3131256103515625, -0.29717254638671875, -0.281219482421875, -0.26526641845703125, -0.2493133544921875, -0.23336029052734375, -0.2174072265625, -0.20145416259765625, -0.1855010986328125, -0.16954803466796875, -0.153594970703125, -0.13764190673828125, -0.1216888427734375, -0.10573577880859375, -0.08978271484375, -0.07382965087890625, -0.0578765869140625, -0.04192352294921875, -0.025970458984375, -0.01001739501953125, 0.0059356689453125, 0.02188873291015625, 0.037841796875, 0.05379486083984375, 0.0697479248046875, 0.08570098876953125, 0.101654052734375, 0.11760711669921875, 0.1335601806640625, 0.14951324462890625, 0.16546630859375, 0.18141937255859375, 0.1973724365234375, 0.21332550048828125, 0.229278564453125, 0.24523162841796875, 0.2611846923828125, 0.27713775634765625, 0.2930908203125, 0.30904388427734375, 0.3249969482421875, 0.34095001220703125, 0.356903076171875, 0.37285614013671875, 0.3888092041015625, 0.40476226806640625, 0.42071533203125, 0.43666839599609375, 0.4526214599609375, 0.46857452392578125, 0.484527587890625, 0.5004806518554688, 0.5164337158203125, 0.5323867797851562, 0.54833984375]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 8.0, 57.0, 517.0, 376.0, 50.0, 7.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-90.84335327148438, -89.02262878417969, -87.20189666748047, -85.38116455078125, -83.56044006347656, -81.73971557617188, -79.91898345947266, -78.09825134277344, -76.27752685546875, -74.45680236816406, -72.63607025146484, -70.81533813476562, -68.99461364746094, -67.17388916015625, -65.35315704345703, -63.53242874145508, -61.711700439453125, -59.89097213745117, -58.07024383544922, -56.249515533447266, -54.42878723144531, -52.60805892944336, -50.787330627441406, -48.96660232543945, -47.1458740234375, -45.32514572143555, -43.504417419433594, -41.68368911743164, -39.86296081542969, -38.042232513427734, -36.22150421142578, -34.40077590942383, -32.580047607421875, -30.759319305419922, -28.93859100341797, -27.117862701416016, -25.297134399414062, -23.47640609741211, -21.655677795410156, -19.834949493408203, -18.01422119140625, -16.193492889404297, -14.372764587402344, -12.55203628540039, -10.731307983398438, -8.910579681396484, -7.089851379394531, -5.269123077392578, -3.448394775390625, -1.6276664733886719, 0.19306182861328125, 2.0137901306152344, 3.8345184326171875, 5.655246734619141, 7.475975036621094, 9.296703338623047, 11.117431640625, 12.938159942626953, 14.758888244628906, 16.57961654663086, 18.400344848632812, 20.221073150634766, 22.04180145263672, 23.862529754638672, 25.683258056640625]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": "histogram", "values": [4.0, 3.0, 3.0, 1.0, 1.0, 1.0, 4.0, 7.0, 5.0, 9.0, 18.0, 15.0, 16.0, 14.0, 19.0, 25.0, 29.0, 41.0, 34.0, 36.0, 40.0, 46.0, 42.0, 49.0, 66.0, 46.0, 47.0, 47.0, 32.0, 41.0, 39.0, 27.0, 29.0, 34.0, 19.0, 26.0, 25.0, 13.0, 19.0, 11.0, 7.0, 10.0, 6.0, 5.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.620672225952148, -11.15491771697998, -10.689164161682129, -10.223409652709961, -9.75765609741211, -9.291901588439941, -8.82614803314209, -8.360393524169922, -7.89463996887207, -7.4288859367370605, -6.963131904602051, -6.497377872467041, -6.031623840332031, -5.565869331359863, -5.100115776062012, -4.634361267089844, -4.168607234954834, -3.702853202819824, -3.2370991706848145, -2.7713451385498047, -2.305591106414795, -1.839836835861206, -1.3740828037261963, -0.9083287715911865, -0.44257473945617676, 0.023179322481155396, 0.48893338441848755, 0.9546874761581421, 1.4204415082931519, 1.8861956596374512, 2.351949691772461, 2.8177037239074707, 3.2834577560424805, 3.7492117881774902, 4.2149658203125, 4.68071985244751, 5.1464738845825195, 5.6122283935546875, 6.077981948852539, 6.543736457824707, 7.009490013122559, 7.475244045257568, 7.940998077392578, 8.406752586364746, 8.872506141662598, 9.338260650634766, 9.804014205932617, 10.269768714904785, 10.735523223876953, 11.201277732849121, 11.667031288146973, 12.13278579711914, 12.598539352416992, 13.06429386138916, 13.530047416687012, 13.99580192565918, 14.461555480957031, 14.9273099899292, 15.39306354522705, 15.858818054199219, 16.32457160949707, 16.790325164794922, 17.256080627441406, 17.721834182739258, 18.18758773803711]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 1.0, 1.0, 3.0, 5.0, 6.0, 8.0, 13.0, 22.0, 16.0, 24.0, 34.0, 64.0, 101.0, 178.0, 301.0, 522.0, 1142.0, 2905.0, 10190.0, 64388.0, 3906256.0, 182890.0, 18038.0, 4243.0, 1452.0, 661.0, 361.0, 183.0, 131.0, 53.0, 34.0, 21.0, 18.0, 11.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 0.0, 0.0, 1.0], "bins": [-4.35546875, -4.248382568359375, -4.14129638671875, -4.034210205078125, -3.9271240234375, -3.820037841796875, -3.71295166015625, -3.605865478515625, -3.498779296875, -3.391693115234375, -3.28460693359375, -3.177520751953125, -3.0704345703125, -2.963348388671875, -2.85626220703125, -2.749176025390625, -2.64208984375, -2.535003662109375, -2.42791748046875, -2.320831298828125, -2.2137451171875, -2.106658935546875, -1.99957275390625, -1.892486572265625, -1.785400390625, -1.678314208984375, -1.57122802734375, -1.464141845703125, -1.3570556640625, -1.249969482421875, -1.14288330078125, -1.035797119140625, -0.9287109375, -0.821624755859375, -0.71453857421875, -0.607452392578125, -0.5003662109375, -0.393280029296875, -0.28619384765625, -0.179107666015625, -0.072021484375, 0.035064697265625, 0.14215087890625, 0.249237060546875, 0.3563232421875, 0.463409423828125, 0.57049560546875, 0.677581787109375, 0.78466796875, 0.891754150390625, 0.99884033203125, 1.105926513671875, 1.2130126953125, 1.320098876953125, 1.42718505859375, 1.534271240234375, 1.641357421875, 1.748443603515625, 1.85552978515625, 1.962615966796875, 2.0697021484375, 2.176788330078125, 2.28387451171875, 2.390960693359375, 2.498046875]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 1.0, 6.0, 5.0, 17.0, 20.0, 29.0, 45.0, 55.0, 66.0, 90.0, 85.0, 99.0, 100.0, 90.0, 72.0, 69.0, 43.0, 36.0, 27.0, 22.0, 12.0, 9.0, 4.0, 4.0, 3.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6943359375, -1.6528472900390625, -1.611358642578125, -1.5698699951171875, -1.52838134765625, -1.4868927001953125, -1.445404052734375, -1.4039154052734375, -1.3624267578125, -1.3209381103515625, -1.279449462890625, -1.2379608154296875, -1.19647216796875, -1.1549835205078125, -1.113494873046875, -1.0720062255859375, -1.030517578125, -0.9890289306640625, -0.947540283203125, -0.9060516357421875, -0.86456298828125, -0.8230743408203125, -0.781585693359375, -0.7400970458984375, -0.6986083984375, -0.6571197509765625, -0.615631103515625, -0.5741424560546875, -0.53265380859375, -0.4911651611328125, -0.449676513671875, -0.4081878662109375, -0.36669921875, -0.3252105712890625, -0.283721923828125, -0.2422332763671875, -0.20074462890625, -0.1592559814453125, -0.117767333984375, -0.0762786865234375, -0.0347900390625, 0.0066986083984375, 0.048187255859375, 0.0896759033203125, 0.13116455078125, 0.1726531982421875, 0.214141845703125, 0.2556304931640625, 0.297119140625, 0.3386077880859375, 0.380096435546875, 0.4215850830078125, 0.46307373046875, 0.5045623779296875, 0.546051025390625, 0.5875396728515625, 0.6290283203125, 0.6705169677734375, 0.712005615234375, 0.7534942626953125, 0.79498291015625, 0.8364715576171875, 0.877960205078125, 0.9194488525390625, 0.9609375]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 4.0, 0.0, 4.0, 6.0, 4.0, 11.0, 13.0, 41.0, 65.0, 132.0, 264.0, 665.0, 1912.0, 7499.0, 51917.0, 3682370.0, 420005.0, 22698.0, 4413.0, 1273.0, 531.0, 235.0, 90.0, 67.0, 17.0, 14.0, 11.0, 10.0, 7.0, 2.0, 1.0, 3.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.958984375, -2.862945556640625, -2.76690673828125, -2.670867919921875, -2.5748291015625, -2.478790283203125, -2.38275146484375, -2.286712646484375, -2.190673828125, -2.094635009765625, -1.99859619140625, -1.902557373046875, -1.8065185546875, -1.710479736328125, -1.61444091796875, -1.518402099609375, -1.42236328125, -1.326324462890625, -1.23028564453125, -1.134246826171875, -1.0382080078125, -0.942169189453125, -0.84613037109375, -0.750091552734375, -0.654052734375, -0.558013916015625, -0.46197509765625, -0.365936279296875, -0.2698974609375, -0.173858642578125, -0.07781982421875, 0.018218994140625, 0.1142578125, 0.210296630859375, 0.30633544921875, 0.402374267578125, 0.4984130859375, 0.594451904296875, 0.69049072265625, 0.786529541015625, 0.882568359375, 0.978607177734375, 1.07464599609375, 1.170684814453125, 1.2667236328125, 1.362762451171875, 1.45880126953125, 1.554840087890625, 1.65087890625, 1.746917724609375, 1.84295654296875, 1.938995361328125, 2.0350341796875, 2.131072998046875, 2.22711181640625, 2.323150634765625, 2.419189453125, 2.515228271484375, 2.61126708984375, 2.707305908203125, 2.8033447265625, 2.899383544921875, 2.99542236328125, 3.091461181640625, 3.1875]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 4.0, 2.0, 1.0, 1.0, 6.0, 9.0, 7.0, 10.0, 11.0, 14.0, 20.0, 26.0, 68.0, 107.0, 180.0, 552.0, 2064.0, 553.0, 178.0, 92.0, 55.0, 35.0, 22.0, 14.0, 12.0, 8.0, 8.0, 4.0, 6.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.92333984375, -0.8885116577148438, -0.8536834716796875, -0.8188552856445312, -0.784027099609375, -0.7491989135742188, -0.7143707275390625, -0.6795425415039062, -0.64471435546875, -0.6098861694335938, -0.5750579833984375, -0.5402297973632812, -0.505401611328125, -0.47057342529296875, -0.4357452392578125, -0.40091705322265625, -0.3660888671875, -0.33126068115234375, -0.2964324951171875, -0.26160430908203125, -0.226776123046875, -0.19194793701171875, -0.1571197509765625, -0.12229156494140625, -0.08746337890625, -0.05263519287109375, -0.0178070068359375, 0.01702117919921875, 0.051849365234375, 0.08667755126953125, 0.1215057373046875, 0.15633392333984375, 0.191162109375, 0.22599029541015625, 0.2608184814453125, 0.29564666748046875, 0.330474853515625, 0.36530303955078125, 0.4001312255859375, 0.43495941162109375, 0.46978759765625, 0.5046157836914062, 0.5394439697265625, 0.5742721557617188, 0.609100341796875, 0.6439285278320312, 0.6787567138671875, 0.7135848999023438, 0.7484130859375, 0.7832412719726562, 0.8180694580078125, 0.8528976440429688, 0.887725830078125, 0.9225540161132812, 0.9573822021484375, 0.9922103881835938, 1.02703857421875, 1.0618667602539062, 1.0966949462890625, 1.1315231323242188, 1.166351318359375, 1.2011795043945312, 1.2360076904296875, 1.2708358764648438, 1.3056640625]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 5.0, 13.0, 276.0, 557.0, 132.0, 20.0, 6.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.66816520690918, -12.995689392089844, -12.323213577270508, -11.650738716125488, -10.978262901306152, -10.305787086486816, -9.633312225341797, -8.960836410522461, -8.288360595703125, -7.615884780883789, -6.943409442901611, -6.270934104919434, -5.598458290100098, -4.925982475280762, -4.253507137298584, -3.5810317993164062, -2.9085559844970703, -2.2360804080963135, -1.5636048316955566, -0.8911292552947998, -0.21865367889404297, 0.45382189750671387, 1.1262974739074707, 1.7987728118896484, 2.4712486267089844, 3.143724203109741, 3.816199779510498, 4.488675117492676, 5.161150932312012, 5.833626747131348, 6.506102085113525, 7.178577423095703, 7.851051330566406, 8.523527145385742, 9.196002960205078, 9.868477821350098, 10.540953636169434, 11.21342945098877, 11.885904312133789, 12.558380126953125, 13.230855941772461, 13.903331756591797, 14.575807571411133, 15.248282432556152, 15.920758247375488, 16.593233108520508, 17.265708923339844, 17.93818473815918, 18.610660552978516, 19.28313636779785, 19.955612182617188, 20.628087997436523, 21.30056381225586, 21.973037719726562, 22.6455135345459, 23.317989349365234, 23.99046516418457, 24.662940979003906, 25.335416793823242, 26.007892608642578, 26.68036651611328, 27.352842330932617, 28.025318145751953, 28.69779396057129, 29.370269775390625]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 7.0, 7.0, 8.0, 5.0, 5.0, 9.0, 17.0, 16.0, 14.0, 16.0, 25.0, 29.0, 43.0, 18.0, 26.0, 39.0, 36.0, 43.0, 42.0, 53.0, 34.0, 35.0, 50.0, 43.0, 32.0, 41.0, 29.0, 32.0, 32.0, 29.0, 29.0, 27.0, 27.0, 17.0, 16.0, 18.0, 15.0, 10.0, 5.0, 9.0, 6.0, 2.0, 5.0, 3.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-3.2756569385528564, -3.176650285720825, -3.077643871307373, -2.978637218475342, -2.8796305656433105, -2.7806239128112793, -2.681617498397827, -2.582610845565796, -2.4836041927337646, -2.3845975399017334, -2.2855911254882812, -2.18658447265625, -2.0875778198242188, -1.988571286201477, -1.8895647525787354, -1.790558099746704, -1.6915515661239624, -1.5925450325012207, -1.4935383796691895, -1.3945318460464478, -1.2955251932144165, -1.1965186595916748, -1.0975120067596436, -0.9985054731369019, -0.8994988799095154, -0.8004922866821289, -0.7014856934547424, -0.602479100227356, -0.5034725666046143, -0.4044659435749054, -0.3054593801498413, -0.20645278692245483, -0.10744619369506836, -0.008439607918262482, 0.0905669778585434, 0.18957355618476868, 0.28858014941215515, 0.3875867426395416, 0.4865933060646057, 0.5855998992919922, 0.6846064925193787, 0.7836130857467651, 0.8826196789741516, 0.9816262722015381, 1.0806328058242798, 1.179639458656311, 1.2786459922790527, 1.377652645111084, 1.4766591787338257, 1.5756657123565674, 1.6746723651885986, 1.7736788988113403, 1.8726855516433716, 1.9716920852661133, 2.0706987380981445, 2.169705390930176, 2.268711805343628, 2.367718458175659, 2.4667248725891113, 2.5657315254211426, 2.664738178253174, 2.763744831085205, 2.8627512454986572, 2.9617578983306885, 3.0607645511627197]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 4.0, 5.0, 5.0, 7.0, 11.0, 9.0, 19.0, 19.0, 44.0, 63.0, 101.0, 206.0, 398.0, 844.0, 1935.0, 5964.0, 25234.0, 176246.0, 688874.0, 121134.0, 19410.0, 4768.0, 1688.0, 740.0, 393.0, 182.0, 98.0, 49.0, 40.0, 18.0, 12.0, 13.0, 8.0, 11.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-3.1015625, -3.002838134765625, -2.90411376953125, -2.805389404296875, -2.7066650390625, -2.607940673828125, -2.50921630859375, -2.410491943359375, -2.311767578125, -2.213043212890625, -2.11431884765625, -2.015594482421875, -1.9168701171875, -1.818145751953125, -1.71942138671875, -1.620697021484375, -1.52197265625, -1.423248291015625, -1.32452392578125, -1.225799560546875, -1.1270751953125, -1.028350830078125, -0.92962646484375, -0.830902099609375, -0.732177734375, -0.633453369140625, -0.53472900390625, -0.436004638671875, -0.3372802734375, -0.238555908203125, -0.13983154296875, -0.041107177734375, 0.0576171875, 0.156341552734375, 0.25506591796875, 0.353790283203125, 0.4525146484375, 0.551239013671875, 0.64996337890625, 0.748687744140625, 0.847412109375, 0.946136474609375, 1.04486083984375, 1.143585205078125, 1.2423095703125, 1.341033935546875, 1.43975830078125, 1.538482666015625, 1.63720703125, 1.735931396484375, 1.83465576171875, 1.933380126953125, 2.0321044921875, 2.130828857421875, 2.22955322265625, 2.328277587890625, 2.427001953125, 2.525726318359375, 2.62445068359375, 2.723175048828125, 2.8218994140625, 2.920623779296875, 3.01934814453125, 3.118072509765625, 3.216796875]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 4.0, 6.0, 10.0, 13.0, 14.0, 31.0, 38.0, 46.0, 66.0, 80.0, 97.0, 76.0, 83.0, 100.0, 85.0, 58.0, 56.0, 42.0, 28.0, 30.0, 18.0, 9.0, 7.0, 5.0, 4.0, 0.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5810546875, -1.541748046875, -1.50244140625, -1.463134765625, -1.423828125, -1.384521484375, -1.34521484375, -1.305908203125, -1.2666015625, -1.227294921875, -1.18798828125, -1.148681640625, -1.109375, -1.070068359375, -1.03076171875, -0.991455078125, -0.9521484375, -0.912841796875, -0.87353515625, -0.834228515625, -0.794921875, -0.755615234375, -0.71630859375, -0.677001953125, -0.6376953125, -0.598388671875, -0.55908203125, -0.519775390625, -0.48046875, -0.441162109375, -0.40185546875, -0.362548828125, -0.3232421875, -0.283935546875, -0.24462890625, -0.205322265625, -0.166015625, -0.126708984375, -0.08740234375, -0.048095703125, -0.0087890625, 0.030517578125, 0.06982421875, 0.109130859375, 0.1484375, 0.187744140625, 0.22705078125, 0.266357421875, 0.3056640625, 0.344970703125, 0.38427734375, 0.423583984375, 0.462890625, 0.502197265625, 0.54150390625, 0.580810546875, 0.6201171875, 0.659423828125, 0.69873046875, 0.738037109375, 0.77734375, 0.816650390625, 0.85595703125, 0.895263671875, 0.9345703125]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 2.0, 1.0, 3.0, 0.0, 3.0, 7.0, 7.0, 11.0, 8.0, 10.0, 20.0, 18.0, 32.0, 35.0, 41.0, 48.0, 98.0, 152.0, 244.0, 481.0, 1162.0, 3170.0, 10658.0, 45010.0, 265970.0, 577225.0, 112243.0, 22416.0, 5815.0, 1899.0, 745.0, 363.0, 210.0, 120.0, 81.0, 70.0, 36.0, 33.0, 31.0, 27.0, 11.0, 11.0, 13.0, 5.0, 6.0, 3.0, 4.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.16796875, -2.093902587890625, -2.01983642578125, -1.945770263671875, -1.8717041015625, -1.797637939453125, -1.72357177734375, -1.649505615234375, -1.575439453125, -1.501373291015625, -1.42730712890625, -1.353240966796875, -1.2791748046875, -1.205108642578125, -1.13104248046875, -1.056976318359375, -0.98291015625, -0.908843994140625, -0.83477783203125, -0.760711669921875, -0.6866455078125, -0.612579345703125, -0.53851318359375, -0.464447021484375, -0.390380859375, -0.316314697265625, -0.24224853515625, -0.168182373046875, -0.0941162109375, -0.020050048828125, 0.05401611328125, 0.128082275390625, 0.2021484375, 0.276214599609375, 0.35028076171875, 0.424346923828125, 0.4984130859375, 0.572479248046875, 0.64654541015625, 0.720611572265625, 0.794677734375, 0.868743896484375, 0.94281005859375, 1.016876220703125, 1.0909423828125, 1.165008544921875, 1.23907470703125, 1.313140869140625, 1.38720703125, 1.461273193359375, 1.53533935546875, 1.609405517578125, 1.6834716796875, 1.757537841796875, 1.83160400390625, 1.905670166015625, 1.979736328125, 2.053802490234375, 2.12786865234375, 2.201934814453125, 2.2760009765625, 2.350067138671875, 2.42413330078125, 2.498199462890625, 2.572265625]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 6.0, 10.0, 6.0, 7.0, 11.0, 11.0, 21.0, 21.0, 27.0, 29.0, 29.0, 31.0, 36.0, 44.0, 45.0, 48.0, 54.0, 55.0, 55.0, 64.0, 64.0, 42.0, 31.0, 34.0, 36.0, 32.0, 39.0, 15.0, 21.0, 19.0, 13.0, 4.0, 12.0, 8.0, 8.0, 4.0, 8.0, 1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.470703125, -3.365142822265625, -3.25958251953125, -3.154022216796875, -3.0484619140625, -2.942901611328125, -2.83734130859375, -2.731781005859375, -2.626220703125, -2.520660400390625, -2.41510009765625, -2.309539794921875, -2.2039794921875, -2.098419189453125, -1.99285888671875, -1.887298583984375, -1.78173828125, -1.676177978515625, -1.57061767578125, -1.465057373046875, -1.3594970703125, -1.253936767578125, -1.14837646484375, -1.042816162109375, -0.937255859375, -0.831695556640625, -0.72613525390625, -0.620574951171875, -0.5150146484375, -0.409454345703125, -0.30389404296875, -0.198333740234375, -0.0927734375, 0.012786865234375, 0.11834716796875, 0.223907470703125, 0.3294677734375, 0.435028076171875, 0.54058837890625, 0.646148681640625, 0.751708984375, 0.857269287109375, 0.96282958984375, 1.068389892578125, 1.1739501953125, 1.279510498046875, 1.38507080078125, 1.490631103515625, 1.59619140625, 1.701751708984375, 1.80731201171875, 1.912872314453125, 2.0184326171875, 2.123992919921875, 2.22955322265625, 2.335113525390625, 2.440673828125, 2.546234130859375, 2.65179443359375, 2.757354736328125, 2.8629150390625, 2.968475341796875, 3.07403564453125, 3.179595947265625, 3.28515625]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 5.0, 18.0, 15.0, 51.0, 284.0, 2601.0, 1024844.0, 19982.0, 570.0, 129.0, 36.0, 11.0, 5.0, 5.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.72265625, -5.52996826171875, -5.3372802734375, -5.14459228515625, -4.951904296875, -4.75921630859375, -4.5665283203125, -4.37384033203125, -4.18115234375, -3.98846435546875, -3.7957763671875, -3.60308837890625, -3.410400390625, -3.21771240234375, -3.0250244140625, -2.83233642578125, -2.6396484375, -2.44696044921875, -2.2542724609375, -2.06158447265625, -1.868896484375, -1.67620849609375, -1.4835205078125, -1.29083251953125, -1.09814453125, -0.90545654296875, -0.7127685546875, -0.52008056640625, -0.327392578125, -0.13470458984375, 0.0579833984375, 0.25067138671875, 0.443359375, 0.63604736328125, 0.8287353515625, 1.02142333984375, 1.214111328125, 1.40679931640625, 1.5994873046875, 1.79217529296875, 1.98486328125, 2.17755126953125, 2.3702392578125, 2.56292724609375, 2.755615234375, 2.94830322265625, 3.1409912109375, 3.33367919921875, 3.5263671875, 3.71905517578125, 3.9117431640625, 4.10443115234375, 4.297119140625, 4.48980712890625, 4.6824951171875, 4.87518310546875, 5.06787109375, 5.26055908203125, 5.4532470703125, 5.64593505859375, 5.838623046875, 6.03131103515625, 6.2239990234375, 6.41668701171875, 6.609375]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 5.0, 5.0, 13.0, 34.0, 88.0, 286.0, 371.0, 141.0, 45.0, 13.0, 9.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006246566772460938, -0.0006078332662582397, -0.0005910098552703857, -0.0005741864442825317, -0.0005573630332946777, -0.0005405396223068237, -0.0005237162113189697, -0.0005068928003311157, -0.0004900693893432617, -0.0004732459783554077, -0.0004564225673675537, -0.0004395991563796997, -0.0004227757453918457, -0.0004059523344039917, -0.0003891289234161377, -0.0003723055124282837, -0.0003554821014404297, -0.0003386586904525757, -0.0003218352794647217, -0.0003050118684768677, -0.00028818845748901367, -0.00027136504650115967, -0.00025454163551330566, -0.00023771822452545166, -0.00022089481353759766, -0.00020407140254974365, -0.00018724799156188965, -0.00017042458057403564, -0.00015360116958618164, -0.00013677775859832764, -0.00011995434761047363, -0.00010313093662261963, -8.630752563476562e-05, -6.948411464691162e-05, -5.266070365905762e-05, -3.583729267120361e-05, -1.901388168334961e-05, -2.1904706954956055e-06, 1.4632940292358398e-05, 3.14563512802124e-05, 4.8279762268066406e-05, 6.510317325592041e-05, 8.192658424377441e-05, 9.874999523162842e-05, 0.00011557340621948242, 0.00013239681720733643, 0.00014922022819519043, 0.00016604363918304443, 0.00018286705017089844, 0.00019969046115875244, 0.00021651387214660645, 0.00023333728313446045, 0.00025016069412231445, 0.00026698410511016846, 0.00028380751609802246, 0.00030063092708587646, 0.00031745433807373047, 0.00033427774906158447, 0.0003511011600494385, 0.0003679245710372925, 0.0003847479820251465, 0.0004015713930130005, 0.0004183948040008545, 0.0004352182149887085, 0.0004520416259765625]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 6.0, 2.0, 3.0, 14.0, 29.0, 53.0, 125.0, 373.0, 4012.0, 1038109.0, 5250.0, 353.0, 148.0, 53.0, 23.0, 5.0, 4.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.69140625, -7.496826171875, -7.30224609375, -7.107666015625, -6.9130859375, -6.718505859375, -6.52392578125, -6.329345703125, -6.134765625, -5.940185546875, -5.74560546875, -5.551025390625, -5.3564453125, -5.161865234375, -4.96728515625, -4.772705078125, -4.578125, -4.383544921875, -4.18896484375, -3.994384765625, -3.7998046875, -3.605224609375, -3.41064453125, -3.216064453125, -3.021484375, -2.826904296875, -2.63232421875, -2.437744140625, -2.2431640625, -2.048583984375, -1.85400390625, -1.659423828125, -1.46484375, -1.270263671875, -1.07568359375, -0.881103515625, -0.6865234375, -0.491943359375, -0.29736328125, -0.102783203125, 0.091796875, 0.286376953125, 0.48095703125, 0.675537109375, 0.8701171875, 1.064697265625, 1.25927734375, 1.453857421875, 1.6484375, 1.843017578125, 2.03759765625, 2.232177734375, 2.4267578125, 2.621337890625, 2.81591796875, 3.010498046875, 3.205078125, 3.399658203125, 3.59423828125, 3.788818359375, 3.9833984375, 4.177978515625, 4.37255859375, 4.567138671875, 4.76171875]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 7.0, 30.0, 123.0, 529.0, 263.0, 51.0, 9.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.0, -6.832244873046875, -6.66448974609375, -6.496734619140625, -6.3289794921875, -6.161224365234375, -5.99346923828125, -5.825714111328125, -5.657958984375, -5.490203857421875, -5.32244873046875, -5.154693603515625, -4.9869384765625, -4.819183349609375, -4.65142822265625, -4.483673095703125, -4.31591796875, -4.148162841796875, -3.98040771484375, -3.812652587890625, -3.6448974609375, -3.477142333984375, -3.30938720703125, -3.141632080078125, -2.973876953125, -2.806121826171875, -2.63836669921875, -2.470611572265625, -2.3028564453125, -2.135101318359375, -1.96734619140625, -1.799591064453125, -1.6318359375, -1.464080810546875, -1.29632568359375, -1.128570556640625, -0.9608154296875, -0.793060302734375, -0.62530517578125, -0.457550048828125, -0.289794921875, -0.122039794921875, 0.04571533203125, 0.213470458984375, 0.3812255859375, 0.548980712890625, 0.71673583984375, 0.884490966796875, 1.05224609375, 1.220001220703125, 1.38775634765625, 1.555511474609375, 1.7232666015625, 1.891021728515625, 2.05877685546875, 2.226531982421875, 2.394287109375, 2.562042236328125, 2.72979736328125, 2.897552490234375, 3.0653076171875, 3.233062744140625, 3.40081787109375, 3.568572998046875, 3.736328125]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 5.0, 7.0, 32.0, 88.0, 240.0, 316.0, 191.0, 74.0, 31.0, 19.0, 6.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-46.45402526855469, -45.52759552001953, -44.60116195678711, -43.67472839355469, -42.74829864501953, -41.821868896484375, -40.89543533325195, -39.96900177001953, -39.042572021484375, -38.11614227294922, -37.1897087097168, -36.263275146484375, -35.33684539794922, -34.41041564941406, -33.48398208618164, -32.55754852294922, -31.631118774414062, -30.704687118530273, -29.778255462646484, -28.851823806762695, -27.925392150878906, -26.998960494995117, -26.072528839111328, -25.14609718322754, -24.21966552734375, -23.29323387145996, -22.366802215576172, -21.440370559692383, -20.513938903808594, -19.587507247924805, -18.661075592041016, -17.734643936157227, -16.80821418762207, -15.881782531738281, -14.955350875854492, -14.028919219970703, -13.102487564086914, -12.176055908203125, -11.249624252319336, -10.323192596435547, -9.396760940551758, -8.470329284667969, -7.54389762878418, -6.617465972900391, -5.691034317016602, -4.7646026611328125, -3.8381710052490234, -2.9117393493652344, -1.9853076934814453, -1.0588760375976562, -0.1324443817138672, 0.7939872741699219, 1.720418930053711, 2.6468505859375, 3.573282241821289, 4.499713897705078, 5.426145553588867, 6.352577209472656, 7.279008865356445, 8.205440521240234, 9.131872177124023, 10.058303833007812, 10.984735488891602, 11.91116714477539, 12.83759880065918]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 1.0, 4.0, 3.0, 1.0, 2.0, 1.0, 8.0, 5.0, 10.0, 18.0, 14.0, 16.0, 16.0, 20.0, 19.0, 31.0, 31.0, 38.0, 37.0, 32.0, 41.0, 33.0, 59.0, 32.0, 49.0, 42.0, 41.0, 31.0, 42.0, 32.0, 33.0, 35.0, 28.0, 31.0, 25.0, 18.0, 19.0, 15.0, 12.0, 21.0, 20.0, 7.0, 6.0, 4.0, 6.0, 7.0, 6.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-15.201499938964844, -14.745380401611328, -14.289259910583496, -13.83314037322998, -13.377019882202148, -12.920900344848633, -12.464780807495117, -12.008660316467285, -11.55254077911377, -11.096421241760254, -10.640300750732422, -10.184181213378906, -9.728060722351074, -9.271941184997559, -8.815820693969727, -8.359701156616211, -7.903581142425537, -7.447461128234863, -6.9913411140441895, -6.535221099853516, -6.0791015625, -5.622981548309326, -5.166861534118652, -4.710741996765137, -4.254621505737305, -3.798501491546631, -3.342381715774536, -2.8862617015838623, -2.4301419258117676, -1.9740219116210938, -1.51790189743042, -1.0617821216583252, -0.6056623458862305, -0.1495424211025238, 0.30657750368118286, 0.7626974582672119, 1.2188173532485962, 1.6749372482299805, 2.1310572624206543, 2.587177038192749, 3.043297052383423, 3.4994170665740967, 3.9555368423461914, 4.411656856536865, 4.867776870727539, 5.323896408081055, 5.780016899108887, 6.236136436462402, 6.692256450653076, 7.14837646484375, 7.604496479034424, 8.060616493225098, 8.516736030578613, 8.972856521606445, 9.428976058959961, 9.885095596313477, 10.341216087341309, 10.797335624694824, 11.253456115722656, 11.709575653076172, 12.165696144104004, 12.62181568145752, 13.077936172485352, 13.534055709838867, 13.990175247192383]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0, 8.0, 13.0, 16.0, 22.0, 36.0, 57.0, 111.0, 162.0, 263.0, 617.0, 1376.0, 3948.0, 15147.0, 138809.0, 3927065.0, 88471.0, 12233.0, 3449.0, 1252.0, 526.0, 296.0, 155.0, 100.0, 57.0, 30.0, 21.0, 13.0, 7.0, 8.0, 8.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.072265625, -2.971282958984375, -2.87030029296875, -2.769317626953125, -2.6683349609375, -2.567352294921875, -2.46636962890625, -2.365386962890625, -2.264404296875, -2.163421630859375, -2.06243896484375, -1.961456298828125, -1.8604736328125, -1.759490966796875, -1.65850830078125, -1.557525634765625, -1.45654296875, -1.355560302734375, -1.25457763671875, -1.153594970703125, -1.0526123046875, -0.951629638671875, -0.85064697265625, -0.749664306640625, -0.648681640625, -0.547698974609375, -0.44671630859375, -0.345733642578125, -0.2447509765625, -0.143768310546875, -0.04278564453125, 0.058197021484375, 0.1591796875, 0.260162353515625, 0.36114501953125, 0.462127685546875, 0.5631103515625, 0.664093017578125, 0.76507568359375, 0.866058349609375, 0.967041015625, 1.068023681640625, 1.16900634765625, 1.269989013671875, 1.3709716796875, 1.471954345703125, 1.57293701171875, 1.673919677734375, 1.77490234375, 1.875885009765625, 1.97686767578125, 2.077850341796875, 2.1788330078125, 2.279815673828125, 2.38079833984375, 2.481781005859375, 2.582763671875, 2.683746337890625, 2.78472900390625, 2.885711669921875, 2.9866943359375, 3.087677001953125, 3.18865966796875, 3.289642333984375, 3.390625]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 12.0, 2.0, 15.0, 12.0, 23.0, 19.0, 40.0, 60.0, 54.0, 53.0, 73.0, 85.0, 79.0, 88.0, 84.0, 66.0, 59.0, 41.0, 39.0, 27.0, 25.0, 20.0, 10.0, 10.0, 4.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.6806640625, -1.6392974853515625, -1.597930908203125, -1.5565643310546875, -1.51519775390625, -1.4738311767578125, -1.432464599609375, -1.3910980224609375, -1.3497314453125, -1.3083648681640625, -1.266998291015625, -1.2256317138671875, -1.18426513671875, -1.1428985595703125, -1.101531982421875, -1.0601654052734375, -1.018798828125, -0.9774322509765625, -0.936065673828125, -0.8946990966796875, -0.85333251953125, -0.8119659423828125, -0.770599365234375, -0.7292327880859375, -0.6878662109375, -0.6464996337890625, -0.605133056640625, -0.5637664794921875, -0.52239990234375, -0.4810333251953125, -0.439666748046875, -0.3983001708984375, -0.35693359375, -0.3155670166015625, -0.274200439453125, -0.2328338623046875, -0.19146728515625, -0.1501007080078125, -0.108734130859375, -0.0673675537109375, -0.0260009765625, 0.0153656005859375, 0.056732177734375, 0.0980987548828125, 0.13946533203125, 0.1808319091796875, 0.222198486328125, 0.2635650634765625, 0.304931640625, 0.3462982177734375, 0.387664794921875, 0.4290313720703125, 0.47039794921875, 0.5117645263671875, 0.553131103515625, 0.5944976806640625, 0.6358642578125, 0.6772308349609375, 0.718597412109375, 0.7599639892578125, 0.80133056640625, 0.8426971435546875, 0.884063720703125, 0.9254302978515625, 0.966796875]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 5.0, 7.0, 14.0, 22.0, 35.0, 52.0, 96.0, 201.0, 849.0, 7386.0, 1248328.0, 2927702.0, 8266.0, 912.0, 219.0, 87.0, 40.0, 31.0, 18.0, 8.0, 9.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.765625, -10.53167724609375, -10.2977294921875, -10.06378173828125, -9.829833984375, -9.59588623046875, -9.3619384765625, -9.12799072265625, -8.89404296875, -8.66009521484375, -8.4261474609375, -8.19219970703125, -7.958251953125, -7.72430419921875, -7.4903564453125, -7.25640869140625, -7.0224609375, -6.78851318359375, -6.5545654296875, -6.32061767578125, -6.086669921875, -5.85272216796875, -5.6187744140625, -5.38482666015625, -5.15087890625, -4.91693115234375, -4.6829833984375, -4.44903564453125, -4.215087890625, -3.98114013671875, -3.7471923828125, -3.51324462890625, -3.279296875, -3.04534912109375, -2.8114013671875, -2.57745361328125, -2.343505859375, -2.10955810546875, -1.8756103515625, -1.64166259765625, -1.40771484375, -1.17376708984375, -0.9398193359375, -0.70587158203125, -0.471923828125, -0.23797607421875, -0.0040283203125, 0.22991943359375, 0.4638671875, 0.69781494140625, 0.9317626953125, 1.16571044921875, 1.399658203125, 1.63360595703125, 1.8675537109375, 2.10150146484375, 2.33544921875, 2.56939697265625, 2.8033447265625, 3.03729248046875, 3.271240234375, 3.50518798828125, 3.7391357421875, 3.97308349609375, 4.20703125]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 4.0, 1.0, 4.0, 12.0, 14.0, 12.0, 27.0, 55.0, 104.0, 307.0, 2166.0, 966.0, 197.0, 94.0, 43.0, 24.0, 20.0, 16.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.072265625, -2.02276611328125, -1.9732666015625, -1.92376708984375, -1.874267578125, -1.82476806640625, -1.7752685546875, -1.72576904296875, -1.67626953125, -1.62677001953125, -1.5772705078125, -1.52777099609375, -1.478271484375, -1.42877197265625, -1.3792724609375, -1.32977294921875, -1.2802734375, -1.23077392578125, -1.1812744140625, -1.13177490234375, -1.082275390625, -1.03277587890625, -0.9832763671875, -0.93377685546875, -0.88427734375, -0.83477783203125, -0.7852783203125, -0.73577880859375, -0.686279296875, -0.63677978515625, -0.5872802734375, -0.53778076171875, -0.48828125, -0.43878173828125, -0.3892822265625, -0.33978271484375, -0.290283203125, -0.24078369140625, -0.1912841796875, -0.14178466796875, -0.09228515625, -0.04278564453125, 0.0067138671875, 0.05621337890625, 0.105712890625, 0.15521240234375, 0.2047119140625, 0.25421142578125, 0.3037109375, 0.35321044921875, 0.4027099609375, 0.45220947265625, 0.501708984375, 0.55120849609375, 0.6007080078125, 0.65020751953125, 0.69970703125, 0.74920654296875, 0.7987060546875, 0.84820556640625, 0.897705078125, 0.94720458984375, 0.9967041015625, 1.04620361328125, 1.095703125]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 4.0, 3.0, 4.0, 9.0, 16.0, 38.0, 77.0, 112.0, 168.0, 175.0, 143.0, 116.0, 74.0, 27.0, 18.0, 13.0, 3.0, 3.0, 1.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-9.530097007751465, -9.312538146972656, -9.094979286193848, -8.877421379089355, -8.659862518310547, -8.442303657531738, -8.22474479675293, -8.007186889648438, -7.789628028869629, -7.57206916809082, -7.35451078414917, -7.136951923370361, -6.919393539428711, -6.701834678649902, -6.484275817871094, -6.266717433929443, -6.049158573150635, -5.831599712371826, -5.614041328430176, -5.396482467651367, -5.178924083709717, -4.961365222930908, -4.743806838989258, -4.526247978210449, -4.308689117431641, -4.091130256652832, -3.8735718727111816, -3.656013011932373, -3.4384546279907227, -3.220895767211914, -3.0033371448516846, -2.785778522491455, -2.5682196617126465, -2.350661039352417, -2.1331024169921875, -1.9155436754226685, -1.697985053062439, -1.4804264307022095, -1.2628676891326904, -1.045309066772461, -0.8277504444122314, -0.610191822052002, -0.3926331400871277, -0.17507445812225342, 0.042484164237976074, 0.26004278659820557, 0.4776015281677246, 0.6951601505279541, 0.9127187728881836, 1.130277395248413, 1.3478360176086426, 1.5653947591781616, 1.7829533815383911, 2.00051212310791, 2.2180707454681396, 2.435629367828369, 2.6531879901885986, 2.870746612548828, 3.0883052349090576, 3.305863857269287, 3.5234227180480957, 3.740981101989746, 3.9585399627685547, 4.176098823547363, 4.393657207489014]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 6.0, 4.0, 9.0, 8.0, 9.0, 9.0, 10.0, 16.0, 14.0, 15.0, 20.0, 36.0, 32.0, 26.0, 37.0, 26.0, 40.0, 52.0, 47.0, 34.0, 57.0, 61.0, 43.0, 47.0, 29.0, 35.0, 48.0, 29.0, 34.0, 25.0, 22.0, 17.0, 21.0, 9.0, 15.0, 11.0, 8.0, 7.0, 12.0, 7.0, 5.0, 4.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.1181986331939697, -3.0216519832611084, -2.925105333328247, -2.8285584449768066, -2.7320117950439453, -2.635465145111084, -2.5389184951782227, -2.4423718452453613, -2.3458251953125, -2.2492785453796387, -2.1527318954467773, -2.056185245513916, -1.9596383571624756, -1.8630917072296143, -1.766545057296753, -1.6699984073638916, -1.5734515190124512, -1.4769048690795898, -1.380358099937439, -1.2838114500045776, -1.1872646808624268, -1.0907180309295654, -0.9941713809967041, -0.897624671459198, -0.8010779619216919, -0.7045312523841858, -0.6079845428466797, -0.5114378929138184, -0.41489118337631226, -0.31834447383880615, -0.22179782390594482, -0.12525111436843872, -0.028704166412353516, 0.0678425282239914, 0.1643892228603363, 0.26093590259552, 0.3574826121330261, 0.4540293216705322, 0.5505759716033936, 0.6471226811408997, 0.7436693906784058, 0.8402161002159119, 0.936762809753418, 1.0333094596862793, 1.1298561096191406, 1.2264028787612915, 1.3229495286941528, 1.4194962978363037, 1.516042947769165, 1.6125895977020264, 1.7091363668441772, 1.8056830167770386, 1.9022297859191895, 1.9987764358520508, 2.095323085784912, 2.1918697357177734, 2.2884163856506348, 2.384963035583496, 2.4815096855163574, 2.5780563354492188, 2.674603223800659, 2.7711498737335205, 2.867696523666382, 2.964243173599243, 3.0607900619506836]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 7.0, 7.0, 9.0, 21.0, 31.0, 50.0, 75.0, 133.0, 281.0, 676.0, 1893.0, 7155.0, 35741.0, 240616.0, 608548.0, 126079.0, 20531.0, 4427.0, 1273.0, 468.0, 214.0, 123.0, 73.0, 45.0, 28.0, 20.0, 11.0, 9.0, 7.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.609375, -3.507232666015625, -3.40509033203125, -3.302947998046875, -3.2008056640625, -3.098663330078125, -2.99652099609375, -2.894378662109375, -2.792236328125, -2.690093994140625, -2.58795166015625, -2.485809326171875, -2.3836669921875, -2.281524658203125, -2.17938232421875, -2.077239990234375, -1.97509765625, -1.872955322265625, -1.77081298828125, -1.668670654296875, -1.5665283203125, -1.464385986328125, -1.36224365234375, -1.260101318359375, -1.157958984375, -1.055816650390625, -0.95367431640625, -0.851531982421875, -0.7493896484375, -0.647247314453125, -0.54510498046875, -0.442962646484375, -0.3408203125, -0.238677978515625, -0.13653564453125, -0.034393310546875, 0.0677490234375, 0.169891357421875, 0.27203369140625, 0.374176025390625, 0.476318359375, 0.578460693359375, 0.68060302734375, 0.782745361328125, 0.8848876953125, 0.987030029296875, 1.08917236328125, 1.191314697265625, 1.29345703125, 1.395599365234375, 1.49774169921875, 1.599884033203125, 1.7020263671875, 1.804168701171875, 1.90631103515625, 2.008453369140625, 2.110595703125, 2.212738037109375, 2.31488037109375, 2.417022705078125, 2.5191650390625, 2.621307373046875, 2.72344970703125, 2.825592041015625, 2.927734375]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 6.0, 12.0, 8.0, 19.0, 21.0, 21.0, 32.0, 47.0, 44.0, 52.0, 69.0, 77.0, 68.0, 94.0, 68.0, 56.0, 62.0, 56.0, 54.0, 25.0, 32.0, 27.0, 19.0, 13.0, 4.0, 10.0, 2.0, 5.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0], "bins": [-1.564453125, -1.525604248046875, -1.48675537109375, -1.447906494140625, -1.4090576171875, -1.370208740234375, -1.33135986328125, -1.292510986328125, -1.253662109375, -1.214813232421875, -1.17596435546875, -1.137115478515625, -1.0982666015625, -1.059417724609375, -1.02056884765625, -0.981719970703125, -0.94287109375, -0.904022216796875, -0.86517333984375, -0.826324462890625, -0.7874755859375, -0.748626708984375, -0.70977783203125, -0.670928955078125, -0.632080078125, -0.593231201171875, -0.55438232421875, -0.515533447265625, -0.4766845703125, -0.437835693359375, -0.39898681640625, -0.360137939453125, -0.3212890625, -0.282440185546875, -0.24359130859375, -0.204742431640625, -0.1658935546875, -0.127044677734375, -0.08819580078125, -0.049346923828125, -0.010498046875, 0.028350830078125, 0.06719970703125, 0.106048583984375, 0.1448974609375, 0.183746337890625, 0.22259521484375, 0.261444091796875, 0.30029296875, 0.339141845703125, 0.37799072265625, 0.416839599609375, 0.4556884765625, 0.494537353515625, 0.53338623046875, 0.572235107421875, 0.611083984375, 0.649932861328125, 0.68878173828125, 0.727630615234375, 0.7664794921875, 0.805328369140625, 0.84417724609375, 0.883026123046875, 0.921875]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 4.0, 2.0, 2.0, 2.0, 5.0, 11.0, 4.0, 13.0, 18.0, 21.0, 20.0, 24.0, 32.0, 63.0, 70.0, 102.0, 201.0, 360.0, 977.0, 3139.0, 12620.0, 62144.0, 350555.0, 492253.0, 99796.0, 19112.0, 4507.0, 1328.0, 475.0, 226.0, 134.0, 99.0, 67.0, 37.0, 33.0, 26.0, 22.0, 14.0, 9.0, 9.0, 4.0, 5.0, 3.0, 4.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.7734375, -2.6871337890625, -2.600830078125, -2.5145263671875, -2.42822265625, -2.3419189453125, -2.255615234375, -2.1693115234375, -2.0830078125, -1.9967041015625, -1.910400390625, -1.8240966796875, -1.73779296875, -1.6514892578125, -1.565185546875, -1.4788818359375, -1.392578125, -1.3062744140625, -1.219970703125, -1.1336669921875, -1.04736328125, -0.9610595703125, -0.874755859375, -0.7884521484375, -0.7021484375, -0.6158447265625, -0.529541015625, -0.4432373046875, -0.35693359375, -0.2706298828125, -0.184326171875, -0.0980224609375, -0.01171875, 0.0745849609375, 0.160888671875, 0.2471923828125, 0.33349609375, 0.4197998046875, 0.506103515625, 0.5924072265625, 0.6787109375, 0.7650146484375, 0.851318359375, 0.9376220703125, 1.02392578125, 1.1102294921875, 1.196533203125, 1.2828369140625, 1.369140625, 1.4554443359375, 1.541748046875, 1.6280517578125, 1.71435546875, 1.8006591796875, 1.886962890625, 1.9732666015625, 2.0595703125, 2.1458740234375, 2.232177734375, 2.3184814453125, 2.40478515625, 2.4910888671875, 2.577392578125, 2.6636962890625, 2.75]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 3.0, 5.0, 6.0, 10.0, 8.0, 18.0, 19.0, 15.0, 22.0, 22.0, 21.0, 34.0, 27.0, 34.0, 39.0, 38.0, 42.0, 46.0, 53.0, 50.0, 41.0, 39.0, 38.0, 38.0, 52.0, 38.0, 28.0, 26.0, 31.0, 30.0, 17.0, 28.0, 17.0, 16.0, 12.0, 9.0, 7.0, 9.0, 4.0, 3.0, 3.0, 4.0, 4.0, 4.0, 0.0, 2.0], "bins": [-3.91796875, -3.813995361328125, -3.71002197265625, -3.606048583984375, -3.5020751953125, -3.398101806640625, -3.29412841796875, -3.190155029296875, -3.086181640625, -2.982208251953125, -2.87823486328125, -2.774261474609375, -2.6702880859375, -2.566314697265625, -2.46234130859375, -2.358367919921875, -2.25439453125, -2.150421142578125, -2.04644775390625, -1.942474365234375, -1.8385009765625, -1.734527587890625, -1.63055419921875, -1.526580810546875, -1.422607421875, -1.318634033203125, -1.21466064453125, -1.110687255859375, -1.0067138671875, -0.902740478515625, -0.79876708984375, -0.694793701171875, -0.5908203125, -0.486846923828125, -0.38287353515625, -0.278900146484375, -0.1749267578125, -0.070953369140625, 0.03302001953125, 0.136993408203125, 0.240966796875, 0.344940185546875, 0.44891357421875, 0.552886962890625, 0.6568603515625, 0.760833740234375, 0.86480712890625, 0.968780517578125, 1.07275390625, 1.176727294921875, 1.28070068359375, 1.384674072265625, 1.4886474609375, 1.592620849609375, 1.69659423828125, 1.800567626953125, 1.904541015625, 2.008514404296875, 2.11248779296875, 2.216461181640625, 2.3204345703125, 2.424407958984375, 2.52838134765625, 2.632354736328125, 2.736328125]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 6.0, 3.0, 1.0, 5.0, 9.0, 6.0, 14.0, 25.0, 41.0, 47.0, 109.0, 186.0, 398.0, 1044.0, 2998.0, 9495.0, 37295.0, 273794.0, 618971.0, 79240.0, 17240.0, 4840.0, 1593.0, 600.0, 272.0, 132.0, 82.0, 40.0, 30.0, 17.0, 9.0, 6.0, 5.0, 6.0, 0.0, 1.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.26953125, -1.2211151123046875, -1.172698974609375, -1.1242828369140625, -1.07586669921875, -1.0274505615234375, -0.979034423828125, -0.9306182861328125, -0.8822021484375, -0.8337860107421875, -0.785369873046875, -0.7369537353515625, -0.68853759765625, -0.6401214599609375, -0.591705322265625, -0.5432891845703125, -0.494873046875, -0.4464569091796875, -0.398040771484375, -0.3496246337890625, -0.30120849609375, -0.2527923583984375, -0.204376220703125, -0.1559600830078125, -0.1075439453125, -0.0591278076171875, -0.010711669921875, 0.0377044677734375, 0.08612060546875, 0.1345367431640625, 0.182952880859375, 0.2313690185546875, 0.27978515625, 0.3282012939453125, 0.376617431640625, 0.4250335693359375, 0.47344970703125, 0.5218658447265625, 0.570281982421875, 0.6186981201171875, 0.6671142578125, 0.7155303955078125, 0.763946533203125, 0.8123626708984375, 0.86077880859375, 0.9091949462890625, 0.957611083984375, 1.0060272216796875, 1.054443359375, 1.1028594970703125, 1.151275634765625, 1.1996917724609375, 1.24810791015625, 1.2965240478515625, 1.344940185546875, 1.3933563232421875, 1.4417724609375, 1.4901885986328125, 1.538604736328125, 1.5870208740234375, 1.63543701171875, 1.6838531494140625, 1.732269287109375, 1.7806854248046875, 1.8291015625]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 1.0, 4.0, 9.0, 7.0, 15.0, 14.0, 13.0, 24.0, 43.0, 57.0, 72.0, 116.0, 151.0, 130.0, 105.0, 69.0, 57.0, 36.0, 22.0, 16.0, 14.0, 8.0, 7.0, 5.0, 3.0, 6.0, 3.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00019478797912597656, -0.00018617138266563416, -0.00017755478620529175, -0.00016893818974494934, -0.00016032159328460693, -0.00015170499682426453, -0.00014308840036392212, -0.0001344718039035797, -0.0001258552074432373, -0.0001172386109828949, -0.00010862201452255249, -0.00010000541806221008, -9.138882160186768e-05, -8.277222514152527e-05, -7.415562868118286e-05, -6.553903222084045e-05, -5.692243576049805e-05, -4.830583930015564e-05, -3.968924283981323e-05, -3.1072646379470825e-05, -2.2456049919128418e-05, -1.383945345878601e-05, -5.2228569984436035e-06, 3.3937394618988037e-06, 1.2010335922241211e-05, 2.0626932382583618e-05, 2.9243528842926025e-05, 3.786012530326843e-05, 4.647672176361084e-05, 5.509331822395325e-05, 6.370991468429565e-05, 7.232651114463806e-05, 8.094310760498047e-05, 8.955970406532288e-05, 9.817630052566528e-05, 0.00010679289698600769, 0.0001154094934463501, 0.0001240260899066925, 0.0001326426863670349, 0.00014125928282737732, 0.00014987587928771973, 0.00015849247574806213, 0.00016710907220840454, 0.00017572566866874695, 0.00018434226512908936, 0.00019295886158943176, 0.00020157545804977417, 0.00021019205451011658, 0.00021880865097045898, 0.0002274252474308014, 0.0002360418438911438, 0.0002446584403514862, 0.0002532750368118286, 0.000261891633272171, 0.00027050822973251343, 0.00027912482619285583, 0.00028774142265319824, 0.00029635801911354065, 0.00030497461557388306, 0.00031359121203422546, 0.00032220780849456787, 0.0003308244049549103, 0.0003394410014152527, 0.0003480575978755951, 0.0003566741943359375]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 5.0, 7.0, 8.0, 20.0, 33.0, 68.0, 153.0, 472.0, 1857.0, 11599.0, 159379.0, 811223.0, 56059.0, 5985.0, 1122.0, 326.0, 130.0, 48.0, 20.0, 19.0, 7.0, 6.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-2.560546875, -2.4900665283203125, -2.419586181640625, -2.3491058349609375, -2.27862548828125, -2.2081451416015625, -2.137664794921875, -2.0671844482421875, -1.9967041015625, -1.9262237548828125, -1.855743408203125, -1.7852630615234375, -1.71478271484375, -1.6443023681640625, -1.573822021484375, -1.5033416748046875, -1.432861328125, -1.3623809814453125, -1.291900634765625, -1.2214202880859375, -1.15093994140625, -1.0804595947265625, -1.009979248046875, -0.9394989013671875, -0.8690185546875, -0.7985382080078125, -0.728057861328125, -0.6575775146484375, -0.58709716796875, -0.5166168212890625, -0.446136474609375, -0.3756561279296875, -0.30517578125, -0.2346954345703125, -0.164215087890625, -0.0937347412109375, -0.02325439453125, 0.0472259521484375, 0.117706298828125, 0.1881866455078125, 0.2586669921875, 0.3291473388671875, 0.399627685546875, 0.4701080322265625, 0.54058837890625, 0.6110687255859375, 0.681549072265625, 0.7520294189453125, 0.822509765625, 0.8929901123046875, 0.963470458984375, 1.0339508056640625, 1.10443115234375, 1.1749114990234375, 1.245391845703125, 1.3158721923828125, 1.3863525390625, 1.4568328857421875, 1.527313232421875, 1.5977935791015625, 1.66827392578125, 1.7387542724609375, 1.809234619140625, 1.8797149658203125, 1.9501953125]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 2.0, 3.0, 3.0, 6.0, 11.0, 15.0, 15.0, 23.0, 29.0, 34.0, 57.0, 62.0, 80.0, 91.0, 90.0, 87.0, 81.0, 79.0, 59.0, 42.0, 33.0, 30.0, 20.0, 12.0, 8.0, 7.0, 7.0, 8.0, 3.0, 2.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0537109375, -1.0193023681640625, -0.984893798828125, -0.9504852294921875, -0.91607666015625, -0.8816680908203125, -0.847259521484375, -0.8128509521484375, -0.7784423828125, -0.7440338134765625, -0.709625244140625, -0.6752166748046875, -0.64080810546875, -0.6063995361328125, -0.571990966796875, -0.5375823974609375, -0.503173828125, -0.4687652587890625, -0.434356689453125, -0.3999481201171875, -0.36553955078125, -0.3311309814453125, -0.296722412109375, -0.2623138427734375, -0.2279052734375, -0.1934967041015625, -0.159088134765625, -0.1246795654296875, -0.09027099609375, -0.0558624267578125, -0.021453857421875, 0.0129547119140625, 0.04736328125, 0.0817718505859375, 0.116180419921875, 0.1505889892578125, 0.18499755859375, 0.2194061279296875, 0.253814697265625, 0.2882232666015625, 0.3226318359375, 0.3570404052734375, 0.391448974609375, 0.4258575439453125, 0.46026611328125, 0.4946746826171875, 0.529083251953125, 0.5634918212890625, 0.597900390625, 0.6323089599609375, 0.666717529296875, 0.7011260986328125, 0.73553466796875, 0.7699432373046875, 0.804351806640625, 0.8387603759765625, 0.8731689453125, 0.9075775146484375, 0.941986083984375, 0.9763946533203125, 1.01080322265625, 1.0452117919921875, 1.079620361328125, 1.1140289306640625, 1.1484375]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 6.0, 22.0, 47.0, 139.0, 277.0, 279.0, 145.0, 54.0, 26.0, 8.0, 7.0, 2.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.60749816894531, -35.50053787231445, -34.393577575683594, -33.286617279052734, -32.179656982421875, -31.072696685791016, -29.96573829650879, -28.85877799987793, -27.75181770324707, -26.64485740661621, -25.53789710998535, -24.430936813354492, -23.323978424072266, -22.217018127441406, -21.110057830810547, -20.003097534179688, -18.896137237548828, -17.78917694091797, -16.68221664428711, -15.575257301330566, -14.468297004699707, -13.361336708068848, -12.254377365112305, -11.147417068481445, -10.040456771850586, -8.933496475219727, -7.826536655426025, -6.719576835632324, -5.612616539001465, -4.5056562423706055, -3.3986964225769043, -2.291736602783203, -1.1847801208496094, -0.0778200626373291, 1.0291399955749512, 2.1361000537872314, 3.2430601119995117, 4.350020408630371, 5.456980228424072, 6.563940048217773, 7.670900344848633, 8.777860641479492, 9.884820938110352, 10.991780281066895, 12.098740577697754, 13.205700874328613, 14.312660217285156, 15.419620513916016, 16.526580810546875, 17.633541107177734, 18.740501403808594, 19.847461700439453, 20.954421997070312, 22.061382293701172, 23.1683406829834, 24.275300979614258, 25.382261276245117, 26.489221572875977, 27.596181869506836, 28.703142166137695, 29.810100555419922, 30.91706085205078, 32.02402114868164, 33.1309814453125, 34.23794174194336]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 7.0, 5.0, 8.0, 9.0, 8.0, 18.0, 12.0, 27.0, 15.0, 31.0, 28.0, 35.0, 31.0, 36.0, 39.0, 48.0, 47.0, 50.0, 52.0, 40.0, 36.0, 50.0, 35.0, 39.0, 37.0, 38.0, 34.0, 37.0, 30.0, 25.0, 13.0, 12.0, 17.0, 17.0, 8.0, 9.0, 2.0, 3.0, 6.0, 3.0, 3.0, 4.0, 4.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.035300254821777, -13.497518539428711, -12.959736824035645, -12.421955108642578, -11.884174346923828, -11.346391677856445, -10.808610916137695, -10.270829200744629, -9.733047485351562, -9.195265769958496, -8.65748405456543, -8.119702339172363, -7.581921100616455, -7.044139385223389, -6.5063581466674805, -5.968576431274414, -5.430794715881348, -4.893013000488281, -4.355231285095215, -3.8174500465393066, -3.2796683311462402, -2.741886615753174, -2.2041051387786865, -1.6663236618041992, -1.1285419464111328, -0.590760350227356, -0.0529787540435791, 0.48480284214019775, 1.0225844383239746, 1.560366153717041, 2.0981476306915283, 2.6359291076660156, 3.1737098693847656, 3.711491584777832, 4.249273300170898, 4.787054538726807, 5.324836254119873, 5.8626179695129395, 6.400399208068848, 6.938180923461914, 7.4759626388549805, 8.013744354248047, 8.551526069641113, 9.08930778503418, 9.62708854675293, 10.164871215820312, 10.702651977539062, 11.240433692932129, 11.778215408325195, 12.315997123718262, 12.853778839111328, 13.391560554504395, 13.929342269897461, 14.467123031616211, 15.004904747009277, 15.542686462402344, 16.080467224121094, 16.618247985839844, 17.156030654907227, 17.693811416625977, 18.23159408569336, 18.76937484741211, 19.307157516479492, 19.844938278198242, 20.382720947265625]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 2.0, 6.0, 1.0, 8.0, 6.0, 12.0, 12.0, 21.0, 37.0, 46.0, 67.0, 109.0, 209.0, 304.0, 573.0, 1100.0, 2269.0, 4986.0, 12638.0, 38794.0, 393207.0, 3648285.0, 61849.0, 16868.0, 6689.0, 2943.0, 1390.0, 756.0, 438.0, 253.0, 157.0, 88.0, 51.0, 37.0, 23.0, 16.0, 18.0, 7.0, 5.0, 2.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-3.28125, -3.19293212890625, -3.1046142578125, -3.01629638671875, -2.927978515625, -2.83966064453125, -2.7513427734375, -2.66302490234375, -2.57470703125, -2.48638916015625, -2.3980712890625, -2.30975341796875, -2.221435546875, -2.13311767578125, -2.0447998046875, -1.95648193359375, -1.8681640625, -1.77984619140625, -1.6915283203125, -1.60321044921875, -1.514892578125, -1.42657470703125, -1.3382568359375, -1.24993896484375, -1.16162109375, -1.07330322265625, -0.9849853515625, -0.89666748046875, -0.808349609375, -0.72003173828125, -0.6317138671875, -0.54339599609375, -0.455078125, -0.36676025390625, -0.2784423828125, -0.19012451171875, -0.101806640625, -0.01348876953125, 0.0748291015625, 0.16314697265625, 0.25146484375, 0.33978271484375, 0.4281005859375, 0.51641845703125, 0.604736328125, 0.69305419921875, 0.7813720703125, 0.86968994140625, 0.9580078125, 1.04632568359375, 1.1346435546875, 1.22296142578125, 1.311279296875, 1.39959716796875, 1.4879150390625, 1.57623291015625, 1.66455078125, 1.75286865234375, 1.8411865234375, 1.92950439453125, 2.017822265625, 2.10614013671875, 2.1944580078125, 2.28277587890625, 2.37109375]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 2.0, 7.0, 3.0, 3.0, 7.0, 11.0, 15.0, 17.0, 19.0, 26.0, 35.0, 42.0, 42.0, 52.0, 60.0, 60.0, 70.0, 51.0, 57.0, 53.0, 66.0, 42.0, 38.0, 46.0, 42.0, 27.0, 27.0, 18.0, 25.0, 15.0, 12.0, 3.0, 2.0, 7.0, 3.0, 5.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-1.4013671875, -1.3653335571289062, -1.3292999267578125, -1.2932662963867188, -1.257232666015625, -1.2211990356445312, -1.1851654052734375, -1.1491317749023438, -1.11309814453125, -1.0770645141601562, -1.0410308837890625, -1.0049972534179688, -0.968963623046875, -0.9329299926757812, -0.8968963623046875, -0.8608627319335938, -0.8248291015625, -0.7887954711914062, -0.7527618408203125, -0.7167282104492188, -0.680694580078125, -0.6446609497070312, -0.6086273193359375, -0.5725936889648438, -0.53656005859375, -0.5005264282226562, -0.4644927978515625, -0.42845916748046875, -0.392425537109375, -0.35639190673828125, -0.3203582763671875, -0.28432464599609375, -0.248291015625, -0.21225738525390625, -0.1762237548828125, -0.14019012451171875, -0.104156494140625, -0.06812286376953125, -0.0320892333984375, 0.00394439697265625, 0.03997802734375, 0.07601165771484375, 0.1120452880859375, 0.14807891845703125, 0.184112548828125, 0.22014617919921875, 0.2561798095703125, 0.29221343994140625, 0.3282470703125, 0.36428070068359375, 0.4003143310546875, 0.43634796142578125, 0.472381591796875, 0.5084152221679688, 0.5444488525390625, 0.5804824829101562, 0.61651611328125, 0.6525497436523438, 0.6885833740234375, 0.7246170043945312, 0.760650634765625, 0.7966842651367188, 0.8327178955078125, 0.8687515258789062, 0.90478515625]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 0.0, 10.0, 6.0, 16.0, 27.0, 31.0, 62.0, 115.0, 276.0, 903.0, 4485.0, 47570.0, 4093092.0, 42043.0, 4149.0, 890.0, 299.0, 131.0, 66.0, 48.0, 29.0, 17.0, 10.0, 11.0, 2.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.0234375, -3.779296875, -3.53515625, -3.291015625, -3.046875, -2.802734375, -2.55859375, -2.314453125, -2.0703125, -1.826171875, -1.58203125, -1.337890625, -1.09375, -0.849609375, -0.60546875, -0.361328125, -0.1171875, 0.126953125, 0.37109375, 0.615234375, 0.859375, 1.103515625, 1.34765625, 1.591796875, 1.8359375, 2.080078125, 2.32421875, 2.568359375, 2.8125, 3.056640625, 3.30078125, 3.544921875, 3.7890625, 4.033203125, 4.27734375, 4.521484375, 4.765625, 5.009765625, 5.25390625, 5.498046875, 5.7421875, 5.986328125, 6.23046875, 6.474609375, 6.71875, 6.962890625, 7.20703125, 7.451171875, 7.6953125, 7.939453125, 8.18359375, 8.427734375, 8.671875, 8.916015625, 9.16015625, 9.404296875, 9.6484375, 9.892578125, 10.13671875, 10.380859375, 10.625, 10.869140625, 11.11328125, 11.357421875, 11.6015625]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 4.0, 6.0, 6.0, 12.0, 11.0, 18.0, 18.0, 44.0, 85.0, 254.0, 2818.0, 508.0, 121.0, 61.0, 29.0, 19.0, 14.0, 13.0, 7.0, 10.0, 3.0, 7.0, 2.0, 5.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.125, -1.07061767578125, -1.0162353515625, -0.96185302734375, -0.907470703125, -0.85308837890625, -0.7987060546875, -0.74432373046875, -0.68994140625, -0.63555908203125, -0.5811767578125, -0.52679443359375, -0.472412109375, -0.41802978515625, -0.3636474609375, -0.30926513671875, -0.2548828125, -0.20050048828125, -0.1461181640625, -0.09173583984375, -0.037353515625, 0.01702880859375, 0.0714111328125, 0.12579345703125, 0.18017578125, 0.23455810546875, 0.2889404296875, 0.34332275390625, 0.397705078125, 0.45208740234375, 0.5064697265625, 0.56085205078125, 0.615234375, 0.66961669921875, 0.7239990234375, 0.77838134765625, 0.832763671875, 0.88714599609375, 0.9415283203125, 0.99591064453125, 1.05029296875, 1.10467529296875, 1.1590576171875, 1.21343994140625, 1.267822265625, 1.32220458984375, 1.3765869140625, 1.43096923828125, 1.4853515625, 1.53973388671875, 1.5941162109375, 1.64849853515625, 1.702880859375, 1.75726318359375, 1.8116455078125, 1.86602783203125, 1.92041015625, 1.97479248046875, 2.0291748046875, 2.08355712890625, 2.137939453125, 2.19232177734375, 2.2467041015625, 2.30108642578125, 2.35546875]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 1.0, 0.0, 2.0, 8.0, 31.0, 38.0, 73.0, 139.0, 208.0, 202.0, 123.0, 72.0, 46.0, 29.0, 14.0, 6.0, 6.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.981631755828857, -6.719402313232422, -6.457172870635986, -6.194943428039551, -5.932713985443115, -5.67048454284668, -5.408255100250244, -5.146025657653809, -4.883796215057373, -4.6215667724609375, -4.359337329864502, -4.097107887268066, -3.834878444671631, -3.5726490020751953, -3.3104195594787598, -3.048190116882324, -2.7859606742858887, -2.523731231689453, -2.2615017890930176, -1.999272346496582, -1.7370429039001465, -1.474813461303711, -1.2125840187072754, -0.9503545761108398, -0.6881251335144043, -0.42589569091796875, -0.1636662483215332, 0.09856319427490234, 0.3607926368713379, 0.6230220794677734, 0.885251522064209, 1.1474809646606445, 1.4097099304199219, 1.6719393730163574, 1.934168815612793, 2.1963982582092285, 2.458627700805664, 2.7208571434020996, 2.983086585998535, 3.2453160285949707, 3.5075454711914062, 3.769774913787842, 4.032004356384277, 4.294233798980713, 4.556463241577148, 4.818692684173584, 5.0809221267700195, 5.343151569366455, 5.605381011962891, 5.867610454559326, 6.129839897155762, 6.392069339752197, 6.654298782348633, 6.916528224945068, 7.178757667541504, 7.4409871101379395, 7.703216552734375, 7.9654459953308105, 8.227675437927246, 8.489904403686523, 8.752134323120117, 9.014364242553711, 9.276593208312988, 9.538822174072266, 9.80105209350586]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 2.0, 6.0, 7.0, 8.0, 8.0, 10.0, 11.0, 14.0, 14.0, 14.0, 31.0, 33.0, 22.0, 26.0, 33.0, 54.0, 36.0, 49.0, 56.0, 47.0, 50.0, 47.0, 34.0, 37.0, 36.0, 42.0, 33.0, 26.0, 30.0, 26.0, 25.0, 18.0, 17.0, 25.0, 20.0, 12.0, 11.0, 12.0, 7.0, 4.0, 4.0, 4.0, 2.0, 0.0, 5.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.6212024688720703, -3.505706310272217, -3.3902103900909424, -3.274714231491089, -3.1592180728912354, -3.043722152709961, -2.9282259941101074, -2.812729835510254, -2.6972339153289795, -2.581737756729126, -2.4662418365478516, -2.350745677947998, -2.2352495193481445, -2.11975359916687, -2.0042574405670166, -1.8887614011764526, -1.7732652425765991, -1.6577692031860352, -1.5422730445861816, -1.4267770051956177, -1.3112809658050537, -1.1957848072052002, -1.0802887678146362, -0.9647927284240723, -0.8492966294288635, -0.7338005304336548, -0.6183044910430908, -0.5028083920478821, -0.3873123228549957, -0.2718162536621094, -0.15632015466690063, -0.04082411527633667, 0.07467198371887207, 0.19016805291175842, 0.3056641221046448, 0.4211602210998535, 0.5366562604904175, 0.6521523594856262, 0.767648458480835, 0.8831444978713989, 0.9986405968666077, 1.1141366958618164, 1.2296327352523804, 1.3451287746429443, 1.4606249332427979, 1.5761209726333618, 1.6916170120239258, 1.8071131706237793, 1.9226092100143433, 2.0381052494049072, 2.1536014080047607, 2.269097328186035, 2.3845934867858887, 2.500089645385742, 2.6155858039855957, 2.73108172416687, 2.8465778827667236, 2.962074041366577, 3.0775699615478516, 3.193066120147705, 3.3085622787475586, 3.424058198928833, 3.5395543575286865, 3.655050277709961, 3.7705464363098145]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0, 5.0, 9.0, 4.0, 10.0, 6.0, 14.0, 13.0, 22.0, 26.0, 57.0, 85.0, 124.0, 197.0, 387.0, 682.0, 1289.0, 2468.0, 5205.0, 11719.0, 28256.0, 71076.0, 173117.0, 321624.0, 248211.0, 108391.0, 43269.0, 17448.0, 7600.0, 3504.0, 1651.0, 861.0, 475.0, 266.0, 158.0, 118.0, 68.0, 54.0, 28.0, 12.0, 19.0, 6.0, 4.0, 6.0, 3.0, 1.0, 4.0, 1.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-1.69140625, -1.6363677978515625, -1.581329345703125, -1.5262908935546875, -1.47125244140625, -1.4162139892578125, -1.361175537109375, -1.3061370849609375, -1.2510986328125, -1.1960601806640625, -1.141021728515625, -1.0859832763671875, -1.03094482421875, -0.9759063720703125, -0.920867919921875, -0.8658294677734375, -0.810791015625, -0.7557525634765625, -0.700714111328125, -0.6456756591796875, -0.59063720703125, -0.5355987548828125, -0.480560302734375, -0.4255218505859375, -0.3704833984375, -0.3154449462890625, -0.260406494140625, -0.2053680419921875, -0.15032958984375, -0.0952911376953125, -0.040252685546875, 0.0147857666015625, 0.06982421875, 0.1248626708984375, 0.179901123046875, 0.2349395751953125, 0.28997802734375, 0.3450164794921875, 0.400054931640625, 0.4550933837890625, 0.5101318359375, 0.5651702880859375, 0.620208740234375, 0.6752471923828125, 0.73028564453125, 0.7853240966796875, 0.840362548828125, 0.8954010009765625, 0.950439453125, 1.0054779052734375, 1.060516357421875, 1.1155548095703125, 1.17059326171875, 1.2256317138671875, 1.280670166015625, 1.3357086181640625, 1.3907470703125, 1.4457855224609375, 1.500823974609375, 1.5558624267578125, 1.61090087890625, 1.6659393310546875, 1.720977783203125, 1.7760162353515625, 1.8310546875]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 6.0, 9.0, 10.0, 11.0, 26.0, 31.0, 25.0, 32.0, 44.0, 39.0, 62.0, 55.0, 51.0, 54.0, 64.0, 66.0, 49.0, 54.0, 54.0, 49.0, 39.0, 33.0, 26.0, 30.0, 23.0, 16.0, 13.0, 14.0, 3.0, 6.0, 2.0, 3.0, 2.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.390625, -1.352691650390625, -1.31475830078125, -1.276824951171875, -1.2388916015625, -1.200958251953125, -1.16302490234375, -1.125091552734375, -1.087158203125, -1.049224853515625, -1.01129150390625, -0.973358154296875, -0.9354248046875, -0.897491455078125, -0.85955810546875, -0.821624755859375, -0.78369140625, -0.745758056640625, -0.70782470703125, -0.669891357421875, -0.6319580078125, -0.594024658203125, -0.55609130859375, -0.518157958984375, -0.480224609375, -0.442291259765625, -0.40435791015625, -0.366424560546875, -0.3284912109375, -0.290557861328125, -0.25262451171875, -0.214691162109375, -0.1767578125, -0.138824462890625, -0.10089111328125, -0.062957763671875, -0.0250244140625, 0.012908935546875, 0.05084228515625, 0.088775634765625, 0.126708984375, 0.164642333984375, 0.20257568359375, 0.240509033203125, 0.2784423828125, 0.316375732421875, 0.35430908203125, 0.392242431640625, 0.43017578125, 0.468109130859375, 0.50604248046875, 0.543975830078125, 0.5819091796875, 0.619842529296875, 0.65777587890625, 0.695709228515625, 0.733642578125, 0.771575927734375, 0.80950927734375, 0.847442626953125, 0.8853759765625, 0.923309326171875, 0.96124267578125, 0.999176025390625, 1.037109375]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 3.0, 5.0, 3.0, 9.0, 11.0, 6.0, 11.0, 15.0, 17.0, 23.0, 33.0, 37.0, 54.0, 74.0, 92.0, 137.0, 212.0, 339.0, 568.0, 1193.0, 2660.0, 6667.0, 18746.0, 59692.0, 206073.0, 439029.0, 217723.0, 62744.0, 19549.0, 6973.0, 2883.0, 1217.0, 654.0, 348.0, 216.0, 132.0, 100.0, 71.0, 53.0, 45.0, 26.0, 25.0, 25.0, 11.0, 12.0, 12.0, 7.0, 5.0, 9.0, 5.0, 2.0, 3.0, 2.0, 3.0, 1.0, 0.0, 3.0], "bins": [-2.185546875, -2.11834716796875, -2.0511474609375, -1.98394775390625, -1.916748046875, -1.84954833984375, -1.7823486328125, -1.71514892578125, -1.64794921875, -1.58074951171875, -1.5135498046875, -1.44635009765625, -1.379150390625, -1.31195068359375, -1.2447509765625, -1.17755126953125, -1.1103515625, -1.04315185546875, -0.9759521484375, -0.90875244140625, -0.841552734375, -0.77435302734375, -0.7071533203125, -0.63995361328125, -0.57275390625, -0.50555419921875, -0.4383544921875, -0.37115478515625, -0.303955078125, -0.23675537109375, -0.1695556640625, -0.10235595703125, -0.03515625, 0.03204345703125, 0.0992431640625, 0.16644287109375, 0.233642578125, 0.30084228515625, 0.3680419921875, 0.43524169921875, 0.50244140625, 0.56964111328125, 0.6368408203125, 0.70404052734375, 0.771240234375, 0.83843994140625, 0.9056396484375, 0.97283935546875, 1.0400390625, 1.10723876953125, 1.1744384765625, 1.24163818359375, 1.308837890625, 1.37603759765625, 1.4432373046875, 1.51043701171875, 1.57763671875, 1.64483642578125, 1.7120361328125, 1.77923583984375, 1.846435546875, 1.91363525390625, 1.9808349609375, 2.04803466796875, 2.115234375]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 6.0, 7.0, 6.0, 8.0, 6.0, 10.0, 11.0, 22.0, 22.0, 23.0, 22.0, 23.0, 36.0, 27.0, 37.0, 33.0, 40.0, 41.0, 34.0, 36.0, 40.0, 41.0, 45.0, 47.0, 48.0, 33.0, 34.0, 29.0, 41.0, 22.0, 18.0, 28.0, 18.0, 21.0, 11.0, 22.0, 9.0, 9.0, 11.0, 9.0, 4.0, 5.0, 4.0, 2.0, 3.0, 2.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.818359375, -3.69647216796875, -3.5745849609375, -3.45269775390625, -3.330810546875, -3.20892333984375, -3.0870361328125, -2.96514892578125, -2.84326171875, -2.72137451171875, -2.5994873046875, -2.47760009765625, -2.355712890625, -2.23382568359375, -2.1119384765625, -1.99005126953125, -1.8681640625, -1.74627685546875, -1.6243896484375, -1.50250244140625, -1.380615234375, -1.25872802734375, -1.1368408203125, -1.01495361328125, -0.89306640625, -0.77117919921875, -0.6492919921875, -0.52740478515625, -0.405517578125, -0.28363037109375, -0.1617431640625, -0.03985595703125, 0.08203125, 0.20391845703125, 0.3258056640625, 0.44769287109375, 0.569580078125, 0.69146728515625, 0.8133544921875, 0.93524169921875, 1.05712890625, 1.17901611328125, 1.3009033203125, 1.42279052734375, 1.544677734375, 1.66656494140625, 1.7884521484375, 1.91033935546875, 2.0322265625, 2.15411376953125, 2.2760009765625, 2.39788818359375, 2.519775390625, 2.64166259765625, 2.7635498046875, 2.88543701171875, 3.00732421875, 3.12921142578125, 3.2510986328125, 3.37298583984375, 3.494873046875, 3.61676025390625, 3.7386474609375, 3.86053466796875, 3.982421875]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 5.0, 10.0, 18.0, 26.0, 63.0, 170.0, 378.0, 1198.0, 5168.0, 62979.0, 895080.0, 75787.0, 5715.0, 1261.0, 389.0, 181.0, 73.0, 35.0, 19.0, 3.0, 1.0, 3.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.623046875, -2.534210205078125, -2.44537353515625, -2.356536865234375, -2.2677001953125, -2.178863525390625, -2.09002685546875, -2.001190185546875, -1.912353515625, -1.823516845703125, -1.73468017578125, -1.645843505859375, -1.5570068359375, -1.468170166015625, -1.37933349609375, -1.290496826171875, -1.20166015625, -1.112823486328125, -1.02398681640625, -0.935150146484375, -0.8463134765625, -0.757476806640625, -0.66864013671875, -0.579803466796875, -0.490966796875, -0.402130126953125, -0.31329345703125, -0.224456787109375, -0.1356201171875, -0.046783447265625, 0.04205322265625, 0.130889892578125, 0.2197265625, 0.308563232421875, 0.39739990234375, 0.486236572265625, 0.5750732421875, 0.663909912109375, 0.75274658203125, 0.841583251953125, 0.930419921875, 1.019256591796875, 1.10809326171875, 1.196929931640625, 1.2857666015625, 1.374603271484375, 1.46343994140625, 1.552276611328125, 1.64111328125, 1.729949951171875, 1.81878662109375, 1.907623291015625, 1.9964599609375, 2.085296630859375, 2.17413330078125, 2.262969970703125, 2.351806640625, 2.440643310546875, 2.52947998046875, 2.618316650390625, 2.7071533203125, 2.795989990234375, 2.88482666015625, 2.973663330078125, 3.0625]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 6.0, 6.0, 11.0, 17.0, 27.0, 60.0, 117.0, 176.0, 218.0, 166.0, 89.0, 53.0, 25.0, 14.0, 9.0, 6.0, 1.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004024505615234375, -0.0003882572054862976, -0.0003740638494491577, -0.0003598704934120178, -0.00034567713737487793, -0.00033148378133773804, -0.00031729042530059814, -0.00030309706926345825, -0.00028890371322631836, -0.00027471035718917847, -0.0002605170011520386, -0.0002463236451148987, -0.0002321302890777588, -0.0002179369330406189, -0.000203743577003479, -0.0001895502209663391, -0.00017535686492919922, -0.00016116350889205933, -0.00014697015285491943, -0.00013277679681777954, -0.00011858344078063965, -0.00010439008474349976, -9.019672870635986e-05, -7.600337266921997e-05, -6.181001663208008e-05, -4.7616660594940186e-05, -3.342330455780029e-05, -1.92299485206604e-05, -5.036592483520508e-06, 9.156763553619385e-06, 2.3350119590759277e-05, 3.754347562789917e-05, 5.173683166503906e-05, 6.593018770217896e-05, 8.012354373931885e-05, 9.431689977645874e-05, 0.00010851025581359863, 0.00012270361185073853, 0.00013689696788787842, 0.0001510903239250183, 0.0001652836799621582, 0.0001794770359992981, 0.000193670392036438, 0.00020786374807357788, 0.00022205710411071777, 0.00023625046014785767, 0.00025044381618499756, 0.00026463717222213745, 0.00027883052825927734, 0.00029302388429641724, 0.00030721724033355713, 0.000321410596370697, 0.0003356039524078369, 0.0003497973084449768, 0.0003639906644821167, 0.0003781840205192566, 0.0003923773765563965, 0.0004065707325935364, 0.00042076408863067627, 0.00043495744466781616, 0.00044915080070495605, 0.00046334415674209595, 0.00047753751277923584, 0.0004917308688163757, 0.0005059242248535156]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 5.0, 5.0, 0.0, 5.0, 1.0, 4.0, 13.0, 15.0, 21.0, 37.0, 65.0, 157.0, 405.0, 1046.0, 3320.0, 14266.0, 116808.0, 709378.0, 177593.0, 19212.0, 4092.0, 1291.0, 438.0, 179.0, 70.0, 45.0, 28.0, 12.0, 9.0, 5.0, 8.0, 8.0, 3.0, 2.0, 4.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.7490234375, -1.69183349609375, -1.6346435546875, -1.57745361328125, -1.520263671875, -1.46307373046875, -1.4058837890625, -1.34869384765625, -1.29150390625, -1.23431396484375, -1.1771240234375, -1.11993408203125, -1.062744140625, -1.00555419921875, -0.9483642578125, -0.89117431640625, -0.833984375, -0.77679443359375, -0.7196044921875, -0.66241455078125, -0.605224609375, -0.54803466796875, -0.4908447265625, -0.43365478515625, -0.37646484375, -0.31927490234375, -0.2620849609375, -0.20489501953125, -0.147705078125, -0.09051513671875, -0.0333251953125, 0.02386474609375, 0.0810546875, 0.13824462890625, 0.1954345703125, 0.25262451171875, 0.309814453125, 0.36700439453125, 0.4241943359375, 0.48138427734375, 0.53857421875, 0.59576416015625, 0.6529541015625, 0.71014404296875, 0.767333984375, 0.82452392578125, 0.8817138671875, 0.93890380859375, 0.99609375, 1.05328369140625, 1.1104736328125, 1.16766357421875, 1.224853515625, 1.28204345703125, 1.3392333984375, 1.39642333984375, 1.45361328125, 1.51080322265625, 1.5679931640625, 1.62518310546875, 1.682373046875, 1.73956298828125, 1.7967529296875, 1.85394287109375, 1.9111328125]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [4.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 2.0, 4.0, 2.0, 3.0, 3.0, 6.0, 6.0, 7.0, 5.0, 10.0, 27.0, 8.0, 29.0, 28.0, 22.0, 32.0, 32.0, 54.0, 62.0, 65.0, 72.0, 66.0, 73.0, 56.0, 55.0, 54.0, 36.0, 37.0, 30.0, 26.0, 14.0, 22.0, 8.0, 7.0, 8.0, 8.0, 8.0, 1.0, 6.0, 2.0, 1.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.89990234375, -0.8688430786132812, -0.8377838134765625, -0.8067245483398438, -0.775665283203125, -0.7446060180664062, -0.7135467529296875, -0.6824874877929688, -0.65142822265625, -0.6203689575195312, -0.5893096923828125, -0.5582504272460938, -0.527191162109375, -0.49613189697265625, -0.4650726318359375, -0.43401336669921875, -0.4029541015625, -0.37189483642578125, -0.3408355712890625, -0.30977630615234375, -0.278717041015625, -0.24765777587890625, -0.2165985107421875, -0.18553924560546875, -0.15447998046875, -0.12342071533203125, -0.0923614501953125, -0.06130218505859375, -0.030242919921875, 0.00081634521484375, 0.0318756103515625, 0.06293487548828125, 0.093994140625, 0.12505340576171875, 0.1561126708984375, 0.18717193603515625, 0.218231201171875, 0.24929046630859375, 0.2803497314453125, 0.31140899658203125, 0.34246826171875, 0.37352752685546875, 0.4045867919921875, 0.43564605712890625, 0.466705322265625, 0.49776458740234375, 0.5288238525390625, 0.5598831176757812, 0.5909423828125, 0.6220016479492188, 0.6530609130859375, 0.6841201782226562, 0.715179443359375, 0.7462387084960938, 0.7772979736328125, 0.8083572387695312, 0.83941650390625, 0.8704757690429688, 0.9015350341796875, 0.9325942993164062, 0.963653564453125, 0.9947128295898438, 1.0257720947265625, 1.0568313598632812, 1.087890625]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 12.0, 31.0, 94.0, 180.0, 344.0, 231.0, 73.0, 32.0, 11.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-75.13302612304688, -73.62413024902344, -72.115234375, -70.60633850097656, -69.09744262695312, -67.58853912353516, -66.07964324951172, -64.57074737548828, -63.061851501464844, -61.552955627441406, -60.04405975341797, -58.535160064697266, -57.02626419067383, -55.51736831665039, -54.00846862792969, -52.49957275390625, -50.99067687988281, -49.481781005859375, -47.97288513183594, -46.463985443115234, -44.9550895690918, -43.44619369506836, -41.937294006347656, -40.42839813232422, -38.91950225830078, -37.410606384277344, -35.901710510253906, -34.3928108215332, -32.883914947509766, -31.375019073486328, -29.866121292114258, -28.357223510742188, -26.848325729370117, -25.339427947998047, -23.83053207397461, -22.321636199951172, -20.8127384185791, -19.30384063720703, -17.794944763183594, -16.286048889160156, -14.777151107788086, -13.268254280090332, -11.759357452392578, -10.250460624694824, -8.74156379699707, -7.232666969299316, -5.7237701416015625, -4.214873313903809, -2.7059764862060547, -1.1970796585083008, 0.3118171691894531, 1.820713996887207, 3.329610824584961, 4.838507652282715, 6.347404479980469, 7.856301307678223, 9.365198135375977, 10.87409496307373, 12.382991790771484, 13.891888618469238, 15.400785446166992, 16.909683227539062, 18.4185791015625, 19.927474975585938, 21.436372756958008]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 4.0, 5.0, 3.0, 2.0, 5.0, 5.0, 7.0, 5.0, 12.0, 12.0, 21.0, 15.0, 32.0, 11.0, 28.0, 32.0, 27.0, 35.0, 44.0, 43.0, 54.0, 51.0, 41.0, 37.0, 44.0, 37.0, 51.0, 47.0, 33.0, 29.0, 24.0, 41.0, 23.0, 22.0, 22.0, 19.0, 23.0, 15.0, 12.0, 9.0, 11.0, 6.0, 6.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.000274658203125, -18.28957748413086, -17.57887840270996, -16.868181228637695, -16.15748405456543, -15.446785926818848, -14.736087799072266, -14.025390625, -13.314692497253418, -12.603994369506836, -11.89329719543457, -11.182599067687988, -10.471900939941406, -9.76120376586914, -9.050505638122559, -8.339807510375977, -7.629110336303711, -6.918412685394287, -6.207715034484863, -5.497016906738281, -4.786319255828857, -4.075621604919434, -3.3649234771728516, -2.6542258262634277, -1.943528175354004, -1.2328304052352905, -0.5221326351165771, 0.18856525421142578, 0.8992629051208496, 1.6099605560302734, 2.3206586837768555, 3.0313563346862793, 3.742053985595703, 4.452751636505127, 5.163449287414551, 5.874147415161133, 6.584845066070557, 7.2955427169799805, 8.006240844726562, 8.716938018798828, 9.42763614654541, 10.138334274291992, 10.849031448364258, 11.55972957611084, 12.270427703857422, 12.981124877929688, 13.69182300567627, 14.402521133422852, 15.113218307495117, 15.8239164352417, 16.53461456298828, 17.245311737060547, 17.956008911132812, 18.666706085205078, 19.377405166625977, 20.088102340698242, 20.79880142211914, 21.509498596191406, 22.220197677612305, 22.93089485168457, 23.641592025756836, 24.352291107177734, 25.06298828125, 25.773685455322266, 26.48438262939453]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 4.0, 1.0, 7.0, 12.0, 16.0, 22.0, 27.0, 66.0, 71.0, 141.0, 219.0, 374.0, 669.0, 1189.0, 2191.0, 4621.0, 11013.0, 34278.0, 323318.0, 3723202.0, 63950.0, 16153.0, 6253.0, 2908.0, 1450.0, 871.0, 469.0, 260.0, 188.0, 108.0, 78.0, 51.0, 35.0, 23.0, 16.0, 8.0, 8.0, 5.0, 7.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-3.736328125, -3.6358642578125, -3.535400390625, -3.4349365234375, -3.33447265625, -3.2340087890625, -3.133544921875, -3.0330810546875, -2.9326171875, -2.8321533203125, -2.731689453125, -2.6312255859375, -2.53076171875, -2.4302978515625, -2.329833984375, -2.2293701171875, -2.12890625, -2.0284423828125, -1.927978515625, -1.8275146484375, -1.72705078125, -1.6265869140625, -1.526123046875, -1.4256591796875, -1.3251953125, -1.2247314453125, -1.124267578125, -1.0238037109375, -0.92333984375, -0.8228759765625, -0.722412109375, -0.6219482421875, -0.521484375, -0.4210205078125, -0.320556640625, -0.2200927734375, -0.11962890625, -0.0191650390625, 0.081298828125, 0.1817626953125, 0.2822265625, 0.3826904296875, 0.483154296875, 0.5836181640625, 0.68408203125, 0.7845458984375, 0.885009765625, 0.9854736328125, 1.0859375, 1.1864013671875, 1.286865234375, 1.3873291015625, 1.48779296875, 1.5882568359375, 1.688720703125, 1.7891845703125, 1.8896484375, 1.9901123046875, 2.090576171875, 2.1910400390625, 2.29150390625, 2.3919677734375, 2.492431640625, 2.5928955078125, 2.693359375]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 6.0, 6.0, 10.0, 15.0, 20.0, 25.0, 21.0, 30.0, 38.0, 45.0, 42.0, 46.0, 64.0, 50.0, 50.0, 58.0, 41.0, 59.0, 52.0, 55.0, 53.0, 43.0, 26.0, 32.0, 31.0, 14.0, 18.0, 12.0, 13.0, 10.0, 7.0, 6.0, 2.0, 3.0, 4.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.427734375, -1.3866119384765625, -1.345489501953125, -1.3043670654296875, -1.26324462890625, -1.2221221923828125, -1.180999755859375, -1.1398773193359375, -1.0987548828125, -1.0576324462890625, -1.016510009765625, -0.9753875732421875, -0.93426513671875, -0.8931427001953125, -0.852020263671875, -0.8108978271484375, -0.769775390625, -0.7286529541015625, -0.687530517578125, -0.6464080810546875, -0.60528564453125, -0.5641632080078125, -0.523040771484375, -0.4819183349609375, -0.4407958984375, -0.3996734619140625, -0.358551025390625, -0.3174285888671875, -0.27630615234375, -0.2351837158203125, -0.194061279296875, -0.1529388427734375, -0.11181640625, -0.0706939697265625, -0.029571533203125, 0.0115509033203125, 0.05267333984375, 0.0937957763671875, 0.134918212890625, 0.1760406494140625, 0.2171630859375, 0.2582855224609375, 0.299407958984375, 0.3405303955078125, 0.38165283203125, 0.4227752685546875, 0.463897705078125, 0.5050201416015625, 0.546142578125, 0.5872650146484375, 0.628387451171875, 0.6695098876953125, 0.71063232421875, 0.7517547607421875, 0.792877197265625, 0.8339996337890625, 0.8751220703125, 0.9162445068359375, 0.957366943359375, 0.9984893798828125, 1.03961181640625, 1.0807342529296875, 1.121856689453125, 1.1629791259765625, 1.2041015625]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 4.0, 4.0, 8.0, 14.0, 19.0, 19.0, 43.0, 53.0, 80.0, 128.0, 170.0, 218.0, 357.0, 705.0, 1369.0, 3111.0, 9167.0, 37717.0, 425138.0, 3623473.0, 69730.0, 14411.0, 4361.0, 1770.0, 788.0, 516.0, 323.0, 189.0, 125.0, 71.0, 57.0, 47.0, 29.0, 23.0, 19.0, 8.0, 15.0, 6.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.794921875, -2.69207763671875, -2.5892333984375, -2.48638916015625, -2.383544921875, -2.28070068359375, -2.1778564453125, -2.07501220703125, -1.97216796875, -1.86932373046875, -1.7664794921875, -1.66363525390625, -1.560791015625, -1.45794677734375, -1.3551025390625, -1.25225830078125, -1.1494140625, -1.04656982421875, -0.9437255859375, -0.84088134765625, -0.738037109375, -0.63519287109375, -0.5323486328125, -0.42950439453125, -0.32666015625, -0.22381591796875, -0.1209716796875, -0.01812744140625, 0.084716796875, 0.18756103515625, 0.2904052734375, 0.39324951171875, 0.49609375, 0.59893798828125, 0.7017822265625, 0.80462646484375, 0.907470703125, 1.01031494140625, 1.1131591796875, 1.21600341796875, 1.31884765625, 1.42169189453125, 1.5245361328125, 1.62738037109375, 1.730224609375, 1.83306884765625, 1.9359130859375, 2.03875732421875, 2.1416015625, 2.24444580078125, 2.3472900390625, 2.45013427734375, 2.552978515625, 2.65582275390625, 2.7586669921875, 2.86151123046875, 2.96435546875, 3.06719970703125, 3.1700439453125, 3.27288818359375, 3.375732421875, 3.47857666015625, 3.5814208984375, 3.68426513671875, 3.787109375]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 1.0, 4.0, 4.0, 5.0, 4.0, 11.0, 11.0, 25.0, 33.0, 78.0, 196.0, 2557.0, 831.0, 150.0, 50.0, 43.0, 30.0, 15.0, 9.0, 7.0, 5.0, 3.0, 3.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9560546875, -1.8803558349609375, -1.804656982421875, -1.7289581298828125, -1.65325927734375, -1.5775604248046875, -1.501861572265625, -1.4261627197265625, -1.3504638671875, -1.2747650146484375, -1.199066162109375, -1.1233673095703125, -1.04766845703125, -0.9719696044921875, -0.896270751953125, -0.8205718994140625, -0.744873046875, -0.6691741943359375, -0.593475341796875, -0.5177764892578125, -0.44207763671875, -0.3663787841796875, -0.290679931640625, -0.2149810791015625, -0.1392822265625, -0.0635833740234375, 0.012115478515625, 0.0878143310546875, 0.16351318359375, 0.2392120361328125, 0.314910888671875, 0.3906097412109375, 0.46630859375, 0.5420074462890625, 0.617706298828125, 0.6934051513671875, 0.76910400390625, 0.8448028564453125, 0.920501708984375, 0.9962005615234375, 1.0718994140625, 1.1475982666015625, 1.223297119140625, 1.2989959716796875, 1.37469482421875, 1.4503936767578125, 1.526092529296875, 1.6017913818359375, 1.677490234375, 1.7531890869140625, 1.828887939453125, 1.9045867919921875, 1.98028564453125, 2.0559844970703125, 2.131683349609375, 2.2073822021484375, 2.2830810546875, 2.3587799072265625, 2.434478759765625, 2.5101776123046875, 2.58587646484375, 2.6615753173828125, 2.737274169921875, 2.8129730224609375, 2.888671875]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 5.0, 44.0, 172.0, 366.0, 291.0, 92.0, 23.0, 11.0, 4.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.947605133056641, -7.31705904006958, -6.686513423919678, -6.055967330932617, -5.425421714782715, -4.794875621795654, -4.164329528808594, -3.5337839126586914, -2.903237819671631, -2.2726919651031494, -1.6421459913253784, -1.0116000175476074, -0.381054162979126, 0.24949169158935547, 0.880037784576416, 1.5105834007263184, 2.141129493713379, 2.7716753482818604, 3.402221202850342, 4.032767295837402, 4.663312911987305, 5.293859004974365, 5.924405097961426, 6.554950714111328, 7.185496807098389, 7.816042900085449, 8.446588516235352, 9.07713508605957, 9.707680702209473, 10.338226318359375, 10.968772888183594, 11.59931755065918, 12.229864120483398, 12.8604097366333, 13.49095630645752, 14.121501922607422, 14.752047538757324, 15.382593154907227, 16.013139724731445, 16.64368438720703, 17.27423095703125, 17.90477752685547, 18.535322189331055, 19.165868759155273, 19.796415328979492, 20.426959991455078, 21.057506561279297, 21.688053131103516, 22.318599700927734, 22.949146270751953, 23.57969093322754, 24.210237503051758, 24.840784072875977, 25.471328735351562, 26.10187530517578, 26.732421875, 27.362966537475586, 27.993513107299805, 28.62405776977539, 29.25460433959961, 29.885150909423828, 30.515695571899414, 31.146242141723633, 31.77678680419922, 32.40733337402344]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 3.0, 2.0, 5.0, 7.0, 12.0, 16.0, 17.0, 19.0, 21.0, 23.0, 14.0, 20.0, 29.0, 32.0, 24.0, 34.0, 32.0, 52.0, 39.0, 31.0, 38.0, 45.0, 36.0, 42.0, 56.0, 37.0, 39.0, 34.0, 37.0, 32.0, 19.0, 31.0, 20.0, 21.0, 15.0, 13.0, 8.0, 10.0, 13.0, 5.0, 2.0, 7.0, 2.0, 2.0, 1.0, 4.0, 4.0, 2.0, 1.0, 1.0, 2.0, 2.0], "bins": [-4.228907585144043, -4.102432727813721, -3.9759578704833984, -3.849482774734497, -3.723007917404175, -3.5965330600738525, -3.470057964324951, -3.343583106994629, -3.2171082496643066, -3.0906333923339844, -2.964158535003662, -2.8376834392547607, -2.7112085819244385, -2.584733724594116, -2.458258628845215, -2.3317837715148926, -2.2053089141845703, -2.078834056854248, -1.9523590803146362, -1.8258841037750244, -1.6994092464447021, -1.5729343891143799, -1.446459412574768, -1.3199844360351562, -1.193509578704834, -1.0670347213745117, -0.9405597448348999, -0.8140848278999329, -0.6876099109649658, -0.5611349940299988, -0.43466007709503174, -0.3081851601600647, -0.18171072006225586, -0.05523580312728882, 0.07123911380767822, 0.19771403074264526, 0.3241889476776123, 0.45066386461257935, 0.5771387815475464, 0.7036136984825134, 0.8300886154174805, 0.9565635323524475, 1.0830384492874146, 1.2095134258270264, 1.3359882831573486, 1.462463140487671, 1.5889381170272827, 1.7154130935668945, 1.8418879508972168, 1.968362808227539, 2.0948376655578613, 2.2213127613067627, 2.347787618637085, 2.4742624759674072, 2.6007375717163086, 2.727212429046631, 2.853687286376953, 2.9801621437072754, 3.1066370010375977, 3.233112096786499, 3.3595869541168213, 3.4860618114471436, 3.612536907196045, 3.739011764526367, 3.8654866218566895]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 4.0, 7.0, 4.0, 9.0, 13.0, 18.0, 27.0, 39.0, 92.0, 116.0, 247.0, 442.0, 1034.0, 2541.0, 6634.0, 19694.0, 59457.0, 193888.0, 432443.0, 226469.0, 70049.0, 22398.0, 7763.0, 2869.0, 1219.0, 478.0, 257.0, 132.0, 72.0, 48.0, 23.0, 21.0, 20.0, 13.0, 7.0, 4.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.853515625, -2.770965576171875, -2.68841552734375, -2.605865478515625, -2.5233154296875, -2.440765380859375, -2.35821533203125, -2.275665283203125, -2.193115234375, -2.110565185546875, -2.02801513671875, -1.945465087890625, -1.8629150390625, -1.780364990234375, -1.69781494140625, -1.615264892578125, -1.53271484375, -1.450164794921875, -1.36761474609375, -1.285064697265625, -1.2025146484375, -1.119964599609375, -1.03741455078125, -0.954864501953125, -0.872314453125, -0.789764404296875, -0.70721435546875, -0.624664306640625, -0.5421142578125, -0.459564208984375, -0.37701416015625, -0.294464111328125, -0.2119140625, -0.129364013671875, -0.04681396484375, 0.035736083984375, 0.1182861328125, 0.200836181640625, 0.28338623046875, 0.365936279296875, 0.448486328125, 0.531036376953125, 0.61358642578125, 0.696136474609375, 0.7786865234375, 0.861236572265625, 0.94378662109375, 1.026336669921875, 1.10888671875, 1.191436767578125, 1.27398681640625, 1.356536865234375, 1.4390869140625, 1.521636962890625, 1.60418701171875, 1.686737060546875, 1.769287109375, 1.851837158203125, 1.93438720703125, 2.016937255859375, 2.0994873046875, 2.182037353515625, 2.26458740234375, 2.347137451171875, 2.4296875]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 3.0, 5.0, 4.0, 10.0, 14.0, 14.0, 24.0, 18.0, 35.0, 31.0, 40.0, 45.0, 49.0, 61.0, 41.0, 52.0, 70.0, 59.0, 49.0, 59.0, 55.0, 47.0, 41.0, 33.0, 27.0, 21.0, 24.0, 16.0, 14.0, 11.0, 14.0, 8.0, 5.0, 5.0, 0.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4287109375, -1.38525390625, -1.341796875, -1.29833984375, -1.2548828125, -1.21142578125, -1.16796875, -1.12451171875, -1.0810546875, -1.03759765625, -0.994140625, -0.95068359375, -0.9072265625, -0.86376953125, -0.8203125, -0.77685546875, -0.7333984375, -0.68994140625, -0.646484375, -0.60302734375, -0.5595703125, -0.51611328125, -0.47265625, -0.42919921875, -0.3857421875, -0.34228515625, -0.298828125, -0.25537109375, -0.2119140625, -0.16845703125, -0.125, -0.08154296875, -0.0380859375, 0.00537109375, 0.048828125, 0.09228515625, 0.1357421875, 0.17919921875, 0.22265625, 0.26611328125, 0.3095703125, 0.35302734375, 0.396484375, 0.43994140625, 0.4833984375, 0.52685546875, 0.5703125, 0.61376953125, 0.6572265625, 0.70068359375, 0.744140625, 0.78759765625, 0.8310546875, 0.87451171875, 0.91796875, 0.96142578125, 1.0048828125, 1.04833984375, 1.091796875, 1.13525390625, 1.1787109375, 1.22216796875, 1.265625, 1.30908203125, 1.3525390625]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 4.0, 13.0, 9.0, 16.0, 36.0, 55.0, 100.0, 137.0, 319.0, 695.0, 2226.0, 8169.0, 48740.0, 456057.0, 469934.0, 49883.0, 8534.0, 2190.0, 757.0, 317.0, 135.0, 61.0, 59.0, 45.0, 25.0, 19.0, 8.0, 7.0, 5.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.6328125, -4.49658203125, -4.3603515625, -4.22412109375, -4.087890625, -3.95166015625, -3.8154296875, -3.67919921875, -3.54296875, -3.40673828125, -3.2705078125, -3.13427734375, -2.998046875, -2.86181640625, -2.7255859375, -2.58935546875, -2.453125, -2.31689453125, -2.1806640625, -2.04443359375, -1.908203125, -1.77197265625, -1.6357421875, -1.49951171875, -1.36328125, -1.22705078125, -1.0908203125, -0.95458984375, -0.818359375, -0.68212890625, -0.5458984375, -0.40966796875, -0.2734375, -0.13720703125, -0.0009765625, 0.13525390625, 0.271484375, 0.40771484375, 0.5439453125, 0.68017578125, 0.81640625, 0.95263671875, 1.0888671875, 1.22509765625, 1.361328125, 1.49755859375, 1.6337890625, 1.77001953125, 1.90625, 2.04248046875, 2.1787109375, 2.31494140625, 2.451171875, 2.58740234375, 2.7236328125, 2.85986328125, 2.99609375, 3.13232421875, 3.2685546875, 3.40478515625, 3.541015625, 3.67724609375, 3.8134765625, 3.94970703125, 4.0859375]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 3.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 8.0, 9.0, 4.0, 8.0, 13.0, 17.0, 16.0, 20.0, 21.0, 45.0, 33.0, 41.0, 37.0, 42.0, 59.0, 65.0, 50.0, 53.0, 38.0, 52.0, 45.0, 45.0, 54.0, 32.0, 29.0, 24.0, 35.0, 20.0, 18.0, 8.0, 17.0, 13.0, 6.0, 2.0, 9.0, 5.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.5703125, -5.39129638671875, -5.2122802734375, -5.03326416015625, -4.854248046875, -4.67523193359375, -4.4962158203125, -4.31719970703125, -4.13818359375, -3.95916748046875, -3.7801513671875, -3.60113525390625, -3.422119140625, -3.24310302734375, -3.0640869140625, -2.88507080078125, -2.7060546875, -2.52703857421875, -2.3480224609375, -2.16900634765625, -1.989990234375, -1.81097412109375, -1.6319580078125, -1.45294189453125, -1.27392578125, -1.09490966796875, -0.9158935546875, -0.73687744140625, -0.557861328125, -0.37884521484375, -0.1998291015625, -0.02081298828125, 0.158203125, 0.33721923828125, 0.5162353515625, 0.69525146484375, 0.874267578125, 1.05328369140625, 1.2322998046875, 1.41131591796875, 1.59033203125, 1.76934814453125, 1.9483642578125, 2.12738037109375, 2.306396484375, 2.48541259765625, 2.6644287109375, 2.84344482421875, 3.0224609375, 3.20147705078125, 3.3804931640625, 3.55950927734375, 3.738525390625, 3.91754150390625, 4.0965576171875, 4.27557373046875, 4.45458984375, 4.63360595703125, 4.8126220703125, 4.99163818359375, 5.170654296875, 5.34967041015625, 5.5286865234375, 5.70770263671875, 5.88671875]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 7.0, 5.0, 6.0, 9.0, 10.0, 15.0, 14.0, 34.0, 42.0, 70.0, 98.0, 164.0, 310.0, 486.0, 697.0, 1184.0, 1986.0, 3733.0, 8238.0, 23471.0, 127525.0, 727740.0, 114146.0, 22056.0, 7798.0, 3633.0, 1958.0, 1175.0, 721.0, 436.0, 275.0, 167.0, 116.0, 57.0, 64.0, 32.0, 23.0, 16.0, 12.0, 11.0, 5.0, 1.0, 3.0, 2.0, 5.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-1.904296875, -1.8437957763671875, -1.783294677734375, -1.7227935791015625, -1.66229248046875, -1.6017913818359375, -1.541290283203125, -1.4807891845703125, -1.4202880859375, -1.3597869873046875, -1.299285888671875, -1.2387847900390625, -1.17828369140625, -1.1177825927734375, -1.057281494140625, -0.9967803955078125, -0.936279296875, -0.8757781982421875, -0.815277099609375, -0.7547760009765625, -0.69427490234375, -0.6337738037109375, -0.573272705078125, -0.5127716064453125, -0.4522705078125, -0.3917694091796875, -0.331268310546875, -0.2707672119140625, -0.21026611328125, -0.1497650146484375, -0.089263916015625, -0.0287628173828125, 0.03173828125, 0.0922393798828125, 0.152740478515625, 0.2132415771484375, 0.27374267578125, 0.3342437744140625, 0.394744873046875, 0.4552459716796875, 0.5157470703125, 0.5762481689453125, 0.636749267578125, 0.6972503662109375, 0.75775146484375, 0.8182525634765625, 0.878753662109375, 0.9392547607421875, 0.999755859375, 1.0602569580078125, 1.120758056640625, 1.1812591552734375, 1.24176025390625, 1.3022613525390625, 1.362762451171875, 1.4232635498046875, 1.4837646484375, 1.5442657470703125, 1.604766845703125, 1.6652679443359375, 1.72576904296875, 1.7862701416015625, 1.846771240234375, 1.9072723388671875, 1.9677734375]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 2.0, 2.0, 10.0, 1.0, 8.0, 21.0, 34.0, 54.0, 94.0, 152.0, 198.0, 171.0, 94.0, 48.0, 27.0, 35.0, 15.0, 8.0, 8.0, 4.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00035643577575683594, -0.00034194812178611755, -0.00032746046781539917, -0.0003129728138446808, -0.0002984851598739624, -0.000283997505903244, -0.00026950985193252563, -0.00025502219796180725, -0.00024053454399108887, -0.00022604689002037048, -0.0002115592360496521, -0.00019707158207893372, -0.00018258392810821533, -0.00016809627413749695, -0.00015360862016677856, -0.00013912096619606018, -0.0001246333122253418, -0.00011014565825462341, -9.565800428390503e-05, -8.117035031318665e-05, -6.668269634246826e-05, -5.219504237174988e-05, -3.7707388401031494e-05, -2.321973443031311e-05, -8.732080459594727e-06, 5.755573511123657e-06, 2.024322748184204e-05, 3.4730881452560425e-05, 4.921853542327881e-05, 6.370618939399719e-05, 7.819384336471558e-05, 9.268149733543396e-05, 0.00010716915130615234, 0.00012165680527687073, 0.0001361444592475891, 0.0001506321132183075, 0.00016511976718902588, 0.00017960742115974426, 0.00019409507513046265, 0.00020858272910118103, 0.00022307038307189941, 0.0002375580370426178, 0.0002520456910133362, 0.00026653334498405457, 0.00028102099895477295, 0.00029550865292549133, 0.0003099963068962097, 0.0003244839608669281, 0.0003389716148376465, 0.00035345926880836487, 0.00036794692277908325, 0.00038243457674980164, 0.00039692223072052, 0.0004114098846912384, 0.0004258975386619568, 0.00044038519263267517, 0.00045487284660339355, 0.00046936050057411194, 0.0004838481545448303, 0.0004983358085155487, 0.0005128234624862671, 0.0005273111164569855, 0.0005417987704277039, 0.0005562864243984222, 0.0005707740783691406]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 4.0, 1.0, 4.0, 2.0, 4.0, 9.0, 23.0, 23.0, 24.0, 52.0, 56.0, 90.0, 150.0, 229.0, 388.0, 627.0, 1039.0, 2043.0, 4161.0, 10212.0, 36862.0, 280605.0, 604710.0, 78591.0, 16537.0, 6003.0, 2684.0, 1385.0, 792.0, 430.0, 281.0, 175.0, 133.0, 71.0, 54.0, 38.0, 21.0, 17.0, 10.0, 5.0, 8.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.037109375, -1.9792633056640625, -1.921417236328125, -1.8635711669921875, -1.80572509765625, -1.7478790283203125, -1.690032958984375, -1.6321868896484375, -1.5743408203125, -1.5164947509765625, -1.458648681640625, -1.4008026123046875, -1.34295654296875, -1.2851104736328125, -1.227264404296875, -1.1694183349609375, -1.111572265625, -1.0537261962890625, -0.995880126953125, -0.9380340576171875, -0.88018798828125, -0.8223419189453125, -0.764495849609375, -0.7066497802734375, -0.6488037109375, -0.5909576416015625, -0.533111572265625, -0.4752655029296875, -0.41741943359375, -0.3595733642578125, -0.301727294921875, -0.2438812255859375, -0.18603515625, -0.1281890869140625, -0.070343017578125, -0.0124969482421875, 0.04534912109375, 0.1031951904296875, 0.161041259765625, 0.2188873291015625, 0.2767333984375, 0.3345794677734375, 0.392425537109375, 0.4502716064453125, 0.50811767578125, 0.5659637451171875, 0.623809814453125, 0.6816558837890625, 0.739501953125, 0.7973480224609375, 0.855194091796875, 0.9130401611328125, 0.97088623046875, 1.0287322998046875, 1.086578369140625, 1.1444244384765625, 1.2022705078125, 1.2601165771484375, 1.317962646484375, 1.3758087158203125, 1.43365478515625, 1.4915008544921875, 1.549346923828125, 1.6071929931640625, 1.6650390625]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 2.0, 5.0, 2.0, 5.0, 12.0, 11.0, 11.0, 20.0, 22.0, 46.0, 63.0, 104.0, 116.0, 145.0, 148.0, 90.0, 66.0, 43.0, 23.0, 21.0, 17.0, 13.0, 6.0, 6.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0], "bins": [-3.1796875, -3.10321044921875, -3.0267333984375, -2.95025634765625, -2.873779296875, -2.79730224609375, -2.7208251953125, -2.64434814453125, -2.56787109375, -2.49139404296875, -2.4149169921875, -2.33843994140625, -2.261962890625, -2.18548583984375, -2.1090087890625, -2.03253173828125, -1.9560546875, -1.87957763671875, -1.8031005859375, -1.72662353515625, -1.650146484375, -1.57366943359375, -1.4971923828125, -1.42071533203125, -1.34423828125, -1.26776123046875, -1.1912841796875, -1.11480712890625, -1.038330078125, -0.96185302734375, -0.8853759765625, -0.80889892578125, -0.732421875, -0.65594482421875, -0.5794677734375, -0.50299072265625, -0.426513671875, -0.35003662109375, -0.2735595703125, -0.19708251953125, -0.12060546875, -0.04412841796875, 0.0323486328125, 0.10882568359375, 0.185302734375, 0.26177978515625, 0.3382568359375, 0.41473388671875, 0.4912109375, 0.56768798828125, 0.6441650390625, 0.72064208984375, 0.797119140625, 0.87359619140625, 0.9500732421875, 1.02655029296875, 1.10302734375, 1.17950439453125, 1.2559814453125, 1.33245849609375, 1.408935546875, 1.48541259765625, 1.5618896484375, 1.63836669921875, 1.71484375]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 5.0, 3.0, 2.0, 4.0, 6.0, 11.0, 11.0, 28.0, 38.0, 50.0, 77.0, 104.0, 95.0, 121.0, 109.0, 92.0, 64.0, 55.0, 38.0, 25.0, 23.0, 14.0, 13.0, 8.0, 7.0, 2.0, 6.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-22.11834144592285, -21.469161987304688, -20.819982528686523, -20.170801162719727, -19.521621704101562, -18.8724422454834, -18.223262786865234, -17.57408332824707, -16.924903869628906, -16.275724411010742, -15.626543998718262, -14.977364540100098, -14.328184127807617, -13.679004669189453, -13.029825210571289, -12.380645751953125, -11.731464385986328, -11.082284927368164, -10.433104515075684, -9.78392505645752, -9.134744644165039, -8.485565185546875, -7.836385726928711, -7.187205791473389, -6.538025856018066, -5.888845920562744, -5.239665985107422, -4.590486526489258, -3.9413065910339355, -3.2921266555786133, -2.64294695854187, -1.993767261505127, -1.3445873260498047, -0.695407509803772, -0.04622769355773926, 0.6029521226882935, 1.2521319389343262, 1.9013118743896484, 2.5504915714263916, 3.1996712684631348, 3.848851203918457, 4.498031139373779, 5.147211074829102, 5.796390533447266, 6.445570468902588, 7.09475040435791, 7.743929862976074, 8.393110275268555, 9.042289733886719, 9.691469192504883, 10.340649604797363, 10.989829063415527, 11.639009475708008, 12.288188934326172, 12.937368392944336, 13.5865478515625, 14.23572826385498, 14.884907722473145, 15.534088134765625, 16.18326759338379, 16.832447052001953, 17.48162841796875, 18.130807876586914, 18.779987335205078, 19.429166793823242]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 6.0, 6.0, 2.0, 9.0, 15.0, 9.0, 13.0, 16.0, 23.0, 19.0, 27.0, 27.0, 32.0, 30.0, 36.0, 34.0, 37.0, 39.0, 42.0, 61.0, 52.0, 54.0, 43.0, 39.0, 48.0, 33.0, 33.0, 39.0, 27.0, 26.0, 21.0, 20.0, 14.0, 14.0, 12.0, 8.0, 7.0, 10.0, 6.0, 9.0, 2.0, 2.0, 2.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-30.519367218017578, -29.607492446899414, -28.695619583129883, -27.78374481201172, -26.871871948242188, -25.959997177124023, -25.04812240600586, -24.136249542236328, -23.224374771118164, -22.3125, -21.40062713623047, -20.488752365112305, -19.57687759399414, -18.66500473022461, -17.753129959106445, -16.84125518798828, -15.92938232421875, -15.017508506774902, -14.105634689331055, -13.19375991821289, -12.281886100769043, -11.370012283325195, -10.458137512207031, -9.546263694763184, -8.634389877319336, -7.722516059875488, -6.810641765594482, -5.898767471313477, -4.986893653869629, -4.075019836425781, -3.1631455421447754, -2.2512712478637695, -1.3393974304199219, -0.4275233745574951, 0.48435068130493164, 1.3962247371673584, 2.308098793029785, 3.219972610473633, 4.131846904754639, 5.0437211990356445, 5.955595016479492, 6.86746883392334, 7.779343128204346, 8.691217422485352, 9.6030912399292, 10.514965057373047, 11.426839828491211, 12.338713645935059, 13.250587463378906, 14.162461280822754, 15.074335098266602, 15.986209869384766, 16.898082733154297, 17.80995750427246, 18.721832275390625, 19.633705139160156, 20.54557991027832, 21.457454681396484, 22.369327545166016, 23.28120231628418, 24.193077087402344, 25.104949951171875, 26.01682472229004, 26.928699493408203, 27.840572357177734]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 3.0, 0.0, 7.0, 7.0, 10.0, 8.0, 23.0, 29.0, 30.0, 43.0, 68.0, 111.0, 186.0, 341.0, 684.0, 1269.0, 2869.0, 7775.0, 41204.0, 4097025.0, 30619.0, 6601.0, 2629.0, 1269.0, 585.0, 336.0, 207.0, 119.0, 73.0, 46.0, 44.0, 22.0, 8.0, 10.0, 5.0, 4.0, 5.0, 5.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.96875, -3.850067138671875, -3.73138427734375, -3.612701416015625, -3.4940185546875, -3.375335693359375, -3.25665283203125, -3.137969970703125, -3.019287109375, -2.900604248046875, -2.78192138671875, -2.663238525390625, -2.5445556640625, -2.425872802734375, -2.30718994140625, -2.188507080078125, -2.06982421875, -1.951141357421875, -1.83245849609375, -1.713775634765625, -1.5950927734375, -1.476409912109375, -1.35772705078125, -1.239044189453125, -1.120361328125, -1.001678466796875, -0.88299560546875, -0.764312744140625, -0.6456298828125, -0.526947021484375, -0.40826416015625, -0.289581298828125, -0.1708984375, -0.052215576171875, 0.06646728515625, 0.185150146484375, 0.3038330078125, 0.422515869140625, 0.54119873046875, 0.659881591796875, 0.778564453125, 0.897247314453125, 1.01593017578125, 1.134613037109375, 1.2532958984375, 1.371978759765625, 1.49066162109375, 1.609344482421875, 1.72802734375, 1.846710205078125, 1.96539306640625, 2.084075927734375, 2.2027587890625, 2.321441650390625, 2.44012451171875, 2.558807373046875, 2.677490234375, 2.796173095703125, 2.91485595703125, 3.033538818359375, 3.1522216796875, 3.270904541015625, 3.38958740234375, 3.508270263671875, 3.626953125]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 0.0, 2.0, 3.0, 4.0, 7.0, 7.0, 11.0, 13.0, 15.0, 13.0, 20.0, 27.0, 24.0, 37.0, 32.0, 31.0, 34.0, 49.0, 42.0, 45.0, 59.0, 58.0, 38.0, 37.0, 53.0, 39.0, 34.0, 38.0, 39.0, 31.0, 23.0, 20.0, 24.0, 22.0, 10.0, 9.0, 12.0, 9.0, 8.0, 6.0, 4.0, 6.0, 4.0, 1.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-1.4091796875, -1.3635406494140625, -1.317901611328125, -1.2722625732421875, -1.22662353515625, -1.1809844970703125, -1.135345458984375, -1.0897064208984375, -1.0440673828125, -0.9984283447265625, -0.952789306640625, -0.9071502685546875, -0.86151123046875, -0.8158721923828125, -0.770233154296875, -0.7245941162109375, -0.678955078125, -0.6333160400390625, -0.587677001953125, -0.5420379638671875, -0.49639892578125, -0.4507598876953125, -0.405120849609375, -0.3594818115234375, -0.3138427734375, -0.2682037353515625, -0.222564697265625, -0.1769256591796875, -0.13128662109375, -0.0856475830078125, -0.040008544921875, 0.0056304931640625, 0.05126953125, 0.0969085693359375, 0.142547607421875, 0.1881866455078125, 0.23382568359375, 0.2794647216796875, 0.325103759765625, 0.3707427978515625, 0.4163818359375, 0.4620208740234375, 0.507659912109375, 0.5532989501953125, 0.59893798828125, 0.6445770263671875, 0.690216064453125, 0.7358551025390625, 0.781494140625, 0.8271331787109375, 0.872772216796875, 0.9184112548828125, 0.96405029296875, 1.0096893310546875, 1.055328369140625, 1.1009674072265625, 1.1466064453125, 1.1922454833984375, 1.237884521484375, 1.2835235595703125, 1.32916259765625, 1.3748016357421875, 1.420440673828125, 1.4660797119140625, 1.51171875]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 3.0, 6.0, 4.0, 4.0, 5.0, 6.0, 10.0, 11.0, 22.0, 32.0, 26.0, 37.0, 48.0, 86.0, 128.0, 212.0, 294.0, 577.0, 892.0, 1639.0, 3117.0, 6928.0, 21678.0, 228040.0, 3883949.0, 29829.0, 8563.0, 3646.0, 1855.0, 1005.0, 564.0, 381.0, 240.0, 154.0, 82.0, 70.0, 44.0, 27.0, 19.0, 10.0, 8.0, 9.0, 9.0, 4.0, 8.0, 4.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-2.638671875, -2.551025390625, -2.46337890625, -2.375732421875, -2.2880859375, -2.200439453125, -2.11279296875, -2.025146484375, -1.9375, -1.849853515625, -1.76220703125, -1.674560546875, -1.5869140625, -1.499267578125, -1.41162109375, -1.323974609375, -1.236328125, -1.148681640625, -1.06103515625, -0.973388671875, -0.8857421875, -0.798095703125, -0.71044921875, -0.622802734375, -0.53515625, -0.447509765625, -0.35986328125, -0.272216796875, -0.1845703125, -0.096923828125, -0.00927734375, 0.078369140625, 0.166015625, 0.253662109375, 0.34130859375, 0.428955078125, 0.5166015625, 0.604248046875, 0.69189453125, 0.779541015625, 0.8671875, 0.954833984375, 1.04248046875, 1.130126953125, 1.2177734375, 1.305419921875, 1.39306640625, 1.480712890625, 1.568359375, 1.656005859375, 1.74365234375, 1.831298828125, 1.9189453125, 2.006591796875, 2.09423828125, 2.181884765625, 2.26953125, 2.357177734375, 2.44482421875, 2.532470703125, 2.6201171875, 2.707763671875, 2.79541015625, 2.883056640625, 2.970703125]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 2.0, 6.0, 4.0, 4.0, 11.0, 19.0, 28.0, 60.0, 258.0, 3468.0, 100.0, 31.0, 23.0, 15.0, 11.0, 6.0, 6.0, 0.0, 6.0, 3.0, 3.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7099609375, -0.689208984375, -0.66845703125, -0.647705078125, -0.626953125, -0.606201171875, -0.58544921875, -0.564697265625, -0.5439453125, -0.523193359375, -0.50244140625, -0.481689453125, -0.4609375, -0.440185546875, -0.41943359375, -0.398681640625, -0.3779296875, -0.357177734375, -0.33642578125, -0.315673828125, -0.294921875, -0.274169921875, -0.25341796875, -0.232666015625, -0.2119140625, -0.191162109375, -0.17041015625, -0.149658203125, -0.12890625, -0.108154296875, -0.08740234375, -0.066650390625, -0.0458984375, -0.025146484375, -0.00439453125, 0.016357421875, 0.037109375, 0.057861328125, 0.07861328125, 0.099365234375, 0.1201171875, 0.140869140625, 0.16162109375, 0.182373046875, 0.203125, 0.223876953125, 0.24462890625, 0.265380859375, 0.2861328125, 0.306884765625, 0.32763671875, 0.348388671875, 0.369140625, 0.389892578125, 0.41064453125, 0.431396484375, 0.4521484375, 0.472900390625, 0.49365234375, 0.514404296875, 0.53515625, 0.555908203125, 0.57666015625, 0.597412109375, 0.6181640625]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 3.0, 2.0, 7.0, 4.0, 8.0, 11.0, 20.0, 27.0, 57.0, 86.0, 100.0, 141.0, 111.0, 134.0, 118.0, 73.0, 45.0, 27.0, 17.0, 9.0, 4.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 2.0], "bins": [-3.56059193611145, -3.487452745437622, -3.414313554763794, -3.341174602508545, -3.268035411834717, -3.1948962211608887, -3.1217570304870605, -3.0486178398132324, -2.9754786491394043, -2.902339458465576, -2.829200267791748, -2.75606107711792, -2.682922124862671, -2.6097829341888428, -2.5366437435150146, -2.4635045528411865, -2.3903656005859375, -2.3172264099121094, -2.2440872192382812, -2.170948028564453, -2.097809076309204, -2.024669885635376, -1.9515306949615479, -1.8783915042877197, -1.8052523136138916, -1.7321131229400635, -1.658974051475525, -1.5858348608016968, -1.5126956701278687, -1.43955659866333, -1.366417407989502, -1.2932782173156738, -1.2201390266418457, -1.1469998359680176, -1.073860764503479, -1.0007215738296509, -0.9275823831558228, -0.8544432520866394, -0.781304121017456, -0.7081649303436279, -0.6350257992744446, -0.5618866682052612, -0.4887474775314331, -0.41560834646224976, -0.342469185590744, -0.2693300247192383, -0.19619089365005493, -0.1230517029762268, -0.04991257190704346, 0.023226581513881683, 0.09636573493480682, 0.16950488090515137, 0.2426440417766571, 0.31578320264816284, 0.3889223337173462, 0.4620615243911743, 0.5352006554603577, 0.608339786529541, 0.6814789772033691, 0.7546181082725525, 0.8277572393417358, 0.900896430015564, 0.9740355610847473, 1.0471746921539307, 1.1203138828277588]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 3.0, 5.0, 8.0, 5.0, 5.0, 15.0, 15.0, 10.0, 16.0, 16.0, 19.0, 25.0, 17.0, 36.0, 27.0, 39.0, 49.0, 49.0, 49.0, 42.0, 39.0, 46.0, 43.0, 46.0, 46.0, 31.0, 48.0, 37.0, 29.0, 17.0, 25.0, 25.0, 27.0, 19.0, 10.0, 11.0, 6.0, 13.0, 5.0, 6.0, 8.0, 7.0, 1.0, 4.0, 3.0, 3.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-1.0735180377960205, -1.0398602485656738, -1.0062024593353271, -0.9725446105003357, -0.9388867616653442, -0.9052289724349976, -0.8715711832046509, -0.8379133343696594, -0.804255485534668, -0.7705976963043213, -0.7369398474693298, -0.7032820582389832, -0.6696242094039917, -0.635966420173645, -0.6023086309432983, -0.5686507821083069, -0.5349929928779602, -0.5013352036476135, -0.46767735481262207, -0.4340195655822754, -0.40036171674728394, -0.36670392751693726, -0.3330461084842682, -0.2993882894515991, -0.26573047041893005, -0.232072651386261, -0.19841483235359192, -0.16475702822208405, -0.13109920918941498, -0.09744139015674591, -0.06378358602523804, -0.03012576699256897, 0.0035320520401000977, 0.03718986734747887, 0.07084768265485764, 0.1045054942369461, 0.13816331326961517, 0.17182113230228424, 0.20547893643379211, 0.23913675546646118, 0.27279457449913025, 0.3064523935317993, 0.3401102125644684, 0.37376803159713745, 0.40742582082748413, 0.4410836696624756, 0.47474145889282227, 0.508399248123169, 0.5420570969581604, 0.5757148861885071, 0.6093727350234985, 0.6430305242538452, 0.6766883730888367, 0.7103461623191833, 0.7440040111541748, 0.7776618003845215, 0.8113195896148682, 0.8449773788452148, 0.8786352276802063, 0.912293016910553, 0.9459508657455444, 0.9796086549758911, 1.0132664442062378, 1.046924352645874, 1.0805821418762207]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 7.0, 7.0, 7.0, 16.0, 22.0, 38.0, 51.0, 80.0, 134.0, 202.0, 347.0, 731.0, 1364.0, 3135.0, 6939.0, 16819.0, 44847.0, 127386.0, 325241.0, 322762.0, 124641.0, 44183.0, 16683.0, 6814.0, 3125.0, 1385.0, 633.0, 408.0, 185.0, 141.0, 91.0, 44.0, 34.0, 16.0, 15.0, 7.0, 6.0, 3.0, 5.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.431640625, -2.353179931640625, -2.27471923828125, -2.196258544921875, -2.1177978515625, -2.039337158203125, -1.96087646484375, -1.882415771484375, -1.803955078125, -1.725494384765625, -1.64703369140625, -1.568572998046875, -1.4901123046875, -1.411651611328125, -1.33319091796875, -1.254730224609375, -1.17626953125, -1.097808837890625, -1.01934814453125, -0.940887451171875, -0.8624267578125, -0.783966064453125, -0.70550537109375, -0.627044677734375, -0.548583984375, -0.470123291015625, -0.39166259765625, -0.313201904296875, -0.2347412109375, -0.156280517578125, -0.07781982421875, 0.000640869140625, 0.0791015625, 0.157562255859375, 0.23602294921875, 0.314483642578125, 0.3929443359375, 0.471405029296875, 0.54986572265625, 0.628326416015625, 0.706787109375, 0.785247802734375, 0.86370849609375, 0.942169189453125, 1.0206298828125, 1.099090576171875, 1.17755126953125, 1.256011962890625, 1.33447265625, 1.412933349609375, 1.49139404296875, 1.569854736328125, 1.6483154296875, 1.726776123046875, 1.80523681640625, 1.883697509765625, 1.962158203125, 2.040618896484375, 2.11907958984375, 2.197540283203125, 2.2760009765625, 2.354461669921875, 2.43292236328125, 2.511383056640625, 2.58984375]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 5.0, 4.0, 4.0, 8.0, 9.0, 9.0, 18.0, 15.0, 15.0, 25.0, 27.0, 31.0, 34.0, 33.0, 35.0, 42.0, 40.0, 45.0, 45.0, 44.0, 53.0, 56.0, 50.0, 44.0, 33.0, 35.0, 44.0, 35.0, 23.0, 18.0, 18.0, 30.0, 15.0, 9.0, 12.0, 9.0, 10.0, 5.0, 3.0, 4.0, 4.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0], "bins": [-1.4501953125, -1.40374755859375, -1.3572998046875, -1.31085205078125, -1.264404296875, -1.21795654296875, -1.1715087890625, -1.12506103515625, -1.07861328125, -1.03216552734375, -0.9857177734375, -0.93927001953125, -0.892822265625, -0.84637451171875, -0.7999267578125, -0.75347900390625, -0.70703125, -0.66058349609375, -0.6141357421875, -0.56768798828125, -0.521240234375, -0.47479248046875, -0.4283447265625, -0.38189697265625, -0.33544921875, -0.28900146484375, -0.2425537109375, -0.19610595703125, -0.149658203125, -0.10321044921875, -0.0567626953125, -0.01031494140625, 0.0361328125, 0.08258056640625, 0.1290283203125, 0.17547607421875, 0.221923828125, 0.26837158203125, 0.3148193359375, 0.36126708984375, 0.40771484375, 0.45416259765625, 0.5006103515625, 0.54705810546875, 0.593505859375, 0.63995361328125, 0.6864013671875, 0.73284912109375, 0.779296875, 0.82574462890625, 0.8721923828125, 0.91864013671875, 0.965087890625, 1.01153564453125, 1.0579833984375, 1.10443115234375, 1.15087890625, 1.19732666015625, 1.2437744140625, 1.29022216796875, 1.336669921875, 1.38311767578125, 1.4295654296875, 1.47601318359375, 1.5224609375]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 5.0, 7.0, 9.0, 18.0, 20.0, 14.0, 37.0, 46.0, 61.0, 87.0, 157.0, 236.0, 452.0, 1172.0, 3520.0, 15782.0, 114107.0, 725454.0, 160040.0, 20469.0, 4313.0, 1374.0, 480.0, 247.0, 142.0, 102.0, 51.0, 52.0, 34.0, 27.0, 23.0, 8.0, 3.0, 5.0, 3.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.8984375, -4.72698974609375, -4.5555419921875, -4.38409423828125, -4.212646484375, -4.04119873046875, -3.8697509765625, -3.69830322265625, -3.52685546875, -3.35540771484375, -3.1839599609375, -3.01251220703125, -2.841064453125, -2.66961669921875, -2.4981689453125, -2.32672119140625, -2.1552734375, -1.98382568359375, -1.8123779296875, -1.64093017578125, -1.469482421875, -1.29803466796875, -1.1265869140625, -0.95513916015625, -0.78369140625, -0.61224365234375, -0.4407958984375, -0.26934814453125, -0.097900390625, 0.07354736328125, 0.2449951171875, 0.41644287109375, 0.587890625, 0.75933837890625, 0.9307861328125, 1.10223388671875, 1.273681640625, 1.44512939453125, 1.6165771484375, 1.78802490234375, 1.95947265625, 2.13092041015625, 2.3023681640625, 2.47381591796875, 2.645263671875, 2.81671142578125, 2.9881591796875, 3.15960693359375, 3.3310546875, 3.50250244140625, 3.6739501953125, 3.84539794921875, 4.016845703125, 4.18829345703125, 4.3597412109375, 4.53118896484375, 4.70263671875, 4.87408447265625, 5.0455322265625, 5.21697998046875, 5.388427734375, 5.55987548828125, 5.7313232421875, 5.90277099609375, 6.07421875]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 3.0, 5.0, 8.0, 10.0, 20.0, 28.0, 35.0, 41.0, 60.0, 76.0, 91.0, 80.0, 89.0, 110.0, 71.0, 64.0, 48.0, 44.0, 38.0, 29.0, 25.0, 14.0, 7.0, 8.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.984375, -18.550048828125, -18.11572265625, -17.681396484375, -17.2470703125, -16.812744140625, -16.37841796875, -15.944091796875, -15.509765625, -15.075439453125, -14.64111328125, -14.206787109375, -13.7724609375, -13.338134765625, -12.90380859375, -12.469482421875, -12.03515625, -11.600830078125, -11.16650390625, -10.732177734375, -10.2978515625, -9.863525390625, -9.42919921875, -8.994873046875, -8.560546875, -8.126220703125, -7.69189453125, -7.257568359375, -6.8232421875, -6.388916015625, -5.95458984375, -5.520263671875, -5.0859375, -4.651611328125, -4.21728515625, -3.782958984375, -3.3486328125, -2.914306640625, -2.47998046875, -2.045654296875, -1.611328125, -1.177001953125, -0.74267578125, -0.308349609375, 0.1259765625, 0.560302734375, 0.99462890625, 1.428955078125, 1.86328125, 2.297607421875, 2.73193359375, 3.166259765625, 3.6005859375, 4.034912109375, 4.46923828125, 4.903564453125, 5.337890625, 5.772216796875, 6.20654296875, 6.640869140625, 7.0751953125, 7.509521484375, 7.94384765625, 8.378173828125, 8.8125]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 14.0, 16.0, 24.0, 78.0, 244.0, 785.0, 12452.0, 1031163.0, 3061.0, 440.0, 166.0, 67.0, 20.0, 11.0, 6.0, 3.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.140625, -11.67919921875, -11.2177734375, -10.75634765625, -10.294921875, -9.83349609375, -9.3720703125, -8.91064453125, -8.44921875, -7.98779296875, -7.5263671875, -7.06494140625, -6.603515625, -6.14208984375, -5.6806640625, -5.21923828125, -4.7578125, -4.29638671875, -3.8349609375, -3.37353515625, -2.912109375, -2.45068359375, -1.9892578125, -1.52783203125, -1.06640625, -0.60498046875, -0.1435546875, 0.31787109375, 0.779296875, 1.24072265625, 1.7021484375, 2.16357421875, 2.625, 3.08642578125, 3.5478515625, 4.00927734375, 4.470703125, 4.93212890625, 5.3935546875, 5.85498046875, 6.31640625, 6.77783203125, 7.2392578125, 7.70068359375, 8.162109375, 8.62353515625, 9.0849609375, 9.54638671875, 10.0078125, 10.46923828125, 10.9306640625, 11.39208984375, 11.853515625, 12.31494140625, 12.7763671875, 13.23779296875, 13.69921875, 14.16064453125, 14.6220703125, 15.08349609375, 15.544921875, 16.00634765625, 16.4677734375, 16.92919921875, 17.390625]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 3.0, 1.0, 3.0, 6.0, 14.0, 9.0, 18.0, 22.0, 21.0, 23.0, 53.0, 69.0, 70.0, 99.0, 135.0, 111.0, 79.0, 66.0, 48.0, 38.0, 42.0, 23.0, 11.0, 14.0, 6.0, 4.0, 5.0, 3.0, 5.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.000293731689453125, -0.0002834908664226532, -0.0002732500433921814, -0.0002630092203617096, -0.0002527683973312378, -0.000242527574300766, -0.0002322867512702942, -0.0002220459282398224, -0.00021180510520935059, -0.00020156428217887878, -0.00019132345914840698, -0.00018108263611793518, -0.00017084181308746338, -0.00016060099005699158, -0.00015036016702651978, -0.00014011934399604797, -0.00012987852096557617, -0.00011963769793510437, -0.00010939687490463257, -9.915605187416077e-05, -8.891522884368896e-05, -7.867440581321716e-05, -6.843358278274536e-05, -5.819275975227356e-05, -4.795193672180176e-05, -3.7711113691329956e-05, -2.7470290660858154e-05, -1.7229467630386353e-05, -6.988644599914551e-06, 3.252178430557251e-06, 1.3493001461029053e-05, 2.3733824491500854e-05, 3.3974647521972656e-05, 4.421547055244446e-05, 5.445629358291626e-05, 6.469711661338806e-05, 7.493793964385986e-05, 8.517876267433167e-05, 9.541958570480347e-05, 0.00010566040873527527, 0.00011590123176574707, 0.00012614205479621887, 0.00013638287782669067, 0.00014662370085716248, 0.00015686452388763428, 0.00016710534691810608, 0.00017734616994857788, 0.00018758699297904968, 0.00019782781600952148, 0.00020806863903999329, 0.0002183094620704651, 0.0002285502851009369, 0.0002387911081314087, 0.0002490319311618805, 0.0002592727541923523, 0.0002695135772228241, 0.0002797544002532959, 0.0002899952232837677, 0.0003002360463142395, 0.0003104768693447113, 0.0003207176923751831, 0.0003309585154056549, 0.0003411993384361267, 0.0003514401614665985, 0.0003616809844970703]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 1.0, 2.0, 4.0, 1.0, 3.0, 9.0, 5.0, 12.0, 17.0, 24.0, 30.0, 44.0, 71.0, 115.0, 192.0, 379.0, 800.0, 2133.0, 7218.0, 43392.0, 686797.0, 279576.0, 20339.0, 4554.0, 1445.0, 624.0, 316.0, 179.0, 91.0, 60.0, 30.0, 19.0, 27.0, 8.0, 9.0, 7.0, 7.0, 3.0, 6.0, 4.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-2.828125, -2.749176025390625, -2.67022705078125, -2.591278076171875, -2.5123291015625, -2.433380126953125, -2.35443115234375, -2.275482177734375, -2.196533203125, -2.117584228515625, -2.03863525390625, -1.959686279296875, -1.8807373046875, -1.801788330078125, -1.72283935546875, -1.643890380859375, -1.56494140625, -1.485992431640625, -1.40704345703125, -1.328094482421875, -1.2491455078125, -1.170196533203125, -1.09124755859375, -1.012298583984375, -0.933349609375, -0.854400634765625, -0.77545166015625, -0.696502685546875, -0.6175537109375, -0.538604736328125, -0.45965576171875, -0.380706787109375, -0.3017578125, -0.222808837890625, -0.14385986328125, -0.064910888671875, 0.0140380859375, 0.092987060546875, 0.17193603515625, 0.250885009765625, 0.329833984375, 0.408782958984375, 0.48773193359375, 0.566680908203125, 0.6456298828125, 0.724578857421875, 0.80352783203125, 0.882476806640625, 0.96142578125, 1.040374755859375, 1.11932373046875, 1.198272705078125, 1.2772216796875, 1.356170654296875, 1.43511962890625, 1.514068603515625, 1.593017578125, 1.671966552734375, 1.75091552734375, 1.829864501953125, 1.9088134765625, 1.987762451171875, 2.06671142578125, 2.145660400390625, 2.224609375]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 4.0, 11.0, 26.0, 97.0, 319.0, 383.0, 89.0, 31.0, 17.0, 7.0, 7.0, 3.0, 4.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.6875, -5.460693359375, -5.23388671875, -5.007080078125, -4.7802734375, -4.553466796875, -4.32666015625, -4.099853515625, -3.873046875, -3.646240234375, -3.41943359375, -3.192626953125, -2.9658203125, -2.739013671875, -2.51220703125, -2.285400390625, -2.05859375, -1.831787109375, -1.60498046875, -1.378173828125, -1.1513671875, -0.924560546875, -0.69775390625, -0.470947265625, -0.244140625, -0.017333984375, 0.20947265625, 0.436279296875, 0.6630859375, 0.889892578125, 1.11669921875, 1.343505859375, 1.5703125, 1.797119140625, 2.02392578125, 2.250732421875, 2.4775390625, 2.704345703125, 2.93115234375, 3.157958984375, 3.384765625, 3.611572265625, 3.83837890625, 4.065185546875, 4.2919921875, 4.518798828125, 4.74560546875, 4.972412109375, 5.19921875, 5.426025390625, 5.65283203125, 5.879638671875, 6.1064453125, 6.333251953125, 6.56005859375, 6.786865234375, 7.013671875, 7.240478515625, 7.46728515625, 7.694091796875, 7.9208984375, 8.147705078125, 8.37451171875, 8.601318359375, 8.828125]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 14.0, 36.0, 85.0, 153.0, 254.0, 260.0, 120.0, 48.0, 23.0, 12.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-106.01998901367188, -103.90133666992188, -101.7826919555664, -99.6640396118164, -97.54539489746094, -95.42674255371094, -93.30809783935547, -91.18944549560547, -89.07080078125, -86.9521484375, -84.83350372314453, -82.71485137939453, -80.59620666503906, -78.47755432128906, -76.3589096069336, -74.2402572631836, -72.12161254882812, -70.00296020507812, -67.88431549072266, -65.76566314697266, -63.64701843261719, -61.52836990356445, -59.40972137451172, -57.29106903076172, -55.17241668701172, -53.053768157958984, -50.93511962890625, -48.816471099853516, -46.69782257080078, -44.57917404174805, -42.46052551269531, -40.34187316894531, -38.223228454589844, -36.10457992553711, -33.985931396484375, -31.86728286743164, -29.748634338378906, -27.629985809326172, -25.511335372924805, -23.39268684387207, -21.27404022216797, -19.155391693115234, -17.0367431640625, -14.91809368133545, -12.799445152282715, -10.68079662322998, -8.56214714050293, -6.443498611450195, -4.324850082397461, -2.2062013149261475, -0.08755254745483398, 2.0310964584350586, 4.149744987487793, 6.268393516540527, 8.387042999267578, 10.505691528320312, 12.624340057373047, 14.742988586425781, 16.861637115478516, 18.98028564453125, 21.098934173583984, 23.21758270263672, 25.336233139038086, 27.45488166809082, 29.573530197143555]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 2.0, 3.0, 6.0, 10.0, 2.0, 4.0, 12.0, 8.0, 18.0, 10.0, 34.0, 29.0, 33.0, 35.0, 37.0, 42.0, 43.0, 33.0, 58.0, 56.0, 60.0, 44.0, 47.0, 48.0, 54.0, 26.0, 39.0, 45.0, 29.0, 16.0, 20.0, 14.0, 22.0, 6.0, 13.0, 7.0, 10.0, 7.0, 6.0, 3.0, 9.0, 3.0, 1.0, 5.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.69606018066406, -34.529457092285156, -33.36285400390625, -32.196250915527344, -31.02964973449707, -29.863046646118164, -28.69644546508789, -27.529842376708984, -26.363239288330078, -25.196636199951172, -24.030033111572266, -22.863431930541992, -21.696828842163086, -20.53022575378418, -19.363624572753906, -18.197021484375, -17.030418395996094, -15.863815307617188, -14.697213172912598, -13.530611038208008, -12.364007949829102, -11.197404861450195, -10.030802726745605, -8.864200592041016, -7.697597503662109, -6.530994892120361, -5.364392280578613, -4.197789669036865, -3.031187057495117, -1.8645844459533691, -0.6979818344116211, 0.46862030029296875, 1.635223388671875, 2.801826000213623, 3.968428611755371, 5.135031223297119, 6.301633834838867, 7.468236446380615, 8.634839057922363, 9.801441192626953, 10.96804428100586, 12.134647369384766, 13.301249504089355, 14.467851638793945, 15.634454727172852, 16.801057815551758, 17.96765899658203, 19.134262084960938, 20.300865173339844, 21.46746826171875, 22.634071350097656, 23.80067253112793, 24.967275619506836, 26.133878707885742, 27.300479888916016, 28.467082977294922, 29.633686065673828, 30.800289154052734, 31.96689224243164, 33.13349533081055, 34.30009460449219, 35.466697692871094, 36.63330078125, 37.799903869628906, 38.96650695800781]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 4.0, 5.0, 3.0, 3.0, 7.0, 10.0, 15.0, 37.0, 48.0, 49.0, 96.0, 160.0, 302.0, 570.0, 1101.0, 2427.0, 5820.0, 18978.0, 224306.0, 3904396.0, 23193.0, 6922.0, 2766.0, 1276.0, 729.0, 365.0, 245.0, 156.0, 93.0, 48.0, 51.0, 29.0, 21.0, 14.0, 13.0, 8.0, 6.0, 1.0, 4.0, 7.0, 4.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.701171875, -2.589019775390625, -2.47686767578125, -2.364715576171875, -2.2525634765625, -2.140411376953125, -2.02825927734375, -1.916107177734375, -1.803955078125, -1.691802978515625, -1.57965087890625, -1.467498779296875, -1.3553466796875, -1.243194580078125, -1.13104248046875, -1.018890380859375, -0.90673828125, -0.794586181640625, -0.68243408203125, -0.570281982421875, -0.4581298828125, -0.345977783203125, -0.23382568359375, -0.121673583984375, -0.009521484375, 0.102630615234375, 0.21478271484375, 0.326934814453125, 0.4390869140625, 0.551239013671875, 0.66339111328125, 0.775543212890625, 0.8876953125, 0.999847412109375, 1.11199951171875, 1.224151611328125, 1.3363037109375, 1.448455810546875, 1.56060791015625, 1.672760009765625, 1.784912109375, 1.897064208984375, 2.00921630859375, 2.121368408203125, 2.2335205078125, 2.345672607421875, 2.45782470703125, 2.569976806640625, 2.68212890625, 2.794281005859375, 2.90643310546875, 3.018585205078125, 3.1307373046875, 3.242889404296875, 3.35504150390625, 3.467193603515625, 3.579345703125, 3.691497802734375, 3.80364990234375, 3.915802001953125, 4.0279541015625, 4.140106201171875, 4.25225830078125, 4.364410400390625, 4.4765625]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 8.0, 10.0, 8.0, 9.0, 10.0, 16.0, 23.0, 29.0, 30.0, 32.0, 35.0, 49.0, 49.0, 74.0, 66.0, 62.0, 53.0, 56.0, 64.0, 49.0, 49.0, 36.0, 31.0, 28.0, 27.0, 22.0, 17.0, 12.0, 9.0, 10.0, 6.0, 8.0, 3.0, 6.0, 3.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.6298828125, -1.5654449462890625, -1.501007080078125, -1.4365692138671875, -1.37213134765625, -1.3076934814453125, -1.243255615234375, -1.1788177490234375, -1.1143798828125, -1.0499420166015625, -0.985504150390625, -0.9210662841796875, -0.85662841796875, -0.7921905517578125, -0.727752685546875, -0.6633148193359375, -0.598876953125, -0.5344390869140625, -0.470001220703125, -0.4055633544921875, -0.34112548828125, -0.2766876220703125, -0.212249755859375, -0.1478118896484375, -0.0833740234375, -0.0189361572265625, 0.045501708984375, 0.1099395751953125, 0.17437744140625, 0.2388153076171875, 0.303253173828125, 0.3676910400390625, 0.43212890625, 0.4965667724609375, 0.561004638671875, 0.6254425048828125, 0.68988037109375, 0.7543182373046875, 0.818756103515625, 0.8831939697265625, 0.9476318359375, 1.0120697021484375, 1.076507568359375, 1.1409454345703125, 1.20538330078125, 1.2698211669921875, 1.334259033203125, 1.3986968994140625, 1.463134765625, 1.5275726318359375, 1.592010498046875, 1.6564483642578125, 1.72088623046875, 1.7853240966796875, 1.849761962890625, 1.9141998291015625, 1.9786376953125, 2.0430755615234375, 2.107513427734375, 2.1719512939453125, 2.23638916015625, 2.3008270263671875, 2.365264892578125, 2.4297027587890625, 2.494140625]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 4.0, 4.0, 3.0, 9.0, 14.0, 9.0, 13.0, 23.0, 22.0, 30.0, 35.0, 69.0, 96.0, 93.0, 170.0, 344.0, 1022.0, 3260.0, 14706.0, 395902.0, 3756271.0, 16464.0, 3594.0, 1099.0, 384.0, 189.0, 119.0, 82.0, 64.0, 53.0, 32.0, 31.0, 15.0, 23.0, 17.0, 9.0, 5.0, 4.0, 1.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.390625, -7.159912109375, -6.92919921875, -6.698486328125, -6.4677734375, -6.237060546875, -6.00634765625, -5.775634765625, -5.544921875, -5.314208984375, -5.08349609375, -4.852783203125, -4.6220703125, -4.391357421875, -4.16064453125, -3.929931640625, -3.69921875, -3.468505859375, -3.23779296875, -3.007080078125, -2.7763671875, -2.545654296875, -2.31494140625, -2.084228515625, -1.853515625, -1.622802734375, -1.39208984375, -1.161376953125, -0.9306640625, -0.699951171875, -0.46923828125, -0.238525390625, -0.0078125, 0.222900390625, 0.45361328125, 0.684326171875, 0.9150390625, 1.145751953125, 1.37646484375, 1.607177734375, 1.837890625, 2.068603515625, 2.29931640625, 2.530029296875, 2.7607421875, 2.991455078125, 3.22216796875, 3.452880859375, 3.68359375, 3.914306640625, 4.14501953125, 4.375732421875, 4.6064453125, 4.837158203125, 5.06787109375, 5.298583984375, 5.529296875, 5.760009765625, 5.99072265625, 6.221435546875, 6.4521484375, 6.682861328125, 6.91357421875, 7.144287109375, 7.375]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 0.0, 4.0, 4.0, 7.0, 5.0, 12.0, 17.0, 27.0, 41.0, 125.0, 3467.0, 240.0, 53.0, 28.0, 15.0, 10.0, 5.0, 4.0, 8.0, 3.0, 1.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.126953125, -2.077178955078125, -2.02740478515625, -1.977630615234375, -1.9278564453125, -1.878082275390625, -1.82830810546875, -1.778533935546875, -1.728759765625, -1.678985595703125, -1.62921142578125, -1.579437255859375, -1.5296630859375, -1.479888916015625, -1.43011474609375, -1.380340576171875, -1.33056640625, -1.280792236328125, -1.23101806640625, -1.181243896484375, -1.1314697265625, -1.081695556640625, -1.03192138671875, -0.982147216796875, -0.932373046875, -0.882598876953125, -0.83282470703125, -0.783050537109375, -0.7332763671875, -0.683502197265625, -0.63372802734375, -0.583953857421875, -0.5341796875, -0.484405517578125, -0.43463134765625, -0.384857177734375, -0.3350830078125, -0.285308837890625, -0.23553466796875, -0.185760498046875, -0.135986328125, -0.086212158203125, -0.03643798828125, 0.013336181640625, 0.0631103515625, 0.112884521484375, 0.16265869140625, 0.212432861328125, 0.26220703125, 0.311981201171875, 0.36175537109375, 0.411529541015625, 0.4613037109375, 0.511077880859375, 0.56085205078125, 0.610626220703125, 0.660400390625, 0.710174560546875, 0.75994873046875, 0.809722900390625, 0.8594970703125, 0.909271240234375, 0.95904541015625, 1.008819580078125, 1.05859375]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 4.0, 3.0, 7.0, 7.0, 9.0, 18.0, 17.0, 33.0, 38.0, 46.0, 85.0, 123.0, 121.0, 134.0, 122.0, 96.0, 56.0, 41.0, 28.0, 8.0, 5.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.407549858093262, -7.2488627433776855, -7.090176105499268, -6.931488990783691, -6.772802352905273, -6.614115238189697, -6.455428600311279, -6.296741485595703, -6.138054847717285, -5.979367733001709, -5.820681095123291, -5.661993980407715, -5.503307342529297, -5.344620227813721, -5.185933589935303, -5.027246475219727, -4.86855936050415, -4.709872245788574, -4.551185607910156, -4.39249849319458, -4.233811855316162, -4.075124740600586, -3.916437864303589, -3.757750988006592, -3.5990641117095947, -3.4403772354125977, -3.2816903591156006, -3.1230034828186035, -2.9643163681030273, -2.8056297302246094, -2.646942615509033, -2.488255739212036, -2.329569101333618, -2.170882225036621, -2.012195348739624, -1.8535083532333374, -1.6948214769363403, -1.5361346006393433, -1.3774476051330566, -1.2187607288360596, -1.0600738525390625, -0.9013869762420654, -0.7427000403404236, -0.5840131044387817, -0.42532622814178467, -0.2666393518447876, -0.10795241594314575, 0.050734519958496094, 0.20942139625549316, 0.3681083023548126, 0.5267952084541321, 0.6854821443557739, 0.844169020652771, 1.002855896949768, 1.1615428924560547, 1.3202297687530518, 1.4789166450500488, 1.637603521347046, 1.796290397644043, 1.9549773931503296, 2.113664150238037, 2.2723512649536133, 2.4310381412506104, 2.5897250175476074, 2.7484118938446045]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 4.0, 1.0, 6.0, 10.0, 8.0, 9.0, 9.0, 10.0, 27.0, 18.0, 29.0, 27.0, 30.0, 26.0, 35.0, 37.0, 35.0, 35.0, 51.0, 44.0, 46.0, 51.0, 43.0, 42.0, 39.0, 44.0, 36.0, 32.0, 36.0, 25.0, 29.0, 22.0, 25.0, 15.0, 17.0, 16.0, 10.0, 8.0, 7.0, 5.0, 6.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3.207650661468506, -3.11444354057312, -3.0212364196777344, -2.9280292987823486, -2.834822177886963, -2.741614818572998, -2.6484079360961914, -2.5552005767822266, -2.461993455886841, -2.368786334991455, -2.2755792140960693, -2.1823720932006836, -2.089164972305298, -1.9959577322006226, -1.9027506113052368, -1.8095433712005615, -1.7163363695144653, -1.6231292486190796, -1.5299221277236938, -1.4367148876190186, -1.3435077667236328, -1.250300645828247, -1.1570935249328613, -1.0638864040374756, -0.9706792235374451, -0.8774721026420593, -0.7842649221420288, -0.6910578012466431, -0.5978506803512573, -0.5046434998512268, -0.41143637895584106, -0.31822919845581055, -0.2250220775604248, -0.13181492686271667, -0.03860779106616974, 0.0545993447303772, 0.14780649542808533, 0.24101364612579346, 0.3342207670211792, 0.4274279475212097, 0.5206350684165955, 0.6138421893119812, 0.7070493698120117, 0.8002564907073975, 0.8934636116027832, 0.9866707921028137, 1.0798778533935547, 1.17308509349823, 1.2662922143936157, 1.3594993352890015, 1.4527064561843872, 1.5459136962890625, 1.6391208171844482, 1.732327938079834, 1.8255350589752197, 1.9187421798706055, 2.011949300765991, 2.105156421661377, 2.1983635425567627, 2.2915706634521484, 2.384777784347534, 2.47798490524292, 2.5711922645568848, 2.6643993854522705, 2.7576065063476562]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 6.0, 6.0, 9.0, 18.0, 29.0, 26.0, 51.0, 59.0, 71.0, 147.0, 196.0, 312.0, 495.0, 862.0, 1681.0, 3088.0, 6301.0, 13869.0, 34086.0, 88135.0, 229113.0, 353618.0, 191827.0, 72131.0, 28462.0, 12015.0, 5509.0, 2785.0, 1508.0, 799.0, 459.0, 285.0, 191.0, 121.0, 79.0, 68.0, 48.0, 28.0, 17.0, 12.0, 11.0, 7.0, 7.0, 1.0, 3.0, 0.0, 4.0, 3.0, 3.0, 1.0], "bins": [-2.75390625, -2.676116943359375, -2.59832763671875, -2.520538330078125, -2.4427490234375, -2.364959716796875, -2.28717041015625, -2.209381103515625, -2.131591796875, -2.053802490234375, -1.97601318359375, -1.898223876953125, -1.8204345703125, -1.742645263671875, -1.66485595703125, -1.587066650390625, -1.50927734375, -1.431488037109375, -1.35369873046875, -1.275909423828125, -1.1981201171875, -1.120330810546875, -1.04254150390625, -0.964752197265625, -0.886962890625, -0.809173583984375, -0.73138427734375, -0.653594970703125, -0.5758056640625, -0.498016357421875, -0.42022705078125, -0.342437744140625, -0.2646484375, -0.186859130859375, -0.10906982421875, -0.031280517578125, 0.0465087890625, 0.124298095703125, 0.20208740234375, 0.279876708984375, 0.357666015625, 0.435455322265625, 0.51324462890625, 0.591033935546875, 0.6688232421875, 0.746612548828125, 0.82440185546875, 0.902191162109375, 0.97998046875, 1.057769775390625, 1.13555908203125, 1.213348388671875, 1.2911376953125, 1.368927001953125, 1.44671630859375, 1.524505615234375, 1.602294921875, 1.680084228515625, 1.75787353515625, 1.835662841796875, 1.9134521484375, 1.991241455078125, 2.06903076171875, 2.146820068359375, 2.224609375]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 4.0, 3.0, 8.0, 4.0, 7.0, 9.0, 11.0, 18.0, 20.0, 26.0, 31.0, 43.0, 38.0, 69.0, 44.0, 52.0, 58.0, 62.0, 63.0, 45.0, 64.0, 41.0, 49.0, 42.0, 40.0, 26.0, 31.0, 18.0, 25.0, 11.0, 8.0, 9.0, 8.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7587890625, -1.6917877197265625, -1.624786376953125, -1.5577850341796875, -1.49078369140625, -1.4237823486328125, -1.356781005859375, -1.2897796630859375, -1.2227783203125, -1.1557769775390625, -1.088775634765625, -1.0217742919921875, -0.95477294921875, -0.8877716064453125, -0.820770263671875, -0.7537689208984375, -0.686767578125, -0.6197662353515625, -0.552764892578125, -0.4857635498046875, -0.41876220703125, -0.3517608642578125, -0.284759521484375, -0.2177581787109375, -0.1507568359375, -0.0837554931640625, -0.016754150390625, 0.0502471923828125, 0.11724853515625, 0.1842498779296875, 0.251251220703125, 0.3182525634765625, 0.38525390625, 0.4522552490234375, 0.519256591796875, 0.5862579345703125, 0.65325927734375, 0.7202606201171875, 0.787261962890625, 0.8542633056640625, 0.9212646484375, 0.9882659912109375, 1.055267333984375, 1.1222686767578125, 1.18927001953125, 1.2562713623046875, 1.323272705078125, 1.3902740478515625, 1.457275390625, 1.5242767333984375, 1.591278076171875, 1.6582794189453125, 1.72528076171875, 1.7922821044921875, 1.859283447265625, 1.9262847900390625, 1.9932861328125, 2.0602874755859375, 2.127288818359375, 2.1942901611328125, 2.26129150390625, 2.3282928466796875, 2.395294189453125, 2.4622955322265625, 2.529296875]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 1.0, 3.0, 1.0, 1.0, 4.0, 3.0, 3.0, 2.0, 4.0, 9.0, 7.0, 12.0, 15.0, 20.0, 32.0, 44.0, 88.0, 132.0, 254.0, 455.0, 1112.0, 3500.0, 16362.0, 160339.0, 767535.0, 83326.0, 10944.0, 2598.0, 869.0, 386.0, 184.0, 113.0, 62.0, 42.0, 28.0, 24.0, 14.0, 9.0, 6.0, 8.0, 3.0, 2.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.47265625, -6.24456787109375, -6.0164794921875, -5.78839111328125, -5.560302734375, -5.33221435546875, -5.1041259765625, -4.87603759765625, -4.64794921875, -4.41986083984375, -4.1917724609375, -3.96368408203125, -3.735595703125, -3.50750732421875, -3.2794189453125, -3.05133056640625, -2.8232421875, -2.59515380859375, -2.3670654296875, -2.13897705078125, -1.910888671875, -1.68280029296875, -1.4547119140625, -1.22662353515625, -0.99853515625, -0.77044677734375, -0.5423583984375, -0.31427001953125, -0.086181640625, 0.14190673828125, 0.3699951171875, 0.59808349609375, 0.826171875, 1.05426025390625, 1.2823486328125, 1.51043701171875, 1.738525390625, 1.96661376953125, 2.1947021484375, 2.42279052734375, 2.65087890625, 2.87896728515625, 3.1070556640625, 3.33514404296875, 3.563232421875, 3.79132080078125, 4.0194091796875, 4.24749755859375, 4.4755859375, 4.70367431640625, 4.9317626953125, 5.15985107421875, 5.387939453125, 5.61602783203125, 5.8441162109375, 6.07220458984375, 6.30029296875, 6.52838134765625, 6.7564697265625, 6.98455810546875, 7.212646484375, 7.44073486328125, 7.6688232421875, 7.89691162109375, 8.125]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 8.0, 7.0, 5.0, 6.0, 6.0, 12.0, 17.0, 17.0, 27.0, 31.0, 30.0, 32.0, 45.0, 44.0, 54.0, 57.0, 45.0, 54.0, 60.0, 54.0, 44.0, 52.0, 36.0, 41.0, 37.0, 43.0, 33.0, 18.0, 18.0, 8.0, 13.0, 11.0, 5.0, 8.0, 5.0, 6.0, 2.0, 4.0, 5.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.671875, -8.3955078125, -8.119140625, -7.8427734375, -7.56640625, -7.2900390625, -7.013671875, -6.7373046875, -6.4609375, -6.1845703125, -5.908203125, -5.6318359375, -5.35546875, -5.0791015625, -4.802734375, -4.5263671875, -4.25, -3.9736328125, -3.697265625, -3.4208984375, -3.14453125, -2.8681640625, -2.591796875, -2.3154296875, -2.0390625, -1.7626953125, -1.486328125, -1.2099609375, -0.93359375, -0.6572265625, -0.380859375, -0.1044921875, 0.171875, 0.4482421875, 0.724609375, 1.0009765625, 1.27734375, 1.5537109375, 1.830078125, 2.1064453125, 2.3828125, 2.6591796875, 2.935546875, 3.2119140625, 3.48828125, 3.7646484375, 4.041015625, 4.3173828125, 4.59375, 4.8701171875, 5.146484375, 5.4228515625, 5.69921875, 5.9755859375, 6.251953125, 6.5283203125, 6.8046875, 7.0810546875, 7.357421875, 7.6337890625, 7.91015625, 8.1865234375, 8.462890625, 8.7392578125, 9.015625]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 4.0, 8.0, 8.0, 8.0, 12.0, 25.0, 43.0, 49.0, 61.0, 88.0, 113.0, 220.0, 304.0, 534.0, 845.0, 1705.0, 4563.0, 21731.0, 428764.0, 555043.0, 25214.0, 4945.0, 1905.0, 874.0, 501.0, 322.0, 227.0, 142.0, 87.0, 70.0, 47.0, 25.0, 12.0, 20.0, 21.0, 4.0, 4.0, 5.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-3.603515625, -3.50067138671875, -3.3978271484375, -3.29498291015625, -3.192138671875, -3.08929443359375, -2.9864501953125, -2.88360595703125, -2.78076171875, -2.67791748046875, -2.5750732421875, -2.47222900390625, -2.369384765625, -2.26654052734375, -2.1636962890625, -2.06085205078125, -1.9580078125, -1.85516357421875, -1.7523193359375, -1.64947509765625, -1.546630859375, -1.44378662109375, -1.3409423828125, -1.23809814453125, -1.13525390625, -1.03240966796875, -0.9295654296875, -0.82672119140625, -0.723876953125, -0.62103271484375, -0.5181884765625, -0.41534423828125, -0.3125, -0.20965576171875, -0.1068115234375, -0.00396728515625, 0.098876953125, 0.20172119140625, 0.3045654296875, 0.40740966796875, 0.51025390625, 0.61309814453125, 0.7159423828125, 0.81878662109375, 0.921630859375, 1.02447509765625, 1.1273193359375, 1.23016357421875, 1.3330078125, 1.43585205078125, 1.5386962890625, 1.64154052734375, 1.744384765625, 1.84722900390625, 1.9500732421875, 2.05291748046875, 2.15576171875, 2.25860595703125, 2.3614501953125, 2.46429443359375, 2.567138671875, 2.66998291015625, 2.7728271484375, 2.87567138671875, 2.978515625]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 5.0, 6.0, 7.0, 18.0, 37.0, 41.0, 64.0, 131.0, 201.0, 189.0, 137.0, 71.0, 37.0, 23.0, 14.0, 7.0, 4.0, 7.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0007686614990234375, -0.0007478520274162292, -0.000727042555809021, -0.0007062330842018127, -0.0006854236125946045, -0.0006646141409873962, -0.000643804669380188, -0.0006229951977729797, -0.0006021857261657715, -0.0005813762545585632, -0.000560566782951355, -0.0005397573113441467, -0.0005189478397369385, -0.0004981383681297302, -0.00047732889652252197, -0.0004565194249153137, -0.00043570995330810547, -0.0004149004817008972, -0.00039409101009368896, -0.0003732815384864807, -0.00035247206687927246, -0.0003316625952720642, -0.00031085312366485596, -0.0002900436520576477, -0.00026923418045043945, -0.0002484247088432312, -0.00022761523723602295, -0.0002068057656288147, -0.00018599629402160645, -0.0001651868224143982, -0.00014437735080718994, -0.0001235678791999817, -0.00010275840759277344, -8.194893598556519e-05, -6.113946437835693e-05, -4.032999277114868e-05, -1.952052116394043e-05, 1.2889504432678223e-06, 2.2098422050476074e-05, 4.2907893657684326e-05, 6.371736526489258e-05, 8.452683687210083e-05, 0.00010533630847930908, 0.00012614578008651733, 0.00014695525169372559, 0.00016776472330093384, 0.0001885741949081421, 0.00020938366651535034, 0.0002301931381225586, 0.00025100260972976685, 0.0002718120813369751, 0.00029262155294418335, 0.0003134310245513916, 0.00033424049615859985, 0.0003550499677658081, 0.00037585943937301636, 0.0003966689109802246, 0.00041747838258743286, 0.0004382878541946411, 0.00045909732580184937, 0.0004799067974090576, 0.0005007162690162659, 0.0005215257406234741, 0.0005423352122306824, 0.0005631446838378906]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 4.0, 8.0, 8.0, 12.0, 17.0, 30.0, 38.0, 80.0, 150.0, 294.0, 819.0, 2875.0, 26963.0, 872447.0, 136262.0, 6339.0, 1292.0, 470.0, 201.0, 108.0, 53.0, 28.0, 22.0, 10.0, 4.0, 8.0, 3.0, 3.0, 1.0, 5.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.806640625, -3.669281005859375, -3.53192138671875, -3.394561767578125, -3.2572021484375, -3.119842529296875, -2.98248291015625, -2.845123291015625, -2.707763671875, -2.570404052734375, -2.43304443359375, -2.295684814453125, -2.1583251953125, -2.020965576171875, -1.88360595703125, -1.746246337890625, -1.60888671875, -1.471527099609375, -1.33416748046875, -1.196807861328125, -1.0594482421875, -0.922088623046875, -0.78472900390625, -0.647369384765625, -0.510009765625, -0.372650146484375, -0.23529052734375, -0.097930908203125, 0.0394287109375, 0.176788330078125, 0.31414794921875, 0.451507568359375, 0.5888671875, 0.726226806640625, 0.86358642578125, 1.000946044921875, 1.1383056640625, 1.275665283203125, 1.41302490234375, 1.550384521484375, 1.687744140625, 1.825103759765625, 1.96246337890625, 2.099822998046875, 2.2371826171875, 2.374542236328125, 2.51190185546875, 2.649261474609375, 2.78662109375, 2.923980712890625, 3.06134033203125, 3.198699951171875, 3.3360595703125, 3.473419189453125, 3.61077880859375, 3.748138427734375, 3.885498046875, 4.022857666015625, 4.16021728515625, 4.297576904296875, 4.4349365234375, 4.572296142578125, 4.70965576171875, 4.847015380859375, 4.984375]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 2.0, 3.0, 13.0, 21.0, 36.0, 68.0, 132.0, 182.0, 191.0, 151.0, 97.0, 50.0, 22.0, 15.0, 7.0, 7.0, 5.0, 1.0, 0.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.458984375, -3.302947998046875, -3.14691162109375, -2.990875244140625, -2.8348388671875, -2.678802490234375, -2.52276611328125, -2.366729736328125, -2.210693359375, -2.054656982421875, -1.89862060546875, -1.742584228515625, -1.5865478515625, -1.430511474609375, -1.27447509765625, -1.118438720703125, -0.96240234375, -0.806365966796875, -0.65032958984375, -0.494293212890625, -0.3382568359375, -0.182220458984375, -0.02618408203125, 0.129852294921875, 0.285888671875, 0.441925048828125, 0.59796142578125, 0.753997802734375, 0.9100341796875, 1.066070556640625, 1.22210693359375, 1.378143310546875, 1.5341796875, 1.690216064453125, 1.84625244140625, 2.002288818359375, 2.1583251953125, 2.314361572265625, 2.47039794921875, 2.626434326171875, 2.782470703125, 2.938507080078125, 3.09454345703125, 3.250579833984375, 3.4066162109375, 3.562652587890625, 3.71868896484375, 3.874725341796875, 4.03076171875, 4.186798095703125, 4.34283447265625, 4.498870849609375, 4.6549072265625, 4.810943603515625, 4.96697998046875, 5.123016357421875, 5.279052734375, 5.435089111328125, 5.59112548828125, 5.747161865234375, 5.9031982421875, 6.059234619140625, 6.21527099609375, 6.371307373046875, 6.52734375]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 6.0, 8.0, 14.0, 30.0, 57.0, 98.0, 132.0, 197.0, 167.0, 113.0, 78.0, 46.0, 27.0, 17.0, 8.0, 5.0, 3.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-58.37742614746094, -56.696720123291016, -55.01601028442383, -53.335304260253906, -51.65459442138672, -49.9738883972168, -48.293182373046875, -46.61247253417969, -44.931766510009766, -43.251060485839844, -41.570350646972656, -39.889644622802734, -38.20893859863281, -36.528228759765625, -34.8475227355957, -33.16681671142578, -31.486106872558594, -29.80539894104004, -28.124691009521484, -26.443984985351562, -24.763277053833008, -23.082569122314453, -21.40186309814453, -19.721155166625977, -18.040447235107422, -16.359739303588867, -14.679032325744629, -12.99832534790039, -11.317617416381836, -9.636909484863281, -7.956202507019043, -6.275495529174805, -4.59478759765625, -2.9140801429748535, -1.233372688293457, 0.44733476638793945, 2.128042221069336, 3.8087501525878906, 5.489457130432129, 7.170164108276367, 8.850872039794922, 10.531579971313477, 12.212286949157715, 13.892993927001953, 15.573701858520508, 17.254409790039062, 18.935115814208984, 20.61582374572754, 22.296531677246094, 23.97723960876465, 25.657947540283203, 27.338653564453125, 29.01936149597168, 30.700069427490234, 32.380775451660156, 34.061485290527344, 35.742191314697266, 37.42289733886719, 39.103607177734375, 40.7843132019043, 42.46501922607422, 44.145729064941406, 45.82643508911133, 47.50714111328125, 49.18785095214844]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 4.0, 2.0, 5.0, 5.0, 5.0, 10.0, 8.0, 10.0, 14.0, 16.0, 18.0, 22.0, 25.0, 19.0, 33.0, 34.0, 36.0, 44.0, 49.0, 37.0, 65.0, 41.0, 55.0, 41.0, 53.0, 48.0, 28.0, 40.0, 38.0, 29.0, 21.0, 27.0, 21.0, 23.0, 14.0, 12.0, 10.0, 8.0, 9.0, 8.0, 8.0, 5.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-34.93851089477539, -33.84404754638672, -32.74958801269531, -31.65512466430664, -30.56066131591797, -29.46619987487793, -28.37173843383789, -27.27727508544922, -26.18281364440918, -25.08835220336914, -23.99388885498047, -22.89942741394043, -21.80496597290039, -20.71050262451172, -19.61604118347168, -18.52157974243164, -17.42711639404297, -16.33265495300293, -15.238191604614258, -14.143730163574219, -13.049267768859863, -11.954805374145508, -10.860343933105469, -9.765881538391113, -8.671419143676758, -7.576956748962402, -6.482494831085205, -5.388032913208008, -4.293570518493652, -3.199108123779297, -2.1046462059020996, -1.0101842880249023, 0.0842742919921875, 1.1787364482879639, 2.2731986045837402, 3.3676607608795166, 4.462122917175293, 5.556585311889648, 6.651047229766846, 7.745509147644043, 8.839971542358398, 9.934433937072754, 11.02889633178711, 12.123357772827148, 13.217820167541504, 14.31228256225586, 15.406744003295898, 16.501205444335938, 17.59566879272461, 18.69013023376465, 19.78459358215332, 20.87905502319336, 21.97351837158203, 23.06797981262207, 24.16244125366211, 25.25690460205078, 26.35136604309082, 27.44582748413086, 28.54029083251953, 29.63475227355957, 30.72921371459961, 31.82367706298828, 32.91814041137695, 34.01259994506836, 35.10706329345703]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 1.0, 3.0, 1.0, 3.0, 3.0, 11.0, 5.0, 11.0, 27.0, 29.0, 36.0, 51.0, 94.0, 164.0, 303.0, 518.0, 1171.0, 3511.0, 33861.0, 4142718.0, 8240.0, 1811.0, 745.0, 406.0, 187.0, 110.0, 79.0, 56.0, 39.0, 16.0, 19.0, 18.0, 11.0, 10.0, 8.0, 0.0, 4.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-8.3359375, -8.030029296875, -7.72412109375, -7.418212890625, -7.1123046875, -6.806396484375, -6.50048828125, -6.194580078125, -5.888671875, -5.582763671875, -5.27685546875, -4.970947265625, -4.6650390625, -4.359130859375, -4.05322265625, -3.747314453125, -3.44140625, -3.135498046875, -2.82958984375, -2.523681640625, -2.2177734375, -1.911865234375, -1.60595703125, -1.300048828125, -0.994140625, -0.688232421875, -0.38232421875, -0.076416015625, 0.2294921875, 0.535400390625, 0.84130859375, 1.147216796875, 1.453125, 1.759033203125, 2.06494140625, 2.370849609375, 2.6767578125, 2.982666015625, 3.28857421875, 3.594482421875, 3.900390625, 4.206298828125, 4.51220703125, 4.818115234375, 5.1240234375, 5.429931640625, 5.73583984375, 6.041748046875, 6.34765625, 6.653564453125, 6.95947265625, 7.265380859375, 7.5712890625, 7.877197265625, 8.18310546875, 8.489013671875, 8.794921875, 9.100830078125, 9.40673828125, 9.712646484375, 10.0185546875, 10.324462890625, 10.63037109375, 10.936279296875, 11.2421875]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 5.0, 8.0, 9.0, 8.0, 11.0, 9.0, 25.0, 26.0, 24.0, 42.0, 52.0, 63.0, 67.0, 57.0, 62.0, 62.0, 78.0, 56.0, 51.0, 57.0, 48.0, 41.0, 29.0, 37.0, 15.0, 14.0, 13.0, 6.0, 7.0, 6.0, 8.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.158203125, -2.079681396484375, -2.00115966796875, -1.922637939453125, -1.8441162109375, -1.765594482421875, -1.68707275390625, -1.608551025390625, -1.530029296875, -1.451507568359375, -1.37298583984375, -1.294464111328125, -1.2159423828125, -1.137420654296875, -1.05889892578125, -0.980377197265625, -0.90185546875, -0.823333740234375, -0.74481201171875, -0.666290283203125, -0.5877685546875, -0.509246826171875, -0.43072509765625, -0.352203369140625, -0.273681640625, -0.195159912109375, -0.11663818359375, -0.038116455078125, 0.0404052734375, 0.118927001953125, 0.19744873046875, 0.275970458984375, 0.3544921875, 0.433013916015625, 0.51153564453125, 0.590057373046875, 0.6685791015625, 0.747100830078125, 0.82562255859375, 0.904144287109375, 0.982666015625, 1.061187744140625, 1.13970947265625, 1.218231201171875, 1.2967529296875, 1.375274658203125, 1.45379638671875, 1.532318115234375, 1.61083984375, 1.689361572265625, 1.76788330078125, 1.846405029296875, 1.9249267578125, 2.003448486328125, 2.08197021484375, 2.160491943359375, 2.239013671875, 2.317535400390625, 2.39605712890625, 2.474578857421875, 2.5531005859375, 2.631622314453125, 2.71014404296875, 2.788665771484375, 2.8671875]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 7.0, 6.0, 15.0, 11.0, 13.0, 16.0, 19.0, 45.0, 40.0, 62.0, 76.0, 103.0, 227.0, 277.0, 479.0, 780.0, 1456.0, 3065.0, 11898.0, 4099370.0, 65833.0, 5497.0, 2075.0, 1098.0, 672.0, 377.0, 234.0, 159.0, 106.0, 73.0, 48.0, 32.0, 31.0, 22.0, 19.0, 10.0, 8.0, 10.0, 4.0, 5.0, 1.0, 5.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.0625, -9.7646484375, -9.466796875, -9.1689453125, -8.87109375, -8.5732421875, -8.275390625, -7.9775390625, -7.6796875, -7.3818359375, -7.083984375, -6.7861328125, -6.48828125, -6.1904296875, -5.892578125, -5.5947265625, -5.296875, -4.9990234375, -4.701171875, -4.4033203125, -4.10546875, -3.8076171875, -3.509765625, -3.2119140625, -2.9140625, -2.6162109375, -2.318359375, -2.0205078125, -1.72265625, -1.4248046875, -1.126953125, -0.8291015625, -0.53125, -0.2333984375, 0.064453125, 0.3623046875, 0.66015625, 0.9580078125, 1.255859375, 1.5537109375, 1.8515625, 2.1494140625, 2.447265625, 2.7451171875, 3.04296875, 3.3408203125, 3.638671875, 3.9365234375, 4.234375, 4.5322265625, 4.830078125, 5.1279296875, 5.42578125, 5.7236328125, 6.021484375, 6.3193359375, 6.6171875, 6.9150390625, 7.212890625, 7.5107421875, 7.80859375, 8.1064453125, 8.404296875, 8.7021484375, 9.0]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 7.0, 7.0, 21.0, 65.0, 3681.0, 216.0, 42.0, 16.0, 6.0, 2.0, 5.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.734375, -2.6658172607421875, -2.597259521484375, -2.5287017822265625, -2.46014404296875, -2.3915863037109375, -2.323028564453125, -2.2544708251953125, -2.1859130859375, -2.1173553466796875, -2.048797607421875, -1.9802398681640625, -1.91168212890625, -1.8431243896484375, -1.774566650390625, -1.7060089111328125, -1.637451171875, -1.5688934326171875, -1.500335693359375, -1.4317779541015625, -1.36322021484375, -1.2946624755859375, -1.226104736328125, -1.1575469970703125, -1.0889892578125, -1.0204315185546875, -0.951873779296875, -0.8833160400390625, -0.81475830078125, -0.7462005615234375, -0.677642822265625, -0.6090850830078125, -0.54052734375, -0.4719696044921875, -0.403411865234375, -0.3348541259765625, -0.26629638671875, -0.1977386474609375, -0.129180908203125, -0.0606231689453125, 0.0079345703125, 0.0764923095703125, 0.145050048828125, 0.2136077880859375, 0.28216552734375, 0.3507232666015625, 0.419281005859375, 0.4878387451171875, 0.556396484375, 0.6249542236328125, 0.693511962890625, 0.7620697021484375, 0.83062744140625, 0.8991851806640625, 0.967742919921875, 1.0363006591796875, 1.1048583984375, 1.1734161376953125, 1.241973876953125, 1.3105316162109375, 1.37908935546875, 1.4476470947265625, 1.516204833984375, 1.5847625732421875, 1.6533203125]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 3.0, 3.0, 7.0, 12.0, 16.0, 26.0, 34.0, 57.0, 77.0, 107.0, 131.0, 126.0, 140.0, 95.0, 76.0, 28.0, 28.0, 17.0, 9.0, 7.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.026236057281494, -4.837462902069092, -4.648690223693848, -4.459917068481445, -4.271143913269043, -4.082370758056641, -3.8935978412628174, -3.704824924468994, -3.516051769256592, -3.3272786140441895, -3.138505697250366, -2.949732780456543, -2.7609596252441406, -2.5721864700317383, -2.383413553237915, -2.194640636444092, -2.0058674812316895, -1.8170944452285767, -1.6283214092254639, -1.439548373222351, -1.2507753372192383, -1.0620023012161255, -0.8732292652130127, -0.6844562292098999, -0.4956831932067871, -0.3069101572036743, -0.11813712120056152, 0.07063591480255127, 0.25940895080566406, 0.44818198680877686, 0.6369550228118896, 0.8257280588150024, 1.014500617980957, 1.2032736539840698, 1.3920466899871826, 1.5808197259902954, 1.7695927619934082, 1.958365797996521, 2.147138833999634, 2.335911750793457, 2.5246849060058594, 2.7134580612182617, 2.902230978012085, 3.091003894805908, 3.2797770500183105, 3.468550205230713, 3.657323122024536, 3.8460960388183594, 4.034869194030762, 4.223642349243164, 4.412415504455566, 4.6011881828308105, 4.789961338043213, 4.978734493255615, 5.167507171630859, 5.356280326843262, 5.545053482055664, 5.733826637268066, 5.922599792480469, 6.111372470855713, 6.300145626068115, 6.488918781280518, 6.677691459655762, 6.866464614868164, 7.055237770080566]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 3.0, 4.0, 4.0, 4.0, 4.0, 3.0, 6.0, 5.0, 4.0, 5.0, 7.0, 19.0, 8.0, 16.0, 20.0, 19.0, 15.0, 21.0, 16.0, 24.0, 23.0, 30.0, 23.0, 26.0, 36.0, 34.0, 30.0, 38.0, 39.0, 38.0, 30.0, 37.0, 40.0, 41.0, 40.0, 41.0, 40.0, 17.0, 30.0, 29.0, 17.0, 16.0, 17.0, 10.0, 7.0, 16.0, 11.0, 9.0, 4.0, 9.0, 9.0, 6.0, 5.0, 3.0, 3.0, 2.0, 2.0, 2.0, 4.0, 0.0, 1.0], "bins": [-2.538992166519165, -2.4599132537841797, -2.3808345794677734, -2.301755666732788, -2.222676992416382, -2.1435980796813965, -2.0645194053649902, -1.9854404926300049, -1.906361699104309, -1.8272829055786133, -1.7482041120529175, -1.6691253185272217, -1.5900464057922363, -1.51096773147583, -1.4318888187408447, -1.352810025215149, -1.2737312316894531, -1.1946524381637573, -1.1155736446380615, -1.0364948511123657, -0.9574159979820251, -0.8783372044563293, -0.7992583513259888, -0.720179557800293, -0.6411007642745972, -0.5620219707489014, -0.4829431474208832, -0.403864324092865, -0.3247855305671692, -0.2457067370414734, -0.1666279137134552, -0.08754909038543701, -0.008470296859741211, 0.07060851156711578, 0.14968731999397278, 0.22876612842082977, 0.30784493684768677, 0.38692373037338257, 0.46600255370140076, 0.545081377029419, 0.6241601705551147, 0.7032389640808105, 0.7823177576065063, 0.8613966107368469, 0.9404754042625427, 1.0195541381835938, 1.098633050918579, 1.177711844444275, 1.2567906379699707, 1.3358694314956665, 1.4149482250213623, 1.494027018547058, 1.573105812072754, 1.6521847248077393, 1.731263518333435, 1.8103423118591309, 1.8894211053848267, 1.9684998989105225, 2.047578811645508, 2.126657485961914, 2.2057363986968994, 2.2848150730133057, 2.363893985748291, 2.4429726600646973, 2.5220515727996826]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 3.0, 4.0, 4.0, 2.0, 8.0, 4.0, 7.0, 17.0, 34.0, 46.0, 60.0, 99.0, 159.0, 314.0, 595.0, 1102.0, 2672.0, 6498.0, 19411.0, 68790.0, 290040.0, 474231.0, 132495.0, 34029.0, 10489.0, 3945.0, 1688.0, 791.0, 421.0, 218.0, 123.0, 100.0, 61.0, 25.0, 30.0, 11.0, 10.0, 12.0, 4.0, 3.0, 3.0, 2.0, 0.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.853515625, -3.712005615234375, -3.57049560546875, -3.428985595703125, -3.2874755859375, -3.145965576171875, -3.00445556640625, -2.862945556640625, -2.721435546875, -2.579925537109375, -2.43841552734375, -2.296905517578125, -2.1553955078125, -2.013885498046875, -1.87237548828125, -1.730865478515625, -1.58935546875, -1.447845458984375, -1.30633544921875, -1.164825439453125, -1.0233154296875, -0.881805419921875, -0.74029541015625, -0.598785400390625, -0.457275390625, -0.315765380859375, -0.17425537109375, -0.032745361328125, 0.1087646484375, 0.250274658203125, 0.39178466796875, 0.533294677734375, 0.6748046875, 0.816314697265625, 0.95782470703125, 1.099334716796875, 1.2408447265625, 1.382354736328125, 1.52386474609375, 1.665374755859375, 1.806884765625, 1.948394775390625, 2.08990478515625, 2.231414794921875, 2.3729248046875, 2.514434814453125, 2.65594482421875, 2.797454833984375, 2.93896484375, 3.080474853515625, 3.22198486328125, 3.363494873046875, 3.5050048828125, 3.646514892578125, 3.78802490234375, 3.929534912109375, 4.071044921875, 4.212554931640625, 4.35406494140625, 4.495574951171875, 4.6370849609375, 4.778594970703125, 4.92010498046875, 5.061614990234375, 5.203125]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 6.0, 8.0, 7.0, 9.0, 11.0, 20.0, 24.0, 26.0, 24.0, 46.0, 45.0, 50.0, 50.0, 45.0, 53.0, 72.0, 57.0, 62.0, 49.0, 64.0, 55.0, 45.0, 46.0, 21.0, 27.0, 20.0, 14.0, 12.0, 10.0, 4.0, 5.0, 7.0, 5.0, 0.0, 2.0, 6.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9345703125, -1.8573455810546875, -1.780120849609375, -1.7028961181640625, -1.62567138671875, -1.5484466552734375, -1.471221923828125, -1.3939971923828125, -1.3167724609375, -1.2395477294921875, -1.162322998046875, -1.0850982666015625, -1.00787353515625, -0.9306488037109375, -0.853424072265625, -0.7761993408203125, -0.698974609375, -0.6217498779296875, -0.544525146484375, -0.4673004150390625, -0.39007568359375, -0.3128509521484375, -0.235626220703125, -0.1584014892578125, -0.0811767578125, -0.0039520263671875, 0.073272705078125, 0.1504974365234375, 0.22772216796875, 0.3049468994140625, 0.382171630859375, 0.4593963623046875, 0.53662109375, 0.6138458251953125, 0.691070556640625, 0.7682952880859375, 0.84552001953125, 0.9227447509765625, 0.999969482421875, 1.0771942138671875, 1.1544189453125, 1.2316436767578125, 1.308868408203125, 1.3860931396484375, 1.46331787109375, 1.5405426025390625, 1.617767333984375, 1.6949920654296875, 1.772216796875, 1.8494415283203125, 1.926666259765625, 2.0038909912109375, 2.08111572265625, 2.1583404541015625, 2.235565185546875, 2.3127899169921875, 2.3900146484375, 2.4672393798828125, 2.544464111328125, 2.6216888427734375, 2.69891357421875, 2.7761383056640625, 2.853363037109375, 2.9305877685546875, 3.0078125]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 5.0, 0.0, 1.0, 4.0, 5.0, 1.0, 7.0, 9.0, 7.0, 15.0, 16.0, 20.0, 24.0, 38.0, 45.0, 73.0, 79.0, 135.0, 224.0, 319.0, 652.0, 1442.0, 4415.0, 17510.0, 114926.0, 701696.0, 174001.0, 23632.0, 5616.0, 1765.0, 777.0, 376.0, 216.0, 163.0, 96.0, 61.0, 37.0, 38.0, 28.0, 21.0, 18.0, 16.0, 6.0, 5.0, 6.0, 6.0, 6.0, 2.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-5.76953125, -5.5809326171875, -5.392333984375, -5.2037353515625, -5.01513671875, -4.8265380859375, -4.637939453125, -4.4493408203125, -4.2607421875, -4.0721435546875, -3.883544921875, -3.6949462890625, -3.50634765625, -3.3177490234375, -3.129150390625, -2.9405517578125, -2.751953125, -2.5633544921875, -2.374755859375, -2.1861572265625, -1.99755859375, -1.8089599609375, -1.620361328125, -1.4317626953125, -1.2431640625, -1.0545654296875, -0.865966796875, -0.6773681640625, -0.48876953125, -0.3001708984375, -0.111572265625, 0.0770263671875, 0.265625, 0.4542236328125, 0.642822265625, 0.8314208984375, 1.02001953125, 1.2086181640625, 1.397216796875, 1.5858154296875, 1.7744140625, 1.9630126953125, 2.151611328125, 2.3402099609375, 2.52880859375, 2.7174072265625, 2.906005859375, 3.0946044921875, 3.283203125, 3.4718017578125, 3.660400390625, 3.8489990234375, 4.03759765625, 4.2261962890625, 4.414794921875, 4.6033935546875, 4.7919921875, 4.9805908203125, 5.169189453125, 5.3577880859375, 5.54638671875, 5.7349853515625, 5.923583984375, 6.1121826171875, 6.30078125]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 4.0, 2.0, 20.0, 6.0, 12.0, 21.0, 23.0, 22.0, 33.0, 47.0, 74.0, 55.0, 59.0, 66.0, 59.0, 72.0, 65.0, 76.0, 60.0, 51.0, 39.0, 28.0, 27.0, 26.0, 17.0, 13.0, 15.0, 4.0, 5.0, 6.0, 3.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.4375, -9.030517578125, -8.62353515625, -8.216552734375, -7.8095703125, -7.402587890625, -6.99560546875, -6.588623046875, -6.181640625, -5.774658203125, -5.36767578125, -4.960693359375, -4.5537109375, -4.146728515625, -3.73974609375, -3.332763671875, -2.92578125, -2.518798828125, -2.11181640625, -1.704833984375, -1.2978515625, -0.890869140625, -0.48388671875, -0.076904296875, 0.330078125, 0.737060546875, 1.14404296875, 1.551025390625, 1.9580078125, 2.364990234375, 2.77197265625, 3.178955078125, 3.5859375, 3.992919921875, 4.39990234375, 4.806884765625, 5.2138671875, 5.620849609375, 6.02783203125, 6.434814453125, 6.841796875, 7.248779296875, 7.65576171875, 8.062744140625, 8.4697265625, 8.876708984375, 9.28369140625, 9.690673828125, 10.09765625, 10.504638671875, 10.91162109375, 11.318603515625, 11.7255859375, 12.132568359375, 12.53955078125, 12.946533203125, 13.353515625, 13.760498046875, 14.16748046875, 14.574462890625, 14.9814453125, 15.388427734375, 15.79541015625, 16.202392578125, 16.609375]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 3.0, 2.0, 2.0, 16.0, 9.0, 18.0, 22.0, 34.0, 50.0, 101.0, 253.0, 667.0, 2947.0, 24846.0, 802121.0, 205356.0, 9723.0, 1574.0, 442.0, 172.0, 75.0, 43.0, 18.0, 21.0, 12.0, 9.0, 5.0, 4.0, 4.0, 1.0, 1.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.818359375, -3.675933837890625, -3.53350830078125, -3.391082763671875, -3.2486572265625, -3.106231689453125, -2.96380615234375, -2.821380615234375, -2.678955078125, -2.536529541015625, -2.39410400390625, -2.251678466796875, -2.1092529296875, -1.966827392578125, -1.82440185546875, -1.681976318359375, -1.53955078125, -1.397125244140625, -1.25469970703125, -1.112274169921875, -0.9698486328125, -0.827423095703125, -0.68499755859375, -0.542572021484375, -0.400146484375, -0.257720947265625, -0.11529541015625, 0.027130126953125, 0.1695556640625, 0.311981201171875, 0.45440673828125, 0.596832275390625, 0.7392578125, 0.881683349609375, 1.02410888671875, 1.166534423828125, 1.3089599609375, 1.451385498046875, 1.59381103515625, 1.736236572265625, 1.878662109375, 2.021087646484375, 2.16351318359375, 2.305938720703125, 2.4483642578125, 2.590789794921875, 2.73321533203125, 2.875640869140625, 3.01806640625, 3.160491943359375, 3.30291748046875, 3.445343017578125, 3.5877685546875, 3.730194091796875, 3.87261962890625, 4.015045166015625, 4.157470703125, 4.299896240234375, 4.44232177734375, 4.584747314453125, 4.7271728515625, 4.869598388671875, 5.01202392578125, 5.154449462890625, 5.296875]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 0.0, 6.0, 5.0, 9.0, 11.0, 19.0, 29.0, 41.0, 56.0, 100.0, 124.0, 163.0, 127.0, 101.0, 68.0, 42.0, 44.0, 24.0, 7.0, 7.0, 4.0, 9.0, 4.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006742477416992188, -0.0006572604179382324, -0.0006402730941772461, -0.0006232857704162598, -0.0006062984466552734, -0.0005893111228942871, -0.0005723237991333008, -0.0005553364753723145, -0.0005383491516113281, -0.0005213618278503418, -0.0005043745040893555, -0.00048738718032836914, -0.0004703998565673828, -0.0004534125328063965, -0.00043642520904541016, -0.00041943788528442383, -0.0004024505615234375, -0.00038546323776245117, -0.00036847591400146484, -0.0003514885902404785, -0.0003345012664794922, -0.00031751394271850586, -0.00030052661895751953, -0.0002835392951965332, -0.0002665519714355469, -0.00024956464767456055, -0.00023257732391357422, -0.0002155900001525879, -0.00019860267639160156, -0.00018161535263061523, -0.0001646280288696289, -0.00014764070510864258, -0.00013065338134765625, -0.00011366605758666992, -9.66787338256836e-05, -7.969141006469727e-05, -6.270408630371094e-05, -4.571676254272461e-05, -2.872943878173828e-05, -1.1742115020751953e-05, 5.245208740234375e-06, 2.2232532501220703e-05, 3.921985626220703e-05, 5.620718002319336e-05, 7.319450378417969e-05, 9.018182754516602e-05, 0.00010716915130615234, 0.00012415647506713867, 0.000141143798828125, 0.00015813112258911133, 0.00017511844635009766, 0.00019210577011108398, 0.0002090930938720703, 0.00022608041763305664, 0.00024306774139404297, 0.0002600550651550293, 0.0002770423889160156, 0.00029402971267700195, 0.0003110170364379883, 0.0003280043601989746, 0.00034499168395996094, 0.00036197900772094727, 0.0003789663314819336, 0.0003959536552429199, 0.00041294097900390625]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 8.0, 2.0, 4.0, 6.0, 9.0, 18.0, 29.0, 26.0, 69.0, 125.0, 217.0, 454.0, 1206.0, 4797.0, 31024.0, 614313.0, 368529.0, 21971.0, 3851.0, 1053.0, 368.0, 215.0, 101.0, 60.0, 45.0, 22.0, 19.0, 12.0, 4.0, 2.0, 5.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.525390625, -2.410186767578125, -2.29498291015625, -2.179779052734375, -2.0645751953125, -1.949371337890625, -1.83416748046875, -1.718963623046875, -1.603759765625, -1.488555908203125, -1.37335205078125, -1.258148193359375, -1.1429443359375, -1.027740478515625, -0.91253662109375, -0.797332763671875, -0.68212890625, -0.566925048828125, -0.45172119140625, -0.336517333984375, -0.2213134765625, -0.106109619140625, 0.00909423828125, 0.124298095703125, 0.239501953125, 0.354705810546875, 0.46990966796875, 0.585113525390625, 0.7003173828125, 0.815521240234375, 0.93072509765625, 1.045928955078125, 1.1611328125, 1.276336669921875, 1.39154052734375, 1.506744384765625, 1.6219482421875, 1.737152099609375, 1.85235595703125, 1.967559814453125, 2.082763671875, 2.197967529296875, 2.31317138671875, 2.428375244140625, 2.5435791015625, 2.658782958984375, 2.77398681640625, 2.889190673828125, 3.00439453125, 3.119598388671875, 3.23480224609375, 3.350006103515625, 3.4652099609375, 3.580413818359375, 3.69561767578125, 3.810821533203125, 3.926025390625, 4.041229248046875, 4.15643310546875, 4.271636962890625, 4.3868408203125, 4.502044677734375, 4.61724853515625, 4.732452392578125, 4.84765625]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 1.0, 3.0, 5.0, 3.0, 2.0, 3.0, 7.0, 12.0, 17.0, 22.0, 25.0, 39.0, 51.0, 66.0, 117.0, 143.0, 141.0, 101.0, 74.0, 52.0, 37.0, 26.0, 20.0, 12.0, 4.0, 7.0, 4.0, 4.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.8984375, -4.778839111328125, -4.65924072265625, -4.539642333984375, -4.4200439453125, -4.300445556640625, -4.18084716796875, -4.061248779296875, -3.941650390625, -3.822052001953125, -3.70245361328125, -3.582855224609375, -3.4632568359375, -3.343658447265625, -3.22406005859375, -3.104461669921875, -2.98486328125, -2.865264892578125, -2.74566650390625, -2.626068115234375, -2.5064697265625, -2.386871337890625, -2.26727294921875, -2.147674560546875, -2.028076171875, -1.908477783203125, -1.78887939453125, -1.669281005859375, -1.5496826171875, -1.430084228515625, -1.31048583984375, -1.190887451171875, -1.0712890625, -0.951690673828125, -0.83209228515625, -0.712493896484375, -0.5928955078125, -0.473297119140625, -0.35369873046875, -0.234100341796875, -0.114501953125, 0.005096435546875, 0.12469482421875, 0.244293212890625, 0.3638916015625, 0.483489990234375, 0.60308837890625, 0.722686767578125, 0.84228515625, 0.961883544921875, 1.08148193359375, 1.201080322265625, 1.3206787109375, 1.440277099609375, 1.55987548828125, 1.679473876953125, 1.799072265625, 1.918670654296875, 2.03826904296875, 2.157867431640625, 2.2774658203125, 2.397064208984375, 2.51666259765625, 2.636260986328125, 2.755859375]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 5.0, 11.0, 21.0, 38.0, 85.0, 151.0, 164.0, 199.0, 146.0, 88.0, 50.0, 29.0, 13.0, 4.0, 5.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-82.5912094116211, -80.71609497070312, -78.84097290039062, -76.96585845947266, -75.09074401855469, -73.21562957763672, -71.34051513671875, -69.46539306640625, -67.59027862548828, -65.71516418457031, -63.84004592895508, -61.964927673339844, -60.089813232421875, -58.214698791503906, -56.33958053588867, -54.46446228027344, -52.58934783935547, -50.7142333984375, -48.839115142822266, -46.96399688720703, -45.08888244628906, -43.213768005371094, -41.33864974975586, -39.463531494140625, -37.588417053222656, -35.71330261230469, -33.83818435668945, -31.96306800842285, -30.08795166015625, -28.21283531188965, -26.337718963623047, -24.462602615356445, -22.587486267089844, -20.712369918823242, -18.83725357055664, -16.96213722229004, -15.087020874023438, -13.211904525756836, -11.336788177490234, -9.461671829223633, -7.586555480957031, -5.71143913269043, -3.836322784423828, -1.9612064361572266, -0.086090087890625, 1.7890262603759766, 3.664142608642578, 5.53925895690918, 7.414375305175781, 9.289491653442383, 11.164608001708984, 13.039724349975586, 14.914840698242188, 16.78995704650879, 18.66507339477539, 20.540189743041992, 22.415306091308594, 24.290422439575195, 26.165538787841797, 28.0406551361084, 29.915771484375, 31.7908878326416, 33.6660041809082, 35.54112243652344, 37.416236877441406]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 7.0, 7.0, 7.0, 10.0, 22.0, 15.0, 26.0, 19.0, 24.0, 34.0, 37.0, 48.0, 55.0, 66.0, 57.0, 63.0, 60.0, 78.0, 51.0, 58.0, 39.0, 36.0, 34.0, 33.0, 27.0, 22.0, 9.0, 20.0, 9.0, 10.0, 3.0, 5.0, 7.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-47.1971435546875, -45.620635986328125, -44.044124603271484, -42.46761703491211, -40.89110565185547, -39.314598083496094, -37.73809051513672, -36.16157913208008, -34.58506774902344, -33.00856018066406, -31.432048797607422, -29.855541229248047, -28.279029846191406, -26.70252227783203, -25.126012802124023, -23.549503326416016, -21.97299575805664, -20.396486282348633, -18.819976806640625, -17.24346923828125, -15.666958808898926, -14.090449333190918, -12.513940811157227, -10.937431335449219, -9.360921859741211, -7.784412384033203, -6.2079033851623535, -4.631394386291504, -3.054884910583496, -1.4783754348754883, 0.09813308715820312, 1.674642562866211, 3.2511558532714844, 4.827665328979492, 6.404174327850342, 7.980683326721191, 9.5571928024292, 11.133702278137207, 12.710210800170898, 14.286720275878906, 15.863229751586914, 17.439739227294922, 19.01624870300293, 20.592758178710938, 22.169265747070312, 23.745777130126953, 25.322284698486328, 26.898794174194336, 28.475303649902344, 30.05181312561035, 31.62832260131836, 33.204830169677734, 34.781341552734375, 36.35784912109375, 37.934356689453125, 39.510868072509766, 41.087379455566406, 42.66388702392578, 44.24039840698242, 45.8169059753418, 47.39341735839844, 48.96992492675781, 50.54643249511719, 52.12294387817383, 53.6994514465332]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 5.0, 11.0, 6.0, 10.0, 10.0, 15.0, 23.0, 35.0, 62.0, 85.0, 132.0, 260.0, 428.0, 800.0, 1927.0, 5794.0, 40614.0, 4111647.0, 24615.0, 4439.0, 1568.0, 760.0, 389.0, 213.0, 141.0, 100.0, 50.0, 41.0, 31.0, 20.0, 21.0, 7.0, 6.0, 9.0, 2.0, 4.0, 4.0, 6.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.77734375, -5.573974609375, -5.37060546875, -5.167236328125, -4.9638671875, -4.760498046875, -4.55712890625, -4.353759765625, -4.150390625, -3.947021484375, -3.74365234375, -3.540283203125, -3.3369140625, -3.133544921875, -2.93017578125, -2.726806640625, -2.5234375, -2.320068359375, -2.11669921875, -1.913330078125, -1.7099609375, -1.506591796875, -1.30322265625, -1.099853515625, -0.896484375, -0.693115234375, -0.48974609375, -0.286376953125, -0.0830078125, 0.120361328125, 0.32373046875, 0.527099609375, 0.73046875, 0.933837890625, 1.13720703125, 1.340576171875, 1.5439453125, 1.747314453125, 1.95068359375, 2.154052734375, 2.357421875, 2.560791015625, 2.76416015625, 2.967529296875, 3.1708984375, 3.374267578125, 3.57763671875, 3.781005859375, 3.984375, 4.187744140625, 4.39111328125, 4.594482421875, 4.7978515625, 5.001220703125, 5.20458984375, 5.407958984375, 5.611328125, 5.814697265625, 6.01806640625, 6.221435546875, 6.4248046875, 6.628173828125, 6.83154296875, 7.034912109375, 7.23828125]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 5.0, 6.0, 3.0, 14.0, 12.0, 17.0, 24.0, 36.0, 47.0, 65.0, 72.0, 71.0, 96.0, 101.0, 89.0, 75.0, 76.0, 63.0, 36.0, 35.0, 20.0, 19.0, 12.0, 4.0, 3.0, 4.0, 2.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.060546875, -2.944488525390625, -2.82843017578125, -2.712371826171875, -2.5963134765625, -2.480255126953125, -2.36419677734375, -2.248138427734375, -2.132080078125, -2.016021728515625, -1.89996337890625, -1.783905029296875, -1.6678466796875, -1.551788330078125, -1.43572998046875, -1.319671630859375, -1.20361328125, -1.087554931640625, -0.97149658203125, -0.855438232421875, -0.7393798828125, -0.623321533203125, -0.50726318359375, -0.391204833984375, -0.275146484375, -0.159088134765625, -0.04302978515625, 0.073028564453125, 0.1890869140625, 0.305145263671875, 0.42120361328125, 0.537261962890625, 0.6533203125, 0.769378662109375, 0.88543701171875, 1.001495361328125, 1.1175537109375, 1.233612060546875, 1.34967041015625, 1.465728759765625, 1.581787109375, 1.697845458984375, 1.81390380859375, 1.929962158203125, 2.0460205078125, 2.162078857421875, 2.27813720703125, 2.394195556640625, 2.51025390625, 2.626312255859375, 2.74237060546875, 2.858428955078125, 2.9744873046875, 3.090545654296875, 3.20660400390625, 3.322662353515625, 3.438720703125, 3.554779052734375, 3.67083740234375, 3.786895751953125, 3.9029541015625, 4.019012451171875, 4.13507080078125, 4.251129150390625, 4.3671875]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 5.0, 3.0, 2.0, 3.0, 10.0, 17.0, 24.0, 19.0, 30.0, 41.0, 49.0, 66.0, 82.0, 85.0, 126.0, 165.0, 205.0, 315.0, 463.0, 766.0, 1464.0, 3235.0, 10254.0, 80873.0, 4042592.0, 40109.0, 7159.0, 2632.0, 1196.0, 668.0, 423.0, 286.0, 227.0, 155.0, 111.0, 97.0, 64.0, 53.0, 52.0, 33.0, 34.0, 21.0, 16.0, 15.0, 13.0, 9.0, 8.0, 3.0, 3.0, 5.0, 3.0, 2.0, 1.0, 0.0, 4.0], "bins": [-6.09375, -5.9111328125, -5.728515625, -5.5458984375, -5.36328125, -5.1806640625, -4.998046875, -4.8154296875, -4.6328125, -4.4501953125, -4.267578125, -4.0849609375, -3.90234375, -3.7197265625, -3.537109375, -3.3544921875, -3.171875, -2.9892578125, -2.806640625, -2.6240234375, -2.44140625, -2.2587890625, -2.076171875, -1.8935546875, -1.7109375, -1.5283203125, -1.345703125, -1.1630859375, -0.98046875, -0.7978515625, -0.615234375, -0.4326171875, -0.25, -0.0673828125, 0.115234375, 0.2978515625, 0.48046875, 0.6630859375, 0.845703125, 1.0283203125, 1.2109375, 1.3935546875, 1.576171875, 1.7587890625, 1.94140625, 2.1240234375, 2.306640625, 2.4892578125, 2.671875, 2.8544921875, 3.037109375, 3.2197265625, 3.40234375, 3.5849609375, 3.767578125, 3.9501953125, 4.1328125, 4.3154296875, 4.498046875, 4.6806640625, 4.86328125, 5.0458984375, 5.228515625, 5.4111328125, 5.59375]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 9.0, 4.0, 7.0, 13.0, 52.0, 246.0, 3575.0, 106.0, 37.0, 19.0, 8.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.505859375, -3.354888916015625, -3.20391845703125, -3.052947998046875, -2.9019775390625, -2.751007080078125, -2.60003662109375, -2.449066162109375, -2.298095703125, -2.147125244140625, -1.99615478515625, -1.845184326171875, -1.6942138671875, -1.543243408203125, -1.39227294921875, -1.241302490234375, -1.09033203125, -0.939361572265625, -0.78839111328125, -0.637420654296875, -0.4864501953125, -0.335479736328125, -0.18450927734375, -0.033538818359375, 0.117431640625, 0.268402099609375, 0.41937255859375, 0.570343017578125, 0.7213134765625, 0.872283935546875, 1.02325439453125, 1.174224853515625, 1.3251953125, 1.476165771484375, 1.62713623046875, 1.778106689453125, 1.9290771484375, 2.080047607421875, 2.23101806640625, 2.381988525390625, 2.532958984375, 2.683929443359375, 2.83489990234375, 2.985870361328125, 3.1368408203125, 3.287811279296875, 3.43878173828125, 3.589752197265625, 3.74072265625, 3.891693115234375, 4.04266357421875, 4.193634033203125, 4.3446044921875, 4.495574951171875, 4.64654541015625, 4.797515869140625, 4.948486328125, 5.099456787109375, 5.25042724609375, 5.401397705078125, 5.5523681640625, 5.703338623046875, 5.85430908203125, 6.005279541015625, 6.15625]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 5.0, 13.0, 13.0, 36.0, 73.0, 121.0, 215.0, 215.0, 156.0, 74.0, 37.0, 16.0, 17.0, 8.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.207664489746094, -11.758191108703613, -11.308717727661133, -10.859244346618652, -10.409770965576172, -9.960297584533691, -9.510824203491211, -9.06135082244873, -8.61187744140625, -8.16240406036377, -7.712930679321289, -7.263457298278809, -6.813983917236328, -6.364510536193848, -5.915037155151367, -5.465563774108887, -5.0160908699035645, -4.566617488861084, -4.1171441078186035, -3.667670726776123, -3.2181973457336426, -2.768724203109741, -2.3192508220672607, -1.8697774410247803, -1.4203040599822998, -0.9708306789398193, -0.5213573575019836, -0.07188403606414795, 0.3775893449783325, 0.8270626068115234, 1.276535987854004, 1.7260093688964844, 2.175482749938965, 2.6249561309814453, 3.074429512023926, 3.5239028930664062, 3.9733762741088867, 4.422849655151367, 4.872323036193848, 5.321796417236328, 5.771269798278809, 6.220743179321289, 6.6702165603637695, 7.11968994140625, 7.5691633224487305, 8.018636703491211, 8.468110084533691, 8.917583465576172, 9.367055892944336, 9.816529273986816, 10.266002655029297, 10.715476036071777, 11.164949417114258, 11.614422798156738, 12.063896179199219, 12.5133695602417, 12.96284294128418, 13.41231632232666, 13.86178970336914, 14.311263084411621, 14.760736465454102, 15.210209846496582, 15.659683227539062, 16.109155654907227, 16.558629989624023]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 5.0, 5.0, 3.0, 9.0, 8.0, 11.0, 8.0, 15.0, 19.0, 20.0, 27.0, 30.0, 31.0, 35.0, 44.0, 48.0, 39.0, 52.0, 43.0, 63.0, 45.0, 48.0, 53.0, 39.0, 47.0, 39.0, 34.0, 30.0, 25.0, 29.0, 21.0, 12.0, 20.0, 12.0, 7.0, 12.0, 3.0, 3.0, 5.0, 2.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.155193328857422, -5.9484992027282715, -5.741805076599121, -5.535111427307129, -5.3284173011779785, -5.121723175048828, -4.915029048919678, -4.708334922790527, -4.501641273498535, -4.294947147369385, -4.088253021240234, -3.881559133529663, -3.674865245819092, -3.4681711196899414, -3.261476993560791, -3.0547828674316406, -2.8480887413024902, -2.64139461517334, -2.4347007274627686, -2.228006601333618, -2.021312713623047, -1.8146185874938965, -1.607924461364746, -1.4012304544448853, -1.1945364475250244, -0.9878424406051636, -0.781148374080658, -0.5744543075561523, -0.3677603006362915, -0.16106629371643066, 0.04562783241271973, 0.25232183933258057, 0.4590153694152832, 0.665709376335144, 0.8724034428596497, 1.0790975093841553, 1.2857915163040161, 1.492485523223877, 1.6991796493530273, 1.9058736562728882, 2.112567663192749, 2.3192617893218994, 2.5259556770324707, 2.732649803161621, 2.9393439292907715, 3.1460378170013428, 3.352731943130493, 3.5594258308410645, 3.766119956970215, 3.9728140830993652, 4.179508209228516, 4.386201858520508, 4.592895984649658, 4.799590110778809, 5.006284236907959, 5.212978363037109, 5.419672012329102, 5.626366138458252, 5.833060264587402, 6.0397539138793945, 6.246448040008545, 6.453142166137695, 6.659836292266846, 6.866530418395996, 7.0732245445251465]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 4.0, 4.0, 6.0, 7.0, 8.0, 21.0, 30.0, 48.0, 79.0, 134.0, 252.0, 529.0, 1181.0, 3134.0, 10047.0, 41534.0, 242574.0, 581003.0, 131979.0, 25164.0, 6825.0, 2220.0, 889.0, 382.0, 204.0, 125.0, 61.0, 38.0, 24.0, 17.0, 11.0, 5.0, 3.0, 4.0, 5.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.65234375, -5.4827880859375, -5.313232421875, -5.1436767578125, -4.97412109375, -4.8045654296875, -4.635009765625, -4.4654541015625, -4.2958984375, -4.1263427734375, -3.956787109375, -3.7872314453125, -3.61767578125, -3.4481201171875, -3.278564453125, -3.1090087890625, -2.939453125, -2.7698974609375, -2.600341796875, -2.4307861328125, -2.26123046875, -2.0916748046875, -1.922119140625, -1.7525634765625, -1.5830078125, -1.4134521484375, -1.243896484375, -1.0743408203125, -0.90478515625, -0.7352294921875, -0.565673828125, -0.3961181640625, -0.2265625, -0.0570068359375, 0.112548828125, 0.2821044921875, 0.45166015625, 0.6212158203125, 0.790771484375, 0.9603271484375, 1.1298828125, 1.2994384765625, 1.468994140625, 1.6385498046875, 1.80810546875, 1.9776611328125, 2.147216796875, 2.3167724609375, 2.486328125, 2.6558837890625, 2.825439453125, 2.9949951171875, 3.16455078125, 3.3341064453125, 3.503662109375, 3.6732177734375, 3.8427734375, 4.0123291015625, 4.181884765625, 4.3514404296875, 4.52099609375, 4.6905517578125, 4.860107421875, 5.0296630859375, 5.19921875]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0, 6.0, 7.0, 9.0, 8.0, 18.0, 16.0, 32.0, 31.0, 36.0, 43.0, 51.0, 68.0, 65.0, 84.0, 86.0, 85.0, 69.0, 64.0, 60.0, 43.0, 35.0, 24.0, 17.0, 11.0, 12.0, 10.0, 6.0, 6.0, 1.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.076171875, -2.974395751953125, -2.87261962890625, -2.770843505859375, -2.6690673828125, -2.567291259765625, -2.46551513671875, -2.363739013671875, -2.261962890625, -2.160186767578125, -2.05841064453125, -1.956634521484375, -1.8548583984375, -1.753082275390625, -1.65130615234375, -1.549530029296875, -1.44775390625, -1.345977783203125, -1.24420166015625, -1.142425537109375, -1.0406494140625, -0.938873291015625, -0.83709716796875, -0.735321044921875, -0.633544921875, -0.531768798828125, -0.42999267578125, -0.328216552734375, -0.2264404296875, -0.124664306640625, -0.02288818359375, 0.078887939453125, 0.1806640625, 0.282440185546875, 0.38421630859375, 0.485992431640625, 0.5877685546875, 0.689544677734375, 0.79132080078125, 0.893096923828125, 0.994873046875, 1.096649169921875, 1.19842529296875, 1.300201416015625, 1.4019775390625, 1.503753662109375, 1.60552978515625, 1.707305908203125, 1.80908203125, 1.910858154296875, 2.01263427734375, 2.114410400390625, 2.2161865234375, 2.317962646484375, 2.41973876953125, 2.521514892578125, 2.623291015625, 2.725067138671875, 2.82684326171875, 2.928619384765625, 3.0303955078125, 3.132171630859375, 3.23394775390625, 3.335723876953125, 3.4375]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 9.0, 16.0, 19.0, 36.0, 50.0, 111.0, 203.0, 389.0, 1108.0, 4702.0, 41341.0, 817499.0, 169235.0, 10569.0, 2062.0, 612.0, 259.0, 137.0, 76.0, 44.0, 29.0, 15.0, 16.0, 3.0, 8.0, 5.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-13.5078125, -13.19158935546875, -12.8753662109375, -12.55914306640625, -12.242919921875, -11.92669677734375, -11.6104736328125, -11.29425048828125, -10.97802734375, -10.66180419921875, -10.3455810546875, -10.02935791015625, -9.713134765625, -9.39691162109375, -9.0806884765625, -8.76446533203125, -8.4482421875, -8.13201904296875, -7.8157958984375, -7.49957275390625, -7.183349609375, -6.86712646484375, -6.5509033203125, -6.23468017578125, -5.91845703125, -5.60223388671875, -5.2860107421875, -4.96978759765625, -4.653564453125, -4.33734130859375, -4.0211181640625, -3.70489501953125, -3.388671875, -3.07244873046875, -2.7562255859375, -2.44000244140625, -2.123779296875, -1.80755615234375, -1.4913330078125, -1.17510986328125, -0.85888671875, -0.54266357421875, -0.2264404296875, 0.08978271484375, 0.406005859375, 0.72222900390625, 1.0384521484375, 1.35467529296875, 1.6708984375, 1.98712158203125, 2.3033447265625, 2.61956787109375, 2.935791015625, 3.25201416015625, 3.5682373046875, 3.88446044921875, 4.20068359375, 4.51690673828125, 4.8331298828125, 5.14935302734375, 5.465576171875, 5.78179931640625, 6.0980224609375, 6.41424560546875, 6.73046875]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 1.0, 4.0, 3.0, 9.0, 14.0, 12.0, 14.0, 22.0, 25.0, 35.0, 35.0, 29.0, 64.0, 54.0, 68.0, 59.0, 62.0, 72.0, 55.0, 66.0, 60.0, 33.0, 34.0, 36.0, 34.0, 33.0, 19.0, 28.0, 5.0, 6.0, 5.0, 6.0, 3.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.9453125, -11.56396484375, -11.1826171875, -10.80126953125, -10.419921875, -10.03857421875, -9.6572265625, -9.27587890625, -8.89453125, -8.51318359375, -8.1318359375, -7.75048828125, -7.369140625, -6.98779296875, -6.6064453125, -6.22509765625, -5.84375, -5.46240234375, -5.0810546875, -4.69970703125, -4.318359375, -3.93701171875, -3.5556640625, -3.17431640625, -2.79296875, -2.41162109375, -2.0302734375, -1.64892578125, -1.267578125, -0.88623046875, -0.5048828125, -0.12353515625, 0.2578125, 0.63916015625, 1.0205078125, 1.40185546875, 1.783203125, 2.16455078125, 2.5458984375, 2.92724609375, 3.30859375, 3.68994140625, 4.0712890625, 4.45263671875, 4.833984375, 5.21533203125, 5.5966796875, 5.97802734375, 6.359375, 6.74072265625, 7.1220703125, 7.50341796875, 7.884765625, 8.26611328125, 8.6474609375, 9.02880859375, 9.41015625, 9.79150390625, 10.1728515625, 10.55419921875, 10.935546875, 11.31689453125, 11.6982421875, 12.07958984375, 12.4609375]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 3.0, 1.0, 2.0, 3.0, 4.0, 10.0, 11.0, 24.0, 37.0, 60.0, 129.0, 232.0, 532.0, 1266.0, 5040.0, 39378.0, 772653.0, 211647.0, 13464.0, 2537.0, 788.0, 365.0, 160.0, 81.0, 51.0, 27.0, 17.0, 13.0, 6.0, 7.0, 4.0, 4.0, 3.0, 1.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.849609375, -2.765228271484375, -2.68084716796875, -2.596466064453125, -2.5120849609375, -2.427703857421875, -2.34332275390625, -2.258941650390625, -2.174560546875, -2.090179443359375, -2.00579833984375, -1.921417236328125, -1.8370361328125, -1.752655029296875, -1.66827392578125, -1.583892822265625, -1.49951171875, -1.415130615234375, -1.33074951171875, -1.246368408203125, -1.1619873046875, -1.077606201171875, -0.99322509765625, -0.908843994140625, -0.824462890625, -0.740081787109375, -0.65570068359375, -0.571319580078125, -0.4869384765625, -0.402557373046875, -0.31817626953125, -0.233795166015625, -0.1494140625, -0.065032958984375, 0.01934814453125, 0.103729248046875, 0.1881103515625, 0.272491455078125, 0.35687255859375, 0.441253662109375, 0.525634765625, 0.610015869140625, 0.69439697265625, 0.778778076171875, 0.8631591796875, 0.947540283203125, 1.03192138671875, 1.116302490234375, 1.20068359375, 1.285064697265625, 1.36944580078125, 1.453826904296875, 1.5382080078125, 1.622589111328125, 1.70697021484375, 1.791351318359375, 1.875732421875, 1.960113525390625, 2.04449462890625, 2.128875732421875, 2.2132568359375, 2.297637939453125, 2.38201904296875, 2.466400146484375, 2.55078125]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 5.0, 2.0, 6.0, 4.0, 9.0, 10.0, 9.0, 22.0, 28.0, 39.0, 71.0, 73.0, 121.0, 124.0, 118.0, 112.0, 78.0, 44.0, 35.0, 32.0, 18.0, 12.0, 12.0, 8.0, 8.0, 5.0, 1.0, 4.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003428459167480469, -0.00033054500818252563, -0.0003182440996170044, -0.00030594319105148315, -0.0002936422824859619, -0.0002813413739204407, -0.00026904046535491943, -0.0002567395567893982, -0.00024443864822387695, -0.0002321377396583557, -0.00021983683109283447, -0.00020753592252731323, -0.000195235013961792, -0.00018293410539627075, -0.0001706331968307495, -0.00015833228826522827, -0.00014603137969970703, -0.0001337304711341858, -0.00012142956256866455, -0.00010912865400314331, -9.682774543762207e-05, -8.452683687210083e-05, -7.222592830657959e-05, -5.992501974105835e-05, -4.762411117553711e-05, -3.532320261001587e-05, -2.302229404449463e-05, -1.0721385478973389e-05, 1.5795230865478516e-06, 1.3880431652069092e-05, 2.6181340217590332e-05, 3.848224878311157e-05, 5.078315734863281e-05, 6.308406591415405e-05, 7.538497447967529e-05, 8.768588304519653e-05, 9.998679161071777e-05, 0.00011228770017623901, 0.00012458860874176025, 0.0001368895173072815, 0.00014919042587280273, 0.00016149133443832397, 0.00017379224300384521, 0.00018609315156936646, 0.0001983940601348877, 0.00021069496870040894, 0.00022299587726593018, 0.00023529678583145142, 0.00024759769439697266, 0.0002598986029624939, 0.00027219951152801514, 0.0002845004200935364, 0.0002968013286590576, 0.00030910223722457886, 0.0003214031457901001, 0.00033370405435562134, 0.0003460049629211426, 0.0003583058714866638, 0.00037060678005218506, 0.0003829076886177063, 0.00039520859718322754, 0.0004075095057487488, 0.00041981041431427, 0.00043211132287979126, 0.0004444122314453125]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 11.0, 11.0, 21.0, 31.0, 72.0, 120.0, 354.0, 1260.0, 6783.0, 115415.0, 880431.0, 38894.0, 3872.0, 816.0, 249.0, 104.0, 47.0, 20.0, 19.0, 18.0, 7.0, 3.0, 3.0, 1.0, 2.0, 1.0], "bins": [-5.12109375, -5.015167236328125, -4.90924072265625, -4.803314208984375, -4.6973876953125, -4.591461181640625, -4.48553466796875, -4.379608154296875, -4.273681640625, -4.167755126953125, -4.06182861328125, -3.955902099609375, -3.8499755859375, -3.744049072265625, -3.63812255859375, -3.532196044921875, -3.42626953125, -3.320343017578125, -3.21441650390625, -3.108489990234375, -3.0025634765625, -2.896636962890625, -2.79071044921875, -2.684783935546875, -2.578857421875, -2.472930908203125, -2.36700439453125, -2.261077880859375, -2.1551513671875, -2.049224853515625, -1.94329833984375, -1.837371826171875, -1.7314453125, -1.625518798828125, -1.51959228515625, -1.413665771484375, -1.3077392578125, -1.201812744140625, -1.09588623046875, -0.989959716796875, -0.884033203125, -0.778106689453125, -0.67218017578125, -0.566253662109375, -0.4603271484375, -0.354400634765625, -0.24847412109375, -0.142547607421875, -0.03662109375, 0.069305419921875, 0.17523193359375, 0.281158447265625, 0.3870849609375, 0.493011474609375, 0.59893798828125, 0.704864501953125, 0.810791015625, 0.916717529296875, 1.02264404296875, 1.128570556640625, 1.2344970703125, 1.340423583984375, 1.44635009765625, 1.552276611328125, 1.658203125]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 1.0, 0.0, 2.0, 8.0, 5.0, 9.0, 8.0, 13.0, 24.0, 26.0, 33.0, 38.0, 60.0, 67.0, 84.0, 88.0, 87.0, 92.0, 73.0, 69.0, 56.0, 41.0, 37.0, 22.0, 17.0, 15.0, 9.0, 6.0, 5.0, 4.0, 1.0, 2.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.59375, -1.530914306640625, -1.46807861328125, -1.405242919921875, -1.3424072265625, -1.279571533203125, -1.21673583984375, -1.153900146484375, -1.091064453125, -1.028228759765625, -0.96539306640625, -0.902557373046875, -0.8397216796875, -0.776885986328125, -0.71405029296875, -0.651214599609375, -0.58837890625, -0.525543212890625, -0.46270751953125, -0.399871826171875, -0.3370361328125, -0.274200439453125, -0.21136474609375, -0.148529052734375, -0.085693359375, -0.022857666015625, 0.03997802734375, 0.102813720703125, 0.1656494140625, 0.228485107421875, 0.29132080078125, 0.354156494140625, 0.4169921875, 0.479827880859375, 0.54266357421875, 0.605499267578125, 0.6683349609375, 0.731170654296875, 0.79400634765625, 0.856842041015625, 0.919677734375, 0.982513427734375, 1.04534912109375, 1.108184814453125, 1.1710205078125, 1.233856201171875, 1.29669189453125, 1.359527587890625, 1.42236328125, 1.485198974609375, 1.54803466796875, 1.610870361328125, 1.6737060546875, 1.736541748046875, 1.79937744140625, 1.862213134765625, 1.925048828125, 1.987884521484375, 2.05072021484375, 2.113555908203125, 2.1763916015625, 2.239227294921875, 2.30206298828125, 2.364898681640625, 2.427734375]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 12.0, 33.0, 79.0, 143.0, 236.0, 235.0, 152.0, 75.0, 26.0, 5.0, 3.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-72.13565063476562, -69.8863525390625, -67.63704681396484, -65.38774108886719, -63.13844299316406, -60.88914108276367, -58.63983917236328, -56.39053726196289, -54.1412353515625, -51.89193344116211, -49.64263153076172, -47.39332962036133, -45.14402770996094, -42.89472579956055, -40.645423889160156, -38.396121978759766, -36.146820068359375, -33.897518157958984, -31.648216247558594, -29.398914337158203, -27.149612426757812, -24.900310516357422, -22.65100860595703, -20.40170669555664, -18.15240478515625, -15.90310287475586, -13.653800964355469, -11.404499053955078, -9.155197143554688, -6.905895233154297, -4.656593322753906, -2.4072914123535156, -0.157989501953125, 2.0913124084472656, 4.340614318847656, 6.589916229248047, 8.839218139648438, 11.088520050048828, 13.337821960449219, 15.58712387084961, 17.83642578125, 20.08572769165039, 22.33502960205078, 24.584331512451172, 26.833633422851562, 29.082935333251953, 31.332237243652344, 33.581539154052734, 35.830841064453125, 38.080142974853516, 40.329444885253906, 42.5787467956543, 44.82804870605469, 47.07735061645508, 49.32665252685547, 51.57595443725586, 53.82525634765625, 56.07455825805664, 58.32386016845703, 60.57316207885742, 62.82246398925781, 65.07176208496094, 67.3210678100586, 69.57037353515625, 71.81967163085938]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 5.0, 11.0, 4.0, 8.0, 4.0, 14.0, 17.0, 16.0, 37.0, 29.0, 39.0, 59.0, 49.0, 71.0, 75.0, 73.0, 74.0, 74.0, 54.0, 74.0, 63.0, 45.0, 40.0, 19.0, 14.0, 16.0, 11.0, 11.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-76.77688598632812, -74.897216796875, -73.01754760742188, -71.13787078857422, -69.2582015991211, -67.37853240966797, -65.49886322021484, -63.61919021606445, -61.73952102661133, -59.8598518371582, -57.98017883300781, -56.10050964355469, -54.2208366394043, -52.34116744995117, -50.46149444580078, -48.581825256347656, -46.70215606689453, -44.822486877441406, -42.942813873291016, -41.06314468383789, -39.1834716796875, -37.303802490234375, -35.42413330078125, -33.54446029663086, -31.66478729248047, -29.78511619567871, -27.905445098876953, -26.025775909423828, -24.14610481262207, -22.266433715820312, -20.386762619018555, -18.507091522216797, -16.627422332763672, -14.747751235961914, -12.868081092834473, -10.988409996032715, -9.108739852905273, -7.229068756103516, -5.349397659301758, -3.4697275161743164, -1.5900564193725586, 0.28961431980133057, 2.1692850589752197, 4.048955917358398, 5.928626537322998, 7.808297157287598, 9.687968254089355, 11.567638397216797, 13.447309494018555, 15.326980590820312, 17.20665168762207, 19.086322784423828, 20.965991973876953, 22.84566307067871, 24.72533416748047, 26.605003356933594, 28.484676361083984, 30.364347457885742, 32.2440185546875, 34.123687744140625, 36.003360748291016, 37.88302993774414, 39.76270294189453, 41.642372131347656, 43.52204132080078]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 4.0, 3.0, 3.0, 3.0, 3.0, 12.0, 18.0, 29.0, 30.0, 48.0, 69.0, 103.0, 162.0, 363.0, 812.0, 1973.0, 7133.0, 39583.0, 3259317.0, 848168.0, 27825.0, 5828.0, 1605.0, 568.0, 271.0, 151.0, 109.0, 38.0, 24.0, 14.0, 9.0, 7.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.953125, -7.7677001953125, -7.582275390625, -7.3968505859375, -7.21142578125, -7.0260009765625, -6.840576171875, -6.6551513671875, -6.4697265625, -6.2843017578125, -6.098876953125, -5.9134521484375, -5.72802734375, -5.5426025390625, -5.357177734375, -5.1717529296875, -4.986328125, -4.8009033203125, -4.615478515625, -4.4300537109375, -4.24462890625, -4.0592041015625, -3.873779296875, -3.6883544921875, -3.5029296875, -3.3175048828125, -3.132080078125, -2.9466552734375, -2.76123046875, -2.5758056640625, -2.390380859375, -2.2049560546875, -2.01953125, -1.8341064453125, -1.648681640625, -1.4632568359375, -1.27783203125, -1.0924072265625, -0.906982421875, -0.7215576171875, -0.5361328125, -0.3507080078125, -0.165283203125, 0.0201416015625, 0.20556640625, 0.3909912109375, 0.576416015625, 0.7618408203125, 0.947265625, 1.1326904296875, 1.318115234375, 1.5035400390625, 1.68896484375, 1.8743896484375, 2.059814453125, 2.2452392578125, 2.4306640625, 2.6160888671875, 2.801513671875, 2.9869384765625, 3.17236328125, 3.3577880859375, 3.543212890625, 3.7286376953125, 3.9140625]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 9.0, 21.0, 21.0, 30.0, 57.0, 69.0, 83.0, 86.0, 127.0, 112.0, 108.0, 76.0, 71.0, 50.0, 31.0, 19.0, 18.0, 11.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.04296875, -5.911163330078125, -5.77935791015625, -5.647552490234375, -5.5157470703125, -5.383941650390625, -5.25213623046875, -5.120330810546875, -4.988525390625, -4.856719970703125, -4.72491455078125, -4.593109130859375, -4.4613037109375, -4.329498291015625, -4.19769287109375, -4.065887451171875, -3.93408203125, -3.802276611328125, -3.67047119140625, -3.538665771484375, -3.4068603515625, -3.275054931640625, -3.14324951171875, -3.011444091796875, -2.879638671875, -2.747833251953125, -2.61602783203125, -2.484222412109375, -2.3524169921875, -2.220611572265625, -2.08880615234375, -1.957000732421875, -1.8251953125, -1.693389892578125, -1.56158447265625, -1.429779052734375, -1.2979736328125, -1.166168212890625, -1.03436279296875, -0.902557373046875, -0.770751953125, -0.638946533203125, -0.50714111328125, -0.375335693359375, -0.2435302734375, -0.111724853515625, 0.02008056640625, 0.151885986328125, 0.28369140625, 0.415496826171875, 0.54730224609375, 0.679107666015625, 0.8109130859375, 0.942718505859375, 1.07452392578125, 1.206329345703125, 1.338134765625, 1.469940185546875, 1.60174560546875, 1.733551025390625, 1.8653564453125, 1.997161865234375, 2.12896728515625, 2.260772705078125, 2.392578125]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 3.0, 5.0, 19.0, 21.0, 53.0, 105.0, 210.0, 579.0, 4741.0, 3933995.0, 251482.0, 2419.0, 362.0, 154.0, 82.0, 26.0, 19.0, 8.0, 4.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.796875, -15.23046875, -14.6640625, -14.09765625, -13.53125, -12.96484375, -12.3984375, -11.83203125, -11.265625, -10.69921875, -10.1328125, -9.56640625, -9.0, -8.43359375, -7.8671875, -7.30078125, -6.734375, -6.16796875, -5.6015625, -5.03515625, -4.46875, -3.90234375, -3.3359375, -2.76953125, -2.203125, -1.63671875, -1.0703125, -0.50390625, 0.0625, 0.62890625, 1.1953125, 1.76171875, 2.328125, 2.89453125, 3.4609375, 4.02734375, 4.59375, 5.16015625, 5.7265625, 6.29296875, 6.859375, 7.42578125, 7.9921875, 8.55859375, 9.125, 9.69140625, 10.2578125, 10.82421875, 11.390625, 11.95703125, 12.5234375, 13.08984375, 13.65625, 14.22265625, 14.7890625, 15.35546875, 15.921875, 16.48828125, 17.0546875, 17.62109375, 18.1875, 18.75390625, 19.3203125, 19.88671875, 20.453125]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 5.0, 6.0, 6.0, 19.0, 30.0, 57.0, 206.0, 2012.0, 1442.0, 154.0, 84.0, 29.0, 15.0, 9.0, 5.0, 5.0, 4.0, 0.0, 1.0, 1.0], "bins": [-20.96875, -20.56494140625, -20.1611328125, -19.75732421875, -19.353515625, -18.94970703125, -18.5458984375, -18.14208984375, -17.73828125, -17.33447265625, -16.9306640625, -16.52685546875, -16.123046875, -15.71923828125, -15.3154296875, -14.91162109375, -14.5078125, -14.10400390625, -13.7001953125, -13.29638671875, -12.892578125, -12.48876953125, -12.0849609375, -11.68115234375, -11.27734375, -10.87353515625, -10.4697265625, -10.06591796875, -9.662109375, -9.25830078125, -8.8544921875, -8.45068359375, -8.046875, -7.64306640625, -7.2392578125, -6.83544921875, -6.431640625, -6.02783203125, -5.6240234375, -5.22021484375, -4.81640625, -4.41259765625, -4.0087890625, -3.60498046875, -3.201171875, -2.79736328125, -2.3935546875, -1.98974609375, -1.5859375, -1.18212890625, -0.7783203125, -0.37451171875, 0.029296875, 0.43310546875, 0.8369140625, 1.24072265625, 1.64453125, 2.04833984375, 2.4521484375, 2.85595703125, 3.259765625, 3.66357421875, 4.0673828125, 4.47119140625, 4.875]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 5.0, 6.0, 14.0, 27.0, 88.0, 224.0, 344.0, 190.0, 76.0, 24.0, 5.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-65.2035140991211, -63.05573272705078, -60.90795135498047, -58.760169982910156, -56.612388610839844, -54.46460723876953, -52.31682205200195, -50.16904067993164, -48.02125930786133, -45.873477935791016, -43.7256965637207, -41.57791519165039, -39.43013000488281, -37.2823486328125, -35.13456726074219, -32.986785888671875, -30.839004516601562, -28.69122314453125, -26.543441772460938, -24.395658493041992, -22.24787712097168, -20.100095748901367, -17.952312469482422, -15.80453109741211, -13.656749725341797, -11.508968353271484, -9.361186027526855, -7.213404178619385, -5.065622329711914, -2.9178409576416016, -0.7700586318969727, 1.3777236938476562, 3.5254974365234375, 5.673279285430908, 7.821061134338379, 9.968843460083008, 12.11662483215332, 14.264406204223633, 16.412189483642578, 18.55997085571289, 20.707752227783203, 22.855533599853516, 25.003314971923828, 27.151098251342773, 29.298879623413086, 31.4466609954834, 33.594444274902344, 35.742225646972656, 37.89000701904297, 40.03778839111328, 42.185569763183594, 44.333351135253906, 46.48113250732422, 48.62891387939453, 50.77669906616211, 52.92448043823242, 55.072261810302734, 57.22004318237305, 59.36782455444336, 61.51560592651367, 63.66339111328125, 65.81117248535156, 67.95895385742188, 70.10673522949219, 72.2545166015625]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 1.0, 2.0, 6.0, 11.0, 8.0, 13.0, 21.0, 25.0, 27.0, 41.0, 39.0, 63.0, 71.0, 71.0, 89.0, 78.0, 78.0, 66.0, 62.0, 48.0, 39.0, 37.0, 37.0, 19.0, 19.0, 17.0, 5.0, 6.0, 3.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-37.344871520996094, -36.14968490600586, -34.954498291015625, -33.759315490722656, -32.56412887573242, -31.368942260742188, -30.173757553100586, -28.978572845458984, -27.78338623046875, -26.588199615478516, -25.393014907836914, -24.197830200195312, -23.002643585205078, -21.807456970214844, -20.612272262573242, -19.41708755493164, -18.221900939941406, -17.026714324951172, -15.83152961730957, -14.636343955993652, -13.441158294677734, -12.245972633361816, -11.050786972045898, -9.85560131072998, -8.660415649414062, -7.4652299880981445, -6.270044326782227, -5.074858665466309, -3.8796730041503906, -2.6844873428344727, -1.4893016815185547, -0.2941160202026367, 0.9010658264160156, 2.0962514877319336, 3.2914371490478516, 4.4866228103637695, 5.6818084716796875, 6.8769941329956055, 8.072179794311523, 9.267365455627441, 10.46255111694336, 11.657736778259277, 12.852922439575195, 14.048108100891113, 15.243293762207031, 16.438480377197266, 17.633665084838867, 18.82884979248047, 20.024036407470703, 21.219223022460938, 22.41440773010254, 23.60959243774414, 24.804779052734375, 25.99996566772461, 27.19515037536621, 28.390335083007812, 29.585521697998047, 30.78070831298828, 31.975893020629883, 33.171077728271484, 34.36626434326172, 35.56145095825195, 36.75663757324219, 37.951820373535156, 39.14700698852539]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 0.0, 3.0, 4.0, 3.0, 1.0, 4.0, 1.0, 5.0, 11.0, 8.0, 23.0, 21.0, 32.0, 73.0, 102.0, 186.0, 309.0, 524.0, 1001.0, 1782.0, 3392.0, 6631.0, 13536.0, 28651.0, 61283.0, 134051.0, 254950.0, 268080.0, 145440.0, 67500.0, 30945.0, 14721.0, 7216.0, 3635.0, 1962.0, 1025.0, 605.0, 317.0, 198.0, 124.0, 64.0, 43.0, 32.0, 20.0, 9.0, 12.0, 8.0, 6.0, 1.0, 2.0, 3.0, 0.0, 5.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0], "bins": [-1.9267578125, -1.8626556396484375, -1.798553466796875, -1.7344512939453125, -1.67034912109375, -1.6062469482421875, -1.542144775390625, -1.4780426025390625, -1.4139404296875, -1.3498382568359375, -1.285736083984375, -1.2216339111328125, -1.15753173828125, -1.0934295654296875, -1.029327392578125, -0.9652252197265625, -0.901123046875, -0.8370208740234375, -0.772918701171875, -0.7088165283203125, -0.64471435546875, -0.5806121826171875, -0.516510009765625, -0.4524078369140625, -0.3883056640625, -0.3242034912109375, -0.260101318359375, -0.1959991455078125, -0.13189697265625, -0.0677947998046875, -0.003692626953125, 0.0604095458984375, 0.12451171875, 0.1886138916015625, 0.252716064453125, 0.3168182373046875, 0.38092041015625, 0.4450225830078125, 0.509124755859375, 0.5732269287109375, 0.6373291015625, 0.7014312744140625, 0.765533447265625, 0.8296356201171875, 0.89373779296875, 0.9578399658203125, 1.021942138671875, 1.0860443115234375, 1.150146484375, 1.2142486572265625, 1.278350830078125, 1.3424530029296875, 1.40655517578125, 1.4706573486328125, 1.534759521484375, 1.5988616943359375, 1.6629638671875, 1.7270660400390625, 1.791168212890625, 1.8552703857421875, 1.91937255859375, 1.9834747314453125, 2.047576904296875, 2.1116790771484375, 2.17578125]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 5.0, 5.0, 10.0, 9.0, 17.0, 19.0, 19.0, 31.0, 44.0, 39.0, 54.0, 58.0, 55.0, 66.0, 58.0, 70.0, 77.0, 61.0, 58.0, 56.0, 43.0, 35.0, 25.0, 25.0, 20.0, 11.0, 15.0, 8.0, 2.0, 3.0, 9.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.248046875, -3.1705780029296875, -3.093109130859375, -3.0156402587890625, -2.93817138671875, -2.8607025146484375, -2.783233642578125, -2.7057647705078125, -2.6282958984375, -2.5508270263671875, -2.473358154296875, -2.3958892822265625, -2.31842041015625, -2.2409515380859375, -2.163482666015625, -2.0860137939453125, -2.008544921875, -1.9310760498046875, -1.853607177734375, -1.7761383056640625, -1.69866943359375, -1.6212005615234375, -1.543731689453125, -1.4662628173828125, -1.3887939453125, -1.3113250732421875, -1.233856201171875, -1.1563873291015625, -1.07891845703125, -1.0014495849609375, -0.923980712890625, -0.8465118408203125, -0.76904296875, -0.6915740966796875, -0.614105224609375, -0.5366363525390625, -0.45916748046875, -0.3816986083984375, -0.304229736328125, -0.2267608642578125, -0.1492919921875, -0.0718231201171875, 0.005645751953125, 0.0831146240234375, 0.16058349609375, 0.2380523681640625, 0.315521240234375, 0.3929901123046875, 0.470458984375, 0.5479278564453125, 0.625396728515625, 0.7028656005859375, 0.78033447265625, 0.8578033447265625, 0.935272216796875, 1.0127410888671875, 1.0902099609375, 1.1676788330078125, 1.245147705078125, 1.3226165771484375, 1.40008544921875, 1.4775543212890625, 1.555023193359375, 1.6324920654296875, 1.7099609375]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 6.0, 2.0, 5.0, 6.0, 12.0, 23.0, 24.0, 44.0, 70.0, 96.0, 170.0, 305.0, 573.0, 1146.0, 3287.0, 16758.0, 253511.0, 717816.0, 45390.0, 5781.0, 1767.0, 775.0, 396.0, 227.0, 125.0, 85.0, 52.0, 28.0, 25.0, 21.0, 15.0, 5.0, 3.0, 6.0, 3.0, 1.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.28515625, -5.07550048828125, -4.8658447265625, -4.65618896484375, -4.446533203125, -4.23687744140625, -4.0272216796875, -3.81756591796875, -3.60791015625, -3.39825439453125, -3.1885986328125, -2.97894287109375, -2.769287109375, -2.55963134765625, -2.3499755859375, -2.14031982421875, -1.9306640625, -1.72100830078125, -1.5113525390625, -1.30169677734375, -1.092041015625, -0.88238525390625, -0.6727294921875, -0.46307373046875, -0.25341796875, -0.04376220703125, 0.1658935546875, 0.37554931640625, 0.585205078125, 0.79486083984375, 1.0045166015625, 1.21417236328125, 1.423828125, 1.63348388671875, 1.8431396484375, 2.05279541015625, 2.262451171875, 2.47210693359375, 2.6817626953125, 2.89141845703125, 3.10107421875, 3.31072998046875, 3.5203857421875, 3.73004150390625, 3.939697265625, 4.14935302734375, 4.3590087890625, 4.56866455078125, 4.7783203125, 4.98797607421875, 5.1976318359375, 5.40728759765625, 5.616943359375, 5.82659912109375, 6.0362548828125, 6.24591064453125, 6.45556640625, 6.66522216796875, 6.8748779296875, 7.08453369140625, 7.294189453125, 7.50384521484375, 7.7135009765625, 7.92315673828125, 8.1328125]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 8.0, 3.0, 3.0, 8.0, 6.0, 16.0, 14.0, 21.0, 18.0, 20.0, 30.0, 34.0, 35.0, 32.0, 43.0, 44.0, 55.0, 36.0, 51.0, 49.0, 37.0, 48.0, 53.0, 46.0, 32.0, 33.0, 42.0, 28.0, 31.0, 13.0, 29.0, 15.0, 17.0, 16.0, 13.0, 7.0, 8.0, 5.0, 4.0, 4.0, 2.0, 0.0, 5.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.79296875, -6.55572509765625, -6.3184814453125, -6.08123779296875, -5.843994140625, -5.60675048828125, -5.3695068359375, -5.13226318359375, -4.89501953125, -4.65777587890625, -4.4205322265625, -4.18328857421875, -3.946044921875, -3.70880126953125, -3.4715576171875, -3.23431396484375, -2.9970703125, -2.75982666015625, -2.5225830078125, -2.28533935546875, -2.048095703125, -1.81085205078125, -1.5736083984375, -1.33636474609375, -1.09912109375, -0.86187744140625, -0.6246337890625, -0.38739013671875, -0.150146484375, 0.08709716796875, 0.3243408203125, 0.56158447265625, 0.798828125, 1.03607177734375, 1.2733154296875, 1.51055908203125, 1.747802734375, 1.98504638671875, 2.2222900390625, 2.45953369140625, 2.69677734375, 2.93402099609375, 3.1712646484375, 3.40850830078125, 3.645751953125, 3.88299560546875, 4.1202392578125, 4.35748291015625, 4.5947265625, 4.83197021484375, 5.0692138671875, 5.30645751953125, 5.543701171875, 5.78094482421875, 6.0181884765625, 6.25543212890625, 6.49267578125, 6.72991943359375, 6.9671630859375, 7.20440673828125, 7.441650390625, 7.67889404296875, 7.9161376953125, 8.15338134765625, 8.390625]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 1.0, 2.0, 4.0, 5.0, 5.0, 9.0, 20.0, 26.0, 34.0, 54.0, 96.0, 154.0, 341.0, 766.0, 2251.0, 9889.0, 126852.0, 848709.0, 50477.0, 6042.0, 1592.0, 614.0, 272.0, 106.0, 70.0, 52.0, 33.0, 25.0, 16.0, 10.0, 7.0, 9.0, 4.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.982421875, -2.89300537109375, -2.8035888671875, -2.71417236328125, -2.624755859375, -2.53533935546875, -2.4459228515625, -2.35650634765625, -2.26708984375, -2.17767333984375, -2.0882568359375, -1.99884033203125, -1.909423828125, -1.82000732421875, -1.7305908203125, -1.64117431640625, -1.5517578125, -1.46234130859375, -1.3729248046875, -1.28350830078125, -1.194091796875, -1.10467529296875, -1.0152587890625, -0.92584228515625, -0.83642578125, -0.74700927734375, -0.6575927734375, -0.56817626953125, -0.478759765625, -0.38934326171875, -0.2999267578125, -0.21051025390625, -0.12109375, -0.03167724609375, 0.0577392578125, 0.14715576171875, 0.236572265625, 0.32598876953125, 0.4154052734375, 0.50482177734375, 0.59423828125, 0.68365478515625, 0.7730712890625, 0.86248779296875, 0.951904296875, 1.04132080078125, 1.1307373046875, 1.22015380859375, 1.3095703125, 1.39898681640625, 1.4884033203125, 1.57781982421875, 1.667236328125, 1.75665283203125, 1.8460693359375, 1.93548583984375, 2.02490234375, 2.11431884765625, 2.2037353515625, 2.29315185546875, 2.382568359375, 2.47198486328125, 2.5614013671875, 2.65081787109375, 2.740234375]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 6.0, 3.0, 4.0, 4.0, 7.0, 16.0, 10.0, 9.0, 21.0, 16.0, 41.0, 43.0, 52.0, 71.0, 90.0, 85.0, 95.0, 70.0, 78.0, 55.0, 53.0, 49.0, 27.0, 18.0, 23.0, 21.0, 10.0, 6.0, 10.0, 5.0, 2.0, 0.0, 1.0, 0.0, 3.0, 3.0, 3.0, 1.0, 1.0], "bins": [-0.0004315376281738281, -0.0004210490733385086, -0.0004105605185031891, -0.00040007196366786957, -0.00038958340883255005, -0.00037909485399723053, -0.000368606299161911, -0.0003581177443265915, -0.00034762918949127197, -0.00033714063465595245, -0.00032665207982063293, -0.0003161635249853134, -0.0003056749701499939, -0.0002951864153146744, -0.00028469786047935486, -0.00027420930564403534, -0.0002637207508087158, -0.0002532321959733963, -0.00024274364113807678, -0.00023225508630275726, -0.00022176653146743774, -0.00021127797663211823, -0.0002007894217967987, -0.0001903008669614792, -0.00017981231212615967, -0.00016932375729084015, -0.00015883520245552063, -0.0001483466476202011, -0.0001378580927848816, -0.00012736953794956207, -0.00011688098311424255, -0.00010639242827892303, -9.590387344360352e-05, -8.5415318608284e-05, -7.492676377296448e-05, -6.443820893764496e-05, -5.394965410232544e-05, -4.346109926700592e-05, -3.29725444316864e-05, -2.2483989596366882e-05, -1.1995434761047363e-05, -1.5068799257278442e-06, 8.981674909591675e-06, 1.9470229744911194e-05, 2.9958784580230713e-05, 4.044733941555023e-05, 5.093589425086975e-05, 6.142444908618927e-05, 7.191300392150879e-05, 8.240155875682831e-05, 9.289011359214783e-05, 0.00010337866842746735, 0.00011386722326278687, 0.00012435577809810638, 0.0001348443329334259, 0.00014533288776874542, 0.00015582144260406494, 0.00016630999743938446, 0.00017679855227470398, 0.0001872871071100235, 0.00019777566194534302, 0.00020826421678066254, 0.00021875277161598206, 0.00022924132645130157, 0.0002397298812866211]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 3.0, 8.0, 3.0, 5.0, 4.0, 11.0, 19.0, 22.0, 42.0, 57.0, 111.0, 205.0, 451.0, 1202.0, 3660.0, 21785.0, 328120.0, 648157.0, 36762.0, 5405.0, 1410.0, 521.0, 246.0, 130.0, 75.0, 58.0, 28.0, 20.0, 8.0, 10.0, 6.0, 3.0, 3.0, 4.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.5859375, -2.512298583984375, -2.43865966796875, -2.365020751953125, -2.2913818359375, -2.217742919921875, -2.14410400390625, -2.070465087890625, -1.996826171875, -1.923187255859375, -1.84954833984375, -1.775909423828125, -1.7022705078125, -1.628631591796875, -1.55499267578125, -1.481353759765625, -1.40771484375, -1.334075927734375, -1.26043701171875, -1.186798095703125, -1.1131591796875, -1.039520263671875, -0.96588134765625, -0.892242431640625, -0.818603515625, -0.744964599609375, -0.67132568359375, -0.597686767578125, -0.5240478515625, -0.450408935546875, -0.37677001953125, -0.303131103515625, -0.2294921875, -0.155853271484375, -0.08221435546875, -0.008575439453125, 0.0650634765625, 0.138702392578125, 0.21234130859375, 0.285980224609375, 0.359619140625, 0.433258056640625, 0.50689697265625, 0.580535888671875, 0.6541748046875, 0.727813720703125, 0.80145263671875, 0.875091552734375, 0.94873046875, 1.022369384765625, 1.09600830078125, 1.169647216796875, 1.2432861328125, 1.316925048828125, 1.39056396484375, 1.464202880859375, 1.537841796875, 1.611480712890625, 1.68511962890625, 1.758758544921875, 1.8323974609375, 1.906036376953125, 1.97967529296875, 2.053314208984375, 2.126953125]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 4.0, 5.0, 2.0, 4.0, 3.0, 12.0, 2.0, 18.0, 12.0, 17.0, 18.0, 25.0, 41.0, 36.0, 53.0, 56.0, 52.0, 71.0, 76.0, 79.0, 74.0, 60.0, 51.0, 50.0, 33.0, 32.0, 27.0, 16.0, 20.0, 15.0, 6.0, 3.0, 7.0, 6.0, 2.0, 5.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.5478515625, -1.4976654052734375, -1.447479248046875, -1.3972930908203125, -1.34710693359375, -1.2969207763671875, -1.246734619140625, -1.1965484619140625, -1.1463623046875, -1.0961761474609375, -1.045989990234375, -0.9958038330078125, -0.94561767578125, -0.8954315185546875, -0.845245361328125, -0.7950592041015625, -0.744873046875, -0.6946868896484375, -0.644500732421875, -0.5943145751953125, -0.54412841796875, -0.4939422607421875, -0.443756103515625, -0.3935699462890625, -0.3433837890625, -0.2931976318359375, -0.243011474609375, -0.1928253173828125, -0.14263916015625, -0.0924530029296875, -0.042266845703125, 0.0079193115234375, 0.05810546875, 0.1082916259765625, 0.158477783203125, 0.2086639404296875, 0.25885009765625, 0.3090362548828125, 0.359222412109375, 0.4094085693359375, 0.4595947265625, 0.5097808837890625, 0.559967041015625, 0.6101531982421875, 0.66033935546875, 0.7105255126953125, 0.760711669921875, 0.8108978271484375, 0.861083984375, 0.9112701416015625, 0.961456298828125, 1.0116424560546875, 1.06182861328125, 1.1120147705078125, 1.162200927734375, 1.2123870849609375, 1.2625732421875, 1.3127593994140625, 1.362945556640625, 1.4131317138671875, 1.46331787109375, 1.5135040283203125, 1.563690185546875, 1.6138763427734375, 1.6640625]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 15.0, 59.0, 194.0, 420.0, 239.0, 53.0, 14.0, 4.0, 2.0, 4.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-44.158538818359375, -41.318721771240234, -38.478904724121094, -35.63909149169922, -32.79927062988281, -29.959455490112305, -27.119640350341797, -24.279823303222656, -21.440006256103516, -18.600189208984375, -15.76037311553955, -12.920557022094727, -10.080739974975586, -7.240922927856445, -4.4011077880859375, -1.5612907409667969, 1.2785263061523438, 4.118342876434326, 6.958159446716309, 9.797975540161133, 12.637792587280273, 15.477609634399414, 18.317424774169922, 21.157241821289062, 23.997058868408203, 26.836875915527344, 29.676692962646484, 32.516510009765625, 35.3563232421875, 38.196144104003906, 41.03595733642578, 43.87577438354492, 46.71559143066406, 49.5554084777832, 52.395225524902344, 55.23503875732422, 58.074859619140625, 60.9146728515625, 63.75448989868164, 66.59430694580078, 69.43412780761719, 72.27394104003906, 75.11376190185547, 77.95357513427734, 80.79339599609375, 83.63320922851562, 86.4730224609375, 89.3128433227539, 92.15265655517578, 94.99246978759766, 97.83229064941406, 100.67210388183594, 103.51192474365234, 106.35173797607422, 109.19155883789062, 112.0313720703125, 114.87118530273438, 117.71099853515625, 120.55081939697266, 123.39063262939453, 126.23045349121094, 129.0702667236328, 131.9100799560547, 134.74990844726562, 137.5897216796875]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 3.0, 4.0, 5.0, 8.0, 7.0, 5.0, 10.0, 12.0, 22.0, 20.0, 30.0, 32.0, 28.0, 24.0, 30.0, 38.0, 39.0, 44.0, 71.0, 71.0, 73.0, 56.0, 58.0, 59.0, 43.0, 30.0, 47.0, 33.0, 22.0, 25.0, 14.0, 11.0, 10.0, 3.0, 5.0, 4.0, 9.0, 1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.795791625976562, -23.709667205810547, -22.6235408782959, -21.537416458129883, -20.451292037963867, -19.36516571044922, -18.279041290283203, -17.192916870117188, -16.106792449951172, -15.02066707611084, -13.934542655944824, -12.848417282104492, -11.762292861938477, -10.676167488098145, -9.590042114257812, -8.503917694091797, -7.417791366577148, -6.331666469573975, -5.245541572570801, -4.159416198730469, -3.073291301727295, -1.987166404724121, -0.9010410308837891, 0.18508386611938477, 1.2712087631225586, 2.3573336601257324, 3.4434587955474854, 4.529583930969238, 5.615708827972412, 6.701833724975586, 7.787959098815918, 8.87408447265625, 9.960208892822266, 11.046334266662598, 12.132458686828613, 13.218584060668945, 14.304708480834961, 15.390833854675293, 16.476959228515625, 17.56308364868164, 18.649208068847656, 19.735332489013672, 20.82145881652832, 21.907583236694336, 22.99370765686035, 24.079833984375, 25.165958404541016, 26.25208282470703, 27.33820915222168, 28.424333572387695, 29.510459899902344, 30.59658432006836, 31.682708740234375, 32.76883316040039, 33.854957580566406, 34.94108581542969, 36.0272102355957, 37.11333465576172, 38.199459075927734, 39.28558349609375, 40.37171173095703, 41.45783615112305, 42.54396057128906, 43.63008499145508, 44.716209411621094]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 2.0, 1.0, 2.0, 5.0, 2.0, 8.0, 5.0, 11.0, 9.0, 13.0, 19.0, 34.0, 36.0, 54.0, 83.0, 102.0, 165.0, 420.0, 3144.0, 185158.0, 3992208.0, 11128.0, 893.0, 298.0, 143.0, 102.0, 59.0, 55.0, 40.0, 19.0, 15.0, 16.0, 11.0, 7.0, 7.0, 7.0, 3.0, 0.0, 2.0, 1.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.6015625, -9.2939453125, -8.986328125, -8.6787109375, -8.37109375, -8.0634765625, -7.755859375, -7.4482421875, -7.140625, -6.8330078125, -6.525390625, -6.2177734375, -5.91015625, -5.6025390625, -5.294921875, -4.9873046875, -4.6796875, -4.3720703125, -4.064453125, -3.7568359375, -3.44921875, -3.1416015625, -2.833984375, -2.5263671875, -2.21875, -1.9111328125, -1.603515625, -1.2958984375, -0.98828125, -0.6806640625, -0.373046875, -0.0654296875, 0.2421875, 0.5498046875, 0.857421875, 1.1650390625, 1.47265625, 1.7802734375, 2.087890625, 2.3955078125, 2.703125, 3.0107421875, 3.318359375, 3.6259765625, 3.93359375, 4.2412109375, 4.548828125, 4.8564453125, 5.1640625, 5.4716796875, 5.779296875, 6.0869140625, 6.39453125, 6.7021484375, 7.009765625, 7.3173828125, 7.625, 7.9326171875, 8.240234375, 8.5478515625, 8.85546875, 9.1630859375, 9.470703125, 9.7783203125, 10.0859375]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 7.0, 5.0, 10.0, 17.0, 21.0, 28.0, 43.0, 43.0, 49.0, 53.0, 66.0, 59.0, 75.0, 73.0, 79.0, 60.0, 47.0, 47.0, 54.0, 42.0, 29.0, 29.0, 16.0, 15.0, 15.0, 10.0, 6.0, 5.0, 3.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.880859375, -1.80718994140625, -1.7335205078125, -1.65985107421875, -1.586181640625, -1.51251220703125, -1.4388427734375, -1.36517333984375, -1.29150390625, -1.21783447265625, -1.1441650390625, -1.07049560546875, -0.996826171875, -0.92315673828125, -0.8494873046875, -0.77581787109375, -0.7021484375, -0.62847900390625, -0.5548095703125, -0.48114013671875, -0.407470703125, -0.33380126953125, -0.2601318359375, -0.18646240234375, -0.11279296875, -0.03912353515625, 0.0345458984375, 0.10821533203125, 0.181884765625, 0.25555419921875, 0.3292236328125, 0.40289306640625, 0.4765625, 0.55023193359375, 0.6239013671875, 0.69757080078125, 0.771240234375, 0.84490966796875, 0.9185791015625, 0.99224853515625, 1.06591796875, 1.13958740234375, 1.2132568359375, 1.28692626953125, 1.360595703125, 1.43426513671875, 1.5079345703125, 1.58160400390625, 1.6552734375, 1.72894287109375, 1.8026123046875, 1.87628173828125, 1.949951171875, 2.02362060546875, 2.0972900390625, 2.17095947265625, 2.24462890625, 2.31829833984375, 2.3919677734375, 2.46563720703125, 2.539306640625, 2.61297607421875, 2.6866455078125, 2.76031494140625, 2.833984375]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 6.0, 4.0, 17.0, 19.0, 21.0, 40.0, 96.0, 214.0, 538.0, 1380.0, 4035.0, 14269.0, 80299.0, 3568207.0, 480897.0, 33094.0, 7496.0, 2333.0, 801.0, 279.0, 121.0, 50.0, 23.0, 16.0, 12.0, 6.0, 6.0, 1.0, 3.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.55859375, -4.410400390625, -4.26220703125, -4.114013671875, -3.9658203125, -3.817626953125, -3.66943359375, -3.521240234375, -3.373046875, -3.224853515625, -3.07666015625, -2.928466796875, -2.7802734375, -2.632080078125, -2.48388671875, -2.335693359375, -2.1875, -2.039306640625, -1.89111328125, -1.742919921875, -1.5947265625, -1.446533203125, -1.29833984375, -1.150146484375, -1.001953125, -0.853759765625, -0.70556640625, -0.557373046875, -0.4091796875, -0.260986328125, -0.11279296875, 0.035400390625, 0.18359375, 0.331787109375, 0.47998046875, 0.628173828125, 0.7763671875, 0.924560546875, 1.07275390625, 1.220947265625, 1.369140625, 1.517333984375, 1.66552734375, 1.813720703125, 1.9619140625, 2.110107421875, 2.25830078125, 2.406494140625, 2.5546875, 2.702880859375, 2.85107421875, 2.999267578125, 3.1474609375, 3.295654296875, 3.44384765625, 3.592041015625, 3.740234375, 3.888427734375, 4.03662109375, 4.184814453125, 4.3330078125, 4.481201171875, 4.62939453125, 4.777587890625, 4.92578125]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 3.0, 3.0, 1.0, 1.0, 1.0, 7.0, 4.0, 5.0, 5.0, 7.0, 12.0, 10.0, 20.0, 38.0, 43.0, 101.0, 220.0, 508.0, 1112.0, 1046.0, 457.0, 193.0, 85.0, 59.0, 35.0, 29.0, 14.0, 17.0, 8.0, 9.0, 12.0, 4.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.3984375, -4.27239990234375, -4.1463623046875, -4.02032470703125, -3.894287109375, -3.76824951171875, -3.6422119140625, -3.51617431640625, -3.39013671875, -3.26409912109375, -3.1380615234375, -3.01202392578125, -2.885986328125, -2.75994873046875, -2.6339111328125, -2.50787353515625, -2.3818359375, -2.25579833984375, -2.1297607421875, -2.00372314453125, -1.877685546875, -1.75164794921875, -1.6256103515625, -1.49957275390625, -1.37353515625, -1.24749755859375, -1.1214599609375, -0.99542236328125, -0.869384765625, -0.74334716796875, -0.6173095703125, -0.49127197265625, -0.365234375, -0.23919677734375, -0.1131591796875, 0.01287841796875, 0.138916015625, 0.26495361328125, 0.3909912109375, 0.51702880859375, 0.64306640625, 0.76910400390625, 0.8951416015625, 1.02117919921875, 1.147216796875, 1.27325439453125, 1.3992919921875, 1.52532958984375, 1.6513671875, 1.77740478515625, 1.9034423828125, 2.02947998046875, 2.155517578125, 2.28155517578125, 2.4075927734375, 2.53363037109375, 2.65966796875, 2.78570556640625, 2.9117431640625, 3.03778076171875, 3.163818359375, 3.28985595703125, 3.4158935546875, 3.54193115234375, 3.66796875]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 4.0, 23.0, 59.0, 176.0, 307.0, 266.0, 106.0, 31.0, 14.0, 7.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-47.08795928955078, -45.64741897583008, -44.20687484741211, -42.766334533691406, -41.32579040527344, -39.885250091552734, -38.444705963134766, -37.00416564941406, -35.563621520996094, -34.12308120727539, -32.68253707885742, -31.241994857788086, -29.80145263671875, -28.360910415649414, -26.920368194580078, -25.479827880859375, -24.03928565979004, -22.598743438720703, -21.158201217651367, -19.71765899658203, -18.277116775512695, -16.83657455444336, -15.39603328704834, -13.955491065979004, -12.514948844909668, -11.074406623840332, -9.633864402770996, -8.193323135375977, -6.752780437469482, -5.3122382164001465, -3.8716964721679688, -2.431154251098633, -0.9906120300292969, 0.4499300718307495, 1.890472173690796, 3.3310141563415527, 4.771556377410889, 6.212098598480225, 7.652640342712402, 9.093182563781738, 10.533724784851074, 11.97426700592041, 13.414809226989746, 14.855350494384766, 16.2958927154541, 17.736434936523438, 19.176977157592773, 20.61751937866211, 22.058061599731445, 23.49860382080078, 24.939146041870117, 26.379688262939453, 27.82023048400879, 29.260772705078125, 30.701313018798828, 32.1418571472168, 33.5823974609375, 35.0229377746582, 36.46348190307617, 37.904022216796875, 39.344566345214844, 40.78510665893555, 42.225650787353516, 43.66619110107422, 45.10673522949219]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 1.0, 2.0, 4.0, 2.0, 4.0, 11.0, 21.0, 19.0, 17.0, 22.0, 27.0, 29.0, 33.0, 39.0, 55.0, 50.0, 62.0, 61.0, 71.0, 58.0, 56.0, 58.0, 44.0, 42.0, 44.0, 29.0, 32.0, 19.0, 26.0, 13.0, 12.0, 10.0, 10.0, 7.0, 5.0, 2.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-21.723228454589844, -21.146329879760742, -20.56943130493164, -19.992534637451172, -19.41563606262207, -18.83873748779297, -18.261838912963867, -17.684940338134766, -17.108043670654297, -16.531145095825195, -15.95424747467041, -15.377348899841309, -14.800451278686523, -14.223552703857422, -13.64665412902832, -13.069756507873535, -12.492857933044434, -11.915959358215332, -11.339061737060547, -10.762163162231445, -10.18526554107666, -9.608366966247559, -9.031469345092773, -8.454570770263672, -7.8776726722717285, -7.300774574279785, -6.723876476287842, -6.146978378295898, -5.570079803466797, -4.993182182312012, -4.41628360748291, -3.839385509490967, -3.26248836517334, -2.6855902671813965, -2.108692169189453, -1.5317938327789307, -0.9548957347869873, -0.37799763679504395, 0.19890069961547852, 0.7757987976074219, 1.3526968955993652, 1.9295949935913086, 2.506493091583252, 3.0833914279937744, 3.6602895259857178, 4.237187385559082, 4.814085960388184, 5.390984058380127, 5.96788215637207, 6.544780254364014, 7.121678352355957, 7.698576927185059, 8.275474548339844, 8.852373123168945, 9.429271697998047, 10.006169319152832, 10.583066940307617, 11.159965515136719, 11.736863136291504, 12.313761711120605, 12.89065933227539, 13.467557907104492, 14.044456481933594, 14.621354103088379, 15.19825267791748]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 5.0, 2.0, 4.0, 7.0, 12.0, 15.0, 20.0, 26.0, 36.0, 54.0, 74.0, 119.0, 175.0, 288.0, 498.0, 816.0, 1508.0, 2830.0, 5489.0, 11196.0, 24232.0, 54002.0, 120125.0, 242235.0, 284824.0, 162487.0, 74040.0, 32986.0, 15161.0, 7189.0, 3588.0, 1851.0, 1074.0, 582.0, 390.0, 209.0, 131.0, 94.0, 48.0, 43.0, 29.0, 19.0, 16.0, 10.0, 11.0, 5.0, 6.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.08203125, -2.015289306640625, -1.94854736328125, -1.881805419921875, -1.8150634765625, -1.748321533203125, -1.68157958984375, -1.614837646484375, -1.548095703125, -1.481353759765625, -1.41461181640625, -1.347869873046875, -1.2811279296875, -1.214385986328125, -1.14764404296875, -1.080902099609375, -1.01416015625, -0.947418212890625, -0.88067626953125, -0.813934326171875, -0.7471923828125, -0.680450439453125, -0.61370849609375, -0.546966552734375, -0.480224609375, -0.413482666015625, -0.34674072265625, -0.279998779296875, -0.2132568359375, -0.146514892578125, -0.07977294921875, -0.013031005859375, 0.0537109375, 0.120452880859375, 0.18719482421875, 0.253936767578125, 0.3206787109375, 0.387420654296875, 0.45416259765625, 0.520904541015625, 0.587646484375, 0.654388427734375, 0.72113037109375, 0.787872314453125, 0.8546142578125, 0.921356201171875, 0.98809814453125, 1.054840087890625, 1.12158203125, 1.188323974609375, 1.25506591796875, 1.321807861328125, 1.3885498046875, 1.455291748046875, 1.52203369140625, 1.588775634765625, 1.655517578125, 1.722259521484375, 1.78900146484375, 1.855743408203125, 1.9224853515625, 1.989227294921875, 2.05596923828125, 2.122711181640625, 2.189453125]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 9.0, 9.0, 8.0, 12.0, 18.0, 15.0, 29.0, 35.0, 22.0, 38.0, 36.0, 51.0, 51.0, 39.0, 42.0, 61.0, 45.0, 52.0, 54.0, 47.0, 52.0, 28.0, 35.0, 37.0, 35.0, 28.0, 23.0, 16.0, 15.0, 13.0, 10.0, 7.0, 12.0, 5.0, 6.0, 2.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.861328125, -1.8057861328125, -1.750244140625, -1.6947021484375, -1.63916015625, -1.5836181640625, -1.528076171875, -1.4725341796875, -1.4169921875, -1.3614501953125, -1.305908203125, -1.2503662109375, -1.19482421875, -1.1392822265625, -1.083740234375, -1.0281982421875, -0.97265625, -0.9171142578125, -0.861572265625, -0.8060302734375, -0.75048828125, -0.6949462890625, -0.639404296875, -0.5838623046875, -0.5283203125, -0.4727783203125, -0.417236328125, -0.3616943359375, -0.30615234375, -0.2506103515625, -0.195068359375, -0.1395263671875, -0.083984375, -0.0284423828125, 0.027099609375, 0.0826416015625, 0.13818359375, 0.1937255859375, 0.249267578125, 0.3048095703125, 0.3603515625, 0.4158935546875, 0.471435546875, 0.5269775390625, 0.58251953125, 0.6380615234375, 0.693603515625, 0.7491455078125, 0.8046875, 0.8602294921875, 0.915771484375, 0.9713134765625, 1.02685546875, 1.0823974609375, 1.137939453125, 1.1934814453125, 1.2490234375, 1.3045654296875, 1.360107421875, 1.4156494140625, 1.47119140625, 1.5267333984375, 1.582275390625, 1.6378173828125, 1.693359375]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 4.0, 6.0, 5.0, 7.0, 13.0, 9.0, 22.0, 35.0, 60.0, 104.0, 197.0, 518.0, 1223.0, 4666.0, 43619.0, 741767.0, 238578.0, 13977.0, 2317.0, 759.0, 305.0, 145.0, 77.0, 57.0, 32.0, 20.0, 13.0, 6.0, 6.0, 3.0, 7.0, 1.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.4296875, -6.1895751953125, -5.949462890625, -5.7093505859375, -5.46923828125, -5.2291259765625, -4.989013671875, -4.7489013671875, -4.5087890625, -4.2686767578125, -4.028564453125, -3.7884521484375, -3.54833984375, -3.3082275390625, -3.068115234375, -2.8280029296875, -2.587890625, -2.3477783203125, -2.107666015625, -1.8675537109375, -1.62744140625, -1.3873291015625, -1.147216796875, -0.9071044921875, -0.6669921875, -0.4268798828125, -0.186767578125, 0.0533447265625, 0.29345703125, 0.5335693359375, 0.773681640625, 1.0137939453125, 1.25390625, 1.4940185546875, 1.734130859375, 1.9742431640625, 2.21435546875, 2.4544677734375, 2.694580078125, 2.9346923828125, 3.1748046875, 3.4149169921875, 3.655029296875, 3.8951416015625, 4.13525390625, 4.3753662109375, 4.615478515625, 4.8555908203125, 5.095703125, 5.3358154296875, 5.575927734375, 5.8160400390625, 6.05615234375, 6.2962646484375, 6.536376953125, 6.7764892578125, 7.0166015625, 7.2567138671875, 7.496826171875, 7.7369384765625, 7.97705078125, 8.2171630859375, 8.457275390625, 8.6973876953125, 8.9375]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 4.0, 3.0, 3.0, 1.0, 4.0, 6.0, 4.0, 11.0, 10.0, 18.0, 15.0, 16.0, 29.0, 18.0, 29.0, 38.0, 38.0, 43.0, 41.0, 35.0, 46.0, 50.0, 51.0, 37.0, 54.0, 45.0, 41.0, 40.0, 42.0, 35.0, 27.0, 28.0, 24.0, 25.0, 15.0, 14.0, 12.0, 11.0, 9.0, 11.0, 7.0, 6.0, 2.0, 7.0, 1.0, 1.0, 1.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.94921875, -5.770263671875, -5.59130859375, -5.412353515625, -5.2333984375, -5.054443359375, -4.87548828125, -4.696533203125, -4.517578125, -4.338623046875, -4.15966796875, -3.980712890625, -3.8017578125, -3.622802734375, -3.44384765625, -3.264892578125, -3.0859375, -2.906982421875, -2.72802734375, -2.549072265625, -2.3701171875, -2.191162109375, -2.01220703125, -1.833251953125, -1.654296875, -1.475341796875, -1.29638671875, -1.117431640625, -0.9384765625, -0.759521484375, -0.58056640625, -0.401611328125, -0.22265625, -0.043701171875, 0.13525390625, 0.314208984375, 0.4931640625, 0.672119140625, 0.85107421875, 1.030029296875, 1.208984375, 1.387939453125, 1.56689453125, 1.745849609375, 1.9248046875, 2.103759765625, 2.28271484375, 2.461669921875, 2.640625, 2.819580078125, 2.99853515625, 3.177490234375, 3.3564453125, 3.535400390625, 3.71435546875, 3.893310546875, 4.072265625, 4.251220703125, 4.43017578125, 4.609130859375, 4.7880859375, 4.967041015625, 5.14599609375, 5.324951171875, 5.50390625]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 4.0, 3.0, 8.0, 10.0, 11.0, 9.0, 24.0, 25.0, 25.0, 51.0, 82.0, 150.0, 263.0, 540.0, 1204.0, 3503.0, 15985.0, 196425.0, 770780.0, 48499.0, 7254.0, 2035.0, 777.0, 397.0, 180.0, 121.0, 70.0, 33.0, 31.0, 11.0, 15.0, 8.0, 6.0, 5.0, 4.0, 3.0, 0.0, 0.0, 4.0, 3.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-2.408203125, -2.3128662109375, -2.217529296875, -2.1221923828125, -2.02685546875, -1.9315185546875, -1.836181640625, -1.7408447265625, -1.6455078125, -1.5501708984375, -1.454833984375, -1.3594970703125, -1.26416015625, -1.1688232421875, -1.073486328125, -0.9781494140625, -0.8828125, -0.7874755859375, -0.692138671875, -0.5968017578125, -0.50146484375, -0.4061279296875, -0.310791015625, -0.2154541015625, -0.1201171875, -0.0247802734375, 0.070556640625, 0.1658935546875, 0.26123046875, 0.3565673828125, 0.451904296875, 0.5472412109375, 0.642578125, 0.7379150390625, 0.833251953125, 0.9285888671875, 1.02392578125, 1.1192626953125, 1.214599609375, 1.3099365234375, 1.4052734375, 1.5006103515625, 1.595947265625, 1.6912841796875, 1.78662109375, 1.8819580078125, 1.977294921875, 2.0726318359375, 2.16796875, 2.2633056640625, 2.358642578125, 2.4539794921875, 2.54931640625, 2.6446533203125, 2.739990234375, 2.8353271484375, 2.9306640625, 3.0260009765625, 3.121337890625, 3.2166748046875, 3.31201171875, 3.4073486328125, 3.502685546875, 3.5980224609375, 3.693359375]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 5.0, 6.0, 5.0, 19.0, 29.0, 38.0, 56.0, 79.0, 119.0, 122.0, 157.0, 122.0, 86.0, 72.0, 29.0, 24.0, 8.0, 9.0, 7.0, 4.0, 6.0, 2.0, 1.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005316734313964844, -0.0005120113492012024, -0.0004923492670059204, -0.00047268718481063843, -0.00045302510261535645, -0.00043336302042007446, -0.0004137009382247925, -0.0003940388560295105, -0.0003743767738342285, -0.00035471469163894653, -0.00033505260944366455, -0.00031539052724838257, -0.0002957284450531006, -0.0002760663628578186, -0.0002564042806625366, -0.00023674219846725464, -0.00021708011627197266, -0.00019741803407669067, -0.0001777559518814087, -0.0001580938696861267, -0.00013843178749084473, -0.00011876970529556274, -9.910762310028076e-05, -7.944554090499878e-05, -5.97834587097168e-05, -4.0121376514434814e-05, -2.0459294319152832e-05, -7.972121238708496e-07, 1.8864870071411133e-05, 3.8526952266693115e-05, 5.81890344619751e-05, 7.785111665725708e-05, 9.751319885253906e-05, 0.00011717528104782104, 0.00013683736324310303, 0.000156499445438385, 0.000176161527633667, 0.00019582360982894897, 0.00021548569202423096, 0.00023514777421951294, 0.0002548098564147949, 0.0002744719386100769, 0.0002941340208053589, 0.00031379610300064087, 0.00033345818519592285, 0.00035312026739120483, 0.0003727823495864868, 0.0003924444317817688, 0.0004121065139770508, 0.00043176859617233276, 0.00045143067836761475, 0.00047109276056289673, 0.0004907548427581787, 0.0005104169249534607, 0.0005300790071487427, 0.0005497410893440247, 0.0005694031715393066, 0.0005890652537345886, 0.0006087273359298706, 0.0006283894181251526, 0.0006480515003204346, 0.0006677135825157166, 0.0006873756647109985, 0.0007070377469062805, 0.0007266998291015625]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 9.0, 4.0, 6.0, 6.0, 16.0, 13.0, 20.0, 31.0, 53.0, 95.0, 115.0, 203.0, 396.0, 749.0, 1720.0, 5231.0, 23326.0, 193250.0, 698654.0, 103256.0, 14877.0, 3726.0, 1394.0, 625.0, 325.0, 167.0, 81.0, 68.0, 41.0, 26.0, 27.0, 12.0, 6.0, 8.0, 4.0, 2.0, 7.0, 3.0, 2.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-1.76953125, -1.704864501953125, -1.64019775390625, -1.575531005859375, -1.5108642578125, -1.446197509765625, -1.38153076171875, -1.316864013671875, -1.252197265625, -1.187530517578125, -1.12286376953125, -1.058197021484375, -0.9935302734375, -0.928863525390625, -0.86419677734375, -0.799530029296875, -0.73486328125, -0.670196533203125, -0.60552978515625, -0.540863037109375, -0.4761962890625, -0.411529541015625, -0.34686279296875, -0.282196044921875, -0.217529296875, -0.152862548828125, -0.08819580078125, -0.023529052734375, 0.0411376953125, 0.105804443359375, 0.17047119140625, 0.235137939453125, 0.2998046875, 0.364471435546875, 0.42913818359375, 0.493804931640625, 0.5584716796875, 0.623138427734375, 0.68780517578125, 0.752471923828125, 0.817138671875, 0.881805419921875, 0.94647216796875, 1.011138916015625, 1.0758056640625, 1.140472412109375, 1.20513916015625, 1.269805908203125, 1.33447265625, 1.399139404296875, 1.46380615234375, 1.528472900390625, 1.5931396484375, 1.657806396484375, 1.72247314453125, 1.787139892578125, 1.851806640625, 1.916473388671875, 1.98114013671875, 2.045806884765625, 2.1104736328125, 2.175140380859375, 2.23980712890625, 2.304473876953125, 2.369140625]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 3.0, 8.0, 8.0, 4.0, 8.0, 10.0, 26.0, 34.0, 33.0, 52.0, 69.0, 73.0, 86.0, 113.0, 77.0, 83.0, 75.0, 56.0, 53.0, 42.0, 28.0, 23.0, 17.0, 9.0, 6.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.865234375, -1.795562744140625, -1.72589111328125, -1.656219482421875, -1.5865478515625, -1.516876220703125, -1.44720458984375, -1.377532958984375, -1.307861328125, -1.238189697265625, -1.16851806640625, -1.098846435546875, -1.0291748046875, -0.959503173828125, -0.88983154296875, -0.820159912109375, -0.75048828125, -0.680816650390625, -0.61114501953125, -0.541473388671875, -0.4718017578125, -0.402130126953125, -0.33245849609375, -0.262786865234375, -0.193115234375, -0.123443603515625, -0.05377197265625, 0.015899658203125, 0.0855712890625, 0.155242919921875, 0.22491455078125, 0.294586181640625, 0.3642578125, 0.433929443359375, 0.50360107421875, 0.573272705078125, 0.6429443359375, 0.712615966796875, 0.78228759765625, 0.851959228515625, 0.921630859375, 0.991302490234375, 1.06097412109375, 1.130645751953125, 1.2003173828125, 1.269989013671875, 1.33966064453125, 1.409332275390625, 1.47900390625, 1.548675537109375, 1.61834716796875, 1.688018798828125, 1.7576904296875, 1.827362060546875, 1.89703369140625, 1.966705322265625, 2.036376953125, 2.106048583984375, 2.17572021484375, 2.245391845703125, 2.3150634765625, 2.384735107421875, 2.45440673828125, 2.524078369140625, 2.59375]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 7.0, 9.0, 11.0, 16.0, 38.0, 103.0, 169.0, 242.0, 177.0, 108.0, 54.0, 27.0, 18.0, 9.0, 4.0, 1.0, 3.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.14666748046875, -34.72544479370117, -33.30422592163086, -31.88300323486328, -30.46178436279297, -29.04056167602539, -27.619340896606445, -26.1981201171875, -24.776899337768555, -23.35567855834961, -21.934457778930664, -20.51323699951172, -19.09201431274414, -17.670795440673828, -16.24957275390625, -14.828351974487305, -13.40713119506836, -11.985910415649414, -10.564689636230469, -9.143467903137207, -7.722247123718262, -6.301026344299316, -4.879804611206055, -3.4585838317871094, -2.037363052368164, -0.6161420345306396, 0.8050789833068848, 2.2263002395629883, 3.6475210189819336, 5.068741798400879, 6.489963531494141, 7.911184310913086, 9.332405090332031, 10.753625869750977, 12.174846649169922, 13.596068382263184, 15.017289161682129, 16.43851089477539, 17.859731674194336, 19.28095245361328, 20.702173233032227, 22.123394012451172, 23.544614791870117, 24.965835571289062, 26.38705825805664, 27.808277130126953, 29.22949981689453, 30.650720596313477, 32.07194137573242, 33.4931640625, 34.91438293457031, 36.33560562133789, 37.7568244934082, 39.17804718017578, 40.599266052246094, 42.02048873901367, 43.44171142578125, 44.86293411254883, 46.28415298461914, 47.70537567138672, 49.12659454345703, 50.54781723022461, 51.96903991699219, 53.3902587890625, 54.81147766113281]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 1.0, 2.0, 2.0, 1.0, 3.0, 8.0, 3.0, 4.0, 3.0, 10.0, 9.0, 10.0, 18.0, 10.0, 18.0, 19.0, 19.0, 19.0, 20.0, 24.0, 30.0, 32.0, 32.0, 30.0, 30.0, 36.0, 36.0, 56.0, 40.0, 60.0, 36.0, 49.0, 39.0, 30.0, 24.0, 36.0, 23.0, 25.0, 21.0, 18.0, 16.0, 18.0, 13.0, 12.0, 9.0, 5.0, 6.0, 10.0, 12.0, 4.0, 4.0, 2.0, 4.0, 5.0, 2.0, 3.0, 1.0, 1.0, 0.0, 3.0, 3.0], "bins": [-19.994556427001953, -19.353740692138672, -18.71292495727539, -18.072107315063477, -17.431291580200195, -16.790475845336914, -16.149660110473633, -15.508844375610352, -14.868027687072754, -14.227211952209473, -13.586395263671875, -12.945579528808594, -12.304763793945312, -11.663947105407715, -11.023131370544434, -10.382314682006836, -9.741498947143555, -9.100683212280273, -8.459866523742676, -7.8190507888793945, -7.178234577178955, -6.537418365478516, -5.896602630615234, -5.255786418914795, -4.6149702072143555, -3.974153995513916, -3.3333380222320557, -2.6925220489501953, -2.051705837249756, -1.4108896255493164, -0.770073652267456, -0.1292576789855957, 0.5115604400634766, 1.1523765325546265, 1.7931926250457764, 2.4340085983276367, 3.074824810028076, 3.7156410217285156, 4.356456756591797, 4.997272968292236, 5.638089179992676, 6.278905391693115, 6.919721603393555, 7.560537338256836, 8.201353073120117, 8.842169761657715, 9.482985496520996, 10.123802185058594, 10.764617919921875, 11.405433654785156, 12.046250343322754, 12.687066078186035, 13.327882766723633, 13.968698501586914, 14.609514236450195, 15.250329971313477, 15.891146659851074, 16.531963348388672, 17.172779083251953, 17.813594818115234, 18.454410552978516, 19.095226287841797, 19.73604393005371, 20.376859664916992, 21.017675399780273]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 6.0, 9.0, 6.0, 11.0, 26.0, 41.0, 97.0, 143.0, 313.0, 704.0, 1814.0, 5505.0, 21207.0, 155406.0, 2981273.0, 955736.0, 56028.0, 10794.0, 3044.0, 1098.0, 504.0, 210.0, 126.0, 68.0, 33.0, 30.0, 15.0, 12.0, 12.0, 5.0, 8.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.443359375, -3.3414306640625, -3.239501953125, -3.1375732421875, -3.03564453125, -2.9337158203125, -2.831787109375, -2.7298583984375, -2.6279296875, -2.5260009765625, -2.424072265625, -2.3221435546875, -2.22021484375, -2.1182861328125, -2.016357421875, -1.9144287109375, -1.8125, -1.7105712890625, -1.608642578125, -1.5067138671875, -1.40478515625, -1.3028564453125, -1.200927734375, -1.0989990234375, -0.9970703125, -0.8951416015625, -0.793212890625, -0.6912841796875, -0.58935546875, -0.4874267578125, -0.385498046875, -0.2835693359375, -0.181640625, -0.0797119140625, 0.022216796875, 0.1241455078125, 0.22607421875, 0.3280029296875, 0.429931640625, 0.5318603515625, 0.6337890625, 0.7357177734375, 0.837646484375, 0.9395751953125, 1.04150390625, 1.1434326171875, 1.245361328125, 1.3472900390625, 1.44921875, 1.5511474609375, 1.653076171875, 1.7550048828125, 1.85693359375, 1.9588623046875, 2.060791015625, 2.1627197265625, 2.2646484375, 2.3665771484375, 2.468505859375, 2.5704345703125, 2.67236328125, 2.7742919921875, 2.876220703125, 2.9781494140625, 3.080078125]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 5.0, 1.0, 2.0, 6.0, 10.0, 18.0, 25.0, 33.0, 41.0, 64.0, 79.0, 73.0, 73.0, 94.0, 82.0, 81.0, 70.0, 62.0, 56.0, 49.0, 20.0, 21.0, 19.0, 9.0, 9.0, 6.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8583984375, -1.7736968994140625, -1.688995361328125, -1.6042938232421875, -1.51959228515625, -1.4348907470703125, -1.350189208984375, -1.2654876708984375, -1.1807861328125, -1.0960845947265625, -1.011383056640625, -0.9266815185546875, -0.84197998046875, -0.7572784423828125, -0.672576904296875, -0.5878753662109375, -0.503173828125, -0.4184722900390625, -0.333770751953125, -0.2490692138671875, -0.16436767578125, -0.0796661376953125, 0.005035400390625, 0.0897369384765625, 0.1744384765625, 0.2591400146484375, 0.343841552734375, 0.4285430908203125, 0.51324462890625, 0.5979461669921875, 0.682647705078125, 0.7673492431640625, 0.85205078125, 0.9367523193359375, 1.021453857421875, 1.1061553955078125, 1.19085693359375, 1.2755584716796875, 1.360260009765625, 1.4449615478515625, 1.5296630859375, 1.6143646240234375, 1.699066162109375, 1.7837677001953125, 1.86846923828125, 1.9531707763671875, 2.037872314453125, 2.1225738525390625, 2.207275390625, 2.2919769287109375, 2.376678466796875, 2.4613800048828125, 2.54608154296875, 2.6307830810546875, 2.715484619140625, 2.8001861572265625, 2.8848876953125, 2.9695892333984375, 3.054290771484375, 3.1389923095703125, 3.22369384765625, 3.3083953857421875, 3.393096923828125, 3.4777984619140625, 3.5625]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 0.0, 3.0, 0.0, 7.0, 5.0, 2.0, 2.0, 14.0, 21.0, 40.0, 46.0, 61.0, 84.0, 152.0, 354.0, 702.0, 1668.0, 5037.0, 18781.0, 97384.0, 2401785.0, 1568599.0, 77848.0, 15071.0, 4152.0, 1312.0, 558.0, 237.0, 132.0, 54.0, 56.0, 43.0, 21.0, 15.0, 10.0, 8.0, 10.0, 4.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.17578125, -4.049041748046875, -3.92230224609375, -3.795562744140625, -3.6688232421875, -3.542083740234375, -3.41534423828125, -3.288604736328125, -3.161865234375, -3.035125732421875, -2.90838623046875, -2.781646728515625, -2.6549072265625, -2.528167724609375, -2.40142822265625, -2.274688720703125, -2.14794921875, -2.021209716796875, -1.89447021484375, -1.767730712890625, -1.6409912109375, -1.514251708984375, -1.38751220703125, -1.260772705078125, -1.134033203125, -1.007293701171875, -0.88055419921875, -0.753814697265625, -0.6270751953125, -0.500335693359375, -0.37359619140625, -0.246856689453125, -0.1201171875, 0.006622314453125, 0.13336181640625, 0.260101318359375, 0.3868408203125, 0.513580322265625, 0.64031982421875, 0.767059326171875, 0.893798828125, 1.020538330078125, 1.14727783203125, 1.274017333984375, 1.4007568359375, 1.527496337890625, 1.65423583984375, 1.780975341796875, 1.90771484375, 2.034454345703125, 2.16119384765625, 2.287933349609375, 2.4146728515625, 2.541412353515625, 2.66815185546875, 2.794891357421875, 2.921630859375, 3.048370361328125, 3.17510986328125, 3.301849365234375, 3.4285888671875, 3.555328369140625, 3.68206787109375, 3.808807373046875, 3.935546875]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 5.0, 5.0, 3.0, 8.0, 7.0, 8.0, 17.0, 15.0, 24.0, 27.0, 35.0, 29.0, 57.0, 71.0, 122.0, 193.0, 383.0, 747.0, 884.0, 604.0, 296.0, 181.0, 105.0, 74.0, 46.0, 34.0, 21.0, 11.0, 14.0, 9.0, 10.0, 8.0, 10.0, 8.0, 4.0, 2.0, 3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.80078125, -3.67926025390625, -3.5577392578125, -3.43621826171875, -3.314697265625, -3.19317626953125, -3.0716552734375, -2.95013427734375, -2.82861328125, -2.70709228515625, -2.5855712890625, -2.46405029296875, -2.342529296875, -2.22100830078125, -2.0994873046875, -1.97796630859375, -1.8564453125, -1.73492431640625, -1.6134033203125, -1.49188232421875, -1.370361328125, -1.24884033203125, -1.1273193359375, -1.00579833984375, -0.88427734375, -0.76275634765625, -0.6412353515625, -0.51971435546875, -0.398193359375, -0.27667236328125, -0.1551513671875, -0.03363037109375, 0.087890625, 0.20941162109375, 0.3309326171875, 0.45245361328125, 0.573974609375, 0.69549560546875, 0.8170166015625, 0.93853759765625, 1.06005859375, 1.18157958984375, 1.3031005859375, 1.42462158203125, 1.546142578125, 1.66766357421875, 1.7891845703125, 1.91070556640625, 2.0322265625, 2.15374755859375, 2.2752685546875, 2.39678955078125, 2.518310546875, 2.63983154296875, 2.7613525390625, 2.88287353515625, 3.00439453125, 3.12591552734375, 3.2474365234375, 3.36895751953125, 3.490478515625, 3.61199951171875, 3.7335205078125, 3.85504150390625, 3.9765625]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 2.0, 1.0, 1.0, 3.0, 16.0, 10.0, 83.0, 210.0, 421.0, 193.0, 45.0, 10.0, 5.0, 6.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-65.47093963623047, -62.90272903442383, -60.33451843261719, -57.76630783081055, -55.198097229003906, -52.6298828125, -50.06167221069336, -47.49346160888672, -44.92525100708008, -42.35704040527344, -39.7888298034668, -37.220619201660156, -34.65240478515625, -32.084197998046875, -29.51598358154297, -26.947772979736328, -24.379562377929688, -21.811351776123047, -19.243141174316406, -16.674928665161133, -14.106718063354492, -11.538507461547852, -8.970295906066895, -6.4020843505859375, -3.833873748779297, -1.265662670135498, 1.3025484085083008, 3.8707594871520996, 6.438970565795898, 9.007181167602539, 11.575392723083496, 14.143604278564453, 16.711822509765625, 19.280033111572266, 21.848243713378906, 24.41645622253418, 26.98466682434082, 29.55287742614746, 32.121089935302734, 34.689300537109375, 37.257511138916016, 39.825721740722656, 42.3939323425293, 44.96214294433594, 47.530357360839844, 50.09856414794922, 52.666778564453125, 55.234989166259766, 57.803199768066406, 60.37141036987305, 62.93962097167969, 65.5078353881836, 68.07604217529297, 70.64425659179688, 73.21246337890625, 75.78067779541016, 78.34889221191406, 80.91710662841797, 83.48531341552734, 86.05352783203125, 88.62173461914062, 91.18994903564453, 93.7581558227539, 96.32637023925781, 98.89457702636719]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 5.0, 3.0, 4.0, 12.0, 7.0, 12.0, 7.0, 13.0, 28.0, 25.0, 29.0, 37.0, 45.0, 63.0, 46.0, 70.0, 77.0, 74.0, 74.0, 62.0, 47.0, 48.0, 36.0, 53.0, 35.0, 24.0, 24.0, 14.0, 7.0, 12.0, 7.0, 0.0, 3.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-28.236312866210938, -27.450984954833984, -26.665658950805664, -25.88033103942871, -25.09500503540039, -24.309677124023438, -23.524349212646484, -22.73902130126953, -21.95369529724121, -21.168367385864258, -20.383041381835938, -19.597713470458984, -18.81238555908203, -18.02705955505371, -17.241731643676758, -16.456405639648438, -15.671077728271484, -14.885750770568848, -14.100423812866211, -13.315095901489258, -12.529768943786621, -11.744441986083984, -10.959114074707031, -10.173787117004395, -9.388460159301758, -8.603133201599121, -7.817805767059326, -7.032478332519531, -6.2471513748168945, -5.461824417114258, -4.676496982574463, -3.891169548034668, -3.1058425903320312, -2.3205153942108154, -1.5351881980895996, -0.7498610019683838, 0.03546619415283203, 0.8207933902740479, 1.6061205863952637, 2.3914480209350586, 3.1767749786376953, 3.962102174758911, 4.747429370880127, 5.532756805419922, 6.318083763122559, 7.103410720825195, 7.88873815536499, 8.674065589904785, 9.459392547607422, 10.244719505310059, 11.030046463012695, 11.815374374389648, 12.600701332092285, 13.386028289794922, 14.171356201171875, 14.956683158874512, 15.742010116577148, 16.5273380279541, 17.312664031982422, 18.097991943359375, 18.883319854736328, 19.66864585876465, 20.4539737701416, 21.239299774169922, 22.024627685546875]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 8.0, 11.0, 8.0, 17.0, 23.0, 50.0, 81.0, 99.0, 151.0, 263.0, 395.0, 665.0, 1058.0, 1820.0, 3093.0, 5578.0, 10025.0, 18237.0, 33781.0, 60851.0, 106037.0, 166878.0, 205778.0, 174088.0, 112675.0, 65368.0, 36389.0, 19873.0, 10855.0, 6056.0, 3311.0, 2006.0, 1131.0, 673.0, 407.0, 307.0, 172.0, 119.0, 76.0, 52.0, 28.0, 21.0, 20.0, 10.0, 4.0, 4.0, 5.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3271484375, -1.282257080078125, -1.23736572265625, -1.192474365234375, -1.1475830078125, -1.102691650390625, -1.05780029296875, -1.012908935546875, -0.968017578125, -0.923126220703125, -0.87823486328125, -0.833343505859375, -0.7884521484375, -0.743560791015625, -0.69866943359375, -0.653778076171875, -0.60888671875, -0.563995361328125, -0.51910400390625, -0.474212646484375, -0.4293212890625, -0.384429931640625, -0.33953857421875, -0.294647216796875, -0.249755859375, -0.204864501953125, -0.15997314453125, -0.115081787109375, -0.0701904296875, -0.025299072265625, 0.01959228515625, 0.064483642578125, 0.109375, 0.154266357421875, 0.19915771484375, 0.244049072265625, 0.2889404296875, 0.333831787109375, 0.37872314453125, 0.423614501953125, 0.468505859375, 0.513397216796875, 0.55828857421875, 0.603179931640625, 0.6480712890625, 0.692962646484375, 0.73785400390625, 0.782745361328125, 0.82763671875, 0.872528076171875, 0.91741943359375, 0.962310791015625, 1.0072021484375, 1.052093505859375, 1.09698486328125, 1.141876220703125, 1.186767578125, 1.231658935546875, 1.27655029296875, 1.321441650390625, 1.3663330078125, 1.411224365234375, 1.45611572265625, 1.501007080078125, 1.5458984375]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 2.0, 2.0, 8.0, 11.0, 10.0, 16.0, 21.0, 20.0, 33.0, 34.0, 35.0, 45.0, 62.0, 59.0, 61.0, 60.0, 58.0, 57.0, 77.0, 57.0, 57.0, 40.0, 28.0, 37.0, 26.0, 27.0, 21.0, 11.0, 7.0, 7.0, 7.0, 3.0, 2.0, 6.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.0859375, -2.0252532958984375, -1.964569091796875, -1.9038848876953125, -1.84320068359375, -1.7825164794921875, -1.721832275390625, -1.6611480712890625, -1.6004638671875, -1.5397796630859375, -1.479095458984375, -1.4184112548828125, -1.35772705078125, -1.2970428466796875, -1.236358642578125, -1.1756744384765625, -1.114990234375, -1.0543060302734375, -0.993621826171875, -0.9329376220703125, -0.87225341796875, -0.8115692138671875, -0.750885009765625, -0.6902008056640625, -0.6295166015625, -0.5688323974609375, -0.508148193359375, -0.4474639892578125, -0.38677978515625, -0.3260955810546875, -0.265411376953125, -0.2047271728515625, -0.14404296875, -0.0833587646484375, -0.022674560546875, 0.0380096435546875, 0.09869384765625, 0.1593780517578125, 0.220062255859375, 0.2807464599609375, 0.3414306640625, 0.4021148681640625, 0.462799072265625, 0.5234832763671875, 0.58416748046875, 0.6448516845703125, 0.705535888671875, 0.7662200927734375, 0.826904296875, 0.8875885009765625, 0.948272705078125, 1.0089569091796875, 1.06964111328125, 1.1303253173828125, 1.191009521484375, 1.2516937255859375, 1.3123779296875, 1.3730621337890625, 1.433746337890625, 1.4944305419921875, 1.55511474609375, 1.6157989501953125, 1.676483154296875, 1.7371673583984375, 1.7978515625]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 4.0, 4.0, 5.0, 7.0, 8.0, 13.0, 14.0, 14.0, 21.0, 29.0, 32.0, 44.0, 37.0, 74.0, 104.0, 135.0, 201.0, 250.0, 333.0, 487.0, 783.0, 1289.0, 2466.0, 5885.0, 20297.0, 107826.0, 585992.0, 257162.0, 46044.0, 10413.0, 3626.0, 1636.0, 937.0, 666.0, 436.0, 321.0, 219.0, 178.0, 143.0, 105.0, 76.0, 45.0, 39.0, 38.0, 30.0, 28.0, 19.0, 7.0, 7.0, 10.0, 7.0, 6.0, 2.0, 4.0, 3.0, 3.0, 3.0], "bins": [-3.8515625, -3.737457275390625, -3.62335205078125, -3.509246826171875, -3.3951416015625, -3.281036376953125, -3.16693115234375, -3.052825927734375, -2.938720703125, -2.824615478515625, -2.71051025390625, -2.596405029296875, -2.4822998046875, -2.368194580078125, -2.25408935546875, -2.139984130859375, -2.02587890625, -1.911773681640625, -1.79766845703125, -1.683563232421875, -1.5694580078125, -1.455352783203125, -1.34124755859375, -1.227142333984375, -1.113037109375, -0.998931884765625, -0.88482666015625, -0.770721435546875, -0.6566162109375, -0.542510986328125, -0.42840576171875, -0.314300537109375, -0.2001953125, -0.086090087890625, 0.02801513671875, 0.142120361328125, 0.2562255859375, 0.370330810546875, 0.48443603515625, 0.598541259765625, 0.712646484375, 0.826751708984375, 0.94085693359375, 1.054962158203125, 1.1690673828125, 1.283172607421875, 1.39727783203125, 1.511383056640625, 1.62548828125, 1.739593505859375, 1.85369873046875, 1.967803955078125, 2.0819091796875, 2.196014404296875, 2.31011962890625, 2.424224853515625, 2.538330078125, 2.652435302734375, 2.76654052734375, 2.880645751953125, 2.9947509765625, 3.108856201171875, 3.22296142578125, 3.337066650390625, 3.451171875]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 4.0, 3.0, 3.0, 2.0, 2.0, 6.0, 5.0, 5.0, 8.0, 6.0, 11.0, 13.0, 14.0, 10.0, 15.0, 10.0, 20.0, 24.0, 33.0, 35.0, 30.0, 38.0, 34.0, 47.0, 42.0, 33.0, 51.0, 47.0, 47.0, 31.0, 37.0, 30.0, 29.0, 40.0, 28.0, 24.0, 22.0, 27.0, 19.0, 15.0, 25.0, 12.0, 8.0, 12.0, 13.0, 8.0, 7.0, 5.0, 9.0, 5.0, 1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0], "bins": [-4.62109375, -4.4722900390625, -4.323486328125, -4.1746826171875, -4.02587890625, -3.8770751953125, -3.728271484375, -3.5794677734375, -3.4306640625, -3.2818603515625, -3.133056640625, -2.9842529296875, -2.83544921875, -2.6866455078125, -2.537841796875, -2.3890380859375, -2.240234375, -2.0914306640625, -1.942626953125, -1.7938232421875, -1.64501953125, -1.4962158203125, -1.347412109375, -1.1986083984375, -1.0498046875, -0.9010009765625, -0.752197265625, -0.6033935546875, -0.45458984375, -0.3057861328125, -0.156982421875, -0.0081787109375, 0.140625, 0.2894287109375, 0.438232421875, 0.5870361328125, 0.73583984375, 0.8846435546875, 1.033447265625, 1.1822509765625, 1.3310546875, 1.4798583984375, 1.628662109375, 1.7774658203125, 1.92626953125, 2.0750732421875, 2.223876953125, 2.3726806640625, 2.521484375, 2.6702880859375, 2.819091796875, 2.9678955078125, 3.11669921875, 3.2655029296875, 3.414306640625, 3.5631103515625, 3.7119140625, 3.8607177734375, 4.009521484375, 4.1583251953125, 4.30712890625, 4.4559326171875, 4.604736328125, 4.7535400390625, 4.90234375]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 4.0, 3.0, 7.0, 9.0, 16.0, 19.0, 36.0, 44.0, 72.0, 102.0, 162.0, 301.0, 504.0, 1032.0, 2718.0, 7937.0, 33962.0, 386796.0, 562862.0, 38068.0, 8566.0, 2891.0, 1139.0, 547.0, 273.0, 182.0, 95.0, 73.0, 39.0, 30.0, 18.0, 12.0, 14.0, 9.0, 6.0, 3.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.052734375, -1.9905853271484375, -1.928436279296875, -1.8662872314453125, -1.80413818359375, -1.7419891357421875, -1.679840087890625, -1.6176910400390625, -1.5555419921875, -1.4933929443359375, -1.431243896484375, -1.3690948486328125, -1.30694580078125, -1.2447967529296875, -1.182647705078125, -1.1204986572265625, -1.058349609375, -0.9962005615234375, -0.934051513671875, -0.8719024658203125, -0.80975341796875, -0.7476043701171875, -0.685455322265625, -0.6233062744140625, -0.5611572265625, -0.4990081787109375, -0.436859130859375, -0.3747100830078125, -0.31256103515625, -0.2504119873046875, -0.188262939453125, -0.1261138916015625, -0.06396484375, -0.0018157958984375, 0.060333251953125, 0.1224822998046875, 0.18463134765625, 0.2467803955078125, 0.308929443359375, 0.3710784912109375, 0.4332275390625, 0.4953765869140625, 0.557525634765625, 0.6196746826171875, 0.68182373046875, 0.7439727783203125, 0.806121826171875, 0.8682708740234375, 0.930419921875, 0.9925689697265625, 1.054718017578125, 1.1168670654296875, 1.17901611328125, 1.2411651611328125, 1.303314208984375, 1.3654632568359375, 1.4276123046875, 1.4897613525390625, 1.551910400390625, 1.6140594482421875, 1.67620849609375, 1.7383575439453125, 1.800506591796875, 1.8626556396484375, 1.9248046875]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 1.0, 4.0, 5.0, 9.0, 13.0, 20.0, 28.0, 49.0, 87.0, 141.0, 201.0, 185.0, 102.0, 52.0, 34.0, 18.0, 17.0, 10.0, 6.0, 3.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0009059906005859375, -0.000879146158695221, -0.0008523017168045044, -0.0008254572749137878, -0.0007986128330230713, -0.0007717683911323547, -0.0007449239492416382, -0.0007180795073509216, -0.0006912350654602051, -0.0006643906235694885, -0.000637546181678772, -0.0006107017397880554, -0.0005838572978973389, -0.0005570128560066223, -0.0005301684141159058, -0.0005033239722251892, -0.00047647953033447266, -0.0004496350884437561, -0.00042279064655303955, -0.000395946204662323, -0.00036910176277160645, -0.0003422573208808899, -0.00031541287899017334, -0.0002885684370994568, -0.00026172399520874023, -0.00023487955331802368, -0.00020803511142730713, -0.00018119066953659058, -0.00015434622764587402, -0.00012750178575515747, -0.00010065734386444092, -7.381290197372437e-05, -4.696846008300781e-05, -2.012401819229126e-05, 6.720423698425293e-06, 3.3564865589141846e-05, 6.04093074798584e-05, 8.725374937057495e-05, 0.0001140981912612915, 0.00014094263315200806, 0.0001677870750427246, 0.00019463151693344116, 0.00022147595882415771, 0.00024832040071487427, 0.0002751648426055908, 0.0003020092844963074, 0.0003288537263870239, 0.0003556981682777405, 0.00038254261016845703, 0.0004093870520591736, 0.00043623149394989014, 0.0004630759358406067, 0.0004899203777313232, 0.0005167648196220398, 0.0005436092615127563, 0.0005704537034034729, 0.0005972981452941895, 0.000624142587184906, 0.0006509870290756226, 0.0006778314709663391, 0.0007046759128570557, 0.0007315203547477722, 0.0007583647966384888, 0.0007852092385292053, 0.0008120536804199219]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 2.0, 5.0, 3.0, 11.0, 7.0, 19.0, 21.0, 34.0, 70.0, 137.0, 331.0, 846.0, 2549.0, 11150.0, 156759.0, 829894.0, 38573.0, 5536.0, 1560.0, 555.0, 225.0, 111.0, 62.0, 45.0, 17.0, 14.0, 8.0, 3.0, 8.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.927734375, -2.8470458984375, -2.766357421875, -2.6856689453125, -2.60498046875, -2.5242919921875, -2.443603515625, -2.3629150390625, -2.2822265625, -2.2015380859375, -2.120849609375, -2.0401611328125, -1.95947265625, -1.8787841796875, -1.798095703125, -1.7174072265625, -1.63671875, -1.5560302734375, -1.475341796875, -1.3946533203125, -1.31396484375, -1.2332763671875, -1.152587890625, -1.0718994140625, -0.9912109375, -0.9105224609375, -0.829833984375, -0.7491455078125, -0.66845703125, -0.5877685546875, -0.507080078125, -0.4263916015625, -0.345703125, -0.2650146484375, -0.184326171875, -0.1036376953125, -0.02294921875, 0.0577392578125, 0.138427734375, 0.2191162109375, 0.2998046875, 0.3804931640625, 0.461181640625, 0.5418701171875, 0.62255859375, 0.7032470703125, 0.783935546875, 0.8646240234375, 0.9453125, 1.0260009765625, 1.106689453125, 1.1873779296875, 1.26806640625, 1.3487548828125, 1.429443359375, 1.5101318359375, 1.5908203125, 1.6715087890625, 1.752197265625, 1.8328857421875, 1.91357421875, 1.9942626953125, 2.074951171875, 2.1556396484375, 2.236328125]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 4.0, 3.0, 3.0, 3.0, 6.0, 9.0, 4.0, 11.0, 9.0, 26.0, 38.0, 35.0, 46.0, 46.0, 69.0, 66.0, 70.0, 73.0, 80.0, 55.0, 71.0, 45.0, 51.0, 46.0, 27.0, 23.0, 27.0, 14.0, 10.0, 6.0, 9.0, 3.0, 4.0, 2.0, 2.0, 3.0, 1.0, 4.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0], "bins": [-1.4990234375, -1.454345703125, -1.40966796875, -1.364990234375, -1.3203125, -1.275634765625, -1.23095703125, -1.186279296875, -1.1416015625, -1.096923828125, -1.05224609375, -1.007568359375, -0.962890625, -0.918212890625, -0.87353515625, -0.828857421875, -0.7841796875, -0.739501953125, -0.69482421875, -0.650146484375, -0.60546875, -0.560791015625, -0.51611328125, -0.471435546875, -0.4267578125, -0.382080078125, -0.33740234375, -0.292724609375, -0.248046875, -0.203369140625, -0.15869140625, -0.114013671875, -0.0693359375, -0.024658203125, 0.02001953125, 0.064697265625, 0.109375, 0.154052734375, 0.19873046875, 0.243408203125, 0.2880859375, 0.332763671875, 0.37744140625, 0.422119140625, 0.466796875, 0.511474609375, 0.55615234375, 0.600830078125, 0.6455078125, 0.690185546875, 0.73486328125, 0.779541015625, 0.82421875, 0.868896484375, 0.91357421875, 0.958251953125, 1.0029296875, 1.047607421875, 1.09228515625, 1.136962890625, 1.181640625, 1.226318359375, 1.27099609375, 1.315673828125, 1.3603515625]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 5.0, 4.0, 23.0, 36.0, 91.0, 151.0, 281.0, 187.0, 103.0, 52.0, 31.0, 11.0, 13.0, 3.0, 3.0, 3.0, 2.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.37413787841797, -26.028717041015625, -24.68329620361328, -23.337875366210938, -21.99245262145996, -20.647031784057617, -19.301610946655273, -17.956188201904297, -16.610767364501953, -15.26534652709961, -13.91992473602295, -12.574503898620605, -11.229082107543945, -9.883661270141602, -8.538240432739258, -7.192818641662598, -5.84739875793457, -4.501977443695068, -3.1565563678741455, -1.8111352920532227, -0.4657139778137207, 0.8797073364257812, 2.225128173828125, 3.570549964904785, 4.915970802307129, 6.261392116546631, 7.606813430786133, 8.952234268188477, 10.29765510559082, 11.64307689666748, 12.988497734069824, 14.333919525146484, 15.679340362548828, 17.024761199951172, 18.370182037353516, 19.71560287475586, 21.061025619506836, 22.40644645690918, 23.751867294311523, 25.0972900390625, 26.442710876464844, 27.788131713867188, 29.13355255126953, 30.478973388671875, 31.82439613342285, 33.16981506347656, 34.515235900878906, 35.860660552978516, 37.206077575683594, 38.55149841308594, 39.89691925048828, 41.242340087890625, 42.58776092529297, 43.93318176269531, 45.278602600097656, 46.624027252197266, 47.96944808959961, 49.31486892700195, 50.6602897644043, 52.00571060180664, 53.351131439208984, 54.696556091308594, 56.04197692871094, 57.38739776611328, 58.732818603515625]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 5.0, 3.0, 7.0, 7.0, 10.0, 13.0, 15.0, 16.0, 22.0, 24.0, 23.0, 34.0, 28.0, 32.0, 31.0, 45.0, 56.0, 65.0, 66.0, 80.0, 67.0, 46.0, 24.0, 39.0, 23.0, 25.0, 22.0, 37.0, 27.0, 17.0, 19.0, 16.0, 15.0, 5.0, 7.0, 2.0, 7.0, 6.0, 7.0, 4.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-21.268718719482422, -20.515544891357422, -19.762371063232422, -19.009197235107422, -18.256023406982422, -17.502849578857422, -16.749675750732422, -15.996500968933105, -15.243327140808105, -14.490153312683105, -13.736979484558105, -12.983804702758789, -12.230630874633789, -11.477457046508789, -10.724283218383789, -9.971109390258789, -9.217935562133789, -8.464761734008789, -7.711587905883789, -6.958413600921631, -6.205239772796631, -5.452065944671631, -4.698891639709473, -3.9457178115844727, -3.1925439834594727, -2.4393701553344727, -1.6861960887908936, -0.9330220222473145, -0.17984819412231445, 0.5733256340026855, 1.3264999389648438, 2.0796737670898438, 2.832845687866211, 3.586019515991211, 4.339193344116211, 5.092367649078369, 5.845541477203369, 6.598715305328369, 7.351889610290527, 8.105063438415527, 8.858237266540527, 9.611411094665527, 10.364584922790527, 11.117759704589844, 11.870933532714844, 12.624107360839844, 13.377281188964844, 14.130455017089844, 14.883628845214844, 15.636802673339844, 16.389976501464844, 17.143150329589844, 17.896324157714844, 18.649497985839844, 19.402671813964844, 20.155845642089844, 20.909019470214844, 21.662193298339844, 22.415367126464844, 23.168540954589844, 23.921714782714844, 24.674888610839844, 25.428062438964844, 26.181236267089844, 26.934412002563477]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 4.0, 8.0, 13.0, 14.0, 22.0, 20.0, 38.0, 52.0, 81.0, 124.0, 176.0, 272.0, 435.0, 663.0, 1122.0, 1961.0, 3559.0, 7086.0, 15544.0, 39973.0, 125823.0, 496443.0, 1642719.0, 1349933.0, 356965.0, 92690.0, 31066.0, 13233.0, 6367.0, 3274.0, 1774.0, 1004.0, 646.0, 404.0, 260.0, 163.0, 123.0, 68.0, 42.0, 43.0, 33.0, 15.0, 12.0, 5.0, 3.0, 6.0, 4.0, 3.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-1.2236328125, -1.1852569580078125, -1.146881103515625, -1.1085052490234375, -1.07012939453125, -1.0317535400390625, -0.993377685546875, -0.9550018310546875, -0.9166259765625, -0.8782501220703125, -0.839874267578125, -0.8014984130859375, -0.76312255859375, -0.7247467041015625, -0.686370849609375, -0.6479949951171875, -0.609619140625, -0.5712432861328125, -0.532867431640625, -0.4944915771484375, -0.45611572265625, -0.4177398681640625, -0.379364013671875, -0.3409881591796875, -0.3026123046875, -0.2642364501953125, -0.225860595703125, -0.1874847412109375, -0.14910888671875, -0.1107330322265625, -0.072357177734375, -0.0339813232421875, 0.00439453125, 0.0427703857421875, 0.081146240234375, 0.1195220947265625, 0.15789794921875, 0.1962738037109375, 0.234649658203125, 0.2730255126953125, 0.3114013671875, 0.3497772216796875, 0.388153076171875, 0.4265289306640625, 0.46490478515625, 0.5032806396484375, 0.541656494140625, 0.5800323486328125, 0.618408203125, 0.6567840576171875, 0.695159912109375, 0.7335357666015625, 0.77191162109375, 0.8102874755859375, 0.848663330078125, 0.8870391845703125, 0.9254150390625, 0.9637908935546875, 1.002166748046875, 1.0405426025390625, 1.07891845703125, 1.1172943115234375, 1.155670166015625, 1.1940460205078125, 1.232421875]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 5.0, 7.0, 3.0, 12.0, 12.0, 15.0, 23.0, 39.0, 38.0, 28.0, 51.0, 50.0, 55.0, 71.0, 66.0, 60.0, 67.0, 64.0, 49.0, 57.0, 40.0, 33.0, 36.0, 32.0, 21.0, 21.0, 15.0, 6.0, 10.0, 7.0, 6.0, 1.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-2.29296875, -2.2333831787109375, -2.173797607421875, -2.1142120361328125, -2.05462646484375, -1.9950408935546875, -1.935455322265625, -1.8758697509765625, -1.8162841796875, -1.7566986083984375, -1.697113037109375, -1.6375274658203125, -1.57794189453125, -1.5183563232421875, -1.458770751953125, -1.3991851806640625, -1.339599609375, -1.2800140380859375, -1.220428466796875, -1.1608428955078125, -1.10125732421875, -1.0416717529296875, -0.982086181640625, -0.9225006103515625, -0.8629150390625, -0.8033294677734375, -0.743743896484375, -0.6841583251953125, -0.62457275390625, -0.5649871826171875, -0.505401611328125, -0.4458160400390625, -0.38623046875, -0.3266448974609375, -0.267059326171875, -0.2074737548828125, -0.14788818359375, -0.0883026123046875, -0.028717041015625, 0.0308685302734375, 0.0904541015625, 0.1500396728515625, 0.209625244140625, 0.2692108154296875, 0.32879638671875, 0.3883819580078125, 0.447967529296875, 0.5075531005859375, 0.567138671875, 0.6267242431640625, 0.686309814453125, 0.7458953857421875, 0.80548095703125, 0.8650665283203125, 0.924652099609375, 0.9842376708984375, 1.0438232421875, 1.1034088134765625, 1.162994384765625, 1.2225799560546875, 1.28216552734375, 1.3417510986328125, 1.401336669921875, 1.4609222412109375, 1.5205078125]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 4.0, 6.0, 3.0, 14.0, 16.0, 31.0, 38.0, 44.0, 87.0, 134.0, 213.0, 346.0, 622.0, 1740.0, 6343.0, 34430.0, 464755.0, 3514857.0, 147514.0, 17341.0, 3634.0, 1112.0, 443.0, 247.0, 123.0, 86.0, 37.0, 26.0, 17.0, 13.0, 4.0, 4.0, 1.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.505859375, -3.382049560546875, -3.25823974609375, -3.134429931640625, -3.0106201171875, -2.886810302734375, -2.76300048828125, -2.639190673828125, -2.515380859375, -2.391571044921875, -2.26776123046875, -2.143951416015625, -2.0201416015625, -1.896331787109375, -1.77252197265625, -1.648712158203125, -1.52490234375, -1.401092529296875, -1.27728271484375, -1.153472900390625, -1.0296630859375, -0.905853271484375, -0.78204345703125, -0.658233642578125, -0.534423828125, -0.410614013671875, -0.28680419921875, -0.162994384765625, -0.0391845703125, 0.084625244140625, 0.20843505859375, 0.332244873046875, 0.4560546875, 0.579864501953125, 0.70367431640625, 0.827484130859375, 0.9512939453125, 1.075103759765625, 1.19891357421875, 1.322723388671875, 1.446533203125, 1.570343017578125, 1.69415283203125, 1.817962646484375, 1.9417724609375, 2.065582275390625, 2.18939208984375, 2.313201904296875, 2.43701171875, 2.560821533203125, 2.68463134765625, 2.808441162109375, 2.9322509765625, 3.056060791015625, 3.17987060546875, 3.303680419921875, 3.427490234375, 3.551300048828125, 3.67510986328125, 3.798919677734375, 3.9227294921875, 4.046539306640625, 4.17034912109375, 4.294158935546875, 4.41796875]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 6.0, 7.0, 11.0, 11.0, 15.0, 23.0, 30.0, 40.0, 73.0, 119.0, 180.0, 357.0, 717.0, 880.0, 707.0, 363.0, 190.0, 117.0, 75.0, 53.0, 31.0, 18.0, 19.0, 12.0, 9.0, 5.0, 4.0, 4.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0], "bins": [-5.1875, -5.0653076171875, -4.943115234375, -4.8209228515625, -4.69873046875, -4.5765380859375, -4.454345703125, -4.3321533203125, -4.2099609375, -4.0877685546875, -3.965576171875, -3.8433837890625, -3.72119140625, -3.5989990234375, -3.476806640625, -3.3546142578125, -3.232421875, -3.1102294921875, -2.988037109375, -2.8658447265625, -2.74365234375, -2.6214599609375, -2.499267578125, -2.3770751953125, -2.2548828125, -2.1326904296875, -2.010498046875, -1.8883056640625, -1.76611328125, -1.6439208984375, -1.521728515625, -1.3995361328125, -1.27734375, -1.1551513671875, -1.032958984375, -0.9107666015625, -0.78857421875, -0.6663818359375, -0.544189453125, -0.4219970703125, -0.2998046875, -0.1776123046875, -0.055419921875, 0.0667724609375, 0.18896484375, 0.3111572265625, 0.433349609375, 0.5555419921875, 0.677734375, 0.7999267578125, 0.922119140625, 1.0443115234375, 1.16650390625, 1.2886962890625, 1.410888671875, 1.5330810546875, 1.6552734375, 1.7774658203125, 1.899658203125, 2.0218505859375, 2.14404296875, 2.2662353515625, 2.388427734375, 2.5106201171875, 2.6328125]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 5.0, 5.0, 4.0, 12.0, 20.0, 59.0, 142.0, 233.0, 254.0, 161.0, 62.0, 26.0, 13.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-59.0982780456543, -57.72682571411133, -56.355369567871094, -54.983917236328125, -53.612464904785156, -52.24101257324219, -50.86956024169922, -49.498104095458984, -48.126651763916016, -46.75519943237305, -45.38374328613281, -44.012290954589844, -42.640838623046875, -41.269386291503906, -39.89793395996094, -38.5264778137207, -37.155025482177734, -35.783573150634766, -34.41211700439453, -33.04066467285156, -31.669212341308594, -30.297760009765625, -28.926305770874023, -27.554851531982422, -26.183399200439453, -24.811946868896484, -23.440492630004883, -22.06903839111328, -20.697586059570312, -19.326133728027344, -17.954679489135742, -16.58322525024414, -15.211771011352539, -13.840317726135254, -12.468864440917969, -11.097411155700684, -9.725957870483398, -8.354504585266113, -6.983051300048828, -5.611598014831543, -4.240144729614258, -2.8686914443969727, -1.4972381591796875, -0.12578487396240234, 1.2456684112548828, 2.617121696472168, 3.988574981689453, 5.360028266906738, 6.731481552124023, 8.102934837341309, 9.474388122558594, 10.845841407775879, 12.217294692993164, 13.58874797821045, 14.960201263427734, 16.331653594970703, 17.703107833862305, 19.074562072753906, 20.446014404296875, 21.817466735839844, 23.188920974731445, 24.560375213623047, 25.931827545166016, 27.303279876708984, 28.674734115600586]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 4.0, 2.0, 0.0, 1.0, 2.0, 8.0, 5.0, 3.0, 9.0, 8.0, 9.0, 7.0, 13.0, 11.0, 22.0, 19.0, 17.0, 27.0, 35.0, 30.0, 44.0, 47.0, 50.0, 40.0, 39.0, 47.0, 44.0, 46.0, 44.0, 60.0, 38.0, 26.0, 36.0, 25.0, 21.0, 32.0, 27.0, 13.0, 17.0, 16.0, 13.0, 16.0, 15.0, 4.0, 4.0, 7.0, 1.0, 5.0, 2.0, 1.0, 1.0, 1.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.035188674926758, -11.605635643005371, -11.1760835647583, -10.746530532836914, -10.316977500915527, -9.88742446899414, -9.45787239074707, -9.028319358825684, -8.598766326904297, -8.16921329498291, -7.739660739898682, -7.310108184814453, -6.880555152893066, -6.451002597808838, -6.021450042724609, -5.591897010803223, -5.162344932556152, -4.732792377471924, -4.303239345550537, -3.8736867904663086, -3.444133996963501, -3.0145812034606934, -2.585028648376465, -2.1554758548736572, -1.7259230613708496, -1.296370267868042, -0.8668175935745239, -0.43726491928100586, -0.007712125778198242, 0.4218406677246094, 0.8513932228088379, 1.2809460163116455, 1.7104988098144531, 2.1400516033172607, 2.5696043968200684, 2.999156951904297, 3.4287097454071045, 3.858262538909912, 4.287815093994141, 4.717368125915527, 5.146920680999756, 5.576473236083984, 6.006026268005371, 6.4355788230896, 6.865131378173828, 7.294684410095215, 7.724236965179443, 8.153789520263672, 8.583342552185059, 9.012895584106445, 9.442447662353516, 9.872000694274902, 10.301553726196289, 10.73110580444336, 11.160658836364746, 11.590211868286133, 12.019763946533203, 12.44931697845459, 12.87886905670166, 13.308422088623047, 13.737975120544434, 14.16752815246582, 14.59708023071289, 15.026633262634277, 15.456186294555664]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 5.0, 3.0, 4.0, 7.0, 8.0, 12.0, 22.0, 25.0, 33.0, 54.0, 90.0, 146.0, 215.0, 312.0, 552.0, 805.0, 1317.0, 2109.0, 3441.0, 6125.0, 10296.0, 18030.0, 32044.0, 58017.0, 101711.0, 168511.0, 215548.0, 177148.0, 108511.0, 61885.0, 34884.0, 19380.0, 11103.0, 6375.0, 3828.0, 2240.0, 1390.0, 850.0, 561.0, 322.0, 214.0, 148.0, 92.0, 52.0, 42.0, 30.0, 19.0, 21.0, 13.0, 4.0, 4.0, 3.0, 2.0, 2.0, 0.0, 4.0, 2.0, 0.0, 0.0, 2.0], "bins": [-1.53515625, -1.48492431640625, -1.4346923828125, -1.38446044921875, -1.334228515625, -1.28399658203125, -1.2337646484375, -1.18353271484375, -1.13330078125, -1.08306884765625, -1.0328369140625, -0.98260498046875, -0.932373046875, -0.88214111328125, -0.8319091796875, -0.78167724609375, -0.7314453125, -0.68121337890625, -0.6309814453125, -0.58074951171875, -0.530517578125, -0.48028564453125, -0.4300537109375, -0.37982177734375, -0.32958984375, -0.27935791015625, -0.2291259765625, -0.17889404296875, -0.128662109375, -0.07843017578125, -0.0281982421875, 0.02203369140625, 0.072265625, 0.12249755859375, 0.1727294921875, 0.22296142578125, 0.273193359375, 0.32342529296875, 0.3736572265625, 0.42388916015625, 0.47412109375, 0.52435302734375, 0.5745849609375, 0.62481689453125, 0.675048828125, 0.72528076171875, 0.7755126953125, 0.82574462890625, 0.8759765625, 0.92620849609375, 0.9764404296875, 1.02667236328125, 1.076904296875, 1.12713623046875, 1.1773681640625, 1.22760009765625, 1.27783203125, 1.32806396484375, 1.3782958984375, 1.42852783203125, 1.478759765625, 1.52899169921875, 1.5792236328125, 1.62945556640625, 1.6796875]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 5.0, 9.0, 7.0, 11.0, 12.0, 15.0, 22.0, 36.0, 35.0, 47.0, 50.0, 57.0, 74.0, 66.0, 70.0, 59.0, 66.0, 56.0, 60.0, 52.0, 47.0, 36.0, 27.0, 17.0, 21.0, 15.0, 15.0, 9.0, 4.0, 4.0, 2.0, 3.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.470703125, -2.40576171875, -2.3408203125, -2.27587890625, -2.2109375, -2.14599609375, -2.0810546875, -2.01611328125, -1.951171875, -1.88623046875, -1.8212890625, -1.75634765625, -1.69140625, -1.62646484375, -1.5615234375, -1.49658203125, -1.431640625, -1.36669921875, -1.3017578125, -1.23681640625, -1.171875, -1.10693359375, -1.0419921875, -0.97705078125, -0.912109375, -0.84716796875, -0.7822265625, -0.71728515625, -0.65234375, -0.58740234375, -0.5224609375, -0.45751953125, -0.392578125, -0.32763671875, -0.2626953125, -0.19775390625, -0.1328125, -0.06787109375, -0.0029296875, 0.06201171875, 0.126953125, 0.19189453125, 0.2568359375, 0.32177734375, 0.38671875, 0.45166015625, 0.5166015625, 0.58154296875, 0.646484375, 0.71142578125, 0.7763671875, 0.84130859375, 0.90625, 0.97119140625, 1.0361328125, 1.10107421875, 1.166015625, 1.23095703125, 1.2958984375, 1.36083984375, 1.42578125, 1.49072265625, 1.5556640625, 1.62060546875, 1.685546875]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 5.0, 7.0, 3.0, 4.0, 12.0, 16.0, 16.0, 18.0, 38.0, 48.0, 58.0, 95.0, 132.0, 183.0, 240.0, 388.0, 573.0, 1007.0, 1980.0, 4929.0, 17764.0, 98356.0, 639472.0, 231680.0, 37019.0, 8272.0, 2757.0, 1257.0, 718.0, 455.0, 279.0, 217.0, 161.0, 125.0, 69.0, 54.0, 33.0, 31.0, 20.0, 20.0, 14.0, 12.0, 9.0, 3.0, 3.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.28515625, -4.135986328125, -3.98681640625, -3.837646484375, -3.6884765625, -3.539306640625, -3.39013671875, -3.240966796875, -3.091796875, -2.942626953125, -2.79345703125, -2.644287109375, -2.4951171875, -2.345947265625, -2.19677734375, -2.047607421875, -1.8984375, -1.749267578125, -1.60009765625, -1.450927734375, -1.3017578125, -1.152587890625, -1.00341796875, -0.854248046875, -0.705078125, -0.555908203125, -0.40673828125, -0.257568359375, -0.1083984375, 0.040771484375, 0.18994140625, 0.339111328125, 0.48828125, 0.637451171875, 0.78662109375, 0.935791015625, 1.0849609375, 1.234130859375, 1.38330078125, 1.532470703125, 1.681640625, 1.830810546875, 1.97998046875, 2.129150390625, 2.2783203125, 2.427490234375, 2.57666015625, 2.725830078125, 2.875, 3.024169921875, 3.17333984375, 3.322509765625, 3.4716796875, 3.620849609375, 3.77001953125, 3.919189453125, 4.068359375, 4.217529296875, 4.36669921875, 4.515869140625, 4.6650390625, 4.814208984375, 4.96337890625, 5.112548828125, 5.26171875]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 4.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 5.0, 8.0, 5.0, 7.0, 14.0, 9.0, 5.0, 8.0, 22.0, 13.0, 18.0, 25.0, 20.0, 22.0, 32.0, 51.0, 47.0, 38.0, 35.0, 42.0, 45.0, 32.0, 39.0, 32.0, 42.0, 31.0, 23.0, 50.0, 35.0, 31.0, 25.0, 25.0, 23.0, 18.0, 13.0, 15.0, 18.0, 13.0, 14.0, 17.0, 2.0, 9.0, 5.0, 3.0, 5.0, 4.0, 3.0, 1.0, 3.0, 2.0, 0.0, 2.0, 2.0], "bins": [-5.04296875, -4.89208984375, -4.7412109375, -4.59033203125, -4.439453125, -4.28857421875, -4.1376953125, -3.98681640625, -3.8359375, -3.68505859375, -3.5341796875, -3.38330078125, -3.232421875, -3.08154296875, -2.9306640625, -2.77978515625, -2.62890625, -2.47802734375, -2.3271484375, -2.17626953125, -2.025390625, -1.87451171875, -1.7236328125, -1.57275390625, -1.421875, -1.27099609375, -1.1201171875, -0.96923828125, -0.818359375, -0.66748046875, -0.5166015625, -0.36572265625, -0.21484375, -0.06396484375, 0.0869140625, 0.23779296875, 0.388671875, 0.53955078125, 0.6904296875, 0.84130859375, 0.9921875, 1.14306640625, 1.2939453125, 1.44482421875, 1.595703125, 1.74658203125, 1.8974609375, 2.04833984375, 2.19921875, 2.35009765625, 2.5009765625, 2.65185546875, 2.802734375, 2.95361328125, 3.1044921875, 3.25537109375, 3.40625, 3.55712890625, 3.7080078125, 3.85888671875, 4.009765625, 4.16064453125, 4.3115234375, 4.46240234375, 4.61328125]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0, 15.0, 28.0, 45.0, 64.0, 137.0, 185.0, 329.0, 816.0, 1828.0, 5328.0, 19691.0, 105970.0, 665194.0, 201741.0, 34240.0, 8279.0, 2542.0, 1063.0, 453.0, 248.0, 132.0, 76.0, 52.0, 31.0, 15.0, 15.0, 7.0, 6.0, 3.0, 4.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.568359375, -1.511688232421875, -1.45501708984375, -1.398345947265625, -1.3416748046875, -1.285003662109375, -1.22833251953125, -1.171661376953125, -1.114990234375, -1.058319091796875, -1.00164794921875, -0.944976806640625, -0.8883056640625, -0.831634521484375, -0.77496337890625, -0.718292236328125, -0.66162109375, -0.604949951171875, -0.54827880859375, -0.491607666015625, -0.4349365234375, -0.378265380859375, -0.32159423828125, -0.264923095703125, -0.208251953125, -0.151580810546875, -0.09490966796875, -0.038238525390625, 0.0184326171875, 0.075103759765625, 0.13177490234375, 0.188446044921875, 0.2451171875, 0.301788330078125, 0.35845947265625, 0.415130615234375, 0.4718017578125, 0.528472900390625, 0.58514404296875, 0.641815185546875, 0.698486328125, 0.755157470703125, 0.81182861328125, 0.868499755859375, 0.9251708984375, 0.981842041015625, 1.03851318359375, 1.095184326171875, 1.15185546875, 1.208526611328125, 1.26519775390625, 1.321868896484375, 1.3785400390625, 1.435211181640625, 1.49188232421875, 1.548553466796875, 1.605224609375, 1.661895751953125, 1.71856689453125, 1.775238037109375, 1.8319091796875, 1.888580322265625, 1.94525146484375, 2.001922607421875, 2.05859375]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 5.0, 5.0, 3.0, 4.0, 11.0, 11.0, 14.0, 17.0, 32.0, 35.0, 49.0, 86.0, 106.0, 138.0, 134.0, 108.0, 72.0, 39.0, 32.0, 26.0, 23.0, 12.0, 13.0, 8.0, 2.0, 3.0, 3.0, 5.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006704330444335938, -0.00064801424741745, -0.0006255954504013062, -0.0006031766533851624, -0.0005807578563690186, -0.0005583390593528748, -0.000535920262336731, -0.0005135014653205872, -0.0004910826683044434, -0.00046866387128829956, -0.00044624507427215576, -0.00042382627725601196, -0.00040140748023986816, -0.00037898868322372437, -0.00035656988620758057, -0.00033415108919143677, -0.00031173229217529297, -0.00028931349515914917, -0.00026689469814300537, -0.00024447590112686157, -0.00022205710411071777, -0.00019963830709457397, -0.00017721951007843018, -0.00015480071306228638, -0.00013238191604614258, -0.00010996311902999878, -8.754432201385498e-05, -6.512552499771118e-05, -4.270672798156738e-05, -2.0287930965423584e-05, 2.130866050720215e-06, 2.4549663066864014e-05, 4.696846008300781e-05, 6.938725709915161e-05, 9.180605411529541e-05, 0.00011422485113143921, 0.000136643648147583, 0.0001590624451637268, 0.0001814812421798706, 0.0002039000391960144, 0.0002263188362121582, 0.000248737633228302, 0.0002711564302444458, 0.0002935752272605896, 0.0003159940242767334, 0.0003384128212928772, 0.000360831618309021, 0.0003832504153251648, 0.0004056692123413086, 0.0004280880093574524, 0.0004505068063735962, 0.00047292560338974, 0.0004953444004058838, 0.0005177631974220276, 0.0005401819944381714, 0.0005626007914543152, 0.000585019588470459, 0.0006074383854866028, 0.0006298571825027466, 0.0006522759795188904, 0.0006746947765350342, 0.000697113573551178, 0.0007195323705673218, 0.0007419511675834656, 0.0007643699645996094]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 9.0, 7.0, 13.0, 13.0, 20.0, 26.0, 51.0, 92.0, 155.0, 238.0, 470.0, 1006.0, 2764.0, 10866.0, 78457.0, 723787.0, 200733.0, 22479.0, 4413.0, 1510.0, 643.0, 344.0, 205.0, 104.0, 57.0, 39.0, 24.0, 13.0, 8.0, 5.0, 6.0, 1.0, 4.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4921875, -1.42645263671875, -1.3607177734375, -1.29498291015625, -1.229248046875, -1.16351318359375, -1.0977783203125, -1.03204345703125, -0.96630859375, -0.90057373046875, -0.8348388671875, -0.76910400390625, -0.703369140625, -0.63763427734375, -0.5718994140625, -0.50616455078125, -0.4404296875, -0.37469482421875, -0.3089599609375, -0.24322509765625, -0.177490234375, -0.11175537109375, -0.0460205078125, 0.01971435546875, 0.08544921875, 0.15118408203125, 0.2169189453125, 0.28265380859375, 0.348388671875, 0.41412353515625, 0.4798583984375, 0.54559326171875, 0.611328125, 0.67706298828125, 0.7427978515625, 0.80853271484375, 0.874267578125, 0.94000244140625, 1.0057373046875, 1.07147216796875, 1.13720703125, 1.20294189453125, 1.2686767578125, 1.33441162109375, 1.400146484375, 1.46588134765625, 1.5316162109375, 1.59735107421875, 1.6630859375, 1.72882080078125, 1.7945556640625, 1.86029052734375, 1.926025390625, 1.99176025390625, 2.0574951171875, 2.12322998046875, 2.18896484375, 2.25469970703125, 2.3204345703125, 2.38616943359375, 2.451904296875, 2.51763916015625, 2.5833740234375, 2.64910888671875, 2.71484375]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 3.0, 3.0, 4.0, 5.0, 10.0, 11.0, 15.0, 17.0, 14.0, 23.0, 29.0, 23.0, 39.0, 41.0, 48.0, 62.0, 60.0, 60.0, 73.0, 61.0, 69.0, 45.0, 52.0, 42.0, 36.0, 30.0, 21.0, 21.0, 19.0, 21.0, 9.0, 12.0, 4.0, 8.0, 6.0, 9.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.369140625, -1.32806396484375, -1.2869873046875, -1.24591064453125, -1.204833984375, -1.16375732421875, -1.1226806640625, -1.08160400390625, -1.04052734375, -0.99945068359375, -0.9583740234375, -0.91729736328125, -0.876220703125, -0.83514404296875, -0.7940673828125, -0.75299072265625, -0.7119140625, -0.67083740234375, -0.6297607421875, -0.58868408203125, -0.547607421875, -0.50653076171875, -0.4654541015625, -0.42437744140625, -0.38330078125, -0.34222412109375, -0.3011474609375, -0.26007080078125, -0.218994140625, -0.17791748046875, -0.1368408203125, -0.09576416015625, -0.0546875, -0.01361083984375, 0.0274658203125, 0.06854248046875, 0.109619140625, 0.15069580078125, 0.1917724609375, 0.23284912109375, 0.27392578125, 0.31500244140625, 0.3560791015625, 0.39715576171875, 0.438232421875, 0.47930908203125, 0.5203857421875, 0.56146240234375, 0.6025390625, 0.64361572265625, 0.6846923828125, 0.72576904296875, 0.766845703125, 0.80792236328125, 0.8489990234375, 0.89007568359375, 0.93115234375, 0.97222900390625, 1.0133056640625, 1.05438232421875, 1.095458984375, 1.13653564453125, 1.1776123046875, 1.21868896484375, 1.259765625]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 0.0, 3.0, 2.0, 8.0, 1.0, 6.0, 7.0, 13.0, 39.0, 75.0, 123.0, 262.0, 228.0, 119.0, 59.0, 29.0, 15.0, 9.0, 6.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-44.262107849121094, -42.78217315673828, -41.30223846435547, -39.82229995727539, -38.34236526489258, -36.862430572509766, -35.38249588012695, -33.902557373046875, -32.42262268066406, -30.94268798828125, -29.462751388549805, -27.982816696166992, -26.502880096435547, -25.022945404052734, -23.543010711669922, -22.063074111938477, -20.583139419555664, -19.10320472717285, -17.623268127441406, -16.143333435058594, -14.663396835327148, -13.183462142944336, -11.703526496887207, -10.223590850830078, -8.74365520477295, -7.26371955871582, -5.783783912658691, -4.303848743438721, -2.823913097381592, -1.343977451324463, 0.1359577178955078, 1.6158933639526367, 3.0958290100097656, 4.5757646560668945, 6.055700302124023, 7.535635471343994, 9.015571594238281, 10.495506286621094, 11.975441932678223, 13.455377578735352, 14.93531322479248, 16.41524887084961, 17.895183563232422, 19.375120162963867, 20.85505485534668, 22.334991455078125, 23.814926147460938, 25.29486083984375, 26.774797439575195, 28.254732131958008, 29.734668731689453, 31.214603424072266, 32.69453811645508, 34.174476623535156, 35.65441131591797, 37.13434600830078, 38.614280700683594, 40.094215393066406, 41.57415008544922, 43.0540885925293, 44.53402328491211, 46.01395797729492, 47.493892669677734, 48.97383117675781, 50.453765869140625]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 4.0, 4.0, 4.0, 8.0, 7.0, 3.0, 9.0, 9.0, 18.0, 9.0, 14.0, 14.0, 28.0, 21.0, 23.0, 37.0, 24.0, 41.0, 54.0, 59.0, 64.0, 106.0, 67.0, 51.0, 40.0, 50.0, 40.0, 29.0, 28.0, 23.0, 19.0, 16.0, 7.0, 15.0, 7.0, 11.0, 10.0, 5.0, 7.0, 5.0, 7.0, 2.0, 7.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-26.978622436523438, -26.194673538208008, -25.410724639892578, -24.62677574157715, -23.84282684326172, -23.058879852294922, -22.27492904663086, -21.490982055664062, -20.707033157348633, -19.923084259033203, -19.139135360717773, -18.355186462402344, -17.571237564086914, -16.787288665771484, -16.003341674804688, -15.219392776489258, -14.435442924499512, -13.651494026184082, -12.867545127868652, -12.083597183227539, -11.29964828491211, -10.51569938659668, -9.73175048828125, -8.94780158996582, -8.16385269165039, -7.379903793334961, -6.5959553718566895, -5.81200647354126, -5.028058052062988, -4.244109153747559, -3.460160255432129, -2.6762118339538574, -1.892263412475586, -1.1083147525787354, -0.3243659734725952, 0.4595828056335449, 1.2435314655303955, 2.027480125427246, 2.811429023742676, 3.5953774452209473, 4.379326343536377, 5.163275241851807, 5.947223663330078, 6.731172561645508, 7.5151214599609375, 8.299070358276367, 9.083019256591797, 9.86696720123291, 10.65091609954834, 11.43486499786377, 12.2188138961792, 13.002761840820312, 13.786710739135742, 14.570659637451172, 15.354608535766602, 16.13855743408203, 16.92250633239746, 17.70645523071289, 18.49040412902832, 19.27435302734375, 20.05830192565918, 20.84225082397461, 21.626197814941406, 22.410146713256836, 23.194095611572266]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 10.0, 10.0, 5.0, 5.0, 14.0, 16.0, 24.0, 33.0, 38.0, 56.0, 69.0, 91.0, 135.0, 169.0, 283.0, 450.0, 699.0, 1206.0, 2734.0, 6945.0, 24376.0, 178558.0, 2022481.0, 1775185.0, 145659.0, 22401.0, 6770.0, 2631.0, 1197.0, 696.0, 384.0, 278.0, 184.0, 140.0, 90.0, 56.0, 53.0, 46.0, 28.0, 21.0, 18.0, 19.0, 10.0, 4.0, 4.0, 4.0, 2.0, 2.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.095703125, -2.023345947265625, -1.95098876953125, -1.878631591796875, -1.8062744140625, -1.733917236328125, -1.66156005859375, -1.589202880859375, -1.516845703125, -1.444488525390625, -1.37213134765625, -1.299774169921875, -1.2274169921875, -1.155059814453125, -1.08270263671875, -1.010345458984375, -0.93798828125, -0.865631103515625, -0.79327392578125, -0.720916748046875, -0.6485595703125, -0.576202392578125, -0.50384521484375, -0.431488037109375, -0.359130859375, -0.286773681640625, -0.21441650390625, -0.142059326171875, -0.0697021484375, 0.002655029296875, 0.07501220703125, 0.147369384765625, 0.2197265625, 0.292083740234375, 0.36444091796875, 0.436798095703125, 0.5091552734375, 0.581512451171875, 0.65386962890625, 0.726226806640625, 0.798583984375, 0.870941162109375, 0.94329833984375, 1.015655517578125, 1.0880126953125, 1.160369873046875, 1.23272705078125, 1.305084228515625, 1.37744140625, 1.449798583984375, 1.52215576171875, 1.594512939453125, 1.6668701171875, 1.739227294921875, 1.81158447265625, 1.883941650390625, 1.956298828125, 2.028656005859375, 2.10101318359375, 2.173370361328125, 2.2457275390625, 2.318084716796875, 2.39044189453125, 2.462799072265625, 2.53515625]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 7.0, 6.0, 3.0, 19.0, 9.0, 20.0, 28.0, 31.0, 35.0, 52.0, 43.0, 50.0, 61.0, 70.0, 55.0, 65.0, 63.0, 57.0, 65.0, 50.0, 55.0, 39.0, 27.0, 26.0, 21.0, 16.0, 15.0, 7.0, 6.0, 5.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.43359375, -2.374664306640625, -2.31573486328125, -2.256805419921875, -2.1978759765625, -2.138946533203125, -2.08001708984375, -2.021087646484375, -1.962158203125, -1.903228759765625, -1.84429931640625, -1.785369873046875, -1.7264404296875, -1.667510986328125, -1.60858154296875, -1.549652099609375, -1.49072265625, -1.431793212890625, -1.37286376953125, -1.313934326171875, -1.2550048828125, -1.196075439453125, -1.13714599609375, -1.078216552734375, -1.019287109375, -0.960357666015625, -0.90142822265625, -0.842498779296875, -0.7835693359375, -0.724639892578125, -0.66571044921875, -0.606781005859375, -0.5478515625, -0.488922119140625, -0.42999267578125, -0.371063232421875, -0.3121337890625, -0.253204345703125, -0.19427490234375, -0.135345458984375, -0.076416015625, -0.017486572265625, 0.04144287109375, 0.100372314453125, 0.1593017578125, 0.218231201171875, 0.27716064453125, 0.336090087890625, 0.39501953125, 0.453948974609375, 0.51287841796875, 0.571807861328125, 0.6307373046875, 0.689666748046875, 0.74859619140625, 0.807525634765625, 0.866455078125, 0.925384521484375, 0.98431396484375, 1.043243408203125, 1.1021728515625, 1.161102294921875, 1.22003173828125, 1.278961181640625, 1.337890625]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 4.0, 1.0, 3.0, 4.0, 4.0, 5.0, 5.0, 12.0, 18.0, 20.0, 30.0, 53.0, 94.0, 161.0, 325.0, 606.0, 1616.0, 4915.0, 26247.0, 628854.0, 3441113.0, 76511.0, 9207.0, 2395.0, 1004.0, 482.0, 224.0, 147.0, 94.0, 39.0, 37.0, 25.0, 10.0, 11.0, 6.0, 6.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.41796875, -4.2667236328125, -4.115478515625, -3.9642333984375, -3.81298828125, -3.6617431640625, -3.510498046875, -3.3592529296875, -3.2080078125, -3.0567626953125, -2.905517578125, -2.7542724609375, -2.60302734375, -2.4517822265625, -2.300537109375, -2.1492919921875, -1.998046875, -1.8468017578125, -1.695556640625, -1.5443115234375, -1.39306640625, -1.2418212890625, -1.090576171875, -0.9393310546875, -0.7880859375, -0.6368408203125, -0.485595703125, -0.3343505859375, -0.18310546875, -0.0318603515625, 0.119384765625, 0.2706298828125, 0.421875, 0.5731201171875, 0.724365234375, 0.8756103515625, 1.02685546875, 1.1781005859375, 1.329345703125, 1.4805908203125, 1.6318359375, 1.7830810546875, 1.934326171875, 2.0855712890625, 2.23681640625, 2.3880615234375, 2.539306640625, 2.6905517578125, 2.841796875, 2.9930419921875, 3.144287109375, 3.2955322265625, 3.44677734375, 3.5980224609375, 3.749267578125, 3.9005126953125, 4.0517578125, 4.2030029296875, 4.354248046875, 4.5054931640625, 4.65673828125, 4.8079833984375, 4.959228515625, 5.1104736328125, 5.26171875]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 5.0, 4.0, 4.0, 12.0, 6.0, 15.0, 14.0, 23.0, 37.0, 62.0, 95.0, 198.0, 391.0, 738.0, 984.0, 631.0, 371.0, 193.0, 115.0, 63.0, 32.0, 28.0, 17.0, 16.0, 9.0, 6.0, 3.0, 5.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3.833984375, -3.703460693359375, -3.57293701171875, -3.442413330078125, -3.3118896484375, -3.181365966796875, -3.05084228515625, -2.920318603515625, -2.789794921875, -2.659271240234375, -2.52874755859375, -2.398223876953125, -2.2677001953125, -2.137176513671875, -2.00665283203125, -1.876129150390625, -1.74560546875, -1.615081787109375, -1.48455810546875, -1.354034423828125, -1.2235107421875, -1.092987060546875, -0.96246337890625, -0.831939697265625, -0.701416015625, -0.570892333984375, -0.44036865234375, -0.309844970703125, -0.1793212890625, -0.048797607421875, 0.08172607421875, 0.212249755859375, 0.3427734375, 0.473297119140625, 0.60382080078125, 0.734344482421875, 0.8648681640625, 0.995391845703125, 1.12591552734375, 1.256439208984375, 1.386962890625, 1.517486572265625, 1.64801025390625, 1.778533935546875, 1.9090576171875, 2.039581298828125, 2.17010498046875, 2.300628662109375, 2.43115234375, 2.561676025390625, 2.69219970703125, 2.822723388671875, 2.9532470703125, 3.083770751953125, 3.21429443359375, 3.344818115234375, 3.475341796875, 3.605865478515625, 3.73638916015625, 3.866912841796875, 3.9974365234375, 4.127960205078125, 4.25848388671875, 4.389007568359375, 4.51953125]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 3.0, 3.0, 10.0, 31.0, 247.0, 514.0, 173.0, 25.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-83.35200500488281, -80.06864166259766, -76.7852783203125, -73.50191497802734, -70.21855163574219, -66.9351806640625, -63.651817321777344, -60.36845397949219, -57.08509063720703, -53.801727294921875, -50.51836395263672, -47.2349967956543, -43.95163345336914, -40.668270111083984, -37.38490295410156, -34.101539611816406, -30.81817626953125, -27.534812927246094, -24.251447677612305, -20.968082427978516, -17.68471908569336, -14.401355743408203, -11.117990493774414, -7.834625244140625, -4.551261901855469, -1.267897605895996, 2.0154666900634766, 5.298830986022949, 8.582195281982422, 11.865558624267578, 15.148923873901367, 18.432289123535156, 21.71564483642578, 24.999008178710938, 28.282373428344727, 31.565738677978516, 34.84910202026367, 38.13246536254883, 41.41583251953125, 44.699195861816406, 47.98255920410156, 51.26592254638672, 54.549285888671875, 57.8326530456543, 61.11601638793945, 64.39938354492188, 67.68274688720703, 70.96611022949219, 74.24947357177734, 77.5328369140625, 80.81620025634766, 84.09956359863281, 87.3829345703125, 90.66629028320312, 93.94966125488281, 97.23302459716797, 100.51638793945312, 103.79975128173828, 107.08311462402344, 110.3664779663086, 113.64984130859375, 116.93321228027344, 120.2165756225586, 123.49993896484375, 126.7833023071289]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 1.0, 3.0, 2.0, 6.0, 5.0, 8.0, 11.0, 14.0, 20.0, 17.0, 30.0, 34.0, 39.0, 50.0, 40.0, 50.0, 45.0, 62.0, 59.0, 62.0, 56.0, 40.0, 43.0, 55.0, 40.0, 37.0, 22.0, 24.0, 23.0, 30.0, 20.0, 16.0, 15.0, 6.0, 10.0, 6.0, 7.0, 1.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.677006721496582, -12.104056358337402, -11.531105995178223, -10.958155632019043, -10.38520622253418, -9.812255859375, -9.23930549621582, -8.66635513305664, -8.093404769897461, -7.520454406738281, -6.947504043579102, -6.37455415725708, -5.8016037940979, -5.228653430938721, -4.655703544616699, -4.0827531814575195, -3.50980281829834, -2.93685245513916, -2.3639023303985596, -1.7909520864486694, -1.2180018424987793, -0.6450514793395996, -0.07210135459899902, 0.5008487701416016, 1.0737991333007812, 1.6467493772506714, 2.2196996212005615, 2.792649745941162, 3.365600109100342, 3.9385504722595215, 4.511500358581543, 5.084450721740723, 5.657402038574219, 6.230352401733398, 6.803302764892578, 7.3762526512146, 7.949203014373779, 8.522153854370117, 9.09510326385498, 9.66805362701416, 10.24100399017334, 10.81395435333252, 11.3869047164917, 11.959855079650879, 12.532804489135742, 13.105754852294922, 13.678705215454102, 14.251655578613281, 14.824605941772461, 15.39755630493164, 15.97050666809082, 16.54345703125, 17.11640739440918, 17.68935775756836, 18.26230812072754, 18.83525848388672, 19.408206939697266, 19.981157302856445, 20.554107666015625, 21.127058029174805, 21.700008392333984, 22.272958755493164, 22.845909118652344, 23.41885757446289, 23.991809844970703]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 5.0, 3.0, 5.0, 8.0, 5.0, 15.0, 29.0, 43.0, 43.0, 94.0, 147.0, 206.0, 328.0, 522.0, 859.0, 1290.0, 2249.0, 3648.0, 6487.0, 11528.0, 20628.0, 37799.0, 68321.0, 120034.0, 183437.0, 207708.0, 161578.0, 97780.0, 54644.0, 30107.0, 16477.0, 9326.0, 5269.0, 3082.0, 1898.0, 1067.0, 671.0, 451.0, 268.0, 164.0, 110.0, 78.0, 51.0, 36.0, 20.0, 12.0, 11.0, 10.0, 8.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-1.51171875, -1.464935302734375, -1.41815185546875, -1.371368408203125, -1.3245849609375, -1.277801513671875, -1.23101806640625, -1.184234619140625, -1.137451171875, -1.090667724609375, -1.04388427734375, -0.997100830078125, -0.9503173828125, -0.903533935546875, -0.85675048828125, -0.809967041015625, -0.76318359375, -0.716400146484375, -0.66961669921875, -0.622833251953125, -0.5760498046875, -0.529266357421875, -0.48248291015625, -0.435699462890625, -0.388916015625, -0.342132568359375, -0.29534912109375, -0.248565673828125, -0.2017822265625, -0.154998779296875, -0.10821533203125, -0.061431884765625, -0.0146484375, 0.032135009765625, 0.07891845703125, 0.125701904296875, 0.1724853515625, 0.219268798828125, 0.26605224609375, 0.312835693359375, 0.359619140625, 0.406402587890625, 0.45318603515625, 0.499969482421875, 0.5467529296875, 0.593536376953125, 0.64031982421875, 0.687103271484375, 0.73388671875, 0.780670166015625, 0.82745361328125, 0.874237060546875, 0.9210205078125, 0.967803955078125, 1.01458740234375, 1.061370849609375, 1.108154296875, 1.154937744140625, 1.20172119140625, 1.248504638671875, 1.2952880859375, 1.342071533203125, 1.38885498046875, 1.435638427734375, 1.482421875]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 4.0, 4.0, 12.0, 14.0, 9.0, 17.0, 17.0, 27.0, 34.0, 33.0, 30.0, 38.0, 52.0, 47.0, 57.0, 67.0, 68.0, 54.0, 57.0, 57.0, 57.0, 38.0, 36.0, 39.0, 35.0, 25.0, 15.0, 17.0, 14.0, 10.0, 11.0, 6.0, 5.0, 6.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.421875, -2.36370849609375, -2.3055419921875, -2.24737548828125, -2.189208984375, -2.13104248046875, -2.0728759765625, -2.01470947265625, -1.95654296875, -1.89837646484375, -1.8402099609375, -1.78204345703125, -1.723876953125, -1.66571044921875, -1.6075439453125, -1.54937744140625, -1.4912109375, -1.43304443359375, -1.3748779296875, -1.31671142578125, -1.258544921875, -1.20037841796875, -1.1422119140625, -1.08404541015625, -1.02587890625, -0.96771240234375, -0.9095458984375, -0.85137939453125, -0.793212890625, -0.73504638671875, -0.6768798828125, -0.61871337890625, -0.560546875, -0.50238037109375, -0.4442138671875, -0.38604736328125, -0.327880859375, -0.26971435546875, -0.2115478515625, -0.15338134765625, -0.09521484375, -0.03704833984375, 0.0211181640625, 0.07928466796875, 0.137451171875, 0.19561767578125, 0.2537841796875, 0.31195068359375, 0.3701171875, 0.42828369140625, 0.4864501953125, 0.54461669921875, 0.602783203125, 0.66094970703125, 0.7191162109375, 0.77728271484375, 0.83544921875, 0.89361572265625, 0.9517822265625, 1.00994873046875, 1.068115234375, 1.12628173828125, 1.1844482421875, 1.24261474609375, 1.30078125]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 5.0, 4.0, 5.0, 3.0, 7.0, 8.0, 16.0, 20.0, 15.0, 31.0, 68.0, 88.0, 131.0, 189.0, 341.0, 723.0, 1640.0, 4811.0, 17828.0, 82849.0, 527980.0, 321855.0, 67972.0, 14876.0, 4081.0, 1500.0, 642.0, 312.0, 194.0, 106.0, 79.0, 54.0, 29.0, 33.0, 21.0, 14.0, 7.0, 5.0, 9.0, 3.0, 0.0, 3.0, 2.0, 2.0, 1.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.984375, -3.8555908203125, -3.726806640625, -3.5980224609375, -3.46923828125, -3.3404541015625, -3.211669921875, -3.0828857421875, -2.9541015625, -2.8253173828125, -2.696533203125, -2.5677490234375, -2.43896484375, -2.3101806640625, -2.181396484375, -2.0526123046875, -1.923828125, -1.7950439453125, -1.666259765625, -1.5374755859375, -1.40869140625, -1.2799072265625, -1.151123046875, -1.0223388671875, -0.8935546875, -0.7647705078125, -0.635986328125, -0.5072021484375, -0.37841796875, -0.2496337890625, -0.120849609375, 0.0079345703125, 0.13671875, 0.2655029296875, 0.394287109375, 0.5230712890625, 0.65185546875, 0.7806396484375, 0.909423828125, 1.0382080078125, 1.1669921875, 1.2957763671875, 1.424560546875, 1.5533447265625, 1.68212890625, 1.8109130859375, 1.939697265625, 2.0684814453125, 2.197265625, 2.3260498046875, 2.454833984375, 2.5836181640625, 2.71240234375, 2.8411865234375, 2.969970703125, 3.0987548828125, 3.2275390625, 3.3563232421875, 3.485107421875, 3.6138916015625, 3.74267578125, 3.8714599609375, 4.000244140625, 4.1290283203125, 4.2578125]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [4.0, 1.0, 3.0, 1.0, 3.0, 3.0, 2.0, 3.0, 10.0, 9.0, 5.0, 13.0, 6.0, 8.0, 14.0, 17.0, 20.0, 25.0, 19.0, 23.0, 35.0, 30.0, 49.0, 38.0, 40.0, 63.0, 54.0, 43.0, 63.0, 52.0, 33.0, 40.0, 47.0, 38.0, 34.0, 29.0, 23.0, 23.0, 21.0, 16.0, 10.0, 16.0, 8.0, 9.0, 5.0, 2.0, 2.0, 2.0, 0.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.4765625, -4.31292724609375, -4.1492919921875, -3.98565673828125, -3.822021484375, -3.65838623046875, -3.4947509765625, -3.33111572265625, -3.16748046875, -3.00384521484375, -2.8402099609375, -2.67657470703125, -2.512939453125, -2.34930419921875, -2.1856689453125, -2.02203369140625, -1.8583984375, -1.69476318359375, -1.5311279296875, -1.36749267578125, -1.203857421875, -1.04022216796875, -0.8765869140625, -0.71295166015625, -0.54931640625, -0.38568115234375, -0.2220458984375, -0.05841064453125, 0.105224609375, 0.26885986328125, 0.4324951171875, 0.59613037109375, 0.759765625, 0.92340087890625, 1.0870361328125, 1.25067138671875, 1.414306640625, 1.57794189453125, 1.7415771484375, 1.90521240234375, 2.06884765625, 2.23248291015625, 2.3961181640625, 2.55975341796875, 2.723388671875, 2.88702392578125, 3.0506591796875, 3.21429443359375, 3.3779296875, 3.54156494140625, 3.7052001953125, 3.86883544921875, 4.032470703125, 4.19610595703125, 4.3597412109375, 4.52337646484375, 4.68701171875, 4.85064697265625, 5.0142822265625, 5.17791748046875, 5.341552734375, 5.50518798828125, 5.6688232421875, 5.83245849609375, 5.99609375]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 3.0, 0.0, 9.0, 10.0, 8.0, 16.0, 33.0, 31.0, 48.0, 79.0, 128.0, 208.0, 341.0, 653.0, 1245.0, 2564.0, 5597.0, 14284.0, 41368.0, 137729.0, 537246.0, 209565.0, 62237.0, 20599.0, 7702.0, 3320.0, 1606.0, 815.0, 455.0, 250.0, 147.0, 92.0, 62.0, 38.0, 19.0, 22.0, 11.0, 9.0, 7.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.1474609375, -1.111236572265625, -1.07501220703125, -1.038787841796875, -1.0025634765625, -0.966339111328125, -0.93011474609375, -0.893890380859375, -0.857666015625, -0.821441650390625, -0.78521728515625, -0.748992919921875, -0.7127685546875, -0.676544189453125, -0.64031982421875, -0.604095458984375, -0.56787109375, -0.531646728515625, -0.49542236328125, -0.459197998046875, -0.4229736328125, -0.386749267578125, -0.35052490234375, -0.314300537109375, -0.278076171875, -0.241851806640625, -0.20562744140625, -0.169403076171875, -0.1331787109375, -0.096954345703125, -0.06072998046875, -0.024505615234375, 0.01171875, 0.047943115234375, 0.08416748046875, 0.120391845703125, 0.1566162109375, 0.192840576171875, 0.22906494140625, 0.265289306640625, 0.301513671875, 0.337738037109375, 0.37396240234375, 0.410186767578125, 0.4464111328125, 0.482635498046875, 0.51885986328125, 0.555084228515625, 0.59130859375, 0.627532958984375, 0.66375732421875, 0.699981689453125, 0.7362060546875, 0.772430419921875, 0.80865478515625, 0.844879150390625, 0.881103515625, 0.917327880859375, 0.95355224609375, 0.989776611328125, 1.0260009765625, 1.062225341796875, 1.09844970703125, 1.134674072265625, 1.1708984375]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 1.0, 2.0, 4.0, 4.0, 10.0, 7.0, 13.0, 23.0, 36.0, 34.0, 62.0, 69.0, 75.0, 88.0, 90.0, 115.0, 101.0, 72.0, 48.0, 35.0, 28.0, 19.0, 20.0, 8.0, 7.0, 7.0, 4.0, 6.0, 4.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0, 3.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003783702850341797, -0.0003606565296649933, -0.0003429427742958069, -0.0003252290189266205, -0.0003075152635574341, -0.0002898015081882477, -0.0002720877528190613, -0.0002543739974498749, -0.00023666024208068848, -0.00021894648671150208, -0.00020123273134231567, -0.00018351897597312927, -0.00016580522060394287, -0.00014809146523475647, -0.00013037770986557007, -0.00011266395449638367, -9.495019912719727e-05, -7.723644375801086e-05, -5.952268838882446e-05, -4.180893301963806e-05, -2.409517765045166e-05, -6.381422281265259e-06, 1.1332333087921143e-05, 2.9046088457107544e-05, 4.6759843826293945e-05, 6.447359919548035e-05, 8.218735456466675e-05, 9.990110993385315e-05, 0.00011761486530303955, 0.00013532862067222595, 0.00015304237604141235, 0.00017075613141059875, 0.00018846988677978516, 0.00020618364214897156, 0.00022389739751815796, 0.00024161115288734436, 0.00025932490825653076, 0.00027703866362571716, 0.00029475241899490356, 0.00031246617436408997, 0.00033017992973327637, 0.00034789368510246277, 0.00036560744047164917, 0.00038332119584083557, 0.00040103495121002197, 0.0004187487065792084, 0.0004364624619483948, 0.0004541762173175812, 0.0004718899726867676, 0.000489603728055954, 0.0005073174834251404, 0.0005250312387943268, 0.0005427449941635132, 0.0005604587495326996, 0.000578172504901886, 0.0005958862602710724, 0.0006136000156402588, 0.0006313137710094452, 0.0006490275263786316, 0.000666741281747818, 0.0006844550371170044, 0.0007021687924861908, 0.0007198825478553772, 0.0007375963032245636, 0.00075531005859375]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 1.0, 4.0, 3.0, 2.0, 6.0, 10.0, 13.0, 16.0, 21.0, 31.0, 44.0, 70.0, 121.0, 180.0, 313.0, 584.0, 1199.0, 2660.0, 7109.0, 24300.0, 100923.0, 550486.0, 271026.0, 64061.0, 16129.0, 5101.0, 1989.0, 961.0, 491.0, 231.0, 165.0, 103.0, 66.0, 41.0, 29.0, 15.0, 11.0, 12.0, 7.0, 11.0, 6.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.3291015625, -1.2860107421875, -1.242919921875, -1.1998291015625, -1.15673828125, -1.1136474609375, -1.070556640625, -1.0274658203125, -0.984375, -0.9412841796875, -0.898193359375, -0.8551025390625, -0.81201171875, -0.7689208984375, -0.725830078125, -0.6827392578125, -0.6396484375, -0.5965576171875, -0.553466796875, -0.5103759765625, -0.46728515625, -0.4241943359375, -0.381103515625, -0.3380126953125, -0.294921875, -0.2518310546875, -0.208740234375, -0.1656494140625, -0.12255859375, -0.0794677734375, -0.036376953125, 0.0067138671875, 0.0498046875, 0.0928955078125, 0.135986328125, 0.1790771484375, 0.22216796875, 0.2652587890625, 0.308349609375, 0.3514404296875, 0.39453125, 0.4376220703125, 0.480712890625, 0.5238037109375, 0.56689453125, 0.6099853515625, 0.653076171875, 0.6961669921875, 0.7392578125, 0.7823486328125, 0.825439453125, 0.8685302734375, 0.91162109375, 0.9547119140625, 0.997802734375, 1.0408935546875, 1.083984375, 1.1270751953125, 1.170166015625, 1.2132568359375, 1.25634765625, 1.2994384765625, 1.342529296875, 1.3856201171875, 1.4287109375]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 5.0, 3.0, 4.0, 2.0, 3.0, 6.0, 4.0, 11.0, 11.0, 10.0, 16.0, 20.0, 29.0, 34.0, 42.0, 53.0, 60.0, 63.0, 67.0, 61.0, 68.0, 72.0, 70.0, 55.0, 49.0, 42.0, 28.0, 32.0, 17.0, 22.0, 10.0, 11.0, 9.0, 2.0, 6.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 1.0, 1.0], "bins": [-1.4033203125, -1.3601226806640625, -1.316925048828125, -1.2737274169921875, -1.23052978515625, -1.1873321533203125, -1.144134521484375, -1.1009368896484375, -1.0577392578125, -1.0145416259765625, -0.971343994140625, -0.9281463623046875, -0.88494873046875, -0.8417510986328125, -0.798553466796875, -0.7553558349609375, -0.712158203125, -0.6689605712890625, -0.625762939453125, -0.5825653076171875, -0.53936767578125, -0.4961700439453125, -0.452972412109375, -0.4097747802734375, -0.3665771484375, -0.3233795166015625, -0.280181884765625, -0.2369842529296875, -0.19378662109375, -0.1505889892578125, -0.107391357421875, -0.0641937255859375, -0.02099609375, 0.0222015380859375, 0.065399169921875, 0.1085968017578125, 0.15179443359375, 0.1949920654296875, 0.238189697265625, 0.2813873291015625, 0.3245849609375, 0.3677825927734375, 0.410980224609375, 0.4541778564453125, 0.49737548828125, 0.5405731201171875, 0.583770751953125, 0.6269683837890625, 0.670166015625, 0.7133636474609375, 0.756561279296875, 0.7997589111328125, 0.84295654296875, 0.8861541748046875, 0.929351806640625, 0.9725494384765625, 1.0157470703125, 1.0589447021484375, 1.102142333984375, 1.1453399658203125, 1.18853759765625, 1.2317352294921875, 1.274932861328125, 1.3181304931640625, 1.361328125]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 4.0, 1.0, 3.0, 6.0, 8.0, 29.0, 45.0, 106.0, 170.0, 284.0, 159.0, 95.0, 47.0, 29.0, 6.0, 6.0, 2.0, 5.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.40463638305664, -37.13990020751953, -35.87516784667969, -34.61043167114258, -33.345699310302734, -32.080963134765625, -30.81623077392578, -29.551494598388672, -28.286762237548828, -27.02202796936035, -25.757293701171875, -24.4925594329834, -23.227825164794922, -21.963090896606445, -20.69835662841797, -19.43362045288086, -18.168886184692383, -16.904151916503906, -15.63941764831543, -14.374683380126953, -13.109949111938477, -11.84521484375, -10.580479621887207, -9.31574535369873, -8.051011085510254, -6.786276817321777, -5.521542549133301, -4.256807804107666, -2.9920735359191895, -1.727339267730713, -0.4626045227050781, 0.8021297454833984, 2.066864013671875, 3.3315982818603516, 4.596332550048828, 5.861067295074463, 7.1258015632629395, 8.390535354614258, 9.65527057647705, 10.920004844665527, 12.184739112854004, 13.44947338104248, 14.714207649230957, 15.97894287109375, 17.243677139282227, 18.508411407470703, 19.77314567565918, 21.037879943847656, 22.302614212036133, 23.56734848022461, 24.832082748413086, 26.096817016601562, 27.36155128479004, 28.626285552978516, 29.891021728515625, 31.15575408935547, 32.42049026489258, 33.68522644042969, 34.94995880126953, 36.21469497680664, 37.479427337646484, 38.744163513183594, 40.00889587402344, 41.27363204956055, 42.53836441040039]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 3.0, 2.0, 0.0, 1.0, 3.0, 7.0, 4.0, 2.0, 8.0, 4.0, 9.0, 10.0, 13.0, 9.0, 15.0, 17.0, 31.0, 28.0, 26.0, 25.0, 29.0, 30.0, 40.0, 66.0, 81.0, 78.0, 83.0, 55.0, 34.0, 43.0, 31.0, 30.0, 33.0, 31.0, 27.0, 15.0, 13.0, 13.0, 14.0, 8.0, 10.0, 5.0, 5.0, 7.0, 2.0, 3.0, 6.0, 4.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.327194213867188, -18.654258728027344, -17.9813232421875, -17.308387756347656, -16.635452270507812, -15.962516784667969, -15.289581298828125, -14.616645812988281, -13.943710327148438, -13.270774841308594, -12.59783935546875, -11.924903869628906, -11.251968383789062, -10.579032897949219, -9.906097412109375, -9.233161926269531, -8.560226440429688, -7.887290954589844, -7.21435546875, -6.541419982910156, -5.8684844970703125, -5.195549011230469, -4.522613525390625, -3.8496780395507812, -3.1767425537109375, -2.5038070678710938, -1.83087158203125, -1.1579360961914062, -0.4850006103515625, 0.18793487548828125, 0.860870361328125, 1.5338058471679688, 2.2067413330078125, 2.8796768188476562, 3.5526123046875, 4.225547790527344, 4.8984832763671875, 5.571418762207031, 6.244354248046875, 6.917289733886719, 7.5902252197265625, 8.263160705566406, 8.93609619140625, 9.609031677246094, 10.281967163085938, 10.954902648925781, 11.627838134765625, 12.300773620605469, 12.973709106445312, 13.646644592285156, 14.319580078125, 14.992515563964844, 15.665451049804688, 16.33838653564453, 17.011322021484375, 17.68425750732422, 18.357192993164062, 19.030128479003906, 19.70306396484375, 20.375999450683594, 21.048934936523438, 21.72187042236328, 22.394805908203125, 23.06774139404297, 23.740676879882812]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 2.0, 1.0, 3.0, 3.0, 4.0, 9.0, 7.0, 12.0, 19.0, 21.0, 40.0, 51.0, 64.0, 128.0, 151.0, 258.0, 413.0, 701.0, 1032.0, 1769.0, 3302.0, 6676.0, 15404.0, 43761.0, 159302.0, 697876.0, 1877318.0, 1028014.0, 254283.0, 63449.0, 21230.0, 9029.0, 4371.0, 2323.0, 1215.0, 714.0, 436.0, 330.0, 183.0, 101.0, 95.0, 54.0, 41.0, 24.0, 26.0, 11.0, 9.0, 9.0, 5.0, 3.0, 4.0, 5.0, 2.0, 2.0, 2.0, 0.0, 1.0], "bins": [-1.392578125, -1.3511962890625, -1.309814453125, -1.2684326171875, -1.22705078125, -1.1856689453125, -1.144287109375, -1.1029052734375, -1.0615234375, -1.0201416015625, -0.978759765625, -0.9373779296875, -0.89599609375, -0.8546142578125, -0.813232421875, -0.7718505859375, -0.73046875, -0.6890869140625, -0.647705078125, -0.6063232421875, -0.56494140625, -0.5235595703125, -0.482177734375, -0.4407958984375, -0.3994140625, -0.3580322265625, -0.316650390625, -0.2752685546875, -0.23388671875, -0.1925048828125, -0.151123046875, -0.1097412109375, -0.068359375, -0.0269775390625, 0.014404296875, 0.0557861328125, 0.09716796875, 0.1385498046875, 0.179931640625, 0.2213134765625, 0.2626953125, 0.3040771484375, 0.345458984375, 0.3868408203125, 0.42822265625, 0.4696044921875, 0.510986328125, 0.5523681640625, 0.59375, 0.6351318359375, 0.676513671875, 0.7178955078125, 0.75927734375, 0.8006591796875, 0.842041015625, 0.8834228515625, 0.9248046875, 0.9661865234375, 1.007568359375, 1.0489501953125, 1.09033203125, 1.1317138671875, 1.173095703125, 1.2144775390625, 1.255859375]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 2.0, 8.0, 7.0, 12.0, 13.0, 16.0, 25.0, 33.0, 40.0, 30.0, 54.0, 47.0, 46.0, 48.0, 80.0, 55.0, 64.0, 84.0, 69.0, 47.0, 49.0, 37.0, 39.0, 20.0, 13.0, 16.0, 16.0, 11.0, 9.0, 7.0, 3.0, 5.0, 3.0, 0.0, 2.0, 0.0, 0.0, 3.0], "bins": [-2.44140625, -2.3825836181640625, -2.323760986328125, -2.2649383544921875, -2.20611572265625, -2.1472930908203125, -2.088470458984375, -2.0296478271484375, -1.9708251953125, -1.9120025634765625, -1.853179931640625, -1.7943572998046875, -1.73553466796875, -1.6767120361328125, -1.617889404296875, -1.5590667724609375, -1.500244140625, -1.4414215087890625, -1.382598876953125, -1.3237762451171875, -1.26495361328125, -1.2061309814453125, -1.147308349609375, -1.0884857177734375, -1.0296630859375, -0.9708404541015625, -0.912017822265625, -0.8531951904296875, -0.79437255859375, -0.7355499267578125, -0.676727294921875, -0.6179046630859375, -0.55908203125, -0.5002593994140625, -0.441436767578125, -0.3826141357421875, -0.32379150390625, -0.2649688720703125, -0.206146240234375, -0.1473236083984375, -0.0885009765625, -0.0296783447265625, 0.029144287109375, 0.0879669189453125, 0.14678955078125, 0.2056121826171875, 0.264434814453125, 0.3232574462890625, 0.382080078125, 0.4409027099609375, 0.499725341796875, 0.5585479736328125, 0.61737060546875, 0.6761932373046875, 0.735015869140625, 0.7938385009765625, 0.8526611328125, 0.9114837646484375, 0.970306396484375, 1.0291290283203125, 1.08795166015625, 1.1467742919921875, 1.205596923828125, 1.2644195556640625, 1.3232421875]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 10.0, 11.0, 27.0, 51.0, 68.0, 133.0, 271.0, 651.0, 2262.0, 18681.0, 2723805.0, 1431054.0, 14079.0, 1996.0, 600.0, 296.0, 139.0, 69.0, 35.0, 18.0, 12.0, 7.0, 1.0, 3.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-7.140625, -6.9022216796875, -6.663818359375, -6.4254150390625, -6.18701171875, -5.9486083984375, -5.710205078125, -5.4718017578125, -5.2333984375, -4.9949951171875, -4.756591796875, -4.5181884765625, -4.27978515625, -4.0413818359375, -3.802978515625, -3.5645751953125, -3.326171875, -3.0877685546875, -2.849365234375, -2.6109619140625, -2.37255859375, -2.1341552734375, -1.895751953125, -1.6573486328125, -1.4189453125, -1.1805419921875, -0.942138671875, -0.7037353515625, -0.46533203125, -0.2269287109375, 0.011474609375, 0.2498779296875, 0.48828125, 0.7266845703125, 0.965087890625, 1.2034912109375, 1.44189453125, 1.6802978515625, 1.918701171875, 2.1571044921875, 2.3955078125, 2.6339111328125, 2.872314453125, 3.1107177734375, 3.34912109375, 3.5875244140625, 3.825927734375, 4.0643310546875, 4.302734375, 4.5411376953125, 4.779541015625, 5.0179443359375, 5.25634765625, 5.4947509765625, 5.733154296875, 5.9715576171875, 6.2099609375, 6.4483642578125, 6.686767578125, 6.9251708984375, 7.16357421875, 7.4019775390625, 7.640380859375, 7.8787841796875, 8.1171875]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 6.0, 8.0, 8.0, 13.0, 31.0, 65.0, 108.0, 213.0, 572.0, 1155.0, 1070.0, 485.0, 181.0, 86.0, 36.0, 23.0, 8.0, 10.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.94140625, -4.74267578125, -4.5439453125, -4.34521484375, -4.146484375, -3.94775390625, -3.7490234375, -3.55029296875, -3.3515625, -3.15283203125, -2.9541015625, -2.75537109375, -2.556640625, -2.35791015625, -2.1591796875, -1.96044921875, -1.76171875, -1.56298828125, -1.3642578125, -1.16552734375, -0.966796875, -0.76806640625, -0.5693359375, -0.37060546875, -0.171875, 0.02685546875, 0.2255859375, 0.42431640625, 0.623046875, 0.82177734375, 1.0205078125, 1.21923828125, 1.41796875, 1.61669921875, 1.8154296875, 2.01416015625, 2.212890625, 2.41162109375, 2.6103515625, 2.80908203125, 3.0078125, 3.20654296875, 3.4052734375, 3.60400390625, 3.802734375, 4.00146484375, 4.2001953125, 4.39892578125, 4.59765625, 4.79638671875, 4.9951171875, 5.19384765625, 5.392578125, 5.59130859375, 5.7900390625, 5.98876953125, 6.1875, 6.38623046875, 6.5849609375, 6.78369140625, 6.982421875, 7.18115234375, 7.3798828125, 7.57861328125, 7.77734375]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 7.0, 14.0, 16.0, 67.0, 144.0, 229.0, 225.0, 167.0, 78.0, 27.0, 15.0, 5.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-67.09193420410156, -65.66987609863281, -64.24781799316406, -62.82575607299805, -61.40369415283203, -59.98163604736328, -58.559574127197266, -57.137516021728516, -55.7154541015625, -54.29339599609375, -52.871334075927734, -51.449275970458984, -50.02721405029297, -48.60515594482422, -47.1830940246582, -45.76103591918945, -44.33897399902344, -42.91691589355469, -41.49485397338867, -40.07279586791992, -38.650733947753906, -37.228675842285156, -35.80661392211914, -34.38455581665039, -32.96249771118164, -31.540437698364258, -30.118377685546875, -28.696317672729492, -27.27425765991211, -25.852197647094727, -24.430137634277344, -23.008079528808594, -21.586015701293945, -20.163955688476562, -18.74189567565918, -17.319835662841797, -15.897775650024414, -14.475715637207031, -13.053656578063965, -11.631596565246582, -10.2095365524292, -8.787476539611816, -7.365416526794434, -5.943356990814209, -4.521296977996826, -3.0992369651794434, -1.6771774291992188, -0.25511741638183594, 1.1669425964355469, 2.5890026092529297, 4.0110626220703125, 5.433122158050537, 6.85518217086792, 8.277242660522461, 9.699301719665527, 11.12136173248291, 12.543421745300293, 13.965481758117676, 15.387541770935059, 16.809600830078125, 18.231660842895508, 19.65372085571289, 21.075780868530273, 22.497840881347656, 23.91990089416504]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 7.0, 5.0, 13.0, 12.0, 20.0, 19.0, 20.0, 21.0, 35.0, 35.0, 41.0, 40.0, 55.0, 61.0, 64.0, 69.0, 69.0, 59.0, 73.0, 48.0, 39.0, 32.0, 20.0, 25.0, 22.0, 25.0, 20.0, 18.0, 4.0, 6.0, 6.0, 3.0, 6.0, 5.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-18.403064727783203, -17.816274642944336, -17.22948455810547, -16.642696380615234, -16.055906295776367, -15.4691162109375, -14.88232707977295, -14.295537948608398, -13.708747863769531, -13.121957778930664, -12.535168647766113, -11.948379516601562, -11.361589431762695, -10.774799346923828, -10.188010215759277, -9.601221084594727, -9.01443099975586, -8.427640914916992, -7.840851783752441, -7.254062175750732, -6.667272567749023, -6.0804829597473145, -5.4936933517456055, -4.9069037437438965, -4.3201141357421875, -3.7333245277404785, -3.1465349197387695, -2.5597453117370605, -1.9729557037353516, -1.3861660957336426, -0.7993764877319336, -0.2125868797302246, 0.3742046356201172, 0.9609942436218262, 1.5477838516235352, 2.134573459625244, 2.721363067626953, 3.308152675628662, 3.894942283630371, 4.48173189163208, 5.068521499633789, 5.655311107635498, 6.242100715637207, 6.828890323638916, 7.415679931640625, 8.002470016479492, 8.589259147644043, 9.176048278808594, 9.762838363647461, 10.349628448486328, 10.936417579650879, 11.52320671081543, 12.109996795654297, 12.696786880493164, 13.283576011657715, 13.870365142822266, 14.457155227661133, 15.0439453125, 15.63073444366455, 16.2175235748291, 16.80431365966797, 17.391103744506836, 17.977893829345703, 18.564682006835938, 19.151472091674805]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 6.0, 3.0, 7.0, 12.0, 12.0, 15.0, 25.0, 35.0, 56.0, 78.0, 110.0, 178.0, 286.0, 457.0, 727.0, 1215.0, 2073.0, 3599.0, 6345.0, 11362.0, 21645.0, 40898.0, 78200.0, 146408.0, 229935.0, 219620.0, 133746.0, 70580.0, 36995.0, 19621.0, 10505.0, 5695.0, 3255.0, 1900.0, 1117.0, 643.0, 397.0, 284.0, 180.0, 115.0, 85.0, 53.0, 31.0, 29.0, 8.0, 8.0, 7.0, 4.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.458984375, -1.408538818359375, -1.35809326171875, -1.307647705078125, -1.2572021484375, -1.206756591796875, -1.15631103515625, -1.105865478515625, -1.055419921875, -1.004974365234375, -0.95452880859375, -0.904083251953125, -0.8536376953125, -0.803192138671875, -0.75274658203125, -0.702301025390625, -0.65185546875, -0.601409912109375, -0.55096435546875, -0.500518798828125, -0.4500732421875, -0.399627685546875, -0.34918212890625, -0.298736572265625, -0.248291015625, -0.197845458984375, -0.14739990234375, -0.096954345703125, -0.0465087890625, 0.003936767578125, 0.05438232421875, 0.104827880859375, 0.1552734375, 0.205718994140625, 0.25616455078125, 0.306610107421875, 0.3570556640625, 0.407501220703125, 0.45794677734375, 0.508392333984375, 0.558837890625, 0.609283447265625, 0.65972900390625, 0.710174560546875, 0.7606201171875, 0.811065673828125, 0.86151123046875, 0.911956787109375, 0.96240234375, 1.012847900390625, 1.06329345703125, 1.113739013671875, 1.1641845703125, 1.214630126953125, 1.26507568359375, 1.315521240234375, 1.365966796875, 1.416412353515625, 1.46685791015625, 1.517303466796875, 1.5677490234375, 1.618194580078125, 1.66864013671875, 1.719085693359375, 1.76953125]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 8.0, 7.0, 12.0, 6.0, 20.0, 20.0, 24.0, 23.0, 31.0, 28.0, 39.0, 49.0, 41.0, 51.0, 63.0, 61.0, 61.0, 66.0, 57.0, 61.0, 47.0, 52.0, 37.0, 20.0, 28.0, 17.0, 23.0, 13.0, 10.0, 8.0, 11.0, 4.0, 6.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0], "bins": [-2.447265625, -2.386383056640625, -2.32550048828125, -2.264617919921875, -2.2037353515625, -2.142852783203125, -2.08197021484375, -2.021087646484375, -1.960205078125, -1.899322509765625, -1.83843994140625, -1.777557373046875, -1.7166748046875, -1.655792236328125, -1.59490966796875, -1.534027099609375, -1.47314453125, -1.412261962890625, -1.35137939453125, -1.290496826171875, -1.2296142578125, -1.168731689453125, -1.10784912109375, -1.046966552734375, -0.986083984375, -0.925201416015625, -0.86431884765625, -0.803436279296875, -0.7425537109375, -0.681671142578125, -0.62078857421875, -0.559906005859375, -0.4990234375, -0.438140869140625, -0.37725830078125, -0.316375732421875, -0.2554931640625, -0.194610595703125, -0.13372802734375, -0.072845458984375, -0.011962890625, 0.048919677734375, 0.10980224609375, 0.170684814453125, 0.2315673828125, 0.292449951171875, 0.35333251953125, 0.414215087890625, 0.47509765625, 0.535980224609375, 0.59686279296875, 0.657745361328125, 0.7186279296875, 0.779510498046875, 0.84039306640625, 0.901275634765625, 0.962158203125, 1.023040771484375, 1.08392333984375, 1.144805908203125, 1.2056884765625, 1.266571044921875, 1.32745361328125, 1.388336181640625, 1.44921875]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 3.0, 6.0, 6.0, 13.0, 11.0, 26.0, 33.0, 34.0, 38.0, 65.0, 68.0, 99.0, 166.0, 225.0, 362.0, 546.0, 1030.0, 2261.0, 5948.0, 21584.0, 100062.0, 625877.0, 227318.0, 44833.0, 10905.0, 3528.0, 1350.0, 757.0, 447.0, 264.0, 163.0, 165.0, 95.0, 65.0, 50.0, 38.0, 34.0, 18.0, 20.0, 15.0, 6.0, 9.0, 3.0, 3.0, 5.0, 3.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-4.3828125, -4.256683349609375, -4.13055419921875, -4.004425048828125, -3.8782958984375, -3.752166748046875, -3.62603759765625, -3.499908447265625, -3.373779296875, -3.247650146484375, -3.12152099609375, -2.995391845703125, -2.8692626953125, -2.743133544921875, -2.61700439453125, -2.490875244140625, -2.36474609375, -2.238616943359375, -2.11248779296875, -1.986358642578125, -1.8602294921875, -1.734100341796875, -1.60797119140625, -1.481842041015625, -1.355712890625, -1.229583740234375, -1.10345458984375, -0.977325439453125, -0.8511962890625, -0.725067138671875, -0.59893798828125, -0.472808837890625, -0.3466796875, -0.220550537109375, -0.09442138671875, 0.031707763671875, 0.1578369140625, 0.283966064453125, 0.41009521484375, 0.536224365234375, 0.662353515625, 0.788482666015625, 0.91461181640625, 1.040740966796875, 1.1668701171875, 1.292999267578125, 1.41912841796875, 1.545257568359375, 1.67138671875, 1.797515869140625, 1.92364501953125, 2.049774169921875, 2.1759033203125, 2.302032470703125, 2.42816162109375, 2.554290771484375, 2.680419921875, 2.806549072265625, 2.93267822265625, 3.058807373046875, 3.1849365234375, 3.311065673828125, 3.43719482421875, 3.563323974609375, 3.689453125]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 4.0, 0.0, 4.0, 7.0, 7.0, 14.0, 8.0, 12.0, 14.0, 22.0, 20.0, 27.0, 33.0, 35.0, 36.0, 40.0, 50.0, 46.0, 59.0, 68.0, 62.0, 57.0, 52.0, 58.0, 54.0, 36.0, 19.0, 26.0, 27.0, 19.0, 19.0, 16.0, 12.0, 15.0, 8.0, 4.0, 7.0, 3.0, 5.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.29296875, -6.088623046875, -5.88427734375, -5.679931640625, -5.4755859375, -5.271240234375, -5.06689453125, -4.862548828125, -4.658203125, -4.453857421875, -4.24951171875, -4.045166015625, -3.8408203125, -3.636474609375, -3.43212890625, -3.227783203125, -3.0234375, -2.819091796875, -2.61474609375, -2.410400390625, -2.2060546875, -2.001708984375, -1.79736328125, -1.593017578125, -1.388671875, -1.184326171875, -0.97998046875, -0.775634765625, -0.5712890625, -0.366943359375, -0.16259765625, 0.041748046875, 0.24609375, 0.450439453125, 0.65478515625, 0.859130859375, 1.0634765625, 1.267822265625, 1.47216796875, 1.676513671875, 1.880859375, 2.085205078125, 2.28955078125, 2.493896484375, 2.6982421875, 2.902587890625, 3.10693359375, 3.311279296875, 3.515625, 3.719970703125, 3.92431640625, 4.128662109375, 4.3330078125, 4.537353515625, 4.74169921875, 4.946044921875, 5.150390625, 5.354736328125, 5.55908203125, 5.763427734375, 5.9677734375, 6.172119140625, 6.37646484375, 6.580810546875, 6.78515625]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [4.0, 0.0, 2.0, 3.0, 3.0, 2.0, 4.0, 6.0, 0.0, 4.0, 7.0, 5.0, 4.0, 13.0, 15.0, 22.0, 28.0, 22.0, 37.0, 50.0, 72.0, 116.0, 158.0, 207.0, 316.0, 488.0, 822.0, 1588.0, 3269.0, 7587.0, 20416.0, 62625.0, 240451.0, 565109.0, 96368.0, 29128.0, 10599.0, 4309.0, 2012.0, 1004.0, 552.0, 343.0, 218.0, 160.0, 113.0, 85.0, 47.0, 40.0, 36.0, 36.0, 18.0, 13.0, 5.0, 9.0, 7.0, 6.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-1.1220703125, -1.0882415771484375, -1.054412841796875, -1.0205841064453125, -0.98675537109375, -0.9529266357421875, -0.919097900390625, -0.8852691650390625, -0.8514404296875, -0.8176116943359375, -0.783782958984375, -0.7499542236328125, -0.71612548828125, -0.6822967529296875, -0.648468017578125, -0.6146392822265625, -0.580810546875, -0.5469818115234375, -0.513153076171875, -0.4793243408203125, -0.44549560546875, -0.4116668701171875, -0.377838134765625, -0.3440093994140625, -0.3101806640625, -0.2763519287109375, -0.242523193359375, -0.2086944580078125, -0.17486572265625, -0.1410369873046875, -0.107208251953125, -0.0733795166015625, -0.03955078125, -0.0057220458984375, 0.028106689453125, 0.0619354248046875, 0.09576416015625, 0.1295928955078125, 0.163421630859375, 0.1972503662109375, 0.2310791015625, 0.2649078369140625, 0.298736572265625, 0.3325653076171875, 0.36639404296875, 0.4002227783203125, 0.434051513671875, 0.4678802490234375, 0.501708984375, 0.5355377197265625, 0.569366455078125, 0.6031951904296875, 0.63702392578125, 0.6708526611328125, 0.704681396484375, 0.7385101318359375, 0.7723388671875, 0.8061676025390625, 0.839996337890625, 0.8738250732421875, 0.90765380859375, 0.9414825439453125, 0.975311279296875, 1.0091400146484375, 1.04296875]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 3.0, 1.0, 3.0, 7.0, 9.0, 2.0, 16.0, 22.0, 20.0, 17.0, 21.0, 50.0, 38.0, 61.0, 101.0, 108.0, 111.0, 97.0, 86.0, 58.0, 37.0, 30.0, 31.0, 18.0, 16.0, 13.0, 17.0, 2.0, 4.0, 3.0, 2.0, 3.0, 5.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00039768218994140625, -0.00038129836320877075, -0.00036491453647613525, -0.00034853070974349976, -0.00033214688301086426, -0.00031576305627822876, -0.00029937922954559326, -0.00028299540281295776, -0.00026661157608032227, -0.00025022774934768677, -0.00023384392261505127, -0.00021746009588241577, -0.00020107626914978027, -0.00018469244241714478, -0.00016830861568450928, -0.00015192478895187378, -0.00013554096221923828, -0.00011915713548660278, -0.00010277330875396729, -8.638948202133179e-05, -7.000565528869629e-05, -5.362182855606079e-05, -3.723800182342529e-05, -2.0854175090789795e-05, -4.470348358154297e-06, 1.1913478374481201e-05, 2.82973051071167e-05, 4.46811318397522e-05, 6.10649585723877e-05, 7.74487853050232e-05, 9.383261203765869e-05, 0.00011021643877029419, 0.0001266002655029297, 0.00014298409223556519, 0.00015936791896820068, 0.00017575174570083618, 0.00019213557243347168, 0.00020851939916610718, 0.00022490322589874268, 0.00024128705263137817, 0.00025767087936401367, 0.00027405470609664917, 0.00029043853282928467, 0.00030682235956192017, 0.00032320618629455566, 0.00033959001302719116, 0.00035597383975982666, 0.00037235766649246216, 0.00038874149322509766, 0.00040512531995773315, 0.00042150914669036865, 0.00043789297342300415, 0.00045427680015563965, 0.00047066062688827515, 0.00048704445362091064, 0.0005034282803535461, 0.0005198121070861816, 0.0005361959338188171, 0.0005525797605514526, 0.0005689635872840881, 0.0005853474140167236, 0.0006017312407493591, 0.0006181150674819946, 0.0006344988942146301, 0.0006508827209472656]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 2.0, 1.0, 4.0, 6.0, 8.0, 10.0, 9.0, 9.0, 22.0, 31.0, 59.0, 81.0, 115.0, 164.0, 292.0, 519.0, 935.0, 2152.0, 5420.0, 17124.0, 71088.0, 596955.0, 281103.0, 51083.0, 13095.0, 4417.0, 1810.0, 877.0, 418.0, 262.0, 166.0, 97.0, 80.0, 48.0, 25.0, 18.0, 13.0, 6.0, 6.0, 3.0, 5.0, 4.0, 1.0, 2.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.333984375, -1.2921600341796875, -1.250335693359375, -1.2085113525390625, -1.16668701171875, -1.1248626708984375, -1.083038330078125, -1.0412139892578125, -0.9993896484375, -0.9575653076171875, -0.915740966796875, -0.8739166259765625, -0.83209228515625, -0.7902679443359375, -0.748443603515625, -0.7066192626953125, -0.664794921875, -0.6229705810546875, -0.581146240234375, -0.5393218994140625, -0.49749755859375, -0.4556732177734375, -0.413848876953125, -0.3720245361328125, -0.3302001953125, -0.2883758544921875, -0.246551513671875, -0.2047271728515625, -0.16290283203125, -0.1210784912109375, -0.079254150390625, -0.0374298095703125, 0.00439453125, 0.0462188720703125, 0.088043212890625, 0.1298675537109375, 0.17169189453125, 0.2135162353515625, 0.255340576171875, 0.2971649169921875, 0.3389892578125, 0.3808135986328125, 0.422637939453125, 0.4644622802734375, 0.50628662109375, 0.5481109619140625, 0.589935302734375, 0.6317596435546875, 0.673583984375, 0.7154083251953125, 0.757232666015625, 0.7990570068359375, 0.84088134765625, 0.8827056884765625, 0.924530029296875, 0.9663543701171875, 1.0081787109375, 1.0500030517578125, 1.091827392578125, 1.1336517333984375, 1.17547607421875, 1.2173004150390625, 1.259124755859375, 1.3009490966796875, 1.3427734375]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 3.0, 3.0, 5.0, 3.0, 5.0, 8.0, 12.0, 31.0, 22.0, 48.0, 51.0, 94.0, 115.0, 109.0, 110.0, 117.0, 74.0, 53.0, 43.0, 37.0, 17.0, 12.0, 12.0, 5.0, 5.0, 2.0, 2.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0], "bins": [-2.345703125, -2.288482666015625, -2.23126220703125, -2.174041748046875, -2.1168212890625, -2.059600830078125, -2.00238037109375, -1.945159912109375, -1.887939453125, -1.830718994140625, -1.77349853515625, -1.716278076171875, -1.6590576171875, -1.601837158203125, -1.54461669921875, -1.487396240234375, -1.43017578125, -1.372955322265625, -1.31573486328125, -1.258514404296875, -1.2012939453125, -1.144073486328125, -1.08685302734375, -1.029632568359375, -0.972412109375, -0.915191650390625, -0.85797119140625, -0.800750732421875, -0.7435302734375, -0.686309814453125, -0.62908935546875, -0.571868896484375, -0.5146484375, -0.457427978515625, -0.40020751953125, -0.342987060546875, -0.2857666015625, -0.228546142578125, -0.17132568359375, -0.114105224609375, -0.056884765625, 0.000335693359375, 0.05755615234375, 0.114776611328125, 0.1719970703125, 0.229217529296875, 0.28643798828125, 0.343658447265625, 0.40087890625, 0.458099365234375, 0.51531982421875, 0.572540283203125, 0.6297607421875, 0.686981201171875, 0.74420166015625, 0.801422119140625, 0.858642578125, 0.915863037109375, 0.97308349609375, 1.030303955078125, 1.0875244140625, 1.144744873046875, 1.20196533203125, 1.259185791015625, 1.31640625]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 7.0, 12.0, 42.0, 97.0, 165.0, 360.0, 167.0, 75.0, 49.0, 13.0, 9.0, 2.0, 4.0, 1.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.885417938232422, -20.35997772216797, -18.83453941345215, -17.309099197387695, -15.783659934997559, -14.258220672607422, -12.732780456542969, -11.207341194152832, -9.681901931762695, -8.156462669372559, -6.631022930145264, -5.105583190917969, -3.580143928527832, -2.0547046661376953, -0.5292644500732422, 0.9961748123168945, 2.5216140747070312, 4.047053337097168, 5.572493076324463, 7.097932815551758, 8.623372077941895, 10.148811340332031, 11.674251556396484, 13.199690818786621, 14.725130081176758, 16.25057029724121, 17.77600860595703, 19.301448822021484, 20.826889038085938, 22.352327346801758, 23.87776756286621, 25.40320587158203, 26.92864990234375, 28.454090118408203, 29.979528427124023, 31.504968643188477, 33.0304069519043, 34.55584716796875, 36.0812873840332, 37.606727600097656, 39.132164001464844, 40.6576042175293, 42.18304443359375, 43.70848083496094, 45.23392105102539, 46.759361267089844, 48.2848014831543, 49.81024169921875, 51.3356819152832, 52.861122131347656, 54.38656234741211, 55.91200256347656, 57.43743896484375, 58.9628791809082, 60.488319396972656, 62.01375961303711, 63.53919982910156, 65.06463623046875, 66.59008026123047, 68.11551666259766, 69.64096069335938, 71.16639709472656, 72.69183349609375, 74.21727752685547, 75.74271392822266]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 6.0, 3.0, 3.0, 6.0, 7.0, 11.0, 13.0, 14.0, 18.0, 13.0, 25.0, 28.0, 26.0, 21.0, 31.0, 28.0, 36.0, 52.0, 72.0, 105.0, 90.0, 61.0, 44.0, 40.0, 27.0, 31.0, 27.0, 28.0, 23.0, 23.0, 17.0, 13.0, 11.0, 13.0, 8.0, 4.0, 6.0, 8.0, 4.0, 5.0, 6.0, 0.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.87209129333496, -22.155550003051758, -21.439008712768555, -20.72246742248535, -20.00592613220215, -19.289384841918945, -18.57284164428711, -17.856300354003906, -17.139759063720703, -16.4232177734375, -15.706676483154297, -14.990135192871094, -14.27359390258789, -13.557052612304688, -12.840510368347168, -12.123969078063965, -11.407428741455078, -10.690887451171875, -9.974346160888672, -9.257804870605469, -8.541263580322266, -7.824721813201904, -7.108180046081543, -6.39163875579834, -5.675097465515137, -4.958556175231934, -4.2420148849487305, -3.525473117828369, -2.808931827545166, -2.092390537261963, -1.3758487701416016, -0.6593074798583984, 0.05723381042480469, 0.7737752199172974, 1.49031662940979, 2.2068581581115723, 2.9233994483947754, 3.6399407386779785, 4.35648250579834, 5.073023796081543, 5.789565086364746, 6.506106376647949, 7.222647666931152, 7.939189434051514, 8.655731201171875, 9.372272491455078, 10.088813781738281, 10.805355072021484, 11.521896362304688, 12.23843765258789, 12.954978942871094, 13.671520233154297, 14.3880615234375, 15.104602813720703, 15.821145057678223, 16.53768539428711, 17.254226684570312, 17.970767974853516, 18.68730926513672, 19.403850555419922, 20.120391845703125, 20.836933135986328, 21.55347442626953, 22.270015716552734, 22.98655891418457]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 5.0, 4.0, 10.0, 11.0, 11.0, 13.0, 23.0, 16.0, 34.0, 35.0, 51.0, 95.0, 78.0, 123.0, 208.0, 313.0, 581.0, 997.0, 2201.0, 5864.0, 21108.0, 131343.0, 1274474.0, 2364980.0, 335729.0, 40042.0, 9474.0, 3251.0, 1360.0, 703.0, 369.0, 210.0, 158.0, 108.0, 69.0, 62.0, 41.0, 36.0, 22.0, 22.0, 15.0, 9.0, 9.0, 7.0, 7.0, 5.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.25390625, -2.176727294921875, -2.09954833984375, -2.022369384765625, -1.9451904296875, -1.868011474609375, -1.79083251953125, -1.713653564453125, -1.636474609375, -1.559295654296875, -1.48211669921875, -1.404937744140625, -1.3277587890625, -1.250579833984375, -1.17340087890625, -1.096221923828125, -1.01904296875, -0.941864013671875, -0.86468505859375, -0.787506103515625, -0.7103271484375, -0.633148193359375, -0.55596923828125, -0.478790283203125, -0.401611328125, -0.324432373046875, -0.24725341796875, -0.170074462890625, -0.0928955078125, -0.015716552734375, 0.06146240234375, 0.138641357421875, 0.2158203125, 0.292999267578125, 0.37017822265625, 0.447357177734375, 0.5245361328125, 0.601715087890625, 0.67889404296875, 0.756072998046875, 0.833251953125, 0.910430908203125, 0.98760986328125, 1.064788818359375, 1.1419677734375, 1.219146728515625, 1.29632568359375, 1.373504638671875, 1.45068359375, 1.527862548828125, 1.60504150390625, 1.682220458984375, 1.7593994140625, 1.836578369140625, 1.91375732421875, 1.990936279296875, 2.068115234375, 2.145294189453125, 2.22247314453125, 2.299652099609375, 2.3768310546875, 2.454010009765625, 2.53118896484375, 2.608367919921875, 2.685546875]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 3.0, 6.0, 11.0, 12.0, 12.0, 19.0, 19.0, 23.0, 29.0, 25.0, 45.0, 48.0, 41.0, 57.0, 55.0, 70.0, 56.0, 60.0, 63.0, 57.0, 50.0, 48.0, 47.0, 32.0, 21.0, 20.0, 18.0, 20.0, 14.0, 7.0, 7.0, 3.0, 5.0, 4.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.1875, -2.12762451171875, -2.0677490234375, -2.00787353515625, -1.947998046875, -1.88812255859375, -1.8282470703125, -1.76837158203125, -1.70849609375, -1.64862060546875, -1.5887451171875, -1.52886962890625, -1.468994140625, -1.40911865234375, -1.3492431640625, -1.28936767578125, -1.2294921875, -1.16961669921875, -1.1097412109375, -1.04986572265625, -0.989990234375, -0.93011474609375, -0.8702392578125, -0.81036376953125, -0.75048828125, -0.69061279296875, -0.6307373046875, -0.57086181640625, -0.510986328125, -0.45111083984375, -0.3912353515625, -0.33135986328125, -0.271484375, -0.21160888671875, -0.1517333984375, -0.09185791015625, -0.031982421875, 0.02789306640625, 0.0877685546875, 0.14764404296875, 0.20751953125, 0.26739501953125, 0.3272705078125, 0.38714599609375, 0.447021484375, 0.50689697265625, 0.5667724609375, 0.62664794921875, 0.6865234375, 0.74639892578125, 0.8062744140625, 0.86614990234375, 0.926025390625, 0.98590087890625, 1.0457763671875, 1.10565185546875, 1.16552734375, 1.22540283203125, 1.2852783203125, 1.34515380859375, 1.405029296875, 1.46490478515625, 1.5247802734375, 1.58465576171875, 1.64453125]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 5.0, 4.0, 5.0, 9.0, 11.0, 10.0, 18.0, 34.0, 31.0, 60.0, 93.0, 139.0, 292.0, 535.0, 1118.0, 4328.0, 28110.0, 819382.0, 3262128.0, 67152.0, 7582.0, 1710.0, 658.0, 338.0, 177.0, 122.0, 72.0, 50.0, 36.0, 24.0, 17.0, 12.0, 10.0, 5.0, 3.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.91015625, -5.74212646484375, -5.5740966796875, -5.40606689453125, -5.238037109375, -5.07000732421875, -4.9019775390625, -4.73394775390625, -4.56591796875, -4.39788818359375, -4.2298583984375, -4.06182861328125, -3.893798828125, -3.72576904296875, -3.5577392578125, -3.38970947265625, -3.2216796875, -3.05364990234375, -2.8856201171875, -2.71759033203125, -2.549560546875, -2.38153076171875, -2.2135009765625, -2.04547119140625, -1.87744140625, -1.70941162109375, -1.5413818359375, -1.37335205078125, -1.205322265625, -1.03729248046875, -0.8692626953125, -0.70123291015625, -0.533203125, -0.36517333984375, -0.1971435546875, -0.02911376953125, 0.138916015625, 0.30694580078125, 0.4749755859375, 0.64300537109375, 0.81103515625, 0.97906494140625, 1.1470947265625, 1.31512451171875, 1.483154296875, 1.65118408203125, 1.8192138671875, 1.98724365234375, 2.1552734375, 2.32330322265625, 2.4913330078125, 2.65936279296875, 2.827392578125, 2.99542236328125, 3.1634521484375, 3.33148193359375, 3.49951171875, 3.66754150390625, 3.8355712890625, 4.00360107421875, 4.171630859375, 4.33966064453125, 4.5076904296875, 4.67572021484375, 4.84375]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 5.0, 1.0, 4.0, 3.0, 5.0, 5.0, 7.0, 16.0, 15.0, 17.0, 29.0, 35.0, 42.0, 74.0, 95.0, 171.0, 254.0, 420.0, 566.0, 624.0, 559.0, 381.0, 229.0, 160.0, 108.0, 71.0, 47.0, 40.0, 23.0, 21.0, 12.0, 13.0, 8.0, 9.0, 3.0, 2.0, 4.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.146484375, -3.036407470703125, -2.92633056640625, -2.816253662109375, -2.7061767578125, -2.596099853515625, -2.48602294921875, -2.375946044921875, -2.265869140625, -2.155792236328125, -2.04571533203125, -1.935638427734375, -1.8255615234375, -1.715484619140625, -1.60540771484375, -1.495330810546875, -1.38525390625, -1.275177001953125, -1.16510009765625, -1.055023193359375, -0.9449462890625, -0.834869384765625, -0.72479248046875, -0.614715576171875, -0.504638671875, -0.394561767578125, -0.28448486328125, -0.174407958984375, -0.0643310546875, 0.045745849609375, 0.15582275390625, 0.265899658203125, 0.3759765625, 0.486053466796875, 0.59613037109375, 0.706207275390625, 0.8162841796875, 0.926361083984375, 1.03643798828125, 1.146514892578125, 1.256591796875, 1.366668701171875, 1.47674560546875, 1.586822509765625, 1.6968994140625, 1.806976318359375, 1.91705322265625, 2.027130126953125, 2.13720703125, 2.247283935546875, 2.35736083984375, 2.467437744140625, 2.5775146484375, 2.687591552734375, 2.79766845703125, 2.907745361328125, 3.017822265625, 3.127899169921875, 3.23797607421875, 3.348052978515625, 3.4581298828125, 3.568206787109375, 3.67828369140625, 3.788360595703125, 3.8984375]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 6.0, 4.0, 13.0, 19.0, 41.0, 72.0, 151.0, 235.0, 206.0, 146.0, 67.0, 25.0, 7.0, 5.0, 3.0, 3.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-72.03519439697266, -70.50180053710938, -68.9684066772461, -67.43501281738281, -65.90161895751953, -64.36822509765625, -62.8348274230957, -61.30143356323242, -59.768035888671875, -58.234642028808594, -56.70124816894531, -55.16785430908203, -53.634456634521484, -52.1010627746582, -50.56766891479492, -49.03427505493164, -47.50088119506836, -45.96748733520508, -44.4340934753418, -42.90069580078125, -41.36730194091797, -39.83390808105469, -38.300514221191406, -36.767120361328125, -35.233726501464844, -33.70033264160156, -32.16693878173828, -30.633543014526367, -29.100147247314453, -27.566753387451172, -26.03335952758789, -24.49996566772461, -22.966564178466797, -21.433170318603516, -19.8997745513916, -18.36638069152832, -16.832984924316406, -15.299591064453125, -13.766197204589844, -12.232802391052246, -10.699407577514648, -9.16601276397705, -7.632618427276611, -6.099224090576172, -4.565829277038574, -3.0324344635009766, -1.4990406036376953, 0.034354209899902344, 1.5677490234375, 3.1011435985565186, 4.634538173675537, 6.167932510375977, 7.701327323913574, 9.234722137451172, 10.768115997314453, 12.30151081085205, 13.834905624389648, 15.368300437927246, 16.901695251464844, 18.435089111328125, 19.968482971191406, 21.50187873840332, 23.0352725982666, 24.568668365478516, 26.102062225341797]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 3.0, 3.0, 4.0, 4.0, 6.0, 7.0, 13.0, 18.0, 18.0, 32.0, 26.0, 30.0, 54.0, 45.0, 56.0, 61.0, 58.0, 51.0, 71.0, 69.0, 72.0, 50.0, 47.0, 35.0, 35.0, 29.0, 35.0, 24.0, 16.0, 7.0, 12.0, 7.0, 5.0, 4.0, 2.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.86525535583496, -18.174976348876953, -17.484697341918945, -16.794418334960938, -16.10413932800293, -15.413860321044922, -14.72358226776123, -14.033303260803223, -13.343024253845215, -12.652745246887207, -11.9624662399292, -11.272188186645508, -10.5819091796875, -9.891630172729492, -9.201351165771484, -8.511072158813477, -7.820793151855469, -7.130514144897461, -6.440235137939453, -5.7499566078186035, -5.059677600860596, -4.369398593902588, -3.6791200637817383, -2.9888410568237305, -2.2985620498657227, -1.6082831621170044, -0.9180042743682861, -0.22772550582885742, 0.4625535011291504, 1.1528325080871582, 1.8431110382080078, 2.5333900451660156, 3.2236709594726562, 3.913949966430664, 4.604228973388672, 5.2945075035095215, 5.984786510467529, 6.675065517425537, 7.365344047546387, 8.055623054504395, 8.745902061462402, 9.43618106842041, 10.126460075378418, 10.81673812866211, 11.507017135620117, 12.197296142578125, 12.887575149536133, 13.57785415649414, 14.268133163452148, 14.958412170410156, 15.648691177368164, 16.338970184326172, 17.02924919128418, 17.719528198242188, 18.409805297851562, 19.100086212158203, 19.790363311767578, 20.480642318725586, 21.170921325683594, 21.8612003326416, 22.55147933959961, 23.241758346557617, 23.932037353515625, 24.622314453125, 25.31259536743164]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 2.0, 6.0, 6.0, 15.0, 7.0, 14.0, 23.0, 35.0, 47.0, 74.0, 102.0, 122.0, 166.0, 283.0, 419.0, 622.0, 941.0, 1410.0, 2259.0, 3496.0, 5646.0, 9237.0, 15854.0, 27847.0, 51174.0, 98700.0, 188605.0, 261590.0, 175832.0, 91909.0, 47635.0, 25936.0, 14716.0, 8857.0, 5379.0, 3349.0, 2123.0, 1406.0, 943.0, 534.0, 391.0, 253.0, 179.0, 132.0, 97.0, 51.0, 44.0, 27.0, 19.0, 16.0, 9.0, 8.0, 5.0, 5.0, 2.0, 3.0, 4.0], "bins": [-2.11328125, -2.0519256591796875, -1.990570068359375, -1.9292144775390625, -1.86785888671875, -1.8065032958984375, -1.745147705078125, -1.6837921142578125, -1.6224365234375, -1.5610809326171875, -1.499725341796875, -1.4383697509765625, -1.37701416015625, -1.3156585693359375, -1.254302978515625, -1.1929473876953125, -1.131591796875, -1.0702362060546875, -1.008880615234375, -0.9475250244140625, -0.88616943359375, -0.8248138427734375, -0.763458251953125, -0.7021026611328125, -0.6407470703125, -0.5793914794921875, -0.518035888671875, -0.4566802978515625, -0.39532470703125, -0.3339691162109375, -0.272613525390625, -0.2112579345703125, -0.14990234375, -0.0885467529296875, -0.027191162109375, 0.0341644287109375, 0.09552001953125, 0.1568756103515625, 0.218231201171875, 0.2795867919921875, 0.3409423828125, 0.4022979736328125, 0.463653564453125, 0.5250091552734375, 0.58636474609375, 0.6477203369140625, 0.709075927734375, 0.7704315185546875, 0.831787109375, 0.8931427001953125, 0.954498291015625, 1.0158538818359375, 1.07720947265625, 1.1385650634765625, 1.199920654296875, 1.2612762451171875, 1.3226318359375, 1.3839874267578125, 1.445343017578125, 1.5066986083984375, 1.56805419921875, 1.6294097900390625, 1.690765380859375, 1.7521209716796875, 1.8134765625]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 2.0, 6.0, 4.0, 11.0, 10.0, 10.0, 16.0, 22.0, 20.0, 28.0, 30.0, 45.0, 31.0, 48.0, 62.0, 75.0, 64.0, 67.0, 69.0, 62.0, 28.0, 45.0, 47.0, 51.0, 35.0, 25.0, 17.0, 20.0, 15.0, 12.0, 10.0, 6.0, 9.0, 3.0, 1.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.22265625, -2.156036376953125, -2.08941650390625, -2.022796630859375, -1.9561767578125, -1.889556884765625, -1.82293701171875, -1.756317138671875, -1.689697265625, -1.623077392578125, -1.55645751953125, -1.489837646484375, -1.4232177734375, -1.356597900390625, -1.28997802734375, -1.223358154296875, -1.15673828125, -1.090118408203125, -1.02349853515625, -0.956878662109375, -0.8902587890625, -0.823638916015625, -0.75701904296875, -0.690399169921875, -0.623779296875, -0.557159423828125, -0.49053955078125, -0.423919677734375, -0.3572998046875, -0.290679931640625, -0.22406005859375, -0.157440185546875, -0.0908203125, -0.024200439453125, 0.04241943359375, 0.109039306640625, 0.1756591796875, 0.242279052734375, 0.30889892578125, 0.375518798828125, 0.442138671875, 0.508758544921875, 0.57537841796875, 0.641998291015625, 0.7086181640625, 0.775238037109375, 0.84185791015625, 0.908477783203125, 0.97509765625, 1.041717529296875, 1.10833740234375, 1.174957275390625, 1.2415771484375, 1.308197021484375, 1.37481689453125, 1.441436767578125, 1.508056640625, 1.574676513671875, 1.64129638671875, 1.707916259765625, 1.7745361328125, 1.841156005859375, 1.90777587890625, 1.974395751953125, 2.041015625]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 3.0, 3.0, 6.0, 9.0, 8.0, 18.0, 22.0, 32.0, 44.0, 53.0, 90.0, 111.0, 175.0, 224.0, 365.0, 611.0, 1080.0, 2278.0, 6126.0, 27430.0, 234953.0, 707068.0, 51862.0, 9428.0, 3066.0, 1416.0, 706.0, 423.0, 290.0, 176.0, 140.0, 90.0, 74.0, 48.0, 27.0, 30.0, 17.0, 16.0, 8.0, 5.0, 6.0, 5.0, 4.0, 8.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.7578125, -5.57305908203125, -5.3883056640625, -5.20355224609375, -5.018798828125, -4.83404541015625, -4.6492919921875, -4.46453857421875, -4.27978515625, -4.09503173828125, -3.9102783203125, -3.72552490234375, -3.540771484375, -3.35601806640625, -3.1712646484375, -2.98651123046875, -2.8017578125, -2.61700439453125, -2.4322509765625, -2.24749755859375, -2.062744140625, -1.87799072265625, -1.6932373046875, -1.50848388671875, -1.32373046875, -1.13897705078125, -0.9542236328125, -0.76947021484375, -0.584716796875, -0.39996337890625, -0.2152099609375, -0.03045654296875, 0.154296875, 0.33905029296875, 0.5238037109375, 0.70855712890625, 0.893310546875, 1.07806396484375, 1.2628173828125, 1.44757080078125, 1.63232421875, 1.81707763671875, 2.0018310546875, 2.18658447265625, 2.371337890625, 2.55609130859375, 2.7408447265625, 2.92559814453125, 3.1103515625, 3.29510498046875, 3.4798583984375, 3.66461181640625, 3.849365234375, 4.03411865234375, 4.2188720703125, 4.40362548828125, 4.58837890625, 4.77313232421875, 4.9578857421875, 5.14263916015625, 5.327392578125, 5.51214599609375, 5.6968994140625, 5.88165283203125, 6.06640625]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 5.0, 3.0, 5.0, 8.0, 9.0, 9.0, 9.0, 21.0, 25.0, 30.0, 35.0, 49.0, 46.0, 61.0, 55.0, 86.0, 58.0, 63.0, 72.0, 65.0, 46.0, 45.0, 40.0, 37.0, 27.0, 13.0, 18.0, 14.0, 7.0, 8.0, 8.0, 7.0, 3.0, 6.0, 4.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.5625, -7.316650390625, -7.07080078125, -6.824951171875, -6.5791015625, -6.333251953125, -6.08740234375, -5.841552734375, -5.595703125, -5.349853515625, -5.10400390625, -4.858154296875, -4.6123046875, -4.366455078125, -4.12060546875, -3.874755859375, -3.62890625, -3.383056640625, -3.13720703125, -2.891357421875, -2.6455078125, -2.399658203125, -2.15380859375, -1.907958984375, -1.662109375, -1.416259765625, -1.17041015625, -0.924560546875, -0.6787109375, -0.432861328125, -0.18701171875, 0.058837890625, 0.3046875, 0.550537109375, 0.79638671875, 1.042236328125, 1.2880859375, 1.533935546875, 1.77978515625, 2.025634765625, 2.271484375, 2.517333984375, 2.76318359375, 3.009033203125, 3.2548828125, 3.500732421875, 3.74658203125, 3.992431640625, 4.23828125, 4.484130859375, 4.72998046875, 4.975830078125, 5.2216796875, 5.467529296875, 5.71337890625, 5.959228515625, 6.205078125, 6.450927734375, 6.69677734375, 6.942626953125, 7.1884765625, 7.434326171875, 7.68017578125, 7.926025390625, 8.171875]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 3.0, 8.0, 7.0, 10.0, 24.0, 32.0, 70.0, 134.0, 249.0, 555.0, 1478.0, 4984.0, 26227.0, 235426.0, 720883.0, 47085.0, 8071.0, 1985.0, 678.0, 301.0, 142.0, 79.0, 55.0, 26.0, 17.0, 10.0, 8.0, 3.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.5810546875, -1.530303955078125, -1.47955322265625, -1.428802490234375, -1.3780517578125, -1.327301025390625, -1.27655029296875, -1.225799560546875, -1.175048828125, -1.124298095703125, -1.07354736328125, -1.022796630859375, -0.9720458984375, -0.921295166015625, -0.87054443359375, -0.819793701171875, -0.76904296875, -0.718292236328125, -0.66754150390625, -0.616790771484375, -0.5660400390625, -0.515289306640625, -0.46453857421875, -0.413787841796875, -0.363037109375, -0.312286376953125, -0.26153564453125, -0.210784912109375, -0.1600341796875, -0.109283447265625, -0.05853271484375, -0.007781982421875, 0.04296875, 0.093719482421875, 0.14447021484375, 0.195220947265625, 0.2459716796875, 0.296722412109375, 0.34747314453125, 0.398223876953125, 0.448974609375, 0.499725341796875, 0.55047607421875, 0.601226806640625, 0.6519775390625, 0.702728271484375, 0.75347900390625, 0.804229736328125, 0.85498046875, 0.905731201171875, 0.95648193359375, 1.007232666015625, 1.0579833984375, 1.108734130859375, 1.15948486328125, 1.210235595703125, 1.260986328125, 1.311737060546875, 1.36248779296875, 1.413238525390625, 1.4639892578125, 1.514739990234375, 1.56549072265625, 1.616241455078125, 1.6669921875]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 1.0, 7.0, 2.0, 1.0, 3.0, 8.0, 5.0, 11.0, 9.0, 8.0, 13.0, 26.0, 26.0, 25.0, 26.0, 39.0, 43.0, 63.0, 78.0, 81.0, 86.0, 80.0, 56.0, 55.0, 46.0, 39.0, 35.0, 22.0, 26.0, 14.0, 14.0, 11.0, 9.0, 8.0, 9.0, 6.0, 1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 1.0, 5.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0], "bins": [-0.00040721893310546875, -0.00039536505937576294, -0.00038351118564605713, -0.0003716573119163513, -0.0003598034381866455, -0.0003479495644569397, -0.0003360956907272339, -0.0003242418169975281, -0.00031238794326782227, -0.00030053406953811646, -0.00028868019580841064, -0.00027682632207870483, -0.000264972448348999, -0.0002531185746192932, -0.0002412647008895874, -0.0002294108271598816, -0.00021755695343017578, -0.00020570307970046997, -0.00019384920597076416, -0.00018199533224105835, -0.00017014145851135254, -0.00015828758478164673, -0.00014643371105194092, -0.0001345798373222351, -0.0001227259635925293, -0.00011087208986282349, -9.901821613311768e-05, -8.716434240341187e-05, -7.531046867370605e-05, -6.345659494400024e-05, -5.1602721214294434e-05, -3.974884748458862e-05, -2.7894973754882812e-05, -1.6041100025177002e-05, -4.187226295471191e-06, 7.666647434234619e-06, 1.952052116394043e-05, 3.137439489364624e-05, 4.322826862335205e-05, 5.508214235305786e-05, 6.693601608276367e-05, 7.878988981246948e-05, 9.064376354217529e-05, 0.0001024976372718811, 0.00011435151100158691, 0.00012620538473129272, 0.00013805925846099854, 0.00014991313219070435, 0.00016176700592041016, 0.00017362087965011597, 0.00018547475337982178, 0.0001973286271095276, 0.0002091825008392334, 0.0002210363745689392, 0.00023289024829864502, 0.00024474412202835083, 0.00025659799575805664, 0.00026845186948776245, 0.00028030574321746826, 0.00029215961694717407, 0.0003040134906768799, 0.0003158673644065857, 0.0003277212381362915, 0.0003395751118659973, 0.0003514289855957031]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 2.0, 2.0, 2.0, 7.0, 4.0, 7.0, 11.0, 15.0, 14.0, 13.0, 43.0, 43.0, 55.0, 85.0, 141.0, 176.0, 258.0, 462.0, 917.0, 1778.0, 3829.0, 10257.0, 33514.0, 143309.0, 665994.0, 137671.0, 32166.0, 9977.0, 3871.0, 1661.0, 868.0, 491.0, 285.0, 189.0, 140.0, 79.0, 74.0, 41.0, 28.0, 22.0, 16.0, 11.0, 6.0, 7.0, 7.0, 7.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.9990234375, -0.9700546264648438, -0.9410858154296875, -0.9121170043945312, -0.883148193359375, -0.8541793823242188, -0.8252105712890625, -0.7962417602539062, -0.76727294921875, -0.7383041381835938, -0.7093353271484375, -0.6803665161132812, -0.651397705078125, -0.6224288940429688, -0.5934600830078125, -0.5644912719726562, -0.5355224609375, -0.5065536499023438, -0.4775848388671875, -0.44861602783203125, -0.419647216796875, -0.39067840576171875, -0.3617095947265625, -0.33274078369140625, -0.30377197265625, -0.27480316162109375, -0.2458343505859375, -0.21686553955078125, -0.187896728515625, -0.15892791748046875, -0.1299591064453125, -0.10099029541015625, -0.072021484375, -0.04305267333984375, -0.0140838623046875, 0.01488494873046875, 0.043853759765625, 0.07282257080078125, 0.1017913818359375, 0.13076019287109375, 0.15972900390625, 0.18869781494140625, 0.2176666259765625, 0.24663543701171875, 0.275604248046875, 0.30457305908203125, 0.3335418701171875, 0.36251068115234375, 0.3914794921875, 0.42044830322265625, 0.4494171142578125, 0.47838592529296875, 0.507354736328125, 0.5363235473632812, 0.5652923583984375, 0.5942611694335938, 0.62322998046875, 0.6521987915039062, 0.6811676025390625, 0.7101364135742188, 0.739105224609375, 0.7680740356445312, 0.7970428466796875, 0.8260116577148438, 0.85498046875]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 5.0, 6.0, 6.0, 6.0, 17.0, 10.0, 18.0, 25.0, 43.0, 62.0, 44.0, 53.0, 96.0, 96.0, 78.0, 96.0, 76.0, 52.0, 43.0, 44.0, 30.0, 21.0, 14.0, 16.0, 15.0, 7.0, 7.0, 5.0, 1.0, 2.0, 5.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.3330078125, -1.2970962524414062, -1.2611846923828125, -1.2252731323242188, -1.189361572265625, -1.1534500122070312, -1.1175384521484375, -1.0816268920898438, -1.04571533203125, -1.0098037719726562, -0.9738922119140625, -0.9379806518554688, -0.902069091796875, -0.8661575317382812, -0.8302459716796875, -0.7943344116210938, -0.7584228515625, -0.7225112915039062, -0.6865997314453125, -0.6506881713867188, -0.614776611328125, -0.5788650512695312, -0.5429534912109375, -0.5070419311523438, -0.47113037109375, -0.43521881103515625, -0.3993072509765625, -0.36339569091796875, -0.327484130859375, -0.29157257080078125, -0.2556610107421875, -0.21974945068359375, -0.183837890625, -0.14792633056640625, -0.1120147705078125, -0.07610321044921875, -0.040191650390625, -0.00428009033203125, 0.0316314697265625, 0.06754302978515625, 0.10345458984375, 0.13936614990234375, 0.1752777099609375, 0.21118927001953125, 0.247100830078125, 0.28301239013671875, 0.3189239501953125, 0.35483551025390625, 0.3907470703125, 0.42665863037109375, 0.4625701904296875, 0.49848175048828125, 0.534393310546875, 0.5703048706054688, 0.6062164306640625, 0.6421279907226562, 0.67803955078125, 0.7139511108398438, 0.7498626708984375, 0.7857742309570312, 0.821685791015625, 0.8575973510742188, 0.8935089111328125, 0.9294204711914062, 0.96533203125]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 4.0, 13.0, 15.0, 31.0, 79.0, 129.0, 398.0, 162.0, 84.0, 44.0, 23.0, 13.0, 4.0, 2.0, 2.0, 5.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-51.56752014160156, -50.07111740112305, -48.574710845947266, -47.07830810546875, -45.58190155029297, -44.08549880981445, -42.58909606933594, -41.092689514160156, -39.59628677368164, -38.099884033203125, -36.603477478027344, -35.10707473754883, -33.61066818237305, -32.11426544189453, -30.617860794067383, -29.121456146240234, -27.625051498413086, -26.128646850585938, -24.63224220275879, -23.13583755493164, -21.639434814453125, -20.143030166625977, -18.646625518798828, -17.150222778320312, -15.653817176818848, -14.1574125289917, -12.661008834838867, -11.164604187011719, -9.66819953918457, -8.171795845031738, -6.67539119720459, -5.178987503051758, -3.6825828552246094, -2.186178684234619, -0.6897742748260498, 0.8066301345825195, 2.3030343055725098, 3.7994384765625, 5.295843124389648, 6.7922468185424805, 8.288651466369629, 9.785056114196777, 11.28145980834961, 12.777864456176758, 14.274269104003906, 15.770672798156738, 17.267078399658203, 18.76348114013672, 20.259885787963867, 21.756290435791016, 23.252695083618164, 24.749099731445312, 26.245502471923828, 27.741907119750977, 29.238311767578125, 30.73471450805664, 32.23112106323242, 33.72752380371094, 35.22393035888672, 36.720333099365234, 38.216739654541016, 39.71314239501953, 41.20954895019531, 42.70595169067383, 44.202354431152344]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 4.0, 3.0, 3.0, 6.0, 4.0, 2.0, 7.0, 5.0, 11.0, 14.0, 15.0, 15.0, 17.0, 18.0, 18.0, 35.0, 36.0, 26.0, 33.0, 38.0, 83.0, 138.0, 130.0, 44.0, 32.0, 25.0, 27.0, 30.0, 20.0, 24.0, 28.0, 23.0, 16.0, 5.0, 13.0, 16.0, 8.0, 8.0, 7.0, 3.0, 5.0, 4.0, 3.0, 3.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-23.42059326171875, -22.659746170043945, -21.89889907836914, -21.138051986694336, -20.37720489501953, -19.616357803344727, -18.855510711669922, -18.09466552734375, -17.333816528320312, -16.572969436645508, -15.812122344970703, -15.051275253295898, -14.290428161621094, -13.529581069946289, -12.7687349319458, -12.007887840270996, -11.247041702270508, -10.486194610595703, -9.725347518920898, -8.964500427246094, -8.203653335571289, -7.442806720733643, -6.681960105895996, -5.921113014221191, -5.160265922546387, -4.399418830871582, -3.6385719776153564, -2.877725124359131, -2.116878032684326, -1.3560309410095215, -0.595184326171875, 0.1656627655029297, 0.9265098571777344, 1.6873568296432495, 2.4482038021087646, 3.2090506553649902, 3.969897747039795, 4.7307448387146, 5.491591453552246, 6.252438545227051, 7.0132856369018555, 7.77413272857666, 8.534979820251465, 9.295825958251953, 10.056673049926758, 10.817520141601562, 11.578367233276367, 12.339214324951172, 13.100061416625977, 13.860908508300781, 14.621755599975586, 15.38260269165039, 16.143449783325195, 16.904296875, 17.665142059326172, 18.42599105834961, 19.18683624267578, 19.947683334350586, 20.70853042602539, 21.469377517700195, 22.230224609375, 22.991071701049805, 23.75191879272461, 24.51276397705078, 25.27361297607422]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 6.0, 1.0, 11.0, 6.0, 8.0, 15.0, 16.0, 26.0, 36.0, 58.0, 90.0, 118.0, 178.0, 252.0, 356.0, 556.0, 994.0, 1452.0, 2607.0, 4603.0, 9419.0, 20463.0, 54535.0, 173241.0, 566441.0, 1394259.0, 1245136.0, 484219.0, 148075.0, 48464.0, 19227.0, 8570.0, 4421.0, 2425.0, 1427.0, 865.0, 545.0, 400.0, 237.0, 161.0, 120.0, 74.0, 58.0, 37.0, 35.0, 14.0, 8.0, 10.0, 13.0, 8.0, 1.0, 1.0, 1.0, 1.0], "bins": [-1.9951171875, -1.9395904541015625, -1.884063720703125, -1.8285369873046875, -1.77301025390625, -1.7174835205078125, -1.661956787109375, -1.6064300537109375, -1.5509033203125, -1.4953765869140625, -1.439849853515625, -1.3843231201171875, -1.32879638671875, -1.2732696533203125, -1.217742919921875, -1.1622161865234375, -1.106689453125, -1.0511627197265625, -0.995635986328125, -0.9401092529296875, -0.88458251953125, -0.8290557861328125, -0.773529052734375, -0.7180023193359375, -0.6624755859375, -0.6069488525390625, -0.551422119140625, -0.4958953857421875, -0.44036865234375, -0.3848419189453125, -0.329315185546875, -0.2737884521484375, -0.21826171875, -0.1627349853515625, -0.107208251953125, -0.0516815185546875, 0.00384521484375, 0.0593719482421875, 0.114898681640625, 0.1704254150390625, 0.2259521484375, 0.2814788818359375, 0.337005615234375, 0.3925323486328125, 0.44805908203125, 0.5035858154296875, 0.559112548828125, 0.6146392822265625, 0.670166015625, 0.7256927490234375, 0.781219482421875, 0.8367462158203125, 0.89227294921875, 0.9477996826171875, 1.003326416015625, 1.0588531494140625, 1.1143798828125, 1.1699066162109375, 1.225433349609375, 1.2809600830078125, 1.33648681640625, 1.3920135498046875, 1.447540283203125, 1.5030670166015625, 1.55859375]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0, 5.0, 2.0, 5.0, 2.0, 8.0, 13.0, 17.0, 19.0, 24.0, 25.0, 32.0, 32.0, 38.0, 36.0, 43.0, 53.0, 45.0, 68.0, 73.0, 52.0, 56.0, 38.0, 40.0, 40.0, 43.0, 41.0, 35.0, 22.0, 21.0, 16.0, 17.0, 12.0, 10.0, 8.0, 6.0, 4.0, 3.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.119140625, -2.0626983642578125, -2.006256103515625, -1.9498138427734375, -1.89337158203125, -1.8369293212890625, -1.780487060546875, -1.7240447998046875, -1.6676025390625, -1.6111602783203125, -1.554718017578125, -1.4982757568359375, -1.44183349609375, -1.3853912353515625, -1.328948974609375, -1.2725067138671875, -1.216064453125, -1.1596221923828125, -1.103179931640625, -1.0467376708984375, -0.99029541015625, -0.9338531494140625, -0.877410888671875, -0.8209686279296875, -0.7645263671875, -0.7080841064453125, -0.651641845703125, -0.5951995849609375, -0.53875732421875, -0.4823150634765625, -0.425872802734375, -0.3694305419921875, -0.31298828125, -0.2565460205078125, -0.200103759765625, -0.1436614990234375, -0.08721923828125, -0.0307769775390625, 0.025665283203125, 0.0821075439453125, 0.1385498046875, 0.1949920654296875, 0.251434326171875, 0.3078765869140625, 0.36431884765625, 0.4207611083984375, 0.477203369140625, 0.5336456298828125, 0.590087890625, 0.6465301513671875, 0.702972412109375, 0.7594146728515625, 0.81585693359375, 0.8722991943359375, 0.928741455078125, 0.9851837158203125, 1.0416259765625, 1.0980682373046875, 1.154510498046875, 1.2109527587890625, 1.26739501953125, 1.3238372802734375, 1.380279541015625, 1.4367218017578125, 1.4931640625]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 1.0, 3.0, 3.0, 6.0, 12.0, 14.0, 17.0, 22.0, 25.0, 45.0, 67.0, 134.0, 229.0, 379.0, 712.0, 1515.0, 4530.0, 43449.0, 3868923.0, 260686.0, 9243.0, 2240.0, 867.0, 435.0, 288.0, 144.0, 97.0, 62.0, 58.0, 23.0, 14.0, 11.0, 6.0, 5.0, 8.0, 2.0, 3.0, 3.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-10.2265625, -9.931884765625, -9.63720703125, -9.342529296875, -9.0478515625, -8.753173828125, -8.45849609375, -8.163818359375, -7.869140625, -7.574462890625, -7.27978515625, -6.985107421875, -6.6904296875, -6.395751953125, -6.10107421875, -5.806396484375, -5.51171875, -5.217041015625, -4.92236328125, -4.627685546875, -4.3330078125, -4.038330078125, -3.74365234375, -3.448974609375, -3.154296875, -2.859619140625, -2.56494140625, -2.270263671875, -1.9755859375, -1.680908203125, -1.38623046875, -1.091552734375, -0.796875, -0.502197265625, -0.20751953125, 0.087158203125, 0.3818359375, 0.676513671875, 0.97119140625, 1.265869140625, 1.560546875, 1.855224609375, 2.14990234375, 2.444580078125, 2.7392578125, 3.033935546875, 3.32861328125, 3.623291015625, 3.91796875, 4.212646484375, 4.50732421875, 4.802001953125, 5.0966796875, 5.391357421875, 5.68603515625, 5.980712890625, 6.275390625, 6.570068359375, 6.86474609375, 7.159423828125, 7.4541015625, 7.748779296875, 8.04345703125, 8.338134765625, 8.6328125]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 3.0, 3.0, 3.0, 2.0, 13.0, 17.0, 22.0, 46.0, 65.0, 113.0, 191.0, 253.0, 467.0, 710.0, 739.0, 532.0, 324.0, 234.0, 127.0, 77.0, 46.0, 28.0, 22.0, 20.0, 9.0, 12.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.25390625, -6.04608154296875, -5.8382568359375, -5.63043212890625, -5.422607421875, -5.21478271484375, -5.0069580078125, -4.79913330078125, -4.59130859375, -4.38348388671875, -4.1756591796875, -3.96783447265625, -3.760009765625, -3.55218505859375, -3.3443603515625, -3.13653564453125, -2.9287109375, -2.72088623046875, -2.5130615234375, -2.30523681640625, -2.097412109375, -1.88958740234375, -1.6817626953125, -1.47393798828125, -1.26611328125, -1.05828857421875, -0.8504638671875, -0.64263916015625, -0.434814453125, -0.22698974609375, -0.0191650390625, 0.18865966796875, 0.396484375, 0.60430908203125, 0.8121337890625, 1.01995849609375, 1.227783203125, 1.43560791015625, 1.6434326171875, 1.85125732421875, 2.05908203125, 2.26690673828125, 2.4747314453125, 2.68255615234375, 2.890380859375, 3.09820556640625, 3.3060302734375, 3.51385498046875, 3.7216796875, 3.92950439453125, 4.1373291015625, 4.34515380859375, 4.552978515625, 4.76080322265625, 4.9686279296875, 5.17645263671875, 5.38427734375, 5.59210205078125, 5.7999267578125, 6.00775146484375, 6.215576171875, 6.42340087890625, 6.6312255859375, 6.83905029296875, 7.046875]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 7.0, 7.0, 3.0, 6.0, 7.0, 12.0, 17.0, 25.0, 54.0, 65.0, 91.0, 102.0, 140.0, 128.0, 110.0, 63.0, 51.0, 39.0, 21.0, 13.0, 9.0, 7.0, 6.0, 5.0, 6.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-46.864784240722656, -45.5516242980957, -44.23846435546875, -42.92530822753906, -41.61214828491211, -40.298988342285156, -38.98583221435547, -37.672672271728516, -36.35951232910156, -35.04635238647461, -33.733192443847656, -32.42003631591797, -31.106876373291016, -29.793716430664062, -28.480558395385742, -27.167400360107422, -25.85424041748047, -24.541080474853516, -23.227922439575195, -21.914764404296875, -20.601604461669922, -19.28844451904297, -17.97528648376465, -16.662128448486328, -15.348968505859375, -14.035809516906738, -12.722650527954102, -11.409491539001465, -10.096332550048828, -8.783173561096191, -7.470014572143555, -6.156855583190918, -4.843696594238281, -3.5305376052856445, -2.217378616333008, -0.9042196273803711, 0.4089393615722656, 1.7220983505249023, 3.035257339477539, 4.348416328430176, 5.6615753173828125, 6.974734306335449, 8.287893295288086, 9.601052284240723, 10.91421127319336, 12.227370262145996, 13.540529251098633, 14.85368824005127, 16.166847229003906, 17.48000717163086, 18.79316520690918, 20.1063232421875, 21.419483184814453, 22.732643127441406, 24.045801162719727, 25.358959197998047, 26.672119140625, 27.985279083251953, 29.298437118530273, 30.611595153808594, 31.924755096435547, 33.2379150390625, 34.55107116699219, 35.86423110961914, 37.177391052246094]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 5.0, 3.0, 7.0, 6.0, 8.0, 14.0, 11.0, 8.0, 13.0, 11.0, 12.0, 16.0, 35.0, 20.0, 30.0, 50.0, 37.0, 56.0, 52.0, 64.0, 66.0, 60.0, 59.0, 37.0, 40.0, 36.0, 34.0, 33.0, 40.0, 24.0, 24.0, 17.0, 16.0, 9.0, 12.0, 8.0, 6.0, 5.0, 4.0, 3.0, 4.0, 2.0, 2.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-27.391754150390625, -26.52879524230957, -25.665836334228516, -24.80287742614746, -23.939918518066406, -23.07695770263672, -22.213998794555664, -21.35103988647461, -20.488080978393555, -19.6251220703125, -18.762163162231445, -17.89920425415039, -17.036243438720703, -16.17328643798828, -15.310325622558594, -14.447366714477539, -13.584407806396484, -12.72144889831543, -11.858489990234375, -10.995530128479004, -10.13257122039795, -9.269612312316895, -8.406652450561523, -7.543693542480469, -6.680734634399414, -5.817775726318359, -4.9548163414001465, -4.091856956481934, -3.228898048400879, -2.365939140319824, -1.5029797554016113, -0.6400203704833984, 0.22294044494628906, 1.0858995914459229, 1.9488587379455566, 2.8118178844451904, 3.674777030944824, 4.537735939025879, 5.400695323944092, 6.263654708862305, 7.126613616943359, 7.989572525024414, 8.852531433105469, 9.71549129486084, 10.578450202941895, 11.44140911102295, 12.30436897277832, 13.167327880859375, 14.03028678894043, 14.893245697021484, 15.756204605102539, 16.619163513183594, 17.48212432861328, 18.345081329345703, 19.20804214477539, 20.071001052856445, 20.9339599609375, 21.796918869018555, 22.65987777709961, 23.522836685180664, 24.38579559326172, 25.248756408691406, 26.11171531677246, 26.974674224853516, 27.83763313293457]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 8.0, 7.0, 14.0, 26.0, 29.0, 62.0, 91.0, 182.0, 302.0, 619.0, 1253.0, 2724.0, 6206.0, 16964.0, 56910.0, 268388.0, 555561.0, 96593.0, 26526.0, 9054.0, 3653.0, 1669.0, 789.0, 365.0, 264.0, 118.0, 90.0, 31.0, 20.0, 18.0, 11.0, 3.0, 8.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.4765625, -4.356201171875, -4.23583984375, -4.115478515625, -3.9951171875, -3.874755859375, -3.75439453125, -3.634033203125, -3.513671875, -3.393310546875, -3.27294921875, -3.152587890625, -3.0322265625, -2.911865234375, -2.79150390625, -2.671142578125, -2.55078125, -2.430419921875, -2.31005859375, -2.189697265625, -2.0693359375, -1.948974609375, -1.82861328125, -1.708251953125, -1.587890625, -1.467529296875, -1.34716796875, -1.226806640625, -1.1064453125, -0.986083984375, -0.86572265625, -0.745361328125, -0.625, -0.504638671875, -0.38427734375, -0.263916015625, -0.1435546875, -0.023193359375, 0.09716796875, 0.217529296875, 0.337890625, 0.458251953125, 0.57861328125, 0.698974609375, 0.8193359375, 0.939697265625, 1.06005859375, 1.180419921875, 1.30078125, 1.421142578125, 1.54150390625, 1.661865234375, 1.7822265625, 1.902587890625, 2.02294921875, 2.143310546875, 2.263671875, 2.384033203125, 2.50439453125, 2.624755859375, 2.7451171875, 2.865478515625, 2.98583984375, 3.106201171875, 3.2265625]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 3.0, 5.0, 4.0, 3.0, 7.0, 10.0, 14.0, 13.0, 17.0, 26.0, 33.0, 41.0, 36.0, 44.0, 66.0, 58.0, 63.0, 64.0, 79.0, 56.0, 70.0, 51.0, 48.0, 40.0, 32.0, 28.0, 19.0, 25.0, 13.0, 9.0, 11.0, 3.0, 7.0, 7.0, 6.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-2.931640625, -2.854278564453125, -2.77691650390625, -2.699554443359375, -2.6221923828125, -2.544830322265625, -2.46746826171875, -2.390106201171875, -2.312744140625, -2.235382080078125, -2.15802001953125, -2.080657958984375, -2.0032958984375, -1.925933837890625, -1.84857177734375, -1.771209716796875, -1.69384765625, -1.616485595703125, -1.53912353515625, -1.461761474609375, -1.3843994140625, -1.307037353515625, -1.22967529296875, -1.152313232421875, -1.074951171875, -0.997589111328125, -0.92022705078125, -0.842864990234375, -0.7655029296875, -0.688140869140625, -0.61077880859375, -0.533416748046875, -0.4560546875, -0.378692626953125, -0.30133056640625, -0.223968505859375, -0.1466064453125, -0.069244384765625, 0.00811767578125, 0.085479736328125, 0.162841796875, 0.240203857421875, 0.31756591796875, 0.394927978515625, 0.4722900390625, 0.549652099609375, 0.62701416015625, 0.704376220703125, 0.78173828125, 0.859100341796875, 0.93646240234375, 1.013824462890625, 1.0911865234375, 1.168548583984375, 1.24591064453125, 1.323272705078125, 1.400634765625, 1.477996826171875, 1.55535888671875, 1.632720947265625, 1.7100830078125, 1.787445068359375, 1.86480712890625, 1.942169189453125, 2.01953125]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 4.0, 3.0, 6.0, 6.0, 17.0, 12.0, 22.0, 25.0, 43.0, 55.0, 88.0, 174.0, 290.0, 507.0, 1042.0, 2988.0, 12051.0, 100578.0, 879882.0, 40371.0, 6738.0, 1868.0, 781.0, 390.0, 202.0, 125.0, 95.0, 59.0, 45.0, 29.0, 15.0, 12.0, 13.0, 7.0, 5.0, 3.0, 1.0, 3.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.21484375, -5.03692626953125, -4.8590087890625, -4.68109130859375, -4.503173828125, -4.32525634765625, -4.1473388671875, -3.96942138671875, -3.79150390625, -3.61358642578125, -3.4356689453125, -3.25775146484375, -3.079833984375, -2.90191650390625, -2.7239990234375, -2.54608154296875, -2.3681640625, -2.19024658203125, -2.0123291015625, -1.83441162109375, -1.656494140625, -1.47857666015625, -1.3006591796875, -1.12274169921875, -0.94482421875, -0.76690673828125, -0.5889892578125, -0.41107177734375, -0.233154296875, -0.05523681640625, 0.1226806640625, 0.30059814453125, 0.478515625, 0.65643310546875, 0.8343505859375, 1.01226806640625, 1.190185546875, 1.36810302734375, 1.5460205078125, 1.72393798828125, 1.90185546875, 2.07977294921875, 2.2576904296875, 2.43560791015625, 2.613525390625, 2.79144287109375, 2.9693603515625, 3.14727783203125, 3.3251953125, 3.50311279296875, 3.6810302734375, 3.85894775390625, 4.036865234375, 4.21478271484375, 4.3927001953125, 4.57061767578125, 4.74853515625, 4.92645263671875, 5.1043701171875, 5.28228759765625, 5.460205078125, 5.63812255859375, 5.8160400390625, 5.99395751953125, 6.171875]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 4.0, 3.0, 2.0, 4.0, 2.0, 9.0, 5.0, 4.0, 9.0, 11.0, 14.0, 14.0, 11.0, 13.0, 10.0, 19.0, 29.0, 22.0, 22.0, 34.0, 43.0, 66.0, 67.0, 95.0, 76.0, 58.0, 57.0, 44.0, 38.0, 36.0, 30.0, 17.0, 9.0, 20.0, 16.0, 17.0, 14.0, 10.0, 5.0, 9.0, 4.0, 8.0, 7.0, 5.0, 3.0, 7.0, 4.0, 0.0, 1.0, 3.0, 1.0, 0.0, 2.0, 2.0, 1.0, 3.0], "bins": [-7.27734375, -7.0506591796875, -6.823974609375, -6.5972900390625, -6.37060546875, -6.1439208984375, -5.917236328125, -5.6905517578125, -5.4638671875, -5.2371826171875, -5.010498046875, -4.7838134765625, -4.55712890625, -4.3304443359375, -4.103759765625, -3.8770751953125, -3.650390625, -3.4237060546875, -3.197021484375, -2.9703369140625, -2.74365234375, -2.5169677734375, -2.290283203125, -2.0635986328125, -1.8369140625, -1.6102294921875, -1.383544921875, -1.1568603515625, -0.93017578125, -0.7034912109375, -0.476806640625, -0.2501220703125, -0.0234375, 0.2032470703125, 0.429931640625, 0.6566162109375, 0.88330078125, 1.1099853515625, 1.336669921875, 1.5633544921875, 1.7900390625, 2.0167236328125, 2.243408203125, 2.4700927734375, 2.69677734375, 2.9234619140625, 3.150146484375, 3.3768310546875, 3.603515625, 3.8302001953125, 4.056884765625, 4.2835693359375, 4.51025390625, 4.7369384765625, 4.963623046875, 5.1903076171875, 5.4169921875, 5.6436767578125, 5.870361328125, 6.0970458984375, 6.32373046875, 6.5504150390625, 6.777099609375, 7.0037841796875, 7.23046875]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 1.0, 5.0, 7.0, 3.0, 4.0, 2.0, 4.0, 6.0, 9.0, 5.0, 13.0, 15.0, 18.0, 28.0, 33.0, 49.0, 80.0, 123.0, 221.0, 389.0, 630.0, 1330.0, 2948.0, 7971.0, 29524.0, 228541.0, 726587.0, 34552.0, 8976.0, 3344.0, 1391.0, 691.0, 388.0, 244.0, 136.0, 86.0, 56.0, 29.0, 23.0, 21.0, 11.0, 11.0, 12.0, 5.0, 8.0, 8.0, 2.0, 2.0, 10.0, 2.0, 0.0, 1.0, 3.0, 1.0, 3.0, 2.0], "bins": [-1.0693359375, -1.037933349609375, -1.00653076171875, -0.975128173828125, -0.9437255859375, -0.912322998046875, -0.88092041015625, -0.849517822265625, -0.818115234375, -0.786712646484375, -0.75531005859375, -0.723907470703125, -0.6925048828125, -0.661102294921875, -0.62969970703125, -0.598297119140625, -0.56689453125, -0.535491943359375, -0.50408935546875, -0.472686767578125, -0.4412841796875, -0.409881591796875, -0.37847900390625, -0.347076416015625, -0.315673828125, -0.284271240234375, -0.25286865234375, -0.221466064453125, -0.1900634765625, -0.158660888671875, -0.12725830078125, -0.095855712890625, -0.064453125, -0.033050537109375, -0.00164794921875, 0.029754638671875, 0.0611572265625, 0.092559814453125, 0.12396240234375, 0.155364990234375, 0.186767578125, 0.218170166015625, 0.24957275390625, 0.280975341796875, 0.3123779296875, 0.343780517578125, 0.37518310546875, 0.406585693359375, 0.43798828125, 0.469390869140625, 0.50079345703125, 0.532196044921875, 0.5635986328125, 0.595001220703125, 0.62640380859375, 0.657806396484375, 0.689208984375, 0.720611572265625, 0.75201416015625, 0.783416748046875, 0.8148193359375, 0.846221923828125, 0.87762451171875, 0.909027099609375, 0.9404296875]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 6.0, 3.0, 2.0, 5.0, 3.0, 4.0, 5.0, 6.0, 10.0, 10.0, 14.0, 27.0, 30.0, 52.0, 79.0, 104.0, 158.0, 144.0, 107.0, 74.0, 37.0, 27.0, 22.0, 12.0, 10.0, 15.0, 9.0, 2.0, 7.0, 3.0, 10.0, 3.0, 7.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005240440368652344, -0.0005090534687042236, -0.0004940629005432129, -0.00047907233238220215, -0.0004640817642211914, -0.00044909119606018066, -0.0004341006278991699, -0.0004191100597381592, -0.00040411949157714844, -0.0003891289234161377, -0.00037413835525512695, -0.0003591477870941162, -0.00034415721893310547, -0.0003291666507720947, -0.000314176082611084, -0.00029918551445007324, -0.0002841949462890625, -0.00026920437812805176, -0.000254213809967041, -0.00023922324180603027, -0.00022423267364501953, -0.0002092421054840088, -0.00019425153732299805, -0.0001792609691619873, -0.00016427040100097656, -0.00014927983283996582, -0.00013428926467895508, -0.00011929869651794434, -0.0001043081283569336, -8.931756019592285e-05, -7.432699203491211e-05, -5.933642387390137e-05, -4.4345855712890625e-05, -2.9355287551879883e-05, -1.436471939086914e-05, 6.258487701416016e-07, 1.5616416931152344e-05, 3.0606985092163086e-05, 4.559755325317383e-05, 6.058812141418457e-05, 7.557868957519531e-05, 9.056925773620605e-05, 0.0001055598258972168, 0.00012055039405822754, 0.00013554096221923828, 0.00015053153038024902, 0.00016552209854125977, 0.0001805126667022705, 0.00019550323486328125, 0.000210493803024292, 0.00022548437118530273, 0.00024047493934631348, 0.0002554655075073242, 0.00027045607566833496, 0.0002854466438293457, 0.00030043721199035645, 0.0003154277801513672, 0.00033041834831237793, 0.00034540891647338867, 0.0003603994846343994, 0.00037539005279541016, 0.0003903806209564209, 0.00040537118911743164, 0.0004203617572784424, 0.0004353523254394531]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 11.0, 9.0, 20.0, 23.0, 40.0, 34.0, 71.0, 110.0, 230.0, 300.0, 583.0, 1148.0, 2347.0, 6076.0, 17396.0, 69668.0, 801130.0, 112277.0, 23400.0, 7597.0, 3087.0, 1392.0, 648.0, 378.0, 220.0, 129.0, 73.0, 45.0, 37.0, 29.0, 12.0, 6.0, 7.0, 5.0, 7.0, 6.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.82666015625, -0.8027877807617188, -0.7789154052734375, -0.7550430297851562, -0.731170654296875, -0.7072982788085938, -0.6834259033203125, -0.6595535278320312, -0.63568115234375, -0.6118087768554688, -0.5879364013671875, -0.5640640258789062, -0.540191650390625, -0.5163192749023438, -0.4924468994140625, -0.46857452392578125, -0.4447021484375, -0.42082977294921875, -0.3969573974609375, -0.37308502197265625, -0.349212646484375, -0.32534027099609375, -0.3014678955078125, -0.27759552001953125, -0.25372314453125, -0.22985076904296875, -0.2059783935546875, -0.18210601806640625, -0.158233642578125, -0.13436126708984375, -0.1104888916015625, -0.08661651611328125, -0.062744140625, -0.03887176513671875, -0.0149993896484375, 0.00887298583984375, 0.032745361328125, 0.05661773681640625, 0.0804901123046875, 0.10436248779296875, 0.12823486328125, 0.15210723876953125, 0.1759796142578125, 0.19985198974609375, 0.223724365234375, 0.24759674072265625, 0.2714691162109375, 0.29534149169921875, 0.3192138671875, 0.34308624267578125, 0.3669586181640625, 0.39083099365234375, 0.414703369140625, 0.43857574462890625, 0.4624481201171875, 0.48632049560546875, 0.51019287109375, 0.5340652465820312, 0.5579376220703125, 0.5818099975585938, 0.605682373046875, 0.6295547485351562, 0.6534271240234375, 0.6772994995117188, 0.701171875]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 5.0, 1.0, 5.0, 6.0, 5.0, 11.0, 9.0, 12.0, 17.0, 19.0, 33.0, 47.0, 57.0, 76.0, 89.0, 79.0, 95.0, 76.0, 85.0, 78.0, 55.0, 35.0, 17.0, 21.0, 20.0, 17.0, 14.0, 8.0, 3.0, 4.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.876953125, -0.8470458984375, -0.817138671875, -0.7872314453125, -0.75732421875, -0.7274169921875, -0.697509765625, -0.6676025390625, -0.6376953125, -0.6077880859375, -0.577880859375, -0.5479736328125, -0.51806640625, -0.4881591796875, -0.458251953125, -0.4283447265625, -0.3984375, -0.3685302734375, -0.338623046875, -0.3087158203125, -0.27880859375, -0.2489013671875, -0.218994140625, -0.1890869140625, -0.1591796875, -0.1292724609375, -0.099365234375, -0.0694580078125, -0.03955078125, -0.0096435546875, 0.020263671875, 0.0501708984375, 0.080078125, 0.1099853515625, 0.139892578125, 0.1697998046875, 0.19970703125, 0.2296142578125, 0.259521484375, 0.2894287109375, 0.3193359375, 0.3492431640625, 0.379150390625, 0.4090576171875, 0.43896484375, 0.4688720703125, 0.498779296875, 0.5286865234375, 0.55859375, 0.5885009765625, 0.618408203125, 0.6483154296875, 0.67822265625, 0.7081298828125, 0.738037109375, 0.7679443359375, 0.7978515625, 0.8277587890625, 0.857666015625, 0.8875732421875, 0.91748046875, 0.9473876953125, 0.977294921875, 1.0072021484375, 1.037109375]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 10.0, 9.0, 16.0, 44.0, 106.0, 531.0, 186.0, 56.0, 21.0, 14.0, 6.0, 5.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-67.82656860351562, -66.08081817626953, -64.33506774902344, -62.589324951171875, -60.84357452392578, -59.09782409667969, -57.35207748413086, -55.60633087158203, -53.86058044433594, -52.114830017089844, -50.369083404541016, -48.62333679199219, -46.877586364746094, -45.1318359375, -43.38608932495117, -41.640342712402344, -39.89459228515625, -38.148841857910156, -36.40309524536133, -34.6573486328125, -32.911598205566406, -31.165849685668945, -29.420101165771484, -27.674352645874023, -25.928604125976562, -24.1828556060791, -22.43710708618164, -20.69135856628418, -18.94561004638672, -17.199861526489258, -15.454113006591797, -13.708364486694336, -11.96261215209961, -10.216863632202148, -8.471115112304688, -6.725366592407227, -4.979618072509766, -3.2338695526123047, -1.4881210327148438, 0.2576274871826172, 2.003376007080078, 3.749124526977539, 5.494873046875, 7.240621566772461, 8.986370086669922, 10.732118606567383, 12.477867126464844, 14.223615646362305, 15.969364166259766, 17.715112686157227, 19.460861206054688, 21.20660972595215, 22.95235824584961, 24.69810676574707, 26.44385528564453, 28.189603805541992, 29.935352325439453, 31.681100845336914, 33.426849365234375, 35.17259979248047, 36.9183464050293, 38.664093017578125, 40.40984344482422, 42.15559387207031, 43.90134048461914]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 4.0, 2.0, 4.0, 1.0, 5.0, 4.0, 14.0, 13.0, 8.0, 9.0, 13.0, 31.0, 17.0, 17.0, 26.0, 25.0, 24.0, 24.0, 112.0, 363.0, 50.0, 33.0, 28.0, 22.0, 31.0, 18.0, 15.0, 20.0, 16.0, 13.0, 12.0, 9.0, 3.0, 7.0, 5.0, 5.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-29.131650924682617, -28.351926803588867, -27.572202682495117, -26.792478561401367, -26.01275634765625, -25.2330322265625, -24.45330810546875, -23.673583984375, -22.89385986328125, -22.1141357421875, -21.33441162109375, -20.5546875, -19.77496337890625, -18.995241165161133, -18.215517044067383, -17.435792922973633, -16.656068801879883, -15.876344680786133, -15.096620559692383, -14.31689739227295, -13.5371732711792, -12.75744915008545, -11.977725982666016, -11.198001861572266, -10.418277740478516, -9.638553619384766, -8.858829498291016, -8.079106330871582, -7.299382209777832, -6.519658088684082, -5.73993444442749, -4.960210800170898, -4.180488586425781, -3.4007647037506104, -2.6210408210754395, -1.8413169384002686, -1.0615930557250977, -0.28186917304992676, 0.49785470962524414, 1.277578353881836, 2.057302474975586, 2.837026357650757, 3.6167502403259277, 4.3964738845825195, 5.1761980056762695, 5.9559221267700195, 6.735645771026611, 7.515369415283203, 8.295093536376953, 9.074817657470703, 9.854541778564453, 10.634264945983887, 11.413989067077637, 12.193713188171387, 12.97343635559082, 13.75316047668457, 14.53288459777832, 15.31260871887207, 16.09233283996582, 16.87205696105957, 17.651779174804688, 18.431503295898438, 19.211227416992188, 19.990951538085938, 20.770675659179688]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 3.0, 6.0, 3.0, 4.0, 5.0, 5.0, 3.0, 7.0, 5.0, 13.0, 15.0, 17.0, 20.0, 21.0, 28.0, 25.0, 42.0, 40.0, 85.0, 232.0, 108.0, 59.0, 41.0, 41.0, 35.0, 24.0, 20.0, 20.0, 16.0, 15.0, 6.0, 12.0, 6.0, 6.0, 7.0, 4.0, 3.0, 1.0, 4.0, 1.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6015625, -1.541168212890625, -1.48077392578125, -1.420379638671875, -1.3599853515625, -1.299591064453125, -1.23919677734375, -1.178802490234375, -1.118408203125, -1.058013916015625, -0.99761962890625, -0.937225341796875, -0.8768310546875, -0.816436767578125, -0.75604248046875, -0.695648193359375, -0.63525390625, -0.574859619140625, -0.51446533203125, -0.454071044921875, -0.3936767578125, -0.333282470703125, -0.27288818359375, -0.212493896484375, -0.152099609375, -0.091705322265625, -0.03131103515625, 0.029083251953125, 0.0894775390625, 0.149871826171875, 0.21026611328125, 0.270660400390625, 0.3310546875, 0.391448974609375, 0.45184326171875, 0.512237548828125, 0.5726318359375, 0.633026123046875, 0.69342041015625, 0.753814697265625, 0.814208984375, 0.874603271484375, 0.93499755859375, 0.995391845703125, 1.0557861328125, 1.116180419921875, 1.17657470703125, 1.236968994140625, 1.29736328125, 1.357757568359375, 1.41815185546875, 1.478546142578125, 1.5389404296875, 1.599334716796875, 1.65972900390625, 1.720123291015625, 1.780517578125, 1.840911865234375, 1.90130615234375, 1.961700439453125, 2.0220947265625, 2.082489013671875, 2.14288330078125, 2.203277587890625, 2.263671875]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0, 4.0, 1.0, 11.0, 13.0, 7.0, 2.0, 5.0, 9.0, 15.0, 11.0, 32.0, 32.0, 68.0, 128.0, 258.0, 590.0, 1771.0, 7847.0, 8282076.0, 88016.0, 5317.0, 1368.0, 437.0, 222.0, 128.0, 67.0, 39.0, 24.0, 15.0, 21.0, 9.0, 7.0, 7.0, 1.0, 2.0, 4.0, 3.0, 3.0, 5.0, 1.0, 0.0, 0.0, 2.0, 4.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 6.0, 4.0, 1.0], "bins": [-16.518423080444336, -15.94644546508789, -15.374467849731445, -14.802490234375, -14.230512619018555, -13.65853500366211, -13.086556434631348, -12.514578819274902, -11.942601203918457, -11.370623588562012, -10.798645973205566, -10.226668357849121, -9.65468978881836, -9.082712173461914, -8.510734558105469, -7.938756942749023, -7.366779327392578, -6.794801712036133, -6.2228240966796875, -5.650846004486084, -5.078868389129639, -4.506890773773193, -3.934912919998169, -3.3629350662231445, -2.790957450866699, -2.218979835510254, -1.6470019817352295, -1.0750242471694946, -0.5030465126037598, 0.06893110275268555, 0.64090895652771, 1.2128868103027344, 1.7848625183105469, 2.356840133666992, 2.9288179874420166, 3.500795841217041, 4.072773456573486, 4.644751071929932, 5.216729164123535, 5.7887067794799805, 6.360684394836426, 6.932662010192871, 7.504639625549316, 8.076617240905762, 8.648595809936523, 9.220573425292969, 9.792551040649414, 10.36452865600586, 10.936506271362305, 11.50848388671875, 12.080461502075195, 12.65243911743164, 13.224416732788086, 13.796394348144531, 14.368372917175293, 14.940350532531738, 15.512328147888184, 16.084306716918945, 16.65628433227539, 17.228261947631836, 17.80023956298828, 18.372217178344727, 18.944194793701172, 19.516172409057617, 20.088150024414062]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 3.0, 1.0, 4.0, 3.0, 3.0, 3.0, 4.0, 3.0, 4.0, 2.0, 3.0, 5.0, 3.0, 4.0, 5.0, 2.0, 4.0, 3.0, 4.0, 7.0, 5.0, 2.0, 1.0, 3.0, 2.0, 3.0, 3.0, 0.0, 5.0, 3.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.030651092529297, -11.654017448425293, -11.277384757995605, -10.900751113891602, -10.524117469787598, -10.147483825683594, -9.770851135253906, -9.394217491149902, -9.017583847045898, -8.640950202941895, -8.264317512512207, -7.887683868408203, -7.511050224304199, -7.1344170570373535, -6.757783889770508, -6.381150245666504, -6.004517555236816, -5.627884387969971, -5.251250743865967, -4.874617576599121, -4.497983932495117, -4.1213507652282715, -3.744717597961426, -3.368084192276001, -2.991450786590576, -2.6148173809051514, -2.2381839752197266, -1.8615508079528809, -1.484917402267456, -1.1082839965820312, -0.7316508293151855, -0.35501742362976074, 0.02161693572998047, 0.3982502818107605, 0.7748836278915405, 1.1515169143676758, 1.5281503200531006, 1.9047837257385254, 2.281416893005371, 2.658050298690796, 3.0346837043762207, 3.4113171100616455, 3.7879505157470703, 4.164583683013916, 4.541216850280762, 4.917850494384766, 5.294483661651611, 5.671116828918457, 6.047750473022461, 6.424383640289307, 6.8010172843933105, 7.177650451660156, 7.55428409576416, 7.930917263031006, 8.307550430297852, 8.684184074401855, 9.06081771850586, 9.437451362609863, 9.81408405303955, 10.190717697143555, 10.567351341247559, 10.943984985351562, 11.32061767578125, 11.697251319885254, 12.073884010314941]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 5.0, 2.0, 5.0, 2.0, 6.0, 15.0, 18.0, 27.0, 35.0, 66.0, 62.0, 99.0, 180.0, 238.0, 546.0, 1126.0, 2496.0, 6358.0, 17972.0, 57824.0, 185481.0, 173318.0, 52150.0, 16314.0, 5617.0, 2248.0, 963.0, 468.0, 246.0, 151.0, 71.0, 54.0, 29.0, 19.0, 21.0, 15.0, 4.0, 6.0, 4.0, 6.0, 4.0, 4.0, 0.0, 1.0, 2.0, 2.0], "bins": [-20.515625, -19.9891357421875, -19.462646484375, -18.9361572265625, -18.40966796875, -17.8831787109375, -17.356689453125, -16.8302001953125, -16.3037109375, -15.7772216796875, -15.250732421875, -14.7242431640625, -14.19775390625, -13.6712646484375, -13.144775390625, -12.6182861328125, -12.091796875, -11.5653076171875, -11.038818359375, -10.5123291015625, -9.98583984375, -9.4593505859375, -8.932861328125, -8.4063720703125, -7.8798828125, -7.3533935546875, -6.826904296875, -6.3004150390625, -5.77392578125, -5.2474365234375, -4.720947265625, -4.1944580078125, -3.66796875, -3.1414794921875, -2.614990234375, -2.0885009765625, -1.56201171875, -1.0355224609375, -0.509033203125, 0.0174560546875, 0.5439453125, 1.0704345703125, 1.596923828125, 2.1234130859375, 2.64990234375, 3.1763916015625, 3.702880859375, 4.2293701171875, 4.755859375, 5.2823486328125, 5.808837890625, 6.3353271484375, 6.86181640625, 7.3883056640625, 7.914794921875, 8.4412841796875, 8.9677734375, 9.4942626953125, 10.020751953125, 10.5472412109375, 11.07373046875, 11.6002197265625, 12.126708984375, 12.6531982421875, 13.1796875]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 2.0, 6.0, 9.0, 2.0, 1.0, 6.0, 4.0, 10.0, 13.0, 11.0, 24.0, 23.0, 25.0, 36.0, 63.0, 60.0, 59.0, 57.0, 76.0, 62.0, 75.0, 80.0, 63.0, 61.0, 41.0, 33.0, 27.0, 22.0, 9.0, 18.0, 7.0, 7.0, 6.0, 4.0, 3.0, 3.0, 2.0, 1.0, 5.0, 1.0, 1.0], "bins": [-3.162109375, -3.0879669189453125, -3.013824462890625, -2.9396820068359375, -2.86553955078125, -2.7913970947265625, -2.717254638671875, -2.6431121826171875, -2.5689697265625, -2.4948272705078125, -2.420684814453125, -2.3465423583984375, -2.27239990234375, -2.1982574462890625, -2.124114990234375, -2.0499725341796875, -1.975830078125, -1.9016876220703125, -1.827545166015625, -1.7534027099609375, -1.67926025390625, -1.6051177978515625, -1.530975341796875, -1.4568328857421875, -1.3826904296875, -1.3085479736328125, -1.234405517578125, -1.1602630615234375, -1.08612060546875, -1.0119781494140625, -0.937835693359375, -0.8636932373046875, -0.78955078125, -0.7154083251953125, -0.641265869140625, -0.5671234130859375, -0.49298095703125, -0.4188385009765625, -0.344696044921875, -0.2705535888671875, -0.1964111328125, -0.1222686767578125, -0.048126220703125, 0.0260162353515625, 0.10015869140625, 0.1743011474609375, 0.248443603515625, 0.3225860595703125, 0.396728515625, 0.4708709716796875, 0.545013427734375, 0.6191558837890625, 0.69329833984375, 0.7674407958984375, 0.841583251953125, 0.9157257080078125, 0.9898681640625, 1.0640106201171875, 1.138153076171875, 1.2122955322265625, 1.28643798828125, 1.3605804443359375, 1.434722900390625, 1.5088653564453125, 1.5830078125]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 0.0, 1.0, 3.0, 4.0, 3.0, 2.0, 3.0, 3.0, 4.0, 7.0, 14.0, 25.0, 60.0, 106.0, 90.0, 73.0, 35.0, 19.0, 12.0, 6.0, 3.0, 4.0, 4.0, 3.0, 5.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.48833656311035, -20.893512725830078, -20.298688888549805, -19.70386505126953, -19.109041213989258, -18.514217376708984, -17.919391632080078, -17.324567794799805, -16.72974395751953, -16.134920120239258, -15.540096282958984, -14.945272445678711, -14.350447654724121, -13.755623817443848, -13.160799980163574, -12.565975189208984, -11.971152305603027, -11.376328468322754, -10.78150463104248, -10.18667984008789, -9.591856002807617, -8.997032165527344, -8.40220832824707, -7.807384014129639, -7.212560176849365, -6.617736339569092, -6.02291202545166, -5.428088188171387, -4.833264350891113, -4.238440036773682, -3.643616199493408, -3.0487918853759766, -2.453968048095703, -1.8591439723968506, -1.2643200159072876, -0.6694960594177246, -0.07467198371887207, 0.5201520919799805, 1.114975929260254, 1.7098002433776855, 2.304624080657959, 2.8994481563568115, 3.494272232055664, 4.0890960693359375, 4.683919906616211, 5.278744220733643, 5.873568058013916, 6.468392372131348, 7.063216209411621, 7.6580400466918945, 8.252863883972168, 8.847688674926758, 9.442512512207031, 10.037336349487305, 10.632160186767578, 11.226984024047852, 11.821807861328125, 12.416631698608398, 13.011455535888672, 13.606279373168945, 14.201104164123535, 14.795928001403809, 15.390751838684082, 15.985576629638672, 16.580400466918945]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 4.0, 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 2.0, 5.0, 6.0, 5.0, 12.0, 18.0, 40.0, 101.0, 99.0, 94.0, 32.0, 16.0, 7.0, 5.0, 3.0, 4.0, 6.0, 3.0, 3.0, 2.0, 4.0, 4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-12.16215991973877, -11.80250072479248, -11.442840576171875, -11.083181381225586, -10.723522186279297, -10.363862991333008, -10.004202842712402, -9.644543647766113, -9.284883499145508, -8.925224304199219, -8.565564155578613, -8.205904960632324, -7.846245765686035, -7.486586093902588, -7.126926422119141, -6.767267227172852, -6.4076080322265625, -6.047948360443115, -5.688289165496826, -5.328629493713379, -4.96897029876709, -4.609310626983643, -4.249650955200195, -3.889991521835327, -3.530332088470459, -3.170672655105591, -2.8110132217407227, -2.4513535499572754, -2.0916941165924072, -1.732034683227539, -1.3723750114440918, -1.0127155780792236, -0.6530551910400391, -0.2933956980705261, 0.06626379489898682, 0.42592334747314453, 0.7855827808380127, 1.1452422142028809, 1.5049018859863281, 1.8645613193511963, 2.2242207527160645, 2.5838801860809326, 2.943539619445801, 3.303199291229248, 3.662858724594116, 4.022518157958984, 4.382177829742432, 4.741837501525879, 5.101496696472168, 5.461156368255615, 5.820815563201904, 6.180475234985352, 6.540134429931641, 6.899794101715088, 7.259453773498535, 7.619112968444824, 7.9787726402282715, 8.338432312011719, 8.698091506958008, 9.057750701904297, 9.417410850524902, 9.777070045471191, 10.136730194091797, 10.496389389038086, 10.856048583984375]}, "_wandb": {"runtime": 3631}} \ No newline at end of file diff --git a/wandb/run-20220302_074637-35y19oi2/logs/debug-internal.log b/wandb/run-20220302_074637-35y19oi2/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..345096fe6186663a3a17fc1b904e1c950e5c7738 --- /dev/null +++ b/wandb/run-20220302_074637-35y19oi2/logs/debug-internal.log @@ -0,0 +1,5783 @@ +2022-03-02 07:46:38,043 INFO MainThread:253865 [internal.py:wandb_internal():89] W&B internal server running at pid: 253865, started at: 2022-03-02 07:46:38.042945 +2022-03-02 07:46:38,045 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: check_version +2022-03-02 07:46:38,045 INFO WriterThread:253865 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/run-35y19oi2.wandb +2022-03-02 07:46:38,047 DEBUG SenderThread:253865 [sender.py:send():235] send: header +2022-03-02 07:46:38,047 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: check_version +2022-03-02 07:46:38,113 DEBUG SenderThread:253865 [sender.py:send():235] send: run +2022-03-02 07:46:38,272 INFO SenderThread:253865 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files +2022-03-02 07:46:38,272 INFO SenderThread:253865 [sender.py:_start_run_threads():809] run started: 35y19oi2 with start time 1646207197 +2022-03-02 07:46:38,272 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:46:38,273 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:46:38,273 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: run_start +2022-03-02 07:46:38,278 DEBUG HandlerThread:253865 [meta.py:__init__():36] meta init +2022-03-02 07:46:38,278 DEBUG HandlerThread:253865 [meta.py:__init__():50] meta init done +2022-03-02 07:46:38,278 DEBUG HandlerThread:253865 [meta.py:probe():210] probe +2022-03-02 07:46:38,285 DEBUG HandlerThread:253865 [meta.py:_setup_git():200] setup git +2022-03-02 07:46:38,299 DEBUG HandlerThread:253865 [meta.py:_setup_git():207] setup git done +2022-03-02 07:46:38,299 DEBUG HandlerThread:253865 [meta.py:_save_pip():54] save pip +2022-03-02 07:46:38,300 DEBUG HandlerThread:253865 [meta.py:_save_pip():68] save pip done +2022-03-02 07:46:38,300 DEBUG HandlerThread:253865 [meta.py:probe():248] probe done +2022-03-02 07:46:38,380 DEBUG SenderThread:253865 [sender.py:send():235] send: files +2022-03-02 07:46:38,380 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-02 07:46:38,385 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:46:38,385 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:46:38,423 DEBUG SenderThread:253865 [sender.py:send():235] send: config +2022-03-02 07:46:38,423 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:46:38,423 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:46:38,423 WARNING SenderThread:253865 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-02 07:46:38,660 INFO Thread-11 :253865 [upload_job.py:push():137] Uploaded file /tmp/tmp8nu_7qs3wandb/2ral18jk-wandb-metadata.json +2022-03-02 07:46:39,274 INFO Thread-8 :253865 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-metadata.json +2022-03-02 07:46:39,275 INFO Thread-8 :253865 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/requirements.txt +2022-03-02 07:46:39,275 INFO Thread-8 :253865 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:46:39,275 INFO Thread-8 :253865 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:46:41,274 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:46:45,275 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:46:47,276 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:46:51,277 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:46:53,278 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:46:53,633 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:46:53,634 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:46:57,279 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:46:59,280 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:01,280 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:03,414 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:47:03,414 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:47:03,415 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:47:03,415 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:47:03,415 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:47:03,416 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:47:04,281 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:47:05,282 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:06,725 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:47:07,282 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:08,283 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/config.yaml +2022-03-02 07:47:08,668 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:47:08,669 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:47:11,284 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:13,284 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:17,286 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:19,287 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:23,288 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:23,707 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:47:23,707 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:47:25,289 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:26,916 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:47:26,916 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:47:26,916 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:47:27,290 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:47:28,290 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:29,290 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:30,291 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:32,291 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:34,292 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:37,105 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:47:38,293 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:38,896 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:47:38,896 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:47:40,294 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:42,295 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:46,296 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:48,297 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:50,120 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:47:50,121 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:47:50,121 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:47:50,297 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:47:51,298 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:52,298 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:54,163 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:47:54,164 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:47:54,299 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:47:56,299 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:00,301 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:02,302 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:06,303 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:07,468 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:48:08,304 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:09,266 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:48:09,266 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:48:13,026 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:48:13,026 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:48:13,027 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:48:13,305 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:13,305 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:48:14,306 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:15,306 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:17,307 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:21,308 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:23,308 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:24,309 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:48:24,309 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:48:25,309 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:29,310 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:31,311 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:33,312 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:35,451 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:48:35,452 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:48:35,452 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:48:36,313 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:48:37,313 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:37,812 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:48:38,313 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:39,314 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:39,372 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:48:39,372 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:48:42,315 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:46,316 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:48,317 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:52,318 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:54,319 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:54,486 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:48:54,486 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:48:56,320 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:48:57,942 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:48:57,943 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:48:57,943 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:48:58,320 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:49:00,321 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:02,322 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:06,323 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:08,167 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:49:08,324 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:09,688 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:49:09,689 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:49:10,324 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:14,326 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:16,326 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:18,327 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:20,382 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:49:20,383 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:49:20,383 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:49:21,328 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:49:22,328 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:24,853 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:49:24,854 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:49:25,329 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:29,331 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:31,331 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:33,332 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:37,333 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:38,510 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:49:39,334 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:39,888 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:49:39,888 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:49:41,335 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:42,802 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:49:42,802 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:49:42,803 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:49:43,335 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:49:45,336 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:47,337 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:49,338 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:53,339 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:55,112 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:49:55,112 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:49:55,340 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:49:59,341 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:01,342 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:03,342 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:05,110 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:50:05,110 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:50:05,111 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:50:05,343 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:50:07,344 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:08,857 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:50:09,344 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:10,195 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:50:10,196 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:50:11,345 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:16,347 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:18,347 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:20,348 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:24,349 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:25,372 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:50:25,373 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:50:26,350 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:27,250 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:50:27,250 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:50:27,251 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:50:27,350 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:50:28,351 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:29,351 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:32,352 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:34,353 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:36,354 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:39,199 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:50:40,355 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:40,545 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:50:40,546 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:50:42,356 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:44,356 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:48,357 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:49,042 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:50:49,042 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:50:49,043 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:50:49,358 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:50:51,359 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:53,359 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:55,616 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:50:55,617 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:50:57,361 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:50:59,361 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:01,362 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:05,363 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:07,364 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:09,365 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:09,537 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:51:10,705 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:51:10,706 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:51:10,768 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:51:10,768 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:51:10,769 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:51:11,365 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:51:12,366 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:13,366 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:15,367 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:17,367 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:21,369 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:23,369 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:25,370 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:25,900 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:51:25,900 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:51:29,371 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:31,372 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:33,019 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,025 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,025 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,030 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,030 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,030 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,030 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,030 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,036 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,036 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,041 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,041 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,046 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,052 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,057 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,062 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,070 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,071 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,071 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,071 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,071 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,071 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,071 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,071 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,071 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,071 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,071 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,072 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,072 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,072 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,072 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,077 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,077 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,077 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,077 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,077 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,078 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,078 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,078 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,078 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,078 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,078 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,078 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,078 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,078 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,078 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,078 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,078 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,079 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,089 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,089 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,089 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,089 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,089 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,089 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,090 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,090 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,090 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,090 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,090 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,090 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,090 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,090 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,090 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,090 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,090 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,090 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,091 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,091 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,091 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,101 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,101 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,101 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,101 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,102 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,103 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,103 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,103 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,103 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,103 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,103 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,103 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,103 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,103 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,114 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,114 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,114 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,114 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,114 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,119 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,120 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,120 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,120 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,120 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,120 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,120 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,120 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,120 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,120 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,120 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,121 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,121 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,121 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,126 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,126 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,126 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,131 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,142 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,147 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,158 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,158 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,158 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,158 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,158 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,158 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,159 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,159 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,159 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,159 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,159 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,159 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,159 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,159 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,164 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,170 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,170 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,170 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,170 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,171 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,171 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,171 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,171 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,171 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,171 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,171 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,171 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,172 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,172 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,172 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,172 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,172 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,172 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,172 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,172 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,172 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,172 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,172 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,173 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,173 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,173 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,173 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,173 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,173 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,173 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,173 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,173 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,173 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,174 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,174 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,174 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,174 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,174 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,174 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,174 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,174 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,174 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,174 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,174 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,175 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,175 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,175 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,175 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,175 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,175 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,175 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,175 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,175 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,175 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,176 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,176 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,176 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,176 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,176 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,176 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,176 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,176 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,176 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,176 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,176 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,177 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,177 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,177 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,177 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,177 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,177 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,177 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,177 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,177 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,177 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,178 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,178 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,178 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,178 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,178 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,178 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,178 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,178 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,178 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,178 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,179 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,179 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,179 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,179 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,179 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,179 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,179 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,179 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,179 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,179 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,180 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,180 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,180 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,180 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,180 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,180 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,180 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,180 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,180 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,180 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,181 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,181 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,181 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,181 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,181 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,181 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,181 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,181 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,181 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,181 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,182 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,182 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,182 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,182 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,182 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,182 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,182 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,182 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,182 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,183 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,183 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,183 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,183 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,183 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,183 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,183 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,183 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,183 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,183 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,183 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,184 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,184 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,184 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,184 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,184 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,184 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,184 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,184 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,184 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,184 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,184 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,185 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,185 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,185 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,185 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,185 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,185 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,185 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,185 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,185 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,185 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,186 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,186 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,186 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,186 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,186 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,186 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,186 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,186 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,186 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,186 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,187 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,187 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,187 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,187 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,187 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,187 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,187 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,187 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,187 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,187 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,187 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,188 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,188 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,188 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,188 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,188 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,188 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,188 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,188 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,188 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,188 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,188 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,189 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,189 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,189 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,189 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,189 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,189 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,189 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,189 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,189 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,190 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,190 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,190 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,190 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,190 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,190 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,190 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,190 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,190 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,191 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,191 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,191 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,191 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,191 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,191 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,191 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,191 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,191 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,191 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,191 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,192 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,192 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,192 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,192 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,192 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,192 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,192 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,192 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,192 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,192 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,192 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,193 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,193 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,193 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,193 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,193 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,193 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,193 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,193 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,193 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,193 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,193 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,193 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,194 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,194 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,194 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,194 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,194 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,194 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,194 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,194 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,194 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,194 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,194 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,195 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,195 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,195 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,195 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,195 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,195 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,195 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,195 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,195 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,195 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,195 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,195 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,196 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,196 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,196 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,196 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,196 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,196 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,196 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,196 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,196 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,196 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,196 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,196 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,197 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,198 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,198 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,198 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,198 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,198 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,198 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,198 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,198 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,198 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,198 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,198 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,198 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,199 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,199 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,199 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,199 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,199 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,199 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,199 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,199 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,199 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,199 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,199 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,200 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,201 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,201 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,201 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,201 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,201 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,201 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,201 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,201 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,201 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,201 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,201 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,201 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,202 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,202 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,202 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,202 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,202 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,202 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,202 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,202 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,202 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,202 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,202 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,202 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,203 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,204 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,205 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,206 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,207 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,208 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,209 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,210 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,211 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,211 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,211 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,211 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,211 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,211 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,211 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,211 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,211 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,211 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,211 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,211 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,212 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,212 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,212 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,212 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,212 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,212 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,212 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,213 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,214 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,215 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,216 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,217 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,218 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,219 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,220 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,221 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,222 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,223 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,224 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,225 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,226 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,227 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,228 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,229 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,230 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,231 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,232 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,233 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,234 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,235 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,236 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,237 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,238 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,239 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,240 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,241 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,242 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,243 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,244 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,245 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,246 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,247 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,248 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,249 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,250 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,251 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,252 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,253 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,254 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,255 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,256 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,257 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,258 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,259 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,260 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,261 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,262 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,263 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,264 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,265 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,266 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,267 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,268 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,269 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,270 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,271 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,272 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,273 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,274 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,275 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,276 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,277 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,278 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,279 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,280 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,281 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,282 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,283 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,284 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,285 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,286 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,287 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,288 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,289 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,290 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,291 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,292 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,293 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,294 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,295 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,296 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,297 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,298 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,299 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,300 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,301 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,302 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,303 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,304 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,305 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,306 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,307 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,308 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,309 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,310 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,311 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,312 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,313 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,314 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,315 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,316 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,317 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,318 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,319 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,320 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,321 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,322 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,323 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,324 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,325 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,326 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,327 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,328 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,329 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,330 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,331 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,332 DEBUG SenderThread:253865 [sender.py:send():235] send: metric +2022-03-02 07:51:33,332 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:51:33,420 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:51:33,449 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:33,511 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:51:34,449 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:34,449 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:51:38,451 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:39,928 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:51:40,451 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:41,012 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:51:41,788 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:51:42,493 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/config.yaml +2022-03-02 07:51:42,493 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:46,494 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:48,495 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:50,496 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:54,469 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:51:54,523 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:51:54,529 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:54,609 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:51:55,524 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:51:56,524 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:51:56,926 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:51:56,927 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:51:58,525 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:00,525 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:04,527 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:06,527 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:08,528 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:10,318 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:52:11,970 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:52:11,971 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:52:12,529 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:14,530 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:15,582 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:52:15,634 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:52:15,720 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:52:16,531 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:16,531 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:52:18,531 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:23,533 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:25,534 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:27,103 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:52:27,103 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:52:27,534 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:31,536 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:33,536 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:35,537 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:36,559 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:52:36,611 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:52:36,698 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:52:37,538 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:37,538 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:52:39,538 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:40,774 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:52:41,539 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:42,312 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:52:42,312 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:52:43,540 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:45,540 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:47,541 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:52,543 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:54,543 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:56,544 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:57,473 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:52:57,473 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:52:57,648 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:52:57,705 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:52:57,791 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:52:58,545 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:52:58,545 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:52:59,545 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:00,546 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:02,546 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:04,547 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:06,548 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:10,549 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:11,154 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:53:12,516 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:53:12,516 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:53:12,550 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:14,551 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:16,551 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:18,464 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:53:18,518 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:53:18,599 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:18,601 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:53:19,599 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:19,600 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:53:21,600 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:23,601 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:25,601 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:27,569 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:53:27,569 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:53:27,602 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:29,603 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:33,604 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:35,605 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:37,605 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:39,215 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:53:39,287 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:53:39,369 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:53:39,606 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:39,606 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:53:41,528 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:53:41,606 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:42,852 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:53:42,852 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:53:43,607 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:45,608 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:47,608 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:52,610 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:54,611 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:56,611 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:58,184 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:53:58,184 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:53:58,612 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:53:59,812 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:53:59,873 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:53:59,959 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:54:00,613 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:00,613 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:54:02,613 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:04,614 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:06,615 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:08,615 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:11,900 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:54:12,617 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:13,419 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:54:13,419 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:54:14,617 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:16,618 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:18,619 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:20,168 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:54:20,222 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:54:20,318 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:54:20,619 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:20,620 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:54:22,620 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:24,621 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:26,621 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:28,796 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:54:28,796 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:54:29,622 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:31,623 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:35,624 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:37,625 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:39,626 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:40,365 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:54:40,418 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:54:40,502 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:54:40,626 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:54:41,626 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:42,276 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:54:42,627 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:43,627 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:43,922 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:54:43,922 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:54:47,629 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:49,629 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:51,630 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:53,631 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:56,632 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:54:59,018 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:54:59,019 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:55:00,428 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:55:00,481 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:55:00,587 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:55:00,633 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:00,633 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:55:02,634 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:04,635 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:06,635 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:08,636 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:12,638 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:12,763 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:55:14,107 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:55:14,107 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:55:14,639 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:16,639 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:18,640 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:20,496 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:55:20,550 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:55:20,637 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:55:20,641 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:55:22,641 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:24,642 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:27,643 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:29,158 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:55:29,159 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:55:29,644 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:31,644 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:33,645 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:37,646 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:39,647 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:40,729 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:55:40,896 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:55:40,977 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:55:41,648 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:41,648 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:55:42,648 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:43,134 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:55:44,249 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:55:44,250 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:55:45,649 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:47,650 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:49,651 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:51,651 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:55,653 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:57,654 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:55:59,311 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:55:59,311 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:55:59,654 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:00,481 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:56:00,534 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:56:00,620 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:56:00,654 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:56:01,655 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:03,656 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:08,657 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:10,658 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:12,658 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:13,485 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:56:14,357 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:56:14,359 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:56:14,659 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:16,660 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:18,661 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:19,941 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:56:19,995 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:56:20,081 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:56:20,661 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:56:21,662 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:22,662 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:24,663 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:26,663 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:28,664 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:29,407 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:56:29,408 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:56:30,665 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:32,665 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:34,666 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:38,667 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:39,270 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:56:39,325 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:56:39,412 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:56:39,668 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:56:40,668 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:41,668 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:42,669 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:43,911 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:56:44,544 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:56:44,545 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:56:45,670 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:49,671 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:51,672 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:53,672 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:55,673 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:57,674 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:58,596 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:56:58,650 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:56:58,736 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:56:59,735 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:56:59,735 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:56:59,787 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:56:59,788 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:57:03,736 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:05,737 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:07,738 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:09,739 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:11,739 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:13,740 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:14,279 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:57:14,902 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:57:14,902 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:57:15,741 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:17,632 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:57:17,685 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:57:17,772 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:57:18,771 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:57:19,771 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:21,772 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:23,773 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:25,773 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:27,774 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:30,035 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:57:30,037 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:57:30,775 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:34,776 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:36,355 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:57:36,411 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:57:36,515 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:57:36,777 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:36,777 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:57:38,778 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:40,778 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:42,779 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:44,636 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:57:44,780 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:45,253 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:57:45,255 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:57:46,781 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:48,781 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:50,782 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:54,784 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:55,052 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:57:55,104 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:57:55,188 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:57:55,784 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:57:56,784 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:57:58,785 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:00,338 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:58:00,340 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:58:00,786 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:03,787 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:05,787 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:07,788 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:09,789 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:13,454 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:58:13,505 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:58:13,584 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:58:13,790 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:13,791 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:58:14,791 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:15,053 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:58:15,551 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:58:15,552 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:58:16,791 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:18,792 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:22,793 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:24,794 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:26,795 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:28,796 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:30,602 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:58:30,603 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:58:30,796 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:31,394 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:58:31,446 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:58:31,530 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:58:31,797 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:58:32,797 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:34,798 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:36,798 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:38,799 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:40,800 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:45,410 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:58:45,678 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:58:45,679 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:58:45,801 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:47,802 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:49,045 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:58:49,120 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:58:49,203 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:58:49,803 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:49,803 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:58:50,803 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:51,803 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:55,805 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:58:57,805 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:00,807 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:00,825 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:59:00,826 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:59:02,807 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:04,808 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:06,368 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:59:06,421 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:59:06,529 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:59:06,809 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:06,809 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:59:07,809 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:08,810 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:10,810 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:12,811 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:14,812 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:15,768 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:59:15,940 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:59:15,941 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:59:16,812 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:18,813 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:20,813 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:22,814 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:23,380 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:59:23,433 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:59:23,515 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:59:23,815 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:59:24,815 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:25,815 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:26,816 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:28,816 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:30,817 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:30,994 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:59:30,996 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:59:32,818 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:34,818 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:36,819 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:39,820 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:40,471 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:59:40,660 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:59:40,743 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:59:40,820 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:59:41,821 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:43,821 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:45,822 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:46,040 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 07:59:46,041 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 07:59:46,122 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 07:59:47,823 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:49,824 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:51,825 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:53,825 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:55,826 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:56,306 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 07:59:56,358 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 07:59:56,441 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 07:59:56,826 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 07:59:57,827 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 07:59:59,827 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:01,098 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:00:01,100 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:00:01,828 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:03,829 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:05,829 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:07,830 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:09,831 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:11,498 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:00:11,554 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:00:11,645 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:00:11,831 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:11,832 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:00:13,832 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:15,833 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:16,151 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:00:16,152 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:00:16,527 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:00:17,834 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:19,834 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:21,835 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:23,836 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:25,836 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:25,845 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:00:25,899 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:00:25,982 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:00:26,837 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:00:27,837 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:30,838 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:31,230 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:00:31,231 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:00:32,839 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:34,839 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:36,840 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:38,841 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:39,290 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:00:39,346 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:00:39,434 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:00:39,841 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:00:40,842 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:41,842 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:42,843 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:44,843 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:46,328 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:00:46,329 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:00:46,844 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:46,879 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:00:48,845 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:50,846 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:51,721 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:00:51,776 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:00:51,864 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:00:52,863 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:52,863 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:00:53,863 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:54,864 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:56,864 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:00:58,865 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:00,866 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:01,379 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:01:01,380 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:01:02,867 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:03,138 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:01:03,191 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:01:03,277 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:01:03,867 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:03,868 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:01:04,868 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:06,868 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:08,869 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:10,870 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:12,870 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:13,415 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:01:13,468 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:01:13,558 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:01:13,871 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:01:14,871 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:15,871 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:16,435 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:01:16,437 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:01:16,872 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:17,318 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:01:18,872 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:20,873 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:22,687 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:01:22,741 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:01:22,829 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:01:22,874 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:01:23,874 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:25,875 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:27,876 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:29,876 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:30,782 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:01:30,836 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:01:30,921 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:01:31,485 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:01:31,486 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:01:31,920 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:31,920 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:01:33,921 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:35,921 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:37,922 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:37,939 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:01:37,994 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:01:38,083 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:01:38,923 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:01:39,923 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:40,923 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:41,924 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:43,924 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:44,146 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:01:44,199 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:01:44,283 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:01:44,925 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:01:45,925 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:46,528 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:01:46,530 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:01:46,926 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:47,740 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:01:47,926 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:49,779 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:01:49,965 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:01:49,965 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:50,092 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:01:50,965 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:01:51,965 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:52,966 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:53,966 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:57,967 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:01:59,968 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:01,572 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:02:01,573 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:02:03,970 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:05,970 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:10,972 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:12,973 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:13,787 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:02:13,856 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:02:13,938 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:02:13,973 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:02:14,973 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:16,617 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:02:16,618 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:02:16,974 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:18,457 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:02:18,975 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:20,975 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:24,977 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:26,977 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:30,979 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:31,722 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:02:31,723 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:02:32,979 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:36,981 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:37,163 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:02:37,214 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:02:37,297 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:02:37,981 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:02:38,981 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:40,982 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:44,984 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:46,984 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:47,009 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:02:47,010 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:02:48,843 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:02:50,986 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:52,986 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:55,987 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:02:59,989 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:00,249 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:03:00,303 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:03:00,388 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:03:00,989 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:00,990 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:03:01,990 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:02,113 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:03:02,114 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:03:05,991 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:07,992 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:09,993 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:13,994 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:15,995 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:17,180 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:03:17,181 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:03:19,226 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:03:19,996 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:21,997 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:23,056 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:03:23,109 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:03:23,191 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:03:23,998 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:23,998 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:03:25,998 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:27,999 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:30,000 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:32,000 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:32,363 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:03:32,364 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:03:37,002 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:39,003 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:43,004 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:45,005 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:45,859 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:03:45,921 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:03:46,005 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:03:46,006 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:03:47,006 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:47,697 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:03:47,699 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:03:48,006 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:49,659 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:03:51,007 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:53,008 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:55,009 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:03:59,010 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:01,011 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:02,745 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:04:02,746 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:04:04,012 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:08,013 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:08,504 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:04:08,558 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:04:08,640 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:04:09,013 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:04:10,014 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:14,015 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:16,016 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:18,015 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:04:18,016 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:04:18,019 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:20,007 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:04:22,020 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:24,021 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:26,022 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:30,023 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:30,776 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:04:30,850 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:04:30,934 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:04:31,023 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:04:32,024 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:33,024 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:33,131 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:04:33,132 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:04:35,025 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:39,026 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:41,027 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:43,028 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:47,029 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:48,344 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:04:48,344 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:04:49,030 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:50,376 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:04:53,031 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:53,273 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:04:53,327 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:04:53,411 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:04:54,032 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:04:55,032 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:04:57,033 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:01,034 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:03,035 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:03,431 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:05:03,432 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:05:05,036 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:09,038 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:11,038 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:13,039 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:15,444 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:05:15,500 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:05:15,583 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:05:16,040 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:05:17,041 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:18,041 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:18,571 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:05:18,572 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:05:20,042 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:20,777 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:05:22,043 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:26,044 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:28,045 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:30,046 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:33,817 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:05:33,818 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:05:34,047 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:36,048 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:37,621 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:05:37,665 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:05:37,750 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:05:38,048 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:05:39,049 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:40,049 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:42,050 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:44,050 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:48,052 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:48,983 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:05:48,984 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:05:50,053 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:51,129 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:05:52,054 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:57,055 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:59,056 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:05:59,620 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:05:59,671 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:05:59,760 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:06:00,057 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:06:01,057 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:02,057 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:04,147 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:06:04,149 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:06:05,058 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:07,059 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:09,060 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:13,061 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:15,062 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:17,063 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:19,382 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:06:19,383 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:06:21,064 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:21,409 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:06:21,462 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:06:21,548 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:06:21,585 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:06:22,065 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:06:23,065 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:24,065 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:26,066 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:28,067 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:32,068 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:34,069 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:34,632 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:06:34,634 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:06:38,070 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:40,071 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:42,072 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:43,826 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:06:44,005 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:06:44,092 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:06:44,093 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:06:46,092 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:48,093 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:49,897 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:06:49,899 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:06:50,094 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:52,215 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:06:54,095 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:06:56,096 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:00,098 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:02,098 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:04,099 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:05,150 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:07:05,150 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:07:05,467 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:07:05,523 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:07:05,610 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:07:06,100 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:06,100 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:07:07,100 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:11,101 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:13,102 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:15,103 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:17,104 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:20,218 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:07:20,220 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:07:21,105 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:22,560 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:07:23,106 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:25,107 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:26,911 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:07:26,961 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:07:27,049 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:07:27,107 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:07:29,108 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:31,109 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:33,109 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:35,110 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:35,446 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:07:35,447 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:07:40,112 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:42,112 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:44,113 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:48,114 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:48,156 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:07:48,209 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:07:48,296 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:07:49,115 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:07:50,115 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:50,691 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:07:50,693 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:07:51,115 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:52,116 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:52,976 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:07:56,117 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:07:58,118 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:00,119 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:02,119 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:05,739 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:08:05,739 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:08:06,121 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:08,121 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:09,291 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:08:09,343 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:08:09,430 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:08:10,122 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:10,122 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:08:11,123 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:13,123 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:17,125 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:19,125 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:20,844 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:08:20,846 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:08:21,126 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:23,327 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:08:25,127 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:27,128 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:29,129 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:30,235 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:08:30,288 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:08:30,371 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:08:31,129 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:31,129 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:08:35,131 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:36,011 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:08:36,012 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:08:37,131 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:39,132 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:43,133 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:45,134 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:47,135 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:49,135 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:51,093 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:08:51,148 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:08:51,234 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:08:51,388 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:08:51,390 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:08:52,154 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:08:53,688 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:08:54,155 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:56,155 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:08:58,156 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:00,157 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:04,158 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:06,159 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:06,644 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:09:06,645 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:09:08,160 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:10,161 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:11,700 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:09:11,755 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:09:11,843 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:09:12,161 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:09:14,162 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:16,163 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:18,163 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:20,164 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:21,866 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:09:21,867 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:09:24,045 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:09:24,165 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:26,166 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:29,167 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:31,168 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:32,198 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:09:32,252 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:09:32,337 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:09:33,168 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:33,169 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:09:34,169 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:35,169 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:36,920 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:09:36,922 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:09:37,170 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:39,171 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:41,172 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:43,172 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:45,173 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:50,175 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:51,983 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:09:51,983 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:09:52,175 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:52,509 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:09:52,553 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:09:52,638 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:09:53,176 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:09:54,176 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:09:54,397 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:09:56,177 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:00,178 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:02,179 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:04,180 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:06,180 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:07,126 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:10:07,128 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:10:08,181 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:12,182 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:12,687 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:10:12,741 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:10:12,828 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:10:13,183 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:10:14,183 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:16,184 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:19,185 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:22,174 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:10:22,177 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:10:23,187 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:24,773 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:10:25,188 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:27,188 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:29,189 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:32,910 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:10:32,967 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:10:33,051 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:10:33,191 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:33,191 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:10:34,191 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:35,191 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:37,192 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:37,225 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:10:37,227 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:10:39,193 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:43,195 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:45,195 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:47,196 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:49,197 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:51,197 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:52,278 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:10:52,278 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:10:53,402 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:10:53,569 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:10:53,653 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:10:54,198 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:10:55,194 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:10:55,199 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:57,200 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:10:59,200 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:04,202 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:06,203 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:07,333 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:11:07,334 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:11:08,203 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:10,204 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:12,205 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:13,078 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:11:13,133 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:11:13,221 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:11:14,220 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:14,220 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:11:15,221 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:16,221 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:20,222 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:22,223 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:22,384 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:11:22,385 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:11:24,224 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:25,588 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:11:26,224 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:30,226 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:32,226 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:32,698 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:11:32,753 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:11:32,844 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:11:33,227 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:11:34,227 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:35,228 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:36,228 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:37,433 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:11:37,434 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:11:38,229 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:43,231 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:45,231 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:47,232 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:49,233 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:51,234 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:52,171 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:11:52,224 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:11:52,306 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:11:52,649 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:11:52,651 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:11:53,305 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:53,305 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:11:55,963 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:11:57,307 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:11:59,307 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:01,308 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:03,309 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:05,309 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:07,793 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:12:07,794 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:12:09,311 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:11,225 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:12:11,277 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:12:11,359 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:11,360 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:12:12,359 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:12:13,359 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:14,359 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:15,360 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:17,360 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:19,361 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:21,361 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:22,860 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:12:22,861 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:12:23,362 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:26,349 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:12:28,364 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:30,014 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:12:30,069 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:12:30,155 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:12:30,364 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:30,365 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:12:32,365 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:34,366 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:36,367 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:37,952 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:12:37,953 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:12:38,367 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:40,368 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:44,369 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:46,370 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:48,371 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:48,752 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:12:48,806 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:12:48,892 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:12:49,371 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:12:50,371 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:52,372 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:53,065 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:12:53,067 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:12:54,373 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:56,373 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:12:56,709 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:13:01,375 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:03,376 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:05,377 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:07,223 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:13:07,271 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:13:07,356 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:13:07,378 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:07,378 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:13:08,158 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:13:08,159 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:13:08,378 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:09,378 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:11,379 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:13,380 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:15,381 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:17,381 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:21,383 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:23,312 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:13:23,313 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:13:23,383 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:25,384 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:25,503 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:13:25,557 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:13:25,645 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:13:26,385 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:26,385 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:13:27,097 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:13:27,385 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:29,386 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:31,386 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:33,387 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:35,388 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:37,388 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:38,444 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:13:38,445 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:13:40,389 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:42,390 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:43,349 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:13:43,404 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:13:43,491 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:13:44,415 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:44,415 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:13:45,415 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:46,415 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:50,417 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:52,417 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:53,527 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:13:53,528 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:13:54,418 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:56,418 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:13:57,457 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:13:58,419 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:00,420 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:00,883 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:14:00,937 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:14:01,022 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:14:01,420 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:14:02,420 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:03,421 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:04,421 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:06,422 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:08,423 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:08,569 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:14:08,570 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:14:10,423 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:12,424 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:15,425 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:17,426 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:18,029 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:14:18,084 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:14:18,172 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:14:18,426 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:14:19,427 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:21,427 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:23,428 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:23,626 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:14:23,627 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:14:25,428 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:27,429 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:27,839 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:14:29,430 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:31,431 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:33,431 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:34,630 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:14:34,686 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:14:34,771 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:14:35,432 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:35,432 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:14:36,432 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:38,433 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:38,672 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:14:38,674 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:14:40,434 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:42,434 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:44,435 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:46,436 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:48,436 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:50,437 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:51,638 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:14:51,853 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:14:51,938 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:14:52,438 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:52,438 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:14:53,848 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:14:53,850 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:14:54,438 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:56,439 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:14:58,236 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:14:58,440 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:00,440 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:02,441 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:04,442 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:06,443 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:07,254 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:15:07,307 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:15:07,417 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:15:07,443 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:15:08,443 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:09,021 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:15:09,022 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:15:10,444 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:12,445 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:14,445 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:16,446 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:18,447 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:20,447 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:22,274 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:15:22,328 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:15:22,416 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:15:22,448 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:15:23,448 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:24,172 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:15:24,174 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:15:24,449 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:25,449 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:27,450 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:28,620 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:15:29,450 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:31,451 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:33,452 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:35,452 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:36,175 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:15:36,231 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:15:36,320 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:15:36,453 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:15:37,453 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:38,453 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:39,379 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:15:39,380 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:15:39,454 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:41,455 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:43,455 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:45,456 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:47,457 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:49,343 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:15:49,397 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:15:49,482 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:49,484 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:15:50,482 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:50,483 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:15:51,483 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:53,483 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:54,510 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:15:54,511 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:15:55,484 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:57,485 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:15:59,009 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:15:59,485 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:01,445 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:16:01,501 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:16:01,532 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:01,590 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:16:02,522 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:02,523 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:16:03,523 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:05,524 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:07,524 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:09,525 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:09,592 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:16:09,593 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:16:11,526 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:12,581 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:16:12,636 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:16:12,726 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:16:13,527 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:13,527 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:16:15,527 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:18,528 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:20,529 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:22,530 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:22,667 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:16:22,721 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:16:22,808 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:16:23,530 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:16:24,530 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:24,650 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:16:24,652 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:16:25,531 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:26,531 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:28,532 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:29,408 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:16:30,532 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:31,576 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:16:31,651 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:16:31,738 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:16:32,533 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:32,533 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:16:33,533 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:34,534 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:36,534 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:38,535 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:39,513 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:16:39,574 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:16:39,665 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:16:39,767 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:16:39,769 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:16:40,572 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:40,572 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:16:41,572 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:42,573 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:44,573 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:46,486 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:16:46,543 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:16:46,629 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:46,631 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:16:47,629 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:47,629 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:16:48,629 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:50,630 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:52,422 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:16:52,471 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:16:52,564 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:16:52,631 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:52,631 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:16:53,631 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:54,631 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:54,870 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:16:54,871 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:16:56,632 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:58,066 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:16:58,239 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:16:58,323 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:16:58,633 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:58,633 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:16:59,633 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:16:59,993 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:17:00,633 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:02,634 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:06,635 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:08,636 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:09,934 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:17:09,935 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:17:12,638 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:14,638 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:18,640 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:20,640 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:22,105 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:17:22,159 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:17:22,240 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:17:22,641 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:22,641 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:17:23,641 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:24,987 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:17:24,988 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:17:25,642 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:27,643 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:29,643 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:30,361 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:17:31,644 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:35,645 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:39,647 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:40,359 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:17:40,360 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:17:41,647 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:43,648 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:45,319 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:17:45,370 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:17:45,453 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:17:45,649 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:17:47,649 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:48,650 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:49,650 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:51,651 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:55,620 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:17:55,620 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:17:56,652 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:17:58,653 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:00,738 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:18:02,655 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:04,655 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:06,656 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:08,138 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:18:08,189 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:18:08,272 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:18:08,657 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:18:10,658 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:10,665 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:18:10,666 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:18:12,658 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:14,659 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:18,660 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:20,661 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:22,662 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:25,834 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:18:25,834 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:18:27,664 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:29,664 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:30,784 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:18:30,843 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:18:30,927 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:18:31,108 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:18:31,665 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:18:32,665 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:33,666 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:35,667 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:37,667 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:40,920 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:18:40,922 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:18:41,669 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:43,669 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:45,670 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:49,672 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:51,672 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:53,263 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:18:53,319 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:18:53,405 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:18:53,673 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:53,673 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:18:54,673 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:55,674 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:55,978 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:18:55,979 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:18:57,674 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:18:59,675 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:01,527 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:19:04,677 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:06,678 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:08,678 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:11,210 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:19:11,211 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:19:12,680 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:14,680 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:15,768 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:19:15,818 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:19:15,898 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:19:16,681 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:19:17,681 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:18,682 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:20,682 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:22,683 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:26,326 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:19:26,327 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:19:26,685 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:28,685 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:30,686 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:31,883 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:19:32,687 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:36,688 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:38,090 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:19:38,142 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:19:38,224 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:19:38,689 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:19:39,689 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:41,596 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:19:41,597 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:19:43,690 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:45,691 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:47,692 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:51,693 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:53,694 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:55,695 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:19:56,680 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:19:56,680 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:19:59,696 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:00,391 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:20:00,444 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:20:00,527 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:20:00,697 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:20:01,697 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:02,266 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:20:05,699 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:07,699 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:09,700 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:11,701 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:11,833 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:20:11,834 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:20:16,703 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:18,704 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:22,455 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:20:22,507 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:20:22,588 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:20:22,705 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:22,705 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:20:24,706 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:26,707 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:26,991 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:20:26,992 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:20:30,708 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:32,709 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:32,739 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:20:36,710 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:38,711 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:40,712 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:42,206 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:20:42,206 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:20:42,712 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:44,497 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:20:44,549 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:20:44,631 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:20:44,713 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:20:46,714 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:48,715 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:50,716 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:54,717 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:56,718 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:20:57,365 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:20:57,367 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:20:58,719 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:00,719 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:03,082 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:21:04,720 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:06,029 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:21:06,082 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:21:06,165 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:21:06,721 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:06,722 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:21:07,722 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:11,723 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:12,438 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:21:12,439 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:21:13,724 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:15,725 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:19,726 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:21,727 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:23,727 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:27,527 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:21:27,527 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:21:27,729 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:27,780 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:21:27,834 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:21:27,939 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:21:28,729 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:21:29,729 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:31,730 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:33,517 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:21:35,731 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:37,732 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:39,733 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:42,739 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:21:42,740 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:21:43,734 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:45,735 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:49,736 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:50,084 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:21:50,266 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:21:50,351 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:21:50,736 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:21:51,737 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:53,737 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:57,739 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:21:57,824 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:21:57,825 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:21:59,739 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:02,741 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:03,920 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:22:06,742 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:08,743 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:10,743 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:11,647 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:22:11,703 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:22:11,794 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:22:12,793 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:12,793 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:22:12,946 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:22:12,948 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:22:13,793 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:16,794 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:18,795 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:20,796 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:24,797 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:26,798 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:28,138 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:22:28,139 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:22:28,799 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:30,799 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:32,938 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:22:32,992 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:22:33,079 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:22:33,800 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:33,801 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:22:34,314 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:22:34,801 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:36,801 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:39,802 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:43,215 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:22:43,217 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:22:43,804 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:45,805 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:47,805 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:49,806 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:53,808 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:54,093 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:22:54,148 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:22:54,238 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:22:54,808 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:22:55,808 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:56,809 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:57,809 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:22:58,265 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:22:58,267 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:23:01,810 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:03,811 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:04,683 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:23:05,812 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:08,813 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:12,814 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:13,480 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:23:13,480 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:23:14,815 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:15,248 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:23:15,300 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:23:15,384 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:23:15,816 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:23:16,816 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:20,817 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:22,818 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:24,819 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:26,820 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:28,753 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:23:28,754 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:23:28,821 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:32,822 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:34,823 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:35,030 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:23:36,040 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:23:36,095 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:23:36,184 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:23:36,823 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:36,824 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:23:41,825 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:43,826 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:43,926 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:23:43,928 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:23:45,826 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:47,827 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:51,829 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:53,830 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:55,830 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:56,815 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:23:56,872 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:23:56,956 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:23:57,858 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:57,858 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:23:58,858 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:23:59,228 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:23:59,230 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:23:59,859 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:01,859 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:03,860 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:05,476 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:24:05,861 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:09,862 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:11,863 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:13,863 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:14,276 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:24:14,277 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:24:15,864 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:17,448 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:24:17,501 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:24:17,585 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:24:17,865 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:17,865 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:24:19,866 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:21,866 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:23,867 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:25,868 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:28,869 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:29,323 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:24:29,325 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:24:32,870 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:34,871 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:35,829 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:24:36,871 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:37,962 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:24:38,018 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:24:38,104 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:24:38,872 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:38,872 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:24:39,872 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:42,874 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:44,409 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:24:44,411 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:24:44,875 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:46,875 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:48,876 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:52,878 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:54,879 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:56,879 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:58,238 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:24:58,294 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:24:58,381 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:24:58,880 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:24:58,880 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:24:59,468 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:24:59,470 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:24:59,881 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:02,882 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:04,882 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:06,229 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:25:06,883 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:09,884 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:11,885 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:14,528 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:25:14,528 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:25:15,886 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:17,887 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:18,187 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:25:18,243 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:25:18,328 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:25:18,887 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:25:19,887 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:21,888 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:23,889 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:27,890 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:29,580 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:25:29,581 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:25:29,891 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:31,892 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:33,892 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:36,576 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:25:38,156 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:25:38,210 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:25:38,293 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:25:38,894 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:38,894 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:25:39,894 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:40,895 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:42,895 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:44,625 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:25:44,626 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:25:44,896 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:46,897 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:50,898 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:52,899 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:54,899 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:56,900 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:25:58,596 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:25:58,770 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:25:58,894 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:25:58,901 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:25:59,704 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:25:59,704 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:25:59,901 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:00,902 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:02,902 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:04,903 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:06,904 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:07,018 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:26:10,905 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:12,906 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:14,757 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:26:14,758 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:26:14,907 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:16,907 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:18,087 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:26:18,141 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:26:18,224 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:26:18,908 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:18,908 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:26:20,909 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:25,910 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:27,911 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:29,808 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:26:29,809 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:26:29,912 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:31,912 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:33,913 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:35,914 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:37,359 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:26:37,461 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:26:37,518 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:26:37,607 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:26:37,915 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:26:38,915 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:39,915 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:41,916 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:43,917 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:44,900 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:26:44,901 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:26:45,918 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:47,918 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:51,920 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:53,921 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:55,921 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:56,759 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:26:56,815 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:26:56,903 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:26:56,922 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:26:57,922 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:58,923 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:26:59,923 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:00,107 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:27:00,109 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:27:02,924 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:04,925 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:07,779 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:27:08,926 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:10,927 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:12,927 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:14,928 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:15,197 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:27:15,197 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:27:15,785 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:27:15,842 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:27:15,927 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:27:15,928 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:27:16,929 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:18,930 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:22,931 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:24,932 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:26,932 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:28,933 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:30,253 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:27:30,254 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:27:30,934 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:32,935 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:34,726 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:27:34,779 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:27:34,864 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:27:34,935 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:27:35,936 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:38,220 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:27:39,937 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:41,938 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:43,939 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:45,464 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:27:45,465 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:27:45,939 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:47,940 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:49,941 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:51,941 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:53,335 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:27:53,388 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:27:53,472 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:27:53,942 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:53,942 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:27:55,943 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:27:58,944 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:00,673 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:28:00,675 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:28:00,944 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:02,945 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:04,946 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:06,946 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:08,565 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:28:08,947 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:10,948 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:11,764 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:28:11,820 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:28:11,908 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:28:11,948 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:28:12,948 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:13,949 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:14,949 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:15,806 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:28:15,807 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:28:16,950 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:20,951 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:22,952 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:24,952 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:26,953 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:28,954 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:30,037 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:28:30,113 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:28:30,198 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:28:30,954 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:30,955 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:28:30,989 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:28:30,990 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:28:31,955 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:32,955 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:36,956 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:38,926 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:28:39,957 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:41,958 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:43,959 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:46,080 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:28:46,080 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:28:47,675 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:28:47,730 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:28:47,816 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:28:47,960 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:47,960 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:28:49,961 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:51,961 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:53,962 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:55,963 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:57,963 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:28:59,964 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:01,279 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:29:01,281 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:29:01,965 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:03,965 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:05,232 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:29:05,284 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:29:05,371 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:29:05,966 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:05,966 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:29:06,966 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:09,271 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:29:10,968 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:12,968 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:14,969 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:16,445 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:29:16,447 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:29:16,970 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:18,971 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:20,971 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:22,349 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:29:22,400 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:29:22,483 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:29:22,972 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:22,972 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:29:24,973 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:27,974 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:29,975 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:31,602 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:29:31,603 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:29:31,975 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:33,976 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:35,977 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:37,977 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:38,969 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:29:39,023 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:29:39,107 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:29:39,643 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:29:40,023 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:40,024 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:29:41,024 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:42,024 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:44,025 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:46,025 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:46,691 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:29:46,692 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:29:48,026 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:51,027 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:53,028 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:55,029 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:55,787 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:29:55,959 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:29:56,048 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:29:57,046 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:29:57,046 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:29:59,047 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:01,048 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:01,752 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:30:01,753 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:30:03,048 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:05,049 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:07,049 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:09,050 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:10,236 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:30:11,051 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:11,265 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:30:11,318 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:30:11,402 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:30:12,051 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:30:13,052 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:15,053 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:16,907 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:30:16,908 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:30:17,053 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:19,054 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:21,055 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:23,056 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:25,056 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:26,015 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:30:26,071 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:30:26,160 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:30:27,072 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:27,072 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:30:29,072 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:31,073 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:32,088 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:30:32,089 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:30:33,074 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:35,074 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:38,075 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:39,984 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:30:40,036 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:30:40,121 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:40,122 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:30:40,594 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:30:41,121 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:41,121 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:30:42,121 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:44,122 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:46,123 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:47,214 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:30:47,215 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:30:48,123 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:50,124 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:52,125 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:52,984 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:30:53,039 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:30:53,127 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:30:54,126 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:54,126 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:30:55,126 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:56,126 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:30:58,127 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:00,128 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:02,128 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:02,350 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:31:02,352 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:31:04,129 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:05,075 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:31:05,128 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:31:05,215 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:31:06,155 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:06,155 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:31:07,156 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:08,156 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:10,157 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:10,938 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:31:12,157 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:14,158 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:16,159 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:16,173 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:31:16,225 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:31:16,310 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:31:17,159 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:17,159 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:31:17,587 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:31:17,589 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:31:18,159 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:20,160 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:22,161 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:24,161 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:26,057 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:31:26,110 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:31:26,192 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:26,194 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:31:27,192 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:31:28,193 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:30,193 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:32,194 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:32,772 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:31:32,773 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:31:34,194 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:34,852 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:31:34,910 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:31:34,997 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:31:35,195 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:31:36,195 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:39,196 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:41,197 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:41,310 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:31:42,777 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:31:42,863 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:31:42,948 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:31:43,197 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:43,198 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:31:44,198 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:45,198 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:47,199 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:47,864 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:31:47,866 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:31:49,200 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:49,688 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:31:49,742 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:31:49,827 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:31:50,200 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:31:51,201 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:52,201 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:53,202 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:55,202 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:55,602 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:31:55,656 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:31:55,743 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:31:56,203 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:31:57,203 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:58,203 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:31:59,204 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:01,161 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:32:01,315 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:01,328 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:32:01,410 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:32:02,305 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:02,305 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:32:02,910 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:32:02,911 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:32:03,305 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:07,307 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:09,307 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:11,752 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:32:13,309 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:15,310 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:17,311 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:17,986 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:32:17,986 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:32:21,312 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:23,313 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:25,352 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:32:25,404 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:32:25,488 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:32:26,314 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:26,314 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:32:27,314 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:29,315 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:33,035 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:32:33,036 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:32:34,317 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:36,317 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:38,318 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:42,105 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:32:42,320 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:44,320 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:46,321 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:48,088 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:32:48,088 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:32:48,539 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:32:48,591 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:32:48,674 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:32:49,322 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:32:50,322 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:54,324 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:56,324 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:32:58,325 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:02,327 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:03,230 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:33:03,232 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:33:04,327 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:07,328 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:11,329 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:11,478 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:33:11,532 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:33:11,614 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:33:12,330 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:33:12,493 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:33:13,330 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:15,331 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:18,400 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:33:18,401 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:33:19,332 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:21,333 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:25,335 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:27,335 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:29,336 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:33,337 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:33,453 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:33:33,453 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:33:34,126 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:33:34,181 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:33:34,263 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:33:34,338 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:33:35,338 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:37,339 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:41,340 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:42,885 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:33:43,341 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:48,342 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:48,620 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:33:48,621 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:33:50,343 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:52,344 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:56,345 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:56,578 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:33:56,631 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:33:56,712 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:33:57,345 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:33:57,346 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:33:58,346 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:02,347 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:04,201 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:34:04,202 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:34:04,348 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:06,349 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:08,349 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:12,351 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:13,236 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:34:14,351 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:18,352 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:18,958 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:34:19,012 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:34:19,095 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:34:19,353 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:19,353 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:34:19,448 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:34:19,450 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:34:20,353 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:22,354 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:26,355 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:29,356 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:31,357 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:34,613 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:34:34,614 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:34:35,358 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:37,359 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:39,360 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:41,133 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:34:41,186 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:34:41,268 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:34:41,360 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:34:42,361 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:43,361 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:43,621 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:34:45,362 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:47,362 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:49,720 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:34:49,721 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:34:51,364 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:53,364 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:34:55,365 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:00,367 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:02,367 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:03,228 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:35:03,282 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:35:03,363 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:35:03,368 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:35:04,368 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:04,962 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:35:04,964 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:35:06,369 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:08,369 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:10,370 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:12,371 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:14,001 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:35:16,372 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:18,373 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:20,053 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:35:20,053 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:35:20,374 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:24,375 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:25,134 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:35:25,187 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:35:25,268 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:35:25,375 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:35:26,376 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:27,376 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:29,377 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:33,378 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:35,209 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:35:35,210 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:35:35,379 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:37,379 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:41,381 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:43,381 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:44,351 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:35:45,382 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:46,889 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:35:46,941 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:35:47,023 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:35:47,382 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:35:48,383 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:49,383 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:50,271 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:35:50,273 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:35:51,384 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:53,384 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:35:55,385 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:00,387 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:02,388 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:04,389 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:05,518 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:36:05,518 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:36:08,346 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:36:08,399 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:36:08,415 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:08,480 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:36:09,405 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:09,406 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:36:10,406 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:12,406 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:14,776 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:36:16,408 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:18,409 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:20,409 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:20,651 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:36:20,653 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:36:22,410 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:26,411 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:28,412 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:29,774 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:36:29,828 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:36:29,911 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:36:30,413 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:30,413 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:36:31,413 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:34,414 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:35,741 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:36:35,742 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:36:36,415 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:39,416 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:43,417 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:45,126 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:36:45,418 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:49,419 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:50,901 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:36:50,901 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:36:51,420 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:51,837 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:36:52,052 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:36:52,134 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:36:52,420 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:36:53,421 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:55,421 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:36:59,423 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:01,423 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:03,424 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:05,425 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:06,040 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:37:06,041 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:37:09,426 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:11,427 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:12,877 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:37:12,934 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:37:13,019 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:37:13,427 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:13,428 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:37:15,573 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:37:18,429 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:20,430 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:21,274 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:37:21,275 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:37:22,431 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:24,431 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:28,433 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:30,433 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:32,434 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:33,943 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:37:33,996 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:37:34,110 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:37:34,435 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:37:36,435 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:37:36,436 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:37:36,438 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:38,439 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:40,440 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:42,441 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:45,964 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:37:46,442 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:48,443 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:50,443 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:51,478 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:37:51,479 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:37:52,444 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:54,793 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:37:54,848 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:37:54,932 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:37:55,445 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:37:56,445 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:57,445 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:37:59,446 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:01,447 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:05,448 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:06,526 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:38:06,528 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:38:07,449 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:09,450 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:11,450 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:15,452 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:15,526 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:38:15,579 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:38:15,664 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:38:16,324 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:38:16,452 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:16,452 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:38:17,452 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:19,453 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:21,453 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:21,591 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:38:21,592 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:38:25,455 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:27,455 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:29,456 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:31,457 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:36,057 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:38:36,110 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:38:36,194 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:38:36,458 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:36,458 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:38:36,721 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:38:36,722 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:38:38,459 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:40,460 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:42,460 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:46,462 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:46,676 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:38:48,463 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:50,463 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:51,888 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:38:51,888 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:38:52,464 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:56,465 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:56,541 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:38:56,596 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:38:56,681 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:38:57,466 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:38:58,466 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:38:59,466 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:00,467 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:02,468 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:06,469 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:07,178 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:39:07,179 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:39:08,470 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:10,470 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:12,471 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:16,472 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:16,890 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:39:16,950 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:39:17,035 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:39:17,071 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:39:17,473 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:39:18,473 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:19,473 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:20,474 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:22,444 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:39:22,445 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:39:23,475 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:27,476 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:29,477 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:31,477 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:33,478 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:35,479 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:37,100 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:39:37,155 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:39:37,241 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:39:37,479 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:39:37,683 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:39:37,684 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:39:39,480 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:41,481 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:43,481 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:45,482 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:47,413 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:39:49,483 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:51,484 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:52,973 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:39:52,974 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:39:53,485 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:55,485 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:57,168 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:39:57,222 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:39:57,305 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:39:57,486 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:39:58,487 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:39:59,487 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:00,487 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:02,488 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:04,489 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:06,489 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:08,088 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:40:08,089 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:40:08,490 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:12,491 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:14,492 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:16,493 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:17,004 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:40:17,061 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:40:17,146 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:40:17,493 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:17,493 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:40:17,780 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:40:18,493 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:20,494 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:23,226 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:40:23,227 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:40:24,495 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:26,496 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:28,497 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:30,497 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:33,499 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:36,870 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:40:36,924 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:40:37,009 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:40:37,500 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:37,500 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:40:38,276 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:40:38,277 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:40:38,500 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:39,501 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:41,501 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:43,502 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:45,503 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:47,503 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:48,152 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:40:51,505 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:53,317 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:40:53,318 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:40:53,506 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:55,506 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:57,234 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:40:57,407 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:40:57,495 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:40:57,507 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:57,507 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:40:58,507 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:40:59,508 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:01,509 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:03,509 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:05,510 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:08,361 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:41:08,362 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:41:08,511 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:10,511 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:14,512 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:16,513 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:16,637 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:41:16,691 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:41:16,773 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:41:17,514 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:41:18,514 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:18,550 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:41:20,516 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:23,407 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:41:23,408 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:41:24,517 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:26,518 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:28,518 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:30,519 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:32,520 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:34,520 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:35,955 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:41:36,009 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:41:36,091 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:41:36,521 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:36,521 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:41:38,654 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:41:38,655 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:41:41,523 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:43,524 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:45,524 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:47,525 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:48,908 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:41:51,526 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:53,527 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:53,766 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:41:53,766 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:41:55,150 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:41:55,204 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:41:55,309 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:41:55,528 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:55,528 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:41:57,528 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:41:59,529 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:01,530 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:03,530 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:05,531 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:08,935 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:42:08,937 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:42:09,532 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:11,533 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:13,534 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:14,178 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:42:14,233 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:42:14,332 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:42:14,534 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:42:15,534 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:17,535 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:19,328 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:42:19,536 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:22,537 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:24,175 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:42:24,176 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:42:26,538 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:28,539 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:30,539 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:32,540 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:33,076 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:42:33,130 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:42:33,214 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:42:33,540 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:42:34,541 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:35,541 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:36,541 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:39,454 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:42:39,455 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:42:40,543 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:42,543 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:44,544 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:46,545 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:48,545 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:49,669 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:42:50,546 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:51,893 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:42:51,947 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:42:52,031 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:42:52,547 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:52,547 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:42:53,547 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:54,609 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:42:54,610 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:42:55,548 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:42:59,549 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:01,550 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:03,550 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:05,551 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:07,552 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:09,552 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:09,750 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:43:09,750 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:43:10,426 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:43:10,482 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:43:10,569 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:43:11,568 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:11,568 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:43:12,568 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:13,569 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:15,569 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:17,570 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:20,088 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:43:22,572 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:24,572 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:24,873 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:43:24,874 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:43:26,573 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:28,513 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:43:28,568 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:43:28,655 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:28,656 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:43:29,655 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:43:30,655 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:32,656 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:34,656 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:38,658 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:40,056 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:43:40,057 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:43:40,658 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:42,659 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:44,660 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:46,575 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:43:46,629 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:43:46,716 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:46,718 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:43:47,717 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:43:48,717 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:50,476 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:43:50,718 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:52,719 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:54,720 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:55,193 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:43:55,194 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:43:56,720 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:43:58,721 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:00,722 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:02,723 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:04,281 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:44:04,333 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:44:04,418 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:44:04,724 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:44:06,724 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:09,725 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:10,238 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:44:10,239 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:44:11,726 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:15,727 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:17,728 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:19,729 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:20,823 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:44:21,577 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:44:21,631 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:44:21,714 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:44:21,729 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:21,729 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:44:22,730 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:23,730 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:25,284 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:44:25,285 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:44:25,731 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:27,731 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:29,732 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:31,733 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:33,733 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:35,734 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:37,735 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:38,491 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:44:38,545 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:44:38,629 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:44:38,735 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:38,736 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:44:39,736 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:40,330 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:44:40,331 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:44:41,736 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:43,737 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:45,738 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:50,739 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:51,193 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:44:52,740 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:54,741 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:55,371 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:44:55,371 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:44:55,685 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:44:55,850 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:44:55,931 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:44:56,828 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:44:56,828 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:44:58,828 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:00,829 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:02,830 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:04,831 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:06,831 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:08,832 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:10,444 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:45:10,445 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:45:10,833 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:11,665 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:45:11,717 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:45:11,803 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:45:11,833 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:45:12,833 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:14,834 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:16,834 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:18,835 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:20,836 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:21,539 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:45:22,836 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:24,837 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:25,706 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:45:25,707 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:45:26,838 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:27,118 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:45:27,173 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:45:27,255 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:45:27,838 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:45:28,838 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:30,839 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:32,840 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:34,840 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:37,841 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:39,842 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:40,930 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:45:40,931 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:45:41,765 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:45:41,819 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:45:41,904 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:41,905 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:45:42,904 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:42,905 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:45:43,905 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:45,905 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:47,906 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:49,907 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:51,908 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:52,048 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:45:53,908 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:55,223 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:45:55,273 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:45:55,352 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:45:55,909 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:55,910 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:45:55,988 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:45:55,989 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:45:57,910 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:45:59,911 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:01,912 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:03,912 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:05,913 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:07,659 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:46:07,712 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:46:07,800 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:46:07,914 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:07,914 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:46:09,914 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:11,072 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:46:11,073 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:46:11,915 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:13,916 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:15,917 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:17,917 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:18,994 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:46:19,048 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:46:19,129 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:46:19,918 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:19,918 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:46:22,420 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:46:22,919 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:24,920 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:26,187 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:46:26,189 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:46:26,921 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:28,922 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:29,205 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:46:29,258 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:46:29,344 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:46:29,922 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:29,922 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:46:30,923 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:32,923 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:34,924 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:36,924 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:38,440 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:46:38,496 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:46:38,582 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:46:38,925 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:38,925 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:46:39,925 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:40,926 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:41,236 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:46:41,237 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:46:42,927 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:44,927 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:46,667 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:46:46,723 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:46:46,808 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:46:46,928 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:46,928 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:46:47,928 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:48,929 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:50,929 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:52,907 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:46:52,930 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:53,870 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:46:53,923 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:46:54,005 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:46:55,004 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:55,004 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:46:56,004 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:56,295 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:46:56,296 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:46:57,005 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:59,005 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:46:59,845 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:46:59,901 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:46:59,986 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:47:00,006 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:47:01,006 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:47:02,006 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:47:03,007 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:47:05,007 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:47:05,514 DEBUG SenderThread:253865 [sender.py:send():235] send: history +2022-03-02 08:47:05,681 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:47:05,796 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:47:06,008 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:47:07,008 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:47:08,008 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:47:09,009 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:47:09,373 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:47:09,374 DEBUG SenderThread:253865 [sender.py:send():235] send: telemetry +2022-03-02 08:47:09,374 DEBUG SenderThread:253865 [sender.py:send():235] send: exit +2022-03-02 08:47:09,374 INFO SenderThread:253865 [sender.py:send_exit():371] handling exit code: 1 +2022-03-02 08:47:09,374 INFO SenderThread:253865 [sender.py:send_exit():373] handling runtime: 3631 +2022-03-02 08:47:09,428 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:47:09,428 INFO SenderThread:253865 [sender.py:send_exit():379] send defer +2022-03-02 08:47:09,428 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:47:09,429 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:47:09,429 INFO HandlerThread:253865 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-02 08:47:09,429 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: defer +2022-03-02 08:47:09,429 INFO SenderThread:253865 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-02 08:47:09,429 INFO SenderThread:253865 [sender.py:transition_state():392] send defer: 1 +2022-03-02 08:47:09,429 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:47:09,429 INFO HandlerThread:253865 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-02 08:47:09,455 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: defer +2022-03-02 08:47:09,455 INFO SenderThread:253865 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-02 08:47:09,455 INFO SenderThread:253865 [sender.py:transition_state():392] send defer: 2 +2022-03-02 08:47:09,456 DEBUG SenderThread:253865 [sender.py:send():235] send: stats +2022-03-02 08:47:09,456 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:47:09,456 INFO HandlerThread:253865 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-02 08:47:09,456 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: defer +2022-03-02 08:47:09,456 INFO SenderThread:253865 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-02 08:47:09,456 INFO SenderThread:253865 [sender.py:transition_state():392] send defer: 3 +2022-03-02 08:47:09,457 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:47:09,457 INFO HandlerThread:253865 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-02 08:47:09,512 DEBUG SenderThread:253865 [sender.py:send():235] send: summary +2022-03-02 08:47:09,530 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:47:09,598 INFO SenderThread:253865 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:47:09,599 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: defer +2022-03-02 08:47:09,599 INFO SenderThread:253865 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-02 08:47:09,599 INFO SenderThread:253865 [sender.py:transition_state():392] send defer: 4 +2022-03-02 08:47:09,599 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:47:09,599 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:47:09,599 INFO HandlerThread:253865 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-02 08:47:09,600 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: defer +2022-03-02 08:47:09,600 INFO SenderThread:253865 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-02 08:47:09,700 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:47:10,075 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:47:10,081 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:47:10,413 INFO SenderThread:253865 [sender.py:transition_state():392] send defer: 5 +2022-03-02 08:47:10,413 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:47:10,414 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:47:10,414 INFO HandlerThread:253865 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-02 08:47:10,414 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: defer +2022-03-02 08:47:10,414 INFO SenderThread:253865 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-02 08:47:10,414 INFO SenderThread:253865 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-02 08:47:10,515 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:47:11,066 INFO Thread-8 :253865 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/config.yaml +2022-03-02 08:47:11,066 INFO SenderThread:253865 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files +2022-03-02 08:47:11,066 INFO SenderThread:253865 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-metadata.json wandb-metadata.json +2022-03-02 08:47:11,066 INFO SenderThread:253865 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log output.log +2022-03-02 08:47:11,067 INFO SenderThread:253865 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json wandb-summary.json +2022-03-02 08:47:11,070 INFO SenderThread:253865 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/requirements.txt requirements.txt +2022-03-02 08:47:11,072 INFO SenderThread:253865 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/config.yaml config.yaml +2022-03-02 08:47:11,072 INFO SenderThread:253865 [sender.py:transition_state():392] send defer: 6 +2022-03-02 08:47:11,073 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:47:11,073 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:47:11,073 INFO HandlerThread:253865 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-02 08:47:11,074 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: defer +2022-03-02 08:47:11,074 INFO SenderThread:253865 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-02 08:47:11,074 INFO SenderThread:253865 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 08:47:11,177 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:47:11,177 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:47:11,279 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:47:11,279 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:47:11,358 INFO Thread-14 :253865 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/requirements.txt +2022-03-02 08:47:11,373 INFO Thread-12 :253865 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/output.log +2022-03-02 08:47:11,380 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:47:11,381 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:47:11,392 INFO Thread-15 :253865 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/config.yaml +2022-03-02 08:47:11,454 INFO Thread-13 :253865 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/files/wandb-summary.json +2022-03-02 08:47:11,482 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:47:11,482 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:47:11,583 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:47:11,583 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:47:11,654 INFO Thread-7 :253865 [sender.py:transition_state():392] send defer: 7 +2022-03-02 08:47:11,655 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:47:11,655 INFO HandlerThread:253865 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-02 08:47:11,655 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: defer +2022-03-02 08:47:11,655 INFO SenderThread:253865 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-02 08:47:11,685 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:47:13,406 INFO SenderThread:253865 [sender.py:transition_state():392] send defer: 8 +2022-03-02 08:47:13,407 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:47:13,407 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:47:13,407 INFO HandlerThread:253865 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-02 08:47:13,407 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: defer +2022-03-02 08:47:13,408 INFO SenderThread:253865 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-02 08:47:13,408 INFO SenderThread:253865 [sender.py:transition_state():392] send defer: 9 +2022-03-02 08:47:13,408 DEBUG SenderThread:253865 [sender.py:send():235] send: final +2022-03-02 08:47:13,409 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:47:13,410 INFO HandlerThread:253865 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-02 08:47:13,410 DEBUG SenderThread:253865 [sender.py:send():235] send: footer +2022-03-02 08:47:13,410 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: defer +2022-03-02 08:47:13,410 INFO SenderThread:253865 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-02 08:47:13,508 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:47:13,508 DEBUG SenderThread:253865 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:47:13,509 INFO SenderThread:253865 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 08:47:13,567 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: get_summary +2022-03-02 08:47:13,661 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-02 08:47:13,664 DEBUG HandlerThread:253865 [handler.py:handle_request():131] handle_request: shutdown +2022-03-02 08:47:13,664 INFO HandlerThread:253865 [handler.py:finish():739] shutting down handler +2022-03-02 08:47:14,410 INFO WriterThread:253865 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/run-35y19oi2.wandb +2022-03-02 08:47:14,567 INFO SenderThread:253865 [sender.py:finish():1075] shutting down sender +2022-03-02 08:47:14,567 INFO SenderThread:253865 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 08:47:14,567 INFO SenderThread:253865 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 08:47:14,573 INFO MainThread:253865 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220302_074637-35y19oi2/logs/debug.log b/wandb/run-20220302_074637-35y19oi2/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..a47edff54a774c33685e4d5c2968396f38b152b4 --- /dev/null +++ b/wandb/run-20220302_074637-35y19oi2/logs/debug.log @@ -0,0 +1,125 @@ +2022-03-02 07:46:37,115 INFO MainThread:253766 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-02 07:46:37,115 INFO MainThread:253766 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-02 07:46:37,115 INFO MainThread:253766 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-02 07:46:37,115 INFO MainThread:253766 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-02 07:46:37,115 INFO MainThread:253766 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/logs/debug.log +2022-03-02 07:46:37,115 INFO MainThread:253766 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_074637-35y19oi2/logs/debug-internal.log +2022-03-02 07:46:37,115 INFO MainThread:253766 [wandb_init.py:init():420] calling init triggers +2022-03-02 07:46:37,115 INFO MainThread:253766 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-02 07:46:37,115 INFO MainThread:253766 [wandb_init.py:init():471] starting backend +2022-03-02 07:46:37,115 INFO MainThread:253766 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-02 07:46:37,171 INFO MainThread:253766 [backend.py:ensure_launched():219] starting backend process... +2022-03-02 07:46:37,226 INFO MainThread:253766 [backend.py:ensure_launched():224] started backend process with pid: 253865 +2022-03-02 07:46:37,228 INFO MainThread:253766 [wandb_init.py:init():480] backend started and connected +2022-03-02 07:46:37,239 INFO MainThread:253766 [wandb_init.py:init():550] updated telemetry +2022-03-02 07:46:37,370 INFO MainThread:253766 [wandb_init.py:init():581] communicating current version +2022-03-02 07:46:38,111 INFO MainThread:253766 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-02 07:46:38,112 INFO MainThread:253766 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-02 07:46:38,273 INFO MainThread:253766 [wandb_init.py:init():624] starting run threads in backend +2022-03-02 07:46:38,384 INFO MainThread:253766 [wandb_run.py:_console_start():1827] atexit reg +2022-03-02 07:46:38,385 INFO MainThread:253766 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-02 07:46:38,385 INFO MainThread:253766 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-02 07:46:38,387 INFO MainThread:253766 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-02 07:46:38,387 INFO MainThread:253766 [wandb_init.py:init():651] run started, returning control to user process +2022-03-02 07:46:38,390 INFO MainThread:253766 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 14, 'per_device_eval_batch_size': 14, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar02_07-45-55_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 14, 'eval_batch_size': 14} +2022-03-02 07:46:38,393 INFO MainThread:253766 [wandb_watch.py:watch():43] Watching +2022-03-02 08:47:07,035 INFO MainThread:253766 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-02 08:47:07,036 INFO MainThread:253766 [wandb_run.py:_restore():1769] restore +2022-03-02 08:47:09,429 INFO MainThread:253766 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 08:47:09,599 INFO MainThread:253766 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 08:47:10,414 INFO MainThread:253766 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 08:47:11,074 INFO MainThread:253766 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 3 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 1985562 +} + +2022-03-02 08:47:11,178 INFO MainThread:253766 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2286373 +} + +2022-03-02 08:47:11,280 INFO MainThread:253766 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2286373 + total_bytes: 2286373 +} + +2022-03-02 08:47:11,381 INFO MainThread:253766 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2286373 + total_bytes: 2286373 +} + +2022-03-02 08:47:11,483 INFO MainThread:253766 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2286373 + total_bytes: 2286373 +} + +2022-03-02 08:47:11,584 INFO MainThread:253766 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2286373 + total_bytes: 2286373 +} + +2022-03-02 08:47:13,407 INFO MainThread:253766 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2286373 + total_bytes: 2286373 +} + +2022-03-02 08:47:13,567 INFO MainThread:253766 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 2286373 + total_bytes: 2286373 +} +local_info { +} + +2022-03-02 08:47:14,706 INFO MainThread:253766 [wandb_run.py:_append_history():2144] rendering history +2022-03-02 08:47:14,706 INFO MainThread:253766 [wandb_run.py:_append_summary():2102] rendering summary +2022-03-02 08:47:14,708 INFO MainThread:253766 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220302_074637-35y19oi2/run-35y19oi2.wandb b/wandb/run-20220302_074637-35y19oi2/run-35y19oi2.wandb new file mode 100644 index 0000000000000000000000000000000000000000..bdb56d55b78e26d05d51efca7ba0b6d027f34c87 --- /dev/null +++ b/wandb/run-20220302_074637-35y19oi2/run-35y19oi2.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5216f35956d3c59ea78d23e16d6b60fe893790ba50a61224a8a63df7d9957bd +size 26782320 diff --git a/wandb/run-20220302_084833-2m6jtwtj/files/config.yaml b/wandb/run-20220302_084833-2m6jtwtj/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e83dd9eef765742c0c556bcc2a88d64c792d2fb7 --- /dev/null +++ b/wandb/run-20220302_084833-2m6jtwtj/files/config.yaml @@ -0,0 +1,713 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + python_version: 3.9.5 + start_time: 1646210913 + t: + 1: + - 1 + - 5 + - 11 + 2: + - 1 + - 5 + - 11 + 3: + - 1 + - 7 + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 16 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 2 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0001 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar02_08-47-50_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 16 +per_device_train_batch_size: + desc: null + value: 16 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 16 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220302_084833-2m6jtwtj/files/output.log b/wandb/run-20220302_084833-2m6jtwtj/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..0a077dc3bdbb5e4dfcd411603ef891f3375f9367 --- /dev/null +++ b/wandb/run-20220302_084833-2m6jtwtj/files/output.log @@ -0,0 +1,37 @@ + + + 0%| | 0/892 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +Traceback (most recent call last): + File "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", line 539, in + main() + File "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", line 491, in main + train_result = trainer.train(resume_from_checkpoint=checkpoint) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1384, in train + tr_loss_step = self.training_step(model, inputs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1959, in training_step + loss = self.compute_loss(model, inputs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1991, in compute_loss + outputs = model(**inputs) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py", line 503, in forward + encoder_outputs = self.encoder( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 1346, in forward + extract_features = self.feature_extractor(input_values) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 514, in forward + hidden_states = conv_layer(hidden_states) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 389, in forward + hidden_states = self.layer_norm(hidden_states) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/normalization.py", line 189, in forward + return F.layer_norm( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/functional.py", line 2347, in layer_norm + return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled) +RuntimeError: CUDA out of memory. Tried to allocate 1.63 GiB (GPU 0; 15.78 GiB total capacity; 10.07 GiB already allocated; 707.31 MiB free; 13.39 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF \ No newline at end of file diff --git a/wandb/run-20220302_084833-2m6jtwtj/files/requirements.txt b/wandb/run-20220302_084833-2m6jtwtj/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220302_084833-2m6jtwtj/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220302_084833-2m6jtwtj/files/wandb-metadata.json b/wandb/run-20220302_084833-2m6jtwtj/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5c2264471d80d30c4817533311bbebb4dd8d9b2d --- /dev/null +++ b/wandb/run-20220302_084833-2m6jtwtj/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-02T08:48:34.227833", + "startedAt": "2022-03-02T08:48:33.147557", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=16", + "--per_device_eval_batch_size=16", + "--gradient_accumulation_steps=2", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=1e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "622e8b594e8af8169be8cfe538228e49ae08c59d" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220302_084833-2m6jtwtj/files/wandb-summary.json b/wandb/run-20220302_084833-2m6jtwtj/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a594e25ada167c5ad54a828e8a5b1a6223620b44 --- /dev/null +++ b/wandb/run-20220302_084833-2m6jtwtj/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 8}} \ No newline at end of file diff --git a/wandb/run-20220302_084833-2m6jtwtj/logs/debug-internal.log b/wandb/run-20220302_084833-2m6jtwtj/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..8991c32be93e51933fce359db9ccda24f1a20555 --- /dev/null +++ b/wandb/run-20220302_084833-2m6jtwtj/logs/debug-internal.log @@ -0,0 +1,137 @@ +2022-03-02 08:48:34,041 INFO MainThread:254137 [internal.py:wandb_internal():89] W&B internal server running at pid: 254137, started at: 2022-03-02 08:48:34.041581 +2022-03-02 08:48:34,043 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: check_version +2022-03-02 08:48:34,044 INFO WriterThread:254137 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/run-2m6jtwtj.wandb +2022-03-02 08:48:34,045 DEBUG SenderThread:254137 [sender.py:send():235] send: header +2022-03-02 08:48:34,046 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: check_version +2022-03-02 08:48:34,116 DEBUG SenderThread:254137 [sender.py:send():235] send: run +2022-03-02 08:48:34,222 INFO SenderThread:254137 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files +2022-03-02 08:48:34,222 INFO SenderThread:254137 [sender.py:_start_run_threads():809] run started: 2m6jtwtj with start time 1646210913 +2022-03-02 08:48:34,222 DEBUG SenderThread:254137 [sender.py:send():235] send: summary +2022-03-02 08:48:34,222 INFO SenderThread:254137 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:48:34,223 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: run_start +2022-03-02 08:48:34,227 DEBUG HandlerThread:254137 [meta.py:__init__():36] meta init +2022-03-02 08:48:34,227 DEBUG HandlerThread:254137 [meta.py:__init__():50] meta init done +2022-03-02 08:48:34,227 DEBUG HandlerThread:254137 [meta.py:probe():210] probe +2022-03-02 08:48:34,234 DEBUG HandlerThread:254137 [meta.py:_setup_git():200] setup git +2022-03-02 08:48:34,248 DEBUG HandlerThread:254137 [meta.py:_setup_git():207] setup git done +2022-03-02 08:48:34,248 DEBUG HandlerThread:254137 [meta.py:_save_pip():54] save pip +2022-03-02 08:48:34,249 DEBUG HandlerThread:254137 [meta.py:_save_pip():68] save pip done +2022-03-02 08:48:34,249 DEBUG HandlerThread:254137 [meta.py:probe():248] probe done +2022-03-02 08:48:34,326 DEBUG SenderThread:254137 [sender.py:send():235] send: files +2022-03-02 08:48:34,327 INFO SenderThread:254137 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-02 08:48:34,331 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:48:34,331 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:48:34,417 DEBUG SenderThread:254137 [sender.py:send():235] send: config +2022-03-02 08:48:34,418 DEBUG SenderThread:254137 [sender.py:send():235] send: metric +2022-03-02 08:48:34,418 DEBUG SenderThread:254137 [sender.py:send():235] send: metric +2022-03-02 08:48:34,418 WARNING SenderThread:254137 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-02 08:48:34,653 INFO Thread-11 :254137 [upload_job.py:push():137] Uploaded file /tmp/tmpvy5l0hy6wandb/q5wa9252-wandb-metadata.json +2022-03-02 08:48:35,224 INFO Thread-8 :254137 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/requirements.txt +2022-03-02 08:48:35,224 INFO Thread-8 :254137 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/wandb-summary.json +2022-03-02 08:48:35,224 INFO Thread-8 :254137 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/output.log +2022-03-02 08:48:35,224 INFO Thread-8 :254137 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/wandb-metadata.json +2022-03-02 08:48:37,223 INFO Thread-8 :254137 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/output.log +2022-03-02 08:48:41,224 INFO Thread-8 :254137 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/output.log +2022-03-02 08:48:42,671 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:48:42,671 DEBUG SenderThread:254137 [sender.py:send():235] send: telemetry +2022-03-02 08:48:42,671 DEBUG SenderThread:254137 [sender.py:send():235] send: exit +2022-03-02 08:48:42,671 INFO SenderThread:254137 [sender.py:send_exit():371] handling exit code: 1 +2022-03-02 08:48:42,671 INFO SenderThread:254137 [sender.py:send_exit():373] handling runtime: 8 +2022-03-02 08:48:42,672 INFO SenderThread:254137 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:48:42,672 INFO SenderThread:254137 [sender.py:send_exit():379] send defer +2022-03-02 08:48:42,672 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:48:42,672 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:48:42,673 INFO HandlerThread:254137 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-02 08:48:42,673 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: defer +2022-03-02 08:48:42,673 INFO SenderThread:254137 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-02 08:48:42,673 INFO SenderThread:254137 [sender.py:transition_state():392] send defer: 1 +2022-03-02 08:48:42,673 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:48:42,673 INFO HandlerThread:254137 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-02 08:48:42,753 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: defer +2022-03-02 08:48:42,753 INFO SenderThread:254137 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-02 08:48:42,753 INFO SenderThread:254137 [sender.py:transition_state():392] send defer: 2 +2022-03-02 08:48:42,754 DEBUG SenderThread:254137 [sender.py:send():235] send: stats +2022-03-02 08:48:42,754 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:48:42,754 INFO HandlerThread:254137 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-02 08:48:42,755 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: defer +2022-03-02 08:48:42,755 INFO SenderThread:254137 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-02 08:48:42,755 INFO SenderThread:254137 [sender.py:transition_state():392] send defer: 3 +2022-03-02 08:48:42,755 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:48:42,755 INFO HandlerThread:254137 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-02 08:48:42,755 DEBUG SenderThread:254137 [sender.py:send():235] send: summary +2022-03-02 08:48:42,756 INFO SenderThread:254137 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:48:42,756 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: defer +2022-03-02 08:48:42,756 INFO SenderThread:254137 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-02 08:48:42,756 INFO SenderThread:254137 [sender.py:transition_state():392] send defer: 4 +2022-03-02 08:48:42,756 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:48:42,756 INFO HandlerThread:254137 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-02 08:48:42,756 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: defer +2022-03-02 08:48:42,756 INFO SenderThread:254137 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-02 08:48:42,774 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:48:42,847 INFO SenderThread:254137 [sender.py:transition_state():392] send defer: 5 +2022-03-02 08:48:42,848 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:48:42,848 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:48:42,848 INFO HandlerThread:254137 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-02 08:48:42,848 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: defer +2022-03-02 08:48:42,849 INFO SenderThread:254137 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-02 08:48:42,849 INFO SenderThread:254137 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-02 08:48:42,949 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:48:43,225 INFO Thread-8 :254137 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/wandb-summary.json +2022-03-02 08:48:43,226 INFO SenderThread:254137 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/output.log +2022-03-02 08:48:43,227 INFO SenderThread:254137 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/config.yaml +2022-03-02 08:48:43,227 INFO SenderThread:254137 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files +2022-03-02 08:48:43,227 INFO SenderThread:254137 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/wandb-metadata.json wandb-metadata.json +2022-03-02 08:48:43,227 INFO SenderThread:254137 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/output.log output.log +2022-03-02 08:48:43,227 INFO SenderThread:254137 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/wandb-summary.json wandb-summary.json +2022-03-02 08:48:43,227 INFO SenderThread:254137 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/requirements.txt requirements.txt +2022-03-02 08:48:43,228 INFO SenderThread:254137 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/config.yaml config.yaml +2022-03-02 08:48:43,230 INFO SenderThread:254137 [sender.py:transition_state():392] send defer: 6 +2022-03-02 08:48:43,231 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:48:43,231 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:48:43,231 INFO HandlerThread:254137 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-02 08:48:43,234 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: defer +2022-03-02 08:48:43,234 INFO SenderThread:254137 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-02 08:48:43,234 INFO SenderThread:254137 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 08:48:43,339 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:48:43,339 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:48:43,440 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:48:43,441 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:48:43,497 INFO Thread-13 :254137 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/wandb-summary.json +2022-03-02 08:48:43,498 INFO Thread-14 :254137 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/requirements.txt +2022-03-02 08:48:43,502 INFO Thread-12 :254137 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/output.log +2022-03-02 08:48:43,533 INFO Thread-15 :254137 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/files/config.yaml +2022-03-02 08:48:43,542 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:48:43,542 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:48:43,644 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:48:43,644 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:48:43,733 INFO Thread-7 :254137 [sender.py:transition_state():392] send defer: 7 +2022-03-02 08:48:43,734 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:48:43,734 INFO HandlerThread:254137 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-02 08:48:43,734 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: defer +2022-03-02 08:48:43,734 INFO SenderThread:254137 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-02 08:48:43,745 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:48:43,905 INFO SenderThread:254137 [sender.py:transition_state():392] send defer: 8 +2022-03-02 08:48:43,905 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:48:43,906 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:48:43,906 INFO HandlerThread:254137 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-02 08:48:43,906 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: defer +2022-03-02 08:48:43,906 INFO SenderThread:254137 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-02 08:48:43,906 INFO SenderThread:254137 [sender.py:transition_state():392] send defer: 9 +2022-03-02 08:48:43,907 DEBUG SenderThread:254137 [sender.py:send():235] send: final +2022-03-02 08:48:43,907 DEBUG SenderThread:254137 [sender.py:send():235] send: footer +2022-03-02 08:48:43,907 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:48:43,907 INFO HandlerThread:254137 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-02 08:48:43,907 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: defer +2022-03-02 08:48:43,908 INFO SenderThread:254137 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-02 08:48:44,007 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:48:44,007 DEBUG SenderThread:254137 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:48:44,007 INFO SenderThread:254137 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 08:48:44,067 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: get_summary +2022-03-02 08:48:44,068 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-02 08:48:44,068 DEBUG HandlerThread:254137 [handler.py:handle_request():131] handle_request: shutdown +2022-03-02 08:48:44,068 INFO HandlerThread:254137 [handler.py:finish():739] shutting down handler +2022-03-02 08:48:44,907 INFO WriterThread:254137 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/run-2m6jtwtj.wandb +2022-03-02 08:48:45,066 INFO SenderThread:254137 [sender.py:finish():1075] shutting down sender +2022-03-02 08:48:45,066 INFO SenderThread:254137 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 08:48:45,066 INFO SenderThread:254137 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 08:48:45,068 INFO MainThread:254137 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220302_084833-2m6jtwtj/logs/debug.log b/wandb/run-20220302_084833-2m6jtwtj/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0cd12adb99b373281c8e91b7cb272835616732b9 --- /dev/null +++ b/wandb/run-20220302_084833-2m6jtwtj/logs/debug.log @@ -0,0 +1,107 @@ +2022-03-02 08:48:33,149 INFO MainThread:254038 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-02 08:48:33,149 INFO MainThread:254038 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-02 08:48:33,149 INFO MainThread:254038 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-02 08:48:33,149 INFO MainThread:254038 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-02 08:48:33,149 INFO MainThread:254038 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/logs/debug.log +2022-03-02 08:48:33,149 INFO MainThread:254038 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_084833-2m6jtwtj/logs/debug-internal.log +2022-03-02 08:48:33,149 INFO MainThread:254038 [wandb_init.py:init():420] calling init triggers +2022-03-02 08:48:33,149 INFO MainThread:254038 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-02 08:48:33,149 INFO MainThread:254038 [wandb_init.py:init():471] starting backend +2022-03-02 08:48:33,149 INFO MainThread:254038 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-02 08:48:33,204 INFO MainThread:254038 [backend.py:ensure_launched():219] starting backend process... +2022-03-02 08:48:33,259 INFO MainThread:254038 [backend.py:ensure_launched():224] started backend process with pid: 254137 +2022-03-02 08:48:33,261 INFO MainThread:254038 [wandb_init.py:init():480] backend started and connected +2022-03-02 08:48:33,270 INFO MainThread:254038 [wandb_init.py:init():550] updated telemetry +2022-03-02 08:48:33,399 INFO MainThread:254038 [wandb_init.py:init():581] communicating current version +2022-03-02 08:48:34,114 INFO MainThread:254038 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-02 08:48:34,115 INFO MainThread:254038 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-02 08:48:34,222 INFO MainThread:254038 [wandb_init.py:init():624] starting run threads in backend +2022-03-02 08:48:34,331 INFO MainThread:254038 [wandb_run.py:_console_start():1827] atexit reg +2022-03-02 08:48:34,331 INFO MainThread:254038 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-02 08:48:34,332 INFO MainThread:254038 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-02 08:48:34,333 INFO MainThread:254038 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-02 08:48:34,334 INFO MainThread:254038 [wandb_init.py:init():651] run started, returning control to user process +2022-03-02 08:48:34,337 INFO MainThread:254038 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar02_08-47-50_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 16, 'eval_batch_size': 16} +2022-03-02 08:48:34,342 INFO MainThread:254038 [wandb_watch.py:watch():43] Watching +2022-03-02 08:48:40,411 INFO MainThread:254038 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-02 08:48:40,413 INFO MainThread:254038 [wandb_run.py:_restore():1769] restore +2022-03-02 08:48:42,673 INFO MainThread:254038 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 08:48:42,848 INFO MainThread:254038 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 08:48:43,238 INFO MainThread:254038 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 8718 +} + +2022-03-02 08:48:43,340 INFO MainThread:254038 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:48:43,441 INFO MainThread:254038 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:48:43,543 INFO MainThread:254038 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:48:43,644 INFO MainThread:254038 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:48:43,906 INFO MainThread:254038 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:48:44,066 INFO MainThread:254038 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} +local_info { +} + +2022-03-02 08:48:45,198 INFO MainThread:254038 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220302_084833-2m6jtwtj/run-2m6jtwtj.wandb b/wandb/run-20220302_084833-2m6jtwtj/run-2m6jtwtj.wandb new file mode 100644 index 0000000000000000000000000000000000000000..bfb1515c604b4ca8c0e93ade7d4406b8df18b55e Binary files /dev/null and b/wandb/run-20220302_084833-2m6jtwtj/run-2m6jtwtj.wandb differ diff --git a/wandb/run-20220302_085000-3jbwcde6/files/config.yaml b/wandb/run-20220302_085000-3jbwcde6/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00d060d0aba3207a3f9c41c91763e7341d2d12f9 --- /dev/null +++ b/wandb/run-20220302_085000-3jbwcde6/files/config.yaml @@ -0,0 +1,713 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + python_version: 3.9.5 + start_time: 1646211000 + t: + 1: + - 1 + - 5 + - 11 + 2: + - 1 + - 5 + - 11 + 3: + - 1 + - 7 + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 16 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 4 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0001 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar02_08-49-20_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 16 +per_device_train_batch_size: + desc: null + value: 16 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 16 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220302_085000-3jbwcde6/files/output.log b/wandb/run-20220302_085000-3jbwcde6/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..4b5d80ea673c020a6fab50cb87b4aeb552ad7e90 --- /dev/null +++ b/wandb/run-20220302_085000-3jbwcde6/files/output.log @@ -0,0 +1,37 @@ + + + 0%| | 0/446 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +Traceback (most recent call last): + File "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", line 539, in + main() + File "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", line 491, in main + train_result = trainer.train(resume_from_checkpoint=checkpoint) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1384, in train + tr_loss_step = self.training_step(model, inputs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1959, in training_step + loss = self.compute_loss(model, inputs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1991, in compute_loss + outputs = model(**inputs) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py", line 503, in forward + encoder_outputs = self.encoder( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 1346, in forward + extract_features = self.feature_extractor(input_values) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 514, in forward + hidden_states = conv_layer(hidden_states) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 389, in forward + hidden_states = self.layer_norm(hidden_states) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/normalization.py", line 189, in forward + return F.layer_norm( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/functional.py", line 2347, in layer_norm + return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled) +RuntimeError: CUDA out of memory. Tried to allocate 1.64 GiB (GPU 0; 15.78 GiB total capacity; 10.11 GiB already allocated; 707.31 MiB free; 13.39 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF \ No newline at end of file diff --git a/wandb/run-20220302_085000-3jbwcde6/files/requirements.txt b/wandb/run-20220302_085000-3jbwcde6/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220302_085000-3jbwcde6/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220302_085000-3jbwcde6/files/wandb-metadata.json b/wandb/run-20220302_085000-3jbwcde6/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..99ea5dc72e071e5902d254b5123abc8fee4443b6 --- /dev/null +++ b/wandb/run-20220302_085000-3jbwcde6/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-02T08:50:01.367126", + "startedAt": "2022-03-02T08:50:00.316296", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=16", + "--per_device_eval_batch_size=16", + "--gradient_accumulation_steps=4", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=1e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "622e8b594e8af8169be8cfe538228e49ae08c59d" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220302_085000-3jbwcde6/files/wandb-summary.json b/wandb/run-20220302_085000-3jbwcde6/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a594e25ada167c5ad54a828e8a5b1a6223620b44 --- /dev/null +++ b/wandb/run-20220302_085000-3jbwcde6/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 8}} \ No newline at end of file diff --git a/wandb/run-20220302_085000-3jbwcde6/logs/debug-internal.log b/wandb/run-20220302_085000-3jbwcde6/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..fe4ff5ca5ab950f152c095a9d1e7e316ab47d7de --- /dev/null +++ b/wandb/run-20220302_085000-3jbwcde6/logs/debug-internal.log @@ -0,0 +1,141 @@ +2022-03-02 08:50:01,203 INFO MainThread:254314 [internal.py:wandb_internal():89] W&B internal server running at pid: 254314, started at: 2022-03-02 08:50:01.203327 +2022-03-02 08:50:01,205 INFO WriterThread:254314 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/run-3jbwcde6.wandb +2022-03-02 08:50:01,206 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: check_version +2022-03-02 08:50:01,207 DEBUG SenderThread:254314 [sender.py:send():235] send: header +2022-03-02 08:50:01,207 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: check_version +2022-03-02 08:50:01,273 DEBUG SenderThread:254314 [sender.py:send():235] send: run +2022-03-02 08:50:01,361 INFO SenderThread:254314 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files +2022-03-02 08:50:01,361 INFO SenderThread:254314 [sender.py:_start_run_threads():809] run started: 3jbwcde6 with start time 1646211000 +2022-03-02 08:50:01,361 DEBUG SenderThread:254314 [sender.py:send():235] send: summary +2022-03-02 08:50:01,361 INFO SenderThread:254314 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:50:01,362 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: run_start +2022-03-02 08:50:01,366 DEBUG HandlerThread:254314 [meta.py:__init__():36] meta init +2022-03-02 08:50:01,367 DEBUG HandlerThread:254314 [meta.py:__init__():50] meta init done +2022-03-02 08:50:01,367 DEBUG HandlerThread:254314 [meta.py:probe():210] probe +2022-03-02 08:50:01,373 DEBUG HandlerThread:254314 [meta.py:_setup_git():200] setup git +2022-03-02 08:50:01,387 DEBUG HandlerThread:254314 [meta.py:_setup_git():207] setup git done +2022-03-02 08:50:01,388 DEBUG HandlerThread:254314 [meta.py:_save_pip():54] save pip +2022-03-02 08:50:01,388 DEBUG HandlerThread:254314 [meta.py:_save_pip():68] save pip done +2022-03-02 08:50:01,388 DEBUG HandlerThread:254314 [meta.py:probe():248] probe done +2022-03-02 08:50:01,468 DEBUG SenderThread:254314 [sender.py:send():235] send: files +2022-03-02 08:50:01,468 INFO SenderThread:254314 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-02 08:50:01,473 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:50:01,473 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:50:01,520 DEBUG SenderThread:254314 [sender.py:send():235] send: config +2022-03-02 08:50:01,521 DEBUG SenderThread:254314 [sender.py:send():235] send: metric +2022-03-02 08:50:01,522 DEBUG SenderThread:254314 [sender.py:send():235] send: metric +2022-03-02 08:50:01,522 WARNING SenderThread:254314 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-02 08:50:01,741 INFO Thread-11 :254314 [upload_job.py:push():137] Uploaded file /tmp/tmpfcch1qofwandb/3oinsdv0-wandb-metadata.json +2022-03-02 08:50:02,363 INFO Thread-8 :254314 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/wandb-metadata.json +2022-03-02 08:50:02,363 INFO Thread-8 :254314 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/wandb-summary.json +2022-03-02 08:50:02,363 INFO Thread-8 :254314 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/output.log +2022-03-02 08:50:02,363 INFO Thread-8 :254314 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/requirements.txt +2022-03-02 08:50:04,362 INFO Thread-8 :254314 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/output.log +2022-03-02 08:50:08,364 INFO Thread-8 :254314 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/output.log +2022-03-02 08:50:09,886 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:50:09,886 DEBUG SenderThread:254314 [sender.py:send():235] send: telemetry +2022-03-02 08:50:09,887 DEBUG SenderThread:254314 [sender.py:send():235] send: exit +2022-03-02 08:50:09,887 INFO SenderThread:254314 [sender.py:send_exit():371] handling exit code: 1 +2022-03-02 08:50:09,887 INFO SenderThread:254314 [sender.py:send_exit():373] handling runtime: 8 +2022-03-02 08:50:09,887 INFO SenderThread:254314 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:50:09,887 INFO SenderThread:254314 [sender.py:send_exit():379] send defer +2022-03-02 08:50:09,888 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:50:09,888 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:50:09,888 INFO HandlerThread:254314 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-02 08:50:09,888 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: defer +2022-03-02 08:50:09,888 INFO SenderThread:254314 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-02 08:50:09,889 INFO SenderThread:254314 [sender.py:transition_state():392] send defer: 1 +2022-03-02 08:50:09,889 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:50:09,889 INFO HandlerThread:254314 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-02 08:50:09,992 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: defer +2022-03-02 08:50:09,992 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:50:09,992 INFO SenderThread:254314 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-02 08:50:09,993 INFO SenderThread:254314 [sender.py:transition_state():392] send defer: 2 +2022-03-02 08:50:09,993 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:50:09,993 DEBUG SenderThread:254314 [sender.py:send():235] send: stats +2022-03-02 08:50:09,994 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:50:09,994 INFO HandlerThread:254314 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-02 08:50:09,994 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: defer +2022-03-02 08:50:09,995 INFO SenderThread:254314 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-02 08:50:09,995 INFO SenderThread:254314 [sender.py:transition_state():392] send defer: 3 +2022-03-02 08:50:09,995 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:50:09,995 INFO HandlerThread:254314 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-02 08:50:09,995 DEBUG SenderThread:254314 [sender.py:send():235] send: summary +2022-03-02 08:50:09,995 INFO SenderThread:254314 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:50:09,996 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: defer +2022-03-02 08:50:09,996 INFO SenderThread:254314 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-02 08:50:09,996 INFO SenderThread:254314 [sender.py:transition_state():392] send defer: 4 +2022-03-02 08:50:09,996 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:50:09,996 INFO HandlerThread:254314 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-02 08:50:09,996 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: defer +2022-03-02 08:50:09,996 INFO SenderThread:254314 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-02 08:50:10,100 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:50:10,126 INFO SenderThread:254314 [sender.py:transition_state():392] send defer: 5 +2022-03-02 08:50:10,126 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:50:10,127 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:50:10,127 INFO HandlerThread:254314 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-02 08:50:10,127 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: defer +2022-03-02 08:50:10,127 INFO SenderThread:254314 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-02 08:50:10,127 INFO SenderThread:254314 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-02 08:50:10,228 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:50:10,364 INFO Thread-8 :254314 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/wandb-summary.json +2022-03-02 08:50:10,365 INFO SenderThread:254314 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/output.log +2022-03-02 08:50:10,366 INFO SenderThread:254314 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/config.yaml +2022-03-02 08:50:10,366 INFO SenderThread:254314 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files +2022-03-02 08:50:10,366 INFO SenderThread:254314 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/wandb-metadata.json wandb-metadata.json +2022-03-02 08:50:10,366 INFO SenderThread:254314 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/output.log output.log +2022-03-02 08:50:10,366 INFO SenderThread:254314 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/wandb-summary.json wandb-summary.json +2022-03-02 08:50:10,367 INFO SenderThread:254314 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/requirements.txt requirements.txt +2022-03-02 08:50:10,372 INFO SenderThread:254314 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/config.yaml config.yaml +2022-03-02 08:50:10,375 INFO SenderThread:254314 [sender.py:transition_state():392] send defer: 6 +2022-03-02 08:50:10,376 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:50:10,376 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:50:10,377 INFO HandlerThread:254314 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-02 08:50:10,379 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: defer +2022-03-02 08:50:10,379 INFO SenderThread:254314 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-02 08:50:10,379 INFO SenderThread:254314 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 08:50:10,477 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:50:10,478 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:50:10,579 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:50:10,579 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:50:10,641 INFO Thread-12 :254314 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/output.log +2022-03-02 08:50:10,642 INFO Thread-14 :254314 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/requirements.txt +2022-03-02 08:50:10,656 INFO Thread-15 :254314 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/config.yaml +2022-03-02 08:50:10,681 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:50:10,681 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:50:10,700 INFO Thread-13 :254314 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/files/wandb-summary.json +2022-03-02 08:50:10,782 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:50:10,782 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:50:10,884 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:50:10,884 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:50:10,901 INFO Thread-7 :254314 [sender.py:transition_state():392] send defer: 7 +2022-03-02 08:50:10,901 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:50:10,901 INFO HandlerThread:254314 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-02 08:50:10,902 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: defer +2022-03-02 08:50:10,902 INFO SenderThread:254314 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-02 08:50:10,985 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:50:11,046 INFO SenderThread:254314 [sender.py:transition_state():392] send defer: 8 +2022-03-02 08:50:11,047 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:50:11,047 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:50:11,047 INFO HandlerThread:254314 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-02 08:50:11,047 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: defer +2022-03-02 08:50:11,047 INFO SenderThread:254314 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-02 08:50:11,047 INFO SenderThread:254314 [sender.py:transition_state():392] send defer: 9 +2022-03-02 08:50:11,048 DEBUG SenderThread:254314 [sender.py:send():235] send: final +2022-03-02 08:50:11,048 DEBUG SenderThread:254314 [sender.py:send():235] send: footer +2022-03-02 08:50:11,048 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:50:11,048 INFO HandlerThread:254314 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-02 08:50:11,049 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: defer +2022-03-02 08:50:11,049 INFO SenderThread:254314 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-02 08:50:11,148 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:50:11,148 DEBUG SenderThread:254314 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:50:11,148 INFO SenderThread:254314 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 08:50:11,233 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: get_summary +2022-03-02 08:50:11,233 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-02 08:50:11,234 DEBUG HandlerThread:254314 [handler.py:handle_request():131] handle_request: shutdown +2022-03-02 08:50:11,234 INFO HandlerThread:254314 [handler.py:finish():739] shutting down handler +2022-03-02 08:50:12,049 INFO WriterThread:254314 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/run-3jbwcde6.wandb +2022-03-02 08:50:12,232 INFO SenderThread:254314 [sender.py:finish():1075] shutting down sender +2022-03-02 08:50:12,232 INFO SenderThread:254314 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 08:50:12,232 INFO SenderThread:254314 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 08:50:12,234 INFO MainThread:254314 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220302_085000-3jbwcde6/logs/debug.log b/wandb/run-20220302_085000-3jbwcde6/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6f0ac56b3bacd36b29a083ebe7d9b250c0cef219 --- /dev/null +++ b/wandb/run-20220302_085000-3jbwcde6/logs/debug.log @@ -0,0 +1,123 @@ +2022-03-02 08:50:00,317 INFO MainThread:254214 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-02 08:50:00,318 INFO MainThread:254214 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-02 08:50:00,318 INFO MainThread:254214 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-02 08:50:00,318 INFO MainThread:254214 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-02 08:50:00,318 INFO MainThread:254214 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/logs/debug.log +2022-03-02 08:50:00,318 INFO MainThread:254214 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085000-3jbwcde6/logs/debug-internal.log +2022-03-02 08:50:00,318 INFO MainThread:254214 [wandb_init.py:init():420] calling init triggers +2022-03-02 08:50:00,318 INFO MainThread:254214 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-02 08:50:00,318 INFO MainThread:254214 [wandb_init.py:init():471] starting backend +2022-03-02 08:50:00,318 INFO MainThread:254214 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-02 08:50:00,373 INFO MainThread:254214 [backend.py:ensure_launched():219] starting backend process... +2022-03-02 08:50:00,427 INFO MainThread:254214 [backend.py:ensure_launched():224] started backend process with pid: 254314 +2022-03-02 08:50:00,429 INFO MainThread:254214 [wandb_init.py:init():480] backend started and connected +2022-03-02 08:50:00,439 INFO MainThread:254214 [wandb_init.py:init():550] updated telemetry +2022-03-02 08:50:00,568 INFO MainThread:254214 [wandb_init.py:init():581] communicating current version +2022-03-02 08:50:01,272 INFO MainThread:254214 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-02 08:50:01,272 INFO MainThread:254214 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-02 08:50:01,362 INFO MainThread:254214 [wandb_init.py:init():624] starting run threads in backend +2022-03-02 08:50:01,472 INFO MainThread:254214 [wandb_run.py:_console_start():1827] atexit reg +2022-03-02 08:50:01,473 INFO MainThread:254214 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-02 08:50:01,473 INFO MainThread:254214 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-02 08:50:01,475 INFO MainThread:254214 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-02 08:50:01,475 INFO MainThread:254214 [wandb_init.py:init():651] run started, returning control to user process +2022-03-02 08:50:01,477 INFO MainThread:254214 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar02_08-49-20_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 16, 'eval_batch_size': 16} +2022-03-02 08:50:01,480 INFO MainThread:254214 [wandb_watch.py:watch():43] Watching +2022-03-02 08:50:07,442 INFO MainThread:254214 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-02 08:50:07,444 INFO MainThread:254214 [wandb_run.py:_restore():1769] restore +2022-03-02 08:50:09,888 INFO MainThread:254214 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 08:50:09,994 INFO MainThread:254214 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 08:50:10,127 INFO MainThread:254214 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 08:50:10,377 INFO MainThread:254214 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 8718 +} + +2022-03-02 08:50:10,478 INFO MainThread:254214 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:50:10,580 INFO MainThread:254214 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:50:10,681 INFO MainThread:254214 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:50:10,783 INFO MainThread:254214 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:50:10,884 INFO MainThread:254214 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:50:11,047 INFO MainThread:254214 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:50:11,232 INFO MainThread:254214 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} +local_info { +} + +2022-03-02 08:50:12,375 INFO MainThread:254214 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220302_085000-3jbwcde6/run-3jbwcde6.wandb b/wandb/run-20220302_085000-3jbwcde6/run-3jbwcde6.wandb new file mode 100644 index 0000000000000000000000000000000000000000..01885a6481a3883c66c1b6aba11198cb357daae3 Binary files /dev/null and b/wandb/run-20220302_085000-3jbwcde6/run-3jbwcde6.wandb differ diff --git a/wandb/run-20220302_085127-8m3km5vl/files/config.yaml b/wandb/run-20220302_085127-8m3km5vl/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c3134b10f5564c25844b3b47534379c78ccab4bb --- /dev/null +++ b/wandb/run-20220302_085127-8m3km5vl/files/config.yaml @@ -0,0 +1,713 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + python_version: 3.9.5 + start_time: 1646211087 + t: + 1: + - 1 + - 5 + - 11 + 2: + - 1 + - 5 + - 11 + 3: + - 1 + - 7 + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 16 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 8 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0001 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar02_08-50-47_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 16 +per_device_train_batch_size: + desc: null + value: 16 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 16 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220302_085127-8m3km5vl/files/output.log b/wandb/run-20220302_085127-8m3km5vl/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..cd41d3f1b22cda18c0865f38c39dff62d7f0abed --- /dev/null +++ b/wandb/run-20220302_085127-8m3km5vl/files/output.log @@ -0,0 +1,37 @@ + + + 0%| | 0/223 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +Traceback (most recent call last): + File "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", line 539, in + main() + File "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", line 491, in main + train_result = trainer.train(resume_from_checkpoint=checkpoint) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1384, in train + tr_loss_step = self.training_step(model, inputs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1959, in training_step + loss = self.compute_loss(model, inputs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1991, in compute_loss + outputs = model(**inputs) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py", line 503, in forward + encoder_outputs = self.encoder( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 1346, in forward + extract_features = self.feature_extractor(input_values) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 514, in forward + hidden_states = conv_layer(hidden_states) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/models/wav2vec2/modeling_wav2vec2.py", line 389, in forward + hidden_states = self.layer_norm(hidden_states) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl + return forward_call(*input, **kwargs) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/modules/normalization.py", line 189, in forward + return F.layer_norm( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/nn/functional.py", line 2347, in layer_norm + return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled) +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 10.12 GiB already allocated; 707.31 MiB free; 13.39 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF \ No newline at end of file diff --git a/wandb/run-20220302_085127-8m3km5vl/files/requirements.txt b/wandb/run-20220302_085127-8m3km5vl/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220302_085127-8m3km5vl/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220302_085127-8m3km5vl/files/wandb-metadata.json b/wandb/run-20220302_085127-8m3km5vl/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..aa1c09b46fe65cf51365d1db83e1c40cc114771d --- /dev/null +++ b/wandb/run-20220302_085127-8m3km5vl/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-02T08:51:28.689226", + "startedAt": "2022-03-02T08:51:27.619370", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=16", + "--per_device_eval_batch_size=16", + "--gradient_accumulation_steps=8", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=1e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "622e8b594e8af8169be8cfe538228e49ae08c59d" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220302_085127-8m3km5vl/files/wandb-summary.json b/wandb/run-20220302_085127-8m3km5vl/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a594e25ada167c5ad54a828e8a5b1a6223620b44 --- /dev/null +++ b/wandb/run-20220302_085127-8m3km5vl/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 8}} \ No newline at end of file diff --git a/wandb/run-20220302_085127-8m3km5vl/logs/debug-internal.log b/wandb/run-20220302_085127-8m3km5vl/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..2aa0f0afa3c7bd79aeb1112c22638f809e54d45b --- /dev/null +++ b/wandb/run-20220302_085127-8m3km5vl/logs/debug-internal.log @@ -0,0 +1,139 @@ +2022-03-02 08:51:28,518 INFO MainThread:254490 [internal.py:wandb_internal():89] W&B internal server running at pid: 254490, started at: 2022-03-02 08:51:28.518100 +2022-03-02 08:51:28,520 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: check_version +2022-03-02 08:51:28,520 INFO WriterThread:254490 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/run-8m3km5vl.wandb +2022-03-02 08:51:28,522 DEBUG SenderThread:254490 [sender.py:send():235] send: header +2022-03-02 08:51:28,522 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: check_version +2022-03-02 08:51:28,588 DEBUG SenderThread:254490 [sender.py:send():235] send: run +2022-03-02 08:51:28,683 INFO SenderThread:254490 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files +2022-03-02 08:51:28,683 INFO SenderThread:254490 [sender.py:_start_run_threads():809] run started: 8m3km5vl with start time 1646211087 +2022-03-02 08:51:28,683 DEBUG SenderThread:254490 [sender.py:send():235] send: summary +2022-03-02 08:51:28,683 INFO SenderThread:254490 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:51:28,684 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: run_start +2022-03-02 08:51:28,689 DEBUG HandlerThread:254490 [meta.py:__init__():36] meta init +2022-03-02 08:51:28,689 DEBUG HandlerThread:254490 [meta.py:__init__():50] meta init done +2022-03-02 08:51:28,689 DEBUG HandlerThread:254490 [meta.py:probe():210] probe +2022-03-02 08:51:28,695 DEBUG HandlerThread:254490 [meta.py:_setup_git():200] setup git +2022-03-02 08:51:28,710 DEBUG HandlerThread:254490 [meta.py:_setup_git():207] setup git done +2022-03-02 08:51:28,710 DEBUG HandlerThread:254490 [meta.py:_save_pip():54] save pip +2022-03-02 08:51:28,711 DEBUG HandlerThread:254490 [meta.py:_save_pip():68] save pip done +2022-03-02 08:51:28,711 DEBUG HandlerThread:254490 [meta.py:probe():248] probe done +2022-03-02 08:51:28,787 DEBUG SenderThread:254490 [sender.py:send():235] send: files +2022-03-02 08:51:28,787 INFO SenderThread:254490 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-02 08:51:28,792 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:51:28,792 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:51:28,825 DEBUG SenderThread:254490 [sender.py:send():235] send: config +2022-03-02 08:51:28,826 DEBUG SenderThread:254490 [sender.py:send():235] send: metric +2022-03-02 08:51:28,826 DEBUG SenderThread:254490 [sender.py:send():235] send: metric +2022-03-02 08:51:28,826 WARNING SenderThread:254490 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-02 08:51:29,051 INFO Thread-11 :254490 [upload_job.py:push():137] Uploaded file /tmp/tmp4hagpn04wandb/h47718pd-wandb-metadata.json +2022-03-02 08:51:29,685 INFO Thread-8 :254490 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/wandb-metadata.json +2022-03-02 08:51:29,685 INFO Thread-8 :254490 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/wandb-summary.json +2022-03-02 08:51:29,685 INFO Thread-8 :254490 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/output.log +2022-03-02 08:51:29,685 INFO Thread-8 :254490 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/requirements.txt +2022-03-02 08:51:31,684 INFO Thread-8 :254490 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/output.log +2022-03-02 08:51:35,685 INFO Thread-8 :254490 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/output.log +2022-03-02 08:51:37,089 DEBUG SenderThread:254490 [sender.py:send():235] send: telemetry +2022-03-02 08:51:37,090 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:51:37,090 DEBUG SenderThread:254490 [sender.py:send():235] send: exit +2022-03-02 08:51:37,090 INFO SenderThread:254490 [sender.py:send_exit():371] handling exit code: 1 +2022-03-02 08:51:37,091 INFO SenderThread:254490 [sender.py:send_exit():373] handling runtime: 8 +2022-03-02 08:51:37,091 INFO SenderThread:254490 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:51:37,091 INFO SenderThread:254490 [sender.py:send_exit():379] send defer +2022-03-02 08:51:37,091 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:51:37,092 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:51:37,092 INFO HandlerThread:254490 [handler.py:handle_request_defer():154] handle defer: 0 +2022-03-02 08:51:37,092 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: defer +2022-03-02 08:51:37,092 INFO SenderThread:254490 [sender.py:send_request_defer():388] handle sender defer: 0 +2022-03-02 08:51:37,092 INFO SenderThread:254490 [sender.py:transition_state():392] send defer: 1 +2022-03-02 08:51:37,092 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:51:37,092 INFO HandlerThread:254490 [handler.py:handle_request_defer():154] handle defer: 1 +2022-03-02 08:51:37,120 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: defer +2022-03-02 08:51:37,120 INFO SenderThread:254490 [sender.py:send_request_defer():388] handle sender defer: 1 +2022-03-02 08:51:37,120 INFO SenderThread:254490 [sender.py:transition_state():392] send defer: 2 +2022-03-02 08:51:37,120 DEBUG SenderThread:254490 [sender.py:send():235] send: stats +2022-03-02 08:51:37,121 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:51:37,121 INFO HandlerThread:254490 [handler.py:handle_request_defer():154] handle defer: 2 +2022-03-02 08:51:37,121 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: defer +2022-03-02 08:51:37,121 INFO SenderThread:254490 [sender.py:send_request_defer():388] handle sender defer: 2 +2022-03-02 08:51:37,121 INFO SenderThread:254490 [sender.py:transition_state():392] send defer: 3 +2022-03-02 08:51:37,121 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:51:37,121 INFO HandlerThread:254490 [handler.py:handle_request_defer():154] handle defer: 3 +2022-03-02 08:51:37,122 DEBUG SenderThread:254490 [sender.py:send():235] send: summary +2022-03-02 08:51:37,122 INFO SenderThread:254490 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:51:37,122 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: defer +2022-03-02 08:51:37,122 INFO SenderThread:254490 [sender.py:send_request_defer():388] handle sender defer: 3 +2022-03-02 08:51:37,122 INFO SenderThread:254490 [sender.py:transition_state():392] send defer: 4 +2022-03-02 08:51:37,123 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:51:37,123 INFO HandlerThread:254490 [handler.py:handle_request_defer():154] handle defer: 4 +2022-03-02 08:51:37,123 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: defer +2022-03-02 08:51:37,123 INFO SenderThread:254490 [sender.py:send_request_defer():388] handle sender defer: 4 +2022-03-02 08:51:37,198 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:51:37,219 INFO SenderThread:254490 [sender.py:transition_state():392] send defer: 5 +2022-03-02 08:51:37,219 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:51:37,220 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:51:37,220 INFO HandlerThread:254490 [handler.py:handle_request_defer():154] handle defer: 5 +2022-03-02 08:51:37,220 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: defer +2022-03-02 08:51:37,220 INFO SenderThread:254490 [sender.py:send_request_defer():388] handle sender defer: 5 +2022-03-02 08:51:37,220 INFO SenderThread:254490 [dir_watcher.py:finish():283] shutting down directory watcher +2022-03-02 08:51:37,321 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:51:37,686 INFO Thread-8 :254490 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/wandb-summary.json +2022-03-02 08:51:37,687 INFO SenderThread:254490 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/config.yaml +2022-03-02 08:51:37,687 INFO SenderThread:254490 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/output.log +2022-03-02 08:51:37,687 INFO SenderThread:254490 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files +2022-03-02 08:51:37,687 INFO SenderThread:254490 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/wandb-metadata.json wandb-metadata.json +2022-03-02 08:51:37,687 INFO SenderThread:254490 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/output.log output.log +2022-03-02 08:51:37,687 INFO SenderThread:254490 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/wandb-summary.json wandb-summary.json +2022-03-02 08:51:37,691 INFO SenderThread:254490 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/requirements.txt requirements.txt +2022-03-02 08:51:37,691 INFO SenderThread:254490 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/config.yaml config.yaml +2022-03-02 08:51:37,694 INFO SenderThread:254490 [sender.py:transition_state():392] send defer: 6 +2022-03-02 08:51:37,694 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:51:37,700 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:51:37,700 INFO HandlerThread:254490 [handler.py:handle_request_defer():154] handle defer: 6 +2022-03-02 08:51:37,701 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: defer +2022-03-02 08:51:37,701 INFO SenderThread:254490 [sender.py:send_request_defer():388] handle sender defer: 6 +2022-03-02 08:51:37,701 INFO SenderThread:254490 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 08:51:37,803 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:51:37,803 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:51:37,905 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:51:37,905 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:51:37,949 INFO Thread-15 :254490 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/config.yaml +2022-03-02 08:51:37,966 INFO Thread-12 :254490 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/output.log +2022-03-02 08:51:37,975 INFO Thread-14 :254490 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/requirements.txt +2022-03-02 08:51:38,007 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:51:38,007 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:51:38,029 INFO Thread-13 :254490 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/files/wandb-summary.json +2022-03-02 08:51:38,108 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:51:38,109 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:51:38,210 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:51:38,210 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:51:38,229 INFO Thread-7 :254490 [sender.py:transition_state():392] send defer: 7 +2022-03-02 08:51:38,230 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:51:38,230 INFO HandlerThread:254490 [handler.py:handle_request_defer():154] handle defer: 7 +2022-03-02 08:51:38,230 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: defer +2022-03-02 08:51:38,230 INFO SenderThread:254490 [sender.py:send_request_defer():388] handle sender defer: 7 +2022-03-02 08:51:38,311 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:51:38,350 INFO SenderThread:254490 [sender.py:transition_state():392] send defer: 8 +2022-03-02 08:51:38,350 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:51:38,350 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:51:38,351 INFO HandlerThread:254490 [handler.py:handle_request_defer():154] handle defer: 8 +2022-03-02 08:51:38,351 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: defer +2022-03-02 08:51:38,351 INFO SenderThread:254490 [sender.py:send_request_defer():388] handle sender defer: 8 +2022-03-02 08:51:38,351 INFO SenderThread:254490 [sender.py:transition_state():392] send defer: 9 +2022-03-02 08:51:38,352 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: defer +2022-03-02 08:51:38,352 INFO HandlerThread:254490 [handler.py:handle_request_defer():154] handle defer: 9 +2022-03-02 08:51:38,352 DEBUG SenderThread:254490 [sender.py:send():235] send: final +2022-03-02 08:51:38,352 DEBUG SenderThread:254490 [sender.py:send():235] send: footer +2022-03-02 08:51:38,352 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: defer +2022-03-02 08:51:38,352 INFO SenderThread:254490 [sender.py:send_request_defer():388] handle sender defer: 9 +2022-03-02 08:51:38,451 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: poll_exit +2022-03-02 08:51:38,452 DEBUG SenderThread:254490 [sender.py:send_request():249] send_request: poll_exit +2022-03-02 08:51:38,452 INFO SenderThread:254490 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 08:51:38,532 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: get_summary +2022-03-02 08:51:38,533 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: sampled_history +2022-03-02 08:51:38,534 DEBUG HandlerThread:254490 [handler.py:handle_request():131] handle_request: shutdown +2022-03-02 08:51:38,534 INFO HandlerThread:254490 [handler.py:finish():739] shutting down handler +2022-03-02 08:51:39,352 INFO WriterThread:254490 [datastore.py:close():281] close: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/run-8m3km5vl.wandb +2022-03-02 08:51:39,532 INFO SenderThread:254490 [sender.py:finish():1075] shutting down sender +2022-03-02 08:51:39,532 INFO SenderThread:254490 [file_pusher.py:finish():177] shutting down file pusher +2022-03-02 08:51:39,532 INFO SenderThread:254490 [file_pusher.py:join():182] waiting for file pusher +2022-03-02 08:51:39,534 INFO MainThread:254490 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220302_085127-8m3km5vl/logs/debug.log b/wandb/run-20220302_085127-8m3km5vl/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e206dc3c761960d0095b72218ae7e66fe5ad6d84 --- /dev/null +++ b/wandb/run-20220302_085127-8m3km5vl/logs/debug.log @@ -0,0 +1,115 @@ +2022-03-02 08:51:27,621 INFO MainThread:254391 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-02 08:51:27,621 INFO MainThread:254391 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-02 08:51:27,621 INFO MainThread:254391 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-02 08:51:27,621 INFO MainThread:254391 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-02 08:51:27,621 INFO MainThread:254391 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/logs/debug.log +2022-03-02 08:51:27,621 INFO MainThread:254391 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085127-8m3km5vl/logs/debug-internal.log +2022-03-02 08:51:27,621 INFO MainThread:254391 [wandb_init.py:init():420] calling init triggers +2022-03-02 08:51:27,621 INFO MainThread:254391 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-02 08:51:27,621 INFO MainThread:254391 [wandb_init.py:init():471] starting backend +2022-03-02 08:51:27,621 INFO MainThread:254391 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-02 08:51:27,677 INFO MainThread:254391 [backend.py:ensure_launched():219] starting backend process... +2022-03-02 08:51:27,732 INFO MainThread:254391 [backend.py:ensure_launched():224] started backend process with pid: 254490 +2022-03-02 08:51:27,734 INFO MainThread:254391 [wandb_init.py:init():480] backend started and connected +2022-03-02 08:51:27,745 INFO MainThread:254391 [wandb_init.py:init():550] updated telemetry +2022-03-02 08:51:27,876 INFO MainThread:254391 [wandb_init.py:init():581] communicating current version +2022-03-02 08:51:28,587 INFO MainThread:254391 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-02 08:51:28,587 INFO MainThread:254391 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-02 08:51:28,684 INFO MainThread:254391 [wandb_init.py:init():624] starting run threads in backend +2022-03-02 08:51:28,791 INFO MainThread:254391 [wandb_run.py:_console_start():1827] atexit reg +2022-03-02 08:51:28,791 INFO MainThread:254391 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-02 08:51:28,792 INFO MainThread:254391 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-02 08:51:28,794 INFO MainThread:254391 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-02 08:51:28,794 INFO MainThread:254391 [wandb_init.py:init():651] run started, returning control to user process +2022-03-02 08:51:28,798 INFO MainThread:254391 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar02_08-50-47_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 16, 'eval_batch_size': 16} +2022-03-02 08:51:28,802 INFO MainThread:254391 [wandb_watch.py:watch():43] Watching +2022-03-02 08:51:34,841 INFO MainThread:254391 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-03-02 08:51:34,843 INFO MainThread:254391 [wandb_run.py:_restore():1769] restore +2022-03-02 08:51:37,092 INFO MainThread:254391 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 08:51:37,220 INFO MainThread:254391 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 1 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 2095 +} + +2022-03-02 08:51:37,702 INFO MainThread:254391 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 4 +} +pusher_stats { + uploaded_bytes: 2095 + total_bytes: 8718 +} + +2022-03-02 08:51:37,804 INFO MainThread:254391 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:51:37,906 INFO MainThread:254391 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:51:38,008 INFO MainThread:254391 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:51:38,109 INFO MainThread:254391 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:51:38,211 INFO MainThread:254391 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:51:38,350 INFO MainThread:254391 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} + +2022-03-02 08:51:38,532 INFO MainThread:254391 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 +} +pusher_stats { + uploaded_bytes: 21066 + total_bytes: 21066 +} +local_info { +} + +2022-03-02 08:51:39,668 INFO MainThread:254391 [wandb_run.py:_append_files():2194] logging synced files diff --git a/wandb/run-20220302_085127-8m3km5vl/run-8m3km5vl.wandb b/wandb/run-20220302_085127-8m3km5vl/run-8m3km5vl.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e141c3f014cce663b3cdd4cb1c8c73a5d5cf70ed Binary files /dev/null and b/wandb/run-20220302_085127-8m3km5vl/run-8m3km5vl.wandb differ diff --git a/wandb/run-20220302_085255-16llzpbl/files/config.yaml b/wandb/run-20220302_085255-16llzpbl/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31e04deaca1ca29e71528794a554b7fe04bfab91 --- /dev/null +++ b/wandb/run-20220302_085255-16llzpbl/files/config.yaml @@ -0,0 +1,11335 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.17.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + - 1: train/loss + 5: 1 + 6: + - 1 + - 1: train/learning_rate + 5: 1 + 6: + - 1 + - 1: train/epoch + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.ln_f\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.23\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.22\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.21\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.20\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.19\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.18\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.17\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.16\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.15\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.14\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.13\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.12\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.11\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.10\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.9\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.8\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.7\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.6\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.5\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.4\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.3\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.2\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.1\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.mlp\.c_fc\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.crossattention\.q_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_cross_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.attn\.c_attn\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.h\.0\.ln_1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wpe\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.transformer\.wte\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.12\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.11\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.10\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.9\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.8\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.7\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.6\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.5\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.4\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.3\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.2\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.1\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.0\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_v.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.pos_conv_embed\.conv\.weight_g.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: eval/loss + 5: 1 + 6: + - 1 + - 1: eval/wer + 5: 1 + 6: + - 1 + - 1: eval/runtime + 5: 1 + 6: + - 1 + - 1: eval/samples_per_second + 5: 1 + 6: + - 1 + - 1: eval/steps_per_second + 5: 1 + 6: + - 1 + python_version: 3.9.5 + start_time: 1646211175 + t: + 1: + - 1 + - 5 + - 11 + 3: + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.17.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: gpt2-medium + activation_function: gelu_new + add_cross_attention: true + architectures: + - GPT2LMHeadModel + attn_pdrop: 0.0 + bad_words_ids: null + bos_token_id: 50256 + chunk_size_feed_forward: 0 + cross_attention_hidden_size: null + decoder_start_token_id: null + diversity_penalty: 0.0 + do_sample: false + early_stopping: false + embd_pdrop: 0.0 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 50256 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_epsilon: 1.0e-05 + length_penalty: 1.0 + max_length: 20 + min_length: 0 + model_type: gpt2 + n_ctx: 1024 + n_embd: 1024 + n_head: 16 + n_inner: null + n_layer: 24 + n_positions: 1024 + n_special: 0 + no_repeat_ngram_size: 0 + num_beam_groups: 1 + num_beams: 1 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: null + predict_special_tokens: true + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + reorder_and_upcast_attn: false + repetition_penalty: 1.0 + resid_pdrop: 0.0 + return_dict: true + return_dict_in_generate: false + scale_attn_by_inverse_layer_idx: false + scale_attn_weights: true + sep_token_id: null + summary_activation: null + summary_first_dropout: 0.0 + summary_proj_to_labels: true + summary_type: cls_index + summary_use_proj: true + task_specific_params: + text-generation: + do_sample: true + max_length: 50 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_cache: false + vocab_size: 50257 +decoder_start_token_id: + desc: null + value: 50256 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-large-lv60 + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: false + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.0 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.0 + hidden_dropout_prob: 0.0 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.0 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.17.0.dev0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 50256 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 8 +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 2 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: None +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hub_model_id: + desc: null + value: None +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +learning_rate: + desc: null + value: 0.0003 +length_column_name: + desc: null + value: input_length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: false +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/Mar02_08-52-14_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 50 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: None +min_length: + desc: null + value: 0 +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 1.0 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 50256 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 8 +per_device_train_batch_size: + desc: null + value: 8 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''wandb'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: 1 +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 8 +transformers_version: + desc: null + value: null +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220302_085255-16llzpbl/files/output.log b/wandb/run-20220302_085255-16llzpbl/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..db538e036bbb34bc39667434bc965bb9cd4ef74a --- /dev/null +++ b/wandb/run-20220302_085255-16llzpbl/files/output.log @@ -0,0 +1,1605 @@ + + + 0%| | 0/1784 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9191, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:00,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%| | 1/1784 [00:04<2:04:02, 4.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:02,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0705, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:04,493 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 2/1784 [00:07<1:55:56, 3.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:06,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1719, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:08,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 3/1784 [00:11<1:54:10, 3.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:10,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7228, 'learning_rate': 6e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:11,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 4/1784 [00:15<1:52:53, 3.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:13,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8112, 'learning_rate': 1.2e-06, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:15,611 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 5/1784 [00:19<1:51:12, 3.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:17,474 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.822, 'learning_rate': 1.8e-06, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:19,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 6/1784 [00:22<1:50:07, 3.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:21,121 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:22,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6902, 'learning_rate': 2.4e-06, 'epoch': 0.0} + 0%|▎ | 7/1784 [00:26<1:49:02, 3.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:24,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8359, 'learning_rate': 2.9999999999999997e-06, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:26,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 8/1784 [00:29<1:48:05, 3.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:28,308 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0293, 'learning_rate': 3.6e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:29,993 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▍ | 9/1784 [00:33<1:46:55, 3.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:31,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6929, 'learning_rate': 4.2e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:33,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 10/1784 [00:36<1:45:53, 3.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:35,294 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7256, 'learning_rate': 4.8e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:36,994 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▍ | 11/1784 [00:40<1:45:03, 3.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:38,802 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6387, 'learning_rate': 5.399999999999999e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:40,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▌ | 12/1784 [00:43<1:44:35, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:42,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:43,991 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 13/1784 [00:47<1:44:02, 3.52s/it] + 1%|▌ | 13/1784 [00:47<1:44:02, 3.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:45,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:47,446 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 14/1784 [00:50<1:43:20, 3.50s/it] + + 1%|▌ | 14/1784 [00:50<1:43:20, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:49,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5333, 'learning_rate': 7.2e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:50,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▋ | 15/1784 [00:54<1:42:47, 3.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:52,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6495, 'learning_rate': 7.799999999999998e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:54,304 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 16/1784 [00:57<1:42:04, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:56,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:53:57,660 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6274, 'learning_rate': 8.4e-06, 'epoch': 0.01} + 1%|▊ | 17/1784 [01:01<1:41:04, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:53:59,355 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4705, 'learning_rate': 8.999999999999999e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:00,984 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▊ | 18/1784 [01:04<1:40:02, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:02,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4565, 'learning_rate': 9.6e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:04,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 19/1784 [01:07<1:39:26, 3.38s/it] + 1%|▊ | 19/1784 [01:07<1:39:26, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:06,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:07,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▉ | 20/1784 [01:11<1:38:49, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:09,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3876, 'learning_rate': 1.0799999999999998e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:10,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▉ | 21/1784 [01:14<1:38:35, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:12,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7851, 'learning_rate': 1.14e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:14,307 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 22/1784 [01:17<1:38:17, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:15,978 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5608, 'learning_rate': 1.1999999999999999e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:17,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|█ | 23/1784 [01:21<1:37:30, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:19,242 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3061, 'learning_rate': 1.26e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:20,804 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 24/1784 [01:24<1:36:40, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:22,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:24,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 25/1784 [01:27<1:35:53, 3.27s/it] + + 1%|█ | 25/1784 [01:27<1:35:53, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:25,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.591, 'learning_rate': 1.3799999999999998e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:27,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█▏ | 26/1784 [01:30<1:35:16, 3.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:28,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:30,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▏ | 27/1784 [01:33<1:34:42, 3.23s/it] + + 2%|█▏ | 27/1784 [01:33<1:34:42, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:32,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1032, 'learning_rate': 1.4999999999999999e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:33,600 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▏ | 28/1784 [01:37<1:34:12, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:35,218 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:36,763 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 29/1784 [01:40<1:33:38, 3.20s/it] + + 2%|█▎ | 29/1784 [01:40<1:33:38, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:38,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3323, 'learning_rate': 1.6199999999999997e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:39,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▎ | 30/1784 [01:43<1:33:16, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:41,569 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2493, 'learning_rate': 1.68e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:43,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 31/1784 [01:46<1:32:46, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:44,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5836, 'learning_rate': 1.74e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:46,106 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▍ | 32/1784 [01:49<1:31:30, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:47,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2934, 'learning_rate': 1.7999999999999997e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:49,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 33/1784 [01:52<1:30:19, 3.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:50,641 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1632, 'learning_rate': 1.8599999999999998e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:52,029 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 34/1784 [01:55<1:28:44, 3.04s/it] + 2%|█▌ | 34/1784 [01:55<1:28:44, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:53,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:54,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▌ | 35/1784 [01:58<1:27:05, 2.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:56,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6831, 'learning_rate': 1.98e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 08:54:57,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 36/1784 [02:01<1:25:53, 2.95s/it] + 2%|█▌ | 36/1784 [02:01<1:25:53, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:54:59,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:00,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 37/1784 [02:03<1:24:11, 2.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:01,932 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:03,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4446, 'learning_rate': 2.1e-05, 'epoch': 0.02} + 2%|█▋ | 38/1784 [02:06<1:22:56, 2.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:04,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5133, 'learning_rate': 2.1599999999999996e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:05,912 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 39/1784 [02:09<1:21:09, 2.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:07,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.568, 'learning_rate': 2.2199999999999998e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:08,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 40/1784 [02:11<1:18:57, 2.72s/it] + 2%|█▊ | 40/1784 [02:11<1:18:57, 2.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:09,757 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:10,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 41/1784 [02:14<1:16:50, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:12,171 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:13,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 42/1784 [02:16<1:14:29, 2.57s/it] + 2%|█▊ | 42/1784 [02:16<1:14:29, 2.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:14,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:15,572 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4335, 'learning_rate': 2.3999999999999997e-05, 'epoch': 0.02} + 2%|█▉ | 43/1784 [02:19<1:11:46, 2.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:16,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2563, 'learning_rate': 2.4599999999999998e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:17,661 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 44/1784 [02:21<1:08:24, 2.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:18,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:19,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5112, 'learning_rate': 2.52e-05, 'epoch': 0.03} + 3%|█▉ | 45/1784 [02:23<1:04:30, 2.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:20,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:21,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██ | 46/1784 [02:24<59:50, 2.07s/it] +{'loss': 4.5207, 'learning_rate': 2.5799999999999997e-05, 'epoch': 0.03} + 3%|██ | 46/1784 [02:24<59:50, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:22,096 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:22,789 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 47/1784 [02:26<55:03, 1.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:23,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8714, 'learning_rate': 2.6999999999999996e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:24,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 48/1784 [02:27<50:23, 1.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:24,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:25,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 49/1784 [02:28<45:40, 1.58s/it] +{'loss': 4.8818, 'learning_rate': 2.7599999999999997e-05, 'epoch': 0.03} + 3%|██▏ | 49/1784 [02:28<45:40, 1.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:25,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:27,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 50/1784 [02:30<46:31, 1.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:29,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 50/1784 [02:30<46:31, 1.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:29,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 51/1784 [02:34<1:06:21, 2.30s/it]g-point operations will not be computed-02 08:55:29,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 51/1784 [02:34<1:06:21, 2.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:32,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 51/1784 [02:34<1:06:21, 2.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:32,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 52/1784 [02:38<1:18:28, 2.72s/it]g-point operations will not be computed-02 08:55:32,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 52/1784 [02:38<1:18:28, 2.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:36,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 52/1784 [02:38<1:18:28, 2.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:36,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 53/1784 [02:41<1:26:15, 2.99s/it]g-point operations will not be computed-02 08:55:36,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:41,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:55:40,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 08:55:41,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 08:55:40,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 54/1784 [02:45<1:31:36, 3.18s/it] + 3%|██▍ | 54/1784 [02:45<1:31:36, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:43,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 54/1784 [02:45<1:31:36, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:43,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 55/1784 [02:48<1:34:58, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:43,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 55/1784 [02:48<1:34:58, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:47,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 55/1784 [02:48<1:34:58, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:47,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 56/1784 [02:52<1:36:39, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:47,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 56/1784 [02:52<1:36:39, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:50,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 56/1784 [02:52<1:36:39, 3.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:50,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 57/1784 [02:55<1:38:34, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:50,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 58/1784 [02:59<1:39:24, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:54,338 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 58/1784 [02:59<1:39:24, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:54,338 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 58/1784 [02:59<1:39:24, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:57,868 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 58/1784 [02:59<1:39:24, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:57,868 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 59/1784 [03:02<1:39:33, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:55:57,868 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 59/1784 [03:02<1:39:33, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:01,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 59/1784 [03:02<1:39:33, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:01,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 60/1784 [03:06<1:40:01, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:01,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 61/1784 [03:09<1:39:23, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:04,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 61/1784 [03:09<1:39:23, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:04,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 61/1784 [03:09<1:39:23, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:08,243 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 61/1784 [03:09<1:39:23, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:08,243 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 62/1784 [03:13<1:38:46, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:08,243 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 62/1784 [03:13<1:38:46, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:11,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 62/1784 [03:13<1:38:46, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:11,602 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 63/1784 [03:16<1:38:16, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:15,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 64/1784 [03:20<1:37:51, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:15,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 64/1784 [03:20<1:37:51, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:15,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 64/1784 [03:20<1:37:51, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:18,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 64/1784 [03:20<1:37:51, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:18,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 65/1784 [03:23<1:37:31, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:18,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 66/1784 [03:26<1:37:07, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:21,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 66/1784 [03:26<1:37:07, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:21,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 66/1784 [03:26<1:37:07, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:25,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 67/1784 [03:30<1:36:48, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:25,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 67/1784 [03:30<1:36:48, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:25,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 67/1784 [03:30<1:36:48, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:28,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 67/1784 [03:30<1:36:48, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:28,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 68/1784 [03:33<1:35:45, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:31,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 69/1784 [03:36<1:35:12, 3.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:31,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 69/1784 [03:36<1:35:12, 3.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:31,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 69/1784 [03:36<1:35:12, 3.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:35,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 70/1784 [03:40<1:34:54, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:35,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 70/1784 [03:40<1:34:54, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:35,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 70/1784 [03:40<1:34:54, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:38,326 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 70/1784 [03:40<1:34:54, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:38,326 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 71/1784 [03:43<1:34:48, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:41,612 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 72/1784 [03:46<1:34:22, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:41,612 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 72/1784 [03:46<1:34:22, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:41,612 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 72/1784 [03:46<1:34:22, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:44,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 72/1784 [03:46<1:34:22, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:44,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 73/1784 [03:49<1:33:54, 3.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:44,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 74/1784 [03:53<1:33:19, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:48,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 74/1784 [03:53<1:33:19, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:48,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 74/1784 [03:53<1:33:19, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:51,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 75/1784 [03:56<1:32:50, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:51,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 75/1784 [03:56<1:32:50, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:51,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 75/1784 [03:56<1:32:50, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:54,530 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 75/1784 [03:56<1:32:50, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:54,530 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 76/1784 [03:59<1:31:22, 3.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:57,630 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 77/1784 [04:02<1:30:12, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:57,630 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 77/1784 [04:02<1:30:12, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:56:57,630 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 77/1784 [04:02<1:30:12, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:00,695 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 77/1784 [04:02<1:30:12, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:00,695 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 78/1784 [04:05<1:29:28, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:03,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 79/1784 [04:08<1:28:38, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:03,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 79/1784 [04:08<1:28:38, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:03,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 79/1784 [04:08<1:28:38, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:06,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 79/1784 [04:08<1:28:38, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:06,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 80/1784 [04:11<1:27:49, 3.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:09,845 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 80/1784 [04:11<1:27:49, 3.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:09,845 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▌ | 81/1784 [04:14<1:27:01, 3.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:09,845 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 82/1784 [04:17<1:25:42, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:12,789 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 82/1784 [04:17<1:25:42, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:12,789 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 82/1784 [04:17<1:25:42, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:15,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 82/1784 [04:17<1:25:42, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:15,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 83/1784 [04:20<1:24:58, 3.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:15,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 84/1784 [04:23<1:23:41, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:18,594 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 84/1784 [04:23<1:23:41, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:18,594 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 84/1784 [04:23<1:23:41, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:21,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 84/1784 [04:23<1:23:41, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:21,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 85/1784 [04:26<1:22:27, 2.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:24,259 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 85/1784 [04:26<1:22:27, 2.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:24,259 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 86/1784 [04:29<1:21:20, 2.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:24,259 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 87/1784 [04:31<1:20:16, 2.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:27,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 87/1784 [04:31<1:20:16, 2.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:27,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 87/1784 [04:31<1:20:16, 2.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:29,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 87/1784 [04:31<1:20:16, 2.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:29,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 88/1784 [04:34<1:18:46, 2.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:32,368 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 88/1784 [04:34<1:18:46, 2.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:32,368 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 89/1784 [04:37<1:16:46, 2.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:34,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 90/1784 [04:39<1:15:00, 2.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:34,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 90/1784 [04:39<1:15:00, 2.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:34,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 91/1784 [04:41<1:12:25, 2.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:37,317 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 91/1784 [04:41<1:12:25, 2.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:37,317 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 92/1784 [04:44<1:09:26, 2.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:39,609 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 92/1784 [04:44<1:09:26, 2.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:39,609 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 93/1784 [04:46<1:06:28, 2.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:41,782 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 93/1784 [04:46<1:06:28, 2.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:41,782 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 94/1784 [04:48<1:02:17, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:43,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 94/1784 [04:48<1:02:17, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:43,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 95/1784 [04:49<58:00, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:47,218 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 95/1784 [04:49<58:00, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:47,218 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6163, 'learning_rate': 5.519999999999999e-05, 'epoch': 0.05} + 5%|████▎ | 96/1784 [04:51<54:00, 1.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:48,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 96/1784 [04:51<54:00, 1.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:48,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▍ | 98/1784 [04:54<45:36, 1.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:50,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▍ | 98/1784 [04:54<45:36, 1.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:50,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 99/1784 [04:55<41:58, 1.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:52,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 99/1784 [04:55<41:58, 1.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:52,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7493, 'learning_rate': 5.76e-05, 'epoch': 0.06} + 6%|████▍ | 100/1784 [04:57<43:33, 1.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:52,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 101/1784 [05:00<1:02:21, 2.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:55,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 101/1784 [05:00<1:02:21, 2.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:55,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 101/1784 [05:00<1:02:21, 2.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:59,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 102/1784 [05:04<1:14:14, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:59,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 102/1784 [05:04<1:14:14, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:57:59,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 102/1784 [05:04<1:14:14, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:02,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 103/1784 [05:08<1:22:11, 2.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:02,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 103/1784 [05:08<1:22:11, 2.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:02,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 103/1784 [05:08<1:22:11, 2.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:06,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 104/1784 [05:11<1:27:40, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:06,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 104/1784 [05:11<1:27:40, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:06,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 105/1784 [05:15<1:31:06, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:10,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 105/1784 [05:15<1:31:06, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:10,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 105/1784 [05:15<1:31:06, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:13,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 106/1784 [05:18<1:33:25, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:13,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 106/1784 [05:18<1:33:25, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:13,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 106/1784 [05:18<1:33:25, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:17,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 107/1784 [05:22<1:34:13, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:17,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 107/1784 [05:22<1:34:13, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:17,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 107/1784 [05:22<1:34:13, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:20,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 107/1784 [05:22<1:34:13, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:20,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 108/1784 [05:25<1:35:26, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:23,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 109/1784 [05:29<1:35:51, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:23,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 109/1784 [05:29<1:35:51, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:23,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 109/1784 [05:29<1:35:51, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:27,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 110/1784 [05:32<1:36:44, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:27,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 110/1784 [05:32<1:36:44, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:27,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 110/1784 [05:32<1:36:44, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:31,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 111/1784 [05:36<1:36:44, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:34,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 111/1784 [05:36<1:36:44, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:34,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 112/1784 [05:39<1:36:12, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:34,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 112/1784 [05:39<1:36:12, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:34,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 112/1784 [05:39<1:36:12, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:37,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 113/1784 [05:42<1:35:26, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:37,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 113/1784 [05:42<1:35:26, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:37,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 113/1784 [05:42<1:35:26, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:41,234 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 114/1784 [05:46<1:35:19, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:44,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 114/1784 [05:46<1:35:19, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:44,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 115/1784 [05:49<1:34:17, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:44,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 115/1784 [05:49<1:34:17, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:44,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 115/1784 [05:49<1:34:17, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:47,884 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████ | 116/1784 [05:52<1:33:01, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:47,884 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████ | 116/1784 [05:52<1:33:01, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:47,884 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████ | 116/1784 [05:52<1:33:01, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:51,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████ | 116/1784 [05:52<1:33:01, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:51,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████ | 117/1784 [05:56<1:32:35, 3.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:54,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 118/1784 [05:59<1:31:36, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:54,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 118/1784 [05:59<1:31:36, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:54,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 118/1784 [05:59<1:31:36, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:57,658 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 118/1784 [05:59<1:31:36, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:58:57,658 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 119/1784 [06:02<1:31:27, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:00,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 120/1784 [06:05<1:30:45, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:00,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 120/1784 [06:05<1:30:45, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:00,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 120/1784 [06:05<1:30:45, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:04,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 121/1784 [06:09<1:30:23, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:04,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 121/1784 [06:09<1:30:23, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:04,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 122/1784 [06:12<1:30:26, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:07,397 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 122/1784 [06:12<1:30:26, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:07,397 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 122/1784 [06:12<1:30:26, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:10,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 123/1784 [06:15<1:29:55, 3.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:10,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 123/1784 [06:15<1:29:55, 3.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:10,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 124/1784 [06:18<1:30:04, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:13,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 124/1784 [06:18<1:30:04, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:13,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 124/1784 [06:18<1:30:04, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:17,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 125/1784 [06:22<1:29:07, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:17,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 125/1784 [06:22<1:29:07, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:17,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 125/1784 [06:22<1:29:07, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:20,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 125/1784 [06:22<1:29:07, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:20,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 126/1784 [06:25<1:28:26, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:23,358 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 127/1784 [06:28<1:27:44, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:23,358 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 127/1784 [06:28<1:27:44, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:23,358 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 127/1784 [06:28<1:27:44, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:26,475 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 127/1784 [06:28<1:27:44, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:26,475 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 128/1784 [06:31<1:27:00, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:26,475 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 129/1784 [06:34<1:26:25, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:29,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 129/1784 [06:34<1:26:25, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:29,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 129/1784 [06:34<1:26:25, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:32,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 129/1784 [06:34<1:26:25, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:32,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 130/1784 [06:37<1:25:21, 3.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:32,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 131/1784 [06:40<1:24:30, 3.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:35,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 131/1784 [06:40<1:24:30, 3.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:35,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 131/1784 [06:40<1:24:30, 3.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:38,611 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 132/1784 [06:43<1:23:49, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:41,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 132/1784 [06:43<1:23:49, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:41,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 133/1784 [06:46<1:23:01, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:41,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 133/1784 [06:46<1:23:01, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:41,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▊ | 134/1784 [06:49<1:21:19, 2.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:44,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▊ | 134/1784 [06:49<1:21:19, 2.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:44,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▊ | 134/1784 [06:49<1:21:19, 2.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:47,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 135/1784 [06:52<1:20:35, 2.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:47,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 135/1784 [06:52<1:20:35, 2.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:47,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 136/1784 [06:54<1:19:30, 2.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:50,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 136/1784 [06:54<1:19:30, 2.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:50,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 136/1784 [06:54<1:19:30, 2.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:52,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 136/1784 [06:54<1:19:30, 2.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:52,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 137/1784 [06:57<1:18:04, 2.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:55,652 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 138/1784 [07:00<1:16:49, 2.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:58,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 138/1784 [07:00<1:16:49, 2.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:58,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 139/1784 [07:02<1:14:58, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:58,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 139/1784 [07:02<1:14:58, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 08:59:58,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 140/1784 [07:05<1:12:57, 2.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:00,831 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 140/1784 [07:05<1:12:57, 2.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:00,831 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 141/1784 [07:07<1:10:44, 2.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:03,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 141/1784 [07:07<1:10:44, 2.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:03,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 141/1784 [07:07<1:10:44, 2.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:05,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 141/1784 [07:07<1:10:44, 2.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:05,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 142/1784 [07:10<1:08:00, 2.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:07,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 142/1784 [07:10<1:08:00, 2.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:07,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 143/1784 [07:12<1:05:15, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:09,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 143/1784 [07:12<1:05:15, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:09,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 145/1784 [07:16<58:35, 2.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:11,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 145/1784 [07:16<58:35, 2.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:11,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 146/1784 [07:17<54:54, 2.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:13,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 146/1784 [07:17<54:54, 2.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:13,604 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 147/1784 [07:19<50:36, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:16,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 147/1784 [07:19<50:36, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:16,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 149/1784 [07:21<42:31, 1.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:17,889 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 149/1784 [07:21<42:31, 1.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:17,889 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6928, 'learning_rate': 8.699999999999999e-05, 'epoch': 0.08} + 8%|██████▋ | 150/1784 [07:23<43:00, 1.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:19,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 150/1784 [07:23<43:00, 1.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:19,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 150/1784 [07:23<43:00, 1.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:22,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 151/1784 [07:27<1:01:01, 2.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:22,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 151/1784 [07:27<1:01:01, 2.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:22,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 151/1784 [07:27<1:01:01, 2.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:25,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 152/1784 [07:30<1:12:12, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:25,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 152/1784 [07:30<1:12:12, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:25,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 152/1784 [07:30<1:12:12, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:29,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 153/1784 [07:34<1:20:09, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:29,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 153/1784 [07:34<1:20:09, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:29,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 154/1784 [07:38<1:25:12, 3.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:32,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 154/1784 [07:38<1:25:12, 3.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:32,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 154/1784 [07:38<1:25:12, 3.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:36,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 155/1784 [07:41<1:28:35, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:36,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 155/1784 [07:41<1:28:35, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:36,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 155/1784 [07:41<1:28:35, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:40,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 156/1784 [07:45<1:30:50, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:40,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 156/1784 [07:45<1:30:50, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:40,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 156/1784 [07:45<1:30:50, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:43,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 156/1784 [07:45<1:30:50, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:43,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 157/1784 [07:48<1:32:26, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:47,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 157/1784 [07:48<1:32:26, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:47,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 158/1784 [07:52<1:33:25, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:47,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 158/1784 [07:52<1:33:25, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:50,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 158/1784 [07:52<1:33:25, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:50,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 159/1784 [07:55<1:33:49, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:50,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 159/1784 [07:55<1:33:49, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:54,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 159/1784 [07:55<1:33:49, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:54,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 160/1784 [07:59<1:33:48, 3.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:54,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 161/1784 [08:02<1:33:42, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:57,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 161/1784 [08:02<1:33:42, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:00:57,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 161/1784 [08:02<1:33:42, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:01,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 161/1784 [08:02<1:33:42, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:01,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 162/1784 [08:06<1:33:12, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:01,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 162/1784 [08:06<1:33:12, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:04,465 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 162/1784 [08:06<1:33:12, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:04,465 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 163/1784 [08:09<1:32:31, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:07,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 163/1784 [08:09<1:32:31, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:07,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 164/1784 [08:12<1:31:54, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:07,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 164/1784 [08:12<1:31:54, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:07,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 164/1784 [08:12<1:31:54, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:07,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 165/1784 [08:16<1:31:44, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:07,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 165/1784 [08:16<1:31:44, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:07,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 165/1784 [08:16<1:31:44, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:07,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 166/1784 [08:19<1:31:16, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 166/1784 [08:19<1:31:16, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 167/1784 [08:22<1:30:42, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 167/1784 [08:22<1:30:42, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 167/1784 [08:22<1:30:42, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 168/1784 [08:26<1:30:05, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 168/1784 [08:26<1:30:05, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 168/1784 [08:26<1:30:05, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:17,885 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 169/1784 [08:29<1:29:28, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:27,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 169/1784 [08:29<1:29:28, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:27,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▍ | 170/1784 [08:32<1:29:22, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:27,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▍ | 170/1784 [08:32<1:29:22, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:27,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▍ | 170/1784 [08:32<1:29:22, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:27,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▍ | 171/1784 [08:36<1:28:46, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:27,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:01:35,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:01:27,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:01:35,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:01:27,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6458, 'learning_rate': 0.0001014, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:01:35,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:01:27,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 173/1784 [08:42<1:28:18, 3.29s/it]g-point operations will not be computed-02 09:01:27,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 173/1784 [08:42<1:28:18, 3.29s/it]g-point operations will not be computed-02 09:01:27,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 173/1784 [08:42<1:28:18, 3.29s/it]g-point operations will not be computed-02 09:01:27,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 174/1784 [08:45<1:27:26, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:44,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 174/1784 [08:45<1:27:26, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:44,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 175/1784 [08:49<1:26:51, 3.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:44,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 175/1784 [08:49<1:26:51, 3.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:44,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 175/1784 [08:49<1:26:51, 3.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:44,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 176/1784 [08:52<1:25:39, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:50,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 176/1784 [08:52<1:25:39, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:50,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 177/1784 [08:55<1:25:18, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:50,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 177/1784 [08:55<1:25:18, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:50,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 177/1784 [08:55<1:25:18, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:50,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 178/1784 [08:58<1:24:33, 3.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:56,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 178/1784 [08:58<1:24:33, 3.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:56,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 179/1784 [09:01<1:23:28, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:56,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 179/1784 [09:01<1:23:28, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:56,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 179/1784 [09:01<1:23:28, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:01:56,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 180/1784 [09:04<1:23:16, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:02,648 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 180/1784 [09:04<1:23:16, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:02,648 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 181/1784 [09:07<1:22:50, 3.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:02,648 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 181/1784 [09:07<1:22:50, 3.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:02,648 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 181/1784 [09:07<1:22:50, 3.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:02,648 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 182/1784 [09:10<1:21:46, 3.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:08,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 182/1784 [09:10<1:21:46, 3.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:08,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 183/1784 [09:13<1:20:54, 3.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:08,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 183/1784 [09:13<1:20:54, 3.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:08,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 183/1784 [09:13<1:20:54, 3.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:08,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 184/1784 [09:16<1:19:15, 2.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:14,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 184/1784 [09:16<1:19:15, 2.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:14,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 185/1784 [09:19<1:18:19, 2.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 185/1784 [09:19<1:18:19, 2.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 186/1784 [09:22<1:17:09, 2.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:02:21,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:02:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:02:21,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:02:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3936, 'learning_rate': 0.00011039999999999999, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:02:21,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:02:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 188/1784 [09:27<1:15:16, 2.83s/it]g-point operations will not be computed-02 09:02:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 188/1784 [09:27<1:15:16, 2.83s/it]g-point operations will not be computed-02 09:02:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:02:26,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:02:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:02:29,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:02:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:02:29,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:02:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6147, 'learning_rate': 0.00011219999999999999, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:02:29,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:02:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 191/1784 [09:35<1:09:29, 2.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:32,897 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 191/1784 [09:35<1:09:29, 2.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:32,897 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 192/1784 [09:37<1:06:39, 2.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:35,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 192/1784 [09:37<1:06:39, 2.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:35,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 193/1784 [09:39<1:03:15, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:37,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 193/1784 [09:39<1:03:15, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:37,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 194/1784 [09:41<1:00:09, 2.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:39,069 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 194/1784 [09:41<1:00:09, 2.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:39,069 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 195/1784 [09:43<57:11, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:40,898 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 195/1784 [09:43<57:11, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:40,898 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.776, 'learning_rate': 0.0001158, 'epoch': 0.11} + 11%|████████▊ | 197/1784 [09:46<49:50, 1.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:42,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 197/1784 [09:46<49:50, 1.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:42,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 198/1784 [09:48<45:43, 1.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:45,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 198/1784 [09:48<45:43, 1.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:45,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 199/1784 [09:49<41:30, 1.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:46,428 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 199/1784 [09:49<41:30, 1.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:46,428 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 200/1784 [09:50<42:05, 1.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:49,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 200/1784 [09:50<42:05, 1.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:49,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████ | 201/1784 [09:54<59:45, 2.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:49,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████ | 201/1784 [09:54<59:45, 2.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████ | 201/1784 [09:54<59:45, 2.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 202/1784 [09:58<1:10:55, 2.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 202/1784 [09:58<1:10:55, 2.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 202/1784 [09:58<1:10:55, 2.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 203/1784 [10:02<1:18:08, 2.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 203/1784 [10:02<1:18:08, 2.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 203/1784 [10:02<1:18:08, 2.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 204/1784 [10:05<1:23:16, 3.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:03:05,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:03:05,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4065, 'learning_rate': 0.00012119999999999999, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:03:05,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 206/1784 [10:12<1:27:38, 3.33s/it]g-point operations will not be computed-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 206/1784 [10:12<1:27:38, 3.33s/it]g-point operations will not be computed-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 206/1784 [10:12<1:27:38, 3.33s/it]g-point operations will not be computed-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 207/1784 [10:16<1:28:49, 3.38s/it]g-point operations will not be computed-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 207/1784 [10:16<1:28:49, 3.38s/it]g-point operations will not be computed-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 207/1784 [10:16<1:28:49, 3.38s/it]g-point operations will not be computed-02 09:02:53,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 208/1784 [10:19<1:29:45, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:17,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 208/1784 [10:19<1:29:45, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:17,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 209/1784 [10:23<1:29:56, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:17,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 209/1784 [10:23<1:29:56, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:17,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 209/1784 [10:23<1:29:56, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:17,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 210/1784 [10:26<1:29:55, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:17,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 210/1784 [10:26<1:29:55, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:17,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 210/1784 [10:26<1:29:55, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:17,965 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 211/1784 [10:29<1:30:03, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:28,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 211/1784 [10:29<1:30:03, 3.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:28,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 212/1784 [10:33<1:29:32, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:28,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 212/1784 [10:33<1:29:32, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:28,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 212/1784 [10:33<1:29:32, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:28,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 213/1784 [10:36<1:29:21, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:28,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:03:36,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:03:28,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:03:36,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:03:28,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3386, 'learning_rate': 0.0001266, 'epoch': 0.12} + 12%|█████████▍ | 215/1784 [10:43<1:28:14, 3.37s/it]g-point operations will not be computed-02 09:03:28,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 215/1784 [10:43<1:28:14, 3.37s/it]g-point operations will not be computed-02 09:03:28,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0399, 'learning_rate': 0.00012719999999999997, 'epoch': 0.12} + 12%|█████████▍ | 215/1784 [10:43<1:28:14, 3.37s/it]g-point operations will not be computed-02 09:03:28,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 216/1784 [10:46<1:27:34, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:44,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 216/1784 [10:46<1:27:34, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:44,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 217/1784 [10:50<1:27:22, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:48,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 217/1784 [10:50<1:27:22, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:48,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 218/1784 [10:53<1:27:08, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:48,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 218/1784 [10:53<1:27:08, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:48,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 218/1784 [10:53<1:27:08, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:48,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 219/1784 [10:56<1:26:19, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:54,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 220/1784 [10:59<1:25:37, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:54,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 220/1784 [10:59<1:25:37, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:54,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9465, 'learning_rate': 0.0001302, 'epoch': 0.12} + 12%|█████████▌ | 220/1784 [10:59<1:25:37, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:03:54,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 221/1784 [11:03<1:24:59, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:01,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 222/1784 [11:06<1:24:51, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:01,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 222/1784 [11:06<1:24:51, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:01,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0395, 'learning_rate': 0.0001314, 'epoch': 0.12} + 12%|█████████▋ | 222/1784 [11:06<1:24:51, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:01,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 223/1784 [11:09<1:24:31, 3.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:01,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:09,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:01,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:09,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:01,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5197, 'learning_rate': 0.0001326, 'epoch': 0.13} +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:09,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:01,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 225/1784 [11:15<1:23:19, 3.21s/it]g-point operations will not be computed-02 09:04:01,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 225/1784 [11:15<1:23:19, 3.21s/it]g-point operations will not be computed-02 09:04:01,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▊ | 225/1784 [11:15<1:23:19, 3.21s/it]g-point operations will not be computed-02 09:04:01,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 226/1784 [11:19<1:22:52, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:17,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 227/1784 [11:22<1:21:57, 3.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:17,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 227/1784 [11:22<1:21:57, 3.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:17,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6542, 'learning_rate': 0.0001344, 'epoch': 0.13} + 13%|█████████▉ | 227/1784 [11:22<1:21:57, 3.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:17,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 228/1784 [11:25<1:21:20, 3.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:23,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 228/1784 [11:25<1:21:20, 3.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:23,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 229/1784 [11:28<1:20:36, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:23,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 229/1784 [11:28<1:20:36, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:23,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 229/1784 [11:28<1:20:36, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:23,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 230/1784 [11:31<1:19:45, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:29,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 230/1784 [11:31<1:19:45, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:29,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 231/1784 [11:34<1:19:03, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:29,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 231/1784 [11:34<1:19:03, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:29,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 231/1784 [11:34<1:19:03, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:29,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 232/1784 [11:37<1:18:21, 3.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:35,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 232/1784 [11:37<1:18:21, 3.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:35,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 233/1784 [11:40<1:16:59, 2.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:35,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:39,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:35,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:39,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:35,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.62, 'learning_rate': 0.0001386, 'epoch': 0.13} +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:39,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:35,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 235/1784 [11:45<1:14:28, 2.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:43,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 235/1784 [11:45<1:14:28, 2.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:43,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 236/1784 [11:48<1:13:31, 2.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:43,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:47,659 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:43,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:47,659 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:43,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6777, 'learning_rate': 0.0001404, 'epoch': 0.13} +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:47,659 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:43,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 238/1784 [11:53<1:10:24, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 238/1784 [11:53<1:10:24, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 239/1784 [11:56<1:08:50, 2.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 239/1784 [11:56<1:08:50, 2.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:55,200 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:57,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:57,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:59,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:04:59,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.504, 'learning_rate': 0.0001434, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:01,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:01,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:02,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:02,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:04,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:04,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.754, 'learning_rate': 0.0001458, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:07,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:07,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:09,821 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:09,821 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:11,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:11,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.3781, 'learning_rate': 0.0001482, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:11,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:15,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:15,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:15,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:18,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:18,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:18,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 253/1784 [12:26<1:14:44, 2.93s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:26,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:26,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6637, 'learning_rate': 0.00015059999999999997, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:26,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 255/1784 [12:33<1:22:57, 3.26s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 255/1784 [12:33<1:22:57, 3.26s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 255/1784 [12:33<1:22:57, 3.26s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 256/1784 [12:36<1:24:42, 3.33s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 256/1784 [12:36<1:24:42, 3.33s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 256/1784 [12:36<1:24:42, 3.33s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 257/1784 [12:40<1:26:01, 3.38s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:40,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:40,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7408, 'learning_rate': 0.00015299999999999998, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:40,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▎ | 259/1784 [12:47<1:27:50, 3.46s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▎ | 259/1784 [12:47<1:27:50, 3.46s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▎ | 259/1784 [12:47<1:27:50, 3.46s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▎ | 260/1784 [12:50<1:28:18, 3.48s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:50,700 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:50,700 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0876, 'learning_rate': 0.0001548, 'epoch': 0.15} +[WARNING|modeling_utils.py:388] 2022-03-02 09:05:50,700 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▍ | 262/1784 [12:57<1:26:40, 3.42s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▍ | 262/1784 [12:57<1:26:40, 3.42s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▍ | 262/1784 [12:57<1:26:40, 3.42s/it]g-point operations will not be computed-02 09:04:51,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▍ | 263/1784 [13:00<1:25:57, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 264/1784 [13:04<1:25:50, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 264/1784 [13:04<1:25:50, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4747, 'learning_rate': 0.000156, 'epoch': 0.15} + 15%|███████████▌ | 264/1784 [13:04<1:25:50, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 265/1784 [13:07<1:24:54, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 265/1784 [13:07<1:24:54, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 265/1784 [13:07<1:24:54, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 266/1784 [13:10<1:24:16, 3.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 266/1784 [13:10<1:24:16, 3.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:10,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:10,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:10,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 268/1784 [13:17<1:23:37, 3.31s/it]g-point operations will not be computed-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 268/1784 [13:17<1:23:37, 3.31s/it]g-point operations will not be computed-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:17,120 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:17,120 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:17,120 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 270/1784 [13:23<1:23:01, 3.29s/it]g-point operations will not be computed-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 270/1784 [13:23<1:23:01, 3.29s/it]g-point operations will not be computed-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 270/1784 [13:23<1:23:01, 3.29s/it]g-point operations will not be computed-02 09:05:59,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 271/1784 [13:27<1:22:34, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:06:25,300 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 271/1784 [13:27<1:22:34, 3.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:06:25,300 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 272/1784 [13:30<1:21:49, 3.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:06:25,300 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 272/1784 [13:30<1:21:49, 3.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:06:25,300 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 272/1784 [13:30<1:21:49, 3.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:06:25,300 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 273/1784 [13:33<1:21:35, 3.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 274/1784 [13:36<1:20:55, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 274/1784 [13:36<1:20:55, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0948, 'learning_rate': 0.000162, 'epoch': 0.15} + 15%|███████████▉ | 274/1784 [13:36<1:20:55, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 275/1784 [13:39<1:20:06, 3.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:39,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:39,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6602, 'learning_rate': 0.0001632, 'epoch': 0.15} +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:39,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████ | 277/1784 [13:46<1:19:44, 3.18s/it]g-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:45,770 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:45,770 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7869, 'learning_rate': 0.0001644, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:45,770 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 279/1784 [13:52<1:18:45, 3.14s/it]g-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:51,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:51,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4388, 'learning_rate': 0.0001656, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:51,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 281/1784 [13:58<1:17:35, 3.10s/it]g-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:58,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:58,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3186, 'learning_rate': 0.0001668, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-02 09:06:58,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 283/1784 [14:04<1:15:55, 3.03s/it]g-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 283/1784 [14:04<1:15:55, 3.03s/it]g-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:03,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:03,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:03,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:06:31,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 285/1784 [14:10<1:13:37, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:07:08,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 285/1784 [14:10<1:13:37, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:07:08,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 286/1784 [14:12<1:12:45, 2.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:07:08,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:12,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:08,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:12,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:08,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.698, 'learning_rate': 0.00016979999999999998, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:12,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:08,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 288/1784 [14:18<1:10:14, 2.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:07:16,355 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 288/1784 [14:18<1:10:14, 2.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:07:16,355 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 289/1784 [14:21<1:08:26, 2.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 289/1784 [14:21<1:08:26, 2.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 290/1784 [14:23<1:06:30, 2.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▋ | 290/1784 [14:23<1:06:30, 2.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:22,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:24,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:24,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:26,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:26,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:28,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:28,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8979, 'learning_rate': 0.00017399999999999997, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:32,205 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:32,205 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:35,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:35,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9214, 'learning_rate': 0.00017639999999999998, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:36,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:38,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:38,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9639, 'learning_rate': 0.00017759999999999998, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:38,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:42,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:42,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:42,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 302/1784 [14:49<1:06:54, 2.71s/it]g-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 302/1784 [14:49<1:06:54, 2.71s/it]g-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 302/1784 [14:49<1:06:54, 2.71s/it]g-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▏ | 303/1784 [14:52<1:13:58, 3.00s/it]g-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:53,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:07:53,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6956, 'learning_rate': 0.00017999999999999998, 'epoch': 0.17} + 17%|█████████████▎ | 305/1784 [15:00<1:22:00, 3.33s/it]g-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 305/1784 [15:00<1:22:00, 3.33s/it]g-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9229, 'learning_rate': 0.00018059999999999997, 'epoch': 0.17} + 17%|█████████████▎ | 305/1784 [15:00<1:22:00, 3.33s/it]g-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 306/1784 [15:03<1:23:51, 3.40s/it]g-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 306/1784 [15:03<1:23:51, 3.40s/it]g-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 306/1784 [15:03<1:23:51, 3.40s/it]g-point operations will not be computed-02 09:07:18,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 307/1784 [15:07<1:24:25, 3.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 308/1784 [15:10<1:24:48, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 308/1784 [15:10<1:24:48, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.4496, 'learning_rate': 0.0001824, 'epoch': 0.17} + 17%|█████████████▍ | 308/1784 [15:10<1:24:48, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 309/1784 [15:14<1:25:00, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 309/1784 [15:14<1:25:00, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 309/1784 [15:14<1:25:00, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 310/1784 [15:17<1:24:43, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:08:17,545 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:08:17,545 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.757, 'learning_rate': 0.00018419999999999998, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-02 09:08:17,545 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 312/1784 [15:24<1:24:20, 3.44s/it]g-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 312/1784 [15:24<1:24:20, 3.44s/it]g-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 312/1784 [15:24<1:24:20, 3.44s/it]g-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▋ | 313/1784 [15:27<1:24:05, 3.43s/it]g-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:08:27,825 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:08:27,825 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3359, 'learning_rate': 0.000186, 'epoch': 0.18} + 18%|█████████████▊ | 315/1784 [15:34<1:23:41, 3.42s/it]g-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 315/1784 [15:34<1:23:41, 3.42s/it]g-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6453, 'learning_rate': 0.00018659999999999998, 'epoch': 0.18} + 18%|█████████████▊ | 315/1784 [15:34<1:23:41, 3.42s/it]g-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 316/1784 [15:38<1:23:06, 3.40s/it]g-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:08:37,907 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:08:37,907 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5169, 'learning_rate': 0.00018779999999999998, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 09:08:37,907 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 318/1784 [15:44<1:22:16, 3.37s/it]g-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 318/1784 [15:44<1:22:16, 3.37s/it]g-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 318/1784 [15:44<1:22:16, 3.37s/it]g-point operations will not be computed-02 09:08:05,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 319/1784 [15:47<1:21:31, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 320/1784 [15:51<1:21:26, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 320/1784 [15:51<1:21:26, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3391, 'learning_rate': 0.00018959999999999997, 'epoch': 0.18} + 18%|█████████████▉ | 320/1784 [15:51<1:21:26, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 321/1784 [15:54<1:20:51, 3.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:08:54,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:08:54,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8598, 'learning_rate': 0.00019079999999999998, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 09:08:54,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 323/1784 [16:01<1:19:36, 3.27s/it]g-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:00,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:00,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5836, 'learning_rate': 0.00019199999999999998, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:00,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 325/1784 [16:07<1:17:51, 3.20s/it]g-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:06,997 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:06,997 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1464, 'learning_rate': 0.00019319999999999998, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:06,997 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 327/1784 [16:13<1:16:57, 3.17s/it]g-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:13,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:13,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9207, 'learning_rate': 0.00019439999999999998, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:13,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 329/1784 [16:19<1:15:56, 3.13s/it]g-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 329/1784 [16:19<1:15:56, 3.13s/it]g-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 329/1784 [16:19<1:15:56, 3.13s/it]g-point operations will not be computed-02 09:08:46,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 330/1784 [16:22<1:15:26, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:20,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 330/1784 [16:22<1:15:26, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:20,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▍ | 331/1784 [16:25<1:14:31, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:20,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▍ | 331/1784 [16:25<1:14:31, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:20,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▍ | 331/1784 [16:25<1:14:31, 3.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:20,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▌ | 332/1784 [16:28<1:13:47, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:26,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▌ | 332/1784 [16:28<1:13:47, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:26,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▌ | 333/1784 [16:31<1:13:04, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:26,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:31,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:09:26,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:31,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:09:26,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9945, 'learning_rate': 0.000198, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:31,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:09:26,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 335/1784 [16:37<1:10:34, 2.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:35,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 336/1784 [16:40<1:09:45, 2.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:35,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 336/1784 [16:40<1:09:45, 2.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:35,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:39,567 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:09:35,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:39,567 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:09:35,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6617, 'learning_rate': 0.0001998, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:39,567 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:09:35,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 338/1784 [16:45<1:07:52, 2.82s/it]g-point operations will not be computed-02 09:09:35,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 338/1784 [16:45<1:07:52, 2.82s/it]g-point operations will not be computed-02 09:09:35,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:44,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:09:35,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:47,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:09:35,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:47,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:09:35,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6245, 'learning_rate': 0.0002016, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-02 09:09:47,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:09:35,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 341/1784 [16:53<1:03:37, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:51,268 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 341/1784 [16:53<1:03:37, 2.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:51,268 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 342/1784 [16:55<1:01:27, 2.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:53,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 342/1784 [16:55<1:01:27, 2.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:53,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 343/1784 [16:58<58:47, 2.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:55,675 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 343/1784 [16:58<58:47, 2.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:55,675 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 344/1784 [17:00<56:05, 2.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:57,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 344/1784 [17:00<56:05, 2.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:57,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 345/1784 [17:01<52:54, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:59,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 345/1784 [17:01<52:54, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:09:59,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▌ | 347/1784 [17:05<46:27, 1.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:01,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▌ | 347/1784 [17:05<46:27, 1.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:01,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2762, 'learning_rate': 0.0002058, 'epoch': 0.19} + 20%|███████████████▌ | 348/1784 [17:06<42:46, 1.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:04,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 348/1784 [17:06<42:46, 1.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:04,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 349/1784 [17:08<38:52, 1.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:05,260 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 349/1784 [17:08<38:52, 1.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:05,260 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 350/1784 [17:09<39:18, 1.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:05,260 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 350/1784 [17:09<39:18, 1.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:08,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 350/1784 [17:09<39:18, 1.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:08,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 351/1784 [17:13<55:19, 2.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 352/1784 [17:17<1:05:15, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 352/1784 [17:17<1:05:15, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8358, 'learning_rate': 0.00020879999999999998, 'epoch': 0.2} + 20%|███████████████▍ | 352/1784 [17:17<1:05:15, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 353/1784 [17:20<1:11:40, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 353/1784 [17:20<1:11:40, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 353/1784 [17:20<1:11:40, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 354/1784 [17:24<1:15:54, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:10:24,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:10:24,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6657, 'learning_rate': 0.00021059999999999997, 'epoch': 0.2} + 20%|███████████████▌ | 356/1784 [17:31<1:20:25, 3.38s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 356/1784 [17:31<1:20:25, 3.38s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8611, 'learning_rate': 0.00021119999999999996, 'epoch': 0.2} + 20%|███████████████▌ | 356/1784 [17:31<1:20:25, 3.38s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 357/1784 [17:35<1:20:59, 3.41s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 357/1784 [17:35<1:20:59, 3.41s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 357/1784 [17:35<1:20:59, 3.41s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 358/1784 [17:38<1:21:31, 3.43s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:10:38,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:10:38,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6709, 'learning_rate': 0.00021299999999999997, 'epoch': 0.2} + 20%|███████████████▋ | 360/1784 [17:45<1:22:12, 3.46s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 360/1784 [17:45<1:22:12, 3.46s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8099, 'learning_rate': 0.00021359999999999996, 'epoch': 0.2} + 20%|███████████████▋ | 360/1784 [17:45<1:22:12, 3.46s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 361/1784 [17:49<1:22:01, 3.46s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:10:49,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:10:49,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.556, 'learning_rate': 0.00021479999999999996, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 09:10:49,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 363/1784 [17:55<1:21:06, 3.42s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 363/1784 [17:55<1:21:06, 3.42s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 363/1784 [17:55<1:21:06, 3.42s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 364/1784 [17:59<1:20:16, 3.39s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:10:59,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:10:59,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4165, 'learning_rate': 0.00021659999999999998, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 09:10:59,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 366/1784 [18:05<1:18:52, 3.34s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 366/1784 [18:05<1:18:52, 3.34s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 366/1784 [18:05<1:18:52, 3.34s/it]g-point operations will not be computed-02 09:10:12,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 367/1784 [18:09<1:18:16, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 368/1784 [18:12<1:17:52, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████ | 368/1784 [18:12<1:17:52, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6038, 'learning_rate': 0.00021839999999999997, 'epoch': 0.21} + 21%|████████████████ | 368/1784 [18:12<1:17:52, 3.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 369/1784 [18:15<1:18:07, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:11:15,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:11:15,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0663, 'learning_rate': 0.00021959999999999997, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-02 09:11:15,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▏ | 371/1784 [18:22<1:17:01, 3.27s/it]g-point operations will not be computed-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:11:21,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:11:21,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5039, 'learning_rate': 0.00022079999999999997, 'epoch': 0.21} + 21%|████████████████▎ | 373/1784 [18:28<1:15:45, 3.22s/it]g-point operations will not be computed-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▎ | 373/1784 [18:28<1:15:45, 3.22s/it]g-point operations will not be computed-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2572, 'learning_rate': 0.0002214, 'epoch': 0.21} + 21%|████████████████▎ | 373/1784 [18:28<1:15:45, 3.22s/it]g-point operations will not be computed-02 09:11:07,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▎ | 374/1784 [18:31<1:15:02, 3.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:29,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 375/1784 [18:34<1:14:23, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:29,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 375/1784 [18:34<1:14:23, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:29,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:11:34,351 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:29,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:11:34,351 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:29,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7037, 'learning_rate': 0.00022319999999999998, 'epoch': 0.21} + 21%|████████████████▍ | 377/1784 [18:40<1:13:22, 3.13s/it]g-point operations will not be computed-02 09:11:29,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 377/1784 [18:40<1:13:22, 3.13s/it]g-point operations will not be computed-02 09:11:29,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6645, 'learning_rate': 0.0002238, 'epoch': 0.21} + 21%|████████████████▍ | 377/1784 [18:40<1:13:22, 3.13s/it]g-point operations will not be computed-02 09:11:29,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 378/1784 [18:44<1:13:22, 3.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:42,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 379/1784 [18:47<1:13:08, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:42,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 379/1784 [18:47<1:13:08, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:42,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5828, 'learning_rate': 0.000225, 'epoch': 0.21} + 21%|████████████████▌ | 379/1784 [18:47<1:13:08, 3.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:42,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 380/1784 [18:50<1:12:19, 3.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 381/1784 [18:53<1:11:36, 3.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 381/1784 [18:53<1:11:36, 3.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:11:52,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:11:52,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7117, 'learning_rate': 0.00022679999999999998, 'epoch': 0.21} + 21%|████████████████▋ | 383/1784 [18:59<1:10:05, 3.00s/it]g-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 383/1784 [18:59<1:10:05, 3.00s/it]g-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:11:58,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:11:58,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4067, 'learning_rate': 0.00022799999999999999, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 09:11:58,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▊ | 385/1784 [19:04<1:07:18, 2.89s/it]g-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:03,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:03,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:06,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:06,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.7761, 'learning_rate': 0.00022979999999999997, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:06,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:11:48,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 388/1784 [19:12<1:03:31, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 388/1784 [19:12<1:03:31, 2.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 389/1784 [19:15<1:02:16, 2.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:14,193 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:14,193 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:16,508 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:16,508 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:18,710 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:18,710 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:20,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:20,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:22,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:22,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:24,446 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:24,446 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.4047, 'learning_rate': 0.00023459999999999998, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:26,007 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:26,007 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:28,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:28,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.4495, 'learning_rate': 0.0002364, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:29,665 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:29,665 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:31,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:35,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:35,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2584, 'learning_rate': 0.0002382, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:38,732 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:38,732 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8706, 'learning_rate': 0.0002388, 'epoch': 0.23} + 23%|█████████████████▌ | 403/1784 [19:45<1:07:43, 2.94s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▌ | 403/1784 [19:45<1:07:43, 2.94s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6789, 'learning_rate': 0.0002394, 'epoch': 0.23} + 23%|█████████████████▌ | 403/1784 [19:45<1:07:43, 2.94s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▋ | 404/1784 [19:49<1:11:48, 3.12s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:49,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:12:49,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6802, 'learning_rate': 0.0002406, 'epoch': 0.23} + 23%|█████████████████▊ | 406/1784 [19:56<1:17:09, 3.36s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 406/1784 [19:56<1:17:09, 3.36s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2712, 'learning_rate': 0.00024119999999999998, 'epoch': 0.23} + 23%|█████████████████▊ | 407/1784 [20:00<1:17:38, 3.38s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 407/1784 [20:00<1:17:38, 3.38s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1058, 'learning_rate': 0.0002418, 'epoch': 0.23} + 23%|█████████████████▊ | 407/1784 [20:00<1:17:38, 3.38s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 408/1784 [20:03<1:18:05, 3.41s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:13:03,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:13:03,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5337, 'learning_rate': 0.000243, 'epoch': 0.23} + 23%|█████████████████▉ | 410/1784 [20:10<1:18:04, 3.41s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 410/1784 [20:10<1:18:04, 3.41s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1354, 'learning_rate': 0.00024359999999999999, 'epoch': 0.23} + 23%|█████████████████▉ | 410/1784 [20:10<1:18:04, 3.41s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 411/1784 [20:13<1:17:59, 3.41s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:13:13,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:13:13,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2163, 'learning_rate': 0.0002448, 'epoch': 0.23} + 23%|██████████████████ | 413/1784 [20:20<1:17:49, 3.41s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 413/1784 [20:20<1:17:49, 3.41s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5183, 'learning_rate': 0.00024539999999999995, 'epoch': 0.23} + 23%|██████████████████ | 413/1784 [20:20<1:17:49, 3.41s/it]g-point operations will not be computed-02 09:12:10,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 414/1784 [20:23<1:17:26, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 415/1784 [20:27<1:17:26, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 415/1784 [20:27<1:17:26, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9839, 'learning_rate': 0.0002466, 'epoch': 0.23} + 23%|██████████████████▏ | 416/1784 [20:30<1:16:55, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 416/1784 [20:30<1:16:55, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:13:30,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:13:30,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3157, 'learning_rate': 0.00024779999999999995, 'epoch': 0.23} + 23%|██████████████████▎ | 418/1784 [20:37<1:16:41, 3.37s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 418/1784 [20:37<1:16:41, 3.37s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8673, 'learning_rate': 0.00024839999999999997, 'epoch': 0.23} + 23%|██████████████████▎ | 418/1784 [20:37<1:16:41, 3.37s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 419/1784 [20:40<1:15:39, 3.33s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:13:40,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:13:40,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8833, 'learning_rate': 0.00024959999999999994, 'epoch': 0.24} + 24%|██████████████████▍ | 421/1784 [20:47<1:15:45, 3.33s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▍ | 421/1784 [20:47<1:15:45, 3.33s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:13:47,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:13:47,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9571, 'learning_rate': 0.00025079999999999997, 'epoch': 0.24} + 24%|██████████████████▍ | 423/1784 [20:53<1:14:18, 3.28s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▍ | 423/1784 [20:53<1:14:18, 3.28s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0206, 'learning_rate': 0.0002514, 'epoch': 0.24} + 24%|██████████████████▍ | 423/1784 [20:53<1:14:18, 3.28s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▌ | 424/1784 [20:56<1:13:46, 3.25s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:13:56,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:13:56,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8988, 'learning_rate': 0.00025259999999999996, 'epoch': 0.24} + 24%|██████████████████▋ | 426/1784 [21:03<1:12:15, 3.19s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▋ | 426/1784 [21:03<1:12:15, 3.19s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:02,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:02,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.0535, 'learning_rate': 0.0002538, 'epoch': 0.24} + 24%|██████████████████▋ | 428/1784 [21:09<1:10:55, 3.14s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▋ | 428/1784 [21:09<1:10:55, 3.14s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:08,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:08,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9605, 'learning_rate': 0.00025499999999999996, 'epoch': 0.24} + 24%|██████████████████▊ | 430/1784 [21:15<1:09:19, 3.07s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 430/1784 [21:15<1:09:19, 3.07s/it]g-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:14,939 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:14,939 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7265, 'learning_rate': 0.0002562, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:14,939 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:13:22,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 432/1784 [21:21<1:07:59, 3.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:14:19,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 433/1784 [21:24<1:06:51, 2.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:14:19,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 433/1784 [21:24<1:06:51, 2.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:14:19,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:23,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:19,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:23,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:19,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8154, 'learning_rate': 0.000258, 'epoch': 0.24} + 24%|███████████████████ | 435/1784 [21:29<1:05:13, 2.90s/it]g-point operations will not be computed-02 09:14:19,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 435/1784 [21:29<1:05:13, 2.90s/it]g-point operations will not be computed-02 09:14:19,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:29,121 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:19,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:29,121 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:19,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:31,789 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:19,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:31,789 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:19,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9073, 'learning_rate': 0.00025979999999999997, 'epoch': 0.24} + 25%|███████████████████▏ | 438/1784 [21:37<1:01:52, 2.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▏ | 438/1784 [21:37<1:01:52, 2.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▏ | 439/1784 [21:40<1:00:54, 2.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▏ | 439/1784 [21:40<1:00:54, 2.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:39,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:39,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:41,989 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:41,989 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:44,149 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:46,078 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:46,078 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.956, 'learning_rate': 0.0002628, 'epoch': 0.25} +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:47,869 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:47,869 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:49,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:49,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:51,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:51,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:53,814 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:53,814 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.9474, 'learning_rate': 0.00026579999999999996, 'epoch': 0.25} +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:54,997 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:54,997 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:56,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:14:56,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8928, 'learning_rate': 0.0002676, 'epoch': 0.25} +[WARNING|modeling_utils.py:388] 2022-03-02 09:15:00,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:15:00,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:15:04,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:15:04,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9016, 'learning_rate': 0.0002688, 'epoch': 0.25} + 25%|███████████████████▊ | 453/1784 [22:11<1:05:33, 2.96s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 453/1784 [22:11<1:05:33, 2.96s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7164, 'learning_rate': 0.0002694, 'epoch': 0.25} + 25%|███████████████████▊ | 454/1784 [22:14<1:10:05, 3.16s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 454/1784 [22:14<1:10:05, 3.16s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5628, 'learning_rate': 0.00027, 'epoch': 0.25} + 26%|███████████████████▉ | 455/1784 [22:18<1:12:45, 3.28s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|███████████████████▉ | 455/1784 [22:18<1:12:45, 3.28s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:15:18,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:15:18,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6684, 'learning_rate': 0.0002712, 'epoch': 0.26} + 26%|███████████████████▉ | 457/1784 [22:25<1:15:54, 3.43s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|███████████████████▉ | 457/1784 [22:25<1:15:54, 3.43s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8451, 'learning_rate': 0.0002718, 'epoch': 0.26} + 26%|████████████████████ | 458/1784 [22:29<1:16:28, 3.46s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 458/1784 [22:29<1:16:28, 3.46s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5488, 'learning_rate': 0.0002724, 'epoch': 0.26} + 26%|████████████████████ | 459/1784 [22:32<1:16:43, 3.47s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 459/1784 [22:32<1:16:43, 3.47s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:15:32,734 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:15:32,734 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.476, 'learning_rate': 0.0002736, 'epoch': 0.26} + 26%|████████████████████▏ | 461/1784 [22:39<1:16:25, 3.47s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 461/1784 [22:39<1:16:25, 3.47s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3327, 'learning_rate': 0.0002742, 'epoch': 0.26} + 26%|████████████████████▏ | 461/1784 [22:39<1:16:25, 3.47s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 462/1784 [22:43<1:16:02, 3.45s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:15:43,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:15:43,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9985, 'learning_rate': 0.00027539999999999997, 'epoch': 0.26} + 26%|████████████████████▎ | 464/1784 [22:49<1:15:39, 3.44s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 464/1784 [22:49<1:15:39, 3.44s/it]g-point operations will not be computed-02 09:14:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5718, 'learning_rate': 0.000276, 'epoch': 0.26} + 26%|████████████████████▎ | 465/1784 [22:53<1:15:00, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:15:51,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 465/1784 [22:53<1:15:00, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:15:51,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 466/1784 [22:56<1:14:30, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:15:51,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 466/1784 [22:56<1:14:30, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:15:51,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9586, 'learning_rate': 0.0002772, 'epoch': 0.26} + 26%|████████████████████▎ | 466/1784 [22:56<1:14:30, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:15:51,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 467/1784 [22:59<1:14:12, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:15:51,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 467/1784 [22:59<1:14:12, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:15:51,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 467/1784 [22:59<1:14:12, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:15:51,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 468/1784 [23:03<1:13:32, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 468/1784 [23:03<1:13:32, 3.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 469/1784 [23:06<1:13:02, 3.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 469/1784 [23:06<1:13:02, 3.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 469/1784 [23:06<1:13:02, 3.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 470/1784 [23:09<1:12:34, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:09,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:09,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2663, 'learning_rate': 0.0002802, 'epoch': 0.26} +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:09,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 472/1784 [23:16<1:11:55, 3.29s/it]g-point operations will not be computed-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:16,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:16,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7528, 'learning_rate': 0.00028139999999999996, 'epoch': 0.27} +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:16,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▋ | 474/1784 [23:22<1:11:15, 3.26s/it]g-point operations will not be computed-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▋ | 474/1784 [23:22<1:11:15, 3.26s/it]g-point operations will not be computed-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▋ | 474/1784 [23:22<1:11:15, 3.26s/it]g-point operations will not be computed-02 09:16:01,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 475/1784 [23:26<1:11:01, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:24,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 475/1784 [23:26<1:11:01, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:24,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 476/1784 [23:29<1:10:09, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:24,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 476/1784 [23:29<1:10:09, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:24,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 476/1784 [23:29<1:10:09, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:24,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 477/1784 [23:32<1:09:44, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 477/1784 [23:32<1:09:44, 3.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 478/1784 [23:35<1:08:59, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 478/1784 [23:35<1:08:59, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 478/1784 [23:35<1:08:59, 3.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 479/1784 [23:38<1:08:24, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 479/1784 [23:38<1:08:24, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:38,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:38,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:38,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 481/1784 [23:44<1:07:20, 3.10s/it]g-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 481/1784 [23:44<1:07:20, 3.10s/it]g-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:44,280 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:44,280 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:44,280 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 483/1784 [23:50<1:06:26, 3.06s/it]g-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 483/1784 [23:50<1:06:26, 3.06s/it]g-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:50,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:50,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:50,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:30,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 485/1784 [23:56<1:04:32, 2.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:54,558 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 485/1784 [23:56<1:04:32, 2.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:54,558 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 486/1784 [23:59<1:03:39, 2.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:54,558 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 486/1784 [23:59<1:03:39, 2.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:16:54,558 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:16:58,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:54,558 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:01,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:54,558 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:01,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:54,558 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1816, 'learning_rate': 0.00029039999999999996, 'epoch': 0.27} +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:01,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:16:54,558 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▉ | 489/1784 [24:07<59:44, 2.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▉ | 489/1784 [24:07<59:44, 2.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▉ | 490/1784 [24:10<58:18, 2.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▉ | 490/1784 [24:10<58:18, 2.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:09,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:11,346 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:11,346 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:13,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:13,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:15,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:15,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:17,433 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:17,433 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0931, 'learning_rate': 0.00029459999999999995, 'epoch': 0.28} +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:19,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:19,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:20,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:20,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:23,326 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:17:23,326 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.264, 'learning_rate': 0.00029699999999999996, 'epoch': 0.28} +[INFO|trainer.py:2369] 2022-03-02 09:17:25,113 >> Batch size = 8aluation *****e number of tokens of the input, floating-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 0/331 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 2/331 [00:02<06:39, 1.22s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 3/331 [00:04<08:56, 1.64s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 4/331 [00:06<10:12, 1.87s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 5/331 [00:09<11:44, 2.16s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 6/331 [00:12<12:44, 2.35s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 7/331 [00:14<12:53, 2.39s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|██ | 8/331 [00:17<13:13, 2.46s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 9/331 [00:20<13:51, 2.58s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 10/331 [00:23<14:50, 2.77s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 11/331 [00:26<14:25, 2.71s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 12/331 [00:28<14:14, 2.68s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 13/331 [00:31<14:04, 2.65s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 14/331 [00:33<13:49, 2.62s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 15/331 [00:37<15:02, 2.86s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 16/331 [00:40<15:56, 3.04s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 17/331 [00:43<16:01, 3.06s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▍ | 18/331 [00:45<14:35, 2.80s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 19/331 [00:48<14:11, 2.73s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 20/331 [00:50<13:19, 2.57s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████▏ | 21/331 [00:53<13:56, 2.70s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 22/331 [00:57<15:01, 2.92s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 23/331 [01:01<16:24, 3.20s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 24/331 [01:04<17:16, 3.38s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 25/331 [01:07<16:34, 3.25s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 26/331 [01:10<15:26, 3.04s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 27/331 [01:13<15:30, 3.06s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▉ | 28/331 [01:16<15:01, 2.98s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 29/331 [01:18<14:37, 2.90s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 30/331 [01:21<13:58, 2.79s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▋ | 31/331 [01:23<13:28, 2.69s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 32/331 [01:26<13:12, 2.65s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 33/331 [01:29<13:10, 2.65s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▍ | 34/331 [01:31<13:13, 2.67s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 35/331 [01:34<13:23, 2.72s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 36/331 [01:37<13:57, 2.84s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 37/331 [01:41<14:38, 2.99s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▍ | 38/331 [01:44<14:43, 3.02s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 39/331 [01:47<14:44, 3.03s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 40/331 [01:49<13:34, 2.80s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|██████████▏ | 41/331 [01:51<12:53, 2.67s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 42/331 [01:55<13:45, 2.86s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 43/331 [01:58<14:27, 3.01s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▉ | 44/331 [02:01<14:58, 3.13s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 45/331 [02:04<14:14, 2.99s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 46/331 [02:06<13:17, 2.80s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▋ | 47/331 [02:09<12:31, 2.65s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 48/331 [02:12<12:41, 2.69s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▏ | 49/331 [02:15<13:12, 2.81s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▍ | 50/331 [02:17<13:02, 2.78s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▋ | 51/331 [02:20<13:16, 2.84s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 52/331 [02:23<12:45, 2.74s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 53/331 [02:26<12:52, 2.78s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▍ | 54/331 [02:28<12:17, 2.66s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 55/331 [02:31<13:13, 2.87s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 56/331 [02:34<13:04, 2.85s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████ | 57/331 [02:37<12:38, 2.77s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 58/331 [02:40<13:11, 2.90s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 59/331 [02:42<12:24, 2.74s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▊ | 60/331 [02:45<12:10, 2.69s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|███████████████ | 61/331 [02:48<12:34, 2.79s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 62/331 [02:51<12:28, 2.78s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▌ | 63/331 [02:54<13:34, 3.04s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▊ | 64/331 [02:57<13:01, 2.93s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████ | 65/331 [03:00<12:41, 2.86s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▎ | 66/331 [03:04<13:51, 3.14s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▌ | 67/331 [03:07<14:35, 3.32s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 68/331 [03:11<14:47, 3.38s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 69/331 [03:14<14:25, 3.30s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████▎ | 70/331 [03:17<14:08, 3.25s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████▌ | 71/331 [03:20<14:08, 3.26s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 72/331 [03:24<13:59, 3.24s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|██████████████████ | 73/331 [03:27<13:31, 3.15s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|██████████████████▎ | 74/331 [03:30<13:17, 3.10s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 75/331 [03:33<13:28, 3.16s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▊ | 76/331 [03:35<12:47, 3.01s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|███████████████████ | 77/331 [03:38<12:31, 2.96s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▎ | 78/331 [03:41<12:04, 2.86s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 79/331 [03:43<11:37, 2.77s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▊ | 80/331 [03:46<11:28, 2.74s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|████████████████████ | 81/331 [03:49<11:56, 2.87s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▎ | 82/331 [03:52<11:45, 2.83s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▌ | 83/331 [03:55<12:10, 2.95s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▊ | 84/331 [03:59<12:57, 3.15s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 85/331 [04:01<12:03, 2.94s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████▎ | 86/331 [04:05<12:39, 3.10s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████▌ | 87/331 [04:08<12:06, 2.98s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 88/331 [04:10<11:48, 2.92s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|██████████████████████ | 89/331 [04:13<11:04, 2.75s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|██████████████████████▎ | 90/331 [04:15<10:37, 2.65s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|██████████████████████▌ | 91/331 [04:18<11:05, 2.77s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▊ | 92/331 [04:20<10:21, 2.60s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████ | 93/331 [04:23<10:31, 2.66s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▎ | 94/331 [04:26<10:43, 2.71s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▌ | 95/331 [04:29<10:45, 2.73s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▊ | 96/331 [04:32<10:44, 2.74s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 97/331 [04:34<10:19, 2.65s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 98/331 [04:37<10:39, 2.75s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 99/331 [04:40<10:36, 2.74s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▍ | 100/331 [04:42<10:16, 2.67s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▋ | 101/331 [04:45<10:09, 2.65s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▉ | 102/331 [04:48<10:59, 2.88s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 103/331 [04:51<10:25, 2.74s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▍ | 104/331 [04:53<10:25, 2.76s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▋ | 105/331 [04:56<10:29, 2.79s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 106/331 [04:59<10:27, 2.79s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 107/331 [05:01<09:42, 2.60s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▍ | 108/331 [05:04<09:33, 2.57s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▋ | 109/331 [05:06<09:33, 2.58s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▉ | 110/331 [05:09<10:00, 2.72s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▏ | 111/331 [05:12<10:01, 2.74s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▍ | 112/331 [05:15<10:01, 2.75s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▋ | 113/331 [05:17<09:30, 2.62s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▉ | 114/331 [05:20<09:40, 2.68s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▏ | 115/331 [05:23<09:36, 2.67s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 116/331 [05:26<09:54, 2.76s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 117/331 [05:28<09:49, 2.75s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 118/331 [05:31<09:32, 2.69s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████ | 119/331 [05:34<09:30, 2.69s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▎ | 120/331 [05:36<09:32, 2.71s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▌ | 121/331 [05:40<10:04, 2.88s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 122/331 [05:42<09:49, 2.82s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 123/331 [05:46<10:29, 3.03s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 124/331 [05:49<10:17, 2.98s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 125/331 [05:52<10:48, 3.15s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 126/331 [05:55<10:50, 3.18s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 127/331 [05:59<11:08, 3.28s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 128/331 [06:02<11:10, 3.30s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▌ | 129/331 [06:05<10:54, 3.24s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▊ | 130/331 [06:09<11:02, 3.30s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 131/331 [06:12<11:10, 3.35s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 132/331 [06:15<10:36, 3.20s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 133/331 [06:18<09:54, 3.00s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▊ | 134/331 [06:20<09:35, 2.92s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 135/331 [06:24<09:45, 2.99s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 135/331 [06:24<09:45, 2.99s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 135/331 [06:24<09:45, 2.99s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 137/331 [06:30<10:19, 3.19s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 138/331 [06:34<10:37, 3.30s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 139/331 [06:36<09:27, 2.95s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 140/331 [06:40<10:03, 3.16s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 141/331 [06:42<09:37, 3.04s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 142/331 [06:45<09:22, 2.98s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 143/331 [06:49<09:45, 3.12s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▏ | 144/331 [06:51<09:22, 3.01s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 145/331 [06:54<09:14, 2.98s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 146/331 [06:58<09:38, 3.13s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▉ | 147/331 [07:01<09:18, 3.04s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▏ | 148/331 [07:03<08:42, 2.85s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▍ | 149/331 [07:05<08:10, 2.70s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 150/331 [07:09<08:33, 2.83s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 151/331 [07:11<08:23, 2.80s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▏ | 152/331 [07:14<08:02, 2.70s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▍ | 153/331 [07:16<07:56, 2.67s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▋ | 154/331 [07:19<08:14, 2.80s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 155/331 [07:23<08:39, 2.95s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▏ | 156/331 [07:26<08:55, 3.06s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▍ | 157/331 [07:30<09:12, 3.17s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 158/331 [07:33<09:14, 3.21s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 159/331 [07:36<09:19, 3.25s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 160/331 [07:39<08:46, 3.08s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▍ | 161/331 [07:42<08:33, 3.02s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▋ | 162/331 [07:45<08:56, 3.17s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▉ | 163/331 [07:49<09:01, 3.22s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 164/331 [07:51<08:36, 3.09s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▍ | 165/331 [07:54<08:18, 3.01s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 166/331 [07:57<08:06, 2.95s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 167/331 [08:00<08:16, 3.03s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████ | 168/331 [08:03<07:47, 2.87s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▎ | 169/331 [08:06<07:53, 2.92s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▌ | 170/331 [08:08<07:28, 2.78s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 171/331 [08:11<07:23, 2.77s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 172/331 [08:13<07:04, 2.67s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 173/331 [08:16<07:12, 2.74s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 174/331 [08:19<06:54, 2.64s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 175/331 [08:22<07:04, 2.72s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 176/331 [08:24<06:49, 2.64s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████▎ | 177/331 [08:27<07:12, 2.81s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▌ | 178/331 [08:31<07:36, 2.98s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▊ | 179/331 [08:34<07:56, 3.14s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 180/331 [08:37<07:53, 3.13s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 181/331 [08:40<07:49, 3.13s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▌ | 182/331 [08:43<07:14, 2.91s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▊ | 183/331 [08:45<06:43, 2.73s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 184/331 [08:47<06:19, 2.58s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 185/331 [08:49<05:55, 2.43s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 186/331 [08:52<06:05, 2.52s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▊ | 187/331 [08:55<06:36, 2.75s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 188/331 [08:58<06:27, 2.71s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 189/331 [09:00<06:08, 2.59s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▍ | 190/331 [09:03<05:54, 2.51s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▋ | 191/331 [09:05<05:51, 2.51s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 192/331 [09:08<05:44, 2.48s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 193/331 [09:11<06:09, 2.68s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 194/331 [09:13<05:47, 2.54s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 195/331 [09:15<05:38, 2.49s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▉ | 196/331 [09:18<05:46, 2.57s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▏ | 197/331 [09:21<06:00, 2.69s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 198/331 [09:23<05:44, 2.59s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▋ | 199/331 [09:26<05:46, 2.63s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▉ | 200/331 [09:28<05:25, 2.48s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▏ | 201/331 [09:31<05:18, 2.45s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 202/331 [09:33<05:28, 2.54s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 203/331 [09:36<05:29, 2.58s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 204/331 [09:39<05:48, 2.74s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 205/331 [09:42<05:53, 2.81s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 206/331 [09:45<05:48, 2.79s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 207/331 [09:48<05:58, 2.89s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▉ | 208/331 [09:51<06:01, 2.94s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▏ | 209/331 [09:53<05:31, 2.71s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▍ | 210/331 [09:55<05:09, 2.56s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 211/331 [09:58<05:11, 2.59s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 212/331 [10:00<04:57, 2.50s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 213/331 [10:03<04:59, 2.54s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 214/331 [10:05<04:43, 2.42s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 215/331 [10:07<04:32, 2.35s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▊ | 216/331 [10:11<05:01, 2.62s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████ | 217/331 [10:13<05:00, 2.63s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 218/331 [10:17<05:15, 2.79s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 219/331 [10:19<05:11, 2.78s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▊ | 220/331 [10:22<04:56, 2.67s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 221/331 [10:24<04:56, 2.70s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▎ | 222/331 [10:27<04:43, 2.60s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 223/331 [10:30<04:44, 2.63s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 224/331 [10:32<04:46, 2.68s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 225/331 [10:35<04:44, 2.69s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 226/331 [10:38<04:57, 2.84s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▌ | 227/331 [10:41<04:51, 2.80s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 228/331 [10:44<04:43, 2.75s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████ | 229/331 [10:46<04:41, 2.76s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▎ | 230/331 [10:49<04:32, 2.70s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▌ | 231/331 [10:52<04:37, 2.78s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 232/331 [10:55<04:32, 2.75s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 233/331 [10:58<04:38, 2.84s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 234/331 [11:00<04:23, 2.72s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 235/331 [11:02<04:12, 2.64s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 236/331 [11:06<04:39, 2.94s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|█████████████████████████████████████████████████████████▉ | 237/331 [11:10<04:49, 3.08s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▏ | 238/331 [11:13<04:46, 3.08s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▍ | 239/331 [11:16<04:45, 3.10s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▋ | 240/331 [11:19<04:48, 3.17s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 241/331 [11:22<04:51, 3.24s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 242/331 [11:26<04:49, 3.25s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 243/331 [11:29<04:46, 3.25s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 244/331 [11:33<04:50, 3.34s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 245/331 [11:36<04:39, 3.25s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▏ | 246/331 [11:39<04:51, 3.43s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▍ | 247/331 [11:43<04:40, 3.34s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▋ | 248/331 [11:45<04:21, 3.15s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▉ | 249/331 [11:48<03:59, 2.92s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▏ | 250/331 [11:50<03:46, 2.79s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▍ | 251/331 [11:53<03:48, 2.86s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▋ | 252/331 [11:56<03:36, 2.75s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▉ | 253/331 [11:59<03:45, 2.89s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|██████████████████████████████████████████████████████████████▏ | 254/331 [12:02<03:38, 2.84s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|██████████████████████████████████████████████████████████████▍ | 255/331 [12:05<03:42, 2.93s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|██████████████████████████████████████████████████████████████▋ | 256/331 [12:07<03:33, 2.85s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|██████████████████████████████████████████████████████████████▉ | 257/331 [12:11<03:37, 2.94s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|███████████████████████████████████████████████████████████████▏ | 258/331 [12:13<03:23, 2.78s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|███████████████████████████████████████████████████████████████▍ | 259/331 [12:16<03:16, 2.73s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|███████████████████████████████████████████████████████████████▋ | 260/331 [12:19<03:18, 2.79s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|███████████████████████████████████████████████████████████████▊ | 261/331 [12:21<03:04, 2.64s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|████████████████████████████████████████████████████████████████ | 262/331 [12:23<03:02, 2.64s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|████████████████████████████████████████████████████████████████▎ | 263/331 [12:27<03:10, 2.80s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|████████████████████████████████████████████████████████████████▌ | 264/331 [12:29<03:02, 2.73s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|████████████████████████████████████████████████████████████████▊ | 265/331 [12:32<02:57, 2.69s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|█████████████████████████████████████████████████████████████████ | 266/331 [12:34<02:50, 2.62s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|█████████████████████████████████████████████████████████████████▎ | 267/331 [12:37<02:59, 2.81s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|█████████████████████████████████████████████████████████████████▌ | 268/331 [12:40<02:57, 2.82s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|█████████████████████████████████████████████████████████████████▊ | 269/331 [12:44<03:04, 2.97s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|██████████████████████████████████████████████████████████████████ | 270/331 [12:47<02:59, 2.94s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|██████████████████████████████████████████████████████████████████▎ | 271/331 [12:50<03:02, 3.04s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|██████████████████████████████████████████████████████████████████▌ | 272/331 [12:52<02:53, 2.94s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|██████████████████████████████████████████████████████████████████▊ | 273/331 [12:55<02:50, 2.95s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|███████████████████████████████████████████████████████████████████ | 274/331 [12:59<02:55, 3.08s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|███████████████████████████████████████████████████████████████████▎ | 275/331 [13:02<02:54, 3.12s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|███████████████████████████████████████████████████████████████████▌ | 276/331 [13:05<02:42, 2.95s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|███████████████████████████████████████████████████████████████████▊ | 277/331 [13:07<02:34, 2.87s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|████████████████████████████████████████████████████████████████████ | 278/331 [13:10<02:30, 2.84s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|████████████████████████████████████████████████████████████████████▎ | 279/331 [13:14<02:41, 3.10s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|████████████████████████████████████████████████████████████████████▌ | 280/331 [13:17<02:34, 3.03s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|████████████████████████████████████████████████████████████████████▊ | 281/331 [13:20<02:35, 3.11s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|█████████████████████████████████████████████████████████████████████ | 282/331 [13:23<02:32, 3.12s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|█████████████████████████████████████████████████████████████████████▎ | 283/331 [13:26<02:33, 3.20s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|█████████████████████████████████████████████████████████████████████▍ | 284/331 [13:30<02:34, 3.28s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|█████████████████████████████████████████████████████████████████████▋ | 285/331 [13:33<02:33, 3.34s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|█████████████████████████████████████████████████████████████████████▉ | 286/331 [13:37<02:31, 3.36s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|██████████████████████████████████████████████████████████████████████▏ | 287/331 [13:40<02:31, 3.45s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|██████████████████████████████████████████████████████████████████████▍ | 288/331 [13:44<02:28, 3.45s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|██████████████████████████████████████████████████████████████████████▋ | 289/331 [13:47<02:16, 3.24s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|██████████████████████████████████████████████████████████████████████▉ | 290/331 [13:49<02:05, 3.05s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|███████████████████████████████████████████████████████████████████████▏ | 291/331 [13:52<01:55, 2.88s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|███████████████████████████████████████████████████████████████████████▍ | 292/331 [13:54<01:49, 2.81s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|███████████████████████████████████████████████████████████████████████▋ | 293/331 [13:57<01:46, 2.81s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|███████████████████████████████████████████████████████████████████████▉ | 294/331 [14:00<01:39, 2.69s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|████████████████████████████████████████████████████████████████████████▏ | 295/331 [14:02<01:34, 2.63s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|████████████████████████████████████████████████████████████████████████▍ | 296/331 [14:05<01:29, 2.57s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|████████████████████████████████████████████████████████████████████████▋ | 297/331 [14:08<01:37, 2.86s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|████████████████████████████████████████████████████████████████████████▉ | 298/331 [14:12<01:41, 3.09s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|█████████████████████████████████████████████████████████████████████████▏ | 299/331 [14:14<01:35, 2.99s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|█████████████████████████████████████████████████████████████████████████▍ | 300/331 [14:17<01:32, 2.97s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|█████████████████████████████████████████████████████████████████████████▋ | 301/331 [14:20<01:27, 2.92s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|█████████████████████████████████████████████████████████████████████████▉ | 302/331 [14:23<01:22, 2.86s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|██████████████████████████████████████████████████████████████████████████▏ | 303/331 [14:25<01:17, 2.76s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|██████████████████████████████████████████████████████████████████████████▍ | 304/331 [14:28<01:16, 2.85s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|██████████████████████████████████████████████████████████████████████████▋ | 305/331 [14:32<01:16, 2.95s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|██████████████████████████████████████████████████████████████████████████▉ | 306/331 [14:35<01:18, 3.13s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|███████████████████████████████████████████████████████████████████████████▏ | 307/331 [14:39<01:18, 3.26s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|███████████████████████████████████████████████████████████████████████████▎ | 308/331 [14:43<01:19, 3.44s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|███████████████████████████████████████████████████████████████████████████▌ | 309/331 [14:46<01:16, 3.50s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|███████████████████████████████████████████████████████████████████████████▊ | 310/331 [14:49<01:08, 3.25s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|████████████████████████████████████████████████████████████████████████████ | 311/331 [14:52<01:04, 3.23s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|████████████████████████████████████████████████████████████████████████████▎ | 312/331 [14:55<00:57, 3.02s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|████████████████████████████████████████████████████████████████████████████▌ | 313/331 [14:57<00:53, 2.95s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|████████████████████████████████████████████████████████████████████████████▊ | 314/331 [15:01<00:50, 3.00s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|█████████████████████████████████████████████████████████████████████████████ | 315/331 [15:04<00:49, 3.10s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|█████████████████████████████████████████████████████████████████████████████▎ | 316/331 [15:07<00:46, 3.11s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|█████████████████████████████████████████████████████████████████████████████▌ | 317/331 [15:11<00:45, 3.23s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|█████████████████████████████████████████████████████████████████████████████▊ | 318/331 [15:13<00:39, 3.05s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|██████████████████████████████████████████████████████████████████████████████ | 319/331 [15:16<00:35, 2.92s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|██████████████████████████████████████████████████████████████████████████████▎ | 320/331 [15:19<00:32, 2.95s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|██████████████████████████████████████████████████████████████████████████████▌ | 321/331 [15:22<00:29, 2.92s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|██████████████████████████████████████████████████████████████████████████████▊ | 322/331 [15:25<00:27, 3.06s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|███████████████████████████████████████████████████████████████████████████████ | 323/331 [15:28<00:23, 2.99s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|███████████████████████████████████████████████████████████████████████████████▎ | 324/331 [15:31<00:21, 3.09s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|███████████████████████████████████████████████████████████████████████████████▌ | 325/331 [15:34<00:18, 3.10s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|███████████████████████████████████████████████████████████████████████████████▊ | 326/331 [15:38<00:15, 3.13s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|████████████████████████████████████████████████████████████████████████████████ | 327/331 [15:41<00:12, 3.14s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|████████████████████████████████████████████████████████████████████████████████▎| 328/331 [15:44<00:09, 3.17s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|████████████████████████████████████████████████████████████████████████████████▌| 329/331 [15:47<00:06, 3.10s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +100%|████████████████████████████████████████████████████████████████████████████████▊| 330/331 [15:50<00:03, 3.26s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +100%|████████████████████████████████████████████████████████████████████████████████▊| 330/331 [15:50<00:03, 3.26s/it]g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +03/02/2022 09:33:21 - INFO - datasets.metric - Removing /home/sanchit_huggingface_co/.cache/huggingface/metrics/wer/default/default_experiment-1-0.arrow +[INFO|configuration_utils.py:438] 2022-03-02 09:33:21,037 >> Configuration saved in ./checkpoint-500/config.json g-point operations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|feature_extraction_utils.py:324] 2022-03-02 09:33:37,443 >> Configuration saved in ./checkpoint-500/preprocessor_config.jsonerations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|feature_extraction_utils.py:324] 2022-03-02 09:33:37,443 >> Configuration saved in ./checkpoint-500/preprocessor_config.jsonerations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|feature_extraction_utils.py:324] 2022-03-02 09:33:37,443 >> Configuration saved in ./checkpoint-500/preprocessor_config.jsonerations will not be computed-02 09:17:05,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +03/02/2022 09:35:10 - WARNING - huggingface_hub.repository - Adding files tracked by Git LFS: ['wandb/run-20220302_055556-ymuc7hv0/run-ymuc7hv0.wandb', 'wandb/run-20220302_063647-bmivw6vv/run-bmivw6vv.wandb', 'wandb/run-20220302_074637-35y19oi2/run-35y19oi2.wandb', 'wandb/run-20220302_085255-16llzpbl/run-16llzpbl.wandb']. This may take a bit of time if the files are large. diff --git a/wandb/run-20220302_085255-16llzpbl/files/requirements.txt b/wandb/run-20220302_085255-16llzpbl/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3974f97a24952deb24d97850f53367da9e7c347d --- /dev/null +++ b/wandb/run-20220302_085255-16llzpbl/files/requirements.txt @@ -0,0 +1,184 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +anyio==3.5.0 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +asttokens==2.0.5 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +babel==2.9.1 +backcall==0.2.0 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +clldutils==3.10.1 +colorlog==6.6.0 +csvw==1.11.0 +cycler==0.11.0 +datasets==1.18.3 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +executing==0.8.2 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +gitdb==4.0.9 +gitpython==3.1.27 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +grpcio==1.43.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2==3.0.3 +jiwer==2.3.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +kiwisolver==1.3.2 +librosa==0.8.1 +llvmlite==0.38.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +opt-einsum==3.3.0 +optax==0.1.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.0.2 +pkg-resources==0.0.0 +platformdirs==2.4.1 +pooch==1.6.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.3.0 +pygments==2.11.2 +pygtrie==2.4.2 +pyparsing==3.0.7 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +rfc3986==2.0.0 +rsa==4.8 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +stack-data==0.1.4 +tabulate==0.8.9 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +threadpoolctl==3.1.0 +tokenizers==0.11.4 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.17.0.dev0 +typing-extensions==3.10.0.2 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220302_085255-16llzpbl/files/wandb-metadata.json b/wandb/run-20220302_085255-16llzpbl/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9d79274733af5e363358f8ab659f8630d1edbec9 --- /dev/null +++ b/wandb/run-20220302_085255-16llzpbl/files/wandb-metadata.json @@ -0,0 +1,60 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-03-02T08:52:56.527729", + "startedAt": "2022-03-02T08:52:55.418681", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--dataset_name=librispeech_asr", + "--model_name_or_path=./", + "--tokenizer_name=./", + "--dataset_config_name=clean", + "--train_split_name=train.100", + "--eval_split_name=validation", + "--output_dir=./", + "--preprocessing_num_workers=1", + "--length_column_name=input_length", + "--overwrite_output_dir", + "--num_train_epochs=1", + "--per_device_train_batch_size=8", + "--per_device_eval_batch_size=8", + "--gradient_accumulation_steps=2", + "--generation_max_length=40", + "--generation_num_beams=1", + "--learning_rate=3e-4", + "--warmup_steps=500", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=1", + "--save_total_limit=1", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--fp16", + "--group_by_length", + "--predict_with_generate", + "--do_lower_case", + "--do_train", + "--do_eval", + "--report_to=wandb", + "--push_to_hub", + "--use_auth_token" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py", + "codePath": "run_speech_recognition_seq2seq.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/wav2vec2-gpt2-wandb-grid-search", + "commit": "622e8b594e8af8169be8cfe538228e49ae08c59d" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python" +} diff --git a/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json b/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..7efd4fe616f5ad37c32ecbb7947112ed0334e3a1 --- /dev/null +++ b/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/loss": 4.6975, "train/learning_rate": 0.00029759999999999997, "train/epoch": 0.28, "train/global_step": 500, "_runtime": 2426, "_timestamp": 1646213601, "_step": 500, "gradients/decoder.transformer.ln_f.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 21.0, 171.0, 450.0, 303.0, 58.0, 8.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-196.889404296875, -193.0316925048828, -189.17396545410156, -185.31625366210938, -181.45852661132812, -177.60081481933594, -173.74310302734375, -169.8853759765625, -166.0276641845703, -162.16995239257812, -158.31222534179688, -154.4545135498047, -150.5968017578125, -146.73907470703125, -142.88136291503906, -139.0236358642578, -135.16592407226562, -131.30821228027344, -127.45048522949219, -123.5927734375, -119.73505401611328, -115.87733459472656, -112.01962280273438, -108.16190338134766, -104.30418395996094, -100.44646453857422, -96.5887451171875, -92.73103332519531, -88.8733139038086, -85.01559448242188, -81.15788269042969, -77.30016326904297, -73.44243621826172, -69.584716796875, -65.72700500488281, -61.869285583496094, -58.011566162109375, -54.153846740722656, -50.2961311340332, -46.43841552734375, -42.58069610595703, -38.72297668457031, -34.86526107788086, -31.007543563842773, -27.149826049804688, -23.2921085357666, -19.434391021728516, -15.57667350769043, -11.718955993652344, -7.861238479614258, -4.003520965576172, -0.14580345153808594, 3.7119140625, 7.569631576538086, 11.427349090576172, 15.285066604614258, 19.142784118652344, 23.00050163269043, 26.858219146728516, 30.7159366607666, 34.57365417480469, 38.431373596191406, 42.28908920288086, 46.14680480957031, 50.00452423095703]}, "gradients/decoder.transformer.ln_f.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 2.0, 4.0, 3.0, 4.0, 9.0, 4.0, 7.0, 7.0, 10.0, 14.0, 17.0, 11.0, 19.0, 20.0, 29.0, 27.0, 28.0, 33.0, 38.0, 54.0, 64.0, 42.0, 38.0, 47.0, 44.0, 32.0, 35.0, 42.0, 53.0, 37.0, 32.0, 30.0, 30.0, 19.0, 19.0, 10.0, 15.0, 17.0, 12.0, 11.0, 7.0, 10.0, 3.0, 4.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0], "bins": [-77.73529052734375, -75.4823989868164, -73.22949981689453, -70.97660827636719, -68.72370910644531, -66.47081756591797, -64.2179183959961, -61.96502685546875, -59.71213150024414, -57.45923614501953, -55.20634078979492, -52.95344543457031, -50.70055389404297, -48.447654724121094, -46.19476318359375, -43.94186782836914, -41.68897247314453, -39.43607711791992, -37.18318176269531, -34.9302864074707, -32.677391052246094, -30.424497604370117, -28.17160415649414, -25.91870880126953, -23.665813446044922, -21.412918090820312, -19.160022735595703, -16.907129287719727, -14.654233932495117, -12.401338577270508, -10.148444175720215, -7.895549774169922, -5.6426544189453125, -3.3897595405578613, -1.1368646621704102, 1.116030216217041, 3.368925094604492, 5.621820449829102, 7.8747148513793945, 10.127609252929688, 12.380504608154297, 14.633399963378906, 16.886295318603516, 19.139188766479492, 21.3920841217041, 23.64497947692871, 25.897872924804688, 28.150768280029297, 30.403663635253906, 32.656558990478516, 34.909454345703125, 37.162349700927734, 39.415245056152344, 41.66813659667969, 43.9210319519043, 46.173927307128906, 48.426822662353516, 50.679718017578125, 52.932613372802734, 55.185508728027344, 57.43840026855469, 59.69129943847656, 61.944190979003906, 64.19708251953125, 66.44998168945312]}, "gradients/decoder.transformer.h.23.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 5.0, 4.0, 6.0, 4.0, 5.0, 14.0, 11.0, 10.0, 15.0, 23.0, 26.0, 32.0, 35.0, 34.0, 34.0, 52.0, 66.0, 55.0, 57.0, 55.0, 50.0, 49.0, 48.0, 46.0, 40.0, 42.0, 32.0, 30.0, 22.0, 19.0, 22.0, 9.0, 19.0, 12.0, 9.0, 3.0, 4.0, 4.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-2.205078125, -2.1446990966796875, -2.084320068359375, -2.0239410400390625, -1.96356201171875, -1.9031829833984375, -1.842803955078125, -1.7824249267578125, -1.7220458984375, -1.6616668701171875, -1.601287841796875, -1.5409088134765625, -1.48052978515625, -1.4201507568359375, -1.359771728515625, -1.2993927001953125, -1.239013671875, -1.1786346435546875, -1.118255615234375, -1.0578765869140625, -0.99749755859375, -0.9371185302734375, -0.876739501953125, -0.8163604736328125, -0.7559814453125, -0.6956024169921875, -0.635223388671875, -0.5748443603515625, -0.51446533203125, -0.4540863037109375, -0.393707275390625, -0.3333282470703125, -0.27294921875, -0.2125701904296875, -0.152191162109375, -0.0918121337890625, -0.03143310546875, 0.0289459228515625, 0.089324951171875, 0.1497039794921875, 0.2100830078125, 0.2704620361328125, 0.330841064453125, 0.3912200927734375, 0.45159912109375, 0.5119781494140625, 0.572357177734375, 0.6327362060546875, 0.693115234375, 0.7534942626953125, 0.813873291015625, 0.8742523193359375, 0.93463134765625, 0.9950103759765625, 1.055389404296875, 1.1157684326171875, 1.1761474609375, 1.2365264892578125, 1.296905517578125, 1.3572845458984375, 1.41766357421875, 1.4780426025390625, 1.538421630859375, 1.5988006591796875, 1.6591796875]}, "gradients/decoder.transformer.h.23.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 2.0, 4.0, 7.0, 7.0, 9.0, 27.0, 20.0, 33.0, 52.0, 73.0, 102.0, 150.0, 246.0, 354.0, 525.0, 831.0, 1416.0, 2709.0, 6606.0, 23966.0, 558676.0, 3534131.0, 46455.0, 9588.0, 3685.0, 1746.0, 1018.0, 634.0, 409.0, 263.0, 175.0, 102.0, 93.0, 59.0, 37.0, 22.0, 19.0, 12.0, 11.0, 9.0, 2.0, 4.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 2.0], "bins": [-17.828125, -17.3353271484375, -16.842529296875, -16.3497314453125, -15.85693359375, -15.3641357421875, -14.871337890625, -14.3785400390625, -13.8857421875, -13.3929443359375, -12.900146484375, -12.4073486328125, -11.91455078125, -11.4217529296875, -10.928955078125, -10.4361572265625, -9.943359375, -9.4505615234375, -8.957763671875, -8.4649658203125, -7.97216796875, -7.4793701171875, -6.986572265625, -6.4937744140625, -6.0009765625, -5.5081787109375, -5.015380859375, -4.5225830078125, -4.02978515625, -3.5369873046875, -3.044189453125, -2.5513916015625, -2.05859375, -1.5657958984375, -1.072998046875, -0.5802001953125, -0.08740234375, 0.4053955078125, 0.898193359375, 1.3909912109375, 1.8837890625, 2.3765869140625, 2.869384765625, 3.3621826171875, 3.85498046875, 4.3477783203125, 4.840576171875, 5.3333740234375, 5.826171875, 6.3189697265625, 6.811767578125, 7.3045654296875, 7.79736328125, 8.2901611328125, 8.782958984375, 9.2757568359375, 9.7685546875, 10.2613525390625, 10.754150390625, 11.2469482421875, 11.73974609375, 12.2325439453125, 12.725341796875, 13.2181396484375, 13.7109375]}, "gradients/decoder.transformer.h.23.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 6.0, 4.0, 9.0, 9.0, 9.0, 18.0, 36.0, 42.0, 44.0, 83.0, 144.0, 205.0, 337.0, 595.0, 756.0, 589.0, 346.0, 223.0, 151.0, 111.0, 78.0, 49.0, 38.0, 29.0, 36.0, 30.0, 19.0, 13.0, 13.0, 11.0, 5.0, 10.0, 11.0, 4.0, 6.0, 5.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 3.0, 1.0], "bins": [-8.203125, -7.95782470703125, -7.7125244140625, -7.46722412109375, -7.221923828125, -6.97662353515625, -6.7313232421875, -6.48602294921875, -6.24072265625, -5.99542236328125, -5.7501220703125, -5.50482177734375, -5.259521484375, -5.01422119140625, -4.7689208984375, -4.52362060546875, -4.2783203125, -4.03302001953125, -3.7877197265625, -3.54241943359375, -3.297119140625, -3.05181884765625, -2.8065185546875, -2.56121826171875, -2.31591796875, -2.07061767578125, -1.8253173828125, -1.58001708984375, -1.334716796875, -1.08941650390625, -0.8441162109375, -0.59881591796875, -0.353515625, -0.10821533203125, 0.1370849609375, 0.38238525390625, 0.627685546875, 0.87298583984375, 1.1182861328125, 1.36358642578125, 1.60888671875, 1.85418701171875, 2.0994873046875, 2.34478759765625, 2.590087890625, 2.83538818359375, 3.0806884765625, 3.32598876953125, 3.5712890625, 3.81658935546875, 4.0618896484375, 4.30718994140625, 4.552490234375, 4.79779052734375, 5.0430908203125, 5.28839111328125, 5.53369140625, 5.77899169921875, 6.0242919921875, 6.26959228515625, 6.514892578125, 6.76019287109375, 7.0054931640625, 7.25079345703125, 7.49609375]}, "gradients/decoder.transformer.h.23.mlp.c_fc.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 2.0, 0.0, 2.0, 1.0, 3.0, 3.0, 6.0, 14.0, 4.0, 8.0, 6.0, 18.0, 8.0, 18.0, 19.0, 35.0, 42.0, 49.0, 43.0, 77.0, 125.0, 160.0, 242.0, 398.0, 856.0, 2430.0, 18785.0, 2089225.0, 2058486.0, 18716.0, 2206.0, 838.0, 412.0, 300.0, 151.0, 121.0, 109.0, 77.0, 57.0, 53.0, 37.0, 32.0, 22.0, 23.0, 19.0, 18.0, 13.0, 8.0, 1.0, 1.0, 3.0, 3.0, 3.0, 1.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-21.296875, -20.60986328125, -19.9228515625, -19.23583984375, -18.548828125, -17.86181640625, -17.1748046875, -16.48779296875, -15.80078125, -15.11376953125, -14.4267578125, -13.73974609375, -13.052734375, -12.36572265625, -11.6787109375, -10.99169921875, -10.3046875, -9.61767578125, -8.9306640625, -8.24365234375, -7.556640625, -6.86962890625, -6.1826171875, -5.49560546875, -4.80859375, -4.12158203125, -3.4345703125, -2.74755859375, -2.060546875, -1.37353515625, -0.6865234375, 0.00048828125, 0.6875, 1.37451171875, 2.0615234375, 2.74853515625, 3.435546875, 4.12255859375, 4.8095703125, 5.49658203125, 6.18359375, 6.87060546875, 7.5576171875, 8.24462890625, 8.931640625, 9.61865234375, 10.3056640625, 10.99267578125, 11.6796875, 12.36669921875, 13.0537109375, 13.74072265625, 14.427734375, 15.11474609375, 15.8017578125, 16.48876953125, 17.17578125, 17.86279296875, 18.5498046875, 19.23681640625, 19.923828125, 20.61083984375, 21.2978515625, 21.98486328125, 22.671875]}, "gradients/decoder.transformer.h.23.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 774.0, 234.0, 6.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-611.0415649414062, -599.2423706054688, -587.443115234375, -575.6439208984375, -563.8447265625, -552.0454711914062, -540.2462768554688, -528.447021484375, -516.6478271484375, -504.8486022949219, -493.04937744140625, -481.25018310546875, -469.4509582519531, -457.6517333984375, -445.8525390625, -434.0533142089844, -422.25408935546875, -410.4548645019531, -398.6556396484375, -386.8564453125, -375.0572204589844, -363.25799560546875, -351.45880126953125, -339.6595764160156, -327.8603515625, -316.0611267089844, -304.26190185546875, -292.46270751953125, -280.6634826660156, -268.8642578125, -257.0650634765625, -245.26583862304688, -233.46664428710938, -221.66741943359375, -209.8682098388672, -198.06900024414062, -186.269775390625, -174.47055053710938, -162.6713409423828, -150.87213134765625, -139.07290649414062, -127.27368927001953, -115.47447204589844, -103.67525482177734, -91.87603759765625, -80.07682037353516, -68.27760314941406, -56.47838592529297, -44.679168701171875, -32.87995147705078, -21.080734252929688, -9.281517028808594, 2.5177001953125, 14.316917419433594, 26.116134643554688, 37.91535186767578, 49.714569091796875, 61.51378631591797, 73.31300354003906, 85.11222076416016, 96.91143798828125, 108.71065521240234, 120.50987243652344, 132.30908203125, 144.10830688476562]}, "gradients/decoder.transformer.h.23.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 15.0, 9.0, 27.0, 19.0, 23.0, 29.0, 38.0, 44.0, 41.0, 64.0, 80.0, 85.0, 66.0, 66.0, 60.0, 57.0, 48.0, 51.0, 46.0, 35.0, 20.0, 23.0, 15.0, 14.0, 6.0, 11.0, 6.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-94.66757202148438, -92.1966781616211, -89.72578430175781, -87.25489044189453, -84.78399658203125, -82.31309509277344, -79.84220123291016, -77.37130737304688, -74.9004135131836, -72.42951965332031, -69.95862579345703, -67.48773193359375, -65.01683044433594, -62.54594039916992, -60.075042724609375, -57.604148864746094, -55.13325500488281, -52.66236114501953, -50.19146728515625, -47.7205696105957, -45.24967575073242, -42.77878189086914, -40.307884216308594, -37.83699035644531, -35.36609649658203, -32.89520263671875, -30.424306869506836, -27.953411102294922, -25.48251724243164, -23.01162338256836, -20.540727615356445, -18.06983184814453, -15.59893798828125, -13.128043174743652, -10.657148361206055, -8.186253547668457, -5.715358734130859, -3.2444639205932617, -0.7735691070556641, 1.69732666015625, 4.168220520019531, 6.639115333557129, 9.110010147094727, 11.580904960632324, 14.051799774169922, 16.522693634033203, 18.993589401245117, 21.46448516845703, 23.935379028320312, 26.406272888183594, 28.877168655395508, 31.348064422607422, 33.8189582824707, 36.289852142333984, 38.76074981689453, 41.23164367675781, 43.702537536621094, 46.173431396484375, 48.644325256347656, 51.1152229309082, 53.586116790771484, 56.057010650634766, 58.52790832519531, 60.998802185058594, 63.469696044921875]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 0.0, 2.0, 4.0, 2.0, 8.0, 6.0, 11.0, 14.0, 7.0, 11.0, 10.0, 27.0, 16.0, 18.0, 23.0, 31.0, 26.0, 36.0, 49.0, 42.0, 48.0, 53.0, 50.0, 30.0, 42.0, 51.0, 44.0, 36.0, 36.0, 34.0, 36.0, 32.0, 29.0, 26.0, 20.0, 26.0, 15.0, 13.0, 6.0, 12.0, 3.0, 4.0, 6.0, 6.0, 4.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-1.880859375, -1.8246917724609375, -1.768524169921875, -1.7123565673828125, -1.65618896484375, -1.6000213623046875, -1.543853759765625, -1.4876861572265625, -1.4315185546875, -1.3753509521484375, -1.319183349609375, -1.2630157470703125, -1.20684814453125, -1.1506805419921875, -1.094512939453125, -1.0383453369140625, -0.982177734375, -0.9260101318359375, -0.869842529296875, -0.8136749267578125, -0.75750732421875, -0.7013397216796875, -0.645172119140625, -0.5890045166015625, -0.5328369140625, -0.4766693115234375, -0.420501708984375, -0.3643341064453125, -0.30816650390625, -0.2519989013671875, -0.195831298828125, -0.1396636962890625, -0.08349609375, -0.0273284912109375, 0.028839111328125, 0.0850067138671875, 0.14117431640625, 0.1973419189453125, 0.253509521484375, 0.3096771240234375, 0.3658447265625, 0.4220123291015625, 0.478179931640625, 0.5343475341796875, 0.59051513671875, 0.6466827392578125, 0.702850341796875, 0.7590179443359375, 0.815185546875, 0.8713531494140625, 0.927520751953125, 0.9836883544921875, 1.03985595703125, 1.0960235595703125, 1.152191162109375, 1.2083587646484375, 1.2645263671875, 1.3206939697265625, 1.376861572265625, 1.4330291748046875, 1.48919677734375, 1.5453643798828125, 1.601531982421875, 1.6576995849609375, 1.7138671875]}, "gradients/decoder.transformer.h.23.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 6.0, 3.0, 6.0, 14.0, 8.0, 9.0, 11.0, 21.0, 56.0, 56.0, 124.0, 107.0, 171.0, 237.0, 418.0, 546.0, 799.0, 1235.0, 1715.0, 2656.0, 3929.0, 6039.0, 9382.0, 14310.0, 23200.0, 39722.0, 73092.0, 155186.0, 358273.0, 169239.0, 77829.0, 42192.0, 24402.0, 15221.0, 9451.0, 6331.0, 4116.0, 2654.0, 1869.0, 1286.0, 784.0, 598.0, 327.0, 298.0, 207.0, 129.0, 102.0, 53.0, 49.0, 27.0, 20.0, 24.0, 4.0, 8.0, 7.0, 4.0, 4.0, 5.0, 0.0, 1.0, 1.0], "bins": [-0.0489501953125, -0.04739856719970703, -0.04584693908691406, -0.044295310974121094, -0.042743682861328125, -0.041192054748535156, -0.03964042663574219, -0.03808879852294922, -0.03653717041015625, -0.03498554229736328, -0.03343391418457031, -0.031882286071777344, -0.030330657958984375, -0.028779029846191406, -0.027227401733398438, -0.02567577362060547, -0.0241241455078125, -0.02257251739501953, -0.021020889282226562, -0.019469261169433594, -0.017917633056640625, -0.016366004943847656, -0.014814376831054688, -0.013262748718261719, -0.01171112060546875, -0.010159492492675781, -0.008607864379882812, -0.007056236267089844, -0.005504608154296875, -0.003952980041503906, -0.0024013519287109375, -0.0008497238159179688, 0.000701904296875, 0.0022535324096679688, 0.0038051605224609375, 0.005356788635253906, 0.006908416748046875, 0.008460044860839844, 0.010011672973632812, 0.011563301086425781, 0.01311492919921875, 0.014666557312011719, 0.016218185424804688, 0.017769813537597656, 0.019321441650390625, 0.020873069763183594, 0.022424697875976562, 0.02397632598876953, 0.0255279541015625, 0.02707958221435547, 0.028631210327148438, 0.030182838439941406, 0.031734466552734375, 0.033286094665527344, 0.03483772277832031, 0.03638935089111328, 0.03794097900390625, 0.03949260711669922, 0.04104423522949219, 0.042595863342285156, 0.044147491455078125, 0.045699119567871094, 0.04725074768066406, 0.04880237579345703, 0.05035400390625]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 3.0, 7.0, 6.0, 10.0, 10.0, 10.0, 12.0, 14.0, 23.0, 16.0, 17.0, 22.0, 28.0, 27.0, 39.0, 27.0, 45.0, 36.0, 30.0, 44.0, 37.0, 1070.0, 43.0, 58.0, 23.0, 35.0, 33.0, 41.0, 31.0, 33.0, 29.0, 26.0, 22.0, 27.0, 17.0, 9.0, 19.0, 11.0, 10.0, 7.0, 10.0, 3.0, 3.0, 6.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.068359375, -1.0326690673828125, -0.996978759765625, -0.9612884521484375, -0.92559814453125, -0.8899078369140625, -0.854217529296875, -0.8185272216796875, -0.7828369140625, -0.7471466064453125, -0.711456298828125, -0.6757659912109375, -0.64007568359375, -0.6043853759765625, -0.568695068359375, -0.5330047607421875, -0.497314453125, -0.4616241455078125, -0.425933837890625, -0.3902435302734375, -0.35455322265625, -0.3188629150390625, -0.283172607421875, -0.2474822998046875, -0.2117919921875, -0.1761016845703125, -0.140411376953125, -0.1047210693359375, -0.06903076171875, -0.0333404541015625, 0.002349853515625, 0.0380401611328125, 0.07373046875, 0.1094207763671875, 0.145111083984375, 0.1808013916015625, 0.21649169921875, 0.2521820068359375, 0.287872314453125, 0.3235626220703125, 0.3592529296875, 0.3949432373046875, 0.430633544921875, 0.4663238525390625, 0.50201416015625, 0.5377044677734375, 0.573394775390625, 0.6090850830078125, 0.644775390625, 0.6804656982421875, 0.716156005859375, 0.7518463134765625, 0.78753662109375, 0.8232269287109375, 0.858917236328125, 0.8946075439453125, 0.9302978515625, 0.9659881591796875, 1.001678466796875, 1.0373687744140625, 1.07305908203125, 1.1087493896484375, 1.144439697265625, 1.1801300048828125, 1.2158203125]}, "gradients/decoder.transformer.h.23.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 1.0, 4.0, 0.0, 3.0, 6.0, 9.0, 21.0, 30.0, 43.0, 69.0, 71.0, 111.0, 179.0, 281.0, 458.0, 661.0, 1076.0, 1664.0, 2668.0, 4202.0, 7084.0, 11403.0, 18972.0, 32643.0, 58309.0, 110419.0, 1339151.0, 266911.0, 105567.0, 56194.0, 31252.0, 18367.0, 11235.0, 6724.0, 4240.0, 2614.0, 1608.0, 1039.0, 659.0, 397.0, 279.0, 183.0, 109.0, 87.0, 43.0, 30.0, 15.0, 14.0, 14.0, 8.0, 10.0, 4.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0469970703125, -0.04542875289916992, -0.043860435485839844, -0.042292118072509766, -0.04072380065917969, -0.03915548324584961, -0.03758716583251953, -0.03601884841918945, -0.034450531005859375, -0.0328822135925293, -0.03131389617919922, -0.02974557876586914, -0.028177261352539062, -0.026608943939208984, -0.025040626525878906, -0.023472309112548828, -0.02190399169921875, -0.020335674285888672, -0.018767356872558594, -0.017199039459228516, -0.015630722045898438, -0.01406240463256836, -0.012494087219238281, -0.010925769805908203, -0.009357452392578125, -0.007789134979248047, -0.006220817565917969, -0.004652500152587891, -0.0030841827392578125, -0.0015158653259277344, 5.245208740234375e-05, 0.0016207695007324219, 0.0031890869140625, 0.004757404327392578, 0.006325721740722656, 0.007894039154052734, 0.009462356567382812, 0.01103067398071289, 0.012598991394042969, 0.014167308807373047, 0.015735626220703125, 0.017303943634033203, 0.01887226104736328, 0.02044057846069336, 0.022008895874023438, 0.023577213287353516, 0.025145530700683594, 0.026713848114013672, 0.02828216552734375, 0.029850482940673828, 0.031418800354003906, 0.032987117767333984, 0.03455543518066406, 0.03612375259399414, 0.03769207000732422, 0.0392603874206543, 0.040828704833984375, 0.04239702224731445, 0.04396533966064453, 0.04553365707397461, 0.04710197448730469, 0.048670291900634766, 0.050238609313964844, 0.05180692672729492, 0.053375244140625]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 6.0, 3.0, 4.0, 4.0, 13.0, 6.0, 9.0, 8.0, 6.0, 18.0, 13.0, 7.0, 9.0, 28.0, 18.0, 16.0, 26.0, 63.0, 41.0, 25.0, 37.0, 73.0, 29.0, 34.0, 37.0, 41.0, 91.0, 42.0, 37.0, 36.0, 41.0, 25.0, 27.0, 16.0, 34.0, 7.0, 9.0, 12.0, 17.0, 8.0, 3.0, 5.0, 3.0, 7.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.3245811462402344e-06, -2.2510066628456116e-06, -2.1774321794509888e-06, -2.103857696056366e-06, -2.030283212661743e-06, -1.9567087292671204e-06, -1.8831342458724976e-06, -1.8095597624778748e-06, -1.735985279083252e-06, -1.6624107956886292e-06, -1.5888363122940063e-06, -1.5152618288993835e-06, -1.4416873455047607e-06, -1.368112862110138e-06, -1.2945383787155151e-06, -1.2209638953208923e-06, -1.1473894119262695e-06, -1.0738149285316467e-06, -1.000240445137024e-06, -9.266659617424011e-07, -8.530914783477783e-07, -7.795169949531555e-07, -7.059425115585327e-07, -6.323680281639099e-07, -5.587935447692871e-07, -4.852190613746643e-07, -4.116445779800415e-07, -3.380700945854187e-07, -2.644956111907959e-07, -1.909211277961731e-07, -1.1734664440155029e-07, -4.377216100692749e-08, 2.9802322387695312e-08, 1.0337680578231812e-07, 1.7695128917694092e-07, 2.505257725715637e-07, 3.241002559661865e-07, 3.976747393608093e-07, 4.7124922275543213e-07, 5.448237061500549e-07, 6.183981895446777e-07, 6.919726729393005e-07, 7.655471563339233e-07, 8.391216397285461e-07, 9.126961231231689e-07, 9.862706065177917e-07, 1.0598450899124146e-06, 1.1334195733070374e-06, 1.2069940567016602e-06, 1.280568540096283e-06, 1.3541430234909058e-06, 1.4277175068855286e-06, 1.5012919902801514e-06, 1.5748664736747742e-06, 1.648440957069397e-06, 1.7220154404640198e-06, 1.7955899238586426e-06, 1.8691644072532654e-06, 1.942738890647888e-06, 2.016313374042511e-06, 2.089887857437134e-06, 2.1634623408317566e-06, 2.2370368242263794e-06, 2.310611307621002e-06, 2.384185791015625e-06]}, "gradients/decoder.transformer.h.23.crossattention.q_attn.weight": {"_type": "histogram", "values": [4.0, 0.0, 1.0, 2.0, 2.0, 4.0, 3.0, 5.0, 3.0, 12.0, 14.0, 14.0, 7.0, 20.0, 24.0, 21.0, 30.0, 30.0, 44.0, 46.0, 44.0, 59.0, 67.0, 99.0, 127.0, 133.0, 167.0, 188.0, 265.0, 514.0, 2637.0, 1034328.0, 7420.0, 695.0, 275.0, 237.0, 186.0, 157.0, 114.0, 95.0, 91.0, 50.0, 58.0, 60.0, 50.0, 24.0, 24.0, 18.0, 14.0, 23.0, 15.0, 12.0, 6.0, 11.0, 4.0, 2.0, 3.0, 5.0, 5.0, 2.0, 2.0, 0.0, 1.0, 3.0], "bins": [-4.291534423828125e-05, -4.155654460191727e-05, -4.0197744965553284e-05, -3.88389453291893e-05, -3.748014569282532e-05, -3.6121346056461334e-05, -3.476254642009735e-05, -3.340374678373337e-05, -3.2044947147369385e-05, -3.06861475110054e-05, -2.932734787464142e-05, -2.7968548238277435e-05, -2.6609748601913452e-05, -2.525094896554947e-05, -2.3892149329185486e-05, -2.2533349692821503e-05, -2.117455005645752e-05, -1.9815750420093536e-05, -1.8456950783729553e-05, -1.709815114736557e-05, -1.5739351511001587e-05, -1.4380551874637604e-05, -1.302175223827362e-05, -1.1662952601909637e-05, -1.0304152965545654e-05, -8.945353329181671e-06, -7.586553692817688e-06, -6.227754056453705e-06, -4.868954420089722e-06, -3.5101547837257385e-06, -2.1513551473617554e-06, -7.925555109977722e-07, 5.662441253662109e-07, 1.925043761730194e-06, 3.2838433980941772e-06, 4.64264303445816e-06, 6.0014426708221436e-06, 7.360242307186127e-06, 8.71904194355011e-06, 1.0077841579914093e-05, 1.1436641216278076e-05, 1.279544085264206e-05, 1.4154240489006042e-05, 1.5513040125370026e-05, 1.687183976173401e-05, 1.8230639398097992e-05, 1.9589439034461975e-05, 2.0948238670825958e-05, 2.230703830718994e-05, 2.3665837943553925e-05, 2.5024637579917908e-05, 2.638343721628189e-05, 2.7742236852645874e-05, 2.9101036489009857e-05, 3.045983612537384e-05, 3.1818635761737823e-05, 3.317743539810181e-05, 3.453623503446579e-05, 3.589503467082977e-05, 3.7253834307193756e-05, 3.861263394355774e-05, 3.997143357992172e-05, 4.1330233216285706e-05, 4.268903285264969e-05, 4.404783248901367e-05]}, "gradients/decoder.transformer.h.23.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 16.0, 70.0, 306.0, 400.0, 187.0, 32.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.961140348314075e-06, -4.8410579438495915e-06, -4.720975084637757e-06, -4.600892680173274e-06, -4.480810275708791e-06, -4.360727416496957e-06, -4.2406450120324735e-06, -4.120562152820639e-06, -4.000479748356156e-06, -3.880397343891673e-06, -3.7603144846798386e-06, -3.6402320802153554e-06, -3.5201494483771967e-06, -3.400066816539038e-06, -3.2799844120745547e-06, -3.159901780236396e-06, -3.0398191483982373e-06, -2.9197365165600786e-06, -2.79965388472192e-06, -2.6795714802574366e-06, -2.559488848419278e-06, -2.4394062165811192e-06, -2.319323812116636e-06, -2.1992411802784773e-06, -2.0791585484403186e-06, -1.95907591660216e-06, -1.8389933984508389e-06, -1.7189108802995179e-06, -1.5988282484613592e-06, -1.4787456166232005e-06, -1.3586630984718795e-06, -1.2385805803205585e-06, -1.118497493735049e-06, -9.984148618968902e-07, -8.783323437455692e-07, -7.582497687508294e-07, -6.381671937560895e-07, -5.180846187613497e-07, -3.9800204376660986e-07, -2.7791946877187e-07, -1.5783689377713017e-07, -3.775431878239033e-08, 8.232825621234952e-08, 2.0241083120708936e-07, 3.224934062018292e-07, 4.4257598119656905e-07, 5.626585561913089e-07, 6.827411311860487e-07, 8.028237061807886e-07, 9.229062811755284e-07, 1.0429888561702683e-06, 1.1630713743215892e-06, 1.283154006159748e-06, 1.4032366379979067e-06, 1.5233191561492276e-06, 1.6434016743005486e-06, 1.7634843061387073e-06, 1.883566937976866e-06, 2.0036495698150247e-06, 2.123731974279508e-06, 2.2438146061176667e-06, 2.3638972379558254e-06, 2.4839796424203087e-06, 2.6040622742584674e-06, 2.724144906096626e-06]}, "gradients/decoder.transformer.h.23.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 5.0, 7.0, 4.0, 0.0, 7.0, 11.0, 12.0, 17.0, 18.0, 19.0, 30.0, 0.0, 34.0, 40.0, 48.0, 41.0, 47.0, 45.0, 58.0, 0.0, 44.0, 40.0, 54.0, 43.0, 43.0, 54.0, 46.0, 0.0, 47.0, 29.0, 18.0, 23.0, 25.0, 25.0, 18.0, 0.0, 13.0, 9.0, 10.0, 8.0, 6.0, 4.0, 2.0, 0.0, 6.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7285346984863281e-06, -1.6763806343078613e-06, -1.6242265701293945e-06, -1.5720725059509277e-06, -1.519918441772461e-06, -1.4677643775939941e-06, -1.4156103134155273e-06, -1.3634562492370605e-06, -1.3113021850585938e-06, -1.259148120880127e-06, -1.2069940567016602e-06, -1.1548399925231934e-06, -1.1026859283447266e-06, -1.0505318641662598e-06, -9.98377799987793e-07, -9.462237358093262e-07, -8.940696716308594e-07, -8.419156074523926e-07, -7.897615432739258e-07, -7.37607479095459e-07, -6.854534149169922e-07, -6.332993507385254e-07, -5.811452865600586e-07, -5.289912223815918e-07, -4.76837158203125e-07, -4.246830940246582e-07, -3.725290298461914e-07, -3.203749656677246e-07, -2.682209014892578e-07, -2.1606683731079102e-07, -1.6391277313232422e-07, -1.1175870895385742e-07, -5.960464477539063e-08, -7.450580596923828e-09, 4.470348358154297e-08, 9.685754776000977e-08, 1.4901161193847656e-07, 2.0116567611694336e-07, 2.5331974029541016e-07, 3.0547380447387695e-07, 3.5762786865234375e-07, 4.0978193283081055e-07, 4.6193599700927734e-07, 5.140900611877441e-07, 5.662441253662109e-07, 6.183981895446777e-07, 6.705522537231445e-07, 7.227063179016113e-07, 7.748603820800781e-07, 8.270144462585449e-07, 8.791685104370117e-07, 9.313225746154785e-07, 9.834766387939453e-07, 1.0356307029724121e-06, 1.087784767150879e-06, 1.1399388313293457e-06, 1.1920928955078125e-06, 1.2442469596862793e-06, 1.296401023864746e-06, 1.3485550880432129e-06, 1.4007091522216797e-06, 1.4528632164001465e-06, 1.5050172805786133e-06, 1.55717134475708e-06, 1.6093254089355469e-06]}, "gradients/decoder.transformer.h.23.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 0.0, 2.0, 4.0, 2.0, 8.0, 6.0, 11.0, 14.0, 7.0, 11.0, 10.0, 27.0, 16.0, 18.0, 23.0, 31.0, 26.0, 36.0, 49.0, 42.0, 48.0, 53.0, 50.0, 30.0, 42.0, 51.0, 44.0, 36.0, 36.0, 34.0, 36.0, 32.0, 29.0, 26.0, 20.0, 26.0, 15.0, 13.0, 6.0, 12.0, 3.0, 4.0, 6.0, 6.0, 4.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-1.880859375, -1.8246917724609375, -1.768524169921875, -1.7123565673828125, -1.65618896484375, -1.6000213623046875, -1.543853759765625, -1.4876861572265625, -1.4315185546875, -1.3753509521484375, -1.319183349609375, -1.2630157470703125, -1.20684814453125, -1.1506805419921875, -1.094512939453125, -1.0383453369140625, -0.982177734375, -0.9260101318359375, -0.869842529296875, -0.8136749267578125, -0.75750732421875, -0.7013397216796875, -0.645172119140625, -0.5890045166015625, -0.5328369140625, -0.4766693115234375, -0.420501708984375, -0.3643341064453125, -0.30816650390625, -0.2519989013671875, -0.195831298828125, -0.1396636962890625, -0.08349609375, -0.0273284912109375, 0.028839111328125, 0.0850067138671875, 0.14117431640625, 0.1973419189453125, 0.253509521484375, 0.3096771240234375, 0.3658447265625, 0.4220123291015625, 0.478179931640625, 0.5343475341796875, 0.59051513671875, 0.6466827392578125, 0.702850341796875, 0.7590179443359375, 0.815185546875, 0.8713531494140625, 0.927520751953125, 0.9836883544921875, 1.03985595703125, 1.0960235595703125, 1.152191162109375, 1.2083587646484375, 1.2645263671875, 1.3206939697265625, 1.376861572265625, 1.4330291748046875, 1.48919677734375, 1.5453643798828125, 1.601531982421875, 1.6576995849609375, 1.7138671875]}, "gradients/decoder.transformer.h.23.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0, 7.0, 3.0, 3.0, 8.0, 16.0, 22.0, 33.0, 46.0, 60.0, 98.0, 139.0, 207.0, 348.0, 537.0, 844.0, 1373.0, 2328.0, 4462.0, 8921.0, 22315.0, 79123.0, 636837.0, 221714.0, 40435.0, 14082.0, 6385.0, 3275.0, 1877.0, 1122.0, 675.0, 426.0, 258.0, 182.0, 121.0, 97.0, 46.0, 44.0, 24.0, 21.0, 16.0, 5.0, 5.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0], "bins": [-10.0625, -9.7457275390625, -9.428955078125, -9.1121826171875, -8.79541015625, -8.4786376953125, -8.161865234375, -7.8450927734375, -7.5283203125, -7.2115478515625, -6.894775390625, -6.5780029296875, -6.26123046875, -5.9444580078125, -5.627685546875, -5.3109130859375, -4.994140625, -4.6773681640625, -4.360595703125, -4.0438232421875, -3.72705078125, -3.4102783203125, -3.093505859375, -2.7767333984375, -2.4599609375, -2.1431884765625, -1.826416015625, -1.5096435546875, -1.19287109375, -0.8760986328125, -0.559326171875, -0.2425537109375, 0.07421875, 0.3909912109375, 0.707763671875, 1.0245361328125, 1.34130859375, 1.6580810546875, 1.974853515625, 2.2916259765625, 2.6083984375, 2.9251708984375, 3.241943359375, 3.5587158203125, 3.87548828125, 4.1922607421875, 4.509033203125, 4.8258056640625, 5.142578125, 5.4593505859375, 5.776123046875, 6.0928955078125, 6.40966796875, 6.7264404296875, 7.043212890625, 7.3599853515625, 7.6767578125, 7.9935302734375, 8.310302734375, 8.6270751953125, 8.94384765625, 9.2606201171875, 9.577392578125, 9.8941650390625, 10.2109375]}, "gradients/decoder.transformer.h.23.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 3.0, 3.0, 4.0, 3.0, 6.0, 7.0, 6.0, 10.0, 11.0, 8.0, 16.0, 14.0, 13.0, 26.0, 25.0, 31.0, 31.0, 43.0, 40.0, 44.0, 54.0, 66.0, 156.0, 1664.0, 239.0, 87.0, 54.0, 54.0, 61.0, 52.0, 38.0, 35.0, 25.0, 24.0, 17.0, 18.0, 17.0, 10.0, 8.0, 10.0, 6.0, 6.0, 4.0, 1.0, 1.0, 5.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-7.375, -7.16827392578125, -6.9615478515625, -6.75482177734375, -6.548095703125, -6.34136962890625, -6.1346435546875, -5.92791748046875, -5.72119140625, -5.51446533203125, -5.3077392578125, -5.10101318359375, -4.894287109375, -4.68756103515625, -4.4808349609375, -4.27410888671875, -4.0673828125, -3.86065673828125, -3.6539306640625, -3.44720458984375, -3.240478515625, -3.03375244140625, -2.8270263671875, -2.62030029296875, -2.41357421875, -2.20684814453125, -2.0001220703125, -1.79339599609375, -1.586669921875, -1.37994384765625, -1.1732177734375, -0.96649169921875, -0.759765625, -0.55303955078125, -0.3463134765625, -0.13958740234375, 0.067138671875, 0.27386474609375, 0.4805908203125, 0.68731689453125, 0.89404296875, 1.10076904296875, 1.3074951171875, 1.51422119140625, 1.720947265625, 1.92767333984375, 2.1343994140625, 2.34112548828125, 2.5478515625, 2.75457763671875, 2.9613037109375, 3.16802978515625, 3.374755859375, 3.58148193359375, 3.7882080078125, 3.99493408203125, 4.20166015625, 4.40838623046875, 4.6151123046875, 4.82183837890625, 5.028564453125, 5.23529052734375, 5.4420166015625, 5.64874267578125, 5.85546875]}, "gradients/decoder.transformer.h.23.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 2.0, 2.0, 0.0, 3.0, 5.0, 4.0, 8.0, 5.0, 9.0, 12.0, 14.0, 15.0, 15.0, 30.0, 21.0, 24.0, 43.0, 48.0, 96.0, 143.0, 328.0, 1460.0, 54033.0, 3080473.0, 7536.0, 692.0, 247.0, 110.0, 69.0, 56.0, 42.0, 28.0, 24.0, 25.0, 21.0, 5.0, 11.0, 10.0, 10.0, 8.0, 3.0, 7.0, 1.0, 7.0, 2.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-24.65625, -23.71923828125, -22.7822265625, -21.84521484375, -20.908203125, -19.97119140625, -19.0341796875, -18.09716796875, -17.16015625, -16.22314453125, -15.2861328125, -14.34912109375, -13.412109375, -12.47509765625, -11.5380859375, -10.60107421875, -9.6640625, -8.72705078125, -7.7900390625, -6.85302734375, -5.916015625, -4.97900390625, -4.0419921875, -3.10498046875, -2.16796875, -1.23095703125, -0.2939453125, 0.64306640625, 1.580078125, 2.51708984375, 3.4541015625, 4.39111328125, 5.328125, 6.26513671875, 7.2021484375, 8.13916015625, 9.076171875, 10.01318359375, 10.9501953125, 11.88720703125, 12.82421875, 13.76123046875, 14.6982421875, 15.63525390625, 16.572265625, 17.50927734375, 18.4462890625, 19.38330078125, 20.3203125, 21.25732421875, 22.1943359375, 23.13134765625, 24.068359375, 25.00537109375, 25.9423828125, 26.87939453125, 27.81640625, 28.75341796875, 29.6904296875, 30.62744140625, 31.564453125, 32.50146484375, 33.4384765625, 34.37548828125, 35.3125]}, "gradients/decoder.transformer.h.23.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 52.0, 602.0, 310.0, 36.0, 8.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.592519760131836, -17.75809097290039, -15.923664093017578, -14.089235305786133, -12.254807472229004, -10.420379638671875, -8.58595085144043, -6.751523017883301, -4.917095184326172, -3.082667112350464, -1.2482390403747559, 0.5861892700195312, 2.42061710357666, 4.255044937133789, 6.089473724365234, 7.923901557922363, 9.758329391479492, 11.592757225036621, 13.42718505859375, 15.261613845825195, 17.09604263305664, 18.930469512939453, 20.7648983001709, 22.599327087402344, 24.433753967285156, 26.2681827545166, 28.102609634399414, 29.93703842163086, 31.771465301513672, 33.60589599609375, 35.44032287597656, 37.274749755859375, 39.10917663574219, 40.943603515625, 42.77803421020508, 44.61246109008789, 46.4468879699707, 48.28131866455078, 50.115745544433594, 51.950172424316406, 53.78459930419922, 55.61902618408203, 57.45345687866211, 59.28788375854492, 61.122310638427734, 62.95674133300781, 64.79116821289062, 66.62559509277344, 68.46002197265625, 70.29444885253906, 72.12887573242188, 73.96330261230469, 75.79773712158203, 77.63216400146484, 79.46659088134766, 81.30101776123047, 83.13545227050781, 84.96987915039062, 86.80430603027344, 88.63873291015625, 90.4731674194336, 92.3075942993164, 94.14202117919922, 95.97644805908203, 97.81087493896484]}, "gradients/decoder.transformer.h.23.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0, 12.0, 6.0, 8.0, 13.0, 13.0, 21.0, 14.0, 17.0, 28.0, 20.0, 43.0, 33.0, 32.0, 27.0, 35.0, 34.0, 49.0, 39.0, 32.0, 43.0, 29.0, 38.0, 41.0, 35.0, 30.0, 36.0, 26.0, 33.0, 15.0, 25.0, 27.0, 32.0, 17.0, 20.0, 6.0, 13.0, 11.0, 11.0, 1.0, 8.0, 4.0, 1.0, 3.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-20.18435287475586, -19.525632858276367, -18.866910934448242, -18.20819091796875, -17.549468994140625, -16.890748977661133, -16.23202896118164, -15.573307037353516, -14.914587020874023, -14.255866050720215, -13.597145080566406, -12.938425064086914, -12.279704093933105, -11.620983123779297, -10.962262153625488, -10.30354118347168, -9.644820213317871, -8.986099243164062, -8.327378273010254, -7.6686577796936035, -7.009937286376953, -6.3512163162231445, -5.692495346069336, -5.0337748527526855, -4.375053882598877, -3.7163331508636475, -3.057612419128418, -2.3988914489746094, -1.7401707172393799, -1.0814499855041504, -0.4227290153503418, 0.2359914779663086, 0.8947124481201172, 1.5534331798553467, 2.212153911590576, 2.8708748817443848, 3.5295956134796143, 4.188316345214844, 4.847037315368652, 5.505757808685303, 6.164478778839111, 6.82319974899292, 7.48192024230957, 8.140641212463379, 8.799362182617188, 9.45808219909668, 10.116804122924805, 10.775524139404297, 11.434245109558105, 12.092966079711914, 12.751687049865723, 13.410408020019531, 14.069128036499023, 14.727849006652832, 15.38656997680664, 16.045289993286133, 16.704011917114258, 17.36273193359375, 18.021453857421875, 18.680173873901367, 19.338895797729492, 19.997615814208984, 20.65633773803711, 21.3150577545166, 21.973777770996094]}, "gradients/decoder.transformer.h.22.mlp.c_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 3.0, 6.0, 7.0, 5.0, 5.0, 10.0, 16.0, 7.0, 12.0, 19.0, 16.0, 17.0, 22.0, 25.0, 33.0, 26.0, 33.0, 48.0, 38.0, 41.0, 49.0, 51.0, 42.0, 42.0, 39.0, 50.0, 32.0, 30.0, 27.0, 37.0, 39.0, 31.0, 24.0, 28.0, 16.0, 17.0, 12.0, 10.0, 8.0, 6.0, 11.0, 4.0, 5.0, 3.0, 2.0, 5.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-1.826171875, -1.768096923828125, -1.71002197265625, -1.651947021484375, -1.5938720703125, -1.535797119140625, -1.47772216796875, -1.419647216796875, -1.361572265625, -1.303497314453125, -1.24542236328125, -1.187347412109375, -1.1292724609375, -1.071197509765625, -1.01312255859375, -0.955047607421875, -0.89697265625, -0.838897705078125, -0.78082275390625, -0.722747802734375, -0.6646728515625, -0.606597900390625, -0.54852294921875, -0.490447998046875, -0.432373046875, -0.374298095703125, -0.31622314453125, -0.258148193359375, -0.2000732421875, -0.141998291015625, -0.08392333984375, -0.025848388671875, 0.0322265625, 0.090301513671875, 0.14837646484375, 0.206451416015625, 0.2645263671875, 0.322601318359375, 0.38067626953125, 0.438751220703125, 0.496826171875, 0.554901123046875, 0.61297607421875, 0.671051025390625, 0.7291259765625, 0.787200927734375, 0.84527587890625, 0.903350830078125, 0.96142578125, 1.019500732421875, 1.07757568359375, 1.135650634765625, 1.1937255859375, 1.251800537109375, 1.30987548828125, 1.367950439453125, 1.426025390625, 1.484100341796875, 1.54217529296875, 1.600250244140625, 1.6583251953125, 1.716400146484375, 1.77447509765625, 1.832550048828125, 1.890625]}, "gradients/decoder.transformer.h.22.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 5.0, 7.0, 4.0, 7.0, 13.0, 16.0, 13.0, 29.0, 26.0, 39.0, 67.0, 76.0, 115.0, 176.0, 304.0, 472.0, 906.0, 2235.0, 12830.0, 1026380.0, 3124798.0, 20420.0, 2848.0, 1005.0, 553.0, 303.0, 200.0, 115.0, 96.0, 68.0, 46.0, 28.0, 25.0, 19.0, 12.0, 9.0, 8.0, 4.0, 3.0, 6.0, 4.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.59375, -16.06005859375, -15.5263671875, -14.99267578125, -14.458984375, -13.92529296875, -13.3916015625, -12.85791015625, -12.32421875, -11.79052734375, -11.2568359375, -10.72314453125, -10.189453125, -9.65576171875, -9.1220703125, -8.58837890625, -8.0546875, -7.52099609375, -6.9873046875, -6.45361328125, -5.919921875, -5.38623046875, -4.8525390625, -4.31884765625, -3.78515625, -3.25146484375, -2.7177734375, -2.18408203125, -1.650390625, -1.11669921875, -0.5830078125, -0.04931640625, 0.484375, 1.01806640625, 1.5517578125, 2.08544921875, 2.619140625, 3.15283203125, 3.6865234375, 4.22021484375, 4.75390625, 5.28759765625, 5.8212890625, 6.35498046875, 6.888671875, 7.42236328125, 7.9560546875, 8.48974609375, 9.0234375, 9.55712890625, 10.0908203125, 10.62451171875, 11.158203125, 11.69189453125, 12.2255859375, 12.75927734375, 13.29296875, 13.82666015625, 14.3603515625, 14.89404296875, 15.427734375, 15.96142578125, 16.4951171875, 17.02880859375, 17.5625]}, "gradients/decoder.transformer.h.22.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 5.0, 5.0, 5.0, 13.0, 19.0, 35.0, 48.0, 84.0, 138.0, 243.0, 397.0, 712.0, 981.0, 617.0, 341.0, 174.0, 118.0, 56.0, 46.0, 14.0, 9.0, 8.0, 5.0, 4.0, 5.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.359375, -12.07379150390625, -11.7882080078125, -11.50262451171875, -11.217041015625, -10.93145751953125, -10.6458740234375, -10.36029052734375, -10.07470703125, -9.78912353515625, -9.5035400390625, -9.21795654296875, -8.932373046875, -8.64678955078125, -8.3612060546875, -8.07562255859375, -7.7900390625, -7.50445556640625, -7.2188720703125, -6.93328857421875, -6.647705078125, -6.36212158203125, -6.0765380859375, -5.79095458984375, -5.50537109375, -5.21978759765625, -4.9342041015625, -4.64862060546875, -4.363037109375, -4.07745361328125, -3.7918701171875, -3.50628662109375, -3.220703125, -2.93511962890625, -2.6495361328125, -2.36395263671875, -2.078369140625, -1.79278564453125, -1.5072021484375, -1.22161865234375, -0.93603515625, -0.65045166015625, -0.3648681640625, -0.07928466796875, 0.206298828125, 0.49188232421875, 0.7774658203125, 1.06304931640625, 1.3486328125, 1.63421630859375, 1.9197998046875, 2.20538330078125, 2.490966796875, 2.77655029296875, 3.0621337890625, 3.34771728515625, 3.63330078125, 3.91888427734375, 4.2044677734375, 4.49005126953125, 4.775634765625, 5.06121826171875, 5.3468017578125, 5.63238525390625, 5.91796875]}, "gradients/decoder.transformer.h.22.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 4.0, 3.0, 4.0, 6.0, 9.0, 10.0, 22.0, 25.0, 46.0, 57.0, 137.0, 242.0, 458.0, 1053.0, 3290.0, 23027.0, 471106.0, 3563252.0, 118279.0, 9696.0, 1957.0, 780.0, 342.0, 173.0, 99.0, 78.0, 47.0, 21.0, 16.0, 10.0, 12.0, 9.0, 5.0, 3.0, 6.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-15.9609375, -15.4510498046875, -14.941162109375, -14.4312744140625, -13.92138671875, -13.4114990234375, -12.901611328125, -12.3917236328125, -11.8818359375, -11.3719482421875, -10.862060546875, -10.3521728515625, -9.84228515625, -9.3323974609375, -8.822509765625, -8.3126220703125, -7.802734375, -7.2928466796875, -6.782958984375, -6.2730712890625, -5.76318359375, -5.2532958984375, -4.743408203125, -4.2335205078125, -3.7236328125, -3.2137451171875, -2.703857421875, -2.1939697265625, -1.68408203125, -1.1741943359375, -0.664306640625, -0.1544189453125, 0.35546875, 0.8653564453125, 1.375244140625, 1.8851318359375, 2.39501953125, 2.9049072265625, 3.414794921875, 3.9246826171875, 4.4345703125, 4.9444580078125, 5.454345703125, 5.9642333984375, 6.47412109375, 6.9840087890625, 7.493896484375, 8.0037841796875, 8.513671875, 9.0235595703125, 9.533447265625, 10.0433349609375, 10.55322265625, 11.0631103515625, 11.572998046875, 12.0828857421875, 12.5927734375, 13.1026611328125, 13.612548828125, 14.1224365234375, 14.63232421875, 15.1422119140625, 15.652099609375, 16.1619873046875, 16.671875]}, "gradients/decoder.transformer.h.22.ln_2.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 54.0, 478.0, 440.0, 37.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-19.828670501708984, -15.924386978149414, -12.020102500915527, -8.11581802368164, -4.21153450012207, -0.3072509765625, 3.597034454345703, 7.501317977905273, 11.405601501464844, 15.309885025024414, 19.214168548583984, 23.118453979492188, 27.022737503051758, 30.927021026611328, 34.83130645751953, 38.73558807373047, 42.63987350463867, 46.544158935546875, 50.44844055175781, 54.352725982666016, 58.25701141357422, 62.161293029785156, 66.06558227539062, 69.96986389160156, 73.8741455078125, 77.77842712402344, 81.6827163696289, 85.58699798583984, 89.49127960205078, 93.39556884765625, 97.29985046386719, 101.20413208007812, 105.10841369628906, 109.0126953125, 112.91698455810547, 116.8212661743164, 120.72554779052734, 124.62983703613281, 128.53411865234375, 132.4384002685547, 136.34268188476562, 140.24696350097656, 144.1512451171875, 148.0555419921875, 151.95982360839844, 155.86410522460938, 159.7683868408203, 163.67266845703125, 167.57696533203125, 171.4812469482422, 175.38552856445312, 179.28982543945312, 183.19410705566406, 187.098388671875, 191.00267028808594, 194.90695190429688, 198.8112335205078, 202.71551513671875, 206.6197967529297, 210.52407836914062, 214.42837524414062, 218.33265686035156, 222.2369384765625, 226.14122009277344, 230.04550170898438]}, "gradients/decoder.transformer.h.22.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 2.0, 4.0, 3.0, 6.0, 6.0, 5.0, 2.0, 8.0, 8.0, 13.0, 12.0, 12.0, 25.0, 20.0, 19.0, 27.0, 42.0, 39.0, 36.0, 32.0, 38.0, 35.0, 39.0, 50.0, 43.0, 45.0, 39.0, 40.0, 24.0, 39.0, 38.0, 53.0, 19.0, 32.0, 26.0, 14.0, 15.0, 16.0, 15.0, 14.0, 8.0, 9.0, 6.0, 9.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 5.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-18.008506774902344, -17.43944549560547, -16.870384216308594, -16.30132484436035, -15.732263565063477, -15.163202285766602, -14.594141006469727, -14.025079727172852, -13.456019401550293, -12.886958122253418, -12.31789779663086, -11.748836517333984, -11.17977523803711, -10.61071491241455, -10.041653633117676, -9.472593307495117, -8.903532028198242, -8.334470748901367, -7.765410423278809, -7.196349143981934, -6.627288341522217, -6.0582275390625, -5.489166259765625, -4.920105457305908, -4.351044654846191, -3.7819838523864746, -3.2129228115081787, -2.643861770629883, -2.074800968170166, -1.5057401657104492, -0.9366791248321533, -0.3676180839538574, 0.20144271850585938, 0.7705036401748657, 1.339564561843872, 1.9086254835128784, 2.4776864051818848, 3.0467472076416016, 3.6158082485198975, 4.184869289398193, 4.75393009185791, 5.322990894317627, 5.892051696777344, 6.461112976074219, 7.0301737785339355, 7.599234580993652, 8.168295860290527, 8.737356185913086, 9.306417465209961, 9.875478744506836, 10.444539070129395, 11.01360034942627, 11.582660675048828, 12.151721954345703, 12.720783233642578, 13.289844512939453, 13.858904838562012, 14.427966117858887, 14.997026443481445, 15.56608772277832, 16.135149002075195, 16.704208374023438, 17.273269653320312, 17.842330932617188, 18.411392211914062]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 3.0, 0.0, 1.0, 4.0, 3.0, 2.0, 2.0, 9.0, 9.0, 12.0, 8.0, 12.0, 6.0, 10.0, 15.0, 15.0, 17.0, 30.0, 23.0, 21.0, 31.0, 40.0, 44.0, 38.0, 41.0, 41.0, 47.0, 32.0, 35.0, 42.0, 39.0, 36.0, 35.0, 43.0, 29.0, 32.0, 25.0, 39.0, 20.0, 19.0, 22.0, 14.0, 14.0, 7.0, 8.0, 9.0, 8.0, 7.0, 5.0, 2.0, 4.0, 2.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.748046875, -1.6914215087890625, -1.634796142578125, -1.5781707763671875, -1.52154541015625, -1.4649200439453125, -1.408294677734375, -1.3516693115234375, -1.2950439453125, -1.2384185791015625, -1.181793212890625, -1.1251678466796875, -1.06854248046875, -1.0119171142578125, -0.955291748046875, -0.8986663818359375, -0.842041015625, -0.7854156494140625, -0.728790283203125, -0.6721649169921875, -0.61553955078125, -0.5589141845703125, -0.502288818359375, -0.4456634521484375, -0.3890380859375, -0.3324127197265625, -0.275787353515625, -0.2191619873046875, -0.16253662109375, -0.1059112548828125, -0.049285888671875, 0.0073394775390625, 0.06396484375, 0.1205902099609375, 0.177215576171875, 0.2338409423828125, 0.29046630859375, 0.3470916748046875, 0.403717041015625, 0.4603424072265625, 0.5169677734375, 0.5735931396484375, 0.630218505859375, 0.6868438720703125, 0.74346923828125, 0.8000946044921875, 0.856719970703125, 0.9133453369140625, 0.969970703125, 1.0265960693359375, 1.083221435546875, 1.1398468017578125, 1.19647216796875, 1.2530975341796875, 1.309722900390625, 1.3663482666015625, 1.4229736328125, 1.4795989990234375, 1.536224365234375, 1.5928497314453125, 1.64947509765625, 1.7061004638671875, 1.762725830078125, 1.8193511962890625, 1.8759765625]}, "gradients/decoder.transformer.h.22.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 5.0, 3.0, 2.0, 8.0, 7.0, 9.0, 12.0, 32.0, 36.0, 46.0, 76.0, 86.0, 143.0, 200.0, 295.0, 442.0, 617.0, 872.0, 1298.0, 1952.0, 2834.0, 4222.0, 6179.0, 9721.0, 14997.0, 23328.0, 38398.0, 65266.0, 123675.0, 299378.0, 214318.0, 96684.0, 53715.0, 32034.0, 19703.0, 12787.0, 8324.0, 5400.0, 3656.0, 2430.0, 1720.0, 1190.0, 773.0, 503.0, 331.0, 244.0, 194.0, 152.0, 91.0, 46.0, 38.0, 24.0, 25.0, 19.0, 16.0, 3.0, 3.0, 5.0, 2.0, 0.0, 5.0], "bins": [-0.050628662109375, -0.04908609390258789, -0.04754352569580078, -0.04600095748901367, -0.04445838928222656, -0.04291582107543945, -0.041373252868652344, -0.039830684661865234, -0.038288116455078125, -0.036745548248291016, -0.035202980041503906, -0.0336604118347168, -0.03211784362792969, -0.030575275421142578, -0.02903270721435547, -0.02749013900756836, -0.02594757080078125, -0.02440500259399414, -0.02286243438720703, -0.021319866180419922, -0.019777297973632812, -0.018234729766845703, -0.016692161560058594, -0.015149593353271484, -0.013607025146484375, -0.012064456939697266, -0.010521888732910156, -0.008979320526123047, -0.0074367523193359375, -0.005894184112548828, -0.004351615905761719, -0.0028090476989746094, -0.0012664794921875, 0.0002760887145996094, 0.0018186569213867188, 0.003361225128173828, 0.0049037933349609375, 0.006446361541748047, 0.007988929748535156, 0.009531497955322266, 0.011074066162109375, 0.012616634368896484, 0.014159202575683594, 0.015701770782470703, 0.017244338989257812, 0.018786907196044922, 0.02032947540283203, 0.02187204360961914, 0.02341461181640625, 0.02495718002319336, 0.02649974822998047, 0.028042316436767578, 0.029584884643554688, 0.031127452850341797, 0.032670021057128906, 0.034212589263916016, 0.035755157470703125, 0.037297725677490234, 0.038840293884277344, 0.04038286209106445, 0.04192543029785156, 0.04346799850463867, 0.04501056671142578, 0.04655313491821289, 0.048095703125]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 2.0, 8.0, 6.0, 10.0, 7.0, 9.0, 13.0, 16.0, 16.0, 15.0, 24.0, 17.0, 17.0, 36.0, 32.0, 42.0, 25.0, 30.0, 37.0, 33.0, 47.0, 40.0, 1064.0, 40.0, 26.0, 36.0, 35.0, 28.0, 35.0, 35.0, 33.0, 22.0, 27.0, 19.0, 14.0, 21.0, 24.0, 24.0, 11.0, 12.0, 10.0, 14.0, 3.0, 2.0, 7.0, 2.0, 4.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.1513671875, -1.115142822265625, -1.07891845703125, -1.042694091796875, -1.0064697265625, -0.970245361328125, -0.93402099609375, -0.897796630859375, -0.861572265625, -0.825347900390625, -0.78912353515625, -0.752899169921875, -0.7166748046875, -0.680450439453125, -0.64422607421875, -0.608001708984375, -0.57177734375, -0.535552978515625, -0.49932861328125, -0.463104248046875, -0.4268798828125, -0.390655517578125, -0.35443115234375, -0.318206787109375, -0.281982421875, -0.245758056640625, -0.20953369140625, -0.173309326171875, -0.1370849609375, -0.100860595703125, -0.06463623046875, -0.028411865234375, 0.0078125, 0.044036865234375, 0.08026123046875, 0.116485595703125, 0.1527099609375, 0.188934326171875, 0.22515869140625, 0.261383056640625, 0.297607421875, 0.333831787109375, 0.37005615234375, 0.406280517578125, 0.4425048828125, 0.478729248046875, 0.51495361328125, 0.551177978515625, 0.58740234375, 0.623626708984375, 0.65985107421875, 0.696075439453125, 0.7322998046875, 0.768524169921875, 0.80474853515625, 0.840972900390625, 0.877197265625, 0.913421630859375, 0.94964599609375, 0.985870361328125, 1.0220947265625, 1.058319091796875, 1.09454345703125, 1.130767822265625, 1.1669921875]}, "gradients/decoder.transformer.h.22.crossattention.c_attn.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 5.0, 7.0, 7.0, 29.0, 15.0, 42.0, 46.0, 64.0, 107.0, 172.0, 259.0, 385.0, 622.0, 888.0, 1429.0, 2287.0, 3562.0, 5801.0, 9248.0, 14643.0, 23824.0, 39999.0, 70141.0, 134177.0, 1376254.0, 193864.0, 90650.0, 50661.0, 29521.0, 18047.0, 11213.0, 7042.0, 4306.0, 2860.0, 1764.0, 1125.0, 696.0, 470.0, 299.0, 201.0, 145.0, 84.0, 63.0, 35.0, 28.0, 14.0, 14.0, 7.0, 7.0, 3.0, 6.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.05059814453125, -0.049002647399902344, -0.04740715026855469, -0.04581165313720703, -0.044216156005859375, -0.04262065887451172, -0.04102516174316406, -0.039429664611816406, -0.03783416748046875, -0.036238670349121094, -0.03464317321777344, -0.03304767608642578, -0.031452178955078125, -0.02985668182373047, -0.028261184692382812, -0.026665687561035156, -0.0250701904296875, -0.023474693298339844, -0.021879196166992188, -0.02028369903564453, -0.018688201904296875, -0.01709270477294922, -0.015497207641601562, -0.013901710510253906, -0.01230621337890625, -0.010710716247558594, -0.009115219116210938, -0.007519721984863281, -0.005924224853515625, -0.004328727722167969, -0.0027332305908203125, -0.0011377334594726562, 0.000457763671875, 0.0020532608032226562, 0.0036487579345703125, 0.005244255065917969, 0.006839752197265625, 0.008435249328613281, 0.010030746459960938, 0.011626243591308594, 0.01322174072265625, 0.014817237854003906, 0.016412734985351562, 0.01800823211669922, 0.019603729248046875, 0.02119922637939453, 0.022794723510742188, 0.024390220642089844, 0.0259857177734375, 0.027581214904785156, 0.029176712036132812, 0.03077220916748047, 0.032367706298828125, 0.03396320343017578, 0.03555870056152344, 0.037154197692871094, 0.03874969482421875, 0.040345191955566406, 0.04194068908691406, 0.04353618621826172, 0.045131683349609375, 0.04672718048095703, 0.04832267761230469, 0.049918174743652344, 0.051513671875]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 2.0, 7.0, 7.0, 3.0, 6.0, 18.0, 9.0, 15.0, 31.0, 9.0, 12.0, 28.0, 52.0, 30.0, 35.0, 35.0, 77.0, 43.0, 43.0, 44.0, 92.0, 33.0, 34.0, 61.0, 33.0, 27.0, 31.0, 53.0, 17.0, 19.0, 11.0, 27.0, 8.0, 6.0, 7.0, 11.0, 4.0, 6.0, 10.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.205371856689453e-06, -2.1299347281455994e-06, -2.0544975996017456e-06, -1.979060471057892e-06, -1.903623342514038e-06, -1.8281862139701843e-06, -1.7527490854263306e-06, -1.6773119568824768e-06, -1.601874828338623e-06, -1.5264376997947693e-06, -1.4510005712509155e-06, -1.3755634427070618e-06, -1.300126314163208e-06, -1.2246891856193542e-06, -1.1492520570755005e-06, -1.0738149285316467e-06, -9.98377799987793e-07, -9.229406714439392e-07, -8.475035429000854e-07, -7.720664143562317e-07, -6.966292858123779e-07, -6.211921572685242e-07, -5.457550287246704e-07, -4.7031790018081665e-07, -3.948807716369629e-07, -3.1944364309310913e-07, -2.4400651454925537e-07, -1.685693860054016e-07, -9.313225746154785e-08, -1.7695128917694092e-08, 5.774199962615967e-08, 1.3317912817001343e-07, 2.086162567138672e-07, 2.8405338525772095e-07, 3.594905138015747e-07, 4.3492764234542847e-07, 5.103647708892822e-07, 5.85801899433136e-07, 6.612390279769897e-07, 7.366761565208435e-07, 8.121132850646973e-07, 8.87550413608551e-07, 9.629875421524048e-07, 1.0384246706962585e-06, 1.1138617992401123e-06, 1.189298927783966e-06, 1.2647360563278198e-06, 1.3401731848716736e-06, 1.4156103134155273e-06, 1.491047441959381e-06, 1.5664845705032349e-06, 1.6419216990470886e-06, 1.7173588275909424e-06, 1.7927959561347961e-06, 1.86823308467865e-06, 1.9436702132225037e-06, 2.0191073417663574e-06, 2.094544470310211e-06, 2.169981598854065e-06, 2.2454187273979187e-06, 2.3208558559417725e-06, 2.3962929844856262e-06, 2.47173011302948e-06, 2.5471672415733337e-06, 2.6226043701171875e-06]}, "gradients/decoder.transformer.h.22.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 5.0, 3.0, 1.0, 7.0, 11.0, 3.0, 14.0, 11.0, 15.0, 16.0, 20.0, 22.0, 34.0, 37.0, 46.0, 67.0, 77.0, 117.0, 104.0, 132.0, 171.0, 249.0, 551.0, 6316.0, 871951.0, 165741.0, 1410.0, 373.0, 176.0, 163.0, 110.0, 124.0, 90.0, 72.0, 54.0, 60.0, 42.0, 37.0, 33.0, 23.0, 17.0, 10.0, 13.0, 10.0, 5.0, 7.0, 7.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0], "bins": [-4.51207160949707e-05, -4.378519952297211e-05, -4.244968295097351e-05, -4.1114166378974915e-05, -3.977864980697632e-05, -3.844313323497772e-05, -3.7107616662979126e-05, -3.577210009098053e-05, -3.4436583518981934e-05, -3.310106694698334e-05, -3.176555037498474e-05, -3.0430033802986145e-05, -2.909451723098755e-05, -2.7759000658988953e-05, -2.6423484086990356e-05, -2.508796751499176e-05, -2.3752450942993164e-05, -2.2416934370994568e-05, -2.108141779899597e-05, -1.9745901226997375e-05, -1.841038465499878e-05, -1.7074868083000183e-05, -1.5739351511001587e-05, -1.440383493900299e-05, -1.3068318367004395e-05, -1.1732801795005798e-05, -1.0397285223007202e-05, -9.061768651008606e-06, -7.72625207901001e-06, -6.3907355070114136e-06, -5.055218935012817e-06, -3.719702363014221e-06, -2.384185791015625e-06, -1.0486692190170288e-06, 2.868473529815674e-07, 1.6223639249801636e-06, 2.9578804969787598e-06, 4.293397068977356e-06, 5.628913640975952e-06, 6.964430212974548e-06, 8.299946784973145e-06, 9.63546335697174e-06, 1.0970979928970337e-05, 1.2306496500968933e-05, 1.364201307296753e-05, 1.4977529644966125e-05, 1.631304621696472e-05, 1.7648562788963318e-05, 1.8984079360961914e-05, 2.031959593296051e-05, 2.1655112504959106e-05, 2.2990629076957703e-05, 2.43261456489563e-05, 2.5661662220954895e-05, 2.699717879295349e-05, 2.8332695364952087e-05, 2.9668211936950684e-05, 3.100372850894928e-05, 3.2339245080947876e-05, 3.367476165294647e-05, 3.501027822494507e-05, 3.6345794796943665e-05, 3.768131136894226e-05, 3.901682794094086e-05, 4.035234451293945e-05]}, "gradients/decoder.transformer.h.22.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 55.0, 571.0, 369.0, 19.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7032110918080434e-05, -1.6660676919855177e-05, -1.6289244740619324e-05, -1.5917810742394067e-05, -1.5546378563158214e-05, -1.5174944564932957e-05, -1.4803511476202402e-05, -1.4432078387471847e-05, -1.406064438924659e-05, -1.3689211300516035e-05, -1.331777821178548e-05, -1.2946344213560224e-05, -1.2574911124829669e-05, -1.2203478036099114e-05, -1.1832044947368558e-05, -1.1460611858638003e-05, -1.1089177860412747e-05, -1.0717744771682192e-05, -1.0346311682951637e-05, -9.97487768472638e-06, -9.603444595995825e-06, -9.23201150726527e-06, -8.860578418534715e-06, -8.48914532980416e-06, -8.117712241073605e-06, -7.74627915234305e-06, -7.374845608865144e-06, -7.003412520134589e-06, -6.631978976656683e-06, -6.260545887926128e-06, -5.889112799195573e-06, -5.517679255717667e-06, -5.14624525749241e-06, -4.774812168761855e-06, -4.403378625283949e-06, -4.031945536553394e-06, -3.660511993075488e-06, -3.289078904344933e-06, -2.9176455882407026e-06, -2.546212272136472e-06, -2.1747789560322417e-06, -1.8033456399280112e-06, -1.4319123238237808e-06, -1.060479121406388e-06, -6.890458053021575e-07, -3.1761248919792706e-07, 5.382071321946569e-08, 4.2525402932369616e-07, 7.966873454279266e-07, 1.168120661532157e-06, 1.5395539776363876e-06, 1.9109870663669426e-06, 2.2824206098448485e-06, 2.6538536985754035e-06, 3.025287014679634e-06, 3.3967203307838645e-06, 3.768153646888095e-06, 4.1395869629923254e-06, 4.5110200517228805e-06, 4.882453595200786e-06, 5.253886683931341e-06, 5.625320227409247e-06, 5.996753316139802e-06, 6.368186404870357e-06, 6.739619948348263e-06]}, "gradients/decoder.transformer.h.22.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 4.0, 2.0, 7.0, 12.0, 0.0, 15.0, 15.0, 17.0, 17.0, 0.0, 19.0, 25.0, 30.0, 33.0, 0.0, 37.0, 38.0, 39.0, 50.0, 0.0, 60.0, 58.0, 56.0, 52.0, 0.0, 53.0, 49.0, 47.0, 31.0, 0.0, 52.0, 27.0, 37.0, 36.0, 0.0, 26.0, 11.0, 11.0, 18.0, 0.0, 9.0, 5.0, 6.0, 7.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5497207641601562e-06, -1.5022233128547668e-06, -1.4547258615493774e-06, -1.407228410243988e-06, -1.3597309589385986e-06, -1.3122335076332092e-06, -1.2647360563278198e-06, -1.2172386050224304e-06, -1.169741153717041e-06, -1.1222437024116516e-06, -1.0747462511062622e-06, -1.0272487998008728e-06, -9.797513484954834e-07, -9.32253897190094e-07, -8.847564458847046e-07, -8.372589945793152e-07, -7.897615432739258e-07, -7.422640919685364e-07, -6.94766640663147e-07, -6.472691893577576e-07, -5.997717380523682e-07, -5.522742867469788e-07, -5.047768354415894e-07, -4.5727938413619995e-07, -4.0978193283081055e-07, -3.6228448152542114e-07, -3.1478703022003174e-07, -2.6728957891464233e-07, -2.1979212760925293e-07, -1.7229467630386353e-07, -1.2479722499847412e-07, -7.729977369308472e-08, -2.9802322387695312e-08, 1.7695128917694092e-08, 6.51925802230835e-08, 1.126900315284729e-07, 1.601874828338623e-07, 2.076849341392517e-07, 2.551823854446411e-07, 3.026798367500305e-07, 3.501772880554199e-07, 3.976747393608093e-07, 4.4517219066619873e-07, 4.926696419715881e-07, 5.401670932769775e-07, 5.876645445823669e-07, 6.351619958877563e-07, 6.826594471931458e-07, 7.301568984985352e-07, 7.776543498039246e-07, 8.25151801109314e-07, 8.726492524147034e-07, 9.201467037200928e-07, 9.676441550254822e-07, 1.0151416063308716e-06, 1.062639057636261e-06, 1.1101365089416504e-06, 1.1576339602470398e-06, 1.2051314115524292e-06, 1.2526288628578186e-06, 1.300126314163208e-06, 1.3476237654685974e-06, 1.3951212167739868e-06, 1.4426186680793762e-06, 1.4901161193847656e-06]}, "gradients/decoder.transformer.h.22.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 3.0, 0.0, 1.0, 4.0, 3.0, 2.0, 2.0, 9.0, 9.0, 12.0, 8.0, 12.0, 6.0, 10.0, 15.0, 15.0, 17.0, 30.0, 23.0, 21.0, 31.0, 40.0, 44.0, 38.0, 41.0, 41.0, 47.0, 32.0, 35.0, 42.0, 39.0, 36.0, 35.0, 43.0, 29.0, 32.0, 25.0, 39.0, 20.0, 19.0, 22.0, 14.0, 14.0, 7.0, 8.0, 9.0, 8.0, 7.0, 5.0, 2.0, 4.0, 2.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.748046875, -1.6914215087890625, -1.634796142578125, -1.5781707763671875, -1.52154541015625, -1.4649200439453125, -1.408294677734375, -1.3516693115234375, -1.2950439453125, -1.2384185791015625, -1.181793212890625, -1.1251678466796875, -1.06854248046875, -1.0119171142578125, -0.955291748046875, -0.8986663818359375, -0.842041015625, -0.7854156494140625, -0.728790283203125, -0.6721649169921875, -0.61553955078125, -0.5589141845703125, -0.502288818359375, -0.4456634521484375, -0.3890380859375, -0.3324127197265625, -0.275787353515625, -0.2191619873046875, -0.16253662109375, -0.1059112548828125, -0.049285888671875, 0.0073394775390625, 0.06396484375, 0.1205902099609375, 0.177215576171875, 0.2338409423828125, 0.29046630859375, 0.3470916748046875, 0.403717041015625, 0.4603424072265625, 0.5169677734375, 0.5735931396484375, 0.630218505859375, 0.6868438720703125, 0.74346923828125, 0.8000946044921875, 0.856719970703125, 0.9133453369140625, 0.969970703125, 1.0265960693359375, 1.083221435546875, 1.1398468017578125, 1.19647216796875, 1.2530975341796875, 1.309722900390625, 1.3663482666015625, 1.4229736328125, 1.4795989990234375, 1.536224365234375, 1.5928497314453125, 1.64947509765625, 1.7061004638671875, 1.762725830078125, 1.8193511962890625, 1.8759765625]}, "gradients/decoder.transformer.h.22.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 3.0, 0.0, 6.0, 4.0, 0.0, 3.0, 5.0, 7.0, 11.0, 13.0, 12.0, 26.0, 32.0, 36.0, 67.0, 98.0, 126.0, 148.0, 217.0, 355.0, 519.0, 907.0, 1655.0, 3169.0, 7202.0, 18519.0, 57794.0, 278322.0, 535995.0, 96576.0, 27220.0, 10051.0, 4270.0, 2087.0, 1125.0, 656.0, 438.0, 280.0, 183.0, 109.0, 85.0, 65.0, 48.0, 32.0, 29.0, 29.0, 7.0, 10.0, 2.0, 3.0, 7.0, 3.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.72265625, -2.6380615234375, -2.553466796875, -2.4688720703125, -2.38427734375, -2.2996826171875, -2.215087890625, -2.1304931640625, -2.0458984375, -1.9613037109375, -1.876708984375, -1.7921142578125, -1.70751953125, -1.6229248046875, -1.538330078125, -1.4537353515625, -1.369140625, -1.2845458984375, -1.199951171875, -1.1153564453125, -1.03076171875, -0.9461669921875, -0.861572265625, -0.7769775390625, -0.6923828125, -0.6077880859375, -0.523193359375, -0.4385986328125, -0.35400390625, -0.2694091796875, -0.184814453125, -0.1002197265625, -0.015625, 0.0689697265625, 0.153564453125, 0.2381591796875, 0.32275390625, 0.4073486328125, 0.491943359375, 0.5765380859375, 0.6611328125, 0.7457275390625, 0.830322265625, 0.9149169921875, 0.99951171875, 1.0841064453125, 1.168701171875, 1.2532958984375, 1.337890625, 1.4224853515625, 1.507080078125, 1.5916748046875, 1.67626953125, 1.7608642578125, 1.845458984375, 1.9300537109375, 2.0146484375, 2.0992431640625, 2.183837890625, 2.2684326171875, 2.35302734375, 2.4376220703125, 2.522216796875, 2.6068115234375, 2.69140625]}, "gradients/decoder.transformer.h.22.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 6.0, 2.0, 5.0, 7.0, 6.0, 8.0, 15.0, 21.0, 8.0, 30.0, 33.0, 27.0, 41.0, 39.0, 38.0, 54.0, 52.0, 67.0, 117.0, 1882.0, 133.0, 75.0, 51.0, 60.0, 43.0, 27.0, 45.0, 25.0, 30.0, 23.0, 16.0, 14.0, 13.0, 9.0, 10.0, 5.0, 7.0, 7.0, 8.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.375, -8.125, -7.875, -7.625, -7.375, -7.125, -6.875, -6.625, -6.375, -6.125, -5.875, -5.625, -5.375, -5.125, -4.875, -4.625, -4.375, -4.125, -3.875, -3.625, -3.375, -3.125, -2.875, -2.625, -2.375, -2.125, -1.875, -1.625, -1.375, -1.125, -0.875, -0.625, -0.375, -0.125, 0.125, 0.375, 0.625, 0.875, 1.125, 1.375, 1.625, 1.875, 2.125, 2.375, 2.625, 2.875, 3.125, 3.375, 3.625, 3.875, 4.125, 4.375, 4.625, 4.875, 5.125, 5.375, 5.625, 5.875, 6.125, 6.375, 6.625, 6.875, 7.125, 7.375, 7.625]}, "gradients/decoder.transformer.h.22.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 6.0, 4.0, 8.0, 6.0, 12.0, 8.0, 11.0, 16.0, 17.0, 24.0, 29.0, 30.0, 61.0, 57.0, 87.0, 153.0, 362.0, 1212.0, 17398.0, 3107470.0, 16667.0, 1227.0, 317.0, 164.0, 86.0, 59.0, 48.0, 36.0, 36.0, 24.0, 9.0, 20.0, 19.0, 5.0, 11.0, 3.0, 4.0, 5.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-17.875, -17.288818359375, -16.70263671875, -16.116455078125, -15.5302734375, -14.944091796875, -14.35791015625, -13.771728515625, -13.185546875, -12.599365234375, -12.01318359375, -11.427001953125, -10.8408203125, -10.254638671875, -9.66845703125, -9.082275390625, -8.49609375, -7.909912109375, -7.32373046875, -6.737548828125, -6.1513671875, -5.565185546875, -4.97900390625, -4.392822265625, -3.806640625, -3.220458984375, -2.63427734375, -2.048095703125, -1.4619140625, -0.875732421875, -0.28955078125, 0.296630859375, 0.8828125, 1.468994140625, 2.05517578125, 2.641357421875, 3.2275390625, 3.813720703125, 4.39990234375, 4.986083984375, 5.572265625, 6.158447265625, 6.74462890625, 7.330810546875, 7.9169921875, 8.503173828125, 9.08935546875, 9.675537109375, 10.26171875, 10.847900390625, 11.43408203125, 12.020263671875, 12.6064453125, 13.192626953125, 13.77880859375, 14.364990234375, 14.951171875, 15.537353515625, 16.12353515625, 16.709716796875, 17.2958984375, 17.882080078125, 18.46826171875, 19.054443359375, 19.640625]}, "gradients/decoder.transformer.h.22.ln_1.weight": {"_type": "histogram", "values": [977.0, 42.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.15256404876709, 1.3861885070800781, 6.924941062927246, 12.463692665100098, 18.002445220947266, 23.54119873046875, 29.07994842529297, 34.61870193481445, 40.15745544433594, 45.69620895385742, 51.234962463378906, 56.773712158203125, 62.31246566772461, 67.8512191772461, 73.38996887207031, 78.92872619628906, 84.46747589111328, 90.0062255859375, 95.54498291015625, 101.08373260498047, 106.62248229980469, 112.16123962402344, 117.69998931884766, 123.23873901367188, 128.77749633789062, 134.31625366210938, 139.85499572753906, 145.3937530517578, 150.93251037597656, 156.47125244140625, 162.010009765625, 167.54876708984375, 173.0875244140625, 178.62628173828125, 184.16502380371094, 189.7037811279297, 195.24253845214844, 200.78128051757812, 206.32003784179688, 211.85879516601562, 217.39755249023438, 222.93630981445312, 228.4750518798828, 234.01380920410156, 239.5525665283203, 245.09130859375, 250.63006591796875, 256.1688232421875, 261.70758056640625, 267.246337890625, 272.78509521484375, 278.3238525390625, 283.8625793457031, 289.4013366699219, 294.9400939941406, 300.4788513183594, 306.017578125, 311.55633544921875, 317.0950927734375, 322.63385009765625, 328.1725769042969, 333.7113342285156, 339.2500915527344, 344.7888488769531, 350.3276062011719]}, "gradients/decoder.transformer.h.22.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 0.0, 1.0, 3.0, 6.0, 8.0, 7.0, 9.0, 14.0, 7.0, 16.0, 26.0, 13.0, 27.0, 33.0, 28.0, 34.0, 23.0, 35.0, 44.0, 41.0, 30.0, 45.0, 38.0, 55.0, 35.0, 36.0, 41.0, 35.0, 40.0, 30.0, 28.0, 28.0, 33.0, 27.0, 26.0, 16.0, 10.0, 20.0, 10.0, 10.0, 14.0, 4.0, 4.0, 6.0, 3.0, 4.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-20.905729293823242, -20.242717742919922, -19.579708099365234, -18.916696548461914, -18.253684997558594, -17.590675354003906, -16.927663803100586, -16.264652252197266, -15.601642608642578, -14.938632011413574, -14.27562141418457, -13.61260986328125, -12.949599266052246, -12.286588668823242, -11.623577117919922, -10.960566520690918, -10.297555923461914, -9.63454532623291, -8.971534729003906, -8.308523178100586, -7.645512580871582, -6.982501983642578, -6.319490909576416, -5.656479835510254, -4.99346923828125, -4.330458641052246, -3.667447566986084, -3.004436731338501, -2.341425895690918, -1.678415060043335, -1.015404224395752, -0.35239315032958984, 0.31061553955078125, 0.9736263751983643, 1.6366372108459473, 2.2996480464935303, 2.9626588821411133, 3.6256697177886963, 4.288680553436279, 4.951691627502441, 5.614702224731445, 6.277712821960449, 6.940723896026611, 7.603734970092773, 8.266745567321777, 8.929756164550781, 9.592767715454102, 10.255778312683105, 10.91878890991211, 11.581799507141113, 12.244810104370117, 12.907821655273438, 13.570832252502441, 14.233842849731445, 14.896854400634766, 15.55986499786377, 16.222875595092773, 16.885887145996094, 17.54889678955078, 18.2119083404541, 18.874919891357422, 19.53792953491211, 20.20094108581543, 20.86395263671875, 21.526962280273438]}, "gradients/decoder.transformer.h.21.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 6.0, 0.0, 2.0, 9.0, 11.0, 8.0, 11.0, 10.0, 6.0, 16.0, 13.0, 15.0, 20.0, 28.0, 20.0, 26.0, 38.0, 32.0, 45.0, 43.0, 38.0, 35.0, 52.0, 32.0, 34.0, 35.0, 49.0, 38.0, 32.0, 40.0, 31.0, 33.0, 28.0, 33.0, 17.0, 22.0, 21.0, 12.0, 12.0, 8.0, 11.0, 10.0, 6.0, 4.0, 4.0, 3.0, 4.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.787109375, -1.7288665771484375, -1.670623779296875, -1.6123809814453125, -1.55413818359375, -1.4958953857421875, -1.437652587890625, -1.3794097900390625, -1.3211669921875, -1.2629241943359375, -1.204681396484375, -1.1464385986328125, -1.08819580078125, -1.0299530029296875, -0.971710205078125, -0.9134674072265625, -0.855224609375, -0.7969818115234375, -0.738739013671875, -0.6804962158203125, -0.62225341796875, -0.5640106201171875, -0.505767822265625, -0.4475250244140625, -0.3892822265625, -0.3310394287109375, -0.272796630859375, -0.2145538330078125, -0.15631103515625, -0.0980682373046875, -0.039825439453125, 0.0184173583984375, 0.07666015625, 0.1349029541015625, 0.193145751953125, 0.2513885498046875, 0.30963134765625, 0.3678741455078125, 0.426116943359375, 0.4843597412109375, 0.5426025390625, 0.6008453369140625, 0.659088134765625, 0.7173309326171875, 0.77557373046875, 0.8338165283203125, 0.892059326171875, 0.9503021240234375, 1.008544921875, 1.0667877197265625, 1.125030517578125, 1.1832733154296875, 1.24151611328125, 1.2997589111328125, 1.358001708984375, 1.4162445068359375, 1.4744873046875, 1.5327301025390625, 1.590972900390625, 1.6492156982421875, 1.70745849609375, 1.7657012939453125, 1.823944091796875, 1.8821868896484375, 1.9404296875]}, "gradients/decoder.transformer.h.21.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 5.0, 4.0, 8.0, 9.0, 14.0, 10.0, 14.0, 23.0, 29.0, 20.0, 29.0, 38.0, 60.0, 63.0, 106.0, 143.0, 213.0, 358.0, 730.0, 2040.0, 12168.0, 292807.0, 3784575.0, 91435.0, 6492.0, 1379.0, 525.0, 276.0, 179.0, 110.0, 94.0, 78.0, 58.0, 29.0, 37.0, 36.0, 23.0, 14.0, 13.0, 13.0, 9.0, 6.0, 7.0, 3.0, 2.0, 2.0, 0.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-11.265625, -10.9063720703125, -10.547119140625, -10.1878662109375, -9.82861328125, -9.4693603515625, -9.110107421875, -8.7508544921875, -8.3916015625, -8.0323486328125, -7.673095703125, -7.3138427734375, -6.95458984375, -6.5953369140625, -6.236083984375, -5.8768310546875, -5.517578125, -5.1583251953125, -4.799072265625, -4.4398193359375, -4.08056640625, -3.7213134765625, -3.362060546875, -3.0028076171875, -2.6435546875, -2.2843017578125, -1.925048828125, -1.5657958984375, -1.20654296875, -0.8472900390625, -0.488037109375, -0.1287841796875, 0.23046875, 0.5897216796875, 0.948974609375, 1.3082275390625, 1.66748046875, 2.0267333984375, 2.385986328125, 2.7452392578125, 3.1044921875, 3.4637451171875, 3.822998046875, 4.1822509765625, 4.54150390625, 4.9007568359375, 5.260009765625, 5.6192626953125, 5.978515625, 6.3377685546875, 6.697021484375, 7.0562744140625, 7.41552734375, 7.7747802734375, 8.134033203125, 8.4932861328125, 8.8525390625, 9.2117919921875, 9.571044921875, 9.9302978515625, 10.28955078125, 10.6488037109375, 11.008056640625, 11.3673095703125, 11.7265625]}, "gradients/decoder.transformer.h.21.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 4.0, 5.0, 6.0, 16.0, 21.0, 37.0, 44.0, 74.0, 81.0, 123.0, 222.0, 375.0, 521.0, 743.0, 675.0, 400.0, 255.0, 168.0, 100.0, 68.0, 53.0, 26.0, 16.0, 15.0, 5.0, 8.0, 6.0, 6.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.6328125, -7.406494140625, -7.18017578125, -6.953857421875, -6.7275390625, -6.501220703125, -6.27490234375, -6.048583984375, -5.822265625, -5.595947265625, -5.36962890625, -5.143310546875, -4.9169921875, -4.690673828125, -4.46435546875, -4.238037109375, -4.01171875, -3.785400390625, -3.55908203125, -3.332763671875, -3.1064453125, -2.880126953125, -2.65380859375, -2.427490234375, -2.201171875, -1.974853515625, -1.74853515625, -1.522216796875, -1.2958984375, -1.069580078125, -0.84326171875, -0.616943359375, -0.390625, -0.164306640625, 0.06201171875, 0.288330078125, 0.5146484375, 0.740966796875, 0.96728515625, 1.193603515625, 1.419921875, 1.646240234375, 1.87255859375, 2.098876953125, 2.3251953125, 2.551513671875, 2.77783203125, 3.004150390625, 3.23046875, 3.456787109375, 3.68310546875, 3.909423828125, 4.1357421875, 4.362060546875, 4.58837890625, 4.814697265625, 5.041015625, 5.267333984375, 5.49365234375, 5.719970703125, 5.9462890625, 6.172607421875, 6.39892578125, 6.625244140625, 6.8515625]}, "gradients/decoder.transformer.h.21.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 7.0, 4.0, 17.0, 34.0, 71.0, 246.0, 747.0, 4225.0, 66264.0, 3588542.0, 517088.0, 14597.0, 1775.0, 432.0, 145.0, 65.0, 22.0, 8.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.515625, -10.828369140625, -10.14111328125, -9.453857421875, -8.7666015625, -8.079345703125, -7.39208984375, -6.704833984375, -6.017578125, -5.330322265625, -4.64306640625, -3.955810546875, -3.2685546875, -2.581298828125, -1.89404296875, -1.206787109375, -0.51953125, 0.167724609375, 0.85498046875, 1.542236328125, 2.2294921875, 2.916748046875, 3.60400390625, 4.291259765625, 4.978515625, 5.665771484375, 6.35302734375, 7.040283203125, 7.7275390625, 8.414794921875, 9.10205078125, 9.789306640625, 10.4765625, 11.163818359375, 11.85107421875, 12.538330078125, 13.2255859375, 13.912841796875, 14.60009765625, 15.287353515625, 15.974609375, 16.661865234375, 17.34912109375, 18.036376953125, 18.7236328125, 19.410888671875, 20.09814453125, 20.785400390625, 21.47265625, 22.159912109375, 22.84716796875, 23.534423828125, 24.2216796875, 24.908935546875, 25.59619140625, 26.283447265625, 26.970703125, 27.657958984375, 28.34521484375, 29.032470703125, 29.7197265625, 30.406982421875, 31.09423828125, 31.781494140625, 32.46875]}, "gradients/decoder.transformer.h.21.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 7.0, 55.0, 231.0, 430.0, 228.0, 50.0, 12.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.849897384643555, -19.843862533569336, -16.837827682495117, -13.831792831420898, -10.82575798034668, -7.819723129272461, -4.813688278198242, -1.8076534271240234, 1.1983814239501953, 4.204416275024414, 7.210451126098633, 10.216485977172852, 13.22252082824707, 16.22855567932129, 19.234590530395508, 22.240625381469727, 25.246660232543945, 28.252695083618164, 31.258729934692383, 34.26476287841797, 37.27079772949219, 40.276832580566406, 43.282867431640625, 46.288902282714844, 49.29493713378906, 52.30097198486328, 55.3070068359375, 58.31304168701172, 61.31907653808594, 64.32511138916016, 67.33114624023438, 70.3371810913086, 73.34321594238281, 76.34925079345703, 79.35528564453125, 82.36132049560547, 85.36735534667969, 88.3733901977539, 91.37942504882812, 94.38545989990234, 97.39149475097656, 100.39752960205078, 103.403564453125, 106.40959930419922, 109.41563415527344, 112.42166900634766, 115.42770385742188, 118.4337387084961, 121.43977355957031, 124.44580841064453, 127.45184326171875, 130.4578857421875, 133.4639129638672, 136.46994018554688, 139.47598266601562, 142.48202514648438, 145.48805236816406, 148.49407958984375, 151.5001220703125, 154.50616455078125, 157.51219177246094, 160.51821899414062, 163.52426147460938, 166.53030395507812, 169.5363311767578]}, "gradients/decoder.transformer.h.21.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 4.0, 5.0, 4.0, 7.0, 9.0, 6.0, 11.0, 19.0, 16.0, 24.0, 31.0, 21.0, 26.0, 26.0, 21.0, 33.0, 44.0, 36.0, 40.0, 44.0, 45.0, 42.0, 40.0, 44.0, 50.0, 42.0, 32.0, 30.0, 33.0, 32.0, 20.0, 29.0, 19.0, 19.0, 13.0, 16.0, 14.0, 15.0, 11.0, 9.0, 6.0, 3.0, 2.0, 4.0, 4.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-18.410629272460938, -17.824623107910156, -17.238616943359375, -16.652610778808594, -16.066604614257812, -15.480599403381348, -14.894594192504883, -14.308588027954102, -13.72258186340332, -13.136575698852539, -12.550569534301758, -11.964564323425293, -11.378558158874512, -10.79255199432373, -10.206546783447266, -9.620540618896484, -9.034534454345703, -8.448528289794922, -7.862522602081299, -7.276516914367676, -6.6905107498168945, -6.104504585266113, -5.51849889755249, -4.932493209838867, -4.346487045288086, -3.760481119155884, -3.1744751930236816, -2.5884692668914795, -2.0024633407592773, -1.4164574146270752, -0.830451488494873, -0.24444580078125, 0.34156036376953125, 0.9275662899017334, 1.5135722160339355, 2.0995781421661377, 2.68558406829834, 3.271589994430542, 3.857595920562744, 4.443601608276367, 5.029607772827148, 5.61561393737793, 6.201619625091553, 6.787625312805176, 7.373631477355957, 7.959637641906738, 8.545642852783203, 9.131649017333984, 9.717655181884766, 10.303661346435547, 10.889667510986328, 11.475672721862793, 12.061678886413574, 12.647685050964355, 13.23369026184082, 13.819696426391602, 14.405702590942383, 14.991708755493164, 15.577714920043945, 16.163721084594727, 16.749725341796875, 17.335731506347656, 17.921737670898438, 18.50774383544922, 19.09375]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 7.0, 6.0, 7.0, 9.0, 5.0, 11.0, 8.0, 21.0, 14.0, 16.0, 19.0, 16.0, 26.0, 29.0, 33.0, 35.0, 41.0, 38.0, 46.0, 29.0, 34.0, 47.0, 56.0, 45.0, 37.0, 43.0, 38.0, 25.0, 22.0, 35.0, 26.0, 21.0, 20.0, 25.0, 24.0, 13.0, 23.0, 11.0, 9.0, 9.0, 7.0, 4.0, 3.0, 4.0, 1.0, 2.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-1.970703125, -1.908905029296875, -1.84710693359375, -1.785308837890625, -1.7235107421875, -1.661712646484375, -1.59991455078125, -1.538116455078125, -1.476318359375, -1.414520263671875, -1.35272216796875, -1.290924072265625, -1.2291259765625, -1.167327880859375, -1.10552978515625, -1.043731689453125, -0.98193359375, -0.920135498046875, -0.85833740234375, -0.796539306640625, -0.7347412109375, -0.672943115234375, -0.61114501953125, -0.549346923828125, -0.487548828125, -0.425750732421875, -0.36395263671875, -0.302154541015625, -0.2403564453125, -0.178558349609375, -0.11676025390625, -0.054962158203125, 0.0068359375, 0.068634033203125, 0.13043212890625, 0.192230224609375, 0.2540283203125, 0.315826416015625, 0.37762451171875, 0.439422607421875, 0.501220703125, 0.563018798828125, 0.62481689453125, 0.686614990234375, 0.7484130859375, 0.810211181640625, 0.87200927734375, 0.933807373046875, 0.99560546875, 1.057403564453125, 1.11920166015625, 1.180999755859375, 1.2427978515625, 1.304595947265625, 1.36639404296875, 1.428192138671875, 1.489990234375, 1.551788330078125, 1.61358642578125, 1.675384521484375, 1.7371826171875, 1.798980712890625, 1.86077880859375, 1.922576904296875, 1.984375]}, "gradients/decoder.transformer.h.21.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 8.0, 14.0, 17.0, 30.0, 36.0, 55.0, 92.0, 148.0, 191.0, 300.0, 438.0, 555.0, 887.0, 1265.0, 1840.0, 2871.0, 4188.0, 6219.0, 9153.0, 13620.0, 20507.0, 32078.0, 51792.0, 89075.0, 193333.0, 315050.0, 123173.0, 65375.0, 39930.0, 25598.0, 16766.0, 11101.0, 7325.0, 4991.0, 3300.0, 2324.0, 1567.0, 1074.0, 719.0, 511.0, 326.0, 229.0, 146.0, 120.0, 67.0, 46.0, 36.0, 29.0, 14.0, 10.0, 9.0, 5.0, 4.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0513916015625, -0.04979753494262695, -0.048203468322753906, -0.04660940170288086, -0.04501533508300781, -0.043421268463134766, -0.04182720184326172, -0.04023313522338867, -0.038639068603515625, -0.03704500198364258, -0.03545093536376953, -0.033856868743896484, -0.03226280212402344, -0.03066873550415039, -0.029074668884277344, -0.027480602264404297, -0.02588653564453125, -0.024292469024658203, -0.022698402404785156, -0.02110433578491211, -0.019510269165039062, -0.017916202545166016, -0.01632213592529297, -0.014728069305419922, -0.013134002685546875, -0.011539936065673828, -0.009945869445800781, -0.008351802825927734, -0.0067577362060546875, -0.005163669586181641, -0.0035696029663085938, -0.001975536346435547, -0.0003814697265625, 0.0012125968933105469, 0.0028066635131835938, 0.004400730133056641, 0.0059947967529296875, 0.007588863372802734, 0.009182929992675781, 0.010776996612548828, 0.012371063232421875, 0.013965129852294922, 0.015559196472167969, 0.017153263092041016, 0.018747329711914062, 0.02034139633178711, 0.021935462951660156, 0.023529529571533203, 0.02512359619140625, 0.026717662811279297, 0.028311729431152344, 0.02990579605102539, 0.03149986267089844, 0.033093929290771484, 0.03468799591064453, 0.03628206253051758, 0.037876129150390625, 0.03947019577026367, 0.04106426239013672, 0.042658329010009766, 0.04425239562988281, 0.04584646224975586, 0.047440528869628906, 0.04903459548950195, 0.050628662109375]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 6.0, 2.0, 6.0, 6.0, 10.0, 10.0, 9.0, 15.0, 17.0, 17.0, 28.0, 16.0, 21.0, 26.0, 24.0, 33.0, 32.0, 29.0, 65.0, 38.0, 38.0, 1061.0, 38.0, 49.0, 39.0, 47.0, 35.0, 43.0, 39.0, 29.0, 32.0, 24.0, 21.0, 19.0, 23.0, 16.0, 14.0, 10.0, 15.0, 13.0, 7.0, 3.0, 2.0, 4.0, 2.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3212890625, -1.2796783447265625, -1.238067626953125, -1.1964569091796875, -1.15484619140625, -1.1132354736328125, -1.071624755859375, -1.0300140380859375, -0.9884033203125, -0.9467926025390625, -0.905181884765625, -0.8635711669921875, -0.82196044921875, -0.7803497314453125, -0.738739013671875, -0.6971282958984375, -0.655517578125, -0.6139068603515625, -0.572296142578125, -0.5306854248046875, -0.48907470703125, -0.4474639892578125, -0.405853271484375, -0.3642425537109375, -0.3226318359375, -0.2810211181640625, -0.239410400390625, -0.1977996826171875, -0.15618896484375, -0.1145782470703125, -0.072967529296875, -0.0313568115234375, 0.01025390625, 0.0518646240234375, 0.093475341796875, 0.1350860595703125, 0.17669677734375, 0.2183074951171875, 0.259918212890625, 0.3015289306640625, 0.3431396484375, 0.3847503662109375, 0.426361083984375, 0.4679718017578125, 0.50958251953125, 0.5511932373046875, 0.592803955078125, 0.6344146728515625, 0.676025390625, 0.7176361083984375, 0.759246826171875, 0.8008575439453125, 0.84246826171875, 0.8840789794921875, 0.925689697265625, 0.9673004150390625, 1.0089111328125, 1.0505218505859375, 1.092132568359375, 1.1337432861328125, 1.17535400390625, 1.2169647216796875, 1.258575439453125, 1.3001861572265625, 1.341796875]}, "gradients/decoder.transformer.h.21.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 2.0, 3.0, 1.0, 3.0, 2.0, 6.0, 20.0, 20.0, 26.0, 38.0, 51.0, 69.0, 106.0, 192.0, 306.0, 447.0, 669.0, 1100.0, 1731.0, 2893.0, 4664.0, 7567.0, 12637.0, 21130.0, 36070.0, 66566.0, 136658.0, 1393130.0, 207734.0, 89297.0, 46855.0, 26641.0, 15624.0, 9642.0, 5656.0, 3599.0, 2169.0, 1346.0, 890.0, 543.0, 340.0, 240.0, 146.0, 108.0, 82.0, 46.0, 23.0, 17.0, 9.0, 11.0, 8.0, 4.0, 7.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.057891845703125, -0.05606698989868164, -0.05424213409423828, -0.05241727828979492, -0.05059242248535156, -0.0487675666809082, -0.046942710876464844, -0.045117855072021484, -0.043292999267578125, -0.041468143463134766, -0.039643287658691406, -0.03781843185424805, -0.03599357604980469, -0.03416872024536133, -0.03234386444091797, -0.03051900863647461, -0.02869415283203125, -0.02686929702758789, -0.02504444122314453, -0.023219585418701172, -0.021394729614257812, -0.019569873809814453, -0.017745018005371094, -0.015920162200927734, -0.014095306396484375, -0.012270450592041016, -0.010445594787597656, -0.008620738983154297, -0.0067958831787109375, -0.004971027374267578, -0.0031461715698242188, -0.0013213157653808594, 0.0005035400390625, 0.0023283958435058594, 0.004153251647949219, 0.005978107452392578, 0.0078029632568359375, 0.009627819061279297, 0.011452674865722656, 0.013277530670166016, 0.015102386474609375, 0.016927242279052734, 0.018752098083496094, 0.020576953887939453, 0.022401809692382812, 0.024226665496826172, 0.02605152130126953, 0.02787637710571289, 0.02970123291015625, 0.03152608871459961, 0.03335094451904297, 0.03517580032348633, 0.03700065612792969, 0.03882551193237305, 0.040650367736816406, 0.042475223541259766, 0.044300079345703125, 0.046124935150146484, 0.047949790954589844, 0.0497746467590332, 0.05159950256347656, 0.05342435836791992, 0.05524921417236328, 0.05707406997680664, 0.05889892578125]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 6.0, 7.0, 4.0, 9.0, 7.0, 7.0, 29.0, 26.0, 20.0, 31.0, 70.0, 37.0, 49.0, 55.0, 98.0, 66.0, 68.0, 64.0, 79.0, 59.0, 35.0, 37.0, 44.0, 28.0, 15.0, 10.0, 8.0, 6.0, 6.0, 2.0, 5.0, 7.0, 2.0, 0.0, 5.0, 0.0, 1.0, 4.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.993511199951172e-06, -3.859400749206543e-06, -3.725290298461914e-06, -3.591179847717285e-06, -3.4570693969726562e-06, -3.3229589462280273e-06, -3.1888484954833984e-06, -3.0547380447387695e-06, -2.9206275939941406e-06, -2.7865171432495117e-06, -2.652406692504883e-06, -2.518296241760254e-06, -2.384185791015625e-06, -2.250075340270996e-06, -2.115964889526367e-06, -1.9818544387817383e-06, -1.8477439880371094e-06, -1.7136335372924805e-06, -1.5795230865478516e-06, -1.4454126358032227e-06, -1.3113021850585938e-06, -1.1771917343139648e-06, -1.043081283569336e-06, -9.08970832824707e-07, -7.748603820800781e-07, -6.407499313354492e-07, -5.066394805908203e-07, -3.725290298461914e-07, -2.384185791015625e-07, -1.043081283569336e-07, 2.9802322387695312e-08, 1.6391277313232422e-07, 2.980232238769531e-07, 4.3213367462158203e-07, 5.662441253662109e-07, 7.003545761108398e-07, 8.344650268554688e-07, 9.685754776000977e-07, 1.1026859283447266e-06, 1.2367963790893555e-06, 1.3709068298339844e-06, 1.5050172805786133e-06, 1.6391277313232422e-06, 1.773238182067871e-06, 1.9073486328125e-06, 2.041459083557129e-06, 2.175569534301758e-06, 2.3096799850463867e-06, 2.4437904357910156e-06, 2.5779008865356445e-06, 2.7120113372802734e-06, 2.8461217880249023e-06, 2.9802322387695312e-06, 3.11434268951416e-06, 3.248453140258789e-06, 3.382563591003418e-06, 3.516674041748047e-06, 3.6507844924926758e-06, 3.7848949432373047e-06, 3.919005393981934e-06, 4.0531158447265625e-06, 4.187226295471191e-06, 4.32133674621582e-06, 4.455447196960449e-06, 4.589557647705078e-06]}, "gradients/decoder.transformer.h.21.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 4.0, 4.0, 2.0, 4.0, 2.0, 3.0, 7.0, 9.0, 13.0, 16.0, 14.0, 31.0, 18.0, 37.0, 47.0, 86.0, 110.0, 98.0, 180.0, 262.0, 511.0, 2966.0, 698668.0, 342418.0, 1701.0, 445.0, 229.0, 167.0, 142.0, 75.0, 60.0, 57.0, 31.0, 29.0, 15.0, 22.0, 13.0, 11.0, 16.0, 8.0, 3.0, 8.0, 5.0, 2.0, 0.0, 2.0, 0.0, 3.0, 3.0, 1.0, 0.0, 0.0, 2.0, 4.0, 1.0], "bins": [-6.729364395141602e-05, -6.518512964248657e-05, -6.307661533355713e-05, -6.0968101024627686e-05, -5.885958671569824e-05, -5.67510724067688e-05, -5.4642558097839355e-05, -5.253404378890991e-05, -5.042552947998047e-05, -4.8317015171051025e-05, -4.620850086212158e-05, -4.409998655319214e-05, -4.1991472244262695e-05, -3.988295793533325e-05, -3.777444362640381e-05, -3.5665929317474365e-05, -3.355741500854492e-05, -3.144890069961548e-05, -2.9340386390686035e-05, -2.7231872081756592e-05, -2.512335777282715e-05, -2.3014843463897705e-05, -2.0906329154968262e-05, -1.879781484603882e-05, -1.6689300537109375e-05, -1.4580786228179932e-05, -1.2472271919250488e-05, -1.0363757610321045e-05, -8.255243301391602e-06, -6.146728992462158e-06, -4.038214683532715e-06, -1.9297003746032715e-06, 1.7881393432617188e-07, 2.2873282432556152e-06, 4.395842552185059e-06, 6.504356861114502e-06, 8.612871170043945e-06, 1.0721385478973389e-05, 1.2829899787902832e-05, 1.4938414096832275e-05, 1.704692840576172e-05, 1.9155442714691162e-05, 2.1263957023620605e-05, 2.337247133255005e-05, 2.5480985641479492e-05, 2.7589499950408936e-05, 2.969801425933838e-05, 3.180652856826782e-05, 3.3915042877197266e-05, 3.602355718612671e-05, 3.813207149505615e-05, 4.0240585803985596e-05, 4.234910011291504e-05, 4.445761442184448e-05, 4.6566128730773926e-05, 4.867464303970337e-05, 5.078315734863281e-05, 5.2891671657562256e-05, 5.50001859664917e-05, 5.710870027542114e-05, 5.9217214584350586e-05, 6.132572889328003e-05, 6.343424320220947e-05, 6.554275751113892e-05, 6.765127182006836e-05]}, "gradients/decoder.transformer.h.21.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 12.0, 16.0, 32.0, 60.0, 105.0, 107.0, 125.0, 167.0, 128.0, 93.0, 59.0, 48.0, 23.0, 19.0, 4.0, 10.0, 5.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3144215245119995e-06, -1.2217367384437239e-06, -1.1290518386886106e-06, -1.0363669389334973e-06, -9.436821528652217e-07, -8.509973099535273e-07, -7.583124670418329e-07, -6.656276241301384e-07, -5.72942781218444e-07, -4.802579383067496e-07, -3.8757309539505513e-07, -2.948882524833607e-07, -2.0220340957166627e-07, -1.0951856665997184e-07, -1.6833723748277407e-08, 7.585111916341702e-08, 1.6853596207511146e-07, 2.612208049868059e-07, 3.539056478985003e-07, 4.4659049081019475e-07, 5.392753337218892e-07, 6.319601766335836e-07, 7.24645019545278e-07, 8.173298624569725e-07, 9.100147053686669e-07, 1.0026994914369425e-06, 1.0953843911920558e-06, 1.188069290947169e-06, 1.2807540770154446e-06, 1.3734388630837202e-06, 1.4661237628388335e-06, 1.5588086625939468e-06, 1.6514936760358978e-06, 1.7441784621041734e-06, 1.8368633618592867e-06, 1.9295482616144e-06, 2.0222330476826755e-06, 2.114917833750951e-06, 2.207602847192902e-06, 2.3002876332611777e-06, 2.3929724193294533e-06, 2.485657205397729e-06, 2.5783419914660044e-06, 2.6710270049079554e-06, 2.763711790976231e-06, 2.8563965770445066e-06, 2.9490815904864576e-06, 3.041766376554733e-06, 3.1344511626230087e-06, 3.2271359486912843e-06, 3.31982073475956e-06, 3.412505748201511e-06, 3.5051905342697864e-06, 3.597875320338062e-06, 3.690560333780013e-06, 3.7832451198482886e-06, 3.875929905916564e-06, 3.96861469198484e-06, 4.061299478053115e-06, 4.153984264121391e-06, 4.2466690501896665e-06, 4.339354291005293e-06, 4.4320390770735685e-06, 4.524723863141844e-06, 4.61740864921012e-06]}, "gradients/decoder.transformer.h.21.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 5.0, 1.0, 2.0, 3.0, 2.0, 7.0, 6.0, 8.0, 17.0, 14.0, 13.0, 19.0, 43.0, 21.0, 32.0, 21.0, 57.0, 34.0, 36.0, 37.0, 74.0, 46.0, 47.0, 46.0, 82.0, 36.0, 36.0, 25.0, 55.0, 25.0, 30.0, 16.0, 38.0, 13.0, 9.0, 16.0, 20.0, 4.0, 8.0, 4.0, 5.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9073486328125e-06, -1.8328428268432617e-06, -1.7583370208740234e-06, -1.6838312149047852e-06, -1.6093254089355469e-06, -1.5348196029663086e-06, -1.4603137969970703e-06, -1.385807991027832e-06, -1.3113021850585938e-06, -1.2367963790893555e-06, -1.1622905731201172e-06, -1.087784767150879e-06, -1.0132789611816406e-06, -9.387731552124023e-07, -8.642673492431641e-07, -7.897615432739258e-07, -7.152557373046875e-07, -6.407499313354492e-07, -5.662441253662109e-07, -4.917383193969727e-07, -4.172325134277344e-07, -3.427267074584961e-07, -2.682209014892578e-07, -1.9371509552001953e-07, -1.1920928955078125e-07, -4.470348358154297e-08, 2.9802322387695312e-08, 1.043081283569336e-07, 1.7881393432617188e-07, 2.5331974029541016e-07, 3.2782554626464844e-07, 4.023313522338867e-07, 4.76837158203125e-07, 5.513429641723633e-07, 6.258487701416016e-07, 7.003545761108398e-07, 7.748603820800781e-07, 8.493661880493164e-07, 9.238719940185547e-07, 9.98377799987793e-07, 1.0728836059570312e-06, 1.1473894119262695e-06, 1.2218952178955078e-06, 1.296401023864746e-06, 1.3709068298339844e-06, 1.4454126358032227e-06, 1.519918441772461e-06, 1.5944242477416992e-06, 1.6689300537109375e-06, 1.7434358596801758e-06, 1.817941665649414e-06, 1.8924474716186523e-06, 1.9669532775878906e-06, 2.041459083557129e-06, 2.115964889526367e-06, 2.1904706954956055e-06, 2.2649765014648438e-06, 2.339482307434082e-06, 2.4139881134033203e-06, 2.4884939193725586e-06, 2.562999725341797e-06, 2.637505531311035e-06, 2.7120113372802734e-06, 2.7865171432495117e-06, 2.86102294921875e-06]}, "gradients/decoder.transformer.h.21.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 7.0, 6.0, 7.0, 9.0, 5.0, 11.0, 8.0, 21.0, 14.0, 16.0, 19.0, 16.0, 26.0, 29.0, 33.0, 35.0, 41.0, 38.0, 46.0, 29.0, 34.0, 47.0, 56.0, 45.0, 37.0, 43.0, 38.0, 25.0, 22.0, 35.0, 26.0, 21.0, 20.0, 25.0, 24.0, 13.0, 23.0, 11.0, 9.0, 9.0, 7.0, 4.0, 3.0, 4.0, 1.0, 2.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-1.970703125, -1.908905029296875, -1.84710693359375, -1.785308837890625, -1.7235107421875, -1.661712646484375, -1.59991455078125, -1.538116455078125, -1.476318359375, -1.414520263671875, -1.35272216796875, -1.290924072265625, -1.2291259765625, -1.167327880859375, -1.10552978515625, -1.043731689453125, -0.98193359375, -0.920135498046875, -0.85833740234375, -0.796539306640625, -0.7347412109375, -0.672943115234375, -0.61114501953125, -0.549346923828125, -0.487548828125, -0.425750732421875, -0.36395263671875, -0.302154541015625, -0.2403564453125, -0.178558349609375, -0.11676025390625, -0.054962158203125, 0.0068359375, 0.068634033203125, 0.13043212890625, 0.192230224609375, 0.2540283203125, 0.315826416015625, 0.37762451171875, 0.439422607421875, 0.501220703125, 0.563018798828125, 0.62481689453125, 0.686614990234375, 0.7484130859375, 0.810211181640625, 0.87200927734375, 0.933807373046875, 0.99560546875, 1.057403564453125, 1.11920166015625, 1.180999755859375, 1.2427978515625, 1.304595947265625, 1.36639404296875, 1.428192138671875, 1.489990234375, 1.551788330078125, 1.61358642578125, 1.675384521484375, 1.7371826171875, 1.798980712890625, 1.86077880859375, 1.922576904296875, 1.984375]}, "gradients/decoder.transformer.h.21.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 4.0, 3.0, 5.0, 8.0, 9.0, 18.0, 21.0, 26.0, 48.0, 60.0, 74.0, 127.0, 158.0, 247.0, 381.0, 508.0, 791.0, 1101.0, 1570.0, 2492.0, 3891.0, 6626.0, 12439.0, 26882.0, 68520.0, 245791.0, 490397.0, 108977.0, 38213.0, 16708.0, 8465.0, 4853.0, 2957.0, 2009.0, 1335.0, 865.0, 610.0, 406.0, 282.0, 209.0, 158.0, 98.0, 58.0, 42.0, 34.0, 23.0, 21.0, 15.0, 12.0, 7.0, 5.0, 2.0, 2.0, 3.0, 2.0, 1.0], "bins": [-1.84765625, -1.79364013671875, -1.7396240234375, -1.68560791015625, -1.631591796875, -1.57757568359375, -1.5235595703125, -1.46954345703125, -1.41552734375, -1.36151123046875, -1.3074951171875, -1.25347900390625, -1.199462890625, -1.14544677734375, -1.0914306640625, -1.03741455078125, -0.9833984375, -0.92938232421875, -0.8753662109375, -0.82135009765625, -0.767333984375, -0.71331787109375, -0.6593017578125, -0.60528564453125, -0.55126953125, -0.49725341796875, -0.4432373046875, -0.38922119140625, -0.335205078125, -0.28118896484375, -0.2271728515625, -0.17315673828125, -0.119140625, -0.06512451171875, -0.0111083984375, 0.04290771484375, 0.096923828125, 0.15093994140625, 0.2049560546875, 0.25897216796875, 0.31298828125, 0.36700439453125, 0.4210205078125, 0.47503662109375, 0.529052734375, 0.58306884765625, 0.6370849609375, 0.69110107421875, 0.7451171875, 0.79913330078125, 0.8531494140625, 0.90716552734375, 0.961181640625, 1.01519775390625, 1.0692138671875, 1.12322998046875, 1.17724609375, 1.23126220703125, 1.2852783203125, 1.33929443359375, 1.393310546875, 1.44732666015625, 1.5013427734375, 1.55535888671875, 1.609375]}, "gradients/decoder.transformer.h.21.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 4.0, 0.0, 4.0, 7.0, 5.0, 5.0, 8.0, 9.0, 22.0, 21.0, 15.0, 11.0, 31.0, 24.0, 29.0, 26.0, 38.0, 34.0, 38.0, 48.0, 52.0, 106.0, 1911.0, 129.0, 70.0, 47.0, 47.0, 35.0, 38.0, 37.0, 31.0, 27.0, 25.0, 22.0, 15.0, 15.0, 11.0, 11.0, 12.0, 6.0, 7.0, 7.0, 10.0, 3.0, 4.0, 1.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-7.70703125, -7.45513916015625, -7.2032470703125, -6.95135498046875, -6.699462890625, -6.44757080078125, -6.1956787109375, -5.94378662109375, -5.69189453125, -5.44000244140625, -5.1881103515625, -4.93621826171875, -4.684326171875, -4.43243408203125, -4.1805419921875, -3.92864990234375, -3.6767578125, -3.42486572265625, -3.1729736328125, -2.92108154296875, -2.669189453125, -2.41729736328125, -2.1654052734375, -1.91351318359375, -1.66162109375, -1.40972900390625, -1.1578369140625, -0.90594482421875, -0.654052734375, -0.40216064453125, -0.1502685546875, 0.10162353515625, 0.353515625, 0.60540771484375, 0.8572998046875, 1.10919189453125, 1.361083984375, 1.61297607421875, 1.8648681640625, 2.11676025390625, 2.36865234375, 2.62054443359375, 2.8724365234375, 3.12432861328125, 3.376220703125, 3.62811279296875, 3.8800048828125, 4.13189697265625, 4.3837890625, 4.63568115234375, 4.8875732421875, 5.13946533203125, 5.391357421875, 5.64324951171875, 5.8951416015625, 6.14703369140625, 6.39892578125, 6.65081787109375, 6.9027099609375, 7.15460205078125, 7.406494140625, 7.65838623046875, 7.9102783203125, 8.16217041015625, 8.4140625]}, "gradients/decoder.transformer.h.21.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 2.0, 4.0, 2.0, 7.0, 8.0, 6.0, 8.0, 10.0, 8.0, 13.0, 20.0, 15.0, 21.0, 26.0, 31.0, 40.0, 60.0, 90.0, 130.0, 259.0, 504.0, 2030.0, 42281.0, 3077643.0, 19984.0, 1414.0, 446.0, 205.0, 122.0, 74.0, 46.0, 40.0, 30.0, 28.0, 15.0, 12.0, 22.0, 20.0, 10.0, 6.0, 5.0, 4.0, 7.0, 4.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.828125, -15.3543701171875, -14.880615234375, -14.4068603515625, -13.93310546875, -13.4593505859375, -12.985595703125, -12.5118408203125, -12.0380859375, -11.5643310546875, -11.090576171875, -10.6168212890625, -10.14306640625, -9.6693115234375, -9.195556640625, -8.7218017578125, -8.248046875, -7.7742919921875, -7.300537109375, -6.8267822265625, -6.35302734375, -5.8792724609375, -5.405517578125, -4.9317626953125, -4.4580078125, -3.9842529296875, -3.510498046875, -3.0367431640625, -2.56298828125, -2.0892333984375, -1.615478515625, -1.1417236328125, -0.66796875, -0.1942138671875, 0.279541015625, 0.7532958984375, 1.22705078125, 1.7008056640625, 2.174560546875, 2.6483154296875, 3.1220703125, 3.5958251953125, 4.069580078125, 4.5433349609375, 5.01708984375, 5.4908447265625, 5.964599609375, 6.4383544921875, 6.912109375, 7.3858642578125, 7.859619140625, 8.3333740234375, 8.80712890625, 9.2808837890625, 9.754638671875, 10.2283935546875, 10.7021484375, 11.1759033203125, 11.649658203125, 12.1234130859375, 12.59716796875, 13.0709228515625, 13.544677734375, 14.0184326171875, 14.4921875]}, "gradients/decoder.transformer.h.21.ln_1.weight": {"_type": "histogram", "values": [1014.0, 8.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.024723529815674, 2.158104419708252, 8.340932846069336, 14.523761749267578, 20.706588745117188, 26.889415740966797, 33.07224655151367, 39.25507354736328, 45.43790054321289, 51.6207275390625, 57.803558349609375, 63.986385345458984, 70.1692123413086, 76.35203552246094, 82.53486633300781, 88.71769714355469, 94.90052032470703, 101.0833511352539, 107.26617431640625, 113.44900512695312, 119.6318359375, 125.81465911865234, 131.99749755859375, 138.18031311035156, 144.36314392089844, 150.5459747314453, 156.7288055419922, 162.91162109375, 169.09445190429688, 175.27728271484375, 181.46011352539062, 187.6429443359375, 193.82577514648438, 200.00860595703125, 206.19143676757812, 212.374267578125, 218.5570831298828, 224.7399139404297, 230.92274475097656, 237.10557556152344, 243.28839111328125, 249.47122192382812, 255.654052734375, 261.8368835449219, 268.01971435546875, 274.2025146484375, 280.3853759765625, 286.56817626953125, 292.75103759765625, 298.9338684082031, 305.11669921875, 311.2995300292969, 317.48236083984375, 323.6651611328125, 329.8480224609375, 336.03082275390625, 342.2136535644531, 348.396484375, 354.5793151855469, 360.76214599609375, 366.9449768066406, 373.1278076171875, 379.31060791015625, 385.4934387207031, 391.67626953125]}, "gradients/decoder.transformer.h.21.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 4.0, 4.0, 7.0, 10.0, 7.0, 12.0, 9.0, 6.0, 19.0, 20.0, 19.0, 20.0, 20.0, 21.0, 31.0, 32.0, 28.0, 42.0, 42.0, 34.0, 41.0, 49.0, 36.0, 39.0, 40.0, 37.0, 39.0, 31.0, 36.0, 37.0, 30.0, 31.0, 25.0, 27.0, 25.0, 21.0, 17.0, 12.0, 8.0, 11.0, 8.0, 4.0, 6.0, 4.0, 4.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-24.511932373046875, -23.742786407470703, -22.973642349243164, -22.204496383666992, -21.435352325439453, -20.66620635986328, -19.89706039428711, -19.12791633605957, -18.35877227783203, -17.58962631225586, -16.82048225402832, -16.05133628845215, -15.28219223022461, -14.513046264648438, -13.743901252746582, -12.974756240844727, -12.205610275268555, -11.4364652633667, -10.667320251464844, -9.898174285888672, -9.129030227661133, -8.359884262084961, -7.5907392501831055, -6.82159423828125, -6.0524492263793945, -5.283304214477539, -4.514159202575684, -3.74501371383667, -2.9758687019348145, -2.206723690032959, -1.4375782012939453, -0.6684331893920898, 0.10071372985839844, 0.8698588609695435, 1.6390039920806885, 2.408149242401123, 3.1772942543029785, 3.946439266204834, 4.715584754943848, 5.484729766845703, 6.253874778747559, 7.023019790649414, 7.7921648025512695, 8.561309814453125, 9.330455780029297, 10.099599838256836, 10.868745803833008, 11.637890815734863, 12.407035827636719, 13.176180839538574, 13.94532585144043, 14.714471817016602, 15.48361587524414, 16.252761840820312, 17.021907806396484, 17.791051864624023, 18.560195922851562, 19.329341888427734, 20.098485946655273, 20.867631912231445, 21.636775970458984, 22.405921936035156, 23.175067901611328, 23.944211959838867, 24.71335792541504]}, "gradients/decoder.transformer.h.20.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 8.0, 5.0, 5.0, 8.0, 4.0, 9.0, 9.0, 17.0, 11.0, 19.0, 21.0, 20.0, 21.0, 25.0, 29.0, 34.0, 39.0, 46.0, 35.0, 44.0, 26.0, 36.0, 66.0, 44.0, 45.0, 41.0, 40.0, 32.0, 24.0, 27.0, 24.0, 29.0, 27.0, 18.0, 26.0, 12.0, 22.0, 11.0, 12.0, 8.0, 8.0, 6.0, 3.0, 1.0, 3.0, 4.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.0078125, -1.94366455078125, -1.8795166015625, -1.81536865234375, -1.751220703125, -1.68707275390625, -1.6229248046875, -1.55877685546875, -1.49462890625, -1.43048095703125, -1.3663330078125, -1.30218505859375, -1.238037109375, -1.17388916015625, -1.1097412109375, -1.04559326171875, -0.9814453125, -0.91729736328125, -0.8531494140625, -0.78900146484375, -0.724853515625, -0.66070556640625, -0.5965576171875, -0.53240966796875, -0.46826171875, -0.40411376953125, -0.3399658203125, -0.27581787109375, -0.211669921875, -0.14752197265625, -0.0833740234375, -0.01922607421875, 0.044921875, 0.10906982421875, 0.1732177734375, 0.23736572265625, 0.301513671875, 0.36566162109375, 0.4298095703125, 0.49395751953125, 0.55810546875, 0.62225341796875, 0.6864013671875, 0.75054931640625, 0.814697265625, 0.87884521484375, 0.9429931640625, 1.00714111328125, 1.0712890625, 1.13543701171875, 1.1995849609375, 1.26373291015625, 1.327880859375, 1.39202880859375, 1.4561767578125, 1.52032470703125, 1.58447265625, 1.64862060546875, 1.7127685546875, 1.77691650390625, 1.841064453125, 1.90521240234375, 1.9693603515625, 2.03350830078125, 2.09765625]}, "gradients/decoder.transformer.h.20.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 4.0, 3.0, 3.0, 6.0, 6.0, 9.0, 8.0, 18.0, 10.0, 20.0, 26.0, 21.0, 32.0, 40.0, 59.0, 71.0, 102.0, 183.0, 294.0, 677.0, 1839.0, 7880.0, 57567.0, 1662049.0, 2374439.0, 75804.0, 9411.0, 2102.0, 682.0, 329.0, 164.0, 87.0, 66.0, 66.0, 40.0, 32.0, 26.0, 32.0, 15.0, 13.0, 8.0, 13.0, 10.0, 9.0, 6.0, 1.0, 4.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-8.8671875, -8.590576171875, -8.31396484375, -8.037353515625, -7.7607421875, -7.484130859375, -7.20751953125, -6.930908203125, -6.654296875, -6.377685546875, -6.10107421875, -5.824462890625, -5.5478515625, -5.271240234375, -4.99462890625, -4.718017578125, -4.44140625, -4.164794921875, -3.88818359375, -3.611572265625, -3.3349609375, -3.058349609375, -2.78173828125, -2.505126953125, -2.228515625, -1.951904296875, -1.67529296875, -1.398681640625, -1.1220703125, -0.845458984375, -0.56884765625, -0.292236328125, -0.015625, 0.260986328125, 0.53759765625, 0.814208984375, 1.0908203125, 1.367431640625, 1.64404296875, 1.920654296875, 2.197265625, 2.473876953125, 2.75048828125, 3.027099609375, 3.3037109375, 3.580322265625, 3.85693359375, 4.133544921875, 4.41015625, 4.686767578125, 4.96337890625, 5.239990234375, 5.5166015625, 5.793212890625, 6.06982421875, 6.346435546875, 6.623046875, 6.899658203125, 7.17626953125, 7.452880859375, 7.7294921875, 8.006103515625, 8.28271484375, 8.559326171875, 8.8359375]}, "gradients/decoder.transformer.h.20.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 1.0, 8.0, 8.0, 11.0, 22.0, 50.0, 73.0, 145.0, 225.0, 471.0, 781.0, 946.0, 601.0, 343.0, 163.0, 88.0, 46.0, 36.0, 28.0, 17.0, 4.0, 4.0, 0.0, 4.0, 1.0, 4.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.0625, -11.76263427734375, -11.4627685546875, -11.16290283203125, -10.863037109375, -10.56317138671875, -10.2633056640625, -9.96343994140625, -9.66357421875, -9.36370849609375, -9.0638427734375, -8.76397705078125, -8.464111328125, -8.16424560546875, -7.8643798828125, -7.56451416015625, -7.2646484375, -6.96478271484375, -6.6649169921875, -6.36505126953125, -6.065185546875, -5.76531982421875, -5.4654541015625, -5.16558837890625, -4.86572265625, -4.56585693359375, -4.2659912109375, -3.96612548828125, -3.666259765625, -3.36639404296875, -3.0665283203125, -2.76666259765625, -2.466796875, -2.16693115234375, -1.8670654296875, -1.56719970703125, -1.267333984375, -0.96746826171875, -0.6676025390625, -0.36773681640625, -0.06787109375, 0.23199462890625, 0.5318603515625, 0.83172607421875, 1.131591796875, 1.43145751953125, 1.7313232421875, 2.03118896484375, 2.3310546875, 2.63092041015625, 2.9307861328125, 3.23065185546875, 3.530517578125, 3.83038330078125, 4.1302490234375, 4.43011474609375, 4.72998046875, 5.02984619140625, 5.3297119140625, 5.62957763671875, 5.929443359375, 6.22930908203125, 6.5291748046875, 6.82904052734375, 7.12890625]}, "gradients/decoder.transformer.h.20.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 9.0, 7.0, 10.0, 22.0, 47.0, 61.0, 91.0, 176.0, 346.0, 810.0, 2977.0, 26119.0, 770394.0, 3304544.0, 80189.0, 6194.0, 1315.0, 467.0, 229.0, 125.0, 63.0, 37.0, 20.0, 16.0, 5.0, 6.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-18.28125, -17.655029296875, -17.02880859375, -16.402587890625, -15.7763671875, -15.150146484375, -14.52392578125, -13.897705078125, -13.271484375, -12.645263671875, -12.01904296875, -11.392822265625, -10.7666015625, -10.140380859375, -9.51416015625, -8.887939453125, -8.26171875, -7.635498046875, -7.00927734375, -6.383056640625, -5.7568359375, -5.130615234375, -4.50439453125, -3.878173828125, -3.251953125, -2.625732421875, -1.99951171875, -1.373291015625, -0.7470703125, -0.120849609375, 0.50537109375, 1.131591796875, 1.7578125, 2.384033203125, 3.01025390625, 3.636474609375, 4.2626953125, 4.888916015625, 5.51513671875, 6.141357421875, 6.767578125, 7.393798828125, 8.02001953125, 8.646240234375, 9.2724609375, 9.898681640625, 10.52490234375, 11.151123046875, 11.77734375, 12.403564453125, 13.02978515625, 13.656005859375, 14.2822265625, 14.908447265625, 15.53466796875, 16.160888671875, 16.787109375, 17.413330078125, 18.03955078125, 18.665771484375, 19.2919921875, 19.918212890625, 20.54443359375, 21.170654296875, 21.796875]}, "gradients/decoder.transformer.h.20.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 6.0, 77.0, 317.0, 433.0, 150.0, 28.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-156.2011260986328, -152.41717529296875, -148.63323974609375, -144.8492889404297, -141.06533813476562, -137.28140258789062, -133.49745178222656, -129.7135009765625, -125.9295654296875, -122.14562225341797, -118.3616714477539, -114.57772827148438, -110.79378509521484, -107.00984191894531, -103.22589111328125, -99.44194793701172, -95.65799713134766, -91.87405395507812, -88.09010314941406, -84.30615997314453, -80.522216796875, -76.73826599121094, -72.9543228149414, -69.17037963867188, -65.38642883300781, -61.602481842041016, -57.818538665771484, -54.03459167480469, -50.250648498535156, -46.46670150756836, -42.68275451660156, -38.89881134033203, -35.1148681640625, -31.330923080444336, -27.546977996826172, -23.763031005859375, -19.97908592224121, -16.195140838623047, -12.41119384765625, -8.627248764038086, -4.843303680419922, -1.0593581199645996, 2.7245874404907227, 6.508533477783203, 10.292478561401367, 14.076423645019531, 17.860370635986328, 21.644315719604492, 25.428260803222656, 29.21220588684082, 32.996150970458984, 36.78009796142578, 40.56404113769531, 44.34798812866211, 48.131935119628906, 51.91587829589844, 55.699825286865234, 59.48377227783203, 63.26771545410156, 67.05166625976562, 70.83560943603516, 74.61955261230469, 78.40350341796875, 82.18744659423828, 85.97138977050781]}, "gradients/decoder.transformer.h.20.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 2.0, 4.0, 3.0, 3.0, 3.0, 6.0, 1.0, 13.0, 11.0, 8.0, 11.0, 23.0, 12.0, 19.0, 21.0, 22.0, 33.0, 27.0, 33.0, 36.0, 49.0, 37.0, 46.0, 45.0, 48.0, 60.0, 31.0, 44.0, 43.0, 31.0, 32.0, 32.0, 26.0, 31.0, 23.0, 25.0, 21.0, 18.0, 16.0, 12.0, 11.0, 13.0, 6.0, 5.0, 5.0, 8.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-19.76416778564453, -19.17383575439453, -18.583505630493164, -17.993173599243164, -17.402841567993164, -16.812511444091797, -16.222179412841797, -15.631847381591797, -15.041516304016113, -14.45118522644043, -13.86085319519043, -13.270522117614746, -12.680191040039062, -12.089859008789062, -11.499527931213379, -10.909196853637695, -10.318864822387695, -9.728533744812012, -9.138201713562012, -8.547870635986328, -7.957539081573486, -7.3672075271606445, -6.776876449584961, -6.186544895172119, -5.596213340759277, -5.0058817863464355, -4.415550231933594, -3.82521915435791, -3.2348875999450684, -2.6445560455322266, -2.054224729537964, -1.4638934135437012, -0.8735618591308594, -0.28323042392730713, 0.3071010112762451, 0.8974324464797974, 1.4877638816833496, 2.0780954360961914, 2.668426752090454, 3.258758068084717, 3.8490896224975586, 4.4394211769104, 5.029752731323242, 5.620083808898926, 6.210415363311768, 6.800746917724609, 7.391077995300293, 7.981409549713135, 8.571741104125977, 9.16207218170166, 9.75240421295166, 10.342735290527344, 10.933067321777344, 11.523398399353027, 12.113729476928711, 12.704061508178711, 13.294392585754395, 13.884723663330078, 14.475055694580078, 15.065386772155762, 15.655717849731445, 16.246049880981445, 16.836381912231445, 17.426712036132812, 18.017044067382812]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 2.0, 5.0, 8.0, 3.0, 5.0, 5.0, 7.0, 7.0, 5.0, 14.0, 15.0, 15.0, 11.0, 20.0, 22.0, 29.0, 35.0, 36.0, 33.0, 36.0, 36.0, 38.0, 42.0, 34.0, 56.0, 47.0, 57.0, 47.0, 32.0, 31.0, 27.0, 25.0, 36.0, 24.0, 24.0, 25.0, 26.0, 16.0, 21.0, 9.0, 8.0, 9.0, 12.0, 2.0, 5.0, 4.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.974609375, -1.905792236328125, -1.83697509765625, -1.768157958984375, -1.6993408203125, -1.630523681640625, -1.56170654296875, -1.492889404296875, -1.424072265625, -1.355255126953125, -1.28643798828125, -1.217620849609375, -1.1488037109375, -1.079986572265625, -1.01116943359375, -0.942352294921875, -0.87353515625, -0.804718017578125, -0.73590087890625, -0.667083740234375, -0.5982666015625, -0.529449462890625, -0.46063232421875, -0.391815185546875, -0.322998046875, -0.254180908203125, -0.18536376953125, -0.116546630859375, -0.0477294921875, 0.021087646484375, 0.08990478515625, 0.158721923828125, 0.2275390625, 0.296356201171875, 0.36517333984375, 0.433990478515625, 0.5028076171875, 0.571624755859375, 0.64044189453125, 0.709259033203125, 0.778076171875, 0.846893310546875, 0.91571044921875, 0.984527587890625, 1.0533447265625, 1.122161865234375, 1.19097900390625, 1.259796142578125, 1.32861328125, 1.397430419921875, 1.46624755859375, 1.535064697265625, 1.6038818359375, 1.672698974609375, 1.74151611328125, 1.810333251953125, 1.879150390625, 1.947967529296875, 2.01678466796875, 2.085601806640625, 2.1544189453125, 2.223236083984375, 2.29205322265625, 2.360870361328125, 2.4296875]}, "gradients/decoder.transformer.h.20.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 4.0, 6.0, 12.0, 12.0, 29.0, 33.0, 54.0, 67.0, 129.0, 168.0, 258.0, 386.0, 610.0, 932.0, 1392.0, 2049.0, 3453.0, 5365.0, 8481.0, 13669.0, 22090.0, 37067.0, 64273.0, 125626.0, 333575.0, 209104.0, 90966.0, 50548.0, 30006.0, 17992.0, 10983.0, 6944.0, 4223.0, 2757.0, 1885.0, 1164.0, 738.0, 507.0, 349.0, 217.0, 154.0, 96.0, 71.0, 37.0, 29.0, 21.0, 15.0, 7.0, 3.0, 6.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.06640625, -0.06438064575195312, -0.06235504150390625, -0.060329437255859375, -0.0583038330078125, -0.056278228759765625, -0.05425262451171875, -0.052227020263671875, -0.050201416015625, -0.048175811767578125, -0.04615020751953125, -0.044124603271484375, -0.0420989990234375, -0.040073394775390625, -0.03804779052734375, -0.036022186279296875, -0.03399658203125, -0.031970977783203125, -0.02994537353515625, -0.027919769287109375, -0.0258941650390625, -0.023868560791015625, -0.02184295654296875, -0.019817352294921875, -0.017791748046875, -0.015766143798828125, -0.01374053955078125, -0.011714935302734375, -0.0096893310546875, -0.007663726806640625, -0.00563812255859375, -0.003612518310546875, -0.0015869140625, 0.000438690185546875, 0.00246429443359375, 0.004489898681640625, 0.0065155029296875, 0.008541107177734375, 0.01056671142578125, 0.012592315673828125, 0.014617919921875, 0.016643524169921875, 0.01866912841796875, 0.020694732666015625, 0.0227203369140625, 0.024745941162109375, 0.02677154541015625, 0.028797149658203125, 0.03082275390625, 0.032848358154296875, 0.03487396240234375, 0.036899566650390625, 0.0389251708984375, 0.040950775146484375, 0.04297637939453125, 0.045001983642578125, 0.047027587890625, 0.049053192138671875, 0.05107879638671875, 0.053104400634765625, 0.0551300048828125, 0.057155609130859375, 0.05918121337890625, 0.061206817626953125, 0.063232421875]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 6.0, 2.0, 4.0, 4.0, 6.0, 8.0, 6.0, 7.0, 8.0, 5.0, 12.0, 9.0, 11.0, 17.0, 21.0, 23.0, 39.0, 25.0, 28.0, 42.0, 32.0, 29.0, 30.0, 37.0, 37.0, 1067.0, 57.0, 40.0, 46.0, 41.0, 38.0, 23.0, 49.0, 35.0, 18.0, 31.0, 19.0, 16.0, 22.0, 16.0, 15.0, 16.0, 11.0, 6.0, 7.0, 4.0, 5.0, 3.0, 1.0, 3.0, 1.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.318359375, -1.2755279541015625, -1.232696533203125, -1.1898651123046875, -1.14703369140625, -1.1042022705078125, -1.061370849609375, -1.0185394287109375, -0.9757080078125, -0.9328765869140625, -0.890045166015625, -0.8472137451171875, -0.80438232421875, -0.7615509033203125, -0.718719482421875, -0.6758880615234375, -0.633056640625, -0.5902252197265625, -0.547393798828125, -0.5045623779296875, -0.46173095703125, -0.4188995361328125, -0.376068115234375, -0.3332366943359375, -0.2904052734375, -0.2475738525390625, -0.204742431640625, -0.1619110107421875, -0.11907958984375, -0.0762481689453125, -0.033416748046875, 0.0094146728515625, 0.05224609375, 0.0950775146484375, 0.137908935546875, 0.1807403564453125, 0.22357177734375, 0.2664031982421875, 0.309234619140625, 0.3520660400390625, 0.3948974609375, 0.4377288818359375, 0.480560302734375, 0.5233917236328125, 0.56622314453125, 0.6090545654296875, 0.651885986328125, 0.6947174072265625, 0.737548828125, 0.7803802490234375, 0.823211669921875, 0.8660430908203125, 0.90887451171875, 0.9517059326171875, 0.994537353515625, 1.0373687744140625, 1.0802001953125, 1.1230316162109375, 1.165863037109375, 1.2086944580078125, 1.25152587890625, 1.2943572998046875, 1.337188720703125, 1.3800201416015625, 1.4228515625]}, "gradients/decoder.transformer.h.20.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 6.0, 6.0, 9.0, 7.0, 19.0, 25.0, 35.0, 36.0, 72.0, 111.0, 192.0, 258.0, 405.0, 575.0, 880.0, 1383.0, 2158.0, 3363.0, 5356.0, 8559.0, 13760.0, 23463.0, 40255.0, 73575.0, 150833.0, 1413088.0, 170263.0, 80132.0, 43736.0, 24739.0, 14896.0, 9049.0, 5671.0, 3587.0, 2296.0, 1557.0, 971.0, 619.0, 414.0, 241.0, 176.0, 123.0, 83.0, 54.0, 30.0, 36.0, 14.0, 7.0, 5.0, 6.0, 6.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.057647705078125, -0.05576181411743164, -0.05387592315673828, -0.05199003219604492, -0.05010414123535156, -0.0482182502746582, -0.046332359313964844, -0.044446468353271484, -0.042560577392578125, -0.040674686431884766, -0.038788795471191406, -0.03690290451049805, -0.03501701354980469, -0.03313112258911133, -0.03124523162841797, -0.02935934066772461, -0.02747344970703125, -0.02558755874633789, -0.02370166778564453, -0.021815776824951172, -0.019929885864257812, -0.018043994903564453, -0.016158103942871094, -0.014272212982177734, -0.012386322021484375, -0.010500431060791016, -0.008614540100097656, -0.006728649139404297, -0.0048427581787109375, -0.002956867218017578, -0.0010709762573242188, 0.0008149147033691406, 0.0027008056640625, 0.004586696624755859, 0.006472587585449219, 0.008358478546142578, 0.010244369506835938, 0.012130260467529297, 0.014016151428222656, 0.015902042388916016, 0.017787933349609375, 0.019673824310302734, 0.021559715270996094, 0.023445606231689453, 0.025331497192382812, 0.027217388153076172, 0.02910327911376953, 0.03098917007446289, 0.03287506103515625, 0.03476095199584961, 0.03664684295654297, 0.03853273391723633, 0.04041862487792969, 0.04230451583862305, 0.044190406799316406, 0.046076297760009766, 0.047962188720703125, 0.049848079681396484, 0.051733970642089844, 0.0536198616027832, 0.05550575256347656, 0.05739164352416992, 0.05927753448486328, 0.06116342544555664, 0.06304931640625]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 5.0, 1.0, 1.0, 2.0, 7.0, 3.0, 11.0, 10.0, 8.0, 9.0, 13.0, 20.0, 19.0, 21.0, 23.0, 29.0, 32.0, 59.0, 61.0, 65.0, 69.0, 77.0, 65.0, 54.0, 65.0, 36.0, 46.0, 38.0, 21.0, 24.0, 23.0, 13.0, 10.0, 17.0, 9.0, 7.0, 4.0, 5.0, 9.0, 6.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 3.0, 0.0, 3.0], "bins": [-4.5299530029296875e-06, -4.405155777931213e-06, -4.280358552932739e-06, -4.155561327934265e-06, -4.030764102935791e-06, -3.905966877937317e-06, -3.7811696529388428e-06, -3.6563724279403687e-06, -3.5315752029418945e-06, -3.4067779779434204e-06, -3.2819807529449463e-06, -3.157183527946472e-06, -3.032386302947998e-06, -2.907589077949524e-06, -2.78279185295105e-06, -2.6579946279525757e-06, -2.5331974029541016e-06, -2.4084001779556274e-06, -2.2836029529571533e-06, -2.158805727958679e-06, -2.034008502960205e-06, -1.909211277961731e-06, -1.7844140529632568e-06, -1.6596168279647827e-06, -1.5348196029663086e-06, -1.4100223779678345e-06, -1.2852251529693604e-06, -1.1604279279708862e-06, -1.0356307029724121e-06, -9.10833477973938e-07, -7.860362529754639e-07, -6.612390279769897e-07, -5.364418029785156e-07, -4.116445779800415e-07, -2.868473529815674e-07, -1.6205012798309326e-07, -3.725290298461914e-08, 8.754432201385498e-08, 2.123415470123291e-07, 3.371387720108032e-07, 4.6193599700927734e-07, 5.867332220077515e-07, 7.115304470062256e-07, 8.363276720046997e-07, 9.611248970031738e-07, 1.085922122001648e-06, 1.210719347000122e-06, 1.3355165719985962e-06, 1.4603137969970703e-06, 1.5851110219955444e-06, 1.7099082469940186e-06, 1.8347054719924927e-06, 1.959502696990967e-06, 2.084299921989441e-06, 2.209097146987915e-06, 2.333894371986389e-06, 2.4586915969848633e-06, 2.5834888219833374e-06, 2.7082860469818115e-06, 2.8330832719802856e-06, 2.9578804969787598e-06, 3.082677721977234e-06, 3.207474946975708e-06, 3.332272171974182e-06, 3.4570693969726562e-06]}, "gradients/decoder.transformer.h.20.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 6.0, 6.0, 7.0, 4.0, 9.0, 9.0, 16.0, 15.0, 25.0, 41.0, 47.0, 76.0, 90.0, 134.0, 154.0, 281.0, 553.0, 4790.0, 705091.0, 333658.0, 2191.0, 483.0, 261.0, 177.0, 106.0, 84.0, 49.0, 50.0, 38.0, 30.0, 25.0, 13.0, 12.0, 8.0, 5.0, 7.0, 2.0, 2.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.109476089477539e-05, -5.882326513528824e-05, -5.6551769375801086e-05, -5.4280273616313934e-05, -5.200877785682678e-05, -4.973728209733963e-05, -4.746578633785248e-05, -4.5194290578365326e-05, -4.2922794818878174e-05, -4.065129905939102e-05, -3.837980329990387e-05, -3.610830754041672e-05, -3.3836811780929565e-05, -3.156531602144241e-05, -2.929382026195526e-05, -2.702232450246811e-05, -2.4750828742980957e-05, -2.2479332983493805e-05, -2.0207837224006653e-05, -1.79363414645195e-05, -1.566484570503235e-05, -1.3393349945545197e-05, -1.1121854186058044e-05, -8.850358426570892e-06, -6.57886266708374e-06, -4.307366907596588e-06, -2.035871148109436e-06, 2.3562461137771606e-07, 2.507120370864868e-06, 4.77861613035202e-06, 7.050111889839172e-06, 9.321607649326324e-06, 1.1593103408813477e-05, 1.3864599168300629e-05, 1.613609492778778e-05, 1.8407590687274933e-05, 2.0679086446762085e-05, 2.2950582206249237e-05, 2.522207796573639e-05, 2.749357372522354e-05, 2.9765069484710693e-05, 3.2036565244197845e-05, 3.4308061003685e-05, 3.657955676317215e-05, 3.88510525226593e-05, 4.1122548282146454e-05, 4.3394044041633606e-05, 4.566553980112076e-05, 4.793703556060791e-05, 5.020853132009506e-05, 5.2480027079582214e-05, 5.4751522839069366e-05, 5.702301859855652e-05, 5.929451435804367e-05, 6.156601011753082e-05, 6.383750587701797e-05, 6.610900163650513e-05, 6.838049739599228e-05, 7.065199315547943e-05, 7.292348891496658e-05, 7.519498467445374e-05, 7.746648043394089e-05, 7.973797619342804e-05, 8.200947195291519e-05, 8.428096771240234e-05]}, "gradients/decoder.transformer.h.20.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 6.0, 34.0, 84.0, 204.0, 310.0, 231.0, 106.0, 30.0, 4.0, 4.0, 1.0, 0.0, 2.0], "bins": [-1.4440538507187739e-05, -1.4180430298438296e-05, -1.3920322999183554e-05, -1.3660214790434111e-05, -1.340010749117937e-05, -1.3139999282429926e-05, -1.2879891073680483e-05, -1.2619783774425741e-05, -1.2359675565676298e-05, -1.2099567356926855e-05, -1.1839460057672113e-05, -1.157935184892267e-05, -1.1319243640173227e-05, -1.1059136340918485e-05, -1.0799028132169042e-05, -1.05389208329143e-05, -1.0278812624164857e-05, -1.0018704415415414e-05, -9.758597116160672e-06, -9.498488907411229e-06, -9.238381608156487e-06, -8.978273399407044e-06, -8.7181651906576e-06, -8.45805789140286e-06, -8.197949682653416e-06, -7.937841473903973e-06, -7.677734174649231e-06, -7.417625965899788e-06, -7.1575182118976954e-06, -6.897410457895603e-06, -6.63730224914616e-06, -6.377194495144067e-06, -6.117086741141975e-06, -5.8569789871398825e-06, -5.59687123313779e-06, -5.336763024388347e-06, -5.076655270386254e-06, -4.816547516384162e-06, -4.556439307634719e-06, -4.296331553632626e-06, -4.036223799630534e-06, -3.7761160456284415e-06, -3.5160080642526736e-06, -3.255900082876906e-06, -2.9957923288748134e-06, -2.735684574872721e-06, -2.475576593496953e-06, -2.2154686121211853e-06, -1.9553606307454174e-06, -1.6952527630564873e-06, -1.4351448953675572e-06, -1.175037027678627e-06, -9.149291599896969e-07, -6.548212923007668e-07, -3.9471342461183667e-07, -1.3460555692290654e-07, 1.2550231076602358e-07, 3.856101784549537e-07, 6.457180461438838e-07, 9.05825913832814e-07, 1.165933781521744e-06, 1.4260416492106742e-06, 1.6861495168996044e-06, 1.9462572709016968e-06, 2.2063652522774646e-06]}, "gradients/decoder.transformer.h.20.ln_cross_attn.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 6.0, 0.0, 2.0, 5.0, 6.0, 8.0, 4.0, 15.0, 14.0, 21.0, 17.0, 15.0, 21.0, 28.0, 25.0, 34.0, 36.0, 23.0, 57.0, 31.0, 36.0, 40.0, 46.0, 36.0, 35.0, 35.0, 29.0, 87.0, 29.0, 37.0, 24.0, 21.0, 25.0, 23.0, 20.0, 17.0, 29.0, 9.0, 13.0, 12.0, 6.0, 13.0, 5.0, 5.0, 2.0, 5.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0], "bins": [-2.1457672119140625e-06, -2.0796433091163635e-06, -2.0135194063186646e-06, -1.9473955035209656e-06, -1.8812716007232666e-06, -1.8151476979255676e-06, -1.7490237951278687e-06, -1.6828998923301697e-06, -1.6167759895324707e-06, -1.5506520867347717e-06, -1.4845281839370728e-06, -1.4184042811393738e-06, -1.3522803783416748e-06, -1.2861564755439758e-06, -1.2200325727462769e-06, -1.1539086699485779e-06, -1.087784767150879e-06, -1.02166086435318e-06, -9.55536961555481e-07, -8.89413058757782e-07, -8.23289155960083e-07, -7.57165253162384e-07, -6.910413503646851e-07, -6.249174475669861e-07, -5.587935447692871e-07, -4.926696419715881e-07, -4.2654573917388916e-07, -3.604218363761902e-07, -2.942979335784912e-07, -2.2817403078079224e-07, -1.6205012798309326e-07, -9.592622518539429e-08, -2.9802322387695312e-08, 3.632158041000366e-08, 1.0244548320770264e-07, 1.685693860054016e-07, 2.3469328880310059e-07, 3.0081719160079956e-07, 3.6694109439849854e-07, 4.330649971961975e-07, 4.991888999938965e-07, 5.653128027915955e-07, 6.314367055892944e-07, 6.975606083869934e-07, 7.636845111846924e-07, 8.298084139823914e-07, 8.959323167800903e-07, 9.620562195777893e-07, 1.0281801223754883e-06, 1.0943040251731873e-06, 1.1604279279708862e-06, 1.2265518307685852e-06, 1.2926757335662842e-06, 1.3587996363639832e-06, 1.4249235391616821e-06, 1.491047441959381e-06, 1.55717134475708e-06, 1.623295247554779e-06, 1.689419150352478e-06, 1.755543053150177e-06, 1.821666955947876e-06, 1.887790858745575e-06, 1.953914761543274e-06, 2.020038664340973e-06, 2.086162567138672e-06]}, "gradients/decoder.transformer.h.20.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 2.0, 5.0, 8.0, 3.0, 5.0, 5.0, 7.0, 7.0, 5.0, 14.0, 15.0, 15.0, 11.0, 20.0, 22.0, 29.0, 35.0, 36.0, 33.0, 36.0, 36.0, 38.0, 42.0, 34.0, 56.0, 47.0, 57.0, 47.0, 32.0, 31.0, 27.0, 25.0, 36.0, 24.0, 24.0, 25.0, 26.0, 16.0, 21.0, 9.0, 8.0, 9.0, 12.0, 2.0, 5.0, 4.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.974609375, -1.905792236328125, -1.83697509765625, -1.768157958984375, -1.6993408203125, -1.630523681640625, -1.56170654296875, -1.492889404296875, -1.424072265625, -1.355255126953125, -1.28643798828125, -1.217620849609375, -1.1488037109375, -1.079986572265625, -1.01116943359375, -0.942352294921875, -0.87353515625, -0.804718017578125, -0.73590087890625, -0.667083740234375, -0.5982666015625, -0.529449462890625, -0.46063232421875, -0.391815185546875, -0.322998046875, -0.254180908203125, -0.18536376953125, -0.116546630859375, -0.0477294921875, 0.021087646484375, 0.08990478515625, 0.158721923828125, 0.2275390625, 0.296356201171875, 0.36517333984375, 0.433990478515625, 0.5028076171875, 0.571624755859375, 0.64044189453125, 0.709259033203125, 0.778076171875, 0.846893310546875, 0.91571044921875, 0.984527587890625, 1.0533447265625, 1.122161865234375, 1.19097900390625, 1.259796142578125, 1.32861328125, 1.397430419921875, 1.46624755859375, 1.535064697265625, 1.6038818359375, 1.672698974609375, 1.74151611328125, 1.810333251953125, 1.879150390625, 1.947967529296875, 2.01678466796875, 2.085601806640625, 2.1544189453125, 2.223236083984375, 2.29205322265625, 2.360870361328125, 2.4296875]}, "gradients/decoder.transformer.h.20.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 6.0, 4.0, 3.0, 6.0, 6.0, 8.0, 13.0, 10.0, 20.0, 24.0, 37.0, 53.0, 73.0, 87.0, 129.0, 175.0, 236.0, 382.0, 600.0, 1011.0, 1711.0, 3248.0, 6364.0, 13788.0, 36562.0, 131906.0, 592539.0, 180894.0, 46016.0, 16493.0, 7450.0, 3616.0, 1914.0, 1130.0, 659.0, 445.0, 295.0, 189.0, 134.0, 83.0, 53.0, 50.0, 40.0, 27.0, 17.0, 15.0, 7.0, 8.0, 5.0, 2.0, 3.0, 4.0, 2.0, 0.0, 3.0, 4.0, 6.0], "bins": [-2.27734375, -2.209564208984375, -2.14178466796875, -2.074005126953125, -2.0062255859375, -1.938446044921875, -1.87066650390625, -1.802886962890625, -1.735107421875, -1.667327880859375, -1.59954833984375, -1.531768798828125, -1.4639892578125, -1.396209716796875, -1.32843017578125, -1.260650634765625, -1.19287109375, -1.125091552734375, -1.05731201171875, -0.989532470703125, -0.9217529296875, -0.853973388671875, -0.78619384765625, -0.718414306640625, -0.650634765625, -0.582855224609375, -0.51507568359375, -0.447296142578125, -0.3795166015625, -0.311737060546875, -0.24395751953125, -0.176177978515625, -0.1083984375, -0.040618896484375, 0.02716064453125, 0.094940185546875, 0.1627197265625, 0.230499267578125, 0.29827880859375, 0.366058349609375, 0.433837890625, 0.501617431640625, 0.56939697265625, 0.637176513671875, 0.7049560546875, 0.772735595703125, 0.84051513671875, 0.908294677734375, 0.97607421875, 1.043853759765625, 1.11163330078125, 1.179412841796875, 1.2471923828125, 1.314971923828125, 1.38275146484375, 1.450531005859375, 1.518310546875, 1.586090087890625, 1.65386962890625, 1.721649169921875, 1.7894287109375, 1.857208251953125, 1.92498779296875, 1.992767333984375, 2.060546875]}, "gradients/decoder.transformer.h.20.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 10.0, 3.0, 10.0, 17.0, 11.0, 14.0, 28.0, 30.0, 27.0, 23.0, 32.0, 30.0, 36.0, 42.0, 55.0, 59.0, 153.0, 1808.0, 184.0, 79.0, 58.0, 39.0, 38.0, 35.0, 32.0, 33.0, 29.0, 29.0, 29.0, 23.0, 19.0, 8.0, 7.0, 7.0, 5.0, 8.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.8828125, -8.6015625, -8.3203125, -8.0390625, -7.7578125, -7.4765625, -7.1953125, -6.9140625, -6.6328125, -6.3515625, -6.0703125, -5.7890625, -5.5078125, -5.2265625, -4.9453125, -4.6640625, -4.3828125, -4.1015625, -3.8203125, -3.5390625, -3.2578125, -2.9765625, -2.6953125, -2.4140625, -2.1328125, -1.8515625, -1.5703125, -1.2890625, -1.0078125, -0.7265625, -0.4453125, -0.1640625, 0.1171875, 0.3984375, 0.6796875, 0.9609375, 1.2421875, 1.5234375, 1.8046875, 2.0859375, 2.3671875, 2.6484375, 2.9296875, 3.2109375, 3.4921875, 3.7734375, 4.0546875, 4.3359375, 4.6171875, 4.8984375, 5.1796875, 5.4609375, 5.7421875, 6.0234375, 6.3046875, 6.5859375, 6.8671875, 7.1484375, 7.4296875, 7.7109375, 7.9921875, 8.2734375, 8.5546875, 8.8359375, 9.1171875]}, "gradients/decoder.transformer.h.20.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 6.0, 4.0, 8.0, 12.0, 13.0, 21.0, 24.0, 40.0, 34.0, 43.0, 46.0, 78.0, 126.0, 258.0, 593.0, 1821.0, 19968.0, 2997662.0, 119143.0, 4082.0, 857.0, 328.0, 175.0, 95.0, 61.0, 44.0, 25.0, 29.0, 28.0, 21.0, 16.0, 14.0, 9.0, 8.0, 3.0, 8.0, 2.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.109375, -13.6644287109375, -13.219482421875, -12.7745361328125, -12.32958984375, -11.8846435546875, -11.439697265625, -10.9947509765625, -10.5498046875, -10.1048583984375, -9.659912109375, -9.2149658203125, -8.77001953125, -8.3250732421875, -7.880126953125, -7.4351806640625, -6.990234375, -6.5452880859375, -6.100341796875, -5.6553955078125, -5.21044921875, -4.7655029296875, -4.320556640625, -3.8756103515625, -3.4306640625, -2.9857177734375, -2.540771484375, -2.0958251953125, -1.65087890625, -1.2059326171875, -0.760986328125, -0.3160400390625, 0.12890625, 0.5738525390625, 1.018798828125, 1.4637451171875, 1.90869140625, 2.3536376953125, 2.798583984375, 3.2435302734375, 3.6884765625, 4.1334228515625, 4.578369140625, 5.0233154296875, 5.46826171875, 5.9132080078125, 6.358154296875, 6.8031005859375, 7.248046875, 7.6929931640625, 8.137939453125, 8.5828857421875, 9.02783203125, 9.4727783203125, 9.917724609375, 10.3626708984375, 10.8076171875, 11.2525634765625, 11.697509765625, 12.1424560546875, 12.58740234375, 13.0323486328125, 13.477294921875, 13.9222412109375, 14.3671875]}, "gradients/decoder.transformer.h.20.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 377.0, 640.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.75509262084961, -17.15192413330078, -11.548757553100586, -5.945590972900391, -0.3424224853515625, 5.260746002197266, 10.863910675048828, 16.467079162597656, 22.070247650146484, 27.673416137695312, 33.276580810546875, 38.8797492980957, 44.48291778564453, 50.08608627319336, 55.68925094604492, 61.29241943359375, 66.89558410644531, 72.49874877929688, 78.10192108154297, 83.70508575439453, 89.30825805664062, 94.91142272949219, 100.51458740234375, 106.11775207519531, 111.72093200683594, 117.3240966796875, 122.9272689819336, 128.53042602539062, 134.13360595703125, 139.7367706298828, 145.33993530273438, 150.94309997558594, 156.5462646484375, 162.14942932128906, 167.75259399414062, 173.35577392578125, 178.9589385986328, 184.56210327148438, 190.16526794433594, 195.7684326171875, 201.37161254882812, 206.9747772216797, 212.57794189453125, 218.18112182617188, 223.78428649902344, 229.387451171875, 234.99061584472656, 240.59378051757812, 246.1969451904297, 251.80010986328125, 257.4032897949219, 263.0064392089844, 268.609619140625, 274.2127685546875, 279.8159484863281, 285.41912841796875, 291.02227783203125, 296.6254577636719, 302.2286071777344, 307.831787109375, 313.4349365234375, 319.0381164550781, 324.64129638671875, 330.24444580078125, 335.8476257324219]}, "gradients/decoder.transformer.h.20.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 1.0, 4.0, 5.0, 6.0, 8.0, 12.0, 10.0, 10.0, 23.0, 19.0, 19.0, 18.0, 32.0, 37.0, 31.0, 30.0, 28.0, 32.0, 40.0, 41.0, 44.0, 42.0, 47.0, 52.0, 44.0, 33.0, 36.0, 32.0, 25.0, 34.0, 33.0, 21.0, 19.0, 33.0, 16.0, 19.0, 22.0, 10.0, 8.0, 6.0, 3.0, 6.0, 7.0, 2.0, 4.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-26.201519012451172, -25.408926010131836, -24.6163330078125, -23.823740005493164, -23.031147003173828, -22.238554000854492, -21.445960998535156, -20.65336799621582, -19.860774993896484, -19.06818199157715, -18.275588989257812, -17.482995986938477, -16.69040298461914, -15.897809982299805, -15.105216979980469, -14.312623977661133, -13.520030975341797, -12.727437973022461, -11.934844970703125, -11.142251968383789, -10.349658966064453, -9.557065963745117, -8.764472961425781, -7.971879959106445, -7.179286956787109, -6.386693954467773, -5.5941009521484375, -4.801507949829102, -4.008914947509766, -3.2163219451904297, -2.4237289428710938, -1.6311359405517578, -0.8385448455810547, -0.04595184326171875, 0.7466411590576172, 1.5392341613769531, 2.331827163696289, 3.124420166015625, 3.917013168334961, 4.709606170654297, 5.502199172973633, 6.294792175292969, 7.087385177612305, 7.879978179931641, 8.672571182250977, 9.465164184570312, 10.257757186889648, 11.050350189208984, 11.84294319152832, 12.635536193847656, 13.428129196166992, 14.220722198486328, 15.013315200805664, 15.805908203125, 16.598501205444336, 17.391094207763672, 18.183687210083008, 18.976280212402344, 19.76887321472168, 20.561466217041016, 21.35405921936035, 22.146652221679688, 22.939245223999023, 23.73183822631836, 24.524431228637695]}, "gradients/decoder.transformer.h.19.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 4.0, 5.0, 7.0, 3.0, 4.0, 7.0, 8.0, 5.0, 8.0, 15.0, 15.0, 19.0, 16.0, 22.0, 21.0, 33.0, 35.0, 36.0, 33.0, 35.0, 41.0, 46.0, 41.0, 42.0, 44.0, 64.0, 39.0, 43.0, 34.0, 28.0, 29.0, 31.0, 33.0, 22.0, 20.0, 28.0, 22.0, 14.0, 9.0, 11.0, 9.0, 11.0, 6.0, 4.0, 4.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.06640625, -1.995208740234375, -1.92401123046875, -1.852813720703125, -1.7816162109375, -1.710418701171875, -1.63922119140625, -1.568023681640625, -1.496826171875, -1.425628662109375, -1.35443115234375, -1.283233642578125, -1.2120361328125, -1.140838623046875, -1.06964111328125, -0.998443603515625, -0.92724609375, -0.856048583984375, -0.78485107421875, -0.713653564453125, -0.6424560546875, -0.571258544921875, -0.50006103515625, -0.428863525390625, -0.357666015625, -0.286468505859375, -0.21527099609375, -0.144073486328125, -0.0728759765625, -0.001678466796875, 0.06951904296875, 0.140716552734375, 0.2119140625, 0.283111572265625, 0.35430908203125, 0.425506591796875, 0.4967041015625, 0.567901611328125, 0.63909912109375, 0.710296630859375, 0.781494140625, 0.852691650390625, 0.92388916015625, 0.995086669921875, 1.0662841796875, 1.137481689453125, 1.20867919921875, 1.279876708984375, 1.35107421875, 1.422271728515625, 1.49346923828125, 1.564666748046875, 1.6358642578125, 1.707061767578125, 1.77825927734375, 1.849456787109375, 1.920654296875, 1.991851806640625, 2.06304931640625, 2.134246826171875, 2.2054443359375, 2.276641845703125, 2.34783935546875, 2.419036865234375, 2.490234375]}, "gradients/decoder.transformer.h.19.mlp.c_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 7.0, 5.0, 10.0, 6.0, 6.0, 8.0, 16.0, 12.0, 16.0, 20.0, 24.0, 31.0, 55.0, 87.0, 140.0, 322.0, 750.0, 2361.0, 9088.0, 50662.0, 701358.0, 3164421.0, 230786.0, 26318.0, 5399.0, 1394.0, 470.0, 191.0, 86.0, 58.0, 34.0, 23.0, 31.0, 15.0, 14.0, 17.0, 14.0, 7.0, 7.0, 5.0, 5.0, 5.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.42578125, -6.19061279296875, -5.9554443359375, -5.72027587890625, -5.485107421875, -5.24993896484375, -5.0147705078125, -4.77960205078125, -4.54443359375, -4.30926513671875, -4.0740966796875, -3.83892822265625, -3.603759765625, -3.36859130859375, -3.1334228515625, -2.89825439453125, -2.6630859375, -2.42791748046875, -2.1927490234375, -1.95758056640625, -1.722412109375, -1.48724365234375, -1.2520751953125, -1.01690673828125, -0.78173828125, -0.54656982421875, -0.3114013671875, -0.07623291015625, 0.158935546875, 0.39410400390625, 0.6292724609375, 0.86444091796875, 1.099609375, 1.33477783203125, 1.5699462890625, 1.80511474609375, 2.040283203125, 2.27545166015625, 2.5106201171875, 2.74578857421875, 2.98095703125, 3.21612548828125, 3.4512939453125, 3.68646240234375, 3.921630859375, 4.15679931640625, 4.3919677734375, 4.62713623046875, 4.8623046875, 5.09747314453125, 5.3326416015625, 5.56781005859375, 5.802978515625, 6.03814697265625, 6.2733154296875, 6.50848388671875, 6.74365234375, 6.97882080078125, 7.2139892578125, 7.44915771484375, 7.684326171875, 7.91949462890625, 8.1546630859375, 8.38983154296875, 8.625]}, "gradients/decoder.transformer.h.19.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 4.0, 13.0, 7.0, 10.0, 18.0, 29.0, 43.0, 59.0, 104.0, 154.0, 248.0, 418.0, 681.0, 788.0, 586.0, 388.0, 198.0, 116.0, 71.0, 48.0, 35.0, 17.0, 22.0, 9.0, 6.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-9.53125, -9.276123046875, -9.02099609375, -8.765869140625, -8.5107421875, -8.255615234375, -8.00048828125, -7.745361328125, -7.490234375, -7.235107421875, -6.97998046875, -6.724853515625, -6.4697265625, -6.214599609375, -5.95947265625, -5.704345703125, -5.44921875, -5.194091796875, -4.93896484375, -4.683837890625, -4.4287109375, -4.173583984375, -3.91845703125, -3.663330078125, -3.408203125, -3.153076171875, -2.89794921875, -2.642822265625, -2.3876953125, -2.132568359375, -1.87744140625, -1.622314453125, -1.3671875, -1.112060546875, -0.85693359375, -0.601806640625, -0.3466796875, -0.091552734375, 0.16357421875, 0.418701171875, 0.673828125, 0.928955078125, 1.18408203125, 1.439208984375, 1.6943359375, 1.949462890625, 2.20458984375, 2.459716796875, 2.71484375, 2.969970703125, 3.22509765625, 3.480224609375, 3.7353515625, 3.990478515625, 4.24560546875, 4.500732421875, 4.755859375, 5.010986328125, 5.26611328125, 5.521240234375, 5.7763671875, 6.031494140625, 6.28662109375, 6.541748046875, 6.796875]}, "gradients/decoder.transformer.h.19.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 6.0, 1.0, 6.0, 8.0, 7.0, 25.0, 31.0, 40.0, 97.0, 172.0, 374.0, 1038.0, 4643.0, 44127.0, 1389466.0, 2677168.0, 68541.0, 6378.0, 1268.0, 446.0, 201.0, 105.0, 65.0, 35.0, 18.0, 12.0, 7.0, 4.0, 3.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.8515625, -11.2613525390625, -10.671142578125, -10.0809326171875, -9.49072265625, -8.9005126953125, -8.310302734375, -7.7200927734375, -7.1298828125, -6.5396728515625, -5.949462890625, -5.3592529296875, -4.76904296875, -4.1788330078125, -3.588623046875, -2.9984130859375, -2.408203125, -1.8179931640625, -1.227783203125, -0.6375732421875, -0.04736328125, 0.5428466796875, 1.133056640625, 1.7232666015625, 2.3134765625, 2.9036865234375, 3.493896484375, 4.0841064453125, 4.67431640625, 5.2645263671875, 5.854736328125, 6.4449462890625, 7.03515625, 7.6253662109375, 8.215576171875, 8.8057861328125, 9.39599609375, 9.9862060546875, 10.576416015625, 11.1666259765625, 11.7568359375, 12.3470458984375, 12.937255859375, 13.5274658203125, 14.11767578125, 14.7078857421875, 15.298095703125, 15.8883056640625, 16.478515625, 17.0687255859375, 17.658935546875, 18.2491455078125, 18.83935546875, 19.4295654296875, 20.019775390625, 20.6099853515625, 21.2001953125, 21.7904052734375, 22.380615234375, 22.9708251953125, 23.56103515625, 24.1512451171875, 24.741455078125, 25.3316650390625, 25.921875]}, "gradients/decoder.transformer.h.19.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 35.0, 240.0, 426.0, 254.0, 51.0, 5.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-213.24639892578125, -209.03646850585938, -204.8265380859375, -200.61660766601562, -196.4066925048828, -192.19676208496094, -187.98683166503906, -183.7769012451172, -179.5669708251953, -175.35704040527344, -171.14710998535156, -166.93719482421875, -162.72726440429688, -158.517333984375, -154.30740356445312, -150.09747314453125, -145.88754272460938, -141.6776123046875, -137.46768188476562, -133.25775146484375, -129.04783630371094, -124.83790588378906, -120.62797546386719, -116.41804504394531, -112.2081298828125, -107.99819946289062, -103.78827667236328, -99.5783462524414, -95.36841583251953, -91.15849304199219, -86.94856262207031, -82.73863220214844, -78.52871704101562, -74.31878662109375, -70.1088638305664, -65.89893341064453, -61.689002990722656, -57.47907638549805, -53.26914978027344, -49.05921936035156, -44.84928894042969, -40.63936233520508, -36.4294319152832, -32.219505310058594, -28.00957679748535, -23.79964828491211, -19.5897216796875, -15.379793167114258, -11.169864654541016, -6.959936618804932, -2.7500085830688477, 1.4599189758300781, 5.66984748840332, 9.879776000976562, 14.089702606201172, 18.299631118774414, 22.509559631347656, 26.7194881439209, 30.92941665649414, 35.13934326171875, 39.349273681640625, 43.559200286865234, 47.769126892089844, 51.97905731201172, 56.18898391723633]}, "gradients/decoder.transformer.h.19.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 2.0, 6.0, 4.0, 0.0, 7.0, 11.0, 6.0, 8.0, 9.0, 15.0, 21.0, 25.0, 15.0, 37.0, 19.0, 30.0, 47.0, 34.0, 32.0, 43.0, 31.0, 40.0, 35.0, 48.0, 48.0, 48.0, 45.0, 36.0, 37.0, 34.0, 33.0, 32.0, 28.0, 27.0, 22.0, 19.0, 18.0, 10.0, 10.0, 8.0, 8.0, 9.0, 2.0, 5.0, 5.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0], "bins": [-24.541099548339844, -23.851760864257812, -23.16242218017578, -22.47308349609375, -21.78374481201172, -21.094406127929688, -20.405067443847656, -19.715726852416992, -19.02638816833496, -18.33704948425293, -17.6477108001709, -16.958372116088867, -16.269033432006836, -15.579693794250488, -14.890355110168457, -14.20101547241211, -13.511677742004395, -12.822339057922363, -12.133000373840332, -11.443660736083984, -10.754322052001953, -10.064983367919922, -9.37564468383789, -8.68630599975586, -7.99696683883667, -7.307628154754639, -6.618288993835449, -5.928950309753418, -5.239611625671387, -4.550272464752197, -3.860933780670166, -3.1715946197509766, -2.4822559356689453, -1.792917013168335, -1.1035782098770142, -0.41423940658569336, 0.275099515914917, 0.9644384384155273, 1.6537771224975586, 2.343116283416748, 3.0324549674987793, 3.7217938899993896, 4.4111328125, 5.100471496582031, 5.7898101806640625, 6.479149341583252, 7.168488025665283, 7.857827186584473, 8.547165870666504, 9.236504554748535, 9.925843238830566, 10.615182876586914, 11.304521560668945, 11.993860244750977, 12.683198928833008, 13.372537612915039, 14.06187629699707, 14.751214981079102, 15.440553665161133, 16.129892349243164, 16.819231033325195, 17.50857162475586, 18.19791030883789, 18.887248992919922, 19.576587677001953]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 5.0, 3.0, 2.0, 3.0, 1.0, 8.0, 9.0, 9.0, 10.0, 14.0, 23.0, 17.0, 20.0, 31.0, 26.0, 25.0, 42.0, 42.0, 26.0, 44.0, 49.0, 48.0, 39.0, 41.0, 42.0, 36.0, 37.0, 30.0, 38.0, 28.0, 34.0, 31.0, 22.0, 39.0, 20.0, 19.0, 21.0, 14.0, 14.0, 8.0, 9.0, 8.0, 6.0, 5.0, 2.0, 4.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.341796875, -2.26495361328125, -2.1881103515625, -2.11126708984375, -2.034423828125, -1.95758056640625, -1.8807373046875, -1.80389404296875, -1.72705078125, -1.65020751953125, -1.5733642578125, -1.49652099609375, -1.419677734375, -1.34283447265625, -1.2659912109375, -1.18914794921875, -1.1123046875, -1.03546142578125, -0.9586181640625, -0.88177490234375, -0.804931640625, -0.72808837890625, -0.6512451171875, -0.57440185546875, -0.49755859375, -0.42071533203125, -0.3438720703125, -0.26702880859375, -0.190185546875, -0.11334228515625, -0.0364990234375, 0.04034423828125, 0.1171875, 0.19403076171875, 0.2708740234375, 0.34771728515625, 0.424560546875, 0.50140380859375, 0.5782470703125, 0.65509033203125, 0.73193359375, 0.80877685546875, 0.8856201171875, 0.96246337890625, 1.039306640625, 1.11614990234375, 1.1929931640625, 1.26983642578125, 1.3466796875, 1.42352294921875, 1.5003662109375, 1.57720947265625, 1.654052734375, 1.73089599609375, 1.8077392578125, 1.88458251953125, 1.96142578125, 2.03826904296875, 2.1151123046875, 2.19195556640625, 2.268798828125, 2.34564208984375, 2.4224853515625, 2.49932861328125, 2.576171875]}, "gradients/decoder.transformer.h.19.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 13.0, 23.0, 24.0, 35.0, 44.0, 63.0, 93.0, 138.0, 205.0, 276.0, 393.0, 635.0, 865.0, 1204.0, 1931.0, 2721.0, 4170.0, 6377.0, 9827.0, 15410.0, 24540.0, 39448.0, 65114.0, 115673.0, 267498.0, 227707.0, 104600.0, 59600.0, 36312.0, 22443.0, 14292.0, 8965.0, 5921.0, 3888.0, 2595.0, 1763.0, 1144.0, 780.0, 544.0, 385.0, 252.0, 187.0, 149.0, 89.0, 70.0, 53.0, 32.0, 25.0, 13.0, 8.0, 4.0, 6.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.061248779296875, -0.059267520904541016, -0.05728626251220703, -0.05530500411987305, -0.05332374572753906, -0.05134248733520508, -0.049361228942871094, -0.04737997055053711, -0.045398712158203125, -0.04341745376586914, -0.041436195373535156, -0.03945493698120117, -0.03747367858886719, -0.0354924201965332, -0.03351116180419922, -0.031529903411865234, -0.02954864501953125, -0.027567386627197266, -0.02558612823486328, -0.023604869842529297, -0.021623611450195312, -0.019642353057861328, -0.017661094665527344, -0.01567983627319336, -0.013698577880859375, -0.01171731948852539, -0.009736061096191406, -0.007754802703857422, -0.0057735443115234375, -0.003792285919189453, -0.0018110275268554688, 0.00017023086547851562, 0.0021514892578125, 0.004132747650146484, 0.006114006042480469, 0.008095264434814453, 0.010076522827148438, 0.012057781219482422, 0.014039039611816406, 0.01602029800415039, 0.018001556396484375, 0.01998281478881836, 0.021964073181152344, 0.023945331573486328, 0.025926589965820312, 0.027907848358154297, 0.02988910675048828, 0.031870365142822266, 0.03385162353515625, 0.035832881927490234, 0.03781414031982422, 0.0397953987121582, 0.04177665710449219, 0.04375791549682617, 0.045739173889160156, 0.04772043228149414, 0.049701690673828125, 0.05168294906616211, 0.053664207458496094, 0.05564546585083008, 0.05762672424316406, 0.05960798263549805, 0.06158924102783203, 0.06357049942016602, 0.0655517578125]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 1.0, 3.0, 3.0, 4.0, 9.0, 6.0, 2.0, 18.0, 8.0, 9.0, 14.0, 18.0, 20.0, 20.0, 17.0, 34.0, 34.0, 32.0, 37.0, 36.0, 39.0, 35.0, 35.0, 38.0, 38.0, 1055.0, 44.0, 47.0, 41.0, 29.0, 39.0, 32.0, 31.0, 28.0, 29.0, 16.0, 23.0, 17.0, 23.0, 12.0, 9.0, 13.0, 9.0, 7.0, 6.0, 5.0, 2.0, 1.0, 2.0, 2.0, 1.0, 4.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-1.5673828125, -1.5185394287109375, -1.469696044921875, -1.4208526611328125, -1.37200927734375, -1.3231658935546875, -1.274322509765625, -1.2254791259765625, -1.1766357421875, -1.1277923583984375, -1.078948974609375, -1.0301055908203125, -0.98126220703125, -0.9324188232421875, -0.883575439453125, -0.8347320556640625, -0.785888671875, -0.7370452880859375, -0.688201904296875, -0.6393585205078125, -0.59051513671875, -0.5416717529296875, -0.492828369140625, -0.4439849853515625, -0.3951416015625, -0.3462982177734375, -0.297454833984375, -0.2486114501953125, -0.19976806640625, -0.1509246826171875, -0.102081298828125, -0.0532379150390625, -0.00439453125, 0.0444488525390625, 0.093292236328125, 0.1421356201171875, 0.19097900390625, 0.2398223876953125, 0.288665771484375, 0.3375091552734375, 0.3863525390625, 0.4351959228515625, 0.484039306640625, 0.5328826904296875, 0.58172607421875, 0.6305694580078125, 0.679412841796875, 0.7282562255859375, 0.777099609375, 0.8259429931640625, 0.874786376953125, 0.9236297607421875, 0.97247314453125, 1.0213165283203125, 1.070159912109375, 1.1190032958984375, 1.1678466796875, 1.2166900634765625, 1.265533447265625, 1.3143768310546875, 1.36322021484375, 1.4120635986328125, 1.460906982421875, 1.5097503662109375, 1.55859375]}, "gradients/decoder.transformer.h.19.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 5.0, 2.0, 9.0, 8.0, 16.0, 22.0, 29.0, 35.0, 62.0, 88.0, 142.0, 230.0, 321.0, 503.0, 701.0, 1132.0, 1719.0, 2717.0, 4293.0, 6796.0, 11187.0, 17982.0, 30730.0, 54023.0, 101772.0, 240881.0, 1350802.0, 119900.0, 62458.0, 34690.0, 20556.0, 12384.0, 7576.0, 4765.0, 3088.0, 1931.0, 1217.0, 831.0, 508.0, 350.0, 228.0, 123.0, 117.0, 66.0, 50.0, 34.0, 28.0, 12.0, 6.0, 6.0, 4.0, 3.0, 5.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.06878662109375, -0.06664466857910156, -0.06450271606445312, -0.06236076354980469, -0.06021881103515625, -0.05807685852050781, -0.055934906005859375, -0.05379295349121094, -0.0516510009765625, -0.04950904846191406, -0.047367095947265625, -0.04522514343261719, -0.04308319091796875, -0.04094123840332031, -0.038799285888671875, -0.03665733337402344, -0.034515380859375, -0.03237342834472656, -0.030231475830078125, -0.028089523315429688, -0.02594757080078125, -0.023805618286132812, -0.021663665771484375, -0.019521713256835938, -0.0173797607421875, -0.015237808227539062, -0.013095855712890625, -0.010953903198242188, -0.00881195068359375, -0.0066699981689453125, -0.004528045654296875, -0.0023860931396484375, -0.000244140625, 0.0018978118896484375, 0.004039764404296875, 0.0061817169189453125, 0.00832366943359375, 0.010465621948242188, 0.012607574462890625, 0.014749526977539062, 0.0168914794921875, 0.019033432006835938, 0.021175384521484375, 0.023317337036132812, 0.02545928955078125, 0.027601242065429688, 0.029743194580078125, 0.03188514709472656, 0.034027099609375, 0.03616905212402344, 0.038311004638671875, 0.04045295715332031, 0.04259490966796875, 0.04473686218261719, 0.046878814697265625, 0.04902076721191406, 0.0511627197265625, 0.05330467224121094, 0.055446624755859375, 0.05758857727050781, 0.05973052978515625, 0.06187248229980469, 0.06401443481445312, 0.06615638732910156, 0.06829833984375]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 4.0, 1.0, 5.0, 5.0, 3.0, 10.0, 9.0, 7.0, 10.0, 8.0, 7.0, 17.0, 21.0, 28.0, 42.0, 40.0, 44.0, 57.0, 57.0, 54.0, 92.0, 66.0, 57.0, 56.0, 45.0, 39.0, 39.0, 33.0, 28.0, 27.0, 17.0, 20.0, 9.0, 10.0, 4.0, 7.0, 8.0, 9.0, 3.0, 2.0, 0.0, 2.0, 5.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.5762786865234375e-06, -3.4570693969726562e-06, -3.337860107421875e-06, -3.2186508178710938e-06, -3.0994415283203125e-06, -2.9802322387695312e-06, -2.86102294921875e-06, -2.7418136596679688e-06, -2.6226043701171875e-06, -2.5033950805664062e-06, -2.384185791015625e-06, -2.2649765014648438e-06, -2.1457672119140625e-06, -2.0265579223632812e-06, -1.9073486328125e-06, -1.7881393432617188e-06, -1.6689300537109375e-06, -1.5497207641601562e-06, -1.430511474609375e-06, -1.3113021850585938e-06, -1.1920928955078125e-06, -1.0728836059570312e-06, -9.5367431640625e-07, -8.344650268554688e-07, -7.152557373046875e-07, -5.960464477539062e-07, -4.76837158203125e-07, -3.5762786865234375e-07, -2.384185791015625e-07, -1.1920928955078125e-07, 0.0, 1.1920928955078125e-07, 2.384185791015625e-07, 3.5762786865234375e-07, 4.76837158203125e-07, 5.960464477539062e-07, 7.152557373046875e-07, 8.344650268554688e-07, 9.5367431640625e-07, 1.0728836059570312e-06, 1.1920928955078125e-06, 1.3113021850585938e-06, 1.430511474609375e-06, 1.5497207641601562e-06, 1.6689300537109375e-06, 1.7881393432617188e-06, 1.9073486328125e-06, 2.0265579223632812e-06, 2.1457672119140625e-06, 2.2649765014648438e-06, 2.384185791015625e-06, 2.5033950805664062e-06, 2.6226043701171875e-06, 2.7418136596679688e-06, 2.86102294921875e-06, 2.9802322387695312e-06, 3.0994415283203125e-06, 3.2186508178710938e-06, 3.337860107421875e-06, 3.4570693969726562e-06, 3.5762786865234375e-06, 3.6954879760742188e-06, 3.814697265625e-06, 3.933906555175781e-06, 4.0531158447265625e-06]}, "gradients/decoder.transformer.h.19.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 1.0, 1.0, 3.0, 2.0, 1.0, 4.0, 3.0, 3.0, 1.0, 10.0, 8.0, 14.0, 21.0, 17.0, 16.0, 27.0, 40.0, 55.0, 86.0, 123.0, 157.0, 278.0, 469.0, 1844.0, 92935.0, 920375.0, 29779.0, 1083.0, 408.0, 240.0, 150.0, 103.0, 88.0, 61.0, 40.0, 26.0, 26.0, 15.0, 17.0, 8.0, 3.0, 2.0, 2.0, 6.0, 5.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-6.753206253051758e-05, -6.556790322065353e-05, -6.360374391078949e-05, -6.163958460092545e-05, -5.96754252910614e-05, -5.771126598119736e-05, -5.574710667133331e-05, -5.378294736146927e-05, -5.1818788051605225e-05, -4.985462874174118e-05, -4.7890469431877136e-05, -4.592631012201309e-05, -4.396215081214905e-05, -4.1997991502285004e-05, -4.003383219242096e-05, -3.8069672882556915e-05, -3.610551357269287e-05, -3.414135426282883e-05, -3.217719495296478e-05, -3.021303564310074e-05, -2.8248876333236694e-05, -2.628471702337265e-05, -2.4320557713508606e-05, -2.2356398403644562e-05, -2.0392239093780518e-05, -1.8428079783916473e-05, -1.646392047405243e-05, -1.4499761164188385e-05, -1.253560185432434e-05, -1.0571442544460297e-05, -8.607283234596252e-06, -6.643123924732208e-06, -4.678964614868164e-06, -2.71480530500412e-06, -7.506459951400757e-07, 1.2135133147239685e-06, 3.1776726245880127e-06, 5.141831934452057e-06, 7.105991244316101e-06, 9.070150554180145e-06, 1.103430986404419e-05, 1.2998469173908234e-05, 1.4962628483772278e-05, 1.6926787793636322e-05, 1.8890947103500366e-05, 2.085510641336441e-05, 2.2819265723228455e-05, 2.47834250330925e-05, 2.6747584342956543e-05, 2.8711743652820587e-05, 3.067590296268463e-05, 3.2640062272548676e-05, 3.460422158241272e-05, 3.6568380892276764e-05, 3.853254020214081e-05, 4.049669951200485e-05, 4.2460858821868896e-05, 4.442501813173294e-05, 4.6389177441596985e-05, 4.835333675146103e-05, 5.031749606132507e-05, 5.228165537118912e-05, 5.424581468105316e-05, 5.6209973990917206e-05, 5.817413330078125e-05]}, "gradients/decoder.transformer.h.19.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 25.0, 63.0, 168.0, 249.0, 263.0, 158.0, 60.0, 21.0, 6.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.377362074592384e-06, -4.119257482670946e-06, -3.8611528907495085e-06, -3.6030485262017464e-06, -3.344943934280309e-06, -3.0868393423588714e-06, -2.8287349778111093e-06, -2.5706303858896717e-06, -2.312525793968234e-06, -2.0544212020467967e-06, -1.7963167238121969e-06, -1.538212245577597e-06, -1.2801076536561595e-06, -1.022003061734722e-06, -7.638985835001222e-07, -5.057941052655224e-07, -2.4768951334408484e-07, 1.0415021733933827e-08, 2.685195568119525e-07, 5.266240918899712e-07, 7.847286269679898e-07, 1.0428332188894274e-06, 1.3009376971240272e-06, 1.559042175358627e-06, 1.8171467672800645e-06, 2.075251359201502e-06, 2.3333559511229396e-06, 2.5914603156707017e-06, 2.849564907592139e-06, 3.1076694995135767e-06, 3.365773864061339e-06, 3.6238784559827764e-06, 3.881983502651565e-06, 4.140088094573002e-06, 4.39819268649444e-06, 4.656297278415877e-06, 4.914401870337315e-06, 5.1725064622587524e-06, 5.430610599432839e-06, 5.688715191354277e-06, 5.946819783275714e-06, 6.204924375197152e-06, 6.463028967118589e-06, 6.721133559040027e-06, 6.979237696214113e-06, 7.237342288135551e-06, 7.4954468800569884e-06, 7.753551471978426e-06, 8.011656063899864e-06, 8.269760655821301e-06, 8.527865247742739e-06, 8.785969839664176e-06, 9.044074431585614e-06, 9.302179023507051e-06, 9.560283615428489e-06, 9.818388207349926e-06, 1.0076491889776662e-05, 1.03345964816981e-05, 1.0592701073619537e-05, 1.0850805665540975e-05, 1.1108910257462412e-05, 1.136701484938385e-05, 1.1625119441305287e-05, 1.1883223123732023e-05, 1.2141328625148162e-05]}, "gradients/decoder.transformer.h.19.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 4.0, 3.0, 4.0, 8.0, 2.0, 10.0, 7.0, 11.0, 7.0, 16.0, 14.0, 25.0, 24.0, 8.0, 14.0, 27.0, 26.0, 30.0, 36.0, 34.0, 40.0, 47.0, 48.0, 53.0, 30.0, 33.0, 45.0, 40.0, 25.0, 39.0, 39.0, 21.0, 30.0, 26.0, 27.0, 25.0, 20.0, 24.0, 14.0, 12.0, 18.0, 9.0, 8.0, 8.0, 4.0, 4.0, 5.0, 2.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0], "bins": [-2.0265579223632812e-06, -1.967884600162506e-06, -1.909211277961731e-06, -1.8505379557609558e-06, -1.7918646335601807e-06, -1.7331913113594055e-06, -1.6745179891586304e-06, -1.6158446669578552e-06, -1.55717134475708e-06, -1.498498022556305e-06, -1.4398247003555298e-06, -1.3811513781547546e-06, -1.3224780559539795e-06, -1.2638047337532043e-06, -1.2051314115524292e-06, -1.146458089351654e-06, -1.087784767150879e-06, -1.0291114449501038e-06, -9.704381227493286e-07, -9.117648005485535e-07, -8.530914783477783e-07, -7.944181561470032e-07, -7.35744833946228e-07, -6.770715117454529e-07, -6.183981895446777e-07, -5.597248673439026e-07, -5.010515451431274e-07, -4.423782229423523e-07, -3.8370490074157715e-07, -3.25031578540802e-07, -2.6635825634002686e-07, -2.076849341392517e-07, -1.4901161193847656e-07, -9.033828973770142e-08, -3.166496753692627e-08, 2.7008354663848877e-08, 8.568167686462402e-08, 1.4435499906539917e-07, 2.0302832126617432e-07, 2.6170164346694946e-07, 3.203749656677246e-07, 3.7904828786849976e-07, 4.377216100692749e-07, 4.9639493227005e-07, 5.550682544708252e-07, 6.137415766716003e-07, 6.724148988723755e-07, 7.310882210731506e-07, 7.897615432739258e-07, 8.484348654747009e-07, 9.071081876754761e-07, 9.657815098762512e-07, 1.0244548320770264e-06, 1.0831281542778015e-06, 1.1418014764785767e-06, 1.2004747986793518e-06, 1.259148120880127e-06, 1.317821443080902e-06, 1.3764947652816772e-06, 1.4351680874824524e-06, 1.4938414096832275e-06, 1.5525147318840027e-06, 1.6111880540847778e-06, 1.669861376285553e-06, 1.7285346984863281e-06]}, "gradients/decoder.transformer.h.19.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 5.0, 3.0, 2.0, 3.0, 1.0, 8.0, 9.0, 9.0, 10.0, 14.0, 23.0, 17.0, 20.0, 31.0, 26.0, 25.0, 42.0, 42.0, 26.0, 44.0, 49.0, 48.0, 39.0, 41.0, 42.0, 36.0, 37.0, 30.0, 38.0, 28.0, 34.0, 31.0, 22.0, 39.0, 20.0, 19.0, 21.0, 14.0, 14.0, 8.0, 9.0, 8.0, 6.0, 5.0, 2.0, 4.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.341796875, -2.26495361328125, -2.1881103515625, -2.11126708984375, -2.034423828125, -1.95758056640625, -1.8807373046875, -1.80389404296875, -1.72705078125, -1.65020751953125, -1.5733642578125, -1.49652099609375, -1.419677734375, -1.34283447265625, -1.2659912109375, -1.18914794921875, -1.1123046875, -1.03546142578125, -0.9586181640625, -0.88177490234375, -0.804931640625, -0.72808837890625, -0.6512451171875, -0.57440185546875, -0.49755859375, -0.42071533203125, -0.3438720703125, -0.26702880859375, -0.190185546875, -0.11334228515625, -0.0364990234375, 0.04034423828125, 0.1171875, 0.19403076171875, 0.2708740234375, 0.34771728515625, 0.424560546875, 0.50140380859375, 0.5782470703125, 0.65509033203125, 0.73193359375, 0.80877685546875, 0.8856201171875, 0.96246337890625, 1.039306640625, 1.11614990234375, 1.1929931640625, 1.26983642578125, 1.3466796875, 1.42352294921875, 1.5003662109375, 1.57720947265625, 1.654052734375, 1.73089599609375, 1.8077392578125, 1.88458251953125, 1.96142578125, 2.03826904296875, 2.1151123046875, 2.19195556640625, 2.268798828125, 2.34564208984375, 2.4224853515625, 2.49932861328125, 2.576171875]}, "gradients/decoder.transformer.h.19.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 5.0, 4.0, 5.0, 9.0, 5.0, 8.0, 11.0, 13.0, 28.0, 36.0, 52.0, 77.0, 139.0, 174.0, 276.0, 427.0, 786.0, 1339.0, 2308.0, 4035.0, 7888.0, 16777.0, 39400.0, 105944.0, 372642.0, 330110.0, 97521.0, 36184.0, 15658.0, 7369.0, 3933.0, 2191.0, 1210.0, 745.0, 446.0, 272.0, 179.0, 113.0, 70.0, 40.0, 36.0, 26.0, 25.0, 14.0, 10.0, 7.0, 6.0, 5.0, 3.0, 4.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.48828125, -1.44171142578125, -1.3951416015625, -1.34857177734375, -1.302001953125, -1.25543212890625, -1.2088623046875, -1.16229248046875, -1.11572265625, -1.06915283203125, -1.0225830078125, -0.97601318359375, -0.929443359375, -0.88287353515625, -0.8363037109375, -0.78973388671875, -0.7431640625, -0.69659423828125, -0.6500244140625, -0.60345458984375, -0.556884765625, -0.51031494140625, -0.4637451171875, -0.41717529296875, -0.37060546875, -0.32403564453125, -0.2774658203125, -0.23089599609375, -0.184326171875, -0.13775634765625, -0.0911865234375, -0.04461669921875, 0.001953125, 0.04852294921875, 0.0950927734375, 0.14166259765625, 0.188232421875, 0.23480224609375, 0.2813720703125, 0.32794189453125, 0.37451171875, 0.42108154296875, 0.4676513671875, 0.51422119140625, 0.560791015625, 0.60736083984375, 0.6539306640625, 0.70050048828125, 0.7470703125, 0.79364013671875, 0.8402099609375, 0.88677978515625, 0.933349609375, 0.97991943359375, 1.0264892578125, 1.07305908203125, 1.11962890625, 1.16619873046875, 1.2127685546875, 1.25933837890625, 1.305908203125, 1.35247802734375, 1.3990478515625, 1.44561767578125, 1.4921875]}, "gradients/decoder.transformer.h.19.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 5.0, 4.0, 10.0, 18.0, 14.0, 6.0, 24.0, 30.0, 25.0, 35.0, 38.0, 42.0, 36.0, 60.0, 66.0, 128.0, 1918.0, 136.0, 68.0, 70.0, 55.0, 46.0, 39.0, 37.0, 33.0, 22.0, 19.0, 24.0, 11.0, 8.0, 4.0, 6.0, 8.0, 4.0, 4.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.8984375, -9.5631103515625, -9.227783203125, -8.8924560546875, -8.55712890625, -8.2218017578125, -7.886474609375, -7.5511474609375, -7.2158203125, -6.8804931640625, -6.545166015625, -6.2098388671875, -5.87451171875, -5.5391845703125, -5.203857421875, -4.8685302734375, -4.533203125, -4.1978759765625, -3.862548828125, -3.5272216796875, -3.19189453125, -2.8565673828125, -2.521240234375, -2.1859130859375, -1.8505859375, -1.5152587890625, -1.179931640625, -0.8446044921875, -0.50927734375, -0.1739501953125, 0.161376953125, 0.4967041015625, 0.83203125, 1.1673583984375, 1.502685546875, 1.8380126953125, 2.17333984375, 2.5086669921875, 2.843994140625, 3.1793212890625, 3.5146484375, 3.8499755859375, 4.185302734375, 4.5206298828125, 4.85595703125, 5.1912841796875, 5.526611328125, 5.8619384765625, 6.197265625, 6.5325927734375, 6.867919921875, 7.2032470703125, 7.53857421875, 7.8739013671875, 8.209228515625, 8.5445556640625, 8.8798828125, 9.2152099609375, 9.550537109375, 9.8858642578125, 10.22119140625, 10.5565185546875, 10.891845703125, 11.2271728515625, 11.5625]}, "gradients/decoder.transformer.h.19.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 4.0, 4.0, 7.0, 3.0, 7.0, 8.0, 11.0, 21.0, 21.0, 21.0, 32.0, 44.0, 68.0, 91.0, 152.0, 270.0, 551.0, 2619.0, 490156.0, 2646655.0, 3650.0, 527.0, 280.0, 164.0, 106.0, 61.0, 37.0, 30.0, 31.0, 26.0, 5.0, 14.0, 18.0, 8.0, 4.0, 5.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.921875, -18.3662109375, -17.810546875, -17.2548828125, -16.69921875, -16.1435546875, -15.587890625, -15.0322265625, -14.4765625, -13.9208984375, -13.365234375, -12.8095703125, -12.25390625, -11.6982421875, -11.142578125, -10.5869140625, -10.03125, -9.4755859375, -8.919921875, -8.3642578125, -7.80859375, -7.2529296875, -6.697265625, -6.1416015625, -5.5859375, -5.0302734375, -4.474609375, -3.9189453125, -3.36328125, -2.8076171875, -2.251953125, -1.6962890625, -1.140625, -0.5849609375, -0.029296875, 0.5263671875, 1.08203125, 1.6376953125, 2.193359375, 2.7490234375, 3.3046875, 3.8603515625, 4.416015625, 4.9716796875, 5.52734375, 6.0830078125, 6.638671875, 7.1943359375, 7.75, 8.3056640625, 8.861328125, 9.4169921875, 9.97265625, 10.5283203125, 11.083984375, 11.6396484375, 12.1953125, 12.7509765625, 13.306640625, 13.8623046875, 14.41796875, 14.9736328125, 15.529296875, 16.0849609375, 16.640625]}, "gradients/decoder.transformer.h.19.ln_1.weight": {"_type": "histogram", "values": [1.0, 849.0, 171.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.155245780944824, -5.682628631591797, 0.7899885177612305, 7.262604713439941, 13.735222816467285, 20.207839965820312, 26.68045425415039, 33.153072357177734, 39.62569046020508, 46.09830856323242, 52.5709228515625, 59.043540954589844, 65.51615905761719, 71.98877716064453, 78.46139526367188, 84.93400573730469, 91.40663146972656, 97.8792495727539, 104.35186767578125, 110.82447814941406, 117.29710388183594, 123.76971435546875, 130.24234008789062, 136.71495056152344, 143.18756103515625, 149.66017150878906, 156.13279724121094, 162.60540771484375, 169.07803344726562, 175.55064392089844, 182.02325439453125, 188.49588012695312, 194.968505859375, 201.4411163330078, 207.9137420654297, 214.3863525390625, 220.85897827148438, 227.3315887451172, 233.80419921875, 240.27682495117188, 246.74945068359375, 253.22206115722656, 259.6946716308594, 266.16729736328125, 272.6399230957031, 279.112548828125, 285.58514404296875, 292.0577697753906, 298.5303955078125, 305.0030212402344, 311.4756164550781, 317.9482421875, 324.4208679199219, 330.89349365234375, 337.3660888671875, 343.8387145996094, 350.3113098144531, 356.783935546875, 363.25653076171875, 369.7291564941406, 376.2017822265625, 382.6744079589844, 389.1470031738281, 395.61962890625, 402.0922546386719]}, "gradients/decoder.transformer.h.19.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 6.0, 5.0, 1.0, 7.0, 10.0, 11.0, 10.0, 14.0, 11.0, 23.0, 21.0, 21.0, 29.0, 21.0, 45.0, 31.0, 40.0, 31.0, 43.0, 42.0, 41.0, 46.0, 51.0, 40.0, 45.0, 39.0, 34.0, 37.0, 29.0, 24.0, 25.0, 26.0, 26.0, 13.0, 16.0, 13.0, 11.0, 19.0, 11.0, 11.0, 4.0, 6.0, 5.0, 4.0, 3.0, 2.0, 6.0, 0.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-27.20220375061035, -26.33625602722168, -25.470308303833008, -24.604360580444336, -23.738412857055664, -22.872465133666992, -22.00651741027832, -21.14056968688965, -20.274621963500977, -19.408674240112305, -18.542726516723633, -17.67677879333496, -16.81083106994629, -15.944883346557617, -15.078935623168945, -14.212987899780273, -13.347040176391602, -12.48109245300293, -11.615144729614258, -10.749197006225586, -9.883249282836914, -9.017301559448242, -8.15135383605957, -7.285406112670898, -6.419458389282227, -5.553510665893555, -4.687562942504883, -3.821615219116211, -2.955667495727539, -2.089719772338867, -1.2237720489501953, -0.35782432556152344, 0.5081253051757812, 1.3740730285644531, 2.240020751953125, 3.105968475341797, 3.9719161987304688, 4.837863922119141, 5.7038116455078125, 6.569759368896484, 7.435707092285156, 8.301654815673828, 9.1676025390625, 10.033550262451172, 10.899497985839844, 11.765445709228516, 12.631393432617188, 13.49734115600586, 14.363288879394531, 15.229236602783203, 16.095184326171875, 16.961132049560547, 17.82707977294922, 18.69302749633789, 19.558975219726562, 20.424922943115234, 21.290870666503906, 22.156818389892578, 23.02276611328125, 23.888713836669922, 24.754661560058594, 25.620609283447266, 26.486557006835938, 27.35250473022461, 28.21845245361328]}, "gradients/decoder.transformer.h.18.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 4.0, 3.0, 2.0, 1.0, 7.0, 6.0, 5.0, 9.0, 12.0, 15.0, 18.0, 15.0, 21.0, 26.0, 26.0, 23.0, 40.0, 40.0, 28.0, 37.0, 38.0, 45.0, 44.0, 46.0, 41.0, 42.0, 31.0, 30.0, 36.0, 31.0, 33.0, 30.0, 30.0, 29.0, 26.0, 26.0, 20.0, 18.0, 17.0, 9.0, 9.0, 9.0, 9.0, 4.0, 4.0, 3.0, 7.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-2.369140625, -2.29302978515625, -2.2169189453125, -2.14080810546875, -2.064697265625, -1.98858642578125, -1.9124755859375, -1.83636474609375, -1.76025390625, -1.68414306640625, -1.6080322265625, -1.53192138671875, -1.455810546875, -1.37969970703125, -1.3035888671875, -1.22747802734375, -1.1513671875, -1.07525634765625, -0.9991455078125, -0.92303466796875, -0.846923828125, -0.77081298828125, -0.6947021484375, -0.61859130859375, -0.54248046875, -0.46636962890625, -0.3902587890625, -0.31414794921875, -0.238037109375, -0.16192626953125, -0.0858154296875, -0.00970458984375, 0.06640625, 0.14251708984375, 0.2186279296875, 0.29473876953125, 0.370849609375, 0.44696044921875, 0.5230712890625, 0.59918212890625, 0.67529296875, 0.75140380859375, 0.8275146484375, 0.90362548828125, 0.979736328125, 1.05584716796875, 1.1319580078125, 1.20806884765625, 1.2841796875, 1.36029052734375, 1.4364013671875, 1.51251220703125, 1.588623046875, 1.66473388671875, 1.7408447265625, 1.81695556640625, 1.89306640625, 1.96917724609375, 2.0452880859375, 2.12139892578125, 2.197509765625, 2.27362060546875, 2.3497314453125, 2.42584228515625, 2.501953125]}, "gradients/decoder.transformer.h.18.mlp.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 2.0, 0.0, 4.0, 1.0, 4.0, 6.0, 4.0, 8.0, 5.0, 14.0, 10.0, 10.0, 16.0, 21.0, 24.0, 30.0, 37.0, 48.0, 66.0, 107.0, 197.0, 472.0, 1308.0, 5126.0, 32929.0, 512129.0, 3386529.0, 229932.0, 20021.0, 3410.0, 918.0, 391.0, 155.0, 82.0, 53.0, 42.0, 32.0, 34.0, 19.0, 21.0, 11.0, 12.0, 12.0, 13.0, 7.0, 5.0, 4.0, 5.0, 3.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.203125, -7.9239501953125, -7.644775390625, -7.3656005859375, -7.08642578125, -6.8072509765625, -6.528076171875, -6.2489013671875, -5.9697265625, -5.6905517578125, -5.411376953125, -5.1322021484375, -4.85302734375, -4.5738525390625, -4.294677734375, -4.0155029296875, -3.736328125, -3.4571533203125, -3.177978515625, -2.8988037109375, -2.61962890625, -2.3404541015625, -2.061279296875, -1.7821044921875, -1.5029296875, -1.2237548828125, -0.944580078125, -0.6654052734375, -0.38623046875, -0.1070556640625, 0.172119140625, 0.4512939453125, 0.73046875, 1.0096435546875, 1.288818359375, 1.5679931640625, 1.84716796875, 2.1263427734375, 2.405517578125, 2.6846923828125, 2.9638671875, 3.2430419921875, 3.522216796875, 3.8013916015625, 4.08056640625, 4.3597412109375, 4.638916015625, 4.9180908203125, 5.197265625, 5.4764404296875, 5.755615234375, 6.0347900390625, 6.31396484375, 6.5931396484375, 6.872314453125, 7.1514892578125, 7.4306640625, 7.7098388671875, 7.989013671875, 8.2681884765625, 8.54736328125, 8.8265380859375, 9.105712890625, 9.3848876953125, 9.6640625]}, "gradients/decoder.transformer.h.18.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 5.0, 5.0, 9.0, 9.0, 17.0, 29.0, 45.0, 65.0, 93.0, 144.0, 260.0, 449.0, 691.0, 802.0, 564.0, 331.0, 203.0, 138.0, 70.0, 54.0, 38.0, 15.0, 14.0, 11.0, 5.0, 4.0, 3.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.4921875, -7.226806640625, -6.96142578125, -6.696044921875, -6.4306640625, -6.165283203125, -5.89990234375, -5.634521484375, -5.369140625, -5.103759765625, -4.83837890625, -4.572998046875, -4.3076171875, -4.042236328125, -3.77685546875, -3.511474609375, -3.24609375, -2.980712890625, -2.71533203125, -2.449951171875, -2.1845703125, -1.919189453125, -1.65380859375, -1.388427734375, -1.123046875, -0.857666015625, -0.59228515625, -0.326904296875, -0.0615234375, 0.203857421875, 0.46923828125, 0.734619140625, 1.0, 1.265380859375, 1.53076171875, 1.796142578125, 2.0615234375, 2.326904296875, 2.59228515625, 2.857666015625, 3.123046875, 3.388427734375, 3.65380859375, 3.919189453125, 4.1845703125, 4.449951171875, 4.71533203125, 4.980712890625, 5.24609375, 5.511474609375, 5.77685546875, 6.042236328125, 6.3076171875, 6.572998046875, 6.83837890625, 7.103759765625, 7.369140625, 7.634521484375, 7.89990234375, 8.165283203125, 8.4306640625, 8.696044921875, 8.96142578125, 9.226806640625, 9.4921875]}, "gradients/decoder.transformer.h.18.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 0.0, 5.0, 7.0, 10.0, 15.0, 18.0, 50.0, 65.0, 115.0, 197.0, 324.0, 668.0, 1407.0, 4571.0, 22858.0, 176054.0, 2684823.0, 1194021.0, 90465.0, 13222.0, 3080.0, 1038.0, 561.0, 289.0, 178.0, 91.0, 67.0, 30.0, 23.0, 19.0, 3.0, 1.0, 5.0, 4.0, 1.0, 2.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.2265625, -14.8133544921875, -14.400146484375, -13.9869384765625, -13.57373046875, -13.1605224609375, -12.747314453125, -12.3341064453125, -11.9208984375, -11.5076904296875, -11.094482421875, -10.6812744140625, -10.26806640625, -9.8548583984375, -9.441650390625, -9.0284423828125, -8.615234375, -8.2020263671875, -7.788818359375, -7.3756103515625, -6.96240234375, -6.5491943359375, -6.135986328125, -5.7227783203125, -5.3095703125, -4.8963623046875, -4.483154296875, -4.0699462890625, -3.65673828125, -3.2435302734375, -2.830322265625, -2.4171142578125, -2.00390625, -1.5906982421875, -1.177490234375, -0.7642822265625, -0.35107421875, 0.0621337890625, 0.475341796875, 0.8885498046875, 1.3017578125, 1.7149658203125, 2.128173828125, 2.5413818359375, 2.95458984375, 3.3677978515625, 3.781005859375, 4.1942138671875, 4.607421875, 5.0206298828125, 5.433837890625, 5.8470458984375, 6.26025390625, 6.6734619140625, 7.086669921875, 7.4998779296875, 7.9130859375, 8.3262939453125, 8.739501953125, 9.1527099609375, 9.56591796875, 9.9791259765625, 10.392333984375, 10.8055419921875, 11.21875]}, "gradients/decoder.transformer.h.18.ln_2.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 9.0, 36.0, 107.0, 226.0, 252.0, 204.0, 121.0, 38.0, 18.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.242374420166016, -26.73195457458496, -24.221532821655273, -21.71111297607422, -19.20069122314453, -16.690271377563477, -14.179851531982422, -11.669429779052734, -9.15900993347168, -6.648589134216309, -4.138168811798096, -1.6277484893798828, 0.8826723098754883, 3.3930931091308594, 5.903512954711914, 8.413934707641602, 10.924354553222656, 13.434775352478027, 15.945196151733398, 18.455615997314453, 20.96603775024414, 23.476457595825195, 25.98687744140625, 28.497299194335938, 31.007719039916992, 33.51813888549805, 36.028560638427734, 38.538978576660156, 41.049400329589844, 43.55982208251953, 46.07024383544922, 48.580665588378906, 51.091087341308594, 53.60150909423828, 56.1119270324707, 58.62234878540039, 61.13277053833008, 63.6431884765625, 66.15361022949219, 68.66403198242188, 71.17445373535156, 73.68487548828125, 76.19529724121094, 78.70571899414062, 81.21613311767578, 83.72655487060547, 86.23697662353516, 88.74739837646484, 91.2578125, 93.76823425292969, 96.27865600585938, 98.78907775878906, 101.29949188232422, 103.8099136352539, 106.3203353881836, 108.83075714111328, 111.34117889404297, 113.85160064697266, 116.36202239990234, 118.8724365234375, 121.38285827636719, 123.89328002929688, 126.40370178222656, 128.91412353515625, 131.42454528808594]}, "gradients/decoder.transformer.h.18.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 3.0, 6.0, 4.0, 0.0, 6.0, 3.0, 12.0, 10.0, 16.0, 14.0, 16.0, 17.0, 20.0, 27.0, 25.0, 35.0, 41.0, 25.0, 39.0, 29.0, 39.0, 40.0, 37.0, 42.0, 40.0, 34.0, 45.0, 44.0, 42.0, 27.0, 35.0, 33.0, 24.0, 21.0, 26.0, 20.0, 17.0, 16.0, 10.0, 8.0, 10.0, 10.0, 6.0, 12.0, 5.0, 6.0, 5.0, 3.0, 1.0, 2.0, 3.0, 0.0, 1.0], "bins": [-21.26445770263672, -20.647411346435547, -20.030364990234375, -19.413318634033203, -18.79627227783203, -18.17922592163086, -17.562179565429688, -16.945133209228516, -16.328086853027344, -15.711040496826172, -15.093994140625, -14.476947784423828, -13.859901428222656, -13.242855072021484, -12.625808715820312, -12.00876235961914, -11.391716003417969, -10.774669647216797, -10.157623291015625, -9.540576934814453, -8.923530578613281, -8.30648422241211, -7.6894378662109375, -7.072391510009766, -6.455345153808594, -5.838298797607422, -5.22125244140625, -4.604206085205078, -3.9871597290039062, -3.3701133728027344, -2.7530670166015625, -2.1360206604003906, -1.518972396850586, -0.9019260406494141, -0.2848796844482422, 0.3321666717529297, 0.9492130279541016, 1.5662593841552734, 2.1833057403564453, 2.800352096557617, 3.417398452758789, 4.034444808959961, 4.651491165161133, 5.268537521362305, 5.885583877563477, 6.502630233764648, 7.11967658996582, 7.736722946166992, 8.353769302368164, 8.970815658569336, 9.587862014770508, 10.20490837097168, 10.821954727172852, 11.439001083374023, 12.056047439575195, 12.673093795776367, 13.290140151977539, 13.907186508178711, 14.524232864379883, 15.141279220581055, 15.758325576782227, 16.3753719329834, 16.99241828918457, 17.609464645385742, 18.226511001586914]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 2.0, 3.0, 2.0, 5.0, 2.0, 4.0, 6.0, 3.0, 4.0, 11.0, 10.0, 16.0, 19.0, 14.0, 22.0, 19.0, 29.0, 14.0, 48.0, 23.0, 38.0, 40.0, 46.0, 39.0, 37.0, 45.0, 30.0, 36.0, 46.0, 21.0, 29.0, 30.0, 30.0, 25.0, 43.0, 20.0, 40.0, 27.0, 21.0, 22.0, 16.0, 14.0, 15.0, 11.0, 8.0, 6.0, 3.0, 5.0, 3.0, 1.0, 3.0, 1.0, 4.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-2.513671875, -2.4365234375, -2.359375, -2.2822265625, -2.205078125, -2.1279296875, -2.05078125, -1.9736328125, -1.896484375, -1.8193359375, -1.7421875, -1.6650390625, -1.587890625, -1.5107421875, -1.43359375, -1.3564453125, -1.279296875, -1.2021484375, -1.125, -1.0478515625, -0.970703125, -0.8935546875, -0.81640625, -0.7392578125, -0.662109375, -0.5849609375, -0.5078125, -0.4306640625, -0.353515625, -0.2763671875, -0.19921875, -0.1220703125, -0.044921875, 0.0322265625, 0.109375, 0.1865234375, 0.263671875, 0.3408203125, 0.41796875, 0.4951171875, 0.572265625, 0.6494140625, 0.7265625, 0.8037109375, 0.880859375, 0.9580078125, 1.03515625, 1.1123046875, 1.189453125, 1.2666015625, 1.34375, 1.4208984375, 1.498046875, 1.5751953125, 1.65234375, 1.7294921875, 1.806640625, 1.8837890625, 1.9609375, 2.0380859375, 2.115234375, 2.1923828125, 2.26953125, 2.3466796875, 2.423828125]}, "gradients/decoder.transformer.h.18.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 3.0, 5.0, 4.0, 8.0, 4.0, 11.0, 9.0, 25.0, 38.0, 61.0, 70.0, 130.0, 189.0, 235.0, 386.0, 567.0, 853.0, 1278.0, 1934.0, 2912.0, 4510.0, 7147.0, 11296.0, 17864.0, 29278.0, 48359.0, 81826.0, 164563.0, 342237.0, 141510.0, 74611.0, 44323.0, 26704.0, 16499.0, 10269.0, 6578.0, 4182.0, 2742.0, 1711.0, 1167.0, 753.0, 565.0, 345.0, 260.0, 200.0, 118.0, 76.0, 55.0, 37.0, 14.0, 14.0, 10.0, 5.0, 6.0, 6.0, 4.0, 2.0, 1.0, 1.0, 2.0], "bins": [-0.0743408203125, -0.07204723358154297, -0.06975364685058594, -0.0674600601196289, -0.06516647338867188, -0.06287288665771484, -0.06057929992675781, -0.05828571319580078, -0.05599212646484375, -0.05369853973388672, -0.05140495300292969, -0.049111366271972656, -0.046817779541015625, -0.044524192810058594, -0.04223060607910156, -0.03993701934814453, -0.0376434326171875, -0.03534984588623047, -0.03305625915527344, -0.030762672424316406, -0.028469085693359375, -0.026175498962402344, -0.023881912231445312, -0.02158832550048828, -0.01929473876953125, -0.01700115203857422, -0.014707565307617188, -0.012413978576660156, -0.010120391845703125, -0.007826805114746094, -0.0055332183837890625, -0.0032396316528320312, -0.000946044921875, 0.0013475418090820312, 0.0036411285400390625, 0.005934715270996094, 0.008228302001953125, 0.010521888732910156, 0.012815475463867188, 0.015109062194824219, 0.01740264892578125, 0.01969623565673828, 0.021989822387695312, 0.024283409118652344, 0.026576995849609375, 0.028870582580566406, 0.031164169311523438, 0.03345775604248047, 0.0357513427734375, 0.03804492950439453, 0.04033851623535156, 0.042632102966308594, 0.044925689697265625, 0.047219276428222656, 0.04951286315917969, 0.05180644989013672, 0.05410003662109375, 0.05639362335205078, 0.05868721008300781, 0.060980796813964844, 0.06327438354492188, 0.0655679702758789, 0.06786155700683594, 0.07015514373779297, 0.07244873046875]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 5.0, 1.0, 2.0, 3.0, 3.0, 2.0, 1.0, 8.0, 8.0, 7.0, 15.0, 18.0, 15.0, 27.0, 16.0, 23.0, 26.0, 28.0, 25.0, 22.0, 28.0, 28.0, 34.0, 29.0, 36.0, 44.0, 30.0, 1058.0, 35.0, 32.0, 53.0, 31.0, 25.0, 37.0, 29.0, 30.0, 29.0, 16.0, 29.0, 23.0, 19.0, 17.0, 13.0, 14.0, 12.0, 6.0, 10.0, 10.0, 9.0, 1.0, 4.0, 0.0, 8.0, 2.0, 0.0, 4.0, 2.0, 0.0, 1.0], "bins": [-1.482421875, -1.4365692138671875, -1.390716552734375, -1.3448638916015625, -1.29901123046875, -1.2531585693359375, -1.207305908203125, -1.1614532470703125, -1.1156005859375, -1.0697479248046875, -1.023895263671875, -0.9780426025390625, -0.93218994140625, -0.8863372802734375, -0.840484619140625, -0.7946319580078125, -0.748779296875, -0.7029266357421875, -0.657073974609375, -0.6112213134765625, -0.56536865234375, -0.5195159912109375, -0.473663330078125, -0.4278106689453125, -0.3819580078125, -0.3361053466796875, -0.290252685546875, -0.2444000244140625, -0.19854736328125, -0.1526947021484375, -0.106842041015625, -0.0609893798828125, -0.01513671875, 0.0307159423828125, 0.076568603515625, 0.1224212646484375, 0.16827392578125, 0.2141265869140625, 0.259979248046875, 0.3058319091796875, 0.3516845703125, 0.3975372314453125, 0.443389892578125, 0.4892425537109375, 0.53509521484375, 0.5809478759765625, 0.626800537109375, 0.6726531982421875, 0.718505859375, 0.7643585205078125, 0.810211181640625, 0.8560638427734375, 0.90191650390625, 0.9477691650390625, 0.993621826171875, 1.0394744873046875, 1.0853271484375, 1.1311798095703125, 1.177032470703125, 1.2228851318359375, 1.26873779296875, 1.3145904541015625, 1.360443115234375, 1.4062957763671875, 1.4521484375]}, "gradients/decoder.transformer.h.18.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 2.0, 5.0, 1.0, 5.0, 11.0, 12.0, 22.0, 26.0, 34.0, 71.0, 70.0, 150.0, 185.0, 266.0, 434.0, 619.0, 969.0, 1510.0, 2247.0, 3428.0, 5389.0, 8369.0, 13224.0, 21006.0, 34268.0, 56946.0, 102832.0, 235903.0, 1329877.0, 115653.0, 63047.0, 37246.0, 23217.0, 14426.0, 9053.0, 5849.0, 3665.0, 2388.0, 1536.0, 1009.0, 735.0, 481.0, 319.0, 204.0, 121.0, 111.0, 52.0, 52.0, 36.0, 20.0, 17.0, 8.0, 1.0, 11.0, 3.0, 1.0, 5.0, 0.0, 0.0, 1.0], "bins": [-0.06488037109375, -0.06285858154296875, -0.0608367919921875, -0.05881500244140625, -0.056793212890625, -0.05477142333984375, -0.0527496337890625, -0.05072784423828125, -0.0487060546875, -0.04668426513671875, -0.0446624755859375, -0.04264068603515625, -0.040618896484375, -0.03859710693359375, -0.0365753173828125, -0.03455352783203125, -0.03253173828125, -0.03050994873046875, -0.0284881591796875, -0.02646636962890625, -0.024444580078125, -0.02242279052734375, -0.0204010009765625, -0.01837921142578125, -0.016357421875, -0.01433563232421875, -0.0123138427734375, -0.01029205322265625, -0.008270263671875, -0.00624847412109375, -0.0042266845703125, -0.00220489501953125, -0.00018310546875, 0.00183868408203125, 0.0038604736328125, 0.00588226318359375, 0.007904052734375, 0.00992584228515625, 0.0119476318359375, 0.01396942138671875, 0.0159912109375, 0.01801300048828125, 0.0200347900390625, 0.02205657958984375, 0.024078369140625, 0.02610015869140625, 0.0281219482421875, 0.03014373779296875, 0.03216552734375, 0.03418731689453125, 0.0362091064453125, 0.03823089599609375, 0.040252685546875, 0.04227447509765625, 0.0442962646484375, 0.04631805419921875, 0.04833984375, 0.05036163330078125, 0.0523834228515625, 0.05440521240234375, 0.056427001953125, 0.05844879150390625, 0.0604705810546875, 0.06249237060546875, 0.06451416015625]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 9.0, 7.0, 3.0, 12.0, 16.0, 20.0, 14.0, 37.0, 26.0, 37.0, 37.0, 67.0, 68.0, 89.0, 102.0, 82.0, 49.0, 69.0, 66.0, 41.0, 34.0, 20.0, 35.0, 11.0, 11.0, 7.0, 9.0, 6.0, 6.0, 7.0, 4.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.556510925292969e-06, -6.361864507198334e-06, -6.167218089103699e-06, -5.972571671009064e-06, -5.777925252914429e-06, -5.583278834819794e-06, -5.388632416725159e-06, -5.193985998630524e-06, -4.999339580535889e-06, -4.804693162441254e-06, -4.610046744346619e-06, -4.415400326251984e-06, -4.220753908157349e-06, -4.026107490062714e-06, -3.831461071968079e-06, -3.6368146538734436e-06, -3.4421682357788086e-06, -3.2475218176841736e-06, -3.0528753995895386e-06, -2.8582289814949036e-06, -2.6635825634002686e-06, -2.4689361453056335e-06, -2.2742897272109985e-06, -2.0796433091163635e-06, -1.8849968910217285e-06, -1.6903504729270935e-06, -1.4957040548324585e-06, -1.3010576367378235e-06, -1.1064112186431885e-06, -9.117648005485535e-07, -7.171183824539185e-07, -5.224719643592834e-07, -3.2782554626464844e-07, -1.3317912817001343e-07, 6.146728992462158e-08, 2.561137080192566e-07, 4.507601261138916e-07, 6.454065442085266e-07, 8.400529623031616e-07, 1.0346993803977966e-06, 1.2293457984924316e-06, 1.4239922165870667e-06, 1.6186386346817017e-06, 1.8132850527763367e-06, 2.0079314708709717e-06, 2.2025778889656067e-06, 2.3972243070602417e-06, 2.5918707251548767e-06, 2.7865171432495117e-06, 2.9811635613441467e-06, 3.1758099794387817e-06, 3.3704563975334167e-06, 3.5651028156280518e-06, 3.7597492337226868e-06, 3.954395651817322e-06, 4.149042069911957e-06, 4.343688488006592e-06, 4.538334906101227e-06, 4.732981324195862e-06, 4.927627742290497e-06, 5.122274160385132e-06, 5.316920578479767e-06, 5.511566996574402e-06, 5.706213414669037e-06, 5.900859832763672e-06]}, "gradients/decoder.transformer.h.18.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0, 4.0, 3.0, 6.0, 5.0, 7.0, 14.0, 10.0, 22.0, 33.0, 43.0, 100.0, 151.0, 289.0, 594.0, 3398.0, 891251.0, 150454.0, 1234.0, 377.0, 232.0, 104.0, 92.0, 34.0, 30.0, 24.0, 14.0, 13.0, 6.0, 5.0, 5.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00010228157043457031, -9.885057806968689e-05, -9.541958570480347e-05, -9.198859333992004e-05, -8.855760097503662e-05, -8.51266086101532e-05, -8.169561624526978e-05, -7.826462388038635e-05, -7.483363151550293e-05, -7.140263915061951e-05, -6.797164678573608e-05, -6.454065442085266e-05, -6.110966205596924e-05, -5.7678669691085815e-05, -5.424767732620239e-05, -5.081668496131897e-05, -4.738569259643555e-05, -4.3954700231552124e-05, -4.05237078666687e-05, -3.709271550178528e-05, -3.3661723136901855e-05, -3.0230730772018433e-05, -2.679973840713501e-05, -2.3368746042251587e-05, -1.9937753677368164e-05, -1.650676131248474e-05, -1.3075768947601318e-05, -9.644776582717896e-06, -6.213784217834473e-06, -2.78279185295105e-06, 6.48200511932373e-07, 4.079192876815796e-06, 7.510185241699219e-06, 1.0941177606582642e-05, 1.4372169971466064e-05, 1.7803162336349487e-05, 2.123415470123291e-05, 2.4665147066116333e-05, 2.8096139430999756e-05, 3.152713179588318e-05, 3.49581241607666e-05, 3.8389116525650024e-05, 4.182010889053345e-05, 4.525110125541687e-05, 4.868209362030029e-05, 5.2113085985183716e-05, 5.554407835006714e-05, 5.897507071495056e-05, 6.240606307983398e-05, 6.583705544471741e-05, 6.926804780960083e-05, 7.269904017448425e-05, 7.613003253936768e-05, 7.95610249042511e-05, 8.299201726913452e-05, 8.642300963401794e-05, 8.985400199890137e-05, 9.328499436378479e-05, 9.671598672866821e-05, 0.00010014697909355164, 0.00010357797145843506, 0.00010700896382331848, 0.0001104399561882019, 0.00011387094855308533, 0.00011730194091796875]}, "gradients/decoder.transformer.h.18.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 11.0, 26.0, 107.0, 248.0, 331.0, 181.0, 80.0, 25.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.309957931691315e-06, -8.992488801595755e-06, -8.675018762005493e-06, -8.357549631909933e-06, -8.040079592319671e-06, -7.722610462224111e-06, -7.4051408773812e-06, -7.087671292538289e-06, -6.770202162442729e-06, -6.452732577599818e-06, -6.135262992756907e-06, -5.817793862661347e-06, -5.500324277818436e-06, -5.182854692975525e-06, -4.865385108132614e-06, -4.547915523289703e-06, -4.230445938446792e-06, -3.912976353603881e-06, -3.5955069961346453e-06, -3.2780374112917343e-06, -2.9605680538224988e-06, -2.6430984689795878e-06, -2.325628884136677e-06, -2.0081595266674412e-06, -1.6906899418245303e-06, -1.373220470668457e-06, -1.0557509995123837e-06, -7.382814146694727e-07, -4.2081194351339946e-07, -1.0334247235732619e-07, 2.141271124855848e-07, 5.315964699548203e-07, 8.490660547977313e-07, 1.1665355259538046e-06, 1.4840049971098779e-06, 1.8014745819527889e-06, 2.1189439394220244e-06, 2.4364135242649354e-06, 2.7538831091078464e-06, 3.071352466577082e-06, 3.388822051419993e-06, 3.706291636262904e-06, 4.0237609937321395e-06, 4.3412305785750505e-06, 4.6587001634179614e-06, 4.9761692935135216e-06, 5.2936393331037834e-06, 5.6111084631993435e-06, 5.9285780480422545e-06, 6.2460476328851655e-06, 6.5635172177280765e-06, 6.8809868025709875e-06, 7.198455932666548e-06, 7.515925517509459e-06, 7.83339510235237e-06, 8.15086423244793e-06, 8.468334272038192e-06, 8.785803402133752e-06, 9.103273441724014e-06, 9.420742571819574e-06, 9.738212611409836e-06, 1.0055681741505396e-05, 1.0373150871600956e-05, 1.0690620911191218e-05, 1.1008090041286778e-05]}, "gradients/decoder.transformer.h.18.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 4.0, 4.0, 1.0, 10.0, 3.0, 15.0, 5.0, 13.0, 9.0, 24.0, 13.0, 30.0, 11.0, 15.0, 41.0, 25.0, 43.0, 22.0, 61.0, 31.0, 53.0, 34.0, 64.0, 35.0, 35.0, 60.0, 32.0, 52.0, 31.0, 46.0, 14.0, 45.0, 8.0, 33.0, 9.0, 14.0, 19.0, 11.0, 9.0, 5.0, 7.0, 1.0, 9.0, 0.0, 4.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.682209014892578e-06, -2.5955960154533386e-06, -2.508983016014099e-06, -2.4223700165748596e-06, -2.33575701713562e-06, -2.2491440176963806e-06, -2.162531018257141e-06, -2.0759180188179016e-06, -1.989305019378662e-06, -1.9026920199394226e-06, -1.816079020500183e-06, -1.7294660210609436e-06, -1.642853021621704e-06, -1.5562400221824646e-06, -1.469627022743225e-06, -1.3830140233039856e-06, -1.296401023864746e-06, -1.2097880244255066e-06, -1.123175024986267e-06, -1.0365620255470276e-06, -9.499490261077881e-07, -8.633360266685486e-07, -7.767230272293091e-07, -6.901100277900696e-07, -6.034970283508301e-07, -5.168840289115906e-07, -4.302710294723511e-07, -3.4365803003311157e-07, -2.5704503059387207e-07, -1.7043203115463257e-07, -8.381903171539307e-08, 2.7939677238464355e-09, 8.940696716308594e-08, 1.7601996660232544e-07, 2.6263296604156494e-07, 3.4924596548080444e-07, 4.3585896492004395e-07, 5.224719643592834e-07, 6.09084963798523e-07, 6.956979632377625e-07, 7.82310962677002e-07, 8.689239621162415e-07, 9.55536961555481e-07, 1.0421499609947205e-06, 1.12876296043396e-06, 1.2153759598731995e-06, 1.301988959312439e-06, 1.3886019587516785e-06, 1.475214958190918e-06, 1.5618279576301575e-06, 1.648440957069397e-06, 1.7350539565086365e-06, 1.821666955947876e-06, 1.9082799553871155e-06, 1.994892954826355e-06, 2.0815059542655945e-06, 2.168118953704834e-06, 2.2547319531440735e-06, 2.341344952583313e-06, 2.4279579520225525e-06, 2.514570951461792e-06, 2.6011839509010315e-06, 2.687796950340271e-06, 2.7744099497795105e-06, 2.86102294921875e-06]}, "gradients/decoder.transformer.h.18.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 2.0, 3.0, 2.0, 5.0, 2.0, 4.0, 6.0, 3.0, 4.0, 11.0, 10.0, 16.0, 19.0, 14.0, 22.0, 19.0, 29.0, 14.0, 48.0, 23.0, 38.0, 40.0, 46.0, 39.0, 37.0, 45.0, 30.0, 36.0, 46.0, 21.0, 29.0, 30.0, 30.0, 25.0, 43.0, 20.0, 40.0, 27.0, 21.0, 22.0, 16.0, 14.0, 15.0, 11.0, 8.0, 6.0, 3.0, 5.0, 3.0, 1.0, 3.0, 1.0, 4.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-2.513671875, -2.4365234375, -2.359375, -2.2822265625, -2.205078125, -2.1279296875, -2.05078125, -1.9736328125, -1.896484375, -1.8193359375, -1.7421875, -1.6650390625, -1.587890625, -1.5107421875, -1.43359375, -1.3564453125, -1.279296875, -1.2021484375, -1.125, -1.0478515625, -0.970703125, -0.8935546875, -0.81640625, -0.7392578125, -0.662109375, -0.5849609375, -0.5078125, -0.4306640625, -0.353515625, -0.2763671875, -0.19921875, -0.1220703125, -0.044921875, 0.0322265625, 0.109375, 0.1865234375, 0.263671875, 0.3408203125, 0.41796875, 0.4951171875, 0.572265625, 0.6494140625, 0.7265625, 0.8037109375, 0.880859375, 0.9580078125, 1.03515625, 1.1123046875, 1.189453125, 1.2666015625, 1.34375, 1.4208984375, 1.498046875, 1.5751953125, 1.65234375, 1.7294921875, 1.806640625, 1.8837890625, 1.9609375, 2.0380859375, 2.115234375, 2.1923828125, 2.26953125, 2.3466796875, 2.423828125]}, "gradients/decoder.transformer.h.18.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 4.0, 3.0, 5.0, 3.0, 10.0, 7.0, 13.0, 21.0, 27.0, 38.0, 67.0, 101.0, 137.0, 209.0, 326.0, 493.0, 685.0, 1081.0, 1658.0, 2685.0, 4566.0, 8566.0, 19763.0, 66158.0, 574062.0, 290202.0, 44595.0, 15198.0, 7207.0, 3986.0, 2404.0, 1407.0, 918.0, 645.0, 392.0, 292.0, 214.0, 135.0, 81.0, 71.0, 29.0, 25.0, 19.0, 14.0, 16.0, 7.0, 6.0, 7.0, 4.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.48046875, -3.377685546875, -3.27490234375, -3.172119140625, -3.0693359375, -2.966552734375, -2.86376953125, -2.760986328125, -2.658203125, -2.555419921875, -2.45263671875, -2.349853515625, -2.2470703125, -2.144287109375, -2.04150390625, -1.938720703125, -1.8359375, -1.733154296875, -1.63037109375, -1.527587890625, -1.4248046875, -1.322021484375, -1.21923828125, -1.116455078125, -1.013671875, -0.910888671875, -0.80810546875, -0.705322265625, -0.6025390625, -0.499755859375, -0.39697265625, -0.294189453125, -0.19140625, -0.088623046875, 0.01416015625, 0.116943359375, 0.2197265625, 0.322509765625, 0.42529296875, 0.528076171875, 0.630859375, 0.733642578125, 0.83642578125, 0.939208984375, 1.0419921875, 1.144775390625, 1.24755859375, 1.350341796875, 1.453125, 1.555908203125, 1.65869140625, 1.761474609375, 1.8642578125, 1.967041015625, 2.06982421875, 2.172607421875, 2.275390625, 2.378173828125, 2.48095703125, 2.583740234375, 2.6865234375, 2.789306640625, 2.89208984375, 2.994873046875, 3.09765625]}, "gradients/decoder.transformer.h.18.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 5.0, 8.0, 11.0, 15.0, 16.0, 23.0, 16.0, 19.0, 21.0, 35.0, 39.0, 29.0, 35.0, 39.0, 39.0, 50.0, 89.0, 279.0, 1720.0, 99.0, 58.0, 45.0, 43.0, 41.0, 48.0, 38.0, 30.0, 24.0, 21.0, 19.0, 18.0, 14.0, 10.0, 10.0, 8.0, 9.0, 6.0, 2.0, 3.0, 2.0, 3.0, 2.0, 4.0, 1.0, 1.0, 1.0], "bins": [-10.5859375, -10.2930908203125, -10.000244140625, -9.7073974609375, -9.41455078125, -9.1217041015625, -8.828857421875, -8.5360107421875, -8.2431640625, -7.9503173828125, -7.657470703125, -7.3646240234375, -7.07177734375, -6.7789306640625, -6.486083984375, -6.1932373046875, -5.900390625, -5.6075439453125, -5.314697265625, -5.0218505859375, -4.72900390625, -4.4361572265625, -4.143310546875, -3.8504638671875, -3.5576171875, -3.2647705078125, -2.971923828125, -2.6790771484375, -2.38623046875, -2.0933837890625, -1.800537109375, -1.5076904296875, -1.21484375, -0.9219970703125, -0.629150390625, -0.3363037109375, -0.04345703125, 0.2493896484375, 0.542236328125, 0.8350830078125, 1.1279296875, 1.4207763671875, 1.713623046875, 2.0064697265625, 2.29931640625, 2.5921630859375, 2.885009765625, 3.1778564453125, 3.470703125, 3.7635498046875, 4.056396484375, 4.3492431640625, 4.64208984375, 4.9349365234375, 5.227783203125, 5.5206298828125, 5.8134765625, 6.1063232421875, 6.399169921875, 6.6920166015625, 6.98486328125, 7.2777099609375, 7.570556640625, 7.8634033203125, 8.15625]}, "gradients/decoder.transformer.h.18.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 3.0, 5.0, 3.0, 5.0, 11.0, 15.0, 11.0, 14.0, 27.0, 33.0, 33.0, 71.0, 114.0, 174.0, 291.0, 799.0, 4042.0, 54605.0, 3057259.0, 24201.0, 2637.0, 632.0, 257.0, 128.0, 99.0, 69.0, 36.0, 24.0, 33.0, 26.0, 14.0, 9.0, 5.0, 6.0, 6.0, 2.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 2.0], "bins": [-24.078125, -23.397705078125, -22.71728515625, -22.036865234375, -21.3564453125, -20.676025390625, -19.99560546875, -19.315185546875, -18.634765625, -17.954345703125, -17.27392578125, -16.593505859375, -15.9130859375, -15.232666015625, -14.55224609375, -13.871826171875, -13.19140625, -12.510986328125, -11.83056640625, -11.150146484375, -10.4697265625, -9.789306640625, -9.10888671875, -8.428466796875, -7.748046875, -7.067626953125, -6.38720703125, -5.706787109375, -5.0263671875, -4.345947265625, -3.66552734375, -2.985107421875, -2.3046875, -1.624267578125, -0.94384765625, -0.263427734375, 0.4169921875, 1.097412109375, 1.77783203125, 2.458251953125, 3.138671875, 3.819091796875, 4.49951171875, 5.179931640625, 5.8603515625, 6.540771484375, 7.22119140625, 7.901611328125, 8.58203125, 9.262451171875, 9.94287109375, 10.623291015625, 11.3037109375, 11.984130859375, 12.66455078125, 13.344970703125, 14.025390625, 14.705810546875, 15.38623046875, 16.066650390625, 16.7470703125, 17.427490234375, 18.10791015625, 18.788330078125, 19.46875]}, "gradients/decoder.transformer.h.18.ln_1.weight": {"_type": "histogram", "values": [3.0, 0.0, 26.0, 519.0, 459.0, 16.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.772181510925293, -9.539936065673828, -6.307689666748047, -3.075444221496582, 0.15680217742919922, 3.3890485763549805, 6.621293067932129, 9.85353946685791, 13.085785865783691, 16.318031311035156, 19.550277709960938, 22.78252410888672, 26.0147705078125, 29.24701690673828, 32.4792594909668, 35.71150588989258, 38.94375228881836, 42.17599868774414, 45.40824508666992, 48.64048767089844, 51.87273406982422, 55.10498046875, 58.33722686767578, 61.56947326660156, 64.80171966552734, 68.03396606445312, 71.2662124633789, 74.49845886230469, 77.73070526123047, 80.96295166015625, 84.1951904296875, 87.42744445800781, 90.65968322753906, 93.89192962646484, 97.12417602539062, 100.3564224243164, 103.58866882324219, 106.82091522216797, 110.05316162109375, 113.285400390625, 116.51765441894531, 119.7499008178711, 122.98214721679688, 126.21439361572266, 129.44664001464844, 132.6788787841797, 135.9111328125, 139.14337158203125, 142.3756103515625, 145.60784912109375, 148.84010314941406, 152.0723419189453, 155.30459594726562, 158.53683471679688, 161.7690887451172, 165.00132751464844, 168.23358154296875, 171.4658203125, 174.6980743408203, 177.93031311035156, 181.16256713867188, 184.39480590820312, 187.62705993652344, 190.8592987060547, 194.091552734375]}, "gradients/decoder.transformer.h.18.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 4.0, 1.0, 6.0, 5.0, 7.0, 8.0, 5.0, 11.0, 9.0, 14.0, 25.0, 21.0, 28.0, 37.0, 38.0, 37.0, 45.0, 39.0, 53.0, 34.0, 39.0, 55.0, 50.0, 32.0, 40.0, 44.0, 46.0, 26.0, 31.0, 31.0, 33.0, 25.0, 25.0, 17.0, 18.0, 13.0, 10.0, 10.0, 10.0, 9.0, 10.0, 2.0, 2.0, 4.0, 0.0, 2.0, 1.0, 1.0, 1.0], "bins": [-33.90519332885742, -32.97258758544922, -32.03997802734375, -31.107372283935547, -30.174766540527344, -29.242158889770508, -28.309553146362305, -27.37694549560547, -26.444339752197266, -25.51173210144043, -24.579126358032227, -23.64651870727539, -22.713912963867188, -21.78130531311035, -20.84869956970215, -19.916091918945312, -18.98348617553711, -18.050878524780273, -17.11827278137207, -16.185665130615234, -15.253059387207031, -14.320451736450195, -13.387845993041992, -12.455238342285156, -11.52263069152832, -10.5900239944458, -9.657417297363281, -8.724810600280762, -7.792203903198242, -6.8595967292785645, -5.926990032196045, -4.994383335113525, -4.061777114868164, -3.1291704177856445, -2.196563720703125, -1.2639567852020264, -0.33135008811950684, 0.6012568473815918, 1.5338635444641113, 2.466470241546631, 3.3990769386291504, 4.33168363571167, 5.2642903327941895, 6.196897506713867, 7.129504203796387, 8.062110900878906, 8.994717597961426, 9.927324295043945, 10.859930992126465, 11.792537689208984, 12.725144386291504, 13.657751083374023, 14.590357780456543, 15.522964477539062, 16.4555721282959, 17.3881778717041, 18.320785522460938, 19.253393173217773, 20.185998916625977, 21.118606567382812, 22.051212310791016, 22.98381996154785, 23.916425704956055, 24.84903335571289, 25.781639099121094]}, "gradients/decoder.transformer.h.17.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 2.0, 5.0, 3.0, 2.0, 7.0, 1.0, 5.0, 4.0, 13.0, 8.0, 14.0, 13.0, 16.0, 25.0, 25.0, 23.0, 26.0, 26.0, 36.0, 30.0, 41.0, 54.0, 33.0, 37.0, 33.0, 35.0, 44.0, 28.0, 25.0, 32.0, 38.0, 33.0, 26.0, 23.0, 27.0, 37.0, 34.0, 21.0, 26.0, 15.0, 19.0, 11.0, 14.0, 8.0, 5.0, 7.0, 3.0, 4.0, 3.0, 1.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0, 2.0], "bins": [-2.544921875, -2.46746826171875, -2.3900146484375, -2.31256103515625, -2.235107421875, -2.15765380859375, -2.0802001953125, -2.00274658203125, -1.92529296875, -1.84783935546875, -1.7703857421875, -1.69293212890625, -1.615478515625, -1.53802490234375, -1.4605712890625, -1.38311767578125, -1.3056640625, -1.22821044921875, -1.1507568359375, -1.07330322265625, -0.995849609375, -0.91839599609375, -0.8409423828125, -0.76348876953125, -0.68603515625, -0.60858154296875, -0.5311279296875, -0.45367431640625, -0.376220703125, -0.29876708984375, -0.2213134765625, -0.14385986328125, -0.06640625, 0.01104736328125, 0.0885009765625, 0.16595458984375, 0.243408203125, 0.32086181640625, 0.3983154296875, 0.47576904296875, 0.55322265625, 0.63067626953125, 0.7081298828125, 0.78558349609375, 0.863037109375, 0.94049072265625, 1.0179443359375, 1.09539794921875, 1.1728515625, 1.25030517578125, 1.3277587890625, 1.40521240234375, 1.482666015625, 1.56011962890625, 1.6375732421875, 1.71502685546875, 1.79248046875, 1.86993408203125, 1.9473876953125, 2.02484130859375, 2.102294921875, 2.17974853515625, 2.2572021484375, 2.33465576171875, 2.412109375]}, "gradients/decoder.transformer.h.17.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 9.0, 3.0, 4.0, 9.0, 14.0, 20.0, 11.0, 25.0, 32.0, 40.0, 59.0, 83.0, 138.0, 222.0, 305.0, 606.0, 1050.0, 1758.0, 3671.0, 7299.0, 16150.0, 38860.0, 109706.0, 414008.0, 1483413.0, 1511925.0, 421812.0, 111646.0, 39715.0, 16498.0, 7489.0, 3520.0, 1785.0, 923.0, 578.0, 331.0, 200.0, 112.0, 88.0, 54.0, 36.0, 23.0, 26.0, 13.0, 5.0, 6.0, 7.0, 3.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-3.51953125, -3.41290283203125, -3.3062744140625, -3.19964599609375, -3.093017578125, -2.98638916015625, -2.8797607421875, -2.77313232421875, -2.66650390625, -2.55987548828125, -2.4532470703125, -2.34661865234375, -2.239990234375, -2.13336181640625, -2.0267333984375, -1.92010498046875, -1.8134765625, -1.70684814453125, -1.6002197265625, -1.49359130859375, -1.386962890625, -1.28033447265625, -1.1737060546875, -1.06707763671875, -0.96044921875, -0.85382080078125, -0.7471923828125, -0.64056396484375, -0.533935546875, -0.42730712890625, -0.3206787109375, -0.21405029296875, -0.107421875, -0.00079345703125, 0.1058349609375, 0.21246337890625, 0.319091796875, 0.42572021484375, 0.5323486328125, 0.63897705078125, 0.74560546875, 0.85223388671875, 0.9588623046875, 1.06549072265625, 1.172119140625, 1.27874755859375, 1.3853759765625, 1.49200439453125, 1.5986328125, 1.70526123046875, 1.8118896484375, 1.91851806640625, 2.025146484375, 2.13177490234375, 2.2384033203125, 2.34503173828125, 2.45166015625, 2.55828857421875, 2.6649169921875, 2.77154541015625, 2.878173828125, 2.98480224609375, 3.0914306640625, 3.19805908203125, 3.3046875]}, "gradients/decoder.transformer.h.17.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0, 4.0, 10.0, 11.0, 8.0, 27.0, 37.0, 50.0, 93.0, 125.0, 217.0, 360.0, 535.0, 671.0, 704.0, 470.0, 270.0, 169.0, 117.0, 60.0, 49.0, 24.0, 20.0, 15.0, 16.0, 6.0, 1.0, 3.0, 4.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-11.5, -11.2373046875, -10.974609375, -10.7119140625, -10.44921875, -10.1865234375, -9.923828125, -9.6611328125, -9.3984375, -9.1357421875, -8.873046875, -8.6103515625, -8.34765625, -8.0849609375, -7.822265625, -7.5595703125, -7.296875, -7.0341796875, -6.771484375, -6.5087890625, -6.24609375, -5.9833984375, -5.720703125, -5.4580078125, -5.1953125, -4.9326171875, -4.669921875, -4.4072265625, -4.14453125, -3.8818359375, -3.619140625, -3.3564453125, -3.09375, -2.8310546875, -2.568359375, -2.3056640625, -2.04296875, -1.7802734375, -1.517578125, -1.2548828125, -0.9921875, -0.7294921875, -0.466796875, -0.2041015625, 0.05859375, 0.3212890625, 0.583984375, 0.8466796875, 1.109375, 1.3720703125, 1.634765625, 1.8974609375, 2.16015625, 2.4228515625, 2.685546875, 2.9482421875, 3.2109375, 3.4736328125, 3.736328125, 3.9990234375, 4.26171875, 4.5244140625, 4.787109375, 5.0498046875, 5.3125]}, "gradients/decoder.transformer.h.17.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 6.0, 1.0, 3.0, 2.0, 7.0, 4.0, 12.0, 16.0, 20.0, 24.0, 39.0, 57.0, 89.0, 123.0, 199.0, 366.0, 700.0, 1855.0, 6627.0, 34057.0, 270092.0, 2918493.0, 860820.0, 82244.0, 13093.0, 3116.0, 1086.0, 481.0, 231.0, 135.0, 73.0, 57.0, 49.0, 38.0, 23.0, 10.0, 14.0, 12.0, 6.0, 7.0, 2.0, 0.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.3359375, -10.95458984375, -10.5732421875, -10.19189453125, -9.810546875, -9.42919921875, -9.0478515625, -8.66650390625, -8.28515625, -7.90380859375, -7.5224609375, -7.14111328125, -6.759765625, -6.37841796875, -5.9970703125, -5.61572265625, -5.234375, -4.85302734375, -4.4716796875, -4.09033203125, -3.708984375, -3.32763671875, -2.9462890625, -2.56494140625, -2.18359375, -1.80224609375, -1.4208984375, -1.03955078125, -0.658203125, -0.27685546875, 0.1044921875, 0.48583984375, 0.8671875, 1.24853515625, 1.6298828125, 2.01123046875, 2.392578125, 2.77392578125, 3.1552734375, 3.53662109375, 3.91796875, 4.29931640625, 4.6806640625, 5.06201171875, 5.443359375, 5.82470703125, 6.2060546875, 6.58740234375, 6.96875, 7.35009765625, 7.7314453125, 8.11279296875, 8.494140625, 8.87548828125, 9.2568359375, 9.63818359375, 10.01953125, 10.40087890625, 10.7822265625, 11.16357421875, 11.544921875, 11.92626953125, 12.3076171875, 12.68896484375, 13.0703125]}, "gradients/decoder.transformer.h.17.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 10.0, 23.0, 38.0, 76.0, 130.0, 149.0, 173.0, 157.0, 111.0, 75.0, 44.0, 10.0, 10.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-77.01534271240234, -75.29344940185547, -73.57154846191406, -71.84965515136719, -70.12776184082031, -68.40586853027344, -66.68396759033203, -64.96207427978516, -63.24018096923828, -61.51828384399414, -59.796390533447266, -58.074493408203125, -56.35260009765625, -54.63070297241211, -52.908809661865234, -51.186912536621094, -49.46501922607422, -47.74312210083008, -46.0212287902832, -44.29933166503906, -42.57743835449219, -40.85554122924805, -39.13364791870117, -37.41175079345703, -35.68985366821289, -33.96795654296875, -32.246063232421875, -30.524168014526367, -28.80227279663086, -27.08037567138672, -25.358482360839844, -23.636585235595703, -21.914691925048828, -20.19279670715332, -18.470901489257812, -16.749006271362305, -15.027111053466797, -13.305214881896973, -11.583319664001465, -9.861424446105957, -8.13952922821045, -6.417634010314941, -4.695738792419434, -2.9738430976867676, -1.2519478797912598, 0.46994781494140625, 2.191843032836914, 3.913738250732422, 5.63563346862793, 7.3575286865234375, 9.079423904418945, 10.801319122314453, 12.523214340209961, 14.245110511779785, 15.967005729675293, 17.688899993896484, 19.410797119140625, 21.132692337036133, 22.85458755493164, 24.57648277282715, 26.298377990722656, 28.020275115966797, 29.742168426513672, 31.464065551757812, 33.18595886230469]}, "gradients/decoder.transformer.h.17.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 6.0, 7.0, 6.0, 7.0, 9.0, 18.0, 15.0, 14.0, 21.0, 22.0, 26.0, 28.0, 32.0, 24.0, 51.0, 45.0, 43.0, 42.0, 49.0, 50.0, 46.0, 46.0, 46.0, 32.0, 38.0, 34.0, 32.0, 36.0, 32.0, 31.0, 19.0, 18.0, 15.0, 15.0, 10.0, 10.0, 10.0, 13.0, 5.0, 3.0, 2.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.953414916992188, -22.284940719604492, -21.616464614868164, -20.94799041748047, -20.27951431274414, -19.611040115356445, -18.94256591796875, -18.274089813232422, -17.605613708496094, -16.9371395111084, -16.26866340637207, -15.600189208984375, -14.931713104248047, -14.263238906860352, -13.59476375579834, -12.926288604736328, -12.257814407348633, -11.589339256286621, -10.92086410522461, -10.252389907836914, -9.583913803100586, -8.91543960571289, -8.246964454650879, -7.578489303588867, -6.9100141525268555, -6.241539001464844, -5.573063850402832, -4.9045891761779785, -4.236114025115967, -3.567638874053955, -2.8991641998291016, -2.23068904876709, -1.5622138977050781, -0.893738865852356, -0.2252638339996338, 0.44321107864379883, 1.1116862297058105, 1.7801613807678223, 2.448636054992676, 3.1171112060546875, 3.785586357116699, 4.454061508178711, 5.122536659240723, 5.791011333465576, 6.459486484527588, 7.1279616355896, 7.796436309814453, 8.464911460876465, 9.133386611938477, 9.801861763000488, 10.4703369140625, 11.138811111450195, 11.807287216186523, 12.475761413574219, 13.14423656463623, 13.812711715698242, 14.481186866760254, 15.149662017822266, 15.818137168884277, 16.48661231994629, 17.155086517333984, 17.823562622070312, 18.492036819458008, 19.160511016845703, 19.82898712158203]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 3.0, 3.0, 2.0, 2.0, 5.0, 7.0, 6.0, 4.0, 12.0, 14.0, 22.0, 15.0, 24.0, 19.0, 21.0, 31.0, 25.0, 29.0, 39.0, 33.0, 40.0, 43.0, 36.0, 41.0, 32.0, 34.0, 31.0, 31.0, 43.0, 27.0, 29.0, 38.0, 29.0, 26.0, 40.0, 23.0, 15.0, 22.0, 26.0, 13.0, 19.0, 16.0, 10.0, 3.0, 11.0, 4.0, 0.0, 4.0, 0.0, 2.0, 6.0, 1.0, 2.0, 0.0, 2.0], "bins": [-2.802734375, -2.720611572265625, -2.63848876953125, -2.556365966796875, -2.4742431640625, -2.392120361328125, -2.30999755859375, -2.227874755859375, -2.145751953125, -2.063629150390625, -1.98150634765625, -1.899383544921875, -1.8172607421875, -1.735137939453125, -1.65301513671875, -1.570892333984375, -1.48876953125, -1.406646728515625, -1.32452392578125, -1.242401123046875, -1.1602783203125, -1.078155517578125, -0.99603271484375, -0.913909912109375, -0.831787109375, -0.749664306640625, -0.66754150390625, -0.585418701171875, -0.5032958984375, -0.421173095703125, -0.33905029296875, -0.256927490234375, -0.1748046875, -0.092681884765625, -0.01055908203125, 0.071563720703125, 0.1536865234375, 0.235809326171875, 0.31793212890625, 0.400054931640625, 0.482177734375, 0.564300537109375, 0.64642333984375, 0.728546142578125, 0.8106689453125, 0.892791748046875, 0.97491455078125, 1.057037353515625, 1.13916015625, 1.221282958984375, 1.30340576171875, 1.385528564453125, 1.4676513671875, 1.549774169921875, 1.63189697265625, 1.714019775390625, 1.796142578125, 1.878265380859375, 1.96038818359375, 2.042510986328125, 2.1246337890625, 2.206756591796875, 2.28887939453125, 2.371002197265625, 2.453125]}, "gradients/decoder.transformer.h.17.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 4.0, 1.0, 5.0, 7.0, 10.0, 6.0, 12.0, 14.0, 24.0, 46.0, 68.0, 106.0, 194.0, 234.0, 363.0, 512.0, 790.0, 1254.0, 2043.0, 3137.0, 4968.0, 7837.0, 12623.0, 20156.0, 32448.0, 53344.0, 94489.0, 203628.0, 311927.0, 124720.0, 67135.0, 39962.0, 24732.0, 15331.0, 9680.0, 6064.0, 3840.0, 2451.0, 1547.0, 932.0, 667.0, 427.0, 286.0, 179.0, 116.0, 93.0, 57.0, 40.0, 16.0, 16.0, 7.0, 7.0, 6.0, 5.0, 1.0, 3.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.07830810546875, -0.07587718963623047, -0.07344627380371094, -0.0710153579711914, -0.06858444213867188, -0.06615352630615234, -0.06372261047363281, -0.06129169464111328, -0.05886077880859375, -0.05642986297607422, -0.05399894714355469, -0.051568031311035156, -0.049137115478515625, -0.046706199645996094, -0.04427528381347656, -0.04184436798095703, -0.0394134521484375, -0.03698253631591797, -0.03455162048339844, -0.032120704650878906, -0.029689788818359375, -0.027258872985839844, -0.024827957153320312, -0.02239704132080078, -0.01996612548828125, -0.01753520965576172, -0.015104293823242188, -0.012673377990722656, -0.010242462158203125, -0.007811546325683594, -0.0053806304931640625, -0.0029497146606445312, -0.000518798828125, 0.0019121170043945312, 0.0043430328369140625, 0.006773948669433594, 0.009204864501953125, 0.011635780334472656, 0.014066696166992188, 0.01649761199951172, 0.01892852783203125, 0.02135944366455078, 0.023790359497070312, 0.026221275329589844, 0.028652191162109375, 0.031083106994628906, 0.03351402282714844, 0.03594493865966797, 0.0383758544921875, 0.04080677032470703, 0.04323768615722656, 0.045668601989746094, 0.048099517822265625, 0.050530433654785156, 0.05296134948730469, 0.05539226531982422, 0.05782318115234375, 0.06025409698486328, 0.06268501281738281, 0.06511592864990234, 0.06754684448242188, 0.0699777603149414, 0.07240867614746094, 0.07483959197998047, 0.0772705078125]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 0.0, 3.0, 1.0, 4.0, 1.0, 7.0, 4.0, 9.0, 8.0, 9.0, 7.0, 9.0, 6.0, 15.0, 10.0, 17.0, 19.0, 18.0, 25.0, 22.0, 26.0, 16.0, 38.0, 29.0, 35.0, 42.0, 36.0, 37.0, 40.0, 1066.0, 43.0, 34.0, 47.0, 37.0, 20.0, 29.0, 27.0, 28.0, 21.0, 27.0, 17.0, 13.0, 15.0, 18.0, 15.0, 14.0, 10.0, 13.0, 9.0, 12.0, 6.0, 4.0, 6.0, 4.0, 4.0, 3.0, 0.0, 3.0, 2.0, 1.0], "bins": [-1.65625, -1.6062164306640625, -1.556182861328125, -1.5061492919921875, -1.45611572265625, -1.4060821533203125, -1.356048583984375, -1.3060150146484375, -1.2559814453125, -1.2059478759765625, -1.155914306640625, -1.1058807373046875, -1.05584716796875, -1.0058135986328125, -0.955780029296875, -0.9057464599609375, -0.855712890625, -0.8056793212890625, -0.755645751953125, -0.7056121826171875, -0.65557861328125, -0.6055450439453125, -0.555511474609375, -0.5054779052734375, -0.4554443359375, -0.4054107666015625, -0.355377197265625, -0.3053436279296875, -0.25531005859375, -0.2052764892578125, -0.155242919921875, -0.1052093505859375, -0.05517578125, -0.0051422119140625, 0.044891357421875, 0.0949249267578125, 0.14495849609375, 0.1949920654296875, 0.245025634765625, 0.2950592041015625, 0.3450927734375, 0.3951263427734375, 0.445159912109375, 0.4951934814453125, 0.54522705078125, 0.5952606201171875, 0.645294189453125, 0.6953277587890625, 0.745361328125, 0.7953948974609375, 0.845428466796875, 0.8954620361328125, 0.94549560546875, 0.9955291748046875, 1.045562744140625, 1.0955963134765625, 1.1456298828125, 1.1956634521484375, 1.245697021484375, 1.2957305908203125, 1.34576416015625, 1.3957977294921875, 1.445831298828125, 1.4958648681640625, 1.5458984375]}, "gradients/decoder.transformer.h.17.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 0.0, 2.0, 2.0, 7.0, 6.0, 16.0, 17.0, 28.0, 53.0, 46.0, 87.0, 88.0, 168.0, 257.0, 363.0, 564.0, 824.0, 1152.0, 1757.0, 2739.0, 4124.0, 5987.0, 9002.0, 13682.0, 21272.0, 33927.0, 56800.0, 104731.0, 302063.0, 1279381.0, 105032.0, 56695.0, 34100.0, 21202.0, 13669.0, 9054.0, 5912.0, 4009.0, 2735.0, 1817.0, 1211.0, 831.0, 557.0, 380.0, 247.0, 176.0, 122.0, 80.0, 61.0, 31.0, 25.0, 14.0, 8.0, 14.0, 6.0, 4.0, 3.0, 3.0, 1.0, 1.0], "bins": [-0.07244873046875, -0.07025337219238281, -0.06805801391601562, -0.06586265563964844, -0.06366729736328125, -0.06147193908691406, -0.059276580810546875, -0.05708122253417969, -0.0548858642578125, -0.05269050598144531, -0.050495147705078125, -0.04829978942871094, -0.04610443115234375, -0.04390907287597656, -0.041713714599609375, -0.03951835632324219, -0.037322998046875, -0.03512763977050781, -0.032932281494140625, -0.030736923217773438, -0.02854156494140625, -0.026346206665039062, -0.024150848388671875, -0.021955490112304688, -0.0197601318359375, -0.017564773559570312, -0.015369415283203125, -0.013174057006835938, -0.01097869873046875, -0.008783340454101562, -0.006587982177734375, -0.0043926239013671875, -0.002197265625, -1.9073486328125e-06, 0.002193450927734375, 0.0043888092041015625, 0.00658416748046875, 0.008779525756835938, 0.010974884033203125, 0.013170242309570312, 0.0153656005859375, 0.017560958862304688, 0.019756317138671875, 0.021951675415039062, 0.02414703369140625, 0.026342391967773438, 0.028537750244140625, 0.030733108520507812, 0.032928466796875, 0.03512382507324219, 0.037319183349609375, 0.03951454162597656, 0.04170989990234375, 0.04390525817871094, 0.046100616455078125, 0.04829597473144531, 0.0504913330078125, 0.05268669128417969, 0.054882049560546875, 0.05707740783691406, 0.05927276611328125, 0.06146812438964844, 0.06366348266601562, 0.06585884094238281, 0.06805419921875]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 4.0, 2.0, 5.0, 2.0, 6.0, 10.0, 16.0, 14.0, 11.0, 12.0, 23.0, 30.0, 41.0, 38.0, 53.0, 60.0, 75.0, 89.0, 94.0, 71.0, 62.0, 47.0, 43.0, 32.0, 35.0, 33.0, 23.0, 14.0, 12.0, 12.0, 6.0, 4.0, 9.0, 4.0, 3.0, 4.0, 1.0, 4.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.589557647705078e-06, -4.412606358528137e-06, -4.235655069351196e-06, -4.058703780174255e-06, -3.8817524909973145e-06, -3.7048012018203735e-06, -3.5278499126434326e-06, -3.3508986234664917e-06, -3.1739473342895508e-06, -2.99699604511261e-06, -2.820044755935669e-06, -2.643093466758728e-06, -2.466142177581787e-06, -2.289190888404846e-06, -2.1122395992279053e-06, -1.9352883100509644e-06, -1.7583370208740234e-06, -1.5813857316970825e-06, -1.4044344425201416e-06, -1.2274831533432007e-06, -1.0505318641662598e-06, -8.735805749893188e-07, -6.966292858123779e-07, -5.19677996635437e-07, -3.427267074584961e-07, -1.6577541828155518e-07, 1.1175870895385742e-08, 1.8812716007232666e-07, 3.650784492492676e-07, 5.420297384262085e-07, 7.189810276031494e-07, 8.959323167800903e-07, 1.0728836059570312e-06, 1.2498348951339722e-06, 1.426786184310913e-06, 1.603737473487854e-06, 1.780688762664795e-06, 1.957640051841736e-06, 2.1345913410186768e-06, 2.3115426301956177e-06, 2.4884939193725586e-06, 2.6654452085494995e-06, 2.8423964977264404e-06, 3.0193477869033813e-06, 3.1962990760803223e-06, 3.373250365257263e-06, 3.550201654434204e-06, 3.727152943611145e-06, 3.904104232788086e-06, 4.081055521965027e-06, 4.258006811141968e-06, 4.434958100318909e-06, 4.61190938949585e-06, 4.7888606786727905e-06, 4.9658119678497314e-06, 5.142763257026672e-06, 5.319714546203613e-06, 5.496665835380554e-06, 5.673617124557495e-06, 5.850568413734436e-06, 6.027519702911377e-06, 6.204470992088318e-06, 6.381422281265259e-06, 6.5583735704422e-06, 6.735324859619141e-06]}, "gradients/decoder.transformer.h.17.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 4.0, 2.0, 0.0, 2.0, 5.0, 6.0, 6.0, 2.0, 6.0, 12.0, 15.0, 22.0, 37.0, 44.0, 49.0, 63.0, 126.0, 236.0, 438.0, 1799.0, 131318.0, 900899.0, 11914.0, 781.0, 295.0, 146.0, 105.0, 67.0, 42.0, 23.0, 29.0, 19.0, 17.0, 9.0, 11.0, 4.0, 4.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.00010544061660766602, -0.00010261125862598419, -9.978190064430237e-05, -9.695254266262054e-05, -9.412318468093872e-05, -9.12938266992569e-05, -8.846446871757507e-05, -8.563511073589325e-05, -8.280575275421143e-05, -7.99763947725296e-05, -7.714703679084778e-05, -7.431767880916595e-05, -7.148832082748413e-05, -6.865896284580231e-05, -6.582960486412048e-05, -6.300024688243866e-05, -6.0170888900756836e-05, -5.734153091907501e-05, -5.451217293739319e-05, -5.1682814955711365e-05, -4.885345697402954e-05, -4.602409899234772e-05, -4.3194741010665894e-05, -4.036538302898407e-05, -3.7536025047302246e-05, -3.470666706562042e-05, -3.18773090839386e-05, -2.9047951102256775e-05, -2.621859312057495e-05, -2.3389235138893127e-05, -2.0559877157211304e-05, -1.773051917552948e-05, -1.4901161193847656e-05, -1.2071803212165833e-05, -9.242445230484009e-06, -6.413087248802185e-06, -3.5837292671203613e-06, -7.543712854385376e-07, 2.074986696243286e-06, 4.90434467792511e-06, 7.733702659606934e-06, 1.0563060641288757e-05, 1.3392418622970581e-05, 1.6221776604652405e-05, 1.905113458633423e-05, 2.1880492568016052e-05, 2.4709850549697876e-05, 2.75392085313797e-05, 3.0368566513061523e-05, 3.319792449474335e-05, 3.602728247642517e-05, 3.8856640458106995e-05, 4.168599843978882e-05, 4.451535642147064e-05, 4.7344714403152466e-05, 5.017407238483429e-05, 5.300343036651611e-05, 5.583278834819794e-05, 5.866214632987976e-05, 6.149150431156158e-05, 6.432086229324341e-05, 6.715022027492523e-05, 6.997957825660706e-05, 7.280893623828888e-05, 7.56382942199707e-05]}, "gradients/decoder.transformer.h.17.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 7.0, 17.0, 18.0, 38.0, 74.0, 96.0, 150.0, 144.0, 143.0, 121.0, 83.0, 54.0, 35.0, 12.0, 11.0, 10.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.501549589505885e-06, -7.3298747338412795e-06, -7.158199423429323e-06, -6.986524567764718e-06, -6.814849712100113e-06, -6.6431748564355075e-06, -6.471499546023551e-06, -6.299824690358946e-06, -6.128149834694341e-06, -5.9564749790297356e-06, -5.784799668617779e-06, -5.613124812953174e-06, -5.441449957288569e-06, -5.2697751016239636e-06, -5.098099791212007e-06, -4.926424935547402e-06, -4.754750079882797e-06, -4.583075224218192e-06, -4.411399913806235e-06, -4.23972505814163e-06, -4.068050202477025e-06, -3.89637534681242e-06, -3.7247000364004634e-06, -3.553025180735858e-06, -3.381349870323902e-06, -3.2096747872856213e-06, -3.037999931621016e-06, -2.8663248485827353e-06, -2.69464999291813e-06, -2.5229749098798493e-06, -2.3512998268415686e-06, -2.1796249711769633e-06, -2.0079503428860335e-06, -1.8362753735345905e-06, -1.6646004041831475e-06, -1.4929253211448668e-06, -1.3212504654802615e-06, -1.1495753824419808e-06, -9.779004130905378e-07, -8.062254437390948e-07, -6.345504743876518e-07, -4.628755050362088e-07, -2.9120050726305635e-07, -1.1952550948990392e-07, 5.214945986153907e-08, 2.2382442921298207e-07, 3.954994554078439e-07, 5.671744247592869e-07, 7.388493941107299e-07, 9.105243634621729e-07, 1.082199332813616e-06, 1.2538744158518966e-06, 1.425549271516502e-06, 1.5972243545547826e-06, 1.7688993239062256e-06, 1.9405742932576686e-06, 2.112249148922274e-06, 2.2839242319605546e-06, 2.45559908762516e-06, 2.6272741706634406e-06, 2.798949026328046e-06, 2.9706241093663266e-06, 3.1422991924046073e-06, 3.3139740480692126e-06, 3.4856491311074933e-06]}, "gradients/decoder.transformer.h.17.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 3.0, 10.0, 7.0, 5.0, 21.0, 4.0, 9.0, 29.0, 8.0, 11.0, 25.0, 23.0, 20.0, 53.0, 21.0, 48.0, 22.0, 31.0, 60.0, 36.0, 38.0, 81.0, 21.0, 28.0, 43.0, 35.0, 48.0, 20.0, 27.0, 32.0, 18.0, 21.0, 36.0, 15.0, 11.0, 31.0, 12.0, 0.0, 14.0, 4.0, 9.0, 2.0, 5.0, 3.0, 1.0, 1.0, 5.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.682209014892578e-06, -2.6011839509010315e-06, -2.520158886909485e-06, -2.4391338229179382e-06, -2.3581087589263916e-06, -2.277083694934845e-06, -2.1960586309432983e-06, -2.1150335669517517e-06, -2.034008502960205e-06, -1.9529834389686584e-06, -1.8719583749771118e-06, -1.7909333109855652e-06, -1.7099082469940186e-06, -1.628883183002472e-06, -1.5478581190109253e-06, -1.4668330550193787e-06, -1.385807991027832e-06, -1.3047829270362854e-06, -1.2237578630447388e-06, -1.1427327990531921e-06, -1.0617077350616455e-06, -9.806826710700989e-07, -8.996576070785522e-07, -8.186325430870056e-07, -7.37607479095459e-07, -6.565824151039124e-07, -5.755573511123657e-07, -4.945322871208191e-07, -4.1350722312927246e-07, -3.3248215913772583e-07, -2.514570951461792e-07, -1.7043203115463257e-07, -8.940696716308594e-08, -8.381903171539307e-09, 7.264316082000732e-08, 1.5366822481155396e-07, 2.3469328880310059e-07, 3.157183527946472e-07, 3.9674341678619385e-07, 4.777684807777405e-07, 5.587935447692871e-07, 6.398186087608337e-07, 7.208436727523804e-07, 8.01868736743927e-07, 8.828938007354736e-07, 9.639188647270203e-07, 1.044943928718567e-06, 1.1259689927101135e-06, 1.2069940567016602e-06, 1.2880191206932068e-06, 1.3690441846847534e-06, 1.4500692486763e-06, 1.5310943126678467e-06, 1.6121193766593933e-06, 1.69314444065094e-06, 1.7741695046424866e-06, 1.8551945686340332e-06, 1.93621963262558e-06, 2.0172446966171265e-06, 2.098269760608673e-06, 2.1792948246002197e-06, 2.2603198885917664e-06, 2.341344952583313e-06, 2.4223700165748596e-06, 2.5033950805664062e-06]}, "gradients/decoder.transformer.h.17.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 3.0, 3.0, 2.0, 2.0, 5.0, 7.0, 6.0, 4.0, 12.0, 14.0, 22.0, 15.0, 24.0, 19.0, 21.0, 31.0, 25.0, 29.0, 39.0, 33.0, 40.0, 43.0, 36.0, 41.0, 32.0, 34.0, 31.0, 31.0, 43.0, 27.0, 29.0, 38.0, 29.0, 26.0, 40.0, 23.0, 15.0, 22.0, 26.0, 13.0, 19.0, 16.0, 10.0, 3.0, 11.0, 4.0, 0.0, 4.0, 0.0, 2.0, 6.0, 1.0, 2.0, 0.0, 2.0], "bins": [-2.802734375, -2.720611572265625, -2.63848876953125, -2.556365966796875, -2.4742431640625, -2.392120361328125, -2.30999755859375, -2.227874755859375, -2.145751953125, -2.063629150390625, -1.98150634765625, -1.899383544921875, -1.8172607421875, -1.735137939453125, -1.65301513671875, -1.570892333984375, -1.48876953125, -1.406646728515625, -1.32452392578125, -1.242401123046875, -1.1602783203125, -1.078155517578125, -0.99603271484375, -0.913909912109375, -0.831787109375, -0.749664306640625, -0.66754150390625, -0.585418701171875, -0.5032958984375, -0.421173095703125, -0.33905029296875, -0.256927490234375, -0.1748046875, -0.092681884765625, -0.01055908203125, 0.071563720703125, 0.1536865234375, 0.235809326171875, 0.31793212890625, 0.400054931640625, 0.482177734375, 0.564300537109375, 0.64642333984375, 0.728546142578125, 0.8106689453125, 0.892791748046875, 0.97491455078125, 1.057037353515625, 1.13916015625, 1.221282958984375, 1.30340576171875, 1.385528564453125, 1.4676513671875, 1.549774169921875, 1.63189697265625, 1.714019775390625, 1.796142578125, 1.878265380859375, 1.96038818359375, 2.042510986328125, 2.1246337890625, 2.206756591796875, 2.28887939453125, 2.371002197265625, 2.453125]}, "gradients/decoder.transformer.h.17.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 6.0, 4.0, 6.0, 9.0, 21.0, 17.0, 19.0, 36.0, 48.0, 53.0, 103.0, 129.0, 203.0, 313.0, 476.0, 732.0, 1276.0, 2287.0, 4172.0, 7915.0, 16135.0, 37922.0, 105931.0, 372221.0, 334075.0, 97062.0, 35115.0, 15158.0, 7584.0, 4028.0, 2152.0, 1280.0, 678.0, 480.0, 271.0, 193.0, 150.0, 94.0, 68.0, 31.0, 33.0, 19.0, 18.0, 11.0, 7.0, 10.0, 7.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.89453125, -1.8370513916015625, -1.779571533203125, -1.7220916748046875, -1.66461181640625, -1.6071319580078125, -1.549652099609375, -1.4921722412109375, -1.4346923828125, -1.3772125244140625, -1.319732666015625, -1.2622528076171875, -1.20477294921875, -1.1472930908203125, -1.089813232421875, -1.0323333740234375, -0.974853515625, -0.9173736572265625, -0.859893798828125, -0.8024139404296875, -0.74493408203125, -0.6874542236328125, -0.629974365234375, -0.5724945068359375, -0.5150146484375, -0.4575347900390625, -0.400054931640625, -0.3425750732421875, -0.28509521484375, -0.2276153564453125, -0.170135498046875, -0.1126556396484375, -0.05517578125, 0.0023040771484375, 0.059783935546875, 0.1172637939453125, 0.17474365234375, 0.2322235107421875, 0.289703369140625, 0.3471832275390625, 0.4046630859375, 0.4621429443359375, 0.519622802734375, 0.5771026611328125, 0.63458251953125, 0.6920623779296875, 0.749542236328125, 0.8070220947265625, 0.864501953125, 0.9219818115234375, 0.979461669921875, 1.0369415283203125, 1.09442138671875, 1.1519012451171875, 1.209381103515625, 1.2668609619140625, 1.3243408203125, 1.3818206787109375, 1.439300537109375, 1.4967803955078125, 1.55426025390625, 1.6117401123046875, 1.669219970703125, 1.7266998291015625, 1.7841796875]}, "gradients/decoder.transformer.h.17.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 1.0, 5.0, 3.0, 7.0, 8.0, 7.0, 9.0, 11.0, 12.0, 15.0, 15.0, 30.0, 17.0, 24.0, 33.0, 36.0, 40.0, 48.0, 42.0, 50.0, 79.0, 191.0, 1738.0, 157.0, 68.0, 49.0, 42.0, 39.0, 39.0, 38.0, 33.0, 27.0, 22.0, 16.0, 11.0, 14.0, 20.0, 14.0, 8.0, 7.0, 12.0, 6.0, 5.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.875, -9.5609130859375, -9.246826171875, -8.9327392578125, -8.61865234375, -8.3045654296875, -7.990478515625, -7.6763916015625, -7.3623046875, -7.0482177734375, -6.734130859375, -6.4200439453125, -6.10595703125, -5.7918701171875, -5.477783203125, -5.1636962890625, -4.849609375, -4.5355224609375, -4.221435546875, -3.9073486328125, -3.59326171875, -3.2791748046875, -2.965087890625, -2.6510009765625, -2.3369140625, -2.0228271484375, -1.708740234375, -1.3946533203125, -1.08056640625, -0.7664794921875, -0.452392578125, -0.1383056640625, 0.17578125, 0.4898681640625, 0.803955078125, 1.1180419921875, 1.43212890625, 1.7462158203125, 2.060302734375, 2.3743896484375, 2.6884765625, 3.0025634765625, 3.316650390625, 3.6307373046875, 3.94482421875, 4.2589111328125, 4.572998046875, 4.8870849609375, 5.201171875, 5.5152587890625, 5.829345703125, 6.1434326171875, 6.45751953125, 6.7716064453125, 7.085693359375, 7.3997802734375, 7.7138671875, 8.0279541015625, 8.342041015625, 8.6561279296875, 8.97021484375, 9.2843017578125, 9.598388671875, 9.9124755859375, 10.2265625]}, "gradients/decoder.transformer.h.17.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 2.0, 2.0, 0.0, 7.0, 10.0, 10.0, 10.0, 9.0, 19.0, 19.0, 19.0, 21.0, 28.0, 34.0, 53.0, 94.0, 120.0, 108.0, 185.0, 299.0, 565.0, 2925.0, 233008.0, 2899636.0, 6500.0, 869.0, 372.0, 182.0, 147.0, 111.0, 79.0, 69.0, 38.0, 38.0, 28.0, 16.0, 16.0, 12.0, 11.0, 5.0, 11.0, 8.0, 7.0, 3.0, 5.0, 1.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-20.515625, -19.87744140625, -19.2392578125, -18.60107421875, -17.962890625, -17.32470703125, -16.6865234375, -16.04833984375, -15.41015625, -14.77197265625, -14.1337890625, -13.49560546875, -12.857421875, -12.21923828125, -11.5810546875, -10.94287109375, -10.3046875, -9.66650390625, -9.0283203125, -8.39013671875, -7.751953125, -7.11376953125, -6.4755859375, -5.83740234375, -5.19921875, -4.56103515625, -3.9228515625, -3.28466796875, -2.646484375, -2.00830078125, -1.3701171875, -0.73193359375, -0.09375, 0.54443359375, 1.1826171875, 1.82080078125, 2.458984375, 3.09716796875, 3.7353515625, 4.37353515625, 5.01171875, 5.64990234375, 6.2880859375, 6.92626953125, 7.564453125, 8.20263671875, 8.8408203125, 9.47900390625, 10.1171875, 10.75537109375, 11.3935546875, 12.03173828125, 12.669921875, 13.30810546875, 13.9462890625, 14.58447265625, 15.22265625, 15.86083984375, 16.4990234375, 17.13720703125, 17.775390625, 18.41357421875, 19.0517578125, 19.68994140625, 20.328125]}, "gradients/decoder.transformer.h.17.ln_1.weight": {"_type": "histogram", "values": [430.0, 588.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.455329895019531, -0.19705915451049805, 4.061211585998535, 8.319482803344727, 12.577753067016602, 16.836023330688477, 21.094295501708984, 25.35256576538086, 29.610836029052734, 33.86910629272461, 38.127376556396484, 42.385650634765625, 46.6439208984375, 50.902191162109375, 55.16046142578125, 59.418731689453125, 63.677001953125, 67.93527221679688, 72.19354248046875, 76.45181274414062, 80.7100830078125, 84.96835327148438, 89.22662353515625, 93.48489379882812, 97.7431640625, 102.00143432617188, 106.25970458984375, 110.51797485351562, 114.7762451171875, 119.03451538085938, 123.29278564453125, 127.55105590820312, 131.80934143066406, 136.06761169433594, 140.3258819580078, 144.5841522216797, 148.84242248535156, 153.10069274902344, 157.3589630126953, 161.6172332763672, 165.87550354003906, 170.13377380371094, 174.3920440673828, 178.6503143310547, 182.90858459472656, 187.16685485839844, 191.4251251220703, 195.6833953857422, 199.94168090820312, 204.199951171875, 208.45822143554688, 212.71649169921875, 216.97476196289062, 221.2330322265625, 225.49130249023438, 229.74957275390625, 234.00784301757812, 238.26611328125, 242.52438354492188, 246.78265380859375, 251.04092407226562, 255.2991943359375, 259.5574645996094, 263.81573486328125, 268.0740051269531]}, "gradients/decoder.transformer.h.17.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 6.0, 1.0, 0.0, 2.0, 3.0, 3.0, 3.0, 5.0, 3.0, 10.0, 9.0, 15.0, 9.0, 15.0, 16.0, 19.0, 20.0, 28.0, 24.0, 24.0, 28.0, 31.0, 40.0, 31.0, 47.0, 37.0, 42.0, 43.0, 49.0, 42.0, 47.0, 43.0, 31.0, 31.0, 39.0, 29.0, 25.0, 18.0, 20.0, 25.0, 19.0, 17.0, 13.0, 11.0, 9.0, 8.0, 1.0, 6.0, 6.0, 2.0, 4.0, 2.0, 1.0, 2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0], "bins": [-25.321617126464844, -24.521024703979492, -23.720434188842773, -22.919841766357422, -22.119251251220703, -21.31865882873535, -20.51806640625, -19.71747589111328, -18.916885375976562, -18.11629295349121, -17.315702438354492, -16.51511001586914, -15.714519500732422, -14.91392707824707, -14.113335609436035, -13.312744140625, -12.512151718139648, -11.711560249328613, -10.910968780517578, -10.110376358032227, -9.309785842895508, -8.509193420410156, -7.708601951599121, -6.908010482788086, -6.107419013977051, -5.306827545166016, -4.5062360763549805, -3.705644130706787, -2.905052661895752, -2.104461193084717, -1.3038692474365234, -0.5032777786254883, 0.2973155975341797, 1.0979071855545044, 1.898498773574829, 2.6990904808044434, 3.4996819496154785, 4.300273418426514, 5.100865364074707, 5.901456832885742, 6.702048301696777, 7.5026397705078125, 8.303231239318848, 9.103822708129883, 9.904415130615234, 10.705005645751953, 11.505598068237305, 12.30618953704834, 13.106781005859375, 13.90737247467041, 14.707963943481445, 15.508556365966797, 16.309146881103516, 17.109739303588867, 17.91033172607422, 18.710922241210938, 19.511512756347656, 20.312105178833008, 21.112695693969727, 21.913288116455078, 22.713878631591797, 23.51447105407715, 24.3150634765625, 25.11565399169922, 25.91624641418457]}, "gradients/decoder.transformer.h.16.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 4.0, 2.0, 2.0, 4.0, 6.0, 4.0, 6.0, 7.0, 9.0, 24.0, 18.0, 17.0, 16.0, 32.0, 15.0, 31.0, 31.0, 26.0, 43.0, 39.0, 35.0, 43.0, 38.0, 31.0, 41.0, 36.0, 32.0, 32.0, 35.0, 32.0, 39.0, 33.0, 29.0, 30.0, 28.0, 21.0, 20.0, 24.0, 19.0, 15.0, 17.0, 8.0, 11.0, 5.0, 3.0, 5.0, 3.0, 2.0, 1.0, 5.0, 3.0, 1.0, 1.0, 1.0], "bins": [-2.93359375, -2.848876953125, -2.76416015625, -2.679443359375, -2.5947265625, -2.510009765625, -2.42529296875, -2.340576171875, -2.255859375, -2.171142578125, -2.08642578125, -2.001708984375, -1.9169921875, -1.832275390625, -1.74755859375, -1.662841796875, -1.578125, -1.493408203125, -1.40869140625, -1.323974609375, -1.2392578125, -1.154541015625, -1.06982421875, -0.985107421875, -0.900390625, -0.815673828125, -0.73095703125, -0.646240234375, -0.5615234375, -0.476806640625, -0.39208984375, -0.307373046875, -0.22265625, -0.137939453125, -0.05322265625, 0.031494140625, 0.1162109375, 0.200927734375, 0.28564453125, 0.370361328125, 0.455078125, 0.539794921875, 0.62451171875, 0.709228515625, 0.7939453125, 0.878662109375, 0.96337890625, 1.048095703125, 1.1328125, 1.217529296875, 1.30224609375, 1.386962890625, 1.4716796875, 1.556396484375, 1.64111328125, 1.725830078125, 1.810546875, 1.895263671875, 1.97998046875, 2.064697265625, 2.1494140625, 2.234130859375, 2.31884765625, 2.403564453125, 2.48828125]}, "gradients/decoder.transformer.h.16.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 8.0, 2.0, 3.0, 10.0, 10.0, 11.0, 8.0, 13.0, 21.0, 17.0, 25.0, 18.0, 23.0, 36.0, 51.0, 89.0, 131.0, 289.0, 897.0, 3193.0, 16378.0, 156171.0, 3043663.0, 917614.0, 46041.0, 6865.0, 1592.0, 503.0, 214.0, 98.0, 59.0, 40.0, 28.0, 30.0, 21.0, 23.0, 18.0, 17.0, 15.0, 5.0, 9.0, 5.0, 4.0, 5.0, 4.0, 4.0, 4.0, 3.0, 4.0, 0.0, 1.0, 1.0], "bins": [-10.5859375, -10.2813720703125, -9.976806640625, -9.6722412109375, -9.36767578125, -9.0631103515625, -8.758544921875, -8.4539794921875, -8.1494140625, -7.8448486328125, -7.540283203125, -7.2357177734375, -6.93115234375, -6.6265869140625, -6.322021484375, -6.0174560546875, -5.712890625, -5.4083251953125, -5.103759765625, -4.7991943359375, -4.49462890625, -4.1900634765625, -3.885498046875, -3.5809326171875, -3.2763671875, -2.9718017578125, -2.667236328125, -2.3626708984375, -2.05810546875, -1.7535400390625, -1.448974609375, -1.1444091796875, -0.83984375, -0.5352783203125, -0.230712890625, 0.0738525390625, 0.37841796875, 0.6829833984375, 0.987548828125, 1.2921142578125, 1.5966796875, 1.9012451171875, 2.205810546875, 2.5103759765625, 2.81494140625, 3.1195068359375, 3.424072265625, 3.7286376953125, 4.033203125, 4.3377685546875, 4.642333984375, 4.9468994140625, 5.25146484375, 5.5560302734375, 5.860595703125, 6.1651611328125, 6.4697265625, 6.7742919921875, 7.078857421875, 7.3834228515625, 7.68798828125, 7.9925537109375, 8.297119140625, 8.6016845703125, 8.90625]}, "gradients/decoder.transformer.h.16.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 7.0, 4.0, 7.0, 9.0, 28.0, 51.0, 66.0, 142.0, 291.0, 661.0, 1048.0, 869.0, 431.0, 230.0, 106.0, 59.0, 34.0, 12.0, 8.0, 8.0, 2.0, 6.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.21875, -14.815673828125, -14.41259765625, -14.009521484375, -13.6064453125, -13.203369140625, -12.80029296875, -12.397216796875, -11.994140625, -11.591064453125, -11.18798828125, -10.784912109375, -10.3818359375, -9.978759765625, -9.57568359375, -9.172607421875, -8.76953125, -8.366455078125, -7.96337890625, -7.560302734375, -7.1572265625, -6.754150390625, -6.35107421875, -5.947998046875, -5.544921875, -5.141845703125, -4.73876953125, -4.335693359375, -3.9326171875, -3.529541015625, -3.12646484375, -2.723388671875, -2.3203125, -1.917236328125, -1.51416015625, -1.111083984375, -0.7080078125, -0.304931640625, 0.09814453125, 0.501220703125, 0.904296875, 1.307373046875, 1.71044921875, 2.113525390625, 2.5166015625, 2.919677734375, 3.32275390625, 3.725830078125, 4.12890625, 4.531982421875, 4.93505859375, 5.338134765625, 5.7412109375, 6.144287109375, 6.54736328125, 6.950439453125, 7.353515625, 7.756591796875, 8.15966796875, 8.562744140625, 8.9658203125, 9.368896484375, 9.77197265625, 10.175048828125, 10.578125]}, "gradients/decoder.transformer.h.16.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 7.0, 3.0, 1.0, 6.0, 12.0, 15.0, 27.0, 60.0, 96.0, 196.0, 421.0, 1294.0, 7671.0, 236405.0, 3846005.0, 95801.0, 4588.0, 988.0, 337.0, 170.0, 81.0, 46.0, 33.0, 10.0, 7.0, 3.0, 4.0, 2.0, 3.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.890625, -21.061279296875, -20.23193359375, -19.402587890625, -18.5732421875, -17.743896484375, -16.91455078125, -16.085205078125, -15.255859375, -14.426513671875, -13.59716796875, -12.767822265625, -11.9384765625, -11.109130859375, -10.27978515625, -9.450439453125, -8.62109375, -7.791748046875, -6.96240234375, -6.133056640625, -5.3037109375, -4.474365234375, -3.64501953125, -2.815673828125, -1.986328125, -1.156982421875, -0.32763671875, 0.501708984375, 1.3310546875, 2.160400390625, 2.98974609375, 3.819091796875, 4.6484375, 5.477783203125, 6.30712890625, 7.136474609375, 7.9658203125, 8.795166015625, 9.62451171875, 10.453857421875, 11.283203125, 12.112548828125, 12.94189453125, 13.771240234375, 14.6005859375, 15.429931640625, 16.25927734375, 17.088623046875, 17.91796875, 18.747314453125, 19.57666015625, 20.406005859375, 21.2353515625, 22.064697265625, 22.89404296875, 23.723388671875, 24.552734375, 25.382080078125, 26.21142578125, 27.040771484375, 27.8701171875, 28.699462890625, 29.52880859375, 30.358154296875, 31.1875]}, "gradients/decoder.transformer.h.16.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 25.0, 126.0, 422.0, 352.0, 80.0, 10.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-117.94117736816406, -113.00586700439453, -108.070556640625, -103.13524627685547, -98.19993591308594, -93.26461791992188, -88.32930755615234, -83.39399719238281, -78.45868682861328, -73.52337646484375, -68.58806610107422, -63.65275192260742, -58.71744155883789, -53.78213119506836, -48.84681701660156, -43.91150665283203, -38.9761962890625, -34.04088592529297, -29.105573654174805, -24.17026138305664, -19.23495101928711, -14.299640655517578, -9.364328384399414, -4.42901611328125, 0.5062942504882812, 5.441605567932129, 10.376916885375977, 15.312228202819824, 20.247539520263672, 25.182849884033203, 30.118162155151367, 35.05347442626953, 39.98878479003906, 44.924095153808594, 49.859405517578125, 54.79471969604492, 59.73003005981445, 64.66534423828125, 69.60065460205078, 74.53596496582031, 79.47127532958984, 84.40658569335938, 89.3418960571289, 94.27720642089844, 99.2125244140625, 104.1478271484375, 109.08314514160156, 114.0184555053711, 118.95376586914062, 123.88907623291016, 128.8243865966797, 133.75970458984375, 138.69500732421875, 143.6303253173828, 148.5656280517578, 153.50094604492188, 158.43624877929688, 163.37156677246094, 168.30686950683594, 173.2421875, 178.177490234375, 183.11280822753906, 188.04811096191406, 192.98342895507812, 197.9187469482422]}, "gradients/decoder.transformer.h.16.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 4.0, 6.0, 6.0, 4.0, 11.0, 12.0, 14.0, 15.0, 15.0, 15.0, 21.0, 20.0, 35.0, 33.0, 21.0, 33.0, 38.0, 33.0, 31.0, 37.0, 48.0, 35.0, 31.0, 46.0, 31.0, 37.0, 46.0, 33.0, 31.0, 30.0, 32.0, 21.0, 21.0, 27.0, 21.0, 15.0, 13.0, 9.0, 9.0, 12.0, 10.0, 10.0, 4.0, 11.0, 5.0, 5.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 3.0], "bins": [-20.08538055419922, -19.477312088012695, -18.869243621826172, -18.26117515563965, -17.653106689453125, -17.0450382232666, -16.436969757080078, -15.828901290893555, -15.220832824707031, -14.612764358520508, -14.004695892333984, -13.396627426147461, -12.788558959960938, -12.180490493774414, -11.57242202758789, -10.964353561401367, -10.35628604888916, -9.748217582702637, -9.140149116516113, -8.53208065032959, -7.924012184143066, -7.315943717956543, -6.707875728607178, -6.099807262420654, -5.491738796234131, -4.883670330047607, -4.275601863861084, -3.6675336360931396, -3.059465169906616, -2.4513967037200928, -1.8433284759521484, -1.235260009765625, -0.6271915435791016, -0.0191231369972229, 0.5889452695846558, 1.1970136165618896, 1.805082082748413, 2.4131505489349365, 3.021218776702881, 3.6292872428894043, 4.237355709075928, 4.845424175262451, 5.453492641448975, 6.06156063079834, 6.669629096984863, 7.277697563171387, 7.88576602935791, 8.493834495544434, 9.101902961730957, 9.70997142791748, 10.318039894104004, 10.926108360290527, 11.53417682647705, 12.142245292663574, 12.750312805175781, 13.358381271362305, 13.966449737548828, 14.574518203735352, 15.182586669921875, 15.790655136108398, 16.398723602294922, 17.006792068481445, 17.61486053466797, 18.222929000854492, 18.830997467041016]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 2.0, 4.0, 2.0, 1.0, 4.0, 5.0, 6.0, 7.0, 7.0, 11.0, 11.0, 12.0, 22.0, 19.0, 20.0, 20.0, 24.0, 36.0, 26.0, 32.0, 39.0, 43.0, 33.0, 34.0, 35.0, 29.0, 37.0, 46.0, 30.0, 35.0, 46.0, 21.0, 29.0, 38.0, 25.0, 24.0, 25.0, 22.0, 18.0, 25.0, 23.0, 12.0, 9.0, 11.0, 11.0, 11.0, 8.0, 5.0, 5.0, 3.0, 2.0, 3.0, 0.0, 2.0, 3.0, 0.0, 2.0], "bins": [-2.89453125, -2.80810546875, -2.7216796875, -2.63525390625, -2.548828125, -2.46240234375, -2.3759765625, -2.28955078125, -2.203125, -2.11669921875, -2.0302734375, -1.94384765625, -1.857421875, -1.77099609375, -1.6845703125, -1.59814453125, -1.51171875, -1.42529296875, -1.3388671875, -1.25244140625, -1.166015625, -1.07958984375, -0.9931640625, -0.90673828125, -0.8203125, -0.73388671875, -0.6474609375, -0.56103515625, -0.474609375, -0.38818359375, -0.3017578125, -0.21533203125, -0.12890625, -0.04248046875, 0.0439453125, 0.13037109375, 0.216796875, 0.30322265625, 0.3896484375, 0.47607421875, 0.5625, 0.64892578125, 0.7353515625, 0.82177734375, 0.908203125, 0.99462890625, 1.0810546875, 1.16748046875, 1.25390625, 1.34033203125, 1.4267578125, 1.51318359375, 1.599609375, 1.68603515625, 1.7724609375, 1.85888671875, 1.9453125, 2.03173828125, 2.1181640625, 2.20458984375, 2.291015625, 2.37744140625, 2.4638671875, 2.55029296875, 2.63671875]}, "gradients/decoder.transformer.h.16.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 11.0, 7.0, 16.0, 13.0, 23.0, 23.0, 32.0, 40.0, 62.0, 80.0, 119.0, 195.0, 235.0, 350.0, 482.0, 667.0, 950.0, 1429.0, 1987.0, 2859.0, 4180.0, 6246.0, 9189.0, 13578.0, 20427.0, 31013.0, 48302.0, 78867.0, 145325.0, 314648.0, 145910.0, 78418.0, 48307.0, 30893.0, 20603.0, 13753.0, 9291.0, 6174.0, 4199.0, 2924.0, 2021.0, 1438.0, 935.0, 651.0, 471.0, 369.0, 232.0, 191.0, 125.0, 103.0, 58.0, 34.0, 34.0, 28.0, 18.0, 15.0, 8.0, 5.0, 6.0, 1.0, 1.0], "bins": [-0.0711669921875, -0.06897735595703125, -0.0667877197265625, -0.06459808349609375, -0.062408447265625, -0.06021881103515625, -0.0580291748046875, -0.05583953857421875, -0.05364990234375, -0.05146026611328125, -0.0492706298828125, -0.04708099365234375, -0.044891357421875, -0.04270172119140625, -0.0405120849609375, -0.03832244873046875, -0.0361328125, -0.03394317626953125, -0.0317535400390625, -0.02956390380859375, -0.027374267578125, -0.02518463134765625, -0.0229949951171875, -0.02080535888671875, -0.01861572265625, -0.01642608642578125, -0.0142364501953125, -0.01204681396484375, -0.009857177734375, -0.00766754150390625, -0.0054779052734375, -0.00328826904296875, -0.0010986328125, 0.00109100341796875, 0.0032806396484375, 0.00547027587890625, 0.007659912109375, 0.00984954833984375, 0.0120391845703125, 0.01422882080078125, 0.01641845703125, 0.01860809326171875, 0.0207977294921875, 0.02298736572265625, 0.025177001953125, 0.02736663818359375, 0.0295562744140625, 0.03174591064453125, 0.033935546875, 0.03612518310546875, 0.0383148193359375, 0.04050445556640625, 0.042694091796875, 0.04488372802734375, 0.0470733642578125, 0.04926300048828125, 0.05145263671875, 0.05364227294921875, 0.0558319091796875, 0.05802154541015625, 0.060211181640625, 0.06240081787109375, 0.0645904541015625, 0.06678009033203125, 0.0689697265625]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 6.0, 5.0, 5.0, 5.0, 9.0, 9.0, 16.0, 11.0, 18.0, 19.0, 26.0, 27.0, 26.0, 26.0, 37.0, 31.0, 24.0, 38.0, 48.0, 39.0, 37.0, 43.0, 1052.0, 39.0, 36.0, 40.0, 39.0, 31.0, 30.0, 31.0, 16.0, 36.0, 26.0, 19.0, 25.0, 15.0, 21.0, 13.0, 13.0, 13.0, 9.0, 3.0, 9.0, 5.0, 2.0, 3.0, 2.0, 7.0, 0.0, 1.0], "bins": [-2.119140625, -2.0603179931640625, -2.001495361328125, -1.9426727294921875, -1.88385009765625, -1.8250274658203125, -1.766204833984375, -1.7073822021484375, -1.6485595703125, -1.5897369384765625, -1.530914306640625, -1.4720916748046875, -1.41326904296875, -1.3544464111328125, -1.295623779296875, -1.2368011474609375, -1.177978515625, -1.1191558837890625, -1.060333251953125, -1.0015106201171875, -0.94268798828125, -0.8838653564453125, -0.825042724609375, -0.7662200927734375, -0.7073974609375, -0.6485748291015625, -0.589752197265625, -0.5309295654296875, -0.47210693359375, -0.4132843017578125, -0.354461669921875, -0.2956390380859375, -0.23681640625, -0.1779937744140625, -0.119171142578125, -0.0603485107421875, -0.00152587890625, 0.0572967529296875, 0.116119384765625, 0.1749420166015625, 0.2337646484375, 0.2925872802734375, 0.351409912109375, 0.4102325439453125, 0.46905517578125, 0.5278778076171875, 0.586700439453125, 0.6455230712890625, 0.704345703125, 0.7631683349609375, 0.821990966796875, 0.8808135986328125, 0.93963623046875, 0.9984588623046875, 1.057281494140625, 1.1161041259765625, 1.1749267578125, 1.2337493896484375, 1.292572021484375, 1.3513946533203125, 1.41021728515625, 1.4690399169921875, 1.527862548828125, 1.5866851806640625, 1.6455078125]}, "gradients/decoder.transformer.h.16.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 4.0, 3.0, 4.0, 8.0, 22.0, 20.0, 28.0, 47.0, 71.0, 80.0, 152.0, 273.0, 408.0, 650.0, 1018.0, 1662.0, 2625.0, 4379.0, 7123.0, 11641.0, 19344.0, 32511.0, 57158.0, 108172.0, 268003.0, 1328340.0, 111507.0, 58165.0, 33281.0, 19772.0, 11819.0, 7175.0, 4415.0, 2686.0, 1642.0, 1067.0, 671.0, 386.0, 271.0, 184.0, 132.0, 85.0, 59.0, 17.0, 22.0, 12.0, 11.0, 5.0, 5.0, 6.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.09271240234375, -0.09006500244140625, -0.0874176025390625, -0.08477020263671875, -0.082122802734375, -0.07947540283203125, -0.0768280029296875, -0.07418060302734375, -0.071533203125, -0.06888580322265625, -0.0662384033203125, -0.06359100341796875, -0.060943603515625, -0.05829620361328125, -0.0556488037109375, -0.05300140380859375, -0.05035400390625, -0.04770660400390625, -0.0450592041015625, -0.04241180419921875, -0.039764404296875, -0.03711700439453125, -0.0344696044921875, -0.03182220458984375, -0.0291748046875, -0.02652740478515625, -0.0238800048828125, -0.02123260498046875, -0.018585205078125, -0.01593780517578125, -0.0132904052734375, -0.01064300537109375, -0.00799560546875, -0.00534820556640625, -0.0027008056640625, -5.340576171875e-05, 0.002593994140625, 0.00524139404296875, 0.0078887939453125, 0.01053619384765625, 0.01318359375, 0.01583099365234375, 0.0184783935546875, 0.02112579345703125, 0.023773193359375, 0.02642059326171875, 0.0290679931640625, 0.03171539306640625, 0.03436279296875, 0.03701019287109375, 0.0396575927734375, 0.04230499267578125, 0.044952392578125, 0.04759979248046875, 0.0502471923828125, 0.05289459228515625, 0.0555419921875, 0.05818939208984375, 0.0608367919921875, 0.06348419189453125, 0.066131591796875, 0.06877899169921875, 0.0714263916015625, 0.07407379150390625, 0.07672119140625]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 5.0, 0.0, 6.0, 3.0, 2.0, 5.0, 5.0, 11.0, 16.0, 19.0, 18.0, 27.0, 30.0, 29.0, 41.0, 68.0, 56.0, 59.0, 81.0, 70.0, 48.0, 50.0, 68.0, 51.0, 46.0, 52.0, 31.0, 26.0, 23.0, 14.0, 12.0, 8.0, 11.0, 3.0, 1.0, 5.0, 3.0, 2.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.854534149169922e-06, -6.658025085926056e-06, -6.46151602268219e-06, -6.265006959438324e-06, -6.068497896194458e-06, -5.871988832950592e-06, -5.675479769706726e-06, -5.47897070646286e-06, -5.282461643218994e-06, -5.085952579975128e-06, -4.889443516731262e-06, -4.692934453487396e-06, -4.49642539024353e-06, -4.299916326999664e-06, -4.103407263755798e-06, -3.906898200511932e-06, -3.7103891372680664e-06, -3.5138800740242004e-06, -3.3173710107803345e-06, -3.1208619475364685e-06, -2.9243528842926025e-06, -2.7278438210487366e-06, -2.5313347578048706e-06, -2.3348256945610046e-06, -2.1383166313171387e-06, -1.9418075680732727e-06, -1.7452985048294067e-06, -1.5487894415855408e-06, -1.3522803783416748e-06, -1.1557713150978088e-06, -9.592622518539429e-07, -7.627531886100769e-07, -5.662441253662109e-07, -3.6973506212234497e-07, -1.73225998878479e-07, 2.3283064365386963e-08, 2.1979212760925293e-07, 4.163011908531189e-07, 6.128102540969849e-07, 8.093193173408508e-07, 1.0058283805847168e-06, 1.2023374438285828e-06, 1.3988465070724487e-06, 1.5953555703163147e-06, 1.7918646335601807e-06, 1.9883736968040466e-06, 2.1848827600479126e-06, 2.3813918232917786e-06, 2.5779008865356445e-06, 2.7744099497795105e-06, 2.9709190130233765e-06, 3.1674280762672424e-06, 3.3639371395111084e-06, 3.5604462027549744e-06, 3.7569552659988403e-06, 3.953464329242706e-06, 4.149973392486572e-06, 4.346482455730438e-06, 4.542991518974304e-06, 4.73950058221817e-06, 4.936009645462036e-06, 5.132518708705902e-06, 5.329027771949768e-06, 5.525536835193634e-06, 5.7220458984375e-06]}, "gradients/decoder.transformer.h.16.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 4.0, 7.0, 6.0, 9.0, 4.0, 13.0, 22.0, 31.0, 32.0, 46.0, 89.0, 106.0, 238.0, 397.0, 1147.0, 50466.0, 974649.0, 19479.0, 867.0, 344.0, 204.0, 111.0, 81.0, 42.0, 34.0, 33.0, 25.0, 21.0, 12.0, 8.0, 4.0, 3.0, 7.0, 6.0, 3.0, 3.0, 1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.857250213623047e-05, -8.545536547899246e-05, -8.233822882175446e-05, -7.922109216451645e-05, -7.610395550727844e-05, -7.298681885004044e-05, -6.986968219280243e-05, -6.675254553556442e-05, -6.363540887832642e-05, -6.051827222108841e-05, -5.74011355638504e-05, -5.4283998906612396e-05, -5.116686224937439e-05, -4.804972559213638e-05, -4.4932588934898376e-05, -4.181545227766037e-05, -3.869831562042236e-05, -3.558117896318436e-05, -3.246404230594635e-05, -2.9346905648708344e-05, -2.6229768991470337e-05, -2.311263233423233e-05, -1.9995495676994324e-05, -1.6878359019756317e-05, -1.376122236251831e-05, -1.0644085705280304e-05, -7.526949048042297e-06, -4.409812390804291e-06, -1.2926757335662842e-06, 1.8244609236717224e-06, 4.941597580909729e-06, 8.058734238147736e-06, 1.1175870895385742e-05, 1.4293007552623749e-05, 1.7410144209861755e-05, 2.0527280867099762e-05, 2.364441752433777e-05, 2.6761554181575775e-05, 2.9878690838813782e-05, 3.299582749605179e-05, 3.6112964153289795e-05, 3.92301008105278e-05, 4.234723746776581e-05, 4.5464374125003815e-05, 4.858151078224182e-05, 5.169864743947983e-05, 5.4815784096717834e-05, 5.793292075395584e-05, 6.105005741119385e-05, 6.416719406843185e-05, 6.728433072566986e-05, 7.040146738290787e-05, 7.351860404014587e-05, 7.663574069738388e-05, 7.975287735462189e-05, 8.28700140118599e-05, 8.59871506690979e-05, 8.910428732633591e-05, 9.222142398357391e-05, 9.533856064081192e-05, 9.845569729804993e-05, 0.00010157283395528793, 0.00010468997061252594, 0.00010780710726976395, 0.00011092424392700195]}, "gradients/decoder.transformer.h.16.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 9.0, 118.0, 519.0, 318.0, 48.0, 5.0], "bins": [-4.110072768526152e-05, -4.0424205508315936e-05, -3.9747683331370354e-05, -3.907116115442477e-05, -3.8394642615458e-05, -3.7718120438512415e-05, -3.704159826156683e-05, -3.636507608462125e-05, -3.568855390767567e-05, -3.5012031730730087e-05, -3.4335509553784505e-05, -3.365898737683892e-05, -3.298246883787215e-05, -3.2305946660926566e-05, -3.1629424483980983e-05, -3.09529023070354e-05, -3.027638013008982e-05, -2.9599857953144237e-05, -2.892333759518806e-05, -2.8246815418242477e-05, -2.7570293241296895e-05, -2.6893771064351313e-05, -2.6217250706395134e-05, -2.5540728529449552e-05, -2.4864208171493374e-05, -2.4187685994547792e-05, -2.3511165636591613e-05, -2.283464345964603e-05, -2.215812128270045e-05, -2.1481599105754867e-05, -2.080507874779869e-05, -2.0128556570853107e-05, -1.9452036212896928e-05, -1.8775514035951346e-05, -1.8098993677995168e-05, -1.7422471501049586e-05, -1.6745949324104004e-05, -1.606942714715842e-05, -1.5392906789202243e-05, -1.4716384612256661e-05, -1.4039862435311079e-05, -1.3363341167860199e-05, -1.2686818990914617e-05, -1.2010297723463736e-05, -1.1333775546518154e-05, -1.0657254279067274e-05, -9.980733011616394e-06, -9.304210834670812e-06, -8.62768865772523e-06, -7.95116739027435e-06, -7.2746452133287676e-06, -6.598123945877887e-06, -5.921601768932305e-06, -5.245080501481425e-06, -4.568558779283194e-06, -3.892037057084963e-06, -3.2155157896340825e-06, -2.5389940674358513e-06, -1.862472458924458e-06, -1.1859508504130645e-06, -5.094291282148333e-07, 1.6709259398339782e-07, 8.436140888079535e-07, 1.5201358110061847e-06, 2.196657533204416e-06]}, "gradients/decoder.transformer.h.16.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 4.0, 7.0, 4.0, 5.0, 7.0, 5.0, 8.0, 18.0, 9.0, 19.0, 25.0, 13.0, 35.0, 35.0, 14.0, 46.0, 44.0, 25.0, 53.0, 25.0, 65.0, 62.0, 28.0, 45.0, 43.0, 31.0, 46.0, 48.0, 15.0, 39.0, 42.0, 6.0, 28.0, 27.0, 12.0, 21.0, 15.0, 7.0, 8.0, 8.0, 5.0, 4.0, 3.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-3.337860107421875e-06, -3.2391399145126343e-06, -3.1404197216033936e-06, -3.041699528694153e-06, -2.942979335784912e-06, -2.8442591428756714e-06, -2.7455389499664307e-06, -2.64681875705719e-06, -2.5480985641479492e-06, -2.4493783712387085e-06, -2.3506581783294678e-06, -2.251937985420227e-06, -2.1532177925109863e-06, -2.0544975996017456e-06, -1.955777406692505e-06, -1.8570572137832642e-06, -1.7583370208740234e-06, -1.6596168279647827e-06, -1.560896635055542e-06, -1.4621764421463013e-06, -1.3634562492370605e-06, -1.2647360563278198e-06, -1.166015863418579e-06, -1.0672956705093384e-06, -9.685754776000977e-07, -8.698552846908569e-07, -7.711350917816162e-07, -6.724148988723755e-07, -5.736947059631348e-07, -4.7497451305389404e-07, -3.762543201446533e-07, -2.775341272354126e-07, -1.7881393432617188e-07, -8.009374141693115e-08, 1.862645149230957e-08, 1.1734664440155029e-07, 2.1606683731079102e-07, 3.1478703022003174e-07, 4.1350722312927246e-07, 5.122274160385132e-07, 6.109476089477539e-07, 7.096678018569946e-07, 8.083879947662354e-07, 9.071081876754761e-07, 1.0058283805847168e-06, 1.1045485734939575e-06, 1.2032687664031982e-06, 1.301988959312439e-06, 1.4007091522216797e-06, 1.4994293451309204e-06, 1.5981495380401611e-06, 1.6968697309494019e-06, 1.7955899238586426e-06, 1.8943101167678833e-06, 1.993030309677124e-06, 2.0917505025863647e-06, 2.1904706954956055e-06, 2.289190888404846e-06, 2.387911081314087e-06, 2.4866312742233276e-06, 2.5853514671325684e-06, 2.684071660041809e-06, 2.78279185295105e-06, 2.8815120458602905e-06, 2.9802322387695312e-06]}, "gradients/decoder.transformer.h.16.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 2.0, 4.0, 2.0, 1.0, 4.0, 5.0, 6.0, 7.0, 7.0, 11.0, 11.0, 12.0, 22.0, 19.0, 20.0, 20.0, 24.0, 36.0, 26.0, 32.0, 39.0, 43.0, 33.0, 34.0, 35.0, 29.0, 37.0, 46.0, 30.0, 35.0, 46.0, 21.0, 29.0, 38.0, 25.0, 24.0, 25.0, 22.0, 18.0, 25.0, 23.0, 12.0, 9.0, 11.0, 11.0, 11.0, 8.0, 5.0, 5.0, 3.0, 2.0, 3.0, 0.0, 2.0, 3.0, 0.0, 2.0], "bins": [-2.89453125, -2.80810546875, -2.7216796875, -2.63525390625, -2.548828125, -2.46240234375, -2.3759765625, -2.28955078125, -2.203125, -2.11669921875, -2.0302734375, -1.94384765625, -1.857421875, -1.77099609375, -1.6845703125, -1.59814453125, -1.51171875, -1.42529296875, -1.3388671875, -1.25244140625, -1.166015625, -1.07958984375, -0.9931640625, -0.90673828125, -0.8203125, -0.73388671875, -0.6474609375, -0.56103515625, -0.474609375, -0.38818359375, -0.3017578125, -0.21533203125, -0.12890625, -0.04248046875, 0.0439453125, 0.13037109375, 0.216796875, 0.30322265625, 0.3896484375, 0.47607421875, 0.5625, 0.64892578125, 0.7353515625, 0.82177734375, 0.908203125, 0.99462890625, 1.0810546875, 1.16748046875, 1.25390625, 1.34033203125, 1.4267578125, 1.51318359375, 1.599609375, 1.68603515625, 1.7724609375, 1.85888671875, 1.9453125, 2.03173828125, 2.1181640625, 2.20458984375, 2.291015625, 2.37744140625, 2.4638671875, 2.55029296875, 2.63671875]}, "gradients/decoder.transformer.h.16.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 1.0, 3.0, 3.0, 5.0, 13.0, 29.0, 24.0, 37.0, 46.0, 81.0, 75.0, 129.0, 192.0, 281.0, 362.0, 530.0, 721.0, 1071.0, 1523.0, 2229.0, 3334.0, 4973.0, 7903.0, 12406.0, 21467.0, 40603.0, 87099.0, 235887.0, 375766.0, 126215.0, 54532.0, 27652.0, 15658.0, 9333.0, 5973.0, 3758.0, 2615.0, 1747.0, 1294.0, 859.0, 616.0, 444.0, 324.0, 210.0, 155.0, 122.0, 76.0, 65.0, 41.0, 33.0, 16.0, 13.0, 3.0, 8.0, 6.0, 3.0, 2.0, 2.0, 2.0, 1.0], "bins": [-1.5263671875, -1.478973388671875, -1.43157958984375, -1.384185791015625, -1.3367919921875, -1.289398193359375, -1.24200439453125, -1.194610595703125, -1.147216796875, -1.099822998046875, -1.05242919921875, -1.005035400390625, -0.9576416015625, -0.910247802734375, -0.86285400390625, -0.815460205078125, -0.76806640625, -0.720672607421875, -0.67327880859375, -0.625885009765625, -0.5784912109375, -0.531097412109375, -0.48370361328125, -0.436309814453125, -0.388916015625, -0.341522216796875, -0.29412841796875, -0.246734619140625, -0.1993408203125, -0.151947021484375, -0.10455322265625, -0.057159423828125, -0.009765625, 0.037628173828125, 0.08502197265625, 0.132415771484375, 0.1798095703125, 0.227203369140625, 0.27459716796875, 0.321990966796875, 0.369384765625, 0.416778564453125, 0.46417236328125, 0.511566162109375, 0.5589599609375, 0.606353759765625, 0.65374755859375, 0.701141357421875, 0.74853515625, 0.795928955078125, 0.84332275390625, 0.890716552734375, 0.9381103515625, 0.985504150390625, 1.03289794921875, 1.080291748046875, 1.127685546875, 1.175079345703125, 1.22247314453125, 1.269866943359375, 1.3172607421875, 1.364654541015625, 1.41204833984375, 1.459442138671875, 1.5068359375]}, "gradients/decoder.transformer.h.16.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 1.0, 2.0, 2.0, 5.0, 4.0, 6.0, 8.0, 8.0, 16.0, 12.0, 11.0, 11.0, 16.0, 20.0, 25.0, 23.0, 39.0, 34.0, 33.0, 52.0, 57.0, 42.0, 118.0, 1786.0, 223.0, 83.0, 59.0, 54.0, 46.0, 48.0, 44.0, 30.0, 23.0, 19.0, 17.0, 12.0, 14.0, 10.0, 5.0, 10.0, 4.0, 3.0, 6.0, 8.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0], "bins": [-12.5078125, -12.1375732421875, -11.767333984375, -11.3970947265625, -11.02685546875, -10.6566162109375, -10.286376953125, -9.9161376953125, -9.5458984375, -9.1756591796875, -8.805419921875, -8.4351806640625, -8.06494140625, -7.6947021484375, -7.324462890625, -6.9542236328125, -6.583984375, -6.2137451171875, -5.843505859375, -5.4732666015625, -5.10302734375, -4.7327880859375, -4.362548828125, -3.9923095703125, -3.6220703125, -3.2518310546875, -2.881591796875, -2.5113525390625, -2.14111328125, -1.7708740234375, -1.400634765625, -1.0303955078125, -0.66015625, -0.2899169921875, 0.080322265625, 0.4505615234375, 0.82080078125, 1.1910400390625, 1.561279296875, 1.9315185546875, 2.3017578125, 2.6719970703125, 3.042236328125, 3.4124755859375, 3.78271484375, 4.1529541015625, 4.523193359375, 4.8934326171875, 5.263671875, 5.6339111328125, 6.004150390625, 6.3743896484375, 6.74462890625, 7.1148681640625, 7.485107421875, 7.8553466796875, 8.2255859375, 8.5958251953125, 8.966064453125, 9.3363037109375, 9.70654296875, 10.0767822265625, 10.447021484375, 10.8172607421875, 11.1875]}, "gradients/decoder.transformer.h.16.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 2.0, 3.0, 0.0, 2.0, 1.0, 8.0, 14.0, 2.0, 17.0, 16.0, 25.0, 32.0, 43.0, 74.0, 121.0, 188.0, 306.0, 736.0, 6854.0, 3109031.0, 26257.0, 952.0, 367.0, 230.0, 144.0, 82.0, 55.0, 44.0, 30.0, 21.0, 17.0, 11.0, 8.0, 7.0, 6.0, 2.0, 3.0, 4.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-27.28125, -26.2939453125, -25.306640625, -24.3193359375, -23.33203125, -22.3447265625, -21.357421875, -20.3701171875, -19.3828125, -18.3955078125, -17.408203125, -16.4208984375, -15.43359375, -14.4462890625, -13.458984375, -12.4716796875, -11.484375, -10.4970703125, -9.509765625, -8.5224609375, -7.53515625, -6.5478515625, -5.560546875, -4.5732421875, -3.5859375, -2.5986328125, -1.611328125, -0.6240234375, 0.36328125, 1.3505859375, 2.337890625, 3.3251953125, 4.3125, 5.2998046875, 6.287109375, 7.2744140625, 8.26171875, 9.2490234375, 10.236328125, 11.2236328125, 12.2109375, 13.1982421875, 14.185546875, 15.1728515625, 16.16015625, 17.1474609375, 18.134765625, 19.1220703125, 20.109375, 21.0966796875, 22.083984375, 23.0712890625, 24.05859375, 25.0458984375, 26.033203125, 27.0205078125, 28.0078125, 28.9951171875, 29.982421875, 30.9697265625, 31.95703125, 32.9443359375, 33.931640625, 34.9189453125, 35.90625]}, "gradients/decoder.transformer.h.16.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 991.0, 28.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.216754913330078, -19.282291412353516, -8.34782600402832, 2.586639404296875, 13.521102905273438, 24.45556640625, 35.39003372192383, 46.32449722290039, 57.25896072387695, 68.19342803955078, 79.12789154052734, 90.0623550415039, 100.99681854248047, 111.93128204345703, 122.86575317382812, 133.8002166748047, 144.73468017578125, 155.6691436767578, 166.60360717773438, 177.53807067871094, 188.4725341796875, 199.40699768066406, 210.34146118164062, 221.27593994140625, 232.21038818359375, 243.1448516845703, 254.07931518554688, 265.0137939453125, 275.9482421875, 286.8827209472656, 297.8171691894531, 308.75164794921875, 319.6861267089844, 330.62060546875, 341.5550537109375, 352.4895324707031, 363.4239807128906, 374.35845947265625, 385.29290771484375, 396.2273864746094, 407.1618347167969, 418.0963134765625, 429.03076171875, 439.9652404785156, 450.8996887207031, 461.83416748046875, 472.76861572265625, 483.7030944824219, 494.6375732421875, 505.5720520019531, 516.5065307617188, 527.4409790039062, 538.3754272460938, 549.3098754882812, 560.244384765625, 571.1788330078125, 582.11328125, 593.0477294921875, 603.9822387695312, 614.9166870117188, 625.8511352539062, 636.7855834960938, 647.7200927734375, 658.654541015625, 669.5889892578125]}, "gradients/decoder.transformer.h.16.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 4.0, 3.0, 3.0, 5.0, 4.0, 9.0, 13.0, 15.0, 18.0, 13.0, 18.0, 15.0, 26.0, 19.0, 28.0, 32.0, 38.0, 39.0, 45.0, 30.0, 47.0, 37.0, 45.0, 37.0, 38.0, 40.0, 40.0, 33.0, 39.0, 21.0, 27.0, 26.0, 26.0, 24.0, 37.0, 19.0, 15.0, 12.0, 11.0, 12.0, 8.0, 7.0, 7.0, 5.0, 6.0, 7.0, 2.0, 3.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 2.0], "bins": [-34.57435607910156, -33.48155212402344, -32.38874816894531, -31.295948028564453, -30.203144073486328, -29.110340118408203, -28.01753807067871, -26.92473602294922, -25.831932067871094, -24.73912811279297, -23.646326065063477, -22.553524017333984, -21.46072006225586, -20.367916107177734, -19.275114059448242, -18.18231201171875, -17.089508056640625, -15.996705055236816, -14.903902053833008, -13.8110990524292, -12.71829605102539, -11.625493049621582, -10.532690048217773, -9.439887046813965, -8.347084045410156, -7.254281044006348, -6.161478042602539, -5.0686750411987305, -3.975872039794922, -2.8830690383911133, -1.7902660369873047, -0.6974630355834961, 0.3953361511230469, 1.4881391525268555, 2.580942153930664, 3.6737451553344727, 4.766548156738281, 5.85935115814209, 6.952154159545898, 8.044957160949707, 9.137760162353516, 10.230563163757324, 11.323366165161133, 12.416169166564941, 13.50897216796875, 14.601775169372559, 15.694578170776367, 16.78738021850586, 17.880184173583984, 18.97298812866211, 20.0657901763916, 21.158592224121094, 22.25139617919922, 23.344200134277344, 24.437002182006836, 25.529804229736328, 26.622608184814453, 27.715412139892578, 28.80821418762207, 29.901016235351562, 30.993820190429688, 32.08662414550781, 33.17942810058594, 34.2722282409668, 35.36503219604492]}, "gradients/decoder.transformer.h.15.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 7.0, 0.0, 0.0, 5.0, 6.0, 6.0, 5.0, 9.0, 12.0, 14.0, 13.0, 23.0, 23.0, 18.0, 16.0, 35.0, 32.0, 21.0, 35.0, 40.0, 36.0, 42.0, 33.0, 35.0, 33.0, 51.0, 39.0, 33.0, 24.0, 45.0, 34.0, 27.0, 27.0, 29.0, 35.0, 17.0, 15.0, 26.0, 19.0, 10.0, 21.0, 10.0, 11.0, 10.0, 8.0, 4.0, 7.0, 3.0, 3.0, 0.0, 2.0, 4.0, 1.0, 0.0, 1.0], "bins": [-3.1171875, -3.025482177734375, -2.93377685546875, -2.842071533203125, -2.7503662109375, -2.658660888671875, -2.56695556640625, -2.475250244140625, -2.383544921875, -2.291839599609375, -2.20013427734375, -2.108428955078125, -2.0167236328125, -1.925018310546875, -1.83331298828125, -1.741607666015625, -1.64990234375, -1.558197021484375, -1.46649169921875, -1.374786376953125, -1.2830810546875, -1.191375732421875, -1.09967041015625, -1.007965087890625, -0.916259765625, -0.824554443359375, -0.73284912109375, -0.641143798828125, -0.5494384765625, -0.457733154296875, -0.36602783203125, -0.274322509765625, -0.1826171875, -0.090911865234375, 0.00079345703125, 0.092498779296875, 0.1842041015625, 0.275909423828125, 0.36761474609375, 0.459320068359375, 0.551025390625, 0.642730712890625, 0.73443603515625, 0.826141357421875, 0.9178466796875, 1.009552001953125, 1.10125732421875, 1.192962646484375, 1.28466796875, 1.376373291015625, 1.46807861328125, 1.559783935546875, 1.6514892578125, 1.743194580078125, 1.83489990234375, 1.926605224609375, 2.018310546875, 2.110015869140625, 2.20172119140625, 2.293426513671875, 2.3851318359375, 2.476837158203125, 2.56854248046875, 2.660247802734375, 2.751953125]}, "gradients/decoder.transformer.h.15.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 5.0, 3.0, 7.0, 8.0, 7.0, 20.0, 30.0, 38.0, 62.0, 76.0, 102.0, 169.0, 250.0, 342.0, 552.0, 947.0, 1616.0, 2893.0, 5586.0, 11558.0, 26252.0, 71218.0, 254511.0, 948562.0, 1732705.0, 815773.0, 212856.0, 63026.0, 23280.0, 10387.0, 5056.0, 2585.0, 1473.0, 834.0, 518.0, 327.0, 227.0, 151.0, 83.0, 59.0, 39.0, 35.0, 24.0, 11.0, 8.0, 7.0, 6.0, 5.0, 3.0, 2.0, 0.0, 2.0, 1.0], "bins": [-3.94921875, -3.8377685546875, -3.726318359375, -3.6148681640625, -3.50341796875, -3.3919677734375, -3.280517578125, -3.1690673828125, -3.0576171875, -2.9461669921875, -2.834716796875, -2.7232666015625, -2.61181640625, -2.5003662109375, -2.388916015625, -2.2774658203125, -2.166015625, -2.0545654296875, -1.943115234375, -1.8316650390625, -1.72021484375, -1.6087646484375, -1.497314453125, -1.3858642578125, -1.2744140625, -1.1629638671875, -1.051513671875, -0.9400634765625, -0.82861328125, -0.7171630859375, -0.605712890625, -0.4942626953125, -0.3828125, -0.2713623046875, -0.159912109375, -0.0484619140625, 0.06298828125, 0.1744384765625, 0.285888671875, 0.3973388671875, 0.5087890625, 0.6202392578125, 0.731689453125, 0.8431396484375, 0.95458984375, 1.0660400390625, 1.177490234375, 1.2889404296875, 1.400390625, 1.5118408203125, 1.623291015625, 1.7347412109375, 1.84619140625, 1.9576416015625, 2.069091796875, 2.1805419921875, 2.2919921875, 2.4034423828125, 2.514892578125, 2.6263427734375, 2.73779296875, 2.8492431640625, 2.960693359375, 3.0721435546875, 3.18359375]}, "gradients/decoder.transformer.h.15.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 5.0, 2.0, 3.0, 12.0, 7.0, 9.0, 21.0, 28.0, 32.0, 42.0, 58.0, 71.0, 127.0, 146.0, 249.0, 324.0, 475.0, 502.0, 481.0, 432.0, 301.0, 193.0, 134.0, 111.0, 95.0, 51.0, 41.0, 26.0, 17.0, 26.0, 14.0, 7.0, 10.0, 4.0, 6.0, 3.0, 3.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0], "bins": [-6.734375, -6.54595947265625, -6.3575439453125, -6.16912841796875, -5.980712890625, -5.79229736328125, -5.6038818359375, -5.41546630859375, -5.22705078125, -5.03863525390625, -4.8502197265625, -4.66180419921875, -4.473388671875, -4.28497314453125, -4.0965576171875, -3.90814208984375, -3.7197265625, -3.53131103515625, -3.3428955078125, -3.15447998046875, -2.966064453125, -2.77764892578125, -2.5892333984375, -2.40081787109375, -2.21240234375, -2.02398681640625, -1.8355712890625, -1.64715576171875, -1.458740234375, -1.27032470703125, -1.0819091796875, -0.89349365234375, -0.705078125, -0.51666259765625, -0.3282470703125, -0.13983154296875, 0.048583984375, 0.23699951171875, 0.4254150390625, 0.61383056640625, 0.80224609375, 0.99066162109375, 1.1790771484375, 1.36749267578125, 1.555908203125, 1.74432373046875, 1.9327392578125, 2.12115478515625, 2.3095703125, 2.49798583984375, 2.6864013671875, 2.87481689453125, 3.063232421875, 3.25164794921875, 3.4400634765625, 3.62847900390625, 3.81689453125, 4.00531005859375, 4.1937255859375, 4.38214111328125, 4.570556640625, 4.75897216796875, 4.9473876953125, 5.13580322265625, 5.32421875]}, "gradients/decoder.transformer.h.15.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 3.0, 2.0, 5.0, 8.0, 16.0, 11.0, 13.0, 30.0, 37.0, 52.0, 58.0, 103.0, 168.0, 287.0, 622.0, 1545.0, 5916.0, 31002.0, 283398.0, 3138371.0, 662087.0, 57455.0, 9119.0, 2322.0, 753.0, 367.0, 183.0, 87.0, 73.0, 58.0, 33.0, 24.0, 19.0, 13.0, 15.0, 8.0, 6.0, 3.0, 5.0, 5.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-14.953125, -14.545166015625, -14.13720703125, -13.729248046875, -13.3212890625, -12.913330078125, -12.50537109375, -12.097412109375, -11.689453125, -11.281494140625, -10.87353515625, -10.465576171875, -10.0576171875, -9.649658203125, -9.24169921875, -8.833740234375, -8.42578125, -8.017822265625, -7.60986328125, -7.201904296875, -6.7939453125, -6.385986328125, -5.97802734375, -5.570068359375, -5.162109375, -4.754150390625, -4.34619140625, -3.938232421875, -3.5302734375, -3.122314453125, -2.71435546875, -2.306396484375, -1.8984375, -1.490478515625, -1.08251953125, -0.674560546875, -0.2666015625, 0.141357421875, 0.54931640625, 0.957275390625, 1.365234375, 1.773193359375, 2.18115234375, 2.589111328125, 2.9970703125, 3.405029296875, 3.81298828125, 4.220947265625, 4.62890625, 5.036865234375, 5.44482421875, 5.852783203125, 6.2607421875, 6.668701171875, 7.07666015625, 7.484619140625, 7.892578125, 8.300537109375, 8.70849609375, 9.116455078125, 9.5244140625, 9.932373046875, 10.34033203125, 10.748291015625, 11.15625]}, "gradients/decoder.transformer.h.15.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 7.0, 15.0, 18.0, 28.0, 45.0, 84.0, 122.0, 143.0, 146.0, 129.0, 95.0, 83.0, 46.0, 29.0, 11.0, 10.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-62.12458801269531, -60.75673294067383, -59.388877868652344, -58.021026611328125, -56.65317153930664, -55.285316467285156, -53.91746139526367, -52.54960632324219, -51.1817512512207, -49.81389617919922, -48.446041107177734, -47.07818603515625, -45.71033477783203, -44.34247970581055, -42.97462463378906, -41.60676956176758, -40.238914489746094, -38.87105941772461, -37.503204345703125, -36.135353088378906, -34.76749801635742, -33.39964294433594, -32.03178787231445, -30.66393280029297, -29.29608154296875, -27.928226470947266, -26.560373306274414, -25.19251823425293, -23.824663162231445, -22.456809997558594, -21.08895492553711, -19.721099853515625, -18.35324478149414, -16.985389709472656, -15.617535591125488, -14.24968147277832, -12.881826400756836, -11.513972282409668, -10.1461181640625, -8.778263092041016, -7.410408973693848, -6.0425543785095215, -4.674699783325195, -3.3068456649780273, -1.9389910697937012, -0.571136474609375, 0.796717643737793, 2.1645727157592773, 3.5324268341064453, 4.9002814292907715, 6.268136024475098, 7.635990142822266, 9.00384521484375, 10.371699333190918, 11.739553451538086, 13.10740852355957, 14.475262641906738, 15.843116760253906, 17.21097183227539, 18.578826904296875, 19.946680068969727, 21.31453514099121, 22.682388305664062, 24.050243377685547, 25.41809844970703]}, "gradients/decoder.transformer.h.15.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 5.0, 4.0, 4.0, 6.0, 5.0, 9.0, 10.0, 13.0, 11.0, 15.0, 15.0, 15.0, 25.0, 14.0, 20.0, 17.0, 34.0, 29.0, 36.0, 38.0, 33.0, 31.0, 49.0, 40.0, 30.0, 37.0, 34.0, 39.0, 38.0, 39.0, 35.0, 19.0, 28.0, 32.0, 33.0, 24.0, 22.0, 15.0, 10.0, 11.0, 10.0, 11.0, 11.0, 10.0, 5.0, 5.0, 5.0, 9.0, 7.0, 2.0, 2.0, 4.0, 6.0, 1.0, 1.0], "bins": [-18.213775634765625, -17.67635154724121, -17.138927459716797, -16.60150146484375, -16.064077377319336, -15.526653289794922, -14.989228248596191, -14.451803207397461, -13.914379119873047, -13.376955032348633, -12.839529991149902, -12.302104949951172, -11.764680862426758, -11.227256774902344, -10.689831733703613, -10.152406692504883, -9.614982604980469, -9.077558517456055, -8.540133476257324, -8.002708435058594, -7.46528434753418, -6.927859783172607, -6.390435218811035, -5.853010654449463, -5.315586090087891, -4.778161525726318, -4.240736961364746, -3.703312397003174, -3.1658878326416016, -2.6284632682800293, -2.091038703918457, -1.5536141395568848, -1.0161895751953125, -0.47876501083374023, 0.05865955352783203, 0.5960841178894043, 1.1335086822509766, 1.6709332466125488, 2.208357810974121, 2.7457823753356934, 3.2832069396972656, 3.820631504058838, 4.35805606842041, 4.895480632781982, 5.432905197143555, 5.970329761505127, 6.507754325866699, 7.0451788902282715, 7.582603454589844, 8.120027542114258, 8.657452583312988, 9.194877624511719, 9.732301712036133, 10.269725799560547, 10.807150840759277, 11.344575881958008, 11.881999969482422, 12.419424057006836, 12.956849098205566, 13.494274139404297, 14.031698226928711, 14.569122314453125, 15.106547355651855, 15.643972396850586, 16.181396484375]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0, 4.0, 2.0, 3.0, 6.0, 7.0, 5.0, 3.0, 6.0, 13.0, 11.0, 10.0, 12.0, 27.0, 28.0, 20.0, 18.0, 32.0, 33.0, 34.0, 40.0, 31.0, 37.0, 42.0, 26.0, 36.0, 43.0, 37.0, 37.0, 26.0, 36.0, 41.0, 34.0, 29.0, 34.0, 24.0, 33.0, 21.0, 23.0, 16.0, 13.0, 14.0, 11.0, 13.0, 15.0, 4.0, 2.0, 7.0, 2.0, 1.0, 4.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-2.974609375, -2.88232421875, -2.7900390625, -2.69775390625, -2.60546875, -2.51318359375, -2.4208984375, -2.32861328125, -2.236328125, -2.14404296875, -2.0517578125, -1.95947265625, -1.8671875, -1.77490234375, -1.6826171875, -1.59033203125, -1.498046875, -1.40576171875, -1.3134765625, -1.22119140625, -1.12890625, -1.03662109375, -0.9443359375, -0.85205078125, -0.759765625, -0.66748046875, -0.5751953125, -0.48291015625, -0.390625, -0.29833984375, -0.2060546875, -0.11376953125, -0.021484375, 0.07080078125, 0.1630859375, 0.25537109375, 0.34765625, 0.43994140625, 0.5322265625, 0.62451171875, 0.716796875, 0.80908203125, 0.9013671875, 0.99365234375, 1.0859375, 1.17822265625, 1.2705078125, 1.36279296875, 1.455078125, 1.54736328125, 1.6396484375, 1.73193359375, 1.82421875, 1.91650390625, 2.0087890625, 2.10107421875, 2.193359375, 2.28564453125, 2.3779296875, 2.47021484375, 2.5625, 2.65478515625, 2.7470703125, 2.83935546875, 2.931640625]}, "gradients/decoder.transformer.h.15.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 4.0, 0.0, 6.0, 2.0, 5.0, 12.0, 26.0, 24.0, 33.0, 66.0, 95.0, 133.0, 145.0, 252.0, 401.0, 477.0, 722.0, 1033.0, 1513.0, 2235.0, 3274.0, 4738.0, 7108.0, 10803.0, 15988.0, 24643.0, 38732.0, 61390.0, 103599.0, 220807.0, 251187.0, 113224.0, 66019.0, 41380.0, 26432.0, 17213.0, 11154.0, 7554.0, 5050.0, 3352.0, 2421.0, 1600.0, 1121.0, 782.0, 528.0, 361.0, 300.0, 169.0, 132.0, 109.0, 79.0, 44.0, 26.0, 20.0, 13.0, 10.0, 10.0, 4.0, 2.0, 3.0, 4.0, 2.0], "bins": [-0.07952880859375, -0.07704925537109375, -0.0745697021484375, -0.07209014892578125, -0.069610595703125, -0.06713104248046875, -0.0646514892578125, -0.06217193603515625, -0.0596923828125, -0.05721282958984375, -0.0547332763671875, -0.05225372314453125, -0.049774169921875, -0.04729461669921875, -0.0448150634765625, -0.04233551025390625, -0.03985595703125, -0.03737640380859375, -0.0348968505859375, -0.03241729736328125, -0.029937744140625, -0.02745819091796875, -0.0249786376953125, -0.02249908447265625, -0.02001953125, -0.01753997802734375, -0.0150604248046875, -0.01258087158203125, -0.010101318359375, -0.00762176513671875, -0.0051422119140625, -0.00266265869140625, -0.00018310546875, 0.00229644775390625, 0.0047760009765625, 0.00725555419921875, 0.009735107421875, 0.01221466064453125, 0.0146942138671875, 0.01717376708984375, 0.0196533203125, 0.02213287353515625, 0.0246124267578125, 0.02709197998046875, 0.029571533203125, 0.03205108642578125, 0.0345306396484375, 0.03701019287109375, 0.03948974609375, 0.04196929931640625, 0.0444488525390625, 0.04692840576171875, 0.049407958984375, 0.05188751220703125, 0.0543670654296875, 0.05684661865234375, 0.059326171875, 0.06180572509765625, 0.0642852783203125, 0.06676483154296875, 0.069244384765625, 0.07172393798828125, 0.0742034912109375, 0.07668304443359375, 0.07916259765625]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 4.0, 3.0, 11.0, 6.0, 8.0, 9.0, 10.0, 12.0, 14.0, 23.0, 23.0, 23.0, 17.0, 33.0, 27.0, 38.0, 43.0, 33.0, 36.0, 50.0, 36.0, 50.0, 1071.0, 38.0, 38.0, 40.0, 32.0, 26.0, 33.0, 36.0, 25.0, 24.0, 17.0, 25.0, 24.0, 26.0, 13.0, 15.0, 11.0, 12.0, 5.0, 3.0, 7.0, 6.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.302734375, -2.23358154296875, -2.1644287109375, -2.09527587890625, -2.026123046875, -1.95697021484375, -1.8878173828125, -1.81866455078125, -1.74951171875, -1.68035888671875, -1.6112060546875, -1.54205322265625, -1.472900390625, -1.40374755859375, -1.3345947265625, -1.26544189453125, -1.1962890625, -1.12713623046875, -1.0579833984375, -0.98883056640625, -0.919677734375, -0.85052490234375, -0.7813720703125, -0.71221923828125, -0.64306640625, -0.57391357421875, -0.5047607421875, -0.43560791015625, -0.366455078125, -0.29730224609375, -0.2281494140625, -0.15899658203125, -0.08984375, -0.02069091796875, 0.0484619140625, 0.11761474609375, 0.186767578125, 0.25592041015625, 0.3250732421875, 0.39422607421875, 0.46337890625, 0.53253173828125, 0.6016845703125, 0.67083740234375, 0.739990234375, 0.80914306640625, 0.8782958984375, 0.94744873046875, 1.0166015625, 1.08575439453125, 1.1549072265625, 1.22406005859375, 1.293212890625, 1.36236572265625, 1.4315185546875, 1.50067138671875, 1.56982421875, 1.63897705078125, 1.7081298828125, 1.77728271484375, 1.846435546875, 1.91558837890625, 1.9847412109375, 2.05389404296875, 2.123046875]}, "gradients/decoder.transformer.h.15.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 9.0, 5.0, 11.0, 14.0, 15.0, 24.0, 45.0, 88.0, 124.0, 200.0, 288.0, 450.0, 729.0, 1216.0, 2015.0, 3302.0, 5472.0, 9258.0, 15305.0, 25790.0, 45194.0, 83083.0, 178667.0, 1413949.0, 144255.0, 71251.0, 39623.0, 22566.0, 13485.0, 8176.0, 4843.0, 2998.0, 1819.0, 1085.0, 678.0, 389.0, 278.0, 154.0, 97.0, 64.0, 41.0, 29.0, 22.0, 17.0, 10.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.10186767578125, -0.09881591796875, -0.09576416015625, -0.09271240234375, -0.08966064453125, -0.08660888671875, -0.08355712890625, -0.08050537109375, -0.07745361328125, -0.07440185546875, -0.07135009765625, -0.06829833984375, -0.06524658203125, -0.06219482421875, -0.05914306640625, -0.05609130859375, -0.05303955078125, -0.04998779296875, -0.04693603515625, -0.04388427734375, -0.04083251953125, -0.03778076171875, -0.03472900390625, -0.03167724609375, -0.02862548828125, -0.02557373046875, -0.02252197265625, -0.01947021484375, -0.01641845703125, -0.01336669921875, -0.01031494140625, -0.00726318359375, -0.00421142578125, -0.00115966796875, 0.00189208984375, 0.00494384765625, 0.00799560546875, 0.01104736328125, 0.01409912109375, 0.01715087890625, 0.02020263671875, 0.02325439453125, 0.02630615234375, 0.02935791015625, 0.03240966796875, 0.03546142578125, 0.03851318359375, 0.04156494140625, 0.04461669921875, 0.04766845703125, 0.05072021484375, 0.05377197265625, 0.05682373046875, 0.05987548828125, 0.06292724609375, 0.06597900390625, 0.06903076171875, 0.07208251953125, 0.07513427734375, 0.07818603515625, 0.08123779296875, 0.08428955078125, 0.08734130859375, 0.09039306640625, 0.09344482421875]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 2.0, 5.0, 2.0, 6.0, 6.0, 7.0, 10.0, 14.0, 18.0, 22.0, 15.0, 43.0, 38.0, 39.0, 65.0, 67.0, 76.0, 84.0, 83.0, 65.0, 41.0, 47.0, 46.0, 47.0, 29.0, 27.0, 25.0, 15.0, 8.0, 6.0, 3.0, 10.0, 7.0, 6.0, 5.0, 6.0, 4.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.066394805908203e-06, -4.884786903858185e-06, -4.7031790018081665e-06, -4.521571099758148e-06, -4.33996319770813e-06, -4.1583552956581116e-06, -3.976747393608093e-06, -3.795139491558075e-06, -3.6135315895080566e-06, -3.4319236874580383e-06, -3.25031578540802e-06, -3.0687078833580017e-06, -2.8870999813079834e-06, -2.705492079257965e-06, -2.5238841772079468e-06, -2.3422762751579285e-06, -2.16066837310791e-06, -1.979060471057892e-06, -1.7974525690078735e-06, -1.6158446669578552e-06, -1.434236764907837e-06, -1.2526288628578186e-06, -1.0710209608078003e-06, -8.89413058757782e-07, -7.078051567077637e-07, -5.261972546577454e-07, -3.4458935260772705e-07, -1.6298145055770874e-07, 1.862645149230957e-08, 2.0023435354232788e-07, 3.818422555923462e-07, 5.634501576423645e-07, 7.450580596923828e-07, 9.266659617424011e-07, 1.1082738637924194e-06, 1.2898817658424377e-06, 1.471489667892456e-06, 1.6530975699424744e-06, 1.8347054719924927e-06, 2.016313374042511e-06, 2.1979212760925293e-06, 2.3795291781425476e-06, 2.561137080192566e-06, 2.7427449822425842e-06, 2.9243528842926025e-06, 3.105960786342621e-06, 3.287568688392639e-06, 3.4691765904426575e-06, 3.6507844924926758e-06, 3.832392394542694e-06, 4.014000296592712e-06, 4.195608198642731e-06, 4.377216100692749e-06, 4.558824002742767e-06, 4.740431904792786e-06, 4.922039806842804e-06, 5.103647708892822e-06, 5.2852556109428406e-06, 5.466863512992859e-06, 5.648471415042877e-06, 5.8300793170928955e-06, 6.011687219142914e-06, 6.193295121192932e-06, 6.3749030232429504e-06, 6.556510925292969e-06]}, "gradients/decoder.transformer.h.15.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 2.0, 3.0, 3.0, 3.0, 6.0, 9.0, 10.0, 13.0, 5.0, 17.0, 18.0, 21.0, 35.0, 39.0, 63.0, 86.0, 130.0, 292.0, 608.0, 6779.0, 689420.0, 346196.0, 3620.0, 504.0, 223.0, 140.0, 82.0, 54.0, 42.0, 20.0, 25.0, 22.0, 14.0, 18.0, 6.0, 9.0, 4.0, 2.0, 5.0, 5.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.584426879882812e-05, -9.293202310800552e-05, -9.001977741718292e-05, -8.710753172636032e-05, -8.419528603553772e-05, -8.128304034471512e-05, -7.837079465389252e-05, -7.545854896306992e-05, -7.254630327224731e-05, -6.963405758142471e-05, -6.672181189060211e-05, -6.380956619977951e-05, -6.089732050895691e-05, -5.798507481813431e-05, -5.5072829127311707e-05, -5.2160583436489105e-05, -4.9248337745666504e-05, -4.63360920548439e-05, -4.34238463640213e-05, -4.05116006731987e-05, -3.75993549823761e-05, -3.46871092915535e-05, -3.1774863600730896e-05, -2.8862617909908295e-05, -2.5950372219085693e-05, -2.3038126528263092e-05, -2.012588083744049e-05, -1.721363514661789e-05, -1.4301389455795288e-05, -1.1389143764972687e-05, -8.476898074150085e-06, -5.564652383327484e-06, -2.652406692504883e-06, 2.598389983177185e-07, 3.17208468914032e-06, 6.084330379962921e-06, 8.996576070785522e-06, 1.1908821761608124e-05, 1.4821067452430725e-05, 1.7733313143253326e-05, 2.0645558834075928e-05, 2.355780452489853e-05, 2.647005021572113e-05, 2.938229590654373e-05, 3.229454159736633e-05, 3.5206787288188934e-05, 3.8119032979011536e-05, 4.103127866983414e-05, 4.394352436065674e-05, 4.685577005147934e-05, 4.976801574230194e-05, 5.268026143312454e-05, 5.5592507123947144e-05, 5.8504752814769745e-05, 6.141699850559235e-05, 6.432924419641495e-05, 6.724148988723755e-05, 7.015373557806015e-05, 7.306598126888275e-05, 7.597822695970535e-05, 7.889047265052795e-05, 8.180271834135056e-05, 8.471496403217316e-05, 8.762720972299576e-05, 9.053945541381836e-05]}, "gradients/decoder.transformer.h.15.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 6.0, 16.0, 43.0, 106.0, 240.0, 251.0, 195.0, 108.0, 33.0, 11.0, 7.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.940985607187031e-06, -3.6243873182684183e-06, -3.3077888019761303e-06, -2.9911902856838424e-06, -2.67459199676523e-06, -2.3579937078466173e-06, -2.0413951915543294e-06, -1.7247966752620414e-06, -1.4081983863434289e-06, -1.0915999837379786e-06, -7.750015811325284e-07, -4.584031785270781e-07, -1.4180477592162788e-07, 1.7479362668382237e-07, 4.913920292892726e-07, 8.079905455815606e-07, 1.1245888345001731e-06, 1.4411872371056234e-06, 1.7577856397110736e-06, 2.0743841560033616e-06, 2.390982444921974e-06, 2.7075807338405866e-06, 3.0241792501328746e-06, 3.3407777664251626e-06, 3.657376055343775e-06, 3.973974344262388e-06, 4.290573087928351e-06, 4.6071713768469635e-06, 4.923769665765576e-06, 5.240367954684189e-06, 5.556966243602801e-06, 5.8735649872687645e-06, 6.190162821440026e-06, 6.506761110358639e-06, 6.823359399277251e-06, 7.139958142943215e-06, 7.456556431861827e-06, 7.77315472078044e-06, 8.089753464446403e-06, 8.406352208112366e-06, 8.722950042283628e-06, 9.039548785949592e-06, 9.356146620120853e-06, 9.672745363786817e-06, 9.98934410745278e-06, 1.0305941941624042e-05, 1.0622540685290005e-05, 1.0939138519461267e-05, 1.125573726312723e-05, 1.1572336006793194e-05, 1.1888933840964455e-05, 1.2205532584630419e-05, 1.252213041880168e-05, 1.2838729162467644e-05, 1.3155327906133607e-05, 1.347192664979957e-05, 1.3788524483970832e-05, 1.4105123227636795e-05, 1.4421721061808057e-05, 1.473831980547402e-05, 1.5054918549139984e-05, 1.5371517292805947e-05, 1.5688114217482507e-05, 1.600471296114847e-05, 1.6321311704814434e-05]}, "gradients/decoder.transformer.h.15.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 4.0, 6.0, 7.0, 8.0, 3.0, 14.0, 5.0, 13.0, 24.0, 15.0, 10.0, 12.0, 34.0, 24.0, 18.0, 62.0, 28.0, 28.0, 32.0, 47.0, 26.0, 33.0, 27.0, 45.0, 26.0, 36.0, 55.0, 41.0, 20.0, 32.0, 56.0, 14.0, 23.0, 39.0, 16.0, 12.0, 8.0, 24.0, 12.0, 10.0, 12.0, 7.0, 6.0, 6.0, 8.0, 2.0, 1.0, 2.0, 1.0, 0.0, 4.0, 2.0, 0.0, 4.0, 2.0], "bins": [-2.4437904357910156e-06, -2.3674219846725464e-06, -2.291053533554077e-06, -2.214685082435608e-06, -2.1383166313171387e-06, -2.0619481801986694e-06, -1.9855797290802e-06, -1.909211277961731e-06, -1.8328428268432617e-06, -1.7564743757247925e-06, -1.6801059246063232e-06, -1.603737473487854e-06, -1.5273690223693848e-06, -1.4510005712509155e-06, -1.3746321201324463e-06, -1.298263669013977e-06, -1.2218952178955078e-06, -1.1455267667770386e-06, -1.0691583156585693e-06, -9.927898645401e-07, -9.164214134216309e-07, -8.400529623031616e-07, -7.636845111846924e-07, -6.873160600662231e-07, -6.109476089477539e-07, -5.345791578292847e-07, -4.5821070671081543e-07, -3.818422555923462e-07, -3.0547380447387695e-07, -2.2910535335540771e-07, -1.5273690223693848e-07, -7.636845111846924e-08, 0.0, 7.636845111846924e-08, 1.5273690223693848e-07, 2.2910535335540771e-07, 3.0547380447387695e-07, 3.818422555923462e-07, 4.5821070671081543e-07, 5.345791578292847e-07, 6.109476089477539e-07, 6.873160600662231e-07, 7.636845111846924e-07, 8.400529623031616e-07, 9.164214134216309e-07, 9.927898645401e-07, 1.0691583156585693e-06, 1.1455267667770386e-06, 1.2218952178955078e-06, 1.298263669013977e-06, 1.3746321201324463e-06, 1.4510005712509155e-06, 1.5273690223693848e-06, 1.603737473487854e-06, 1.6801059246063232e-06, 1.7564743757247925e-06, 1.8328428268432617e-06, 1.909211277961731e-06, 1.9855797290802e-06, 2.0619481801986694e-06, 2.1383166313171387e-06, 2.214685082435608e-06, 2.291053533554077e-06, 2.3674219846725464e-06, 2.4437904357910156e-06]}, "gradients/decoder.transformer.h.15.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0, 4.0, 2.0, 3.0, 6.0, 7.0, 5.0, 3.0, 6.0, 13.0, 11.0, 10.0, 12.0, 27.0, 28.0, 20.0, 18.0, 32.0, 33.0, 34.0, 40.0, 31.0, 37.0, 42.0, 26.0, 36.0, 43.0, 37.0, 37.0, 26.0, 36.0, 41.0, 34.0, 29.0, 34.0, 24.0, 33.0, 21.0, 23.0, 16.0, 13.0, 14.0, 11.0, 13.0, 15.0, 4.0, 2.0, 7.0, 2.0, 1.0, 4.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-2.974609375, -2.88232421875, -2.7900390625, -2.69775390625, -2.60546875, -2.51318359375, -2.4208984375, -2.32861328125, -2.236328125, -2.14404296875, -2.0517578125, -1.95947265625, -1.8671875, -1.77490234375, -1.6826171875, -1.59033203125, -1.498046875, -1.40576171875, -1.3134765625, -1.22119140625, -1.12890625, -1.03662109375, -0.9443359375, -0.85205078125, -0.759765625, -0.66748046875, -0.5751953125, -0.48291015625, -0.390625, -0.29833984375, -0.2060546875, -0.11376953125, -0.021484375, 0.07080078125, 0.1630859375, 0.25537109375, 0.34765625, 0.43994140625, 0.5322265625, 0.62451171875, 0.716796875, 0.80908203125, 0.9013671875, 0.99365234375, 1.0859375, 1.17822265625, 1.2705078125, 1.36279296875, 1.455078125, 1.54736328125, 1.6396484375, 1.73193359375, 1.82421875, 1.91650390625, 2.0087890625, 2.10107421875, 2.193359375, 2.28564453125, 2.3779296875, 2.47021484375, 2.5625, 2.65478515625, 2.7470703125, 2.83935546875, 2.931640625]}, "gradients/decoder.transformer.h.15.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 5.0, 3.0, 3.0, 9.0, 13.0, 10.0, 22.0, 20.0, 52.0, 66.0, 103.0, 138.0, 196.0, 266.0, 465.0, 767.0, 1181.0, 1919.0, 3361.0, 6358.0, 13396.0, 32456.0, 93136.0, 370451.0, 369842.0, 93439.0, 32495.0, 13434.0, 6406.0, 3314.0, 1907.0, 1204.0, 711.0, 458.0, 310.0, 222.0, 134.0, 79.0, 52.0, 56.0, 32.0, 23.0, 17.0, 9.0, 8.0, 5.0, 8.0, 4.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9921875, -1.92578125, -1.859375, -1.79296875, -1.7265625, -1.66015625, -1.59375, -1.52734375, -1.4609375, -1.39453125, -1.328125, -1.26171875, -1.1953125, -1.12890625, -1.0625, -0.99609375, -0.9296875, -0.86328125, -0.796875, -0.73046875, -0.6640625, -0.59765625, -0.53125, -0.46484375, -0.3984375, -0.33203125, -0.265625, -0.19921875, -0.1328125, -0.06640625, 0.0, 0.06640625, 0.1328125, 0.19921875, 0.265625, 0.33203125, 0.3984375, 0.46484375, 0.53125, 0.59765625, 0.6640625, 0.73046875, 0.796875, 0.86328125, 0.9296875, 0.99609375, 1.0625, 1.12890625, 1.1953125, 1.26171875, 1.328125, 1.39453125, 1.4609375, 1.52734375, 1.59375, 1.66015625, 1.7265625, 1.79296875, 1.859375, 1.92578125, 1.9921875, 2.05859375, 2.125, 2.19140625, 2.2578125]}, "gradients/decoder.transformer.h.15.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 6.0, 3.0, 8.0, 10.0, 3.0, 7.0, 13.0, 11.0, 17.0, 12.0, 14.0, 23.0, 30.0, 31.0, 24.0, 39.0, 43.0, 52.0, 66.0, 76.0, 155.0, 1802.0, 137.0, 72.0, 48.0, 51.0, 55.0, 33.0, 28.0, 34.0, 23.0, 20.0, 21.0, 17.0, 13.0, 11.0, 7.0, 8.0, 9.0, 8.0, 5.0, 5.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-11.25, -10.86767578125, -10.4853515625, -10.10302734375, -9.720703125, -9.33837890625, -8.9560546875, -8.57373046875, -8.19140625, -7.80908203125, -7.4267578125, -7.04443359375, -6.662109375, -6.27978515625, -5.8974609375, -5.51513671875, -5.1328125, -4.75048828125, -4.3681640625, -3.98583984375, -3.603515625, -3.22119140625, -2.8388671875, -2.45654296875, -2.07421875, -1.69189453125, -1.3095703125, -0.92724609375, -0.544921875, -0.16259765625, 0.2197265625, 0.60205078125, 0.984375, 1.36669921875, 1.7490234375, 2.13134765625, 2.513671875, 2.89599609375, 3.2783203125, 3.66064453125, 4.04296875, 4.42529296875, 4.8076171875, 5.18994140625, 5.572265625, 5.95458984375, 6.3369140625, 6.71923828125, 7.1015625, 7.48388671875, 7.8662109375, 8.24853515625, 8.630859375, 9.01318359375, 9.3955078125, 9.77783203125, 10.16015625, 10.54248046875, 10.9248046875, 11.30712890625, 11.689453125, 12.07177734375, 12.4541015625, 12.83642578125, 13.21875]}, "gradients/decoder.transformer.h.15.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 6.0, 9.0, 13.0, 13.0, 7.0, 9.0, 17.0, 20.0, 25.0, 39.0, 60.0, 56.0, 67.0, 111.0, 116.0, 206.0, 276.0, 560.0, 1694.0, 38497.0, 3091379.0, 9981.0, 1183.0, 392.0, 238.0, 180.0, 121.0, 88.0, 78.0, 59.0, 44.0, 38.0, 27.0, 23.0, 12.0, 18.0, 8.0, 5.0, 8.0, 9.0, 4.0, 7.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0], "bins": [-29.375, -28.544189453125, -27.71337890625, -26.882568359375, -26.0517578125, -25.220947265625, -24.39013671875, -23.559326171875, -22.728515625, -21.897705078125, -21.06689453125, -20.236083984375, -19.4052734375, -18.574462890625, -17.74365234375, -16.912841796875, -16.08203125, -15.251220703125, -14.42041015625, -13.589599609375, -12.7587890625, -11.927978515625, -11.09716796875, -10.266357421875, -9.435546875, -8.604736328125, -7.77392578125, -6.943115234375, -6.1123046875, -5.281494140625, -4.45068359375, -3.619873046875, -2.7890625, -1.958251953125, -1.12744140625, -0.296630859375, 0.5341796875, 1.364990234375, 2.19580078125, 3.026611328125, 3.857421875, 4.688232421875, 5.51904296875, 6.349853515625, 7.1806640625, 8.011474609375, 8.84228515625, 9.673095703125, 10.50390625, 11.334716796875, 12.16552734375, 12.996337890625, 13.8271484375, 14.657958984375, 15.48876953125, 16.319580078125, 17.150390625, 17.981201171875, 18.81201171875, 19.642822265625, 20.4736328125, 21.304443359375, 22.13525390625, 22.966064453125, 23.796875]}, "gradients/decoder.transformer.h.15.ln_1.weight": {"_type": "histogram", "values": [988.0, 34.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.41957426071167, 2.424604892730713, 10.268783569335938, 18.11296272277832, 25.957141876220703, 33.80131912231445, 41.64550018310547, 49.489681243896484, 57.333858489990234, 65.17803955078125, 73.022216796875, 80.86639404296875, 88.7105712890625, 96.55474853515625, 104.39893341064453, 112.24311828613281, 120.08729553222656, 127.93147277832031, 135.77565002441406, 143.6198272705078, 151.46401977539062, 159.30819702148438, 167.15237426757812, 174.99655151367188, 182.84072875976562, 190.68490600585938, 198.52908325195312, 206.37326049804688, 214.21743774414062, 222.06161499023438, 229.9058074951172, 237.74998474121094, 245.59414672851562, 253.43832397460938, 261.2825012207031, 269.1266784667969, 276.9708557128906, 284.8150329589844, 292.6592102050781, 300.50341796875, 308.34759521484375, 316.1917724609375, 324.03594970703125, 331.880126953125, 339.72430419921875, 347.5684814453125, 355.41265869140625, 363.2568359375, 371.10101318359375, 378.9451904296875, 386.78936767578125, 394.633544921875, 402.47772216796875, 410.3218994140625, 418.16607666015625, 426.01025390625, 433.85443115234375, 441.6986083984375, 449.54278564453125, 457.386962890625, 465.23114013671875, 473.0753173828125, 480.91949462890625, 488.763671875, 496.6078796386719]}, "gradients/decoder.transformer.h.15.ln_1.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 5.0, 4.0, 11.0, 6.0, 8.0, 8.0, 8.0, 12.0, 18.0, 24.0, 22.0, 24.0, 18.0, 31.0, 35.0, 45.0, 35.0, 39.0, 38.0, 31.0, 42.0, 42.0, 29.0, 42.0, 40.0, 44.0, 37.0, 50.0, 44.0, 24.0, 43.0, 26.0, 16.0, 17.0, 20.0, 15.0, 12.0, 13.0, 7.0, 4.0, 6.0, 10.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.072420120239258, -27.097923278808594, -26.123428344726562, -25.1489315032959, -24.174436569213867, -23.199939727783203, -22.225444793701172, -21.250947952270508, -20.276451110839844, -19.30195426940918, -18.32745933532715, -17.352962493896484, -16.378467559814453, -15.403970718383789, -14.429474830627441, -13.454978942871094, -12.480484008789062, -11.505988121032715, -10.531492233276367, -9.556995391845703, -8.582500457763672, -7.608004093170166, -6.63350772857666, -5.6590118408203125, -4.684515953063965, -3.710020065307617, -2.7355239391326904, -1.7610278129577637, -0.786531925201416, 0.18796396255493164, 1.1624603271484375, 2.136956214904785, 3.1114540100097656, 4.085949897766113, 5.060445785522461, 6.034942150115967, 7.0094380378723145, 7.983933925628662, 8.958430290222168, 9.932926177978516, 10.907422065734863, 11.881917953491211, 12.856413841247559, 13.830909729003906, 14.80540657043457, 15.779901504516602, 16.754398345947266, 17.728893280029297, 18.70339012145996, 19.677886962890625, 20.652381896972656, 21.62687873840332, 22.60137367248535, 23.575870513916016, 24.550365447998047, 25.52486228942871, 26.499359130859375, 27.47385597229004, 28.44835090637207, 29.422847747802734, 30.397342681884766, 31.37183952331543, 32.346336364746094, 33.320831298828125, 34.295326232910156]}, "gradients/decoder.transformer.h.14.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 3.0, 2.0, 5.0, 3.0, 7.0, 6.0, 3.0, 7.0, 15.0, 14.0, 10.0, 14.0, 16.0, 30.0, 22.0, 22.0, 31.0, 28.0, 29.0, 41.0, 40.0, 38.0, 27.0, 38.0, 25.0, 46.0, 43.0, 32.0, 33.0, 45.0, 33.0, 34.0, 31.0, 39.0, 26.0, 27.0, 19.0, 20.0, 18.0, 16.0, 8.0, 15.0, 13.0, 10.0, 5.0, 6.0, 1.0, 4.0, 5.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0], "bins": [-3.185546875, -3.089599609375, -2.99365234375, -2.897705078125, -2.8017578125, -2.705810546875, -2.60986328125, -2.513916015625, -2.41796875, -2.322021484375, -2.22607421875, -2.130126953125, -2.0341796875, -1.938232421875, -1.84228515625, -1.746337890625, -1.650390625, -1.554443359375, -1.45849609375, -1.362548828125, -1.2666015625, -1.170654296875, -1.07470703125, -0.978759765625, -0.8828125, -0.786865234375, -0.69091796875, -0.594970703125, -0.4990234375, -0.403076171875, -0.30712890625, -0.211181640625, -0.115234375, -0.019287109375, 0.07666015625, 0.172607421875, 0.2685546875, 0.364501953125, 0.46044921875, 0.556396484375, 0.65234375, 0.748291015625, 0.84423828125, 0.940185546875, 1.0361328125, 1.132080078125, 1.22802734375, 1.323974609375, 1.419921875, 1.515869140625, 1.61181640625, 1.707763671875, 1.8037109375, 1.899658203125, 1.99560546875, 2.091552734375, 2.1875, 2.283447265625, 2.37939453125, 2.475341796875, 2.5712890625, 2.667236328125, 2.76318359375, 2.859130859375, 2.955078125]}, "gradients/decoder.transformer.h.14.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 8.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 6.0, 10.0, 9.0, 9.0, 16.0, 18.0, 33.0, 27.0, 25.0, 37.0, 61.0, 67.0, 140.0, 305.0, 1017.0, 5203.0, 51959.0, 1895574.0, 2173782.0, 58514.0, 5623.0, 1024.0, 315.0, 135.0, 87.0, 52.0, 40.0, 28.0, 25.0, 18.0, 21.0, 11.0, 19.0, 11.0, 10.0, 11.0, 6.0, 5.0, 7.0, 2.0, 0.0, 5.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-12.84375, -12.454833984375, -12.06591796875, -11.677001953125, -11.2880859375, -10.899169921875, -10.51025390625, -10.121337890625, -9.732421875, -9.343505859375, -8.95458984375, -8.565673828125, -8.1767578125, -7.787841796875, -7.39892578125, -7.010009765625, -6.62109375, -6.232177734375, -5.84326171875, -5.454345703125, -5.0654296875, -4.676513671875, -4.28759765625, -3.898681640625, -3.509765625, -3.120849609375, -2.73193359375, -2.343017578125, -1.9541015625, -1.565185546875, -1.17626953125, -0.787353515625, -0.3984375, -0.009521484375, 0.37939453125, 0.768310546875, 1.1572265625, 1.546142578125, 1.93505859375, 2.323974609375, 2.712890625, 3.101806640625, 3.49072265625, 3.879638671875, 4.2685546875, 4.657470703125, 5.04638671875, 5.435302734375, 5.82421875, 6.213134765625, 6.60205078125, 6.990966796875, 7.3798828125, 7.768798828125, 8.15771484375, 8.546630859375, 8.935546875, 9.324462890625, 9.71337890625, 10.102294921875, 10.4912109375, 10.880126953125, 11.26904296875, 11.657958984375, 12.046875]}, "gradients/decoder.transformer.h.14.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 5.0, 5.0, 19.0, 29.0, 57.0, 129.0, 284.0, 604.0, 1101.0, 926.0, 470.0, 228.0, 100.0, 60.0, 25.0, 17.0, 9.0, 4.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-18.875, -18.433837890625, -17.99267578125, -17.551513671875, -17.1103515625, -16.669189453125, -16.22802734375, -15.786865234375, -15.345703125, -14.904541015625, -14.46337890625, -14.022216796875, -13.5810546875, -13.139892578125, -12.69873046875, -12.257568359375, -11.81640625, -11.375244140625, -10.93408203125, -10.492919921875, -10.0517578125, -9.610595703125, -9.16943359375, -8.728271484375, -8.287109375, -7.845947265625, -7.40478515625, -6.963623046875, -6.5224609375, -6.081298828125, -5.64013671875, -5.198974609375, -4.7578125, -4.316650390625, -3.87548828125, -3.434326171875, -2.9931640625, -2.552001953125, -2.11083984375, -1.669677734375, -1.228515625, -0.787353515625, -0.34619140625, 0.094970703125, 0.5361328125, 0.977294921875, 1.41845703125, 1.859619140625, 2.30078125, 2.741943359375, 3.18310546875, 3.624267578125, 4.0654296875, 4.506591796875, 4.94775390625, 5.388916015625, 5.830078125, 6.271240234375, 6.71240234375, 7.153564453125, 7.5947265625, 8.035888671875, 8.47705078125, 8.918212890625, 9.359375]}, "gradients/decoder.transformer.h.14.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 6.0, 8.0, 14.0, 15.0, 30.0, 46.0, 58.0, 99.0, 161.0, 299.0, 552.0, 1292.0, 5072.0, 79075.0, 3526686.0, 561156.0, 15820.0, 2247.0, 800.0, 342.0, 200.0, 122.0, 76.0, 42.0, 26.0, 18.0, 11.0, 4.0, 4.0, 1.0, 3.0, 5.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.1640625, -14.4666748046875, -13.769287109375, -13.0718994140625, -12.37451171875, -11.6771240234375, -10.979736328125, -10.2823486328125, -9.5849609375, -8.8875732421875, -8.190185546875, -7.4927978515625, -6.79541015625, -6.0980224609375, -5.400634765625, -4.7032470703125, -4.005859375, -3.3084716796875, -2.611083984375, -1.9136962890625, -1.21630859375, -0.5189208984375, 0.178466796875, 0.8758544921875, 1.5732421875, 2.2706298828125, 2.968017578125, 3.6654052734375, 4.36279296875, 5.0601806640625, 5.757568359375, 6.4549560546875, 7.15234375, 7.8497314453125, 8.547119140625, 9.2445068359375, 9.94189453125, 10.6392822265625, 11.336669921875, 12.0340576171875, 12.7314453125, 13.4288330078125, 14.126220703125, 14.8236083984375, 15.52099609375, 16.2183837890625, 16.915771484375, 17.6131591796875, 18.310546875, 19.0079345703125, 19.705322265625, 20.4027099609375, 21.10009765625, 21.7974853515625, 22.494873046875, 23.1922607421875, 23.8896484375, 24.5870361328125, 25.284423828125, 25.9818115234375, 26.67919921875, 27.3765869140625, 28.073974609375, 28.7713623046875, 29.46875]}, "gradients/decoder.transformer.h.14.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 8.0, 12.0, 13.0, 46.0, 76.0, 143.0, 178.0, 189.0, 170.0, 94.0, 58.0, 15.0, 6.0, 7.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.18794631958008, -33.26194381713867, -31.335941314697266, -29.409936904907227, -27.48393440246582, -25.557931900024414, -23.631927490234375, -21.70592498779297, -19.779922485351562, -17.853919982910156, -15.927916526794434, -14.001913070678711, -12.075910568237305, -10.149908065795898, -8.223904609680176, -6.297901153564453, -4.371898651123047, -2.4458956718444824, -0.519892692565918, 1.4061102867126465, 3.332113265991211, 5.258115768432617, 7.18411922454834, 9.110122680664062, 11.036125183105469, 12.962127685546875, 14.888131141662598, 16.81413459777832, 18.740137100219727, 20.666139602661133, 22.592144012451172, 24.518146514892578, 26.44414520263672, 28.370147705078125, 30.29615020751953, 32.22215270996094, 34.148155212402344, 36.07415771484375, 38.00016403198242, 39.92616653442383, 41.852169036865234, 43.77817153930664, 45.70417404174805, 47.63017654418945, 49.556182861328125, 51.48218536376953, 53.40818786621094, 55.334190368652344, 57.26019287109375, 59.186195373535156, 61.11219787597656, 63.03820037841797, 64.96420288085938, 66.89020538330078, 68.81620788574219, 70.74221801757812, 72.668212890625, 74.5942153930664, 76.52021789550781, 78.44622039794922, 80.37222290039062, 82.29822540283203, 84.22422790527344, 86.15023803710938, 88.07624053955078]}, "gradients/decoder.transformer.h.14.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 5.0, 5.0, 8.0, 8.0, 9.0, 13.0, 13.0, 21.0, 21.0, 17.0, 20.0, 29.0, 26.0, 33.0, 38.0, 36.0, 45.0, 38.0, 37.0, 56.0, 35.0, 50.0, 42.0, 38.0, 38.0, 35.0, 29.0, 43.0, 26.0, 25.0, 28.0, 26.0, 14.0, 18.0, 18.0, 20.0, 15.0, 5.0, 14.0, 2.0, 3.0, 6.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-19.103607177734375, -18.39103889465332, -17.678470611572266, -16.96590232849121, -16.253334045410156, -15.540764808654785, -14.828195571899414, -14.11562728881836, -13.403059005737305, -12.69049072265625, -11.977922439575195, -11.265353202819824, -10.55278491973877, -9.840216636657715, -9.127647399902344, -8.415079116821289, -7.702510833740234, -6.98994255065918, -6.277373790740967, -5.564805030822754, -4.852236747741699, -4.1396684646606445, -3.4270997047424316, -2.7145309448242188, -2.001962661743164, -1.2893941402435303, -0.5768256187438965, 0.1357429027557373, 0.8483114242553711, 1.5608799457550049, 2.2734484672546387, 2.9860172271728516, 3.6985855102539062, 4.411153793334961, 5.123722553253174, 5.836291313171387, 6.548859596252441, 7.261427879333496, 7.973996639251709, 8.686565399169922, 9.399133682250977, 10.111701965332031, 10.824270248413086, 11.536839485168457, 12.249407768249512, 12.961976051330566, 13.674545288085938, 14.387113571166992, 15.099681854248047, 15.812250137329102, 16.524818420410156, 17.23738670349121, 17.949954986572266, 18.662525177001953, 19.375093460083008, 20.087661743164062, 20.800230026245117, 21.512798309326172, 22.225366592407227, 22.93793487548828, 23.65050506591797, 24.363073348999023, 25.075641632080078, 25.788209915161133, 26.500778198242188]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 6.0, 3.0, 3.0, 5.0, 9.0, 8.0, 11.0, 11.0, 10.0, 20.0, 20.0, 22.0, 23.0, 24.0, 27.0, 42.0, 20.0, 44.0, 44.0, 39.0, 35.0, 29.0, 34.0, 42.0, 45.0, 41.0, 41.0, 47.0, 30.0, 36.0, 34.0, 29.0, 27.0, 21.0, 25.0, 18.0, 16.0, 15.0, 17.0, 6.0, 8.0, 3.0, 7.0, 7.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-3.638671875, -3.531402587890625, -3.42413330078125, -3.316864013671875, -3.2095947265625, -3.102325439453125, -2.99505615234375, -2.887786865234375, -2.780517578125, -2.673248291015625, -2.56597900390625, -2.458709716796875, -2.3514404296875, -2.244171142578125, -2.13690185546875, -2.029632568359375, -1.92236328125, -1.815093994140625, -1.70782470703125, -1.600555419921875, -1.4932861328125, -1.386016845703125, -1.27874755859375, -1.171478271484375, -1.064208984375, -0.956939697265625, -0.84967041015625, -0.742401123046875, -0.6351318359375, -0.527862548828125, -0.42059326171875, -0.313323974609375, -0.2060546875, -0.098785400390625, 0.00848388671875, 0.115753173828125, 0.2230224609375, 0.330291748046875, 0.43756103515625, 0.544830322265625, 0.652099609375, 0.759368896484375, 0.86663818359375, 0.973907470703125, 1.0811767578125, 1.188446044921875, 1.29571533203125, 1.402984619140625, 1.51025390625, 1.617523193359375, 1.72479248046875, 1.832061767578125, 1.9393310546875, 2.046600341796875, 2.15386962890625, 2.261138916015625, 2.368408203125, 2.475677490234375, 2.58294677734375, 2.690216064453125, 2.7974853515625, 2.904754638671875, 3.01202392578125, 3.119293212890625, 3.2265625]}, "gradients/decoder.transformer.h.14.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 7.0, 7.0, 12.0, 13.0, 27.0, 35.0, 39.0, 65.0, 96.0, 170.0, 184.0, 270.0, 441.0, 631.0, 954.0, 1314.0, 1991.0, 2982.0, 4390.0, 6475.0, 9958.0, 14895.0, 23385.0, 37624.0, 63380.0, 113302.0, 262000.0, 235323.0, 107093.0, 60093.0, 35859.0, 22238.0, 14497.0, 9504.0, 6285.0, 4187.0, 2872.0, 1893.0, 1326.0, 836.0, 630.0, 389.0, 306.0, 180.0, 127.0, 84.0, 68.0, 37.0, 33.0, 17.0, 15.0, 10.0, 7.0, 2.0, 1.0, 5.0, 2.0], "bins": [-0.09454345703125, -0.09175872802734375, -0.0889739990234375, -0.08618927001953125, -0.083404541015625, -0.08061981201171875, -0.0778350830078125, -0.07505035400390625, -0.072265625, -0.06948089599609375, -0.0666961669921875, -0.06391143798828125, -0.061126708984375, -0.05834197998046875, -0.0555572509765625, -0.05277252197265625, -0.04998779296875, -0.04720306396484375, -0.0444183349609375, -0.04163360595703125, -0.038848876953125, -0.03606414794921875, -0.0332794189453125, -0.03049468994140625, -0.0277099609375, -0.02492523193359375, -0.0221405029296875, -0.01935577392578125, -0.016571044921875, -0.01378631591796875, -0.0110015869140625, -0.00821685791015625, -0.00543212890625, -0.00264739990234375, 0.0001373291015625, 0.00292205810546875, 0.005706787109375, 0.00849151611328125, 0.0112762451171875, 0.01406097412109375, 0.016845703125, 0.01963043212890625, 0.0224151611328125, 0.02519989013671875, 0.027984619140625, 0.03076934814453125, 0.0335540771484375, 0.03633880615234375, 0.03912353515625, 0.04190826416015625, 0.0446929931640625, 0.04747772216796875, 0.050262451171875, 0.05304718017578125, 0.0558319091796875, 0.05861663818359375, 0.0614013671875, 0.06418609619140625, 0.0669708251953125, 0.06975555419921875, 0.072540283203125, 0.07532501220703125, 0.0781097412109375, 0.08089447021484375, 0.08367919921875]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 7.0, 1.0, 2.0, 5.0, 6.0, 9.0, 4.0, 8.0, 20.0, 14.0, 23.0, 14.0, 25.0, 24.0, 41.0, 34.0, 36.0, 34.0, 42.0, 42.0, 41.0, 47.0, 39.0, 1084.0, 40.0, 43.0, 32.0, 31.0, 36.0, 32.0, 34.0, 19.0, 22.0, 36.0, 15.0, 18.0, 19.0, 12.0, 7.0, 15.0, 5.0, 5.0, 4.0, 1.0, 2.0, 1.0, 4.0, 1.0, 2.0, 1.0], "bins": [-2.87109375, -2.793670654296875, -2.71624755859375, -2.638824462890625, -2.5614013671875, -2.483978271484375, -2.40655517578125, -2.329132080078125, -2.251708984375, -2.174285888671875, -2.09686279296875, -2.019439697265625, -1.9420166015625, -1.864593505859375, -1.78717041015625, -1.709747314453125, -1.63232421875, -1.554901123046875, -1.47747802734375, -1.400054931640625, -1.3226318359375, -1.245208740234375, -1.16778564453125, -1.090362548828125, -1.012939453125, -0.935516357421875, -0.85809326171875, -0.780670166015625, -0.7032470703125, -0.625823974609375, -0.54840087890625, -0.470977783203125, -0.3935546875, -0.316131591796875, -0.23870849609375, -0.161285400390625, -0.0838623046875, -0.006439208984375, 0.07098388671875, 0.148406982421875, 0.225830078125, 0.303253173828125, 0.38067626953125, 0.458099365234375, 0.5355224609375, 0.612945556640625, 0.69036865234375, 0.767791748046875, 0.84521484375, 0.922637939453125, 1.00006103515625, 1.077484130859375, 1.1549072265625, 1.232330322265625, 1.30975341796875, 1.387176513671875, 1.464599609375, 1.542022705078125, 1.61944580078125, 1.696868896484375, 1.7742919921875, 1.851715087890625, 1.92913818359375, 2.006561279296875, 2.083984375]}, "gradients/decoder.transformer.h.14.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 6.0, 1.0, 12.0, 7.0, 8.0, 28.0, 39.0, 51.0, 59.0, 115.0, 174.0, 261.0, 421.0, 643.0, 1056.0, 1754.0, 2872.0, 4913.0, 8358.0, 14739.0, 26046.0, 48410.0, 94909.0, 238531.0, 1401362.0, 120179.0, 58830.0, 31541.0, 17410.0, 9833.0, 5832.0, 3402.0, 2004.0, 1195.0, 831.0, 481.0, 279.0, 187.0, 122.0, 93.0, 45.0, 41.0, 23.0, 10.0, 10.0, 5.0, 6.0, 5.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.1259765625, -0.12239265441894531, -0.11880874633789062, -0.11522483825683594, -0.11164093017578125, -0.10805702209472656, -0.10447311401367188, -0.10088920593261719, -0.0973052978515625, -0.09372138977050781, -0.09013748168945312, -0.08655357360839844, -0.08296966552734375, -0.07938575744628906, -0.07580184936523438, -0.07221794128417969, -0.068634033203125, -0.06505012512207031, -0.061466217041015625, -0.05788230895996094, -0.05429840087890625, -0.05071449279785156, -0.047130584716796875, -0.04354667663574219, -0.0399627685546875, -0.03637886047363281, -0.032794952392578125, -0.029211044311523438, -0.02562713623046875, -0.022043228149414062, -0.018459320068359375, -0.014875411987304688, -0.01129150390625, -0.0077075958251953125, -0.004123687744140625, -0.0005397796630859375, 0.00304412841796875, 0.0066280364990234375, 0.010211944580078125, 0.013795852661132812, 0.0173797607421875, 0.020963668823242188, 0.024547576904296875, 0.028131484985351562, 0.03171539306640625, 0.03529930114746094, 0.038883209228515625, 0.04246711730957031, 0.046051025390625, 0.04963493347167969, 0.053218841552734375, 0.05680274963378906, 0.06038665771484375, 0.06397056579589844, 0.06755447387695312, 0.07113838195800781, 0.0747222900390625, 0.07830619812011719, 0.08189010620117188, 0.08547401428222656, 0.08905792236328125, 0.09264183044433594, 0.09622573852539062, 0.09980964660644531, 0.1033935546875]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 5.0, 4.0, 2.0, 2.0, 11.0, 7.0, 9.0, 4.0, 16.0, 11.0, 21.0, 21.0, 14.0, 20.0, 31.0, 40.0, 46.0, 59.0, 59.0, 84.0, 91.0, 70.0, 69.0, 62.0, 35.0, 37.0, 40.0, 23.0, 18.0, 22.0, 10.0, 14.0, 13.0, 3.0, 9.0, 4.0, 6.0, 4.0, 5.0, 3.0, 2.0, 4.0, 2.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.4373016357421875e-06, -6.21844083070755e-06, -5.999580025672913e-06, -5.780719220638275e-06, -5.561858415603638e-06, -5.342997610569e-06, -5.124136805534363e-06, -4.905276000499725e-06, -4.686415195465088e-06, -4.4675543904304504e-06, -4.248693585395813e-06, -4.0298327803611755e-06, -3.810971975326538e-06, -3.5921111702919006e-06, -3.373250365257263e-06, -3.1543895602226257e-06, -2.9355287551879883e-06, -2.716667950153351e-06, -2.4978071451187134e-06, -2.278946340084076e-06, -2.0600855350494385e-06, -1.841224730014801e-06, -1.6223639249801636e-06, -1.4035031199455261e-06, -1.1846423149108887e-06, -9.657815098762512e-07, -7.469207048416138e-07, -5.280598998069763e-07, -3.0919909477233887e-07, -9.033828973770142e-08, 1.2852251529693604e-07, 3.473833203315735e-07, 5.662441253662109e-07, 7.851049304008484e-07, 1.0039657354354858e-06, 1.2228265404701233e-06, 1.4416873455047607e-06, 1.6605481505393982e-06, 1.8794089555740356e-06, 2.098269760608673e-06, 2.3171305656433105e-06, 2.535991370677948e-06, 2.7548521757125854e-06, 2.973712980747223e-06, 3.1925737857818604e-06, 3.411434590816498e-06, 3.6302953958511353e-06, 3.849156200885773e-06, 4.06801700592041e-06, 4.286877810955048e-06, 4.505738615989685e-06, 4.7245994210243225e-06, 4.94346022605896e-06, 5.162321031093597e-06, 5.381181836128235e-06, 5.600042641162872e-06, 5.81890344619751e-06, 6.037764251232147e-06, 6.256625056266785e-06, 6.475485861301422e-06, 6.6943466663360596e-06, 6.913207471370697e-06, 7.1320682764053345e-06, 7.350929081439972e-06, 7.569789886474609e-06]}, "gradients/decoder.transformer.h.14.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 1.0, 4.0, 2.0, 6.0, 1.0, 3.0, 6.0, 10.0, 7.0, 11.0, 21.0, 20.0, 34.0, 20.0, 39.0, 58.0, 82.0, 130.0, 221.0, 520.0, 3317.0, 463488.0, 575621.0, 3708.0, 534.0, 238.0, 128.0, 82.0, 61.0, 35.0, 31.0, 25.0, 23.0, 22.0, 13.0, 9.0, 7.0, 9.0, 3.0, 5.0, 3.0, 5.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00012004375457763672, -0.00011651497334241867, -0.00011298619210720062, -0.00010945741087198257, -0.00010592862963676453, -0.00010239984840154648, -9.887106716632843e-05, -9.534228593111038e-05, -9.181350469589233e-05, -8.828472346067429e-05, -8.475594222545624e-05, -8.122716099023819e-05, -7.769837975502014e-05, -7.41695985198021e-05, -7.064081728458405e-05, -6.7112036049366e-05, -6.358325481414795e-05, -6.00544735789299e-05, -5.652569234371185e-05, -5.2996911108493805e-05, -4.946812987327576e-05, -4.593934863805771e-05, -4.241056740283966e-05, -3.888178616762161e-05, -3.5353004932403564e-05, -3.1824223697185516e-05, -2.8295442461967468e-05, -2.476666122674942e-05, -2.1237879991531372e-05, -1.7709098756313324e-05, -1.4180317521095276e-05, -1.0651536285877228e-05, -7.12275505065918e-06, -3.5939738154411316e-06, -6.51925802230835e-08, 3.4635886549949646e-06, 6.992369890213013e-06, 1.052115112543106e-05, 1.4049932360649109e-05, 1.7578713595867157e-05, 2.1107494831085205e-05, 2.4636276066303253e-05, 2.81650573015213e-05, 3.169383853673935e-05, 3.52226197719574e-05, 3.8751401007175446e-05, 4.2280182242393494e-05, 4.580896347761154e-05, 4.933774471282959e-05, 5.286652594804764e-05, 5.6395307183265686e-05, 5.9924088418483734e-05, 6.345286965370178e-05, 6.698165088891983e-05, 7.051043212413788e-05, 7.403921335935593e-05, 7.756799459457397e-05, 8.109677582979202e-05, 8.462555706501007e-05, 8.815433830022812e-05, 9.168311953544617e-05, 9.521190077066422e-05, 9.874068200588226e-05, 0.00010226946324110031, 0.00010579824447631836]}, "gradients/decoder.transformer.h.14.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 4.0, 10.0, 14.0, 40.0, 74.0, 124.0, 175.0, 200.0, 146.0, 95.0, 64.0, 25.0, 22.0, 9.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.312586720014224e-06, -6.073060376365902e-06, -5.833534487464931e-06, -5.594008143816609e-06, -5.354482254915638e-06, -5.114955911267316e-06, -4.875430022366345e-06, -4.635903678718023e-06, -4.396377335069701e-06, -4.156850991421379e-06, -3.917325102520408e-06, -3.677798758872086e-06, -3.438272869971115e-06, -3.1987465263227932e-06, -2.9592204100481467e-06, -2.7196942937735002e-06, -2.480168404872529e-06, -2.2406422885978827e-06, -2.0011161723232362e-06, -1.761589942361752e-06, -1.5220638260871056e-06, -1.282537709812459e-06, -1.0430114798509749e-06, -8.034853635763284e-07, -5.639592473016819e-07, -3.24433102605326e-07, -8.490695790897007e-08, 1.5461921520909527e-07, 3.9414533148374176e-07, 6.336714477583882e-07, 8.731976777198724e-07, 1.112723793994519e-06, 1.3522503650165163e-06, 1.5917764812911628e-06, 1.8313025975658093e-06, 2.070828941214131e-06, 2.3103548301151022e-06, 2.549881173763424e-06, 2.7894072900380706e-06, 3.028933406312717e-06, 3.2684595225873636e-06, 3.50798563886201e-06, 3.7475117551366566e-06, 3.987037871411303e-06, 4.226564215059625e-06, 4.466090103960596e-06, 4.705616447608918e-06, 4.94514279125724e-06, 5.184668680158211e-06, 5.424195023806533e-06, 5.663720912707504e-06, 5.903247256355826e-06, 6.142773145256797e-06, 6.382299488905119e-06, 6.62182537780609e-06, 6.861351721454412e-06, 7.100878065102734e-06, 7.340404408751056e-06, 7.579930297652027e-06, 7.819456186552998e-06, 8.05898253020132e-06, 8.298508873849642e-06, 8.538035217497963e-06, 8.777560651651584e-06, 9.017086995299906e-06]}, "gradients/decoder.transformer.h.14.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 5.0, 11.0, 7.0, 4.0, 10.0, 5.0, 7.0, 11.0, 7.0, 20.0, 27.0, 16.0, 14.0, 35.0, 14.0, 36.0, 46.0, 14.0, 62.0, 35.0, 16.0, 58.0, 19.0, 47.0, 46.0, 16.0, 52.0, 33.0, 23.0, 42.0, 44.0, 18.0, 26.0, 35.0, 13.0, 26.0, 24.0, 9.0, 16.0, 10.0, 5.0, 11.0, 8.0, 2.0, 7.0, 6.0, 5.0, 3.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0], "bins": [-3.2782554626464844e-06, -3.1795352697372437e-06, -3.080815076828003e-06, -2.982094883918762e-06, -2.8833746910095215e-06, -2.7846544981002808e-06, -2.68593430519104e-06, -2.5872141122817993e-06, -2.4884939193725586e-06, -2.389773726463318e-06, -2.291053533554077e-06, -2.1923333406448364e-06, -2.0936131477355957e-06, -1.994892954826355e-06, -1.8961727619171143e-06, -1.7974525690078735e-06, -1.6987323760986328e-06, -1.600012183189392e-06, -1.5012919902801514e-06, -1.4025717973709106e-06, -1.30385160446167e-06, -1.2051314115524292e-06, -1.1064112186431885e-06, -1.0076910257339478e-06, -9.08970832824707e-07, -8.102506399154663e-07, -7.115304470062256e-07, -6.128102540969849e-07, -5.140900611877441e-07, -4.153698682785034e-07, -3.166496753692627e-07, -2.1792948246002197e-07, -1.1920928955078125e-07, -2.0489096641540527e-08, 7.82310962677002e-08, 1.7695128917694092e-07, 2.7567148208618164e-07, 3.7439167499542236e-07, 4.731118679046631e-07, 5.718320608139038e-07, 6.705522537231445e-07, 7.692724466323853e-07, 8.67992639541626e-07, 9.667128324508667e-07, 1.0654330253601074e-06, 1.1641532182693481e-06, 1.2628734111785889e-06, 1.3615936040878296e-06, 1.4603137969970703e-06, 1.559033989906311e-06, 1.6577541828155518e-06, 1.7564743757247925e-06, 1.8551945686340332e-06, 1.953914761543274e-06, 2.0526349544525146e-06, 2.1513551473617554e-06, 2.250075340270996e-06, 2.348795533180237e-06, 2.4475157260894775e-06, 2.5462359189987183e-06, 2.644956111907959e-06, 2.7436763048171997e-06, 2.8423964977264404e-06, 2.941116690635681e-06, 3.039836883544922e-06]}, "gradients/decoder.transformer.h.14.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 6.0, 3.0, 3.0, 5.0, 9.0, 8.0, 11.0, 11.0, 10.0, 20.0, 20.0, 22.0, 23.0, 24.0, 27.0, 42.0, 20.0, 44.0, 44.0, 39.0, 35.0, 29.0, 34.0, 42.0, 45.0, 41.0, 41.0, 47.0, 30.0, 36.0, 34.0, 29.0, 27.0, 21.0, 25.0, 18.0, 16.0, 15.0, 17.0, 6.0, 8.0, 3.0, 7.0, 7.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-3.638671875, -3.531402587890625, -3.42413330078125, -3.316864013671875, -3.2095947265625, -3.102325439453125, -2.99505615234375, -2.887786865234375, -2.780517578125, -2.673248291015625, -2.56597900390625, -2.458709716796875, -2.3514404296875, -2.244171142578125, -2.13690185546875, -2.029632568359375, -1.92236328125, -1.815093994140625, -1.70782470703125, -1.600555419921875, -1.4932861328125, -1.386016845703125, -1.27874755859375, -1.171478271484375, -1.064208984375, -0.956939697265625, -0.84967041015625, -0.742401123046875, -0.6351318359375, -0.527862548828125, -0.42059326171875, -0.313323974609375, -0.2060546875, -0.098785400390625, 0.00848388671875, 0.115753173828125, 0.2230224609375, 0.330291748046875, 0.43756103515625, 0.544830322265625, 0.652099609375, 0.759368896484375, 0.86663818359375, 0.973907470703125, 1.0811767578125, 1.188446044921875, 1.29571533203125, 1.402984619140625, 1.51025390625, 1.617523193359375, 1.72479248046875, 1.832061767578125, 1.9393310546875, 2.046600341796875, 2.15386962890625, 2.261138916015625, 2.368408203125, 2.475677490234375, 2.58294677734375, 2.690216064453125, 2.7974853515625, 2.904754638671875, 3.01202392578125, 3.119293212890625, 3.2265625]}, "gradients/decoder.transformer.h.14.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 4.0, 1.0, 6.0, 5.0, 6.0, 7.0, 14.0, 21.0, 34.0, 46.0, 55.0, 94.0, 111.0, 136.0, 232.0, 311.0, 436.0, 670.0, 1019.0, 1519.0, 2447.0, 3808.0, 6229.0, 9874.0, 16460.0, 28116.0, 50688.0, 100676.0, 240703.0, 320955.0, 122980.0, 59753.0, 32189.0, 18577.0, 11223.0, 6852.0, 4239.0, 2738.0, 1777.0, 1107.0, 808.0, 543.0, 315.0, 233.0, 145.0, 115.0, 85.0, 55.0, 38.0, 36.0, 20.0, 20.0, 12.0, 8.0, 8.0, 7.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-1.9267578125, -1.8648529052734375, -1.802947998046875, -1.7410430908203125, -1.67913818359375, -1.6172332763671875, -1.555328369140625, -1.4934234619140625, -1.4315185546875, -1.3696136474609375, -1.307708740234375, -1.2458038330078125, -1.18389892578125, -1.1219940185546875, -1.060089111328125, -0.9981842041015625, -0.936279296875, -0.8743743896484375, -0.812469482421875, -0.7505645751953125, -0.68865966796875, -0.6267547607421875, -0.564849853515625, -0.5029449462890625, -0.4410400390625, -0.3791351318359375, -0.317230224609375, -0.2553253173828125, -0.19342041015625, -0.1315155029296875, -0.069610595703125, -0.0077056884765625, 0.05419921875, 0.1161041259765625, 0.178009033203125, 0.2399139404296875, 0.30181884765625, 0.3637237548828125, 0.425628662109375, 0.4875335693359375, 0.5494384765625, 0.6113433837890625, 0.673248291015625, 0.7351531982421875, 0.79705810546875, 0.8589630126953125, 0.920867919921875, 0.9827728271484375, 1.044677734375, 1.1065826416015625, 1.168487548828125, 1.2303924560546875, 1.29229736328125, 1.3542022705078125, 1.416107177734375, 1.4780120849609375, 1.5399169921875, 1.6018218994140625, 1.663726806640625, 1.7256317138671875, 1.78753662109375, 1.8494415283203125, 1.911346435546875, 1.9732513427734375, 2.03515625]}, "gradients/decoder.transformer.h.14.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 3.0, 5.0, 9.0, 7.0, 14.0, 13.0, 11.0, 16.0, 16.0, 19.0, 23.0, 25.0, 22.0, 26.0, 47.0, 33.0, 35.0, 52.0, 71.0, 151.0, 1608.0, 294.0, 91.0, 68.0, 49.0, 50.0, 36.0, 40.0, 27.0, 26.0, 35.0, 21.0, 22.0, 22.0, 13.0, 11.0, 10.0, 9.0, 8.0, 2.0, 5.0, 2.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.375, -11.017822265625, -10.66064453125, -10.303466796875, -9.9462890625, -9.589111328125, -9.23193359375, -8.874755859375, -8.517578125, -8.160400390625, -7.80322265625, -7.446044921875, -7.0888671875, -6.731689453125, -6.37451171875, -6.017333984375, -5.66015625, -5.302978515625, -4.94580078125, -4.588623046875, -4.2314453125, -3.874267578125, -3.51708984375, -3.159912109375, -2.802734375, -2.445556640625, -2.08837890625, -1.731201171875, -1.3740234375, -1.016845703125, -0.65966796875, -0.302490234375, 0.0546875, 0.411865234375, 0.76904296875, 1.126220703125, 1.4833984375, 1.840576171875, 2.19775390625, 2.554931640625, 2.912109375, 3.269287109375, 3.62646484375, 3.983642578125, 4.3408203125, 4.697998046875, 5.05517578125, 5.412353515625, 5.76953125, 6.126708984375, 6.48388671875, 6.841064453125, 7.1982421875, 7.555419921875, 7.91259765625, 8.269775390625, 8.626953125, 8.984130859375, 9.34130859375, 9.698486328125, 10.0556640625, 10.412841796875, 10.77001953125, 11.127197265625, 11.484375]}, "gradients/decoder.transformer.h.14.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 0.0, 5.0, 5.0, 16.0, 11.0, 18.0, 10.0, 21.0, 28.0, 30.0, 44.0, 64.0, 71.0, 101.0, 143.0, 168.0, 298.0, 502.0, 1954.0, 56353.0, 3052952.0, 30030.0, 1500.0, 449.0, 246.0, 154.0, 141.0, 90.0, 68.0, 53.0, 45.0, 27.0, 25.0, 17.0, 13.0, 15.0, 16.0, 11.0, 5.0, 4.0, 4.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-24.65625, -23.871826171875, -23.08740234375, -22.302978515625, -21.5185546875, -20.734130859375, -19.94970703125, -19.165283203125, -18.380859375, -17.596435546875, -16.81201171875, -16.027587890625, -15.2431640625, -14.458740234375, -13.67431640625, -12.889892578125, -12.10546875, -11.321044921875, -10.53662109375, -9.752197265625, -8.9677734375, -8.183349609375, -7.39892578125, -6.614501953125, -5.830078125, -5.045654296875, -4.26123046875, -3.476806640625, -2.6923828125, -1.907958984375, -1.12353515625, -0.339111328125, 0.4453125, 1.229736328125, 2.01416015625, 2.798583984375, 3.5830078125, 4.367431640625, 5.15185546875, 5.936279296875, 6.720703125, 7.505126953125, 8.28955078125, 9.073974609375, 9.8583984375, 10.642822265625, 11.42724609375, 12.211669921875, 12.99609375, 13.780517578125, 14.56494140625, 15.349365234375, 16.1337890625, 16.918212890625, 17.70263671875, 18.487060546875, 19.271484375, 20.055908203125, 20.84033203125, 21.624755859375, 22.4091796875, 23.193603515625, 23.97802734375, 24.762451171875, 25.546875]}, "gradients/decoder.transformer.h.14.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 69.0, 947.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-34.71863555908203, -26.745391845703125, -18.77214813232422, -10.79890251159668, -2.8256587982177734, 5.147586822509766, 13.120830535888672, 21.094074249267578, 29.067317962646484, 37.04056167602539, 45.0138053894043, 52.98705291748047, 60.960296630859375, 68.93354034423828, 76.90678405761719, 84.8800277709961, 92.853271484375, 100.8265151977539, 108.79975891113281, 116.77300262451172, 124.74624633789062, 132.71949768066406, 140.69273376464844, 148.66598510742188, 156.63922119140625, 164.6124725341797, 172.58570861816406, 180.5589599609375, 188.53219604492188, 196.5054473876953, 204.4786834716797, 212.45193481445312, 220.42518615722656, 228.3984375, 236.37167358398438, 244.3449249267578, 252.3181610107422, 260.2914123535156, 268.2646484375, 276.2378845214844, 284.2111511230469, 292.18438720703125, 300.15765380859375, 308.1308898925781, 316.1041259765625, 324.0773620605469, 332.0506286621094, 340.02386474609375, 347.9971008300781, 355.9703369140625, 363.943603515625, 371.9168395996094, 379.89007568359375, 387.8633117675781, 395.8365783691406, 403.809814453125, 411.7830505371094, 419.75628662109375, 427.72955322265625, 435.7027893066406, 443.676025390625, 451.6492614746094, 459.6225280761719, 467.59576416015625, 475.5690002441406]}, "gradients/decoder.transformer.h.14.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 6.0, 6.0, 5.0, 5.0, 5.0, 10.0, 10.0, 7.0, 5.0, 14.0, 5.0, 19.0, 14.0, 14.0, 13.0, 20.0, 22.0, 36.0, 25.0, 35.0, 31.0, 42.0, 25.0, 33.0, 33.0, 38.0, 34.0, 29.0, 39.0, 33.0, 28.0, 25.0, 30.0, 27.0, 40.0, 26.0, 30.0, 29.0, 19.0, 28.0, 21.0, 13.0, 16.0, 19.0, 6.0, 10.0, 7.0, 6.0, 6.0, 3.0, 7.0, 2.0, 5.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-28.430631637573242, -27.530920028686523, -26.631208419799805, -25.731496810913086, -24.831783294677734, -23.932071685791016, -23.032360076904297, -22.132648468017578, -21.23293685913086, -20.33322525024414, -19.433513641357422, -18.533802032470703, -17.634090423583984, -16.734378814697266, -15.834665298461914, -14.934953689575195, -14.035242080688477, -13.135530471801758, -12.235818862915039, -11.336106300354004, -10.436394691467285, -9.536683082580566, -8.636970520019531, -7.7372589111328125, -6.837547302246094, -5.937835693359375, -5.038123607635498, -4.138411521911621, -3.2386999130249023, -2.3389883041381836, -1.4392762184143066, -0.5395641326904297, 0.36014556884765625, 1.259857416152954, 2.159569263458252, 3.05928111076355, 3.9589929580688477, 4.858704566955566, 5.758416652679443, 6.65812873840332, 7.557840347290039, 8.457551956176758, 9.357263565063477, 10.256976127624512, 11.15668773651123, 12.05639934539795, 12.956111907958984, 13.855823516845703, 14.755535125732422, 15.65524673461914, 16.55495834350586, 17.454669952392578, 18.354381561279297, 19.254093170166016, 20.153806686401367, 21.053518295288086, 21.953229904174805, 22.852941513061523, 23.752653121948242, 24.65236473083496, 25.552078247070312, 26.45178985595703, 27.35150146484375, 28.25121307373047, 29.150924682617188]}, "gradients/decoder.transformer.h.13.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 5.0, 7.0, 10.0, 13.0, 13.0, 19.0, 16.0, 22.0, 17.0, 27.0, 35.0, 30.0, 25.0, 41.0, 42.0, 32.0, 35.0, 39.0, 24.0, 37.0, 53.0, 32.0, 51.0, 36.0, 43.0, 29.0, 43.0, 26.0, 26.0, 19.0, 30.0, 16.0, 16.0, 18.0, 13.0, 11.0, 13.0, 4.0, 8.0, 5.0, 4.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-3.712890625, -3.6015625, -3.490234375, -3.37890625, -3.267578125, -3.15625, -3.044921875, -2.93359375, -2.822265625, -2.7109375, -2.599609375, -2.48828125, -2.376953125, -2.265625, -2.154296875, -2.04296875, -1.931640625, -1.8203125, -1.708984375, -1.59765625, -1.486328125, -1.375, -1.263671875, -1.15234375, -1.041015625, -0.9296875, -0.818359375, -0.70703125, -0.595703125, -0.484375, -0.373046875, -0.26171875, -0.150390625, -0.0390625, 0.072265625, 0.18359375, 0.294921875, 0.40625, 0.517578125, 0.62890625, 0.740234375, 0.8515625, 0.962890625, 1.07421875, 1.185546875, 1.296875, 1.408203125, 1.51953125, 1.630859375, 1.7421875, 1.853515625, 1.96484375, 2.076171875, 2.1875, 2.298828125, 2.41015625, 2.521484375, 2.6328125, 2.744140625, 2.85546875, 2.966796875, 3.078125, 3.189453125, 3.30078125, 3.412109375]}, "gradients/decoder.transformer.h.13.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 3.0, 1.0, 1.0, 9.0, 5.0, 5.0, 10.0, 9.0, 9.0, 12.0, 13.0, 20.0, 18.0, 28.0, 20.0, 31.0, 52.0, 90.0, 165.0, 409.0, 1451.0, 6256.0, 41063.0, 825203.0, 3110045.0, 188107.0, 16720.0, 3152.0, 776.0, 248.0, 83.0, 57.0, 28.0, 35.0, 21.0, 33.0, 22.0, 19.0, 10.0, 9.0, 10.0, 12.0, 9.0, 3.0, 6.0, 5.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-12.4765625, -12.1126708984375, -11.748779296875, -11.3848876953125, -11.02099609375, -10.6571044921875, -10.293212890625, -9.9293212890625, -9.5654296875, -9.2015380859375, -8.837646484375, -8.4737548828125, -8.10986328125, -7.7459716796875, -7.382080078125, -7.0181884765625, -6.654296875, -6.2904052734375, -5.926513671875, -5.5626220703125, -5.19873046875, -4.8348388671875, -4.470947265625, -4.1070556640625, -3.7431640625, -3.3792724609375, -3.015380859375, -2.6514892578125, -2.28759765625, -1.9237060546875, -1.559814453125, -1.1959228515625, -0.83203125, -0.4681396484375, -0.104248046875, 0.2596435546875, 0.62353515625, 0.9874267578125, 1.351318359375, 1.7152099609375, 2.0791015625, 2.4429931640625, 2.806884765625, 3.1707763671875, 3.53466796875, 3.8985595703125, 4.262451171875, 4.6263427734375, 4.990234375, 5.3541259765625, 5.718017578125, 6.0819091796875, 6.44580078125, 6.8096923828125, 7.173583984375, 7.5374755859375, 7.9013671875, 8.2652587890625, 8.629150390625, 8.9930419921875, 9.35693359375, 9.7208251953125, 10.084716796875, 10.4486083984375, 10.8125]}, "gradients/decoder.transformer.h.13.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 9.0, 10.0, 9.0, 21.0, 43.0, 73.0, 155.0, 295.0, 559.0, 968.0, 896.0, 491.0, 230.0, 135.0, 78.0, 54.0, 15.0, 21.0, 8.0, 7.0, 1.0, 5.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.21875, -12.80419921875, -12.3896484375, -11.97509765625, -11.560546875, -11.14599609375, -10.7314453125, -10.31689453125, -9.90234375, -9.48779296875, -9.0732421875, -8.65869140625, -8.244140625, -7.82958984375, -7.4150390625, -7.00048828125, -6.5859375, -6.17138671875, -5.7568359375, -5.34228515625, -4.927734375, -4.51318359375, -4.0986328125, -3.68408203125, -3.26953125, -2.85498046875, -2.4404296875, -2.02587890625, -1.611328125, -1.19677734375, -0.7822265625, -0.36767578125, 0.046875, 0.46142578125, 0.8759765625, 1.29052734375, 1.705078125, 2.11962890625, 2.5341796875, 2.94873046875, 3.36328125, 3.77783203125, 4.1923828125, 4.60693359375, 5.021484375, 5.43603515625, 5.8505859375, 6.26513671875, 6.6796875, 7.09423828125, 7.5087890625, 7.92333984375, 8.337890625, 8.75244140625, 9.1669921875, 9.58154296875, 9.99609375, 10.41064453125, 10.8251953125, 11.23974609375, 11.654296875, 12.06884765625, 12.4833984375, 12.89794921875, 13.3125]}, "gradients/decoder.transformer.h.13.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 8.0, 12.0, 20.0, 29.0, 50.0, 108.0, 234.0, 503.0, 1623.0, 23761.0, 3891234.0, 271425.0, 3913.0, 796.0, 315.0, 124.0, 70.0, 27.0, 12.0, 13.0, 6.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.03125, -30.8759765625, -29.720703125, -28.5654296875, -27.41015625, -26.2548828125, -25.099609375, -23.9443359375, -22.7890625, -21.6337890625, -20.478515625, -19.3232421875, -18.16796875, -17.0126953125, -15.857421875, -14.7021484375, -13.546875, -12.3916015625, -11.236328125, -10.0810546875, -8.92578125, -7.7705078125, -6.615234375, -5.4599609375, -4.3046875, -3.1494140625, -1.994140625, -0.8388671875, 0.31640625, 1.4716796875, 2.626953125, 3.7822265625, 4.9375, 6.0927734375, 7.248046875, 8.4033203125, 9.55859375, 10.7138671875, 11.869140625, 13.0244140625, 14.1796875, 15.3349609375, 16.490234375, 17.6455078125, 18.80078125, 19.9560546875, 21.111328125, 22.2666015625, 23.421875, 24.5771484375, 25.732421875, 26.8876953125, 28.04296875, 29.1982421875, 30.353515625, 31.5087890625, 32.6640625, 33.8193359375, 34.974609375, 36.1298828125, 37.28515625, 38.4404296875, 39.595703125, 40.7509765625, 41.90625]}, "gradients/decoder.transformer.h.13.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 29.0, 100.0, 240.0, 343.0, 211.0, 67.0, 21.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-56.55815124511719, -53.100196838378906, -49.642242431640625, -46.184288024902344, -42.7263298034668, -39.268375396728516, -35.810420989990234, -32.35246276855469, -28.89451026916504, -25.436555862426758, -21.978599548339844, -18.520645141601562, -15.062689781188965, -11.604734420776367, -8.146780014038086, -4.688823699951172, -1.2308692932128906, 2.227085828781128, 5.6850409507751465, 9.142995834350586, 12.600951194763184, 16.05890655517578, 19.516860961914062, 22.974817276000977, 26.432771682739258, 29.89072608947754, 33.34868240356445, 36.806636810302734, 40.264591217041016, 43.72254943847656, 47.180503845214844, 50.638458251953125, 54.096412658691406, 57.55436706542969, 61.01232147216797, 64.47027587890625, 67.92823028564453, 71.38618469238281, 74.84414672851562, 78.3021011352539, 81.76005554199219, 85.21800994873047, 88.67596435546875, 92.13391876220703, 95.59187316894531, 99.04983520507812, 102.50778198242188, 105.96574401855469, 109.42369079589844, 112.88164520263672, 116.339599609375, 119.79755401611328, 123.25550842285156, 126.71347045898438, 130.17141723632812, 133.62937927246094, 137.08734130859375, 140.54530334472656, 144.0032501220703, 147.46121215820312, 150.91915893554688, 154.3771209716797, 157.83506774902344, 161.29302978515625, 164.7509765625]}, "gradients/decoder.transformer.h.13.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 6.0, 10.0, 4.0, 10.0, 13.0, 12.0, 14.0, 18.0, 21.0, 27.0, 20.0, 31.0, 47.0, 35.0, 33.0, 44.0, 54.0, 36.0, 38.0, 42.0, 43.0, 33.0, 46.0, 39.0, 42.0, 36.0, 36.0, 41.0, 20.0, 21.0, 24.0, 16.0, 18.0, 9.0, 13.0, 12.0, 10.0, 8.0, 7.0, 4.0, 4.0, 0.0, 4.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-22.700668334960938, -21.99874496459961, -21.29682159423828, -20.594900131225586, -19.892976760864258, -19.19105339050293, -18.489131927490234, -17.787208557128906, -17.085285186767578, -16.38336181640625, -15.681439399719238, -14.979516983032227, -14.277593612670898, -13.57567024230957, -12.873747825622559, -12.171825408935547, -11.469902038574219, -10.76797866821289, -10.066056251525879, -9.364133834838867, -8.662210464477539, -7.960287570953369, -7.258364677429199, -6.556441783905029, -5.854518890380859, -5.1525959968566895, -4.4506731033325195, -3.7487502098083496, -3.0468273162841797, -2.3449044227600098, -1.6429815292358398, -0.9410586357116699, -0.2391357421875, 0.4627871513366699, 1.1647100448608398, 1.8666329383850098, 2.5685558319091797, 3.2704787254333496, 3.9724016189575195, 4.6743245124816895, 5.376247406005859, 6.078170299530029, 6.780093193054199, 7.482016086578369, 8.183938980102539, 8.885862350463867, 9.587784767150879, 10.28970718383789, 10.991630554199219, 11.693553924560547, 12.395476341247559, 13.09739875793457, 13.799322128295898, 14.501245498657227, 15.203167915344238, 15.90509033203125, 16.607013702392578, 17.308937072753906, 18.010860443115234, 18.71278190612793, 19.414705276489258, 20.116628646850586, 20.81855010986328, 21.52047348022461, 22.222396850585938]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 3.0, 10.0, 4.0, 8.0, 5.0, 18.0, 13.0, 5.0, 16.0, 18.0, 24.0, 21.0, 28.0, 22.0, 28.0, 27.0, 32.0, 36.0, 40.0, 33.0, 33.0, 47.0, 32.0, 43.0, 38.0, 35.0, 40.0, 42.0, 34.0, 31.0, 22.0, 28.0, 30.0, 22.0, 23.0, 15.0, 24.0, 15.0, 8.0, 11.0, 10.0, 12.0, 7.0, 4.0, 1.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.572265625, -3.463134765625, -3.35400390625, -3.244873046875, -3.1357421875, -3.026611328125, -2.91748046875, -2.808349609375, -2.69921875, -2.590087890625, -2.48095703125, -2.371826171875, -2.2626953125, -2.153564453125, -2.04443359375, -1.935302734375, -1.826171875, -1.717041015625, -1.60791015625, -1.498779296875, -1.3896484375, -1.280517578125, -1.17138671875, -1.062255859375, -0.953125, -0.843994140625, -0.73486328125, -0.625732421875, -0.5166015625, -0.407470703125, -0.29833984375, -0.189208984375, -0.080078125, 0.029052734375, 0.13818359375, 0.247314453125, 0.3564453125, 0.465576171875, 0.57470703125, 0.683837890625, 0.79296875, 0.902099609375, 1.01123046875, 1.120361328125, 1.2294921875, 1.338623046875, 1.44775390625, 1.556884765625, 1.666015625, 1.775146484375, 1.88427734375, 1.993408203125, 2.1025390625, 2.211669921875, 2.32080078125, 2.429931640625, 2.5390625, 2.648193359375, 2.75732421875, 2.866455078125, 2.9755859375, 3.084716796875, 3.19384765625, 3.302978515625, 3.412109375]}, "gradients/decoder.transformer.h.13.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 4.0, 3.0, 1.0, 5.0, 6.0, 6.0, 14.0, 21.0, 29.0, 40.0, 68.0, 97.0, 121.0, 195.0, 300.0, 409.0, 603.0, 978.0, 1512.0, 2154.0, 3359.0, 5113.0, 7826.0, 11975.0, 18049.0, 28441.0, 43796.0, 71178.0, 125380.0, 300287.0, 182090.0, 90811.0, 54770.0, 34405.0, 22346.0, 14441.0, 9526.0, 6195.0, 4111.0, 2644.0, 1805.0, 1106.0, 744.0, 563.0, 322.0, 247.0, 150.0, 103.0, 70.0, 47.0, 38.0, 21.0, 12.0, 14.0, 5.0, 5.0, 6.0, 3.0, 2.0, 1.0, 1.0], "bins": [-0.09716796875, -0.09419918060302734, -0.09123039245605469, -0.08826160430908203, -0.08529281616210938, -0.08232402801513672, -0.07935523986816406, -0.0763864517211914, -0.07341766357421875, -0.0704488754272461, -0.06748008728027344, -0.06451129913330078, -0.061542510986328125, -0.05857372283935547, -0.05560493469238281, -0.052636146545410156, -0.0496673583984375, -0.046698570251464844, -0.04372978210449219, -0.04076099395751953, -0.037792205810546875, -0.03482341766357422, -0.03185462951660156, -0.028885841369628906, -0.02591705322265625, -0.022948265075683594, -0.019979476928710938, -0.01701068878173828, -0.014041900634765625, -0.011073112487792969, -0.008104324340820312, -0.005135536193847656, -0.002166748046875, 0.0008020401000976562, 0.0037708282470703125, 0.006739616394042969, 0.009708404541015625, 0.012677192687988281, 0.015645980834960938, 0.018614768981933594, 0.02158355712890625, 0.024552345275878906, 0.027521133422851562, 0.03048992156982422, 0.033458709716796875, 0.03642749786376953, 0.03939628601074219, 0.042365074157714844, 0.0453338623046875, 0.048302650451660156, 0.05127143859863281, 0.05424022674560547, 0.057209014892578125, 0.06017780303955078, 0.06314659118652344, 0.0661153793334961, 0.06908416748046875, 0.0720529556274414, 0.07502174377441406, 0.07799053192138672, 0.08095932006835938, 0.08392810821533203, 0.08689689636230469, 0.08986568450927734, 0.09283447265625]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 6.0, 3.0, 5.0, 9.0, 14.0, 19.0, 16.0, 21.0, 23.0, 31.0, 19.0, 24.0, 29.0, 41.0, 33.0, 41.0, 36.0, 41.0, 56.0, 1063.0, 43.0, 40.0, 37.0, 54.0, 29.0, 40.0, 29.0, 30.0, 33.0, 22.0, 24.0, 18.0, 22.0, 19.0, 21.0, 6.0, 3.0, 6.0, 8.0, 3.0, 1.0, 3.0, 6.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.59765625, -2.51556396484375, -2.4334716796875, -2.35137939453125, -2.269287109375, -2.18719482421875, -2.1051025390625, -2.02301025390625, -1.94091796875, -1.85882568359375, -1.7767333984375, -1.69464111328125, -1.612548828125, -1.53045654296875, -1.4483642578125, -1.36627197265625, -1.2841796875, -1.20208740234375, -1.1199951171875, -1.03790283203125, -0.955810546875, -0.87371826171875, -0.7916259765625, -0.70953369140625, -0.62744140625, -0.54534912109375, -0.4632568359375, -0.38116455078125, -0.299072265625, -0.21697998046875, -0.1348876953125, -0.05279541015625, 0.029296875, 0.11138916015625, 0.1934814453125, 0.27557373046875, 0.357666015625, 0.43975830078125, 0.5218505859375, 0.60394287109375, 0.68603515625, 0.76812744140625, 0.8502197265625, 0.93231201171875, 1.014404296875, 1.09649658203125, 1.1785888671875, 1.26068115234375, 1.3427734375, 1.42486572265625, 1.5069580078125, 1.58905029296875, 1.671142578125, 1.75323486328125, 1.8353271484375, 1.91741943359375, 1.99951171875, 2.08160400390625, 2.1636962890625, 2.24578857421875, 2.327880859375, 2.40997314453125, 2.4920654296875, 2.57415771484375, 2.65625]}, "gradients/decoder.transformer.h.13.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 3.0, 7.0, 15.0, 21.0, 32.0, 32.0, 38.0, 62.0, 119.0, 167.0, 265.0, 417.0, 621.0, 1075.0, 1681.0, 2635.0, 4727.0, 7696.0, 13011.0, 22581.0, 39450.0, 73146.0, 153381.0, 1424375.0, 171352.0, 79277.0, 42230.0, 23827.0, 13980.0, 8297.0, 4871.0, 2858.0, 1849.0, 1131.0, 654.0, 432.0, 263.0, 177.0, 146.0, 92.0, 48.0, 31.0, 18.0, 18.0, 4.0, 6.0, 11.0, 4.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11376953125, -0.11016464233398438, -0.10655975341796875, -0.10295486450195312, -0.0993499755859375, -0.09574508666992188, -0.09214019775390625, -0.08853530883789062, -0.084930419921875, -0.08132553100585938, -0.07772064208984375, -0.07411575317382812, -0.0705108642578125, -0.06690597534179688, -0.06330108642578125, -0.059696197509765625, -0.05609130859375, -0.052486419677734375, -0.04888153076171875, -0.045276641845703125, -0.0416717529296875, -0.038066864013671875, -0.03446197509765625, -0.030857086181640625, -0.027252197265625, -0.023647308349609375, -0.02004241943359375, -0.016437530517578125, -0.0128326416015625, -0.009227752685546875, -0.00562286376953125, -0.002017974853515625, 0.0015869140625, 0.005191802978515625, 0.00879669189453125, 0.012401580810546875, 0.0160064697265625, 0.019611358642578125, 0.02321624755859375, 0.026821136474609375, 0.030426025390625, 0.034030914306640625, 0.03763580322265625, 0.041240692138671875, 0.0448455810546875, 0.048450469970703125, 0.05205535888671875, 0.055660247802734375, 0.05926513671875, 0.06287002563476562, 0.06647491455078125, 0.07007980346679688, 0.0736846923828125, 0.07728958129882812, 0.08089447021484375, 0.08449935913085938, 0.088104248046875, 0.09170913696289062, 0.09531402587890625, 0.09891891479492188, 0.1025238037109375, 0.10612869262695312, 0.10973358154296875, 0.11333847045898438, 0.116943359375]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 5.0, 2.0, 4.0, 2.0, 3.0, 5.0, 7.0, 5.0, 11.0, 19.0, 11.0, 16.0, 20.0, 32.0, 29.0, 31.0, 52.0, 51.0, 51.0, 86.0, 83.0, 98.0, 63.0, 56.0, 49.0, 41.0, 38.0, 27.0, 17.0, 23.0, 24.0, 7.0, 10.0, 9.0, 6.0, 5.0, 5.0, 4.0, 4.0, 2.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.9141387939453125e-06, -6.621703505516052e-06, -6.329268217086792e-06, -6.036832928657532e-06, -5.7443976402282715e-06, -5.451962351799011e-06, -5.159527063369751e-06, -4.867091774940491e-06, -4.5746564865112305e-06, -4.28222119808197e-06, -3.98978590965271e-06, -3.6973506212234497e-06, -3.4049153327941895e-06, -3.112480044364929e-06, -2.820044755935669e-06, -2.5276094675064087e-06, -2.2351741790771484e-06, -1.942738890647888e-06, -1.650303602218628e-06, -1.3578683137893677e-06, -1.0654330253601074e-06, -7.729977369308472e-07, -4.805624485015869e-07, -1.8812716007232666e-07, 1.043081283569336e-07, 3.9674341678619385e-07, 6.891787052154541e-07, 9.816139936447144e-07, 1.2740492820739746e-06, 1.5664845705032349e-06, 1.8589198589324951e-06, 2.1513551473617554e-06, 2.4437904357910156e-06, 2.736225724220276e-06, 3.028661012649536e-06, 3.3210963010787964e-06, 3.6135315895080566e-06, 3.905966877937317e-06, 4.198402166366577e-06, 4.490837454795837e-06, 4.783272743225098e-06, 5.075708031654358e-06, 5.368143320083618e-06, 5.660578608512878e-06, 5.953013896942139e-06, 6.245449185371399e-06, 6.537884473800659e-06, 6.8303197622299194e-06, 7.12275505065918e-06, 7.41519033908844e-06, 7.7076256275177e-06, 8.00006091594696e-06, 8.29249620437622e-06, 8.584931492805481e-06, 8.877366781234741e-06, 9.169802069664001e-06, 9.462237358093262e-06, 9.754672646522522e-06, 1.0047107934951782e-05, 1.0339543223381042e-05, 1.0631978511810303e-05, 1.0924413800239563e-05, 1.1216849088668823e-05, 1.1509284377098083e-05, 1.1801719665527344e-05]}, "gradients/decoder.transformer.h.13.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 3.0, 7.0, 2.0, 8.0, 9.0, 10.0, 17.0, 11.0, 24.0, 32.0, 29.0, 58.0, 95.0, 114.0, 221.0, 586.0, 8464.0, 1021059.0, 16480.0, 661.0, 240.0, 128.0, 75.0, 52.0, 56.0, 23.0, 19.0, 17.0, 14.0, 11.0, 10.0, 5.0, 4.0, 3.0, 5.0, 2.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00021409988403320312, -0.00020868703722953796, -0.0002032741904258728, -0.00019786134362220764, -0.00019244849681854248, -0.00018703565001487732, -0.00018162280321121216, -0.000176209956407547, -0.00017079710960388184, -0.00016538426280021667, -0.00015997141599655151, -0.00015455856919288635, -0.0001491457223892212, -0.00014373287558555603, -0.00013832002878189087, -0.0001329071819782257, -0.00012749433517456055, -0.00012208148837089539, -0.00011666864156723022, -0.00011125579476356506, -0.0001058429479598999, -0.00010043010115623474, -9.501725435256958e-05, -8.960440754890442e-05, -8.419156074523926e-05, -7.87787139415741e-05, -7.336586713790894e-05, -6.795302033424377e-05, -6.254017353057861e-05, -5.712732672691345e-05, -5.171447992324829e-05, -4.630163311958313e-05, -4.088878631591797e-05, -3.547593951225281e-05, -3.0063092708587646e-05, -2.4650245904922485e-05, -1.9237399101257324e-05, -1.3824552297592163e-05, -8.411705493927002e-06, -2.998858690261841e-06, 2.4139881134033203e-06, 7.826834917068481e-06, 1.3239681720733643e-05, 1.8652528524398804e-05, 2.4065375328063965e-05, 2.9478222131729126e-05, 3.489106893539429e-05, 4.030391573905945e-05, 4.571676254272461e-05, 5.112960934638977e-05, 5.654245615005493e-05, 6.195530295372009e-05, 6.736814975738525e-05, 7.278099656105042e-05, 7.819384336471558e-05, 8.360669016838074e-05, 8.90195369720459e-05, 9.443238377571106e-05, 9.984523057937622e-05, 0.00010525807738304138, 0.00011067092418670654, 0.0001160837709903717, 0.00012149661779403687, 0.00012690946459770203, 0.0001323223114013672]}, "gradients/decoder.transformer.h.13.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 13.0, 20.0, 41.0, 87.0, 161.0, 231.0, 209.0, 123.0, 65.0, 34.0, 18.0, 10.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3.984842805948574e-06, -3.6759763588634087e-06, -3.3671099117782433e-06, -3.0582432373194024e-06, -2.749376790234237e-06, -2.4405103431490716e-06, -2.1316436686902307e-06, -1.8227772216050653e-06, -1.5139107745198999e-06, -1.2050443274347344e-06, -8.961777666627313e-07, -5.87311262734147e-07, -2.7844475880556274e-07, 3.0421688279602677e-08, 3.392882490516058e-07, 6.48154809823609e-07, 9.570212569087744e-07, 1.2658877039939398e-06, 1.574754264765943e-06, 1.883620825537946e-06, 2.1924872726231115e-06, 2.501353719708277e-06, 2.8102203941671178e-06, 3.119086841252283e-06, 3.4279532883374486e-06, 3.736819735422614e-06, 4.0456861825077794e-06, 4.35455285696662e-06, 4.663419531425461e-06, 4.972285751136951e-06, 5.281152425595792e-06, 5.590019100054633e-06, 5.898884410271421e-06, 6.207751084730262e-06, 6.516617304441752e-06, 6.825483978900593e-06, 7.134350198612083e-06, 7.443216873070924e-06, 7.752083547529764e-06, 8.060949767241254e-06, 8.369815986952744e-06, 8.678682206664234e-06, 8.987549335870426e-06, 9.296415555581916e-06, 9.605281775293406e-06, 9.914148904499598e-06, 1.0223015124211088e-05, 1.0531881343922578e-05, 1.084074847312877e-05, 1.114961469284026e-05, 1.1458481822046451e-05, 1.1767348041757941e-05, 1.2076214261469431e-05, 1.2385080481180921e-05, 1.2693947610387113e-05, 1.3002813830098603e-05, 1.3311680959304795e-05, 1.3620547179016285e-05, 1.3929414308222476e-05, 1.4238280527933966e-05, 1.4547146747645456e-05, 1.4856013876851648e-05, 1.5164880096563138e-05, 1.5473746316274628e-05, 1.5782612535986118e-05]}, "gradients/decoder.transformer.h.13.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 6.0, 1.0, 2.0, 5.0, 4.0, 7.0, 7.0, 9.0, 7.0, 10.0, 20.0, 14.0, 23.0, 23.0, 25.0, 24.0, 31.0, 30.0, 59.0, 38.0, 34.0, 46.0, 24.0, 48.0, 38.0, 33.0, 52.0, 35.0, 35.0, 47.0, 31.0, 33.0, 18.0, 25.0, 23.0, 31.0, 20.0, 13.0, 15.0, 13.0, 5.0, 9.0, 4.0, 9.0, 7.0, 5.0, 9.0, 3.0, 1.0, 2.0, 1.0, 1.0, 2.0], "bins": [-4.5299530029296875e-06, -4.400499165058136e-06, -4.2710453271865845e-06, -4.141591489315033e-06, -4.0121376514434814e-06, -3.88268381357193e-06, -3.7532299757003784e-06, -3.623776137828827e-06, -3.4943222999572754e-06, -3.364868462085724e-06, -3.2354146242141724e-06, -3.105960786342621e-06, -2.9765069484710693e-06, -2.847053110599518e-06, -2.7175992727279663e-06, -2.588145434856415e-06, -2.4586915969848633e-06, -2.3292377591133118e-06, -2.1997839212417603e-06, -2.0703300833702087e-06, -1.9408762454986572e-06, -1.8114224076271057e-06, -1.6819685697555542e-06, -1.5525147318840027e-06, -1.4230608940124512e-06, -1.2936070561408997e-06, -1.1641532182693481e-06, -1.0346993803977966e-06, -9.052455425262451e-07, -7.757917046546936e-07, -6.463378667831421e-07, -5.168840289115906e-07, -3.8743019104003906e-07, -2.5797635316848755e-07, -1.2852251529693604e-07, 9.313225746154785e-10, 1.30385160446167e-07, 2.598389983177185e-07, 3.8929283618927e-07, 5.187466740608215e-07, 6.48200511932373e-07, 7.776543498039246e-07, 9.071081876754761e-07, 1.0365620255470276e-06, 1.166015863418579e-06, 1.2954697012901306e-06, 1.4249235391616821e-06, 1.5543773770332336e-06, 1.6838312149047852e-06, 1.8132850527763367e-06, 1.942738890647888e-06, 2.0721927285194397e-06, 2.201646566390991e-06, 2.3311004042625427e-06, 2.4605542421340942e-06, 2.5900080800056458e-06, 2.7194619178771973e-06, 2.8489157557487488e-06, 2.9783695936203003e-06, 3.107823431491852e-06, 3.2372772693634033e-06, 3.366731107234955e-06, 3.4961849451065063e-06, 3.625638782978058e-06, 3.7550926208496094e-06]}, "gradients/decoder.transformer.h.13.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 3.0, 10.0, 4.0, 8.0, 5.0, 18.0, 13.0, 5.0, 16.0, 18.0, 24.0, 21.0, 28.0, 22.0, 28.0, 27.0, 32.0, 36.0, 40.0, 33.0, 33.0, 47.0, 32.0, 43.0, 38.0, 35.0, 40.0, 42.0, 34.0, 31.0, 22.0, 28.0, 30.0, 22.0, 23.0, 15.0, 24.0, 15.0, 8.0, 11.0, 10.0, 12.0, 7.0, 4.0, 1.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.572265625, -3.463134765625, -3.35400390625, -3.244873046875, -3.1357421875, -3.026611328125, -2.91748046875, -2.808349609375, -2.69921875, -2.590087890625, -2.48095703125, -2.371826171875, -2.2626953125, -2.153564453125, -2.04443359375, -1.935302734375, -1.826171875, -1.717041015625, -1.60791015625, -1.498779296875, -1.3896484375, -1.280517578125, -1.17138671875, -1.062255859375, -0.953125, -0.843994140625, -0.73486328125, -0.625732421875, -0.5166015625, -0.407470703125, -0.29833984375, -0.189208984375, -0.080078125, 0.029052734375, 0.13818359375, 0.247314453125, 0.3564453125, 0.465576171875, 0.57470703125, 0.683837890625, 0.79296875, 0.902099609375, 1.01123046875, 1.120361328125, 1.2294921875, 1.338623046875, 1.44775390625, 1.556884765625, 1.666015625, 1.775146484375, 1.88427734375, 1.993408203125, 2.1025390625, 2.211669921875, 2.32080078125, 2.429931640625, 2.5390625, 2.648193359375, 2.75732421875, 2.866455078125, 2.9755859375, 3.084716796875, 3.19384765625, 3.302978515625, 3.412109375]}, "gradients/decoder.transformer.h.13.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 4.0, 6.0, 10.0, 12.0, 7.0, 14.0, 45.0, 46.0, 66.0, 77.0, 139.0, 194.0, 242.0, 374.0, 516.0, 764.0, 984.0, 1619.0, 2284.0, 3333.0, 5313.0, 8742.0, 16143.0, 35787.0, 94658.0, 306028.0, 376864.0, 109145.0, 40862.0, 17768.0, 9494.0, 5554.0, 3662.0, 2451.0, 1662.0, 1123.0, 804.0, 528.0, 375.0, 278.0, 181.0, 115.0, 84.0, 63.0, 35.0, 34.0, 19.0, 20.0, 11.0, 6.0, 10.0, 1.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0], "bins": [-2.72265625, -2.63775634765625, -2.5528564453125, -2.46795654296875, -2.383056640625, -2.29815673828125, -2.2132568359375, -2.12835693359375, -2.04345703125, -1.95855712890625, -1.8736572265625, -1.78875732421875, -1.703857421875, -1.61895751953125, -1.5340576171875, -1.44915771484375, -1.3642578125, -1.27935791015625, -1.1944580078125, -1.10955810546875, -1.024658203125, -0.93975830078125, -0.8548583984375, -0.76995849609375, -0.68505859375, -0.60015869140625, -0.5152587890625, -0.43035888671875, -0.345458984375, -0.26055908203125, -0.1756591796875, -0.09075927734375, -0.005859375, 0.07904052734375, 0.1639404296875, 0.24884033203125, 0.333740234375, 0.41864013671875, 0.5035400390625, 0.58843994140625, 0.67333984375, 0.75823974609375, 0.8431396484375, 0.92803955078125, 1.012939453125, 1.09783935546875, 1.1827392578125, 1.26763916015625, 1.3525390625, 1.43743896484375, 1.5223388671875, 1.60723876953125, 1.692138671875, 1.77703857421875, 1.8619384765625, 1.94683837890625, 2.03173828125, 2.11663818359375, 2.2015380859375, 2.28643798828125, 2.371337890625, 2.45623779296875, 2.5411376953125, 2.62603759765625, 2.7109375]}, "gradients/decoder.transformer.h.13.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 4.0, 3.0, 4.0, 2.0, 6.0, 8.0, 7.0, 11.0, 15.0, 24.0, 20.0, 28.0, 17.0, 20.0, 29.0, 23.0, 41.0, 46.0, 34.0, 52.0, 65.0, 97.0, 296.0, 1616.0, 117.0, 74.0, 52.0, 48.0, 43.0, 41.0, 33.0, 25.0, 23.0, 26.0, 17.0, 16.0, 24.0, 16.0, 7.0, 7.0, 3.0, 4.0, 2.0, 8.0, 2.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.765625, -10.40869140625, -10.0517578125, -9.69482421875, -9.337890625, -8.98095703125, -8.6240234375, -8.26708984375, -7.91015625, -7.55322265625, -7.1962890625, -6.83935546875, -6.482421875, -6.12548828125, -5.7685546875, -5.41162109375, -5.0546875, -4.69775390625, -4.3408203125, -3.98388671875, -3.626953125, -3.27001953125, -2.9130859375, -2.55615234375, -2.19921875, -1.84228515625, -1.4853515625, -1.12841796875, -0.771484375, -0.41455078125, -0.0576171875, 0.29931640625, 0.65625, 1.01318359375, 1.3701171875, 1.72705078125, 2.083984375, 2.44091796875, 2.7978515625, 3.15478515625, 3.51171875, 3.86865234375, 4.2255859375, 4.58251953125, 4.939453125, 5.29638671875, 5.6533203125, 6.01025390625, 6.3671875, 6.72412109375, 7.0810546875, 7.43798828125, 7.794921875, 8.15185546875, 8.5087890625, 8.86572265625, 9.22265625, 9.57958984375, 9.9365234375, 10.29345703125, 10.650390625, 11.00732421875, 11.3642578125, 11.72119140625, 12.078125]}, "gradients/decoder.transformer.h.13.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 6.0, 4.0, 5.0, 13.0, 23.0, 49.0, 63.0, 137.0, 241.0, 665.0, 3154.0, 3061972.0, 76747.0, 1647.0, 480.0, 216.0, 133.0, 77.0, 29.0, 21.0, 14.0, 6.0, 4.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-58.5, -56.71875, -54.9375, -53.15625, -51.375, -49.59375, -47.8125, -46.03125, -44.25, -42.46875, -40.6875, -38.90625, -37.125, -35.34375, -33.5625, -31.78125, -30.0, -28.21875, -26.4375, -24.65625, -22.875, -21.09375, -19.3125, -17.53125, -15.75, -13.96875, -12.1875, -10.40625, -8.625, -6.84375, -5.0625, -3.28125, -1.5, 0.28125, 2.0625, 3.84375, 5.625, 7.40625, 9.1875, 10.96875, 12.75, 14.53125, 16.3125, 18.09375, 19.875, 21.65625, 23.4375, 25.21875, 27.0, 28.78125, 30.5625, 32.34375, 34.125, 35.90625, 37.6875, 39.46875, 41.25, 43.03125, 44.8125, 46.59375, 48.375, 50.15625, 51.9375, 53.71875, 55.5]}, "gradients/decoder.transformer.h.13.ln_1.weight": {"_type": "histogram", "values": [1.0, 15.0, 520.0, 467.0, 18.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.245177268981934, -8.151662826538086, -4.058148384094238, 0.035366058349609375, 4.128880500793457, 8.222393989562988, 12.315909385681152, 16.409423828125, 20.50293731689453, 24.596450805664062, 28.689966201782227, 32.78348159790039, 36.87699508666992, 40.97050857543945, 45.06402587890625, 49.15753936767578, 53.25105285644531, 57.344566345214844, 61.438079833984375, 65.5315933227539, 69.62510681152344, 73.7186279296875, 77.81214141845703, 81.90565490722656, 85.9991683959961, 90.09268188476562, 94.18619537353516, 98.27970886230469, 102.37322998046875, 106.46673583984375, 110.56025695800781, 114.65377044677734, 118.74728393554688, 122.8407974243164, 126.93431091308594, 131.02783203125, 135.121337890625, 139.21485900878906, 143.30836486816406, 147.40188598632812, 151.49539184570312, 155.5889129638672, 159.6824188232422, 163.77593994140625, 167.86944580078125, 171.9629669189453, 176.0564727783203, 180.14999389648438, 184.24351501464844, 188.3370361328125, 192.4305419921875, 196.52406311035156, 200.61756896972656, 204.71109008789062, 208.80459594726562, 212.8981170654297, 216.99163818359375, 221.0851593017578, 225.1786651611328, 229.27218627929688, 233.36569213867188, 237.45921325683594, 241.55271911621094, 245.646240234375, 249.73974609375]}, "gradients/decoder.transformer.h.13.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 2.0, 2.0, 5.0, 3.0, 9.0, 11.0, 17.0, 12.0, 9.0, 13.0, 20.0, 28.0, 29.0, 34.0, 36.0, 24.0, 32.0, 32.0, 40.0, 26.0, 31.0, 44.0, 37.0, 41.0, 38.0, 35.0, 45.0, 36.0, 41.0, 35.0, 30.0, 25.0, 23.0, 21.0, 18.0, 17.0, 15.0, 16.0, 14.0, 19.0, 3.0, 8.0, 4.0, 5.0, 10.0, 5.0, 2.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-27.05142593383789, -26.120784759521484, -25.190141677856445, -24.25950050354004, -23.328859329223633, -22.398216247558594, -21.467575073242188, -20.53693389892578, -19.606290817260742, -18.675649642944336, -17.745006561279297, -16.81436538696289, -15.883723258972168, -14.953081130981445, -14.022439956665039, -13.091797828674316, -12.16115665435791, -11.230514526367188, -10.299873352050781, -9.369231224060059, -8.438589096069336, -7.5079474449157715, -6.577305793762207, -5.646663665771484, -4.71602201461792, -3.7853801250457764, -2.854738235473633, -1.9240965843200684, -0.9934546947479248, -0.06281280517578125, 0.8678288459777832, 1.7984709739685059, 2.7291126251220703, 3.659754514694214, 4.590396404266357, 5.521038055419922, 6.4516801834106445, 7.382321834564209, 8.312963485717773, 9.243605613708496, 10.174247741699219, 11.104889869689941, 12.035531044006348, 12.96617317199707, 13.896815299987793, 14.827457427978516, 15.758098602294922, 16.688739776611328, 17.619380950927734, 18.55002212524414, 19.48066520690918, 20.411306381225586, 21.341947555541992, 22.27259063720703, 23.203231811523438, 24.133872985839844, 25.064516067504883, 25.99515724182129, 26.925800323486328, 27.856441497802734, 28.78708267211914, 29.71772575378418, 30.648366928100586, 31.579010009765625, 32.50965118408203]}, "gradients/decoder.transformer.h.12.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 6.0, 6.0, 4.0, 5.0, 8.0, 11.0, 15.0, 10.0, 16.0, 14.0, 22.0, 22.0, 20.0, 18.0, 32.0, 25.0, 33.0, 30.0, 39.0, 31.0, 46.0, 33.0, 49.0, 27.0, 44.0, 36.0, 36.0, 49.0, 40.0, 25.0, 29.0, 23.0, 28.0, 23.0, 19.0, 31.0, 18.0, 14.0, 7.0, 12.0, 13.0, 11.0, 13.0, 4.0, 5.0, 5.0, 0.0, 0.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.744140625, -3.63189697265625, -3.5196533203125, -3.40740966796875, -3.295166015625, -3.18292236328125, -3.0706787109375, -2.95843505859375, -2.84619140625, -2.73394775390625, -2.6217041015625, -2.50946044921875, -2.397216796875, -2.28497314453125, -2.1727294921875, -2.06048583984375, -1.9482421875, -1.83599853515625, -1.7237548828125, -1.61151123046875, -1.499267578125, -1.38702392578125, -1.2747802734375, -1.16253662109375, -1.05029296875, -0.93804931640625, -0.8258056640625, -0.71356201171875, -0.601318359375, -0.48907470703125, -0.3768310546875, -0.26458740234375, -0.15234375, -0.04010009765625, 0.0721435546875, 0.18438720703125, 0.296630859375, 0.40887451171875, 0.5211181640625, 0.63336181640625, 0.74560546875, 0.85784912109375, 0.9700927734375, 1.08233642578125, 1.194580078125, 1.30682373046875, 1.4190673828125, 1.53131103515625, 1.6435546875, 1.75579833984375, 1.8680419921875, 1.98028564453125, 2.092529296875, 2.20477294921875, 2.3170166015625, 2.42926025390625, 2.54150390625, 2.65374755859375, 2.7659912109375, 2.87823486328125, 2.990478515625, 3.10272216796875, 3.2149658203125, 3.32720947265625, 3.439453125]}, "gradients/decoder.transformer.h.12.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 11.0, 5.0, 7.0, 7.0, 10.0, 15.0, 14.0, 18.0, 21.0, 33.0, 32.0, 80.0, 123.0, 196.0, 437.0, 875.0, 2063.0, 5160.0, 16057.0, 69521.0, 549661.0, 2557003.0, 862021.0, 99378.0, 20763.0, 6394.0, 2284.0, 998.0, 497.0, 234.0, 120.0, 59.0, 42.0, 36.0, 21.0, 20.0, 9.0, 16.0, 7.0, 9.0, 9.0, 6.0, 3.0, 6.0, 1.0, 4.0, 3.0, 2.0, 0.0, 1.0], "bins": [-8.1875, -7.95751953125, -7.7275390625, -7.49755859375, -7.267578125, -7.03759765625, -6.8076171875, -6.57763671875, -6.34765625, -6.11767578125, -5.8876953125, -5.65771484375, -5.427734375, -5.19775390625, -4.9677734375, -4.73779296875, -4.5078125, -4.27783203125, -4.0478515625, -3.81787109375, -3.587890625, -3.35791015625, -3.1279296875, -2.89794921875, -2.66796875, -2.43798828125, -2.2080078125, -1.97802734375, -1.748046875, -1.51806640625, -1.2880859375, -1.05810546875, -0.828125, -0.59814453125, -0.3681640625, -0.13818359375, 0.091796875, 0.32177734375, 0.5517578125, 0.78173828125, 1.01171875, 1.24169921875, 1.4716796875, 1.70166015625, 1.931640625, 2.16162109375, 2.3916015625, 2.62158203125, 2.8515625, 3.08154296875, 3.3115234375, 3.54150390625, 3.771484375, 4.00146484375, 4.2314453125, 4.46142578125, 4.69140625, 4.92138671875, 5.1513671875, 5.38134765625, 5.611328125, 5.84130859375, 6.0712890625, 6.30126953125, 6.53125]}, "gradients/decoder.transformer.h.12.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 2.0, 2.0, 8.0, 10.0, 28.0, 29.0, 47.0, 77.0, 141.0, 248.0, 452.0, 795.0, 827.0, 593.0, 325.0, 187.0, 120.0, 62.0, 52.0, 20.0, 22.0, 10.0, 7.0, 3.0, 4.0, 5.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.859375, -11.5120849609375, -11.164794921875, -10.8175048828125, -10.47021484375, -10.1229248046875, -9.775634765625, -9.4283447265625, -9.0810546875, -8.7337646484375, -8.386474609375, -8.0391845703125, -7.69189453125, -7.3446044921875, -6.997314453125, -6.6500244140625, -6.302734375, -5.9554443359375, -5.608154296875, -5.2608642578125, -4.91357421875, -4.5662841796875, -4.218994140625, -3.8717041015625, -3.5244140625, -3.1771240234375, -2.829833984375, -2.4825439453125, -2.13525390625, -1.7879638671875, -1.440673828125, -1.0933837890625, -0.74609375, -0.3988037109375, -0.051513671875, 0.2957763671875, 0.64306640625, 0.9903564453125, 1.337646484375, 1.6849365234375, 2.0322265625, 2.3795166015625, 2.726806640625, 3.0740966796875, 3.42138671875, 3.7686767578125, 4.115966796875, 4.4632568359375, 4.810546875, 5.1578369140625, 5.505126953125, 5.8524169921875, 6.19970703125, 6.5469970703125, 6.894287109375, 7.2415771484375, 7.5888671875, 7.9361572265625, 8.283447265625, 8.6307373046875, 8.97802734375, 9.3253173828125, 9.672607421875, 10.0198974609375, 10.3671875]}, "gradients/decoder.transformer.h.12.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 4.0, 5.0, 9.0, 16.0, 12.0, 24.0, 37.0, 70.0, 89.0, 183.0, 344.0, 676.0, 1920.0, 15334.0, 1255140.0, 2890418.0, 26131.0, 2326.0, 777.0, 340.0, 169.0, 97.0, 60.0, 43.0, 27.0, 10.0, 11.0, 6.0, 3.0, 6.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-24.90625, -24.019287109375, -23.13232421875, -22.245361328125, -21.3583984375, -20.471435546875, -19.58447265625, -18.697509765625, -17.810546875, -16.923583984375, -16.03662109375, -15.149658203125, -14.2626953125, -13.375732421875, -12.48876953125, -11.601806640625, -10.71484375, -9.827880859375, -8.94091796875, -8.053955078125, -7.1669921875, -6.280029296875, -5.39306640625, -4.506103515625, -3.619140625, -2.732177734375, -1.84521484375, -0.958251953125, -0.0712890625, 0.815673828125, 1.70263671875, 2.589599609375, 3.4765625, 4.363525390625, 5.25048828125, 6.137451171875, 7.0244140625, 7.911376953125, 8.79833984375, 9.685302734375, 10.572265625, 11.459228515625, 12.34619140625, 13.233154296875, 14.1201171875, 15.007080078125, 15.89404296875, 16.781005859375, 17.66796875, 18.554931640625, 19.44189453125, 20.328857421875, 21.2158203125, 22.102783203125, 22.98974609375, 23.876708984375, 24.763671875, 25.650634765625, 26.53759765625, 27.424560546875, 28.3115234375, 29.198486328125, 30.08544921875, 30.972412109375, 31.859375]}, "gradients/decoder.transformer.h.12.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 6.0, 16.0, 38.0, 76.0, 111.0, 167.0, 188.0, 180.0, 106.0, 63.0, 25.0, 24.0, 7.0, 4.0, 1.0, 1.0, 1.0], "bins": [-99.61128997802734, -97.74861907958984, -95.88595581054688, -94.02328491210938, -92.16061401367188, -90.29794311523438, -88.4352798461914, -86.5726089477539, -84.70994567871094, -82.84727478027344, -80.98461151123047, -79.12194061279297, -77.25926971435547, -75.3966064453125, -73.533935546875, -71.6712646484375, -69.80859375, -67.9459228515625, -66.08325958251953, -64.22058868408203, -62.35791778564453, -60.4952507019043, -58.63258361816406, -56.76991271972656, -54.90724563598633, -53.044578552246094, -51.181907653808594, -49.31924057006836, -47.456573486328125, -45.593902587890625, -43.73123550415039, -41.868568420410156, -40.005897521972656, -38.14323043823242, -36.28055953979492, -34.41789245605469, -32.55522155761719, -30.692554473876953, -28.82988739013672, -26.96721839904785, -25.104549407958984, -23.241880416870117, -21.37921142578125, -19.516544342041016, -17.65387535095215, -15.791206359863281, -13.92853832244873, -12.06587028503418, -10.203201293945312, -8.340532302856445, -6.4778642654418945, -4.6151957511901855, -2.7525272369384766, -0.8898582458496094, 0.9728097915649414, 2.835477828979492, 4.698146820068359, 6.560815334320068, 8.423483848571777, 10.286151885986328, 12.148820877075195, 14.011489868164062, 15.874157905578613, 17.736825942993164, 19.59949493408203]}, "gradients/decoder.transformer.h.12.ln_2.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 2.0, 2.0, 3.0, 3.0, 11.0, 7.0, 8.0, 6.0, 10.0, 11.0, 13.0, 17.0, 21.0, 14.0, 22.0, 28.0, 29.0, 18.0, 20.0, 23.0, 26.0, 36.0, 26.0, 44.0, 40.0, 31.0, 42.0, 46.0, 29.0, 46.0, 27.0, 41.0, 27.0, 38.0, 34.0, 30.0, 25.0, 13.0, 21.0, 19.0, 16.0, 10.0, 9.0, 21.0, 12.0, 8.0, 2.0, 9.0, 11.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-22.78398895263672, -22.123882293701172, -21.463777542114258, -20.80367088317871, -20.143566131591797, -19.48345947265625, -18.823352813720703, -18.163246154785156, -17.503141403198242, -16.843034744262695, -16.18292999267578, -15.522823333740234, -14.862717628479004, -14.202611923217773, -13.542505264282227, -12.882399559020996, -12.222293853759766, -11.562188148498535, -10.902082443237305, -10.241975784301758, -9.581870079040527, -8.921764373779297, -8.26165771484375, -7.6015520095825195, -6.941446304321289, -6.281340599060059, -5.62123441696167, -4.961128234863281, -4.301022529602051, -3.640916585922241, -2.9808106422424316, -2.320704460144043, -1.6605987548828125, -1.000492811203003, -0.34038686752319336, 0.3197190761566162, 0.9798250198364258, 1.6399309635162354, 2.300036907196045, 2.9601430892944336, 3.620248794555664, 4.2803544998168945, 4.940460681915283, 5.600566864013672, 6.260672569274902, 6.920778274536133, 7.5808844566345215, 8.24099063873291, 8.90109634399414, 9.561202049255371, 10.221307754516602, 10.881414413452148, 11.541520118713379, 12.20162582397461, 12.861732482910156, 13.521838188171387, 14.181943893432617, 14.842049598693848, 15.502155303955078, 16.162261962890625, 16.822368621826172, 17.482473373413086, 18.142580032348633, 18.802684783935547, 19.462791442871094]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 4.0, 5.0, 2.0, 7.0, 8.0, 12.0, 5.0, 13.0, 16.0, 15.0, 19.0, 19.0, 18.0, 17.0, 26.0, 29.0, 35.0, 39.0, 30.0, 33.0, 40.0, 40.0, 38.0, 40.0, 37.0, 36.0, 38.0, 48.0, 35.0, 39.0, 29.0, 28.0, 25.0, 31.0, 21.0, 13.0, 19.0, 19.0, 17.0, 16.0, 9.0, 9.0, 8.0, 7.0, 6.0, 2.0, 4.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.845703125, -3.731536865234375, -3.61737060546875, -3.503204345703125, -3.3890380859375, -3.274871826171875, -3.16070556640625, -3.046539306640625, -2.932373046875, -2.818206787109375, -2.70404052734375, -2.589874267578125, -2.4757080078125, -2.361541748046875, -2.24737548828125, -2.133209228515625, -2.01904296875, -1.904876708984375, -1.79071044921875, -1.676544189453125, -1.5623779296875, -1.448211669921875, -1.33404541015625, -1.219879150390625, -1.105712890625, -0.991546630859375, -0.87738037109375, -0.763214111328125, -0.6490478515625, -0.534881591796875, -0.42071533203125, -0.306549072265625, -0.1923828125, -0.078216552734375, 0.03594970703125, 0.150115966796875, 0.2642822265625, 0.378448486328125, 0.49261474609375, 0.606781005859375, 0.720947265625, 0.835113525390625, 0.94927978515625, 1.063446044921875, 1.1776123046875, 1.291778564453125, 1.40594482421875, 1.520111083984375, 1.63427734375, 1.748443603515625, 1.86260986328125, 1.976776123046875, 2.0909423828125, 2.205108642578125, 2.31927490234375, 2.433441162109375, 2.547607421875, 2.661773681640625, 2.77593994140625, 2.890106201171875, 3.0042724609375, 3.118438720703125, 3.23260498046875, 3.346771240234375, 3.4609375]}, "gradients/decoder.transformer.h.12.crossattention.c_proj.weight": {"_type": "histogram", "values": [5.0, 3.0, 2.0, 7.0, 10.0, 12.0, 21.0, 23.0, 37.0, 52.0, 92.0, 116.0, 188.0, 237.0, 365.0, 489.0, 759.0, 1068.0, 1462.0, 2152.0, 2992.0, 4309.0, 6103.0, 8674.0, 12486.0, 18273.0, 27105.0, 42066.0, 68336.0, 119725.0, 267041.0, 197983.0, 97140.0, 57204.0, 35882.0, 23405.0, 15874.0, 10996.0, 7778.0, 5335.0, 3826.0, 2643.0, 1954.0, 1307.0, 917.0, 646.0, 429.0, 325.0, 235.0, 146.0, 106.0, 79.0, 46.0, 38.0, 22.0, 17.0, 8.0, 10.0, 7.0, 1.0, 1.0, 4.0, 0.0, 2.0], "bins": [-0.0848388671875, -0.08208656311035156, -0.07933425903320312, -0.07658195495605469, -0.07382965087890625, -0.07107734680175781, -0.06832504272460938, -0.06557273864746094, -0.0628204345703125, -0.06006813049316406, -0.057315826416015625, -0.05456352233886719, -0.05181121826171875, -0.04905891418457031, -0.046306610107421875, -0.04355430603027344, -0.040802001953125, -0.03804969787597656, -0.035297393798828125, -0.03254508972167969, -0.02979278564453125, -0.027040481567382812, -0.024288177490234375, -0.021535873413085938, -0.0187835693359375, -0.016031265258789062, -0.013278961181640625, -0.010526657104492188, -0.00777435302734375, -0.0050220489501953125, -0.002269744873046875, 0.0004825592041015625, 0.00323486328125, 0.0059871673583984375, 0.008739471435546875, 0.011491775512695312, 0.01424407958984375, 0.016996383666992188, 0.019748687744140625, 0.022500991821289062, 0.0252532958984375, 0.028005599975585938, 0.030757904052734375, 0.03351020812988281, 0.03626251220703125, 0.03901481628417969, 0.041767120361328125, 0.04451942443847656, 0.047271728515625, 0.05002403259277344, 0.052776336669921875, 0.05552864074707031, 0.05828094482421875, 0.06103324890136719, 0.06378555297851562, 0.06653785705566406, 0.0692901611328125, 0.07204246520996094, 0.07479476928710938, 0.07754707336425781, 0.08029937744140625, 0.08305168151855469, 0.08580398559570312, 0.08855628967285156, 0.09130859375]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 4.0, 6.0, 4.0, 2.0, 9.0, 6.0, 7.0, 6.0, 17.0, 15.0, 15.0, 15.0, 22.0, 23.0, 23.0, 27.0, 32.0, 52.0, 25.0, 31.0, 45.0, 41.0, 31.0, 1072.0, 44.0, 37.0, 40.0, 36.0, 31.0, 31.0, 26.0, 34.0, 28.0, 37.0, 28.0, 20.0, 22.0, 11.0, 13.0, 17.0, 7.0, 7.0, 10.0, 9.0, 2.0, 6.0, 2.0, 3.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-2.462890625, -2.382904052734375, -2.30291748046875, -2.222930908203125, -2.1429443359375, -2.062957763671875, -1.98297119140625, -1.902984619140625, -1.822998046875, -1.743011474609375, -1.66302490234375, -1.583038330078125, -1.5030517578125, -1.423065185546875, -1.34307861328125, -1.263092041015625, -1.18310546875, -1.103118896484375, -1.02313232421875, -0.943145751953125, -0.8631591796875, -0.783172607421875, -0.70318603515625, -0.623199462890625, -0.543212890625, -0.463226318359375, -0.38323974609375, -0.303253173828125, -0.2232666015625, -0.143280029296875, -0.06329345703125, 0.016693115234375, 0.0966796875, 0.176666259765625, 0.25665283203125, 0.336639404296875, 0.4166259765625, 0.496612548828125, 0.57659912109375, 0.656585693359375, 0.736572265625, 0.816558837890625, 0.89654541015625, 0.976531982421875, 1.0565185546875, 1.136505126953125, 1.21649169921875, 1.296478271484375, 1.37646484375, 1.456451416015625, 1.53643798828125, 1.616424560546875, 1.6964111328125, 1.776397705078125, 1.85638427734375, 1.936370849609375, 2.016357421875, 2.096343994140625, 2.17633056640625, 2.256317138671875, 2.3363037109375, 2.416290283203125, 2.49627685546875, 2.576263427734375, 2.65625]}, "gradients/decoder.transformer.h.12.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 4.0, 1.0, 6.0, 11.0, 10.0, 16.0, 17.0, 29.0, 59.0, 67.0, 111.0, 189.0, 254.0, 370.0, 564.0, 829.0, 1309.0, 2073.0, 3180.0, 4994.0, 8006.0, 13164.0, 22027.0, 37523.0, 66859.0, 130653.0, 1389047.0, 201935.0, 91577.0, 49530.0, 28189.0, 16736.0, 10092.0, 6444.0, 3961.0, 2528.0, 1598.0, 1082.0, 692.0, 501.0, 278.0, 210.0, 124.0, 87.0, 61.0, 52.0, 36.0, 14.0, 19.0, 12.0, 5.0, 1.0, 3.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.1085205078125, -0.10499095916748047, -0.10146141052246094, -0.0979318618774414, -0.09440231323242188, -0.09087276458740234, -0.08734321594238281, -0.08381366729736328, -0.08028411865234375, -0.07675457000732422, -0.07322502136230469, -0.06969547271728516, -0.06616592407226562, -0.0626363754272461, -0.05910682678222656, -0.05557727813720703, -0.0520477294921875, -0.04851818084716797, -0.04498863220214844, -0.041459083557128906, -0.037929534912109375, -0.034399986267089844, -0.030870437622070312, -0.02734088897705078, -0.02381134033203125, -0.02028179168701172, -0.016752243041992188, -0.013222694396972656, -0.009693145751953125, -0.006163597106933594, -0.0026340484619140625, 0.0008955001831054688, 0.004425048828125, 0.007954597473144531, 0.011484146118164062, 0.015013694763183594, 0.018543243408203125, 0.022072792053222656, 0.025602340698242188, 0.02913188934326172, 0.03266143798828125, 0.03619098663330078, 0.03972053527832031, 0.043250083923339844, 0.046779632568359375, 0.050309181213378906, 0.05383872985839844, 0.05736827850341797, 0.0608978271484375, 0.06442737579345703, 0.06795692443847656, 0.0714864730834961, 0.07501602172851562, 0.07854557037353516, 0.08207511901855469, 0.08560466766357422, 0.08913421630859375, 0.09266376495361328, 0.09619331359863281, 0.09972286224365234, 0.10325241088867188, 0.1067819595336914, 0.11031150817871094, 0.11384105682373047, 0.11737060546875]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 4.0, 1.0, 3.0, 4.0, 6.0, 5.0, 5.0, 8.0, 10.0, 12.0, 14.0, 7.0, 24.0, 15.0, 20.0, 29.0, 25.0, 38.0, 35.0, 43.0, 36.0, 44.0, 36.0, 60.0, 52.0, 51.0, 61.0, 40.0, 60.0, 23.0, 33.0, 31.0, 32.0, 13.0, 34.0, 19.0, 21.0, 10.0, 4.0, 8.0, 6.0, 3.0, 9.0, 2.0, 3.0, 4.0, 4.0, 0.0, 3.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-6.4373016357421875e-06, -6.210058927536011e-06, -5.982816219329834e-06, -5.755573511123657e-06, -5.5283308029174805e-06, -5.301088094711304e-06, -5.073845386505127e-06, -4.84660267829895e-06, -4.6193599700927734e-06, -4.392117261886597e-06, -4.16487455368042e-06, -3.937631845474243e-06, -3.7103891372680664e-06, -3.4831464290618896e-06, -3.255903720855713e-06, -3.028661012649536e-06, -2.8014183044433594e-06, -2.5741755962371826e-06, -2.346932888031006e-06, -2.119690179824829e-06, -1.8924474716186523e-06, -1.6652047634124756e-06, -1.4379620552062988e-06, -1.210719347000122e-06, -9.834766387939453e-07, -7.562339305877686e-07, -5.289912223815918e-07, -3.0174851417541504e-07, -7.450580596923828e-08, 1.5273690223693848e-07, 3.7997961044311523e-07, 6.07222318649292e-07, 8.344650268554688e-07, 1.0617077350616455e-06, 1.2889504432678223e-06, 1.516193151473999e-06, 1.7434358596801758e-06, 1.9706785678863525e-06, 2.1979212760925293e-06, 2.425163984298706e-06, 2.652406692504883e-06, 2.8796494007110596e-06, 3.1068921089172363e-06, 3.334134817123413e-06, 3.56137752532959e-06, 3.7886202335357666e-06, 4.015862941741943e-06, 4.24310564994812e-06, 4.470348358154297e-06, 4.697591066360474e-06, 4.92483377456665e-06, 5.152076482772827e-06, 5.379319190979004e-06, 5.606561899185181e-06, 5.833804607391357e-06, 6.061047315597534e-06, 6.288290023803711e-06, 6.515532732009888e-06, 6.7427754402160645e-06, 6.970018148422241e-06, 7.197260856628418e-06, 7.424503564834595e-06, 7.651746273040771e-06, 7.878988981246948e-06, 8.106231689453125e-06]}, "gradients/decoder.transformer.h.12.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 3.0, 2.0, 6.0, 5.0, 8.0, 6.0, 5.0, 8.0, 7.0, 29.0, 15.0, 27.0, 29.0, 47.0, 44.0, 58.0, 83.0, 105.0, 223.0, 350.0, 1147.0, 34226.0, 985423.0, 24645.0, 995.0, 351.0, 179.0, 112.0, 92.0, 66.0, 54.0, 42.0, 36.0, 25.0, 18.0, 16.0, 16.0, 12.0, 6.0, 8.0, 4.0, 8.0, 4.0, 3.0, 3.0, 4.0, 2.0, 2.0, 0.0, 0.0, 3.0, 1.0], "bins": [-0.00013709068298339844, -0.00013311300426721573, -0.00012913532555103302, -0.0001251576468348503, -0.0001211799681186676, -0.0001172022894024849, -0.00011322461068630219, -0.00010924693197011948, -0.00010526925325393677, -0.00010129157453775406, -9.731389582157135e-05, -9.333621710538864e-05, -8.935853838920593e-05, -8.538085967302322e-05, -8.140318095684052e-05, -7.74255022406578e-05, -7.34478235244751e-05, -6.947014480829239e-05, -6.549246609210968e-05, -6.151478737592697e-05, -5.753710865974426e-05, -5.3559429943561554e-05, -4.9581751227378845e-05, -4.5604072511196136e-05, -4.162639379501343e-05, -3.764871507883072e-05, -3.367103636264801e-05, -2.96933576464653e-05, -2.5715678930282593e-05, -2.1738000214099884e-05, -1.7760321497917175e-05, -1.3782642781734467e-05, -9.804964065551758e-06, -5.827285349369049e-06, -1.8496066331863403e-06, 2.1280720829963684e-06, 6.105750799179077e-06, 1.0083429515361786e-05, 1.4061108231544495e-05, 1.8038786947727203e-05, 2.2016465663909912e-05, 2.599414438009262e-05, 2.997182309627533e-05, 3.394950181245804e-05, 3.792718052864075e-05, 4.1904859244823456e-05, 4.5882537961006165e-05, 4.986021667718887e-05, 5.383789539337158e-05, 5.781557410955429e-05, 6.1793252825737e-05, 6.577093154191971e-05, 6.974861025810242e-05, 7.372628897428513e-05, 7.770396769046783e-05, 8.168164640665054e-05, 8.565932512283325e-05, 8.963700383901596e-05, 9.361468255519867e-05, 9.759236127138138e-05, 0.00010157003998756409, 0.0001055477187037468, 0.0001095253974199295, 0.00011350307613611221, 0.00011748075485229492]}, "gradients/decoder.transformer.h.12.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 10.0, 22.0, 40.0, 64.0, 108.0, 136.0, 171.0, 154.0, 106.0, 77.0, 59.0, 31.0, 15.0, 7.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.308371666818857e-06, -6.072285941627342e-06, -5.836200216435827e-06, -5.600114945991663e-06, -5.364029220800148e-06, -5.127943495608633e-06, -4.891858225164469e-06, -4.655772499972954e-06, -4.4196867747814395e-06, -4.1836010495899245e-06, -3.9475153243984096e-06, -3.7114300539542455e-06, -3.4753443287627306e-06, -3.2392586035712156e-06, -3.003173105753376e-06, -2.7670876079355367e-06, -2.5310018827440217e-06, -2.2949161575525068e-06, -2.0588306597346673e-06, -1.82274504822999e-06, -1.5866594367253128e-06, -1.3505738252206356e-06, -1.1144882137159584e-06, -8.784026022112812e-07, -6.42316990706604e-07, -4.0623137920192676e-07, -1.7014576769724954e-07, 6.593984380742768e-08, 3.020254553121049e-07, 5.381110668167821e-07, 7.741966783214593e-07, 1.0102822898261365e-06, 1.2463679013308138e-06, 1.482453512835491e-06, 1.7185391243401682e-06, 1.9546246221580077e-06, 2.1907103473495226e-06, 2.4267960725410376e-06, 2.662881570358877e-06, 2.8989670681767166e-06, 3.1350527933682315e-06, 3.3711385185597464e-06, 3.607224016377586e-06, 3.8433095141954254e-06, 4.07939523938694e-06, 4.315480964578455e-06, 4.551566235022619e-06, 4.787651960214134e-06, 5.023737685405649e-06, 5.259823410597164e-06, 5.495909135788679e-06, 5.731994406232843e-06, 5.968080131424358e-06, 6.204165856615873e-06, 6.440251127060037e-06, 6.676336852251552e-06, 6.912422577443067e-06, 7.148508302634582e-06, 7.384594027826097e-06, 7.620679298270261e-06, 7.856764568714425e-06, 8.09285029390594e-06, 8.328936019097455e-06, 8.56502174428897e-06, 8.801107469480485e-06]}, "gradients/decoder.transformer.h.12.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 8.0, 5.0, 6.0, 2.0, 7.0, 11.0, 8.0, 9.0, 11.0, 17.0, 14.0, 20.0, 20.0, 21.0, 17.0, 13.0, 25.0, 27.0, 25.0, 37.0, 31.0, 25.0, 44.0, 31.0, 34.0, 40.0, 48.0, 38.0, 43.0, 40.0, 22.0, 28.0, 45.0, 33.0, 25.0, 18.0, 26.0, 21.0, 16.0, 9.0, 13.0, 11.0, 9.0, 11.0, 10.0, 10.0, 9.0, 7.0, 4.0, 3.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-3.814697265625e-06, -3.6908313632011414e-06, -3.5669654607772827e-06, -3.443099558353424e-06, -3.3192336559295654e-06, -3.1953677535057068e-06, -3.071501851081848e-06, -2.9476359486579895e-06, -2.823770046234131e-06, -2.6999041438102722e-06, -2.5760382413864136e-06, -2.452172338962555e-06, -2.3283064365386963e-06, -2.2044405341148376e-06, -2.080574631690979e-06, -1.9567087292671204e-06, -1.8328428268432617e-06, -1.708976924419403e-06, -1.5851110219955444e-06, -1.4612451195716858e-06, -1.3373792171478271e-06, -1.2135133147239685e-06, -1.0896474123001099e-06, -9.657815098762512e-07, -8.419156074523926e-07, -7.180497050285339e-07, -5.941838026046753e-07, -4.7031790018081665e-07, -3.46451997756958e-07, -2.2258609533309937e-07, -9.872019290924072e-08, 2.514570951461792e-08, 1.4901161193847656e-07, 2.728775143623352e-07, 3.9674341678619385e-07, 5.206093192100525e-07, 6.444752216339111e-07, 7.683411240577698e-07, 8.922070264816284e-07, 1.016072928905487e-06, 1.1399388313293457e-06, 1.2638047337532043e-06, 1.387670636177063e-06, 1.5115365386009216e-06, 1.6354024410247803e-06, 1.759268343448639e-06, 1.8831342458724976e-06, 2.007000148296356e-06, 2.130866050720215e-06, 2.2547319531440735e-06, 2.378597855567932e-06, 2.5024637579917908e-06, 2.6263296604156494e-06, 2.750195562839508e-06, 2.8740614652633667e-06, 2.9979273676872253e-06, 3.121793270111084e-06, 3.2456591725349426e-06, 3.3695250749588013e-06, 3.49339097738266e-06, 3.6172568798065186e-06, 3.741122782230377e-06, 3.864988684654236e-06, 3.9888545870780945e-06, 4.112720489501953e-06]}, "gradients/decoder.transformer.h.12.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 4.0, 5.0, 2.0, 7.0, 8.0, 12.0, 5.0, 13.0, 16.0, 15.0, 19.0, 19.0, 18.0, 17.0, 26.0, 29.0, 35.0, 39.0, 30.0, 33.0, 40.0, 40.0, 38.0, 40.0, 37.0, 36.0, 38.0, 48.0, 35.0, 39.0, 29.0, 28.0, 25.0, 31.0, 21.0, 13.0, 19.0, 19.0, 17.0, 16.0, 9.0, 9.0, 8.0, 7.0, 6.0, 2.0, 4.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.845703125, -3.731536865234375, -3.61737060546875, -3.503204345703125, -3.3890380859375, -3.274871826171875, -3.16070556640625, -3.046539306640625, -2.932373046875, -2.818206787109375, -2.70404052734375, -2.589874267578125, -2.4757080078125, -2.361541748046875, -2.24737548828125, -2.133209228515625, -2.01904296875, -1.904876708984375, -1.79071044921875, -1.676544189453125, -1.5623779296875, -1.448211669921875, -1.33404541015625, -1.219879150390625, -1.105712890625, -0.991546630859375, -0.87738037109375, -0.763214111328125, -0.6490478515625, -0.534881591796875, -0.42071533203125, -0.306549072265625, -0.1923828125, -0.078216552734375, 0.03594970703125, 0.150115966796875, 0.2642822265625, 0.378448486328125, 0.49261474609375, 0.606781005859375, 0.720947265625, 0.835113525390625, 0.94927978515625, 1.063446044921875, 1.1776123046875, 1.291778564453125, 1.40594482421875, 1.520111083984375, 1.63427734375, 1.748443603515625, 1.86260986328125, 1.976776123046875, 2.0909423828125, 2.205108642578125, 2.31927490234375, 2.433441162109375, 2.547607421875, 2.661773681640625, 2.77593994140625, 2.890106201171875, 3.0042724609375, 3.118438720703125, 3.23260498046875, 3.346771240234375, 3.4609375]}, "gradients/decoder.transformer.h.12.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 5.0, 4.0, 5.0, 6.0, 7.0, 15.0, 18.0, 31.0, 35.0, 63.0, 74.0, 133.0, 183.0, 289.0, 510.0, 1059.0, 2233.0, 4966.0, 13350.0, 53655.0, 593446.0, 320768.0, 38710.0, 10778.0, 4124.0, 1944.0, 913.0, 464.0, 281.0, 170.0, 104.0, 67.0, 49.0, 34.0, 31.0, 15.0, 11.0, 9.0, 5.0, 7.0, 1.0, 0.0, 2.0], "bins": [-7.171875, -7.000701904296875, -6.82952880859375, -6.658355712890625, -6.4871826171875, -6.316009521484375, -6.14483642578125, -5.973663330078125, -5.802490234375, -5.631317138671875, -5.46014404296875, -5.288970947265625, -5.1177978515625, -4.946624755859375, -4.77545166015625, -4.604278564453125, -4.43310546875, -4.261932373046875, -4.09075927734375, -3.919586181640625, -3.7484130859375, -3.577239990234375, -3.40606689453125, -3.234893798828125, -3.063720703125, -2.892547607421875, -2.72137451171875, -2.550201416015625, -2.3790283203125, -2.207855224609375, -2.03668212890625, -1.865509033203125, -1.6943359375, -1.523162841796875, -1.35198974609375, -1.180816650390625, -1.0096435546875, -0.838470458984375, -0.66729736328125, -0.496124267578125, -0.324951171875, -0.153778076171875, 0.01739501953125, 0.188568115234375, 0.3597412109375, 0.530914306640625, 0.70208740234375, 0.873260498046875, 1.04443359375, 1.215606689453125, 1.38677978515625, 1.557952880859375, 1.7291259765625, 1.900299072265625, 2.07147216796875, 2.242645263671875, 2.413818359375, 2.584991455078125, 2.75616455078125, 2.927337646484375, 3.0985107421875, 3.269683837890625, 3.44085693359375, 3.612030029296875, 3.783203125]}, "gradients/decoder.transformer.h.12.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 5.0, 2.0, 6.0, 4.0, 7.0, 10.0, 4.0, 4.0, 13.0, 16.0, 20.0, 26.0, 28.0, 33.0, 30.0, 46.0, 39.0, 42.0, 47.0, 74.0, 104.0, 1702.0, 272.0, 111.0, 64.0, 37.0, 37.0, 45.0, 42.0, 33.0, 28.0, 31.0, 20.0, 14.0, 17.0, 6.0, 6.0, 9.0, 3.0, 6.0, 5.0, 3.0, 4.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.65625, -12.23291015625, -11.8095703125, -11.38623046875, -10.962890625, -10.53955078125, -10.1162109375, -9.69287109375, -9.26953125, -8.84619140625, -8.4228515625, -7.99951171875, -7.576171875, -7.15283203125, -6.7294921875, -6.30615234375, -5.8828125, -5.45947265625, -5.0361328125, -4.61279296875, -4.189453125, -3.76611328125, -3.3427734375, -2.91943359375, -2.49609375, -2.07275390625, -1.6494140625, -1.22607421875, -0.802734375, -0.37939453125, 0.0439453125, 0.46728515625, 0.890625, 1.31396484375, 1.7373046875, 2.16064453125, 2.583984375, 3.00732421875, 3.4306640625, 3.85400390625, 4.27734375, 4.70068359375, 5.1240234375, 5.54736328125, 5.970703125, 6.39404296875, 6.8173828125, 7.24072265625, 7.6640625, 8.08740234375, 8.5107421875, 8.93408203125, 9.357421875, 9.78076171875, 10.2041015625, 10.62744140625, 11.05078125, 11.47412109375, 11.8974609375, 12.32080078125, 12.744140625, 13.16748046875, 13.5908203125, 14.01416015625, 14.4375]}, "gradients/decoder.transformer.h.12.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 6.0, 7.0, 7.0, 12.0, 12.0, 13.0, 18.0, 35.0, 59.0, 80.0, 135.0, 191.0, 305.0, 735.0, 7479.0, 3112279.0, 22136.0, 1205.0, 342.0, 225.0, 138.0, 82.0, 66.0, 41.0, 27.0, 12.0, 18.0, 11.0, 8.0, 10.0, 4.0, 4.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-42.90625, -41.63037109375, -40.3544921875, -39.07861328125, -37.802734375, -36.52685546875, -35.2509765625, -33.97509765625, -32.69921875, -31.42333984375, -30.1474609375, -28.87158203125, -27.595703125, -26.31982421875, -25.0439453125, -23.76806640625, -22.4921875, -21.21630859375, -19.9404296875, -18.66455078125, -17.388671875, -16.11279296875, -14.8369140625, -13.56103515625, -12.28515625, -11.00927734375, -9.7333984375, -8.45751953125, -7.181640625, -5.90576171875, -4.6298828125, -3.35400390625, -2.078125, -0.80224609375, 0.4736328125, 1.74951171875, 3.025390625, 4.30126953125, 5.5771484375, 6.85302734375, 8.12890625, 9.40478515625, 10.6806640625, 11.95654296875, 13.232421875, 14.50830078125, 15.7841796875, 17.06005859375, 18.3359375, 19.61181640625, 20.8876953125, 22.16357421875, 23.439453125, 24.71533203125, 25.9912109375, 27.26708984375, 28.54296875, 29.81884765625, 31.0947265625, 32.37060546875, 33.646484375, 34.92236328125, 36.1982421875, 37.47412109375, 38.75]}, "gradients/decoder.transformer.h.12.ln_1.weight": {"_type": "histogram", "values": [3.0, 1007.0, 9.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.964652061462402, -5.753775596618652, 3.4571008682250977, 12.667977333068848, 21.87885284423828, 31.08972930908203, 40.30060577392578, 49.51148223876953, 58.72235870361328, 67.93323516845703, 77.14411163330078, 86.35498809814453, 95.56586456298828, 104.77674102783203, 113.98761749267578, 123.19849395751953, 132.4093780517578, 141.62025451660156, 150.8311309814453, 160.04200744628906, 169.2528839111328, 178.46376037597656, 187.6746368408203, 196.88551330566406, 206.0963897705078, 215.30726623535156, 224.5181427001953, 233.72901916503906, 242.9398956298828, 252.15077209472656, 261.36163330078125, 270.572509765625, 279.78338623046875, 288.9942626953125, 298.20513916015625, 307.416015625, 316.62689208984375, 325.8377685546875, 335.04864501953125, 344.259521484375, 353.47039794921875, 362.6812744140625, 371.89215087890625, 381.10302734375, 390.31390380859375, 399.5247802734375, 408.73565673828125, 417.946533203125, 427.15740966796875, 436.3682861328125, 445.57916259765625, 454.7900390625, 464.00091552734375, 473.2117919921875, 482.42266845703125, 491.633544921875, 500.84442138671875, 510.0552978515625, 519.2661743164062, 528.47705078125, 537.6879272460938, 546.8988037109375, 556.1096801757812, 565.320556640625, 574.5314331054688]}, "gradients/decoder.transformer.h.12.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 4.0, 4.0, 7.0, 5.0, 6.0, 9.0, 7.0, 9.0, 16.0, 12.0, 23.0, 18.0, 18.0, 31.0, 33.0, 34.0, 40.0, 37.0, 36.0, 34.0, 37.0, 50.0, 36.0, 35.0, 26.0, 41.0, 33.0, 35.0, 35.0, 24.0, 30.0, 28.0, 33.0, 35.0, 23.0, 15.0, 18.0, 15.0, 7.0, 13.0, 12.0, 4.0, 13.0, 7.0, 6.0, 2.0, 5.0, 2.0, 6.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-31.024951934814453, -30.096824645996094, -29.168697357177734, -28.240570068359375, -27.31244468688965, -26.38431739807129, -25.45619010925293, -24.52806282043457, -23.599937438964844, -22.671810150146484, -21.743682861328125, -20.815555572509766, -19.88743019104004, -18.95930290222168, -18.03117561340332, -17.10304832458496, -16.1749210357666, -15.246793746948242, -14.3186674118042, -13.39054012298584, -12.462413787841797, -11.534286499023438, -10.606159210205078, -9.678031921386719, -8.749905586242676, -7.821778774261475, -6.893651962280273, -5.965524673461914, -5.037397861480713, -4.109271049499512, -3.1811437606811523, -2.253016948699951, -1.3248882293701172, -0.39676129817962646, 0.5313656330108643, 1.4594926834106445, 2.3876194953918457, 3.315746307373047, 4.243873596191406, 5.172000408172607, 6.100127220153809, 7.02825403213501, 7.956380844116211, 8.88450813293457, 9.81263542175293, 10.740761756896973, 11.668889045715332, 12.597015380859375, 13.525142669677734, 14.453269958496094, 15.381396293640137, 16.309524536132812, 17.23764991760254, 18.1657772064209, 19.093904495239258, 20.022031784057617, 20.950157165527344, 21.878284454345703, 22.806411743164062, 23.734539031982422, 24.66266441345215, 25.590791702270508, 26.518918991088867, 27.447046279907227, 28.375173568725586]}, "gradients/decoder.transformer.h.11.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 2.0, 5.0, 6.0, 3.0, 7.0, 7.0, 9.0, 10.0, 4.0, 14.0, 17.0, 17.0, 16.0, 19.0, 26.0, 20.0, 28.0, 29.0, 29.0, 28.0, 37.0, 34.0, 43.0, 39.0, 36.0, 36.0, 34.0, 35.0, 42.0, 46.0, 37.0, 31.0, 27.0, 26.0, 34.0, 17.0, 17.0, 20.0, 22.0, 20.0, 21.0, 12.0, 10.0, 6.0, 10.0, 9.0, 6.0, 1.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-3.716796875, -3.6021728515625, -3.487548828125, -3.3729248046875, -3.25830078125, -3.1436767578125, -3.029052734375, -2.9144287109375, -2.7998046875, -2.6851806640625, -2.570556640625, -2.4559326171875, -2.34130859375, -2.2266845703125, -2.112060546875, -1.9974365234375, -1.8828125, -1.7681884765625, -1.653564453125, -1.5389404296875, -1.42431640625, -1.3096923828125, -1.195068359375, -1.0804443359375, -0.9658203125, -0.8511962890625, -0.736572265625, -0.6219482421875, -0.50732421875, -0.3927001953125, -0.278076171875, -0.1634521484375, -0.048828125, 0.0657958984375, 0.180419921875, 0.2950439453125, 0.40966796875, 0.5242919921875, 0.638916015625, 0.7535400390625, 0.8681640625, 0.9827880859375, 1.097412109375, 1.2120361328125, 1.32666015625, 1.4412841796875, 1.555908203125, 1.6705322265625, 1.78515625, 1.8997802734375, 2.014404296875, 2.1290283203125, 2.24365234375, 2.3582763671875, 2.472900390625, 2.5875244140625, 2.7021484375, 2.8167724609375, 2.931396484375, 3.0460205078125, 3.16064453125, 3.2752685546875, 3.389892578125, 3.5045166015625, 3.619140625]}, "gradients/decoder.transformer.h.11.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 0.0, 3.0, 6.0, 5.0, 6.0, 15.0, 11.0, 17.0, 18.0, 27.0, 25.0, 39.0, 46.0, 90.0, 121.0, 135.0, 288.0, 424.0, 777.0, 1450.0, 2931.0, 6781.0, 17713.0, 55634.0, 269213.0, 1451888.0, 1855327.0, 415819.0, 77727.0, 22288.0, 8210.0, 3575.0, 1638.0, 810.0, 427.0, 256.0, 145.0, 87.0, 87.0, 55.0, 40.0, 36.0, 30.0, 28.0, 12.0, 11.0, 10.0, 2.0, 1.0, 7.0, 2.0, 2.0, 2.0], "bins": [-6.7734375, -6.5909423828125, -6.408447265625, -6.2259521484375, -6.04345703125, -5.8609619140625, -5.678466796875, -5.4959716796875, -5.3134765625, -5.1309814453125, -4.948486328125, -4.7659912109375, -4.58349609375, -4.4010009765625, -4.218505859375, -4.0360107421875, -3.853515625, -3.6710205078125, -3.488525390625, -3.3060302734375, -3.12353515625, -2.9410400390625, -2.758544921875, -2.5760498046875, -2.3935546875, -2.2110595703125, -2.028564453125, -1.8460693359375, -1.66357421875, -1.4810791015625, -1.298583984375, -1.1160888671875, -0.93359375, -0.7510986328125, -0.568603515625, -0.3861083984375, -0.20361328125, -0.0211181640625, 0.161376953125, 0.3438720703125, 0.5263671875, 0.7088623046875, 0.891357421875, 1.0738525390625, 1.25634765625, 1.4388427734375, 1.621337890625, 1.8038330078125, 1.986328125, 2.1688232421875, 2.351318359375, 2.5338134765625, 2.71630859375, 2.8988037109375, 3.081298828125, 3.2637939453125, 3.4462890625, 3.6287841796875, 3.811279296875, 3.9937744140625, 4.17626953125, 4.3587646484375, 4.541259765625, 4.7237548828125, 4.90625]}, "gradients/decoder.transformer.h.11.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 4.0, 0.0, 2.0, 5.0, 10.0, 14.0, 14.0, 22.0, 40.0, 54.0, 88.0, 135.0, 245.0, 377.0, 648.0, 782.0, 606.0, 376.0, 215.0, 159.0, 89.0, 53.0, 45.0, 29.0, 25.0, 10.0, 9.0, 8.0, 6.0, 2.0, 5.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.5078125, -6.20458984375, -5.9013671875, -5.59814453125, -5.294921875, -4.99169921875, -4.6884765625, -4.38525390625, -4.08203125, -3.77880859375, -3.4755859375, -3.17236328125, -2.869140625, -2.56591796875, -2.2626953125, -1.95947265625, -1.65625, -1.35302734375, -1.0498046875, -0.74658203125, -0.443359375, -0.14013671875, 0.1630859375, 0.46630859375, 0.76953125, 1.07275390625, 1.3759765625, 1.67919921875, 1.982421875, 2.28564453125, 2.5888671875, 2.89208984375, 3.1953125, 3.49853515625, 3.8017578125, 4.10498046875, 4.408203125, 4.71142578125, 5.0146484375, 5.31787109375, 5.62109375, 5.92431640625, 6.2275390625, 6.53076171875, 6.833984375, 7.13720703125, 7.4404296875, 7.74365234375, 8.046875, 8.35009765625, 8.6533203125, 8.95654296875, 9.259765625, 9.56298828125, 9.8662109375, 10.16943359375, 10.47265625, 10.77587890625, 11.0791015625, 11.38232421875, 11.685546875, 11.98876953125, 12.2919921875, 12.59521484375, 12.8984375]}, "gradients/decoder.transformer.h.11.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 5.0, 1.0, 4.0, 6.0, 4.0, 5.0, 8.0, 15.0, 20.0, 26.0, 48.0, 54.0, 86.0, 136.0, 255.0, 436.0, 1570.0, 28176.0, 3827780.0, 329201.0, 4946.0, 738.0, 312.0, 188.0, 100.0, 49.0, 40.0, 20.0, 24.0, 10.0, 15.0, 10.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-44.53125, -43.410888671875, -42.29052734375, -41.170166015625, -40.0498046875, -38.929443359375, -37.80908203125, -36.688720703125, -35.568359375, -34.447998046875, -33.32763671875, -32.207275390625, -31.0869140625, -29.966552734375, -28.84619140625, -27.725830078125, -26.60546875, -25.485107421875, -24.36474609375, -23.244384765625, -22.1240234375, -21.003662109375, -19.88330078125, -18.762939453125, -17.642578125, -16.522216796875, -15.40185546875, -14.281494140625, -13.1611328125, -12.040771484375, -10.92041015625, -9.800048828125, -8.6796875, -7.559326171875, -6.43896484375, -5.318603515625, -4.1982421875, -3.077880859375, -1.95751953125, -0.837158203125, 0.283203125, 1.403564453125, 2.52392578125, 3.644287109375, 4.7646484375, 5.885009765625, 7.00537109375, 8.125732421875, 9.24609375, 10.366455078125, 11.48681640625, 12.607177734375, 13.7275390625, 14.847900390625, 15.96826171875, 17.088623046875, 18.208984375, 19.329345703125, 20.44970703125, 21.570068359375, 22.6904296875, 23.810791015625, 24.93115234375, 26.051513671875, 27.171875]}, "gradients/decoder.transformer.h.11.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 4.0, 39.0, 300.0, 515.0, 143.0, 12.0, 4.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-38.40724182128906, -33.10000228881836, -27.792762756347656, -22.485523223876953, -17.17828369140625, -11.871044158935547, -6.563804626464844, -1.2565650939941406, 4.0506744384765625, 9.357913970947266, 14.665153503417969, 19.972393035888672, 25.279632568359375, 30.586872100830078, 35.89411163330078, 41.201351165771484, 46.50859069824219, 51.81583023071289, 57.123069763183594, 62.4303092956543, 67.737548828125, 73.04478454589844, 78.3520278930664, 83.65927124023438, 88.96650695800781, 94.27374267578125, 99.58098602294922, 104.88822937011719, 110.19546508789062, 115.50270080566406, 120.80994415283203, 126.1171875, 131.42440795898438, 136.7316436767578, 142.03887939453125, 147.34613037109375, 152.6533660888672, 157.96060180664062, 163.26785278320312, 168.57508850097656, 173.88232421875, 179.18955993652344, 184.49679565429688, 189.80404663085938, 195.1112823486328, 200.41851806640625, 205.72576904296875, 211.0330047607422, 216.34024047851562, 221.64747619628906, 226.9547119140625, 232.261962890625, 237.56919860839844, 242.87643432617188, 248.18368530273438, 253.4909210205078, 258.79815673828125, 264.10540771484375, 269.4126281738281, 274.7198791503906, 280.027099609375, 285.3343505859375, 290.6416015625, 295.9488220214844, 301.2560729980469]}, "gradients/decoder.transformer.h.11.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 2.0, 4.0, 1.0, 5.0, 7.0, 11.0, 10.0, 11.0, 14.0, 19.0, 37.0, 29.0, 35.0, 33.0, 40.0, 49.0, 35.0, 52.0, 56.0, 43.0, 56.0, 59.0, 61.0, 45.0, 44.0, 44.0, 28.0, 30.0, 33.0, 20.0, 23.0, 17.0, 10.0, 15.0, 5.0, 8.0, 7.0, 3.0, 4.0, 3.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.917999267578125, -28.11575698852539, -27.313514709472656, -26.511274337768555, -25.70903205871582, -24.906789779663086, -24.104549407958984, -23.30230712890625, -22.500064849853516, -21.69782257080078, -20.895580291748047, -20.093339920043945, -19.29109764099121, -18.488855361938477, -17.686614990234375, -16.88437271118164, -16.082130432128906, -15.279888153076172, -14.477646827697754, -13.675405502319336, -12.873163223266602, -12.070920944213867, -11.26867961883545, -10.466438293457031, -9.664196014404297, -8.861953735351562, -8.059712409973145, -7.257470607757568, -6.455228805541992, -5.652987003326416, -4.85074520111084, -4.048503398895264, -3.2462615966796875, -2.4440197944641113, -1.6417779922485352, -0.839536190032959, -0.03729438781738281, 0.7649474143981934, 1.5671892166137695, 2.3694310188293457, 3.171672821044922, 3.973914623260498, 4.776156425476074, 5.57839822769165, 6.380640029907227, 7.182881832122803, 7.985123634338379, 8.787364959716797, 9.589607238769531, 10.391849517822266, 11.194090843200684, 11.996332168579102, 12.798574447631836, 13.60081672668457, 14.403058052062988, 15.205299377441406, 16.00754165649414, 16.809783935546875, 17.61202621459961, 18.41426658630371, 19.216508865356445, 20.01875114440918, 20.82099151611328, 21.623233795166016, 22.42547607421875]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 1.0, 4.0, 6.0, 10.0, 4.0, 12.0, 9.0, 8.0, 20.0, 19.0, 19.0, 14.0, 27.0, 27.0, 27.0, 32.0, 20.0, 36.0, 43.0, 38.0, 39.0, 36.0, 43.0, 39.0, 36.0, 43.0, 39.0, 41.0, 34.0, 47.0, 31.0, 20.0, 23.0, 22.0, 20.0, 16.0, 16.0, 20.0, 15.0, 11.0, 12.0, 7.0, 5.0, 4.0, 6.0, 4.0, 1.0, 0.0, 4.0, 2.0], "bins": [-4.5625, -4.4393310546875, -4.316162109375, -4.1929931640625, -4.06982421875, -3.9466552734375, -3.823486328125, -3.7003173828125, -3.5771484375, -3.4539794921875, -3.330810546875, -3.2076416015625, -3.08447265625, -2.9613037109375, -2.838134765625, -2.7149658203125, -2.591796875, -2.4686279296875, -2.345458984375, -2.2222900390625, -2.09912109375, -1.9759521484375, -1.852783203125, -1.7296142578125, -1.6064453125, -1.4832763671875, -1.360107421875, -1.2369384765625, -1.11376953125, -0.9906005859375, -0.867431640625, -0.7442626953125, -0.62109375, -0.4979248046875, -0.374755859375, -0.2515869140625, -0.12841796875, -0.0052490234375, 0.117919921875, 0.2410888671875, 0.3642578125, 0.4874267578125, 0.610595703125, 0.7337646484375, 0.85693359375, 0.9801025390625, 1.103271484375, 1.2264404296875, 1.349609375, 1.4727783203125, 1.595947265625, 1.7191162109375, 1.84228515625, 1.9654541015625, 2.088623046875, 2.2117919921875, 2.3349609375, 2.4581298828125, 2.581298828125, 2.7044677734375, 2.82763671875, 2.9508056640625, 3.073974609375, 3.1971435546875, 3.3203125]}, "gradients/decoder.transformer.h.11.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 5.0, 5.0, 11.0, 12.0, 17.0, 37.0, 47.0, 63.0, 108.0, 149.0, 244.0, 369.0, 651.0, 1023.0, 1583.0, 2548.0, 4146.0, 6535.0, 10465.0, 16812.0, 28094.0, 49606.0, 92036.0, 203532.0, 334406.0, 133314.0, 67443.0, 37577.0, 22005.0, 13229.0, 8373.0, 5217.0, 3341.0, 2061.0, 1299.0, 818.0, 465.0, 352.0, 171.0, 150.0, 90.0, 49.0, 35.0, 30.0, 19.0, 10.0, 3.0, 6.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0], "bins": [-0.1300048828125, -0.12621021270751953, -0.12241554260253906, -0.1186208724975586, -0.11482620239257812, -0.11103153228759766, -0.10723686218261719, -0.10344219207763672, -0.09964752197265625, -0.09585285186767578, -0.09205818176269531, -0.08826351165771484, -0.08446884155273438, -0.0806741714477539, -0.07687950134277344, -0.07308483123779297, -0.0692901611328125, -0.06549549102783203, -0.06170082092285156, -0.057906150817871094, -0.054111480712890625, -0.050316810607910156, -0.04652214050292969, -0.04272747039794922, -0.03893280029296875, -0.03513813018798828, -0.03134346008300781, -0.027548789978027344, -0.023754119873046875, -0.019959449768066406, -0.016164779663085938, -0.012370109558105469, -0.008575439453125, -0.004780769348144531, -0.0009860992431640625, 0.0028085708618164062, 0.006603240966796875, 0.010397911071777344, 0.014192581176757812, 0.01798725128173828, 0.02178192138671875, 0.02557659149169922, 0.029371261596679688, 0.033165931701660156, 0.036960601806640625, 0.040755271911621094, 0.04454994201660156, 0.04834461212158203, 0.0521392822265625, 0.05593395233154297, 0.05972862243652344, 0.0635232925415039, 0.06731796264648438, 0.07111263275146484, 0.07490730285644531, 0.07870197296142578, 0.08249664306640625, 0.08629131317138672, 0.09008598327636719, 0.09388065338134766, 0.09767532348632812, 0.1014699935913086, 0.10526466369628906, 0.10905933380126953, 0.11285400390625]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 4.0, 5.0, 3.0, 4.0, 6.0, 6.0, 6.0, 5.0, 13.0, 15.0, 14.0, 15.0, 28.0, 19.0, 29.0, 25.0, 34.0, 25.0, 38.0, 40.0, 36.0, 42.0, 38.0, 1070.0, 52.0, 37.0, 49.0, 50.0, 32.0, 34.0, 35.0, 28.0, 23.0, 36.0, 12.0, 29.0, 14.0, 14.0, 16.0, 14.0, 8.0, 9.0, 5.0, 5.0, 4.0, 3.0, 5.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.0078125, -2.916046142578125, -2.82427978515625, -2.732513427734375, -2.6407470703125, -2.548980712890625, -2.45721435546875, -2.365447998046875, -2.273681640625, -2.181915283203125, -2.09014892578125, -1.998382568359375, -1.9066162109375, -1.814849853515625, -1.72308349609375, -1.631317138671875, -1.53955078125, -1.447784423828125, -1.35601806640625, -1.264251708984375, -1.1724853515625, -1.080718994140625, -0.98895263671875, -0.897186279296875, -0.805419921875, -0.713653564453125, -0.62188720703125, -0.530120849609375, -0.4383544921875, -0.346588134765625, -0.25482177734375, -0.163055419921875, -0.0712890625, 0.020477294921875, 0.11224365234375, 0.204010009765625, 0.2957763671875, 0.387542724609375, 0.47930908203125, 0.571075439453125, 0.662841796875, 0.754608154296875, 0.84637451171875, 0.938140869140625, 1.0299072265625, 1.121673583984375, 1.21343994140625, 1.305206298828125, 1.39697265625, 1.488739013671875, 1.58050537109375, 1.672271728515625, 1.7640380859375, 1.855804443359375, 1.94757080078125, 2.039337158203125, 2.131103515625, 2.222869873046875, 2.31463623046875, 2.406402587890625, 2.4981689453125, 2.589935302734375, 2.68170166015625, 2.773468017578125, 2.865234375]}, "gradients/decoder.transformer.h.11.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 3.0, 6.0, 11.0, 11.0, 17.0, 21.0, 34.0, 44.0, 79.0, 121.0, 203.0, 286.0, 444.0, 659.0, 1070.0, 1710.0, 2689.0, 4370.0, 7030.0, 11660.0, 19497.0, 33877.0, 60942.0, 123560.0, 1393996.0, 224524.0, 93114.0, 48319.0, 27441.0, 16009.0, 9650.0, 5834.0, 3629.0, 2229.0, 1458.0, 905.0, 597.0, 383.0, 235.0, 146.0, 120.0, 66.0, 54.0, 31.0, 22.0, 12.0, 9.0, 5.0, 6.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.133056640625, -0.12900161743164062, -0.12494659423828125, -0.12089157104492188, -0.1168365478515625, -0.11278152465820312, -0.10872650146484375, -0.10467147827148438, -0.100616455078125, -0.09656143188476562, -0.09250640869140625, -0.08845138549804688, -0.0843963623046875, -0.08034133911132812, -0.07628631591796875, -0.07223129272460938, -0.06817626953125, -0.06412124633789062, -0.06006622314453125, -0.056011199951171875, -0.0519561767578125, -0.047901153564453125, -0.04384613037109375, -0.039791107177734375, -0.035736083984375, -0.031681060791015625, -0.02762603759765625, -0.023571014404296875, -0.0195159912109375, -0.015460968017578125, -0.01140594482421875, -0.007350921630859375, -0.0032958984375, 0.000759124755859375, 0.00481414794921875, 0.008869171142578125, 0.0129241943359375, 0.016979217529296875, 0.02103424072265625, 0.025089263916015625, 0.029144287109375, 0.033199310302734375, 0.03725433349609375, 0.041309356689453125, 0.0453643798828125, 0.049419403076171875, 0.05347442626953125, 0.057529449462890625, 0.06158447265625, 0.06563949584960938, 0.06969451904296875, 0.07374954223632812, 0.0778045654296875, 0.08185958862304688, 0.08591461181640625, 0.08996963500976562, 0.094024658203125, 0.09807968139648438, 0.10213470458984375, 0.10618972778320312, 0.1102447509765625, 0.11429977416992188, 0.11835479736328125, 0.12240982055664062, 0.12646484375]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 1.0, 5.0, 1.0, 3.0, 3.0, 5.0, 5.0, 4.0, 9.0, 8.0, 9.0, 7.0, 9.0, 13.0, 21.0, 14.0, 24.0, 17.0, 32.0, 33.0, 55.0, 35.0, 44.0, 67.0, 58.0, 77.0, 50.0, 63.0, 44.0, 54.0, 42.0, 31.0, 17.0, 27.0, 13.0, 19.0, 12.0, 16.0, 11.0, 9.0, 8.0, 7.0, 5.0, 2.0, 6.0, 5.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-7.033348083496094e-06, -6.8265944719314575e-06, -6.619840860366821e-06, -6.413087248802185e-06, -6.206333637237549e-06, -5.999580025672913e-06, -5.792826414108276e-06, -5.58607280254364e-06, -5.379319190979004e-06, -5.172565579414368e-06, -4.9658119678497314e-06, -4.759058356285095e-06, -4.552304744720459e-06, -4.345551133155823e-06, -4.1387975215911865e-06, -3.93204391002655e-06, -3.725290298461914e-06, -3.518536686897278e-06, -3.3117830753326416e-06, -3.1050294637680054e-06, -2.898275852203369e-06, -2.691522240638733e-06, -2.4847686290740967e-06, -2.2780150175094604e-06, -2.0712614059448242e-06, -1.864507794380188e-06, -1.6577541828155518e-06, -1.4510005712509155e-06, -1.2442469596862793e-06, -1.037493348121643e-06, -8.307397365570068e-07, -6.239861249923706e-07, -4.172325134277344e-07, -2.1047890186309814e-07, -3.725290298461914e-09, 2.0302832126617432e-07, 4.0978193283081055e-07, 6.165355443954468e-07, 8.23289155960083e-07, 1.0300427675247192e-06, 1.2367963790893555e-06, 1.4435499906539917e-06, 1.650303602218628e-06, 1.8570572137832642e-06, 2.0638108253479004e-06, 2.2705644369125366e-06, 2.477318048477173e-06, 2.684071660041809e-06, 2.8908252716064453e-06, 3.0975788831710815e-06, 3.3043324947357178e-06, 3.511086106300354e-06, 3.7178397178649902e-06, 3.9245933294296265e-06, 4.131346940994263e-06, 4.338100552558899e-06, 4.544854164123535e-06, 4.751607775688171e-06, 4.958361387252808e-06, 5.165114998817444e-06, 5.37186861038208e-06, 5.578622221946716e-06, 5.7853758335113525e-06, 5.992129445075989e-06, 6.198883056640625e-06]}, "gradients/decoder.transformer.h.11.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 4.0, 4.0, 7.0, 8.0, 7.0, 9.0, 15.0, 16.0, 32.0, 19.0, 28.0, 28.0, 47.0, 71.0, 81.0, 138.0, 234.0, 437.0, 2072.0, 364600.0, 675884.0, 3562.0, 468.0, 255.0, 137.0, 86.0, 61.0, 41.0, 39.0, 32.0, 20.0, 26.0, 14.0, 7.0, 20.0, 8.0, 9.0, 9.0, 1.0, 4.0, 4.0, 2.0, 3.0, 1.0, 5.0, 2.0, 3.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.00012826919555664062, -0.00012439116835594177, -0.00012051314115524292, -0.00011663511395454407, -0.00011275708675384521, -0.00010887905955314636, -0.00010500103235244751, -0.00010112300515174866, -9.72449779510498e-05, -9.336695075035095e-05, -8.94889235496521e-05, -8.561089634895325e-05, -8.17328691482544e-05, -7.785484194755554e-05, -7.397681474685669e-05, -7.009878754615784e-05, -6.622076034545898e-05, -6.234273314476013e-05, -5.846470594406128e-05, -5.458667874336243e-05, -5.0708651542663574e-05, -4.683062434196472e-05, -4.295259714126587e-05, -3.9074569940567017e-05, -3.5196542739868164e-05, -3.131851553916931e-05, -2.744048833847046e-05, -2.3562461137771606e-05, -1.9684433937072754e-05, -1.58064067363739e-05, -1.1928379535675049e-05, -8.050352334976196e-06, -4.172325134277344e-06, -2.942979335784912e-07, 3.5837292671203613e-06, 7.461756467819214e-06, 1.1339783668518066e-05, 1.5217810869216919e-05, 1.909583806991577e-05, 2.2973865270614624e-05, 2.6851892471313477e-05, 3.072991967201233e-05, 3.460794687271118e-05, 3.8485974073410034e-05, 4.236400127410889e-05, 4.624202847480774e-05, 5.012005567550659e-05, 5.3998082876205444e-05, 5.78761100769043e-05, 6.175413727760315e-05, 6.5632164478302e-05, 6.951019167900085e-05, 7.338821887969971e-05, 7.726624608039856e-05, 8.114427328109741e-05, 8.502230048179626e-05, 8.890032768249512e-05, 9.277835488319397e-05, 9.665638208389282e-05, 0.00010053440928459167, 0.00010441243648529053, 0.00010829046368598938, 0.00011216849088668823, 0.00011604651808738708, 0.00011992454528808594]}, "gradients/decoder.transformer.h.11.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 31.0, 116.0, 320.0, 343.0, 171.0, 29.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.631112460629083e-06, -4.058183549204841e-06, -3.48525509252795e-06, -2.9123264084773837e-06, -2.339397724426817e-06, -1.7664690403762506e-06, -1.1935403563256841e-06, -6.20611899648793e-07, -4.768298822455108e-08, 5.252456958260154e-07, 1.098174379876582e-06, 1.6711030639271485e-06, 2.244031747977715e-06, 2.8169604320282815e-06, 3.389889116078848e-06, 3.962817572755739e-06, 4.535746484179981e-06, 5.108675395604223e-06, 5.681603852281114e-06, 6.254532308958005e-06, 6.827461220382247e-06, 7.400390131806489e-06, 7.97331813373603e-06, 8.546247045160271e-06, 9.119175956584513e-06, 9.692104868008755e-06, 1.0265033779432997e-05, 1.0837961781362537e-05, 1.141089069278678e-05, 1.1983819604211021e-05, 1.2556747606140561e-05, 1.3129676517564803e-05, 1.3702603609999642e-05, 1.4275532521423884e-05, 1.4848461432848126e-05, 1.5421390344272368e-05, 1.599431925569661e-05, 1.6567246348131448e-05, 1.714017525955569e-05, 1.7713104170979932e-05, 1.8286033082404174e-05, 1.8858961993828416e-05, 1.9431890905252658e-05, 2.00048198166769e-05, 2.0577746909111738e-05, 2.115067582053598e-05, 2.1723604731960222e-05, 2.2296533643384464e-05, 2.2869462554808706e-05, 2.3442391466232948e-05, 2.401532037765719e-05, 2.4588249289081432e-05, 2.5161178200505674e-05, 2.5734105292940512e-05, 2.6307034204364754e-05, 2.6879963115788996e-05, 2.7452892027213238e-05, 2.802582093863748e-05, 2.8598749850061722e-05, 2.9171678761485964e-05, 2.9744605853920802e-05, 3.0317534765345044e-05, 3.0890463676769286e-05, 3.146339440718293e-05, 3.203632149961777e-05]}, "gradients/decoder.transformer.h.11.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 0.0, 1.0, 3.0, 8.0, 4.0, 7.0, 3.0, 8.0, 9.0, 14.0, 14.0, 6.0, 23.0, 26.0, 18.0, 32.0, 11.0, 42.0, 39.0, 41.0, 38.0, 22.0, 36.0, 41.0, 40.0, 32.0, 20.0, 45.0, 38.0, 45.0, 41.0, 25.0, 31.0, 23.0, 29.0, 22.0, 15.0, 23.0, 26.0, 20.0, 18.0, 5.0, 11.0, 13.0, 8.0, 7.0, 3.0, 6.0, 7.0, 6.0, 1.0, 1.0, 5.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0994415283203125e-06, -2.9923394322395325e-06, -2.8852373361587524e-06, -2.7781352400779724e-06, -2.6710331439971924e-06, -2.5639310479164124e-06, -2.4568289518356323e-06, -2.3497268557548523e-06, -2.2426247596740723e-06, -2.1355226635932922e-06, -2.028420567512512e-06, -1.921318471431732e-06, -1.8142163753509521e-06, -1.7071142792701721e-06, -1.600012183189392e-06, -1.492910087108612e-06, -1.385807991027832e-06, -1.278705894947052e-06, -1.171603798866272e-06, -1.064501702785492e-06, -9.57399606704712e-07, -8.502975106239319e-07, -7.431954145431519e-07, -6.360933184623718e-07, -5.289912223815918e-07, -4.2188912630081177e-07, -3.1478703022003174e-07, -2.076849341392517e-07, -1.0058283805847168e-07, 6.51925802230835e-09, 1.1362135410308838e-07, 2.207234501838684e-07, 3.2782554626464844e-07, 4.3492764234542847e-07, 5.420297384262085e-07, 6.491318345069885e-07, 7.562339305877686e-07, 8.633360266685486e-07, 9.704381227493286e-07, 1.0775402188301086e-06, 1.1846423149108887e-06, 1.2917444109916687e-06, 1.3988465070724487e-06, 1.5059486031532288e-06, 1.6130506992340088e-06, 1.7201527953147888e-06, 1.8272548913955688e-06, 1.934356987476349e-06, 2.041459083557129e-06, 2.148561179637909e-06, 2.255663275718689e-06, 2.362765371799469e-06, 2.469867467880249e-06, 2.576969563961029e-06, 2.684071660041809e-06, 2.791173756122589e-06, 2.898275852203369e-06, 3.005377948284149e-06, 3.112480044364929e-06, 3.2195821404457092e-06, 3.3266842365264893e-06, 3.4337863326072693e-06, 3.5408884286880493e-06, 3.6479905247688293e-06, 3.7550926208496094e-06]}, "gradients/decoder.transformer.h.11.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 4.0, 1.0, 4.0, 6.0, 10.0, 4.0, 12.0, 9.0, 8.0, 20.0, 19.0, 19.0, 14.0, 27.0, 27.0, 27.0, 32.0, 20.0, 36.0, 43.0, 38.0, 39.0, 36.0, 43.0, 39.0, 36.0, 43.0, 39.0, 41.0, 34.0, 47.0, 31.0, 20.0, 23.0, 22.0, 20.0, 16.0, 16.0, 20.0, 15.0, 11.0, 12.0, 7.0, 5.0, 4.0, 6.0, 4.0, 1.0, 0.0, 4.0, 2.0], "bins": [-4.5625, -4.4393310546875, -4.316162109375, -4.1929931640625, -4.06982421875, -3.9466552734375, -3.823486328125, -3.7003173828125, -3.5771484375, -3.4539794921875, -3.330810546875, -3.2076416015625, -3.08447265625, -2.9613037109375, -2.838134765625, -2.7149658203125, -2.591796875, -2.4686279296875, -2.345458984375, -2.2222900390625, -2.09912109375, -1.9759521484375, -1.852783203125, -1.7296142578125, -1.6064453125, -1.4832763671875, -1.360107421875, -1.2369384765625, -1.11376953125, -0.9906005859375, -0.867431640625, -0.7442626953125, -0.62109375, -0.4979248046875, -0.374755859375, -0.2515869140625, -0.12841796875, -0.0052490234375, 0.117919921875, 0.2410888671875, 0.3642578125, 0.4874267578125, 0.610595703125, 0.7337646484375, 0.85693359375, 0.9801025390625, 1.103271484375, 1.2264404296875, 1.349609375, 1.4727783203125, 1.595947265625, 1.7191162109375, 1.84228515625, 1.9654541015625, 2.088623046875, 2.2117919921875, 2.3349609375, 2.4581298828125, 2.581298828125, 2.7044677734375, 2.82763671875, 2.9508056640625, 3.073974609375, 3.1971435546875, 3.3203125]}, "gradients/decoder.transformer.h.11.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 3.0, 4.0, 2.0, 1.0, 6.0, 9.0, 9.0, 21.0, 20.0, 25.0, 33.0, 43.0, 68.0, 102.0, 124.0, 201.0, 377.0, 547.0, 915.0, 1551.0, 2631.0, 4789.0, 8790.0, 16442.0, 31168.0, 60920.0, 122213.0, 258998.0, 274735.0, 128003.0, 64427.0, 33052.0, 17216.0, 9204.0, 5006.0, 2651.0, 1654.0, 947.0, 544.0, 364.0, 221.0, 165.0, 106.0, 83.0, 50.0, 41.0, 28.0, 16.0, 18.0, 8.0, 7.0, 6.0, 1.0, 3.0], "bins": [-2.359375, -2.2973480224609375, -2.235321044921875, -2.1732940673828125, -2.11126708984375, -2.0492401123046875, -1.987213134765625, -1.9251861572265625, -1.8631591796875, -1.8011322021484375, -1.739105224609375, -1.6770782470703125, -1.61505126953125, -1.5530242919921875, -1.490997314453125, -1.4289703369140625, -1.366943359375, -1.3049163818359375, -1.242889404296875, -1.1808624267578125, -1.11883544921875, -1.0568084716796875, -0.994781494140625, -0.9327545166015625, -0.8707275390625, -0.8087005615234375, -0.746673583984375, -0.6846466064453125, -0.62261962890625, -0.5605926513671875, -0.498565673828125, -0.4365386962890625, -0.37451171875, -0.3124847412109375, -0.250457763671875, -0.1884307861328125, -0.12640380859375, -0.0643768310546875, -0.002349853515625, 0.0596771240234375, 0.1217041015625, 0.1837310791015625, 0.245758056640625, 0.3077850341796875, 0.36981201171875, 0.4318389892578125, 0.493865966796875, 0.5558929443359375, 0.617919921875, 0.6799468994140625, 0.741973876953125, 0.8040008544921875, 0.86602783203125, 0.9280548095703125, 0.990081787109375, 1.0521087646484375, 1.1141357421875, 1.1761627197265625, 1.238189697265625, 1.3002166748046875, 1.36224365234375, 1.4242706298828125, 1.486297607421875, 1.5483245849609375, 1.6103515625]}, "gradients/decoder.transformer.h.11.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 2.0, 6.0, 5.0, 2.0, 7.0, 3.0, 16.0, 16.0, 10.0, 11.0, 19.0, 25.0, 18.0, 27.0, 23.0, 35.0, 37.0, 44.0, 32.0, 62.0, 66.0, 148.0, 1649.0, 275.0, 74.0, 56.0, 58.0, 51.0, 38.0, 38.0, 31.0, 27.0, 17.0, 18.0, 14.0, 19.0, 10.0, 18.0, 7.0, 10.0, 11.0, 8.0, 6.0, 4.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-14.2890625, -13.8525390625, -13.416015625, -12.9794921875, -12.54296875, -12.1064453125, -11.669921875, -11.2333984375, -10.796875, -10.3603515625, -9.923828125, -9.4873046875, -9.05078125, -8.6142578125, -8.177734375, -7.7412109375, -7.3046875, -6.8681640625, -6.431640625, -5.9951171875, -5.55859375, -5.1220703125, -4.685546875, -4.2490234375, -3.8125, -3.3759765625, -2.939453125, -2.5029296875, -2.06640625, -1.6298828125, -1.193359375, -0.7568359375, -0.3203125, 0.1162109375, 0.552734375, 0.9892578125, 1.42578125, 1.8623046875, 2.298828125, 2.7353515625, 3.171875, 3.6083984375, 4.044921875, 4.4814453125, 4.91796875, 5.3544921875, 5.791015625, 6.2275390625, 6.6640625, 7.1005859375, 7.537109375, 7.9736328125, 8.41015625, 8.8466796875, 9.283203125, 9.7197265625, 10.15625, 10.5927734375, 11.029296875, 11.4658203125, 11.90234375, 12.3388671875, 12.775390625, 13.2119140625, 13.6484375]}, "gradients/decoder.transformer.h.11.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 7.0, 6.0, 6.0, 11.0, 8.0, 10.0, 20.0, 19.0, 20.0, 22.0, 26.0, 44.0, 63.0, 81.0, 106.0, 107.0, 173.0, 249.0, 422.0, 1579.0, 28331.0, 3102020.0, 10036.0, 1080.0, 377.0, 210.0, 153.0, 119.0, 77.0, 81.0, 52.0, 40.0, 26.0, 29.0, 22.0, 11.0, 17.0, 12.0, 13.0, 3.0, 4.0, 8.0, 4.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-32.4375, -31.404296875, -30.37109375, -29.337890625, -28.3046875, -27.271484375, -26.23828125, -25.205078125, -24.171875, -23.138671875, -22.10546875, -21.072265625, -20.0390625, -19.005859375, -17.97265625, -16.939453125, -15.90625, -14.873046875, -13.83984375, -12.806640625, -11.7734375, -10.740234375, -9.70703125, -8.673828125, -7.640625, -6.607421875, -5.57421875, -4.541015625, -3.5078125, -2.474609375, -1.44140625, -0.408203125, 0.625, 1.658203125, 2.69140625, 3.724609375, 4.7578125, 5.791015625, 6.82421875, 7.857421875, 8.890625, 9.923828125, 10.95703125, 11.990234375, 13.0234375, 14.056640625, 15.08984375, 16.123046875, 17.15625, 18.189453125, 19.22265625, 20.255859375, 21.2890625, 22.322265625, 23.35546875, 24.388671875, 25.421875, 26.455078125, 27.48828125, 28.521484375, 29.5546875, 30.587890625, 31.62109375, 32.654296875, 33.6875]}, "gradients/decoder.transformer.h.11.ln_1.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 122.0, 821.0, 73.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.321300506591797, -14.93751335144043, -11.553725242614746, -8.169937133789062, -4.786149978637695, -1.4023628234863281, 1.9814262390136719, 5.365213394165039, 8.749000549316406, 12.132787704467773, 15.516575813293457, 18.90036392211914, 22.284151077270508, 25.667938232421875, 29.051727294921875, 32.435516357421875, 35.81930160522461, 39.20309066772461, 42.586875915527344, 45.970664978027344, 49.354454040527344, 52.73823928833008, 56.12202835083008, 59.50581359863281, 62.88960266113281, 66.27339172363281, 69.65718078613281, 73.04096984863281, 76.42475128173828, 79.80854034423828, 83.19232940673828, 86.57611846923828, 89.95989990234375, 93.34368896484375, 96.72747802734375, 100.11126708984375, 103.49504852294922, 106.87883758544922, 110.26262664794922, 113.64641571044922, 117.03019714355469, 120.41398620605469, 123.79777526855469, 127.18156433105469, 130.5653533935547, 133.94912719726562, 137.33291625976562, 140.71670532226562, 144.10049438476562, 147.48428344726562, 150.86807250976562, 154.25186157226562, 157.63565063476562, 161.01943969726562, 164.40322875976562, 167.78700256347656, 171.17080688476562, 174.55459594726562, 177.93838500976562, 181.32217407226562, 184.70596313476562, 188.08975219726562, 191.47354125976562, 194.85731506347656, 198.24110412597656]}, "gradients/decoder.transformer.h.11.ln_1.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 3.0, 6.0, 8.0, 5.0, 5.0, 8.0, 13.0, 6.0, 10.0, 14.0, 22.0, 25.0, 20.0, 31.0, 16.0, 30.0, 38.0, 33.0, 43.0, 42.0, 49.0, 56.0, 48.0, 41.0, 29.0, 41.0, 62.0, 45.0, 29.0, 32.0, 26.0, 26.0, 22.0, 18.0, 22.0, 13.0, 18.0, 10.0, 9.0, 6.0, 7.0, 2.0, 6.0, 6.0, 4.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-35.98786544799805, -34.85140609741211, -33.71495056152344, -32.5784912109375, -31.442031860351562, -30.305572509765625, -29.16911506652832, -28.032657623291016, -26.896198272705078, -25.75973892211914, -24.623281478881836, -23.48682403564453, -22.350364685058594, -21.213905334472656, -20.07744789123535, -18.940990447998047, -17.80453109741211, -16.668071746826172, -15.531614303588867, -14.395155906677246, -13.258697509765625, -12.122239112854004, -10.985780715942383, -9.849322319030762, -8.71286392211914, -7.5764055252075195, -6.439947128295898, -5.303488731384277, -4.167030334472656, -3.030571937561035, -1.894113540649414, -0.757655143737793, 0.3788032531738281, 1.5152616500854492, 2.6517200469970703, 3.7881784439086914, 4.9246368408203125, 6.061095237731934, 7.197553634643555, 8.334012031555176, 9.470470428466797, 10.606928825378418, 11.743387222290039, 12.87984561920166, 14.016304016113281, 15.152762413024902, 16.289220809936523, 17.425678253173828, 18.562137603759766, 19.698596954345703, 20.835054397583008, 21.971511840820312, 23.10797119140625, 24.244430541992188, 25.380887985229492, 26.517345428466797, 27.653804779052734, 28.790264129638672, 29.926721572875977, 31.06317901611328, 32.19963836669922, 33.336097717285156, 34.472557067871094, 35.609012603759766, 36.7454719543457]}, "gradients/decoder.transformer.h.10.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 7.0, 5.0, 7.0, 10.0, 7.0, 7.0, 10.0, 15.0, 19.0, 10.0, 19.0, 19.0, 33.0, 28.0, 27.0, 30.0, 27.0, 33.0, 33.0, 45.0, 44.0, 37.0, 22.0, 45.0, 44.0, 43.0, 34.0, 43.0, 39.0, 46.0, 23.0, 21.0, 21.0, 23.0, 17.0, 16.0, 15.0, 19.0, 16.0, 13.0, 10.0, 7.0, 3.0, 2.0, 7.0, 6.0, 1.0, 3.0, 2.0, 2.0], "bins": [-4.546875, -4.423248291015625, -4.29962158203125, -4.175994873046875, -4.0523681640625, -3.928741455078125, -3.80511474609375, -3.681488037109375, -3.557861328125, -3.434234619140625, -3.31060791015625, -3.186981201171875, -3.0633544921875, -2.939727783203125, -2.81610107421875, -2.692474365234375, -2.56884765625, -2.445220947265625, -2.32159423828125, -2.197967529296875, -2.0743408203125, -1.950714111328125, -1.82708740234375, -1.703460693359375, -1.579833984375, -1.456207275390625, -1.33258056640625, -1.208953857421875, -1.0853271484375, -0.961700439453125, -0.83807373046875, -0.714447021484375, -0.5908203125, -0.467193603515625, -0.34356689453125, -0.219940185546875, -0.0963134765625, 0.027313232421875, 0.15093994140625, 0.274566650390625, 0.398193359375, 0.521820068359375, 0.64544677734375, 0.769073486328125, 0.8927001953125, 1.016326904296875, 1.13995361328125, 1.263580322265625, 1.38720703125, 1.510833740234375, 1.63446044921875, 1.758087158203125, 1.8817138671875, 2.005340576171875, 2.12896728515625, 2.252593994140625, 2.376220703125, 2.499847412109375, 2.62347412109375, 2.747100830078125, 2.8707275390625, 2.994354248046875, 3.11798095703125, 3.241607666015625, 3.365234375]}, "gradients/decoder.transformer.h.10.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 1.0, 3.0, 6.0, 5.0, 5.0, 18.0, 13.0, 12.0, 14.0, 25.0, 24.0, 36.0, 52.0, 72.0, 88.0, 124.0, 273.0, 544.0, 1301.0, 4391.0, 19551.0, 172216.0, 2842718.0, 1078421.0, 60220.0, 9851.0, 2530.0, 800.0, 385.0, 170.0, 123.0, 64.0, 59.0, 38.0, 26.0, 27.0, 18.0, 20.0, 8.0, 8.0, 10.0, 6.0, 6.0, 5.0, 4.0, 2.0, 2.0, 0.0, 1.0], "bins": [-14.0, -13.6295166015625, -13.259033203125, -12.8885498046875, -12.51806640625, -12.1475830078125, -11.777099609375, -11.4066162109375, -11.0361328125, -10.6656494140625, -10.295166015625, -9.9246826171875, -9.55419921875, -9.1837158203125, -8.813232421875, -8.4427490234375, -8.072265625, -7.7017822265625, -7.331298828125, -6.9608154296875, -6.59033203125, -6.2198486328125, -5.849365234375, -5.4788818359375, -5.1083984375, -4.7379150390625, -4.367431640625, -3.9969482421875, -3.62646484375, -3.2559814453125, -2.885498046875, -2.5150146484375, -2.14453125, -1.7740478515625, -1.403564453125, -1.0330810546875, -0.66259765625, -0.2921142578125, 0.078369140625, 0.4488525390625, 0.8193359375, 1.1898193359375, 1.560302734375, 1.9307861328125, 2.30126953125, 2.6717529296875, 3.042236328125, 3.4127197265625, 3.783203125, 4.1536865234375, 4.524169921875, 4.8946533203125, 5.26513671875, 5.6356201171875, 6.006103515625, 6.3765869140625, 6.7470703125, 7.1175537109375, 7.488037109375, 7.8585205078125, 8.22900390625, 8.5994873046875, 8.969970703125, 9.3404541015625, 9.7109375]}, "gradients/decoder.transformer.h.10.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 3.0, 8.0, 16.0, 27.0, 37.0, 67.0, 124.0, 260.0, 451.0, 946.0, 901.0, 550.0, 272.0, 186.0, 87.0, 67.0, 30.0, 21.0, 10.0, 5.0, 4.0, 3.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-17.046875, -16.6317138671875, -16.216552734375, -15.8013916015625, -15.38623046875, -14.9710693359375, -14.555908203125, -14.1407470703125, -13.7255859375, -13.3104248046875, -12.895263671875, -12.4801025390625, -12.06494140625, -11.6497802734375, -11.234619140625, -10.8194580078125, -10.404296875, -9.9891357421875, -9.573974609375, -9.1588134765625, -8.74365234375, -8.3284912109375, -7.913330078125, -7.4981689453125, -7.0830078125, -6.6678466796875, -6.252685546875, -5.8375244140625, -5.42236328125, -5.0072021484375, -4.592041015625, -4.1768798828125, -3.76171875, -3.3465576171875, -2.931396484375, -2.5162353515625, -2.10107421875, -1.6859130859375, -1.270751953125, -0.8555908203125, -0.4404296875, -0.0252685546875, 0.389892578125, 0.8050537109375, 1.22021484375, 1.6353759765625, 2.050537109375, 2.4656982421875, 2.880859375, 3.2960205078125, 3.711181640625, 4.1263427734375, 4.54150390625, 4.9566650390625, 5.371826171875, 5.7869873046875, 6.2021484375, 6.6173095703125, 7.032470703125, 7.4476318359375, 7.86279296875, 8.2779541015625, 8.693115234375, 9.1082763671875, 9.5234375]}, "gradients/decoder.transformer.h.10.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 2.0, 4.0, 1.0, 6.0, 1.0, 6.0, 8.0, 25.0, 32.0, 66.0, 100.0, 202.0, 374.0, 882.0, 7384.0, 3552503.0, 627999.0, 3442.0, 655.0, 317.0, 128.0, 72.0, 30.0, 22.0, 10.0, 9.0, 8.0, 5.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.078125, -28.565185546875, -27.05224609375, -25.539306640625, -24.0263671875, -22.513427734375, -21.00048828125, -19.487548828125, -17.974609375, -16.461669921875, -14.94873046875, -13.435791015625, -11.9228515625, -10.409912109375, -8.89697265625, -7.384033203125, -5.87109375, -4.358154296875, -2.84521484375, -1.332275390625, 0.1806640625, 1.693603515625, 3.20654296875, 4.719482421875, 6.232421875, 7.745361328125, 9.25830078125, 10.771240234375, 12.2841796875, 13.797119140625, 15.31005859375, 16.822998046875, 18.3359375, 19.848876953125, 21.36181640625, 22.874755859375, 24.3876953125, 25.900634765625, 27.41357421875, 28.926513671875, 30.439453125, 31.952392578125, 33.46533203125, 34.978271484375, 36.4912109375, 38.004150390625, 39.51708984375, 41.030029296875, 42.54296875, 44.055908203125, 45.56884765625, 47.081787109375, 48.5947265625, 50.107666015625, 51.62060546875, 53.133544921875, 54.646484375, 56.159423828125, 57.67236328125, 59.185302734375, 60.6982421875, 62.211181640625, 63.72412109375, 65.237060546875, 66.75]}, "gradients/decoder.transformer.h.10.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 13.0, 21.0, 52.0, 90.0, 143.0, 160.0, 172.0, 142.0, 100.0, 62.0, 30.0, 9.0, 10.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-72.1933822631836, -70.43190002441406, -68.67042541503906, -66.90894317626953, -65.14746856689453, -63.385986328125, -61.624507904052734, -59.86302947998047, -58.10154724121094, -56.34006881713867, -54.578590393066406, -52.817108154296875, -51.05562973022461, -49.294151306152344, -47.53267288208008, -45.77119445800781, -44.00971603393555, -42.24823760986328, -40.486759185791016, -38.72528076171875, -36.96379852294922, -35.20232009887695, -33.44084167480469, -31.679363250732422, -29.917882919311523, -28.156404495239258, -26.39492416381836, -24.633445739746094, -22.871967315673828, -21.11048698425293, -19.349008560180664, -17.587528228759766, -15.826053619384766, -14.064574241638184, -12.303094863891602, -10.541616439819336, -8.780137062072754, -7.018657684326172, -5.257179260253906, -3.495699882507324, -1.7342205047607422, 0.027258634567260742, 1.7887377738952637, 3.5502166748046875, 5.3116960525512695, 7.073175430297852, 8.834653854370117, 10.5961332321167, 12.357612609863281, 14.119091987609863, 15.880571365356445, 17.64204978942871, 19.40353012084961, 21.165008544921875, 22.92648696899414, 24.687965393066406, 26.449445724487305, 28.21092414855957, 29.97240447998047, 31.733882904052734, 33.495361328125, 35.25684356689453, 37.01831817626953, 38.77980041503906, 40.54127883911133]}, "gradients/decoder.transformer.h.10.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 4.0, 7.0, 13.0, 13.0, 16.0, 14.0, 15.0, 25.0, 23.0, 32.0, 37.0, 28.0, 44.0, 48.0, 45.0, 50.0, 52.0, 46.0, 50.0, 48.0, 47.0, 43.0, 47.0, 31.0, 45.0, 26.0, 32.0, 31.0, 23.0, 15.0, 17.0, 9.0, 10.0, 6.0, 4.0, 6.0, 6.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-32.727577209472656, -31.829729080200195, -30.9318790435791, -30.03403091430664, -29.13618278503418, -28.23833465576172, -27.340484619140625, -26.442636489868164, -25.544788360595703, -24.646940231323242, -23.74909019470215, -22.851242065429688, -21.953393936157227, -21.055545806884766, -20.157695770263672, -19.25984764099121, -18.361997604370117, -17.464149475097656, -16.566299438476562, -15.668451309204102, -14.77060317993164, -13.872754096984863, -12.974905014038086, -12.077056884765625, -11.179207801818848, -10.28135871887207, -9.38351058959961, -8.485661506652832, -7.587812900543213, -6.689964294433594, -5.792115211486816, -4.894266605377197, -3.996417999267578, -3.098569393157959, -2.2007205486297607, -1.3028717041015625, -0.40502309799194336, 0.4928255081176758, 1.3906745910644531, 2.2885231971740723, 3.1863718032836914, 4.0842204093933105, 4.98206901550293, 5.879918098449707, 6.777766704559326, 7.675615310668945, 8.573464393615723, 9.4713134765625, 10.369161605834961, 11.267010688781738, 12.1648588180542, 13.062707901000977, 13.960556030273438, 14.858405113220215, 15.756254196166992, 16.654102325439453, 17.551952362060547, 18.449800491333008, 19.3476505279541, 20.245498657226562, 21.143346786499023, 22.041194915771484, 22.939044952392578, 23.83689308166504, 24.7347412109375]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 2.0, 3.0, 2.0, 1.0, 5.0, 6.0, 6.0, 2.0, 7.0, 10.0, 5.0, 12.0, 11.0, 11.0, 15.0, 12.0, 19.0, 28.0, 22.0, 18.0, 28.0, 32.0, 31.0, 30.0, 31.0, 33.0, 34.0, 28.0, 32.0, 38.0, 27.0, 40.0, 45.0, 35.0, 29.0, 35.0, 22.0, 33.0, 26.0, 29.0, 24.0, 19.0, 22.0, 12.0, 12.0, 17.0, 13.0, 13.0, 6.0, 7.0, 5.0, 8.0, 7.0, 6.0, 3.0, 1.0, 0.0, 4.0, 1.0, 1.0, 2.0], "bins": [-3.609375, -3.49835205078125, -3.3873291015625, -3.27630615234375, -3.165283203125, -3.05426025390625, -2.9432373046875, -2.83221435546875, -2.72119140625, -2.61016845703125, -2.4991455078125, -2.38812255859375, -2.277099609375, -2.16607666015625, -2.0550537109375, -1.94403076171875, -1.8330078125, -1.72198486328125, -1.6109619140625, -1.49993896484375, -1.388916015625, -1.27789306640625, -1.1668701171875, -1.05584716796875, -0.94482421875, -0.83380126953125, -0.7227783203125, -0.61175537109375, -0.500732421875, -0.38970947265625, -0.2786865234375, -0.16766357421875, -0.056640625, 0.05438232421875, 0.1654052734375, 0.27642822265625, 0.387451171875, 0.49847412109375, 0.6094970703125, 0.72052001953125, 0.83154296875, 0.94256591796875, 1.0535888671875, 1.16461181640625, 1.275634765625, 1.38665771484375, 1.4976806640625, 1.60870361328125, 1.7197265625, 1.83074951171875, 1.9417724609375, 2.05279541015625, 2.163818359375, 2.27484130859375, 2.3858642578125, 2.49688720703125, 2.60791015625, 2.71893310546875, 2.8299560546875, 2.94097900390625, 3.052001953125, 3.16302490234375, 3.2740478515625, 3.38507080078125, 3.49609375]}, "gradients/decoder.transformer.h.10.crossattention.c_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 4.0, 6.0, 10.0, 7.0, 27.0, 32.0, 46.0, 69.0, 98.0, 125.0, 187.0, 296.0, 381.0, 553.0, 752.0, 1044.0, 1402.0, 1948.0, 2689.0, 3838.0, 5295.0, 7409.0, 10704.0, 15520.0, 22822.0, 34357.0, 53003.0, 87175.0, 162249.0, 279955.0, 134236.0, 75966.0, 47403.0, 30455.0, 20650.0, 14139.0, 9616.0, 6889.0, 4940.0, 3331.0, 2547.0, 1861.0, 1246.0, 930.0, 664.0, 483.0, 357.0, 268.0, 170.0, 116.0, 89.0, 64.0, 38.0, 37.0, 20.0, 21.0, 10.0, 11.0, 4.0, 5.0, 1.0, 3.0], "bins": [-0.089599609375, -0.08674335479736328, -0.08388710021972656, -0.08103084564208984, -0.07817459106445312, -0.0753183364868164, -0.07246208190917969, -0.06960582733154297, -0.06674957275390625, -0.06389331817626953, -0.06103706359863281, -0.058180809020996094, -0.055324554443359375, -0.052468299865722656, -0.04961204528808594, -0.04675579071044922, -0.0438995361328125, -0.04104328155517578, -0.03818702697753906, -0.035330772399902344, -0.032474517822265625, -0.029618263244628906, -0.026762008666992188, -0.02390575408935547, -0.02104949951171875, -0.01819324493408203, -0.015336990356445312, -0.012480735778808594, -0.009624481201171875, -0.006768226623535156, -0.0039119720458984375, -0.0010557174682617188, 0.001800537109375, 0.004656791687011719, 0.0075130462646484375, 0.010369300842285156, 0.013225555419921875, 0.016081809997558594, 0.018938064575195312, 0.02179431915283203, 0.02465057373046875, 0.02750682830810547, 0.030363082885742188, 0.033219337463378906, 0.036075592041015625, 0.038931846618652344, 0.04178810119628906, 0.04464435577392578, 0.0475006103515625, 0.05035686492919922, 0.05321311950683594, 0.056069374084472656, 0.058925628662109375, 0.061781883239746094, 0.06463813781738281, 0.06749439239501953, 0.07035064697265625, 0.07320690155029297, 0.07606315612792969, 0.0789194107055664, 0.08177566528320312, 0.08463191986083984, 0.08748817443847656, 0.09034442901611328, 0.09320068359375]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 3.0, 3.0, 2.0, 5.0, 4.0, 5.0, 7.0, 11.0, 17.0, 7.0, 18.0, 17.0, 12.0, 21.0, 23.0, 23.0, 20.0, 30.0, 24.0, 21.0, 25.0, 45.0, 36.0, 37.0, 37.0, 46.0, 1065.0, 36.0, 46.0, 29.0, 31.0, 38.0, 42.0, 28.0, 27.0, 24.0, 22.0, 21.0, 13.0, 17.0, 19.0, 13.0, 13.0, 12.0, 8.0, 6.0, 3.0, 3.0, 5.0, 6.0, 3.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0], "bins": [-2.486328125, -2.404937744140625, -2.32354736328125, -2.242156982421875, -2.1607666015625, -2.079376220703125, -1.99798583984375, -1.916595458984375, -1.835205078125, -1.753814697265625, -1.67242431640625, -1.591033935546875, -1.5096435546875, -1.428253173828125, -1.34686279296875, -1.265472412109375, -1.18408203125, -1.102691650390625, -1.02130126953125, -0.939910888671875, -0.8585205078125, -0.777130126953125, -0.69573974609375, -0.614349365234375, -0.532958984375, -0.451568603515625, -0.37017822265625, -0.288787841796875, -0.2073974609375, -0.126007080078125, -0.04461669921875, 0.036773681640625, 0.1181640625, 0.199554443359375, 0.28094482421875, 0.362335205078125, 0.4437255859375, 0.525115966796875, 0.60650634765625, 0.687896728515625, 0.769287109375, 0.850677490234375, 0.93206787109375, 1.013458251953125, 1.0948486328125, 1.176239013671875, 1.25762939453125, 1.339019775390625, 1.42041015625, 1.501800537109375, 1.58319091796875, 1.664581298828125, 1.7459716796875, 1.827362060546875, 1.90875244140625, 1.990142822265625, 2.071533203125, 2.152923583984375, 2.23431396484375, 2.315704345703125, 2.3970947265625, 2.478485107421875, 2.55987548828125, 2.641265869140625, 2.72265625]}, "gradients/decoder.transformer.h.10.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 3.0, 6.0, 8.0, 6.0, 10.0, 18.0, 22.0, 24.0, 59.0, 88.0, 110.0, 161.0, 258.0, 351.0, 580.0, 821.0, 1240.0, 1798.0, 2719.0, 4263.0, 6552.0, 9907.0, 15774.0, 25060.0, 41427.0, 73306.0, 145344.0, 1397166.0, 167950.0, 81070.0, 45433.0, 27281.0, 17157.0, 10866.0, 7010.0, 4518.0, 2952.0, 1958.0, 1275.0, 852.0, 531.0, 374.0, 272.0, 180.0, 110.0, 78.0, 57.0, 42.0, 31.0, 24.0, 17.0, 8.0, 6.0, 4.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0], "bins": [-0.10931396484375, -0.10573959350585938, -0.10216522216796875, -0.09859085083007812, -0.0950164794921875, -0.09144210815429688, -0.08786773681640625, -0.08429336547851562, -0.080718994140625, -0.07714462280273438, -0.07357025146484375, -0.06999588012695312, -0.0664215087890625, -0.06284713745117188, -0.05927276611328125, -0.055698394775390625, -0.0521240234375, -0.048549652099609375, -0.04497528076171875, -0.041400909423828125, -0.0378265380859375, -0.034252166748046875, -0.03067779541015625, -0.027103424072265625, -0.023529052734375, -0.019954681396484375, -0.01638031005859375, -0.012805938720703125, -0.0092315673828125, -0.005657196044921875, -0.00208282470703125, 0.001491546630859375, 0.00506591796875, 0.008640289306640625, 0.01221466064453125, 0.015789031982421875, 0.0193634033203125, 0.022937774658203125, 0.02651214599609375, 0.030086517333984375, 0.033660888671875, 0.037235260009765625, 0.04080963134765625, 0.044384002685546875, 0.0479583740234375, 0.051532745361328125, 0.05510711669921875, 0.058681488037109375, 0.062255859375, 0.06583023071289062, 0.06940460205078125, 0.07297897338867188, 0.0765533447265625, 0.08012771606445312, 0.08370208740234375, 0.08727645874023438, 0.090850830078125, 0.09442520141601562, 0.09799957275390625, 0.10157394409179688, 0.1051483154296875, 0.10872268676757812, 0.11229705810546875, 0.11587142944335938, 0.11944580078125]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.bias": {"_type": "histogram", "values": [4.0, 0.0, 3.0, 5.0, 1.0, 2.0, 3.0, 7.0, 3.0, 9.0, 4.0, 9.0, 11.0, 9.0, 11.0, 24.0, 16.0, 17.0, 26.0, 26.0, 28.0, 38.0, 37.0, 32.0, 39.0, 40.0, 54.0, 47.0, 58.0, 60.0, 54.0, 43.0, 33.0, 32.0, 31.0, 25.0, 26.0, 23.0, 21.0, 26.0, 10.0, 19.0, 10.0, 9.0, 3.0, 11.0, 3.0, 4.0, 3.0, 3.0, 0.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.424022674560547e-06, -5.229376256465912e-06, -5.034729838371277e-06, -4.840083420276642e-06, -4.645437002182007e-06, -4.450790584087372e-06, -4.256144165992737e-06, -4.061497747898102e-06, -3.866851329803467e-06, -3.6722049117088318e-06, -3.4775584936141968e-06, -3.2829120755195618e-06, -3.0882656574249268e-06, -2.8936192393302917e-06, -2.6989728212356567e-06, -2.5043264031410217e-06, -2.3096799850463867e-06, -2.1150335669517517e-06, -1.9203871488571167e-06, -1.7257407307624817e-06, -1.5310943126678467e-06, -1.3364478945732117e-06, -1.1418014764785767e-06, -9.471550583839417e-07, -7.525086402893066e-07, -5.578622221946716e-07, -3.632158041000366e-07, -1.685693860054016e-07, 2.60770320892334e-08, 2.207234501838684e-07, 4.153698682785034e-07, 6.100162863731384e-07, 8.046627044677734e-07, 9.993091225624084e-07, 1.1939555406570435e-06, 1.3886019587516785e-06, 1.5832483768463135e-06, 1.7778947949409485e-06, 1.9725412130355835e-06, 2.1671876311302185e-06, 2.3618340492248535e-06, 2.5564804673194885e-06, 2.7511268854141235e-06, 2.9457733035087585e-06, 3.1404197216033936e-06, 3.3350661396980286e-06, 3.5297125577926636e-06, 3.7243589758872986e-06, 3.919005393981934e-06, 4.113651812076569e-06, 4.308298230171204e-06, 4.502944648265839e-06, 4.697591066360474e-06, 4.892237484455109e-06, 5.086883902549744e-06, 5.281530320644379e-06, 5.476176738739014e-06, 5.670823156833649e-06, 5.865469574928284e-06, 6.060115993022919e-06, 6.254762411117554e-06, 6.449408829212189e-06, 6.644055247306824e-06, 6.838701665401459e-06, 7.033348083496094e-06]}, "gradients/decoder.transformer.h.10.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0, 3.0, 4.0, 2.0, 11.0, 5.0, 11.0, 15.0, 15.0, 21.0, 18.0, 44.0, 45.0, 43.0, 53.0, 84.0, 100.0, 145.0, 282.0, 739.0, 10437.0, 937592.0, 96388.0, 1374.0, 378.0, 206.0, 118.0, 85.0, 72.0, 50.0, 36.0, 36.0, 28.0, 21.0, 25.0, 15.0, 10.0, 5.0, 12.0, 6.0, 7.0, 4.0, 5.0, 6.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.0001341104507446289, -0.00013035442680120468, -0.00012659840285778046, -0.00012284237891435623, -0.00011908635497093201, -0.00011533033102750778, -0.00011157430708408356, -0.00010781828314065933, -0.00010406225919723511, -0.00010030623525381088, -9.655021131038666e-05, -9.279418736696243e-05, -8.903816342353821e-05, -8.528213948011398e-05, -8.152611553668976e-05, -7.777009159326553e-05, -7.401406764984131e-05, -7.025804370641708e-05, -6.650201976299286e-05, -6.274599581956863e-05, -5.898997187614441e-05, -5.5233947932720184e-05, -5.147792398929596e-05, -4.7721900045871735e-05, -4.396587610244751e-05, -4.0209852159023285e-05, -3.645382821559906e-05, -3.2697804272174835e-05, -2.894178032875061e-05, -2.5185756385326385e-05, -2.142973244190216e-05, -1.7673708498477936e-05, -1.3917684555053711e-05, -1.0161660611629486e-05, -6.405636668205261e-06, -2.6496127247810364e-06, 1.1064112186431885e-06, 4.862435162067413e-06, 8.618459105491638e-06, 1.2374483048915863e-05, 1.6130506992340088e-05, 1.9886530935764313e-05, 2.3642554879188538e-05, 2.7398578822612762e-05, 3.115460276603699e-05, 3.491062670946121e-05, 3.866665065288544e-05, 4.242267459630966e-05, 4.617869853973389e-05, 4.993472248315811e-05, 5.3690746426582336e-05, 5.744677037000656e-05, 6.120279431343079e-05, 6.495881825685501e-05, 6.871484220027924e-05, 7.247086614370346e-05, 7.622689008712769e-05, 7.998291403055191e-05, 8.373893797397614e-05, 8.749496191740036e-05, 9.125098586082458e-05, 9.500700980424881e-05, 9.876303374767303e-05, 0.00010251905769109726, 0.00010627508163452148]}, "gradients/decoder.transformer.h.10.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 11.0, 92.0, 364.0, 396.0, 134.0, 13.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.0262066755094565e-05, -1.9631364921224304e-05, -1.9000661268364638e-05, -1.8369959434494376e-05, -1.7739257600624114e-05, -1.710855394776445e-05, -1.6477852113894187e-05, -1.584714846103452e-05, -1.521644662716426e-05, -1.4585743883799296e-05, -1.3955042049929034e-05, -1.332433930656407e-05, -1.2693636563199107e-05, -1.2062933819834143e-05, -1.1432231985963881e-05, -1.0801529242598917e-05, -1.0170827408728655e-05, -9.540124665363692e-06, -8.90942283149343e-06, -8.278720088128466e-06, -7.648017344763502e-06, -7.01731505614589e-06, -6.386612767528277e-06, -5.755910024163313e-06, -5.1252077355457e-06, -4.494505446928088e-06, -3.863802703563124e-06, -3.233100414945511e-06, -2.602397898954223e-06, -1.9716953829629347e-06, -1.3409930943453219e-06, -7.102903509803582e-07, -7.95880623627454e-08, 5.51114396785124e-07, 1.1818168559329933e-06, 1.8125192582374439e-06, 2.443221774228732e-06, 3.0739242902200203e-06, 3.704626578837633e-06, 4.335329322202597e-06, 4.96603161082021e-06, 5.596733899437822e-06, 6.227436642802786e-06, 6.858138931420399e-06, 7.488841220038012e-06, 8.119543963402975e-06, 8.750246706767939e-06, 9.380948540638201e-06, 1.0011651284003165e-05, 1.0642354027368128e-05, 1.127305586123839e-05, 1.1903758604603354e-05, 1.2534461347968318e-05, 1.316516318183858e-05, 1.3795865925203543e-05, 1.4426568668568507e-05, 1.5057270502438769e-05, 1.568797233630903e-05, 1.6318675989168696e-05, 1.6949377823038958e-05, 1.758007965690922e-05, 1.8210783309768885e-05, 1.8841485143639147e-05, 1.947218697750941e-05, 2.0102890630369075e-05]}, "gradients/decoder.transformer.h.10.ln_cross_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 8.0, 8.0, 12.0, 8.0, 10.0, 13.0, 14.0, 16.0, 17.0, 14.0, 23.0, 27.0, 16.0, 52.0, 38.0, 46.0, 46.0, 40.0, 48.0, 37.0, 66.0, 34.0, 29.0, 42.0, 31.0, 42.0, 43.0, 41.0, 27.0, 26.0, 35.0, 20.0, 15.0, 7.0, 14.0, 9.0, 5.0, 7.0, 5.0, 2.0, 4.0, 5.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.516674041748047e-06, -3.3890828490257263e-06, -3.2614916563034058e-06, -3.133900463581085e-06, -3.0063092708587646e-06, -2.878718078136444e-06, -2.7511268854141235e-06, -2.623535692691803e-06, -2.4959444999694824e-06, -2.368353307247162e-06, -2.2407621145248413e-06, -2.1131709218025208e-06, -1.9855797290802e-06, -1.8579885363578796e-06, -1.730397343635559e-06, -1.6028061509132385e-06, -1.475214958190918e-06, -1.3476237654685974e-06, -1.2200325727462769e-06, -1.0924413800239563e-06, -9.648501873016357e-07, -8.372589945793152e-07, -7.096678018569946e-07, -5.820766091346741e-07, -4.544854164123535e-07, -3.2689422369003296e-07, -1.993030309677124e-07, -7.171183824539185e-08, 5.587935447692871e-08, 1.8347054719924927e-07, 3.110617399215698e-07, 4.386529326438904e-07, 5.662441253662109e-07, 6.938353180885315e-07, 8.21426510810852e-07, 9.490177035331726e-07, 1.0766088962554932e-06, 1.2042000889778137e-06, 1.3317912817001343e-06, 1.4593824744224548e-06, 1.5869736671447754e-06, 1.714564859867096e-06, 1.8421560525894165e-06, 1.969747245311737e-06, 2.0973384380340576e-06, 2.224929630756378e-06, 2.3525208234786987e-06, 2.4801120162010193e-06, 2.60770320892334e-06, 2.7352944016456604e-06, 2.862885594367981e-06, 2.9904767870903015e-06, 3.118067979812622e-06, 3.2456591725349426e-06, 3.373250365257263e-06, 3.5008415579795837e-06, 3.6284327507019043e-06, 3.756023943424225e-06, 3.883615136146545e-06, 4.011206328868866e-06, 4.1387975215911865e-06, 4.266388714313507e-06, 4.393979907035828e-06, 4.521571099758148e-06, 4.649162292480469e-06]}, "gradients/decoder.transformer.h.10.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 2.0, 3.0, 2.0, 1.0, 5.0, 6.0, 6.0, 2.0, 7.0, 10.0, 5.0, 12.0, 11.0, 11.0, 15.0, 12.0, 19.0, 28.0, 22.0, 18.0, 28.0, 32.0, 31.0, 30.0, 31.0, 33.0, 34.0, 28.0, 32.0, 38.0, 27.0, 40.0, 45.0, 35.0, 29.0, 35.0, 22.0, 33.0, 26.0, 29.0, 24.0, 19.0, 22.0, 12.0, 12.0, 17.0, 13.0, 13.0, 6.0, 7.0, 5.0, 8.0, 7.0, 6.0, 3.0, 1.0, 0.0, 4.0, 1.0, 1.0, 2.0], "bins": [-3.609375, -3.49835205078125, -3.3873291015625, -3.27630615234375, -3.165283203125, -3.05426025390625, -2.9432373046875, -2.83221435546875, -2.72119140625, -2.61016845703125, -2.4991455078125, -2.38812255859375, -2.277099609375, -2.16607666015625, -2.0550537109375, -1.94403076171875, -1.8330078125, -1.72198486328125, -1.6109619140625, -1.49993896484375, -1.388916015625, -1.27789306640625, -1.1668701171875, -1.05584716796875, -0.94482421875, -0.83380126953125, -0.7227783203125, -0.61175537109375, -0.500732421875, -0.38970947265625, -0.2786865234375, -0.16766357421875, -0.056640625, 0.05438232421875, 0.1654052734375, 0.27642822265625, 0.387451171875, 0.49847412109375, 0.6094970703125, 0.72052001953125, 0.83154296875, 0.94256591796875, 1.0535888671875, 1.16461181640625, 1.275634765625, 1.38665771484375, 1.4976806640625, 1.60870361328125, 1.7197265625, 1.83074951171875, 1.9417724609375, 2.05279541015625, 2.163818359375, 2.27484130859375, 2.3858642578125, 2.49688720703125, 2.60791015625, 2.71893310546875, 2.8299560546875, 2.94097900390625, 3.052001953125, 3.16302490234375, 3.2740478515625, 3.38507080078125, 3.49609375]}, "gradients/decoder.transformer.h.10.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 5.0, 3.0, 4.0, 6.0, 3.0, 7.0, 3.0, 5.0, 14.0, 14.0, 20.0, 18.0, 23.0, 23.0, 42.0, 25.0, 41.0, 58.0, 68.0, 90.0, 98.0, 149.0, 218.0, 403.0, 692.0, 1537.0, 3673.0, 11012.0, 35813.0, 137391.0, 548437.0, 228736.0, 54333.0, 15820.0, 5309.0, 2078.0, 894.0, 444.0, 269.0, 175.0, 120.0, 89.0, 77.0, 50.0, 50.0, 50.0, 39.0, 20.0, 20.0, 18.0, 14.0, 19.0, 12.0, 10.0, 12.0, 5.0, 4.0, 4.0, 3.0, 2.0, 1.0], "bins": [-3.869140625, -3.75421142578125, -3.6392822265625, -3.52435302734375, -3.409423828125, -3.29449462890625, -3.1795654296875, -3.06463623046875, -2.94970703125, -2.83477783203125, -2.7198486328125, -2.60491943359375, -2.489990234375, -2.37506103515625, -2.2601318359375, -2.14520263671875, -2.0302734375, -1.91534423828125, -1.8004150390625, -1.68548583984375, -1.570556640625, -1.45562744140625, -1.3406982421875, -1.22576904296875, -1.11083984375, -0.99591064453125, -0.8809814453125, -0.76605224609375, -0.651123046875, -0.53619384765625, -0.4212646484375, -0.30633544921875, -0.19140625, -0.07647705078125, 0.0384521484375, 0.15338134765625, 0.268310546875, 0.38323974609375, 0.4981689453125, 0.61309814453125, 0.72802734375, 0.84295654296875, 0.9578857421875, 1.07281494140625, 1.187744140625, 1.30267333984375, 1.4176025390625, 1.53253173828125, 1.6474609375, 1.76239013671875, 1.8773193359375, 1.99224853515625, 2.107177734375, 2.22210693359375, 2.3370361328125, 2.45196533203125, 2.56689453125, 2.68182373046875, 2.7967529296875, 2.91168212890625, 3.026611328125, 3.14154052734375, 3.2564697265625, 3.37139892578125, 3.486328125]}, "gradients/decoder.transformer.h.10.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 4.0, 6.0, 8.0, 2.0, 10.0, 8.0, 9.0, 15.0, 13.0, 16.0, 14.0, 30.0, 18.0, 33.0, 19.0, 23.0, 34.0, 30.0, 46.0, 38.0, 47.0, 80.0, 211.0, 1758.0, 128.0, 59.0, 36.0, 32.0, 21.0, 33.0, 37.0, 27.0, 34.0, 27.0, 22.0, 20.0, 10.0, 18.0, 8.0, 16.0, 4.0, 7.0, 10.0, 6.0, 3.0, 5.0, 4.0, 3.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 3.0], "bins": [-11.2109375, -10.863525390625, -10.51611328125, -10.168701171875, -9.8212890625, -9.473876953125, -9.12646484375, -8.779052734375, -8.431640625, -8.084228515625, -7.73681640625, -7.389404296875, -7.0419921875, -6.694580078125, -6.34716796875, -5.999755859375, -5.65234375, -5.304931640625, -4.95751953125, -4.610107421875, -4.2626953125, -3.915283203125, -3.56787109375, -3.220458984375, -2.873046875, -2.525634765625, -2.17822265625, -1.830810546875, -1.4833984375, -1.135986328125, -0.78857421875, -0.441162109375, -0.09375, 0.253662109375, 0.60107421875, 0.948486328125, 1.2958984375, 1.643310546875, 1.99072265625, 2.338134765625, 2.685546875, 3.032958984375, 3.38037109375, 3.727783203125, 4.0751953125, 4.422607421875, 4.77001953125, 5.117431640625, 5.46484375, 5.812255859375, 6.15966796875, 6.507080078125, 6.8544921875, 7.201904296875, 7.54931640625, 7.896728515625, 8.244140625, 8.591552734375, 8.93896484375, 9.286376953125, 9.6337890625, 9.981201171875, 10.32861328125, 10.676025390625, 11.0234375]}, "gradients/decoder.transformer.h.10.attn.c_attn.weight": {"_type": "histogram", "values": [4.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 5.0, 4.0, 5.0, 6.0, 6.0, 10.0, 13.0, 10.0, 13.0, 21.0, 27.0, 24.0, 37.0, 44.0, 61.0, 83.0, 92.0, 115.0, 169.0, 188.0, 282.0, 613.0, 2143.0, 58059.0, 3064810.0, 15872.0, 1390.0, 490.0, 251.0, 187.0, 120.0, 122.0, 67.0, 68.0, 60.0, 44.0, 35.0, 31.0, 26.0, 20.0, 17.0, 12.0, 9.0, 12.0, 9.0, 2.0, 9.0, 4.0, 3.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0], "bins": [-21.921875, -21.222900390625, -20.52392578125, -19.824951171875, -19.1259765625, -18.427001953125, -17.72802734375, -17.029052734375, -16.330078125, -15.631103515625, -14.93212890625, -14.233154296875, -13.5341796875, -12.835205078125, -12.13623046875, -11.437255859375, -10.73828125, -10.039306640625, -9.34033203125, -8.641357421875, -7.9423828125, -7.243408203125, -6.54443359375, -5.845458984375, -5.146484375, -4.447509765625, -3.74853515625, -3.049560546875, -2.3505859375, -1.651611328125, -0.95263671875, -0.253662109375, 0.4453125, 1.144287109375, 1.84326171875, 2.542236328125, 3.2412109375, 3.940185546875, 4.63916015625, 5.338134765625, 6.037109375, 6.736083984375, 7.43505859375, 8.134033203125, 8.8330078125, 9.531982421875, 10.23095703125, 10.929931640625, 11.62890625, 12.327880859375, 13.02685546875, 13.725830078125, 14.4248046875, 15.123779296875, 15.82275390625, 16.521728515625, 17.220703125, 17.919677734375, 18.61865234375, 19.317626953125, 20.0166015625, 20.715576171875, 21.41455078125, 22.113525390625, 22.8125]}, "gradients/decoder.transformer.h.10.ln_1.weight": {"_type": "histogram", "values": [74.0, 916.0, 29.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.612714767456055, -1.7586748600006104, 2.095365047454834, 5.949404716491699, 9.803444862365723, 13.65748405456543, 17.511524200439453, 21.365564346313477, 25.2196044921875, 29.073644638061523, 32.92768478393555, 36.78172302246094, 40.635765075683594, 44.489803314208984, 48.343841552734375, 52.19788360595703, 56.05192565917969, 59.90596389770508, 63.760005950927734, 67.61404418945312, 71.46808624267578, 75.32212829589844, 79.17616271972656, 83.03020477294922, 86.88424682617188, 90.73828887939453, 94.59232330322266, 98.44636535644531, 102.30040740966797, 106.15444946289062, 110.00848388671875, 113.8625259399414, 117.71656036376953, 121.57060241699219, 125.42463684082031, 129.2786865234375, 133.13272094726562, 136.98675537109375, 140.84080505371094, 144.69483947753906, 148.5488739013672, 152.4029083251953, 156.2569580078125, 160.11099243164062, 163.96502685546875, 167.81907653808594, 171.67311096191406, 175.52716064453125, 179.38119506835938, 183.2352294921875, 187.0892791748047, 190.9433135986328, 194.79734802246094, 198.65139770507812, 202.50543212890625, 206.35946655273438, 210.21351623535156, 214.0675506591797, 217.92160034179688, 221.775634765625, 225.62966918945312, 229.4837188720703, 233.33775329589844, 237.19180297851562, 241.04583740234375]}, "gradients/decoder.transformer.h.10.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.0, 6.0, 7.0, 4.0, 10.0, 13.0, 11.0, 13.0, 11.0, 15.0, 17.0, 13.0, 22.0, 34.0, 37.0, 33.0, 34.0, 45.0, 45.0, 33.0, 40.0, 42.0, 39.0, 43.0, 31.0, 42.0, 29.0, 43.0, 39.0, 34.0, 32.0, 22.0, 21.0, 25.0, 13.0, 12.0, 15.0, 20.0, 17.0, 9.0, 9.0, 6.0, 8.0, 2.0, 8.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-31.422157287597656, -30.425823211669922, -29.429487228393555, -28.43315315246582, -27.436819076538086, -26.44048309326172, -25.444149017333984, -24.44781494140625, -23.451478958129883, -22.45514488220215, -21.45880889892578, -20.462474822998047, -19.466140747070312, -18.469804763793945, -17.47347068786621, -16.477134704589844, -15.480801582336426, -14.484466552734375, -13.48813247680664, -12.49179744720459, -11.495462417602539, -10.499128341674805, -9.502793312072754, -8.506458282470703, -7.5101237297058105, -6.513789176940918, -5.517454147338867, -4.521119594573975, -3.524784803390503, -2.5284500122070312, -1.5321154594421387, -0.5357804298400879, 0.4605541229248047, 1.4568889141082764, 2.453223705291748, 3.4495582580566406, 4.445893287658691, 5.442227840423584, 6.438562393188477, 7.434897422790527, 8.431232452392578, 9.427567481994629, 10.423901557922363, 11.420236587524414, 12.416571617126465, 13.412906646728516, 14.40924072265625, 15.4055757522583, 16.40190887451172, 17.398242950439453, 18.39457893371582, 19.390913009643555, 20.38724708557129, 21.383583068847656, 22.37991714477539, 23.376251220703125, 24.372587203979492, 25.368921279907227, 26.365257263183594, 27.361591339111328, 28.357925415039062, 29.35426139831543, 30.350595474243164, 31.34693145751953, 32.343265533447266]}, "gradients/decoder.transformer.h.9.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 3.0, 4.0, 6.0, 2.0, 7.0, 7.0, 11.0, 9.0, 8.0, 14.0, 13.0, 13.0, 15.0, 24.0, 23.0, 16.0, 24.0, 37.0, 32.0, 24.0, 36.0, 26.0, 33.0, 33.0, 35.0, 36.0, 37.0, 37.0, 41.0, 37.0, 36.0, 26.0, 28.0, 25.0, 27.0, 34.0, 23.0, 20.0, 17.0, 12.0, 21.0, 17.0, 16.0, 13.0, 8.0, 2.0, 7.0, 10.0, 4.0, 5.0, 6.0, 2.0, 1.0, 2.0, 2.0, 0.0, 3.0], "bins": [-3.740234375, -3.626800537109375, -3.51336669921875, -3.399932861328125, -3.2864990234375, -3.173065185546875, -3.05963134765625, -2.946197509765625, -2.832763671875, -2.719329833984375, -2.60589599609375, -2.492462158203125, -2.3790283203125, -2.265594482421875, -2.15216064453125, -2.038726806640625, -1.92529296875, -1.811859130859375, -1.69842529296875, -1.584991455078125, -1.4715576171875, -1.358123779296875, -1.24468994140625, -1.131256103515625, -1.017822265625, -0.904388427734375, -0.79095458984375, -0.677520751953125, -0.5640869140625, -0.450653076171875, -0.33721923828125, -0.223785400390625, -0.1103515625, 0.003082275390625, 0.11651611328125, 0.229949951171875, 0.3433837890625, 0.456817626953125, 0.57025146484375, 0.683685302734375, 0.797119140625, 0.910552978515625, 1.02398681640625, 1.137420654296875, 1.2508544921875, 1.364288330078125, 1.47772216796875, 1.591156005859375, 1.70458984375, 1.818023681640625, 1.93145751953125, 2.044891357421875, 2.1583251953125, 2.271759033203125, 2.38519287109375, 2.498626708984375, 2.612060546875, 2.725494384765625, 2.83892822265625, 2.952362060546875, 3.0657958984375, 3.179229736328125, 3.29266357421875, 3.406097412109375, 3.51953125]}, "gradients/decoder.transformer.h.9.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 1.0, 2.0, 4.0, 3.0, 3.0, 7.0, 6.0, 8.0, 5.0, 8.0, 11.0, 11.0, 14.0, 19.0, 31.0, 15.0, 29.0, 57.0, 72.0, 114.0, 170.0, 387.0, 844.0, 2101.0, 6073.0, 21277.0, 102870.0, 1184976.0, 2567944.0, 254501.0, 37514.0, 9792.0, 3052.0, 1174.0, 515.0, 235.0, 134.0, 65.0, 48.0, 34.0, 31.0, 24.0, 22.0, 18.0, 11.0, 14.0, 9.0, 5.0, 9.0, 4.0, 2.0, 5.0, 2.0, 4.0, 7.0, 1.0, 1.0, 1.0], "bins": [-10.1328125, -9.8365478515625, -9.540283203125, -9.2440185546875, -8.94775390625, -8.6514892578125, -8.355224609375, -8.0589599609375, -7.7626953125, -7.4664306640625, -7.170166015625, -6.8739013671875, -6.57763671875, -6.2813720703125, -5.985107421875, -5.6888427734375, -5.392578125, -5.0963134765625, -4.800048828125, -4.5037841796875, -4.20751953125, -3.9112548828125, -3.614990234375, -3.3187255859375, -3.0224609375, -2.7261962890625, -2.429931640625, -2.1336669921875, -1.83740234375, -1.5411376953125, -1.244873046875, -0.9486083984375, -0.65234375, -0.3560791015625, -0.059814453125, 0.2364501953125, 0.53271484375, 0.8289794921875, 1.125244140625, 1.4215087890625, 1.7177734375, 2.0140380859375, 2.310302734375, 2.6065673828125, 2.90283203125, 3.1990966796875, 3.495361328125, 3.7916259765625, 4.087890625, 4.3841552734375, 4.680419921875, 4.9766845703125, 5.27294921875, 5.5692138671875, 5.865478515625, 6.1617431640625, 6.4580078125, 6.7542724609375, 7.050537109375, 7.3468017578125, 7.64306640625, 7.9393310546875, 8.235595703125, 8.5318603515625, 8.828125]}, "gradients/decoder.transformer.h.9.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 2.0, 3.0, 6.0, 8.0, 13.0, 15.0, 29.0, 51.0, 72.0, 162.0, 280.0, 547.0, 932.0, 864.0, 453.0, 260.0, 129.0, 99.0, 50.0, 37.0, 27.0, 18.0, 9.0, 8.0, 3.0, 4.0, 5.0, 1.0, 0.0, 1.0, 1.0], "bins": [-17.296875, -16.91973876953125, -16.5426025390625, -16.16546630859375, -15.788330078125, -15.41119384765625, -15.0340576171875, -14.65692138671875, -14.27978515625, -13.90264892578125, -13.5255126953125, -13.14837646484375, -12.771240234375, -12.39410400390625, -12.0169677734375, -11.63983154296875, -11.2626953125, -10.88555908203125, -10.5084228515625, -10.13128662109375, -9.754150390625, -9.37701416015625, -8.9998779296875, -8.62274169921875, -8.24560546875, -7.86846923828125, -7.4913330078125, -7.11419677734375, -6.737060546875, -6.35992431640625, -5.9827880859375, -5.60565185546875, -5.228515625, -4.85137939453125, -4.4742431640625, -4.09710693359375, -3.719970703125, -3.34283447265625, -2.9656982421875, -2.58856201171875, -2.21142578125, -1.83428955078125, -1.4571533203125, -1.08001708984375, -0.702880859375, -0.32574462890625, 0.0513916015625, 0.42852783203125, 0.8056640625, 1.18280029296875, 1.5599365234375, 1.93707275390625, 2.314208984375, 2.69134521484375, 3.0684814453125, 3.44561767578125, 3.82275390625, 4.19989013671875, 4.5770263671875, 4.95416259765625, 5.331298828125, 5.70843505859375, 6.0855712890625, 6.46270751953125, 6.83984375]}, "gradients/decoder.transformer.h.9.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 3.0, 4.0, 7.0, 4.0, 17.0, 20.0, 35.0, 59.0, 119.0, 186.0, 421.0, 1033.0, 43193.0, 4128527.0, 19166.0, 798.0, 349.0, 157.0, 101.0, 38.0, 21.0, 11.0, 14.0, 2.0, 4.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.8125, -34.91796875, -33.0234375, -31.12890625, -29.234375, -27.33984375, -25.4453125, -23.55078125, -21.65625, -19.76171875, -17.8671875, -15.97265625, -14.078125, -12.18359375, -10.2890625, -8.39453125, -6.5, -4.60546875, -2.7109375, -0.81640625, 1.078125, 2.97265625, 4.8671875, 6.76171875, 8.65625, 10.55078125, 12.4453125, 14.33984375, 16.234375, 18.12890625, 20.0234375, 21.91796875, 23.8125, 25.70703125, 27.6015625, 29.49609375, 31.390625, 33.28515625, 35.1796875, 37.07421875, 38.96875, 40.86328125, 42.7578125, 44.65234375, 46.546875, 48.44140625, 50.3359375, 52.23046875, 54.125, 56.01953125, 57.9140625, 59.80859375, 61.703125, 63.59765625, 65.4921875, 67.38671875, 69.28125, 71.17578125, 73.0703125, 74.96484375, 76.859375, 78.75390625, 80.6484375, 82.54296875, 84.4375]}, "gradients/decoder.transformer.h.9.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 15.0, 92.0, 242.0, 389.0, 206.0, 53.0, 12.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-182.96707153320312, -179.25460815429688, -175.54212951660156, -171.8296661376953, -168.11720275878906, -164.40472412109375, -160.6922607421875, -156.97979736328125, -153.26731872558594, -149.5548553466797, -145.84237670898438, -142.12991333007812, -138.41744995117188, -134.70497131347656, -130.9925079345703, -127.28003692626953, -123.56757354736328, -119.8551025390625, -116.14263916015625, -112.43016815185547, -108.71769714355469, -105.00523376464844, -101.29276275634766, -97.58029174804688, -93.86782836914062, -90.15535736083984, -86.4428939819336, -82.73042297363281, -79.01795196533203, -75.30548095703125, -71.593017578125, -67.88054656982422, -64.1680679321289, -60.45560073852539, -56.74312973022461, -53.030662536621094, -49.31819152832031, -45.6057243347168, -41.89325714111328, -38.1807861328125, -34.468318939208984, -30.755849838256836, -27.043380737304688, -23.330913543701172, -19.618444442749023, -15.905975341796875, -12.19350814819336, -8.481039047241211, -4.7685699462890625, -1.0561013221740723, 2.656367301940918, 6.36883544921875, 10.081304550170898, 13.793773651123047, 17.506240844726562, 21.21870994567871, 24.93117904663086, 28.643648147583008, 32.356117248535156, 36.06858444213867, 39.78105163574219, 43.49352264404297, 47.205989837646484, 50.91845703125, 54.63092803955078]}, "gradients/decoder.transformer.h.9.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 5.0, 1.0, 7.0, 6.0, 8.0, 14.0, 10.0, 11.0, 6.0, 9.0, 11.0, 18.0, 19.0, 24.0, 30.0, 25.0, 36.0, 31.0, 33.0, 28.0, 40.0, 29.0, 41.0, 34.0, 45.0, 41.0, 38.0, 39.0, 31.0, 39.0, 37.0, 27.0, 25.0, 23.0, 26.0, 22.0, 26.0, 15.0, 17.0, 16.0, 17.0, 7.0, 7.0, 13.0, 5.0, 6.0, 8.0, 3.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.64899253845215, -20.984256744384766, -20.319520950317383, -19.654787063598633, -18.99005126953125, -18.325315475463867, -17.660579681396484, -16.9958438873291, -16.33110809326172, -15.666372299194336, -15.00163745880127, -14.336901664733887, -13.67216682434082, -13.007431030273438, -12.342695236206055, -11.677959442138672, -11.013225555419922, -10.348489761352539, -9.683754920959473, -9.01901912689209, -8.354284286499023, -7.689548492431641, -7.024812698364258, -6.360077381134033, -5.695342063903809, -5.030606746673584, -4.365871429443359, -3.7011356353759766, -3.036400318145752, -2.3716650009155273, -1.7069294452667236, -1.04219388961792, -0.3774604797363281, 0.28727495670318604, 0.9520103931427002, 1.6167458295822144, 2.2814812660217285, 2.946216583251953, 3.610952138900757, 4.2756876945495605, 4.940423011779785, 5.60515832901001, 6.269893646240234, 6.934629440307617, 7.599364757537842, 8.264100074768066, 8.92883586883545, 9.593570709228516, 10.258306503295898, 10.923042297363281, 11.587777137756348, 12.25251293182373, 12.917247772216797, 13.58198356628418, 14.246719360351562, 14.911455154418945, 15.576189994812012, 16.240924835205078, 16.90566062927246, 17.570396423339844, 18.235132217407227, 18.89986801147461, 19.56460189819336, 20.229337692260742, 20.894073486328125]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 2.0, 3.0, 3.0, 2.0, 2.0, 6.0, 7.0, 7.0, 11.0, 7.0, 4.0, 12.0, 11.0, 19.0, 16.0, 20.0, 19.0, 16.0, 25.0, 28.0, 40.0, 31.0, 27.0, 29.0, 38.0, 44.0, 44.0, 34.0, 24.0, 42.0, 35.0, 38.0, 27.0, 38.0, 36.0, 27.0, 26.0, 23.0, 28.0, 22.0, 20.0, 14.0, 19.0, 15.0, 5.0, 13.0, 7.0, 12.0, 12.0, 5.0, 8.0, 6.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-3.814453125, -3.696136474609375, -3.57781982421875, -3.459503173828125, -3.3411865234375, -3.222869873046875, -3.10455322265625, -2.986236572265625, -2.867919921875, -2.749603271484375, -2.63128662109375, -2.512969970703125, -2.3946533203125, -2.276336669921875, -2.15802001953125, -2.039703369140625, -1.92138671875, -1.803070068359375, -1.68475341796875, -1.566436767578125, -1.4481201171875, -1.329803466796875, -1.21148681640625, -1.093170166015625, -0.974853515625, -0.856536865234375, -0.73822021484375, -0.619903564453125, -0.5015869140625, -0.383270263671875, -0.26495361328125, -0.146636962890625, -0.0283203125, 0.089996337890625, 0.20831298828125, 0.326629638671875, 0.4449462890625, 0.563262939453125, 0.68157958984375, 0.799896240234375, 0.918212890625, 1.036529541015625, 1.15484619140625, 1.273162841796875, 1.3914794921875, 1.509796142578125, 1.62811279296875, 1.746429443359375, 1.86474609375, 1.983062744140625, 2.10137939453125, 2.219696044921875, 2.3380126953125, 2.456329345703125, 2.57464599609375, 2.692962646484375, 2.811279296875, 2.929595947265625, 3.04791259765625, 3.166229248046875, 3.2845458984375, 3.402862548828125, 3.52117919921875, 3.639495849609375, 3.7578125]}, "gradients/decoder.transformer.h.9.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 6.0, 4.0, 4.0, 6.0, 8.0, 20.0, 16.0, 38.0, 54.0, 62.0, 102.0, 172.0, 235.0, 341.0, 552.0, 763.0, 1194.0, 1847.0, 2907.0, 4445.0, 6814.0, 10830.0, 17654.0, 29228.0, 50762.0, 93485.0, 216315.0, 329433.0, 123377.0, 63846.0, 36139.0, 21399.0, 13080.0, 8310.0, 5307.0, 3382.0, 2184.0, 1418.0, 947.0, 653.0, 394.0, 276.0, 185.0, 103.0, 82.0, 60.0, 41.0, 28.0, 21.0, 17.0, 6.0, 6.0, 7.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.1292724609375, -0.1252574920654297, -0.12124252319335938, -0.11722755432128906, -0.11321258544921875, -0.10919761657714844, -0.10518264770507812, -0.10116767883300781, -0.0971527099609375, -0.09313774108886719, -0.08912277221679688, -0.08510780334472656, -0.08109283447265625, -0.07707786560058594, -0.07306289672851562, -0.06904792785644531, -0.065032958984375, -0.06101799011230469, -0.057003021240234375, -0.05298805236816406, -0.04897308349609375, -0.04495811462402344, -0.040943145751953125, -0.03692817687988281, -0.0329132080078125, -0.028898239135742188, -0.024883270263671875, -0.020868301391601562, -0.01685333251953125, -0.012838363647460938, -0.008823394775390625, -0.0048084259033203125, -0.00079345703125, 0.0032215118408203125, 0.007236480712890625, 0.011251449584960938, 0.01526641845703125, 0.019281387329101562, 0.023296356201171875, 0.027311325073242188, 0.0313262939453125, 0.03534126281738281, 0.039356231689453125, 0.04337120056152344, 0.04738616943359375, 0.05140113830566406, 0.055416107177734375, 0.05943107604980469, 0.063446044921875, 0.06746101379394531, 0.07147598266601562, 0.07549095153808594, 0.07950592041015625, 0.08352088928222656, 0.08753585815429688, 0.09155082702636719, 0.0955657958984375, 0.09958076477050781, 0.10359573364257812, 0.10761070251464844, 0.11162567138671875, 0.11564064025878906, 0.11965560913085938, 0.12367057800292969, 0.127685546875]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 4.0, 0.0, 0.0, 4.0, 1.0, 4.0, 4.0, 7.0, 4.0, 4.0, 7.0, 13.0, 16.0, 19.0, 19.0, 21.0, 20.0, 24.0, 27.0, 37.0, 28.0, 29.0, 56.0, 36.0, 48.0, 44.0, 1065.0, 35.0, 41.0, 37.0, 45.0, 31.0, 32.0, 31.0, 30.0, 25.0, 26.0, 27.0, 12.0, 31.0, 18.0, 15.0, 12.0, 10.0, 13.0, 3.0, 7.0, 4.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.80078125, -2.705902099609375, -2.61102294921875, -2.516143798828125, -2.4212646484375, -2.326385498046875, -2.23150634765625, -2.136627197265625, -2.041748046875, -1.946868896484375, -1.85198974609375, -1.757110595703125, -1.6622314453125, -1.567352294921875, -1.47247314453125, -1.377593994140625, -1.28271484375, -1.187835693359375, -1.09295654296875, -0.998077392578125, -0.9031982421875, -0.808319091796875, -0.71343994140625, -0.618560791015625, -0.523681640625, -0.428802490234375, -0.33392333984375, -0.239044189453125, -0.1441650390625, -0.049285888671875, 0.04559326171875, 0.140472412109375, 0.2353515625, 0.330230712890625, 0.42510986328125, 0.519989013671875, 0.6148681640625, 0.709747314453125, 0.80462646484375, 0.899505615234375, 0.994384765625, 1.089263916015625, 1.18414306640625, 1.279022216796875, 1.3739013671875, 1.468780517578125, 1.56365966796875, 1.658538818359375, 1.75341796875, 1.848297119140625, 1.94317626953125, 2.038055419921875, 2.1329345703125, 2.227813720703125, 2.32269287109375, 2.417572021484375, 2.512451171875, 2.607330322265625, 2.70220947265625, 2.797088623046875, 2.8919677734375, 2.986846923828125, 3.08172607421875, 3.176605224609375, 3.271484375]}, "gradients/decoder.transformer.h.9.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 2.0, 3.0, 1.0, 7.0, 7.0, 15.0, 22.0, 40.0, 51.0, 64.0, 97.0, 138.0, 233.0, 368.0, 523.0, 808.0, 1273.0, 2037.0, 3235.0, 5233.0, 8369.0, 13909.0, 22880.0, 39231.0, 70776.0, 146105.0, 1409080.0, 179795.0, 82373.0, 44368.0, 25716.0, 15229.0, 9422.0, 5695.0, 3596.0, 2248.0, 1437.0, 970.0, 606.0, 391.0, 247.0, 182.0, 124.0, 80.0, 57.0, 23.0, 21.0, 17.0, 12.0, 11.0, 8.0, 2.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12353515625, -0.11936378479003906, -0.11519241333007812, -0.11102104187011719, -0.10684967041015625, -0.10267829895019531, -0.09850692749023438, -0.09433555603027344, -0.0901641845703125, -0.08599281311035156, -0.08182144165039062, -0.07765007019042969, -0.07347869873046875, -0.06930732727050781, -0.06513595581054688, -0.06096458435058594, -0.056793212890625, -0.05262184143066406, -0.048450469970703125, -0.04427909851074219, -0.04010772705078125, -0.03593635559082031, -0.031764984130859375, -0.027593612670898438, -0.0234222412109375, -0.019250869750976562, -0.015079498291015625, -0.010908126831054688, -0.00673675537109375, -0.0025653839111328125, 0.001605987548828125, 0.0057773590087890625, 0.00994873046875, 0.014120101928710938, 0.018291473388671875, 0.022462844848632812, 0.02663421630859375, 0.030805587768554688, 0.034976959228515625, 0.03914833068847656, 0.0433197021484375, 0.04749107360839844, 0.051662445068359375, 0.05583381652832031, 0.06000518798828125, 0.06417655944824219, 0.06834793090820312, 0.07251930236816406, 0.076690673828125, 0.08086204528808594, 0.08503341674804688, 0.08920478820800781, 0.09337615966796875, 0.09754753112792969, 0.10171890258789062, 0.10589027404785156, 0.1100616455078125, 0.11423301696777344, 0.11840438842773438, 0.12257575988769531, 0.12674713134765625, 0.1309185028076172, 0.13508987426757812, 0.13926124572753906, 0.1434326171875]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 6.0, 7.0, 13.0, 8.0, 6.0, 16.0, 15.0, 23.0, 36.0, 33.0, 31.0, 43.0, 47.0, 58.0, 47.0, 56.0, 58.0, 65.0, 55.0, 36.0, 47.0, 53.0, 51.0, 29.0, 22.0, 22.0, 25.0, 17.0, 11.0, 10.0, 8.0, 10.0, 3.0, 5.0, 3.0, 5.0, 3.0, 2.0, 4.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-8.940696716308594e-06, -8.654780685901642e-06, -8.36886465549469e-06, -8.082948625087738e-06, -7.797032594680786e-06, -7.511116564273834e-06, -7.225200533866882e-06, -6.93928450345993e-06, -6.6533684730529785e-06, -6.367452442646027e-06, -6.081536412239075e-06, -5.795620381832123e-06, -5.509704351425171e-06, -5.223788321018219e-06, -4.937872290611267e-06, -4.651956260204315e-06, -4.366040229797363e-06, -4.080124199390411e-06, -3.7942081689834595e-06, -3.5082921385765076e-06, -3.2223761081695557e-06, -2.9364600777626038e-06, -2.650544047355652e-06, -2.3646280169487e-06, -2.078711986541748e-06, -1.7927959561347961e-06, -1.5068799257278442e-06, -1.2209638953208923e-06, -9.350478649139404e-07, -6.491318345069885e-07, -3.632158041000366e-07, -7.729977369308472e-08, 2.086162567138672e-07, 4.945322871208191e-07, 7.80448317527771e-07, 1.066364347934723e-06, 1.3522803783416748e-06, 1.6381964087486267e-06, 1.9241124391555786e-06, 2.2100284695625305e-06, 2.4959444999694824e-06, 2.7818605303764343e-06, 3.0677765607833862e-06, 3.353692591190338e-06, 3.63960862159729e-06, 3.925524652004242e-06, 4.211440682411194e-06, 4.497356712818146e-06, 4.783272743225098e-06, 5.0691887736320496e-06, 5.3551048040390015e-06, 5.641020834445953e-06, 5.926936864852905e-06, 6.212852895259857e-06, 6.498768925666809e-06, 6.784684956073761e-06, 7.070600986480713e-06, 7.356517016887665e-06, 7.642433047294617e-06, 7.928349077701569e-06, 8.21426510810852e-06, 8.500181138515472e-06, 8.786097168922424e-06, 9.072013199329376e-06, 9.357929229736328e-06]}, "gradients/decoder.transformer.h.9.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 3.0, 2.0, 3.0, 2.0, 6.0, 10.0, 6.0, 13.0, 19.0, 8.0, 21.0, 26.0, 40.0, 45.0, 52.0, 105.0, 125.0, 197.0, 336.0, 890.0, 56188.0, 984253.0, 4925.0, 529.0, 241.0, 147.0, 97.0, 63.0, 42.0, 49.0, 25.0, 21.0, 12.0, 15.0, 10.0, 8.0, 6.0, 4.0, 3.0, 3.0, 2.0, 3.0, 1.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00019228458404541016, -0.00018650852143764496, -0.00018073245882987976, -0.00017495639622211456, -0.00016918033361434937, -0.00016340427100658417, -0.00015762820839881897, -0.00015185214579105377, -0.00014607608318328857, -0.00014030002057552338, -0.00013452395796775818, -0.00012874789535999298, -0.00012297183275222778, -0.00011719577014446259, -0.00011141970753669739, -0.00010564364492893219, -9.986758232116699e-05, -9.40915197134018e-05, -8.83154571056366e-05, -8.25393944978714e-05, -7.67633318901062e-05, -7.0987269282341e-05, -6.52112066745758e-05, -5.943514406681061e-05, -5.365908145904541e-05, -4.788301885128021e-05, -4.2106956243515015e-05, -3.633089363574982e-05, -3.055483102798462e-05, -2.477876842021942e-05, -1.9002705812454224e-05, -1.3226643204689026e-05, -7.450580596923828e-06, -1.6745179891586304e-06, 4.101544618606567e-06, 9.877607226371765e-06, 1.5653669834136963e-05, 2.142973244190216e-05, 2.720579504966736e-05, 3.2981857657432556e-05, 3.8757920265197754e-05, 4.453398287296295e-05, 5.031004548072815e-05, 5.608610808849335e-05, 6.186217069625854e-05, 6.763823330402374e-05, 7.341429591178894e-05, 7.919035851955414e-05, 8.496642112731934e-05, 9.074248373508453e-05, 9.651854634284973e-05, 0.00010229460895061493, 0.00010807067155838013, 0.00011384673416614532, 0.00011962279677391052, 0.00012539885938167572, 0.00013117492198944092, 0.00013695098459720612, 0.0001427270472049713, 0.0001485031098127365, 0.0001542791724205017, 0.0001600552350282669, 0.0001658312976360321, 0.0001716073602437973, 0.0001773834228515625]}, "gradients/decoder.transformer.h.9.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 12.0, 18.0, 30.0, 58.0, 102.0, 146.0, 171.0, 148.0, 129.0, 90.0, 56.0, 26.0, 13.0, 2.0, 5.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0071190445160028e-05, -9.797990060178563e-06, -9.5247905846918e-06, -9.251590199710336e-06, -8.978390724223573e-06, -8.705190339242108e-06, -8.431990863755345e-06, -8.15879047877388e-06, -7.885590093792416e-06, -7.6123901635583024e-06, -7.339190233324189e-06, -7.065989848342724e-06, -6.7927899181086104e-06, -6.519589987874497e-06, -6.246390057640383e-06, -5.973190127406269e-06, -5.699990197172156e-06, -5.426790266938042e-06, -5.153590336703928e-06, -4.8803904064698145e-06, -4.60719002148835e-06, -4.333990091254236e-06, -4.0607901610201225e-06, -3.7875900034123333e-06, -3.5143900731782196e-06, -3.241190142944106e-06, -2.9679899853363167e-06, -2.694790055102203e-06, -2.4215901248680893e-06, -2.1483899672603e-06, -1.8751900370261865e-06, -1.6019898794183973e-06, -1.3287899491842836e-06, -1.0555899052633322e-06, -7.823899181857996e-07, -5.09189931108267e-07, -2.3598988718731562e-07, 3.721015673363581e-08, 3.104100869677495e-07, 5.836102445755387e-07, 8.568101748096524e-07, 1.1300102187306038e-06, 1.4032102626515552e-06, 1.676410192885669e-06, 1.9496101231197827e-06, 2.222810280727572e-06, 2.4960102109616855e-06, 2.7692103685694747e-06, 3.0424102988035884e-06, 3.315610229037702e-06, 3.5888103866454912e-06, 3.8620100895059295e-06, 4.135210474487394e-06, 4.408410404721508e-06, 4.6816103349556215e-06, 4.954810719937086e-06, 5.228010195423849e-06, 5.501210125657963e-06, 5.774410055892076e-06, 6.047610440873541e-06, 6.320810371107655e-06, 6.594010301341768e-06, 6.867210231575882e-06, 7.140410161809996e-06, 7.41361054679146e-06]}, "gradients/decoder.transformer.h.9.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 6.0, 4.0, 10.0, 7.0, 7.0, 8.0, 13.0, 16.0, 14.0, 18.0, 9.0, 36.0, 17.0, 32.0, 31.0, 26.0, 35.0, 39.0, 43.0, 34.0, 50.0, 28.0, 48.0, 57.0, 36.0, 47.0, 21.0, 41.0, 26.0, 24.0, 31.0, 21.0, 36.0, 22.0, 28.0, 15.0, 16.0, 9.0, 11.0, 7.0, 5.0, 8.0, 5.0, 4.0, 5.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-4.947185516357422e-06, -4.794448614120483e-06, -4.641711711883545e-06, -4.4889748096466064e-06, -4.336237907409668e-06, -4.1835010051727295e-06, -4.030764102935791e-06, -3.8780272006988525e-06, -3.725290298461914e-06, -3.5725533962249756e-06, -3.419816493988037e-06, -3.2670795917510986e-06, -3.11434268951416e-06, -2.9616057872772217e-06, -2.808868885040283e-06, -2.6561319828033447e-06, -2.5033950805664062e-06, -2.3506581783294678e-06, -2.1979212760925293e-06, -2.045184373855591e-06, -1.8924474716186523e-06, -1.7397105693817139e-06, -1.5869736671447754e-06, -1.434236764907837e-06, -1.2814998626708984e-06, -1.12876296043396e-06, -9.760260581970215e-07, -8.23289155960083e-07, -6.705522537231445e-07, -5.178153514862061e-07, -3.650784492492676e-07, -2.123415470123291e-07, -5.960464477539063e-08, 9.313225746154785e-08, 2.4586915969848633e-07, 3.986060619354248e-07, 5.513429641723633e-07, 7.040798664093018e-07, 8.568167686462402e-07, 1.0095536708831787e-06, 1.1622905731201172e-06, 1.3150274753570557e-06, 1.4677643775939941e-06, 1.6205012798309326e-06, 1.773238182067871e-06, 1.9259750843048096e-06, 2.078711986541748e-06, 2.2314488887786865e-06, 2.384185791015625e-06, 2.5369226932525635e-06, 2.689659595489502e-06, 2.8423964977264404e-06, 2.995133399963379e-06, 3.1478703022003174e-06, 3.300607204437256e-06, 3.4533441066741943e-06, 3.606081008911133e-06, 3.7588179111480713e-06, 3.91155481338501e-06, 4.064291715621948e-06, 4.217028617858887e-06, 4.369765520095825e-06, 4.522502422332764e-06, 4.675239324569702e-06, 4.827976226806641e-06]}, "gradients/decoder.transformer.h.9.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 2.0, 3.0, 3.0, 2.0, 2.0, 6.0, 7.0, 7.0, 11.0, 7.0, 4.0, 12.0, 11.0, 19.0, 16.0, 20.0, 19.0, 16.0, 25.0, 28.0, 40.0, 31.0, 27.0, 29.0, 38.0, 44.0, 44.0, 34.0, 24.0, 42.0, 35.0, 38.0, 27.0, 38.0, 36.0, 27.0, 26.0, 23.0, 28.0, 22.0, 20.0, 14.0, 19.0, 15.0, 5.0, 13.0, 7.0, 12.0, 12.0, 5.0, 8.0, 6.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-3.814453125, -3.696136474609375, -3.57781982421875, -3.459503173828125, -3.3411865234375, -3.222869873046875, -3.10455322265625, -2.986236572265625, -2.867919921875, -2.749603271484375, -2.63128662109375, -2.512969970703125, -2.3946533203125, -2.276336669921875, -2.15802001953125, -2.039703369140625, -1.92138671875, -1.803070068359375, -1.68475341796875, -1.566436767578125, -1.4481201171875, -1.329803466796875, -1.21148681640625, -1.093170166015625, -0.974853515625, -0.856536865234375, -0.73822021484375, -0.619903564453125, -0.5015869140625, -0.383270263671875, -0.26495361328125, -0.146636962890625, -0.0283203125, 0.089996337890625, 0.20831298828125, 0.326629638671875, 0.4449462890625, 0.563262939453125, 0.68157958984375, 0.799896240234375, 0.918212890625, 1.036529541015625, 1.15484619140625, 1.273162841796875, 1.3914794921875, 1.509796142578125, 1.62811279296875, 1.746429443359375, 1.86474609375, 1.983062744140625, 2.10137939453125, 2.219696044921875, 2.3380126953125, 2.456329345703125, 2.57464599609375, 2.692962646484375, 2.811279296875, 2.929595947265625, 3.04791259765625, 3.166229248046875, 3.2845458984375, 3.402862548828125, 3.52117919921875, 3.639495849609375, 3.7578125]}, "gradients/decoder.transformer.h.9.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 2.0, 3.0, 3.0, 2.0, 2.0, 9.0, 8.0, 8.0, 16.0, 14.0, 6.0, 21.0, 22.0, 40.0, 38.0, 53.0, 58.0, 64.0, 106.0, 168.0, 239.0, 360.0, 626.0, 1468.0, 3932.0, 12989.0, 51559.0, 256725.0, 581603.0, 103064.0, 24035.0, 6676.0, 2206.0, 948.0, 501.0, 261.0, 170.0, 120.0, 95.0, 67.0, 55.0, 42.0, 44.0, 27.0, 19.0, 24.0, 10.0, 16.0, 14.0, 7.0, 9.0, 7.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0], "bins": [-6.01953125, -5.832763671875, -5.64599609375, -5.459228515625, -5.2724609375, -5.085693359375, -4.89892578125, -4.712158203125, -4.525390625, -4.338623046875, -4.15185546875, -3.965087890625, -3.7783203125, -3.591552734375, -3.40478515625, -3.218017578125, -3.03125, -2.844482421875, -2.65771484375, -2.470947265625, -2.2841796875, -2.097412109375, -1.91064453125, -1.723876953125, -1.537109375, -1.350341796875, -1.16357421875, -0.976806640625, -0.7900390625, -0.603271484375, -0.41650390625, -0.229736328125, -0.04296875, 0.143798828125, 0.33056640625, 0.517333984375, 0.7041015625, 0.890869140625, 1.07763671875, 1.264404296875, 1.451171875, 1.637939453125, 1.82470703125, 2.011474609375, 2.1982421875, 2.385009765625, 2.57177734375, 2.758544921875, 2.9453125, 3.132080078125, 3.31884765625, 3.505615234375, 3.6923828125, 3.879150390625, 4.06591796875, 4.252685546875, 4.439453125, 4.626220703125, 4.81298828125, 4.999755859375, 5.1865234375, 5.373291015625, 5.56005859375, 5.746826171875, 5.93359375]}, "gradients/decoder.transformer.h.9.attn.c_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 0.0, 4.0, 1.0, 4.0, 5.0, 2.0, 7.0, 8.0, 12.0, 8.0, 13.0, 16.0, 21.0, 21.0, 26.0, 21.0, 35.0, 31.0, 38.0, 45.0, 43.0, 47.0, 87.0, 128.0, 1641.0, 247.0, 104.0, 65.0, 45.0, 37.0, 35.0, 53.0, 31.0, 37.0, 22.0, 19.0, 16.0, 15.0, 14.0, 10.0, 7.0, 8.0, 10.0, 8.0, 3.0, 5.0, 5.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.78125, -11.3583984375, -10.935546875, -10.5126953125, -10.08984375, -9.6669921875, -9.244140625, -8.8212890625, -8.3984375, -7.9755859375, -7.552734375, -7.1298828125, -6.70703125, -6.2841796875, -5.861328125, -5.4384765625, -5.015625, -4.5927734375, -4.169921875, -3.7470703125, -3.32421875, -2.9013671875, -2.478515625, -2.0556640625, -1.6328125, -1.2099609375, -0.787109375, -0.3642578125, 0.05859375, 0.4814453125, 0.904296875, 1.3271484375, 1.75, 2.1728515625, 2.595703125, 3.0185546875, 3.44140625, 3.8642578125, 4.287109375, 4.7099609375, 5.1328125, 5.5556640625, 5.978515625, 6.4013671875, 6.82421875, 7.2470703125, 7.669921875, 8.0927734375, 8.515625, 8.9384765625, 9.361328125, 9.7841796875, 10.20703125, 10.6298828125, 11.052734375, 11.4755859375, 11.8984375, 12.3212890625, 12.744140625, 13.1669921875, 13.58984375, 14.0126953125, 14.435546875, 14.8583984375, 15.28125]}, "gradients/decoder.transformer.h.9.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 4.0, 6.0, 3.0, 7.0, 10.0, 6.0, 8.0, 9.0, 17.0, 15.0, 20.0, 23.0, 21.0, 39.0, 48.0, 70.0, 92.0, 141.0, 268.0, 638.0, 3128.0, 95154.0, 3027557.0, 15788.0, 1476.0, 469.0, 223.0, 111.0, 73.0, 55.0, 43.0, 27.0, 30.0, 30.0, 21.0, 16.0, 14.0, 10.0, 10.0, 11.0, 4.0, 5.0, 6.0, 3.0, 4.0, 0.0, 3.0, 0.0, 3.0, 2.0], "bins": [-41.9375, -40.7822265625, -39.626953125, -38.4716796875, -37.31640625, -36.1611328125, -35.005859375, -33.8505859375, -32.6953125, -31.5400390625, -30.384765625, -29.2294921875, -28.07421875, -26.9189453125, -25.763671875, -24.6083984375, -23.453125, -22.2978515625, -21.142578125, -19.9873046875, -18.83203125, -17.6767578125, -16.521484375, -15.3662109375, -14.2109375, -13.0556640625, -11.900390625, -10.7451171875, -9.58984375, -8.4345703125, -7.279296875, -6.1240234375, -4.96875, -3.8134765625, -2.658203125, -1.5029296875, -0.34765625, 0.8076171875, 1.962890625, 3.1181640625, 4.2734375, 5.4287109375, 6.583984375, 7.7392578125, 8.89453125, 10.0498046875, 11.205078125, 12.3603515625, 13.515625, 14.6708984375, 15.826171875, 16.9814453125, 18.13671875, 19.2919921875, 20.447265625, 21.6025390625, 22.7578125, 23.9130859375, 25.068359375, 26.2236328125, 27.37890625, 28.5341796875, 29.689453125, 30.8447265625, 32.0]}, "gradients/decoder.transformer.h.9.ln_1.weight": {"_type": "histogram", "values": [32.0, 962.0, 24.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.114297866821289, -4.12638521194458, 3.861527442932129, 11.84943962097168, 19.837352752685547, 27.825265884399414, 35.81317901611328, 43.80109405517578, 51.78900146484375, 59.776912689208984, 67.76482391357422, 75.75273895263672, 83.74065399169922, 91.72856140136719, 99.71647644042969, 107.70439147949219, 115.69230651855469, 123.68022155761719, 131.6681365966797, 139.65603637695312, 147.64395141601562, 155.63186645507812, 163.61978149414062, 171.60769653320312, 179.59561157226562, 187.58352661132812, 195.57144165039062, 203.55935668945312, 211.54725646972656, 219.53517150878906, 227.52308654785156, 235.51100158691406, 243.49891662597656, 251.48683166503906, 259.4747314453125, 267.462646484375, 275.4505615234375, 283.4384765625, 291.4263916015625, 299.414306640625, 307.4022216796875, 315.39013671875, 323.3780517578125, 331.365966796875, 339.3538818359375, 347.341796875, 355.3297119140625, 363.317626953125, 371.3055114746094, 379.2934265136719, 387.2813415527344, 395.2692565917969, 403.2571716308594, 411.2450866699219, 419.23297119140625, 427.22088623046875, 435.2088317871094, 443.1967468261719, 451.1846618652344, 459.1725769042969, 467.1604919433594, 475.1484069824219, 483.13629150390625, 491.12420654296875, 499.11212158203125]}, "gradients/decoder.transformer.h.9.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 3.0, 7.0, 7.0, 8.0, 6.0, 17.0, 17.0, 20.0, 20.0, 18.0, 29.0, 23.0, 32.0, 28.0, 30.0, 33.0, 33.0, 32.0, 47.0, 50.0, 39.0, 42.0, 38.0, 47.0, 38.0, 41.0, 44.0, 38.0, 31.0, 25.0, 27.0, 24.0, 14.0, 25.0, 13.0, 13.0, 12.0, 7.0, 4.0, 6.0, 6.0, 0.0, 9.0, 1.0, 0.0, 3.0, 2.0, 1.0, 4.0, 0.0, 1.0], "bins": [-43.680580139160156, -42.44108581542969, -41.20159149169922, -39.96209716796875, -38.72260284423828, -37.48311233520508, -36.24361801147461, -35.00412368774414, -33.76462936401367, -32.5251350402832, -31.285640716552734, -30.0461483001709, -28.80665397644043, -27.56715965270996, -26.327667236328125, -25.088172912597656, -23.848678588867188, -22.60918426513672, -21.36968994140625, -20.130197525024414, -18.890703201293945, -17.651208877563477, -16.41171646118164, -15.172222137451172, -13.932727813720703, -12.693233489990234, -11.453740119934082, -10.21424674987793, -8.974752426147461, -7.73525857925415, -6.49576473236084, -5.2562713623046875, -4.016777038574219, -2.777283191680908, -1.5377893447875977, -0.2982954978942871, 0.9411983489990234, 2.180692195892334, 3.4201860427856445, 4.659679412841797, 5.899173736572266, 7.138667583465576, 8.378161430358887, 9.617654800415039, 10.857149124145508, 12.096643447875977, 13.336136817932129, 14.575630187988281, 15.81512451171875, 17.05461883544922, 18.294113159179688, 19.533605575561523, 20.773099899291992, 22.01259422302246, 23.252086639404297, 24.491580963134766, 25.731075286865234, 26.970569610595703, 28.210063934326172, 29.449556350708008, 30.689050674438477, 31.928544998168945, 33.16803741455078, 34.40753173828125, 35.64702606201172]}, "gradients/decoder.transformer.h.8.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 4.0, 0.0, 7.0, 0.0, 2.0, 3.0, 3.0, 8.0, 9.0, 12.0, 8.0, 8.0, 12.0, 16.0, 18.0, 23.0, 14.0, 17.0, 24.0, 28.0, 36.0, 32.0, 31.0, 31.0, 44.0, 32.0, 34.0, 41.0, 40.0, 33.0, 31.0, 32.0, 43.0, 44.0, 24.0, 29.0, 20.0, 28.0, 36.0, 19.0, 19.0, 10.0, 16.0, 16.0, 15.0, 8.0, 9.0, 15.0, 12.0, 3.0, 4.0, 3.0, 4.0, 3.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-3.9765625, -3.84979248046875, -3.7230224609375, -3.59625244140625, -3.469482421875, -3.34271240234375, -3.2159423828125, -3.08917236328125, -2.96240234375, -2.83563232421875, -2.7088623046875, -2.58209228515625, -2.455322265625, -2.32855224609375, -2.2017822265625, -2.07501220703125, -1.9482421875, -1.82147216796875, -1.6947021484375, -1.56793212890625, -1.441162109375, -1.31439208984375, -1.1876220703125, -1.06085205078125, -0.93408203125, -0.80731201171875, -0.6805419921875, -0.55377197265625, -0.427001953125, -0.30023193359375, -0.1734619140625, -0.04669189453125, 0.080078125, 0.20684814453125, 0.3336181640625, 0.46038818359375, 0.587158203125, 0.71392822265625, 0.8406982421875, 0.96746826171875, 1.09423828125, 1.22100830078125, 1.3477783203125, 1.47454833984375, 1.601318359375, 1.72808837890625, 1.8548583984375, 1.98162841796875, 2.1083984375, 2.23516845703125, 2.3619384765625, 2.48870849609375, 2.615478515625, 2.74224853515625, 2.8690185546875, 2.99578857421875, 3.12255859375, 3.24932861328125, 3.3760986328125, 3.50286865234375, 3.629638671875, 3.75640869140625, 3.8831787109375, 4.00994873046875, 4.13671875]}, "gradients/decoder.transformer.h.8.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 5.0, 4.0, 5.0, 7.0, 7.0, 6.0, 13.0, 18.0, 21.0, 21.0, 32.0, 50.0, 88.0, 108.0, 213.0, 429.0, 727.0, 1395.0, 2726.0, 5558.0, 12374.0, 31326.0, 97078.0, 468889.0, 1880671.0, 1321096.0, 266583.0, 64195.0, 22482.0, 9350.0, 4278.0, 2084.0, 1027.0, 583.0, 296.0, 193.0, 123.0, 64.0, 33.0, 34.0, 22.0, 22.0, 15.0, 8.0, 15.0, 5.0, 4.0, 3.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-6.9921875, -6.77252197265625, -6.5528564453125, -6.33319091796875, -6.113525390625, -5.89385986328125, -5.6741943359375, -5.45452880859375, -5.23486328125, -5.01519775390625, -4.7955322265625, -4.57586669921875, -4.356201171875, -4.13653564453125, -3.9168701171875, -3.69720458984375, -3.4775390625, -3.25787353515625, -3.0382080078125, -2.81854248046875, -2.598876953125, -2.37921142578125, -2.1595458984375, -1.93988037109375, -1.72021484375, -1.50054931640625, -1.2808837890625, -1.06121826171875, -0.841552734375, -0.62188720703125, -0.4022216796875, -0.18255615234375, 0.037109375, 0.25677490234375, 0.4764404296875, 0.69610595703125, 0.915771484375, 1.13543701171875, 1.3551025390625, 1.57476806640625, 1.79443359375, 2.01409912109375, 2.2337646484375, 2.45343017578125, 2.673095703125, 2.89276123046875, 3.1124267578125, 3.33209228515625, 3.5517578125, 3.77142333984375, 3.9910888671875, 4.21075439453125, 4.430419921875, 4.65008544921875, 4.8697509765625, 5.08941650390625, 5.30908203125, 5.52874755859375, 5.7484130859375, 5.96807861328125, 6.187744140625, 6.40740966796875, 6.6270751953125, 6.84674072265625, 7.06640625]}, "gradients/decoder.transformer.h.8.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 6.0, 7.0, 14.0, 21.0, 32.0, 82.0, 143.0, 268.0, 481.0, 873.0, 915.0, 535.0, 295.0, 168.0, 96.0, 54.0, 35.0, 24.0, 11.0, 10.0, 8.0, 5.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.6796875, -6.2054443359375, -5.731201171875, -5.2569580078125, -4.78271484375, -4.3084716796875, -3.834228515625, -3.3599853515625, -2.8857421875, -2.4114990234375, -1.937255859375, -1.4630126953125, -0.98876953125, -0.5145263671875, -0.040283203125, 0.4339599609375, 0.908203125, 1.3824462890625, 1.856689453125, 2.3309326171875, 2.80517578125, 3.2794189453125, 3.753662109375, 4.2279052734375, 4.7021484375, 5.1763916015625, 5.650634765625, 6.1248779296875, 6.59912109375, 7.0733642578125, 7.547607421875, 8.0218505859375, 8.49609375, 8.9703369140625, 9.444580078125, 9.9188232421875, 10.39306640625, 10.8673095703125, 11.341552734375, 11.8157958984375, 12.2900390625, 12.7642822265625, 13.238525390625, 13.7127685546875, 14.18701171875, 14.6612548828125, 15.135498046875, 15.6097412109375, 16.083984375, 16.5582275390625, 17.032470703125, 17.5067138671875, 17.98095703125, 18.4552001953125, 18.929443359375, 19.4036865234375, 19.8779296875, 20.3521728515625, 20.826416015625, 21.3006591796875, 21.77490234375, 22.2491455078125, 22.723388671875, 23.1976318359375, 23.671875]}, "gradients/decoder.transformer.h.8.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 10.0, 7.0, 23.0, 31.0, 46.0, 71.0, 164.0, 314.0, 553.0, 1573.0, 37732.0, 4039894.0, 110081.0, 2424.0, 689.0, 320.0, 166.0, 92.0, 41.0, 23.0, 12.0, 4.0, 9.0, 6.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-66.25, -64.797119140625, -63.34423828125, -61.891357421875, -60.4384765625, -58.985595703125, -57.53271484375, -56.079833984375, -54.626953125, -53.174072265625, -51.72119140625, -50.268310546875, -48.8154296875, -47.362548828125, -45.90966796875, -44.456787109375, -43.00390625, -41.551025390625, -40.09814453125, -38.645263671875, -37.1923828125, -35.739501953125, -34.28662109375, -32.833740234375, -31.380859375, -29.927978515625, -28.47509765625, -27.022216796875, -25.5693359375, -24.116455078125, -22.66357421875, -21.210693359375, -19.7578125, -18.304931640625, -16.85205078125, -15.399169921875, -13.9462890625, -12.493408203125, -11.04052734375, -9.587646484375, -8.134765625, -6.681884765625, -5.22900390625, -3.776123046875, -2.3232421875, -0.870361328125, 0.58251953125, 2.035400390625, 3.48828125, 4.941162109375, 6.39404296875, 7.846923828125, 9.2998046875, 10.752685546875, 12.20556640625, 13.658447265625, 15.111328125, 16.564208984375, 18.01708984375, 19.469970703125, 20.9228515625, 22.375732421875, 23.82861328125, 25.281494140625, 26.734375]}, "gradients/decoder.transformer.h.8.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 16.0, 987.0, 14.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-307.8494567871094, -286.51104736328125, -265.172607421875, -243.8341827392578, -222.49575805664062, -201.15733337402344, -179.81890869140625, -158.48048400878906, -137.14205932617188, -115.80363464355469, -94.4652099609375, -73.12678527832031, -51.788360595703125, -30.449935913085938, -9.11151123046875, 12.226913452148438, 33.565338134765625, 54.90376281738281, 76.2421875, 97.58061218261719, 118.91903686523438, 140.25746154785156, 161.59588623046875, 182.93431091308594, 204.27273559570312, 225.6111602783203, 246.9495849609375, 268.28802490234375, 289.6264343261719, 310.96484375, 332.30328369140625, 353.6417236328125, 374.9801025390625, 396.31854248046875, 417.6569519042969, 438.995361328125, 460.33380126953125, 481.6722412109375, 503.0106506347656, 524.3490600585938, 545.6875, 567.0259399414062, 588.3643798828125, 609.7027587890625, 631.0411987304688, 652.379638671875, 673.718017578125, 695.0564575195312, 716.3948974609375, 737.7333374023438, 759.07177734375, 780.41015625, 801.7485961914062, 823.0870361328125, 844.4254150390625, 865.7638549804688, 887.102294921875, 908.4407348632812, 929.7791748046875, 951.1175537109375, 972.4559936523438, 993.79443359375, 1015.1328125, 1036.4713134765625, 1057.8096923828125]}, "gradients/decoder.transformer.h.8.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 8.0, 8.0, 11.0, 15.0, 21.0, 39.0, 41.0, 68.0, 75.0, 79.0, 70.0, 97.0, 77.0, 83.0, 72.0, 61.0, 54.0, 41.0, 35.0, 26.0, 13.0, 11.0, 10.0, 2.0, 3.0, 0.0, 1.0], "bins": [-84.75094604492188, -82.99827575683594, -81.24560546875, -79.4929428100586, -77.74027252197266, -75.98760223388672, -74.23493194580078, -72.48226928710938, -70.72959899902344, -68.9769287109375, -67.22425842285156, -65.47159576416016, -63.71892547607422, -61.96625518798828, -60.213584899902344, -58.46091842651367, -56.708248138427734, -54.9555778503418, -53.202911376953125, -51.45024108886719, -49.697574615478516, -47.94490432739258, -46.192237854003906, -44.43956756591797, -42.68689727783203, -40.934226989746094, -39.18156051635742, -37.428890228271484, -35.67622375488281, -33.923553466796875, -32.17088317871094, -30.418216705322266, -28.665550231933594, -26.91288185119629, -25.160213470458984, -23.407543182373047, -21.654876708984375, -19.902206420898438, -18.149538040161133, -16.396869659423828, -14.644201278686523, -12.891532897949219, -11.138864517211914, -9.386195182800293, -7.633526802062988, -5.880858421325684, -4.1281890869140625, -2.375520706176758, -0.6228523254394531, 1.1298162937164307, 2.8824849128723145, 4.635153770446777, 6.387822151184082, 8.140490531921387, 9.893159866333008, 11.645828247070312, 13.398496627807617, 15.151165008544922, 16.903833389282227, 18.65650177001953, 20.40917205810547, 22.16183853149414, 23.914508819580078, 25.667177200317383, 27.419845581054688]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 1.0, 0.0, 2.0, 2.0, 4.0, 4.0, 7.0, 7.0, 10.0, 9.0, 13.0, 11.0, 9.0, 17.0, 17.0, 16.0, 24.0, 26.0, 25.0, 27.0, 28.0, 35.0, 35.0, 36.0, 28.0, 39.0, 35.0, 39.0, 40.0, 37.0, 34.0, 32.0, 39.0, 37.0, 28.0, 18.0, 25.0, 32.0, 26.0, 23.0, 19.0, 17.0, 19.0, 13.0, 7.0, 11.0, 7.0, 8.0, 9.0, 5.0, 7.0, 2.0, 4.0, 4.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 2.0], "bins": [-3.91796875, -3.7921142578125, -3.666259765625, -3.5404052734375, -3.41455078125, -3.2886962890625, -3.162841796875, -3.0369873046875, -2.9111328125, -2.7852783203125, -2.659423828125, -2.5335693359375, -2.40771484375, -2.2818603515625, -2.156005859375, -2.0301513671875, -1.904296875, -1.7784423828125, -1.652587890625, -1.5267333984375, -1.40087890625, -1.2750244140625, -1.149169921875, -1.0233154296875, -0.8974609375, -0.7716064453125, -0.645751953125, -0.5198974609375, -0.39404296875, -0.2681884765625, -0.142333984375, -0.0164794921875, 0.109375, 0.2352294921875, 0.361083984375, 0.4869384765625, 0.61279296875, 0.7386474609375, 0.864501953125, 0.9903564453125, 1.1162109375, 1.2420654296875, 1.367919921875, 1.4937744140625, 1.61962890625, 1.7454833984375, 1.871337890625, 1.9971923828125, 2.123046875, 2.2489013671875, 2.374755859375, 2.5006103515625, 2.62646484375, 2.7523193359375, 2.878173828125, 3.0040283203125, 3.1298828125, 3.2557373046875, 3.381591796875, 3.5074462890625, 3.63330078125, 3.7591552734375, 3.885009765625, 4.0108642578125, 4.13671875]}, "gradients/decoder.transformer.h.8.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 5.0, 1.0, 1.0, 2.0, 4.0, 6.0, 5.0, 20.0, 21.0, 23.0, 51.0, 77.0, 121.0, 133.0, 251.0, 363.0, 600.0, 812.0, 1272.0, 1839.0, 2736.0, 4218.0, 5974.0, 9122.0, 13795.0, 20926.0, 33079.0, 54134.0, 94996.0, 192030.0, 295931.0, 128349.0, 69948.0, 41263.0, 26214.0, 16787.0, 11016.0, 7409.0, 4929.0, 3308.0, 2268.0, 1479.0, 1010.0, 686.0, 438.0, 289.0, 217.0, 132.0, 90.0, 66.0, 46.0, 25.0, 20.0, 7.0, 8.0, 9.0, 2.0, 4.0, 4.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.1175537109375, -0.11379051208496094, -0.11002731323242188, -0.10626411437988281, -0.10250091552734375, -0.09873771667480469, -0.09497451782226562, -0.09121131896972656, -0.0874481201171875, -0.08368492126464844, -0.07992172241210938, -0.07615852355957031, -0.07239532470703125, -0.06863212585449219, -0.06486892700195312, -0.06110572814941406, -0.057342529296875, -0.05357933044433594, -0.049816131591796875, -0.04605293273925781, -0.04228973388671875, -0.03852653503417969, -0.034763336181640625, -0.031000137329101562, -0.0272369384765625, -0.023473739624023438, -0.019710540771484375, -0.015947341918945312, -0.01218414306640625, -0.008420944213867188, -0.004657745361328125, -0.0008945465087890625, 0.00286865234375, 0.0066318511962890625, 0.010395050048828125, 0.014158248901367188, 0.01792144775390625, 0.021684646606445312, 0.025447845458984375, 0.029211044311523438, 0.0329742431640625, 0.03673744201660156, 0.040500640869140625, 0.04426383972167969, 0.04802703857421875, 0.05179023742675781, 0.055553436279296875, 0.05931663513183594, 0.063079833984375, 0.06684303283691406, 0.07060623168945312, 0.07436943054199219, 0.07813262939453125, 0.08189582824707031, 0.08565902709960938, 0.08942222595214844, 0.0931854248046875, 0.09694862365722656, 0.10071182250976562, 0.10447502136230469, 0.10823822021484375, 0.11200141906738281, 0.11576461791992188, 0.11952781677246094, 0.123291015625]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 0.0, 2.0, 3.0, 2.0, 4.0, 7.0, 10.0, 7.0, 14.0, 14.0, 13.0, 16.0, 10.0, 12.0, 18.0, 23.0, 20.0, 22.0, 25.0, 33.0, 31.0, 38.0, 41.0, 45.0, 35.0, 39.0, 1059.0, 41.0, 33.0, 48.0, 37.0, 35.0, 18.0, 22.0, 30.0, 31.0, 25.0, 21.0, 19.0, 18.0, 23.0, 16.0, 15.0, 14.0, 5.0, 10.0, 4.0, 7.0, 4.0, 6.0, 5.0, 1.0, 4.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.087890625, -2.995941162109375, -2.90399169921875, -2.812042236328125, -2.7200927734375, -2.628143310546875, -2.53619384765625, -2.444244384765625, -2.352294921875, -2.260345458984375, -2.16839599609375, -2.076446533203125, -1.9844970703125, -1.892547607421875, -1.80059814453125, -1.708648681640625, -1.61669921875, -1.524749755859375, -1.43280029296875, -1.340850830078125, -1.2489013671875, -1.156951904296875, -1.06500244140625, -0.973052978515625, -0.881103515625, -0.789154052734375, -0.69720458984375, -0.605255126953125, -0.5133056640625, -0.421356201171875, -0.32940673828125, -0.237457275390625, -0.1455078125, -0.053558349609375, 0.03839111328125, 0.130340576171875, 0.2222900390625, 0.314239501953125, 0.40618896484375, 0.498138427734375, 0.590087890625, 0.682037353515625, 0.77398681640625, 0.865936279296875, 0.9578857421875, 1.049835205078125, 1.14178466796875, 1.233734130859375, 1.32568359375, 1.417633056640625, 1.50958251953125, 1.601531982421875, 1.6934814453125, 1.785430908203125, 1.87738037109375, 1.969329833984375, 2.061279296875, 2.153228759765625, 2.24517822265625, 2.337127685546875, 2.4290771484375, 2.521026611328125, 2.61297607421875, 2.704925537109375, 2.796875]}, "gradients/decoder.transformer.h.8.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 2.0, 5.0, 5.0, 8.0, 23.0, 27.0, 41.0, 61.0, 64.0, 112.0, 137.0, 218.0, 326.0, 539.0, 813.0, 1220.0, 1761.0, 2749.0, 4334.0, 6733.0, 10271.0, 16188.0, 25930.0, 42704.0, 74796.0, 152968.0, 1394055.0, 163749.0, 78642.0, 44418.0, 26700.0, 16868.0, 10811.0, 6792.0, 4477.0, 2985.0, 1941.0, 1236.0, 792.0, 510.0, 359.0, 270.0, 178.0, 116.0, 68.0, 42.0, 21.0, 23.0, 16.0, 11.0, 10.0, 4.0, 7.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.1356201171875, -0.1315765380859375, -0.127532958984375, -0.1234893798828125, -0.11944580078125, -0.1154022216796875, -0.111358642578125, -0.1073150634765625, -0.103271484375, -0.0992279052734375, -0.095184326171875, -0.0911407470703125, -0.08709716796875, -0.0830535888671875, -0.079010009765625, -0.0749664306640625, -0.0709228515625, -0.0668792724609375, -0.062835693359375, -0.0587921142578125, -0.05474853515625, -0.0507049560546875, -0.046661376953125, -0.0426177978515625, -0.03857421875, -0.0345306396484375, -0.030487060546875, -0.0264434814453125, -0.02239990234375, -0.0183563232421875, -0.014312744140625, -0.0102691650390625, -0.0062255859375, -0.0021820068359375, 0.001861572265625, 0.0059051513671875, 0.00994873046875, 0.0139923095703125, 0.018035888671875, 0.0220794677734375, 0.026123046875, 0.0301666259765625, 0.034210205078125, 0.0382537841796875, 0.04229736328125, 0.0463409423828125, 0.050384521484375, 0.0544281005859375, 0.0584716796875, 0.0625152587890625, 0.066558837890625, 0.0706024169921875, 0.07464599609375, 0.0786895751953125, 0.082733154296875, 0.0867767333984375, 0.0908203125, 0.0948638916015625, 0.098907470703125, 0.1029510498046875, 0.10699462890625, 0.1110382080078125, 0.115081787109375, 0.1191253662109375, 0.1231689453125]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 5.0, 6.0, 3.0, 2.0, 7.0, 5.0, 10.0, 14.0, 6.0, 13.0, 12.0, 17.0, 33.0, 23.0, 16.0, 37.0, 24.0, 33.0, 44.0, 37.0, 39.0, 67.0, 38.0, 58.0, 54.0, 47.0, 44.0, 40.0, 45.0, 25.0, 36.0, 25.0, 12.0, 13.0, 20.0, 18.0, 11.0, 12.0, 11.0, 4.0, 9.0, 9.0, 3.0, 4.0, 5.0, 5.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 1.0], "bins": [-7.748603820800781e-06, -7.491558790206909e-06, -7.234513759613037e-06, -6.977468729019165e-06, -6.720423698425293e-06, -6.463378667831421e-06, -6.206333637237549e-06, -5.949288606643677e-06, -5.692243576049805e-06, -5.435198545455933e-06, -5.1781535148620605e-06, -4.9211084842681885e-06, -4.664063453674316e-06, -4.407018423080444e-06, -4.149973392486572e-06, -3.8929283618927e-06, -3.635883331298828e-06, -3.378838300704956e-06, -3.121793270111084e-06, -2.864748239517212e-06, -2.60770320892334e-06, -2.3506581783294678e-06, -2.0936131477355957e-06, -1.8365681171417236e-06, -1.5795230865478516e-06, -1.3224780559539795e-06, -1.0654330253601074e-06, -8.083879947662354e-07, -5.513429641723633e-07, -2.942979335784912e-07, -3.725290298461914e-08, 2.1979212760925293e-07, 4.76837158203125e-07, 7.338821887969971e-07, 9.909272193908691e-07, 1.2479722499847412e-06, 1.5050172805786133e-06, 1.7620623111724854e-06, 2.0191073417663574e-06, 2.2761523723602295e-06, 2.5331974029541016e-06, 2.7902424335479736e-06, 3.0472874641418457e-06, 3.3043324947357178e-06, 3.56137752532959e-06, 3.818422555923462e-06, 4.075467586517334e-06, 4.332512617111206e-06, 4.589557647705078e-06, 4.84660267829895e-06, 5.103647708892822e-06, 5.360692739486694e-06, 5.617737770080566e-06, 5.8747828006744385e-06, 6.1318278312683105e-06, 6.388872861862183e-06, 6.645917892456055e-06, 6.902962923049927e-06, 7.160007953643799e-06, 7.417052984237671e-06, 7.674098014831543e-06, 7.931143045425415e-06, 8.188188076019287e-06, 8.44523310661316e-06, 8.702278137207031e-06]}, "gradients/decoder.transformer.h.8.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 3.0, 2.0, 8.0, 6.0, 4.0, 6.0, 15.0, 17.0, 22.0, 19.0, 21.0, 29.0, 26.0, 40.0, 34.0, 57.0, 68.0, 91.0, 148.0, 231.0, 454.0, 1780.0, 176592.0, 861144.0, 6159.0, 627.0, 266.0, 150.0, 95.0, 90.0, 72.0, 40.0, 44.0, 50.0, 31.0, 14.0, 23.0, 23.0, 7.0, 14.0, 10.0, 5.0, 4.0, 5.0, 5.0, 1.0, 1.0, 2.0, 3.0, 1.0, 0.0, 3.0], "bins": [-0.00016808509826660156, -0.00016330741345882416, -0.00015852972865104675, -0.00015375204384326935, -0.00014897435903549194, -0.00014419667422771454, -0.00013941898941993713, -0.00013464130461215973, -0.00012986361980438232, -0.00012508593499660492, -0.00012030825018882751, -0.00011553056538105011, -0.0001107528805732727, -0.0001059751957654953, -0.0001011975109577179, -9.641982614994049e-05, -9.164214134216309e-05, -8.686445653438568e-05, -8.208677172660828e-05, -7.730908691883087e-05, -7.253140211105347e-05, -6.775371730327606e-05, -6.297603249549866e-05, -5.819834768772125e-05, -5.342066287994385e-05, -4.864297807216644e-05, -4.386529326438904e-05, -3.908760845661163e-05, -3.430992364883423e-05, -2.9532238841056824e-05, -2.475455403327942e-05, -1.9976869225502014e-05, -1.519918441772461e-05, -1.0421499609947205e-05, -5.6438148021698e-06, -8.66129994392395e-07, 3.91155481338501e-06, 8.689239621162415e-06, 1.346692442893982e-05, 1.8244609236717224e-05, 2.302229404449463e-05, 2.7799978852272034e-05, 3.257766366004944e-05, 3.735534846782684e-05, 4.213303327560425e-05, 4.691071808338165e-05, 5.168840289115906e-05, 5.646608769893646e-05, 6.124377250671387e-05, 6.602145731449127e-05, 7.079914212226868e-05, 7.557682693004608e-05, 8.035451173782349e-05, 8.513219654560089e-05, 8.99098813533783e-05, 9.46875661611557e-05, 9.94652509689331e-05, 0.00010424293577671051, 0.00010902062058448792, 0.00011379830539226532, 0.00011857599020004272, 0.00012335367500782013, 0.00012813135981559753, 0.00013290904462337494, 0.00013768672943115234]}, "gradients/decoder.transformer.h.8.ln_cross_attn.weight": {"_type": "histogram", "values": [7.0, 39.0, 312.0, 533.0, 120.0, 11.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.340202965773642e-06, -2.318414317414863e-06, -1.296625669056084e-06, -2.74837020697305e-07, 7.46951627661474e-07, 1.768740276020253e-06, 2.790528924379032e-06, 3.812317572737811e-06, 4.83410622109659e-06, 5.855894869455369e-06, 6.877683517814148e-06, 7.899472620920278e-06, 8.921260814531706e-06, 9.943049008143134e-06, 1.0964838111249264e-05, 1.1986627214355394e-05, 1.3008415407966822e-05, 1.403020360157825e-05, 1.505199270468438e-05, 1.607378180779051e-05, 1.709557000140194e-05, 1.8117358195013367e-05, 1.91391482076142e-05, 2.0160936401225626e-05, 2.1182724594837055e-05, 2.2204512788448483e-05, 2.322630098205991e-05, 2.4248090994660743e-05, 2.526987918827217e-05, 2.62916673818836e-05, 2.731345739448443e-05, 2.833524558809586e-05, 2.9357033781707287e-05, 3.0378821975318715e-05, 3.140061016893014e-05, 3.242239836254157e-05, 3.3444186556153e-05, 3.4465978387743235e-05, 3.548776658135466e-05, 3.650955477496609e-05, 3.753134296857752e-05, 3.855313116218895e-05, 3.9574919355800375e-05, 4.0596707549411803e-05, 4.161849938100204e-05, 4.264028757461347e-05, 4.3662075768224895e-05, 4.468386396183632e-05, 4.570565215544775e-05, 4.672744034905918e-05, 4.774922854267061e-05, 4.8771016736282036e-05, 4.9792804929893464e-05, 5.08145967614837e-05, 5.183638495509513e-05, 5.2858173148706555e-05, 5.3879961342317984e-05, 5.490174953592941e-05, 5.592353772954084e-05, 5.694532592315227e-05, 5.79671177547425e-05, 5.898890594835393e-05, 6.001069414196536e-05, 6.103248233557679e-05, 6.205427052918822e-05]}, "gradients/decoder.transformer.h.8.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 1.0, 6.0, 3.0, 4.0, 5.0, 2.0, 6.0, 19.0, 8.0, 22.0, 19.0, 7.0, 23.0, 22.0, 17.0, 41.0, 32.0, 38.0, 44.0, 28.0, 48.0, 51.0, 26.0, 51.0, 52.0, 34.0, 61.0, 30.0, 34.0, 37.0, 28.0, 29.0, 22.0, 16.0, 22.0, 21.0, 16.0, 17.0, 16.0, 9.0, 9.0, 4.0, 6.0, 6.0, 5.0, 5.0, 3.0, 3.0, 1.0, 4.0, 1.0, 0.0, 2.0], "bins": [-5.4836273193359375e-06, -5.326233804225922e-06, -5.168840289115906e-06, -5.01144677400589e-06, -4.854053258895874e-06, -4.696659743785858e-06, -4.539266228675842e-06, -4.381872713565826e-06, -4.2244791984558105e-06, -4.067085683345795e-06, -3.909692168235779e-06, -3.752298653125763e-06, -3.594905138015747e-06, -3.437511622905731e-06, -3.2801181077957153e-06, -3.1227245926856995e-06, -2.9653310775756836e-06, -2.8079375624656677e-06, -2.650544047355652e-06, -2.493150532245636e-06, -2.33575701713562e-06, -2.1783635020256042e-06, -2.0209699869155884e-06, -1.8635764718055725e-06, -1.7061829566955566e-06, -1.5487894415855408e-06, -1.391395926475525e-06, -1.234002411365509e-06, -1.0766088962554932e-06, -9.192153811454773e-07, -7.618218660354614e-07, -6.044283509254456e-07, -4.470348358154297e-07, -2.896413207054138e-07, -1.3224780559539795e-07, 2.514570951461792e-08, 1.825392246246338e-07, 3.3993273973464966e-07, 4.973262548446655e-07, 6.547197699546814e-07, 8.121132850646973e-07, 9.695068001747131e-07, 1.126900315284729e-06, 1.2842938303947449e-06, 1.4416873455047607e-06, 1.5990808606147766e-06, 1.7564743757247925e-06, 1.9138678908348083e-06, 2.0712614059448242e-06, 2.22865492105484e-06, 2.386048436164856e-06, 2.543441951274872e-06, 2.7008354663848877e-06, 2.8582289814949036e-06, 3.0156224966049194e-06, 3.1730160117149353e-06, 3.330409526824951e-06, 3.487803041934967e-06, 3.645196557044983e-06, 3.8025900721549988e-06, 3.959983587265015e-06, 4.1173771023750305e-06, 4.274770617485046e-06, 4.432164132595062e-06, 4.589557647705078e-06]}, "gradients/decoder.transformer.h.8.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 1.0, 0.0, 2.0, 2.0, 4.0, 4.0, 7.0, 7.0, 10.0, 9.0, 13.0, 11.0, 9.0, 17.0, 17.0, 16.0, 24.0, 26.0, 25.0, 27.0, 28.0, 35.0, 35.0, 36.0, 28.0, 39.0, 35.0, 39.0, 40.0, 37.0, 34.0, 32.0, 39.0, 37.0, 28.0, 18.0, 25.0, 32.0, 26.0, 23.0, 19.0, 17.0, 19.0, 13.0, 7.0, 11.0, 7.0, 8.0, 9.0, 5.0, 7.0, 2.0, 4.0, 4.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 2.0], "bins": [-3.91796875, -3.7921142578125, -3.666259765625, -3.5404052734375, -3.41455078125, -3.2886962890625, -3.162841796875, -3.0369873046875, -2.9111328125, -2.7852783203125, -2.659423828125, -2.5335693359375, -2.40771484375, -2.2818603515625, -2.156005859375, -2.0301513671875, -1.904296875, -1.7784423828125, -1.652587890625, -1.5267333984375, -1.40087890625, -1.2750244140625, -1.149169921875, -1.0233154296875, -0.8974609375, -0.7716064453125, -0.645751953125, -0.5198974609375, -0.39404296875, -0.2681884765625, -0.142333984375, -0.0164794921875, 0.109375, 0.2352294921875, 0.361083984375, 0.4869384765625, 0.61279296875, 0.7386474609375, 0.864501953125, 0.9903564453125, 1.1162109375, 1.2420654296875, 1.367919921875, 1.4937744140625, 1.61962890625, 1.7454833984375, 1.871337890625, 1.9971923828125, 2.123046875, 2.2489013671875, 2.374755859375, 2.5006103515625, 2.62646484375, 2.7523193359375, 2.878173828125, 3.0040283203125, 3.1298828125, 3.2557373046875, 3.381591796875, 3.5074462890625, 3.63330078125, 3.7591552734375, 3.885009765625, 4.0108642578125, 4.13671875]}, "gradients/decoder.transformer.h.8.attn.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 0.0, 0.0, 3.0, 4.0, 6.0, 8.0, 8.0, 12.0, 19.0, 26.0, 34.0, 56.0, 77.0, 148.0, 231.0, 384.0, 710.0, 1298.0, 2416.0, 4576.0, 8889.0, 17983.0, 37353.0, 85535.0, 225750.0, 383571.0, 158478.0, 63118.0, 28824.0, 14048.0, 7075.0, 3667.0, 1833.0, 1042.0, 545.0, 332.0, 166.0, 113.0, 63.0, 49.0, 33.0, 17.0, 17.0, 16.0, 8.0, 11.0, 3.0, 4.0, 3.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.79296875, -3.6590576171875, -3.525146484375, -3.3912353515625, -3.25732421875, -3.1234130859375, -2.989501953125, -2.8555908203125, -2.7216796875, -2.5877685546875, -2.453857421875, -2.3199462890625, -2.18603515625, -2.0521240234375, -1.918212890625, -1.7843017578125, -1.650390625, -1.5164794921875, -1.382568359375, -1.2486572265625, -1.11474609375, -0.9808349609375, -0.846923828125, -0.7130126953125, -0.5791015625, -0.4451904296875, -0.311279296875, -0.1773681640625, -0.04345703125, 0.0904541015625, 0.224365234375, 0.3582763671875, 0.4921875, 0.6260986328125, 0.760009765625, 0.8939208984375, 1.02783203125, 1.1617431640625, 1.295654296875, 1.4295654296875, 1.5634765625, 1.6973876953125, 1.831298828125, 1.9652099609375, 2.09912109375, 2.2330322265625, 2.366943359375, 2.5008544921875, 2.634765625, 2.7686767578125, 2.902587890625, 3.0364990234375, 3.17041015625, 3.3043212890625, 3.438232421875, 3.5721435546875, 3.7060546875, 3.8399658203125, 3.973876953125, 4.1077880859375, 4.24169921875, 4.3756103515625, 4.509521484375, 4.6434326171875, 4.77734375]}, "gradients/decoder.transformer.h.8.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 3.0, 3.0, 5.0, 6.0, 6.0, 10.0, 8.0, 13.0, 14.0, 22.0, 21.0, 26.0, 19.0, 32.0, 38.0, 53.0, 33.0, 49.0, 73.0, 96.0, 281.0, 1614.0, 161.0, 74.0, 56.0, 52.0, 53.0, 32.0, 37.0, 26.0, 19.0, 28.0, 16.0, 15.0, 12.0, 10.0, 12.0, 7.0, 9.0, 9.0, 3.0, 0.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.2734375, -14.800537109375, -14.32763671875, -13.854736328125, -13.3818359375, -12.908935546875, -12.43603515625, -11.963134765625, -11.490234375, -11.017333984375, -10.54443359375, -10.071533203125, -9.5986328125, -9.125732421875, -8.65283203125, -8.179931640625, -7.70703125, -7.234130859375, -6.76123046875, -6.288330078125, -5.8154296875, -5.342529296875, -4.86962890625, -4.396728515625, -3.923828125, -3.450927734375, -2.97802734375, -2.505126953125, -2.0322265625, -1.559326171875, -1.08642578125, -0.613525390625, -0.140625, 0.332275390625, 0.80517578125, 1.278076171875, 1.7509765625, 2.223876953125, 2.69677734375, 3.169677734375, 3.642578125, 4.115478515625, 4.58837890625, 5.061279296875, 5.5341796875, 6.007080078125, 6.47998046875, 6.952880859375, 7.42578125, 7.898681640625, 8.37158203125, 8.844482421875, 9.3173828125, 9.790283203125, 10.26318359375, 10.736083984375, 11.208984375, 11.681884765625, 12.15478515625, 12.627685546875, 13.1005859375, 13.573486328125, 14.04638671875, 14.519287109375, 14.9921875]}, "gradients/decoder.transformer.h.8.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 7.0, 6.0, 8.0, 8.0, 16.0, 18.0, 21.0, 35.0, 49.0, 55.0, 91.0, 143.0, 193.0, 342.0, 932.0, 5379.0, 267788.0, 2849264.0, 18533.0, 1555.0, 460.0, 248.0, 177.0, 102.0, 70.0, 46.0, 42.0, 32.0, 28.0, 17.0, 12.0, 10.0, 6.0, 9.0, 2.0, 2.0, 2.0, 1.0, 4.0, 0.0, 0.0, 1.0, 0.0, 4.0], "bins": [-33.46875, -32.569091796875, -31.66943359375, -30.769775390625, -29.8701171875, -28.970458984375, -28.07080078125, -27.171142578125, -26.271484375, -25.371826171875, -24.47216796875, -23.572509765625, -22.6728515625, -21.773193359375, -20.87353515625, -19.973876953125, -19.07421875, -18.174560546875, -17.27490234375, -16.375244140625, -15.4755859375, -14.575927734375, -13.67626953125, -12.776611328125, -11.876953125, -10.977294921875, -10.07763671875, -9.177978515625, -8.2783203125, -7.378662109375, -6.47900390625, -5.579345703125, -4.6796875, -3.780029296875, -2.88037109375, -1.980712890625, -1.0810546875, -0.181396484375, 0.71826171875, 1.617919921875, 2.517578125, 3.417236328125, 4.31689453125, 5.216552734375, 6.1162109375, 7.015869140625, 7.91552734375, 8.815185546875, 9.71484375, 10.614501953125, 11.51416015625, 12.413818359375, 13.3134765625, 14.213134765625, 15.11279296875, 16.012451171875, 16.912109375, 17.811767578125, 18.71142578125, 19.611083984375, 20.5107421875, 21.410400390625, 22.31005859375, 23.209716796875, 24.109375]}, "gradients/decoder.transformer.h.8.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 6.0, 153.0, 750.0, 109.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-225.499755859375, -221.02699279785156, -216.55421447753906, -212.08145141601562, -207.60867309570312, -203.1359100341797, -198.66314697265625, -194.19036865234375, -189.7176055908203, -185.24484252929688, -180.77206420898438, -176.29930114746094, -171.82652282714844, -167.353759765625, -162.8809814453125, -158.40821838378906, -153.93545532226562, -149.4626922607422, -144.9899139404297, -140.51715087890625, -136.04437255859375, -131.5716094970703, -127.09883880615234, -122.62606811523438, -118.15328979492188, -113.6805191040039, -109.20774841308594, -104.7349853515625, -100.26221466064453, -95.78944396972656, -91.3166732788086, -86.84390258789062, -82.37113952636719, -77.89836883544922, -73.42559814453125, -68.95283508300781, -64.48006439208984, -60.007293701171875, -55.534523010253906, -51.06175231933594, -46.58898162841797, -42.1162109375, -37.6434440612793, -33.17067337036133, -28.697904586791992, -24.225135803222656, -19.752365112304688, -15.279596328735352, -10.806827545166016, -6.3340582847595215, -1.8612890243530273, 2.611480712890625, 7.084249496459961, 11.557018280029297, 16.029788970947266, 20.5025577545166, 24.975326538085938, 29.448095321655273, 33.92086410522461, 38.39363479614258, 42.86640167236328, 47.33917236328125, 51.81194305419922, 56.28471374511719, 60.75748062133789]}, "gradients/decoder.transformer.h.8.ln_1.bias": {"_type": "histogram", "values": [3.0, 1.0, 2.0, 3.0, 3.0, 4.0, 2.0, 6.0, 5.0, 12.0, 10.0, 9.0, 11.0, 15.0, 20.0, 23.0, 20.0, 20.0, 21.0, 19.0, 25.0, 32.0, 34.0, 58.0, 48.0, 41.0, 49.0, 34.0, 33.0, 43.0, 35.0, 42.0, 30.0, 45.0, 34.0, 22.0, 15.0, 34.0, 22.0, 20.0, 14.0, 13.0, 22.0, 13.0, 10.0, 11.0, 6.0, 5.0, 7.0, 2.0, 5.0, 5.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-30.612934112548828, -29.540353775024414, -28.4677734375, -27.395191192626953, -26.32261085510254, -25.250030517578125, -24.177448272705078, -23.104867935180664, -22.03228759765625, -20.959707260131836, -19.887126922607422, -18.814544677734375, -17.74196434020996, -16.669384002685547, -15.596802711486816, -14.524221420288086, -13.451641082763672, -12.379060745239258, -11.306479454040527, -10.233898162841797, -9.161317825317383, -8.088737487792969, -7.016156196594238, -5.943575382232666, -4.870994567871094, -3.7984137535095215, -2.725832939147949, -1.653252124786377, -0.5806713104248047, 0.4919095039367676, 1.5644903182983398, 2.637071132659912, 3.7096481323242188, 4.782228946685791, 5.854809761047363, 6.9273905754089355, 7.999971389770508, 9.072551727294922, 10.145133018493652, 11.217714309692383, 12.290294647216797, 13.362874984741211, 14.435456275939941, 15.508037567138672, 16.580617904663086, 17.6531982421875, 18.725780487060547, 19.79836082458496, 20.870941162109375, 21.94352149963379, 23.016101837158203, 24.08868408203125, 25.161264419555664, 26.233844757080078, 27.306427001953125, 28.37900733947754, 29.451587677001953, 30.524168014526367, 31.59674835205078, 32.66933059692383, 33.741912841796875, 34.814491271972656, 35.8870735168457, 36.959651947021484, 38.03223419189453]}, "gradients/decoder.transformer.h.7.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 0.0, 1.0, 0.0, 1.0, 8.0, 5.0, 8.0, 3.0, 13.0, 4.0, 18.0, 17.0, 13.0, 15.0, 21.0, 14.0, 20.0, 33.0, 26.0, 32.0, 35.0, 42.0, 37.0, 44.0, 32.0, 34.0, 44.0, 32.0, 40.0, 42.0, 41.0, 30.0, 35.0, 22.0, 35.0, 31.0, 27.0, 24.0, 21.0, 19.0, 12.0, 12.0, 11.0, 7.0, 12.0, 10.0, 5.0, 7.0, 6.0, 2.0, 4.0, 3.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.01171875, -3.87713623046875, -3.7425537109375, -3.60797119140625, -3.473388671875, -3.33880615234375, -3.2042236328125, -3.06964111328125, -2.93505859375, -2.80047607421875, -2.6658935546875, -2.53131103515625, -2.396728515625, -2.26214599609375, -2.1275634765625, -1.99298095703125, -1.8583984375, -1.72381591796875, -1.5892333984375, -1.45465087890625, -1.320068359375, -1.18548583984375, -1.0509033203125, -0.91632080078125, -0.78173828125, -0.64715576171875, -0.5125732421875, -0.37799072265625, -0.243408203125, -0.10882568359375, 0.0257568359375, 0.16033935546875, 0.294921875, 0.42950439453125, 0.5640869140625, 0.69866943359375, 0.833251953125, 0.96783447265625, 1.1024169921875, 1.23699951171875, 1.37158203125, 1.50616455078125, 1.6407470703125, 1.77532958984375, 1.909912109375, 2.04449462890625, 2.1790771484375, 2.31365966796875, 2.4482421875, 2.58282470703125, 2.7174072265625, 2.85198974609375, 2.986572265625, 3.12115478515625, 3.2557373046875, 3.39031982421875, 3.52490234375, 3.65948486328125, 3.7940673828125, 3.92864990234375, 4.063232421875, 4.19781494140625, 4.3323974609375, 4.46697998046875, 4.6015625]}, "gradients/decoder.transformer.h.7.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 1.0, 3.0, 5.0, 1.0, 10.0, 8.0, 10.0, 9.0, 6.0, 12.0, 16.0, 23.0, 24.0, 35.0, 41.0, 89.0, 129.0, 273.0, 549.0, 1374.0, 3373.0, 9173.0, 29228.0, 128380.0, 1238531.0, 2450928.0, 262598.0, 47716.0, 13812.0, 4660.0, 1756.0, 710.0, 347.0, 150.0, 82.0, 60.0, 24.0, 20.0, 26.0, 16.0, 9.0, 17.0, 7.0, 7.0, 9.0, 10.0, 8.0, 4.0, 6.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-10.1015625, -9.777587890625, -9.45361328125, -9.129638671875, -8.8056640625, -8.481689453125, -8.15771484375, -7.833740234375, -7.509765625, -7.185791015625, -6.86181640625, -6.537841796875, -6.2138671875, -5.889892578125, -5.56591796875, -5.241943359375, -4.91796875, -4.593994140625, -4.27001953125, -3.946044921875, -3.6220703125, -3.298095703125, -2.97412109375, -2.650146484375, -2.326171875, -2.002197265625, -1.67822265625, -1.354248046875, -1.0302734375, -0.706298828125, -0.38232421875, -0.058349609375, 0.265625, 0.589599609375, 0.91357421875, 1.237548828125, 1.5615234375, 1.885498046875, 2.20947265625, 2.533447265625, 2.857421875, 3.181396484375, 3.50537109375, 3.829345703125, 4.1533203125, 4.477294921875, 4.80126953125, 5.125244140625, 5.44921875, 5.773193359375, 6.09716796875, 6.421142578125, 6.7451171875, 7.069091796875, 7.39306640625, 7.717041015625, 8.041015625, 8.364990234375, 8.68896484375, 9.012939453125, 9.3369140625, 9.660888671875, 9.98486328125, 10.308837890625, 10.6328125]}, "gradients/decoder.transformer.h.7.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 11.0, 17.0, 32.0, 44.0, 50.0, 75.0, 130.0, 208.0, 309.0, 521.0, 734.0, 700.0, 484.0, 284.0, 159.0, 96.0, 70.0, 51.0, 26.0, 25.0, 12.0, 10.0, 6.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-13.8828125, -13.52490234375, -13.1669921875, -12.80908203125, -12.451171875, -12.09326171875, -11.7353515625, -11.37744140625, -11.01953125, -10.66162109375, -10.3037109375, -9.94580078125, -9.587890625, -9.22998046875, -8.8720703125, -8.51416015625, -8.15625, -7.79833984375, -7.4404296875, -7.08251953125, -6.724609375, -6.36669921875, -6.0087890625, -5.65087890625, -5.29296875, -4.93505859375, -4.5771484375, -4.21923828125, -3.861328125, -3.50341796875, -3.1455078125, -2.78759765625, -2.4296875, -2.07177734375, -1.7138671875, -1.35595703125, -0.998046875, -0.64013671875, -0.2822265625, 0.07568359375, 0.43359375, 0.79150390625, 1.1494140625, 1.50732421875, 1.865234375, 2.22314453125, 2.5810546875, 2.93896484375, 3.296875, 3.65478515625, 4.0126953125, 4.37060546875, 4.728515625, 5.08642578125, 5.4443359375, 5.80224609375, 6.16015625, 6.51806640625, 6.8759765625, 7.23388671875, 7.591796875, 7.94970703125, 8.3076171875, 8.66552734375, 9.0234375]}, "gradients/decoder.transformer.h.7.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 6.0, 4.0, 5.0, 8.0, 7.0, 9.0, 12.0, 29.0, 41.0, 75.0, 97.0, 166.0, 254.0, 509.0, 1541.0, 10906.0, 411945.0, 3705448.0, 57820.0, 3653.0, 796.0, 362.0, 214.0, 124.0, 70.0, 60.0, 55.0, 25.0, 18.0, 12.0, 7.0, 7.0, 6.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.90625, -27.76220703125, -26.6181640625, -25.47412109375, -24.330078125, -23.18603515625, -22.0419921875, -20.89794921875, -19.75390625, -18.60986328125, -17.4658203125, -16.32177734375, -15.177734375, -14.03369140625, -12.8896484375, -11.74560546875, -10.6015625, -9.45751953125, -8.3134765625, -7.16943359375, -6.025390625, -4.88134765625, -3.7373046875, -2.59326171875, -1.44921875, -0.30517578125, 0.8388671875, 1.98291015625, 3.126953125, 4.27099609375, 5.4150390625, 6.55908203125, 7.703125, 8.84716796875, 9.9912109375, 11.13525390625, 12.279296875, 13.42333984375, 14.5673828125, 15.71142578125, 16.85546875, 17.99951171875, 19.1435546875, 20.28759765625, 21.431640625, 22.57568359375, 23.7197265625, 24.86376953125, 26.0078125, 27.15185546875, 28.2958984375, 29.43994140625, 30.583984375, 31.72802734375, 32.8720703125, 34.01611328125, 35.16015625, 36.30419921875, 37.4482421875, 38.59228515625, 39.736328125, 40.88037109375, 42.0244140625, 43.16845703125, 44.3125]}, "gradients/decoder.transformer.h.7.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 16.0, 343.0, 608.0, 52.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-364.70697021484375, -356.2657470703125, -347.82452392578125, -339.3833312988281, -330.9421081542969, -322.5008850097656, -314.0596923828125, -305.61846923828125, -297.17724609375, -288.73602294921875, -280.2947998046875, -271.8536071777344, -263.4123840332031, -254.97116088867188, -246.5299530029297, -238.0887451171875, -229.64752197265625, -221.206298828125, -212.7650909423828, -204.32388305664062, -195.88265991210938, -187.44143676757812, -179.00022888183594, -170.55902099609375, -162.1177978515625, -153.67657470703125, -145.23536682128906, -136.79415893554688, -128.35293579101562, -119.9117202758789, -111.47050476074219, -103.02928924560547, -94.58808898925781, -86.1468734741211, -77.70565795898438, -69.26444244384766, -60.82322692871094, -52.38201141357422, -43.9407958984375, -35.49958038330078, -27.058364868164062, -18.617149353027344, -10.175933837890625, -1.7347183227539062, 6.7064971923828125, 15.147712707519531, 23.58892822265625, 32.03014373779297, 40.47135925292969, 48.912574768066406, 57.353790283203125, 65.79500579833984, 74.23622131347656, 82.67743682861328, 91.11865234375, 99.55986785888672, 108.00108337402344, 116.44229888916016, 124.88351440429688, 133.32473754882812, 141.7659454345703, 150.2071533203125, 158.64837646484375, 167.089599609375, 175.5308074951172]}, "gradients/decoder.transformer.h.7.ln_2.bias": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 2.0, 3.0, 1.0, 1.0, 3.0, 4.0, 10.0, 8.0, 10.0, 10.0, 8.0, 12.0, 14.0, 12.0, 19.0, 19.0, 19.0, 25.0, 32.0, 31.0, 26.0, 35.0, 25.0, 39.0, 43.0, 28.0, 34.0, 30.0, 38.0, 43.0, 42.0, 31.0, 37.0, 37.0, 32.0, 21.0, 26.0, 21.0, 23.0, 22.0, 17.0, 25.0, 18.0, 11.0, 13.0, 11.0, 7.0, 5.0, 4.0, 10.0, 3.0, 5.0, 3.0, 4.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-25.006305694580078, -24.19660758972168, -23.38690948486328, -22.577213287353516, -21.767515182495117, -20.95781707763672, -20.14811897277832, -19.338420867919922, -18.528722763061523, -17.719024658203125, -16.909326553344727, -16.099628448486328, -15.289931297302246, -14.480234146118164, -13.670536041259766, -12.860837936401367, -12.051140785217285, -11.241442680358887, -10.431745529174805, -9.622047424316406, -8.812349319458008, -8.00265121459961, -7.192954063415527, -6.383255958557129, -5.573558330535889, -4.763860702514648, -3.95416259765625, -3.1444649696350098, -2.3347671031951904, -1.525069236755371, -0.7153716087341309, 0.09432649612426758, 0.9040241241455078, 1.7137219905853271, 2.5234198570251465, 3.3331174850463867, 4.142815589904785, 4.952513217926025, 5.762210845947266, 6.571908950805664, 7.381606578826904, 8.191304206848145, 9.001002311706543, 9.810699462890625, 10.620397567749023, 11.430095672607422, 12.23979377746582, 13.049491882324219, 13.8591890335083, 14.6688871383667, 15.478584289550781, 16.28828239440918, 17.097980499267578, 17.907678604125977, 18.717376708984375, 19.52707290649414, 20.33677101135254, 21.146469116210938, 21.956167221069336, 22.765865325927734, 23.5755615234375, 24.3852596282959, 25.194957733154297, 26.004655838012695, 26.814353942871094]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 4.0, 5.0, 7.0, 10.0, 11.0, 17.0, 25.0, 27.0, 21.0, 26.0, 36.0, 29.0, 44.0, 42.0, 46.0, 50.0, 41.0, 59.0, 52.0, 54.0, 49.0, 50.0, 48.0, 42.0, 33.0, 35.0, 28.0, 21.0, 28.0, 13.0, 12.0, 10.0, 10.0, 3.0, 9.0, 3.0, 1.0, 5.0, 4.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.35546875, -6.16876220703125, -5.9820556640625, -5.79534912109375, -5.608642578125, -5.42193603515625, -5.2352294921875, -5.04852294921875, -4.86181640625, -4.67510986328125, -4.4884033203125, -4.30169677734375, -4.114990234375, -3.92828369140625, -3.7415771484375, -3.55487060546875, -3.3681640625, -3.18145751953125, -2.9947509765625, -2.80804443359375, -2.621337890625, -2.43463134765625, -2.2479248046875, -2.06121826171875, -1.87451171875, -1.68780517578125, -1.5010986328125, -1.31439208984375, -1.127685546875, -0.94097900390625, -0.7542724609375, -0.56756591796875, -0.380859375, -0.19415283203125, -0.0074462890625, 0.17926025390625, 0.365966796875, 0.55267333984375, 0.7393798828125, 0.92608642578125, 1.11279296875, 1.29949951171875, 1.4862060546875, 1.67291259765625, 1.859619140625, 2.04632568359375, 2.2330322265625, 2.41973876953125, 2.6064453125, 2.79315185546875, 2.9798583984375, 3.16656494140625, 3.353271484375, 3.53997802734375, 3.7266845703125, 3.91339111328125, 4.10009765625, 4.28680419921875, 4.4735107421875, 4.66021728515625, 4.846923828125, 5.03363037109375, 5.2203369140625, 5.40704345703125, 5.59375]}, "gradients/decoder.transformer.h.7.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 4.0, 1.0, 2.0, 5.0, 9.0, 14.0, 13.0, 25.0, 21.0, 45.0, 67.0, 103.0, 180.0, 282.0, 478.0, 779.0, 1313.0, 2165.0, 3686.0, 6149.0, 10406.0, 18253.0, 33335.0, 62336.0, 129158.0, 371563.0, 213477.0, 89584.0, 46168.0, 24748.0, 14076.0, 8105.0, 4827.0, 2857.0, 1685.0, 987.0, 631.0, 387.0, 234.0, 138.0, 97.0, 59.0, 36.0, 31.0, 16.0, 14.0, 6.0, 8.0, 3.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.16162109375, -0.1561908721923828, -0.15076065063476562, -0.14533042907714844, -0.13990020751953125, -0.13446998596191406, -0.12903976440429688, -0.12360954284667969, -0.1181793212890625, -0.11274909973144531, -0.10731887817382812, -0.10188865661621094, -0.09645843505859375, -0.09102821350097656, -0.08559799194335938, -0.08016777038574219, -0.074737548828125, -0.06930732727050781, -0.06387710571289062, -0.05844688415527344, -0.05301666259765625, -0.04758644104003906, -0.042156219482421875, -0.03672599792480469, -0.0312957763671875, -0.025865554809570312, -0.020435333251953125, -0.015005111694335938, -0.00957489013671875, -0.0041446685791015625, 0.001285552978515625, 0.0067157745361328125, 0.01214599609375, 0.017576217651367188, 0.023006439208984375, 0.028436660766601562, 0.03386688232421875, 0.03929710388183594, 0.044727325439453125, 0.05015754699707031, 0.0555877685546875, 0.06101799011230469, 0.06644821166992188, 0.07187843322753906, 0.07730865478515625, 0.08273887634277344, 0.08816909790039062, 0.09359931945800781, 0.099029541015625, 0.10445976257324219, 0.10988998413085938, 0.11532020568847656, 0.12075042724609375, 0.12618064880371094, 0.13161087036132812, 0.1370410919189453, 0.1424713134765625, 0.1479015350341797, 0.15333175659179688, 0.15876197814941406, 0.16419219970703125, 0.16962242126464844, 0.17505264282226562, 0.1804828643798828, 0.1859130859375]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 1.0, 1.0, 5.0, 2.0, 8.0, 5.0, 3.0, 9.0, 17.0, 8.0, 8.0, 10.0, 23.0, 21.0, 17.0, 21.0, 27.0, 25.0, 44.0, 42.0, 45.0, 49.0, 41.0, 32.0, 41.0, 1061.0, 37.0, 49.0, 41.0, 40.0, 30.0, 32.0, 31.0, 39.0, 33.0, 18.0, 17.0, 23.0, 16.0, 13.0, 13.0, 12.0, 8.0, 8.0, 2.0, 5.0, 3.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.6015625, -3.498870849609375, -3.39617919921875, -3.293487548828125, -3.1907958984375, -3.088104248046875, -2.98541259765625, -2.882720947265625, -2.780029296875, -2.677337646484375, -2.57464599609375, -2.471954345703125, -2.3692626953125, -2.266571044921875, -2.16387939453125, -2.061187744140625, -1.95849609375, -1.855804443359375, -1.75311279296875, -1.650421142578125, -1.5477294921875, -1.445037841796875, -1.34234619140625, -1.239654541015625, -1.136962890625, -1.034271240234375, -0.93157958984375, -0.828887939453125, -0.7261962890625, -0.623504638671875, -0.52081298828125, -0.418121337890625, -0.3154296875, -0.212738037109375, -0.11004638671875, -0.007354736328125, 0.0953369140625, 0.198028564453125, 0.30072021484375, 0.403411865234375, 0.506103515625, 0.608795166015625, 0.71148681640625, 0.814178466796875, 0.9168701171875, 1.019561767578125, 1.12225341796875, 1.224945068359375, 1.32763671875, 1.430328369140625, 1.53302001953125, 1.635711669921875, 1.7384033203125, 1.841094970703125, 1.94378662109375, 2.046478271484375, 2.149169921875, 2.251861572265625, 2.35455322265625, 2.457244873046875, 2.5599365234375, 2.662628173828125, 2.76531982421875, 2.868011474609375, 2.970703125]}, "gradients/decoder.transformer.h.7.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 6.0, 7.0, 9.0, 14.0, 13.0, 21.0, 50.0, 68.0, 93.0, 135.0, 209.0, 365.0, 575.0, 842.0, 1368.0, 2260.0, 3729.0, 6084.0, 10013.0, 16951.0, 29443.0, 53385.0, 104412.0, 260273.0, 1353308.0, 116984.0, 58433.0, 31942.0, 18084.0, 10961.0, 6537.0, 3958.0, 2488.0, 1522.0, 997.0, 568.0, 363.0, 252.0, 147.0, 86.0, 68.0, 39.0, 29.0, 20.0, 10.0, 9.0, 6.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.1588134765625, -0.15428543090820312, -0.14975738525390625, -0.14522933959960938, -0.1407012939453125, -0.13617324829101562, -0.13164520263671875, -0.12711715698242188, -0.122589111328125, -0.11806106567382812, -0.11353302001953125, -0.10900497436523438, -0.1044769287109375, -0.09994888305664062, -0.09542083740234375, -0.09089279174804688, -0.08636474609375, -0.08183670043945312, -0.07730865478515625, -0.07278060913085938, -0.0682525634765625, -0.06372451782226562, -0.05919647216796875, -0.054668426513671875, -0.050140380859375, -0.045612335205078125, -0.04108428955078125, -0.036556243896484375, -0.0320281982421875, -0.027500152587890625, -0.02297210693359375, -0.018444061279296875, -0.013916015625, -0.009387969970703125, -0.00485992431640625, -0.000331878662109375, 0.0041961669921875, 0.008724212646484375, 0.01325225830078125, 0.017780303955078125, 0.022308349609375, 0.026836395263671875, 0.03136444091796875, 0.035892486572265625, 0.0404205322265625, 0.044948577880859375, 0.04947662353515625, 0.054004669189453125, 0.05853271484375, 0.06306076049804688, 0.06758880615234375, 0.07211685180664062, 0.0766448974609375, 0.08117294311523438, 0.08570098876953125, 0.09022903442382812, 0.094757080078125, 0.09928512573242188, 0.10381317138671875, 0.10834121704101562, 0.1128692626953125, 0.11739730834960938, 0.12192535400390625, 0.12645339965820312, 0.1309814453125]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 4.0, 7.0, 8.0, 9.0, 13.0, 10.0, 18.0, 17.0, 19.0, 30.0, 35.0, 38.0, 37.0, 59.0, 45.0, 77.0, 63.0, 64.0, 67.0, 48.0, 64.0, 38.0, 40.0, 39.0, 34.0, 21.0, 24.0, 12.0, 12.0, 7.0, 6.0, 8.0, 5.0, 7.0, 2.0, 3.0, 2.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-8.761882781982422e-06, -8.491799235343933e-06, -8.221715688705444e-06, -7.951632142066956e-06, -7.681548595428467e-06, -7.411465048789978e-06, -7.141381502151489e-06, -6.8712979555130005e-06, -6.601214408874512e-06, -6.331130862236023e-06, -6.061047315597534e-06, -5.790963768959045e-06, -5.520880222320557e-06, -5.250796675682068e-06, -4.980713129043579e-06, -4.71062958240509e-06, -4.4405460357666016e-06, -4.170462489128113e-06, -3.900378942489624e-06, -3.6302953958511353e-06, -3.3602118492126465e-06, -3.0901283025741577e-06, -2.820044755935669e-06, -2.54996120929718e-06, -2.2798776626586914e-06, -2.0097941160202026e-06, -1.7397105693817139e-06, -1.469627022743225e-06, -1.1995434761047363e-06, -9.294599294662476e-07, -6.593763828277588e-07, -3.8929283618927e-07, -1.1920928955078125e-07, 1.5087425708770752e-07, 4.209578037261963e-07, 6.910413503646851e-07, 9.611248970031738e-07, 1.2312084436416626e-06, 1.5012919902801514e-06, 1.7713755369186401e-06, 2.041459083557129e-06, 2.3115426301956177e-06, 2.5816261768341064e-06, 2.8517097234725952e-06, 3.121793270111084e-06, 3.3918768167495728e-06, 3.6619603633880615e-06, 3.93204391002655e-06, 4.202127456665039e-06, 4.472211003303528e-06, 4.742294549942017e-06, 5.012378096580505e-06, 5.282461643218994e-06, 5.552545189857483e-06, 5.822628736495972e-06, 6.0927122831344604e-06, 6.362795829772949e-06, 6.632879376411438e-06, 6.902962923049927e-06, 7.1730464696884155e-06, 7.443130016326904e-06, 7.713213562965393e-06, 7.983297109603882e-06, 8.25338065624237e-06, 8.52346420288086e-06]}, "gradients/decoder.transformer.h.7.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 5.0, 3.0, 2.0, 3.0, 2.0, 5.0, 6.0, 5.0, 6.0, 18.0, 8.0, 14.0, 24.0, 34.0, 40.0, 38.0, 44.0, 90.0, 116.0, 169.0, 292.0, 620.0, 12921.0, 1006093.0, 26122.0, 834.0, 314.0, 204.0, 119.0, 93.0, 79.0, 50.0, 30.0, 30.0, 31.0, 11.0, 27.0, 12.0, 15.0, 7.0, 3.0, 9.0, 4.0, 2.0, 3.0, 4.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001481771469116211, -0.0001433286815881729, -0.00013848021626472473, -0.00013363175094127655, -0.00012878328561782837, -0.0001239348202943802, -0.00011908635497093201, -0.00011423788964748383, -0.00010938942432403564, -0.00010454095900058746, -9.969249367713928e-05, -9.48440283536911e-05, -8.999556303024292e-05, -8.514709770679474e-05, -8.029863238334656e-05, -7.545016705989838e-05, -7.06017017364502e-05, -6.575323641300201e-05, -6.090477108955383e-05, -5.605630576610565e-05, -5.120784044265747e-05, -4.635937511920929e-05, -4.151090979576111e-05, -3.666244447231293e-05, -3.1813979148864746e-05, -2.6965513825416565e-05, -2.2117048501968384e-05, -1.7268583178520203e-05, -1.2420117855072021e-05, -7.57165253162384e-06, -2.723187208175659e-06, 2.125278115272522e-06, 6.973743438720703e-06, 1.1822208762168884e-05, 1.6670674085617065e-05, 2.1519139409065247e-05, 2.6367604732513428e-05, 3.121607005596161e-05, 3.606453537940979e-05, 4.091300070285797e-05, 4.576146602630615e-05, 5.0609931349754333e-05, 5.5458396673202515e-05, 6.0306861996650696e-05, 6.515532732009888e-05, 7.000379264354706e-05, 7.485225796699524e-05, 7.970072329044342e-05, 8.45491886138916e-05, 8.939765393733978e-05, 9.424611926078796e-05, 9.909458458423615e-05, 0.00010394304990768433, 0.00010879151523113251, 0.00011363998055458069, 0.00011848844587802887, 0.00012333691120147705, 0.00012818537652492523, 0.0001330338418483734, 0.0001378823071718216, 0.00014273077249526978, 0.00014757923781871796, 0.00015242770314216614, 0.00015727616846561432, 0.0001621246337890625]}, "gradients/decoder.transformer.h.7.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 3.0, 12.0, 62.0, 184.0, 329.0, 274.0, 108.0, 26.0, 11.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2409125702106394e-05, -1.1907432053703815e-05, -1.1405738405301236e-05, -1.0904043847403955e-05, -1.0402350199001376e-05, -9.900656550598796e-06, -9.398961992701516e-06, -8.897268344298936e-06, -8.395574695896357e-06, -7.893881047493778e-06, -7.392186944343848e-06, -6.890492841193918e-06, -6.388799192791339e-06, -5.88710554438876e-06, -5.38541144123883e-06, -4.8837173380889e-06, -4.382023689686321e-06, -3.880330041283742e-06, -3.3786359381338116e-06, -2.876942062357557e-06, -2.3752481865813024e-06, -1.8735543108050479e-06, -1.3718604350287933e-06, -8.701665592525387e-07, -3.6847268347628415e-07, 1.3322119229997043e-07, 6.34915068076225e-07, 1.1366089438524796e-06, 1.6383028196287341e-06, 2.1399966954049887e-06, 2.6416905711812433e-06, 3.143384446957498e-06, 3.6450783227337524e-06, 4.1467719711363316e-06, 4.6484660742862616e-06, 5.150160177436192e-06, 5.651853825838771e-06, 6.15354747424135e-06, 6.65524157739128e-06, 7.15693568054121e-06, 7.658629328943789e-06, 8.160322977346368e-06, 8.662016625748947e-06, 9.163711183646228e-06, 9.665404832048807e-06, 1.0167098480451386e-05, 1.0668793038348667e-05, 1.1170486686751246e-05, 1.1672180335153826e-05, 1.2173873983556405e-05, 1.2675567631958984e-05, 1.3177262189856265e-05, 1.3678955838258844e-05, 1.4180649486661423e-05, 1.4682344044558704e-05, 1.5184037692961283e-05, 1.5685731341363862e-05, 1.618742498976644e-05, 1.668911863816902e-05, 1.71908122865716e-05, 1.769250593497418e-05, 1.819420140236616e-05, 1.869589505076874e-05, 1.919758869917132e-05, 1.96992823475739e-05]}, "gradients/decoder.transformer.h.7.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 2.0, 3.0, 3.0, 0.0, 1.0, 4.0, 6.0, 4.0, 6.0, 8.0, 9.0, 22.0, 9.0, 21.0, 15.0, 18.0, 29.0, 20.0, 17.0, 19.0, 28.0, 29.0, 36.0, 36.0, 58.0, 32.0, 28.0, 35.0, 45.0, 48.0, 30.0, 34.0, 28.0, 41.0, 23.0, 33.0, 38.0, 33.0, 21.0, 24.0, 25.0, 10.0, 11.0, 5.0, 8.0, 12.0, 6.0, 10.0, 8.0, 4.0, 7.0, 4.0, 1.0, 1.0, 4.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-3.635883331298828e-06, -3.5120174288749695e-06, -3.388151526451111e-06, -3.264285624027252e-06, -3.1404197216033936e-06, -3.016553819179535e-06, -2.8926879167556763e-06, -2.7688220143318176e-06, -2.644956111907959e-06, -2.5210902094841003e-06, -2.3972243070602417e-06, -2.273358404636383e-06, -2.1494925022125244e-06, -2.0256265997886658e-06, -1.9017606973648071e-06, -1.7778947949409485e-06, -1.6540288925170898e-06, -1.5301629900932312e-06, -1.4062970876693726e-06, -1.282431185245514e-06, -1.1585652828216553e-06, -1.0346993803977966e-06, -9.10833477973938e-07, -7.869675755500793e-07, -6.631016731262207e-07, -5.392357707023621e-07, -4.153698682785034e-07, -2.915039658546448e-07, -1.6763806343078613e-07, -4.377216100692749e-08, 8.009374141693115e-08, 2.039596438407898e-07, 3.2782554626464844e-07, 4.516914486885071e-07, 5.755573511123657e-07, 6.994232535362244e-07, 8.23289155960083e-07, 9.471550583839417e-07, 1.0710209608078003e-06, 1.194886863231659e-06, 1.3187527656555176e-06, 1.4426186680793762e-06, 1.5664845705032349e-06, 1.6903504729270935e-06, 1.8142163753509521e-06, 1.938082277774811e-06, 2.0619481801986694e-06, 2.185814082622528e-06, 2.3096799850463867e-06, 2.4335458874702454e-06, 2.557411789894104e-06, 2.6812776923179626e-06, 2.8051435947418213e-06, 2.92900949716568e-06, 3.0528753995895386e-06, 3.1767413020133972e-06, 3.300607204437256e-06, 3.4244731068611145e-06, 3.548339009284973e-06, 3.6722049117088318e-06, 3.7960708141326904e-06, 3.919936716556549e-06, 4.043802618980408e-06, 4.167668521404266e-06, 4.291534423828125e-06]}, "gradients/decoder.transformer.h.7.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 4.0, 5.0, 7.0, 10.0, 11.0, 17.0, 25.0, 27.0, 21.0, 26.0, 36.0, 29.0, 44.0, 42.0, 46.0, 50.0, 41.0, 59.0, 52.0, 54.0, 49.0, 50.0, 48.0, 42.0, 33.0, 35.0, 28.0, 21.0, 28.0, 13.0, 12.0, 10.0, 10.0, 3.0, 9.0, 3.0, 1.0, 5.0, 4.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.35546875, -6.16876220703125, -5.9820556640625, -5.79534912109375, -5.608642578125, -5.42193603515625, -5.2352294921875, -5.04852294921875, -4.86181640625, -4.67510986328125, -4.4884033203125, -4.30169677734375, -4.114990234375, -3.92828369140625, -3.7415771484375, -3.55487060546875, -3.3681640625, -3.18145751953125, -2.9947509765625, -2.80804443359375, -2.621337890625, -2.43463134765625, -2.2479248046875, -2.06121826171875, -1.87451171875, -1.68780517578125, -1.5010986328125, -1.31439208984375, -1.127685546875, -0.94097900390625, -0.7542724609375, -0.56756591796875, -0.380859375, -0.19415283203125, -0.0074462890625, 0.17926025390625, 0.365966796875, 0.55267333984375, 0.7393798828125, 0.92608642578125, 1.11279296875, 1.29949951171875, 1.4862060546875, 1.67291259765625, 1.859619140625, 2.04632568359375, 2.2330322265625, 2.41973876953125, 2.6064453125, 2.79315185546875, 2.9798583984375, 3.16656494140625, 3.353271484375, 3.53997802734375, 3.7266845703125, 3.91339111328125, 4.10009765625, 4.28680419921875, 4.4735107421875, 4.66021728515625, 4.846923828125, 5.03363037109375, 5.2203369140625, 5.40704345703125, 5.59375]}, "gradients/decoder.transformer.h.7.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 3.0, 4.0, 6.0, 11.0, 11.0, 17.0, 24.0, 37.0, 36.0, 71.0, 132.0, 274.0, 559.0, 1477.0, 3381.0, 9336.0, 33282.0, 203789.0, 662781.0, 101141.0, 21137.0, 6555.0, 2448.0, 1092.0, 461.0, 210.0, 101.0, 60.0, 33.0, 23.0, 12.0, 15.0, 15.0, 5.0, 6.0, 6.0, 2.0, 4.0, 5.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.6796875, -11.3394775390625, -10.999267578125, -10.6590576171875, -10.31884765625, -9.9786376953125, -9.638427734375, -9.2982177734375, -8.9580078125, -8.6177978515625, -8.277587890625, -7.9373779296875, -7.59716796875, -7.2569580078125, -6.916748046875, -6.5765380859375, -6.236328125, -5.8961181640625, -5.555908203125, -5.2156982421875, -4.87548828125, -4.5352783203125, -4.195068359375, -3.8548583984375, -3.5146484375, -3.1744384765625, -2.834228515625, -2.4940185546875, -2.15380859375, -1.8135986328125, -1.473388671875, -1.1331787109375, -0.79296875, -0.4527587890625, -0.112548828125, 0.2276611328125, 0.56787109375, 0.9080810546875, 1.248291015625, 1.5885009765625, 1.9287109375, 2.2689208984375, 2.609130859375, 2.9493408203125, 3.28955078125, 3.6297607421875, 3.969970703125, 4.3101806640625, 4.650390625, 4.9906005859375, 5.330810546875, 5.6710205078125, 6.01123046875, 6.3514404296875, 6.691650390625, 7.0318603515625, 7.3720703125, 7.7122802734375, 8.052490234375, 8.3927001953125, 8.73291015625, 9.0731201171875, 9.413330078125, 9.7535400390625, 10.09375]}, "gradients/decoder.transformer.h.7.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 3.0, 4.0, 2.0, 4.0, 5.0, 5.0, 7.0, 14.0, 9.0, 17.0, 13.0, 21.0, 22.0, 27.0, 23.0, 30.0, 32.0, 35.0, 31.0, 62.0, 56.0, 80.0, 205.0, 1472.0, 273.0, 117.0, 78.0, 50.0, 42.0, 40.0, 44.0, 44.0, 29.0, 32.0, 19.0, 15.0, 23.0, 19.0, 12.0, 8.0, 4.0, 10.0, 6.0, 5.0, 6.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-14.0, -13.543212890625, -13.08642578125, -12.629638671875, -12.1728515625, -11.716064453125, -11.25927734375, -10.802490234375, -10.345703125, -9.888916015625, -9.43212890625, -8.975341796875, -8.5185546875, -8.061767578125, -7.60498046875, -7.148193359375, -6.69140625, -6.234619140625, -5.77783203125, -5.321044921875, -4.8642578125, -4.407470703125, -3.95068359375, -3.493896484375, -3.037109375, -2.580322265625, -2.12353515625, -1.666748046875, -1.2099609375, -0.753173828125, -0.29638671875, 0.160400390625, 0.6171875, 1.073974609375, 1.53076171875, 1.987548828125, 2.4443359375, 2.901123046875, 3.35791015625, 3.814697265625, 4.271484375, 4.728271484375, 5.18505859375, 5.641845703125, 6.0986328125, 6.555419921875, 7.01220703125, 7.468994140625, 7.92578125, 8.382568359375, 8.83935546875, 9.296142578125, 9.7529296875, 10.209716796875, 10.66650390625, 11.123291015625, 11.580078125, 12.036865234375, 12.49365234375, 12.950439453125, 13.4072265625, 13.864013671875, 14.32080078125, 14.777587890625, 15.234375]}, "gradients/decoder.transformer.h.7.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 5.0, 10.0, 4.0, 15.0, 17.0, 11.0, 20.0, 38.0, 25.0, 62.0, 55.0, 89.0, 131.0, 179.0, 369.0, 1458.0, 12483.0, 2167372.0, 951363.0, 9887.0, 1194.0, 312.0, 184.0, 102.0, 82.0, 58.0, 45.0, 36.0, 30.0, 15.0, 11.0, 9.0, 12.0, 6.0, 8.0, 5.0, 2.0, 5.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-32.40625, -31.2470703125, -30.087890625, -28.9287109375, -27.76953125, -26.6103515625, -25.451171875, -24.2919921875, -23.1328125, -21.9736328125, -20.814453125, -19.6552734375, -18.49609375, -17.3369140625, -16.177734375, -15.0185546875, -13.859375, -12.7001953125, -11.541015625, -10.3818359375, -9.22265625, -8.0634765625, -6.904296875, -5.7451171875, -4.5859375, -3.4267578125, -2.267578125, -1.1083984375, 0.05078125, 1.2099609375, 2.369140625, 3.5283203125, 4.6875, 5.8466796875, 7.005859375, 8.1650390625, 9.32421875, 10.4833984375, 11.642578125, 12.8017578125, 13.9609375, 15.1201171875, 16.279296875, 17.4384765625, 18.59765625, 19.7568359375, 20.916015625, 22.0751953125, 23.234375, 24.3935546875, 25.552734375, 26.7119140625, 27.87109375, 29.0302734375, 30.189453125, 31.3486328125, 32.5078125, 33.6669921875, 34.826171875, 35.9853515625, 37.14453125, 38.3037109375, 39.462890625, 40.6220703125, 41.78125]}, "gradients/decoder.transformer.h.7.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 86.0, 889.0, 43.0], "bins": [-521.0990600585938, -512.7484130859375, -504.3977355957031, -496.04705810546875, -487.6964111328125, -479.3457336425781, -470.99505615234375, -462.6444091796875, -454.2937316894531, -445.94305419921875, -437.5924072265625, -429.2417297363281, -420.89105224609375, -412.5404052734375, -404.1897277832031, -395.8390808105469, -387.4884033203125, -379.1377258300781, -370.7870788574219, -362.4364013671875, -354.08575439453125, -345.7350769042969, -337.3843994140625, -329.03375244140625, -320.6830749511719, -312.3323974609375, -303.98175048828125, -295.6310729980469, -287.2803955078125, -278.92974853515625, -270.5790710449219, -262.2284240722656, -253.8777313232422, -245.52706909179688, -237.1763916015625, -228.8257293701172, -220.47506713867188, -212.12440490722656, -203.77374267578125, -195.42306518554688, -187.07240295410156, -178.72174072265625, -170.37106323242188, -162.02040100097656, -153.66973876953125, -145.31907653808594, -136.96841430664062, -128.61773681640625, -120.26708221435547, -111.91641235351562, -103.56575012207031, -95.215087890625, -86.86441802978516, -78.51374816894531, -70.1630859375, -61.81241989135742, -53.461753845214844, -45.111087799072266, -36.76042175292969, -28.40975570678711, -20.05908966064453, -11.708423614501953, -3.357757568359375, 4.992908477783203, 13.343576431274414]}, "gradients/decoder.transformer.h.7.ln_1.bias": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 2.0, 5.0, 4.0, 4.0, 7.0, 6.0, 15.0, 7.0, 15.0, 7.0, 23.0, 19.0, 22.0, 21.0, 27.0, 33.0, 31.0, 33.0, 31.0, 32.0, 35.0, 30.0, 32.0, 47.0, 41.0, 33.0, 33.0, 38.0, 40.0, 26.0, 39.0, 32.0, 36.0, 22.0, 19.0, 35.0, 14.0, 12.0, 9.0, 18.0, 21.0, 10.0, 9.0, 6.0, 7.0, 5.0, 5.0, 4.0, 4.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-39.317787170410156, -38.09545135498047, -36.87311553955078, -35.650779724121094, -34.428443908691406, -33.20610809326172, -31.98377227783203, -30.761436462402344, -29.539100646972656, -28.31676483154297, -27.09442901611328, -25.872093200683594, -24.649757385253906, -23.42742156982422, -22.20508575439453, -20.982749938964844, -19.760412216186523, -18.538076400756836, -17.31574058532715, -16.09340476989746, -14.871068954467773, -13.648733139038086, -12.426396369934082, -11.204060554504395, -9.981724739074707, -8.75938892364502, -7.537053108215332, -6.314716815948486, -5.092381000518799, -3.8700451850891113, -2.6477088928222656, -1.4253730773925781, -0.20303726196289062, 1.0192986726760864, 2.2416346073150635, 3.46397066116333, 4.686306476593018, 5.908642292022705, 7.130978584289551, 8.353314399719238, 9.575650215148926, 10.797986030578613, 12.0203218460083, 13.242658615112305, 14.464994430541992, 15.68733024597168, 16.909666061401367, 18.132001876831055, 19.354337692260742, 20.57667350769043, 21.799009323120117, 23.021345138549805, 24.243680953979492, 25.46601676940918, 26.6883544921875, 27.910690307617188, 29.133026123046875, 30.355361938476562, 31.57769775390625, 32.80003356933594, 34.022369384765625, 35.24470520019531, 36.467041015625, 37.68937683105469, 38.911712646484375]}, "gradients/decoder.transformer.h.6.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 4.0, 8.0, 12.0, 11.0, 18.0, 18.0, 22.0, 27.0, 26.0, 35.0, 35.0, 40.0, 55.0, 50.0, 58.0, 46.0, 46.0, 57.0, 58.0, 51.0, 40.0, 47.0, 41.0, 37.0, 30.0, 26.0, 27.0, 11.0, 19.0, 11.0, 7.0, 11.0, 6.0, 4.0, 5.0, 6.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.796875, -6.595703125, -6.39453125, -6.193359375, -5.9921875, -5.791015625, -5.58984375, -5.388671875, -5.1875, -4.986328125, -4.78515625, -4.583984375, -4.3828125, -4.181640625, -3.98046875, -3.779296875, -3.578125, -3.376953125, -3.17578125, -2.974609375, -2.7734375, -2.572265625, -2.37109375, -2.169921875, -1.96875, -1.767578125, -1.56640625, -1.365234375, -1.1640625, -0.962890625, -0.76171875, -0.560546875, -0.359375, -0.158203125, 0.04296875, 0.244140625, 0.4453125, 0.646484375, 0.84765625, 1.048828125, 1.25, 1.451171875, 1.65234375, 1.853515625, 2.0546875, 2.255859375, 2.45703125, 2.658203125, 2.859375, 3.060546875, 3.26171875, 3.462890625, 3.6640625, 3.865234375, 4.06640625, 4.267578125, 4.46875, 4.669921875, 4.87109375, 5.072265625, 5.2734375, 5.474609375, 5.67578125, 5.876953125, 6.078125]}, "gradients/decoder.transformer.h.6.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 3.0, 11.0, 9.0, 13.0, 17.0, 14.0, 32.0, 22.0, 31.0, 64.0, 105.0, 203.0, 420.0, 1171.0, 3201.0, 10536.0, 43837.0, 333032.0, 2946722.0, 756012.0, 75506.0, 16101.0, 4424.0, 1561.0, 584.0, 279.0, 122.0, 85.0, 47.0, 29.0, 16.0, 15.0, 14.0, 9.0, 11.0, 5.0, 3.0, 3.0, 6.0, 4.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-12.84375, -12.43798828125, -12.0322265625, -11.62646484375, -11.220703125, -10.81494140625, -10.4091796875, -10.00341796875, -9.59765625, -9.19189453125, -8.7861328125, -8.38037109375, -7.974609375, -7.56884765625, -7.1630859375, -6.75732421875, -6.3515625, -5.94580078125, -5.5400390625, -5.13427734375, -4.728515625, -4.32275390625, -3.9169921875, -3.51123046875, -3.10546875, -2.69970703125, -2.2939453125, -1.88818359375, -1.482421875, -1.07666015625, -0.6708984375, -0.26513671875, 0.140625, 0.54638671875, 0.9521484375, 1.35791015625, 1.763671875, 2.16943359375, 2.5751953125, 2.98095703125, 3.38671875, 3.79248046875, 4.1982421875, 4.60400390625, 5.009765625, 5.41552734375, 5.8212890625, 6.22705078125, 6.6328125, 7.03857421875, 7.4443359375, 7.85009765625, 8.255859375, 8.66162109375, 9.0673828125, 9.47314453125, 9.87890625, 10.28466796875, 10.6904296875, 11.09619140625, 11.501953125, 11.90771484375, 12.3134765625, 12.71923828125, 13.125]}, "gradients/decoder.transformer.h.6.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 11.0, 9.0, 10.0, 13.0, 24.0, 30.0, 32.0, 61.0, 87.0, 113.0, 165.0, 212.0, 360.0, 537.0, 710.0, 583.0, 379.0, 246.0, 160.0, 95.0, 69.0, 37.0, 28.0, 23.0, 15.0, 16.0, 14.0, 10.0, 12.0, 5.0, 4.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 1.0, 2.0, 0.0, 2.0], "bins": [-11.6796875, -11.3685302734375, -11.057373046875, -10.7462158203125, -10.43505859375, -10.1239013671875, -9.812744140625, -9.5015869140625, -9.1904296875, -8.8792724609375, -8.568115234375, -8.2569580078125, -7.94580078125, -7.6346435546875, -7.323486328125, -7.0123291015625, -6.701171875, -6.3900146484375, -6.078857421875, -5.7677001953125, -5.45654296875, -5.1453857421875, -4.834228515625, -4.5230712890625, -4.2119140625, -3.9007568359375, -3.589599609375, -3.2784423828125, -2.96728515625, -2.6561279296875, -2.344970703125, -2.0338134765625, -1.72265625, -1.4114990234375, -1.100341796875, -0.7891845703125, -0.47802734375, -0.1668701171875, 0.144287109375, 0.4554443359375, 0.7666015625, 1.0777587890625, 1.388916015625, 1.7000732421875, 2.01123046875, 2.3223876953125, 2.633544921875, 2.9447021484375, 3.255859375, 3.5670166015625, 3.878173828125, 4.1893310546875, 4.50048828125, 4.8116455078125, 5.122802734375, 5.4339599609375, 5.7451171875, 6.0562744140625, 6.367431640625, 6.6785888671875, 6.98974609375, 7.3009033203125, 7.612060546875, 7.9232177734375, 8.234375]}, "gradients/decoder.transformer.h.6.mlp.c_fc.weight": {"_type": "histogram", "values": [4.0, 0.0, 2.0, 1.0, 7.0, 3.0, 2.0, 9.0, 9.0, 8.0, 9.0, 14.0, 20.0, 31.0, 47.0, 37.0, 44.0, 83.0, 106.0, 189.0, 373.0, 699.0, 1792.0, 5894.0, 33563.0, 390677.0, 3450275.0, 275652.0, 26586.0, 4909.0, 1526.0, 657.0, 353.0, 191.0, 142.0, 111.0, 72.0, 53.0, 39.0, 24.0, 23.0, 20.0, 9.0, 8.0, 5.0, 8.0, 7.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.359375, -17.6650390625, -16.970703125, -16.2763671875, -15.58203125, -14.8876953125, -14.193359375, -13.4990234375, -12.8046875, -12.1103515625, -11.416015625, -10.7216796875, -10.02734375, -9.3330078125, -8.638671875, -7.9443359375, -7.25, -6.5556640625, -5.861328125, -5.1669921875, -4.47265625, -3.7783203125, -3.083984375, -2.3896484375, -1.6953125, -1.0009765625, -0.306640625, 0.3876953125, 1.08203125, 1.7763671875, 2.470703125, 3.1650390625, 3.859375, 4.5537109375, 5.248046875, 5.9423828125, 6.63671875, 7.3310546875, 8.025390625, 8.7197265625, 9.4140625, 10.1083984375, 10.802734375, 11.4970703125, 12.19140625, 12.8857421875, 13.580078125, 14.2744140625, 14.96875, 15.6630859375, 16.357421875, 17.0517578125, 17.74609375, 18.4404296875, 19.134765625, 19.8291015625, 20.5234375, 21.2177734375, 21.912109375, 22.6064453125, 23.30078125, 23.9951171875, 24.689453125, 25.3837890625, 26.078125]}, "gradients/decoder.transformer.h.6.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 40.0, 528.0, 430.0, 16.0, 4.0, 2.0, 1.0, 0.0, 1.0], "bins": [-482.6432800292969, -474.1705017089844, -465.6977233886719, -457.2249450683594, -448.7521667480469, -440.2793884277344, -431.8066101074219, -423.3338317871094, -414.8610534667969, -406.3882751464844, -397.9154968261719, -389.4427185058594, -380.9699401855469, -372.4971618652344, -364.0243835449219, -355.5516052246094, -347.0788269042969, -338.6060485839844, -330.1332702636719, -321.6604919433594, -313.1877136230469, -304.7149353027344, -296.2421569824219, -287.7693786621094, -279.2966003417969, -270.8238220214844, -262.3510437011719, -253.87826538085938, -245.40548706054688, -236.93270874023438, -228.45993041992188, -219.98715209960938, -211.514404296875, -203.0416259765625, -194.56884765625, -186.0960693359375, -177.623291015625, -169.1505126953125, -160.677734375, -152.2049560546875, -143.732177734375, -135.2593994140625, -126.78662109375, -118.3138427734375, -109.841064453125, -101.3682861328125, -92.8955078125, -84.4227294921875, -75.949951171875, -67.4771728515625, -59.00439453125, -50.5316162109375, -42.058837890625, -33.5860595703125, -25.11328125, -16.6405029296875, -8.167720794677734, 0.3050575256347656, 8.777835845947266, 17.250614166259766, 25.723392486572266, 34.196170806884766, 42.668949127197266, 51.141727447509766, 59.614505767822266]}, "gradients/decoder.transformer.h.6.ln_2.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 5.0, 1.0, 0.0, 7.0, 5.0, 7.0, 8.0, 15.0, 14.0, 13.0, 11.0, 21.0, 25.0, 21.0, 28.0, 34.0, 31.0, 27.0, 36.0, 34.0, 41.0, 40.0, 42.0, 48.0, 37.0, 45.0, 40.0, 43.0, 35.0, 32.0, 40.0, 33.0, 21.0, 25.0, 19.0, 25.0, 20.0, 22.0, 9.0, 14.0, 11.0, 3.0, 5.0, 8.0, 5.0, 3.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-30.56658935546875, -29.62184715270996, -28.677104949951172, -27.732362747192383, -26.787620544433594, -25.842878341674805, -24.898136138916016, -23.953393936157227, -23.008651733398438, -22.06390953063965, -21.11916732788086, -20.17442512512207, -19.22968292236328, -18.284940719604492, -17.340198516845703, -16.395456314086914, -15.450714111328125, -14.505971908569336, -13.561229705810547, -12.616487503051758, -11.671745300292969, -10.72700309753418, -9.78226089477539, -8.837518692016602, -7.8927764892578125, -6.948034286499023, -6.003292083740234, -5.058549880981445, -4.113807678222656, -3.169065475463867, -2.224323272705078, -1.279581069946289, -0.3348388671875, 0.6099033355712891, 1.5546455383300781, 2.499387741088867, 3.4441299438476562, 4.388872146606445, 5.333614349365234, 6.278356552124023, 7.2230987548828125, 8.167840957641602, 9.11258316040039, 10.05732536315918, 11.002067565917969, 11.946809768676758, 12.891551971435547, 13.836294174194336, 14.781036376953125, 15.725778579711914, 16.670520782470703, 17.615262985229492, 18.56000518798828, 19.50474739074707, 20.44948959350586, 21.39423179626465, 22.338973999023438, 23.283716201782227, 24.228458404541016, 25.173200607299805, 26.117942810058594, 27.062685012817383, 28.007427215576172, 28.95216941833496, 29.89691162109375]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 1.0, 2.0, 5.0, 3.0, 2.0, 5.0, 8.0, 12.0, 9.0, 17.0, 22.0, 27.0, 17.0, 34.0, 38.0, 36.0, 24.0, 39.0, 42.0, 36.0, 40.0, 43.0, 52.0, 36.0, 44.0, 45.0, 37.0, 43.0, 45.0, 31.0, 34.0, 25.0, 18.0, 23.0, 21.0, 21.0, 11.0, 17.0, 7.0, 9.0, 8.0, 4.0, 6.0, 2.0, 3.0, 4.0, 3.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.73046875, -5.55682373046875, -5.3831787109375, -5.20953369140625, -5.035888671875, -4.86224365234375, -4.6885986328125, -4.51495361328125, -4.34130859375, -4.16766357421875, -3.9940185546875, -3.82037353515625, -3.646728515625, -3.47308349609375, -3.2994384765625, -3.12579345703125, -2.9521484375, -2.77850341796875, -2.6048583984375, -2.43121337890625, -2.257568359375, -2.08392333984375, -1.9102783203125, -1.73663330078125, -1.56298828125, -1.38934326171875, -1.2156982421875, -1.04205322265625, -0.868408203125, -0.69476318359375, -0.5211181640625, -0.34747314453125, -0.173828125, -0.00018310546875, 0.1734619140625, 0.34710693359375, 0.520751953125, 0.69439697265625, 0.8680419921875, 1.04168701171875, 1.21533203125, 1.38897705078125, 1.5626220703125, 1.73626708984375, 1.909912109375, 2.08355712890625, 2.2572021484375, 2.43084716796875, 2.6044921875, 2.77813720703125, 2.9517822265625, 3.12542724609375, 3.299072265625, 3.47271728515625, 3.6463623046875, 3.82000732421875, 3.99365234375, 4.16729736328125, 4.3409423828125, 4.51458740234375, 4.688232421875, 4.86187744140625, 5.0355224609375, 5.20916748046875, 5.3828125]}, "gradients/decoder.transformer.h.6.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 6.0, 2.0, 2.0, 5.0, 21.0, 11.0, 13.0, 45.0, 37.0, 64.0, 101.0, 128.0, 213.0, 289.0, 463.0, 686.0, 1063.0, 1696.0, 2659.0, 4311.0, 6991.0, 11626.0, 19497.0, 33079.0, 57659.0, 106603.0, 278192.0, 276478.0, 105767.0, 58003.0, 33376.0, 19317.0, 11381.0, 6964.0, 4268.0, 2689.0, 1676.0, 1058.0, 744.0, 441.0, 324.0, 203.0, 140.0, 77.0, 72.0, 41.0, 33.0, 22.0, 6.0, 14.0, 4.0, 5.0, 1.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1605224609375, -0.15534400939941406, -0.15016555786132812, -0.1449871063232422, -0.13980865478515625, -0.1346302032470703, -0.12945175170898438, -0.12427330017089844, -0.1190948486328125, -0.11391639709472656, -0.10873794555664062, -0.10355949401855469, -0.09838104248046875, -0.09320259094238281, -0.08802413940429688, -0.08284568786621094, -0.077667236328125, -0.07248878479003906, -0.06731033325195312, -0.06213188171386719, -0.05695343017578125, -0.05177497863769531, -0.046596527099609375, -0.04141807556152344, -0.0362396240234375, -0.031061172485351562, -0.025882720947265625, -0.020704269409179688, -0.01552581787109375, -0.010347366333007812, -0.005168914794921875, 9.5367431640625e-06, 0.00518798828125, 0.010366439819335938, 0.015544891357421875, 0.020723342895507812, 0.02590179443359375, 0.031080245971679688, 0.036258697509765625, 0.04143714904785156, 0.0466156005859375, 0.05179405212402344, 0.056972503662109375, 0.06215095520019531, 0.06732940673828125, 0.07250785827636719, 0.07768630981445312, 0.08286476135253906, 0.088043212890625, 0.09322166442871094, 0.09840011596679688, 0.10357856750488281, 0.10875701904296875, 0.11393547058105469, 0.11911392211914062, 0.12429237365722656, 0.1294708251953125, 0.13464927673339844, 0.13982772827148438, 0.1450061798095703, 0.15018463134765625, 0.1553630828857422, 0.16054153442382812, 0.16571998596191406, 0.1708984375]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 6.0, 4.0, 4.0, 8.0, 5.0, 10.0, 9.0, 8.0, 14.0, 17.0, 13.0, 21.0, 29.0, 26.0, 25.0, 20.0, 25.0, 35.0, 38.0, 37.0, 38.0, 30.0, 38.0, 33.0, 1060.0, 45.0, 30.0, 33.0, 43.0, 34.0, 31.0, 36.0, 29.0, 28.0, 19.0, 24.0, 17.0, 11.0, 14.0, 21.0, 8.0, 6.0, 16.0, 10.0, 6.0, 8.0, 3.0, 4.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-3.3828125, -3.282379150390625, -3.18194580078125, -3.081512451171875, -2.9810791015625, -2.880645751953125, -2.78021240234375, -2.679779052734375, -2.579345703125, -2.478912353515625, -2.37847900390625, -2.278045654296875, -2.1776123046875, -2.077178955078125, -1.97674560546875, -1.876312255859375, -1.77587890625, -1.675445556640625, -1.57501220703125, -1.474578857421875, -1.3741455078125, -1.273712158203125, -1.17327880859375, -1.072845458984375, -0.972412109375, -0.871978759765625, -0.77154541015625, -0.671112060546875, -0.5706787109375, -0.470245361328125, -0.36981201171875, -0.269378662109375, -0.1689453125, -0.068511962890625, 0.03192138671875, 0.132354736328125, 0.2327880859375, 0.333221435546875, 0.43365478515625, 0.534088134765625, 0.634521484375, 0.734954833984375, 0.83538818359375, 0.935821533203125, 1.0362548828125, 1.136688232421875, 1.23712158203125, 1.337554931640625, 1.43798828125, 1.538421630859375, 1.63885498046875, 1.739288330078125, 1.8397216796875, 1.940155029296875, 2.04058837890625, 2.141021728515625, 2.241455078125, 2.341888427734375, 2.44232177734375, 2.542755126953125, 2.6431884765625, 2.743621826171875, 2.84405517578125, 2.944488525390625, 3.044921875]}, "gradients/decoder.transformer.h.6.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 3.0, 7.0, 8.0, 9.0, 15.0, 27.0, 22.0, 41.0, 66.0, 94.0, 144.0, 237.0, 308.0, 477.0, 716.0, 1088.0, 1752.0, 2662.0, 4168.0, 6338.0, 10042.0, 15751.0, 25464.0, 42617.0, 76081.0, 149364.0, 1384355.0, 169781.0, 83657.0, 46753.0, 27680.0, 17148.0, 10781.0, 6907.0, 4354.0, 2864.0, 1862.0, 1170.0, 818.0, 533.0, 321.0, 220.0, 136.0, 85.0, 77.0, 49.0, 34.0, 23.0, 12.0, 7.0, 3.0, 5.0, 4.0, 2.0, 2.0, 0.0, 2.0], "bins": [-0.1480712890625, -0.1436614990234375, -0.139251708984375, -0.1348419189453125, -0.13043212890625, -0.1260223388671875, -0.121612548828125, -0.1172027587890625, -0.11279296875, -0.1083831787109375, -0.103973388671875, -0.0995635986328125, -0.09515380859375, -0.0907440185546875, -0.086334228515625, -0.0819244384765625, -0.0775146484375, -0.0731048583984375, -0.068695068359375, -0.0642852783203125, -0.05987548828125, -0.0554656982421875, -0.051055908203125, -0.0466461181640625, -0.042236328125, -0.0378265380859375, -0.033416748046875, -0.0290069580078125, -0.02459716796875, -0.0201873779296875, -0.015777587890625, -0.0113677978515625, -0.0069580078125, -0.0025482177734375, 0.001861572265625, 0.0062713623046875, 0.01068115234375, 0.0150909423828125, 0.019500732421875, 0.0239105224609375, 0.0283203125, 0.0327301025390625, 0.037139892578125, 0.0415496826171875, 0.04595947265625, 0.0503692626953125, 0.054779052734375, 0.0591888427734375, 0.0635986328125, 0.0680084228515625, 0.072418212890625, 0.0768280029296875, 0.08123779296875, 0.0856475830078125, 0.090057373046875, 0.0944671630859375, 0.098876953125, 0.1032867431640625, 0.107696533203125, 0.1121063232421875, 0.11651611328125, 0.1209259033203125, 0.125335693359375, 0.1297454833984375, 0.1341552734375]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 4.0, 3.0, 5.0, 4.0, 3.0, 7.0, 3.0, 3.0, 14.0, 10.0, 11.0, 19.0, 12.0, 22.0, 32.0, 18.0, 28.0, 38.0, 33.0, 44.0, 53.0, 44.0, 55.0, 69.0, 50.0, 51.0, 53.0, 41.0, 50.0, 38.0, 28.0, 30.0, 24.0, 15.0, 24.0, 16.0, 12.0, 9.0, 5.0, 6.0, 6.0, 3.0, 6.0, 5.0, 1.0, 0.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.973743438720703e-06, -6.7148357629776e-06, -6.455928087234497e-06, -6.197020411491394e-06, -5.938112735748291e-06, -5.679205060005188e-06, -5.420297384262085e-06, -5.161389708518982e-06, -4.902482032775879e-06, -4.643574357032776e-06, -4.384666681289673e-06, -4.12575900554657e-06, -3.866851329803467e-06, -3.6079436540603638e-06, -3.3490359783172607e-06, -3.0901283025741577e-06, -2.8312206268310547e-06, -2.5723129510879517e-06, -2.3134052753448486e-06, -2.0544975996017456e-06, -1.7955899238586426e-06, -1.5366822481155396e-06, -1.2777745723724365e-06, -1.0188668966293335e-06, -7.599592208862305e-07, -5.010515451431274e-07, -2.421438694000244e-07, 1.6763806343078613e-08, 2.7567148208618164e-07, 5.345791578292847e-07, 7.934868335723877e-07, 1.0523945093154907e-06, 1.3113021850585938e-06, 1.5702098608016968e-06, 1.8291175365447998e-06, 2.088025212287903e-06, 2.346932888031006e-06, 2.605840563774109e-06, 2.864748239517212e-06, 3.123655915260315e-06, 3.382563591003418e-06, 3.641471266746521e-06, 3.900378942489624e-06, 4.159286618232727e-06, 4.41819429397583e-06, 4.677101969718933e-06, 4.936009645462036e-06, 5.194917321205139e-06, 5.453824996948242e-06, 5.712732672691345e-06, 5.971640348434448e-06, 6.230548024177551e-06, 6.489455699920654e-06, 6.748363375663757e-06, 7.00727105140686e-06, 7.266178727149963e-06, 7.525086402893066e-06, 7.78399407863617e-06, 8.042901754379272e-06, 8.301809430122375e-06, 8.560717105865479e-06, 8.819624781608582e-06, 9.078532457351685e-06, 9.337440133094788e-06, 9.59634780883789e-06]}, "gradients/decoder.transformer.h.6.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 4.0, 3.0, 1.0, 1.0, 5.0, 3.0, 6.0, 11.0, 8.0, 10.0, 9.0, 20.0, 23.0, 24.0, 32.0, 44.0, 61.0, 66.0, 103.0, 133.0, 184.0, 330.0, 786.0, 13924.0, 967895.0, 62374.0, 1176.0, 469.0, 213.0, 133.0, 103.0, 68.0, 74.0, 52.0, 35.0, 41.0, 28.0, 21.0, 15.0, 14.0, 18.0, 11.0, 6.0, 8.0, 8.0, 2.0, 4.0, 6.0, 1.0, 2.0, 1.0], "bins": [-0.00017631053924560547, -0.0001716259866952896, -0.00016694143414497375, -0.0001622568815946579, -0.00015757232904434204, -0.00015288777649402618, -0.00014820322394371033, -0.00014351867139339447, -0.0001388341188430786, -0.00013414956629276276, -0.0001294650137424469, -0.00012478046119213104, -0.00012009590864181519, -0.00011541135609149933, -0.00011072680354118347, -0.00010604225099086761, -0.00010135769844055176, -9.66731458902359e-05, -9.198859333992004e-05, -8.730404078960419e-05, -8.261948823928833e-05, -7.793493568897247e-05, -7.325038313865662e-05, -6.856583058834076e-05, -6.38812780380249e-05, -5.9196725487709045e-05, -5.451217293739319e-05, -4.982762038707733e-05, -4.5143067836761475e-05, -4.045851528644562e-05, -3.577396273612976e-05, -3.1089410185813904e-05, -2.6404857635498047e-05, -2.172030508518219e-05, -1.7035752534866333e-05, -1.2351199984550476e-05, -7.666647434234619e-06, -2.982094883918762e-06, 1.7024576663970947e-06, 6.387010216712952e-06, 1.1071562767028809e-05, 1.5756115317344666e-05, 2.0440667867660522e-05, 2.512522041797638e-05, 2.9809772968292236e-05, 3.449432551860809e-05, 3.917887806892395e-05, 4.386343061923981e-05, 4.8547983169555664e-05, 5.323253571987152e-05, 5.791708827018738e-05, 6.260164082050323e-05, 6.728619337081909e-05, 7.197074592113495e-05, 7.66552984714508e-05, 8.133985102176666e-05, 8.602440357208252e-05, 9.070895612239838e-05, 9.539350867271423e-05, 0.00010007806122303009, 0.00010476261377334595, 0.0001094471663236618, 0.00011413171887397766, 0.00011881627142429352, 0.00012350082397460938]}, "gradients/decoder.transformer.h.6.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 14.0, 57.0, 330.0, 463.0, 137.0, 17.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.564686969388276e-05, -4.4772426917916164e-05, -4.389798050397076e-05, -4.302353772800416e-05, -4.214909131405875e-05, -4.127464853809215e-05, -4.0400202124146745e-05, -3.9525759348180145e-05, -3.865131293423474e-05, -3.777687015826814e-05, -3.690242374432273e-05, -3.602798096835613e-05, -3.5153534554410726e-05, -3.4279091778444126e-05, -3.340464536449872e-05, -3.253020258853212e-05, -3.165575617458671e-05, -3.0781313398620114e-05, -2.9906866984674707e-05, -2.9032422389718704e-05, -2.81579777947627e-05, -2.7283533199806698e-05, -2.6409088604850695e-05, -2.5534645828884095e-05, -2.4660203052917495e-05, -2.3785758457961492e-05, -2.291131386300549e-05, -2.2036869268049486e-05, -2.1162424673093483e-05, -2.028798007813748e-05, -1.9413535483181477e-05, -1.8539092707214877e-05, -1.7664648112258874e-05, -1.679020351730287e-05, -1.5915758922346868e-05, -1.5041314327390864e-05, -1.4166869732434861e-05, -1.3292425137478858e-05, -1.2417981452017557e-05, -1.1543536857061554e-05, -1.066909226210555e-05, -9.794647667149547e-06, -8.920203072193544e-06, -8.045759386732243e-06, -7.171314337028889e-06, -6.296869742072886e-06, -5.422425601864234e-06, -4.5479810069082305e-06, -3.6735364119522274e-06, -2.7990918169962242e-06, -1.9246474494138965e-06, -1.0502030818315689e-06, -1.7575848687556572e-07, 6.986861080804374e-07, 1.5731302482890896e-06, 2.4475748432450928e-06, 3.322019438201096e-06, 4.196464033157099e-06, 5.070908628113102e-06, 5.945352768321754e-06, 6.8197973632777575e-06, 7.69424150348641e-06, 8.568686098442413e-06, 9.443130693398416e-06, 1.031757528835442e-05]}, "gradients/decoder.transformer.h.6.ln_cross_attn.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 4.0, 4.0, 6.0, 3.0, 7.0, 8.0, 13.0, 15.0, 14.0, 16.0, 12.0, 20.0, 18.0, 28.0, 25.0, 40.0, 41.0, 29.0, 52.0, 44.0, 50.0, 40.0, 51.0, 44.0, 43.0, 47.0, 23.0, 50.0, 33.0, 39.0, 17.0, 30.0, 30.0, 20.0, 16.0, 16.0, 13.0, 12.0, 9.0, 7.0, 4.0, 8.0, 3.0, 1.0, 1.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.589557647705078e-06, -4.43682074546814e-06, -4.284083843231201e-06, -4.131346940994263e-06, -3.978610038757324e-06, -3.825873136520386e-06, -3.6731362342834473e-06, -3.520399332046509e-06, -3.3676624298095703e-06, -3.214925527572632e-06, -3.0621886253356934e-06, -2.909451723098755e-06, -2.7567148208618164e-06, -2.603977918624878e-06, -2.4512410163879395e-06, -2.298504114151001e-06, -2.1457672119140625e-06, -1.993030309677124e-06, -1.8402934074401855e-06, -1.687556505203247e-06, -1.5348196029663086e-06, -1.3820827007293701e-06, -1.2293457984924316e-06, -1.0766088962554932e-06, -9.238719940185547e-07, -7.711350917816162e-07, -6.183981895446777e-07, -4.6566128730773926e-07, -3.129243850708008e-07, -1.601874828338623e-07, -7.450580596923828e-09, 1.4528632164001465e-07, 2.980232238769531e-07, 4.507601261138916e-07, 6.034970283508301e-07, 7.562339305877686e-07, 9.08970832824707e-07, 1.0617077350616455e-06, 1.214444637298584e-06, 1.3671815395355225e-06, 1.519918441772461e-06, 1.6726553440093994e-06, 1.8253922462463379e-06, 1.9781291484832764e-06, 2.130866050720215e-06, 2.2836029529571533e-06, 2.436339855194092e-06, 2.5890767574310303e-06, 2.7418136596679688e-06, 2.8945505619049072e-06, 3.0472874641418457e-06, 3.200024366378784e-06, 3.3527612686157227e-06, 3.505498170852661e-06, 3.6582350730895996e-06, 3.810971975326538e-06, 3.9637088775634766e-06, 4.116445779800415e-06, 4.2691826820373535e-06, 4.421919584274292e-06, 4.5746564865112305e-06, 4.727393388748169e-06, 4.880130290985107e-06, 5.032867193222046e-06, 5.185604095458984e-06]}, "gradients/decoder.transformer.h.6.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 1.0, 2.0, 5.0, 3.0, 2.0, 5.0, 8.0, 12.0, 9.0, 17.0, 22.0, 27.0, 17.0, 34.0, 38.0, 36.0, 24.0, 39.0, 42.0, 36.0, 40.0, 43.0, 52.0, 36.0, 44.0, 45.0, 37.0, 43.0, 45.0, 31.0, 34.0, 25.0, 18.0, 23.0, 21.0, 21.0, 11.0, 17.0, 7.0, 9.0, 8.0, 4.0, 6.0, 2.0, 3.0, 4.0, 3.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.73046875, -5.55682373046875, -5.3831787109375, -5.20953369140625, -5.035888671875, -4.86224365234375, -4.6885986328125, -4.51495361328125, -4.34130859375, -4.16766357421875, -3.9940185546875, -3.82037353515625, -3.646728515625, -3.47308349609375, -3.2994384765625, -3.12579345703125, -2.9521484375, -2.77850341796875, -2.6048583984375, -2.43121337890625, -2.257568359375, -2.08392333984375, -1.9102783203125, -1.73663330078125, -1.56298828125, -1.38934326171875, -1.2156982421875, -1.04205322265625, -0.868408203125, -0.69476318359375, -0.5211181640625, -0.34747314453125, -0.173828125, -0.00018310546875, 0.1734619140625, 0.34710693359375, 0.520751953125, 0.69439697265625, 0.8680419921875, 1.04168701171875, 1.21533203125, 1.38897705078125, 1.5626220703125, 1.73626708984375, 1.909912109375, 2.08355712890625, 2.2572021484375, 2.43084716796875, 2.6044921875, 2.77813720703125, 2.9517822265625, 3.12542724609375, 3.299072265625, 3.47271728515625, 3.6463623046875, 3.82000732421875, 3.99365234375, 4.16729736328125, 4.3409423828125, 4.51458740234375, 4.688232421875, 4.86187744140625, 5.0355224609375, 5.20916748046875, 5.3828125]}, "gradients/decoder.transformer.h.6.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 6.0, 8.0, 5.0, 4.0, 11.0, 15.0, 18.0, 30.0, 44.0, 53.0, 61.0, 108.0, 154.0, 206.0, 323.0, 529.0, 1040.0, 2078.0, 4587.0, 11745.0, 30771.0, 128213.0, 722895.0, 99606.0, 27192.0, 10284.0, 4248.0, 1862.0, 994.0, 507.0, 317.0, 186.0, 114.0, 104.0, 58.0, 54.0, 38.0, 25.0, 15.0, 12.0, 11.0, 10.0, 9.0, 4.0, 6.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-12.1640625, -11.80029296875, -11.4365234375, -11.07275390625, -10.708984375, -10.34521484375, -9.9814453125, -9.61767578125, -9.25390625, -8.89013671875, -8.5263671875, -8.16259765625, -7.798828125, -7.43505859375, -7.0712890625, -6.70751953125, -6.34375, -5.97998046875, -5.6162109375, -5.25244140625, -4.888671875, -4.52490234375, -4.1611328125, -3.79736328125, -3.43359375, -3.06982421875, -2.7060546875, -2.34228515625, -1.978515625, -1.61474609375, -1.2509765625, -0.88720703125, -0.5234375, -0.15966796875, 0.2041015625, 0.56787109375, 0.931640625, 1.29541015625, 1.6591796875, 2.02294921875, 2.38671875, 2.75048828125, 3.1142578125, 3.47802734375, 3.841796875, 4.20556640625, 4.5693359375, 4.93310546875, 5.296875, 5.66064453125, 6.0244140625, 6.38818359375, 6.751953125, 7.11572265625, 7.4794921875, 7.84326171875, 8.20703125, 8.57080078125, 8.9345703125, 9.29833984375, 9.662109375, 10.02587890625, 10.3896484375, 10.75341796875, 11.1171875]}, "gradients/decoder.transformer.h.6.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 3.0, 4.0, 1.0, 6.0, 1.0, 9.0, 11.0, 10.0, 11.0, 15.0, 17.0, 14.0, 23.0, 26.0, 29.0, 24.0, 25.0, 38.0, 43.0, 53.0, 57.0, 88.0, 132.0, 1456.0, 339.0, 127.0, 63.0, 66.0, 58.0, 40.0, 32.0, 30.0, 33.0, 28.0, 20.0, 29.0, 21.0, 13.0, 7.0, 10.0, 12.0, 8.0, 13.0, 4.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0], "bins": [-18.0, -17.5125732421875, -17.025146484375, -16.5377197265625, -16.05029296875, -15.5628662109375, -15.075439453125, -14.5880126953125, -14.1005859375, -13.6131591796875, -13.125732421875, -12.6383056640625, -12.15087890625, -11.6634521484375, -11.176025390625, -10.6885986328125, -10.201171875, -9.7137451171875, -9.226318359375, -8.7388916015625, -8.25146484375, -7.7640380859375, -7.276611328125, -6.7891845703125, -6.3017578125, -5.8143310546875, -5.326904296875, -4.8394775390625, -4.35205078125, -3.8646240234375, -3.377197265625, -2.8897705078125, -2.40234375, -1.9149169921875, -1.427490234375, -0.9400634765625, -0.45263671875, 0.0347900390625, 0.522216796875, 1.0096435546875, 1.4970703125, 1.9844970703125, 2.471923828125, 2.9593505859375, 3.44677734375, 3.9342041015625, 4.421630859375, 4.9090576171875, 5.396484375, 5.8839111328125, 6.371337890625, 6.8587646484375, 7.34619140625, 7.8336181640625, 8.321044921875, 8.8084716796875, 9.2958984375, 9.7833251953125, 10.270751953125, 10.7581787109375, 11.24560546875, 11.7330322265625, 12.220458984375, 12.7078857421875, 13.1953125]}, "gradients/decoder.transformer.h.6.attn.c_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 2.0, 3.0, 4.0, 6.0, 4.0, 12.0, 12.0, 15.0, 10.0, 18.0, 21.0, 31.0, 45.0, 40.0, 56.0, 78.0, 90.0, 174.0, 355.0, 1122.0, 8044.0, 218164.0, 2887152.0, 26702.0, 2252.0, 557.0, 209.0, 132.0, 76.0, 52.0, 55.0, 47.0, 34.0, 23.0, 28.0, 12.0, 19.0, 15.0, 11.0, 4.0, 6.0, 2.0, 7.0, 3.0, 3.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-39.875, -38.59765625, -37.3203125, -36.04296875, -34.765625, -33.48828125, -32.2109375, -30.93359375, -29.65625, -28.37890625, -27.1015625, -25.82421875, -24.546875, -23.26953125, -21.9921875, -20.71484375, -19.4375, -18.16015625, -16.8828125, -15.60546875, -14.328125, -13.05078125, -11.7734375, -10.49609375, -9.21875, -7.94140625, -6.6640625, -5.38671875, -4.109375, -2.83203125, -1.5546875, -0.27734375, 1.0, 2.27734375, 3.5546875, 4.83203125, 6.109375, 7.38671875, 8.6640625, 9.94140625, 11.21875, 12.49609375, 13.7734375, 15.05078125, 16.328125, 17.60546875, 18.8828125, 20.16015625, 21.4375, 22.71484375, 23.9921875, 25.26953125, 26.546875, 27.82421875, 29.1015625, 30.37890625, 31.65625, 32.93359375, 34.2109375, 35.48828125, 36.765625, 38.04296875, 39.3203125, 40.59765625, 41.875]}, "gradients/decoder.transformer.h.6.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 6.0, 9.0, 16.0, 39.0, 60.0, 82.0, 140.0, 150.0, 178.0, 146.0, 90.0, 49.0, 21.0, 14.0, 7.0, 7.0], "bins": [-60.4826774597168, -59.39608383178711, -58.30949020385742, -57.222900390625, -56.13630676269531, -55.049713134765625, -53.96311950683594, -52.87652587890625, -51.78993225097656, -50.703338623046875, -49.61674499511719, -48.530155181884766, -47.44356155395508, -46.35696792602539, -45.2703742980957, -44.183780670166016, -43.097190856933594, -42.010597229003906, -40.92400360107422, -39.8374137878418, -38.75082015991211, -37.66422653198242, -36.577632904052734, -35.49103927612305, -34.404449462890625, -33.31785583496094, -32.23126220703125, -31.144670486450195, -30.05807876586914, -28.971485137939453, -27.884891510009766, -26.798297882080078, -25.71170425415039, -24.625110626220703, -23.53851890563965, -22.45192527770996, -21.365333557128906, -20.27873992919922, -19.19214630126953, -18.105552673339844, -17.018962860107422, -15.93237018585205, -14.84577751159668, -13.759183883666992, -12.672591209411621, -11.58599853515625, -10.499404907226562, -9.412812232971191, -8.326218605041504, -7.239625930786133, -6.1530327796936035, -5.066439628601074, -3.979846954345703, -2.893254280090332, -1.8066611289978027, -0.7200679779052734, 0.36652469635009766, 1.4531176090240479, 2.539710521697998, 3.6263034343719482, 4.712896347045898, 5.7994890213012695, 6.886082172393799, 7.972675323486328, 9.0592679977417]}, "gradients/decoder.transformer.h.6.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 5.0, 0.0, 1.0, 3.0, 9.0, 9.0, 12.0, 3.0, 12.0, 11.0, 19.0, 19.0, 20.0, 18.0, 29.0, 30.0, 24.0, 26.0, 30.0, 26.0, 35.0, 38.0, 41.0, 47.0, 36.0, 34.0, 43.0, 48.0, 33.0, 44.0, 38.0, 28.0, 23.0, 27.0, 25.0, 34.0, 23.0, 16.0, 16.0, 17.0, 12.0, 12.0, 11.0, 6.0, 6.0, 4.0, 3.0, 2.0, 0.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-36.11053466796875, -34.922157287597656, -33.73377990722656, -32.545406341552734, -31.35702896118164, -30.168651580810547, -28.980276107788086, -27.791900634765625, -26.60352325439453, -25.415145874023438, -24.226770401000977, -23.038394927978516, -21.850017547607422, -20.661640167236328, -19.473264694213867, -18.284889221191406, -17.096511840820312, -15.908135414123535, -14.719758987426758, -13.53138256072998, -12.343006134033203, -11.154629707336426, -9.966253280639648, -8.777876853942871, -7.589500427246094, -6.401124000549316, -5.212747573852539, -4.024371147155762, -2.8359947204589844, -1.647618293762207, -0.4592418670654297, 0.7291345596313477, 1.917510986328125, 3.1058874130249023, 4.29426383972168, 5.482640266418457, 6.671016693115234, 7.859393119812012, 9.047769546508789, 10.236145973205566, 11.424522399902344, 12.612898826599121, 13.801275253295898, 14.989651679992676, 16.178028106689453, 17.366405487060547, 18.554780960083008, 19.74315643310547, 20.931533813476562, 22.119911193847656, 23.308286666870117, 24.496662139892578, 25.685039520263672, 26.873416900634766, 28.061792373657227, 29.250167846679688, 30.43854522705078, 31.626922607421875, 32.81529998779297, 34.0036735534668, 35.19205093383789, 36.380428314208984, 37.56880187988281, 38.757179260253906, 39.945556640625]}, "gradients/decoder.transformer.h.5.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 4.0, 8.0, 4.0, 7.0, 6.0, 8.0, 15.0, 17.0, 28.0, 28.0, 28.0, 28.0, 23.0, 39.0, 33.0, 41.0, 42.0, 43.0, 41.0, 39.0, 53.0, 39.0, 37.0, 41.0, 40.0, 44.0, 47.0, 34.0, 25.0, 22.0, 21.0, 11.0, 21.0, 18.0, 11.0, 10.0, 14.0, 5.0, 8.0, 4.0, 5.0, 3.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-5.625, -5.4451904296875, -5.265380859375, -5.0855712890625, -4.90576171875, -4.7259521484375, -4.546142578125, -4.3663330078125, -4.1865234375, -4.0067138671875, -3.826904296875, -3.6470947265625, -3.46728515625, -3.2874755859375, -3.107666015625, -2.9278564453125, -2.748046875, -2.5682373046875, -2.388427734375, -2.2086181640625, -2.02880859375, -1.8489990234375, -1.669189453125, -1.4893798828125, -1.3095703125, -1.1297607421875, -0.949951171875, -0.7701416015625, -0.59033203125, -0.4105224609375, -0.230712890625, -0.0509033203125, 0.12890625, 0.3087158203125, 0.488525390625, 0.6683349609375, 0.84814453125, 1.0279541015625, 1.207763671875, 1.3875732421875, 1.5673828125, 1.7471923828125, 1.927001953125, 2.1068115234375, 2.28662109375, 2.4664306640625, 2.646240234375, 2.8260498046875, 3.005859375, 3.1856689453125, 3.365478515625, 3.5452880859375, 3.72509765625, 3.9049072265625, 4.084716796875, 4.2645263671875, 4.4443359375, 4.6241455078125, 4.803955078125, 4.9837646484375, 5.16357421875, 5.3433837890625, 5.523193359375, 5.7030029296875, 5.8828125]}, "gradients/decoder.transformer.h.5.mlp.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 4.0, 3.0, 6.0, 6.0, 7.0, 11.0, 16.0, 20.0, 35.0, 48.0, 66.0, 124.0, 249.0, 367.0, 775.0, 1568.0, 3452.0, 8534.0, 23671.0, 85707.0, 575112.0, 2731209.0, 630139.0, 92064.0, 25226.0, 8894.0, 3602.0, 1650.0, 751.0, 388.0, 213.0, 123.0, 72.0, 57.0, 40.0, 18.0, 14.0, 12.0, 11.0, 4.0, 5.0, 4.0, 4.0, 4.0, 0.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.8828125, -10.5262451171875, -10.169677734375, -9.8131103515625, -9.45654296875, -9.0999755859375, -8.743408203125, -8.3868408203125, -8.0302734375, -7.6737060546875, -7.317138671875, -6.9605712890625, -6.60400390625, -6.2474365234375, -5.890869140625, -5.5343017578125, -5.177734375, -4.8211669921875, -4.464599609375, -4.1080322265625, -3.75146484375, -3.3948974609375, -3.038330078125, -2.6817626953125, -2.3251953125, -1.9686279296875, -1.612060546875, -1.2554931640625, -0.89892578125, -0.5423583984375, -0.185791015625, 0.1707763671875, 0.52734375, 0.8839111328125, 1.240478515625, 1.5970458984375, 1.95361328125, 2.3101806640625, 2.666748046875, 3.0233154296875, 3.3798828125, 3.7364501953125, 4.093017578125, 4.4495849609375, 4.80615234375, 5.1627197265625, 5.519287109375, 5.8758544921875, 6.232421875, 6.5889892578125, 6.945556640625, 7.3021240234375, 7.65869140625, 8.0152587890625, 8.371826171875, 8.7283935546875, 9.0849609375, 9.4415283203125, 9.798095703125, 10.1546630859375, 10.51123046875, 10.8677978515625, 11.224365234375, 11.5809326171875, 11.9375]}, "gradients/decoder.transformer.h.5.mlp.c_fc.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 0.0, 5.0, 5.0, 13.0, 13.0, 22.0, 25.0, 35.0, 52.0, 45.0, 79.0, 128.0, 207.0, 282.0, 433.0, 687.0, 750.0, 455.0, 274.0, 187.0, 117.0, 85.0, 58.0, 34.0, 22.0, 15.0, 7.0, 19.0, 9.0, 5.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0], "bins": [-15.2578125, -14.8944091796875, -14.531005859375, -14.1676025390625, -13.80419921875, -13.4407958984375, -13.077392578125, -12.7139892578125, -12.3505859375, -11.9871826171875, -11.623779296875, -11.2603759765625, -10.89697265625, -10.5335693359375, -10.170166015625, -9.8067626953125, -9.443359375, -9.0799560546875, -8.716552734375, -8.3531494140625, -7.98974609375, -7.6263427734375, -7.262939453125, -6.8995361328125, -6.5361328125, -6.1727294921875, -5.809326171875, -5.4459228515625, -5.08251953125, -4.7191162109375, -4.355712890625, -3.9923095703125, -3.62890625, -3.2655029296875, -2.902099609375, -2.5386962890625, -2.17529296875, -1.8118896484375, -1.448486328125, -1.0850830078125, -0.7216796875, -0.3582763671875, 0.005126953125, 0.3685302734375, 0.73193359375, 1.0953369140625, 1.458740234375, 1.8221435546875, 2.185546875, 2.5489501953125, 2.912353515625, 3.2757568359375, 3.63916015625, 4.0025634765625, 4.365966796875, 4.7293701171875, 5.0927734375, 5.4561767578125, 5.819580078125, 6.1829833984375, 6.54638671875, 6.9097900390625, 7.273193359375, 7.6365966796875, 8.0]}, "gradients/decoder.transformer.h.5.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 3.0, 2.0, 6.0, 9.0, 14.0, 29.0, 49.0, 74.0, 113.0, 250.0, 488.0, 1376.0, 5768.0, 46376.0, 1385727.0, 2678885.0, 65322.0, 7015.0, 1537.0, 559.0, 265.0, 129.0, 101.0, 67.0, 42.0, 25.0, 20.0, 14.0, 7.0, 3.0, 3.0, 4.0, 1.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-23.125, -22.12255859375, -21.1201171875, -20.11767578125, -19.115234375, -18.11279296875, -17.1103515625, -16.10791015625, -15.10546875, -14.10302734375, -13.1005859375, -12.09814453125, -11.095703125, -10.09326171875, -9.0908203125, -8.08837890625, -7.0859375, -6.08349609375, -5.0810546875, -4.07861328125, -3.076171875, -2.07373046875, -1.0712890625, -0.06884765625, 0.93359375, 1.93603515625, 2.9384765625, 3.94091796875, 4.943359375, 5.94580078125, 6.9482421875, 7.95068359375, 8.953125, 9.95556640625, 10.9580078125, 11.96044921875, 12.962890625, 13.96533203125, 14.9677734375, 15.97021484375, 16.97265625, 17.97509765625, 18.9775390625, 19.97998046875, 20.982421875, 21.98486328125, 22.9873046875, 23.98974609375, 24.9921875, 25.99462890625, 26.9970703125, 27.99951171875, 29.001953125, 30.00439453125, 31.0068359375, 32.00927734375, 33.01171875, 34.01416015625, 35.0166015625, 36.01904296875, 37.021484375, 38.02392578125, 39.0263671875, 40.02880859375, 41.03125]}, "gradients/decoder.transformer.h.5.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 35.0, 417.0, 506.0, 55.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-479.31756591796875, -469.76031494140625, -460.2030944824219, -450.6458435058594, -441.088623046875, -431.5313720703125, -421.97412109375, -412.4168701171875, -402.8596496582031, -393.3023986816406, -383.74517822265625, -374.18792724609375, -364.63067626953125, -355.0734558105469, -345.5162048339844, -335.958984375, -326.4017333984375, -316.844482421875, -307.2872619628906, -297.7300109863281, -288.17279052734375, -278.61553955078125, -269.05828857421875, -259.50103759765625, -249.94381713867188, -240.38658142089844, -230.829345703125, -221.2720947265625, -211.71485900878906, -202.15762329101562, -192.60037231445312, -183.0431365966797, -173.48587036132812, -163.9286346435547, -154.37139892578125, -144.81414794921875, -135.2569122314453, -125.69967651367188, -116.1424331665039, -106.58518981933594, -97.0279541015625, -87.47071838378906, -77.9134750366211, -68.35623168945312, -58.79899597167969, -49.241756439208984, -39.68451690673828, -30.127273559570312, -20.570037841796875, -11.012798309326172, -1.4555587768554688, 8.101680755615234, 17.658920288085938, 27.21615982055664, 36.773399353027344, 46.33064270019531, 55.88787841796875, 65.44511413574219, 75.00235748291016, 84.55960083007812, 94.11683654785156, 103.674072265625, 113.23131561279297, 122.78855895996094, 132.34579467773438]}, "gradients/decoder.transformer.h.5.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 5.0, 6.0, 1.0, 4.0, 10.0, 9.0, 7.0, 14.0, 14.0, 26.0, 26.0, 20.0, 35.0, 30.0, 29.0, 40.0, 46.0, 30.0, 37.0, 38.0, 46.0, 44.0, 55.0, 36.0, 47.0, 41.0, 40.0, 36.0, 21.0, 21.0, 31.0, 26.0, 28.0, 22.0, 12.0, 13.0, 13.0, 14.0, 5.0, 10.0, 7.0, 4.0, 4.0, 2.0, 1.0, 0.0, 3.0, 0.0, 1.0], "bins": [-45.18284606933594, -43.971595764160156, -42.760345458984375, -41.549095153808594, -40.33784484863281, -39.12659454345703, -37.91534423828125, -36.7040901184082, -35.49283981323242, -34.28158950805664, -33.07033920288086, -31.859088897705078, -30.647836685180664, -29.436586380004883, -28.2253360748291, -27.014083862304688, -25.80283546447754, -24.591585159301758, -23.380334854125977, -22.169082641601562, -20.95783233642578, -19.74658203125, -18.53533172607422, -17.324081420898438, -16.112831115722656, -14.901580810546875, -13.690329551696777, -12.479079246520996, -11.267827987670898, -10.056577682495117, -8.845327377319336, -7.634076118469238, -6.422824859619141, -5.211574077606201, -4.000323295593262, -2.7890729904174805, -1.577822208404541, -0.36657142639160156, 0.8446788787841797, 2.0559301376342773, 3.2671804428100586, 4.478431224822998, 5.6896820068359375, 6.900932312011719, 8.1121826171875, 9.323433876037598, 10.534684181213379, 11.745935440063477, 12.957185745239258, 14.168436050415039, 15.379687309265137, 16.590938568115234, 17.802188873291016, 19.013439178466797, 20.224689483642578, 21.43593978881836, 22.64719009399414, 23.858440399169922, 25.069690704345703, 26.280941009521484, 27.4921932220459, 28.70344352722168, 29.91469383239746, 31.125946044921875, 32.337196350097656]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 1.0, 8.0, 6.0, 10.0, 7.0, 13.0, 9.0, 26.0, 26.0, 24.0, 29.0, 27.0, 28.0, 38.0, 32.0, 39.0, 37.0, 42.0, 60.0, 39.0, 41.0, 41.0, 33.0, 41.0, 34.0, 42.0, 35.0, 24.0, 32.0, 28.0, 18.0, 14.0, 22.0, 20.0, 17.0, 15.0, 8.0, 9.0, 10.0, 2.0, 5.0, 3.0, 2.0, 0.0, 2.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.3515625, -5.16900634765625, -4.9864501953125, -4.80389404296875, -4.621337890625, -4.43878173828125, -4.2562255859375, -4.07366943359375, -3.89111328125, -3.70855712890625, -3.5260009765625, -3.34344482421875, -3.160888671875, -2.97833251953125, -2.7957763671875, -2.61322021484375, -2.4306640625, -2.24810791015625, -2.0655517578125, -1.88299560546875, -1.700439453125, -1.51788330078125, -1.3353271484375, -1.15277099609375, -0.97021484375, -0.78765869140625, -0.6051025390625, -0.42254638671875, -0.239990234375, -0.05743408203125, 0.1251220703125, 0.30767822265625, 0.490234375, 0.67279052734375, 0.8553466796875, 1.03790283203125, 1.220458984375, 1.40301513671875, 1.5855712890625, 1.76812744140625, 1.95068359375, 2.13323974609375, 2.3157958984375, 2.49835205078125, 2.680908203125, 2.86346435546875, 3.0460205078125, 3.22857666015625, 3.4111328125, 3.59368896484375, 3.7762451171875, 3.95880126953125, 4.141357421875, 4.32391357421875, 4.5064697265625, 4.68902587890625, 4.87158203125, 5.05413818359375, 5.2366943359375, 5.41925048828125, 5.601806640625, 5.78436279296875, 5.9669189453125, 6.14947509765625, 6.33203125]}, "gradients/decoder.transformer.h.5.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 4.0, 3.0, 3.0, 9.0, 10.0, 23.0, 30.0, 41.0, 63.0, 86.0, 178.0, 218.0, 351.0, 529.0, 759.0, 1266.0, 2036.0, 3156.0, 5163.0, 8002.0, 13243.0, 21957.0, 36595.0, 65474.0, 127568.0, 342954.0, 205822.0, 90547.0, 49251.0, 28261.0, 16971.0, 10620.0, 6601.0, 3856.0, 2519.0, 1535.0, 1010.0, 663.0, 417.0, 265.0, 169.0, 118.0, 71.0, 49.0, 41.0, 21.0, 17.0, 7.0, 6.0, 1.0, 3.0, 0.0, 3.0, 3.0, 1.0], "bins": [-0.20166015625, -0.19585800170898438, -0.19005584716796875, -0.18425369262695312, -0.1784515380859375, -0.17264938354492188, -0.16684722900390625, -0.16104507446289062, -0.155242919921875, -0.14944076538085938, -0.14363861083984375, -0.13783645629882812, -0.1320343017578125, -0.12623214721679688, -0.12042999267578125, -0.11462783813476562, -0.10882568359375, -0.10302352905273438, -0.09722137451171875, -0.09141921997070312, -0.0856170654296875, -0.07981491088867188, -0.07401275634765625, -0.06821060180664062, -0.062408447265625, -0.056606292724609375, -0.05080413818359375, -0.045001983642578125, -0.0391998291015625, -0.033397674560546875, -0.02759552001953125, -0.021793365478515625, -0.0159912109375, -0.010189056396484375, -0.00438690185546875, 0.001415252685546875, 0.0072174072265625, 0.013019561767578125, 0.01882171630859375, 0.024623870849609375, 0.030426025390625, 0.036228179931640625, 0.04203033447265625, 0.047832489013671875, 0.0536346435546875, 0.059436798095703125, 0.06523895263671875, 0.07104110717773438, 0.07684326171875, 0.08264541625976562, 0.08844757080078125, 0.09424972534179688, 0.1000518798828125, 0.10585403442382812, 0.11165618896484375, 0.11745834350585938, 0.123260498046875, 0.12906265258789062, 0.13486480712890625, 0.14066696166992188, 0.1464691162109375, 0.15227127075195312, 0.15807342529296875, 0.16387557983398438, 0.169677734375]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 6.0, 6.0, 2.0, 10.0, 3.0, 7.0, 19.0, 11.0, 11.0, 16.0, 18.0, 24.0, 27.0, 30.0, 31.0, 35.0, 49.0, 36.0, 36.0, 44.0, 42.0, 1064.0, 32.0, 43.0, 46.0, 31.0, 41.0, 41.0, 48.0, 33.0, 42.0, 26.0, 23.0, 18.0, 19.0, 14.0, 8.0, 11.0, 7.0, 5.0, 5.0, 5.0, 4.0, 3.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.3671875, -4.24163818359375, -4.1160888671875, -3.99053955078125, -3.864990234375, -3.73944091796875, -3.6138916015625, -3.48834228515625, -3.36279296875, -3.23724365234375, -3.1116943359375, -2.98614501953125, -2.860595703125, -2.73504638671875, -2.6094970703125, -2.48394775390625, -2.3583984375, -2.23284912109375, -2.1072998046875, -1.98175048828125, -1.856201171875, -1.73065185546875, -1.6051025390625, -1.47955322265625, -1.35400390625, -1.22845458984375, -1.1029052734375, -0.97735595703125, -0.851806640625, -0.72625732421875, -0.6007080078125, -0.47515869140625, -0.349609375, -0.22406005859375, -0.0985107421875, 0.02703857421875, 0.152587890625, 0.27813720703125, 0.4036865234375, 0.52923583984375, 0.65478515625, 0.78033447265625, 0.9058837890625, 1.03143310546875, 1.156982421875, 1.28253173828125, 1.4080810546875, 1.53363037109375, 1.6591796875, 1.78472900390625, 1.9102783203125, 2.03582763671875, 2.161376953125, 2.28692626953125, 2.4124755859375, 2.53802490234375, 2.66357421875, 2.78912353515625, 2.9146728515625, 3.04022216796875, 3.165771484375, 3.29132080078125, 3.4168701171875, 3.54241943359375, 3.66796875]}, "gradients/decoder.transformer.h.5.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 6.0, 3.0, 14.0, 15.0, 11.0, 22.0, 34.0, 59.0, 102.0, 147.0, 233.0, 357.0, 547.0, 924.0, 1458.0, 2290.0, 3785.0, 6328.0, 10771.0, 18826.0, 34246.0, 64461.0, 131130.0, 1385764.0, 225449.0, 96617.0, 49505.0, 26696.0, 15016.0, 8835.0, 5139.0, 3178.0, 1955.0, 1192.0, 740.0, 471.0, 273.0, 181.0, 124.0, 70.0, 65.0, 40.0, 22.0, 15.0, 11.0, 3.0, 4.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.1927490234375, -0.1872100830078125, -0.181671142578125, -0.1761322021484375, -0.17059326171875, -0.1650543212890625, -0.159515380859375, -0.1539764404296875, -0.1484375, -0.1428985595703125, -0.137359619140625, -0.1318206787109375, -0.12628173828125, -0.1207427978515625, -0.115203857421875, -0.1096649169921875, -0.1041259765625, -0.0985870361328125, -0.093048095703125, -0.0875091552734375, -0.08197021484375, -0.0764312744140625, -0.070892333984375, -0.0653533935546875, -0.059814453125, -0.0542755126953125, -0.048736572265625, -0.0431976318359375, -0.03765869140625, -0.0321197509765625, -0.026580810546875, -0.0210418701171875, -0.0155029296875, -0.0099639892578125, -0.004425048828125, 0.0011138916015625, 0.00665283203125, 0.0121917724609375, 0.017730712890625, 0.0232696533203125, 0.02880859375, 0.0343475341796875, 0.039886474609375, 0.0454254150390625, 0.05096435546875, 0.0565032958984375, 0.062042236328125, 0.0675811767578125, 0.0731201171875, 0.0786590576171875, 0.084197998046875, 0.0897369384765625, 0.09527587890625, 0.1008148193359375, 0.106353759765625, 0.1118927001953125, 0.117431640625, 0.1229705810546875, 0.128509521484375, 0.1340484619140625, 0.13958740234375, 0.1451263427734375, 0.150665283203125, 0.1562042236328125, 0.1617431640625]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 6.0, 7.0, 4.0, 10.0, 15.0, 15.0, 20.0, 24.0, 32.0, 24.0, 58.0, 51.0, 72.0, 62.0, 91.0, 81.0, 82.0, 66.0, 49.0, 48.0, 50.0, 36.0, 18.0, 17.0, 13.0, 9.0, 11.0, 8.0, 6.0, 6.0, 7.0, 3.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-1.3828277587890625e-05, -1.3476237654685974e-05, -1.3124197721481323e-05, -1.2772157788276672e-05, -1.2420117855072021e-05, -1.206807792186737e-05, -1.171603798866272e-05, -1.1363998055458069e-05, -1.1011958122253418e-05, -1.0659918189048767e-05, -1.0307878255844116e-05, -9.955838322639465e-06, -9.603798389434814e-06, -9.251758456230164e-06, -8.899718523025513e-06, -8.547678589820862e-06, -8.195638656616211e-06, -7.84359872341156e-06, -7.491558790206909e-06, -7.139518857002258e-06, -6.787478923797607e-06, -6.4354389905929565e-06, -6.083399057388306e-06, -5.731359124183655e-06, -5.379319190979004e-06, -5.027279257774353e-06, -4.675239324569702e-06, -4.323199391365051e-06, -3.9711594581604e-06, -3.6191195249557495e-06, -3.2670795917510986e-06, -2.9150396585464478e-06, -2.562999725341797e-06, -2.210959792137146e-06, -1.8589198589324951e-06, -1.5068799257278442e-06, -1.1548399925231934e-06, -8.028000593185425e-07, -4.507601261138916e-07, -9.872019290924072e-08, 2.5331974029541016e-07, 6.05359673500061e-07, 9.57399606704712e-07, 1.3094395399093628e-06, 1.6614794731140137e-06, 2.0135194063186646e-06, 2.3655593395233154e-06, 2.7175992727279663e-06, 3.069639205932617e-06, 3.421679139137268e-06, 3.773719072341919e-06, 4.12575900554657e-06, 4.477798938751221e-06, 4.829838871955872e-06, 5.1818788051605225e-06, 5.533918738365173e-06, 5.885958671569824e-06, 6.237998604774475e-06, 6.590038537979126e-06, 6.942078471183777e-06, 7.294118404388428e-06, 7.646158337593079e-06, 7.99819827079773e-06, 8.35023820400238e-06, 8.702278137207031e-06]}, "gradients/decoder.transformer.h.5.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 7.0, 11.0, 11.0, 9.0, 8.0, 16.0, 18.0, 21.0, 24.0, 35.0, 42.0, 52.0, 91.0, 147.0, 226.0, 378.0, 1237.0, 112531.0, 927103.0, 5100.0, 591.0, 287.0, 159.0, 112.0, 68.0, 56.0, 52.0, 33.0, 42.0, 24.0, 17.0, 16.0, 11.0, 6.0, 3.0, 2.0, 4.0, 0.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0], "bins": [-0.0001437664031982422, -0.0001380685716867447, -0.0001323707401752472, -0.0001266729086637497, -0.0001209750771522522, -0.0001152772456407547, -0.0001095794141292572, -0.0001038815826177597, -9.818375110626221e-05, -9.248591959476471e-05, -8.678808808326721e-05, -8.109025657176971e-05, -7.539242506027222e-05, -6.969459354877472e-05, -6.399676203727722e-05, -5.8298930525779724e-05, -5.2601099014282227e-05, -4.690326750278473e-05, -4.120543599128723e-05, -3.5507604479789734e-05, -2.9809772968292236e-05, -2.411194145679474e-05, -1.841410994529724e-05, -1.2716278433799744e-05, -7.018446922302246e-06, -1.3206154108047485e-06, 4.377216100692749e-06, 1.0075047612190247e-05, 1.5772879123687744e-05, 2.1470710635185242e-05, 2.716854214668274e-05, 3.286637365818024e-05, 3.8564205169677734e-05, 4.426203668117523e-05, 4.995986819267273e-05, 5.565769970417023e-05, 6.135553121566772e-05, 6.705336272716522e-05, 7.275119423866272e-05, 7.844902575016022e-05, 8.414685726165771e-05, 8.984468877315521e-05, 9.554252028465271e-05, 0.00010124035179615021, 0.0001069381833076477, 0.0001126360148191452, 0.0001183338463306427, 0.0001240316778421402, 0.0001297295093536377, 0.0001354273408651352, 0.0001411251723766327, 0.0001468230038881302, 0.00015252083539962769, 0.00015821866691112518, 0.00016391649842262268, 0.00016961432993412018, 0.00017531216144561768, 0.00018100999295711517, 0.00018670782446861267, 0.00019240565598011017, 0.00019810348749160767, 0.00020380131900310516, 0.00020949915051460266, 0.00021519698202610016, 0.00022089481353759766]}, "gradients/decoder.transformer.h.5.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 5.0, 14.0, 34.0, 61.0, 114.0, 187.0, 225.0, 190.0, 108.0, 48.0, 23.0, 6.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9875662474078126e-05, -1.9488486941554584e-05, -1.9101313228020445e-05, -1.8714137695496902e-05, -1.832696216297336e-05, -1.7939786630449817e-05, -1.7552612916915677e-05, -1.7165437384392135e-05, -1.6778261851868592e-05, -1.639108631934505e-05, -1.600391260581091e-05, -1.5616737073287368e-05, -1.5229561540763825e-05, -1.4842386917734984e-05, -1.4455212294706143e-05, -1.40680367621826e-05, -1.368086213915376e-05, -1.3293687516124919e-05, -1.2906511983601376e-05, -1.2519337360572536e-05, -1.2132161828048993e-05, -1.1744987205020152e-05, -1.135781167249661e-05, -1.0970637049467769e-05, -1.0583462426438928e-05, -1.0196287803410087e-05, -9.809112270886544e-06, -9.421937647857703e-06, -9.03476211533416e-06, -8.64758749230532e-06, -8.260412869276479e-06, -7.873237336752936e-06, -7.486062713724095e-06, -7.098887635947904e-06, -6.711712558171712e-06, -6.324537935142871e-06, -5.937362402619328e-06, -5.5501877795904875e-06, -5.163012701814296e-06, -4.775837624038104e-06, -4.388662546261912e-06, -4.0014874684857205e-06, -3.6143123907095287e-06, -3.2271375403070124e-06, -2.8399624625308206e-06, -2.452787384754629e-06, -2.0656125343521126e-06, -1.6784374565759208e-06, -1.291262378799729e-06, -9.040873578669562e-07, -5.169123369341833e-07, -1.2973737284482922e-07, 2.5743770493136253e-07, 6.446127827075543e-07, 1.0317876331100706e-06, 1.4189627108862624e-06, 1.8061377886624541e-06, 2.193312866438646e-06, 2.5804879442148376e-06, 2.967662794617354e-06, 3.3548378723935457e-06, 3.7420129501697375e-06, 4.129187800572254e-06, 4.5163628783484455e-06, 4.903537956124637e-06]}, "gradients/decoder.transformer.h.5.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 4.0, 1.0, 1.0, 3.0, 4.0, 8.0, 9.0, 6.0, 11.0, 16.0, 12.0, 8.0, 14.0, 25.0, 10.0, 33.0, 13.0, 17.0, 32.0, 27.0, 43.0, 36.0, 33.0, 41.0, 34.0, 61.0, 31.0, 39.0, 40.0, 24.0, 30.0, 34.0, 46.0, 31.0, 22.0, 39.0, 19.0, 43.0, 16.0, 13.0, 16.0, 11.0, 13.0, 8.0, 5.0, 4.0, 6.0, 6.0, 6.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.351139068603516e-06, -4.207715392112732e-06, -4.064291715621948e-06, -3.9208680391311646e-06, -3.777444362640381e-06, -3.634020686149597e-06, -3.4905970096588135e-06, -3.3471733331680298e-06, -3.203749656677246e-06, -3.0603259801864624e-06, -2.9169023036956787e-06, -2.773478627204895e-06, -2.6300549507141113e-06, -2.4866312742233276e-06, -2.343207597732544e-06, -2.1997839212417603e-06, -2.0563602447509766e-06, -1.912936568260193e-06, -1.7695128917694092e-06, -1.6260892152786255e-06, -1.4826655387878418e-06, -1.339241862297058e-06, -1.1958181858062744e-06, -1.0523945093154907e-06, -9.08970832824707e-07, -7.655471563339233e-07, -6.221234798431396e-07, -4.78699803352356e-07, -3.3527612686157227e-07, -1.9185245037078857e-07, -4.842877388000488e-08, 9.499490261077881e-08, 2.384185791015625e-07, 3.818422555923462e-07, 5.252659320831299e-07, 6.686896085739136e-07, 8.121132850646973e-07, 9.55536961555481e-07, 1.0989606380462646e-06, 1.2423843145370483e-06, 1.385807991027832e-06, 1.5292316675186157e-06, 1.6726553440093994e-06, 1.816079020500183e-06, 1.959502696990967e-06, 2.1029263734817505e-06, 2.246350049972534e-06, 2.389773726463318e-06, 2.5331974029541016e-06, 2.6766210794448853e-06, 2.820044755935669e-06, 2.9634684324264526e-06, 3.1068921089172363e-06, 3.25031578540802e-06, 3.3937394618988037e-06, 3.5371631383895874e-06, 3.680586814880371e-06, 3.824010491371155e-06, 3.9674341678619385e-06, 4.110857844352722e-06, 4.254281520843506e-06, 4.3977051973342896e-06, 4.541128873825073e-06, 4.684552550315857e-06, 4.827976226806641e-06]}, "gradients/decoder.transformer.h.5.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 3.0, 4.0, 3.0, 1.0, 8.0, 6.0, 10.0, 7.0, 13.0, 9.0, 26.0, 26.0, 24.0, 29.0, 27.0, 28.0, 38.0, 32.0, 39.0, 37.0, 42.0, 60.0, 39.0, 41.0, 41.0, 33.0, 41.0, 34.0, 42.0, 35.0, 24.0, 32.0, 28.0, 18.0, 14.0, 22.0, 20.0, 17.0, 15.0, 8.0, 9.0, 10.0, 2.0, 5.0, 3.0, 2.0, 0.0, 2.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.3515625, -5.16900634765625, -4.9864501953125, -4.80389404296875, -4.621337890625, -4.43878173828125, -4.2562255859375, -4.07366943359375, -3.89111328125, -3.70855712890625, -3.5260009765625, -3.34344482421875, -3.160888671875, -2.97833251953125, -2.7957763671875, -2.61322021484375, -2.4306640625, -2.24810791015625, -2.0655517578125, -1.88299560546875, -1.700439453125, -1.51788330078125, -1.3353271484375, -1.15277099609375, -0.97021484375, -0.78765869140625, -0.6051025390625, -0.42254638671875, -0.239990234375, -0.05743408203125, 0.1251220703125, 0.30767822265625, 0.490234375, 0.67279052734375, 0.8553466796875, 1.03790283203125, 1.220458984375, 1.40301513671875, 1.5855712890625, 1.76812744140625, 1.95068359375, 2.13323974609375, 2.3157958984375, 2.49835205078125, 2.680908203125, 2.86346435546875, 3.0460205078125, 3.22857666015625, 3.4111328125, 3.59368896484375, 3.7762451171875, 3.95880126953125, 4.141357421875, 4.32391357421875, 4.5064697265625, 4.68902587890625, 4.87158203125, 5.05413818359375, 5.2366943359375, 5.41925048828125, 5.601806640625, 5.78436279296875, 5.9669189453125, 6.14947509765625, 6.33203125]}, "gradients/decoder.transformer.h.5.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 3.0, 4.0, 1.0, 2.0, 5.0, 8.0, 11.0, 11.0, 19.0, 30.0, 39.0, 56.0, 104.0, 124.0, 189.0, 269.0, 456.0, 735.0, 1248.0, 2244.0, 3826.0, 7069.0, 13377.0, 26430.0, 56367.0, 137419.0, 395383.0, 237655.0, 86197.0, 38447.0, 18634.0, 9731.0, 5321.0, 2856.0, 1679.0, 934.0, 558.0, 370.0, 250.0, 136.0, 110.0, 82.0, 47.0, 35.0, 24.0, 23.0, 14.0, 9.0, 9.0, 4.0, 3.0, 7.0, 5.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.03125, -6.8026123046875, -6.573974609375, -6.3453369140625, -6.11669921875, -5.8880615234375, -5.659423828125, -5.4307861328125, -5.2021484375, -4.9735107421875, -4.744873046875, -4.5162353515625, -4.28759765625, -4.0589599609375, -3.830322265625, -3.6016845703125, -3.373046875, -3.1444091796875, -2.915771484375, -2.6871337890625, -2.45849609375, -2.2298583984375, -2.001220703125, -1.7725830078125, -1.5439453125, -1.3153076171875, -1.086669921875, -0.8580322265625, -0.62939453125, -0.4007568359375, -0.172119140625, 0.0565185546875, 0.28515625, 0.5137939453125, 0.742431640625, 0.9710693359375, 1.19970703125, 1.4283447265625, 1.656982421875, 1.8856201171875, 2.1142578125, 2.3428955078125, 2.571533203125, 2.8001708984375, 3.02880859375, 3.2574462890625, 3.486083984375, 3.7147216796875, 3.943359375, 4.1719970703125, 4.400634765625, 4.6292724609375, 4.85791015625, 5.0865478515625, 5.315185546875, 5.5438232421875, 5.7724609375, 6.0010986328125, 6.229736328125, 6.4583740234375, 6.68701171875, 6.9156494140625, 7.144287109375, 7.3729248046875, 7.6015625]}, "gradients/decoder.transformer.h.5.attn.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 4.0, 3.0, 6.0, 6.0, 11.0, 7.0, 12.0, 12.0, 11.0, 15.0, 18.0, 17.0, 24.0, 27.0, 25.0, 30.0, 40.0, 45.0, 44.0, 55.0, 56.0, 77.0, 137.0, 272.0, 1381.0, 153.0, 90.0, 69.0, 62.0, 53.0, 30.0, 43.0, 33.0, 21.0, 22.0, 22.0, 11.0, 14.0, 19.0, 12.0, 10.0, 11.0, 10.0, 5.0, 7.0, 9.0, 1.0, 5.0, 6.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-13.3125, -12.839599609375, -12.36669921875, -11.893798828125, -11.4208984375, -10.947998046875, -10.47509765625, -10.002197265625, -9.529296875, -9.056396484375, -8.58349609375, -8.110595703125, -7.6376953125, -7.164794921875, -6.69189453125, -6.218994140625, -5.74609375, -5.273193359375, -4.80029296875, -4.327392578125, -3.8544921875, -3.381591796875, -2.90869140625, -2.435791015625, -1.962890625, -1.489990234375, -1.01708984375, -0.544189453125, -0.0712890625, 0.401611328125, 0.87451171875, 1.347412109375, 1.8203125, 2.293212890625, 2.76611328125, 3.239013671875, 3.7119140625, 4.184814453125, 4.65771484375, 5.130615234375, 5.603515625, 6.076416015625, 6.54931640625, 7.022216796875, 7.4951171875, 7.968017578125, 8.44091796875, 8.913818359375, 9.38671875, 9.859619140625, 10.33251953125, 10.805419921875, 11.2783203125, 11.751220703125, 12.22412109375, 12.697021484375, 13.169921875, 13.642822265625, 14.11572265625, 14.588623046875, 15.0615234375, 15.534423828125, 16.00732421875, 16.480224609375, 16.953125]}, "gradients/decoder.transformer.h.5.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 2.0, 4.0, 3.0, 10.0, 9.0, 5.0, 16.0, 14.0, 18.0, 30.0, 33.0, 66.0, 85.0, 128.0, 173.0, 295.0, 743.0, 3644.0, 31180.0, 1226331.0, 1841521.0, 35727.0, 3875.0, 829.0, 337.0, 190.0, 127.0, 95.0, 58.0, 52.0, 35.0, 17.0, 17.0, 14.0, 6.0, 4.0, 1.0, 8.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0], "bins": [-34.46875, -33.484619140625, -32.50048828125, -31.516357421875, -30.5322265625, -29.548095703125, -28.56396484375, -27.579833984375, -26.595703125, -25.611572265625, -24.62744140625, -23.643310546875, -22.6591796875, -21.675048828125, -20.69091796875, -19.706787109375, -18.72265625, -17.738525390625, -16.75439453125, -15.770263671875, -14.7861328125, -13.802001953125, -12.81787109375, -11.833740234375, -10.849609375, -9.865478515625, -8.88134765625, -7.897216796875, -6.9130859375, -5.928955078125, -4.94482421875, -3.960693359375, -2.9765625, -1.992431640625, -1.00830078125, -0.024169921875, 0.9599609375, 1.944091796875, 2.92822265625, 3.912353515625, 4.896484375, 5.880615234375, 6.86474609375, 7.848876953125, 8.8330078125, 9.817138671875, 10.80126953125, 11.785400390625, 12.76953125, 13.753662109375, 14.73779296875, 15.721923828125, 16.7060546875, 17.690185546875, 18.67431640625, 19.658447265625, 20.642578125, 21.626708984375, 22.61083984375, 23.594970703125, 24.5791015625, 25.563232421875, 26.54736328125, 27.531494140625, 28.515625]}, "gradients/decoder.transformer.h.5.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 14.0, 642.0, 356.0, 6.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-428.7663879394531, -419.623779296875, -410.4811706542969, -401.3385925292969, -392.19598388671875, -383.0533752441406, -373.9107666015625, -364.7681579589844, -355.62554931640625, -346.4829406738281, -337.34033203125, -328.19775390625, -319.0551452636719, -309.91253662109375, -300.7699279785156, -291.6273193359375, -282.4847412109375, -273.3421325683594, -264.19952392578125, -255.0569305419922, -245.91433715820312, -236.771728515625, -227.62911987304688, -218.48651123046875, -209.3439178466797, -200.20130920410156, -191.0587158203125, -181.91610717773438, -172.77349853515625, -163.6309051513672, -154.48829650878906, -145.345703125, -136.20310974121094, -127.06050872802734, -117.91790771484375, -108.77529907226562, -99.63269805908203, -90.49009704589844, -81.34748840332031, -72.20488739013672, -63.062286376953125, -53.91968536376953, -44.77708053588867, -35.63447570800781, -26.49187469482422, -17.349273681640625, -8.206668853759766, 0.9359359741210938, 10.078536987304688, 19.221139907836914, 28.36374282836914, 37.50634765625, 46.648948669433594, 55.79154968261719, 64.93415832519531, 74.0767593383789, 83.2193603515625, 92.3619613647461, 101.50456237792969, 110.64717102050781, 119.7897720336914, 128.932373046875, 138.07498168945312, 147.21759033203125, 156.3601837158203]}, "gradients/decoder.transformer.h.5.ln_1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 4.0, 4.0, 6.0, 6.0, 7.0, 10.0, 13.0, 15.0, 16.0, 26.0, 17.0, 25.0, 27.0, 29.0, 27.0, 41.0, 35.0, 40.0, 43.0, 38.0, 46.0, 38.0, 55.0, 46.0, 32.0, 36.0, 46.0, 42.0, 33.0, 37.0, 22.0, 27.0, 20.0, 15.0, 15.0, 16.0, 7.0, 9.0, 7.0, 6.0, 3.0, 6.0, 7.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-41.784217834472656, -40.57514953613281, -39.36608123779297, -38.157012939453125, -36.94794845581055, -35.7388801574707, -34.52981185913086, -33.320743560791016, -32.11167526245117, -30.902606964111328, -29.693540573120117, -28.484472274780273, -27.27540397644043, -26.06633758544922, -24.857269287109375, -23.64820098876953, -22.43913459777832, -21.230066299438477, -20.020999908447266, -18.811931610107422, -17.602863311767578, -16.393795013427734, -15.184728622436523, -13.97566032409668, -12.766592979431152, -11.557525634765625, -10.348457336425781, -9.139389991760254, -7.930322170257568, -6.721254348754883, -5.5121870040893555, -4.303118705749512, -3.0940513610839844, -1.8849836587905884, -0.6759159564971924, 0.5331516265869141, 1.7422194480895996, 2.951287269592285, 4.1603546142578125, 5.369422912597656, 6.578490257263184, 7.787558078765869, 8.996625900268555, 10.205693244934082, 11.41476058959961, 12.623828887939453, 13.83289623260498, 15.041964530944824, 16.25103187561035, 17.460100173950195, 18.669166564941406, 19.87823486328125, 21.087303161621094, 22.296371459960938, 23.50543785095215, 24.714506149291992, 25.923572540283203, 27.132640838623047, 28.341707229614258, 29.5507755279541, 30.759843826293945, 31.968910217285156, 33.177978515625, 34.387046813964844, 35.59611511230469]}, "gradients/decoder.transformer.h.4.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 2.0, 3.0, 8.0, 4.0, 3.0, 16.0, 5.0, 15.0, 14.0, 15.0, 23.0, 42.0, 29.0, 33.0, 30.0, 24.0, 33.0, 34.0, 46.0, 39.0, 47.0, 42.0, 39.0, 41.0, 42.0, 39.0, 29.0, 35.0, 33.0, 35.0, 27.0, 26.0, 21.0, 10.0, 21.0, 21.0, 17.0, 16.0, 11.0, 12.0, 4.0, 4.0, 3.0, 4.0, 8.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.38671875, -5.201904296875, -5.01708984375, -4.832275390625, -4.6474609375, -4.462646484375, -4.27783203125, -4.093017578125, -3.908203125, -3.723388671875, -3.53857421875, -3.353759765625, -3.1689453125, -2.984130859375, -2.79931640625, -2.614501953125, -2.4296875, -2.244873046875, -2.06005859375, -1.875244140625, -1.6904296875, -1.505615234375, -1.32080078125, -1.135986328125, -0.951171875, -0.766357421875, -0.58154296875, -0.396728515625, -0.2119140625, -0.027099609375, 0.15771484375, 0.342529296875, 0.52734375, 0.712158203125, 0.89697265625, 1.081787109375, 1.2666015625, 1.451416015625, 1.63623046875, 1.821044921875, 2.005859375, 2.190673828125, 2.37548828125, 2.560302734375, 2.7451171875, 2.929931640625, 3.11474609375, 3.299560546875, 3.484375, 3.669189453125, 3.85400390625, 4.038818359375, 4.2236328125, 4.408447265625, 4.59326171875, 4.778076171875, 4.962890625, 5.147705078125, 5.33251953125, 5.517333984375, 5.7021484375, 5.886962890625, 6.07177734375, 6.256591796875, 6.44140625]}, "gradients/decoder.transformer.h.4.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 8.0, 7.0, 5.0, 7.0, 7.0, 14.0, 38.0, 36.0, 59.0, 100.0, 119.0, 210.0, 354.0, 604.0, 1069.0, 1795.0, 3175.0, 6029.0, 11754.0, 25744.0, 68363.0, 266555.0, 1369345.0, 1888674.0, 396139.0, 92012.0, 32220.0, 14046.0, 7081.0, 3721.0, 2051.0, 1192.0, 709.0, 380.0, 219.0, 149.0, 94.0, 60.0, 47.0, 28.0, 25.0, 14.0, 5.0, 6.0, 5.0, 8.0, 3.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0], "bins": [-7.9140625, -7.642333984375, -7.37060546875, -7.098876953125, -6.8271484375, -6.555419921875, -6.28369140625, -6.011962890625, -5.740234375, -5.468505859375, -5.19677734375, -4.925048828125, -4.6533203125, -4.381591796875, -4.10986328125, -3.838134765625, -3.56640625, -3.294677734375, -3.02294921875, -2.751220703125, -2.4794921875, -2.207763671875, -1.93603515625, -1.664306640625, -1.392578125, -1.120849609375, -0.84912109375, -0.577392578125, -0.3056640625, -0.033935546875, 0.23779296875, 0.509521484375, 0.78125, 1.052978515625, 1.32470703125, 1.596435546875, 1.8681640625, 2.139892578125, 2.41162109375, 2.683349609375, 2.955078125, 3.226806640625, 3.49853515625, 3.770263671875, 4.0419921875, 4.313720703125, 4.58544921875, 4.857177734375, 5.12890625, 5.400634765625, 5.67236328125, 5.944091796875, 6.2158203125, 6.487548828125, 6.75927734375, 7.031005859375, 7.302734375, 7.574462890625, 7.84619140625, 8.117919921875, 8.3896484375, 8.661376953125, 8.93310546875, 9.204833984375, 9.4765625]}, "gradients/decoder.transformer.h.4.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 8.0, 4.0, 11.0, 11.0, 15.0, 24.0, 34.0, 50.0, 56.0, 98.0, 160.0, 233.0, 381.0, 617.0, 776.0, 569.0, 373.0, 200.0, 125.0, 94.0, 56.0, 52.0, 33.0, 29.0, 13.0, 15.0, 11.0, 8.0, 8.0, 1.0, 6.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0], "bins": [-13.203125, -12.8499755859375, -12.496826171875, -12.1436767578125, -11.79052734375, -11.4373779296875, -11.084228515625, -10.7310791015625, -10.3779296875, -10.0247802734375, -9.671630859375, -9.3184814453125, -8.96533203125, -8.6121826171875, -8.259033203125, -7.9058837890625, -7.552734375, -7.1995849609375, -6.846435546875, -6.4932861328125, -6.14013671875, -5.7869873046875, -5.433837890625, -5.0806884765625, -4.7275390625, -4.3743896484375, -4.021240234375, -3.6680908203125, -3.31494140625, -2.9617919921875, -2.608642578125, -2.2554931640625, -1.90234375, -1.5491943359375, -1.196044921875, -0.8428955078125, -0.48974609375, -0.1365966796875, 0.216552734375, 0.5697021484375, 0.9228515625, 1.2760009765625, 1.629150390625, 1.9822998046875, 2.33544921875, 2.6885986328125, 3.041748046875, 3.3948974609375, 3.748046875, 4.1011962890625, 4.454345703125, 4.8074951171875, 5.16064453125, 5.5137939453125, 5.866943359375, 6.2200927734375, 6.5732421875, 6.9263916015625, 7.279541015625, 7.6326904296875, 7.98583984375, 8.3389892578125, 8.692138671875, 9.0452880859375, 9.3984375]}, "gradients/decoder.transformer.h.4.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 2.0, 7.0, 7.0, 8.0, 8.0, 10.0, 28.0, 33.0, 44.0, 58.0, 114.0, 208.0, 335.0, 645.0, 1458.0, 3288.0, 9670.0, 31555.0, 125160.0, 861691.0, 2751897.0, 314755.0, 64240.0, 18584.0, 6061.0, 2293.0, 1024.0, 463.0, 247.0, 137.0, 84.0, 54.0, 50.0, 26.0, 12.0, 13.0, 8.0, 5.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-15.2578125, -14.7698974609375, -14.281982421875, -13.7940673828125, -13.30615234375, -12.8182373046875, -12.330322265625, -11.8424072265625, -11.3544921875, -10.8665771484375, -10.378662109375, -9.8907470703125, -9.40283203125, -8.9149169921875, -8.427001953125, -7.9390869140625, -7.451171875, -6.9632568359375, -6.475341796875, -5.9874267578125, -5.49951171875, -5.0115966796875, -4.523681640625, -4.0357666015625, -3.5478515625, -3.0599365234375, -2.572021484375, -2.0841064453125, -1.59619140625, -1.1082763671875, -0.620361328125, -0.1324462890625, 0.35546875, 0.8433837890625, 1.331298828125, 1.8192138671875, 2.30712890625, 2.7950439453125, 3.282958984375, 3.7708740234375, 4.2587890625, 4.7467041015625, 5.234619140625, 5.7225341796875, 6.21044921875, 6.6983642578125, 7.186279296875, 7.6741943359375, 8.162109375, 8.6500244140625, 9.137939453125, 9.6258544921875, 10.11376953125, 10.6016845703125, 11.089599609375, 11.5775146484375, 12.0654296875, 12.5533447265625, 13.041259765625, 13.5291748046875, 14.01708984375, 14.5050048828125, 14.992919921875, 15.4808349609375, 15.96875]}, "gradients/decoder.transformer.h.4.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 73.0, 753.0, 187.0, 6.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-594.2025756835938, -582.4666137695312, -570.7306518554688, -558.9946899414062, -547.2587280273438, -535.522705078125, -523.7867431640625, -512.05078125, -500.3148193359375, -488.578857421875, -476.8428955078125, -465.10693359375, -453.3709411621094, -441.6349792480469, -429.8990173339844, -418.1630554199219, -406.4270935058594, -394.6911315917969, -382.9551696777344, -371.21917724609375, -359.48321533203125, -347.74725341796875, -336.01129150390625, -324.27532958984375, -312.53936767578125, -300.80340576171875, -289.06744384765625, -277.33148193359375, -265.5954895019531, -253.85952758789062, -242.12356567382812, -230.38760375976562, -218.65162658691406, -206.91566467285156, -195.1796875, -183.4437255859375, -171.707763671875, -159.97178649902344, -148.23582458496094, -136.49984741210938, -124.76388549804688, -113.02791595458984, -101.29194641113281, -89.55598449707031, -77.82001495361328, -66.08404541015625, -54.34808349609375, -42.61211395263672, -30.876144409179688, -19.14017677307129, -7.404209136962891, 4.331756591796875, 16.067726135253906, 27.803695678710938, 39.53965759277344, 51.27562713623047, 63.0115966796875, 74.74756622314453, 86.48353576660156, 98.21949768066406, 109.9554672241211, 121.69143676757812, 133.42739868164062, 145.16336059570312, 156.8993377685547]}, "gradients/decoder.transformer.h.4.ln_2.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 7.0, 13.0, 2.0, 8.0, 13.0, 15.0, 13.0, 14.0, 10.0, 32.0, 25.0, 25.0, 19.0, 34.0, 45.0, 49.0, 59.0, 53.0, 33.0, 39.0, 38.0, 43.0, 34.0, 42.0, 32.0, 32.0, 35.0, 23.0, 24.0, 29.0, 20.0, 19.0, 17.0, 15.0, 16.0, 13.0, 13.0, 11.0, 13.0, 10.0, 4.0, 3.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-31.199705123901367, -30.20853042602539, -29.217355728149414, -28.226181030273438, -27.235008239746094, -26.243833541870117, -25.25265884399414, -24.261484146118164, -23.270309448242188, -22.27913475036621, -21.287960052490234, -20.29678726196289, -19.305612564086914, -18.314437866210938, -17.32326316833496, -16.332088470458984, -15.34091567993164, -14.349740982055664, -13.358567237854004, -12.367392539978027, -11.376218795776367, -10.38504409790039, -9.393869400024414, -8.402694702148438, -7.411520957946777, -6.420346736907959, -5.429172515869141, -4.437997817993164, -3.4468235969543457, -2.4556493759155273, -1.4644746780395508, -0.4733004570007324, 0.5178756713867188, 1.5090500116348267, 2.5002243518829346, 3.491398811340332, 4.48257303237915, 5.473747253417969, 6.464921951293945, 7.456096172332764, 8.447270393371582, 9.438445091247559, 10.429618835449219, 11.420793533325195, 12.411968231201172, 13.403141975402832, 14.394316673278809, 15.385490417480469, 16.376665115356445, 17.367839813232422, 18.3590145111084, 19.350189208984375, 20.34136199951172, 21.332536697387695, 22.323711395263672, 23.31488609313965, 24.306060791015625, 25.2972354888916, 26.288410186767578, 27.279582977294922, 28.2707576751709, 29.261932373046875, 30.25310707092285, 31.244281768798828, 32.23545455932617]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 7.0, 3.0, 7.0, 5.0, 7.0, 14.0, 9.0, 14.0, 23.0, 12.0, 21.0, 26.0, 20.0, 27.0, 35.0, 30.0, 29.0, 29.0, 39.0, 34.0, 44.0, 39.0, 35.0, 35.0, 49.0, 32.0, 28.0, 32.0, 34.0, 38.0, 31.0, 29.0, 24.0, 30.0, 16.0, 22.0, 22.0, 11.0, 8.0, 10.0, 6.0, 6.0, 8.0, 5.0, 5.0, 8.0, 3.0, 3.0, 4.0, 2.0, 1.0, 1.0, 2.0], "bins": [-6.57421875, -6.38037109375, -6.1865234375, -5.99267578125, -5.798828125, -5.60498046875, -5.4111328125, -5.21728515625, -5.0234375, -4.82958984375, -4.6357421875, -4.44189453125, -4.248046875, -4.05419921875, -3.8603515625, -3.66650390625, -3.47265625, -3.27880859375, -3.0849609375, -2.89111328125, -2.697265625, -2.50341796875, -2.3095703125, -2.11572265625, -1.921875, -1.72802734375, -1.5341796875, -1.34033203125, -1.146484375, -0.95263671875, -0.7587890625, -0.56494140625, -0.37109375, -0.17724609375, 0.0166015625, 0.21044921875, 0.404296875, 0.59814453125, 0.7919921875, 0.98583984375, 1.1796875, 1.37353515625, 1.5673828125, 1.76123046875, 1.955078125, 2.14892578125, 2.3427734375, 2.53662109375, 2.73046875, 2.92431640625, 3.1181640625, 3.31201171875, 3.505859375, 3.69970703125, 3.8935546875, 4.08740234375, 4.28125, 4.47509765625, 4.6689453125, 4.86279296875, 5.056640625, 5.25048828125, 5.4443359375, 5.63818359375, 5.83203125]}, "gradients/decoder.transformer.h.4.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 5.0, 10.0, 16.0, 20.0, 36.0, 44.0, 72.0, 94.0, 141.0, 238.0, 284.0, 422.0, 640.0, 866.0, 1228.0, 1834.0, 2562.0, 3787.0, 5419.0, 7961.0, 11613.0, 17347.0, 25837.0, 38601.0, 60180.0, 102739.0, 235474.0, 243727.0, 105317.0, 61090.0, 39329.0, 26156.0, 17443.0, 11965.0, 8029.0, 5613.0, 3831.0, 2693.0, 1809.0, 1210.0, 882.0, 602.0, 410.0, 274.0, 224.0, 163.0, 102.0, 65.0, 52.0, 40.0, 32.0, 13.0, 8.0, 7.0, 4.0, 3.0, 4.0], "bins": [-0.1749267578125, -0.1697845458984375, -0.164642333984375, -0.1595001220703125, -0.15435791015625, -0.1492156982421875, -0.144073486328125, -0.1389312744140625, -0.1337890625, -0.1286468505859375, -0.123504638671875, -0.1183624267578125, -0.11322021484375, -0.1080780029296875, -0.102935791015625, -0.0977935791015625, -0.0926513671875, -0.0875091552734375, -0.082366943359375, -0.0772247314453125, -0.07208251953125, -0.0669403076171875, -0.061798095703125, -0.0566558837890625, -0.051513671875, -0.0463714599609375, -0.041229248046875, -0.0360870361328125, -0.03094482421875, -0.0258026123046875, -0.020660400390625, -0.0155181884765625, -0.0103759765625, -0.0052337646484375, -9.1552734375e-05, 0.0050506591796875, 0.01019287109375, 0.0153350830078125, 0.020477294921875, 0.0256195068359375, 0.03076171875, 0.0359039306640625, 0.041046142578125, 0.0461883544921875, 0.05133056640625, 0.0564727783203125, 0.061614990234375, 0.0667572021484375, 0.0718994140625, 0.0770416259765625, 0.082183837890625, 0.0873260498046875, 0.09246826171875, 0.0976104736328125, 0.102752685546875, 0.1078948974609375, 0.113037109375, 0.1181793212890625, 0.123321533203125, 0.1284637451171875, 0.13360595703125, 0.1387481689453125, 0.143890380859375, 0.1490325927734375, 0.1541748046875]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 4.0, 4.0, 11.0, 5.0, 7.0, 9.0, 6.0, 13.0, 17.0, 16.0, 17.0, 12.0, 13.0, 19.0, 27.0, 25.0, 35.0, 40.0, 40.0, 28.0, 40.0, 41.0, 32.0, 1061.0, 35.0, 40.0, 54.0, 36.0, 39.0, 34.0, 26.0, 25.0, 31.0, 21.0, 20.0, 31.0, 23.0, 15.0, 12.0, 14.0, 13.0, 13.0, 6.0, 7.0, 4.0, 4.0, 1.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-4.47265625, -4.3394775390625, -4.206298828125, -4.0731201171875, -3.93994140625, -3.8067626953125, -3.673583984375, -3.5404052734375, -3.4072265625, -3.2740478515625, -3.140869140625, -3.0076904296875, -2.87451171875, -2.7413330078125, -2.608154296875, -2.4749755859375, -2.341796875, -2.2086181640625, -2.075439453125, -1.9422607421875, -1.80908203125, -1.6759033203125, -1.542724609375, -1.4095458984375, -1.2763671875, -1.1431884765625, -1.010009765625, -0.8768310546875, -0.74365234375, -0.6104736328125, -0.477294921875, -0.3441162109375, -0.2109375, -0.0777587890625, 0.055419921875, 0.1885986328125, 0.32177734375, 0.4549560546875, 0.588134765625, 0.7213134765625, 0.8544921875, 0.9876708984375, 1.120849609375, 1.2540283203125, 1.38720703125, 1.5203857421875, 1.653564453125, 1.7867431640625, 1.919921875, 2.0531005859375, 2.186279296875, 2.3194580078125, 2.45263671875, 2.5858154296875, 2.718994140625, 2.8521728515625, 2.9853515625, 3.1185302734375, 3.251708984375, 3.3848876953125, 3.51806640625, 3.6512451171875, 3.784423828125, 3.9176025390625, 4.05078125]}, "gradients/decoder.transformer.h.4.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 4.0, 7.0, 9.0, 14.0, 32.0, 22.0, 40.0, 52.0, 74.0, 116.0, 219.0, 302.0, 441.0, 612.0, 933.0, 1523.0, 2411.0, 3681.0, 5642.0, 9234.0, 14601.0, 23579.0, 39588.0, 71081.0, 144201.0, 1386988.0, 186550.0, 85701.0, 46546.0, 27057.0, 16698.0, 10515.0, 6600.0, 4288.0, 2700.0, 1738.0, 1135.0, 786.0, 500.0, 285.0, 200.0, 129.0, 110.0, 72.0, 43.0, 33.0, 12.0, 13.0, 7.0, 3.0, 6.0, 4.0, 3.0, 2.0, 1.0, 1.0], "bins": [-0.1971435546875, -0.1912841796875, -0.1854248046875, -0.1795654296875, -0.1737060546875, -0.1678466796875, -0.1619873046875, -0.1561279296875, -0.1502685546875, -0.1444091796875, -0.1385498046875, -0.1326904296875, -0.1268310546875, -0.1209716796875, -0.1151123046875, -0.1092529296875, -0.1033935546875, -0.0975341796875, -0.0916748046875, -0.0858154296875, -0.0799560546875, -0.0740966796875, -0.0682373046875, -0.0623779296875, -0.0565185546875, -0.0506591796875, -0.0447998046875, -0.0389404296875, -0.0330810546875, -0.0272216796875, -0.0213623046875, -0.0155029296875, -0.0096435546875, -0.0037841796875, 0.0020751953125, 0.0079345703125, 0.0137939453125, 0.0196533203125, 0.0255126953125, 0.0313720703125, 0.0372314453125, 0.0430908203125, 0.0489501953125, 0.0548095703125, 0.0606689453125, 0.0665283203125, 0.0723876953125, 0.0782470703125, 0.0841064453125, 0.0899658203125, 0.0958251953125, 0.1016845703125, 0.1075439453125, 0.1134033203125, 0.1192626953125, 0.1251220703125, 0.1309814453125, 0.1368408203125, 0.1427001953125, 0.1485595703125, 0.1544189453125, 0.1602783203125, 0.1661376953125, 0.1719970703125, 0.1778564453125]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 3.0, 3.0, 0.0, 5.0, 7.0, 3.0, 5.0, 12.0, 18.0, 22.0, 19.0, 30.0, 43.0, 52.0, 59.0, 80.0, 75.0, 93.0, 83.0, 87.0, 65.0, 58.0, 45.0, 32.0, 25.0, 17.0, 10.0, 12.0, 9.0, 5.0, 7.0, 10.0, 3.0, 3.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2695789337158203e-05, -1.2215226888656616e-05, -1.173466444015503e-05, -1.1254101991653442e-05, -1.0773539543151855e-05, -1.0292977094650269e-05, -9.812414646148682e-06, -9.331852197647095e-06, -8.851289749145508e-06, -8.370727300643921e-06, -7.890164852142334e-06, -7.409602403640747e-06, -6.92903995513916e-06, -6.448477506637573e-06, -5.967915058135986e-06, -5.487352609634399e-06, -5.0067901611328125e-06, -4.526227712631226e-06, -4.045665264129639e-06, -3.5651028156280518e-06, -3.084540367126465e-06, -2.603977918624878e-06, -2.123415470123291e-06, -1.642853021621704e-06, -1.1622905731201172e-06, -6.817281246185303e-07, -2.0116567611694336e-07, 2.7939677238464355e-07, 7.599592208862305e-07, 1.2405216693878174e-06, 1.7210841178894043e-06, 2.201646566390991e-06, 2.682209014892578e-06, 3.162771463394165e-06, 3.643333911895752e-06, 4.123896360397339e-06, 4.604458808898926e-06, 5.085021257400513e-06, 5.5655837059021e-06, 6.0461461544036865e-06, 6.5267086029052734e-06, 7.00727105140686e-06, 7.487833499908447e-06, 7.968395948410034e-06, 8.448958396911621e-06, 8.929520845413208e-06, 9.410083293914795e-06, 9.890645742416382e-06, 1.0371208190917969e-05, 1.0851770639419556e-05, 1.1332333087921143e-05, 1.181289553642273e-05, 1.2293457984924316e-05, 1.2774020433425903e-05, 1.325458288192749e-05, 1.3735145330429077e-05, 1.4215707778930664e-05, 1.4696270227432251e-05, 1.5176832675933838e-05, 1.5657395124435425e-05, 1.6137957572937012e-05, 1.66185200214386e-05, 1.7099082469940186e-05, 1.7579644918441772e-05, 1.806020736694336e-05]}, "gradients/decoder.transformer.h.4.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 2.0, 3.0, 3.0, 1.0, 5.0, 4.0, 4.0, 4.0, 15.0, 11.0, 15.0, 19.0, 23.0, 26.0, 30.0, 62.0, 79.0, 85.0, 165.0, 265.0, 538.0, 3500.0, 989450.0, 52407.0, 922.0, 317.0, 173.0, 122.0, 78.0, 59.0, 56.0, 26.0, 20.0, 12.0, 15.0, 7.0, 6.0, 7.0, 7.0, 4.0, 5.0, 5.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00030231475830078125, -0.00029408372938632965, -0.00028585270047187805, -0.00027762167155742645, -0.00026939064264297485, -0.00026115961372852325, -0.00025292858481407166, -0.00024469755589962006, -0.00023646652698516846, -0.00022823549807071686, -0.00022000446915626526, -0.00021177344024181366, -0.00020354241132736206, -0.00019531138241291046, -0.00018708035349845886, -0.00017884932458400726, -0.00017061829566955566, -0.00016238726675510406, -0.00015415623784065247, -0.00014592520892620087, -0.00013769418001174927, -0.00012946315109729767, -0.00012123212218284607, -0.00011300109326839447, -0.00010477006435394287, -9.653903543949127e-05, -8.830800652503967e-05, -8.007697761058807e-05, -7.184594869613647e-05, -6.361491978168488e-05, -5.5383890867233276e-05, -4.715286195278168e-05, -3.892183303833008e-05, -3.069080412387848e-05, -2.245977520942688e-05, -1.422874629497528e-05, -5.997717380523682e-06, 2.2333115339279175e-06, 1.0464340448379517e-05, 1.8695369362831116e-05, 2.6926398277282715e-05, 3.5157427191734314e-05, 4.338845610618591e-05, 5.161948502063751e-05, 5.985051393508911e-05, 6.808154284954071e-05, 7.631257176399231e-05, 8.454360067844391e-05, 9.277462959289551e-05, 0.00010100565850734711, 0.0001092366874217987, 0.0001174677163362503, 0.0001256987452507019, 0.0001339297741651535, 0.0001421608030796051, 0.0001503918319940567, 0.0001586228609085083, 0.0001668538898229599, 0.0001750849187374115, 0.0001833159476518631, 0.0001915469765663147, 0.0001997780054807663, 0.0002080090343952179, 0.0002162400633096695, 0.0002244710922241211]}, "gradients/decoder.transformer.h.4.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 9.0, 7.0, 25.0, 44.0, 125.0, 191.0, 234.0, 174.0, 114.0, 48.0, 21.0, 17.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.0214845790178515e-05, -1.9748575141420588e-05, -1.9282306311652064e-05, -1.8816035662894137e-05, -1.834976501413621e-05, -1.7883496184367687e-05, -1.741722553560976e-05, -1.6950954886851832e-05, -1.648468605708331e-05, -1.601841540832538e-05, -1.5552146578556858e-05, -1.5085875929798931e-05, -1.4619605281041004e-05, -1.4153335541777778e-05, -1.3687065802514553e-05, -1.3220795153756626e-05, -1.2754524504998699e-05, -1.2288254765735473e-05, -1.1821984116977546e-05, -1.1355714377714321e-05, -1.0889443728956394e-05, -1.0423173989693169e-05, -9.956904250429943e-06, -9.490633601672016e-06, -9.02436386240879e-06, -8.558094123145565e-06, -8.091823474387638e-06, -7.625553735124413e-06, -7.159283541113837e-06, -6.6930133471032605e-06, -6.226743607840035e-06, -5.760473413829459e-06, -5.294204129313584e-06, -4.827933935303008e-06, -4.361663741292432e-06, -3.895394002029207e-06, -3.4291238080186304e-06, -2.962853614008054e-06, -2.4965836473711533e-06, -2.0303136807342526e-06, -1.5640434867236763e-06, -1.0977734063999378e-06, -6.315033260761993e-07, -1.652332457524608e-07, 3.010368345712777e-07, 7.67307028581854e-07, 1.2335769952187547e-06, 1.6998469618556555e-06, 2.1661171558662318e-06, 2.632387349876808e-06, 3.098657316513709e-06, 3.5649272831506096e-06, 4.031197477161186e-06, 4.497467671171762e-06, 4.963737410434987e-06, 5.430007604445564e-06, 5.89627779845614e-06, 6.362547992466716e-06, 6.828818186477292e-06, 7.295087925740518e-06, 7.761358574498445e-06, 8.22762831376167e-06, 8.693898053024895e-06, 9.160168701782823e-06, 9.626438441046048e-06]}, "gradients/decoder.transformer.h.4.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 5.0, 1.0, 7.0, 5.0, 3.0, 2.0, 7.0, 8.0, 8.0, 12.0, 9.0, 21.0, 13.0, 14.0, 30.0, 23.0, 37.0, 29.0, 32.0, 40.0, 49.0, 31.0, 42.0, 56.0, 33.0, 46.0, 36.0, 43.0, 27.0, 49.0, 37.0, 26.0, 25.0, 25.0, 27.0, 28.0, 28.0, 16.0, 19.0, 14.0, 9.0, 16.0, 5.0, 8.0, 4.0, 7.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-7.450580596923828e-06, -7.238239049911499e-06, -7.02589750289917e-06, -6.813555955886841e-06, -6.601214408874512e-06, -6.388872861862183e-06, -6.1765313148498535e-06, -5.964189767837524e-06, -5.751848220825195e-06, -5.539506673812866e-06, -5.327165126800537e-06, -5.114823579788208e-06, -4.902482032775879e-06, -4.69014048576355e-06, -4.477798938751221e-06, -4.265457391738892e-06, -4.0531158447265625e-06, -3.840774297714233e-06, -3.6284327507019043e-06, -3.416091203689575e-06, -3.203749656677246e-06, -2.991408109664917e-06, -2.779066562652588e-06, -2.566725015640259e-06, -2.3543834686279297e-06, -2.1420419216156006e-06, -1.9297003746032715e-06, -1.7173588275909424e-06, -1.5050172805786133e-06, -1.2926757335662842e-06, -1.080334186553955e-06, -8.67992639541626e-07, -6.556510925292969e-07, -4.4330954551696777e-07, -2.3096799850463867e-07, -1.862645149230957e-08, 1.9371509552001953e-07, 4.0605664253234863e-07, 6.183981895446777e-07, 8.307397365570068e-07, 1.043081283569336e-06, 1.255422830581665e-06, 1.4677643775939941e-06, 1.6801059246063232e-06, 1.8924474716186523e-06, 2.1047890186309814e-06, 2.3171305656433105e-06, 2.5294721126556396e-06, 2.7418136596679688e-06, 2.954155206680298e-06, 3.166496753692627e-06, 3.378838300704956e-06, 3.591179847717285e-06, 3.8035213947296143e-06, 4.015862941741943e-06, 4.2282044887542725e-06, 4.4405460357666016e-06, 4.652887582778931e-06, 4.86522912979126e-06, 5.077570676803589e-06, 5.289912223815918e-06, 5.502253770828247e-06, 5.714595317840576e-06, 5.926936864852905e-06, 6.139278411865234e-06]}, "gradients/decoder.transformer.h.4.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 7.0, 3.0, 7.0, 5.0, 7.0, 14.0, 9.0, 14.0, 23.0, 12.0, 21.0, 26.0, 20.0, 27.0, 35.0, 30.0, 29.0, 29.0, 39.0, 34.0, 44.0, 39.0, 35.0, 35.0, 49.0, 32.0, 28.0, 32.0, 34.0, 38.0, 31.0, 29.0, 24.0, 30.0, 16.0, 22.0, 22.0, 11.0, 8.0, 10.0, 6.0, 6.0, 8.0, 5.0, 5.0, 8.0, 3.0, 3.0, 4.0, 2.0, 1.0, 1.0, 2.0], "bins": [-6.57421875, -6.38037109375, -6.1865234375, -5.99267578125, -5.798828125, -5.60498046875, -5.4111328125, -5.21728515625, -5.0234375, -4.82958984375, -4.6357421875, -4.44189453125, -4.248046875, -4.05419921875, -3.8603515625, -3.66650390625, -3.47265625, -3.27880859375, -3.0849609375, -2.89111328125, -2.697265625, -2.50341796875, -2.3095703125, -2.11572265625, -1.921875, -1.72802734375, -1.5341796875, -1.34033203125, -1.146484375, -0.95263671875, -0.7587890625, -0.56494140625, -0.37109375, -0.17724609375, 0.0166015625, 0.21044921875, 0.404296875, 0.59814453125, 0.7919921875, 0.98583984375, 1.1796875, 1.37353515625, 1.5673828125, 1.76123046875, 1.955078125, 2.14892578125, 2.3427734375, 2.53662109375, 2.73046875, 2.92431640625, 3.1181640625, 3.31201171875, 3.505859375, 3.69970703125, 3.8935546875, 4.08740234375, 4.28125, 4.47509765625, 4.6689453125, 4.86279296875, 5.056640625, 5.25048828125, 5.4443359375, 5.63818359375, 5.83203125]}, "gradients/decoder.transformer.h.4.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 5.0, 5.0, 5.0, 8.0, 11.0, 19.0, 17.0, 18.0, 27.0, 39.0, 77.0, 82.0, 142.0, 224.0, 358.0, 554.0, 935.0, 1566.0, 2721.0, 4571.0, 7970.0, 14426.0, 26802.0, 53354.0, 122114.0, 352344.0, 266878.0, 97332.0, 44258.0, 22576.0, 12404.0, 6913.0, 3975.0, 2250.0, 1302.0, 867.0, 488.0, 282.0, 192.0, 140.0, 84.0, 73.0, 42.0, 40.0, 23.0, 14.0, 15.0, 9.0, 6.0, 2.0, 1.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.63671875, -7.38934326171875, -7.1419677734375, -6.89459228515625, -6.647216796875, -6.39984130859375, -6.1524658203125, -5.90509033203125, -5.65771484375, -5.41033935546875, -5.1629638671875, -4.91558837890625, -4.668212890625, -4.42083740234375, -4.1734619140625, -3.92608642578125, -3.6787109375, -3.43133544921875, -3.1839599609375, -2.93658447265625, -2.689208984375, -2.44183349609375, -2.1944580078125, -1.94708251953125, -1.69970703125, -1.45233154296875, -1.2049560546875, -0.95758056640625, -0.710205078125, -0.46282958984375, -0.2154541015625, 0.03192138671875, 0.279296875, 0.52667236328125, 0.7740478515625, 1.02142333984375, 1.268798828125, 1.51617431640625, 1.7635498046875, 2.01092529296875, 2.25830078125, 2.50567626953125, 2.7530517578125, 3.00042724609375, 3.247802734375, 3.49517822265625, 3.7425537109375, 3.98992919921875, 4.2373046875, 4.48468017578125, 4.7320556640625, 4.97943115234375, 5.226806640625, 5.47418212890625, 5.7215576171875, 5.96893310546875, 6.21630859375, 6.46368408203125, 6.7110595703125, 6.95843505859375, 7.205810546875, 7.45318603515625, 7.7005615234375, 7.94793701171875, 8.1953125]}, "gradients/decoder.transformer.h.4.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 1.0, 6.0, 5.0, 2.0, 6.0, 8.0, 17.0, 10.0, 16.0, 19.0, 18.0, 25.0, 30.0, 35.0, 38.0, 50.0, 51.0, 62.0, 102.0, 207.0, 1500.0, 264.0, 116.0, 84.0, 54.0, 47.0, 46.0, 43.0, 37.0, 39.0, 28.0, 18.0, 16.0, 18.0, 11.0, 8.0, 9.0, 4.0, 5.0, 3.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.640625, -18.011962890625, -17.38330078125, -16.754638671875, -16.1259765625, -15.497314453125, -14.86865234375, -14.239990234375, -13.611328125, -12.982666015625, -12.35400390625, -11.725341796875, -11.0966796875, -10.468017578125, -9.83935546875, -9.210693359375, -8.58203125, -7.953369140625, -7.32470703125, -6.696044921875, -6.0673828125, -5.438720703125, -4.81005859375, -4.181396484375, -3.552734375, -2.924072265625, -2.29541015625, -1.666748046875, -1.0380859375, -0.409423828125, 0.21923828125, 0.847900390625, 1.4765625, 2.105224609375, 2.73388671875, 3.362548828125, 3.9912109375, 4.619873046875, 5.24853515625, 5.877197265625, 6.505859375, 7.134521484375, 7.76318359375, 8.391845703125, 9.0205078125, 9.649169921875, 10.27783203125, 10.906494140625, 11.53515625, 12.163818359375, 12.79248046875, 13.421142578125, 14.0498046875, 14.678466796875, 15.30712890625, 15.935791015625, 16.564453125, 17.193115234375, 17.82177734375, 18.450439453125, 19.0791015625, 19.707763671875, 20.33642578125, 20.965087890625, 21.59375]}, "gradients/decoder.transformer.h.4.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 6.0, 2.0, 4.0, 6.0, 8.0, 3.0, 7.0, 14.0, 20.0, 28.0, 30.0, 39.0, 76.0, 98.0, 143.0, 236.0, 426.0, 1298.0, 8611.0, 133843.0, 2910375.0, 82069.0, 6295.0, 1072.0, 373.0, 201.0, 116.0, 79.0, 60.0, 41.0, 32.0, 27.0, 18.0, 13.0, 14.0, 7.0, 5.0, 4.0, 5.0, 1.0, 1.0, 5.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-33.96875, -32.814453125, -31.66015625, -30.505859375, -29.3515625, -28.197265625, -27.04296875, -25.888671875, -24.734375, -23.580078125, -22.42578125, -21.271484375, -20.1171875, -18.962890625, -17.80859375, -16.654296875, -15.5, -14.345703125, -13.19140625, -12.037109375, -10.8828125, -9.728515625, -8.57421875, -7.419921875, -6.265625, -5.111328125, -3.95703125, -2.802734375, -1.6484375, -0.494140625, 0.66015625, 1.814453125, 2.96875, 4.123046875, 5.27734375, 6.431640625, 7.5859375, 8.740234375, 9.89453125, 11.048828125, 12.203125, 13.357421875, 14.51171875, 15.666015625, 16.8203125, 17.974609375, 19.12890625, 20.283203125, 21.4375, 22.591796875, 23.74609375, 24.900390625, 26.0546875, 27.208984375, 28.36328125, 29.517578125, 30.671875, 31.826171875, 32.98046875, 34.134765625, 35.2890625, 36.443359375, 37.59765625, 38.751953125, 39.90625]}, "gradients/decoder.transformer.h.4.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 8.0, 52.0, 399.0, 469.0, 76.0, 11.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-204.88690185546875, -200.24220275878906, -195.59750366210938, -190.95281982421875, -186.30812072753906, -181.66342163085938, -177.01873779296875, -172.37403869628906, -167.72933959960938, -163.0846405029297, -158.43994140625, -153.79525756835938, -149.1505584716797, -144.505859375, -139.86117553710938, -135.2164764404297, -130.57177734375, -125.92707824707031, -121.28238677978516, -116.6376953125, -111.99299621582031, -107.34829711914062, -102.70360565185547, -98.05891418457031, -93.41421508789062, -88.76951599121094, -84.12482452392578, -79.48013305664062, -74.83543395996094, -70.19073486328125, -65.5460433959961, -60.90134811401367, -56.25666809082031, -51.61197280883789, -46.96727752685547, -42.32258224487305, -37.677886962890625, -33.0331916809082, -28.38849639892578, -23.74380111694336, -19.099105834960938, -14.454410552978516, -9.809715270996094, -5.165019989013672, -0.52032470703125, 4.124370574951172, 8.769065856933594, 13.413761138916016, 18.058456420898438, 22.70315170288086, 27.34784698486328, 31.992542266845703, 36.637237548828125, 41.28193283081055, 45.92662811279297, 50.57132339477539, 55.21601867675781, 59.860713958740234, 64.50540924072266, 69.15010070800781, 73.7947998046875, 78.43949890136719, 83.08419036865234, 87.7288818359375, 92.37358093261719]}, "gradients/decoder.transformer.h.4.ln_1.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 6.0, 8.0, 6.0, 7.0, 12.0, 15.0, 8.0, 9.0, 13.0, 22.0, 16.0, 27.0, 24.0, 31.0, 30.0, 19.0, 37.0, 36.0, 42.0, 51.0, 49.0, 26.0, 43.0, 32.0, 39.0, 33.0, 31.0, 30.0, 35.0, 25.0, 30.0, 34.0, 21.0, 19.0, 21.0, 19.0, 14.0, 12.0, 11.0, 15.0, 10.0, 4.0, 9.0, 6.0, 0.0, 5.0, 6.0, 2.0, 0.0, 2.0, 3.0, 1.0, 1.0, 1.0], "bins": [-37.34029769897461, -36.256710052490234, -35.173118591308594, -34.08953094482422, -33.005943298339844, -31.922353744506836, -30.838764190673828, -29.755176544189453, -28.671586990356445, -27.587997436523438, -26.504409790039062, -25.420820236206055, -24.337230682373047, -23.253643035888672, -22.170053482055664, -21.086463928222656, -20.00287628173828, -18.919286727905273, -17.8356990814209, -16.75210952758789, -15.6685209274292, -14.584932327270508, -13.5013427734375, -12.417754173278809, -11.334165573120117, -10.250576972961426, -9.166988372802734, -8.083398818969727, -6.999810218811035, -5.916221618652344, -4.832632541656494, -3.7490434646606445, -2.6654586791992188, -1.5818698406219482, -0.49828100204467773, 0.5853078365325928, 1.6688966751098633, 2.7524852752685547, 3.8360743522644043, 4.919663429260254, 6.003252029418945, 7.086840629577637, 8.170429229736328, 9.254018783569336, 10.337607383728027, 11.421195983886719, 12.504785537719727, 13.588374137878418, 14.67196273803711, 15.7555513381958, 16.839139938354492, 17.9227294921875, 19.006317138671875, 20.089906692504883, 21.17349624633789, 22.257083892822266, 23.340673446655273, 24.42426300048828, 25.507850646972656, 26.591440200805664, 27.675029754638672, 28.758617401123047, 29.842206954956055, 30.925796508789062, 32.00938415527344]}, "gradients/decoder.transformer.h.3.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 4.0, 3.0, 6.0, 5.0, 10.0, 9.0, 11.0, 13.0, 14.0, 15.0, 16.0, 16.0, 13.0, 25.0, 24.0, 38.0, 31.0, 25.0, 36.0, 35.0, 34.0, 44.0, 51.0, 36.0, 33.0, 42.0, 28.0, 30.0, 34.0, 32.0, 31.0, 32.0, 39.0, 20.0, 23.0, 23.0, 20.0, 17.0, 17.0, 11.0, 14.0, 9.0, 5.0, 7.0, 6.0, 2.0, 6.0, 4.0, 2.0, 4.0, 5.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-6.16796875, -5.97576904296875, -5.7835693359375, -5.59136962890625, -5.399169921875, -5.20697021484375, -5.0147705078125, -4.82257080078125, -4.63037109375, -4.43817138671875, -4.2459716796875, -4.05377197265625, -3.861572265625, -3.66937255859375, -3.4771728515625, -3.28497314453125, -3.0927734375, -2.90057373046875, -2.7083740234375, -2.51617431640625, -2.323974609375, -2.13177490234375, -1.9395751953125, -1.74737548828125, -1.55517578125, -1.36297607421875, -1.1707763671875, -0.97857666015625, -0.786376953125, -0.59417724609375, -0.4019775390625, -0.20977783203125, -0.017578125, 0.17462158203125, 0.3668212890625, 0.55902099609375, 0.751220703125, 0.94342041015625, 1.1356201171875, 1.32781982421875, 1.52001953125, 1.71221923828125, 1.9044189453125, 2.09661865234375, 2.288818359375, 2.48101806640625, 2.6732177734375, 2.86541748046875, 3.0576171875, 3.24981689453125, 3.4420166015625, 3.63421630859375, 3.826416015625, 4.01861572265625, 4.2108154296875, 4.40301513671875, 4.59521484375, 4.78741455078125, 4.9796142578125, 5.17181396484375, 5.364013671875, 5.55621337890625, 5.7484130859375, 5.94061279296875, 6.1328125]}, "gradients/decoder.transformer.h.3.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 6.0, 3.0, 3.0, 5.0, 7.0, 10.0, 11.0, 16.0, 20.0, 21.0, 26.0, 22.0, 35.0, 40.0, 53.0, 74.0, 103.0, 144.0, 203.0, 267.0, 412.0, 1769.0, 42760.0, 4124046.0, 21527.0, 1311.0, 391.0, 247.0, 184.0, 127.0, 84.0, 79.0, 61.0, 52.0, 24.0, 24.0, 22.0, 22.0, 14.0, 15.0, 8.0, 6.0, 6.0, 8.0, 5.0, 2.0, 6.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-63.84375, -61.67431640625, -59.5048828125, -57.33544921875, -55.166015625, -52.99658203125, -50.8271484375, -48.65771484375, -46.48828125, -44.31884765625, -42.1494140625, -39.97998046875, -37.810546875, -35.64111328125, -33.4716796875, -31.30224609375, -29.1328125, -26.96337890625, -24.7939453125, -22.62451171875, -20.455078125, -18.28564453125, -16.1162109375, -13.94677734375, -11.77734375, -9.60791015625, -7.4384765625, -5.26904296875, -3.099609375, -0.93017578125, 1.2392578125, 3.40869140625, 5.578125, 7.74755859375, 9.9169921875, 12.08642578125, 14.255859375, 16.42529296875, 18.5947265625, 20.76416015625, 22.93359375, 25.10302734375, 27.2724609375, 29.44189453125, 31.611328125, 33.78076171875, 35.9501953125, 38.11962890625, 40.2890625, 42.45849609375, 44.6279296875, 46.79736328125, 48.966796875, 51.13623046875, 53.3056640625, 55.47509765625, 57.64453125, 59.81396484375, 61.9833984375, 64.15283203125, 66.322265625, 68.49169921875, 70.6611328125, 72.83056640625, 75.0]}, "gradients/decoder.transformer.h.3.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 3.0, 6.0, 12.0, 22.0, 34.0, 83.0, 176.0, 405.0, 1140.0, 1264.0, 460.0, 219.0, 102.0, 59.0, 32.0, 25.0, 12.0, 10.0, 11.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-22.171875, -21.48095703125, -20.7900390625, -20.09912109375, -19.408203125, -18.71728515625, -18.0263671875, -17.33544921875, -16.64453125, -15.95361328125, -15.2626953125, -14.57177734375, -13.880859375, -13.18994140625, -12.4990234375, -11.80810546875, -11.1171875, -10.42626953125, -9.7353515625, -9.04443359375, -8.353515625, -7.66259765625, -6.9716796875, -6.28076171875, -5.58984375, -4.89892578125, -4.2080078125, -3.51708984375, -2.826171875, -2.13525390625, -1.4443359375, -0.75341796875, -0.0625, 0.62841796875, 1.3193359375, 2.01025390625, 2.701171875, 3.39208984375, 4.0830078125, 4.77392578125, 5.46484375, 6.15576171875, 6.8466796875, 7.53759765625, 8.228515625, 8.91943359375, 9.6103515625, 10.30126953125, 10.9921875, 11.68310546875, 12.3740234375, 13.06494140625, 13.755859375, 14.44677734375, 15.1376953125, 15.82861328125, 16.51953125, 17.21044921875, 17.9013671875, 18.59228515625, 19.283203125, 19.97412109375, 20.6650390625, 21.35595703125, 22.046875]}, "gradients/decoder.transformer.h.3.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 6.0, 3.0, 8.0, 14.0, 15.0, 32.0, 22.0, 34.0, 60.0, 95.0, 137.0, 267.0, 867.0, 5125.0, 83674.0, 3889865.0, 202805.0, 9249.0, 1199.0, 368.0, 154.0, 91.0, 53.0, 33.0, 32.0, 14.0, 9.0, 13.0, 15.0, 5.0, 9.0, 3.0, 2.0, 4.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-52.0, -50.6533203125, -49.306640625, -47.9599609375, -46.61328125, -45.2666015625, -43.919921875, -42.5732421875, -41.2265625, -39.8798828125, -38.533203125, -37.1865234375, -35.83984375, -34.4931640625, -33.146484375, -31.7998046875, -30.453125, -29.1064453125, -27.759765625, -26.4130859375, -25.06640625, -23.7197265625, -22.373046875, -21.0263671875, -19.6796875, -18.3330078125, -16.986328125, -15.6396484375, -14.29296875, -12.9462890625, -11.599609375, -10.2529296875, -8.90625, -7.5595703125, -6.212890625, -4.8662109375, -3.51953125, -2.1728515625, -0.826171875, 0.5205078125, 1.8671875, 3.2138671875, 4.560546875, 5.9072265625, 7.25390625, 8.6005859375, 9.947265625, 11.2939453125, 12.640625, 13.9873046875, 15.333984375, 16.6806640625, 18.02734375, 19.3740234375, 20.720703125, 22.0673828125, 23.4140625, 24.7607421875, 26.107421875, 27.4541015625, 28.80078125, 30.1474609375, 31.494140625, 32.8408203125, 34.1875]}, "gradients/decoder.transformer.h.3.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 8.0, 39.0, 218.0, 478.0, 230.0, 33.0, 11.0, 2.0], "bins": [-345.7807312011719, -339.97015380859375, -334.1595764160156, -328.3490295410156, -322.5384521484375, -316.7278747558594, -310.91729736328125, -305.10675048828125, -299.2961730957031, -293.485595703125, -287.6750183105469, -281.8644714355469, -276.05389404296875, -270.2433166503906, -264.4327392578125, -258.6221923828125, -252.81161499023438, -247.00103759765625, -241.1904754638672, -235.37989807128906, -229.5693359375, -223.75875854492188, -217.9481964111328, -212.1376190185547, -206.32704162597656, -200.51646423339844, -194.70590209960938, -188.89532470703125, -183.0847625732422, -177.27418518066406, -171.463623046875, -165.65304565429688, -159.84246826171875, -154.03189086914062, -148.22132873535156, -142.41075134277344, -136.60018920898438, -130.78961181640625, -124.97904968261719, -119.16847229003906, -113.35791015625, -107.5473403930664, -101.73677062988281, -95.92620086669922, -90.11563110351562, -84.30506134033203, -78.49449157714844, -72.68391418457031, -66.87335205078125, -61.062782287597656, -55.25221252441406, -49.44164276123047, -43.631072998046875, -37.82050323486328, -32.00992965698242, -26.199359893798828, -20.388790130615234, -14.57822036743164, -8.76764965057373, -2.9570789337158203, 2.8534908294677734, 8.664060592651367, 14.474632263183594, 20.285202026367188, 26.09577178955078]}, "gradients/decoder.transformer.h.3.ln_2.bias": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 4.0, 3.0, 1.0, 3.0, 4.0, 7.0, 5.0, 6.0, 6.0, 12.0, 17.0, 24.0, 21.0, 26.0, 22.0, 29.0, 26.0, 30.0, 31.0, 31.0, 31.0, 36.0, 32.0, 34.0, 36.0, 45.0, 35.0, 44.0, 38.0, 31.0, 43.0, 41.0, 42.0, 28.0, 27.0, 20.0, 17.0, 25.0, 17.0, 11.0, 14.0, 16.0, 10.0, 8.0, 6.0, 5.0, 7.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-28.379791259765625, -27.399221420288086, -26.418649673461914, -25.438079833984375, -24.457508087158203, -23.476938247680664, -22.496368408203125, -21.515796661376953, -20.53522491455078, -19.554655075073242, -18.57408332824707, -17.59351348876953, -16.61294174194336, -15.63237190246582, -14.651801109313965, -13.67123031616211, -12.69066047668457, -11.710089683532715, -10.72951889038086, -9.74894905090332, -8.768377304077148, -7.787806987762451, -6.807236671447754, -5.826665878295898, -4.846095085144043, -3.8655242919921875, -2.884953737258911, -1.9043831825256348, -0.9238123893737793, 0.05675840377807617, 1.0373287200927734, 2.017899513244629, 2.9984703063964844, 3.97904109954834, 4.959611892700195, 5.940182209014893, 6.920753002166748, 7.9013237953186035, 8.8818941116333, 9.862464904785156, 10.843035697937012, 11.823606491088867, 12.804177284240723, 13.784748077392578, 14.765317916870117, 15.745889663696289, 16.726459503173828, 17.70703125, 18.68760108947754, 19.668170928955078, 20.64874267578125, 21.62931251525879, 22.60988426208496, 23.5904541015625, 24.571025848388672, 25.55159568786621, 26.53216552734375, 27.51273536682129, 28.49330711364746, 29.473876953125, 30.454448699951172, 31.43501853942871, 32.41558837890625, 33.39616012573242, 34.376731872558594]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 5.0, 1.0, 3.0, 4.0, 4.0, 3.0, 2.0, 3.0, 7.0, 11.0, 4.0, 11.0, 15.0, 12.0, 19.0, 15.0, 15.0, 17.0, 22.0, 18.0, 32.0, 36.0, 31.0, 35.0, 30.0, 26.0, 33.0, 39.0, 37.0, 43.0, 39.0, 33.0, 46.0, 23.0, 30.0, 33.0, 26.0, 28.0, 34.0, 23.0, 17.0, 17.0, 16.0, 8.0, 10.0, 16.0, 19.0, 13.0, 12.0, 6.0, 11.0, 5.0, 1.0, 5.0, 7.0, 4.0, 2.0, 1.0, 1.0, 2.0], "bins": [-6.3984375, -6.20733642578125, -6.0162353515625, -5.82513427734375, -5.634033203125, -5.44293212890625, -5.2518310546875, -5.06072998046875, -4.86962890625, -4.67852783203125, -4.4874267578125, -4.29632568359375, -4.105224609375, -3.91412353515625, -3.7230224609375, -3.53192138671875, -3.3408203125, -3.14971923828125, -2.9586181640625, -2.76751708984375, -2.576416015625, -2.38531494140625, -2.1942138671875, -2.00311279296875, -1.81201171875, -1.62091064453125, -1.4298095703125, -1.23870849609375, -1.047607421875, -0.85650634765625, -0.6654052734375, -0.47430419921875, -0.283203125, -0.09210205078125, 0.0989990234375, 0.29010009765625, 0.481201171875, 0.67230224609375, 0.8634033203125, 1.05450439453125, 1.24560546875, 1.43670654296875, 1.6278076171875, 1.81890869140625, 2.010009765625, 2.20111083984375, 2.3922119140625, 2.58331298828125, 2.7744140625, 2.96551513671875, 3.1566162109375, 3.34771728515625, 3.538818359375, 3.72991943359375, 3.9210205078125, 4.11212158203125, 4.30322265625, 4.49432373046875, 4.6854248046875, 4.87652587890625, 5.067626953125, 5.25872802734375, 5.4498291015625, 5.64093017578125, 5.83203125]}, "gradients/decoder.transformer.h.3.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 7.0, 4.0, 11.0, 11.0, 24.0, 29.0, 50.0, 59.0, 84.0, 144.0, 202.0, 302.0, 384.0, 557.0, 807.0, 1143.0, 1553.0, 2290.0, 2963.0, 4222.0, 6016.0, 8360.0, 12112.0, 17771.0, 25971.0, 39328.0, 62482.0, 107723.0, 278132.0, 213352.0, 93003.0, 55736.0, 35787.0, 23694.0, 15904.0, 11290.0, 8000.0, 5489.0, 3977.0, 2769.0, 1954.0, 1370.0, 1001.0, 743.0, 503.0, 359.0, 258.0, 191.0, 153.0, 100.0, 58.0, 53.0, 31.0, 22.0, 13.0, 10.0, 5.0, 2.0, 2.0, 1.0], "bins": [-0.1751708984375, -0.1698436737060547, -0.16451644897460938, -0.15918922424316406, -0.15386199951171875, -0.14853477478027344, -0.14320755004882812, -0.1378803253173828, -0.1325531005859375, -0.1272258758544922, -0.12189865112304688, -0.11657142639160156, -0.11124420166015625, -0.10591697692871094, -0.10058975219726562, -0.09526252746582031, -0.089935302734375, -0.08460807800292969, -0.07928085327148438, -0.07395362854003906, -0.06862640380859375, -0.06329917907714844, -0.057971954345703125, -0.05264472961425781, -0.0473175048828125, -0.04199028015136719, -0.036663055419921875, -0.03133583068847656, -0.02600860595703125, -0.020681381225585938, -0.015354156494140625, -0.010026931762695312, -0.00469970703125, 0.0006275177001953125, 0.005954742431640625, 0.011281967163085938, 0.01660919189453125, 0.021936416625976562, 0.027263641357421875, 0.03259086608886719, 0.0379180908203125, 0.04324531555175781, 0.048572540283203125, 0.05389976501464844, 0.05922698974609375, 0.06455421447753906, 0.06988143920898438, 0.07520866394042969, 0.080535888671875, 0.08586311340332031, 0.09119033813476562, 0.09651756286621094, 0.10184478759765625, 0.10717201232910156, 0.11249923706054688, 0.11782646179199219, 0.1231536865234375, 0.1284809112548828, 0.13380813598632812, 0.13913536071777344, 0.14446258544921875, 0.14978981018066406, 0.15511703491210938, 0.1604442596435547, 0.165771484375]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 4.0, 5.0, 2.0, 10.0, 7.0, 10.0, 9.0, 11.0, 11.0, 13.0, 17.0, 13.0, 27.0, 25.0, 32.0, 34.0, 41.0, 52.0, 45.0, 49.0, 29.0, 41.0, 1069.0, 34.0, 28.0, 58.0, 36.0, 43.0, 32.0, 37.0, 25.0, 27.0, 21.0, 23.0, 16.0, 17.0, 12.0, 19.0, 8.0, 11.0, 5.0, 6.0, 5.0, 5.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.875, -4.7236328125, -4.572265625, -4.4208984375, -4.26953125, -4.1181640625, -3.966796875, -3.8154296875, -3.6640625, -3.5126953125, -3.361328125, -3.2099609375, -3.05859375, -2.9072265625, -2.755859375, -2.6044921875, -2.453125, -2.3017578125, -2.150390625, -1.9990234375, -1.84765625, -1.6962890625, -1.544921875, -1.3935546875, -1.2421875, -1.0908203125, -0.939453125, -0.7880859375, -0.63671875, -0.4853515625, -0.333984375, -0.1826171875, -0.03125, 0.1201171875, 0.271484375, 0.4228515625, 0.57421875, 0.7255859375, 0.876953125, 1.0283203125, 1.1796875, 1.3310546875, 1.482421875, 1.6337890625, 1.78515625, 1.9365234375, 2.087890625, 2.2392578125, 2.390625, 2.5419921875, 2.693359375, 2.8447265625, 2.99609375, 3.1474609375, 3.298828125, 3.4501953125, 3.6015625, 3.7529296875, 3.904296875, 4.0556640625, 4.20703125, 4.3583984375, 4.509765625, 4.6611328125, 4.8125]}, "gradients/decoder.transformer.h.3.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 5.0, 4.0, 9.0, 19.0, 25.0, 25.0, 42.0, 56.0, 79.0, 126.0, 202.0, 295.0, 441.0, 675.0, 997.0, 1539.0, 2479.0, 3949.0, 6265.0, 10153.0, 16908.0, 29049.0, 53235.0, 106815.0, 269132.0, 1346361.0, 115131.0, 56501.0, 30486.0, 17438.0, 10615.0, 6513.0, 4145.0, 2591.0, 1705.0, 1061.0, 704.0, 438.0, 307.0, 192.0, 145.0, 95.0, 65.0, 36.0, 29.0, 18.0, 14.0, 6.0, 6.0, 5.0, 6.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.2137451171875, -0.2070751190185547, -0.20040512084960938, -0.19373512268066406, -0.18706512451171875, -0.18039512634277344, -0.17372512817382812, -0.1670551300048828, -0.1603851318359375, -0.1537151336669922, -0.14704513549804688, -0.14037513732910156, -0.13370513916015625, -0.12703514099121094, -0.12036514282226562, -0.11369514465332031, -0.107025146484375, -0.10035514831542969, -0.09368515014648438, -0.08701515197753906, -0.08034515380859375, -0.07367515563964844, -0.06700515747070312, -0.06033515930175781, -0.0536651611328125, -0.04699516296386719, -0.040325164794921875, -0.03365516662597656, -0.02698516845703125, -0.020315170288085938, -0.013645172119140625, -0.0069751739501953125, -0.00030517578125, 0.0063648223876953125, 0.013034820556640625, 0.019704818725585938, 0.02637481689453125, 0.03304481506347656, 0.039714813232421875, 0.04638481140136719, 0.0530548095703125, 0.05972480773925781, 0.06639480590820312, 0.07306480407714844, 0.07973480224609375, 0.08640480041503906, 0.09307479858398438, 0.09974479675292969, 0.106414794921875, 0.11308479309082031, 0.11975479125976562, 0.12642478942871094, 0.13309478759765625, 0.13976478576660156, 0.14643478393554688, 0.1531047821044922, 0.1597747802734375, 0.1664447784423828, 0.17311477661132812, 0.17978477478027344, 0.18645477294921875, 0.19312477111816406, 0.19979476928710938, 0.2064647674560547, 0.213134765625]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 0.0, 1.0, 4.0, 3.0, 1.0, 4.0, 8.0, 8.0, 9.0, 11.0, 14.0, 15.0, 27.0, 22.0, 35.0, 44.0, 42.0, 59.0, 45.0, 65.0, 76.0, 67.0, 60.0, 60.0, 65.0, 37.0, 29.0, 41.0, 24.0, 23.0, 23.0, 14.0, 12.0, 13.0, 5.0, 5.0, 10.0, 5.0, 2.0, 2.0, 4.0, 2.0, 0.0, 1.0, 5.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-1.3828277587890625e-05, -1.3415701687335968e-05, -1.3003125786781311e-05, -1.2590549886226654e-05, -1.2177973985671997e-05, -1.176539808511734e-05, -1.1352822184562683e-05, -1.0940246284008026e-05, -1.0527670383453369e-05, -1.0115094482898712e-05, -9.702518582344055e-06, -9.289942681789398e-06, -8.877366781234741e-06, -8.464790880680084e-06, -8.052214980125427e-06, -7.63963907957077e-06, -7.227063179016113e-06, -6.814487278461456e-06, -6.401911377906799e-06, -5.989335477352142e-06, -5.576759576797485e-06, -5.164183676242828e-06, -4.751607775688171e-06, -4.339031875133514e-06, -3.926455974578857e-06, -3.5138800740242004e-06, -3.1013041734695435e-06, -2.6887282729148865e-06, -2.2761523723602295e-06, -1.8635764718055725e-06, -1.4510005712509155e-06, -1.0384246706962585e-06, -6.258487701416016e-07, -2.1327286958694458e-07, 1.993030309677124e-07, 6.118789315223694e-07, 1.0244548320770264e-06, 1.4370307326316833e-06, 1.8496066331863403e-06, 2.2621825337409973e-06, 2.6747584342956543e-06, 3.0873343348503113e-06, 3.4999102354049683e-06, 3.912486135959625e-06, 4.325062036514282e-06, 4.737637937068939e-06, 5.150213837623596e-06, 5.562789738178253e-06, 5.97536563873291e-06, 6.387941539287567e-06, 6.800517439842224e-06, 7.213093340396881e-06, 7.625669240951538e-06, 8.038245141506195e-06, 8.450821042060852e-06, 8.863396942615509e-06, 9.275972843170166e-06, 9.688548743724823e-06, 1.010112464427948e-05, 1.0513700544834137e-05, 1.0926276445388794e-05, 1.1338852345943451e-05, 1.1751428246498108e-05, 1.2164004147052765e-05, 1.2576580047607422e-05]}, "gradients/decoder.transformer.h.3.crossattention.q_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 1.0, 5.0, 7.0, 5.0, 7.0, 3.0, 3.0, 9.0, 3.0, 9.0, 5.0, 11.0, 20.0, 17.0, 26.0, 30.0, 35.0, 52.0, 67.0, 88.0, 123.0, 177.0, 345.0, 730.0, 5451.0, 989581.0, 49239.0, 1181.0, 448.0, 255.0, 147.0, 112.0, 85.0, 76.0, 45.0, 46.0, 15.0, 20.0, 21.0, 14.0, 16.0, 8.0, 1.0, 6.0, 4.0, 7.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00022077560424804688, -0.0002133399248123169, -0.00020590424537658691, -0.00019846856594085693, -0.00019103288650512695, -0.00018359720706939697, -0.000176161527633667, -0.000168725848197937, -0.00016129016876220703, -0.00015385448932647705, -0.00014641880989074707, -0.0001389831304550171, -0.0001315474510192871, -0.00012411177158355713, -0.00011667609214782715, -0.00010924041271209717, -0.00010180473327636719, -9.436905384063721e-05, -8.693337440490723e-05, -7.949769496917725e-05, -7.206201553344727e-05, -6.462633609771729e-05, -5.7190656661987305e-05, -4.9754977226257324e-05, -4.2319297790527344e-05, -3.488361835479736e-05, -2.7447938919067383e-05, -2.0012259483337402e-05, -1.2576580047607422e-05, -5.140900611877441e-06, 2.294778823852539e-06, 9.73045825958252e-06, 1.71661376953125e-05, 2.460181713104248e-05, 3.203749656677246e-05, 3.947317600250244e-05, 4.690885543823242e-05, 5.43445348739624e-05, 6.178021430969238e-05, 6.921589374542236e-05, 7.665157318115234e-05, 8.408725261688232e-05, 9.15229320526123e-05, 9.895861148834229e-05, 0.00010639429092407227, 0.00011382997035980225, 0.00012126564979553223, 0.0001287013292312622, 0.0001361370086669922, 0.00014357268810272217, 0.00015100836753845215, 0.00015844404697418213, 0.0001658797264099121, 0.0001733154058456421, 0.00018075108528137207, 0.00018818676471710205, 0.00019562244415283203, 0.000203058123588562, 0.000210493803024292, 0.00021792948246002197, 0.00022536516189575195, 0.00023280084133148193, 0.00024023652076721191, 0.0002476722002029419, 0.0002551078796386719]}, "gradients/decoder.transformer.h.3.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 6.0, 22.0, 80.0, 359.0, 376.0, 133.0, 33.0, 5.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.5574167011654936e-05, -2.466607475071214e-05, -2.3757982489769347e-05, -2.2849890228826553e-05, -2.1941799786873162e-05, -2.1033707525930367e-05, -2.0125615264987573e-05, -1.921752300404478e-05, -1.8309430743101984e-05, -1.740133848215919e-05, -1.6493246221216395e-05, -1.5585155779263005e-05, -1.467706351832021e-05, -1.3768971257377416e-05, -1.2860878996434622e-05, -1.1952786735491827e-05, -1.1044696293538436e-05, -1.0136604032595642e-05, -9.22851268114755e-06, -8.320420420204755e-06, -7.412328614009311e-06, -6.504236807813868e-06, -5.5961445468710735e-06, -4.68805274067563e-06, -3.7799609344801866e-06, -2.871869128284743e-06, -1.963777094715624e-06, -1.0556850611465052e-06, -1.4759325495106168e-07, 7.604985512443818e-07, 1.6685908121871762e-06, 2.5766826183826197e-06, 3.4847726055886596e-06, 4.392864411784103e-06, 5.300956217979547e-06, 6.209048478922341e-06, 7.1171402851177845e-06, 8.025232091313228e-06, 8.933324352256022e-06, 9.841416613198817e-06, 1.074950796464691e-05, 1.1657600225589704e-05, 1.2565691577037796e-05, 1.347378383798059e-05, 1.4381876098923385e-05, 1.5289966540876776e-05, 1.6198060620808974e-05, 1.7106151062762365e-05, 1.801424332370516e-05, 1.8922335584647954e-05, 1.9830427845590748e-05, 2.0738520106533542e-05, 2.1646610548486933e-05, 2.2554702809429727e-05, 2.3462795070372522e-05, 2.4370887331315316e-05, 2.527897959225811e-05, 2.6187071853200905e-05, 2.70951641141437e-05, 2.800325455609709e-05, 2.8911346817039885e-05, 2.981943907798268e-05, 3.072753315791488e-05, 3.163562359986827e-05, 3.254371404182166e-05]}, "gradients/decoder.transformer.h.3.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 0.0, 2.0, 2.0, 5.0, 4.0, 6.0, 7.0, 13.0, 7.0, 6.0, 12.0, 28.0, 16.0, 30.0, 15.0, 29.0, 24.0, 44.0, 32.0, 33.0, 36.0, 48.0, 34.0, 52.0, 44.0, 38.0, 39.0, 25.0, 48.0, 36.0, 34.0, 33.0, 26.0, 25.0, 32.0, 15.0, 20.0, 12.0, 15.0, 7.0, 18.0, 14.0, 7.0, 5.0, 10.0, 1.0, 3.0, 3.0, 3.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 3.0, 1.0], "bins": [-6.556510925292969e-06, -6.348825991153717e-06, -6.141141057014465e-06, -5.933456122875214e-06, -5.725771188735962e-06, -5.51808625459671e-06, -5.3104013204574585e-06, -5.102716386318207e-06, -4.895031452178955e-06, -4.687346518039703e-06, -4.479661583900452e-06, -4.2719766497612e-06, -4.064291715621948e-06, -3.8566067814826965e-06, -3.648921847343445e-06, -3.441236913204193e-06, -3.2335519790649414e-06, -3.0258670449256897e-06, -2.818182110786438e-06, -2.6104971766471863e-06, -2.4028122425079346e-06, -2.195127308368683e-06, -1.987442374229431e-06, -1.7797574400901794e-06, -1.5720725059509277e-06, -1.364387571811676e-06, -1.1567026376724243e-06, -9.490177035331726e-07, -7.413327693939209e-07, -5.336478352546692e-07, -3.259629011154175e-07, -1.1827796697616577e-07, 8.940696716308594e-08, 2.9709190130233765e-07, 5.047768354415894e-07, 7.124617695808411e-07, 9.201467037200928e-07, 1.1278316378593445e-06, 1.3355165719985962e-06, 1.543201506137848e-06, 1.7508864402770996e-06, 1.9585713744163513e-06, 2.166256308555603e-06, 2.3739412426948547e-06, 2.5816261768341064e-06, 2.789311110973358e-06, 2.99699604511261e-06, 3.2046809792518616e-06, 3.4123659133911133e-06, 3.620050847530365e-06, 3.827735781669617e-06, 4.035420715808868e-06, 4.24310564994812e-06, 4.450790584087372e-06, 4.6584755182266235e-06, 4.866160452365875e-06, 5.073845386505127e-06, 5.281530320644379e-06, 5.48921525478363e-06, 5.696900188922882e-06, 5.904585123062134e-06, 6.1122700572013855e-06, 6.319954991340637e-06, 6.527639925479889e-06, 6.735324859619141e-06]}, "gradients/decoder.transformer.h.3.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 5.0, 1.0, 3.0, 4.0, 4.0, 3.0, 2.0, 3.0, 7.0, 11.0, 4.0, 11.0, 15.0, 12.0, 19.0, 15.0, 15.0, 17.0, 22.0, 18.0, 32.0, 36.0, 31.0, 35.0, 30.0, 26.0, 33.0, 39.0, 37.0, 43.0, 39.0, 33.0, 46.0, 23.0, 30.0, 33.0, 26.0, 28.0, 34.0, 23.0, 17.0, 17.0, 16.0, 8.0, 10.0, 16.0, 19.0, 13.0, 12.0, 6.0, 11.0, 5.0, 1.0, 5.0, 7.0, 4.0, 2.0, 1.0, 1.0, 2.0], "bins": [-6.3984375, -6.20733642578125, -6.0162353515625, -5.82513427734375, -5.634033203125, -5.44293212890625, -5.2518310546875, -5.06072998046875, -4.86962890625, -4.67852783203125, -4.4874267578125, -4.29632568359375, -4.105224609375, -3.91412353515625, -3.7230224609375, -3.53192138671875, -3.3408203125, -3.14971923828125, -2.9586181640625, -2.76751708984375, -2.576416015625, -2.38531494140625, -2.1942138671875, -2.00311279296875, -1.81201171875, -1.62091064453125, -1.4298095703125, -1.23870849609375, -1.047607421875, -0.85650634765625, -0.6654052734375, -0.47430419921875, -0.283203125, -0.09210205078125, 0.0989990234375, 0.29010009765625, 0.481201171875, 0.67230224609375, 0.8634033203125, 1.05450439453125, 1.24560546875, 1.43670654296875, 1.6278076171875, 1.81890869140625, 2.010009765625, 2.20111083984375, 2.3922119140625, 2.58331298828125, 2.7744140625, 2.96551513671875, 3.1566162109375, 3.34771728515625, 3.538818359375, 3.72991943359375, 3.9210205078125, 4.11212158203125, 4.30322265625, 4.49432373046875, 4.6854248046875, 4.87652587890625, 5.067626953125, 5.25872802734375, 5.4498291015625, 5.64093017578125, 5.83203125]}, "gradients/decoder.transformer.h.3.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 4.0, 6.0, 7.0, 6.0, 18.0, 15.0, 19.0, 32.0, 26.0, 37.0, 57.0, 81.0, 90.0, 135.0, 165.0, 223.0, 306.0, 395.0, 607.0, 961.0, 1452.0, 2469.0, 4428.0, 8297.0, 16868.0, 34912.0, 77350.0, 178470.0, 333718.0, 212545.0, 91811.0, 41224.0, 19224.0, 9800.0, 5050.0, 2671.0, 1618.0, 1010.0, 669.0, 441.0, 330.0, 249.0, 178.0, 137.0, 127.0, 83.0, 61.0, 49.0, 34.0, 22.0, 19.0, 19.0, 19.0, 9.0, 9.0, 5.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0], "bins": [-8.9921875, -8.6983642578125, -8.404541015625, -8.1107177734375, -7.81689453125, -7.5230712890625, -7.229248046875, -6.9354248046875, -6.6416015625, -6.3477783203125, -6.053955078125, -5.7601318359375, -5.46630859375, -5.1724853515625, -4.878662109375, -4.5848388671875, -4.291015625, -3.9971923828125, -3.703369140625, -3.4095458984375, -3.11572265625, -2.8218994140625, -2.528076171875, -2.2342529296875, -1.9404296875, -1.6466064453125, -1.352783203125, -1.0589599609375, -0.76513671875, -0.4713134765625, -0.177490234375, 0.1163330078125, 0.41015625, 0.7039794921875, 0.997802734375, 1.2916259765625, 1.58544921875, 1.8792724609375, 2.173095703125, 2.4669189453125, 2.7607421875, 3.0545654296875, 3.348388671875, 3.6422119140625, 3.93603515625, 4.2298583984375, 4.523681640625, 4.8175048828125, 5.111328125, 5.4051513671875, 5.698974609375, 5.9927978515625, 6.28662109375, 6.5804443359375, 6.874267578125, 7.1680908203125, 7.4619140625, 7.7557373046875, 8.049560546875, 8.3433837890625, 8.63720703125, 8.9310302734375, 9.224853515625, 9.5186767578125, 9.8125]}, "gradients/decoder.transformer.h.3.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 4.0, 6.0, 5.0, 14.0, 10.0, 14.0, 13.0, 21.0, 33.0, 28.0, 30.0, 44.0, 38.0, 41.0, 64.0, 69.0, 124.0, 278.0, 1470.0, 168.0, 129.0, 72.0, 65.0, 45.0, 42.0, 31.0, 31.0, 29.0, 27.0, 15.0, 16.0, 19.0, 15.0, 10.0, 8.0, 5.0, 5.0, 4.0, 3.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0], "bins": [-17.78125, -17.175048828125, -16.56884765625, -15.962646484375, -15.3564453125, -14.750244140625, -14.14404296875, -13.537841796875, -12.931640625, -12.325439453125, -11.71923828125, -11.113037109375, -10.5068359375, -9.900634765625, -9.29443359375, -8.688232421875, -8.08203125, -7.475830078125, -6.86962890625, -6.263427734375, -5.6572265625, -5.051025390625, -4.44482421875, -3.838623046875, -3.232421875, -2.626220703125, -2.02001953125, -1.413818359375, -0.8076171875, -0.201416015625, 0.40478515625, 1.010986328125, 1.6171875, 2.223388671875, 2.82958984375, 3.435791015625, 4.0419921875, 4.648193359375, 5.25439453125, 5.860595703125, 6.466796875, 7.072998046875, 7.67919921875, 8.285400390625, 8.8916015625, 9.497802734375, 10.10400390625, 10.710205078125, 11.31640625, 11.922607421875, 12.52880859375, 13.135009765625, 13.7412109375, 14.347412109375, 14.95361328125, 15.559814453125, 16.166015625, 16.772216796875, 17.37841796875, 17.984619140625, 18.5908203125, 19.197021484375, 19.80322265625, 20.409423828125, 21.015625]}, "gradients/decoder.transformer.h.3.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 3.0, 6.0, 3.0, 4.0, 5.0, 15.0, 14.0, 11.0, 23.0, 27.0, 39.0, 48.0, 76.0, 141.0, 249.0, 694.0, 2370.0, 2992054.0, 147392.0, 1439.0, 521.0, 246.0, 98.0, 69.0, 47.0, 22.0, 28.0, 15.0, 14.0, 13.0, 7.0, 10.0, 5.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-117.1875, -113.9189453125, -110.650390625, -107.3818359375, -104.11328125, -100.8447265625, -97.576171875, -94.3076171875, -91.0390625, -87.7705078125, -84.501953125, -81.2333984375, -77.96484375, -74.6962890625, -71.427734375, -68.1591796875, -64.890625, -61.6220703125, -58.353515625, -55.0849609375, -51.81640625, -48.5478515625, -45.279296875, -42.0107421875, -38.7421875, -35.4736328125, -32.205078125, -28.9365234375, -25.66796875, -22.3994140625, -19.130859375, -15.8623046875, -12.59375, -9.3251953125, -6.056640625, -2.7880859375, 0.48046875, 3.7490234375, 7.017578125, 10.2861328125, 13.5546875, 16.8232421875, 20.091796875, 23.3603515625, 26.62890625, 29.8974609375, 33.166015625, 36.4345703125, 39.703125, 42.9716796875, 46.240234375, 49.5087890625, 52.77734375, 56.0458984375, 59.314453125, 62.5830078125, 65.8515625, 69.1201171875, 72.388671875, 75.6572265625, 78.92578125, 82.1943359375, 85.462890625, 88.7314453125, 92.0]}, "gradients/decoder.transformer.h.3.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 7.0, 23.0, 86.0, 285.0, 370.0, 182.0, 53.0, 8.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-79.02015686035156, -74.16109466552734, -69.30203247070312, -64.4429702758789, -59.58390426635742, -54.7248420715332, -49.86577606201172, -45.0067138671875, -40.14765167236328, -35.28858947753906, -30.42952537536621, -25.57046127319336, -20.71139907836914, -15.852336883544922, -10.99327278137207, -6.134208679199219, -1.275146484375, 3.583916664123535, 8.44297981262207, 13.302042961120605, 18.16110610961914, 23.02016830444336, 27.87923240661621, 32.73829650878906, 37.59735870361328, 42.4564208984375, 47.31548309326172, 52.1745491027832, 57.03361129760742, 61.89267349243164, 66.75173950195312, 71.61080169677734, 76.46987915039062, 81.32894134521484, 86.18800354003906, 91.04706573486328, 95.9061279296875, 100.76519775390625, 105.62425994873047, 110.48332214355469, 115.3423843383789, 120.20144653320312, 125.06050872802734, 129.91957092285156, 134.7786407470703, 139.6376953125, 144.49676513671875, 149.3558349609375, 154.2148895263672, 159.07395935058594, 163.93301391601562, 168.79208374023438, 173.65113830566406, 178.5102081298828, 183.3692626953125, 188.22833251953125, 193.08740234375, 197.94647216796875, 202.80552673339844, 207.6645965576172, 212.52365112304688, 217.38272094726562, 222.2417755126953, 227.10084533691406, 231.95989990234375]}, "gradients/decoder.transformer.h.3.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 1.0, 2.0, 5.0, 4.0, 6.0, 9.0, 11.0, 7.0, 9.0, 6.0, 14.0, 12.0, 12.0, 10.0, 22.0, 29.0, 23.0, 33.0, 21.0, 26.0, 30.0, 32.0, 32.0, 36.0, 37.0, 40.0, 31.0, 44.0, 40.0, 54.0, 31.0, 24.0, 33.0, 35.0, 30.0, 21.0, 26.0, 27.0, 26.0, 23.0, 19.0, 14.0, 11.0, 4.0, 5.0, 9.0, 6.0, 6.0, 7.0, 5.0, 6.0, 4.0, 3.0, 2.0, 2.0], "bins": [-52.59074401855469, -51.17850875854492, -49.766273498535156, -48.35403823852539, -46.941802978515625, -45.52956771850586, -44.117332458496094, -42.70509719848633, -41.29286193847656, -39.8806266784668, -38.46839141845703, -37.056156158447266, -35.6439208984375, -34.231685638427734, -32.81945037841797, -31.407215118408203, -29.994979858398438, -28.582744598388672, -27.170509338378906, -25.75827407836914, -24.346038818359375, -22.93380355834961, -21.521568298339844, -20.109333038330078, -18.697097778320312, -17.284862518310547, -15.872627258300781, -14.460391998291016, -13.04815673828125, -11.635921478271484, -10.223686218261719, -8.811450958251953, -7.3992156982421875, -5.986980438232422, -4.574745178222656, -3.1625099182128906, -1.750274658203125, -0.3380393981933594, 1.0741958618164062, 2.486431121826172, 3.8986663818359375, 5.310901641845703, 6.723136901855469, 8.135372161865234, 9.547607421875, 10.959842681884766, 12.372077941894531, 13.784313201904297, 15.196548461914062, 16.608783721923828, 18.021018981933594, 19.43325424194336, 20.845489501953125, 22.25772476196289, 23.669960021972656, 25.082195281982422, 26.494430541992188, 27.906665802001953, 29.31890106201172, 30.731136322021484, 32.14337158203125, 33.555606842041016, 34.96784210205078, 36.38007736206055, 37.79231262207031]}, "gradients/decoder.transformer.h.2.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 5.0, 4.0, 4.0, 2.0, 3.0, 3.0, 2.0, 9.0, 5.0, 12.0, 9.0, 12.0, 13.0, 18.0, 20.0, 15.0, 20.0, 23.0, 25.0, 29.0, 26.0, 28.0, 26.0, 32.0, 39.0, 35.0, 42.0, 27.0, 45.0, 43.0, 37.0, 36.0, 31.0, 25.0, 25.0, 34.0, 30.0, 36.0, 23.0, 19.0, 11.0, 17.0, 22.0, 13.0, 11.0, 11.0, 8.0, 10.0, 7.0, 6.0, 8.0, 4.0, 5.0, 6.0, 6.0, 1.0, 1.0, 2.0], "bins": [-6.90625, -6.705322265625, -6.50439453125, -6.303466796875, -6.1025390625, -5.901611328125, -5.70068359375, -5.499755859375, -5.298828125, -5.097900390625, -4.89697265625, -4.696044921875, -4.4951171875, -4.294189453125, -4.09326171875, -3.892333984375, -3.69140625, -3.490478515625, -3.28955078125, -3.088623046875, -2.8876953125, -2.686767578125, -2.48583984375, -2.284912109375, -2.083984375, -1.883056640625, -1.68212890625, -1.481201171875, -1.2802734375, -1.079345703125, -0.87841796875, -0.677490234375, -0.4765625, -0.275634765625, -0.07470703125, 0.126220703125, 0.3271484375, 0.528076171875, 0.72900390625, 0.929931640625, 1.130859375, 1.331787109375, 1.53271484375, 1.733642578125, 1.9345703125, 2.135498046875, 2.33642578125, 2.537353515625, 2.73828125, 2.939208984375, 3.14013671875, 3.341064453125, 3.5419921875, 3.742919921875, 3.94384765625, 4.144775390625, 4.345703125, 4.546630859375, 4.74755859375, 4.948486328125, 5.1494140625, 5.350341796875, 5.55126953125, 5.752197265625, 5.953125]}, "gradients/decoder.transformer.h.2.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 7.0, 6.0, 9.0, 14.0, 15.0, 20.0, 25.0, 35.0, 58.0, 72.0, 109.0, 143.0, 251.0, 291.0, 459.0, 738.0, 1113.0, 1883.0, 3135.0, 5897.0, 11761.0, 27971.0, 121836.0, 763944.0, 2167736.0, 884996.0, 142670.0, 31607.0, 12490.0, 6202.0, 3445.0, 1862.0, 1205.0, 713.0, 510.0, 308.0, 216.0, 135.0, 124.0, 73.0, 61.0, 37.0, 36.0, 28.0, 16.0, 11.0, 10.0, 2.0, 4.0, 3.0, 1.0, 3.0, 0.0, 0.0, 2.0], "bins": [-12.6328125, -12.2562255859375, -11.879638671875, -11.5030517578125, -11.12646484375, -10.7498779296875, -10.373291015625, -9.9967041015625, -9.6201171875, -9.2435302734375, -8.866943359375, -8.4903564453125, -8.11376953125, -7.7371826171875, -7.360595703125, -6.9840087890625, -6.607421875, -6.2308349609375, -5.854248046875, -5.4776611328125, -5.10107421875, -4.7244873046875, -4.347900390625, -3.9713134765625, -3.5947265625, -3.2181396484375, -2.841552734375, -2.4649658203125, -2.08837890625, -1.7117919921875, -1.335205078125, -0.9586181640625, -0.58203125, -0.2054443359375, 0.171142578125, 0.5477294921875, 0.92431640625, 1.3009033203125, 1.677490234375, 2.0540771484375, 2.4306640625, 2.8072509765625, 3.183837890625, 3.5604248046875, 3.93701171875, 4.3135986328125, 4.690185546875, 5.0667724609375, 5.443359375, 5.8199462890625, 6.196533203125, 6.5731201171875, 6.94970703125, 7.3262939453125, 7.702880859375, 8.0794677734375, 8.4560546875, 8.8326416015625, 9.209228515625, 9.5858154296875, 9.96240234375, 10.3389892578125, 10.715576171875, 11.0921630859375, 11.46875]}, "gradients/decoder.transformer.h.2.mlp.c_fc.bias": {"_type": "histogram", "values": [4.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 10.0, 1.0, 3.0, 7.0, 6.0, 10.0, 24.0, 26.0, 46.0, 64.0, 113.0, 165.0, 259.0, 407.0, 660.0, 677.0, 526.0, 350.0, 211.0, 164.0, 96.0, 64.0, 54.0, 25.0, 25.0, 15.0, 19.0, 7.0, 7.0, 8.0, 1.0, 7.0, 6.0, 2.0, 3.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-11.671875, -11.20849609375, -10.7451171875, -10.28173828125, -9.818359375, -9.35498046875, -8.8916015625, -8.42822265625, -7.96484375, -7.50146484375, -7.0380859375, -6.57470703125, -6.111328125, -5.64794921875, -5.1845703125, -4.72119140625, -4.2578125, -3.79443359375, -3.3310546875, -2.86767578125, -2.404296875, -1.94091796875, -1.4775390625, -1.01416015625, -0.55078125, -0.08740234375, 0.3759765625, 0.83935546875, 1.302734375, 1.76611328125, 2.2294921875, 2.69287109375, 3.15625, 3.61962890625, 4.0830078125, 4.54638671875, 5.009765625, 5.47314453125, 5.9365234375, 6.39990234375, 6.86328125, 7.32666015625, 7.7900390625, 8.25341796875, 8.716796875, 9.18017578125, 9.6435546875, 10.10693359375, 10.5703125, 11.03369140625, 11.4970703125, 11.96044921875, 12.423828125, 12.88720703125, 13.3505859375, 13.81396484375, 14.27734375, 14.74072265625, 15.2041015625, 15.66748046875, 16.130859375, 16.59423828125, 17.0576171875, 17.52099609375, 17.984375]}, "gradients/decoder.transformer.h.2.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 3.0, 3.0, 7.0, 9.0, 9.0, 9.0, 12.0, 23.0, 32.0, 42.0, 61.0, 113.0, 172.0, 343.0, 621.0, 1257.0, 2862.0, 8541.0, 39679.0, 465347.0, 3419119.0, 219376.0, 25606.0, 6622.0, 2224.0, 1021.0, 503.0, 258.0, 147.0, 90.0, 53.0, 35.0, 28.0, 18.0, 10.0, 8.0, 7.0, 2.0, 5.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0], "bins": [-28.125, -27.351806640625, -26.57861328125, -25.805419921875, -25.0322265625, -24.259033203125, -23.48583984375, -22.712646484375, -21.939453125, -21.166259765625, -20.39306640625, -19.619873046875, -18.8466796875, -18.073486328125, -17.30029296875, -16.527099609375, -15.75390625, -14.980712890625, -14.20751953125, -13.434326171875, -12.6611328125, -11.887939453125, -11.11474609375, -10.341552734375, -9.568359375, -8.795166015625, -8.02197265625, -7.248779296875, -6.4755859375, -5.702392578125, -4.92919921875, -4.156005859375, -3.3828125, -2.609619140625, -1.83642578125, -1.063232421875, -0.2900390625, 0.483154296875, 1.25634765625, 2.029541015625, 2.802734375, 3.575927734375, 4.34912109375, 5.122314453125, 5.8955078125, 6.668701171875, 7.44189453125, 8.215087890625, 8.98828125, 9.761474609375, 10.53466796875, 11.307861328125, 12.0810546875, 12.854248046875, 13.62744140625, 14.400634765625, 15.173828125, 15.947021484375, 16.72021484375, 17.493408203125, 18.2666015625, 19.039794921875, 19.81298828125, 20.586181640625, 21.359375]}, "gradients/decoder.transformer.h.2.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 56.0, 955.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-639.6530151367188, -610.9865112304688, -582.320068359375, -553.653564453125, -524.987060546875, -496.3205871582031, -467.65411376953125, -438.98760986328125, -410.3211364746094, -381.6546630859375, -352.9881591796875, -324.3216857910156, -295.65521240234375, -266.98870849609375, -238.32223510742188, -209.65574645996094, -180.9892578125, -152.32276916503906, -123.65628814697266, -94.98980712890625, -66.32331848144531, -37.656829833984375, -8.9903564453125, 19.676132202148438, 48.342620849609375, 77.00910949707031, 105.67559051513672, 134.34207153320312, 163.00856018066406, 191.675048828125, 220.34152221679688, 249.0080108642578, 277.674560546875, 306.3410339355469, 335.0075378417969, 363.67401123046875, 392.34051513671875, 421.0069885253906, 449.6734619140625, 478.3399658203125, 507.0064392089844, 535.6729125976562, 564.3394165039062, 593.005859375, 621.67236328125, 650.3388671875, 679.00537109375, 707.6718139648438, 736.3383178710938, 765.0048217773438, 793.6712646484375, 822.3377685546875, 851.0042724609375, 879.6707763671875, 908.3372192382812, 937.0037231445312, 965.670166015625, 994.336669921875, 1023.0031127929688, 1051.669677734375, 1080.3360595703125, 1109.0025634765625, 1137.6690673828125, 1166.3355712890625, 1195.0020751953125]}, "gradients/decoder.transformer.h.2.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0, 6.0, 6.0, 7.0, 13.0, 13.0, 26.0, 20.0, 30.0, 40.0, 36.0, 58.0, 51.0, 42.0, 53.0, 47.0, 53.0, 67.0, 55.0, 47.0, 49.0, 44.0, 44.0, 33.0, 30.0, 31.0, 31.0, 20.0, 15.0, 12.0, 12.0, 4.0, 5.0, 5.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-67.86349487304688, -65.79096221923828, -63.71843338012695, -61.645904541015625, -59.57337188720703, -57.50083923339844, -55.42831039428711, -53.35578155517578, -51.28324890136719, -49.210716247558594, -47.138187408447266, -45.06565856933594, -42.993125915527344, -40.92059326171875, -38.84806442260742, -36.775535583496094, -34.7030029296875, -32.630470275878906, -30.557941436767578, -28.485410690307617, -26.412879943847656, -24.340349197387695, -22.267818450927734, -20.195287704467773, -18.122756958007812, -16.05022621154785, -13.97769546508789, -11.90516471862793, -9.832633972167969, -7.760103225708008, -5.687572479248047, -3.615041732788086, -1.5425033569335938, 0.5300273895263672, 2.602558135986328, 4.675088882446289, 6.74761962890625, 8.820150375366211, 10.892681121826172, 12.965211868286133, 15.037742614746094, 17.110273361206055, 19.182804107666016, 21.255334854125977, 23.327865600585938, 25.4003963470459, 27.47292709350586, 29.54545783996582, 31.61798858642578, 33.690521240234375, 35.7630500793457, 37.83557891845703, 39.908111572265625, 41.98064422607422, 44.05317306518555, 46.125701904296875, 48.19823455810547, 50.27076721191406, 52.34329605102539, 54.41582489013672, 56.48835754394531, 58.560890197753906, 60.633419036865234, 62.70594787597656, 64.77848052978516]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 3.0, 3.0, 2.0, 5.0, 5.0, 6.0, 4.0, 14.0, 13.0, 11.0, 16.0, 11.0, 11.0, 14.0, 25.0, 22.0, 18.0, 27.0, 25.0, 37.0, 36.0, 34.0, 37.0, 37.0, 41.0, 52.0, 48.0, 37.0, 31.0, 43.0, 42.0, 38.0, 29.0, 29.0, 26.0, 26.0, 35.0, 18.0, 12.0, 17.0, 16.0, 9.0, 6.0, 5.0, 12.0, 10.0, 3.0, 7.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.890625, -6.6798095703125, -6.468994140625, -6.2581787109375, -6.04736328125, -5.8365478515625, -5.625732421875, -5.4149169921875, -5.2041015625, -4.9932861328125, -4.782470703125, -4.5716552734375, -4.36083984375, -4.1500244140625, -3.939208984375, -3.7283935546875, -3.517578125, -3.3067626953125, -3.095947265625, -2.8851318359375, -2.67431640625, -2.4635009765625, -2.252685546875, -2.0418701171875, -1.8310546875, -1.6202392578125, -1.409423828125, -1.1986083984375, -0.98779296875, -0.7769775390625, -0.566162109375, -0.3553466796875, -0.14453125, 0.0662841796875, 0.277099609375, 0.4879150390625, 0.69873046875, 0.9095458984375, 1.120361328125, 1.3311767578125, 1.5419921875, 1.7528076171875, 1.963623046875, 2.1744384765625, 2.38525390625, 2.5960693359375, 2.806884765625, 3.0177001953125, 3.228515625, 3.4393310546875, 3.650146484375, 3.8609619140625, 4.07177734375, 4.2825927734375, 4.493408203125, 4.7042236328125, 4.9150390625, 5.1258544921875, 5.336669921875, 5.5474853515625, 5.75830078125, 5.9691162109375, 6.179931640625, 6.3907470703125, 6.6015625]}, "gradients/decoder.transformer.h.2.crossattention.c_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 6.0, 11.0, 8.0, 15.0, 15.0, 23.0, 32.0, 58.0, 86.0, 104.0, 175.0, 215.0, 297.0, 431.0, 647.0, 922.0, 1372.0, 1866.0, 2883.0, 4293.0, 6117.0, 9229.0, 13838.0, 20763.0, 31567.0, 49911.0, 84399.0, 160341.0, 323339.0, 131749.0, 73799.0, 44947.0, 28466.0, 18436.0, 12477.0, 8166.0, 5506.0, 3771.0, 2614.0, 1785.0, 1198.0, 791.0, 549.0, 384.0, 308.0, 199.0, 143.0, 107.0, 65.0, 51.0, 25.0, 15.0, 15.0, 10.0, 12.0, 7.0, 4.0, 3.0, 0.0, 1.0, 3.0], "bins": [-0.1856689453125, -0.1797504425048828, -0.17383193969726562, -0.16791343688964844, -0.16199493408203125, -0.15607643127441406, -0.15015792846679688, -0.1442394256591797, -0.1383209228515625, -0.1324024200439453, -0.12648391723632812, -0.12056541442871094, -0.11464691162109375, -0.10872840881347656, -0.10280990600585938, -0.09689140319824219, -0.090972900390625, -0.08505439758300781, -0.07913589477539062, -0.07321739196777344, -0.06729888916015625, -0.06138038635253906, -0.055461883544921875, -0.04954338073730469, -0.0436248779296875, -0.03770637512207031, -0.031787872314453125, -0.025869369506835938, -0.01995086669921875, -0.014032363891601562, -0.008113861083984375, -0.0021953582763671875, 0.00372314453125, 0.009641647338867188, 0.015560150146484375, 0.021478652954101562, 0.02739715576171875, 0.03331565856933594, 0.039234161376953125, 0.04515266418457031, 0.0510711669921875, 0.05698966979980469, 0.06290817260742188, 0.06882667541503906, 0.07474517822265625, 0.08066368103027344, 0.08658218383789062, 0.09250068664550781, 0.098419189453125, 0.10433769226074219, 0.11025619506835938, 0.11617469787597656, 0.12209320068359375, 0.12801170349121094, 0.13393020629882812, 0.1398487091064453, 0.1457672119140625, 0.1516857147216797, 0.15760421752929688, 0.16352272033691406, 0.16944122314453125, 0.17535972595214844, 0.18127822875976562, 0.1871967315673828, 0.193115234375]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 2.0, 5.0, 5.0, 5.0, 6.0, 9.0, 7.0, 16.0, 20.0, 23.0, 21.0, 27.0, 30.0, 30.0, 40.0, 41.0, 35.0, 46.0, 33.0, 50.0, 1082.0, 57.0, 59.0, 50.0, 45.0, 28.0, 38.0, 27.0, 20.0, 36.0, 22.0, 23.0, 21.0, 17.0, 15.0, 4.0, 6.0, 11.0, 5.0, 5.0, 7.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.60546875, -5.4351806640625, -5.264892578125, -5.0946044921875, -4.92431640625, -4.7540283203125, -4.583740234375, -4.4134521484375, -4.2431640625, -4.0728759765625, -3.902587890625, -3.7322998046875, -3.56201171875, -3.3917236328125, -3.221435546875, -3.0511474609375, -2.880859375, -2.7105712890625, -2.540283203125, -2.3699951171875, -2.19970703125, -2.0294189453125, -1.859130859375, -1.6888427734375, -1.5185546875, -1.3482666015625, -1.177978515625, -1.0076904296875, -0.83740234375, -0.6671142578125, -0.496826171875, -0.3265380859375, -0.15625, 0.0140380859375, 0.184326171875, 0.3546142578125, 0.52490234375, 0.6951904296875, 0.865478515625, 1.0357666015625, 1.2060546875, 1.3763427734375, 1.546630859375, 1.7169189453125, 1.88720703125, 2.0574951171875, 2.227783203125, 2.3980712890625, 2.568359375, 2.7386474609375, 2.908935546875, 3.0792236328125, 3.24951171875, 3.4197998046875, 3.590087890625, 3.7603759765625, 3.9306640625, 4.1009521484375, 4.271240234375, 4.4415283203125, 4.61181640625, 4.7821044921875, 4.952392578125, 5.1226806640625, 5.29296875]}, "gradients/decoder.transformer.h.2.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 7.0, 7.0, 11.0, 13.0, 13.0, 25.0, 47.0, 90.0, 109.0, 176.0, 269.0, 393.0, 671.0, 1109.0, 1847.0, 3030.0, 5232.0, 8986.0, 15758.0, 28318.0, 52913.0, 108242.0, 1385285.0, 276284.0, 98267.0, 48671.0, 26235.0, 14627.0, 8336.0, 4847.0, 2801.0, 1727.0, 1040.0, 631.0, 411.0, 253.0, 172.0, 79.0, 72.0, 39.0, 33.0, 20.0, 15.0, 14.0, 7.0, 0.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.2474365234375, -0.23992347717285156, -0.23241043090820312, -0.2248973846435547, -0.21738433837890625, -0.2098712921142578, -0.20235824584960938, -0.19484519958496094, -0.1873321533203125, -0.17981910705566406, -0.17230606079101562, -0.1647930145263672, -0.15727996826171875, -0.1497669219970703, -0.14225387573242188, -0.13474082946777344, -0.127227783203125, -0.11971473693847656, -0.11220169067382812, -0.10468864440917969, -0.09717559814453125, -0.08966255187988281, -0.08214950561523438, -0.07463645935058594, -0.0671234130859375, -0.05961036682128906, -0.052097320556640625, -0.04458427429199219, -0.03707122802734375, -0.029558181762695312, -0.022045135498046875, -0.014532089233398438, -0.00701904296875, 0.0004940032958984375, 0.008007049560546875, 0.015520095825195312, 0.02303314208984375, 0.030546188354492188, 0.038059234619140625, 0.04557228088378906, 0.0530853271484375, 0.06059837341308594, 0.06811141967773438, 0.07562446594238281, 0.08313751220703125, 0.09065055847167969, 0.09816360473632812, 0.10567665100097656, 0.113189697265625, 0.12070274353027344, 0.12821578979492188, 0.1357288360595703, 0.14324188232421875, 0.1507549285888672, 0.15826797485351562, 0.16578102111816406, 0.1732940673828125, 0.18080711364746094, 0.18832015991210938, 0.1958332061767578, 0.20334625244140625, 0.2108592987060547, 0.21837234497070312, 0.22588539123535156, 0.2333984375]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 4.0, 3.0, 6.0, 7.0, 2.0, 3.0, 6.0, 4.0, 16.0, 11.0, 20.0, 27.0, 21.0, 25.0, 32.0, 49.0, 38.0, 35.0, 48.0, 52.0, 56.0, 59.0, 52.0, 50.0, 48.0, 38.0, 46.0, 44.0, 22.0, 35.0, 28.0, 27.0, 18.0, 13.0, 11.0, 5.0, 11.0, 9.0, 8.0, 6.0, 4.0, 1.0, 4.0, 0.0, 3.0, 1.0, 0.0, 1.0, 3.0], "bins": [-1.3649463653564453e-05, -1.3277865946292877e-05, -1.2906268239021301e-05, -1.2534670531749725e-05, -1.216307282447815e-05, -1.1791475117206573e-05, -1.1419877409934998e-05, -1.1048279702663422e-05, -1.0676681995391846e-05, -1.030508428812027e-05, -9.933486580848694e-06, -9.561888873577118e-06, -9.190291166305542e-06, -8.818693459033966e-06, -8.44709575176239e-06, -8.075498044490814e-06, -7.703900337219238e-06, -7.332302629947662e-06, -6.9607049226760864e-06, -6.5891072154045105e-06, -6.2175095081329346e-06, -5.845911800861359e-06, -5.474314093589783e-06, -5.102716386318207e-06, -4.731118679046631e-06, -4.359520971775055e-06, -3.987923264503479e-06, -3.616325557231903e-06, -3.244727849960327e-06, -2.8731301426887512e-06, -2.5015324354171753e-06, -2.1299347281455994e-06, -1.7583370208740234e-06, -1.3867393136024475e-06, -1.0151416063308716e-06, -6.435438990592957e-07, -2.7194619178771973e-07, 9.96515154838562e-08, 4.7124922275543213e-07, 8.428469300270081e-07, 1.214444637298584e-06, 1.58604234457016e-06, 1.957640051841736e-06, 2.3292377591133118e-06, 2.7008354663848877e-06, 3.0724331736564636e-06, 3.4440308809280396e-06, 3.8156285881996155e-06, 4.187226295471191e-06, 4.558824002742767e-06, 4.930421710014343e-06, 5.302019417285919e-06, 5.673617124557495e-06, 6.045214831829071e-06, 6.416812539100647e-06, 6.788410246372223e-06, 7.160007953643799e-06, 7.531605660915375e-06, 7.90320336818695e-06, 8.274801075458527e-06, 8.646398782730103e-06, 9.017996490001678e-06, 9.389594197273254e-06, 9.76119190454483e-06, 1.0132789611816406e-05]}, "gradients/decoder.transformer.h.2.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 4.0, 0.0, 0.0, 4.0, 2.0, 4.0, 5.0, 8.0, 7.0, 17.0, 11.0, 13.0, 11.0, 23.0, 31.0, 36.0, 49.0, 68.0, 76.0, 105.0, 148.0, 238.0, 421.0, 894.0, 13278.0, 1011583.0, 19264.0, 947.0, 444.0, 229.0, 154.0, 122.0, 72.0, 66.0, 58.0, 35.0, 27.0, 25.0, 16.0, 14.0, 14.0, 4.0, 9.0, 8.0, 6.0, 9.0, 4.0, 0.0, 3.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001938343048095703, -0.0001865290105342865, -0.00017922371625900269, -0.00017191842198371887, -0.00016461312770843506, -0.00015730783343315125, -0.00015000253915786743, -0.00014269724488258362, -0.0001353919506072998, -0.000128086656332016, -0.00012078136205673218, -0.00011347606778144836, -0.00010617077350616455, -9.886547923088074e-05, -9.156018495559692e-05, -8.425489068031311e-05, -7.69495964050293e-05, -6.964430212974548e-05, -6.233900785446167e-05, -5.5033713579177856e-05, -4.772841930389404e-05, -4.042312502861023e-05, -3.3117830753326416e-05, -2.5812536478042603e-05, -1.850724220275879e-05, -1.1201947927474976e-05, -3.896653652191162e-06, 3.4086406230926514e-06, 1.0713934898376465e-05, 1.801922917366028e-05, 2.5324523448944092e-05, 3.2629817724227905e-05, 3.993511199951172e-05, 4.724040627479553e-05, 5.4545700550079346e-05, 6.185099482536316e-05, 6.915628910064697e-05, 7.646158337593079e-05, 8.37668776512146e-05, 9.107217192649841e-05, 9.837746620178223e-05, 0.00010568276047706604, 0.00011298805475234985, 0.00012029334902763367, 0.00012759864330291748, 0.0001349039375782013, 0.0001422092318534851, 0.00014951452612876892, 0.00015681982040405273, 0.00016412511467933655, 0.00017143040895462036, 0.00017873570322990417, 0.000186040997505188, 0.0001933462917804718, 0.00020065158605575562, 0.00020795688033103943, 0.00021526217460632324, 0.00022256746888160706, 0.00022987276315689087, 0.00023717805743217468, 0.0002444833517074585, 0.0002517886459827423, 0.0002590939402580261, 0.00026639923453330994, 0.00027370452880859375]}, "gradients/decoder.transformer.h.2.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 37.0, 205.0, 597.0, 159.0, 15.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8083050235873088e-05, -1.6736687030061148e-05, -1.5390323824249208e-05, -1.404396152793197e-05, -1.269759832212003e-05, -1.135123511630809e-05, -1.0004872819990851e-05, -8.658509614178911e-06, -7.312146408366971e-06, -5.965783202555031e-06, -4.619420451490441e-06, -3.273057700425852e-06, -1.926694494613912e-06, -5.803312888019718e-07, 7.660310075152665e-07, 2.1123942133272067e-06, 3.458757419139147e-06, 4.805120624951087e-06, 6.151483376015676e-06, 7.4978461270802654e-06, 8.844209332892206e-06, 1.0190572538704146e-05, 1.1536934835021384e-05, 1.2883298040833324e-05, 1.4229661246645264e-05, 1.5576024452457204e-05, 1.6922387658269145e-05, 1.8268750864081085e-05, 1.961511225090362e-05, 2.0961477275704965e-05, 2.23078386625275e-05, 2.3654201868339442e-05, 2.5000561436172575e-05, 2.6346924641984515e-05, 2.7693287847796455e-05, 2.903964923461899e-05, 3.0386014259420335e-05, 3.173237564624287e-05, 3.307873703306541e-05, 3.442510205786675e-05, 3.5771467082668096e-05, 3.711782846949063e-05, 3.8464193494291976e-05, 3.981055488111451e-05, 4.1156919905915856e-05, 4.250328129273839e-05, 4.384964267956093e-05, 4.519600770436227e-05, 4.654236909118481e-05, 4.7888730478007346e-05, 4.923509550280869e-05, 5.058145688963123e-05, 5.192782191443257e-05, 5.327418330125511e-05, 5.462054832605645e-05, 5.596690971287899e-05, 5.7313271099701524e-05, 5.865963248652406e-05, 6.0005997511325404e-05, 6.135235889814794e-05, 6.269872392294928e-05, 6.404508894775063e-05, 6.539144669659436e-05, 6.67378117213957e-05, 6.808417674619704e-05]}, "gradients/decoder.transformer.h.2.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 6.0, 3.0, 2.0, 4.0, 4.0, 9.0, 10.0, 14.0, 17.0, 13.0, 21.0, 21.0, 26.0, 23.0, 21.0, 27.0, 31.0, 31.0, 23.0, 40.0, 32.0, 54.0, 48.0, 33.0, 51.0, 35.0, 46.0, 38.0, 36.0, 44.0, 26.0, 31.0, 25.0, 23.0, 19.0, 18.0, 16.0, 17.0, 15.0, 5.0, 14.0, 6.0, 11.0, 4.0, 4.0, 3.0, 5.0, 5.0, 1.0, 3.0, 0.0, 1.0, 3.0], "bins": [-7.271766662597656e-06, -7.059425115585327e-06, -6.847083568572998e-06, -6.634742021560669e-06, -6.42240047454834e-06, -6.210058927536011e-06, -5.997717380523682e-06, -5.7853758335113525e-06, -5.5730342864990234e-06, -5.360692739486694e-06, -5.148351192474365e-06, -4.936009645462036e-06, -4.723668098449707e-06, -4.511326551437378e-06, -4.298985004425049e-06, -4.08664345741272e-06, -3.874301910400391e-06, -3.6619603633880615e-06, -3.4496188163757324e-06, -3.2372772693634033e-06, -3.0249357223510742e-06, -2.812594175338745e-06, -2.600252628326416e-06, -2.387911081314087e-06, -2.175569534301758e-06, -1.9632279872894287e-06, -1.7508864402770996e-06, -1.5385448932647705e-06, -1.3262033462524414e-06, -1.1138617992401123e-06, -9.015202522277832e-07, -6.891787052154541e-07, -4.76837158203125e-07, -2.644956111907959e-07, -5.21540641784668e-08, 1.601874828338623e-07, 3.725290298461914e-07, 5.848705768585205e-07, 7.972121238708496e-07, 1.0095536708831787e-06, 1.2218952178955078e-06, 1.434236764907837e-06, 1.646578311920166e-06, 1.8589198589324951e-06, 2.0712614059448242e-06, 2.2836029529571533e-06, 2.4959444999694824e-06, 2.7082860469818115e-06, 2.9206275939941406e-06, 3.1329691410064697e-06, 3.345310688018799e-06, 3.557652235031128e-06, 3.769993782043457e-06, 3.982335329055786e-06, 4.194676876068115e-06, 4.407018423080444e-06, 4.6193599700927734e-06, 4.8317015171051025e-06, 5.044043064117432e-06, 5.256384611129761e-06, 5.46872615814209e-06, 5.681067705154419e-06, 5.893409252166748e-06, 6.105750799179077e-06, 6.318092346191406e-06]}, "gradients/decoder.transformer.h.2.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 3.0, 3.0, 2.0, 5.0, 5.0, 6.0, 4.0, 14.0, 13.0, 11.0, 16.0, 11.0, 11.0, 14.0, 25.0, 22.0, 18.0, 27.0, 25.0, 37.0, 36.0, 34.0, 37.0, 37.0, 41.0, 52.0, 48.0, 37.0, 31.0, 43.0, 42.0, 38.0, 29.0, 29.0, 26.0, 26.0, 35.0, 18.0, 12.0, 17.0, 16.0, 9.0, 6.0, 5.0, 12.0, 10.0, 3.0, 7.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.890625, -6.6798095703125, -6.468994140625, -6.2581787109375, -6.04736328125, -5.8365478515625, -5.625732421875, -5.4149169921875, -5.2041015625, -4.9932861328125, -4.782470703125, -4.5716552734375, -4.36083984375, -4.1500244140625, -3.939208984375, -3.7283935546875, -3.517578125, -3.3067626953125, -3.095947265625, -2.8851318359375, -2.67431640625, -2.4635009765625, -2.252685546875, -2.0418701171875, -1.8310546875, -1.6202392578125, -1.409423828125, -1.1986083984375, -0.98779296875, -0.7769775390625, -0.566162109375, -0.3553466796875, -0.14453125, 0.0662841796875, 0.277099609375, 0.4879150390625, 0.69873046875, 0.9095458984375, 1.120361328125, 1.3311767578125, 1.5419921875, 1.7528076171875, 1.963623046875, 2.1744384765625, 2.38525390625, 2.5960693359375, 2.806884765625, 3.0177001953125, 3.228515625, 3.4393310546875, 3.650146484375, 3.8609619140625, 4.07177734375, 4.2825927734375, 4.493408203125, 4.7042236328125, 4.9150390625, 5.1258544921875, 5.336669921875, 5.5474853515625, 5.75830078125, 5.9691162109375, 6.179931640625, 6.3907470703125, 6.6015625]}, "gradients/decoder.transformer.h.2.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 5.0, 2.0, 1.0, 8.0, 10.0, 5.0, 13.0, 26.0, 25.0, 32.0, 54.0, 69.0, 83.0, 128.0, 194.0, 287.0, 392.0, 571.0, 906.0, 1362.0, 2340.0, 4494.0, 11509.0, 38887.0, 194030.0, 604385.0, 139534.0, 30075.0, 9312.0, 3894.0, 2115.0, 1280.0, 774.0, 533.0, 358.0, 231.0, 185.0, 132.0, 78.0, 60.0, 54.0, 36.0, 25.0, 17.0, 15.0, 14.0, 9.0, 2.0, 8.0, 5.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-16.09375, -15.564453125, -15.03515625, -14.505859375, -13.9765625, -13.447265625, -12.91796875, -12.388671875, -11.859375, -11.330078125, -10.80078125, -10.271484375, -9.7421875, -9.212890625, -8.68359375, -8.154296875, -7.625, -7.095703125, -6.56640625, -6.037109375, -5.5078125, -4.978515625, -4.44921875, -3.919921875, -3.390625, -2.861328125, -2.33203125, -1.802734375, -1.2734375, -0.744140625, -0.21484375, 0.314453125, 0.84375, 1.373046875, 1.90234375, 2.431640625, 2.9609375, 3.490234375, 4.01953125, 4.548828125, 5.078125, 5.607421875, 6.13671875, 6.666015625, 7.1953125, 7.724609375, 8.25390625, 8.783203125, 9.3125, 9.841796875, 10.37109375, 10.900390625, 11.4296875, 11.958984375, 12.48828125, 13.017578125, 13.546875, 14.076171875, 14.60546875, 15.134765625, 15.6640625, 16.193359375, 16.72265625, 17.251953125, 17.78125]}, "gradients/decoder.transformer.h.2.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 4.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 5.0, 3.0, 4.0, 7.0, 7.0, 3.0, 10.0, 11.0, 14.0, 13.0, 17.0, 24.0, 19.0, 26.0, 31.0, 35.0, 30.0, 50.0, 51.0, 64.0, 94.0, 167.0, 1565.0, 231.0, 99.0, 67.0, 54.0, 38.0, 41.0, 43.0, 40.0, 28.0, 20.0, 27.0, 15.0, 18.0, 14.0, 9.0, 14.0, 9.0, 10.0, 4.0, 5.0, 2.0, 3.0, 2.0, 3.0, 5.0, 1.0, 4.0, 2.0, 0.0, 0.0, 2.0], "bins": [-16.8125, -16.296630859375, -15.78076171875, -15.264892578125, -14.7490234375, -14.233154296875, -13.71728515625, -13.201416015625, -12.685546875, -12.169677734375, -11.65380859375, -11.137939453125, -10.6220703125, -10.106201171875, -9.59033203125, -9.074462890625, -8.55859375, -8.042724609375, -7.52685546875, -7.010986328125, -6.4951171875, -5.979248046875, -5.46337890625, -4.947509765625, -4.431640625, -3.915771484375, -3.39990234375, -2.884033203125, -2.3681640625, -1.852294921875, -1.33642578125, -0.820556640625, -0.3046875, 0.211181640625, 0.72705078125, 1.242919921875, 1.7587890625, 2.274658203125, 2.79052734375, 3.306396484375, 3.822265625, 4.338134765625, 4.85400390625, 5.369873046875, 5.8857421875, 6.401611328125, 6.91748046875, 7.433349609375, 7.94921875, 8.465087890625, 8.98095703125, 9.496826171875, 10.0126953125, 10.528564453125, 11.04443359375, 11.560302734375, 12.076171875, 12.592041015625, 13.10791015625, 13.623779296875, 14.1396484375, 14.655517578125, 15.17138671875, 15.687255859375, 16.203125]}, "gradients/decoder.transformer.h.2.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 4.0, 4.0, 5.0, 5.0, 2.0, 9.0, 4.0, 3.0, 8.0, 9.0, 13.0, 16.0, 19.0, 23.0, 28.0, 62.0, 68.0, 112.0, 132.0, 241.0, 321.0, 718.0, 1551.0, 50521.0, 3081331.0, 8074.0, 982.0, 507.0, 321.0, 204.0, 111.0, 73.0, 57.0, 35.0, 17.0, 30.0, 22.0, 15.0, 18.0, 9.0, 3.0, 7.0, 6.0, 1.0, 3.0, 3.0, 3.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-64.25, -62.26220703125, -60.2744140625, -58.28662109375, -56.298828125, -54.31103515625, -52.3232421875, -50.33544921875, -48.34765625, -46.35986328125, -44.3720703125, -42.38427734375, -40.396484375, -38.40869140625, -36.4208984375, -34.43310546875, -32.4453125, -30.45751953125, -28.4697265625, -26.48193359375, -24.494140625, -22.50634765625, -20.5185546875, -18.53076171875, -16.54296875, -14.55517578125, -12.5673828125, -10.57958984375, -8.591796875, -6.60400390625, -4.6162109375, -2.62841796875, -0.640625, 1.34716796875, 3.3349609375, 5.32275390625, 7.310546875, 9.29833984375, 11.2861328125, 13.27392578125, 15.26171875, 17.24951171875, 19.2373046875, 21.22509765625, 23.212890625, 25.20068359375, 27.1884765625, 29.17626953125, 31.1640625, 33.15185546875, 35.1396484375, 37.12744140625, 39.115234375, 41.10302734375, 43.0908203125, 45.07861328125, 47.06640625, 49.05419921875, 51.0419921875, 53.02978515625, 55.017578125, 57.00537109375, 58.9931640625, 60.98095703125, 62.96875]}, "gradients/decoder.transformer.h.2.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 13.0, 53.0, 151.0, 287.0, 267.0, 150.0, 60.0, 19.0, 7.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-110.69971466064453, -107.12454223632812, -103.54936218261719, -99.97418975830078, -96.39900970458984, -92.82383728027344, -89.2486572265625, -85.6734848022461, -82.09831237792969, -78.52313995361328, -74.94795989990234, -71.37278747558594, -67.797607421875, -64.2224349975586, -60.64725875854492, -57.07208251953125, -53.49690246582031, -49.92172622680664, -46.34654998779297, -42.77137756347656, -39.196197509765625, -35.62102508544922, -32.04584884643555, -28.470672607421875, -24.895496368408203, -21.32032012939453, -17.74514389038086, -14.16996955871582, -10.594793319702148, -7.019617080688477, -3.4444427490234375, 0.13073348999023438, 3.705902099609375, 7.281077861785889, 10.856253623962402, 14.431428909301758, 18.00660514831543, 21.5817813873291, 25.15695571899414, 28.732131958007812, 32.307308197021484, 35.882484436035156, 39.45766067504883, 43.0328369140625, 46.608009338378906, 50.183189392089844, 53.75836181640625, 57.33353805541992, 60.908714294433594, 64.48388671875, 68.05906677246094, 71.63423919677734, 75.20941925048828, 78.78459167480469, 82.35977172851562, 85.93494415283203, 89.51011657714844, 93.08528900146484, 96.66046905517578, 100.23564147949219, 103.81082153320312, 107.38599395751953, 110.96116638183594, 114.53634643554688, 118.11152648925781]}, "gradients/decoder.transformer.h.2.ln_1.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 9.0, 4.0, 5.0, 12.0, 6.0, 5.0, 10.0, 18.0, 16.0, 16.0, 19.0, 24.0, 21.0, 14.0, 20.0, 30.0, 29.0, 33.0, 44.0, 29.0, 36.0, 45.0, 34.0, 40.0, 33.0, 42.0, 36.0, 34.0, 25.0, 27.0, 32.0, 24.0, 20.0, 39.0, 21.0, 26.0, 17.0, 17.0, 19.0, 15.0, 8.0, 7.0, 8.0, 7.0, 7.0, 4.0, 5.0, 4.0, 4.0, 4.0, 1.0, 3.0, 2.0, 0.0, 0.0, 2.0], "bins": [-43.72930908203125, -42.34629821777344, -40.96328353881836, -39.58027267456055, -38.19725799560547, -36.814247131347656, -35.431236267089844, -34.04822540283203, -32.66521072387695, -31.282197952270508, -29.899185180664062, -28.51617431640625, -27.133161544799805, -25.75014877319336, -24.367137908935547, -22.9841251373291, -21.601112365722656, -20.21809959411621, -18.835086822509766, -17.452075958251953, -16.069063186645508, -14.686050415039062, -13.303038597106934, -11.920026779174805, -10.53701400756836, -9.154001235961914, -7.770989418029785, -6.387977123260498, -5.004964828491211, -3.621952533721924, -2.2389402389526367, -0.8559284210205078, 0.5270843505859375, 1.9100966453552246, 3.2931089401245117, 4.676121234893799, 6.059133529663086, 7.442145824432373, 8.82515811920166, 10.208169937133789, 11.591182708740234, 12.97419548034668, 14.357207298278809, 15.740219116210938, 17.123231887817383, 18.506244659423828, 19.88925552368164, 21.272268295288086, 22.65528106689453, 24.038293838500977, 25.421306610107422, 26.804317474365234, 28.18733024597168, 29.570343017578125, 30.953353881835938, 32.33636474609375, 33.71937942504883, 35.10239028930664, 36.48540496826172, 37.86841583251953, 39.251426696777344, 40.63444137573242, 42.017452239990234, 43.40046691894531, 44.783477783203125]}, "gradients/decoder.transformer.h.1.mlp.c_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 6.0, 2.0, 4.0, 6.0, 7.0, 7.0, 13.0, 14.0, 6.0, 23.0, 13.0, 12.0, 16.0, 19.0, 26.0, 21.0, 32.0, 36.0, 30.0, 38.0, 28.0, 34.0, 42.0, 36.0, 29.0, 46.0, 36.0, 41.0, 35.0, 27.0, 35.0, 28.0, 40.0, 25.0, 22.0, 25.0, 24.0, 19.0, 19.0, 13.0, 14.0, 12.0, 7.0, 9.0, 3.0, 5.0, 6.0, 3.0, 3.0, 6.0, 1.0, 5.0, 0.0, 4.0], "bins": [-7.2421875, -7.03363037109375, -6.8250732421875, -6.61651611328125, -6.407958984375, -6.19940185546875, -5.9908447265625, -5.78228759765625, -5.57373046875, -5.36517333984375, -5.1566162109375, -4.94805908203125, -4.739501953125, -4.53094482421875, -4.3223876953125, -4.11383056640625, -3.9052734375, -3.69671630859375, -3.4881591796875, -3.27960205078125, -3.071044921875, -2.86248779296875, -2.6539306640625, -2.44537353515625, -2.23681640625, -2.02825927734375, -1.8197021484375, -1.61114501953125, -1.402587890625, -1.19403076171875, -0.9854736328125, -0.77691650390625, -0.568359375, -0.35980224609375, -0.1512451171875, 0.05731201171875, 0.265869140625, 0.47442626953125, 0.6829833984375, 0.89154052734375, 1.10009765625, 1.30865478515625, 1.5172119140625, 1.72576904296875, 1.934326171875, 2.14288330078125, 2.3514404296875, 2.55999755859375, 2.7685546875, 2.97711181640625, 3.1856689453125, 3.39422607421875, 3.602783203125, 3.81134033203125, 4.0198974609375, 4.22845458984375, 4.43701171875, 4.64556884765625, 4.8541259765625, 5.06268310546875, 5.271240234375, 5.47979736328125, 5.6883544921875, 5.89691162109375, 6.10546875]}, "gradients/decoder.transformer.h.1.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 2.0, 3.0, 3.0, 5.0, 7.0, 12.0, 11.0, 16.0, 14.0, 32.0, 34.0, 49.0, 64.0, 97.0, 184.0, 233.0, 371.0, 492.0, 813.0, 1163.0, 1954.0, 3142.0, 5469.0, 10391.0, 23419.0, 88918.0, 527422.0, 2165791.0, 1091237.0, 198949.0, 41373.0, 14872.0, 7309.0, 3994.0, 2314.0, 1416.0, 931.0, 604.0, 378.0, 273.0, 179.0, 111.0, 62.0, 69.0, 40.0, 21.0, 14.0, 15.0, 8.0, 5.0, 7.0, 0.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-11.328125, -10.9710693359375, -10.614013671875, -10.2569580078125, -9.89990234375, -9.5428466796875, -9.185791015625, -8.8287353515625, -8.4716796875, -8.1146240234375, -7.757568359375, -7.4005126953125, -7.04345703125, -6.6864013671875, -6.329345703125, -5.9722900390625, -5.615234375, -5.2581787109375, -4.901123046875, -4.5440673828125, -4.18701171875, -3.8299560546875, -3.472900390625, -3.1158447265625, -2.7587890625, -2.4017333984375, -2.044677734375, -1.6876220703125, -1.33056640625, -0.9735107421875, -0.616455078125, -0.2593994140625, 0.09765625, 0.4547119140625, 0.811767578125, 1.1688232421875, 1.52587890625, 1.8829345703125, 2.239990234375, 2.5970458984375, 2.9541015625, 3.3111572265625, 3.668212890625, 4.0252685546875, 4.38232421875, 4.7393798828125, 5.096435546875, 5.4534912109375, 5.810546875, 6.1676025390625, 6.524658203125, 6.8817138671875, 7.23876953125, 7.5958251953125, 7.952880859375, 8.3099365234375, 8.6669921875, 9.0240478515625, 9.381103515625, 9.7381591796875, 10.09521484375, 10.4522705078125, 10.809326171875, 11.1663818359375, 11.5234375]}, "gradients/decoder.transformer.h.1.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 4.0, 2.0, 7.0, 6.0, 15.0, 27.0, 55.0, 95.0, 170.0, 344.0, 662.0, 1126.0, 728.0, 367.0, 194.0, 97.0, 71.0, 40.0, 25.0, 13.0, 9.0, 10.0, 2.0, 3.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.375, -19.70654296875, -19.0380859375, -18.36962890625, -17.701171875, -17.03271484375, -16.3642578125, -15.69580078125, -15.02734375, -14.35888671875, -13.6904296875, -13.02197265625, -12.353515625, -11.68505859375, -11.0166015625, -10.34814453125, -9.6796875, -9.01123046875, -8.3427734375, -7.67431640625, -7.005859375, -6.33740234375, -5.6689453125, -5.00048828125, -4.33203125, -3.66357421875, -2.9951171875, -2.32666015625, -1.658203125, -0.98974609375, -0.3212890625, 0.34716796875, 1.015625, 1.68408203125, 2.3525390625, 3.02099609375, 3.689453125, 4.35791015625, 5.0263671875, 5.69482421875, 6.36328125, 7.03173828125, 7.7001953125, 8.36865234375, 9.037109375, 9.70556640625, 10.3740234375, 11.04248046875, 11.7109375, 12.37939453125, 13.0478515625, 13.71630859375, 14.384765625, 15.05322265625, 15.7216796875, 16.39013671875, 17.05859375, 17.72705078125, 18.3955078125, 19.06396484375, 19.732421875, 20.40087890625, 21.0693359375, 21.73779296875, 22.40625]}, "gradients/decoder.transformer.h.1.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 5.0, 3.0, 2.0, 2.0, 11.0, 10.0, 14.0, 31.0, 38.0, 76.0, 132.0, 288.0, 636.0, 1642.0, 6737.0, 88417.0, 3948772.0, 136405.0, 8079.0, 1776.0, 646.0, 290.0, 108.0, 71.0, 25.0, 24.0, 17.0, 10.0, 0.0, 9.0, 7.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-44.90625, -43.56787109375, -42.2294921875, -40.89111328125, -39.552734375, -38.21435546875, -36.8759765625, -35.53759765625, -34.19921875, -32.86083984375, -31.5224609375, -30.18408203125, -28.845703125, -27.50732421875, -26.1689453125, -24.83056640625, -23.4921875, -22.15380859375, -20.8154296875, -19.47705078125, -18.138671875, -16.80029296875, -15.4619140625, -14.12353515625, -12.78515625, -11.44677734375, -10.1083984375, -8.77001953125, -7.431640625, -6.09326171875, -4.7548828125, -3.41650390625, -2.078125, -0.73974609375, 0.5986328125, 1.93701171875, 3.275390625, 4.61376953125, 5.9521484375, 7.29052734375, 8.62890625, 9.96728515625, 11.3056640625, 12.64404296875, 13.982421875, 15.32080078125, 16.6591796875, 17.99755859375, 19.3359375, 20.67431640625, 22.0126953125, 23.35107421875, 24.689453125, 26.02783203125, 27.3662109375, 28.70458984375, 30.04296875, 31.38134765625, 32.7197265625, 34.05810546875, 35.396484375, 36.73486328125, 38.0732421875, 39.41162109375, 40.75]}, "gradients/decoder.transformer.h.1.ln_2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 11.0, 52.0, 223.0, 469.0, 209.0, 33.0, 7.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-167.98924255371094, -161.14984130859375, -154.31045532226562, -147.47105407714844, -140.63165283203125, -133.79226684570312, -126.95286560058594, -120.11347198486328, -113.27407836914062, -106.43468475341797, -99.59529113769531, -92.75588989257812, -85.91649627685547, -79.07710266113281, -72.23770141601562, -65.39830780029297, -58.55891418457031, -51.719520568847656, -44.880123138427734, -38.04072570800781, -31.201332092285156, -24.3619384765625, -17.522541046142578, -10.683143615722656, -3.84375, 2.995645523071289, 9.835041046142578, 16.674436569213867, 23.513832092285156, 30.353225708007812, 37.192623138427734, 44.032020568847656, 50.871429443359375, 57.71082305908203, 64.55021667480469, 71.38961791992188, 78.22901153564453, 85.06840515136719, 91.90780639648438, 98.74720001220703, 105.58659362792969, 112.42598724365234, 119.265380859375, 126.10478210449219, 132.94418334960938, 139.7835693359375, 146.6229705810547, 153.46237182617188, 160.3017578125, 167.1411590576172, 173.9805450439453, 180.8199462890625, 187.65933227539062, 194.4987335205078, 201.338134765625, 208.17752075195312, 215.0169219970703, 221.8563232421875, 228.69570922851562, 235.5351104736328, 242.37451171875, 249.21389770507812, 256.05328369140625, 262.8927001953125, 269.7320861816406]}, "gradients/decoder.transformer.h.1.ln_2.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 4.0, 0.0, 7.0, 10.0, 9.0, 11.0, 5.0, 12.0, 18.0, 19.0, 17.0, 29.0, 24.0, 25.0, 21.0, 24.0, 20.0, 40.0, 34.0, 38.0, 35.0, 26.0, 49.0, 36.0, 31.0, 42.0, 41.0, 25.0, 40.0, 25.0, 27.0, 28.0, 28.0, 10.0, 18.0, 21.0, 33.0, 24.0, 12.0, 18.0, 18.0, 9.0, 10.0, 6.0, 2.0, 8.0, 6.0, 8.0, 4.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-45.673927307128906, -44.241294860839844, -42.80866241455078, -41.37602996826172, -39.94339370727539, -38.51076126098633, -37.078128814697266, -35.6454963684082, -34.21286392211914, -32.78023147583008, -31.347597122192383, -29.91496467590332, -28.482332229614258, -27.049697875976562, -25.6170654296875, -24.184432983398438, -22.751798629760742, -21.31916618347168, -19.886531829833984, -18.453899383544922, -17.02126693725586, -15.58863353729248, -14.156000137329102, -12.723367691040039, -11.29073429107666, -9.858100891113281, -8.425468444824219, -6.99283504486084, -5.560202121734619, -4.127569198608398, -2.6949357986450195, -1.262303352355957, 0.17033004760742188, 1.6029630899429321, 3.0355961322784424, 4.468229293823242, 5.900862216949463, 7.333495140075684, 8.766128540039062, 10.198760986328125, 11.631394386291504, 13.064027786254883, 14.496660232543945, 15.929293632507324, 17.361927032470703, 18.794559478759766, 20.227191925048828, 21.65982437133789, 23.092458724975586, 24.52509117126465, 25.957725524902344, 27.390357971191406, 28.82299041748047, 30.25562286376953, 31.688257217407227, 33.120887756347656, 34.553524017333984, 35.98615646362305, 37.41878890991211, 38.85142517089844, 40.2840576171875, 41.71669006347656, 43.149322509765625, 44.58195495605469, 46.01458740234375]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 6.0, 5.0, 4.0, 8.0, 9.0, 10.0, 5.0, 8.0, 23.0, 17.0, 22.0, 28.0, 30.0, 30.0, 34.0, 31.0, 39.0, 47.0, 39.0, 51.0, 42.0, 44.0, 46.0, 44.0, 41.0, 35.0, 44.0, 28.0, 35.0, 33.0, 25.0, 21.0, 26.0, 18.0, 19.0, 14.0, 14.0, 9.0, 7.0, 6.0, 4.0, 2.0, 5.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.203125, -6.96661376953125, -6.7301025390625, -6.49359130859375, -6.257080078125, -6.02056884765625, -5.7840576171875, -5.54754638671875, -5.31103515625, -5.07452392578125, -4.8380126953125, -4.60150146484375, -4.364990234375, -4.12847900390625, -3.8919677734375, -3.65545654296875, -3.4189453125, -3.18243408203125, -2.9459228515625, -2.70941162109375, -2.472900390625, -2.23638916015625, -1.9998779296875, -1.76336669921875, -1.52685546875, -1.29034423828125, -1.0538330078125, -0.81732177734375, -0.580810546875, -0.34429931640625, -0.1077880859375, 0.12872314453125, 0.365234375, 0.60174560546875, 0.8382568359375, 1.07476806640625, 1.311279296875, 1.54779052734375, 1.7843017578125, 2.02081298828125, 2.25732421875, 2.49383544921875, 2.7303466796875, 2.96685791015625, 3.203369140625, 3.43988037109375, 3.6763916015625, 3.91290283203125, 4.1494140625, 4.38592529296875, 4.6224365234375, 4.85894775390625, 5.095458984375, 5.33197021484375, 5.5684814453125, 5.80499267578125, 6.04150390625, 6.27801513671875, 6.5145263671875, 6.75103759765625, 6.987548828125, 7.22406005859375, 7.4605712890625, 7.69708251953125, 7.93359375]}, "gradients/decoder.transformer.h.1.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 9.0, 6.0, 9.0, 13.0, 25.0, 46.0, 55.0, 89.0, 166.0, 259.0, 417.0, 660.0, 1026.0, 1750.0, 2713.0, 4554.0, 7390.0, 12319.0, 21263.0, 38022.0, 72189.0, 158294.0, 389901.0, 168094.0, 75274.0, 39505.0, 22021.0, 12804.0, 7582.0, 4653.0, 2796.0, 1782.0, 1054.0, 722.0, 428.0, 240.0, 173.0, 89.0, 61.0, 38.0, 31.0, 12.0, 13.0, 10.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.28076171875, -0.2723884582519531, -0.26401519775390625, -0.2556419372558594, -0.2472686767578125, -0.23889541625976562, -0.23052215576171875, -0.22214889526367188, -0.213775634765625, -0.20540237426757812, -0.19702911376953125, -0.18865585327148438, -0.1802825927734375, -0.17190933227539062, -0.16353607177734375, -0.15516281127929688, -0.14678955078125, -0.13841629028320312, -0.13004302978515625, -0.12166976928710938, -0.1132965087890625, -0.10492324829101562, -0.09654998779296875, -0.08817672729492188, -0.079803466796875, -0.07143020629882812, -0.06305694580078125, -0.054683685302734375, -0.0463104248046875, -0.037937164306640625, -0.02956390380859375, -0.021190643310546875, -0.0128173828125, -0.004444122314453125, 0.00392913818359375, 0.012302398681640625, 0.0206756591796875, 0.029048919677734375, 0.03742218017578125, 0.045795440673828125, 0.054168701171875, 0.06254196166992188, 0.07091522216796875, 0.07928848266601562, 0.0876617431640625, 0.09603500366210938, 0.10440826416015625, 0.11278152465820312, 0.12115478515625, 0.12952804565429688, 0.13790130615234375, 0.14627456665039062, 0.1546478271484375, 0.16302108764648438, 0.17139434814453125, 0.17976760864257812, 0.188140869140625, 0.19651412963867188, 0.20488739013671875, 0.21326065063476562, 0.2216339111328125, 0.23000717163085938, 0.23838043212890625, 0.24675369262695312, 0.255126953125]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 3.0, 3.0, 8.0, 4.0, 6.0, 5.0, 12.0, 9.0, 18.0, 13.0, 20.0, 23.0, 20.0, 30.0, 26.0, 34.0, 27.0, 57.0, 39.0, 32.0, 46.0, 45.0, 1061.0, 33.0, 45.0, 31.0, 37.0, 43.0, 36.0, 32.0, 32.0, 29.0, 23.0, 21.0, 30.0, 20.0, 14.0, 7.0, 8.0, 8.0, 9.0, 6.0, 10.0, 9.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0], "bins": [-5.80859375, -5.6263427734375, -5.444091796875, -5.2618408203125, -5.07958984375, -4.8973388671875, -4.715087890625, -4.5328369140625, -4.3505859375, -4.1683349609375, -3.986083984375, -3.8038330078125, -3.62158203125, -3.4393310546875, -3.257080078125, -3.0748291015625, -2.892578125, -2.7103271484375, -2.528076171875, -2.3458251953125, -2.16357421875, -1.9813232421875, -1.799072265625, -1.6168212890625, -1.4345703125, -1.2523193359375, -1.070068359375, -0.8878173828125, -0.70556640625, -0.5233154296875, -0.341064453125, -0.1588134765625, 0.0234375, 0.2056884765625, 0.387939453125, 0.5701904296875, 0.75244140625, 0.9346923828125, 1.116943359375, 1.2991943359375, 1.4814453125, 1.6636962890625, 1.845947265625, 2.0281982421875, 2.21044921875, 2.3927001953125, 2.574951171875, 2.7572021484375, 2.939453125, 3.1217041015625, 3.303955078125, 3.4862060546875, 3.66845703125, 3.8507080078125, 4.032958984375, 4.2152099609375, 4.3974609375, 4.5797119140625, 4.761962890625, 4.9442138671875, 5.12646484375, 5.3087158203125, 5.490966796875, 5.6732177734375, 5.85546875]}, "gradients/decoder.transformer.h.1.crossattention.c_attn.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 8.0, 6.0, 12.0, 13.0, 20.0, 29.0, 43.0, 61.0, 102.0, 162.0, 226.0, 320.0, 470.0, 732.0, 1220.0, 1908.0, 2852.0, 4500.0, 7466.0, 12116.0, 20067.0, 35336.0, 64126.0, 127345.0, 1380887.0, 220041.0, 95691.0, 50139.0, 28134.0, 16547.0, 9827.0, 6077.0, 3846.0, 2380.0, 1499.0, 970.0, 648.0, 432.0, 306.0, 196.0, 108.0, 71.0, 56.0, 43.0, 35.0, 30.0, 13.0, 6.0, 7.0, 3.0, 4.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0], "bins": [-0.254150390625, -0.24615859985351562, -0.23816680908203125, -0.23017501831054688, -0.2221832275390625, -0.21419143676757812, -0.20619964599609375, -0.19820785522460938, -0.190216064453125, -0.18222427368164062, -0.17423248291015625, -0.16624069213867188, -0.1582489013671875, -0.15025711059570312, -0.14226531982421875, -0.13427352905273438, -0.12628173828125, -0.11828994750976562, -0.11029815673828125, -0.10230636596679688, -0.0943145751953125, -0.08632278442382812, -0.07833099365234375, -0.07033920288085938, -0.062347412109375, -0.054355621337890625, -0.04636383056640625, -0.038372039794921875, -0.0303802490234375, -0.022388458251953125, -0.01439666748046875, -0.006404876708984375, 0.0015869140625, 0.009578704833984375, 0.01757049560546875, 0.025562286376953125, 0.0335540771484375, 0.041545867919921875, 0.04953765869140625, 0.057529449462890625, 0.065521240234375, 0.07351303100585938, 0.08150482177734375, 0.08949661254882812, 0.0974884033203125, 0.10548019409179688, 0.11347198486328125, 0.12146377563476562, 0.12945556640625, 0.13744735717773438, 0.14543914794921875, 0.15343093872070312, 0.1614227294921875, 0.16941452026367188, 0.17740631103515625, 0.18539810180664062, 0.193389892578125, 0.20138168334960938, 0.20937347412109375, 0.21736526489257812, 0.2253570556640625, 0.23334884643554688, 0.24134063720703125, 0.24933242797851562, 0.25732421875]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 9.0, 9.0, 5.0, 6.0, 9.0, 20.0, 15.0, 26.0, 37.0, 43.0, 38.0, 71.0, 58.0, 48.0, 57.0, 76.0, 73.0, 68.0, 63.0, 50.0, 36.0, 40.0, 28.0, 24.0, 19.0, 16.0, 14.0, 8.0, 10.0, 7.0, 3.0, 4.0, 5.0, 4.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7642974853515625e-05, -1.7112120985984802e-05, -1.658126711845398e-05, -1.6050413250923157e-05, -1.5519559383392334e-05, -1.4988705515861511e-05, -1.4457851648330688e-05, -1.3926997780799866e-05, -1.3396143913269043e-05, -1.286529004573822e-05, -1.2334436178207397e-05, -1.1803582310676575e-05, -1.1272728443145752e-05, -1.074187457561493e-05, -1.0211020708084106e-05, -9.680166840553284e-06, -9.149312973022461e-06, -8.618459105491638e-06, -8.087605237960815e-06, -7.556751370429993e-06, -7.02589750289917e-06, -6.495043635368347e-06, -5.964189767837524e-06, -5.433335900306702e-06, -4.902482032775879e-06, -4.371628165245056e-06, -3.840774297714233e-06, -3.3099204301834106e-06, -2.779066562652588e-06, -2.248212695121765e-06, -1.7173588275909424e-06, -1.1865049600601196e-06, -6.556510925292969e-07, -1.2479722499847412e-07, 4.0605664253234863e-07, 9.369105100631714e-07, 1.4677643775939941e-06, 1.998618245124817e-06, 2.5294721126556396e-06, 3.0603259801864624e-06, 3.591179847717285e-06, 4.122033715248108e-06, 4.652887582778931e-06, 5.183741450309753e-06, 5.714595317840576e-06, 6.245449185371399e-06, 6.776303052902222e-06, 7.3071569204330444e-06, 7.838010787963867e-06, 8.36886465549469e-06, 8.899718523025513e-06, 9.430572390556335e-06, 9.961426258087158e-06, 1.0492280125617981e-05, 1.1023133993148804e-05, 1.1553987860679626e-05, 1.208484172821045e-05, 1.2615695595741272e-05, 1.3146549463272095e-05, 1.3677403330802917e-05, 1.420825719833374e-05, 1.4739111065864563e-05, 1.5269964933395386e-05, 1.580081880092621e-05, 1.633167266845703e-05]}, "gradients/decoder.transformer.h.1.crossattention.q_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 4.0, 5.0, 4.0, 4.0, 4.0, 3.0, 17.0, 15.0, 15.0, 25.0, 28.0, 36.0, 55.0, 61.0, 76.0, 136.0, 206.0, 406.0, 899.0, 9213.0, 1024544.0, 10851.0, 926.0, 384.0, 206.0, 115.0, 84.0, 75.0, 36.0, 29.0, 29.0, 14.0, 16.0, 3.0, 10.0, 9.0, 5.0, 5.0, 3.0, 5.0, 0.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00030040740966796875, -0.0002905614674091339, -0.00028071552515029907, -0.00027086958289146423, -0.0002610236406326294, -0.00025117769837379456, -0.00024133175611495972, -0.00023148581385612488, -0.00022163987159729004, -0.0002117939293384552, -0.00020194798707962036, -0.00019210204482078552, -0.00018225610256195068, -0.00017241016030311584, -0.000162564218044281, -0.00015271827578544617, -0.00014287233352661133, -0.0001330263912677765, -0.00012318044900894165, -0.00011333450675010681, -0.00010348856449127197, -9.364262223243713e-05, -8.37966799736023e-05, -7.395073771476746e-05, -6.410479545593262e-05, -5.425885319709778e-05, -4.441291093826294e-05, -3.45669686794281e-05, -2.4721026420593262e-05, -1.4875084161758423e-05, -5.029141902923584e-06, 4.816800355911255e-06, 1.4662742614746094e-05, 2.4508684873580933e-05, 3.435462713241577e-05, 4.420056939125061e-05, 5.404651165008545e-05, 6.389245390892029e-05, 7.373839616775513e-05, 8.358433842658997e-05, 9.34302806854248e-05, 0.00010327622294425964, 0.00011312216520309448, 0.00012296810746192932, 0.00013281404972076416, 0.000142659991979599, 0.00015250593423843384, 0.00016235187649726868, 0.00017219781875610352, 0.00018204376101493835, 0.0001918897032737732, 0.00020173564553260803, 0.00021158158779144287, 0.0002214275300502777, 0.00023127347230911255, 0.0002411194145679474, 0.0002509653568267822, 0.00026081129908561707, 0.0002706572413444519, 0.00028050318360328674, 0.0002903491258621216, 0.0003001950681209564, 0.00031004101037979126, 0.0003198869526386261, 0.00032973289489746094]}, "gradients/decoder.transformer.h.1.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 9.0, 23.0, 140.0, 370.0, 327.0, 115.0, 20.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4028178813750856e-05, -2.310104900971055e-05, -2.217392102465965e-05, -2.1246791220619343e-05, -2.0319661416579038e-05, -1.9392531612538733e-05, -1.8465401808498427e-05, -1.7538273823447526e-05, -1.661114401940722e-05, -1.5684014215366915e-05, -1.4756885320821311e-05, -1.3829756426275708e-05, -1.2902626622235402e-05, -1.1975496818195097e-05, -1.1048367923649494e-05, -1.012123902910389e-05, -9.194109225063585e-06, -8.26697942102328e-06, -7.339850526477676e-06, -6.4127211771847215e-06, -5.485591827891767e-06, -4.558462478598813e-06, -3.631333129305858e-06, -2.7042037800129037e-06, -1.7770744307199493e-06, -8.499450814269949e-07, 7.718426786595955e-08, 1.004313617158914e-06, 1.9314429664518684e-06, 2.858572315744823e-06, 3.7857016650377773e-06, 4.712831014330732e-06, 5.63996218261309e-06, 6.567091531906044e-06, 7.4942208811989985e-06, 8.421349775744602e-06, 9.348479579784907e-06, 1.0275609383825213e-05, 1.1202738278370816e-05, 1.212986717291642e-05, 1.3056996976956725e-05, 1.398412678099703e-05, 1.4911255675542634e-05, 1.5838384570088238e-05, 1.6765514374128543e-05, 1.7692644178168848e-05, 1.861977216321975e-05, 1.9546901967260055e-05, 2.047403177130036e-05, 2.1401161575340666e-05, 2.232829137938097e-05, 2.3255419364431873e-05, 2.4182549168472178e-05, 2.5109678972512484e-05, 2.6036806957563385e-05, 2.696393676160369e-05, 2.7891066565643996e-05, 2.88181963696843e-05, 2.9745326173724607e-05, 3.067245415877551e-05, 3.159958578180522e-05, 3.252671376685612e-05, 3.345384175190702e-05, 3.438097337493673e-05, 3.530810135998763e-05]}, "gradients/decoder.transformer.h.1.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 3.0, 1.0, 4.0, 1.0, 4.0, 3.0, 5.0, 11.0, 5.0, 11.0, 9.0, 5.0, 14.0, 19.0, 21.0, 18.0, 26.0, 25.0, 30.0, 29.0, 49.0, 43.0, 33.0, 39.0, 36.0, 48.0, 37.0, 40.0, 34.0, 47.0, 29.0, 29.0, 37.0, 37.0, 27.0, 19.0, 30.0, 23.0, 17.0, 17.0, 16.0, 13.0, 15.0, 13.0, 17.0, 6.0, 6.0, 5.0, 4.0, 2.0, 0.0, 1.0, 2.0, 2.0, 1.0], "bins": [-8.404254913330078e-06, -8.169561624526978e-06, -7.934868335723877e-06, -7.700175046920776e-06, -7.465481758117676e-06, -7.230788469314575e-06, -6.996095180511475e-06, -6.761401891708374e-06, -6.5267086029052734e-06, -6.292015314102173e-06, -6.057322025299072e-06, -5.822628736495972e-06, -5.587935447692871e-06, -5.3532421588897705e-06, -5.11854887008667e-06, -4.883855581283569e-06, -4.649162292480469e-06, -4.414469003677368e-06, -4.179775714874268e-06, -3.945082426071167e-06, -3.7103891372680664e-06, -3.475695848464966e-06, -3.2410025596618652e-06, -3.0063092708587646e-06, -2.771615982055664e-06, -2.5369226932525635e-06, -2.302229404449463e-06, -2.0675361156463623e-06, -1.8328428268432617e-06, -1.5981495380401611e-06, -1.3634562492370605e-06, -1.12876296043396e-06, -8.940696716308594e-07, -6.593763828277588e-07, -4.246830940246582e-07, -1.8998980522155762e-07, 4.470348358154297e-08, 2.7939677238464355e-07, 5.140900611877441e-07, 7.487833499908447e-07, 9.834766387939453e-07, 1.218169927597046e-06, 1.4528632164001465e-06, 1.687556505203247e-06, 1.9222497940063477e-06, 2.1569430828094482e-06, 2.391636371612549e-06, 2.6263296604156494e-06, 2.86102294921875e-06, 3.0957162380218506e-06, 3.330409526824951e-06, 3.5651028156280518e-06, 3.7997961044311523e-06, 4.034489393234253e-06, 4.2691826820373535e-06, 4.503875970840454e-06, 4.738569259643555e-06, 4.973262548446655e-06, 5.207955837249756e-06, 5.4426491260528564e-06, 5.677342414855957e-06, 5.912035703659058e-06, 6.146728992462158e-06, 6.381422281265259e-06, 6.616115570068359e-06]}, "gradients/decoder.transformer.h.1.attn.c_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 6.0, 5.0, 4.0, 8.0, 9.0, 10.0, 5.0, 8.0, 23.0, 17.0, 22.0, 28.0, 30.0, 30.0, 34.0, 31.0, 39.0, 47.0, 39.0, 51.0, 42.0, 44.0, 46.0, 44.0, 41.0, 35.0, 44.0, 28.0, 35.0, 33.0, 25.0, 21.0, 26.0, 18.0, 19.0, 14.0, 14.0, 9.0, 7.0, 6.0, 4.0, 2.0, 5.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.203125, -6.96661376953125, -6.7301025390625, -6.49359130859375, -6.257080078125, -6.02056884765625, -5.7840576171875, -5.54754638671875, -5.31103515625, -5.07452392578125, -4.8380126953125, -4.60150146484375, -4.364990234375, -4.12847900390625, -3.8919677734375, -3.65545654296875, -3.4189453125, -3.18243408203125, -2.9459228515625, -2.70941162109375, -2.472900390625, -2.23638916015625, -1.9998779296875, -1.76336669921875, -1.52685546875, -1.29034423828125, -1.0538330078125, -0.81732177734375, -0.580810546875, -0.34429931640625, -0.1077880859375, 0.12872314453125, 0.365234375, 0.60174560546875, 0.8382568359375, 1.07476806640625, 1.311279296875, 1.54779052734375, 1.7843017578125, 2.02081298828125, 2.25732421875, 2.49383544921875, 2.7303466796875, 2.96685791015625, 3.203369140625, 3.43988037109375, 3.6763916015625, 3.91290283203125, 4.1494140625, 4.38592529296875, 4.6224365234375, 4.85894775390625, 5.095458984375, 5.33197021484375, 5.5684814453125, 5.80499267578125, 6.04150390625, 6.27801513671875, 6.5145263671875, 6.75103759765625, 6.987548828125, 7.22406005859375, 7.4605712890625, 7.69708251953125, 7.93359375]}, "gradients/decoder.transformer.h.1.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 5.0, 9.0, 7.0, 9.0, 10.0, 10.0, 20.0, 21.0, 42.0, 82.0, 90.0, 113.0, 186.0, 365.0, 554.0, 1010.0, 2165.0, 5422.0, 17481.0, 83340.0, 582112.0, 293601.0, 43712.0, 10847.0, 3679.0, 1582.0, 838.0, 459.0, 259.0, 183.0, 95.0, 83.0, 50.0, 31.0, 33.0, 20.0, 7.0, 7.0, 5.0, 8.0, 5.0, 6.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-14.1484375, -13.689208984375, -13.22998046875, -12.770751953125, -12.3115234375, -11.852294921875, -11.39306640625, -10.933837890625, -10.474609375, -10.015380859375, -9.55615234375, -9.096923828125, -8.6376953125, -8.178466796875, -7.71923828125, -7.260009765625, -6.80078125, -6.341552734375, -5.88232421875, -5.423095703125, -4.9638671875, -4.504638671875, -4.04541015625, -3.586181640625, -3.126953125, -2.667724609375, -2.20849609375, -1.749267578125, -1.2900390625, -0.830810546875, -0.37158203125, 0.087646484375, 0.546875, 1.006103515625, 1.46533203125, 1.924560546875, 2.3837890625, 2.843017578125, 3.30224609375, 3.761474609375, 4.220703125, 4.679931640625, 5.13916015625, 5.598388671875, 6.0576171875, 6.516845703125, 6.97607421875, 7.435302734375, 7.89453125, 8.353759765625, 8.81298828125, 9.272216796875, 9.7314453125, 10.190673828125, 10.64990234375, 11.109130859375, 11.568359375, 12.027587890625, 12.48681640625, 12.946044921875, 13.4052734375, 13.864501953125, 14.32373046875, 14.782958984375, 15.2421875]}, "gradients/decoder.transformer.h.1.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 5.0, 6.0, 7.0, 6.0, 4.0, 7.0, 9.0, 13.0, 13.0, 21.0, 19.0, 42.0, 49.0, 57.0, 65.0, 78.0, 123.0, 331.0, 1606.0, 153.0, 86.0, 59.0, 58.0, 40.0, 44.0, 41.0, 15.0, 16.0, 15.0, 13.0, 7.0, 11.0, 9.0, 4.0, 5.0, 3.0, 4.0, 2.0, 4.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-24.484375, -23.698486328125, -22.91259765625, -22.126708984375, -21.3408203125, -20.554931640625, -19.76904296875, -18.983154296875, -18.197265625, -17.411376953125, -16.62548828125, -15.839599609375, -15.0537109375, -14.267822265625, -13.48193359375, -12.696044921875, -11.91015625, -11.124267578125, -10.33837890625, -9.552490234375, -8.7666015625, -7.980712890625, -7.19482421875, -6.408935546875, -5.623046875, -4.837158203125, -4.05126953125, -3.265380859375, -2.4794921875, -1.693603515625, -0.90771484375, -0.121826171875, 0.6640625, 1.449951171875, 2.23583984375, 3.021728515625, 3.8076171875, 4.593505859375, 5.37939453125, 6.165283203125, 6.951171875, 7.737060546875, 8.52294921875, 9.308837890625, 10.0947265625, 10.880615234375, 11.66650390625, 12.452392578125, 13.23828125, 14.024169921875, 14.81005859375, 15.595947265625, 16.3818359375, 17.167724609375, 17.95361328125, 18.739501953125, 19.525390625, 20.311279296875, 21.09716796875, 21.883056640625, 22.6689453125, 23.454833984375, 24.24072265625, 25.026611328125, 25.8125]}, "gradients/decoder.transformer.h.1.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 4.0, 6.0, 2.0, 10.0, 7.0, 6.0, 12.0, 15.0, 21.0, 30.0, 47.0, 69.0, 94.0, 152.0, 251.0, 448.0, 1200.0, 22222.0, 3114046.0, 5217.0, 890.0, 356.0, 203.0, 125.0, 98.0, 47.0, 37.0, 16.0, 12.0, 16.0, 9.0, 16.0, 8.0, 1.0, 3.0, 2.0, 4.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0], "bins": [-70.125, -68.083984375, -66.04296875, -64.001953125, -61.9609375, -59.919921875, -57.87890625, -55.837890625, -53.796875, -51.755859375, -49.71484375, -47.673828125, -45.6328125, -43.591796875, -41.55078125, -39.509765625, -37.46875, -35.427734375, -33.38671875, -31.345703125, -29.3046875, -27.263671875, -25.22265625, -23.181640625, -21.140625, -19.099609375, -17.05859375, -15.017578125, -12.9765625, -10.935546875, -8.89453125, -6.853515625, -4.8125, -2.771484375, -0.73046875, 1.310546875, 3.3515625, 5.392578125, 7.43359375, 9.474609375, 11.515625, 13.556640625, 15.59765625, 17.638671875, 19.6796875, 21.720703125, 23.76171875, 25.802734375, 27.84375, 29.884765625, 31.92578125, 33.966796875, 36.0078125, 38.048828125, 40.08984375, 42.130859375, 44.171875, 46.212890625, 48.25390625, 50.294921875, 52.3359375, 54.376953125, 56.41796875, 58.458984375, 60.5]}, "gradients/decoder.transformer.h.1.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 3.0, 140.0, 873.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1012.1195678710938, -991.9522705078125, -971.7849731445312, -951.61767578125, -931.4503784179688, -911.2830810546875, -891.1157836914062, -870.948486328125, -850.7811279296875, -830.6138305664062, -810.446533203125, -790.2792358398438, -770.1119384765625, -749.9446411132812, -729.77734375, -709.6099853515625, -689.4427490234375, -669.2754516601562, -649.108154296875, -628.9408569335938, -608.7735595703125, -588.6062622070312, -568.43896484375, -548.2716064453125, -528.1043701171875, -507.93707275390625, -487.769775390625, -467.60247802734375, -447.4351806640625, -427.26788330078125, -407.1005554199219, -386.9332580566406, -366.7658996582031, -346.5986022949219, -326.4313049316406, -306.2640075683594, -286.0966796875, -265.92938232421875, -245.7620849609375, -225.59478759765625, -205.427490234375, -185.26019287109375, -165.0928955078125, -144.9255828857422, -124.75828552246094, -104.59098815917969, -84.4236831665039, -64.25637817382812, -44.089080810546875, -23.92177963256836, -3.7544784545898438, 16.412822723388672, 36.58012390136719, 56.74742126464844, 76.91472625732422, 97.08203125, 117.24932861328125, 137.4166259765625, 157.58392333984375, 177.75123596191406, 197.9185333251953, 218.08583068847656, 238.25314331054688, 258.4204406738281, 278.5877380371094]}, "gradients/decoder.transformer.h.1.ln_1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 2.0, 5.0, 5.0, 12.0, 7.0, 11.0, 14.0, 17.0, 23.0, 17.0, 17.0, 17.0, 32.0, 36.0, 31.0, 40.0, 23.0, 41.0, 52.0, 34.0, 39.0, 36.0, 33.0, 38.0, 53.0, 35.0, 36.0, 50.0, 34.0, 31.0, 22.0, 25.0, 17.0, 23.0, 24.0, 17.0, 15.0, 6.0, 10.0, 10.0, 5.0, 1.0, 2.0, 4.0, 1.0, 1.0, 4.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-61.51294708251953, -59.899192810058594, -58.28543472290039, -56.67168045043945, -55.057926177978516, -53.44416809082031, -51.830413818359375, -50.21665954589844, -48.6029052734375, -46.98915100097656, -45.37539291381836, -43.76163864135742, -42.147884368896484, -40.53412628173828, -38.920372009277344, -37.306617736816406, -35.69286346435547, -34.07910919189453, -32.46535110473633, -30.85159683227539, -29.237842559814453, -27.624086380004883, -26.010330200195312, -24.396575927734375, -22.782817840576172, -21.1690616607666, -19.555307388305664, -17.941551208496094, -16.327796936035156, -14.714040756225586, -13.100285530090332, -11.486530303955078, -9.87277603149414, -8.259020805358887, -6.645265579223633, -5.031509876251221, -3.417754650115967, -1.8039989471435547, -0.19024372100830078, 1.4235115051269531, 3.037266731262207, 4.651021957397461, 6.264777183532715, 7.878532886505127, 9.492288589477539, 11.106043815612793, 12.719799041748047, 14.3335542678833, 15.947309494018555, 17.561065673828125, 19.174819946289062, 20.788576126098633, 22.40233039855957, 24.01608657836914, 25.629840850830078, 27.24359703063965, 28.85735321044922, 30.47110939025879, 32.08486557006836, 33.6986198425293, 35.312374114990234, 36.92613220214844, 38.539886474609375, 40.15364074707031, 41.76739501953125]}, "gradients/decoder.transformer.h.0.mlp.c_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 5.0, 6.0, 6.0, 10.0, 16.0, 10.0, 25.0, 19.0, 17.0, 30.0, 21.0, 24.0, 22.0, 42.0, 35.0, 42.0, 48.0, 52.0, 42.0, 42.0, 34.0, 42.0, 38.0, 47.0, 38.0, 26.0, 28.0, 33.0, 31.0, 21.0, 23.0, 21.0, 28.0, 12.0, 13.0, 11.0, 16.0, 3.0, 12.0, 9.0, 5.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.75, -6.50048828125, -6.2509765625, -6.00146484375, -5.751953125, -5.50244140625, -5.2529296875, -5.00341796875, -4.75390625, -4.50439453125, -4.2548828125, -4.00537109375, -3.755859375, -3.50634765625, -3.2568359375, -3.00732421875, -2.7578125, -2.50830078125, -2.2587890625, -2.00927734375, -1.759765625, -1.51025390625, -1.2607421875, -1.01123046875, -0.76171875, -0.51220703125, -0.2626953125, -0.01318359375, 0.236328125, 0.48583984375, 0.7353515625, 0.98486328125, 1.234375, 1.48388671875, 1.7333984375, 1.98291015625, 2.232421875, 2.48193359375, 2.7314453125, 2.98095703125, 3.23046875, 3.47998046875, 3.7294921875, 3.97900390625, 4.228515625, 4.47802734375, 4.7275390625, 4.97705078125, 5.2265625, 5.47607421875, 5.7255859375, 5.97509765625, 6.224609375, 6.47412109375, 6.7236328125, 6.97314453125, 7.22265625, 7.47216796875, 7.7216796875, 7.97119140625, 8.220703125, 8.47021484375, 8.7197265625, 8.96923828125, 9.21875]}, "gradients/decoder.transformer.h.0.mlp.c_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 3.0, 3.0, 4.0, 11.0, 15.0, 28.0, 38.0, 45.0, 65.0, 108.0, 151.0, 205.0, 300.0, 451.0, 706.0, 1614.0, 6836.0, 165853.0, 3921724.0, 87686.0, 4981.0, 1397.0, 691.0, 439.0, 285.0, 200.0, 154.0, 106.0, 58.0, 41.0, 25.0, 20.0, 10.0, 14.0, 3.0, 6.0, 3.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-48.28125, -46.45458984375, -44.6279296875, -42.80126953125, -40.974609375, -39.14794921875, -37.3212890625, -35.49462890625, -33.66796875, -31.84130859375, -30.0146484375, -28.18798828125, -26.361328125, -24.53466796875, -22.7080078125, -20.88134765625, -19.0546875, -17.22802734375, -15.4013671875, -13.57470703125, -11.748046875, -9.92138671875, -8.0947265625, -6.26806640625, -4.44140625, -2.61474609375, -0.7880859375, 1.03857421875, 2.865234375, 4.69189453125, 6.5185546875, 8.34521484375, 10.171875, 11.99853515625, 13.8251953125, 15.65185546875, 17.478515625, 19.30517578125, 21.1318359375, 22.95849609375, 24.78515625, 26.61181640625, 28.4384765625, 30.26513671875, 32.091796875, 33.91845703125, 35.7451171875, 37.57177734375, 39.3984375, 41.22509765625, 43.0517578125, 44.87841796875, 46.705078125, 48.53173828125, 50.3583984375, 52.18505859375, 54.01171875, 55.83837890625, 57.6650390625, 59.49169921875, 61.318359375, 63.14501953125, 64.9716796875, 66.79833984375, 68.625]}, "gradients/decoder.transformer.h.0.mlp.c_fc.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 4.0, 6.0, 3.0, 10.0, 16.0, 27.0, 40.0, 90.0, 117.0, 171.0, 289.0, 498.0, 686.0, 696.0, 526.0, 306.0, 213.0, 139.0, 74.0, 59.0, 40.0, 24.0, 17.0, 12.0, 5.0, 5.0, 2.0, 2.0, 1.0, 3.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-18.140625, -17.42041015625, -16.7001953125, -15.97998046875, -15.259765625, -14.53955078125, -13.8193359375, -13.09912109375, -12.37890625, -11.65869140625, -10.9384765625, -10.21826171875, -9.498046875, -8.77783203125, -8.0576171875, -7.33740234375, -6.6171875, -5.89697265625, -5.1767578125, -4.45654296875, -3.736328125, -3.01611328125, -2.2958984375, -1.57568359375, -0.85546875, -0.13525390625, 0.5849609375, 1.30517578125, 2.025390625, 2.74560546875, 3.4658203125, 4.18603515625, 4.90625, 5.62646484375, 6.3466796875, 7.06689453125, 7.787109375, 8.50732421875, 9.2275390625, 9.94775390625, 10.66796875, 11.38818359375, 12.1083984375, 12.82861328125, 13.548828125, 14.26904296875, 14.9892578125, 15.70947265625, 16.4296875, 17.14990234375, 17.8701171875, 18.59033203125, 19.310546875, 20.03076171875, 20.7509765625, 21.47119140625, 22.19140625, 22.91162109375, 23.6318359375, 24.35205078125, 25.072265625, 25.79248046875, 26.5126953125, 27.23291015625, 27.953125]}, "gradients/decoder.transformer.h.0.mlp.c_fc.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 6.0, 6.0, 2.0, 16.0, 18.0, 21.0, 39.0, 58.0, 94.0, 198.0, 424.0, 929.0, 2233.0, 8293.0, 64867.0, 1570801.0, 2437627.0, 93567.0, 10580.0, 2605.0, 949.0, 458.0, 218.0, 111.0, 65.0, 39.0, 22.0, 16.0, 9.0, 6.0, 3.0, 2.0, 3.0, 4.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-35.34375, -34.461669921875, -33.57958984375, -32.697509765625, -31.8154296875, -30.933349609375, -30.05126953125, -29.169189453125, -28.287109375, -27.405029296875, -26.52294921875, -25.640869140625, -24.7587890625, -23.876708984375, -22.99462890625, -22.112548828125, -21.23046875, -20.348388671875, -19.46630859375, -18.584228515625, -17.7021484375, -16.820068359375, -15.93798828125, -15.055908203125, -14.173828125, -13.291748046875, -12.40966796875, -11.527587890625, -10.6455078125, -9.763427734375, -8.88134765625, -7.999267578125, -7.1171875, -6.235107421875, -5.35302734375, -4.470947265625, -3.5888671875, -2.706787109375, -1.82470703125, -0.942626953125, -0.060546875, 0.821533203125, 1.70361328125, 2.585693359375, 3.4677734375, 4.349853515625, 5.23193359375, 6.114013671875, 6.99609375, 7.878173828125, 8.76025390625, 9.642333984375, 10.5244140625, 11.406494140625, 12.28857421875, 13.170654296875, 14.052734375, 14.934814453125, 15.81689453125, 16.698974609375, 17.5810546875, 18.463134765625, 19.34521484375, 20.227294921875, 21.109375]}, "gradients/decoder.transformer.h.0.ln_2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 5.0, 10.0, 17.0, 32.0, 52.0, 63.0, 86.0, 108.0, 123.0, 133.0, 115.0, 87.0, 63.0, 43.0, 26.0, 8.0, 10.0, 7.0, 5.0, 1.0, 4.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-104.55110931396484, -99.80265045166016, -95.05419158935547, -90.30573272705078, -85.5572738647461, -80.8088150024414, -76.06036376953125, -71.31190490722656, -66.56344604492188, -61.81498718261719, -57.0665283203125, -52.31806945800781, -47.569610595703125, -42.82115173339844, -38.072696685791016, -33.32423782348633, -28.575775146484375, -23.827316284179688, -19.078857421875, -14.330400466918945, -9.581941604614258, -4.83348274230957, -0.08502578735351562, 4.663433074951172, 9.41189193725586, 14.160350799560547, 18.908809661865234, 23.65726661682129, 28.405725479125977, 33.15418243408203, 37.90264129638672, 42.651100158691406, 47.39955139160156, 52.14801025390625, 56.89646911621094, 61.644927978515625, 66.39338684082031, 71.141845703125, 75.89030456542969, 80.63876342773438, 85.38722229003906, 90.13568115234375, 94.88414001464844, 99.63259887695312, 104.38105773925781, 109.1295166015625, 113.87797546386719, 118.62643432617188, 123.37488555908203, 128.1233367919922, 132.87179565429688, 137.62025451660156, 142.36871337890625, 147.11717224121094, 151.86563110351562, 156.6140899658203, 161.362548828125, 166.1110076904297, 170.85946655273438, 175.60792541503906, 180.35638427734375, 185.10484313964844, 189.85330200195312, 194.6017608642578, 199.3502197265625]}, "gradients/decoder.transformer.h.0.ln_2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 1.0, 4.0, 5.0, 5.0, 12.0, 7.0, 8.0, 7.0, 11.0, 19.0, 20.0, 19.0, 16.0, 21.0, 22.0, 28.0, 28.0, 25.0, 34.0, 28.0, 44.0, 43.0, 42.0, 37.0, 39.0, 33.0, 58.0, 38.0, 49.0, 37.0, 29.0, 27.0, 32.0, 23.0, 22.0, 15.0, 26.0, 14.0, 15.0, 13.0, 9.0, 6.0, 8.0, 8.0, 9.0, 8.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-70.49784088134766, -68.36172485351562, -66.22561645507812, -64.0895004272461, -61.95338439941406, -59.8172721862793, -57.68115997314453, -55.5450439453125, -53.408931732177734, -51.27281951904297, -49.13670349121094, -47.00059127807617, -44.864479064941406, -42.728363037109375, -40.59225082397461, -38.456138610839844, -36.32002258300781, -34.18391036987305, -32.047794342041016, -29.91168212890625, -27.77556800842285, -25.639453887939453, -23.503341674804688, -21.36722755432129, -19.23111343383789, -17.094999313354492, -14.95888614654541, -12.822772979736328, -10.68665885925293, -8.550544738769531, -6.414431571960449, -4.278318405151367, -2.1421966552734375, -0.006083011627197266, 2.130030632019043, 4.266144275665283, 6.402257919311523, 8.538372039794922, 10.674485206604004, 12.810598373413086, 14.946712493896484, 17.082826614379883, 19.21894073486328, 21.355052947998047, 23.491167068481445, 25.627281188964844, 27.76339340209961, 29.899507522583008, 32.035621643066406, 34.17173385620117, 36.3078498840332, 38.44396209716797, 40.580078125, 42.716190338134766, 44.85230255126953, 46.98841857910156, 49.12453079223633, 51.260643005371094, 53.396759033203125, 55.53287124633789, 57.668983459472656, 59.80509948730469, 61.94121170043945, 64.07732391357422, 66.21343994140625]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 5.0, 7.0, 7.0, 10.0, 7.0, 8.0, 11.0, 16.0, 20.0, 17.0, 33.0, 20.0, 27.0, 38.0, 29.0, 38.0, 32.0, 41.0, 40.0, 58.0, 57.0, 51.0, 44.0, 51.0, 47.0, 30.0, 45.0, 21.0, 31.0, 37.0, 18.0, 21.0, 17.0, 11.0, 11.0, 10.0, 13.0, 10.0, 11.0, 6.0, 5.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-91.0625, -87.2314453125, -83.400390625, -79.5693359375, -75.73828125, -71.9072265625, -68.076171875, -64.2451171875, -60.4140625, -56.5830078125, -52.751953125, -48.9208984375, -45.08984375, -41.2587890625, -37.427734375, -33.5966796875, -29.765625, -25.9345703125, -22.103515625, -18.2724609375, -14.44140625, -10.6103515625, -6.779296875, -2.9482421875, 0.8828125, 4.7138671875, 8.544921875, 12.3759765625, 16.20703125, 20.0380859375, 23.869140625, 27.7001953125, 31.53125, 35.3623046875, 39.193359375, 43.0244140625, 46.85546875, 50.6865234375, 54.517578125, 58.3486328125, 62.1796875, 66.0107421875, 69.841796875, 73.6728515625, 77.50390625, 81.3349609375, 85.166015625, 88.9970703125, 92.828125, 96.6591796875, 100.490234375, 104.3212890625, 108.15234375, 111.9833984375, 115.814453125, 119.6455078125, 123.4765625, 127.3076171875, 131.138671875, 134.9697265625, 138.80078125, 142.6318359375, 146.462890625, 150.2939453125, 154.125]}, "gradients/decoder.transformer.h.0.crossattention.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 4.0, 0.0, 2.0, 4.0, 5.0, 4.0, 17.0, 17.0, 29.0, 60.0, 105.0, 138.0, 278.0, 441.0, 676.0, 1167.0, 2014.0, 3417.0, 5817.0, 9930.0, 17260.0, 31177.0, 57699.0, 114507.0, 359242.0, 239577.0, 94166.0, 48344.0, 26681.0, 15062.0, 8477.0, 4938.0, 2992.0, 1744.0, 1002.0, 624.0, 380.0, 231.0, 123.0, 77.0, 49.0, 32.0, 16.0, 14.0, 9.0, 3.0, 3.0, 4.0, 1.0, 3.0, 4.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-5.33984375, -5.1773681640625, -5.014892578125, -4.8524169921875, -4.68994140625, -4.5274658203125, -4.364990234375, -4.2025146484375, -4.0400390625, -3.8775634765625, -3.715087890625, -3.5526123046875, -3.39013671875, -3.2276611328125, -3.065185546875, -2.9027099609375, -2.740234375, -2.5777587890625, -2.415283203125, -2.2528076171875, -2.09033203125, -1.9278564453125, -1.765380859375, -1.6029052734375, -1.4404296875, -1.2779541015625, -1.115478515625, -0.9530029296875, -0.79052734375, -0.6280517578125, -0.465576171875, -0.3031005859375, -0.140625, 0.0218505859375, 0.184326171875, 0.3468017578125, 0.50927734375, 0.6717529296875, 0.834228515625, 0.9967041015625, 1.1591796875, 1.3216552734375, 1.484130859375, 1.6466064453125, 1.80908203125, 1.9715576171875, 2.134033203125, 2.2965087890625, 2.458984375, 2.6214599609375, 2.783935546875, 2.9464111328125, 3.10888671875, 3.2713623046875, 3.433837890625, 3.5963134765625, 3.7587890625, 3.9212646484375, 4.083740234375, 4.2462158203125, 4.40869140625, 4.5711669921875, 4.733642578125, 4.8961181640625, 5.05859375]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 1.0, 2.0, 5.0, 4.0, 4.0, 11.0, 3.0, 11.0, 8.0, 8.0, 17.0, 16.0, 12.0, 22.0, 35.0, 20.0, 25.0, 37.0, 33.0, 36.0, 31.0, 35.0, 38.0, 36.0, 40.0, 1062.0, 38.0, 41.0, 42.0, 36.0, 31.0, 31.0, 27.0, 30.0, 21.0, 27.0, 24.0, 22.0, 21.0, 19.0, 16.0, 11.0, 8.0, 6.0, 6.0, 3.0, 3.0, 8.0, 2.0, 4.0, 1.0, 3.0, 1.0, 1.0, 0.0, 2.0, 2.0], "bins": [-70.5, -68.34375, -66.1875, -64.03125, -61.875, -59.71875, -57.5625, -55.40625, -53.25, -51.09375, -48.9375, -46.78125, -44.625, -42.46875, -40.3125, -38.15625, -36.0, -33.84375, -31.6875, -29.53125, -27.375, -25.21875, -23.0625, -20.90625, -18.75, -16.59375, -14.4375, -12.28125, -10.125, -7.96875, -5.8125, -3.65625, -1.5, 0.65625, 2.8125, 4.96875, 7.125, 9.28125, 11.4375, 13.59375, 15.75, 17.90625, 20.0625, 22.21875, 24.375, 26.53125, 28.6875, 30.84375, 33.0, 35.15625, 37.3125, 39.46875, 41.625, 43.78125, 45.9375, 48.09375, 50.25, 52.40625, 54.5625, 56.71875, 58.875, 61.03125, 63.1875, 65.34375, 67.5]}, "gradients/decoder.transformer.h.0.crossattention.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 6.0, 6.0, 9.0, 11.0, 15.0, 38.0, 33.0, 55.0, 87.0, 132.0, 178.0, 301.0, 418.0, 627.0, 958.0, 1439.0, 2209.0, 3371.0, 5415.0, 8479.0, 13803.0, 22574.0, 37880.0, 65933.0, 127666.0, 1374781.0, 208081.0, 92902.0, 51190.0, 30034.0, 18013.0, 11068.0, 6845.0, 4376.0, 2815.0, 1853.0, 1204.0, 762.0, 503.0, 360.0, 239.0, 137.0, 121.0, 60.0, 47.0, 34.0, 24.0, 22.0, 5.0, 8.0, 5.0, 4.0, 2.0, 1.0, 0.0, 2.0, 2.0], "bins": [-3.10546875, -3.010711669921875, -2.91595458984375, -2.821197509765625, -2.7264404296875, -2.631683349609375, -2.53692626953125, -2.442169189453125, -2.347412109375, -2.252655029296875, -2.15789794921875, -2.063140869140625, -1.9683837890625, -1.873626708984375, -1.77886962890625, -1.684112548828125, -1.58935546875, -1.494598388671875, -1.39984130859375, -1.305084228515625, -1.2103271484375, -1.115570068359375, -1.02081298828125, -0.926055908203125, -0.831298828125, -0.736541748046875, -0.64178466796875, -0.547027587890625, -0.4522705078125, -0.357513427734375, -0.26275634765625, -0.167999267578125, -0.0732421875, 0.021514892578125, 0.11627197265625, 0.211029052734375, 0.3057861328125, 0.400543212890625, 0.49530029296875, 0.590057373046875, 0.684814453125, 0.779571533203125, 0.87432861328125, 0.969085693359375, 1.0638427734375, 1.158599853515625, 1.25335693359375, 1.348114013671875, 1.44287109375, 1.537628173828125, 1.63238525390625, 1.727142333984375, 1.8218994140625, 1.916656494140625, 2.01141357421875, 2.106170654296875, 2.200927734375, 2.295684814453125, 2.39044189453125, 2.485198974609375, 2.5799560546875, 2.674713134765625, 2.76947021484375, 2.864227294921875, 2.958984375]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 2.0, 6.0, 8.0, 14.0, 10.0, 18.0, 13.0, 24.0, 26.0, 43.0, 56.0, 53.0, 56.0, 79.0, 84.0, 72.0, 83.0, 69.0, 49.0, 47.0, 46.0, 35.0, 24.0, 22.0, 21.0, 12.0, 16.0, 9.0, 5.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.0003209114074707031, -0.00031247176229953766, -0.0003040321171283722, -0.0002955924719572067, -0.00028715282678604126, -0.0002787131816148758, -0.00027027353644371033, -0.00026183389127254486, -0.0002533942461013794, -0.00024495460093021393, -0.00023651495575904846, -0.000228075310587883, -0.00021963566541671753, -0.00021119602024555206, -0.0002027563750743866, -0.00019431672990322113, -0.00018587708473205566, -0.0001774374395608902, -0.00016899779438972473, -0.00016055814921855927, -0.0001521185040473938, -0.00014367885887622833, -0.00013523921370506287, -0.0001267995685338974, -0.00011835992336273193, -0.00010992027819156647, -0.000101480633020401, -9.304098784923553e-05, -8.460134267807007e-05, -7.61616975069046e-05, -6.772205233573914e-05, -5.928240716457367e-05, -5.08427619934082e-05, -4.240311682224274e-05, -3.396347165107727e-05, -2.5523826479911804e-05, -1.7084181308746338e-05, -8.644536137580872e-06, -2.0489096641540527e-07, 8.234754204750061e-06, 1.6674399375915527e-05, 2.5114044547080994e-05, 3.355368971824646e-05, 4.1993334889411926e-05, 5.043298006057739e-05, 5.887262523174286e-05, 6.731227040290833e-05, 7.575191557407379e-05, 8.419156074523926e-05, 9.263120591640472e-05, 0.00010107085108757019, 0.00010951049625873566, 0.00011795014142990112, 0.0001263897866010666, 0.00013482943177223206, 0.00014326907694339752, 0.000151708722114563, 0.00016014836728572845, 0.00016858801245689392, 0.0001770276576280594, 0.00018546730279922485, 0.00019390694797039032, 0.00020234659314155579, 0.00021078623831272125, 0.00021922588348388672]}, "gradients/decoder.transformer.h.0.crossattention.q_attn.weight": {"_type": "histogram", "values": [3.0, 1.0, 4.0, 3.0, 6.0, 3.0, 6.0, 11.0, 8.0, 14.0, 20.0, 43.0, 42.0, 78.0, 116.0, 215.0, 313.0, 534.0, 1124.0, 2209.0, 5027.0, 13346.0, 41477.0, 142296.0, 404403.0, 304447.0, 89089.0, 26917.0, 9309.0, 3694.0, 1669.0, 898.0, 489.0, 271.0, 165.0, 125.0, 68.0, 43.0, 29.0, 17.0, 11.0, 5.0, 10.0, 2.0, 5.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0009713172912597656, -0.0009321793913841248, -0.0008930414915084839, -0.000853903591632843, -0.0008147656917572021, -0.0007756277918815613, -0.0007364898920059204, -0.0006973519921302795, -0.0006582140922546387, -0.0006190761923789978, -0.0005799382925033569, -0.0005408003926277161, -0.0005016624927520752, -0.0004625245928764343, -0.00042338669300079346, -0.0003842487931251526, -0.0003451108932495117, -0.00030597299337387085, -0.00026683509349823, -0.0002276971936225891, -0.00018855929374694824, -0.00014942139387130737, -0.0001102834939956665, -7.114559412002563e-05, -3.2007694244384766e-05, 7.1302056312561035e-06, 4.626810550689697e-05, 8.540600538253784e-05, 0.0001245439052581787, 0.00016368180513381958, 0.00020281970500946045, 0.00024195760488510132, 0.0002810955047607422, 0.00032023340463638306, 0.0003593713045120239, 0.0003985092043876648, 0.00043764710426330566, 0.00047678500413894653, 0.0005159229040145874, 0.0005550608038902283, 0.0005941987037658691, 0.00063333660364151, 0.0006724745035171509, 0.0007116124033927917, 0.0007507503032684326, 0.0007898882031440735, 0.0008290261030197144, 0.0008681640028953552, 0.0009073019027709961, 0.000946439802646637, 0.0009855777025222778, 0.0010247156023979187, 0.0010638535022735596, 0.0011029914021492004, 0.0011421293020248413, 0.0011812672019004822, 0.001220405101776123, 0.001259543001651764, 0.0012986809015274048, 0.0013378188014030457, 0.0013769567012786865, 0.0014160946011543274, 0.0014552325010299683, 0.0014943704009056091, 0.00153350830078125]}, "gradients/decoder.transformer.h.0.ln_cross_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 3.0, 7.0, 3.0, 7.0, 10.0, 8.0, 10.0, 11.0, 14.0, 20.0, 29.0, 30.0, 41.0, 35.0, 69.0, 53.0, 62.0, 60.0, 69.0, 62.0, 64.0, 44.0, 53.0, 42.0, 31.0, 34.0, 37.0, 20.0, 14.0, 19.0, 8.0, 9.0, 5.0, 4.0, 7.0, 1.0, 1.0, 4.0, 3.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.0001400269102305174, -0.00013583189866039902, -0.00013163688709028065, -0.00012744186096824706, -0.0001232468493981287, -0.00011905183782801032, -0.00011485681898193434, -0.00011066180013585836, -0.00010646678856573999, -0.00010227177699562162, -9.807675814954564e-05, -9.388173930346966e-05, -8.968672773335129e-05, -8.549171616323292e-05, -8.129669731715694e-05, -7.710167847108096e-05, -7.290666690096259e-05, -6.871165533084422e-05, -6.451663648476824e-05, -6.032162127667107e-05, -5.612660606857389e-05, -5.193159086047672e-05, -4.773657565237954e-05, -4.354156044428237e-05, -3.934654523618519e-05, -3.515153002808802e-05, -3.0956514819990844e-05, -2.676149961189367e-05, -2.2566484403796494e-05, -1.837146919569932e-05, -1.4176453987602144e-05, -9.98143877950497e-06, -5.786423571407795e-06, -1.5914083633106202e-06, 2.6036068447865546e-06, 6.798622052883729e-06, 1.0993637260980904e-05, 1.5188652469078079e-05, 1.9383667677175254e-05, 2.357868288527243e-05, 2.7773698093369603e-05, 3.196871330146678e-05, 3.616372850956395e-05, 4.035874371766113e-05, 4.45537589257583e-05, 4.874877413385548e-05, 5.294378934195265e-05, 5.7138804550049827e-05, 6.1333819758147e-05, 6.552883132826537e-05, 6.972385017434135e-05, 7.391886902041733e-05, 7.81138805905357e-05, 8.230889216065407e-05, 8.650391100673005e-05, 9.069892985280603e-05, 9.48939414229244e-05, 9.908895299304277e-05, 0.00010328397183911875, 0.00010747899068519473, 0.0001116740022553131, 0.00011586901382543147, 0.00012006403267150745, 0.00012425905151758343, 0.0001284540630877018]}, "gradients/decoder.transformer.h.0.ln_cross_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 3.0, 4.0, 6.0, 7.0, 8.0, 4.0, 6.0, 10.0, 8.0, 15.0, 12.0, 21.0, 18.0, 18.0, 20.0, 33.0, 25.0, 30.0, 31.0, 35.0, 37.0, 44.0, 43.0, 34.0, 51.0, 44.0, 44.0, 41.0, 39.0, 42.0, 34.0, 35.0, 29.0, 17.0, 19.0, 15.0, 23.0, 16.0, 21.0, 11.0, 13.0, 9.0, 11.0, 5.0, 4.0, 4.0, 3.0, 5.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.922815322875977e-05, -8.603744208812714e-05, -8.284673094749451e-05, -7.965601980686188e-05, -7.646530866622925e-05, -7.327459752559662e-05, -7.008388638496399e-05, -6.689317524433136e-05, -6.370246410369873e-05, -6.05117529630661e-05, -5.732104182243347e-05, -5.413033068180084e-05, -5.093961954116821e-05, -4.7748908400535583e-05, -4.4558197259902954e-05, -4.1367486119270325e-05, -3.8176774978637695e-05, -3.4986063838005066e-05, -3.1795352697372437e-05, -2.8604641556739807e-05, -2.5413930416107178e-05, -2.222321927547455e-05, -1.903250813484192e-05, -1.584179699420929e-05, -1.265108585357666e-05, -9.46037471294403e-06, -6.269663572311401e-06, -3.078952431678772e-06, 1.1175870895385742e-07, 3.302469849586487e-06, 6.493180990219116e-06, 9.683892130851746e-06, 1.2874603271484375e-05, 1.6065314412117004e-05, 1.9256025552749634e-05, 2.2446736693382263e-05, 2.5637447834014893e-05, 2.8828158974647522e-05, 3.201887011528015e-05, 3.520958125591278e-05, 3.840029239654541e-05, 4.159100353717804e-05, 4.478171467781067e-05, 4.79724258184433e-05, 5.116313695907593e-05, 5.435384809970856e-05, 5.7544559240341187e-05, 6.0735270380973816e-05, 6.392598152160645e-05, 6.711669266223907e-05, 7.03074038028717e-05, 7.349811494350433e-05, 7.668882608413696e-05, 7.987953722476959e-05, 8.307024836540222e-05, 8.626095950603485e-05, 8.945167064666748e-05, 9.264238178730011e-05, 9.583309292793274e-05, 9.902380406856537e-05, 0.000102214515209198, 0.00010540522634983063, 0.00010859593749046326, 0.00011178664863109589, 0.00011497735977172852]}, "gradients/decoder.transformer.h.0.attn.c_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 5.0, 7.0, 7.0, 10.0, 7.0, 8.0, 11.0, 16.0, 20.0, 17.0, 33.0, 20.0, 27.0, 38.0, 29.0, 38.0, 32.0, 41.0, 40.0, 58.0, 57.0, 51.0, 44.0, 51.0, 47.0, 30.0, 45.0, 21.0, 31.0, 37.0, 18.0, 21.0, 17.0, 11.0, 11.0, 10.0, 13.0, 10.0, 11.0, 6.0, 5.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-91.0625, -87.2314453125, -83.400390625, -79.5693359375, -75.73828125, -71.9072265625, -68.076171875, -64.2451171875, -60.4140625, -56.5830078125, -52.751953125, -48.9208984375, -45.08984375, -41.2587890625, -37.427734375, -33.5966796875, -29.765625, -25.9345703125, -22.103515625, -18.2724609375, -14.44140625, -10.6103515625, -6.779296875, -2.9482421875, 0.8828125, 4.7138671875, 8.544921875, 12.3759765625, 16.20703125, 20.0380859375, 23.869140625, 27.7001953125, 31.53125, 35.3623046875, 39.193359375, 43.0244140625, 46.85546875, 50.6865234375, 54.517578125, 58.3486328125, 62.1796875, 66.0107421875, 69.841796875, 73.6728515625, 77.50390625, 81.3349609375, 85.166015625, 88.9970703125, 92.828125, 96.6591796875, 100.490234375, 104.3212890625, 108.15234375, 111.9833984375, 115.814453125, 119.6455078125, 123.4765625, 127.3076171875, 131.138671875, 134.9697265625, 138.80078125, 142.6318359375, 146.462890625, 150.2939453125, 154.125]}, "gradients/decoder.transformer.h.0.attn.c_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 11.0, 10.0, 11.0, 16.0, 13.0, 23.0, 44.0, 55.0, 87.0, 111.0, 162.0, 330.0, 704.0, 1694.0, 4949.0, 17522.0, 75586.0, 351397.0, 453387.0, 107789.0, 24232.0, 6505.0, 2122.0, 795.0, 395.0, 215.0, 128.0, 66.0, 50.0, 42.0, 34.0, 23.0, 17.0, 15.0, 13.0, 5.0, 5.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.234375, -19.5146484375, -18.794921875, -18.0751953125, -17.35546875, -16.6357421875, -15.916015625, -15.1962890625, -14.4765625, -13.7568359375, -13.037109375, -12.3173828125, -11.59765625, -10.8779296875, -10.158203125, -9.4384765625, -8.71875, -7.9990234375, -7.279296875, -6.5595703125, -5.83984375, -5.1201171875, -4.400390625, -3.6806640625, -2.9609375, -2.2412109375, -1.521484375, -0.8017578125, -0.08203125, 0.6376953125, 1.357421875, 2.0771484375, 2.796875, 3.5166015625, 4.236328125, 4.9560546875, 5.67578125, 6.3955078125, 7.115234375, 7.8349609375, 8.5546875, 9.2744140625, 9.994140625, 10.7138671875, 11.43359375, 12.1533203125, 12.873046875, 13.5927734375, 14.3125, 15.0322265625, 15.751953125, 16.4716796875, 17.19140625, 17.9111328125, 18.630859375, 19.3505859375, 20.0703125, 20.7900390625, 21.509765625, 22.2294921875, 22.94921875, 23.6689453125, 24.388671875, 25.1083984375, 25.828125]}, "gradients/decoder.transformer.h.0.attn.c_attn.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 9.0, 2.0, 9.0, 10.0, 20.0, 29.0, 50.0, 53.0, 89.0, 104.0, 179.0, 2108.0, 109.0, 81.0, 67.0, 42.0, 47.0, 14.0, 17.0, 6.0, 2.0, 3.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-364.75, -353.41015625, -342.0703125, -330.73046875, -319.390625, -308.05078125, -296.7109375, -285.37109375, -274.03125, -262.69140625, -251.3515625, -240.01171875, -228.671875, -217.33203125, -205.9921875, -194.65234375, -183.3125, -171.97265625, -160.6328125, -149.29296875, -137.953125, -126.61328125, -115.2734375, -103.93359375, -92.59375, -81.25390625, -69.9140625, -58.57421875, -47.234375, -35.89453125, -24.5546875, -13.21484375, -1.875, 9.46484375, 20.8046875, 32.14453125, 43.484375, 54.82421875, 66.1640625, 77.50390625, 88.84375, 100.18359375, 111.5234375, 122.86328125, 134.203125, 145.54296875, 156.8828125, 168.22265625, 179.5625, 190.90234375, 202.2421875, 213.58203125, 224.921875, 236.26171875, 247.6015625, 258.94140625, 270.28125, 281.62109375, 292.9609375, 304.30078125, 315.640625, 326.98046875, 338.3203125, 349.66015625, 361.0]}, "gradients/decoder.transformer.h.0.attn.c_attn.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 4.0, 3.0, 4.0, 2.0, 6.0, 7.0, 7.0, 9.0, 16.0, 17.0, 39.0, 65.0, 100.0, 159.0, 301.0, 544.0, 1356.0, 4358.0, 53128.0, 3004324.0, 73290.0, 5106.0, 1521.0, 597.0, 292.0, 182.0, 84.0, 64.0, 35.0, 26.0, 12.0, 10.0, 11.0, 7.0, 5.0, 4.0, 6.0, 5.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0], "bins": [-51.8125, -50.3125, -48.8125, -47.3125, -45.8125, -44.3125, -42.8125, -41.3125, -39.8125, -38.3125, -36.8125, -35.3125, -33.8125, -32.3125, -30.8125, -29.3125, -27.8125, -26.3125, -24.8125, -23.3125, -21.8125, -20.3125, -18.8125, -17.3125, -15.8125, -14.3125, -12.8125, -11.3125, -9.8125, -8.3125, -6.8125, -5.3125, -3.8125, -2.3125, -0.8125, 0.6875, 2.1875, 3.6875, 5.1875, 6.6875, 8.1875, 9.6875, 11.1875, 12.6875, 14.1875, 15.6875, 17.1875, 18.6875, 20.1875, 21.6875, 23.1875, 24.6875, 26.1875, 27.6875, 29.1875, 30.6875, 32.1875, 33.6875, 35.1875, 36.6875, 38.1875, 39.6875, 41.1875, 42.6875, 44.1875]}, "gradients/decoder.transformer.h.0.ln_1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 3.0, 0.0, 7.0, 5.0, 12.0, 21.0, 62.0, 175.0, 373.0, 180.0, 75.0, 39.0, 21.0, 9.0, 6.0, 6.0, 3.0, 3.0, 2.0, 3.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0], "bins": [-753.6118774414062, -737.3900756835938, -721.1682739257812, -704.9464721679688, -688.7246704101562, -672.5028686523438, -656.2810668945312, -640.0592651367188, -623.8374633789062, -607.6156616210938, -591.3938598632812, -575.1720581054688, -558.9502563476562, -542.7284545898438, -526.5066528320312, -510.28485107421875, -494.0630187988281, -477.8412170410156, -461.6194152832031, -445.3976135253906, -429.1758117675781, -412.9540100097656, -396.732177734375, -380.5103759765625, -364.28857421875, -348.0667724609375, -331.844970703125, -315.6231689453125, -299.4013671875, -283.1795654296875, -266.957763671875, -250.73594665527344, -234.51416015625, -218.2923583984375, -202.070556640625, -185.8487548828125, -169.626953125, -153.4051513671875, -137.18333435058594, -120.96153259277344, -104.73973083496094, -88.51792907714844, -72.29612731933594, -56.074317932128906, -39.852516174316406, -23.630714416503906, -7.408905029296875, 8.812896728515625, 25.034698486328125, 41.256500244140625, 57.47830581665039, 73.70011138916016, 89.92191314697266, 106.14371490478516, 122.36552429199219, 138.5873260498047, 154.8091278076172, 171.0309295654297, 187.2527313232422, 203.47454833984375, 219.69635009765625, 235.91815185546875, 252.13995361328125, 268.36175537109375, 284.58355712890625]}, "gradients/decoder.transformer.h.0.ln_1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 4.0, 2.0, 6.0, 10.0, 8.0, 13.0, 12.0, 8.0, 14.0, 18.0, 22.0, 18.0, 22.0, 30.0, 24.0, 22.0, 29.0, 26.0, 32.0, 36.0, 50.0, 56.0, 47.0, 40.0, 55.0, 35.0, 39.0, 44.0, 30.0, 30.0, 31.0, 29.0, 30.0, 24.0, 12.0, 13.0, 16.0, 18.0, 7.0, 10.0, 10.0, 6.0, 4.0, 5.0, 6.0, 2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-229.15956115722656, -221.77622985839844, -214.39288330078125, -207.00955200195312, -199.626220703125, -192.2428741455078, -184.8595428466797, -177.4761962890625, -170.09286499023438, -162.70953369140625, -155.32618713378906, -147.94285583496094, -140.55950927734375, -133.17617797851562, -125.7928466796875, -118.40950775146484, -111.02616882324219, -103.64282989501953, -96.25949096679688, -88.87615966796875, -81.4928207397461, -74.10948181152344, -66.72615051269531, -59.342811584472656, -51.95947265625, -44.576133728027344, -37.19279861450195, -29.80946159362793, -22.426124572753906, -15.04278564453125, -7.659450531005859, -0.27611541748046875, 7.10723876953125, 14.490575790405273, 21.873912811279297, 29.25724983215332, 36.640586853027344, 44.02392578125, 51.40726089477539, 58.79059600830078, 66.17393493652344, 73.5572738647461, 80.94061279296875, 88.32394409179688, 95.70728302001953, 103.09062194824219, 110.47395324707031, 117.85729217529297, 125.24063110351562, 132.62396240234375, 140.00730895996094, 147.39064025878906, 154.77398681640625, 162.15731811523438, 169.5406494140625, 176.92398071289062, 184.3073272705078, 191.69065856933594, 199.07400512695312, 206.45733642578125, 213.84066772460938, 221.22401428222656, 228.6073455810547, 235.99069213867188, 243.3740234375]}, "gradients/decoder.transformer.wpe.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 3.0, 4.0, 9.0, 4.0, 10.0, 9.0, 29.0, 26.0, 53.0, 74.0, 98.0, 126.0, 187.0, 259.0, 375.0, 445.0, 673.0, 912.0, 1139.0, 1040507.0, 981.0, 721.0, 575.0, 352.0, 274.0, 224.0, 141.0, 101.0, 87.0, 58.0, 33.0, 20.0, 20.0, 8.0, 9.0, 6.0, 6.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-75.4777603149414, -73.13005828857422, -70.78236389160156, -68.43466186523438, -66.08696746826172, -63.73926544189453, -61.39156723022461, -59.04386901855469, -56.696170806884766, -54.348472595214844, -52.00077438354492, -49.653076171875, -47.30537414550781, -44.957679748535156, -42.60997772216797, -40.26227951049805, -37.914581298828125, -35.5668830871582, -33.21918487548828, -30.871484756469727, -28.523786544799805, -26.176088333129883, -23.828388214111328, -21.480690002441406, -19.132991790771484, -16.785293579101562, -14.437594413757324, -12.089895248413086, -9.742197036743164, -7.394498825073242, -5.046799659729004, -2.6991004943847656, -0.351409912109375, 1.996288776397705, 4.343987464904785, 6.691686153411865, 9.039384841918945, 11.387083053588867, 13.734782218933105, 16.082481384277344, 18.430179595947266, 20.777877807617188, 23.12557601928711, 25.473276138305664, 27.820974349975586, 30.168672561645508, 32.51637268066406, 34.864070892333984, 37.211769104003906, 39.55946731567383, 41.90716552734375, 44.25486373901367, 46.602561950683594, 48.95026397705078, 51.2979621887207, 53.645660400390625, 55.99335861206055, 58.34105682373047, 60.68875503540039, 63.03645324707031, 65.3841552734375, 67.73184967041016, 70.07955169677734, 72.42724609375, 74.77494812011719]}, "gradients/decoder.transformer.wte.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 5.0, 1.0, 8.0, 8.0, 6.0, 12.0, 24.0, 33.0, 52.0, 177.0, 51450296.0, 12304.0, 91.0, 42.0, 17.0, 26.0, 10.0, 5.0, 8.0, 5.0, 2.0, 3.0, 3.0, 4.0, 3.0, 6.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1213.0, -1141.22607421875, -1069.4522705078125, -997.6784057617188, -925.904541015625, -854.1306762695312, -782.3568115234375, -710.5829467773438, -638.80908203125, -567.0352172851562, -495.2613525390625, -423.48748779296875, -351.713623046875, -279.93975830078125, -208.1658935546875, -136.39202880859375, -64.6181640625, 7.15570068359375, 78.9295654296875, 150.70343017578125, 222.477294921875, 294.25115966796875, 366.0250244140625, 437.79888916015625, 509.57275390625, 581.3466186523438, 653.1204833984375, 724.8943481445312, 796.668212890625, 868.4420776367188, 940.2159423828125, 1011.9898071289062, 1083.763916015625, 1155.537841796875, 1227.3116455078125, 1299.08544921875, 1370.859375, 1442.63330078125, 1514.4071044921875, 1586.180908203125, 1657.954833984375, 1729.728759765625, 1801.5025634765625, 1873.2763671875, 1945.05029296875, 2016.82421875, 2088.59814453125, 2160.371826171875, 2232.145751953125, 2303.919677734375, 2375.693359375, 2447.46728515625, 2519.2412109375, 2591.01513671875, 2662.7890625, 2734.562744140625, 2806.336669921875, 2878.110595703125, 2949.88427734375, 3021.658203125, 3093.43212890625, 3165.2060546875, 3236.97998046875, 3308.753662109375, 3380.527587890625]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 5.0, 4.0, 10.0, 7.0, 20.0, 26.0, 44.0, 61.0, 105.0, 147.0, 250.0, 402.0, 631.0, 975.0, 1425.0, 2295.0, 3544.0, 5517.0, 8876.0, 13861.0, 22364.0, 35354.0, 58434.0, 94916.0, 159140.0, 288607.0, 677291.0, 4074615.0, 361643.0, 189057.0, 111211.0, 68358.0, 42028.0, 25892.0, 16337.0, 10030.0, 6552.0, 3977.0, 2666.0, 1667.0, 1115.0, 723.0, 456.0, 323.0, 183.0, 91.0, 83.0, 44.0, 35.0, 17.0, 19.0, 6.0, 3.0, 5.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-1.38671875, -1.343597412109375, -1.30047607421875, -1.257354736328125, -1.2142333984375, -1.171112060546875, -1.12799072265625, -1.084869384765625, -1.041748046875, -0.998626708984375, -0.95550537109375, -0.912384033203125, -0.8692626953125, -0.826141357421875, -0.78302001953125, -0.739898681640625, -0.69677734375, -0.653656005859375, -0.61053466796875, -0.567413330078125, -0.5242919921875, -0.481170654296875, -0.43804931640625, -0.394927978515625, -0.351806640625, -0.308685302734375, -0.26556396484375, -0.222442626953125, -0.1793212890625, -0.136199951171875, -0.09307861328125, -0.049957275390625, -0.0068359375, 0.036285400390625, 0.07940673828125, 0.122528076171875, 0.1656494140625, 0.208770751953125, 0.25189208984375, 0.295013427734375, 0.338134765625, 0.381256103515625, 0.42437744140625, 0.467498779296875, 0.5106201171875, 0.553741455078125, 0.59686279296875, 0.639984130859375, 0.68310546875, 0.726226806640625, 0.76934814453125, 0.812469482421875, 0.8555908203125, 0.898712158203125, 0.94183349609375, 0.984954833984375, 1.028076171875, 1.071197509765625, 1.11431884765625, 1.157440185546875, 1.2005615234375, 1.243682861328125, 1.28680419921875, 1.329925537109375, 1.373046875]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 6.0, 2.0, 2.0, 6.0, 7.0, 6.0, 8.0, 14.0, 14.0, 13.0, 21.0, 21.0, 22.0, 25.0, 38.0, 29.0, 36.0, 31.0, 33.0, 50.0, 43.0, 48.0, 977.0, 127.0, 44.0, 40.0, 44.0, 38.0, 35.0, 21.0, 23.0, 39.0, 25.0, 26.0, 19.0, 21.0, 15.0, 13.0, 16.0, 10.0, 6.0, 4.0, 6.0, 6.0, 3.0, 1.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-34.71875, -33.5166015625, -32.314453125, -31.1123046875, -29.91015625, -28.7080078125, -27.505859375, -26.3037109375, -25.1015625, -23.8994140625, -22.697265625, -21.4951171875, -20.29296875, -19.0908203125, -17.888671875, -16.6865234375, -15.484375, -14.2822265625, -13.080078125, -11.8779296875, -10.67578125, -9.4736328125, -8.271484375, -7.0693359375, -5.8671875, -4.6650390625, -3.462890625, -2.2607421875, -1.05859375, 0.1435546875, 1.345703125, 2.5478515625, 3.75, 4.9521484375, 6.154296875, 7.3564453125, 8.55859375, 9.7607421875, 10.962890625, 12.1650390625, 13.3671875, 14.5693359375, 15.771484375, 16.9736328125, 18.17578125, 19.3779296875, 20.580078125, 21.7822265625, 22.984375, 24.1865234375, 25.388671875, 26.5908203125, 27.79296875, 28.9951171875, 30.197265625, 31.3994140625, 32.6015625, 33.8037109375, 35.005859375, 36.2080078125, 37.41015625, 38.6123046875, 39.814453125, 41.0166015625, 42.21875]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [3.0, 5.0, 8.0, 8.0, 17.0, 12.0, 34.0, 50.0, 89.0, 111.0, 135.0, 184.0, 307.0, 480.0, 595.0, 962.0, 1434.0, 2122.0, 3152.0, 4650.0, 7085.0, 10350.0, 15889.0, 25046.0, 39523.0, 64128.0, 105114.0, 180266.0, 332363.0, 3972454.0, 795993.0, 300600.0, 166434.0, 97668.0, 59264.0, 36659.0, 23453.0, 15282.0, 9615.0, 6466.0, 4308.0, 2913.0, 2016.0, 1369.0, 895.0, 637.0, 426.0, 277.0, 204.0, 137.0, 93.0, 60.0, 38.0, 23.0, 11.0, 8.0, 10.0, 14.0, 1.0, 3.0, 0.0, 0.0, 0.0, 3.0], "bins": [-1.33984375, -1.295074462890625, -1.25030517578125, -1.205535888671875, -1.1607666015625, -1.115997314453125, -1.07122802734375, -1.026458740234375, -0.981689453125, -0.936920166015625, -0.89215087890625, -0.847381591796875, -0.8026123046875, -0.757843017578125, -0.71307373046875, -0.668304443359375, -0.62353515625, -0.578765869140625, -0.53399658203125, -0.489227294921875, -0.4444580078125, -0.399688720703125, -0.35491943359375, -0.310150146484375, -0.265380859375, -0.220611572265625, -0.17584228515625, -0.131072998046875, -0.0863037109375, -0.041534423828125, 0.00323486328125, 0.048004150390625, 0.0927734375, 0.137542724609375, 0.18231201171875, 0.227081298828125, 0.2718505859375, 0.316619873046875, 0.36138916015625, 0.406158447265625, 0.450927734375, 0.495697021484375, 0.54046630859375, 0.585235595703125, 0.6300048828125, 0.674774169921875, 0.71954345703125, 0.764312744140625, 0.80908203125, 0.853851318359375, 0.89862060546875, 0.943389892578125, 0.9881591796875, 1.032928466796875, 1.07769775390625, 1.122467041015625, 1.167236328125, 1.212005615234375, 1.25677490234375, 1.301544189453125, 1.3463134765625, 1.391082763671875, 1.43585205078125, 1.480621337890625, 1.525390625]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 8.0, 5.0, 4.0, 10.0, 14.0, 7.0, 11.0, 17.0, 9.0, 21.0, 40.0, 31.0, 30.0, 45.0, 38.0, 41.0, 44.0, 49.0, 59.0, 1061.0, 54.0, 51.0, 45.0, 36.0, 40.0, 44.0, 37.0, 27.0, 29.0, 21.0, 18.0, 16.0, 8.0, 15.0, 9.0, 11.0, 8.0, 5.0, 1.0, 3.0, 1.0, 3.0, 0.0, 2.0, 0.0, 3.0, 1.0, 1.0, 1.0], "bins": [-47.9375, -46.5419921875, -45.146484375, -43.7509765625, -42.35546875, -40.9599609375, -39.564453125, -38.1689453125, -36.7734375, -35.3779296875, -33.982421875, -32.5869140625, -31.19140625, -29.7958984375, -28.400390625, -27.0048828125, -25.609375, -24.2138671875, -22.818359375, -21.4228515625, -20.02734375, -18.6318359375, -17.236328125, -15.8408203125, -14.4453125, -13.0498046875, -11.654296875, -10.2587890625, -8.86328125, -7.4677734375, -6.072265625, -4.6767578125, -3.28125, -1.8857421875, -0.490234375, 0.9052734375, 2.30078125, 3.6962890625, 5.091796875, 6.4873046875, 7.8828125, 9.2783203125, 10.673828125, 12.0693359375, 13.46484375, 14.8603515625, 16.255859375, 17.6513671875, 19.046875, 20.4423828125, 21.837890625, 23.2333984375, 24.62890625, 26.0244140625, 27.419921875, 28.8154296875, 30.2109375, 31.6064453125, 33.001953125, 34.3974609375, 35.79296875, 37.1884765625, 38.583984375, 39.9794921875, 41.375]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 6.0, 6.0, 8.0, 13.0, 15.0, 15.0, 20.0, 42.0, 61.0, 63.0, 90.0, 136.0, 230.0, 286.0, 382.0, 557.0, 873.0, 1374.0, 2026.0, 2860.0, 4412.0, 7515.0, 11646.0, 19366.0, 32700.0, 61792.0, 302127.0, 5650281.0, 89963.0, 40374.0, 21886.0, 13416.0, 9079.0, 5936.0, 4279.0, 2421.0, 1549.0, 1235.0, 942.0, 448.0, 251.0, 217.0, 164.0, 122.0, 71.0, 40.0, 34.0, 54.0, 21.0, 16.0, 9.0, 2.0, 5.0, 2.0, 3.0, 3.0, 3.0], "bins": [-3.02734375, -2.938812255859375, -2.85028076171875, -2.761749267578125, -2.6732177734375, -2.584686279296875, -2.49615478515625, -2.407623291015625, -2.319091796875, -2.230560302734375, -2.14202880859375, -2.053497314453125, -1.9649658203125, -1.876434326171875, -1.78790283203125, -1.699371337890625, -1.61083984375, -1.522308349609375, -1.43377685546875, -1.345245361328125, -1.2567138671875, -1.168182373046875, -1.07965087890625, -0.991119384765625, -0.902587890625, -0.814056396484375, -0.72552490234375, -0.636993408203125, -0.5484619140625, -0.459930419921875, -0.37139892578125, -0.282867431640625, -0.1943359375, -0.105804443359375, -0.01727294921875, 0.071258544921875, 0.1597900390625, 0.248321533203125, 0.33685302734375, 0.425384521484375, 0.513916015625, 0.602447509765625, 0.69097900390625, 0.779510498046875, 0.8680419921875, 0.956573486328125, 1.04510498046875, 1.133636474609375, 1.22216796875, 1.310699462890625, 1.39923095703125, 1.487762451171875, 1.5762939453125, 1.664825439453125, 1.75335693359375, 1.841888427734375, 1.930419921875, 2.018951416015625, 2.10748291015625, 2.196014404296875, 2.2845458984375, 2.373077392578125, 2.46160888671875, 2.550140380859375, 2.638671875]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 5.0, 0.0, 4.0, 1.0, 5.0, 3.0, 7.0, 9.0, 8.0, 9.0, 13.0, 12.0, 11.0, 21.0, 21.0, 25.0, 30.0, 27.0, 35.0, 38.0, 51.0, 35.0, 46.0, 34.0, 36.0, 1057.0, 38.0, 38.0, 44.0, 26.0, 34.0, 35.0, 31.0, 25.0, 23.0, 24.0, 24.0, 26.0, 30.0, 17.0, 14.0, 11.0, 11.0, 11.0, 8.0, 9.0, 8.0, 5.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0], "bins": [-33.6875, -32.71337890625, -31.7392578125, -30.76513671875, -29.791015625, -28.81689453125, -27.8427734375, -26.86865234375, -25.89453125, -24.92041015625, -23.9462890625, -22.97216796875, -21.998046875, -21.02392578125, -20.0498046875, -19.07568359375, -18.1015625, -17.12744140625, -16.1533203125, -15.17919921875, -14.205078125, -13.23095703125, -12.2568359375, -11.28271484375, -10.30859375, -9.33447265625, -8.3603515625, -7.38623046875, -6.412109375, -5.43798828125, -4.4638671875, -3.48974609375, -2.515625, -1.54150390625, -0.5673828125, 0.40673828125, 1.380859375, 2.35498046875, 3.3291015625, 4.30322265625, 5.27734375, 6.25146484375, 7.2255859375, 8.19970703125, 9.173828125, 10.14794921875, 11.1220703125, 12.09619140625, 13.0703125, 14.04443359375, 15.0185546875, 15.99267578125, 16.966796875, 17.94091796875, 18.9150390625, 19.88916015625, 20.86328125, 21.83740234375, 22.8115234375, 23.78564453125, 24.759765625, 25.73388671875, 26.7080078125, 27.68212890625, 28.65625]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 5.0, 9.0, 7.0, 9.0, 24.0, 31.0, 42.0, 98.0, 536.0, 118.0, 48.0, 23.0, 22.0, 9.0, 11.0, 5.0, 3.0, 2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-124.16714477539062, -118.8846664428711, -113.60218811035156, -108.3197021484375, -103.03722381591797, -97.75474548339844, -92.47225952148438, -87.18978118896484, -81.90730285644531, -76.62482452392578, -71.34234619140625, -66.05986022949219, -60.777381896972656, -55.494903564453125, -50.21242141723633, -44.92993927001953, -39.6474609375, -34.36498260498047, -29.082500457763672, -23.800020217895508, -18.517539978027344, -13.23505973815918, -7.952579498291016, -2.6700973510742188, 2.6123809814453125, 7.894861221313477, 13.17734146118164, 18.459821701049805, 23.74230194091797, 29.024782180786133, 34.3072624206543, 39.589744567871094, 44.872222900390625, 50.154701232910156, 55.43718338012695, 60.71966552734375, 66.00214385986328, 71.28462219238281, 76.56710815429688, 81.8495864868164, 87.13206481933594, 92.41454315185547, 97.697021484375, 102.97950744628906, 108.2619857788086, 113.54446411132812, 118.82695007324219, 124.10942840576172, 129.39190673828125, 134.6743927001953, 139.9568634033203, 145.23934936523438, 150.52182006835938, 155.80430603027344, 161.0867919921875, 166.3692626953125, 171.65174865722656, 176.93423461914062, 182.21670532226562, 187.4991912841797, 192.78167724609375, 198.06414794921875, 203.3466339111328, 208.62911987304688, 213.91159057617188]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 3.0, 5.0, 6.0, 5.0, 6.0, 10.0, 16.0, 6.0, 13.0, 17.0, 20.0, 21.0, 17.0, 16.0, 32.0, 33.0, 26.0, 51.0, 36.0, 37.0, 39.0, 45.0, 41.0, 49.0, 38.0, 28.0, 35.0, 28.0, 42.0, 37.0, 45.0, 31.0, 19.0, 19.0, 19.0, 27.0, 18.0, 14.0, 13.0, 7.0, 3.0, 8.0, 6.0, 7.0, 6.0, 1.0, 2.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-55.67310333251953, -53.78940963745117, -51.90571975708008, -50.02202606201172, -48.138336181640625, -46.254642486572266, -44.370948791503906, -42.48725891113281, -40.60356521606445, -38.719871520996094, -36.836181640625, -34.95248794555664, -33.06879425048828, -31.185104370117188, -29.301410675048828, -27.4177188873291, -25.534027099609375, -23.65033531188965, -21.766643524169922, -19.882949829101562, -17.999258041381836, -16.11556625366211, -14.231873512268066, -12.348180770874023, -10.464488983154297, -8.58079719543457, -6.697104454040527, -4.813412189483643, -2.929719924926758, -1.0460281372070312, 0.8376646041870117, 2.7213573455810547, 4.605049133300781, 6.488741397857666, 8.37243366241455, 10.256126403808594, 12.13981819152832, 14.023509979248047, 15.90720272064209, 17.790895462036133, 19.67458724975586, 21.558279037475586, 23.441970825195312, 25.325664520263672, 27.2093563079834, 29.093048095703125, 30.976741790771484, 32.860435485839844, 34.74412536621094, 36.6278190612793, 38.51150894165039, 40.39520263671875, 42.278892517089844, 44.1625862121582, 46.04627990722656, 47.929969787597656, 49.813663482666016, 51.697357177734375, 53.58104705810547, 55.46474075317383, 57.34843444824219, 59.23212432861328, 61.11581802368164, 62.99951171875, 64.8832015991211]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [7.0, 4.0, 7.0, 12.0, 17.0, 25.0, 47.0, 50.0, 102.0, 137.0, 156.0, 232.0, 277.0, 384.0, 481.0, 555.0, 682.0, 788.0, 960.0, 1197.0, 1485.0, 1933.0, 2401.0, 4157.0, 8560.0, 168408.0, 3969921.0, 8026.0, 5097.0, 3551.0, 2971.0, 2339.0, 1919.0, 1573.0, 1312.0, 1061.0, 836.0, 641.0, 502.0, 320.0, 285.0, 177.0, 144.0, 95.0, 76.0, 69.0, 33.0, 43.0, 51.0, 32.0, 22.0, 33.0, 23.0, 13.0, 21.0, 16.0, 6.0, 5.0, 5.0, 6.0, 5.0, 4.0, 2.0, 5.0], "bins": [-0.80419921875, -0.7734298706054688, -0.7426605224609375, -0.7118911743164062, -0.681121826171875, -0.6503524780273438, -0.6195831298828125, -0.5888137817382812, -0.55804443359375, -0.5272750854492188, -0.4965057373046875, -0.46573638916015625, -0.434967041015625, -0.40419769287109375, -0.3734283447265625, -0.34265899658203125, -0.3118896484375, -0.28112030029296875, -0.2503509521484375, -0.21958160400390625, -0.188812255859375, -0.15804290771484375, -0.1272735595703125, -0.09650421142578125, -0.06573486328125, -0.03496551513671875, -0.0041961669921875, 0.02657318115234375, 0.057342529296875, 0.08811187744140625, 0.1188812255859375, 0.14965057373046875, 0.180419921875, 0.21118927001953125, 0.2419586181640625, 0.27272796630859375, 0.303497314453125, 0.33426666259765625, 0.3650360107421875, 0.39580535888671875, 0.42657470703125, 0.45734405517578125, 0.4881134033203125, 0.5188827514648438, 0.549652099609375, 0.5804214477539062, 0.6111907958984375, 0.6419601440429688, 0.6727294921875, 0.7034988403320312, 0.7342681884765625, 0.7650375366210938, 0.795806884765625, 0.8265762329101562, 0.8573455810546875, 0.8881149291992188, 0.91888427734375, 0.9496536254882812, 0.9804229736328125, 1.0111923217773438, 1.041961669921875, 1.0727310180664062, 1.1035003662109375, 1.1342697143554688, 1.1650390625]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 0.0, 3.0, 1.0, 3.0, 4.0, 0.0, 4.0, 4.0, 6.0, 5.0, 4.0, 5.0, 17.0, 14.0, 23.0, 741.0, 57.0, 19.0, 11.0, 13.0, 11.0, 4.0, 4.0, 9.0, 8.0, 7.0, 3.0, 4.0, 2.0, 3.0, 5.0, 4.0, 4.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06671142578125, -0.0641632080078125, -0.061614990234375, -0.0590667724609375, -0.0565185546875, -0.0539703369140625, -0.051422119140625, -0.0488739013671875, -0.04632568359375, -0.0437774658203125, -0.041229248046875, -0.0386810302734375, -0.0361328125, -0.0335845947265625, -0.031036376953125, -0.0284881591796875, -0.02593994140625, -0.0233917236328125, -0.020843505859375, -0.0182952880859375, -0.0157470703125, -0.0131988525390625, -0.010650634765625, -0.0081024169921875, -0.00555419921875, -0.0030059814453125, -0.000457763671875, 0.0020904541015625, 0.004638671875, 0.0071868896484375, 0.009735107421875, 0.0122833251953125, 0.01483154296875, 0.0173797607421875, 0.019927978515625, 0.0224761962890625, 0.0250244140625, 0.0275726318359375, 0.030120849609375, 0.0326690673828125, 0.03521728515625, 0.0377655029296875, 0.040313720703125, 0.0428619384765625, 0.04541015625, 0.0479583740234375, 0.050506591796875, 0.0530548095703125, 0.05560302734375, 0.0581512451171875, 0.060699462890625, 0.0632476806640625, 0.0657958984375, 0.0683441162109375, 0.070892333984375, 0.0734405517578125, 0.07598876953125, 0.0785369873046875, 0.081085205078125, 0.0836334228515625, 0.086181640625, 0.0887298583984375, 0.091278076171875, 0.0938262939453125, 0.09637451171875]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 6.0, 4.0, 5.0, 9.0, 14.0, 18.0, 27.0, 50.0, 57.0, 85.0, 106.0, 158.0, 212.0, 306.0, 454.0, 646.0, 940.0, 1367.0, 2040.0, 2953.0, 4582.0, 7667.0, 13902.0, 30611.0, 195130.0, 3860669.0, 34245.0, 15007.0, 8259.0, 5041.0, 3137.0, 2042.0, 1409.0, 935.0, 693.0, 445.0, 318.0, 178.0, 147.0, 102.0, 86.0, 66.0, 50.0, 32.0, 21.0, 16.0, 11.0, 7.0, 6.0, 5.0, 3.0, 3.0, 5.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.459228515625, -0.4449005126953125, -0.430572509765625, -0.4162445068359375, -0.40191650390625, -0.3875885009765625, -0.373260498046875, -0.3589324951171875, -0.3446044921875, -0.3302764892578125, -0.315948486328125, -0.3016204833984375, -0.28729248046875, -0.2729644775390625, -0.258636474609375, -0.2443084716796875, -0.22998046875, -0.2156524658203125, -0.201324462890625, -0.1869964599609375, -0.17266845703125, -0.1583404541015625, -0.144012451171875, -0.1296844482421875, -0.1153564453125, -0.1010284423828125, -0.086700439453125, -0.0723724365234375, -0.05804443359375, -0.0437164306640625, -0.029388427734375, -0.0150604248046875, -0.000732421875, 0.0135955810546875, 0.027923583984375, 0.0422515869140625, 0.05657958984375, 0.0709075927734375, 0.085235595703125, 0.0995635986328125, 0.1138916015625, 0.1282196044921875, 0.142547607421875, 0.1568756103515625, 0.17120361328125, 0.1855316162109375, 0.199859619140625, 0.2141876220703125, 0.228515625, 0.2428436279296875, 0.257171630859375, 0.2714996337890625, 0.28582763671875, 0.3001556396484375, 0.314483642578125, 0.3288116455078125, 0.3431396484375, 0.3574676513671875, 0.371795654296875, 0.3861236572265625, 0.40045166015625, 0.4147796630859375, 0.429107666015625, 0.4434356689453125, 0.457763671875]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 2.0, 5.0, 6.0, 6.0, 6.0, 5.0, 6.0, 6.0, 11.0, 11.0, 18.0, 16.0, 17.0, 21.0, 46.0, 3474.0, 215.0, 38.0, 21.0, 19.0, 14.0, 12.0, 17.0, 8.0, 16.0, 5.0, 5.0, 7.0, 7.0, 6.0, 5.0, 1.0, 3.0, 7.0, 0.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.192138671875, -0.18612289428710938, -0.18010711669921875, -0.17409133911132812, -0.1680755615234375, -0.16205978393554688, -0.15604400634765625, -0.15002822875976562, -0.144012451171875, -0.13799667358398438, -0.13198089599609375, -0.12596511840820312, -0.1199493408203125, -0.11393356323242188, -0.10791778564453125, -0.10190200805664062, -0.09588623046875, -0.08987045288085938, -0.08385467529296875, -0.07783889770507812, -0.0718231201171875, -0.06580734252929688, -0.05979156494140625, -0.053775787353515625, -0.047760009765625, -0.041744232177734375, -0.03572845458984375, -0.029712677001953125, -0.0236968994140625, -0.017681121826171875, -0.01166534423828125, -0.005649566650390625, 0.0003662109375, 0.006381988525390625, 0.01239776611328125, 0.018413543701171875, 0.0244293212890625, 0.030445098876953125, 0.03646087646484375, 0.042476654052734375, 0.048492431640625, 0.054508209228515625, 0.06052398681640625, 0.06653976440429688, 0.0725555419921875, 0.07857131958007812, 0.08458709716796875, 0.09060287475585938, 0.09661865234375, 0.10263442993164062, 0.10865020751953125, 0.11466598510742188, 0.1206817626953125, 0.12669754028320312, 0.13271331787109375, 0.13872909545898438, 0.144744873046875, 0.15076065063476562, 0.15677642822265625, 0.16279220581054688, 0.1688079833984375, 0.17482376098632812, 0.18083953857421875, 0.18685531616210938, 0.19287109375]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 14.0, 34.0, 184.0, 675.0, 76.0, 17.0, 8.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.957400321960449, -2.8974404335021973, -2.8374805450439453, -2.7775204181671143, -2.7175605297088623, -2.6576006412506104, -2.5976407527923584, -2.5376806259155273, -2.4777207374572754, -2.4177608489990234, -2.3578009605407715, -2.2978408336639404, -2.2378809452056885, -2.1779210567474365, -2.1179611682891846, -2.0580010414123535, -1.9980412721633911, -1.9380813837051392, -1.8781213760375977, -1.8181614875793457, -1.7582014799118042, -1.6982415914535522, -1.6382815837860107, -1.5783216953277588, -1.5183618068695068, -1.4584019184112549, -1.3984419107437134, -1.3384820222854614, -1.27852201461792, -1.218562126159668, -1.158602237701416, -1.0986422300338745, -1.0386823415756226, -0.9787223935127258, -0.9187624454498291, -0.8588025569915771, -0.7988425493240356, -0.7388826608657837, -0.678922712802887, -0.6189627647399902, -0.5590028166770935, -0.4990428686141968, -0.43908292055130005, -0.3791230022907257, -0.319163054227829, -0.25920310616493225, -0.1992431879043579, -0.13928323984146118, -0.07932329177856445, -0.01936335116624832, 0.04059658944606781, 0.10055652260780334, 0.16051647067070007, 0.2204764187335968, 0.28043633699417114, 0.34039628505706787, 0.4003562331199646, 0.46031618118286133, 0.5202761292457581, 0.5802360773086548, 0.6401959657669067, 0.7001559734344482, 0.7601158618927002, 0.8200758099555969, 0.8800357580184937]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 4.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 6.0, 9.0, 5.0, 10.0, 8.0, 19.0, 22.0, 23.0, 52.0, 51.0, 78.0, 98.0, 94.0, 100.0, 89.0, 82.0, 68.0, 36.0, 38.0, 17.0, 18.0, 17.0, 12.0, 9.0, 4.0, 4.0, 5.0, 4.0, 3.0, 4.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5743618607521057, -0.5535640120506287, -0.5327662229537964, -0.5119683742523193, -0.49117058515548706, -0.47037273645401, -0.44957491755485535, -0.4287770986557007, -0.407979279756546, -0.38718146085739136, -0.3663836419582367, -0.34558582305908203, -0.324787974357605, -0.3039901852607727, -0.28319233655929565, -0.262394517660141, -0.24159669876098633, -0.22079887986183167, -0.200001060962677, -0.17920322716236115, -0.15840540826320648, -0.13760758936405182, -0.11680976301431656, -0.0960119366645813, -0.07521411776542664, -0.054416295140981674, -0.03361847251653671, -0.012820649892091751, 0.00797717273235321, 0.028774991631507874, 0.049572817981243134, 0.0703706443309784, 0.09116840362548828, 0.11196622252464294, 0.1327640414237976, 0.15356187522411346, 0.17435969412326813, 0.1951575130224228, 0.21595534682273865, 0.2367531657218933, 0.257550984621048, 0.27834880352020264, 0.2991466224193573, 0.31994444131851196, 0.340742290019989, 0.3615400791168213, 0.38233792781829834, 0.403135746717453, 0.42393356561660767, 0.44473138451576233, 0.465529203414917, 0.48632702231407166, 0.5071248412132263, 0.5279226899147034, 0.5487204790115356, 0.5695183277130127, 0.5903161764144897, 0.6111140251159668, 0.6319118142127991, 0.6527096629142761, 0.6735074520111084, 0.6943053007125854, 0.7151030898094177, 0.7359009385108948, 0.756698727607727]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 4.0, 8.0, 7.0, 13.0, 12.0, 11.0, 36.0, 48.0, 74.0, 68.0, 158.0, 251.0, 394.0, 619.0, 973.0, 1649.0, 2769.0, 4734.0, 8398.0, 16399.0, 49139.0, 904786.0, 29000.0, 12419.0, 6698.0, 3911.0, 2244.0, 1371.0, 908.0, 544.0, 328.0, 203.0, 97.0, 82.0, 68.0, 50.0, 26.0, 13.0, 16.0, 16.0, 7.0, 6.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.39111328125, -0.37679290771484375, -0.3624725341796875, -0.34815216064453125, -0.333831787109375, -0.31951141357421875, -0.3051910400390625, -0.29087066650390625, -0.27655029296875, -0.26222991943359375, -0.2479095458984375, -0.23358917236328125, -0.219268798828125, -0.20494842529296875, -0.1906280517578125, -0.17630767822265625, -0.1619873046875, -0.14766693115234375, -0.1333465576171875, -0.11902618408203125, -0.104705810546875, -0.09038543701171875, -0.0760650634765625, -0.06174468994140625, -0.04742431640625, -0.03310394287109375, -0.0187835693359375, -0.00446319580078125, 0.009857177734375, 0.02417755126953125, 0.0384979248046875, 0.05281829833984375, 0.067138671875, 0.08145904541015625, 0.0957794189453125, 0.11009979248046875, 0.124420166015625, 0.13874053955078125, 0.1530609130859375, 0.16738128662109375, 0.18170166015625, 0.19602203369140625, 0.2103424072265625, 0.22466278076171875, 0.238983154296875, 0.25330352783203125, 0.2676239013671875, 0.28194427490234375, 0.2962646484375, 0.31058502197265625, 0.3249053955078125, 0.33922576904296875, 0.353546142578125, 0.36786651611328125, 0.3821868896484375, 0.39650726318359375, 0.41082763671875, 0.42514801025390625, 0.4394683837890625, 0.45378875732421875, 0.468109130859375, 0.48242950439453125, 0.4967498779296875, 0.5110702514648438, 0.525390625]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 3.0, 3.0, 2.0, 3.0, 5.0, 6.0, 2.0, 7.0, 6.0, 8.0, 18.0, 21.0, 50.0, 746.0, 19.0, 16.0, 10.0, 8.0, 10.0, 4.0, 9.0, 7.0, 8.0, 5.0, 4.0, 1.0, 6.0, 2.0, 4.0, 5.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06512451171875, -0.06267261505126953, -0.06022071838378906, -0.057768821716308594, -0.055316925048828125, -0.052865028381347656, -0.05041313171386719, -0.04796123504638672, -0.04550933837890625, -0.04305744171142578, -0.04060554504394531, -0.038153648376464844, -0.035701751708984375, -0.033249855041503906, -0.030797958374023438, -0.02834606170654297, -0.0258941650390625, -0.02344226837158203, -0.020990371704101562, -0.018538475036621094, -0.016086578369140625, -0.013634681701660156, -0.011182785034179688, -0.008730888366699219, -0.00627899169921875, -0.0038270950317382812, -0.0013751983642578125, 0.0010766983032226562, 0.003528594970703125, 0.005980491638183594, 0.008432388305664062, 0.010884284973144531, 0.013336181640625, 0.01578807830810547, 0.018239974975585938, 0.020691871643066406, 0.023143768310546875, 0.025595664978027344, 0.028047561645507812, 0.03049945831298828, 0.03295135498046875, 0.03540325164794922, 0.03785514831542969, 0.040307044982910156, 0.042758941650390625, 0.045210838317871094, 0.04766273498535156, 0.05011463165283203, 0.0525665283203125, 0.05501842498779297, 0.05747032165527344, 0.059922218322753906, 0.062374114990234375, 0.06482601165771484, 0.06727790832519531, 0.06972980499267578, 0.07218170166015625, 0.07463359832763672, 0.07708549499511719, 0.07953739166259766, 0.08198928833007812, 0.0844411849975586, 0.08689308166503906, 0.08934497833251953, 0.091796875]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 1.0, 3.0, 2.0, 1.0, 4.0, 2.0, 8.0, 2.0, 2.0, 9.0, 11.0, 14.0, 14.0, 15.0, 34.0, 48.0, 71.0, 78.0, 94.0, 141.0, 201.0, 278.0, 417.0, 685.0, 1036.0, 1618.0, 2484.0, 4023.0, 6774.0, 11292.0, 20043.0, 38253.0, 83379.0, 346393.0, 358599.0, 84374.0, 38349.0, 20278.0, 11240.0, 6767.0, 4058.0, 2689.0, 1600.0, 982.0, 686.0, 433.0, 341.0, 227.0, 165.0, 90.0, 51.0, 59.0, 34.0, 23.0, 30.0, 17.0, 11.0, 17.0, 6.0, 6.0, 3.0, 6.0], "bins": [-0.276611328125, -0.26892852783203125, -0.2612457275390625, -0.25356292724609375, -0.245880126953125, -0.23819732666015625, -0.2305145263671875, -0.22283172607421875, -0.21514892578125, -0.20746612548828125, -0.1997833251953125, -0.19210052490234375, -0.184417724609375, -0.17673492431640625, -0.1690521240234375, -0.16136932373046875, -0.1536865234375, -0.14600372314453125, -0.1383209228515625, -0.13063812255859375, -0.122955322265625, -0.11527252197265625, -0.1075897216796875, -0.09990692138671875, -0.09222412109375, -0.08454132080078125, -0.0768585205078125, -0.06917572021484375, -0.061492919921875, -0.05381011962890625, -0.0461273193359375, -0.03844451904296875, -0.03076171875, -0.02307891845703125, -0.0153961181640625, -0.00771331787109375, -3.0517578125e-05, 0.00765228271484375, 0.0153350830078125, 0.02301788330078125, 0.03070068359375, 0.03838348388671875, 0.0460662841796875, 0.05374908447265625, 0.061431884765625, 0.06911468505859375, 0.0767974853515625, 0.08448028564453125, 0.0921630859375, 0.09984588623046875, 0.1075286865234375, 0.11521148681640625, 0.122894287109375, 0.13057708740234375, 0.1382598876953125, 0.14594268798828125, 0.15362548828125, 0.16130828857421875, 0.1689910888671875, 0.17667388916015625, 0.184356689453125, 0.19203948974609375, 0.1997222900390625, 0.20740509033203125, 0.215087890625]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 2.0, 3.0, 3.0, 5.0, 6.0, 3.0, 6.0, 11.0, 5.0, 9.0, 19.0, 24.0, 20.0, 14.0, 18.0, 25.0, 30.0, 34.0, 33.0, 30.0, 36.0, 31.0, 40.0, 50.0, 42.0, 47.0, 48.0, 42.0, 31.0, 40.0, 37.0, 32.0, 26.0, 34.0, 22.0, 24.0, 19.0, 21.0, 14.0, 10.0, 7.0, 12.0, 14.0, 10.0, 7.0, 1.0, 3.0, 4.0, 3.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.2388916015625, -0.2303600311279297, -0.22182846069335938, -0.21329689025878906, -0.20476531982421875, -0.19623374938964844, -0.18770217895507812, -0.1791706085205078, -0.1706390380859375, -0.1621074676513672, -0.15357589721679688, -0.14504432678222656, -0.13651275634765625, -0.12798118591308594, -0.11944961547851562, -0.11091804504394531, -0.102386474609375, -0.09385490417480469, -0.08532333374023438, -0.07679176330566406, -0.06826019287109375, -0.05972862243652344, -0.051197052001953125, -0.04266548156738281, -0.0341339111328125, -0.025602340698242188, -0.017070770263671875, -0.008539199829101562, -7.62939453125e-06, 0.008523941040039062, 0.017055511474609375, 0.025587081909179688, 0.03411865234375, 0.04265022277832031, 0.051181793212890625, 0.05971336364746094, 0.06824493408203125, 0.07677650451660156, 0.08530807495117188, 0.09383964538574219, 0.1023712158203125, 0.11090278625488281, 0.11943435668945312, 0.12796592712402344, 0.13649749755859375, 0.14502906799316406, 0.15356063842773438, 0.1620922088623047, 0.170623779296875, 0.1791553497314453, 0.18768692016601562, 0.19621849060058594, 0.20475006103515625, 0.21328163146972656, 0.22181320190429688, 0.2303447723388672, 0.2388763427734375, 0.2474079132080078, 0.2559394836425781, 0.26447105407714844, 0.27300262451171875, 0.28153419494628906, 0.2900657653808594, 0.2985973358154297, 0.30712890625]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 3.0, 3.0, 5.0, 9.0, 8.0, 11.0, 14.0, 16.0, 34.0, 41.0, 37.0, 88.0, 148.0, 224.0, 483.0, 1155.0, 3722.0, 18329.0, 292926.0, 698958.0, 25123.0, 4659.0, 1320.0, 558.0, 262.0, 136.0, 85.0, 55.0, 40.0, 33.0, 13.0, 15.0, 14.0, 12.0, 8.0, 5.0, 0.0, 1.0, 3.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00234222412109375, -0.002258807420730591, -0.0021753907203674316, -0.0020919740200042725, -0.0020085573196411133, -0.001925140619277954, -0.001841723918914795, -0.0017583072185516357, -0.0016748905181884766, -0.0015914738178253174, -0.0015080571174621582, -0.001424640417098999, -0.0013412237167358398, -0.0012578070163726807, -0.0011743903160095215, -0.0010909736156463623, -0.0010075569152832031, -0.0009241402149200439, -0.0008407235145568848, -0.0007573068141937256, -0.0006738901138305664, -0.0005904734134674072, -0.000507056713104248, -0.00042364001274108887, -0.0003402233123779297, -0.0002568066120147705, -0.00017338991165161133, -8.997321128845215e-05, -6.556510925292969e-06, 7.686018943786621e-05, 0.0001602768898010254, 0.00024369359016418457, 0.00032711029052734375, 0.00041052699089050293, 0.0004939436912536621, 0.0005773603916168213, 0.0006607770919799805, 0.0007441937923431396, 0.0008276104927062988, 0.000911027193069458, 0.0009944438934326172, 0.0010778605937957764, 0.0011612772941589355, 0.0012446939945220947, 0.001328110694885254, 0.001411527395248413, 0.0014949440956115723, 0.0015783607959747314, 0.0016617774963378906, 0.0017451941967010498, 0.001828610897064209, 0.0019120275974273682, 0.0019954442977905273, 0.0020788609981536865, 0.0021622776985168457, 0.002245694398880005, 0.002329111099243164, 0.0024125277996063232, 0.0024959444999694824, 0.0025793612003326416, 0.0026627779006958008, 0.00274619460105896, 0.002829611301422119, 0.0029130280017852783, 0.0029964447021484375]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 5.0, 3.0, 5.0, 7.0, 17.0, 9.0, 33.0, 21.0, 53.0, 48.0, 87.0, 60.0, 120.0, 85.0, 118.0, 60.0, 85.0, 46.0, 62.0, 22.0, 26.0, 10.0, 10.0, 5.0, 7.0, 0.0, 4.0, 0.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.4570693969726562e-06, -3.3080577850341797e-06, -3.159046173095703e-06, -3.0100345611572266e-06, -2.86102294921875e-06, -2.7120113372802734e-06, -2.562999725341797e-06, -2.4139881134033203e-06, -2.2649765014648438e-06, -2.115964889526367e-06, -1.9669532775878906e-06, -1.817941665649414e-06, -1.6689300537109375e-06, -1.519918441772461e-06, -1.3709068298339844e-06, -1.2218952178955078e-06, -1.0728836059570312e-06, -9.238719940185547e-07, -7.748603820800781e-07, -6.258487701416016e-07, -4.76837158203125e-07, -3.2782554626464844e-07, -1.7881393432617188e-07, -2.9802322387695312e-08, 1.1920928955078125e-07, 2.682209014892578e-07, 4.172325134277344e-07, 5.662441253662109e-07, 7.152557373046875e-07, 8.642673492431641e-07, 1.0132789611816406e-06, 1.1622905731201172e-06, 1.3113021850585938e-06, 1.4603137969970703e-06, 1.6093254089355469e-06, 1.7583370208740234e-06, 1.9073486328125e-06, 2.0563602447509766e-06, 2.205371856689453e-06, 2.3543834686279297e-06, 2.5033950805664062e-06, 2.652406692504883e-06, 2.8014183044433594e-06, 2.950429916381836e-06, 3.0994415283203125e-06, 3.248453140258789e-06, 3.3974647521972656e-06, 3.546476364135742e-06, 3.6954879760742188e-06, 3.844499588012695e-06, 3.993511199951172e-06, 4.1425228118896484e-06, 4.291534423828125e-06, 4.4405460357666016e-06, 4.589557647705078e-06, 4.738569259643555e-06, 4.887580871582031e-06, 5.036592483520508e-06, 5.185604095458984e-06, 5.334615707397461e-06, 5.4836273193359375e-06, 5.632638931274414e-06, 5.781650543212891e-06, 5.930662155151367e-06, 6.079673767089844e-06]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 0.0, 6.0, 6.0, 3.0, 7.0, 4.0, 9.0, 10.0, 25.0, 32.0, 36.0, 49.0, 75.0, 86.0, 178.0, 297.0, 451.0, 835.0, 1728.0, 3375.0, 7669.0, 21157.0, 80864.0, 717484.0, 160974.0, 32778.0, 10812.0, 4578.0, 2207.0, 1143.0, 677.0, 373.0, 214.0, 141.0, 96.0, 50.0, 39.0, 34.0, 17.0, 14.0, 11.0, 6.0, 7.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0], "bins": [-0.0011425018310546875, -0.0011121854186058044, -0.0010818690061569214, -0.0010515525937080383, -0.0010212361812591553, -0.0009909197688102722, -0.0009606033563613892, -0.0009302869439125061, -0.000899970531463623, -0.00086965411901474, -0.0008393377065658569, -0.0008090212941169739, -0.0007787048816680908, -0.0007483884692192078, -0.0007180720567703247, -0.0006877556443214417, -0.0006574392318725586, -0.0006271228194236755, -0.0005968064069747925, -0.0005664899945259094, -0.0005361735820770264, -0.0005058571696281433, -0.00047554075717926025, -0.0004452243447303772, -0.00041490793228149414, -0.0003845915198326111, -0.00035427510738372803, -0.00032395869493484497, -0.0002936422824859619, -0.00026332587003707886, -0.0002330094575881958, -0.00020269304513931274, -0.0001723766326904297, -0.00014206022024154663, -0.00011174380779266357, -8.142739534378052e-05, -5.111098289489746e-05, -2.0794570446014404e-05, 9.521842002868652e-06, 3.983825445175171e-05, 7.015466690063477e-05, 0.00010047107934951782, 0.00013078749179840088, 0.00016110390424728394, 0.000191420316696167, 0.00022173672914505005, 0.0002520531415939331, 0.00028236955404281616, 0.0003126859664916992, 0.0003430023789405823, 0.00037331879138946533, 0.0004036352038383484, 0.00043395161628723145, 0.0004642680287361145, 0.0004945844411849976, 0.0005249008536338806, 0.0005552172660827637, 0.0005855336785316467, 0.0006158500909805298, 0.0006461665034294128, 0.0006764829158782959, 0.000706799328327179, 0.000737115740776062, 0.0007674321532249451, 0.0007977485656738281]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 5.0, 5.0, 7.0, 5.0, 5.0, 8.0, 19.0, 22.0, 24.0, 32.0, 33.0, 75.0, 64.0, 93.0, 79.0, 100.0, 87.0, 70.0, 69.0, 46.0, 49.0, 31.0, 22.0, 8.0, 12.0, 10.0, 4.0, 5.0, 6.0, 3.0, 2.0, 4.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006394386291503906, -0.0006096139550209045, -0.0005797892808914185, -0.0005499646067619324, -0.0005201399326324463, -0.0004903152585029602, -0.0004604905843734741, -0.00043066591024398804, -0.00040084123611450195, -0.00037101656198501587, -0.0003411918878555298, -0.0003113672137260437, -0.0002815425395965576, -0.00025171786546707153, -0.00022189319133758545, -0.00019206851720809937, -0.00016224384307861328, -0.0001324191689491272, -0.00010259449481964111, -7.276982069015503e-05, -4.2945146560668945e-05, -1.3120472431182861e-05, 1.6704201698303223e-05, 4.652887582778931e-05, 7.635354995727539e-05, 0.00010617822408676147, 0.00013600289821624756, 0.00016582757234573364, 0.00019565224647521973, 0.0002254769206047058, 0.0002553015947341919, 0.000285126268863678, 0.00031495094299316406, 0.00034477561712265015, 0.00037460029125213623, 0.0004044249653816223, 0.0004342496395111084, 0.0004640743136405945, 0.0004938989877700806, 0.0005237236618995667, 0.0005535483360290527, 0.0005833730101585388, 0.0006131976842880249, 0.000643022358417511, 0.0006728470325469971, 0.0007026717066764832, 0.0007324963808059692, 0.0007623210549354553, 0.0007921457290649414, 0.0008219704031944275, 0.0008517950773239136, 0.0008816197514533997, 0.0009114444255828857, 0.0009412690997123718, 0.0009710937738418579, 0.001000918447971344, 0.00103074312210083, 0.0010605677962303162, 0.0010903924703598022, 0.0011202171444892883, 0.0011500418186187744, 0.0011798664927482605, 0.0012096911668777466, 0.0012395158410072327, 0.0012693405151367188]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 8.0, 4.0, 8.0, 14.0, 25.0, 38.0, 77.0, 156.0, 373.0, 134.0, 74.0, 32.0, 29.0, 17.0, 3.0, 6.0, 7.0, 4.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.089678764343262, -3.985908031463623, -3.8821372985839844, -3.7783665657043457, -3.674595832824707, -3.5708250999450684, -3.4670541286468506, -3.363283395767212, -3.2595126628875732, -3.1557419300079346, -3.051971197128296, -2.9482004642486572, -2.8444294929504395, -2.740658760070801, -2.636888027191162, -2.5331172943115234, -2.4293465614318848, -2.325575828552246, -2.2218050956726074, -2.1180343627929688, -2.01426362991333, -1.9104927778244019, -1.8067219257354736, -1.702951192855835, -1.5991804599761963, -1.4954097270965576, -1.391638994216919, -1.2878681421279907, -1.184097409248352, -1.0803266763687134, -0.9765558838844299, -0.8727850914001465, -0.7690143585205078, -0.6652436256408691, -0.5614728331565857, -0.45770207047462463, -0.3539313077926636, -0.2501605749130249, -0.14638978242874146, -0.04261898994445801, 0.061151742935180664, 0.16492250561714172, 0.2686932682991028, 0.37246403098106384, 0.4762347936630249, 0.5800055265426636, 0.683776319026947, 0.7875471115112305, 0.8913178443908691, 0.9950885772705078, 1.0988593101501465, 1.2026301622390747, 1.3064008951187134, 1.410171627998352, 1.5139424800872803, 1.617713212966919, 1.7214839458465576, 1.8252546787261963, 1.929025411605835, 2.0327961444854736, 2.1365671157836914, 2.24033784866333, 2.3441085815429688, 2.4478793144226074, 2.551650047302246]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 6.0, 6.0, 2.0, 1.0, 5.0, 4.0, 8.0, 7.0, 13.0, 14.0, 28.0, 50.0, 78.0, 99.0, 128.0, 168.0, 117.0, 86.0, 65.0, 35.0, 25.0, 16.0, 4.0, 7.0, 9.0, 3.0, 6.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.269833564758301, -6.0919575691223145, -5.914081573486328, -5.736205577850342, -5.5583295822143555, -5.380453586578369, -5.202577590942383, -5.024702072143555, -4.84682559967041, -4.668949604034424, -4.4910736083984375, -4.313197612762451, -4.135321617126465, -3.9574456214904785, -3.7795698642730713, -3.601693868637085, -3.4238181114196777, -3.2459421157836914, -3.068066120147705, -2.8901901245117188, -2.7123141288757324, -2.534438133239746, -2.356562376022339, -2.1786863803863525, -2.000810384750366, -1.8229343891143799, -1.6450583934783936, -1.4671825170516968, -1.2893065214157104, -1.1114305257797241, -0.9335546493530273, -0.755678653717041, -0.5778021812438965, -0.39992621541023254, -0.2220502495765686, -0.04417431354522705, 0.13370168209075928, 0.3115776777267456, 0.4894535541534424, 0.6673295497894287, 0.845205545425415, 1.0230815410614014, 1.2009575366973877, 1.3788334131240845, 1.5567094087600708, 1.7345854043960571, 1.912461280822754, 2.0903372764587402, 2.2682132720947266, 2.446089267730713, 2.623965263366699, 2.8018412590026855, 2.979717254638672, 3.157593250274658, 3.3354690074920654, 3.5133450031280518, 3.691220998764038, 3.8690969944000244, 4.046972751617432, 4.224848747253418, 4.402724742889404, 4.580600738525391, 4.758476734161377, 4.936352729797363, 5.11422872543335]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 2.0, 2.0, 3.0, 7.0, 4.0, 4.0, 5.0, 17.0, 11.0, 16.0, 21.0, 17.0, 45.0, 66.0, 136.0, 328.0, 951.0, 3260.0, 10754.0, 4126588.0, 37865.0, 9446.0, 2877.0, 956.0, 413.0, 188.0, 80.0, 56.0, 46.0, 27.0, 16.0, 19.0, 13.0, 8.0, 8.0, 10.0, 3.0, 5.0, 3.0, 4.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.10546875, -3.9580078125, -3.810546875, -3.6630859375, -3.515625, -3.3681640625, -3.220703125, -3.0732421875, -2.92578125, -2.7783203125, -2.630859375, -2.4833984375, -2.3359375, -2.1884765625, -2.041015625, -1.8935546875, -1.74609375, -1.5986328125, -1.451171875, -1.3037109375, -1.15625, -1.0087890625, -0.861328125, -0.7138671875, -0.56640625, -0.4189453125, -0.271484375, -0.1240234375, 0.0234375, 0.1708984375, 0.318359375, 0.4658203125, 0.61328125, 0.7607421875, 0.908203125, 1.0556640625, 1.203125, 1.3505859375, 1.498046875, 1.6455078125, 1.79296875, 1.9404296875, 2.087890625, 2.2353515625, 2.3828125, 2.5302734375, 2.677734375, 2.8251953125, 2.97265625, 3.1201171875, 3.267578125, 3.4150390625, 3.5625, 3.7099609375, 3.857421875, 4.0048828125, 4.15234375, 4.2998046875, 4.447265625, 4.5947265625, 4.7421875, 4.8896484375, 5.037109375, 5.1845703125, 5.33203125]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 6.0, 3.0, 1.0, 3.0, 8.0, 4.0, 2.0, 4.0, 6.0, 2.0, 9.0, 10.0, 9.0, 8.0, 18.0, 92.0, 620.0, 80.0, 16.0, 10.0, 8.0, 6.0, 10.0, 12.0, 4.0, 6.0, 6.0, 7.0, 9.0, 6.0, 2.0, 3.0, 4.0, 0.0, 3.0, 3.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.04425048828125, -0.04266023635864258, -0.041069984436035156, -0.039479732513427734, -0.03788948059082031, -0.03629922866821289, -0.03470897674560547, -0.03311872482299805, -0.031528472900390625, -0.029938220977783203, -0.02834796905517578, -0.02675771713256836, -0.025167465209960938, -0.023577213287353516, -0.021986961364746094, -0.020396709442138672, -0.01880645751953125, -0.017216205596923828, -0.015625953674316406, -0.014035701751708984, -0.012445449829101562, -0.01085519790649414, -0.009264945983886719, -0.007674694061279297, -0.006084442138671875, -0.004494190216064453, -0.0029039382934570312, -0.0013136863708496094, 0.0002765655517578125, 0.0018668174743652344, 0.0034570693969726562, 0.005047321319580078, 0.0066375732421875, 0.008227825164794922, 0.009818077087402344, 0.011408329010009766, 0.012998580932617188, 0.01458883285522461, 0.01617908477783203, 0.017769336700439453, 0.019359588623046875, 0.020949840545654297, 0.02254009246826172, 0.02413034439086914, 0.025720596313476562, 0.027310848236083984, 0.028901100158691406, 0.030491352081298828, 0.03208160400390625, 0.03367185592651367, 0.035262107849121094, 0.036852359771728516, 0.03844261169433594, 0.04003286361694336, 0.04162311553955078, 0.0432133674621582, 0.044803619384765625, 0.04639387130737305, 0.04798412322998047, 0.04957437515258789, 0.05116462707519531, 0.052754878997802734, 0.054345130920410156, 0.05593538284301758, 0.057525634765625]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 5.0, 3.0, 1.0, 0.0, 1.0, 1.0, 4.0, 15.0, 11.0, 24.0, 19.0, 30.0, 44.0, 70.0, 85.0, 110.0, 219.0, 383.0, 661.0, 1235.0, 2422.0, 4581.0, 9315.0, 21696.0, 59763.0, 3924615.0, 109276.0, 33105.0, 13297.0, 6198.0, 3287.0, 1619.0, 963.0, 431.0, 269.0, 174.0, 100.0, 63.0, 58.0, 49.0, 22.0, 18.0, 15.0, 9.0, 8.0, 5.0, 5.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.74267578125, -0.7206497192382812, -0.6986236572265625, -0.6765975952148438, -0.654571533203125, -0.6325454711914062, -0.6105194091796875, -0.5884933471679688, -0.56646728515625, -0.5444412231445312, -0.5224151611328125, -0.5003890991210938, -0.478363037109375, -0.45633697509765625, -0.4343109130859375, -0.41228485107421875, -0.3902587890625, -0.36823272705078125, -0.3462066650390625, -0.32418060302734375, -0.302154541015625, -0.28012847900390625, -0.2581024169921875, -0.23607635498046875, -0.21405029296875, -0.19202423095703125, -0.1699981689453125, -0.14797210693359375, -0.125946044921875, -0.10391998291015625, -0.0818939208984375, -0.05986785888671875, -0.037841796875, -0.01581573486328125, 0.0062103271484375, 0.02823638916015625, 0.050262451171875, 0.07228851318359375, 0.0943145751953125, 0.11634063720703125, 0.13836669921875, 0.16039276123046875, 0.1824188232421875, 0.20444488525390625, 0.226470947265625, 0.24849700927734375, 0.2705230712890625, 0.29254913330078125, 0.3145751953125, 0.33660125732421875, 0.3586273193359375, 0.38065338134765625, 0.402679443359375, 0.42470550537109375, 0.4467315673828125, 0.46875762939453125, 0.49078369140625, 0.5128097534179688, 0.5348358154296875, 0.5568618774414062, 0.578887939453125, 0.6009140014648438, 0.6229400634765625, 0.6449661254882812, 0.6669921875]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 3.0, 3.0, 4.0, 5.0, 12.0, 12.0, 5.0, 8.0, 14.0, 11.0, 10.0, 14.0, 19.0, 21.0, 17.0, 18.0, 20.0, 20.0, 73.0, 3540.0, 22.0, 24.0, 28.0, 17.0, 21.0, 13.0, 15.0, 18.0, 16.0, 8.0, 14.0, 10.0, 5.0, 14.0, 4.0, 3.0, 5.0, 9.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08935546875, -0.08640289306640625, -0.0834503173828125, -0.08049774169921875, -0.077545166015625, -0.07459259033203125, -0.0716400146484375, -0.06868743896484375, -0.06573486328125, -0.06278228759765625, -0.0598297119140625, -0.05687713623046875, -0.053924560546875, -0.05097198486328125, -0.0480194091796875, -0.04506683349609375, -0.0421142578125, -0.03916168212890625, -0.0362091064453125, -0.03325653076171875, -0.030303955078125, -0.02735137939453125, -0.0243988037109375, -0.02144622802734375, -0.01849365234375, -0.01554107666015625, -0.0125885009765625, -0.00963592529296875, -0.006683349609375, -0.00373077392578125, -0.0007781982421875, 0.00217437744140625, 0.005126953125, 0.00807952880859375, 0.0110321044921875, 0.01398468017578125, 0.016937255859375, 0.01988983154296875, 0.0228424072265625, 0.02579498291015625, 0.02874755859375, 0.03170013427734375, 0.0346527099609375, 0.03760528564453125, 0.040557861328125, 0.04351043701171875, 0.0464630126953125, 0.04941558837890625, 0.0523681640625, 0.05532073974609375, 0.0582733154296875, 0.06122589111328125, 0.064178466796875, 0.06713104248046875, 0.0700836181640625, 0.07303619384765625, 0.07598876953125, 0.07894134521484375, 0.0818939208984375, 0.08484649658203125, 0.087799072265625, 0.09075164794921875, 0.0937042236328125, 0.09665679931640625, 0.099609375]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 3.0, 10.0, 18.0, 33.0, 123.0, 581.0, 164.0, 50.0, 20.0, 9.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6601210832595825, -0.6332012414932251, -0.6062813401222229, -0.5793614983558655, -0.5524415969848633, -0.5255217552185059, -0.49860185384750366, -0.47168201208114624, -0.44476211071014404, -0.41784223914146423, -0.3909223675727844, -0.3640024960041046, -0.3370826244354248, -0.3101627826690674, -0.2832428812980652, -0.25632303953170776, -0.22940316796302795, -0.20248329639434814, -0.17556342482566833, -0.14864355325698853, -0.12172368913888931, -0.0948038250207901, -0.06788395345211029, -0.04096408188343048, -0.014044210314750671, 0.012875659391283989, 0.03979552909731865, 0.06671539694070816, 0.09363526850938797, 0.12055513262748718, 0.147475004196167, 0.1743948757648468, 0.2013147473335266, 0.22823461890220642, 0.25515449047088623, 0.28207436203956604, 0.30899423360824585, 0.33591407537460327, 0.36283397674560547, 0.3897538185119629, 0.4166737198829651, 0.4435935914516449, 0.4705134630203247, 0.4974333345890045, 0.5243532061576843, 0.5512730479240417, 0.578192949295044, 0.6051127910614014, 0.6320326328277588, 0.6589524745941162, 0.6858723759651184, 0.7127922177314758, 0.739712119102478, 0.7666319608688354, 0.7935518622398376, 0.8204717040061951, 0.8473916053771973, 0.8743114471435547, 0.9012313485145569, 0.9281511902809143, 0.9550710916519165, 0.9819909334182739, 1.0089107751846313, 1.0358307361602783, 1.0627505779266357]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 5.0, 2.0, 3.0, 2.0, 6.0, 7.0, 9.0, 5.0, 14.0, 15.0, 11.0, 20.0, 34.0, 64.0, 67.0, 102.0, 126.0, 135.0, 107.0, 113.0, 57.0, 38.0, 22.0, 13.0, 5.0, 6.0, 3.0, 5.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 0.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5732578039169312, -0.5556954145431519, -0.5381330847740173, -0.520570695400238, -0.5030083060264587, -0.48544594645500183, -0.4678835868835449, -0.4503211975097656, -0.4327588379383087, -0.4151964783668518, -0.3976340889930725, -0.3800717294216156, -0.3625093698501587, -0.3449469804763794, -0.3273846209049225, -0.3098222613334656, -0.2922598719596863, -0.27469751238822937, -0.2571351230144501, -0.23957276344299316, -0.22201038897037506, -0.20444801449775696, -0.18688565492630005, -0.16932328045368195, -0.15176090598106384, -0.13419853150844574, -0.11663616448640823, -0.09907379746437073, -0.08151142299175262, -0.06394904851913452, -0.046386681497097015, -0.02882431447505951, -0.011261940002441406, 0.006300430744886398, 0.023862801492214203, 0.04142517223954201, 0.05898754298686981, 0.07654991745948792, 0.09411228448152542, 0.11167465150356293, 0.12923702597618103, 0.14679940044879913, 0.16436177492141724, 0.18192413449287415, 0.19948650896549225, 0.21704888343811035, 0.23461124300956726, 0.25217360258102417, 0.26973599195480347, 0.2872983515262604, 0.3048607409000397, 0.3224231004714966, 0.3399854898452759, 0.3575478494167328, 0.3751102089881897, 0.392672598361969, 0.4102349579334259, 0.4277973175048828, 0.4453597068786621, 0.462922066450119, 0.4804844260215759, 0.4980468153953552, 0.5156092047691345, 0.533171534538269, 0.5507339239120483]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 4.0, 4.0, 9.0, 9.0, 19.0, 19.0, 25.0, 55.0, 71.0, 97.0, 160.0, 243.0, 414.0, 635.0, 1015.0, 1585.0, 2683.0, 4506.0, 7647.0, 13868.0, 28708.0, 777418.0, 158807.0, 22330.0, 11621.0, 6528.0, 3881.0, 2310.0, 1417.0, 901.0, 563.0, 345.0, 218.0, 130.0, 98.0, 73.0, 48.0, 26.0, 20.0, 14.0, 12.0, 9.0, 4.0, 4.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2939453125, -0.28339385986328125, -0.2728424072265625, -0.26229095458984375, -0.251739501953125, -0.24118804931640625, -0.2306365966796875, -0.22008514404296875, -0.20953369140625, -0.19898223876953125, -0.1884307861328125, -0.17787933349609375, -0.167327880859375, -0.15677642822265625, -0.1462249755859375, -0.13567352294921875, -0.1251220703125, -0.11457061767578125, -0.1040191650390625, -0.09346771240234375, -0.082916259765625, -0.07236480712890625, -0.0618133544921875, -0.05126190185546875, -0.04071044921875, -0.03015899658203125, -0.0196075439453125, -0.00905609130859375, 0.001495361328125, 0.01204681396484375, 0.0225982666015625, 0.03314971923828125, 0.043701171875, 0.05425262451171875, 0.0648040771484375, 0.07535552978515625, 0.085906982421875, 0.09645843505859375, 0.1070098876953125, 0.11756134033203125, 0.12811279296875, 0.13866424560546875, 0.1492156982421875, 0.15976715087890625, 0.170318603515625, 0.18087005615234375, 0.1914215087890625, 0.20197296142578125, 0.2125244140625, 0.22307586669921875, 0.2336273193359375, 0.24417877197265625, 0.254730224609375, 0.26528167724609375, 0.2758331298828125, 0.28638458251953125, 0.29693603515625, 0.30748748779296875, 0.3180389404296875, 0.32859039306640625, 0.339141845703125, 0.34969329833984375, 0.3602447509765625, 0.37079620361328125, 0.38134765625]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 2.0, 2.0, 0.0, 2.0, 2.0, 4.0, 5.0, 7.0, 7.0, 3.0, 4.0, 8.0, 5.0, 9.0, 7.0, 15.0, 34.0, 192.0, 420.0, 149.0, 32.0, 10.0, 5.0, 6.0, 9.0, 9.0, 9.0, 6.0, 8.0, 6.0, 7.0, 3.0, 2.0, 6.0, 2.0, 4.0, 1.0, 1.0, 4.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0390625, -0.03765439987182617, -0.036246299743652344, -0.034838199615478516, -0.03343009948730469, -0.03202199935913086, -0.03061389923095703, -0.029205799102783203, -0.027797698974609375, -0.026389598846435547, -0.02498149871826172, -0.02357339859008789, -0.022165298461914062, -0.020757198333740234, -0.019349098205566406, -0.017940998077392578, -0.01653289794921875, -0.015124797821044922, -0.013716697692871094, -0.012308597564697266, -0.010900497436523438, -0.00949239730834961, -0.008084297180175781, -0.006676197052001953, -0.005268096923828125, -0.003859996795654297, -0.0024518966674804688, -0.0010437965393066406, 0.0003643035888671875, 0.0017724037170410156, 0.0031805038452148438, 0.004588603973388672, 0.0059967041015625, 0.007404804229736328, 0.008812904357910156, 0.010221004486083984, 0.011629104614257812, 0.01303720474243164, 0.014445304870605469, 0.015853404998779297, 0.017261505126953125, 0.018669605255126953, 0.02007770538330078, 0.02148580551147461, 0.022893905639648438, 0.024302005767822266, 0.025710105895996094, 0.027118206024169922, 0.02852630615234375, 0.029934406280517578, 0.031342506408691406, 0.032750606536865234, 0.03415870666503906, 0.03556680679321289, 0.03697490692138672, 0.03838300704956055, 0.039791107177734375, 0.0411992073059082, 0.04260730743408203, 0.04401540756225586, 0.04542350769042969, 0.046831607818603516, 0.048239707946777344, 0.04964780807495117, 0.051055908203125]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 7.0, 3.0, 5.0, 4.0, 3.0, 5.0, 8.0, 7.0, 12.0, 13.0, 28.0, 32.0, 32.0, 58.0, 92.0, 132.0, 180.0, 335.0, 726.0, 1972.0, 6400.0, 24358.0, 157110.0, 753302.0, 81788.0, 15159.0, 4027.0, 1369.0, 600.0, 275.0, 169.0, 81.0, 60.0, 57.0, 29.0, 22.0, 15.0, 15.0, 16.0, 12.0, 11.0, 9.0, 6.0, 9.0, 3.0, 2.0, 4.0, 1.0, 0.0, 1.0, 5.0, 1.0, 0.0, 1.0], "bins": [-0.488037109375, -0.4734039306640625, -0.458770751953125, -0.4441375732421875, -0.42950439453125, -0.4148712158203125, -0.400238037109375, -0.3856048583984375, -0.3709716796875, -0.3563385009765625, -0.341705322265625, -0.3270721435546875, -0.31243896484375, -0.2978057861328125, -0.283172607421875, -0.2685394287109375, -0.25390625, -0.2392730712890625, -0.224639892578125, -0.2100067138671875, -0.19537353515625, -0.1807403564453125, -0.166107177734375, -0.1514739990234375, -0.1368408203125, -0.1222076416015625, -0.107574462890625, -0.0929412841796875, -0.07830810546875, -0.0636749267578125, -0.049041748046875, -0.0344085693359375, -0.019775390625, -0.0051422119140625, 0.009490966796875, 0.0241241455078125, 0.03875732421875, 0.0533905029296875, 0.068023681640625, 0.0826568603515625, 0.0972900390625, 0.1119232177734375, 0.126556396484375, 0.1411895751953125, 0.15582275390625, 0.1704559326171875, 0.185089111328125, 0.1997222900390625, 0.21435546875, 0.2289886474609375, 0.243621826171875, 0.2582550048828125, 0.27288818359375, 0.2875213623046875, 0.302154541015625, 0.3167877197265625, 0.3314208984375, 0.3460540771484375, 0.360687255859375, 0.3753204345703125, 0.38995361328125, 0.4045867919921875, 0.419219970703125, 0.4338531494140625, 0.448486328125]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 5.0, 1.0, 1.0, 0.0, 4.0, 2.0, 3.0, 9.0, 6.0, 9.0, 11.0, 11.0, 16.0, 12.0, 15.0, 16.0, 19.0, 22.0, 24.0, 26.0, 31.0, 38.0, 47.0, 47.0, 32.0, 34.0, 38.0, 41.0, 52.0, 46.0, 47.0, 44.0, 29.0, 37.0, 27.0, 29.0, 35.0, 29.0, 19.0, 19.0, 18.0, 13.0, 10.0, 8.0, 8.0, 5.0, 3.0, 3.0, 5.0, 4.0, 7.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.137451171875, -0.1329669952392578, -0.12848281860351562, -0.12399864196777344, -0.11951446533203125, -0.11503028869628906, -0.11054611206054688, -0.10606193542480469, -0.1015777587890625, -0.09709358215332031, -0.09260940551757812, -0.08812522888183594, -0.08364105224609375, -0.07915687561035156, -0.07467269897460938, -0.07018852233886719, -0.065704345703125, -0.06122016906738281, -0.056735992431640625, -0.05225181579589844, -0.04776763916015625, -0.04328346252441406, -0.038799285888671875, -0.03431510925292969, -0.0298309326171875, -0.025346755981445312, -0.020862579345703125, -0.016378402709960938, -0.01189422607421875, -0.0074100494384765625, -0.002925872802734375, 0.0015583038330078125, 0.00604248046875, 0.010526657104492188, 0.015010833740234375, 0.019495010375976562, 0.02397918701171875, 0.028463363647460938, 0.032947540283203125, 0.03743171691894531, 0.0419158935546875, 0.04640007019042969, 0.050884246826171875, 0.05536842346191406, 0.05985260009765625, 0.06433677673339844, 0.06882095336914062, 0.07330513000488281, 0.077789306640625, 0.08227348327636719, 0.08675765991210938, 0.09124183654785156, 0.09572601318359375, 0.10021018981933594, 0.10469436645507812, 0.10917854309082031, 0.1136627197265625, 0.11814689636230469, 0.12263107299804688, 0.12711524963378906, 0.13159942626953125, 0.13608360290527344, 0.14056777954101562, 0.1450519561767578, 0.1495361328125]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 6.0, 6.0, 12.0, 8.0, 9.0, 18.0, 22.0, 53.0, 66.0, 110.0, 209.0, 542.0, 1620.0, 8033.0, 76418.0, 890377.0, 61603.0, 6971.0, 1458.0, 469.0, 233.0, 121.0, 72.0, 45.0, 21.0, 15.0, 17.0, 10.0, 9.0, 3.0, 2.0, 3.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.013153076171875, -0.01273500919342041, -0.01231694221496582, -0.01189887523651123, -0.01148080825805664, -0.01106274127960205, -0.010644674301147461, -0.010226607322692871, -0.009808540344238281, -0.009390473365783691, -0.008972406387329102, -0.008554339408874512, -0.008136272430419922, -0.007718205451965332, -0.007300138473510742, -0.006882071495056152, -0.0064640045166015625, -0.006045937538146973, -0.005627870559692383, -0.005209803581237793, -0.004791736602783203, -0.004373669624328613, -0.0039556026458740234, -0.0035375356674194336, -0.0031194686889648438, -0.002701401710510254, -0.002283334732055664, -0.0018652677536010742, -0.0014472007751464844, -0.0010291337966918945, -0.0006110668182373047, -0.00019299983978271484, 0.000225067138671875, 0.0006431341171264648, 0.0010612010955810547, 0.0014792680740356445, 0.0018973350524902344, 0.0023154020309448242, 0.002733469009399414, 0.003151535987854004, 0.0035696029663085938, 0.003987669944763184, 0.0044057369232177734, 0.004823803901672363, 0.005241870880126953, 0.005659937858581543, 0.006078004837036133, 0.006496071815490723, 0.0069141387939453125, 0.007332205772399902, 0.007750272750854492, 0.008168339729309082, 0.008586406707763672, 0.009004473686218262, 0.009422540664672852, 0.009840607643127441, 0.010258674621582031, 0.010676741600036621, 0.011094808578491211, 0.0115128755569458, 0.01193094253540039, 0.01234900951385498, 0.01276707649230957, 0.01318514347076416, 0.01360321044921875]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 3.0, 2.0, 6.0, 2.0, 2.0, 4.0, 4.0, 5.0, 18.0, 9.0, 29.0, 13.0, 22.0, 39.0, 43.0, 29.0, 67.0, 34.0, 40.0, 71.0, 47.0, 90.0, 36.0, 34.0, 61.0, 38.0, 38.0, 57.0, 31.0, 20.0, 40.0, 15.0, 15.0, 13.0, 6.0, 3.0, 4.0, 4.0, 9.0, 3.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.0994415283203125e-06, -3.018416464328766e-06, -2.9373914003372192e-06, -2.8563663363456726e-06, -2.775341272354126e-06, -2.6943162083625793e-06, -2.6132911443710327e-06, -2.532266080379486e-06, -2.4512410163879395e-06, -2.370215952396393e-06, -2.289190888404846e-06, -2.2081658244132996e-06, -2.127140760421753e-06, -2.0461156964302063e-06, -1.9650906324386597e-06, -1.884065568447113e-06, -1.8030405044555664e-06, -1.7220154404640198e-06, -1.6409903764724731e-06, -1.5599653124809265e-06, -1.4789402484893799e-06, -1.3979151844978333e-06, -1.3168901205062866e-06, -1.23586505651474e-06, -1.1548399925231934e-06, -1.0738149285316467e-06, -9.927898645401e-07, -9.117648005485535e-07, -8.307397365570068e-07, -7.497146725654602e-07, -6.686896085739136e-07, -5.876645445823669e-07, -5.066394805908203e-07, -4.256144165992737e-07, -3.4458935260772705e-07, -2.635642886161804e-07, -1.825392246246338e-07, -1.0151416063308716e-07, -2.0489096641540527e-08, 6.05359673500061e-08, 1.4156103134155273e-07, 2.2258609533309937e-07, 3.03611159324646e-07, 3.8463622331619263e-07, 4.6566128730773926e-07, 5.466863512992859e-07, 6.277114152908325e-07, 7.087364792823792e-07, 7.897615432739258e-07, 8.707866072654724e-07, 9.51811671257019e-07, 1.0328367352485657e-06, 1.1138617992401123e-06, 1.194886863231659e-06, 1.2759119272232056e-06, 1.3569369912147522e-06, 1.4379620552062988e-06, 1.5189871191978455e-06, 1.600012183189392e-06, 1.6810372471809387e-06, 1.7620623111724854e-06, 1.843087375164032e-06, 1.9241124391555786e-06, 2.0051375031471252e-06, 2.086162567138672e-06]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 2.0, 2.0, 7.0, 8.0, 10.0, 8.0, 16.0, 14.0, 28.0, 42.0, 84.0, 113.0, 283.0, 629.0, 2023.0, 7812.0, 60015.0, 902349.0, 63964.0, 8098.0, 1831.0, 597.0, 260.0, 151.0, 71.0, 46.0, 24.0, 17.0, 11.0, 10.0, 7.0, 4.0, 9.0, 1.0, 6.0, 2.0, 2.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01280975341796875, -0.012375712394714355, -0.011941671371459961, -0.011507630348205566, -0.011073589324951172, -0.010639548301696777, -0.010205507278442383, -0.009771466255187988, -0.009337425231933594, -0.0089033842086792, -0.008469343185424805, -0.00803530216217041, -0.007601261138916016, -0.007167220115661621, -0.0067331790924072266, -0.006299138069152832, -0.0058650970458984375, -0.005431056022644043, -0.0049970149993896484, -0.004562973976135254, -0.004128932952880859, -0.003694891929626465, -0.0032608509063720703, -0.0028268098831176758, -0.0023927688598632812, -0.0019587278366088867, -0.0015246868133544922, -0.0010906457901000977, -0.0006566047668457031, -0.0002225637435913086, 0.00021147727966308594, 0.0006455183029174805, 0.001079559326171875, 0.0015136003494262695, 0.001947641372680664, 0.0023816823959350586, 0.002815723419189453, 0.0032497644424438477, 0.003683805465698242, 0.004117846488952637, 0.004551887512207031, 0.004985928535461426, 0.00541996955871582, 0.005854010581970215, 0.006288051605224609, 0.006722092628479004, 0.0071561336517333984, 0.007590174674987793, 0.008024215698242188, 0.008458256721496582, 0.008892297744750977, 0.009326338768005371, 0.009760379791259766, 0.01019442081451416, 0.010628461837768555, 0.01106250286102295, 0.011496543884277344, 0.011930584907531738, 0.012364625930786133, 0.012798666954040527, 0.013232707977294922, 0.013666749000549316, 0.014100790023803711, 0.014534831047058105, 0.0149688720703125]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 2.0, 2.0, 6.0, 2.0, 8.0, 4.0, 2.0, 8.0, 9.0, 12.0, 9.0, 21.0, 24.0, 34.0, 40.0, 53.0, 62.0, 72.0, 86.0, 86.0, 86.0, 70.0, 75.0, 59.0, 43.0, 33.0, 26.0, 15.0, 7.0, 4.0, 11.0, 10.0, 7.0, 8.0, 5.0, 1.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.004589080810546875, -0.0044558048248291016, -0.004322528839111328, -0.004189252853393555, -0.004055976867675781, -0.003922700881958008, -0.0037894248962402344, -0.003656148910522461, -0.0035228729248046875, -0.003389596939086914, -0.0032563209533691406, -0.003123044967651367, -0.0029897689819335938, -0.0028564929962158203, -0.002723217010498047, -0.0025899410247802734, -0.0024566650390625, -0.0023233890533447266, -0.002190113067626953, -0.0020568370819091797, -0.0019235610961914062, -0.0017902851104736328, -0.0016570091247558594, -0.001523733139038086, -0.0013904571533203125, -0.001257181167602539, -0.0011239051818847656, -0.0009906291961669922, -0.0008573532104492188, -0.0007240772247314453, -0.0005908012390136719, -0.00045752525329589844, -0.000324249267578125, -0.00019097328186035156, -5.7697296142578125e-05, 7.557868957519531e-05, 0.00020885467529296875, 0.0003421306610107422, 0.0004754066467285156, 0.0006086826324462891, 0.0007419586181640625, 0.0008752346038818359, 0.0010085105895996094, 0.0011417865753173828, 0.0012750625610351562, 0.0014083385467529297, 0.0015416145324707031, 0.0016748905181884766, 0.00180816650390625, 0.0019414424896240234, 0.002074718475341797, 0.0022079944610595703, 0.0023412704467773438, 0.002474546432495117, 0.0026078224182128906, 0.002741098403930664, 0.0028743743896484375, 0.003007650375366211, 0.0031409263610839844, 0.003274202346801758, 0.0034074783325195312, 0.0035407543182373047, 0.003674030303955078, 0.0038073062896728516, 0.003940582275390625]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 35.0, 817.0, 151.0, 15.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-18.07615089416504, -17.73394203186035, -17.391733169555664, -17.049524307250977, -16.70731544494629, -16.3651065826416, -16.022897720336914, -15.680688858032227, -15.338479995727539, -14.996271133422852, -14.654062271118164, -14.311853408813477, -13.969644546508789, -13.627435684204102, -13.285226821899414, -12.943017959594727, -12.600809097290039, -12.258600234985352, -11.916391372680664, -11.574182510375977, -11.231973648071289, -10.889764785766602, -10.547555923461914, -10.205347061157227, -9.863139152526855, -9.520930290222168, -9.17872142791748, -8.836512565612793, -8.494303703308105, -8.152094841003418, -7.8098859786987305, -7.467677116394043, -7.125467300415039, -6.783258438110352, -6.441049575805664, -6.098840713500977, -5.756631851196289, -5.414422988891602, -5.072214126586914, -4.730005264282227, -4.387796878814697, -4.04558801651001, -3.7033791542053223, -3.3611702919006348, -3.0189614295959473, -2.6767525672912598, -2.3345439434051514, -1.9923350811004639, -1.6501262187957764, -1.3079173564910889, -0.9657085537910461, -0.6234997510910034, -0.2812908887863159, 0.06091797351837158, 0.40312671661376953, 0.745335578918457, 1.0875444412231445, 1.429753303527832, 1.7719621658325195, 2.114171028137207, 2.4563798904418945, 2.798588752746582, 3.1407973766326904, 3.483006238937378, 3.8252151012420654]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 3.0, 5.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 6.0, 7.0, 8.0, 19.0, 27.0, 22.0, 37.0, 40.0, 60.0, 77.0, 76.0, 86.0, 93.0, 80.0, 86.0, 54.0, 43.0, 36.0, 38.0, 19.0, 16.0, 14.0, 5.0, 9.0, 3.0, 4.0, 5.0, 4.0, 4.0, 5.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5708682537078857, -1.517352819442749, -1.4638375043869019, -1.4103220701217651, -1.3568066358566284, -1.3032913208007812, -1.2497758865356445, -1.1962604522705078, -1.142745018005371, -1.0892295837402344, -1.0357142686843872, -0.9821988344192505, -0.9286834001541138, -0.8751680254936218, -0.8216526508331299, -0.7681372165679932, -0.714621901512146, -0.661106526851654, -0.6075910925865173, -0.5540757179260254, -0.5005602836608887, -0.44704490900039673, -0.3935295343399048, -0.34001412987709045, -0.2864987254142761, -0.2329833209514618, -0.17946793138980865, -0.12595254182815552, -0.07243713736534119, -0.018921732902526855, 0.03459364175796509, 0.08810904622077942, 0.14162445068359375, 0.19513985514640808, 0.24865524470806122, 0.30217063426971436, 0.3556860387325287, 0.409201443195343, 0.46271681785583496, 0.5162322521209717, 0.5697476267814636, 0.6232630014419556, 0.6767784357070923, 0.7302938103675842, 0.7838091850280762, 0.8373246192932129, 0.8908399939537048, 0.9443553686141968, 0.9978708028793335, 1.0513862371444702, 1.1049015522003174, 1.158416986465454, 1.2119324207305908, 1.2654478549957275, 1.3189631700515747, 1.3724786043167114, 1.4259939193725586, 1.4795093536376953, 1.5330246686935425, 1.5865401029586792, 1.640055537223816, 1.693570852279663, 1.7470862865447998, 1.8006017208099365, 1.8541171550750732]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 4.0, 1.0, 1.0, 3.0, 7.0, 3.0, 5.0, 7.0, 5.0, 8.0, 5.0, 17.0, 32.0, 113.0, 518.0, 2111.0, 8966.0, 68425.0, 4082767.0, 22794.0, 5956.0, 1720.0, 500.0, 157.0, 67.0, 24.0, 15.0, 7.0, 10.0, 3.0, 4.0, 6.0, 5.0, 4.0, 5.0, 2.0, 4.0, 1.0, 4.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.708984375, -1.64385986328125, -1.5787353515625, -1.51361083984375, -1.448486328125, -1.38336181640625, -1.3182373046875, -1.25311279296875, -1.18798828125, -1.12286376953125, -1.0577392578125, -0.99261474609375, -0.927490234375, -0.86236572265625, -0.7972412109375, -0.73211669921875, -0.6669921875, -0.60186767578125, -0.5367431640625, -0.47161865234375, -0.406494140625, -0.34136962890625, -0.2762451171875, -0.21112060546875, -0.14599609375, -0.08087158203125, -0.0157470703125, 0.04937744140625, 0.114501953125, 0.17962646484375, 0.2447509765625, 0.30987548828125, 0.375, 0.44012451171875, 0.5052490234375, 0.57037353515625, 0.635498046875, 0.70062255859375, 0.7657470703125, 0.83087158203125, 0.89599609375, 0.96112060546875, 1.0262451171875, 1.09136962890625, 1.156494140625, 1.22161865234375, 1.2867431640625, 1.35186767578125, 1.4169921875, 1.48211669921875, 1.5472412109375, 1.61236572265625, 1.677490234375, 1.74261474609375, 1.8077392578125, 1.87286376953125, 1.93798828125, 2.00311279296875, 2.0682373046875, 2.13336181640625, 2.198486328125, 2.26361083984375, 2.3287353515625, 2.39385986328125, 2.458984375]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 4.0, 1.0, 1.0, 3.0, 7.0, 3.0, 5.0, 7.0, 5.0, 8.0, 3.0, 11.0, 8.0, 11.0, 17.0, 34.0, 127.0, 259.0, 241.0, 112.0, 34.0, 16.0, 4.0, 9.0, 12.0, 4.0, 9.0, 5.0, 8.0, 3.0, 5.0, 6.0, 4.0, 4.0, 5.0, 2.0, 4.0, 1.0, 4.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.029937744140625, -0.02879810333251953, -0.027658462524414062, -0.026518821716308594, -0.025379180908203125, -0.024239540100097656, -0.023099899291992188, -0.02196025848388672, -0.02082061767578125, -0.01968097686767578, -0.018541336059570312, -0.017401695251464844, -0.016262054443359375, -0.015122413635253906, -0.013982772827148438, -0.012843132019042969, -0.0117034912109375, -0.010563850402832031, -0.009424209594726562, -0.008284568786621094, -0.007144927978515625, -0.006005287170410156, -0.0048656463623046875, -0.0037260055541992188, -0.00258636474609375, -0.0014467239379882812, -0.0003070831298828125, 0.0008325576782226562, 0.001972198486328125, 0.0031118392944335938, 0.0042514801025390625, 0.005391120910644531, 0.00653076171875, 0.007670402526855469, 0.008810043334960938, 0.009949684143066406, 0.011089324951171875, 0.012228965759277344, 0.013368606567382812, 0.014508247375488281, 0.01564788818359375, 0.01678752899169922, 0.017927169799804688, 0.019066810607910156, 0.020206451416015625, 0.021346092224121094, 0.022485733032226562, 0.02362537384033203, 0.0247650146484375, 0.02590465545654297, 0.027044296264648438, 0.028183937072753906, 0.029323577880859375, 0.030463218688964844, 0.03160285949707031, 0.03274250030517578, 0.03388214111328125, 0.03502178192138672, 0.03616142272949219, 0.037301063537597656, 0.038440704345703125, 0.039580345153808594, 0.04071998596191406, 0.04185962677001953, 0.042999267578125]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 2.0, 5.0, 4.0, 7.0, 9.0, 12.0, 12.0, 8.0, 13.0, 15.0, 19.0, 21.0, 20.0, 27.0, 25.0, 37.0, 64.0, 68.0, 108.0, 211.0, 880.0, 5455.0, 62341.0, 4052160.0, 65029.0, 6032.0, 977.0, 270.0, 91.0, 72.0, 45.0, 36.0, 48.0, 27.0, 22.0, 13.0, 15.0, 17.0, 11.0, 12.0, 6.0, 6.0, 3.0, 3.0, 3.0, 6.0, 4.0, 5.0, 3.0, 1.0, 3.0, 1.0], "bins": [-1.375, -1.336273193359375, -1.29754638671875, -1.258819580078125, -1.2200927734375, -1.181365966796875, -1.14263916015625, -1.103912353515625, -1.065185546875, -1.026458740234375, -0.98773193359375, -0.949005126953125, -0.9102783203125, -0.871551513671875, -0.83282470703125, -0.794097900390625, -0.75537109375, -0.716644287109375, -0.67791748046875, -0.639190673828125, -0.6004638671875, -0.561737060546875, -0.52301025390625, -0.484283447265625, -0.445556640625, -0.406829833984375, -0.36810302734375, -0.329376220703125, -0.2906494140625, -0.251922607421875, -0.21319580078125, -0.174468994140625, -0.1357421875, -0.097015380859375, -0.05828857421875, -0.019561767578125, 0.0191650390625, 0.057891845703125, 0.09661865234375, 0.135345458984375, 0.174072265625, 0.212799072265625, 0.25152587890625, 0.290252685546875, 0.3289794921875, 0.367706298828125, 0.40643310546875, 0.445159912109375, 0.48388671875, 0.522613525390625, 0.56134033203125, 0.600067138671875, 0.6387939453125, 0.677520751953125, 0.71624755859375, 0.754974365234375, 0.793701171875, 0.832427978515625, 0.87115478515625, 0.909881591796875, 0.9486083984375, 0.987335205078125, 1.02606201171875, 1.064788818359375, 1.103515625]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 3.0, 3.0, 2.0, 7.0, 3.0, 4.0, 4.0, 2.0, 5.0, 7.0, 8.0, 9.0, 14.0, 16.0, 16.0, 12.0, 18.0, 26.0, 33.0, 25.0, 34.0, 40.0, 37.0, 51.0, 87.0, 79.0, 244.0, 2710.0, 106.0, 85.0, 50.0, 40.0, 36.0, 40.0, 34.0, 21.0, 13.0, 20.0, 16.0, 21.0, 13.0, 12.0, 10.0, 11.0, 11.0, 13.0, 6.0, 9.0, 3.0, 6.0, 0.0, 5.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.05694580078125, -0.05492877960205078, -0.05291175842285156, -0.050894737243652344, -0.048877716064453125, -0.046860694885253906, -0.04484367370605469, -0.04282665252685547, -0.04080963134765625, -0.03879261016845703, -0.03677558898925781, -0.034758567810058594, -0.032741546630859375, -0.030724525451660156, -0.028707504272460938, -0.02669048309326172, -0.0246734619140625, -0.02265644073486328, -0.020639419555664062, -0.018622398376464844, -0.016605377197265625, -0.014588356018066406, -0.012571334838867188, -0.010554313659667969, -0.00853729248046875, -0.006520271301269531, -0.0045032501220703125, -0.0024862289428710938, -0.000469207763671875, 0.0015478134155273438, 0.0035648345947265625, 0.005581855773925781, 0.007598876953125, 0.009615898132324219, 0.011632919311523438, 0.013649940490722656, 0.015666961669921875, 0.017683982849121094, 0.019701004028320312, 0.02171802520751953, 0.02373504638671875, 0.02575206756591797, 0.027769088745117188, 0.029786109924316406, 0.031803131103515625, 0.033820152282714844, 0.03583717346191406, 0.03785419464111328, 0.0398712158203125, 0.04188823699951172, 0.04390525817871094, 0.045922279357910156, 0.047939300537109375, 0.049956321716308594, 0.05197334289550781, 0.05399036407470703, 0.05600738525390625, 0.05802440643310547, 0.06004142761230469, 0.062058448791503906, 0.06407546997070312, 0.06609249114990234, 0.06810951232910156, 0.07012653350830078, 0.0721435546875]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 3.0, 3.0, 56.0, 759.0, 174.0, 17.0, 5.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2897108793258667, -0.22910049557685852, -0.16849011182785034, -0.10787972807884216, -0.047269344329833984, 0.013341039419174194, 0.07395142316818237, 0.13456180691719055, 0.19517219066619873, 0.2557825744152069, 0.3163929581642151, 0.37700334191322327, 0.43761372566223145, 0.4982241094112396, 0.5588344931602478, 0.6194448471069336, 0.6800552606582642, 0.7406656742095947, 0.8012760281562805, 0.8618863821029663, 0.9224967956542969, 0.9831072092056274, 1.043717622756958, 1.104327917098999, 1.1649383306503296, 1.2255487442016602, 1.2861590385437012, 1.3467694520950317, 1.4073798656463623, 1.4679902791976929, 1.5286006927490234, 1.5892109870910645, 1.6498212814331055, 1.710431694984436, 1.7710421085357666, 1.8316524028778076, 1.8922628164291382, 1.9528732299804688, 2.0134835243225098, 2.07409405708313, 2.134704351425171, 2.195314645767212, 2.255925178527832, 2.316535472869873, 2.377145767211914, 2.437756299972534, 2.498366594314575, 2.5589771270751953, 2.6195874214172363, 2.6801977157592773, 2.7408082485198975, 2.8014185428619385, 2.8620290756225586, 2.9226393699645996, 2.9832496643066406, 3.0438601970672607, 3.1044704914093018, 3.1650807857513428, 3.225691318511963, 3.286301612854004, 3.346911907196045, 3.407522439956665, 3.468132734298706, 3.528743267059326, 3.589353561401367]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 3.0, 2.0, 2.0, 2.0, 4.0, 1.0, 5.0, 3.0, 6.0, 7.0, 6.0, 15.0, 15.0, 19.0, 21.0, 21.0, 28.0, 29.0, 32.0, 44.0, 46.0, 72.0, 74.0, 66.0, 72.0, 69.0, 47.0, 64.0, 40.0, 40.0, 32.0, 23.0, 23.0, 17.0, 18.0, 7.0, 6.0, 6.0, 5.0, 3.0, 1.0, 2.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.41621267795562744, -0.4036838412284851, -0.3911550045013428, -0.37862616777420044, -0.3660973310470581, -0.35356849431991577, -0.34103965759277344, -0.3285107910633087, -0.3159819543361664, -0.30345311760902405, -0.2909242808818817, -0.2783954441547394, -0.26586660742759705, -0.2533377408981323, -0.24080891907215118, -0.22828006744384766, -0.21575124561786652, -0.20322240889072418, -0.19069357216358185, -0.17816472053527832, -0.165635883808136, -0.15310704708099365, -0.14057821035385132, -0.12804937362670898, -0.11552052944898605, -0.10299169272184372, -0.09046284854412079, -0.07793401181697845, -0.06540517508983612, -0.05287633091211319, -0.040347494184970856, -0.027818650007247925, -0.01528981328010559, -0.002760973758995533, 0.009767865762114525, 0.022296704351902008, 0.03482554480433464, 0.04735438525676727, 0.05988322198390961, 0.07241206616163254, 0.08494090288877487, 0.0974697396159172, 0.10999858379364014, 0.12252742052078247, 0.1350562572479248, 0.14758509397506714, 0.16011393070220947, 0.172642782330513, 0.18517161905765533, 0.19770045578479767, 0.21022929251194, 0.22275814414024353, 0.23528698086738586, 0.2478158175945282, 0.26034465432167053, 0.27287349104881287, 0.2854023277759552, 0.29793116450309753, 0.31046000123023987, 0.3229888379573822, 0.33551767468452454, 0.34804654121398926, 0.3605753779411316, 0.3731042146682739, 0.38563305139541626]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 4.0, 1.0, 3.0, 0.0, 4.0, 5.0, 8.0, 7.0, 11.0, 21.0, 23.0, 27.0, 55.0, 66.0, 89.0, 116.0, 158.0, 251.0, 333.0, 503.0, 692.0, 1103.0, 1643.0, 2554.0, 4131.0, 6963.0, 13155.0, 31465.0, 142974.0, 706376.0, 84301.0, 23827.0, 11058.0, 6068.0, 3576.0, 2290.0, 1495.0, 997.0, 645.0, 465.0, 326.0, 212.0, 166.0, 105.0, 78.0, 70.0, 34.0, 32.0, 26.0, 19.0, 9.0, 7.0, 9.0, 4.0, 3.0, 2.0, 2.0, 5.0, 0.0, 1.0, 1.0], "bins": [-0.1937255859375, -0.18774032592773438, -0.18175506591796875, -0.17576980590820312, -0.1697845458984375, -0.16379928588867188, -0.15781402587890625, -0.15182876586914062, -0.145843505859375, -0.13985824584960938, -0.13387298583984375, -0.12788772583007812, -0.1219024658203125, -0.11591720581054688, -0.10993194580078125, -0.10394668579101562, -0.09796142578125, -0.09197616577148438, -0.08599090576171875, -0.08000564575195312, -0.0740203857421875, -0.06803512573242188, -0.06204986572265625, -0.056064605712890625, -0.050079345703125, -0.044094085693359375, -0.03810882568359375, -0.032123565673828125, -0.0261383056640625, -0.020153045654296875, -0.01416778564453125, -0.008182525634765625, -0.002197265625, 0.003787994384765625, 0.00977325439453125, 0.015758514404296875, 0.0217437744140625, 0.027729034423828125, 0.03371429443359375, 0.039699554443359375, 0.045684814453125, 0.051670074462890625, 0.05765533447265625, 0.06364059448242188, 0.0696258544921875, 0.07561111450195312, 0.08159637451171875, 0.08758163452148438, 0.09356689453125, 0.09955215454101562, 0.10553741455078125, 0.11152267456054688, 0.1175079345703125, 0.12349319458007812, 0.12947845458984375, 0.13546371459960938, 0.141448974609375, 0.14743423461914062, 0.15341949462890625, 0.15940475463867188, 0.1653900146484375, 0.17137527465820312, 0.17736053466796875, 0.18334579467773438, 0.1893310546875]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 4.0, 2.0, 4.0, 3.0, 3.0, 5.0, 4.0, 10.0, 7.0, 11.0, 10.0, 16.0, 39.0, 52.0, 108.0, 185.0, 174.0, 143.0, 74.0, 45.0, 26.0, 12.0, 11.0, 11.0, 10.0, 6.0, 8.0, 1.0, 7.0, 6.0, 1.0, 2.0, 4.0, 2.0, 2.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03192138671875, -0.030937910079956055, -0.02995443344116211, -0.028970956802368164, -0.02798748016357422, -0.027004003524780273, -0.026020526885986328, -0.025037050247192383, -0.024053573608398438, -0.023070096969604492, -0.022086620330810547, -0.0211031436920166, -0.020119667053222656, -0.01913619041442871, -0.018152713775634766, -0.01716923713684082, -0.016185760498046875, -0.01520228385925293, -0.014218807220458984, -0.013235330581665039, -0.012251853942871094, -0.011268377304077148, -0.010284900665283203, -0.009301424026489258, -0.008317947387695312, -0.007334470748901367, -0.006350994110107422, -0.0053675174713134766, -0.004384040832519531, -0.003400564193725586, -0.0024170875549316406, -0.0014336109161376953, -0.00045013427734375, 0.0005333423614501953, 0.0015168190002441406, 0.002500295639038086, 0.0034837722778320312, 0.0044672489166259766, 0.005450725555419922, 0.006434202194213867, 0.0074176788330078125, 0.008401155471801758, 0.009384632110595703, 0.010368108749389648, 0.011351585388183594, 0.012335062026977539, 0.013318538665771484, 0.01430201530456543, 0.015285491943359375, 0.01626896858215332, 0.017252445220947266, 0.01823592185974121, 0.019219398498535156, 0.0202028751373291, 0.021186351776123047, 0.022169828414916992, 0.023153305053710938, 0.024136781692504883, 0.025120258331298828, 0.026103734970092773, 0.02708721160888672, 0.028070688247680664, 0.02905416488647461, 0.030037641525268555, 0.0310211181640625]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 4.0, 3.0, 3.0, 3.0, 5.0, 5.0, 8.0, 8.0, 14.0, 6.0, 10.0, 16.0, 21.0, 16.0, 34.0, 37.0, 52.0, 59.0, 107.0, 185.0, 352.0, 878.0, 2403.0, 9780.0, 81215.0, 832930.0, 103281.0, 12090.0, 2966.0, 977.0, 433.0, 214.0, 114.0, 80.0, 61.0, 40.0, 38.0, 16.0, 24.0, 16.0, 14.0, 10.0, 3.0, 3.0, 9.0, 6.0, 2.0, 2.0, 4.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.305419921875, -0.2960357666015625, -0.286651611328125, -0.2772674560546875, -0.26788330078125, -0.2584991455078125, -0.249114990234375, -0.2397308349609375, -0.2303466796875, -0.2209625244140625, -0.211578369140625, -0.2021942138671875, -0.19281005859375, -0.1834259033203125, -0.174041748046875, -0.1646575927734375, -0.1552734375, -0.1458892822265625, -0.136505126953125, -0.1271209716796875, -0.11773681640625, -0.1083526611328125, -0.098968505859375, -0.0895843505859375, -0.0802001953125, -0.0708160400390625, -0.061431884765625, -0.0520477294921875, -0.04266357421875, -0.0332794189453125, -0.023895263671875, -0.0145111083984375, -0.005126953125, 0.0042572021484375, 0.013641357421875, 0.0230255126953125, 0.03240966796875, 0.0417938232421875, 0.051177978515625, 0.0605621337890625, 0.0699462890625, 0.0793304443359375, 0.088714599609375, 0.0980987548828125, 0.10748291015625, 0.1168670654296875, 0.126251220703125, 0.1356353759765625, 0.14501953125, 0.1544036865234375, 0.163787841796875, 0.1731719970703125, 0.18255615234375, 0.1919403076171875, 0.201324462890625, 0.2107086181640625, 0.2200927734375, 0.2294769287109375, 0.238861083984375, 0.2482452392578125, 0.25762939453125, 0.2670135498046875, 0.276397705078125, 0.2857818603515625, 0.295166015625]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 5.0, 2.0, 2.0, 7.0, 8.0, 3.0, 4.0, 11.0, 12.0, 14.0, 21.0, 11.0, 30.0, 27.0, 33.0, 33.0, 41.0, 37.0, 36.0, 33.0, 33.0, 31.0, 40.0, 43.0, 41.0, 53.0, 37.0, 44.0, 36.0, 27.0, 30.0, 30.0, 30.0, 20.0, 28.0, 12.0, 17.0, 15.0, 10.0, 6.0, 15.0, 8.0, 9.0, 3.0, 6.0, 2.0, 3.0, 3.0, 4.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.0799560546875, -0.07741069793701172, -0.07486534118652344, -0.07231998443603516, -0.06977462768554688, -0.0672292709350586, -0.06468391418457031, -0.06213855743408203, -0.05959320068359375, -0.05704784393310547, -0.05450248718261719, -0.051957130432128906, -0.049411773681640625, -0.046866416931152344, -0.04432106018066406, -0.04177570343017578, -0.0392303466796875, -0.03668498992919922, -0.03413963317871094, -0.031594276428222656, -0.029048919677734375, -0.026503562927246094, -0.023958206176757812, -0.02141284942626953, -0.01886749267578125, -0.01632213592529297, -0.013776779174804688, -0.011231422424316406, -0.008686065673828125, -0.006140708923339844, -0.0035953521728515625, -0.0010499954223632812, 0.001495361328125, 0.004040718078613281, 0.0065860748291015625, 0.009131431579589844, 0.011676788330078125, 0.014222145080566406, 0.016767501831054688, 0.01931285858154297, 0.02185821533203125, 0.02440357208251953, 0.026948928833007812, 0.029494285583496094, 0.032039642333984375, 0.034584999084472656, 0.03713035583496094, 0.03967571258544922, 0.0422210693359375, 0.04476642608642578, 0.04731178283691406, 0.049857139587402344, 0.052402496337890625, 0.054947853088378906, 0.05749320983886719, 0.06003856658935547, 0.06258392333984375, 0.06512928009033203, 0.06767463684082031, 0.0702199935913086, 0.07276535034179688, 0.07531070709228516, 0.07785606384277344, 0.08040142059326172, 0.08294677734375]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 6.0, 5.0, 6.0, 9.0, 6.0, 14.0, 26.0, 25.0, 42.0, 62.0, 110.0, 243.0, 516.0, 1292.0, 5041.0, 65805.0, 948373.0, 21890.0, 3251.0, 970.0, 386.0, 189.0, 98.0, 68.0, 47.0, 34.0, 12.0, 6.0, 8.0, 4.0, 6.0, 3.0, 2.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.06805419921875, -0.06623172760009766, -0.06440925598144531, -0.06258678436279297, -0.060764312744140625, -0.05894184112548828, -0.05711936950683594, -0.055296897888183594, -0.05347442626953125, -0.051651954650878906, -0.04982948303222656, -0.04800701141357422, -0.046184539794921875, -0.04436206817626953, -0.04253959655761719, -0.040717124938964844, -0.0388946533203125, -0.037072181701660156, -0.03524971008300781, -0.03342723846435547, -0.031604766845703125, -0.02978229522705078, -0.027959823608398438, -0.026137351989746094, -0.02431488037109375, -0.022492408752441406, -0.020669937133789062, -0.01884746551513672, -0.017024993896484375, -0.015202522277832031, -0.013380050659179688, -0.011557579040527344, -0.009735107421875, -0.007912635803222656, -0.0060901641845703125, -0.004267692565917969, -0.002445220947265625, -0.0006227493286132812, 0.0011997222900390625, 0.0030221939086914062, 0.00484466552734375, 0.006667137145996094, 0.008489608764648438, 0.010312080383300781, 0.012134552001953125, 0.013957023620605469, 0.015779495239257812, 0.017601966857910156, 0.0194244384765625, 0.021246910095214844, 0.023069381713867188, 0.02489185333251953, 0.026714324951171875, 0.02853679656982422, 0.030359268188476562, 0.032181739807128906, 0.03400421142578125, 0.035826683044433594, 0.03764915466308594, 0.03947162628173828, 0.041294097900390625, 0.04311656951904297, 0.04493904113769531, 0.046761512756347656, 0.048583984375]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 5.0, 0.0, 3.0, 4.0, 0.0, 3.0, 2.0, 2.0, 5.0, 6.0, 13.0, 12.0, 18.0, 30.0, 29.0, 33.0, 52.0, 37.0, 73.0, 59.0, 52.0, 92.0, 42.0, 57.0, 67.0, 40.0, 71.0, 44.0, 42.0, 35.0, 18.0, 18.0, 12.0, 6.0, 7.0, 7.0, 7.0, 4.0, 3.0, 2.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.424022674560547e-06, -5.282461643218994e-06, -5.140900611877441e-06, -4.999339580535889e-06, -4.857778549194336e-06, -4.716217517852783e-06, -4.5746564865112305e-06, -4.433095455169678e-06, -4.291534423828125e-06, -4.149973392486572e-06, -4.0084123611450195e-06, -3.866851329803467e-06, -3.725290298461914e-06, -3.5837292671203613e-06, -3.4421682357788086e-06, -3.300607204437256e-06, -3.159046173095703e-06, -3.0174851417541504e-06, -2.8759241104125977e-06, -2.734363079071045e-06, -2.592802047729492e-06, -2.4512410163879395e-06, -2.3096799850463867e-06, -2.168118953704834e-06, -2.0265579223632812e-06, -1.8849968910217285e-06, -1.7434358596801758e-06, -1.601874828338623e-06, -1.4603137969970703e-06, -1.3187527656555176e-06, -1.1771917343139648e-06, -1.0356307029724121e-06, -8.940696716308594e-07, -7.525086402893066e-07, -6.109476089477539e-07, -4.6938657760620117e-07, -3.2782554626464844e-07, -1.862645149230957e-07, -4.470348358154297e-08, 9.685754776000977e-08, 2.384185791015625e-07, 3.7997961044311523e-07, 5.21540641784668e-07, 6.631016731262207e-07, 8.046627044677734e-07, 9.462237358093262e-07, 1.087784767150879e-06, 1.2293457984924316e-06, 1.3709068298339844e-06, 1.5124678611755371e-06, 1.6540288925170898e-06, 1.7955899238586426e-06, 1.9371509552001953e-06, 2.078711986541748e-06, 2.2202730178833008e-06, 2.3618340492248535e-06, 2.5033950805664062e-06, 2.644956111907959e-06, 2.7865171432495117e-06, 2.9280781745910645e-06, 3.069639205932617e-06, 3.21120023727417e-06, 3.3527612686157227e-06, 3.4943222999572754e-06, 3.635883331298828e-06]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 1.0, 4.0, 5.0, 8.0, 5.0, 8.0, 10.0, 12.0, 25.0, 41.0, 83.0, 154.0, 420.0, 1291.0, 7929.0, 732720.0, 296898.0, 6893.0, 1277.0, 389.0, 172.0, 78.0, 53.0, 24.0, 16.0, 9.0, 6.0, 5.0, 10.0, 3.0, 4.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0806884765625, -0.07808208465576172, -0.07547569274902344, -0.07286930084228516, -0.07026290893554688, -0.0676565170288086, -0.06505012512207031, -0.06244373321533203, -0.05983734130859375, -0.05723094940185547, -0.05462455749511719, -0.052018165588378906, -0.049411773681640625, -0.046805381774902344, -0.04419898986816406, -0.04159259796142578, -0.0389862060546875, -0.03637981414794922, -0.03377342224121094, -0.031167030334472656, -0.028560638427734375, -0.025954246520996094, -0.023347854614257812, -0.02074146270751953, -0.01813507080078125, -0.015528678894042969, -0.012922286987304688, -0.010315895080566406, -0.007709503173828125, -0.005103111267089844, -0.0024967193603515625, 0.00010967254638671875, 0.002716064453125, 0.005322456359863281, 0.007928848266601562, 0.010535240173339844, 0.013141632080078125, 0.015748023986816406, 0.018354415893554688, 0.02096080780029297, 0.02356719970703125, 0.02617359161376953, 0.028779983520507812, 0.031386375427246094, 0.033992767333984375, 0.036599159240722656, 0.03920555114746094, 0.04181194305419922, 0.0444183349609375, 0.04702472686767578, 0.04963111877441406, 0.052237510681152344, 0.054843902587890625, 0.057450294494628906, 0.06005668640136719, 0.06266307830810547, 0.06526947021484375, 0.06787586212158203, 0.07048225402832031, 0.0730886459350586, 0.07569503784179688, 0.07830142974853516, 0.08090782165527344, 0.08351421356201172, 0.08612060546875]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 4.0, 2.0, 8.0, 3.0, 5.0, 4.0, 9.0, 12.0, 24.0, 22.0, 35.0, 58.0, 75.0, 102.0, 150.0, 143.0, 103.0, 76.0, 49.0, 44.0, 15.0, 8.0, 14.0, 4.0, 8.0, 4.0, 4.0, 7.0, 5.0, 3.0, 1.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0233154296875, -0.022609949111938477, -0.021904468536376953, -0.02119898796081543, -0.020493507385253906, -0.019788026809692383, -0.01908254623413086, -0.018377065658569336, -0.017671585083007812, -0.01696610450744629, -0.016260623931884766, -0.015555143356323242, -0.014849662780761719, -0.014144182205200195, -0.013438701629638672, -0.012733221054077148, -0.012027740478515625, -0.011322259902954102, -0.010616779327392578, -0.009911298751831055, -0.009205818176269531, -0.008500337600708008, -0.007794857025146484, -0.007089376449584961, -0.0063838958740234375, -0.005678415298461914, -0.004972934722900391, -0.004267454147338867, -0.0035619735717773438, -0.0028564929962158203, -0.002151012420654297, -0.0014455318450927734, -0.00074005126953125, -3.457069396972656e-05, 0.0006709098815917969, 0.0013763904571533203, 0.0020818710327148438, 0.002787351608276367, 0.0034928321838378906, 0.004198312759399414, 0.0049037933349609375, 0.005609273910522461, 0.006314754486083984, 0.007020235061645508, 0.007725715637207031, 0.008431196212768555, 0.009136676788330078, 0.009842157363891602, 0.010547637939453125, 0.011253118515014648, 0.011958599090576172, 0.012664079666137695, 0.013369560241699219, 0.014075040817260742, 0.014780521392822266, 0.015486001968383789, 0.016191482543945312, 0.016896963119506836, 0.01760244369506836, 0.018307924270629883, 0.019013404846191406, 0.01971888542175293, 0.020424365997314453, 0.021129846572875977, 0.0218353271484375]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 4.0, 9.0, 16.0, 49.0, 181.0, 578.0, 123.0, 26.0, 14.0, 8.0, 3.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.619896411895752, -2.5609188079833984, -2.501941442489624, -2.4429638385772705, -2.383986234664917, -2.3250088691711426, -2.266031265258789, -2.2070536613464355, -2.148076295852661, -2.0890986919403076, -2.030121326446533, -1.9711437225341797, -1.9121662378311157, -1.8531886339187622, -1.7942111492156982, -1.7352335453033447, -1.6762559413909912, -1.6172784566879272, -1.5583008527755737, -1.4993233680725098, -1.4403458833694458, -1.3813682794570923, -1.3223907947540283, -1.2634131908416748, -1.2044358253479004, -1.1454583406448364, -1.086480736732483, -1.027503252029419, -0.9685257077217102, -0.9095481634140015, -0.8505706787109375, -0.7915931344032288, -0.73261559009552, -0.6736380457878113, -0.6146605610847473, -0.5556830167770386, -0.49670547246932983, -0.4377279579639435, -0.37875044345855713, -0.3197728991508484, -0.26079538464546204, -0.2018178552389145, -0.14284032583236694, -0.08386281132698059, -0.024885281920433044, 0.0340922474861145, 0.09306976199150085, 0.1520473062992096, 0.21102482080459595, 0.2700023353099823, 0.32897987961769104, 0.3879573941230774, 0.44693493843078613, 0.5059124231338501, 0.5648899674415588, 0.6238675117492676, 0.6828449964523315, 0.7418225407600403, 0.8008000254631042, 0.859777569770813, 0.9187551140785217, 0.9777326583862305, 1.0367101430892944, 1.0956876277923584, 1.154665231704712]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 2.0, 5.0, 6.0, 10.0, 16.0, 9.0, 16.0, 24.0, 28.0, 26.0, 22.0, 43.0, 37.0, 41.0, 55.0, 48.0, 55.0, 65.0, 56.0, 48.0, 69.0, 49.0, 53.0, 31.0, 34.0, 35.0, 29.0, 24.0, 22.0, 6.0, 12.0, 7.0, 5.0, 7.0, 6.0, 1.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.6940292119979858, -0.6709720492362976, -0.6479148864746094, -0.6248577833175659, -0.6018006205558777, -0.5787434577941895, -0.5556862950325012, -0.532629132270813, -0.5095720291137695, -0.4865148663520813, -0.46345773339271545, -0.4404005706310272, -0.4173434376716614, -0.39428627490997314, -0.3712291121482849, -0.34817197918891907, -0.32511481642723083, -0.3020576536655426, -0.27900052070617676, -0.2559433579444885, -0.23288622498512268, -0.20982906222343445, -0.1867719143629074, -0.16371476650238037, -0.14065761864185333, -0.1176004707813263, -0.09454332292079926, -0.07148616760969162, -0.04842901974916458, -0.025371871888637543, -0.0023147165775299072, 0.02074243128299713, 0.04379957914352417, 0.06685672700405121, 0.08991387486457825, 0.11297103017568588, 0.13602817058563232, 0.15908533334732056, 0.1821424812078476, 0.20519962906837463, 0.22825677692890167, 0.2513139247894287, 0.27437108755111694, 0.2974282205104828, 0.320485383272171, 0.34354251623153687, 0.3665996789932251, 0.38965684175491333, 0.4127139747142792, 0.4357711374759674, 0.45882827043533325, 0.4818854331970215, 0.5049425959587097, 0.5279996991157532, 0.5510568618774414, 0.5741140246391296, 0.5971711874008179, 0.6202283501625061, 0.6432855129241943, 0.6663426160812378, 0.689399778842926, 0.7124569416046143, 0.7355141043663025, 0.7585712671279907, 0.7816283702850342]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 5.0, 3.0, 1.0, 2.0, 2.0, 5.0, 7.0, 16.0, 20.0, 29.0, 57.0, 123.0, 216.0, 512.0, 1413.0, 5098.0, 52078.0, 4110833.0, 17670.0, 3899.0, 1332.0, 473.0, 230.0, 103.0, 47.0, 41.0, 17.0, 16.0, 9.0, 6.0, 4.0, 2.0, 6.0, 9.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.767578125, -0.745208740234375, -0.72283935546875, -0.700469970703125, -0.6781005859375, -0.655731201171875, -0.63336181640625, -0.610992431640625, -0.588623046875, -0.566253662109375, -0.54388427734375, -0.521514892578125, -0.4991455078125, -0.476776123046875, -0.45440673828125, -0.432037353515625, -0.40966796875, -0.387298583984375, -0.36492919921875, -0.342559814453125, -0.3201904296875, -0.297821044921875, -0.27545166015625, -0.253082275390625, -0.230712890625, -0.208343505859375, -0.18597412109375, -0.163604736328125, -0.1412353515625, -0.118865966796875, -0.09649658203125, -0.074127197265625, -0.0517578125, -0.029388427734375, -0.00701904296875, 0.015350341796875, 0.0377197265625, 0.060089111328125, 0.08245849609375, 0.104827880859375, 0.127197265625, 0.149566650390625, 0.17193603515625, 0.194305419921875, 0.2166748046875, 0.239044189453125, 0.26141357421875, 0.283782958984375, 0.30615234375, 0.328521728515625, 0.35089111328125, 0.373260498046875, 0.3956298828125, 0.417999267578125, 0.44036865234375, 0.462738037109375, 0.485107421875, 0.507476806640625, 0.52984619140625, 0.552215576171875, 0.5745849609375, 0.596954345703125, 0.61932373046875, 0.641693115234375, 0.6640625]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 0.0, 2.0, 1.0, 3.0, 6.0, 7.0, 15.0, 16.0, 19.0, 23.0, 60.0, 69.0, 113.0, 134.0, 147.0, 126.0, 76.0, 61.0, 24.0, 20.0, 13.0, 11.0, 10.0, 3.0, 9.0, 8.0, 3.0, 2.0, 2.0, 6.0, 7.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.03155517578125, -0.030636310577392578, -0.029717445373535156, -0.028798580169677734, -0.027879714965820312, -0.02696084976196289, -0.02604198455810547, -0.025123119354248047, -0.024204254150390625, -0.023285388946533203, -0.02236652374267578, -0.02144765853881836, -0.020528793334960938, -0.019609928131103516, -0.018691062927246094, -0.017772197723388672, -0.01685333251953125, -0.015934467315673828, -0.015015602111816406, -0.014096736907958984, -0.013177871704101562, -0.01225900650024414, -0.011340141296386719, -0.010421276092529297, -0.009502410888671875, -0.008583545684814453, -0.007664680480957031, -0.006745815277099609, -0.0058269500732421875, -0.004908084869384766, -0.003989219665527344, -0.003070354461669922, -0.0021514892578125, -0.0012326240539550781, -0.00031375885009765625, 0.0006051063537597656, 0.0015239715576171875, 0.0024428367614746094, 0.0033617019653320312, 0.004280567169189453, 0.005199432373046875, 0.006118297576904297, 0.007037162780761719, 0.00795602798461914, 0.008874893188476562, 0.009793758392333984, 0.010712623596191406, 0.011631488800048828, 0.01255035400390625, 0.013469219207763672, 0.014388084411621094, 0.015306949615478516, 0.016225814819335938, 0.01714468002319336, 0.01806354522705078, 0.018982410430908203, 0.019901275634765625, 0.020820140838623047, 0.02173900604248047, 0.02265787124633789, 0.023576736450195312, 0.024495601654052734, 0.025414466857910156, 0.026333332061767578, 0.027252197265625]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 2.0, 7.0, 5.0, 11.0, 7.0, 9.0, 16.0, 30.0, 41.0, 78.0, 154.0, 419.0, 1371.0, 7010.0, 59670.0, 4082184.0, 36486.0, 4917.0, 1083.0, 365.0, 170.0, 98.0, 52.0, 24.0, 18.0, 12.0, 18.0, 7.0, 4.0, 9.0, 2.0, 0.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.6279296875, -0.6127662658691406, -0.5976028442382812, -0.5824394226074219, -0.5672760009765625, -0.5521125793457031, -0.5369491577148438, -0.5217857360839844, -0.506622314453125, -0.4914588928222656, -0.47629547119140625, -0.4611320495605469, -0.4459686279296875, -0.4308052062988281, -0.41564178466796875, -0.4004783630371094, -0.38531494140625, -0.3701515197753906, -0.35498809814453125, -0.3398246765136719, -0.3246612548828125, -0.3094978332519531, -0.29433441162109375, -0.2791709899902344, -0.264007568359375, -0.24884414672851562, -0.23368072509765625, -0.21851730346679688, -0.2033538818359375, -0.18819046020507812, -0.17302703857421875, -0.15786361694335938, -0.1427001953125, -0.12753677368164062, -0.11237335205078125, -0.09720993041992188, -0.0820465087890625, -0.06688308715820312, -0.05171966552734375, -0.036556243896484375, -0.021392822265625, -0.006229400634765625, 0.00893402099609375, 0.024097442626953125, 0.0392608642578125, 0.054424285888671875, 0.06958770751953125, 0.08475112915039062, 0.09991455078125, 0.11507797241210938, 0.13024139404296875, 0.14540481567382812, 0.1605682373046875, 0.17573165893554688, 0.19089508056640625, 0.20605850219726562, 0.221221923828125, 0.23638534545898438, 0.25154876708984375, 0.2667121887207031, 0.2818756103515625, 0.2970390319824219, 0.31220245361328125, 0.3273658752441406, 0.342529296875]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 0.0, 3.0, 7.0, 3.0, 6.0, 14.0, 7.0, 7.0, 10.0, 16.0, 18.0, 26.0, 26.0, 45.0, 64.0, 140.0, 3264.0, 164.0, 71.0, 37.0, 39.0, 12.0, 19.0, 17.0, 14.0, 10.0, 6.0, 6.0, 10.0, 4.0, 7.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0452880859375, -0.04328346252441406, -0.041278839111328125, -0.03927421569824219, -0.03726959228515625, -0.03526496887207031, -0.033260345458984375, -0.03125572204589844, -0.0292510986328125, -0.027246475219726562, -0.025241851806640625, -0.023237228393554688, -0.02123260498046875, -0.019227981567382812, -0.017223358154296875, -0.015218734741210938, -0.013214111328125, -0.011209487915039062, -0.009204864501953125, -0.0072002410888671875, -0.00519561767578125, -0.0031909942626953125, -0.001186370849609375, 0.0008182525634765625, 0.0028228759765625, 0.0048274993896484375, 0.006832122802734375, 0.008836746215820312, 0.01084136962890625, 0.012845993041992188, 0.014850616455078125, 0.016855239868164062, 0.01885986328125, 0.020864486694335938, 0.022869110107421875, 0.024873733520507812, 0.02687835693359375, 0.028882980346679688, 0.030887603759765625, 0.03289222717285156, 0.0348968505859375, 0.03690147399902344, 0.038906097412109375, 0.04091072082519531, 0.04291534423828125, 0.04491996765136719, 0.046924591064453125, 0.04892921447753906, 0.050933837890625, 0.05293846130371094, 0.054943084716796875, 0.05694770812988281, 0.05895233154296875, 0.06095695495605469, 0.06296157836914062, 0.06496620178222656, 0.0669708251953125, 0.06897544860839844, 0.07098007202148438, 0.07298469543457031, 0.07498931884765625, 0.07699394226074219, 0.07899856567382812, 0.08100318908691406, 0.0830078125]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 5.0, 10.0, 237.0, 726.0, 31.0, 9.0, 2.0], "bins": [-2.0777153968811035, -2.0431671142578125, -2.0086190700531006, -1.9740707874298096, -1.939522624015808, -1.904974341392517, -1.8704261779785156, -1.8358780145645142, -1.8013297319412231, -1.7667815685272217, -1.7322332859039307, -1.6976851224899292, -1.6631369590759277, -1.6285886764526367, -1.5940405130386353, -1.5594923496246338, -1.5249440670013428, -1.4903959035873413, -1.4558476209640503, -1.4212994575500488, -1.3867512941360474, -1.3522030115127563, -1.3176548480987549, -1.2831066846847534, -1.248558521270752, -1.2140103578567505, -1.1794620752334595, -1.144913911819458, -1.1103657484054565, -1.0758174657821655, -1.041269302368164, -1.0067211389541626, -0.9721728563308716, -0.9376246333122253, -0.9030764698982239, -0.8685282468795776, -0.8339800238609314, -0.7994318008422852, -0.7648836374282837, -0.7303354144096375, -0.6957871913909912, -0.661238968372345, -0.6266908049583435, -0.5921425819396973, -0.557594358921051, -0.5230461359024048, -0.4884979724884033, -0.4539497494697571, -0.4194015860557556, -0.38485339283943176, -0.3503051698207855, -0.31575697660446167, -0.28120875358581543, -0.24666056036949158, -0.21211236715316772, -0.17756415903568268, -0.14301595091819763, -0.10846774280071259, -0.07391954213380814, -0.039371341466903687, -0.00482313334941864, 0.029725074768066406, 0.06427326798439026, 0.0988214761018753, 0.13336968421936035]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 4.0, 4.0, 8.0, 4.0, 10.0, 13.0, 13.0, 9.0, 17.0, 20.0, 30.0, 34.0, 40.0, 50.0, 59.0, 53.0, 68.0, 61.0, 41.0, 68.0, 49.0, 70.0, 48.0, 38.0, 33.0, 32.0, 22.0, 22.0, 17.0, 18.0, 6.0, 12.0, 8.0, 5.0, 5.0, 4.0, 4.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.177884042263031, -0.17176219820976257, -0.16564033925533295, -0.15951849520206451, -0.1533966362476349, -0.14727479219436646, -0.14115294814109802, -0.1350311040878296, -0.12890924513339996, -0.12278739362955093, -0.1166655421257019, -0.11054369807243347, -0.10442184656858444, -0.09829999506473541, -0.09217815101146698, -0.08605629950761795, -0.07993444800376892, -0.07381259649991989, -0.06769074499607086, -0.06156890094280243, -0.0554470494389534, -0.04932519793510437, -0.04320335015654564, -0.03708150237798691, -0.03095965087413788, -0.024837801232933998, -0.018715951591730118, -0.012594101950526237, -0.006472252309322357, -0.00035040266811847687, 0.0057714469730854034, 0.011893294751644135, 0.018015146255493164, 0.024136995896697044, 0.030258845537900925, 0.036380693316459656, 0.042502544820308685, 0.048624396324157715, 0.054746244102716446, 0.06086809188127518, 0.0669899433851242, 0.07311179488897324, 0.07923364639282227, 0.0853554904460907, 0.09147734194993973, 0.09759919345378876, 0.10372103750705719, 0.10984288901090622, 0.11596474051475525, 0.12208659201860428, 0.1282084435224533, 0.13433028757572174, 0.14045214653015137, 0.1465739905834198, 0.15269583463668823, 0.15881767868995667, 0.1649395376443863, 0.17106138169765472, 0.17718324065208435, 0.18330508470535278, 0.18942692875862122, 0.19554878771305084, 0.20167063176631927, 0.2077924907207489, 0.21391433477401733]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 3.0, 6.0, 6.0, 9.0, 13.0, 24.0, 27.0, 36.0, 60.0, 60.0, 91.0, 142.0, 230.0, 318.0, 497.0, 780.0, 1167.0, 1917.0, 3282.0, 6025.0, 12222.0, 30807.0, 124856.0, 687016.0, 122317.0, 30430.0, 11714.0, 5824.0, 3212.0, 1887.0, 1194.0, 794.0, 503.0, 330.0, 231.0, 160.0, 101.0, 65.0, 65.0, 44.0, 34.0, 17.0, 16.0, 10.0, 8.0, 5.0, 4.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.164306640625, -0.15940093994140625, -0.1544952392578125, -0.14958953857421875, -0.144683837890625, -0.13977813720703125, -0.1348724365234375, -0.12996673583984375, -0.12506103515625, -0.12015533447265625, -0.1152496337890625, -0.11034393310546875, -0.105438232421875, -0.10053253173828125, -0.0956268310546875, -0.09072113037109375, -0.0858154296875, -0.08090972900390625, -0.0760040283203125, -0.07109832763671875, -0.066192626953125, -0.06128692626953125, -0.0563812255859375, -0.05147552490234375, -0.04656982421875, -0.04166412353515625, -0.0367584228515625, -0.03185272216796875, -0.026947021484375, -0.02204132080078125, -0.0171356201171875, -0.01222991943359375, -0.00732421875, -0.00241851806640625, 0.0024871826171875, 0.00739288330078125, 0.012298583984375, 0.01720428466796875, 0.0221099853515625, 0.02701568603515625, 0.03192138671875, 0.03682708740234375, 0.0417327880859375, 0.04663848876953125, 0.051544189453125, 0.05644989013671875, 0.0613555908203125, 0.06626129150390625, 0.0711669921875, 0.07607269287109375, 0.0809783935546875, 0.08588409423828125, 0.090789794921875, 0.09569549560546875, 0.1006011962890625, 0.10550689697265625, 0.11041259765625, 0.11531829833984375, 0.1202239990234375, 0.12512969970703125, 0.130035400390625, 0.13494110107421875, 0.1398468017578125, 0.14475250244140625, 0.149658203125]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 5.0, 1.0, 3.0, 3.0, 2.0, 4.0, 9.0, 9.0, 10.0, 27.0, 31.0, 44.0, 62.0, 101.0, 132.0, 134.0, 144.0, 89.0, 60.0, 36.0, 22.0, 18.0, 10.0, 6.0, 7.0, 8.0, 7.0, 2.0, 6.0, 5.0, 4.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.02801513671875, -0.027179718017578125, -0.02634429931640625, -0.025508880615234375, -0.0246734619140625, -0.023838043212890625, -0.02300262451171875, -0.022167205810546875, -0.021331787109375, -0.020496368408203125, -0.01966094970703125, -0.018825531005859375, -0.0179901123046875, -0.017154693603515625, -0.01631927490234375, -0.015483856201171875, -0.0146484375, -0.013813018798828125, -0.01297760009765625, -0.012142181396484375, -0.0113067626953125, -0.010471343994140625, -0.00963592529296875, -0.008800506591796875, -0.007965087890625, -0.007129669189453125, -0.00629425048828125, -0.005458831787109375, -0.0046234130859375, -0.003787994384765625, -0.00295257568359375, -0.002117156982421875, -0.00128173828125, -0.000446319580078125, 0.00038909912109375, 0.001224517822265625, 0.0020599365234375, 0.002895355224609375, 0.00373077392578125, 0.004566192626953125, 0.005401611328125, 0.006237030029296875, 0.00707244873046875, 0.007907867431640625, 0.0087432861328125, 0.009578704833984375, 0.01041412353515625, 0.011249542236328125, 0.0120849609375, 0.012920379638671875, 0.01375579833984375, 0.014591217041015625, 0.0154266357421875, 0.016262054443359375, 0.01709747314453125, 0.017932891845703125, 0.018768310546875, 0.019603729248046875, 0.02043914794921875, 0.021274566650390625, 0.0221099853515625, 0.022945404052734375, 0.02378082275390625, 0.024616241455078125, 0.02545166015625]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 4.0, 4.0, 6.0, 6.0, 7.0, 8.0, 15.0, 18.0, 31.0, 36.0, 36.0, 69.0, 90.0, 149.0, 336.0, 825.0, 2892.0, 27492.0, 841977.0, 164060.0, 7904.0, 1509.0, 472.0, 231.0, 104.0, 66.0, 42.0, 31.0, 26.0, 26.0, 17.0, 19.0, 13.0, 14.0, 11.0, 8.0, 1.0, 0.0, 4.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3095703125, -0.29917144775390625, -0.2887725830078125, -0.27837371826171875, -0.267974853515625, -0.25757598876953125, -0.2471771240234375, -0.23677825927734375, -0.22637939453125, -0.21598052978515625, -0.2055816650390625, -0.19518280029296875, -0.184783935546875, -0.17438507080078125, -0.1639862060546875, -0.15358734130859375, -0.1431884765625, -0.13278961181640625, -0.1223907470703125, -0.11199188232421875, -0.101593017578125, -0.09119415283203125, -0.0807952880859375, -0.07039642333984375, -0.05999755859375, -0.04959869384765625, -0.0391998291015625, -0.02880096435546875, -0.018402099609375, -0.00800323486328125, 0.0023956298828125, 0.01279449462890625, 0.023193359375, 0.03359222412109375, 0.0439910888671875, 0.05438995361328125, 0.064788818359375, 0.07518768310546875, 0.0855865478515625, 0.09598541259765625, 0.10638427734375, 0.11678314208984375, 0.1271820068359375, 0.13758087158203125, 0.147979736328125, 0.15837860107421875, 0.1687774658203125, 0.17917633056640625, 0.1895751953125, 0.19997406005859375, 0.2103729248046875, 0.22077178955078125, 0.231170654296875, 0.24156951904296875, 0.2519683837890625, 0.26236724853515625, 0.27276611328125, 0.28316497802734375, 0.2935638427734375, 0.30396270751953125, 0.314361572265625, 0.32476043701171875, 0.3351593017578125, 0.34555816650390625, 0.35595703125]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 3.0, 0.0, 1.0, 8.0, 13.0, 12.0, 14.0, 18.0, 15.0, 23.0, 24.0, 23.0, 28.0, 36.0, 41.0, 39.0, 36.0, 47.0, 53.0, 50.0, 48.0, 48.0, 53.0, 48.0, 56.0, 37.0, 40.0, 40.0, 28.0, 31.0, 27.0, 18.0, 14.0, 8.0, 6.0, 6.0, 6.0, 4.0, 4.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.10015869140625, -0.09723186492919922, -0.09430503845214844, -0.09137821197509766, -0.08845138549804688, -0.0855245590209961, -0.08259773254394531, -0.07967090606689453, -0.07674407958984375, -0.07381725311279297, -0.07089042663574219, -0.0679636001586914, -0.06503677368164062, -0.062109947204589844, -0.05918312072753906, -0.05625629425048828, -0.0533294677734375, -0.05040264129638672, -0.04747581481933594, -0.044548988342285156, -0.041622161865234375, -0.038695335388183594, -0.03576850891113281, -0.03284168243408203, -0.02991485595703125, -0.02698802947998047, -0.024061203002929688, -0.021134376525878906, -0.018207550048828125, -0.015280723571777344, -0.012353897094726562, -0.009427070617675781, -0.006500244140625, -0.0035734176635742188, -0.0006465911865234375, 0.0022802352905273438, 0.005207061767578125, 0.008133888244628906, 0.011060714721679688, 0.013987541198730469, 0.01691436767578125, 0.01984119415283203, 0.022768020629882812, 0.025694847106933594, 0.028621673583984375, 0.031548500061035156, 0.03447532653808594, 0.03740215301513672, 0.0403289794921875, 0.04325580596923828, 0.04618263244628906, 0.049109458923339844, 0.052036285400390625, 0.054963111877441406, 0.05788993835449219, 0.06081676483154297, 0.06374359130859375, 0.06667041778564453, 0.06959724426269531, 0.0725240707397461, 0.07545089721679688, 0.07837772369384766, 0.08130455017089844, 0.08423137664794922, 0.087158203125]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 9.0, 1.0, 4.0, 6.0, 11.0, 12.0, 16.0, 17.0, 37.0, 51.0, 80.0, 157.0, 271.0, 577.0, 1460.0, 5209.0, 44601.0, 932008.0, 55392.0, 5847.0, 1489.0, 631.0, 261.0, 146.0, 89.0, 52.0, 34.0, 24.0, 21.0, 13.0, 10.0, 8.0, 4.0, 1.0, 7.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.040496826171875, -0.03935718536376953, -0.03821754455566406, -0.037077903747558594, -0.035938262939453125, -0.034798622131347656, -0.03365898132324219, -0.03251934051513672, -0.03137969970703125, -0.03024005889892578, -0.029100418090820312, -0.027960777282714844, -0.026821136474609375, -0.025681495666503906, -0.024541854858398438, -0.02340221405029297, -0.0222625732421875, -0.02112293243408203, -0.019983291625976562, -0.018843650817871094, -0.017704010009765625, -0.016564369201660156, -0.015424728393554688, -0.014285087585449219, -0.01314544677734375, -0.012005805969238281, -0.010866165161132812, -0.009726524353027344, -0.008586883544921875, -0.007447242736816406, -0.0063076019287109375, -0.005167961120605469, -0.0040283203125, -0.0028886795043945312, -0.0017490386962890625, -0.0006093978881835938, 0.000530242919921875, 0.0016698837280273438, 0.0028095245361328125, 0.003949165344238281, 0.00508880615234375, 0.006228446960449219, 0.0073680877685546875, 0.008507728576660156, 0.009647369384765625, 0.010787010192871094, 0.011926651000976562, 0.013066291809082031, 0.0142059326171875, 0.015345573425292969, 0.016485214233398438, 0.017624855041503906, 0.018764495849609375, 0.019904136657714844, 0.021043777465820312, 0.02218341827392578, 0.02332305908203125, 0.02446269989013672, 0.025602340698242188, 0.026741981506347656, 0.027881622314453125, 0.029021263122558594, 0.030160903930664062, 0.03130054473876953, 0.032440185546875]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 4.0, 4.0, 10.0, 7.0, 12.0, 20.0, 17.0, 27.0, 28.0, 30.0, 41.0, 41.0, 68.0, 47.0, 67.0, 65.0, 61.0, 63.0, 60.0, 54.0, 41.0, 25.0, 36.0, 38.0, 21.0, 29.0, 18.0, 12.0, 9.0, 7.0, 8.0, 9.0, 8.0, 4.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.351139068603516e-06, -4.234723746776581e-06, -4.118308424949646e-06, -4.001893103122711e-06, -3.885477781295776e-06, -3.7690624594688416e-06, -3.6526471376419067e-06, -3.536231815814972e-06, -3.419816493988037e-06, -3.3034011721611023e-06, -3.1869858503341675e-06, -3.0705705285072327e-06, -2.954155206680298e-06, -2.837739884853363e-06, -2.7213245630264282e-06, -2.6049092411994934e-06, -2.4884939193725586e-06, -2.3720785975456238e-06, -2.255663275718689e-06, -2.139247953891754e-06, -2.0228326320648193e-06, -1.9064173102378845e-06, -1.7900019884109497e-06, -1.6735866665840149e-06, -1.55717134475708e-06, -1.4407560229301453e-06, -1.3243407011032104e-06, -1.2079253792762756e-06, -1.0915100574493408e-06, -9.75094735622406e-07, -8.586794137954712e-07, -7.422640919685364e-07, -6.258487701416016e-07, -5.094334483146667e-07, -3.9301812648773193e-07, -2.766028046607971e-07, -1.601874828338623e-07, -4.377216100692749e-08, 7.264316082000732e-08, 1.8905848264694214e-07, 3.0547380447387695e-07, 4.2188912630081177e-07, 5.383044481277466e-07, 6.547197699546814e-07, 7.711350917816162e-07, 8.87550413608551e-07, 1.0039657354354858e-06, 1.1203810572624207e-06, 1.2367963790893555e-06, 1.3532117009162903e-06, 1.469627022743225e-06, 1.58604234457016e-06, 1.7024576663970947e-06, 1.8188729882240295e-06, 1.9352883100509644e-06, 2.051703631877899e-06, 2.168118953704834e-06, 2.284534275531769e-06, 2.4009495973587036e-06, 2.5173649191856384e-06, 2.6337802410125732e-06, 2.750195562839508e-06, 2.866610884666443e-06, 2.9830262064933777e-06, 3.0994415283203125e-06]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 8.0, 5.0, 6.0, 12.0, 15.0, 34.0, 59.0, 157.0, 433.0, 2331.0, 111436.0, 928363.0, 4549.0, 711.0, 219.0, 104.0, 33.0, 31.0, 16.0, 12.0, 14.0, 3.0, 5.0, 2.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.10308837890625, -0.10038566589355469, -0.09768295288085938, -0.09498023986816406, -0.09227752685546875, -0.08957481384277344, -0.08687210083007812, -0.08416938781738281, -0.0814666748046875, -0.07876396179199219, -0.07606124877929688, -0.07335853576660156, -0.07065582275390625, -0.06795310974121094, -0.06525039672851562, -0.06254768371582031, -0.059844970703125, -0.05714225769042969, -0.054439544677734375, -0.05173683166503906, -0.04903411865234375, -0.04633140563964844, -0.043628692626953125, -0.04092597961425781, -0.0382232666015625, -0.03552055358886719, -0.032817840576171875, -0.030115127563476562, -0.02741241455078125, -0.024709701538085938, -0.022006988525390625, -0.019304275512695312, -0.0166015625, -0.013898849487304688, -0.011196136474609375, -0.008493423461914062, -0.00579071044921875, -0.0030879974365234375, -0.000385284423828125, 0.0023174285888671875, 0.0050201416015625, 0.0077228546142578125, 0.010425567626953125, 0.013128280639648438, 0.01583099365234375, 0.018533706665039062, 0.021236419677734375, 0.023939132690429688, 0.026641845703125, 0.029344558715820312, 0.032047271728515625, 0.03474998474121094, 0.03745269775390625, 0.04015541076660156, 0.042858123779296875, 0.04556083679199219, 0.0482635498046875, 0.05096626281738281, 0.053668975830078125, 0.05637168884277344, 0.05907440185546875, 0.06177711486816406, 0.06447982788085938, 0.06718254089355469, 0.06988525390625]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 5.0, 1.0, 10.0, 10.0, 10.0, 20.0, 19.0, 41.0, 62.0, 74.0, 134.0, 152.0, 156.0, 112.0, 68.0, 55.0, 31.0, 19.0, 9.0, 7.0, 3.0, 3.0, 7.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0196075439453125, -0.018848180770874023, -0.018088817596435547, -0.01732945442199707, -0.016570091247558594, -0.015810728073120117, -0.01505136489868164, -0.014292001724243164, -0.013532638549804688, -0.012773275375366211, -0.012013912200927734, -0.011254549026489258, -0.010495185852050781, -0.009735822677612305, -0.008976459503173828, -0.008217096328735352, -0.007457733154296875, -0.0066983699798583984, -0.005939006805419922, -0.005179643630981445, -0.004420280456542969, -0.003660917282104492, -0.0029015541076660156, -0.002142190933227539, -0.0013828277587890625, -0.0006234645843505859, 0.00013589859008789062, 0.0008952617645263672, 0.0016546249389648438, 0.0024139881134033203, 0.003173351287841797, 0.0039327144622802734, 0.00469207763671875, 0.0054514408111572266, 0.006210803985595703, 0.00697016716003418, 0.007729530334472656, 0.008488893508911133, 0.00924825668334961, 0.010007619857788086, 0.010766983032226562, 0.011526346206665039, 0.012285709381103516, 0.013045072555541992, 0.013804435729980469, 0.014563798904418945, 0.015323162078857422, 0.0160825252532959, 0.016841888427734375, 0.01760125160217285, 0.018360614776611328, 0.019119977951049805, 0.01987934112548828, 0.020638704299926758, 0.021398067474365234, 0.02215743064880371, 0.022916793823242188, 0.023676156997680664, 0.02443552017211914, 0.025194883346557617, 0.025954246520996094, 0.02671360969543457, 0.027472972869873047, 0.028232336044311523, 0.02899169921875]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 6.0, 35.0, 201.0, 679.0, 79.0, 11.0, 4.0, 2.0], "bins": [-4.348362922668457, -4.2751030921936035, -4.201843738555908, -4.128583908081055, -4.055324077606201, -3.9820644855499268, -3.9088048934936523, -3.835545063018799, -3.7622854709625244, -3.68902587890625, -3.6157660484313965, -3.542506456375122, -3.4692468643188477, -3.395987033843994, -3.3227274417877197, -3.2494678497314453, -3.176208019256592, -3.1029484272003174, -3.029688596725464, -2.9564290046691895, -2.883169412612915, -2.8099095821380615, -2.736649990081787, -2.6633901596069336, -2.5901308059692383, -2.516871213912964, -2.4436113834381104, -2.370351791381836, -2.2970921993255615, -2.223832368850708, -2.1505727767944336, -2.07731294631958, -2.0040533542633057, -1.9307936429977417, -1.8575340509414673, -1.7842743396759033, -1.7110146284103394, -1.6377549171447754, -1.564495325088501, -1.491235613822937, -1.4179760217666626, -1.3447163105010986, -1.2714567184448242, -1.1981970071792603, -1.1249372959136963, -1.0516777038574219, -0.9784179925918579, -0.905158281326294, -0.8318986892700195, -0.7586390376091003, -0.6853793263435364, -0.6121196746826172, -0.5388599634170532, -0.46560031175613403, -0.39234066009521484, -0.3190809488296509, -0.2458212673664093, -0.17256158590316772, -0.09930191934108734, -0.026042252779006958, 0.04721742868423462, 0.1204771101474762, 0.19373676180839539, 0.26699647307395935, 0.34025612473487854]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 4.0, 0.0, 3.0, 1.0, 4.0, 8.0, 8.0, 9.0, 17.0, 19.0, 14.0, 26.0, 30.0, 29.0, 27.0, 51.0, 41.0, 46.0, 54.0, 50.0, 62.0, 61.0, 62.0, 52.0, 53.0, 42.0, 41.0, 38.0, 31.0, 30.0, 26.0, 23.0, 9.0, 14.0, 12.0, 5.0, 4.0, 2.0, 0.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.7298516035079956, -0.7102358341217041, -0.6906200647354126, -0.6710042953491211, -0.6513885259628296, -0.6317728161811829, -0.6121570467948914, -0.5925412774085999, -0.5729255080223083, -0.5533097386360168, -0.5336939692497253, -0.5140781998634338, -0.4944624602794647, -0.4748466908931732, -0.4552309513092041, -0.4356151819229126, -0.4159994125366211, -0.3963836431503296, -0.3767678737640381, -0.35715213418006897, -0.33753636479377747, -0.31792059540748596, -0.29830485582351685, -0.27868908643722534, -0.25907331705093384, -0.23945754766464233, -0.21984179317951202, -0.2002260386943817, -0.1806102693080902, -0.1609944999217987, -0.1413787454366684, -0.12176299095153809, -0.10214728116989136, -0.08253151923418045, -0.06291575729846954, -0.043299995362758636, -0.02368423342704773, -0.0040684714913368225, 0.015547290444374084, 0.035163044929504395, 0.0547788143157959, 0.0743945762515068, 0.09401033818721771, 0.11362610012292862, 0.13324186205863953, 0.15285763144493103, 0.17247338593006134, 0.19208914041519165, 0.21170490980148315, 0.23132067918777466, 0.25093644857406616, 0.2705521881580353, 0.2901679575443268, 0.3097837269306183, 0.3293994665145874, 0.3490152359008789, 0.3686310052871704, 0.3882467746734619, 0.4078625440597534, 0.42747828364372253, 0.44709405303001404, 0.46670982241630554, 0.48632556200027466, 0.5059413313865662, 0.5255571007728577]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 4.0, 6.0, 4.0, 3.0, 13.0, 11.0, 24.0, 35.0, 72.0, 108.0, 205.0, 412.0, 1233.0, 4506.0, 66072.0, 4109117.0, 8999.0, 2017.0, 711.0, 314.0, 177.0, 93.0, 43.0, 31.0, 16.0, 19.0, 13.0, 11.0, 7.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.362548828125, -0.3516197204589844, -0.34069061279296875, -0.3297615051269531, -0.3188323974609375, -0.3079032897949219, -0.29697418212890625, -0.2860450744628906, -0.275115966796875, -0.2641868591308594, -0.25325775146484375, -0.24232864379882812, -0.2313995361328125, -0.22047042846679688, -0.20954132080078125, -0.19861221313476562, -0.18768310546875, -0.17675399780273438, -0.16582489013671875, -0.15489578247070312, -0.1439666748046875, -0.13303756713867188, -0.12210845947265625, -0.11117935180664062, -0.100250244140625, -0.08932113647460938, -0.07839202880859375, -0.06746292114257812, -0.0565338134765625, -0.045604705810546875, -0.03467559814453125, -0.023746490478515625, -0.0128173828125, -0.001888275146484375, 0.00904083251953125, 0.019969940185546875, 0.0308990478515625, 0.041828155517578125, 0.05275726318359375, 0.06368637084960938, 0.074615478515625, 0.08554458618164062, 0.09647369384765625, 0.10740280151367188, 0.1183319091796875, 0.12926101684570312, 0.14019012451171875, 0.15111923217773438, 0.16204833984375, 0.17297744750976562, 0.18390655517578125, 0.19483566284179688, 0.2057647705078125, 0.21669387817382812, 0.22762298583984375, 0.23855209350585938, 0.249481201171875, 0.2604103088378906, 0.27133941650390625, 0.2822685241699219, 0.2931976318359375, 0.3041267395019531, 0.31505584716796875, 0.3259849548339844, 0.3369140625]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 1.0, 1.0, 6.0, 5.0, 8.0, 8.0, 22.0, 31.0, 32.0, 54.0, 81.0, 109.0, 119.0, 124.0, 114.0, 84.0, 59.0, 32.0, 25.0, 25.0, 10.0, 11.0, 7.0, 5.0, 5.0, 9.0, 4.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.027496337890625, -0.026669979095458984, -0.02584362030029297, -0.025017261505126953, -0.024190902709960938, -0.023364543914794922, -0.022538185119628906, -0.02171182632446289, -0.020885467529296875, -0.02005910873413086, -0.019232749938964844, -0.018406391143798828, -0.017580032348632812, -0.016753673553466797, -0.01592731475830078, -0.015100955963134766, -0.01427459716796875, -0.013448238372802734, -0.012621879577636719, -0.011795520782470703, -0.010969161987304688, -0.010142803192138672, -0.009316444396972656, -0.00849008560180664, -0.007663726806640625, -0.006837368011474609, -0.006011009216308594, -0.005184650421142578, -0.0043582916259765625, -0.003531932830810547, -0.0027055740356445312, -0.0018792152404785156, -0.0010528564453125, -0.00022649765014648438, 0.0005998611450195312, 0.0014262199401855469, 0.0022525787353515625, 0.003078937530517578, 0.0039052963256835938, 0.004731655120849609, 0.005558013916015625, 0.006384372711181641, 0.007210731506347656, 0.008037090301513672, 0.008863449096679688, 0.009689807891845703, 0.010516166687011719, 0.011342525482177734, 0.01216888427734375, 0.012995243072509766, 0.013821601867675781, 0.014647960662841797, 0.015474319458007812, 0.016300678253173828, 0.017127037048339844, 0.01795339584350586, 0.018779754638671875, 0.01960611343383789, 0.020432472229003906, 0.021258831024169922, 0.022085189819335938, 0.022911548614501953, 0.02373790740966797, 0.024564266204833984, 0.025390625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 4.0, 4.0, 3.0, 5.0, 5.0, 9.0, 17.0, 19.0, 39.0, 57.0, 120.0, 239.0, 589.0, 1637.0, 5837.0, 37510.0, 4095678.0, 43366.0, 6314.0, 1697.0, 606.0, 255.0, 116.0, 53.0, 45.0, 15.0, 14.0, 10.0, 7.0, 3.0, 6.0, 3.0, 3.0, 0.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.232666015625, -0.2257251739501953, -0.21878433227539062, -0.21184349060058594, -0.20490264892578125, -0.19796180725097656, -0.19102096557617188, -0.1840801239013672, -0.1771392822265625, -0.1701984405517578, -0.16325759887695312, -0.15631675720214844, -0.14937591552734375, -0.14243507385253906, -0.13549423217773438, -0.1285533905029297, -0.121612548828125, -0.11467170715332031, -0.10773086547851562, -0.10079002380371094, -0.09384918212890625, -0.08690834045410156, -0.07996749877929688, -0.07302665710449219, -0.0660858154296875, -0.05914497375488281, -0.052204132080078125, -0.04526329040527344, -0.03832244873046875, -0.03138160705566406, -0.024440765380859375, -0.017499923706054688, -0.01055908203125, -0.0036182403564453125, 0.003322601318359375, 0.010263442993164062, 0.01720428466796875, 0.024145126342773438, 0.031085968017578125, 0.03802680969238281, 0.0449676513671875, 0.05190849304199219, 0.058849334716796875, 0.06579017639160156, 0.07273101806640625, 0.07967185974121094, 0.08661270141601562, 0.09355354309082031, 0.100494384765625, 0.10743522644042969, 0.11437606811523438, 0.12131690979003906, 0.12825775146484375, 0.13519859313964844, 0.14213943481445312, 0.1490802764892578, 0.1560211181640625, 0.1629619598388672, 0.16990280151367188, 0.17684364318847656, 0.18378448486328125, 0.19072532653808594, 0.19766616821289062, 0.2046070098876953, 0.2115478515625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 2.0, 2.0, 5.0, 0.0, 4.0, 4.0, 5.0, 3.0, 9.0, 9.0, 21.0, 17.0, 24.0, 32.0, 51.0, 219.0, 3292.0, 169.0, 60.0, 37.0, 26.0, 22.0, 16.0, 7.0, 12.0, 5.0, 8.0, 2.0, 2.0, 4.0, 2.0, 4.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.037567138671875, -0.03633403778076172, -0.03510093688964844, -0.033867835998535156, -0.032634735107421875, -0.031401634216308594, -0.030168533325195312, -0.02893543243408203, -0.02770233154296875, -0.02646923065185547, -0.025236129760742188, -0.024003028869628906, -0.022769927978515625, -0.021536827087402344, -0.020303726196289062, -0.01907062530517578, -0.0178375244140625, -0.01660442352294922, -0.015371322631835938, -0.014138221740722656, -0.012905120849609375, -0.011672019958496094, -0.010438919067382812, -0.009205818176269531, -0.00797271728515625, -0.006739616394042969, -0.0055065155029296875, -0.004273414611816406, -0.003040313720703125, -0.0018072128295898438, -0.0005741119384765625, 0.0006589889526367188, 0.00189208984375, 0.0031251907348632812, 0.0043582916259765625, 0.005591392517089844, 0.006824493408203125, 0.008057594299316406, 0.009290695190429688, 0.010523796081542969, 0.01175689697265625, 0.012989997863769531, 0.014223098754882812, 0.015456199645996094, 0.016689300537109375, 0.017922401428222656, 0.019155502319335938, 0.02038860321044922, 0.0216217041015625, 0.02285480499267578, 0.024087905883789062, 0.025321006774902344, 0.026554107666015625, 0.027787208557128906, 0.029020309448242188, 0.03025341033935547, 0.03148651123046875, 0.03271961212158203, 0.03395271301269531, 0.035185813903808594, 0.036418914794921875, 0.037652015686035156, 0.03888511657714844, 0.04011821746826172, 0.041351318359375]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 1.0, 3.0, 4.0, 19.0, 139.0, 701.0, 113.0, 29.0, 4.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1117488443851471, -0.09856376051902771, -0.08537867665290833, -0.07219359278678894, -0.059008508920669556, -0.04582342505455017, -0.032638341188430786, -0.0194532573223114, -0.006268173456192017, 0.006916910409927368, 0.020101994276046753, 0.03328707814216614, 0.04647216200828552, 0.05965724587440491, 0.07284232974052429, 0.08602741360664368, 0.09921249747276306, 0.11239758133888245, 0.12558266520500183, 0.13876774907112122, 0.1519528329372406, 0.16513791680335999, 0.17832300066947937, 0.19150808453559875, 0.20469316840171814, 0.21787825226783752, 0.2310633361339569, 0.2442484200000763, 0.2574335038661957, 0.27061858773231506, 0.28380367159843445, 0.29698875546455383, 0.31017380952835083, 0.3233588933944702, 0.3365439772605896, 0.349729061126709, 0.36291414499282837, 0.37609922885894775, 0.38928431272506714, 0.4024693965911865, 0.4156544804573059, 0.4288395643234253, 0.4420246481895447, 0.45520973205566406, 0.46839481592178345, 0.48157989978790283, 0.4947649836540222, 0.5079500675201416, 0.521135151386261, 0.5343202352523804, 0.5475053191184998, 0.5606904029846191, 0.5738754868507385, 0.5870605707168579, 0.6002456545829773, 0.6134307384490967, 0.6266158223152161, 0.6398009061813354, 0.6529859900474548, 0.6661710739135742, 0.6793561577796936, 0.692541241645813, 0.7057263255119324, 0.7189114093780518, 0.7320964932441711]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 2.0, 3.0, 1.0, 7.0, 4.0, 3.0, 5.0, 15.0, 16.0, 12.0, 28.0, 23.0, 30.0, 35.0, 28.0, 41.0, 36.0, 41.0, 62.0, 58.0, 69.0, 56.0, 58.0, 46.0, 45.0, 48.0, 39.0, 35.0, 21.0, 40.0, 18.0, 16.0, 12.0, 17.0, 12.0, 7.0, 8.0, 4.0, 2.0, 2.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09118849039077759, -0.08804483711719513, -0.08490118384361267, -0.08175753057003021, -0.07861387729644775, -0.0754702240228653, -0.07232656329870224, -0.06918291002511978, -0.06603925675153732, -0.06289560347795486, -0.059751950204372406, -0.05660829320549965, -0.05346463993191719, -0.05032098665833473, -0.047177329659461975, -0.04403367638587952, -0.04089002311229706, -0.0377463698387146, -0.03460271656513214, -0.031459059566259384, -0.028315406292676926, -0.025171753019094467, -0.02202809788286686, -0.01888444274663925, -0.015740789473056793, -0.01259713526815176, -0.009453481063246727, -0.006309826858341694, -0.0031661726534366608, -2.251937985420227e-05, 0.0031211357563734055, 0.006264790892601013, 0.009408444166183472, 0.012552098371088505, 0.015695752575993538, 0.018839407712221146, 0.021983060985803604, 0.025126714259386063, 0.02827036939561367, 0.03141402453184128, 0.03455767780542374, 0.037701331079006195, 0.040844984352588654, 0.04398864135146141, 0.04713229462504387, 0.05027594789862633, 0.053419604897499084, 0.05656325817108154, 0.059706911444664, 0.06285056471824646, 0.06599421799182892, 0.06913787126541138, 0.07228152453899384, 0.0754251778125763, 0.07856883853673935, 0.08171249181032181, 0.08485614508390427, 0.08799979835748672, 0.09114345163106918, 0.09428710490465164, 0.0974307656288147, 0.10057441890239716, 0.10371807217597961, 0.10686172544956207, 0.11000537872314453]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 4.0, 0.0, 4.0, 2.0, 4.0, 6.0, 8.0, 14.0, 17.0, 19.0, 35.0, 59.0, 62.0, 111.0, 154.0, 220.0, 306.0, 434.0, 585.0, 963.0, 1603.0, 2764.0, 4856.0, 9416.0, 22456.0, 69945.0, 419565.0, 402265.0, 68954.0, 22390.0, 9533.0, 4654.0, 2649.0, 1564.0, 981.0, 609.0, 398.0, 286.0, 196.0, 138.0, 95.0, 72.0, 47.0, 42.0, 21.0, 27.0, 9.0, 9.0, 9.0, 5.0, 3.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08160400390625, -0.07888317108154297, -0.07616233825683594, -0.0734415054321289, -0.07072067260742188, -0.06799983978271484, -0.06527900695800781, -0.06255817413330078, -0.05983734130859375, -0.05711650848388672, -0.05439567565917969, -0.051674842834472656, -0.048954010009765625, -0.046233177185058594, -0.04351234436035156, -0.04079151153564453, -0.0380706787109375, -0.03534984588623047, -0.03262901306152344, -0.029908180236816406, -0.027187347412109375, -0.024466514587402344, -0.021745681762695312, -0.01902484893798828, -0.01630401611328125, -0.013583183288574219, -0.010862350463867188, -0.008141517639160156, -0.005420684814453125, -0.0026998519897460938, 2.09808349609375e-05, 0.0027418136596679688, 0.005462646484375, 0.008183479309082031, 0.010904312133789062, 0.013625144958496094, 0.016345977783203125, 0.019066810607910156, 0.021787643432617188, 0.02450847625732422, 0.02722930908203125, 0.02995014190673828, 0.03267097473144531, 0.035391807556152344, 0.038112640380859375, 0.040833473205566406, 0.04355430603027344, 0.04627513885498047, 0.0489959716796875, 0.05171680450439453, 0.05443763732910156, 0.057158470153808594, 0.059879302978515625, 0.06260013580322266, 0.06532096862792969, 0.06804180145263672, 0.07076263427734375, 0.07348346710205078, 0.07620429992675781, 0.07892513275146484, 0.08164596557617188, 0.0843667984008789, 0.08708763122558594, 0.08980846405029297, 0.092529296875]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 1.0, 2.0, 2.0, 9.0, 4.0, 8.0, 10.0, 19.0, 33.0, 37.0, 59.0, 87.0, 107.0, 108.0, 140.0, 104.0, 76.0, 56.0, 41.0, 25.0, 17.0, 11.0, 9.0, 8.0, 7.0, 5.0, 7.0, 4.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.027313232421875, -0.026512622833251953, -0.025712013244628906, -0.02491140365600586, -0.024110794067382812, -0.023310184478759766, -0.02250957489013672, -0.021708965301513672, -0.020908355712890625, -0.020107746124267578, -0.01930713653564453, -0.018506526947021484, -0.017705917358398438, -0.01690530776977539, -0.016104698181152344, -0.015304088592529297, -0.01450347900390625, -0.013702869415283203, -0.012902259826660156, -0.01210165023803711, -0.011301040649414062, -0.010500431060791016, -0.009699821472167969, -0.008899211883544922, -0.008098602294921875, -0.007297992706298828, -0.006497383117675781, -0.005696773529052734, -0.0048961639404296875, -0.004095554351806641, -0.0032949447631835938, -0.002494335174560547, -0.0016937255859375, -0.0008931159973144531, -9.250640869140625e-05, 0.0007081031799316406, 0.0015087127685546875, 0.0023093223571777344, 0.0031099319458007812, 0.003910541534423828, 0.004711151123046875, 0.005511760711669922, 0.006312370300292969, 0.007112979888916016, 0.007913589477539062, 0.00871419906616211, 0.009514808654785156, 0.010315418243408203, 0.01111602783203125, 0.011916637420654297, 0.012717247009277344, 0.01351785659790039, 0.014318466186523438, 0.015119075775146484, 0.01591968536376953, 0.016720294952392578, 0.017520904541015625, 0.018321514129638672, 0.01912212371826172, 0.019922733306884766, 0.020723342895507812, 0.02152395248413086, 0.022324562072753906, 0.023125171661376953, 0.02392578125]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 2.0, 5.0, 1.0, 6.0, 3.0, 11.0, 14.0, 27.0, 28.0, 20.0, 28.0, 30.0, 49.0, 75.0, 119.0, 302.0, 1087.0, 5715.0, 85662.0, 902822.0, 47109.0, 3888.0, 870.0, 290.0, 119.0, 68.0, 45.0, 36.0, 29.0, 29.0, 17.0, 12.0, 5.0, 7.0, 10.0, 5.0, 9.0, 5.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.301025390625, -0.29318809509277344, -0.2853507995605469, -0.2775135040283203, -0.26967620849609375, -0.2618389129638672, -0.2540016174316406, -0.24616432189941406, -0.2383270263671875, -0.23048973083496094, -0.22265243530273438, -0.2148151397705078, -0.20697784423828125, -0.1991405487060547, -0.19130325317382812, -0.18346595764160156, -0.175628662109375, -0.16779136657714844, -0.15995407104492188, -0.1521167755126953, -0.14427947998046875, -0.1364421844482422, -0.12860488891601562, -0.12076759338378906, -0.1129302978515625, -0.10509300231933594, -0.09725570678710938, -0.08941841125488281, -0.08158111572265625, -0.07374382019042969, -0.06590652465820312, -0.05806922912597656, -0.05023193359375, -0.04239463806152344, -0.034557342529296875, -0.026720046997070312, -0.01888275146484375, -0.011045455932617188, -0.003208160400390625, 0.0046291351318359375, 0.0124664306640625, 0.020303726196289062, 0.028141021728515625, 0.03597831726074219, 0.04381561279296875, 0.05165290832519531, 0.059490203857421875, 0.06732749938964844, 0.075164794921875, 0.08300209045410156, 0.09083938598632812, 0.09867668151855469, 0.10651397705078125, 0.11435127258300781, 0.12218856811523438, 0.13002586364746094, 0.1378631591796875, 0.14570045471191406, 0.15353775024414062, 0.1613750457763672, 0.16921234130859375, 0.1770496368408203, 0.18488693237304688, 0.19272422790527344, 0.2005615234375]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 7.0, 7.0, 6.0, 8.0, 8.0, 4.0, 13.0, 17.0, 29.0, 26.0, 35.0, 34.0, 43.0, 49.0, 36.0, 52.0, 48.0, 57.0, 45.0, 48.0, 63.0, 48.0, 39.0, 42.0, 37.0, 38.0, 27.0, 24.0, 23.0, 26.0, 28.0, 12.0, 11.0, 3.0, 6.0, 1.0, 5.0, 2.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07073974609375, -0.06797218322753906, -0.06520462036132812, -0.06243705749511719, -0.05966949462890625, -0.05690193176269531, -0.054134368896484375, -0.05136680603027344, -0.0485992431640625, -0.04583168029785156, -0.043064117431640625, -0.04029655456542969, -0.03752899169921875, -0.03476142883300781, -0.031993865966796875, -0.029226303100585938, -0.026458740234375, -0.023691177368164062, -0.020923614501953125, -0.018156051635742188, -0.01538848876953125, -0.012620925903320312, -0.009853363037109375, -0.0070858001708984375, -0.0043182373046875, -0.0015506744384765625, 0.001216888427734375, 0.0039844512939453125, 0.00675201416015625, 0.009519577026367188, 0.012287139892578125, 0.015054702758789062, 0.017822265625, 0.020589828491210938, 0.023357391357421875, 0.026124954223632812, 0.02889251708984375, 0.03166007995605469, 0.034427642822265625, 0.03719520568847656, 0.0399627685546875, 0.04273033142089844, 0.045497894287109375, 0.04826545715332031, 0.05103302001953125, 0.05380058288574219, 0.056568145751953125, 0.05933570861816406, 0.062103271484375, 0.06487083435058594, 0.06763839721679688, 0.07040596008300781, 0.07317352294921875, 0.07594108581542969, 0.07870864868164062, 0.08147621154785156, 0.0842437744140625, 0.08701133728027344, 0.08977890014648438, 0.09254646301269531, 0.09531402587890625, 0.09808158874511719, 0.10084915161132812, 0.10361671447753906, 0.10638427734375]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 4.0, 1.0, 1.0, 3.0, 1.0, 5.0, 3.0, 5.0, 14.0, 17.0, 28.0, 36.0, 75.0, 109.0, 188.0, 415.0, 944.0, 3643.0, 25617.0, 846310.0, 157723.0, 10102.0, 1934.0, 727.0, 279.0, 150.0, 89.0, 61.0, 24.0, 23.0, 12.0, 10.0, 6.0, 3.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.026458740234375, -0.025739431381225586, -0.025020122528076172, -0.024300813674926758, -0.023581504821777344, -0.02286219596862793, -0.022142887115478516, -0.0214235782623291, -0.020704269409179688, -0.019984960556030273, -0.01926565170288086, -0.018546342849731445, -0.01782703399658203, -0.017107725143432617, -0.016388416290283203, -0.01566910743713379, -0.014949798583984375, -0.014230489730834961, -0.013511180877685547, -0.012791872024536133, -0.012072563171386719, -0.011353254318237305, -0.01063394546508789, -0.009914636611938477, -0.009195327758789062, -0.008476018905639648, -0.007756710052490234, -0.00703740119934082, -0.006318092346191406, -0.005598783493041992, -0.004879474639892578, -0.004160165786743164, -0.00344085693359375, -0.002721548080444336, -0.002002239227294922, -0.0012829303741455078, -0.0005636215209960938, 0.0001556873321533203, 0.0008749961853027344, 0.0015943050384521484, 0.0023136138916015625, 0.0030329227447509766, 0.0037522315979003906, 0.004471540451049805, 0.005190849304199219, 0.005910158157348633, 0.006629467010498047, 0.007348775863647461, 0.008068084716796875, 0.008787393569946289, 0.009506702423095703, 0.010226011276245117, 0.010945320129394531, 0.011664628982543945, 0.01238393783569336, 0.013103246688842773, 0.013822555541992188, 0.014541864395141602, 0.015261173248291016, 0.01598048210144043, 0.016699790954589844, 0.017419099807739258, 0.018138408660888672, 0.018857717514038086, 0.0195770263671875]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 1.0, 2.0, 4.0, 8.0, 7.0, 8.0, 12.0, 10.0, 4.0, 17.0, 22.0, 24.0, 10.0, 22.0, 37.0, 12.0, 44.0, 56.0, 55.0, 31.0, 56.0, 64.0, 41.0, 58.0, 60.0, 37.0, 28.0, 43.0, 37.0, 31.0, 10.0, 32.0, 29.0, 14.0, 20.0, 11.0, 12.0, 6.0, 11.0, 9.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-3.159046173095703e-06, -3.0566006898880005e-06, -2.954155206680298e-06, -2.8517097234725952e-06, -2.7492642402648926e-06, -2.64681875705719e-06, -2.5443732738494873e-06, -2.4419277906417847e-06, -2.339482307434082e-06, -2.2370368242263794e-06, -2.1345913410186768e-06, -2.032145857810974e-06, -1.9297003746032715e-06, -1.8272548913955688e-06, -1.7248094081878662e-06, -1.6223639249801636e-06, -1.519918441772461e-06, -1.4174729585647583e-06, -1.3150274753570557e-06, -1.212581992149353e-06, -1.1101365089416504e-06, -1.0076910257339478e-06, -9.052455425262451e-07, -8.028000593185425e-07, -7.003545761108398e-07, -5.979090929031372e-07, -4.954636096954346e-07, -3.9301812648773193e-07, -2.905726432800293e-07, -1.8812716007232666e-07, -8.568167686462402e-08, 1.6763806343078613e-08, 1.1920928955078125e-07, 2.2165477275848389e-07, 3.241002559661865e-07, 4.2654573917388916e-07, 5.289912223815918e-07, 6.314367055892944e-07, 7.338821887969971e-07, 8.363276720046997e-07, 9.387731552124023e-07, 1.041218638420105e-06, 1.1436641216278076e-06, 1.2461096048355103e-06, 1.3485550880432129e-06, 1.4510005712509155e-06, 1.5534460544586182e-06, 1.6558915376663208e-06, 1.7583370208740234e-06, 1.860782504081726e-06, 1.9632279872894287e-06, 2.0656734704971313e-06, 2.168118953704834e-06, 2.2705644369125366e-06, 2.3730099201202393e-06, 2.475455403327942e-06, 2.5779008865356445e-06, 2.680346369743347e-06, 2.78279185295105e-06, 2.8852373361587524e-06, 2.987682819366455e-06, 3.0901283025741577e-06, 3.1925737857818604e-06, 3.295019268989563e-06, 3.3974647521972656e-06]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 5.0, 4.0, 7.0, 7.0, 12.0, 27.0, 38.0, 84.0, 202.0, 586.0, 3112.0, 289751.0, 749618.0, 4139.0, 625.0, 170.0, 59.0, 53.0, 26.0, 14.0, 9.0, 6.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.058502197265625, -0.05655527114868164, -0.05460834503173828, -0.05266141891479492, -0.05071449279785156, -0.0487675666809082, -0.046820640563964844, -0.044873714447021484, -0.042926788330078125, -0.040979862213134766, -0.039032936096191406, -0.03708600997924805, -0.03513908386230469, -0.03319215774536133, -0.03124523162841797, -0.02929830551147461, -0.02735137939453125, -0.02540445327758789, -0.02345752716064453, -0.021510601043701172, -0.019563674926757812, -0.017616748809814453, -0.015669822692871094, -0.013722896575927734, -0.011775970458984375, -0.009829044342041016, -0.007882118225097656, -0.005935192108154297, -0.0039882659912109375, -0.002041339874267578, -9.441375732421875e-05, 0.0018525123596191406, 0.0037994384765625, 0.005746364593505859, 0.007693290710449219, 0.009640216827392578, 0.011587142944335938, 0.013534069061279297, 0.015480995178222656, 0.017427921295166016, 0.019374847412109375, 0.021321773529052734, 0.023268699645996094, 0.025215625762939453, 0.027162551879882812, 0.029109477996826172, 0.03105640411376953, 0.03300333023071289, 0.03495025634765625, 0.03689718246459961, 0.03884410858154297, 0.04079103469848633, 0.04273796081542969, 0.04468488693237305, 0.046631813049316406, 0.048578739166259766, 0.050525665283203125, 0.052472591400146484, 0.054419517517089844, 0.0563664436340332, 0.05831336975097656, 0.06026029586791992, 0.06220722198486328, 0.06415414810180664, 0.06610107421875]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 5.0, 6.0, 11.0, 19.0, 25.0, 23.0, 55.0, 85.0, 129.0, 134.0, 146.0, 123.0, 85.0, 53.0, 42.0, 21.0, 14.0, 9.0, 6.0, 6.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0233001708984375, -0.022613048553466797, -0.021925926208496094, -0.02123880386352539, -0.020551681518554688, -0.019864559173583984, -0.01917743682861328, -0.018490314483642578, -0.017803192138671875, -0.017116069793701172, -0.01642894744873047, -0.015741825103759766, -0.015054702758789062, -0.01436758041381836, -0.013680458068847656, -0.012993335723876953, -0.01230621337890625, -0.011619091033935547, -0.010931968688964844, -0.01024484634399414, -0.009557723999023438, -0.008870601654052734, -0.008183479309082031, -0.007496356964111328, -0.006809234619140625, -0.006122112274169922, -0.005434989929199219, -0.004747867584228516, -0.0040607452392578125, -0.0033736228942871094, -0.0026865005493164062, -0.001999378204345703, -0.001312255859375, -0.0006251335144042969, 6.198883056640625e-05, 0.0007491111755371094, 0.0014362335205078125, 0.0021233558654785156, 0.0028104782104492188, 0.003497600555419922, 0.004184722900390625, 0.004871845245361328, 0.005558967590332031, 0.006246089935302734, 0.0069332122802734375, 0.007620334625244141, 0.008307456970214844, 0.008994579315185547, 0.00968170166015625, 0.010368824005126953, 0.011055946350097656, 0.01174306869506836, 0.012430191040039062, 0.013117313385009766, 0.013804435729980469, 0.014491558074951172, 0.015178680419921875, 0.015865802764892578, 0.01655292510986328, 0.017240047454833984, 0.017927169799804688, 0.01861429214477539, 0.019301414489746094, 0.019988536834716797, 0.0206756591796875]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 9.0, 16.0, 31.0, 62.0, 114.0, 331.0, 260.0, 90.0, 46.0, 17.0, 18.0, 6.0, 6.0, 3.0, 1.0, 1.0, 2.0], "bins": [-1.287964105606079, -1.263604998588562, -1.239245891571045, -1.2148869037628174, -1.1905277967453003, -1.1661686897277832, -1.1418097019195557, -1.1174505949020386, -1.0930914878845215, -1.0687323808670044, -1.0443732738494873, -1.0200142860412598, -0.9956551790237427, -0.9712960720062256, -0.9469370245933533, -0.922577977180481, -0.8982188701629639, -0.8738597631454468, -0.8495007157325745, -0.8251416683197021, -0.8007825613021851, -0.776423454284668, -0.7520644068717957, -0.7277053594589233, -0.7033462524414062, -0.6789871454238892, -0.6546280980110168, -0.6302690505981445, -0.6059099435806274, -0.5815508365631104, -0.557191789150238, -0.5328327417373657, -0.5084736347198486, -0.48411455750465393, -0.45975548028945923, -0.4353964030742645, -0.4110373258590698, -0.3866782486438751, -0.3623191714286804, -0.3379600942134857, -0.313601016998291, -0.2892419397830963, -0.2648828625679016, -0.2405237853527069, -0.2161647081375122, -0.1918056309223175, -0.1674465537071228, -0.1430874764919281, -0.1187283992767334, -0.0943693220615387, -0.070010244846344, -0.04565116763114929, -0.02129209041595459, 0.0030669867992401123, 0.027426064014434814, 0.05178514122962952, 0.07614421844482422, 0.10050329566001892, 0.12486237287521362, 0.14922145009040833, 0.17358052730560303, 0.19793960452079773, 0.22229868173599243, 0.24665775895118713, 0.27101683616638184]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 5.0, 9.0, 12.0, 5.0, 6.0, 12.0, 10.0, 13.0, 15.0, 17.0, 31.0, 26.0, 26.0, 33.0, 36.0, 41.0, 40.0, 38.0, 42.0, 48.0, 54.0, 36.0, 53.0, 43.0, 48.0, 42.0, 35.0, 31.0, 23.0, 19.0, 32.0, 18.0, 16.0, 15.0, 24.0, 13.0, 8.0, 6.0, 5.0, 4.0, 2.0, 7.0, 3.0, 4.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.46477627754211426, -0.4509151577949524, -0.4370540678501129, -0.42319294810295105, -0.4093318581581116, -0.3954707384109497, -0.38160961866378784, -0.36774852871894836, -0.3538874089717865, -0.34002628922462463, -0.32616519927978516, -0.3123040795326233, -0.2984429895877838, -0.28458186984062195, -0.27072077989578247, -0.2568596601486206, -0.24299855530261993, -0.22913745045661926, -0.2152763456106186, -0.20141524076461792, -0.18755412101745605, -0.17369301617145538, -0.1598319113254547, -0.14597079157829285, -0.13210970163345337, -0.1182485967874527, -0.10438748449087143, -0.09052637964487076, -0.07666526734828949, -0.06280416250228882, -0.04894305765628815, -0.03508194535970688, -0.02122083306312561, -0.007359725423157215, 0.00650138221681118, 0.020362488925457, 0.03422359749674797, 0.04808470606803894, 0.06194581091403961, 0.07580692321062088, 0.08966802805662155, 0.10352913290262222, 0.11739024519920349, 0.13125135004520416, 0.14511245489120483, 0.1589735746383667, 0.17283466458320618, 0.18669578433036804, 0.2005568891763687, 0.21441799402236938, 0.22827909886837006, 0.24214020371437073, 0.2560013234615326, 0.26986241340637207, 0.28372353315353394, 0.2975846529006958, 0.3114457428455353, 0.32530686259269714, 0.3391679525375366, 0.3530290722846985, 0.36689016222953796, 0.38075128197669983, 0.3946123719215393, 0.40847349166870117, 0.42233461141586304]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 4.0, 2.0, 2.0, 3.0, 2.0, 14.0, 21.0, 28.0, 29.0, 42.0, 65.0, 178.0, 348.0, 2087.0, 4175935.0, 14213.0, 786.0, 232.0, 103.0, 61.0, 38.0, 24.0, 26.0, 11.0, 7.0, 7.0, 4.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.33447265625, -0.3248481750488281, -0.31522369384765625, -0.3055992126464844, -0.2959747314453125, -0.2863502502441406, -0.27672576904296875, -0.2671012878417969, -0.257476806640625, -0.24785232543945312, -0.23822784423828125, -0.22860336303710938, -0.2189788818359375, -0.20935440063476562, -0.19972991943359375, -0.19010543823242188, -0.18048095703125, -0.17085647583007812, -0.16123199462890625, -0.15160751342773438, -0.1419830322265625, -0.13235855102539062, -0.12273406982421875, -0.11310958862304688, -0.103485107421875, -0.09386062622070312, -0.08423614501953125, -0.07461166381835938, -0.0649871826171875, -0.055362701416015625, -0.04573822021484375, -0.036113739013671875, -0.0264892578125, -0.016864776611328125, -0.00724029541015625, 0.002384185791015625, 0.0120086669921875, 0.021633148193359375, 0.03125762939453125, 0.040882110595703125, 0.050506591796875, 0.060131072998046875, 0.06975555419921875, 0.07938003540039062, 0.0890045166015625, 0.09862899780273438, 0.10825347900390625, 0.11787796020507812, 0.12750244140625, 0.13712692260742188, 0.14675140380859375, 0.15637588500976562, 0.1660003662109375, 0.17562484741210938, 0.18524932861328125, 0.19487380981445312, 0.204498291015625, 0.21412277221679688, 0.22374725341796875, 0.23337173461914062, 0.2429962158203125, 0.2526206970214844, 0.26224517822265625, 0.2718696594238281, 0.281494140625]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 2.0, 2.0, 13.0, 19.0, 22.0, 18.0, 33.0, 41.0, 82.0, 78.0, 100.0, 127.0, 103.0, 76.0, 83.0, 51.0, 41.0, 27.0, 20.0, 20.0, 11.0, 5.0, 7.0, 3.0, 5.0, 2.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0260772705078125, -0.025327444076538086, -0.024577617645263672, -0.023827791213989258, -0.023077964782714844, -0.02232813835144043, -0.021578311920166016, -0.0208284854888916, -0.020078659057617188, -0.019328832626342773, -0.01857900619506836, -0.017829179763793945, -0.01707935333251953, -0.016329526901245117, -0.015579700469970703, -0.014829874038696289, -0.014080047607421875, -0.013330221176147461, -0.012580394744873047, -0.011830568313598633, -0.011080741882324219, -0.010330915451049805, -0.00958108901977539, -0.008831262588500977, -0.008081436157226562, -0.0073316097259521484, -0.006581783294677734, -0.00583195686340332, -0.005082130432128906, -0.004332304000854492, -0.003582477569580078, -0.002832651138305664, -0.00208282470703125, -0.001332998275756836, -0.0005831718444824219, 0.0001666545867919922, 0.0009164810180664062, 0.0016663074493408203, 0.0024161338806152344, 0.0031659603118896484, 0.0039157867431640625, 0.0046656131744384766, 0.005415439605712891, 0.006165266036987305, 0.006915092468261719, 0.007664918899536133, 0.008414745330810547, 0.009164571762084961, 0.009914398193359375, 0.010664224624633789, 0.011414051055908203, 0.012163877487182617, 0.012913703918457031, 0.013663530349731445, 0.01441335678100586, 0.015163183212280273, 0.015913009643554688, 0.0166628360748291, 0.017412662506103516, 0.01816248893737793, 0.018912315368652344, 0.019662141799926758, 0.020411968231201172, 0.021161794662475586, 0.02191162109375]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 4.0, 2.0, 4.0, 6.0, 11.0, 24.0, 25.0, 57.0, 117.0, 236.0, 498.0, 1449.0, 7075.0, 252354.0, 3919361.0, 9532.0, 2052.0, 754.0, 320.0, 165.0, 89.0, 66.0, 36.0, 20.0, 13.0, 7.0, 4.0, 2.0, 1.0, 2.0, 3.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.13330078125, -0.12980079650878906, -0.12630081176757812, -0.12280082702636719, -0.11930084228515625, -0.11580085754394531, -0.11230087280273438, -0.10880088806152344, -0.1053009033203125, -0.10180091857910156, -0.09830093383789062, -0.09480094909667969, -0.09130096435546875, -0.08780097961425781, -0.08430099487304688, -0.08080101013183594, -0.077301025390625, -0.07380104064941406, -0.07030105590820312, -0.06680107116699219, -0.06330108642578125, -0.05980110168457031, -0.056301116943359375, -0.05280113220214844, -0.0493011474609375, -0.04580116271972656, -0.042301177978515625, -0.03880119323730469, -0.03530120849609375, -0.03180122375488281, -0.028301239013671875, -0.024801254272460938, -0.02130126953125, -0.017801284790039062, -0.014301300048828125, -0.010801315307617188, -0.00730133056640625, -0.0038013458251953125, -0.000301361083984375, 0.0031986236572265625, 0.0066986083984375, 0.010198593139648438, 0.013698577880859375, 0.017198562622070312, 0.02069854736328125, 0.024198532104492188, 0.027698516845703125, 0.031198501586914062, 0.034698486328125, 0.03819847106933594, 0.041698455810546875, 0.04519844055175781, 0.04869842529296875, 0.05219841003417969, 0.055698394775390625, 0.05919837951660156, 0.0626983642578125, 0.06619834899902344, 0.06969833374023438, 0.07319831848144531, 0.07669830322265625, 0.08019828796386719, 0.08369827270507812, 0.08719825744628906, 0.0906982421875]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 7.0, 12.0, 9.0, 10.0, 35.0, 41.0, 143.0, 2986.0, 652.0, 98.0, 32.0, 19.0, 11.0, 6.0, 5.0, 2.0, 3.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0216522216796875, -0.02081465721130371, -0.019977092742919922, -0.019139528274536133, -0.018301963806152344, -0.017464399337768555, -0.016626834869384766, -0.015789270401000977, -0.014951705932617188, -0.014114141464233398, -0.01327657699584961, -0.01243901252746582, -0.011601448059082031, -0.010763883590698242, -0.009926319122314453, -0.009088754653930664, -0.008251190185546875, -0.007413625717163086, -0.006576061248779297, -0.005738496780395508, -0.004900932312011719, -0.00406336784362793, -0.0032258033752441406, -0.0023882389068603516, -0.0015506744384765625, -0.0007131099700927734, 0.00012445449829101562, 0.0009620189666748047, 0.0017995834350585938, 0.002637147903442383, 0.003474712371826172, 0.004312276840209961, 0.00514984130859375, 0.005987405776977539, 0.006824970245361328, 0.007662534713745117, 0.008500099182128906, 0.009337663650512695, 0.010175228118896484, 0.011012792587280273, 0.011850357055664062, 0.012687921524047852, 0.01352548599243164, 0.01436305046081543, 0.015200614929199219, 0.016038179397583008, 0.016875743865966797, 0.017713308334350586, 0.018550872802734375, 0.019388437271118164, 0.020226001739501953, 0.021063566207885742, 0.02190113067626953, 0.02273869514465332, 0.02357625961303711, 0.0244138240814209, 0.025251388549804688, 0.026088953018188477, 0.026926517486572266, 0.027764081954956055, 0.028601646423339844, 0.029439210891723633, 0.030276775360107422, 0.03111433982849121, 0.031951904296875]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 5.0, 7.0, 21.0, 39.0, 56.0, 134.0, 243.0, 249.0, 126.0, 59.0, 27.0, 14.0, 10.0, 7.0, 2.0, 3.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07193657755851746, -0.06998924911022186, -0.06804192066192627, -0.06609459966421127, -0.06414727121591568, -0.06219994276762009, -0.06025261431932449, -0.0583052858710289, -0.056357961148023605, -0.05441063269972801, -0.05246330797672272, -0.050515979528427124, -0.04856865108013153, -0.046621326357126236, -0.04467399790883064, -0.04272667318582535, -0.040779344737529755, -0.03883201628923416, -0.03688469156622887, -0.03493736311793327, -0.03299003839492798, -0.031042709946632385, -0.029095381498336792, -0.027148054912686348, -0.025200728327035904, -0.02325340174138546, -0.021306075155735016, -0.019358746707439423, -0.01741142012178898, -0.015464093536138535, -0.013516766019165516, -0.011569438502192497, -0.009622111916542053, -0.007674784865230322, -0.0057274578139185905, -0.003780130762606859, -0.0018328037112951279, 0.00011452287435531616, 0.002061850391328335, 0.0040091779083013535, 0.0059565044939517975, 0.007903831079602242, 0.00985115859657526, 0.011798486113548279, 0.013745812699198723, 0.015693139284849167, 0.01764046773314476, 0.019587794318795204, 0.021535120904445648, 0.023482447490096092, 0.025429774075746536, 0.02737710252404213, 0.029324429109692574, 0.03127175569534302, 0.03321908414363861, 0.035166412591934204, 0.0371137373149395, 0.03906106576323509, 0.04100839048624039, 0.04295571893453598, 0.044903047382831573, 0.04685037210583687, 0.04879770055413246, 0.050745025277137756, 0.05269235372543335]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 3.0, 2.0, 11.0, 5.0, 12.0, 18.0, 11.0, 13.0, 16.0, 25.0, 37.0, 35.0, 34.0, 40.0, 41.0, 45.0, 44.0, 45.0, 62.0, 48.0, 51.0, 52.0, 49.0, 42.0, 39.0, 32.0, 31.0, 21.0, 23.0, 25.0, 17.0, 17.0, 13.0, 13.0, 8.0, 8.0, 5.0, 4.0, 5.0, 1.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.046367108821868896, -0.045011602342128754, -0.04365609586238861, -0.04230058938264847, -0.040945082902908325, -0.03958957642316818, -0.03823406994342804, -0.0368785634636879, -0.035523056983947754, -0.03416755050420761, -0.03281204402446747, -0.031456537544727325, -0.030101031064987183, -0.02874552458524704, -0.027390018105506897, -0.026034511625766754, -0.02467900514602661, -0.02332349866628647, -0.021967992186546326, -0.020612485706806183, -0.01925697922706604, -0.017901472747325897, -0.016545966267585754, -0.015190459787845612, -0.013834953308105469, -0.012479446828365326, -0.011123940348625183, -0.00976843386888504, -0.008412927389144897, -0.007057420909404755, -0.005701914429664612, -0.004346407949924469, -0.002990901470184326, -0.0016353949904441833, -0.00027988851070404053, 0.0010756179690361023, 0.002431124448776245, 0.003786630928516388, 0.005142137408256531, 0.006497643887996674, 0.007853150367736816, 0.00920865684747696, 0.010564163327217102, 0.011919669806957245, 0.013275176286697388, 0.01463068276643753, 0.015986189246177673, 0.017341695725917816, 0.01869720220565796, 0.020052708685398102, 0.021408215165138245, 0.022763721644878387, 0.02411922812461853, 0.025474734604358673, 0.026830241084098816, 0.02818574756383896, 0.0295412540435791, 0.030896760523319244, 0.03225226700305939, 0.03360777348279953, 0.03496327996253967, 0.036318786442279816, 0.03767429292201996, 0.0390297994017601, 0.040385305881500244]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 2.0, 0.0, 8.0, 3.0, 9.0, 9.0, 12.0, 9.0, 14.0, 25.0, 33.0, 46.0, 66.0, 105.0, 114.0, 156.0, 220.0, 294.0, 367.0, 504.0, 890.0, 1337.0, 2194.0, 3681.0, 6686.0, 14010.0, 31673.0, 88374.0, 415552.0, 345153.0, 79006.0, 29026.0, 12773.0, 6595.0, 3535.0, 1960.0, 1244.0, 844.0, 589.0, 394.0, 290.0, 192.0, 155.0, 107.0, 71.0, 51.0, 45.0, 39.0, 37.0, 19.0, 15.0, 12.0, 8.0, 5.0, 3.0, 6.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.06024169921875, -0.058356285095214844, -0.05647087097167969, -0.05458545684814453, -0.052700042724609375, -0.05081462860107422, -0.04892921447753906, -0.047043800354003906, -0.04515838623046875, -0.043272972106933594, -0.04138755798339844, -0.03950214385986328, -0.037616729736328125, -0.03573131561279297, -0.03384590148925781, -0.031960487365722656, -0.0300750732421875, -0.028189659118652344, -0.026304244995117188, -0.02441883087158203, -0.022533416748046875, -0.02064800262451172, -0.018762588500976562, -0.016877174377441406, -0.01499176025390625, -0.013106346130371094, -0.011220932006835938, -0.009335517883300781, -0.007450103759765625, -0.005564689636230469, -0.0036792755126953125, -0.0017938613891601562, 9.1552734375e-05, 0.0019769668579101562, 0.0038623809814453125, 0.005747795104980469, 0.007633209228515625, 0.009518623352050781, 0.011404037475585938, 0.013289451599121094, 0.01517486572265625, 0.017060279846191406, 0.018945693969726562, 0.02083110809326172, 0.022716522216796875, 0.02460193634033203, 0.026487350463867188, 0.028372764587402344, 0.0302581787109375, 0.032143592834472656, 0.03402900695800781, 0.03591442108154297, 0.037799835205078125, 0.03968524932861328, 0.04157066345214844, 0.043456077575683594, 0.04534149169921875, 0.047226905822753906, 0.04911231994628906, 0.05099773406982422, 0.052883148193359375, 0.05476856231689453, 0.05665397644042969, 0.058539390563964844, 0.0604248046875]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 4.0, 4.0, 0.0, 2.0, 1.0, 12.0, 14.0, 22.0, 21.0, 25.0, 46.0, 55.0, 91.0, 95.0, 130.0, 114.0, 70.0, 81.0, 68.0, 41.0, 30.0, 16.0, 23.0, 12.0, 7.0, 5.0, 4.0, 5.0, 5.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0261383056640625, -0.02539229393005371, -0.024646282196044922, -0.023900270462036133, -0.023154258728027344, -0.022408246994018555, -0.021662235260009766, -0.020916223526000977, -0.020170211791992188, -0.0194242000579834, -0.01867818832397461, -0.01793217658996582, -0.01718616485595703, -0.016440153121948242, -0.015694141387939453, -0.014948129653930664, -0.014202117919921875, -0.013456106185913086, -0.012710094451904297, -0.011964082717895508, -0.011218070983886719, -0.01047205924987793, -0.00972604751586914, -0.008980035781860352, -0.008234024047851562, -0.0074880123138427734, -0.006742000579833984, -0.005995988845825195, -0.005249977111816406, -0.004503965377807617, -0.003757953643798828, -0.003011941909790039, -0.00226593017578125, -0.001519918441772461, -0.0007739067077636719, -2.7894973754882812e-05, 0.0007181167602539062, 0.0014641284942626953, 0.0022101402282714844, 0.0029561519622802734, 0.0037021636962890625, 0.0044481754302978516, 0.005194187164306641, 0.00594019889831543, 0.006686210632324219, 0.007432222366333008, 0.008178234100341797, 0.008924245834350586, 0.009670257568359375, 0.010416269302368164, 0.011162281036376953, 0.011908292770385742, 0.012654304504394531, 0.01340031623840332, 0.01414632797241211, 0.014892339706420898, 0.015638351440429688, 0.016384363174438477, 0.017130374908447266, 0.017876386642456055, 0.018622398376464844, 0.019368410110473633, 0.020114421844482422, 0.02086043357849121, 0.0216064453125]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 6.0, 2.0, 12.0, 7.0, 14.0, 21.0, 27.0, 23.0, 26.0, 38.0, 56.0, 98.0, 214.0, 477.0, 1262.0, 3900.0, 18188.0, 178131.0, 773988.0, 60068.0, 8365.0, 2129.0, 743.0, 301.0, 148.0, 78.0, 41.0, 31.0, 21.0, 25.0, 20.0, 12.0, 17.0, 6.0, 8.0, 16.0, 7.0, 6.0, 6.0, 3.0, 4.0, 4.0, 2.0, 2.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.134033203125, -0.12974929809570312, -0.12546539306640625, -0.12118148803710938, -0.1168975830078125, -0.11261367797851562, -0.10832977294921875, -0.10404586791992188, -0.099761962890625, -0.09547805786132812, -0.09119415283203125, -0.08691024780273438, -0.0826263427734375, -0.07834243774414062, -0.07405853271484375, -0.06977462768554688, -0.06549072265625, -0.061206817626953125, -0.05692291259765625, -0.052639007568359375, -0.0483551025390625, -0.044071197509765625, -0.03978729248046875, -0.035503387451171875, -0.031219482421875, -0.026935577392578125, -0.02265167236328125, -0.018367767333984375, -0.0140838623046875, -0.009799957275390625, -0.00551605224609375, -0.001232147216796875, 0.0030517578125, 0.007335662841796875, 0.01161956787109375, 0.015903472900390625, 0.0201873779296875, 0.024471282958984375, 0.02875518798828125, 0.033039093017578125, 0.037322998046875, 0.041606903076171875, 0.04589080810546875, 0.050174713134765625, 0.0544586181640625, 0.058742523193359375, 0.06302642822265625, 0.06731033325195312, 0.07159423828125, 0.07587814331054688, 0.08016204833984375, 0.08444595336914062, 0.0887298583984375, 0.09301376342773438, 0.09729766845703125, 0.10158157348632812, 0.105865478515625, 0.11014938354492188, 0.11443328857421875, 0.11871719360351562, 0.1230010986328125, 0.12728500366210938, 0.13156890869140625, 0.13585281372070312, 0.14013671875]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 2.0, 4.0, 3.0, 3.0, 7.0, 6.0, 6.0, 15.0, 10.0, 6.0, 16.0, 13.0, 19.0, 24.0, 21.0, 18.0, 25.0, 32.0, 36.0, 40.0, 37.0, 43.0, 53.0, 44.0, 52.0, 45.0, 51.0, 42.0, 35.0, 33.0, 36.0, 28.0, 30.0, 28.0, 27.0, 20.0, 26.0, 22.0, 15.0, 6.0, 12.0, 2.0, 6.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.065185546875, -0.06319332122802734, -0.06120109558105469, -0.05920886993408203, -0.057216644287109375, -0.05522441864013672, -0.05323219299316406, -0.051239967346191406, -0.04924774169921875, -0.047255516052246094, -0.04526329040527344, -0.04327106475830078, -0.041278839111328125, -0.03928661346435547, -0.03729438781738281, -0.035302162170410156, -0.0333099365234375, -0.031317710876464844, -0.029325485229492188, -0.02733325958251953, -0.025341033935546875, -0.02334880828857422, -0.021356582641601562, -0.019364356994628906, -0.01737213134765625, -0.015379905700683594, -0.013387680053710938, -0.011395454406738281, -0.009403228759765625, -0.007411003112792969, -0.0054187774658203125, -0.0034265518188476562, -0.001434326171875, 0.0005578994750976562, 0.0025501251220703125, 0.004542350769042969, 0.006534576416015625, 0.008526802062988281, 0.010519027709960938, 0.012511253356933594, 0.01450347900390625, 0.016495704650878906, 0.018487930297851562, 0.02048015594482422, 0.022472381591796875, 0.02446460723876953, 0.026456832885742188, 0.028449058532714844, 0.0304412841796875, 0.032433509826660156, 0.03442573547363281, 0.03641796112060547, 0.038410186767578125, 0.04040241241455078, 0.04239463806152344, 0.044386863708496094, 0.04637908935546875, 0.048371315002441406, 0.05036354064941406, 0.05235576629638672, 0.054347991943359375, 0.05634021759033203, 0.05833244323730469, 0.060324668884277344, 0.06231689453125]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 4.0, 4.0, 6.0, 6.0, 12.0, 18.0, 25.0, 24.0, 35.0, 58.0, 70.0, 125.0, 202.0, 338.0, 667.0, 1312.0, 3884.0, 18710.0, 240376.0, 731101.0, 41167.0, 6549.0, 1944.0, 813.0, 430.0, 239.0, 149.0, 86.0, 53.0, 36.0, 30.0, 20.0, 12.0, 10.0, 13.0, 9.0, 3.0, 5.0, 6.0, 1.0, 5.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01425933837890625, -0.013829708099365234, -0.013400077819824219, -0.012970447540283203, -0.012540817260742188, -0.012111186981201172, -0.011681556701660156, -0.01125192642211914, -0.010822296142578125, -0.01039266586303711, -0.009963035583496094, -0.009533405303955078, -0.009103775024414062, -0.008674144744873047, -0.008244514465332031, -0.007814884185791016, -0.00738525390625, -0.006955623626708984, -0.006525993347167969, -0.006096363067626953, -0.0056667327880859375, -0.005237102508544922, -0.004807472229003906, -0.004377841949462891, -0.003948211669921875, -0.0035185813903808594, -0.0030889511108398438, -0.002659320831298828, -0.0022296905517578125, -0.0018000602722167969, -0.0013704299926757812, -0.0009407997131347656, -0.00051116943359375, -8.153915405273438e-05, 0.00034809112548828125, 0.0007777214050292969, 0.0012073516845703125, 0.0016369819641113281, 0.0020666122436523438, 0.0024962425231933594, 0.002925872802734375, 0.0033555030822753906, 0.0037851333618164062, 0.004214763641357422, 0.0046443939208984375, 0.005074024200439453, 0.005503654479980469, 0.005933284759521484, 0.0063629150390625, 0.006792545318603516, 0.007222175598144531, 0.007651805877685547, 0.008081436157226562, 0.008511066436767578, 0.008940696716308594, 0.00937032699584961, 0.009799957275390625, 0.01022958755493164, 0.010659217834472656, 0.011088848114013672, 0.011518478393554688, 0.011948108673095703, 0.012377738952636719, 0.012807369232177734, 0.01323699951171875]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 4.0, 0.0, 7.0, 10.0, 8.0, 1.0, 12.0, 16.0, 16.0, 20.0, 32.0, 30.0, 46.0, 40.0, 52.0, 53.0, 58.0, 68.0, 29.0, 56.0, 68.0, 53.0, 56.0, 48.0, 42.0, 37.0, 36.0, 23.0, 17.0, 15.0, 17.0, 7.0, 7.0, 11.0, 1.0, 7.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-4.410743713378906e-06, -4.296191036701202e-06, -4.1816383600234985e-06, -4.067085683345795e-06, -3.952533006668091e-06, -3.837980329990387e-06, -3.723427653312683e-06, -3.6088749766349792e-06, -3.4943222999572754e-06, -3.3797696232795715e-06, -3.2652169466018677e-06, -3.150664269924164e-06, -3.03611159324646e-06, -2.921558916568756e-06, -2.8070062398910522e-06, -2.6924535632133484e-06, -2.5779008865356445e-06, -2.4633482098579407e-06, -2.348795533180237e-06, -2.234242856502533e-06, -2.119690179824829e-06, -2.0051375031471252e-06, -1.8905848264694214e-06, -1.7760321497917175e-06, -1.6614794731140137e-06, -1.5469267964363098e-06, -1.432374119758606e-06, -1.317821443080902e-06, -1.2032687664031982e-06, -1.0887160897254944e-06, -9.741634130477905e-07, -8.596107363700867e-07, -7.450580596923828e-07, -6.30505383014679e-07, -5.159527063369751e-07, -4.0140002965927124e-07, -2.868473529815674e-07, -1.7229467630386353e-07, -5.774199962615967e-08, 5.681067705154419e-08, 1.7136335372924805e-07, 2.859160304069519e-07, 4.0046870708465576e-07, 5.150213837623596e-07, 6.295740604400635e-07, 7.441267371177673e-07, 8.586794137954712e-07, 9.73232090473175e-07, 1.087784767150879e-06, 1.2023374438285828e-06, 1.3168901205062866e-06, 1.4314427971839905e-06, 1.5459954738616943e-06, 1.6605481505393982e-06, 1.775100827217102e-06, 1.889653503894806e-06, 2.0042061805725098e-06, 2.1187588572502136e-06, 2.2333115339279175e-06, 2.3478642106056213e-06, 2.462416887283325e-06, 2.576969563961029e-06, 2.691522240638733e-06, 2.8060749173164368e-06, 2.9206275939941406e-06]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 4.0, 5.0, 13.0, 9.0, 11.0, 23.0, 27.0, 48.0, 82.0, 248.0, 615.0, 2660.0, 23430.0, 915221.0, 98891.0, 5533.0, 1107.0, 314.0, 127.0, 77.0, 43.0, 15.0, 17.0, 12.0, 6.0, 3.0, 5.0, 4.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0214691162109375, -0.020600080490112305, -0.01973104476928711, -0.018862009048461914, -0.01799297332763672, -0.017123937606811523, -0.016254901885986328, -0.015385866165161133, -0.014516830444335938, -0.013647794723510742, -0.012778759002685547, -0.011909723281860352, -0.011040687561035156, -0.010171651840209961, -0.009302616119384766, -0.00843358039855957, -0.007564544677734375, -0.00669550895690918, -0.005826473236083984, -0.004957437515258789, -0.004088401794433594, -0.0032193660736083984, -0.002350330352783203, -0.0014812946319580078, -0.0006122589111328125, 0.0002567768096923828, 0.0011258125305175781, 0.0019948482513427734, 0.0028638839721679688, 0.003732919692993164, 0.004601955413818359, 0.005470991134643555, 0.00634002685546875, 0.007209062576293945, 0.00807809829711914, 0.008947134017944336, 0.009816169738769531, 0.010685205459594727, 0.011554241180419922, 0.012423276901245117, 0.013292312622070312, 0.014161348342895508, 0.015030384063720703, 0.0158994197845459, 0.016768455505371094, 0.01763749122619629, 0.018506526947021484, 0.01937556266784668, 0.020244598388671875, 0.02111363410949707, 0.021982669830322266, 0.02285170555114746, 0.023720741271972656, 0.02458977699279785, 0.025458812713623047, 0.026327848434448242, 0.027196884155273438, 0.028065919876098633, 0.028934955596923828, 0.029803991317749023, 0.03067302703857422, 0.031542062759399414, 0.03241109848022461, 0.033280134201049805, 0.034149169921875]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 3.0, 5.0, 3.0, 5.0, 8.0, 16.0, 12.0, 27.0, 39.0, 38.0, 47.0, 66.0, 87.0, 125.0, 102.0, 86.0, 86.0, 62.0, 58.0, 26.0, 27.0, 19.0, 15.0, 9.0, 7.0, 12.0, 4.0, 5.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0], "bins": [-0.0158538818359375, -0.015450835227966309, -0.015047788619995117, -0.014644742012023926, -0.014241695404052734, -0.013838648796081543, -0.013435602188110352, -0.01303255558013916, -0.012629508972167969, -0.012226462364196777, -0.011823415756225586, -0.011420369148254395, -0.011017322540283203, -0.010614275932312012, -0.01021122932434082, -0.009808182716369629, -0.009405136108398438, -0.009002089500427246, -0.008599042892456055, -0.008195996284484863, -0.007792949676513672, -0.0073899030685424805, -0.006986856460571289, -0.006583809852600098, -0.006180763244628906, -0.005777716636657715, -0.0053746700286865234, -0.004971623420715332, -0.004568576812744141, -0.004165530204772949, -0.003762483596801758, -0.0033594369888305664, -0.002956390380859375, -0.0025533437728881836, -0.002150297164916992, -0.0017472505569458008, -0.0013442039489746094, -0.000941157341003418, -0.0005381107330322266, -0.00013506412506103516, 0.00026798248291015625, 0.0006710290908813477, 0.001074075698852539, 0.0014771223068237305, 0.0018801689147949219, 0.0022832155227661133, 0.0026862621307373047, 0.003089308738708496, 0.0034923553466796875, 0.003895401954650879, 0.00429844856262207, 0.004701495170593262, 0.005104541778564453, 0.0055075883865356445, 0.005910634994506836, 0.006313681602478027, 0.006716728210449219, 0.00711977481842041, 0.0075228214263916016, 0.007925868034362793, 0.008328914642333984, 0.008731961250305176, 0.009135007858276367, 0.009538054466247559, 0.00994110107421875]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 9.0, 28.0, 54.0, 153.0, 475.0, 176.0, 67.0, 24.0, 10.0, 7.0, 6.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.284898281097412, -1.2566673755645752, -1.2284364700317383, -1.2002054452896118, -1.171974539756775, -1.143743634223938, -1.115512728691101, -1.0872818231582642, -1.0590507984161377, -1.0308198928833008, -1.0025889873504639, -0.9743580222129822, -0.9461270570755005, -0.9178961515426636, -0.8896652460098267, -0.861434280872345, -0.8332034349441528, -0.8049725294113159, -0.7767415642738342, -0.7485106587409973, -0.7202796936035156, -0.6920487880706787, -0.6638178825378418, -0.6355869174003601, -0.6073559522628784, -0.5791250467300415, -0.5508940815925598, -0.5226631760597229, -0.4944322407245636, -0.4662013053894043, -0.437970370054245, -0.4097394347190857, -0.3815085291862488, -0.3532775938510895, -0.3250466585159302, -0.29681575298309326, -0.26858481764793396, -0.24035388231277466, -0.21212294697761536, -0.18389202654361725, -0.15566109120845795, -0.12743015587329865, -0.09919923543930054, -0.07096830010414124, -0.04273737221956253, -0.014506444334983826, 0.013724491000175476, 0.041955411434173584, 0.07018634676933289, 0.09841727465391159, 0.1266482025384903, 0.1548791378736496, 0.1831100583076477, 0.211340993642807, 0.2395719289779663, 0.2678028345108032, 0.2960337996482849, 0.3242647349834442, 0.3524956703186035, 0.38072657585144043, 0.40895751118659973, 0.43718844652175903, 0.46541938185691833, 0.49365031719207764, 0.5218812227249146]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 6.0, 3.0, 5.0, 3.0, 4.0, 4.0, 12.0, 13.0, 26.0, 32.0, 27.0, 33.0, 48.0, 54.0, 56.0, 65.0, 69.0, 73.0, 63.0, 56.0, 50.0, 40.0, 50.0, 41.0, 29.0, 34.0, 28.0, 17.0, 21.0, 18.0, 8.0, 8.0, 4.0, 5.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.47862690687179565, -0.4630107283592224, -0.44739454984664917, -0.4317783713340759, -0.4161621928215027, -0.40054601430892944, -0.3849298357963562, -0.36931365728378296, -0.3536974787712097, -0.3380813002586365, -0.32246512174606323, -0.30684894323349, -0.29123276472091675, -0.2756165862083435, -0.26000040769577026, -0.24438422918319702, -0.22876806557178497, -0.21315188705921173, -0.1975357085466385, -0.18191953003406525, -0.166303351521492, -0.15068718791007996, -0.1350710093975067, -0.11945482343435287, -0.10383864492177963, -0.08822246640920639, -0.07260628789663315, -0.056990113109350204, -0.04137393459677696, -0.02575775980949402, -0.010141581296920776, 0.005474597215652466, 0.021090775728225708, 0.03670695424079895, 0.05232313275337219, 0.06793931126594543, 0.08355548977851868, 0.09917166084051132, 0.11478783935308456, 0.1304040253162384, 0.14602020382881165, 0.1616363823413849, 0.17725256085395813, 0.19286873936653137, 0.20848491787910461, 0.22410109639167786, 0.2397172749042511, 0.25533345341682434, 0.2709496021270752, 0.28656578063964844, 0.3021819591522217, 0.3177981376647949, 0.33341431617736816, 0.3490304946899414, 0.36464667320251465, 0.3802628517150879, 0.39587903022766113, 0.4114952087402344, 0.4271113872528076, 0.44272756576538086, 0.4583437442779541, 0.47395992279052734, 0.4895761013031006, 0.5051922798156738, 0.5208084583282471]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 3.0, 3.0, 7.0, 5.0, 4.0, 12.0, 6.0, 12.0, 13.0, 31.0, 35.0, 51.0, 73.0, 97.0, 152.0, 276.0, 519.0, 964.0, 2187.0, 5986.0, 28856.0, 4094453.0, 46925.0, 7932.0, 2771.0, 1266.0, 594.0, 371.0, 205.0, 160.0, 94.0, 66.0, 44.0, 34.0, 17.0, 20.0, 13.0, 9.0, 8.0, 5.0, 6.0, 1.0, 5.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.12066650390625, -0.11717891693115234, -0.11369132995605469, -0.11020374298095703, -0.10671615600585938, -0.10322856903076172, -0.09974098205566406, -0.0962533950805664, -0.09276580810546875, -0.0892782211303711, -0.08579063415527344, -0.08230304718017578, -0.07881546020507812, -0.07532787322998047, -0.07184028625488281, -0.06835269927978516, -0.0648651123046875, -0.061377525329589844, -0.05788993835449219, -0.05440235137939453, -0.050914764404296875, -0.04742717742919922, -0.04393959045410156, -0.040452003479003906, -0.03696441650390625, -0.033476829528808594, -0.029989242553710938, -0.02650165557861328, -0.023014068603515625, -0.01952648162841797, -0.016038894653320312, -0.012551307678222656, -0.009063720703125, -0.005576133728027344, -0.0020885467529296875, 0.0013990402221679688, 0.004886627197265625, 0.008374214172363281, 0.011861801147460938, 0.015349388122558594, 0.01883697509765625, 0.022324562072753906, 0.025812149047851562, 0.02929973602294922, 0.032787322998046875, 0.03627490997314453, 0.03976249694824219, 0.043250083923339844, 0.0467376708984375, 0.050225257873535156, 0.05371284484863281, 0.05720043182373047, 0.060688018798828125, 0.06417560577392578, 0.06766319274902344, 0.0711507797241211, 0.07463836669921875, 0.0781259536743164, 0.08161354064941406, 0.08510112762451172, 0.08858871459960938, 0.09207630157470703, 0.09556388854980469, 0.09905147552490234, 0.1025390625]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 4.0, 1.0, 3.0, 1.0, 7.0, 12.0, 12.0, 25.0, 24.0, 39.0, 52.0, 67.0, 111.0, 106.0, 105.0, 82.0, 80.0, 76.0, 51.0, 38.0, 35.0, 18.0, 17.0, 5.0, 10.0, 4.0, 7.0, 2.0, 5.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.025115966796875, -0.024390697479248047, -0.023665428161621094, -0.02294015884399414, -0.022214889526367188, -0.021489620208740234, -0.02076435089111328, -0.020039081573486328, -0.019313812255859375, -0.018588542938232422, -0.01786327362060547, -0.017138004302978516, -0.016412734985351562, -0.01568746566772461, -0.014962196350097656, -0.014236927032470703, -0.01351165771484375, -0.012786388397216797, -0.012061119079589844, -0.01133584976196289, -0.010610580444335938, -0.009885311126708984, -0.009160041809082031, -0.008434772491455078, -0.007709503173828125, -0.006984233856201172, -0.006258964538574219, -0.005533695220947266, -0.0048084259033203125, -0.004083156585693359, -0.0033578872680664062, -0.002632617950439453, -0.0019073486328125, -0.0011820793151855469, -0.00045680999755859375, 0.0002684593200683594, 0.0009937286376953125, 0.0017189979553222656, 0.0024442672729492188, 0.003169536590576172, 0.003894805908203125, 0.004620075225830078, 0.005345344543457031, 0.006070613861083984, 0.0067958831787109375, 0.007521152496337891, 0.008246421813964844, 0.008971691131591797, 0.00969696044921875, 0.010422229766845703, 0.011147499084472656, 0.01187276840209961, 0.012598037719726562, 0.013323307037353516, 0.014048576354980469, 0.014773845672607422, 0.015499114990234375, 0.016224384307861328, 0.01694965362548828, 0.017674922943115234, 0.018400192260742188, 0.01912546157836914, 0.019850730895996094, 0.020576000213623047, 0.02130126953125]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 7.0, 4.0, 7.0, 7.0, 12.0, 15.0, 27.0, 40.0, 64.0, 134.0, 317.0, 680.0, 1827.0, 5833.0, 38584.0, 4087942.0, 48086.0, 7140.0, 2055.0, 774.0, 364.0, 147.0, 77.0, 44.0, 33.0, 20.0, 9.0, 13.0, 4.0, 2.0, 4.0, 2.0, 1.0, 3.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.1397705078125, -0.13583660125732422, -0.13190269470214844, -0.12796878814697266, -0.12403488159179688, -0.1201009750366211, -0.11616706848144531, -0.11223316192626953, -0.10829925537109375, -0.10436534881591797, -0.10043144226074219, -0.0964975357055664, -0.09256362915039062, -0.08862972259521484, -0.08469581604003906, -0.08076190948486328, -0.0768280029296875, -0.07289409637451172, -0.06896018981933594, -0.06502628326416016, -0.061092376708984375, -0.057158470153808594, -0.05322456359863281, -0.04929065704345703, -0.04535675048828125, -0.04142284393310547, -0.03748893737792969, -0.033555030822753906, -0.029621124267578125, -0.025687217712402344, -0.021753311157226562, -0.01781940460205078, -0.013885498046875, -0.009951591491699219, -0.0060176849365234375, -0.0020837783813476562, 0.001850128173828125, 0.005784034729003906, 0.009717941284179688, 0.013651847839355469, 0.01758575439453125, 0.02151966094970703, 0.025453567504882812, 0.029387474060058594, 0.033321380615234375, 0.037255287170410156, 0.04118919372558594, 0.04512310028076172, 0.0490570068359375, 0.05299091339111328, 0.05692481994628906, 0.060858726501464844, 0.06479263305664062, 0.0687265396118164, 0.07266044616699219, 0.07659435272216797, 0.08052825927734375, 0.08446216583251953, 0.08839607238769531, 0.0923299789428711, 0.09626388549804688, 0.10019779205322266, 0.10413169860839844, 0.10806560516357422, 0.11199951171875]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 8.0, 9.0, 3.0, 10.0, 9.0, 24.0, 26.0, 41.0, 71.0, 336.0, 3017.0, 287.0, 70.0, 33.0, 33.0, 20.0, 16.0, 13.0, 7.0, 6.0, 5.0, 6.0, 4.0, 1.0, 3.0, 5.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.031402587890625, -0.030298233032226562, -0.029193878173828125, -0.028089523315429688, -0.02698516845703125, -0.025880813598632812, -0.024776458740234375, -0.023672103881835938, -0.0225677490234375, -0.021463394165039062, -0.020359039306640625, -0.019254684448242188, -0.01815032958984375, -0.017045974731445312, -0.015941619873046875, -0.014837265014648438, -0.01373291015625, -0.012628555297851562, -0.011524200439453125, -0.010419845581054688, -0.00931549072265625, -0.008211135864257812, -0.007106781005859375, -0.0060024261474609375, -0.0048980712890625, -0.0037937164306640625, -0.002689361572265625, -0.0015850067138671875, -0.00048065185546875, 0.0006237030029296875, 0.001728057861328125, 0.0028324127197265625, 0.003936767578125, 0.0050411224365234375, 0.006145477294921875, 0.0072498321533203125, 0.00835418701171875, 0.009458541870117188, 0.010562896728515625, 0.011667251586914062, 0.0127716064453125, 0.013875961303710938, 0.014980316162109375, 0.016084671020507812, 0.01718902587890625, 0.018293380737304688, 0.019397735595703125, 0.020502090454101562, 0.0216064453125, 0.022710800170898438, 0.023815155029296875, 0.024919509887695312, 0.02602386474609375, 0.027128219604492188, 0.028232574462890625, 0.029336929321289062, 0.0304412841796875, 0.03154563903808594, 0.032649993896484375, 0.03375434875488281, 0.03485870361328125, 0.03596305847167969, 0.037067413330078125, 0.03817176818847656, 0.039276123046875]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 2.0, 5.0, 8.0, 18.0, 67.0, 268.0, 461.0, 119.0, 34.0, 15.0, 5.0, 1.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.35146594047546387, -0.342714786529541, -0.33396366238594055, -0.3252125084400177, -0.31646138429641724, -0.3077102303504944, -0.29895907640457153, -0.29020795226097107, -0.2814567983150482, -0.27270564436912537, -0.2639545202255249, -0.25520336627960205, -0.2464522272348404, -0.23770108819007874, -0.22894993424415588, -0.22019879519939423, -0.21144765615463257, -0.2026965171098709, -0.19394537806510925, -0.1851942241191864, -0.17644308507442474, -0.16769194602966309, -0.15894079208374023, -0.15018965303897858, -0.14143851399421692, -0.13268737494945526, -0.123936228454113, -0.11518508195877075, -0.1064339429140091, -0.09768280386924744, -0.08893165737390518, -0.08018051087856293, -0.07142934203147888, -0.06267820298671722, -0.05392705649137497, -0.04517591372132301, -0.03642477095127106, -0.0276736281812191, -0.018922485411167145, -0.010171342641115189, -0.0014201998710632324, 0.007330942898988724, 0.01608208566904068, 0.024833228439092636, 0.03358437120914459, 0.04233551397919655, 0.051086656749248505, 0.05983779951930046, 0.06858894228935242, 0.07734008133411407, 0.08609122782945633, 0.09484237432479858, 0.10359351336956024, 0.1123446524143219, 0.12109579890966415, 0.1298469454050064, 0.13859808444976807, 0.14734922349452972, 0.15610036253929138, 0.16485151648521423, 0.1736026555299759, 0.18235379457473755, 0.1911049485206604, 0.19985608756542206, 0.20860722661018372]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 4.0, 4.0, 4.0, 6.0, 14.0, 25.0, 38.0, 41.0, 58.0, 50.0, 61.0, 75.0, 80.0, 78.0, 84.0, 71.0, 71.0, 59.0, 44.0, 50.0, 27.0, 17.0, 17.0, 10.0, 6.0, 3.0, 3.0, 3.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.15579360723495483, -0.1510552167892456, -0.14631682634353638, -0.14157843589782715, -0.13684004545211792, -0.1321016550064087, -0.12736326456069946, -0.12262488156557083, -0.1178864911198616, -0.11314810067415237, -0.10840971022844315, -0.10367131978273392, -0.09893293678760529, -0.09419454634189606, -0.08945615589618683, -0.0847177654504776, -0.07997937500476837, -0.07524098455905914, -0.07050259411334991, -0.06576420366764069, -0.061025816947221756, -0.05628742650151253, -0.0515490397810936, -0.04681064933538437, -0.04207225888967514, -0.03733386844396591, -0.03259547799825668, -0.027857091277837753, -0.023118700832128525, -0.018380310386419296, -0.013641921803355217, -0.008903533220291138, -0.004165142774581909, 0.0005732467398047447, 0.005311636254191399, 0.010050025768578053, 0.014788415282964706, 0.019526805728673935, 0.024265194311738014, 0.029003582894802094, 0.03374197334051132, 0.03848036378622055, 0.04321875423192978, 0.04795714095234871, 0.05269553139805794, 0.057433921843767166, 0.062172308564186096, 0.06691069900989532, 0.07164908945560455, 0.07638747990131378, 0.08112587034702301, 0.08586426079273224, 0.09060265123844147, 0.0953410416841507, 0.10007942467927933, 0.10481781512498856, 0.10955620557069778, 0.11429459601640701, 0.11903298646211624, 0.12377137690782547, 0.1285097599029541, 0.13324815034866333, 0.13798654079437256, 0.1427249312400818, 0.14746332168579102]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 4.0, 7.0, 7.0, 16.0, 26.0, 32.0, 44.0, 56.0, 81.0, 127.0, 153.0, 201.0, 286.0, 372.0, 551.0, 851.0, 1304.0, 1994.0, 3708.0, 7374.0, 16450.0, 44619.0, 172140.0, 584865.0, 142800.0, 39355.0, 15075.0, 6734.0, 3399.0, 1967.0, 1223.0, 824.0, 554.0, 383.0, 265.0, 184.0, 132.0, 116.0, 86.0, 56.0, 57.0, 24.0, 12.0, 9.0, 10.0, 11.0, 8.0, 5.0, 0.0, 2.0, 1.0, 5.0], "bins": [-0.08294677734375, -0.08060646057128906, -0.07826614379882812, -0.07592582702636719, -0.07358551025390625, -0.07124519348144531, -0.06890487670898438, -0.06656455993652344, -0.0642242431640625, -0.06188392639160156, -0.059543609619140625, -0.05720329284667969, -0.05486297607421875, -0.05252265930175781, -0.050182342529296875, -0.04784202575683594, -0.045501708984375, -0.04316139221191406, -0.040821075439453125, -0.03848075866699219, -0.03614044189453125, -0.03380012512207031, -0.031459808349609375, -0.029119491577148438, -0.0267791748046875, -0.024438858032226562, -0.022098541259765625, -0.019758224487304688, -0.01741790771484375, -0.015077590942382812, -0.012737274169921875, -0.010396957397460938, -0.008056640625, -0.0057163238525390625, -0.003376007080078125, -0.0010356903076171875, 0.00130462646484375, 0.0036449432373046875, 0.005985260009765625, 0.008325576782226562, 0.0106658935546875, 0.013006210327148438, 0.015346527099609375, 0.017686843872070312, 0.02002716064453125, 0.022367477416992188, 0.024707794189453125, 0.027048110961914062, 0.029388427734375, 0.03172874450683594, 0.034069061279296875, 0.03640937805175781, 0.03874969482421875, 0.04109001159667969, 0.043430328369140625, 0.04577064514160156, 0.0481109619140625, 0.05045127868652344, 0.052791595458984375, 0.05513191223144531, 0.05747222900390625, 0.05981254577636719, 0.062152862548828125, 0.06449317932128906, 0.06683349609375]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 4.0, 3.0, 2.0, 1.0, 11.0, 8.0, 18.0, 30.0, 24.0, 35.0, 66.0, 93.0, 99.0, 129.0, 86.0, 70.0, 74.0, 84.0, 39.0, 36.0, 27.0, 19.0, 9.0, 9.0, 4.0, 6.0, 4.0, 4.0, 3.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0], "bins": [-0.024810791015625, -0.024106264114379883, -0.023401737213134766, -0.02269721031188965, -0.02199268341064453, -0.021288156509399414, -0.020583629608154297, -0.01987910270690918, -0.019174575805664062, -0.018470048904418945, -0.017765522003173828, -0.01706099510192871, -0.016356468200683594, -0.015651941299438477, -0.01494741439819336, -0.014242887496948242, -0.013538360595703125, -0.012833833694458008, -0.01212930679321289, -0.011424779891967773, -0.010720252990722656, -0.010015726089477539, -0.009311199188232422, -0.008606672286987305, -0.007902145385742188, -0.00719761848449707, -0.006493091583251953, -0.005788564682006836, -0.005084037780761719, -0.0043795108795166016, -0.0036749839782714844, -0.002970457077026367, -0.00226593017578125, -0.0015614032745361328, -0.0008568763732910156, -0.00015234947204589844, 0.0005521774291992188, 0.001256704330444336, 0.001961231231689453, 0.0026657581329345703, 0.0033702850341796875, 0.004074811935424805, 0.004779338836669922, 0.005483865737915039, 0.006188392639160156, 0.0068929195404052734, 0.007597446441650391, 0.008301973342895508, 0.009006500244140625, 0.009711027145385742, 0.01041555404663086, 0.011120080947875977, 0.011824607849121094, 0.012529134750366211, 0.013233661651611328, 0.013938188552856445, 0.014642715454101562, 0.01534724235534668, 0.016051769256591797, 0.016756296157836914, 0.01746082305908203, 0.01816534996032715, 0.018869876861572266, 0.019574403762817383, 0.0202789306640625]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 2.0, 3.0, 2.0, 5.0, 8.0, 14.0, 21.0, 13.0, 24.0, 20.0, 29.0, 20.0, 35.0, 57.0, 70.0, 182.0, 444.0, 1475.0, 6826.0, 57790.0, 762228.0, 197739.0, 17322.0, 2811.0, 786.0, 251.0, 105.0, 65.0, 44.0, 35.0, 24.0, 16.0, 21.0, 23.0, 11.0, 8.0, 6.0, 9.0, 7.0, 6.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.144287109375, -0.14024925231933594, -0.13621139526367188, -0.1321735382080078, -0.12813568115234375, -0.12409782409667969, -0.12005996704101562, -0.11602210998535156, -0.1119842529296875, -0.10794639587402344, -0.10390853881835938, -0.09987068176269531, -0.09583282470703125, -0.09179496765136719, -0.08775711059570312, -0.08371925354003906, -0.079681396484375, -0.07564353942871094, -0.07160568237304688, -0.06756782531738281, -0.06352996826171875, -0.05949211120605469, -0.055454254150390625, -0.05141639709472656, -0.0473785400390625, -0.04334068298339844, -0.039302825927734375, -0.03526496887207031, -0.03122711181640625, -0.027189254760742188, -0.023151397705078125, -0.019113540649414062, -0.01507568359375, -0.011037826538085938, -0.006999969482421875, -0.0029621124267578125, 0.00107574462890625, 0.0051136016845703125, 0.009151458740234375, 0.013189315795898438, 0.0172271728515625, 0.021265029907226562, 0.025302886962890625, 0.029340744018554688, 0.03337860107421875, 0.03741645812988281, 0.041454315185546875, 0.04549217224121094, 0.049530029296875, 0.05356788635253906, 0.057605743408203125, 0.06164360046386719, 0.06568145751953125, 0.06971931457519531, 0.07375717163085938, 0.07779502868652344, 0.0818328857421875, 0.08587074279785156, 0.08990859985351562, 0.09394645690917969, 0.09798431396484375, 0.10202217102050781, 0.10606002807617188, 0.11009788513183594, 0.1141357421875]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 7.0, 6.0, 9.0, 6.0, 8.0, 11.0, 23.0, 20.0, 16.0, 24.0, 33.0, 37.0, 34.0, 37.0, 36.0, 50.0, 41.0, 41.0, 39.0, 59.0, 47.0, 45.0, 35.0, 43.0, 41.0, 34.0, 40.0, 26.0, 22.0, 26.0, 17.0, 28.0, 12.0, 22.0, 11.0, 8.0, 6.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0518798828125, -0.050042152404785156, -0.04820442199707031, -0.04636669158935547, -0.044528961181640625, -0.04269123077392578, -0.04085350036621094, -0.039015769958496094, -0.03717803955078125, -0.035340309143066406, -0.03350257873535156, -0.03166484832763672, -0.029827117919921875, -0.02798938751220703, -0.026151657104492188, -0.024313926696777344, -0.0224761962890625, -0.020638465881347656, -0.018800735473632812, -0.01696300506591797, -0.015125274658203125, -0.013287544250488281, -0.011449813842773438, -0.009612083435058594, -0.00777435302734375, -0.005936622619628906, -0.0040988922119140625, -0.0022611618041992188, -0.000423431396484375, 0.0014142990112304688, 0.0032520294189453125, 0.005089759826660156, 0.006927490234375, 0.008765220642089844, 0.010602951049804688, 0.012440681457519531, 0.014278411865234375, 0.01611614227294922, 0.017953872680664062, 0.019791603088378906, 0.02162933349609375, 0.023467063903808594, 0.025304794311523438, 0.02714252471923828, 0.028980255126953125, 0.03081798553466797, 0.03265571594238281, 0.034493446350097656, 0.0363311767578125, 0.038168907165527344, 0.04000663757324219, 0.04184436798095703, 0.043682098388671875, 0.04551982879638672, 0.04735755920410156, 0.049195289611816406, 0.05103302001953125, 0.052870750427246094, 0.05470848083496094, 0.05654621124267578, 0.058383941650390625, 0.06022167205810547, 0.06205940246582031, 0.06389713287353516, 0.06573486328125]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 4.0, 3.0, 5.0, 4.0, 9.0, 4.0, 7.0, 13.0, 16.0, 27.0, 54.0, 66.0, 122.0, 225.0, 508.0, 1151.0, 3952.0, 22441.0, 385138.0, 596854.0, 30445.0, 4933.0, 1413.0, 530.0, 246.0, 143.0, 83.0, 46.0, 29.0, 24.0, 13.0, 10.0, 11.0, 11.0, 6.0, 2.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.012908935546875, -0.012493491172790527, -0.012078046798706055, -0.011662602424621582, -0.01124715805053711, -0.010831713676452637, -0.010416269302368164, -0.010000824928283691, -0.009585380554199219, -0.009169936180114746, -0.008754491806030273, -0.0083390474319458, -0.007923603057861328, -0.0075081586837768555, -0.007092714309692383, -0.00667726993560791, -0.0062618255615234375, -0.005846381187438965, -0.005430936813354492, -0.0050154924392700195, -0.004600048065185547, -0.004184603691101074, -0.0037691593170166016, -0.003353714942932129, -0.0029382705688476562, -0.0025228261947631836, -0.002107381820678711, -0.0016919374465942383, -0.0012764930725097656, -0.000861048698425293, -0.0004456043243408203, -3.0159950256347656e-05, 0.000385284423828125, 0.0008007287979125977, 0.0012161731719970703, 0.001631617546081543, 0.0020470619201660156, 0.0024625062942504883, 0.002877950668334961, 0.0032933950424194336, 0.0037088394165039062, 0.004124283790588379, 0.0045397281646728516, 0.004955172538757324, 0.005370616912841797, 0.0057860612869262695, 0.006201505661010742, 0.006616950035095215, 0.0070323944091796875, 0.00744783878326416, 0.007863283157348633, 0.008278727531433105, 0.008694171905517578, 0.00910961627960205, 0.009525060653686523, 0.009940505027770996, 0.010355949401855469, 0.010771393775939941, 0.011186838150024414, 0.011602282524108887, 0.01201772689819336, 0.012433171272277832, 0.012848615646362305, 0.013264060020446777, 0.01367950439453125]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 8.0, 6.0, 7.0, 4.0, 9.0, 15.0, 9.0, 22.0, 24.0, 26.0, 41.0, 30.0, 48.0, 55.0, 70.0, 64.0, 25.0, 69.0, 64.0, 54.0, 67.0, 47.0, 59.0, 43.0, 32.0, 26.0, 20.0, 6.0, 23.0, 11.0, 11.0, 4.0, 5.0, 4.0, 1.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.6954879760742188e-06, -3.5818666219711304e-06, -3.468245267868042e-06, -3.3546239137649536e-06, -3.2410025596618652e-06, -3.127381205558777e-06, -3.0137598514556885e-06, -2.9001384973526e-06, -2.7865171432495117e-06, -2.6728957891464233e-06, -2.559274435043335e-06, -2.4456530809402466e-06, -2.332031726837158e-06, -2.21841037273407e-06, -2.1047890186309814e-06, -1.991167664527893e-06, -1.8775463104248047e-06, -1.7639249563217163e-06, -1.650303602218628e-06, -1.5366822481155396e-06, -1.4230608940124512e-06, -1.3094395399093628e-06, -1.1958181858062744e-06, -1.082196831703186e-06, -9.685754776000977e-07, -8.549541234970093e-07, -7.413327693939209e-07, -6.277114152908325e-07, -5.140900611877441e-07, -4.0046870708465576e-07, -2.868473529815674e-07, -1.73225998878479e-07, -5.960464477539063e-08, 5.4016709327697754e-08, 1.6763806343078613e-07, 2.812594175338745e-07, 3.948807716369629e-07, 5.085021257400513e-07, 6.221234798431396e-07, 7.35744833946228e-07, 8.493661880493164e-07, 9.629875421524048e-07, 1.0766088962554932e-06, 1.1902302503585815e-06, 1.30385160446167e-06, 1.4174729585647583e-06, 1.5310943126678467e-06, 1.644715666770935e-06, 1.7583370208740234e-06, 1.8719583749771118e-06, 1.9855797290802e-06, 2.0992010831832886e-06, 2.212822437286377e-06, 2.3264437913894653e-06, 2.4400651454925537e-06, 2.553686499595642e-06, 2.6673078536987305e-06, 2.780929207801819e-06, 2.8945505619049072e-06, 3.0081719160079956e-06, 3.121793270111084e-06, 3.2354146242141724e-06, 3.3490359783172607e-06, 3.462657332420349e-06, 3.5762786865234375e-06]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 3.0, 4.0, 6.0, 6.0, 10.0, 9.0, 19.0, 28.0, 42.0, 49.0, 104.0, 255.0, 666.0, 2774.0, 23235.0, 844272.0, 167101.0, 7747.0, 1413.0, 415.0, 177.0, 83.0, 54.0, 21.0, 25.0, 11.0, 15.0, 4.0, 5.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0240936279296875, -0.023438453674316406, -0.022783279418945312, -0.02212810516357422, -0.021472930908203125, -0.02081775665283203, -0.020162582397460938, -0.019507408142089844, -0.01885223388671875, -0.018197059631347656, -0.017541885375976562, -0.01688671112060547, -0.016231536865234375, -0.015576362609863281, -0.014921188354492188, -0.014266014099121094, -0.01361083984375, -0.012955665588378906, -0.012300491333007812, -0.011645317077636719, -0.010990142822265625, -0.010334968566894531, -0.009679794311523438, -0.009024620056152344, -0.00836944580078125, -0.007714271545410156, -0.0070590972900390625, -0.006403923034667969, -0.005748748779296875, -0.005093574523925781, -0.0044384002685546875, -0.0037832260131835938, -0.0031280517578125, -0.0024728775024414062, -0.0018177032470703125, -0.0011625289916992188, -0.000507354736328125, 0.00014781951904296875, 0.0008029937744140625, 0.0014581680297851562, 0.00211334228515625, 0.0027685165405273438, 0.0034236907958984375, 0.004078865051269531, 0.004734039306640625, 0.005389213562011719, 0.0060443878173828125, 0.006699562072753906, 0.007354736328125, 0.008009910583496094, 0.008665084838867188, 0.009320259094238281, 0.009975433349609375, 0.010630607604980469, 0.011285781860351562, 0.011940956115722656, 0.01259613037109375, 0.013251304626464844, 0.013906478881835938, 0.014561653137207031, 0.015216827392578125, 0.01587200164794922, 0.016527175903320312, 0.017182350158691406, 0.0178375244140625]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 5.0, 5.0, 11.0, 8.0, 17.0, 16.0, 25.0, 38.0, 51.0, 62.0, 65.0, 84.0, 89.0, 76.0, 101.0, 82.0, 60.0, 53.0, 46.0, 20.0, 20.0, 22.0, 12.0, 8.0, 8.0, 5.0, 7.0, 2.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0081634521484375, -0.007864236831665039, -0.007565021514892578, -0.007265806198120117, -0.006966590881347656, -0.006667375564575195, -0.006368160247802734, -0.0060689449310302734, -0.0057697296142578125, -0.0054705142974853516, -0.005171298980712891, -0.00487208366394043, -0.004572868347167969, -0.004273653030395508, -0.003974437713623047, -0.003675222396850586, -0.003376007080078125, -0.003076791763305664, -0.002777576446533203, -0.002478361129760742, -0.0021791458129882812, -0.0018799304962158203, -0.0015807151794433594, -0.0012814998626708984, -0.0009822845458984375, -0.0006830692291259766, -0.0003838539123535156, -8.463859558105469e-05, 0.00021457672119140625, 0.0005137920379638672, 0.0008130073547363281, 0.001112222671508789, 0.00141143798828125, 0.001710653305053711, 0.002009868621826172, 0.002309083938598633, 0.0026082992553710938, 0.0029075145721435547, 0.0032067298889160156, 0.0035059452056884766, 0.0038051605224609375, 0.0041043758392333984, 0.004403591156005859, 0.00470280647277832, 0.005002021789550781, 0.005301237106323242, 0.005600452423095703, 0.005899667739868164, 0.006198883056640625, 0.006498098373413086, 0.006797313690185547, 0.007096529006958008, 0.007395744323730469, 0.00769495964050293, 0.00799417495727539, 0.008293390274047852, 0.008592605590820312, 0.008891820907592773, 0.009191036224365234, 0.009490251541137695, 0.009789466857910156, 0.010088682174682617, 0.010387897491455078, 0.010687112808227539, 0.010986328125]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 1.0, 0.0, 3.0, 5.0, 8.0, 7.0, 8.0, 13.0, 7.0, 29.0, 45.0, 94.0, 182.0, 236.0, 158.0, 83.0, 42.0, 24.0, 23.0, 15.0, 4.0, 10.0, 5.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.40792134404182434, -0.39608243107795715, -0.3842434883117676, -0.3724045753479004, -0.3605656325817108, -0.34872671961784363, -0.33688777685165405, -0.32504886388778687, -0.3132099509239197, -0.3013710379600525, -0.2895320951938629, -0.2776931822299957, -0.26585423946380615, -0.25401532649993896, -0.24217639863491058, -0.2303374707698822, -0.21849852800369263, -0.20665960013866425, -0.19482067227363586, -0.18298175930976868, -0.1711428165435791, -0.15930390357971191, -0.14746497571468353, -0.13562604784965515, -0.12378711998462677, -0.11194819211959839, -0.10010926425457001, -0.08827034384012222, -0.07643141597509384, -0.06459248811006546, -0.052753567695617676, -0.040914639830589294, -0.029075711965560913, -0.01723678596317768, -0.005397859960794449, 0.006441064178943634, 0.018279992043972015, 0.030118919909000397, 0.04195784032344818, 0.05379676818847656, 0.06563569605350494, 0.07747462391853333, 0.0893135517835617, 0.10115247219800949, 0.11299140006303787, 0.12483032792806625, 0.13666924834251404, 0.14850817620754242, 0.1603471040725708, 0.17218603193759918, 0.18402495980262756, 0.19586387276649475, 0.20770281553268433, 0.2195417284965515, 0.2313806563615799, 0.24321958422660828, 0.25505852699279785, 0.26689743995666504, 0.2787363827228546, 0.2905752956867218, 0.3024142384529114, 0.31425315141677856, 0.32609206438064575, 0.3379310071468353, 0.3497699201107025]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 1.0, 5.0, 11.0, 10.0, 13.0, 15.0, 16.0, 21.0, 28.0, 30.0, 33.0, 42.0, 29.0, 40.0, 56.0, 54.0, 52.0, 47.0, 42.0, 41.0, 46.0, 49.0, 44.0, 45.0, 30.0, 33.0, 25.0, 16.0, 31.0, 13.0, 14.0, 12.0, 14.0, 8.0, 8.0, 7.0, 6.0, 2.0, 5.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.3073055148124695, -0.2971055507659912, -0.28690558671951294, -0.27670562267303467, -0.2665056586265564, -0.2563056945800781, -0.24610574543476105, -0.23590579628944397, -0.2257058322429657, -0.21550586819648743, -0.20530590415000916, -0.19510594010353088, -0.1849059909582138, -0.17470602691173553, -0.16450606286525726, -0.15430611371994019, -0.14410613477230072, -0.13390617072582245, -0.12370621412992477, -0.1135062500834465, -0.10330629348754883, -0.09310632944107056, -0.08290636539459229, -0.07270640879869461, -0.06250644475221634, -0.052306484431028366, -0.04210652410984039, -0.03190656006336212, -0.02170659974217415, -0.011506639420986176, -0.001306675374507904, 0.00889328122138977, 0.019093245267868042, 0.029293205589056015, 0.03949316591024399, 0.04969312995672226, 0.05989309027791023, 0.0700930505990982, 0.08029301464557648, 0.09049297124147415, 0.10069293528795242, 0.1108928993344307, 0.12109285593032837, 0.13129281997680664, 0.1414927840232849, 0.15169274806976318, 0.16189271211624146, 0.17209266126155853, 0.1822926253080368, 0.19249258935451508, 0.20269255340099335, 0.21289250254631042, 0.2230924665927887, 0.23329243063926697, 0.24349239468574524, 0.2536923587322235, 0.2638923227787018, 0.27409228682518005, 0.2842922508716583, 0.2944922149181366, 0.30469217896461487, 0.31489211320877075, 0.325092077255249, 0.3352920413017273, 0.34549200534820557]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 4.0, 6.0, 1.0, 8.0, 2.0, 6.0, 11.0, 18.0, 32.0, 61.0, 88.0, 139.0, 358.0, 1016.0, 8662.0, 4176691.0, 5573.0, 899.0, 309.0, 157.0, 92.0, 46.0, 36.0, 19.0, 13.0, 12.0, 5.0, 5.0, 6.0, 6.0, 0.0, 4.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.194580078125, -0.18857955932617188, -0.18257904052734375, -0.17657852172851562, -0.1705780029296875, -0.16457748413085938, -0.15857696533203125, -0.15257644653320312, -0.146575927734375, -0.14057540893554688, -0.13457489013671875, -0.12857437133789062, -0.1225738525390625, -0.11657333374023438, -0.11057281494140625, -0.10457229614257812, -0.09857177734375, -0.09257125854492188, -0.08657073974609375, -0.08057022094726562, -0.0745697021484375, -0.06856918334960938, -0.06256866455078125, -0.056568145751953125, -0.050567626953125, -0.044567108154296875, -0.03856658935546875, -0.032566070556640625, -0.0265655517578125, -0.020565032958984375, -0.01456451416015625, -0.008563995361328125, -0.0025634765625, 0.003437042236328125, 0.00943756103515625, 0.015438079833984375, 0.0214385986328125, 0.027439117431640625, 0.03343963623046875, 0.039440155029296875, 0.045440673828125, 0.051441192626953125, 0.05744171142578125, 0.06344223022460938, 0.0694427490234375, 0.07544326782226562, 0.08144378662109375, 0.08744430541992188, 0.09344482421875, 0.09944534301757812, 0.10544586181640625, 0.11144638061523438, 0.1174468994140625, 0.12344741821289062, 0.12944793701171875, 0.13544845581054688, 0.141448974609375, 0.14744949340820312, 0.15345001220703125, 0.15945053100585938, 0.1654510498046875, 0.17145156860351562, 0.17745208740234375, 0.18345260620117188, 0.189453125]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 2.0, 3.0, 1.0, 4.0, 9.0, 13.0, 19.0, 38.0, 42.0, 59.0, 84.0, 96.0, 127.0, 114.0, 100.0, 81.0, 52.0, 38.0, 30.0, 26.0, 24.0, 9.0, 5.0, 9.0, 2.0, 4.0, 3.0, 3.0, 0.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0246734619140625, -0.02390742301940918, -0.02314138412475586, -0.02237534523010254, -0.02160930633544922, -0.0208432674407959, -0.020077228546142578, -0.019311189651489258, -0.018545150756835938, -0.017779111862182617, -0.017013072967529297, -0.016247034072875977, -0.015480995178222656, -0.014714956283569336, -0.013948917388916016, -0.013182878494262695, -0.012416839599609375, -0.011650800704956055, -0.010884761810302734, -0.010118722915649414, -0.009352684020996094, -0.008586645126342773, -0.007820606231689453, -0.007054567337036133, -0.0062885284423828125, -0.005522489547729492, -0.004756450653076172, -0.0039904117584228516, -0.0032243728637695312, -0.002458333969116211, -0.0016922950744628906, -0.0009262561798095703, -0.00016021728515625, 0.0006058216094970703, 0.0013718605041503906, 0.002137899398803711, 0.0029039382934570312, 0.0036699771881103516, 0.004436016082763672, 0.005202054977416992, 0.0059680938720703125, 0.006734132766723633, 0.007500171661376953, 0.008266210556030273, 0.009032249450683594, 0.009798288345336914, 0.010564327239990234, 0.011330366134643555, 0.012096405029296875, 0.012862443923950195, 0.013628482818603516, 0.014394521713256836, 0.015160560607910156, 0.015926599502563477, 0.016692638397216797, 0.017458677291870117, 0.018224716186523438, 0.018990755081176758, 0.019756793975830078, 0.0205228328704834, 0.02128887176513672, 0.02205491065979004, 0.02282094955444336, 0.02358698844909668, 0.02435302734375]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 2.0, 5.0, 9.0, 18.0, 33.0, 39.0, 68.0, 110.0, 223.0, 567.0, 2071.0, 27689.0, 4155671.0, 5978.0, 1204.0, 352.0, 111.0, 56.0, 37.0, 20.0, 12.0, 7.0, 2.0, 3.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12481689453125, -0.1196737289428711, -0.11453056335449219, -0.10938739776611328, -0.10424423217773438, -0.09910106658935547, -0.09395790100097656, -0.08881473541259766, -0.08367156982421875, -0.07852840423583984, -0.07338523864746094, -0.06824207305908203, -0.06309890747070312, -0.05795574188232422, -0.05281257629394531, -0.047669410705566406, -0.0425262451171875, -0.037383079528808594, -0.03223991394042969, -0.02709674835205078, -0.021953582763671875, -0.01681041717529297, -0.011667251586914062, -0.006524085998535156, -0.00138092041015625, 0.0037622451782226562, 0.008905410766601562, 0.014048576354980469, 0.019191741943359375, 0.02433490753173828, 0.029478073120117188, 0.034621238708496094, 0.039764404296875, 0.044907569885253906, 0.05005073547363281, 0.05519390106201172, 0.060337066650390625, 0.06548023223876953, 0.07062339782714844, 0.07576656341552734, 0.08090972900390625, 0.08605289459228516, 0.09119606018066406, 0.09633922576904297, 0.10148239135742188, 0.10662555694580078, 0.11176872253417969, 0.1169118881225586, 0.1220550537109375, 0.1271982192993164, 0.1323413848876953, 0.13748455047607422, 0.14262771606445312, 0.14777088165283203, 0.15291404724121094, 0.15805721282958984, 0.16320037841796875, 0.16834354400634766, 0.17348670959472656, 0.17862987518310547, 0.18377304077148438, 0.18891620635986328, 0.1940593719482422, 0.1992025375366211, 0.204345703125]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 7.0, 8.0, 19.0, 48.0, 246.0, 3585.0, 103.0, 30.0, 7.0, 10.0, 5.0, 3.0, 2.0, 1.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0], "bins": [-0.0560302734375, -0.05482149124145508, -0.053612709045410156, -0.052403926849365234, -0.05119514465332031, -0.04998636245727539, -0.04877758026123047, -0.04756879806518555, -0.046360015869140625, -0.0451512336730957, -0.04394245147705078, -0.04273366928100586, -0.04152488708496094, -0.040316104888916016, -0.039107322692871094, -0.03789854049682617, -0.03668975830078125, -0.03548097610473633, -0.034272193908691406, -0.033063411712646484, -0.03185462951660156, -0.03064584732055664, -0.02943706512451172, -0.028228282928466797, -0.027019500732421875, -0.025810718536376953, -0.02460193634033203, -0.02339315414428711, -0.022184371948242188, -0.020975589752197266, -0.019766807556152344, -0.018558025360107422, -0.0173492431640625, -0.016140460968017578, -0.014931678771972656, -0.013722896575927734, -0.012514114379882812, -0.01130533218383789, -0.010096549987792969, -0.008887767791748047, -0.007678985595703125, -0.006470203399658203, -0.005261421203613281, -0.004052639007568359, -0.0028438568115234375, -0.0016350746154785156, -0.00042629241943359375, 0.0007824897766113281, 0.00199127197265625, 0.003200054168701172, 0.004408836364746094, 0.005617618560791016, 0.0068264007568359375, 0.00803518295288086, 0.009243965148925781, 0.010452747344970703, 0.011661529541015625, 0.012870311737060547, 0.014079093933105469, 0.01528787612915039, 0.016496658325195312, 0.017705440521240234, 0.018914222717285156, 0.020123004913330078, 0.021331787109375]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 5.0, 43.0, 520.0, 409.0, 25.0, 9.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.5661347508430481, -0.5552359223365784, -0.5443371534347534, -0.5334383249282837, -0.522539496421814, -0.511640727519989, -0.5007418990135193, -0.48984307050704956, -0.4789442718029022, -0.4680454730987549, -0.45714664459228516, -0.4462478458881378, -0.4353490471839905, -0.42445021867752075, -0.4135514199733734, -0.4026526212692261, -0.39175379276275635, -0.380854994058609, -0.3699561655521393, -0.35905736684799194, -0.3481585383415222, -0.3372597396373749, -0.32636094093322754, -0.3154621124267578, -0.3045633137226105, -0.29366451501846313, -0.2827656865119934, -0.27186688780784607, -0.26096808910369873, -0.250069260597229, -0.23917046189308167, -0.22827164828777313, -0.217372864484787, -0.20647405087947845, -0.19557523727416992, -0.18467643857002258, -0.17377762496471405, -0.16287881135940552, -0.15198001265525818, -0.14108119904994965, -0.1301823854446411, -0.11928357183933258, -0.10838476568460464, -0.09748595952987671, -0.08658714592456818, -0.07568833231925964, -0.06478952616453171, -0.05389072000980377, -0.04299190640449524, -0.032093096524477005, -0.02119428664445877, -0.010295476764440536, 0.0006033331155776978, 0.011502142995595932, 0.022400952875614166, 0.0332997590303421, 0.044198572635650635, 0.05509738251566887, 0.0659961923956871, 0.07689499855041504, 0.08779381215572357, 0.0986926257610321, 0.10959143191576004, 0.12049023807048798, 0.1313890516757965]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 6.0, 14.0, 17.0, 25.0, 34.0, 63.0, 73.0, 110.0, 101.0, 117.0, 116.0, 81.0, 68.0, 77.0, 37.0, 34.0, 21.0, 7.0, 4.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1334947943687439, -0.12985770404338837, -0.12622062861919403, -0.1225835382938385, -0.11894645541906357, -0.11530937254428864, -0.1116722822189331, -0.10803519934415817, -0.10439811646938324, -0.1007610335946083, -0.09712395071983337, -0.09348686039447784, -0.08984977751970291, -0.08621269464492798, -0.08257560431957245, -0.07893852144479752, -0.07530143857002258, -0.07166435569524765, -0.06802727282047272, -0.06439018249511719, -0.060753099620342255, -0.05711601674556732, -0.05347893014550209, -0.04984184354543686, -0.046204760670661926, -0.04256767779588699, -0.03893059119582176, -0.03529350459575653, -0.0316564217209816, -0.028019336983561516, -0.024382252246141434, -0.02074516750872135, -0.01710808277130127, -0.013470998033881187, -0.009833913296461105, -0.006196828559041023, -0.002559743821620941, 0.001077340915799141, 0.004714425653219223, 0.008351510390639305, 0.011988595128059387, 0.01562567986547947, 0.01926276460289955, 0.022899849340319633, 0.026536934077739716, 0.030174018815159798, 0.03381110355257988, 0.03744819015264511, 0.041085273027420044, 0.04472235590219498, 0.04835944250226021, 0.05199652910232544, 0.05563361197710037, 0.059270694851875305, 0.06290778517723083, 0.06654486805200577, 0.0701819509267807, 0.07381903380155563, 0.07745611667633057, 0.0810932070016861, 0.08473028987646103, 0.08836737275123596, 0.09200446307659149, 0.09564154595136642, 0.09927862882614136]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 1.0, 3.0, 9.0, 8.0, 11.0, 9.0, 29.0, 18.0, 19.0, 40.0, 44.0, 79.0, 106.0, 138.0, 168.0, 251.0, 357.0, 465.0, 689.0, 1078.0, 1725.0, 2921.0, 5582.0, 12686.0, 32909.0, 114157.0, 592280.0, 200967.0, 47654.0, 17050.0, 7333.0, 3661.0, 2096.0, 1282.0, 767.0, 549.0, 352.0, 284.0, 198.0, 171.0, 111.0, 78.0, 57.0, 37.0, 39.0, 30.0, 15.0, 8.0, 10.0, 5.0, 10.0, 6.0, 2.0, 5.0, 1.0, 2.0, 1.0, 2.0], "bins": [-0.07763671875, -0.07526016235351562, -0.07288360595703125, -0.07050704956054688, -0.0681304931640625, -0.06575393676757812, -0.06337738037109375, -0.061000823974609375, -0.058624267578125, -0.056247711181640625, -0.05387115478515625, -0.051494598388671875, -0.0491180419921875, -0.046741485595703125, -0.04436492919921875, -0.041988372802734375, -0.03961181640625, -0.037235260009765625, -0.03485870361328125, -0.032482147216796875, -0.0301055908203125, -0.027729034423828125, -0.02535247802734375, -0.022975921630859375, -0.020599365234375, -0.018222808837890625, -0.01584625244140625, -0.013469696044921875, -0.0110931396484375, -0.008716583251953125, -0.00634002685546875, -0.003963470458984375, -0.0015869140625, 0.000789642333984375, 0.00316619873046875, 0.005542755126953125, 0.0079193115234375, 0.010295867919921875, 0.01267242431640625, 0.015048980712890625, 0.017425537109375, 0.019802093505859375, 0.02217864990234375, 0.024555206298828125, 0.0269317626953125, 0.029308319091796875, 0.03168487548828125, 0.034061431884765625, 0.03643798828125, 0.038814544677734375, 0.04119110107421875, 0.043567657470703125, 0.0459442138671875, 0.048320770263671875, 0.05069732666015625, 0.053073883056640625, 0.055450439453125, 0.057826995849609375, 0.06020355224609375, 0.06258010864257812, 0.0649566650390625, 0.06733322143554688, 0.06970977783203125, 0.07208633422851562, 0.074462890625]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 4.0, 2.0, 3.0, 4.0, 3.0, 10.0, 16.0, 35.0, 44.0, 51.0, 90.0, 99.0, 126.0, 121.0, 102.0, 86.0, 56.0, 44.0, 28.0, 24.0, 22.0, 8.0, 6.0, 5.0, 3.0, 4.0, 4.0, 0.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0254058837890625, -0.024619102478027344, -0.023832321166992188, -0.02304553985595703, -0.022258758544921875, -0.02147197723388672, -0.020685195922851562, -0.019898414611816406, -0.01911163330078125, -0.018324851989746094, -0.017538070678710938, -0.01675128936767578, -0.015964508056640625, -0.015177726745605469, -0.014390945434570312, -0.013604164123535156, -0.0128173828125, -0.012030601501464844, -0.011243820190429688, -0.010457038879394531, -0.009670257568359375, -0.008883476257324219, -0.008096694946289062, -0.007309913635253906, -0.00652313232421875, -0.005736351013183594, -0.0049495697021484375, -0.004162788391113281, -0.003376007080078125, -0.0025892257690429688, -0.0018024444580078125, -0.0010156631469726562, -0.0002288818359375, 0.0005578994750976562, 0.0013446807861328125, 0.0021314620971679688, 0.002918243408203125, 0.0037050247192382812, 0.0044918060302734375, 0.005278587341308594, 0.00606536865234375, 0.006852149963378906, 0.0076389312744140625, 0.008425712585449219, 0.009212493896484375, 0.009999275207519531, 0.010786056518554688, 0.011572837829589844, 0.012359619140625, 0.013146400451660156, 0.013933181762695312, 0.014719963073730469, 0.015506744384765625, 0.01629352569580078, 0.017080307006835938, 0.017867088317871094, 0.01865386962890625, 0.019440650939941406, 0.020227432250976562, 0.02101421356201172, 0.021800994873046875, 0.02258777618408203, 0.023374557495117188, 0.024161338806152344, 0.0249481201171875]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 3.0, 1.0, 3.0, 0.0, 6.0, 4.0, 7.0, 12.0, 9.0, 14.0, 12.0, 14.0, 12.0, 27.0, 26.0, 29.0, 34.0, 41.0, 54.0, 115.0, 249.0, 590.0, 1913.0, 8820.0, 60089.0, 726758.0, 219145.0, 24289.0, 4229.0, 1140.0, 413.0, 174.0, 76.0, 53.0, 33.0, 31.0, 23.0, 20.0, 18.0, 11.0, 13.0, 9.0, 15.0, 8.0, 7.0, 5.0, 3.0, 2.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.11297607421875, -0.1095285415649414, -0.10608100891113281, -0.10263347625732422, -0.09918594360351562, -0.09573841094970703, -0.09229087829589844, -0.08884334564208984, -0.08539581298828125, -0.08194828033447266, -0.07850074768066406, -0.07505321502685547, -0.07160568237304688, -0.06815814971923828, -0.06471061706542969, -0.061263084411621094, -0.0578155517578125, -0.054368019104003906, -0.05092048645019531, -0.04747295379638672, -0.044025421142578125, -0.04057788848876953, -0.03713035583496094, -0.033682823181152344, -0.03023529052734375, -0.026787757873535156, -0.023340225219726562, -0.01989269256591797, -0.016445159912109375, -0.012997627258300781, -0.009550094604492188, -0.006102561950683594, -0.002655029296875, 0.0007925033569335938, 0.0042400360107421875, 0.007687568664550781, 0.011135101318359375, 0.014582633972167969, 0.018030166625976562, 0.021477699279785156, 0.02492523193359375, 0.028372764587402344, 0.03182029724121094, 0.03526782989501953, 0.038715362548828125, 0.04216289520263672, 0.04561042785644531, 0.049057960510253906, 0.0525054931640625, 0.055953025817871094, 0.05940055847167969, 0.06284809112548828, 0.06629562377929688, 0.06974315643310547, 0.07319068908691406, 0.07663822174072266, 0.08008575439453125, 0.08353328704833984, 0.08698081970214844, 0.09042835235595703, 0.09387588500976562, 0.09732341766357422, 0.10077095031738281, 0.1042184829711914, 0.107666015625]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0, 3.0, 5.0, 7.0, 9.0, 14.0, 9.0, 13.0, 11.0, 17.0, 23.0, 21.0, 32.0, 21.0, 30.0, 28.0, 36.0, 46.0, 45.0, 48.0, 50.0, 45.0, 35.0, 40.0, 35.0, 39.0, 44.0, 22.0, 28.0, 23.0, 22.0, 22.0, 32.0, 23.0, 26.0, 13.0, 15.0, 14.0, 14.0, 7.0, 12.0, 9.0, 3.0, 7.0, 0.0, 3.0, 1.0, 3.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.04656982421875, -0.04508209228515625, -0.0435943603515625, -0.04210662841796875, -0.040618896484375, -0.03913116455078125, -0.0376434326171875, -0.03615570068359375, -0.03466796875, -0.03318023681640625, -0.0316925048828125, -0.03020477294921875, -0.028717041015625, -0.02722930908203125, -0.0257415771484375, -0.02425384521484375, -0.02276611328125, -0.02127838134765625, -0.0197906494140625, -0.01830291748046875, -0.016815185546875, -0.01532745361328125, -0.0138397216796875, -0.01235198974609375, -0.0108642578125, -0.00937652587890625, -0.0078887939453125, -0.00640106201171875, -0.004913330078125, -0.00342559814453125, -0.0019378662109375, -0.00045013427734375, 0.00103759765625, 0.00252532958984375, 0.0040130615234375, 0.00550079345703125, 0.006988525390625, 0.00847625732421875, 0.0099639892578125, 0.01145172119140625, 0.012939453125, 0.01442718505859375, 0.0159149169921875, 0.01740264892578125, 0.018890380859375, 0.02037811279296875, 0.0218658447265625, 0.02335357666015625, 0.02484130859375, 0.02632904052734375, 0.0278167724609375, 0.02930450439453125, 0.030792236328125, 0.03227996826171875, 0.0337677001953125, 0.03525543212890625, 0.0367431640625, 0.03823089599609375, 0.0397186279296875, 0.04120635986328125, 0.042694091796875, 0.04418182373046875, 0.0456695556640625, 0.04715728759765625, 0.04864501953125]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 6.0, 5.0, 0.0, 6.0, 5.0, 9.0, 11.0, 28.0, 22.0, 46.0, 67.0, 100.0, 170.0, 297.0, 571.0, 1556.0, 5131.0, 29114.0, 536106.0, 442712.0, 25111.0, 4809.0, 1421.0, 568.0, 274.0, 145.0, 83.0, 64.0, 36.0, 29.0, 15.0, 8.0, 8.0, 5.0, 4.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 1.0], "bins": [-0.014373779296875, -0.013924121856689453, -0.013474464416503906, -0.01302480697631836, -0.012575149536132812, -0.012125492095947266, -0.011675834655761719, -0.011226177215576172, -0.010776519775390625, -0.010326862335205078, -0.009877204895019531, -0.009427547454833984, -0.008977890014648438, -0.00852823257446289, -0.008078575134277344, -0.007628917694091797, -0.00717926025390625, -0.006729602813720703, -0.006279945373535156, -0.005830287933349609, -0.0053806304931640625, -0.004930973052978516, -0.004481315612792969, -0.004031658172607422, -0.003582000732421875, -0.003132343292236328, -0.0026826858520507812, -0.0022330284118652344, -0.0017833709716796875, -0.0013337135314941406, -0.0008840560913085938, -0.0004343986511230469, 1.52587890625e-05, 0.0004649162292480469, 0.0009145736694335938, 0.0013642311096191406, 0.0018138885498046875, 0.0022635459899902344, 0.0027132034301757812, 0.003162860870361328, 0.003612518310546875, 0.004062175750732422, 0.004511833190917969, 0.004961490631103516, 0.0054111480712890625, 0.005860805511474609, 0.006310462951660156, 0.006760120391845703, 0.00720977783203125, 0.007659435272216797, 0.008109092712402344, 0.00855875015258789, 0.009008407592773438, 0.009458065032958984, 0.009907722473144531, 0.010357379913330078, 0.010807037353515625, 0.011256694793701172, 0.011706352233886719, 0.012156009674072266, 0.012605667114257812, 0.01305532455444336, 0.013504981994628906, 0.013954639434814453, 0.014404296875]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 2.0, 4.0, 6.0, 4.0, 4.0, 9.0, 0.0, 12.0, 19.0, 16.0, 26.0, 28.0, 19.0, 27.0, 41.0, 29.0, 52.0, 40.0, 31.0, 54.0, 30.0, 58.0, 49.0, 19.0, 56.0, 55.0, 23.0, 40.0, 53.0, 20.0, 33.0, 27.0, 16.0, 29.0, 26.0, 4.0, 15.0, 5.0, 6.0, 5.0, 4.0, 2.0, 5.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.2782554626464844e-06, -3.1795352697372437e-06, -3.080815076828003e-06, -2.982094883918762e-06, -2.8833746910095215e-06, -2.7846544981002808e-06, -2.68593430519104e-06, -2.5872141122817993e-06, -2.4884939193725586e-06, -2.389773726463318e-06, -2.291053533554077e-06, -2.1923333406448364e-06, -2.0936131477355957e-06, -1.994892954826355e-06, -1.8961727619171143e-06, -1.7974525690078735e-06, -1.6987323760986328e-06, -1.600012183189392e-06, -1.5012919902801514e-06, -1.4025717973709106e-06, -1.30385160446167e-06, -1.2051314115524292e-06, -1.1064112186431885e-06, -1.0076910257339478e-06, -9.08970832824707e-07, -8.102506399154663e-07, -7.115304470062256e-07, -6.128102540969849e-07, -5.140900611877441e-07, -4.153698682785034e-07, -3.166496753692627e-07, -2.1792948246002197e-07, -1.1920928955078125e-07, -2.0489096641540527e-08, 7.82310962677002e-08, 1.7695128917694092e-07, 2.7567148208618164e-07, 3.7439167499542236e-07, 4.731118679046631e-07, 5.718320608139038e-07, 6.705522537231445e-07, 7.692724466323853e-07, 8.67992639541626e-07, 9.667128324508667e-07, 1.0654330253601074e-06, 1.1641532182693481e-06, 1.2628734111785889e-06, 1.3615936040878296e-06, 1.4603137969970703e-06, 1.559033989906311e-06, 1.6577541828155518e-06, 1.7564743757247925e-06, 1.8551945686340332e-06, 1.953914761543274e-06, 2.0526349544525146e-06, 2.1513551473617554e-06, 2.250075340270996e-06, 2.348795533180237e-06, 2.4475157260894775e-06, 2.5462359189987183e-06, 2.644956111907959e-06, 2.7436763048171997e-06, 2.8423964977264404e-06, 2.941116690635681e-06, 3.039836883544922e-06]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 6.0, 2.0, 9.0, 7.0, 16.0, 25.0, 33.0, 57.0, 108.0, 312.0, 947.0, 5639.0, 278820.0, 751519.0, 9315.0, 1171.0, 322.0, 109.0, 60.0, 30.0, 21.0, 10.0, 10.0, 3.0, 7.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0257415771484375, -0.024791479110717773, -0.023841381072998047, -0.02289128303527832, -0.021941184997558594, -0.020991086959838867, -0.02004098892211914, -0.019090890884399414, -0.018140792846679688, -0.01719069480895996, -0.016240596771240234, -0.015290498733520508, -0.014340400695800781, -0.013390302658081055, -0.012440204620361328, -0.011490106582641602, -0.010540008544921875, -0.009589910507202148, -0.008639812469482422, -0.007689714431762695, -0.006739616394042969, -0.005789518356323242, -0.004839420318603516, -0.003889322280883789, -0.0029392242431640625, -0.001989126205444336, -0.0010390281677246094, -8.893013000488281e-05, 0.0008611679077148438, 0.0018112659454345703, 0.002761363983154297, 0.0037114620208740234, 0.00466156005859375, 0.0056116580963134766, 0.006561756134033203, 0.00751185417175293, 0.008461952209472656, 0.009412050247192383, 0.01036214828491211, 0.011312246322631836, 0.012262344360351562, 0.013212442398071289, 0.014162540435791016, 0.015112638473510742, 0.01606273651123047, 0.017012834548950195, 0.017962932586669922, 0.01891303062438965, 0.019863128662109375, 0.0208132266998291, 0.021763324737548828, 0.022713422775268555, 0.02366352081298828, 0.024613618850708008, 0.025563716888427734, 0.02651381492614746, 0.027463912963867188, 0.028414011001586914, 0.02936410903930664, 0.030314207077026367, 0.031264305114746094, 0.03221440315246582, 0.03316450119018555, 0.03411459922790527, 0.035064697265625]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 3.0, 3.0, 6.0, 9.0, 11.0, 22.0, 26.0, 40.0, 67.0, 80.0, 99.0, 111.0, 114.0, 103.0, 88.0, 78.0, 38.0, 29.0, 26.0, 14.0, 16.0, 7.0, 8.0, 2.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0152740478515625, -0.014860749244689941, -0.014447450637817383, -0.014034152030944824, -0.013620853424072266, -0.013207554817199707, -0.012794256210327148, -0.01238095760345459, -0.011967658996582031, -0.011554360389709473, -0.011141061782836914, -0.010727763175964355, -0.010314464569091797, -0.009901165962219238, -0.00948786735534668, -0.009074568748474121, -0.008661270141601562, -0.008247971534729004, -0.007834672927856445, -0.007421374320983887, -0.007008075714111328, -0.0065947771072387695, -0.006181478500366211, -0.005768179893493652, -0.005354881286621094, -0.004941582679748535, -0.0045282840728759766, -0.004114985466003418, -0.0037016868591308594, -0.0032883882522583008, -0.002875089645385742, -0.0024617910385131836, -0.002048492431640625, -0.0016351938247680664, -0.0012218952178955078, -0.0008085966110229492, -0.0003952980041503906, 1.800060272216797e-05, 0.00043129920959472656, 0.0008445978164672852, 0.0012578964233398438, 0.0016711950302124023, 0.002084493637084961, 0.0024977922439575195, 0.002911090850830078, 0.0033243894577026367, 0.0037376880645751953, 0.004150986671447754, 0.0045642852783203125, 0.004977583885192871, 0.00539088249206543, 0.005804181098937988, 0.006217479705810547, 0.0066307783126831055, 0.007044076919555664, 0.007457375526428223, 0.007870674133300781, 0.00828397274017334, 0.008697271347045898, 0.009110569953918457, 0.009523868560791016, 0.009937167167663574, 0.010350465774536133, 0.010763764381408691, 0.01117706298828125]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 5.0, 4.0, 20.0, 101.0, 524.0, 286.0, 54.0, 16.0, 5.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7392053604125977, -1.7012755870819092, -1.6633458137512207, -1.6254160404205322, -1.5874862670898438, -1.5495563745498657, -1.5116266012191772, -1.4736968278884888, -1.4357670545578003, -1.3978372812271118, -1.3599075078964233, -1.3219777345657349, -1.2840478420257568, -1.2461180686950684, -1.2081882953643799, -1.1702585220336914, -1.132328748703003, -1.0943989753723145, -1.056469202041626, -1.0185394287109375, -0.9806095957756042, -0.9426798224449158, -0.9047499895095825, -0.866820216178894, -0.8288904428482056, -0.7909606695175171, -0.7530308961868286, -0.7151010632514954, -0.6771712899208069, -0.6392415165901184, -0.6013116836547852, -0.5633819103240967, -0.5254520177841187, -0.4875222444534302, -0.4495924413204193, -0.41166263818740845, -0.37373286485671997, -0.3358030915260315, -0.29787328839302063, -0.25994348526000977, -0.2220137119293213, -0.18408392369747162, -0.14615413546562195, -0.10822434723377228, -0.07029455900192261, -0.03236477077007294, 0.005565017461776733, 0.0434948205947876, 0.08142459392547607, 0.11935438215732574, 0.15728417038917542, 0.19521395862102509, 0.23314374685287476, 0.27107352018356323, 0.3090033233165741, 0.34693312644958496, 0.38486289978027344, 0.4227926731109619, 0.4607224762439728, 0.49865227937698364, 0.5365820527076721, 0.5745118260383606, 0.6124416589736938, 0.6503714323043823, 0.6883012056350708]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 1.0, 2.0, 3.0, 3.0, 6.0, 3.0, 6.0, 7.0, 15.0, 9.0, 16.0, 24.0, 23.0, 18.0, 28.0, 24.0, 35.0, 41.0, 42.0, 52.0, 62.0, 57.0, 56.0, 51.0, 52.0, 45.0, 53.0, 42.0, 34.0, 37.0, 36.0, 19.0, 23.0, 20.0, 15.0, 14.0, 7.0, 9.0, 6.0, 5.0, 3.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.31591886281967163, -0.3058605194091797, -0.29580220580101013, -0.2857438623905182, -0.27568551898002625, -0.2656271755695343, -0.25556886196136475, -0.2455105185508728, -0.23545217514038086, -0.2253938466310501, -0.21533550322055817, -0.20527717471122742, -0.19521883130073547, -0.18516050279140472, -0.17510217428207397, -0.16504383087158203, -0.15498550236225128, -0.14492717385292053, -0.1348688304424286, -0.12481050193309784, -0.1147521585226059, -0.10469383001327515, -0.0946354940533638, -0.08457715809345245, -0.07451882213354111, -0.06446048617362976, -0.054402150213718414, -0.044343817979097366, -0.03428548201918602, -0.024227146059274673, -0.014168813824653625, -0.004110477864742279, 0.005947858095169067, 0.016006194055080414, 0.02606452815234661, 0.03612286224961281, 0.046181198209524155, 0.0562395341694355, 0.06629786640405655, 0.0763562023639679, 0.08641453832387924, 0.09647287428379059, 0.10653121024370193, 0.11658954620361328, 0.12664787471294403, 0.13670621812343597, 0.14676454663276672, 0.15682289004325867, 0.16688121855258942, 0.17693954706192017, 0.1869978904724121, 0.19705621898174286, 0.2071145623922348, 0.21717289090156555, 0.2272312343120575, 0.23728956282138824, 0.247347891330719, 0.25740623474121094, 0.2674645483493805, 0.27752289175987244, 0.2875812351703644, 0.2976395785808563, 0.3076978921890259, 0.3177562355995178, 0.32781457901000977]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 2.0, 4.0, 5.0, 5.0, 8.0, 4.0, 16.0, 13.0, 19.0, 26.0, 35.0, 74.0, 123.0, 267.0, 657.0, 1671.0, 10622.0, 4168069.0, 9698.0, 1637.0, 625.0, 269.0, 165.0, 93.0, 49.0, 31.0, 18.0, 20.0, 20.0, 15.0, 5.0, 6.0, 6.0, 4.0, 0.0, 4.0, 1.0, 1.0, 4.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.234375, -0.22614669799804688, -0.21791839599609375, -0.20969009399414062, -0.2014617919921875, -0.19323348999023438, -0.18500518798828125, -0.17677688598632812, -0.168548583984375, -0.16032028198242188, -0.15209197998046875, -0.14386367797851562, -0.1356353759765625, -0.12740707397460938, -0.11917877197265625, -0.11095046997070312, -0.10272216796875, -0.09449386596679688, -0.08626556396484375, -0.07803726196289062, -0.0698089599609375, -0.061580657958984375, -0.05335235595703125, -0.045124053955078125, -0.036895751953125, -0.028667449951171875, -0.02043914794921875, -0.012210845947265625, -0.0039825439453125, 0.004245758056640625, 0.01247406005859375, 0.020702362060546875, 0.0289306640625, 0.037158966064453125, 0.04538726806640625, 0.053615570068359375, 0.0618438720703125, 0.07007217407226562, 0.07830047607421875, 0.08652877807617188, 0.094757080078125, 0.10298538208007812, 0.11121368408203125, 0.11944198608398438, 0.1276702880859375, 0.13589859008789062, 0.14412689208984375, 0.15235519409179688, 0.16058349609375, 0.16881179809570312, 0.17704010009765625, 0.18526840209960938, 0.1934967041015625, 0.20172500610351562, 0.20995330810546875, 0.21818161010742188, 0.226409912109375, 0.23463821411132812, 0.24286651611328125, 0.2510948181152344, 0.2593231201171875, 0.2675514221191406, 0.27577972412109375, 0.2840080261230469, 0.292236328125]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 3.0, 4.0, 6.0, 5.0, 10.0, 20.0, 25.0, 55.0, 77.0, 80.0, 113.0, 129.0, 115.0, 97.0, 74.0, 62.0, 37.0, 35.0, 16.0, 9.0, 6.0, 6.0, 10.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0240478515625, -0.02321791648864746, -0.022387981414794922, -0.021558046340942383, -0.020728111267089844, -0.019898176193237305, -0.019068241119384766, -0.018238306045532227, -0.017408370971679688, -0.01657843589782715, -0.01574850082397461, -0.01491856575012207, -0.014088630676269531, -0.013258695602416992, -0.012428760528564453, -0.011598825454711914, -0.010768890380859375, -0.009938955307006836, -0.009109020233154297, -0.008279085159301758, -0.007449150085449219, -0.00661921501159668, -0.005789279937744141, -0.0049593448638916016, -0.0041294097900390625, -0.0032994747161865234, -0.0024695396423339844, -0.0016396045684814453, -0.0008096694946289062, 2.0265579223632812e-05, 0.0008502006530761719, 0.001680135726928711, 0.00251007080078125, 0.003340005874633789, 0.004169940948486328, 0.004999876022338867, 0.005829811096191406, 0.006659746170043945, 0.007489681243896484, 0.008319616317749023, 0.009149551391601562, 0.009979486465454102, 0.01080942153930664, 0.01163935661315918, 0.012469291687011719, 0.013299226760864258, 0.014129161834716797, 0.014959096908569336, 0.015789031982421875, 0.016618967056274414, 0.017448902130126953, 0.018278837203979492, 0.01910877227783203, 0.01993870735168457, 0.02076864242553711, 0.02159857749938965, 0.022428512573242188, 0.023258447647094727, 0.024088382720947266, 0.024918317794799805, 0.025748252868652344, 0.026578187942504883, 0.027408123016357422, 0.02823805809020996, 0.0290679931640625]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 11.0, 16.0, 29.0, 79.0, 126.0, 298.0, 637.0, 1692.0, 5823.0, 64387.0, 4091275.0, 24131.0, 3745.0, 1141.0, 475.0, 208.0, 91.0, 46.0, 19.0, 17.0, 8.0, 4.0, 4.0, 2.0, 0.0, 4.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.104736328125, -0.1013956069946289, -0.09805488586425781, -0.09471416473388672, -0.09137344360351562, -0.08803272247314453, -0.08469200134277344, -0.08135128021240234, -0.07801055908203125, -0.07466983795166016, -0.07132911682128906, -0.06798839569091797, -0.06464767456054688, -0.06130695343017578, -0.05796623229980469, -0.054625511169433594, -0.0512847900390625, -0.047944068908691406, -0.04460334777832031, -0.04126262664794922, -0.037921905517578125, -0.03458118438720703, -0.031240463256835938, -0.027899742126464844, -0.02455902099609375, -0.021218299865722656, -0.017877578735351562, -0.014536857604980469, -0.011196136474609375, -0.007855415344238281, -0.0045146942138671875, -0.0011739730834960938, 0.002166748046875, 0.005507469177246094, 0.008848190307617188, 0.012188911437988281, 0.015529632568359375, 0.01887035369873047, 0.022211074829101562, 0.025551795959472656, 0.02889251708984375, 0.032233238220214844, 0.03557395935058594, 0.03891468048095703, 0.042255401611328125, 0.04559612274169922, 0.04893684387207031, 0.052277565002441406, 0.0556182861328125, 0.058959007263183594, 0.06229972839355469, 0.06564044952392578, 0.06898117065429688, 0.07232189178466797, 0.07566261291503906, 0.07900333404541016, 0.08234405517578125, 0.08568477630615234, 0.08902549743652344, 0.09236621856689453, 0.09570693969726562, 0.09904766082763672, 0.10238838195800781, 0.1057291030883789, 0.10906982421875]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 3.0, 2.0, 0.0, 3.0, 2.0, 4.0, 6.0, 13.0, 21.0, 20.0, 32.0, 75.0, 223.0, 2986.0, 473.0, 101.0, 51.0, 13.0, 16.0, 7.0, 12.0, 5.0, 2.0, 2.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.034942626953125, -0.033934831619262695, -0.03292703628540039, -0.031919240951538086, -0.03091144561767578, -0.029903650283813477, -0.028895854949951172, -0.027888059616088867, -0.026880264282226562, -0.025872468948364258, -0.024864673614501953, -0.02385687828063965, -0.022849082946777344, -0.02184128761291504, -0.020833492279052734, -0.01982569694519043, -0.018817901611328125, -0.01781010627746582, -0.016802310943603516, -0.01579451560974121, -0.014786720275878906, -0.013778924942016602, -0.012771129608154297, -0.011763334274291992, -0.010755538940429688, -0.009747743606567383, -0.008739948272705078, -0.0077321529388427734, -0.006724357604980469, -0.005716562271118164, -0.004708766937255859, -0.0037009716033935547, -0.00269317626953125, -0.0016853809356689453, -0.0006775856018066406, 0.00033020973205566406, 0.0013380050659179688, 0.0023458003997802734, 0.003353595733642578, 0.004361391067504883, 0.0053691864013671875, 0.006376981735229492, 0.007384777069091797, 0.008392572402954102, 0.009400367736816406, 0.010408163070678711, 0.011415958404541016, 0.01242375373840332, 0.013431549072265625, 0.01443934440612793, 0.015447139739990234, 0.01645493507385254, 0.017462730407714844, 0.01847052574157715, 0.019478321075439453, 0.020486116409301758, 0.021493911743164062, 0.022501707077026367, 0.023509502410888672, 0.024517297744750977, 0.02552509307861328, 0.026532888412475586, 0.02754068374633789, 0.028548479080200195, 0.0295562744140625]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 3.0, 3.0, 5.0, 6.0, 7.0, 10.0, 31.0, 63.0, 157.0, 318.0, 244.0, 85.0, 39.0, 19.0, 5.0, 6.0, 2.0, 3.0, 4.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.21733719110488892, -0.2123745232820511, -0.20741185545921326, -0.20244918763637543, -0.1974865198135376, -0.19252383708953857, -0.18756118416786194, -0.18259850144386292, -0.17763583362102509, -0.17267316579818726, -0.16771049797534943, -0.1627478301525116, -0.15778516232967377, -0.15282249450683594, -0.14785981178283691, -0.14289714395999908, -0.13793447613716125, -0.13297180831432343, -0.1280091404914856, -0.12304647266864777, -0.11808379739522934, -0.11312112957239151, -0.10815846174955368, -0.10319578647613525, -0.09823313355445862, -0.09327046573162079, -0.08830779790878296, -0.08334513008594513, -0.0783824548125267, -0.07341978698968887, -0.06845711916685104, -0.06349444389343262, -0.05853176862001419, -0.05356910079717636, -0.04860642924904823, -0.0436437614262104, -0.038681089878082275, -0.033718422055244446, -0.028755754232406616, -0.023793082684278488, -0.01883041486144066, -0.01386774517595768, -0.008905076421797276, -0.003942407667636871, 0.0010202620178461075, 0.005982931703329086, 0.010945599526166916, 0.015908271074295044, 0.020870938897132874, 0.025833608582615852, 0.03079627826809883, 0.03575894609093666, 0.04072161763906479, 0.04568428546190262, 0.05064695328474045, 0.055609624832868576, 0.060572292655706406, 0.06553496420383453, 0.07049763202667236, 0.07546029984951019, 0.08042296767234802, 0.08538563549518585, 0.09034830331802368, 0.09531097859144211, 0.10027364641427994]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 5.0, 0.0, 10.0, 8.0, 4.0, 7.0, 6.0, 17.0, 13.0, 23.0, 21.0, 31.0, 33.0, 44.0, 58.0, 42.0, 41.0, 33.0, 51.0, 42.0, 55.0, 50.0, 57.0, 48.0, 45.0, 38.0, 35.0, 46.0, 22.0, 22.0, 23.0, 24.0, 12.0, 14.0, 9.0, 2.0, 7.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07210570573806763, -0.06994011253118515, -0.06777451932430267, -0.0656089261174202, -0.06344333291053772, -0.061277735978364944, -0.05911213904619217, -0.05694654583930969, -0.054780952632427216, -0.05261535942554474, -0.05044976621866226, -0.04828416928648949, -0.04611857607960701, -0.04395298287272453, -0.04178738594055176, -0.03962179273366928, -0.037456199526786804, -0.03529060631990433, -0.03312501311302185, -0.030959416180849075, -0.0287938229739666, -0.02662822976708412, -0.024462634697556496, -0.02229703962802887, -0.020131446421146393, -0.017965853214263916, -0.01580025814473629, -0.013634664006531239, -0.011469069868326187, -0.009303475730121136, -0.007137881591916084, -0.004972287453711033, -0.0028066933155059814, -0.00064109917730093, 0.0015244949609041214, 0.003690089099109173, 0.005855683237314224, 0.008021277375519276, 0.010186871513724327, 0.012352465651929379, 0.01451805979013443, 0.016683652997016907, 0.018849248066544533, 0.02101484313607216, 0.023180436342954636, 0.025346029549837112, 0.02751162461936474, 0.029677219688892365, 0.03184281289577484, 0.03400840610265732, 0.036173999309539795, 0.03833959624171257, 0.04050518944859505, 0.042670782655477524, 0.0448363795876503, 0.047001972794532776, 0.04916756600141525, 0.05133315920829773, 0.053498752415180206, 0.05566434934735298, 0.05782994255423546, 0.059995535761117935, 0.06216113269329071, 0.06432672590017319, 0.06649231910705566]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 3.0, 4.0, 0.0, 4.0, 1.0, 9.0, 11.0, 10.0, 17.0, 21.0, 28.0, 36.0, 57.0, 71.0, 117.0, 175.0, 258.0, 375.0, 532.0, 926.0, 1596.0, 3044.0, 6947.0, 20985.0, 96337.0, 672469.0, 192406.0, 33594.0, 9696.0, 3906.0, 1896.0, 1040.0, 654.0, 436.0, 279.0, 201.0, 118.0, 98.0, 55.0, 45.0, 30.0, 28.0, 16.0, 10.0, 6.0, 8.0, 1.0, 2.0, 5.0, 4.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.07647705078125, -0.07380962371826172, -0.07114219665527344, -0.06847476959228516, -0.06580734252929688, -0.0631399154663086, -0.06047248840332031, -0.05780506134033203, -0.05513763427734375, -0.05247020721435547, -0.04980278015136719, -0.047135353088378906, -0.044467926025390625, -0.041800498962402344, -0.03913307189941406, -0.03646564483642578, -0.0337982177734375, -0.03113079071044922, -0.028463363647460938, -0.025795936584472656, -0.023128509521484375, -0.020461082458496094, -0.017793655395507812, -0.015126228332519531, -0.01245880126953125, -0.009791374206542969, -0.0071239471435546875, -0.004456520080566406, -0.001789093017578125, 0.0008783340454101562, 0.0035457611083984375, 0.006213188171386719, 0.008880615234375, 0.011548042297363281, 0.014215469360351562, 0.016882896423339844, 0.019550323486328125, 0.022217750549316406, 0.024885177612304688, 0.02755260467529297, 0.03022003173828125, 0.03288745880126953, 0.03555488586425781, 0.038222312927246094, 0.040889739990234375, 0.043557167053222656, 0.04622459411621094, 0.04889202117919922, 0.0515594482421875, 0.05422687530517578, 0.05689430236816406, 0.059561729431152344, 0.062229156494140625, 0.0648965835571289, 0.06756401062011719, 0.07023143768310547, 0.07289886474609375, 0.07556629180908203, 0.07823371887207031, 0.0809011459350586, 0.08356857299804688, 0.08623600006103516, 0.08890342712402344, 0.09157085418701172, 0.09423828125]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 5.0, 4.0, 5.0, 4.0, 19.0, 23.0, 45.0, 72.0, 96.0, 101.0, 140.0, 123.0, 98.0, 73.0, 74.0, 33.0, 35.0, 13.0, 12.0, 4.0, 7.0, 8.0, 2.0, 1.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.023956298828125, -0.02310347557067871, -0.022250652313232422, -0.021397829055786133, -0.020545005798339844, -0.019692182540893555, -0.018839359283447266, -0.017986536026000977, -0.017133712768554688, -0.0162808895111084, -0.01542806625366211, -0.01457524299621582, -0.013722419738769531, -0.012869596481323242, -0.012016773223876953, -0.011163949966430664, -0.010311126708984375, -0.009458303451538086, -0.008605480194091797, -0.007752656936645508, -0.006899833679199219, -0.00604701042175293, -0.005194187164306641, -0.0043413639068603516, -0.0034885406494140625, -0.0026357173919677734, -0.0017828941345214844, -0.0009300708770751953, -7.724761962890625e-05, 0.0007755756378173828, 0.0016283988952636719, 0.002481222152709961, 0.00333404541015625, 0.004186868667602539, 0.005039691925048828, 0.005892515182495117, 0.006745338439941406, 0.007598161697387695, 0.008450984954833984, 0.009303808212280273, 0.010156631469726562, 0.011009454727172852, 0.01186227798461914, 0.01271510124206543, 0.013567924499511719, 0.014420747756958008, 0.015273571014404297, 0.016126394271850586, 0.016979217529296875, 0.017832040786743164, 0.018684864044189453, 0.019537687301635742, 0.02039051055908203, 0.02124333381652832, 0.02209615707397461, 0.0229489803314209, 0.023801803588867188, 0.024654626846313477, 0.025507450103759766, 0.026360273361206055, 0.027213096618652344, 0.028065919876098633, 0.028918743133544922, 0.02977156639099121, 0.0306243896484375]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 6.0, 4.0, 5.0, 5.0, 8.0, 10.0, 8.0, 10.0, 10.0, 23.0, 25.0, 26.0, 42.0, 75.0, 96.0, 145.0, 309.0, 576.0, 1287.0, 3426.0, 12245.0, 61878.0, 631823.0, 288826.0, 35447.0, 7801.0, 2366.0, 971.0, 440.0, 233.0, 128.0, 82.0, 57.0, 39.0, 31.0, 17.0, 17.0, 17.0, 8.0, 8.0, 5.0, 9.0, 5.0, 1.0, 3.0, 1.0, 2.0, 1.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.08526611328125, -0.08250141143798828, -0.07973670959472656, -0.07697200775146484, -0.07420730590820312, -0.0714426040649414, -0.06867790222167969, -0.06591320037841797, -0.06314849853515625, -0.06038379669189453, -0.05761909484863281, -0.054854393005371094, -0.052089691162109375, -0.049324989318847656, -0.04656028747558594, -0.04379558563232422, -0.0410308837890625, -0.03826618194580078, -0.03550148010253906, -0.032736778259277344, -0.029972076416015625, -0.027207374572753906, -0.024442672729492188, -0.02167797088623047, -0.01891326904296875, -0.01614856719970703, -0.013383865356445312, -0.010619163513183594, -0.007854461669921875, -0.005089759826660156, -0.0023250579833984375, 0.00043964385986328125, 0.003204345703125, 0.005969047546386719, 0.008733749389648438, 0.011498451232910156, 0.014263153076171875, 0.017027854919433594, 0.019792556762695312, 0.02255725860595703, 0.02532196044921875, 0.02808666229248047, 0.030851364135742188, 0.033616065979003906, 0.036380767822265625, 0.039145469665527344, 0.04191017150878906, 0.04467487335205078, 0.0474395751953125, 0.05020427703857422, 0.05296897888183594, 0.055733680725097656, 0.058498382568359375, 0.061263084411621094, 0.06402778625488281, 0.06679248809814453, 0.06955718994140625, 0.07232189178466797, 0.07508659362792969, 0.0778512954711914, 0.08061599731445312, 0.08338069915771484, 0.08614540100097656, 0.08891010284423828, 0.0916748046875]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 0.0, 3.0, 1.0, 8.0, 4.0, 7.0, 8.0, 9.0, 15.0, 14.0, 15.0, 19.0, 22.0, 22.0, 27.0, 36.0, 39.0, 43.0, 36.0, 43.0, 46.0, 37.0, 42.0, 58.0, 38.0, 35.0, 28.0, 27.0, 53.0, 47.0, 33.0, 26.0, 33.0, 19.0, 14.0, 17.0, 16.0, 9.0, 7.0, 7.0, 12.0, 6.0, 5.0, 6.0, 3.0, 5.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.0513916015625, -0.04983806610107422, -0.04828453063964844, -0.046730995178222656, -0.045177459716796875, -0.043623924255371094, -0.04207038879394531, -0.04051685333251953, -0.03896331787109375, -0.03740978240966797, -0.03585624694824219, -0.034302711486816406, -0.032749176025390625, -0.031195640563964844, -0.029642105102539062, -0.02808856964111328, -0.0265350341796875, -0.02498149871826172, -0.023427963256835938, -0.021874427795410156, -0.020320892333984375, -0.018767356872558594, -0.017213821411132812, -0.01566028594970703, -0.01410675048828125, -0.012553215026855469, -0.010999679565429688, -0.009446144104003906, -0.007892608642578125, -0.006339073181152344, -0.0047855377197265625, -0.0032320022583007812, -0.001678466796875, -0.00012493133544921875, 0.0014286041259765625, 0.0029821395874023438, 0.004535675048828125, 0.006089210510253906, 0.0076427459716796875, 0.009196281433105469, 0.01074981689453125, 0.012303352355957031, 0.013856887817382812, 0.015410423278808594, 0.016963958740234375, 0.018517494201660156, 0.020071029663085938, 0.02162456512451172, 0.0231781005859375, 0.02473163604736328, 0.026285171508789062, 0.027838706970214844, 0.029392242431640625, 0.030945777893066406, 0.03249931335449219, 0.03405284881591797, 0.03560638427734375, 0.03715991973876953, 0.03871345520019531, 0.040266990661621094, 0.041820526123046875, 0.043374061584472656, 0.04492759704589844, 0.04648113250732422, 0.04803466796875]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 2.0, 1.0, 3.0, 3.0, 7.0, 14.0, 13.0, 30.0, 36.0, 38.0, 87.0, 127.0, 288.0, 639.0, 1978.0, 9520.0, 147590.0, 844439.0, 37110.0, 4493.0, 1187.0, 437.0, 211.0, 116.0, 70.0, 34.0, 23.0, 19.0, 11.0, 12.0, 4.0, 4.0, 5.0, 2.0, 1.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0171661376953125, -0.016617536544799805, -0.01606893539428711, -0.015520334243774414, -0.014971733093261719, -0.014423131942749023, -0.013874530792236328, -0.013325929641723633, -0.012777328491210938, -0.012228727340698242, -0.011680126190185547, -0.011131525039672852, -0.010582923889160156, -0.010034322738647461, -0.009485721588134766, -0.00893712043762207, -0.008388519287109375, -0.00783991813659668, -0.007291316986083984, -0.006742715835571289, -0.006194114685058594, -0.0056455135345458984, -0.005096912384033203, -0.004548311233520508, -0.0039997100830078125, -0.003451108932495117, -0.002902507781982422, -0.0023539066314697266, -0.0018053054809570312, -0.001256704330444336, -0.0007081031799316406, -0.0001595020294189453, 0.00038909912109375, 0.0009377002716064453, 0.0014863014221191406, 0.002034902572631836, 0.0025835037231445312, 0.0031321048736572266, 0.003680706024169922, 0.004229307174682617, 0.0047779083251953125, 0.005326509475708008, 0.005875110626220703, 0.0064237117767333984, 0.006972312927246094, 0.007520914077758789, 0.008069515228271484, 0.00861811637878418, 0.009166717529296875, 0.00971531867980957, 0.010263919830322266, 0.010812520980834961, 0.011361122131347656, 0.011909723281860352, 0.012458324432373047, 0.013006925582885742, 0.013555526733398438, 0.014104127883911133, 0.014652729034423828, 0.015201330184936523, 0.01574993133544922, 0.016298532485961914, 0.01684713363647461, 0.017395734786987305, 0.0179443359375]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 8.0, 5.0, 8.0, 4.0, 8.0, 14.0, 15.0, 16.0, 15.0, 12.0, 39.0, 30.0, 36.0, 38.0, 43.0, 67.0, 54.0, 43.0, 46.0, 50.0, 70.0, 51.0, 44.0, 37.0, 36.0, 52.0, 23.0, 31.0, 12.0, 22.0, 15.0, 12.0, 11.0, 4.0, 3.0, 10.0, 4.0, 7.0, 5.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.291534423828125e-06, -4.1602179408073425e-06, -4.02890145778656e-06, -3.897584974765778e-06, -3.766268491744995e-06, -3.6349520087242126e-06, -3.50363552570343e-06, -3.3723190426826477e-06, -3.2410025596618652e-06, -3.1096860766410828e-06, -2.9783695936203003e-06, -2.847053110599518e-06, -2.7157366275787354e-06, -2.584420144557953e-06, -2.4531036615371704e-06, -2.321787178516388e-06, -2.1904706954956055e-06, -2.059154212474823e-06, -1.9278377294540405e-06, -1.796521246433258e-06, -1.6652047634124756e-06, -1.5338882803916931e-06, -1.4025717973709106e-06, -1.2712553143501282e-06, -1.1399388313293457e-06, -1.0086223483085632e-06, -8.773058652877808e-07, -7.459893822669983e-07, -6.146728992462158e-07, -4.833564162254333e-07, -3.520399332046509e-07, -2.207234501838684e-07, -8.940696716308594e-08, 4.190951585769653e-08, 1.73225998878479e-07, 3.045424818992615e-07, 4.3585896492004395e-07, 5.671754479408264e-07, 6.984919309616089e-07, 8.298084139823914e-07, 9.611248970031738e-07, 1.0924413800239563e-06, 1.2237578630447388e-06, 1.3550743460655212e-06, 1.4863908290863037e-06, 1.6177073121070862e-06, 1.7490237951278687e-06, 1.8803402781486511e-06, 2.0116567611694336e-06, 2.142973244190216e-06, 2.2742897272109985e-06, 2.405606210231781e-06, 2.5369226932525635e-06, 2.668239176273346e-06, 2.7995556592941284e-06, 2.930872142314911e-06, 3.0621886253356934e-06, 3.193505108356476e-06, 3.3248215913772583e-06, 3.4561380743980408e-06, 3.5874545574188232e-06, 3.7187710404396057e-06, 3.850087523460388e-06, 3.981404006481171e-06, 4.112720489501953e-06]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 7.0, 6.0, 7.0, 7.0, 18.0, 25.0, 55.0, 68.0, 115.0, 266.0, 584.0, 1963.0, 10700.0, 212484.0, 791113.0, 26164.0, 3321.0, 920.0, 346.0, 147.0, 97.0, 41.0, 33.0, 19.0, 19.0, 11.0, 7.0, 5.0, 7.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0182342529296875, -0.017667770385742188, -0.017101287841796875, -0.016534805297851562, -0.01596832275390625, -0.015401840209960938, -0.014835357666015625, -0.014268875122070312, -0.013702392578125, -0.013135910034179688, -0.012569427490234375, -0.012002944946289062, -0.01143646240234375, -0.010869979858398438, -0.010303497314453125, -0.009737014770507812, -0.0091705322265625, -0.008604049682617188, -0.008037567138671875, -0.0074710845947265625, -0.00690460205078125, -0.0063381195068359375, -0.005771636962890625, -0.0052051544189453125, -0.004638671875, -0.0040721893310546875, -0.003505706787109375, -0.0029392242431640625, -0.00237274169921875, -0.0018062591552734375, -0.001239776611328125, -0.0006732940673828125, -0.0001068115234375, 0.0004596710205078125, 0.001026153564453125, 0.0015926361083984375, 0.00215911865234375, 0.0027256011962890625, 0.003292083740234375, 0.0038585662841796875, 0.004425048828125, 0.0049915313720703125, 0.005558013916015625, 0.0061244964599609375, 0.00669097900390625, 0.0072574615478515625, 0.007823944091796875, 0.008390426635742188, 0.0089569091796875, 0.009523391723632812, 0.010089874267578125, 0.010656356811523438, 0.01122283935546875, 0.011789321899414062, 0.012355804443359375, 0.012922286987304688, 0.01348876953125, 0.014055252075195312, 0.014621734619140625, 0.015188217163085938, 0.01575469970703125, 0.016321182250976562, 0.016887664794921875, 0.017454147338867188, 0.0180206298828125]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 5.0, 2.0, 11.0, 2.0, 14.0, 14.0, 18.0, 37.0, 31.0, 69.0, 59.0, 85.0, 90.0, 96.0, 107.0, 85.0, 72.0, 51.0, 49.0, 36.0, 20.0, 21.0, 10.0, 6.0, 7.0, 2.0, 2.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0107269287109375, -0.010416388511657715, -0.01010584831237793, -0.009795308113098145, -0.00948476791381836, -0.009174227714538574, -0.008863687515258789, -0.008553147315979004, -0.008242607116699219, -0.007932066917419434, -0.0076215267181396484, -0.007310986518859863, -0.007000446319580078, -0.006689906120300293, -0.006379365921020508, -0.006068825721740723, -0.0057582855224609375, -0.005447745323181152, -0.005137205123901367, -0.004826664924621582, -0.004516124725341797, -0.004205584526062012, -0.0038950443267822266, -0.0035845041275024414, -0.0032739639282226562, -0.002963423728942871, -0.002652883529663086, -0.0023423433303833008, -0.0020318031311035156, -0.0017212629318237305, -0.0014107227325439453, -0.0011001825332641602, -0.000789642333984375, -0.00047910213470458984, -0.0001685619354248047, 0.00014197826385498047, 0.0004525184631347656, 0.0007630586624145508, 0.001073598861694336, 0.001384139060974121, 0.0016946792602539062, 0.0020052194595336914, 0.0023157596588134766, 0.0026262998580932617, 0.002936840057373047, 0.003247380256652832, 0.003557920455932617, 0.0038684606552124023, 0.0041790008544921875, 0.004489541053771973, 0.004800081253051758, 0.005110621452331543, 0.005421161651611328, 0.005731701850891113, 0.0060422420501708984, 0.006352782249450684, 0.006663322448730469, 0.006973862648010254, 0.007284402847290039, 0.007594943046569824, 0.00790548324584961, 0.008216023445129395, 0.00852656364440918, 0.008837103843688965, 0.00914764404296875]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 6.0, 7.0, 16.0, 40.0, 70.0, 178.0, 395.0, 181.0, 67.0, 31.0, 11.0, 4.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7243878841400146, -0.704513669013977, -0.6846395134925842, -0.6647653579711914, -0.6448911428451538, -0.6250169277191162, -0.6051427721977234, -0.5852686166763306, -0.565394401550293, -0.5455201864242554, -0.5256460309028625, -0.5057718753814697, -0.48589766025543213, -0.4660234749317169, -0.4461492896080017, -0.4262751042842865, -0.4064009189605713, -0.3865267336368561, -0.36665254831314087, -0.34677836298942566, -0.32690417766571045, -0.30702999234199524, -0.28715580701828003, -0.2672816216945648, -0.2474074363708496, -0.2275332510471344, -0.2076590657234192, -0.18778488039970398, -0.16791069507598877, -0.14803650975227356, -0.12816232442855835, -0.10828813910484314, -0.0884140133857727, -0.0685398280620575, -0.048665642738342285, -0.028791457414627075, -0.008917272090911865, 0.010956913232803345, 0.030831098556518555, 0.050705283880233765, 0.07057946920394897, 0.09045365452766418, 0.1103278398513794, 0.1302020251750946, 0.15007621049880981, 0.16995039582252502, 0.18982458114624023, 0.20969876646995544, 0.22957295179367065, 0.24944713711738586, 0.2693213224411011, 0.2891955077648163, 0.3090696930885315, 0.3289438784122467, 0.3488180637359619, 0.3686922490596771, 0.38856643438339233, 0.40844061970710754, 0.42831480503082275, 0.44818899035453796, 0.4680631756782532, 0.4879373610019684, 0.5078115463256836, 0.5276857614517212, 0.547559916973114]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 5.0, 3.0, 3.0, 3.0, 10.0, 10.0, 9.0, 8.0, 14.0, 16.0, 18.0, 25.0, 27.0, 21.0, 28.0, 33.0, 38.0, 26.0, 37.0, 47.0, 50.0, 48.0, 48.0, 51.0, 54.0, 38.0, 35.0, 34.0, 32.0, 42.0, 34.0, 22.0, 20.0, 24.0, 18.0, 15.0, 15.0, 26.0, 7.0, 3.0, 3.0, 5.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.252676784992218, -0.24439114332199097, -0.23610550165176392, -0.22781985998153687, -0.21953421831130981, -0.21124857664108276, -0.20296292006969452, -0.19467727839946747, -0.18639163672924042, -0.17810599505901337, -0.16982035338878632, -0.16153471171855927, -0.15324905514717102, -0.14496341347694397, -0.13667777180671692, -0.12839213013648987, -0.12010648846626282, -0.11182084679603577, -0.10353520512580872, -0.09524955600500107, -0.08696391433477402, -0.07867827266454697, -0.07039262354373932, -0.06210698187351227, -0.05382134020328522, -0.045535698533058167, -0.03725005313754082, -0.028964409604668617, -0.020678766071796417, -0.012393124401569366, -0.004107479006052017, 0.004178166389465332, 0.012463808059692383, 0.020749451592564583, 0.029035095125436783, 0.03732074052095413, 0.04560638219118118, 0.053892023861408234, 0.06217766925692558, 0.07046331465244293, 0.07874895632266998, 0.08703459799289703, 0.09532023966312408, 0.10360588878393173, 0.11189153045415878, 0.12017717212438583, 0.12846282124519348, 0.13674846291542053, 0.14503410458564758, 0.15331974625587463, 0.16160538792610168, 0.16989102959632874, 0.1781766712665558, 0.18646231293678284, 0.19474796950817108, 0.20303361117839813, 0.21131925284862518, 0.21960489451885223, 0.22789053618907928, 0.23617617785930634, 0.24446183443069458, 0.25274747610092163, 0.2610331177711487, 0.26931875944137573, 0.2776044011116028]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 2.0, 4.0, 0.0, 4.0, 1.0, 3.0, 5.0, 10.0, 2.0, 8.0, 9.0, 21.0, 27.0, 33.0, 47.0, 114.0, 144.0, 323.0, 619.0, 1587.0, 5338.0, 54285.0, 4107648.0, 18208.0, 3464.0, 1150.0, 499.0, 276.0, 136.0, 102.0, 55.0, 46.0, 32.0, 24.0, 22.0, 9.0, 3.0, 7.0, 8.0, 1.0, 5.0, 6.0, 2.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0697021484375, -0.06705379486083984, -0.06440544128417969, -0.06175708770751953, -0.059108734130859375, -0.05646038055419922, -0.05381202697753906, -0.051163673400878906, -0.04851531982421875, -0.045866966247558594, -0.04321861267089844, -0.04057025909423828, -0.037921905517578125, -0.03527355194091797, -0.03262519836425781, -0.029976844787597656, -0.0273284912109375, -0.024680137634277344, -0.022031784057617188, -0.01938343048095703, -0.016735076904296875, -0.014086723327636719, -0.011438369750976562, -0.008790016174316406, -0.00614166259765625, -0.0034933090209960938, -0.0008449554443359375, 0.0018033981323242188, 0.004451751708984375, 0.007100105285644531, 0.009748458862304688, 0.012396812438964844, 0.015045166015625, 0.017693519592285156, 0.020341873168945312, 0.02299022674560547, 0.025638580322265625, 0.02828693389892578, 0.030935287475585938, 0.033583641052246094, 0.03623199462890625, 0.038880348205566406, 0.04152870178222656, 0.04417705535888672, 0.046825408935546875, 0.04947376251220703, 0.05212211608886719, 0.054770469665527344, 0.0574188232421875, 0.060067176818847656, 0.06271553039550781, 0.06536388397216797, 0.06801223754882812, 0.07066059112548828, 0.07330894470214844, 0.0759572982788086, 0.07860565185546875, 0.0812540054321289, 0.08390235900878906, 0.08655071258544922, 0.08919906616210938, 0.09184741973876953, 0.09449577331542969, 0.09714412689208984, 0.09979248046875]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 6.0, 4.0, 3.0, 11.0, 19.0, 27.0, 55.0, 86.0, 113.0, 136.0, 127.0, 107.0, 97.0, 70.0, 48.0, 34.0, 23.0, 9.0, 5.0, 8.0, 7.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.023681640625, -0.022788047790527344, -0.021894454956054688, -0.02100086212158203, -0.020107269287109375, -0.01921367645263672, -0.018320083618164062, -0.017426490783691406, -0.01653289794921875, -0.015639305114746094, -0.014745712280273438, -0.013852119445800781, -0.012958526611328125, -0.012064933776855469, -0.011171340942382812, -0.010277748107910156, -0.0093841552734375, -0.008490562438964844, -0.0075969696044921875, -0.006703376770019531, -0.005809783935546875, -0.004916191101074219, -0.0040225982666015625, -0.0031290054321289062, -0.00223541259765625, -0.0013418197631835938, -0.0004482269287109375, 0.00044536590576171875, 0.001338958740234375, 0.0022325515747070312, 0.0031261444091796875, 0.004019737243652344, 0.004913330078125, 0.005806922912597656, 0.0067005157470703125, 0.007594108581542969, 0.008487701416015625, 0.009381294250488281, 0.010274887084960938, 0.011168479919433594, 0.01206207275390625, 0.012955665588378906, 0.013849258422851562, 0.014742851257324219, 0.015636444091796875, 0.01653003692626953, 0.017423629760742188, 0.018317222595214844, 0.0192108154296875, 0.020104408264160156, 0.020998001098632812, 0.02189159393310547, 0.022785186767578125, 0.02367877960205078, 0.024572372436523438, 0.025465965270996094, 0.02635955810546875, 0.027253150939941406, 0.028146743774414062, 0.02904033660888672, 0.029933929443359375, 0.03082752227783203, 0.03172111511230469, 0.032614707946777344, 0.03350830078125]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 2.0, 1.0, 7.0, 18.0, 25.0, 56.0, 114.0, 258.0, 589.0, 1634.0, 6010.0, 97890.0, 4072009.0, 11344.0, 2670.0, 966.0, 349.0, 165.0, 84.0, 43.0, 20.0, 9.0, 8.0, 3.0, 4.0, 3.0, 2.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.130859375, -0.12713909149169922, -0.12341880798339844, -0.11969852447509766, -0.11597824096679688, -0.1122579574584961, -0.10853767395019531, -0.10481739044189453, -0.10109710693359375, -0.09737682342529297, -0.09365653991699219, -0.0899362564086914, -0.08621597290039062, -0.08249568939208984, -0.07877540588378906, -0.07505512237548828, -0.0713348388671875, -0.06761455535888672, -0.06389427185058594, -0.060173988342285156, -0.056453704833984375, -0.052733421325683594, -0.04901313781738281, -0.04529285430908203, -0.04157257080078125, -0.03785228729248047, -0.03413200378417969, -0.030411720275878906, -0.026691436767578125, -0.022971153259277344, -0.019250869750976562, -0.015530586242675781, -0.011810302734375, -0.008090019226074219, -0.0043697357177734375, -0.0006494522094726562, 0.003070831298828125, 0.006791114807128906, 0.010511398315429688, 0.014231681823730469, 0.01795196533203125, 0.02167224884033203, 0.025392532348632812, 0.029112815856933594, 0.032833099365234375, 0.036553382873535156, 0.04027366638183594, 0.04399394989013672, 0.0477142333984375, 0.05143451690673828, 0.05515480041503906, 0.058875083923339844, 0.06259536743164062, 0.0663156509399414, 0.07003593444824219, 0.07375621795654297, 0.07747650146484375, 0.08119678497314453, 0.08491706848144531, 0.0886373519897461, 0.09235763549804688, 0.09607791900634766, 0.09979820251464844, 0.10351848602294922, 0.10723876953125]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 6.0, 2.0, 15.0, 15.0, 59.0, 214.0, 3451.0, 187.0, 51.0, 22.0, 13.0, 8.0, 5.0, 4.0, 4.0, 3.0, 5.0, 2.0, 0.0, 2.0, 0.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.030029296875, -0.028975486755371094, -0.027921676635742188, -0.02686786651611328, -0.025814056396484375, -0.02476024627685547, -0.023706436157226562, -0.022652626037597656, -0.02159881591796875, -0.020545005798339844, -0.019491195678710938, -0.01843738555908203, -0.017383575439453125, -0.01632976531982422, -0.015275955200195312, -0.014222145080566406, -0.0131683349609375, -0.012114524841308594, -0.011060714721679688, -0.010006904602050781, -0.008953094482421875, -0.007899284362792969, -0.0068454742431640625, -0.005791664123535156, -0.00473785400390625, -0.0036840438842773438, -0.0026302337646484375, -0.0015764236450195312, -0.000522613525390625, 0.0005311965942382812, 0.0015850067138671875, 0.0026388168334960938, 0.003692626953125, 0.004746437072753906, 0.0058002471923828125, 0.006854057312011719, 0.007907867431640625, 0.008961677551269531, 0.010015487670898438, 0.011069297790527344, 0.01212310791015625, 0.013176918029785156, 0.014230728149414062, 0.015284538269042969, 0.016338348388671875, 0.01739215850830078, 0.018445968627929688, 0.019499778747558594, 0.0205535888671875, 0.021607398986816406, 0.022661209106445312, 0.02371501922607422, 0.024768829345703125, 0.02582263946533203, 0.026876449584960938, 0.027930259704589844, 0.02898406982421875, 0.030037879943847656, 0.031091690063476562, 0.03214550018310547, 0.033199310302734375, 0.03425312042236328, 0.03530693054199219, 0.036360740661621094, 0.03741455078125]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 0.0, 2.0, 11.0, 44.0, 234.0, 507.0, 142.0, 38.0, 9.0, 7.0, 9.0, 1.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.16338157653808594, -0.15573081374168396, -0.14808006584644318, -0.1404293179512024, -0.13277855515480042, -0.12512779235839844, -0.11747704446315765, -0.10982628911733627, -0.10217553377151489, -0.09452477842569351, -0.08687402307987213, -0.07922326773405075, -0.07157251238822937, -0.06392175704240799, -0.05627100169658661, -0.04862024635076523, -0.04096949100494385, -0.03331873565912247, -0.025667980313301086, -0.018017224967479706, -0.010366469621658325, -0.0027157142758369446, 0.004935041069984436, 0.012585796415805817, 0.020236551761627197, 0.027887307107448578, 0.03553806245326996, 0.04318881779909134, 0.05083957314491272, 0.0584903284907341, 0.06614108383655548, 0.07379183918237686, 0.08144259452819824, 0.08909334987401962, 0.096744105219841, 0.10439486056566238, 0.11204561591148376, 0.11969637125730515, 0.12734712660312653, 0.1349978744983673, 0.1426486372947693, 0.15029940009117126, 0.15795014798641205, 0.16560089588165283, 0.1732516586780548, 0.1809024214744568, 0.18855316936969757, 0.19620391726493835, 0.20385468006134033, 0.2115054428577423, 0.2191561907529831, 0.22680693864822388, 0.23445770144462585, 0.24210846424102783, 0.24975921213626862, 0.2574099600315094, 0.2650607228279114, 0.27271148562431335, 0.28036224842071533, 0.2880129814147949, 0.2956637442111969, 0.3033145070075989, 0.31096524000167847, 0.31861600279808044, 0.3262667655944824]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 7.0, 5.0, 6.0, 6.0, 17.0, 14.0, 16.0, 18.0, 27.0, 32.0, 34.0, 41.0, 54.0, 57.0, 56.0, 80.0, 60.0, 50.0, 56.0, 73.0, 57.0, 41.0, 31.0, 37.0, 31.0, 18.0, 20.0, 10.0, 10.0, 21.0, 8.0, 2.0, 2.0, 5.0, 4.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.06494659185409546, -0.06270711123943329, -0.06046763062477112, -0.05822815001010895, -0.05598866939544678, -0.05374918878078461, -0.051509708166122437, -0.049270227551460266, -0.047030746936798096, -0.044791266322135925, -0.042551785707473755, -0.040312305092811584, -0.038072824478149414, -0.035833343863487244, -0.03359386324882507, -0.0313543826341629, -0.029114902019500732, -0.026875421404838562, -0.02463594079017639, -0.02239646017551422, -0.02015697956085205, -0.01791749894618988, -0.01567801833152771, -0.01343853771686554, -0.01119905710220337, -0.008959576487541199, -0.006720095872879028, -0.004480615258216858, -0.0022411346435546875, -1.6540288925170898e-06, 0.0022378265857696533, 0.004477307200431824, 0.006716787815093994, 0.008956268429756165, 0.011195749044418335, 0.013435229659080505, 0.015674710273742676, 0.017914190888404846, 0.020153671503067017, 0.022393152117729187, 0.024632632732391357, 0.026872113347053528, 0.029111593961715698, 0.03135107457637787, 0.03359055519104004, 0.03583003580570221, 0.03806951642036438, 0.04030899703502655, 0.04254847764968872, 0.04478795826435089, 0.04702743887901306, 0.04926691949367523, 0.0515064001083374, 0.05374588072299957, 0.05598536133766174, 0.058224841952323914, 0.060464322566986084, 0.06270380318164825, 0.06494328379631042, 0.0671827644109726, 0.06942224502563477, 0.07166172564029694, 0.0739012062549591, 0.07614068686962128, 0.07838016748428345]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 5.0, 1.0, 4.0, 5.0, 6.0, 19.0, 23.0, 26.0, 42.0, 72.0, 106.0, 157.0, 251.0, 430.0, 764.0, 1430.0, 3149.0, 9776.0, 53552.0, 704823.0, 239038.0, 24099.0, 5842.0, 2315.0, 1078.0, 609.0, 330.0, 200.0, 148.0, 81.0, 57.0, 39.0, 22.0, 18.0, 15.0, 9.0, 6.0, 4.0, 5.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0911865234375, -0.08812618255615234, -0.08506584167480469, -0.08200550079345703, -0.07894515991210938, -0.07588481903076172, -0.07282447814941406, -0.0697641372680664, -0.06670379638671875, -0.0636434555053711, -0.06058311462402344, -0.05752277374267578, -0.054462432861328125, -0.05140209197998047, -0.04834175109863281, -0.045281410217285156, -0.0422210693359375, -0.039160728454589844, -0.03610038757324219, -0.03304004669189453, -0.029979705810546875, -0.02691936492919922, -0.023859024047851562, -0.020798683166503906, -0.01773834228515625, -0.014678001403808594, -0.011617660522460938, -0.008557319641113281, -0.005496978759765625, -0.0024366378784179688, 0.0006237030029296875, 0.0036840438842773438, 0.006744384765625, 0.009804725646972656, 0.012865066528320312, 0.01592540740966797, 0.018985748291015625, 0.02204608917236328, 0.025106430053710938, 0.028166770935058594, 0.03122711181640625, 0.034287452697753906, 0.03734779357910156, 0.04040813446044922, 0.043468475341796875, 0.04652881622314453, 0.04958915710449219, 0.052649497985839844, 0.0557098388671875, 0.058770179748535156, 0.06183052062988281, 0.06489086151123047, 0.06795120239257812, 0.07101154327392578, 0.07407188415527344, 0.0771322250366211, 0.08019256591796875, 0.0832529067993164, 0.08631324768066406, 0.08937358856201172, 0.09243392944335938, 0.09549427032470703, 0.09855461120605469, 0.10161495208740234, 0.10467529296875]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 4.0, 4.0, 7.0, 1.0, 16.0, 20.0, 48.0, 66.0, 102.0, 131.0, 148.0, 119.0, 108.0, 91.0, 48.0, 32.0, 22.0, 13.0, 8.0, 6.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0230255126953125, -0.022099733352661133, -0.021173954010009766, -0.0202481746673584, -0.01932239532470703, -0.018396615982055664, -0.017470836639404297, -0.01654505729675293, -0.015619277954101562, -0.014693498611450195, -0.013767719268798828, -0.012841939926147461, -0.011916160583496094, -0.010990381240844727, -0.01006460189819336, -0.009138822555541992, -0.008213043212890625, -0.007287263870239258, -0.006361484527587891, -0.0054357051849365234, -0.004509925842285156, -0.003584146499633789, -0.002658367156982422, -0.0017325878143310547, -0.0008068084716796875, 0.00011897087097167969, 0.0010447502136230469, 0.001970529556274414, 0.0028963088989257812, 0.0038220882415771484, 0.004747867584228516, 0.005673646926879883, 0.00659942626953125, 0.007525205612182617, 0.008450984954833984, 0.009376764297485352, 0.010302543640136719, 0.011228322982788086, 0.012154102325439453, 0.01307988166809082, 0.014005661010742188, 0.014931440353393555, 0.015857219696044922, 0.01678299903869629, 0.017708778381347656, 0.018634557723999023, 0.01956033706665039, 0.020486116409301758, 0.021411895751953125, 0.022337675094604492, 0.02326345443725586, 0.024189233779907227, 0.025115013122558594, 0.02604079246520996, 0.026966571807861328, 0.027892351150512695, 0.028818130493164062, 0.02974390983581543, 0.030669689178466797, 0.031595468521118164, 0.03252124786376953, 0.0334470272064209, 0.034372806549072266, 0.03529858589172363, 0.036224365234375]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 1.0, 3.0, 2.0, 0.0, 5.0, 5.0, 2.0, 7.0, 7.0, 5.0, 15.0, 14.0, 17.0, 26.0, 22.0, 17.0, 33.0, 27.0, 52.0, 47.0, 58.0, 89.0, 137.0, 231.0, 531.0, 1438.0, 4751.0, 17000.0, 71252.0, 490960.0, 383064.0, 58183.0, 14153.0, 3990.0, 1184.0, 425.0, 236.0, 124.0, 73.0, 64.0, 47.0, 34.0, 43.0, 37.0, 20.0, 21.0, 24.0, 14.0, 14.0, 11.0, 9.0, 10.0, 7.0, 2.0, 9.0, 4.0, 5.0, 4.0, 2.0, 0.0, 2.0], "bins": [-0.0633544921875, -0.061430931091308594, -0.05950736999511719, -0.05758380889892578, -0.055660247802734375, -0.05373668670654297, -0.05181312561035156, -0.049889564514160156, -0.04796600341796875, -0.046042442321777344, -0.04411888122558594, -0.04219532012939453, -0.040271759033203125, -0.03834819793701172, -0.03642463684082031, -0.034501075744628906, -0.0325775146484375, -0.030653953552246094, -0.028730392456054688, -0.02680683135986328, -0.024883270263671875, -0.02295970916748047, -0.021036148071289062, -0.019112586975097656, -0.01718902587890625, -0.015265464782714844, -0.013341903686523438, -0.011418342590332031, -0.009494781494140625, -0.007571220397949219, -0.0056476593017578125, -0.0037240982055664062, -0.001800537109375, 0.00012302398681640625, 0.0020465850830078125, 0.003970146179199219, 0.005893707275390625, 0.007817268371582031, 0.009740829467773438, 0.011664390563964844, 0.01358795166015625, 0.015511512756347656, 0.017435073852539062, 0.01935863494873047, 0.021282196044921875, 0.02320575714111328, 0.025129318237304688, 0.027052879333496094, 0.0289764404296875, 0.030900001525878906, 0.03282356262207031, 0.03474712371826172, 0.036670684814453125, 0.03859424591064453, 0.04051780700683594, 0.042441368103027344, 0.04436492919921875, 0.046288490295410156, 0.04821205139160156, 0.05013561248779297, 0.052059173583984375, 0.05398273468017578, 0.05590629577636719, 0.057829856872558594, 0.05975341796875]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 5.0, 3.0, 7.0, 6.0, 9.0, 4.0, 9.0, 13.0, 13.0, 11.0, 13.0, 10.0, 17.0, 23.0, 21.0, 21.0, 19.0, 36.0, 27.0, 26.0, 34.0, 28.0, 28.0, 29.0, 35.0, 32.0, 36.0, 37.0, 38.0, 32.0, 33.0, 30.0, 39.0, 23.0, 27.0, 30.0, 22.0, 26.0, 19.0, 17.0, 17.0, 20.0, 14.0, 7.0, 21.0, 6.0, 11.0, 3.0, 5.0, 5.0, 6.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 0.0, 2.0], "bins": [-0.036224365234375, -0.035050392150878906, -0.03387641906738281, -0.03270244598388672, -0.031528472900390625, -0.03035449981689453, -0.029180526733398438, -0.028006553649902344, -0.02683258056640625, -0.025658607482910156, -0.024484634399414062, -0.02331066131591797, -0.022136688232421875, -0.02096271514892578, -0.019788742065429688, -0.018614768981933594, -0.0174407958984375, -0.016266822814941406, -0.015092849731445312, -0.013918876647949219, -0.012744903564453125, -0.011570930480957031, -0.010396957397460938, -0.009222984313964844, -0.00804901123046875, -0.006875038146972656, -0.0057010650634765625, -0.004527091979980469, -0.003353118896484375, -0.0021791458129882812, -0.0010051727294921875, 0.00016880035400390625, 0.0013427734375, 0.0025167465209960938, 0.0036907196044921875, 0.004864692687988281, 0.006038665771484375, 0.007212638854980469, 0.008386611938476562, 0.009560585021972656, 0.01073455810546875, 0.011908531188964844, 0.013082504272460938, 0.014256477355957031, 0.015430450439453125, 0.01660442352294922, 0.017778396606445312, 0.018952369689941406, 0.0201263427734375, 0.021300315856933594, 0.022474288940429688, 0.02364826202392578, 0.024822235107421875, 0.02599620819091797, 0.027170181274414062, 0.028344154357910156, 0.02951812744140625, 0.030692100524902344, 0.03186607360839844, 0.03304004669189453, 0.034214019775390625, 0.03538799285888672, 0.03656196594238281, 0.037735939025878906, 0.038909912109375]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 6.0, 5.0, 4.0, 17.0, 23.0, 31.0, 45.0, 83.0, 182.0, 520.0, 2753.0, 45522.0, 976429.0, 20416.0, 1761.0, 394.0, 152.0, 86.0, 40.0, 41.0, 20.0, 16.0, 6.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0188140869140625, -0.017891645431518555, -0.01696920394897461, -0.016046762466430664, -0.015124320983886719, -0.014201879501342773, -0.013279438018798828, -0.012356996536254883, -0.011434555053710938, -0.010512113571166992, -0.009589672088623047, -0.008667230606079102, -0.007744789123535156, -0.006822347640991211, -0.005899906158447266, -0.00497746467590332, -0.004055023193359375, -0.0031325817108154297, -0.0022101402282714844, -0.001287698745727539, -0.00036525726318359375, 0.0005571842193603516, 0.0014796257019042969, 0.002402067184448242, 0.0033245086669921875, 0.004246950149536133, 0.005169391632080078, 0.0060918331146240234, 0.007014274597167969, 0.007936716079711914, 0.00885915756225586, 0.009781599044799805, 0.01070404052734375, 0.011626482009887695, 0.01254892349243164, 0.013471364974975586, 0.014393806457519531, 0.015316247940063477, 0.016238689422607422, 0.017161130905151367, 0.018083572387695312, 0.019006013870239258, 0.019928455352783203, 0.02085089683532715, 0.021773338317871094, 0.02269577980041504, 0.023618221282958984, 0.02454066276550293, 0.025463104248046875, 0.02638554573059082, 0.027307987213134766, 0.02823042869567871, 0.029152870178222656, 0.0300753116607666, 0.030997753143310547, 0.03192019462585449, 0.03284263610839844, 0.03376507759094238, 0.03468751907348633, 0.03560996055603027, 0.03653240203857422, 0.037454843521118164, 0.03837728500366211, 0.039299726486206055, 0.04022216796875]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 3.0, 4.0, 6.0, 7.0, 13.0, 19.0, 42.0, 36.0, 50.0, 83.0, 96.0, 103.0, 116.0, 123.0, 83.0, 89.0, 46.0, 31.0, 15.0, 7.0, 15.0, 5.0, 5.0, 3.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.1801719665527344e-05, -1.1476688086986542e-05, -1.115165650844574e-05, -1.0826624929904938e-05, -1.0501593351364136e-05, -1.0176561772823334e-05, -9.851530194282532e-06, -9.52649861574173e-06, -9.201467037200928e-06, -8.876435458660126e-06, -8.551403880119324e-06, -8.226372301578522e-06, -7.90134072303772e-06, -7.576309144496918e-06, -7.251277565956116e-06, -6.926245987415314e-06, -6.601214408874512e-06, -6.27618283033371e-06, -5.951151251792908e-06, -5.626119673252106e-06, -5.301088094711304e-06, -4.976056516170502e-06, -4.6510249376297e-06, -4.325993359088898e-06, -4.000961780548096e-06, -3.6759302020072937e-06, -3.3508986234664917e-06, -3.0258670449256897e-06, -2.7008354663848877e-06, -2.3758038878440857e-06, -2.0507723093032837e-06, -1.7257407307624817e-06, -1.4007091522216797e-06, -1.0756775736808777e-06, -7.506459951400757e-07, -4.256144165992737e-07, -1.0058283805847168e-07, 2.2444874048233032e-07, 5.494803190231323e-07, 8.745118975639343e-07, 1.1995434761047363e-06, 1.5245750546455383e-06, 1.8496066331863403e-06, 2.1746382117271423e-06, 2.4996697902679443e-06, 2.8247013688087463e-06, 3.1497329473495483e-06, 3.4747645258903503e-06, 3.7997961044311523e-06, 4.124827682971954e-06, 4.449859261512756e-06, 4.774890840053558e-06, 5.09992241859436e-06, 5.424953997135162e-06, 5.749985575675964e-06, 6.075017154216766e-06, 6.400048732757568e-06, 6.72508031129837e-06, 7.050111889839172e-06, 7.375143468379974e-06, 7.700175046920776e-06, 8.025206625461578e-06, 8.35023820400238e-06, 8.675269782543182e-06, 9.000301361083984e-06]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 6.0, 11.0, 12.0, 28.0, 66.0, 60.0, 161.0, 359.0, 1380.0, 16260.0, 973902.0, 53247.0, 2256.0, 438.0, 161.0, 86.0, 59.0, 29.0, 17.0, 6.0, 2.0, 5.0, 1.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0211944580078125, -0.020367860794067383, -0.019541263580322266, -0.01871466636657715, -0.01788806915283203, -0.017061471939086914, -0.016234874725341797, -0.01540827751159668, -0.014581680297851562, -0.013755083084106445, -0.012928485870361328, -0.012101888656616211, -0.011275291442871094, -0.010448694229125977, -0.00962209701538086, -0.008795499801635742, -0.007968902587890625, -0.007142305374145508, -0.006315708160400391, -0.0054891109466552734, -0.004662513732910156, -0.003835916519165039, -0.003009319305419922, -0.0021827220916748047, -0.0013561248779296875, -0.0005295276641845703, 0.0002970695495605469, 0.001123666763305664, 0.0019502639770507812, 0.0027768611907958984, 0.0036034584045410156, 0.004430055618286133, 0.00525665283203125, 0.006083250045776367, 0.006909847259521484, 0.0077364444732666016, 0.008563041687011719, 0.009389638900756836, 0.010216236114501953, 0.01104283332824707, 0.011869430541992188, 0.012696027755737305, 0.013522624969482422, 0.014349222183227539, 0.015175819396972656, 0.016002416610717773, 0.01682901382446289, 0.017655611038208008, 0.018482208251953125, 0.019308805465698242, 0.02013540267944336, 0.020961999893188477, 0.021788597106933594, 0.02261519432067871, 0.023441791534423828, 0.024268388748168945, 0.025094985961914062, 0.02592158317565918, 0.026748180389404297, 0.027574777603149414, 0.02840137481689453, 0.02922797203063965, 0.030054569244384766, 0.030881166458129883, 0.031707763671875]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 4.0, 4.0, 6.0, 9.0, 14.0, 19.0, 31.0, 68.0, 97.0, 144.0, 112.0, 153.0, 110.0, 93.0, 60.0, 32.0, 15.0, 16.0, 8.0, 6.0, 2.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.0196380615234375, -0.019227802753448486, -0.018817543983459473, -0.01840728521347046, -0.017997026443481445, -0.01758676767349243, -0.017176508903503418, -0.016766250133514404, -0.01635599136352539, -0.015945732593536377, -0.015535473823547363, -0.01512521505355835, -0.014714956283569336, -0.014304697513580322, -0.013894438743591309, -0.013484179973602295, -0.013073921203613281, -0.012663662433624268, -0.012253403663635254, -0.01184314489364624, -0.011432886123657227, -0.011022627353668213, -0.0106123685836792, -0.010202109813690186, -0.009791851043701172, -0.009381592273712158, -0.008971333503723145, -0.00856107473373413, -0.008150815963745117, -0.0077405571937561035, -0.00733029842376709, -0.006920039653778076, -0.0065097808837890625, -0.006099522113800049, -0.005689263343811035, -0.0052790045738220215, -0.004868745803833008, -0.004458487033843994, -0.0040482282638549805, -0.003637969493865967, -0.003227710723876953, -0.0028174519538879395, -0.0024071931838989258, -0.001996934413909912, -0.0015866756439208984, -0.0011764168739318848, -0.0007661581039428711, -0.0003558993339538574, 5.435943603515625e-05, 0.0004646182060241699, 0.0008748769760131836, 0.0012851357460021973, 0.001695394515991211, 0.0021056532859802246, 0.0025159120559692383, 0.002926170825958252, 0.0033364295959472656, 0.0037466883659362793, 0.004156947135925293, 0.004567205905914307, 0.00497746467590332, 0.005387723445892334, 0.005797982215881348, 0.006208240985870361, 0.006618499755859375]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 3.0, 2.0, 10.0, 6.0, 8.0, 15.0, 17.0, 49.0, 73.0, 115.0, 203.0, 198.0, 134.0, 60.0, 36.0, 32.0, 22.0, 5.0, 9.0, 3.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.18146350979804993, -0.17200803756713867, -0.16255256533622742, -0.15309710800647736, -0.1436416357755661, -0.13418616354465485, -0.12473069876432419, -0.11527523398399353, -0.10581976175308228, -0.09636428952217102, -0.08690882474184036, -0.0774533599615097, -0.06799788773059845, -0.05854241922497749, -0.04908695071935654, -0.03963148593902588, -0.030176013708114624, -0.020720545202493668, -0.011265076696872711, -0.0018096081912517548, 0.007645860314369202, 0.017101328819990158, 0.026556797325611115, 0.03601226210594177, 0.04546773433685303, 0.054923202842473984, 0.06437867134809494, 0.0738341361284256, 0.08328960835933685, 0.09274508059024811, 0.10220054537057877, 0.11165601015090942, 0.12111151218414307, 0.13056698441505432, 0.14002245664596558, 0.14947791397571564, 0.1589333862066269, 0.16838885843753815, 0.1778443157672882, 0.18729978799819946, 0.19675526022911072, 0.20621073246002197, 0.21566620469093323, 0.2251216620206833, 0.23457713425159454, 0.2440326064825058, 0.25348806381225586, 0.2629435360431671, 0.27239900827407837, 0.2818544805049896, 0.2913099527359009, 0.30076542496681213, 0.3102208971977234, 0.31967633962631226, 0.3291318118572235, 0.33858728408813477, 0.348042756319046, 0.3574982285499573, 0.36695370078086853, 0.3764091730117798, 0.38586461544036865, 0.3953200876712799, 0.40477555990219116, 0.4142310321331024, 0.42368650436401367]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 1.0, 2.0, 2.0, 3.0, 7.0, 12.0, 14.0, 11.0, 7.0, 13.0, 21.0, 19.0, 27.0, 26.0, 23.0, 36.0, 43.0, 32.0, 46.0, 48.0, 35.0, 43.0, 46.0, 48.0, 58.0, 47.0, 48.0, 44.0, 25.0, 25.0, 29.0, 30.0, 25.0, 20.0, 18.0, 18.0, 9.0, 10.0, 10.0, 11.0, 5.0, 3.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.24401968717575073, -0.23691141605377197, -0.2298031598329544, -0.22269488871097565, -0.21558663249015808, -0.20847836136817932, -0.20137009024620056, -0.194261834025383, -0.18715357780456543, -0.18004530668258667, -0.1729370504617691, -0.16582877933979034, -0.15872052311897278, -0.15161225199699402, -0.14450398087501526, -0.1373957246541977, -0.13028745353221893, -0.12317918986082077, -0.11607092618942261, -0.10896265506744385, -0.10185439884662628, -0.09474612772464752, -0.08763786405324936, -0.0805296003818512, -0.07342133671045303, -0.06631307303905487, -0.05920480936765671, -0.052096541970968246, -0.044988278299570084, -0.03788001462817192, -0.03077174723148346, -0.023663483560085297, -0.016555219888687134, -0.009446955285966396, -0.002338690683245659, 0.004769574850797653, 0.011877838522195816, 0.01898610219359398, 0.02609436959028244, 0.0332026332616806, 0.040310896933078766, 0.04741916060447693, 0.05452742427587509, 0.06163569167256355, 0.06874395906925201, 0.07585221529006958, 0.08296048641204834, 0.0900687500834465, 0.09717701375484467, 0.10428527742624283, 0.11139354109764099, 0.11850181221961975, 0.12561006844043732, 0.13271833956241608, 0.13982659578323364, 0.1469348669052124, 0.15404313802719116, 0.16115140914916992, 0.1682596653699875, 0.17536793649196625, 0.1824761927127838, 0.18958446383476257, 0.19669273495674133, 0.2038009911775589, 0.21090924739837646]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 5.0, 4.0, 4.0, 4.0, 11.0, 10.0, 11.0, 26.0, 25.0, 48.0, 86.0, 157.0, 373.0, 994.0, 4075.0, 3993483.0, 189705.0, 3440.0, 963.0, 391.0, 202.0, 90.0, 51.0, 34.0, 26.0, 18.0, 10.0, 10.0, 8.0, 7.0, 2.0, 4.0, 2.0, 4.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2496337890625, -0.24001502990722656, -0.23039627075195312, -0.2207775115966797, -0.21115875244140625, -0.2015399932861328, -0.19192123413085938, -0.18230247497558594, -0.1726837158203125, -0.16306495666503906, -0.15344619750976562, -0.1438274383544922, -0.13420867919921875, -0.12458992004394531, -0.11497116088867188, -0.10535240173339844, -0.095733642578125, -0.08611488342285156, -0.07649612426757812, -0.06687736511230469, -0.05725860595703125, -0.04763984680175781, -0.038021087646484375, -0.028402328491210938, -0.0187835693359375, -0.009164810180664062, 0.000453948974609375, 0.010072708129882812, 0.01969146728515625, 0.029310226440429688, 0.038928985595703125, 0.04854774475097656, 0.05816650390625, 0.06778526306152344, 0.07740402221679688, 0.08702278137207031, 0.09664154052734375, 0.10626029968261719, 0.11587905883789062, 0.12549781799316406, 0.1351165771484375, 0.14473533630371094, 0.15435409545898438, 0.1639728546142578, 0.17359161376953125, 0.1832103729248047, 0.19282913208007812, 0.20244789123535156, 0.212066650390625, 0.22168540954589844, 0.23130416870117188, 0.2409229278564453, 0.25054168701171875, 0.2601604461669922, 0.2697792053222656, 0.27939796447753906, 0.2890167236328125, 0.29863548278808594, 0.3082542419433594, 0.3178730010986328, 0.32749176025390625, 0.3371105194091797, 0.3467292785644531, 0.35634803771972656, 0.365966796875]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 5.0, 3.0, 4.0, 5.0, 9.0, 20.0, 46.0, 63.0, 113.0, 142.0, 132.0, 121.0, 112.0, 82.0, 50.0, 46.0, 18.0, 14.0, 7.0, 5.0, 2.0, 1.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0230712890625, -0.022143840789794922, -0.021216392517089844, -0.020288944244384766, -0.019361495971679688, -0.01843404769897461, -0.01750659942626953, -0.016579151153564453, -0.015651702880859375, -0.014724254608154297, -0.013796806335449219, -0.01286935806274414, -0.011941909790039062, -0.011014461517333984, -0.010087013244628906, -0.009159564971923828, -0.00823211669921875, -0.007304668426513672, -0.006377220153808594, -0.005449771881103516, -0.0045223236083984375, -0.0035948753356933594, -0.0026674270629882812, -0.0017399787902832031, -0.000812530517578125, 0.00011491775512695312, 0.0010423660278320312, 0.0019698143005371094, 0.0028972625732421875, 0.0038247108459472656, 0.004752159118652344, 0.005679607391357422, 0.0066070556640625, 0.007534503936767578, 0.008461952209472656, 0.009389400482177734, 0.010316848754882812, 0.01124429702758789, 0.012171745300292969, 0.013099193572998047, 0.014026641845703125, 0.014954090118408203, 0.01588153839111328, 0.01680898666381836, 0.017736434936523438, 0.018663883209228516, 0.019591331481933594, 0.020518779754638672, 0.02144622802734375, 0.022373676300048828, 0.023301124572753906, 0.024228572845458984, 0.025156021118164062, 0.02608346939086914, 0.02701091766357422, 0.027938365936279297, 0.028865814208984375, 0.029793262481689453, 0.03072071075439453, 0.03164815902709961, 0.03257560729980469, 0.033503055572509766, 0.034430503845214844, 0.03535795211791992, 0.036285400390625]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 16.0, 20.0, 25.0, 39.0, 50.0, 97.0, 153.0, 292.0, 642.0, 1569.0, 5309.0, 31480.0, 4099969.0, 45452.0, 6017.0, 1705.0, 650.0, 308.0, 192.0, 123.0, 67.0, 37.0, 30.0, 18.0, 11.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09661865234375, -0.09335041046142578, -0.09008216857910156, -0.08681392669677734, -0.08354568481445312, -0.0802774429321289, -0.07700920104980469, -0.07374095916748047, -0.07047271728515625, -0.06720447540283203, -0.06393623352050781, -0.060667991638183594, -0.057399749755859375, -0.054131507873535156, -0.05086326599121094, -0.04759502410888672, -0.0443267822265625, -0.04105854034423828, -0.03779029846191406, -0.034522056579589844, -0.031253814697265625, -0.027985572814941406, -0.024717330932617188, -0.02144908905029297, -0.01818084716796875, -0.014912605285644531, -0.011644363403320312, -0.008376121520996094, -0.005107879638671875, -0.0018396377563476562, 0.0014286041259765625, 0.004696846008300781, 0.007965087890625, 0.011233329772949219, 0.014501571655273438, 0.017769813537597656, 0.021038055419921875, 0.024306297302246094, 0.027574539184570312, 0.03084278106689453, 0.03411102294921875, 0.03737926483154297, 0.04064750671386719, 0.043915748596191406, 0.047183990478515625, 0.050452232360839844, 0.05372047424316406, 0.05698871612548828, 0.0602569580078125, 0.06352519989013672, 0.06679344177246094, 0.07006168365478516, 0.07332992553710938, 0.0765981674194336, 0.07986640930175781, 0.08313465118408203, 0.08640289306640625, 0.08967113494873047, 0.09293937683105469, 0.0962076187133789, 0.09947586059570312, 0.10274410247802734, 0.10601234436035156, 0.10928058624267578, 0.112548828125]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 7.0, 6.0, 8.0, 7.0, 12.0, 16.0, 30.0, 96.0, 1036.0, 2678.0, 98.0, 36.0, 20.0, 5.0, 5.0, 5.0, 5.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.040985107421875, -0.039780616760253906, -0.03857612609863281, -0.03737163543701172, -0.036167144775390625, -0.03496265411376953, -0.03375816345214844, -0.032553672790527344, -0.03134918212890625, -0.030144691467285156, -0.028940200805664062, -0.02773571014404297, -0.026531219482421875, -0.02532672882080078, -0.024122238159179688, -0.022917747497558594, -0.0217132568359375, -0.020508766174316406, -0.019304275512695312, -0.01809978485107422, -0.016895294189453125, -0.01569080352783203, -0.014486312866210938, -0.013281822204589844, -0.01207733154296875, -0.010872840881347656, -0.009668350219726562, -0.008463859558105469, -0.007259368896484375, -0.006054878234863281, -0.0048503875732421875, -0.0036458969116210938, -0.00244140625, -0.0012369155883789062, -3.24249267578125e-05, 0.0011720657348632812, 0.002376556396484375, 0.0035810470581054688, 0.0047855377197265625, 0.005990028381347656, 0.00719451904296875, 0.008399009704589844, 0.009603500366210938, 0.010807991027832031, 0.012012481689453125, 0.013216972351074219, 0.014421463012695312, 0.015625953674316406, 0.0168304443359375, 0.018034934997558594, 0.019239425659179688, 0.02044391632080078, 0.021648406982421875, 0.02285289764404297, 0.024057388305664062, 0.025261878967285156, 0.02646636962890625, 0.027670860290527344, 0.028875350952148438, 0.03007984161376953, 0.031284332275390625, 0.03248882293701172, 0.03369331359863281, 0.034897804260253906, 0.036102294921875]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 4.0, 0.0, 0.0, 5.0, 11.0, 17.0, 34.0, 78.0, 298.0, 405.0, 106.0, 25.0, 11.0, 5.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1965036690235138, -0.1906023621559143, -0.18470105528831482, -0.17879974842071533, -0.17289844155311584, -0.16699713468551636, -0.16109582781791687, -0.15519452095031738, -0.1492932140827179, -0.1433919072151184, -0.13749060034751892, -0.13158929347991943, -0.12568798661231995, -0.11978667974472046, -0.11388537287712097, -0.10798406600952148, -0.1020827665925026, -0.0961814597249031, -0.09028015285730362, -0.08437884598970413, -0.07847753912210464, -0.07257623225450516, -0.06667493283748627, -0.06077362224459648, -0.054872315376996994, -0.04897100850939751, -0.04306970164179802, -0.03716839849948883, -0.03126709163188934, -0.025365782901644707, -0.01946447789669037, -0.013563171029090881, -0.007661864161491394, -0.001760557759553194, 0.004140748642385006, 0.010042054578661919, 0.015943361446261406, 0.021844668313860893, 0.02774597331881523, 0.03364728018641472, 0.039548587054014206, 0.04544989392161369, 0.05135120078921318, 0.05725250393152237, 0.06315381079912186, 0.06905511766672134, 0.07495642453432083, 0.08085773140192032, 0.0867590382695198, 0.0926603451371193, 0.09856165200471878, 0.10446295887231827, 0.11036426573991776, 0.11626557260751724, 0.12216687202453613, 0.12806817889213562, 0.1339694857597351, 0.1398707926273346, 0.14577209949493408, 0.15167340636253357, 0.15757471323013306, 0.16347602009773254, 0.16937732696533203, 0.17527863383293152, 0.181179940700531]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 3.0, 17.0, 17.0, 21.0, 27.0, 40.0, 48.0, 52.0, 59.0, 75.0, 82.0, 71.0, 63.0, 75.0, 74.0, 60.0, 47.0, 49.0, 31.0, 25.0, 13.0, 13.0, 8.0, 12.0, 5.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.08066821098327637, -0.07782430946826935, -0.07498040795326233, -0.07213649898767471, -0.0692925974726677, -0.06644869595766068, -0.06360478699207306, -0.06076088547706604, -0.05791698396205902, -0.055073082447052, -0.052229177206754684, -0.04938527196645737, -0.04654137045145035, -0.04369746893644333, -0.04085356369614601, -0.038009658455848694, -0.035165756940841675, -0.032321855425834656, -0.029477950185537338, -0.02663404680788517, -0.023790143430233, -0.020946240052580833, -0.018102336674928665, -0.015258433297276497, -0.012414529919624329, -0.00957062654197216, -0.006726723164319992, -0.003882819786667824, -0.0010389164090156555, 0.0018049869686365128, 0.004648890346288681, 0.007492793723940849, 0.010336697101593018, 0.013180600479245186, 0.016024503856897354, 0.018868407234549522, 0.02171231061220169, 0.02455621398985386, 0.027400117367506027, 0.030244020745158195, 0.033087924122810364, 0.03593182563781738, 0.0387757308781147, 0.04161963611841202, 0.04446353763341904, 0.047307439148426056, 0.05015134438872337, 0.05299524962902069, 0.05583915114402771, 0.05868305265903473, 0.061526957899332047, 0.06437086313962936, 0.06721476465463638, 0.0700586661696434, 0.07290257513523102, 0.07574647665023804, 0.07859037816524506, 0.08143427968025208, 0.0842781811952591, 0.08712209016084671, 0.08996599167585373, 0.09280989319086075, 0.09565380215644836, 0.09849770367145538, 0.1013416051864624]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 5.0, 3.0, 4.0, 9.0, 11.0, 10.0, 13.0, 30.0, 33.0, 45.0, 62.0, 83.0, 120.0, 184.0, 246.0, 418.0, 648.0, 1146.0, 2299.0, 5583.0, 21348.0, 153992.0, 719150.0, 115515.0, 17730.0, 4928.0, 2092.0, 1038.0, 613.0, 395.0, 236.0, 156.0, 126.0, 88.0, 52.0, 45.0, 35.0, 15.0, 15.0, 12.0, 5.0, 9.0, 6.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.076904296875, -0.07445812225341797, -0.07201194763183594, -0.0695657730102539, -0.06711959838867188, -0.06467342376708984, -0.06222724914550781, -0.05978107452392578, -0.05733489990234375, -0.05488872528076172, -0.05244255065917969, -0.049996376037597656, -0.047550201416015625, -0.045104026794433594, -0.04265785217285156, -0.04021167755126953, -0.0377655029296875, -0.03531932830810547, -0.03287315368652344, -0.030426979064941406, -0.027980804443359375, -0.025534629821777344, -0.023088455200195312, -0.02064228057861328, -0.01819610595703125, -0.01574993133544922, -0.013303756713867188, -0.010857582092285156, -0.008411407470703125, -0.005965232849121094, -0.0035190582275390625, -0.0010728836059570312, 0.001373291015625, 0.0038194656372070312, 0.0062656402587890625, 0.008711814880371094, 0.011157989501953125, 0.013604164123535156, 0.016050338745117188, 0.01849651336669922, 0.02094268798828125, 0.02338886260986328, 0.025835037231445312, 0.028281211853027344, 0.030727386474609375, 0.033173561096191406, 0.03561973571777344, 0.03806591033935547, 0.0405120849609375, 0.04295825958251953, 0.04540443420410156, 0.047850608825683594, 0.050296783447265625, 0.052742958068847656, 0.05518913269042969, 0.05763530731201172, 0.06008148193359375, 0.06252765655517578, 0.06497383117675781, 0.06742000579833984, 0.06986618041992188, 0.0723123550415039, 0.07475852966308594, 0.07720470428466797, 0.07965087890625]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 3.0, 2.0, 7.0, 15.0, 26.0, 51.0, 85.0, 123.0, 143.0, 146.0, 116.0, 101.0, 90.0, 35.0, 25.0, 15.0, 7.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.024261474609375, -0.023267269134521484, -0.02227306365966797, -0.021278858184814453, -0.020284652709960938, -0.019290447235107422, -0.018296241760253906, -0.01730203628540039, -0.016307830810546875, -0.01531362533569336, -0.014319419860839844, -0.013325214385986328, -0.012331008911132812, -0.011336803436279297, -0.010342597961425781, -0.009348392486572266, -0.00835418701171875, -0.007359981536865234, -0.006365776062011719, -0.005371570587158203, -0.0043773651123046875, -0.003383159637451172, -0.0023889541625976562, -0.0013947486877441406, -0.000400543212890625, 0.0005936622619628906, 0.0015878677368164062, 0.002582073211669922, 0.0035762786865234375, 0.004570484161376953, 0.005564689636230469, 0.006558895111083984, 0.0075531005859375, 0.008547306060791016, 0.009541511535644531, 0.010535717010498047, 0.011529922485351562, 0.012524127960205078, 0.013518333435058594, 0.01451253890991211, 0.015506744384765625, 0.01650094985961914, 0.017495155334472656, 0.018489360809326172, 0.019483566284179688, 0.020477771759033203, 0.02147197723388672, 0.022466182708740234, 0.02346038818359375, 0.024454593658447266, 0.02544879913330078, 0.026443004608154297, 0.027437210083007812, 0.028431415557861328, 0.029425621032714844, 0.03041982650756836, 0.031414031982421875, 0.03240823745727539, 0.033402442932128906, 0.03439664840698242, 0.03539085388183594, 0.03638505935668945, 0.03737926483154297, 0.038373470306396484, 0.03936767578125]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 4.0, 2.0, 1.0, 4.0, 3.0, 11.0, 8.0, 12.0, 8.0, 19.0, 18.0, 23.0, 19.0, 42.0, 49.0, 63.0, 94.0, 135.0, 242.0, 480.0, 933.0, 2615.0, 7934.0, 30591.0, 179379.0, 652653.0, 137295.0, 25083.0, 6657.0, 2150.0, 867.0, 419.0, 206.0, 146.0, 99.0, 78.0, 43.0, 39.0, 22.0, 30.0, 22.0, 8.0, 16.0, 9.0, 12.0, 7.0, 6.0, 4.0, 4.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.047821044921875, -0.04619646072387695, -0.044571876525878906, -0.04294729232788086, -0.04132270812988281, -0.039698123931884766, -0.03807353973388672, -0.03644895553588867, -0.034824371337890625, -0.03319978713989258, -0.03157520294189453, -0.029950618743896484, -0.028326034545898438, -0.02670145034790039, -0.025076866149902344, -0.023452281951904297, -0.02182769775390625, -0.020203113555908203, -0.018578529357910156, -0.01695394515991211, -0.015329360961914062, -0.013704776763916016, -0.012080192565917969, -0.010455608367919922, -0.008831024169921875, -0.007206439971923828, -0.005581855773925781, -0.003957271575927734, -0.0023326873779296875, -0.0007081031799316406, 0.0009164810180664062, 0.002541065216064453, 0.0041656494140625, 0.005790233612060547, 0.007414817810058594, 0.00903940200805664, 0.010663986206054688, 0.012288570404052734, 0.013913154602050781, 0.015537738800048828, 0.017162322998046875, 0.018786907196044922, 0.02041149139404297, 0.022036075592041016, 0.023660659790039062, 0.02528524398803711, 0.026909828186035156, 0.028534412384033203, 0.03015899658203125, 0.0317835807800293, 0.033408164978027344, 0.03503274917602539, 0.03665733337402344, 0.038281917572021484, 0.03990650177001953, 0.04153108596801758, 0.043155670166015625, 0.04478025436401367, 0.04640483856201172, 0.048029422760009766, 0.04965400695800781, 0.05127859115600586, 0.052903175354003906, 0.05452775955200195, 0.05615234375]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 5.0, 5.0, 8.0, 9.0, 5.0, 8.0, 12.0, 13.0, 13.0, 19.0, 28.0, 16.0, 22.0, 29.0, 26.0, 39.0, 45.0, 48.0, 45.0, 47.0, 40.0, 41.0, 47.0, 50.0, 39.0, 38.0, 45.0, 32.0, 33.0, 31.0, 31.0, 20.0, 26.0, 12.0, 16.0, 16.0, 8.0, 9.0, 10.0, 4.0, 9.0, 6.0, 4.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.04998779296875, -0.048511505126953125, -0.04703521728515625, -0.045558929443359375, -0.0440826416015625, -0.042606353759765625, -0.04113006591796875, -0.039653778076171875, -0.038177490234375, -0.036701202392578125, -0.03522491455078125, -0.033748626708984375, -0.0322723388671875, -0.030796051025390625, -0.02931976318359375, -0.027843475341796875, -0.0263671875, -0.024890899658203125, -0.02341461181640625, -0.021938323974609375, -0.0204620361328125, -0.018985748291015625, -0.01750946044921875, -0.016033172607421875, -0.014556884765625, -0.013080596923828125, -0.01160430908203125, -0.010128021240234375, -0.0086517333984375, -0.007175445556640625, -0.00569915771484375, -0.004222869873046875, -0.00274658203125, -0.001270294189453125, 0.00020599365234375, 0.001682281494140625, 0.0031585693359375, 0.004634857177734375, 0.00611114501953125, 0.007587432861328125, 0.009063720703125, 0.010540008544921875, 0.01201629638671875, 0.013492584228515625, 0.0149688720703125, 0.016445159912109375, 0.01792144775390625, 0.019397735595703125, 0.0208740234375, 0.022350311279296875, 0.02382659912109375, 0.025302886962890625, 0.0267791748046875, 0.028255462646484375, 0.02973175048828125, 0.031208038330078125, 0.032684326171875, 0.034160614013671875, 0.03563690185546875, 0.037113189697265625, 0.0385894775390625, 0.040065765380859375, 0.04154205322265625, 0.043018341064453125, 0.04449462890625]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 2.0, 4.0, 5.0, 5.0, 4.0, 7.0, 9.0, 19.0, 32.0, 69.0, 85.0, 203.0, 611.0, 2599.0, 33837.0, 947404.0, 59086.0, 3297.0, 778.0, 255.0, 93.0, 47.0, 33.0, 22.0, 15.0, 12.0, 6.0, 10.0, 3.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0222625732421875, -0.021509647369384766, -0.02075672149658203, -0.020003795623779297, -0.019250869750976562, -0.018497943878173828, -0.017745018005371094, -0.01699209213256836, -0.016239166259765625, -0.01548624038696289, -0.014733314514160156, -0.013980388641357422, -0.013227462768554688, -0.012474536895751953, -0.011721611022949219, -0.010968685150146484, -0.01021575927734375, -0.009462833404541016, -0.008709907531738281, -0.007956981658935547, -0.0072040557861328125, -0.006451129913330078, -0.005698204040527344, -0.004945278167724609, -0.004192352294921875, -0.0034394264221191406, -0.0026865005493164062, -0.0019335746765136719, -0.0011806488037109375, -0.0004277229309082031, 0.00032520294189453125, 0.0010781288146972656, 0.0018310546875, 0.0025839805603027344, 0.0033369064331054688, 0.004089832305908203, 0.0048427581787109375, 0.005595684051513672, 0.006348609924316406, 0.007101535797119141, 0.007854461669921875, 0.00860738754272461, 0.009360313415527344, 0.010113239288330078, 0.010866165161132812, 0.011619091033935547, 0.012372016906738281, 0.013124942779541016, 0.01387786865234375, 0.014630794525146484, 0.015383720397949219, 0.016136646270751953, 0.016889572143554688, 0.017642498016357422, 0.018395423889160156, 0.01914834976196289, 0.019901275634765625, 0.02065420150756836, 0.021407127380371094, 0.022160053253173828, 0.022912979125976562, 0.023665904998779297, 0.02441883087158203, 0.025171756744384766, 0.0259246826171875]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 4.0, 1.0, 2.0, 0.0, 5.0, 2.0, 7.0, 1.0, 7.0, 3.0, 13.0, 9.0, 18.0, 13.0, 11.0, 25.0, 31.0, 35.0, 46.0, 57.0, 40.0, 68.0, 51.0, 85.0, 53.0, 45.0, 58.0, 39.0, 56.0, 30.0, 46.0, 30.0, 19.0, 5.0, 15.0, 15.0, 10.0, 7.0, 13.0, 7.0, 0.0, 6.0, 6.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 3.0], "bins": [-4.5299530029296875e-06, -4.383735358715057e-06, -4.237517714500427e-06, -4.091300070285797e-06, -3.945082426071167e-06, -3.798864781856537e-06, -3.6526471376419067e-06, -3.5064294934272766e-06, -3.3602118492126465e-06, -3.2139942049980164e-06, -3.0677765607833862e-06, -2.921558916568756e-06, -2.775341272354126e-06, -2.629123628139496e-06, -2.4829059839248657e-06, -2.3366883397102356e-06, -2.1904706954956055e-06, -2.0442530512809753e-06, -1.8980354070663452e-06, -1.751817762851715e-06, -1.605600118637085e-06, -1.4593824744224548e-06, -1.3131648302078247e-06, -1.1669471859931946e-06, -1.0207295417785645e-06, -8.745118975639343e-07, -7.282942533493042e-07, -5.820766091346741e-07, -4.3585896492004395e-07, -2.896413207054138e-07, -1.434236764907837e-07, 2.7939677238464355e-09, 1.4901161193847656e-07, 2.952292561531067e-07, 4.414469003677368e-07, 5.876645445823669e-07, 7.338821887969971e-07, 8.800998330116272e-07, 1.0263174772262573e-06, 1.1725351214408875e-06, 1.3187527656555176e-06, 1.4649704098701477e-06, 1.6111880540847778e-06, 1.757405698299408e-06, 1.903623342514038e-06, 2.0498409867286682e-06, 2.1960586309432983e-06, 2.3422762751579285e-06, 2.4884939193725586e-06, 2.6347115635871887e-06, 2.780929207801819e-06, 2.927146852016449e-06, 3.073364496231079e-06, 3.2195821404457092e-06, 3.3657997846603394e-06, 3.5120174288749695e-06, 3.6582350730895996e-06, 3.8044527173042297e-06, 3.95067036151886e-06, 4.09688800573349e-06, 4.24310564994812e-06, 4.38932329416275e-06, 4.53554093837738e-06, 4.6817585825920105e-06, 4.827976226806641e-06]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 5.0, 1.0, 1.0, 2.0, 9.0, 7.0, 18.0, 8.0, 20.0, 37.0, 47.0, 79.0, 93.0, 152.0, 312.0, 670.0, 1589.0, 5383.0, 27433.0, 352452.0, 602977.0, 45887.0, 7645.0, 2081.0, 845.0, 323.0, 154.0, 114.0, 57.0, 36.0, 33.0, 18.0, 24.0, 11.0, 12.0, 8.0, 4.0, 5.0, 3.0, 3.0, 1.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.01459503173828125, -0.014126062393188477, -0.013657093048095703, -0.01318812370300293, -0.012719154357910156, -0.012250185012817383, -0.01178121566772461, -0.011312246322631836, -0.010843276977539062, -0.010374307632446289, -0.009905338287353516, -0.009436368942260742, -0.008967399597167969, -0.008498430252075195, -0.008029460906982422, -0.0075604915618896484, -0.007091522216796875, -0.0066225528717041016, -0.006153583526611328, -0.005684614181518555, -0.005215644836425781, -0.004746675491333008, -0.004277706146240234, -0.003808736801147461, -0.0033397674560546875, -0.002870798110961914, -0.0024018287658691406, -0.0019328594207763672, -0.0014638900756835938, -0.0009949207305908203, -0.0005259513854980469, -5.698204040527344e-05, 0.0004119873046875, 0.0008809566497802734, 0.0013499259948730469, 0.0018188953399658203, 0.0022878646850585938, 0.002756834030151367, 0.0032258033752441406, 0.003694772720336914, 0.0041637420654296875, 0.004632711410522461, 0.005101680755615234, 0.005570650100708008, 0.006039619445800781, 0.006508588790893555, 0.006977558135986328, 0.0074465274810791016, 0.007915496826171875, 0.008384466171264648, 0.008853435516357422, 0.009322404861450195, 0.009791374206542969, 0.010260343551635742, 0.010729312896728516, 0.011198282241821289, 0.011667251586914062, 0.012136220932006836, 0.01260519027709961, 0.013074159622192383, 0.013543128967285156, 0.01401209831237793, 0.014481067657470703, 0.014950037002563477, 0.01541900634765625]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 4.0, 5.0, 0.0, 5.0, 3.0, 10.0, 7.0, 9.0, 8.0, 15.0, 35.0, 31.0, 38.0, 50.0, 70.0, 79.0, 97.0, 96.0, 86.0, 74.0, 79.0, 37.0, 31.0, 27.0, 24.0, 17.0, 18.0, 12.0, 5.0, 13.0, 5.0, 10.0, 2.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01372528076171875, -0.013313651084899902, -0.012902021408081055, -0.012490391731262207, -0.01207876205444336, -0.011667132377624512, -0.011255502700805664, -0.010843873023986816, -0.010432243347167969, -0.010020613670349121, -0.009608983993530273, -0.009197354316711426, -0.008785724639892578, -0.00837409496307373, -0.007962465286254883, -0.007550835609436035, -0.0071392059326171875, -0.00672757625579834, -0.006315946578979492, -0.0059043169021606445, -0.005492687225341797, -0.005081057548522949, -0.0046694278717041016, -0.004257798194885254, -0.0038461685180664062, -0.0034345388412475586, -0.003022909164428711, -0.0026112794876098633, -0.0021996498107910156, -0.001788020133972168, -0.0013763904571533203, -0.0009647607803344727, -0.000553131103515625, -0.00014150142669677734, 0.0002701282501220703, 0.000681757926940918, 0.0010933876037597656, 0.0015050172805786133, 0.001916646957397461, 0.0023282766342163086, 0.0027399063110351562, 0.003151535987854004, 0.0035631656646728516, 0.003974795341491699, 0.004386425018310547, 0.0047980546951293945, 0.005209684371948242, 0.00562131404876709, 0.0060329437255859375, 0.006444573402404785, 0.006856203079223633, 0.0072678327560424805, 0.007679462432861328, 0.008091092109680176, 0.008502721786499023, 0.008914351463317871, 0.009325981140136719, 0.009737610816955566, 0.010149240493774414, 0.010560870170593262, 0.01097249984741211, 0.011384129524230957, 0.011795759201049805, 0.012207388877868652, 0.0126190185546875]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 3.0, 2.0, 4.0, 3.0, 6.0, 8.0, 8.0, 9.0, 16.0, 13.0, 18.0, 29.0, 38.0, 46.0, 66.0, 127.0, 126.0, 119.0, 100.0, 88.0, 49.0, 32.0, 31.0, 14.0, 13.0, 9.0, 3.0, 5.0, 4.0, 6.0, 2.0, 2.0, 2.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.18577265739440918, -0.1798379272222519, -0.1739032119512558, -0.1679684817790985, -0.16203375160694122, -0.15609902143478394, -0.15016430616378784, -0.14422957599163055, -0.13829484581947327, -0.13236011564731598, -0.12642540037631989, -0.1204906702041626, -0.11455594003200531, -0.10862121731042862, -0.10268649458885193, -0.09675176441669464, -0.09081704914569855, -0.08488232642412186, -0.07894759625196457, -0.07301287353038788, -0.06707814335823059, -0.0611434206366539, -0.05520869791507721, -0.04927397146821022, -0.04333924502134323, -0.03740451857447624, -0.03146979212760925, -0.025535069406032562, -0.019600342959165573, -0.013665616512298584, -0.007730893790721893, -0.0017961673438549042, 0.004138574004173279, 0.010073299519717693, 0.016008025035262108, 0.021942749619483948, 0.027877476066350937, 0.033812202513217926, 0.03974692523479462, 0.045681651681661606, 0.051616378128528595, 0.057551104575395584, 0.06348583102226257, 0.06942055374383926, 0.07535527646541595, 0.08129000663757324, 0.08722472935914993, 0.09315945208072662, 0.09909418225288391, 0.1050289049744606, 0.11096363514661789, 0.11689835786819458, 0.12283308804035187, 0.12876781821250916, 0.13470253348350525, 0.14063726365566254, 0.14657199382781982, 0.1525067239999771, 0.1584414392709732, 0.1643761694431305, 0.17031089961528778, 0.17624562978744507, 0.18218034505844116, 0.18811507523059845, 0.19404979050159454]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 5.0, 6.0, 9.0, 12.0, 8.0, 15.0, 14.0, 14.0, 18.0, 26.0, 18.0, 34.0, 34.0, 33.0, 40.0, 40.0, 39.0, 36.0, 43.0, 71.0, 37.0, 46.0, 48.0, 46.0, 43.0, 30.0, 37.0, 31.0, 23.0, 19.0, 23.0, 29.0, 9.0, 17.0, 11.0, 13.0, 7.0, 3.0, 4.0, 7.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.22710150480270386, -0.21943968534469604, -0.21177785098552704, -0.20411601662635803, -0.19645419716835022, -0.1887923777103424, -0.1811305433511734, -0.1734687089920044, -0.16580688953399658, -0.15814507007598877, -0.15048323571681976, -0.14282140135765076, -0.13515958189964294, -0.12749776244163513, -0.11983592808246613, -0.11217410117387772, -0.1045122742652893, -0.0968504473567009, -0.08918862044811249, -0.08152679353952408, -0.07386496663093567, -0.06620313972234726, -0.05854131281375885, -0.05087948590517044, -0.04321765899658203, -0.03555583208799362, -0.027894005179405212, -0.020232178270816803, -0.012570351362228394, -0.004908524453639984, 0.0027533024549484253, 0.010415129363536835, 0.018076956272125244, 0.025738783180713654, 0.03340061008930206, 0.04106243699789047, 0.04872426390647888, 0.05638609081506729, 0.0640479177236557, 0.07170974463224411, 0.07937157154083252, 0.08703339844942093, 0.09469522535800934, 0.10235705226659775, 0.11001887917518616, 0.11768070608377457, 0.12534253299236298, 0.13300436735153198, 0.1406661868095398, 0.1483280062675476, 0.1559898406267166, 0.16365167498588562, 0.17131349444389343, 0.17897531390190125, 0.18663714826107025, 0.19429898262023926, 0.20196080207824707, 0.20962262153625488, 0.2172844558954239, 0.2249462902545929, 0.2326081097126007, 0.24026992917060852, 0.24793176352977753, 0.25559359788894653, 0.26325541734695435]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 8.0, 5.0, 1.0, 3.0, 9.0, 9.0, 3.0, 30.0, 36.0, 49.0, 92.0, 171.0, 406.0, 860.0, 2495.0, 16012.0, 4092601.0, 73830.0, 5058.0, 1346.0, 541.0, 262.0, 153.0, 90.0, 64.0, 43.0, 25.0, 18.0, 15.0, 10.0, 9.0, 8.0, 7.0, 6.0, 2.0, 0.0, 3.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0947265625, -0.0908966064453125, -0.087066650390625, -0.0832366943359375, -0.07940673828125, -0.0755767822265625, -0.071746826171875, -0.0679168701171875, -0.0640869140625, -0.0602569580078125, -0.056427001953125, -0.0525970458984375, -0.04876708984375, -0.0449371337890625, -0.041107177734375, -0.0372772216796875, -0.033447265625, -0.0296173095703125, -0.025787353515625, -0.0219573974609375, -0.01812744140625, -0.0142974853515625, -0.010467529296875, -0.0066375732421875, -0.0028076171875, 0.0010223388671875, 0.004852294921875, 0.0086822509765625, 0.01251220703125, 0.0163421630859375, 0.020172119140625, 0.0240020751953125, 0.02783203125, 0.0316619873046875, 0.035491943359375, 0.0393218994140625, 0.04315185546875, 0.0469818115234375, 0.050811767578125, 0.0546417236328125, 0.0584716796875, 0.0623016357421875, 0.066131591796875, 0.0699615478515625, 0.07379150390625, 0.0776214599609375, 0.081451416015625, 0.0852813720703125, 0.089111328125, 0.0929412841796875, 0.096771240234375, 0.1006011962890625, 0.10443115234375, 0.1082611083984375, 0.112091064453125, 0.1159210205078125, 0.1197509765625, 0.1235809326171875, 0.127410888671875, 0.1312408447265625, 0.13507080078125, 0.1389007568359375, 0.142730712890625, 0.1465606689453125, 0.150390625]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 4.0, 2.0, 5.0, 9.0, 16.0, 26.0, 67.0, 79.0, 132.0, 134.0, 147.0, 132.0, 99.0, 62.0, 34.0, 27.0, 13.0, 7.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0250701904296875, -0.02403426170349121, -0.022998332977294922, -0.021962404251098633, -0.020926475524902344, -0.019890546798706055, -0.018854618072509766, -0.017818689346313477, -0.016782760620117188, -0.0157468318939209, -0.01471090316772461, -0.01367497444152832, -0.012639045715332031, -0.011603116989135742, -0.010567188262939453, -0.009531259536743164, -0.008495330810546875, -0.007459402084350586, -0.006423473358154297, -0.005387544631958008, -0.004351615905761719, -0.0033156871795654297, -0.0022797584533691406, -0.0012438297271728516, -0.0002079010009765625, 0.0008280277252197266, 0.0018639564514160156, 0.0028998851776123047, 0.003935813903808594, 0.004971742630004883, 0.006007671356201172, 0.007043600082397461, 0.00807952880859375, 0.009115457534790039, 0.010151386260986328, 0.011187314987182617, 0.012223243713378906, 0.013259172439575195, 0.014295101165771484, 0.015331029891967773, 0.016366958618164062, 0.01740288734436035, 0.01843881607055664, 0.01947474479675293, 0.02051067352294922, 0.021546602249145508, 0.022582530975341797, 0.023618459701538086, 0.024654388427734375, 0.025690317153930664, 0.026726245880126953, 0.027762174606323242, 0.02879810333251953, 0.02983403205871582, 0.03086996078491211, 0.0319058895111084, 0.03294181823730469, 0.03397774696350098, 0.035013675689697266, 0.036049604415893555, 0.037085533142089844, 0.03812146186828613, 0.03915739059448242, 0.04019331932067871, 0.041229248046875]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 6.0, 9.0, 12.0, 22.0, 33.0, 74.0, 134.0, 205.0, 337.0, 682.0, 1573.0, 5178.0, 36496.0, 4066622.0, 71779.0, 7378.0, 1947.0, 862.0, 399.0, 231.0, 112.0, 72.0, 42.0, 26.0, 18.0, 14.0, 4.0, 4.0, 3.0, 2.0, 1.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.073974609375, -0.07163429260253906, -0.06929397583007812, -0.06695365905761719, -0.06461334228515625, -0.06227302551269531, -0.059932708740234375, -0.05759239196777344, -0.0552520751953125, -0.05291175842285156, -0.050571441650390625, -0.04823112487792969, -0.04589080810546875, -0.04355049133300781, -0.041210174560546875, -0.03886985778808594, -0.036529541015625, -0.03418922424316406, -0.031848907470703125, -0.029508590698242188, -0.02716827392578125, -0.024827957153320312, -0.022487640380859375, -0.020147323608398438, -0.0178070068359375, -0.015466690063476562, -0.013126373291015625, -0.010786056518554688, -0.00844573974609375, -0.0061054229736328125, -0.003765106201171875, -0.0014247894287109375, 0.00091552734375, 0.0032558441162109375, 0.005596160888671875, 0.007936477661132812, 0.01027679443359375, 0.012617111206054688, 0.014957427978515625, 0.017297744750976562, 0.0196380615234375, 0.021978378295898438, 0.024318695068359375, 0.026659011840820312, 0.02899932861328125, 0.03133964538574219, 0.033679962158203125, 0.03602027893066406, 0.038360595703125, 0.04070091247558594, 0.043041229248046875, 0.04538154602050781, 0.04772186279296875, 0.05006217956542969, 0.052402496337890625, 0.05474281311035156, 0.0570831298828125, 0.05942344665527344, 0.061763763427734375, 0.06410408020019531, 0.06644439697265625, 0.06878471374511719, 0.07112503051757812, 0.07346534729003906, 0.0758056640625]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 5.0, 6.0, 21.0, 20.0, 42.0, 110.0, 2287.0, 1394.0, 85.0, 39.0, 19.0, 20.0, 7.0, 7.0, 7.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.034942626953125, -0.033693790435791016, -0.03244495391845703, -0.031196117401123047, -0.029947280883789062, -0.028698444366455078, -0.027449607849121094, -0.02620077133178711, -0.024951934814453125, -0.02370309829711914, -0.022454261779785156, -0.021205425262451172, -0.019956588745117188, -0.018707752227783203, -0.01745891571044922, -0.016210079193115234, -0.01496124267578125, -0.013712406158447266, -0.012463569641113281, -0.011214733123779297, -0.009965896606445312, -0.008717060089111328, -0.007468223571777344, -0.006219387054443359, -0.004970550537109375, -0.0037217140197753906, -0.0024728775024414062, -0.0012240409851074219, 2.47955322265625e-05, 0.0012736320495605469, 0.0025224685668945312, 0.0037713050842285156, 0.0050201416015625, 0.006268978118896484, 0.007517814636230469, 0.008766651153564453, 0.010015487670898438, 0.011264324188232422, 0.012513160705566406, 0.01376199722290039, 0.015010833740234375, 0.01625967025756836, 0.017508506774902344, 0.018757343292236328, 0.020006179809570312, 0.021255016326904297, 0.02250385284423828, 0.023752689361572266, 0.02500152587890625, 0.026250362396240234, 0.02749919891357422, 0.028748035430908203, 0.029996871948242188, 0.031245708465576172, 0.032494544982910156, 0.03374338150024414, 0.034992218017578125, 0.03624105453491211, 0.037489891052246094, 0.03873872756958008, 0.03998756408691406, 0.04123640060424805, 0.04248523712158203, 0.043734073638916016, 0.04498291015625]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 2.0, 5.0, 4.0, 5.0, 2.0, 10.0, 8.0, 38.0, 83.0, 158.0, 254.0, 221.0, 107.0, 52.0, 19.0, 8.0, 9.0, 6.0, 3.0, 6.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.091093048453331, -0.08742063492536545, -0.0837482139468193, -0.08007580041885376, -0.07640337944030762, -0.07273096591234207, -0.06905855238437653, -0.06538613140583038, -0.06171371787786484, -0.058041300624608994, -0.05436888337135315, -0.050696469843387604, -0.04702405259013176, -0.043351635336875916, -0.03967922180891037, -0.036006804555654526, -0.03233438730239868, -0.028661970049142838, -0.024989554658532143, -0.021317139267921448, -0.017644722014665604, -0.01397230476140976, -0.010299889370799065, -0.00662747398018837, -0.0029550567269325256, 0.0007173595950007439, 0.004389775916934013, 0.008062192238867283, 0.011734608560800552, 0.015407025814056396, 0.01907944120466709, 0.022751856595277786, 0.02642427384853363, 0.030096691101789474, 0.03376910835504532, 0.037441521883010864, 0.04111393913626671, 0.04478635638952255, 0.0484587699174881, 0.05213118717074394, 0.055803604423999786, 0.05947602167725563, 0.06314843893051147, 0.06682085245847702, 0.07049326598644257, 0.07416568696498871, 0.07783810049295425, 0.0815105140209198, 0.08518293499946594, 0.08885534852743149, 0.09252776950597763, 0.09620018303394318, 0.09987260401248932, 0.10354501754045486, 0.10721743106842041, 0.11088985204696655, 0.1145622655749321, 0.11823467910289764, 0.12190710008144379, 0.12557952105998993, 0.12925192713737488, 0.13292434811592102, 0.13659676909446716, 0.1402691751718521, 0.14394159615039825]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 7.0, 5.0, 3.0, 17.0, 19.0, 26.0, 26.0, 32.0, 36.0, 41.0, 70.0, 51.0, 55.0, 56.0, 54.0, 67.0, 60.0, 52.0, 62.0, 51.0, 41.0, 53.0, 28.0, 25.0, 13.0, 16.0, 9.0, 6.0, 6.0, 2.0, 3.0, 3.0, 3.0, 1.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.08973711729049683, -0.08722856640815735, -0.08472001552581787, -0.0822114571928978, -0.07970290631055832, -0.07719435542821884, -0.07468579709529877, -0.07217724621295929, -0.06966869533061981, -0.06716014444828033, -0.06465159356594086, -0.06214303523302078, -0.059634484350681305, -0.05712593346834183, -0.05461737886071205, -0.052108824253082275, -0.0496002733707428, -0.04709172248840332, -0.044583167880773544, -0.04207461327314377, -0.03956606239080429, -0.03705751150846481, -0.03454895690083504, -0.03204040229320526, -0.029531851410865784, -0.027023298665881157, -0.02451474592089653, -0.022006193175911903, -0.019497640430927277, -0.01698908768594265, -0.014480534940958023, -0.011971982195973396, -0.00946342945098877, -0.006954876706004143, -0.004446323961019516, -0.0019377712160348892, 0.0005707815289497375, 0.0030793342739343643, 0.005587887018918991, 0.008096439763903618, 0.010604992508888245, 0.013113545253872871, 0.015622097998857498, 0.018130650743842125, 0.02063920348882675, 0.02314775623381138, 0.025656308978796005, 0.028164861723780632, 0.03067341446876526, 0.033181965351104736, 0.03569051995873451, 0.03819907456636429, 0.040707625448703766, 0.04321617633104324, 0.04572473093867302, 0.048233285546302795, 0.05074183642864227, 0.05325038731098175, 0.055758941918611526, 0.0582674965262413, 0.06077604740858078, 0.06328459829092026, 0.06579315662384033, 0.06830170750617981, 0.07081025838851929]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 5.0, 6.0, 4.0, 12.0, 16.0, 22.0, 47.0, 55.0, 83.0, 139.0, 284.0, 462.0, 908.0, 2006.0, 6207.0, 44628.0, 752884.0, 219257.0, 15305.0, 3386.0, 1353.0, 618.0, 354.0, 198.0, 102.0, 76.0, 46.0, 22.0, 21.0, 18.0, 10.0, 10.0, 5.0, 2.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07952880859375, -0.07655715942382812, -0.07358551025390625, -0.07061386108398438, -0.0676422119140625, -0.06467056274414062, -0.06169891357421875, -0.058727264404296875, -0.055755615234375, -0.052783966064453125, -0.04981231689453125, -0.046840667724609375, -0.0438690185546875, -0.040897369384765625, -0.03792572021484375, -0.034954071044921875, -0.031982421875, -0.029010772705078125, -0.02603912353515625, -0.023067474365234375, -0.0200958251953125, -0.017124176025390625, -0.01415252685546875, -0.011180877685546875, -0.008209228515625, -0.005237579345703125, -0.00226593017578125, 0.000705718994140625, 0.0036773681640625, 0.006649017333984375, 0.00962066650390625, 0.012592315673828125, 0.01556396484375, 0.018535614013671875, 0.02150726318359375, 0.024478912353515625, 0.0274505615234375, 0.030422210693359375, 0.03339385986328125, 0.036365509033203125, 0.039337158203125, 0.042308807373046875, 0.04528045654296875, 0.048252105712890625, 0.0512237548828125, 0.054195404052734375, 0.05716705322265625, 0.060138702392578125, 0.0631103515625, 0.06608200073242188, 0.06905364990234375, 0.07202529907226562, 0.0749969482421875, 0.07796859741210938, 0.08094024658203125, 0.08391189575195312, 0.086883544921875, 0.08985519409179688, 0.09282684326171875, 0.09579849243164062, 0.0987701416015625, 0.10174179077148438, 0.10471343994140625, 0.10768508911132812, 0.11065673828125]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 3.0, 4.0, 1.0, 4.0, 7.0, 13.0, 27.0, 53.0, 60.0, 111.0, 143.0, 165.0, 134.0, 110.0, 74.0, 44.0, 26.0, 15.0, 5.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.025909423828125, -0.024851322174072266, -0.02379322052001953, -0.022735118865966797, -0.021677017211914062, -0.020618915557861328, -0.019560813903808594, -0.01850271224975586, -0.017444610595703125, -0.01638650894165039, -0.015328407287597656, -0.014270305633544922, -0.013212203979492188, -0.012154102325439453, -0.011096000671386719, -0.010037899017333984, -0.00897979736328125, -0.007921695709228516, -0.006863594055175781, -0.005805492401123047, -0.0047473907470703125, -0.003689289093017578, -0.0026311874389648438, -0.0015730857849121094, -0.000514984130859375, 0.0005431175231933594, 0.0016012191772460938, 0.002659320831298828, 0.0037174224853515625, 0.004775524139404297, 0.005833625793457031, 0.006891727447509766, 0.0079498291015625, 0.009007930755615234, 0.010066032409667969, 0.011124134063720703, 0.012182235717773438, 0.013240337371826172, 0.014298439025878906, 0.01535654067993164, 0.016414642333984375, 0.01747274398803711, 0.018530845642089844, 0.019588947296142578, 0.020647048950195312, 0.021705150604248047, 0.02276325225830078, 0.023821353912353516, 0.02487945556640625, 0.025937557220458984, 0.02699565887451172, 0.028053760528564453, 0.029111862182617188, 0.030169963836669922, 0.031228065490722656, 0.03228616714477539, 0.033344268798828125, 0.03440237045288086, 0.035460472106933594, 0.03651857376098633, 0.03757667541503906, 0.0386347770690918, 0.03969287872314453, 0.040750980377197266, 0.04180908203125]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 2.0, 9.0, 4.0, 12.0, 5.0, 13.0, 28.0, 29.0, 55.0, 88.0, 166.0, 268.0, 627.0, 1932.0, 9858.0, 120079.0, 829872.0, 75427.0, 7283.0, 1586.0, 555.0, 255.0, 140.0, 92.0, 57.0, 41.0, 26.0, 18.0, 12.0, 5.0, 7.0, 2.0, 1.0, 2.0, 1.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.09375, -0.09131002426147461, -0.08887004852294922, -0.08643007278442383, -0.08399009704589844, -0.08155012130737305, -0.07911014556884766, -0.07667016983032227, -0.07423019409179688, -0.07179021835327148, -0.0693502426147461, -0.0669102668762207, -0.06447029113769531, -0.06203031539916992, -0.05959033966064453, -0.05715036392211914, -0.05471038818359375, -0.05227041244506836, -0.04983043670654297, -0.04739046096801758, -0.04495048522949219, -0.0425105094909668, -0.040070533752441406, -0.037630558013916016, -0.035190582275390625, -0.032750606536865234, -0.030310630798339844, -0.027870655059814453, -0.025430679321289062, -0.022990703582763672, -0.02055072784423828, -0.01811075210571289, -0.0156707763671875, -0.01323080062866211, -0.010790824890136719, -0.008350849151611328, -0.0059108734130859375, -0.003470897674560547, -0.0010309219360351562, 0.0014090538024902344, 0.003849029541015625, 0.006289005279541016, 0.008728981018066406, 0.011168956756591797, 0.013608932495117188, 0.016048908233642578, 0.01848888397216797, 0.02092885971069336, 0.02336883544921875, 0.02580881118774414, 0.02824878692626953, 0.030688762664794922, 0.03312873840332031, 0.0355687141418457, 0.038008689880371094, 0.040448665618896484, 0.042888641357421875, 0.045328617095947266, 0.047768592834472656, 0.05020856857299805, 0.05264854431152344, 0.05508852005004883, 0.05752849578857422, 0.05996847152709961, 0.062408447265625]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 1.0, 10.0, 8.0, 11.0, 19.0, 16.0, 21.0, 29.0, 50.0, 45.0, 32.0, 53.0, 56.0, 64.0, 67.0, 65.0, 62.0, 66.0, 59.0, 46.0, 45.0, 44.0, 28.0, 39.0, 18.0, 11.0, 13.0, 7.0, 4.0, 4.0, 5.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.048614501953125, -0.046498775482177734, -0.04438304901123047, -0.0422673225402832, -0.04015159606933594, -0.03803586959838867, -0.035920143127441406, -0.03380441665649414, -0.031688690185546875, -0.02957296371459961, -0.027457237243652344, -0.025341510772705078, -0.023225784301757812, -0.021110057830810547, -0.01899433135986328, -0.016878604888916016, -0.01476287841796875, -0.012647151947021484, -0.010531425476074219, -0.008415699005126953, -0.0062999725341796875, -0.004184246063232422, -0.0020685195922851562, 4.7206878662109375e-05, 0.002162933349609375, 0.004278659820556641, 0.006394386291503906, 0.008510112762451172, 0.010625839233398438, 0.012741565704345703, 0.014857292175292969, 0.016973018646240234, 0.0190887451171875, 0.021204471588134766, 0.02332019805908203, 0.025435924530029297, 0.027551651000976562, 0.029667377471923828, 0.031783103942871094, 0.03389883041381836, 0.036014556884765625, 0.03813028335571289, 0.040246009826660156, 0.04236173629760742, 0.04447746276855469, 0.04659318923950195, 0.04870891571044922, 0.050824642181396484, 0.05294036865234375, 0.055056095123291016, 0.05717182159423828, 0.05928754806518555, 0.06140327453613281, 0.06351900100708008, 0.06563472747802734, 0.06775045394897461, 0.06986618041992188, 0.07198190689086914, 0.0740976333618164, 0.07621335983276367, 0.07832908630371094, 0.0804448127746582, 0.08256053924560547, 0.08467626571655273, 0.0867919921875]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 3.0, 2.0, 6.0, 9.0, 17.0, 22.0, 33.0, 61.0, 114.0, 347.0, 1138.0, 6508.0, 208656.0, 809181.0, 19487.0, 2122.0, 521.0, 166.0, 80.0, 28.0, 15.0, 19.0, 5.0, 4.0, 7.0, 6.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0203704833984375, -0.019717693328857422, -0.019064903259277344, -0.018412113189697266, -0.017759323120117188, -0.01710653305053711, -0.01645374298095703, -0.015800952911376953, -0.015148162841796875, -0.014495372772216797, -0.013842582702636719, -0.01318979263305664, -0.012537002563476562, -0.011884212493896484, -0.011231422424316406, -0.010578632354736328, -0.00992584228515625, -0.009273052215576172, -0.008620262145996094, -0.007967472076416016, -0.0073146820068359375, -0.006661891937255859, -0.006009101867675781, -0.005356311798095703, -0.004703521728515625, -0.004050731658935547, -0.0033979415893554688, -0.0027451515197753906, -0.0020923614501953125, -0.0014395713806152344, -0.0007867813110351562, -0.00013399124145507812, 0.000518798828125, 0.0011715888977050781, 0.0018243789672851562, 0.0024771690368652344, 0.0031299591064453125, 0.0037827491760253906, 0.004435539245605469, 0.005088329315185547, 0.005741119384765625, 0.006393909454345703, 0.007046699523925781, 0.007699489593505859, 0.008352279663085938, 0.009005069732666016, 0.009657859802246094, 0.010310649871826172, 0.01096343994140625, 0.011616230010986328, 0.012269020080566406, 0.012921810150146484, 0.013574600219726562, 0.01422739028930664, 0.014880180358886719, 0.015532970428466797, 0.016185760498046875, 0.016838550567626953, 0.01749134063720703, 0.01814413070678711, 0.018796920776367188, 0.019449710845947266, 0.020102500915527344, 0.020755290985107422, 0.0214080810546875]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 3.0, 6.0, 13.0, 13.0, 31.0, 44.0, 70.0, 84.0, 117.0, 96.0, 122.0, 100.0, 95.0, 83.0, 44.0, 27.0, 26.0, 14.0, 9.0, 6.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1861324310302734e-05, -1.1573545634746552e-05, -1.1285766959190369e-05, -1.0997988283634186e-05, -1.0710209608078003e-05, -1.042243093252182e-05, -1.0134652256965637e-05, -9.846873581409454e-06, -9.559094905853271e-06, -9.271316230297089e-06, -8.983537554740906e-06, -8.695758879184723e-06, -8.40798020362854e-06, -8.120201528072357e-06, -7.832422852516174e-06, -7.5446441769599915e-06, -7.256865501403809e-06, -6.969086825847626e-06, -6.681308150291443e-06, -6.39352947473526e-06, -6.105750799179077e-06, -5.817972123622894e-06, -5.5301934480667114e-06, -5.2424147725105286e-06, -4.954636096954346e-06, -4.666857421398163e-06, -4.37907874584198e-06, -4.091300070285797e-06, -3.8035213947296143e-06, -3.5157427191734314e-06, -3.2279640436172485e-06, -2.9401853680610657e-06, -2.652406692504883e-06, -2.3646280169487e-06, -2.076849341392517e-06, -1.7890706658363342e-06, -1.5012919902801514e-06, -1.2135133147239685e-06, -9.257346391677856e-07, -6.379559636116028e-07, -3.501772880554199e-07, -6.239861249923706e-08, 2.253800630569458e-07, 5.131587386131287e-07, 8.009374141693115e-07, 1.0887160897254944e-06, 1.3764947652816772e-06, 1.6642734408378601e-06, 1.952052116394043e-06, 2.239830791950226e-06, 2.5276094675064087e-06, 2.8153881430625916e-06, 3.1031668186187744e-06, 3.3909454941749573e-06, 3.67872416973114e-06, 3.966502845287323e-06, 4.254281520843506e-06, 4.542060196399689e-06, 4.829838871955872e-06, 5.1176175475120544e-06, 5.405396223068237e-06, 5.69317489862442e-06, 5.980953574180603e-06, 6.268732249736786e-06, 6.556510925292969e-06]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 3.0, 3.0, 11.0, 5.0, 9.0, 13.0, 15.0, 21.0, 27.0, 58.0, 61.0, 117.0, 191.0, 409.0, 913.0, 3096.0, 15695.0, 172778.0, 725485.0, 113576.0, 11766.0, 2566.0, 862.0, 359.0, 196.0, 107.0, 69.0, 45.0, 31.0, 18.0, 16.0, 13.0, 7.0, 8.0, 5.0, 4.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.013031005859375, -0.012691855430603027, -0.012352705001831055, -0.012013554573059082, -0.01167440414428711, -0.011335253715515137, -0.010996103286743164, -0.010656952857971191, -0.010317802429199219, -0.009978652000427246, -0.009639501571655273, -0.0093003511428833, -0.008961200714111328, -0.008622050285339355, -0.008282899856567383, -0.00794374942779541, -0.0076045989990234375, -0.007265448570251465, -0.006926298141479492, -0.0065871477127075195, -0.006247997283935547, -0.005908846855163574, -0.0055696964263916016, -0.005230545997619629, -0.004891395568847656, -0.004552245140075684, -0.004213094711303711, -0.0038739442825317383, -0.0035347938537597656, -0.003195643424987793, -0.0028564929962158203, -0.0025173425674438477, -0.002178192138671875, -0.0018390417098999023, -0.0014998912811279297, -0.001160740852355957, -0.0008215904235839844, -0.0004824399948120117, -0.00014328956604003906, 0.0001958608627319336, 0.0005350112915039062, 0.0008741617202758789, 0.0012133121490478516, 0.0015524625778198242, 0.0018916130065917969, 0.0022307634353637695, 0.002569913864135742, 0.002909064292907715, 0.0032482147216796875, 0.00358736515045166, 0.003926515579223633, 0.0042656660079956055, 0.004604816436767578, 0.004943966865539551, 0.0052831172943115234, 0.005622267723083496, 0.005961418151855469, 0.006300568580627441, 0.006639719009399414, 0.006978869438171387, 0.007318019866943359, 0.007657170295715332, 0.007996320724487305, 0.008335471153259277, 0.00867462158203125]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 5.0, 2.0, 0.0, 2.0, 6.0, 4.0, 7.0, 5.0, 22.0, 14.0, 25.0, 38.0, 49.0, 60.0, 87.0, 82.0, 89.0, 99.0, 95.0, 75.0, 61.0, 43.0, 31.0, 28.0, 21.0, 20.0, 3.0, 15.0, 7.0, 4.0, 7.0, 3.0, 2.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0063629150390625, -0.006029486656188965, -0.00569605827331543, -0.0053626298904418945, -0.005029201507568359, -0.004695773124694824, -0.004362344741821289, -0.004028916358947754, -0.0036954879760742188, -0.0033620595932006836, -0.0030286312103271484, -0.0026952028274536133, -0.002361774444580078, -0.002028346061706543, -0.0016949176788330078, -0.0013614892959594727, -0.0010280609130859375, -0.0006946325302124023, -0.0003612041473388672, -2.777576446533203e-05, 0.0003056526184082031, 0.0006390810012817383, 0.0009725093841552734, 0.0013059377670288086, 0.0016393661499023438, 0.001972794532775879, 0.002306222915649414, 0.0026396512985229492, 0.0029730796813964844, 0.0033065080642700195, 0.0036399364471435547, 0.00397336483001709, 0.004306793212890625, 0.00464022159576416, 0.004973649978637695, 0.0053070783615112305, 0.005640506744384766, 0.005973935127258301, 0.006307363510131836, 0.006640791893005371, 0.006974220275878906, 0.007307648658752441, 0.0076410770416259766, 0.007974505424499512, 0.008307933807373047, 0.008641362190246582, 0.008974790573120117, 0.009308218955993652, 0.009641647338867188, 0.009975075721740723, 0.010308504104614258, 0.010641932487487793, 0.010975360870361328, 0.011308789253234863, 0.011642217636108398, 0.011975646018981934, 0.012309074401855469, 0.012642502784729004, 0.012975931167602539, 0.013309359550476074, 0.01364278793334961, 0.013976216316223145, 0.01430964469909668, 0.014643073081970215, 0.01497650146484375]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 9.0, 6.0, 9.0, 30.0, 105.0, 251.0, 355.0, 163.0, 44.0, 16.0, 8.0, 2.0, 5.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5309433937072754, -0.5127829313278198, -0.49462246894836426, -0.4764620065689087, -0.45830151438713074, -0.44014105200767517, -0.4219805896282196, -0.40382009744644165, -0.3856596350669861, -0.3674991726875305, -0.34933871030807495, -0.3311782479286194, -0.31301775574684143, -0.29485729336738586, -0.2766968309879303, -0.25853633880615234, -0.24037590622901917, -0.2222154438495636, -0.20405496656894684, -0.18589450418949127, -0.1677340269088745, -0.14957356452941895, -0.13141310214996338, -0.11325262486934662, -0.09509216248989105, -0.07693169265985489, -0.058771226555109024, -0.04061076045036316, -0.022450290620326996, -0.0042898207902908325, 0.013870641589164734, 0.032031118869781494, 0.05019158124923706, 0.06835205107927322, 0.08651252090930939, 0.10467298328876495, 0.12283345311880112, 0.14099392294883728, 0.15915438532829285, 0.1773148626089096, 0.19547532498836517, 0.21363578736782074, 0.2317962646484375, 0.24995672702789307, 0.26811718940734863, 0.2862776517868042, 0.30443811416625977, 0.3225986063480377, 0.3407590687274933, 0.35891953110694885, 0.3770799934864044, 0.3952404856681824, 0.41340094804763794, 0.4315614104270935, 0.4497218728065491, 0.46788233518600464, 0.4860427975654602, 0.5042032599449158, 0.5223637223243713, 0.5405241847038269, 0.5586846470832825, 0.5768451690673828, 0.5950056314468384, 0.613166093826294, 0.6313265562057495]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 2.0, 6.0, 6.0, 1.0, 8.0, 5.0, 7.0, 15.0, 16.0, 23.0, 19.0, 21.0, 26.0, 38.0, 27.0, 34.0, 38.0, 25.0, 51.0, 49.0, 58.0, 43.0, 57.0, 53.0, 60.0, 43.0, 44.0, 38.0, 27.0, 33.0, 31.0, 23.0, 10.0, 17.0, 19.0, 6.0, 8.0, 5.0, 4.0, 8.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.1985747218132019, -0.19049561023712158, -0.18241648375988007, -0.17433735728263855, -0.16625824570655823, -0.1581791341304779, -0.1501000076532364, -0.14202088117599487, -0.13394176959991455, -0.12586265802383423, -0.11778353154659271, -0.1097044125199318, -0.10162529349327087, -0.09354617446660995, -0.08546705543994904, -0.07738793641328812, -0.0693088173866272, -0.06122969835996628, -0.05315057933330536, -0.04507146030664444, -0.03699234127998352, -0.0289132222533226, -0.020834103226661682, -0.012754984200000763, -0.004675865173339844, 0.0034032538533210754, 0.011482372879981995, 0.019561491906642914, 0.027640610933303833, 0.03571972995996475, 0.04379884898662567, 0.05187796801328659, 0.05995708703994751, 0.06803620606660843, 0.07611532509326935, 0.08419444411993027, 0.09227356314659119, 0.1003526821732521, 0.10843180119991302, 0.11651092022657394, 0.12459003925323486, 0.13266915082931519, 0.1407482773065567, 0.14882740378379822, 0.15690651535987854, 0.16498562693595886, 0.17306475341320038, 0.1811438798904419, 0.18922299146652222, 0.19730210304260254, 0.20538122951984406, 0.21346035599708557, 0.2215394675731659, 0.22961857914924622, 0.23769770562648773, 0.24577683210372925, 0.25385594367980957, 0.2619350552558899, 0.2700141668319702, 0.2780933082103729, 0.28617241978645325, 0.29425153136253357, 0.3023306727409363, 0.3104097843170166, 0.3184888958930969]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 7.0, 4.0, 6.0, 17.0, 23.0, 109.0, 253.0, 1102.0, 106287.0, 4084789.0, 1241.0, 252.0, 90.0, 53.0, 24.0, 14.0, 6.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0821533203125, -0.07783699035644531, -0.07352066040039062, -0.06920433044433594, -0.06488800048828125, -0.06057167053222656, -0.056255340576171875, -0.05193901062011719, -0.0476226806640625, -0.04330635070800781, -0.038990020751953125, -0.03467369079589844, -0.03035736083984375, -0.026041030883789062, -0.021724700927734375, -0.017408370971679688, -0.013092041015625, -0.008775711059570312, -0.004459381103515625, -0.0001430511474609375, 0.00417327880859375, 0.008489608764648438, 0.012805938720703125, 0.017122268676757812, 0.0214385986328125, 0.025754928588867188, 0.030071258544921875, 0.03438758850097656, 0.03870391845703125, 0.04302024841308594, 0.047336578369140625, 0.05165290832519531, 0.05596923828125, 0.06028556823730469, 0.06460189819335938, 0.06891822814941406, 0.07323455810546875, 0.07755088806152344, 0.08186721801757812, 0.08618354797363281, 0.0904998779296875, 0.09481620788574219, 0.09913253784179688, 0.10344886779785156, 0.10776519775390625, 0.11208152770996094, 0.11639785766601562, 0.12071418762207031, 0.125030517578125, 0.1293468475341797, 0.13366317749023438, 0.13797950744628906, 0.14229583740234375, 0.14661216735839844, 0.15092849731445312, 0.1552448272705078, 0.1595611572265625, 0.1638774871826172, 0.16819381713867188, 0.17251014709472656, 0.17682647705078125, 0.18114280700683594, 0.18545913696289062, 0.1897754669189453, 0.194091796875]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 4.0, 3.0, 3.0, 6.0, 9.0, 24.0, 38.0, 77.0, 118.0, 150.0, 149.0, 144.0, 100.0, 83.0, 41.0, 32.0, 7.0, 7.0, 4.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0266876220703125, -0.025552988052368164, -0.024418354034423828, -0.023283720016479492, -0.022149085998535156, -0.02101445198059082, -0.019879817962646484, -0.01874518394470215, -0.017610549926757812, -0.016475915908813477, -0.01534128189086914, -0.014206647872924805, -0.013072013854980469, -0.011937379837036133, -0.010802745819091797, -0.009668111801147461, -0.008533477783203125, -0.007398843765258789, -0.006264209747314453, -0.005129575729370117, -0.003994941711425781, -0.0028603076934814453, -0.0017256736755371094, -0.0005910396575927734, 0.0005435943603515625, 0.0016782283782958984, 0.0028128623962402344, 0.00394749641418457, 0.005082130432128906, 0.006216764450073242, 0.007351398468017578, 0.008486032485961914, 0.00962066650390625, 0.010755300521850586, 0.011889934539794922, 0.013024568557739258, 0.014159202575683594, 0.01529383659362793, 0.016428470611572266, 0.0175631046295166, 0.018697738647460938, 0.019832372665405273, 0.02096700668334961, 0.022101640701293945, 0.02323627471923828, 0.024370908737182617, 0.025505542755126953, 0.02664017677307129, 0.027774810791015625, 0.02890944480895996, 0.030044078826904297, 0.031178712844848633, 0.03231334686279297, 0.033447980880737305, 0.03458261489868164, 0.03571724891662598, 0.03685188293457031, 0.03798651695251465, 0.039121150970458984, 0.04025578498840332, 0.041390419006347656, 0.04252505302429199, 0.04365968704223633, 0.044794321060180664, 0.045928955078125]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 1.0, 12.0, 10.0, 19.0, 27.0, 36.0, 47.0, 94.0, 176.0, 346.0, 984.0, 3584.0, 33152.0, 4135222.0, 16812.0, 2430.0, 710.0, 273.0, 137.0, 93.0, 53.0, 15.0, 23.0, 8.0, 9.0, 7.0, 1.0, 2.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.04327392578125, -0.04214668273925781, -0.041019439697265625, -0.03989219665527344, -0.03876495361328125, -0.03763771057128906, -0.036510467529296875, -0.03538322448730469, -0.0342559814453125, -0.03312873840332031, -0.032001495361328125, -0.030874252319335938, -0.02974700927734375, -0.028619766235351562, -0.027492523193359375, -0.026365280151367188, -0.025238037109375, -0.024110794067382812, -0.022983551025390625, -0.021856307983398438, -0.02072906494140625, -0.019601821899414062, -0.018474578857421875, -0.017347335815429688, -0.0162200927734375, -0.015092849731445312, -0.013965606689453125, -0.012838363647460938, -0.01171112060546875, -0.010583877563476562, -0.009456634521484375, -0.008329391479492188, -0.0072021484375, -0.0060749053955078125, -0.004947662353515625, -0.0038204193115234375, -0.00269317626953125, -0.0015659332275390625, -0.000438690185546875, 0.0006885528564453125, 0.0018157958984375, 0.0029430389404296875, 0.004070281982421875, 0.0051975250244140625, 0.00632476806640625, 0.0074520111083984375, 0.008579254150390625, 0.009706497192382812, 0.010833740234375, 0.011960983276367188, 0.013088226318359375, 0.014215469360351562, 0.01534271240234375, 0.016469955444335938, 0.017597198486328125, 0.018724441528320312, 0.0198516845703125, 0.020978927612304688, 0.022106170654296875, 0.023233413696289062, 0.02436065673828125, 0.025487899780273438, 0.026615142822265625, 0.027742385864257812, 0.02886962890625]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 5.0, 2.0, 3.0, 7.0, 7.0, 17.0, 34.0, 205.0, 3691.0, 77.0, 20.0, 7.0, 5.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00855255126953125, -0.008344650268554688, -0.008136749267578125, -0.007928848266601562, -0.007720947265625, -0.0075130462646484375, -0.007305145263671875, -0.0070972442626953125, -0.00688934326171875, -0.0066814422607421875, -0.006473541259765625, -0.0062656402587890625, -0.0060577392578125, -0.0058498382568359375, -0.005641937255859375, -0.0054340362548828125, -0.00522613525390625, -0.0050182342529296875, -0.004810333251953125, -0.0046024322509765625, -0.00439453125, -0.0041866302490234375, -0.003978729248046875, -0.0037708282470703125, -0.00356292724609375, -0.0033550262451171875, -0.003147125244140625, -0.0029392242431640625, -0.0027313232421875, -0.0025234222412109375, -0.002315521240234375, -0.0021076202392578125, -0.00189971923828125, -0.0016918182373046875, -0.001483917236328125, -0.0012760162353515625, -0.001068115234375, -0.0008602142333984375, -0.000652313232421875, -0.0004444122314453125, -0.00023651123046875, -2.86102294921875e-05, 0.000179290771484375, 0.0003871917724609375, 0.0005950927734375, 0.0008029937744140625, 0.001010894775390625, 0.0012187957763671875, 0.00142669677734375, 0.0016345977783203125, 0.001842498779296875, 0.0020503997802734375, 0.00225830078125, 0.0024662017822265625, 0.002674102783203125, 0.0028820037841796875, 0.00308990478515625, 0.0032978057861328125, 0.003505706787109375, 0.0037136077880859375, 0.0039215087890625, 0.0041294097900390625, 0.004337310791015625, 0.0045452117919921875, 0.00475311279296875]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 5.0, 18.0, 48.0, 225.0, 458.0, 212.0, 33.0, 11.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.07619339972734451, -0.07480142265558243, -0.07340944558382034, -0.07201746851205826, -0.07062549144029617, -0.06923351436853409, -0.067841537296772, -0.06644956022500992, -0.06505758315324783, -0.06366560608148575, -0.06227362900972366, -0.06088165193796158, -0.05948967486619949, -0.05809769779443741, -0.056705720722675323, -0.05531373992562294, -0.05392175912857056, -0.05252978205680847, -0.05113780498504639, -0.0497458279132843, -0.04835385084152222, -0.04696187376976013, -0.04556989669799805, -0.04417791590094566, -0.04278594255447388, -0.04139396548271179, -0.04000198841094971, -0.03861001133918762, -0.03721803426742554, -0.03582605719566345, -0.03443408012390137, -0.033042099326848984, -0.0316501222550869, -0.030258145183324814, -0.02886616811156273, -0.027474191039800644, -0.02608221210539341, -0.024690235033631325, -0.02329825796186924, -0.021906279027462006, -0.02051430195569992, -0.019122324883937836, -0.01773034781217575, -0.016338370740413666, -0.014946391806006432, -0.013554414734244347, -0.012162437662482262, -0.010770459659397602, -0.009378483518958092, -0.007986506447196007, -0.006594528444111347, -0.005202551372349262, -0.00381057383492589, -0.0024185962975025177, -0.0010266192257404327, 0.00036535877734422684, 0.0017573358491063118, 0.003149313386529684, 0.004541290923953056, 0.005933267995715141, 0.0073252455331385136, 0.008717223070561886, 0.01010920014232397, 0.01150117814540863, 0.012893155217170715]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 5.0, 3.0, 3.0, 8.0, 6.0, 8.0, 8.0, 13.0, 12.0, 13.0, 20.0, 14.0, 23.0, 28.0, 26.0, 22.0, 36.0, 29.0, 48.0, 33.0, 50.0, 49.0, 42.0, 31.0, 32.0, 35.0, 38.0, 35.0, 35.0, 32.0, 35.0, 36.0, 30.0, 28.0, 21.0, 16.0, 15.0, 19.0, 9.0, 11.0, 15.0, 7.0, 9.0, 3.0, 3.0, 4.0, 6.0, 5.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.007398486137390137, -0.00715904776006937, -0.006919609382748604, -0.006680171005427837, -0.006440732628107071, -0.0062012942507863045, -0.005961855873465538, -0.005722417496144772, -0.005482979118824005, -0.005243540741503239, -0.005004102364182472, -0.004764663986861706, -0.004525225609540939, -0.004285787232220173, -0.004046348854899406, -0.00380691047757864, -0.0035674721002578735, -0.003328033722937107, -0.0030885953456163406, -0.002849156968295574, -0.0026097185909748077, -0.0023702802136540413, -0.002130841836333275, -0.0018914034590125084, -0.001651965081691742, -0.0014125267043709755, -0.001173088327050209, -0.0009336499497294426, -0.0006942115724086761, -0.0004547731950879097, -0.00021533481776714325, 2.41035595536232e-05, 0.00026354193687438965, 0.0005029803141951561, 0.0007424186915159225, 0.000981857068836689, 0.0012212954461574554, 0.001460733823478222, 0.0017001722007989883, 0.0019396105781197548, 0.0021790489554405212, 0.0024184873327612877, 0.002657925710082054, 0.0028973640874028206, 0.003136802464723587, 0.0033762408420443535, 0.00361567921936512, 0.0038551175966858864, 0.004094555974006653, 0.004333994351327419, 0.004573432728648186, 0.004812871105968952, 0.005052309483289719, 0.005291747860610485, 0.0055311862379312515, 0.005770624615252018, 0.006010062992572784, 0.006249501369893551, 0.006488939747214317, 0.006728378124535084, 0.00696781650185585, 0.007207254879176617, 0.007446693256497383, 0.00768613163381815, 0.007925570011138916]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 3.0, 8.0, 2.0, 5.0, 12.0, 21.0, 42.0, 48.0, 59.0, 91.0, 141.0, 206.0, 450.0, 879.0, 1935.0, 5812.0, 38423.0, 603194.0, 366356.0, 23230.0, 4329.0, 1577.0, 788.0, 379.0, 223.0, 115.0, 83.0, 45.0, 40.0, 13.0, 25.0, 13.0, 7.0, 4.0, 4.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.061370849609375, -0.05924654006958008, -0.057122230529785156, -0.054997920989990234, -0.05287361145019531, -0.05074930191040039, -0.04862499237060547, -0.04650068283081055, -0.044376373291015625, -0.0422520637512207, -0.04012775421142578, -0.03800344467163086, -0.03587913513183594, -0.033754825592041016, -0.031630516052246094, -0.029506206512451172, -0.02738189697265625, -0.025257587432861328, -0.023133277893066406, -0.021008968353271484, -0.018884658813476562, -0.01676034927368164, -0.014636039733886719, -0.012511730194091797, -0.010387420654296875, -0.008263111114501953, -0.006138801574707031, -0.004014492034912109, -0.0018901824951171875, 0.00023412704467773438, 0.0023584365844726562, 0.004482746124267578, 0.0066070556640625, 0.008731365203857422, 0.010855674743652344, 0.012979984283447266, 0.015104293823242188, 0.01722860336303711, 0.01935291290283203, 0.021477222442626953, 0.023601531982421875, 0.025725841522216797, 0.02785015106201172, 0.02997446060180664, 0.03209877014160156, 0.034223079681396484, 0.036347389221191406, 0.03847169876098633, 0.04059600830078125, 0.04272031784057617, 0.044844627380371094, 0.046968936920166016, 0.04909324645996094, 0.05121755599975586, 0.05334186553955078, 0.0554661750793457, 0.057590484619140625, 0.05971479415893555, 0.06183910369873047, 0.06396341323852539, 0.06608772277832031, 0.06821203231811523, 0.07033634185791016, 0.07246065139770508, 0.0745849609375]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 5.0, 2.0, 3.0, 6.0, 10.0, 22.0, 36.0, 82.0, 113.0, 156.0, 136.0, 143.0, 102.0, 84.0, 47.0, 33.0, 7.0, 7.0, 5.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02679443359375, -0.02565765380859375, -0.0245208740234375, -0.02338409423828125, -0.022247314453125, -0.02111053466796875, -0.0199737548828125, -0.01883697509765625, -0.0177001953125, -0.01656341552734375, -0.0154266357421875, -0.01428985595703125, -0.013153076171875, -0.01201629638671875, -0.0108795166015625, -0.00974273681640625, -0.00860595703125, -0.00746917724609375, -0.0063323974609375, -0.00519561767578125, -0.004058837890625, -0.00292205810546875, -0.0017852783203125, -0.00064849853515625, 0.00048828125, 0.00162506103515625, 0.0027618408203125, 0.00389862060546875, 0.005035400390625, 0.00617218017578125, 0.0073089599609375, 0.00844573974609375, 0.00958251953125, 0.01071929931640625, 0.0118560791015625, 0.01299285888671875, 0.014129638671875, 0.01526641845703125, 0.0164031982421875, 0.01753997802734375, 0.0186767578125, 0.01981353759765625, 0.0209503173828125, 0.02208709716796875, 0.023223876953125, 0.02436065673828125, 0.0254974365234375, 0.02663421630859375, 0.02777099609375, 0.02890777587890625, 0.0300445556640625, 0.03118133544921875, 0.032318115234375, 0.03345489501953125, 0.0345916748046875, 0.03572845458984375, 0.036865234375, 0.03800201416015625, 0.0391387939453125, 0.04027557373046875, 0.041412353515625, 0.04254913330078125, 0.0436859130859375, 0.04482269287109375, 0.04595947265625]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [4.0, 4.0, 2.0, 3.0, 4.0, 3.0, 2.0, 8.0, 7.0, 7.0, 11.0, 11.0, 18.0, 18.0, 30.0, 32.0, 64.0, 50.0, 86.0, 125.0, 178.0, 261.0, 391.0, 711.0, 1309.0, 3024.0, 8258.0, 28338.0, 115671.0, 464590.0, 322983.0, 72360.0, 19071.0, 5977.0, 2284.0, 995.0, 558.0, 313.0, 225.0, 133.0, 93.0, 72.0, 63.0, 53.0, 33.0, 31.0, 25.0, 16.0, 12.0, 12.0, 7.0, 5.0, 11.0, 8.0, 5.0, 0.0, 4.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.023529052734375, -0.022740602493286133, -0.021952152252197266, -0.0211637020111084, -0.02037525177001953, -0.019586801528930664, -0.018798351287841797, -0.01800990104675293, -0.017221450805664062, -0.016433000564575195, -0.015644550323486328, -0.014856100082397461, -0.014067649841308594, -0.013279199600219727, -0.01249074935913086, -0.011702299118041992, -0.010913848876953125, -0.010125398635864258, -0.00933694839477539, -0.008548498153686523, -0.007760047912597656, -0.006971597671508789, -0.006183147430419922, -0.005394697189331055, -0.0046062469482421875, -0.0038177967071533203, -0.003029346466064453, -0.002240896224975586, -0.0014524459838867188, -0.0006639957427978516, 0.00012445449829101562, 0.0009129047393798828, 0.00170135498046875, 0.002489805221557617, 0.0032782554626464844, 0.0040667057037353516, 0.004855155944824219, 0.005643606185913086, 0.006432056427001953, 0.00722050666809082, 0.008008956909179688, 0.008797407150268555, 0.009585857391357422, 0.010374307632446289, 0.011162757873535156, 0.011951208114624023, 0.01273965835571289, 0.013528108596801758, 0.014316558837890625, 0.015105009078979492, 0.01589345932006836, 0.016681909561157227, 0.017470359802246094, 0.01825881004333496, 0.019047260284423828, 0.019835710525512695, 0.020624160766601562, 0.02141261100769043, 0.022201061248779297, 0.022989511489868164, 0.02377796173095703, 0.0245664119720459, 0.025354862213134766, 0.026143312454223633, 0.0269317626953125]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 8.0, 1.0, 3.0, 4.0, 6.0, 5.0, 4.0, 10.0, 14.0, 8.0, 12.0, 16.0, 13.0, 13.0, 19.0, 23.0, 22.0, 25.0, 29.0, 40.0, 24.0, 31.0, 35.0, 36.0, 41.0, 43.0, 45.0, 36.0, 29.0, 33.0, 39.0, 31.0, 35.0, 37.0, 28.0, 30.0, 21.0, 28.0, 22.0, 20.0, 11.0, 16.0, 12.0, 11.0, 10.0, 4.0, 6.0, 7.0, 5.0, 1.0, 4.0, 1.0, 3.0, 0.0, 1.0, 0.0, 3.0], "bins": [-0.03857421875, -0.037404537200927734, -0.03623485565185547, -0.0350651741027832, -0.03389549255371094, -0.03272581100463867, -0.031556129455566406, -0.03038644790649414, -0.029216766357421875, -0.02804708480834961, -0.026877403259277344, -0.025707721710205078, -0.024538040161132812, -0.023368358612060547, -0.02219867706298828, -0.021028995513916016, -0.01985931396484375, -0.018689632415771484, -0.01751995086669922, -0.016350269317626953, -0.015180587768554688, -0.014010906219482422, -0.012841224670410156, -0.01167154312133789, -0.010501861572265625, -0.00933218002319336, -0.008162498474121094, -0.006992816925048828, -0.0058231353759765625, -0.004653453826904297, -0.0034837722778320312, -0.0023140907287597656, -0.0011444091796875, 2.5272369384765625e-05, 0.0011949539184570312, 0.002364635467529297, 0.0035343170166015625, 0.004703998565673828, 0.005873680114746094, 0.007043361663818359, 0.008213043212890625, 0.00938272476196289, 0.010552406311035156, 0.011722087860107422, 0.012891769409179688, 0.014061450958251953, 0.015231132507324219, 0.016400814056396484, 0.01757049560546875, 0.018740177154541016, 0.01990985870361328, 0.021079540252685547, 0.022249221801757812, 0.023418903350830078, 0.024588584899902344, 0.02575826644897461, 0.026927947998046875, 0.02809762954711914, 0.029267311096191406, 0.030436992645263672, 0.03160667419433594, 0.0327763557434082, 0.03394603729248047, 0.035115718841552734, 0.036285400390625]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 5.0, 5.0, 4.0, 8.0, 10.0, 9.0, 18.0, 11.0, 40.0, 59.0, 109.0, 301.0, 845.0, 3541.0, 23706.0, 336098.0, 629667.0, 46416.0, 5713.0, 1241.0, 415.0, 157.0, 74.0, 40.0, 14.0, 20.0, 12.0, 5.0, 4.0, 4.0, 2.0, 2.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01010894775390625, -0.009784340858459473, -0.009459733963012695, -0.009135127067565918, -0.00881052017211914, -0.008485913276672363, -0.008161306381225586, -0.007836699485778809, -0.007512092590332031, -0.007187485694885254, -0.0068628787994384766, -0.006538271903991699, -0.006213665008544922, -0.0058890581130981445, -0.005564451217651367, -0.00523984432220459, -0.0049152374267578125, -0.004590630531311035, -0.004266023635864258, -0.0039414167404174805, -0.003616809844970703, -0.0032922029495239258, -0.0029675960540771484, -0.002642989158630371, -0.0023183822631835938, -0.0019937753677368164, -0.001669168472290039, -0.0013445615768432617, -0.0010199546813964844, -0.000695347785949707, -0.0003707408905029297, -4.6133995056152344e-05, 0.000278472900390625, 0.0006030797958374023, 0.0009276866912841797, 0.001252293586730957, 0.0015769004821777344, 0.0019015073776245117, 0.002226114273071289, 0.0025507211685180664, 0.0028753280639648438, 0.003199934959411621, 0.0035245418548583984, 0.0038491487503051758, 0.004173755645751953, 0.0044983625411987305, 0.004822969436645508, 0.005147576332092285, 0.0054721832275390625, 0.00579679012298584, 0.006121397018432617, 0.0064460039138793945, 0.006770610809326172, 0.007095217704772949, 0.0074198246002197266, 0.007744431495666504, 0.008069038391113281, 0.008393645286560059, 0.008718252182006836, 0.009042859077453613, 0.00936746597290039, 0.009692072868347168, 0.010016679763793945, 0.010341286659240723, 0.0106658935546875]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 1.0, 4.0, 5.0, 10.0, 15.0, 11.0, 18.0, 29.0, 26.0, 51.0, 25.0, 36.0, 50.0, 52.0, 58.0, 51.0, 55.0, 73.0, 65.0, 57.0, 43.0, 51.0, 44.0, 42.0, 33.0, 24.0, 20.0, 19.0, 11.0, 10.0, 5.0, 5.0, 5.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.039836883544922e-06, -2.921558916568756e-06, -2.8032809495925903e-06, -2.6850029826164246e-06, -2.566725015640259e-06, -2.448447048664093e-06, -2.3301690816879272e-06, -2.2118911147117615e-06, -2.0936131477355957e-06, -1.97533518075943e-06, -1.8570572137832642e-06, -1.7387792468070984e-06, -1.6205012798309326e-06, -1.5022233128547668e-06, -1.383945345878601e-06, -1.2656673789024353e-06, -1.1473894119262695e-06, -1.0291114449501038e-06, -9.10833477973938e-07, -7.925555109977722e-07, -6.742775440216064e-07, -5.559995770454407e-07, -4.377216100692749e-07, -3.1944364309310913e-07, -2.0116567611694336e-07, -8.288770914077759e-08, 3.5390257835388184e-08, 1.5366822481155396e-07, 2.7194619178771973e-07, 3.902241587638855e-07, 5.085021257400513e-07, 6.26780092716217e-07, 7.450580596923828e-07, 8.633360266685486e-07, 9.816139936447144e-07, 1.0998919606208801e-06, 1.218169927597046e-06, 1.3364478945732117e-06, 1.4547258615493774e-06, 1.5730038285255432e-06, 1.691281795501709e-06, 1.8095597624778748e-06, 1.9278377294540405e-06, 2.0461156964302063e-06, 2.164393663406372e-06, 2.282671630382538e-06, 2.4009495973587036e-06, 2.5192275643348694e-06, 2.637505531311035e-06, 2.755783498287201e-06, 2.8740614652633667e-06, 2.9923394322395325e-06, 3.1106173992156982e-06, 3.228895366191864e-06, 3.3471733331680298e-06, 3.4654513001441956e-06, 3.5837292671203613e-06, 3.702007234096527e-06, 3.820285201072693e-06, 3.938563168048859e-06, 4.056841135025024e-06, 4.17511910200119e-06, 4.293397068977356e-06, 4.411675035953522e-06, 4.5299530029296875e-06]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 1.0, 6.0, 6.0, 10.0, 12.0, 21.0, 21.0, 62.0, 97.0, 181.0, 459.0, 1323.0, 6033.0, 80191.0, 835261.0, 114951.0, 7417.0, 1556.0, 481.0, 231.0, 100.0, 62.0, 27.0, 10.0, 9.0, 8.0, 2.0, 7.0, 4.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.009857177734375, -0.009499430656433105, -0.009141683578491211, -0.008783936500549316, -0.008426189422607422, -0.008068442344665527, -0.007710695266723633, -0.007352948188781738, -0.006995201110839844, -0.006637454032897949, -0.006279706954956055, -0.00592195987701416, -0.005564212799072266, -0.005206465721130371, -0.0048487186431884766, -0.004490971565246582, -0.0041332244873046875, -0.003775477409362793, -0.0034177303314208984, -0.003059983253479004, -0.0027022361755371094, -0.002344489097595215, -0.0019867420196533203, -0.0016289949417114258, -0.0012712478637695312, -0.0009135007858276367, -0.0005557537078857422, -0.00019800662994384766, 0.00015974044799804688, 0.0005174875259399414, 0.0008752346038818359, 0.0012329816818237305, 0.001590728759765625, 0.0019484758377075195, 0.002306222915649414, 0.0026639699935913086, 0.003021717071533203, 0.0033794641494750977, 0.003737211227416992, 0.004094958305358887, 0.004452705383300781, 0.004810452461242676, 0.00516819953918457, 0.005525946617126465, 0.005883693695068359, 0.006241440773010254, 0.0065991878509521484, 0.006956934928894043, 0.0073146820068359375, 0.007672429084777832, 0.008030176162719727, 0.008387923240661621, 0.008745670318603516, 0.00910341739654541, 0.009461164474487305, 0.0098189115524292, 0.010176658630371094, 0.010534405708312988, 0.010892152786254883, 0.011249899864196777, 0.011607646942138672, 0.011965394020080566, 0.012323141098022461, 0.012680888175964355, 0.01303863525390625]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 9.0, 5.0, 5.0, 5.0, 6.0, 29.0, 27.0, 36.0, 73.0, 81.0, 113.0, 118.0, 103.0, 119.0, 77.0, 63.0, 45.0, 28.0, 18.0, 13.0, 10.0, 3.0, 7.0, 5.0, 3.0, 7.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.007602691650390625, -0.007226884365081787, -0.006851077079772949, -0.006475269794464111, -0.0060994625091552734, -0.0057236552238464355, -0.005347847938537598, -0.00497204065322876, -0.004596233367919922, -0.004220426082611084, -0.003844618797302246, -0.003468811511993408, -0.0030930042266845703, -0.0027171969413757324, -0.0023413896560668945, -0.0019655823707580566, -0.0015897750854492188, -0.0012139678001403809, -0.000838160514831543, -0.0004623532295227051, -8.654594421386719e-05, 0.0002892613410949707, 0.0006650686264038086, 0.0010408759117126465, 0.0014166831970214844, 0.0017924904823303223, 0.00216829776763916, 0.002544105052947998, 0.002919912338256836, 0.003295719623565674, 0.0036715269088745117, 0.00404733419418335, 0.0044231414794921875, 0.004798948764801025, 0.005174756050109863, 0.005550563335418701, 0.005926370620727539, 0.006302177906036377, 0.006677985191345215, 0.007053792476654053, 0.007429599761962891, 0.0078054070472717285, 0.008181214332580566, 0.008557021617889404, 0.008932828903198242, 0.00930863618850708, 0.009684443473815918, 0.010060250759124756, 0.010436058044433594, 0.010811865329742432, 0.01118767261505127, 0.011563479900360107, 0.011939287185668945, 0.012315094470977783, 0.012690901756286621, 0.013066709041595459, 0.013442516326904297, 0.013818323612213135, 0.014194130897521973, 0.01456993818283081, 0.014945745468139648, 0.015321552753448486, 0.015697360038757324, 0.016073167324066162, 0.016448974609375]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 7.0, 19.0, 30.0, 47.0, 105.0, 212.0, 225.0, 189.0, 84.0, 43.0, 26.0, 6.0, 2.0, 7.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.4199330508708954, -0.41020315885543823, -0.40047329664230347, -0.3907434046268463, -0.38101351261138916, -0.371283620595932, -0.36155375838279724, -0.3518238663673401, -0.34209397435188293, -0.3323640823364258, -0.322634220123291, -0.31290432810783386, -0.3031744360923767, -0.29344454407691956, -0.2837146818637848, -0.27398478984832764, -0.26425492763519287, -0.2545250356197357, -0.24479515850543976, -0.2350652813911438, -0.22533538937568665, -0.21560551226139069, -0.20587563514709473, -0.19614574313163757, -0.18641585111618042, -0.17668597400188446, -0.1669560819864273, -0.15722620487213135, -0.1474963128566742, -0.13776643574237823, -0.12803655862808228, -0.11830666661262512, -0.10857678949832916, -0.0988469049334526, -0.08911702036857605, -0.07938714325428009, -0.06965725123882294, -0.05992737039923668, -0.05019748955965042, -0.040467604994773865, -0.03073772042989731, -0.021007835865020752, -0.011277953162789345, -0.0015480704605579376, 0.008181814104318619, 0.017911698669195175, 0.027641579508781433, 0.03737146407365799, 0.047101348638534546, 0.0568312332034111, 0.06656111776828766, 0.07629099488258362, 0.08602088689804077, 0.09575076401233673, 0.10548064857721329, 0.11521053314208984, 0.1249404177069664, 0.13467030227184296, 0.14440017938613892, 0.15413007140159607, 0.16385994851589203, 0.17358984053134918, 0.18331971764564514, 0.1930496096611023, 0.20277948677539825]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 6.0, 3.0, 11.0, 7.0, 15.0, 17.0, 10.0, 15.0, 13.0, 25.0, 20.0, 26.0, 13.0, 33.0, 38.0, 32.0, 37.0, 38.0, 41.0, 37.0, 50.0, 44.0, 39.0, 50.0, 49.0, 40.0, 31.0, 22.0, 25.0, 39.0, 26.0, 20.0, 25.0, 19.0, 17.0, 11.0, 11.0, 11.0, 7.0, 7.0, 5.0, 4.0, 5.0, 4.0, 2.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0], "bins": [-0.20947527885437012, -0.20316317677497864, -0.19685105979442596, -0.19053895771503448, -0.1842268407344818, -0.17791473865509033, -0.17160263657569885, -0.16529053449630737, -0.1589784175157547, -0.15266631543636322, -0.14635419845581055, -0.14004209637641907, -0.1337299942970276, -0.12741787731647491, -0.12110577523708344, -0.11479366570711136, -0.10848155617713928, -0.1021694466471672, -0.09585733711719513, -0.08954523503780365, -0.08323312550783157, -0.0769210159778595, -0.07060891389846802, -0.06429680436849594, -0.057984694838523865, -0.05167258530855179, -0.04536047950387001, -0.03904837369918823, -0.032736264169216156, -0.02642415463924408, -0.0201120488345623, -0.013799943029880524, -0.007487833499908447, -0.00117572583258152, 0.005136381834745407, 0.011448489502072334, 0.01776059716939926, 0.024072706699371338, 0.030384812504053116, 0.036696918308734894, 0.04300902783870697, 0.04932113736867905, 0.055633243173360825, 0.0619453489780426, 0.06825745850801468, 0.07456956803798676, 0.08088167011737823, 0.08719377964735031, 0.09350588917732239, 0.09981799870729446, 0.10613010823726654, 0.11244221031665802, 0.1187543198466301, 0.12506642937660217, 0.13137853145599365, 0.13769063353538513, 0.1440027505159378, 0.15031485259532928, 0.15662696957588196, 0.16293907165527344, 0.16925117373466492, 0.1755632907152176, 0.18187539279460907, 0.18818750977516174, 0.19449961185455322]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 3.0, 2.0, 8.0, 10.0, 25.0, 34.0, 65.0, 166.0, 356.0, 1040.0, 4924.0, 4058525.0, 123962.0, 3690.0, 846.0, 305.0, 127.0, 76.0, 41.0, 27.0, 13.0, 9.0, 10.0, 6.0, 5.0, 3.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0379638671875, -0.03595924377441406, -0.033954620361328125, -0.03194999694824219, -0.02994537353515625, -0.027940750122070312, -0.025936126708984375, -0.023931503295898438, -0.0219268798828125, -0.019922256469726562, -0.017917633056640625, -0.015913009643554688, -0.01390838623046875, -0.011903762817382812, -0.009899139404296875, -0.007894515991210938, -0.005889892578125, -0.0038852691650390625, -0.001880645751953125, 0.0001239776611328125, 0.00212860107421875, 0.0041332244873046875, 0.006137847900390625, 0.008142471313476562, 0.0101470947265625, 0.012151718139648438, 0.014156341552734375, 0.016160964965820312, 0.01816558837890625, 0.020170211791992188, 0.022174835205078125, 0.024179458618164062, 0.02618408203125, 0.028188705444335938, 0.030193328857421875, 0.03219795227050781, 0.03420257568359375, 0.03620719909667969, 0.038211822509765625, 0.04021644592285156, 0.0422210693359375, 0.04422569274902344, 0.046230316162109375, 0.04823493957519531, 0.05023956298828125, 0.05224418640136719, 0.054248809814453125, 0.05625343322753906, 0.058258056640625, 0.06026268005371094, 0.062267303466796875, 0.06427192687988281, 0.06627655029296875, 0.06828117370605469, 0.07028579711914062, 0.07229042053222656, 0.0742950439453125, 0.07629966735839844, 0.07830429077148438, 0.08030891418457031, 0.08231353759765625, 0.08431816101074219, 0.08632278442382812, 0.08832740783691406, 0.09033203125]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 2.0, 1.0, 8.0, 12.0, 23.0, 34.0, 85.0, 110.0, 160.0, 160.0, 135.0, 97.0, 79.0, 42.0, 23.0, 13.0, 6.0, 5.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0267333984375, -0.025597095489501953, -0.024460792541503906, -0.02332448959350586, -0.022188186645507812, -0.021051883697509766, -0.01991558074951172, -0.018779277801513672, -0.017642974853515625, -0.016506671905517578, -0.015370368957519531, -0.014234066009521484, -0.013097763061523438, -0.01196146011352539, -0.010825157165527344, -0.009688854217529297, -0.00855255126953125, -0.007416248321533203, -0.006279945373535156, -0.005143642425537109, -0.0040073394775390625, -0.0028710365295410156, -0.0017347335815429688, -0.0005984306335449219, 0.000537872314453125, 0.0016741752624511719, 0.0028104782104492188, 0.003946781158447266, 0.0050830841064453125, 0.006219387054443359, 0.007355690002441406, 0.008491992950439453, 0.0096282958984375, 0.010764598846435547, 0.011900901794433594, 0.01303720474243164, 0.014173507690429688, 0.015309810638427734, 0.01644611358642578, 0.017582416534423828, 0.018718719482421875, 0.019855022430419922, 0.02099132537841797, 0.022127628326416016, 0.023263931274414062, 0.02440023422241211, 0.025536537170410156, 0.026672840118408203, 0.02780914306640625, 0.028945446014404297, 0.030081748962402344, 0.03121805191040039, 0.03235435485839844, 0.033490657806396484, 0.03462696075439453, 0.03576326370239258, 0.036899566650390625, 0.03803586959838867, 0.03917217254638672, 0.040308475494384766, 0.04144477844238281, 0.04258108139038086, 0.043717384338378906, 0.04485368728637695, 0.045989990234375]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 4.0, 3.0, 0.0, 2.0, 4.0, 2.0, 9.0, 9.0, 13.0, 20.0, 21.0, 35.0, 43.0, 78.0, 96.0, 95.0, 141.0, 195.0, 284.0, 420.0, 574.0, 926.0, 1302.0, 2132.0, 3516.0, 6591.0, 13806.0, 38345.0, 218188.0, 3815510.0, 54935.0, 17501.0, 8057.0, 4231.0, 2359.0, 1561.0, 1008.0, 697.0, 453.0, 312.0, 208.0, 179.0, 129.0, 88.0, 59.0, 35.0, 28.0, 27.0, 16.0, 12.0, 12.0, 6.0, 8.0, 2.0, 6.0, 3.0, 4.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.00881195068359375, -0.008537769317626953, -0.008263587951660156, -0.00798940658569336, -0.0077152252197265625, -0.007441043853759766, -0.007166862487792969, -0.006892681121826172, -0.006618499755859375, -0.006344318389892578, -0.006070137023925781, -0.005795955657958984, -0.0055217742919921875, -0.005247592926025391, -0.004973411560058594, -0.004699230194091797, -0.004425048828125, -0.004150867462158203, -0.0038766860961914062, -0.0036025047302246094, -0.0033283233642578125, -0.0030541419982910156, -0.0027799606323242188, -0.002505779266357422, -0.002231597900390625, -0.001957416534423828, -0.0016832351684570312, -0.0014090538024902344, -0.0011348724365234375, -0.0008606910705566406, -0.0005865097045898438, -0.0003123283386230469, -3.814697265625e-05, 0.00023603439331054688, 0.0005102157592773438, 0.0007843971252441406, 0.0010585784912109375, 0.0013327598571777344, 0.0016069412231445312, 0.0018811225891113281, 0.002155303955078125, 0.002429485321044922, 0.0027036666870117188, 0.0029778480529785156, 0.0032520294189453125, 0.0035262107849121094, 0.0038003921508789062, 0.004074573516845703, 0.0043487548828125, 0.004622936248779297, 0.004897117614746094, 0.005171298980712891, 0.0054454803466796875, 0.005719661712646484, 0.005993843078613281, 0.006268024444580078, 0.006542205810546875, 0.006816387176513672, 0.007090568542480469, 0.007364749908447266, 0.0076389312744140625, 0.00791311264038086, 0.008187294006347656, 0.008461475372314453, 0.00873565673828125]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 1.0, 5.0, 1.0, 4.0, 5.0, 6.0, 5.0, 4.0, 17.0, 22.0, 44.0, 97.0, 3504.0, 197.0, 61.0, 36.0, 27.0, 12.0, 10.0, 2.0, 3.0, 3.0, 3.0, 2.0, 5.0, 4.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0046234130859375, -0.0044977664947509766, -0.004372119903564453, -0.00424647331237793, -0.004120826721191406, -0.003995180130004883, -0.0038695335388183594, -0.003743886947631836, -0.0036182403564453125, -0.003492593765258789, -0.0033669471740722656, -0.003241300582885742, -0.0031156539916992188, -0.0029900074005126953, -0.002864360809326172, -0.0027387142181396484, -0.002613067626953125, -0.0024874210357666016, -0.002361774444580078, -0.0022361278533935547, -0.0021104812622070312, -0.001984834671020508, -0.0018591880798339844, -0.001733541488647461, -0.0016078948974609375, -0.001482248306274414, -0.0013566017150878906, -0.0012309551239013672, -0.0011053085327148438, -0.0009796619415283203, -0.0008540153503417969, -0.0007283687591552734, -0.00060272216796875, -0.00047707557678222656, -0.0003514289855957031, -0.0002257823944091797, -0.00010013580322265625, 2.5510787963867188e-05, 0.00015115737915039062, 0.00027680397033691406, 0.0004024505615234375, 0.0005280971527099609, 0.0006537437438964844, 0.0007793903350830078, 0.0009050369262695312, 0.0010306835174560547, 0.0011563301086425781, 0.0012819766998291016, 0.001407623291015625, 0.0015332698822021484, 0.0016589164733886719, 0.0017845630645751953, 0.0019102096557617188, 0.002035856246948242, 0.0021615028381347656, 0.002287149429321289, 0.0024127960205078125, 0.002538442611694336, 0.0026640892028808594, 0.002789735794067383, 0.0029153823852539062, 0.0030410289764404297, 0.003166675567626953, 0.0032923221588134766, 0.00341796875]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 8.0, 7.0, 27.0, 104.0, 208.0, 260.0, 223.0, 110.0, 40.0, 10.0, 5.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.014972272329032421, -0.014124548062682152, -0.013276824727654457, -0.012429100461304188, -0.011581376194953918, -0.01073365192860365, -0.00988592766225338, -0.009038204327225685, -0.008190480060875416, -0.0073427557945251465, -0.0064950319938361645, -0.0056473081931471825, -0.004799583926796913, -0.003951859660446644, -0.003104135859757662, -0.00225641205906868, -0.0014086877927184105, -0.0005609637591987848, 0.00028676027432084084, 0.0011344843078404665, 0.001982208341360092, 0.0028299326077103615, 0.0036776564083993435, 0.0045253802090883255, 0.005373104475438595, 0.006220828741788864, 0.007068552542477846, 0.007916276343166828, 0.008764000609517097, 0.009611724875867367, 0.010459449142217636, 0.01130717247724533, 0.012154895812273026, 0.013002620078623295, 0.013850344344973564, 0.014698067680001259, 0.015545791946351528, 0.016393516212701797, 0.017241239547729492, 0.018088962882757187, 0.01893668808043003, 0.019784411415457726, 0.02063213661313057, 0.021479859948158264, 0.02232758328318596, 0.023175308480858803, 0.024023031815886497, 0.02487075701355934, 0.025718480348587036, 0.02656620368361473, 0.027413928881287575, 0.02826165221631527, 0.029109377413988113, 0.029957100749015808, 0.030804824084043503, 0.0316525474190712, 0.03250027447938919, 0.033347997814416885, 0.03419572114944458, 0.03504344820976257, 0.03589117154479027, 0.03673889487981796, 0.03758661821484566, 0.03843434154987335, 0.03928206488490105]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 3.0, 1.0, 5.0, 3.0, 4.0, 8.0, 12.0, 8.0, 13.0, 18.0, 20.0, 27.0, 17.0, 34.0, 27.0, 36.0, 41.0, 43.0, 35.0, 46.0, 52.0, 37.0, 56.0, 49.0, 52.0, 44.0, 41.0, 30.0, 25.0, 28.0, 35.0, 30.0, 24.0, 21.0, 15.0, 16.0, 15.0, 10.0, 9.0, 5.0, 6.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.008809983730316162, -0.008571495302021503, -0.008333006873726845, -0.008094518445432186, -0.007856030017137527, -0.007617541588842869, -0.00737905316054821, -0.0071405647322535515, -0.006902076303958893, -0.006663587875664234, -0.0064250994473695755, -0.006186611019074917, -0.005948122590780258, -0.0057096341624855995, -0.005471145734190941, -0.005232657305896282, -0.0049941688776016235, -0.004755680449306965, -0.004517192021012306, -0.0042787035927176476, -0.004040215164422989, -0.0038017267361283302, -0.0035632383078336716, -0.003324749879539013, -0.0030862614512443542, -0.0028477730229496956, -0.002609284594655037, -0.0023707961663603783, -0.0021323077380657196, -0.001893819309771061, -0.0016553308814764023, -0.0014168424531817436, -0.001178354024887085, -0.0009398655965924263, -0.0007013771682977676, -0.000462888740003109, -0.00022440031170845032, 1.4088116586208344e-05, 0.000252576544880867, 0.0004910649731755257, 0.0007295534014701843, 0.000968041829764843, 0.0012065302580595016, 0.0014450186863541603, 0.001683507114648819, 0.0019219955429434776, 0.0021604839712381363, 0.002398972399532795, 0.0026374608278274536, 0.0028759492561221123, 0.003114437684416771, 0.0033529261127114296, 0.0035914145410060883, 0.003829902969300747, 0.004068391397595406, 0.004306879825890064, 0.004545368254184723, 0.0047838566824793816, 0.00502234511077404, 0.005260833539068699, 0.0054993219673633575, 0.005737810395658016, 0.005976298823952675, 0.0062147872522473335, 0.006453275680541992]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 4.0, 1.0, 2.0, 5.0, 7.0, 8.0, 10.0, 14.0, 18.0, 29.0, 26.0, 46.0, 71.0, 92.0, 136.0, 252.0, 415.0, 745.0, 1436.0, 3687.0, 13209.0, 104945.0, 713529.0, 182086.0, 19299.0, 4565.0, 1804.0, 823.0, 440.0, 305.0, 183.0, 109.0, 76.0, 51.0, 41.0, 26.0, 18.0, 13.0, 16.0, 1.0, 4.0, 7.0, 4.0, 2.0, 1.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0482177734375, -0.04682445526123047, -0.04543113708496094, -0.044037818908691406, -0.042644500732421875, -0.041251182556152344, -0.03985786437988281, -0.03846454620361328, -0.03707122802734375, -0.03567790985107422, -0.03428459167480469, -0.032891273498535156, -0.031497955322265625, -0.030104637145996094, -0.028711318969726562, -0.02731800079345703, -0.0259246826171875, -0.02453136444091797, -0.023138046264648438, -0.021744728088378906, -0.020351409912109375, -0.018958091735839844, -0.017564773559570312, -0.01617145538330078, -0.01477813720703125, -0.013384819030761719, -0.011991500854492188, -0.010598182678222656, -0.009204864501953125, -0.007811546325683594, -0.0064182281494140625, -0.005024909973144531, -0.003631591796875, -0.0022382736206054688, -0.0008449554443359375, 0.0005483627319335938, 0.001941680908203125, 0.0033349990844726562, 0.0047283172607421875, 0.006121635437011719, 0.00751495361328125, 0.008908271789550781, 0.010301589965820312, 0.011694908142089844, 0.013088226318359375, 0.014481544494628906, 0.015874862670898438, 0.01726818084716797, 0.0186614990234375, 0.02005481719970703, 0.021448135375976562, 0.022841453552246094, 0.024234771728515625, 0.025628089904785156, 0.027021408081054688, 0.02841472625732422, 0.02980804443359375, 0.03120136260986328, 0.03259468078613281, 0.033987998962402344, 0.035381317138671875, 0.036774635314941406, 0.03816795349121094, 0.03956127166748047, 0.04095458984375]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 2.0, 2.0, 5.0, 14.0, 24.0, 38.0, 79.0, 116.0, 159.0, 165.0, 133.0, 95.0, 74.0, 47.0, 19.0, 13.0, 7.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0267333984375, -0.025594711303710938, -0.024456024169921875, -0.023317337036132812, -0.02217864990234375, -0.021039962768554688, -0.019901275634765625, -0.018762588500976562, -0.0176239013671875, -0.016485214233398438, -0.015346527099609375, -0.014207839965820312, -0.01306915283203125, -0.011930465698242188, -0.010791778564453125, -0.009653091430664062, -0.008514404296875, -0.0073757171630859375, -0.006237030029296875, -0.0050983428955078125, -0.00395965576171875, -0.0028209686279296875, -0.001682281494140625, -0.0005435943603515625, 0.0005950927734375, 0.0017337799072265625, 0.002872467041015625, 0.0040111541748046875, 0.00514984130859375, 0.0062885284423828125, 0.007427215576171875, 0.008565902709960938, 0.00970458984375, 0.010843276977539062, 0.011981964111328125, 0.013120651245117188, 0.01425933837890625, 0.015398025512695312, 0.016536712646484375, 0.017675399780273438, 0.0188140869140625, 0.019952774047851562, 0.021091461181640625, 0.022230148315429688, 0.02336883544921875, 0.024507522583007812, 0.025646209716796875, 0.026784896850585938, 0.027923583984375, 0.029062271118164062, 0.030200958251953125, 0.03133964538574219, 0.03247833251953125, 0.03361701965332031, 0.034755706787109375, 0.03589439392089844, 0.0370330810546875, 0.03817176818847656, 0.039310455322265625, 0.04044914245605469, 0.04158782958984375, 0.04272651672363281, 0.043865203857421875, 0.04500389099121094, 0.046142578125]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 4.0, 1.0, 1.0, 4.0, 5.0, 6.0, 5.0, 7.0, 11.0, 22.0, 22.0, 32.0, 46.0, 52.0, 96.0, 153.0, 195.0, 293.0, 453.0, 871.0, 1888.0, 5322.0, 21166.0, 124335.0, 610623.0, 234344.0, 35525.0, 7802.0, 2493.0, 1127.0, 616.0, 332.0, 219.0, 135.0, 110.0, 71.0, 47.0, 34.0, 23.0, 19.0, 20.0, 10.0, 7.0, 3.0, 4.0, 6.0, 5.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0274505615234375, -0.02665853500366211, -0.02586650848388672, -0.025074481964111328, -0.024282455444335938, -0.023490428924560547, -0.022698402404785156, -0.021906375885009766, -0.021114349365234375, -0.020322322845458984, -0.019530296325683594, -0.018738269805908203, -0.017946243286132812, -0.017154216766357422, -0.01636219024658203, -0.01557016372680664, -0.01477813720703125, -0.01398611068725586, -0.013194084167480469, -0.012402057647705078, -0.011610031127929688, -0.010818004608154297, -0.010025978088378906, -0.009233951568603516, -0.008441925048828125, -0.007649898529052734, -0.006857872009277344, -0.006065845489501953, -0.0052738189697265625, -0.004481792449951172, -0.0036897659301757812, -0.0028977394104003906, -0.002105712890625, -0.0013136863708496094, -0.0005216598510742188, 0.0002703666687011719, 0.0010623931884765625, 0.0018544197082519531, 0.0026464462280273438, 0.0034384727478027344, 0.004230499267578125, 0.005022525787353516, 0.005814552307128906, 0.006606578826904297, 0.0073986053466796875, 0.008190631866455078, 0.008982658386230469, 0.00977468490600586, 0.01056671142578125, 0.01135873794555664, 0.012150764465332031, 0.012942790985107422, 0.013734817504882812, 0.014526844024658203, 0.015318870544433594, 0.016110897064208984, 0.016902923583984375, 0.017694950103759766, 0.018486976623535156, 0.019279003143310547, 0.020071029663085938, 0.020863056182861328, 0.02165508270263672, 0.02244710922241211, 0.0232391357421875]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 3.0, 1.0, 6.0, 5.0, 6.0, 5.0, 13.0, 6.0, 13.0, 11.0, 20.0, 19.0, 31.0, 27.0, 26.0, 36.0, 33.0, 27.0, 40.0, 46.0, 58.0, 34.0, 50.0, 51.0, 43.0, 35.0, 36.0, 36.0, 32.0, 34.0, 31.0, 25.0, 33.0, 14.0, 16.0, 23.0, 15.0, 11.0, 10.0, 13.0, 5.0, 6.0, 10.0, 6.0, 2.0, 1.0, 4.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0], "bins": [-0.04022216796875, -0.03903961181640625, -0.0378570556640625, -0.03667449951171875, -0.035491943359375, -0.03430938720703125, -0.0331268310546875, -0.03194427490234375, -0.03076171875, -0.02957916259765625, -0.0283966064453125, -0.02721405029296875, -0.026031494140625, -0.02484893798828125, -0.0236663818359375, -0.02248382568359375, -0.02130126953125, -0.02011871337890625, -0.0189361572265625, -0.01775360107421875, -0.016571044921875, -0.01538848876953125, -0.0142059326171875, -0.01302337646484375, -0.0118408203125, -0.01065826416015625, -0.0094757080078125, -0.00829315185546875, -0.007110595703125, -0.00592803955078125, -0.0047454833984375, -0.00356292724609375, -0.00238037109375, -0.00119781494140625, -1.52587890625e-05, 0.00116729736328125, 0.002349853515625, 0.00353240966796875, 0.0047149658203125, 0.00589752197265625, 0.007080078125, 0.00826263427734375, 0.0094451904296875, 0.01062774658203125, 0.011810302734375, 0.01299285888671875, 0.0141754150390625, 0.01535797119140625, 0.01654052734375, 0.01772308349609375, 0.0189056396484375, 0.02008819580078125, 0.021270751953125, 0.02245330810546875, 0.0236358642578125, 0.02481842041015625, 0.0260009765625, 0.02718353271484375, 0.0283660888671875, 0.02954864501953125, 0.030731201171875, 0.03191375732421875, 0.0330963134765625, 0.03427886962890625, 0.03546142578125]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 5.0, 5.0, 8.0, 8.0, 19.0, 57.0, 70.0, 182.0, 605.0, 3802.0, 144665.0, 883846.0, 13552.0, 1219.0, 280.0, 118.0, 63.0, 22.0, 16.0, 11.0, 2.0, 4.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0227203369140625, -0.0221555233001709, -0.021590709686279297, -0.021025896072387695, -0.020461082458496094, -0.019896268844604492, -0.01933145523071289, -0.01876664161682129, -0.018201828002929688, -0.017637014389038086, -0.017072200775146484, -0.016507387161254883, -0.01594257354736328, -0.01537775993347168, -0.014812946319580078, -0.014248132705688477, -0.013683319091796875, -0.013118505477905273, -0.012553691864013672, -0.01198887825012207, -0.011424064636230469, -0.010859251022338867, -0.010294437408447266, -0.009729623794555664, -0.009164810180664062, -0.008599996566772461, -0.00803518295288086, -0.007470369338989258, -0.006905555725097656, -0.006340742111206055, -0.005775928497314453, -0.0052111148834228516, -0.00464630126953125, -0.0040814876556396484, -0.003516674041748047, -0.0029518604278564453, -0.0023870468139648438, -0.0018222332000732422, -0.0012574195861816406, -0.0006926059722900391, -0.0001277923583984375, 0.00043702125549316406, 0.0010018348693847656, 0.0015666484832763672, 0.0021314620971679688, 0.0026962757110595703, 0.003261089324951172, 0.0038259029388427734, 0.004390716552734375, 0.0049555301666259766, 0.005520343780517578, 0.00608515739440918, 0.006649971008300781, 0.007214784622192383, 0.007779598236083984, 0.008344411849975586, 0.008909225463867188, 0.009474039077758789, 0.01003885269165039, 0.010603666305541992, 0.011168479919433594, 0.011733293533325195, 0.012298107147216797, 0.012862920761108398, 0.013427734375]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 3.0, 3.0, 6.0, 1.0, 3.0, 8.0, 10.0, 4.0, 13.0, 23.0, 9.0, 15.0, 34.0, 43.0, 17.0, 32.0, 49.0, 56.0, 19.0, 58.0, 74.0, 50.0, 45.0, 68.0, 53.0, 30.0, 38.0, 42.0, 35.0, 12.0, 31.0, 25.0, 21.0, 7.0, 14.0, 14.0, 14.0, 4.0, 7.0, 11.0, 2.0, 6.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.0994415283203125e-06, -2.9960647225379944e-06, -2.8926879167556763e-06, -2.789311110973358e-06, -2.68593430519104e-06, -2.582557499408722e-06, -2.479180693626404e-06, -2.3758038878440857e-06, -2.2724270820617676e-06, -2.1690502762794495e-06, -2.0656734704971313e-06, -1.9622966647148132e-06, -1.8589198589324951e-06, -1.755543053150177e-06, -1.6521662473678589e-06, -1.5487894415855408e-06, -1.4454126358032227e-06, -1.3420358300209045e-06, -1.2386590242385864e-06, -1.1352822184562683e-06, -1.0319054126739502e-06, -9.285286068916321e-07, -8.25151801109314e-07, -7.217749953269958e-07, -6.183981895446777e-07, -5.150213837623596e-07, -4.116445779800415e-07, -3.082677721977234e-07, -2.0489096641540527e-07, -1.0151416063308716e-07, 1.862645149230957e-09, 1.0523945093154907e-07, 2.086162567138672e-07, 3.119930624961853e-07, 4.153698682785034e-07, 5.187466740608215e-07, 6.221234798431396e-07, 7.255002856254578e-07, 8.288770914077759e-07, 9.32253897190094e-07, 1.0356307029724121e-06, 1.1390075087547302e-06, 1.2423843145370483e-06, 1.3457611203193665e-06, 1.4491379261016846e-06, 1.5525147318840027e-06, 1.6558915376663208e-06, 1.759268343448639e-06, 1.862645149230957e-06, 1.966021955013275e-06, 2.0693987607955933e-06, 2.1727755665779114e-06, 2.2761523723602295e-06, 2.3795291781425476e-06, 2.4829059839248657e-06, 2.586282789707184e-06, 2.689659595489502e-06, 2.79303640127182e-06, 2.896413207054138e-06, 2.9997900128364563e-06, 3.1031668186187744e-06, 3.2065436244010925e-06, 3.3099204301834106e-06, 3.4132972359657288e-06, 3.516674041748047e-06]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 0.0, 8.0, 7.0, 12.0, 20.0, 34.0, 105.0, 208.0, 600.0, 2831.0, 44693.0, 948076.0, 47951.0, 2950.0, 688.0, 199.0, 79.0, 49.0, 24.0, 7.0, 6.0, 8.0, 2.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.018646240234375, -0.018149137496948242, -0.017652034759521484, -0.017154932022094727, -0.01665782928466797, -0.01616072654724121, -0.015663623809814453, -0.015166521072387695, -0.014669418334960938, -0.01417231559753418, -0.013675212860107422, -0.013178110122680664, -0.012681007385253906, -0.012183904647827148, -0.01168680191040039, -0.011189699172973633, -0.010692596435546875, -0.010195493698120117, -0.00969839096069336, -0.009201288223266602, -0.008704185485839844, -0.008207082748413086, -0.007709980010986328, -0.00721287727355957, -0.0067157745361328125, -0.006218671798706055, -0.005721569061279297, -0.005224466323852539, -0.004727363586425781, -0.0042302608489990234, -0.0037331581115722656, -0.003236055374145508, -0.00273895263671875, -0.002241849899291992, -0.0017447471618652344, -0.0012476444244384766, -0.0007505416870117188, -0.00025343894958496094, 0.00024366378784179688, 0.0007407665252685547, 0.0012378692626953125, 0.0017349720001220703, 0.002232074737548828, 0.002729177474975586, 0.0032262802124023438, 0.0037233829498291016, 0.004220485687255859, 0.004717588424682617, 0.005214691162109375, 0.005711793899536133, 0.006208896636962891, 0.0067059993743896484, 0.007203102111816406, 0.007700204849243164, 0.008197307586669922, 0.00869441032409668, 0.009191513061523438, 0.009688615798950195, 0.010185718536376953, 0.010682821273803711, 0.011179924011230469, 0.011677026748657227, 0.012174129486083984, 0.012671232223510742, 0.0131683349609375]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 2.0, 4.0, 3.0, 11.0, 13.0, 10.0, 21.0, 21.0, 26.0, 48.0, 73.0, 98.0, 127.0, 147.0, 116.0, 96.0, 60.0, 43.0, 29.0, 20.0, 12.0, 6.0, 7.0, 4.0, 5.0, 5.0, 3.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01300811767578125, -0.012607932090759277, -0.012207746505737305, -0.011807560920715332, -0.01140737533569336, -0.011007189750671387, -0.010607004165649414, -0.010206818580627441, -0.009806632995605469, -0.009406447410583496, -0.009006261825561523, -0.00860607624053955, -0.008205890655517578, -0.0078057050704956055, -0.007405519485473633, -0.00700533390045166, -0.0066051483154296875, -0.006204962730407715, -0.005804777145385742, -0.0054045915603637695, -0.005004405975341797, -0.004604220390319824, -0.0042040348052978516, -0.003803849220275879, -0.0034036636352539062, -0.0030034780502319336, -0.002603292465209961, -0.0022031068801879883, -0.0018029212951660156, -0.001402735710144043, -0.0010025501251220703, -0.0006023645401000977, -0.000202178955078125, 0.00019800662994384766, 0.0005981922149658203, 0.000998377799987793, 0.0013985633850097656, 0.0017987489700317383, 0.002198934555053711, 0.0025991201400756836, 0.0029993057250976562, 0.003399491310119629, 0.0037996768951416016, 0.004199862480163574, 0.004600048065185547, 0.0050002336502075195, 0.005400419235229492, 0.005800604820251465, 0.0062007904052734375, 0.00660097599029541, 0.007001161575317383, 0.0074013471603393555, 0.007801532745361328, 0.0082017183303833, 0.008601903915405273, 0.009002089500427246, 0.009402275085449219, 0.009802460670471191, 0.010202646255493164, 0.010602831840515137, 0.01100301742553711, 0.011403203010559082, 0.011803388595581055, 0.012203574180603027, 0.012603759765625]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 5.0, 6.0, 11.0, 19.0, 35.0, 90.0, 213.0, 246.0, 206.0, 102.0, 38.0, 19.0, 7.0, 7.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1515893042087555, -0.14292378723621368, -0.13425827026367188, -0.12559276819229126, -0.11692725121974945, -0.10826173424720764, -0.09959622472524643, -0.09093071520328522, -0.08226519823074341, -0.0735996812582016, -0.06493417173624039, -0.056268658488988876, -0.047603145241737366, -0.038937631994485855, -0.030272118747234344, -0.021606605499982834, -0.012941092252731323, -0.004275579005479813, 0.004389934241771698, 0.013055447489023209, 0.02172096073627472, 0.03038647398352623, 0.03905198723077774, 0.04771750047802925, 0.05638301372528076, 0.06504853069782257, 0.07371404021978378, 0.082379549741745, 0.0910450667142868, 0.09971058368682861, 0.10837609320878983, 0.11704160273075104, 0.12570708990097046, 0.13437260687351227, 0.14303812384605408, 0.1517036259174347, 0.1603691428899765, 0.1690346598625183, 0.17770016193389893, 0.18636567890644073, 0.19503119587898254, 0.20369671285152435, 0.21236222982406616, 0.22102773189544678, 0.2296932488679886, 0.2383587658405304, 0.247024267911911, 0.255689799785614, 0.26435530185699463, 0.27302080392837524, 0.28168633580207825, 0.29035183787345886, 0.29901736974716187, 0.3076828718185425, 0.3163483738899231, 0.3250139057636261, 0.3336794078350067, 0.34234490990638733, 0.35101044178009033, 0.35967594385147095, 0.36834144592285156, 0.37700697779655457, 0.3856724798679352, 0.3943380117416382, 0.4030035138130188]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 2.0, 0.0, 2.0, 0.0, 5.0, 7.0, 5.0, 6.0, 7.0, 9.0, 10.0, 10.0, 17.0, 10.0, 20.0, 21.0, 17.0, 18.0, 34.0, 34.0, 30.0, 41.0, 31.0, 47.0, 33.0, 35.0, 48.0, 35.0, 51.0, 50.0, 35.0, 33.0, 38.0, 29.0, 22.0, 30.0, 23.0, 19.0, 23.0, 19.0, 16.0, 8.0, 11.0, 13.0, 8.0, 9.0, 7.0, 8.0, 7.0, 4.0, 5.0, 4.0, 1.0, 3.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.14930397272109985, -0.14441266655921936, -0.13952137529850006, -0.13463006913661957, -0.12973877787590027, -0.12484747171401978, -0.11995617300271988, -0.11506487429141998, -0.11017357558012009, -0.10528227686882019, -0.1003909781575203, -0.0954996794462204, -0.0906083732843399, -0.0857170820236206, -0.08082577586174011, -0.07593447715044022, -0.07104317843914032, -0.06615187972784042, -0.06126058101654053, -0.05636927857995033, -0.051477979868650436, -0.04658668115735054, -0.041695378720760345, -0.03680408000946045, -0.03191278129816055, -0.027021482586860657, -0.02213018201291561, -0.017238881438970566, -0.01234758272767067, -0.007456284016370773, -0.002564983442425728, 0.0023263171315193176, 0.007217615842819214, 0.012108915485441685, 0.017000215128064156, 0.0218915157020092, 0.026782814413309097, 0.031674113124608994, 0.03656541556119919, 0.041456714272499084, 0.04634801298379898, 0.05123931169509888, 0.05613061040639877, 0.06102191284298897, 0.06591321527957916, 0.07080450654029846, 0.07569581270217896, 0.08058711141347885, 0.08547841012477875, 0.09036970883607864, 0.09526100754737854, 0.10015230625867844, 0.10504360496997833, 0.10993491113185883, 0.11482620984315872, 0.11971750855445862, 0.12460880726575851, 0.1295001059770584, 0.1343914121389389, 0.1392827033996582, 0.1441740095615387, 0.149065300822258, 0.1539566069841385, 0.1588478982448578, 0.16373920440673828]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 6.0, 4.0, 5.0, 12.0, 10.0, 19.0, 35.0, 39.0, 89.0, 162.0, 372.0, 813.0, 2294.0, 11288.0, 4132538.0, 40002.0, 4338.0, 1224.0, 522.0, 223.0, 124.0, 54.0, 33.0, 21.0, 14.0, 11.0, 5.0, 10.0, 6.0, 5.0, 0.0, 2.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.025482177734375, -0.024309635162353516, -0.02313709259033203, -0.021964550018310547, -0.020792007446289062, -0.019619464874267578, -0.018446922302246094, -0.01727437973022461, -0.016101837158203125, -0.01492929458618164, -0.013756752014160156, -0.012584209442138672, -0.011411666870117188, -0.010239124298095703, -0.009066581726074219, -0.007894039154052734, -0.00672149658203125, -0.005548954010009766, -0.004376411437988281, -0.003203868865966797, -0.0020313262939453125, -0.0008587837219238281, 0.00031375885009765625, 0.0014863014221191406, 0.002658843994140625, 0.0038313865661621094, 0.005003929138183594, 0.006176471710205078, 0.0073490142822265625, 0.008521556854248047, 0.009694099426269531, 0.010866641998291016, 0.0120391845703125, 0.013211727142333984, 0.014384269714355469, 0.015556812286376953, 0.016729354858398438, 0.017901897430419922, 0.019074440002441406, 0.02024698257446289, 0.021419525146484375, 0.02259206771850586, 0.023764610290527344, 0.024937152862548828, 0.026109695434570312, 0.027282238006591797, 0.02845478057861328, 0.029627323150634766, 0.03079986572265625, 0.031972408294677734, 0.03314495086669922, 0.0343174934387207, 0.03549003601074219, 0.03666257858276367, 0.037835121154785156, 0.03900766372680664, 0.040180206298828125, 0.04135274887084961, 0.042525291442871094, 0.04369783401489258, 0.04487037658691406, 0.04604291915893555, 0.04721546173095703, 0.048388004302978516, 0.049560546875]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 3.0, 5.0, 7.0, 18.0, 17.0, 46.0, 83.0, 113.0, 160.0, 143.0, 152.0, 101.0, 65.0, 44.0, 25.0, 7.0, 6.0, 2.0, 1.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0267181396484375, -0.025575876235961914, -0.024433612823486328, -0.023291349411010742, -0.022149085998535156, -0.02100682258605957, -0.019864559173583984, -0.0187222957611084, -0.017580032348632812, -0.016437768936157227, -0.01529550552368164, -0.014153242111206055, -0.013010978698730469, -0.011868715286254883, -0.010726451873779297, -0.009584188461303711, -0.008441925048828125, -0.007299661636352539, -0.006157398223876953, -0.005015134811401367, -0.0038728713989257812, -0.0027306079864501953, -0.0015883445739746094, -0.00044608116149902344, 0.0006961822509765625, 0.0018384456634521484, 0.0029807090759277344, 0.00412297248840332, 0.005265235900878906, 0.006407499313354492, 0.007549762725830078, 0.008692026138305664, 0.00983428955078125, 0.010976552963256836, 0.012118816375732422, 0.013261079788208008, 0.014403343200683594, 0.01554560661315918, 0.016687870025634766, 0.01783013343811035, 0.018972396850585938, 0.020114660263061523, 0.02125692367553711, 0.022399187088012695, 0.02354145050048828, 0.024683713912963867, 0.025825977325439453, 0.02696824073791504, 0.028110504150390625, 0.02925276756286621, 0.030395030975341797, 0.03153729438781738, 0.03267955780029297, 0.033821821212768555, 0.03496408462524414, 0.03610634803771973, 0.03724861145019531, 0.0383908748626709, 0.039533138275146484, 0.04067540168762207, 0.041817665100097656, 0.04295992851257324, 0.04410219192504883, 0.045244455337524414, 0.04638671875]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 7.0, 3.0, 5.0, 8.0, 10.0, 14.0, 28.0, 24.0, 49.0, 64.0, 111.0, 177.0, 375.0, 897.0, 2587.0, 9303.0, 77464.0, 4067430.0, 27623.0, 5048.0, 1655.0, 663.0, 292.0, 166.0, 88.0, 62.0, 47.0, 28.0, 21.0, 18.0, 8.0, 11.0, 5.0, 1.0, 3.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01247406005859375, -0.011938691139221191, -0.011403322219848633, -0.010867953300476074, -0.010332584381103516, -0.009797215461730957, -0.009261846542358398, -0.00872647762298584, -0.008191108703613281, -0.007655739784240723, -0.007120370864868164, -0.0065850019454956055, -0.006049633026123047, -0.005514264106750488, -0.00497889518737793, -0.004443526268005371, -0.0039081573486328125, -0.003372788429260254, -0.0028374195098876953, -0.0023020505905151367, -0.0017666816711425781, -0.0012313127517700195, -0.0006959438323974609, -0.00016057491302490234, 0.00037479400634765625, 0.0009101629257202148, 0.0014455318450927734, 0.001980900764465332, 0.0025162696838378906, 0.0030516386032104492, 0.003587007522583008, 0.004122376441955566, 0.004657745361328125, 0.005193114280700684, 0.005728483200073242, 0.006263852119445801, 0.006799221038818359, 0.007334589958190918, 0.007869958877563477, 0.008405327796936035, 0.008940696716308594, 0.009476065635681152, 0.010011434555053711, 0.01054680347442627, 0.011082172393798828, 0.011617541313171387, 0.012152910232543945, 0.012688279151916504, 0.013223648071289062, 0.013759016990661621, 0.01429438591003418, 0.014829754829406738, 0.015365123748779297, 0.015900492668151855, 0.016435861587524414, 0.016971230506896973, 0.01750659942626953, 0.01804196834564209, 0.01857733726501465, 0.019112706184387207, 0.019648075103759766, 0.020183444023132324, 0.020718812942504883, 0.02125418186187744, 0.02178955078125]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 4.0, 2.0, 2.0, 3.0, 9.0, 28.0, 54.0, 88.0, 3706.0, 104.0, 36.0, 29.0, 6.0, 2.0, 3.0, 4.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0016956329345703125, -0.0015499889850616455, -0.0014043450355529785, -0.0012587010860443115, -0.0011130571365356445, -0.0009674131870269775, -0.0008217692375183105, -0.0006761252880096436, -0.0005304813385009766, -0.00038483738899230957, -0.00023919343948364258, -9.354948997497559e-05, 5.2094459533691406e-05, 0.0001977384090423584, 0.0003433823585510254, 0.0004890263080596924, 0.0006346702575683594, 0.0007803142070770264, 0.0009259581565856934, 0.0010716021060943604, 0.0012172460556030273, 0.0013628900051116943, 0.0015085339546203613, 0.0016541779041290283, 0.0017998218536376953, 0.0019454658031463623, 0.0020911097526550293, 0.0022367537021636963, 0.0023823976516723633, 0.0025280416011810303, 0.0026736855506896973, 0.0028193295001983643, 0.0029649734497070312, 0.0031106173992156982, 0.0032562613487243652, 0.0034019052982330322, 0.0035475492477416992, 0.003693193197250366, 0.003838837146759033, 0.0039844810962677, 0.004130125045776367, 0.004275768995285034, 0.004421412944793701, 0.004567056894302368, 0.004712700843811035, 0.004858344793319702, 0.005003988742828369, 0.005149632692337036, 0.005295276641845703, 0.00544092059135437, 0.005586564540863037, 0.005732208490371704, 0.005877852439880371, 0.006023496389389038, 0.006169140338897705, 0.006314784288406372, 0.006460428237915039, 0.006606072187423706, 0.006751716136932373, 0.00689736008644104, 0.007043004035949707, 0.007188647985458374, 0.007334291934967041, 0.007479935884475708, 0.007625579833984375]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 3.0, 2.0, 9.0, 15.0, 50.0, 106.0, 243.0, 294.0, 164.0, 78.0, 26.0, 9.0, 8.0, 2.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.006665069609880447, -0.006036011036485434, -0.00540695246309042, -0.004777893424034119, -0.004148834850639105, -0.003519776277244091, -0.0028907174710184336, -0.002261658664792776, -0.0016326000913977623, -0.0010035414015874267, -0.000374482711777091, 0.0002545759780332446, 0.0008836346678435802, 0.001512693241238594, 0.0021417520474642515, 0.002770810853689909, 0.003399869427084923, 0.004028928000479937, 0.00465798657387495, 0.0052870456129312515, 0.005916104186326265, 0.006545162759721279, 0.00717422179877758, 0.007803280372172594, 0.008432338945567608, 0.009061397984623909, 0.009690456092357635, 0.010319515131413937, 0.010948574170470238, 0.011577632278203964, 0.012206691317260265, 0.012835750356316566, 0.013464808464050293, 0.014093867503106594, 0.01472292561084032, 0.015351984649896622, 0.015981042757630348, 0.016610100865364075, 0.01723916083574295, 0.017868218943476677, 0.018497277051210403, 0.01912633515894413, 0.019755395129323006, 0.020384453237056732, 0.02101351134479046, 0.021642569452524185, 0.02227162942290306, 0.022900687530636787, 0.023529747501015663, 0.02415880560874939, 0.024787865579128265, 0.025416923686861992, 0.02604598179459572, 0.026675041764974594, 0.02730409987270832, 0.027933157980442047, 0.028562217950820923, 0.02919127605855465, 0.029820336028933525, 0.03044939413666725, 0.031078452244400978, 0.031707510352134705, 0.03233657032251358, 0.032965630292892456, 0.03359468653798103]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 6.0, 5.0, 5.0, 6.0, 10.0, 7.0, 12.0, 13.0, 16.0, 21.0, 27.0, 26.0, 29.0, 36.0, 42.0, 50.0, 45.0, 36.0, 42.0, 48.0, 48.0, 37.0, 37.0, 44.0, 43.0, 42.0, 29.0, 45.0, 41.0, 29.0, 32.0, 23.0, 16.0, 16.0, 17.0, 2.0, 8.0, 6.0, 2.0, 4.0, 1.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.006378650665283203, -0.006181987933814526, -0.005985325202345848, -0.0057886624708771706, -0.005591999739408493, -0.0053953370079398155, -0.005198674276471138, -0.0050020115450024605, -0.004805348813533783, -0.0046086860820651054, -0.004412023350596428, -0.00421536061912775, -0.004018697887659073, -0.0038220351561903954, -0.003625372424721718, -0.0034287096932530403, -0.003232046961784363, -0.0030353842303156853, -0.0028387214988470078, -0.0026420587673783302, -0.0024453960359096527, -0.002248733304440975, -0.0020520705729722977, -0.0018554078415036201, -0.0016587451100349426, -0.001462082378566265, -0.0012654196470975876, -0.00106875691562891, -0.0008720941841602325, -0.000675431452691555, -0.0004787687212228775, -0.0002821059897542, -8.544325828552246e-05, 0.00011121947318315506, 0.0003078822046518326, 0.0005045449361205101, 0.0007012076675891876, 0.0008978703990578651, 0.0010945331305265427, 0.0012911958619952202, 0.0014878585934638977, 0.0016845213249325752, 0.0018811840564012527, 0.0020778467878699303, 0.002274509519338608, 0.0024711722508072853, 0.002667834982275963, 0.0028644977137446404, 0.003061160445213318, 0.0032578231766819954, 0.003454485908150673, 0.0036511486396193504, 0.003847811371088028, 0.0040444741025567055, 0.004241136834025383, 0.0044377995654940605, 0.004634462296962738, 0.0048311250284314156, 0.005027787759900093, 0.005224450491368771, 0.005421113222837448, 0.005617775954306126, 0.005814438685774803, 0.006011101417243481, 0.006207764148712158]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 4.0, 1.0, 4.0, 6.0, 10.0, 9.0, 8.0, 10.0, 23.0, 16.0, 24.0, 33.0, 46.0, 54.0, 93.0, 107.0, 155.0, 237.0, 364.0, 534.0, 971.0, 1733.0, 3839.0, 12738.0, 63992.0, 477966.0, 412567.0, 54058.0, 11218.0, 3658.0, 1589.0, 842.0, 535.0, 328.0, 202.0, 174.0, 95.0, 67.0, 62.0, 56.0, 46.0, 23.0, 14.0, 14.0, 14.0, 7.0, 7.0, 8.0, 2.0, 2.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.029296875, -0.02828502655029297, -0.027273178100585938, -0.026261329650878906, -0.025249481201171875, -0.024237632751464844, -0.023225784301757812, -0.02221393585205078, -0.02120208740234375, -0.02019023895263672, -0.019178390502929688, -0.018166542053222656, -0.017154693603515625, -0.016142845153808594, -0.015130996704101562, -0.014119148254394531, -0.0131072998046875, -0.012095451354980469, -0.011083602905273438, -0.010071754455566406, -0.009059906005859375, -0.008048057556152344, -0.0070362091064453125, -0.006024360656738281, -0.00501251220703125, -0.004000663757324219, -0.0029888153076171875, -0.0019769668579101562, -0.000965118408203125, 4.673004150390625e-05, 0.0010585784912109375, 0.0020704269409179688, 0.003082275390625, 0.004094123840332031, 0.0051059722900390625, 0.006117820739746094, 0.007129669189453125, 0.008141517639160156, 0.009153366088867188, 0.010165214538574219, 0.01117706298828125, 0.012188911437988281, 0.013200759887695312, 0.014212608337402344, 0.015224456787109375, 0.016236305236816406, 0.017248153686523438, 0.01826000213623047, 0.0192718505859375, 0.02028369903564453, 0.021295547485351562, 0.022307395935058594, 0.023319244384765625, 0.024331092834472656, 0.025342941284179688, 0.02635478973388672, 0.02736663818359375, 0.02837848663330078, 0.029390335083007812, 0.030402183532714844, 0.031414031982421875, 0.032425880432128906, 0.03343772888183594, 0.03444957733154297, 0.03546142578125]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 3.0, 5.0, 6.0, 19.0, 21.0, 37.0, 92.0, 102.0, 160.0, 140.0, 154.0, 110.0, 59.0, 47.0, 22.0, 11.0, 7.0, 2.0, 1.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0267181396484375, -0.025577306747436523, -0.024436473846435547, -0.02329564094543457, -0.022154808044433594, -0.021013975143432617, -0.01987314224243164, -0.018732309341430664, -0.017591476440429688, -0.01645064353942871, -0.015309810638427734, -0.014168977737426758, -0.013028144836425781, -0.011887311935424805, -0.010746479034423828, -0.009605646133422852, -0.008464813232421875, -0.0073239803314208984, -0.006183147430419922, -0.005042314529418945, -0.0039014816284179688, -0.002760648727416992, -0.0016198158264160156, -0.00047898292541503906, 0.0006618499755859375, 0.001802682876586914, 0.0029435157775878906, 0.004084348678588867, 0.005225181579589844, 0.00636601448059082, 0.007506847381591797, 0.008647680282592773, 0.00978851318359375, 0.010929346084594727, 0.012070178985595703, 0.01321101188659668, 0.014351844787597656, 0.015492677688598633, 0.01663351058959961, 0.017774343490600586, 0.018915176391601562, 0.02005600929260254, 0.021196842193603516, 0.022337675094604492, 0.02347850799560547, 0.024619340896606445, 0.025760173797607422, 0.0269010066986084, 0.028041839599609375, 0.02918267250061035, 0.030323505401611328, 0.031464338302612305, 0.03260517120361328, 0.03374600410461426, 0.034886837005615234, 0.03602766990661621, 0.03716850280761719, 0.038309335708618164, 0.03945016860961914, 0.04059100151062012, 0.041731834411621094, 0.04287266731262207, 0.04401350021362305, 0.04515433311462402, 0.046295166015625]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 5.0, 5.0, 5.0, 7.0, 6.0, 9.0, 21.0, 21.0, 25.0, 40.0, 47.0, 67.0, 74.0, 92.0, 160.0, 214.0, 334.0, 666.0, 1513.0, 4546.0, 17944.0, 102519.0, 585906.0, 279209.0, 41325.0, 8912.0, 2477.0, 958.0, 485.0, 265.0, 201.0, 113.0, 99.0, 71.0, 53.0, 46.0, 34.0, 20.0, 20.0, 14.0, 13.0, 3.0, 6.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.0229339599609375, -0.02223372459411621, -0.021533489227294922, -0.020833253860473633, -0.020133018493652344, -0.019432783126831055, -0.018732547760009766, -0.018032312393188477, -0.017332077026367188, -0.0166318416595459, -0.01593160629272461, -0.01523137092590332, -0.014531135559082031, -0.013830900192260742, -0.013130664825439453, -0.012430429458618164, -0.011730194091796875, -0.011029958724975586, -0.010329723358154297, -0.009629487991333008, -0.008929252624511719, -0.00822901725769043, -0.007528781890869141, -0.0068285465240478516, -0.0061283111572265625, -0.0054280757904052734, -0.004727840423583984, -0.004027605056762695, -0.0033273696899414062, -0.002627134323120117, -0.0019268989562988281, -0.001226663589477539, -0.00052642822265625, 0.00017380714416503906, 0.0008740425109863281, 0.0015742778778076172, 0.0022745132446289062, 0.0029747486114501953, 0.0036749839782714844, 0.0043752193450927734, 0.0050754547119140625, 0.0057756900787353516, 0.006475925445556641, 0.00717616081237793, 0.007876396179199219, 0.008576631546020508, 0.009276866912841797, 0.009977102279663086, 0.010677337646484375, 0.011377573013305664, 0.012077808380126953, 0.012778043746948242, 0.013478279113769531, 0.01417851448059082, 0.01487874984741211, 0.015578985214233398, 0.016279220581054688, 0.016979455947875977, 0.017679691314697266, 0.018379926681518555, 0.019080162048339844, 0.019780397415161133, 0.020480632781982422, 0.02118086814880371, 0.021881103515625]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 5.0, 9.0, 7.0, 12.0, 12.0, 13.0, 12.0, 20.0, 12.0, 22.0, 25.0, 29.0, 25.0, 36.0, 41.0, 50.0, 47.0, 39.0, 50.0, 49.0, 43.0, 36.0, 44.0, 44.0, 43.0, 41.0, 31.0, 34.0, 20.0, 24.0, 23.0, 19.0, 14.0, 11.0, 15.0, 11.0, 13.0, 5.0, 5.0, 4.0, 2.0, 1.0, 8.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.032928466796875, -0.031737327575683594, -0.030546188354492188, -0.02935504913330078, -0.028163909912109375, -0.02697277069091797, -0.025781631469726562, -0.024590492248535156, -0.02339935302734375, -0.022208213806152344, -0.021017074584960938, -0.01982593536376953, -0.018634796142578125, -0.01744365692138672, -0.016252517700195312, -0.015061378479003906, -0.0138702392578125, -0.012679100036621094, -0.011487960815429688, -0.010296821594238281, -0.009105682373046875, -0.007914543151855469, -0.0067234039306640625, -0.005532264709472656, -0.00434112548828125, -0.0031499862670898438, -0.0019588470458984375, -0.0007677078247070312, 0.000423431396484375, 0.0016145706176757812, 0.0028057098388671875, 0.003996849060058594, 0.00518798828125, 0.006379127502441406, 0.0075702667236328125, 0.008761405944824219, 0.009952545166015625, 0.011143684387207031, 0.012334823608398438, 0.013525962829589844, 0.01471710205078125, 0.015908241271972656, 0.017099380493164062, 0.01829051971435547, 0.019481658935546875, 0.02067279815673828, 0.021863937377929688, 0.023055076599121094, 0.0242462158203125, 0.025437355041503906, 0.026628494262695312, 0.02781963348388672, 0.029010772705078125, 0.03020191192626953, 0.03139305114746094, 0.032584190368652344, 0.03377532958984375, 0.034966468811035156, 0.03615760803222656, 0.03734874725341797, 0.038539886474609375, 0.03973102569580078, 0.04092216491699219, 0.042113304138183594, 0.043304443359375]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 7.0, 2.0, 2.0, 7.0, 7.0, 11.0, 18.0, 34.0, 33.0, 54.0, 94.0, 151.0, 329.0, 666.0, 1862.0, 8573.0, 91688.0, 811834.0, 119743.0, 9995.0, 2003.0, 646.0, 341.0, 160.0, 103.0, 53.0, 46.0, 18.0, 18.0, 24.0, 8.0, 11.0, 4.0, 1.0, 3.0, 2.0, 2.0, 2.0, 3.0, 3.0, 1.0, 4.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00897216796875, -0.00871974229812622, -0.008467316627502441, -0.008214890956878662, -0.007962465286254883, -0.0077100396156311035, -0.007457613945007324, -0.007205188274383545, -0.006952762603759766, -0.006700336933135986, -0.006447911262512207, -0.006195485591888428, -0.0059430599212646484, -0.005690634250640869, -0.00543820858001709, -0.0051857829093933105, -0.004933357238769531, -0.004680931568145752, -0.004428505897521973, -0.004176080226898193, -0.003923654556274414, -0.0036712288856506348, -0.0034188032150268555, -0.003166377544403076, -0.002913951873779297, -0.0026615262031555176, -0.0024091005325317383, -0.002156674861907959, -0.0019042491912841797, -0.0016518235206604004, -0.001399397850036621, -0.0011469721794128418, -0.0008945465087890625, -0.0006421208381652832, -0.0003896951675415039, -0.0001372694969177246, 0.00011515617370605469, 0.000367581844329834, 0.0006200075149536133, 0.0008724331855773926, 0.0011248588562011719, 0.0013772845268249512, 0.0016297101974487305, 0.0018821358680725098, 0.002134561538696289, 0.0023869872093200684, 0.0026394128799438477, 0.002891838550567627, 0.0031442642211914062, 0.0033966898918151855, 0.003649115562438965, 0.003901541233062744, 0.0041539669036865234, 0.004406392574310303, 0.004658818244934082, 0.004911243915557861, 0.005163669586181641, 0.00541609525680542, 0.005668520927429199, 0.0059209465980529785, 0.006173372268676758, 0.006425797939300537, 0.006678223609924316, 0.006930649280548096, 0.007183074951171875]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 6.0, 7.0, 7.0, 11.0, 7.0, 23.0, 7.0, 22.0, 16.0, 32.0, 14.0, 48.0, 28.0, 39.0, 49.0, 38.0, 55.0, 35.0, 72.0, 34.0, 54.0, 37.0, 63.0, 21.0, 44.0, 30.0, 43.0, 17.0, 31.0, 32.0, 11.0, 22.0, 8.0, 10.0, 3.0, 16.0, 3.0, 4.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0], "bins": [-3.4570693969726562e-06, -3.3657997846603394e-06, -3.2745301723480225e-06, -3.1832605600357056e-06, -3.0919909477233887e-06, -3.0007213354110718e-06, -2.909451723098755e-06, -2.818182110786438e-06, -2.726912498474121e-06, -2.635642886161804e-06, -2.5443732738494873e-06, -2.4531036615371704e-06, -2.3618340492248535e-06, -2.2705644369125366e-06, -2.1792948246002197e-06, -2.088025212287903e-06, -1.996755599975586e-06, -1.905485987663269e-06, -1.8142163753509521e-06, -1.7229467630386353e-06, -1.6316771507263184e-06, -1.5404075384140015e-06, -1.4491379261016846e-06, -1.3578683137893677e-06, -1.2665987014770508e-06, -1.1753290891647339e-06, -1.084059476852417e-06, -9.927898645401e-07, -9.015202522277832e-07, -8.102506399154663e-07, -7.189810276031494e-07, -6.277114152908325e-07, -5.364418029785156e-07, -4.4517219066619873e-07, -3.5390257835388184e-07, -2.6263296604156494e-07, -1.7136335372924805e-07, -8.009374141693115e-08, 1.1175870895385742e-08, 1.0244548320770264e-07, 1.9371509552001953e-07, 2.849847078323364e-07, 3.762543201446533e-07, 4.675239324569702e-07, 5.587935447692871e-07, 6.50063157081604e-07, 7.413327693939209e-07, 8.326023817062378e-07, 9.238719940185547e-07, 1.0151416063308716e-06, 1.1064112186431885e-06, 1.1976808309555054e-06, 1.2889504432678223e-06, 1.3802200555801392e-06, 1.471489667892456e-06, 1.562759280204773e-06, 1.6540288925170898e-06, 1.7452985048294067e-06, 1.8365681171417236e-06, 1.9278377294540405e-06, 2.0191073417663574e-06, 2.1103769540786743e-06, 2.201646566390991e-06, 2.292916178703308e-06, 2.384185791015625e-06]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 6.0, 3.0, 3.0, 8.0, 10.0, 11.0, 16.0, 31.0, 37.0, 56.0, 107.0, 178.0, 277.0, 442.0, 986.0, 2483.0, 8743.0, 53394.0, 543881.0, 388567.0, 38435.0, 6871.0, 2082.0, 883.0, 418.0, 228.0, 133.0, 77.0, 60.0, 45.0, 15.0, 20.0, 14.0, 12.0, 6.0, 7.0, 6.0, 3.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.006114959716796875, -0.0059233903884887695, -0.005731821060180664, -0.005540251731872559, -0.005348682403564453, -0.005157113075256348, -0.004965543746948242, -0.004773974418640137, -0.004582405090332031, -0.004390835762023926, -0.00419926643371582, -0.004007697105407715, -0.0038161277770996094, -0.003624558448791504, -0.0034329891204833984, -0.003241419792175293, -0.0030498504638671875, -0.002858281135559082, -0.0026667118072509766, -0.002475142478942871, -0.0022835731506347656, -0.00209200382232666, -0.0019004344940185547, -0.0017088651657104492, -0.0015172958374023438, -0.0013257265090942383, -0.0011341571807861328, -0.0009425878524780273, -0.0007510185241699219, -0.0005594491958618164, -0.00036787986755371094, -0.00017631053924560547, 1.52587890625e-05, 0.00020682811737060547, 0.00039839744567871094, 0.0005899667739868164, 0.0007815361022949219, 0.0009731054306030273, 0.0011646747589111328, 0.0013562440872192383, 0.0015478134155273438, 0.0017393827438354492, 0.0019309520721435547, 0.00212252140045166, 0.0023140907287597656, 0.002505660057067871, 0.0026972293853759766, 0.002888798713684082, 0.0030803680419921875, 0.003271937370300293, 0.0034635066986083984, 0.003655076026916504, 0.0038466453552246094, 0.004038214683532715, 0.00422978401184082, 0.004421353340148926, 0.004612922668457031, 0.004804491996765137, 0.004996061325073242, 0.005187630653381348, 0.005379199981689453, 0.005570769309997559, 0.005762338638305664, 0.0059539079666137695, 0.006145477294921875]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 1.0, 3.0, 6.0, 14.0, 19.0, 11.0, 30.0, 39.0, 48.0, 75.0, 87.0, 115.0, 125.0, 101.0, 89.0, 68.0, 51.0, 40.0, 25.0, 19.0, 18.0, 2.0, 8.0, 5.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01031494140625, -0.009964823722839355, -0.009614706039428711, -0.009264588356018066, -0.008914470672607422, -0.008564352989196777, -0.008214235305786133, -0.007864117622375488, -0.007513999938964844, -0.007163882255554199, -0.006813764572143555, -0.00646364688873291, -0.006113529205322266, -0.005763411521911621, -0.0054132938385009766, -0.005063176155090332, -0.0047130584716796875, -0.004362940788269043, -0.0040128231048583984, -0.003662705421447754, -0.0033125877380371094, -0.002962470054626465, -0.0026123523712158203, -0.0022622346878051758, -0.0019121170043945312, -0.0015619993209838867, -0.0012118816375732422, -0.0008617639541625977, -0.0005116462707519531, -0.0001615285873413086, 0.00018858909606933594, 0.0005387067794799805, 0.000888824462890625, 0.0012389421463012695, 0.001589059829711914, 0.0019391775131225586, 0.002289295196533203, 0.0026394128799438477, 0.002989530563354492, 0.0033396482467651367, 0.0036897659301757812, 0.004039883613586426, 0.00439000129699707, 0.004740118980407715, 0.005090236663818359, 0.005440354347229004, 0.0057904720306396484, 0.006140589714050293, 0.0064907073974609375, 0.006840825080871582, 0.0071909427642822266, 0.007541060447692871, 0.007891178131103516, 0.00824129581451416, 0.008591413497924805, 0.00894153118133545, 0.009291648864746094, 0.009641766548156738, 0.009991884231567383, 0.010342001914978027, 0.010692119598388672, 0.011042237281799316, 0.011392354965209961, 0.011742472648620605, 0.01209259033203125]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 4.0, 6.0, 11.0, 15.0, 21.0, 54.0, 75.0, 151.0, 161.0, 186.0, 143.0, 85.0, 44.0, 18.0, 14.0, 5.0, 2.0, 6.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.13597267866134644, -0.13046348094940186, -0.12495428323745728, -0.1194450780749321, -0.11393588036298752, -0.10842668265104294, -0.10291747748851776, -0.09740827977657318, -0.0918990820646286, -0.08638988435268402, -0.08088068664073944, -0.07537148147821426, -0.06986228376626968, -0.0643530860543251, -0.058843884617090225, -0.05333468317985535, -0.04782548546791077, -0.042316287755966187, -0.03680708631873131, -0.03129788488149643, -0.02578868716955185, -0.02027948759496212, -0.01477028802037239, -0.009261086583137512, -0.003751888871192932, 0.0017573107033967972, 0.0072665102779865265, 0.012775709852576256, 0.018284909427165985, 0.023794109001755714, 0.029303308576345444, 0.03481251001358032, 0.040321722626686096, 0.045830920338630676, 0.051340121775865555, 0.05684932321310043, 0.06235852092504501, 0.0678677186369896, 0.07337692379951477, 0.07888612151145935, 0.08439531922340393, 0.08990451693534851, 0.09541371464729309, 0.10092291980981827, 0.10643211752176285, 0.11194131523370743, 0.1174505203962326, 0.12295971810817719, 0.12846891582012177, 0.13397811353206635, 0.13948731124401093, 0.1449965089559555, 0.15050572156906128, 0.15601491928100586, 0.16152411699295044, 0.16703331470489502, 0.1725425124168396, 0.17805171012878418, 0.18356090784072876, 0.18907010555267334, 0.19457930326461792, 0.2000885158777237, 0.20559771358966827, 0.21110691130161285, 0.21661610901355743]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 2.0, 5.0, 3.0, 10.0, 7.0, 16.0, 9.0, 15.0, 15.0, 11.0, 21.0, 19.0, 36.0, 23.0, 38.0, 38.0, 28.0, 52.0, 42.0, 42.0, 56.0, 48.0, 36.0, 46.0, 40.0, 41.0, 34.0, 32.0, 28.0, 32.0, 25.0, 25.0, 22.0, 14.0, 13.0, 13.0, 13.0, 10.0, 12.0, 4.0, 9.0, 4.0, 5.0, 2.0, 1.0, 5.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.16538137197494507, -0.1604921817779541, -0.15560299158096313, -0.15071381628513336, -0.1458246260881424, -0.14093543589115143, -0.13604626059532166, -0.1311570703983307, -0.12626788020133972, -0.12137869000434875, -0.11648950725793839, -0.11160032451152802, -0.10671113431453705, -0.10182194411754608, -0.09693276137113571, -0.09204357862472534, -0.08715438842773438, -0.08226519823074341, -0.07737601548433304, -0.07248683273792267, -0.0675976425409317, -0.06270845234394073, -0.057819269597530365, -0.0529300831258297, -0.04804089665412903, -0.04315171018242836, -0.03826252371072769, -0.03337333723902702, -0.028484150767326355, -0.023594964295625687, -0.01870577782392502, -0.01381659135222435, -0.008927404880523682, -0.004038218408823013, 0.000850968062877655, 0.005740154534578323, 0.010629341006278992, 0.01551852747797966, 0.02040771394968033, 0.025296900421380997, 0.030186086893081665, 0.03507527336478233, 0.039964459836483, 0.04485364630818367, 0.04974283277988434, 0.05463201925158501, 0.059521205723285675, 0.06441038846969604, 0.06929957866668701, 0.07418876886367798, 0.07907795161008835, 0.08396713435649872, 0.08885632455348969, 0.09374551475048065, 0.09863469749689102, 0.10352388024330139, 0.10841307044029236, 0.11330226063728333, 0.1181914433836937, 0.12308062613010406, 0.12796981632709503, 0.132859006524086, 0.13774818181991577, 0.14263737201690674, 0.1475265622138977]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 7.0, 7.0, 15.0, 19.0, 32.0, 81.0, 158.0, 337.0, 924.0, 3796.0, 381093.0, 3802432.0, 3833.0, 863.0, 333.0, 149.0, 68.0, 41.0, 17.0, 20.0, 10.0, 13.0, 8.0, 10.0, 5.0, 2.0, 2.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.049224853515625, -0.04688215255737305, -0.044539451599121094, -0.04219675064086914, -0.03985404968261719, -0.037511348724365234, -0.03516864776611328, -0.03282594680786133, -0.030483245849609375, -0.028140544891357422, -0.02579784393310547, -0.023455142974853516, -0.021112442016601562, -0.01876974105834961, -0.016427040100097656, -0.014084339141845703, -0.01174163818359375, -0.009398937225341797, -0.007056236267089844, -0.004713535308837891, -0.0023708343505859375, -2.8133392333984375e-05, 0.0023145675659179688, 0.004657268524169922, 0.006999969482421875, 0.009342670440673828, 0.011685371398925781, 0.014028072357177734, 0.016370773315429688, 0.01871347427368164, 0.021056175231933594, 0.023398876190185547, 0.0257415771484375, 0.028084278106689453, 0.030426979064941406, 0.03276968002319336, 0.03511238098144531, 0.037455081939697266, 0.03979778289794922, 0.04214048385620117, 0.044483184814453125, 0.04682588577270508, 0.04916858673095703, 0.051511287689208984, 0.05385398864746094, 0.05619668960571289, 0.058539390563964844, 0.0608820915222168, 0.06322479248046875, 0.0655674934387207, 0.06791019439697266, 0.07025289535522461, 0.07259559631347656, 0.07493829727172852, 0.07728099822998047, 0.07962369918823242, 0.08196640014648438, 0.08430910110473633, 0.08665180206298828, 0.08899450302124023, 0.09133720397949219, 0.09367990493774414, 0.0960226058959961, 0.09836530685424805, 0.1007080078125]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 6.0, 4.0, 3.0, 7.0, 11.0, 26.0, 46.0, 65.0, 127.0, 148.0, 176.0, 133.0, 103.0, 72.0, 33.0, 22.0, 10.0, 8.0, 1.0, 2.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.026611328125, -0.025472640991210938, -0.024333953857421875, -0.023195266723632812, -0.02205657958984375, -0.020917892456054688, -0.019779205322265625, -0.018640518188476562, -0.0175018310546875, -0.016363143920898438, -0.015224456787109375, -0.014085769653320312, -0.01294708251953125, -0.011808395385742188, -0.010669708251953125, -0.009531021118164062, -0.008392333984375, -0.0072536468505859375, -0.006114959716796875, -0.0049762725830078125, -0.00383758544921875, -0.0026988983154296875, -0.001560211181640625, -0.0004215240478515625, 0.0007171630859375, 0.0018558502197265625, 0.002994537353515625, 0.0041332244873046875, 0.00527191162109375, 0.0064105987548828125, 0.007549285888671875, 0.008687973022460938, 0.00982666015625, 0.010965347290039062, 0.012104034423828125, 0.013242721557617188, 0.01438140869140625, 0.015520095825195312, 0.016658782958984375, 0.017797470092773438, 0.0189361572265625, 0.020074844360351562, 0.021213531494140625, 0.022352218627929688, 0.02349090576171875, 0.024629592895507812, 0.025768280029296875, 0.026906967163085938, 0.028045654296875, 0.029184341430664062, 0.030323028564453125, 0.03146171569824219, 0.03260040283203125, 0.03373908996582031, 0.034877777099609375, 0.03601646423339844, 0.0371551513671875, 0.03829383850097656, 0.039432525634765625, 0.04057121276855469, 0.04170989990234375, 0.04284858703613281, 0.043987274169921875, 0.04512596130371094, 0.0462646484375]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 5.0, 4.0, 7.0, 6.0, 10.0, 18.0, 24.0, 39.0, 72.0, 70.0, 129.0, 184.0, 293.0, 474.0, 991.0, 2160.0, 6132.0, 27032.0, 3685427.0, 439830.0, 21856.0, 5423.0, 1947.0, 897.0, 491.0, 256.0, 170.0, 90.0, 73.0, 57.0, 34.0, 32.0, 23.0, 11.0, 10.0, 4.0, 6.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0168914794921875, -0.016362667083740234, -0.01583385467529297, -0.015305042266845703, -0.014776229858398438, -0.014247417449951172, -0.013718605041503906, -0.01318979263305664, -0.012660980224609375, -0.01213216781616211, -0.011603355407714844, -0.011074542999267578, -0.010545730590820312, -0.010016918182373047, -0.009488105773925781, -0.008959293365478516, -0.00843048095703125, -0.007901668548583984, -0.007372856140136719, -0.006844043731689453, -0.0063152313232421875, -0.005786418914794922, -0.005257606506347656, -0.004728794097900391, -0.004199981689453125, -0.0036711692810058594, -0.0031423568725585938, -0.002613544464111328, -0.0020847320556640625, -0.0015559196472167969, -0.0010271072387695312, -0.0004982948303222656, 3.0517578125e-05, 0.0005593299865722656, 0.0010881423950195312, 0.0016169548034667969, 0.0021457672119140625, 0.002674579620361328, 0.0032033920288085938, 0.0037322044372558594, 0.004261016845703125, 0.004789829254150391, 0.005318641662597656, 0.005847454071044922, 0.0063762664794921875, 0.006905078887939453, 0.007433891296386719, 0.007962703704833984, 0.00849151611328125, 0.009020328521728516, 0.009549140930175781, 0.010077953338623047, 0.010606765747070312, 0.011135578155517578, 0.011664390563964844, 0.01219320297241211, 0.012722015380859375, 0.01325082778930664, 0.013779640197753906, 0.014308452606201172, 0.014837265014648438, 0.015366077423095703, 0.01589488983154297, 0.016423702239990234, 0.0169525146484375]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 5.0, 9.0, 17.0, 27.0, 62.0, 196.0, 3414.0, 209.0, 72.0, 24.0, 11.0, 13.0, 8.0, 6.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0089569091796875, -0.008643031120300293, -0.008329153060913086, -0.008015275001525879, -0.007701396942138672, -0.007387518882751465, -0.007073640823364258, -0.006759762763977051, -0.006445884704589844, -0.006132006645202637, -0.00581812858581543, -0.005504250526428223, -0.005190372467041016, -0.004876494407653809, -0.0045626163482666016, -0.0042487382888793945, -0.0039348602294921875, -0.0036209821701049805, -0.0033071041107177734, -0.0029932260513305664, -0.0026793479919433594, -0.0023654699325561523, -0.0020515918731689453, -0.0017377138137817383, -0.0014238357543945312, -0.0011099576950073242, -0.0007960796356201172, -0.00048220157623291016, -0.00016832351684570312, 0.0001455545425415039, 0.00045943260192871094, 0.000773310661315918, 0.001087188720703125, 0.001401066780090332, 0.001714944839477539, 0.002028822898864746, 0.002342700958251953, 0.00265657901763916, 0.002970457077026367, 0.0032843351364135742, 0.0035982131958007812, 0.003912091255187988, 0.004225969314575195, 0.004539847373962402, 0.004853725433349609, 0.005167603492736816, 0.0054814815521240234, 0.0057953596115112305, 0.0061092376708984375, 0.0064231157302856445, 0.0067369937896728516, 0.007050871849060059, 0.007364749908447266, 0.007678627967834473, 0.00799250602722168, 0.008306384086608887, 0.008620262145996094, 0.0089341402053833, 0.009248018264770508, 0.009561896324157715, 0.009875774383544922, 0.010189652442932129, 0.010503530502319336, 0.010817408561706543, 0.01113128662109375]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 4.0, 9.0, 49.0, 154.0, 350.0, 286.0, 106.0, 31.0, 10.0, 3.0, 3.0, 2.0, 2.0], "bins": [-0.11561036109924316, -0.11354155838489532, -0.11147275567054749, -0.10940395295619965, -0.1073351502418518, -0.10526634752750397, -0.10319754481315613, -0.10112874209880829, -0.09905993938446045, -0.09699113667011261, -0.09492233395576477, -0.09285353124141693, -0.09078472852706909, -0.08871592581272125, -0.08664712309837341, -0.08457832038402557, -0.08250951021909714, -0.0804407075047493, -0.07837190479040146, -0.07630310207605362, -0.07423429936170578, -0.07216549664735794, -0.0700966939330101, -0.06802788376808167, -0.06595908105373383, -0.06389027833938599, -0.06182147562503815, -0.05975267291069031, -0.05768387019634247, -0.05561506748199463, -0.05354626104235649, -0.05147745832800865, -0.04940866678953171, -0.04733986407518387, -0.04527106136083603, -0.04320225864648819, -0.04113345593214035, -0.03906465321779251, -0.03699584677815437, -0.034927044063806534, -0.032858237624168396, -0.030789434909820557, -0.028720632195472717, -0.02665182761847973, -0.02458302490413189, -0.02251422218978405, -0.02044541761279106, -0.018376614898443222, -0.016307814046740532, -0.014239011332392693, -0.012170207686722279, -0.010101404041051865, -0.008032601326704025, -0.005963798612356186, -0.003894994966685772, -0.001826191321015358, 0.00024261139333248138, 0.002311414573341608, 0.004380217753350735, 0.006449020933359861, 0.008517824113368988, 0.010586626827716827, 0.012655430473387241, 0.014724234119057655, 0.016793036833405495]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 6.0, 10.0, 9.0, 10.0, 13.0, 17.0, 23.0, 22.0, 26.0, 25.0, 33.0, 31.0, 42.0, 49.0, 41.0, 63.0, 61.0, 69.0, 53.0, 56.0, 50.0, 36.0, 43.0, 49.0, 41.0, 20.0, 23.0, 18.0, 14.0, 12.0, 14.0, 11.0, 7.0, 6.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0208091139793396, -0.020249010995030403, -0.019688908010721207, -0.01912880502641201, -0.018568702042102814, -0.018008599057793617, -0.01744849607348442, -0.016888393089175224, -0.016328290104866028, -0.01576818712055683, -0.015208084136247635, -0.014647981151938438, -0.014087878167629242, -0.013527775183320045, -0.012967672199010849, -0.012407569214701653, -0.011847466230392456, -0.01128736324608326, -0.010727260261774063, -0.010167157277464867, -0.00960705429315567, -0.009046951308846474, -0.008486848324537277, -0.00792674534022808, -0.007366642355918884, -0.006806539371609688, -0.006246436387300491, -0.005686333402991295, -0.005126230418682098, -0.004566127434372902, -0.0040060244500637054, -0.003445921465754509, -0.0028858184814453125, -0.002325715497136116, -0.0017656125128269196, -0.001205509528517723, -0.0006454065442085266, -8.530355989933014e-05, 0.00047479942440986633, 0.0010349024087190628, 0.0015950053930282593, 0.0021551083773374557, 0.0027152113616466522, 0.0032753143459558487, 0.003835417330265045, 0.004395520314574242, 0.004955623298883438, 0.005515726283192635, 0.006075829267501831, 0.0066359322518110275, 0.007196035236120224, 0.0077561382204294205, 0.008316241204738617, 0.008876344189047813, 0.00943644717335701, 0.009996550157666206, 0.010556653141975403, 0.0111167561262846, 0.011676859110593796, 0.012236962094902992, 0.012797065079212189, 0.013357168063521385, 0.013917271047830582, 0.014477374032139778, 0.015037477016448975]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 5.0, 6.0, 10.0, 11.0, 20.0, 21.0, 37.0, 49.0, 58.0, 106.0, 138.0, 260.0, 471.0, 846.0, 1994.0, 5828.0, 33412.0, 559371.0, 412003.0, 25217.0, 5070.0, 1743.0, 792.0, 376.0, 235.0, 143.0, 105.0, 73.0, 44.0, 31.0, 20.0, 18.0, 12.0, 5.0, 6.0, 5.0, 6.0, 6.0, 2.0, 5.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.05517578125, -0.05359649658203125, -0.0520172119140625, -0.05043792724609375, -0.048858642578125, -0.04727935791015625, -0.0457000732421875, -0.04412078857421875, -0.04254150390625, -0.04096221923828125, -0.0393829345703125, -0.03780364990234375, -0.036224365234375, -0.03464508056640625, -0.0330657958984375, -0.03148651123046875, -0.0299072265625, -0.02832794189453125, -0.0267486572265625, -0.02516937255859375, -0.023590087890625, -0.02201080322265625, -0.0204315185546875, -0.01885223388671875, -0.01727294921875, -0.01569366455078125, -0.0141143798828125, -0.01253509521484375, -0.010955810546875, -0.00937652587890625, -0.0077972412109375, -0.00621795654296875, -0.004638671875, -0.00305938720703125, -0.0014801025390625, 9.918212890625e-05, 0.001678466796875, 0.00325775146484375, 0.0048370361328125, 0.00641632080078125, 0.00799560546875, 0.00957489013671875, 0.0111541748046875, 0.01273345947265625, 0.014312744140625, 0.01589202880859375, 0.0174713134765625, 0.01905059814453125, 0.0206298828125, 0.02220916748046875, 0.0237884521484375, 0.02536773681640625, 0.026947021484375, 0.02852630615234375, 0.0301055908203125, 0.03168487548828125, 0.03326416015625, 0.03484344482421875, 0.0364227294921875, 0.03800201416015625, 0.039581298828125, 0.04116058349609375, 0.0427398681640625, 0.04431915283203125, 0.0458984375]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 7.0, 2.0, 5.0, 6.0, 10.0, 33.0, 37.0, 73.0, 121.0, 153.0, 166.0, 131.0, 112.0, 69.0, 40.0, 18.0, 8.0, 7.0, 3.0, 2.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0265655517578125, -0.025428056716918945, -0.02429056167602539, -0.023153066635131836, -0.02201557159423828, -0.020878076553344727, -0.019740581512451172, -0.018603086471557617, -0.017465591430664062, -0.016328096389770508, -0.015190601348876953, -0.014053106307983398, -0.012915611267089844, -0.011778116226196289, -0.010640621185302734, -0.00950312614440918, -0.008365631103515625, -0.00722813606262207, -0.006090641021728516, -0.004953145980834961, -0.0038156509399414062, -0.0026781558990478516, -0.0015406608581542969, -0.0004031658172607422, 0.0007343292236328125, 0.0018718242645263672, 0.003009319305419922, 0.0041468143463134766, 0.005284309387207031, 0.006421804428100586, 0.007559299468994141, 0.008696794509887695, 0.00983428955078125, 0.010971784591674805, 0.01210927963256836, 0.013246774673461914, 0.014384269714355469, 0.015521764755249023, 0.016659259796142578, 0.017796754837036133, 0.018934249877929688, 0.020071744918823242, 0.021209239959716797, 0.02234673500061035, 0.023484230041503906, 0.02462172508239746, 0.025759220123291016, 0.02689671516418457, 0.028034210205078125, 0.02917170524597168, 0.030309200286865234, 0.03144669532775879, 0.032584190368652344, 0.0337216854095459, 0.03485918045043945, 0.03599667549133301, 0.03713417053222656, 0.03827166557312012, 0.03940916061401367, 0.04054665565490723, 0.04168415069580078, 0.042821645736694336, 0.04395914077758789, 0.045096635818481445, 0.046234130859375]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 5.0, 9.0, 7.0, 12.0, 13.0, 19.0, 30.0, 36.0, 48.0, 69.0, 103.0, 172.0, 243.0, 452.0, 799.0, 1988.0, 5871.0, 22826.0, 138172.0, 639843.0, 195465.0, 30668.0, 7209.0, 2275.0, 973.0, 464.0, 267.0, 149.0, 106.0, 81.0, 51.0, 29.0, 19.0, 17.0, 18.0, 20.0, 10.0, 8.0, 6.0, 5.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0224456787109375, -0.021687030792236328, -0.020928382873535156, -0.020169734954833984, -0.019411087036132812, -0.01865243911743164, -0.01789379119873047, -0.017135143280029297, -0.016376495361328125, -0.015617847442626953, -0.014859199523925781, -0.01410055160522461, -0.013341903686523438, -0.012583255767822266, -0.011824607849121094, -0.011065959930419922, -0.01030731201171875, -0.009548664093017578, -0.008790016174316406, -0.008031368255615234, -0.0072727203369140625, -0.006514072418212891, -0.005755424499511719, -0.004996776580810547, -0.004238128662109375, -0.003479480743408203, -0.0027208328247070312, -0.0019621849060058594, -0.0012035369873046875, -0.0004448890686035156, 0.00031375885009765625, 0.0010724067687988281, 0.0018310546875, 0.002589702606201172, 0.0033483505249023438, 0.004106998443603516, 0.0048656463623046875, 0.005624294281005859, 0.006382942199707031, 0.007141590118408203, 0.007900238037109375, 0.008658885955810547, 0.009417533874511719, 0.01017618179321289, 0.010934829711914062, 0.011693477630615234, 0.012452125549316406, 0.013210773468017578, 0.01396942138671875, 0.014728069305419922, 0.015486717224121094, 0.016245365142822266, 0.017004013061523438, 0.01776266098022461, 0.01852130889892578, 0.019279956817626953, 0.020038604736328125, 0.020797252655029297, 0.02155590057373047, 0.02231454849243164, 0.023073196411132812, 0.023831844329833984, 0.024590492248535156, 0.025349140167236328, 0.0261077880859375]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 6.0, 5.0, 10.0, 7.0, 8.0, 11.0, 19.0, 14.0, 13.0, 15.0, 28.0, 32.0, 26.0, 37.0, 38.0, 42.0, 39.0, 47.0, 43.0, 37.0, 31.0, 30.0, 43.0, 51.0, 36.0, 36.0, 39.0, 34.0, 19.0, 24.0, 31.0, 21.0, 22.0, 24.0, 17.0, 14.0, 15.0, 6.0, 8.0, 6.0, 3.0, 4.0, 8.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0], "bins": [-0.03594970703125, -0.03479290008544922, -0.03363609313964844, -0.032479286193847656, -0.031322479248046875, -0.030165672302246094, -0.029008865356445312, -0.02785205841064453, -0.02669525146484375, -0.02553844451904297, -0.024381637573242188, -0.023224830627441406, -0.022068023681640625, -0.020911216735839844, -0.019754409790039062, -0.01859760284423828, -0.0174407958984375, -0.01628398895263672, -0.015127182006835938, -0.013970375061035156, -0.012813568115234375, -0.011656761169433594, -0.010499954223632812, -0.009343147277832031, -0.00818634033203125, -0.007029533386230469, -0.0058727264404296875, -0.004715919494628906, -0.003559112548828125, -0.0024023056030273438, -0.0012454986572265625, -8.869171142578125e-05, 0.001068115234375, 0.0022249221801757812, 0.0033817291259765625, 0.004538536071777344, 0.005695343017578125, 0.006852149963378906, 0.008008956909179688, 0.009165763854980469, 0.01032257080078125, 0.011479377746582031, 0.012636184692382812, 0.013792991638183594, 0.014949798583984375, 0.016106605529785156, 0.017263412475585938, 0.01842021942138672, 0.0195770263671875, 0.02073383331298828, 0.021890640258789062, 0.023047447204589844, 0.024204254150390625, 0.025361061096191406, 0.026517868041992188, 0.02767467498779297, 0.02883148193359375, 0.02998828887939453, 0.031145095825195312, 0.032301902770996094, 0.033458709716796875, 0.034615516662597656, 0.03577232360839844, 0.03692913055419922, 0.0380859375]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 6.0, 5.0, 3.0, 8.0, 3.0, 11.0, 21.0, 27.0, 60.0, 87.0, 200.0, 466.0, 1271.0, 5816.0, 95120.0, 886781.0, 52522.0, 4233.0, 1071.0, 409.0, 182.0, 114.0, 53.0, 25.0, 19.0, 19.0, 7.0, 8.0, 5.0, 1.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00762176513671875, -0.007333278656005859, -0.007044792175292969, -0.006756305694580078, -0.0064678192138671875, -0.006179332733154297, -0.005890846252441406, -0.005602359771728516, -0.005313873291015625, -0.005025386810302734, -0.004736900329589844, -0.004448413848876953, -0.0041599273681640625, -0.003871440887451172, -0.0035829544067382812, -0.0032944679260253906, -0.0030059814453125, -0.0027174949645996094, -0.0024290084838867188, -0.002140522003173828, -0.0018520355224609375, -0.0015635490417480469, -0.0012750625610351562, -0.0009865760803222656, -0.000698089599609375, -0.0004096031188964844, -0.00012111663818359375, 0.00016736984252929688, 0.0004558563232421875, 0.0007443428039550781, 0.0010328292846679688, 0.0013213157653808594, 0.00160980224609375, 0.0018982887268066406, 0.0021867752075195312, 0.002475261688232422, 0.0027637481689453125, 0.003052234649658203, 0.0033407211303710938, 0.0036292076110839844, 0.003917694091796875, 0.004206180572509766, 0.004494667053222656, 0.004783153533935547, 0.0050716400146484375, 0.005360126495361328, 0.005648612976074219, 0.005937099456787109, 0.0062255859375, 0.006514072418212891, 0.006802558898925781, 0.007091045379638672, 0.0073795318603515625, 0.007668018341064453, 0.007956504821777344, 0.008244991302490234, 0.008533477783203125, 0.008821964263916016, 0.009110450744628906, 0.009398937225341797, 0.009687423706054688, 0.009975910186767578, 0.010264396667480469, 0.01055288314819336, 0.01084136962890625]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 4.0, 8.0, 7.0, 21.0, 28.0, 22.0, 31.0, 51.0, 65.0, 70.0, 84.0, 86.0, 103.0, 75.0, 74.0, 73.0, 67.0, 41.0, 26.0, 16.0, 23.0, 16.0, 4.0, 6.0, 7.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.516674041748047e-06, -3.3248215913772583e-06, -3.1329691410064697e-06, -2.941116690635681e-06, -2.7492642402648926e-06, -2.557411789894104e-06, -2.3655593395233154e-06, -2.173706889152527e-06, -1.9818544387817383e-06, -1.7900019884109497e-06, -1.5981495380401611e-06, -1.4062970876693726e-06, -1.214444637298584e-06, -1.0225921869277954e-06, -8.307397365570068e-07, -6.388872861862183e-07, -4.470348358154297e-07, -2.551823854446411e-07, -6.332993507385254e-08, 1.2852251529693604e-07, 3.203749656677246e-07, 5.122274160385132e-07, 7.040798664093018e-07, 8.959323167800903e-07, 1.087784767150879e-06, 1.2796372175216675e-06, 1.471489667892456e-06, 1.6633421182632446e-06, 1.8551945686340332e-06, 2.0470470190048218e-06, 2.2388994693756104e-06, 2.430751919746399e-06, 2.6226043701171875e-06, 2.814456820487976e-06, 3.0063092708587646e-06, 3.1981617212295532e-06, 3.390014171600342e-06, 3.5818666219711304e-06, 3.773719072341919e-06, 3.9655715227127075e-06, 4.157423973083496e-06, 4.349276423454285e-06, 4.541128873825073e-06, 4.732981324195862e-06, 4.92483377456665e-06, 5.116686224937439e-06, 5.3085386753082275e-06, 5.500391125679016e-06, 5.692243576049805e-06, 5.884096026420593e-06, 6.075948476791382e-06, 6.26780092716217e-06, 6.459653377532959e-06, 6.6515058279037476e-06, 6.843358278274536e-06, 7.035210728645325e-06, 7.227063179016113e-06, 7.418915629386902e-06, 7.6107680797576904e-06, 7.802620530128479e-06, 7.994472980499268e-06, 8.186325430870056e-06, 8.378177881240845e-06, 8.570030331611633e-06, 8.761882781982422e-06]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 11.0, 7.0, 9.0, 11.0, 21.0, 26.0, 67.0, 103.0, 158.0, 353.0, 907.0, 3103.0, 17239.0, 335808.0, 652300.0, 31908.0, 4280.0, 1287.0, 458.0, 210.0, 114.0, 64.0, 32.0, 28.0, 11.0, 9.0, 12.0, 6.0, 6.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005886077880859375, -0.005642116069793701, -0.005398154258728027, -0.0051541924476623535, -0.00491023063659668, -0.004666268825531006, -0.004422307014465332, -0.004178345203399658, -0.003934383392333984, -0.0036904215812683105, -0.0034464597702026367, -0.003202497959136963, -0.002958536148071289, -0.0027145743370056152, -0.0024706125259399414, -0.0022266507148742676, -0.0019826889038085938, -0.00173872709274292, -0.001494765281677246, -0.0012508034706115723, -0.0010068416595458984, -0.0007628798484802246, -0.0005189180374145508, -0.00027495622634887695, -3.0994415283203125e-05, 0.0002129673957824707, 0.00045692920684814453, 0.0007008910179138184, 0.0009448528289794922, 0.001188814640045166, 0.0014327764511108398, 0.0016767382621765137, 0.0019207000732421875, 0.0021646618843078613, 0.002408623695373535, 0.002652585506439209, 0.002896547317504883, 0.0031405091285705566, 0.0033844709396362305, 0.0036284327507019043, 0.003872394561767578, 0.004116356372833252, 0.004360318183898926, 0.0046042799949646, 0.0048482418060302734, 0.005092203617095947, 0.005336165428161621, 0.005580127239227295, 0.005824089050292969, 0.006068050861358643, 0.006312012672424316, 0.00655597448348999, 0.006799936294555664, 0.007043898105621338, 0.007287859916687012, 0.0075318217277526855, 0.007775783538818359, 0.008019745349884033, 0.008263707160949707, 0.00850766897201538, 0.008751630783081055, 0.008995592594146729, 0.009239554405212402, 0.009483516216278076, 0.00972747802734375]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 6.0, 9.0, 19.0, 13.0, 28.0, 36.0, 54.0, 62.0, 79.0, 89.0, 89.0, 106.0, 92.0, 73.0, 70.0, 60.0, 32.0, 20.0, 23.0, 14.0, 8.0, 4.0, 3.0, 4.0, 4.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00882720947265625, -0.008559942245483398, -0.008292675018310547, -0.008025407791137695, -0.007758140563964844, -0.007490873336791992, -0.007223606109619141, -0.006956338882446289, -0.0066890716552734375, -0.006421804428100586, -0.006154537200927734, -0.005887269973754883, -0.005620002746582031, -0.00535273551940918, -0.005085468292236328, -0.0048182010650634766, -0.004550933837890625, -0.0042836666107177734, -0.004016399383544922, -0.0037491321563720703, -0.0034818649291992188, -0.003214597702026367, -0.0029473304748535156, -0.002680063247680664, -0.0024127960205078125, -0.002145528793334961, -0.0018782615661621094, -0.0016109943389892578, -0.0013437271118164062, -0.0010764598846435547, -0.0008091926574707031, -0.0005419254302978516, -0.000274658203125, -7.3909759521484375e-06, 0.0002598762512207031, 0.0005271434783935547, 0.0007944107055664062, 0.0010616779327392578, 0.0013289451599121094, 0.001596212387084961, 0.0018634796142578125, 0.002130746841430664, 0.0023980140686035156, 0.002665281295776367, 0.0029325485229492188, 0.0031998157501220703, 0.003467082977294922, 0.0037343502044677734, 0.004001617431640625, 0.0042688846588134766, 0.004536151885986328, 0.00480341911315918, 0.005070686340332031, 0.005337953567504883, 0.005605220794677734, 0.005872488021850586, 0.0061397552490234375, 0.006407022476196289, 0.006674289703369141, 0.006941556930541992, 0.007208824157714844, 0.007476091384887695, 0.007743358612060547, 0.008010625839233398, 0.00827789306640625]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 4.0, 3.0, 5.0, 13.0, 23.0, 64.0, 122.0, 237.0, 265.0, 150.0, 71.0, 28.0, 13.0, 5.0, 3.0, 0.0, 4.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.1584058701992035, -0.15047907829284668, -0.14255227148532867, -0.13462547957897186, -0.12669867277145386, -0.11877188086509705, -0.11084508895874023, -0.10291828960180283, -0.09499149024486542, -0.08706469088792801, -0.0791378915309906, -0.07121109962463379, -0.06328430026769638, -0.05535750091075897, -0.04743070527911186, -0.03950390964746475, -0.031577110290527344, -0.023650312796235085, -0.015723515301942825, -0.007796717807650566, 0.00013007968664169312, 0.008056879043579102, 0.01598367467522621, 0.02391047030687332, 0.03183726966381073, 0.03976406902074814, 0.04769086465239525, 0.05561766028404236, 0.06354445964097977, 0.07147125899791718, 0.07939805090427399, 0.0873248502612114, 0.0952516496181488, 0.10317844897508621, 0.11110524833202362, 0.11903204023838043, 0.12695884704589844, 0.13488563895225525, 0.14281243085861206, 0.15073922276496887, 0.15866602957248688, 0.1665928214788437, 0.1745196282863617, 0.1824464201927185, 0.19037321209907532, 0.19830001890659332, 0.20622681081295013, 0.21415361762046814, 0.22208040952682495, 0.23000720143318176, 0.23793400824069977, 0.24586080014705658, 0.2537876069545746, 0.2617143988609314, 0.2696411907672882, 0.277567982673645, 0.2854948043823242, 0.29342159628868103, 0.30134838819503784, 0.30927520990371704, 0.31720200181007385, 0.32512879371643066, 0.3330555856227875, 0.3409823775291443, 0.3489091694355011]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 3.0, 3.0, 7.0, 8.0, 8.0, 11.0, 8.0, 12.0, 14.0, 18.0, 26.0, 25.0, 33.0, 36.0, 41.0, 46.0, 45.0, 57.0, 44.0, 65.0, 54.0, 64.0, 43.0, 39.0, 32.0, 42.0, 46.0, 29.0, 22.0, 23.0, 23.0, 21.0, 9.0, 14.0, 5.0, 10.0, 7.0, 2.0, 4.0, 4.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.19358724355697632, -0.18754632771015167, -0.18150541186332703, -0.17546449601650238, -0.16942358016967773, -0.16338267922401428, -0.15734176337718964, -0.151300847530365, -0.14525993168354034, -0.1392190158367157, -0.13317809998989105, -0.1271371841430664, -0.12109627574682236, -0.11505535989999771, -0.10901445150375366, -0.10297353565692902, -0.09693261981010437, -0.09089170396327972, -0.08485078811645508, -0.07880987972021103, -0.07276896387338638, -0.06672804802656174, -0.06068713590502739, -0.05464622378349304, -0.048605307936668396, -0.04256439208984375, -0.0365234799683094, -0.030482565984129906, -0.02444165199995041, -0.018400738015770912, -0.012359824031591415, -0.006318911910057068, -0.0002779960632324219, 0.005762917920947075, 0.011803831905126572, 0.01784474588930607, 0.023885659873485565, 0.029926573857665062, 0.03596748784184456, 0.042008399963378906, 0.04804931581020355, 0.0540902316570282, 0.060131143778562546, 0.0661720559000969, 0.07221297174692154, 0.07825388759374619, 0.08429479598999023, 0.09033571183681488, 0.09637662768363953, 0.10241754353046417, 0.10845845937728882, 0.11449936777353287, 0.12054028362035751, 0.12658119201660156, 0.1326221078634262, 0.13866302371025085, 0.1447039395570755, 0.15074485540390015, 0.1567857712507248, 0.16282668709754944, 0.1688675880432129, 0.17490850389003754, 0.18094941973686218, 0.18699033558368683, 0.19303125143051147]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 5.0, 1.0, 5.0, 19.0, 33.0, 39.0, 58.0, 110.0, 201.0, 422.0, 1164.0, 4342.0, 29241.0, 3771944.0, 371264.0, 11701.0, 2146.0, 676.0, 324.0, 214.0, 119.0, 80.0, 46.0, 28.0, 26.0, 23.0, 12.0, 12.0, 11.0, 7.0, 7.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02886962890625, -0.02754688262939453, -0.026224136352539062, -0.024901390075683594, -0.023578643798828125, -0.022255897521972656, -0.020933151245117188, -0.01961040496826172, -0.01828765869140625, -0.01696491241455078, -0.015642166137695312, -0.014319419860839844, -0.012996673583984375, -0.011673927307128906, -0.010351181030273438, -0.009028434753417969, -0.0077056884765625, -0.006382942199707031, -0.0050601959228515625, -0.0037374496459960938, -0.002414703369140625, -0.0010919570922851562, 0.0002307891845703125, 0.0015535354614257812, 0.00287628173828125, 0.004199028015136719, 0.0055217742919921875, 0.006844520568847656, 0.008167266845703125, 0.009490013122558594, 0.010812759399414062, 0.012135505676269531, 0.013458251953125, 0.014780998229980469, 0.016103744506835938, 0.017426490783691406, 0.018749237060546875, 0.020071983337402344, 0.021394729614257812, 0.02271747589111328, 0.02404022216796875, 0.02536296844482422, 0.026685714721679688, 0.028008460998535156, 0.029331207275390625, 0.030653953552246094, 0.03197669982910156, 0.03329944610595703, 0.0346221923828125, 0.03594493865966797, 0.03726768493652344, 0.038590431213378906, 0.039913177490234375, 0.041235923767089844, 0.04255867004394531, 0.04388141632080078, 0.04520416259765625, 0.04652690887451172, 0.04784965515136719, 0.049172401428222656, 0.050495147705078125, 0.051817893981933594, 0.05314064025878906, 0.05446338653564453, 0.0557861328125]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 3.0, 2.0, 12.0, 16.0, 43.0, 84.0, 124.0, 146.0, 172.0, 146.0, 116.0, 62.0, 36.0, 21.0, 5.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0263824462890625, -0.02525496482849121, -0.024127483367919922, -0.023000001907348633, -0.021872520446777344, -0.020745038986206055, -0.019617557525634766, -0.018490076065063477, -0.017362594604492188, -0.0162351131439209, -0.01510763168334961, -0.01398015022277832, -0.012852668762207031, -0.011725187301635742, -0.010597705841064453, -0.009470224380493164, -0.008342742919921875, -0.007215261459350586, -0.006087779998779297, -0.004960298538208008, -0.0038328170776367188, -0.0027053356170654297, -0.0015778541564941406, -0.00045037269592285156, 0.0006771087646484375, 0.0018045902252197266, 0.0029320716857910156, 0.004059553146362305, 0.005187034606933594, 0.006314516067504883, 0.007441997528076172, 0.008569478988647461, 0.00969696044921875, 0.010824441909790039, 0.011951923370361328, 0.013079404830932617, 0.014206886291503906, 0.015334367752075195, 0.016461849212646484, 0.017589330673217773, 0.018716812133789062, 0.01984429359436035, 0.02097177505493164, 0.02209925651550293, 0.02322673797607422, 0.024354219436645508, 0.025481700897216797, 0.026609182357788086, 0.027736663818359375, 0.028864145278930664, 0.029991626739501953, 0.031119108200073242, 0.03224658966064453, 0.03337407112121582, 0.03450155258178711, 0.0356290340423584, 0.03675651550292969, 0.03788399696350098, 0.039011478424072266, 0.040138959884643555, 0.041266441345214844, 0.04239392280578613, 0.04352140426635742, 0.04464888572692871, 0.0457763671875]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 16.0, 14.0, 36.0, 116.0, 297.0, 1309.0, 250494.0, 3939557.0, 1928.0, 325.0, 116.0, 40.0, 19.0, 11.0, 5.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08526611328125, -0.08172321319580078, -0.07818031311035156, -0.07463741302490234, -0.07109451293945312, -0.0675516128540039, -0.06400871276855469, -0.06046581268310547, -0.05692291259765625, -0.05338001251220703, -0.04983711242675781, -0.046294212341308594, -0.042751312255859375, -0.039208412170410156, -0.03566551208496094, -0.03212261199951172, -0.0285797119140625, -0.02503681182861328, -0.021493911743164062, -0.017951011657714844, -0.014408111572265625, -0.010865211486816406, -0.0073223114013671875, -0.0037794113159179688, -0.00023651123046875, 0.0033063888549804688, 0.0068492889404296875, 0.010392189025878906, 0.013935089111328125, 0.017477989196777344, 0.021020889282226562, 0.02456378936767578, 0.028106689453125, 0.03164958953857422, 0.03519248962402344, 0.038735389709472656, 0.042278289794921875, 0.045821189880371094, 0.04936408996582031, 0.05290699005126953, 0.05644989013671875, 0.05999279022216797, 0.06353569030761719, 0.0670785903930664, 0.07062149047851562, 0.07416439056396484, 0.07770729064941406, 0.08125019073486328, 0.0847930908203125, 0.08833599090576172, 0.09187889099121094, 0.09542179107666016, 0.09896469116210938, 0.1025075912475586, 0.10605049133300781, 0.10959339141845703, 0.11313629150390625, 0.11667919158935547, 0.12022209167480469, 0.1237649917602539, 0.12730789184570312, 0.13085079193115234, 0.13439369201660156, 0.13793659210205078, 0.1414794921875]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 5.0, 5.0, 15.0, 13.0, 41.0, 60.0, 526.0, 3013.0, 291.0, 58.0, 21.0, 10.0, 8.0, 6.0, 4.0, 3.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07281494140625, -0.07075738906860352, -0.06869983673095703, -0.06664228439331055, -0.06458473205566406, -0.06252717971801758, -0.060469627380371094, -0.05841207504272461, -0.056354522705078125, -0.05429697036743164, -0.052239418029785156, -0.05018186569213867, -0.04812431335449219, -0.0460667610168457, -0.04400920867919922, -0.041951656341552734, -0.03989410400390625, -0.037836551666259766, -0.03577899932861328, -0.0337214469909668, -0.03166389465332031, -0.029606342315673828, -0.027548789978027344, -0.02549123764038086, -0.023433685302734375, -0.02137613296508789, -0.019318580627441406, -0.017261028289794922, -0.015203475952148438, -0.013145923614501953, -0.011088371276855469, -0.009030818939208984, -0.0069732666015625, -0.004915714263916016, -0.0028581619262695312, -0.0008006095886230469, 0.0012569427490234375, 0.003314495086669922, 0.005372047424316406, 0.007429599761962891, 0.009487152099609375, 0.01154470443725586, 0.013602256774902344, 0.015659809112548828, 0.017717361450195312, 0.019774913787841797, 0.02183246612548828, 0.023890018463134766, 0.02594757080078125, 0.028005123138427734, 0.03006267547607422, 0.0321202278137207, 0.03417778015136719, 0.03623533248901367, 0.038292884826660156, 0.04035043716430664, 0.042407989501953125, 0.04446554183959961, 0.046523094177246094, 0.04858064651489258, 0.05063819885253906, 0.05269575119018555, 0.05475330352783203, 0.056810855865478516, 0.058868408203125]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 4.0, 3.0, 10.0, 21.0, 113.0, 430.0, 307.0, 88.0, 14.0, 7.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.47550541162490845, -0.46440380811691284, -0.4533022344112396, -0.442200630903244, -0.4310990571975708, -0.4199974536895752, -0.4088958501815796, -0.397794246673584, -0.38669267296791077, -0.37559106945991516, -0.36448949575424194, -0.35338789224624634, -0.34228628873825073, -0.3311847150325775, -0.3200831115245819, -0.3089815378189087, -0.2978799343109131, -0.2867783308029175, -0.27567675709724426, -0.26457515358924866, -0.25347357988357544, -0.24237197637557983, -0.23127037286758423, -0.22016878426074982, -0.2090671956539154, -0.197965607047081, -0.18686401844024658, -0.17576241493225098, -0.16466082632541656, -0.15355923771858215, -0.14245763421058655, -0.13135604560375214, -0.12025448679924011, -0.1091528981924057, -0.09805130213499069, -0.08694970607757568, -0.07584811747074127, -0.06474652886390686, -0.05364493280649185, -0.04254333674907684, -0.03144174814224243, -0.02034015581011772, -0.009238563477993011, 0.0018630288541316986, 0.012964621186256409, 0.02406621351838112, 0.03516780585050583, 0.04626940190792084, 0.05737099051475525, 0.06847257912158966, 0.07957417517900467, 0.09067577123641968, 0.10177735984325409, 0.1128789484500885, 0.12398054450750351, 0.13508214056491852, 0.14618372917175293, 0.15728531777858734, 0.16838690638542175, 0.17948850989341736, 0.19059009850025177, 0.20169168710708618, 0.2127932906150818, 0.2238948792219162, 0.2349964678287506]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 2.0, 8.0, 8.0, 14.0, 14.0, 14.0, 22.0, 25.0, 26.0, 37.0, 52.0, 54.0, 46.0, 64.0, 62.0, 70.0, 57.0, 55.0, 56.0, 50.0, 53.0, 36.0, 34.0, 30.0, 25.0, 20.0, 16.0, 8.0, 14.0, 9.0, 4.0, 6.0, 6.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.11361783742904663, -0.10981645435094833, -0.10601507127285004, -0.10221368819475174, -0.09841230511665344, -0.09461091458797455, -0.09080953150987625, -0.08700814843177795, -0.08320676535367966, -0.07940538227558136, -0.07560399919748306, -0.07180261611938477, -0.06800122559070587, -0.06419984996318817, -0.06039845943450928, -0.05659707635641098, -0.05279569327831268, -0.048994310200214386, -0.04519292712211609, -0.04139154031872749, -0.037590157240629196, -0.0337887741625309, -0.029987389221787453, -0.026186004281044006, -0.02238462120294571, -0.018583238124847412, -0.014781853184103966, -0.010980469174683094, -0.007179085165262222, -0.003377702087163925, 0.0004236828535795212, 0.0042250677943229675, 0.008026450872421265, 0.011827834881842136, 0.015629218891263008, 0.019430603832006454, 0.02323198691010475, 0.02703336998820305, 0.030834754928946495, 0.03463613986968994, 0.03843752294778824, 0.042238906025886536, 0.04604028910398483, 0.04984167590737343, 0.053643058985471725, 0.05744444206357002, 0.06124582886695862, 0.06504721194505692, 0.06884859502315521, 0.07264997810125351, 0.0764513611793518, 0.0802527442574501, 0.0840541273355484, 0.0878555178642273, 0.09165690094232559, 0.09545828402042389, 0.09925966709852219, 0.10306105017662048, 0.10686243325471878, 0.11066381633281708, 0.11446520686149597, 0.11826658248901367, 0.12206797301769257, 0.12586936354637146, 0.12967073917388916]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 2.0, 3.0, 4.0, 8.0, 6.0, 11.0, 14.0, 8.0, 18.0, 27.0, 37.0, 38.0, 70.0, 113.0, 155.0, 234.0, 370.0, 636.0, 1155.0, 2403.0, 6773.0, 36290.0, 390781.0, 542653.0, 52066.0, 8652.0, 2791.0, 1293.0, 704.0, 406.0, 285.0, 161.0, 118.0, 74.0, 60.0, 36.0, 27.0, 28.0, 16.0, 13.0, 2.0, 9.0, 4.0, 1.0, 1.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0406494140625, -0.039456844329833984, -0.03826427459716797, -0.03707170486450195, -0.03587913513183594, -0.03468656539916992, -0.033493995666503906, -0.03230142593383789, -0.031108856201171875, -0.02991628646850586, -0.028723716735839844, -0.027531147003173828, -0.026338577270507812, -0.025146007537841797, -0.02395343780517578, -0.022760868072509766, -0.02156829833984375, -0.020375728607177734, -0.01918315887451172, -0.017990589141845703, -0.016798019409179688, -0.015605449676513672, -0.014412879943847656, -0.01322031021118164, -0.012027740478515625, -0.01083517074584961, -0.009642601013183594, -0.008450031280517578, -0.0072574615478515625, -0.006064891815185547, -0.004872322082519531, -0.0036797523498535156, -0.0024871826171875, -0.0012946128845214844, -0.00010204315185546875, 0.0010905265808105469, 0.0022830963134765625, 0.003475666046142578, 0.004668235778808594, 0.005860805511474609, 0.007053375244140625, 0.00824594497680664, 0.009438514709472656, 0.010631084442138672, 0.011823654174804688, 0.013016223907470703, 0.014208793640136719, 0.015401363372802734, 0.01659393310546875, 0.017786502838134766, 0.01897907257080078, 0.020171642303466797, 0.021364212036132812, 0.022556781768798828, 0.023749351501464844, 0.02494192123413086, 0.026134490966796875, 0.02732706069946289, 0.028519630432128906, 0.029712200164794922, 0.030904769897460938, 0.03209733963012695, 0.03328990936279297, 0.034482479095458984, 0.035675048828125]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 4.0, 4.0, 2.0, 4.0, 13.0, 26.0, 62.0, 112.0, 143.0, 160.0, 167.0, 122.0, 82.0, 49.0, 27.0, 14.0, 6.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02728271484375, -0.026140213012695312, -0.024997711181640625, -0.023855209350585938, -0.02271270751953125, -0.021570205688476562, -0.020427703857421875, -0.019285202026367188, -0.0181427001953125, -0.017000198364257812, -0.015857696533203125, -0.014715194702148438, -0.01357269287109375, -0.012430191040039062, -0.011287689208984375, -0.010145187377929688, -0.009002685546875, -0.007860183715820312, -0.006717681884765625, -0.0055751800537109375, -0.00443267822265625, -0.0032901763916015625, -0.002147674560546875, -0.0010051727294921875, 0.0001373291015625, 0.0012798309326171875, 0.002422332763671875, 0.0035648345947265625, 0.00470733642578125, 0.0058498382568359375, 0.006992340087890625, 0.008134841918945312, 0.00927734375, 0.010419845581054688, 0.011562347412109375, 0.012704849243164062, 0.01384735107421875, 0.014989852905273438, 0.016132354736328125, 0.017274856567382812, 0.0184173583984375, 0.019559860229492188, 0.020702362060546875, 0.021844863891601562, 0.02298736572265625, 0.024129867553710938, 0.025272369384765625, 0.026414871215820312, 0.027557373046875, 0.028699874877929688, 0.029842376708984375, 0.030984878540039062, 0.03212738037109375, 0.03326988220214844, 0.034412384033203125, 0.03555488586425781, 0.0366973876953125, 0.03783988952636719, 0.038982391357421875, 0.04012489318847656, 0.04126739501953125, 0.04240989685058594, 0.043552398681640625, 0.04469490051269531, 0.04583740234375]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 3.0, 1.0, 9.0, 6.0, 3.0, 7.0, 12.0, 11.0, 18.0, 27.0, 36.0, 31.0, 80.0, 139.0, 159.0, 247.0, 382.0, 832.0, 1888.0, 5390.0, 22324.0, 148727.0, 698538.0, 139639.0, 20976.0, 5274.0, 1874.0, 809.0, 415.0, 246.0, 154.0, 83.0, 71.0, 42.0, 30.0, 19.0, 23.0, 9.0, 10.0, 4.0, 5.0, 4.0, 2.0, 0.0, 5.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.0295867919921875, -0.028753042221069336, -0.027919292449951172, -0.027085542678833008, -0.026251792907714844, -0.02541804313659668, -0.024584293365478516, -0.02375054359436035, -0.022916793823242188, -0.022083044052124023, -0.02124929428100586, -0.020415544509887695, -0.01958179473876953, -0.018748044967651367, -0.017914295196533203, -0.01708054542541504, -0.016246795654296875, -0.015413045883178711, -0.014579296112060547, -0.013745546340942383, -0.012911796569824219, -0.012078046798706055, -0.01124429702758789, -0.010410547256469727, -0.009576797485351562, -0.008743047714233398, -0.007909297943115234, -0.00707554817199707, -0.006241798400878906, -0.005408048629760742, -0.004574298858642578, -0.003740549087524414, -0.00290679931640625, -0.002073049545288086, -0.0012392997741699219, -0.0004055500030517578, 0.00042819976806640625, 0.0012619495391845703, 0.0020956993103027344, 0.0029294490814208984, 0.0037631988525390625, 0.0045969486236572266, 0.005430698394775391, 0.006264448165893555, 0.007098197937011719, 0.007931947708129883, 0.008765697479248047, 0.009599447250366211, 0.010433197021484375, 0.011266946792602539, 0.012100696563720703, 0.012934446334838867, 0.013768196105957031, 0.014601945877075195, 0.01543569564819336, 0.016269445419311523, 0.017103195190429688, 0.01793694496154785, 0.018770694732666016, 0.01960444450378418, 0.020438194274902344, 0.021271944046020508, 0.022105693817138672, 0.022939443588256836, 0.023773193359375]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 3.0, 6.0, 5.0, 4.0, 5.0, 18.0, 11.0, 14.0, 22.0, 16.0, 17.0, 28.0, 26.0, 19.0, 18.0, 47.0, 38.0, 34.0, 30.0, 45.0, 38.0, 50.0, 28.0, 40.0, 45.0, 48.0, 37.0, 39.0, 43.0, 26.0, 30.0, 25.0, 23.0, 25.0, 20.0, 20.0, 11.0, 8.0, 13.0, 6.0, 4.0, 1.0, 5.0, 8.0, 4.0, 5.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.038238525390625, -0.037105560302734375, -0.03597259521484375, -0.034839630126953125, -0.0337066650390625, -0.032573699951171875, -0.03144073486328125, -0.030307769775390625, -0.0291748046875, -0.028041839599609375, -0.02690887451171875, -0.025775909423828125, -0.0246429443359375, -0.023509979248046875, -0.02237701416015625, -0.021244049072265625, -0.020111083984375, -0.018978118896484375, -0.01784515380859375, -0.016712188720703125, -0.0155792236328125, -0.014446258544921875, -0.01331329345703125, -0.012180328369140625, -0.01104736328125, -0.009914398193359375, -0.00878143310546875, -0.007648468017578125, -0.0065155029296875, -0.005382537841796875, -0.00424957275390625, -0.003116607666015625, -0.001983642578125, -0.000850677490234375, 0.00028228759765625, 0.001415252685546875, 0.0025482177734375, 0.003681182861328125, 0.00481414794921875, 0.005947113037109375, 0.007080078125, 0.008213043212890625, 0.00934600830078125, 0.010478973388671875, 0.0116119384765625, 0.012744903564453125, 0.01387786865234375, 0.015010833740234375, 0.016143798828125, 0.017276763916015625, 0.01840972900390625, 0.019542694091796875, 0.0206756591796875, 0.021808624267578125, 0.02294158935546875, 0.024074554443359375, 0.02520751953125, 0.026340484619140625, 0.02747344970703125, 0.028606414794921875, 0.0297393798828125, 0.030872344970703125, 0.03200531005859375, 0.033138275146484375, 0.034271240234375]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 7.0, 1.0, 7.0, 6.0, 8.0, 10.0, 21.0, 36.0, 49.0, 72.0, 121.0, 228.0, 497.0, 1269.0, 5336.0, 48062.0, 838307.0, 141424.0, 9753.0, 2045.0, 635.0, 287.0, 149.0, 78.0, 45.0, 30.0, 28.0, 12.0, 16.0, 10.0, 5.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.007396697998046875, -0.007108509540557861, -0.006820321083068848, -0.006532132625579834, -0.00624394416809082, -0.005955755710601807, -0.005667567253112793, -0.005379378795623779, -0.005091190338134766, -0.004803001880645752, -0.004514813423156738, -0.004226624965667725, -0.003938436508178711, -0.0036502480506896973, -0.0033620595932006836, -0.00307387113571167, -0.0027856826782226562, -0.0024974942207336426, -0.002209305763244629, -0.0019211173057556152, -0.0016329288482666016, -0.0013447403907775879, -0.0010565519332885742, -0.0007683634757995605, -0.0004801750183105469, -0.0001919865608215332, 9.620189666748047e-05, 0.00038439035415649414, 0.0006725788116455078, 0.0009607672691345215, 0.0012489557266235352, 0.0015371441841125488, 0.0018253326416015625, 0.002113521099090576, 0.00240170955657959, 0.0026898980140686035, 0.002978086471557617, 0.003266274929046631, 0.0035544633865356445, 0.003842651844024658, 0.004130840301513672, 0.0044190287590026855, 0.004707217216491699, 0.004995405673980713, 0.0052835941314697266, 0.00557178258895874, 0.005859971046447754, 0.006148159503936768, 0.006436347961425781, 0.006724536418914795, 0.007012724876403809, 0.007300913333892822, 0.007589101791381836, 0.00787729024887085, 0.008165478706359863, 0.008453667163848877, 0.00874185562133789, 0.009030044078826904, 0.009318232536315918, 0.009606420993804932, 0.009894609451293945, 0.010182797908782959, 0.010470986366271973, 0.010759174823760986, 0.01104736328125]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 7.0, 1.0, 9.0, 12.0, 6.0, 14.0, 17.0, 18.0, 32.0, 23.0, 37.0, 30.0, 55.0, 56.0, 57.0, 60.0, 39.0, 61.0, 57.0, 68.0, 55.0, 26.0, 59.0, 40.0, 38.0, 24.0, 23.0, 20.0, 12.0, 15.0, 7.0, 8.0, 7.0, 4.0, 2.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.2186508178710938e-06, -3.1013041734695435e-06, -2.983957529067993e-06, -2.866610884666443e-06, -2.7492642402648926e-06, -2.6319175958633423e-06, -2.514570951461792e-06, -2.3972243070602417e-06, -2.2798776626586914e-06, -2.162531018257141e-06, -2.045184373855591e-06, -1.9278377294540405e-06, -1.8104910850524902e-06, -1.69314444065094e-06, -1.5757977962493896e-06, -1.4584511518478394e-06, -1.341104507446289e-06, -1.2237578630447388e-06, -1.1064112186431885e-06, -9.890645742416382e-07, -8.717179298400879e-07, -7.543712854385376e-07, -6.370246410369873e-07, -5.19677996635437e-07, -4.023313522338867e-07, -2.849847078323364e-07, -1.6763806343078613e-07, -5.029141902923584e-08, 6.705522537231445e-08, 1.8440186977386475e-07, 3.0174851417541504e-07, 4.1909515857696533e-07, 5.364418029785156e-07, 6.537884473800659e-07, 7.711350917816162e-07, 8.884817361831665e-07, 1.0058283805847168e-06, 1.123175024986267e-06, 1.2405216693878174e-06, 1.3578683137893677e-06, 1.475214958190918e-06, 1.5925616025924683e-06, 1.7099082469940186e-06, 1.8272548913955688e-06, 1.944601535797119e-06, 2.0619481801986694e-06, 2.1792948246002197e-06, 2.29664146900177e-06, 2.4139881134033203e-06, 2.5313347578048706e-06, 2.648681402206421e-06, 2.766028046607971e-06, 2.8833746910095215e-06, 3.0007213354110718e-06, 3.118067979812622e-06, 3.2354146242141724e-06, 3.3527612686157227e-06, 3.470107913017273e-06, 3.5874545574188232e-06, 3.7048012018203735e-06, 3.822147846221924e-06, 3.939494490623474e-06, 4.056841135025024e-06, 4.174187779426575e-06, 4.291534423828125e-06]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 3.0, 2.0, 2.0, 4.0, 4.0, 2.0, 7.0, 5.0, 13.0, 10.0, 17.0, 30.0, 35.0, 59.0, 71.0, 115.0, 229.0, 324.0, 691.0, 1578.0, 4240.0, 16665.0, 123309.0, 726242.0, 147795.0, 19085.0, 4698.0, 1609.0, 759.0, 362.0, 210.0, 109.0, 71.0, 54.0, 40.0, 31.0, 16.0, 22.0, 13.0, 7.0, 7.0, 3.0, 4.0, 3.0, 2.0, 1.0, 4.0, 2.0, 2.0, 1.0, 0.0, 2.0], "bins": [-0.0071258544921875, -0.006925344467163086, -0.006724834442138672, -0.006524324417114258, -0.006323814392089844, -0.00612330436706543, -0.005922794342041016, -0.0057222843170166016, -0.0055217742919921875, -0.0053212642669677734, -0.005120754241943359, -0.004920244216918945, -0.004719734191894531, -0.004519224166870117, -0.004318714141845703, -0.004118204116821289, -0.003917694091796875, -0.003717184066772461, -0.003516674041748047, -0.003316164016723633, -0.0031156539916992188, -0.0029151439666748047, -0.0027146339416503906, -0.0025141239166259766, -0.0023136138916015625, -0.0021131038665771484, -0.0019125938415527344, -0.0017120838165283203, -0.0015115737915039062, -0.0013110637664794922, -0.0011105537414550781, -0.0009100437164306641, -0.00070953369140625, -0.0005090236663818359, -0.0003085136413574219, -0.00010800361633300781, 9.250640869140625e-05, 0.0002930164337158203, 0.0004935264587402344, 0.0006940364837646484, 0.0008945465087890625, 0.0010950565338134766, 0.0012955665588378906, 0.0014960765838623047, 0.0016965866088867188, 0.0018970966339111328, 0.002097606658935547, 0.002298116683959961, 0.002498626708984375, 0.002699136734008789, 0.002899646759033203, 0.003100156784057617, 0.0033006668090820312, 0.0035011768341064453, 0.0037016868591308594, 0.0039021968841552734, 0.0041027069091796875, 0.0043032169342041016, 0.004503726959228516, 0.00470423698425293, 0.004904747009277344, 0.005105257034301758, 0.005305767059326172, 0.005506277084350586, 0.005706787109375]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 2.0, 2.0, 4.0, 2.0, 3.0, 7.0, 9.0, 1.0, 15.0, 13.0, 18.0, 29.0, 33.0, 35.0, 42.0, 48.0, 51.0, 65.0, 81.0, 86.0, 73.0, 72.0, 57.0, 54.0, 55.0, 30.0, 27.0, 19.0, 19.0, 14.0, 12.0, 6.0, 4.0, 3.0, 4.0, 1.0, 6.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0], "bins": [-0.0085296630859375, -0.008289813995361328, -0.008049964904785156, -0.007810115814208984, -0.0075702667236328125, -0.007330417633056641, -0.007090568542480469, -0.006850719451904297, -0.006610870361328125, -0.006371021270751953, -0.006131172180175781, -0.005891323089599609, -0.0056514739990234375, -0.005411624908447266, -0.005171775817871094, -0.004931926727294922, -0.00469207763671875, -0.004452228546142578, -0.004212379455566406, -0.003972530364990234, -0.0037326812744140625, -0.0034928321838378906, -0.0032529830932617188, -0.003013134002685547, -0.002773284912109375, -0.002533435821533203, -0.0022935867309570312, -0.0020537376403808594, -0.0018138885498046875, -0.0015740394592285156, -0.0013341903686523438, -0.0010943412780761719, -0.0008544921875, -0.0006146430969238281, -0.00037479400634765625, -0.00013494491577148438, 0.0001049041748046875, 0.0003447532653808594, 0.0005846023559570312, 0.0008244514465332031, 0.001064300537109375, 0.0013041496276855469, 0.0015439987182617188, 0.0017838478088378906, 0.0020236968994140625, 0.0022635459899902344, 0.0025033950805664062, 0.002743244171142578, 0.00298309326171875, 0.003222942352294922, 0.0034627914428710938, 0.0037026405334472656, 0.0039424896240234375, 0.004182338714599609, 0.004422187805175781, 0.004662036895751953, 0.004901885986328125, 0.005141735076904297, 0.005381584167480469, 0.005621433258056641, 0.0058612823486328125, 0.006101131439208984, 0.006340980529785156, 0.006580829620361328, 0.0068206787109375]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 9.0, 18.0, 42.0, 61.0, 162.0, 220.0, 237.0, 127.0, 62.0, 26.0, 13.0, 13.0, 2.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1841619312763214, -0.1772949993610382, -0.170428067445755, -0.1635611355304718, -0.1566942036151886, -0.1498272716999054, -0.1429603397846222, -0.136093407869339, -0.1292264759540558, -0.12235954403877258, -0.11549261212348938, -0.10862568020820618, -0.10175874829292297, -0.09489181637763977, -0.08802487701177597, -0.08115794509649277, -0.07429100573062897, -0.06742407381534576, -0.06055714190006256, -0.05369020625948906, -0.046823274344205856, -0.03995634242892265, -0.03308940678834915, -0.02622247487306595, -0.019355542957782745, -0.012488610111176968, -0.00562167726457119, 0.0012452565133571625, 0.008112188428640366, 0.014979120343923569, 0.02184605598449707, 0.028712987899780273, 0.03557991981506348, 0.04244685173034668, 0.04931378364562988, 0.056180719286203384, 0.06304764747619629, 0.06991457939147949, 0.07678151875734329, 0.0836484506726265, 0.0905153825879097, 0.0973823145031929, 0.1042492464184761, 0.1111161857843399, 0.11798311769962311, 0.12485004961490631, 0.13171698153018951, 0.13858391344547272, 0.14545084536075592, 0.15231777727603912, 0.15918470919132233, 0.16605164110660553, 0.17291857302188873, 0.17978550493717194, 0.18665245175361633, 0.19351938366889954, 0.20038631558418274, 0.20725324749946594, 0.21412017941474915, 0.22098711133003235, 0.22785404324531555, 0.23472097516059875, 0.24158790707588196, 0.24845483899116516, 0.25532177090644836]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 1.0, 2.0, 2.0, 2.0, 1.0, 3.0, 9.0, 9.0, 10.0, 7.0, 12.0, 13.0, 15.0, 17.0, 26.0, 19.0, 20.0, 31.0, 26.0, 30.0, 31.0, 37.0, 35.0, 50.0, 61.0, 47.0, 55.0, 47.0, 39.0, 35.0, 30.0, 32.0, 42.0, 26.0, 21.0, 29.0, 25.0, 23.0, 12.0, 12.0, 20.0, 10.0, 6.0, 8.0, 2.0, 6.0, 4.0, 2.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.16360938549041748, -0.15887486934661865, -0.15414033830165863, -0.1494058221578598, -0.14467129111289978, -0.13993677496910095, -0.13520224392414093, -0.1304677277803421, -0.12573319673538208, -0.12099867314100266, -0.11626414954662323, -0.1115296259522438, -0.10679510235786438, -0.10206058621406555, -0.09732606261968613, -0.0925915390253067, -0.08785702288150787, -0.08312249928712845, -0.07838797569274902, -0.0736534520983696, -0.06891892850399017, -0.06418441236019135, -0.05944988876581192, -0.054715365171432495, -0.04998084157705307, -0.045246317982673645, -0.04051179438829422, -0.03577727451920509, -0.03104275092482567, -0.026308227330446243, -0.021573705598711967, -0.01683918386697769, -0.012104660272598267, -0.007370137609541416, -0.0026356149464845657, 0.0020989077165722847, 0.006833430379629135, 0.01156795397400856, 0.016302475705742836, 0.021036997437477112, 0.025771521031856537, 0.030506044626235962, 0.03524056822061539, 0.039975088089704514, 0.04470961168408394, 0.049444135278463364, 0.05417865514755249, 0.058913178741931915, 0.06364770233631134, 0.06838222593069077, 0.07311674952507019, 0.07785127311944962, 0.08258579671382904, 0.08732031285762787, 0.0920548364520073, 0.09678936004638672, 0.10152388364076614, 0.10625840723514557, 0.110992930829525, 0.11572745442390442, 0.12046197056770325, 0.12519650161266327, 0.1299310177564621, 0.13466554880142212, 0.13940006494522095]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 5.0, 5.0, 7.0, 18.0, 12.0, 17.0, 39.0, 53.0, 114.0, 197.0, 499.0, 1495.0, 5791.0, 47055.0, 3995654.0, 133205.0, 7478.0, 1493.0, 523.0, 229.0, 139.0, 75.0, 51.0, 24.0, 29.0, 26.0, 15.0, 9.0, 8.0, 6.0, 9.0, 2.0, 3.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02935791015625, -0.028212547302246094, -0.027067184448242188, -0.02592182159423828, -0.024776458740234375, -0.02363109588623047, -0.022485733032226562, -0.021340370178222656, -0.02019500732421875, -0.019049644470214844, -0.017904281616210938, -0.01675891876220703, -0.015613555908203125, -0.014468193054199219, -0.013322830200195312, -0.012177467346191406, -0.0110321044921875, -0.009886741638183594, -0.008741378784179688, -0.007596015930175781, -0.006450653076171875, -0.005305290222167969, -0.0041599273681640625, -0.0030145645141601562, -0.00186920166015625, -0.0007238388061523438, 0.0004215240478515625, 0.0015668869018554688, 0.002712249755859375, 0.0038576126098632812, 0.0050029754638671875, 0.006148338317871094, 0.007293701171875, 0.008439064025878906, 0.009584426879882812, 0.010729789733886719, 0.011875152587890625, 0.013020515441894531, 0.014165878295898438, 0.015311241149902344, 0.01645660400390625, 0.017601966857910156, 0.018747329711914062, 0.01989269256591797, 0.021038055419921875, 0.02218341827392578, 0.023328781127929688, 0.024474143981933594, 0.0256195068359375, 0.026764869689941406, 0.027910232543945312, 0.02905559539794922, 0.030200958251953125, 0.03134632110595703, 0.03249168395996094, 0.033637046813964844, 0.03478240966796875, 0.035927772521972656, 0.03707313537597656, 0.03821849822998047, 0.039363861083984375, 0.04050922393798828, 0.04165458679199219, 0.042799949645996094, 0.0439453125]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 4.0, 3.0, 5.0, 4.0, 12.0, 28.0, 63.0, 94.0, 130.0, 171.0, 156.0, 142.0, 81.0, 61.0, 25.0, 15.0, 5.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02734375, -0.026203632354736328, -0.025063514709472656, -0.023923397064208984, -0.022783279418945312, -0.02164316177368164, -0.02050304412841797, -0.019362926483154297, -0.018222808837890625, -0.017082691192626953, -0.01594257354736328, -0.01480245590209961, -0.013662338256835938, -0.012522220611572266, -0.011382102966308594, -0.010241985321044922, -0.00910186767578125, -0.007961750030517578, -0.006821632385253906, -0.005681514739990234, -0.0045413970947265625, -0.0034012794494628906, -0.0022611618041992188, -0.0011210441589355469, 1.9073486328125e-05, 0.0011591911315917969, 0.0022993087768554688, 0.0034394264221191406, 0.0045795440673828125, 0.005719661712646484, 0.006859779357910156, 0.007999897003173828, 0.0091400146484375, 0.010280132293701172, 0.011420249938964844, 0.012560367584228516, 0.013700485229492188, 0.01484060287475586, 0.01598072052001953, 0.017120838165283203, 0.018260955810546875, 0.019401073455810547, 0.02054119110107422, 0.02168130874633789, 0.022821426391601562, 0.023961544036865234, 0.025101661682128906, 0.026241779327392578, 0.02738189697265625, 0.028522014617919922, 0.029662132263183594, 0.030802249908447266, 0.03194236755371094, 0.03308248519897461, 0.03422260284423828, 0.03536272048950195, 0.036502838134765625, 0.0376429557800293, 0.03878307342529297, 0.03992319107055664, 0.04106330871582031, 0.042203426361083984, 0.043343544006347656, 0.04448366165161133, 0.045623779296875]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 3.0, 12.0, 15.0, 19.0, 31.0, 58.0, 95.0, 244.0, 408.0, 1014.0, 2853.0, 10495.0, 84347.0, 3951654.0, 125422.0, 12928.0, 2896.0, 1003.0, 386.0, 175.0, 85.0, 59.0, 32.0, 16.0, 14.0, 6.0, 5.0, 6.0, 0.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0225067138671875, -0.021659135818481445, -0.02081155776977539, -0.019963979721069336, -0.01911640167236328, -0.018268823623657227, -0.017421245574951172, -0.016573667526245117, -0.015726089477539062, -0.014878511428833008, -0.014030933380126953, -0.013183355331420898, -0.012335777282714844, -0.011488199234008789, -0.010640621185302734, -0.00979304313659668, -0.008945465087890625, -0.00809788703918457, -0.007250308990478516, -0.006402730941772461, -0.005555152893066406, -0.0047075748443603516, -0.003859996795654297, -0.003012418746948242, -0.0021648406982421875, -0.0013172626495361328, -0.0004696846008300781, 0.00037789344787597656, 0.0012254714965820312, 0.002073049545288086, 0.0029206275939941406, 0.0037682056427001953, 0.00461578369140625, 0.005463361740112305, 0.006310939788818359, 0.007158517837524414, 0.008006095886230469, 0.008853673934936523, 0.009701251983642578, 0.010548830032348633, 0.011396408081054688, 0.012243986129760742, 0.013091564178466797, 0.013939142227172852, 0.014786720275878906, 0.01563429832458496, 0.016481876373291016, 0.01732945442199707, 0.018177032470703125, 0.01902461051940918, 0.019872188568115234, 0.02071976661682129, 0.021567344665527344, 0.0224149227142334, 0.023262500762939453, 0.024110078811645508, 0.024957656860351562, 0.025805234909057617, 0.026652812957763672, 0.027500391006469727, 0.02834796905517578, 0.029195547103881836, 0.03004312515258789, 0.030890703201293945, 0.03173828125]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 5.0, 7.0, 7.0, 12.0, 17.0, 25.0, 50.0, 157.0, 652.0, 2041.0, 756.0, 160.0, 70.0, 39.0, 29.0, 17.0, 6.0, 9.0, 10.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.032562255859375, -0.03162026405334473, -0.030678272247314453, -0.02973628044128418, -0.028794288635253906, -0.027852296829223633, -0.02691030502319336, -0.025968313217163086, -0.025026321411132812, -0.02408432960510254, -0.023142337799072266, -0.022200345993041992, -0.02125835418701172, -0.020316362380981445, -0.019374370574951172, -0.0184323787689209, -0.017490386962890625, -0.01654839515686035, -0.015606403350830078, -0.014664411544799805, -0.013722419738769531, -0.012780427932739258, -0.011838436126708984, -0.010896444320678711, -0.009954452514648438, -0.009012460708618164, -0.00807046890258789, -0.007128477096557617, -0.006186485290527344, -0.00524449348449707, -0.004302501678466797, -0.0033605098724365234, -0.00241851806640625, -0.0014765262603759766, -0.0005345344543457031, 0.0004074573516845703, 0.0013494491577148438, 0.002291440963745117, 0.0032334327697753906, 0.004175424575805664, 0.0051174163818359375, 0.006059408187866211, 0.007001399993896484, 0.007943391799926758, 0.008885383605957031, 0.009827375411987305, 0.010769367218017578, 0.011711359024047852, 0.012653350830078125, 0.013595342636108398, 0.014537334442138672, 0.015479326248168945, 0.01642131805419922, 0.017363309860229492, 0.018305301666259766, 0.01924729347229004, 0.020189285278320312, 0.021131277084350586, 0.02207326889038086, 0.023015260696411133, 0.023957252502441406, 0.02489924430847168, 0.025841236114501953, 0.026783227920532227, 0.0277252197265625]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 6.0, 3.0, 10.0, 10.0, 33.0, 51.0, 116.0, 199.0, 226.0, 148.0, 87.0, 42.0, 25.0, 15.0, 11.0, 4.0, 3.0, 4.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0704718679189682, -0.0665752962231636, -0.0626787319779396, -0.05878216028213501, -0.05488559231162071, -0.050989024341106415, -0.04709245264530182, -0.04319588467478752, -0.039299316704273224, -0.035402748733758926, -0.03150617703795433, -0.027609609067440033, -0.023713041096925735, -0.01981647126376629, -0.015919901430606842, -0.012023333460092545, -0.008126761764287949, -0.0042301928624510765, -0.0003336234949529171, 0.0035629458725452423, 0.007459514774382114, 0.011356083676218987, 0.015252653509378433, 0.01914922147989273, 0.023045791313052177, 0.026942361146211624, 0.03083892911672592, 0.03473550081253052, 0.038632068783044815, 0.04252863675355911, 0.04642520844936371, 0.05032177269458771, 0.0542183443903923, 0.0581149123609066, 0.0620114840567112, 0.0659080520272255, 0.06980462372303009, 0.07370118796825409, 0.07759775966405869, 0.08149433135986328, 0.08539089560508728, 0.08928746730089188, 0.09318403154611588, 0.09708060324192047, 0.10097717493772507, 0.10487373918294907, 0.10877031087875366, 0.11266687512397766, 0.11656345427036285, 0.12046002596616745, 0.12435659021139145, 0.12825316190719604, 0.13214972615242004, 0.13604630529880524, 0.13994286954402924, 0.14383943378925323, 0.14773599803447723, 0.15163256227970123, 0.15552914142608643, 0.15942570567131042, 0.16332226991653442, 0.16721884906291962, 0.17111541330814362, 0.17501197755336761, 0.1789085566997528]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 3.0, 6.0, 6.0, 8.0, 13.0, 21.0, 18.0, 22.0, 25.0, 33.0, 40.0, 40.0, 62.0, 60.0, 65.0, 48.0, 58.0, 52.0, 53.0, 62.0, 48.0, 37.0, 47.0, 29.0, 26.0, 26.0, 18.0, 24.0, 8.0, 15.0, 9.0, 9.0, 3.0, 10.0, 4.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0], "bins": [-0.10447937250137329, -0.10184003412723541, -0.09920069575309753, -0.09656135737895966, -0.09392201900482178, -0.0912826731801033, -0.08864333480596542, -0.08600399643182755, -0.08336465805768967, -0.08072531968355179, -0.07808598130941391, -0.07544664293527603, -0.07280729711055756, -0.07016795873641968, -0.0675286203622818, -0.06488928198814392, -0.06224994361400604, -0.059610605239868164, -0.056971266865730286, -0.05433192476630211, -0.05169258639216423, -0.04905324801802635, -0.046413905918598175, -0.0437745675444603, -0.04113522917032242, -0.03849589079618454, -0.03585655242204666, -0.033217210322618484, -0.030577871948480606, -0.027938533574342728, -0.0252991933375597, -0.022659853100776672, -0.020020514726638794, -0.017381176352500916, -0.014741836115717888, -0.012102496810257435, -0.009463157504796982, -0.006823818199336529, -0.004184478893876076, -0.001545138657093048, 0.0010941997170448303, 0.0037335390225052834, 0.006372878327965736, 0.00901221763342619, 0.011651556938886642, 0.014290896244347095, 0.01693023554980755, 0.019569575786590576, 0.022208914160728455, 0.024848252534866333, 0.02748759277164936, 0.03012693300843239, 0.03276627138257027, 0.035405609756708145, 0.03804495185613632, 0.0406842902302742, 0.04332362860441208, 0.04596296697854996, 0.048602305352687836, 0.05124164745211601, 0.05388098582625389, 0.05652032420039177, 0.059159666299819946, 0.061799004673957825, 0.0644383430480957]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 3.0, 3.0, 1.0, 6.0, 4.0, 8.0, 8.0, 10.0, 16.0, 21.0, 32.0, 45.0, 59.0, 67.0, 119.0, 145.0, 213.0, 352.0, 540.0, 926.0, 1719.0, 4223.0, 14846.0, 89599.0, 610192.0, 276190.0, 35666.0, 7506.0, 2711.0, 1269.0, 711.0, 431.0, 269.0, 182.0, 132.0, 82.0, 75.0, 55.0, 29.0, 27.0, 12.0, 12.0, 12.0, 9.0, 9.0, 3.0, 1.0, 4.0, 5.0, 1.0, 2.0, 2.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.029083251953125, -0.028035640716552734, -0.02698802947998047, -0.025940418243408203, -0.024892807006835938, -0.023845195770263672, -0.022797584533691406, -0.02174997329711914, -0.020702362060546875, -0.01965475082397461, -0.018607139587402344, -0.017559528350830078, -0.016511917114257812, -0.015464305877685547, -0.014416694641113281, -0.013369083404541016, -0.01232147216796875, -0.011273860931396484, -0.010226249694824219, -0.009178638458251953, -0.008131027221679688, -0.007083415985107422, -0.006035804748535156, -0.004988193511962891, -0.003940582275390625, -0.0028929710388183594, -0.0018453598022460938, -0.0007977485656738281, 0.0002498626708984375, 0.0012974739074707031, 0.0023450851440429688, 0.0033926963806152344, 0.0044403076171875, 0.005487918853759766, 0.006535530090332031, 0.007583141326904297, 0.008630752563476562, 0.009678363800048828, 0.010725975036621094, 0.01177358627319336, 0.012821197509765625, 0.01386880874633789, 0.014916419982910156, 0.015964031219482422, 0.017011642456054688, 0.018059253692626953, 0.01910686492919922, 0.020154476165771484, 0.02120208740234375, 0.022249698638916016, 0.02329730987548828, 0.024344921112060547, 0.025392532348632812, 0.026440143585205078, 0.027487754821777344, 0.02853536605834961, 0.029582977294921875, 0.03063058853149414, 0.031678199768066406, 0.03272581100463867, 0.03377342224121094, 0.0348210334777832, 0.03586864471435547, 0.036916255950927734, 0.0379638671875]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 5.0, 1.0, 6.0, 3.0, 8.0, 33.0, 55.0, 101.0, 121.0, 160.0, 172.0, 147.0, 82.0, 58.0, 30.0, 10.0, 4.0, 4.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0274200439453125, -0.026278257369995117, -0.025136470794677734, -0.02399468421936035, -0.02285289764404297, -0.021711111068725586, -0.020569324493408203, -0.01942753791809082, -0.018285751342773438, -0.017143964767456055, -0.016002178192138672, -0.014860391616821289, -0.013718605041503906, -0.012576818466186523, -0.01143503189086914, -0.010293245315551758, -0.009151458740234375, -0.008009672164916992, -0.006867885589599609, -0.0057260990142822266, -0.004584312438964844, -0.003442525863647461, -0.002300739288330078, -0.0011589527130126953, -1.71661376953125e-05, 0.0011246204376220703, 0.002266407012939453, 0.003408193588256836, 0.004549980163574219, 0.0056917667388916016, 0.006833553314208984, 0.007975339889526367, 0.00911712646484375, 0.010258913040161133, 0.011400699615478516, 0.012542486190795898, 0.013684272766113281, 0.014826059341430664, 0.015967845916748047, 0.01710963249206543, 0.018251419067382812, 0.019393205642700195, 0.020534992218017578, 0.02167677879333496, 0.022818565368652344, 0.023960351943969727, 0.02510213851928711, 0.026243925094604492, 0.027385711669921875, 0.028527498245239258, 0.02966928482055664, 0.030811071395874023, 0.031952857971191406, 0.03309464454650879, 0.03423643112182617, 0.035378217697143555, 0.03652000427246094, 0.03766179084777832, 0.0388035774230957, 0.039945363998413086, 0.04108715057373047, 0.04222893714904785, 0.043370723724365234, 0.04451251029968262, 0.045654296875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 8.0, 10.0, 11.0, 25.0, 22.0, 35.0, 43.0, 83.0, 118.0, 163.0, 303.0, 579.0, 1118.0, 2708.0, 7814.0, 32656.0, 203221.0, 659676.0, 110167.0, 20262.0, 5519.0, 1989.0, 878.0, 447.0, 258.0, 148.0, 85.0, 46.0, 43.0, 25.0, 24.0, 18.0, 12.0, 8.0, 7.0, 6.0, 6.0, 4.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.0311279296875, -0.030165672302246094, -0.029203414916992188, -0.02824115753173828, -0.027278900146484375, -0.02631664276123047, -0.025354385375976562, -0.024392127990722656, -0.02342987060546875, -0.022467613220214844, -0.021505355834960938, -0.02054309844970703, -0.019580841064453125, -0.01861858367919922, -0.017656326293945312, -0.016694068908691406, -0.0157318115234375, -0.014769554138183594, -0.013807296752929688, -0.012845039367675781, -0.011882781982421875, -0.010920524597167969, -0.009958267211914062, -0.008996009826660156, -0.00803375244140625, -0.007071495056152344, -0.0061092376708984375, -0.005146980285644531, -0.004184722900390625, -0.0032224655151367188, -0.0022602081298828125, -0.0012979507446289062, -0.000335693359375, 0.0006265640258789062, 0.0015888214111328125, 0.0025510787963867188, 0.003513336181640625, 0.004475593566894531, 0.0054378509521484375, 0.006400108337402344, 0.00736236572265625, 0.008324623107910156, 0.009286880493164062, 0.010249137878417969, 0.011211395263671875, 0.012173652648925781, 0.013135910034179688, 0.014098167419433594, 0.0150604248046875, 0.016022682189941406, 0.016984939575195312, 0.01794719696044922, 0.018909454345703125, 0.01987171173095703, 0.020833969116210938, 0.021796226501464844, 0.02275848388671875, 0.023720741271972656, 0.024682998657226562, 0.02564525604248047, 0.026607513427734375, 0.02756977081298828, 0.028532028198242188, 0.029494285583496094, 0.03045654296875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 3.0, 2.0, 2.0, 0.0, 4.0, 3.0, 5.0, 5.0, 9.0, 9.0, 24.0, 20.0, 23.0, 18.0, 26.0, 34.0, 30.0, 41.0, 48.0, 58.0, 78.0, 45.0, 73.0, 64.0, 60.0, 46.0, 50.0, 35.0, 31.0, 29.0, 32.0, 23.0, 22.0, 15.0, 10.0, 7.0, 10.0, 3.0, 1.0, 2.0, 2.0, 0.0, 4.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.049285888671875, -0.04761075973510742, -0.045935630798339844, -0.044260501861572266, -0.04258537292480469, -0.04091024398803711, -0.03923511505126953, -0.03755998611450195, -0.035884857177734375, -0.0342097282409668, -0.03253459930419922, -0.03085947036743164, -0.029184341430664062, -0.027509212493896484, -0.025834083557128906, -0.024158954620361328, -0.02248382568359375, -0.020808696746826172, -0.019133567810058594, -0.017458438873291016, -0.015783309936523438, -0.01410818099975586, -0.012433052062988281, -0.010757923126220703, -0.009082794189453125, -0.007407665252685547, -0.005732536315917969, -0.004057407379150391, -0.0023822784423828125, -0.0007071495056152344, 0.0009679794311523438, 0.002643108367919922, 0.0043182373046875, 0.005993366241455078, 0.007668495178222656, 0.009343624114990234, 0.011018753051757812, 0.01269388198852539, 0.014369010925292969, 0.016044139862060547, 0.017719268798828125, 0.019394397735595703, 0.02106952667236328, 0.02274465560913086, 0.024419784545898438, 0.026094913482666016, 0.027770042419433594, 0.029445171356201172, 0.03112030029296875, 0.03279542922973633, 0.034470558166503906, 0.036145687103271484, 0.03782081604003906, 0.03949594497680664, 0.04117107391357422, 0.0428462028503418, 0.044521331787109375, 0.04619646072387695, 0.04787158966064453, 0.04954671859741211, 0.05122184753417969, 0.052896976470947266, 0.054572105407714844, 0.05624723434448242, 0.05792236328125]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 2.0, 2.0, 0.0, 3.0, 1.0, 2.0, 2.0, 4.0, 5.0, 6.0, 9.0, 15.0, 22.0, 32.0, 58.0, 76.0, 159.0, 314.0, 693.0, 2171.0, 11149.0, 208073.0, 784271.0, 35127.0, 4229.0, 1141.0, 466.0, 234.0, 115.0, 71.0, 37.0, 17.0, 19.0, 11.0, 8.0, 4.0, 4.0, 3.0, 4.0, 0.0, 3.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.00852203369140625, -0.008184313774108887, -0.007846593856811523, -0.00750887393951416, -0.007171154022216797, -0.006833434104919434, -0.00649571418762207, -0.006157994270324707, -0.005820274353027344, -0.0054825544357299805, -0.005144834518432617, -0.004807114601135254, -0.004469394683837891, -0.004131674766540527, -0.003793954849243164, -0.0034562349319458008, -0.0031185150146484375, -0.0027807950973510742, -0.002443075180053711, -0.0021053552627563477, -0.0017676353454589844, -0.001429915428161621, -0.0010921955108642578, -0.0007544755935668945, -0.00041675567626953125, -7.903575897216797e-05, 0.0002586841583251953, 0.0005964040756225586, 0.0009341239929199219, 0.0012718439102172852, 0.0016095638275146484, 0.0019472837448120117, 0.002285003662109375, 0.0026227235794067383, 0.0029604434967041016, 0.003298163414001465, 0.003635883331298828, 0.003973603248596191, 0.004311323165893555, 0.004649043083190918, 0.004986763000488281, 0.0053244829177856445, 0.005662202835083008, 0.005999922752380371, 0.006337642669677734, 0.006675362586975098, 0.007013082504272461, 0.007350802421569824, 0.0076885223388671875, 0.00802624225616455, 0.008363962173461914, 0.008701682090759277, 0.00903940200805664, 0.009377121925354004, 0.009714841842651367, 0.01005256175994873, 0.010390281677246094, 0.010728001594543457, 0.01106572151184082, 0.011403441429138184, 0.011741161346435547, 0.01207888126373291, 0.012416601181030273, 0.012754321098327637, 0.013092041015625]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0, 3.0, 0.0, 2.0, 3.0, 8.0, 8.0, 6.0, 10.0, 10.0, 17.0, 14.0, 16.0, 28.0, 33.0, 37.0, 37.0, 44.0, 48.0, 74.0, 61.0, 48.0, 58.0, 49.0, 74.0, 34.0, 42.0, 32.0, 47.0, 36.0, 27.0, 19.0, 10.0, 20.0, 7.0, 10.0, 13.0, 5.0, 4.0, 4.0, 2.0, 4.0, 5.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.993511199951172e-06, -3.861263394355774e-06, -3.729015588760376e-06, -3.596767783164978e-06, -3.46451997756958e-06, -3.332272171974182e-06, -3.200024366378784e-06, -3.0677765607833862e-06, -2.9355287551879883e-06, -2.8032809495925903e-06, -2.6710331439971924e-06, -2.5387853384017944e-06, -2.4065375328063965e-06, -2.2742897272109985e-06, -2.1420419216156006e-06, -2.0097941160202026e-06, -1.8775463104248047e-06, -1.7452985048294067e-06, -1.6130506992340088e-06, -1.4808028936386108e-06, -1.3485550880432129e-06, -1.216307282447815e-06, -1.084059476852417e-06, -9.51811671257019e-07, -8.195638656616211e-07, -6.873160600662231e-07, -5.550682544708252e-07, -4.2282044887542725e-07, -2.905726432800293e-07, -1.5832483768463135e-07, -2.60770320892334e-08, 1.0617077350616455e-07, 2.384185791015625e-07, 3.7066638469696045e-07, 5.029141902923584e-07, 6.351619958877563e-07, 7.674098014831543e-07, 8.996576070785522e-07, 1.0319054126739502e-06, 1.1641532182693481e-06, 1.296401023864746e-06, 1.428648829460144e-06, 1.560896635055542e-06, 1.69314444065094e-06, 1.8253922462463379e-06, 1.957640051841736e-06, 2.089887857437134e-06, 2.2221356630325317e-06, 2.3543834686279297e-06, 2.4866312742233276e-06, 2.6188790798187256e-06, 2.7511268854141235e-06, 2.8833746910095215e-06, 3.0156224966049194e-06, 3.1478703022003174e-06, 3.2801181077957153e-06, 3.4123659133911133e-06, 3.5446137189865112e-06, 3.676861524581909e-06, 3.809109330177307e-06, 3.941357135772705e-06, 4.073604941368103e-06, 4.205852746963501e-06, 4.338100552558899e-06, 4.470348358154297e-06]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0, 3.0, 8.0, 10.0, 15.0, 21.0, 35.0, 53.0, 102.0, 213.0, 484.0, 1315.0, 5894.0, 63243.0, 872140.0, 95039.0, 7295.0, 1634.0, 565.0, 235.0, 119.0, 51.0, 35.0, 25.0, 6.0, 3.0, 2.0, 6.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00902557373046875, -0.008672475814819336, -0.008319377899169922, -0.007966279983520508, -0.007613182067871094, -0.00726008415222168, -0.006906986236572266, -0.0065538883209228516, -0.0062007904052734375, -0.0058476924896240234, -0.005494594573974609, -0.005141496658325195, -0.004788398742675781, -0.004435300827026367, -0.004082202911376953, -0.003729104995727539, -0.003376007080078125, -0.003022909164428711, -0.002669811248779297, -0.002316713333129883, -0.0019636154174804688, -0.0016105175018310547, -0.0012574195861816406, -0.0009043216705322266, -0.0005512237548828125, -0.00019812583923339844, 0.00015497207641601562, 0.0005080699920654297, 0.0008611679077148438, 0.0012142658233642578, 0.0015673637390136719, 0.001920461654663086, 0.0022735595703125, 0.002626657485961914, 0.002979755401611328, 0.003332853317260742, 0.0036859512329101562, 0.00403904914855957, 0.004392147064208984, 0.0047452449798583984, 0.0050983428955078125, 0.0054514408111572266, 0.005804538726806641, 0.006157636642456055, 0.006510734558105469, 0.006863832473754883, 0.007216930389404297, 0.007570028305053711, 0.007923126220703125, 0.008276224136352539, 0.008629322052001953, 0.008982419967651367, 0.009335517883300781, 0.009688615798950195, 0.01004171371459961, 0.010394811630249023, 0.010747909545898438, 0.011101007461547852, 0.011454105377197266, 0.01180720329284668, 0.012160301208496094, 0.012513399124145508, 0.012866497039794922, 0.013219594955444336, 0.01357269287109375]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 9.0, 6.0, 6.0, 5.0, 10.0, 16.0, 32.0, 33.0, 47.0, 67.0, 75.0, 126.0, 115.0, 122.0, 95.0, 63.0, 57.0, 35.0, 25.0, 16.0, 11.0, 11.0, 8.0, 7.0, 4.0, 6.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.009002685546875, -0.008649349212646484, -0.008296012878417969, -0.007942676544189453, -0.0075893402099609375, -0.007236003875732422, -0.006882667541503906, -0.006529331207275391, -0.006175994873046875, -0.005822658538818359, -0.005469322204589844, -0.005115985870361328, -0.0047626495361328125, -0.004409313201904297, -0.004055976867675781, -0.0037026405334472656, -0.00334930419921875, -0.0029959678649902344, -0.0026426315307617188, -0.002289295196533203, -0.0019359588623046875, -0.0015826225280761719, -0.0012292861938476562, -0.0008759498596191406, -0.000522613525390625, -0.00016927719116210938, 0.00018405914306640625, 0.0005373954772949219, 0.0008907318115234375, 0.0012440681457519531, 0.0015974044799804688, 0.0019507408142089844, 0.0023040771484375, 0.0026574134826660156, 0.0030107498168945312, 0.003364086151123047, 0.0037174224853515625, 0.004070758819580078, 0.004424095153808594, 0.004777431488037109, 0.005130767822265625, 0.005484104156494141, 0.005837440490722656, 0.006190776824951172, 0.0065441131591796875, 0.006897449493408203, 0.007250785827636719, 0.007604122161865234, 0.00795745849609375, 0.008310794830322266, 0.008664131164550781, 0.009017467498779297, 0.009370803833007812, 0.009724140167236328, 0.010077476501464844, 0.01043081283569336, 0.010784149169921875, 0.01113748550415039, 0.011490821838378906, 0.011844158172607422, 0.012197494506835938, 0.012550830841064453, 0.012904167175292969, 0.013257503509521484, 0.01361083984375]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 8.0, 11.0, 28.0, 61.0, 212.0, 311.0, 213.0, 89.0, 28.0, 22.0, 9.0, 6.0, 4.0, 5.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.25534433126449585, -0.24533556401729584, -0.23532678186893463, -0.22531801462173462, -0.2153092324733734, -0.2053004652261734, -0.1952916979789734, -0.18528291583061218, -0.17527414858341217, -0.16526538133621216, -0.15525659918785095, -0.14524783194065094, -0.13523906469345093, -0.12523028254508972, -0.11522151529788971, -0.1052127406001091, -0.09520396590232849, -0.08519519120454788, -0.07518641650676727, -0.06517764925956726, -0.05516887456178665, -0.04516009986400604, -0.03515132889151573, -0.02514255791902542, -0.015133783221244812, -0.005125010386109352, 0.004883762449026108, 0.014892535284161568, 0.024901308119297028, 0.03491008281707764, 0.04491885378956795, 0.05492762476205826, 0.06493642926216125, 0.07494520395994186, 0.08495397865772247, 0.09496274590492249, 0.1049715206027031, 0.1149802953004837, 0.12498906254768372, 0.13499784469604492, 0.14500661194324493, 0.15501537919044495, 0.16502416133880615, 0.17503292858600616, 0.18504169583320618, 0.19505047798156738, 0.2050592452287674, 0.2150680124759674, 0.2250767946243286, 0.23508556187152863, 0.24509434401988983, 0.25510311126708984, 0.26511189341545105, 0.27512067556381226, 0.2851294279098511, 0.2951382100582123, 0.3051469922065735, 0.3151557743549347, 0.3251645267009735, 0.3351733088493347, 0.3451820909976959, 0.35519087314605713, 0.36519962549209595, 0.37520840764045715, 0.38521715998649597]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 1.0, 5.0, 6.0, 2.0, 7.0, 7.0, 5.0, 11.0, 13.0, 12.0, 21.0, 17.0, 25.0, 33.0, 22.0, 23.0, 40.0, 33.0, 35.0, 37.0, 43.0, 50.0, 55.0, 66.0, 43.0, 37.0, 39.0, 40.0, 35.0, 35.0, 26.0, 28.0, 28.0, 28.0, 24.0, 8.0, 8.0, 10.0, 2.0, 7.0, 8.0, 10.0, 2.0, 5.0, 3.0, 5.0, 3.0, 3.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.17560642957687378, -0.16978397965431213, -0.1639615148305893, -0.15813906490802765, -0.152316614985466, -0.14649415016174316, -0.14067170023918152, -0.13484925031661987, -0.12902680039405823, -0.12320434302091599, -0.11738189309835434, -0.1115594357252121, -0.10573698580265045, -0.09991452842950821, -0.09409207105636597, -0.08826962113380432, -0.08244715631008148, -0.07662469893693924, -0.0708022490143776, -0.06497979164123535, -0.05915733799338341, -0.053334884345531464, -0.04751242697238922, -0.04168997332453728, -0.03586751967668533, -0.03004506602883339, -0.024222610518336296, -0.018400155007839203, -0.012577701359987259, -0.006755247712135315, -0.0009327903389930725, 0.0048896633088588715, 0.010712116956710815, 0.01653457060456276, 0.022357026115059853, 0.028179481625556946, 0.03400193527340889, 0.039824388921260834, 0.045646846294403076, 0.05146929994225502, 0.057291753590106964, 0.0631142109632492, 0.06893666088581085, 0.0747591182589531, 0.08058157563209534, 0.08640402555465698, 0.09222648292779922, 0.09804894030094147, 0.10387139022350311, 0.10969384759664536, 0.115516297519207, 0.12133875489234924, 0.1271612048149109, 0.13298365473747253, 0.13880611956119537, 0.14462856948375702, 0.15045103430747986, 0.1562734842300415, 0.16209594905376434, 0.167918398976326, 0.17374084889888763, 0.17956331372261047, 0.18538576364517212, 0.19120821356773376, 0.1970306634902954]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [3.0, 3.0, 0.0, 6.0, 1.0, 4.0, 3.0, 11.0, 15.0, 9.0, 21.0, 37.0, 53.0, 88.0, 111.0, 230.0, 362.0, 832.0, 2087.0, 5498.0, 20444.0, 265101.0, 3725060.0, 151911.0, 15967.0, 3828.0, 1244.0, 511.0, 266.0, 160.0, 101.0, 81.0, 59.0, 48.0, 28.0, 23.0, 20.0, 18.0, 10.0, 8.0, 5.0, 9.0, 5.0, 6.0, 1.0, 4.0, 1.0, 0.0, 0.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0164337158203125, -0.0157010555267334, -0.014968395233154297, -0.014235734939575195, -0.013503074645996094, -0.012770414352416992, -0.01203775405883789, -0.011305093765258789, -0.010572433471679688, -0.009839773178100586, -0.009107112884521484, -0.008374452590942383, -0.007641792297363281, -0.00690913200378418, -0.006176471710205078, -0.0054438114166259766, -0.004711151123046875, -0.0039784908294677734, -0.003245830535888672, -0.0025131702423095703, -0.0017805099487304688, -0.0010478496551513672, -0.0003151893615722656, 0.00041747093200683594, 0.0011501312255859375, 0.001882791519165039, 0.0026154518127441406, 0.003348112106323242, 0.004080772399902344, 0.004813432693481445, 0.005546092987060547, 0.0062787532806396484, 0.00701141357421875, 0.0077440738677978516, 0.008476734161376953, 0.009209394454956055, 0.009942054748535156, 0.010674715042114258, 0.01140737533569336, 0.012140035629272461, 0.012872695922851562, 0.013605356216430664, 0.014338016510009766, 0.015070676803588867, 0.01580333709716797, 0.01653599739074707, 0.017268657684326172, 0.018001317977905273, 0.018733978271484375, 0.019466638565063477, 0.020199298858642578, 0.02093195915222168, 0.02166461944580078, 0.022397279739379883, 0.023129940032958984, 0.023862600326538086, 0.024595260620117188, 0.02532792091369629, 0.02606058120727539, 0.026793241500854492, 0.027525901794433594, 0.028258562088012695, 0.028991222381591797, 0.0297238826751709, 0.03045654296875]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 5.0, 2.0, 6.0, 1.0, 11.0, 20.0, 50.0, 74.0, 139.0, 159.0, 168.0, 131.0, 94.0, 76.0, 25.0, 20.0, 14.0, 2.0, 4.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0267791748046875, -0.02567744255065918, -0.02457571029663086, -0.02347397804260254, -0.02237224578857422, -0.0212705135345459, -0.020168781280517578, -0.019067049026489258, -0.017965316772460938, -0.016863584518432617, -0.015761852264404297, -0.014660120010375977, -0.013558387756347656, -0.012456655502319336, -0.011354923248291016, -0.010253190994262695, -0.009151458740234375, -0.008049726486206055, -0.006947994232177734, -0.005846261978149414, -0.004744529724121094, -0.0036427974700927734, -0.002541065216064453, -0.0014393329620361328, -0.0003376007080078125, 0.0007641315460205078, 0.0018658638000488281, 0.0029675960540771484, 0.004069328308105469, 0.005171060562133789, 0.006272792816162109, 0.00737452507019043, 0.00847625732421875, 0.00957798957824707, 0.01067972183227539, 0.011781454086303711, 0.012883186340332031, 0.013984918594360352, 0.015086650848388672, 0.016188383102416992, 0.017290115356445312, 0.018391847610473633, 0.019493579864501953, 0.020595312118530273, 0.021697044372558594, 0.022798776626586914, 0.023900508880615234, 0.025002241134643555, 0.026103973388671875, 0.027205705642700195, 0.028307437896728516, 0.029409170150756836, 0.030510902404785156, 0.03161263465881348, 0.0327143669128418, 0.03381609916687012, 0.03491783142089844, 0.03601956367492676, 0.03712129592895508, 0.0382230281829834, 0.03932476043701172, 0.04042649269104004, 0.04152822494506836, 0.04262995719909668, 0.043731689453125]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 8.0, 5.0, 20.0, 14.0, 35.0, 65.0, 168.0, 398.0, 1000.0, 3798.0, 28306.0, 4008361.0, 141118.0, 7480.0, 1984.0, 719.0, 379.0, 168.0, 116.0, 53.0, 30.0, 19.0, 15.0, 6.0, 4.0, 8.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.039215087890625, -0.0376896858215332, -0.036164283752441406, -0.03463888168334961, -0.03311347961425781, -0.031588077545166016, -0.03006267547607422, -0.028537273406982422, -0.027011871337890625, -0.025486469268798828, -0.02396106719970703, -0.022435665130615234, -0.020910263061523438, -0.01938486099243164, -0.017859458923339844, -0.016334056854248047, -0.01480865478515625, -0.013283252716064453, -0.011757850646972656, -0.01023244857788086, -0.008707046508789062, -0.007181644439697266, -0.005656242370605469, -0.004130840301513672, -0.002605438232421875, -0.0010800361633300781, 0.00044536590576171875, 0.0019707679748535156, 0.0034961700439453125, 0.005021572113037109, 0.006546974182128906, 0.008072376251220703, 0.0095977783203125, 0.011123180389404297, 0.012648582458496094, 0.01417398452758789, 0.015699386596679688, 0.017224788665771484, 0.01875019073486328, 0.020275592803955078, 0.021800994873046875, 0.023326396942138672, 0.02485179901123047, 0.026377201080322266, 0.027902603149414062, 0.02942800521850586, 0.030953407287597656, 0.03247880935668945, 0.03400421142578125, 0.03552961349487305, 0.037055015563964844, 0.03858041763305664, 0.04010581970214844, 0.041631221771240234, 0.04315662384033203, 0.04468202590942383, 0.046207427978515625, 0.04773283004760742, 0.04925823211669922, 0.050783634185791016, 0.05230903625488281, 0.05383443832397461, 0.055359840393066406, 0.0568852424621582, 0.05841064453125]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 4.0, 0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 13.0, 13.0, 18.0, 39.0, 98.0, 280.0, 1594.0, 1496.0, 293.0, 93.0, 47.0, 23.0, 13.0, 11.0, 5.0, 5.0, 6.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.033416748046875, -0.031963348388671875, -0.03050994873046875, -0.029056549072265625, -0.0276031494140625, -0.026149749755859375, -0.02469635009765625, -0.023242950439453125, -0.02178955078125, -0.020336151123046875, -0.01888275146484375, -0.017429351806640625, -0.0159759521484375, -0.014522552490234375, -0.01306915283203125, -0.011615753173828125, -0.010162353515625, -0.008708953857421875, -0.00725555419921875, -0.005802154541015625, -0.0043487548828125, -0.002895355224609375, -0.00144195556640625, 1.1444091796875e-05, 0.00146484375, 0.002918243408203125, 0.00437164306640625, 0.005825042724609375, 0.0072784423828125, 0.008731842041015625, 0.01018524169921875, 0.011638641357421875, 0.013092041015625, 0.014545440673828125, 0.01599884033203125, 0.017452239990234375, 0.0189056396484375, 0.020359039306640625, 0.02181243896484375, 0.023265838623046875, 0.02471923828125, 0.026172637939453125, 0.02762603759765625, 0.029079437255859375, 0.0305328369140625, 0.031986236572265625, 0.03343963623046875, 0.034893035888671875, 0.036346435546875, 0.037799835205078125, 0.03925323486328125, 0.040706634521484375, 0.0421600341796875, 0.043613433837890625, 0.04506683349609375, 0.046520233154296875, 0.0479736328125, 0.049427032470703125, 0.05088043212890625, 0.052333831787109375, 0.0537872314453125, 0.055240631103515625, 0.05669403076171875, 0.058147430419921875, 0.059600830078125]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 7.0, 1.0, 12.0, 12.0, 27.0, 51.0, 120.0, 268.0, 228.0, 140.0, 70.0, 28.0, 7.0, 10.0, 8.0, 3.0, 2.0, 2.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.18448658287525177, -0.17766569554805756, -0.17084480822086334, -0.16402392089366913, -0.1572030484676361, -0.1503821611404419, -0.14356127381324768, -0.13674038648605347, -0.12991949915885925, -0.12309861183166504, -0.11627772450447083, -0.10945684462785721, -0.102635957300663, -0.09581506997346878, -0.08899419009685516, -0.08217330276966095, -0.07535241544246674, -0.06853152811527252, -0.06171064451336861, -0.05488976091146469, -0.04806887358427048, -0.04124798625707626, -0.03442710265517235, -0.027606219053268433, -0.02078533172607422, -0.013964446261525154, -0.0071435607969760895, -0.00032267533242702484, 0.00649821013212204, 0.013319097459316254, 0.02013998106122017, 0.026960864663124084, 0.033781737089157104, 0.04060262441635132, 0.047423508018255234, 0.05424439162015915, 0.06106527894735336, 0.06788616627454758, 0.0747070461511612, 0.08152793347835541, 0.08834882080554962, 0.09516970813274384, 0.10199059545993805, 0.10881147533655167, 0.11563236266374588, 0.1224532499909401, 0.1292741298675537, 0.13609501719474792, 0.14291590452194214, 0.14973679184913635, 0.15655767917633057, 0.16337856650352478, 0.170199453830719, 0.1770203411579132, 0.18384121358394623, 0.19066210091114044, 0.19748298823833466, 0.20430387556552887, 0.21112476289272308, 0.2179456502199173, 0.22476652264595032, 0.23158740997314453, 0.23840829730033875, 0.24522918462753296, 0.2520500719547272]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 4.0, 7.0, 6.0, 9.0, 7.0, 13.0, 11.0, 17.0, 28.0, 30.0, 38.0, 40.0, 40.0, 46.0, 57.0, 78.0, 49.0, 57.0, 65.0, 46.0, 52.0, 41.0, 44.0, 44.0, 38.0, 33.0, 20.0, 25.0, 9.0, 14.0, 14.0, 11.0, 6.0, 6.0, 5.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11891436576843262, -0.1142454668879509, -0.10957656800746918, -0.10490767657756805, -0.10023877769708633, -0.09556987881660461, -0.09090098738670349, -0.08623208850622177, -0.08156318962574005, -0.07689429074525833, -0.07222539186477661, -0.06755650043487549, -0.06288760155439377, -0.05821870267391205, -0.05354980751872063, -0.048880912363529205, -0.044212013483047485, -0.039543114602565765, -0.034874219447374344, -0.030205322429537773, -0.025536425411701202, -0.02086752839386463, -0.01619863137602806, -0.01152973622083664, -0.006860837340354919, -0.0021919403225183487, 0.002476956695318222, 0.007145853713154793, 0.011814750730991364, 0.016483647748827934, 0.021152544766664505, 0.025821439921855927, 0.030490338802337646, 0.035159237682819366, 0.03982813283801079, 0.04449702799320221, 0.04916592687368393, 0.05383482575416565, 0.05850372090935707, 0.06317261606454849, 0.06784151494503021, 0.07251041382551193, 0.07717931270599365, 0.08184820413589478, 0.0865171030163765, 0.09118600189685822, 0.09585489332675934, 0.10052379220724106, 0.10519269108772278, 0.1098615899682045, 0.11453048884868622, 0.11919938027858734, 0.12386827915906906, 0.12853717803955078, 0.1332060694694519, 0.13787496089935303, 0.14254386723041534, 0.14721275866031647, 0.15188166499137878, 0.1565505564212799, 0.16121944785118103, 0.16588835418224335, 0.17055724561214447, 0.1752261519432068, 0.1798950433731079]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 4.0, 2.0, 4.0, 2.0, 9.0, 4.0, 10.0, 21.0, 11.0, 36.0, 37.0, 73.0, 110.0, 162.0, 313.0, 497.0, 1129.0, 2687.0, 10010.0, 110398.0, 836190.0, 74149.0, 8131.0, 2387.0, 978.0, 479.0, 252.0, 159.0, 102.0, 65.0, 52.0, 29.0, 16.0, 16.0, 16.0, 6.0, 2.0, 5.0, 5.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.050323486328125, -0.048345088958740234, -0.04636669158935547, -0.0443882942199707, -0.04240989685058594, -0.04043149948120117, -0.038453102111816406, -0.03647470474243164, -0.034496307373046875, -0.03251791000366211, -0.030539512634277344, -0.028561115264892578, -0.026582717895507812, -0.024604320526123047, -0.02262592315673828, -0.020647525787353516, -0.01866912841796875, -0.016690731048583984, -0.014712333679199219, -0.012733936309814453, -0.010755538940429688, -0.008777141571044922, -0.006798744201660156, -0.004820346832275391, -0.002841949462890625, -0.0008635520935058594, 0.0011148452758789062, 0.003093242645263672, 0.0050716400146484375, 0.007050037384033203, 0.009028434753417969, 0.011006832122802734, 0.0129852294921875, 0.014963626861572266, 0.01694202423095703, 0.018920421600341797, 0.020898818969726562, 0.022877216339111328, 0.024855613708496094, 0.02683401107788086, 0.028812408447265625, 0.03079080581665039, 0.032769203186035156, 0.03474760055541992, 0.03672599792480469, 0.03870439529418945, 0.04068279266357422, 0.042661190032958984, 0.04463958740234375, 0.046617984771728516, 0.04859638214111328, 0.05057477951049805, 0.05255317687988281, 0.05453157424926758, 0.056509971618652344, 0.05848836898803711, 0.060466766357421875, 0.06244516372680664, 0.0644235610961914, 0.06640195846557617, 0.06838035583496094, 0.0703587532043457, 0.07233715057373047, 0.07431554794311523, 0.0762939453125]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 7.0, 2.0, 7.0, 15.0, 27.0, 67.0, 108.0, 136.0, 163.0, 156.0, 127.0, 86.0, 52.0, 26.0, 10.0, 6.0, 3.0, 0.0, 3.0, 0.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0274200439453125, -0.026315927505493164, -0.025211811065673828, -0.024107694625854492, -0.023003578186035156, -0.02189946174621582, -0.020795345306396484, -0.01969122886657715, -0.018587112426757812, -0.017482995986938477, -0.01637887954711914, -0.015274763107299805, -0.014170646667480469, -0.013066530227661133, -0.011962413787841797, -0.010858297348022461, -0.009754180908203125, -0.008650064468383789, -0.007545948028564453, -0.006441831588745117, -0.005337715148925781, -0.004233598709106445, -0.0031294822692871094, -0.0020253658294677734, -0.0009212493896484375, 0.00018286705017089844, 0.0012869834899902344, 0.0023910999298095703, 0.0034952163696289062, 0.004599332809448242, 0.005703449249267578, 0.006807565689086914, 0.00791168212890625, 0.009015798568725586, 0.010119915008544922, 0.011224031448364258, 0.012328147888183594, 0.01343226432800293, 0.014536380767822266, 0.0156404972076416, 0.016744613647460938, 0.017848730087280273, 0.01895284652709961, 0.020056962966918945, 0.02116107940673828, 0.022265195846557617, 0.023369312286376953, 0.02447342872619629, 0.025577545166015625, 0.02668166160583496, 0.027785778045654297, 0.028889894485473633, 0.02999401092529297, 0.031098127365112305, 0.03220224380493164, 0.03330636024475098, 0.03441047668457031, 0.03551459312438965, 0.036618709564208984, 0.03772282600402832, 0.038826942443847656, 0.03993105888366699, 0.04103517532348633, 0.042139291763305664, 0.043243408203125]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 1.0, 11.0, 12.0, 13.0, 19.0, 38.0, 40.0, 66.0, 124.0, 228.0, 398.0, 760.0, 1570.0, 4168.0, 12939.0, 56303.0, 594199.0, 319178.0, 41513.0, 10432.0, 3519.0, 1421.0, 665.0, 388.0, 207.0, 114.0, 56.0, 63.0, 37.0, 23.0, 17.0, 14.0, 6.0, 6.0, 6.0, 2.0, 4.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.042938232421875, -0.041452884674072266, -0.03996753692626953, -0.0384821891784668, -0.03699684143066406, -0.03551149368286133, -0.034026145935058594, -0.03254079818725586, -0.031055450439453125, -0.02957010269165039, -0.028084754943847656, -0.026599407196044922, -0.025114059448242188, -0.023628711700439453, -0.02214336395263672, -0.020658016204833984, -0.01917266845703125, -0.017687320709228516, -0.01620197296142578, -0.014716625213623047, -0.013231277465820312, -0.011745929718017578, -0.010260581970214844, -0.00877523422241211, -0.007289886474609375, -0.005804538726806641, -0.004319190979003906, -0.002833843231201172, -0.0013484954833984375, 0.00013685226440429688, 0.0016222000122070312, 0.0031075477600097656, 0.0045928955078125, 0.006078243255615234, 0.007563591003417969, 0.009048938751220703, 0.010534286499023438, 0.012019634246826172, 0.013504981994628906, 0.01499032974243164, 0.016475677490234375, 0.01796102523803711, 0.019446372985839844, 0.020931720733642578, 0.022417068481445312, 0.023902416229248047, 0.02538776397705078, 0.026873111724853516, 0.02835845947265625, 0.029843807220458984, 0.03132915496826172, 0.03281450271606445, 0.03429985046386719, 0.03578519821166992, 0.037270545959472656, 0.03875589370727539, 0.040241241455078125, 0.04172658920288086, 0.043211936950683594, 0.04469728469848633, 0.04618263244628906, 0.0476679801940918, 0.04915332794189453, 0.050638675689697266, 0.0521240234375]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 7.0, 4.0, 7.0, 8.0, 5.0, 19.0, 19.0, 25.0, 29.0, 35.0, 46.0, 60.0, 67.0, 62.0, 79.0, 65.0, 62.0, 60.0, 60.0, 46.0, 40.0, 45.0, 35.0, 29.0, 20.0, 18.0, 12.0, 10.0, 11.0, 5.0, 8.0, 3.0, 2.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07354736328125, -0.07104873657226562, -0.06855010986328125, -0.06605148315429688, -0.0635528564453125, -0.061054229736328125, -0.05855560302734375, -0.056056976318359375, -0.053558349609375, -0.051059722900390625, -0.04856109619140625, -0.046062469482421875, -0.0435638427734375, -0.041065216064453125, -0.03856658935546875, -0.036067962646484375, -0.0335693359375, -0.031070709228515625, -0.02857208251953125, -0.026073455810546875, -0.0235748291015625, -0.021076202392578125, -0.01857757568359375, -0.016078948974609375, -0.013580322265625, -0.011081695556640625, -0.00858306884765625, -0.006084442138671875, -0.0035858154296875, -0.001087188720703125, 0.00141143798828125, 0.003910064697265625, 0.00640869140625, 0.008907318115234375, 0.01140594482421875, 0.013904571533203125, 0.0164031982421875, 0.018901824951171875, 0.02140045166015625, 0.023899078369140625, 0.026397705078125, 0.028896331787109375, 0.03139495849609375, 0.033893585205078125, 0.0363922119140625, 0.038890838623046875, 0.04138946533203125, 0.043888092041015625, 0.04638671875, 0.048885345458984375, 0.05138397216796875, 0.053882598876953125, 0.0563812255859375, 0.058879852294921875, 0.06137847900390625, 0.06387710571289062, 0.066375732421875, 0.06887435913085938, 0.07137298583984375, 0.07387161254882812, 0.0763702392578125, 0.07886886596679688, 0.08136749267578125, 0.08386611938476562, 0.08636474609375]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 5.0, 3.0, 12.0, 16.0, 9.0, 17.0, 23.0, 33.0, 75.0, 116.0, 238.0, 389.0, 916.0, 2549.0, 9397.0, 80684.0, 838812.0, 99997.0, 10667.0, 2636.0, 991.0, 425.0, 217.0, 137.0, 67.0, 41.0, 31.0, 20.0, 18.0, 9.0, 8.0, 0.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0085906982421875, -0.008278727531433105, -0.007966756820678711, -0.007654786109924316, -0.007342815399169922, -0.007030844688415527, -0.006718873977661133, -0.006406903266906738, -0.006094932556152344, -0.005782961845397949, -0.005470991134643555, -0.00515902042388916, -0.004847049713134766, -0.004535079002380371, -0.0042231082916259766, -0.003911137580871582, -0.0035991668701171875, -0.003287196159362793, -0.0029752254486083984, -0.002663254737854004, -0.0023512840270996094, -0.002039313316345215, -0.0017273426055908203, -0.0014153718948364258, -0.0011034011840820312, -0.0007914304733276367, -0.0004794597625732422, -0.00016748905181884766, 0.00014448165893554688, 0.0004564523696899414, 0.0007684230804443359, 0.0010803937911987305, 0.001392364501953125, 0.0017043352127075195, 0.002016305923461914, 0.0023282766342163086, 0.002640247344970703, 0.0029522180557250977, 0.003264188766479492, 0.0035761594772338867, 0.0038881301879882812, 0.004200100898742676, 0.00451207160949707, 0.004824042320251465, 0.005136013031005859, 0.005447983741760254, 0.0057599544525146484, 0.006071925163269043, 0.0063838958740234375, 0.006695866584777832, 0.0070078372955322266, 0.007319808006286621, 0.007631778717041016, 0.00794374942779541, 0.008255720138549805, 0.0085676908493042, 0.008879661560058594, 0.009191632270812988, 0.009503602981567383, 0.009815573692321777, 0.010127544403076172, 0.010439515113830566, 0.010751485824584961, 0.011063456535339355, 0.01137542724609375]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 3.0, 3.0, 5.0, 14.0, 8.0, 8.0, 7.0, 19.0, 22.0, 23.0, 24.0, 35.0, 30.0, 49.0, 45.0, 63.0, 56.0, 56.0, 45.0, 53.0, 67.0, 41.0, 40.0, 36.0, 51.0, 39.0, 30.0, 17.0, 24.0, 17.0, 6.0, 12.0, 12.0, 7.0, 5.0, 11.0, 6.0, 6.0, 2.0, 1.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-4.351139068603516e-06, -4.217959940433502e-06, -4.084780812263489e-06, -3.951601684093475e-06, -3.818422555923462e-06, -3.6852434277534485e-06, -3.552064299583435e-06, -3.4188851714134216e-06, -3.285706043243408e-06, -3.1525269150733948e-06, -3.0193477869033813e-06, -2.886168658733368e-06, -2.7529895305633545e-06, -2.619810402393341e-06, -2.4866312742233276e-06, -2.353452146053314e-06, -2.2202730178833008e-06, -2.0870938897132874e-06, -1.953914761543274e-06, -1.8207356333732605e-06, -1.687556505203247e-06, -1.5543773770332336e-06, -1.4211982488632202e-06, -1.2880191206932068e-06, -1.1548399925231934e-06, -1.02166086435318e-06, -8.884817361831665e-07, -7.553026080131531e-07, -6.221234798431396e-07, -4.889443516731262e-07, -3.557652235031128e-07, -2.2258609533309937e-07, -8.940696716308594e-08, 4.377216100692749e-08, 1.7695128917694092e-07, 3.1013041734695435e-07, 4.4330954551696777e-07, 5.764886736869812e-07, 7.096678018569946e-07, 8.428469300270081e-07, 9.760260581970215e-07, 1.109205186367035e-06, 1.2423843145370483e-06, 1.3755634427070618e-06, 1.5087425708770752e-06, 1.6419216990470886e-06, 1.775100827217102e-06, 1.9082799553871155e-06, 2.041459083557129e-06, 2.1746382117271423e-06, 2.3078173398971558e-06, 2.440996468067169e-06, 2.5741755962371826e-06, 2.707354724407196e-06, 2.8405338525772095e-06, 2.973712980747223e-06, 3.1068921089172363e-06, 3.2400712370872498e-06, 3.373250365257263e-06, 3.5064294934272766e-06, 3.63960862159729e-06, 3.7727877497673035e-06, 3.905966877937317e-06, 4.03914600610733e-06, 4.172325134277344e-06]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 5.0, 3.0, 12.0, 14.0, 17.0, 33.0, 51.0, 85.0, 153.0, 319.0, 673.0, 1800.0, 4909.0, 18441.0, 132868.0, 773764.0, 93610.0, 14953.0, 4118.0, 1461.0, 644.0, 294.0, 129.0, 84.0, 39.0, 26.0, 27.0, 17.0, 8.0, 5.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.005886077880859375, -0.00562363862991333, -0.005361199378967285, -0.00509876012802124, -0.004836320877075195, -0.00457388162612915, -0.0043114423751831055, -0.0040490031242370605, -0.0037865638732910156, -0.0035241246223449707, -0.0032616853713989258, -0.002999246120452881, -0.002736806869506836, -0.002474367618560791, -0.002211928367614746, -0.0019494891166687012, -0.0016870498657226562, -0.0014246106147766113, -0.0011621713638305664, -0.0008997321128845215, -0.0006372928619384766, -0.00037485361099243164, -0.00011241436004638672, 0.0001500248908996582, 0.0004124641418457031, 0.000674903392791748, 0.000937342643737793, 0.0011997818946838379, 0.0014622211456298828, 0.0017246603965759277, 0.0019870996475219727, 0.0022495388984680176, 0.0025119781494140625, 0.0027744174003601074, 0.0030368566513061523, 0.0032992959022521973, 0.003561735153198242, 0.003824174404144287, 0.004086613655090332, 0.004349052906036377, 0.004611492156982422, 0.004873931407928467, 0.005136370658874512, 0.005398809909820557, 0.0056612491607666016, 0.0059236884117126465, 0.006186127662658691, 0.006448566913604736, 0.006711006164550781, 0.006973445415496826, 0.007235884666442871, 0.007498323917388916, 0.007760763168334961, 0.008023202419281006, 0.00828564167022705, 0.008548080921173096, 0.00881052017211914, 0.009072959423065186, 0.00933539867401123, 0.009597837924957275, 0.00986027717590332, 0.010122716426849365, 0.01038515567779541, 0.010647594928741455, 0.0109100341796875]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 2.0, 5.0, 5.0, 5.0, 4.0, 10.0, 8.0, 13.0, 16.0, 15.0, 23.0, 26.0, 42.0, 42.0, 49.0, 54.0, 79.0, 85.0, 76.0, 69.0, 57.0, 58.0, 38.0, 47.0, 31.0, 40.0, 23.0, 11.0, 6.0, 10.0, 6.0, 9.0, 4.0, 11.0, 4.0, 1.0, 5.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.00685882568359375, -0.0066416263580322266, -0.006424427032470703, -0.00620722770690918, -0.005990028381347656, -0.005772829055786133, -0.005555629730224609, -0.005338430404663086, -0.0051212310791015625, -0.004904031753540039, -0.004686832427978516, -0.004469633102416992, -0.004252433776855469, -0.004035234451293945, -0.003818035125732422, -0.0036008358001708984, -0.003383636474609375, -0.0031664371490478516, -0.002949237823486328, -0.0027320384979248047, -0.0025148391723632812, -0.002297639846801758, -0.0020804405212402344, -0.001863241195678711, -0.0016460418701171875, -0.001428842544555664, -0.0012116432189941406, -0.0009944438934326172, -0.0007772445678710938, -0.0005600452423095703, -0.0003428459167480469, -0.00012564659118652344, 9.1552734375e-05, 0.00030875205993652344, 0.0005259513854980469, 0.0007431507110595703, 0.0009603500366210938, 0.0011775493621826172, 0.0013947486877441406, 0.001611948013305664, 0.0018291473388671875, 0.002046346664428711, 0.0022635459899902344, 0.002480745315551758, 0.0026979446411132812, 0.0029151439666748047, 0.003132343292236328, 0.0033495426177978516, 0.003566741943359375, 0.0037839412689208984, 0.004001140594482422, 0.004218339920043945, 0.004435539245605469, 0.004652738571166992, 0.004869937896728516, 0.005087137222290039, 0.0053043365478515625, 0.005521535873413086, 0.005738735198974609, 0.005955934524536133, 0.006173133850097656, 0.00639033317565918, 0.006607532501220703, 0.0068247318267822266, 0.00704193115234375]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 2.0, 5.0, 6.0, 8.0, 8.0, 15.0, 22.0, 37.0, 53.0, 78.0, 153.0, 215.0, 148.0, 90.0, 65.0, 41.0, 24.0, 3.0, 5.0, 6.0, 9.0, 5.0, 5.0, 0.0, 5.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.16871331632137299, -0.16212114691734314, -0.1555289924144745, -0.14893682301044464, -0.1423446536064148, -0.13575248420238495, -0.1291603147983551, -0.12256816029548645, -0.1159759908914566, -0.10938382148742676, -0.10279165953397751, -0.09619949758052826, -0.08960732817649841, -0.08301515877246857, -0.07642299681901932, -0.06983083486557007, -0.06323866546154022, -0.056646499782800674, -0.05005433410406113, -0.04346216842532158, -0.03687000274658203, -0.030277837067842484, -0.023685671389102936, -0.017093505710363388, -0.01050134003162384, -0.003909174352884293, 0.002682991325855255, 0.009275157004594803, 0.01586732268333435, 0.0224594883620739, 0.029051654040813446, 0.035643819719552994, 0.042236000299453735, 0.04882816597819328, 0.05542033165693283, 0.06201249733567238, 0.06860466301441193, 0.07519683241844177, 0.08178899437189102, 0.08838115632534027, 0.09497332572937012, 0.10156549513339996, 0.10815765708684921, 0.11474981904029846, 0.12134198844432831, 0.12793415784835815, 0.1345263123512268, 0.14111848175525665, 0.1477106511592865, 0.15430282056331635, 0.1608949899673462, 0.16748714447021484, 0.1740793138742447, 0.18067148327827454, 0.1872636377811432, 0.19385580718517303, 0.20044797658920288, 0.20704014599323273, 0.21363231539726257, 0.22022446990013123, 0.22681663930416107, 0.23340880870819092, 0.24000096321105957, 0.24659313261508942, 0.25318530201911926]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 7.0, 4.0, 6.0, 6.0, 8.0, 12.0, 12.0, 8.0, 20.0, 13.0, 24.0, 26.0, 24.0, 23.0, 32.0, 31.0, 35.0, 38.0, 42.0, 64.0, 64.0, 57.0, 52.0, 59.0, 36.0, 39.0, 38.0, 27.0, 30.0, 15.0, 22.0, 21.0, 16.0, 17.0, 14.0, 9.0, 8.0, 8.0, 8.0, 4.0, 5.0, 1.0, 8.0, 5.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.19665592908859253, -0.19061660766601562, -0.18457728624343872, -0.17853796482086182, -0.1724986433982849, -0.166459321975708, -0.1604200005531311, -0.1543806791305542, -0.1483413577079773, -0.1423020362854004, -0.1362627148628235, -0.13022339344024658, -0.12418407201766968, -0.11814475059509277, -0.11210542917251587, -0.10606610774993896, -0.10002678632736206, -0.09398746490478516, -0.08794814348220825, -0.08190882205963135, -0.07586950063705444, -0.06983017921447754, -0.06379085779190063, -0.05775153636932373, -0.051712214946746826, -0.04567289352416992, -0.03963357210159302, -0.03359425067901611, -0.02755492925643921, -0.021515607833862305, -0.0154762864112854, -0.009436964988708496, -0.003397643566131592, 0.0026416778564453125, 0.008680999279022217, 0.014720320701599121, 0.020759642124176025, 0.02679896354675293, 0.032838284969329834, 0.03887760639190674, 0.04491692781448364, 0.05095624923706055, 0.05699557065963745, 0.06303489208221436, 0.06907421350479126, 0.07511353492736816, 0.08115285634994507, 0.08719217777252197, 0.09323149919509888, 0.09927082061767578, 0.10531014204025269, 0.11134946346282959, 0.1173887848854065, 0.1234281063079834, 0.1294674277305603, 0.1355067491531372, 0.1415460705757141, 0.14758539199829102, 0.15362471342086792, 0.15966403484344482, 0.16570335626602173, 0.17174267768859863, 0.17778199911117554, 0.18382132053375244, 0.18986064195632935]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 4.0, 4.0, 3.0, 2.0, 5.0, 7.0, 7.0, 15.0, 13.0, 28.0, 44.0, 83.0, 152.0, 303.0, 669.0, 1457.0, 2886.0, 7144.0, 28636.0, 546464.0, 3423136.0, 159922.0, 16171.0, 4528.0, 1467.0, 533.0, 226.0, 107.0, 75.0, 39.0, 29.0, 28.0, 19.0, 17.0, 16.0, 14.0, 7.0, 4.0, 6.0, 0.0, 6.0, 6.0, 4.0, 0.0, 3.0, 2.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01488494873046875, -0.01429736614227295, -0.013709783554077148, -0.013122200965881348, -0.012534618377685547, -0.011947035789489746, -0.011359453201293945, -0.010771870613098145, -0.010184288024902344, -0.009596705436706543, -0.009009122848510742, -0.008421540260314941, -0.00783395767211914, -0.00724637508392334, -0.006658792495727539, -0.006071209907531738, -0.0054836273193359375, -0.004896044731140137, -0.004308462142944336, -0.003720879554748535, -0.0031332969665527344, -0.0025457143783569336, -0.001958131790161133, -0.001370549201965332, -0.0007829666137695312, -0.00019538402557373047, 0.0003921985626220703, 0.000979781150817871, 0.0015673637390136719, 0.0021549463272094727, 0.0027425289154052734, 0.0033301115036010742, 0.003917694091796875, 0.004505276679992676, 0.0050928592681884766, 0.005680441856384277, 0.006268024444580078, 0.006855607032775879, 0.00744318962097168, 0.00803077220916748, 0.008618354797363281, 0.009205937385559082, 0.009793519973754883, 0.010381102561950684, 0.010968685150146484, 0.011556267738342285, 0.012143850326538086, 0.012731432914733887, 0.013319015502929688, 0.013906598091125488, 0.014494180679321289, 0.01508176326751709, 0.01566934585571289, 0.01625692844390869, 0.016844511032104492, 0.017432093620300293, 0.018019676208496094, 0.018607258796691895, 0.019194841384887695, 0.019782423973083496, 0.020370006561279297, 0.020957589149475098, 0.0215451717376709, 0.0221327543258667, 0.0227203369140625]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 5.0, 2.0, 11.0, 8.0, 14.0, 44.0, 64.0, 109.0, 118.0, 154.0, 149.0, 122.0, 95.0, 46.0, 26.0, 20.0, 10.0, 3.0, 3.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.027557373046875, -0.026447772979736328, -0.025338172912597656, -0.024228572845458984, -0.023118972778320312, -0.02200937271118164, -0.02089977264404297, -0.019790172576904297, -0.018680572509765625, -0.017570972442626953, -0.01646137237548828, -0.01535177230834961, -0.014242172241210938, -0.013132572174072266, -0.012022972106933594, -0.010913372039794922, -0.00980377197265625, -0.008694171905517578, -0.007584571838378906, -0.006474971771240234, -0.0053653717041015625, -0.004255771636962891, -0.0031461715698242188, -0.002036571502685547, -0.000926971435546875, 0.00018262863159179688, 0.0012922286987304688, 0.0024018287658691406, 0.0035114288330078125, 0.004621028900146484, 0.005730628967285156, 0.006840229034423828, 0.0079498291015625, 0.009059429168701172, 0.010169029235839844, 0.011278629302978516, 0.012388229370117188, 0.01349782943725586, 0.014607429504394531, 0.015717029571533203, 0.016826629638671875, 0.017936229705810547, 0.01904582977294922, 0.02015542984008789, 0.021265029907226562, 0.022374629974365234, 0.023484230041503906, 0.024593830108642578, 0.02570343017578125, 0.026813030242919922, 0.027922630310058594, 0.029032230377197266, 0.030141830444335938, 0.03125143051147461, 0.03236103057861328, 0.03347063064575195, 0.034580230712890625, 0.0356898307800293, 0.03679943084716797, 0.03790903091430664, 0.03901863098144531, 0.040128231048583984, 0.041237831115722656, 0.04234743118286133, 0.04345703125]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 10.0, 13.0, 11.0, 19.0, 27.0, 42.0, 73.0, 112.0, 213.0, 384.0, 754.0, 1566.0, 3971.0, 13317.0, 79198.0, 3487044.0, 566045.0, 30686.0, 6738.0, 2171.0, 757.0, 450.0, 240.0, 108.0, 119.0, 62.0, 53.0, 39.0, 19.0, 17.0, 14.0, 6.0, 7.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.02337646484375, -0.022704362869262695, -0.02203226089477539, -0.021360158920288086, -0.02068805694580078, -0.020015954971313477, -0.019343852996826172, -0.018671751022338867, -0.017999649047851562, -0.017327547073364258, -0.016655445098876953, -0.01598334312438965, -0.015311241149902344, -0.014639139175415039, -0.013967037200927734, -0.01329493522644043, -0.012622833251953125, -0.01195073127746582, -0.011278629302978516, -0.010606527328491211, -0.009934425354003906, -0.009262323379516602, -0.008590221405029297, -0.007918119430541992, -0.0072460174560546875, -0.006573915481567383, -0.005901813507080078, -0.0052297115325927734, -0.004557609558105469, -0.003885507583618164, -0.0032134056091308594, -0.0025413036346435547, -0.00186920166015625, -0.0011970996856689453, -0.0005249977111816406, 0.00014710426330566406, 0.0008192062377929688, 0.0014913082122802734, 0.002163410186767578, 0.002835512161254883, 0.0035076141357421875, 0.004179716110229492, 0.004851818084716797, 0.0055239200592041016, 0.006196022033691406, 0.006868124008178711, 0.007540225982666016, 0.00821232795715332, 0.008884429931640625, 0.00955653190612793, 0.010228633880615234, 0.010900735855102539, 0.011572837829589844, 0.012244939804077148, 0.012917041778564453, 0.013589143753051758, 0.014261245727539062, 0.014933347702026367, 0.015605449676513672, 0.016277551651000977, 0.01694965362548828, 0.017621755599975586, 0.01829385757446289, 0.018965959548950195, 0.0196380615234375]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 1.0, 4.0, 2.0, 4.0, 6.0, 13.0, 22.0, 22.0, 37.0, 101.0, 245.0, 1067.0, 1603.0, 638.0, 157.0, 57.0, 44.0, 21.0, 12.0, 8.0, 8.0, 5.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03143310546875, -0.030323505401611328, -0.029213905334472656, -0.028104305267333984, -0.026994705200195312, -0.02588510513305664, -0.02477550506591797, -0.023665904998779297, -0.022556304931640625, -0.021446704864501953, -0.02033710479736328, -0.01922750473022461, -0.018117904663085938, -0.017008304595947266, -0.015898704528808594, -0.014789104461669922, -0.01367950439453125, -0.012569904327392578, -0.011460304260253906, -0.010350704193115234, -0.009241104125976562, -0.00813150405883789, -0.007021903991699219, -0.005912303924560547, -0.004802703857421875, -0.003693103790283203, -0.0025835037231445312, -0.0014739036560058594, -0.0003643035888671875, 0.0007452964782714844, 0.0018548965454101562, 0.002964496612548828, 0.0040740966796875, 0.005183696746826172, 0.006293296813964844, 0.007402896881103516, 0.008512496948242188, 0.00962209701538086, 0.010731697082519531, 0.011841297149658203, 0.012950897216796875, 0.014060497283935547, 0.015170097351074219, 0.01627969741821289, 0.017389297485351562, 0.018498897552490234, 0.019608497619628906, 0.020718097686767578, 0.02182769775390625, 0.022937297821044922, 0.024046897888183594, 0.025156497955322266, 0.026266098022460938, 0.02737569808959961, 0.02848529815673828, 0.029594898223876953, 0.030704498291015625, 0.0318140983581543, 0.03292369842529297, 0.03403329849243164, 0.03514289855957031, 0.036252498626708984, 0.037362098693847656, 0.03847169876098633, 0.039581298828125]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 6.0, 12.0, 34.0, 67.0, 147.0, 271.0, 257.0, 112.0, 46.0, 20.0, 8.0, 10.0, 9.0, 0.0, 2.0, 3.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12224692851305008, -0.11609340459108353, -0.10993987321853638, -0.10378634929656982, -0.09763282537460327, -0.09147930145263672, -0.08532577753067017, -0.07917224615812302, -0.07301872223615646, -0.06686519831418991, -0.06071167066693306, -0.05455814301967621, -0.048404619097709656, -0.0422510951757431, -0.03609756752848625, -0.0299440398812294, -0.023790515959262848, -0.017636990174651146, -0.011483464390039444, -0.005329938605427742, 0.00082358717918396, 0.006977112963795662, 0.013130638748407364, 0.019284166395664215, 0.025437690317630768, 0.03159121423959732, 0.03774474188685417, 0.04389826953411102, 0.050051793456077576, 0.05620531737804413, 0.06235884502530098, 0.06851237267255783, 0.07466590404510498, 0.08081942796707153, 0.08697295188903809, 0.09312648326158524, 0.09928000718355179, 0.10543353110551834, 0.11158706247806549, 0.11774058640003204, 0.1238941103219986, 0.13004763424396515, 0.1362011581659317, 0.14235468208789825, 0.148508220911026, 0.15466174483299255, 0.1608152687549591, 0.16696879267692566, 0.1731223165988922, 0.17927584052085876, 0.18542936444282532, 0.19158288836479187, 0.19773641228675842, 0.20388995110988617, 0.21004347503185272, 0.21619699895381927, 0.22235052287578583, 0.22850404679775238, 0.23465757071971893, 0.24081109464168549, 0.24696463346481323, 0.2531181573867798, 0.25927168130874634, 0.2654252052307129, 0.27157872915267944]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 1.0, 3.0, 3.0, 4.0, 5.0, 7.0, 7.0, 9.0, 10.0, 19.0, 16.0, 24.0, 28.0, 27.0, 25.0, 43.0, 40.0, 46.0, 54.0, 57.0, 54.0, 53.0, 45.0, 49.0, 39.0, 47.0, 47.0, 44.0, 39.0, 30.0, 24.0, 29.0, 13.0, 9.0, 16.0, 7.0, 6.0, 5.0, 5.0, 5.0, 3.0, 9.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08929884433746338, -0.08637005090713501, -0.08344125747680664, -0.08051246404647827, -0.0775836706161499, -0.07465486973524094, -0.07172607630491257, -0.0687972828745842, -0.06586848944425583, -0.06293969601392746, -0.06001090258359909, -0.05708210542798042, -0.054153311997652054, -0.051224518567323685, -0.04829572141170502, -0.04536692798137665, -0.04243813455104828, -0.03950934112071991, -0.03658054769039154, -0.03365175053477287, -0.030722957104444504, -0.027794163674116135, -0.024865368381142616, -0.021936573088169098, -0.01900777965784073, -0.01607898622751236, -0.013150190934538841, -0.010221396572887897, -0.007292602211236954, -0.00436380784958601, -0.0014350134879350662, 0.0014937818050384521, 0.004422575235366821, 0.007351369597017765, 0.010280163958668709, 0.013208958320319653, 0.016137752681970596, 0.019066546112298965, 0.021995341405272484, 0.024924136698246002, 0.02785293012857437, 0.03078172355890274, 0.03371051698923111, 0.03663931414484978, 0.039568107575178146, 0.042496901005506516, 0.04542569816112518, 0.04835449159145355, 0.05128328502178192, 0.05421207845211029, 0.05714087188243866, 0.06006966903805733, 0.0629984587430954, 0.06592725962400436, 0.06885605305433273, 0.0717848464846611, 0.07471363991498947, 0.07764243334531784, 0.08057122677564621, 0.08350002020597458, 0.08642882108688354, 0.08935761451721191, 0.09228640794754028, 0.09521520137786865, 0.09814399480819702]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 6.0, 5.0, 6.0, 12.0, 15.0, 25.0, 43.0, 71.0, 114.0, 186.0, 336.0, 778.0, 2033.0, 9620.0, 154105.0, 850156.0, 25056.0, 3744.0, 1097.0, 516.0, 252.0, 158.0, 88.0, 48.0, 29.0, 17.0, 17.0, 11.0, 2.0, 7.0, 1.0, 2.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1409912109375, -0.13730239868164062, -0.13361358642578125, -0.12992477416992188, -0.1262359619140625, -0.12254714965820312, -0.11885833740234375, -0.11516952514648438, -0.111480712890625, -0.10779190063476562, -0.10410308837890625, -0.10041427612304688, -0.0967254638671875, -0.09303665161132812, -0.08934783935546875, -0.08565902709960938, -0.08197021484375, -0.07828140258789062, -0.07459259033203125, -0.07090377807617188, -0.0672149658203125, -0.06352615356445312, -0.05983734130859375, -0.056148529052734375, -0.052459716796875, -0.048770904541015625, -0.04508209228515625, -0.041393280029296875, -0.0377044677734375, -0.034015655517578125, -0.03032684326171875, -0.026638031005859375, -0.02294921875, -0.019260406494140625, -0.01557159423828125, -0.011882781982421875, -0.0081939697265625, -0.004505157470703125, -0.00081634521484375, 0.002872467041015625, 0.006561279296875, 0.010250091552734375, 0.01393890380859375, 0.017627716064453125, 0.0213165283203125, 0.025005340576171875, 0.02869415283203125, 0.032382965087890625, 0.03607177734375, 0.039760589599609375, 0.04344940185546875, 0.047138214111328125, 0.0508270263671875, 0.054515838623046875, 0.05820465087890625, 0.061893463134765625, 0.065582275390625, 0.06927108764648438, 0.07295989990234375, 0.07664871215820312, 0.0803375244140625, 0.08402633666992188, 0.08771514892578125, 0.09140396118164062, 0.0950927734375]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 5.0, 6.0, 11.0, 8.0, 34.0, 68.0, 96.0, 122.0, 167.0, 140.0, 129.0, 101.0, 60.0, 24.0, 18.0, 5.0, 5.0, 1.0, 3.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0278472900390625, -0.026731252670288086, -0.025615215301513672, -0.024499177932739258, -0.023383140563964844, -0.02226710319519043, -0.021151065826416016, -0.0200350284576416, -0.018918991088867188, -0.017802953720092773, -0.01668691635131836, -0.015570878982543945, -0.014454841613769531, -0.013338804244995117, -0.012222766876220703, -0.011106729507446289, -0.009990692138671875, -0.008874654769897461, -0.007758617401123047, -0.006642580032348633, -0.005526542663574219, -0.004410505294799805, -0.0032944679260253906, -0.0021784305572509766, -0.0010623931884765625, 5.364418029785156e-05, 0.0011696815490722656, 0.0022857189178466797, 0.0034017562866210938, 0.004517793655395508, 0.005633831024169922, 0.006749868392944336, 0.00786590576171875, 0.008981943130493164, 0.010097980499267578, 0.011214017868041992, 0.012330055236816406, 0.01344609260559082, 0.014562129974365234, 0.01567816734313965, 0.016794204711914062, 0.017910242080688477, 0.01902627944946289, 0.020142316818237305, 0.02125835418701172, 0.022374391555786133, 0.023490428924560547, 0.02460646629333496, 0.025722503662109375, 0.02683854103088379, 0.027954578399658203, 0.029070615768432617, 0.03018665313720703, 0.031302690505981445, 0.03241872787475586, 0.03353476524353027, 0.03465080261230469, 0.0357668399810791, 0.036882877349853516, 0.03799891471862793, 0.039114952087402344, 0.04023098945617676, 0.04134702682495117, 0.042463064193725586, 0.0435791015625]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 4.0, 1.0, 3.0, 3.0, 1.0, 9.0, 5.0, 9.0, 18.0, 18.0, 33.0, 40.0, 77.0, 132.0, 180.0, 278.0, 551.0, 900.0, 1860.0, 4096.0, 10771.0, 35739.0, 188701.0, 664164.0, 103312.0, 23337.0, 7687.0, 3222.0, 1539.0, 802.0, 415.0, 261.0, 137.0, 77.0, 60.0, 42.0, 16.0, 15.0, 9.0, 8.0, 6.0, 8.0, 4.0, 2.0, 1.0, 1.0, 5.0, 0.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0], "bins": [-0.035980224609375, -0.03479433059692383, -0.033608436584472656, -0.032422542572021484, -0.031236648559570312, -0.03005075454711914, -0.02886486053466797, -0.027678966522216797, -0.026493072509765625, -0.025307178497314453, -0.02412128448486328, -0.02293539047241211, -0.021749496459960938, -0.020563602447509766, -0.019377708435058594, -0.018191814422607422, -0.01700592041015625, -0.015820026397705078, -0.014634132385253906, -0.013448238372802734, -0.012262344360351562, -0.01107645034790039, -0.009890556335449219, -0.008704662322998047, -0.007518768310546875, -0.006332874298095703, -0.005146980285644531, -0.003961086273193359, -0.0027751922607421875, -0.0015892982482910156, -0.00040340423583984375, 0.0007824897766113281, 0.0019683837890625, 0.003154277801513672, 0.004340171813964844, 0.005526065826416016, 0.0067119598388671875, 0.00789785385131836, 0.009083747863769531, 0.010269641876220703, 0.011455535888671875, 0.012641429901123047, 0.013827323913574219, 0.01501321792602539, 0.016199111938476562, 0.017385005950927734, 0.018570899963378906, 0.019756793975830078, 0.02094268798828125, 0.022128582000732422, 0.023314476013183594, 0.024500370025634766, 0.025686264038085938, 0.02687215805053711, 0.02805805206298828, 0.029243946075439453, 0.030429840087890625, 0.0316157341003418, 0.03280162811279297, 0.03398752212524414, 0.03517341613769531, 0.036359310150146484, 0.037545204162597656, 0.03873109817504883, 0.0399169921875]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 5.0, 11.0, 7.0, 18.0, 12.0, 17.0, 27.0, 28.0, 46.0, 50.0, 60.0, 60.0, 80.0, 78.0, 84.0, 69.0, 64.0, 57.0, 57.0, 38.0, 40.0, 28.0, 18.0, 13.0, 11.0, 9.0, 7.0, 5.0, 5.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06280517578125, -0.06019020080566406, -0.057575225830078125, -0.05496025085449219, -0.05234527587890625, -0.04973030090332031, -0.047115325927734375, -0.04450035095214844, -0.0418853759765625, -0.03927040100097656, -0.036655426025390625, -0.03404045104980469, -0.03142547607421875, -0.028810501098632812, -0.026195526123046875, -0.023580551147460938, -0.020965576171875, -0.018350601196289062, -0.015735626220703125, -0.013120651245117188, -0.01050567626953125, -0.007890701293945312, -0.005275726318359375, -0.0026607513427734375, -4.57763671875e-05, 0.0025691986083984375, 0.005184173583984375, 0.0077991485595703125, 0.01041412353515625, 0.013029098510742188, 0.015644073486328125, 0.018259048461914062, 0.0208740234375, 0.023488998413085938, 0.026103973388671875, 0.028718948364257812, 0.03133392333984375, 0.03394889831542969, 0.036563873291015625, 0.03917884826660156, 0.0417938232421875, 0.04440879821777344, 0.047023773193359375, 0.04963874816894531, 0.05225372314453125, 0.05486869812011719, 0.057483673095703125, 0.06009864807128906, 0.062713623046875, 0.06532859802246094, 0.06794357299804688, 0.07055854797363281, 0.07317352294921875, 0.07578849792480469, 0.07840347290039062, 0.08101844787597656, 0.0836334228515625, 0.08624839782714844, 0.08886337280273438, 0.09147834777832031, 0.09409332275390625, 0.09670829772949219, 0.09932327270507812, 0.10193824768066406, 0.10455322265625]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 3.0, 0.0, 6.0, 4.0, 4.0, 12.0, 27.0, 24.0, 29.0, 53.0, 74.0, 120.0, 173.0, 304.0, 569.0, 1269.0, 3071.0, 9230.0, 42469.0, 560627.0, 377462.0, 38777.0, 8698.0, 2909.0, 1231.0, 577.0, 309.0, 193.0, 125.0, 78.0, 41.0, 26.0, 21.0, 13.0, 14.0, 2.0, 6.0, 5.0, 3.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0092315673828125, -0.008933544158935547, -0.008635520935058594, -0.00833749771118164, -0.008039474487304688, -0.007741451263427734, -0.007443428039550781, -0.007145404815673828, -0.006847381591796875, -0.006549358367919922, -0.006251335144042969, -0.005953311920166016, -0.0056552886962890625, -0.005357265472412109, -0.005059242248535156, -0.004761219024658203, -0.00446319580078125, -0.004165172576904297, -0.0038671493530273438, -0.0035691261291503906, -0.0032711029052734375, -0.0029730796813964844, -0.0026750564575195312, -0.002377033233642578, -0.002079010009765625, -0.0017809867858886719, -0.0014829635620117188, -0.0011849403381347656, -0.0008869171142578125, -0.0005888938903808594, -0.00029087066650390625, 7.152557373046875e-06, 0.00030517578125, 0.0006031990051269531, 0.0009012222290039062, 0.0011992454528808594, 0.0014972686767578125, 0.0017952919006347656, 0.0020933151245117188, 0.002391338348388672, 0.002689361572265625, 0.002987384796142578, 0.0032854080200195312, 0.0035834312438964844, 0.0038814544677734375, 0.004179477691650391, 0.004477500915527344, 0.004775524139404297, 0.00507354736328125, 0.005371570587158203, 0.005669593811035156, 0.005967617034912109, 0.0062656402587890625, 0.006563663482666016, 0.006861686706542969, 0.007159709930419922, 0.007457733154296875, 0.007755756378173828, 0.008053779602050781, 0.008351802825927734, 0.008649826049804688, 0.00894784927368164, 0.009245872497558594, 0.009543895721435547, 0.0098419189453125]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 2.0, 3.0, 2.0, 1.0, 5.0, 4.0, 6.0, 12.0, 3.0, 17.0, 16.0, 19.0, 28.0, 21.0, 31.0, 39.0, 42.0, 64.0, 36.0, 47.0, 68.0, 92.0, 77.0, 41.0, 63.0, 46.0, 43.0, 33.0, 24.0, 28.0, 23.0, 21.0, 12.0, 5.0, 6.0, 7.0, 3.0, 6.0, 1.0, 5.0, 2.0, 1.0, 5.0, 0.0, 2.0, 1.0, 0.0, 3.0], "bins": [-6.198883056640625e-06, -6.032176315784454e-06, -5.865469574928284e-06, -5.698762834072113e-06, -5.532056093215942e-06, -5.365349352359772e-06, -5.198642611503601e-06, -5.03193587064743e-06, -4.86522912979126e-06, -4.698522388935089e-06, -4.5318156480789185e-06, -4.365108907222748e-06, -4.198402166366577e-06, -4.0316954255104065e-06, -3.864988684654236e-06, -3.698281943798065e-06, -3.5315752029418945e-06, -3.364868462085724e-06, -3.1981617212295532e-06, -3.0314549803733826e-06, -2.864748239517212e-06, -2.6980414986610413e-06, -2.5313347578048706e-06, -2.3646280169487e-06, -2.1979212760925293e-06, -2.0312145352363586e-06, -1.864507794380188e-06, -1.6978010535240173e-06, -1.5310943126678467e-06, -1.364387571811676e-06, -1.1976808309555054e-06, -1.0309740900993347e-06, -8.642673492431641e-07, -6.975606083869934e-07, -5.308538675308228e-07, -3.641471266746521e-07, -1.9744038581848145e-07, -3.073364496231079e-08, 1.3597309589385986e-07, 3.026798367500305e-07, 4.6938657760620117e-07, 6.360933184623718e-07, 8.028000593185425e-07, 9.695068001747131e-07, 1.1362135410308838e-06, 1.3029202818870544e-06, 1.469627022743225e-06, 1.6363337635993958e-06, 1.8030405044555664e-06, 1.969747245311737e-06, 2.1364539861679077e-06, 2.3031607270240784e-06, 2.469867467880249e-06, 2.6365742087364197e-06, 2.8032809495925903e-06, 2.969987690448761e-06, 3.1366944313049316e-06, 3.3034011721611023e-06, 3.470107913017273e-06, 3.6368146538734436e-06, 3.8035213947296143e-06, 3.970228135585785e-06, 4.1369348764419556e-06, 4.303641617298126e-06, 4.470348358154297e-06]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 4.0, 5.0, 7.0, 8.0, 11.0, 14.0, 33.0, 33.0, 44.0, 90.0, 150.0, 231.0, 483.0, 991.0, 2277.0, 6890.0, 29159.0, 255314.0, 678232.0, 57647.0, 10895.0, 3347.0, 1327.0, 583.0, 299.0, 172.0, 108.0, 59.0, 46.0, 34.0, 14.0, 12.0, 12.0, 5.0, 8.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0100860595703125, -0.009807288646697998, -0.009528517723083496, -0.009249746799468994, -0.008970975875854492, -0.00869220495223999, -0.008413434028625488, -0.008134663105010986, -0.007855892181396484, -0.007577121257781982, -0.0072983503341674805, -0.0070195794105529785, -0.0067408084869384766, -0.006462037563323975, -0.006183266639709473, -0.005904495716094971, -0.005625724792480469, -0.005346953868865967, -0.005068182945251465, -0.004789412021636963, -0.004510641098022461, -0.004231870174407959, -0.003953099250793457, -0.003674328327178955, -0.003395557403564453, -0.003116786479949951, -0.0028380155563354492, -0.0025592446327209473, -0.0022804737091064453, -0.0020017027854919434, -0.0017229318618774414, -0.0014441609382629395, -0.0011653900146484375, -0.0008866190910339355, -0.0006078481674194336, -0.00032907724380493164, -5.030632019042969e-05, 0.00022846460342407227, 0.0005072355270385742, 0.0007860064506530762, 0.0010647773742675781, 0.00134354829788208, 0.001622319221496582, 0.001901090145111084, 0.002179861068725586, 0.002458631992340088, 0.00273740291595459, 0.003016173839569092, 0.0032949447631835938, 0.0035737156867980957, 0.0038524866104125977, 0.0041312575340271, 0.0044100284576416016, 0.0046887993812561035, 0.0049675703048706055, 0.005246341228485107, 0.005525112152099609, 0.005803883075714111, 0.006082653999328613, 0.006361424922943115, 0.006640195846557617, 0.006918966770172119, 0.007197737693786621, 0.007476508617401123, 0.007755279541015625]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 4.0, 6.0, 5.0, 9.0, 5.0, 11.0, 18.0, 27.0, 22.0, 54.0, 83.0, 95.0, 148.0, 125.0, 118.0, 83.0, 57.0, 43.0, 27.0, 15.0, 17.0, 7.0, 7.0, 7.0, 6.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.016326904296875, -0.015849828720092773, -0.015372753143310547, -0.01489567756652832, -0.014418601989746094, -0.013941526412963867, -0.01346445083618164, -0.012987375259399414, -0.012510299682617188, -0.012033224105834961, -0.011556148529052734, -0.011079072952270508, -0.010601997375488281, -0.010124921798706055, -0.009647846221923828, -0.009170770645141602, -0.008693695068359375, -0.008216619491577148, -0.007739543914794922, -0.007262468338012695, -0.006785392761230469, -0.006308317184448242, -0.005831241607666016, -0.005354166030883789, -0.0048770904541015625, -0.004400014877319336, -0.003922939300537109, -0.003445863723754883, -0.0029687881469726562, -0.0024917125701904297, -0.002014636993408203, -0.0015375614166259766, -0.00106048583984375, -0.0005834102630615234, -0.00010633468627929688, 0.0003707408905029297, 0.0008478164672851562, 0.0013248920440673828, 0.0018019676208496094, 0.002279043197631836, 0.0027561187744140625, 0.003233194351196289, 0.0037102699279785156, 0.004187345504760742, 0.004664421081542969, 0.005141496658325195, 0.005618572235107422, 0.0060956478118896484, 0.006572723388671875, 0.0070497989654541016, 0.007526874542236328, 0.008003950119018555, 0.008481025695800781, 0.008958101272583008, 0.009435176849365234, 0.009912252426147461, 0.010389328002929688, 0.010866403579711914, 0.01134347915649414, 0.011820554733276367, 0.012297630310058594, 0.01277470588684082, 0.013251781463623047, 0.013728857040405273, 0.0142059326171875]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 3.0, 3.0, 8.0, 10.0, 16.0, 25.0, 48.0, 82.0, 168.0, 304.0, 172.0, 89.0, 29.0, 16.0, 7.0, 8.0, 3.0, 6.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.36765825748443604, -0.3583710491657257, -0.34908387064933777, -0.33979666233062744, -0.3305094540119171, -0.3212222754955292, -0.31193506717681885, -0.3026478886604309, -0.2933606803417206, -0.28407347202301025, -0.2747862935066223, -0.265499085187912, -0.25621187686920166, -0.24692469835281372, -0.2376374900341034, -0.22835029661655426, -0.21906308829784393, -0.2097758948802948, -0.20048868656158447, -0.19120149314403534, -0.1819142997264862, -0.17262709140777588, -0.16333989799022675, -0.1540527045726776, -0.14476549625396729, -0.13547830283641815, -0.12619109451770782, -0.11690390110015869, -0.10761670768260956, -0.09832950681447983, -0.0890423059463501, -0.07975511252880096, -0.07046791911125183, -0.0611807219684124, -0.05189352482557297, -0.04260632395744324, -0.033319126814603806, -0.024031929671764374, -0.014744728803634644, -0.00545753538608551, 0.00382966548204422, 0.013116863556206226, 0.022404061630368233, 0.031691260635852814, 0.040978457778692245, 0.05026565492153168, 0.05955285578966141, 0.06884004920721054, 0.07812725007534027, 0.08741445094347, 0.09670164436101913, 0.10598884522914886, 0.115276038646698, 0.12456323951482773, 0.13385044038295746, 0.1431376338005066, 0.15242484211921692, 0.16171203553676605, 0.17099924385547638, 0.1802864372730255, 0.18957363069057465, 0.19886082410812378, 0.2081480324268341, 0.21743522584438324, 0.22672241926193237]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 9.0, 6.0, 6.0, 4.0, 10.0, 11.0, 24.0, 15.0, 23.0, 22.0, 29.0, 16.0, 30.0, 27.0, 41.0, 41.0, 59.0, 64.0, 68.0, 85.0, 71.0, 40.0, 40.0, 39.0, 27.0, 23.0, 27.0, 20.0, 22.0, 15.0, 15.0, 4.0, 16.0, 18.0, 6.0, 7.0, 9.0, 4.0, 3.0, 3.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.23183870315551758, -0.22514070570468903, -0.21844270825386047, -0.21174471080303192, -0.20504671335220337, -0.19834871590137482, -0.19165071845054626, -0.1849527359008789, -0.17825472354888916, -0.1715567260980606, -0.16485872864723206, -0.1581607311964035, -0.15146273374557495, -0.1447647362947464, -0.13806673884391785, -0.1313687562942505, -0.12467075884342194, -0.11797276139259338, -0.11127476394176483, -0.10457676649093628, -0.09787876904010773, -0.09118077158927917, -0.08448278158903122, -0.07778478413820267, -0.07108678668737411, -0.06438878923654556, -0.05769079178571701, -0.05099279806017876, -0.044294800609350204, -0.03759680315852165, -0.0308988094329834, -0.024200811982154846, -0.017502814531326294, -0.010804818011820316, -0.004106821492314339, 0.0025911740958690643, 0.009289171546697617, 0.01598716899752617, 0.022685162723064423, 0.029383160173892975, 0.03608115762472153, 0.04277915507555008, 0.04947715252637863, 0.056175146251916885, 0.06287313997745514, 0.06957113742828369, 0.07626913487911224, 0.0829671323299408, 0.08966512978076935, 0.0963631272315979, 0.10306112468242645, 0.109759122133255, 0.11645711958408356, 0.12315511703491211, 0.12985309958457947, 0.1365511119365692, 0.14324909448623657, 0.14994709193706512, 0.15664508938789368, 0.16334308683872223, 0.17004108428955078, 0.17673908174037933, 0.18343707919120789, 0.19013506174087524, 0.196833074092865]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 5.0, 2.0, 6.0, 2.0, 9.0, 9.0, 12.0, 20.0, 24.0, 51.0, 89.0, 136.0, 213.0, 685.0, 3351.0, 32616.0, 4066420.0, 86730.0, 2783.0, 446.0, 271.0, 144.0, 87.0, 52.0, 36.0, 30.0, 14.0, 12.0, 9.0, 2.0, 2.0, 3.0, 3.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.05047607421875, -0.04887819290161133, -0.047280311584472656, -0.045682430267333984, -0.04408454895019531, -0.04248666763305664, -0.04088878631591797, -0.0392909049987793, -0.037693023681640625, -0.03609514236450195, -0.03449726104736328, -0.03289937973022461, -0.03130149841308594, -0.029703617095947266, -0.028105735778808594, -0.026507854461669922, -0.02490997314453125, -0.023312091827392578, -0.021714210510253906, -0.020116329193115234, -0.018518447875976562, -0.01692056655883789, -0.015322685241699219, -0.013724803924560547, -0.012126922607421875, -0.010529041290283203, -0.008931159973144531, -0.007333278656005859, -0.0057353973388671875, -0.004137516021728516, -0.0025396347045898438, -0.0009417533874511719, 0.0006561279296875, 0.002254009246826172, 0.0038518905639648438, 0.005449771881103516, 0.0070476531982421875, 0.00864553451538086, 0.010243415832519531, 0.011841297149658203, 0.013439178466796875, 0.015037059783935547, 0.01663494110107422, 0.01823282241821289, 0.019830703735351562, 0.021428585052490234, 0.023026466369628906, 0.024624347686767578, 0.02622222900390625, 0.027820110321044922, 0.029417991638183594, 0.031015872955322266, 0.03261375427246094, 0.03421163558959961, 0.03580951690673828, 0.03740739822387695, 0.039005279541015625, 0.0406031608581543, 0.04220104217529297, 0.04379892349243164, 0.04539680480957031, 0.046994686126708984, 0.048592567443847656, 0.05019044876098633, 0.051788330078125]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 10.0, 15.0, 18.0, 30.0, 64.0, 104.0, 114.0, 139.0, 146.0, 131.0, 92.0, 58.0, 40.0, 18.0, 7.0, 7.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0276641845703125, -0.026555299758911133, -0.025446414947509766, -0.0243375301361084, -0.02322864532470703, -0.022119760513305664, -0.021010875701904297, -0.01990199089050293, -0.018793106079101562, -0.017684221267700195, -0.016575336456298828, -0.015466451644897461, -0.014357566833496094, -0.013248682022094727, -0.01213979721069336, -0.011030912399291992, -0.009922027587890625, -0.008813142776489258, -0.007704257965087891, -0.0065953731536865234, -0.005486488342285156, -0.004377603530883789, -0.003268718719482422, -0.0021598339080810547, -0.0010509490966796875, 5.793571472167969e-05, 0.0011668205261230469, 0.002275705337524414, 0.0033845901489257812, 0.0044934749603271484, 0.005602359771728516, 0.006711244583129883, 0.00782012939453125, 0.008929014205932617, 0.010037899017333984, 0.011146783828735352, 0.012255668640136719, 0.013364553451538086, 0.014473438262939453, 0.01558232307434082, 0.016691207885742188, 0.017800092697143555, 0.018908977508544922, 0.02001786231994629, 0.021126747131347656, 0.022235631942749023, 0.02334451675415039, 0.024453401565551758, 0.025562286376953125, 0.026671171188354492, 0.02778005599975586, 0.028888940811157227, 0.029997825622558594, 0.03110671043395996, 0.03221559524536133, 0.033324480056762695, 0.03443336486816406, 0.03554224967956543, 0.0366511344909668, 0.037760019302368164, 0.03886890411376953, 0.0399777889251709, 0.041086673736572266, 0.04219555854797363, 0.043304443359375]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 7.0, 12.0, 15.0, 29.0, 31.0, 45.0, 81.0, 124.0, 241.0, 465.0, 941.0, 2156.0, 6243.0, 30510.0, 2069113.0, 2043854.0, 30795.0, 5934.0, 2024.0, 813.0, 375.0, 195.0, 109.0, 62.0, 30.0, 26.0, 19.0, 8.0, 4.0, 4.0, 3.0, 7.0, 4.0, 0.0, 2.0, 5.0, 1.0, 0.0, 1.0], "bins": [-0.03631591796875, -0.03540802001953125, -0.0345001220703125, -0.03359222412109375, -0.032684326171875, -0.03177642822265625, -0.0308685302734375, -0.02996063232421875, -0.029052734375, -0.02814483642578125, -0.0272369384765625, -0.02632904052734375, -0.025421142578125, -0.02451324462890625, -0.0236053466796875, -0.02269744873046875, -0.02178955078125, -0.02088165283203125, -0.0199737548828125, -0.01906585693359375, -0.018157958984375, -0.01725006103515625, -0.0163421630859375, -0.01543426513671875, -0.0145263671875, -0.01361846923828125, -0.0127105712890625, -0.01180267333984375, -0.010894775390625, -0.00998687744140625, -0.0090789794921875, -0.00817108154296875, -0.00726318359375, -0.00635528564453125, -0.0054473876953125, -0.00453948974609375, -0.003631591796875, -0.00272369384765625, -0.0018157958984375, -0.00090789794921875, 0.0, 0.00090789794921875, 0.0018157958984375, 0.00272369384765625, 0.003631591796875, 0.00453948974609375, 0.0054473876953125, 0.00635528564453125, 0.00726318359375, 0.00817108154296875, 0.0090789794921875, 0.00998687744140625, 0.010894775390625, 0.01180267333984375, 0.0127105712890625, 0.01361846923828125, 0.0145263671875, 0.01543426513671875, 0.0163421630859375, 0.01725006103515625, 0.018157958984375, 0.01906585693359375, 0.0199737548828125, 0.02088165283203125, 0.02178955078125]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 4.0, 1.0, 2.0, 2.0, 4.0, 2.0, 4.0, 13.0, 17.0, 28.0, 55.0, 118.0, 452.0, 1521.0, 1276.0, 373.0, 92.0, 56.0, 18.0, 18.0, 9.0, 6.0, 5.0, 0.0, 2.0, 2.0, 1.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.047332763671875, -0.04593849182128906, -0.044544219970703125, -0.04314994812011719, -0.04175567626953125, -0.04036140441894531, -0.038967132568359375, -0.03757286071777344, -0.0361785888671875, -0.03478431701660156, -0.033390045166015625, -0.03199577331542969, -0.03060150146484375, -0.029207229614257812, -0.027812957763671875, -0.026418685913085938, -0.0250244140625, -0.023630142211914062, -0.022235870361328125, -0.020841598510742188, -0.01944732666015625, -0.018053054809570312, -0.016658782958984375, -0.015264511108398438, -0.0138702392578125, -0.012475967407226562, -0.011081695556640625, -0.009687423706054688, -0.00829315185546875, -0.0068988800048828125, -0.005504608154296875, -0.0041103363037109375, -0.002716064453125, -0.0013217926025390625, 7.2479248046875e-05, 0.0014667510986328125, 0.00286102294921875, 0.0042552947998046875, 0.005649566650390625, 0.0070438385009765625, 0.0084381103515625, 0.009832382202148438, 0.011226654052734375, 0.012620925903320312, 0.01401519775390625, 0.015409469604492188, 0.016803741455078125, 0.018198013305664062, 0.01959228515625, 0.020986557006835938, 0.022380828857421875, 0.023775100708007812, 0.02516937255859375, 0.026563644409179688, 0.027957916259765625, 0.029352188110351562, 0.0307464599609375, 0.03214073181152344, 0.033535003662109375, 0.03492927551269531, 0.03632354736328125, 0.03771781921386719, 0.039112091064453125, 0.04050636291503906, 0.041900634765625]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 4.0, 2.0, 15.0, 20.0, 74.0, 238.0, 410.0, 185.0, 33.0, 13.0, 6.0, 4.0, 2.0, 1.0, 0.0, 1.0, 3.0], "bins": [-0.5749598741531372, -0.5641900897026062, -0.5534203052520752, -0.5426504611968994, -0.5318806767463684, -0.5211108922958374, -0.5103411078453064, -0.499571293592453, -0.488801509141922, -0.478031724691391, -0.4672619104385376, -0.4564921259880066, -0.4457223117351532, -0.4349525272846222, -0.4241827130317688, -0.4134129285812378, -0.4026431441307068, -0.3918733596801758, -0.3811035454273224, -0.3703337609767914, -0.359563946723938, -0.348794162273407, -0.338024377822876, -0.3272545635700226, -0.3164847493171692, -0.3057149648666382, -0.2949451506137848, -0.2841753661632538, -0.2734055519104004, -0.2626357674598694, -0.2518659830093384, -0.24109616875648499, -0.2303263545036316, -0.2195565551519394, -0.2087867558002472, -0.1980169713497162, -0.187247171998024, -0.1764773726463318, -0.1657075732946396, -0.1549377739429474, -0.14416798949241638, -0.13339819014072418, -0.12262839823961258, -0.11185859888792038, -0.10108880698680878, -0.09031900763511658, -0.07954920828342438, -0.06877941638231277, -0.058009617030620575, -0.047239821404218674, -0.03647002577781677, -0.025700226426124573, -0.014930430799722672, -0.00416063517332077, 0.0066091641783714294, 0.017378956079483032, 0.028148755431175232, 0.03891855105757713, 0.049688346683979034, 0.060458146035671234, 0.07122793793678284, 0.08199773728847504, 0.09276753664016724, 0.10353732854127884, 0.11430712789297104]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 1.0, 3.0, 3.0, 4.0, 7.0, 5.0, 8.0, 16.0, 18.0, 25.0, 39.0, 43.0, 46.0, 50.0, 66.0, 53.0, 59.0, 76.0, 51.0, 65.0, 58.0, 52.0, 40.0, 45.0, 35.0, 26.0, 21.0, 19.0, 19.0, 18.0, 10.0, 11.0, 2.0, 7.0, 1.0, 4.0, 2.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.14113837480545044, -0.13661572337150574, -0.13209307193756104, -0.12757042050361633, -0.12304777652025223, -0.11852512508630753, -0.11400248110294342, -0.10947982966899872, -0.10495717823505402, -0.10043452680110931, -0.09591187536716461, -0.0913892313838005, -0.0868665799498558, -0.0823439285159111, -0.077821284532547, -0.0732986330986023, -0.06877598166465759, -0.06425333023071289, -0.05973068252205849, -0.05520803481340408, -0.05068538337945938, -0.04616273194551468, -0.041640084236860275, -0.03711743652820587, -0.03259478509426117, -0.028072135522961617, -0.023549485951662064, -0.01902683638036251, -0.014504186809062958, -0.009981537237763405, -0.005458887666463852, -0.0009362399578094482, 0.003586411476135254, 0.008109061047434807, 0.01263171061873436, 0.017154360190033913, 0.021677009761333466, 0.02619965933263302, 0.03072230890393257, 0.035244956612586975, 0.03976760804653168, 0.04429025948047638, 0.04881290718913078, 0.05333555489778519, 0.05785820633172989, 0.06238085776567459, 0.0669035017490387, 0.0714261531829834, 0.0759488046169281, 0.0804714560508728, 0.0849941074848175, 0.08951675146818161, 0.09403940290212631, 0.09856205433607101, 0.10308469831943512, 0.10760734975337982, 0.11213000118732452, 0.11665265262126923, 0.12117530405521393, 0.12569795548915863, 0.13022059202194214, 0.13474324345588684, 0.13926589488983154, 0.14378854632377625, 0.14831119775772095]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 2.0, 3.0, 4.0, 7.0, 14.0, 17.0, 21.0, 22.0, 43.0, 51.0, 77.0, 131.0, 152.0, 276.0, 437.0, 897.0, 2020.0, 5823.0, 26652.0, 213048.0, 669594.0, 105895.0, 15950.0, 4073.0, 1505.0, 696.0, 404.0, 254.0, 167.0, 107.0, 73.0, 41.0, 46.0, 21.0, 6.0, 12.0, 10.0, 2.0, 2.0, 3.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0304107666015625, -0.029256582260131836, -0.028102397918701172, -0.026948213577270508, -0.025794029235839844, -0.02463984489440918, -0.023485660552978516, -0.02233147621154785, -0.021177291870117188, -0.020023107528686523, -0.01886892318725586, -0.017714738845825195, -0.01656055450439453, -0.015406370162963867, -0.014252185821533203, -0.013098001480102539, -0.011943817138671875, -0.010789632797241211, -0.009635448455810547, -0.008481264114379883, -0.007327079772949219, -0.006172895431518555, -0.005018711090087891, -0.0038645267486572266, -0.0027103424072265625, -0.0015561580657958984, -0.0004019737243652344, 0.0007522106170654297, 0.0019063949584960938, 0.003060579299926758, 0.004214763641357422, 0.005368947982788086, 0.00652313232421875, 0.007677316665649414, 0.008831501007080078, 0.009985685348510742, 0.011139869689941406, 0.01229405403137207, 0.013448238372802734, 0.014602422714233398, 0.015756607055664062, 0.016910791397094727, 0.01806497573852539, 0.019219160079956055, 0.02037334442138672, 0.021527528762817383, 0.022681713104248047, 0.02383589744567871, 0.024990081787109375, 0.02614426612854004, 0.027298450469970703, 0.028452634811401367, 0.02960681915283203, 0.030761003494262695, 0.03191518783569336, 0.03306937217712402, 0.03422355651855469, 0.03537774085998535, 0.036531925201416016, 0.03768610954284668, 0.038840293884277344, 0.03999447822570801, 0.04114866256713867, 0.042302846908569336, 0.04345703125]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 4.0, 2.0, 8.0, 7.0, 16.0, 35.0, 42.0, 94.0, 118.0, 140.0, 145.0, 121.0, 101.0, 81.0, 50.0, 19.0, 8.0, 5.0, 9.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.028167724609375, -0.0270538330078125, -0.02593994140625, -0.0248260498046875, -0.023712158203125, -0.0225982666015625, -0.021484375, -0.0203704833984375, -0.019256591796875, -0.0181427001953125, -0.01702880859375, -0.0159149169921875, -0.014801025390625, -0.0136871337890625, -0.0125732421875, -0.0114593505859375, -0.010345458984375, -0.0092315673828125, -0.00811767578125, -0.0070037841796875, -0.005889892578125, -0.0047760009765625, -0.003662109375, -0.0025482177734375, -0.001434326171875, -0.0003204345703125, 0.00079345703125, 0.0019073486328125, 0.003021240234375, 0.0041351318359375, 0.0052490234375, 0.0063629150390625, 0.007476806640625, 0.0085906982421875, 0.00970458984375, 0.0108184814453125, 0.011932373046875, 0.0130462646484375, 0.01416015625, 0.0152740478515625, 0.016387939453125, 0.0175018310546875, 0.01861572265625, 0.0197296142578125, 0.020843505859375, 0.0219573974609375, 0.0230712890625, 0.0241851806640625, 0.025299072265625, 0.0264129638671875, 0.02752685546875, 0.0286407470703125, 0.029754638671875, 0.0308685302734375, 0.031982421875, 0.0330963134765625, 0.034210205078125, 0.0353240966796875, 0.03643798828125, 0.0375518798828125, 0.038665771484375, 0.0397796630859375, 0.0408935546875, 0.0420074462890625, 0.043121337890625]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 14.0, 19.0, 42.0, 51.0, 86.0, 204.0, 404.0, 953.0, 3216.0, 17554.0, 206862.0, 760942.0, 48786.0, 6639.0, 1649.0, 599.0, 230.0, 151.0, 62.0, 40.0, 22.0, 11.0, 10.0, 3.0, 4.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.04742431640625, -0.04580211639404297, -0.04417991638183594, -0.042557716369628906, -0.040935516357421875, -0.039313316345214844, -0.03769111633300781, -0.03606891632080078, -0.03444671630859375, -0.03282451629638672, -0.031202316284179688, -0.029580116271972656, -0.027957916259765625, -0.026335716247558594, -0.024713516235351562, -0.02309131622314453, -0.0214691162109375, -0.01984691619873047, -0.018224716186523438, -0.016602516174316406, -0.014980316162109375, -0.013358116149902344, -0.011735916137695312, -0.010113716125488281, -0.00849151611328125, -0.006869316101074219, -0.0052471160888671875, -0.0036249160766601562, -0.002002716064453125, -0.00038051605224609375, 0.0012416839599609375, 0.0028638839721679688, 0.004486083984375, 0.006108283996582031, 0.0077304840087890625, 0.009352684020996094, 0.010974884033203125, 0.012597084045410156, 0.014219284057617188, 0.01584148406982422, 0.01746368408203125, 0.01908588409423828, 0.020708084106445312, 0.022330284118652344, 0.023952484130859375, 0.025574684143066406, 0.027196884155273438, 0.02881908416748047, 0.0304412841796875, 0.03206348419189453, 0.03368568420410156, 0.035307884216308594, 0.036930084228515625, 0.038552284240722656, 0.04017448425292969, 0.04179668426513672, 0.04341888427734375, 0.04504108428955078, 0.04666328430175781, 0.048285484313964844, 0.049907684326171875, 0.051529884338378906, 0.05315208435058594, 0.05477428436279297, 0.056396484375]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 11.0, 12.0, 15.0, 30.0, 38.0, 36.0, 58.0, 71.0, 102.0, 106.0, 102.0, 105.0, 87.0, 67.0, 57.0, 27.0, 25.0, 19.0, 18.0, 6.0, 4.0, 1.0, 6.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.10919189453125, -0.105804443359375, -0.1024169921875, -0.099029541015625, -0.09564208984375, -0.092254638671875, -0.0888671875, -0.085479736328125, -0.08209228515625, -0.078704833984375, -0.0753173828125, -0.071929931640625, -0.06854248046875, -0.065155029296875, -0.061767578125, -0.058380126953125, -0.05499267578125, -0.051605224609375, -0.0482177734375, -0.044830322265625, -0.04144287109375, -0.038055419921875, -0.03466796875, -0.031280517578125, -0.02789306640625, -0.024505615234375, -0.0211181640625, -0.017730712890625, -0.01434326171875, -0.010955810546875, -0.007568359375, -0.004180908203125, -0.00079345703125, 0.002593994140625, 0.0059814453125, 0.009368896484375, 0.01275634765625, 0.016143798828125, 0.01953125, 0.022918701171875, 0.02630615234375, 0.029693603515625, 0.0330810546875, 0.036468505859375, 0.03985595703125, 0.043243408203125, 0.046630859375, 0.050018310546875, 0.05340576171875, 0.056793212890625, 0.0601806640625, 0.063568115234375, 0.06695556640625, 0.070343017578125, 0.07373046875, 0.077117919921875, 0.08050537109375, 0.083892822265625, 0.0872802734375, 0.090667724609375, 0.09405517578125, 0.097442626953125, 0.100830078125, 0.104217529296875, 0.10760498046875]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 7.0, 7.0, 14.0, 8.0, 23.0, 43.0, 105.0, 223.0, 541.0, 1722.0, 8089.0, 87002.0, 830803.0, 107497.0, 9603.0, 1845.0, 593.0, 208.0, 105.0, 53.0, 25.0, 17.0, 9.0, 7.0, 1.0, 1.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01190185546875, -0.011546969413757324, -0.011192083358764648, -0.010837197303771973, -0.010482311248779297, -0.010127425193786621, -0.009772539138793945, -0.00941765308380127, -0.009062767028808594, -0.008707880973815918, -0.008352994918823242, -0.007998108863830566, -0.007643222808837891, -0.007288336753845215, -0.006933450698852539, -0.006578564643859863, -0.0062236785888671875, -0.005868792533874512, -0.005513906478881836, -0.00515902042388916, -0.004804134368896484, -0.004449248313903809, -0.004094362258911133, -0.003739476203918457, -0.0033845901489257812, -0.0030297040939331055, -0.0026748180389404297, -0.002319931983947754, -0.001965045928955078, -0.0016101598739624023, -0.0012552738189697266, -0.0009003877639770508, -0.000545501708984375, -0.00019061565399169922, 0.00016427040100097656, 0.0005191564559936523, 0.0008740425109863281, 0.001228928565979004, 0.0015838146209716797, 0.0019387006759643555, 0.0022935867309570312, 0.002648472785949707, 0.003003358840942383, 0.0033582448959350586, 0.0037131309509277344, 0.00406801700592041, 0.004422903060913086, 0.004777789115905762, 0.0051326751708984375, 0.005487561225891113, 0.005842447280883789, 0.006197333335876465, 0.006552219390869141, 0.006907105445861816, 0.007261991500854492, 0.007616877555847168, 0.007971763610839844, 0.00832664966583252, 0.008681535720825195, 0.009036421775817871, 0.009391307830810547, 0.009746193885803223, 0.010101079940795898, 0.010455965995788574, 0.01081085205078125]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 5.0, 0.0, 1.0, 3.0, 0.0, 1.0, 4.0, 4.0, 6.0, 11.0, 17.0, 16.0, 10.0, 20.0, 32.0, 36.0, 35.0, 41.0, 44.0, 42.0, 25.0, 53.0, 67.0, 54.0, 63.0, 44.0, 66.0, 57.0, 31.0, 28.0, 32.0, 38.0, 27.0, 28.0, 19.0, 12.0, 1.0, 15.0, 6.0, 8.0, 3.0, 3.0, 4.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.814697265625e-06, -3.7029385566711426e-06, -3.591179847717285e-06, -3.4794211387634277e-06, -3.3676624298095703e-06, -3.255903720855713e-06, -3.1441450119018555e-06, -3.032386302947998e-06, -2.9206275939941406e-06, -2.808868885040283e-06, -2.6971101760864258e-06, -2.5853514671325684e-06, -2.473592758178711e-06, -2.3618340492248535e-06, -2.250075340270996e-06, -2.1383166313171387e-06, -2.0265579223632812e-06, -1.914799213409424e-06, -1.8030405044555664e-06, -1.691281795501709e-06, -1.5795230865478516e-06, -1.4677643775939941e-06, -1.3560056686401367e-06, -1.2442469596862793e-06, -1.1324882507324219e-06, -1.0207295417785645e-06, -9.08970832824707e-07, -7.972121238708496e-07, -6.854534149169922e-07, -5.736947059631348e-07, -4.6193599700927734e-07, -3.501772880554199e-07, -2.384185791015625e-07, -1.2665987014770508e-07, -1.4901161193847656e-08, 9.685754776000977e-08, 2.086162567138672e-07, 3.203749656677246e-07, 4.3213367462158203e-07, 5.438923835754395e-07, 6.556510925292969e-07, 7.674098014831543e-07, 8.791685104370117e-07, 9.909272193908691e-07, 1.1026859283447266e-06, 1.214444637298584e-06, 1.3262033462524414e-06, 1.4379620552062988e-06, 1.5497207641601562e-06, 1.6614794731140137e-06, 1.773238182067871e-06, 1.8849968910217285e-06, 1.996755599975586e-06, 2.1085143089294434e-06, 2.2202730178833008e-06, 2.332031726837158e-06, 2.4437904357910156e-06, 2.555549144744873e-06, 2.6673078536987305e-06, 2.779066562652588e-06, 2.8908252716064453e-06, 3.0025839805603027e-06, 3.11434268951416e-06, 3.2261013984680176e-06, 3.337860107421875e-06]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 8.0, 1.0, 3.0, 7.0, 15.0, 22.0, 35.0, 48.0, 115.0, 212.0, 813.0, 4602.0, 117581.0, 896462.0, 25706.0, 2089.0, 482.0, 154.0, 89.0, 51.0, 27.0, 19.0, 10.0, 8.0, 5.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0098876953125, -0.009375572204589844, -0.008863449096679688, -0.008351325988769531, -0.007839202880859375, -0.007327079772949219, -0.0068149566650390625, -0.006302833557128906, -0.00579071044921875, -0.005278587341308594, -0.0047664642333984375, -0.004254341125488281, -0.003742218017578125, -0.0032300949096679688, -0.0027179718017578125, -0.0022058486938476562, -0.0016937255859375, -0.0011816024780273438, -0.0006694793701171875, -0.00015735626220703125, 0.000354766845703125, 0.0008668899536132812, 0.0013790130615234375, 0.0018911361694335938, 0.00240325927734375, 0.0029153823852539062, 0.0034275054931640625, 0.003939628601074219, 0.004451751708984375, 0.004963874816894531, 0.0054759979248046875, 0.005988121032714844, 0.006500244140625, 0.007012367248535156, 0.0075244903564453125, 0.008036613464355469, 0.008548736572265625, 0.009060859680175781, 0.009572982788085938, 0.010085105895996094, 0.01059722900390625, 0.011109352111816406, 0.011621475219726562, 0.012133598327636719, 0.012645721435546875, 0.013157844543457031, 0.013669967651367188, 0.014182090759277344, 0.0146942138671875, 0.015206336975097656, 0.015718460083007812, 0.01623058319091797, 0.016742706298828125, 0.01725482940673828, 0.017766952514648438, 0.018279075622558594, 0.01879119873046875, 0.019303321838378906, 0.019815444946289062, 0.02032756805419922, 0.020839691162109375, 0.02135181427001953, 0.021863937377929688, 0.022376060485839844, 0.02288818359375]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 3.0, 5.0, 12.0, 11.0, 37.0, 58.0, 139.0, 234.0, 231.0, 132.0, 77.0, 32.0, 15.0, 18.0, 6.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.009918212890625, -0.009093284606933594, -0.008268356323242188, -0.007443428039550781, -0.006618499755859375, -0.005793571472167969, -0.0049686431884765625, -0.004143714904785156, -0.00331878662109375, -0.0024938583374023438, -0.0016689300537109375, -0.0008440017700195312, -1.9073486328125e-05, 0.0008058547973632812, 0.0016307830810546875, 0.0024557113647460938, 0.0032806396484375, 0.004105567932128906, 0.0049304962158203125, 0.005755424499511719, 0.006580352783203125, 0.007405281066894531, 0.008230209350585938, 0.009055137634277344, 0.00988006591796875, 0.010704994201660156, 0.011529922485351562, 0.012354850769042969, 0.013179779052734375, 0.014004707336425781, 0.014829635620117188, 0.015654563903808594, 0.0164794921875, 0.017304420471191406, 0.018129348754882812, 0.01895427703857422, 0.019779205322265625, 0.02060413360595703, 0.021429061889648438, 0.022253990173339844, 0.02307891845703125, 0.023903846740722656, 0.024728775024414062, 0.02555370330810547, 0.026378631591796875, 0.02720355987548828, 0.028028488159179688, 0.028853416442871094, 0.0296783447265625, 0.030503273010253906, 0.03132820129394531, 0.03215312957763672, 0.032978057861328125, 0.03380298614501953, 0.03462791442871094, 0.035452842712402344, 0.03627777099609375, 0.037102699279785156, 0.03792762756347656, 0.03875255584716797, 0.039577484130859375, 0.04040241241455078, 0.04122734069824219, 0.042052268981933594, 0.042877197265625]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 10.0, 9.0, 11.0, 17.0, 34.0, 39.0, 59.0, 91.0, 147.0, 177.0, 133.0, 85.0, 58.0, 38.0, 34.0, 15.0, 8.0, 5.0, 5.0, 3.0, 5.0, 2.0, 0.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.1967753767967224, -0.192043736577034, -0.18731211125850677, -0.18258047103881836, -0.17784883081912994, -0.17311719059944153, -0.1683855652809143, -0.1636539250612259, -0.15892228484153748, -0.15419064462184906, -0.14945901930332184, -0.14472737908363342, -0.139995738863945, -0.1352640986442566, -0.13053247332572937, -0.12580083310604095, -0.12106920033693314, -0.11633756756782532, -0.1116059273481369, -0.10687429457902908, -0.10214265435934067, -0.09741102159023285, -0.09267938137054443, -0.08794774860143661, -0.0832161158323288, -0.07848448306322098, -0.07375284284353256, -0.06902121007442474, -0.06428956985473633, -0.05955793708562851, -0.05482630059123039, -0.050094664096832275, -0.04536301642656326, -0.040631379932165146, -0.03589974343776703, -0.03116810880601406, -0.026436472311615944, -0.021704835817217827, -0.01697320118546486, -0.012241564691066742, -0.007509928196668625, -0.002778292167931795, 0.0019533438608050346, 0.006684979423880577, 0.011416615918278694, 0.01614825241267681, 0.02087988704442978, 0.025611523538827896, 0.030343160033226013, 0.03507479652762413, 0.03980643302202225, 0.044538065791130066, 0.04926970601081848, 0.0540013387799263, 0.05873297527432442, 0.06346461176872253, 0.06819625198841095, 0.07292788475751877, 0.07765952497720718, 0.082391157746315, 0.08712279796600342, 0.09185443073511124, 0.09658606350421906, 0.10131770372390747, 0.10604933649301529]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [3.0, 2.0, 0.0, 1.0, 4.0, 4.0, 1.0, 5.0, 5.0, 6.0, 8.0, 12.0, 14.0, 23.0, 21.0, 21.0, 17.0, 17.0, 23.0, 31.0, 31.0, 31.0, 50.0, 52.0, 74.0, 89.0, 73.0, 55.0, 41.0, 35.0, 33.0, 28.0, 41.0, 30.0, 18.0, 22.0, 18.0, 13.0, 12.0, 17.0, 10.0, 7.0, 8.0, 3.0, 2.0, 2.0, 3.0, 0.0, 5.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1749780774116516, -0.1681177169084549, -0.16125734150409698, -0.15439698100090027, -0.14753660559654236, -0.14067624509334564, -0.13381588459014893, -0.12695550918579102, -0.1200951412320137, -0.11323477327823639, -0.10637440532445908, -0.09951403737068176, -0.09265367686748505, -0.08579330146312714, -0.07893294095993042, -0.0720725730061531, -0.0652122050523758, -0.05835183709859848, -0.05149146914482117, -0.04463110491633415, -0.03777073696255684, -0.030910369008779526, -0.02405000478029251, -0.017189636826515198, -0.010329268872737885, -0.003468901850283146, 0.0033914651721715927, 0.010251831263303757, 0.01711219921708107, 0.023972567170858383, 0.030832931399345398, 0.03769329935312271, 0.044553667306900024, 0.05141403526067734, 0.05827440321445465, 0.06513476371765137, 0.07199513912200928, 0.078855499625206, 0.0857158675789833, 0.09257623553276062, 0.09943660348653793, 0.10629697144031525, 0.11315733939409256, 0.12001770734786987, 0.1268780678510666, 0.1337384432554245, 0.14059880375862122, 0.14745917916297913, 0.15431953966617584, 0.16117990016937256, 0.16804027557373047, 0.17490063607692719, 0.1817610114812851, 0.1886213719844818, 0.19548174738883972, 0.20234210789203644, 0.20920246839523315, 0.21606282889842987, 0.22292320430278778, 0.2297835648059845, 0.2366439402103424, 0.24350430071353912, 0.25036466121673584, 0.25722503662109375, 0.26408541202545166]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 4.0, 10.0, 2.0, 9.0, 18.0, 27.0, 79.0, 178.0, 365.0, 609.0, 1162.0, 2134.0, 4350.0, 10855.0, 50306.0, 549491.0, 2956865.0, 547813.0, 52887.0, 10771.0, 3947.0, 1505.0, 522.0, 153.0, 73.0, 49.0, 20.0, 23.0, 12.0, 9.0, 7.0, 4.0, 9.0, 4.0, 3.0, 5.0, 5.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.01461029052734375, -0.01409757137298584, -0.01358485221862793, -0.01307213306427002, -0.01255941390991211, -0.0120466947555542, -0.011533975601196289, -0.011021256446838379, -0.010508537292480469, -0.009995818138122559, -0.009483098983764648, -0.008970379829406738, -0.008457660675048828, -0.007944941520690918, -0.007432222366333008, -0.006919503211975098, -0.0064067840576171875, -0.005894064903259277, -0.005381345748901367, -0.004868626594543457, -0.004355907440185547, -0.0038431882858276367, -0.0033304691314697266, -0.0028177499771118164, -0.0023050308227539062, -0.001792311668395996, -0.001279592514038086, -0.0007668733596801758, -0.0002541542053222656, 0.00025856494903564453, 0.0007712841033935547, 0.0012840032577514648, 0.001796722412109375, 0.002309441566467285, 0.0028221607208251953, 0.0033348798751831055, 0.0038475990295410156, 0.004360318183898926, 0.004873037338256836, 0.005385756492614746, 0.005898475646972656, 0.006411194801330566, 0.0069239139556884766, 0.007436633110046387, 0.007949352264404297, 0.008462071418762207, 0.008974790573120117, 0.009487509727478027, 0.010000228881835938, 0.010512948036193848, 0.011025667190551758, 0.011538386344909668, 0.012051105499267578, 0.012563824653625488, 0.013076543807983398, 0.013589262962341309, 0.014101982116699219, 0.014614701271057129, 0.015127420425415039, 0.01564013957977295, 0.01615285873413086, 0.01666557788848877, 0.01717829704284668, 0.01769101619720459, 0.0182037353515625]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 5.0, 2.0, 5.0, 14.0, 8.0, 20.0, 34.0, 58.0, 77.0, 110.0, 138.0, 146.0, 112.0, 109.0, 75.0, 38.0, 23.0, 13.0, 7.0, 5.0, 6.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.028076171875, -0.026963233947753906, -0.025850296020507812, -0.02473735809326172, -0.023624420166015625, -0.02251148223876953, -0.021398544311523438, -0.020285606384277344, -0.01917266845703125, -0.018059730529785156, -0.016946792602539062, -0.01583385467529297, -0.014720916748046875, -0.013607978820800781, -0.012495040893554688, -0.011382102966308594, -0.0102691650390625, -0.009156227111816406, -0.008043289184570312, -0.006930351257324219, -0.005817413330078125, -0.004704475402832031, -0.0035915374755859375, -0.0024785995483398438, -0.00136566162109375, -0.00025272369384765625, 0.0008602142333984375, 0.0019731521606445312, 0.003086090087890625, 0.004199028015136719, 0.0053119659423828125, 0.006424903869628906, 0.007537841796875, 0.008650779724121094, 0.009763717651367188, 0.010876655578613281, 0.011989593505859375, 0.013102531433105469, 0.014215469360351562, 0.015328407287597656, 0.01644134521484375, 0.017554283142089844, 0.018667221069335938, 0.01978015899658203, 0.020893096923828125, 0.02200603485107422, 0.023118972778320312, 0.024231910705566406, 0.0253448486328125, 0.026457786560058594, 0.027570724487304688, 0.02868366241455078, 0.029796600341796875, 0.03090953826904297, 0.03202247619628906, 0.033135414123535156, 0.03424835205078125, 0.035361289978027344, 0.03647422790527344, 0.03758716583251953, 0.038700103759765625, 0.03981304168701172, 0.04092597961425781, 0.042038917541503906, 0.04315185546875]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 2.0, 4.0, 4.0, 5.0, 22.0, 11.0, 24.0, 29.0, 58.0, 176.0, 421.0, 1050.0, 3844.0, 29230.0, 3350902.0, 788226.0, 16182.0, 2595.0, 832.0, 347.0, 142.0, 86.0, 28.0, 15.0, 15.0, 9.0, 10.0, 8.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.032806396484375, -0.03162956237792969, -0.030452728271484375, -0.029275894165039062, -0.02809906005859375, -0.026922225952148438, -0.025745391845703125, -0.024568557739257812, -0.0233917236328125, -0.022214889526367188, -0.021038055419921875, -0.019861221313476562, -0.01868438720703125, -0.017507553100585938, -0.016330718994140625, -0.015153884887695312, -0.01397705078125, -0.012800216674804688, -0.011623382568359375, -0.010446548461914062, -0.00926971435546875, -0.008092880249023438, -0.006916046142578125, -0.0057392120361328125, -0.0045623779296875, -0.0033855438232421875, -0.002208709716796875, -0.0010318756103515625, 0.00014495849609375, 0.0013217926025390625, 0.002498626708984375, 0.0036754608154296875, 0.004852294921875, 0.0060291290283203125, 0.007205963134765625, 0.008382797241210938, 0.00955963134765625, 0.010736465454101562, 0.011913299560546875, 0.013090133666992188, 0.0142669677734375, 0.015443801879882812, 0.016620635986328125, 0.017797470092773438, 0.01897430419921875, 0.020151138305664062, 0.021327972412109375, 0.022504806518554688, 0.023681640625, 0.024858474731445312, 0.026035308837890625, 0.027212142944335938, 0.02838897705078125, 0.029565811157226562, 0.030742645263671875, 0.03191947937011719, 0.0330963134765625, 0.03427314758300781, 0.035449981689453125, 0.03662681579589844, 0.03780364990234375, 0.03898048400878906, 0.040157318115234375, 0.04133415222167969, 0.042510986328125]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 6.0, 11.0, 4.0, 13.0, 15.0, 23.0, 13.0, 34.0, 82.0, 185.0, 452.0, 929.0, 1025.0, 686.0, 292.0, 125.0, 72.0, 25.0, 29.0, 12.0, 19.0, 5.0, 6.0, 3.0, 5.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0269775390625, -0.025987625122070312, -0.024997711181640625, -0.024007797241210938, -0.02301788330078125, -0.022027969360351562, -0.021038055419921875, -0.020048141479492188, -0.0190582275390625, -0.018068313598632812, -0.017078399658203125, -0.016088485717773438, -0.01509857177734375, -0.014108657836914062, -0.013118743896484375, -0.012128829956054688, -0.011138916015625, -0.010149002075195312, -0.009159088134765625, -0.008169174194335938, -0.00717926025390625, -0.0061893463134765625, -0.005199432373046875, -0.0042095184326171875, -0.0032196044921875, -0.0022296905517578125, -0.001239776611328125, -0.0002498626708984375, 0.00074005126953125, 0.0017299652099609375, 0.002719879150390625, 0.0037097930908203125, 0.00469970703125, 0.0056896209716796875, 0.006679534912109375, 0.0076694488525390625, 0.00865936279296875, 0.009649276733398438, 0.010639190673828125, 0.011629104614257812, 0.0126190185546875, 0.013608932495117188, 0.014598846435546875, 0.015588760375976562, 0.01657867431640625, 0.017568588256835938, 0.018558502197265625, 0.019548416137695312, 0.020538330078125, 0.021528244018554688, 0.022518157958984375, 0.023508071899414062, 0.02449798583984375, 0.025487899780273438, 0.026477813720703125, 0.027467727661132812, 0.0284576416015625, 0.029447555541992188, 0.030437469482421875, 0.03142738342285156, 0.03241729736328125, 0.03340721130371094, 0.034397125244140625, 0.03538703918457031, 0.036376953125]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 9.0, 11.0, 31.0, 153.0, 404.0, 272.0, 100.0, 13.0, 7.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3739861845970154, -0.36254844069480896, -0.3511107265949249, -0.3396729826927185, -0.3282352387905121, -0.31679749488830566, -0.30535978078842163, -0.2939220368862152, -0.2824842929840088, -0.27104654908180237, -0.25960883498191833, -0.24817109107971191, -0.2367333471775055, -0.22529561817646027, -0.21385788917541504, -0.20242014527320862, -0.19098243117332458, -0.17954470217227936, -0.16810695827007294, -0.1566692292690277, -0.1452314853668213, -0.13379375636577606, -0.12235602736473083, -0.11091829091310501, -0.09948055446147919, -0.08804281800985336, -0.07660508155822754, -0.06516735255718231, -0.05372961610555649, -0.042291879653930664, -0.030854150652885437, -0.019416414201259613, -0.007978677749633789, 0.0034590568393468857, 0.01489679142832756, 0.026334524154663086, 0.03777226060628891, 0.049209997057914734, 0.06064772605895996, 0.07208546251058578, 0.08352319896221161, 0.09496093541383743, 0.10639867186546326, 0.11783640086650848, 0.1292741298675537, 0.14071187376976013, 0.15214960277080536, 0.16358733177185059, 0.175025075674057, 0.18646280467510223, 0.19790054857730865, 0.20933827757835388, 0.2207760214805603, 0.23221375048160553, 0.24365147948265076, 0.2550892233848572, 0.2665269374847412, 0.27796468138694763, 0.28940239548683167, 0.3008401393890381, 0.3122778832912445, 0.3237156271934509, 0.33515334129333496, 0.3465910851955414, 0.3580288290977478]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 3.0, 1.0, 7.0, 6.0, 9.0, 15.0, 28.0, 23.0, 26.0, 39.0, 53.0, 51.0, 57.0, 73.0, 109.0, 74.0, 81.0, 59.0, 58.0, 38.0, 50.0, 46.0, 37.0, 23.0, 13.0, 6.0, 8.0, 6.0, 2.0, 3.0, 1.0, 1.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.16007930040359497, -0.15475836396217346, -0.14943744242191315, -0.14411652088165283, -0.13879558444023132, -0.13347464799880981, -0.1281537264585495, -0.12283279746770859, -0.11751186847686768, -0.11219093948602676, -0.10687001049518585, -0.10154908150434494, -0.09622815251350403, -0.09090722352266312, -0.0855862945318222, -0.08026536554098129, -0.07494443655014038, -0.06962350755929947, -0.06430257856845856, -0.058981649577617645, -0.05366072058677673, -0.04833979159593582, -0.04301886260509491, -0.037697933614254, -0.032377004623413086, -0.027056075632572174, -0.021735146641731262, -0.01641421765089035, -0.011093288660049438, -0.005772359669208527, -0.00045143067836761475, 0.004869498312473297, 0.010190427303314209, 0.01551135629415512, 0.020832285284996033, 0.026153214275836945, 0.031474143266677856, 0.03679507225751877, 0.04211600124835968, 0.04743693023920059, 0.052757859230041504, 0.058078788220882416, 0.06339971721172333, 0.06872064620256424, 0.07404157519340515, 0.07936250418424606, 0.08468343317508698, 0.09000436216592789, 0.0953252911567688, 0.10064622014760971, 0.10596714913845062, 0.11128807812929153, 0.11660900712013245, 0.12192993611097336, 0.12725086510181427, 0.13257178664207458, 0.1378927230834961, 0.1432136595249176, 0.14853458106517792, 0.15385550260543823, 0.15917643904685974, 0.16449737548828125, 0.16981829702854156, 0.17513921856880188, 0.1804601550102234]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 5.0, 4.0, 5.0, 5.0, 5.0, 16.0, 19.0, 17.0, 28.0, 37.0, 52.0, 89.0, 141.0, 279.0, 407.0, 812.0, 1798.0, 5070.0, 21895.0, 174201.0, 700953.0, 118582.0, 16715.0, 4153.0, 1598.0, 686.0, 347.0, 212.0, 131.0, 99.0, 65.0, 39.0, 37.0, 18.0, 15.0, 10.0, 7.0, 3.0, 5.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0390625, -0.0378575325012207, -0.036652565002441406, -0.03544759750366211, -0.03424263000488281, -0.033037662506103516, -0.03183269500732422, -0.030627727508544922, -0.029422760009765625, -0.028217792510986328, -0.02701282501220703, -0.025807857513427734, -0.024602890014648438, -0.02339792251586914, -0.022192955017089844, -0.020987987518310547, -0.01978302001953125, -0.018578052520751953, -0.017373085021972656, -0.01616811752319336, -0.014963150024414062, -0.013758182525634766, -0.012553215026855469, -0.011348247528076172, -0.010143280029296875, -0.008938312530517578, -0.007733345031738281, -0.006528377532958984, -0.0053234100341796875, -0.004118442535400391, -0.0029134750366210938, -0.0017085075378417969, -0.0005035400390625, 0.0007014274597167969, 0.0019063949584960938, 0.0031113624572753906, 0.0043163299560546875, 0.005521297454833984, 0.006726264953613281, 0.007931232452392578, 0.009136199951171875, 0.010341167449951172, 0.011546134948730469, 0.012751102447509766, 0.013956069946289062, 0.01516103744506836, 0.016366004943847656, 0.017570972442626953, 0.01877593994140625, 0.019980907440185547, 0.021185874938964844, 0.02239084243774414, 0.023595809936523438, 0.024800777435302734, 0.02600574493408203, 0.027210712432861328, 0.028415679931640625, 0.029620647430419922, 0.03082561492919922, 0.032030582427978516, 0.03323554992675781, 0.03444051742553711, 0.035645484924316406, 0.0368504524230957, 0.038055419921875]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 4.0, 4.0, 10.0, 8.0, 21.0, 29.0, 60.0, 77.0, 112.0, 135.0, 143.0, 122.0, 104.0, 81.0, 43.0, 25.0, 5.0, 7.0, 11.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0283203125, -0.02720165252685547, -0.026082992553710938, -0.024964332580566406, -0.023845672607421875, -0.022727012634277344, -0.021608352661132812, -0.02048969268798828, -0.01937103271484375, -0.01825237274169922, -0.017133712768554688, -0.016015052795410156, -0.014896392822265625, -0.013777732849121094, -0.012659072875976562, -0.011540412902832031, -0.0104217529296875, -0.009303092956542969, -0.008184432983398438, -0.007065773010253906, -0.005947113037109375, -0.004828453063964844, -0.0037097930908203125, -0.0025911331176757812, -0.00147247314453125, -0.00035381317138671875, 0.0007648468017578125, 0.0018835067749023438, 0.003002166748046875, 0.004120826721191406, 0.0052394866943359375, 0.006358146667480469, 0.007476806640625, 0.008595466613769531, 0.009714126586914062, 0.010832786560058594, 0.011951446533203125, 0.013070106506347656, 0.014188766479492188, 0.015307426452636719, 0.01642608642578125, 0.01754474639892578, 0.018663406372070312, 0.019782066345214844, 0.020900726318359375, 0.022019386291503906, 0.023138046264648438, 0.02425670623779297, 0.0253753662109375, 0.02649402618408203, 0.027612686157226562, 0.028731346130371094, 0.029850006103515625, 0.030968666076660156, 0.03208732604980469, 0.03320598602294922, 0.03432464599609375, 0.03544330596923828, 0.03656196594238281, 0.037680625915527344, 0.038799285888671875, 0.039917945861816406, 0.04103660583496094, 0.04215526580810547, 0.04327392578125]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 1.0, 5.0, 7.0, 6.0, 21.0, 32.0, 54.0, 69.0, 179.0, 332.0, 766.0, 1938.0, 8078.0, 76254.0, 859094.0, 89429.0, 8727.0, 2003.0, 747.0, 354.0, 195.0, 111.0, 56.0, 37.0, 14.0, 10.0, 14.0, 5.0, 2.0, 5.0, 2.0, 3.0, 1.0, 4.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0518798828125, -0.05018043518066406, -0.048480987548828125, -0.04678153991699219, -0.04508209228515625, -0.04338264465332031, -0.041683197021484375, -0.03998374938964844, -0.0382843017578125, -0.03658485412597656, -0.034885406494140625, -0.03318595886230469, -0.03148651123046875, -0.029787063598632812, -0.028087615966796875, -0.026388168334960938, -0.024688720703125, -0.022989273071289062, -0.021289825439453125, -0.019590377807617188, -0.01789093017578125, -0.016191482543945312, -0.014492034912109375, -0.012792587280273438, -0.0110931396484375, -0.009393692016601562, -0.007694244384765625, -0.0059947967529296875, -0.00429534912109375, -0.0025959014892578125, -0.000896453857421875, 0.0008029937744140625, 0.00250244140625, 0.0042018890380859375, 0.005901336669921875, 0.0076007843017578125, 0.00930023193359375, 0.010999679565429688, 0.012699127197265625, 0.014398574829101562, 0.0160980224609375, 0.017797470092773438, 0.019496917724609375, 0.021196365356445312, 0.02289581298828125, 0.024595260620117188, 0.026294708251953125, 0.027994155883789062, 0.029693603515625, 0.03139305114746094, 0.033092498779296875, 0.03479194641113281, 0.03649139404296875, 0.03819084167480469, 0.039890289306640625, 0.04158973693847656, 0.0432891845703125, 0.04498863220214844, 0.046688079833984375, 0.04838752746582031, 0.05008697509765625, 0.05178642272949219, 0.053485870361328125, 0.05518531799316406, 0.056884765625]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 5.0, 3.0, 8.0, 14.0, 15.0, 31.0, 48.0, 72.0, 95.0, 122.0, 122.0, 138.0, 84.0, 56.0, 79.0, 37.0, 32.0, 14.0, 13.0, 12.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.12744140625, -0.12367057800292969, -0.11989974975585938, -0.11612892150878906, -0.11235809326171875, -0.10858726501464844, -0.10481643676757812, -0.10104560852050781, -0.0972747802734375, -0.09350395202636719, -0.08973312377929688, -0.08596229553222656, -0.08219146728515625, -0.07842063903808594, -0.07464981079101562, -0.07087898254394531, -0.067108154296875, -0.06333732604980469, -0.059566497802734375, -0.05579566955566406, -0.05202484130859375, -0.04825401306152344, -0.044483184814453125, -0.04071235656738281, -0.0369415283203125, -0.03317070007324219, -0.029399871826171875, -0.025629043579101562, -0.02185821533203125, -0.018087387084960938, -0.014316558837890625, -0.010545730590820312, -0.00677490234375, -0.0030040740966796875, 0.000766754150390625, 0.0045375823974609375, 0.00830841064453125, 0.012079238891601562, 0.015850067138671875, 0.019620895385742188, 0.0233917236328125, 0.027162551879882812, 0.030933380126953125, 0.03470420837402344, 0.03847503662109375, 0.04224586486816406, 0.046016693115234375, 0.04978752136230469, 0.053558349609375, 0.05732917785644531, 0.061100006103515625, 0.06487083435058594, 0.06864166259765625, 0.07241249084472656, 0.07618331909179688, 0.07995414733886719, 0.0837249755859375, 0.08749580383300781, 0.09126663208007812, 0.09503746032714844, 0.09880828857421875, 0.10257911682128906, 0.10634994506835938, 0.11012077331542969, 0.1138916015625]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 7.0, 3.0, 6.0, 6.0, 11.0, 13.0, 17.0, 15.0, 43.0, 60.0, 80.0, 141.0, 225.0, 371.0, 729.0, 1492.0, 3427.0, 8917.0, 26746.0, 96417.0, 627055.0, 207798.0, 49608.0, 15376.0, 5349.0, 2267.0, 1083.0, 550.0, 277.0, 167.0, 95.0, 68.0, 36.0, 29.0, 27.0, 12.0, 11.0, 7.0, 8.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.005321502685546875, -0.0051538944244384766, -0.004986286163330078, -0.00481867790222168, -0.004651069641113281, -0.004483461380004883, -0.004315853118896484, -0.004148244857788086, -0.0039806365966796875, -0.003813028335571289, -0.0036454200744628906, -0.003477811813354492, -0.0033102035522460938, -0.0031425952911376953, -0.002974987030029297, -0.0028073787689208984, -0.0026397705078125, -0.0024721622467041016, -0.002304553985595703, -0.0021369457244873047, -0.0019693374633789062, -0.0018017292022705078, -0.0016341209411621094, -0.001466512680053711, -0.0012989044189453125, -0.001131296157836914, -0.0009636878967285156, -0.0007960796356201172, -0.0006284713745117188, -0.0004608631134033203, -0.0002932548522949219, -0.00012564659118652344, 4.1961669921875e-05, 0.00020956993103027344, 0.0003771781921386719, 0.0005447864532470703, 0.0007123947143554688, 0.0008800029754638672, 0.0010476112365722656, 0.001215219497680664, 0.0013828277587890625, 0.001550436019897461, 0.0017180442810058594, 0.0018856525421142578, 0.0020532608032226562, 0.0022208690643310547, 0.002388477325439453, 0.0025560855865478516, 0.00272369384765625, 0.0028913021087646484, 0.003058910369873047, 0.0032265186309814453, 0.0033941268920898438, 0.003561735153198242, 0.0037293434143066406, 0.003896951675415039, 0.0040645599365234375, 0.004232168197631836, 0.004399776458740234, 0.004567384719848633, 0.004734992980957031, 0.00490260124206543, 0.005070209503173828, 0.0052378177642822266, 0.005405426025390625]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 4.0, 7.0, 7.0, 7.0, 3.0, 5.0, 17.0, 14.0, 19.0, 29.0, 32.0, 38.0, 31.0, 46.0, 47.0, 56.0, 51.0, 67.0, 63.0, 79.0, 54.0, 50.0, 44.0, 43.0, 42.0, 35.0, 34.0, 16.0, 20.0, 11.0, 10.0, 3.0, 11.0, 4.0, 7.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.589557647705078e-06, -4.4656917452812195e-06, -4.341825842857361e-06, -4.217959940433502e-06, -4.0940940380096436e-06, -3.970228135585785e-06, -3.846362233161926e-06, -3.7224963307380676e-06, -3.598630428314209e-06, -3.4747645258903503e-06, -3.3508986234664917e-06, -3.227032721042633e-06, -3.1031668186187744e-06, -2.9793009161949158e-06, -2.855435013771057e-06, -2.7315691113471985e-06, -2.60770320892334e-06, -2.483837306499481e-06, -2.3599714040756226e-06, -2.236105501651764e-06, -2.1122395992279053e-06, -1.9883736968040466e-06, -1.864507794380188e-06, -1.7406418919563293e-06, -1.6167759895324707e-06, -1.492910087108612e-06, -1.3690441846847534e-06, -1.2451782822608948e-06, -1.1213123798370361e-06, -9.974464774131775e-07, -8.735805749893188e-07, -7.497146725654602e-07, -6.258487701416016e-07, -5.019828677177429e-07, -3.781169652938843e-07, -2.5425106287002563e-07, -1.30385160446167e-07, -6.51925802230835e-09, 1.1734664440155029e-07, 2.4121254682540894e-07, 3.650784492492676e-07, 4.889443516731262e-07, 6.128102540969849e-07, 7.366761565208435e-07, 8.605420589447021e-07, 9.844079613685608e-07, 1.1082738637924194e-06, 1.232139766216278e-06, 1.3560056686401367e-06, 1.4798715710639954e-06, 1.603737473487854e-06, 1.7276033759117126e-06, 1.8514692783355713e-06, 1.97533518075943e-06, 2.0992010831832886e-06, 2.2230669856071472e-06, 2.346932888031006e-06, 2.4707987904548645e-06, 2.594664692878723e-06, 2.7185305953025818e-06, 2.8423964977264404e-06, 2.966262400150299e-06, 3.0901283025741577e-06, 3.2139942049980164e-06, 3.337860107421875e-06]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 4.0, 3.0, 4.0, 3.0, 2.0, 11.0, 7.0, 16.0, 16.0, 27.0, 24.0, 44.0, 47.0, 90.0, 140.0, 216.0, 404.0, 766.0, 1572.0, 3763.0, 9265.0, 28937.0, 111918.0, 631426.0, 191742.0, 45125.0, 13676.0, 4983.0, 2037.0, 969.0, 513.0, 284.0, 175.0, 120.0, 66.0, 48.0, 26.0, 24.0, 19.0, 9.0, 12.0, 7.0, 6.0, 10.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.005664825439453125, -0.005506038665771484, -0.005347251892089844, -0.005188465118408203, -0.0050296783447265625, -0.004870891571044922, -0.004712104797363281, -0.004553318023681641, -0.00439453125, -0.004235744476318359, -0.004076957702636719, -0.003918170928955078, -0.0037593841552734375, -0.003600597381591797, -0.0034418106079101562, -0.0032830238342285156, -0.003124237060546875, -0.0029654502868652344, -0.0028066635131835938, -0.002647876739501953, -0.0024890899658203125, -0.002330303192138672, -0.0021715164184570312, -0.0020127296447753906, -0.00185394287109375, -0.0016951560974121094, -0.0015363693237304688, -0.0013775825500488281, -0.0012187957763671875, -0.0010600090026855469, -0.0009012222290039062, -0.0007424354553222656, -0.000583648681640625, -0.0004248619079589844, -0.00026607513427734375, -0.00010728836059570312, 5.14984130859375e-05, 0.00021028518676757812, 0.00036907196044921875, 0.0005278587341308594, 0.0006866455078125, 0.0008454322814941406, 0.0010042190551757812, 0.0011630058288574219, 0.0013217926025390625, 0.0014805793762207031, 0.0016393661499023438, 0.0017981529235839844, 0.001956939697265625, 0.0021157264709472656, 0.0022745132446289062, 0.002433300018310547, 0.0025920867919921875, 0.002750873565673828, 0.0029096603393554688, 0.0030684471130371094, 0.00322723388671875, 0.0033860206604003906, 0.0035448074340820312, 0.003703594207763672, 0.0038623809814453125, 0.004021167755126953, 0.004179954528808594, 0.004338741302490234, 0.004497528076171875]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 6.0, 4.0, 14.0, 3.0, 11.0, 20.0, 19.0, 23.0, 35.0, 63.0, 61.0, 86.0, 82.0, 108.0, 101.0, 100.0, 70.0, 53.0, 37.0, 29.0, 17.0, 11.0, 10.0, 12.0, 5.0, 7.0, 8.0, 1.0, 4.0, 2.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00955963134765625, -0.009258031845092773, -0.008956432342529297, -0.00865483283996582, -0.008353233337402344, -0.008051633834838867, -0.007750034332275391, -0.007448434829711914, -0.0071468353271484375, -0.006845235824584961, -0.006543636322021484, -0.006242036819458008, -0.005940437316894531, -0.005638837814331055, -0.005337238311767578, -0.0050356388092041016, -0.004734039306640625, -0.0044324398040771484, -0.004130840301513672, -0.0038292407989501953, -0.0035276412963867188, -0.003226041793823242, -0.0029244422912597656, -0.002622842788696289, -0.0023212432861328125, -0.002019643783569336, -0.0017180442810058594, -0.0014164447784423828, -0.0011148452758789062, -0.0008132457733154297, -0.0005116462707519531, -0.00021004676818847656, 9.1552734375e-05, 0.00039315223693847656, 0.0006947517395019531, 0.0009963512420654297, 0.0012979507446289062, 0.0015995502471923828, 0.0019011497497558594, 0.002202749252319336, 0.0025043487548828125, 0.002805948257446289, 0.0031075477600097656, 0.003409147262573242, 0.0037107467651367188, 0.004012346267700195, 0.004313945770263672, 0.0046155452728271484, 0.004917144775390625, 0.0052187442779541016, 0.005520343780517578, 0.005821943283081055, 0.006123542785644531, 0.006425142288208008, 0.006726741790771484, 0.007028341293334961, 0.0073299407958984375, 0.007631540298461914, 0.00793313980102539, 0.008234739303588867, 0.008536338806152344, 0.00883793830871582, 0.009139537811279297, 0.009441137313842773, 0.00974273681640625]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 0.0, 2.0, 1.0, 3.0, 4.0, 14.0, 24.0, 51.0, 97.0, 182.0, 301.0, 152.0, 65.0, 50.0, 28.0, 7.0, 5.0, 7.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2834792137145996, -0.2760842442512512, -0.26868924498558044, -0.26129427552223206, -0.2538992762565613, -0.2465043067932129, -0.2391093224287033, -0.23171433806419373, -0.22431936860084534, -0.21692438423633575, -0.20952939987182617, -0.20213443040847778, -0.1947394460439682, -0.18734446167945862, -0.17994947731494904, -0.17255449295043945, -0.16515952348709106, -0.15776453912258148, -0.1503695547580719, -0.1429745852947235, -0.13557960093021393, -0.12818461656570435, -0.12078963220119476, -0.11339465528726578, -0.1059996634721756, -0.09860467910766602, -0.09120970219373703, -0.08381471782922745, -0.07641974091529846, -0.06902475655078888, -0.061629775911569595, -0.05423479527235031, -0.046839818358421326, -0.03944483771920204, -0.03204985707998276, -0.024654874578118324, -0.01725989393889904, -0.009864911437034607, -0.002469930797815323, 0.004925049841403961, 0.012320030480623245, 0.01971501111984253, 0.027109991759061813, 0.0345049723982811, 0.04189995676279068, 0.049294937402009964, 0.05668991804122925, 0.06408490240573883, 0.07147987931966782, 0.0788748636841774, 0.08626984059810638, 0.09366482496261597, 0.10105980187654495, 0.10845478624105453, 0.11584976315498352, 0.1232447475194931, 0.13063973188400269, 0.13803471624851227, 0.14542970061302185, 0.15282467007637024, 0.16021965444087982, 0.1676146388053894, 0.175009623169899, 0.18240460753440857, 0.18979957699775696]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 5.0, 2.0, 2.0, 4.0, 1.0, 5.0, 8.0, 9.0, 5.0, 16.0, 19.0, 16.0, 12.0, 24.0, 23.0, 23.0, 33.0, 25.0, 37.0, 56.0, 87.0, 119.0, 94.0, 61.0, 51.0, 32.0, 32.0, 34.0, 25.0, 23.0, 21.0, 22.0, 17.0, 11.0, 12.0, 9.0, 11.0, 1.0, 2.0, 4.0, 4.0, 3.0, 2.0, 3.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.24130672216415405, -0.23438642919063568, -0.2274661362171173, -0.22054584324359894, -0.21362555027008057, -0.206705242395401, -0.19978494942188263, -0.19286465644836426, -0.1859443634748459, -0.17902407050132751, -0.17210377752780914, -0.16518348455429077, -0.1582631766796112, -0.15134289860725403, -0.14442259073257446, -0.1375022977590561, -0.13058200478553772, -0.12366171181201935, -0.11674141883850098, -0.10982111841440201, -0.10290082544088364, -0.09598053246736526, -0.0890602320432663, -0.08213993906974792, -0.07521964609622955, -0.06829935312271118, -0.06137905642390251, -0.05445875972509384, -0.04753846675157547, -0.0406181737780571, -0.03369787707924843, -0.02677758038043976, -0.019857287406921387, -0.012936992570757866, -0.006016697734594345, 0.0009035971015691757, 0.007823891937732697, 0.014744184911251068, 0.021664481610059738, 0.028584778308868408, 0.03550507128238678, 0.04242536425590515, 0.04934566095471382, 0.05626595765352249, 0.06318625062704086, 0.07010654360055923, 0.0770268440246582, 0.08394713699817657, 0.09086742997169495, 0.09778772294521332, 0.10470801591873169, 0.11162831634283066, 0.11854860931634903, 0.125468909740448, 0.13238920271396637, 0.13930949568748474, 0.1462297886610031, 0.15315008163452148, 0.16007037460803986, 0.16699066758155823, 0.1739109754562378, 0.18083125352859497, 0.18775156140327454, 0.1946718543767929, 0.20159214735031128]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 0.0, 6.0, 12.0, 11.0, 50.0, 187.0, 404.0, 732.0, 1382.0, 3179.0, 8970.0, 43470.0, 488109.0, 3103235.0, 487899.0, 43374.0, 9606.0, 2693.0, 689.0, 125.0, 60.0, 40.0, 18.0, 10.0, 4.0, 5.0, 6.0, 6.0, 1.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01617431640625, -0.015586137771606445, -0.01499795913696289, -0.014409780502319336, -0.013821601867675781, -0.013233423233032227, -0.012645244598388672, -0.012057065963745117, -0.011468887329101562, -0.010880708694458008, -0.010292530059814453, -0.009704351425170898, -0.009116172790527344, -0.008527994155883789, -0.007939815521240234, -0.00735163688659668, -0.006763458251953125, -0.00617527961730957, -0.005587100982666016, -0.004998922348022461, -0.004410743713378906, -0.0038225650787353516, -0.003234386444091797, -0.002646207809448242, -0.0020580291748046875, -0.0014698505401611328, -0.0008816719055175781, -0.00029349327087402344, 0.00029468536376953125, 0.0008828639984130859, 0.0014710426330566406, 0.0020592212677001953, 0.00264739990234375, 0.0032355785369873047, 0.0038237571716308594, 0.004411935806274414, 0.005000114440917969, 0.0055882930755615234, 0.006176471710205078, 0.006764650344848633, 0.0073528289794921875, 0.007941007614135742, 0.008529186248779297, 0.009117364883422852, 0.009705543518066406, 0.010293722152709961, 0.010881900787353516, 0.01147007942199707, 0.012058258056640625, 0.01264643669128418, 0.013234615325927734, 0.013822793960571289, 0.014410972595214844, 0.014999151229858398, 0.015587329864501953, 0.016175508499145508, 0.016763687133789062, 0.017351865768432617, 0.017940044403076172, 0.018528223037719727, 0.01911640167236328, 0.019704580307006836, 0.02029275894165039, 0.020880937576293945, 0.0214691162109375]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 0.0, 1.0, 8.0, 3.0, 10.0, 7.0, 20.0, 27.0, 46.0, 68.0, 114.0, 133.0, 125.0, 117.0, 111.0, 86.0, 48.0, 32.0, 14.0, 10.0, 9.0, 3.0, 2.0, 6.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0270843505859375, -0.025982141494750977, -0.024879932403564453, -0.02377772331237793, -0.022675514221191406, -0.021573305130004883, -0.02047109603881836, -0.019368886947631836, -0.018266677856445312, -0.01716446876525879, -0.016062259674072266, -0.014960050582885742, -0.013857841491699219, -0.012755632400512695, -0.011653423309326172, -0.010551214218139648, -0.009449005126953125, -0.008346796035766602, -0.007244586944580078, -0.006142377853393555, -0.005040168762207031, -0.003937959671020508, -0.0028357505798339844, -0.001733541488647461, -0.0006313323974609375, 0.00047087669372558594, 0.0015730857849121094, 0.002675294876098633, 0.0037775039672851562, 0.00487971305847168, 0.005981922149658203, 0.0070841312408447266, 0.00818634033203125, 0.009288549423217773, 0.010390758514404297, 0.01149296760559082, 0.012595176696777344, 0.013697385787963867, 0.01479959487915039, 0.015901803970336914, 0.017004013061523438, 0.01810622215270996, 0.019208431243896484, 0.020310640335083008, 0.02141284942626953, 0.022515058517456055, 0.023617267608642578, 0.0247194766998291, 0.025821685791015625, 0.02692389488220215, 0.028026103973388672, 0.029128313064575195, 0.03023052215576172, 0.03133273124694824, 0.032434940338134766, 0.03353714942932129, 0.03463935852050781, 0.035741567611694336, 0.03684377670288086, 0.03794598579406738, 0.039048194885253906, 0.04015040397644043, 0.04125261306762695, 0.04235482215881348, 0.04345703125]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 8.0, 9.0, 18.0, 47.0, 66.0, 111.0, 257.0, 677.0, 6074.0, 1291634.0, 2886270.0, 7759.0, 844.0, 247.0, 109.0, 67.0, 43.0, 19.0, 14.0, 9.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0689697265625, -0.06715679168701172, -0.06534385681152344, -0.06353092193603516, -0.061717987060546875, -0.059905052185058594, -0.05809211730957031, -0.05627918243408203, -0.05446624755859375, -0.05265331268310547, -0.05084037780761719, -0.049027442932128906, -0.047214508056640625, -0.045401573181152344, -0.04358863830566406, -0.04177570343017578, -0.0399627685546875, -0.03814983367919922, -0.03633689880371094, -0.034523963928222656, -0.032711029052734375, -0.030898094177246094, -0.029085159301757812, -0.02727222442626953, -0.02545928955078125, -0.02364635467529297, -0.021833419799804688, -0.020020484924316406, -0.018207550048828125, -0.016394615173339844, -0.014581680297851562, -0.012768745422363281, -0.010955810546875, -0.009142875671386719, -0.0073299407958984375, -0.005517005920410156, -0.003704071044921875, -0.0018911361694335938, -7.82012939453125e-05, 0.0017347335815429688, 0.00354766845703125, 0.005360603332519531, 0.0071735382080078125, 0.008986473083496094, 0.010799407958984375, 0.012612342834472656, 0.014425277709960938, 0.01623821258544922, 0.0180511474609375, 0.01986408233642578, 0.021677017211914062, 0.023489952087402344, 0.025302886962890625, 0.027115821838378906, 0.028928756713867188, 0.03074169158935547, 0.03255462646484375, 0.03436756134033203, 0.03618049621582031, 0.037993431091308594, 0.039806365966796875, 0.041619300842285156, 0.04343223571777344, 0.04524517059326172, 0.04705810546875]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 8.0, 6.0, 10.0, 22.0, 51.0, 148.0, 489.0, 1474.0, 1301.0, 400.0, 97.0, 36.0, 14.0, 11.0, 4.0, 5.0, 3.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0743408203125, -0.07260847091674805, -0.0708761215209961, -0.06914377212524414, -0.06741142272949219, -0.06567907333374023, -0.06394672393798828, -0.06221437454223633, -0.060482025146484375, -0.05874967575073242, -0.05701732635498047, -0.055284976959228516, -0.05355262756347656, -0.05182027816772461, -0.050087928771972656, -0.0483555793762207, -0.04662322998046875, -0.0448908805847168, -0.043158531188964844, -0.04142618179321289, -0.03969383239746094, -0.037961483001708984, -0.03622913360595703, -0.03449678421020508, -0.032764434814453125, -0.031032085418701172, -0.02929973602294922, -0.027567386627197266, -0.025835037231445312, -0.02410268783569336, -0.022370338439941406, -0.020637989044189453, -0.0189056396484375, -0.017173290252685547, -0.015440940856933594, -0.01370859146118164, -0.011976242065429688, -0.010243892669677734, -0.008511543273925781, -0.006779193878173828, -0.005046844482421875, -0.003314495086669922, -0.0015821456909179688, 0.00015020370483398438, 0.0018825531005859375, 0.0036149024963378906, 0.005347251892089844, 0.007079601287841797, 0.00881195068359375, 0.010544300079345703, 0.012276649475097656, 0.01400899887084961, 0.015741348266601562, 0.017473697662353516, 0.01920604705810547, 0.020938396453857422, 0.022670745849609375, 0.024403095245361328, 0.02613544464111328, 0.027867794036865234, 0.029600143432617188, 0.03133249282836914, 0.033064842224121094, 0.03479719161987305, 0.036529541015625]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 5.0, 24.0, 58.0, 178.0, 371.0, 248.0, 74.0, 30.0, 10.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.30379849672317505, -0.2935653328895569, -0.28333213925361633, -0.27309897541999817, -0.26286581158638, -0.25263261795043945, -0.2423994541168213, -0.23216627538204193, -0.22193309664726257, -0.21169991791248322, -0.20146675407886505, -0.1912335753440857, -0.18100039660930634, -0.17076721787452698, -0.1605340540409088, -0.15030087530612946, -0.1400677114725113, -0.12983453273773193, -0.11960136145353317, -0.10936819016933441, -0.09913501143455505, -0.08890184015035629, -0.07866866886615753, -0.06843549013137817, -0.05820231884717941, -0.04796914383769035, -0.037735968828201294, -0.027502797544002533, -0.017269622534513474, -0.007036447525024414, 0.003196723759174347, 0.013429902493953705, 0.023663073778152466, 0.033896248787641525, 0.044129423797130585, 0.054362595081329346, 0.0645957738161087, 0.07482894510030746, 0.08506211638450623, 0.09529529511928558, 0.10552846640348434, 0.1157616376876831, 0.12599481642246246, 0.13622799515724182, 0.14646115899085999, 0.15669433772563934, 0.1669275164604187, 0.17716068029403687, 0.18739385902881622, 0.19762703776359558, 0.20786020159721375, 0.2180933803319931, 0.22832655906677246, 0.23855972290039062, 0.24879290163516998, 0.25902608036994934, 0.2692592442035675, 0.27949240803718567, 0.2897256016731262, 0.2999587655067444, 0.31019192934036255, 0.3204251229763031, 0.33065828680992126, 0.3408914804458618, 0.35112464427948]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 0.0, 3.0, 5.0, 6.0, 7.0, 9.0, 12.0, 33.0, 21.0, 29.0, 46.0, 56.0, 62.0, 74.0, 82.0, 86.0, 107.0, 89.0, 54.0, 61.0, 42.0, 32.0, 27.0, 21.0, 20.0, 10.0, 9.0, 3.0, 4.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1224563717842102, -0.11620055884122849, -0.10994475334882736, -0.10368894040584564, -0.09743313491344452, -0.0911773219704628, -0.08492150902748108, -0.07866570353507996, -0.07240989059209824, -0.06615407764911652, -0.05989827215671539, -0.05364245921373367, -0.04738664999604225, -0.04113084077835083, -0.03487502783536911, -0.02861921861767769, -0.022363409399986267, -0.016107600182294846, -0.009851789101958275, -0.003595978021621704, 0.0026598311960697174, 0.008915640413761139, 0.015171453356742859, 0.02142726257443428, 0.027683071792125702, 0.03393888100981712, 0.040194690227508545, 0.046450503170490265, 0.052706312388181686, 0.05896212160587311, 0.06521793454885483, 0.07147374749183655, 0.07772955298423767, 0.08398536592721939, 0.09024117141962051, 0.09649698436260223, 0.10275278985500336, 0.10900860279798508, 0.1152644157409668, 0.12152022123336792, 0.12777602672576904, 0.13403183221817017, 0.14028765261173248, 0.1465434581041336, 0.15279926359653473, 0.15905508399009705, 0.16531088948249817, 0.1715666949748993, 0.1778225153684616, 0.18407832086086273, 0.19033414125442505, 0.19658994674682617, 0.2028457522392273, 0.20910155773162842, 0.21535737812519073, 0.22161318361759186, 0.22786900401115417, 0.2341248095035553, 0.24038062989711761, 0.24663643538951874, 0.25289225578308105, 0.2591480612754822, 0.2654038667678833, 0.2716596722602844, 0.27791547775268555]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 4.0, 1.0, 3.0, 4.0, 9.0, 8.0, 9.0, 14.0, 22.0, 35.0, 39.0, 59.0, 81.0, 135.0, 201.0, 313.0, 549.0, 1184.0, 2615.0, 7474.0, 29099.0, 231846.0, 670925.0, 80593.0, 14854.0, 4593.0, 1849.0, 827.0, 438.0, 269.0, 137.0, 101.0, 76.0, 60.0, 33.0, 29.0, 22.0, 14.0, 10.0, 4.0, 9.0, 4.0, 2.0, 6.0, 3.0, 1.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.04852294921875, -0.047064781188964844, -0.04560661315917969, -0.04414844512939453, -0.042690277099609375, -0.04123210906982422, -0.03977394104003906, -0.038315773010253906, -0.03685760498046875, -0.035399436950683594, -0.03394126892089844, -0.03248310089111328, -0.031024932861328125, -0.02956676483154297, -0.028108596801757812, -0.026650428771972656, -0.0251922607421875, -0.023734092712402344, -0.022275924682617188, -0.02081775665283203, -0.019359588623046875, -0.01790142059326172, -0.016443252563476562, -0.014985084533691406, -0.01352691650390625, -0.012068748474121094, -0.010610580444335938, -0.009152412414550781, -0.007694244384765625, -0.006236076354980469, -0.0047779083251953125, -0.0033197402954101562, -0.001861572265625, -0.00040340423583984375, 0.0010547637939453125, 0.0025129318237304688, 0.003971099853515625, 0.005429267883300781, 0.0068874359130859375, 0.008345603942871094, 0.00980377197265625, 0.011261940002441406, 0.012720108032226562, 0.014178276062011719, 0.015636444091796875, 0.01709461212158203, 0.018552780151367188, 0.020010948181152344, 0.0214691162109375, 0.022927284240722656, 0.024385452270507812, 0.02584362030029297, 0.027301788330078125, 0.02875995635986328, 0.030218124389648438, 0.031676292419433594, 0.03313446044921875, 0.034592628479003906, 0.03605079650878906, 0.03750896453857422, 0.038967132568359375, 0.04042530059814453, 0.04188346862792969, 0.043341636657714844, 0.0447998046875]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 8.0, 4.0, 6.0, 8.0, 19.0, 20.0, 50.0, 76.0, 101.0, 126.0, 129.0, 128.0, 106.0, 84.0, 53.0, 32.0, 19.0, 12.0, 7.0, 8.0, 2.0, 5.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.027435302734375, -0.026323795318603516, -0.02521228790283203, -0.024100780487060547, -0.022989273071289062, -0.021877765655517578, -0.020766258239746094, -0.01965475082397461, -0.018543243408203125, -0.01743173599243164, -0.016320228576660156, -0.015208721160888672, -0.014097213745117188, -0.012985706329345703, -0.011874198913574219, -0.010762691497802734, -0.00965118408203125, -0.008539676666259766, -0.007428169250488281, -0.006316661834716797, -0.0052051544189453125, -0.004093647003173828, -0.0029821395874023438, -0.0018706321716308594, -0.000759124755859375, 0.0003523826599121094, 0.0014638900756835938, 0.002575397491455078, 0.0036869049072265625, 0.004798412322998047, 0.005909919738769531, 0.007021427154541016, 0.0081329345703125, 0.009244441986083984, 0.010355949401855469, 0.011467456817626953, 0.012578964233398438, 0.013690471649169922, 0.014801979064941406, 0.01591348648071289, 0.017024993896484375, 0.01813650131225586, 0.019248008728027344, 0.020359516143798828, 0.021471023559570312, 0.022582530975341797, 0.02369403839111328, 0.024805545806884766, 0.02591705322265625, 0.027028560638427734, 0.02814006805419922, 0.029251575469970703, 0.030363082885742188, 0.03147459030151367, 0.032586097717285156, 0.03369760513305664, 0.034809112548828125, 0.03592061996459961, 0.037032127380371094, 0.03814363479614258, 0.03925514221191406, 0.04036664962768555, 0.04147815704345703, 0.042589664459228516, 0.043701171875]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 9.0, 3.0, 3.0, 1.0, 5.0, 8.0, 12.0, 22.0, 26.0, 30.0, 44.0, 81.0, 126.0, 227.0, 443.0, 937.0, 2369.0, 9291.0, 120928.0, 870544.0, 35237.0, 5104.0, 1568.0, 701.0, 350.0, 156.0, 106.0, 78.0, 40.0, 31.0, 21.0, 13.0, 7.0, 7.0, 10.0, 11.0, 2.0, 1.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.0743408203125, -0.07229280471801758, -0.07024478912353516, -0.06819677352905273, -0.06614875793457031, -0.06410074234008789, -0.06205272674560547, -0.06000471115112305, -0.057956695556640625, -0.0559086799621582, -0.05386066436767578, -0.05181264877319336, -0.04976463317871094, -0.047716617584228516, -0.045668601989746094, -0.04362058639526367, -0.04157257080078125, -0.03952455520629883, -0.037476539611816406, -0.035428524017333984, -0.03338050842285156, -0.03133249282836914, -0.02928447723388672, -0.027236461639404297, -0.025188446044921875, -0.023140430450439453, -0.02109241485595703, -0.01904439926147461, -0.016996383666992188, -0.014948368072509766, -0.012900352478027344, -0.010852336883544922, -0.0088043212890625, -0.006756305694580078, -0.004708290100097656, -0.0026602745056152344, -0.0006122589111328125, 0.0014357566833496094, 0.0034837722778320312, 0.005531787872314453, 0.007579803466796875, 0.009627819061279297, 0.011675834655761719, 0.01372385025024414, 0.015771865844726562, 0.017819881439208984, 0.019867897033691406, 0.021915912628173828, 0.02396392822265625, 0.026011943817138672, 0.028059959411621094, 0.030107975006103516, 0.03215599060058594, 0.03420400619506836, 0.03625202178955078, 0.0383000373840332, 0.040348052978515625, 0.04239606857299805, 0.04444408416748047, 0.04649209976196289, 0.04854011535644531, 0.050588130950927734, 0.052636146545410156, 0.05468416213989258, 0.056732177734375]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 4.0, 6.0, 6.0, 5.0, 14.0, 17.0, 28.0, 50.0, 61.0, 66.0, 81.0, 99.0, 110.0, 117.0, 95.0, 60.0, 48.0, 42.0, 30.0, 21.0, 10.0, 11.0, 5.0, 6.0, 2.0, 1.0, 3.0, 1.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.1346435546875, -0.13094425201416016, -0.1272449493408203, -0.12354564666748047, -0.11984634399414062, -0.11614704132080078, -0.11244773864746094, -0.1087484359741211, -0.10504913330078125, -0.1013498306274414, -0.09765052795410156, -0.09395122528076172, -0.09025192260742188, -0.08655261993408203, -0.08285331726074219, -0.07915401458740234, -0.0754547119140625, -0.07175540924072266, -0.06805610656738281, -0.06435680389404297, -0.060657501220703125, -0.05695819854736328, -0.05325889587402344, -0.049559593200683594, -0.04586029052734375, -0.042160987854003906, -0.03846168518066406, -0.03476238250732422, -0.031063079833984375, -0.02736377716064453, -0.023664474487304688, -0.019965171813964844, -0.016265869140625, -0.012566566467285156, -0.008867263793945312, -0.005167961120605469, -0.001468658447265625, 0.0022306442260742188, 0.0059299468994140625, 0.009629249572753906, 0.01332855224609375, 0.017027854919433594, 0.020727157592773438, 0.02442646026611328, 0.028125762939453125, 0.03182506561279297, 0.03552436828613281, 0.039223670959472656, 0.0429229736328125, 0.046622276306152344, 0.05032157897949219, 0.05402088165283203, 0.057720184326171875, 0.06141948699951172, 0.06511878967285156, 0.0688180923461914, 0.07251739501953125, 0.0762166976928711, 0.07991600036621094, 0.08361530303955078, 0.08731460571289062, 0.09101390838623047, 0.09471321105957031, 0.09841251373291016, 0.10211181640625]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 6.0, 3.0, 4.0, 6.0, 3.0, 12.0, 21.0, 35.0, 68.0, 120.0, 289.0, 743.0, 2672.0, 19777.0, 733114.0, 275072.0, 13380.0, 2104.0, 617.0, 252.0, 105.0, 58.0, 40.0, 32.0, 10.0, 7.0, 5.0, 3.0, 4.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01494598388671875, -0.014518022537231445, -0.01409006118774414, -0.013662099838256836, -0.013234138488769531, -0.012806177139282227, -0.012378215789794922, -0.011950254440307617, -0.011522293090820312, -0.011094331741333008, -0.010666370391845703, -0.010238409042358398, -0.009810447692871094, -0.009382486343383789, -0.008954524993896484, -0.00852656364440918, -0.008098602294921875, -0.00767064094543457, -0.007242679595947266, -0.006814718246459961, -0.006386756896972656, -0.0059587955474853516, -0.005530834197998047, -0.005102872848510742, -0.0046749114990234375, -0.004246950149536133, -0.003818988800048828, -0.0033910274505615234, -0.0029630661010742188, -0.002535104751586914, -0.0021071434020996094, -0.0016791820526123047, -0.001251220703125, -0.0008232593536376953, -0.0003952980041503906, 3.266334533691406e-05, 0.00046062469482421875, 0.0008885860443115234, 0.0013165473937988281, 0.0017445087432861328, 0.0021724700927734375, 0.002600431442260742, 0.003028392791748047, 0.0034563541412353516, 0.0038843154907226562, 0.004312276840209961, 0.004740238189697266, 0.00516819953918457, 0.005596160888671875, 0.00602412223815918, 0.006452083587646484, 0.006880044937133789, 0.007308006286621094, 0.0077359676361083984, 0.008163928985595703, 0.008591890335083008, 0.009019851684570312, 0.009447813034057617, 0.009875774383544922, 0.010303735733032227, 0.010731697082519531, 0.011159658432006836, 0.01158761978149414, 0.012015581130981445, 0.01244354248046875]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 2.0, 3.0, 1.0, 3.0, 8.0, 3.0, 13.0, 12.0, 31.0, 45.0, 27.0, 54.0, 56.0, 34.0, 80.0, 57.0, 89.0, 83.0, 57.0, 66.0, 54.0, 56.0, 44.0, 18.0, 27.0, 16.0, 21.0, 14.0, 6.0, 6.0, 6.0, 9.0, 3.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.231929779052734e-06, -4.0763989090919495e-06, -3.9208680391311646e-06, -3.7653371691703796e-06, -3.6098062992095947e-06, -3.45427542924881e-06, -3.298744559288025e-06, -3.14321368932724e-06, -2.987682819366455e-06, -2.83215194940567e-06, -2.6766210794448853e-06, -2.5210902094841003e-06, -2.3655593395233154e-06, -2.2100284695625305e-06, -2.0544975996017456e-06, -1.8989667296409607e-06, -1.7434358596801758e-06, -1.5879049897193909e-06, -1.432374119758606e-06, -1.276843249797821e-06, -1.1213123798370361e-06, -9.657815098762512e-07, -8.102506399154663e-07, -6.547197699546814e-07, -4.991888999938965e-07, -3.4365803003311157e-07, -1.8812716007232666e-07, -3.259629011154175e-08, 1.2293457984924316e-07, 2.784654498100281e-07, 4.33996319770813e-07, 5.895271897315979e-07, 7.450580596923828e-07, 9.005889296531677e-07, 1.0561197996139526e-06, 1.2116506695747375e-06, 1.3671815395355225e-06, 1.5227124094963074e-06, 1.6782432794570923e-06, 1.8337741494178772e-06, 1.989305019378662e-06, 2.144835889339447e-06, 2.300366759300232e-06, 2.455897629261017e-06, 2.6114284992218018e-06, 2.7669593691825867e-06, 2.9224902391433716e-06, 3.0780211091041565e-06, 3.2335519790649414e-06, 3.3890828490257263e-06, 3.5446137189865112e-06, 3.700144588947296e-06, 3.855675458908081e-06, 4.011206328868866e-06, 4.166737198829651e-06, 4.322268068790436e-06, 4.477798938751221e-06, 4.633329808712006e-06, 4.7888606786727905e-06, 4.9443915486335754e-06, 5.09992241859436e-06, 5.255453288555145e-06, 5.41098415851593e-06, 5.566515028476715e-06, 5.7220458984375e-06]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 6.0, 10.0, 18.0, 30.0, 52.0, 74.0, 113.0, 240.0, 457.0, 1024.0, 3724.0, 25607.0, 739033.0, 257909.0, 15711.0, 2696.0, 895.0, 420.0, 212.0, 122.0, 66.0, 56.0, 27.0, 22.0, 5.0, 8.0, 4.0, 5.0, 2.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00927734375, -0.00894474983215332, -0.00861215591430664, -0.008279561996459961, -0.007946968078613281, -0.0076143741607666016, -0.007281780242919922, -0.006949186325073242, -0.0066165924072265625, -0.006283998489379883, -0.005951404571533203, -0.0056188106536865234, -0.005286216735839844, -0.004953622817993164, -0.004621028900146484, -0.004288434982299805, -0.003955841064453125, -0.0036232471466064453, -0.0032906532287597656, -0.002958059310913086, -0.0026254653930664062, -0.0022928714752197266, -0.001960277557373047, -0.0016276836395263672, -0.0012950897216796875, -0.0009624958038330078, -0.0006299018859863281, -0.00029730796813964844, 3.528594970703125e-05, 0.00036787986755371094, 0.0007004737854003906, 0.0010330677032470703, 0.00136566162109375, 0.0016982555389404297, 0.0020308494567871094, 0.002363443374633789, 0.0026960372924804688, 0.0030286312103271484, 0.003361225128173828, 0.003693819046020508, 0.0040264129638671875, 0.004359006881713867, 0.004691600799560547, 0.0050241947174072266, 0.005356788635253906, 0.005689382553100586, 0.006021976470947266, 0.006354570388793945, 0.006687164306640625, 0.007019758224487305, 0.007352352142333984, 0.007684946060180664, 0.008017539978027344, 0.008350133895874023, 0.008682727813720703, 0.009015321731567383, 0.009347915649414062, 0.009680509567260742, 0.010013103485107422, 0.010345697402954102, 0.010678291320800781, 0.011010885238647461, 0.01134347915649414, 0.01167607307434082, 0.0120086669921875]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 6.0, 3.0, 2.0, 6.0, 20.0, 18.0, 17.0, 32.0, 55.0, 86.0, 114.0, 206.0, 149.0, 107.0, 54.0, 41.0, 28.0, 27.0, 16.0, 10.0, 4.0, 4.0, 3.0, 0.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01447296142578125, -0.01390683650970459, -0.01334071159362793, -0.01277458667755127, -0.01220846176147461, -0.01164233684539795, -0.011076211929321289, -0.010510087013244629, -0.009943962097167969, -0.009377837181091309, -0.008811712265014648, -0.008245587348937988, -0.007679462432861328, -0.007113337516784668, -0.006547212600708008, -0.005981087684631348, -0.0054149627685546875, -0.004848837852478027, -0.004282712936401367, -0.003716588020324707, -0.003150463104248047, -0.0025843381881713867, -0.0020182132720947266, -0.0014520883560180664, -0.0008859634399414062, -0.0003198385238647461, 0.00024628639221191406, 0.0008124113082885742, 0.0013785362243652344, 0.0019446611404418945, 0.0025107860565185547, 0.003076910972595215, 0.003643035888671875, 0.004209160804748535, 0.004775285720825195, 0.0053414106369018555, 0.005907535552978516, 0.006473660469055176, 0.007039785385131836, 0.007605910301208496, 0.008172035217285156, 0.008738160133361816, 0.009304285049438477, 0.009870409965515137, 0.010436534881591797, 0.011002659797668457, 0.011568784713745117, 0.012134909629821777, 0.012701034545898438, 0.013267159461975098, 0.013833284378051758, 0.014399409294128418, 0.014965534210205078, 0.015531659126281738, 0.0160977840423584, 0.01666390895843506, 0.01723003387451172, 0.01779615879058838, 0.01836228370666504, 0.0189284086227417, 0.01949453353881836, 0.02006065845489502, 0.02062678337097168, 0.02119290828704834, 0.021759033203125]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 9.0, 6.0, 5.0, 13.0, 15.0, 32.0, 57.0, 95.0, 152.0, 307.0, 128.0, 81.0, 43.0, 19.0, 10.0, 11.0, 9.0, 8.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.25978440046310425, -0.2530006468296051, -0.24621687829494476, -0.23943312466144562, -0.23264937102794647, -0.22586560249328613, -0.219081848859787, -0.21229809522628784, -0.2055143415927887, -0.19873058795928955, -0.1919468194246292, -0.18516306579113007, -0.17837931215763092, -0.17159554362297058, -0.16481178998947144, -0.1580280363559723, -0.15124426782131195, -0.1444605141878128, -0.13767674565315247, -0.13089299201965332, -0.12410923838615417, -0.11732547730207443, -0.11054171621799469, -0.10375796258449554, -0.0969742015004158, -0.09019044041633606, -0.08340668678283691, -0.07662292569875717, -0.06983916461467743, -0.06305541098117828, -0.05627164989709854, -0.0494878925383091, -0.04270412027835846, -0.035920362919569016, -0.029136603698134422, -0.02235284447669983, -0.015569087117910385, -0.008785329759120941, -0.0020015686750411987, 0.004782188683748245, 0.01156594604253769, 0.018349703401327133, 0.025133462622761726, 0.03191722184419632, 0.038700979202985764, 0.04548473656177521, 0.05226849764585495, 0.059052255004644394, 0.06583601236343384, 0.07261977344751358, 0.07940352708101273, 0.08618728816509247, 0.09297104179859161, 0.09975480288267136, 0.1065385639667511, 0.11332231760025024, 0.12010607868432999, 0.12688983976840973, 0.13367359340190887, 0.1404573619365692, 0.14724111557006836, 0.1540248692035675, 0.16080862283706665, 0.167592391371727, 0.17437614500522614]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [3.0, 2.0, 0.0, 3.0, 2.0, 2.0, 4.0, 3.0, 4.0, 7.0, 7.0, 10.0, 13.0, 17.0, 13.0, 22.0, 15.0, 28.0, 22.0, 31.0, 35.0, 27.0, 40.0, 49.0, 90.0, 129.0, 96.0, 56.0, 42.0, 34.0, 33.0, 26.0, 28.0, 19.0, 17.0, 11.0, 19.0, 12.0, 12.0, 11.0, 6.0, 4.0, 2.0, 6.0, 5.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1775343418121338, -0.17057602107524872, -0.16361770033836365, -0.15665937960147858, -0.1497010588645935, -0.14274273812770844, -0.13578441739082336, -0.1288261115550995, -0.12186778336763382, -0.11490946263074875, -0.10795114189386368, -0.1009928286075592, -0.09403450787067413, -0.08707618713378906, -0.08011786639690399, -0.07315954566001892, -0.06620122492313385, -0.05924290418624878, -0.05228458344936371, -0.045326266437768936, -0.038367945700883865, -0.031409624963998795, -0.024451307952404022, -0.01749298721551895, -0.01053466647863388, -0.0035763466730713844, 0.0033819731324911118, 0.010340292006731033, 0.017298612743616104, 0.024256933480501175, 0.031215250492095947, 0.03817357122898102, 0.04513189196586609, 0.05209021270275116, 0.05904853343963623, 0.0660068541765213, 0.07296517491340637, 0.07992349565029144, 0.08688180893659592, 0.09384012967348099, 0.10079845041036606, 0.10775677114725113, 0.1147150918841362, 0.12167340517044067, 0.12863172590732574, 0.13559004664421082, 0.1425483673810959, 0.14950668811798096, 0.15646500885486603, 0.1634233295917511, 0.17038165032863617, 0.17733997106552124, 0.1842982918024063, 0.19125661253929138, 0.19821491837501526, 0.20517325401306152, 0.2121315598487854, 0.21908988058567047, 0.22604820132255554, 0.2330065220594406, 0.23996484279632568, 0.24692316353321075, 0.2538814842700958, 0.2608397901058197, 0.26779812574386597]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 4.0, 3.0, 0.0, 8.0, 8.0, 7.0, 11.0, 20.0, 33.0, 55.0, 90.0, 157.0, 351.0, 1017.0, 2590.0, 7866.0, 36904.0, 991967.0, 3014780.0, 120679.0, 13130.0, 2728.0, 866.0, 410.0, 210.0, 120.0, 72.0, 59.0, 44.0, 16.0, 23.0, 9.0, 14.0, 9.0, 4.0, 7.0, 3.0, 3.0, 0.0, 8.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.028411865234375, -0.027285099029541016, -0.02615833282470703, -0.025031566619873047, -0.023904800415039062, -0.022778034210205078, -0.021651268005371094, -0.02052450180053711, -0.019397735595703125, -0.01827096939086914, -0.017144203186035156, -0.016017436981201172, -0.014890670776367188, -0.013763904571533203, -0.012637138366699219, -0.011510372161865234, -0.01038360595703125, -0.009256839752197266, -0.008130073547363281, -0.007003307342529297, -0.0058765411376953125, -0.004749774932861328, -0.0036230087280273438, -0.0024962425231933594, -0.001369476318359375, -0.00024271011352539062, 0.0008840560913085938, 0.002010822296142578, 0.0031375885009765625, 0.004264354705810547, 0.005391120910644531, 0.006517887115478516, 0.0076446533203125, 0.008771419525146484, 0.009898185729980469, 0.011024951934814453, 0.012151718139648438, 0.013278484344482422, 0.014405250549316406, 0.01553201675415039, 0.016658782958984375, 0.01778554916381836, 0.018912315368652344, 0.020039081573486328, 0.021165847778320312, 0.022292613983154297, 0.02341938018798828, 0.024546146392822266, 0.02567291259765625, 0.026799678802490234, 0.02792644500732422, 0.029053211212158203, 0.030179977416992188, 0.03130674362182617, 0.032433509826660156, 0.03356027603149414, 0.034687042236328125, 0.03581380844116211, 0.036940574645996094, 0.03806734085083008, 0.03919410705566406, 0.04032087326049805, 0.04144763946533203, 0.042574405670166016, 0.043701171875]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 6.0, 1.0, 9.0, 11.0, 18.0, 21.0, 57.0, 67.0, 107.0, 107.0, 121.0, 119.0, 120.0, 96.0, 52.0, 42.0, 18.0, 15.0, 7.0, 6.0, 3.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0276336669921875, -0.026519060134887695, -0.02540445327758789, -0.024289846420288086, -0.02317523956298828, -0.022060632705688477, -0.020946025848388672, -0.019831418991088867, -0.018716812133789062, -0.017602205276489258, -0.016487598419189453, -0.015372991561889648, -0.014258384704589844, -0.013143777847290039, -0.012029170989990234, -0.01091456413269043, -0.009799957275390625, -0.00868535041809082, -0.007570743560791016, -0.006456136703491211, -0.005341529846191406, -0.0042269229888916016, -0.003112316131591797, -0.001997709274291992, -0.0008831024169921875, 0.0002315044403076172, 0.0013461112976074219, 0.0024607181549072266, 0.0035753250122070312, 0.004689931869506836, 0.005804538726806641, 0.006919145584106445, 0.00803375244140625, 0.009148359298706055, 0.01026296615600586, 0.011377573013305664, 0.012492179870605469, 0.013606786727905273, 0.014721393585205078, 0.015836000442504883, 0.016950607299804688, 0.018065214157104492, 0.019179821014404297, 0.0202944278717041, 0.021409034729003906, 0.02252364158630371, 0.023638248443603516, 0.02475285530090332, 0.025867462158203125, 0.02698206901550293, 0.028096675872802734, 0.02921128273010254, 0.030325889587402344, 0.03144049644470215, 0.03255510330200195, 0.03366971015930176, 0.03478431701660156, 0.03589892387390137, 0.03701353073120117, 0.03812813758850098, 0.03924274444580078, 0.040357351303100586, 0.04147195816040039, 0.042586565017700195, 0.043701171875]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 3.0, 2.0, 4.0, 1.0, 2.0, 6.0, 4.0, 10.0, 6.0, 13.0, 6.0, 13.0, 20.0, 26.0, 43.0, 64.0, 116.0, 276.0, 760.0, 3198.0, 242381.0, 3938523.0, 6921.0, 1052.0, 399.0, 174.0, 106.0, 62.0, 32.0, 25.0, 8.0, 9.0, 10.0, 6.0, 3.0, 2.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.130615234375, -0.1261310577392578, -0.12164688110351562, -0.11716270446777344, -0.11267852783203125, -0.10819435119628906, -0.10371017456054688, -0.09922599792480469, -0.0947418212890625, -0.09025764465332031, -0.08577346801757812, -0.08128929138183594, -0.07680511474609375, -0.07232093811035156, -0.06783676147460938, -0.06335258483886719, -0.058868408203125, -0.05438423156738281, -0.049900054931640625, -0.04541587829589844, -0.04093170166015625, -0.03644752502441406, -0.031963348388671875, -0.027479171752929688, -0.0229949951171875, -0.018510818481445312, -0.014026641845703125, -0.009542465209960938, -0.00505828857421875, -0.0005741119384765625, 0.003910064697265625, 0.008394241333007812, 0.01287841796875, 0.017362594604492188, 0.021846771240234375, 0.026330947875976562, 0.03081512451171875, 0.03529930114746094, 0.039783477783203125, 0.04426765441894531, 0.0487518310546875, 0.05323600769042969, 0.057720184326171875, 0.06220436096191406, 0.06668853759765625, 0.07117271423339844, 0.07565689086914062, 0.08014106750488281, 0.084625244140625, 0.08910942077636719, 0.09359359741210938, 0.09807777404785156, 0.10256195068359375, 0.10704612731933594, 0.11153030395507812, 0.11601448059082031, 0.1204986572265625, 0.12498283386230469, 0.12946701049804688, 0.13395118713378906, 0.13843536376953125, 0.14291954040527344, 0.14740371704101562, 0.1518878936767578, 0.1563720703125]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 5.0, 1.0, 8.0, 9.0, 21.0, 39.0, 52.0, 64.0, 120.0, 207.0, 491.0, 1050.0, 970.0, 451.0, 236.0, 118.0, 60.0, 42.0, 37.0, 22.0, 17.0, 10.0, 8.0, 8.0, 5.0, 3.0, 8.0, 7.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.08349609375, -0.08110237121582031, -0.07870864868164062, -0.07631492614746094, -0.07392120361328125, -0.07152748107910156, -0.06913375854492188, -0.06674003601074219, -0.0643463134765625, -0.06195259094238281, -0.059558868408203125, -0.05716514587402344, -0.05477142333984375, -0.05237770080566406, -0.049983978271484375, -0.04759025573730469, -0.045196533203125, -0.04280281066894531, -0.040409088134765625, -0.03801536560058594, -0.03562164306640625, -0.03322792053222656, -0.030834197998046875, -0.028440475463867188, -0.0260467529296875, -0.023653030395507812, -0.021259307861328125, -0.018865585327148438, -0.01647186279296875, -0.014078140258789062, -0.011684417724609375, -0.009290695190429688, -0.00689697265625, -0.0045032501220703125, -0.002109527587890625, 0.0002841949462890625, 0.00267791748046875, 0.0050716400146484375, 0.007465362548828125, 0.009859085083007812, 0.0122528076171875, 0.014646530151367188, 0.017040252685546875, 0.019433975219726562, 0.02182769775390625, 0.024221420288085938, 0.026615142822265625, 0.029008865356445312, 0.031402587890625, 0.03379631042480469, 0.036190032958984375, 0.03858375549316406, 0.04097747802734375, 0.04337120056152344, 0.045764923095703125, 0.04815864562988281, 0.0505523681640625, 0.05294609069824219, 0.055339813232421875, 0.05773353576660156, 0.06012725830078125, 0.06252098083496094, 0.06491470336914062, 0.06730842590332031, 0.0697021484375]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 2.0, 2.0, 3.0, 7.0, 30.0, 65.0, 165.0, 309.0, 232.0, 89.0, 50.0, 20.0, 4.0, 5.0, 8.0, 5.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.516916036605835, -0.4940682053565979, -0.47122034430503845, -0.4483725130558014, -0.42552468180656433, -0.4026768207550049, -0.3798289895057678, -0.35698115825653076, -0.3341333270072937, -0.31128549575805664, -0.2884376347064972, -0.26558980345726013, -0.24274197220802307, -0.21989412605762482, -0.19704627990722656, -0.1741984486579895, -0.15135058760643005, -0.1285027414560318, -0.10565491020679474, -0.08280706405639648, -0.05995922535657883, -0.03711138665676117, -0.014263540506362915, 0.008584290742874146, 0.0314321368932724, 0.05427997559309006, 0.07712781429290771, 0.09997566044330597, 0.12282349914312363, 0.14567133784294128, 0.16851918399333954, 0.1913670152425766, 0.21421486139297485, 0.2370627075433731, 0.25991055369377136, 0.2827583849430084, 0.3056062161922455, 0.32845407724380493, 0.351301908493042, 0.37414973974227905, 0.3969975709915161, 0.4198454022407532, 0.4426932632923126, 0.4655410945415497, 0.48838892579078674, 0.5112367868423462, 0.5340846180915833, 0.5569324493408203, 0.5797803401947021, 0.6026281714439392, 0.6254760026931763, 0.6483238935470581, 0.6711717247962952, 0.6940195560455322, 0.7168673872947693, 0.7397152185440063, 0.7625630497932434, 0.7854108810424805, 0.8082587122917175, 0.8311065435409546, 0.8539544343948364, 0.8768022656440735, 0.8996500968933105, 0.9224979281425476, 0.9453457593917847]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 7.0, 12.0, 10.0, 23.0, 26.0, 33.0, 42.0, 65.0, 64.0, 77.0, 91.0, 110.0, 98.0, 67.0, 60.0, 50.0, 51.0, 34.0, 27.0, 13.0, 14.0, 13.0, 2.0, 4.0, 3.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7037021517753601, -0.6845213174819946, -0.6653404235839844, -0.6461595892906189, -0.6269787549972534, -0.6077978610992432, -0.5886170268058777, -0.5694361925125122, -0.550255298614502, -0.5310744643211365, -0.5118935704231262, -0.49271273612976074, -0.47353190183639526, -0.4543510377407074, -0.43517017364501953, -0.41598933935165405, -0.3968085050582886, -0.3776276409626007, -0.35844680666923523, -0.33926594257354736, -0.3200851082801819, -0.300904244184494, -0.28172338008880615, -0.2625425457954407, -0.2433616816997528, -0.22418083250522614, -0.20499998331069946, -0.1858191192150116, -0.16663827002048492, -0.14745742082595825, -0.12827655673027039, -0.10909570753574371, -0.08991479873657227, -0.0707339495420456, -0.051553092896938324, -0.03237223997712135, -0.013191387057304382, 0.00598946213722229, 0.02517031878232956, 0.04435117542743683, 0.0635320246219635, 0.08271287381649017, 0.10189373046159744, 0.12107458710670471, 0.14025543630123138, 0.15943628549575806, 0.17861714959144592, 0.1977979987859726, 0.21697884798049927, 0.23615969717502594, 0.2553405463695526, 0.2745214104652405, 0.29370224475860596, 0.3128831088542938, 0.3320639729499817, 0.35124480724334717, 0.37042567133903503, 0.3896065354347229, 0.4087873697280884, 0.42796823382377625, 0.4471490979194641, 0.4663299322128296, 0.48551079630851746, 0.5046916604042053, 0.5238724946975708]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 6.0, 4.0, 4.0, 13.0, 6.0, 15.0, 13.0, 33.0, 44.0, 56.0, 87.0, 149.0, 246.0, 521.0, 1264.0, 3469.0, 19046.0, 484523.0, 513173.0, 19784.0, 3631.0, 1199.0, 547.0, 286.0, 144.0, 88.0, 61.0, 41.0, 28.0, 27.0, 15.0, 10.0, 6.0, 8.0, 5.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.078125, -0.07575798034667969, -0.07339096069335938, -0.07102394104003906, -0.06865692138671875, -0.06628990173339844, -0.06392288208007812, -0.06155586242675781, -0.0591888427734375, -0.05682182312011719, -0.054454803466796875, -0.05208778381347656, -0.04972076416015625, -0.04735374450683594, -0.044986724853515625, -0.04261970520019531, -0.040252685546875, -0.03788566589355469, -0.035518646240234375, -0.03315162658691406, -0.03078460693359375, -0.028417587280273438, -0.026050567626953125, -0.023683547973632812, -0.0213165283203125, -0.018949508666992188, -0.016582489013671875, -0.014215469360351562, -0.01184844970703125, -0.009481430053710938, -0.007114410400390625, -0.0047473907470703125, -0.00238037109375, -1.33514404296875e-05, 0.002353668212890625, 0.0047206878662109375, 0.00708770751953125, 0.009454727172851562, 0.011821746826171875, 0.014188766479492188, 0.0165557861328125, 0.018922805786132812, 0.021289825439453125, 0.023656845092773438, 0.02602386474609375, 0.028390884399414062, 0.030757904052734375, 0.03312492370605469, 0.035491943359375, 0.03785896301269531, 0.040225982666015625, 0.04259300231933594, 0.04496002197265625, 0.04732704162597656, 0.049694061279296875, 0.05206108093261719, 0.0544281005859375, 0.05679512023925781, 0.059162139892578125, 0.06152915954589844, 0.06389617919921875, 0.06626319885253906, 0.06863021850585938, 0.07099723815917969, 0.0733642578125]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 3.0, 11.0, 7.0, 16.0, 35.0, 48.0, 91.0, 109.0, 162.0, 156.0, 110.0, 107.0, 69.0, 34.0, 19.0, 6.0, 8.0, 5.0, 1.0, 4.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0697021484375, -0.06782197952270508, -0.06594181060791016, -0.06406164169311523, -0.06218147277832031, -0.06030130386352539, -0.05842113494873047, -0.05654096603393555, -0.054660797119140625, -0.0527806282043457, -0.05090045928955078, -0.04902029037475586, -0.04714012145996094, -0.045259952545166016, -0.043379783630371094, -0.04149961471557617, -0.03961944580078125, -0.03773927688598633, -0.035859107971191406, -0.033978939056396484, -0.03209877014160156, -0.03021860122680664, -0.02833843231201172, -0.026458263397216797, -0.024578094482421875, -0.022697925567626953, -0.02081775665283203, -0.01893758773803711, -0.017057418823242188, -0.015177249908447266, -0.013297080993652344, -0.011416912078857422, -0.0095367431640625, -0.007656574249267578, -0.005776405334472656, -0.0038962364196777344, -0.0020160675048828125, -0.00013589859008789062, 0.0017442703247070312, 0.003624439239501953, 0.005504608154296875, 0.007384777069091797, 0.009264945983886719, 0.01114511489868164, 0.013025283813476562, 0.014905452728271484, 0.016785621643066406, 0.018665790557861328, 0.02054595947265625, 0.022426128387451172, 0.024306297302246094, 0.026186466217041016, 0.028066635131835938, 0.02994680404663086, 0.03182697296142578, 0.0337071418762207, 0.035587310791015625, 0.03746747970581055, 0.03934764862060547, 0.04122781753540039, 0.04310798645019531, 0.044988155364990234, 0.046868324279785156, 0.04874849319458008, 0.050628662109375]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 5.0, 2.0, 5.0, 8.0, 9.0, 4.0, 16.0, 25.0, 27.0, 33.0, 45.0, 70.0, 114.0, 141.0, 203.0, 341.0, 566.0, 1141.0, 3091.0, 13370.0, 189277.0, 812831.0, 20129.0, 3882.0, 1379.0, 638.0, 380.0, 232.0, 136.0, 106.0, 73.0, 65.0, 48.0, 39.0, 24.0, 20.0, 16.0, 11.0, 12.0, 7.0, 9.0, 5.0, 3.0, 2.0, 6.0, 5.0, 3.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.046142578125, -0.04450082778930664, -0.04285907745361328, -0.04121732711791992, -0.03957557678222656, -0.0379338264465332, -0.036292076110839844, -0.034650325775146484, -0.033008575439453125, -0.031366825103759766, -0.029725074768066406, -0.028083324432373047, -0.026441574096679688, -0.024799823760986328, -0.02315807342529297, -0.02151632308959961, -0.01987457275390625, -0.01823282241821289, -0.01659107208251953, -0.014949321746826172, -0.013307571411132812, -0.011665821075439453, -0.010024070739746094, -0.008382320404052734, -0.006740570068359375, -0.005098819732666016, -0.0034570693969726562, -0.0018153190612792969, -0.0001735687255859375, 0.0014681816101074219, 0.0031099319458007812, 0.004751682281494141, 0.0063934326171875, 0.00803518295288086, 0.009676933288574219, 0.011318683624267578, 0.012960433959960938, 0.014602184295654297, 0.016243934631347656, 0.017885684967041016, 0.019527435302734375, 0.021169185638427734, 0.022810935974121094, 0.024452686309814453, 0.026094436645507812, 0.027736186981201172, 0.02937793731689453, 0.03101968765258789, 0.03266143798828125, 0.03430318832397461, 0.03594493865966797, 0.03758668899536133, 0.03922843933105469, 0.04087018966674805, 0.042511940002441406, 0.044153690338134766, 0.045795440673828125, 0.047437191009521484, 0.049078941345214844, 0.0507206916809082, 0.05236244201660156, 0.05400419235229492, 0.05564594268798828, 0.05728769302368164, 0.058929443359375]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0, 2.0, 2.0, 11.0, 10.0, 14.0, 14.0, 19.0, 32.0, 28.0, 59.0, 71.0, 102.0, 111.0, 150.0, 108.0, 69.0, 50.0, 36.0, 32.0, 18.0, 20.0, 10.0, 9.0, 5.0, 10.0, 6.0, 2.0, 1.0, 2.0, 2.0, 1.0, 4.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1129150390625, -0.10825347900390625, -0.1035919189453125, -0.09893035888671875, -0.094268798828125, -0.08960723876953125, -0.0849456787109375, -0.08028411865234375, -0.07562255859375, -0.07096099853515625, -0.0662994384765625, -0.06163787841796875, -0.056976318359375, -0.05231475830078125, -0.0476531982421875, -0.04299163818359375, -0.038330078125, -0.03366851806640625, -0.0290069580078125, -0.02434539794921875, -0.019683837890625, -0.01502227783203125, -0.0103607177734375, -0.00569915771484375, -0.00103759765625, 0.00362396240234375, 0.0082855224609375, 0.01294708251953125, 0.017608642578125, 0.02227020263671875, 0.0269317626953125, 0.03159332275390625, 0.0362548828125, 0.04091644287109375, 0.0455780029296875, 0.05023956298828125, 0.054901123046875, 0.05956268310546875, 0.0642242431640625, 0.06888580322265625, 0.07354736328125, 0.07820892333984375, 0.0828704833984375, 0.08753204345703125, 0.092193603515625, 0.09685516357421875, 0.1015167236328125, 0.10617828369140625, 0.11083984375, 0.11550140380859375, 0.1201629638671875, 0.12482452392578125, 0.129486083984375, 0.13414764404296875, 0.1388092041015625, 0.14347076416015625, 0.14813232421875, 0.15279388427734375, 0.1574554443359375, 0.16211700439453125, 0.166778564453125, 0.17144012451171875, 0.1761016845703125, 0.18076324462890625, 0.1854248046875]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 4.0, 2.0, 4.0, 3.0, 6.0, 5.0, 6.0, 3.0, 9.0, 6.0, 14.0, 12.0, 18.0, 20.0, 25.0, 33.0, 47.0, 80.0, 121.0, 233.0, 439.0, 986.0, 2467.0, 8820.0, 105615.0, 904610.0, 18376.0, 3864.0, 1352.0, 621.0, 290.0, 140.0, 78.0, 60.0, 45.0, 30.0, 22.0, 11.0, 17.0, 14.0, 7.0, 10.0, 9.0, 5.0, 4.0, 5.0, 8.0, 2.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.01169586181640625, -0.011320948600769043, -0.010946035385131836, -0.010571122169494629, -0.010196208953857422, -0.009821295738220215, -0.009446382522583008, -0.0090714693069458, -0.008696556091308594, -0.008321642875671387, -0.00794672966003418, -0.007571816444396973, -0.007196903228759766, -0.006821990013122559, -0.0064470767974853516, -0.0060721635818481445, -0.0056972503662109375, -0.0053223371505737305, -0.0049474239349365234, -0.004572510719299316, -0.004197597503662109, -0.0038226842880249023, -0.0034477710723876953, -0.0030728578567504883, -0.0026979446411132812, -0.0023230314254760742, -0.0019481182098388672, -0.0015732049942016602, -0.0011982917785644531, -0.0008233785629272461, -0.00044846534729003906, -7.355213165283203e-05, 0.000301361083984375, 0.000676274299621582, 0.001051187515258789, 0.001426100730895996, 0.0018010139465332031, 0.00217592716217041, 0.002550840377807617, 0.0029257535934448242, 0.0033006668090820312, 0.0036755800247192383, 0.004050493240356445, 0.004425406455993652, 0.004800319671630859, 0.005175232887268066, 0.0055501461029052734, 0.0059250593185424805, 0.0062999725341796875, 0.0066748857498168945, 0.0070497989654541016, 0.007424712181091309, 0.007799625396728516, 0.008174538612365723, 0.00854945182800293, 0.008924365043640137, 0.009299278259277344, 0.00967419147491455, 0.010049104690551758, 0.010424017906188965, 0.010798931121826172, 0.011173844337463379, 0.011548757553100586, 0.011923670768737793, 0.012298583984375]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0, 7.0, 3.0, 9.0, 15.0, 20.0, 16.0, 25.0, 23.0, 46.0, 51.0, 74.0, 43.0, 90.0, 90.0, 56.0, 80.0, 70.0, 63.0, 32.0, 34.0, 23.0, 18.0, 15.0, 23.0, 6.0, 13.0, 6.0, 12.0, 11.0, 6.0, 3.0, 1.0, 1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.304813385009766e-06, -5.143694579601288e-06, -4.98257577419281e-06, -4.821456968784332e-06, -4.6603381633758545e-06, -4.499219357967377e-06, -4.338100552558899e-06, -4.176981747150421e-06, -4.015862941741943e-06, -3.8547441363334656e-06, -3.693625330924988e-06, -3.53250652551651e-06, -3.3713877201080322e-06, -3.2102689146995544e-06, -3.0491501092910767e-06, -2.888031303882599e-06, -2.726912498474121e-06, -2.5657936930656433e-06, -2.4046748876571655e-06, -2.2435560822486877e-06, -2.08243727684021e-06, -1.921318471431732e-06, -1.7601996660232544e-06, -1.5990808606147766e-06, -1.4379620552062988e-06, -1.276843249797821e-06, -1.1157244443893433e-06, -9.546056389808655e-07, -7.934868335723877e-07, -6.323680281639099e-07, -4.7124922275543213e-07, -3.1013041734695435e-07, -1.4901161193847656e-07, 1.210719347000122e-08, 1.73225998878479e-07, 3.343448042869568e-07, 4.954636096954346e-07, 6.565824151039124e-07, 8.177012205123901e-07, 9.78820025920868e-07, 1.1399388313293457e-06, 1.3010576367378235e-06, 1.4621764421463013e-06, 1.623295247554779e-06, 1.7844140529632568e-06, 1.9455328583717346e-06, 2.1066516637802124e-06, 2.26777046918869e-06, 2.428889274597168e-06, 2.5900080800056458e-06, 2.7511268854141235e-06, 2.9122456908226013e-06, 3.073364496231079e-06, 3.234483301639557e-06, 3.3956021070480347e-06, 3.5567209124565125e-06, 3.7178397178649902e-06, 3.878958523273468e-06, 4.040077328681946e-06, 4.201196134090424e-06, 4.362314939498901e-06, 4.523433744907379e-06, 4.684552550315857e-06, 4.845671355724335e-06, 5.0067901611328125e-06]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 2.0, 3.0, 4.0, 5.0, 4.0, 8.0, 5.0, 3.0, 8.0, 14.0, 16.0, 18.0, 37.0, 53.0, 89.0, 182.0, 380.0, 911.0, 2829.0, 15410.0, 872305.0, 143893.0, 8941.0, 2033.0, 691.0, 317.0, 155.0, 69.0, 52.0, 30.0, 22.0, 19.0, 14.0, 7.0, 4.0, 5.0, 7.0, 3.0, 0.0, 4.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.00756072998046875, -0.007330656051635742, -0.007100582122802734, -0.0068705081939697266, -0.006640434265136719, -0.006410360336303711, -0.006180286407470703, -0.005950212478637695, -0.0057201385498046875, -0.00549006462097168, -0.005259990692138672, -0.005029916763305664, -0.004799842834472656, -0.0045697689056396484, -0.004339694976806641, -0.004109621047973633, -0.003879547119140625, -0.003649473190307617, -0.0034193992614746094, -0.0031893253326416016, -0.0029592514038085938, -0.002729177474975586, -0.002499103546142578, -0.0022690296173095703, -0.0020389556884765625, -0.0018088817596435547, -0.0015788078308105469, -0.001348733901977539, -0.0011186599731445312, -0.0008885860443115234, -0.0006585121154785156, -0.0004284381866455078, -0.0001983642578125, 3.170967102050781e-05, 0.0002617835998535156, 0.0004918575286865234, 0.0007219314575195312, 0.0009520053863525391, 0.0011820793151855469, 0.0014121532440185547, 0.0016422271728515625, 0.0018723011016845703, 0.002102375030517578, 0.002332448959350586, 0.0025625228881835938, 0.0027925968170166016, 0.0030226707458496094, 0.003252744674682617, 0.003482818603515625, 0.003712892532348633, 0.003942966461181641, 0.0041730403900146484, 0.004403114318847656, 0.004633188247680664, 0.004863262176513672, 0.00509333610534668, 0.0053234100341796875, 0.005553483963012695, 0.005783557891845703, 0.006013631820678711, 0.006243705749511719, 0.0064737796783447266, 0.006703853607177734, 0.006933927536010742, 0.00716400146484375]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 4.0, 5.0, 7.0, 6.0, 15.0, 19.0, 32.0, 42.0, 60.0, 105.0, 98.0, 124.0, 111.0, 100.0, 58.0, 56.0, 41.0, 27.0, 18.0, 18.0, 7.0, 10.0, 8.0, 6.0, 4.0, 1.0, 3.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0086669921875, -0.008385658264160156, -0.008104324340820312, -0.007822990417480469, -0.007541656494140625, -0.007260322570800781, -0.0069789886474609375, -0.006697654724121094, -0.00641632080078125, -0.006134986877441406, -0.0058536529541015625, -0.005572319030761719, -0.005290985107421875, -0.005009651184082031, -0.0047283172607421875, -0.004446983337402344, -0.0041656494140625, -0.0038843154907226562, -0.0036029815673828125, -0.0033216476440429688, -0.003040313720703125, -0.0027589797973632812, -0.0024776458740234375, -0.0021963119506835938, -0.00191497802734375, -0.0016336441040039062, -0.0013523101806640625, -0.0010709762573242188, -0.000789642333984375, -0.0005083084106445312, -0.0002269744873046875, 5.435943603515625e-05, 0.000335693359375, 0.0006170272827148438, 0.0008983612060546875, 0.0011796951293945312, 0.001461029052734375, 0.0017423629760742188, 0.0020236968994140625, 0.0023050308227539062, 0.00258636474609375, 0.0028676986694335938, 0.0031490325927734375, 0.0034303665161132812, 0.003711700439453125, 0.003993034362792969, 0.0042743682861328125, 0.004555702209472656, 0.0048370361328125, 0.005118370056152344, 0.0053997039794921875, 0.005681037902832031, 0.005962371826171875, 0.006243705749511719, 0.0065250396728515625, 0.006806373596191406, 0.00708770751953125, 0.007369041442871094, 0.0076503753662109375, 0.007931709289550781, 0.008213043212890625, 0.008494377136230469, 0.008775711059570312, 0.009057044982910156, 0.00933837890625]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 4.0, 9.0, 10.0, 35.0, 55.0, 116.0, 549.0, 101.0, 69.0, 32.0, 10.0, 10.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5484206080436707, -0.5348182916641235, -0.5212159752845764, -0.5076136589050293, -0.4940113425254822, -0.48040902614593506, -0.4668067395687103, -0.4532044231891632, -0.4396021068096161, -0.42599979043006897, -0.41239747405052185, -0.39879515767097473, -0.38519287109375, -0.3715905547142029, -0.35798823833465576, -0.34438592195510864, -0.3307836055755615, -0.3171812891960144, -0.3035789728164673, -0.28997665643692017, -0.27637434005737305, -0.2627720236778259, -0.2491697371006012, -0.23556742072105408, -0.22196510434150696, -0.20836278796195984, -0.19476047158241272, -0.1811581701040268, -0.16755585372447968, -0.15395353734493256, -0.14035123586654663, -0.1267489194869995, -0.11314657330513, -0.09954425692558289, -0.08594194799661636, -0.07233963906764984, -0.05873732268810272, -0.0451350063085556, -0.03153269737958908, -0.01793038845062256, -0.0043280720710754395, 0.009274240583181381, 0.022876553237438202, 0.03647886589169502, 0.05008117854595184, 0.06368349492549896, 0.07728580385446548, 0.090888112783432, 0.10449042916297913, 0.11809274554252625, 0.13169506192207336, 0.1452973634004593, 0.1588996797800064, 0.17250199615955353, 0.18610429763793945, 0.19970661401748657, 0.2133089303970337, 0.2269112467765808, 0.24051356315612793, 0.25411587953567505, 0.26771819591522217, 0.2813205122947693, 0.294922798871994, 0.30852511525154114, 0.32212743163108826]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 4.0, 5.0, 5.0, 15.0, 12.0, 9.0, 10.0, 12.0, 25.0, 17.0, 18.0, 19.0, 26.0, 29.0, 30.0, 84.0, 295.0, 127.0, 37.0, 35.0, 27.0, 20.0, 21.0, 19.0, 17.0, 16.0, 11.0, 10.0, 8.0, 6.0, 9.0, 6.0, 4.0, 4.0, 4.0, 4.0, 2.0, 0.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.2867671847343445, -0.2784845232963562, -0.2702018618583679, -0.26191920042037964, -0.25363653898239136, -0.24535387754440308, -0.2370712012052536, -0.22878853976726532, -0.22050587832927704, -0.21222321689128876, -0.20394055545330048, -0.1956578940153122, -0.18737521767616272, -0.17909255623817444, -0.17080989480018616, -0.16252723336219788, -0.1542445719242096, -0.1459619104862213, -0.13767924904823303, -0.12939658761024475, -0.12111391872167587, -0.11283125728368759, -0.10454858839511871, -0.09626592695713043, -0.08798326551914215, -0.07970060408115387, -0.07141794264316559, -0.06313527375459671, -0.05485261231660843, -0.04656995087862015, -0.03828728571534157, -0.03000462055206299, -0.021721959114074707, -0.013439295813441277, -0.005156632512807846, 0.0031260307878255844, 0.011408694088459015, 0.019691355526447296, 0.027974020689725876, 0.036256685853004456, 0.04453934729099274, 0.05282200872898102, 0.0611046738922596, 0.06938733905553818, 0.07767000049352646, 0.08595266193151474, 0.09423533082008362, 0.1025179922580719, 0.11080065369606018, 0.11908331513404846, 0.12736597657203674, 0.13564863801002502, 0.1439312994480133, 0.1522139608860016, 0.16049663722515106, 0.16877929866313934, 0.17706196010112762, 0.1853446215391159, 0.1936272829771042, 0.20190994441509247, 0.21019262075424194, 0.21847528219223022, 0.2267579436302185, 0.2350406050682068, 0.24332326650619507]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 3.0, 3.0, 9.0, 9.0, 21.0, 26.0, 22.0, 34.0, 47.0, 72.0, 139.0, 322.0, 71.0, 49.0, 44.0, 30.0, 28.0, 23.0, 16.0, 15.0, 5.0, 4.0, 2.0, 1.0, 4.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.027984619140625, -0.02709031105041504, -0.026196002960205078, -0.025301694869995117, -0.024407386779785156, -0.023513078689575195, -0.022618770599365234, -0.021724462509155273, -0.020830154418945312, -0.01993584632873535, -0.01904153823852539, -0.01814723014831543, -0.01725292205810547, -0.016358613967895508, -0.015464305877685547, -0.014569997787475586, -0.013675689697265625, -0.012781381607055664, -0.011887073516845703, -0.010992765426635742, -0.010098457336425781, -0.00920414924621582, -0.00830984115600586, -0.0074155330657958984, -0.0065212249755859375, -0.0056269168853759766, -0.004732608795166016, -0.0038383007049560547, -0.0029439926147460938, -0.002049684524536133, -0.0011553764343261719, -0.00026106834411621094, 0.00063323974609375, 0.001527547836303711, 0.002421855926513672, 0.003316164016723633, 0.004210472106933594, 0.005104780197143555, 0.005999088287353516, 0.0068933963775634766, 0.0077877044677734375, 0.008682012557983398, 0.00957632064819336, 0.01047062873840332, 0.011364936828613281, 0.012259244918823242, 0.013153553009033203, 0.014047861099243164, 0.014942169189453125, 0.015836477279663086, 0.016730785369873047, 0.017625093460083008, 0.01851940155029297, 0.01941370964050293, 0.02030801773071289, 0.02120232582092285, 0.022096633911132812, 0.022990942001342773, 0.023885250091552734, 0.024779558181762695, 0.025673866271972656, 0.026568174362182617, 0.027462482452392578, 0.02835679054260254, 0.0292510986328125]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [8.0, 4.0, 1.0, 0.0, 10.0, 5.0, 4.0, 2.0, 1.0, 7.0, 2.0, 2.0, 5.0, 5.0, 6.0, 12.0, 9.0, 9.0, 18.0, 33.0, 44.0, 71.0, 87.0, 110.0, 222.0, 475.0, 1354.0, 6448.0, 8323235.0, 51027.0, 3505.0, 927.0, 383.0, 200.0, 92.0, 68.0, 37.0, 23.0, 32.0, 49.0, 11.0, 5.0, 8.0, 5.0, 8.0, 6.0, 7.0, 3.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 6.0, 5.0], "bins": [-0.10351169109344482, -0.09992292523384094, -0.09633415937423706, -0.09274539351463318, -0.0891566276550293, -0.08556786179542542, -0.08197909593582153, -0.07839033007621765, -0.07480156421661377, -0.07121279835700989, -0.067624032497406, -0.06403526663780212, -0.06044650077819824, -0.05685773491859436, -0.05326896905899048, -0.0496802031993866, -0.04609144106507301, -0.04250267520546913, -0.03891390934586525, -0.03532514348626137, -0.031736377626657486, -0.028147613629698753, -0.02455884777009487, -0.02097008191049099, -0.017381316050887108, -0.013792550191283226, -0.010203784331679344, -0.006615019403398037, -0.003026253543794155, 0.0005625113844871521, 0.004151277244091034, 0.007740043103694916, 0.011328808963298798, 0.01491757482290268, 0.01850634068250656, 0.022095106542110443, 0.025683872401714325, 0.029272636398673058, 0.03286140412092209, 0.03645016998052597, 0.04003893584012985, 0.043627701699733734, 0.047216467559337616, 0.0508052334189415, 0.05439399927854538, 0.05798276513814926, 0.06157153099775314, 0.06516029685735703, 0.06874905526638031, 0.07233782112598419, 0.07592658698558807, 0.07951535284519196, 0.08310411870479584, 0.08669288456439972, 0.0902816504240036, 0.09387041628360748, 0.09745918214321136, 0.10104794800281525, 0.10463671386241913, 0.10822547972202301, 0.11181424558162689, 0.11540301144123077, 0.11899177730083466, 0.12258054316043854, 0.12616930902004242]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 4.0, 1.0, 2.0, 4.0, 3.0, 4.0, 4.0, 6.0, 7.0, 5.0, 6.0, 3.0, 3.0, 2.0, 7.0, 6.0, 5.0, 2.0, 4.0, 2.0, 3.0, 3.0, 6.0, 2.0, 3.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.06511147320270538, -0.06344001740217209, -0.061768557876348495, -0.0600970983505249, -0.05842564254999161, -0.056754183024168015, -0.05508272349834442, -0.05341126769781113, -0.05173981189727783, -0.05006835237145424, -0.048396896570920944, -0.04672543704509735, -0.045053981244564056, -0.04338252171874046, -0.04171106219291687, -0.040039606392383575, -0.03836814686655998, -0.03669668734073639, -0.035025231540203094, -0.0333537720143795, -0.03168231621384621, -0.030010856688022614, -0.02833939902484417, -0.026667941361665726, -0.024996483698487282, -0.023325026035308838, -0.021653568372130394, -0.01998211070895195, -0.018310651183128357, -0.016639195382595062, -0.014967735856771469, -0.013296278193593025, -0.01162482425570488, -0.009953366592526436, -0.008281908929347992, -0.006610450334846973, -0.0049389926716685295, -0.0032675350084900856, -0.001596076413989067, 7.538124918937683e-05, 0.0017468389123678207, 0.0034182968083769083, 0.005089754704385996, 0.006761212833225727, 0.008432670496404171, 0.010104128159582615, 0.011775586754083633, 0.013447044417262077, 0.015118502080440521, 0.016789959743618965, 0.01846141740679741, 0.020132876932621002, 0.021804332733154297, 0.02347579225897789, 0.025147249922156334, 0.026818707585334778, 0.028490165248513222, 0.030161622911691666, 0.03183308243751526, 0.03350453823804855, 0.03517599776387215, 0.03684745356440544, 0.038518913090229034, 0.04019036889076233, 0.04186182841658592]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 3.0, 2.0, 3.0, 1.0, 6.0, 3.0, 8.0, 10.0, 14.0, 11.0, 17.0, 26.0, 30.0, 40.0, 55.0, 71.0, 96.0, 153.0, 210.0, 398.0, 912.0, 2851.0, 12059.0, 71552.0, 317910.0, 96279.0, 15516.0, 3515.0, 1206.0, 526.0, 282.0, 180.0, 93.0, 73.0, 45.0, 39.0, 28.0, 12.0, 9.0, 11.0, 7.0, 7.0, 1.0, 7.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12115478515625, -0.11719226837158203, -0.11322975158691406, -0.1092672348022461, -0.10530471801757812, -0.10134220123291016, -0.09737968444824219, -0.09341716766357422, -0.08945465087890625, -0.08549213409423828, -0.08152961730957031, -0.07756710052490234, -0.07360458374023438, -0.0696420669555664, -0.06567955017089844, -0.06171703338623047, -0.0577545166015625, -0.05379199981689453, -0.04982948303222656, -0.045866966247558594, -0.041904449462890625, -0.037941932678222656, -0.03397941589355469, -0.03001689910888672, -0.02605438232421875, -0.02209186553955078, -0.018129348754882812, -0.014166831970214844, -0.010204315185546875, -0.006241798400878906, -0.0022792816162109375, 0.0016832351684570312, 0.005645751953125, 0.009608268737792969, 0.013570785522460938, 0.017533302307128906, 0.021495819091796875, 0.025458335876464844, 0.029420852661132812, 0.03338336944580078, 0.03734588623046875, 0.04130840301513672, 0.04527091979980469, 0.049233436584472656, 0.053195953369140625, 0.057158470153808594, 0.06112098693847656, 0.06508350372314453, 0.0690460205078125, 0.07300853729248047, 0.07697105407714844, 0.0809335708618164, 0.08489608764648438, 0.08885860443115234, 0.09282112121582031, 0.09678363800048828, 0.10074615478515625, 0.10470867156982422, 0.10867118835449219, 0.11263370513916016, 0.11659622192382812, 0.1205587387084961, 0.12452125549316406, 0.12848377227783203, 0.1324462890625]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 5.0, 9.0, 22.0, 28.0, 66.0, 102.0, 159.0, 196.0, 184.0, 99.0, 56.0, 38.0, 14.0, 7.0, 6.0, 6.0, 7.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07122802734375, -0.06922435760498047, -0.06722068786621094, -0.0652170181274414, -0.06321334838867188, -0.061209678649902344, -0.05920600891113281, -0.05720233917236328, -0.05519866943359375, -0.05319499969482422, -0.05119132995605469, -0.049187660217285156, -0.047183990478515625, -0.045180320739746094, -0.04317665100097656, -0.04117298126220703, -0.0391693115234375, -0.03716564178466797, -0.03516197204589844, -0.033158302307128906, -0.031154632568359375, -0.029150962829589844, -0.027147293090820312, -0.02514362335205078, -0.02313995361328125, -0.02113628387451172, -0.019132614135742188, -0.017128944396972656, -0.015125274658203125, -0.013121604919433594, -0.011117935180664062, -0.009114265441894531, -0.007110595703125, -0.005106925964355469, -0.0031032562255859375, -0.0010995864868164062, 0.000904083251953125, 0.0029077529907226562, 0.0049114227294921875, 0.006915092468261719, 0.00891876220703125, 0.010922431945800781, 0.012926101684570312, 0.014929771423339844, 0.016933441162109375, 0.018937110900878906, 0.020940780639648438, 0.02294445037841797, 0.0249481201171875, 0.02695178985595703, 0.028955459594726562, 0.030959129333496094, 0.032962799072265625, 0.034966468811035156, 0.03697013854980469, 0.03897380828857422, 0.04097747802734375, 0.04298114776611328, 0.04498481750488281, 0.046988487243652344, 0.048992156982421875, 0.050995826721191406, 0.05299949645996094, 0.05500316619873047, 0.0570068359375]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 5.0, 4.0, 7.0, 14.0, 17.0, 48.0, 154.0, 164.0, 37.0, 17.0, 11.0, 5.0, 3.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2051418423652649, -0.19796115159988403, -0.19078047573566437, -0.1835997849702835, -0.17641909420490265, -0.16923841834068298, -0.16205772757530212, -0.15487703680992126, -0.1476963460445404, -0.14051565527915955, -0.13333497941493988, -0.12615428864955902, -0.11897359788417816, -0.1117929145693779, -0.10461223125457764, -0.09743154048919678, -0.09025085717439651, -0.08307017385959625, -0.0758894830942154, -0.06870879977941513, -0.06152810901403427, -0.05434742569923401, -0.04716673865914345, -0.03998605161905289, -0.032805364578962326, -0.025624677538871765, -0.018443990498781204, -0.011263305321335793, -0.004082618281245232, 0.00309806689620018, 0.010278753936290741, 0.017459440976381302, 0.024640128016471863, 0.031820815056562424, 0.039001502096652985, 0.04618218541145325, 0.053362876176834106, 0.06054355949163437, 0.06772424280643463, 0.07490493357181549, 0.08208562433719635, 0.08926630765199661, 0.09644699841737747, 0.10362768173217773, 0.1108083724975586, 0.11798905581235886, 0.12516973912715912, 0.13235042989253998, 0.13953110575675964, 0.1467117965221405, 0.15389247238636017, 0.16107316315174103, 0.1682538539171219, 0.17543452978134155, 0.1826152205467224, 0.18979591131210327, 0.19697660207748413, 0.204157292842865, 0.21133796870708466, 0.21851865947246552, 0.22569935023784637, 0.23288002610206604, 0.2400607168674469, 0.24724140763282776, 0.2544220983982086]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 4.0, 3.0, 1.0, 5.0, 5.0, 9.0, 9.0, 18.0, 25.0, 62.0, 138.0, 106.0, 41.0, 12.0, 9.0, 7.0, 9.0, 4.0, 0.0, 1.0, 5.0, 1.0, 2.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.22547274827957153, -0.21915610134601593, -0.21283945441246033, -0.20652280747890472, -0.20020616054534912, -0.19388951361179352, -0.18757286667823792, -0.1812562197446823, -0.1749395728111267, -0.1686229258775711, -0.1623062789440155, -0.1559896320104599, -0.1496729850769043, -0.1433563381433487, -0.1370396912097931, -0.1307230442762375, -0.12440639734268188, -0.11808975040912628, -0.11177310347557068, -0.10545645654201508, -0.09913980960845947, -0.09282316267490387, -0.08650651574134827, -0.08018986880779266, -0.07387322187423706, -0.06755657494068146, -0.061239928007125854, -0.05492328107357025, -0.04860663414001465, -0.042289987206459045, -0.03597334027290344, -0.02965669333934784, -0.023340046405792236, -0.017023399472236633, -0.01070675253868103, -0.004390105605125427, 0.0019265413284301758, 0.008243188261985779, 0.014559835195541382, 0.020876482129096985, 0.027193129062652588, 0.03350977599620819, 0.039826422929763794, 0.0461430698633194, 0.052459716796875, 0.0587763637304306, 0.0650930106639862, 0.07140965759754181, 0.07772630453109741, 0.08404295146465302, 0.09035959839820862, 0.09667624533176422, 0.10299289226531982, 0.10930953919887543, 0.11562618613243103, 0.12194283306598663, 0.12825947999954224, 0.13457612693309784, 0.14089277386665344, 0.14720942080020905, 0.15352606773376465, 0.15984271466732025, 0.16615936160087585, 0.17247600853443146, 0.17879265546798706]}, "eval/loss": 5.36206579208374, "eval/wer": 1.9585878619595398, "eval/runtime": 955.9194, "eval/samples_per_second": 2.764, "eval/steps_per_second": 0.346} \ No newline at end of file diff --git a/wandb/run-20220302_085255-16llzpbl/logs/debug-internal.log b/wandb/run-20220302_085255-16llzpbl/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..df5e0316618f7ee9d25cfba2775d8ad1b1148468 --- /dev/null +++ b/wandb/run-20220302_085255-16llzpbl/logs/debug-internal.log @@ -0,0 +1,6416 @@ +2022-03-02 08:52:56,323 INFO MainThread:254666 [internal.py:wandb_internal():89] W&B internal server running at pid: 254666, started at: 2022-03-02 08:52:56.323393 +2022-03-02 08:52:56,326 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: check_version +2022-03-02 08:52:56,326 INFO WriterThread:254666 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/run-16llzpbl.wandb +2022-03-02 08:52:56,327 DEBUG SenderThread:254666 [sender.py:send():235] send: header +2022-03-02 08:52:56,327 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: check_version +2022-03-02 08:52:56,394 DEBUG SenderThread:254666 [sender.py:send():235] send: run +2022-03-02 08:52:56,522 INFO SenderThread:254666 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files +2022-03-02 08:52:56,522 INFO SenderThread:254666 [sender.py:_start_run_threads():809] run started: 16llzpbl with start time 1646211175 +2022-03-02 08:52:56,522 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:52:56,522 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:52:56,523 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: run_start +2022-03-02 08:52:56,527 DEBUG HandlerThread:254666 [meta.py:__init__():36] meta init +2022-03-02 08:52:56,527 DEBUG HandlerThread:254666 [meta.py:__init__():50] meta init done +2022-03-02 08:52:56,527 DEBUG HandlerThread:254666 [meta.py:probe():210] probe +2022-03-02 08:52:56,534 DEBUG HandlerThread:254666 [meta.py:_setup_git():200] setup git +2022-03-02 08:52:56,548 DEBUG HandlerThread:254666 [meta.py:_setup_git():207] setup git done +2022-03-02 08:52:56,548 DEBUG HandlerThread:254666 [meta.py:_save_pip():54] save pip +2022-03-02 08:52:56,549 DEBUG HandlerThread:254666 [meta.py:_save_pip():68] save pip done +2022-03-02 08:52:56,549 DEBUG HandlerThread:254666 [meta.py:probe():248] probe done +2022-03-02 08:52:56,626 DEBUG SenderThread:254666 [sender.py:send():235] send: files +2022-03-02 08:52:56,626 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-03-02 08:52:56,631 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:52:56,631 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:52:56,667 DEBUG SenderThread:254666 [sender.py:send():235] send: config +2022-03-02 08:52:56,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:52:56,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:52:56,668 WARNING SenderThread:254666 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-03-02 08:52:56,881 INFO Thread-11 :254666 [upload_job.py:push():137] Uploaded file /tmp/tmpaz1pijzmwandb/21dee3ob-wandb-metadata.json +2022-03-02 08:52:57,524 INFO Thread-8 :254666 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-metadata.json +2022-03-02 08:52:57,524 INFO Thread-8 :254666 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:52:57,524 INFO Thread-8 :254666 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:52:57,524 INFO Thread-8 :254666 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/requirements.txt +2022-03-02 08:52:59,523 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:00,828 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:53:00,829 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:53:00,829 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:53:00,829 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:00,829 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:00,830 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:01,524 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:01,524 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:03,525 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:04,543 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:04,543 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:04,543 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:05,525 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:05,526 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:07,526 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:08,321 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:08,322 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:08,322 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:08,526 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:09,527 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:11,527 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:12,052 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:53:12,053 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:53:12,085 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:12,085 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:12,085 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:12,528 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:13,528 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:15,529 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:15,717 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:15,717 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:15,717 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:16,529 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:17,530 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:19,367 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:19,367 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:19,367 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:19,530 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:19,531 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:21,531 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:22,977 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:22,978 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:22,979 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:23,532 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:23,532 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:24,977 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:53:25,532 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:26,565 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:26,706 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:26,706 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:27,243 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:53:27,243 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:53:27,533 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/config.yaml +2022-03-02 08:53:27,533 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:27,533 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:29,534 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:30,097 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:30,098 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:30,098 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:30,534 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:31,534 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:33,535 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:33,605 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:33,605 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:33,606 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:34,535 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:35,536 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:36,536 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:37,101 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:37,101 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:37,102 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:37,536 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:38,537 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:40,538 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:40,610 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:40,611 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:40,611 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:41,538 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:42,389 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:53:42,389 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:53:42,538 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:44,097 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:44,097 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:44,098 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:44,539 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:44,539 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:46,539 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:47,550 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:47,550 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:47,550 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:48,540 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:48,540 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:50,541 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:50,997 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:50,998 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:50,998 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:51,541 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:52,542 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:54,410 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:54,410 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:54,411 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:54,542 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:54,542 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:55,332 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:53:56,543 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:53:57,554 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:53:57,555 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:53:57,768 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:53:57,768 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:53:57,769 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:53:58,544 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:53:58,544 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:00,544 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:01,089 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:01,090 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:01,090 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:01,544 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:02,545 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:04,426 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:04,426 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:04,427 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:04,545 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:04,546 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:06,546 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:07,744 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:07,744 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:07,745 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:08,547 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:08,547 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:10,547 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:11,084 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:11,085 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:11,085 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:11,548 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:12,548 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:12,602 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:54:12,603 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:54:14,413 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:14,413 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:14,415 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:14,549 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:15,549 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:16,550 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:17,550 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:17,676 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:17,677 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:17,677 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:18,551 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:18,551 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:19,551 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:20,910 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:20,910 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:20,911 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:21,552 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:21,552 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:22,552 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:23,552 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:24,124 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:24,124 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:24,125 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:24,553 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:25,553 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:25,675 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:54:26,553 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:27,331 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:27,331 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:27,332 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:27,554 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:27,554 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:27,700 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:54:27,701 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:54:28,554 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:29,554 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:30,525 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:30,525 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:30,525 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:30,555 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:31,555 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:32,555 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:33,556 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:33,707 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:33,707 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:33,708 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:34,556 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:35,556 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:36,868 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:36,869 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:36,869 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:37,557 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:37,557 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:39,558 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:40,034 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:40,035 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:40,035 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:40,558 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:41,558 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:42,935 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:54:42,936 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:54:43,174 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:43,174 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:43,175 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:43,559 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:43,559 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:45,560 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:46,211 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:46,211 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:46,211 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:46,560 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:47,560 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:49,280 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:49,280 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:49,281 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:49,561 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:49,561 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:51,562 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:52,135 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:52,135 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:52,135 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:52,562 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:53,563 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:54,994 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:54,995 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:54,996 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:55,563 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:55,564 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:56,046 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:54:57,564 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:54:57,850 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:54:57,851 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:54:57,851 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:54:58,042 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:54:58,043 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:54:58,564 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:54:59,565 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:00,610 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:00,611 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:00,611 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:01,565 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:01,566 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:02,566 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:03,364 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:03,365 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:03,365 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:03,566 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:04,567 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:05,567 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:06,016 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:06,016 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:06,017 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:06,567 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:06,568 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:08,558 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:08,558 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:08,559 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:08,568 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:08,568 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:10,568 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:11,038 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:11,038 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:11,039 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:11,569 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:12,569 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:13,233 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:55:13,234 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:55:13,418 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:13,418 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:13,418 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:13,569 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:14,570 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:15,675 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:15,676 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:15,676 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:16,570 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:16,571 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:17,767 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:17,767 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:17,768 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:18,571 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:18,571 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:19,682 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:19,682 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:19,683 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:20,572 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:20,572 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:21,375 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:21,375 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:21,376 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:21,572 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:22,572 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:22,894 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:22,894 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:22,895 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:23,573 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:24,262 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:24,263 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:24,263 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:24,573 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:24,573 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:25,464 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:25,464 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:25,465 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:25,573 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:26,395 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:55:26,574 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:27,336 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,341 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,341 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,341 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,341 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,342 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,342 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,347 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,347 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,352 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,357 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,363 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,368 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,373 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,378 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,384 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,389 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,389 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,389 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,389 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,397 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,397 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,397 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,397 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,398 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,404 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,404 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,404 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,404 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,404 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,404 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,404 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,404 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,404 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,404 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,404 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,405 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,405 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,410 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,410 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,410 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,410 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,415 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,421 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,426 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,426 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,426 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,426 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,426 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,426 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,427 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,427 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,427 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,427 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,432 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,432 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,437 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,438 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,443 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,443 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,443 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,591 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/config.yaml +2022-03-02 08:55:27,591 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,592 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,593 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,594 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,595 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,596 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,597 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,598 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,599 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,600 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,601 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,602 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,603 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,604 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,605 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,606 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,607 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,608 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,609 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,610 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,611 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,612 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,613 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,614 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,615 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,616 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,617 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,618 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,619 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,620 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,621 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,622 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,623 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,624 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,625 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,627 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,628 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,629 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,630 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,631 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,632 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,633 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,634 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,635 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,636 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,637 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,638 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,639 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,640 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,641 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,642 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,643 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,644 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,645 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,646 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,647 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,648 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,649 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,650 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,651 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,652 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,653 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,654 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,655 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,655 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,655 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,655 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,655 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,655 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,655 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,655 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,655 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,655 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,655 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,656 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,657 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,658 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,659 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,660 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,661 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,662 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,663 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,664 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,665 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,666 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,667 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,668 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,669 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,670 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,671 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,672 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,673 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,674 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,675 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,676 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,677 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,678 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,679 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,680 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,681 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,682 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,683 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,684 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,685 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,686 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,687 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,688 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,689 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,690 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,691 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,692 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,693 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,694 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,695 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,696 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,697 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,698 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,699 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,700 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,701 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,702 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,703 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,704 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,705 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,706 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,707 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,708 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,709 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,710 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,711 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,712 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,713 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,715 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,716 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,717 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,718 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,719 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,720 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,721 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,722 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,723 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,724 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,725 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,726 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,727 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,728 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,729 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,730 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,731 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,732 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,733 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,734 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,735 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,736 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,737 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,738 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,739 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,740 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,741 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,742 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,743 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,744 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 08:55:27,745 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:27,832 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:27,917 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:28,274 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:55:28,277 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:55:28,592 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:28,592 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:30,592 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:31,051 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:31,121 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:31,207 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:31,592 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:32,593 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:34,593 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:34,752 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:34,803 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:34,887 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:35,594 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:36,594 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:38,376 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:38,428 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:38,508 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:38,595 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:38,595 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:40,596 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:41,990 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:42,042 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:42,126 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:42,596 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:42,596 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:43,314 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:55:43,315 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:55:44,597 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:45,561 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:45,611 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:45,694 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:46,617 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:46,617 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:48,618 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:49,060 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:49,113 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:49,197 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:49,618 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:50,618 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:52,619 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:52,643 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:52,693 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:52,774 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:53,619 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:54,620 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:56,173 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:56,223 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:56,313 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:55:56,620 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:55:56,943 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:55:57,621 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:58,440 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:55:58,682 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:58,774 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:55:59,651 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:55:59,702 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:55:59,718 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/config.yaml +2022-03-02 08:55:59,729 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:55:59,787 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:00,708 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:00,709 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:01,709 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:03,175 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:03,226 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:03,307 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:03,709 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:03,710 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:04,710 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:05,710 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:06,589 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:06,647 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:06,730 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:07,728 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:07,728 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:08,728 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:09,729 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:09,986 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:10,038 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:10,117 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:10,729 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:10,730 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:11,730 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:13,377 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:13,430 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:13,510 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:13,730 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:13,731 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:13,985 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:56:13,986 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:56:14,731 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:15,731 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:16,760 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:16,820 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:16,903 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:17,732 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:17,732 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:18,732 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:19,733 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:20,142 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:20,192 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:20,274 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:20,733 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:21,734 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:23,505 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:23,556 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:23,638 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:23,734 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:23,735 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:25,735 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:26,735 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:26,868 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:26,928 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:27,012 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:27,448 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:56:27,736 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:28,736 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:29,111 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:56:29,111 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:56:29,736 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:30,134 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:30,187 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:30,272 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:30,737 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:30,737 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:31,737 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:32,737 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:33,425 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:33,476 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:33,558 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:33,738 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:34,738 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:35,739 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:36,727 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:36,780 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:36,780 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:36,864 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:37,780 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:38,781 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:39,781 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:40,045 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:40,099 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:40,185 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:40,781 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:40,782 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:41,782 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:42,782 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:43,322 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:43,377 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:43,459 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:43,782 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:44,184 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:56:44,185 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:56:44,783 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:45,783 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:46,581 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:46,635 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:46,744 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:46,784 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:46,784 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:47,784 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:48,784 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:49,813 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:49,867 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:49,952 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:50,785 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:50,785 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:51,785 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:52,786 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:53,037 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:53,088 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:53,169 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:53,786 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:54,786 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:55,787 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:56,131 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:56,180 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:56,261 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:56,787 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:56:56,788 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:57,788 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:58,098 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:56:58,788 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:56:59,210 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:56:59,259 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:56:59,341 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:56:59,346 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:56:59,348 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:56:59,789 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:00,789 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:01,789 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:02,300 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:02,352 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:02,431 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:02,790 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:02,790 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:03,790 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:04,790 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:05,356 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:05,408 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:05,486 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:05,791 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:06,791 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:07,791 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:08,386 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:08,437 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:08,524 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:08,792 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:08,792 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:09,792 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:10,792 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:11,391 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:11,445 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:11,527 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:11,793 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:11,793 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:13,793 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:14,308 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:14,362 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:14,443 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:14,634 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:57:14,636 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:57:14,794 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:15,794 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:17,248 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:17,300 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:17,382 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:17,794 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:17,795 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:19,795 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:20,101 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:20,154 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:20,241 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:20,795 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:21,796 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:22,916 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:22,967 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:23,049 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:23,797 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:23,797 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:24,797 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:25,702 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:25,755 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:25,839 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:25,841 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:26,839 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:26,840 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:27,840 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:28,455 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:28,512 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:28,597 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:28,731 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:57:28,840 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:29,841 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:29,989 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:57:29,989 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:57:30,841 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:31,123 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:31,176 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:31,259 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:31,841 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:31,842 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:32,842 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:33,679 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:33,732 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:33,815 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:33,842 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:33,842 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:34,842 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:35,843 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:36,194 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:36,245 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:36,330 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:36,843 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:37,844 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:38,551 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:38,604 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:38,724 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:38,844 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:38,844 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:39,844 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:40,771 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:40,822 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:40,905 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:40,907 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:41,905 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:41,905 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:42,887 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:42,939 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:42,939 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:43,023 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:43,939 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:43,940 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:44,755 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:44,808 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:44,894 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:44,940 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:44,940 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:45,167 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:57:45,169 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:57:45,940 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:46,462 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:46,515 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:46,599 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:46,940 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:46,941 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:47,941 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:48,055 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:48,108 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:48,192 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:48,941 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:48,941 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:49,493 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:49,547 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:49,638 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:49,942 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:49,942 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:50,761 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:50,815 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:50,900 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:50,942 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:50,942 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:51,942 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:51,962 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:52,008 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:52,116 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:52,943 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:52,943 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:53,742 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:53,915 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:53,998 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:53,999 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:54,998 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:54,998 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:55,998 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:57,430 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:57:57,484 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:57:57,567 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:57:57,999 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:57:59,000 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:57:59,619 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:58:00,253 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:58:00,254 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:58:01,000 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:01,071 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:01,124 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:01,209 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:02,001 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:03,001 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:04,676 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:04,721 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:04,804 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:05,002 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:05,002 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:07,002 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:08,263 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:08,320 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:08,403 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:09,003 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:09,003 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:11,004 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:11,810 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:11,859 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:11,963 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:12,004 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:13,005 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:15,005 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:15,300 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:58:15,302 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:58:15,398 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:15,399 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:15,483 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:16,006 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:17,006 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:18,791 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:18,842 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:18,925 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:19,007 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:19,007 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:21,007 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:22,325 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:22,370 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:22,454 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:23,008 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:23,008 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:25,009 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:25,788 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:25,838 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:25,917 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:26,009 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:27,009 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:29,010 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:29,333 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:29,383 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:29,485 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:30,010 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:30,124 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:58:30,668 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:58:30,670 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:58:31,011 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:32,807 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:32,859 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:32,940 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:33,011 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:33,012 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:35,012 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:36,012 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:36,220 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:36,274 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:36,358 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:37,013 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:37,013 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:38,013 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:39,013 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:39,588 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:39,640 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:39,719 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:40,014 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:41,014 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:42,015 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:43,008 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:43,062 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:43,062 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:43,141 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:44,060 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:45,060 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:45,723 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:58:45,724 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:58:46,061 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:46,315 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:46,367 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:46,447 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:47,061 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:48,062 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:49,560 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:49,611 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:49,692 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:50,062 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:50,063 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:52,063 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:52,862 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:52,913 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:52,993 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:53,063 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:54,064 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:56,064 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:56,082 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:56,132 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:56,214 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:58:57,065 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:58:58,065 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:58:59,370 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:58:59,420 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:58:59,498 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:00,066 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:00,066 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:00,735 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:59:00,772 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:59:00,774 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:59:02,067 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:02,587 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:02,640 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:02,721 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:03,067 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:04,067 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:05,823 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:05,875 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:05,956 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:06,068 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:06,068 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:08,069 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:09,097 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:09,149 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:09,229 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:10,069 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:10,070 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:12,070 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:12,307 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:12,362 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:12,447 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:13,071 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:14,071 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:15,579 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:15,630 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:15,711 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:15,861 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:59:15,863 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:59:16,072 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:16,072 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:18,072 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:18,725 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:18,776 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:18,880 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:19,073 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:20,073 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:21,073 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:21,875 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:21,926 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:22,008 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:22,074 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:22,074 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:23,074 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:24,074 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:24,996 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:25,047 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:25,126 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:25,127 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:26,126 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:27,127 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:28,092 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:28,143 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:28,153 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:28,225 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:29,143 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:29,144 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:30,144 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:31,002 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:59:31,003 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:59:31,178 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:31,229 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:31,329 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:31,363 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 08:59:32,144 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:32,145 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:33,145 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:34,145 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:34,190 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:34,238 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:34,318 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:35,145 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:35,146 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:37,146 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:37,190 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:37,242 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:37,322 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:38,146 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:39,147 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:40,180 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:40,232 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:40,315 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:41,147 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:41,148 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:43,135 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:43,186 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:43,191 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:43,302 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:44,186 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:45,186 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:45,951 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:46,022 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:46,100 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:46,163 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 08:59:46,164 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 08:59:46,187 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:47,187 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:48,825 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:48,877 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:48,957 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:49,188 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:49,188 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:51,188 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:51,632 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:51,683 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:51,765 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:52,189 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:53,189 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:54,359 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:54,411 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:54,491 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:55,190 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:55,190 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:57,057 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:57,134 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:57,213 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:57,215 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 08:59:58,214 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 08:59:59,214 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 08:59:59,639 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 08:59:59,691 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 08:59:59,775 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:00,214 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:01,215 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:01,417 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:00:01,419 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:00:01,941 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:00:02,133 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:02,187 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:02,273 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:03,272 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:03,272 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:04,532 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:04,585 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:04,666 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:05,273 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:05,273 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:06,787 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:06,837 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:06,914 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:07,273 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:07,274 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:08,942 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:08,992 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:09,072 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:09,274 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:09,274 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:10,897 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:10,944 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:11,049 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:11,275 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:11,275 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:12,780 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:12,832 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:12,914 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:13,275 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:13,276 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:14,479 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:14,533 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:14,616 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:15,276 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:15,276 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:15,968 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:16,021 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:16,103 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:16,276 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:16,277 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:16,481 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:00:16,483 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:00:17,277 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:17,323 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:17,376 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:17,458 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:18,277 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:18,277 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:18,547 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:18,599 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:18,676 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:19,277 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:19,278 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:20,269 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:20,380 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:20,428 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:20,504 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:21,317 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:21,317 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:23,317 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:23,959 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:24,010 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:24,090 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:24,318 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:25,318 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:27,319 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:27,578 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:27,626 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:27,705 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:28,319 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:29,319 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:30,320 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:31,213 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:31,262 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:31,338 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:31,339 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:31,619 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:00:31,620 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:00:32,338 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:32,719 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:00:33,338 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:34,339 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:34,785 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:34,836 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:34,914 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:35,339 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:35,339 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:36,339 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:38,345 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:38,381 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:38,397 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:38,476 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:39,371 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:40,372 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:41,890 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:41,939 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:42,018 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:42,372 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:42,373 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:44,373 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:45,444 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:45,493 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:45,570 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:46,374 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:46,374 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:46,782 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:00:46,784 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:00:48,374 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:48,979 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:49,031 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:49,112 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:49,375 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:50,375 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:52,376 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:52,483 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:52,531 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:52,609 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:53,376 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:54,377 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:55,953 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:56,002 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:56,081 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:00:56,377 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:00:56,378 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:58,378 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:00:59,413 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:00:59,464 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:00:59,544 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:00,379 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:00,379 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:01,893 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:01:01,894 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:01:02,380 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:02,823 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:02,874 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:02,951 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:03,116 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:01:03,380 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:04,380 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:06,193 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:06,242 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:06,321 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:06,381 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:06,381 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:08,382 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:09,549 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:09,604 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:09,686 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:10,382 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:10,383 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:12,383 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:12,940 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:12,989 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:13,065 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:13,383 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:14,384 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:16,289 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:16,340 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:16,414 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:16,416 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:16,969 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:01:16,970 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:01:17,415 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:18,415 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:19,611 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:19,659 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:19,742 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:20,416 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:20,416 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:22,416 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:22,906 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:22,955 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:23,033 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:23,416 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:24,417 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:26,182 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:26,231 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:26,306 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:26,417 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:26,418 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:29,418 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:29,501 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:29,551 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:29,627 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:30,419 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:31,419 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:32,045 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:01:32,046 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:01:32,755 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:32,805 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:32,887 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:33,420 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:33,420 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:33,724 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:01:35,420 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:36,022 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:36,073 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:36,150 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:36,421 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:37,421 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:39,305 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:39,355 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:39,436 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:39,438 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:40,437 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:41,437 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:42,494 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:42,547 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:42,629 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:43,438 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:43,438 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:45,438 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:45,687 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:45,737 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:45,856 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:46,439 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:47,123 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:01:47,124 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:01:47,439 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:48,783 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:48,834 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:48,914 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:49,440 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:49,440 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:51,440 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:51,942 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:51,991 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:52,071 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:52,441 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:53,441 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:55,041 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:55,093 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:55,172 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:55,442 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:55,442 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:57,443 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:01:58,072 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:01:58,144 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:01:58,220 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:01:58,443 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:01:59,443 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:01,173 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:01,223 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:01,299 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:01,444 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:01,444 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:02,320 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:02:02,321 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:02:03,445 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:04,240 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:04,290 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:04,390 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:04,424 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:02:04,445 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:05,445 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:07,217 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:07,267 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:07,341 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:07,446 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:07,446 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:09,446 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:10,181 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:10,224 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:10,303 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:10,447 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:11,447 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:13,009 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:13,059 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:13,139 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:13,448 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:13,448 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:15,448 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:15,869 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:15,918 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:15,992 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:16,449 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:17,391 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:02:17,392 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:02:17,449 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:18,449 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:18,669 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:18,718 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:18,797 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:19,450 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:19,450 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:20,450 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:21,462 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:21,513 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:21,595 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:22,451 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:22,451 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:23,451 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:24,207 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:24,259 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:24,338 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:24,451 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:24,452 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:25,452 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:26,452 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:26,859 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:26,909 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:26,988 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:27,453 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:27,453 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:28,453 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:29,413 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:29,463 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:29,542 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:30,540 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:30,540 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:31,540 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:31,815 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:31,866 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:31,946 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:32,541 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:32,541 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:32,644 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:02:32,646 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:02:33,541 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:34,082 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:34,132 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:34,213 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:34,541 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:34,542 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:34,947 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:02:35,542 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:36,172 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:36,221 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:36,300 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:36,542 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:36,542 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:37,542 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:38,174 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:38,223 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:38,302 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:38,543 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:38,543 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:39,543 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:40,074 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:40,125 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:40,204 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:40,543 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:40,544 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:41,544 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:41,806 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:41,857 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:41,938 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:42,544 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:42,545 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:43,348 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:43,397 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:43,475 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:43,545 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:43,545 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:44,545 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:44,718 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:44,767 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:44,846 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:45,545 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:45,546 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:45,918 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:45,969 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:46,047 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:46,546 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:46,546 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:47,546 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:47,763 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:47,931 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:02:47,932 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:48,012 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:48,013 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:02:48,547 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:48,547 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:50,547 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:51,397 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:51,450 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:51,537 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:51,548 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:52,548 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:54,549 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:55,079 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:55,129 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:55,210 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:55,549 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:02:56,549 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:58,550 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:02:58,686 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:02:58,738 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:02:58,821 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:02:59,551 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:00,551 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:02,309 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:02,359 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:02,440 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:02,552 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:02,552 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:03,247 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:03:03,248 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:03:04,552 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:05,499 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:03:05,830 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:05,882 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:05,964 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:06,553 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:06,553 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:08,553 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:09,308 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:09,358 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:09,439 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:09,554 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:10,554 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:12,555 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:12,798 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:12,849 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:12,930 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:13,555 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:14,556 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:16,303 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:16,356 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:16,438 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:16,556 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:16,556 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:18,440 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:03:18,441 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:03:18,557 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:19,557 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:19,750 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:19,804 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:19,888 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:20,558 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:20,558 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:21,558 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:23,181 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:23,233 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:23,317 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:23,559 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:23,559 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:24,559 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:25,560 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:26,633 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:26,686 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:26,765 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:27,560 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:27,560 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:28,561 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:29,561 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:30,010 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:30,060 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:30,138 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:30,561 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:30,562 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:31,562 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:33,413 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:33,463 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:33,543 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:33,562 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:33,563 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:33,715 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:03:33,716 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:03:34,563 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:35,563 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:35,933 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:03:36,768 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:36,822 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:36,904 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:37,564 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:37,564 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:38,564 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:39,564 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:40,093 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:40,144 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:40,225 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:40,565 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:41,565 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:42,565 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:43,390 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:43,440 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:43,524 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:43,566 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:43,566 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:44,566 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:45,566 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:46,721 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:46,773 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:46,892 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:47,567 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:48,567 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:48,853 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:03:48,854 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:03:49,568 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:50,044 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:50,098 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:50,181 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:50,568 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:51,568 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:53,287 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:53,341 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:53,423 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:53,569 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:53,569 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:55,570 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:56,513 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:56,564 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:56,654 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:03:57,647 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:03:57,647 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:59,648 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:03:59,724 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:03:59,778 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:03:59,863 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:00,648 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:01,649 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:02,976 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:03,027 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:03,106 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:03,649 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:03,649 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:03,945 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:04:03,946 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:04:05,650 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:06,199 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:06,250 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:06,332 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:06,415 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:04:06,650 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:07,650 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:09,390 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:09,440 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:09,522 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:09,651 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:09,651 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:11,652 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:12,540 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:12,593 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:12,676 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:13,675 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:13,675 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:15,676 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:15,696 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:15,748 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:15,829 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:16,676 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:17,676 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:18,677 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:18,776 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:18,828 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:18,911 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:19,054 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:04:19,056 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:04:19,677 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:20,677 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:21,678 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:21,863 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:21,914 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:21,995 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:22,678 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:22,678 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:23,678 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:24,679 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:24,912 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:24,965 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:25,049 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:25,679 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:25,679 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:26,679 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:27,919 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:27,970 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:28,050 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:28,680 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:28,680 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:29,680 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:30,681 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:30,914 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:30,965 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:31,045 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:31,681 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:31,681 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:32,681 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:33,887 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:33,943 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:34,029 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:34,350 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:04:34,351 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:04:34,682 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:34,683 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:35,683 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:36,683 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:36,746 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:36,800 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:36,883 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:36,938 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:04:37,683 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:37,684 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:38,684 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:39,582 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:39,637 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:39,716 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:40,715 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:40,715 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:41,716 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:42,348 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:42,399 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:42,479 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:42,716 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:42,716 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:43,716 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:44,717 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:45,116 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:45,167 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:45,253 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:45,717 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:45,718 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:46,717 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:47,770 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:47,822 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:47,901 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:48,718 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:48,719 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:49,473 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:04:49,474 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:04:49,719 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:50,365 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:50,418 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:50,501 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:50,719 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:50,719 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:51,719 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:52,720 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:52,901 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:52,951 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:53,030 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:53,720 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:53,720 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:54,720 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:55,312 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:55,366 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:55,447 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:55,721 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:55,721 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:56,721 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:57,481 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:57,533 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:57,614 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:57,721 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:04:58,722 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:04:59,511 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:04:59,564 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:04:59,644 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:04:59,722 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:00,723 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:01,393 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:01,438 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:01,519 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:01,723 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:02,723 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:03,096 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:03,149 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:03,232 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:03,724 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:04,553 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:05:04,554 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:05:04,691 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:04,743 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:04,758 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:04,827 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:05,749 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:06,166 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:06,217 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:06,298 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:06,749 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:06,749 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:07,517 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:07,569 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:07,650 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:07,675 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:05:07,750 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:08,750 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:08,761 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:08,812 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:08,892 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:09,750 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:09,934 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:09,990 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:10,078 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:10,751 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:10,751 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:11,663 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:11,826 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:11,908 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:12,785 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:12,785 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:14,786 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:15,458 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:15,509 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:15,593 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:15,786 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:16,786 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:18,787 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:19,025 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:19,076 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:19,159 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:19,713 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:05:19,714 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:05:19,787 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:20,788 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:22,667 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:22,721 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:22,801 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:22,803 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:23,802 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:24,802 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:26,301 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:26,351 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:26,430 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:26,803 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:26,803 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:28,804 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:29,824 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:29,876 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:29,956 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:30,804 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:30,804 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:32,805 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:33,317 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:33,368 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:33,451 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:33,805 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:34,805 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:34,816 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:05:34,817 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:05:36,806 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:36,822 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:36,874 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:36,955 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:37,807 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:38,338 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:05:38,807 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:40,334 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:40,388 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:40,474 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:40,808 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:41,808 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:42,808 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:43,809 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:43,876 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:43,930 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:44,059 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:44,809 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:44,809 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:45,809 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:47,401 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:47,448 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:47,528 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:47,810 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:47,810 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:48,811 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:49,811 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:49,869 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:05:49,871 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:05:50,812 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:50,865 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:50,944 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:51,848 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:51,849 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:52,849 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:53,849 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:54,134 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:54,185 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:54,263 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:54,849 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:54,850 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:55,850 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:57,465 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:05:57,516 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:05:57,600 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:05:57,850 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:05:57,851 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:58,851 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:05:59,851 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:00,849 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:00,898 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:00,975 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:01,896 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:01,896 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:02,896 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:03,897 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:04,122 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:04,171 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:04,251 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:04,897 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:04,923 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:06:04,925 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:06:05,897 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:07,400 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:07,450 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:07,528 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:07,898 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:07,898 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:08,965 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:06:09,899 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:10,735 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:10,784 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:10,865 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:10,899 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:11,899 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:13,900 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:13,992 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:14,044 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:14,125 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:14,900 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:15,901 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:17,231 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:17,282 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:17,361 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:17,901 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:17,902 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:19,902 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:19,972 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:06:19,973 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:06:20,524 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:20,578 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:20,662 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:20,902 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:21,903 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:23,763 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:23,815 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:23,895 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:23,903 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:24,904 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:25,904 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:26,904 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:26,946 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:26,998 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:27,082 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:27,905 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:27,905 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:28,905 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:30,170 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:30,220 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:30,297 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:30,906 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:30,907 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:31,907 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:32,907 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:33,329 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:33,379 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:33,459 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:33,908 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:34,908 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:35,110 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:06:35,112 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:06:35,908 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:36,441 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:36,494 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:36,576 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:36,909 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:36,909 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:37,909 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:38,910 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:39,364 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:06:39,637 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:39,691 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:39,774 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:39,910 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:40,910 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:41,911 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:42,783 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:42,838 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:42,920 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:42,922 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:43,921 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:43,921 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:44,921 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:45,884 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:45,938 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:46,019 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:46,939 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:46,939 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:47,939 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:48,939 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:48,993 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:49,043 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:49,123 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:49,941 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:49,941 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:50,251 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:06:50,253 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:06:50,942 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:52,053 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:52,103 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:52,184 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:52,942 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:52,943 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:54,943 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:55,112 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:55,158 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:55,237 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:55,943 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:56,944 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:06:58,135 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:06:58,185 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:06:58,267 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:06:58,945 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:06:58,945 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:00,945 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:01,072 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:01,124 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:01,206 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:01,945 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:02,946 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:03,966 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:04,016 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:04,096 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:04,947 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:04,947 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:05,297 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:07:05,299 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:07:06,807 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:06,859 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:06,941 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:06,947 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:06,948 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:08,948 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:09,645 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:09,696 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:09,779 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:09,917 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:07:09,948 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:09,949 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:10,949 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:11,949 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:12,463 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:12,518 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:12,606 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:12,950 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:13,950 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:14,950 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:15,122 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:15,172 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:15,252 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:15,951 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:15,951 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:16,951 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:17,705 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:17,755 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:17,836 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:17,952 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:17,952 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:18,952 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:19,952 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:20,199 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:20,253 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:20,338 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:20,464 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:07:20,466 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:07:20,953 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:20,953 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:21,953 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:22,536 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:22,599 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:22,681 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:22,953 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:22,954 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:23,954 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:24,750 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:24,804 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:24,887 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:24,954 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:25,954 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:26,824 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:26,875 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:26,954 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:26,955 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:26,955 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:27,955 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:28,782 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:28,829 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:28,911 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:28,956 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:28,956 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:29,956 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:30,615 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:30,673 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:30,755 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:30,956 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:30,957 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:31,957 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:32,328 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:32,369 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:32,449 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:32,957 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:32,957 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:33,905 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:33,946 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:34,026 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:34,028 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:35,027 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:35,027 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:35,311 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:35,365 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:35,446 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:35,624 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:07:35,626 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:07:36,027 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:36,027 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:36,998 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:36,998 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:37,083 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:37,085 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:38,084 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:38,084 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:38,398 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:38,575 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:38,663 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:39,084 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:40,084 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:40,720 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:07:41,085 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:42,085 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:42,132 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:42,190 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:42,318 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:43,085 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:43,086 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:44,086 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:45,823 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:45,868 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:45,955 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:46,086 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:46,087 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:47,087 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:48,087 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:49,483 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:49,538 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:49,627 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:50,088 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:50,088 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:51,088 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:52,030 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:07:52,031 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:07:52,088 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:53,165 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:53,221 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:53,312 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:54,089 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:54,089 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:55,089 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:56,090 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:07:56,783 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:07:56,840 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:07:56,951 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:07:57,090 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:07:58,090 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:00,091 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:00,366 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:00,419 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:00,505 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:01,091 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:02,092 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:03,853 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:03,909 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:03,995 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:04,092 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:04,092 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:06,093 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:07,081 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:08:07,083 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:08:07,093 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:07,344 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:07,399 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:07,509 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:08,094 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:09,094 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:10,094 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:10,828 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:10,880 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:10,991 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:11,095 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:11,095 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:11,226 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:08:12,095 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:13,095 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:14,255 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:14,312 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:14,403 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:15,096 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:15,096 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:16,096 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:17,097 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:17,661 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:17,713 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:17,799 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:18,097 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:19,098 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:20,098 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:21,106 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:21,117 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:21,161 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:21,251 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:22,108 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:22,108 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:22,134 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:08:22,135 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:08:23,108 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:24,513 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:24,567 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:24,660 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:25,109 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:25,109 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:26,109 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:27,110 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:27,938 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:27,994 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:28,084 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:28,110 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:29,110 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:30,111 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:31,111 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:31,331 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:31,394 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:31,482 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:32,111 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:33,112 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:34,112 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:34,679 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:34,736 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:34,826 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:35,112 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:35,113 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:36,113 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:37,113 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:37,197 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:08:37,199 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:08:38,015 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:38,071 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:38,157 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:39,156 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:39,156 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:40,156 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:41,156 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:41,355 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:41,410 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:41,498 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:41,834 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:08:42,157 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:42,157 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:43,157 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:44,630 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:44,685 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:44,774 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:45,158 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:45,158 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:46,158 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:47,158 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:47,965 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:48,019 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:48,105 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:48,159 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:49,159 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:50,160 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:51,160 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:51,231 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:51,288 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:51,380 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:52,160 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:52,160 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:52,247 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:08:52,248 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:08:53,161 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:54,486 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:54,544 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:54,630 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:55,161 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:55,161 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:57,162 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:08:57,689 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:08:57,744 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:08:57,832 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:08:58,162 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:08:59,162 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:00,839 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:00,893 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:00,982 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:01,163 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:02,163 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:03,164 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:03,967 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:04,023 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:04,111 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:04,164 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:04,164 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:05,164 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:06,165 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:07,112 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:07,168 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:07,257 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:07,321 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:09:07,322 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:09:08,169 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:08,169 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:09,169 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:10,169 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:10,243 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:10,299 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:10,392 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:11,170 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:11,170 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:12,170 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:12,270 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:09:13,352 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:13,410 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:13,500 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:14,171 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:14,171 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:15,171 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:16,172 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:16,440 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:16,496 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:16,585 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:17,172 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:17,172 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:18,172 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:19,510 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:19,563 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:19,650 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:20,173 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:20,174 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:21,174 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:22,174 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:22,397 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:09:22,398 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:09:22,502 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:22,556 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:22,642 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:23,174 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:23,175 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:24,175 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:25,488 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:25,546 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:25,632 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:26,175 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:26,176 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:27,176 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:28,176 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:28,444 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:28,498 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:28,587 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:29,176 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:29,177 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:30,177 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:31,321 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:31,378 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:31,466 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:32,177 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:32,178 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:34,113 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:34,166 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:34,256 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:34,258 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:35,256 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:35,256 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:36,257 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:36,929 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:36,987 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:37,076 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:37,257 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:37,465 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:09:37,466 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:09:38,257 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:39,258 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:39,677 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:39,764 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:39,855 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:40,258 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:40,258 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:42,259 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:42,420 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:42,475 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:42,563 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:42,918 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:09:43,259 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:44,259 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:45,045 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:45,098 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:45,185 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:45,260 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:46,260 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:47,598 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:47,655 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:47,739 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:48,261 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:48,261 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:50,124 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:50,178 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:50,266 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:50,267 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:50,267 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:52,267 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:52,475 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:52,531 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:52,616 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:09:52,619 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:52,619 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:09:53,267 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:53,267 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:54,268 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:54,667 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:54,723 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:54,809 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:55,268 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:55,268 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:56,268 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:56,747 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:56,804 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:56,892 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:57,269 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:57,269 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:58,269 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:09:58,647 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:09:58,700 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:09:58,830 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:09:59,269 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:09:59,270 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:00,270 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:00,433 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:00,487 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:00,573 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:01,270 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:01,270 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:02,045 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:02,100 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:02,186 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:02,270 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:02,271 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:03,271 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:03,478 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:03,542 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:03,629 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:04,271 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:04,271 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:04,726 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:04,783 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:04,874 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:05,271 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:05,272 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:06,272 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:06,523 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:06,702 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:06,789 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:07,272 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:07,272 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:07,868 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:10:07,870 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:10:08,272 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:09,273 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:10,299 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:10,354 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:10,447 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:11,274 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:11,274 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:12,274 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:13,275 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:13,605 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:10:14,013 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:14,062 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:14,148 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:14,275 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:15,275 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:16,276 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:17,276 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:17,646 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:17,704 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:17,796 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:18,276 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:18,276 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:19,276 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:21,250 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:21,307 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:21,307 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:21,392 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:22,298 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:22,299 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:22,937 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:10:22,938 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:10:23,299 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:24,824 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:24,901 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:24,991 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:25,299 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:25,300 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:26,300 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:27,300 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:28,385 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:28,441 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:28,526 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:29,301 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:29,301 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:30,301 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:31,301 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:31,851 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:31,907 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:31,991 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:32,302 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:32,302 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:33,302 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:35,303 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:35,339 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:35,391 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:35,501 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:36,303 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:37,304 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:38,225 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:10:38,226 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:10:38,850 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:38,906 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:38,992 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:39,304 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:39,304 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:41,305 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:42,305 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:42,345 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:42,390 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:42,475 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:43,305 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:44,306 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:44,310 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:10:45,306 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:45,782 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:45,857 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:45,945 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:46,307 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:46,307 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:47,307 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:48,307 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:49,184 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:49,236 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:49,322 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:50,321 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:50,321 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:51,321 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:52,321 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:52,569 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:52,622 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:52,708 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:53,294 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:10:53,295 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:10:53,322 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:53,322 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:54,322 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:55,884 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:55,939 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:56,026 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:10:56,323 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:10:56,323 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:57,323 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:58,323 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:10:59,188 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:10:59,240 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:10:59,325 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:00,324 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:00,324 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:01,324 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:02,325 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:02,460 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:02,515 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:02,599 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:03,325 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:03,325 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:04,326 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:05,720 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:05,780 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:05,871 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:06,326 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:06,327 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:07,327 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:08,327 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:08,346 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:11:08,347 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:11:08,986 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:09,043 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:09,131 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:09,327 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:10,328 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:11,328 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:12,364 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:12,365 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:12,388 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:12,481 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:13,355 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:13,355 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:14,355 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:14,936 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:11:15,587 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:15,645 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:15,735 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:16,356 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:16,356 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:17,356 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:18,357 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:18,799 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:18,854 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:18,944 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:19,357 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:20,358 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:22,024 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:22,075 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:22,160 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:22,358 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:22,359 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:23,404 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:11:23,405 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:11:24,359 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:25,160 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:25,216 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:25,308 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:25,359 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:26,360 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:28,289 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:28,343 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:28,428 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:28,429 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:28,430 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:30,429 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:31,398 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:31,451 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:31,466 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:31,541 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:32,456 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:33,457 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:34,457 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:34,466 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:34,518 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:34,605 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:35,457 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:35,458 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:36,458 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:37,458 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:37,573 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:37,631 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:37,720 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:38,454 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:11:38,455 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:11:38,458 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:39,459 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:40,459 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:40,710 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:40,764 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:40,886 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:41,460 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:41,460 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:42,460 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:43,460 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:43,816 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:43,873 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:43,960 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:44,461 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:45,398 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:11:45,461 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:46,461 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:46,829 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:46,883 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:46,970 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:47,462 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:47,462 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:48,462 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:49,462 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:49,826 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:49,880 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:49,967 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:50,463 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:51,463 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:52,463 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:52,811 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:52,864 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:52,950 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:53,464 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:53,464 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:53,516 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:11:53,517 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:11:54,464 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:55,464 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:55,724 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:55,778 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:55,865 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:56,465 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:57,465 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:58,465 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:11:58,497 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:11:58,551 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:11:58,635 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:11:59,466 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:11:59,466 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:00,466 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:01,275 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:01,329 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:01,417 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:01,466 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:01,467 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:02,467 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:03,467 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:03,985 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:04,039 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:04,123 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:04,467 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:05,468 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:06,468 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:06,667 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:06,730 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:06,860 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:07,469 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:07,469 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:08,469 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:08,559 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:12:08,560 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:12:09,261 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:09,318 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:09,404 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:09,469 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:09,469 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:10,470 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:11,470 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:11,820 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:11,881 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:11,968 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:12,470 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:12,471 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:13,471 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:14,311 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:14,361 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:14,448 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:14,471 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:15,471 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:16,087 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:12:16,472 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:16,619 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:16,674 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:16,762 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:17,472 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:17,472 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:18,472 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:18,821 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:18,879 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:18,969 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:19,473 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:19,473 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:20,473 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:20,890 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:20,946 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:21,036 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:21,473 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:21,474 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:22,474 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:22,809 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:22,866 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:22,955 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:23,474 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:23,474 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:23,625 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:12:23,626 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:12:24,474 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:24,556 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:24,612 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:24,702 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:25,475 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:25,475 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:26,117 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:26,170 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:26,256 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:26,475 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:26,475 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:27,448 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:27,502 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:27,503 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:27,588 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:28,503 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:28,503 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:28,637 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:28,695 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:28,783 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:29,503 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:29,503 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:29,777 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:29,834 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:29,944 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:30,503 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:30,504 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:31,529 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:31,557 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:31,707 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:31,794 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:32,547 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:32,548 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:33,548 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:35,199 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:35,252 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:35,340 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:35,548 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:36,549 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:38,549 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:38,710 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:12:38,712 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:12:38,938 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:38,997 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:39,085 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:39,550 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:40,550 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:42,551 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:42,560 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:42,615 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:42,701 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:43,551 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:44,551 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:46,102 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:46,181 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:46,269 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:46,552 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:46,552 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:46,803 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:12:48,553 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:49,682 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:49,739 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:49,824 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:50,554 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:50,554 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:51,554 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:52,554 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:53,281 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:53,335 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:53,424 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:53,555 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:53,919 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:12:53,920 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:12:54,555 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:55,555 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:56,556 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:56,712 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:12:56,767 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:12:56,853 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:12:57,556 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:12:58,556 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:12:59,557 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:00,171 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:00,224 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:00,311 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:00,557 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:00,557 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:01,558 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:02,558 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:03,580 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:03,632 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:03,718 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:04,558 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:04,559 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:05,559 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:06,559 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:06,995 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:07,048 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:07,133 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:07,559 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:08,560 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:09,177 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:13:09,179 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:13:09,560 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:10,400 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:10,455 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:10,545 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:10,560 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:10,561 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:11,561 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:12,561 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:13,782 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:13,840 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:13,933 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:14,562 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:14,562 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:15,562 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:16,562 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:17,203 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:17,256 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:17,344 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:17,391 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:13:17,563 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:18,563 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:19,563 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:20,561 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:20,618 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:20,619 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:20,706 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:21,609 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:21,609 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:22,609 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:23,962 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:24,014 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:24,101 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:24,344 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:13:24,345 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:13:24,610 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:24,610 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:25,610 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:26,610 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:27,286 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:27,344 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:27,435 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:27,611 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:28,611 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:29,611 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:30,598 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:30,659 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:30,749 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:31,651 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:31,651 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:33,652 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:33,998 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:34,053 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:34,141 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:34,652 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:35,652 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:37,224 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:37,275 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:37,366 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:37,653 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:37,653 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:39,475 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:13:39,476 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:13:39,654 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:40,578 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:40,633 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:40,730 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:41,729 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:41,729 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:43,730 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:43,914 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:43,969 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:44,056 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:44,730 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:45,730 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:47,138 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:47,195 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:47,282 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:47,731 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:47,731 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:47,840 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:13:48,731 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:49,732 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:50,354 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:50,409 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:50,497 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:50,732 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:51,732 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:52,733 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:53,560 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:53,614 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:53,701 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:53,733 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:53,733 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:54,554 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:13:54,555 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:13:54,733 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:55,734 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:56,732 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:56,789 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:56,879 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:13:57,766 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:13:57,766 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:58,766 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:59,766 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:13:59,837 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:13:59,891 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:13:59,980 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:00,767 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:01,767 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:02,767 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:02,967 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:03,023 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:03,111 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:03,768 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:03,768 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:04,768 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:05,769 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:06,021 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:06,075 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:06,164 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:06,769 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:07,769 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:08,770 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:09,042 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:09,104 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:09,227 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:09,636 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:14:09,637 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:14:09,770 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:09,770 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:10,771 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:11,771 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:12,043 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:12,098 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:12,186 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:12,771 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:14,772 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:15,051 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:15,105 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:15,190 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:15,772 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:16,773 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:17,986 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:18,040 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:18,126 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:18,392 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:14:18,773 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:18,774 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:20,774 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:20,842 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:20,899 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:20,989 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:21,774 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:22,775 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:23,702 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:23,757 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:23,844 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:24,691 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:14:24,693 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:14:24,843 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:24,843 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:26,519 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:26,574 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:26,660 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:26,844 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:26,844 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:28,844 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:29,233 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:29,286 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:29,375 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:29,845 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:30,845 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:31,901 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:31,955 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:32,041 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:32,846 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:32,846 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:34,586 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:34,639 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:34,726 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:34,847 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:34,847 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:36,847 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:37,202 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:37,257 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:37,345 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:37,847 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:38,848 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:39,726 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:39,795 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:14:39,795 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:39,890 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:39,891 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:14:40,889 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:40,889 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:42,100 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:42,155 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:42,242 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:42,889 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:42,890 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:44,259 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:44,313 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:44,398 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:44,890 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:44,890 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:46,189 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:46,240 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:46,326 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:46,891 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:46,891 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:47,979 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:48,035 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:48,123 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:48,891 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:48,892 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:48,988 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:14:49,702 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:49,757 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:49,845 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:49,892 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:50,892 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:51,293 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:51,338 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:51,423 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:51,892 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:52,690 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:52,745 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:52,835 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:52,893 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:52,893 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:53,926 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:53,981 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:54,070 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:54,894 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:54,894 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:55,052 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:14:55,053 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:14:55,173 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:55,174 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:55,262 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:55,894 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:56,894 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:14:56,978 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:14:57,156 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:14:57,240 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:14:57,895 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:14:58,895 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:00,639 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:00,693 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:00,782 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:00,896 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:00,896 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:02,896 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:03,896 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:04,361 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:04,415 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:04,505 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:04,897 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:05,897 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:07,898 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:07,967 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:08,024 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:08,111 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:08,898 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:09,899 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:10,393 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:15:10,394 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:15:11,610 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:11,665 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:11,754 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:11,900 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:11,900 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:13,900 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:15,188 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:15,237 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:15,324 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:15,901 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:15,901 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:17,902 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:18,756 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:18,810 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:18,901 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:18,902 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:19,735 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:15:19,902 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:21,903 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:22,337 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:22,388 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:22,474 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:22,903 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:23,904 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:25,509 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:15:25,510 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:15:25,858 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:25,912 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:25,948 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:26,033 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:26,939 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:27,939 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:29,364 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:29,420 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:29,508 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:29,940 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:29,940 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:31,941 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:32,846 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:32,904 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:32,993 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:33,991 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:33,991 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:35,991 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:36,287 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:36,343 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:36,432 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:36,992 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:37,992 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:39,705 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:39,761 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:39,849 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:39,993 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:39,993 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:40,561 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:15:40,562 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:15:41,993 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:43,145 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:43,199 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:43,286 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:43,994 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:43,995 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:45,995 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:46,564 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:46,618 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:46,704 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:46,995 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:47,996 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:49,911 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:49,967 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:50,054 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:50,055 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:50,290 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:15:51,054 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:52,054 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:53,258 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:53,312 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:53,398 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:54,055 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:54,055 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:55,617 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:15:55,618 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:15:56,056 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:56,612 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:56,665 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:15:56,755 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:15:57,056 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:15:58,056 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:15:59,900 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:15:59,973 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:00,060 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:00,062 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:01,061 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:02,061 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:03,185 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:03,237 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:03,323 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:04,062 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:04,062 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:06,062 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:06,456 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:06,511 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:06,599 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:07,063 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:08,063 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:09,708 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:09,763 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:09,895 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:10,064 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:10,665 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:16:10,666 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:16:11,064 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:12,064 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:12,981 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:13,037 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:13,124 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:13,126 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:13,126 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:14,124 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:15,125 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:16,223 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:16,279 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:16,367 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:17,125 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:17,126 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:18,126 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:19,126 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:19,462 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:19,518 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:19,606 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:20,127 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:20,127 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:20,852 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:16:21,127 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:22,697 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:22,753 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:22,840 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:23,128 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:23,128 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:25,128 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:25,715 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:16:25,716 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:16:25,856 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:25,886 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:25,971 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:26,129 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:27,129 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:28,990 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:29,045 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:29,134 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:29,135 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:30,134 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:31,134 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:32,086 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:32,140 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:32,229 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:33,146 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:33,146 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:35,147 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:35,174 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:35,228 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:35,314 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:36,147 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:37,147 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:38,260 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:38,318 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:38,409 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:39,148 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:39,149 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:41,112 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:16:41,113 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:16:41,149 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:41,299 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:41,355 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:41,443 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:42,149 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:43,150 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:44,391 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:44,446 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:44,537 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:45,150 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:45,151 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:47,151 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:47,375 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:47,430 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:47,517 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:48,151 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:49,152 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:50,300 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:50,356 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:50,446 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:51,152 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:51,153 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:51,384 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:16:52,153 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:53,184 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:53,238 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:53,325 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:54,153 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:54,154 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:55,154 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:56,037 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:56,091 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:56,181 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:56,183 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:56,349 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:16:56,351 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:16:57,181 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:16:57,181 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:58,182 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:16:58,822 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:16:58,878 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:16:58,968 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:16:59,182 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:00,182 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:01,512 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:01,570 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:01,662 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:02,183 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:02,183 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:04,128 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:04,180 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:04,264 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:04,266 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:05,264 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:06,265 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:06,681 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:06,736 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:06,826 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:07,265 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:08,266 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:09,141 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:09,196 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:09,281 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:10,280 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:10,281 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:11,459 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:11,511 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:11,597 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:11,640 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:17:11,641 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:17:12,281 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:12,281 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:13,626 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:13,681 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:13,765 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:14,282 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:14,282 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:15,631 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:15,685 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:15,771 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:16,282 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:16,283 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:17,548 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:17,603 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:17,689 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:18,283 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:18,283 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:19,300 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:19,355 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:19,440 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:20,284 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:20,284 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:20,848 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:20,903 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:20,988 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:21,284 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:22,024 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:17:22,213 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:22,269 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:22,358 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:22,360 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:23,359 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:23,437 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:23,491 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:23,577 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:24,359 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:24,359 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:25,165 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:17:25,340 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:17:25,459 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:17:26,458 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:17:26,458 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:26,778 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:17:26,780 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:17:28,459 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:32,460 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:34,461 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:36,462 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:38,462 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:40,463 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:41,824 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:17:41,824 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:17:44,464 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:46,465 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:48,465 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:52,467 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:52,514 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:17:54,468 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:17:56,887 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:17:56,888 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:17:58,469 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:00,470 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:02,471 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:06,472 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:10,473 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:11,949 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:18:11,950 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:18:12,474 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:14,475 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:16,476 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:20,477 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:22,478 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:22,853 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:18:26,479 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:27,389 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:18:27,389 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:18:30,481 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:34,482 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:36,483 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:38,483 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:42,436 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:18:42,436 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:18:42,487 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:44,488 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:48,489 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:50,490 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:52,490 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:53,201 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:18:54,491 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:18:57,488 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:18:57,488 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:18:58,492 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:00,493 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:04,494 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:06,495 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:10,496 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:12,496 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:12,585 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:19:12,586 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:19:16,498 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:18,498 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:20,499 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:23,544 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:19:24,501 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:27,631 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:19:27,631 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:19:28,502 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:30,503 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:34,504 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:36,505 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:38,505 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:40,506 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:42,683 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:19:42,684 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:19:44,507 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:46,508 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:50,509 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:52,509 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:53,894 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:19:54,510 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:56,511 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:19:57,774 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:19:57,775 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:20:01,512 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:05,514 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:07,515 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:09,515 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:12,866 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:20:12,866 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:20:13,517 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:15,517 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:17,518 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:21,519 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:24,236 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:20:25,521 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:27,521 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:27,948 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:20:27,948 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:20:29,522 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:33,523 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:37,525 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:41,526 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:43,175 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:20:43,176 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:20:43,527 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:47,528 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:51,529 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:53,530 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:54,590 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:20:57,531 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:20:58,223 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:20:58,224 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:20:59,532 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:03,533 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:05,534 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:09,535 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:11,536 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:13,267 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:21:13,267 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:21:13,537 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:15,538 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:19,539 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:21,540 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:24,931 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:21:25,541 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:28,316 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:21:28,316 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:21:29,542 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:31,543 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:35,545 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:37,545 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:39,546 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:43,410 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:21:43,411 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:21:43,548 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:45,548 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:47,549 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:49,550 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:53,551 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:55,275 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:21:55,552 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:21:58,467 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:21:58,468 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:21:59,553 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:01,554 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:03,555 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:07,556 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:09,557 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:11,558 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:13,511 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:22:13,512 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:22:15,559 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:17,560 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:19,560 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:23,562 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:25,562 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:25,619 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:22:28,568 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:22:28,568 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:22:29,564 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:31,564 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:33,565 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:35,566 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:39,567 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:41,568 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:43,619 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:22:43,620 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:22:45,569 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:47,570 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:49,571 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:51,571 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:55,573 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:55,968 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:22:57,573 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:22:58,675 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:22:58,675 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:22:59,574 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:03,575 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:05,576 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:09,578 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:11,578 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:13,722 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:23:13,722 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:23:15,580 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:17,580 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:21,582 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:25,583 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:26,319 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:23:28,777 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:23:28,778 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:23:29,584 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:31,585 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:35,587 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:39,588 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:41,588 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:43,828 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:23:43,828 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:23:45,590 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:47,590 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:49,591 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:53,592 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:55,593 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:56,666 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:23:57,594 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:23:58,915 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:23:58,916 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:23:59,594 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:03,596 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:05,596 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:09,598 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:11,598 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:13,960 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:24:13,960 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:24:15,600 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:17,601 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:21,602 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:23,603 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:27,012 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:24:27,604 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:29,017 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:24:29,017 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:24:29,605 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:33,606 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:35,607 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:37,607 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:41,609 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:43,609 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:44,066 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:24:44,066 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:24:45,610 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:49,611 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:51,612 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:55,613 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:24:57,366 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:24:59,124 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:24:59,124 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:24:59,615 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:01,615 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:05,617 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:07,617 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:11,619 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:14,169 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:25:14,169 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:25:15,620 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:17,621 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:21,622 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:23,623 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:27,624 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:27,714 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:25:29,236 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:25:29,237 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:25:29,625 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:31,625 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:35,627 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:37,627 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:39,628 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:43,629 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:44,303 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:25:44,303 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:25:45,630 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:48,631 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:52,632 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:54,633 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:58,066 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:25:58,634 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:25:59,357 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:25:59,357 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:26:00,635 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:04,636 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:08,638 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:10,639 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:12,639 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:14,405 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:26:14,405 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:26:14,640 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:18,641 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:20,642 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:22,643 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:26,644 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:28,409 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:26:28,645 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:29,456 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:26:29,456 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:26:30,645 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:32,646 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:34,647 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:38,648 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:40,649 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:42,649 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:44,536 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:26:44,537 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:26:44,650 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:48,651 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:50,652 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:52,653 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:56,654 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:58,655 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:26:58,753 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:26:59,583 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:26:59,583 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:27:00,656 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:02,656 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:06,658 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:08,658 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:12,660 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:14,660 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:14,683 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:27:14,683 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:27:18,662 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:20,662 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:22,663 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:24,664 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:28,665 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:29,105 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:27:29,730 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:27:29,730 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:27:30,666 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:32,666 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:34,667 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:36,668 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:40,669 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:42,670 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:44,799 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:27:44,799 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:27:46,671 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:48,672 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:50,672 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:54,674 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:56,674 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:58,675 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:27:59,448 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:27:59,853 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:27:59,854 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:28:02,677 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:04,677 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:08,679 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:10,680 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:12,681 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:14,923 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:28:14,924 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:28:16,682 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:18,682 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:22,684 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:24,684 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:26,685 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:29,792 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:28:29,971 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:28:29,972 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:28:30,686 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:32,687 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:36,688 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:38,689 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:42,690 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:44,691 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:45,016 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:28:45,017 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:28:48,692 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:52,694 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:54,695 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:28:58,696 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:00,062 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:29:00,062 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:29:00,132 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:29:02,697 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:04,698 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:08,699 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:12,701 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:14,701 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:15,131 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:29:15,132 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:29:16,702 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:20,704 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:22,704 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:24,705 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:28,706 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:30,184 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:29:30,185 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:29:30,479 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:29:30,707 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:34,708 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:36,709 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:40,710 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:42,711 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:44,711 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:45,229 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:29:45,229 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:29:48,713 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:50,713 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:52,714 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:56,715 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:29:58,716 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:00,288 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:30:00,288 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:30:00,717 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:00,828 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:30:02,718 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:06,719 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:08,720 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:12,721 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:15,371 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:30:15,371 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:30:16,722 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:18,723 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:22,724 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:24,725 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:28,726 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:30,434 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:30:30,434 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:30:30,727 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:31,175 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:30:34,728 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:36,729 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:38,730 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:42,731 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:45,483 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:30:45,483 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:30:46,732 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:48,733 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:52,734 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:55,735 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:30:59,737 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:00,538 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:31:00,538 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:31:01,516 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:31:03,738 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:07,739 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:09,740 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:13,741 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:15,587 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:31:15,587 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:31:15,742 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:19,743 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:21,744 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:23,745 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:27,746 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:29,747 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:30,746 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:31:30,747 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:31:31,750 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:31,865 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:31:33,750 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:37,752 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:41,753 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:43,754 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:45,792 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:31:45,792 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:31:47,755 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:49,756 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:53,757 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:55,758 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:31:57,759 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:00,843 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:32:00,844 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:32:01,760 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:02,208 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:32:05,762 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:07,762 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:11,764 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:15,765 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:15,888 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:32:15,889 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:32:17,766 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:21,767 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:23,768 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:27,769 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:29,770 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:30,974 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:32:30,974 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:32:32,569 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:32:33,771 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:35,772 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:39,773 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:41,774 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:45,775 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:46,029 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:32:46,030 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:32:47,776 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:51,777 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:53,778 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:57,779 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:32:59,780 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:33:01,088 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:33:01,088 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:33:02,933 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:33:03,781 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:33:07,783 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:33:09,783 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:33:13,785 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:33:15,785 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:33:16,165 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:33:16,166 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:33:19,787 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:33:21,039 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 09:33:21,039 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 09:33:21,039 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 09:33:21,039 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 09:33:21,039 DEBUG SenderThread:254666 [sender.py:send():235] send: metric +2022-03-02 09:33:21,039 DEBUG SenderThread:254666 [sender.py:send():235] send: history +2022-03-02 09:33:21,124 DEBUG SenderThread:254666 [sender.py:send():235] send: summary +2022-03-02 09:33:21,218 INFO SenderThread:254666 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-03-02 09:33:21,788 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/wandb-summary.json +2022-03-02 09:33:21,788 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:33:31,261 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:33:31,262 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:33:33,391 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:33:38,793 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:33:45,841 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/config.yaml +2022-03-02 09:33:46,309 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:33:46,309 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:34:01,360 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:34:01,360 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:34:01,847 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:34:03,777 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:34:16,423 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:34:16,424 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:34:31,474 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:34:31,474 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:34:34,155 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:34:46,537 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:34:46,537 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:35:01,590 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:35:01,590 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:35:04,533 DEBUG SenderThread:254666 [sender.py:send():235] send: stats +2022-03-02 09:35:11,870 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:35:12,870 INFO Thread-8 :254666 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/files/output.log +2022-03-02 09:35:16,703 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:35:16,703 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:35:31,754 DEBUG HandlerThread:254666 [handler.py:handle_request():131] handle_request: stop_status +2022-03-02 09:35:31,754 DEBUG SenderThread:254666 [sender.py:send_request():249] send_request: stop_status +2022-03-02 09:35:34,912 DEBUG SenderThread:254666 [sender.py:send():235] send: stats diff --git a/wandb/run-20220302_085255-16llzpbl/logs/debug.log b/wandb/run-20220302_085255-16llzpbl/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..465e50410e91353aad0fe748a21b2e47bf4ef248 --- /dev/null +++ b/wandb/run-20220302_085255-16llzpbl/logs/debug.log @@ -0,0 +1,27 @@ +2022-03-02 08:52:55,420 INFO MainThread:254567 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-03-02 08:52:55,420 INFO MainThread:254567 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/settings +2022-03-02 08:52:55,420 INFO MainThread:254567 [wandb_setup.py:_flush():75] Loading settings from environment variables: {} +2022-03-02 08:52:55,420 INFO MainThread:254567 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program': '/home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/run_speech_recognition_seq2seq.py'} +2022-03-02 08:52:55,420 INFO MainThread:254567 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/logs/debug.log +2022-03-02 08:52:55,420 INFO MainThread:254567 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/wav2vec2-gpt2-wandb-grid-search/wandb/run-20220302_085255-16llzpbl/logs/debug-internal.log +2022-03-02 08:52:55,420 INFO MainThread:254567 [wandb_init.py:init():420] calling init triggers +2022-03-02 08:52:55,421 INFO MainThread:254567 [wandb_init.py:init():425] wandb.init called with sweep_config: {} +config: {} +2022-03-02 08:52:55,421 INFO MainThread:254567 [wandb_init.py:init():471] starting backend +2022-03-02 08:52:55,421 INFO MainThread:254567 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-03-02 08:52:55,476 INFO MainThread:254567 [backend.py:ensure_launched():219] starting backend process... +2022-03-02 08:52:55,532 INFO MainThread:254567 [backend.py:ensure_launched():224] started backend process with pid: 254666 +2022-03-02 08:52:55,535 INFO MainThread:254567 [wandb_init.py:init():480] backend started and connected +2022-03-02 08:52:55,545 INFO MainThread:254567 [wandb_init.py:init():550] updated telemetry +2022-03-02 08:52:55,673 INFO MainThread:254567 [wandb_init.py:init():581] communicating current version +2022-03-02 08:52:56,392 INFO MainThread:254567 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-03-02 08:52:56,393 INFO MainThread:254567 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-03-02 08:52:56,522 INFO MainThread:254567 [wandb_init.py:init():624] starting run threads in backend +2022-03-02 08:52:56,630 INFO MainThread:254567 [wandb_run.py:_console_start():1827] atexit reg +2022-03-02 08:52:56,631 INFO MainThread:254567 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-03-02 08:52:56,631 INFO MainThread:254567 [wandb_run.py:_redirect():1706] Redirecting console. +2022-03-02 08:52:56,633 INFO MainThread:254567 [wandb_run.py:_redirect():1762] Redirects installed. +2022-03-02 08:52:56,633 INFO MainThread:254567 [wandb_init.py:init():651] run started, returning control to user process +2022-03-02 08:52:56,635 INFO MainThread:254567 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 50, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 50256, 'eos_token_id': 50256, 'sep_token_id': None, 'decoder_start_token_id': 50256, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50257, 'n_positions': 1024, 'n_embd': 1024, 'n_layer': 24, 'n_head': 16, 'n_inner': None, 'activation_function': 'gelu_new', 'resid_pdrop': 0.0, 'embd_pdrop': 0.0, 'attn_pdrop': 0.0, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'summary_type': 'cls_index', 'summary_use_proj': True, 'summary_activation': None, 'summary_first_dropout': 0.0, 'summary_proj_to_labels': True, 'scale_attn_weights': True, 'use_cache': False, 'scale_attn_by_inverse_layer_idx': False, 'reorder_and_upcast_attn': False, 'bos_token_id': 50256, 'eos_token_id': 50256, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['GPT2LMHeadModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 50}}, 'problem_type': None, '_name_or_path': 'gpt2-medium', 'transformers_version': '4.17.0.dev0', 'n_ctx': 1024, 'n_special': 0, 'predict_special_tokens': True, 'model_type': 'gpt2'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-large-lv60', 'transformers_version': '4.17.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'hidden_dropout_prob': 0.0, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.0, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': 'None', 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Mar02_08-52-14_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 1, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'input_length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'gradient_checkpointing': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 40, 'generation_num_beams': 1, 'train_batch_size': 8, 'eval_batch_size': 8} +2022-03-02 08:52:56,638 INFO MainThread:254567 [wandb_watch.py:watch():43] Watching diff --git a/wandb/run-20220302_085255-16llzpbl/run-16llzpbl.wandb b/wandb/run-20220302_085255-16llzpbl/run-16llzpbl.wandb new file mode 100644 index 0000000000000000000000000000000000000000..d66c0f09bbaec88f9fcf4c4596019635f3b81c23 --- /dev/null +++ b/wandb/run-20220302_085255-16llzpbl/run-16llzpbl.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59980b9f4befb497509320e80fff71153f5e5cfa4454f52d827ccadc769ac0ee +size 16031828